From 90948a42892779734f77d62f20326c868392fd8f Mon Sep 17 00:00:00 2001 From: Matthew Ogilvie Date: Wed, 14 May 2008 22:35:48 -0600 Subject: git-cvsserver: add ability to guess -kb from contents If "gitcvs.allbinary" is set to "guess", then any file that has not been explicitly marked as binary or text using the "crlf" attribute and the "gitcvs.usecrlfattr" config will guess binary based on the contents of the file. Signed-off-by: Matthew Ogilvie Signed-off-by: Junio C Hamano --- git-cvsserver.perl | 193 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 177 insertions(+), 16 deletions(-) (limited to 'git-cvsserver.perl') diff --git a/git-cvsserver.perl b/git-cvsserver.perl index 58206aed7c..920bbe15a3 100755 --- a/git-cvsserver.perl +++ b/git-cvsserver.perl @@ -502,7 +502,7 @@ sub req_add print $state->{CVSROOT} . "/$state->{module}/$filename\n"; # this is an "entries" line - my $kopts = kopts_from_path($filename); + my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash}); $log->debug("/$filepart/1.$meta->{revision}//$kopts/"); print "/$filepart/1.$meta->{revision}//$kopts/\n"; # permissions @@ -533,7 +533,8 @@ sub req_add print "Checked-in $dirpart\n"; print "$filename\n"; - my $kopts = kopts_from_path($filename); + my $kopts = kopts_from_path($filename,"file", + $state->{entries}{$filename}{modified_filename}); print "/$filepart/0//$kopts/\n"; my $requestedKopts = $state->{opt}{k}; @@ -631,7 +632,7 @@ sub req_remove print "Checked-in $dirpart\n"; print "$filename\n"; - my $kopts = kopts_from_path($filename); + my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash}); print "/$filepart/-1.$wrev//$kopts/\n"; $rmcount++; @@ -910,7 +911,7 @@ sub req_co print $state->{CVSROOT} . "/$module/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "$git->{name}\n"; # this is an "entries" line - my $kopts = kopts_from_path($fullName); + my $kopts = kopts_from_path($fullName,"sha1",$git->{filehash}); print "/$git->{name}/1.$git->{revision}//$kopts/\n"; # permissions print "u=$git->{mode},g=$git->{mode},o=$git->{mode}\n"; @@ -1119,7 +1120,7 @@ sub req_update print $state->{CVSROOT} . "/$state->{module}/$filename\n"; # this is an "entries" line - my $kopts = kopts_from_path($filename); + my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash}); $log->debug("/$filepart/1.$meta->{revision}//$kopts/"); print "/$filepart/1.$meta->{revision}//$kopts/\n"; @@ -1167,7 +1168,8 @@ sub req_update print "Merged $dirpart\n"; $log->debug($state->{CVSROOT} . "/$state->{module}/$filename"); print $state->{CVSROOT} . "/$state->{module}/$filename\n"; - my $kopts = kopts_from_path("$dirpart/$filepart"); + my $kopts = kopts_from_path("$dirpart/$filepart", + "file",$mergedFile); $log->debug("/$filepart/1.$meta->{revision}//$kopts/"); print "/$filepart/1.$meta->{revision}//$kopts/\n"; } @@ -1183,7 +1185,8 @@ sub req_update { print "Merged $dirpart\n"; print $state->{CVSROOT} . "/$state->{module}/$filename\n"; - my $kopts = kopts_from_path("$dirpart/$filepart"); + my $kopts = kopts_from_path("$dirpart/$filepart", + "file",$mergedFile); print "/$filepart/1.$meta->{revision}/+/$kopts/\n"; } } @@ -1434,7 +1437,7 @@ sub req_ci } print "Checked-in $dirpart\n"; print "$filename\n"; - my $kopts = kopts_from_path($filename); + my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash}); print "/$filepart/1.$meta->{revision}//$kopts/\n"; } } @@ -2312,7 +2315,7 @@ sub cleanupTmpDir # file should get -kb. sub kopts_from_path { - my ($path) = @_; + my ($path, $srcType, $name) = @_; if ( defined ( $cfg->{gitcvs}{usecrlfattr} ) and $cfg->{gitcvs}{usecrlfattr} =~ /\s*(1|true|yes)\s*$/i ) @@ -2332,15 +2335,55 @@ sub kopts_from_path } } - unless ( defined ( $cfg->{gitcvs}{allbinary} ) and $cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i ) + if ( defined ( $cfg->{gitcvs}{allbinary} ) ) { - # Return "" to give no special treatment to any path - return ""; - } else { - # Alternatively, to have all files treated as if they are binary (which - # is more like git itself), always return the "-kb" option - return "-kb"; + if( ($cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i) ) + { + return "-kb"; + } + elsif( ($cfg->{gitcvs}{allbinary} =~ /^\s*guess\s*$/i) ) + { + if( $srcType eq "sha1Or-k" && + !defined($name) ) + { + my ($ret)=$state->{entries}{$path}{options}; + if( !defined($ret) ) + { + $ret=$state->{opt}{k}; + if(defined($ret)) + { + $ret="-k$ret"; + } + else + { + $ret=""; + } + } + if( ! ($ret=~/^(|-kb|-kkv|-kkvl|-kk|-ko|-kv)$/) ) + { + print "E Bad -k option\n"; + $log->warn("Bad -k option: $ret"); + die "Error: Bad -k option: $ret\n"; + } + + return $ret; + } + else + { + if( is_binary($srcType,$name) ) + { + $log->debug("... as binary"); + return "-kb"; + } + else + { + $log->debug("... as text"); + } + } + } } + # Return "" to give no special treatment to any path + return ""; } sub check_attr @@ -2360,6 +2403,124 @@ sub check_attr } } +# This should have the same heuristics as convert.c:is_binary() and related. +# Note that the bare CR test is done by callers in convert.c. +sub is_binary +{ + my ($srcType,$name) = @_; + $log->debug("is_binary($srcType,$name)"); + + # Minimize amount of interpreted code run in the inner per-character + # loop for large files, by totalling each character value and + # then analyzing the totals. + my @counts; + my $i; + for($i=0;$i<256;$i++) + { + $counts[$i]=0; + } + + my $fh = open_blob_or_die($srcType,$name); + my $line; + while( defined($line=<$fh>) ) + { + # Any '\0' and bare CR are considered binary. + if( $line =~ /\0|(\r[^\n])/ ) + { + close($fh); + return 1; + } + + # Count up each character in the line: + my $len=length($line); + for($i=0;$i<$len;$i++) + { + $counts[ord(substr($line,$i,1))]++; + } + } + close $fh; + + # Don't count CR and LF as either printable/nonprintable + $counts[ord("\n")]=0; + $counts[ord("\r")]=0; + + # Categorize individual character count into printable and nonprintable: + my $printable=0; + my $nonprintable=0; + for($i=0;$i<256;$i++) + { + if( $i < 32 && + $i != ord("\b") && + $i != ord("\t") && + $i != 033 && # ESC + $i != 014 ) # FF + { + $nonprintable+=$counts[$i]; + } + elsif( $i==127 ) # DEL + { + $nonprintable+=$counts[$i]; + } + else + { + $printable+=$counts[$i]; + } + } + + return ($printable >> 7) < $nonprintable; +} + +# Returns open file handle. Possible invocations: +# - open_blob_or_die("file",$filename); +# - open_blob_or_die("sha1",$filehash); +sub open_blob_or_die +{ + my ($srcType,$name) = @_; + my ($fh); + if( $srcType eq "file" ) + { + if( !open $fh,"<",$name ) + { + $log->warn("Unable to open file $name: $!"); + die "Unable to open file $name: $!\n"; + } + } + elsif( $srcType eq "sha1" || $srcType eq "sha1Or-k" ) + { + unless ( defined ( $name ) and $name =~ /^[a-zA-Z0-9]{40}$/ ) + { + $log->warn("Need filehash"); + die "Need filehash\n"; + } + + my $type = `git cat-file -t $name`; + chomp $type; + + unless ( defined ( $type ) and $type eq "blob" ) + { + $log->warn("Invalid type '$type' for '$name'"); + die ( "Invalid type '$type' (expected 'blob')" ) + } + + my $size = `git cat-file -s $name`; + chomp $size; + + $log->debug("open_blob_or_die($name) size=$size, type=$type"); + + unless( open $fh, '-|', "git", "cat-file", "blob", $name ) + { + $log->warn("Unable to open sha1 $name"); + die "Unable to open sha1 $name\n"; + } + } + else + { + $log->warn("Unknown type of blob source: $srcType"); + die "Unknown type of blob source: $srcType\n"; + } + return $fh; +} + # Generate a CVS author name from Git author information, by taking # the first eight characters of the user part of the email address. sub cvs_author -- cgit v1.2.3