https://github.com/git/git
Revision 9de0834663f28bee9e6b2c4647ed6128241ed26f authored by Linus Torvalds on 17 October 2006, 02:58:54 UTC, committed by Junio C Hamano on 17 October 2006, 04:27:44 UTC
Jim Mayering noticed that xdiff library took insanely long time
when comparing files with many identical lines.

This was because the hash function used in the library is broken
on 64-bit architectures and caused too many collisions.

http://thread.gmane.org/gmane.comp.version-control.git/28962/focus=28994

Acked-by: Davide Libenzi <davidel@xmaliserver.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
1 parent 6fe5b7f
Raw File
Tip revision: 9de0834663f28bee9e6b2c4647ed6128241ed26f authored by Linus Torvalds on 17 October 2006, 02:58:54 UTC
Fix hash function in xdiff library
Tip revision: 9de0834
git-rerere.perl
#!/usr/bin/perl
#
# REuse REcorded REsolve.  This tool records a conflicted automerge
# result and its hand resolution, and helps to resolve future
# automerge that results in the same conflict.
#
# To enable this feature, create a directory 'rr-cache' under your
# .git/ directory.

use Digest;
use File::Path;
use File::Copy;

my $git_dir = $::ENV{GIT_DIR} || ".git";
my $rr_dir = "$git_dir/rr-cache";
my $merge_rr = "$git_dir/rr-cache/MERGE_RR";

my %merge_rr = ();

sub read_rr {
	if (!-f $merge_rr) {
		%merge_rr = ();
		return;
	}
	my $in;
	local $/ = "\0";
	open $in, "<$merge_rr" or die "$!: $merge_rr";
	while (<$in>) {
		chomp;
		my ($name, $path) = /^([0-9a-f]{40})\t(.*)$/s;
		$merge_rr{$path} = $name;
	}
	close $in;
}

sub write_rr {
	my $out;
	open $out, ">$merge_rr" or die "$!: $merge_rr";
	for my $path (sort keys %merge_rr) {
		my $name = $merge_rr{$path};
		print $out "$name\t$path\0";
	}
	close $out;
}

sub compute_conflict_name {
	my ($path) = @_;
	my @side = ();
	my $in;
	open $in, "<$path"  or die "$!: $path";

	my $sha1 = Digest->new("SHA-1");
	my $hunk = 0;
	while (<$in>) {
		if (/^<<<<<<< .*/) {
			$hunk++;
			@side = ([], undef);
		}
		elsif (/^=======$/) {
			$side[1] = [];
		}
		elsif (/^>>>>>>> .*/) {
			my ($one, $two);
			$one = join('', @{$side[0]});
			$two = join('', @{$side[1]});
			if ($two le $one) {
				($one, $two) = ($two, $one);
			}
			$sha1->add($one);
			$sha1->add("\0");
			$sha1->add($two);
			$sha1->add("\0");
			@side = ();
		}
		elsif (@side == 0) {
			next;
		}
		elsif (defined $side[1]) {
			push @{$side[1]}, $_;
		}
		else {
			push @{$side[0]}, $_;
		}
	}
	close $in;
	return ($sha1->hexdigest, $hunk);
}

sub record_preimage {
	my ($path, $name) = @_;
	my @side = ();
	my ($in, $out);
	open $in, "<$path"  or die "$!: $path";
	open $out, ">$name" or die "$!: $name";

	while (<$in>) {
		if (/^<<<<<<< .*/) {
			@side = ([], undef);
		}
		elsif (/^=======$/) {
			$side[1] = [];
		}
		elsif (/^>>>>>>> .*/) {
			my ($one, $two);
			$one = join('', @{$side[0]});
			$two = join('', @{$side[1]});
			if ($two le $one) {
				($one, $two) = ($two, $one);
			}
			print $out "<<<<<<<\n";
			print $out $one;
			print $out "=======\n";
			print $out $two;
			print $out ">>>>>>>\n";
			@side = ();
		}
		elsif (@side == 0) {
			print $out $_;
		}
		elsif (defined $side[1]) {
			push @{$side[1]}, $_;
		}
		else {
			push @{$side[0]}, $_;
		}
	}
	close $out;
	close $in;
}

sub find_conflict {
	my $in;
	local $/ = "\0";
	my $pid = open($in, '-|');
	die "$!" unless defined $pid;
	if (!$pid) {
		exec(qw(git ls-files -z -u)) or die "$!: ls-files";
	}
	my %path = ();
	my @path = ();
	while (<$in>) {
		chomp;
		my ($mode, $sha1, $stage, $path) =
		    /^([0-7]+) ([0-9a-f]{40}) ([123])\t(.*)$/s;
		$path{$path} |= (1 << $stage);
	}
	close $in;
	while (my ($path, $status) = each %path) {
		if ($status == 14) { push @path, $path; }
	}
	return @path;
}

sub merge {
	my ($name, $path) = @_;
	record_preimage($path, "$rr_dir/$name/thisimage");
	unless (system('merge', map { "$rr_dir/$name/${_}image" }
		       qw(this pre post))) {
		my $in;
		open $in, "<$rr_dir/$name/thisimage" or
		    die "$!: $name/thisimage";
		my $out;
		open $out, ">$path" or die "$!: $path";
		while (<$in>) { print $out $_; }
		close $in;
		close $out;
		return 1;
	}
	return 0;
}

-d "$rr_dir" || exit(0);

read_rr();
my %conflict = map { $_ => 1 } find_conflict();

# MERGE_RR records paths with conflicts immediately after merge
# failed.  Some of the conflicted paths might have been hand resolved
# in the working tree since then, but the initial run would catch all
# and register their preimages.

for my $path (keys %conflict) {
	# This path has conflict.  If it is not recorded yet,
	# record the pre-image.
	if (!exists $merge_rr{$path}) {
		my ($name, $hunk) = compute_conflict_name($path);
		next unless ($hunk);
		$merge_rr{$path} = $name;
		if (! -d "$rr_dir/$name") {
			mkpath("$rr_dir/$name", 0, 0777);
			print STDERR "Recorded preimage for '$path'\n";
			record_preimage($path, "$rr_dir/$name/preimage");
		}
	}
}

# Now some of the paths that had conflicts earlier might have been
# hand resolved.  Others may be similar to a conflict already that
# was resolved before.

for my $path (keys %merge_rr) {
	my $name = $merge_rr{$path};

	# We could resolve this automatically if we have images.
	if (-f "$rr_dir/$name/preimage" &&
	    -f "$rr_dir/$name/postimage") {
		if (merge($name, $path)) {
			print STDERR "Resolved '$path' using previous resolution.\n";
			# Then we do not have to worry about this path
			# anymore.
			delete $merge_rr{$path};
			next;
		}
	}

	# Let's see if we have resolved it.
	(undef, my $hunk) = compute_conflict_name($path);
	next if ($hunk);

	print STDERR "Recorded resolution for '$path'.\n";
	copy($path, "$rr_dir/$name/postimage");
	# And we do not have to worry about this path anymore.
	delete $merge_rr{$path};
}

# Write out the rest.
write_rr();
back to top