Revision 3ba2e8653c88d220b0b22f35260477bb0afa7d7b authored by Johannes Sixt on 16 March 2011, 08:18:49 UTC, committed by Junio C Hamano on 17 March 2011, 21:54:11 UTC
'git stash create' must operate with a temporary index. For this purpose,
it used 'cp -p' to create a copy. -p is needed to preserve the timestamp
of the index file. Now Jakob Pfender reported a certain combination of
a Linux NFS client, OpenBSD NFS server, and cp implementation where this
operation failed.

Luckily, the first operation in git-stash after copying the index is to
call 'git read-tree'. Therefore, use --index-output instead of 'cp -p'
to write the copy of the index.

--index-output requires that the specified file is on the same volume as
the source index, so that the lock file can be rename()d. For this reason,
the name of the temporary index is constructed in a way different from the
other temporary files. The code path of 'stash -p' also needs a temporary
index, but we do not use the new name because it does not depend on the
same precondition as --index-output.

Signed-off-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 23a32ff
Raw File
levenshtein.c
#include "cache.h"
#include "levenshtein.h"

/*
 * This function implements the Damerau-Levenshtein algorithm to
 * calculate a distance between strings.
 *
 * Basically, it says how many letters need to be swapped, substituted,
 * deleted from, or added to string1, at least, to get string2.
 *
 * The idea is to build a distance matrix for the substrings of both
 * strings.  To avoid a large space complexity, only the last three rows
 * are kept in memory (if swaps had the same or higher cost as one deletion
 * plus one insertion, only two rows would be needed).
 *
 * At any stage, "i + 1" denotes the length of the current substring of
 * string1 that the distance is calculated for.
 *
 * row2 holds the current row, row1 the previous row (i.e. for the substring
 * of string1 of length "i"), and row0 the row before that.
 *
 * In other words, at the start of the big loop, row2[j + 1] contains the
 * Damerau-Levenshtein distance between the substring of string1 of length
 * "i" and the substring of string2 of length "j + 1".
 *
 * All the big loop does is determine the partial minimum-cost paths.
 *
 * It does so by calculating the costs of the path ending in characters
 * i (in string1) and j (in string2), respectively, given that the last
 * operation is a substitution, a swap, a deletion, or an insertion.
 *
 * This implementation allows the costs to be weighted:
 *
 * - w (as in "sWap")
 * - s (as in "Substitution")
 * - a (for insertion, AKA "Add")
 * - d (as in "Deletion")
 *
 * Note that this algorithm calculates a distance _iff_ d == a.
 */
int levenshtein(const char *string1, const char *string2,
		int w, int s, int a, int d)
{
	int len1 = strlen(string1), len2 = strlen(string2);
	int *row0 = xmalloc(sizeof(int) * (len2 + 1));
	int *row1 = xmalloc(sizeof(int) * (len2 + 1));
	int *row2 = xmalloc(sizeof(int) * (len2 + 1));
	int i, j;

	for (j = 0; j <= len2; j++)
		row1[j] = j * a;
	for (i = 0; i < len1; i++) {
		int *dummy;

		row2[0] = (i + 1) * d;
		for (j = 0; j < len2; j++) {
			/* substitution */
			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
			/* swap */
			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
					string1[i] == string2[j - 1] &&
					row2[j + 1] > row0[j - 1] + w)
				row2[j + 1] = row0[j - 1] + w;
			/* deletion */
			if (row2[j + 1] > row1[j + 1] + d)
				row2[j + 1] = row1[j + 1] + d;
			/* insertion */
			if (row2[j + 1] > row2[j] + a)
				row2[j + 1] = row2[j] + a;
		}

		dummy = row0;
		row0 = row1;
		row1 = row2;
		row2 = dummy;
	}

	i = row1[len2];
	free(row0);
	free(row1);
	free(row2);

	return i;
}
back to top