Revision - e7cb0b4 - is_ntfs_dotgit: match other .git files

Revision e7cb0b4455c85b53aeba40f88ffddcf6d4002498 authored by Johannes Schindelin on 11 May 2018, 14:03:54 UTC, committed by Jeff King on 22 May 2018, 03:50:11 UTC

is_ntfs_dotgit: match other .git files

When we started to catch NTFS short names that clash with .git, we only
looked for GIT~1. This is sufficient because we only ever clone into an
empty directory, so .git is guaranteed to be the first subdirectory or
file in that directory.

However, even with a fresh clone, .gitmodules is *not* necessarily the
first file to be written that would want the NTFS short name GITMOD~1: a
malicious repository can add .gitmodul0000 and friends, which sorts
before `.gitmodules` and is therefore checked out *first*. For that
reason, we have to test not only for ~1 short names, but for others,
too.

It's hard to just adapt the existing checks in is_ntfs_dotgit(): since
Windows 2000 (i.e., in all Windows versions still supported by Git),
NTFS short names are only generated in the <prefix>~<number> form up to
number 4. After that, a *different* prefix is used, calculated from the
long file name using an undocumented, but stable algorithm.

For example, the short name of .gitmodules would be GITMOD~1, but if it
is taken, and all of ~2, ~3 and ~4 are taken, too, the short name
GI7EBA~1 will be used. From there, collisions are handled by
incrementing the number, shortening the prefix as needed (until ~9999999
is reached, in which case NTFS will not allow the file to be created).

We'd also want to handle .gitignore and .gitattributes, which suffer
from a similar problem, using the fall-back short names GI250A~1 and
GI7D29~1, respectively.

To accommodate for that, we could reimplement the hashing algorithm, but
it is just safer and simpler to provide the known prefixes. This
algorithm has been reverse-engineered and described at
https://usn.pw/blog/gen/2015/06/09/filenames/, which is defunct but
still available via https://web.archive.org/.

These can be recomputed by running the following Perl script:

-- snip --
use warnings;
use strict;

sub compute_short_name_hash ($) {
        my $checksum = 0;
        foreach (split('', $_[0])) {
                $checksum = ($checksum * 0x25 + ord($_)) & 0xffff;
        }

        $checksum = ($checksum * 314159269) & 0xffffffff;
        $checksum = 1 + (~$checksum & 0x7fffffff) if ($checksum & 0x80000000);
        $checksum -= (($checksum * 1152921497) >> 60) * 1000000007;

        return scalar reverse sprintf("%x", $checksum & 0xffff);
}

print compute_short_name_hash($ARGV[0]);
-- snap --

E.g., running that with the argument ".gitignore" will
result in "250a" (which then becomes "gi250a" in the code).

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Jeff King <peff@peff.net>

1 parent 0fc333b

Files
Changes

Permalinks

tag.c

#include "cache.h"
#include "tag.h"
#include "commit.h"
#include "tree.h"
#include "blob.h"
#include "gpg-interface.h"

const char *tag_type = "tag";

static int run_gpg_verify(const char *buf, unsigned long size, unsigned flags)
{
	struct signature_check sigc;
	size_t payload_size;
	int ret;

	memset(&sigc, 0, sizeof(sigc));

	payload_size = parse_signature(buf, size);

	if (size == payload_size) {
		if (flags & GPG_VERIFY_VERBOSE)
			write_in_full(1, buf, payload_size);
		return error("no signature found");
	}

	ret = check_signature(buf, payload_size, buf + payload_size,
				size - payload_size, &sigc);

	if (!(flags & GPG_VERIFY_OMIT_STATUS))
		print_signature_buffer(&sigc, flags);

	signature_check_clear(&sigc);
	return ret;
}

int gpg_verify_tag(const unsigned char *sha1, const char *name_to_report,
		unsigned flags)
{
	enum object_type type;
	char *buf;
	unsigned long size;
	int ret;

	type = sha1_object_info(sha1, NULL);
	if (type != OBJ_TAG)
		return error("%s: cannot verify a non-tag object of type %s.",
				name_to_report ?
				name_to_report :
				find_unique_abbrev(sha1, DEFAULT_ABBREV),
				typename(type));

	buf = read_sha1_file(sha1, &type, &size);
	if (!buf)
		return error("%s: unable to read file.",
				name_to_report ?
				name_to_report :
				find_unique_abbrev(sha1, DEFAULT_ABBREV));

	ret = run_gpg_verify(buf, size, flags);

	free(buf);
	return ret;
}

struct object *deref_tag(struct object *o, const char *warn, int warnlen)
{
	while (o && o->type == OBJ_TAG)
		if (((struct tag *)o)->tagged)
			o = parse_object(((struct tag *)o)->tagged->oid.hash);
		else
			o = NULL;
	if (!o && warn) {
		if (!warnlen)
			warnlen = strlen(warn);
		error("missing object referenced by '%.*s'", warnlen, warn);
	}
	return o;
}

struct object *deref_tag_noverify(struct object *o)
{
	while (o && o->type == OBJ_TAG) {
		o = parse_object(o->oid.hash);
		if (o && o->type == OBJ_TAG && ((struct tag *)o)->tagged)
			o = ((struct tag *)o)->tagged;
		else
			o = NULL;
	}
	return o;
}

struct tag *lookup_tag(const unsigned char *sha1)
{
	struct object *obj = lookup_object(sha1);
	if (!obj)
		return create_object(sha1, alloc_tag_node());
	return object_as_type(obj, OBJ_TAG, 0);
}

static unsigned long parse_tag_date(const char *buf, const char *tail)
{
	const char *dateptr;

	while (buf < tail && *buf++ != '>')
		/* nada */;
	if (buf >= tail)
		return 0;
	dateptr = buf;
	while (buf < tail && *buf++ != '\n')
		/* nada */;
	if (buf >= tail)
		return 0;
	/* dateptr < buf && buf[-1] == '\n', so strtoul will stop at buf-1 */
	return strtoul(dateptr, NULL, 10);
}

int parse_tag_buffer(struct tag *item, const void *data, unsigned long size)
{
	unsigned char sha1[20];
	char type[20];
	const char *bufptr = data;
	const char *tail = bufptr + size;
	const char *nl;

	if (item->object.parsed)
		return 0;
	item->object.parsed = 1;

	if (size < 64)
		return -1;
	if (memcmp("object ", bufptr, 7) || get_sha1_hex(bufptr + 7, sha1) || bufptr[47] != '\n')
		return -1;
	bufptr += 48; /* "object " + sha1 + "\n" */

	if (!starts_with(bufptr, "type "))
		return -1;
	bufptr += 5;
	nl = memchr(bufptr, '\n', tail - bufptr);
	if (!nl || sizeof(type) <= (nl - bufptr))
		return -1;
	memcpy(type, bufptr, nl - bufptr);
	type[nl - bufptr] = '\0';
	bufptr = nl + 1;

	if (!strcmp(type, blob_type)) {
		item->tagged = &lookup_blob(sha1)->object;
	} else if (!strcmp(type, tree_type)) {
		item->tagged = &lookup_tree(sha1)->object;
	} else if (!strcmp(type, commit_type)) {
		item->tagged = &lookup_commit(sha1)->object;
	} else if (!strcmp(type, tag_type)) {
		item->tagged = &lookup_tag(sha1)->object;
	} else {
		error("Unknown type %s", type);
		item->tagged = NULL;
	}

	if (bufptr + 4 < tail && starts_with(bufptr, "tag "))
		; 		/* good */
	else
		return -1;
	bufptr += 4;
	nl = memchr(bufptr, '\n', tail - bufptr);
	if (!nl)
		return -1;
	item->tag = xmemdupz(bufptr, nl - bufptr);
	bufptr = nl + 1;

	if (bufptr + 7 < tail && starts_with(bufptr, "tagger "))
		item->date = parse_tag_date(bufptr, tail);
	else
		item->date = 0;

	return 0;
}

int parse_tag(struct tag *item)
{
	enum object_type type;
	void *data;
	unsigned long size;
	int ret;

	if (item->object.parsed)
		return 0;
	data = read_sha1_file(item->object.oid.hash, &type, &size);
	if (!data)
		return error("Could not read %s",
			     oid_to_hex(&item->object.oid));
	if (type != OBJ_TAG) {
		free(data);
		return error("Object %s not a tag",
			     oid_to_hex(&item->object.oid));
	}
	ret = parse_tag_buffer(item, data, size);
	free(data);
	return ret;
}

Showing with 0 additions and 0 deletions (0 / 0 diffs computed)

Computing file changes ...