Revision 9cf85473209ea8ae2b56c13145c4704d12ee1374 authored by Filip Hejsek on 28 January 2024, 04:09:17 UTC, committed by Johannes Schindelin on 17 April 2024, 20:30:01 UTC
While it is expected to have several git dirs within the `.git/modules/`
tree, it is important that they do not interfere with each other. For
example, if one submodule was called "captain" and another submodule
"captain/hooks", their respective git dirs would clash, as they would be
located in `.git/modules/captain/` and `.git/modules/captain/hooks/`,
respectively, i.e. the latter's files could clash with the actual Git
hooks of the former.

To prevent these clashes, and in particular to prevent hooks from being
written and then executed as part of a recursive clone, we introduced
checks as part of the fix for CVE-2019-1387 in a8dee3ca61 (Disallow
dubiously-nested submodule git directories, 2019-10-01).

It is currently possible to bypass the check for clashing submodule
git dirs in two ways:

1. parallel cloning
2. checkout --recurse-submodules

Let's check not only before, but also after parallel cloning (and before
checking out the submodule), that the git dir is not clashing with
another one, otherwise fail. This addresses the parallel cloning issue.

As to the parallel checkout issue: It requires quite a few manual steps
to create clashing git dirs because Git itself would refuse to
initialize the inner one, as demonstrated by the test case.

Nevertheless, let's teach the recursive checkout (namely, the
`submodule_move_head()` function that is used by the recursive checkout)
to be careful to verify that it does not use a clashing git dir, and if
it does, disable it (by deleting the `HEAD` file so that subsequent Git
calls won't recognize it as a git dir anymore).

Note: The parallel cloning test case contains a `cat err` that proved to
be highly useful when analyzing the racy nature of the operation (the
operation can fail with three different error messages, depending on
timing), and was left on purpose to ease future debugging should the
need arise.

Signed-off-by: Filip Hejsek <filip.hejsek@gmail.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
1 parent b20c10f
Raw File
mktree.c
/*
 * GIT - the stupid content tracker
 *
 * Copyright (c) Junio C Hamano, 2006, 2009
 */
#include "builtin.h"
#include "quote.h"
#include "tree.h"
#include "parse-options.h"
#include "object-store.h"

static struct treeent {
	unsigned mode;
	struct object_id oid;
	int len;
	char name[FLEX_ARRAY];
} **entries;
static int alloc, used;

static void append_to_tree(unsigned mode, struct object_id *oid, char *path)
{
	struct treeent *ent;
	size_t len = strlen(path);
	if (strchr(path, '/'))
		die("path %s contains slash", path);

	FLEX_ALLOC_MEM(ent, name, path, len);
	ent->mode = mode;
	ent->len = len;
	oidcpy(&ent->oid, oid);

	ALLOC_GROW(entries, used + 1, alloc);
	entries[used++] = ent;
}

static int ent_compare(const void *a_, const void *b_)
{
	struct treeent *a = *(struct treeent **)a_;
	struct treeent *b = *(struct treeent **)b_;
	return base_name_compare(a->name, a->len, a->mode,
				 b->name, b->len, b->mode);
}

static void write_tree(struct object_id *oid)
{
	struct strbuf buf;
	size_t size;
	int i;

	QSORT(entries, used, ent_compare);
	for (size = i = 0; i < used; i++)
		size += 32 + entries[i]->len;

	strbuf_init(&buf, size);
	for (i = 0; i < used; i++) {
		struct treeent *ent = entries[i];
		strbuf_addf(&buf, "%o %s%c", ent->mode, ent->name, '\0');
		strbuf_add(&buf, ent->oid.hash, the_hash_algo->rawsz);
	}

	write_object_file(buf.buf, buf.len, OBJ_TREE, oid);
	strbuf_release(&buf);
}

static const char *mktree_usage[] = {
	"git mktree [-z] [--missing] [--batch]",
	NULL
};

static void mktree_line(char *buf, int nul_term_line, int allow_missing)
{
	char *ptr, *ntr;
	const char *p;
	unsigned mode;
	enum object_type mode_type; /* object type derived from mode */
	enum object_type obj_type; /* object type derived from sha */
	struct object_info oi = OBJECT_INFO_INIT;
	char *path, *to_free = NULL;
	struct object_id oid;

	ptr = buf;
	/*
	 * Read non-recursive ls-tree output format:
	 *     mode SP type SP sha1 TAB name
	 */
	mode = strtoul(ptr, &ntr, 8);
	if (ptr == ntr || !ntr || *ntr != ' ')
		die("input format error: %s", buf);
	ptr = ntr + 1; /* type */
	ntr = strchr(ptr, ' ');
	if (!ntr || parse_oid_hex(ntr + 1, &oid, &p) ||
	    *p != '\t')
		die("input format error: %s", buf);

	/* It is perfectly normal if we do not have a commit from a submodule */
	if (S_ISGITLINK(mode))
		allow_missing = 1;


	*ntr++ = 0; /* now at the beginning of SHA1 */

	path = (char *)p + 1;  /* at the beginning of name */
	if (!nul_term_line && path[0] == '"') {
		struct strbuf p_uq = STRBUF_INIT;
		if (unquote_c_style(&p_uq, path, NULL))
			die("invalid quoting");
		path = to_free = strbuf_detach(&p_uq, NULL);
	}

	/*
	 * Object type is redundantly derivable three ways.
	 * These should all agree.
	 */
	mode_type = object_type(mode);
	if (mode_type != type_from_string(ptr)) {
		die("entry '%s' object type (%s) doesn't match mode type (%s)",
			path, ptr, type_name(mode_type));
	}

	/* Check the type of object identified by oid without fetching objects */
	oi.typep = &obj_type;
	if (oid_object_info_extended(the_repository, &oid, &oi,
				     OBJECT_INFO_LOOKUP_REPLACE |
				     OBJECT_INFO_QUICK |
				     OBJECT_INFO_SKIP_FETCH_OBJECT) < 0)
		obj_type = -1;

	if (obj_type < 0) {
		if (allow_missing) {
			; /* no problem - missing objects are presumed to be of the right type */
		} else {
			die("entry '%s' object %s is unavailable", path, oid_to_hex(&oid));
		}
	} else {
		if (obj_type != mode_type) {
			/*
			 * The object exists but is of the wrong type.
			 * This is a problem regardless of allow_missing
			 * because the new tree entry will never be correct.
			 */
			die("entry '%s' object %s is a %s but specified type was (%s)",
				path, oid_to_hex(&oid), type_name(obj_type), type_name(mode_type));
		}
	}

	append_to_tree(mode, &oid, path);
	free(to_free);
}

int cmd_mktree(int ac, const char **av, const char *prefix)
{
	struct strbuf sb = STRBUF_INIT;
	struct object_id oid;
	int nul_term_line = 0;
	int allow_missing = 0;
	int is_batch_mode = 0;
	int got_eof = 0;
	strbuf_getline_fn getline_fn;

	const struct option option[] = {
		OPT_BOOL('z', NULL, &nul_term_line, N_("input is NUL terminated")),
		OPT_SET_INT( 0 , "missing", &allow_missing, N_("allow missing objects"), 1),
		OPT_SET_INT( 0 , "batch", &is_batch_mode, N_("allow creation of more than one tree"), 1),
		OPT_END()
	};

	ac = parse_options(ac, av, prefix, option, mktree_usage, 0);
	getline_fn = nul_term_line ? strbuf_getline_nul : strbuf_getline_lf;

	while (!got_eof) {
		while (1) {
			if (getline_fn(&sb, stdin) == EOF) {
				got_eof = 1;
				break;
			}
			if (sb.buf[0] == '\0') {
				/* empty lines denote tree boundaries in batch mode */
				if (is_batch_mode)
					break;
				die("input format error: (blank line only valid in batch mode)");
			}
			mktree_line(sb.buf, nul_term_line, allow_missing);
		}
		if (is_batch_mode && got_eof && used < 1) {
			/*
			 * Execution gets here if the last tree entry is terminated with a
			 * new-line.  The final new-line has been made optional to be
			 * consistent with the original non-batch behaviour of mktree.
			 */
			; /* skip creating an empty tree */
		} else {
			write_tree(&oid);
			puts(oid_to_hex(&oid));
			fflush(stdout);
		}
		used=0; /* reset tree entry buffer for re-use in batch mode */
	}
	strbuf_release(&sb);
	return 0;
}
back to top