Revision f6a1e1e288d13472f5f7fe2b907bb8c0bd69a018 authored by Jim Hill on 18 May 2015, 00:41:45 UTC, committed by Junio C Hamano on 18 May 2015, 17:15:20 UTC
`git add` of an empty file with a filter pops complaints from
`copy_fd` about a bad file descriptor.

This traces back to these lines in sha1_file.c:index_core:

	if (!size) {
		ret = index_mem(sha1, NULL, size, type, path, flags);

The problem here is that content to be added to the index can be
supplied from an fd, or from a memory buffer, or from a pathname. This
call is supplying a NULL buffer pointer and a zero size.

Downstream logic takes the complete absence of a buffer to mean the
data is to be found elsewhere -- for instance, these, from convert.c:

	if (params->src) {
		write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
	} else {
		write_err = copy_fd(params->fd, child_process.in);
	}

~If there's a buffer, write from that, otherwise the data must be coming
from an open fd.~

Perfectly reasonable logic in a routine that's going to write from
either a buffer or an fd.

So change `index_core` to supply an empty buffer when indexing an empty
file.

There's a patch out there that instead changes the logic quoted above to
take a `-1` fd to mean "use the buffer", but it seems to me that the
distinction between a missing buffer and an empty one carries intrinsic
semantics, where the logic change is adapting the code to handle
incorrect arguments.

Signed-off-by: Jim Hill <gjthill@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 282616c
Raw File
pack-objects.c
#include "cache.h"
#include "object.h"
#include "pack.h"
#include "pack-objects.h"

static uint32_t locate_object_entry_hash(struct packing_data *pdata,
					 const unsigned char *sha1,
					 int *found)
{
	uint32_t i, hash, mask = (pdata->index_size - 1);

	memcpy(&hash, sha1, sizeof(uint32_t));
	i = hash & mask;

	while (pdata->index[i] > 0) {
		uint32_t pos = pdata->index[i] - 1;

		if (!hashcmp(sha1, pdata->objects[pos].idx.sha1)) {
			*found = 1;
			return i;
		}

		i = (i + 1) & mask;
	}

	*found = 0;
	return i;
}

static inline uint32_t closest_pow2(uint32_t v)
{
	v = v - 1;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	return v + 1;
}

static void rehash_objects(struct packing_data *pdata)
{
	uint32_t i;
	struct object_entry *entry;

	pdata->index_size = closest_pow2(pdata->nr_objects * 3);
	if (pdata->index_size < 1024)
		pdata->index_size = 1024;

	free(pdata->index);
	pdata->index = xcalloc(pdata->index_size, sizeof(*pdata->index));

	entry = pdata->objects;

	for (i = 0; i < pdata->nr_objects; i++) {
		int found;
		uint32_t ix = locate_object_entry_hash(pdata, entry->idx.sha1, &found);

		if (found)
			die("BUG: Duplicate object in hash");

		pdata->index[ix] = i + 1;
		entry++;
	}
}

struct object_entry *packlist_find(struct packing_data *pdata,
				   const unsigned char *sha1,
				   uint32_t *index_pos)
{
	uint32_t i;
	int found;

	if (!pdata->index_size)
		return NULL;

	i = locate_object_entry_hash(pdata, sha1, &found);

	if (index_pos)
		*index_pos = i;

	if (!found)
		return NULL;

	return &pdata->objects[pdata->index[i] - 1];
}

struct object_entry *packlist_alloc(struct packing_data *pdata,
				    const unsigned char *sha1,
				    uint32_t index_pos)
{
	struct object_entry *new_entry;

	if (pdata->nr_objects >= pdata->nr_alloc) {
		pdata->nr_alloc = (pdata->nr_alloc  + 1024) * 3 / 2;
		pdata->objects = xrealloc(pdata->objects,
					  pdata->nr_alloc * sizeof(*new_entry));
	}

	new_entry = pdata->objects + pdata->nr_objects++;

	memset(new_entry, 0, sizeof(*new_entry));
	hashcpy(new_entry->idx.sha1, sha1);

	if (pdata->index_size * 3 <= pdata->nr_objects * 4)
		rehash_objects(pdata);
	else
		pdata->index[index_pos] = pdata->nr_objects;

	return new_entry;
}
back to top