Revision 1335d76e4569fa84e52dc24c88c04daeae6e160e authored by Junio C Hamano on 08 July 2016, 17:59:15 UTC, committed by Junio C Hamano on 12 July 2016, 20:06:43 UTC
When merge_recursive() decides what the correct blob object merge
result for a path should be, it uses update_file_flags() helper
function to write it out to a working tree file and then calls
add_cacheinfo().  The add_cacheinfo() function in turn calls
make_cache_entry() to create a new cache entry to replace the
higher-stage entries for the path that represents the conflict.

The make_cache_entry() function calls refresh_cache_entry() to fill
in the cached stat information.  To mark a cache entry as
up-to-date, the data is re-read from the file in the working tree,
and goes through convert_to_git() conversion to be compared with the
blob object name the new cache entry records.

It is important to note that this happens while the higher-stage
entries, which are going to be replaced with the new entry, are
still in the index.  Unfortunately, the convert_to_git() conversion
has a misguided "safer crlf" mechanism baked in, and looks at the
existing cache entry for the path to decide how to convert the
contents in the working tree file.  If our side (i.e. stage#2)
records a text blob with CRLF in it, even when the system is
configured to record LF in blobs and convert them to CRLF upon
checkout (and back to LF upon checkin), the "safer crlf" mechanism
stops us from doing so.

This especially poses a problem during a renormalizing merge, where
the merge result for the path is computed by first "normalizing" the
blobs involved in the merge by using convert_to_working_tree()
followed by convert_to_git() with "safer crlf" disabled.  The merge
result that is computed correctly and fed to add_cacheinfo() via
update_file_flags() does _not_ match what refresh_cache_entry() sees
by converting the working tree file via convert_to_git().

We can work around this by not refreshing the new cache entry in
make_cache_entry() called by add_cacheinfo().  After add_cacheinfo()
adds the new entry, we can call refresh_cache_entry() on that,
knowing that addition of this new cache entry would have removed the
stale cache entries that had CRLF in stage #2 that were carried over
before the renormalizing merge started and will not interfere with
the correct recording of the result.

The test update was taken from a series by Torsten Bögershausen
that attempted to fix this with a different approach.

Signed-off-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Reviewed-by: Torsten Bögershausen <tboegi@web.de>
1 parent 6523728
Raw File
commit-slab.h
#ifndef COMMIT_SLAB_H
#define COMMIT_SLAB_H

/*
 * define_commit_slab(slabname, elemtype) creates boilerplate code to define
 * a new struct (struct slabname) that is used to associate a piece of data
 * of elemtype to commits, and a few functions to use that struct.
 *
 * After including this header file, using:
 *
 * define_commit_slab(indegree, int);
 *
 * will let you call the following functions:
 *
 * - int *indegree_at(struct indegree *, struct commit *);
 *
 *   This function locates the data associated with the given commit in
 *   the indegree slab, and returns the pointer to it.  The location to
 *   store the data is allocated as necessary.
 *
 * - int *indegree_peek(struct indegree *, struct commit *);
 *
 *   This function is similar to indegree_at(), but it will return NULL
 *   until a call to indegree_at() was made for the commit.
 *
 * - void init_indegree(struct indegree *);
 *   void init_indegree_with_stride(struct indegree *, int);
 *
 *   Initializes the indegree slab that associates an array of integers
 *   to each commit. 'stride' specifies how big each array is.  The slab
 *   that is initialized by the variant without "_with_stride" associates
 *   each commit with an array of one integer.
 *
 * - void clear_indegree(struct indegree *);
 *
 *   Empties the slab.  The slab can be reused with the same stride
 *   without calling init_indegree() again or can be reconfigured to a
 *   different stride by calling init_indegree_with_stride().
 *
 *   Call this function before the slab falls out of scope to avoid
 *   leaking memory.
 */

/*
 * Size budget, in bytes, for one slab: just under 512kB, leaving 32
 * bytes as an allowance for malloc overhead.  It is divided by the
 * element size to obtain the number of elements stored per slab.
 * Define COMMIT_SLAB_SIZE before including this header to override it.
 */
#ifndef COMMIT_SLAB_SIZE
#define COMMIT_SLAB_SIZE (512*1024-32)
#endif

/*
 * Attached to every function generated by define_commit_slab(): they
 * are all static, and a file that instantiates a slab need not call
 * every one of them.
 */
#define MAYBE_UNUSED __attribute__((__unused__))

/*
 * Implementation of define_commit_slab(slabname, elemtype).
 *
 * The expansion declares struct slabname, whose 'slab' member is an array
 * of 'slab_count' chunk pointers.  A chunk is allocated (zero-filled) the
 * first time it is needed and holds 'slab_size' elements, each made of
 * 'stride' values of elemtype.  A commit is mapped to its element through
 * c->index: index / slab_size selects the chunk, index % slab_size the
 * slot inside that chunk.
 *
 * Counters and chunk/slot numbers are unsigned so that they have the same
 * signedness as the unsigned struct members they are compared with.
 *
 * The expansion deliberately ends in a declaration without a semicolon;
 * the user of the macro supplies it.
 */
#define define_commit_slab(slabname, elemtype) 				\
									\
struct slabname {							\
	unsigned slab_size;	/* elements per chunk */		\
	unsigned stride;	/* elemtype values per element */	\
	unsigned slab_count;	/* length of the 'slab' array */	\
	elemtype **slab;	/* chunks, each allocated on demand */	\
};									\
/* counts how often the 'slab' array of a slab of this type was grown */ \
static int stat_ ##slabname## realloc;					\
									\
/* Set up an empty slab with 'stride' values per commit (0 means 1). */ \
static MAYBE_UNUSED void init_ ##slabname## _with_stride(struct slabname *s, \
						   unsigned stride)	\
{									\
	unsigned int elem_size;						\
	if (!stride)							\
		stride = 1;						\
	s->stride = stride;						\
	elem_size = sizeof(elemtype) * stride;				\
	s->slab_size = COMMIT_SLAB_SIZE / elem_size;			\
	s->slab_count = 0;						\
	s->slab = NULL;							\
}									\
									\
/* Set up an empty slab that stores a single value per commit. */	\
static MAYBE_UNUSED void init_ ##slabname(struct slabname *s)		\
{									\
	init_ ##slabname## _with_stride(s, 1);				\
}									\
									\
/* Free every chunk and the chunk array; stride and slab_size are kept. */ \
static MAYBE_UNUSED void clear_ ##slabname(struct slabname *s)		\
{									\
	unsigned int i;							\
	for (i = 0; i < s->slab_count; i++)				\
		free(s->slab[i]);					\
	s->slab_count = 0;						\
	free(s->slab);							\
	s->slab = NULL;							\
}									\
									\
/* Find the element for 'c'; allocate storage only if add_if_missing, */ \
/* otherwise return NULL when the storage does not exist yet. */	\
static MAYBE_UNUSED elemtype *slabname## _at_peek(struct slabname *s,	\
						  const struct commit *c, \
						  int add_if_missing)   \
{									\
	unsigned int nth_slab, nth_slot;				\
									\
	nth_slab = c->index / s->slab_size;				\
	nth_slot = c->index % s->slab_size;				\
									\
	if (s->slab_count <= nth_slab) {				\
		unsigned int i;						\
		if (!add_if_missing)					\
			return NULL;					\
		REALLOC_ARRAY(s->slab, nth_slab + 1);			\
		stat_ ##slabname## realloc++;				\
		/* the newly added chunk pointers start out unallocated */ \
		for (i = s->slab_count; i <= nth_slab; i++)		\
			s->slab[i] = NULL;				\
		s->slab_count = nth_slab + 1;				\
	}								\
	if (!s->slab[nth_slab]) {					\
		if (!add_if_missing)					\
			return NULL;					\
		s->slab[nth_slab] = xcalloc(s->slab_size,		\
					    sizeof(**s->slab) * s->stride);		\
	}								\
	return &s->slab[nth_slab][nth_slot * s->stride];		\
}									\
									\
/* Return the element for 'c', allocating its storage as necessary. */	\
static MAYBE_UNUSED elemtype *slabname## _at(struct slabname *s,	\
					     const struct commit *c)	\
{									\
	return slabname##_at_peek(s, c, 1);				\
}									\
									\
/* Return the element for 'c', or NULL if its storage was never allocated. */ \
static MAYBE_UNUSED elemtype *slabname## _peek(struct slabname *s,	\
					     const struct commit *c)	\
{									\
	return slabname##_at_peek(s, c, 0);				\
}									\
									\
static int stat_ ##slabname## realloc

/*
 * Note that this seemingly redundant second declaration is required
 * to allow a terminating semicolon, which makes instantiations look
 * like function declarations.  I.e., the expansion of
 *
 *    define_commit_slab(indegree, int);
 *
 * ends in 'static int stat_indegreerealloc;'.  This would otherwise
 * be a syntax error according (at least) to ISO C.  It's hard to
 * catch because GCC silently parses it by default.
 */

/*
 * Statically initialize a commit slab named "var" whose elements are
 * arrays of "stride" values.  A "stride" of 0 is treated as 1, the same
 * way the init_<slabname>_with_stride() function generated by
 * define_commit_slab() treats it, instead of dividing by zero.  Note
 * that this evaluates "stride" multiple times! Example:
 *
 *   struct indegree indegrees = COMMIT_SLAB_INIT(1, indegrees);
 *
 * The initializers are positional and follow the member order of the
 * struct generated by define_commit_slab(): slab_size, stride,
 * slab_count, slab.
 */
#define COMMIT_SLAB_INIT(stride, var) { \
	COMMIT_SLAB_SIZE / sizeof(**((var).slab)) / ((stride) ? (stride) : 1), \
	((stride) ? (stride) : 1), 0, NULL \
}

#endif /* COMMIT_SLAB_H */
back to top