Revision e53e6b4433f264250c2e586167caf61721b0185c authored by Brian Downing on 11 June 2010, 02:59:07 UTC, committed by Junio C Hamano on 18 June 2010, 15:06:18 UTC
When traversing trees with an index, the current index pointer
(o->cache_bottom) occasionally has to be temporarily advanced forwards to
match the traversal order of the tree, which is not the same as the sort
order of the index.  The existing algorithm that did this (introduced in
730f72840cc50c523fe4cdd796ea2d2fc4571a28) would get "stuck" when the
cache_bottom was popped and then repeatedly check the same index entries
over and over.  This represents a serious performance regression for
large repositories compared to the old "broken" traversal order.

This commit makes a simple change to mitigate this.  Whenever
find_cache_pos sees that the current pos is also the cache_bottom, and
it has already been unpacked, it advances the cache_bottom as well as
the current pos.  This prevents the above "sticking" behavior without
dramatically changing the algorithm.

In addition, this commit moves the unpacked check above the
ce_in_traverse_path() check.  The simple bitmask check is cheaper, and
in the case described above will be firing quite a bit to advance the
cache_bottom after a tree pop.

This yields considerable performance improvements for large trees.
The following are the number of function calls for "git diff HEAD" on
the Linux kernel tree, with 33,307 files:

   Symbol               Calls Before   Calls After
   -------------------  ------------   -----------
   unpack_callback            35,332        35,332
   find_cache_pos             37,357        37,357
   ce_in_traverse_path     4,979,473        37,357
   do_compare_entry        6,828,181       251,925
   df_name_compare         6,828,181       251,925

And on a repository of 187,456 files:

   Symbol               Calls Before   Calls After
   -------------------  ------------   -----------
   unpack_callback           197,958       197,958
   find_cache_pos            208,460       208,460
   ce_in_traverse_path    37,308,336       208,460
   do_compare_entry      156,950,469     2,690,626
   df_name_compare       156,950,469     2,690,626

On the latter repository, user time for "git diff HEAD" was reduced from
5.58 to 0.42 seconds.  This is compared to 0.30 seconds before the
traversal order fix was implemented.

Signed-off-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 2543d9b
Raw File
pthread.c
/*
 * Copyright (C) 2009 Andrzej K. Haczewski <ahaczewski@gmail.com>
 *
 * DISCLAIMER: The implementation is Git-specific, it is subset of original
 * Pthreads API, without lots of other features that Git doesn't use.
 * Git also makes sure that the passed arguments are valid, so there's
 * no need for double-checking.
 */

#include "../../git-compat-util.h"
#include "pthread.h"

#include <errno.h>
#include <limits.h>

static unsigned __stdcall win32_start_routine(void *arg)
{
	pthread_t *thread = arg;
	thread->arg = thread->start_routine(thread->arg);
	return 0;
}

int pthread_create(pthread_t *thread, const void *unused,
		   void *(*start_routine)(void*), void *arg)
{
	thread->arg = arg;
	thread->start_routine = start_routine;
	thread->handle = (HANDLE)
		_beginthreadex(NULL, 0, win32_start_routine, thread, 0, NULL);

	if (!thread->handle)
		return errno;
	else
		return 0;
}

int win32_pthread_join(pthread_t *thread, void **value_ptr)
{
	DWORD result = WaitForSingleObject(thread->handle, INFINITE);
	switch (result) {
		case WAIT_OBJECT_0:
			if (value_ptr)
				*value_ptr = thread->arg;
			return 0;
		case WAIT_ABANDONED:
			return EINVAL;
		default:
			return err_win_to_posix(GetLastError());
	}
}

int pthread_cond_init(pthread_cond_t *cond, const void *unused)
{
	cond->waiters = 0;
	cond->was_broadcast = 0;
	InitializeCriticalSection(&cond->waiters_lock);

	cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL);
	if (!cond->sema)
		die("CreateSemaphore() failed");

	cond->continue_broadcast = CreateEvent(NULL,	/* security */
				FALSE,			/* auto-reset */
				FALSE,			/* not signaled */
				NULL);			/* name */
	if (!cond->continue_broadcast)
		die("CreateEvent() failed");

	return 0;
}

int pthread_cond_destroy(pthread_cond_t *cond)
{
	CloseHandle(cond->sema);
	CloseHandle(cond->continue_broadcast);
	DeleteCriticalSection(&cond->waiters_lock);
	return 0;
}

int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex)
{
	int last_waiter;

	EnterCriticalSection(&cond->waiters_lock);
	cond->waiters++;
	LeaveCriticalSection(&cond->waiters_lock);

	/*
	 * Unlock external mutex and wait for signal.
	 * NOTE: we've held mutex locked long enough to increment
	 * waiters count above, so there's no problem with
	 * leaving mutex unlocked before we wait on semaphore.
	 */
	LeaveCriticalSection(mutex);

	/* let's wait - ignore return value */
	WaitForSingleObject(cond->sema, INFINITE);

	/*
	 * Decrease waiters count. If we are the last waiter, then we must
	 * notify the broadcasting thread that it can continue.
	 * But if we continued due to cond_signal, we do not have to do that
	 * because the signaling thread knows that only one waiter continued.
	 */
	EnterCriticalSection(&cond->waiters_lock);
	cond->waiters--;
	last_waiter = cond->was_broadcast && cond->waiters == 0;
	LeaveCriticalSection(&cond->waiters_lock);

	if (last_waiter) {
		/*
		 * cond_broadcast was issued while mutex was held. This means
		 * that all other waiters have continued, but are contending
		 * for the mutex at the end of this function because the
		 * broadcasting thread did not leave cond_broadcast, yet.
		 * (This is so that it can be sure that each waiter has
		 * consumed exactly one slice of the semaphor.)
		 * The last waiter must tell the broadcasting thread that it
		 * can go on.
		 */
		SetEvent(cond->continue_broadcast);
		/*
		 * Now we go on to contend with all other waiters for
		 * the mutex. Auf in den Kampf!
		 */
	}
	/* lock external mutex again */
	EnterCriticalSection(mutex);

	return 0;
}

/*
 * IMPORTANT: This implementation requires that pthread_cond_signal
 * is called while the mutex is held that is used in the corresponding
 * pthread_cond_wait calls!
 */
int pthread_cond_signal(pthread_cond_t *cond)
{
	int have_waiters;

	EnterCriticalSection(&cond->waiters_lock);
	have_waiters = cond->waiters > 0;
	LeaveCriticalSection(&cond->waiters_lock);

	/*
	 * Signal only when there are waiters
	 */
	if (have_waiters)
		return ReleaseSemaphore(cond->sema, 1, NULL) ?
			0 : err_win_to_posix(GetLastError());
	else
		return 0;
}

/*
 * DOUBLY IMPORTANT: This implementation requires that pthread_cond_broadcast
 * is called while the mutex is held that is used in the corresponding
 * pthread_cond_wait calls!
 */
int pthread_cond_broadcast(pthread_cond_t *cond)
{
	EnterCriticalSection(&cond->waiters_lock);

	if ((cond->was_broadcast = cond->waiters > 0)) {
		/* wake up all waiters */
		ReleaseSemaphore(cond->sema, cond->waiters, NULL);
		LeaveCriticalSection(&cond->waiters_lock);
		/*
		 * At this point all waiters continue. Each one takes its
		 * slice of the semaphor. Now it's our turn to wait: Since
		 * the external mutex is held, no thread can leave cond_wait,
		 * yet. For this reason, we can be sure that no thread gets
		 * a chance to eat *more* than one slice. OTOH, it means
		 * that the last waiter must send us a wake-up.
		 */
		WaitForSingleObject(cond->continue_broadcast, INFINITE);
		/*
		 * Since the external mutex is held, no thread can enter
		 * cond_wait, and, hence, it is safe to reset this flag
		 * without cond->waiters_lock held.
		 */
		cond->was_broadcast = 0;
	} else {
		LeaveCriticalSection(&cond->waiters_lock);
	}
	return 0;
}
back to top