Revision 1b9e059d3503f935bb3eebb073cfb0f0401e2adf authored by Linus Torvalds on 21 October 2005, 23:40:54 UTC, committed by Junio C Hamano on 23 October 2005, 05:49:52 UTC
This is what the recent git-rev-list changes have all been gearing up for.

When we use a path filter to git-rev-list, the new "--dense" flag asks
git-rev-list to compress the history so that it _only_ contains commits
that change files in the path filter.  It also rewrites the parent
information so that tools like "gitk" will see the result as a dense
history tree.

For example, on the current kernel archive:

	[torvalds@g5 linux]$ git-rev-list HEAD | wc -l
	9904
	[torvalds@g5 linux]$ git-rev-list HEAD -- kernel | wc -l
	5442
	[torvalds@g5 linux]$ git-rev-list --dense HEAD -- kernel | wc -l
	356

which shows that while we have almost ten thousand commits, we can prune
down the work to slightly more than half by only following the merges
that are interesting. But further, we can then compress the history to
just 356 entries that actually make changes to the kernel subdirectory.

To see this in action, try something like

	gitk --dense -- gitk

to see just the history that affects gitk.  Or, to show that true
parallel development still remains parallel, do

	gitk --dense -- daemon.c

which shows some parallel commits in the current git tree.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
1 parent cf48454
Raw File
mailsplit.c
/*
 * Totally braindamaged mbox splitter program.
 *
 * It just splits an mbox into a list of files: "0001" "0002" ..
 * so you can process them further from there.
 */
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include "cache.h"

/* Synopsis handed to usage() whenever the command line is rejected. */
static const char git_mailsplit_usage[] = "git-mailsplit [-d<prec>] [<mbox>] <directory>";

/*
 * Heuristically decide whether a line is an mbox "From " separator.
 *
 * line: start of the line.  It must be NUL-terminated, because the
 *       year is parsed with strtol().
 * len:  number of bytes in the line, including the trailing newline
 *       if there is one.
 *
 * Returns 1 when all of the following hold, 0 otherwise:
 *  - the line is at least 20 bytes long and starts with "From ";
 *  - a ':' exists after the "From " prefix and before the last two
 *    bytes of the line;
 *  - around the last such ':' the digit positions match the shape
 *    "d?dd:dd" (the tail of an "hh:mm:ss" time stamp);
 *  - the number following the seconds field (the year) is above 90.
 */
static int is_from_line(const char *line, int len)
{
	const char *colon;

	if (len < 20 || memcmp("From ", line, 5))
		return 0;

	/*
	 * Search backwards for the last ':'.  Because of the
	 * pre-decrement, the first byte examined is line[len-3], and the
	 * scan stops once it has looked at the space that ends "From ".
	 * A match therefore always leaves colon[-4]..colon[2] inside
	 * the line.
	 */
	colon = line + len - 2;
	line += 5;
	for (;;) {
		if (colon < line)
			return 0;
		if (*--colon == ':')
			break;
	}

	/*
	 * The casts matter: passing a negative plain char (any 8-bit
	 * byte on signed-char platforms) to isdigit() is undefined.
	 */
	if (!isdigit((unsigned char)colon[-4]) ||
	    !isdigit((unsigned char)colon[-2]) ||
	    !isdigit((unsigned char)colon[-1]) ||
	    !isdigit((unsigned char)colon[ 1]) ||
	    !isdigit((unsigned char)colon[ 2]))
		return 0;

	/* year */
	if (strtol(colon+3, NULL, 10) <= 90)
		return 0;

	/* Ok, close enough */
	return 1;
}

/* Could be as small as 64, enough to hold a Unix "From " line. */
static char buf[4096];

/*
 * Copy one message from mbox into a newly created file called name.
 *
 * Called with the first line (potentially partial) already in buf[]
 * -- normally that should be the Unix "From " line.  If it is not,
 * the mailbox is reported as corrupt and the program exits with
 * status 1.
 *
 * The output file is created with O_EXCL, so an existing file of the
 * same name is an error.
 *
 * Returns 0 when the copy stopped at the next "From " line (which is
 * left in buf[] for the next call), or 1 when the end of mbox was
 * reached.  I/O failures are reported through die(), which is assumed
 * not to return.
 */
static int split_one(FILE *mbox, const char *name)
{
	FILE *output;
	int len = strlen(buf);
	int fd;
	int status = 0;

	if (!is_from_line(buf, len)) {
		/*
		 * NOTE(review): no output file has been created on this
		 * path, so this removes whatever file of that name already
		 * exists.  Kept for compatibility -- confirm it is intended.
		 */
		unlink(name);
		fprintf(stderr, "corrupt mailbox\n");
		exit(1);
	}

	fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0666);
	if (fd < 0)
		die("cannot open output file %s", name);
	output = fdopen(fd, "w");
	if (!output)
		die("cannot open output file %s", name);

	/* Copy it out, while searching for a line that begins with
	 * "From " and having something that looks like a date format.
	 */
	for (;;) {
		/*
		 * A chunk without a trailing newline means the next chunk
		 * continues the same line, so it must not be tested as a
		 * "From " line.  An empty chunk (a line starting with a
		 * NUL byte) is treated as complete rather than indexing
		 * buf[-1].
		 */
		int is_partial = (len > 0 && buf[len-1] != '\n');

		if (fputs(buf, output) == EOF)
			die("cannot write output");

		if (fgets(buf, sizeof(buf), mbox) == NULL) {
			if (feof(mbox)) {
				status = 1;
				break;
			}
			die("cannot read mbox");
		}
		len = strlen(buf);
		if (!is_partial && is_from_line(buf, len))
			break; /* done with one message */
	}

	/* fclose() performs the final flush; a failure there is a write error. */
	if (fclose(output))
		die("cannot write output");
	return status;
}

/*
 * Entry point: git-mailsplit [-d<prec>] [<mbox>] <directory>
 *
 * Splits the mbox (stdin when no <mbox> is given) into files inside
 * <directory>, named with zero-padded sequence numbers starting at 1.
 * -d<prec> sets the minimum number of digits in those names (3..9,
 * default 4).
 *
 * On success the number of messages written is printed to stdout and
 * 0 is returned.  Bad arguments go through usage() and I/O failures
 * through die(); both are assumed not to return.
 */
int main(int argc, const char **argv)
{
	int i, nr, nr_prec = 4;
	FILE *mbox = NULL;

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (arg[0] != '-')
			break;
		/* do flags here */
		if (!strncmp(arg, "-d", 2)) {
			nr_prec = strtol(arg + 2, NULL, 10);
			if (nr_prec < 3 || 10 <= nr_prec)
				usage(git_mailsplit_usage);
			continue;
		}
	}

	/* Either one remaining arg (dir), or two (mbox and dir) */
	switch (argc - i) {
	case 1:
		mbox = stdin;
		break;
	case 2:
		if ((mbox = fopen(argv[i], "r")) == NULL)
			die("cannot open mbox %s for reading", argv[i]);
		break;
	default:
		usage(git_mailsplit_usage);
	}
	/* The mbox is opened before this, so a relative mbox path still works. */
	if (chdir(argv[argc - 1]) < 0)
		usage(git_mailsplit_usage);

	/* Prime buf[] with the first line; split_one() expects it there. */
	nr = 0;
	if (fgets(buf, sizeof(buf), mbox) == NULL)
		die("cannot read mbox");

	for (;;) {
		/*
		 * nr_prec is only a minimum width: once nr needs more
		 * digits the name grows.  Size the buffer for any int and
		 * bound the write instead of relying on nr_prec + 1 bytes.
		 */
		char name[16];

		snprintf(name, sizeof(name), "%0*d", nr_prec, ++nr);
		switch (split_one(mbox, name)) {
		case 0:
			/* Next "From " line is already in buf[]; keep going. */
			break;
		case 1:
			/* End of mbox: nr is the number of messages written. */
			printf("%d\n", nr);
			return 0;
		default:
			exit(1);
		}
	}
}
back to top