// SPDX-License-Identifier: GPL-2.0-only
//
// Traverse the source tree, parsing all .gitignore files, and print file paths
// that are ignored by git.
// The output is suitable to the --exclude-from option of tar.
// This is useful until the --exclude-vcs-ignores option gets working correctly.
//
// Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org>
//                      (a lot of code imported from GIT)

#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

// Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT
//
//---------------------------(IMPORT FROM GIT BEGIN)---------------------------

// Copied from environment.c

static bool ignore_case;

// Copied from git-compat-util.h

/* Sane ctype - no locale, and works with signed chars */
#undef isascii
#undef isspace
#undef isdigit
#undef isalpha
#undef isalnum
#undef isprint
#undef islower
#undef isupper
#undef tolower
#undef toupper
#undef iscntrl
#undef ispunct
#undef isxdigit

static const unsigned char sane_ctype[256];
#define GIT_SPACE 0x01
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_GLOB_SPECIAL 0x08
#define GIT_REGEX_SPECIAL 0x10
#define GIT_PATHSPEC_MAGIC 0x20
#define GIT_CNTRL 0x40
#define GIT_PUNCT 0x80
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isascii(x) (((x) & ~0x7f) == 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
#define islower(x) sane_iscase(x, 1)
#define isupper(x) sane_iscase(x, 0)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)

static inline int sane_case(int x, int high)
{
	if (sane_istest(x, GIT_ALPHA))
		x = (x & ~0x20) | high;
	return x;
}

static inline int sane_iscase(int x, int is_lower)
{
	if (!sane_istest(x, GIT_ALPHA))
		return 0;

	if (is_lower)
		return (x & 0x20) != 0;
	else
		return (x & 0x20) == 0;
}

// Copied from ctype.c

enum {
	S = GIT_SPACE,
	A = GIT_ALPHA,
	D = GIT_DIGIT,
	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
	X = GIT_CNTRL,
	U = GIT_PUNCT,
	Z = GIT_CNTRL | GIT_SPACE
};

static const unsigned char sane_ctype[256] = {
	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
	A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,		/*  80.. 95 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
	A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,		/* 112..127 */
	/* Nothing in the 128.. range */
};

// Copied from hex.c

static const signed char hexval_table[256] = {
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 00-07 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 08-0f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 10-17 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 18-1f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 20-27 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 28-2f */
	  0,  1,  2,  3,  4,  5,  6,  7,		/* 30-37 */
	  8,  9, -1, -1, -1, -1, -1, -1,		/* 38-3f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 40-47 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 48-4f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 50-57 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 58-5f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 60-67 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 68-67 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 70-77 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 78-7f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 80-87 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 88-8f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 90-97 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 98-9f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a0-a7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a8-af */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b0-b7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b8-bf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c0-c7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c8-cf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d0-d7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d8-df */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e0-e7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e8-ef */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f0-f7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f8-ff */
};

// Copied from wildmatch.h

#define WM_CASEFOLD 1
#define WM_PATHNAME 2

#define WM_NOMATCH 1
#define WM_MATCH 0
#define WM_ABORT_ALL -1
#define WM_ABORT_TO_STARSTAR -2

// Copied from wildmatch.c

typedef unsigned char uchar;

// local modification: remove NEGATE_CLASS(2)

#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
				    && *(class) == *(litmatch) \
				    && strncmp((char*)class, litmatch, len) == 0)

// local modification: simpilify macros
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#define ISGRAPH(c) (isprint(c) && !isspace(c))
#define ISPRINT(c) isprint(c)
#define ISDIGIT(c) isdigit(c)
#define ISALNUM(c) isalnum(c)
#define ISALPHA(c) isalpha(c)
#define ISCNTRL(c) iscntrl(c)
#define ISLOWER(c) islower(c)
#define ISPUNCT(c) ispunct(c)
#define ISSPACE(c) isspace(c)
#define ISUPPER(c) isupper(c)
#define ISXDIGIT(c) isxdigit(c)

/* Match pattern "p" against "text" */
static int dowild(const uchar *p, const uchar *text, unsigned int flags)
{
	uchar p_ch;
	const uchar *pattern = p;

	for ( ; (p_ch = *p) != '\0'; text++, p++) {
		int matched, match_slash, negated;
		uchar t_ch, prev_ch;
		if ((t_ch = *text) == '\0' && p_ch != '*')
			return WM_ABORT_ALL;
		if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
			t_ch = tolower(t_ch);
		if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
			p_ch = tolower(p_ch);
		switch (p_ch) {
		case '\\':
			/* Literal match with following character.  Note that the test
			 * in "default" handles the p[1] == '\0' failure case. */
			p_ch = *++p;
			/* FALLTHROUGH */
		default:
			if (t_ch != p_ch)
				return WM_NOMATCH;
			continue;
		case '?':
			/* Match anything but '/'. */
			if ((flags & WM_PATHNAME) && t_ch == '/')
				return WM_NOMATCH;
			continue;
		case '*':
			if (*++p == '*') {
				const uchar *prev_p = p - 2;
				while (*++p == '*') {}
				if (!(flags & WM_PATHNAME))
					/* without WM_PATHNAME, '*' == '**' */
					match_slash = 1;
				else if ((prev_p < pattern || *prev_p == '/') &&
				    (*p == '\0' || *p == '/' ||
				     (p[0] == '\\' && p[1] == '/'))) {
					/*
					 * Assuming we already match 'foo/' and are at
					 * <star star slash>, just assume it matches
					 * nothing and go ahead match the rest of the
					 * pattern with the remaining string. This
					 * helps make foo/<*><*>/bar (<> because
					 * otherwise it breaks C comment syntax) match
					 * both foo/bar and foo/a/bar.
					 */
					if (p[0] == '/' &&
					    dowild(p + 1, text, flags) == WM_MATCH)
						return WM_MATCH;
					match_slash = 1;
				} else /* WM_PATHNAME is set */
					match_slash = 0;
			} else
				/* without WM_PATHNAME, '*' == '**' */
				match_slash = flags & WM_PATHNAME ? 0 : 1;
			if (*p == '\0') {
				/* Trailing "**" matches everything.  Trailing "*" matches
				 * only if there are no more slash characters. */
				if (!match_slash) {
					if (strchr((char *)text, '/'))
						return WM_NOMATCH;
				}
				return WM_MATCH;
			} else if (!match_slash && *p == '/') {
				/*
				 * _one_ asterisk followed by a slash
				 * with WM_PATHNAME matches the next
				 * directory
				 */
				const char *slash = strchr((char*)text, '/');
				if (!slash)
					return WM_NOMATCH;
				text = (const uchar*)slash;
				/* the slash is consumed by the top-level for loop */
				break;
			}
			while (1) {
				if (t_ch == '\0')
					break;
				/*
				 * Try to advance faster when an asterisk is
				 * followed by a literal. We know in this case
				 * that the string before the literal
				 * must belong to "*".
				 * If match_slash is false, do not look past
				 * the first slash as it cannot belong to '*'.
				 */
				if (!is_glob_special(*p)) {
					p_ch = *p;
					if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
						p_ch = tolower(p_ch);
					while ((t_ch = *text) != '\0' &&
					       (match_slash || t_ch != '/')) {
						if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
							t_ch = tolower(t_ch);
						if (t_ch == p_ch)
							break;
						text++;
					}
					if (t_ch != p_ch)
						return WM_NOMATCH;
				}
				if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
					if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
						return matched;
				} else if (!match_slash && t_ch == '/')
					return WM_ABORT_TO_STARSTAR;
				t_ch = *++text;
			}
			return WM_ABORT_ALL;
		case '[':
			p_ch = *++p;
			if (p_ch == '^')
				p_ch = '!';
			/* Assign literal 1/0 because of "matched" comparison. */
			negated = p_ch == '!' ? 1 : 0;
			if (negated) {
				/* Inverted character class. */
				p_ch = *++p;
			}
			prev_ch = 0;
			matched = 0;
			do {
				if (!p_ch)
					return WM_ABORT_ALL;
				if (p_ch == '\\') {
					p_ch = *++p;
					if (!p_ch)
						return WM_ABORT_ALL;
					if (t_ch == p_ch)
						matched = 1;
				} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
					p_ch = *++p;
					if (p_ch == '\\') {
						p_ch = *++p;
						if (!p_ch)
							return WM_ABORT_ALL;
					}
					if (t_ch <= p_ch && t_ch >= prev_ch)
						matched = 1;
					else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
						uchar t_ch_upper = toupper(t_ch);
						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
							matched = 1;
					}
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (p_ch == '[' && p[1] == ':') {
					const uchar *s;
					int i;
					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
					if (!p_ch)
						return WM_ABORT_ALL;
					i = p - s - 1;
					if (i < 0 || p[-1] != ':') {
						/* Didn't find ":]", so treat like a normal set. */
						p = s - 2;
						p_ch = '[';
						if (t_ch == p_ch)
							matched = 1;
						continue;
					}
					if (CC_EQ(s,i, "alnum")) {
						if (ISALNUM(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "alpha")) {
						if (ISALPHA(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "blank")) {
						if (ISBLANK(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "cntrl")) {
						if (ISCNTRL(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "digit")) {
						if (ISDIGIT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "graph")) {
						if (ISGRAPH(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "lower")) {
						if (ISLOWER(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "print")) {
						if (ISPRINT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "punct")) {
						if (ISPUNCT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "space")) {
						if (ISSPACE(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "upper")) {
						if (ISUPPER(t_ch))
							matched = 1;
						else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "xdigit")) {
						if (ISXDIGIT(t_ch))
							matched = 1;
					} else /* malformed [:class:] string */
						return WM_ABORT_ALL;
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (t_ch == p_ch)
					matched = 1;
			} while (prev_ch = p_ch, (p_ch = *++p) != ']');
			if (matched == negated ||
			    ((flags & WM_PATHNAME) && t_ch == '/'))
				return WM_NOMATCH;
			continue;
		}
	}

	return *text ? WM_NOMATCH : WM_MATCH;
}

/* Match the "pattern" against the "text" string. */
static int wildmatch(const char *pattern, const char *text, unsigned int flags)
{
	// local modification: move WM_CASEFOLD here
	if (ignore_case)
		flags |= WM_CASEFOLD;

	return dowild((const uchar*)pattern, (const uchar*)text, flags);
}

// Copied from dir.h

#define PATTERN_FLAG_NODIR 1
#define PATTERN_FLAG_ENDSWITH 4
#define PATTERN_FLAG_MUSTBEDIR 8
#define PATTERN_FLAG_NEGATIVE 16

// Copied from dir.c

static int fspathncmp(const char *a, const char *b, size_t count)
{
	return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
}

static int simple_length(const char *match)
{
	int len = -1;

	for (;;) {
		unsigned char c = *match++;
		len++;
		if (c == '\0' || is_glob_special(c))
			return len;
	}
}

static int no_wildcard(const char *string)
{
	return string[simple_length(string)] == '\0';
}

static void parse_path_pattern(const char **pattern,
			       int *patternlen,
			       unsigned *flags,
			       int *nowildcardlen)
{
	const char *p = *pattern;
	size_t i, len;

	*flags = 0;
	if (*p == '!') {
		*flags |= PATTERN_FLAG_NEGATIVE;
		p++;
	}
	len = strlen(p);
	if (len && p[len - 1] == '/') {
		len--;
		*flags |= PATTERN_FLAG_MUSTBEDIR;
	}
	for (i = 0; i < len; i++) {
		if (p[i] == '/')
			break;
	}
	if (i == len)
		*flags |= PATTERN_FLAG_NODIR;
	*nowildcardlen = simple_length(p);
	/*
	 * we should have excluded the trailing slash from 'p' too,
	 * but that's one more allocation. Instead just make sure
	 * nowildcardlen does not exceed real patternlen
	 */
	if (*nowildcardlen > len)
		*nowildcardlen = len;
	if (*p == '*' && no_wildcard(p + 1))
		*flags |= PATTERN_FLAG_ENDSWITH;
	*pattern = p;
	*patternlen = len;
}

static void trim_trailing_spaces(char *buf)
{
	char *p, *last_space = NULL;

	for (p = buf; *p; p++)
		switch (*p) {
		case ' ':
			if (!last_space)
				last_space = p;
			break;
		case '\\':
			p++;
			if (!*p)
				return;
			/* fallthrough */
		default:
			last_space = NULL;
		}

	if (last_space)
		*last_space = '\0';
}

static int match_basename(const char *basename, int basenamelen,
			  const char *pattern, int prefix, int patternlen,
			  unsigned flags)
{
	if (prefix == patternlen) {
		if (patternlen == basenamelen &&
		    !fspathncmp(pattern, basename, basenamelen))
			return 1;
	} else if (flags & PATTERN_FLAG_ENDSWITH) {
		/* "*literal" matching against "fooliteral" */
		if (patternlen - 1 <= basenamelen &&
		    !fspathncmp(pattern + 1,
				   basename + basenamelen - (patternlen - 1),
				   patternlen - 1))
			return 1;
	} else {
		// local modification: call wildmatch() directly
		if (!wildmatch(pattern, basename, flags))
			return 1;
	}
	return 0;
}

static int match_pathname(const char *pathname, int pathlen,
			  const char *base, int baselen,
			  const char *pattern, int prefix, int patternlen)
{
	// local modification: remove local variables

	/*
	 * match with FNM_PATHNAME; the pattern has base implicitly
	 * in front of it.
	 */
	if (*pattern == '/') {
		pattern++;
		patternlen--;
		prefix--;
	}

	/*
	 * baselen does not count the trailing slash. base[] may or
	 * may not end with a trailing slash though.
	 */
	if (pathlen < baselen + 1 ||
	    (baselen && pathname[baselen] != '/') ||
	    fspathncmp(pathname, base, baselen))
		return 0;

	// local modification: simplified because always baselen > 0
	pathname += baselen + 1;
	pathlen -= baselen + 1;

	if (prefix) {
		/*
		 * if the non-wildcard part is longer than the
		 * remaining pathname, surely it cannot match.
		 */
		if (prefix > pathlen)
			return 0;

		if (fspathncmp(pattern, pathname, prefix))
			return 0;
		pattern += prefix;
		patternlen -= prefix;
		pathname += prefix;
		pathlen -= prefix;

		/*
		 * If the whole pattern did not have a wildcard,
		 * then our prefix match is all we need; we
		 * do not need to call fnmatch at all.
		 */
		if (!patternlen && !pathlen)
			return 1;
	}

	// local modification: call wildmatch() directly
	return !wildmatch(pattern, pathname, WM_PATHNAME);
}

// Copied from git/utf8.c

static const char utf8_bom[] = "\357\273\277";

//----------------------------(IMPORT FROM GIT END)----------------------------

struct pattern {
	unsigned int flags;
	int nowildcardlen;
	int patternlen;
	int dirlen;
	char pattern[];
};

static struct pattern **pattern_list;
static int nr_patterns, alloced_patterns;

// Remember the number of patterns at each directory level
static int *nr_patterns_at;
// Track the current/max directory level;
static int depth, max_depth;
static bool debug_on;
static FILE *out_fp, *stat_fp;
static char *prefix = "";
static char *progname;

static void __attribute__((noreturn)) perror_exit(const char *s)
{
	perror(s);

	exit(EXIT_FAILURE);
}

static void __attribute__((noreturn)) error_exit(const char *fmt, ...)
{
	va_list args;

	fprintf(stderr, "%s: error: ", progname);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(EXIT_FAILURE);
}

static void debug(const char *fmt, ...)
{
	va_list args;
	int i;

	if (!debug_on)
		return;

	fprintf(stderr, "[DEBUG] ");

	for (i = 0; i < depth * 2; i++)
		fputc(' ', stderr);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}

static void *xrealloc(void *ptr, size_t size)
{
	ptr = realloc(ptr, size);
	if (!ptr)
		perror_exit(progname);

	return ptr;
}

static void *xmalloc(size_t size)
{
	return xrealloc(NULL, size);
}

// similar to last_matching_pattern_from_list() in GIT
static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir)
{
	int i;

	// Search in the reverse order because the last matching pattern wins.
	for (i = nr_patterns - 1; i >= 0; i--) {
		struct pattern *p = pattern_list[i];
		unsigned int flags = p->flags;
		const char *gitignore_dir = p->pattern + p->patternlen + 1;
		bool ignored;

		if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir)
			continue;

		if (flags & PATTERN_FLAG_NODIR) {
			if (!match_basename(path + dirlen + 1,
					    pathlen - dirlen - 1,
					    p->pattern,
					    p->nowildcardlen,
					    p->patternlen,
					    p->flags))
				continue;
		} else {
			if (!match_pathname(path, pathlen,
					    gitignore_dir, p->dirlen,
					    p->pattern,
					    p->nowildcardlen,
					    p->patternlen))
				continue;
		}

		debug("%s: matches %s%s%s (%s/.gitignore)\n", path,
		      flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
		      flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "",
		      gitignore_dir);

		ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0;
		if (ignored)
			debug("Ignore: %s\n", path);

		return ignored;
	}

	debug("%s: no match\n", path);

	return false;
}

static void add_pattern(const char *string, const char *dir, int dirlen)
{
	struct pattern *p;
	int patternlen, nowildcardlen;
	unsigned int flags;

	parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen);

	if (patternlen == 0)
		return;

	p = xmalloc(sizeof(*p) + patternlen + dirlen + 2);

	memcpy(p->pattern, string, patternlen);
	p->pattern[patternlen] = 0;
	memcpy(p->pattern + patternlen + 1, dir, dirlen);
	p->pattern[patternlen + 1 + dirlen] = 0;

	p->patternlen = patternlen;
	p->nowildcardlen = nowildcardlen;
	p->dirlen = dirlen;
	p->flags = flags;

	debug("Add pattern: %s%s%s\n",
	      flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
	      flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "");

	if (nr_patterns >= alloced_patterns) {
		alloced_patterns += 128;
		pattern_list = xrealloc(pattern_list,
					sizeof(*pattern_list) * alloced_patterns);
	}

	pattern_list[nr_patterns++] = p;
}

// similar to add_patterns_from_buffer() in GIT
static void add_patterns_from_gitignore(const char *dir, int dirlen)
{
	struct stat st;
	char path[PATH_MAX], *buf, *entry;
	size_t size;
	int fd, pathlen, i;

	pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir);
	if (pathlen >= sizeof(path))
		error_exit("%s: too long path was truncated\n", path);

	fd = open(path, O_RDONLY | O_NOFOLLOW);
	if (fd < 0) {
		if (errno != ENOENT)
			return perror_exit(path);
		return;
	}

	if (fstat(fd, &st) < 0)
		perror_exit(path);

	size = st.st_size;

	buf = xmalloc(size + 1);
	if (read(fd, buf, st.st_size) != st.st_size)
		perror_exit(path);

	buf[st.st_size] = '\n';
	if (close(fd))
		perror_exit(path);

	debug("Parse %s\n", path);

	entry = buf;

	// skip utf8 bom
	if (!strncmp(entry, utf8_bom, strlen(utf8_bom)))
		entry += strlen(utf8_bom);

	for (i = entry - buf; i < size; i++) {
		if (buf[i] == '\n') {
			if (entry != buf + i && entry[0] != '#') {
				buf[i - (i && buf[i-1] == '\r')] = 0;
				trim_trailing_spaces(entry);
				add_pattern(entry, dir, dirlen);
			}
			entry = buf + i + 1;
		}
	}

	free(buf);
}

// Save the current number of patterns and increment the depth
static void increment_depth(void)
{
	if (depth >= max_depth) {
		max_depth += 1;
		nr_patterns_at = xrealloc(nr_patterns_at,
					  sizeof(*nr_patterns_at) * max_depth);
	}

	nr_patterns_at[depth] = nr_patterns;
	depth++;
}

// Decrement the depth, and free up the patterns of this directory level.
static void decrement_depth(void)
{
	depth--;
	assert(depth >= 0);

	while (nr_patterns > nr_patterns_at[depth])
		free(pattern_list[--nr_patterns]);
}

static void print_path(const char *path)
{
	// The path always starts with "./"
	assert(strlen(path) >= 2);

	// Replace the root directory with a preferred prefix.
	// This is useful for the tar command.
	fprintf(out_fp, "%s%s\n", prefix, path + 2);
}

static void print_stat(const char *path, struct stat *st)
{
	if (!stat_fp)
		return;

	if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode))
		return;

	assert(strlen(path) >= 2);

	fprintf(stat_fp, "%c %9ld %10ld %s\n",
		S_ISLNK(st->st_mode) ? 'l' : '-',
		st->st_size, st->st_mtim.tv_sec, path + 2);
}

// Traverse the entire directory tree, parsing .gitignore files.
// Print file paths that are not tracked by git.
//
// Return true if all files under the directory are ignored, false otherwise.
static bool traverse_directory(const char *dir, int dirlen)
{
	bool all_ignored = true;
	DIR *dirp;

	debug("Enter[%d]: %s\n", depth, dir);
	increment_depth();

	add_patterns_from_gitignore(dir, dirlen);

	dirp = opendir(dir);
	if (!dirp)
		perror_exit(dir);

	while (1) {
		struct dirent *d;
		struct stat st;
		char path[PATH_MAX];
		int pathlen;
		bool ignored;

		errno = 0;
		d = readdir(dirp);
		if (!d) {
			if (errno)
				perror_exit(dir);
			break;
		}

		if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, "."))
			continue;

		pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name);
		if (pathlen >= sizeof(path))
			error_exit("%s: too long path was truncated\n", path);

		if (lstat(path, &st) < 0)
			perror_exit(path);

		if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) ||
		    is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) {
			ignored = true;
		} else {
			if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode))
				// If all the files in a directory are ignored,
				// let's ignore that directory as well. This
				// will avoid empty directories in the tarball.
				ignored = traverse_directory(path, pathlen);
			else
				ignored = false;
		}

		if (ignored) {
			print_path(path);
		} else {
			print_stat(path, &st);
			all_ignored = false;
		}
	}

	if (closedir(dirp))
		perror_exit(dir);

	decrement_depth();
	debug("Leave[%d]: %s\n", depth, dir);

	return all_ignored;
}

static void usage(void)
{
	fprintf(stderr,
		"usage: %s [options]\n"
		"\n"
		"Show files that are ignored by git\n"
		"\n"
		"options:\n"
		"  -d, --debug                  print debug messages to stderr\n"
		"  -e, --exclude PATTERN        add the given exclude pattern\n"
		"  -h, --help                   show this help message and exit\n"
		"  -i, --ignore-case            Ignore case differences between the patterns and the files\n"
		"  -o, --output FILE            output the ignored files to a file (default: '-', i.e. stdout)\n"
		"  -p, --prefix PREFIX          prefix added to each path (default: empty string)\n"
		"  -r, --rootdir DIR            root of the source tree (default: current working directory)\n"
		"  -s, --stat FILE              output the file stat of non-ignored files to a file\n",
		progname);
}

static void open_output(const char *pathname, FILE **fp)
{
	if (strcmp(pathname, "-")) {
		*fp = fopen(pathname, "w");
		if (!*fp)
			perror_exit(pathname);
	} else {
		*fp = stdout;
	}
}

static void close_output(const char *pathname, FILE *fp)
{
	fflush(fp);

	if (ferror(fp))
		error_exit("not all data was written to the output\n");

	if (fclose(fp))
		perror_exit(pathname);
}

int main(int argc, char *argv[])
{
	const char *output = "-";
	const char *rootdir = ".";
	const char *stat = NULL;

	progname = strrchr(argv[0], '/');
	if (progname)
		progname++;
	else
		progname = argv[0];

	while (1) {
		static struct option long_options[] = {
			{"debug",       no_argument,       NULL, 'd'},
			{"help",        no_argument,       NULL, 'h'},
			{"ignore-case", no_argument,       NULL, 'i'},
			{"output",      required_argument, NULL, 'o'},
			{"prefix",      required_argument, NULL, 'p'},
			{"rootdir",     required_argument, NULL, 'r'},
			{"stat",        required_argument, NULL, 's'},
			{"exclude",     required_argument, NULL, 'x'},
			{},
		};

		int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL);

		if (c == -1)
			break;

		switch (c) {
		case 'd':
			debug_on = true;
			break;
		case 'h':
			usage();
			exit(0);
		case 'i':
			ignore_case = true;
			break;
		case 'o':
			output = optarg;
			break;
		case 'p':
			prefix = optarg;
			break;
		case 'r':
			rootdir = optarg;
			break;
		case 's':
			stat = optarg;
			break;
		case 'x':
			add_pattern(optarg, ".", strlen("."));
			break;
		case '?':
			usage();
			/* fallthrough */
		default:
			exit(EXIT_FAILURE);
		}
	}

	open_output(output, &out_fp);
	if (stat && stat[0])
		open_output(stat, &stat_fp);

	if (chdir(rootdir))
		perror_exit(rootdir);

	add_pattern(".git/", ".", strlen("."));

	if (traverse_directory(".", strlen(".")))
		print_path("./");

	assert(depth == 0);

	while (nr_patterns > 0)
		free(pattern_list[--nr_patterns]);
	free(pattern_list);
	free(nr_patterns_at);

	close_output(output, out_fp);
	if (stat_fp)
		close_output(stat, stat_fp);

	return 0;
}