diff options
Diffstat (limited to 'scripts/list-gitignored.c')
-rw-r--r-- | scripts/list-gitignored.c | 1057 |
1 files changed, 0 insertions, 1057 deletions
diff --git a/scripts/list-gitignored.c b/scripts/list-gitignored.c deleted file mode 100644 index f9941f8dcd2b..000000000000 --- a/scripts/list-gitignored.c +++ /dev/null @@ -1,1057 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -// -// Traverse the source tree, parsing all .gitignore files, and print file paths -// that are ignored by git. -// The output is suitable to the --exclude-from option of tar. -// This is useful until the --exclude-vcs-ignores option gets working correctly. -// -// Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org> -// (a lot of code imported from GIT) - -#include <assert.h> -#include <dirent.h> -#include <errno.h> -#include <fcntl.h> -#include <getopt.h> -#include <stdarg.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> - -// Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT -// -//---------------------------(IMPORT FROM GIT BEGIN)--------------------------- - -// Copied from environment.c - -static bool ignore_case; - -// Copied from git-compat-util.h - -/* Sane ctype - no locale, and works with signed chars */ -#undef isascii -#undef isspace -#undef isdigit -#undef isalpha -#undef isalnum -#undef isprint -#undef islower -#undef isupper -#undef tolower -#undef toupper -#undef iscntrl -#undef ispunct -#undef isxdigit - -static const unsigned char sane_ctype[256]; -#define GIT_SPACE 0x01 -#define GIT_DIGIT 0x02 -#define GIT_ALPHA 0x04 -#define GIT_GLOB_SPECIAL 0x08 -#define GIT_REGEX_SPECIAL 0x10 -#define GIT_PATHSPEC_MAGIC 0x20 -#define GIT_CNTRL 0x40 -#define GIT_PUNCT 0x80 -#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) -#define isascii(x) (((x) & ~0x7f) == 0) -#define isspace(x) sane_istest(x,GIT_SPACE) -#define isdigit(x) sane_istest(x,GIT_DIGIT) -#define isalpha(x) sane_istest(x,GIT_ALPHA) -#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e) -#define islower(x) sane_iscase(x, 1) -#define isupper(x) sane_iscase(x, 0) -#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) -#define iscntrl(x) (sane_istest(x,GIT_CNTRL)) -#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \ - GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC) -#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1) -#define tolower(x) sane_case((unsigned char)(x), 0x20) -#define toupper(x) sane_case((unsigned char)(x), 0) - -static inline int sane_case(int x, int high) -{ - if (sane_istest(x, GIT_ALPHA)) - x = (x & ~0x20) | high; - return x; -} - -static inline int sane_iscase(int x, int is_lower) -{ - if (!sane_istest(x, GIT_ALPHA)) - return 0; - - if (is_lower) - return (x & 0x20) != 0; - else - return (x & 0x20) == 0; -} - -// Copied from ctype.c - -enum { - S = GIT_SPACE, - A = GIT_ALPHA, - D = GIT_DIGIT, - G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ - R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */ - P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */ - X = GIT_CNTRL, - U = GIT_PUNCT, - Z = GIT_CNTRL | GIT_SPACE -}; - -static const unsigned char sane_ctype[256] = { - X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */ - X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */ - S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */ - D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */ - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ - A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */ - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ - A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */ - /* Nothing in the 128.. range */ -}; - -// Copied from hex.c - -static const signed char hexval_table[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, /* 00-07 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 08-0f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 10-17 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 18-1f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 20-27 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 28-2f */ - 0, 1, 2, 3, 4, 5, 6, 7, /* 30-37 */ - 8, 9, -1, -1, -1, -1, -1, -1, /* 38-3f */ - -1, 10, 11, 12, 13, 14, 15, -1, /* 40-47 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 48-4f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 50-57 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 58-5f */ - -1, 10, 11, 12, 13, 14, 15, -1, /* 60-67 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 68-67 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 70-77 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 78-7f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 80-87 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 88-8f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 90-97 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 98-9f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* a0-a7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* a8-af */ - -1, -1, -1, -1, -1, -1, -1, -1, /* b0-b7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* b8-bf */ - -1, -1, -1, -1, -1, -1, -1, -1, /* c0-c7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* c8-cf */ - -1, -1, -1, -1, -1, -1, -1, -1, /* d0-d7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* d8-df */ - -1, -1, -1, -1, -1, -1, -1, -1, /* e0-e7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* e8-ef */ - -1, -1, -1, -1, -1, -1, -1, -1, /* f0-f7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* f8-ff */ -}; - -// Copied from wildmatch.h - -#define WM_CASEFOLD 1 -#define WM_PATHNAME 2 - -#define WM_NOMATCH 1 -#define WM_MATCH 0 -#define WM_ABORT_ALL -1 -#define WM_ABORT_TO_STARSTAR -2 - -// Copied from wildmatch.c - -typedef unsigned char uchar; - -// local modification: remove NEGATE_CLASS(2) - -#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \ - && *(class) == *(litmatch) \ - && strncmp((char*)class, litmatch, len) == 0) - -// local modification: simpilify macros -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#define ISGRAPH(c) (isprint(c) && !isspace(c)) -#define ISPRINT(c) isprint(c) -#define ISDIGIT(c) isdigit(c) -#define ISALNUM(c) isalnum(c) -#define ISALPHA(c) isalpha(c) -#define ISCNTRL(c) iscntrl(c) -#define ISLOWER(c) islower(c) -#define ISPUNCT(c) ispunct(c) -#define ISSPACE(c) isspace(c) -#define ISUPPER(c) isupper(c) -#define ISXDIGIT(c) isxdigit(c) - -/* Match pattern "p" against "text" */ -static int dowild(const uchar *p, const uchar *text, unsigned int flags) -{ - uchar p_ch; - const uchar *pattern = p; - - for ( ; (p_ch = *p) != '\0'; text++, p++) { - int matched, match_slash, negated; - uchar t_ch, prev_ch; - if ((t_ch = *text) == '\0' && p_ch != '*') - return WM_ABORT_ALL; - if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) - t_ch = tolower(t_ch); - if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) - p_ch = tolower(p_ch); - switch (p_ch) { - case '\\': - /* Literal match with following character. Note that the test - * in "default" handles the p[1] == '\0' failure case. */ - p_ch = *++p; - /* FALLTHROUGH */ - default: - if (t_ch != p_ch) - return WM_NOMATCH; - continue; - case '?': - /* Match anything but '/'. */ - if ((flags & WM_PATHNAME) && t_ch == '/') - return WM_NOMATCH; - continue; - case '*': - if (*++p == '*') { - const uchar *prev_p = p - 2; - while (*++p == '*') {} - if (!(flags & WM_PATHNAME)) - /* without WM_PATHNAME, '*' == '**' */ - match_slash = 1; - else if ((prev_p < pattern || *prev_p == '/') && - (*p == '\0' || *p == '/' || - (p[0] == '\\' && p[1] == '/'))) { - /* - * Assuming we already match 'foo/' and are at - * <star star slash>, just assume it matches - * nothing and go ahead match the rest of the - * pattern with the remaining string. This - * helps make foo/<*><*>/bar (<> because - * otherwise it breaks C comment syntax) match - * both foo/bar and foo/a/bar. - */ - if (p[0] == '/' && - dowild(p + 1, text, flags) == WM_MATCH) - return WM_MATCH; - match_slash = 1; - } else /* WM_PATHNAME is set */ - match_slash = 0; - } else - /* without WM_PATHNAME, '*' == '**' */ - match_slash = flags & WM_PATHNAME ? 0 : 1; - if (*p == '\0') { - /* Trailing "**" matches everything. Trailing "*" matches - * only if there are no more slash characters. */ - if (!match_slash) { - if (strchr((char *)text, '/')) - return WM_NOMATCH; - } - return WM_MATCH; - } else if (!match_slash && *p == '/') { - /* - * _one_ asterisk followed by a slash - * with WM_PATHNAME matches the next - * directory - */ - const char *slash = strchr((char*)text, '/'); - if (!slash) - return WM_NOMATCH; - text = (const uchar*)slash; - /* the slash is consumed by the top-level for loop */ - break; - } - while (1) { - if (t_ch == '\0') - break; - /* - * Try to advance faster when an asterisk is - * followed by a literal. We know in this case - * that the string before the literal - * must belong to "*". - * If match_slash is false, do not look past - * the first slash as it cannot belong to '*'. - */ - if (!is_glob_special(*p)) { - p_ch = *p; - if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) - p_ch = tolower(p_ch); - while ((t_ch = *text) != '\0' && - (match_slash || t_ch != '/')) { - if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) - t_ch = tolower(t_ch); - if (t_ch == p_ch) - break; - text++; - } - if (t_ch != p_ch) - return WM_NOMATCH; - } - if ((matched = dowild(p, text, flags)) != WM_NOMATCH) { - if (!match_slash || matched != WM_ABORT_TO_STARSTAR) - return matched; - } else if (!match_slash && t_ch == '/') - return WM_ABORT_TO_STARSTAR; - t_ch = *++text; - } - return WM_ABORT_ALL; - case '[': - p_ch = *++p; - if (p_ch == '^') - p_ch = '!'; - /* Assign literal 1/0 because of "matched" comparison. */ - negated = p_ch == '!' ? 1 : 0; - if (negated) { - /* Inverted character class. */ - p_ch = *++p; - } - prev_ch = 0; - matched = 0; - do { - if (!p_ch) - return WM_ABORT_ALL; - if (p_ch == '\\') { - p_ch = *++p; - if (!p_ch) - return WM_ABORT_ALL; - if (t_ch == p_ch) - matched = 1; - } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') { - p_ch = *++p; - if (p_ch == '\\') { - p_ch = *++p; - if (!p_ch) - return WM_ABORT_ALL; - } - if (t_ch <= p_ch && t_ch >= prev_ch) - matched = 1; - else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) { - uchar t_ch_upper = toupper(t_ch); - if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch) - matched = 1; - } - p_ch = 0; /* This makes "prev_ch" get set to 0. */ - } else if (p_ch == '[' && p[1] == ':') { - const uchar *s; - int i; - for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/ - if (!p_ch) - return WM_ABORT_ALL; - i = p - s - 1; - if (i < 0 || p[-1] != ':') { - /* Didn't find ":]", so treat like a normal set. */ - p = s - 2; - p_ch = '['; - if (t_ch == p_ch) - matched = 1; - continue; - } - if (CC_EQ(s,i, "alnum")) { - if (ISALNUM(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "alpha")) { - if (ISALPHA(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "blank")) { - if (ISBLANK(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "cntrl")) { - if (ISCNTRL(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "digit")) { - if (ISDIGIT(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "graph")) { - if (ISGRAPH(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "lower")) { - if (ISLOWER(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "print")) { - if (ISPRINT(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "punct")) { - if (ISPUNCT(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "space")) { - if (ISSPACE(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "upper")) { - if (ISUPPER(t_ch)) - matched = 1; - else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "xdigit")) { - if (ISXDIGIT(t_ch)) - matched = 1; - } else /* malformed [:class:] string */ - return WM_ABORT_ALL; - p_ch = 0; /* This makes "prev_ch" get set to 0. */ - } else if (t_ch == p_ch) - matched = 1; - } while (prev_ch = p_ch, (p_ch = *++p) != ']'); - if (matched == negated || - ((flags & WM_PATHNAME) && t_ch == '/')) - return WM_NOMATCH; - continue; - } - } - - return *text ? WM_NOMATCH : WM_MATCH; -} - -/* Match the "pattern" against the "text" string. */ -static int wildmatch(const char *pattern, const char *text, unsigned int flags) -{ - // local modification: move WM_CASEFOLD here - if (ignore_case) - flags |= WM_CASEFOLD; - - return dowild((const uchar*)pattern, (const uchar*)text, flags); -} - -// Copied from dir.h - -#define PATTERN_FLAG_NODIR 1 -#define PATTERN_FLAG_ENDSWITH 4 -#define PATTERN_FLAG_MUSTBEDIR 8 -#define PATTERN_FLAG_NEGATIVE 16 - -// Copied from dir.c - -static int fspathncmp(const char *a, const char *b, size_t count) -{ - return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count); -} - -static int simple_length(const char *match) -{ - int len = -1; - - for (;;) { - unsigned char c = *match++; - len++; - if (c == '\0' || is_glob_special(c)) - return len; - } -} - -static int no_wildcard(const char *string) -{ - return string[simple_length(string)] == '\0'; -} - -static void parse_path_pattern(const char **pattern, - int *patternlen, - unsigned *flags, - int *nowildcardlen) -{ - const char *p = *pattern; - size_t i, len; - - *flags = 0; - if (*p == '!') { - *flags |= PATTERN_FLAG_NEGATIVE; - p++; - } - len = strlen(p); - if (len && p[len - 1] == '/') { - len--; - *flags |= PATTERN_FLAG_MUSTBEDIR; - } - for (i = 0; i < len; i++) { - if (p[i] == '/') - break; - } - if (i == len) - *flags |= PATTERN_FLAG_NODIR; - *nowildcardlen = simple_length(p); - /* - * we should have excluded the trailing slash from 'p' too, - * but that's one more allocation. Instead just make sure - * nowildcardlen does not exceed real patternlen - */ - if (*nowildcardlen > len) - *nowildcardlen = len; - if (*p == '*' && no_wildcard(p + 1)) - *flags |= PATTERN_FLAG_ENDSWITH; - *pattern = p; - *patternlen = len; -} - -static void trim_trailing_spaces(char *buf) -{ - char *p, *last_space = NULL; - - for (p = buf; *p; p++) - switch (*p) { - case ' ': - if (!last_space) - last_space = p; - break; - case '\\': - p++; - if (!*p) - return; - /* fallthrough */ - default: - last_space = NULL; - } - - if (last_space) - *last_space = '\0'; -} - -static int match_basename(const char *basename, int basenamelen, - const char *pattern, int prefix, int patternlen, - unsigned flags) -{ - if (prefix == patternlen) { - if (patternlen == basenamelen && - !fspathncmp(pattern, basename, basenamelen)) - return 1; - } else if (flags & PATTERN_FLAG_ENDSWITH) { - /* "*literal" matching against "fooliteral" */ - if (patternlen - 1 <= basenamelen && - !fspathncmp(pattern + 1, - basename + basenamelen - (patternlen - 1), - patternlen - 1)) - return 1; - } else { - // local modification: call wildmatch() directly - if (!wildmatch(pattern, basename, flags)) - return 1; - } - return 0; -} - -static int match_pathname(const char *pathname, int pathlen, - const char *base, int baselen, - const char *pattern, int prefix, int patternlen) -{ - // local modification: remove local variables - - /* - * match with FNM_PATHNAME; the pattern has base implicitly - * in front of it. - */ - if (*pattern == '/') { - pattern++; - patternlen--; - prefix--; - } - - /* - * baselen does not count the trailing slash. base[] may or - * may not end with a trailing slash though. - */ - if (pathlen < baselen + 1 || - (baselen && pathname[baselen] != '/') || - fspathncmp(pathname, base, baselen)) - return 0; - - // local modification: simplified because always baselen > 0 - pathname += baselen + 1; - pathlen -= baselen + 1; - - if (prefix) { - /* - * if the non-wildcard part is longer than the - * remaining pathname, surely it cannot match. - */ - if (prefix > pathlen) - return 0; - - if (fspathncmp(pattern, pathname, prefix)) - return 0; - pattern += prefix; - patternlen -= prefix; - pathname += prefix; - pathlen -= prefix; - - /* - * If the whole pattern did not have a wildcard, - * then our prefix match is all we need; we - * do not need to call fnmatch at all. - */ - if (!patternlen && !pathlen) - return 1; - } - - // local modification: call wildmatch() directly - return !wildmatch(pattern, pathname, WM_PATHNAME); -} - -// Copied from git/utf8.c - -static const char utf8_bom[] = "\357\273\277"; - -//----------------------------(IMPORT FROM GIT END)---------------------------- - -struct pattern { - unsigned int flags; - int nowildcardlen; - int patternlen; - int dirlen; - char pattern[]; -}; - -static struct pattern **pattern_list; -static int nr_patterns, alloced_patterns; - -// Remember the number of patterns at each directory level -static int *nr_patterns_at; -// Track the current/max directory level; -static int depth, max_depth; -static bool debug_on; -static FILE *out_fp, *stat_fp; -static char *prefix = ""; -static char *progname; - -static void __attribute__((noreturn)) perror_exit(const char *s) -{ - perror(s); - - exit(EXIT_FAILURE); -} - -static void __attribute__((noreturn)) error_exit(const char *fmt, ...) -{ - va_list args; - - fprintf(stderr, "%s: error: ", progname); - - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); - - exit(EXIT_FAILURE); -} - -static void debug(const char *fmt, ...) -{ - va_list args; - int i; - - if (!debug_on) - return; - - fprintf(stderr, "[DEBUG] "); - - for (i = 0; i < depth * 2; i++) - fputc(' ', stderr); - - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); -} - -static void *xrealloc(void *ptr, size_t size) -{ - ptr = realloc(ptr, size); - if (!ptr) - perror_exit(progname); - - return ptr; -} - -static void *xmalloc(size_t size) -{ - return xrealloc(NULL, size); -} - -// similar to last_matching_pattern_from_list() in GIT -static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir) -{ - int i; - - // Search in the reverse order because the last matching pattern wins. - for (i = nr_patterns - 1; i >= 0; i--) { - struct pattern *p = pattern_list[i]; - unsigned int flags = p->flags; - const char *gitignore_dir = p->pattern + p->patternlen + 1; - bool ignored; - - if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir) - continue; - - if (flags & PATTERN_FLAG_NODIR) { - if (!match_basename(path + dirlen + 1, - pathlen - dirlen - 1, - p->pattern, - p->nowildcardlen, - p->patternlen, - p->flags)) - continue; - } else { - if (!match_pathname(path, pathlen, - gitignore_dir, p->dirlen, - p->pattern, - p->nowildcardlen, - p->patternlen)) - continue; - } - - debug("%s: matches %s%s%s (%s/.gitignore)\n", path, - flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern, - flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "", - gitignore_dir); - - ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0; - if (ignored) - debug("Ignore: %s\n", path); - - return ignored; - } - - debug("%s: no match\n", path); - - return false; -} - -static void add_pattern(const char *string, const char *dir, int dirlen) -{ - struct pattern *p; - int patternlen, nowildcardlen; - unsigned int flags; - - parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen); - - if (patternlen == 0) - return; - - p = xmalloc(sizeof(*p) + patternlen + dirlen + 2); - - memcpy(p->pattern, string, patternlen); - p->pattern[patternlen] = 0; - memcpy(p->pattern + patternlen + 1, dir, dirlen); - p->pattern[patternlen + 1 + dirlen] = 0; - - p->patternlen = patternlen; - p->nowildcardlen = nowildcardlen; - p->dirlen = dirlen; - p->flags = flags; - - debug("Add pattern: %s%s%s\n", - flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern, - flags & PATTERN_FLAG_MUSTBEDIR ? "/" : ""); - - if (nr_patterns >= alloced_patterns) { - alloced_patterns += 128; - pattern_list = xrealloc(pattern_list, - sizeof(*pattern_list) * alloced_patterns); - } - - pattern_list[nr_patterns++] = p; -} - -// similar to add_patterns_from_buffer() in GIT -static void add_patterns_from_gitignore(const char *dir, int dirlen) -{ - struct stat st; - char path[PATH_MAX], *buf, *entry; - size_t size; - int fd, pathlen, i; - - pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir); - if (pathlen >= sizeof(path)) - error_exit("%s: too long path was truncated\n", path); - - fd = open(path, O_RDONLY | O_NOFOLLOW); - if (fd < 0) { - if (errno != ENOENT) - return perror_exit(path); - return; - } - - if (fstat(fd, &st) < 0) - perror_exit(path); - - size = st.st_size; - - buf = xmalloc(size + 1); - if (read(fd, buf, st.st_size) != st.st_size) - perror_exit(path); - - buf[st.st_size] = '\n'; - if (close(fd)) - perror_exit(path); - - debug("Parse %s\n", path); - - entry = buf; - - // skip utf8 bom - if (!strncmp(entry, utf8_bom, strlen(utf8_bom))) - entry += strlen(utf8_bom); - - for (i = entry - buf; i < size; i++) { - if (buf[i] == '\n') { - if (entry != buf + i && entry[0] != '#') { - buf[i - (i && buf[i-1] == '\r')] = 0; - trim_trailing_spaces(entry); - add_pattern(entry, dir, dirlen); - } - entry = buf + i + 1; - } - } - - free(buf); -} - -// Save the current number of patterns and increment the depth -static void increment_depth(void) -{ - if (depth >= max_depth) { - max_depth += 1; - nr_patterns_at = xrealloc(nr_patterns_at, - sizeof(*nr_patterns_at) * max_depth); - } - - nr_patterns_at[depth] = nr_patterns; - depth++; -} - -// Decrement the depth, and free up the patterns of this directory level. -static void decrement_depth(void) -{ - depth--; - assert(depth >= 0); - - while (nr_patterns > nr_patterns_at[depth]) - free(pattern_list[--nr_patterns]); -} - -static void print_path(const char *path) -{ - // The path always starts with "./" - assert(strlen(path) >= 2); - - // Replace the root directory with a preferred prefix. - // This is useful for the tar command. - fprintf(out_fp, "%s%s\n", prefix, path + 2); -} - -static void print_stat(const char *path, struct stat *st) -{ - if (!stat_fp) - return; - - if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode)) - return; - - assert(strlen(path) >= 2); - - fprintf(stat_fp, "%c %9ld %10ld %s\n", - S_ISLNK(st->st_mode) ? 'l' : '-', - st->st_size, st->st_mtim.tv_sec, path + 2); -} - -// Traverse the entire directory tree, parsing .gitignore files. -// Print file paths that are not tracked by git. -// -// Return true if all files under the directory are ignored, false otherwise. -static bool traverse_directory(const char *dir, int dirlen) -{ - bool all_ignored = true; - DIR *dirp; - - debug("Enter[%d]: %s\n", depth, dir); - increment_depth(); - - add_patterns_from_gitignore(dir, dirlen); - - dirp = opendir(dir); - if (!dirp) - perror_exit(dir); - - while (1) { - struct dirent *d; - struct stat st; - char path[PATH_MAX]; - int pathlen; - bool ignored; - - errno = 0; - d = readdir(dirp); - if (!d) { - if (errno) - perror_exit(dir); - break; - } - - if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, ".")) - continue; - - pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name); - if (pathlen >= sizeof(path)) - error_exit("%s: too long path was truncated\n", path); - - if (lstat(path, &st) < 0) - perror_exit(path); - - if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) || - is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) { - ignored = true; - } else { - if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) - // If all the files in a directory are ignored, - // let's ignore that directory as well. This - // will avoid empty directories in the tarball. - ignored = traverse_directory(path, pathlen); - else - ignored = false; - } - - if (ignored) { - print_path(path); - } else { - print_stat(path, &st); - all_ignored = false; - } - } - - if (closedir(dirp)) - perror_exit(dir); - - decrement_depth(); - debug("Leave[%d]: %s\n", depth, dir); - - return all_ignored; -} - -static void usage(void) -{ - fprintf(stderr, - "usage: %s [options]\n" - "\n" - "Show files that are ignored by git\n" - "\n" - "options:\n" - " -d, --debug print debug messages to stderr\n" - " -e, --exclude PATTERN add the given exclude pattern\n" - " -h, --help show this help message and exit\n" - " -i, --ignore-case Ignore case differences between the patterns and the files\n" - " -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n" - " -p, --prefix PREFIX prefix added to each path (default: empty string)\n" - " -r, --rootdir DIR root of the source tree (default: current working directory)\n" - " -s, --stat FILE output the file stat of non-ignored files to a file\n", - progname); -} - -static void open_output(const char *pathname, FILE **fp) -{ - if (strcmp(pathname, "-")) { - *fp = fopen(pathname, "w"); - if (!*fp) - perror_exit(pathname); - } else { - *fp = stdout; - } -} - -static void close_output(const char *pathname, FILE *fp) -{ - fflush(fp); - - if (ferror(fp)) - error_exit("not all data was written to the output\n"); - - if (fclose(fp)) - perror_exit(pathname); -} - -int main(int argc, char *argv[]) -{ - const char *output = "-"; - const char *rootdir = "."; - const char *stat = NULL; - - progname = strrchr(argv[0], '/'); - if (progname) - progname++; - else - progname = argv[0]; - - while (1) { - static struct option long_options[] = { - {"debug", no_argument, NULL, 'd'}, - {"help", no_argument, NULL, 'h'}, - {"ignore-case", no_argument, NULL, 'i'}, - {"output", required_argument, NULL, 'o'}, - {"prefix", required_argument, NULL, 'p'}, - {"rootdir", required_argument, NULL, 'r'}, - {"stat", required_argument, NULL, 's'}, - {"exclude", required_argument, NULL, 'x'}, - {}, - }; - - int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL); - - if (c == -1) - break; - - switch (c) { - case 'd': - debug_on = true; - break; - case 'h': - usage(); - exit(0); - case 'i': - ignore_case = true; - break; - case 'o': - output = optarg; - break; - case 'p': - prefix = optarg; - break; - case 'r': - rootdir = optarg; - break; - case 's': - stat = optarg; - break; - case 'x': - add_pattern(optarg, ".", strlen(".")); - break; - case '?': - usage(); - /* fallthrough */ - default: - exit(EXIT_FAILURE); - } - } - - open_output(output, &out_fp); - if (stat && stat[0]) - open_output(stat, &stat_fp); - - if (chdir(rootdir)) - perror_exit(rootdir); - - add_pattern(".git/", ".", strlen(".")); - - if (traverse_directory(".", strlen("."))) - print_path("./"); - - assert(depth == 0); - - while (nr_patterns > 0) - free(pattern_list[--nr_patterns]); - free(pattern_list); - free(nr_patterns_at); - - close_output(output, out_fp); - if (stat_fp) - close_output(stat, stat_fp); - - return 0; -} |