git.ipfire.org Git - thirdparty/git.git/blobdiff - grep.c
Merge branch 'ab/detox-gettext-tests'
[thirdparty/git.git] / grep.c
diff --git a/grep.c b/grep.c
index efeb6dc58dbec7dad4ea715909d92780faa27ee0..aabfaaa4c32e48569805dbe631db64fcf7b773c6 100644 (file)
--- a/grep.c
+++ b/grep.c
@@ -166,11 +166,6 @@ void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix
                                        pcre2_malloc, pcre2_free, NULL);
 #endif
 
-#ifdef USE_LIBPCRE1
-       pcre_malloc = malloc;
-       pcre_free = free;
-#endif
-
        *opt = grep_defaults;
 
        opt->repo = repo;
@@ -223,17 +218,7 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st
                break;
 
        case GREP_PATTERN_TYPE_PCRE:
-#ifdef USE_LIBPCRE2
                opt->pcre2 = 1;
-#else
-               /*
-                * It's important that pcre1 always be assigned to
-                * even when there's no USE_LIBPCRE* defined. We still
-                * call the PCRE stub function, it just dies with
-                * "cannot use Perl-compatible regexes[...]".
-                */
-               opt->pcre1 = 1;
-#endif
                break;
        }
 }
@@ -377,92 +362,6 @@ static int is_fixed(const char *s, size_t len)
        return 1;
 }
 
-#ifdef USE_LIBPCRE1
-static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
-{
-       const char *error;
-       int erroffset;
-       int options = PCRE_MULTILINE;
-       int study_options = 0;
-
-       if (opt->ignore_case) {
-               if (!opt->ignore_locale && has_non_ascii(p->pattern))
-                       p->pcre1_tables = pcre_maketables();
-               options |= PCRE_CASELESS;
-       }
-       if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern))
-               options |= PCRE_UTF8;
-
-       p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
-                                     p->pcre1_tables);
-       if (!p->pcre1_regexp)
-               compile_regexp_failed(p, error);
-
-#if defined(PCRE_CONFIG_JIT) && !defined(NO_LIBPCRE1_JIT)
-       pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
-       if (opt->debug)
-               fprintf(stderr, "pcre1_jit_on=%d\n", p->pcre1_jit_on);
-
-       if (p->pcre1_jit_on)
-               study_options = PCRE_STUDY_JIT_COMPILE;
-#endif
-
-       p->pcre1_extra_info = pcre_study(p->pcre1_regexp, study_options, &error);
-       if (!p->pcre1_extra_info && error)
-               die("%s", error);
-}
-
-static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
-               regmatch_t *match, int eflags)
-{
-       int ovector[30], ret, flags = PCRE_NO_UTF8_CHECK;
-
-       if (eflags & REG_NOTBOL)
-               flags |= PCRE_NOTBOL;
-
-       ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
-                       eol - line, 0, flags, ovector,
-                       ARRAY_SIZE(ovector));
-
-       if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
-               die("pcre_exec failed with error code %d", ret);
-       if (ret > 0) {
-               ret = 0;
-               match->rm_so = ovector[0];
-               match->rm_eo = ovector[1];
-       }
-
-       return ret;
-}
-
-static void free_pcre1_regexp(struct grep_pat *p)
-{
-       pcre_free(p->pcre1_regexp);
-#ifdef PCRE_CONFIG_JIT
-       if (p->pcre1_jit_on)
-               pcre_free_study(p->pcre1_extra_info);
-       else
-#endif
-               pcre_free(p->pcre1_extra_info);
-       pcre_free((void *)p->pcre1_tables);
-}
-#else /* !USE_LIBPCRE1 */
-static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
-{
-       die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
-}
-
-static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
-               regmatch_t *match, int eflags)
-{
-       return 1;
-}
-
-static void free_pcre1_regexp(struct grep_pat *p)
-{
-}
-#endif /* !USE_LIBPCRE1 */
-
 #ifdef USE_LIBPCRE2
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 {
@@ -492,7 +391,23 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
        }
        if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) &&
            !(!opt->ignore_case && (p->fixed || p->is_fixed)))
-               options |= PCRE2_UTF;
+               options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);
+
+       /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
+       if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS)) {
+               struct strbuf buf;
+               int len;
+               int err;
+
+               if ((len = pcre2_config(PCRE2_CONFIG_VERSION, NULL)) < 0)
+                       BUG("pcre2_config(..., NULL) failed: %d", len);
+               strbuf_init(&buf, len + 1);
+               if ((err = pcre2_config(PCRE2_CONFIG_VERSION, buf.buf)) < 0)
+                       BUG("pcre2_config(..., buf.buf) failed: %d", err);
+               if (versioncmp(buf.buf, "10.36") < 0)
+                       options |= PCRE2_NO_START_OPTIMIZE;
+               strbuf_release(&buf);
+       }
 
        p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
                                         p->patternlen, options, &error, &erroffset,
@@ -508,8 +423,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
        }
 
        pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
-       if (opt->debug)
-               fprintf(stderr, "pcre2_jit_on=%d\n", p->pcre2_jit_on);
        if (p->pcre2_jit_on) {
                jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
                if (jitret)
@@ -535,9 +448,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
                        BUG("pcre2_pattern_info() failed: %d", patinforet);
                if (jitsizearg == 0) {
                        p->pcre2_jit_on = 0;
-                       if (opt->debug)
-                               fprintf(stderr, "pcre2_jit_on=%d: (*NO_JIT) in regex\n",
-                                       p->pcre2_jit_on);
                        return;
                }
        }
@@ -588,11 +498,6 @@ static void free_pcre2_pattern(struct grep_pat *p)
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 {
-       /*
-        * Unreachable until USE_LIBPCRE2 becomes synonymous with
-        * USE_LIBPCRE. See the sibling comment in
-        * grep_set_pattern_type_option().
-        */
        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 }
 
@@ -616,8 +521,6 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
        if (opt->ignore_case)
                regflags |= REG_ICASE;
        err = regcomp(&p->regexp, sb.buf, regflags);
-       if (opt->debug)
-               fprintf(stderr, "fixed %s\n", sb.buf);
        strbuf_release(&sb);
        if (err) {
                char errbuf[1024];
@@ -693,11 +596,6 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
                return;
        }
 
-       if (opt->pcre1) {
-               compile_pcre1_regexp(p, opt);
-               return;
-       }
-
        if (p->ignore_case)
                regflags |= REG_ICASE;
        if (opt->extended_regexp_option)
@@ -812,87 +710,6 @@ static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
        return compile_pattern_or(list);
 }
 
-static void indent(int in)
-{
-       while (in-- > 0)
-               fputc(' ', stderr);
-}
-
-static void dump_grep_pat(struct grep_pat *p)
-{
-       switch (p->token) {
-       case GREP_AND: fprintf(stderr, "*and*"); break;
-       case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
-       case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
-       case GREP_NOT: fprintf(stderr, "*not*"); break;
-       case GREP_OR: fprintf(stderr, "*or*"); break;
-
-       case GREP_PATTERN: fprintf(stderr, "pattern"); break;
-       case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
-       case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
-       }
-
-       switch (p->token) {
-       default: break;
-       case GREP_PATTERN_HEAD:
-               fprintf(stderr, "<head %d>", p->field); break;
-       case GREP_PATTERN_BODY:
-               fprintf(stderr, "<body>"); break;
-       }
-       switch (p->token) {
-       default: break;
-       case GREP_PATTERN_HEAD:
-       case GREP_PATTERN_BODY:
-       case GREP_PATTERN:
-               fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
-               break;
-       }
-       fputc('\n', stderr);
-}
-
-static void dump_grep_expression_1(struct grep_expr *x, int in)
-{
-       indent(in);
-       switch (x->node) {
-       case GREP_NODE_TRUE:
-               fprintf(stderr, "true\n");
-               break;
-       case GREP_NODE_ATOM:
-               dump_grep_pat(x->u.atom);
-               break;
-       case GREP_NODE_NOT:
-               fprintf(stderr, "(not\n");
-               dump_grep_expression_1(x->u.unary, in+1);
-               indent(in);
-               fprintf(stderr, ")\n");
-               break;
-       case GREP_NODE_AND:
-               fprintf(stderr, "(and\n");
-               dump_grep_expression_1(x->u.binary.left, in+1);
-               dump_grep_expression_1(x->u.binary.right, in+1);
-               indent(in);
-               fprintf(stderr, ")\n");
-               break;
-       case GREP_NODE_OR:
-               fprintf(stderr, "(or\n");
-               dump_grep_expression_1(x->u.binary.left, in+1);
-               dump_grep_expression_1(x->u.binary.right, in+1);
-               indent(in);
-               fprintf(stderr, ")\n");
-               break;
-       }
-}
-
-static void dump_grep_expression(struct grep_opt *opt)
-{
-       struct grep_expr *x = opt->pattern_expression;
-
-       if (opt->all_match)
-               fprintf(stderr, "[all-match]\n");
-       dump_grep_expression_1(x, 0);
-       fflush(NULL);
-}
-
 static struct grep_expr *grep_true_expr(void)
 {
        struct grep_expr *z = xcalloc(1, sizeof(*z));
@@ -973,7 +790,7 @@ static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y
        return z;
 }
 
-static void compile_grep_patterns_real(struct grep_opt *opt)
+void compile_grep_patterns(struct grep_opt *opt)
 {
        struct grep_pat *p;
        struct grep_expr *header_expr = prep_header_patterns(opt);
@@ -993,7 +810,7 @@ static void compile_grep_patterns_real(struct grep_opt *opt)
 
        if (opt->all_match || header_expr)
                opt->extended = 1;
-       else if (!opt->extended && !opt->debug)
+       else if (!opt->extended)
                return;
 
        p = opt->pattern_list;
@@ -1016,13 +833,6 @@ static void compile_grep_patterns_real(struct grep_opt *opt)
        opt->all_match = 1;
 }
 
-void compile_grep_patterns(struct grep_opt *opt)
-{
-       compile_grep_patterns_real(opt);
-       if (opt->debug)
-               dump_grep_expression(opt);
-}
-
 static void free_pattern_expr(struct grep_expr *x)
 {
        switch (x->node) {
@@ -1051,9 +861,7 @@ void free_grep_patterns(struct grep_opt *opt)
                case GREP_PATTERN: /* atom */
                case GREP_PATTERN_HEAD:
                case GREP_PATTERN_BODY:
-                       if (p->pcre1_regexp)
-                               free_pcre1_regexp(p);
-                       else if (p->pcre2_pattern)
+                       if (p->pcre2_pattern)
                                free_pcre2_pattern(p);
                        else
                                regfree(&p->regexp);
@@ -1116,9 +924,7 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
 {
        int hit;
 
-       if (p->pcre1_regexp)
-               hit = !pcre1match(p, line, eol, match, eflags);
-       else if (p->pcre2_pattern)
+       if (p->pcre2_pattern)
                hit = !pcre2match(p, line, eol, match, eflags);
        else
                hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,