]> git.ipfire.org Git - thirdparty/git.git/blame - grep.c
Git 2.45-rc1
[thirdparty/git.git] / grep.c
CommitLineData
4f6728d5 1#include "git-compat-util.h"
b2141fc1 2#include "config.h"
f394e093 3#include "gettext.h"
83b5d2f5 4#include "grep.h"
41771fa4 5#include "hex.h"
a034e910 6#include "object-store-ll.h"
d4a4f929 7#include "pretty.h"
60ecac98 8#include "userdiff.h"
6bfce93e 9#include "xdiff-interface.h"
335ec3bf
JK
10#include "diff.h"
11#include "diffcore.h"
793dc676 12#include "quote.h"
3ac68a93 13#include "help.h"
83b5d2f5 14
07a7d656 15static int grep_source_load(struct grep_source *gs);
acd00ea0
NTND
16static int grep_source_is_binary(struct grep_source *gs,
17 struct index_state *istate);
07a7d656 18
bcba4462 19static void std_output(struct grep_opt *opt UNUSED, const void *buf, size_t size)
96313423
20{
21 fwrite(buf, size, 1, stdout);
22}
23
fa151dc5
NTND
24static const char *color_grep_slots[] = {
25 [GREP_COLOR_CONTEXT] = "context",
26 [GREP_COLOR_FILENAME] = "filename",
27 [GREP_COLOR_FUNCTION] = "function",
28 [GREP_COLOR_LINENO] = "lineNumber",
d036d667 29 [GREP_COLOR_COLUMNNO] = "column",
fa151dc5
NTND
30 [GREP_COLOR_MATCH_CONTEXT] = "matchContext",
31 [GREP_COLOR_MATCH_SELECTED] = "matchSelected",
32 [GREP_COLOR_SELECTED] = "selected",
33 [GREP_COLOR_SEP] = "separator",
34};
35
7687a054
JH
36static int parse_pattern_type_arg(const char *opt, const char *arg)
37{
38 if (!strcmp(arg, "default"))
39 return GREP_PATTERN_TYPE_UNSPECIFIED;
40 else if (!strcmp(arg, "basic"))
41 return GREP_PATTERN_TYPE_BRE;
42 else if (!strcmp(arg, "extended"))
43 return GREP_PATTERN_TYPE_ERE;
44 else if (!strcmp(arg, "fixed"))
45 return GREP_PATTERN_TYPE_FIXED;
46 else if (!strcmp(arg, "perl"))
47 return GREP_PATTERN_TYPE_PCRE;
48 die("bad %s argument: %s", opt, arg);
49}
50
3ac68a93
NTND
51define_list_config_array_extra(color_grep_slots, {"match"});
52
7687a054
JH
53/*
54 * Read the configuration file once and store it in
55 * the grep_defaults template.
56 */
a4e7e317
GC
57int grep_config(const char *var, const char *value,
58 const struct config_context *ctx, void *cb)
7687a054 59{
72365bb4 60 struct grep_opt *opt = cb;
fa151dc5 61 const char *slot;
7687a054
JH
62
63 if (userdiff_config(var, value) < 0)
64 return -1;
65
66 if (!strcmp(var, "grep.extendedregexp")) {
c7e38551 67 opt->extended_regexp_option = git_config_bool(var, value);
7687a054
JH
68 return 0;
69 }
70
71 if (!strcmp(var, "grep.patterntype")) {
72 opt->pattern_type_option = parse_pattern_type_arg(var, value);
73 return 0;
74 }
75
76 if (!strcmp(var, "grep.linenumber")) {
77 opt->linenum = git_config_bool(var, value);
78 return 0;
79 }
6653fec3
TB
80 if (!strcmp(var, "grep.column")) {
81 opt->columnnum = git_config_bool(var, value);
82 return 0;
83 }
7687a054 84
6453f7b3
AS
85 if (!strcmp(var, "grep.fullname")) {
86 opt->relative = !git_config_bool(var, value);
87 return 0;
88 }
89
7687a054
JH
90 if (!strcmp(var, "color.grep"))
91 opt->color = git_config_colorbool(var, value);
fa151dc5 92 if (!strcmp(var, "color.grep.match")) {
a4e7e317 93 if (grep_config("color.grep.matchcontext", value, ctx, cb) < 0)
fa151dc5 94 return -1;
a4e7e317 95 if (grep_config("color.grep.matchselected", value, ctx, cb) < 0)
fa151dc5
NTND
96 return -1;
97 } else if (skip_prefix(var, "color.grep.", &slot)) {
98 int i = LOOKUP_CONFIG(color_grep_slots, slot);
99 char *color;
100
101 if (i < 0)
102 return -1;
103 color = opt->colors[i];
7687a054
JH
104 if (!value)
105 return config_error_nonbool(var);
f6c5a296 106 return color_parse(value, color);
7687a054
JH
107 }
108 return 0;
109}
110
9725c8dd 111void grep_init(struct grep_opt *opt, struct repository *repo)
7687a054 112{
72365bb4
ÆAB
113 struct grep_opt blank = GREP_OPT_INIT;
114 memcpy(opt, &blank, sizeof(*opt));
6ba9bb76 115
38bbc2ea 116 opt->repo = repo;
7687a054
JH
117 opt->pattern_tail = &opt->pattern_list;
118 opt->header_tail = &opt->header_list;
7687a054 119}
07a7d656 120
fc456751
RS
121static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
122 const char *origin, int no,
123 enum grep_pat_token t,
124 enum grep_header_field field)
a4d7d2c6
JH
125{
126 struct grep_pat *p = xcalloc(1, sizeof(*p));
526a858a 127 p->pattern = xmemdupz(pat, patlen);
fc456751
RS
128 p->patternlen = patlen;
129 p->origin = origin;
130 p->no = no;
131 p->token = t;
a4d7d2c6 132 p->field = field;
fc456751
RS
133 return p;
134}
135
2b3873ff
RS
136static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
137{
138 **tail = p;
139 *tail = &p->next;
a4d7d2c6 140 p->next = NULL;
526a858a
RS
141
142 switch (p->token) {
143 case GREP_PATTERN: /* atom */
144 case GREP_PATTERN_HEAD:
145 case GREP_PATTERN_BODY:
146 for (;;) {
147 struct grep_pat *new_pat;
148 size_t len = 0;
149 char *cp = p->pattern + p->patternlen, *nl = NULL;
150 while (++len <= p->patternlen) {
151 if (*(--cp) == '\n') {
152 nl = cp;
153 break;
154 }
155 }
156 if (!nl)
157 break;
158 new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
159 p->no, p->token, p->field);
160 new_pat->next = p->next;
161 if (!p->next)
162 *tail = &new_pat->next;
163 p->next = new_pat;
164 *nl = '\0';
165 p->patternlen -= len;
166 }
167 break;
168 default:
169 break;
170 }
2b3873ff
RS
171}
172
fc456751
RS
173void append_header_grep_pattern(struct grep_opt *opt,
174 enum grep_header_field field, const char *pat)
175{
176 struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
177 GREP_PATTERN_HEAD, field);
baa6378f
JH
178 if (field == GREP_HEADER_REFLOG)
179 opt->use_reflog_filter = 1;
2b3873ff 180 do_append_grep_pat(&opt->header_tail, p);
a4d7d2c6
JH
181}
182
83b5d2f5
JH
183void append_grep_pattern(struct grep_opt *opt, const char *pat,
184 const char *origin, int no, enum grep_pat_token t)
ed40a095
RS
185{
186 append_grep_pat(opt, pat, strlen(pat), origin, no, t);
187}
188
189void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
190 const char *origin, int no, enum grep_pat_token t)
83b5d2f5 191{
fc456751 192 struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
2b3873ff 193 do_append_grep_pat(&opt->pattern_tail, p);
83b5d2f5
JH
194}
195
5b594f45
FK
196struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
197{
198 struct grep_pat *pat;
199 struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
200 *ret = *opt;
201
202 ret->pattern_list = NULL;
203 ret->pattern_tail = &ret->pattern_list;
204
205 for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
206 {
207 if(pat->token == GREP_PATTERN_HEAD)
208 append_header_grep_pattern(ret, pat->field,
209 pat->pattern);
210 else
ed40a095
RS
211 append_grep_pat(ret, pat->pattern, pat->patternlen,
212 pat->origin, pat->no, pat->token);
5b594f45
FK
213 }
214
215 return ret;
216}
217
a30c148a
MK
218static NORETURN void compile_regexp_failed(const struct grep_pat *p,
219 const char *error)
220{
221 char where[1024];
222
223 if (p->no)
19bdd3e7 224 xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
a30c148a 225 else if (p->origin)
19bdd3e7 226 xsnprintf(where, sizeof(where), "%s, ", p->origin);
a30c148a
MK
227 else
228 where[0] = 0;
229
230 die("%s'%s': %s", where, p->pattern, error);
231}
232
543f1c0c
ÆAB
/*
 * Return 1 when none of the first `len` bytes of `s` satisfies
 * is_regex_special(), 0 as soon as one does.
 */
static int is_fixed(const char *s, size_t len)
{
	const char *end = s + len;

	while (s < end) {
		if (is_regex_special(*s))
			return 0;
		s++;
	}
	return 1;
}
244
94da9193 245#ifdef USE_LIBPCRE2
c1760352
ÆAB
246#define GREP_PCRE2_DEBUG_MALLOC 0
247
248static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
249{
250 void *pointer = malloc(size);
251#if GREP_PCRE2_DEBUG_MALLOC
252 static int count = 1;
253 fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size);
254#endif
255 return pointer;
256}
257
258static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
259{
260#if GREP_PCRE2_DEBUG_MALLOC
261 static int count = 1;
262 if (pointer)
263 fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++);
264#endif
265 free(pointer);
266}
267
50b6ad55
MK
268static int pcre2_jit_functional(void)
269{
270 static int jit_working = -1;
271 pcre2_code *code;
272 size_t off;
273 int err;
274
275 if (jit_working != -1)
276 return jit_working;
277
278 /*
279 * Try to JIT compile a simple pattern to probe if the JIT is
280 * working in general. It might fail for systems where creating
281 * memory mappings for runtime code generation is restricted.
282 */
283 code = pcre2_compile((PCRE2_SPTR)".", 1, 0, &err, &off, NULL);
284 if (!code)
285 return 0;
286
287 jit_working = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE) == 0;
288 pcre2_code_free(code);
289
290 return jit_working;
291}
292
94da9193
ÆAB
293static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
294{
295 int error;
296 PCRE2_UCHAR errbuf[256];
297 PCRE2_SIZE erroffset;
298 int options = PCRE2_MULTILINE;
94da9193 299 int jitret;
a25b9085
ÆAB
300 int patinforet;
301 size_t jitsizearg;
32e3e8bc 302 int literal = !opt->ignore_case && (p->fixed || p->is_fixed);
94da9193 303
cbe81e65
ÆAB
304 /*
305 * Call pcre2_general_context_create() before calling any
306 * other pcre2_*(). It sets up our malloc()/free() functions
307 * with which everything else is allocated.
308 */
309 p->pcre2_general_context = pcre2_general_context_create(
310 pcre2_malloc, pcre2_free, NULL);
311 if (!p->pcre2_general_context)
312 die("Couldn't allocate PCRE2 general context");
94da9193
ÆAB
313
314 if (opt->ignore_case) {
44570188 315 if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
cbe81e65
ÆAB
316 p->pcre2_tables = pcre2_maketables(p->pcre2_general_context);
317 p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
10da030a
CMAB
318 pcre2_set_character_tables(p->pcre2_compile_context,
319 p->pcre2_tables);
94da9193
ÆAB
320 }
321 options |= PCRE2_CASELESS;
322 }
32e3e8bc 323 if (!opt->ignore_locale && is_utf8_locale() && !literal)
acabd204 324 options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);
95ca1f98 325
14b9a044
MK
326#ifndef GIT_PCRE2_VERSION_10_35_OR_HIGHER
327 /*
328 * Work around a JIT bug related to invalid Unicode character handling
329 * fixed in 10.35:
330 * https://github.com/PCRE2Project/pcre2/commit/c21bd977547d
331 */
332 options &= ~PCRE2_UCP;
333#endif
334
97169fc3 335#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
95ca1f98 336 /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
797c3599
ÆAB
337 if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS))
338 options |= PCRE2_NO_START_OPTIMIZE;
339#endif
94da9193
ÆAB
340
341 p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
342 p->patternlen, options, &error, &erroffset,
343 p->pcre2_compile_context);
344
345 if (p->pcre2_pattern) {
cbe81e65 346 p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
94da9193
ÆAB
347 if (!p->pcre2_match_data)
348 die("Couldn't allocate PCRE2 match data");
349 } else {
350 pcre2_get_error_message(error, errbuf, sizeof(errbuf));
351 compile_regexp_failed(p, (const char *)&errbuf);
352 }
353
354 pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
04bef50c 355 if (p->pcre2_jit_on) {
94da9193 356 jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
50b6ad55
MK
357 if (jitret == PCRE2_ERROR_NOMEMORY && !pcre2_jit_functional()) {
358 /*
359 * Even though pcre2_config(PCRE2_CONFIG_JIT, ...)
360 * indicated JIT support, the library might still
361 * fail to generate JIT code for various reasons,
362 * e.g. when SELinux's 'deny_execmem' or PaX's
363 * MPROTECT prevent creating W|X memory mappings.
364 *
365 * Instead of faling hard, fall back to interpreter
366 * mode, just as if the pattern was prefixed with
367 * '(*NO_JIT)'.
368 */
369 p->pcre2_jit_on = 0;
370 return;
371 } else if (jitret) {
372 int need_clip = p->patternlen > 64;
373 int clip_len = need_clip ? 64 : p->patternlen;
374 die("Couldn't JIT the PCRE2 pattern '%.*s'%s, got '%d'%s",
375 clip_len, p->pattern, need_clip ? "..." : "", jitret,
376 pcre2_jit_functional()
377 ? "\nPerhaps prefix (*NO_JIT) to your pattern?"
378 : "");
379 }
a25b9085
ÆAB
380
381 /*
382 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
383 * tells us whether the library itself supports JIT,
384 * but to see whether we're going to be actually using
385 * JIT we need to extract PCRE2_INFO_JITSIZE from the
386 * pattern *after* we do pcre2_jit_compile() above.
387 *
388 * This is because if the pattern contains the
389 * (*NO_JIT) verb (see pcre2syntax(3))
390 * pcre2_jit_compile() will exit early with 0. If we
391 * then proceed to call pcre2_jit_match() further down
392 * the line instead of pcre2_match() we'll either
393 * segfault (pre PCRE 10.31) or run into a fatal error
394 * (post PCRE2 10.31)
395 */
396 patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
397 if (patinforet)
398 BUG("pcre2_pattern_info() failed: %d", patinforet);
399 if (jitsizearg == 0) {
400 p->pcre2_jit_on = 0;
401 return;
402 }
94da9193
ÆAB
403 }
404}
405
406static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
407 regmatch_t *match, int eflags)
408{
409 int ret, flags = 0;
410 PCRE2_SIZE *ovector;
411 PCRE2_UCHAR errbuf[256];
412
413 if (eflags & REG_NOTBOL)
414 flags |= PCRE2_NOTBOL;
415
416 if (p->pcre2_jit_on)
417 ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
418 eol - line, 0, flags, p->pcre2_match_data,
419 NULL);
420 else
421 ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
422 eol - line, 0, flags, p->pcre2_match_data,
423 NULL);
424
425 if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
426 pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
427 die("%s failed with error code %d: %s",
428 (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
429 errbuf);
430 }
431 if (ret > 0) {
432 ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
433 ret = 0;
434 match->rm_so = (int)ovector[0];
435 match->rm_eo = (int)ovector[1];
436 }
437
438 return ret;
439}
440
441static void free_pcre2_pattern(struct grep_pat *p)
442{
443 pcre2_compile_context_free(p->pcre2_compile_context);
444 pcre2_code_free(p->pcre2_pattern);
445 pcre2_match_data_free(p->pcre2_match_data);
b76bf27f 446#ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER
cbe81e65 447 pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables);
b76bf27f 448#else
10da030a 449 free((void *)p->pcre2_tables);
b76bf27f 450#endif
cbe81e65 451 pcre2_general_context_free(p->pcre2_general_context);
94da9193
ÆAB
452}
453#else /* !USE_LIBPCRE2 */
4548b014
JK
454static void compile_pcre2_pattern(struct grep_pat *p UNUSED,
455 const struct grep_opt *opt UNUSED)
94da9193 456{
94da9193
ÆAB
457 die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
458}
459
4548b014
JK
460static int pcre2match(struct grep_pat *p UNUSED, const char *line UNUSED,
461 const char *eol UNUSED, regmatch_t *match UNUSED,
462 int eflags UNUSED)
94da9193
ÆAB
463{
464 return 1;
465}
466
4548b014 467static void free_pcre2_pattern(struct grep_pat *p UNUSED)
94da9193
ÆAB
468{
469}
94da9193 470
793dc676
NTND
471static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
472{
473 struct strbuf sb = STRBUF_INIT;
474 int err;
1ceababc 475 int regflags = 0;
793dc676
NTND
476
477 basic_regex_quote_buf(&sb, p->pattern);
793dc676
NTND
478 if (opt->ignore_case)
479 regflags |= REG_ICASE;
480 err = regcomp(&p->regexp, sb.buf, regflags);
793dc676
NTND
481 strbuf_release(&sb);
482 if (err) {
483 char errbuf[1024];
484 regerror(err, &p->regexp, errbuf, sizeof(errbuf));
793dc676
NTND
485 compile_regexp_failed(p, errbuf);
486 }
487}
b65abcaf 488#endif /* !USE_LIBPCRE2 */
793dc676 489
83b5d2f5
JH
490static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
491{
c822255c 492 int err;
07a3d411 493 int regflags = REG_NEWLINE;
c822255c 494
04bf052e
ÆAB
495 if (opt->pattern_type_option == GREP_PATTERN_TYPE_UNSPECIFIED)
496 opt->pattern_type_option = (opt->extended_regexp_option
497 ? GREP_PATTERN_TYPE_ERE
498 : GREP_PATTERN_TYPE_BRE);
499
d7eb527d 500 p->word_regexp = opt->word_regexp;
5183bf67 501 p->ignore_case = opt->ignore_case;
04bf052e 502 p->fixed = opt->pattern_type_option == GREP_PATTERN_TYPE_FIXED;
d7eb527d 503
04bf052e 504 if (opt->pattern_type_option != GREP_PATTERN_TYPE_PCRE &&
ae807d77 505 memchr(p->pattern, 0, p->patternlen))
45d1f37c
ÆAB
506 die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
507
09872f64 508 p->is_fixed = is_fixed(p->pattern, p->patternlen);
8a599983
ÆAB
509#ifdef USE_LIBPCRE2
510 if (!p->fixed && !p->is_fixed) {
511 const char *no_jit = "(*NO_JIT)";
512 const int no_jit_len = strlen(no_jit);
513 if (starts_with(p->pattern, no_jit) &&
514 is_fixed(p->pattern + no_jit_len,
515 p->patternlen - no_jit_len))
516 p->is_fixed = 1;
517 }
518#endif
09872f64 519 if (p->fixed || p->is_fixed) {
b65abcaf 520#ifdef USE_LIBPCRE2
09872f64 521 if (p->is_fixed) {
b65abcaf
ÆAB
522 compile_pcre2_pattern(p, opt);
523 } else {
524 /*
525 * E.g. t7811-grep-open.sh relies on the
526 * pattern being restored.
527 */
528 char *old_pattern = p->pattern;
529 size_t old_patternlen = p->patternlen;
530 struct strbuf sb = STRBUF_INIT;
531
532 /*
533 * There is the PCRE2_LITERAL flag, but it's
534 * only in PCRE v2 10.30 and later. Needing to
535 * ifdef our way around that and dealing with
536 * it + PCRE2_MULTILINE being an error is more
537 * complex than just quoting this ourselves.
538 */
539 strbuf_add(&sb, "\\Q", 2);
540 strbuf_add(&sb, p->pattern, p->patternlen);
541 strbuf_add(&sb, "\\E", 2);
542
543 p->pattern = sb.buf;
544 p->patternlen = sb.len;
545 compile_pcre2_pattern(p, opt);
546 p->pattern = old_pattern;
547 p->patternlen = old_patternlen;
548 strbuf_release(&sb);
549 }
550#else /* !USE_LIBPCRE2 */
793dc676 551 compile_fixed_regexp(p, opt);
b65abcaf 552#endif /* !USE_LIBPCRE2 */
793dc676 553 return;
9eceddee 554 }
c822255c 555
04bf052e 556 if (opt->pattern_type_option == GREP_PATTERN_TYPE_PCRE) {
94da9193
ÆAB
557 compile_pcre2_pattern(p, opt);
558 return;
559 }
560
07a3d411
ÆAB
561 if (p->ignore_case)
562 regflags |= REG_ICASE;
04bf052e 563 if (opt->pattern_type_option == GREP_PATTERN_TYPE_ERE)
07a3d411
ÆAB
564 regflags |= REG_EXTENDED;
565 err = regcomp(&p->regexp, p->pattern, regflags);
83b5d2f5
JH
566 if (err) {
567 char errbuf[1024];
83b5d2f5 568 regerror(err, &p->regexp, errbuf, 1024);
a30c148a 569 compile_regexp_failed(p, errbuf);
83b5d2f5
JH
570 }
571}
572
e2b15427
RS
573static struct grep_expr *grep_not_expr(struct grep_expr *expr)
574{
575 struct grep_expr *z = xcalloc(1, sizeof(*z));
576 z->node = GREP_NODE_NOT;
577 z->u.unary = expr;
578 return z;
579}
580
f2d27598
TB
581static struct grep_expr *grep_binexp(enum grep_expr_node kind,
582 struct grep_expr *left,
583 struct grep_expr *right)
9dbf00ba
RS
584{
585 struct grep_expr *z = xcalloc(1, sizeof(*z));
f2d27598 586 z->node = kind;
9dbf00ba
RS
587 z->u.binary.left = left;
588 z->u.binary.right = right;
589 return z;
590}
591
f2d27598
TB
592static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
593{
594 return grep_binexp(GREP_NODE_OR, left, right);
595}
596
0a6adc26
TB
597static struct grep_expr *grep_and_expr(struct grep_expr *left, struct grep_expr *right)
598{
599 return grep_binexp(GREP_NODE_AND, left, right);
600}
601
0ab7befa 602static struct grep_expr *compile_pattern_or(struct grep_pat **);
83b5d2f5
JH
603static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
604{
605 struct grep_pat *p;
606 struct grep_expr *x;
607
608 p = *list;
c922b01f
LT
609 if (!p)
610 return NULL;
83b5d2f5
JH
611 switch (p->token) {
612 case GREP_PATTERN: /* atom */
480c1ca6
JH
613 case GREP_PATTERN_HEAD:
614 case GREP_PATTERN_BODY:
ca56dadb 615 CALLOC_ARRAY(x, 1);
83b5d2f5
JH
616 x->node = GREP_NODE_ATOM;
617 x->u.atom = p;
618 *list = p->next;
619 return x;
620 case GREP_OPEN_PAREN:
621 *list = p->next;
0ab7befa 622 x = compile_pattern_or(list);
83b5d2f5 623 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
0d527842 624 die("unmatched ( for expression group");
83b5d2f5
JH
625 *list = (*list)->next;
626 return x;
627 default:
628 return NULL;
629 }
630}
631
632static struct grep_expr *compile_pattern_not(struct grep_pat **list)
633{
634 struct grep_pat *p;
635 struct grep_expr *x;
636
637 p = *list;
c922b01f
LT
638 if (!p)
639 return NULL;
83b5d2f5
JH
640 switch (p->token) {
641 case GREP_NOT:
642 if (!p->next)
643 die("--not not followed by pattern expression");
644 *list = p->next;
e2b15427
RS
645 x = compile_pattern_not(list);
646 if (!x)
83b5d2f5 647 die("--not followed by non pattern expression");
e2b15427 648 return grep_not_expr(x);
83b5d2f5
JH
649 default:
650 return compile_pattern_atom(list);
651 }
652}
653
654static struct grep_expr *compile_pattern_and(struct grep_pat **list)
655{
656 struct grep_pat *p;
0a6adc26 657 struct grep_expr *x, *y;
83b5d2f5
JH
658
659 x = compile_pattern_not(list);
660 p = *list;
661 if (p && p->token == GREP_AND) {
fe7fe62d
RS
662 if (!x)
663 die("--and not preceded by pattern expression");
83b5d2f5
JH
664 if (!p->next)
665 die("--and not followed by pattern expression");
666 *list = p->next;
667 y = compile_pattern_and(list);
668 if (!y)
669 die("--and not followed by pattern expression");
0a6adc26 670 return grep_and_expr(x, y);
83b5d2f5
JH
671 }
672 return x;
673}
674
675static struct grep_expr *compile_pattern_or(struct grep_pat **list)
676{
677 struct grep_pat *p;
9dbf00ba 678 struct grep_expr *x, *y;
83b5d2f5
JH
679
680 x = compile_pattern_and(list);
681 p = *list;
682 if (x && p && p->token != GREP_CLOSE_PAREN) {
683 y = compile_pattern_or(list);
684 if (!y)
685 die("not a pattern expression %s", p->pattern);
9dbf00ba 686 return grep_or_expr(x, y);
83b5d2f5
JH
687 }
688 return x;
689}
690
/* Entry point of the expression parser: parse at the OR level. */
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
	struct grep_expr *root = compile_pattern_or(list);

	return root;
}
695
5aaeb733
JH
696static struct grep_expr *grep_true_expr(void)
697{
698 struct grep_expr *z = xcalloc(1, sizeof(*z));
699 z->node = GREP_NODE_TRUE;
700 return z;
701}
702
95ce9ce2 703static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
83b5d2f5
JH
704{
705 struct grep_pat *p;
95ce9ce2 706 struct grep_expr *header_expr;
5aaeb733
JH
707 struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
708 enum grep_header_field fld;
83b5d2f5 709
95ce9ce2
JH
710 if (!opt->header_list)
711 return NULL;
2385f246 712
95ce9ce2
JH
713 for (p = opt->header_list; p; p = p->next) {
714 if (p->token != GREP_PATTERN_HEAD)
033abf97 715 BUG("a non-header pattern in grep header list.");
3ce3ffb8
AP
716 if (p->field < GREP_HEADER_FIELD_MIN ||
717 GREP_HEADER_FIELD_MAX <= p->field)
033abf97 718 BUG("unknown header field %d", p->field);
95ce9ce2 719 compile_regexp(p, opt);
80235ba7 720 }
5aaeb733
JH
721
722 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
723 header_group[fld] = NULL;
724
725 for (p = opt->header_list; p; p = p->next) {
726 struct grep_expr *h;
727 struct grep_pat *pp = p;
728
729 h = compile_pattern_atom(&pp);
730 if (!h || pp != p->next)
033abf97 731 BUG("malformed header expr");
5aaeb733
JH
732 if (!header_group[p->field]) {
733 header_group[p->field] = h;
734 continue;
735 }
736 header_group[p->field] = grep_or_expr(h, header_group[p->field]);
737 }
738
739 header_expr = NULL;
740
741 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
742 if (!header_group[fld])
743 continue;
744 if (!header_expr)
745 header_expr = grep_true_expr();
746 header_expr = grep_or_expr(header_group[fld], header_expr);
747 }
95ce9ce2
JH
748 return header_expr;
749}
750
13e4fc7e
JH
751static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
752{
753 struct grep_expr *z = x;
754
755 while (x) {
756 assert(x->node == GREP_NODE_OR);
757 if (x->u.binary.right &&
758 x->u.binary.right->node == GREP_NODE_TRUE) {
759 x->u.binary.right = y;
760 break;
761 }
762 x = x->u.binary.right;
763 }
764 return z;
765}
766
15c96497 767void compile_grep_patterns(struct grep_opt *opt)
95ce9ce2
JH
768{
769 struct grep_pat *p;
770 struct grep_expr *header_expr = prep_header_patterns(opt);
db84376f 771 int extended = 0;
0ab7befa 772
83b5d2f5 773 for (p = opt->pattern_list; p; p = p->next) {
480c1ca6
JH
774 switch (p->token) {
775 case GREP_PATTERN: /* atom */
776 case GREP_PATTERN_HEAD:
777 case GREP_PATTERN_BODY:
c822255c 778 compile_regexp(p, opt);
480c1ca6
JH
779 break;
780 default:
db84376f 781 extended = 1;
480c1ca6
JH
782 break;
783 }
83b5d2f5
JH
784 }
785
794c0002 786 if (opt->all_match || opt->no_body_match || header_expr)
db84376f
ÆAB
787 extended = 1;
788 else if (!extended)
83b5d2f5
JH
789 return;
790
83b5d2f5 791 p = opt->pattern_list;
ba150a3f
MB
792 if (p)
793 opt->pattern_expression = compile_pattern_expr(&p);
83b5d2f5 794 if (p)
0d527842 795 die("incomplete pattern expression group: %s", p->pattern);
80235ba7 796
794c0002
RS
797 if (opt->no_body_match && opt->pattern_expression)
798 opt->pattern_expression = grep_not_expr(opt->pattern_expression);
799
80235ba7
JH
800 if (!header_expr)
801 return;
802
5aaeb733 803 if (!opt->pattern_expression)
80235ba7 804 opt->pattern_expression = header_expr;
13e4fc7e
JH
805 else if (opt->all_match)
806 opt->pattern_expression = grep_splice_or(header_expr,
807 opt->pattern_expression);
5aaeb733
JH
808 else
809 opt->pattern_expression = grep_or_expr(opt->pattern_expression,
810 header_expr);
80235ba7 811 opt->all_match = 1;
83b5d2f5
JH
812}
813
b48fb5b6
JH
814static void free_pattern_expr(struct grep_expr *x)
815{
816 switch (x->node) {
5aaeb733 817 case GREP_NODE_TRUE:
b48fb5b6
JH
818 case GREP_NODE_ATOM:
819 break;
820 case GREP_NODE_NOT:
821 free_pattern_expr(x->u.unary);
822 break;
823 case GREP_NODE_AND:
824 case GREP_NODE_OR:
825 free_pattern_expr(x->u.binary.left);
826 free_pattern_expr(x->u.binary.right);
827 break;
828 }
829 free(x);
830}
831
891c9965 832static void free_grep_pat(struct grep_pat *pattern)
b48fb5b6
JH
833{
834 struct grep_pat *p, *n;
835
891c9965 836 for (p = pattern; p; p = n) {
b48fb5b6
JH
837 n = p->next;
838 switch (p->token) {
839 case GREP_PATTERN: /* atom */
840 case GREP_PATTERN_HEAD:
841 case GREP_PATTERN_BODY:
7599730b 842 if (p->pcre2_pattern)
94da9193 843 free_pcre2_pattern(p);
63e7e9d8
MK
844 else
845 regfree(&p->regexp);
526a858a 846 free(p->pattern);
b48fb5b6
JH
847 break;
848 default:
849 break;
850 }
851 free(p);
852 }
891c9965 853}
b48fb5b6 854
891c9965
ÆAB
855void free_grep_patterns(struct grep_opt *opt)
856{
857 free_grep_pat(opt->pattern_list);
fb2ebe72 858 free_grep_pat(opt->header_list);
891c9965
ÆAB
859
860 if (opt->pattern_expression)
861 free_pattern_expr(opt->pattern_expression);
b48fb5b6
JH
862}
863
/*
 * Advance `cp` to the next '\n' or until `*left` bytes have been
 * consumed, whichever comes first.  Returns the new position and
 * stores the number of bytes remaining from there in `*left`.
 */
static const char *end_of_line(const char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining && *cp != '\n'; remaining--)
		cp++;
	*left = remaining;
	return cp;
}
874
/* Return 1 if `ch` is alphanumeric or an underscore, 0 otherwise. */
static int word_char(char ch)
{
	return ch == '_' || isalnum(ch);
}
879
55f638bd
ML
880static void output_color(struct grep_opt *opt, const void *data, size_t size,
881 const char *color)
882{
daa0c3d9 883 if (want_color(opt->color) && color && color[0]) {
55f638bd
ML
884 opt->output(opt, color, strlen(color));
885 opt->output(opt, data, size);
886 opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
887 } else
888 opt->output(opt, data, size);
889}
890
891static void output_sep(struct grep_opt *opt, char sign)
892{
893 if (opt->null_following_name)
894 opt->output(opt, "\0", 1);
895 else
fa151dc5 896 output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
55f638bd
ML
897}
898
83caecca
RZ
899static void show_name(struct grep_opt *opt, const char *name)
900{
fa151dc5 901 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
5b594f45 902 opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
83caecca
RZ
903}
904
1a845fbc
JK
905static int patmatch(struct grep_pat *p,
906 const char *line, const char *eol,
97e77784
MK
907 regmatch_t *match, int eflags)
908{
909 int hit;
910
7599730b 911 if (p->pcre2_pattern)
94da9193 912 hit = !pcre2match(p, line, eol, match, eflags);
97e77784 913 else
b7d36ffc
JS
914 hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
915 eflags);
97e77784
MK
916
917 return hit;
918}
919
/*
 * Scan backwards from `*eol_p` for the last '>' after position `bol`
 * and, when one is found, move `*eol_p` to just past it.  `*eol_p` is
 * left alone if there is no such character.
 */
static void strip_timestamp(const char *bol, const char **eol_p)
{
	const char *cursor = *eol_p;

	while (bol < --cursor) {
		if (*cursor == '>') {
			*eol_p = cursor + 1;
			return;
		}
	}
}
931
/*
 * Header line prefixes and their lengths.  match_one_pattern() indexes
 * this table with the `field` of a header pattern.
 */
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	{ "author ",	 7 },
	{ "committer ", 10 },
	{ "reflog ",	 7 },
};
940
3f566c4e
HM
941static int headerless_match_one_pattern(struct grep_pat *p,
942 const char *bol, const char *eol,
943 enum grep_context ctx,
944 regmatch_t *pmatch, int eflags)
83b5d2f5
JH
945{
946 int hit = 0;
e701fadb 947 const char *start = bol;
83b5d2f5 948
480c1ca6
JH
949 if ((p->token != GREP_PATTERN) &&
950 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
951 return 0;
952
83b5d2f5 953 again:
97e77784 954 hit = patmatch(p, bol, eol, pmatch, eflags);
83b5d2f5 955
d7eb527d 956 if (hit && p->word_regexp) {
83b5d2f5 957 if ((pmatch[0].rm_so < 0) ||
84201eae 958 (eol - bol) < pmatch[0].rm_so ||
83b5d2f5
JH
959 (pmatch[0].rm_eo < 0) ||
960 (eol - bol) < pmatch[0].rm_eo)
961 die("regexp returned nonsense");
962
963 /* Match beginning must be either beginning of the
964 * line, or at word boundary (i.e. the last char must
965 * not be a word char). Similarly, match end must be
966 * either end of the line, or at word boundary
967 * (i.e. the next char must not be a word char).
968 */
fb62eb7f 969 if ( ((pmatch[0].rm_so == 0) ||
83b5d2f5
JH
970 !word_char(bol[pmatch[0].rm_so-1])) &&
971 ((pmatch[0].rm_eo == (eol-bol)) ||
972 !word_char(bol[pmatch[0].rm_eo])) )
973 ;
974 else
975 hit = 0;
976
84201eae
RS
977 /* Words consist of at least one character. */
978 if (pmatch->rm_so == pmatch->rm_eo)
979 hit = 0;
980
83b5d2f5
JH
981 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
982 /* There could be more than one match on the
983 * line, and the first match might not be
984 * strict word match. But later ones could be!
fb62eb7f
RS
985 * Forward to the next possible start, i.e. the
986 * next position following a non-word char.
83b5d2f5
JH
987 */
988 bol = pmatch[0].rm_so + bol + 1;
fb62eb7f
RS
989 while (word_char(bol[-1]) && bol < eol)
990 bol++;
dbb6a4ad 991 eflags |= REG_NOTBOL;
fb62eb7f
RS
992 if (bol < eol)
993 goto again;
83b5d2f5
JH
994 }
995 }
e701fadb
RS
996 if (hit) {
997 pmatch[0].rm_so += bol - start;
998 pmatch[0].rm_eo += bol - start;
999 }
83b5d2f5
JH
1000 return hit;
1001}
1002
3f566c4e
HM
1003static int match_one_pattern(struct grep_pat *p,
1004 const char *bol, const char *eol,
1005 enum grep_context ctx, regmatch_t *pmatch,
1006 int eflags)
1007{
1008 const char *field;
1009 size_t len;
1010
1011 if (p->token == GREP_PATTERN_HEAD) {
1012 assert(p->field < ARRAY_SIZE(header_field));
1013 field = header_field[p->field].field;
1014 len = header_field[p->field].len;
1015 if (strncmp(bol, field, len))
1016 return 0;
1017 bol += len;
1018
1019 switch (p->field) {
1020 case GREP_HEADER_AUTHOR:
1021 case GREP_HEADER_COMMITTER:
1022 strip_timestamp(bol, &eol);
1023 break;
1024 default:
1025 break;
1026 }
1027 }
1028
1029 return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
1030}
1031
1032
1a845fbc
JK
1033static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
1034 const char *bol, const char *eol,
1035 enum grep_context ctx, ssize_t *col,
68d686e6 1036 ssize_t *icol, int collect_hits)
83b5d2f5 1037{
0ab7befa
JH
1038 int h = 0;
1039
83b5d2f5 1040 switch (x->node) {
5aaeb733
JH
1041 case GREP_NODE_TRUE:
1042 h = 1;
1043 break;
83b5d2f5 1044 case GREP_NODE_ATOM:
68d686e6
TB
1045 {
1046 regmatch_t tmp;
1047 h = match_one_pattern(x->u.atom, bol, eol, ctx,
1048 &tmp, 0);
1049 if (h && (*col < 0 || tmp.rm_so < *col))
1050 *col = tmp.rm_so;
1051 }
794c0002
RS
1052 if (x->u.atom->token == GREP_PATTERN_BODY)
1053 opt->body_hit |= h;
83b5d2f5
JH
1054 break;
1055 case GREP_NODE_NOT:
68d686e6
TB
1056 /*
1057 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1058 */
1059 h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1060 0);
0ab7befa 1061 break;
83b5d2f5 1062 case GREP_NODE_AND:
017c0fcf 1063 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
68d686e6 1064 icol, 0);
017c0fcf
TB
1065 if (h || opt->columnnum) {
1066 /*
1067 * Don't short-circuit AND when given --column, since a
1068 * NOT earlier in the tree may turn this into an OR. In
1069 * this case, see the below comment.
1070 */
1071 h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1072 ctx, col, icol, 0);
1073 }
0ab7befa 1074 break;
83b5d2f5 1075 case GREP_NODE_OR:
017c0fcf
TB
1076 if (!(collect_hits || opt->columnnum)) {
1077 /*
1078 * Don't short-circuit OR when given --column (or
1079 * collecting hits) to ensure we don't skip a later
1080 * child that would produce an earlier match.
1081 */
68d686e6
TB
1082 return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1083 ctx, col, icol, 0) ||
1084 match_expr_eval(opt, x->u.binary.right, bol,
1085 eol, ctx, col, icol, 0));
017c0fcf 1086 }
68d686e6
TB
1087 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1088 icol, 0);
017c0fcf
TB
1089 if (collect_hits)
1090 x->u.binary.left->hit |= h;
68d686e6 1091 h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
017c0fcf 1092 icol, collect_hits);
0ab7befa
JH
1093 break;
1094 default:
d7530708 1095 die("Unexpected node type (internal error) %d", x->node);
83b5d2f5 1096 }
0ab7befa
JH
1097 if (collect_hits)
1098 x->hit |= h;
1099 return h;
83b5d2f5
JH
1100}
1101
1a845fbc
JK
1102static int match_expr(struct grep_opt *opt,
1103 const char *bol, const char *eol,
68d686e6
TB
1104 enum grep_context ctx, ssize_t *col,
1105 ssize_t *icol, int collect_hits)
83b5d2f5
JH
1106{
1107 struct grep_expr *x = opt->pattern_expression;
68d686e6 1108 return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
83b5d2f5
JH
1109}
1110
1a845fbc
JK
1111static int match_line(struct grep_opt *opt,
1112 const char *bol, const char *eol,
68d686e6 1113 ssize_t *col, ssize_t *icol,
0ab7befa 1114 enum grep_context ctx, int collect_hits)
83b5d2f5
JH
1115{
1116 struct grep_pat *p;
017c0fcf 1117 int hit = 0;
79212772 1118
db84376f 1119 if (opt->pattern_expression)
68d686e6
TB
1120 return match_expr(opt, bol, eol, ctx, col, icol,
1121 collect_hits);
0ab7befa
JH
1122
1123 /* we do not call with collect_hits without being extended */
83b5d2f5 1124 for (p = opt->pattern_list; p; p = p->next) {
68d686e6
TB
1125 regmatch_t tmp;
1126 if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
017c0fcf
TB
1127 hit |= 1;
1128 if (!opt->columnnum) {
1129 /*
1130 * Without --column, any single match on a line
1131 * is enough to know that it needs to be
1132 * printed. With --column, scan _all_ patterns
1133 * to find the earliest.
1134 */
1135 break;
1136 }
1137 if (*col < 0 || tmp.rm_so < *col)
1138 *col = tmp.rm_so;
68d686e6 1139 }
83b5d2f5 1140 }
017c0fcf 1141 return hit;
83b5d2f5
JH
1142}
1143
1a845fbc
JK
1144static int match_next_pattern(struct grep_pat *p,
1145 const char *bol, const char *eol,
7e8f59d5
RS
1146 enum grep_context ctx,
1147 regmatch_t *pmatch, int eflags)
1148{
1149 regmatch_t match;
1150
3f566c4e 1151 if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
7e8f59d5
RS
1152 return 0;
1153 if (match.rm_so < 0 || match.rm_eo < 0)
1154 return 0;
1155 if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1156 if (match.rm_so > pmatch->rm_so)
1157 return 1;
1158 if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1159 return 1;
1160 }
1161 pmatch->rm_so = match.rm_so;
1162 pmatch->rm_eo = match.rm_eo;
1163 return 1;
1164}
1165
3f566c4e
HM
1166int grep_next_match(struct grep_opt *opt,
1167 const char *bol, const char *eol,
1168 enum grep_context ctx, regmatch_t *pmatch,
1169 enum grep_header_field field, int eflags)
7e8f59d5
RS
1170{
1171 struct grep_pat *p;
1172 int hit = 0;
1173
1174 pmatch->rm_so = pmatch->rm_eo = -1;
1175 if (bol < eol) {
3f566c4e
HM
1176 for (p = ((ctx == GREP_CONTEXT_HEAD)
1177 ? opt->header_list : opt->pattern_list);
1178 p; p = p->next) {
7e8f59d5 1179 switch (p->token) {
7e8f59d5 1180 case GREP_PATTERN_HEAD:
3f566c4e
HM
1181 if ((field != GREP_HEADER_FIELD_MAX) &&
1182 (p->field != field))
1183 continue;
1184 /* fall thru */
1185 case GREP_PATTERN: /* atom */
7e8f59d5
RS
1186 case GREP_PATTERN_BODY:
1187 hit |= match_next_pattern(p, bol, eol, ctx,
1188 pmatch, eflags);
1189 break;
1190 default:
1191 break;
1192 }
1193 }
1194 }
1195 return hit;
1196}
1197
c707ded3
TB
1198static void show_line_header(struct grep_opt *opt, const char *name,
1199 unsigned lno, ssize_t cno, char sign)
7e8f59d5 1200{
1d84f72e 1201 if (opt->heading && opt->last_shown == 0) {
fa151dc5 1202 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1d84f72e
RS
1203 opt->output(opt, "\n", 1);
1204 }
5dd06d38
RS
1205 opt->last_shown = lno;
1206
1d84f72e 1207 if (!opt->heading && opt->pathname) {
fa151dc5 1208 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
55f638bd 1209 output_sep(opt, sign);
5b594f45
FK
1210 }
1211 if (opt->linenum) {
1212 char buf[32];
1a168e5c 1213 xsnprintf(buf, sizeof(buf), "%d", lno);
fa151dc5 1214 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
55f638bd 1215 output_sep(opt, sign);
5b594f45 1216 }
89252cd0
TB
1217 /*
1218 * Treat 'cno' as the 1-indexed offset from the start of a non-context
1219 * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1220 * being called with a context line.
1221 */
1222 if (opt->columnnum && cno) {
1223 char buf[32];
1224 xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
d036d667 1225 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
89252cd0
TB
1226 output_sep(opt, sign);
1227 }
c707ded3
TB
1228}
1229
1a845fbc
JK
1230static void show_line(struct grep_opt *opt,
1231 const char *bol, const char *eol,
c707ded3
TB
1232 const char *name, unsigned lno, ssize_t cno, char sign)
1233{
1234 int rest = eol - bol;
9d8db06e
TB
1235 const char *match_color = NULL;
1236 const char *line_color = NULL;
c707ded3
TB
1237
1238 if (opt->file_break && opt->last_shown == 0) {
1239 if (opt->show_hunk_mark)
1240 opt->output(opt, "\n", 1);
1241 } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1242 if (opt->last_shown == 0) {
1243 if (opt->show_hunk_mark) {
87ece7ce 1244 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
c707ded3
TB
1245 opt->output(opt, "\n", 1);
1246 }
1247 } else if (lno > opt->last_shown + 1) {
87ece7ce 1248 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
c707ded3
TB
1249 opt->output(opt, "\n", 1);
1250 }
1251 }
9d8db06e
TB
1252 if (!opt->only_matching) {
1253 /*
1254 * In case the line we're being called with contains more than
1255 * one match, leave printing each header to the loop below.
1256 */
1257 show_line_header(opt, name, lno, cno, sign);
1258 }
1259 if (opt->color || opt->only_matching) {
7e8f59d5
RS
1260 regmatch_t match;
1261 enum grep_context ctx = GREP_CONTEXT_BODY;
7e8f59d5
RS
1262 int eflags = 0;
1263
9d8db06e
TB
1264 if (opt->color) {
1265 if (sign == ':')
87ece7ce 1266 match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
9d8db06e 1267 else
87ece7ce 1268 match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
9d8db06e 1269 if (sign == ':')
87ece7ce 1270 line_color = opt->colors[GREP_COLOR_SELECTED];
9d8db06e 1271 else if (sign == '-')
87ece7ce 1272 line_color = opt->colors[GREP_COLOR_CONTEXT];
9d8db06e 1273 else if (sign == '=')
87ece7ce 1274 line_color = opt->colors[GREP_COLOR_FUNCTION];
9d8db06e 1275 }
3f566c4e
HM
1276 while (grep_next_match(opt, bol, eol, ctx, &match,
1277 GREP_HEADER_FIELD_MAX, eflags)) {
1f5b9cc4
RS
1278 if (match.rm_so == match.rm_eo)
1279 break;
5b594f45 1280
9d8db06e
TB
1281 if (opt->only_matching)
1282 show_line_header(opt, name, lno, cno, sign);
1283 else
1284 output_color(opt, bol, match.rm_so, line_color);
55f638bd 1285 output_color(opt, bol + match.rm_so,
79a77109 1286 match.rm_eo - match.rm_so, match_color);
9d8db06e
TB
1287 if (opt->only_matching)
1288 opt->output(opt, "\n", 1);
7e8f59d5 1289 bol += match.rm_eo;
9d8db06e 1290 cno += match.rm_eo;
7e8f59d5
RS
1291 rest -= match.rm_eo;
1292 eflags = REG_NOTBOL;
1293 }
7e8f59d5 1294 }
9d8db06e
TB
1295 if (!opt->only_matching) {
1296 output_color(opt, bol, rest, line_color);
1297 opt->output(opt, "\n", 1);
1298 }
7e8f59d5
RS
1299}
1300
78db6ea9
JK
/*
 * Consulted by grep_attr_lock() and grep_attr_unlock(): the mutex below
 * is only taken and released while this is non-zero.
 */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;
1308
78db6ea9 1309static inline void grep_attr_lock(void)
0579f91d 1310{
78db6ea9 1311 if (grep_use_locks)
0579f91d
TR
1312 pthread_mutex_lock(&grep_attr_mutex);
1313}
1314
78db6ea9 1315static inline void grep_attr_unlock(void)
0579f91d 1316{
78db6ea9 1317 if (grep_use_locks)
0579f91d
TR
1318 pthread_mutex_unlock(&grep_attr_mutex);
1319}
b3aeb285 1320
1a845fbc
JK
1321static int match_funcname(struct grep_opt *opt, struct grep_source *gs,
1322 const char *bol, const char *eol)
2944e4e6 1323{
60ecac98 1324 xdemitconf_t *xecfg = opt->priv;
0579f91d 1325 if (xecfg && !xecfg->find_func) {
acd00ea0 1326 grep_source_load_driver(gs, opt->repo->index);
94ad9d9e
JK
1327 if (gs->driver->funcname.pattern) {
1328 const struct userdiff_funcname *pe = &gs->driver->funcname;
0579f91d
TR
1329 xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1330 } else {
1331 xecfg = opt->priv = NULL;
1332 }
1333 }
1334
1335 if (xecfg) {
60ecac98
RS
1336 char buf[1];
1337 return xecfg->find_func(bol, eol - bol, buf, 1,
1338 xecfg->find_func_priv) >= 0;
1339 }
1340
2944e4e6
RS
1341 if (bol == eol)
1342 return 0;
1343 if (isalpha(*bol) || *bol == '_' || *bol == '$')
1344 return 1;
1345 return 0;
1346}
1347
e1327023 1348static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1a845fbc 1349 const char *bol, unsigned lno)
2944e4e6 1350{
e1327023 1351 while (bol > gs->buf) {
1a845fbc 1352 const char *eol = --bol;
2944e4e6 1353
e1327023 1354 while (bol > gs->buf && bol[-1] != '\n')
2944e4e6
RS
1355 bol--;
1356 lno--;
1357
1358 if (lno <= opt->last_shown)
1359 break;
1360
e1327023 1361 if (match_funcname(opt, gs, bol, eol)) {
89252cd0 1362 show_line(opt, bol, eol, gs->name, lno, 0, '=');
2944e4e6
RS
1363 break;
1364 }
1365 }
1366}
1367
a5dc20b0
RS
1368static int is_empty_line(const char *bol, const char *eol);
1369
e1327023 1370static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
1a845fbc 1371 const char *bol, const char *end, unsigned lno)
49de3216 1372{
6653a01b 1373 unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
a5dc20b0 1374 int funcname_needed = !!opt->funcname, comment_needed = 0;
ba8ea749 1375
49de3216
RS
1376 if (opt->pre_context < lno)
1377 from = lno - opt->pre_context;
1378 if (from <= opt->last_shown)
1379 from = opt->last_shown + 1;
6653a01b 1380 orig_from = from;
a5dc20b0
RS
1381 if (opt->funcbody) {
1382 if (match_funcname(opt, gs, bol, end))
1383 comment_needed = 1;
1384 else
1385 funcname_needed = 1;
6653a01b
RS
1386 from = opt->last_shown + 1;
1387 }
49de3216
RS
1388
1389 /* Rewind. */
6653a01b 1390 while (bol > gs->buf && cur > from) {
1a845fbc
JK
1391 const char *next_bol = bol;
1392 const char *eol = --bol;
2944e4e6 1393
e1327023 1394 while (bol > gs->buf && bol[-1] != '\n')
49de3216
RS
1395 bol--;
1396 cur--;
a5dc20b0
RS
1397 if (comment_needed && (is_empty_line(bol, eol) ||
1398 match_funcname(opt, gs, bol, eol))) {
1399 comment_needed = 0;
1400 from = orig_from;
1401 if (cur < from) {
1402 cur++;
1403 bol = next_bol;
1404 break;
1405 }
1406 }
e1327023 1407 if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
2944e4e6
RS
1408 funcname_lno = cur;
1409 funcname_needed = 0;
a5dc20b0
RS
1410 if (opt->funcbody)
1411 comment_needed = 1;
1412 else
1413 from = orig_from;
2944e4e6 1414 }
49de3216
RS
1415 }
1416
2944e4e6
RS
1417 /* We need to look even further back to find a function signature. */
1418 if (opt->funcname && funcname_needed)
e1327023 1419 show_funcname_line(opt, gs, bol, cur);
2944e4e6 1420
49de3216
RS
1421 /* Back forward. */
1422 while (cur < lno) {
1a845fbc 1423 const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
49de3216
RS
1424
1425 while (*eol != '\n')
1426 eol++;
89252cd0 1427 show_line(opt, bol, eol, gs->name, cur, 0, sign);
49de3216
RS
1428 bol = eol + 1;
1429 cur++;
1430 }
1431}
1432
a26345b6
JH
1433static int should_lookahead(struct grep_opt *opt)
1434{
1435 struct grep_pat *p;
1436
db84376f 1437 if (opt->pattern_expression)
a26345b6
JH
1438 return 0; /* punt for too complex stuff */
1439 if (opt->invert)
1440 return 0;
1441 for (p = opt->pattern_list; p; p = p->next) {
1442 if (p->token != GREP_PATTERN)
1443 return 0; /* punt for "header only" and stuff */
1444 }
1445 return 1;
1446}
1447
1448static int look_ahead(struct grep_opt *opt,
1449 unsigned long *left_p,
1450 unsigned *lno_p,
1a845fbc 1451 const char **bol_p)
a26345b6
JH
1452{
1453 unsigned lno = *lno_p;
1a845fbc 1454 const char *bol = *bol_p;
a26345b6 1455 struct grep_pat *p;
1a845fbc 1456 const char *sp, *last_bol;
a26345b6
JH
1457 regoff_t earliest = -1;
1458
1459 for (p = opt->pattern_list; p; p = p->next) {
1460 int hit;
1461 regmatch_t m;
1462
97e77784 1463 hit = patmatch(p, bol, bol + *left_p, &m, 0);
a26345b6
JH
1464 if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1465 continue;
1466 if (earliest < 0 || m.rm_so < earliest)
1467 earliest = m.rm_so;
1468 }
1469
1470 if (earliest < 0) {
1471 *bol_p = bol + *left_p;
1472 *left_p = 0;
1473 return 1;
1474 }
1475 for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1476 ; /* find the beginning of the line */
1477 last_bol = sp;
1478
1479 for (sp = bol; sp < last_bol; sp++) {
1480 if (*sp == '\n')
1481 lno++;
1482 }
1483 *left_p -= last_bol - bol;
1484 *bol_p = last_bol;
1485 *lno_p = lno;
1486 return 0;
1487}
1488
38bbc2ea
NTND
1489static int fill_textconv_grep(struct repository *r,
1490 struct userdiff_driver *driver,
335ec3bf
JK
1491 struct grep_source *gs)
1492{
1493 struct diff_filespec *df;
1494 char *buf;
1495 size_t size;
1496
1497 if (!driver || !driver->textconv)
1498 return grep_source_load(gs);
1499
1500 /*
1501 * The textconv interface is intimately tied to diff_filespecs, so we
1502 * have to pretend to be one. If we could unify the grep_source
1503 * and diff_filespec structs, this mess could just go away.
1504 */
1505 df = alloc_filespec(gs->path);
1506 switch (gs->type) {
1c41c82b 1507 case GREP_SOURCE_OID:
335ec3bf
JK
1508 fill_filespec(df, gs->identifier, 1, 0100644);
1509 break;
1510 case GREP_SOURCE_FILE:
14228447 1511 fill_filespec(df, null_oid(), 0, 0100644);
335ec3bf
JK
1512 break;
1513 default:
033abf97 1514 BUG("attempt to textconv something without a path?");
335ec3bf
JK
1515 }
1516
1517 /*
1d1729ca
MT
1518 * fill_textconv is not remotely thread-safe; it modifies the global
1519 * diff tempfile structure, writes to the_repo's odb and might
1520 * internally call thread-unsafe functions such as the
1521 * prepare_packed_git() lazy-initializator. Because of the last two, we
1522 * must ensure mutual exclusion between this call and the object reading
1523 * API, thus we use obj_read_lock() here.
1524 *
1525 * TODO: allowing text conversion to run in parallel with object
1526 * reading operations might increase performance in the multithreaded
1527 * non-worktreee git-grep with --textconv.
335ec3bf 1528 */
1d1729ca 1529 obj_read_lock();
38bbc2ea 1530 size = fill_textconv(r, driver, df, &buf);
1d1729ca 1531 obj_read_unlock();
335ec3bf
JK
1532 free_filespec(df);
1533
1534 /*
1535 * The normal fill_textconv usage by the diff machinery would just keep
1536 * the textconv'd buf separate from the diff_filespec. But much of the
1537 * grep code passes around a grep_source and assumes that its "buf"
1538 * pointer is the beginning of the thing we are searching. So let's
1539 * install our textconv'd version into the grep_source, taking care not
1540 * to leak any existing buffer.
1541 */
1542 grep_source_clear_data(gs);
1543 gs->buf = buf;
1544 gs->size = size;
1545
1546 return 0;
1547}
1548
4aa2c475
RS
/*
 * Return 1 if [bol, eol) holds nothing but whitespace (an empty range
 * included), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
	const char *p = bol;

	for (; p < eol; p++)
		if (!isspace(*p))
			break;
	return p == eol;
}
1555
e1327023 1556static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
83b5d2f5 1557{
1a845fbc
JK
1558 const char *bol;
1559 const char *peek_bol = NULL;
e1327023 1560 unsigned long left;
83b5d2f5 1561 unsigned lno = 1;
83b5d2f5 1562 unsigned last_hit = 0;
83b5d2f5 1563 int binary_match_only = 0;
83b5d2f5 1564 unsigned count = 0;
a26345b6 1565 int try_lookahead = 0;
ba8ea749 1566 int show_function = 0;
335ec3bf 1567 struct userdiff_driver *textconv = NULL;
480c1ca6 1568 enum grep_context ctx = GREP_CONTEXT_HEAD;
60ecac98 1569 xdemitconf_t xecfg;
83b5d2f5 1570
de99eb0c
ES
1571 if (!opt->status_only && gs->name == NULL)
1572 BUG("grep call which could print a name requires "
1573 "grep_source.name be non-NULL");
1574
5b594f45
FK
1575 if (!opt->output)
1576 opt->output = std_output;
1577
ba8ea749
RS
1578 if (opt->pre_context || opt->post_context || opt->file_break ||
1579 opt->funcbody) {
08303c36
RS
1580 /* Show hunk marks, except for the first file. */
1581 if (opt->last_shown)
1582 opt->show_hunk_mark = 1;
1583 /*
1584 * If we're using threads then we can't easily identify
1585 * the first file. Always put hunk marks in that case
1586 * and skip the very first one later in work_done().
1587 */
1588 if (opt->output != std_output)
1589 opt->show_hunk_mark = 1;
1590 }
431d6e7b
RS
1591 opt->last_shown = 0;
1592
335ec3bf 1593 if (opt->allow_textconv) {
acd00ea0 1594 grep_source_load_driver(gs, opt->repo->index);
335ec3bf
JK
1595 /*
1596 * We might set up the shared textconv cache data here, which
1d1729ca
MT
1597 * is not thread-safe. Also, get_oid_with_context() and
1598 * parse_object() might be internally called. As they are not
84544f2e 1599 * currently thread-safe and might be racy with object reading,
1d1729ca 1600 * obj_read_lock() must be called.
335ec3bf
JK
1601 */
1602 grep_attr_lock();
1d1729ca 1603 obj_read_lock();
bd7ad45b 1604 textconv = userdiff_get_textconv(opt->repo, gs->driver);
1d1729ca 1605 obj_read_unlock();
335ec3bf
JK
1606 grep_attr_unlock();
1607 }
1608
1609 /*
1610 * We know the result of a textconv is text, so we only have to care
1611 * about binary handling if we are not using it.
1612 */
1613 if (!textconv) {
1614 switch (opt->binary) {
1615 case GREP_BINARY_DEFAULT:
acd00ea0 1616 if (grep_source_is_binary(gs, opt->repo->index))
335ec3bf
JK
1617 binary_match_only = 1;
1618 break;
1619 case GREP_BINARY_NOMATCH:
acd00ea0 1620 if (grep_source_is_binary(gs, opt->repo->index))
335ec3bf
JK
1621 return 0; /* Assume unmatch */
1622 break;
1623 case GREP_BINARY_TEXT:
1624 break;
1625 default:
033abf97 1626 BUG("unknown binary handling mode");
335ec3bf 1627 }
83b5d2f5
JH
1628 }
1629
60ecac98 1630 memset(&xecfg, 0, sizeof(xecfg));
0579f91d
TR
1631 opt->priv = &xecfg;
1632
a26345b6 1633 try_lookahead = should_lookahead(opt);
60ecac98 1634
38bbc2ea 1635 if (fill_textconv_grep(opt->repo, textconv, gs) < 0)
08265798
JK
1636 return 0;
1637
e1327023
JK
1638 bol = gs->buf;
1639 left = gs->size;
83b5d2f5 1640 while (left) {
1a845fbc 1641 const char *eol;
0ab7befa 1642 int hit;
89252cd0 1643 ssize_t cno;
68d686e6 1644 ssize_t col = -1, icol = -1;
83b5d2f5 1645
a26345b6 1646 /*
8997da38 1647 * look_ahead() skips quickly to the line that possibly
a26345b6
JH
1648 * has the next hit; don't call it if we need to do
1649 * something more than just skipping the current line
1650 * in response to an unmatch for the current line. E.g.
1651 * inside a post-context window, we will show the current
1652 * line as a context around the previous hit when it
1653 * doesn't hit.
1654 */
1655 if (try_lookahead
1656 && !(last_hit
ba8ea749
RS
1657 && (show_function ||
1658 lno <= last_hit + opt->post_context))
a26345b6
JH
1659 && look_ahead(opt, &left, &lno, &bol))
1660 break;
83b5d2f5 1661 eol = end_of_line(bol, &left);
83b5d2f5 1662
480c1ca6
JH
1663 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1664 ctx = GREP_CONTEXT_BODY;
1665
68d686e6 1666 hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
83b5d2f5 1667
0ab7befa
JH
1668 if (collect_hits)
1669 goto next_line;
1670
83b5d2f5
JH
1671 /* "grep -v -e foo -e bla" should list lines
1672 * that do not have either, so inversion should
1673 * be done outside.
1674 */
1675 if (opt->invert)
1676 hit = !hit;
1677 if (opt->unmatch_name_only) {
1678 if (hit)
1679 return 0;
1680 goto next_line;
1681 }
68437ede 1682 if (hit && (opt->max_count < 0 || count < opt->max_count)) {
83b5d2f5
JH
1683 count++;
1684 if (opt->status_only)
1685 return 1;
321ffcc0 1686 if (opt->name_only) {
e1327023 1687 show_name(opt, gs->name);
321ffcc0
RS
1688 return 1;
1689 }
c30c10cf
RS
1690 if (opt->count)
1691 goto next_line;
83b5d2f5 1692 if (binary_match_only) {
5b594f45 1693 opt->output(opt, "Binary file ", 12);
e1327023 1694 output_color(opt, gs->name, strlen(gs->name),
fa151dc5 1695 opt->colors[GREP_COLOR_FILENAME]);
5b594f45 1696 opt->output(opt, " matches\n", 9);
83b5d2f5
JH
1697 return 1;
1698 }
83b5d2f5
JH
1699 /* Hit at this line. If we haven't shown the
1700 * pre-context lines, we would need to show them.
83b5d2f5 1701 */
ba8ea749 1702 if (opt->pre_context || opt->funcbody)
e1327023 1703 show_pre_context(opt, gs, bol, eol, lno);
2944e4e6 1704 else if (opt->funcname)
e1327023 1705 show_funcname_line(opt, gs, bol, lno);
89252cd0
TB
1706 cno = opt->invert ? icol : col;
1707 if (cno < 0) {
1708 /*
1709 * A negative cno indicates that there was no
1710 * match on the line. We are thus inverted and
1711 * being asked to show all lines that _don't_
1712 * match a given expression. Therefore, set cno
1713 * to 0 to suggest the whole line matches.
1714 */
1715 cno = 0;
1716 }
1717 show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
5dd06d38 1718 last_hit = lno;
ba8ea749
RS
1719 if (opt->funcbody)
1720 show_function = 1;
1721 goto next_line;
83b5d2f5 1722 }
4aa2c475
RS
1723 if (show_function && (!peek_bol || peek_bol < bol)) {
1724 unsigned long peek_left = left;
1a845fbc 1725 const char *peek_eol = eol;
4aa2c475
RS
1726
1727 /*
1728 * Trailing empty lines are not interesting.
1729 * Peek past them to see if they belong to the
1730 * body of the current function.
1731 */
1732 peek_bol = bol;
1733 while (is_empty_line(peek_bol, peek_eol)) {
1734 peek_bol = peek_eol + 1;
1735 peek_eol = end_of_line(peek_bol, &peek_left);
1736 }
1737
1738 if (match_funcname(opt, gs, peek_bol, peek_eol))
1739 show_function = 0;
1740 }
ba8ea749
RS
1741 if (show_function ||
1742 (last_hit && lno <= last_hit + opt->post_context)) {
83b5d2f5
JH
1743 /* If the last hit is within the post context,
1744 * we need to show this line.
1745 */
89252cd0 1746 show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
83b5d2f5 1747 }
83b5d2f5
JH
1748
1749 next_line:
1750 bol = eol + 1;
1751 if (!left)
1752 break;
1753 left--;
1754 lno++;
1755 }
1756
0ab7befa
JH
1757 if (collect_hits)
1758 return 0;
b48fb5b6 1759
83b5d2f5 1760 if (opt->status_only)
e1f68c66 1761 return opt->unmatch_name_only;
83b5d2f5
JH
1762 if (opt->unmatch_name_only) {
1763 /* We did not see any hit, so we want to show this */
e1327023 1764 show_name(opt, gs->name);
83b5d2f5
JH
1765 return 1;
1766 }
1767
60ecac98
RS
1768 xdiff_clear_find_func(&xecfg);
1769 opt->priv = NULL;
1770
83b5d2f5
JH
1771 /* NEEDSWORK:
1772 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1773 * which feels mostly useless but sometimes useful. Maybe
1774 * make it another option? For now suppress them.
1775 */
5b594f45
FK
1776 if (opt->count && count) {
1777 char buf[32];
f76d947a
RS
1778 if (opt->pathname) {
1779 output_color(opt, gs->name, strlen(gs->name),
fa151dc5 1780 opt->colors[GREP_COLOR_FILENAME]);
f76d947a
RS
1781 output_sep(opt, ':');
1782 }
1a168e5c 1783 xsnprintf(buf, sizeof(buf), "%u\n", count);
5b594f45 1784 opt->output(opt, buf, strlen(buf));
c30c10cf 1785 return 1;
5b594f45 1786 }
83b5d2f5
JH
1787 return !!last_hit;
1788}
1789
0ab7befa
JH
1790static void clr_hit_marker(struct grep_expr *x)
1791{
1792 /* All-hit markers are meaningful only at the very top level
1793 * OR node.
1794 */
1795 while (1) {
1796 x->hit = 0;
1797 if (x->node != GREP_NODE_OR)
1798 return;
1799 x->u.binary.left->hit = 0;
1800 x = x->u.binary.right;
1801 }
1802}
1803
1804static int chk_hit_marker(struct grep_expr *x)
1805{
1806 /* Top level nodes have hit markers. See if they all are hits */
1807 while (1) {
1808 if (x->node != GREP_NODE_OR)
1809 return x->hit;
1810 if (!x->u.binary.left->hit)
1811 return 0;
1812 x = x->u.binary.right;
1813 }
1814}
1815
e1327023 1816int grep_source(struct grep_opt *opt, struct grep_source *gs)
0ab7befa
JH
1817{
1818 /*
1819 * we do not have to do the two-pass grep when we do not check
1820 * buffer-wide "all-match".
1821 */
794c0002 1822 if (!opt->all_match && !opt->no_body_match)
e1327023 1823 return grep_source_1(opt, gs, 0);
0ab7befa
JH
1824
1825 /* Otherwise the toplevel "or" terms hit a bit differently.
1826 * We first clear hit markers from them.
1827 */
1828 clr_hit_marker(opt->pattern_expression);
794c0002 1829 opt->body_hit = 0;
e1327023 1830 grep_source_1(opt, gs, 1);
0ab7befa 1831
794c0002
RS
1832 if (opt->all_match && !chk_hit_marker(opt->pattern_expression))
1833 return 0;
1834 if (opt->no_body_match && opt->body_hit)
0ab7befa
JH
1835 return 0;
1836
e1327023
JK
1837 return grep_source_1(opt, gs, 0);
1838}
1839
1e668716
JK
1840static void grep_source_init_buf(struct grep_source *gs,
1841 const char *buf,
50d92b5f
JT
1842 unsigned long size)
1843{
1844 gs->type = GREP_SOURCE_BUF;
1845 gs->name = NULL;
1846 gs->path = NULL;
1847 gs->buf = buf;
1848 gs->size = size;
1849 gs->driver = NULL;
1850 gs->identifier = NULL;
1851}
1852
1e668716 1853int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size)
e1327023
JK
1854{
1855 struct grep_source gs;
1856 int r;
1857
50d92b5f 1858 grep_source_init_buf(&gs, buf, size);
e1327023
JK
1859
1860 r = grep_source(opt, &gs);
1861
1862 grep_source_clear(&gs);
1863 return r;
1864}
1865
50d92b5f
JT
1866void grep_source_init_file(struct grep_source *gs, const char *name,
1867 const char *path)
e1327023 1868{
50d92b5f 1869 gs->type = GREP_SOURCE_FILE;
8c53f071
JK
1870 gs->name = xstrdup_or_null(name);
1871 gs->path = xstrdup_or_null(path);
e1327023
JK
1872 gs->buf = NULL;
1873 gs->size = 0;
94ad9d9e 1874 gs->driver = NULL;
50d92b5f
JT
1875 gs->identifier = xstrdup(path);
1876}
e1327023 1877
50d92b5f 1878void grep_source_init_oid(struct grep_source *gs, const char *name,
0693806b
JT
1879 const char *path, const struct object_id *oid,
1880 struct repository *repo)
50d92b5f
JT
1881{
1882 gs->type = GREP_SOURCE_OID;
1883 gs->name = xstrdup_or_null(name);
1884 gs->path = xstrdup_or_null(path);
1885 gs->buf = NULL;
1886 gs->size = 0;
1887 gs->driver = NULL;
1888 gs->identifier = oiddup(oid);
0693806b 1889 gs->repo = repo;
e1327023
JK
1890}
1891
1892void grep_source_clear(struct grep_source *gs)
1893{
88ce3ef6
ÆAB
1894 FREE_AND_NULL(gs->name);
1895 FREE_AND_NULL(gs->path);
1896 FREE_AND_NULL(gs->identifier);
e1327023
JK
1897 grep_source_clear_data(gs);
1898}
1899
1900void grep_source_clear_data(struct grep_source *gs)
1901{
1902 switch (gs->type) {
1903 case GREP_SOURCE_FILE:
1c41c82b 1904 case GREP_SOURCE_OID:
1e668716
JK
1905 /* these types own the buffer */
1906 free((char *)gs->buf);
1907 gs->buf = NULL;
e1327023
JK
1908 gs->size = 0;
1909 break;
1910 case GREP_SOURCE_BUF:
1911 /* leave user-provided buf intact */
1912 break;
1913 }
1914}
1915
1c41c82b 1916static int grep_source_load_oid(struct grep_source *gs)
e1327023
JK
1917{
1918 enum object_type type;
1919
0693806b
JT
1920 gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
1921 &gs->size);
e1327023
JK
1922 if (!gs->buf)
1923 return error(_("'%s': unable to read %s"),
1924 gs->name,
1c41c82b 1925 oid_to_hex(gs->identifier));
e1327023
JK
1926 return 0;
1927}
1928
1929static int grep_source_load_file(struct grep_source *gs)
1930{
1931 const char *filename = gs->identifier;
1932 struct stat st;
1933 char *data;
1934 size_t size;
1935 int i;
1936
1937 if (lstat(filename, &st) < 0) {
1938 err_ret:
1939 if (errno != ENOENT)
7645d8f1 1940 error_errno(_("failed to stat '%s'"), filename);
e1327023
JK
1941 return -1;
1942 }
1943 if (!S_ISREG(st.st_mode))
1944 return -1;
1945 size = xsize_t(st.st_size);
1946 i = open(filename, O_RDONLY);
1947 if (i < 0)
1948 goto err_ret;
3733e694 1949 data = xmallocz(size);
e1327023 1950 if (st.st_size != read_in_full(i, data, size)) {
7645d8f1 1951 error_errno(_("'%s': short read"), filename);
e1327023
JK
1952 close(i);
1953 free(data);
1954 return -1;
1955 }
1956 close(i);
e1327023
JK
1957
1958 gs->buf = data;
1959 gs->size = size;
1960 return 0;
1961}
1962
3083301e 1963static int grep_source_load(struct grep_source *gs)
e1327023
JK
1964{
1965 if (gs->buf)
1966 return 0;
1967
1968 switch (gs->type) {
1969 case GREP_SOURCE_FILE:
1970 return grep_source_load_file(gs);
1c41c82b
BW
1971 case GREP_SOURCE_OID:
1972 return grep_source_load_oid(gs);
e1327023
JK
1973 case GREP_SOURCE_BUF:
1974 return gs->buf ? 0 : -1;
1975 }
033abf97 1976 BUG("invalid grep_source type to load");
0ab7befa 1977}
94ad9d9e 1978
acd00ea0
NTND
1979void grep_source_load_driver(struct grep_source *gs,
1980 struct index_state *istate)
94ad9d9e
JK
1981{
1982 if (gs->driver)
1983 return;
1984
1985 grep_attr_lock();
1d1729ca 1986 if (gs->path)
acd00ea0 1987 gs->driver = userdiff_find_by_path(istate, gs->path);
94ad9d9e
JK
1988 if (!gs->driver)
1989 gs->driver = userdiff_find_by_name("default");
1990 grep_attr_unlock();
1991}
41b59bfc 1992
acd00ea0
NTND
1993static int grep_source_is_binary(struct grep_source *gs,
1994 struct index_state *istate)
41b59bfc 1995{
acd00ea0 1996 grep_source_load_driver(gs, istate);
41b59bfc
JK
1997 if (gs->driver->binary != -1)
1998 return gs->driver->binary;
1999
2000 if (!grep_source_load(gs))
2001 return buffer_is_binary(gs->buf, gs->size);
2002
2003 return 0;
2004}