]> git.ipfire.org Git - thirdparty/git.git/blame - grep.c
hash-ll.h: split out of hash.h to remove dependency on repository.h
[thirdparty/git.git] / grep.c
CommitLineData
4f6728d5 1#include "git-compat-util.h"
b2141fc1 2#include "config.h"
f394e093 3#include "gettext.h"
83b5d2f5 4#include "grep.h"
41771fa4 5#include "hex.h"
cbd53a21 6#include "object-store.h"
60ecac98 7#include "userdiff.h"
6bfce93e 8#include "xdiff-interface.h"
335ec3bf
JK
9#include "diff.h"
10#include "diffcore.h"
5c1ebcca 11#include "commit.h"
793dc676 12#include "quote.h"
3ac68a93 13#include "help.h"
65156bb7 14#include "wrapper.h"
83b5d2f5 15
07a7d656 16static int grep_source_load(struct grep_source *gs);
acd00ea0
NTND
17static int grep_source_is_binary(struct grep_source *gs,
18 struct index_state *istate);
07a7d656 19
/*
 * Output callback that writes `size` bytes starting at `buf` to stdout.
 * `opt` is not used by this implementation.
 */
static void std_output(struct grep_opt *opt, const void *buf, size_t size)
{
	fwrite(buf, size, 1, stdout);
}
24
fa151dc5
NTND
25static const char *color_grep_slots[] = {
26 [GREP_COLOR_CONTEXT] = "context",
27 [GREP_COLOR_FILENAME] = "filename",
28 [GREP_COLOR_FUNCTION] = "function",
29 [GREP_COLOR_LINENO] = "lineNumber",
d036d667 30 [GREP_COLOR_COLUMNNO] = "column",
fa151dc5
NTND
31 [GREP_COLOR_MATCH_CONTEXT] = "matchContext",
32 [GREP_COLOR_MATCH_SELECTED] = "matchSelected",
33 [GREP_COLOR_SELECTED] = "selected",
34 [GREP_COLOR_SEP] = "separator",
35};
36
7687a054
JH
37static int parse_pattern_type_arg(const char *opt, const char *arg)
38{
39 if (!strcmp(arg, "default"))
40 return GREP_PATTERN_TYPE_UNSPECIFIED;
41 else if (!strcmp(arg, "basic"))
42 return GREP_PATTERN_TYPE_BRE;
43 else if (!strcmp(arg, "extended"))
44 return GREP_PATTERN_TYPE_ERE;
45 else if (!strcmp(arg, "fixed"))
46 return GREP_PATTERN_TYPE_FIXED;
47 else if (!strcmp(arg, "perl"))
48 return GREP_PATTERN_TYPE_PCRE;
49 die("bad %s argument: %s", opt, arg);
50}
51
3ac68a93
NTND
52define_list_config_array_extra(color_grep_slots, {"match"});
53
7687a054
JH
54/*
55 * Read the configuration file once and store it in
56 * the grep_defaults template.
57 */
58int grep_config(const char *var, const char *value, void *cb)
59{
72365bb4 60 struct grep_opt *opt = cb;
fa151dc5 61 const char *slot;
7687a054
JH
62
63 if (userdiff_config(var, value) < 0)
64 return -1;
65
66 if (!strcmp(var, "grep.extendedregexp")) {
c7e38551 67 opt->extended_regexp_option = git_config_bool(var, value);
7687a054
JH
68 return 0;
69 }
70
71 if (!strcmp(var, "grep.patterntype")) {
72 opt->pattern_type_option = parse_pattern_type_arg(var, value);
73 return 0;
74 }
75
76 if (!strcmp(var, "grep.linenumber")) {
77 opt->linenum = git_config_bool(var, value);
78 return 0;
79 }
6653fec3
TB
80 if (!strcmp(var, "grep.column")) {
81 opt->columnnum = git_config_bool(var, value);
82 return 0;
83 }
7687a054 84
6453f7b3
AS
85 if (!strcmp(var, "grep.fullname")) {
86 opt->relative = !git_config_bool(var, value);
87 return 0;
88 }
89
7687a054
JH
90 if (!strcmp(var, "color.grep"))
91 opt->color = git_config_colorbool(var, value);
fa151dc5
NTND
92 if (!strcmp(var, "color.grep.match")) {
93 if (grep_config("color.grep.matchcontext", value, cb) < 0)
94 return -1;
95 if (grep_config("color.grep.matchselected", value, cb) < 0)
96 return -1;
97 } else if (skip_prefix(var, "color.grep.", &slot)) {
98 int i = LOOKUP_CONFIG(color_grep_slots, slot);
99 char *color;
100
101 if (i < 0)
102 return -1;
103 color = opt->colors[i];
7687a054
JH
104 if (!value)
105 return config_error_nonbool(var);
f6c5a296 106 return color_parse(value, color);
7687a054
JH
107 }
108 return 0;
109}
110
9725c8dd 111void grep_init(struct grep_opt *opt, struct repository *repo)
7687a054 112{
72365bb4
ÆAB
113 struct grep_opt blank = GREP_OPT_INIT;
114 memcpy(opt, &blank, sizeof(*opt));
6ba9bb76 115
38bbc2ea 116 opt->repo = repo;
7687a054
JH
117 opt->pattern_tail = &opt->pattern_list;
118 opt->header_tail = &opt->header_list;
7687a054 119}
07a7d656 120
fc456751
RS
121static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
122 const char *origin, int no,
123 enum grep_pat_token t,
124 enum grep_header_field field)
a4d7d2c6
JH
125{
126 struct grep_pat *p = xcalloc(1, sizeof(*p));
526a858a 127 p->pattern = xmemdupz(pat, patlen);
fc456751
RS
128 p->patternlen = patlen;
129 p->origin = origin;
130 p->no = no;
131 p->token = t;
a4d7d2c6 132 p->field = field;
fc456751
RS
133 return p;
134}
135
2b3873ff
RS
136static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
137{
138 **tail = p;
139 *tail = &p->next;
a4d7d2c6 140 p->next = NULL;
526a858a
RS
141
142 switch (p->token) {
143 case GREP_PATTERN: /* atom */
144 case GREP_PATTERN_HEAD:
145 case GREP_PATTERN_BODY:
146 for (;;) {
147 struct grep_pat *new_pat;
148 size_t len = 0;
149 char *cp = p->pattern + p->patternlen, *nl = NULL;
150 while (++len <= p->patternlen) {
151 if (*(--cp) == '\n') {
152 nl = cp;
153 break;
154 }
155 }
156 if (!nl)
157 break;
158 new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
159 p->no, p->token, p->field);
160 new_pat->next = p->next;
161 if (!p->next)
162 *tail = &new_pat->next;
163 p->next = new_pat;
164 *nl = '\0';
165 p->patternlen -= len;
166 }
167 break;
168 default:
169 break;
170 }
2b3873ff
RS
171}
172
fc456751
RS
173void append_header_grep_pattern(struct grep_opt *opt,
174 enum grep_header_field field, const char *pat)
175{
176 struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
177 GREP_PATTERN_HEAD, field);
baa6378f
JH
178 if (field == GREP_HEADER_REFLOG)
179 opt->use_reflog_filter = 1;
2b3873ff 180 do_append_grep_pat(&opt->header_tail, p);
a4d7d2c6
JH
181}
182
83b5d2f5
JH
183void append_grep_pattern(struct grep_opt *opt, const char *pat,
184 const char *origin, int no, enum grep_pat_token t)
ed40a095
RS
185{
186 append_grep_pat(opt, pat, strlen(pat), origin, no, t);
187}
188
189void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
190 const char *origin, int no, enum grep_pat_token t)
83b5d2f5 191{
fc456751 192 struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
2b3873ff 193 do_append_grep_pat(&opt->pattern_tail, p);
83b5d2f5
JH
194}
195
5b594f45
FK
196struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
197{
198 struct grep_pat *pat;
199 struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
200 *ret = *opt;
201
202 ret->pattern_list = NULL;
203 ret->pattern_tail = &ret->pattern_list;
204
205 for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
206 {
207 if(pat->token == GREP_PATTERN_HEAD)
208 append_header_grep_pattern(ret, pat->field,
209 pat->pattern);
210 else
ed40a095
RS
211 append_grep_pat(ret, pat->pattern, pat->patternlen,
212 pat->origin, pat->no, pat->token);
5b594f45
FK
213 }
214
215 return ret;
216}
217
a30c148a
MK
218static NORETURN void compile_regexp_failed(const struct grep_pat *p,
219 const char *error)
220{
221 char where[1024];
222
223 if (p->no)
19bdd3e7 224 xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
a30c148a 225 else if (p->origin)
19bdd3e7 226 xsnprintf(where, sizeof(where), "%s, ", p->origin);
a30c148a
MK
227 else
228 where[0] = 0;
229
230 die("%s'%s': %s", where, p->pattern, error);
231}
232
/*
 * Return 1 if none of the first `len` bytes of `s` is flagged by
 * is_regex_special(), 0 as soon as one is.
 */
static int is_fixed(const char *s, size_t len)
{
	const char *end = s + len;

	for (; s < end; s++) {
		if (is_regex_special(*s))
			return 0;
	}
	return 1;
}
244
94da9193 245#ifdef USE_LIBPCRE2
c1760352
ÆAB
246#define GREP_PCRE2_DEBUG_MALLOC 0
247
248static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
249{
250 void *pointer = malloc(size);
251#if GREP_PCRE2_DEBUG_MALLOC
252 static int count = 1;
253 fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size);
254#endif
255 return pointer;
256}
257
258static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
259{
260#if GREP_PCRE2_DEBUG_MALLOC
261 static int count = 1;
262 if (pointer)
263 fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++);
264#endif
265 free(pointer);
266}
267
50b6ad55
MK
268static int pcre2_jit_functional(void)
269{
270 static int jit_working = -1;
271 pcre2_code *code;
272 size_t off;
273 int err;
274
275 if (jit_working != -1)
276 return jit_working;
277
278 /*
279 * Try to JIT compile a simple pattern to probe if the JIT is
280 * working in general. It might fail for systems where creating
281 * memory mappings for runtime code generation is restricted.
282 */
283 code = pcre2_compile((PCRE2_SPTR)".", 1, 0, &err, &off, NULL);
284 if (!code)
285 return 0;
286
287 jit_working = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE) == 0;
288 pcre2_code_free(code);
289
290 return jit_working;
291}
292
94da9193
ÆAB
293static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
294{
295 int error;
296 PCRE2_UCHAR errbuf[256];
297 PCRE2_SIZE erroffset;
298 int options = PCRE2_MULTILINE;
94da9193 299 int jitret;
a25b9085
ÆAB
300 int patinforet;
301 size_t jitsizearg;
32e3e8bc 302 int literal = !opt->ignore_case && (p->fixed || p->is_fixed);
94da9193 303
cbe81e65
ÆAB
304 /*
305 * Call pcre2_general_context_create() before calling any
306 * other pcre2_*(). It sets up our malloc()/free() functions
307 * with which everything else is allocated.
308 */
309 p->pcre2_general_context = pcre2_general_context_create(
310 pcre2_malloc, pcre2_free, NULL);
311 if (!p->pcre2_general_context)
312 die("Couldn't allocate PCRE2 general context");
94da9193
ÆAB
313
314 if (opt->ignore_case) {
44570188 315 if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
cbe81e65
ÆAB
316 p->pcre2_tables = pcre2_maketables(p->pcre2_general_context);
317 p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
10da030a
CMAB
318 pcre2_set_character_tables(p->pcre2_compile_context,
319 p->pcre2_tables);
94da9193
ÆAB
320 }
321 options |= PCRE2_CASELESS;
322 }
32e3e8bc 323 if (!opt->ignore_locale && is_utf8_locale() && !literal)
acabd204 324 options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);
95ca1f98 325
97169fc3 326#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
95ca1f98 327 /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
797c3599
ÆAB
328 if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS))
329 options |= PCRE2_NO_START_OPTIMIZE;
330#endif
94da9193
ÆAB
331
332 p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
333 p->patternlen, options, &error, &erroffset,
334 p->pcre2_compile_context);
335
336 if (p->pcre2_pattern) {
cbe81e65 337 p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
94da9193
ÆAB
338 if (!p->pcre2_match_data)
339 die("Couldn't allocate PCRE2 match data");
340 } else {
341 pcre2_get_error_message(error, errbuf, sizeof(errbuf));
342 compile_regexp_failed(p, (const char *)&errbuf);
343 }
344
345 pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
04bef50c 346 if (p->pcre2_jit_on) {
94da9193 347 jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
50b6ad55
MK
348 if (jitret == PCRE2_ERROR_NOMEMORY && !pcre2_jit_functional()) {
349 /*
350 * Even though pcre2_config(PCRE2_CONFIG_JIT, ...)
351 * indicated JIT support, the library might still
352 * fail to generate JIT code for various reasons,
353 * e.g. when SELinux's 'deny_execmem' or PaX's
354 * MPROTECT prevent creating W|X memory mappings.
355 *
356 * Instead of faling hard, fall back to interpreter
357 * mode, just as if the pattern was prefixed with
358 * '(*NO_JIT)'.
359 */
360 p->pcre2_jit_on = 0;
361 return;
362 } else if (jitret) {
363 int need_clip = p->patternlen > 64;
364 int clip_len = need_clip ? 64 : p->patternlen;
365 die("Couldn't JIT the PCRE2 pattern '%.*s'%s, got '%d'%s",
366 clip_len, p->pattern, need_clip ? "..." : "", jitret,
367 pcre2_jit_functional()
368 ? "\nPerhaps prefix (*NO_JIT) to your pattern?"
369 : "");
370 }
a25b9085
ÆAB
371
372 /*
373 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
374 * tells us whether the library itself supports JIT,
375 * but to see whether we're going to be actually using
376 * JIT we need to extract PCRE2_INFO_JITSIZE from the
377 * pattern *after* we do pcre2_jit_compile() above.
378 *
379 * This is because if the pattern contains the
380 * (*NO_JIT) verb (see pcre2syntax(3))
381 * pcre2_jit_compile() will exit early with 0. If we
382 * then proceed to call pcre2_jit_match() further down
383 * the line instead of pcre2_match() we'll either
384 * segfault (pre PCRE 10.31) or run into a fatal error
385 * (post PCRE2 10.31)
386 */
387 patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
388 if (patinforet)
389 BUG("pcre2_pattern_info() failed: %d", patinforet);
390 if (jitsizearg == 0) {
391 p->pcre2_jit_on = 0;
392 return;
393 }
94da9193
ÆAB
394 }
395}
396
397static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
398 regmatch_t *match, int eflags)
399{
400 int ret, flags = 0;
401 PCRE2_SIZE *ovector;
402 PCRE2_UCHAR errbuf[256];
403
404 if (eflags & REG_NOTBOL)
405 flags |= PCRE2_NOTBOL;
406
407 if (p->pcre2_jit_on)
408 ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
409 eol - line, 0, flags, p->pcre2_match_data,
410 NULL);
411 else
412 ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
413 eol - line, 0, flags, p->pcre2_match_data,
414 NULL);
415
416 if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
417 pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
418 die("%s failed with error code %d: %s",
419 (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
420 errbuf);
421 }
422 if (ret > 0) {
423 ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
424 ret = 0;
425 match->rm_so = (int)ovector[0];
426 match->rm_eo = (int)ovector[1];
427 }
428
429 return ret;
430}
431
432static void free_pcre2_pattern(struct grep_pat *p)
433{
434 pcre2_compile_context_free(p->pcre2_compile_context);
435 pcre2_code_free(p->pcre2_pattern);
436 pcre2_match_data_free(p->pcre2_match_data);
b76bf27f 437#ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER
cbe81e65 438 pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables);
b76bf27f 439#else
10da030a 440 free((void *)p->pcre2_tables);
b76bf27f 441#endif
cbe81e65 442 pcre2_general_context_free(p->pcre2_general_context);
94da9193
ÆAB
443}
444#else /* !USE_LIBPCRE2 */
/*
 * Stand-in used when the build has no PCRE2 support: asking for a
 * Perl-compatible pattern is a fatal error.
 */
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
	die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
449
/*
 * Stand-in used when the build has no PCRE2 support: always returns 1,
 * which patmatch() treats as "no match".
 */
static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
		regmatch_t *match, int eflags)
{
	return 1;
}
455
/* Stand-in used when the build has no PCRE2 support: nothing to free. */
static void free_pcre2_pattern(struct grep_pat *p)
{
}
94da9193 459
793dc676
NTND
460static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
461{
462 struct strbuf sb = STRBUF_INIT;
463 int err;
1ceababc 464 int regflags = 0;
793dc676
NTND
465
466 basic_regex_quote_buf(&sb, p->pattern);
793dc676
NTND
467 if (opt->ignore_case)
468 regflags |= REG_ICASE;
469 err = regcomp(&p->regexp, sb.buf, regflags);
793dc676
NTND
470 strbuf_release(&sb);
471 if (err) {
472 char errbuf[1024];
473 regerror(err, &p->regexp, errbuf, sizeof(errbuf));
793dc676
NTND
474 compile_regexp_failed(p, errbuf);
475 }
476}
b65abcaf 477#endif /* !USE_LIBPCRE2 */
793dc676 478
83b5d2f5
JH
479static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
480{
c822255c 481 int err;
07a3d411 482 int regflags = REG_NEWLINE;
c822255c 483
04bf052e
ÆAB
484 if (opt->pattern_type_option == GREP_PATTERN_TYPE_UNSPECIFIED)
485 opt->pattern_type_option = (opt->extended_regexp_option
486 ? GREP_PATTERN_TYPE_ERE
487 : GREP_PATTERN_TYPE_BRE);
488
d7eb527d 489 p->word_regexp = opt->word_regexp;
5183bf67 490 p->ignore_case = opt->ignore_case;
04bf052e 491 p->fixed = opt->pattern_type_option == GREP_PATTERN_TYPE_FIXED;
d7eb527d 492
04bf052e 493 if (opt->pattern_type_option != GREP_PATTERN_TYPE_PCRE &&
ae807d77 494 memchr(p->pattern, 0, p->patternlen))
45d1f37c
ÆAB
495 die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
496
09872f64 497 p->is_fixed = is_fixed(p->pattern, p->patternlen);
8a599983
ÆAB
498#ifdef USE_LIBPCRE2
499 if (!p->fixed && !p->is_fixed) {
500 const char *no_jit = "(*NO_JIT)";
501 const int no_jit_len = strlen(no_jit);
502 if (starts_with(p->pattern, no_jit) &&
503 is_fixed(p->pattern + no_jit_len,
504 p->patternlen - no_jit_len))
505 p->is_fixed = 1;
506 }
507#endif
09872f64 508 if (p->fixed || p->is_fixed) {
b65abcaf 509#ifdef USE_LIBPCRE2
09872f64 510 if (p->is_fixed) {
b65abcaf
ÆAB
511 compile_pcre2_pattern(p, opt);
512 } else {
513 /*
514 * E.g. t7811-grep-open.sh relies on the
515 * pattern being restored.
516 */
517 char *old_pattern = p->pattern;
518 size_t old_patternlen = p->patternlen;
519 struct strbuf sb = STRBUF_INIT;
520
521 /*
522 * There is the PCRE2_LITERAL flag, but it's
523 * only in PCRE v2 10.30 and later. Needing to
524 * ifdef our way around that and dealing with
525 * it + PCRE2_MULTILINE being an error is more
526 * complex than just quoting this ourselves.
527 */
528 strbuf_add(&sb, "\\Q", 2);
529 strbuf_add(&sb, p->pattern, p->patternlen);
530 strbuf_add(&sb, "\\E", 2);
531
532 p->pattern = sb.buf;
533 p->patternlen = sb.len;
534 compile_pcre2_pattern(p, opt);
535 p->pattern = old_pattern;
536 p->patternlen = old_patternlen;
537 strbuf_release(&sb);
538 }
539#else /* !USE_LIBPCRE2 */
793dc676 540 compile_fixed_regexp(p, opt);
b65abcaf 541#endif /* !USE_LIBPCRE2 */
793dc676 542 return;
9eceddee 543 }
c822255c 544
04bf052e 545 if (opt->pattern_type_option == GREP_PATTERN_TYPE_PCRE) {
94da9193
ÆAB
546 compile_pcre2_pattern(p, opt);
547 return;
548 }
549
07a3d411
ÆAB
550 if (p->ignore_case)
551 regflags |= REG_ICASE;
04bf052e 552 if (opt->pattern_type_option == GREP_PATTERN_TYPE_ERE)
07a3d411
ÆAB
553 regflags |= REG_EXTENDED;
554 err = regcomp(&p->regexp, p->pattern, regflags);
83b5d2f5
JH
555 if (err) {
556 char errbuf[1024];
83b5d2f5 557 regerror(err, &p->regexp, errbuf, 1024);
a30c148a 558 compile_regexp_failed(p, errbuf);
83b5d2f5
JH
559 }
560}
561
e2b15427
RS
562static struct grep_expr *grep_not_expr(struct grep_expr *expr)
563{
564 struct grep_expr *z = xcalloc(1, sizeof(*z));
565 z->node = GREP_NODE_NOT;
566 z->u.unary = expr;
567 return z;
568}
569
f2d27598
TB
570static struct grep_expr *grep_binexp(enum grep_expr_node kind,
571 struct grep_expr *left,
572 struct grep_expr *right)
9dbf00ba
RS
573{
574 struct grep_expr *z = xcalloc(1, sizeof(*z));
f2d27598 575 z->node = kind;
9dbf00ba
RS
576 z->u.binary.left = left;
577 z->u.binary.right = right;
578 return z;
579}
580
f2d27598
TB
581static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
582{
583 return grep_binexp(GREP_NODE_OR, left, right);
584}
585
0a6adc26
TB
586static struct grep_expr *grep_and_expr(struct grep_expr *left, struct grep_expr *right)
587{
588 return grep_binexp(GREP_NODE_AND, left, right);
589}
590
0ab7befa 591static struct grep_expr *compile_pattern_or(struct grep_pat **);
83b5d2f5
JH
592static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
593{
594 struct grep_pat *p;
595 struct grep_expr *x;
596
597 p = *list;
c922b01f
LT
598 if (!p)
599 return NULL;
83b5d2f5
JH
600 switch (p->token) {
601 case GREP_PATTERN: /* atom */
480c1ca6
JH
602 case GREP_PATTERN_HEAD:
603 case GREP_PATTERN_BODY:
ca56dadb 604 CALLOC_ARRAY(x, 1);
83b5d2f5
JH
605 x->node = GREP_NODE_ATOM;
606 x->u.atom = p;
607 *list = p->next;
608 return x;
609 case GREP_OPEN_PAREN:
610 *list = p->next;
0ab7befa 611 x = compile_pattern_or(list);
83b5d2f5
JH
612 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
613 die("unmatched parenthesis");
614 *list = (*list)->next;
615 return x;
616 default:
617 return NULL;
618 }
619}
620
621static struct grep_expr *compile_pattern_not(struct grep_pat **list)
622{
623 struct grep_pat *p;
624 struct grep_expr *x;
625
626 p = *list;
c922b01f
LT
627 if (!p)
628 return NULL;
83b5d2f5
JH
629 switch (p->token) {
630 case GREP_NOT:
631 if (!p->next)
632 die("--not not followed by pattern expression");
633 *list = p->next;
e2b15427
RS
634 x = compile_pattern_not(list);
635 if (!x)
83b5d2f5 636 die("--not followed by non pattern expression");
e2b15427 637 return grep_not_expr(x);
83b5d2f5
JH
638 default:
639 return compile_pattern_atom(list);
640 }
641}
642
643static struct grep_expr *compile_pattern_and(struct grep_pat **list)
644{
645 struct grep_pat *p;
0a6adc26 646 struct grep_expr *x, *y;
83b5d2f5
JH
647
648 x = compile_pattern_not(list);
649 p = *list;
650 if (p && p->token == GREP_AND) {
fe7fe62d
RS
651 if (!x)
652 die("--and not preceded by pattern expression");
83b5d2f5
JH
653 if (!p->next)
654 die("--and not followed by pattern expression");
655 *list = p->next;
656 y = compile_pattern_and(list);
657 if (!y)
658 die("--and not followed by pattern expression");
0a6adc26 659 return grep_and_expr(x, y);
83b5d2f5
JH
660 }
661 return x;
662}
663
664static struct grep_expr *compile_pattern_or(struct grep_pat **list)
665{
666 struct grep_pat *p;
9dbf00ba 667 struct grep_expr *x, *y;
83b5d2f5
JH
668
669 x = compile_pattern_and(list);
670 p = *list;
671 if (x && p && p->token != GREP_CLOSE_PAREN) {
672 y = compile_pattern_or(list);
673 if (!y)
674 die("not a pattern expression %s", p->pattern);
9dbf00ba 675 return grep_or_expr(x, y);
83b5d2f5
JH
676 }
677 return x;
678}
679
/* Entry point of the expression parser; OR is the top-level rule. */
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
	struct grep_expr *expr = compile_pattern_or(list);

	return expr;
}
684
5aaeb733
JH
685static struct grep_expr *grep_true_expr(void)
686{
687 struct grep_expr *z = xcalloc(1, sizeof(*z));
688 z->node = GREP_NODE_TRUE;
689 return z;
690}
691
95ce9ce2 692static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
83b5d2f5
JH
693{
694 struct grep_pat *p;
95ce9ce2 695 struct grep_expr *header_expr;
5aaeb733
JH
696 struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
697 enum grep_header_field fld;
83b5d2f5 698
95ce9ce2
JH
699 if (!opt->header_list)
700 return NULL;
2385f246 701
95ce9ce2
JH
702 for (p = opt->header_list; p; p = p->next) {
703 if (p->token != GREP_PATTERN_HEAD)
033abf97 704 BUG("a non-header pattern in grep header list.");
3ce3ffb8
AP
705 if (p->field < GREP_HEADER_FIELD_MIN ||
706 GREP_HEADER_FIELD_MAX <= p->field)
033abf97 707 BUG("unknown header field %d", p->field);
95ce9ce2 708 compile_regexp(p, opt);
80235ba7 709 }
5aaeb733
JH
710
711 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
712 header_group[fld] = NULL;
713
714 for (p = opt->header_list; p; p = p->next) {
715 struct grep_expr *h;
716 struct grep_pat *pp = p;
717
718 h = compile_pattern_atom(&pp);
719 if (!h || pp != p->next)
033abf97 720 BUG("malformed header expr");
5aaeb733
JH
721 if (!header_group[p->field]) {
722 header_group[p->field] = h;
723 continue;
724 }
725 header_group[p->field] = grep_or_expr(h, header_group[p->field]);
726 }
727
728 header_expr = NULL;
729
730 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
731 if (!header_group[fld])
732 continue;
733 if (!header_expr)
734 header_expr = grep_true_expr();
735 header_expr = grep_or_expr(header_group[fld], header_expr);
736 }
95ce9ce2
JH
737 return header_expr;
738}
739
13e4fc7e
JH
740static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
741{
742 struct grep_expr *z = x;
743
744 while (x) {
745 assert(x->node == GREP_NODE_OR);
746 if (x->u.binary.right &&
747 x->u.binary.right->node == GREP_NODE_TRUE) {
748 x->u.binary.right = y;
749 break;
750 }
751 x = x->u.binary.right;
752 }
753 return z;
754}
755
15c96497 756void compile_grep_patterns(struct grep_opt *opt)
95ce9ce2
JH
757{
758 struct grep_pat *p;
759 struct grep_expr *header_expr = prep_header_patterns(opt);
db84376f 760 int extended = 0;
0ab7befa 761
83b5d2f5 762 for (p = opt->pattern_list; p; p = p->next) {
480c1ca6
JH
763 switch (p->token) {
764 case GREP_PATTERN: /* atom */
765 case GREP_PATTERN_HEAD:
766 case GREP_PATTERN_BODY:
c822255c 767 compile_regexp(p, opt);
480c1ca6
JH
768 break;
769 default:
db84376f 770 extended = 1;
480c1ca6
JH
771 break;
772 }
83b5d2f5
JH
773 }
774
794c0002 775 if (opt->all_match || opt->no_body_match || header_expr)
db84376f
ÆAB
776 extended = 1;
777 else if (!extended)
83b5d2f5
JH
778 return;
779
83b5d2f5 780 p = opt->pattern_list;
ba150a3f
MB
781 if (p)
782 opt->pattern_expression = compile_pattern_expr(&p);
83b5d2f5
JH
783 if (p)
784 die("incomplete pattern expression: %s", p->pattern);
80235ba7 785
794c0002
RS
786 if (opt->no_body_match && opt->pattern_expression)
787 opt->pattern_expression = grep_not_expr(opt->pattern_expression);
788
80235ba7
JH
789 if (!header_expr)
790 return;
791
5aaeb733 792 if (!opt->pattern_expression)
80235ba7 793 opt->pattern_expression = header_expr;
13e4fc7e
JH
794 else if (opt->all_match)
795 opt->pattern_expression = grep_splice_or(header_expr,
796 opt->pattern_expression);
5aaeb733
JH
797 else
798 opt->pattern_expression = grep_or_expr(opt->pattern_expression,
799 header_expr);
80235ba7 800 opt->all_match = 1;
83b5d2f5
JH
801}
802
b48fb5b6
JH
803static void free_pattern_expr(struct grep_expr *x)
804{
805 switch (x->node) {
5aaeb733 806 case GREP_NODE_TRUE:
b48fb5b6
JH
807 case GREP_NODE_ATOM:
808 break;
809 case GREP_NODE_NOT:
810 free_pattern_expr(x->u.unary);
811 break;
812 case GREP_NODE_AND:
813 case GREP_NODE_OR:
814 free_pattern_expr(x->u.binary.left);
815 free_pattern_expr(x->u.binary.right);
816 break;
817 }
818 free(x);
819}
820
891c9965 821static void free_grep_pat(struct grep_pat *pattern)
b48fb5b6
JH
822{
823 struct grep_pat *p, *n;
824
891c9965 825 for (p = pattern; p; p = n) {
b48fb5b6
JH
826 n = p->next;
827 switch (p->token) {
828 case GREP_PATTERN: /* atom */
829 case GREP_PATTERN_HEAD:
830 case GREP_PATTERN_BODY:
7599730b 831 if (p->pcre2_pattern)
94da9193 832 free_pcre2_pattern(p);
63e7e9d8
MK
833 else
834 regfree(&p->regexp);
526a858a 835 free(p->pattern);
b48fb5b6
JH
836 break;
837 default:
838 break;
839 }
840 free(p);
841 }
891c9965 842}
b48fb5b6 843
891c9965
ÆAB
844void free_grep_patterns(struct grep_opt *opt)
845{
846 free_grep_pat(opt->pattern_list);
fb2ebe72 847 free_grep_pat(opt->header_list);
891c9965
ÆAB
848
849 if (opt->pattern_expression)
850 free_pattern_expr(opt->pattern_expression);
b48fb5b6
JH
851}
852
/*
 * Advance `cp` to the next newline, looking at no more than `*left`
 * bytes.  Returns the position reached (the newline, or the end of the
 * budget) and stores the number of bytes not consumed in `*left`.
 */
static const char *end_of_line(const char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining && *cp != '\n'; remaining--)
		cp++;
	*left = remaining;
	return cp;
}
863
/*
 * Return non-zero if `ch` can be part of a word: alphanumeric or '_'.
 *
 * The cast keeps the argument to isalnum() in the unsigned char range,
 * which a standard <ctype.h> requires; a negative plain char would
 * otherwise be undefined behavior there.
 */
static int word_char(char ch)
{
	return isalnum((unsigned char)ch) || ch == '_';
}
868
55f638bd
ML
869static void output_color(struct grep_opt *opt, const void *data, size_t size,
870 const char *color)
871{
daa0c3d9 872 if (want_color(opt->color) && color && color[0]) {
55f638bd
ML
873 opt->output(opt, color, strlen(color));
874 opt->output(opt, data, size);
875 opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
876 } else
877 opt->output(opt, data, size);
878}
879
880static void output_sep(struct grep_opt *opt, char sign)
881{
882 if (opt->null_following_name)
883 opt->output(opt, "\0", 1);
884 else
fa151dc5 885 output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
55f638bd
ML
886}
887
83caecca
RZ
888static void show_name(struct grep_opt *opt, const char *name)
889{
fa151dc5 890 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
5b594f45 891 opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
83caecca
RZ
892}
893
1a845fbc
JK
894static int patmatch(struct grep_pat *p,
895 const char *line, const char *eol,
97e77784
MK
896 regmatch_t *match, int eflags)
897{
898 int hit;
899
7599730b 900 if (p->pcre2_pattern)
94da9193 901 hit = !pcre2match(p, line, eol, match, eflags);
97e77784 902 else
b7d36ffc
JS
903 hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
904 eflags);
97e77784
MK
905
906 return hit;
907}
908
/*
 * Shorten the line [bol, *eol_p) so that it ends right after its last
 * '>' character.  The byte at `bol` itself is never examined.  If no
 * '>' is found, `*eol_p` is left unchanged.
 */
static void strip_timestamp(const char *bol, const char **eol_p)
{
	const char *cursor;

	for (cursor = *eol_p - 1; bol < cursor; cursor--) {
		if (*cursor == '>') {
			*eol_p = cursor + 1;
			return;
		}
	}
}
920
/*
 * Line prefixes of the header fields that header patterns can be
 * matched against, each paired with its length in bytes.
 * match_one_pattern() indexes this table with a pattern's `field`.
 */
#define HEADER_FIELD_ENTRY(prefix) { prefix, sizeof(prefix) - 1 }
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	HEADER_FIELD_ENTRY("author "),
	HEADER_FIELD_ENTRY("committer "),
	HEADER_FIELD_ENTRY("reflog "),
};
#undef HEADER_FIELD_ENTRY
929
3f566c4e
HM
930static int headerless_match_one_pattern(struct grep_pat *p,
931 const char *bol, const char *eol,
932 enum grep_context ctx,
933 regmatch_t *pmatch, int eflags)
83b5d2f5
JH
934{
935 int hit = 0;
e701fadb 936 const char *start = bol;
83b5d2f5 937
480c1ca6
JH
938 if ((p->token != GREP_PATTERN) &&
939 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
940 return 0;
941
83b5d2f5 942 again:
97e77784 943 hit = patmatch(p, bol, eol, pmatch, eflags);
83b5d2f5 944
d7eb527d 945 if (hit && p->word_regexp) {
83b5d2f5 946 if ((pmatch[0].rm_so < 0) ||
84201eae 947 (eol - bol) < pmatch[0].rm_so ||
83b5d2f5
JH
948 (pmatch[0].rm_eo < 0) ||
949 (eol - bol) < pmatch[0].rm_eo)
950 die("regexp returned nonsense");
951
952 /* Match beginning must be either beginning of the
953 * line, or at word boundary (i.e. the last char must
954 * not be a word char). Similarly, match end must be
955 * either end of the line, or at word boundary
956 * (i.e. the next char must not be a word char).
957 */
fb62eb7f 958 if ( ((pmatch[0].rm_so == 0) ||
83b5d2f5
JH
959 !word_char(bol[pmatch[0].rm_so-1])) &&
960 ((pmatch[0].rm_eo == (eol-bol)) ||
961 !word_char(bol[pmatch[0].rm_eo])) )
962 ;
963 else
964 hit = 0;
965
84201eae
RS
966 /* Words consist of at least one character. */
967 if (pmatch->rm_so == pmatch->rm_eo)
968 hit = 0;
969
83b5d2f5
JH
970 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
971 /* There could be more than one match on the
972 * line, and the first match might not be
973 * strict word match. But later ones could be!
fb62eb7f
RS
974 * Forward to the next possible start, i.e. the
975 * next position following a non-word char.
83b5d2f5
JH
976 */
977 bol = pmatch[0].rm_so + bol + 1;
fb62eb7f
RS
978 while (word_char(bol[-1]) && bol < eol)
979 bol++;
dbb6a4ad 980 eflags |= REG_NOTBOL;
fb62eb7f
RS
981 if (bol < eol)
982 goto again;
83b5d2f5
JH
983 }
984 }
e701fadb
RS
985 if (hit) {
986 pmatch[0].rm_so += bol - start;
987 pmatch[0].rm_eo += bol - start;
988 }
83b5d2f5
JH
989 return hit;
990}
991
3f566c4e
HM
992static int match_one_pattern(struct grep_pat *p,
993 const char *bol, const char *eol,
994 enum grep_context ctx, regmatch_t *pmatch,
995 int eflags)
996{
997 const char *field;
998 size_t len;
999
1000 if (p->token == GREP_PATTERN_HEAD) {
1001 assert(p->field < ARRAY_SIZE(header_field));
1002 field = header_field[p->field].field;
1003 len = header_field[p->field].len;
1004 if (strncmp(bol, field, len))
1005 return 0;
1006 bol += len;
1007
1008 switch (p->field) {
1009 case GREP_HEADER_AUTHOR:
1010 case GREP_HEADER_COMMITTER:
1011 strip_timestamp(bol, &eol);
1012 break;
1013 default:
1014 break;
1015 }
1016 }
1017
1018 return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
1019}
1020
1021
1a845fbc
JK
1022static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
1023 const char *bol, const char *eol,
1024 enum grep_context ctx, ssize_t *col,
68d686e6 1025 ssize_t *icol, int collect_hits)
83b5d2f5 1026{
0ab7befa
JH
1027 int h = 0;
1028
83b5d2f5 1029 switch (x->node) {
5aaeb733
JH
1030 case GREP_NODE_TRUE:
1031 h = 1;
1032 break;
83b5d2f5 1033 case GREP_NODE_ATOM:
68d686e6
TB
1034 {
1035 regmatch_t tmp;
1036 h = match_one_pattern(x->u.atom, bol, eol, ctx,
1037 &tmp, 0);
1038 if (h && (*col < 0 || tmp.rm_so < *col))
1039 *col = tmp.rm_so;
1040 }
794c0002
RS
1041 if (x->u.atom->token == GREP_PATTERN_BODY)
1042 opt->body_hit |= h;
83b5d2f5
JH
1043 break;
1044 case GREP_NODE_NOT:
68d686e6
TB
1045 /*
1046 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1047 */
1048 h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1049 0);
0ab7befa 1050 break;
83b5d2f5 1051 case GREP_NODE_AND:
017c0fcf 1052 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
68d686e6 1053 icol, 0);
017c0fcf
TB
1054 if (h || opt->columnnum) {
1055 /*
1056 * Don't short-circuit AND when given --column, since a
1057 * NOT earlier in the tree may turn this into an OR. In
1058 * this case, see the below comment.
1059 */
1060 h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1061 ctx, col, icol, 0);
1062 }
0ab7befa 1063 break;
83b5d2f5 1064 case GREP_NODE_OR:
017c0fcf
TB
1065 if (!(collect_hits || opt->columnnum)) {
1066 /*
1067 * Don't short-circuit OR when given --column (or
1068 * collecting hits) to ensure we don't skip a later
1069 * child that would produce an earlier match.
1070 */
68d686e6
TB
1071 return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1072 ctx, col, icol, 0) ||
1073 match_expr_eval(opt, x->u.binary.right, bol,
1074 eol, ctx, col, icol, 0));
017c0fcf 1075 }
68d686e6
TB
1076 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1077 icol, 0);
017c0fcf
TB
1078 if (collect_hits)
1079 x->u.binary.left->hit |= h;
68d686e6 1080 h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
017c0fcf 1081 icol, collect_hits);
0ab7befa
JH
1082 break;
1083 default:
d7530708 1084 die("Unexpected node type (internal error) %d", x->node);
83b5d2f5 1085 }
0ab7befa
JH
1086 if (collect_hits)
1087 x->hit |= h;
1088 return h;
83b5d2f5
JH
1089}
1090
1a845fbc
JK
1091static int match_expr(struct grep_opt *opt,
1092 const char *bol, const char *eol,
68d686e6
TB
1093 enum grep_context ctx, ssize_t *col,
1094 ssize_t *icol, int collect_hits)
83b5d2f5
JH
1095{
1096 struct grep_expr *x = opt->pattern_expression;
68d686e6 1097 return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
83b5d2f5
JH
1098}
1099
1a845fbc
JK
1100static int match_line(struct grep_opt *opt,
1101 const char *bol, const char *eol,
68d686e6 1102 ssize_t *col, ssize_t *icol,
0ab7befa 1103 enum grep_context ctx, int collect_hits)
83b5d2f5
JH
1104{
1105 struct grep_pat *p;
017c0fcf 1106 int hit = 0;
79212772 1107
db84376f 1108 if (opt->pattern_expression)
68d686e6
TB
1109 return match_expr(opt, bol, eol, ctx, col, icol,
1110 collect_hits);
0ab7befa
JH
1111
1112 /* we do not call with collect_hits without being extended */
83b5d2f5 1113 for (p = opt->pattern_list; p; p = p->next) {
68d686e6
TB
1114 regmatch_t tmp;
1115 if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
017c0fcf
TB
1116 hit |= 1;
1117 if (!opt->columnnum) {
1118 /*
1119 * Without --column, any single match on a line
1120 * is enough to know that it needs to be
1121 * printed. With --column, scan _all_ patterns
1122 * to find the earliest.
1123 */
1124 break;
1125 }
1126 if (*col < 0 || tmp.rm_so < *col)
1127 *col = tmp.rm_so;
68d686e6 1128 }
83b5d2f5 1129 }
017c0fcf 1130 return hit;
83b5d2f5
JH
1131}
1132
1a845fbc
JK
1133static int match_next_pattern(struct grep_pat *p,
1134 const char *bol, const char *eol,
7e8f59d5
RS
1135 enum grep_context ctx,
1136 regmatch_t *pmatch, int eflags)
1137{
1138 regmatch_t match;
1139
3f566c4e 1140 if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
7e8f59d5
RS
1141 return 0;
1142 if (match.rm_so < 0 || match.rm_eo < 0)
1143 return 0;
1144 if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1145 if (match.rm_so > pmatch->rm_so)
1146 return 1;
1147 if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1148 return 1;
1149 }
1150 pmatch->rm_so = match.rm_so;
1151 pmatch->rm_eo = match.rm_eo;
1152 return 1;
1153}
1154
3f566c4e
HM
1155int grep_next_match(struct grep_opt *opt,
1156 const char *bol, const char *eol,
1157 enum grep_context ctx, regmatch_t *pmatch,
1158 enum grep_header_field field, int eflags)
7e8f59d5
RS
1159{
1160 struct grep_pat *p;
1161 int hit = 0;
1162
1163 pmatch->rm_so = pmatch->rm_eo = -1;
1164 if (bol < eol) {
3f566c4e
HM
1165 for (p = ((ctx == GREP_CONTEXT_HEAD)
1166 ? opt->header_list : opt->pattern_list);
1167 p; p = p->next) {
7e8f59d5 1168 switch (p->token) {
7e8f59d5 1169 case GREP_PATTERN_HEAD:
3f566c4e
HM
1170 if ((field != GREP_HEADER_FIELD_MAX) &&
1171 (p->field != field))
1172 continue;
1173 /* fall thru */
1174 case GREP_PATTERN: /* atom */
7e8f59d5
RS
1175 case GREP_PATTERN_BODY:
1176 hit |= match_next_pattern(p, bol, eol, ctx,
1177 pmatch, eflags);
1178 break;
1179 default:
1180 break;
1181 }
1182 }
1183 }
1184 return hit;
1185}
1186
c707ded3
TB
1187static void show_line_header(struct grep_opt *opt, const char *name,
1188 unsigned lno, ssize_t cno, char sign)
7e8f59d5 1189{
1d84f72e 1190 if (opt->heading && opt->last_shown == 0) {
fa151dc5 1191 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1d84f72e
RS
1192 opt->output(opt, "\n", 1);
1193 }
5dd06d38
RS
1194 opt->last_shown = lno;
1195
1d84f72e 1196 if (!opt->heading && opt->pathname) {
fa151dc5 1197 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
55f638bd 1198 output_sep(opt, sign);
5b594f45
FK
1199 }
1200 if (opt->linenum) {
1201 char buf[32];
1a168e5c 1202 xsnprintf(buf, sizeof(buf), "%d", lno);
fa151dc5 1203 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
55f638bd 1204 output_sep(opt, sign);
5b594f45 1205 }
89252cd0
TB
1206 /*
1207 * Treat 'cno' as the 1-indexed offset from the start of a non-context
1208 * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1209 * being called with a context line.
1210 */
1211 if (opt->columnnum && cno) {
1212 char buf[32];
1213 xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
d036d667 1214 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
89252cd0
TB
1215 output_sep(opt, sign);
1216 }
c707ded3
TB
1217}
1218
1a845fbc
JK
1219static void show_line(struct grep_opt *opt,
1220 const char *bol, const char *eol,
c707ded3
TB
1221 const char *name, unsigned lno, ssize_t cno, char sign)
1222{
1223 int rest = eol - bol;
9d8db06e
TB
1224 const char *match_color = NULL;
1225 const char *line_color = NULL;
c707ded3
TB
1226
1227 if (opt->file_break && opt->last_shown == 0) {
1228 if (opt->show_hunk_mark)
1229 opt->output(opt, "\n", 1);
1230 } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1231 if (opt->last_shown == 0) {
1232 if (opt->show_hunk_mark) {
87ece7ce 1233 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
c707ded3
TB
1234 opt->output(opt, "\n", 1);
1235 }
1236 } else if (lno > opt->last_shown + 1) {
87ece7ce 1237 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
c707ded3
TB
1238 opt->output(opt, "\n", 1);
1239 }
1240 }
9d8db06e
TB
1241 if (!opt->only_matching) {
1242 /*
1243 * In case the line we're being called with contains more than
1244 * one match, leave printing each header to the loop below.
1245 */
1246 show_line_header(opt, name, lno, cno, sign);
1247 }
1248 if (opt->color || opt->only_matching) {
7e8f59d5
RS
1249 regmatch_t match;
1250 enum grep_context ctx = GREP_CONTEXT_BODY;
7e8f59d5
RS
1251 int eflags = 0;
1252
9d8db06e
TB
1253 if (opt->color) {
1254 if (sign == ':')
87ece7ce 1255 match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
9d8db06e 1256 else
87ece7ce 1257 match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
9d8db06e 1258 if (sign == ':')
87ece7ce 1259 line_color = opt->colors[GREP_COLOR_SELECTED];
9d8db06e 1260 else if (sign == '-')
87ece7ce 1261 line_color = opt->colors[GREP_COLOR_CONTEXT];
9d8db06e 1262 else if (sign == '=')
87ece7ce 1263 line_color = opt->colors[GREP_COLOR_FUNCTION];
9d8db06e 1264 }
3f566c4e
HM
1265 while (grep_next_match(opt, bol, eol, ctx, &match,
1266 GREP_HEADER_FIELD_MAX, eflags)) {
1f5b9cc4
RS
1267 if (match.rm_so == match.rm_eo)
1268 break;
5b594f45 1269
9d8db06e
TB
1270 if (opt->only_matching)
1271 show_line_header(opt, name, lno, cno, sign);
1272 else
1273 output_color(opt, bol, match.rm_so, line_color);
55f638bd 1274 output_color(opt, bol + match.rm_so,
79a77109 1275 match.rm_eo - match.rm_so, match_color);
9d8db06e
TB
1276 if (opt->only_matching)
1277 opt->output(opt, "\n", 1);
7e8f59d5 1278 bol += match.rm_eo;
9d8db06e 1279 cno += match.rm_eo;
7e8f59d5
RS
1280 rest -= match.rm_eo;
1281 eflags = REG_NOTBOL;
1282 }
7e8f59d5 1283 }
9d8db06e
TB
1284 if (!opt->only_matching) {
1285 output_color(opt, bol, rest, line_color);
1286 opt->output(opt, "\n", 1);
1287 }
7e8f59d5
RS
1288}
1289
78db6ea9
JK
/* Non-zero when the grep locks below must actually be taken. */
int grep_use_locks;

/*
 * Guards the gitattributes machinery, which is not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

/* Take grep_attr_mutex; a no-op while grep_use_locks is zero. */
static inline void grep_attr_lock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_lock(&grep_attr_mutex);
}

/* Release grep_attr_mutex; a no-op while grep_use_locks is zero. */
static inline void grep_attr_unlock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_unlock(&grep_attr_mutex);
}
b3aeb285 1309
1a845fbc
JK
1310static int match_funcname(struct grep_opt *opt, struct grep_source *gs,
1311 const char *bol, const char *eol)
2944e4e6 1312{
60ecac98 1313 xdemitconf_t *xecfg = opt->priv;
0579f91d 1314 if (xecfg && !xecfg->find_func) {
acd00ea0 1315 grep_source_load_driver(gs, opt->repo->index);
94ad9d9e
JK
1316 if (gs->driver->funcname.pattern) {
1317 const struct userdiff_funcname *pe = &gs->driver->funcname;
0579f91d
TR
1318 xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1319 } else {
1320 xecfg = opt->priv = NULL;
1321 }
1322 }
1323
1324 if (xecfg) {
60ecac98
RS
1325 char buf[1];
1326 return xecfg->find_func(bol, eol - bol, buf, 1,
1327 xecfg->find_func_priv) >= 0;
1328 }
1329
2944e4e6
RS
1330 if (bol == eol)
1331 return 0;
1332 if (isalpha(*bol) || *bol == '_' || *bol == '$')
1333 return 1;
1334 return 0;
1335}
1336
e1327023 1337static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1a845fbc 1338 const char *bol, unsigned lno)
2944e4e6 1339{
e1327023 1340 while (bol > gs->buf) {
1a845fbc 1341 const char *eol = --bol;
2944e4e6 1342
e1327023 1343 while (bol > gs->buf && bol[-1] != '\n')
2944e4e6
RS
1344 bol--;
1345 lno--;
1346
1347 if (lno <= opt->last_shown)
1348 break;
1349
e1327023 1350 if (match_funcname(opt, gs, bol, eol)) {
89252cd0 1351 show_line(opt, bol, eol, gs->name, lno, 0, '=');
2944e4e6
RS
1352 break;
1353 }
1354 }
1355}
1356
a5dc20b0
RS
1357static int is_empty_line(const char *bol, const char *eol);
1358
/*
 * Show the lines that precede the hit line "lno" (which spans [bol, end))
 * as leading context.
 *
 * The function first walks backwards from "bol" until it reaches line
 * "from" (or the start of gs->buf), then walks forward again calling
 * show_line() for every line up to, but not including, "lno".  A line
 * recorded in funcname_lno is shown with sign '=', all others with '-'.
 *
 * "from" starts as lno - opt->pre_context (or 1) and is never allowed to
 * go below opt->last_shown + 1.  With opt->funcbody the window is first
 * widened to everything after opt->last_shown and narrowed back to
 * orig_from once the lines being looked for have been found.
 */
e1327023 1359static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
1a845fbc 1360 const char *bol, const char *end, unsigned lno)
49de3216 1361{
6653a01b 1362 unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
a5dc20b0 1363 int funcname_needed = !!opt->funcname, comment_needed = 0;
ba8ea749 1364
49de3216
RS
1365 if (opt->pre_context < lno)
1366 from = lno - opt->pre_context;
1367 if (from <= opt->last_shown)
1368 from = opt->last_shown + 1;
6653a01b 1369 orig_from = from;
a5dc20b0
RS
/*
 * In funcbody mode: if the hit line itself is a function line, only the
 * lines directly above it (up to an empty line or another function line)
 * are still wanted; otherwise the enclosing function line must be found.
 */
1370 if (opt->funcbody) {
1371 if (match_funcname(opt, gs, bol, end))
1372 comment_needed = 1;
1373 else
1374 funcname_needed = 1;
6653a01b
RS
1375 from = opt->last_shown + 1;
1376 }
49de3216
RS
1377
1378 /* Rewind. */
6653a01b 1379 while (bol > gs->buf && cur > from) {
1a845fbc
JK
1380 const char *next_bol = bol;
1381 const char *eol = --bol;
2944e4e6 1382
e1327023 1383 while (bol > gs->buf && bol[-1] != '\n')
49de3216
RS
1384 bol--;
1385 cur--;
a5dc20b0
RS
/*
 * An empty line or a function line ends the block being collected above
 * a function line.  The window shrinks back to orig_from; if the current
 * line falls outside of it, step forward again to next_bol and stop.
 */
1386 if (comment_needed && (is_empty_line(bol, eol) ||
1387 match_funcname(opt, gs, bol, eol))) {
1388 comment_needed = 0;
1389 from = orig_from;
1390 if (cur < from) {
1391 cur++;
1392 bol = next_bol;
1393 break;
1394 }
1395 }
e1327023 1396 if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
2944e4e6
RS
1397 funcname_lno = cur;
1398 funcname_needed = 0;
a5dc20b0
RS
1399 if (opt->funcbody)
1400 comment_needed = 1;
1401 else
1402 from = orig_from;
2944e4e6 1403 }
49de3216
RS
1404 }
1405
2944e4e6
RS
1406 /* We need to look even further back to find a function signature. */
1407 if (opt->funcname && funcname_needed)
e1327023 1408 show_funcname_line(opt, gs, bol, cur);
2944e4e6 1409
49de3216
RS
1410 /* Back forward. */
1411 while (cur < lno) {
/* note: "sign" is declared as a plain const char here, not a pointer */
1a845fbc 1412 const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
49de3216
RS
1413
1414 while (*eol != '\n')
1415 eol++;
89252cd0 1416 show_line(opt, bol, eol, gs->name, cur, 0, sign);
49de3216
RS
1417 bol = eol + 1;
1418 cur++;
1419 }
1420}
1421
a26345b6
JH
1422static int should_lookahead(struct grep_opt *opt)
1423{
1424 struct grep_pat *p;
1425
db84376f 1426 if (opt->pattern_expression)
a26345b6
JH
1427 return 0; /* punt for too complex stuff */
1428 if (opt->invert)
1429 return 0;
1430 for (p = opt->pattern_list; p; p = p->next) {
1431 if (p->token != GREP_PATTERN)
1432 return 0; /* punt for "header only" and stuff */
1433 }
1434 return 1;
1435}
1436
1437static int look_ahead(struct grep_opt *opt,
1438 unsigned long *left_p,
1439 unsigned *lno_p,
1a845fbc 1440 const char **bol_p)
a26345b6
JH
1441{
1442 unsigned lno = *lno_p;
1a845fbc 1443 const char *bol = *bol_p;
a26345b6 1444 struct grep_pat *p;
1a845fbc 1445 const char *sp, *last_bol;
a26345b6
JH
1446 regoff_t earliest = -1;
1447
1448 for (p = opt->pattern_list; p; p = p->next) {
1449 int hit;
1450 regmatch_t m;
1451
97e77784 1452 hit = patmatch(p, bol, bol + *left_p, &m, 0);
a26345b6
JH
1453 if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1454 continue;
1455 if (earliest < 0 || m.rm_so < earliest)
1456 earliest = m.rm_so;
1457 }
1458
1459 if (earliest < 0) {
1460 *bol_p = bol + *left_p;
1461 *left_p = 0;
1462 return 1;
1463 }
1464 for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1465 ; /* find the beginning of the line */
1466 last_bol = sp;
1467
1468 for (sp = bol; sp < last_bol; sp++) {
1469 if (*sp == '\n')
1470 lno++;
1471 }
1472 *left_p -= last_bol - bol;
1473 *bol_p = last_bol;
1474 *lno_p = lno;
1475 return 0;
1476}
1477
38bbc2ea
NTND
1478static int fill_textconv_grep(struct repository *r,
1479 struct userdiff_driver *driver,
335ec3bf
JK
1480 struct grep_source *gs)
1481{
1482 struct diff_filespec *df;
1483 char *buf;
1484 size_t size;
1485
1486 if (!driver || !driver->textconv)
1487 return grep_source_load(gs);
1488
1489 /*
1490 * The textconv interface is intimately tied to diff_filespecs, so we
1491 * have to pretend to be one. If we could unify the grep_source
1492 * and diff_filespec structs, this mess could just go away.
1493 */
1494 df = alloc_filespec(gs->path);
1495 switch (gs->type) {
1c41c82b 1496 case GREP_SOURCE_OID:
335ec3bf
JK
1497 fill_filespec(df, gs->identifier, 1, 0100644);
1498 break;
1499 case GREP_SOURCE_FILE:
14228447 1500 fill_filespec(df, null_oid(), 0, 0100644);
335ec3bf
JK
1501 break;
1502 default:
033abf97 1503 BUG("attempt to textconv something without a path?");
335ec3bf
JK
1504 }
1505
1506 /*
1d1729ca
MT
1507 * fill_textconv is not remotely thread-safe; it modifies the global
1508 * diff tempfile structure, writes to the_repo's odb and might
1509 * internally call thread-unsafe functions such as the
1510 * prepare_packed_git() lazy-initializator. Because of the last two, we
1511 * must ensure mutual exclusion between this call and the object reading
1512 * API, thus we use obj_read_lock() here.
1513 *
1514 * TODO: allowing text conversion to run in parallel with object
1515 * reading operations might increase performance in the multithreaded
1516 * non-worktreee git-grep with --textconv.
335ec3bf 1517 */
1d1729ca 1518 obj_read_lock();
38bbc2ea 1519 size = fill_textconv(r, driver, df, &buf);
1d1729ca 1520 obj_read_unlock();
335ec3bf
JK
1521 free_filespec(df);
1522
1523 /*
1524 * The normal fill_textconv usage by the diff machinery would just keep
1525 * the textconv'd buf separate from the diff_filespec. But much of the
1526 * grep code passes around a grep_source and assumes that its "buf"
1527 * pointer is the beginning of the thing we are searching. So let's
1528 * install our textconv'd version into the grep_source, taking care not
1529 * to leak any existing buffer.
1530 */
1531 grep_source_clear_data(gs);
1532 gs->buf = buf;
1533 gs->size = size;
1534
1535 return 0;
1536}
1537
4aa2c475
RS
/*
 * Return 1 if [bol, eol) consists solely of whitespace (which includes
 * the empty range), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
	const char *p = bol;

	for (; p < eol; p++)
		if (!isspace(*p))
			break;
	return p == eol;
}
1544
/*
 * Search one grep_source line by line and emit whatever "opt" asks for.
 *
 * The source is loaded (through textconv when allowed and configured) and
 * every line is handed to match_line().  With "collect_hits" set, that is
 * all that happens: nothing is shown and 0 is returned once the buffer
 * has been walked.
 *
 * Otherwise, in the order checked below:
 *  - a source judged binary under GREP_BINARY_NOMATCH returns 0 at once;
 *  - a failing fill_textconv_grep() returns 0;
 *  - with unmatch_name_only, the first hit returns 0; if no line hit, the
 *    name is shown and 1 is returned (or just opt->unmatch_name_only when
 *    status_only is set);
 *  - with status_only, name_only or a binary-only match, the first hit
 *    returns 1 (after showing the name / "Binary file ... matches");
 *  - with count, "name:count" is printed when count is non-zero and 1 is
 *    returned;
 *  - in all remaining cases matching lines and their context are shown
 *    and the result is whether any line was shown as a hit.
 *
 * While running, opt->priv points at a local xdemitconf_t that
 * match_funcname() uses; it is cleared again on the normal exit path at
 * the bottom (the early returns above leave it as is).
 */
e1327023 1545static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
83b5d2f5 1546{
1a845fbc
JK
1547 const char *bol;
1548 const char *peek_bol = NULL;
e1327023 1549 unsigned long left;
83b5d2f5 1550 unsigned lno = 1;
83b5d2f5 1551 unsigned last_hit = 0;
83b5d2f5 1552 int binary_match_only = 0;
83b5d2f5 1553 unsigned count = 0;
a26345b6 1554 int try_lookahead = 0;
ba8ea749 1555 int show_function = 0;
335ec3bf 1556 struct userdiff_driver *textconv = NULL;
480c1ca6 1557 enum grep_context ctx = GREP_CONTEXT_HEAD;
60ecac98 1558 xdemitconf_t xecfg;
83b5d2f5 1559
de99eb0c
ES
1560 if (!opt->status_only && gs->name == NULL)
1561 BUG("grep call which could print a name requires "
1562 "grep_source.name be non-NULL");
1563
5b594f45
FK
/* fall back to writing to stdout when the caller set no output callback */
1564 if (!opt->output)
1565 opt->output = std_output;
1566
ba8ea749
RS
1567 if (opt->pre_context || opt->post_context || opt->file_break ||
1568 opt->funcbody) {
08303c36
RS
1569 /* Show hunk marks, except for the first file. */
1570 if (opt->last_shown)
1571 opt->show_hunk_mark = 1;
1572 /*
1573 * If we're using threads then we can't easily identify
1574 * the first file. Always put hunk marks in that case
1575 * and skip the very first one later in work_done().
1576 */
1577 if (opt->output != std_output)
1578 opt->show_hunk_mark = 1;
1579 }
431d6e7b
RS
1580 opt->last_shown = 0;
1581
335ec3bf 1582 if (opt->allow_textconv) {
acd00ea0 1583 grep_source_load_driver(gs, opt->repo->index);
335ec3bf
JK
1584 /*
1585 * We might set up the shared textconv cache data here, which
1d1729ca
MT
1586 * is not thread-safe. Also, get_oid_with_context() and
1587 * parse_object() might be internally called. As they are not
84544f2e 1588 * currently thread-safe and might be racy with object reading,
1d1729ca 1589 * obj_read_lock() must be called.
335ec3bf
JK
1590 */
1591 grep_attr_lock();
1d1729ca 1592 obj_read_lock();
bd7ad45b 1593 textconv = userdiff_get_textconv(opt->repo, gs->driver);
1d1729ca 1594 obj_read_unlock();
335ec3bf
JK
1595 grep_attr_unlock();
1596 }
1597
1598 /*
1599 * We know the result of a textconv is text, so we only have to care
1600 * about binary handling if we are not using it.
1601 */
1602 if (!textconv) {
1603 switch (opt->binary) {
1604 case GREP_BINARY_DEFAULT:
acd00ea0 1605 if (grep_source_is_binary(gs, opt->repo->index))
335ec3bf
JK
1606 binary_match_only = 1;
1607 break;
1608 case GREP_BINARY_NOMATCH:
acd00ea0 1609 if (grep_source_is_binary(gs, opt->repo->index))
335ec3bf
JK
1610 return 0; /* Assume unmatch */
1611 break;
1612 case GREP_BINARY_TEXT:
1613 break;
1614 default:
033abf97 1615 BUG("unknown binary handling mode");
335ec3bf 1616 }
83b5d2f5
JH
1617 }
1618
/* hand match_funcname() a zeroed xdemitconf_t through opt->priv */
60ecac98 1619 memset(&xecfg, 0, sizeof(xecfg));
0579f91d
TR
1620 opt->priv = &xecfg;
1621
a26345b6 1622 try_lookahead = should_lookahead(opt);
60ecac98 1623
38bbc2ea 1624 if (fill_textconv_grep(opt->repo, textconv, gs) < 0)
08265798
JK
1625 return 0;
1626
e1327023
JK
1627 bol = gs->buf;
1628 left = gs->size;
83b5d2f5 1629 while (left) {
1a845fbc 1630 const char *eol;
0ab7befa 1631 int hit;
89252cd0 1632 ssize_t cno;
68d686e6 1633 ssize_t col = -1, icol = -1;
83b5d2f5 1634
a26345b6 1635 /*
8997da38 1636 * look_ahead() skips quickly to the line that possibly
a26345b6
JH
1637 * has the next hit; don't call it if we need to do
1638 * something more than just skipping the current line
1639 * in response to an unmatch for the current line. E.g.
1640 * inside a post-context window, we will show the current
1641 * line as a context around the previous hit when it
1642 * doesn't hit.
1643 */
1644 if (try_lookahead
1645 && !(last_hit
ba8ea749
RS
1646 && (show_function ||
1647 lno <= last_hit + opt->post_context))
a26345b6
JH
1648 && look_ahead(opt, &left, &lno, &bol))
1649 break;
83b5d2f5 1650 eol = end_of_line(bol, &left);
83b5d2f5 1651
480c1ca6
JH
/* the first empty line switches from header context to body context */
1652 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1653 ctx = GREP_CONTEXT_BODY;
1654
68d686e6 1655 hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
83b5d2f5 1656
0ab7befa
JH
/* when only collecting hit markers, nothing below applies to this line */
1657 if (collect_hits)
1658 goto next_line;
1659
83b5d2f5
JH
1660 /* "grep -v -e foo -e bla" should list lines
1661 * that do not have either, so inversion should
1662 * be done outside.
1663 */
1664 if (opt->invert)
1665 hit = !hit;
1666 if (opt->unmatch_name_only) {
1667 if (hit)
1668 return 0;
1669 goto next_line;
1670 }
68437ede 1671 if (hit && (opt->max_count < 0 || count < opt->max_count)) {
83b5d2f5
JH
1672 count++;
1673 if (opt->status_only)
1674 return 1;
321ffcc0 1675 if (opt->name_only) {
e1327023 1676 show_name(opt, gs->name);
321ffcc0
RS
1677 return 1;
1678 }
c30c10cf
RS
1679 if (opt->count)
1680 goto next_line;
83b5d2f5 1681 if (binary_match_only) {
5b594f45 1682 opt->output(opt, "Binary file ", 12);
e1327023 1683 output_color(opt, gs->name, strlen(gs->name),
fa151dc5 1684 opt->colors[GREP_COLOR_FILENAME]);
5b594f45 1685 opt->output(opt, " matches\n", 9);
83b5d2f5
JH
1686 return 1;
1687 }
83b5d2f5
JH
1688 /* Hit at this line. If we haven't shown the
1689 * pre-context lines, we would need to show them.
83b5d2f5 1690 */
ba8ea749 1691 if (opt->pre_context || opt->funcbody)
e1327023 1692 show_pre_context(opt, gs, bol, eol, lno);
2944e4e6 1693 else if (opt->funcname)
e1327023 1694 show_funcname_line(opt, gs, bol, lno);
89252cd0
TB
1695 cno = opt->invert ? icol : col;
1696 if (cno < 0) {
1697 /*
1698 * A negative cno indicates that there was no
1699 * match on the line. We are thus inverted and
1700 * being asked to show all lines that _don't_
1701 * match a given expression. Therefore, set cno
1702 * to 0 to suggest the whole line matches.
1703 */
1704 cno = 0;
1705 }
1706 show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
5dd06d38 1707 last_hit = lno;
ba8ea749
RS
1708 if (opt->funcbody)
1709 show_function = 1;
1710 goto next_line;
83b5d2f5 1711 }
4aa2c475
RS
1712 if (show_function && (!peek_bol || peek_bol < bol)) {
1713 unsigned long peek_left = left;
1a845fbc 1714 const char *peek_eol = eol;
4aa2c475
RS
1715
1716 /*
1717 * Trailing empty lines are not interesting.
1718 * Peek past them to see if they belong to the
1719 * body of the current function.
1720 */
1721 peek_bol = bol;
1722 while (is_empty_line(peek_bol, peek_eol)) {
1723 peek_bol = peek_eol + 1;
1724 peek_eol = end_of_line(peek_bol, &peek_left);
1725 }
1726
1727 if (match_funcname(opt, gs, peek_bol, peek_eol))
1728 show_function = 0;
1729 }
ba8ea749
RS
1730 if (show_function ||
1731 (last_hit && lno <= last_hit + opt->post_context)) {
83b5d2f5
JH
1732 /* If the last hit is within the post context,
1733 * we need to show this line.
1734 */
89252cd0 1735 show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
83b5d2f5 1736 }
83b5d2f5
JH
1737
1738 next_line:
1739 bol = eol + 1;
1740 if (!left)
1741 break;
1742 left--;
1743 lno++;
1744 }
1745
0ab7befa
JH
1746 if (collect_hits)
1747 return 0;
b48fb5b6 1748
83b5d2f5 1749 if (opt->status_only)
e1f68c66 1750 return opt->unmatch_name_only;
83b5d2f5
JH
1751 if (opt->unmatch_name_only) {
1752 /* We did not see any hit, so we want to show this */
e1327023 1753 show_name(opt, gs->name);
83b5d2f5
JH
1754 return 1;
1755 }
1756
60ecac98
RS
1757 xdiff_clear_find_func(&xecfg);
1758 opt->priv = NULL;
1759
83b5d2f5
JH
1760 /* NEEDSWORK:
1761 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1762 * which feels mostly useless but sometimes useful. Maybe
1763 * make it another option? For now suppress them.
1764 */
5b594f45
FK
1765 if (opt->count && count) {
1766 char buf[32];
f76d947a
RS
1767 if (opt->pathname) {
1768 output_color(opt, gs->name, strlen(gs->name),
fa151dc5 1769 opt->colors[GREP_COLOR_FILENAME]);
f76d947a
RS
1770 output_sep(opt, ':');
1771 }
1a168e5c 1772 xsnprintf(buf, sizeof(buf), "%u\n", count);
5b594f45 1773 opt->output(opt, buf, strlen(buf));
c30c10cf 1774 return 1;
5b594f45 1775 }
83b5d2f5
JH
1776 return !!last_hit;
1777}
0ab7befa
JH
1779static void clr_hit_marker(struct grep_expr *x)
1780{
1781 /* All-hit markers are meaningful only at the very top level
1782 * OR node.
1783 */
1784 while (1) {
1785 x->hit = 0;
1786 if (x->node != GREP_NODE_OR)
1787 return;
1788 x->u.binary.left->hit = 0;
1789 x = x->u.binary.right;
1790 }
1791}
1792
1793static int chk_hit_marker(struct grep_expr *x)
1794{
1795 /* Top level nodes have hit markers. See if they all are hits */
1796 while (1) {
1797 if (x->node != GREP_NODE_OR)
1798 return x->hit;
1799 if (!x->u.binary.left->hit)
1800 return 0;
1801 x = x->u.binary.right;
1802 }
1803}
1804
e1327023 1805int grep_source(struct grep_opt *opt, struct grep_source *gs)
0ab7befa
JH
1806{
1807 /*
1808 * we do not have to do the two-pass grep when we do not check
1809 * buffer-wide "all-match".
1810 */
794c0002 1811 if (!opt->all_match && !opt->no_body_match)
e1327023 1812 return grep_source_1(opt, gs, 0);
0ab7befa
JH
1813
1814 /* Otherwise the toplevel "or" terms hit a bit differently.
1815 * We first clear hit markers from them.
1816 */
1817 clr_hit_marker(opt->pattern_expression);
794c0002 1818 opt->body_hit = 0;
e1327023 1819 grep_source_1(opt, gs, 1);
0ab7befa 1820
794c0002
RS
1821 if (opt->all_match && !chk_hit_marker(opt->pattern_expression))
1822 return 0;
1823 if (opt->no_body_match && opt->body_hit)
0ab7befa
JH
1824 return 0;
1825
e1327023
JK
1826 return grep_source_1(opt, gs, 0);
1827}
1828
1e668716
JK
1829static void grep_source_init_buf(struct grep_source *gs,
1830 const char *buf,
50d92b5f
JT
1831 unsigned long size)
1832{
1833 gs->type = GREP_SOURCE_BUF;
1834 gs->name = NULL;
1835 gs->path = NULL;
1836 gs->buf = buf;
1837 gs->size = size;
1838 gs->driver = NULL;
1839 gs->identifier = NULL;
1840}
1841
1e668716 1842int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size)
e1327023
JK
1843{
1844 struct grep_source gs;
1845 int r;
1846
50d92b5f 1847 grep_source_init_buf(&gs, buf, size);
e1327023
JK
1848
1849 r = grep_source(opt, &gs);
1850
1851 grep_source_clear(&gs);
1852 return r;
1853}
1854
50d92b5f
JT
1855void grep_source_init_file(struct grep_source *gs, const char *name,
1856 const char *path)
e1327023 1857{
50d92b5f 1858 gs->type = GREP_SOURCE_FILE;
8c53f071
JK
1859 gs->name = xstrdup_or_null(name);
1860 gs->path = xstrdup_or_null(path);
e1327023
JK
1861 gs->buf = NULL;
1862 gs->size = 0;
94ad9d9e 1863 gs->driver = NULL;
50d92b5f
JT
1864 gs->identifier = xstrdup(path);
1865}
e1327023 1866
50d92b5f 1867void grep_source_init_oid(struct grep_source *gs, const char *name,
0693806b
JT
1868 const char *path, const struct object_id *oid,
1869 struct repository *repo)
50d92b5f
JT
1870{
1871 gs->type = GREP_SOURCE_OID;
1872 gs->name = xstrdup_or_null(name);
1873 gs->path = xstrdup_or_null(path);
1874 gs->buf = NULL;
1875 gs->size = 0;
1876 gs->driver = NULL;
1877 gs->identifier = oiddup(oid);
0693806b 1878 gs->repo = repo;
e1327023
JK
1879}
1880
1881void grep_source_clear(struct grep_source *gs)
1882{
88ce3ef6
ÆAB
1883 FREE_AND_NULL(gs->name);
1884 FREE_AND_NULL(gs->path);
1885 FREE_AND_NULL(gs->identifier);
e1327023
JK
1886 grep_source_clear_data(gs);
1887}
1888
1889void grep_source_clear_data(struct grep_source *gs)
1890{
1891 switch (gs->type) {
1892 case GREP_SOURCE_FILE:
1c41c82b 1893 case GREP_SOURCE_OID:
1e668716
JK
1894 /* these types own the buffer */
1895 free((char *)gs->buf);
1896 gs->buf = NULL;
e1327023
JK
1897 gs->size = 0;
1898 break;
1899 case GREP_SOURCE_BUF:
1900 /* leave user-provided buf intact */
1901 break;
1902 }
1903}
1904
1c41c82b 1905static int grep_source_load_oid(struct grep_source *gs)
e1327023
JK
1906{
1907 enum object_type type;
1908
0693806b
JT
1909 gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
1910 &gs->size);
e1327023
JK
1911 if (!gs->buf)
1912 return error(_("'%s': unable to read %s"),
1913 gs->name,
1c41c82b 1914 oid_to_hex(gs->identifier));
e1327023
JK
1915 return 0;
1916}
1917
1918static int grep_source_load_file(struct grep_source *gs)
1919{
1920 const char *filename = gs->identifier;
1921 struct stat st;
1922 char *data;
1923 size_t size;
1924 int i;
1925
1926 if (lstat(filename, &st) < 0) {
1927 err_ret:
1928 if (errno != ENOENT)
7645d8f1 1929 error_errno(_("failed to stat '%s'"), filename);
e1327023
JK
1930 return -1;
1931 }
1932 if (!S_ISREG(st.st_mode))
1933 return -1;
1934 size = xsize_t(st.st_size);
1935 i = open(filename, O_RDONLY);
1936 if (i < 0)
1937 goto err_ret;
3733e694 1938 data = xmallocz(size);
e1327023 1939 if (st.st_size != read_in_full(i, data, size)) {
7645d8f1 1940 error_errno(_("'%s': short read"), filename);
e1327023
JK
1941 close(i);
1942 free(data);
1943 return -1;
1944 }
1945 close(i);
e1327023
JK
1946
1947 gs->buf = data;
1948 gs->size = size;
1949 return 0;
1950}
1951
3083301e 1952static int grep_source_load(struct grep_source *gs)
e1327023
JK
1953{
1954 if (gs->buf)
1955 return 0;
1956
1957 switch (gs->type) {
1958 case GREP_SOURCE_FILE:
1959 return grep_source_load_file(gs);
1c41c82b
BW
1960 case GREP_SOURCE_OID:
1961 return grep_source_load_oid(gs);
e1327023
JK
1962 case GREP_SOURCE_BUF:
1963 return gs->buf ? 0 : -1;
1964 }
033abf97 1965 BUG("invalid grep_source type to load");
0ab7befa 1966}
94ad9d9e 1967
acd00ea0
NTND
1968void grep_source_load_driver(struct grep_source *gs,
1969 struct index_state *istate)
94ad9d9e
JK
1970{
1971 if (gs->driver)
1972 return;
1973
1974 grep_attr_lock();
1d1729ca 1975 if (gs->path)
acd00ea0 1976 gs->driver = userdiff_find_by_path(istate, gs->path);
94ad9d9e
JK
1977 if (!gs->driver)
1978 gs->driver = userdiff_find_by_name("default");
1979 grep_attr_unlock();
1980}
41b59bfc 1981
acd00ea0
NTND
1982static int grep_source_is_binary(struct grep_source *gs,
1983 struct index_state *istate)
41b59bfc 1984{
acd00ea0 1985 grep_source_load_driver(gs, istate);
41b59bfc
JK
1986 if (gs->driver->binary != -1)
1987 return gs->driver->binary;
1988
1989 if (!grep_source_load(gs))
1990 return buffer_is_binary(gs->buf, gs->size);
1991
1992 return 0;
1993}