]> git.ipfire.org Git - thirdparty/git.git/blame - grep.c
Merge branch 'master' of git://github.com/git-l10n/git-po
[thirdparty/git.git] / grep.c
CommitLineData
83b5d2f5 1#include "cache.h"
b2141fc1 2#include "config.h"
83b5d2f5 3#include "grep.h"
cbd53a21 4#include "object-store.h"
60ecac98 5#include "userdiff.h"
6bfce93e 6#include "xdiff-interface.h"
335ec3bf
JK
7#include "diff.h"
8#include "diffcore.h"
5c1ebcca 9#include "commit.h"
793dc676 10#include "quote.h"
3ac68a93 11#include "help.h"
83b5d2f5 12
07a7d656
JH
13static int grep_source_load(struct grep_source *gs);
14static int grep_source_is_binary(struct grep_source *gs);
15
7687a054
JH
16static struct grep_opt grep_defaults;
17
fa151dc5
NTND
18static const char *color_grep_slots[] = {
19 [GREP_COLOR_CONTEXT] = "context",
20 [GREP_COLOR_FILENAME] = "filename",
21 [GREP_COLOR_FUNCTION] = "function",
22 [GREP_COLOR_LINENO] = "lineNumber",
d036d667 23 [GREP_COLOR_COLUMNNO] = "column",
fa151dc5
NTND
24 [GREP_COLOR_MATCH_CONTEXT] = "matchContext",
25 [GREP_COLOR_MATCH_SELECTED] = "matchSelected",
26 [GREP_COLOR_SELECTED] = "selected",
27 [GREP_COLOR_SEP] = "separator",
28};
29
379642bc
BW
/*
 * Default output callback installed by init_grep_defaults(): write
 * the "size" bytes starting at "buf" to stdout.  "opt" is not used.
 */
static void std_output(struct grep_opt *opt, const void *buf, size_t size)
{
	fwrite(buf, size, 1, stdout);
}
34
75e5e9c3
SB
35static void color_set(char *dst, const char *color_bytes)
36{
37 xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes);
38}
39
7687a054
JH
40/*
41 * Initialize the grep_defaults template with hardcoded defaults.
42 * We could let the compiler do this, but without C99 initializers
43 * the code gets unwieldy and unreadable, so...
44 */
45void init_grep_defaults(void)
46{
47 struct grep_opt *opt = &grep_defaults;
918d4e1c
JH
48 static int run_once;
49
50 if (run_once)
51 return;
52 run_once++;
7687a054
JH
53
54 memset(opt, 0, sizeof(*opt));
55 opt->relative = 1;
56 opt->pathname = 1;
7687a054
JH
57 opt->max_depth = -1;
58 opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
fa151dc5
NTND
59 color_set(opt->colors[GREP_COLOR_CONTEXT], "");
60 color_set(opt->colors[GREP_COLOR_FILENAME], "");
61 color_set(opt->colors[GREP_COLOR_FUNCTION], "");
62 color_set(opt->colors[GREP_COLOR_LINENO], "");
d036d667 63 color_set(opt->colors[GREP_COLOR_COLUMNNO], "");
fa151dc5
NTND
64 color_set(opt->colors[GREP_COLOR_MATCH_CONTEXT], GIT_COLOR_BOLD_RED);
65 color_set(opt->colors[GREP_COLOR_MATCH_SELECTED], GIT_COLOR_BOLD_RED);
66 color_set(opt->colors[GREP_COLOR_SELECTED], "");
67 color_set(opt->colors[GREP_COLOR_SEP], GIT_COLOR_CYAN);
9d8db06e 68 opt->only_matching = 0;
7687a054 69 opt->color = -1;
379642bc 70 opt->output = std_output;
7687a054
JH
71}
72
73static int parse_pattern_type_arg(const char *opt, const char *arg)
74{
75 if (!strcmp(arg, "default"))
76 return GREP_PATTERN_TYPE_UNSPECIFIED;
77 else if (!strcmp(arg, "basic"))
78 return GREP_PATTERN_TYPE_BRE;
79 else if (!strcmp(arg, "extended"))
80 return GREP_PATTERN_TYPE_ERE;
81 else if (!strcmp(arg, "fixed"))
82 return GREP_PATTERN_TYPE_FIXED;
83 else if (!strcmp(arg, "perl"))
84 return GREP_PATTERN_TYPE_PCRE;
85 die("bad %s argument: %s", opt, arg);
86}
87
3ac68a93
NTND
88define_list_config_array_extra(color_grep_slots, {"match"});
89
7687a054
JH
90/*
91 * Read the configuration file once and store it in
92 * the grep_defaults template.
93 */
94int grep_config(const char *var, const char *value, void *cb)
95{
96 struct grep_opt *opt = &grep_defaults;
fa151dc5 97 const char *slot;
7687a054
JH
98
99 if (userdiff_config(var, value) < 0)
100 return -1;
101
102 if (!strcmp(var, "grep.extendedregexp")) {
c7e38551 103 opt->extended_regexp_option = git_config_bool(var, value);
7687a054
JH
104 return 0;
105 }
106
107 if (!strcmp(var, "grep.patterntype")) {
108 opt->pattern_type_option = parse_pattern_type_arg(var, value);
109 return 0;
110 }
111
112 if (!strcmp(var, "grep.linenumber")) {
113 opt->linenum = git_config_bool(var, value);
114 return 0;
115 }
6653fec3
TB
116 if (!strcmp(var, "grep.column")) {
117 opt->columnnum = git_config_bool(var, value);
118 return 0;
119 }
7687a054 120
6453f7b3
AS
121 if (!strcmp(var, "grep.fullname")) {
122 opt->relative = !git_config_bool(var, value);
123 return 0;
124 }
125
7687a054
JH
126 if (!strcmp(var, "color.grep"))
127 opt->color = git_config_colorbool(var, value);
fa151dc5
NTND
128 if (!strcmp(var, "color.grep.match")) {
129 if (grep_config("color.grep.matchcontext", value, cb) < 0)
130 return -1;
131 if (grep_config("color.grep.matchselected", value, cb) < 0)
132 return -1;
133 } else if (skip_prefix(var, "color.grep.", &slot)) {
134 int i = LOOKUP_CONFIG(color_grep_slots, slot);
135 char *color;
136
137 if (i < 0)
138 return -1;
139 color = opt->colors[i];
7687a054
JH
140 if (!value)
141 return config_error_nonbool(var);
f6c5a296 142 return color_parse(value, color);
7687a054
JH
143 }
144 return 0;
145}
146
147/*
148 * Initialize one instance of grep_opt and copy the
149 * default values from the template we read the configuration
150 * information in an earlier call to git_config(grep_config).
151 */
152void grep_init(struct grep_opt *opt, const char *prefix)
153{
154 struct grep_opt *def = &grep_defaults;
fa151dc5 155 int i;
7687a054
JH
156
157 memset(opt, 0, sizeof(*opt));
158 opt->prefix = prefix;
159 opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
160 opt->pattern_tail = &opt->pattern_list;
161 opt->header_tail = &opt->header_list;
162
9d8db06e 163 opt->only_matching = def->only_matching;
7687a054
JH
164 opt->color = def->color;
165 opt->extended_regexp_option = def->extended_regexp_option;
166 opt->pattern_type_option = def->pattern_type_option;
167 opt->linenum = def->linenum;
017c0fcf 168 opt->columnnum = def->columnnum;
7687a054
JH
169 opt->max_depth = def->max_depth;
170 opt->pathname = def->pathname;
7687a054 171 opt->relative = def->relative;
379642bc 172 opt->output = def->output;
7687a054 173
fa151dc5
NTND
174 for (i = 0; i < NR_GREP_COLORS; i++)
175 color_set(opt->colors[i], def->colors[i]);
7687a054 176}
07a7d656 177
8465541e 178static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
c5c31d33 179{
07a3d411
ÆAB
180 /*
181 * When committing to the pattern type by setting the relevant
182 * fields in grep_opt it's generally not necessary to zero out
183 * the fields we're not choosing, since they won't have been
184 * set by anything. The extended_regexp_option field is the
185 * only exception to this.
186 *
187 * This is because in the process of parsing grep.patternType
188 * & grep.extendedRegexp we set opt->pattern_type_option and
189 * opt->extended_regexp_option, respectively. We then
190 * internally use opt->extended_regexp_option to see if we're
191 * compiling an ERE. It must be unset if that's not actually
192 * the case.
193 */
194 if (pattern_type != GREP_PATTERN_TYPE_ERE &&
195 opt->extended_regexp_option)
196 opt->extended_regexp_option = 0;
197
c5c31d33
JH
198 switch (pattern_type) {
199 case GREP_PATTERN_TYPE_UNSPECIFIED:
200 /* fall through */
201
202 case GREP_PATTERN_TYPE_BRE:
c5c31d33
JH
203 break;
204
205 case GREP_PATTERN_TYPE_ERE:
07a3d411 206 opt->extended_regexp_option = 1;
c5c31d33
JH
207 break;
208
209 case GREP_PATTERN_TYPE_FIXED:
210 opt->fixed = 1;
c5c31d33
JH
211 break;
212
213 case GREP_PATTERN_TYPE_PCRE:
94da9193 214#ifdef USE_LIBPCRE2
94da9193
ÆAB
215 opt->pcre2 = 1;
216#else
217 /*
218 * It's important that pcre1 always be assigned to
219 * even when there's no USE_LIBPCRE* defined. We still
220 * call the PCRE stub function, it just dies with
221 * "cannot use Perl-compatible regexes[...]".
222 */
6d4b5747 223 opt->pcre1 = 1;
94da9193 224#endif
c5c31d33
JH
225 break;
226 }
227}
228
8465541e
JH
229void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
230{
231 if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
232 grep_set_pattern_type_option(pattern_type, opt);
233 else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
234 grep_set_pattern_type_option(opt->pattern_type_option, opt);
235 else if (opt->extended_regexp_option)
07a3d411
ÆAB
236 /*
237 * This branch *must* happen after setting from the
238 * opt->pattern_type_option above, we don't want
239 * grep.extendedRegexp to override grep.patternType!
240 */
8465541e
JH
241 grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
242}
243
fc456751
RS
244static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
245 const char *origin, int no,
246 enum grep_pat_token t,
247 enum grep_header_field field)
a4d7d2c6
JH
248{
249 struct grep_pat *p = xcalloc(1, sizeof(*p));
526a858a 250 p->pattern = xmemdupz(pat, patlen);
fc456751
RS
251 p->patternlen = patlen;
252 p->origin = origin;
253 p->no = no;
254 p->token = t;
a4d7d2c6 255 p->field = field;
fc456751
RS
256 return p;
257}
258
2b3873ff
RS
259static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
260{
261 **tail = p;
262 *tail = &p->next;
a4d7d2c6 263 p->next = NULL;
526a858a
RS
264
265 switch (p->token) {
266 case GREP_PATTERN: /* atom */
267 case GREP_PATTERN_HEAD:
268 case GREP_PATTERN_BODY:
269 for (;;) {
270 struct grep_pat *new_pat;
271 size_t len = 0;
272 char *cp = p->pattern + p->patternlen, *nl = NULL;
273 while (++len <= p->patternlen) {
274 if (*(--cp) == '\n') {
275 nl = cp;
276 break;
277 }
278 }
279 if (!nl)
280 break;
281 new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
282 p->no, p->token, p->field);
283 new_pat->next = p->next;
284 if (!p->next)
285 *tail = &new_pat->next;
286 p->next = new_pat;
287 *nl = '\0';
288 p->patternlen -= len;
289 }
290 break;
291 default:
292 break;
293 }
2b3873ff
RS
294}
295
fc456751
RS
296void append_header_grep_pattern(struct grep_opt *opt,
297 enum grep_header_field field, const char *pat)
298{
299 struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
300 GREP_PATTERN_HEAD, field);
baa6378f
JH
301 if (field == GREP_HEADER_REFLOG)
302 opt->use_reflog_filter = 1;
2b3873ff 303 do_append_grep_pat(&opt->header_tail, p);
a4d7d2c6
JH
304}
305
83b5d2f5
JH
306void append_grep_pattern(struct grep_opt *opt, const char *pat,
307 const char *origin, int no, enum grep_pat_token t)
ed40a095
RS
308{
309 append_grep_pat(opt, pat, strlen(pat), origin, no, t);
310}
311
312void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
313 const char *origin, int no, enum grep_pat_token t)
83b5d2f5 314{
fc456751 315 struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
2b3873ff 316 do_append_grep_pat(&opt->pattern_tail, p);
83b5d2f5
JH
317}
318
5b594f45
FK
319struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
320{
321 struct grep_pat *pat;
322 struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
323 *ret = *opt;
324
325 ret->pattern_list = NULL;
326 ret->pattern_tail = &ret->pattern_list;
327
328 for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
329 {
330 if(pat->token == GREP_PATTERN_HEAD)
331 append_header_grep_pattern(ret, pat->field,
332 pat->pattern);
333 else
ed40a095
RS
334 append_grep_pat(ret, pat->pattern, pat->patternlen,
335 pat->origin, pat->no, pat->token);
5b594f45
FK
336 }
337
338 return ret;
339}
340
a30c148a
MK
341static NORETURN void compile_regexp_failed(const struct grep_pat *p,
342 const char *error)
343{
344 char where[1024];
345
346 if (p->no)
19bdd3e7 347 xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
a30c148a 348 else if (p->origin)
19bdd3e7 349 xsnprintf(where, sizeof(where), "%s, ", p->origin);
a30c148a
MK
350 else
351 where[0] = 0;
352
353 die("%s'%s': %s", where, p->pattern, error);
354}
355
543f1c0c
ÆAB
/*
 * Return 1 if none of the "len" bytes at "s" is a regex special
 * character (per is_regex_special()), 0 otherwise.
 */
static int is_fixed(const char *s, size_t len)
{
	const char *end = s + len;

	for (; s < end; s++)
		if (is_regex_special(*s))
			return 0;

	return 1;
}
367
219e65b6
ÆAB
/*
 * Return 1 if the "len" bytes at "s" contain a NUL byte, else 0.
 *
 * regcomp() cannot accept patterns with NULs, so callers using it
 * treat any such pattern as a fixed string.
 */
static int has_null(const char *s, size_t len)
{
	return memchr(s, 0, len) != NULL;
}
379
3485bea1 380#ifdef USE_LIBPCRE1
6d4b5747 381static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
63e7e9d8
MK
382{
383 const char *error;
384 int erroffset;
fba4f125 385 int options = PCRE_MULTILINE;
63e7e9d8 386
9d9babb8
NTND
387 if (opt->ignore_case) {
388 if (has_non_ascii(p->pattern))
6d4b5747 389 p->pcre1_tables = pcre_maketables();
63e7e9d8 390 options |= PCRE_CASELESS;
9d9babb8 391 }
18547aac
NTND
392 if (is_utf8_locale() && has_non_ascii(p->pattern))
393 options |= PCRE_UTF8;
63e7e9d8 394
6d4b5747
ÆAB
395 p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
396 p->pcre1_tables);
397 if (!p->pcre1_regexp)
63e7e9d8
MK
398 compile_regexp_failed(p, error);
399
2fff1e19 400 p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
6d4b5747 401 if (!p->pcre1_extra_info && error)
63e7e9d8 402 die("%s", error);
fbaceaac 403
e87de7ca 404#ifdef GIT_PCRE1_USE_JIT
fbaceaac
ÆAB
405 pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
406 if (p->pcre1_jit_on == 1) {
407 p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
408 if (!p->pcre1_jit_stack)
409 die("Couldn't allocate PCRE JIT stack");
410 pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
411 } else if (p->pcre1_jit_on != 0) {
033abf97 412 BUG("The pcre1_jit_on variable should be 0 or 1, not %d",
fbaceaac
ÆAB
413 p->pcre1_jit_on);
414 }
415#endif
63e7e9d8
MK
416}
417
6d4b5747 418static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
63e7e9d8
MK
419 regmatch_t *match, int eflags)
420{
421 int ovector[30], ret, flags = 0;
422
423 if (eflags & REG_NOTBOL)
424 flags |= PCRE_NOTBOL;
425
e87de7ca 426#ifdef GIT_PCRE1_USE_JIT
fbaceaac
ÆAB
427 if (p->pcre1_jit_on) {
428 ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
429 eol - line, 0, flags, ovector,
430 ARRAY_SIZE(ovector), p->pcre1_jit_stack);
431 } else
432#endif
433 {
434 ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
435 eol - line, 0, flags, ovector,
436 ARRAY_SIZE(ovector));
437 }
438
63e7e9d8
MK
439 if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
440 die("pcre_exec failed with error code %d", ret);
441 if (ret > 0) {
442 ret = 0;
443 match->rm_so = ovector[0];
444 match->rm_eo = ovector[1];
445 }
446
447 return ret;
448}
449
6d4b5747 450static void free_pcre1_regexp(struct grep_pat *p)
63e7e9d8 451{
6d4b5747 452 pcre_free(p->pcre1_regexp);
e87de7ca 453#ifdef GIT_PCRE1_USE_JIT
fbaceaac
ÆAB
454 if (p->pcre1_jit_on) {
455 pcre_free_study(p->pcre1_extra_info);
456 pcre_jit_stack_free(p->pcre1_jit_stack);
457 } else
458#endif
459 {
460 pcre_free(p->pcre1_extra_info);
461 }
6d4b5747 462 pcre_free((void *)p->pcre1_tables);
63e7e9d8 463}
3485bea1 464#else /* !USE_LIBPCRE1 */
/*
 * Stand-in used when built without USE_LIBPCRE1: asking for a
 * Perl-compatible pattern is a fatal error.
 */
static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
	die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
469
/*
 * Stand-in used when built without USE_LIBPCRE1: always returns 1
 * and never touches its arguments.
 */
static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
		regmatch_t *match, int eflags)
{
	return 1;
}
475
/* Stand-in used when built without USE_LIBPCRE1: nothing to free. */
static void free_pcre1_regexp(struct grep_pat *p)
{
}
3485bea1 479#endif /* !USE_LIBPCRE1 */
63e7e9d8 480
94da9193
ÆAB
481#ifdef USE_LIBPCRE2
482static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
483{
484 int error;
485 PCRE2_UCHAR errbuf[256];
486 PCRE2_SIZE erroffset;
487 int options = PCRE2_MULTILINE;
488 const uint8_t *character_tables = NULL;
489 int jitret;
a25b9085
ÆAB
490 int patinforet;
491 size_t jitsizearg;
94da9193
ÆAB
492
493 assert(opt->pcre2);
494
495 p->pcre2_compile_context = NULL;
496
497 if (opt->ignore_case) {
498 if (has_non_ascii(p->pattern)) {
499 character_tables = pcre2_maketables(NULL);
500 p->pcre2_compile_context = pcre2_compile_context_create(NULL);
501 pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
502 }
503 options |= PCRE2_CASELESS;
504 }
505 if (is_utf8_locale() && has_non_ascii(p->pattern))
506 options |= PCRE2_UTF;
507
508 p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
509 p->patternlen, options, &error, &erroffset,
510 p->pcre2_compile_context);
511
512 if (p->pcre2_pattern) {
513 p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
514 if (!p->pcre2_match_data)
515 die("Couldn't allocate PCRE2 match data");
516 } else {
517 pcre2_get_error_message(error, errbuf, sizeof(errbuf));
518 compile_regexp_failed(p, (const char *)&errbuf);
519 }
520
521 pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
522 if (p->pcre2_jit_on == 1) {
523 jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
524 if (jitret)
525 die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
a25b9085
ÆAB
526
527 /*
528 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
529 * tells us whether the library itself supports JIT,
530 * but to see whether we're going to be actually using
531 * JIT we need to extract PCRE2_INFO_JITSIZE from the
532 * pattern *after* we do pcre2_jit_compile() above.
533 *
534 * This is because if the pattern contains the
535 * (*NO_JIT) verb (see pcre2syntax(3))
536 * pcre2_jit_compile() will exit early with 0. If we
537 * then proceed to call pcre2_jit_match() further down
538 * the line instead of pcre2_match() we'll either
539 * segfault (pre PCRE 10.31) or run into a fatal error
540 * (post PCRE2 10.31)
541 */
542 patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
543 if (patinforet)
544 BUG("pcre2_pattern_info() failed: %d", patinforet);
545 if (jitsizearg == 0) {
546 p->pcre2_jit_on = 0;
547 return;
548 }
549
94da9193
ÆAB
550 p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
551 if (!p->pcre2_jit_stack)
552 die("Couldn't allocate PCRE2 JIT stack");
553 p->pcre2_match_context = pcre2_match_context_create(NULL);
674ad936 554 if (!p->pcre2_match_context)
94da9193
ÆAB
555 die("Couldn't allocate PCRE2 match context");
556 pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
557 } else if (p->pcre2_jit_on != 0) {
033abf97 558 BUG("The pcre2_jit_on variable should be 0 or 1, not %d",
94da9193
ÆAB
559 p->pcre1_jit_on);
560 }
561}
562
563static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
564 regmatch_t *match, int eflags)
565{
566 int ret, flags = 0;
567 PCRE2_SIZE *ovector;
568 PCRE2_UCHAR errbuf[256];
569
570 if (eflags & REG_NOTBOL)
571 flags |= PCRE2_NOTBOL;
572
573 if (p->pcre2_jit_on)
574 ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
575 eol - line, 0, flags, p->pcre2_match_data,
576 NULL);
577 else
578 ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
579 eol - line, 0, flags, p->pcre2_match_data,
580 NULL);
581
582 if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
583 pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
584 die("%s failed with error code %d: %s",
585 (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
586 errbuf);
587 }
588 if (ret > 0) {
589 ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
590 ret = 0;
591 match->rm_so = (int)ovector[0];
592 match->rm_eo = (int)ovector[1];
593 }
594
595 return ret;
596}
597
598static void free_pcre2_pattern(struct grep_pat *p)
599{
600 pcre2_compile_context_free(p->pcre2_compile_context);
601 pcre2_code_free(p->pcre2_pattern);
602 pcre2_match_data_free(p->pcre2_match_data);
603 pcre2_jit_stack_free(p->pcre2_jit_stack);
604 pcre2_match_context_free(p->pcre2_match_context);
605}
606#else /* !USE_LIBPCRE2 */
/*
 * Stand-in used when built without USE_LIBPCRE2.
 *
 * grep_set_pattern_type_option() only sets opt->pcre2 under
 * USE_LIBPCRE2, so this is not expected to be reached; it dies if it
 * ever is.
 */
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
	die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
616
/*
 * Stand-in used when built without USE_LIBPCRE2: always returns 1
 * and never touches its arguments.
 */
static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
		regmatch_t *match, int eflags)
{
	return 1;
}
622
/* Stand-in used when built without USE_LIBPCRE2: nothing to free. */
static void free_pcre2_pattern(struct grep_pat *p)
{
}
626#endif /* !USE_LIBPCRE2 */
627
793dc676
NTND
628static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
629{
630 struct strbuf sb = STRBUF_INIT;
631 int err;
1ceababc 632 int regflags = 0;
793dc676
NTND
633
634 basic_regex_quote_buf(&sb, p->pattern);
793dc676
NTND
635 if (opt->ignore_case)
636 regflags |= REG_ICASE;
637 err = regcomp(&p->regexp, sb.buf, regflags);
638 if (opt->debug)
639 fprintf(stderr, "fixed %s\n", sb.buf);
640 strbuf_release(&sb);
641 if (err) {
642 char errbuf[1024];
643 regerror(err, &p->regexp, errbuf, sizeof(errbuf));
793dc676
NTND
644 compile_regexp_failed(p, errbuf);
645 }
646}
647
83b5d2f5
JH
648static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
649{
07a3d411 650 int ascii_only;
c822255c 651 int err;
07a3d411 652 int regflags = REG_NEWLINE;
c822255c 653
d7eb527d 654 p->word_regexp = opt->word_regexp;
5183bf67 655 p->ignore_case = opt->ignore_case;
5c1ebcca 656 ascii_only = !has_non_ascii(p->pattern);
d7eb527d 657
793dc676
NTND
658 /*
659 * Even when -F (fixed) asks us to do a non-regexp search, we
660 * may not be able to correctly case-fold when -i
661 * (ignore-case) is asked (in which case, we'll synthesize a
662 * regexp to match the pattern that matches regexp special
663 * characters literally, while ignoring case differences). On
664 * the other hand, even without -F, if the pattern does not
665 * have any regexp special characters and there is no need for
666 * case-folding search, we can internally turn it into a
667 * simple string match using kws. p->fixed tells us if we
668 * want to use kws.
669 */
219e65b6
ÆAB
670 if (opt->fixed ||
671 has_null(p->pattern, p->patternlen) ||
672 is_fixed(p->pattern, p->patternlen))
07a3d411 673 p->fixed = !p->ignore_case || ascii_only;
9eceddee
FK
674
675 if (p->fixed) {
07a3d411 676 p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
9eceddee
FK
677 kwsincr(p->kws, p->pattern, p->patternlen);
678 kwsprep(p->kws);
c822255c 679 return;
793dc676
NTND
680 } else if (opt->fixed) {
681 /*
682 * We come here when the pattern has the non-ascii
683 * characters we cannot case-fold, and asked to
684 * ignore-case.
685 */
686 compile_fixed_regexp(p, opt);
687 return;
9eceddee 688 }
c822255c 689
94da9193
ÆAB
690 if (opt->pcre2) {
691 compile_pcre2_pattern(p, opt);
692 return;
693 }
694
6d4b5747
ÆAB
695 if (opt->pcre1) {
696 compile_pcre1_regexp(p, opt);
63e7e9d8
MK
697 return;
698 }
699
07a3d411
ÆAB
700 if (p->ignore_case)
701 regflags |= REG_ICASE;
702 if (opt->extended_regexp_option)
703 regflags |= REG_EXTENDED;
704 err = regcomp(&p->regexp, p->pattern, regflags);
83b5d2f5
JH
705 if (err) {
706 char errbuf[1024];
83b5d2f5 707 regerror(err, &p->regexp, errbuf, 1024);
a30c148a 708 compile_regexp_failed(p, errbuf);
83b5d2f5
JH
709 }
710}
711
0ab7befa 712static struct grep_expr *compile_pattern_or(struct grep_pat **);
83b5d2f5
JH
713static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
714{
715 struct grep_pat *p;
716 struct grep_expr *x;
717
718 p = *list;
c922b01f
LT
719 if (!p)
720 return NULL;
83b5d2f5
JH
721 switch (p->token) {
722 case GREP_PATTERN: /* atom */
480c1ca6
JH
723 case GREP_PATTERN_HEAD:
724 case GREP_PATTERN_BODY:
83b5d2f5
JH
725 x = xcalloc(1, sizeof (struct grep_expr));
726 x->node = GREP_NODE_ATOM;
727 x->u.atom = p;
728 *list = p->next;
729 return x;
730 case GREP_OPEN_PAREN:
731 *list = p->next;
0ab7befa 732 x = compile_pattern_or(list);
83b5d2f5
JH
733 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
734 die("unmatched parenthesis");
735 *list = (*list)->next;
736 return x;
737 default:
738 return NULL;
739 }
740}
741
742static struct grep_expr *compile_pattern_not(struct grep_pat **list)
743{
744 struct grep_pat *p;
745 struct grep_expr *x;
746
747 p = *list;
c922b01f
LT
748 if (!p)
749 return NULL;
83b5d2f5
JH
750 switch (p->token) {
751 case GREP_NOT:
752 if (!p->next)
753 die("--not not followed by pattern expression");
754 *list = p->next;
755 x = xcalloc(1, sizeof (struct grep_expr));
756 x->node = GREP_NODE_NOT;
757 x->u.unary = compile_pattern_not(list);
758 if (!x->u.unary)
759 die("--not followed by non pattern expression");
760 return x;
761 default:
762 return compile_pattern_atom(list);
763 }
764}
765
766static struct grep_expr *compile_pattern_and(struct grep_pat **list)
767{
768 struct grep_pat *p;
769 struct grep_expr *x, *y, *z;
770
771 x = compile_pattern_not(list);
772 p = *list;
773 if (p && p->token == GREP_AND) {
774 if (!p->next)
775 die("--and not followed by pattern expression");
776 *list = p->next;
777 y = compile_pattern_and(list);
778 if (!y)
779 die("--and not followed by pattern expression");
780 z = xcalloc(1, sizeof (struct grep_expr));
781 z->node = GREP_NODE_AND;
782 z->u.binary.left = x;
783 z->u.binary.right = y;
784 return z;
785 }
786 return x;
787}
788
789static struct grep_expr *compile_pattern_or(struct grep_pat **list)
790{
791 struct grep_pat *p;
792 struct grep_expr *x, *y, *z;
793
794 x = compile_pattern_and(list);
795 p = *list;
796 if (x && p && p->token != GREP_CLOSE_PAREN) {
797 y = compile_pattern_or(list);
798 if (!y)
799 die("not a pattern expression %s", p->pattern);
800 z = xcalloc(1, sizeof (struct grep_expr));
801 z->node = GREP_NODE_OR;
802 z->u.binary.left = x;
803 z->u.binary.right = y;
804 return z;
805 }
806 return x;
807}
808
/*
 * Entry point of the expression parser: a full expression is an
 * or-expression.
 */
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
	return compile_pattern_or(list);
}
813
17bf35a3
JH
/* Write "in" spaces to stderr; nothing is written when in <= 0. */
static void indent(int in)
{
	for (; in > 0; in--)
		fputc(' ', stderr);
}
819
820static void dump_grep_pat(struct grep_pat *p)
821{
822 switch (p->token) {
823 case GREP_AND: fprintf(stderr, "*and*"); break;
824 case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
825 case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
826 case GREP_NOT: fprintf(stderr, "*not*"); break;
827 case GREP_OR: fprintf(stderr, "*or*"); break;
828
829 case GREP_PATTERN: fprintf(stderr, "pattern"); break;
830 case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
831 case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
832 }
833
834 switch (p->token) {
835 default: break;
836 case GREP_PATTERN_HEAD:
837 fprintf(stderr, "<head %d>", p->field); break;
838 case GREP_PATTERN_BODY:
839 fprintf(stderr, "<body>"); break;
840 }
841 switch (p->token) {
842 default: break;
843 case GREP_PATTERN_HEAD:
844 case GREP_PATTERN_BODY:
845 case GREP_PATTERN:
846 fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
847 break;
848 }
849 fputc('\n', stderr);
850}
851
852static void dump_grep_expression_1(struct grep_expr *x, int in)
853{
854 indent(in);
855 switch (x->node) {
856 case GREP_NODE_TRUE:
857 fprintf(stderr, "true\n");
858 break;
859 case GREP_NODE_ATOM:
860 dump_grep_pat(x->u.atom);
861 break;
862 case GREP_NODE_NOT:
863 fprintf(stderr, "(not\n");
864 dump_grep_expression_1(x->u.unary, in+1);
865 indent(in);
866 fprintf(stderr, ")\n");
867 break;
868 case GREP_NODE_AND:
869 fprintf(stderr, "(and\n");
870 dump_grep_expression_1(x->u.binary.left, in+1);
871 dump_grep_expression_1(x->u.binary.right, in+1);
872 indent(in);
873 fprintf(stderr, ")\n");
874 break;
875 case GREP_NODE_OR:
876 fprintf(stderr, "(or\n");
877 dump_grep_expression_1(x->u.binary.left, in+1);
878 dump_grep_expression_1(x->u.binary.right, in+1);
879 indent(in);
880 fprintf(stderr, ")\n");
881 break;
882 }
883}
884
07a7d656 885static void dump_grep_expression(struct grep_opt *opt)
17bf35a3
JH
886{
887 struct grep_expr *x = opt->pattern_expression;
888
889 if (opt->all_match)
890 fprintf(stderr, "[all-match]\n");
891 dump_grep_expression_1(x, 0);
892 fflush(NULL);
893}
894
5aaeb733
JH
895static struct grep_expr *grep_true_expr(void)
896{
897 struct grep_expr *z = xcalloc(1, sizeof(*z));
898 z->node = GREP_NODE_TRUE;
899 return z;
900}
901
902static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
903{
904 struct grep_expr *z = xcalloc(1, sizeof(*z));
905 z->node = GREP_NODE_OR;
906 z->u.binary.left = left;
907 z->u.binary.right = right;
908 return z;
909}
910
95ce9ce2 911static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
83b5d2f5
JH
912{
913 struct grep_pat *p;
95ce9ce2 914 struct grep_expr *header_expr;
5aaeb733
JH
915 struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
916 enum grep_header_field fld;
83b5d2f5 917
95ce9ce2
JH
918 if (!opt->header_list)
919 return NULL;
2385f246 920
95ce9ce2
JH
921 for (p = opt->header_list; p; p = p->next) {
922 if (p->token != GREP_PATTERN_HEAD)
033abf97 923 BUG("a non-header pattern in grep header list.");
3ce3ffb8
AP
924 if (p->field < GREP_HEADER_FIELD_MIN ||
925 GREP_HEADER_FIELD_MAX <= p->field)
033abf97 926 BUG("unknown header field %d", p->field);
95ce9ce2 927 compile_regexp(p, opt);
80235ba7 928 }
5aaeb733
JH
929
930 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
931 header_group[fld] = NULL;
932
933 for (p = opt->header_list; p; p = p->next) {
934 struct grep_expr *h;
935 struct grep_pat *pp = p;
936
937 h = compile_pattern_atom(&pp);
938 if (!h || pp != p->next)
033abf97 939 BUG("malformed header expr");
5aaeb733
JH
940 if (!header_group[p->field]) {
941 header_group[p->field] = h;
942 continue;
943 }
944 header_group[p->field] = grep_or_expr(h, header_group[p->field]);
945 }
946
947 header_expr = NULL;
948
949 for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
950 if (!header_group[fld])
951 continue;
952 if (!header_expr)
953 header_expr = grep_true_expr();
954 header_expr = grep_or_expr(header_group[fld], header_expr);
955 }
95ce9ce2
JH
956 return header_expr;
957}
958
13e4fc7e
JH
959static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
960{
961 struct grep_expr *z = x;
962
963 while (x) {
964 assert(x->node == GREP_NODE_OR);
965 if (x->u.binary.right &&
966 x->u.binary.right->node == GREP_NODE_TRUE) {
967 x->u.binary.right = y;
968 break;
969 }
970 x = x->u.binary.right;
971 }
972 return z;
973}
974
17bf35a3 975static void compile_grep_patterns_real(struct grep_opt *opt)
95ce9ce2
JH
976{
977 struct grep_pat *p;
978 struct grep_expr *header_expr = prep_header_patterns(opt);
0ab7befa 979
83b5d2f5 980 for (p = opt->pattern_list; p; p = p->next) {
480c1ca6
JH
981 switch (p->token) {
982 case GREP_PATTERN: /* atom */
983 case GREP_PATTERN_HEAD:
984 case GREP_PATTERN_BODY:
c822255c 985 compile_regexp(p, opt);
480c1ca6
JH
986 break;
987 default:
83b5d2f5 988 opt->extended = 1;
480c1ca6
JH
989 break;
990 }
83b5d2f5
JH
991 }
992
80235ba7
JH
993 if (opt->all_match || header_expr)
994 opt->extended = 1;
17bf35a3 995 else if (!opt->extended && !opt->debug)
83b5d2f5
JH
996 return;
997
83b5d2f5 998 p = opt->pattern_list;
ba150a3f
MB
999 if (p)
1000 opt->pattern_expression = compile_pattern_expr(&p);
83b5d2f5
JH
1001 if (p)
1002 die("incomplete pattern expression: %s", p->pattern);
80235ba7
JH
1003
1004 if (!header_expr)
1005 return;
1006
5aaeb733 1007 if (!opt->pattern_expression)
80235ba7 1008 opt->pattern_expression = header_expr;
13e4fc7e
JH
1009 else if (opt->all_match)
1010 opt->pattern_expression = grep_splice_or(header_expr,
1011 opt->pattern_expression);
5aaeb733
JH
1012 else
1013 opt->pattern_expression = grep_or_expr(opt->pattern_expression,
1014 header_expr);
80235ba7 1015 opt->all_match = 1;
83b5d2f5
JH
1016}
1017
17bf35a3
JH
1018void compile_grep_patterns(struct grep_opt *opt)
1019{
1020 compile_grep_patterns_real(opt);
1021 if (opt->debug)
1022 dump_grep_expression(opt);
1023}
1024
b48fb5b6
JH
1025static void free_pattern_expr(struct grep_expr *x)
1026{
1027 switch (x->node) {
5aaeb733 1028 case GREP_NODE_TRUE:
b48fb5b6
JH
1029 case GREP_NODE_ATOM:
1030 break;
1031 case GREP_NODE_NOT:
1032 free_pattern_expr(x->u.unary);
1033 break;
1034 case GREP_NODE_AND:
1035 case GREP_NODE_OR:
1036 free_pattern_expr(x->u.binary.left);
1037 free_pattern_expr(x->u.binary.right);
1038 break;
1039 }
1040 free(x);
1041}
1042
1043void free_grep_patterns(struct grep_opt *opt)
1044{
1045 struct grep_pat *p, *n;
1046
1047 for (p = opt->pattern_list; p; p = n) {
1048 n = p->next;
1049 switch (p->token) {
1050 case GREP_PATTERN: /* atom */
1051 case GREP_PATTERN_HEAD:
1052 case GREP_PATTERN_BODY:
9eceddee
FK
1053 if (p->kws)
1054 kwsfree(p->kws);
6d4b5747
ÆAB
1055 else if (p->pcre1_regexp)
1056 free_pcre1_regexp(p);
94da9193
ÆAB
1057 else if (p->pcre2_pattern)
1058 free_pcre2_pattern(p);
63e7e9d8
MK
1059 else
1060 regfree(&p->regexp);
526a858a 1061 free(p->pattern);
b48fb5b6
JH
1062 break;
1063 default:
1064 break;
1065 }
1066 free(p);
1067 }
1068
1069 if (!opt->extended)
1070 return;
1071 free_pattern_expr(opt->pattern_expression);
1072}
1073
83b5d2f5
JH
/*
 * Advance cp to the next '\n' or until *left bytes have been consumed,
 * whichever comes first.  *left is decremented by the number of bytes
 * skipped; the returned pointer is the stopping position.
 */
static char *end_of_line(char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining && *cp != '\n'; remaining--)
		cp++;
	*left = remaining;
	return cp;
}
1084
/* Return 1 if ch is an alphanumeric character or '_', otherwise 0. */
static int word_char(char ch)
{
	if (ch == '_')
		return 1;
	return isalnum(ch) ? 1 : 0;
}
1089
55f638bd
ML
1090static void output_color(struct grep_opt *opt, const void *data, size_t size,
1091 const char *color)
1092{
daa0c3d9 1093 if (want_color(opt->color) && color && color[0]) {
55f638bd
ML
1094 opt->output(opt, color, strlen(color));
1095 opt->output(opt, data, size);
1096 opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
1097 } else
1098 opt->output(opt, data, size);
1099}
1100
1101static void output_sep(struct grep_opt *opt, char sign)
1102{
1103 if (opt->null_following_name)
1104 opt->output(opt, "\0", 1);
1105 else
fa151dc5 1106 output_color(opt, &sign, 1, opt->colors[GREP_COLOR_SEP]);
55f638bd
ML
1107}
1108
83caecca
RZ
1109static void show_name(struct grep_opt *opt, const char *name)
1110{
fa151dc5 1111 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
5b594f45 1112 opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
83caecca
RZ
1113}
1114
ed40a095
RS
1115static int fixmatch(struct grep_pat *p, char *line, char *eol,
1116 regmatch_t *match)
83b5d2f5 1117{
9eceddee
FK
1118 struct kwsmatch kwsm;
1119 size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
1120 if (offset == -1) {
83b5d2f5
JH
1121 match->rm_so = match->rm_eo = -1;
1122 return REG_NOMATCH;
9eceddee
FK
1123 } else {
1124 match->rm_so = offset;
1125 match->rm_eo = match->rm_so + kwsm.size[0];
83b5d2f5
JH
1126 return 0;
1127 }
1128}
1129
97e77784
MK
1130static int patmatch(struct grep_pat *p, char *line, char *eol,
1131 regmatch_t *match, int eflags)
1132{
1133 int hit;
1134
1135 if (p->fixed)
1136 hit = !fixmatch(p, line, eol, match);
6d4b5747
ÆAB
1137 else if (p->pcre1_regexp)
1138 hit = !pcre1match(p, line, eol, match, eflags);
94da9193
ÆAB
1139 else if (p->pcre2_pattern)
1140 hit = !pcre2match(p, line, eol, match, eflags);
97e77784 1141 else
b7d36ffc
JS
1142 hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
1143 eflags);
97e77784
MK
1144
1145 return hit;
1146}
1147
a4d7d2c6
JH
/*
 * Scan backwards from *eol_p (never looking at *bol itself) for the last
 * '>'.  When found, the byte right after it is overwritten with NUL,
 * *eol_p is moved to that position and the overwritten byte is returned
 * so the caller can restore it.  Returns 0 when no '>' is found.
 */
static int strip_timestamp(char *bol, char **eol_p)
{
	char *scan;

	for (scan = *eol_p - 1; bol < scan; scan--) {
		if (*scan == '>') {
			char *cut = scan + 1;
			int saved = *cut;

			*eol_p = cut;
			*cut = '\0';
			return saved;
		}
	}
	return 0;
}
1163
/*
 * Prefixes of the header lines a GREP_PATTERN_HEAD pattern can be
 * restricted to, together with their lengths.  The table is indexed by
 * the pattern's "field" member.
 */
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	{ .field = "author ",    .len = 7 },
	{ .field = "committer ", .len = 10 },
	{ .field = "reflog ",    .len = 7 },
};
1172
d7eb527d 1173static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
79212772
RS
1174 enum grep_context ctx,
1175 regmatch_t *pmatch, int eflags)
83b5d2f5
JH
1176{
1177 int hit = 0;
a4d7d2c6 1178 int saved_ch = 0;
e701fadb 1179 const char *start = bol;
83b5d2f5 1180
480c1ca6
JH
1181 if ((p->token != GREP_PATTERN) &&
1182 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
1183 return 0;
1184
a4d7d2c6
JH
1185 if (p->token == GREP_PATTERN_HEAD) {
1186 const char *field;
1187 size_t len;
1188 assert(p->field < ARRAY_SIZE(header_field));
1189 field = header_field[p->field].field;
1190 len = header_field[p->field].len;
1191 if (strncmp(bol, field, len))
1192 return 0;
1193 bol += len;
ad4813b3
NTND
1194 switch (p->field) {
1195 case GREP_HEADER_AUTHOR:
1196 case GREP_HEADER_COMMITTER:
1197 saved_ch = strip_timestamp(bol, &eol);
1198 break;
1199 default:
1200 break;
1201 }
a4d7d2c6
JH
1202 }
1203
83b5d2f5 1204 again:
97e77784 1205 hit = patmatch(p, bol, eol, pmatch, eflags);
83b5d2f5 1206
d7eb527d 1207 if (hit && p->word_regexp) {
83b5d2f5 1208 if ((pmatch[0].rm_so < 0) ||
84201eae 1209 (eol - bol) < pmatch[0].rm_so ||
83b5d2f5
JH
1210 (pmatch[0].rm_eo < 0) ||
1211 (eol - bol) < pmatch[0].rm_eo)
1212 die("regexp returned nonsense");
1213
1214 /* Match beginning must be either beginning of the
1215 * line, or at word boundary (i.e. the last char must
1216 * not be a word char). Similarly, match end must be
1217 * either end of the line, or at word boundary
1218 * (i.e. the next char must not be a word char).
1219 */
fb62eb7f 1220 if ( ((pmatch[0].rm_so == 0) ||
83b5d2f5
JH
1221 !word_char(bol[pmatch[0].rm_so-1])) &&
1222 ((pmatch[0].rm_eo == (eol-bol)) ||
1223 !word_char(bol[pmatch[0].rm_eo])) )
1224 ;
1225 else
1226 hit = 0;
1227
84201eae
RS
1228 /* Words consist of at least one character. */
1229 if (pmatch->rm_so == pmatch->rm_eo)
1230 hit = 0;
1231
83b5d2f5
JH
1232 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
1233 /* There could be more than one match on the
1234 * line, and the first match might not be
1235 * strict word match. But later ones could be!
fb62eb7f
RS
1236 * Forward to the next possible start, i.e. the
1237 * next position following a non-word char.
83b5d2f5
JH
1238 */
1239 bol = pmatch[0].rm_so + bol + 1;
fb62eb7f
RS
1240 while (word_char(bol[-1]) && bol < eol)
1241 bol++;
dbb6a4ad 1242 eflags |= REG_NOTBOL;
fb62eb7f
RS
1243 if (bol < eol)
1244 goto again;
83b5d2f5
JH
1245 }
1246 }
a4d7d2c6
JH
1247 if (p->token == GREP_PATTERN_HEAD && saved_ch)
1248 *eol = saved_ch;
e701fadb
RS
1249 if (hit) {
1250 pmatch[0].rm_so += bol - start;
1251 pmatch[0].rm_eo += bol - start;
1252 }
83b5d2f5
JH
1253 return hit;
1254}
1255
68d686e6
TB
1256static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol,
1257 char *eol, enum grep_context ctx, ssize_t *col,
1258 ssize_t *icol, int collect_hits)
83b5d2f5 1259{
0ab7befa
JH
1260 int h = 0;
1261
c922b01f
LT
1262 if (!x)
1263 die("Not a valid grep expression");
83b5d2f5 1264 switch (x->node) {
5aaeb733
JH
1265 case GREP_NODE_TRUE:
1266 h = 1;
1267 break;
83b5d2f5 1268 case GREP_NODE_ATOM:
68d686e6
TB
1269 {
1270 regmatch_t tmp;
1271 h = match_one_pattern(x->u.atom, bol, eol, ctx,
1272 &tmp, 0);
1273 if (h && (*col < 0 || tmp.rm_so < *col))
1274 *col = tmp.rm_so;
1275 }
83b5d2f5
JH
1276 break;
1277 case GREP_NODE_NOT:
68d686e6
TB
1278 /*
1279 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1280 */
1281 h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1282 0);
0ab7befa 1283 break;
83b5d2f5 1284 case GREP_NODE_AND:
017c0fcf 1285 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
68d686e6 1286 icol, 0);
017c0fcf
TB
1287 if (h || opt->columnnum) {
1288 /*
1289 * Don't short-circuit AND when given --column, since a
1290 * NOT earlier in the tree may turn this into an OR. In
1291 * this case, see the below comment.
1292 */
1293 h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1294 ctx, col, icol, 0);
1295 }
0ab7befa 1296 break;
83b5d2f5 1297 case GREP_NODE_OR:
017c0fcf
TB
1298 if (!(collect_hits || opt->columnnum)) {
1299 /*
1300 * Don't short-circuit OR when given --column (or
1301 * collecting hits) to ensure we don't skip a later
1302 * child that would produce an earlier match.
1303 */
68d686e6
TB
1304 return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1305 ctx, col, icol, 0) ||
1306 match_expr_eval(opt, x->u.binary.right, bol,
1307 eol, ctx, col, icol, 0));
017c0fcf 1308 }
68d686e6
TB
1309 h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1310 icol, 0);
017c0fcf
TB
1311 if (collect_hits)
1312 x->u.binary.left->hit |= h;
68d686e6 1313 h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
017c0fcf 1314 icol, collect_hits);
0ab7befa
JH
1315 break;
1316 default:
d7530708 1317 die("Unexpected node type (internal error) %d", x->node);
83b5d2f5 1318 }
0ab7befa
JH
1319 if (collect_hits)
1320 x->hit |= h;
1321 return h;
83b5d2f5
JH
1322}
1323
480c1ca6 1324static int match_expr(struct grep_opt *opt, char *bol, char *eol,
68d686e6
TB
1325 enum grep_context ctx, ssize_t *col,
1326 ssize_t *icol, int collect_hits)
83b5d2f5
JH
1327{
1328 struct grep_expr *x = opt->pattern_expression;
68d686e6 1329 return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
83b5d2f5
JH
1330}
1331
480c1ca6 1332static int match_line(struct grep_opt *opt, char *bol, char *eol,
68d686e6 1333 ssize_t *col, ssize_t *icol,
0ab7befa 1334 enum grep_context ctx, int collect_hits)
83b5d2f5
JH
1335{
1336 struct grep_pat *p;
017c0fcf 1337 int hit = 0;
79212772 1338
83b5d2f5 1339 if (opt->extended)
68d686e6
TB
1340 return match_expr(opt, bol, eol, ctx, col, icol,
1341 collect_hits);
0ab7befa
JH
1342
1343 /* we do not call with collect_hits without being extended */
83b5d2f5 1344 for (p = opt->pattern_list; p; p = p->next) {
68d686e6
TB
1345 regmatch_t tmp;
1346 if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
017c0fcf
TB
1347 hit |= 1;
1348 if (!opt->columnnum) {
1349 /*
1350 * Without --column, any single match on a line
1351 * is enough to know that it needs to be
1352 * printed. With --column, scan _all_ patterns
1353 * to find the earliest.
1354 */
1355 break;
1356 }
1357 if (*col < 0 || tmp.rm_so < *col)
1358 *col = tmp.rm_so;
68d686e6 1359 }
83b5d2f5 1360 }
017c0fcf 1361 return hit;
83b5d2f5
JH
1362}
1363
7e8f59d5
RS
1364static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
1365 enum grep_context ctx,
1366 regmatch_t *pmatch, int eflags)
1367{
1368 regmatch_t match;
1369
1370 if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
1371 return 0;
1372 if (match.rm_so < 0 || match.rm_eo < 0)
1373 return 0;
1374 if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1375 if (match.rm_so > pmatch->rm_so)
1376 return 1;
1377 if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1378 return 1;
1379 }
1380 pmatch->rm_so = match.rm_so;
1381 pmatch->rm_eo = match.rm_eo;
1382 return 1;
1383}
1384
1385static int next_match(struct grep_opt *opt, char *bol, char *eol,
1386 enum grep_context ctx, regmatch_t *pmatch, int eflags)
1387{
1388 struct grep_pat *p;
1389 int hit = 0;
1390
1391 pmatch->rm_so = pmatch->rm_eo = -1;
1392 if (bol < eol) {
1393 for (p = opt->pattern_list; p; p = p->next) {
1394 switch (p->token) {
1395 case GREP_PATTERN: /* atom */
1396 case GREP_PATTERN_HEAD:
1397 case GREP_PATTERN_BODY:
1398 hit |= match_next_pattern(p, bol, eol, ctx,
1399 pmatch, eflags);
1400 break;
1401 default:
1402 break;
1403 }
1404 }
1405 }
1406 return hit;
1407}
1408
c707ded3
TB
1409static void show_line_header(struct grep_opt *opt, const char *name,
1410 unsigned lno, ssize_t cno, char sign)
7e8f59d5 1411{
1d84f72e 1412 if (opt->heading && opt->last_shown == 0) {
fa151dc5 1413 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
1d84f72e
RS
1414 opt->output(opt, "\n", 1);
1415 }
5dd06d38
RS
1416 opt->last_shown = lno;
1417
1d84f72e 1418 if (!opt->heading && opt->pathname) {
fa151dc5 1419 output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
55f638bd 1420 output_sep(opt, sign);
5b594f45
FK
1421 }
1422 if (opt->linenum) {
1423 char buf[32];
1a168e5c 1424 xsnprintf(buf, sizeof(buf), "%d", lno);
fa151dc5 1425 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
55f638bd 1426 output_sep(opt, sign);
5b594f45 1427 }
89252cd0
TB
1428 /*
1429 * Treat 'cno' as the 1-indexed offset from the start of a non-context
1430 * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1431 * being called with a context line.
1432 */
1433 if (opt->columnnum && cno) {
1434 char buf[32];
1435 xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
d036d667 1436 output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
89252cd0
TB
1437 output_sep(opt, sign);
1438 }
c707ded3
TB
1439}
1440
1441static void show_line(struct grep_opt *opt, char *bol, char *eol,
1442 const char *name, unsigned lno, ssize_t cno, char sign)
1443{
1444 int rest = eol - bol;
9d8db06e
TB
1445 const char *match_color = NULL;
1446 const char *line_color = NULL;
c707ded3
TB
1447
1448 if (opt->file_break && opt->last_shown == 0) {
1449 if (opt->show_hunk_mark)
1450 opt->output(opt, "\n", 1);
1451 } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1452 if (opt->last_shown == 0) {
1453 if (opt->show_hunk_mark) {
87ece7ce 1454 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
c707ded3
TB
1455 opt->output(opt, "\n", 1);
1456 }
1457 } else if (lno > opt->last_shown + 1) {
87ece7ce 1458 output_color(opt, "--", 2, opt->colors[GREP_COLOR_SEP]);
c707ded3
TB
1459 opt->output(opt, "\n", 1);
1460 }
1461 }
9d8db06e
TB
1462 if (!opt->only_matching) {
1463 /*
1464 * In case the line we're being called with contains more than
1465 * one match, leave printing each header to the loop below.
1466 */
1467 show_line_header(opt, name, lno, cno, sign);
1468 }
1469 if (opt->color || opt->only_matching) {
7e8f59d5
RS
1470 regmatch_t match;
1471 enum grep_context ctx = GREP_CONTEXT_BODY;
1472 int ch = *eol;
1473 int eflags = 0;
1474
9d8db06e
TB
1475 if (opt->color) {
1476 if (sign == ':')
87ece7ce 1477 match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
9d8db06e 1478 else
87ece7ce 1479 match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
9d8db06e 1480 if (sign == ':')
87ece7ce 1481 line_color = opt->colors[GREP_COLOR_SELECTED];
9d8db06e 1482 else if (sign == '-')
87ece7ce 1483 line_color = opt->colors[GREP_COLOR_CONTEXT];
9d8db06e 1484 else if (sign == '=')
87ece7ce 1485 line_color = opt->colors[GREP_COLOR_FUNCTION];
9d8db06e 1486 }
7e8f59d5
RS
1487 *eol = '\0';
1488 while (next_match(opt, bol, eol, ctx, &match, eflags)) {
1f5b9cc4
RS
1489 if (match.rm_so == match.rm_eo)
1490 break;
5b594f45 1491
9d8db06e
TB
1492 if (opt->only_matching)
1493 show_line_header(opt, name, lno, cno, sign);
1494 else
1495 output_color(opt, bol, match.rm_so, line_color);
55f638bd 1496 output_color(opt, bol + match.rm_so,
79a77109 1497 match.rm_eo - match.rm_so, match_color);
9d8db06e
TB
1498 if (opt->only_matching)
1499 opt->output(opt, "\n", 1);
7e8f59d5 1500 bol += match.rm_eo;
9d8db06e 1501 cno += match.rm_eo;
7e8f59d5
RS
1502 rest -= match.rm_eo;
1503 eflags = REG_NOTBOL;
1504 }
1505 *eol = ch;
1506 }
9d8db06e
TB
1507 if (!opt->only_matching) {
1508 output_color(opt, bol, rest, line_color);
1509 opt->output(opt, "\n", 1);
1510 }
7e8f59d5
RS
1511}
1512
#ifndef NO_PTHREADS
/* When non-zero, the lock helpers below actually take their mutex. */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

static inline void grep_attr_lock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_lock(&grep_attr_mutex);
}

static inline void grep_attr_unlock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_unlock(&grep_attr_mutex);
}

/*
 * Same as grep_attr_mutex, but protecting the thread-unsafe object db access.
 */
pthread_mutex_t grep_read_mutex;

#else
/* Without pthreads the attribute lock helpers expand to nothing. */
#define grep_attr_lock()
#define grep_attr_unlock()
#endif
1543
e1327023 1544static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
2944e4e6 1545{
60ecac98 1546 xdemitconf_t *xecfg = opt->priv;
0579f91d 1547 if (xecfg && !xecfg->find_func) {
94ad9d9e
JK
1548 grep_source_load_driver(gs);
1549 if (gs->driver->funcname.pattern) {
1550 const struct userdiff_funcname *pe = &gs->driver->funcname;
0579f91d
TR
1551 xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1552 } else {
1553 xecfg = opt->priv = NULL;
1554 }
1555 }
1556
1557 if (xecfg) {
60ecac98
RS
1558 char buf[1];
1559 return xecfg->find_func(bol, eol - bol, buf, 1,
1560 xecfg->find_func_priv) >= 0;
1561 }
1562
2944e4e6
RS
1563 if (bol == eol)
1564 return 0;
1565 if (isalpha(*bol) || *bol == '_' || *bol == '$')
1566 return 1;
1567 return 0;
1568}
1569
e1327023
JK
1570static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1571 char *bol, unsigned lno)
2944e4e6 1572{
e1327023 1573 while (bol > gs->buf) {
2944e4e6
RS
1574 char *eol = --bol;
1575
e1327023 1576 while (bol > gs->buf && bol[-1] != '\n')
2944e4e6
RS
1577 bol--;
1578 lno--;
1579
1580 if (lno <= opt->last_shown)
1581 break;
1582
e1327023 1583 if (match_funcname(opt, gs, bol, eol)) {
89252cd0 1584 show_line(opt, bol, eol, gs->name, lno, 0, '=');
2944e4e6
RS
1585 break;
1586 }
1587 }
1588}
1589
a5dc20b0
RS
1590static int is_empty_line(const char *bol, const char *eol);
1591
e1327023 1592static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
ba8ea749 1593 char *bol, char *end, unsigned lno)
49de3216 1594{
6653a01b 1595 unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
a5dc20b0 1596 int funcname_needed = !!opt->funcname, comment_needed = 0;
ba8ea749 1597
49de3216
RS
1598 if (opt->pre_context < lno)
1599 from = lno - opt->pre_context;
1600 if (from <= opt->last_shown)
1601 from = opt->last_shown + 1;
6653a01b 1602 orig_from = from;
a5dc20b0
RS
1603 if (opt->funcbody) {
1604 if (match_funcname(opt, gs, bol, end))
1605 comment_needed = 1;
1606 else
1607 funcname_needed = 1;
6653a01b
RS
1608 from = opt->last_shown + 1;
1609 }
49de3216
RS
1610
1611 /* Rewind. */
6653a01b 1612 while (bol > gs->buf && cur > from) {
a5dc20b0 1613 char *next_bol = bol;
2944e4e6
RS
1614 char *eol = --bol;
1615
e1327023 1616 while (bol > gs->buf && bol[-1] != '\n')
49de3216
RS
1617 bol--;
1618 cur--;
a5dc20b0
RS
1619 if (comment_needed && (is_empty_line(bol, eol) ||
1620 match_funcname(opt, gs, bol, eol))) {
1621 comment_needed = 0;
1622 from = orig_from;
1623 if (cur < from) {
1624 cur++;
1625 bol = next_bol;
1626 break;
1627 }
1628 }
e1327023 1629 if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
2944e4e6
RS
1630 funcname_lno = cur;
1631 funcname_needed = 0;
a5dc20b0
RS
1632 if (opt->funcbody)
1633 comment_needed = 1;
1634 else
1635 from = orig_from;
2944e4e6 1636 }
49de3216
RS
1637 }
1638
2944e4e6
RS
1639 /* We need to look even further back to find a function signature. */
1640 if (opt->funcname && funcname_needed)
e1327023 1641 show_funcname_line(opt, gs, bol, cur);
2944e4e6 1642
49de3216
RS
1643 /* Back forward. */
1644 while (cur < lno) {
2944e4e6 1645 char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
49de3216
RS
1646
1647 while (*eol != '\n')
1648 eol++;
89252cd0 1649 show_line(opt, bol, eol, gs->name, cur, 0, sign);
49de3216
RS
1650 bol = eol + 1;
1651 cur++;
1652 }
1653}
1654
a26345b6
JH
1655static int should_lookahead(struct grep_opt *opt)
1656{
1657 struct grep_pat *p;
1658
1659 if (opt->extended)
1660 return 0; /* punt for too complex stuff */
1661 if (opt->invert)
1662 return 0;
1663 for (p = opt->pattern_list; p; p = p->next) {
1664 if (p->token != GREP_PATTERN)
1665 return 0; /* punt for "header only" and stuff */
1666 }
1667 return 1;
1668}
1669
1670static int look_ahead(struct grep_opt *opt,
1671 unsigned long *left_p,
1672 unsigned *lno_p,
1673 char **bol_p)
1674{
1675 unsigned lno = *lno_p;
1676 char *bol = *bol_p;
1677 struct grep_pat *p;
1678 char *sp, *last_bol;
1679 regoff_t earliest = -1;
1680
1681 for (p = opt->pattern_list; p; p = p->next) {
1682 int hit;
1683 regmatch_t m;
1684
97e77784 1685 hit = patmatch(p, bol, bol + *left_p, &m, 0);
a26345b6
JH
1686 if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1687 continue;
1688 if (earliest < 0 || m.rm_so < earliest)
1689 earliest = m.rm_so;
1690 }
1691
1692 if (earliest < 0) {
1693 *bol_p = bol + *left_p;
1694 *left_p = 0;
1695 return 1;
1696 }
1697 for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1698 ; /* find the beginning of the line */
1699 last_bol = sp;
1700
1701 for (sp = bol; sp < last_bol; sp++) {
1702 if (*sp == '\n')
1703 lno++;
1704 }
1705 *left_p -= last_bol - bol;
1706 *bol_p = last_bol;
1707 *lno_p = lno;
1708 return 0;
1709}
1710
335ec3bf
JK
1711static int fill_textconv_grep(struct userdiff_driver *driver,
1712 struct grep_source *gs)
1713{
1714 struct diff_filespec *df;
1715 char *buf;
1716 size_t size;
1717
1718 if (!driver || !driver->textconv)
1719 return grep_source_load(gs);
1720
1721 /*
1722 * The textconv interface is intimately tied to diff_filespecs, so we
1723 * have to pretend to be one. If we could unify the grep_source
1724 * and diff_filespec structs, this mess could just go away.
1725 */
1726 df = alloc_filespec(gs->path);
1727 switch (gs->type) {
1c41c82b 1728 case GREP_SOURCE_OID:
335ec3bf
JK
1729 fill_filespec(df, gs->identifier, 1, 0100644);
1730 break;
1731 case GREP_SOURCE_FILE:
f9704c2d 1732 fill_filespec(df, &null_oid, 0, 0100644);
335ec3bf
JK
1733 break;
1734 default:
033abf97 1735 BUG("attempt to textconv something without a path?");
335ec3bf
JK
1736 }
1737
1738 /*
1739 * fill_textconv is not remotely thread-safe; it may load objects
1740 * behind the scenes, and it modifies the global diff tempfile
1741 * structure.
1742 */
1743 grep_read_lock();
1744 size = fill_textconv(driver, df, &buf);
1745 grep_read_unlock();
1746 free_filespec(df);
1747
1748 /*
1749 * The normal fill_textconv usage by the diff machinery would just keep
1750 * the textconv'd buf separate from the diff_filespec. But much of the
1751 * grep code passes around a grep_source and assumes that its "buf"
1752 * pointer is the beginning of the thing we are searching. So let's
1753 * install our textconv'd version into the grep_source, taking care not
1754 * to leak any existing buffer.
1755 */
1756 grep_source_clear_data(gs);
1757 gs->buf = buf;
1758 gs->size = size;
1759
1760 return 0;
1761}
1762
4aa2c475
RS
/* Return 1 when [bol, eol) consists of whitespace only (or is empty). */
static int is_empty_line(const char *bol, const char *eol)
{
	const char *p = bol;

	while (p < eol) {
		if (!isspace(*p))
			return 0;
		p++;
	}
	return p == eol;
}
1769
e1327023 1770static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
83b5d2f5 1771{
e1327023 1772 char *bol;
4aa2c475 1773 char *peek_bol = NULL;
e1327023 1774 unsigned long left;
83b5d2f5 1775 unsigned lno = 1;
83b5d2f5 1776 unsigned last_hit = 0;
83b5d2f5 1777 int binary_match_only = 0;
83b5d2f5 1778 unsigned count = 0;
a26345b6 1779 int try_lookahead = 0;
ba8ea749 1780 int show_function = 0;
335ec3bf 1781 struct userdiff_driver *textconv = NULL;
480c1ca6 1782 enum grep_context ctx = GREP_CONTEXT_HEAD;
60ecac98 1783 xdemitconf_t xecfg;
83b5d2f5 1784
5b594f45
FK
1785 if (!opt->output)
1786 opt->output = std_output;
1787
ba8ea749
RS
1788 if (opt->pre_context || opt->post_context || opt->file_break ||
1789 opt->funcbody) {
08303c36
RS
1790 /* Show hunk marks, except for the first file. */
1791 if (opt->last_shown)
1792 opt->show_hunk_mark = 1;
1793 /*
1794 * If we're using threads then we can't easily identify
1795 * the first file. Always put hunk marks in that case
1796 * and skip the very first one later in work_done().
1797 */
1798 if (opt->output != std_output)
1799 opt->show_hunk_mark = 1;
1800 }
431d6e7b
RS
1801 opt->last_shown = 0;
1802
335ec3bf
JK
1803 if (opt->allow_textconv) {
1804 grep_source_load_driver(gs);
1805 /*
1806 * We might set up the shared textconv cache data here, which
1807 * is not thread-safe.
1808 */
1809 grep_attr_lock();
1810 textconv = userdiff_get_textconv(gs->driver);
1811 grep_attr_unlock();
1812 }
1813
1814 /*
1815 * We know the result of a textconv is text, so we only have to care
1816 * about binary handling if we are not using it.
1817 */
1818 if (!textconv) {
1819 switch (opt->binary) {
1820 case GREP_BINARY_DEFAULT:
1821 if (grep_source_is_binary(gs))
1822 binary_match_only = 1;
1823 break;
1824 case GREP_BINARY_NOMATCH:
1825 if (grep_source_is_binary(gs))
1826 return 0; /* Assume unmatch */
1827 break;
1828 case GREP_BINARY_TEXT:
1829 break;
1830 default:
033abf97 1831 BUG("unknown binary handling mode");
335ec3bf 1832 }
83b5d2f5
JH
1833 }
1834
60ecac98 1835 memset(&xecfg, 0, sizeof(xecfg));
0579f91d
TR
1836 opt->priv = &xecfg;
1837
a26345b6 1838 try_lookahead = should_lookahead(opt);
60ecac98 1839
335ec3bf 1840 if (fill_textconv_grep(textconv, gs) < 0)
08265798
JK
1841 return 0;
1842
e1327023
JK
1843 bol = gs->buf;
1844 left = gs->size;
83b5d2f5
JH
1845 while (left) {
1846 char *eol, ch;
0ab7befa 1847 int hit;
89252cd0 1848 ssize_t cno;
68d686e6 1849 ssize_t col = -1, icol = -1;
83b5d2f5 1850
a26345b6 1851 /*
8997da38 1852 * look_ahead() skips quickly to the line that possibly
a26345b6
JH
1853 * has the next hit; don't call it if we need to do
1854 * something more than just skipping the current line
1855 * in response to an unmatch for the current line. E.g.
1856 * inside a post-context window, we will show the current
1857 * line as a context around the previous hit when it
1858 * doesn't hit.
1859 */
1860 if (try_lookahead
1861 && !(last_hit
ba8ea749
RS
1862 && (show_function ||
1863 lno <= last_hit + opt->post_context))
a26345b6
JH
1864 && look_ahead(opt, &left, &lno, &bol))
1865 break;
83b5d2f5
JH
1866 eol = end_of_line(bol, &left);
1867 ch = *eol;
1868 *eol = 0;
1869
480c1ca6
JH
1870 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1871 ctx = GREP_CONTEXT_BODY;
1872
68d686e6 1873 hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
83b5d2f5
JH
1874 *eol = ch;
1875
0ab7befa
JH
1876 if (collect_hits)
1877 goto next_line;
1878
83b5d2f5
JH
1879 /* "grep -v -e foo -e bla" should list lines
1880 * that do not have either, so inversion should
1881 * be done outside.
1882 */
1883 if (opt->invert)
1884 hit = !hit;
1885 if (opt->unmatch_name_only) {
1886 if (hit)
1887 return 0;
1888 goto next_line;
1889 }
1890 if (hit) {
1891 count++;
1892 if (opt->status_only)
1893 return 1;
321ffcc0 1894 if (opt->name_only) {
e1327023 1895 show_name(opt, gs->name);
321ffcc0
RS
1896 return 1;
1897 }
c30c10cf
RS
1898 if (opt->count)
1899 goto next_line;
83b5d2f5 1900 if (binary_match_only) {
5b594f45 1901 opt->output(opt, "Binary file ", 12);
e1327023 1902 output_color(opt, gs->name, strlen(gs->name),
fa151dc5 1903 opt->colors[GREP_COLOR_FILENAME]);
5b594f45 1904 opt->output(opt, " matches\n", 9);
83b5d2f5
JH
1905 return 1;
1906 }
83b5d2f5
JH
1907 /* Hit at this line. If we haven't shown the
1908 * pre-context lines, we would need to show them.
83b5d2f5 1909 */
ba8ea749 1910 if (opt->pre_context || opt->funcbody)
e1327023 1911 show_pre_context(opt, gs, bol, eol, lno);
2944e4e6 1912 else if (opt->funcname)
e1327023 1913 show_funcname_line(opt, gs, bol, lno);
89252cd0
TB
1914 cno = opt->invert ? icol : col;
1915 if (cno < 0) {
1916 /*
1917 * A negative cno indicates that there was no
1918 * match on the line. We are thus inverted and
1919 * being asked to show all lines that _don't_
1920 * match a given expression. Therefore, set cno
1921 * to 0 to suggest the whole line matches.
1922 */
1923 cno = 0;
1924 }
1925 show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
5dd06d38 1926 last_hit = lno;
ba8ea749
RS
1927 if (opt->funcbody)
1928 show_function = 1;
1929 goto next_line;
83b5d2f5 1930 }
4aa2c475
RS
1931 if (show_function && (!peek_bol || peek_bol < bol)) {
1932 unsigned long peek_left = left;
1933 char *peek_eol = eol;
1934
1935 /*
1936 * Trailing empty lines are not interesting.
1937 * Peek past them to see if they belong to the
1938 * body of the current function.
1939 */
1940 peek_bol = bol;
1941 while (is_empty_line(peek_bol, peek_eol)) {
1942 peek_bol = peek_eol + 1;
1943 peek_eol = end_of_line(peek_bol, &peek_left);
1944 }
1945
1946 if (match_funcname(opt, gs, peek_bol, peek_eol))
1947 show_function = 0;
1948 }
ba8ea749
RS
1949 if (show_function ||
1950 (last_hit && lno <= last_hit + opt->post_context)) {
83b5d2f5
JH
1951 /* If the last hit is within the post context,
1952 * we need to show this line.
1953 */
89252cd0 1954 show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
83b5d2f5 1955 }
83b5d2f5
JH
1956
1957 next_line:
1958 bol = eol + 1;
1959 if (!left)
1960 break;
1961 left--;
1962 lno++;
1963 }
1964
0ab7befa
JH
1965 if (collect_hits)
1966 return 0;
b48fb5b6 1967
83b5d2f5 1968 if (opt->status_only)
e1f68c66 1969 return opt->unmatch_name_only;
83b5d2f5
JH
1970 if (opt->unmatch_name_only) {
1971 /* We did not see any hit, so we want to show this */
e1327023 1972 show_name(opt, gs->name);
83b5d2f5
JH
1973 return 1;
1974 }
1975
60ecac98
RS
1976 xdiff_clear_find_func(&xecfg);
1977 opt->priv = NULL;
1978
83b5d2f5
JH
1979 /* NEEDSWORK:
1980 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1981 * which feels mostly useless but sometimes useful. Maybe
1982 * make it another option? For now suppress them.
1983 */
5b594f45
FK
1984 if (opt->count && count) {
1985 char buf[32];
f76d947a
RS
1986 if (opt->pathname) {
1987 output_color(opt, gs->name, strlen(gs->name),
fa151dc5 1988 opt->colors[GREP_COLOR_FILENAME]);
f76d947a
RS
1989 output_sep(opt, ':');
1990 }
1a168e5c 1991 xsnprintf(buf, sizeof(buf), "%u\n", count);
5b594f45 1992 opt->output(opt, buf, strlen(buf));
c30c10cf 1993 return 1;
5b594f45 1994 }
83b5d2f5
JH
1995 return !!last_hit;
1996}
1997
0ab7befa
JH
1998static void clr_hit_marker(struct grep_expr *x)
1999{
2000 /* All-hit markers are meaningful only at the very top level
2001 * OR node.
2002 */
2003 while (1) {
2004 x->hit = 0;
2005 if (x->node != GREP_NODE_OR)
2006 return;
2007 x->u.binary.left->hit = 0;
2008 x = x->u.binary.right;
2009 }
2010}
2011
2012static int chk_hit_marker(struct grep_expr *x)
2013{
2014 /* Top level nodes have hit markers. See if they all are hits */
2015 while (1) {
2016 if (x->node != GREP_NODE_OR)
2017 return x->hit;
2018 if (!x->u.binary.left->hit)
2019 return 0;
2020 x = x->u.binary.right;
2021 }
2022}
2023
e1327023 2024int grep_source(struct grep_opt *opt, struct grep_source *gs)
0ab7befa
JH
2025{
2026 /*
2027 * we do not have to do the two-pass grep when we do not check
2028 * buffer-wide "all-match".
2029 */
2030 if (!opt->all_match)
e1327023 2031 return grep_source_1(opt, gs, 0);
0ab7befa
JH
2032
2033 /* Otherwise the toplevel "or" terms hit a bit differently.
2034 * We first clear hit markers from them.
2035 */
2036 clr_hit_marker(opt->pattern_expression);
e1327023 2037 grep_source_1(opt, gs, 1);
0ab7befa
JH
2038
2039 if (!chk_hit_marker(opt->pattern_expression))
2040 return 0;
2041
e1327023
JK
2042 return grep_source_1(opt, gs, 0);
2043}
2044
c876d6da 2045int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
e1327023
JK
2046{
2047 struct grep_source gs;
2048 int r;
2049
55c61688 2050 grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
e1327023
JK
2051 gs.buf = buf;
2052 gs.size = size;
2053
2054 r = grep_source(opt, &gs);
2055
2056 grep_source_clear(&gs);
2057 return r;
2058}
2059
2060void grep_source_init(struct grep_source *gs, enum grep_source_type type,
55c61688
NTND
2061 const char *name, const char *path,
2062 const void *identifier)
e1327023
JK
2063{
2064 gs->type = type;
8c53f071
JK
2065 gs->name = xstrdup_or_null(name);
2066 gs->path = xstrdup_or_null(path);
e1327023
JK
2067 gs->buf = NULL;
2068 gs->size = 0;
94ad9d9e 2069 gs->driver = NULL;
e1327023
JK
2070
2071 switch (type) {
2072 case GREP_SOURCE_FILE:
2073 gs->identifier = xstrdup(identifier);
2074 break;
1c41c82b
BW
2075 case GREP_SOURCE_OID:
2076 gs->identifier = oiddup(identifier);
e1327023
JK
2077 break;
2078 case GREP_SOURCE_BUF:
2079 gs->identifier = NULL;
4538eef5 2080 break;
e1327023
JK
2081 }
2082}
2083
2084void grep_source_clear(struct grep_source *gs)
2085{
88ce3ef6
ÆAB
2086 FREE_AND_NULL(gs->name);
2087 FREE_AND_NULL(gs->path);
2088 FREE_AND_NULL(gs->identifier);
e1327023
JK
2089 grep_source_clear_data(gs);
2090}
2091
2092void grep_source_clear_data(struct grep_source *gs)
2093{
2094 switch (gs->type) {
2095 case GREP_SOURCE_FILE:
1c41c82b 2096 case GREP_SOURCE_OID:
6a83d902 2097 FREE_AND_NULL(gs->buf);
e1327023
JK
2098 gs->size = 0;
2099 break;
2100 case GREP_SOURCE_BUF:
2101 /* leave user-provided buf intact */
2102 break;
2103 }
2104}
2105
1c41c82b 2106static int grep_source_load_oid(struct grep_source *gs)
e1327023
JK
2107{
2108 enum object_type type;
2109
2110 grep_read_lock();
b4f5aca4 2111 gs->buf = read_object_file(gs->identifier, &type, &gs->size);
e1327023
JK
2112 grep_read_unlock();
2113
2114 if (!gs->buf)
2115 return error(_("'%s': unable to read %s"),
2116 gs->name,
1c41c82b 2117 oid_to_hex(gs->identifier));
e1327023
JK
2118 return 0;
2119}
2120
2121static int grep_source_load_file(struct grep_source *gs)
2122{
2123 const char *filename = gs->identifier;
2124 struct stat st;
2125 char *data;
2126 size_t size;
2127 int i;
2128
2129 if (lstat(filename, &st) < 0) {
2130 err_ret:
2131 if (errno != ENOENT)
7645d8f1 2132 error_errno(_("failed to stat '%s'"), filename);
e1327023
JK
2133 return -1;
2134 }
2135 if (!S_ISREG(st.st_mode))
2136 return -1;
2137 size = xsize_t(st.st_size);
2138 i = open(filename, O_RDONLY);
2139 if (i < 0)
2140 goto err_ret;
3733e694 2141 data = xmallocz(size);
e1327023 2142 if (st.st_size != read_in_full(i, data, size)) {
7645d8f1 2143 error_errno(_("'%s': short read"), filename);
e1327023
JK
2144 close(i);
2145 free(data);
2146 return -1;
2147 }
2148 close(i);
e1327023
JK
2149
2150 gs->buf = data;
2151 gs->size = size;
2152 return 0;
2153}
2154
3083301e 2155static int grep_source_load(struct grep_source *gs)
e1327023
JK
2156{
2157 if (gs->buf)
2158 return 0;
2159
2160 switch (gs->type) {
2161 case GREP_SOURCE_FILE:
2162 return grep_source_load_file(gs);
1c41c82b
BW
2163 case GREP_SOURCE_OID:
2164 return grep_source_load_oid(gs);
e1327023
JK
2165 case GREP_SOURCE_BUF:
2166 return gs->buf ? 0 : -1;
2167 }
033abf97 2168 BUG("invalid grep_source type to load");
0ab7befa 2169}
94ad9d9e
JK
2170
2171void grep_source_load_driver(struct grep_source *gs)
2172{
2173 if (gs->driver)
2174 return;
2175
2176 grep_attr_lock();
55c61688
NTND
2177 if (gs->path)
2178 gs->driver = userdiff_find_by_path(gs->path);
94ad9d9e
JK
2179 if (!gs->driver)
2180 gs->driver = userdiff_find_by_name("default");
2181 grep_attr_unlock();
2182}
41b59bfc 2183
3083301e 2184static int grep_source_is_binary(struct grep_source *gs)
41b59bfc
JK
2185{
2186 grep_source_load_driver(gs);
2187 if (gs->driver->binary != -1)
2188 return gs->driver->binary;
2189
2190 if (!grep_source_load(gs))
2191 return buffer_is_binary(gs->buf, gs->size);
2192
2193 return 0;
2194}