]> git.ipfire.org Git - thirdparty/git.git/blame - grep.c
Change NUL char handling of isspecial()
[thirdparty/git.git] / grep.c
CommitLineData
83b5d2f5 1#include "cache.h"
83b5d2f5 2#include "grep.h"
6bfce93e 3#include "xdiff-interface.h"
83b5d2f5 4
a4d7d2c6
JH
5void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
6{
7 struct grep_pat *p = xcalloc(1, sizeof(*p));
8 p->pattern = pat;
9 p->origin = "header";
10 p->no = 0;
11 p->token = GREP_PATTERN_HEAD;
12 p->field = field;
13 *opt->pattern_tail = p;
14 opt->pattern_tail = &p->next;
15 p->next = NULL;
16}
17
83b5d2f5
JH
18void append_grep_pattern(struct grep_opt *opt, const char *pat,
19 const char *origin, int no, enum grep_pat_token t)
20{
21 struct grep_pat *p = xcalloc(1, sizeof(*p));
22 p->pattern = pat;
23 p->origin = origin;
24 p->no = no;
25 p->token = t;
26 *opt->pattern_tail = p;
27 opt->pattern_tail = &p->next;
28 p->next = NULL;
29}
30
c822255c
RS
31static int isregexspecial(int c)
32{
8cc32992
RS
33 return c == '\0' || is_glob_special(c) ||
34 c == '$' || c == '(' || c == ')' || c == '+' ||
35 c == '.' || c == '^' || c == '{' || c == '|';
c822255c
RS
36}
37
38static int is_fixed(const char *s)
39{
40 while (!isregexspecial(*s))
41 s++;
42 return !*s;
43}
44
83b5d2f5
JH
45static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
46{
c822255c
RS
47 int err;
48
49 if (opt->fixed || is_fixed(p->pattern))
50 p->fixed = 1;
51 if (opt->regflags & REG_ICASE)
52 p->fixed = 0;
53 if (p->fixed)
54 return;
55
56 err = regcomp(&p->regexp, p->pattern, opt->regflags);
83b5d2f5
JH
57 if (err) {
58 char errbuf[1024];
59 char where[1024];
60 if (p->no)
61 sprintf(where, "In '%s' at %d, ",
62 p->origin, p->no);
63 else if (p->origin)
64 sprintf(where, "%s, ", p->origin);
65 else
66 where[0] = 0;
67 regerror(err, &p->regexp, errbuf, 1024);
68 regfree(&p->regexp);
69 die("%s'%s': %s", where, p->pattern, errbuf);
70 }
71}
72
0ab7befa 73static struct grep_expr *compile_pattern_or(struct grep_pat **);
83b5d2f5
JH
74static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
75{
76 struct grep_pat *p;
77 struct grep_expr *x;
78
79 p = *list;
80 switch (p->token) {
81 case GREP_PATTERN: /* atom */
480c1ca6
JH
82 case GREP_PATTERN_HEAD:
83 case GREP_PATTERN_BODY:
83b5d2f5
JH
84 x = xcalloc(1, sizeof (struct grep_expr));
85 x->node = GREP_NODE_ATOM;
86 x->u.atom = p;
87 *list = p->next;
88 return x;
89 case GREP_OPEN_PAREN:
90 *list = p->next;
0ab7befa 91 x = compile_pattern_or(list);
83b5d2f5
JH
92 if (!x)
93 return NULL;
94 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
95 die("unmatched parenthesis");
96 *list = (*list)->next;
97 return x;
98 default:
99 return NULL;
100 }
101}
102
103static struct grep_expr *compile_pattern_not(struct grep_pat **list)
104{
105 struct grep_pat *p;
106 struct grep_expr *x;
107
108 p = *list;
109 switch (p->token) {
110 case GREP_NOT:
111 if (!p->next)
112 die("--not not followed by pattern expression");
113 *list = p->next;
114 x = xcalloc(1, sizeof (struct grep_expr));
115 x->node = GREP_NODE_NOT;
116 x->u.unary = compile_pattern_not(list);
117 if (!x->u.unary)
118 die("--not followed by non pattern expression");
119 return x;
120 default:
121 return compile_pattern_atom(list);
122 }
123}
124
125static struct grep_expr *compile_pattern_and(struct grep_pat **list)
126{
127 struct grep_pat *p;
128 struct grep_expr *x, *y, *z;
129
130 x = compile_pattern_not(list);
131 p = *list;
132 if (p && p->token == GREP_AND) {
133 if (!p->next)
134 die("--and not followed by pattern expression");
135 *list = p->next;
136 y = compile_pattern_and(list);
137 if (!y)
138 die("--and not followed by pattern expression");
139 z = xcalloc(1, sizeof (struct grep_expr));
140 z->node = GREP_NODE_AND;
141 z->u.binary.left = x;
142 z->u.binary.right = y;
143 return z;
144 }
145 return x;
146}
147
148static struct grep_expr *compile_pattern_or(struct grep_pat **list)
149{
150 struct grep_pat *p;
151 struct grep_expr *x, *y, *z;
152
153 x = compile_pattern_and(list);
154 p = *list;
155 if (x && p && p->token != GREP_CLOSE_PAREN) {
156 y = compile_pattern_or(list);
157 if (!y)
158 die("not a pattern expression %s", p->pattern);
159 z = xcalloc(1, sizeof (struct grep_expr));
160 z->node = GREP_NODE_OR;
161 z->u.binary.left = x;
162 z->u.binary.right = y;
163 return z;
164 }
165 return x;
166}
167
168static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
169{
170 return compile_pattern_or(list);
171}
172
173void compile_grep_patterns(struct grep_opt *opt)
174{
175 struct grep_pat *p;
176
0ab7befa
JH
177 if (opt->all_match)
178 opt->extended = 1;
179
83b5d2f5 180 for (p = opt->pattern_list; p; p = p->next) {
480c1ca6
JH
181 switch (p->token) {
182 case GREP_PATTERN: /* atom */
183 case GREP_PATTERN_HEAD:
184 case GREP_PATTERN_BODY:
c822255c 185 compile_regexp(p, opt);
480c1ca6
JH
186 break;
187 default:
83b5d2f5 188 opt->extended = 1;
480c1ca6
JH
189 break;
190 }
83b5d2f5
JH
191 }
192
193 if (!opt->extended)
194 return;
195
196 /* Then bundle them up in an expression.
197 * A classic recursive descent parser would do.
198 */
199 p = opt->pattern_list;
200 opt->pattern_expression = compile_pattern_expr(&p);
201 if (p)
202 die("incomplete pattern expression: %s", p->pattern);
203}
204
b48fb5b6
JH
205static void free_pattern_expr(struct grep_expr *x)
206{
207 switch (x->node) {
208 case GREP_NODE_ATOM:
209 break;
210 case GREP_NODE_NOT:
211 free_pattern_expr(x->u.unary);
212 break;
213 case GREP_NODE_AND:
214 case GREP_NODE_OR:
215 free_pattern_expr(x->u.binary.left);
216 free_pattern_expr(x->u.binary.right);
217 break;
218 }
219 free(x);
220}
221
222void free_grep_patterns(struct grep_opt *opt)
223{
224 struct grep_pat *p, *n;
225
226 for (p = opt->pattern_list; p; p = n) {
227 n = p->next;
228 switch (p->token) {
229 case GREP_PATTERN: /* atom */
230 case GREP_PATTERN_HEAD:
231 case GREP_PATTERN_BODY:
232 regfree(&p->regexp);
233 break;
234 default:
235 break;
236 }
237 free(p);
238 }
239
240 if (!opt->extended)
241 return;
242 free_pattern_expr(opt->pattern_expression);
243}
244
83b5d2f5
JH
245static char *end_of_line(char *cp, unsigned long *left)
246{
247 unsigned long l = *left;
248 while (l && *cp != '\n') {
249 l--;
250 cp++;
251 }
252 *left = l;
253 return cp;
254}
255
256static int word_char(char ch)
257{
258 return isalnum(ch) || ch == '_';
259}
260
261static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
262 const char *name, unsigned lno, char sign)
263{
83caecca
RZ
264 if (opt->null_following_name)
265 sign = '\0';
83b5d2f5
JH
266 if (opt->pathname)
267 printf("%s%c", name, sign);
268 if (opt->linenum)
269 printf("%d%c", lno, sign);
270 printf("%.*s\n", (int)(eol-bol), bol);
271}
272
83caecca
RZ
273static void show_name(struct grep_opt *opt, const char *name)
274{
275 printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
276}
277
83b5d2f5
JH
278static int fixmatch(const char *pattern, char *line, regmatch_t *match)
279{
280 char *hit = strstr(line, pattern);
281 if (!hit) {
282 match->rm_so = match->rm_eo = -1;
283 return REG_NOMATCH;
284 }
285 else {
286 match->rm_so = hit - line;
287 match->rm_eo = match->rm_so + strlen(pattern);
288 return 0;
289 }
290}
291
a4d7d2c6
JH
292static int strip_timestamp(char *bol, char **eol_p)
293{
294 char *eol = *eol_p;
295 int ch;
296
297 while (bol < --eol) {
298 if (*eol != '>')
299 continue;
300 *eol_p = ++eol;
301 ch = *eol;
302 *eol = '\0';
303 return ch;
304 }
305 return 0;
306}
307
308static struct {
309 const char *field;
310 size_t len;
311} header_field[] = {
312 { "author ", 7 },
313 { "committer ", 10 },
314};
315
480c1ca6 316static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
83b5d2f5
JH
317{
318 int hit = 0;
a4d7d2c6 319 int saved_ch = 0;
83b5d2f5
JH
320 regmatch_t pmatch[10];
321
480c1ca6
JH
322 if ((p->token != GREP_PATTERN) &&
323 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
324 return 0;
325
a4d7d2c6
JH
326 if (p->token == GREP_PATTERN_HEAD) {
327 const char *field;
328 size_t len;
329 assert(p->field < ARRAY_SIZE(header_field));
330 field = header_field[p->field].field;
331 len = header_field[p->field].len;
332 if (strncmp(bol, field, len))
333 return 0;
334 bol += len;
335 saved_ch = strip_timestamp(bol, &eol);
336 }
337
83b5d2f5 338 again:
c822255c 339 if (!p->fixed) {
83b5d2f5
JH
340 regex_t *exp = &p->regexp;
341 hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
342 pmatch, 0);
343 }
344 else {
345 hit = !fixmatch(p->pattern, bol, pmatch);
346 }
347
348 if (hit && opt->word_regexp) {
349 if ((pmatch[0].rm_so < 0) ||
350 (eol - bol) <= pmatch[0].rm_so ||
351 (pmatch[0].rm_eo < 0) ||
352 (eol - bol) < pmatch[0].rm_eo)
353 die("regexp returned nonsense");
354
355 /* Match beginning must be either beginning of the
356 * line, or at word boundary (i.e. the last char must
357 * not be a word char). Similarly, match end must be
358 * either end of the line, or at word boundary
359 * (i.e. the next char must not be a word char).
360 */
fb62eb7f 361 if ( ((pmatch[0].rm_so == 0) ||
83b5d2f5
JH
362 !word_char(bol[pmatch[0].rm_so-1])) &&
363 ((pmatch[0].rm_eo == (eol-bol)) ||
364 !word_char(bol[pmatch[0].rm_eo])) )
365 ;
366 else
367 hit = 0;
368
369 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
370 /* There could be more than one match on the
371 * line, and the first match might not be
372 * strict word match. But later ones could be!
fb62eb7f
RS
373 * Forward to the next possible start, i.e. the
374 * next position following a non-word char.
83b5d2f5
JH
375 */
376 bol = pmatch[0].rm_so + bol + 1;
fb62eb7f
RS
377 while (word_char(bol[-1]) && bol < eol)
378 bol++;
379 if (bol < eol)
380 goto again;
83b5d2f5
JH
381 }
382 }
a4d7d2c6
JH
383 if (p->token == GREP_PATTERN_HEAD && saved_ch)
384 *eol = saved_ch;
83b5d2f5
JH
385 return hit;
386}
387
0ab7befa 388static int match_expr_eval(struct grep_opt *o,
83b5d2f5 389 struct grep_expr *x,
480c1ca6 390 char *bol, char *eol,
0ab7befa
JH
391 enum grep_context ctx,
392 int collect_hits)
83b5d2f5 393{
0ab7befa
JH
394 int h = 0;
395
83b5d2f5
JH
396 switch (x->node) {
397 case GREP_NODE_ATOM:
0ab7befa 398 h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
83b5d2f5
JH
399 break;
400 case GREP_NODE_NOT:
0ab7befa
JH
401 h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
402 break;
83b5d2f5 403 case GREP_NODE_AND:
0ab7befa
JH
404 if (!collect_hits)
405 return (match_expr_eval(o, x->u.binary.left,
406 bol, eol, ctx, 0) &&
407 match_expr_eval(o, x->u.binary.right,
408 bol, eol, ctx, 0));
409 h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
410 h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
411 break;
83b5d2f5 412 case GREP_NODE_OR:
0ab7befa
JH
413 if (!collect_hits)
414 return (match_expr_eval(o, x->u.binary.left,
415 bol, eol, ctx, 0) ||
416 match_expr_eval(o, x->u.binary.right,
417 bol, eol, ctx, 0));
418 h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
419 x->u.binary.left->hit |= h;
420 h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
421 break;
422 default:
d7530708 423 die("Unexpected node type (internal error) %d", x->node);
83b5d2f5 424 }
0ab7befa
JH
425 if (collect_hits)
426 x->hit |= h;
427 return h;
83b5d2f5
JH
428}
429
480c1ca6 430static int match_expr(struct grep_opt *opt, char *bol, char *eol,
0ab7befa 431 enum grep_context ctx, int collect_hits)
83b5d2f5
JH
432{
433 struct grep_expr *x = opt->pattern_expression;
0ab7befa 434 return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
83b5d2f5
JH
435}
436
480c1ca6 437static int match_line(struct grep_opt *opt, char *bol, char *eol,
0ab7befa 438 enum grep_context ctx, int collect_hits)
83b5d2f5
JH
439{
440 struct grep_pat *p;
441 if (opt->extended)
0ab7befa
JH
442 return match_expr(opt, bol, eol, ctx, collect_hits);
443
444 /* we do not call with collect_hits without being extended */
83b5d2f5 445 for (p = opt->pattern_list; p; p = p->next) {
480c1ca6 446 if (match_one_pattern(opt, p, bol, eol, ctx))
83b5d2f5
JH
447 return 1;
448 }
449 return 0;
450}
451
0ab7befa
JH
452static int grep_buffer_1(struct grep_opt *opt, const char *name,
453 char *buf, unsigned long size, int collect_hits)
83b5d2f5
JH
454{
455 char *bol = buf;
456 unsigned long left = size;
457 unsigned lno = 1;
458 struct pre_context_line {
459 char *bol;
460 char *eol;
461 } *prev = NULL, *pcl;
462 unsigned last_hit = 0;
463 unsigned last_shown = 0;
464 int binary_match_only = 0;
465 const char *hunk_mark = "";
466 unsigned count = 0;
480c1ca6 467 enum grep_context ctx = GREP_CONTEXT_HEAD;
83b5d2f5
JH
468
469 if (buffer_is_binary(buf, size)) {
470 switch (opt->binary) {
471 case GREP_BINARY_DEFAULT:
472 binary_match_only = 1;
473 break;
474 case GREP_BINARY_NOMATCH:
475 return 0; /* Assume unmatch */
476 break;
477 default:
478 break;
479 }
480 }
481
482 if (opt->pre_context)
483 prev = xcalloc(opt->pre_context, sizeof(*prev));
484 if (opt->pre_context || opt->post_context)
485 hunk_mark = "--\n";
486
487 while (left) {
488 char *eol, ch;
0ab7befa 489 int hit;
83b5d2f5
JH
490
491 eol = end_of_line(bol, &left);
492 ch = *eol;
493 *eol = 0;
494
480c1ca6
JH
495 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
496 ctx = GREP_CONTEXT_BODY;
497
0ab7befa 498 hit = match_line(opt, bol, eol, ctx, collect_hits);
83b5d2f5
JH
499 *eol = ch;
500
0ab7befa
JH
501 if (collect_hits)
502 goto next_line;
503
83b5d2f5
JH
504 /* "grep -v -e foo -e bla" should list lines
505 * that do not have either, so inversion should
506 * be done outside.
507 */
508 if (opt->invert)
509 hit = !hit;
510 if (opt->unmatch_name_only) {
511 if (hit)
512 return 0;
513 goto next_line;
514 }
515 if (hit) {
516 count++;
517 if (opt->status_only)
518 return 1;
519 if (binary_match_only) {
520 printf("Binary file %s matches\n", name);
521 return 1;
522 }
523 if (opt->name_only) {
83caecca 524 show_name(opt, name);
83b5d2f5
JH
525 return 1;
526 }
527 /* Hit at this line. If we haven't shown the
528 * pre-context lines, we would need to show them.
529 * When asked to do "count", this still show
530 * the context which is nonsense, but the user
531 * deserves to get that ;-).
532 */
533 if (opt->pre_context) {
534 unsigned from;
535 if (opt->pre_context < lno)
536 from = lno - opt->pre_context;
537 else
538 from = 1;
539 if (from <= last_shown)
540 from = last_shown + 1;
541 if (last_shown && from != last_shown + 1)
9db56f71 542 fputs(hunk_mark, stdout);
83b5d2f5
JH
543 while (from < lno) {
544 pcl = &prev[lno-from-1];
545 show_line(opt, pcl->bol, pcl->eol,
546 name, from, '-');
547 from++;
548 }
549 last_shown = lno-1;
550 }
551 if (last_shown && lno != last_shown + 1)
9db56f71 552 fputs(hunk_mark, stdout);
83b5d2f5
JH
553 if (!opt->count)
554 show_line(opt, bol, eol, name, lno, ':');
555 last_shown = last_hit = lno;
556 }
557 else if (last_hit &&
558 lno <= last_hit + opt->post_context) {
559 /* If the last hit is within the post context,
560 * we need to show this line.
561 */
562 if (last_shown && lno != last_shown + 1)
9db56f71 563 fputs(hunk_mark, stdout);
83b5d2f5
JH
564 show_line(opt, bol, eol, name, lno, '-');
565 last_shown = lno;
566 }
567 if (opt->pre_context) {
568 memmove(prev+1, prev,
569 (opt->pre_context-1) * sizeof(*prev));
570 prev->bol = bol;
571 prev->eol = eol;
572 }
573
574 next_line:
575 bol = eol + 1;
576 if (!left)
577 break;
578 left--;
579 lno++;
580 }
581
b48fb5b6 582 free(prev);
0ab7befa
JH
583 if (collect_hits)
584 return 0;
b48fb5b6 585
83b5d2f5
JH
586 if (opt->status_only)
587 return 0;
588 if (opt->unmatch_name_only) {
589 /* We did not see any hit, so we want to show this */
83caecca 590 show_name(opt, name);
83b5d2f5
JH
591 return 1;
592 }
593
594 /* NEEDSWORK:
595 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
596 * which feels mostly useless but sometimes useful. Maybe
597 * make it another option? For now suppress them.
598 */
599 if (opt->count && count)
83caecca
RZ
600 printf("%s%c%u\n", name,
601 opt->null_following_name ? '\0' : ':', count);
83b5d2f5
JH
602 return !!last_hit;
603}
604
0ab7befa
JH
605static void clr_hit_marker(struct grep_expr *x)
606{
607 /* All-hit markers are meaningful only at the very top level
608 * OR node.
609 */
610 while (1) {
611 x->hit = 0;
612 if (x->node != GREP_NODE_OR)
613 return;
614 x->u.binary.left->hit = 0;
615 x = x->u.binary.right;
616 }
617}
618
619static int chk_hit_marker(struct grep_expr *x)
620{
621 /* Top level nodes have hit markers. See if they all are hits */
622 while (1) {
623 if (x->node != GREP_NODE_OR)
624 return x->hit;
625 if (!x->u.binary.left->hit)
626 return 0;
627 x = x->u.binary.right;
628 }
629}
630
631int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
632{
633 /*
634 * we do not have to do the two-pass grep when we do not check
635 * buffer-wide "all-match".
636 */
637 if (!opt->all_match)
638 return grep_buffer_1(opt, name, buf, size, 0);
639
640 /* Otherwise the toplevel "or" terms hit a bit differently.
641 * We first clear hit markers from them.
642 */
643 clr_hit_marker(opt->pattern_expression);
644 grep_buffer_1(opt, name, buf, size, 1);
645
646 if (!chk_hit_marker(opt->pattern_expression))
647 return 0;
648
649 return grep_buffer_1(opt, name, buf, size, 0);
650}