]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Builtin "git grep" | |
3 | * | |
4 | * Copyright (c) 2006 Junio C Hamano | |
5 | */ | |
6 | #define USE_THE_INDEX_COMPATIBILITY_MACROS | |
7 | #include "cache.h" | |
8 | #include "repository.h" | |
9 | #include "config.h" | |
10 | #include "blob.h" | |
11 | #include "tree.h" | |
12 | #include "commit.h" | |
13 | #include "tag.h" | |
14 | #include "tree-walk.h" | |
15 | #include "builtin.h" | |
16 | #include "parse-options.h" | |
17 | #include "string-list.h" | |
18 | #include "run-command.h" | |
19 | #include "userdiff.h" | |
20 | #include "grep.h" | |
21 | #include "quote.h" | |
22 | #include "dir.h" | |
23 | #include "pathspec.h" | |
24 | #include "submodule.h" | |
25 | #include "submodule-config.h" | |
26 | #include "object-store.h" | |
27 | ||
28 | /* Usage lines handed to parse_options() by cmd_grep(); the list is NULL-terminated. */ static char const * const grep_usage[] = { | |
29 | N_("git grep [<options>] [-e] <pattern> [<rev>...] [[--] <path>...]"), | |
30 | NULL | |
31 | }; | |
32 | ||
33 | static int recurse_submodules; /* set from the "submodule.recurse" config and the --recurse-submodules option */ | |
34 | ||
35 | #define GREP_NUM_THREADS_DEFAULT 8 /* NOTE(review): presumably the worker count used when none is configured; not referenced in this part of the file */ | |
36 | static int num_threads; /* set from the "grep.threads" config and the --threads option */ | |
37 | ||
38 | static pthread_t *threads; /* allocated in start_threads(), joined and freed in wait_all() */ | |
39 | ||
40 | /* We use one producer thread and THREADS consumer | |
41 | * threads. The producer adds struct work_items to 'todo' and the | |
42 | * consumers pick work items from the same array. | |
43 | */ | |
44 | struct work_item { | |
45 | struct grep_source source; /* what to search; copied in by add_work(), cleared in work_done() */ | |
46 | char done; /* 0 when queued by add_work(), set to 1 by work_done() once searched */ | |
47 | struct strbuf out; /* output collected through strbuf_out(); written to fd 1 by work_done() */ | |
48 | }; | |
49 | ||
50 | /* In the range [todo_done, todo_start) in 'todo' we have work_items | |
51 | * that have been or are processed by a consumer thread. We haven't | |
52 | * written the result for these to stdout yet. | |
53 | * | |
54 | * The work_items in [todo_start, todo_end) are waiting to be picked | |
55 | * up by a consumer thread. | |
56 | * | |
57 | * The ranges are modulo TODO_SIZE. | |
58 | */ | |
59 | #define TODO_SIZE 128 | |
60 | static struct work_item todo[TODO_SIZE]; | |
61 | static int todo_start; | |
62 | static int todo_end; | |
63 | static int todo_done; | |
64 | ||
65 | /* Have all work items been added? */ | |
66 | static int all_work_added; | |
67 | ||
68 | /* This lock protects all the variables above. */ | |
69 | static pthread_mutex_t grep_mutex; | |
70 | ||
71 | /* Acquire grep_mutex, the lock that protects the work queue variables. */ static inline void grep_lock(void) | |
72 | { | |
73 | pthread_mutex_lock(&grep_mutex); | |
74 | } | |
75 | ||
76 | /* Release grep_mutex; the counterpart of grep_lock(). */ static inline void grep_unlock(void) | |
77 | { | |
78 | pthread_mutex_unlock(&grep_mutex); | |
79 | } | |
80 | ||
81 | /* Signalled when a new work_item is added to todo. */ | |
82 | static pthread_cond_t cond_add; | |
83 | ||
84 | /* Signalled when the result from one work_item is written to | |
85 | * stdout. | |
86 | */ | |
87 | static pthread_cond_t cond_write; | |
88 | ||
89 | /* Signalled when we are finished with everything. */ | |
90 | static pthread_cond_t cond_result; | |
91 | ||
92 | static int skip_first_line; /* when set, work_done() drops the first line of the first non-empty output it writes, then clears this flag */ | |
93 | ||
94 | static void add_work(struct grep_opt *opt, const struct grep_source *gs) | |
95 | { | |
96 | grep_lock(); | |
97 | ||
98 | while ((todo_end+1) % ARRAY_SIZE(todo) == todo_done) { | |
99 | pthread_cond_wait(&cond_write, &grep_mutex); | |
100 | } | |
101 | ||
102 | todo[todo_end].source = *gs; | |
103 | if (opt->binary != GREP_BINARY_TEXT) | |
104 | grep_source_load_driver(&todo[todo_end].source, | |
105 | opt->repo->index); | |
106 | todo[todo_end].done = 0; | |
107 | strbuf_reset(&todo[todo_end].out); | |
108 | todo_end = (todo_end + 1) % ARRAY_SIZE(todo); | |
109 | ||
110 | pthread_cond_signal(&cond_add); | |
111 | grep_unlock(); | |
112 | } | |
113 | ||
114 | static struct work_item *get_work(void) | |
115 | { | |
116 | struct work_item *ret; | |
117 | ||
118 | grep_lock(); | |
119 | while (todo_start == todo_end && !all_work_added) { | |
120 | pthread_cond_wait(&cond_add, &grep_mutex); | |
121 | } | |
122 | ||
123 | if (todo_start == todo_end && all_work_added) { | |
124 | ret = NULL; | |
125 | } else { | |
126 | ret = &todo[todo_start]; | |
127 | todo_start = (todo_start + 1) % ARRAY_SIZE(todo); | |
128 | } | |
129 | grep_unlock(); | |
130 | return ret; | |
131 | } | |
132 | ||
133 | static void work_done(struct work_item *w) | |
134 | { | |
135 | int old_done; | |
136 | ||
137 | grep_lock(); | |
138 | w->done = 1; | |
139 | old_done = todo_done; | |
140 | for(; todo[todo_done].done && todo_done != todo_start; | |
141 | todo_done = (todo_done+1) % ARRAY_SIZE(todo)) { | |
142 | w = &todo[todo_done]; | |
143 | if (w->out.len) { | |
144 | const char *p = w->out.buf; | |
145 | size_t len = w->out.len; | |
146 | ||
147 | /* Skip the leading hunk mark of the first file. */ | |
148 | if (skip_first_line) { | |
149 | while (len) { | |
150 | len--; | |
151 | if (*p++ == '\n') | |
152 | break; | |
153 | } | |
154 | skip_first_line = 0; | |
155 | } | |
156 | ||
157 | write_or_die(1, p, len); | |
158 | } | |
159 | grep_source_clear(&w->source); | |
160 | } | |
161 | ||
162 | if (old_done != todo_done) | |
163 | pthread_cond_signal(&cond_write); | |
164 | ||
165 | if (all_work_added && todo_done == todo_end) | |
166 | pthread_cond_signal(&cond_result); | |
167 | ||
168 | grep_unlock(); | |
169 | } | |
170 | ||
171 | static void *run(void *arg) | |
172 | { | |
173 | int hit = 0; | |
174 | struct grep_opt *opt = arg; | |
175 | ||
176 | while (1) { | |
177 | struct work_item *w = get_work(); | |
178 | if (!w) | |
179 | break; | |
180 | ||
181 | opt->output_priv = w; | |
182 | hit |= grep_source(opt, &w->source); | |
183 | grep_source_clear_data(&w->source); | |
184 | work_done(w); | |
185 | } | |
186 | free_grep_patterns(arg); | |
187 | free(arg); | |
188 | ||
189 | return (void*) (intptr_t) hit; | |
190 | } | |
191 | ||
192 | static void strbuf_out(struct grep_opt *opt, const void *buf, size_t size) | |
193 | { | |
194 | struct work_item *w = opt->output_priv; | |
195 | strbuf_add(&w->out, buf, size); | |
196 | } | |
197 | ||
198 | static void start_threads(struct grep_opt *opt) | |
199 | { | |
200 | int i; | |
201 | ||
202 | pthread_mutex_init(&grep_mutex, NULL); | |
203 | pthread_mutex_init(&grep_read_mutex, NULL); | |
204 | pthread_mutex_init(&grep_attr_mutex, NULL); | |
205 | pthread_cond_init(&cond_add, NULL); | |
206 | pthread_cond_init(&cond_write, NULL); | |
207 | pthread_cond_init(&cond_result, NULL); | |
208 | grep_use_locks = 1; | |
209 | ||
210 | for (i = 0; i < ARRAY_SIZE(todo); i++) { | |
211 | strbuf_init(&todo[i].out, 0); | |
212 | } | |
213 | ||
214 | threads = xcalloc(num_threads, sizeof(*threads)); | |
215 | for (i = 0; i < num_threads; i++) { | |
216 | int err; | |
217 | struct grep_opt *o = grep_opt_dup(opt); | |
218 | o->output = strbuf_out; | |
219 | if (i) | |
220 | o->debug = 0; | |
221 | compile_grep_patterns(o); | |
222 | err = pthread_create(&threads[i], NULL, run, o); | |
223 | ||
224 | if (err) | |
225 | die(_("grep: failed to create thread: %s"), | |
226 | strerror(err)); | |
227 | } | |
228 | } | |
229 | ||
230 | static int wait_all(void) | |
231 | { | |
232 | int hit = 0; | |
233 | int i; | |
234 | ||
235 | if (!HAVE_THREADS) | |
236 | BUG("Never call this function unless you have started threads"); | |
237 | ||
238 | grep_lock(); | |
239 | all_work_added = 1; | |
240 | ||
241 | /* Wait until all work is done. */ | |
242 | while (todo_done != todo_end) | |
243 | pthread_cond_wait(&cond_result, &grep_mutex); | |
244 | ||
245 | /* Wake up all the consumer threads so they can see that there | |
246 | * is no more work to do. | |
247 | */ | |
248 | pthread_cond_broadcast(&cond_add); | |
249 | grep_unlock(); | |
250 | ||
251 | for (i = 0; i < num_threads; i++) { | |
252 | void *h; | |
253 | pthread_join(threads[i], &h); | |
254 | hit |= (int) (intptr_t) h; | |
255 | } | |
256 | ||
257 | free(threads); | |
258 | ||
259 | pthread_mutex_destroy(&grep_mutex); | |
260 | pthread_mutex_destroy(&grep_read_mutex); | |
261 | pthread_mutex_destroy(&grep_attr_mutex); | |
262 | pthread_cond_destroy(&cond_add); | |
263 | pthread_cond_destroy(&cond_write); | |
264 | pthread_cond_destroy(&cond_result); | |
265 | grep_use_locks = 0; | |
266 | ||
267 | return hit; | |
268 | } | |
269 | ||
270 | static int grep_cmd_config(const char *var, const char *value, void *cb) | |
271 | { | |
272 | int st = grep_config(var, value, cb); | |
273 | if (git_color_default_config(var, value, cb) < 0) | |
274 | st = -1; | |
275 | ||
276 | if (!strcmp(var, "grep.threads")) { | |
277 | num_threads = git_config_int(var, value); | |
278 | if (num_threads < 0) | |
279 | die(_("invalid number of threads specified (%d) for %s"), | |
280 | num_threads, var); | |
281 | else if (!HAVE_THREADS && num_threads > 1) { | |
282 | /* | |
283 | * TRANSLATORS: %s is the configuration | |
284 | * variable for tweaking threads, currently | |
285 | * grep.threads | |
286 | */ | |
287 | warning(_("no threads support, ignoring %s"), var); | |
288 | num_threads = 1; | |
289 | } | |
290 | } | |
291 | ||
292 | if (!strcmp(var, "submodule.recurse")) | |
293 | recurse_submodules = git_config_bool(var, value); | |
294 | ||
295 | return st; | |
296 | } | |
297 | ||
/*
 * read_object_file() wrapped in grep_read_lock()/grep_read_unlock().
 * Returns whatever read_object_file() returns.
 */
static void *lock_and_read_oid_file(const struct object_id *oid, enum object_type *type, unsigned long *size)
{
	void *contents;

	grep_read_lock();
	contents = read_object_file(oid, type, size);
	grep_read_unlock();

	return contents;
}
307 | ||
308 | static int grep_oid(struct grep_opt *opt, const struct object_id *oid, | |
309 | const char *filename, int tree_name_len, | |
310 | const char *path) | |
311 | { | |
312 | struct strbuf pathbuf = STRBUF_INIT; | |
313 | struct grep_source gs; | |
314 | ||
315 | if (opt->relative && opt->prefix_length) { | |
316 | quote_path_relative(filename + tree_name_len, opt->prefix, &pathbuf); | |
317 | strbuf_insert(&pathbuf, 0, filename, tree_name_len); | |
318 | } else { | |
319 | strbuf_addstr(&pathbuf, filename); | |
320 | } | |
321 | ||
322 | grep_source_init(&gs, GREP_SOURCE_OID, pathbuf.buf, path, oid); | |
323 | strbuf_release(&pathbuf); | |
324 | ||
325 | if (num_threads > 1) { | |
326 | /* | |
327 | * add_work() copies gs and thus assumes ownership of | |
328 | * its fields, so do not call grep_source_clear() | |
329 | */ | |
330 | add_work(opt, &gs); | |
331 | return 0; | |
332 | } else { | |
333 | int hit; | |
334 | ||
335 | hit = grep_source(opt, &gs); | |
336 | ||
337 | grep_source_clear(&gs); | |
338 | return hit; | |
339 | } | |
340 | } | |
341 | ||
342 | static int grep_file(struct grep_opt *opt, const char *filename) | |
343 | { | |
344 | struct strbuf buf = STRBUF_INIT; | |
345 | struct grep_source gs; | |
346 | ||
347 | if (opt->relative && opt->prefix_length) | |
348 | quote_path_relative(filename, opt->prefix, &buf); | |
349 | else | |
350 | strbuf_addstr(&buf, filename); | |
351 | ||
352 | grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename, filename); | |
353 | strbuf_release(&buf); | |
354 | ||
355 | if (num_threads > 1) { | |
356 | /* | |
357 | * add_work() copies gs and thus assumes ownership of | |
358 | * its fields, so do not call grep_source_clear() | |
359 | */ | |
360 | add_work(opt, &gs); | |
361 | return 0; | |
362 | } else { | |
363 | int hit; | |
364 | ||
365 | hit = grep_source(opt, &gs); | |
366 | ||
367 | grep_source_clear(&gs); | |
368 | return hit; | |
369 | } | |
370 | } | |
371 | ||
372 | static void append_path(struct grep_opt *opt, const void *data, size_t len) | |
373 | { | |
374 | struct string_list *path_list = opt->output_priv; | |
375 | ||
376 | if (len == 1 && *(const char *)data == '\0') | |
377 | return; | |
378 | string_list_append(path_list, xstrndup(data, len)); | |
379 | } | |
380 | ||
381 | static void run_pager(struct grep_opt *opt, const char *prefix) | |
382 | { | |
383 | struct string_list *path_list = opt->output_priv; | |
384 | struct child_process child = CHILD_PROCESS_INIT; | |
385 | int i, status; | |
386 | ||
387 | for (i = 0; i < path_list->nr; i++) | |
388 | argv_array_push(&child.args, path_list->items[i].string); | |
389 | child.dir = prefix; | |
390 | child.use_shell = 1; | |
391 | ||
392 | status = run_command(&child); | |
393 | if (status) | |
394 | exit(status); | |
395 | } | |
396 | ||
397 | static int grep_cache(struct grep_opt *opt, | |
398 | const struct pathspec *pathspec, int cached); | |
399 | static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, | |
400 | struct tree_desc *tree, struct strbuf *base, int tn_len, | |
401 | int check_attr); | |
402 | ||
403 | static int grep_submodule(struct grep_opt *opt, | |
404 | const struct pathspec *pathspec, | |
405 | const struct object_id *oid, | |
406 | const char *filename, const char *path, int cached) | |
407 | { | |
408 | struct repository subrepo; | |
409 | struct repository *superproject = opt->repo; | |
410 | const struct submodule *sub = submodule_from_path(superproject, | |
411 | &null_oid, path); | |
412 | struct grep_opt subopt; | |
413 | int hit; | |
414 | ||
415 | /* | |
416 | * NEEDSWORK: submodules functions need to be protected because they | |
417 | * access the object store via config_from_gitmodules(): the latter | |
418 | * uses get_oid() which, for now, relies on the global the_repository | |
419 | * object. | |
420 | */ | |
421 | grep_read_lock(); | |
422 | ||
423 | if (!is_submodule_active(superproject, path)) { | |
424 | grep_read_unlock(); | |
425 | return 0; | |
426 | } | |
427 | ||
428 | if (repo_submodule_init(&subrepo, superproject, sub)) { | |
429 | grep_read_unlock(); | |
430 | return 0; | |
431 | } | |
432 | ||
433 | repo_read_gitmodules(&subrepo); | |
434 | ||
435 | /* | |
436 | * NEEDSWORK: This adds the submodule's object directory to the list of | |
437 | * alternates for the single in-memory object store. This has some bad | |
438 | * consequences for memory (processed objects will never be freed) and | |
439 | * performance (this increases the number of pack files git has to pay | |
440 | * attention to, to the sum of the number of pack files in all the | |
441 | * repositories processed so far). This can be removed once the object | |
442 | * store is no longer global and instead is a member of the repository | |
443 | * object. | |
444 | */ | |
445 | add_to_alternates_memory(subrepo.objects->odb->path); | |
446 | grep_read_unlock(); | |
447 | ||
448 | memcpy(&subopt, opt, sizeof(subopt)); | |
449 | subopt.repo = &subrepo; | |
450 | ||
451 | if (oid) { | |
452 | struct object *object; | |
453 | struct tree_desc tree; | |
454 | void *data; | |
455 | unsigned long size; | |
456 | struct strbuf base = STRBUF_INIT; | |
457 | ||
458 | object = parse_object_or_die(oid, oid_to_hex(oid)); | |
459 | ||
460 | grep_read_lock(); | |
461 | data = read_object_with_reference(&subrepo, | |
462 | &object->oid, tree_type, | |
463 | &size, NULL); | |
464 | grep_read_unlock(); | |
465 | ||
466 | if (!data) | |
467 | die(_("unable to read tree (%s)"), oid_to_hex(&object->oid)); | |
468 | ||
469 | strbuf_addstr(&base, filename); | |
470 | strbuf_addch(&base, '/'); | |
471 | ||
472 | init_tree_desc(&tree, data, size); | |
473 | hit = grep_tree(&subopt, pathspec, &tree, &base, base.len, | |
474 | object->type == OBJ_COMMIT); | |
475 | strbuf_release(&base); | |
476 | free(data); | |
477 | } else { | |
478 | hit = grep_cache(&subopt, pathspec, cached); | |
479 | } | |
480 | ||
481 | repo_clear(&subrepo); | |
482 | return hit; | |
483 | } | |
484 | ||
485 | static int grep_cache(struct grep_opt *opt, | |
486 | const struct pathspec *pathspec, int cached) | |
487 | { | |
488 | struct repository *repo = opt->repo; | |
489 | int hit = 0; | |
490 | int nr; | |
491 | struct strbuf name = STRBUF_INIT; | |
492 | int name_base_len = 0; | |
493 | if (repo->submodule_prefix) { | |
494 | name_base_len = strlen(repo->submodule_prefix); | |
495 | strbuf_addstr(&name, repo->submodule_prefix); | |
496 | } | |
497 | ||
498 | if (repo_read_index(repo) < 0) | |
499 | die(_("index file corrupt")); | |
500 | ||
501 | for (nr = 0; nr < repo->index->cache_nr; nr++) { | |
502 | const struct cache_entry *ce = repo->index->cache[nr]; | |
503 | strbuf_setlen(&name, name_base_len); | |
504 | strbuf_addstr(&name, ce->name); | |
505 | ||
506 | if (S_ISREG(ce->ce_mode) && | |
507 | match_pathspec(repo->index, pathspec, name.buf, name.len, 0, NULL, | |
508 | S_ISDIR(ce->ce_mode) || | |
509 | S_ISGITLINK(ce->ce_mode))) { | |
510 | /* | |
511 | * If CE_VALID is on, we assume worktree file and its | |
512 | * cache entry are identical, even if worktree file has | |
513 | * been modified, so use cache version instead | |
514 | */ | |
515 | if (cached || (ce->ce_flags & CE_VALID) || | |
516 | ce_skip_worktree(ce)) { | |
517 | if (ce_stage(ce) || ce_intent_to_add(ce)) | |
518 | continue; | |
519 | hit |= grep_oid(opt, &ce->oid, name.buf, | |
520 | 0, name.buf); | |
521 | } else { | |
522 | hit |= grep_file(opt, name.buf); | |
523 | } | |
524 | } else if (recurse_submodules && S_ISGITLINK(ce->ce_mode) && | |
525 | submodule_path_match(repo->index, pathspec, name.buf, NULL)) { | |
526 | hit |= grep_submodule(opt, pathspec, NULL, ce->name, | |
527 | ce->name, cached); | |
528 | } else { | |
529 | continue; | |
530 | } | |
531 | ||
532 | if (ce_stage(ce)) { | |
533 | do { | |
534 | nr++; | |
535 | } while (nr < repo->index->cache_nr && | |
536 | !strcmp(ce->name, repo->index->cache[nr]->name)); | |
537 | nr--; /* compensate for loop control */ | |
538 | } | |
539 | if (hit && opt->status_only) | |
540 | break; | |
541 | } | |
542 | ||
543 | strbuf_release(&name); | |
544 | return hit; | |
545 | } | |
546 | ||
547 | static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, | |
548 | struct tree_desc *tree, struct strbuf *base, int tn_len, | |
549 | int check_attr) | |
550 | { | |
551 | struct repository *repo = opt->repo; | |
552 | int hit = 0; | |
553 | enum interesting match = entry_not_interesting; | |
554 | struct name_entry entry; | |
555 | int old_baselen = base->len; | |
556 | struct strbuf name = STRBUF_INIT; | |
557 | int name_base_len = 0; | |
558 | if (repo->submodule_prefix) { | |
559 | strbuf_addstr(&name, repo->submodule_prefix); | |
560 | name_base_len = name.len; | |
561 | } | |
562 | ||
563 | while (tree_entry(tree, &entry)) { | |
564 | int te_len = tree_entry_len(&entry); | |
565 | ||
566 | if (match != all_entries_interesting) { | |
567 | strbuf_addstr(&name, base->buf + tn_len); | |
568 | match = tree_entry_interesting(repo->index, | |
569 | &entry, &name, | |
570 | 0, pathspec); | |
571 | strbuf_setlen(&name, name_base_len); | |
572 | ||
573 | if (match == all_entries_not_interesting) | |
574 | break; | |
575 | if (match == entry_not_interesting) | |
576 | continue; | |
577 | } | |
578 | ||
579 | strbuf_add(base, entry.path, te_len); | |
580 | ||
581 | if (S_ISREG(entry.mode)) { | |
582 | hit |= grep_oid(opt, &entry.oid, base->buf, tn_len, | |
583 | check_attr ? base->buf + tn_len : NULL); | |
584 | } else if (S_ISDIR(entry.mode)) { | |
585 | enum object_type type; | |
586 | struct tree_desc sub; | |
587 | void *data; | |
588 | unsigned long size; | |
589 | ||
590 | data = lock_and_read_oid_file(&entry.oid, &type, &size); | |
591 | if (!data) | |
592 | die(_("unable to read tree (%s)"), | |
593 | oid_to_hex(&entry.oid)); | |
594 | ||
595 | strbuf_addch(base, '/'); | |
596 | init_tree_desc(&sub, data, size); | |
597 | hit |= grep_tree(opt, pathspec, &sub, base, tn_len, | |
598 | check_attr); | |
599 | free(data); | |
600 | } else if (recurse_submodules && S_ISGITLINK(entry.mode)) { | |
601 | hit |= grep_submodule(opt, pathspec, &entry.oid, | |
602 | base->buf, base->buf + tn_len, | |
603 | 1); /* ignored */ | |
604 | } | |
605 | ||
606 | strbuf_setlen(base, old_baselen); | |
607 | ||
608 | if (hit && opt->status_only) | |
609 | break; | |
610 | } | |
611 | ||
612 | strbuf_release(&name); | |
613 | return hit; | |
614 | } | |
615 | ||
616 | static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec, | |
617 | struct object *obj, const char *name, const char *path) | |
618 | { | |
619 | if (obj->type == OBJ_BLOB) | |
620 | return grep_oid(opt, &obj->oid, name, 0, path); | |
621 | if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) { | |
622 | struct tree_desc tree; | |
623 | void *data; | |
624 | unsigned long size; | |
625 | struct strbuf base; | |
626 | int hit, len; | |
627 | ||
628 | grep_read_lock(); | |
629 | data = read_object_with_reference(opt->repo, | |
630 | &obj->oid, tree_type, | |
631 | &size, NULL); | |
632 | grep_read_unlock(); | |
633 | ||
634 | if (!data) | |
635 | die(_("unable to read tree (%s)"), oid_to_hex(&obj->oid)); | |
636 | ||
637 | len = name ? strlen(name) : 0; | |
638 | strbuf_init(&base, PATH_MAX + len + 1); | |
639 | if (len) { | |
640 | strbuf_add(&base, name, len); | |
641 | strbuf_addch(&base, ':'); | |
642 | } | |
643 | init_tree_desc(&tree, data, size); | |
644 | hit = grep_tree(opt, pathspec, &tree, &base, base.len, | |
645 | obj->type == OBJ_COMMIT); | |
646 | strbuf_release(&base); | |
647 | free(data); | |
648 | return hit; | |
649 | } | |
650 | die(_("unable to grep from object of type %s"), type_name(obj->type)); | |
651 | } | |
652 | ||
653 | static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec, | |
654 | const struct object_array *list) | |
655 | { | |
656 | unsigned int i; | |
657 | int hit = 0; | |
658 | const unsigned int nr = list->nr; | |
659 | ||
660 | for (i = 0; i < nr; i++) { | |
661 | struct object *real_obj; | |
662 | real_obj = deref_tag(opt->repo, list->objects[i].item, | |
663 | NULL, 0); | |
664 | ||
665 | /* load the gitmodules file for this rev */ | |
666 | if (recurse_submodules) { | |
667 | submodule_free(opt->repo); | |
668 | gitmodules_config_oid(&real_obj->oid); | |
669 | } | |
670 | if (grep_object(opt, pathspec, real_obj, list->objects[i].name, | |
671 | list->objects[i].path)) { | |
672 | hit = 1; | |
673 | if (opt->status_only) | |
674 | break; | |
675 | } | |
676 | } | |
677 | return hit; | |
678 | } | |
679 | ||
680 | static int grep_directory(struct grep_opt *opt, const struct pathspec *pathspec, | |
681 | int exc_std, int use_index) | |
682 | { | |
683 | struct dir_struct dir; | |
684 | int i, hit = 0; | |
685 | ||
686 | memset(&dir, 0, sizeof(dir)); | |
687 | if (!use_index) | |
688 | dir.flags |= DIR_NO_GITLINKS; | |
689 | if (exc_std) | |
690 | setup_standard_excludes(&dir); | |
691 | ||
692 | fill_directory(&dir, opt->repo->index, pathspec); | |
693 | for (i = 0; i < dir.nr; i++) { | |
694 | if (!dir_path_match(opt->repo->index, dir.entries[i], pathspec, 0, NULL)) | |
695 | continue; | |
696 | hit |= grep_file(opt, dir.entries[i]->name); | |
697 | if (hit && opt->status_only) | |
698 | break; | |
699 | } | |
700 | return hit; | |
701 | } | |
702 | ||
703 | static int context_callback(const struct option *opt, const char *arg, | |
704 | int unset) | |
705 | { | |
706 | struct grep_opt *grep_opt = opt->value; | |
707 | int value; | |
708 | const char *endp; | |
709 | ||
710 | if (unset) { | |
711 | grep_opt->pre_context = grep_opt->post_context = 0; | |
712 | return 0; | |
713 | } | |
714 | value = strtol(arg, (char **)&endp, 10); | |
715 | if (*endp) { | |
716 | return error(_("switch `%c' expects a numerical value"), | |
717 | opt->short_name); | |
718 | } | |
719 | grep_opt->pre_context = grep_opt->post_context = value; | |
720 | return 0; | |
721 | } | |
722 | ||
723 | static int file_callback(const struct option *opt, const char *arg, int unset) | |
724 | { | |
725 | struct grep_opt *grep_opt = opt->value; | |
726 | int from_stdin; | |
727 | FILE *patterns; | |
728 | int lno = 0; | |
729 | struct strbuf sb = STRBUF_INIT; | |
730 | ||
731 | BUG_ON_OPT_NEG(unset); | |
732 | ||
733 | from_stdin = !strcmp(arg, "-"); | |
734 | patterns = from_stdin ? stdin : fopen(arg, "r"); | |
735 | if (!patterns) | |
736 | die_errno(_("cannot open '%s'"), arg); | |
737 | while (strbuf_getline(&sb, patterns) == 0) { | |
738 | /* ignore empty line like grep does */ | |
739 | if (sb.len == 0) | |
740 | continue; | |
741 | ||
742 | append_grep_pat(grep_opt, sb.buf, sb.len, arg, ++lno, | |
743 | GREP_PATTERN); | |
744 | } | |
745 | if (!from_stdin) | |
746 | fclose(patterns); | |
747 | strbuf_release(&sb); | |
748 | return 0; | |
749 | } | |
750 | ||
751 | static int not_callback(const struct option *opt, const char *arg, int unset) | |
752 | { | |
753 | struct grep_opt *grep_opt = opt->value; | |
754 | BUG_ON_OPT_NEG(unset); | |
755 | BUG_ON_OPT_ARG(arg); | |
756 | append_grep_pattern(grep_opt, "--not", "command line", 0, GREP_NOT); | |
757 | return 0; | |
758 | } | |
759 | ||
760 | static int and_callback(const struct option *opt, const char *arg, int unset) | |
761 | { | |
762 | struct grep_opt *grep_opt = opt->value; | |
763 | BUG_ON_OPT_NEG(unset); | |
764 | BUG_ON_OPT_ARG(arg); | |
765 | append_grep_pattern(grep_opt, "--and", "command line", 0, GREP_AND); | |
766 | return 0; | |
767 | } | |
768 | ||
769 | static int open_callback(const struct option *opt, const char *arg, int unset) | |
770 | { | |
771 | struct grep_opt *grep_opt = opt->value; | |
772 | BUG_ON_OPT_NEG(unset); | |
773 | BUG_ON_OPT_ARG(arg); | |
774 | append_grep_pattern(grep_opt, "(", "command line", 0, GREP_OPEN_PAREN); | |
775 | return 0; | |
776 | } | |
777 | ||
778 | static int close_callback(const struct option *opt, const char *arg, int unset) | |
779 | { | |
780 | struct grep_opt *grep_opt = opt->value; | |
781 | BUG_ON_OPT_NEG(unset); | |
782 | BUG_ON_OPT_ARG(arg); | |
783 | append_grep_pattern(grep_opt, ")", "command line", 0, GREP_CLOSE_PAREN); | |
784 | return 0; | |
785 | } | |
786 | ||
787 | static int pattern_callback(const struct option *opt, const char *arg, | |
788 | int unset) | |
789 | { | |
790 | struct grep_opt *grep_opt = opt->value; | |
791 | BUG_ON_OPT_NEG(unset); | |
792 | append_grep_pattern(grep_opt, arg, "-e option", 0, GREP_PATTERN); | |
793 | return 0; | |
794 | } | |
795 | ||
796 | int cmd_grep(int argc, const char **argv, const char *prefix) | |
797 | { | |
798 | int hit = 0; | |
799 | int cached = 0, untracked = 0, opt_exclude = -1; | |
800 | int seen_dashdash = 0; | |
801 | int external_grep_allowed__ignored; | |
802 | const char *show_in_pager = NULL, *default_pager = "dummy"; | |
803 | struct grep_opt opt; | |
804 | struct object_array list = OBJECT_ARRAY_INIT; | |
805 | struct pathspec pathspec; | |
806 | struct string_list path_list = STRING_LIST_INIT_NODUP; | |
807 | int i; | |
808 | int dummy; | |
809 | int use_index = 1; | |
810 | int pattern_type_arg = GREP_PATTERN_TYPE_UNSPECIFIED; | |
811 | int allow_revs; | |
812 | ||
813 | struct option options[] = { | |
814 | OPT_BOOL(0, "cached", &cached, | |
815 | N_("search in index instead of in the work tree")), | |
816 | OPT_NEGBIT(0, "no-index", &use_index, | |
817 | N_("find in contents not managed by git"), 1), | |
818 | OPT_BOOL(0, "untracked", &untracked, | |
819 | N_("search in both tracked and untracked files")), | |
820 | OPT_SET_INT(0, "exclude-standard", &opt_exclude, | |
821 | N_("ignore files specified via '.gitignore'"), 1), | |
822 | OPT_BOOL(0, "recurse-submodules", &recurse_submodules, | |
823 | N_("recursively search in each submodule")), | |
824 | OPT_GROUP(""), | |
825 | OPT_BOOL('v', "invert-match", &opt.invert, | |
826 | N_("show non-matching lines")), | |
827 | OPT_BOOL('i', "ignore-case", &opt.ignore_case, | |
828 | N_("case insensitive matching")), | |
829 | OPT_BOOL('w', "word-regexp", &opt.word_regexp, | |
830 | N_("match patterns only at word boundaries")), | |
831 | OPT_SET_INT('a', "text", &opt.binary, | |
832 | N_("process binary files as text"), GREP_BINARY_TEXT), | |
833 | OPT_SET_INT('I', NULL, &opt.binary, | |
834 | N_("don't match patterns in binary files"), | |
835 | GREP_BINARY_NOMATCH), | |
836 | OPT_BOOL(0, "textconv", &opt.allow_textconv, | |
837 | N_("process binary files with textconv filters")), | |
838 | OPT_SET_INT('r', "recursive", &opt.max_depth, | |
839 | N_("search in subdirectories (default)"), -1), | |
840 | { OPTION_INTEGER, 0, "max-depth", &opt.max_depth, N_("depth"), | |
841 | N_("descend at most <depth> levels"), PARSE_OPT_NONEG, | |
842 | NULL, 1 }, | |
843 | OPT_GROUP(""), | |
844 | OPT_SET_INT('E', "extended-regexp", &pattern_type_arg, | |
845 | N_("use extended POSIX regular expressions"), | |
846 | GREP_PATTERN_TYPE_ERE), | |
847 | OPT_SET_INT('G', "basic-regexp", &pattern_type_arg, | |
848 | N_("use basic POSIX regular expressions (default)"), | |
849 | GREP_PATTERN_TYPE_BRE), | |
850 | OPT_SET_INT('F', "fixed-strings", &pattern_type_arg, | |
851 | N_("interpret patterns as fixed strings"), | |
852 | GREP_PATTERN_TYPE_FIXED), | |
853 | OPT_SET_INT('P', "perl-regexp", &pattern_type_arg, | |
854 | N_("use Perl-compatible regular expressions"), | |
855 | GREP_PATTERN_TYPE_PCRE), | |
856 | OPT_GROUP(""), | |
857 | OPT_BOOL('n', "line-number", &opt.linenum, N_("show line numbers")), | |
858 | OPT_BOOL(0, "column", &opt.columnnum, N_("show column number of first match")), | |
859 | OPT_NEGBIT('h', NULL, &opt.pathname, N_("don't show filenames"), 1), | |
860 | OPT_BIT('H', NULL, &opt.pathname, N_("show filenames"), 1), | |
861 | OPT_NEGBIT(0, "full-name", &opt.relative, | |
862 | N_("show filenames relative to top directory"), 1), | |
863 | OPT_BOOL('l', "files-with-matches", &opt.name_only, | |
864 | N_("show only filenames instead of matching lines")), | |
865 | OPT_BOOL(0, "name-only", &opt.name_only, | |
866 | N_("synonym for --files-with-matches")), | |
867 | OPT_BOOL('L', "files-without-match", | |
868 | &opt.unmatch_name_only, | |
869 | N_("show only the names of files without match")), | |
870 | OPT_BOOL_F('z', "null", &opt.null_following_name, | |
871 | N_("print NUL after filenames"), | |
872 | PARSE_OPT_NOCOMPLETE), | |
873 | OPT_BOOL('o', "only-matching", &opt.only_matching, | |
874 | N_("show only matching parts of a line")), | |
875 | OPT_BOOL('c', "count", &opt.count, | |
876 | N_("show the number of matches instead of matching lines")), | |
877 | OPT__COLOR(&opt.color, N_("highlight matches")), | |
878 | OPT_BOOL(0, "break", &opt.file_break, | |
879 | N_("print empty line between matches from different files")), | |
880 | OPT_BOOL(0, "heading", &opt.heading, | |
881 | N_("show filename only once above matches from same file")), | |
882 | OPT_GROUP(""), | |
883 | OPT_CALLBACK('C', "context", &opt, N_("n"), | |
884 | N_("show <n> context lines before and after matches"), | |
885 | context_callback), | |
886 | OPT_INTEGER('B', "before-context", &opt.pre_context, | |
887 | N_("show <n> context lines before matches")), | |
888 | OPT_INTEGER('A', "after-context", &opt.post_context, | |
889 | N_("show <n> context lines after matches")), | |
890 | OPT_INTEGER(0, "threads", &num_threads, | |
891 | N_("use <n> worker threads")), | |
892 | OPT_NUMBER_CALLBACK(&opt, N_("shortcut for -C NUM"), | |
893 | context_callback), | |
894 | OPT_BOOL('p', "show-function", &opt.funcname, | |
895 | N_("show a line with the function name before matches")), | |
896 | OPT_BOOL('W', "function-context", &opt.funcbody, | |
897 | N_("show the surrounding function")), | |
898 | OPT_GROUP(""), | |
899 | OPT_CALLBACK('f', NULL, &opt, N_("file"), | |
900 | N_("read patterns from file"), file_callback), | |
901 | { OPTION_CALLBACK, 'e', NULL, &opt, N_("pattern"), | |
902 | N_("match <pattern>"), PARSE_OPT_NONEG, pattern_callback }, | |
903 | { OPTION_CALLBACK, 0, "and", &opt, NULL, | |
904 | N_("combine patterns specified with -e"), | |
905 | PARSE_OPT_NOARG | PARSE_OPT_NONEG, and_callback }, | |
906 | OPT_BOOL(0, "or", &dummy, ""), | |
907 | { OPTION_CALLBACK, 0, "not", &opt, NULL, "", | |
908 | PARSE_OPT_NOARG | PARSE_OPT_NONEG, not_callback }, | |
909 | { OPTION_CALLBACK, '(', NULL, &opt, NULL, "", | |
910 | PARSE_OPT_NOARG | PARSE_OPT_NONEG | PARSE_OPT_NODASH, | |
911 | open_callback }, | |
912 | { OPTION_CALLBACK, ')', NULL, &opt, NULL, "", | |
913 | PARSE_OPT_NOARG | PARSE_OPT_NONEG | PARSE_OPT_NODASH, | |
914 | close_callback }, | |
915 | OPT__QUIET(&opt.status_only, | |
916 | N_("indicate hit with exit status without output")), | |
917 | OPT_BOOL(0, "all-match", &opt.all_match, | |
918 | N_("show only matches from files that match all patterns")), | |
919 | OPT_SET_INT_F(0, "debug", &opt.debug, | |
920 | N_("show parse tree for grep expression"), | |
921 | 1, PARSE_OPT_HIDDEN), | |
922 | OPT_GROUP(""), | |
923 | { OPTION_STRING, 'O', "open-files-in-pager", &show_in_pager, | |
924 | N_("pager"), N_("show matching files in the pager"), | |
925 | PARSE_OPT_OPTARG | PARSE_OPT_NOCOMPLETE, | |
926 | NULL, (intptr_t)default_pager }, | |
927 | OPT_BOOL_F(0, "ext-grep", &external_grep_allowed__ignored, | |
928 | N_("allow calling of grep(1) (ignored by this build)"), | |
929 | PARSE_OPT_NOCOMPLETE), | |
930 | OPT_END() | |
931 | }; | |
932 | ||
933 | init_grep_defaults(the_repository); | |
934 | git_config(grep_cmd_config, NULL); | |
935 | grep_init(&opt, the_repository, prefix); | |
936 | ||
937 | /* | |
938 | * If there is no -- then the paths must exist in the working | |
939 | * tree. If there is no explicit pattern specified with -e or | |
940 | * -f, we take the first unrecognized non option to be the | |
941 | * pattern, but then what follows it must be zero or more | |
942 | * valid refs up to the -- (if exists), and then existing | |
943 | * paths. If there is an explicit pattern, then the first | |
944 | * unrecognized non option is the beginning of the refs list | |
945 | * that continues up to the -- (if exists), and then paths. | |
946 | */ | |
947 | argc = parse_options(argc, argv, prefix, options, grep_usage, | |
948 | PARSE_OPT_KEEP_DASHDASH | | |
949 | PARSE_OPT_STOP_AT_NON_OPTION); | |
950 | grep_commit_pattern_type(pattern_type_arg, &opt); | |
951 | ||
952 | if (use_index && !startup_info->have_repository) { | |
953 | int fallback = 0; | |
954 | git_config_get_bool("grep.fallbacktonoindex", &fallback); | |
955 | if (fallback) | |
956 | use_index = 0; | |
957 | else | |
958 | /* die the same way as if we did it at the beginning */ | |
959 | setup_git_directory(); | |
960 | } | |
961 | ||
962 | /* | |
963 | * skip a -- separator; we know it cannot be | |
964 | * separating revisions from pathnames if | |
965 | * we haven't even had any patterns yet | |
966 | */ | |
967 | if (argc > 0 && !opt.pattern_list && !strcmp(argv[0], "--")) { | |
968 | argv++; | |
969 | argc--; | |
970 | } | |
971 | ||
972 | /* First unrecognized non-option token */ | |
973 | if (argc > 0 && !opt.pattern_list) { | |
974 | append_grep_pattern(&opt, argv[0], "command line", 0, | |
975 | GREP_PATTERN); | |
976 | argv++; | |
977 | argc--; | |
978 | } | |
979 | ||
980 | if (show_in_pager == default_pager) | |
981 | show_in_pager = git_pager(1); | |
982 | if (show_in_pager) { | |
983 | opt.color = 0; | |
984 | opt.name_only = 1; | |
985 | opt.null_following_name = 1; | |
986 | opt.output_priv = &path_list; | |
987 | opt.output = append_path; | |
988 | string_list_append(&path_list, show_in_pager); | |
989 | } | |
990 | ||
991 | if (!opt.pattern_list) | |
992 | die(_("no pattern given")); | |
993 | ||
994 | /* --only-matching has no effect with --invert. */ | |
995 | if (opt.invert) | |
996 | opt.only_matching = 0; | |
997 | ||
998 | /* | |
999 | * We have to find "--" in a separate pass, because its presence | |
1000 | * influences how we will parse arguments that come before it. | |
1001 | */ | |
1002 | for (i = 0; i < argc; i++) { | |
1003 | if (!strcmp(argv[i], "--")) { | |
1004 | seen_dashdash = 1; | |
1005 | break; | |
1006 | } | |
1007 | } | |
1008 | ||
1009 | /* | |
1010 | * Resolve any rev arguments. If we have a dashdash, then everything up | |
1011 | * to it must resolve as a rev. If not, then we stop at the first | |
1012 | * non-rev and assume everything else is a path. | |
1013 | */ | |
1014 | allow_revs = use_index && !untracked; | |
1015 | for (i = 0; i < argc; i++) { | |
1016 | const char *arg = argv[i]; | |
1017 | struct object_id oid; | |
1018 | struct object_context oc; | |
1019 | struct object *object; | |
1020 | ||
1021 | if (!strcmp(arg, "--")) { | |
1022 | i++; | |
1023 | break; | |
1024 | } | |
1025 | ||
1026 | if (!allow_revs) { | |
1027 | if (seen_dashdash) | |
1028 | die(_("--no-index or --untracked cannot be used with revs")); | |
1029 | break; | |
1030 | } | |
1031 | ||
1032 | if (get_oid_with_context(the_repository, arg, | |
1033 | GET_OID_RECORD_PATH, | |
1034 | &oid, &oc)) { | |
1035 | if (seen_dashdash) | |
1036 | die(_("unable to resolve revision: %s"), arg); | |
1037 | break; | |
1038 | } | |
1039 | ||
1040 | object = parse_object_or_die(&oid, arg); | |
1041 | if (!seen_dashdash) | |
1042 | verify_non_filename(prefix, arg); | |
1043 | add_object_array_with_path(object, arg, &list, oc.mode, oc.path); | |
1044 | free(oc.path); | |
1045 | } | |
1046 | ||
1047 | /* | |
1048 | * Anything left over is presumed to be a path. But in the non-dashdash | |
1049 | * "do what I mean" case, we verify and complain when that isn't true. | |
1050 | */ | |
1051 | if (!seen_dashdash) { | |
1052 | int j; | |
1053 | for (j = i; j < argc; j++) | |
1054 | verify_filename(prefix, argv[j], j == i && allow_revs); | |
1055 | } | |
1056 | ||
1057 | parse_pathspec(&pathspec, 0, | |
1058 | PATHSPEC_PREFER_CWD | | |
1059 | (opt.max_depth != -1 ? PATHSPEC_MAXDEPTH_VALID : 0), | |
1060 | prefix, argv + i); | |
1061 | pathspec.max_depth = opt.max_depth; | |
1062 | pathspec.recursive = 1; | |
1063 | pathspec.recurse_submodules = !!recurse_submodules; | |
1064 | ||
1065 | if (list.nr || cached || show_in_pager) { | |
1066 | if (num_threads > 1) | |
1067 | warning(_("invalid option combination, ignoring --threads")); | |
1068 | num_threads = 1; | |
1069 | } else if (!HAVE_THREADS && num_threads > 1) { | |
1070 | warning(_("no threads support, ignoring --threads")); | |
1071 | num_threads = 1; | |
1072 | } else if (num_threads < 0) | |
1073 | die(_("invalid number of threads specified (%d)"), num_threads); | |
1074 | else if (num_threads == 0) | |
1075 | num_threads = HAVE_THREADS ? GREP_NUM_THREADS_DEFAULT : 1; | |
1076 | ||
1077 | if (num_threads > 1) { | |
1078 | if (!HAVE_THREADS) | |
1079 | BUG("Somebody got num_threads calculation wrong!"); | |
1080 | if (!(opt.name_only || opt.unmatch_name_only || opt.count) | |
1081 | && (opt.pre_context || opt.post_context || | |
1082 | opt.file_break || opt.funcbody)) | |
1083 | skip_first_line = 1; | |
1084 | start_threads(&opt); | |
1085 | } else { | |
1086 | /* | |
1087 | * The compiled patterns on the main path are only | |
1088 | * used when not using threading. Otherwise | |
1089 | * start_threads() above calls compile_grep_patterns() | |
1090 | * for each thread. | |
1091 | */ | |
1092 | compile_grep_patterns(&opt); | |
1093 | } | |
1094 | ||
1095 | if (show_in_pager && (cached || list.nr)) | |
1096 | die(_("--open-files-in-pager only works on the worktree")); | |
1097 | ||
1098 | if (show_in_pager && opt.pattern_list && !opt.pattern_list->next) { | |
1099 | const char *pager = path_list.items[0].string; | |
1100 | int len = strlen(pager); | |
1101 | ||
1102 | if (len > 4 && is_dir_sep(pager[len - 5])) | |
1103 | pager += len - 4; | |
1104 | ||
1105 | if (opt.ignore_case && !strcmp("less", pager)) | |
1106 | string_list_append(&path_list, "-I"); | |
1107 | ||
1108 | if (!strcmp("less", pager) || !strcmp("vi", pager)) { | |
1109 | struct strbuf buf = STRBUF_INIT; | |
1110 | strbuf_addf(&buf, "+/%s%s", | |
1111 | strcmp("less", pager) ? "" : "*", | |
1112 | opt.pattern_list->pattern); | |
1113 | string_list_append(&path_list, | |
1114 | strbuf_detach(&buf, NULL)); | |
1115 | } | |
1116 | } | |
1117 | ||
1118 | if (recurse_submodules && (!use_index || untracked)) | |
1119 | die(_("option not supported with --recurse-submodules")); | |
1120 | ||
1121 | if (!show_in_pager && !opt.status_only) | |
1122 | setup_pager(); | |
1123 | ||
1124 | if (!use_index && (untracked || cached)) | |
1125 | die(_("--cached or --untracked cannot be used with --no-index")); | |
1126 | ||
1127 | if (!use_index || untracked) { | |
1128 | int use_exclude = (opt_exclude < 0) ? use_index : !!opt_exclude; | |
1129 | hit = grep_directory(&opt, &pathspec, use_exclude, use_index); | |
1130 | } else if (0 <= opt_exclude) { | |
1131 | die(_("--[no-]exclude-standard cannot be used for tracked contents")); | |
1132 | } else if (!list.nr) { | |
1133 | if (!cached) | |
1134 | setup_work_tree(); | |
1135 | ||
1136 | hit = grep_cache(&opt, &pathspec, cached); | |
1137 | } else { | |
1138 | if (cached) | |
1139 | die(_("both --cached and trees are given")); | |
1140 | ||
1141 | hit = grep_objects(&opt, &pathspec, &list); | |
1142 | } | |
1143 | ||
1144 | if (num_threads > 1) | |
1145 | hit |= wait_all(); | |
1146 | if (hit && show_in_pager) | |
1147 | run_pager(&opt, prefix); | |
1148 | clear_pathspec(&pathspec); | |
1149 | free_grep_patterns(&opt); | |
1150 | return !hit; | |
1151 | } |