]> git.ipfire.org Git - thirdparty/git.git/blob - builtin/fast-export.c
revisions API users: add straightforward release_revisions()
[thirdparty/git.git] / builtin / fast-export.c
1 /*
2 * "git fast-export" builtin command
3 *
4 * Copyright (C) 2007 Johannes E. Schindelin
5 */
6 #include "builtin.h"
7 #include "cache.h"
8 #include "config.h"
9 #include "refs.h"
10 #include "refspec.h"
11 #include "object-store.h"
12 #include "commit.h"
13 #include "object.h"
14 #include "tag.h"
15 #include "diff.h"
16 #include "diffcore.h"
17 #include "log-tree.h"
18 #include "revision.h"
19 #include "decorate.h"
20 #include "string-list.h"
21 #include "utf8.h"
22 #include "parse-options.h"
23 #include "quote.h"
24 #include "remote.h"
25 #include "blob.h"
26 #include "commit-slab.h"
27
28 static const char *fast_export_usage[] = {
29 N_("git fast-export [<rev-list-opts>]"),
30 NULL
31 };
32
33 static int progress;
34 static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT;
35 static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
36 static enum { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT;
37 static int fake_missing_tagger;
38 static int use_done_feature;
39 static int no_data;
40 static int full_tree;
41 static int reference_excluded_commits;
42 static int show_original_ids;
43 static int mark_tags;
44 static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
45 static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
46 static struct refspec refspecs = REFSPEC_INIT_FETCH;
47 static int anonymize;
48 static struct hashmap anonymized_seeds;
49 static struct revision_sources revision_sources;
50
51 static int parse_opt_signed_tag_mode(const struct option *opt,
52 const char *arg, int unset)
53 {
54 if (unset || !strcmp(arg, "abort"))
55 signed_tag_mode = SIGNED_TAG_ABORT;
56 else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
57 signed_tag_mode = VERBATIM;
58 else if (!strcmp(arg, "warn"))
59 signed_tag_mode = WARN;
60 else if (!strcmp(arg, "warn-strip"))
61 signed_tag_mode = WARN_STRIP;
62 else if (!strcmp(arg, "strip"))
63 signed_tag_mode = STRIP;
64 else
65 return error("Unknown signed-tags mode: %s", arg);
66 return 0;
67 }
68
69 static int parse_opt_tag_of_filtered_mode(const struct option *opt,
70 const char *arg, int unset)
71 {
72 if (unset || !strcmp(arg, "abort"))
73 tag_of_filtered_mode = TAG_FILTERING_ABORT;
74 else if (!strcmp(arg, "drop"))
75 tag_of_filtered_mode = DROP;
76 else if (!strcmp(arg, "rewrite"))
77 tag_of_filtered_mode = REWRITE;
78 else
79 return error("Unknown tag-of-filtered mode: %s", arg);
80 return 0;
81 }
82
83 static int parse_opt_reencode_mode(const struct option *opt,
84 const char *arg, int unset)
85 {
86 if (unset) {
87 reencode_mode = REENCODE_ABORT;
88 return 0;
89 }
90
91 switch (git_parse_maybe_bool(arg)) {
92 case 0:
93 reencode_mode = REENCODE_NO;
94 break;
95 case 1:
96 reencode_mode = REENCODE_YES;
97 break;
98 default:
99 if (!strcasecmp(arg, "abort"))
100 reencode_mode = REENCODE_ABORT;
101 else
102 return error("Unknown reencoding mode: %s", arg);
103 }
104
105 return 0;
106 }
107
108 static struct decoration idnums;
109 static uint32_t last_idnum;
110 struct anonymized_entry {
111 struct hashmap_entry hash;
112 const char *anon;
113 const char orig[FLEX_ARRAY];
114 };
115
116 struct anonymized_entry_key {
117 struct hashmap_entry hash;
118 const char *orig;
119 size_t orig_len;
120 };
121
122 static int anonymized_entry_cmp(const void *unused_cmp_data,
123 const struct hashmap_entry *eptr,
124 const struct hashmap_entry *entry_or_key,
125 const void *keydata)
126 {
127 const struct anonymized_entry *a, *b;
128
129 a = container_of(eptr, const struct anonymized_entry, hash);
130 if (keydata) {
131 const struct anonymized_entry_key *key = keydata;
132 int equal = !strncmp(a->orig, key->orig, key->orig_len) &&
133 !a->orig[key->orig_len];
134 return !equal;
135 }
136
137 b = container_of(entry_or_key, const struct anonymized_entry, hash);
138 return strcmp(a->orig, b->orig);
139 }
140
141 /*
142 * Basically keep a cache of X->Y so that we can repeatedly replace
143 * the same anonymized string with another. The actual generation
144 * is farmed out to the generate function.
145 */
146 static const char *anonymize_str(struct hashmap *map,
147 char *(*generate)(void *),
148 const char *orig, size_t len,
149 void *data)
150 {
151 struct anonymized_entry_key key;
152 struct anonymized_entry *ret;
153
154 if (!map->cmpfn)
155 hashmap_init(map, anonymized_entry_cmp, NULL, 0);
156
157 hashmap_entry_init(&key.hash, memhash(orig, len));
158 key.orig = orig;
159 key.orig_len = len;
160
161 /* First check if it's a token the user configured manually... */
162 if (anonymized_seeds.cmpfn)
163 ret = hashmap_get_entry(&anonymized_seeds, &key, hash, &key);
164 else
165 ret = NULL;
166
167 /* ...otherwise check if we've already seen it in this context... */
168 if (!ret)
169 ret = hashmap_get_entry(map, &key, hash, &key);
170
171 /* ...and finally generate a new mapping if necessary */
172 if (!ret) {
173 FLEX_ALLOC_MEM(ret, orig, orig, len);
174 hashmap_entry_init(&ret->hash, key.hash.hash);
175 ret->anon = generate(data);
176 hashmap_put(map, &ret->hash);
177 }
178
179 return ret->anon;
180 }
181
/*
 * Append an anonymized version of `path` to `out`.
 *
 * Each '/'-separated component is anonymized on its own, so that
 * paths a/b and a/c still share a common root.  Components are cached
 * through anonymize_str(), so repeated lookups for "a" give the same
 * value.  The '/' separators are copied through unchanged.
 */
static void anonymize_path(struct strbuf *out, const char *path,
			   struct hashmap *map,
			   char *(*generate)(void *))
{
	while (*path) {
		const char *sep = strchrnul(path, '/');
		const char *replacement =
			anonymize_str(map, generate, path, sep - path, NULL);

		strbuf_addstr(out, replacement);
		path = sep;
		if (*path) {
			strbuf_addch(out, *path);
			path++;
		}
	}
}
202
/* Encode a mark number as a pointer value, for use as a decoration. */
static inline void *mark_to_ptr(uint32_t mark)
{
	uintptr_t as_int = mark;

	return (void *)as_int;
}
207
/* Decode a mark number from a pointer produced by mark_to_ptr(). */
static inline uint32_t ptr_to_mark(void * mark)
{
	uintptr_t as_int = (uintptr_t)mark;

	return (uint32_t)as_int;
}
212
213 static inline void mark_object(struct object *object, uint32_t mark)
214 {
215 add_decoration(&idnums, object, mark_to_ptr(mark));
216 }
217
218 static inline void mark_next_object(struct object *object)
219 {
220 mark_object(object, ++last_idnum);
221 }
222
223 static int get_object_mark(struct object *object)
224 {
225 void *decoration = lookup_decoration(&idnums, object);
226 if (!decoration)
227 return 0;
228 return ptr_to_mark(decoration);
229 }
230
231 static struct commit *rewrite_commit(struct commit *p)
232 {
233 for (;;) {
234 if (p->parents && p->parents->next)
235 break;
236 if (p->object.flags & UNINTERESTING)
237 break;
238 if (!(p->object.flags & TREESAME))
239 break;
240 if (!p->parents)
241 return NULL;
242 p = p->parents->item;
243 }
244 return p;
245 }
246
247 static void show_progress(void)
248 {
249 static int counter = 0;
250 if (!progress)
251 return;
252 if ((++counter % progress) == 0)
253 printf("progress %d objects\n", counter);
254 }
255
/*
 * Produce replacement content for a blob.
 *
 * Ideally the blob data would go through some irreversible
 * transformation that keeps the size and the relationship to other
 * blobs (so delta and packing behavior match the original).  The
 * first and last of those goals are probably mutually exclusive, so
 * take the easy way out for now and generate arbitrary content.
 *
 * The result does not need to be cached through anonymize_str(),
 * since blob content is already deduplicated via marks.
 *
 * Stores the content length in *size and returns a newly allocated
 * buffer that the caller owns.
 */
static char *anonymize_blob(unsigned long *size)
{
	static int counter;
	char *content = xstrfmt("anonymous blob %d", counter++);

	*size = strlen(content);
	return content;
}
275
276 static void export_blob(const struct object_id *oid)
277 {
278 unsigned long size;
279 enum object_type type;
280 char *buf;
281 struct object *object;
282 int eaten;
283
284 if (no_data)
285 return;
286
287 if (is_null_oid(oid))
288 return;
289
290 object = lookup_object(the_repository, oid);
291 if (object && object->flags & SHOWN)
292 return;
293
294 if (anonymize) {
295 buf = anonymize_blob(&size);
296 object = (struct object *)lookup_blob(the_repository, oid);
297 eaten = 0;
298 } else {
299 buf = read_object_file(oid, &type, &size);
300 if (!buf)
301 die("could not read blob %s", oid_to_hex(oid));
302 if (check_object_signature(the_repository, oid, buf, size,
303 type) < 0)
304 die("oid mismatch in blob %s", oid_to_hex(oid));
305 object = parse_object_buffer(the_repository, oid, type,
306 size, buf, &eaten);
307 }
308
309 if (!object)
310 die("Could not read blob %s", oid_to_hex(oid));
311
312 mark_next_object(object);
313
314 printf("blob\nmark :%"PRIu32"\n", last_idnum);
315 if (show_original_ids)
316 printf("original-oid %s\n", oid_to_hex(oid));
317 printf("data %"PRIuMAX"\n", (uintmax_t)size);
318 if (size && fwrite(buf, size, 1, stdout) != 1)
319 die_errno("could not write blob '%s'", oid_to_hex(oid));
320 printf("\n");
321
322 show_progress();
323
324 object->flags |= SHOWN;
325 if (!eaten)
326 free(buf);
327 }
328
329 static int depth_first(const void *a_, const void *b_)
330 {
331 const struct diff_filepair *a = *((const struct diff_filepair **)a_);
332 const struct diff_filepair *b = *((const struct diff_filepair **)b_);
333 const char *name_a, *name_b;
334 int len_a, len_b, len;
335 int cmp;
336
337 name_a = a->one ? a->one->path : a->two->path;
338 name_b = b->one ? b->one->path : b->two->path;
339
340 len_a = strlen(name_a);
341 len_b = strlen(name_b);
342 len = (len_a < len_b) ? len_a : len_b;
343
344 /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
345 cmp = memcmp(name_a, name_b, len);
346 if (cmp)
347 return cmp;
348 cmp = len_b - len_a;
349 if (cmp)
350 return cmp;
351 /*
352 * Move 'R'ename entries last so that all references of the file
353 * appear in the output before it is renamed (e.g., when a file
354 * was copied and renamed in the same commit).
355 */
356 return (a->status == 'R') - (b->status == 'R');
357 }
358
/*
 * Print `path` to stdout.  The path is C-style quoted when
 * quote_c_style() reports that it needs quoting, wrapped in plain
 * double quotes when it merely contains a space, and printed
 * verbatim otherwise.
 */
static void print_path_1(const char *path)
{
	if (quote_c_style(path, NULL, NULL, 0)) {
		quote_c_style(path, NULL, stdout, 0);
		return;
	}

	if (strchr(path, ' '))
		printf("\"%s\"", path);
	else
		printf("%s", path);
}
369
/*
 * Generator for anonymize_str(): returns a newly allocated "path<N>"
 * string, where N counts up from 0.  `data` is unused.
 */
static char *anonymize_path_component(void *data)
{
	static int counter;

	return xstrfmt("path%d", counter++);
}
377
378 static void print_path(const char *path)
379 {
380 if (!anonymize)
381 print_path_1(path);
382 else {
383 static struct hashmap paths;
384 static struct strbuf anon = STRBUF_INIT;
385
386 anonymize_path(&anon, path, &paths, anonymize_path_component);
387 print_path_1(anon.buf);
388 strbuf_reset(&anon);
389 }
390 }
391
392 static char *generate_fake_oid(void *data)
393 {
394 static uint32_t counter = 1; /* avoid null oid */
395 const unsigned hashsz = the_hash_algo->rawsz;
396 struct object_id oid;
397 char *hex = xmallocz(GIT_MAX_HEXSZ);
398
399 oidclr(&oid);
400 put_be32(oid.hash + hashsz - 4, counter++);
401 return oid_to_hex_r(hex, &oid);
402 }
403
404 static const char *anonymize_oid(const char *oid_hex)
405 {
406 static struct hashmap objs;
407 size_t len = strlen(oid_hex);
408 return anonymize_str(&objs, generate_fake_oid, oid_hex, len, NULL);
409 }
410
411 static void show_filemodify(struct diff_queue_struct *q,
412 struct diff_options *options, void *data)
413 {
414 int i;
415 struct string_list *changed = data;
416
417 /*
418 * Handle files below a directory first, in case they are all deleted
419 * and the directory changes to a file or symlink.
420 */
421 QSORT(q->queue, q->nr, depth_first);
422
423 for (i = 0; i < q->nr; i++) {
424 struct diff_filespec *ospec = q->queue[i]->one;
425 struct diff_filespec *spec = q->queue[i]->two;
426
427 switch (q->queue[i]->status) {
428 case DIFF_STATUS_DELETED:
429 printf("D ");
430 print_path(spec->path);
431 string_list_insert(changed, spec->path);
432 putchar('\n');
433 break;
434
435 case DIFF_STATUS_COPIED:
436 case DIFF_STATUS_RENAMED:
437 /*
438 * If a change in the file corresponding to ospec->path
439 * has been observed, we cannot trust its contents
440 * because the diff is calculated based on the prior
441 * contents, not the current contents. So, declare a
442 * copy or rename only if there was no change observed.
443 */
444 if (!string_list_has_string(changed, ospec->path)) {
445 printf("%c ", q->queue[i]->status);
446 print_path(ospec->path);
447 putchar(' ');
448 print_path(spec->path);
449 string_list_insert(changed, spec->path);
450 putchar('\n');
451
452 if (oideq(&ospec->oid, &spec->oid) &&
453 ospec->mode == spec->mode)
454 break;
455 }
456 /* fallthrough */
457
458 case DIFF_STATUS_TYPE_CHANGED:
459 case DIFF_STATUS_MODIFIED:
460 case DIFF_STATUS_ADDED:
461 /*
462 * Links refer to objects in another repositories;
463 * output the SHA-1 verbatim.
464 */
465 if (no_data || S_ISGITLINK(spec->mode))
466 printf("M %06o %s ", spec->mode,
467 anonymize ?
468 anonymize_oid(oid_to_hex(&spec->oid)) :
469 oid_to_hex(&spec->oid));
470 else {
471 struct object *object = lookup_object(the_repository,
472 &spec->oid);
473 printf("M %06o :%d ", spec->mode,
474 get_object_mark(object));
475 }
476 print_path(spec->path);
477 string_list_insert(changed, spec->path);
478 putchar('\n');
479 break;
480
481 default:
482 die("Unexpected comparison status '%c' for %s, %s",
483 q->queue[i]->status,
484 ospec->path ? ospec->path : "none",
485 spec->path ? spec->path : "none");
486 }
487 }
488 }
489
/*
 * Find the value of an "encoding" header line in [begin, end), or in
 * the whole NUL-terminated string at `begin` when `end` is NULL.
 *
 * Returns a pointer to the value, or NULL when there is no such line.
 *
 * NOTE(review): the value is terminated in place, i.e. the newline
 * that ends the header line is overwritten with NUL even though the
 * buffer is passed in as `const char *`.  Callers must be fine with
 * the buffer being modified.
 */
static const char *find_encoding(const char *begin, const char *end)
{
	const char *needle = "\nencoding ";
	size_t haystack_len = end ? end - begin : strlen(begin);
	char *value, *eol;

	value = memmem(begin, haystack_len, needle, strlen(needle));
	if (!value)
		return NULL;

	value += strlen(needle);
	eol = strchrnul(value, '\n');
	*eol = '\0';
	return value;
}
504
/*
 * Generator for anonymize_str(): returns a newly allocated "ref<N>"
 * string, where N counts up from 0.  `data` is unused.
 */
static char *anonymize_ref_component(void *data)
{
	static int counter;

	return xstrfmt("ref%d", counter++);
}
512
513 static const char *anonymize_refname(const char *refname)
514 {
515 /*
516 * If any of these prefixes is found, we will leave it intact
517 * so that tags remain tags and so forth.
518 */
519 static const char *prefixes[] = {
520 "refs/heads/",
521 "refs/tags/",
522 "refs/remotes/",
523 "refs/"
524 };
525 static struct hashmap refs;
526 static struct strbuf anon = STRBUF_INIT;
527 int i;
528
529 strbuf_reset(&anon);
530 for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
531 if (skip_prefix(refname, prefixes[i], &refname)) {
532 strbuf_addstr(&anon, prefixes[i]);
533 break;
534 }
535 }
536
537 anonymize_path(&anon, refname, &refs, anonymize_ref_component);
538 return anon.buf;
539 }
540
541 /*
542 * We do not even bother to cache commit messages, as they are unlikely
543 * to be repeated verbatim, and it is not that interesting when they are.
544 */
545 static char *anonymize_commit_message(const char *old)
546 {
547 static int counter;
548 return xstrfmt("subject %d\n\nbody\n", counter++);
549 }
550
/*
 * Generator for anonymize_str(): returns a newly allocated
 * "User <N> <user<N>@example.com>" identity, where N counts up from
 * 0.  `data` is unused.
 */
static char *anonymize_ident(void *data)
{
	static int counter;
	char *ident = xstrfmt("User %d <user%d@example.com>",
			      counter, counter);

	counter++;
	return ident;
}
559
560 /*
561 * Our strategy here is to anonymize the names and email addresses,
562 * but keep timestamps intact, as they influence things like traversal
563 * order (and by themselves should not be too revealing).
564 */
565 static void anonymize_ident_line(const char **beg, const char **end)
566 {
567 static struct hashmap idents;
568 static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT };
569 static unsigned which_buffer;
570
571 struct strbuf *out;
572 struct ident_split split;
573 const char *end_of_header;
574
575 out = &buffers[which_buffer++];
576 which_buffer %= ARRAY_SIZE(buffers);
577 strbuf_reset(out);
578
579 /* skip "committer", "author", "tagger", etc */
580 end_of_header = strchr(*beg, ' ');
581 if (!end_of_header)
582 BUG("malformed line fed to anonymize_ident_line: %.*s",
583 (int)(*end - *beg), *beg);
584 end_of_header++;
585 strbuf_add(out, *beg, end_of_header - *beg);
586
587 if (!split_ident_line(&split, end_of_header, *end - end_of_header) &&
588 split.date_begin) {
589 const char *ident;
590 size_t len;
591
592 len = split.mail_end - split.name_begin;
593 ident = anonymize_str(&idents, anonymize_ident,
594 split.name_begin, len, NULL);
595 strbuf_addstr(out, ident);
596 strbuf_addch(out, ' ');
597 strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
598 } else {
599 strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000");
600 }
601
602 *beg = out->buf;
603 *end = out->buf + out->len;
604 }
605
606 static void handle_commit(struct commit *commit, struct rev_info *rev,
607 struct string_list *paths_of_changed_objects)
608 {
609 int saved_output_format = rev->diffopt.output_format;
610 const char *commit_buffer;
611 const char *author, *author_end, *committer, *committer_end;
612 const char *encoding, *message;
613 char *reencoded = NULL;
614 struct commit_list *p;
615 const char *refname;
616 int i;
617
618 rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
619
620 parse_commit_or_die(commit);
621 commit_buffer = get_commit_buffer(commit, NULL);
622 author = strstr(commit_buffer, "\nauthor ");
623 if (!author)
624 die("could not find author in commit %s",
625 oid_to_hex(&commit->object.oid));
626 author++;
627 author_end = strchrnul(author, '\n');
628 committer = strstr(author_end, "\ncommitter ");
629 if (!committer)
630 die("could not find committer in commit %s",
631 oid_to_hex(&commit->object.oid));
632 committer++;
633 committer_end = strchrnul(committer, '\n');
634 message = strstr(committer_end, "\n\n");
635 encoding = find_encoding(committer_end, message);
636 if (message)
637 message += 2;
638
639 if (commit->parents &&
640 (get_object_mark(&commit->parents->item->object) != 0 ||
641 reference_excluded_commits) &&
642 !full_tree) {
643 parse_commit_or_die(commit->parents->item);
644 diff_tree_oid(get_commit_tree_oid(commit->parents->item),
645 get_commit_tree_oid(commit), "", &rev->diffopt);
646 }
647 else
648 diff_root_tree_oid(get_commit_tree_oid(commit),
649 "", &rev->diffopt);
650
651 /* Export the referenced blobs, and remember the marks. */
652 for (i = 0; i < diff_queued_diff.nr; i++)
653 if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
654 export_blob(&diff_queued_diff.queue[i]->two->oid);
655
656 refname = *revision_sources_at(&revision_sources, commit);
657 /*
658 * FIXME: string_list_remove() below for each ref is overall
659 * O(N^2). Compared to a history walk and diffing trees, this is
660 * just lost in the noise in practice. However, theoretically a
661 * repo may have enough refs for this to become slow.
662 */
663 string_list_remove(&extra_refs, refname, 0);
664 if (anonymize) {
665 refname = anonymize_refname(refname);
666 anonymize_ident_line(&committer, &committer_end);
667 anonymize_ident_line(&author, &author_end);
668 }
669
670 mark_next_object(&commit->object);
671 if (anonymize) {
672 reencoded = anonymize_commit_message(message);
673 } else if (encoding) {
674 switch(reencode_mode) {
675 case REENCODE_YES:
676 reencoded = reencode_string(message, "UTF-8", encoding);
677 break;
678 case REENCODE_NO:
679 break;
680 case REENCODE_ABORT:
681 die("Encountered commit-specific encoding %s in commit "
682 "%s; use --reencode=[yes|no] to handle it",
683 encoding, oid_to_hex(&commit->object.oid));
684 }
685 }
686 if (!commit->parents)
687 printf("reset %s\n", refname);
688 printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
689 if (show_original_ids)
690 printf("original-oid %s\n", oid_to_hex(&commit->object.oid));
691 printf("%.*s\n%.*s\n",
692 (int)(author_end - author), author,
693 (int)(committer_end - committer), committer);
694 if (!reencoded && encoding)
695 printf("encoding %s\n", encoding);
696 printf("data %u\n%s",
697 (unsigned)(reencoded
698 ? strlen(reencoded) : message
699 ? strlen(message) : 0),
700 reencoded ? reencoded : message ? message : "");
701 free(reencoded);
702 unuse_commit_buffer(commit, commit_buffer);
703
704 for (i = 0, p = commit->parents; p; p = p->next) {
705 struct object *obj = &p->item->object;
706 int mark = get_object_mark(obj);
707
708 if (!mark && !reference_excluded_commits)
709 continue;
710 if (i == 0)
711 printf("from ");
712 else
713 printf("merge ");
714 if (mark)
715 printf(":%d\n", mark);
716 else
717 printf("%s\n",
718 anonymize ?
719 anonymize_oid(oid_to_hex(&obj->oid)) :
720 oid_to_hex(&obj->oid));
721 i++;
722 }
723
724 if (full_tree)
725 printf("deleteall\n");
726 log_tree_diff_flush(rev);
727 string_list_clear(paths_of_changed_objects, 0);
728 rev->diffopt.output_format = saved_output_format;
729
730 printf("\n");
731
732 show_progress();
733 }
734
/*
 * Generator for anonymize_str(): returns a newly allocated
 * "tag message <N>" string, where N counts up from 0.  `data` is
 * unused.
 */
static char *anonymize_tag(void *data)
{
	static int counter;

	return xstrfmt("tag message %d", counter++);
}
742
743
744 static void handle_tag(const char *name, struct tag *tag)
745 {
746 unsigned long size;
747 enum object_type type;
748 char *buf;
749 const char *tagger, *tagger_end, *message;
750 size_t message_size = 0;
751 struct object *tagged;
752 int tagged_mark;
753 struct commit *p;
754
755 /* Trees have no identifier in fast-export output, thus we have no way
756 * to output tags of trees, tags of tags of trees, etc. Simply omit
757 * such tags.
758 */
759 tagged = tag->tagged;
760 while (tagged->type == OBJ_TAG) {
761 tagged = ((struct tag *)tagged)->tagged;
762 }
763 if (tagged->type == OBJ_TREE) {
764 warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
765 oid_to_hex(&tag->object.oid));
766 return;
767 }
768
769 buf = read_object_file(&tag->object.oid, &type, &size);
770 if (!buf)
771 die("could not read tag %s", oid_to_hex(&tag->object.oid));
772 message = memmem(buf, size, "\n\n", 2);
773 if (message) {
774 message += 2;
775 message_size = strlen(message);
776 }
777 tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
778 if (!tagger) {
779 if (fake_missing_tagger)
780 tagger = "tagger Unspecified Tagger "
781 "<unspecified-tagger> 0 +0000";
782 else
783 tagger = "";
784 tagger_end = tagger + strlen(tagger);
785 } else {
786 tagger++;
787 tagger_end = strchrnul(tagger, '\n');
788 if (anonymize)
789 anonymize_ident_line(&tagger, &tagger_end);
790 }
791
792 if (anonymize) {
793 name = anonymize_refname(name);
794 if (message) {
795 static struct hashmap tags;
796 message = anonymize_str(&tags, anonymize_tag,
797 message, message_size, NULL);
798 message_size = strlen(message);
799 }
800 }
801
802 /* handle signed tags */
803 if (message) {
804 const char *signature = strstr(message,
805 "\n-----BEGIN PGP SIGNATURE-----\n");
806 if (signature)
807 switch(signed_tag_mode) {
808 case SIGNED_TAG_ABORT:
809 die("encountered signed tag %s; use "
810 "--signed-tags=<mode> to handle it",
811 oid_to_hex(&tag->object.oid));
812 case WARN:
813 warning("exporting signed tag %s",
814 oid_to_hex(&tag->object.oid));
815 /* fallthru */
816 case VERBATIM:
817 break;
818 case WARN_STRIP:
819 warning("stripping signature from tag %s",
820 oid_to_hex(&tag->object.oid));
821 /* fallthru */
822 case STRIP:
823 message_size = signature + 1 - message;
824 break;
825 }
826 }
827
828 /* handle tag->tagged having been filtered out due to paths specified */
829 tagged = tag->tagged;
830 tagged_mark = get_object_mark(tagged);
831 if (!tagged_mark) {
832 switch(tag_of_filtered_mode) {
833 case TAG_FILTERING_ABORT:
834 die("tag %s tags unexported object; use "
835 "--tag-of-filtered-object=<mode> to handle it",
836 oid_to_hex(&tag->object.oid));
837 case DROP:
838 /* Ignore this tag altogether */
839 free(buf);
840 return;
841 case REWRITE:
842 if (tagged->type == OBJ_TAG && !mark_tags) {
843 die(_("Error: Cannot export nested tags unless --mark-tags is specified."));
844 } else if (tagged->type == OBJ_COMMIT) {
845 p = rewrite_commit((struct commit *)tagged);
846 if (!p) {
847 printf("reset %s\nfrom %s\n\n",
848 name, oid_to_hex(null_oid()));
849 free(buf);
850 return;
851 }
852 tagged_mark = get_object_mark(&p->object);
853 } else {
854 /* tagged->type is either OBJ_BLOB or OBJ_TAG */
855 tagged_mark = get_object_mark(tagged);
856 }
857 }
858 }
859
860 if (tagged->type == OBJ_TAG) {
861 printf("reset %s\nfrom %s\n\n",
862 name, oid_to_hex(null_oid()));
863 }
864 skip_prefix(name, "refs/tags/", &name);
865 printf("tag %s\n", name);
866 if (mark_tags) {
867 mark_next_object(&tag->object);
868 printf("mark :%"PRIu32"\n", last_idnum);
869 }
870 if (tagged_mark)
871 printf("from :%d\n", tagged_mark);
872 else
873 printf("from %s\n", oid_to_hex(&tagged->oid));
874
875 if (show_original_ids)
876 printf("original-oid %s\n", oid_to_hex(&tag->object.oid));
877 printf("%.*s%sdata %d\n%.*s\n",
878 (int)(tagger_end - tagger), tagger,
879 tagger == tagger_end ? "" : "\n",
880 (int)message_size, (int)message_size, message ? message : "");
881 free(buf);
882 }
883
884 static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name)
885 {
886 switch (e->item->type) {
887 case OBJ_COMMIT:
888 return (struct commit *)e->item;
889 case OBJ_TAG: {
890 struct tag *tag = (struct tag *)e->item;
891
892 /* handle nested tags */
893 while (tag && tag->object.type == OBJ_TAG) {
894 parse_object(the_repository, &tag->object.oid);
895 string_list_append(&tag_refs, full_name)->util = tag;
896 tag = (struct tag *)tag->tagged;
897 }
898 if (!tag)
899 die("Tag %s points nowhere?", e->name);
900 return (struct commit *)tag;
901 }
902 default:
903 return NULL;
904 }
905 }
906
907 static void get_tags_and_duplicates(struct rev_cmdline_info *info)
908 {
909 int i;
910
911 for (i = 0; i < info->nr; i++) {
912 struct rev_cmdline_entry *e = info->rev + i;
913 struct object_id oid;
914 struct commit *commit;
915 char *full_name;
916
917 if (e->flags & UNINTERESTING)
918 continue;
919
920 if (dwim_ref(e->name, strlen(e->name), &oid, &full_name, 0) != 1)
921 continue;
922
923 if (refspecs.nr) {
924 char *private;
925 private = apply_refspecs(&refspecs, full_name);
926 if (private) {
927 free(full_name);
928 full_name = private;
929 }
930 }
931
932 commit = get_commit(e, full_name);
933 if (!commit) {
934 warning("%s: Unexpected object of type %s, skipping.",
935 e->name,
936 type_name(e->item->type));
937 continue;
938 }
939
940 switch(commit->object.type) {
941 case OBJ_COMMIT:
942 break;
943 case OBJ_BLOB:
944 export_blob(&commit->object.oid);
945 continue;
946 default: /* OBJ_TAG (nested tags) is already handled */
947 warning("Tag points to object of unexpected type %s, skipping.",
948 type_name(commit->object.type));
949 continue;
950 }
951
952 /*
953 * Make sure this ref gets properly updated eventually, whether
954 * through a commit or manually at the end.
955 */
956 if (e->item->type != OBJ_TAG)
957 string_list_append(&extra_refs, full_name)->util = commit;
958
959 if (!*revision_sources_at(&revision_sources, commit))
960 *revision_sources_at(&revision_sources, commit) = full_name;
961 }
962
963 string_list_sort(&extra_refs);
964 string_list_remove_duplicates(&extra_refs, 0);
965 }
966
967 static void handle_tags_and_duplicates(struct string_list *extras)
968 {
969 struct commit *commit;
970 int i;
971
972 for (i = extras->nr - 1; i >= 0; i--) {
973 const char *name = extras->items[i].string;
974 struct object *object = extras->items[i].util;
975 int mark;
976
977 switch (object->type) {
978 case OBJ_TAG:
979 handle_tag(name, (struct tag *)object);
980 break;
981 case OBJ_COMMIT:
982 if (anonymize)
983 name = anonymize_refname(name);
984 /* create refs pointing to already seen commits */
985 commit = rewrite_commit((struct commit *)object);
986 if (!commit) {
987 /*
988 * Neither this object nor any of its
989 * ancestors touch any relevant paths, so
990 * it has been filtered to nothing. Delete
991 * it.
992 */
993 printf("reset %s\nfrom %s\n\n",
994 name, oid_to_hex(null_oid()));
995 continue;
996 }
997
998 mark = get_object_mark(&commit->object);
999 if (!mark) {
1000 /*
1001 * Getting here means we have a commit which
1002 * was excluded by a negative refspec (e.g.
1003 * fast-export ^HEAD HEAD). If we are
1004 * referencing excluded commits, set the ref
1005 * to the exact commit. Otherwise, the user
1006 * wants the branch exported but every commit
1007 * in its history to be deleted, which basically
1008 * just means deletion of the ref.
1009 */
1010 if (!reference_excluded_commits) {
1011 /* delete the ref */
1012 printf("reset %s\nfrom %s\n\n",
1013 name, oid_to_hex(null_oid()));
1014 continue;
1015 }
1016 /* set ref to commit using oid, not mark */
1017 printf("reset %s\nfrom %s\n\n", name,
1018 oid_to_hex(&commit->object.oid));
1019 continue;
1020 }
1021
1022 printf("reset %s\nfrom :%d\n\n", name, mark
1023 );
1024 show_progress();
1025 break;
1026 }
1027 }
1028 }
1029
1030 static void export_marks(char *file)
1031 {
1032 unsigned int i;
1033 uint32_t mark;
1034 struct decoration_entry *deco = idnums.entries;
1035 FILE *f;
1036 int e = 0;
1037
1038 f = fopen_for_writing(file);
1039 if (!f)
1040 die_errno("Unable to open marks file %s for writing.", file);
1041
1042 for (i = 0; i < idnums.size; i++) {
1043 if (deco->base && deco->base->type == 1) {
1044 mark = ptr_to_mark(deco->decoration);
1045 if (fprintf(f, ":%"PRIu32" %s\n", mark,
1046 oid_to_hex(&deco->base->oid)) < 0) {
1047 e = 1;
1048 break;
1049 }
1050 }
1051 deco++;
1052 }
1053
1054 e |= ferror(f);
1055 e |= fclose(f);
1056 if (e)
1057 error("Unable to write marks file %s.", file);
1058 }
1059
1060 static void import_marks(char *input_file, int check_exists)
1061 {
1062 char line[512];
1063 FILE *f;
1064 struct stat sb;
1065
1066 if (check_exists && stat(input_file, &sb))
1067 return;
1068
1069 f = xfopen(input_file, "r");
1070 while (fgets(line, sizeof(line), f)) {
1071 uint32_t mark;
1072 char *line_end, *mark_end;
1073 struct object_id oid;
1074 struct object *object;
1075 struct commit *commit;
1076 enum object_type type;
1077
1078 line_end = strchr(line, '\n');
1079 if (line[0] != ':' || !line_end)
1080 die("corrupt mark line: %s", line);
1081 *line_end = '\0';
1082
1083 mark = strtoumax(line + 1, &mark_end, 10);
1084 if (!mark || mark_end == line + 1
1085 || *mark_end != ' ' || get_oid_hex(mark_end + 1, &oid))
1086 die("corrupt mark line: %s", line);
1087
1088 if (last_idnum < mark)
1089 last_idnum = mark;
1090
1091 type = oid_object_info(the_repository, &oid, NULL);
1092 if (type < 0)
1093 die("object not found: %s", oid_to_hex(&oid));
1094
1095 if (type != OBJ_COMMIT)
1096 /* only commits */
1097 continue;
1098
1099 commit = lookup_commit(the_repository, &oid);
1100 if (!commit)
1101 die("not a commit? can't happen: %s", oid_to_hex(&oid));
1102
1103 object = &commit->object;
1104
1105 if (object->flags & SHOWN)
1106 error("Object %s already has a mark", oid_to_hex(&oid));
1107
1108 mark_object(object, mark);
1109
1110 object->flags |= SHOWN;
1111 }
1112 fclose(f);
1113 }
1114
1115 static void handle_deletes(void)
1116 {
1117 int i;
1118 for (i = 0; i < refspecs.nr; i++) {
1119 struct refspec_item *refspec = &refspecs.items[i];
1120 if (*refspec->src)
1121 continue;
1122
1123 printf("reset %s\nfrom %s\n\n",
1124 refspec->dst, oid_to_hex(null_oid()));
1125 }
1126 }
1127
/*
 * Generator for anonymize_str(): `data` is the replacement string
 * itself, and a newly allocated copy of it is returned.
 */
static char *anonymize_seed(void *data)
{
	const char *replacement = data;

	return xstrdup(replacement);
}
1132
1133 static int parse_opt_anonymize_map(const struct option *opt,
1134 const char *arg, int unset)
1135 {
1136 struct hashmap *map = opt->value;
1137 const char *delim, *value;
1138 size_t keylen;
1139
1140 BUG_ON_OPT_NEG(unset);
1141
1142 delim = strchr(arg, ':');
1143 if (delim) {
1144 keylen = delim - arg;
1145 value = delim + 1;
1146 } else {
1147 keylen = strlen(arg);
1148 value = arg;
1149 }
1150
1151 if (!keylen || !*value)
1152 return error(_("--anonymize-map token cannot be empty"));
1153
1154 anonymize_str(map, anonymize_seed, arg, keylen, (void *)value);
1155
1156 return 0;
1157 }
1158
1159 int cmd_fast_export(int argc, const char **argv, const char *prefix)
1160 {
1161 struct rev_info revs;
1162 struct commit *commit;
1163 char *export_filename = NULL,
1164 *import_filename = NULL,
1165 *import_filename_if_exists = NULL;
1166 uint32_t lastimportid;
1167 struct string_list refspecs_list = STRING_LIST_INIT_NODUP;
1168 struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP;
1169 struct option options[] = {
1170 OPT_INTEGER(0, "progress", &progress,
1171 N_("show progress after <n> objects")),
1172 OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"),
1173 N_("select handling of signed tags"),
1174 parse_opt_signed_tag_mode),
1175 OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
1176 N_("select handling of tags that tag filtered objects"),
1177 parse_opt_tag_of_filtered_mode),
1178 OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"),
1179 N_("select handling of commit messages in an alternate encoding"),
1180 parse_opt_reencode_mode),
1181 OPT_STRING(0, "export-marks", &export_filename, N_("file"),
1182 N_("dump marks to this file")),
1183 OPT_STRING(0, "import-marks", &import_filename, N_("file"),
1184 N_("import marks from this file")),
1185 OPT_STRING(0, "import-marks-if-exists",
1186 &import_filename_if_exists,
1187 N_("file"),
1188 N_("import marks from this file if it exists")),
1189 OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger,
1190 N_("fake a tagger when tags lack one")),
1191 OPT_BOOL(0, "full-tree", &full_tree,
1192 N_("output full tree for each commit")),
1193 OPT_BOOL(0, "use-done-feature", &use_done_feature,
1194 N_("use the done feature to terminate the stream")),
1195 OPT_BOOL(0, "no-data", &no_data, N_("skip output of blob data")),
1196 OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
1197 N_("apply refspec to exported refs")),
1198 OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
1199 OPT_CALLBACK_F(0, "anonymize-map", &anonymized_seeds, N_("from:to"),
1200 N_("convert <from> to <to> in anonymized output"),
1201 PARSE_OPT_NONEG, parse_opt_anonymize_map),
1202 OPT_BOOL(0, "reference-excluded-parents",
1203 &reference_excluded_commits, N_("reference parents which are not in fast-export stream by object id")),
1204 OPT_BOOL(0, "show-original-ids", &show_original_ids,
1205 N_("show original object ids of blobs/commits")),
1206 OPT_BOOL(0, "mark-tags", &mark_tags,
1207 N_("label tags with mark ids")),
1208
1209 OPT_END()
1210 };
1211
1212 if (argc == 1)
1213 usage_with_options (fast_export_usage, options);
1214
1215 /* we handle encodings */
1216 git_config(git_default_config, NULL);
1217
1218 repo_init_revisions(the_repository, &revs, prefix);
1219 init_revision_sources(&revision_sources);
1220 revs.topo_order = 1;
1221 revs.sources = &revision_sources;
1222 revs.rewrite_parents = 1;
1223 argc = parse_options(argc, argv, prefix, options, fast_export_usage,
1224 PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN);
1225 argc = setup_revisions(argc, argv, &revs, NULL);
1226 if (argc > 1)
1227 usage_with_options (fast_export_usage, options);
1228
1229 if (anonymized_seeds.cmpfn && !anonymize)
1230 die(_("the option '%s' requires '%s'"), "--anonymize-map", "--anonymize");
1231
1232 if (refspecs_list.nr) {
1233 int i;
1234
1235 for (i = 0; i < refspecs_list.nr; i++)
1236 refspec_append(&refspecs, refspecs_list.items[i].string);
1237
1238 string_list_clear(&refspecs_list, 1);
1239 }
1240
1241 if (use_done_feature)
1242 printf("feature done\n");
1243
1244 if (import_filename && import_filename_if_exists)
1245 die(_("options '%s' and '%s' cannot be used together"), "--import-marks", "--import-marks-if-exists");
1246 if (import_filename)
1247 import_marks(import_filename, 0);
1248 else if (import_filename_if_exists)
1249 import_marks(import_filename_if_exists, 1);
1250 lastimportid = last_idnum;
1251
1252 if (import_filename && revs.prune_data.nr)
1253 full_tree = 1;
1254
1255 get_tags_and_duplicates(&revs.cmdline);
1256
1257 if (prepare_revision_walk(&revs))
1258 die("revision walk setup failed");
1259
1260 revs.reverse = 1;
1261 revs.diffopt.format_callback = show_filemodify;
1262 revs.diffopt.format_callback_data = &paths_of_changed_objects;
1263 revs.diffopt.flags.recursive = 1;
1264 while ((commit = get_revision(&revs)))
1265 handle_commit(commit, &revs, &paths_of_changed_objects);
1266
1267 handle_tags_and_duplicates(&extra_refs);
1268 handle_tags_and_duplicates(&tag_refs);
1269 handle_deletes();
1270
1271 if (export_filename && lastimportid != last_idnum)
1272 export_marks(export_filename);
1273
1274 if (use_done_feature)
1275 printf("done\n");
1276
1277 refspec_clear(&refspecs);
1278 release_revisions(&revs);
1279
1280 return 0;
1281 }