]> git.ipfire.org Git - thirdparty/git.git/blob - git-archimport.perl
GIT-VERSION-FILE: check ./version first.
[thirdparty/git.git] / git-archimport.perl
1 #!/usr/bin/perl -w
2 #
3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
8 #
9
10 =head1 Invocation
11
12 git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
14
15 Imports a project from one or more Arch repositories. It will follow branches
16 and repositories within the namespaces defined by the <archive/branch>
17 parameters supplied. If it cannot find the remote branch a merge comes from
18 it will just import it as a regular commit. If it can find it, it will mark it
19 as a merge whenever possible.
20
21 See man (1) git-archimport for more details.
22
23 =head1 TODO
24
25 - create tag objects instead of ref tags
26 - audit shell-escaping of filenames
27 - hide our private tags somewhere smarter
28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
29 - sort and apply patches by graphing ancestry relations instead of just
30 relying in dates supplied in the changeset itself.
31 tla ancestry-graph -m could be helpful here...
32
33 =head1 Devel tricks
34
35 Add print in front of the shell commands invoked via backticks.
36
37 =head1 Devel Notes
38
39 There are several places where Arch and git terminology are intermixed
40 and potentially confused.
41
42 The notion of a "branch" in git is approximately equivalent to
43 a "archive/category--branch--version" in Arch. Also, it should be noted
44 that the "--branch" portion of "archive/category--branch--version" is really
45 optional in Arch although not many people (nor tools!) seem to know this.
46 This means that "archive/category--version" is also a valid "branch"
47 in git terms.
48
49 We always refer to Arch names by their fully qualified variant (which
50 means the "archive" name is prefixed.
51
52 For people unfamiliar with Arch, an "archive" is the term for "repository",
53 and can contain multiple, unrelated branches.
54
55 =cut
56
57 use strict;
58 use warnings;
59 use Getopt::Std;
60 use File::Temp qw(tempdir);
61 use File::Path qw(mkpath rmtree);
62 use File::Basename qw(basename dirname);
63 use Data::Dumper qw/ Dumper /;
64 use IPC::Open2;
65
66 $SIG{'PIPE'}="IGNORE";
67 $ENV{'TZ'}="UTC";
68
69 my $git_dir = $ENV{"GIT_DIR"} || ".git";
70 $ENV{"GIT_DIR"} = $git_dir;
71 my $ptag_dir = "$git_dir/archimport/tags";
72
73 our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
74
75 sub usage() {
76 print STDERR <<END;
77 Usage: ${\basename $0} # fetch/update GIT from Arch
78 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
79 repository/arch-branch [ repository/arch-branch] ...
80 END
81 exit(1);
82 }
83
84 getopts("fThvat:D:") or usage();
85 usage if $opt_h;
86
87 @ARGV >= 1 or usage();
88 # $arch_branches:
89 # values associated with keys:
90 # =1 - Arch version / git 'branch' detected via abrowse on a limit
91 # >1 - Arch version / git 'branch' of an auxiliary branch we've merged
92 my %arch_branches = map { $_ => 1 } @ARGV;
93
94 $ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
95 my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
96 $opt_v && print "+ Using $tmp as temporary directory\n";
97
98 unless (-d $git_dir) { # initial import needs empty directory
99 opendir DIR, '.' or die "Unable to open current directory: $!\n";
100 while (my $entry = readdir DIR) {
101 $entry =~ /^\.\.?$/ or
102 die "Initial import needs an empty current working directory.\n"
103 }
104 closedir DIR
105 }
106
107 my %reachable = (); # Arch repositories we can access
108 my %unreachable = (); # Arch repositories we can't access :<
109 my @psets = (); # the collection
110 my %psets = (); # the collection, by name
111 my %stats = ( # Track which strategy we used to import:
112 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
113 simple_changeset => 0, import_or_tag => 0
114 );
115
116 my %rptags = (); # my reverse private tags
117 # to map a SHA1 to a commitid
118 my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
119
120 sub do_abrowse {
121 my $stage = shift;
122 while (my ($limit, $level) = each %arch_branches) {
123 next unless $level == $stage;
124
125 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
126 or die "Problems with tla abrowse: $!";
127
128 my %ps = (); # the current one
129 my $lastseen = '';
130
131 while (<ABROWSE>) {
132 chomp;
133
134 # first record padded w 8 spaces
135 if (s/^\s{8}\b//) {
136 my ($id, $type) = split(m/\s+/, $_, 2);
137
138 my %last_ps;
139 # store the record we just captured
140 if (%ps && !exists $psets{ $ps{id} }) {
141 %last_ps = %ps; # break references
142 push (@psets, \%last_ps);
143 $psets{ $last_ps{id} } = \%last_ps;
144 }
145
146 my $branch = extract_versionname($id);
147 %ps = ( id => $id, branch => $branch );
148 if (%last_ps && ($last_ps{branch} eq $branch)) {
149 $ps{parent_id} = $last_ps{id};
150 }
151
152 $arch_branches{$branch} = 1;
153 $lastseen = 'id';
154
155 # deal with types (should work with baz or tla):
156 if ($type =~ m/\(.*changeset\)/) {
157 $ps{type} = 's';
158 } elsif ($type =~ /\(.*import\)/) {
159 $ps{type} = 'i';
160 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
161 $ps{type} = 't';
162 # read which revision we've tagged when we parse the log
163 $ps{tag} = $1;
164 } else {
165 warn "Unknown type $type";
166 }
167
168 $arch_branches{$branch} = 1;
169 $lastseen = 'id';
170 } elsif (s/^\s{10}//) {
171 # 10 leading spaces or more
172 # indicate commit metadata
173
174 # date
175 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
176 $ps{date} = $1;
177 $lastseen = 'date';
178 } elsif ($_ eq 'merges in:') {
179 $ps{merges} = [];
180 $lastseen = 'merges';
181 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
182 my $id = $_;
183 push (@{$ps{merges}}, $id);
184
185 # aggressive branch finding:
186 if ($opt_D) {
187 my $branch = extract_versionname($id);
188 my $repo = extract_reponame($branch);
189
190 if (archive_reachable($repo) &&
191 !defined $arch_branches{$branch}) {
192 $arch_branches{$branch} = $stage + 1;
193 }
194 }
195 } else {
196 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
197 }
198 }
199 }
200
201 if (%ps && !exists $psets{ $ps{id} }) {
202 my %temp = %ps; # break references
203 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
204 $temp{parent_id} = $psets[$#psets]{id};
205 }
206 push (@psets, \%temp);
207 $psets{ $temp{id} } = \%temp;
208 }
209
210 close ABROWSE or die "$TLA abrowse failed on $limit\n";
211 }
212 } # end foreach $root
213
214 do_abrowse(1);
215 my $depth = 2;
216 $opt_D ||= 0;
217 while ($depth <= $opt_D) {
218 do_abrowse($depth);
219 $depth++;
220 }
221
222 ## Order patches by time
223 # FIXME see if we can find a more optimal way to do this by graphing
224 # the ancestry data and walking it, that way we won't have to rely on
225 # client-supplied dates
226 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
227
228 #print Dumper \@psets;
229
230 ##
231 ## TODO cleanup irrelevant patches
232 ## and put an initial import
233 ## or a full tag
234 my $import = 0;
235 unless (-d $git_dir) { # initial import
236 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
237 print "Starting import from $psets[0]{id}\n";
238 `git-init`;
239 die $! if $?;
240 $import = 1;
241 } else {
242 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
243 }
244 } else { # progressing an import
245 # load the rptags
246 opendir(DIR, $ptag_dir)
247 || die "can't opendir: $!";
248 while (my $file = readdir(DIR)) {
249 # skip non-interesting-files
250 next unless -f "$ptag_dir/$file";
251
252 # convert first '--' to '/' from old git-archimport to use
253 # as an archivename/c--b--v private tag
254 if ($file !~ m!,!) {
255 my $oldfile = $file;
256 $file =~ s!--!,!;
257 print STDERR "converting old tag $oldfile to $file\n";
258 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
259 }
260 my $sha = ptag($file);
261 chomp $sha;
262 $rptags{$sha} = $file;
263 }
264 closedir DIR;
265 }
266
267 # process patchsets
268 # extract the Arch repository name (Arch "archive" in Arch-speak)
269 sub extract_reponame {
270 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
271 return (split(/\//, $fq_cvbr))[0];
272 }
273
274 sub extract_versionname {
275 my $name = shift;
276 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
277 return $name;
278 }
279
280 # convert a fully-qualified revision or version to a unique dirname:
281 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
282 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
283 #
284 # the git notion of a branch is closer to
285 # archive/category--branch--version than archive/category--branch, so we
286 # use this to convert to git branch names.
287 # Also, keep archive names but replace '/' with ',' since it won't require
288 # subdirectories, and is safer than swapping '--' which could confuse
289 # reverse-mapping when dealing with bastard branches that
290 # are just archive/category--version (no --branch)
291 sub tree_dirname {
292 my $revision = shift;
293 my $name = extract_versionname($revision);
294 $name =~ s#/#,#;
295 return $name;
296 }
297
298 # old versions of git-archimport just use the <category--branch> part:
299 sub old_style_branchname {
300 my $id = shift;
301 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
302 chomp $ret;
303 return $ret;
304 }
305
306 *git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
307
308 sub process_patchset_accurate {
309 my $ps = shift;
310
311 # switch to that branch if we're not already in that branch:
312 if (-e "$git_dir/refs/heads/$ps->{branch}") {
313 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
314
315 # remove any old stuff that got leftover:
316 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
317 rmtree(split(/\0/,$rm)) if $rm;
318 }
319
320 # Apply the import/changeset/merge into the working tree
321 my $dir = sync_to_ps($ps);
322 # read the new log entry:
323 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
324 die "Error in cat-log: $!" if $?;
325 chomp @commitlog;
326
327 # grab variables we want from the log, new fields get added to $ps:
328 # (author, date, email, summary, message body ...)
329 parselog($ps, \@commitlog);
330
331 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
332 # this should work when importing continuations
333 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
334
335 # find where we are supposed to branch from
336 system('git-checkout','-f','-b',$ps->{branch},
337 $branchpoint) == 0 or die "$! $?\n";
338
339 # remove any old stuff that got leftover:
340 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
341 rmtree(split(/\0/,$rm)) if $rm;
342
343 # If we trust Arch with the fact that this is just
344 # a tag, and it does not affect the state of the tree
345 # then we just tag and move on
346 tag($ps->{id}, $branchpoint);
347 ptag($ps->{id}, $branchpoint);
348 print " * Tagged $ps->{id} at $branchpoint\n";
349 return 0;
350 } else {
351 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
352 }
353 # allow multiple bases/imports here since Arch supports cherry-picks
354 # from unrelated trees
355 }
356
357 # update the index with all the changes we got
358 system('git-diff-files --name-only -z | '.
359 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
360 system('git-ls-files --others -z | '.
361 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
362 return 1;
363 }
364
365 # the native changeset processing strategy. This is very fast, but
366 # does not handle permissions or any renames involving directories
367 sub process_patchset_fast {
368 my $ps = shift;
369 #
370 # create the branch if needed
371 #
372 if ($ps->{type} eq 'i' && !$import) {
373 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
374 }
375
376 unless ($import) { # skip for import
377 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
378 # we know about this branch
379 system('git-checkout',$ps->{branch});
380 } else {
381 # new branch! we need to verify a few things
382 die "Branch on a non-tag!" unless $ps->{type} eq 't';
383 my $branchpoint = ptag($ps->{tag});
384 die "Tagging from unknown id unsupported: $ps->{tag}"
385 unless $branchpoint;
386
387 # find where we are supposed to branch from
388 system('git-checkout','-b',$ps->{branch},$branchpoint);
389
390 # If we trust Arch with the fact that this is just
391 # a tag, and it does not affect the state of the tree
392 # then we just tag and move on
393 tag($ps->{id}, $branchpoint);
394 ptag($ps->{id}, $branchpoint);
395 print " * Tagged $ps->{id} at $branchpoint\n";
396 return 0;
397 }
398 die $! if $?;
399 }
400
401 #
402 # Apply the import/changeset/merge into the working tree
403 #
404 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
405 apply_import($ps) or die $!;
406 $stats{import_or_tag}++;
407 $import=0;
408 } elsif ($ps->{type} eq 's') {
409 apply_cset($ps);
410 $stats{simple_changeset}++;
411 }
412
413 #
414 # prepare update git's index, based on what arch knows
415 # about the pset, resolve parents, etc
416 #
417
418 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
419 die "Error in cat-archive-log: $!" if $?;
420
421 parselog($ps,\@commitlog);
422
423 # imports don't give us good info
424 # on added files. Shame on them
425 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
426 system('git-ls-files --deleted -z | '.
427 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
428 system('git-ls-files --others -z | '.
429 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
430 }
431
432 # TODO: handle removed_directories and renamed_directories:
433
434 if (my $del = $ps->{removed_files}) {
435 unlink @$del;
436 while (@$del) {
437 my @slice = splice(@$del, 0, 100);
438 system('git-update-index','--remove','--',@slice) == 0 or
439 die "Error in git-update-index --remove: $! $?\n";
440 }
441 }
442
443 if (my $ren = $ps->{renamed_files}) { # renamed
444 if (@$ren % 2) {
445 die "Odd number of entries in rename!?";
446 }
447
448 while (@$ren) {
449 my $from = shift @$ren;
450 my $to = shift @$ren;
451
452 unless (-d dirname($to)) {
453 mkpath(dirname($to)); # will die on err
454 }
455 # print "moving $from $to";
456 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
457 system('git-update-index','--remove','--',$from) == 0 or
458 die "Error in git-update-index --remove: $! $?\n";
459 system('git-update-index','--add','--',$to) == 0 or
460 die "Error in git-update-index --add: $! $?\n";
461 }
462 }
463
464 if (my $add = $ps->{new_files}) {
465 while (@$add) {
466 my @slice = splice(@$add, 0, 100);
467 system('git-update-index','--add','--',@slice) == 0 or
468 die "Error in git-update-index --add: $! $?\n";
469 }
470 }
471
472 if (my $mod = $ps->{modified_files}) {
473 while (@$mod) {
474 my @slice = splice(@$mod, 0, 100);
475 system('git-update-index','--',@slice) == 0 or
476 die "Error in git-update-index: $! $?\n";
477 }
478 }
479 return 1; # we successfully applied the changeset
480 }
481
482 if ($opt_f) {
483 print "Will import patchsets using the fast strategy\n",
484 "Renamed directories and permission changes will be missed\n";
485 *process_patchset = *process_patchset_fast;
486 } else {
487 print "Using the default (accurate) import strategy.\n",
488 "Things may be a bit slow\n";
489 *process_patchset = *process_patchset_accurate;
490 }
491
492 foreach my $ps (@psets) {
493 # process patchsets
494 $ps->{branch} = git_branchname($ps->{id});
495
496 #
497 # ensure we have a clean state
498 #
499 if (my $dirty = `git-diff-files`) {
500 die "Unclean tree when about to process $ps->{id} " .
501 " - did we fail to commit cleanly before?\n$dirty";
502 }
503 die $! if $?;
504
505 #
506 # skip commits already in repo
507 #
508 if (ptag($ps->{id})) {
509 $opt_v && print " * Skipping already imported: $ps->{id}\n";
510 next;
511 }
512
513 print " * Starting to work on $ps->{id}\n";
514
515 process_patchset($ps) or next;
516
517 # warn "errors when running git-update-index! $!";
518 my $tree = `git-write-tree`;
519 die "cannot write tree $!" if $?;
520 chomp $tree;
521
522 #
523 # Who's your daddy?
524 #
525 my @par;
526 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
527 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
528 my $p = <HEAD>;
529 close HEAD;
530 chomp $p;
531 push @par, '-p', $p;
532 } else {
533 if ($ps->{type} eq 's') {
534 warn "Could not find the right head for the branch $ps->{branch}";
535 }
536 }
537 }
538
539 if ($ps->{merges}) {
540 push @par, find_parents($ps);
541 }
542
543 #
544 # Commit, tag and clean state
545 #
546 $ENV{TZ} = 'GMT';
547 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
548 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
549 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
550 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
551 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
552 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
553
554 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
555 or die $!;
556 print WRITER $ps->{summary},"\n";
557 print WRITER $ps->{message},"\n";
558
559 # make it easy to backtrack and figure out which Arch revision this was:
560 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
561
562 close WRITER;
563 my $commitid = <READER>; # read
564 chomp $commitid;
565 close READER;
566 waitpid $pid,0; # close;
567
568 if (length $commitid != 40) {
569 die "Something went wrong with the commit! $! $commitid";
570 }
571 #
572 # Update the branch
573 #
574 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
575 print HEAD $commitid;
576 close HEAD;
577 system('git-update-ref', 'HEAD', "$ps->{branch}");
578
579 # tag accordingly
580 ptag($ps->{id}, $commitid); # private tag
581 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
582 tag($ps->{id}, $commitid);
583 }
584 print " * Committed $ps->{id}\n";
585 print " + tree $tree\n";
586 print " + commit $commitid\n";
587 $opt_v && print " + commit date is $ps->{date} \n";
588 $opt_v && print " + parents: ",join(' ',@par),"\n";
589 }
590
591 if ($opt_v) {
592 foreach (sort keys %stats) {
593 print" $_: $stats{$_}\n";
594 }
595 }
596 exit 0;
597
598 # used by the accurate strategy:
599 sub sync_to_ps {
600 my $ps = shift;
601 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
602
603 $opt_v && print "sync_to_ps($ps->{id}) method: ";
604
605 if (-d $tree_dir) {
606 if ($ps->{type} eq 't') {
607 $opt_v && print "get (tag)\n";
608 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
609 # can't rely on replay to work correctly on these
610 rmtree($tree_dir);
611 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
612 $stats{get_tag}++;
613 } else {
614 my $tree_id = arch_tree_id($tree_dir);
615 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
616 # the common case (hopefully)
617 $opt_v && print "replay\n";
618 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
619 $stats{replay}++;
620 } else {
621 # getting one tree is usually faster than getting two trees
622 # and applying the delta ...
623 rmtree($tree_dir);
624 $opt_v && print "apply-delta\n";
625 safe_pipe_capture($TLA,'get','--no-pristine',
626 $ps->{id},$tree_dir);
627 $stats{get_delta}++;
628 }
629 }
630 } else {
631 # new branch work
632 $opt_v && print "get (new tree)\n";
633 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
634 $stats{get_new}++;
635 }
636
637 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
638 system('rsync','-aI','--delete','--exclude',$git_dir,
639 # '--exclude','.arch-inventory',
640 '--exclude','.arch-ids','--exclude','{arch}',
641 '--exclude','+*','--exclude',',*',
642 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
643 return $tree_dir;
644 }
645
646 sub apply_import {
647 my $ps = shift;
648 my $bname = git_branchname($ps->{id});
649
650 mkpath($tmp);
651
652 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
653 die "Cannot get import: $!" if $?;
654 system('rsync','-aI','--delete', '--exclude',$git_dir,
655 '--exclude','.arch-ids','--exclude','{arch}',
656 "$tmp/import/", './');
657 die "Cannot rsync import:$!" if $?;
658
659 rmtree("$tmp/import");
660 die "Cannot remove tempdir: $!" if $?;
661
662
663 return 1;
664 }
665
666 sub apply_cset {
667 my $ps = shift;
668
669 mkpath($tmp);
670
671 # get the changeset
672 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
673 die "Cannot get changeset: $!" if $?;
674
675 # apply patches
676 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
677 # this can be sped up considerably by doing
678 # (find | xargs cat) | patch
679 # but that can get mucked up by patches
680 # with missing trailing newlines or the standard
681 # 'missing newline' flag in the patch - possibly
682 # produced with an old/buggy diff.
683 # slow and safe, we invoke patch once per patchfile
684 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
685 die "Problem applying patches! $!" if $?;
686 }
687
688 # apply changed binary files
689 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
690 foreach my $mod (@modified) {
691 chomp $mod;
692 my $orig = $mod;
693 $orig =~ s/\.modified$//; # lazy
694 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
695 #print "rsync -p '$mod' '$orig'";
696 system('rsync','-p',$mod,"./$orig");
697 die "Problem applying binary changes! $!" if $?;
698 }
699 }
700
701 # bring in new files
702 system('rsync','-aI','--exclude',$git_dir,
703 '--exclude','.arch-ids',
704 '--exclude', '{arch}',
705 "$tmp/changeset/new-files-archive/",'./');
706
707 # deleted files are hinted from the commitlog processing
708
709 rmtree("$tmp/changeset");
710 }
711
712
713 # =for reference
714 # notes: *-files/-directories keys cannot have spaces, they're always
715 # pika-escaped. Everything after the first newline
716 # A log entry looks like:
717 # Revision: moodle-org--moodle--1.3.3--patch-15
718 # Archive: arch-eduforge@catalyst.net.nz--2004
719 # Creator: Penny Leach <penny@catalyst.net.nz>
720 # Date: Wed May 25 14:15:34 NZST 2005
721 # Standard-date: 2005-05-25 02:15:34 GMT
722 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
723 # lang/de/.arch-ids/block_html.php.id
724 # New-directories: lang/de/help/questionnaire
725 # lang/de/help/questionnaire/.arch-ids
726 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
727 # db_sears.sql db/db_sears.sql
728 # Removed-files: lang/be/docs/.arch-ids/release.html.id
729 # lang/be/docs/.arch-ids/releaseold.html.id
730 # Modified-files: admin/cron.php admin/delete.php
731 # admin/editor.html backup/lib.php backup/restore.php
732 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
733 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
734 # summary can be multiline with a leading space just like the above fields
735 # Keywords:
736 #
737 # Updating yadda tadda tadda madda
738 sub parselog {
739 my ($ps, $log) = @_;
740 my $key = undef;
741
742 # headers we want that contain filenames:
743 my %want_headers = (
744 new_files => 1,
745 modified_files => 1,
746 renamed_files => 1,
747 renamed_directories => 1,
748 removed_files => 1,
749 removed_directories => 1,
750 );
751
752 chomp (@$log);
753 while ($_ = shift @$log) {
754 if (/^Continuation-of:\s*(.*)/) {
755 $ps->{tag} = $1;
756 $key = undef;
757 } elsif (/^Summary:\s*(.*)$/ ) {
758 # summary can be multiline as long as it has a leading space
759 $ps->{summary} = [ $1 ];
760 $key = 'summary';
761 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
762 $ps->{author} = $1;
763 $ps->{email} = $2;
764 $key = undef;
765 # any *-files or *-directories can be read here:
766 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
767 my $val = $2;
768 $key = lc $1;
769 $key =~ tr/-/_/; # too lazy to quote :P
770 if ($want_headers{$key}) {
771 push @{$ps->{$key}}, split(/\s+/, $val);
772 } else {
773 $key = undef;
774 }
775 } elsif (/^$/) {
776 last; # remainder of @$log that didn't get shifted off is message
777 } elsif ($key) {
778 if (/^\s+(.*)$/) {
779 if ($key eq 'summary') {
780 push @{$ps->{$key}}, $1;
781 } else { # files/directories:
782 push @{$ps->{$key}}, split(/\s+/, $1);
783 }
784 } else {
785 $key = undef;
786 }
787 }
788 }
789
790 # post-processing:
791 $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
792 $ps->{message} = join("\n",@$log);
793
794 # skip Arch control files, unescape pika-escaped files
795 foreach my $k (keys %want_headers) {
796 next unless (defined $ps->{$k});
797 my @tmp = ();
798 foreach my $t (@{$ps->{$k}}) {
799 next unless length ($t);
800 next if $t =~ m!\{arch\}/!;
801 next if $t =~ m!\.arch-ids/!;
802 # should we skip this?
803 next if $t =~ m!\.arch-inventory$!;
804 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
805 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
806 if ($t =~ /\\/ ){
807 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
808 }
809 push @tmp, $t;
810 }
811 $ps->{$k} = \@tmp;
812 }
813 }
814
815 # write/read a tag
816 sub tag {
817 my ($tag, $commit) = @_;
818
819 if ($opt_o) {
820 $tag =~ s|/|--|g;
821 } else {
822 # don't use subdirs for tags yet, it could screw up other porcelains
823 $tag =~ s|/|,|g;
824 }
825
826 if ($commit) {
827 open(C,">","$git_dir/refs/tags/$tag")
828 or die "Cannot create tag $tag: $!\n";
829 print C "$commit\n"
830 or die "Cannot write tag $tag: $!\n";
831 close(C)
832 or die "Cannot write tag $tag: $!\n";
833 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
834 } else { # read
835 open(C,"<","$git_dir/refs/tags/$tag")
836 or die "Cannot read tag $tag: $!\n";
837 $commit = <C>;
838 chomp $commit;
839 die "Error reading tag $tag: $!\n" unless length $commit == 40;
840 close(C)
841 or die "Cannot read tag $tag: $!\n";
842 return $commit;
843 }
844 }
845
846 # write/read a private tag
847 # reads fail softly if the tag isn't there
848 sub ptag {
849 my ($tag, $commit) = @_;
850
851 # don't use subdirs for tags yet, it could screw up other porcelains
852 $tag =~ s|/|,|g;
853
854 my $tag_file = "$ptag_dir/$tag";
855 my $tag_branch_dir = dirname($tag_file);
856 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
857
858 if ($commit) { # write
859 open(C,">",$tag_file)
860 or die "Cannot create tag $tag: $!\n";
861 print C "$commit\n"
862 or die "Cannot write tag $tag: $!\n";
863 close(C)
864 or die "Cannot write tag $tag: $!\n";
865 $rptags{$commit} = $tag
866 unless $tag =~ m/--base-0$/;
867 } else { # read
868 # if the tag isn't there, return 0
869 unless ( -s $tag_file) {
870 return 0;
871 }
872 open(C,"<",$tag_file)
873 or die "Cannot read tag $tag: $!\n";
874 $commit = <C>;
875 chomp $commit;
876 die "Error reading tag $tag: $!\n" unless length $commit == 40;
877 close(C)
878 or die "Cannot read tag $tag: $!\n";
879 unless (defined $rptags{$commit}) {
880 $rptags{$commit} = $tag;
881 }
882 return $commit;
883 }
884 }
885
886 sub find_parents {
887 #
888 # Identify what branches are merging into me
889 # and whether we are fully merged
890 # git-merge-base <headsha> <headsha> should tell
891 # me what the base of the merge should be
892 #
893 my $ps = shift;
894
895 my %branches; # holds an arrayref per branch
896 # the arrayref contains a list of
897 # merged patches between the base
898 # of the merge and the current head
899
900 my @parents; # parents found for this commit
901
902 # simple loop to split the merges
903 # per branch
904 foreach my $merge (@{$ps->{merges}}) {
905 my $branch = git_branchname($merge);
906 unless (defined $branches{$branch} ){
907 $branches{$branch} = [];
908 }
909 push @{$branches{$branch}}, $merge;
910 }
911
912 #
913 # foreach branch find a merge base and walk it to the
914 # head where we are, collecting the merged patchsets that
915 # Arch has recorded. Keep that in @have
916 # Compare that with the commits on the other branch
917 # between merge-base and the tip of the branch (@need)
918 # and see if we have a series of consecutive patches
919 # starting from the merge base. The tip of the series
920 # of consecutive patches merged is our new parent for
921 # that branch.
922 #
923 foreach my $branch (keys %branches) {
924
925 # check that we actually know about the branch
926 next unless -e "$git_dir/refs/heads/$branch";
927
928 my $mergebase = `git-merge-base $branch $ps->{branch}`;
929 if ($?) {
930 # Don't die here, Arch supports one-way cherry-picking
931 # between branches with no common base (or any relationship
932 # at all beforehand)
933 warn "Cannot find merge base for $branch and $ps->{branch}";
934 next;
935 }
936 chomp $mergebase;
937
938 # now walk up to the mergepoint collecting what patches we have
939 my $branchtip = git_rev_parse($ps->{branch});
940 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
941 my %have; # collected merges this branch has
942 foreach my $merge (@{$ps->{merges}}) {
943 $have{$merge} = 1;
944 }
945 my %ancestorshave;
946 foreach my $par (@ancestors) {
947 $par = commitid2pset($par);
948 if (defined $par->{merges}) {
949 foreach my $merge (@{$par->{merges}}) {
950 $ancestorshave{$merge}=1;
951 }
952 }
953 }
954 # print "++++ Merges in $ps->{id} are....\n";
955 # my @have = sort keys %have; print Dumper(\@have);
956
957 # merge what we have with what ancestors have
958 %have = (%have, %ancestorshave);
959
960 # see what the remote branch has - these are the merges we
961 # will want to have in a consecutive series from the mergebase
962 my $otherbranchtip = git_rev_parse($branch);
963 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
964 my @need;
965 foreach my $needps (@needraw) { # get the psets
966 $needps = commitid2pset($needps);
967 # git-rev-list will also
968 # list commits merged in via earlier
969 # merges. we are only interested in commits
970 # from the branch we're looking at
971 if ($branch eq $needps->{branch}) {
972 push @need, $needps->{id};
973 }
974 }
975
976 # print "++++ Merges from $branch we want are....\n";
977 # print Dumper(\@need);
978
979 my $newparent;
980 while (my $needed_commit = pop @need) {
981 if ($have{$needed_commit}) {
982 $newparent = $needed_commit;
983 } else {
984 last; # break out of the while
985 }
986 }
987 if ($newparent) {
988 push @parents, $newparent;
989 }
990
991
992 } # end foreach branch
993
994 # prune redundant parents
995 my %parents;
996 foreach my $p (@parents) {
997 $parents{$p} = 1;
998 }
999 foreach my $p (@parents) {
1000 next unless exists $psets{$p}{merges};
1001 next unless ref $psets{$p}{merges};
1002 my @merges = @{$psets{$p}{merges}};
1003 foreach my $merge (@merges) {
1004 if ($parents{$merge}) {
1005 delete $parents{$merge};
1006 }
1007 }
1008 }
1009
1010 @parents = ();
1011 foreach (keys %parents) {
1012 push @parents, '-p', ptag($_);
1013 }
1014 return @parents;
1015 }
1016
1017 sub git_rev_parse {
1018 my $name = shift;
1019 my $val = `git-rev-parse $name`;
1020 die "Error: git-rev-parse $name" if $?;
1021 chomp $val;
1022 return $val;
1023 }
1024
1025 # resolve a SHA1 to a known patchset
1026 sub commitid2pset {
1027 my $commitid = shift;
1028 chomp $commitid;
1029 my $name = $rptags{$commitid}
1030 || die "Cannot find reverse tag mapping for $commitid";
1031 $name =~ s|,|/|;
1032 my $ps = $psets{$name}
1033 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1034 return $ps;
1035 }
1036
1037
1038 # an alternative to `command` that allows input to be passed as an array
1039 # to work around shell problems with weird characters in arguments
1040 sub safe_pipe_capture {
1041 my @output;
1042 if (my $pid = open my $child, '-|') {
1043 @output = (<$child>);
1044 close $child or die join(' ',@_).": $! $?";
1045 } else {
1046 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1047 }
1048 return wantarray ? @output : join('',@output);
1049 }
1050
1051 # `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1052 sub arch_tree_id {
1053 my $dir = shift;
1054 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1055 return $ret;
1056 }
1057
1058 sub archive_reachable {
1059 my $archive = shift;
1060 return 1 if $reachable{$archive};
1061 return 0 if $unreachable{$archive};
1062
1063 if (system "$TLA whereis-archive $archive >/dev/null") {
1064 if ($opt_a && (system($TLA,'register-archive',
1065 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1066 $reachable{$archive} = 1;
1067 return 1;
1068 }
1069 print STDERR "Archive is unreachable: $archive\n";
1070 $unreachable{$archive} = 1;
1071 return 0;
1072 } else {
1073 $reachable{$archive} = 1;
1074 return 1;
1075 }
1076 }
1077