]> git.ipfire.org Git - thirdparty/git.git/blob - git-archimport.perl
Forbid pattern maching characters in refnames.
[thirdparty/git.git] / git-archimport.perl
1 #!/usr/bin/perl -w
2 #
3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
8 #
9
10 =head1 Invocation
11
12 git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
13
14 Imports a project from one or more Arch repositories. It will follow branches
15 and repositories within the namespaces defined by the <archive/branch>
16 parameters suppplied. If it cannot find the remote branch a merge comes from
17 it will just import it as a regular commit. If it can find it, it will mark it
18 as a merge whenever possible.
19
20 See man (1) git-archimport for more details.
21
22 =head1 TODO
23
24 - create tag objects instead of ref tags
25 - audit shell-escaping of filenames
26 - hide our private tags somewhere smarter
27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
28 - sort and apply patches by graphing ancestry relations instead of just
29 relying in dates supplied in the changeset itself.
30 tla ancestry-graph -m could be helpful here...
31
32 =head1 Devel tricks
33
34 Add print in front of the shell commands invoked via backticks.
35
36 =head1 Devel Notes
37
38 There are several places where Arch and git terminology are intermixed
39 and potentially confused.
40
41 The notion of a "branch" in git is approximately equivalent to
42 a "archive/category--branch--version" in Arch. Also, it should be noted
43 that the "--branch" portion of "archive/category--branch--version" is really
44 optional in Arch although not many people (nor tools!) seem to know this.
45 This means that "archive/category--version" is also a valid "branch"
46 in git terms.
47
48 We always refer to Arch names by their fully qualified variant (which
49 means the "archive" name is prefixed.
50
51 For people unfamiliar with Arch, an "archive" is the term for "repository",
52 and can contain multiple, unrelated branches.
53
54 =cut
55
56 use strict;
57 use warnings;
58 use Getopt::Std;
59 use File::Temp qw(tempdir);
60 use File::Path qw(mkpath rmtree);
61 use File::Basename qw(basename dirname);
62 use Data::Dumper qw/ Dumper /;
63 use IPC::Open2;
64
65 $SIG{'PIPE'}="IGNORE";
66 $ENV{'TZ'}="UTC";
67
68 my $git_dir = $ENV{"GIT_DIR"} || ".git";
69 $ENV{"GIT_DIR"} = $git_dir;
70 my $ptag_dir = "$git_dir/archimport/tags";
71
72 our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
73
74 sub usage() {
75 print STDERR <<END;
76 Usage: ${\basename $0} # fetch/update GIT from Arch
77 [ -f ] [ -o ] [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth ] [ -t tempdir ]
78 repository/arch-branch [ repository/arch-branch] ...
79 END
80 exit(1);
81 }
82
83 getopts("fThvat:D:") or usage();
84 usage if $opt_h;
85
86 @ARGV >= 1 or usage();
87 # $arch_branches:
88 # values associated with keys:
89 # =1 - Arch version / git 'branch' detected via abrowse on a limit
90 # >1 - Arch version / git 'branch' of an auxilliary branch we've merged
91 my %arch_branches = map { $_ => 1 } @ARGV;
92
93 $ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
94 my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
95 $opt_v && print "+ Using $tmp as temporary directory\n";
96
97 my %reachable = (); # Arch repositories we can access
98 my %unreachable = (); # Arch repositories we can't access :<
99 my @psets = (); # the collection
100 my %psets = (); # the collection, by name
101 my %stats = ( # Track which strategy we used to import:
102 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
103 simple_changeset => 0, import_or_tag => 0
104 );
105
106 my %rptags = (); # my reverse private tags
107 # to map a SHA1 to a commitid
108 my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
109
110 sub do_abrowse {
111 my $stage = shift;
112 while (my ($limit, $level) = each %arch_branches) {
113 next unless $level == $stage;
114
115 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
116 or die "Problems with tla abrowse: $!";
117
118 my %ps = (); # the current one
119 my $lastseen = '';
120
121 while (<ABROWSE>) {
122 chomp;
123
124 # first record padded w 8 spaces
125 if (s/^\s{8}\b//) {
126 my ($id, $type) = split(m/\s+/, $_, 2);
127
128 my %last_ps;
129 # store the record we just captured
130 if (%ps && !exists $psets{ $ps{id} }) {
131 %last_ps = %ps; # break references
132 push (@psets, \%last_ps);
133 $psets{ $last_ps{id} } = \%last_ps;
134 }
135
136 my $branch = extract_versionname($id);
137 %ps = ( id => $id, branch => $branch );
138 if (%last_ps && ($last_ps{branch} eq $branch)) {
139 $ps{parent_id} = $last_ps{id};
140 }
141
142 $arch_branches{$branch} = 1;
143 $lastseen = 'id';
144
145 # deal with types (should work with baz or tla):
146 if ($type =~ m/\(.*changeset\)/) {
147 $ps{type} = 's';
148 } elsif ($type =~ /\(.*import\)/) {
149 $ps{type} = 'i';
150 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
151 $ps{type} = 't';
152 # read which revision we've tagged when we parse the log
153 $ps{tag} = $1;
154 } else {
155 warn "Unknown type $type";
156 }
157
158 $arch_branches{$branch} = 1;
159 $lastseen = 'id';
160 } elsif (s/^\s{10}//) {
161 # 10 leading spaces or more
162 # indicate commit metadata
163
164 # date
165 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
166 $ps{date} = $1;
167 $lastseen = 'date';
168 } elsif ($_ eq 'merges in:') {
169 $ps{merges} = [];
170 $lastseen = 'merges';
171 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
172 my $id = $_;
173 push (@{$ps{merges}}, $id);
174
175 # aggressive branch finding:
176 if ($opt_D) {
177 my $branch = extract_versionname($id);
178 my $repo = extract_reponame($branch);
179
180 if (archive_reachable($repo) &&
181 !defined $arch_branches{$branch}) {
182 $arch_branches{$branch} = $stage + 1;
183 }
184 }
185 } else {
186 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
187 }
188 }
189 }
190
191 if (%ps && !exists $psets{ $ps{id} }) {
192 my %temp = %ps; # break references
193 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
194 $temp{parent_id} = $psets[$#psets]{id};
195 }
196 push (@psets, \%temp);
197 $psets{ $temp{id} } = \%temp;
198 }
199
200 close ABROWSE or die "$TLA abrowse failed on $limit\n";
201 }
202 } # end foreach $root
203
204 do_abrowse(1);
205 my $depth = 2;
206 $opt_D ||= 0;
207 while ($depth <= $opt_D) {
208 do_abrowse($depth);
209 $depth++;
210 }
211
212 ## Order patches by time
213 # FIXME see if we can find a more optimal way to do this by graphing
214 # the ancestry data and walking it, that way we won't have to rely on
215 # client-supplied dates
216 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
217
218 #print Dumper \@psets;
219
220 ##
221 ## TODO cleanup irrelevant patches
222 ## and put an initial import
223 ## or a full tag
224 my $import = 0;
225 unless (-d $git_dir) { # initial import
226 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
227 print "Starting import from $psets[0]{id}\n";
228 `git-init-db`;
229 die $! if $?;
230 $import = 1;
231 } else {
232 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
233 }
234 } else { # progressing an import
235 # load the rptags
236 opendir(DIR, $ptag_dir)
237 || die "can't opendir: $!";
238 while (my $file = readdir(DIR)) {
239 # skip non-interesting-files
240 next unless -f "$ptag_dir/$file";
241
242 # convert first '--' to '/' from old git-archimport to use
243 # as an archivename/c--b--v private tag
244 if ($file !~ m!,!) {
245 my $oldfile = $file;
246 $file =~ s!--!,!;
247 print STDERR "converting old tag $oldfile to $file\n";
248 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
249 }
250 my $sha = ptag($file);
251 chomp $sha;
252 $rptags{$sha} = $file;
253 }
254 closedir DIR;
255 }
256
257 # process patchsets
258 # extract the Arch repository name (Arch "archive" in Arch-speak)
259 sub extract_reponame {
260 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
261 return (split(/\//, $fq_cvbr))[0];
262 }
263
264 sub extract_versionname {
265 my $name = shift;
266 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
267 return $name;
268 }
269
270 # convert a fully-qualified revision or version to a unique dirname:
271 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
272 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
273 #
274 # the git notion of a branch is closer to
275 # archive/category--branch--version than archive/category--branch, so we
276 # use this to convert to git branch names.
277 # Also, keep archive names but replace '/' with ',' since it won't require
278 # subdirectories, and is safer than swapping '--' which could confuse
279 # reverse-mapping when dealing with bastard branches that
280 # are just archive/category--version (no --branch)
281 sub tree_dirname {
282 my $revision = shift;
283 my $name = extract_versionname($revision);
284 $name =~ s#/#,#;
285 return $name;
286 }
287
288 # old versions of git-archimport just use the <category--branch> part:
289 sub old_style_branchname {
290 my $id = shift;
291 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
292 chomp $ret;
293 return $ret;
294 }
295
296 *git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
297
298 sub process_patchset_accurate {
299 my $ps = shift;
300
301 # switch to that branch if we're not already in that branch:
302 if (-e "$git_dir/refs/heads/$ps->{branch}") {
303 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
304
305 # remove any old stuff that got leftover:
306 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
307 rmtree(split(/\0/,$rm)) if $rm;
308 }
309
310 # Apply the import/changeset/merge into the working tree
311 my $dir = sync_to_ps($ps);
312 # read the new log entry:
313 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
314 die "Error in cat-log: $!" if $?;
315 chomp @commitlog;
316
317 # grab variables we want from the log, new fields get added to $ps:
318 # (author, date, email, summary, message body ...)
319 parselog($ps, \@commitlog);
320
321 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
322 # this should work when importing continuations
323 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
324
325 # find where we are supposed to branch from
326 system('git-checkout','-f','-b',$ps->{branch},
327 $branchpoint) == 0 or die "$! $?\n";
328
329 # remove any old stuff that got leftover:
330 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
331 rmtree(split(/\0/,$rm)) if $rm;
332
333 # If we trust Arch with the fact that this is just
334 # a tag, and it does not affect the state of the tree
335 # then we just tag and move on
336 tag($ps->{id}, $branchpoint);
337 ptag($ps->{id}, $branchpoint);
338 print " * Tagged $ps->{id} at $branchpoint\n";
339 return 0;
340 } else {
341 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
342 }
343 # allow multiple bases/imports here since Arch supports cherry-picks
344 # from unrelated trees
345 }
346
347 # update the index with all the changes we got
348 system('git-ls-files --others -z | '.
349 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
350 system('git-ls-files --deleted -z | '.
351 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
352 system('git-ls-files -z | '.
353 'git-update-index -z --stdin') == 0 or die "$! $?\n";
354 return 1;
355 }
356
357 # the native changeset processing strategy. This is very fast, but
358 # does not handle permissions or any renames involving directories
359 sub process_patchset_fast {
360 my $ps = shift;
361 #
362 # create the branch if needed
363 #
364 if ($ps->{type} eq 'i' && !$import) {
365 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
366 }
367
368 unless ($import) { # skip for import
369 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
370 # we know about this branch
371 system('git-checkout',$ps->{branch});
372 } else {
373 # new branch! we need to verify a few things
374 die "Branch on a non-tag!" unless $ps->{type} eq 't';
375 my $branchpoint = ptag($ps->{tag});
376 die "Tagging from unknown id unsupported: $ps->{tag}"
377 unless $branchpoint;
378
379 # find where we are supposed to branch from
380 system('git-checkout','-b',$ps->{branch},$branchpoint);
381
382 # If we trust Arch with the fact that this is just
383 # a tag, and it does not affect the state of the tree
384 # then we just tag and move on
385 tag($ps->{id}, $branchpoint);
386 ptag($ps->{id}, $branchpoint);
387 print " * Tagged $ps->{id} at $branchpoint\n";
388 return 0;
389 }
390 die $! if $?;
391 }
392
393 #
394 # Apply the import/changeset/merge into the working tree
395 #
396 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
397 apply_import($ps) or die $!;
398 $stats{import_or_tag}++;
399 $import=0;
400 } elsif ($ps->{type} eq 's') {
401 apply_cset($ps);
402 $stats{simple_changeset}++;
403 }
404
405 #
406 # prepare update git's index, based on what arch knows
407 # about the pset, resolve parents, etc
408 #
409
410 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
411 die "Error in cat-archive-log: $!" if $?;
412
413 parselog($ps,\@commitlog);
414
415 # imports don't give us good info
416 # on added files. Shame on them
417 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
418 system('git-ls-files --others -z | '.
419 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
420 system('git-ls-files --deleted -z | '.
421 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
422 }
423
424 # TODO: handle removed_directories and renamed_directories:
425
426 if (my $add = $ps->{new_files}) {
427 while (@$add) {
428 my @slice = splice(@$add, 0, 100);
429 system('git-update-index','--add','--',@slice) == 0 or
430 die "Error in git-update-index --add: $! $?\n";
431 }
432 }
433
434 if (my $del = $ps->{removed_files}) {
435 unlink @$del;
436 while (@$del) {
437 my @slice = splice(@$del, 0, 100);
438 system('git-update-index','--remove','--',@slice) == 0 or
439 die "Error in git-update-index --remove: $! $?\n";
440 }
441 }
442
443 if (my $ren = $ps->{renamed_files}) { # renamed
444 if (@$ren % 2) {
445 die "Odd number of entries in rename!?";
446 }
447
448 while (@$ren) {
449 my $from = shift @$ren;
450 my $to = shift @$ren;
451
452 unless (-d dirname($to)) {
453 mkpath(dirname($to)); # will die on err
454 }
455 # print "moving $from $to";
456 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
457 system('git-update-index','--remove','--',$from) == 0 or
458 die "Error in git-update-index --remove: $! $?\n";
459 system('git-update-index','--add','--',$to) == 0 or
460 die "Error in git-update-index --add: $! $?\n";
461 }
462 }
463
464 if (my $mod = $ps->{modified_files}) {
465 while (@$mod) {
466 my @slice = splice(@$mod, 0, 100);
467 system('git-update-index','--',@slice) == 0 or
468 die "Error in git-update-index: $! $?\n";
469 }
470 }
471 return 1; # we successfully applied the changeset
472 }
473
474 if ($opt_f) {
475 print "Will import patchsets using the fast strategy\n",
476 "Renamed directories and permission changes will be missed\n";
477 *process_patchset = *process_patchset_fast;
478 } else {
479 print "Using the default (accurate) import strategy.\n",
480 "Things may be a bit slow\n";
481 *process_patchset = *process_patchset_accurate;
482 }
483
484 foreach my $ps (@psets) {
485 # process patchsets
486 $ps->{branch} = git_branchname($ps->{id});
487
488 #
489 # ensure we have a clean state
490 #
491 if (my $dirty = `git-diff-files`) {
492 die "Unclean tree when about to process $ps->{id} " .
493 " - did we fail to commit cleanly before?\n$dirty";
494 }
495 die $! if $?;
496
497 #
498 # skip commits already in repo
499 #
500 if (ptag($ps->{id})) {
501 $opt_v && print " * Skipping already imported: $ps->{id}\n";
502 next;
503 }
504
505 print " * Starting to work on $ps->{id}\n";
506
507 process_patchset($ps) or next;
508
509 # warn "errors when running git-update-index! $!";
510 my $tree = `git-write-tree`;
511 die "cannot write tree $!" if $?;
512 chomp $tree;
513
514 #
515 # Who's your daddy?
516 #
517 my @par;
518 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
519 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
520 my $p = <HEAD>;
521 close HEAD;
522 chomp $p;
523 push @par, '-p', $p;
524 } else {
525 if ($ps->{type} eq 's') {
526 warn "Could not find the right head for the branch $ps->{branch}";
527 }
528 }
529 }
530
531 if ($ps->{merges}) {
532 push @par, find_parents($ps);
533 }
534
535 #
536 # Commit, tag and clean state
537 #
538 $ENV{TZ} = 'GMT';
539 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
540 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
541 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
542 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
543 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
544 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
545
546 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
547 or die $!;
548 print WRITER $ps->{summary},"\n";
549 print WRITER $ps->{message},"\n";
550
551 # make it easy to backtrack and figure out which Arch revision this was:
552 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
553
554 close WRITER;
555 my $commitid = <READER>; # read
556 chomp $commitid;
557 close READER;
558 waitpid $pid,0; # close;
559
560 if (length $commitid != 40) {
561 die "Something went wrong with the commit! $! $commitid";
562 }
563 #
564 # Update the branch
565 #
566 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
567 print HEAD $commitid;
568 close HEAD;
569 system('git-update-ref', 'HEAD', "$ps->{branch}");
570
571 # tag accordingly
572 ptag($ps->{id}, $commitid); # private tag
573 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
574 tag($ps->{id}, $commitid);
575 }
576 print " * Committed $ps->{id}\n";
577 print " + tree $tree\n";
578 print " + commit $commitid\n";
579 $opt_v && print " + commit date is $ps->{date} \n";
580 $opt_v && print " + parents: ",join(' ',@par),"\n";
581 }
582
583 if ($opt_v) {
584 foreach (sort keys %stats) {
585 print" $_: $stats{$_}\n";
586 }
587 }
588 exit 0;
589
590 # used by the accurate strategy:
591 sub sync_to_ps {
592 my $ps = shift;
593 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
594
595 $opt_v && print "sync_to_ps($ps->{id}) method: ";
596
597 if (-d $tree_dir) {
598 if ($ps->{type} eq 't') {
599 $opt_v && print "get (tag)\n";
600 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
601 # can't rely on replay to work correctly on these
602 rmtree($tree_dir);
603 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
604 $stats{get_tag}++;
605 } else {
606 my $tree_id = arch_tree_id($tree_dir);
607 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
608 # the common case (hopefully)
609 $opt_v && print "replay\n";
610 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
611 $stats{replay}++;
612 } else {
613 # getting one tree is usually faster than getting two trees
614 # and applying the delta ...
615 rmtree($tree_dir);
616 $opt_v && print "apply-delta\n";
617 safe_pipe_capture($TLA,'get','--no-pristine',
618 $ps->{id},$tree_dir);
619 $stats{get_delta}++;
620 }
621 }
622 } else {
623 # new branch work
624 $opt_v && print "get (new tree)\n";
625 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
626 $stats{get_new}++;
627 }
628
629 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
630 system('rsync','-aI','--delete','--exclude',$git_dir,
631 # '--exclude','.arch-inventory',
632 '--exclude','.arch-ids','--exclude','{arch}',
633 '--exclude','+*','--exclude',',*',
634 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
635 return $tree_dir;
636 }
637
638 sub apply_import {
639 my $ps = shift;
640 my $bname = git_branchname($ps->{id});
641
642 mkpath($tmp);
643
644 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
645 die "Cannot get import: $!" if $?;
646 system('rsync','-aI','--delete', '--exclude',$git_dir,
647 '--exclude','.arch-ids','--exclude','{arch}',
648 "$tmp/import/", './');
649 die "Cannot rsync import:$!" if $?;
650
651 rmtree("$tmp/import");
652 die "Cannot remove tempdir: $!" if $?;
653
654
655 return 1;
656 }
657
658 sub apply_cset {
659 my $ps = shift;
660
661 mkpath($tmp);
662
663 # get the changeset
664 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
665 die "Cannot get changeset: $!" if $?;
666
667 # apply patches
668 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
669 # this can be sped up considerably by doing
670 # (find | xargs cat) | patch
671 # but that cna get mucked up by patches
672 # with missing trailing newlines or the standard
673 # 'missing newline' flag in the patch - possibly
674 # produced with an old/buggy diff.
675 # slow and safe, we invoke patch once per patchfile
676 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
677 die "Problem applying patches! $!" if $?;
678 }
679
680 # apply changed binary files
681 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
682 foreach my $mod (@modified) {
683 chomp $mod;
684 my $orig = $mod;
685 $orig =~ s/\.modified$//; # lazy
686 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
687 #print "rsync -p '$mod' '$orig'";
688 system('rsync','-p',$mod,"./$orig");
689 die "Problem applying binary changes! $!" if $?;
690 }
691 }
692
693 # bring in new files
694 system('rsync','-aI','--exclude',$git_dir,
695 '--exclude','.arch-ids',
696 '--exclude', '{arch}',
697 "$tmp/changeset/new-files-archive/",'./');
698
699 # deleted files are hinted from the commitlog processing
700
701 rmtree("$tmp/changeset");
702 }
703
704
705 # =for reference
706 # notes: *-files/-directories keys cannot have spaces, they're always
707 # pika-escaped. Everything after the first newline
708 # A log entry looks like:
709 # Revision: moodle-org--moodle--1.3.3--patch-15
710 # Archive: arch-eduforge@catalyst.net.nz--2004
711 # Creator: Penny Leach <penny@catalyst.net.nz>
712 # Date: Wed May 25 14:15:34 NZST 2005
713 # Standard-date: 2005-05-25 02:15:34 GMT
714 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
715 # lang/de/.arch-ids/block_html.php.id
716 # New-directories: lang/de/help/questionnaire
717 # lang/de/help/questionnaire/.arch-ids
718 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
719 # db_sears.sql db/db_sears.sql
720 # Removed-files: lang/be/docs/.arch-ids/release.html.id
721 # lang/be/docs/.arch-ids/releaseold.html.id
722 # Modified-files: admin/cron.php admin/delete.php
723 # admin/editor.html backup/lib.php backup/restore.php
724 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
725 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
726 # summary can be multiline with a leading space just like the above fields
727 # Keywords:
728 #
729 # Updating yadda tadda tadda madda
730 sub parselog {
731 my ($ps, $log) = @_;
732 my $key = undef;
733
734 # headers we want that contain filenames:
735 my %want_headers = (
736 new_files => 1,
737 modified_files => 1,
738 renamed_files => 1,
739 renamed_directories => 1,
740 removed_files => 1,
741 removed_directories => 1,
742 );
743
744 chomp (@$log);
745 while ($_ = shift @$log) {
746 if (/^Continuation-of:\s*(.*)/) {
747 $ps->{tag} = $1;
748 $key = undef;
749 } elsif (/^Summary:\s*(.*)$/ ) {
750 # summary can be multiline as long as it has a leading space
751 $ps->{summary} = [ $1 ];
752 $key = 'summary';
753 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
754 $ps->{author} = $1;
755 $ps->{email} = $2;
756 $key = undef;
757 # any *-files or *-directories can be read here:
758 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
759 my $val = $2;
760 $key = lc $1;
761 $key =~ tr/-/_/; # too lazy to quote :P
762 if ($want_headers{$key}) {
763 push @{$ps->{$key}}, split(/\s+/, $val);
764 } else {
765 $key = undef;
766 }
767 } elsif (/^$/) {
768 last; # remainder of @$log that didn't get shifted off is message
769 } elsif ($key) {
770 if (/^\s+(.*)$/) {
771 if ($key eq 'summary') {
772 push @{$ps->{$key}}, $1;
773 } else { # files/directories:
774 push @{$ps->{$key}}, split(/\s+/, $1);
775 }
776 } else {
777 $key = undef;
778 }
779 }
780 }
781
782 # post-processing:
783 $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
784 $ps->{message} = join("\n",@$log);
785
786 # skip Arch control files, unescape pika-escaped files
787 foreach my $k (keys %want_headers) {
788 next unless (defined $ps->{$k});
789 my @tmp = ();
790 foreach my $t (@{$ps->{$k}}) {
791 next unless length ($t);
792 next if $t =~ m!\{arch\}/!;
793 next if $t =~ m!\.arch-ids/!;
794 # should we skip this?
795 next if $t =~ m!\.arch-inventory$!;
796 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
797 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
798 if ($t =~ /\\/ ){
799 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
800 }
801 push @tmp, $t;
802 }
803 $ps->{$k} = \@tmp;
804 }
805 }
806
807 # write/read a tag
808 sub tag {
809 my ($tag, $commit) = @_;
810
811 if ($opt_o) {
812 $tag =~ s|/|--|g;
813 } else {
814 # don't use subdirs for tags yet, it could screw up other porcelains
815 $tag =~ s|/|,|g;
816 }
817
818 if ($commit) {
819 open(C,">","$git_dir/refs/tags/$tag")
820 or die "Cannot create tag $tag: $!\n";
821 print C "$commit\n"
822 or die "Cannot write tag $tag: $!\n";
823 close(C)
824 or die "Cannot write tag $tag: $!\n";
825 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
826 } else { # read
827 open(C,"<","$git_dir/refs/tags/$tag")
828 or die "Cannot read tag $tag: $!\n";
829 $commit = <C>;
830 chomp $commit;
831 die "Error reading tag $tag: $!\n" unless length $commit == 40;
832 close(C)
833 or die "Cannot read tag $tag: $!\n";
834 return $commit;
835 }
836 }
837
838 # write/read a private tag
839 # reads fail softly if the tag isn't there
840 sub ptag {
841 my ($tag, $commit) = @_;
842
843 # don't use subdirs for tags yet, it could screw up other porcelains
844 $tag =~ s|/|,|g;
845
846 my $tag_file = "$ptag_dir/$tag";
847 my $tag_branch_dir = dirname($tag_file);
848 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
849
850 if ($commit) { # write
851 open(C,">",$tag_file)
852 or die "Cannot create tag $tag: $!\n";
853 print C "$commit\n"
854 or die "Cannot write tag $tag: $!\n";
855 close(C)
856 or die "Cannot write tag $tag: $!\n";
857 $rptags{$commit} = $tag
858 unless $tag =~ m/--base-0$/;
859 } else { # read
860 # if the tag isn't there, return 0
861 unless ( -s $tag_file) {
862 return 0;
863 }
864 open(C,"<",$tag_file)
865 or die "Cannot read tag $tag: $!\n";
866 $commit = <C>;
867 chomp $commit;
868 die "Error reading tag $tag: $!\n" unless length $commit == 40;
869 close(C)
870 or die "Cannot read tag $tag: $!\n";
871 unless (defined $rptags{$commit}) {
872 $rptags{$commit} = $tag;
873 }
874 return $commit;
875 }
876 }
877
878 sub find_parents {
879 #
880 # Identify what branches are merging into me
881 # and whether we are fully merged
882 # git-merge-base <headsha> <headsha> should tell
883 # me what the base of the merge should be
884 #
885 my $ps = shift;
886
887 my %branches; # holds an arrayref per branch
888 # the arrayref contains a list of
889 # merged patches between the base
890 # of the merge and the current head
891
892 my @parents; # parents found for this commit
893
894 # simple loop to split the merges
895 # per branch
896 foreach my $merge (@{$ps->{merges}}) {
897 my $branch = git_branchname($merge);
898 unless (defined $branches{$branch} ){
899 $branches{$branch} = [];
900 }
901 push @{$branches{$branch}}, $merge;
902 }
903
904 #
905 # foreach branch find a merge base and walk it to the
906 # head where we are, collecting the merged patchsets that
907 # Arch has recorded. Keep that in @have
908 # Compare that with the commits on the other branch
909 # between merge-base and the tip of the branch (@need)
910 # and see if we have a series of consecutive patches
911 # starting from the merge base. The tip of the series
912 # of consecutive patches merged is our new parent for
913 # that branch.
914 #
915 foreach my $branch (keys %branches) {
916
917 # check that we actually know about the branch
918 next unless -e "$git_dir/refs/heads/$branch";
919
920 my $mergebase = `git-merge-base $branch $ps->{branch}`;
921 if ($?) {
922 # Don't die here, Arch supports one-way cherry-picking
923 # between branches with no common base (or any relationship
924 # at all beforehand)
925 warn "Cannot find merge base for $branch and $ps->{branch}";
926 next;
927 }
928 chomp $mergebase;
929
930 # now walk up to the mergepoint collecting what patches we have
931 my $branchtip = git_rev_parse($ps->{branch});
932 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
933 my %have; # collected merges this branch has
934 foreach my $merge (@{$ps->{merges}}) {
935 $have{$merge} = 1;
936 }
937 my %ancestorshave;
938 foreach my $par (@ancestors) {
939 $par = commitid2pset($par);
940 if (defined $par->{merges}) {
941 foreach my $merge (@{$par->{merges}}) {
942 $ancestorshave{$merge}=1;
943 }
944 }
945 }
946 # print "++++ Merges in $ps->{id} are....\n";
947 # my @have = sort keys %have; print Dumper(\@have);
948
949 # merge what we have with what ancestors have
950 %have = (%have, %ancestorshave);
951
952 # see what the remote branch has - these are the merges we
953 # will want to have in a consecutive series from the mergebase
954 my $otherbranchtip = git_rev_parse($branch);
955 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
956 my @need;
957 foreach my $needps (@needraw) { # get the psets
958 $needps = commitid2pset($needps);
959 # git-rev-list will also
960 # list commits merged in via earlier
961 # merges. we are only interested in commits
962 # from the branch we're looking at
963 if ($branch eq $needps->{branch}) {
964 push @need, $needps->{id};
965 }
966 }
967
968 # print "++++ Merges from $branch we want are....\n";
969 # print Dumper(\@need);
970
971 my $newparent;
972 while (my $needed_commit = pop @need) {
973 if ($have{$needed_commit}) {
974 $newparent = $needed_commit;
975 } else {
976 last; # break out of the while
977 }
978 }
979 if ($newparent) {
980 push @parents, $newparent;
981 }
982
983
984 } # end foreach branch
985
986 # prune redundant parents
987 my %parents;
988 foreach my $p (@parents) {
989 $parents{$p} = 1;
990 }
991 foreach my $p (@parents) {
992 next unless exists $psets{$p}{merges};
993 next unless ref $psets{$p}{merges};
994 my @merges = @{$psets{$p}{merges}};
995 foreach my $merge (@merges) {
996 if ($parents{$merge}) {
997 delete $parents{$merge};
998 }
999 }
1000 }
1001
1002 @parents = ();
1003 foreach (keys %parents) {
1004 push @parents, '-p', ptag($_);
1005 }
1006 return @parents;
1007 }
1008
1009 sub git_rev_parse {
1010 my $name = shift;
1011 my $val = `git-rev-parse $name`;
1012 die "Error: git-rev-parse $name" if $?;
1013 chomp $val;
1014 return $val;
1015 }
1016
1017 # resolve a SHA1 to a known patchset
1018 sub commitid2pset {
1019 my $commitid = shift;
1020 chomp $commitid;
1021 my $name = $rptags{$commitid}
1022 || die "Cannot find reverse tag mapping for $commitid";
1023 $name =~ s|,|/|;
1024 my $ps = $psets{$name}
1025 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1026 return $ps;
1027 }
1028
1029
1030 # an alterative to `command` that allows input to be passed as an array
1031 # to work around shell problems with weird characters in arguments
1032 sub safe_pipe_capture {
1033 my @output;
1034 if (my $pid = open my $child, '-|') {
1035 @output = (<$child>);
1036 close $child or die join(' ',@_).": $! $?";
1037 } else {
1038 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1039 }
1040 return wantarray ? @output : join('',@output);
1041 }
1042
1043 # `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1044 sub arch_tree_id {
1045 my $dir = shift;
1046 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1047 return $ret;
1048 }
1049
1050 sub archive_reachable {
1051 my $archive = shift;
1052 return 1 if $reachable{$archive};
1053 return 0 if $unreachable{$archive};
1054
1055 if (system "$TLA whereis-archive $archive >/dev/null") {
1056 if ($opt_a && (system($TLA,'register-archive',
1057 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1058 $reachable{$archive} = 1;
1059 return 1;
1060 }
1061 print STDERR "Archive is unreachable: $archive\n";
1062 $unreachable{$archive} = 1;
1063 return 0;
1064 } else {
1065 $reachable{$archive} = 1;
1066 return 1;
1067 }
1068 }
1069