]> git.ipfire.org Git - thirdparty/git.git/blame - git-archimport.perl
Explain "Not a git repository: '.git'".
[thirdparty/git.git] / git-archimport.perl
CommitLineData
d3968363
ML
1#!/usr/bin/perl -w
2#
3# This tool is copyright (c) 2005, Martin Langhoff.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to walk the output of tla abrowse,
7# fetch the changesets and apply them.
8#
241b5967 9
d3968363
ML
10=head1 Invocation
11
42f4570c
EW
12 git-archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
d3968363 14
241b5967
ML
15Imports a project from one or more Arch repositories. It will follow branches
16and repositories within the namespaces defined by the <archive/branch>
82e5a82f 17parameters supplied. If it cannot find the remote branch a merge comes from
241b5967
ML
18it will just import it as a regular commit. If it can find it, it will mark it
19as a merge whenever possible.
d3968363 20
241b5967 21See man (1) git-archimport for more details.
d3968363 22
241b5967 23=head1 TODO
d3968363 24
241b5967 25 - create tag objects instead of ref tags
d3968363 26 - audit shell-escaping of filenames
241b5967
ML
27 - hide our private tags somewhere smarter
28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
3e525e67
EW
29 - sort and apply patches by graphing ancestry relations instead of just
30 relying in dates supplied in the changeset itself.
31 tla ancestry-graph -m could be helpful here...
d3968363
ML
32
33=head1 Devel tricks
34
35Add print in front of the shell commands invoked via backticks.
36
22ff00fc
EW
37=head1 Devel Notes
38
39There are several places where Arch and git terminology are intermixed
40and potentially confused.
41
42The notion of a "branch" in git is approximately equivalent to
43a "archive/category--branch--version" in Arch. Also, it should be noted
44that the "--branch" portion of "archive/category--branch--version" is really
45optional in Arch although not many people (nor tools!) seem to know this.
46This means that "archive/category--version" is also a valid "branch"
47in git terms.
48
49We always refer to Arch names by their fully qualified variant (which
50means the "archive" name is prefixed.
51
52For people unfamiliar with Arch, an "archive" is the term for "repository",
53and can contain multiple, unrelated branches.
54
d3968363
ML
55=cut
56
57use strict;
58use warnings;
59use Getopt::Std;
42f44b08 60use File::Temp qw(tempdir);
f88961a8 61use File::Path qw(mkpath rmtree);
d3968363 62use File::Basename qw(basename dirname);
d3968363
ML
63use Data::Dumper qw/ Dumper /;
64use IPC::Open2;
65
66$SIG{'PIPE'}="IGNORE";
67$ENV{'TZ'}="UTC";
68
1d4710d0
ML
69my $git_dir = $ENV{"GIT_DIR"} || ".git";
70$ENV{"GIT_DIR"} = $git_dir;
a7fb51d3 71my $ptag_dir = "$git_dir/archimport/tags";
1d4710d0 72
3e525e67 73our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
d3968363
ML
74
75sub usage() {
76 print STDERR <<END;
77Usage: ${\basename $0} # fetch/update GIT from Arch
42f4570c 78 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
d3968363
ML
79 repository/arch-branch [ repository/arch-branch] ...
80END
81 exit(1);
82}
83
3e525e67 84getopts("fThvat:D:") or usage();
d3968363
ML
85usage if $opt_h;
86
87@ARGV >= 1 or usage();
42f44b08
EW
88# $arch_branches:
89# values associated with keys:
90# =1 - Arch version / git 'branch' detected via abrowse on a limit
82e5a82f 91# >1 - Arch version / git 'branch' of an auxiliary branch we've merged
42f44b08 92my %arch_branches = map { $_ => 1 } @ARGV;
d3968363 93
5744f277
EW
94$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
95my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
127bf00f 96$opt_v && print "+ Using $tmp as temporary directory\n";
d3968363 97
42f44b08
EW
98my %reachable = (); # Arch repositories we can access
99my %unreachable = (); # Arch repositories we can't access :<
d3968363 100my @psets = (); # the collection
b779d5f0 101my %psets = (); # the collection, by name
3e525e67
EW
102my %stats = ( # Track which strategy we used to import:
103 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
104 simple_changeset => 0, import_or_tag => 0
105);
b779d5f0
ML
106
107my %rptags = (); # my reverse private tags
108 # to map a SHA1 to a commitid
2777ef76 109my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
d3968363 110
42f44b08
EW
111sub do_abrowse {
112 my $stage = shift;
113 while (my ($limit, $level) = each %arch_branches) {
114 next unless $level == $stage;
115
116 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
117 or die "Problems with tla abrowse: $!";
d3968363 118
42f44b08
EW
119 my %ps = (); # the current one
120 my $lastseen = '';
d3968363 121
42f44b08
EW
122 while (<ABROWSE>) {
123 chomp;
d3968363 124
42f44b08
EW
125 # first record padded w 8 spaces
126 if (s/^\s{8}\b//) {
127 my ($id, $type) = split(m/\s+/, $_, 2);
128
129 my %last_ps;
130 # store the record we just captured
131 if (%ps && !exists $psets{ $ps{id} }) {
132 %last_ps = %ps; # break references
133 push (@psets, \%last_ps);
134 $psets{ $last_ps{id} } = \%last_ps;
135 }
d3968363 136
42f44b08
EW
137 my $branch = extract_versionname($id);
138 %ps = ( id => $id, branch => $branch );
139 if (%last_ps && ($last_ps{branch} eq $branch)) {
140 $ps{parent_id} = $last_ps{id};
141 }
142
143 $arch_branches{$branch} = 1;
144 $lastseen = 'id';
145
146 # deal with types (should work with baz or tla):
147 if ($type =~ m/\(.*changeset\)/) {
148 $ps{type} = 's';
149 } elsif ($type =~ /\(.*import\)/) {
150 $ps{type} = 'i';
6df896b5 151 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
42f44b08
EW
152 $ps{type} = 't';
153 # read which revision we've tagged when we parse the log
6df896b5 154 $ps{tag} = $1;
42f44b08
EW
155 } else {
156 warn "Unknown type $type";
157 }
158
159 $arch_branches{$branch} = 1;
160 $lastseen = 'id';
161 } elsif (s/^\s{10}//) {
162 # 10 leading spaces or more
163 # indicate commit metadata
164
165 # date
166 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
167 $ps{date} = $1;
168 $lastseen = 'date';
169 } elsif ($_ eq 'merges in:') {
170 $ps{merges} = [];
171 $lastseen = 'merges';
172 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
173 my $id = $_;
174 push (@{$ps{merges}}, $id);
175
176 # aggressive branch finding:
177 if ($opt_D) {
178 my $branch = extract_versionname($id);
179 my $repo = extract_reponame($branch);
180
181 if (archive_reachable($repo) &&
182 !defined $arch_branches{$branch}) {
183 $arch_branches{$branch} = $stage + 1;
184 }
185 }
186 } else {
187 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
d3968363 188 }
d3968363 189 }
d3968363 190 }
d3968363 191
42f44b08
EW
192 if (%ps && !exists $psets{ $ps{id} }) {
193 my %temp = %ps; # break references
194 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
195 $temp{parent_id} = $psets[$#psets]{id};
196 }
197 push (@psets, \%temp);
198 $psets{ $temp{id} } = \%temp;
199 }
200
201 close ABROWSE or die "$TLA abrowse failed on $limit\n";
202 }
d3968363
ML
203} # end foreach $root
204
42f44b08
EW
205do_abrowse(1);
206my $depth = 2;
207$opt_D ||= 0;
208while ($depth <= $opt_D) {
209 do_abrowse($depth);
210 $depth++;
211}
212
d3968363 213## Order patches by time
42f44b08
EW
214# FIXME see if we can find a more optimal way to do this by graphing
215# the ancestry data and walking it, that way we won't have to rely on
216# client-supplied dates
d3968363
ML
217@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
218
219#print Dumper \@psets;
220
221##
222## TODO cleanup irrelevant patches
223## and put an initial import
224## or a full tag
3292ae47 225my $import = 0;
1d4710d0 226unless (-d $git_dir) { # initial import
d3968363
ML
227 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
228 print "Starting import from $psets[0]{id}\n";
3292ae47
ML
229 `git-init-db`;
230 die $! if $?;
231 $import = 1;
d3968363
ML
232 } else {
233 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
234 }
b779d5f0
ML
235} else { # progressing an import
236 # load the rptags
42f44b08 237 opendir(DIR, $ptag_dir)
b779d5f0
ML
238 || die "can't opendir: $!";
239 while (my $file = readdir(DIR)) {
a7fb51d3
EW
240 # skip non-interesting-files
241 next unless -f "$ptag_dir/$file";
242
243 # convert first '--' to '/' from old git-archimport to use
244 # as an archivename/c--b--v private tag
245 if ($file !~ m!,!) {
246 my $oldfile = $file;
247 $file =~ s!--!,!;
248 print STDERR "converting old tag $oldfile to $file\n";
249 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
250 }
b779d5f0
ML
251 my $sha = ptag($file);
252 chomp $sha;
b779d5f0
ML
253 $rptags{$sha} = $file;
254 }
255 closedir DIR;
d3968363
ML
256}
257
3292ae47 258# process patchsets
22ff00fc
EW
259# extract the Arch repository name (Arch "archive" in Arch-speak)
260sub extract_reponame {
261 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
262 return (split(/\//, $fq_cvbr))[0];
263}
264
265sub extract_versionname {
266 my $name = shift;
267 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
268 return $name;
269}
d3968363 270
22ff00fc
EW
271# convert a fully-qualified revision or version to a unique dirname:
272# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
273# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
274#
275# the git notion of a branch is closer to
276# archive/category--branch--version than archive/category--branch, so we
277# use this to convert to git branch names.
278# Also, keep archive names but replace '/' with ',' since it won't require
279# subdirectories, and is safer than swapping '--' which could confuse
280# reverse-mapping when dealing with bastard branches that
281# are just archive/category--version (no --branch)
282sub tree_dirname {
283 my $revision = shift;
284 my $name = extract_versionname($revision);
285 $name =~ s#/#,#;
286 return $name;
287}
288
fee3365f
ML
289# old versions of git-archimport just use the <category--branch> part:
290sub old_style_branchname {
291 my $id = shift;
292 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
293 chomp $ret;
294 return $ret;
295}
296
297*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
22ff00fc 298
3e525e67
EW
299sub process_patchset_accurate {
300 my $ps = shift;
301
302 # switch to that branch if we're not already in that branch:
303 if (-e "$git_dir/refs/heads/$ps->{branch}") {
304 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
d3968363 305
3e525e67
EW
306 # remove any old stuff that got leftover:
307 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
308 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 309 }
3e525e67
EW
310
311 # Apply the import/changeset/merge into the working tree
312 my $dir = sync_to_ps($ps);
313 # read the new log entry:
314 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
315 die "Error in cat-log: $!" if $?;
316 chomp @commitlog;
317
318 # grab variables we want from the log, new fields get added to $ps:
319 # (author, date, email, summary, message body ...)
320 parselog($ps, \@commitlog);
321
322 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
323 # this should work when importing continuations
324 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
325
326 # find where we are supposed to branch from
327 system('git-checkout','-f','-b',$ps->{branch},
328 $branchpoint) == 0 or die "$! $?\n";
329
330 # remove any old stuff that got leftover:
331 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
332 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 333
3e525e67
EW
334 # If we trust Arch with the fact that this is just
335 # a tag, and it does not affect the state of the tree
336 # then we just tag and move on
337 tag($ps->{id}, $branchpoint);
338 ptag($ps->{id}, $branchpoint);
339 print " * Tagged $ps->{id} at $branchpoint\n";
340 return 0;
341 } else {
342 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
343 }
344 # allow multiple bases/imports here since Arch supports cherry-picks
345 # from unrelated trees
346 }
347
348 # update the index with all the changes we got
3ff903bf
EW
349 system('git-diff-files --name-only -z | '.
350 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
3e525e67
EW
351 system('git-ls-files --others -z | '.
352 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
3e525e67
EW
353 return 1;
354}
37f15d50 355
3e525e67
EW
356# the native changeset processing strategy. This is very fast, but
357# does not handle permissions or any renames involving directories
358sub process_patchset_fast {
359 my $ps = shift;
d3968363
ML
360 #
361 # create the branch if needed
362 #
3292ae47
ML
363 if ($ps->{type} eq 'i' && !$import) {
364 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
d3968363
ML
365 }
366
3292ae47 367 unless ($import) { # skip for import
1d4710d0 368 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
d3968363 369 # we know about this branch
f88961a8 370 system('git-checkout',$ps->{branch});
d3968363
ML
371 } else {
372 # new branch! we need to verify a few things
373 die "Branch on a non-tag!" unless $ps->{type} eq 't';
374 my $branchpoint = ptag($ps->{tag});
375 die "Tagging from unknown id unsupported: $ps->{tag}"
376 unless $branchpoint;
377
378 # find where we are supposed to branch from
f88961a8 379 system('git-checkout','-b',$ps->{branch},$branchpoint);
52586ecb
ML
380
381 # If we trust Arch with the fact that this is just
382 # a tag, and it does not affect the state of the tree
383 # then we just tag and move on
384 tag($ps->{id}, $branchpoint);
385 ptag($ps->{id}, $branchpoint);
386 print " * Tagged $ps->{id} at $branchpoint\n";
3e525e67 387 return 0;
d3968363
ML
388 }
389 die $! if $?;
390 }
391
d3968363
ML
392 #
393 # Apply the import/changeset/merge into the working tree
394 #
395 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
d3968363 396 apply_import($ps) or die $!;
3e525e67 397 $stats{import_or_tag}++;
3292ae47 398 $import=0;
d3968363
ML
399 } elsif ($ps->{type} eq 's') {
400 apply_cset($ps);
3e525e67 401 $stats{simple_changeset}++;
d3968363
ML
402 }
403
404 #
405 # prepare update git's index, based on what arch knows
406 # about the pset, resolve parents, etc
407 #
d3968363 408
6df896b5 409 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
d3968363
ML
410 die "Error in cat-archive-log: $!" if $?;
411
6df896b5 412 parselog($ps,\@commitlog);
d3968363
ML
413
414 # imports don't give us good info
415 # on added files. Shame on them
6df896b5 416 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
6df896b5
EW
417 system('git-ls-files --deleted -z | '.
418 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
3ff903bf
EW
419 system('git-ls-files --others -z | '.
420 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
d3968363
ML
421 }
422
6df896b5 423 # TODO: handle removed_directories and renamed_directories:
3ff903bf 424
6df896b5
EW
425 if (my $del = $ps->{removed_files}) {
426 unlink @$del;
d3968363
ML
427 while (@$del) {
428 my @slice = splice(@$del, 0, 100);
6df896b5
EW
429 system('git-update-index','--remove','--',@slice) == 0 or
430 die "Error in git-update-index --remove: $! $?\n";
d3968363
ML
431 }
432 }
6df896b5
EW
433
434 if (my $ren = $ps->{renamed_files}) { # renamed
d3968363
ML
435 if (@$ren % 2) {
436 die "Odd number of entries in rename!?";
437 }
6df896b5 438
d3968363 439 while (@$ren) {
6df896b5
EW
440 my $from = shift @$ren;
441 my $to = shift @$ren;
d3968363
ML
442
443 unless (-d dirname($to)) {
444 mkpath(dirname($to)); # will die on err
445 }
3e525e67 446 # print "moving $from $to";
6df896b5
EW
447 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
448 system('git-update-index','--remove','--',$from) == 0 or
449 die "Error in git-update-index --remove: $! $?\n";
450 system('git-update-index','--add','--',$to) == 0 or
451 die "Error in git-update-index --add: $! $?\n";
d3968363 452 }
d3968363 453 }
6df896b5 454
3ff903bf
EW
455 if (my $add = $ps->{new_files}) {
456 while (@$add) {
457 my @slice = splice(@$add, 0, 100);
458 system('git-update-index','--add','--',@slice) == 0 or
459 die "Error in git-update-index --add: $! $?\n";
460 }
461 }
462
6df896b5 463 if (my $mod = $ps->{modified_files}) {
d3968363
ML
464 while (@$mod) {
465 my @slice = splice(@$mod, 0, 100);
6df896b5
EW
466 system('git-update-index','--',@slice) == 0 or
467 die "Error in git-update-index: $! $?\n";
d3968363
ML
468 }
469 }
3e525e67
EW
470 return 1; # we successfully applied the changeset
471}
472
473if ($opt_f) {
474 print "Will import patchsets using the fast strategy\n",
475 "Renamed directories and permission changes will be missed\n";
476 *process_patchset = *process_patchset_fast;
477} else {
478 print "Using the default (accurate) import strategy.\n",
479 "Things may be a bit slow\n";
480 *process_patchset = *process_patchset_accurate;
481}
6df896b5 482
3e525e67
EW
483foreach my $ps (@psets) {
484 # process patchsets
485 $ps->{branch} = git_branchname($ps->{id});
486
487 #
488 # ensure we have a clean state
489 #
490 if (my $dirty = `git-diff-files`) {
491 die "Unclean tree when about to process $ps->{id} " .
492 " - did we fail to commit cleanly before?\n$dirty";
493 }
494 die $! if $?;
495
496 #
497 # skip commits already in repo
498 #
499 if (ptag($ps->{id})) {
500 $opt_v && print " * Skipping already imported: $ps->{id}\n";
10945e00 501 next;
3e525e67
EW
502 }
503
504 print " * Starting to work on $ps->{id}\n";
505
506 process_patchset($ps) or next;
507
215a7ad1 508 # warn "errors when running git-update-index! $!";
3e525e67 509 my $tree = `git-write-tree`;
d3968363
ML
510 die "cannot write tree $!" if $?;
511 chomp $tree;
d3968363
ML
512
513 #
514 # Who's your daddy?
515 #
516 my @par;
1d4710d0 517 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
f88961a8 518 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
d3968363
ML
519 my $p = <HEAD>;
520 close HEAD;
521 chomp $p;
522 push @par, '-p', $p;
523 } else {
524 if ($ps->{type} eq 's') {
525 warn "Could not find the right head for the branch $ps->{branch}";
526 }
527 }
528 }
529
b779d5f0
ML
530 if ($ps->{merges}) {
531 push @par, find_parents($ps);
532 }
d3968363
ML
533
534 #
535 # Commit, tag and clean state
536 #
537 $ENV{TZ} = 'GMT';
538 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
539 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
540 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
541 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
542 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
543 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
544
6df896b5 545 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
d3968363 546 or die $!;
6df896b5
EW
547 print WRITER $ps->{summary},"\n";
548 print WRITER $ps->{message},"\n";
549
550 # make it easy to backtrack and figure out which Arch revision this was:
551 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
552
d3968363
ML
553 close WRITER;
554 my $commitid = <READER>; # read
555 chomp $commitid;
556 close READER;
557 waitpid $pid,0; # close;
558
559 if (length $commitid != 40) {
560 die "Something went wrong with the commit! $! $commitid";
561 }
562 #
563 # Update the branch
564 #
f88961a8 565 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
d3968363
ML
566 print HEAD $commitid;
567 close HEAD;
8366a10a 568 system('git-update-ref', 'HEAD', "$ps->{branch}");
d3968363
ML
569
570 # tag accordingly
571 ptag($ps->{id}, $commitid); # private tag
572 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
573 tag($ps->{id}, $commitid);
574 }
575 print " * Committed $ps->{id}\n";
576 print " + tree $tree\n";
577 print " + commit $commitid\n";
b779d5f0 578 $opt_v && print " + commit date is $ps->{date} \n";
f88961a8 579 $opt_v && print " + parents: ",join(' ',@par),"\n";
3e525e67
EW
580}
581
582if ($opt_v) {
583 foreach (sort keys %stats) {
584 print" $_: $stats{$_}\n";
585 }
586}
587exit 0;
588
589# used by the accurate strategy:
590sub sync_to_ps {
591 my $ps = shift;
592 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
593
594 $opt_v && print "sync_to_ps($ps->{id}) method: ";
595
596 if (-d $tree_dir) {
597 if ($ps->{type} eq 't') {
598 $opt_v && print "get (tag)\n";
599 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
600 # can't rely on replay to work correctly on these
601 rmtree($tree_dir);
602 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
603 $stats{get_tag}++;
604 } else {
605 my $tree_id = arch_tree_id($tree_dir);
606 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
607 # the common case (hopefully)
608 $opt_v && print "replay\n";
609 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
610 $stats{replay}++;
611 } else {
612 # getting one tree is usually faster than getting two trees
613 # and applying the delta ...
614 rmtree($tree_dir);
615 $opt_v && print "apply-delta\n";
616 safe_pipe_capture($TLA,'get','--no-pristine',
617 $ps->{id},$tree_dir);
618 $stats{get_delta}++;
619 }
620 }
621 } else {
622 # new branch work
623 $opt_v && print "get (new tree)\n";
624 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
625 $stats{get_new}++;
626 }
627
628 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
629 system('rsync','-aI','--delete','--exclude',$git_dir,
630# '--exclude','.arch-inventory',
631 '--exclude','.arch-ids','--exclude','{arch}',
632 '--exclude','+*','--exclude',',*',
633 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
634 return $tree_dir;
d3968363
ML
635}
636
d3968363
ML
637sub apply_import {
638 my $ps = shift;
22ff00fc 639 my $bname = git_branchname($ps->{id});
d3968363 640
f88961a8 641 mkpath($tmp);
d3968363 642
f88961a8 643 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
d3968363 644 die "Cannot get import: $!" if $?;
f88961a8
EW
645 system('rsync','-aI','--delete', '--exclude',$git_dir,
646 '--exclude','.arch-ids','--exclude','{arch}',
647 "$tmp/import/", './');
d3968363
ML
648 die "Cannot rsync import:$!" if $?;
649
f88961a8 650 rmtree("$tmp/import");
d3968363
ML
651 die "Cannot remove tempdir: $!" if $?;
652
653
654 return 1;
655}
656
657sub apply_cset {
658 my $ps = shift;
659
f88961a8 660 mkpath($tmp);
d3968363
ML
661
662 # get the changeset
f88961a8 663 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
d3968363
ML
664 die "Cannot get changeset: $!" if $?;
665
666 # apply patches
667 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
668 # this can be sped up considerably by doing
669 # (find | xargs cat) | patch
82e5a82f 670 # but that can get mucked up by patches
d3968363
ML
671 # with missing trailing newlines or the standard
672 # 'missing newline' flag in the patch - possibly
673 # produced with an old/buggy diff.
674 # slow and safe, we invoke patch once per patchfile
675 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
676 die "Problem applying patches! $!" if $?;
677 }
678
679 # apply changed binary files
680 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
681 foreach my $mod (@modified) {
682 chomp $mod;
683 my $orig = $mod;
684 $orig =~ s/\.modified$//; # lazy
685 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
686 #print "rsync -p '$mod' '$orig'";
f88961a8 687 system('rsync','-p',$mod,"./$orig");
d3968363
ML
688 die "Problem applying binary changes! $!" if $?;
689 }
690 }
691
692 # bring in new files
f88961a8
EW
693 system('rsync','-aI','--exclude',$git_dir,
694 '--exclude','.arch-ids',
695 '--exclude', '{arch}',
696 "$tmp/changeset/new-files-archive/",'./');
d3968363
ML
697
698 # deleted files are hinted from the commitlog processing
699
f88961a8 700 rmtree("$tmp/changeset");
d3968363
ML
701}
702
703
704# =for reference
6df896b5
EW
705# notes: *-files/-directories keys cannot have spaces, they're always
706# pika-escaped. Everything after the first newline
707# A log entry looks like:
d3968363
ML
708# Revision: moodle-org--moodle--1.3.3--patch-15
709# Archive: arch-eduforge@catalyst.net.nz--2004
710# Creator: Penny Leach <penny@catalyst.net.nz>
711# Date: Wed May 25 14:15:34 NZST 2005
712# Standard-date: 2005-05-25 02:15:34 GMT
713# New-files: lang/de/.arch-ids/block_glossary_random.php.id
714# lang/de/.arch-ids/block_html.php.id
715# New-directories: lang/de/help/questionnaire
716# lang/de/help/questionnaire/.arch-ids
717# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
718# db_sears.sql db/db_sears.sql
719# Removed-files: lang/be/docs/.arch-ids/release.html.id
720# lang/be/docs/.arch-ids/releaseold.html.id
721# Modified-files: admin/cron.php admin/delete.php
722# admin/editor.html backup/lib.php backup/restore.php
723# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
724# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
6df896b5 725# summary can be multiline with a leading space just like the above fields
d3968363
ML
726# Keywords:
727#
728# Updating yadda tadda tadda madda
729sub parselog {
6df896b5
EW
730 my ($ps, $log) = @_;
731 my $key = undef;
732
733 # headers we want that contain filenames:
734 my %want_headers = (
735 new_files => 1,
736 modified_files => 1,
737 renamed_files => 1,
738 renamed_directories => 1,
739 removed_files => 1,
740 removed_directories => 1,
741 );
d3968363 742
6df896b5
EW
743 chomp (@$log);
744 while ($_ = shift @$log) {
745 if (/^Continuation-of:\s*(.*)/) {
746 $ps->{tag} = $1;
747 $key = undef;
748 } elsif (/^Summary:\s*(.*)$/ ) {
749 # summary can be multiline as long as it has a leading space
750 $ps->{summary} = [ $1 ];
751 $key = 'summary';
752 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
753 $ps->{author} = $1;
754 $ps->{email} = $2;
755 $key = undef;
756 # any *-files or *-directories can be read here:
757 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
758 my $val = $2;
759 $key = lc $1;
760 $key =~ tr/-/_/; # too lazy to quote :P
761 if ($want_headers{$key}) {
762 push @{$ps->{$key}}, split(/\s+/, $val);
763 } else {
764 $key = undef;
765 }
766 } elsif (/^$/) {
767 last; # remainder of @$log that didn't get shifted off is message
768 } elsif ($key) {
769 if (/^\s+(.*)$/) {
770 if ($key eq 'summary') {
771 push @{$ps->{$key}}, $1;
772 } else { # files/directories:
773 push @{$ps->{$key}}, split(/\s+/, $1);
774 }
775 } else {
776 $key = undef;
777 }
778 }
d3968363 779 }
6df896b5
EW
780
781 # post-processing:
782 $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
783 $ps->{message} = join("\n",@$log);
d3968363 784
6df896b5
EW
785 # skip Arch control files, unescape pika-escaped files
786 foreach my $k (keys %want_headers) {
787 next unless (defined $ps->{$k});
6e33101a 788 my @tmp = ();
6df896b5
EW
789 foreach my $t (@{$ps->{$k}}) {
790 next unless length ($t);
791 next if $t =~ m!\{arch\}/!;
792 next if $t =~ m!\.arch-ids/!;
793 # should we skip this?
794 next if $t =~ m!\.arch-inventory$!;
f84f9d38
ML
795 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
796 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
6df896b5 797 if ($t =~ /\\/ ){
f88961a8 798 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
f84f9d38 799 }
6df896b5 800 push @tmp, $t;
d3968363 801 }
6e33101a 802 $ps->{$k} = \@tmp;
d3968363 803 }
d3968363
ML
804}
805
806# write/read a tag
807sub tag {
808 my ($tag, $commit) = @_;
a7fb51d3 809
fee3365f
ML
810 if ($opt_o) {
811 $tag =~ s|/|--|g;
812 } else {
813 # don't use subdirs for tags yet, it could screw up other porcelains
814 $tag =~ s|/|,|g;
815 }
d3968363
ML
816
817 if ($commit) {
a7fb51d3 818 open(C,">","$git_dir/refs/tags/$tag")
d3968363
ML
819 or die "Cannot create tag $tag: $!\n";
820 print C "$commit\n"
821 or die "Cannot write tag $tag: $!\n";
822 close(C)
823 or die "Cannot write tag $tag: $!\n";
a7fb51d3 824 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
d3968363 825 } else { # read
a7fb51d3 826 open(C,"<","$git_dir/refs/tags/$tag")
d3968363
ML
827 or die "Cannot read tag $tag: $!\n";
828 $commit = <C>;
829 chomp $commit;
830 die "Error reading tag $tag: $!\n" unless length $commit == 40;
831 close(C)
832 or die "Cannot read tag $tag: $!\n";
833 return $commit;
834 }
835}
836
837# write/read a private tag
838# reads fail softly if the tag isn't there
839sub ptag {
840 my ($tag, $commit) = @_;
a7fb51d3
EW
841
842 # don't use subdirs for tags yet, it could screw up other porcelains
843 $tag =~ s|/|,|g;
d3968363 844
a7fb51d3
EW
845 my $tag_file = "$ptag_dir/$tag";
846 my $tag_branch_dir = dirname($tag_file);
847 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
d3968363
ML
848
849 if ($commit) { # write
a7fb51d3 850 open(C,">",$tag_file)
d3968363
ML
851 or die "Cannot create tag $tag: $!\n";
852 print C "$commit\n"
853 or die "Cannot write tag $tag: $!\n";
854 close(C)
855 or die "Cannot write tag $tag: $!\n";
b779d5f0
ML
856 $rptags{$commit} = $tag
857 unless $tag =~ m/--base-0$/;
d3968363
ML
858 } else { # read
859 # if the tag isn't there, return 0
a7fb51d3 860 unless ( -s $tag_file) {
d3968363
ML
861 return 0;
862 }
a7fb51d3 863 open(C,"<",$tag_file)
d3968363
ML
864 or die "Cannot read tag $tag: $!\n";
865 $commit = <C>;
866 chomp $commit;
867 die "Error reading tag $tag: $!\n" unless length $commit == 40;
868 close(C)
869 or die "Cannot read tag $tag: $!\n";
b779d5f0
ML
870 unless (defined $rptags{$commit}) {
871 $rptags{$commit} = $tag;
872 }
d3968363
ML
873 return $commit;
874 }
875}
b779d5f0
ML
876
877sub find_parents {
878 #
879 # Identify what branches are merging into me
880 # and whether we are fully merged
881 # git-merge-base <headsha> <headsha> should tell
882 # me what the base of the merge should be
883 #
884 my $ps = shift;
885
886 my %branches; # holds an arrayref per branch
887 # the arrayref contains a list of
888 # merged patches between the base
889 # of the merge and the current head
890
891 my @parents; # parents found for this commit
892
893 # simple loop to split the merges
894 # per branch
895 foreach my $merge (@{$ps->{merges}}) {
22ff00fc 896 my $branch = git_branchname($merge);
b779d5f0
ML
897 unless (defined $branches{$branch} ){
898 $branches{$branch} = [];
899 }
900 push @{$branches{$branch}}, $merge;
901 }
902
903 #
904 # foreach branch find a merge base and walk it to the
905 # head where we are, collecting the merged patchsets that
906 # Arch has recorded. Keep that in @have
907 # Compare that with the commits on the other branch
908 # between merge-base and the tip of the branch (@need)
909 # and see if we have a series of consecutive patches
910 # starting from the merge base. The tip of the series
911 # of consecutive patches merged is our new parent for
912 # that branch.
913 #
914 foreach my $branch (keys %branches) {
37f15d50
ML
915
916 # check that we actually know about the branch
917 next unless -e "$git_dir/refs/heads/$branch";
918
b779d5f0 919 my $mergebase = `git-merge-base $branch $ps->{branch}`;
9b626e75
EW
920 if ($?) {
921 # Don't die here, Arch supports one-way cherry-picking
922 # between branches with no common base (or any relationship
923 # at all beforehand)
924 warn "Cannot find merge base for $branch and $ps->{branch}";
925 next;
926 }
b779d5f0
ML
927 chomp $mergebase;
928
929 # now walk up to the mergepoint collecting what patches we have
930 my $branchtip = git_rev_parse($ps->{branch});
765ac8ec 931 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
b779d5f0
ML
932 my %have; # collected merges this branch has
933 foreach my $merge (@{$ps->{merges}}) {
934 $have{$merge} = 1;
935 }
936 my %ancestorshave;
937 foreach my $par (@ancestors) {
938 $par = commitid2pset($par);
939 if (defined $par->{merges}) {
940 foreach my $merge (@{$par->{merges}}) {
941 $ancestorshave{$merge}=1;
942 }
943 }
944 }
945 # print "++++ Merges in $ps->{id} are....\n";
946 # my @have = sort keys %have; print Dumper(\@have);
947
948 # merge what we have with what ancestors have
949 %have = (%have, %ancestorshave);
950
951 # see what the remote branch has - these are the merges we
952 # will want to have in a consecutive series from the mergebase
953 my $otherbranchtip = git_rev_parse($branch);
765ac8ec 954 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
b779d5f0
ML
955 my @need;
956 foreach my $needps (@needraw) { # get the psets
957 $needps = commitid2pset($needps);
958 # git-rev-list will also
959 # list commits merged in via earlier
960 # merges. we are only interested in commits
961 # from the branch we're looking at
962 if ($branch eq $needps->{branch}) {
963 push @need, $needps->{id};
964 }
965 }
966
967 # print "++++ Merges from $branch we want are....\n";
968 # print Dumper(\@need);
969
970 my $newparent;
971 while (my $needed_commit = pop @need) {
972 if ($have{$needed_commit}) {
973 $newparent = $needed_commit;
974 } else {
975 last; # break out of the while
976 }
977 }
978 if ($newparent) {
979 push @parents, $newparent;
980 }
981
982
983 } # end foreach branch
984
985 # prune redundant parents
986 my %parents;
987 foreach my $p (@parents) {
988 $parents{$p} = 1;
989 }
990 foreach my $p (@parents) {
991 next unless exists $psets{$p}{merges};
992 next unless ref $psets{$p}{merges};
993 my @merges = @{$psets{$p}{merges}};
994 foreach my $merge (@merges) {
995 if ($parents{$merge}) {
996 delete $parents{$merge};
997 }
998 }
999 }
42f44b08 1000
f88961a8
EW
1001 @parents = ();
1002 foreach (keys %parents) {
1003 push @parents, '-p', ptag($_);
1004 }
b779d5f0
ML
1005 return @parents;
1006}
1007
1008sub git_rev_parse {
1009 my $name = shift;
1010 my $val = `git-rev-parse $name`;
1011 die "Error: git-rev-parse $name" if $?;
1012 chomp $val;
1013 return $val;
1014}
1015
1016# resolve a SHA1 to a known patchset
1017sub commitid2pset {
1018 my $commitid = shift;
1019 chomp $commitid;
1020 my $name = $rptags{$commitid}
1021 || die "Cannot find reverse tag mapping for $commitid";
a7fb51d3 1022 $name =~ s|,|/|;
b779d5f0
ML
1023 my $ps = $psets{$name}
1024 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1025 return $ps;
1026}
2777ef76 1027
42f44b08 1028
82e5a82f 1029# an alternative to `command` that allows input to be passed as an array
2777ef76
EW
1030# to work around shell problems with weird characters in arguments
1031sub safe_pipe_capture {
1032 my @output;
1033 if (my $pid = open my $child, '-|') {
1034 @output = (<$child>);
1035 close $child or die join(' ',@_).": $! $?";
1036 } else {
3e525e67 1037 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
2777ef76
EW
1038 }
1039 return wantarray ? @output : join('',@output);
1040}
1041
42f44b08
EW
1042# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1043sub arch_tree_id {
1044 my $dir = shift;
1045 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1046 return $ret;
1047}
1048
1049sub archive_reachable {
1050 my $archive = shift;
1051 return 1 if $reachable{$archive};
1052 return 0 if $unreachable{$archive};
1053
1054 if (system "$TLA whereis-archive $archive >/dev/null") {
1055 if ($opt_a && (system($TLA,'register-archive',
1056 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1057 $reachable{$archive} = 1;
1058 return 1;
1059 }
1060 print STDERR "Archive is unreachable: $archive\n";
1061 $unreachable{$archive} = 1;
1062 return 0;
1063 } else {
1064 $reachable{$archive} = 1;
1065 return 1;
1066 }
1067}
2777ef76 1068