]> git.ipfire.org Git - thirdparty/git.git/blame - git-archimport.perl
archimport: Fix a bug I introduced in the new log parser
[thirdparty/git.git] / git-archimport.perl
CommitLineData
d3968363
ML
1#!/usr/bin/perl -w
2#
3# This tool is copyright (c) 2005, Martin Langhoff.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to walk the output of tla abrowse,
7# fetch the changesets and apply them.
8#
241b5967 9
d3968363
ML
10=head1 Invocation
11
241b5967 12 git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
d3968363 13
241b5967
ML
14Imports a project from one or more Arch repositories. It will follow branches
15and repositories within the namespaces defined by the <archive/branch>
16parameters suppplied. If it cannot find the remote branch a merge comes from
17it will just import it as a regular commit. If it can find it, it will mark it
18as a merge whenever possible.
d3968363 19
241b5967 20See man (1) git-archimport for more details.
d3968363 21
241b5967 22=head1 TODO
d3968363 23
241b5967 24 - create tag objects instead of ref tags
d3968363 25 - audit shell-escaping of filenames
241b5967
ML
26 - hide our private tags somewhere smarter
27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
3e525e67
EW
28 - sort and apply patches by graphing ancestry relations instead of just
29 relying in dates supplied in the changeset itself.
30 tla ancestry-graph -m could be helpful here...
d3968363
ML
31
32=head1 Devel tricks
33
34Add print in front of the shell commands invoked via backticks.
35
22ff00fc
EW
36=head1 Devel Notes
37
38There are several places where Arch and git terminology are intermixed
39and potentially confused.
40
41The notion of a "branch" in git is approximately equivalent to
42a "archive/category--branch--version" in Arch. Also, it should be noted
43that the "--branch" portion of "archive/category--branch--version" is really
44optional in Arch although not many people (nor tools!) seem to know this.
45This means that "archive/category--version" is also a valid "branch"
46in git terms.
47
48We always refer to Arch names by their fully qualified variant (which
49means the "archive" name is prefixed.
50
51For people unfamiliar with Arch, an "archive" is the term for "repository",
52and can contain multiple, unrelated branches.
53
d3968363
ML
54=cut
55
56use strict;
57use warnings;
58use Getopt::Std;
42f44b08 59use File::Temp qw(tempdir);
f88961a8 60use File::Path qw(mkpath rmtree);
d3968363 61use File::Basename qw(basename dirname);
d3968363
ML
62use Data::Dumper qw/ Dumper /;
63use IPC::Open2;
64
65$SIG{'PIPE'}="IGNORE";
66$ENV{'TZ'}="UTC";
67
1d4710d0
ML
68my $git_dir = $ENV{"GIT_DIR"} || ".git";
69$ENV{"GIT_DIR"} = $git_dir;
a7fb51d3 70my $ptag_dir = "$git_dir/archimport/tags";
1d4710d0 71
3e525e67 72our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
d3968363
ML
73
74sub usage() {
75 print STDERR <<END;
76Usage: ${\basename $0} # fetch/update GIT from Arch
3e525e67 77 [ -f ] [ -o ] [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth ] [ -t tempdir ]
d3968363
ML
78 repository/arch-branch [ repository/arch-branch] ...
79END
80 exit(1);
81}
82
3e525e67 83getopts("fThvat:D:") or usage();
d3968363
ML
84usage if $opt_h;
85
86@ARGV >= 1 or usage();
42f44b08
EW
87# $arch_branches:
88# values associated with keys:
89# =1 - Arch version / git 'branch' detected via abrowse on a limit
90# >1 - Arch version / git 'branch' of an auxilliary branch we've merged
91my %arch_branches = map { $_ => 1 } @ARGV;
d3968363 92
5744f277
EW
93$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
94my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
127bf00f 95$opt_v && print "+ Using $tmp as temporary directory\n";
d3968363 96
42f44b08
EW
97my %reachable = (); # Arch repositories we can access
98my %unreachable = (); # Arch repositories we can't access :<
d3968363 99my @psets = (); # the collection
b779d5f0 100my %psets = (); # the collection, by name
3e525e67
EW
101my %stats = ( # Track which strategy we used to import:
102 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
103 simple_changeset => 0, import_or_tag => 0
104);
b779d5f0
ML
105
106my %rptags = (); # my reverse private tags
107 # to map a SHA1 to a commitid
2777ef76 108my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
d3968363 109
42f44b08
EW
110sub do_abrowse {
111 my $stage = shift;
112 while (my ($limit, $level) = each %arch_branches) {
113 next unless $level == $stage;
114
115 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
116 or die "Problems with tla abrowse: $!";
d3968363 117
42f44b08
EW
118 my %ps = (); # the current one
119 my $lastseen = '';
d3968363 120
42f44b08
EW
121 while (<ABROWSE>) {
122 chomp;
d3968363 123
42f44b08
EW
124 # first record padded w 8 spaces
125 if (s/^\s{8}\b//) {
126 my ($id, $type) = split(m/\s+/, $_, 2);
127
128 my %last_ps;
129 # store the record we just captured
130 if (%ps && !exists $psets{ $ps{id} }) {
131 %last_ps = %ps; # break references
132 push (@psets, \%last_ps);
133 $psets{ $last_ps{id} } = \%last_ps;
134 }
d3968363 135
42f44b08
EW
136 my $branch = extract_versionname($id);
137 %ps = ( id => $id, branch => $branch );
138 if (%last_ps && ($last_ps{branch} eq $branch)) {
139 $ps{parent_id} = $last_ps{id};
140 }
141
142 $arch_branches{$branch} = 1;
143 $lastseen = 'id';
144
145 # deal with types (should work with baz or tla):
146 if ($type =~ m/\(.*changeset\)/) {
147 $ps{type} = 's';
148 } elsif ($type =~ /\(.*import\)/) {
149 $ps{type} = 'i';
6df896b5 150 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
42f44b08
EW
151 $ps{type} = 't';
152 # read which revision we've tagged when we parse the log
6df896b5 153 $ps{tag} = $1;
42f44b08
EW
154 } else {
155 warn "Unknown type $type";
156 }
157
158 $arch_branches{$branch} = 1;
159 $lastseen = 'id';
160 } elsif (s/^\s{10}//) {
161 # 10 leading spaces or more
162 # indicate commit metadata
163
164 # date
165 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
166 $ps{date} = $1;
167 $lastseen = 'date';
168 } elsif ($_ eq 'merges in:') {
169 $ps{merges} = [];
170 $lastseen = 'merges';
171 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
172 my $id = $_;
173 push (@{$ps{merges}}, $id);
174
175 # aggressive branch finding:
176 if ($opt_D) {
177 my $branch = extract_versionname($id);
178 my $repo = extract_reponame($branch);
179
180 if (archive_reachable($repo) &&
181 !defined $arch_branches{$branch}) {
182 $arch_branches{$branch} = $stage + 1;
183 }
184 }
185 } else {
186 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
d3968363 187 }
d3968363 188 }
d3968363 189 }
d3968363 190
42f44b08
EW
191 if (%ps && !exists $psets{ $ps{id} }) {
192 my %temp = %ps; # break references
193 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
194 $temp{parent_id} = $psets[$#psets]{id};
195 }
196 push (@psets, \%temp);
197 $psets{ $temp{id} } = \%temp;
198 }
199
200 close ABROWSE or die "$TLA abrowse failed on $limit\n";
201 }
d3968363
ML
202} # end foreach $root
203
42f44b08
EW
204do_abrowse(1);
205my $depth = 2;
206$opt_D ||= 0;
207while ($depth <= $opt_D) {
208 do_abrowse($depth);
209 $depth++;
210}
211
d3968363 212## Order patches by time
42f44b08
EW
213# FIXME see if we can find a more optimal way to do this by graphing
214# the ancestry data and walking it, that way we won't have to rely on
215# client-supplied dates
d3968363
ML
216@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
217
218#print Dumper \@psets;
219
220##
221## TODO cleanup irrelevant patches
222## and put an initial import
223## or a full tag
3292ae47 224my $import = 0;
1d4710d0 225unless (-d $git_dir) { # initial import
d3968363
ML
226 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
227 print "Starting import from $psets[0]{id}\n";
3292ae47
ML
228 `git-init-db`;
229 die $! if $?;
230 $import = 1;
d3968363
ML
231 } else {
232 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
233 }
b779d5f0
ML
234} else { # progressing an import
235 # load the rptags
42f44b08 236 opendir(DIR, $ptag_dir)
b779d5f0
ML
237 || die "can't opendir: $!";
238 while (my $file = readdir(DIR)) {
a7fb51d3
EW
239 # skip non-interesting-files
240 next unless -f "$ptag_dir/$file";
241
242 # convert first '--' to '/' from old git-archimport to use
243 # as an archivename/c--b--v private tag
244 if ($file !~ m!,!) {
245 my $oldfile = $file;
246 $file =~ s!--!,!;
247 print STDERR "converting old tag $oldfile to $file\n";
248 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
249 }
b779d5f0
ML
250 my $sha = ptag($file);
251 chomp $sha;
b779d5f0
ML
252 $rptags{$sha} = $file;
253 }
254 closedir DIR;
d3968363
ML
255}
256
3292ae47 257# process patchsets
22ff00fc
EW
258# extract the Arch repository name (Arch "archive" in Arch-speak)
259sub extract_reponame {
260 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
261 return (split(/\//, $fq_cvbr))[0];
262}
263
264sub extract_versionname {
265 my $name = shift;
266 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
267 return $name;
268}
d3968363 269
22ff00fc
EW
270# convert a fully-qualified revision or version to a unique dirname:
271# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
272# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
273#
274# the git notion of a branch is closer to
275# archive/category--branch--version than archive/category--branch, so we
276# use this to convert to git branch names.
277# Also, keep archive names but replace '/' with ',' since it won't require
278# subdirectories, and is safer than swapping '--' which could confuse
279# reverse-mapping when dealing with bastard branches that
280# are just archive/category--version (no --branch)
281sub tree_dirname {
282 my $revision = shift;
283 my $name = extract_versionname($revision);
284 $name =~ s#/#,#;
285 return $name;
286}
287
fee3365f
ML
288# old versions of git-archimport just use the <category--branch> part:
289sub old_style_branchname {
290 my $id = shift;
291 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
292 chomp $ret;
293 return $ret;
294}
295
296*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
22ff00fc 297
3e525e67
EW
298sub process_patchset_accurate {
299 my $ps = shift;
300
301 # switch to that branch if we're not already in that branch:
302 if (-e "$git_dir/refs/heads/$ps->{branch}") {
303 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
d3968363 304
3e525e67
EW
305 # remove any old stuff that got leftover:
306 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
307 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 308 }
3e525e67
EW
309
310 # Apply the import/changeset/merge into the working tree
311 my $dir = sync_to_ps($ps);
312 # read the new log entry:
313 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
314 die "Error in cat-log: $!" if $?;
315 chomp @commitlog;
316
317 # grab variables we want from the log, new fields get added to $ps:
318 # (author, date, email, summary, message body ...)
319 parselog($ps, \@commitlog);
320
321 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
322 # this should work when importing continuations
323 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
324
325 # find where we are supposed to branch from
326 system('git-checkout','-f','-b',$ps->{branch},
327 $branchpoint) == 0 or die "$! $?\n";
328
329 # remove any old stuff that got leftover:
330 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
331 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 332
3e525e67
EW
333 # If we trust Arch with the fact that this is just
334 # a tag, and it does not affect the state of the tree
335 # then we just tag and move on
336 tag($ps->{id}, $branchpoint);
337 ptag($ps->{id}, $branchpoint);
338 print " * Tagged $ps->{id} at $branchpoint\n";
339 return 0;
340 } else {
341 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
342 }
343 # allow multiple bases/imports here since Arch supports cherry-picks
344 # from unrelated trees
345 }
346
347 # update the index with all the changes we got
348 system('git-ls-files --others -z | '.
349 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
350 system('git-ls-files --deleted -z | '.
351 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
352 system('git-ls-files -z | '.
353 'git-update-index -z --stdin') == 0 or die "$! $?\n";
354 return 1;
355}
37f15d50 356
3e525e67
EW
357# the native changeset processing strategy. This is very fast, but
358# does not handle permissions or any renames involving directories
359sub process_patchset_fast {
360 my $ps = shift;
d3968363
ML
361 #
362 # create the branch if needed
363 #
3292ae47
ML
364 if ($ps->{type} eq 'i' && !$import) {
365 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
d3968363
ML
366 }
367
3292ae47 368 unless ($import) { # skip for import
1d4710d0 369 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
d3968363 370 # we know about this branch
f88961a8 371 system('git-checkout',$ps->{branch});
d3968363
ML
372 } else {
373 # new branch! we need to verify a few things
374 die "Branch on a non-tag!" unless $ps->{type} eq 't';
375 my $branchpoint = ptag($ps->{tag});
376 die "Tagging from unknown id unsupported: $ps->{tag}"
377 unless $branchpoint;
378
379 # find where we are supposed to branch from
f88961a8 380 system('git-checkout','-b',$ps->{branch},$branchpoint);
52586ecb
ML
381
382 # If we trust Arch with the fact that this is just
383 # a tag, and it does not affect the state of the tree
384 # then we just tag and move on
385 tag($ps->{id}, $branchpoint);
386 ptag($ps->{id}, $branchpoint);
387 print " * Tagged $ps->{id} at $branchpoint\n";
3e525e67 388 return 0;
d3968363
ML
389 }
390 die $! if $?;
391 }
392
d3968363
ML
393 #
394 # Apply the import/changeset/merge into the working tree
395 #
396 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
d3968363 397 apply_import($ps) or die $!;
3e525e67 398 $stats{import_or_tag}++;
3292ae47 399 $import=0;
d3968363
ML
400 } elsif ($ps->{type} eq 's') {
401 apply_cset($ps);
3e525e67 402 $stats{simple_changeset}++;
d3968363
ML
403 }
404
405 #
406 # prepare update git's index, based on what arch knows
407 # about the pset, resolve parents, etc
408 #
d3968363 409
6df896b5 410 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
d3968363
ML
411 die "Error in cat-archive-log: $!" if $?;
412
6df896b5 413 parselog($ps,\@commitlog);
d3968363
ML
414
415 # imports don't give us good info
416 # on added files. Shame on them
6df896b5
EW
417 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
418 system('git-ls-files --others -z | '.
419 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
420 system('git-ls-files --deleted -z | '.
421 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
d3968363
ML
422 }
423
6df896b5
EW
424 # TODO: handle removed_directories and renamed_directories:
425
426 if (my $add = $ps->{new_files}) {
d3968363
ML
427 while (@$add) {
428 my @slice = splice(@$add, 0, 100);
6df896b5
EW
429 system('git-update-index','--add','--',@slice) == 0 or
430 die "Error in git-update-index --add: $! $?\n";
d3968363
ML
431 }
432 }
6df896b5
EW
433
434 if (my $del = $ps->{removed_files}) {
435 unlink @$del;
d3968363
ML
436 while (@$del) {
437 my @slice = splice(@$del, 0, 100);
6df896b5
EW
438 system('git-update-index','--remove','--',@slice) == 0 or
439 die "Error in git-update-index --remove: $! $?\n";
d3968363
ML
440 }
441 }
6df896b5
EW
442
443 if (my $ren = $ps->{renamed_files}) { # renamed
d3968363
ML
444 if (@$ren % 2) {
445 die "Odd number of entries in rename!?";
446 }
6df896b5 447
d3968363 448 while (@$ren) {
6df896b5
EW
449 my $from = shift @$ren;
450 my $to = shift @$ren;
d3968363
ML
451
452 unless (-d dirname($to)) {
453 mkpath(dirname($to)); # will die on err
454 }
3e525e67 455 # print "moving $from $to";
6df896b5
EW
456 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
457 system('git-update-index','--remove','--',$from) == 0 or
458 die "Error in git-update-index --remove: $! $?\n";
459 system('git-update-index','--add','--',$to) == 0 or
460 die "Error in git-update-index --add: $! $?\n";
d3968363 461 }
d3968363 462 }
6df896b5
EW
463
464 if (my $mod = $ps->{modified_files}) {
d3968363
ML
465 while (@$mod) {
466 my @slice = splice(@$mod, 0, 100);
6df896b5
EW
467 system('git-update-index','--',@slice) == 0 or
468 die "Error in git-update-index: $! $?\n";
d3968363
ML
469 }
470 }
3e525e67
EW
471 return 1; # we successfully applied the changeset
472}
473
474if ($opt_f) {
475 print "Will import patchsets using the fast strategy\n",
476 "Renamed directories and permission changes will be missed\n";
477 *process_patchset = *process_patchset_fast;
478} else {
479 print "Using the default (accurate) import strategy.\n",
480 "Things may be a bit slow\n";
481 *process_patchset = *process_patchset_accurate;
482}
6df896b5 483
3e525e67
EW
484foreach my $ps (@psets) {
485 # process patchsets
486 $ps->{branch} = git_branchname($ps->{id});
487
488 #
489 # ensure we have a clean state
490 #
491 if (my $dirty = `git-diff-files`) {
492 die "Unclean tree when about to process $ps->{id} " .
493 " - did we fail to commit cleanly before?\n$dirty";
494 }
495 die $! if $?;
496
497 #
498 # skip commits already in repo
499 #
500 if (ptag($ps->{id})) {
501 $opt_v && print " * Skipping already imported: $ps->{id}\n";
502 return 0;
503 }
504
505 print " * Starting to work on $ps->{id}\n";
506
507 process_patchset($ps) or next;
508
215a7ad1 509 # warn "errors when running git-update-index! $!";
3e525e67 510 my $tree = `git-write-tree`;
d3968363
ML
511 die "cannot write tree $!" if $?;
512 chomp $tree;
d3968363
ML
513
514 #
515 # Who's your daddy?
516 #
517 my @par;
1d4710d0 518 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
f88961a8 519 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
d3968363
ML
520 my $p = <HEAD>;
521 close HEAD;
522 chomp $p;
523 push @par, '-p', $p;
524 } else {
525 if ($ps->{type} eq 's') {
526 warn "Could not find the right head for the branch $ps->{branch}";
527 }
528 }
529 }
530
b779d5f0
ML
531 if ($ps->{merges}) {
532 push @par, find_parents($ps);
533 }
d3968363
ML
534
535 #
536 # Commit, tag and clean state
537 #
538 $ENV{TZ} = 'GMT';
539 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
540 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
541 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
542 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
543 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
544 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
545
6df896b5 546 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
d3968363 547 or die $!;
6df896b5
EW
548 print WRITER $ps->{summary},"\n";
549 print WRITER $ps->{message},"\n";
550
551 # make it easy to backtrack and figure out which Arch revision this was:
552 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
553
d3968363
ML
554 close WRITER;
555 my $commitid = <READER>; # read
556 chomp $commitid;
557 close READER;
558 waitpid $pid,0; # close;
559
560 if (length $commitid != 40) {
561 die "Something went wrong with the commit! $! $commitid";
562 }
563 #
564 # Update the branch
565 #
f88961a8 566 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
d3968363
ML
567 print HEAD $commitid;
568 close HEAD;
8366a10a 569 system('git-update-ref', 'HEAD', "$ps->{branch}");
d3968363
ML
570
571 # tag accordingly
572 ptag($ps->{id}, $commitid); # private tag
573 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
574 tag($ps->{id}, $commitid);
575 }
576 print " * Committed $ps->{id}\n";
577 print " + tree $tree\n";
578 print " + commit $commitid\n";
b779d5f0 579 $opt_v && print " + commit date is $ps->{date} \n";
f88961a8 580 $opt_v && print " + parents: ",join(' ',@par),"\n";
3e525e67
EW
581 if (my $dirty = `git-diff-files`) {
582 die "22 Unclean tree when about to process $ps->{id} " .
583 " - did we fail to commit cleanly before?\n$dirty";
584 }
585}
586
587if ($opt_v) {
588 foreach (sort keys %stats) {
589 print" $_: $stats{$_}\n";
590 }
591}
592exit 0;
593
594# used by the accurate strategy:
595sub sync_to_ps {
596 my $ps = shift;
597 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
598
599 $opt_v && print "sync_to_ps($ps->{id}) method: ";
600
601 if (-d $tree_dir) {
602 if ($ps->{type} eq 't') {
603 $opt_v && print "get (tag)\n";
604 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
605 # can't rely on replay to work correctly on these
606 rmtree($tree_dir);
607 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
608 $stats{get_tag}++;
609 } else {
610 my $tree_id = arch_tree_id($tree_dir);
611 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
612 # the common case (hopefully)
613 $opt_v && print "replay\n";
614 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
615 $stats{replay}++;
616 } else {
617 # getting one tree is usually faster than getting two trees
618 # and applying the delta ...
619 rmtree($tree_dir);
620 $opt_v && print "apply-delta\n";
621 safe_pipe_capture($TLA,'get','--no-pristine',
622 $ps->{id},$tree_dir);
623 $stats{get_delta}++;
624 }
625 }
626 } else {
627 # new branch work
628 $opt_v && print "get (new tree)\n";
629 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
630 $stats{get_new}++;
631 }
632
633 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
634 system('rsync','-aI','--delete','--exclude',$git_dir,
635# '--exclude','.arch-inventory',
636 '--exclude','.arch-ids','--exclude','{arch}',
637 '--exclude','+*','--exclude',',*',
638 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
639 return $tree_dir;
d3968363
ML
640}
641
d3968363
ML
642sub apply_import {
643 my $ps = shift;
22ff00fc 644 my $bname = git_branchname($ps->{id});
d3968363 645
f88961a8 646 mkpath($tmp);
d3968363 647
f88961a8 648 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
d3968363 649 die "Cannot get import: $!" if $?;
f88961a8
EW
650 system('rsync','-aI','--delete', '--exclude',$git_dir,
651 '--exclude','.arch-ids','--exclude','{arch}',
652 "$tmp/import/", './');
d3968363
ML
653 die "Cannot rsync import:$!" if $?;
654
f88961a8 655 rmtree("$tmp/import");
d3968363
ML
656 die "Cannot remove tempdir: $!" if $?;
657
658
659 return 1;
660}
661
662sub apply_cset {
663 my $ps = shift;
664
f88961a8 665 mkpath($tmp);
d3968363
ML
666
667 # get the changeset
f88961a8 668 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
d3968363
ML
669 die "Cannot get changeset: $!" if $?;
670
671 # apply patches
672 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
673 # this can be sped up considerably by doing
674 # (find | xargs cat) | patch
675 # but that cna get mucked up by patches
676 # with missing trailing newlines or the standard
677 # 'missing newline' flag in the patch - possibly
678 # produced with an old/buggy diff.
679 # slow and safe, we invoke patch once per patchfile
680 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
681 die "Problem applying patches! $!" if $?;
682 }
683
684 # apply changed binary files
685 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
686 foreach my $mod (@modified) {
687 chomp $mod;
688 my $orig = $mod;
689 $orig =~ s/\.modified$//; # lazy
690 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
691 #print "rsync -p '$mod' '$orig'";
f88961a8 692 system('rsync','-p',$mod,"./$orig");
d3968363
ML
693 die "Problem applying binary changes! $!" if $?;
694 }
695 }
696
697 # bring in new files
f88961a8
EW
698 system('rsync','-aI','--exclude',$git_dir,
699 '--exclude','.arch-ids',
700 '--exclude', '{arch}',
701 "$tmp/changeset/new-files-archive/",'./');
d3968363
ML
702
703 # deleted files are hinted from the commitlog processing
704
f88961a8 705 rmtree("$tmp/changeset");
d3968363
ML
706}
707
708
709# =for reference
6df896b5
EW
710# notes: *-files/-directories keys cannot have spaces, they're always
711# pika-escaped. Everything after the first newline
712# A log entry looks like:
d3968363
ML
713# Revision: moodle-org--moodle--1.3.3--patch-15
714# Archive: arch-eduforge@catalyst.net.nz--2004
715# Creator: Penny Leach <penny@catalyst.net.nz>
716# Date: Wed May 25 14:15:34 NZST 2005
717# Standard-date: 2005-05-25 02:15:34 GMT
718# New-files: lang/de/.arch-ids/block_glossary_random.php.id
719# lang/de/.arch-ids/block_html.php.id
720# New-directories: lang/de/help/questionnaire
721# lang/de/help/questionnaire/.arch-ids
722# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
723# db_sears.sql db/db_sears.sql
724# Removed-files: lang/be/docs/.arch-ids/release.html.id
725# lang/be/docs/.arch-ids/releaseold.html.id
726# Modified-files: admin/cron.php admin/delete.php
727# admin/editor.html backup/lib.php backup/restore.php
728# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
729# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
6df896b5 730# summary can be multiline with a leading space just like the above fields
d3968363
ML
731# Keywords:
732#
733# Updating yadda tadda tadda madda
734sub parselog {
6df896b5
EW
735 my ($ps, $log) = @_;
736 my $key = undef;
737
738 # headers we want that contain filenames:
739 my %want_headers = (
740 new_files => 1,
741 modified_files => 1,
742 renamed_files => 1,
743 renamed_directories => 1,
744 removed_files => 1,
745 removed_directories => 1,
746 );
d3968363 747
6df896b5
EW
748 chomp (@$log);
749 while ($_ = shift @$log) {
750 if (/^Continuation-of:\s*(.*)/) {
751 $ps->{tag} = $1;
752 $key = undef;
753 } elsif (/^Summary:\s*(.*)$/ ) {
754 # summary can be multiline as long as it has a leading space
755 $ps->{summary} = [ $1 ];
756 $key = 'summary';
757 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
758 $ps->{author} = $1;
759 $ps->{email} = $2;
760 $key = undef;
761 # any *-files or *-directories can be read here:
762 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
763 my $val = $2;
764 $key = lc $1;
765 $key =~ tr/-/_/; # too lazy to quote :P
766 if ($want_headers{$key}) {
767 push @{$ps->{$key}}, split(/\s+/, $val);
768 } else {
769 $key = undef;
770 }
771 } elsif (/^$/) {
772 last; # remainder of @$log that didn't get shifted off is message
773 } elsif ($key) {
774 if (/^\s+(.*)$/) {
775 if ($key eq 'summary') {
776 push @{$ps->{$key}}, $1;
777 } else { # files/directories:
778 push @{$ps->{$key}}, split(/\s+/, $1);
779 }
780 } else {
781 $key = undef;
782 }
783 }
d3968363 784 }
6df896b5
EW
785
786 # post-processing:
787 $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
788 $ps->{message} = join("\n",@$log);
d3968363 789
6df896b5
EW
790 # skip Arch control files, unescape pika-escaped files
791 foreach my $k (keys %want_headers) {
792 next unless (defined $ps->{$k});
6e33101a 793 my @tmp = ();
6df896b5
EW
794 foreach my $t (@{$ps->{$k}}) {
795 next unless length ($t);
796 next if $t =~ m!\{arch\}/!;
797 next if $t =~ m!\.arch-ids/!;
798 # should we skip this?
799 next if $t =~ m!\.arch-inventory$!;
f84f9d38
ML
800 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
801 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
6df896b5 802 if ($t =~ /\\/ ){
f88961a8 803 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
f84f9d38 804 }
6df896b5 805 push @tmp, $t;
d3968363 806 }
6e33101a 807 $ps->{$k} = \@tmp;
d3968363 808 }
d3968363
ML
809}
810
811# write/read a tag
812sub tag {
813 my ($tag, $commit) = @_;
a7fb51d3 814
fee3365f
ML
815 if ($opt_o) {
816 $tag =~ s|/|--|g;
817 } else {
818 # don't use subdirs for tags yet, it could screw up other porcelains
819 $tag =~ s|/|,|g;
820 }
d3968363
ML
821
822 if ($commit) {
a7fb51d3 823 open(C,">","$git_dir/refs/tags/$tag")
d3968363
ML
824 or die "Cannot create tag $tag: $!\n";
825 print C "$commit\n"
826 or die "Cannot write tag $tag: $!\n";
827 close(C)
828 or die "Cannot write tag $tag: $!\n";
a7fb51d3 829 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
d3968363 830 } else { # read
a7fb51d3 831 open(C,"<","$git_dir/refs/tags/$tag")
d3968363
ML
832 or die "Cannot read tag $tag: $!\n";
833 $commit = <C>;
834 chomp $commit;
835 die "Error reading tag $tag: $!\n" unless length $commit == 40;
836 close(C)
837 or die "Cannot read tag $tag: $!\n";
838 return $commit;
839 }
840}
841
842# write/read a private tag
843# reads fail softly if the tag isn't there
844sub ptag {
845 my ($tag, $commit) = @_;
a7fb51d3
EW
846
847 # don't use subdirs for tags yet, it could screw up other porcelains
848 $tag =~ s|/|,|g;
d3968363 849
a7fb51d3
EW
850 my $tag_file = "$ptag_dir/$tag";
851 my $tag_branch_dir = dirname($tag_file);
852 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
d3968363
ML
853
854 if ($commit) { # write
a7fb51d3 855 open(C,">",$tag_file)
d3968363
ML
856 or die "Cannot create tag $tag: $!\n";
857 print C "$commit\n"
858 or die "Cannot write tag $tag: $!\n";
859 close(C)
860 or die "Cannot write tag $tag: $!\n";
b779d5f0
ML
861 $rptags{$commit} = $tag
862 unless $tag =~ m/--base-0$/;
d3968363
ML
863 } else { # read
864 # if the tag isn't there, return 0
a7fb51d3 865 unless ( -s $tag_file) {
d3968363
ML
866 return 0;
867 }
a7fb51d3 868 open(C,"<",$tag_file)
d3968363
ML
869 or die "Cannot read tag $tag: $!\n";
870 $commit = <C>;
871 chomp $commit;
872 die "Error reading tag $tag: $!\n" unless length $commit == 40;
873 close(C)
874 or die "Cannot read tag $tag: $!\n";
b779d5f0
ML
875 unless (defined $rptags{$commit}) {
876 $rptags{$commit} = $tag;
877 }
d3968363
ML
878 return $commit;
879 }
880}
b779d5f0
ML
881
882sub find_parents {
883 #
884 # Identify what branches are merging into me
885 # and whether we are fully merged
886 # git-merge-base <headsha> <headsha> should tell
887 # me what the base of the merge should be
888 #
889 my $ps = shift;
890
891 my %branches; # holds an arrayref per branch
892 # the arrayref contains a list of
893 # merged patches between the base
894 # of the merge and the current head
895
896 my @parents; # parents found for this commit
897
898 # simple loop to split the merges
899 # per branch
900 foreach my $merge (@{$ps->{merges}}) {
22ff00fc 901 my $branch = git_branchname($merge);
b779d5f0
ML
902 unless (defined $branches{$branch} ){
903 $branches{$branch} = [];
904 }
905 push @{$branches{$branch}}, $merge;
906 }
907
908 #
909 # foreach branch find a merge base and walk it to the
910 # head where we are, collecting the merged patchsets that
911 # Arch has recorded. Keep that in @have
912 # Compare that with the commits on the other branch
913 # between merge-base and the tip of the branch (@need)
914 # and see if we have a series of consecutive patches
915 # starting from the merge base. The tip of the series
916 # of consecutive patches merged is our new parent for
917 # that branch.
918 #
919 foreach my $branch (keys %branches) {
37f15d50
ML
920
921 # check that we actually know about the branch
922 next unless -e "$git_dir/refs/heads/$branch";
923
b779d5f0 924 my $mergebase = `git-merge-base $branch $ps->{branch}`;
9b626e75
EW
925 if ($?) {
926 # Don't die here, Arch supports one-way cherry-picking
927 # between branches with no common base (or any relationship
928 # at all beforehand)
929 warn "Cannot find merge base for $branch and $ps->{branch}";
930 next;
931 }
b779d5f0
ML
932 chomp $mergebase;
933
934 # now walk up to the mergepoint collecting what patches we have
935 my $branchtip = git_rev_parse($ps->{branch});
936 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
937 my %have; # collected merges this branch has
938 foreach my $merge (@{$ps->{merges}}) {
939 $have{$merge} = 1;
940 }
941 my %ancestorshave;
942 foreach my $par (@ancestors) {
943 $par = commitid2pset($par);
944 if (defined $par->{merges}) {
945 foreach my $merge (@{$par->{merges}}) {
946 $ancestorshave{$merge}=1;
947 }
948 }
949 }
950 # print "++++ Merges in $ps->{id} are....\n";
951 # my @have = sort keys %have; print Dumper(\@have);
952
953 # merge what we have with what ancestors have
954 %have = (%have, %ancestorshave);
955
956 # see what the remote branch has - these are the merges we
957 # will want to have in a consecutive series from the mergebase
958 my $otherbranchtip = git_rev_parse($branch);
959 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
960 my @need;
961 foreach my $needps (@needraw) { # get the psets
962 $needps = commitid2pset($needps);
963 # git-rev-list will also
964 # list commits merged in via earlier
965 # merges. we are only interested in commits
966 # from the branch we're looking at
967 if ($branch eq $needps->{branch}) {
968 push @need, $needps->{id};
969 }
970 }
971
972 # print "++++ Merges from $branch we want are....\n";
973 # print Dumper(\@need);
974
975 my $newparent;
976 while (my $needed_commit = pop @need) {
977 if ($have{$needed_commit}) {
978 $newparent = $needed_commit;
979 } else {
980 last; # break out of the while
981 }
982 }
983 if ($newparent) {
984 push @parents, $newparent;
985 }
986
987
988 } # end foreach branch
989
990 # prune redundant parents
991 my %parents;
992 foreach my $p (@parents) {
993 $parents{$p} = 1;
994 }
995 foreach my $p (@parents) {
996 next unless exists $psets{$p}{merges};
997 next unless ref $psets{$p}{merges};
998 my @merges = @{$psets{$p}{merges}};
999 foreach my $merge (@merges) {
1000 if ($parents{$merge}) {
1001 delete $parents{$merge};
1002 }
1003 }
1004 }
42f44b08 1005
f88961a8
EW
1006 @parents = ();
1007 foreach (keys %parents) {
1008 push @parents, '-p', ptag($_);
1009 }
b779d5f0
ML
1010 return @parents;
1011}
1012
1013sub git_rev_parse {
1014 my $name = shift;
1015 my $val = `git-rev-parse $name`;
1016 die "Error: git-rev-parse $name" if $?;
1017 chomp $val;
1018 return $val;
1019}
1020
1021# resolve a SHA1 to a known patchset
1022sub commitid2pset {
1023 my $commitid = shift;
1024 chomp $commitid;
1025 my $name = $rptags{$commitid}
1026 || die "Cannot find reverse tag mapping for $commitid";
a7fb51d3 1027 $name =~ s|,|/|;
b779d5f0
ML
1028 my $ps = $psets{$name}
1029 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1030 return $ps;
1031}
2777ef76 1032
42f44b08 1033
2777ef76
EW
1034# an alterative to `command` that allows input to be passed as an array
1035# to work around shell problems with weird characters in arguments
1036sub safe_pipe_capture {
1037 my @output;
1038 if (my $pid = open my $child, '-|') {
1039 @output = (<$child>);
1040 close $child or die join(' ',@_).": $! $?";
1041 } else {
3e525e67 1042 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
2777ef76
EW
1043 }
1044 return wantarray ? @output : join('',@output);
1045}
1046
42f44b08
EW
1047# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1048sub arch_tree_id {
1049 my $dir = shift;
1050 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1051 return $ret;
1052}
1053
1054sub archive_reachable {
1055 my $archive = shift;
1056 return 1 if $reachable{$archive};
1057 return 0 if $unreachable{$archive};
1058
1059 if (system "$TLA whereis-archive $archive >/dev/null") {
1060 if ($opt_a && (system($TLA,'register-archive',
1061 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1062 $reachable{$archive} = 1;
1063 return 1;
1064 }
1065 print STDERR "Archive is unreachable: $archive\n";
1066 $unreachable{$archive} = 1;
1067 return 0;
1068 } else {
1069 $reachable{$archive} = 1;
1070 return 1;
1071 }
1072}
2777ef76 1073