]> git.ipfire.org Git - thirdparty/git.git/blame - git-archimport.perl
Makefile: Include subdirectories in "make cover" reports
[thirdparty/git.git] / git-archimport.perl
CommitLineData
d3968363
ML
1#!/usr/bin/perl -w
2#
3# This tool is copyright (c) 2005, Martin Langhoff.
4# It is released under the Gnu Public License, version 2.
5#
a6080a0a
JH
6# The basic idea is to walk the output of tla abrowse,
7# fetch the changesets and apply them.
d3968363 8#
241b5967 9
d3968363
ML
10=head1 Invocation
11
1b1dd23f 12 git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
a6080a0a 13 [ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
d3968363 14
241b5967
ML
15Imports a project from one or more Arch repositories. It will follow branches
16and repositories within the namespaces defined by the <archive/branch>
82e5a82f 17parameters supplied. If it cannot find the remote branch a merge comes from
a6080a0a 18it will just import it as a regular commit. If it can find it, it will mark it
241b5967 19as a merge whenever possible.
d3968363 20
241b5967 21See man (1) git-archimport for more details.
d3968363 22
241b5967 23=head1 TODO
d3968363 24
241b5967 25 - create tag objects instead of ref tags
d3968363 26 - audit shell-escaping of filenames
241b5967 27 - hide our private tags somewhere smarter
a6080a0a 28 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
3e525e67
EW
29 - sort and apply patches by graphing ancestry relations instead of just
30 relying in dates supplied in the changeset itself.
31 tla ancestry-graph -m could be helpful here...
d3968363
ML
32
33=head1 Devel tricks
34
a6080a0a 35Add print in front of the shell commands invoked via backticks.
d3968363 36
22ff00fc
EW
37=head1 Devel Notes
38
39There are several places where Arch and git terminology are intermixed
40and potentially confused.
41
42The notion of a "branch" in git is approximately equivalent to
43a "archive/category--branch--version" in Arch. Also, it should be noted
44that the "--branch" portion of "archive/category--branch--version" is really
45optional in Arch although not many people (nor tools!) seem to know this.
46This means that "archive/category--version" is also a valid "branch"
47in git terms.
48
49We always refer to Arch names by their fully qualified variant (which
50means the "archive" name is prefixed.
51
52For people unfamiliar with Arch, an "archive" is the term for "repository",
53and can contain multiple, unrelated branches.
54
d3968363
ML
55=cut
56
57use strict;
58use warnings;
59use Getopt::Std;
42f44b08 60use File::Temp qw(tempdir);
f88961a8 61use File::Path qw(mkpath rmtree);
d3968363 62use File::Basename qw(basename dirname);
d3968363
ML
63use Data::Dumper qw/ Dumper /;
64use IPC::Open2;
65
66$SIG{'PIPE'}="IGNORE";
67$ENV{'TZ'}="UTC";
68
1d4710d0
ML
69my $git_dir = $ENV{"GIT_DIR"} || ".git";
70$ENV{"GIT_DIR"} = $git_dir;
a7fb51d3 71my $ptag_dir = "$git_dir/archimport/tags";
1d4710d0 72
3e525e67 73our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
d3968363
ML
74
75sub usage() {
76 print STDERR <<END;
1b1dd23f 77Usage: git archimport # fetch/update GIT from Arch
42f4570c 78 [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
d3968363
ML
79 repository/arch-branch [ repository/arch-branch] ...
80END
81 exit(1);
82}
83
3e525e67 84getopts("fThvat:D:") or usage();
d3968363
ML
85usage if $opt_h;
86
87@ARGV >= 1 or usage();
42f44b08
EW
88# $arch_branches:
89# values associated with keys:
90# =1 - Arch version / git 'branch' detected via abrowse on a limit
82e5a82f 91# >1 - Arch version / git 'branch' of an auxiliary branch we've merged
d9cb5399
PB
92my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV;
93
94# $branch_name_map:
95# maps arch branches to git branch names
96my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV;
d3968363 97
5744f277
EW
98$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
99my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
127bf00f 100$opt_v && print "+ Using $tmp as temporary directory\n";
d3968363 101
11dbe9e8
GP
102unless (-d $git_dir) { # initial import needs empty directory
103 opendir DIR, '.' or die "Unable to open current directory: $!\n";
104 while (my $entry = readdir DIR) {
105 $entry =~ /^\.\.?$/ or
106 die "Initial import needs an empty current working directory.\n"
107 }
108 closedir DIR
109}
110
d9cb5399 111my $default_archive; # default Arch archive
42f44b08
EW
112my %reachable = (); # Arch repositories we can access
113my %unreachable = (); # Arch repositories we can't access :<
d3968363 114my @psets = (); # the collection
b779d5f0 115my %psets = (); # the collection, by name
3e525e67
EW
116my %stats = ( # Track which strategy we used to import:
117 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
118 simple_changeset => 0, import_or_tag => 0
119);
b779d5f0
ML
120
121my %rptags = (); # my reverse private tags
122 # to map a SHA1 to a commitid
2777ef76 123my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
d3968363 124
42f44b08
EW
125sub do_abrowse {
126 my $stage = shift;
127 while (my ($limit, $level) = each %arch_branches) {
128 next unless $level == $stage;
a6080a0a
JH
129
130 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
42f44b08 131 or die "Problems with tla abrowse: $!";
a6080a0a 132
42f44b08
EW
133 my %ps = (); # the current one
134 my $lastseen = '';
a6080a0a 135
42f44b08
EW
136 while (<ABROWSE>) {
137 chomp;
a6080a0a 138
42f44b08
EW
139 # first record padded w 8 spaces
140 if (s/^\s{8}\b//) {
141 my ($id, $type) = split(m/\s+/, $_, 2);
142
143 my %last_ps;
144 # store the record we just captured
145 if (%ps && !exists $psets{ $ps{id} }) {
146 %last_ps = %ps; # break references
147 push (@psets, \%last_ps);
148 $psets{ $last_ps{id} } = \%last_ps;
149 }
a6080a0a 150
42f44b08
EW
151 my $branch = extract_versionname($id);
152 %ps = ( id => $id, branch => $branch );
153 if (%last_ps && ($last_ps{branch} eq $branch)) {
154 $ps{parent_id} = $last_ps{id};
155 }
a6080a0a 156
42f44b08
EW
157 $arch_branches{$branch} = 1;
158 $lastseen = 'id';
159
160 # deal with types (should work with baz or tla):
161 if ($type =~ m/\(.*changeset\)/) {
162 $ps{type} = 's';
163 } elsif ($type =~ /\(.*import\)/) {
164 $ps{type} = 'i';
6df896b5 165 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
42f44b08
EW
166 $ps{type} = 't';
167 # read which revision we've tagged when we parse the log
6df896b5 168 $ps{tag} = $1;
a6080a0a 169 } else {
42f44b08
EW
170 warn "Unknown type $type";
171 }
172
173 $arch_branches{$branch} = 1;
174 $lastseen = 'id';
a6080a0a
JH
175 } elsif (s/^\s{10}//) {
176 # 10 leading spaces or more
42f44b08 177 # indicate commit metadata
a6080a0a 178
42f44b08
EW
179 # date
180 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
181 $ps{date} = $1;
182 $lastseen = 'date';
183 } elsif ($_ eq 'merges in:') {
184 $ps{merges} = [];
185 $lastseen = 'merges';
186 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
187 my $id = $_;
188 push (@{$ps{merges}}, $id);
a6080a0a 189
42f44b08
EW
190 # aggressive branch finding:
191 if ($opt_D) {
192 my $branch = extract_versionname($id);
193 my $repo = extract_reponame($branch);
a6080a0a 194
42f44b08
EW
195 if (archive_reachable($repo) &&
196 !defined $arch_branches{$branch}) {
197 $arch_branches{$branch} = $stage + 1;
198 }
199 }
200 } else {
201 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
d3968363 202 }
d3968363 203 }
d3968363 204 }
d3968363 205
42f44b08
EW
206 if (%ps && !exists $psets{ $ps{id} }) {
207 my %temp = %ps; # break references
208 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
209 $temp{parent_id} = $psets[$#psets]{id};
210 }
a6080a0a 211 push (@psets, \%temp);
42f44b08 212 $psets{ $temp{id} } = \%temp;
a6080a0a
JH
213 }
214
42f44b08
EW
215 close ABROWSE or die "$TLA abrowse failed on $limit\n";
216 }
d3968363
ML
217} # end foreach $root
218
42f44b08
EW
219do_abrowse(1);
220my $depth = 2;
221$opt_D ||= 0;
222while ($depth <= $opt_D) {
223 do_abrowse($depth);
224 $depth++;
225}
226
d3968363 227## Order patches by time
42f44b08
EW
228# FIXME see if we can find a more optimal way to do this by graphing
229# the ancestry data and walking it, that way we won't have to rely on
230# client-supplied dates
d3968363
ML
231@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
232
233#print Dumper \@psets;
234
235##
236## TODO cleanup irrelevant patches
237## and put an initial import
238## or a full tag
3292ae47 239my $import = 0;
1d4710d0 240unless (-d $git_dir) { # initial import
d3968363
ML
241 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
242 print "Starting import from $psets[0]{id}\n";
5c94f87e 243 `git-init`;
3292ae47
ML
244 die $! if $?;
245 $import = 1;
d3968363
ML
246 } else {
247 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
248 }
b779d5f0
ML
249} else { # progressing an import
250 # load the rptags
42f44b08 251 opendir(DIR, $ptag_dir)
b779d5f0
ML
252 || die "can't opendir: $!";
253 while (my $file = readdir(DIR)) {
a7fb51d3
EW
254 # skip non-interesting-files
255 next unless -f "$ptag_dir/$file";
a6080a0a 256
a7fb51d3
EW
257 # convert first '--' to '/' from old git-archimport to use
258 # as an archivename/c--b--v private tag
259 if ($file !~ m!,!) {
260 my $oldfile = $file;
261 $file =~ s!--!,!;
262 print STDERR "converting old tag $oldfile to $file\n";
263 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
264 }
b779d5f0
ML
265 my $sha = ptag($file);
266 chomp $sha;
b779d5f0
ML
267 $rptags{$sha} = $file;
268 }
269 closedir DIR;
d3968363
ML
270}
271
3292ae47 272# process patchsets
22ff00fc
EW
273# extract the Arch repository name (Arch "archive" in Arch-speak)
274sub extract_reponame {
275 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
276 return (split(/\//, $fq_cvbr))[0];
277}
a6080a0a 278
22ff00fc
EW
279sub extract_versionname {
280 my $name = shift;
281 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
282 return $name;
283}
d3968363 284
22ff00fc 285# convert a fully-qualified revision or version to a unique dirname:
a6080a0a 286# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
22ff00fc
EW
287# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
288#
289# the git notion of a branch is closer to
290# archive/category--branch--version than archive/category--branch, so we
291# use this to convert to git branch names.
292# Also, keep archive names but replace '/' with ',' since it won't require
293# subdirectories, and is safer than swapping '--' which could confuse
294# reverse-mapping when dealing with bastard branches that
295# are just archive/category--version (no --branch)
296sub tree_dirname {
297 my $revision = shift;
298 my $name = extract_versionname($revision);
299 $name =~ s#/#,#;
300 return $name;
301}
302
fee3365f
ML
303# old versions of git-archimport just use the <category--branch> part:
304sub old_style_branchname {
305 my $id = shift;
306 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
307 chomp $ret;
308 return $ret;
309}
310
d9cb5399
PB
311*git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
312
313# retrieve default archive, since $branch_name_map keys might not include it
314sub get_default_archive {
315 if (!defined $default_archive) {
316 $default_archive = safe_pipe_capture($TLA,'my-default-archive');
317 chomp $default_archive;
318 }
319 return $default_archive;
320}
321
322sub git_branchname {
323 my $revision = shift;
324 my $name = extract_versionname($revision);
325
326 if (exists $branch_name_map{$name}) {
327 return $branch_name_map{$name};
328
329 } elsif ($name =~ m#^([^/]*)/(.*)$#
330 && $1 eq get_default_archive()
331 && exists $branch_name_map{$2}) {
332 # the names given in the command-line lacked the archive.
333 return $branch_name_map{$2};
334
335 } else {
336 return git_default_branchname($revision);
337 }
338}
22ff00fc 339
3e525e67
EW
340sub process_patchset_accurate {
341 my $ps = shift;
a6080a0a 342
3e525e67
EW
343 # switch to that branch if we're not already in that branch:
344 if (-e "$git_dir/refs/heads/$ps->{branch}") {
345 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
d3968363 346
3e525e67
EW
347 # remove any old stuff that got leftover:
348 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
349 rmtree(split(/\0/,$rm)) if $rm;
3292ae47 350 }
a6080a0a 351
3e525e67
EW
352 # Apply the import/changeset/merge into the working tree
353 my $dir = sync_to_ps($ps);
354 # read the new log entry:
355 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
356 die "Error in cat-log: $!" if $?;
357 chomp @commitlog;
358
359 # grab variables we want from the log, new fields get added to $ps:
360 # (author, date, email, summary, message body ...)
361 parselog($ps, \@commitlog);
362
363 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
a6080a0a 364 # this should work when importing continuations
3e525e67 365 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
a6080a0a 366
3e525e67 367 # find where we are supposed to branch from
d9cb5399
PB
368 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
369 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
370
371 # We trust Arch with the fact that this is just a tag,
372 # and it does not affect the state of the tree, so
373 # we just tag and move on. If the user really wants us
374 # to consolidate more branches into one, don't tag because
375 # the tag name would be already taken.
376 tag($ps->{id}, $branchpoint);
377 ptag($ps->{id}, $branchpoint);
378 print " * Tagged $ps->{id} at $branchpoint\n";
379 }
380 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
381
3e525e67
EW
382 # remove any old stuff that got leftover:
383 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
384 rmtree(split(/\0/,$rm)) if $rm;
3e525e67
EW
385 return 0;
386 } else {
387 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
388 }
389 # allow multiple bases/imports here since Arch supports cherry-picks
390 # from unrelated trees
a6080a0a
JH
391 }
392
3e525e67 393 # update the index with all the changes we got
3ff903bf
EW
394 system('git-diff-files --name-only -z | '.
395 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
3e525e67
EW
396 system('git-ls-files --others -z | '.
397 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
3e525e67
EW
398 return 1;
399}
37f15d50 400
3e525e67
EW
401# the native changeset processing strategy. This is very fast, but
402# does not handle permissions or any renames involving directories
403sub process_patchset_fast {
404 my $ps = shift;
a6080a0a 405 #
d3968363
ML
406 # create the branch if needed
407 #
3292ae47
ML
408 if ($ps->{type} eq 'i' && !$import) {
409 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
d3968363
ML
410 }
411
3292ae47 412 unless ($import) { # skip for import
1d4710d0 413 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
d3968363 414 # we know about this branch
f88961a8 415 system('git-checkout',$ps->{branch});
d3968363
ML
416 } else {
417 # new branch! we need to verify a few things
418 die "Branch on a non-tag!" unless $ps->{type} eq 't';
419 my $branchpoint = ptag($ps->{tag});
a6080a0a 420 die "Tagging from unknown id unsupported: $ps->{tag}"
d3968363 421 unless $branchpoint;
a6080a0a 422
d3968363 423 # find where we are supposed to branch from
d9cb5399
PB
424 if (! -e "$git_dir/refs/heads/$ps->{branch}") {
425 system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
426
427 # We trust Arch with the fact that this is just a tag,
428 # and it does not affect the state of the tree, so
429 # we just tag and move on. If the user really wants us
430 # to consolidate more branches into one, don't tag because
431 # the tag name would be already taken.
432 tag($ps->{id}, $branchpoint);
433 ptag($ps->{id}, $branchpoint);
434 print " * Tagged $ps->{id} at $branchpoint\n";
435 }
436 system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n";
3e525e67 437 return 0;
a6080a0a 438 }
d3968363 439 die $! if $?;
a6080a0a 440 }
d3968363 441
d3968363
ML
442 #
443 # Apply the import/changeset/merge into the working tree
a6080a0a 444 #
d3968363 445 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
d3968363 446 apply_import($ps) or die $!;
3e525e67 447 $stats{import_or_tag}++;
3292ae47 448 $import=0;
d3968363
ML
449 } elsif ($ps->{type} eq 's') {
450 apply_cset($ps);
3e525e67 451 $stats{simple_changeset}++;
d3968363
ML
452 }
453
454 #
455 # prepare update git's index, based on what arch knows
456 # about the pset, resolve parents, etc
457 #
a6080a0a
JH
458
459 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
d3968363 460 die "Error in cat-archive-log: $!" if $?;
a6080a0a 461
6df896b5 462 parselog($ps,\@commitlog);
d3968363
ML
463
464 # imports don't give us good info
465 # on added files. Shame on them
6df896b5 466 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
6df896b5
EW
467 system('git-ls-files --deleted -z | '.
468 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
3ff903bf
EW
469 system('git-ls-files --others -z | '.
470 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
d3968363
ML
471 }
472
6df896b5 473 # TODO: handle removed_directories and renamed_directories:
3ff903bf 474
6df896b5
EW
475 if (my $del = $ps->{removed_files}) {
476 unlink @$del;
d3968363
ML
477 while (@$del) {
478 my @slice = splice(@$del, 0, 100);
6df896b5
EW
479 system('git-update-index','--remove','--',@slice) == 0 or
480 die "Error in git-update-index --remove: $! $?\n";
d3968363
ML
481 }
482 }
6df896b5
EW
483
484 if (my $ren = $ps->{renamed_files}) { # renamed
d3968363
ML
485 if (@$ren % 2) {
486 die "Odd number of entries in rename!?";
487 }
a6080a0a 488
d3968363 489 while (@$ren) {
6df896b5 490 my $from = shift @$ren;
a6080a0a 491 my $to = shift @$ren;
d3968363
ML
492
493 unless (-d dirname($to)) {
494 mkpath(dirname($to)); # will die on err
495 }
3e525e67 496 # print "moving $from $to";
6df896b5
EW
497 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
498 system('git-update-index','--remove','--',$from) == 0 or
499 die "Error in git-update-index --remove: $! $?\n";
500 system('git-update-index','--add','--',$to) == 0 or
501 die "Error in git-update-index --add: $! $?\n";
d3968363 502 }
d3968363 503 }
6df896b5 504
3ff903bf
EW
505 if (my $add = $ps->{new_files}) {
506 while (@$add) {
507 my @slice = splice(@$add, 0, 100);
508 system('git-update-index','--add','--',@slice) == 0 or
509 die "Error in git-update-index --add: $! $?\n";
510 }
511 }
512
6df896b5 513 if (my $mod = $ps->{modified_files}) {
d3968363
ML
514 while (@$mod) {
515 my @slice = splice(@$mod, 0, 100);
6df896b5
EW
516 system('git-update-index','--',@slice) == 0 or
517 die "Error in git-update-index: $! $?\n";
d3968363
ML
518 }
519 }
3e525e67
EW
520 return 1; # we successfully applied the changeset
521}
522
523if ($opt_f) {
524 print "Will import patchsets using the fast strategy\n",
525 "Renamed directories and permission changes will be missed\n";
526 *process_patchset = *process_patchset_fast;
527} else {
528 print "Using the default (accurate) import strategy.\n",
529 "Things may be a bit slow\n";
530 *process_patchset = *process_patchset_accurate;
531}
a6080a0a 532
3e525e67
EW
533foreach my $ps (@psets) {
534 # process patchsets
535 $ps->{branch} = git_branchname($ps->{id});
536
537 #
a6080a0a
JH
538 # ensure we have a clean state
539 #
3e525e67
EW
540 if (my $dirty = `git-diff-files`) {
541 die "Unclean tree when about to process $ps->{id} " .
542 " - did we fail to commit cleanly before?\n$dirty";
543 }
544 die $! if $?;
a6080a0a 545
3e525e67
EW
546 #
547 # skip commits already in repo
548 #
549 if (ptag($ps->{id})) {
550 $opt_v && print " * Skipping already imported: $ps->{id}\n";
10945e00 551 next;
3e525e67
EW
552 }
553
554 print " * Starting to work on $ps->{id}\n";
555
556 process_patchset($ps) or next;
557
215a7ad1 558 # warn "errors when running git-update-index! $!";
3e525e67 559 my $tree = `git-write-tree`;
d3968363
ML
560 die "cannot write tree $!" if $?;
561 chomp $tree;
a6080a0a 562
d3968363
ML
563 #
564 # Who's your daddy?
565 #
566 my @par;
1d4710d0 567 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
f88961a8 568 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
d3968363
ML
569 my $p = <HEAD>;
570 close HEAD;
571 chomp $p;
572 push @par, '-p', $p;
a6080a0a 573 } else {
d3968363
ML
574 if ($ps->{type} eq 's') {
575 warn "Could not find the right head for the branch $ps->{branch}";
576 }
577 }
578 }
a6080a0a 579
b779d5f0
ML
580 if ($ps->{merges}) {
581 push @par, find_parents($ps);
582 }
d3968363 583
a6080a0a 584 #
d3968363
ML
585 # Commit, tag and clean state
586 #
587 $ENV{TZ} = 'GMT';
588 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
589 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
590 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
591 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
592 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
593 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
594
a6080a0a 595 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
d3968363 596 or die $!;
a94f457e 597 print WRITER $ps->{summary},"\n\n";
608403d7
MB
598
599 # only print message if it's not empty, to avoid a spurious blank line;
600 # also append an extra newline, so there's a blank line before the
601 # following "git-archimport-id:" line.
602 print WRITER $ps->{message},"\n\n" if ($ps->{message} ne "");
a6080a0a 603
6df896b5
EW
604 # make it easy to backtrack and figure out which Arch revision this was:
605 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
a6080a0a 606
d3968363
ML
607 close WRITER;
608 my $commitid = <READER>; # read
609 chomp $commitid;
610 close READER;
611 waitpid $pid,0; # close;
612
613 if (length $commitid != 40) {
614 die "Something went wrong with the commit! $! $commitid";
615 }
616 #
617 # Update the branch
a6080a0a 618 #
f88961a8 619 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
d3968363
ML
620 print HEAD $commitid;
621 close HEAD;
8366a10a 622 system('git-update-ref', 'HEAD', "$ps->{branch}");
d3968363
ML
623
624 # tag accordingly
625 ptag($ps->{id}, $commitid); # private tag
626 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
627 tag($ps->{id}, $commitid);
628 }
629 print " * Committed $ps->{id}\n";
630 print " + tree $tree\n";
631 print " + commit $commitid\n";
b779d5f0 632 $opt_v && print " + commit date is $ps->{date} \n";
f88961a8 633 $opt_v && print " + parents: ",join(' ',@par),"\n";
3e525e67
EW
634}
635
636if ($opt_v) {
637 foreach (sort keys %stats) {
638 print" $_: $stats{$_}\n";
639 }
640}
641exit 0;
642
643# used by the accurate strategy:
644sub sync_to_ps {
645 my $ps = shift;
646 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
a6080a0a 647
3e525e67
EW
648 $opt_v && print "sync_to_ps($ps->{id}) method: ";
649
650 if (-d $tree_dir) {
651 if ($ps->{type} eq 't') {
652 $opt_v && print "get (tag)\n";
653 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
654 # can't rely on replay to work correctly on these
655 rmtree($tree_dir);
656 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
657 $stats{get_tag}++;
658 } else {
659 my $tree_id = arch_tree_id($tree_dir);
660 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
661 # the common case (hopefully)
662 $opt_v && print "replay\n";
663 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
664 $stats{replay}++;
665 } else {
666 # getting one tree is usually faster than getting two trees
667 # and applying the delta ...
668 rmtree($tree_dir);
669 $opt_v && print "apply-delta\n";
670 safe_pipe_capture($TLA,'get','--no-pristine',
671 $ps->{id},$tree_dir);
672 $stats{get_delta}++;
673 }
674 }
675 } else {
676 # new branch work
677 $opt_v && print "get (new tree)\n";
678 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
679 $stats{get_new}++;
680 }
a6080a0a 681
3e525e67
EW
682 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
683 system('rsync','-aI','--delete','--exclude',$git_dir,
684# '--exclude','.arch-inventory',
685 '--exclude','.arch-ids','--exclude','{arch}',
686 '--exclude','+*','--exclude',',*',
687 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
688 return $tree_dir;
d3968363
ML
689}
690
d3968363
ML
691sub apply_import {
692 my $ps = shift;
22ff00fc 693 my $bname = git_branchname($ps->{id});
d3968363 694
f88961a8 695 mkpath($tmp);
d3968363 696
f88961a8 697 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
a6080a0a 698 die "Cannot get import: $!" if $?;
f88961a8
EW
699 system('rsync','-aI','--delete', '--exclude',$git_dir,
700 '--exclude','.arch-ids','--exclude','{arch}',
701 "$tmp/import/", './');
d3968363 702 die "Cannot rsync import:$!" if $?;
a6080a0a 703
f88961a8 704 rmtree("$tmp/import");
d3968363 705 die "Cannot remove tempdir: $!" if $?;
a6080a0a 706
d3968363
ML
707
708 return 1;
709}
710
711sub apply_cset {
712 my $ps = shift;
713
f88961a8 714 mkpath($tmp);
d3968363
ML
715
716 # get the changeset
f88961a8 717 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
d3968363 718 die "Cannot get changeset: $!" if $?;
a6080a0a 719
d3968363
ML
720 # apply patches
721 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
722 # this can be sped up considerably by doing
723 # (find | xargs cat) | patch
82e5a82f 724 # but that can get mucked up by patches
a6080a0a 725 # with missing trailing newlines or the standard
d3968363
ML
726 # 'missing newline' flag in the patch - possibly
727 # produced with an old/buggy diff.
728 # slow and safe, we invoke patch once per patchfile
729 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
730 die "Problem applying patches! $!" if $?;
731 }
732
733 # apply changed binary files
734 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
735 foreach my $mod (@modified) {
736 chomp $mod;
737 my $orig = $mod;
738 $orig =~ s/\.modified$//; # lazy
739 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
740 #print "rsync -p '$mod' '$orig'";
f88961a8 741 system('rsync','-p',$mod,"./$orig");
d3968363
ML
742 die "Problem applying binary changes! $!" if $?;
743 }
744 }
745
746 # bring in new files
f88961a8 747 system('rsync','-aI','--exclude',$git_dir,
a6080a0a 748 '--exclude','.arch-ids',
f88961a8
EW
749 '--exclude', '{arch}',
750 "$tmp/changeset/new-files-archive/",'./');
d3968363
ML
751
752 # deleted files are hinted from the commitlog processing
753
f88961a8 754 rmtree("$tmp/changeset");
d3968363
ML
755}
756
757
758# =for reference
6df896b5
EW
759# notes: *-files/-directories keys cannot have spaces, they're always
760# pika-escaped. Everything after the first newline
761# A log entry looks like:
d3968363
ML
762# Revision: moodle-org--moodle--1.3.3--patch-15
763# Archive: arch-eduforge@catalyst.net.nz--2004
764# Creator: Penny Leach <penny@catalyst.net.nz>
765# Date: Wed May 25 14:15:34 NZST 2005
766# Standard-date: 2005-05-25 02:15:34 GMT
767# New-files: lang/de/.arch-ids/block_glossary_random.php.id
768# lang/de/.arch-ids/block_html.php.id
769# New-directories: lang/de/help/questionnaire
770# lang/de/help/questionnaire/.arch-ids
771# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
772# db_sears.sql db/db_sears.sql
773# Removed-files: lang/be/docs/.arch-ids/release.html.id
774# lang/be/docs/.arch-ids/releaseold.html.id
775# Modified-files: admin/cron.php admin/delete.php
776# admin/editor.html backup/lib.php backup/restore.php
777# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
778# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
6df896b5 779# summary can be multiline with a leading space just like the above fields
d3968363
ML
780# Keywords:
781#
782# Updating yadda tadda tadda madda
783sub parselog {
6df896b5
EW
784 my ($ps, $log) = @_;
785 my $key = undef;
786
787 # headers we want that contain filenames:
788 my %want_headers = (
789 new_files => 1,
790 modified_files => 1,
791 renamed_files => 1,
792 renamed_directories => 1,
793 removed_files => 1,
794 removed_directories => 1,
795 );
a6080a0a 796
6df896b5
EW
797 chomp (@$log);
798 while ($_ = shift @$log) {
799 if (/^Continuation-of:\s*(.*)/) {
800 $ps->{tag} = $1;
801 $key = undef;
802 } elsif (/^Summary:\s*(.*)$/ ) {
a94f457e
PB
803 # summary can be multiline as long as it has a leading space.
804 # we squeeze it onto a single line, though.
6df896b5
EW
805 $ps->{summary} = [ $1 ];
806 $key = 'summary';
807 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
808 $ps->{author} = $1;
809 $ps->{email} = $2;
810 $key = undef;
811 # any *-files or *-directories can be read here:
812 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
813 my $val = $2;
814 $key = lc $1;
815 $key =~ tr/-/_/; # too lazy to quote :P
816 if ($want_headers{$key}) {
817 push @{$ps->{$key}}, split(/\s+/, $val);
818 } else {
819 $key = undef;
820 }
821 } elsif (/^$/) {
822 last; # remainder of @$log that didn't get shifted off is message
823 } elsif ($key) {
824 if (/^\s+(.*)$/) {
825 if ($key eq 'summary') {
826 push @{$ps->{$key}}, $1;
827 } else { # files/directories:
828 push @{$ps->{$key}}, split(/\s+/, $1);
829 }
830 } else {
831 $key = undef;
832 }
833 }
d3968363 834 }
a6080a0a 835
a94f457e
PB
836 # drop leading empty lines from the log message
837 while (@$log && $log->[0] eq '') {
838 shift @$log;
839 }
840 if (exists $ps->{summary} && @{$ps->{summary}}) {
841 $ps->{summary} = join(' ', @{$ps->{summary}});
842 }
843 elsif (@$log == 0) {
844 $ps->{summary} = 'empty commit message';
845 } else {
846 $ps->{summary} = $log->[0] . '...';
847 }
6df896b5 848 $ps->{message} = join("\n",@$log);
a6080a0a 849
6df896b5
EW
850 # skip Arch control files, unescape pika-escaped files
851 foreach my $k (keys %want_headers) {
852 next unless (defined $ps->{$k});
6e33101a 853 my @tmp = ();
6df896b5
EW
854 foreach my $t (@{$ps->{$k}}) {
855 next unless length ($t);
856 next if $t =~ m!\{arch\}/!;
857 next if $t =~ m!\.arch-ids/!;
858 # should we skip this?
859 next if $t =~ m!\.arch-inventory$!;
f84f9d38
ML
860 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
861 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
6df896b5 862 if ($t =~ /\\/ ){
f88961a8 863 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
f84f9d38 864 }
6df896b5 865 push @tmp, $t;
d3968363 866 }
6e33101a 867 $ps->{$k} = \@tmp;
d3968363 868 }
d3968363
ML
869}
870
871# write/read a tag
872sub tag {
873 my ($tag, $commit) = @_;
a6080a0a 874
fee3365f
ML
875 if ($opt_o) {
876 $tag =~ s|/|--|g;
877 } else {
d9cb5399
PB
878 my $patchname = $tag;
879 $patchname =~ s/.*--//;
880 $tag = git_branchname ($tag) . '--' . $patchname;
fee3365f 881 }
a6080a0a 882
d3968363 883 if ($commit) {
a7fb51d3 884 open(C,">","$git_dir/refs/tags/$tag")
d3968363
ML
885 or die "Cannot create tag $tag: $!\n";
886 print C "$commit\n"
887 or die "Cannot write tag $tag: $!\n";
888 close(C)
889 or die "Cannot write tag $tag: $!\n";
a7fb51d3 890 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
d3968363 891 } else { # read
a7fb51d3 892 open(C,"<","$git_dir/refs/tags/$tag")
d3968363
ML
893 or die "Cannot read tag $tag: $!\n";
894 $commit = <C>;
895 chomp $commit;
896 die "Error reading tag $tag: $!\n" unless length $commit == 40;
897 close(C)
898 or die "Cannot read tag $tag: $!\n";
899 return $commit;
900 }
901}
902
903# write/read a private tag
904# reads fail softly if the tag isn't there
905sub ptag {
906 my ($tag, $commit) = @_;
a7fb51d3
EW
907
908 # don't use subdirs for tags yet, it could screw up other porcelains
a6080a0a
JH
909 $tag =~ s|/|,|g;
910
a7fb51d3
EW
911 my $tag_file = "$ptag_dir/$tag";
912 my $tag_branch_dir = dirname($tag_file);
913 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
d3968363
ML
914
915 if ($commit) { # write
a7fb51d3 916 open(C,">",$tag_file)
d3968363
ML
917 or die "Cannot create tag $tag: $!\n";
918 print C "$commit\n"
919 or die "Cannot write tag $tag: $!\n";
920 close(C)
921 or die "Cannot write tag $tag: $!\n";
a6080a0a 922 $rptags{$commit} = $tag
b779d5f0 923 unless $tag =~ m/--base-0$/;
d3968363
ML
924 } else { # read
925 # if the tag isn't there, return 0
a7fb51d3 926 unless ( -s $tag_file) {
d3968363
ML
927 return 0;
928 }
a7fb51d3 929 open(C,"<",$tag_file)
d3968363
ML
930 or die "Cannot read tag $tag: $!\n";
931 $commit = <C>;
932 chomp $commit;
933 die "Error reading tag $tag: $!\n" unless length $commit == 40;
934 close(C)
935 or die "Cannot read tag $tag: $!\n";
b779d5f0
ML
936 unless (defined $rptags{$commit}) {
937 $rptags{$commit} = $tag;
938 }
d3968363
ML
939 return $commit;
940 }
941}
b779d5f0
ML
942
943sub find_parents {
944 #
945 # Identify what branches are merging into me
946 # and whether we are fully merged
947 # git-merge-base <headsha> <headsha> should tell
a6080a0a 948 # me what the base of the merge should be
b779d5f0
ML
949 #
950 my $ps = shift;
951
952 my %branches; # holds an arrayref per branch
953 # the arrayref contains a list of
954 # merged patches between the base
955 # of the merge and the current head
956
957 my @parents; # parents found for this commit
958
959 # simple loop to split the merges
960 # per branch
961 foreach my $merge (@{$ps->{merges}}) {
22ff00fc 962 my $branch = git_branchname($merge);
b779d5f0
ML
963 unless (defined $branches{$branch} ){
964 $branches{$branch} = [];
965 }
966 push @{$branches{$branch}}, $merge;
967 }
968
969 #
a6080a0a 970 # foreach branch find a merge base and walk it to the
b779d5f0
ML
971 # head where we are, collecting the merged patchsets that
972 # Arch has recorded. Keep that in @have
973 # Compare that with the commits on the other branch
974 # between merge-base and the tip of the branch (@need)
975 # and see if we have a series of consecutive patches
976 # starting from the merge base. The tip of the series
a6080a0a 977 # of consecutive patches merged is our new parent for
b779d5f0
ML
978 # that branch.
979 #
980 foreach my $branch (keys %branches) {
37f15d50
ML
981
982 # check that we actually know about the branch
983 next unless -e "$git_dir/refs/heads/$branch";
984
b779d5f0 985 my $mergebase = `git-merge-base $branch $ps->{branch}`;
a6080a0a
JH
986 if ($?) {
987 # Don't die here, Arch supports one-way cherry-picking
988 # between branches with no common base (or any relationship
989 # at all beforehand)
990 warn "Cannot find merge base for $branch and $ps->{branch}";
991 next;
992 }
b779d5f0
ML
993 chomp $mergebase;
994
995 # now walk up to the mergepoint collecting what patches we have
996 my $branchtip = git_rev_parse($ps->{branch});
765ac8ec 997 my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
b779d5f0
ML
998 my %have; # collected merges this branch has
999 foreach my $merge (@{$ps->{merges}}) {
1000 $have{$merge} = 1;
1001 }
1002 my %ancestorshave;
1003 foreach my $par (@ancestors) {
1004 $par = commitid2pset($par);
1005 if (defined $par->{merges}) {
1006 foreach my $merge (@{$par->{merges}}) {
1007 $ancestorshave{$merge}=1;
1008 }
1009 }
1010 }
1011 # print "++++ Merges in $ps->{id} are....\n";
1012 # my @have = sort keys %have; print Dumper(\@have);
1013
1014 # merge what we have with what ancestors have
1015 %have = (%have, %ancestorshave);
1016
a6080a0a 1017 # see what the remote branch has - these are the merges we
b779d5f0
ML
1018 # will want to have in a consecutive series from the mergebase
1019 my $otherbranchtip = git_rev_parse($branch);
765ac8ec 1020 my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
b779d5f0
ML
1021 my @need;
1022 foreach my $needps (@needraw) { # get the psets
1023 $needps = commitid2pset($needps);
1024 # git-rev-list will also
a6080a0a 1025 # list commits merged in via earlier
b779d5f0
ML
1026 # merges. we are only interested in commits
1027 # from the branch we're looking at
1028 if ($branch eq $needps->{branch}) {
1029 push @need, $needps->{id};
1030 }
1031 }
1032
1033 # print "++++ Merges from $branch we want are....\n";
1034 # print Dumper(\@need);
1035
1036 my $newparent;
1037 while (my $needed_commit = pop @need) {
1038 if ($have{$needed_commit}) {
1039 $newparent = $needed_commit;
1040 } else {
1041 last; # break out of the while
1042 }
1043 }
1044 if ($newparent) {
1045 push @parents, $newparent;
1046 }
1047
1048
1049 } # end foreach branch
1050
1051 # prune redundant parents
1052 my %parents;
1053 foreach my $p (@parents) {
1054 $parents{$p} = 1;
1055 }
1056 foreach my $p (@parents) {
1057 next unless exists $psets{$p}{merges};
1058 next unless ref $psets{$p}{merges};
1059 my @merges = @{$psets{$p}{merges}};
1060 foreach my $merge (@merges) {
a6080a0a 1061 if ($parents{$merge}) {
b779d5f0
ML
1062 delete $parents{$merge};
1063 }
1064 }
1065 }
42f44b08 1066
f88961a8
EW
1067 @parents = ();
1068 foreach (keys %parents) {
1069 push @parents, '-p', ptag($_);
1070 }
b779d5f0
ML
1071 return @parents;
1072}
1073
1074sub git_rev_parse {
1075 my $name = shift;
1076 my $val = `git-rev-parse $name`;
1077 die "Error: git-rev-parse $name" if $?;
1078 chomp $val;
1079 return $val;
1080}
1081
1082# resolve a SHA1 to a known patchset
1083sub commitid2pset {
1084 my $commitid = shift;
1085 chomp $commitid;
a6080a0a 1086 my $name = $rptags{$commitid}
b779d5f0 1087 || die "Cannot find reverse tag mapping for $commitid";
a7fb51d3 1088 $name =~ s|,|/|;
a6080a0a 1089 my $ps = $psets{$name}
b779d5f0
ML
1090 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1091 return $ps;
1092}
2777ef76 1093
42f44b08 1094
82e5a82f 1095# an alternative to `command` that allows input to be passed as an array
2777ef76
EW
1096# to work around shell problems with weird characters in arguments
1097sub safe_pipe_capture {
1098 my @output;
1099 if (my $pid = open my $child, '-|') {
1100 @output = (<$child>);
1101 close $child or die join(' ',@_).": $! $?";
1102 } else {
3e525e67 1103 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
2777ef76
EW
1104 }
1105 return wantarray ? @output : join('',@output);
1106}
1107
42f44b08
EW
1108# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1109sub arch_tree_id {
1110 my $dir = shift;
1111 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1112 return $ret;
1113}
1114
1115sub archive_reachable {
1116 my $archive = shift;
1117 return 1 if $reachable{$archive};
1118 return 0 if $unreachable{$archive};
a6080a0a 1119
42f44b08
EW
1120 if (system "$TLA whereis-archive $archive >/dev/null") {
1121 if ($opt_a && (system($TLA,'register-archive',
1122 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1123 $reachable{$archive} = 1;
1124 return 1;
1125 }
1126 print STDERR "Archive is unreachable: $archive\n";
1127 $unreachable{$archive} = 1;
1128 return 0;
1129 } else {
1130 $reachable{$archive} = 1;
1131 return 1;
1132 }
1133}