[thirdparty/git.git] / git-archimport-script

#!/usr/bin/perl -w
#
# This tool is copyright (c) 2005, Martin Langhoff.
# It is released under the Gnu Public License, version 2.
#
# The basic idea is to walk the output of tla abrowse, 
# fetch the changesets and apply them. 
#
=head1 Invocation

    git-archimport-script -i <archive>/<branch> [<archive>/<branch>]
    [ <archive>/<branch> ]

    The script expects you to provide the key roots where it can start the
    import from an 'initial import' or 'tag' type of Arch commit. It will
    then follow all the branching and tagging within the provided roots.

    It will die if it sees branches that have different roots. 

=head2 TODO

 - keep track of merged patches, and mark a git merge when it happens
 - smarter rules to parse the archive history "up" and "down"
 - be able to continue an import where we left off
 - audit shell-escaping of filenames

=head1 Devel tricks

Add print in front of the shell commands invoked via backticks. 

=cut

use strict;
use warnings;
use Getopt::Std;
use File::Spec;
use File::Temp qw(tempfile);
use File::Path qw(mkpath);
use File::Basename qw(basename dirname);
use String::ShellQuote;
use Time::Local;
use IO::Socket;
use IO::Pipe;
use POSIX qw(strftime dup2);
use Data::Dumper qw/ Dumper /;
use IPC::Open2;

$SIG{'PIPE'}="IGNORE";
$ENV{'TZ'}="UTC";

our($opt_h,$opt_v, $opt_T,
    $opt_C,$opt_t, $opt_i);

sub usage() {
    print STDERR <<END;
Usage: ${\basename $0}     # fetch/update GIT from Arch
       [ -h ] [ -v ] [ -i ] [ -T ] 
       [ -C GIT_repository ] [ -t tempdir ] 
       repository/arch-branch [ repository/arch-branch] ...
END
    exit(1);
}

getopts("hviC:t:") or usage();
usage if $opt_h;

@ARGV >= 1 or usage();
my @arch_roots = @ARGV;

my $tmp = $opt_t;
$tmp ||= '/tmp';
$tmp .= '/git-archimport/';

my $git_tree = $opt_C;
$git_tree ||= ".";


my @psets  = ();                # the collection

foreach my $root (@arch_roots) {
    my ($arepo, $abranch) = split(m!/!, $root);
    open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |" 
        or die "Problems with tla abrowse: $!";
    
    my %ps        = ();         # the current one
    my $mode      = '';
    my $lastseen  = '';
    
    while (<ABROWSE>) {
        chomp;
        
        # first record padded w 8 spaces
        if (s/^\s{8}\b//) {
            
            # store the record we just captured
            if (%ps) {
                my %temp = %ps; # break references
                push (@psets, \%temp);
                %ps = ();
            }
            
            my ($id, $type) = split(m/\s{3}/, $_);
            $ps{id}   = $id;
            $ps{repo} = $arepo;

            # deal with types
            if ($type =~ m/^\(simple changeset\)/) {
                $ps{type} = 's';
            } elsif ($type eq '(initial import)') {
                $ps{type} = 'i';
            } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
                $ps{type} = 't';
                $ps{tag}  = $1;
            } else { 
                warn "Unknown type $type";
            }
            $lastseen = 'id';
        }
        
        if (s/^\s{10}//) { 
            # 10 leading spaces or more 
            # indicate commit metadata
            
            # date & author 
            if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
                
                my ($date, $authoremail) = split(m/\s{2,}/, $_);
                $ps{date}   = $date;
                $ps{date}   =~ s/\bGMT$//; # strip off trailign GMT
                if ($ps{date} =~ m/\b\w+$/) {
                    warn 'Arch dates not in GMT?! - imported dates will be wrong';
                }
            
                $authoremail =~ m/^(.+)\s(\S+)$/;
                $ps{author} = $1;
                $ps{email}  = $2;
            
                $lastseen = 'date';
            
            } elsif ($lastseen eq 'date') {
                # the only hint is position
                # subject is after date
                $ps{subj} = $_;
                $lastseen = 'subj';
            
            } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
                $ps{merges} = [];
                $lastseen = 'merges';
            
            } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
                push (@{$ps{merges}}, $_);
            } else {
                warn 'more metadata after merges!?';
            }
            
        }
    }

    if (%ps) {
        my %temp = %ps;         # break references
        push (@psets, \%temp);
        %ps = ();
    }    
    close ABROWSE;
}                               # end foreach $root

## Order patches by time
@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;

#print Dumper \@psets;

##
## TODO cleanup irrelevant patches
##      and put an initial import
##      or a full tag

if ($opt_i) {                   # initial import 
    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
        print "Starting import from $psets[0]{id}\n";
    } else {
        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
    }
    `git-init-db`;
    die $! if $?;
}

# process
my $lastbranch = branchname($psets[0]{id}); # only good for initial import
my $importseen = $opt_i ? 0 : 1; # start at 1 if opt_i

foreach my $ps (@psets) {

    $ps->{branch} =  branchname($ps->{id});

    #
    # ensure we have a clean state 
    # 
    if (`git diff-files`) {
        die "Unclean tree when about to process $ps->{id} " .
            " - did we fail to commit cleanly before?";
    }
    die $! if $?;

    # 
    # create the branch if needed
    #
    if ($ps->{type} eq 'i' && $importseen) {
        die "Should not have more than one 'Initial import' per GIT import";
    }

    unless ($opt_i && !$importseen) { # skip for first commit
        if ( -e ".git/refs/heads/$ps->{branch}") {
            # we know about this branch
            `git checkout    $ps->{branch}`;
        } else {
            # new branch! we need to verify a few things
            die "Branch on a non-tag!" unless $ps->{type} eq 't';
            my $branchpoint = ptag($ps->{tag});
            die "Tagging from unknown id unsupported: $ps->{tag}" 
                unless $branchpoint;
            
            # find where we are supposed to branch from
            `git checkout -b $ps->{branch} $branchpoint`;
        } 
        die $! if $?;
    } 

        
    #
    # Apply the import/changeset/merge into the working tree
    # 
    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
        $importseen = 1;
        apply_import($ps) or die $!;
    } elsif ($ps->{type} eq 's') {
        apply_cset($ps);
    }

    #
    # prepare update git's index, based on what arch knows
    # about the pset, resolve parents, etc
    #
    my $tree;
    
    my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`; 
    die "Error in cat-archive-log: $!" if $?;
        
    # parselog will git-add/rm files
    # and generally prepare things for the commit
    # NOTE: parselog will shell-quote filenames! 
    my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
    my $logmessage = "$sum\n$msg";


    # imports don't give us good info
    # on added files. Shame on them
    if ($ps->{type} eq 'i' || $ps->{type} eq 't') { 
        `find . -type f -print0 | grep -zv '^./.git' | xargs -0 -l100 git-update-cache --add`;
        `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-cache --remove`; 
    }

    if (@$add) {
        while (@$add) {
            my @slice = splice(@$add, 0, 100);
            my $slice = join(' ', @slice);          
            `git-update-cache --add $slice`;
            die "Error in git-update-cache --add: $!" if $?;
        }
    }
    if (@$del) {
        foreach my $file (@$del) {
            unlink $file or die "Problems deleting $file : $!";
        }
        while (@$del) {
            my @slice = splice(@$del, 0, 100);
            my $slice = join(' ', @slice);
            `git-update-cache --remove $slice`;
            die "Error in git-update-cache --remove: $!" if $?;
        }
    }
    if (@$ren) {                # renamed
        if (@$ren % 2) {
            die "Odd number of entries in rename!?";
        }
        ;
        while (@$ren) {
            my $from = pop @$ren;
            my $to   = pop @$ren;           

            unless (-d dirname($to)) {
                mkpath(dirname($to)); # will die on err
            }
            #print "moving $from $to";
            `mv $from $to`;
            die "Error renaming $from $to : $!" if $?;
            `git-update-cache --remove $from`;
            die "Error in git-update-cache --remove: $!" if $?;
            `git-update-cache --add $to`;
            die "Error in git-update-cache --add: $!" if $?;
        }

    }
    if (@$mod) {                # must be _after_ renames
        while (@$mod) {
            my @slice = splice(@$mod, 0, 100);
            my $slice = join(' ', @slice);
            `git-update-cache $slice`;
            die "Error in git-update-cache: $!" if $?;
        }
    }

    # warn "errors when running git-update-cache! $!";
    $tree = `git-write-tree`;
    die "cannot write tree $!" if $?;
    chomp $tree;
        
    
    #
    # Who's your daddy?
    #
    my @par;
    if ( -e ".git/refs/heads/$ps->{branch}") {
        if (open HEAD, "<.git/refs/heads/$ps->{branch}") {
            my $p = <HEAD>;
            close HEAD;
            chomp $p;
            push @par, '-p', $p;
        } else { 
            if ($ps->{type} eq 's') {
                warn "Could not find the right head for the branch $ps->{branch}";
            }
        }
    }
    
    my $par = join (' ', @par);

    #    
    # Commit, tag and clean state
    #
    $ENV{TZ}                  = 'GMT';
    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};

    my ($pid, $commit_rh, $commit_wh);
    $commit_rh = 'commit_rh';
    $commit_wh = 'commit_wh';
    
    $pid = open2(*READER, *WRITER, "git-commit-tree $tree $par") 
        or die $!;
    print WRITER $logmessage;   # write
    close WRITER;
    my $commitid = <READER>;    # read
    chomp $commitid;
    close READER;
    waitpid $pid,0;             # close;

    if (length $commitid != 40) {
        die "Something went wrong with the commit! $! $commitid";
    }
    #
    # Update the branch
    # 
    open  HEAD, ">.git/refs/heads/$ps->{branch}";
    print HEAD $commitid;
    close HEAD;
    unlink ('.git/HEAD');
    symlink("refs/heads/$ps->{branch}",".git/HEAD");

    # tag accordingly
    ptag($ps->{id}, $commitid); # private tag
    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
        tag($ps->{id}, $commitid);
    }
    print " * Committed $ps->{id}\n";
    print "   + tree   $tree\n";
    print "   + commit $commitid\n";
    # print "   + commit date is  $ps->{date} \n";
}

sub branchname {
    my $id = shift;
    $id =~ s#^.+?/##;
    my @parts = split(m/--/, $id);
    return join('--', @parts[0..1]);
}

sub apply_import {
    my $ps = shift;
    my $bname = branchname($ps->{id});

    `mkdir -p $tmp`;

    `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
    die "Cannot get import: $!" if $?;    
    `rsync -v --archive --delete --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
    die "Cannot rsync import:$!" if $?;
    
    `rm -fr $tmp/import`;
    die "Cannot remove tempdir: $!" if $?;
    

    return 1;
}

sub apply_cset {
    my $ps = shift;

    `mkdir -p $tmp`;

    # get the changeset
    `tla get-changeset  -A $ps->{repo} $ps->{id} $tmp/changeset`;
    die "Cannot get changeset: $!" if $?;
    
    # apply patches
    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
        # this can be sped up considerably by doing
        #    (find | xargs cat) | patch
        # but that cna get mucked up by patches
        # with missing trailing newlines or the standard 
        # 'missing newline' flag in the patch - possibly
        # produced with an old/buggy diff.
        # slow and safe, we invoke patch once per patchfile
        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
        die "Problem applying patches! $!" if $?;
    }

    # apply changed binary files
    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
        foreach my $mod (@modified) {
            chomp $mod;
            my $orig = $mod;
            $orig =~ s/\.modified$//; # lazy
            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
            #print "rsync -p '$mod' '$orig'";
            `rsync -p $mod ./$orig`;
            die "Problem applying binary changes! $!" if $?;
        }
    }

    # bring in new files
    `rsync --archive --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;

    # deleted files are hinted from the commitlog processing

    `rm -fr $tmp/changeset`;
}


# =for reference
# A log entry looks like 
# Revision: moodle-org--moodle--1.3.3--patch-15
# Archive: arch-eduforge@catalyst.net.nz--2004
# Creator: Penny Leach <penny@catalyst.net.nz>
# Date: Wed May 25 14:15:34 NZST 2005
# Standard-date: 2005-05-25 02:15:34 GMT
# New-files: lang/de/.arch-ids/block_glossary_random.php.id
#     lang/de/.arch-ids/block_html.php.id
# New-directories: lang/de/help/questionnaire
#     lang/de/help/questionnaire/.arch-ids
# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
#    db_sears.sql db/db_sears.sql
# Removed-files: lang/be/docs/.arch-ids/release.html.id
#     lang/be/docs/.arch-ids/releaseold.html.id
# Modified-files: admin/cron.php admin/delete.php
#     admin/editor.html backup/lib.php backup/restore.php
# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
# Keywords:
#
# Updating yadda tadda tadda madda
sub parselog {
    my $log = shift;
    #print $log;

    my (@add, @del, @mod, @ren, @kw, $sum, $msg );

    if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
        my $files = $1;
        @add = split(m/\s+/s, $files);
    }
       
    if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
        my $files = $1;
        @del = split(m/\s+/s, $files);
    }
    
    if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
        my $files = $1;
        @mod = split(m/\s+/s, $files);
    }
    
    if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
        my $files = $1;
        @ren = split(m/\s+/s, $files);
    }

    $sum ='';
    if ($log =~ m/^Summary:(.+?)$/m ) {
        $sum = $1;
        $sum =~ s/^\s+//;
        $sum =~ s/\s+$//;
    }

    $msg = '';
    if ($log =~ m/\n\n(.+)$/s) {
        $msg = $1;
        $msg =~ s/^\s+//;
        $msg =~ s/\s+$//;
    }


    # cleanup the arrays
    foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
        my @tmp = ();
        while (my $t = pop @$ref) {
            next unless length ($t);
            next if $t =~ m!\{arch\}/!;
            next if $t =~ m!\.arch-ids/!;
            next if $t =~ m!\.arch-inventory$!;
            push (@tmp, shell_quote($t));
        }
        @$ref = @tmp;
    }
    
    #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren]; 
    return       ($sum, $msg, \@add, \@del, \@mod, \@ren); 
}

# write/read a tag
sub tag {
    my ($tag, $commit) = @_;
    $tag =~ s|/|--|g; 
    $tag = shell_quote($tag);
    
    if ($commit) {
        open(C,">.git/refs/tags/$tag")
            or die "Cannot create tag $tag: $!\n";
        print C "$commit\n"
            or die "Cannot write tag $tag: $!\n";
        close(C)
            or die "Cannot write tag $tag: $!\n";
        print "Created tag '$tag' on '$commit'\n" if $opt_v;
    } else {                    # read
        open(C,"<.git/refs/tags/$tag")
            or die "Cannot read tag $tag: $!\n";
        $commit = <C>;
        chomp $commit;
        die "Error reading tag $tag: $!\n" unless length $commit == 40;
        close(C)
            or die "Cannot read tag $tag: $!\n";
        return $commit;
    }
}

# write/read a private tag
# reads fail softly if the tag isn't there
sub ptag {
    my ($tag, $commit) = @_;
    $tag =~ s|/|--|g; 
    $tag = shell_quote($tag);
    
    unless (-d '.git/archimport/tags') {
        mkpath('.git/archimport/tags');
    }

    if ($commit) {              # write
        open(C,">.git/archimport/tags/$tag")
            or die "Cannot create tag $tag: $!\n";
        print C "$commit\n"
            or die "Cannot write tag $tag: $!\n";
        close(C)
            or die "Cannot write tag $tag: $!\n";
    } else {                    # read
        # if the tag isn't there, return 0
        unless ( -s ".git/archimport/tags/$tag") {
            warn "Could not find tag $tag -- perhaps it isn't in the repos we have?\n" 
                if $opt_v;
            return 0;
        }
        open(C,"<.git/archimport/tags/$tag")
            or die "Cannot read tag $tag: $!\n";
        $commit = <C>;
        chomp $commit;
        die "Error reading tag $tag: $!\n" unless length $commit == 40;
        close(C)
            or die "Cannot read tag $tag: $!\n";
        return $commit;
    }
}
Commit	Line	Data
d3968363 ML	1	#!/usr/bin/perl -w
	2	#
	3	# This tool is copyright (c) 2005, Martin Langhoff.
	4	# It is released under the Gnu Public License, version 2.
	5	#
	6	# The basic idea is to walk the output of tla abrowse,
	7	# fetch the changesets and apply them.
	8	#
	9	=head1 Invocation
	10
	11	git-archimport-script -i <archive>/<branch> [<archive>/<branch>]
	12	[ <archive>/<branch> ]
	13
	14	The script expects you to provide the key roots where it can start the
	15	import from an 'initial import' or 'tag' type of Arch commit. It will
	16	then follow all the branching and tagging within the provided roots.
	17
	18	It will die if it sees branches that have different roots.
	19
	20	=head2 TODO
	21
	22	- keep track of merged patches, and mark a git merge when it happens
	23	- smarter rules to parse the archive history "up" and "down"
	24	- be able to continue an import where we left off
	25	- audit shell-escaping of filenames
	26
	27	=head1 Devel tricks
	28
	29	Add print in front of the shell commands invoked via backticks.
	30
	31	=cut
	32
	33	use strict;
	34	use warnings;
	35	use Getopt::Std;
	36	use File::Spec;
	37	use File::Temp qw(tempfile);
	38	use File::Path qw(mkpath);
	39	use File::Basename qw(basename dirname);
	40	use String::ShellQuote;
	41	use Time::Local;
	42	use IO::Socket;
	43	use IO::Pipe;
	44	use POSIX qw(strftime dup2);
	45	use Data::Dumper qw/ Dumper /;
	46	use IPC::Open2;
	47
	48	$SIG{'PIPE'}="IGNORE";
	49	$ENV{'TZ'}="UTC";
	50
	51	our($opt_h,$opt_v, $opt_T,
	52	$opt_C,$opt_t, $opt_i);
	53
	54	sub usage() {
	55	print STDERR <<END;
	56	Usage: ${\basename $0} # fetch/update GIT from Arch
	57	[ -h ] [ -v ] [ -i ] [ -T ]
	58	[ -C GIT_repository ] [ -t tempdir ]
	59	repository/arch-branch [ repository/arch-branch] ...
	60	END
	61	exit(1);
	62	}
	63
	64	getopts("hviC:t:") or usage();
65	usage if $opt_h;
66
67	@ARGV >= 1 or usage();
68	my @arch_roots = @ARGV;
69
70	my $tmp = $opt_t;
71	$tmp \|\|= '/tmp';
72	$tmp .= '/git-archimport/';
73
74	my $git_tree = $opt_C;
75	$git_tree \|\|= ".";
76
77
78	my @psets = (); # the collection
79
80	foreach my $root (@arch_roots) {
81	my ($arepo, $abranch) = split(m!/!, $root);
82	open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch \|"
83	or die "Problems with tla abrowse: $!";
84
85	my %ps = (); # the current one
86	my $mode = '';
87	my $lastseen = '';
88
89	while (<ABROWSE>) {
90	chomp;
91
92	# first record padded w 8 spaces
93	if (s/^\s{8}\b//) {
94
95	# store the record we just captured
96	if (%ps) {
97	my %temp = %ps; # break references
98	push (@psets, \%temp);
99	%ps = ();
100	}
101
102	my ($id, $type) = split(m/\s{3}/, $_);
103	$ps{id} = $id;
104	$ps{repo} = $arepo;
105
106	# deal with types
107	if ($type =~ m/^\(simple changeset\)/) {
108	$ps{type} = 's';
109	} elsif ($type eq '(initial import)') {
110	$ps{type} = 'i';
111	} elsif ($type =~ m/^\(tag revision of (.+)\)/) {
112	$ps{type} = 't';
113	$ps{tag} = $1;
114	} else {
115	warn "Unknown type $type";
116	}
117	$lastseen = 'id';
118	}
119
120	if (s/^\s{10}//) {
121	# 10 leading spaces or more
122	# indicate commit metadata
123
124	# date & author
125	if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
126
127	my ($date, $authoremail) = split(m/\s{2,}/, $_);
128	$ps{date} = $date;
129	$ps{date} =~ s/\bGMT$//; # strip off trailign GMT
130	if ($ps{date} =~ m/\b\w+$/) {
131	warn 'Arch dates not in GMT?! - imported dates will be wrong';
132	}
133
134	$authoremail =~ m/^(.+)\s(\S+)$/;
135	$ps{author} = $1;
136	$ps{email} = $2;
137
138	$lastseen = 'date';
139
140	} elsif ($lastseen eq 'date') {
141	# the only hint is position
142	# subject is after date
143	$ps{subj} = $_;
144	$lastseen = 'subj';
145
146	} elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
147	$ps{merges} = [];
148	$lastseen = 'merges';
149
150	} elsif ($lastseen eq 'merges' && s/^\s{2}//) {
151	push (@{$ps{merges}}, $_);
152	} else {
153	warn 'more metadata after merges!?';
154	}
155
156	}
157	}
158
159	if (%ps) {
160	my %temp = %ps; # break references
161	push (@psets, \%temp);
162	%ps = ();
163	}
164	close ABROWSE;
165	} # end foreach $root
166
167	## Order patches by time
168	@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
169
170	#print Dumper \@psets;
171
172	##
173	## TODO cleanup irrelevant patches
174	## and put an initial import
175	## or a full tag
176
177	if ($opt_i) { # initial import
178	if ($psets[0]{type} eq 'i' \|\| $psets[0]{type} eq 't') {
179	print "Starting import from $psets[0]{id}\n";
180	} else {
181	die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
182	}
183	`git-init-db`;
184	die $! if $?;
185	}
186
187	# process
188	my $lastbranch = branchname($psets[0]{id}); # only good for initial import
189	my $importseen = $opt_i ? 0 : 1; # start at 1 if opt_i
190
191	foreach my $ps (@psets) {
192
193	$ps->{branch} = branchname($ps->{id});
194
195	#
196	# ensure we have a clean state
197	#
198	if (`git diff-files`) {
199	die "Unclean tree when about to process $ps->{id} " .
200	" - did we fail to commit cleanly before?";
201	}
202	die $! if $?;
203
204	#
205	# create the branch if needed
206	#
207	if ($ps->{type} eq 'i' && $importseen) {
208	die "Should not have more than one 'Initial import' per GIT import";
209	}
210
211	unless ($opt_i && !$importseen) { # skip for first commit
212	if ( -e ".git/refs/heads/$ps->{branch}") {
213	# we know about this branch
214	`git checkout $ps->{branch}`;
215	} else {
216	# new branch! we need to verify a few things
217	die "Branch on a non-tag!" unless $ps->{type} eq 't';
218	my $branchpoint = ptag($ps->{tag});
219	die "Tagging from unknown id unsupported: $ps->{tag}"
220	unless $branchpoint;
221
222	# find where we are supposed to branch from
223	`git checkout -b $ps->{branch} $branchpoint`;
224	}
225	die $! if $?;
226	}
227
228
229	#
230	# Apply the import/changeset/merge into the working tree
231	#
232	if ($ps->{type} eq 'i' \|\| $ps->{type} eq 't') {
233	$importseen = 1;
234	apply_import($ps) or die $!;
235	} elsif ($ps->{type} eq 's') {
236	apply_cset($ps);
237	}
238
239	#
240	# prepare update git's index, based on what arch knows
241	# about the pset, resolve parents, etc
242	#
243	my $tree;
244
245	my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`;
246	die "Error in cat-archive-log: $!" if $?;
247
248	# parselog will git-add/rm files
249	# and generally prepare things for the commit
250	# NOTE: parselog will shell-quote filenames!
251	my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
252	my $logmessage = "$sum\n$msg";
253
254
255	# imports don't give us good info
256	# on added files. Shame on them
257	if ($ps->{type} eq 'i' \|\| $ps->{type} eq 't') {
258	`find . -type f -print0 \| grep -zv '^./.git' \| xargs -0 -l100 git-update-cache --add`;
259	`git-ls-files --deleted -z \| xargs --no-run-if-empty -0 -l100 git-update-cache --remove`;
260	}
261
262	if (@$add) {
263	while (@$add) {
264	my @slice = splice(@$add, 0, 100);
265	my $slice = join(' ', @slice);
266	`git-update-cache --add $slice`;
267	die "Error in git-update-cache --add: $!" if $?;
268	}
269	}
270	if (@$del) {
271	foreach my $file (@$del) {
272	unlink $file or die "Problems deleting $file : $!";
273	}
274	while (@$del) {
275	my @slice = splice(@$del, 0, 100);
276	my $slice = join(' ', @slice);
277	`git-update-cache --remove $slice`;
278	die "Error in git-update-cache --remove: $!" if $?;
279	}
280	}
281	if (@$ren) { # renamed
282	if (@$ren % 2) {
283	die "Odd number of entries in rename!?";
284	}
285	;
286	while (@$ren) {
287	my $from = pop @$ren;
288	my $to = pop @$ren;
289
290	unless (-d dirname($to)) {
291	mkpath(dirname($to)); # will die on err
292	}
293	#print "moving $from $to";
294	`mv $from $to`;
295	die "Error renaming $from $to : $!" if $?;
296	`git-update-cache --remove $from`;
297	die "Error in git-update-cache --remove: $!" if $?;
298	`git-update-cache --add $to`;
299	die "Error in git-update-cache --add: $!" if $?;
300	}
301
302	}
303	if (@$mod) { # must be _after_ renames
304	while (@$mod) {
305	my @slice = splice(@$mod, 0, 100);
306	my $slice = join(' ', @slice);
307	`git-update-cache $slice`;
308	die "Error in git-update-cache: $!" if $?;
309	}
310	}
311
312	# warn "errors when running git-update-cache! $!";
313	$tree = `git-write-tree`;
314	die "cannot write tree $!" if $?;
315	chomp $tree;
316
317
318	#
319	# Who's your daddy?
320	#
321	my @par;
322	if ( -e ".git/refs/heads/$ps->{branch}") {
323	if (open HEAD, "<.git/refs/heads/$ps->{branch}") {
324	my $p = <HEAD>;
325	close HEAD;
326	chomp $p;
327	push @par, '-p', $p;
328	} else {
329	if ($ps->{type} eq 's') {
330	warn "Could not find the right head for the branch $ps->{branch}";
331	}
332	}
333	}
334
335	my $par = join (' ', @par);
336
337	#
338	# Commit, tag and clean state
339	#
340	$ENV{TZ} = 'GMT';
341	$ENV{GIT_AUTHOR_NAME} = $ps->{author};
342	$ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
343	$ENV{GIT_AUTHOR_DATE} = $ps->{date};
344	$ENV{GIT_COMMITTER_NAME} = $ps->{author};
345	$ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
346	$ENV{GIT_COMMITTER_DATE} = $ps->{date};
347
348	my ($pid, $commit_rh, $commit_wh);
349	$commit_rh = 'commit_rh';
350	$commit_wh = 'commit_wh';
351
352	$pid = open2(READER, WRITER, "git-commit-tree $tree $par")
353	or die $!;
354	print WRITER $logmessage; # write
355	close WRITER;
356	my $commitid = <READER>; # read
357	chomp $commitid;
358	close READER;
359	waitpid $pid,0; # close;
360
361	if (length $commitid != 40) {
362	die "Something went wrong with the commit! $! $commitid";
363	}
364	#
365	# Update the branch
366	#
367	open HEAD, ">.git/refs/heads/$ps->{branch}";
368	print HEAD $commitid;
369	close HEAD;
370	unlink ('.git/HEAD');
371	symlink("refs/heads/$ps->{branch}",".git/HEAD");
372
373	# tag accordingly
374	ptag($ps->{id}, $commitid); # private tag
375	if ($opt_T \|\| $ps->{type} eq 't' \|\| $ps->{type} eq 'i') {
376	tag($ps->{id}, $commitid);
377	}
378	print " * Committed $ps->{id}\n";
379	print " + tree $tree\n";
380	print " + commit $commitid\n";
381	# print " + commit date is $ps->{date} \n";
382	}
383
384	sub branchname {
385	my $id = shift;
386	$id =~ s#^.+?/##;
387	my @parts = split(m/--/, $id);
388	return join('--', @parts[0..1]);
389	}
390
391	sub apply_import {
392	my $ps = shift;
393	my $bname = branchname($ps->{id});
394
395	`mkdir -p $tmp`;
396
397	`tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
398	die "Cannot get import: $!" if $?;
399	`rsync -v --archive --delete --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
400	die "Cannot rsync import:$!" if $?;
401
402	`rm -fr $tmp/import`;
403	die "Cannot remove tempdir: $!" if $?;
404
405
406	return 1;
407	}
408
409	sub apply_cset {
410	my $ps = shift;
411
412	`mkdir -p $tmp`;
413
414	# get the changeset
415	`tla get-changeset -A $ps->{repo} $ps->{id} $tmp/changeset`;
416	die "Cannot get changeset: $!" if $?;
417
418	# apply patches
419	if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
420	# this can be sped up considerably by doing
421	# (find \| xargs cat) \| patch
422	# but that cna get mucked up by patches
423	# with missing trailing newlines or the standard
424	# 'missing newline' flag in the patch - possibly
425	# produced with an old/buggy diff.
426	# slow and safe, we invoke patch once per patchfile
427	`find $tmp/changeset/patches -type f -name '*.patch' -print0 \| grep -zv '{arch}' \| xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
428	die "Problem applying patches! $!" if $?;
429	}
430
431	# apply changed binary files
432	if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
433	foreach my $mod (@modified) {
434	chomp $mod;
435	my $orig = $mod;
436	$orig =~ s/\.modified$//; # lazy
437	$orig =~ s!^\Q$tmp\E/changeset/patches/!!;
438	#print "rsync -p '$mod' '$orig'";
439	`rsync -p $mod ./$orig`;
440	die "Problem applying binary changes! $!" if $?;
441	}
442	}
443
444	# bring in new files
445	`rsync --archive --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
446
447	# deleted files are hinted from the commitlog processing
448
449	`rm -fr $tmp/changeset`;
450	}
451
452
453	# =for reference
454	# A log entry looks like
455	# Revision: moodle-org--moodle--1.3.3--patch-15
456	# Archive: arch-eduforge@catalyst.net.nz--2004
457	# Creator: Penny Leach <penny@catalyst.net.nz>
458	# Date: Wed May 25 14:15:34 NZST 2005
459	# Standard-date: 2005-05-25 02:15:34 GMT
460	# New-files: lang/de/.arch-ids/block_glossary_random.php.id
461	# lang/de/.arch-ids/block_html.php.id
462	# New-directories: lang/de/help/questionnaire
463	# lang/de/help/questionnaire/.arch-ids
464	# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
465	# db_sears.sql db/db_sears.sql
466	# Removed-files: lang/be/docs/.arch-ids/release.html.id
467	# lang/be/docs/.arch-ids/releaseold.html.id
468	# Modified-files: admin/cron.php admin/delete.php
469	# admin/editor.html backup/lib.php backup/restore.php
470	# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
471	# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
472	# Keywords:
473	#
474	# Updating yadda tadda tadda madda
475	sub parselog {
476	my $log = shift;
477	#print $log;
478
479	my (@add, @del, @mod, @ren, @kw, $sum, $msg );
480
481	if ($log =~ m/(?:\n\|^)New-files:(.*?)(?=\n\w)/s ) {
482	my $files = $1;
483	@add = split(m/\s+/s, $files);
484	}
485
486	if ($log =~ m/(?:\n\|^)Removed-files:(.*?)(?=\n\w)/s ) {
487	my $files = $1;
488	@del = split(m/\s+/s, $files);
489	}
490
491	if ($log =~ m/(?:\n\|^)Modified-files:(.*?)(?=\n\w)/s ) {
492	my $files = $1;
493	@mod = split(m/\s+/s, $files);
494	}
495
496	if ($log =~ m/(?:\n\|^)Renamed-files:(.*?)(?=\n\w)/s ) {
497	my $files = $1;
498	@ren = split(m/\s+/s, $files);
499	}
500
501	$sum ='';
502	if ($log =~ m/^Summary:(.+?)$/m ) {
503	$sum = $1;
504	$sum =~ s/^\s+//;
505	$sum =~ s/\s+$//;
506	}
507
508	$msg = '';
509	if ($log =~ m/\n\n(.+)$/s) {
510	$msg = $1;
511	$msg =~ s/^\s+//;
512	$msg =~ s/\s+$//;
513	}
514
515
516	# cleanup the arrays
517	foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
518	my @tmp = ();
519	while (my $t = pop @$ref) {
520	next unless length ($t);
521	next if $t =~ m!\{arch\}/!;
522	next if $t =~ m!\.arch-ids/!;
523	next if $t =~ m!\.arch-inventory$!;
524	push (@tmp, shell_quote($t));
525	}
526	@$ref = @tmp;
527	}
528
529	#print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren];
530	return ($sum, $msg, \@add, \@del, \@mod, \@ren);
531	}
532
533	# write/read a tag
534	sub tag {
535	my ($tag, $commit) = @_;
536	$tag =~ s\|/\|--\|g;
537	$tag = shell_quote($tag);
538
539	if ($commit) {
540	open(C,">.git/refs/tags/$tag")
541	or die "Cannot create tag $tag: $!\n";
542	print C "$commit\n"
543	or die "Cannot write tag $tag: $!\n";
544	close(C)
545	or die "Cannot write tag $tag: $!\n";
546	print "Created tag '$tag' on '$commit'\n" if $opt_v;
547	} else { # read
548	open(C,"<.git/refs/tags/$tag")
549	or die "Cannot read tag $tag: $!\n";
550	$commit = <C>;
551	chomp $commit;
552	die "Error reading tag $tag: $!\n" unless length $commit == 40;
553	close(C)
554	or die "Cannot read tag $tag: $!\n";
555	return $commit;
556	}
557	}
558
559	# write/read a private tag
560	# reads fail softly if the tag isn't there
561	sub ptag {
562	my ($tag, $commit) = @_;
563	$tag =~ s\|/\|--\|g;
564	$tag = shell_quote($tag);
565
566	unless (-d '.git/archimport/tags') {
567	mkpath('.git/archimport/tags');
568	}
569
570	if ($commit) { # write
571	open(C,">.git/archimport/tags/$tag")
572	or die "Cannot create tag $tag: $!\n";
573	print C "$commit\n"
574	or die "Cannot write tag $tag: $!\n";
575	close(C)
576	or die "Cannot write tag $tag: $!\n";
577	} else { # read
578	# if the tag isn't there, return 0
579	unless ( -s ".git/archimport/tags/$tag") {
580	warn "Could not find tag $tag -- perhaps it isn't in the repos we have?\n"
581	if $opt_v;
582	return 0;
583	}
584	open(C,"<.git/archimport/tags/$tag")
585	or die "Cannot read tag $tag: $!\n";
586	$commit = <C>;
587	chomp $commit;
588	die "Error reading tag $tag: $!\n" unless length $commit == 40;
589	close(C)
590	or die "Cannot read tag $tag: $!\n";
591	return $commit;
592	}
593	}