]> git.ipfire.org Git - thirdparty/git.git/blob - git-cvsimport.perl
Merge branch 'ml/maint-grep-doc'
[thirdparty/git.git] / git-cvsimport.perl
1 #!/usr/bin/perl -w
2
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
9 #
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
12 #
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
15
16 use strict;
17 use warnings;
18 use Getopt::Long;
19 use File::Spec;
20 use File::Temp qw(tempfile tmpnam);
21 use File::Path qw(mkpath);
22 use File::Basename qw(basename dirname);
23 use Time::Local;
24 use IO::Socket;
25 use IO::Pipe;
26 use POSIX qw(strftime dup2 ENOENT);
27 use IPC::Open2;
28
29 $SIG{'PIPE'}="IGNORE";
30 $ENV{'TZ'}="UTC";
31
32 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r);
33 my (%conv_author_name, %conv_author_email);
34
35 sub usage(;$) {
36 my $msg = shift;
37 print(STDERR "Error: $msg\n") if $msg;
38 print STDERR <<END;
39 Usage: git cvsimport # fetch/update GIT from CVS
40 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
41 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
42 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
43 [-r remote] [CVS_module]
44 END
45 exit(1);
46 }
47
48 sub read_author_info($) {
49 my ($file) = @_;
50 my $user;
51 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
52
53 while (<$f>) {
54 # Expected format is this:
55 # exon=Andreas Ericsson <ae@op5.se>
56 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
57 $user = $1;
58 $conv_author_name{$user} = $2;
59 $conv_author_email{$user} = $3;
60 }
61 # However, we also read from CVSROOT/users format
62 # to ease migration.
63 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
64 my $mapped;
65 ($user, $mapped) = ($1, $3);
66 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
67 $conv_author_name{$user} = $1;
68 $conv_author_email{$user} = $2;
69 }
70 elsif ($mapped =~ /^<?(.*)>?$/) {
71 $conv_author_name{$user} = $user;
72 $conv_author_email{$user} = $1;
73 }
74 }
75 # NEEDSWORK: Maybe warn on unrecognized lines?
76 }
77 close ($f);
78 }
79
80 sub write_author_info($) {
81 my ($file) = @_;
82 open my $f, '>', $file or
83 die("Failed to open $file for writing: $!");
84
85 foreach (keys %conv_author_name) {
86 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
87 }
88 close ($f);
89 }
90
91 # convert getopts specs for use by git config
92 sub read_repo_config {
93 # Split the string between characters, unless there is a ':'
94 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
95 my @opts = split(/ *(?!:)/, shift);
96 foreach my $o (@opts) {
97 my $key = $o;
98 $key =~ s/://g;
99 my $arg = 'git config';
100 $arg .= ' --bool' if ($o !~ /:$/);
101
102 chomp(my $tmp = `$arg --get cvsimport.$key`);
103 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
104 no strict 'refs';
105 my $opt_name = "opt_" . $key;
106 if (!$$opt_name) {
107 $$opt_name = $tmp;
108 }
109 }
110 }
111 }
112
113 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:";
114 read_repo_config($opts);
115 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
116
117 # turn the Getopt::Std specification in a Getopt::Long one,
118 # with support for multiple -M options
119 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
120 or usage();
121 usage if $opt_h;
122
123 if (@ARGV == 0) {
124 chomp(my $module = `git config --get cvsimport.module`);
125 push(@ARGV, $module) if $? == 0;
126 }
127 @ARGV <= 1 or usage("You can't specify more than one CVS module");
128
129 if ($opt_d) {
130 $ENV{"CVSROOT"} = $opt_d;
131 } elsif (-f 'CVS/Root') {
132 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
133 $opt_d = <$f>;
134 chomp $opt_d;
135 close $f;
136 $ENV{"CVSROOT"} = $opt_d;
137 } elsif ($ENV{"CVSROOT"}) {
138 $opt_d = $ENV{"CVSROOT"};
139 } else {
140 usage("CVSROOT needs to be set");
141 }
142 $opt_s ||= "-";
143 $opt_a ||= 0;
144
145 my $git_tree = $opt_C;
146 $git_tree ||= ".";
147
148 my $remote;
149 if (defined $opt_r) {
150 $remote = 'refs/remotes/' . $opt_r;
151 $opt_o ||= "master";
152 } else {
153 $opt_o ||= "origin";
154 $remote = 'refs/heads';
155 }
156
157 my $cvs_tree;
158 if ($#ARGV == 0) {
159 $cvs_tree = $ARGV[0];
160 } elsif (-f 'CVS/Repository') {
161 open my $f, '<', 'CVS/Repository' or
162 die 'Failed to open CVS/Repository';
163 $cvs_tree = <$f>;
164 chomp $cvs_tree;
165 close $f;
166 } else {
167 usage("CVS module has to be specified");
168 }
169
170 our @mergerx = ();
171 if ($opt_m) {
172 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
173 }
174 if (@opt_M) {
175 push (@mergerx, map { qr/$_/ } @opt_M);
176 }
177
178 # Remember UTC of our starting time
179 # we'll want to avoid importing commits
180 # that are too recent
181 our $starttime = time();
182
183 select(STDERR); $|=1; select(STDOUT);
184
185
186 package CVSconn;
187 # Basic CVS dialog.
188 # We're only interested in connecting and downloading, so ...
189
190 use File::Spec;
191 use File::Temp qw(tempfile);
192 use POSIX qw(strftime dup2);
193
194 sub new {
195 my ($what,$repo,$subdir) = @_;
196 $what=ref($what) if ref($what);
197
198 my $self = {};
199 $self->{'buffer'} = "";
200 bless($self,$what);
201
202 $repo =~ s#/+$##;
203 $self->{'fullrep'} = $repo;
204 $self->conn();
205
206 $self->{'subdir'} = $subdir;
207 $self->{'lines'} = undef;
208
209 return $self;
210 }
211
212 sub conn {
213 my $self = shift;
214 my $repo = $self->{'fullrep'};
215 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
216 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
217
218 my ($proxyhost,$proxyport);
219 if ($param && ($param =~ m/proxy=([^;]+)/)) {
220 $proxyhost = $1;
221 # Default proxyport, if not specified, is 8080.
222 $proxyport = 8080;
223 if ($ENV{"CVS_PROXY_PORT"}) {
224 $proxyport = $ENV{"CVS_PROXY_PORT"};
225 }
226 if ($param =~ m/proxyport=([^;]+)/) {
227 $proxyport = $1;
228 }
229 }
230 $repo ||= '/';
231
232 # if username is not explicit in CVSROOT, then use current user, as cvs would
233 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
234 my $rr2 = "-";
235 unless ($port) {
236 $rr2 = ":pserver:$user\@$serv:$repo";
237 $port=2401;
238 }
239 my $rr = ":pserver:$user\@$serv:$port$repo";
240
241 if ($pass) {
242 $pass = $self->_scramble($pass);
243 } else {
244 open(H,$ENV{'HOME'}."/.cvspass") and do {
245 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
246 while (<H>) {
247 chomp;
248 s/^\/\d+\s+//;
249 my ($w,$p) = split(/\s/,$_,2);
250 if ($w eq $rr or $w eq $rr2) {
251 $pass = $p;
252 last;
253 }
254 }
255 };
256 $pass = "A" unless $pass;
257 }
258
259 my ($s, $rep);
260 if ($proxyhost) {
261
262 # Use a HTTP Proxy. Only works for HTTP proxies that
263 # don't require user authentication
264 #
265 # See: http://www.ietf.org/rfc/rfc2817.txt
266
267 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
268 die "Socket to $proxyhost: $!\n" unless defined $s;
269 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
270 or die "Write to $proxyhost: $!\n";
271 $s->flush();
272
273 $rep = <$s>;
274
275 # The answer should look like 'HTTP/1.x 2yy ....'
276 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
277 die "Proxy connect: $rep\n";
278 }
279 # Skip up to the empty line of the proxy server output
280 # including the response headers.
281 while ($rep = <$s>) {
282 last if (!defined $rep ||
283 $rep eq "\n" ||
284 $rep eq "\r\n");
285 }
286 } else {
287 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
288 die "Socket to $serv: $!\n" unless defined $s;
289 }
290
291 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
292 or die "Write to $serv: $!\n";
293 $s->flush();
294
295 $rep = <$s>;
296
297 if ($rep ne "I LOVE YOU\n") {
298 $rep="<unknown>" unless $rep;
299 die "AuthReply: $rep\n";
300 }
301 $self->{'socketo'} = $s;
302 $self->{'socketi'} = $s;
303 } else { # local or ext: Fork off our own cvs server.
304 my $pr = IO::Pipe->new();
305 my $pw = IO::Pipe->new();
306 my $pid = fork();
307 die "Fork: $!\n" unless defined $pid;
308 my $cvs = 'cvs';
309 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
310 my $rsh = 'rsh';
311 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
312
313 my @cvs = ($cvs, 'server');
314 my ($local, $user, $host);
315 $local = $repo =~ s/:local://;
316 if (!$local) {
317 $repo =~ s/:ext://;
318 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
319 ($user, $host) = ($1, $2);
320 }
321 if (!$local) {
322 if ($user) {
323 unshift @cvs, $rsh, '-l', $user, $host;
324 } else {
325 unshift @cvs, $rsh, $host;
326 }
327 }
328
329 unless ($pid) {
330 $pr->writer();
331 $pw->reader();
332 dup2($pw->fileno(),0);
333 dup2($pr->fileno(),1);
334 $pr->close();
335 $pw->close();
336 exec(@cvs);
337 }
338 $pw->writer();
339 $pr->reader();
340 $self->{'socketo'} = $pw;
341 $self->{'socketi'} = $pr;
342 }
343 $self->{'socketo'}->write("Root $repo\n");
344
345 # Trial and error says that this probably is the minimum set
346 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
347
348 $self->{'socketo'}->write("valid-requests\n");
349 $self->{'socketo'}->flush();
350
351 chomp(my $rep=$self->readline());
352 if ($rep !~ s/^Valid-requests\s*//) {
353 $rep="<unknown>" unless $rep;
354 die "Expected Valid-requests from server, but got: $rep\n";
355 }
356 chomp(my $res=$self->readline());
357 die "validReply: $res\n" if $res ne "ok";
358
359 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
360 $self->{'repo'} = $repo;
361 }
362
363 sub readline {
364 my ($self) = @_;
365 return $self->{'socketi'}->getline();
366 }
367
368 sub _file {
369 # Request a file with a given revision.
370 # Trial and error says this is a good way to do it. :-/
371 my ($self,$fn,$rev) = @_;
372 $self->{'socketo'}->write("Argument -N\n") or return undef;
373 $self->{'socketo'}->write("Argument -P\n") or return undef;
374 # -kk: Linus' version doesn't use it - defaults to off
375 if ($opt_k) {
376 $self->{'socketo'}->write("Argument -kk\n") or return undef;
377 }
378 $self->{'socketo'}->write("Argument -r\n") or return undef;
379 $self->{'socketo'}->write("Argument $rev\n") or return undef;
380 $self->{'socketo'}->write("Argument --\n") or return undef;
381 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
382 $self->{'socketo'}->write("Directory .\n") or return undef;
383 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
384 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
385 $self->{'socketo'}->write("co\n") or return undef;
386 $self->{'socketo'}->flush() or return undef;
387 $self->{'lines'} = 0;
388 return 1;
389 }
390 sub _line {
391 # Read a line from the server.
392 # ... except that 'line' may be an entire file. ;-)
393 my ($self, $fh) = @_;
394 die "Not in lines" unless defined $self->{'lines'};
395
396 my $line;
397 my $res=0;
398 while (defined($line = $self->readline())) {
399 # M U gnupg-cvs-rep/AUTHORS
400 # Updated gnupg-cvs-rep/
401 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
402 # /AUTHORS/1.1///T1.1
403 # u=rw,g=rw,o=rw
404 # 0
405 # ok
406
407 if ($line =~ s/^(?:Created|Updated) //) {
408 $line = $self->readline(); # path
409 $line = $self->readline(); # Entries line
410 my $mode = $self->readline(); chomp $mode;
411 $self->{'mode'} = $mode;
412 defined (my $cnt = $self->readline())
413 or die "EOF from server after 'Changed'\n";
414 chomp $cnt;
415 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
416 $line="";
417 $res = $self->_fetchfile($fh, $cnt);
418 } elsif ($line =~ s/^ //) {
419 print $fh $line;
420 $res += length($line);
421 } elsif ($line =~ /^M\b/) {
422 # output, do nothing
423 } elsif ($line =~ /^Mbinary\b/) {
424 my $cnt;
425 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
426 chomp $cnt;
427 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
428 $line="";
429 $res += $self->_fetchfile($fh, $cnt);
430 } else {
431 chomp $line;
432 if ($line eq "ok") {
433 # print STDERR "S: ok (".length($res).")\n";
434 return $res;
435 } elsif ($line =~ s/^E //) {
436 # print STDERR "S: $line\n";
437 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
438 $line = $self->readline(); # filename
439 $line = $self->readline(); # OK
440 chomp $line;
441 die "Unknown: $line" if $line ne "ok";
442 return -1;
443 } else {
444 die "Unknown: $line\n";
445 }
446 }
447 }
448 return undef;
449 }
450 sub file {
451 my ($self,$fn,$rev) = @_;
452 my $res;
453
454 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
455 DIR => File::Spec->tmpdir(), UNLINK => 1);
456
457 $self->_file($fn,$rev) and $res = $self->_line($fh);
458
459 if (!defined $res) {
460 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
461 truncate $fh, 0;
462 $self->conn();
463 $self->_file($fn,$rev) or die "No file command send";
464 $res = $self->_line($fh);
465 die "Retry failed" unless defined $res;
466 }
467 close ($fh);
468
469 return ($name, $res);
470 }
471 sub _fetchfile {
472 my ($self, $fh, $cnt) = @_;
473 my $res = 0;
474 my $bufsize = 1024 * 1024;
475 while ($cnt) {
476 if ($bufsize > $cnt) {
477 $bufsize = $cnt;
478 }
479 my $buf;
480 my $num = $self->{'socketi'}->read($buf,$bufsize);
481 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
482 print $fh $buf;
483 $res += $num;
484 $cnt -= $num;
485 }
486 return $res;
487 }
488
489 sub _scramble {
490 my ($self, $pass) = @_;
491 my $scrambled = "A";
492
493 return $scrambled unless $pass;
494
495 my $pass_len = length($pass);
496 my @pass_arr = split("", $pass);
497 my $i;
498
499 # from cvs/src/scramble.c
500 my @shifts = (
501 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
502 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
503 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
504 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
505 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
506 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
507 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
508 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
509 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
510 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
511 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
512 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
513 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
514 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
515 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
516 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
517 );
518
519 for ($i = 0; $i < $pass_len; $i++) {
520 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
521 }
522
523 return $scrambled;
524 }
525
526 package main;
527
528 my $cvs = CVSconn->new($opt_d, $cvs_tree);
529
530
531 sub pdate($) {
532 my ($d) = @_;
533 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
534 or die "Unparseable date: $d\n";
535 my $y=$1; $y-=1900 if $y>1900;
536 return timegm($6||0,$5,$4,$3,$2-1,$y);
537 }
538
539 sub pmode($) {
540 my ($mode) = @_;
541 my $m = 0;
542 my $mm = 0;
543 my $um = 0;
544 for my $x(split(//,$mode)) {
545 if ($x eq ",") {
546 $m |= $mm&$um;
547 $mm = 0;
548 $um = 0;
549 } elsif ($x eq "u") { $um |= 0700;
550 } elsif ($x eq "g") { $um |= 0070;
551 } elsif ($x eq "o") { $um |= 0007;
552 } elsif ($x eq "r") { $mm |= 0444;
553 } elsif ($x eq "w") { $mm |= 0222;
554 } elsif ($x eq "x") { $mm |= 0111;
555 } elsif ($x eq "=") { # do nothing
556 } else { die "Unknown mode: $mode\n";
557 }
558 }
559 $m |= $mm&$um;
560 return $m;
561 }
562
563 sub getwd() {
564 my $pwd = `pwd`;
565 chomp $pwd;
566 return $pwd;
567 }
568
569 sub is_sha1 {
570 my $s = shift;
571 return $s =~ /^[a-f0-9]{40}$/;
572 }
573
574 sub get_headref ($) {
575 my $name = shift;
576 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
577 return undef unless $? == 0;
578 chomp $r;
579 return $r;
580 }
581
582 my $user_filename_prepend = '';
583 sub munge_user_filename {
584 my $name = shift;
585 return File::Spec->file_name_is_absolute($name) ?
586 $name :
587 $user_filename_prepend . $name;
588 }
589
590 -d $git_tree
591 or mkdir($git_tree,0777)
592 or die "Could not create $git_tree: $!";
593 if ($git_tree ne '.') {
594 $user_filename_prepend = getwd() . '/';
595 chdir($git_tree);
596 }
597
598 my $last_branch = "";
599 my $orig_branch = "";
600 my %branch_date;
601 my $tip_at_start = undef;
602
603 my $git_dir = $ENV{"GIT_DIR"} || ".git";
604 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
605 $ENV{"GIT_DIR"} = $git_dir;
606 my $orig_git_index;
607 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
608
609 my %index; # holds filenames of one index per branch
610
611 unless (-d $git_dir) {
612 system(qw(git init));
613 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
614 system(qw(git read-tree));
615 die "Cannot init an empty tree: $?\n" if $?;
616
617 $last_branch = $opt_o;
618 $orig_branch = "";
619 } else {
620 open(F, "-|", qw(git symbolic-ref HEAD)) or
621 die "Cannot run git symbolic-ref: $!\n";
622 chomp ($last_branch = <F>);
623 $last_branch = basename($last_branch);
624 close(F);
625 unless ($last_branch) {
626 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
627 $last_branch = "master";
628 }
629 $orig_branch = $last_branch;
630 $tip_at_start = `git rev-parse --verify HEAD`;
631
632 # Get the last import timestamps
633 my $fmt = '($ref, $author) = (%(refname), %(author));';
634 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
635 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
636 while (defined(my $entry = <H>)) {
637 my ($ref, $author);
638 eval($entry) || die "cannot eval refs list: $@";
639 my ($head) = ($ref =~ m|^$remote/(.*)|);
640 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
641 $branch_date{$head} = $1;
642 }
643 close(H);
644 if (!exists $branch_date{$opt_o}) {
645 die "Branch '$opt_o' does not exist.\n".
646 "Either use the correct '-o branch' option,\n".
647 "or import to a new repository.\n";
648 }
649 }
650
651 -d $git_dir
652 or die "Could not create git subdir ($git_dir).\n";
653
654 # now we read (and possibly save) author-info as well
655 -f "$git_dir/cvs-authors" and
656 read_author_info("$git_dir/cvs-authors");
657 if ($opt_A) {
658 read_author_info(munge_user_filename($opt_A));
659 write_author_info("$git_dir/cvs-authors");
660 }
661
662
663 #
664 # run cvsps into a file unless we are getting
665 # it passed as a file via $opt_P
666 #
667 my $cvspsfile;
668 unless ($opt_P) {
669 print "Running cvsps...\n" if $opt_v;
670 my $pid = open(CVSPS,"-|");
671 my $cvspsfh;
672 die "Cannot fork: $!\n" unless defined $pid;
673 unless ($pid) {
674 my @opt;
675 @opt = split(/,/,$opt_p) if defined $opt_p;
676 unshift @opt, '-z', $opt_z if defined $opt_z;
677 unshift @opt, '-q' unless defined $opt_v;
678 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
679 push @opt, '--cvs-direct';
680 }
681 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
682 die "Could not start cvsps: $!\n";
683 }
684 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
685 DIR => File::Spec->tmpdir());
686 while (<CVSPS>) {
687 print $cvspsfh $_;
688 }
689 close CVSPS;
690 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
691 close $cvspsfh;
692 } else {
693 $cvspsfile = munge_user_filename($opt_P);
694 }
695
696 open(CVS, "<$cvspsfile") or die $!;
697
698 ## cvsps output:
699 #---------------------
700 #PatchSet 314
701 #Date: 1999/09/18 13:03:59
702 #Author: wkoch
703 #Branch: STABLE-BRANCH-1-0
704 #Ancestor branch: HEAD
705 #Tag: (none)
706 #Log:
707 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
708 #Members:
709 # README:1.57->1.57.2.1
710 # VERSION:1.96->1.96.2.1
711 #
712 #---------------------
713
714 my $state = 0;
715
716 sub update_index (\@\@) {
717 my $old = shift;
718 my $new = shift;
719 open(my $fh, '|-', qw(git update-index -z --index-info))
720 or die "unable to open git update-index: $!";
721 print $fh
722 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
723 @$old),
724 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
725 @$new)
726 or die "unable to write to git update-index: $!";
727 close $fh
728 or die "unable to write to git update-index: $!";
729 $? and die "git update-index reported error: $?";
730 }
731
732 sub write_tree () {
733 open(my $fh, '-|', qw(git write-tree))
734 or die "unable to open git write-tree: $!";
735 chomp(my $tree = <$fh>);
736 is_sha1($tree)
737 or die "Cannot get tree id ($tree): $!";
738 close($fh)
739 or die "Error running git write-tree: $?\n";
740 print "Tree ID $tree\n" if $opt_v;
741 return $tree;
742 }
743
744 my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
745 my (@old,@new,@skipped,%ignorebranch);
746
747 # commits that cvsps cannot place anywhere...
748 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
749
750 sub commit {
751 if ($branch eq $opt_o && !$index{branch} &&
752 !get_headref("$remote/$branch")) {
753 # looks like an initial commit
754 # use the index primed by git init
755 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
756 $index{$branch} = "$git_dir/index";
757 } else {
758 # use an index per branch to speed up
759 # imports of projects with many branches
760 unless ($index{$branch}) {
761 $index{$branch} = tmpnam();
762 $ENV{GIT_INDEX_FILE} = $index{$branch};
763 if ($ancestor) {
764 system("git", "read-tree", "$remote/$ancestor");
765 } else {
766 system("git", "read-tree", "$remote/$branch");
767 }
768 die "read-tree failed: $?\n" if $?;
769 }
770 }
771 $ENV{GIT_INDEX_FILE} = $index{$branch};
772
773 update_index(@old, @new);
774 @old = @new = ();
775 my $tree = write_tree();
776 my $parent = get_headref("$remote/$last_branch");
777 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
778
779 my @commit_args;
780 push @commit_args, ("-p", $parent) if $parent;
781
782 # loose detection of merges
783 # based on the commit msg
784 foreach my $rx (@mergerx) {
785 next unless $logmsg =~ $rx && $1;
786 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
787 if (my $sha1 = get_headref("$remote/$mparent")) {
788 push @commit_args, '-p', "$remote/$mparent";
789 print "Merge parent branch: $mparent\n" if $opt_v;
790 }
791 }
792
793 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
794 $ENV{GIT_AUTHOR_NAME} = $author_name;
795 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
796 $ENV{GIT_AUTHOR_DATE} = $commit_date;
797 $ENV{GIT_COMMITTER_NAME} = $author_name;
798 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
799 $ENV{GIT_COMMITTER_DATE} = $commit_date;
800 my $pid = open2(my $commit_read, my $commit_write,
801 'git', 'commit-tree', $tree, @commit_args);
802
803 # compatibility with git2cvs
804 substr($logmsg,32767) = "" if length($logmsg) > 32767;
805 $logmsg =~ s/[\s\n]+\z//;
806
807 if (@skipped) {
808 $logmsg .= "\n\n\nSKIPPED:\n\t";
809 $logmsg .= join("\n\t", @skipped) . "\n";
810 @skipped = ();
811 }
812
813 print($commit_write "$logmsg\n") && close($commit_write)
814 or die "Error writing to git commit-tree: $!\n";
815
816 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
817 chomp(my $cid = <$commit_read>);
818 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
819 print "Commit ID $cid\n" if $opt_v;
820 close($commit_read);
821
822 waitpid($pid,0);
823 die "Error running git commit-tree: $?\n" if $?;
824
825 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
826 or die "Cannot write branch $branch for update: $!\n";
827
828 if ($tag) {
829 my ($xtag) = $tag;
830 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
831 $xtag =~ tr/_/\./ if ( $opt_u );
832 $xtag =~ s/[\/]/$opt_s/g;
833 $xtag =~ s/\[//g;
834
835 system('git' , 'tag', '-f', $xtag, $cid) == 0
836 or die "Cannot create tag $xtag: $!\n";
837
838 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
839 }
840 };
841
842 my $commitcount = 1;
843 while (<CVS>) {
844 chomp;
845 if ($state == 0 and /^-+$/) {
846 $state = 1;
847 } elsif ($state == 0) {
848 $state = 1;
849 redo;
850 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
851 $patchset = 0+$_;
852 $state=2;
853 } elsif ($state == 2 and s/^Date:\s+//) {
854 $date = pdate($_);
855 unless ($date) {
856 print STDERR "Could not parse date: $_\n";
857 $state=0;
858 next;
859 }
860 $state=3;
861 } elsif ($state == 3 and s/^Author:\s+//) {
862 s/\s+$//;
863 if (/^(.*?)\s+<(.*)>/) {
864 ($author_name, $author_email) = ($1, $2);
865 } elsif ($conv_author_name{$_}) {
866 $author_name = $conv_author_name{$_};
867 $author_email = $conv_author_email{$_};
868 } else {
869 $author_name = $author_email = $_;
870 }
871 $state = 4;
872 } elsif ($state == 4 and s/^Branch:\s+//) {
873 s/\s+$//;
874 tr/_/\./ if ( $opt_u );
875 s/[\/]/$opt_s/g;
876 $branch = $_;
877 $state = 5;
878 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
879 s/\s+$//;
880 $ancestor = $_;
881 $ancestor = $opt_o if $ancestor eq "HEAD";
882 $state = 6;
883 } elsif ($state == 5) {
884 $ancestor = undef;
885 $state = 6;
886 redo;
887 } elsif ($state == 6 and s/^Tag:\s+//) {
888 s/\s+$//;
889 if ($_ eq "(none)") {
890 $tag = undef;
891 } else {
892 $tag = $_;
893 }
894 $state = 7;
895 } elsif ($state == 7 and /^Log:/) {
896 $logmsg = "";
897 $state = 8;
898 } elsif ($state == 8 and /^Members:/) {
899 $branch = $opt_o if $branch eq "HEAD";
900 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
901 # skip
902 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
903 $state = 11;
904 next;
905 }
906 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
907 # skip if the commit is too recent
908 # given that the cvsps default fuzz is 300s, we give ourselves another
909 # 300s just in case -- this also prevents skipping commits
910 # due to server clock drift
911 print "skip patchset $patchset: $date too recent\n" if $opt_v;
912 $state = 11;
913 next;
914 }
915 if (exists $ignorebranch{$branch}) {
916 print STDERR "Skipping $branch\n";
917 $state = 11;
918 next;
919 }
920 if ($ancestor) {
921 if ($ancestor eq $branch) {
922 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
923 $ancestor = $opt_o;
924 }
925 if (defined get_headref("$remote/$branch")) {
926 print STDERR "Branch $branch already exists!\n";
927 $state=11;
928 next;
929 }
930 my $id = get_headref("$remote/$ancestor");
931 if (!$id) {
932 print STDERR "Branch $ancestor does not exist!\n";
933 $ignorebranch{$branch} = 1;
934 $state=11;
935 next;
936 }
937
938 system(qw(git update-ref -m cvsimport),
939 "$remote/$branch", $id);
940 if($? != 0) {
941 print STDERR "Could not create branch $branch\n";
942 $ignorebranch{$branch} = 1;
943 $state=11;
944 next;
945 }
946 }
947 $last_branch = $branch if $branch ne $last_branch;
948 $state = 9;
949 } elsif ($state == 8) {
950 $logmsg .= "$_\n";
951 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
952 # VERSION:1.96->1.96.2.1
953 my $init = ($2 eq "INITIAL");
954 my $fn = $1;
955 my $rev = $3;
956 $fn =~ s#^/+##;
957 if ($opt_S && $fn =~ m/$opt_S/) {
958 print "SKIPPING $fn v $rev\n";
959 push(@skipped, $fn);
960 next;
961 }
962 print "Fetching $fn v $rev\n" if $opt_v;
963 my ($tmpname, $size) = $cvs->file($fn,$rev);
964 if ($size == -1) {
965 push(@old,$fn);
966 print "Drop $fn\n" if $opt_v;
967 } else {
968 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
969 my $pid = open(my $F, '-|');
970 die $! unless defined $pid;
971 if (!$pid) {
972 exec("git", "hash-object", "-w", $tmpname)
973 or die "Cannot create object: $!\n";
974 }
975 my $sha = <$F>;
976 chomp $sha;
977 close $F;
978 my $mode = pmode($cvs->{'mode'});
979 push(@new,[$mode, $sha, $fn]); # may be resurrected!
980 }
981 unlink($tmpname);
982 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
983 my $fn = $1;
984 $fn =~ s#^/+##;
985 push(@old,$fn);
986 print "Delete $fn\n" if $opt_v;
987 } elsif ($state == 9 and /^\s*$/) {
988 $state = 10;
989 } elsif (($state == 9 or $state == 10) and /^-+$/) {
990 $commitcount++;
991 if ($opt_L && $commitcount > $opt_L) {
992 last;
993 }
994 commit();
995 if (($commitcount & 1023) == 0) {
996 system(qw(git repack -a -d));
997 }
998 $state = 1;
999 } elsif ($state == 11 and /^-+$/) {
1000 $state = 1;
1001 } elsif (/^-+$/) { # end of unknown-line processing
1002 $state = 1;
1003 } elsif ($state != 11) { # ignore stuff when skipping
1004 print STDERR "* UNKNOWN LINE * $_\n";
1005 }
1006 }
1007 commit() if $branch and $state != 11;
1008
1009 unless ($opt_P) {
1010 unlink($cvspsfile);
1011 }
1012
1013 # The heuristic of repacking every 1024 commits can leave a
1014 # lot of unpacked data. If there is more than 1MB worth of
1015 # not-packed objects, repack once more.
1016 my $line = `git count-objects`;
1017 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1018 my ($n_objects, $kb) = ($1, $2);
1019 1024 < $kb
1020 and system(qw(git repack -a -d));
1021 }
1022
1023 foreach my $git_index (values %index) {
1024 if ($git_index ne "$git_dir/index") {
1025 unlink($git_index);
1026 }
1027 }
1028
1029 if (defined $orig_git_index) {
1030 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1031 } else {
1032 delete $ENV{GIT_INDEX_FILE};
1033 }
1034
1035 # Now switch back to the branch we were in before all of this happened
1036 if ($orig_branch) {
1037 print "DONE.\n" if $opt_v;
1038 if ($opt_i) {
1039 exit 0;
1040 }
1041 my $tip_at_end = `git rev-parse --verify HEAD`;
1042 if ($tip_at_start ne $tip_at_end) {
1043 for ($tip_at_start, $tip_at_end) { chomp; }
1044 print "Fetched into the current branch.\n" if $opt_v;
1045 system(qw(git read-tree -u -m),
1046 $tip_at_start, $tip_at_end);
1047 die "Fast-forward update failed: $?\n" if $?;
1048 }
1049 else {
1050 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1051 die "Could not merge $opt_o into the current branch.\n" if $?;
1052 }
1053 } else {
1054 $orig_branch = "master";
1055 print "DONE; creating $orig_branch branch\n" if $opt_v;
1056 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1057 unless defined get_headref('refs/heads/master');
1058 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1059 if ($opt_r && $opt_o ne 'HEAD');
1060 system('git', 'update-ref', 'HEAD', "$orig_branch");
1061 unless ($opt_i) {
1062 system(qw(git checkout -f));
1063 die "checkout failed: $?\n" if $?;
1064 }
1065 }