]>
Commit | Line | Data |
---|---|---|
102fc37f RA |
1 | #!/usr/bin/env perl |
2 | # Copyright 2005, Ryan Anderson <ryan@michonline.com> | |
3 | # Distribution permitted under the GPL v2, as distributed | |
4 | # by the Free Software Foundation. | |
5 | # Later versions of the GPL at the discretion of Linus Torvalds | |
6 | # | |
7 | # Scan two git object-trees, and hardlink any common objects between them. | |
8 | ||
9 | use 5.006; | |
10 | use strict; | |
11 | use warnings; | |
12 | use Getopt::Long; | |
13 | ||
14 | sub get_canonical_form($); | |
15 | sub do_scan_directory($$$); | |
16 | sub compare_two_files($$); | |
17 | sub usage(); | |
18 | sub link_two_files($$); | |
19 | ||
20 | # stats | |
21 | my $total_linked = 0; | |
22 | my $total_already = 0; | |
23 | my ($linked,$already); | |
24 | ||
25 | my $fail_on_different_sizes = 0; | |
26 | my $help = 0; | |
27 | GetOptions("safe" => \$fail_on_different_sizes, | |
28 | "help" => \$help); | |
29 | ||
30 | usage() if $help; | |
31 | ||
32 | my (@dirs) = @ARGV; | |
33 | ||
34 | usage() if (!defined $dirs[0] || !defined $dirs[1]); | |
35 | ||
36 | $_ = get_canonical_form($_) foreach (@dirs); | |
37 | ||
38 | my $master_dir = pop @dirs; | |
39 | ||
40 | opendir(D,$master_dir . "objects/") | |
41 | or die "Failed to open $master_dir/objects/ : $!"; | |
42 | ||
43 | my @hashdirs = grep !/^\.{1,2}$/, readdir(D); | |
44 | ||
45 | foreach my $repo (@dirs) { | |
46 | $linked = 0; | |
47 | $already = 0; | |
48 | printf("Searching '%s' and '%s' for common objects and hardlinking them...\n", | |
49 | $master_dir,$repo); | |
50 | ||
51 | foreach my $hashdir (@hashdirs) { | |
52 | do_scan_directory($master_dir, $hashdir, $repo); | |
53 | } | |
54 | ||
55 | printf("Linked %d files, %d were already linked.\n",$linked, $already); | |
56 | ||
57 | $total_linked += $linked; | |
58 | $total_already += $already; | |
59 | } | |
60 | ||
61 | printf("Totals: Linked %d files, %d were already linked.\n", | |
62 | $total_linked, $total_already); | |
63 | ||
64 | ||
65 | sub do_scan_directory($$$) { | |
66 | my ($srcdir, $subdir, $dstdir) = @_; | |
67 | ||
68 | my $sfulldir = sprintf("%sobjects/%s/",$srcdir,$subdir); | |
69 | my $dfulldir = sprintf("%sobjects/%s/",$dstdir,$subdir); | |
70 | ||
71 | opendir(S,$sfulldir) | |
72 | or die "Failed to opendir $sfulldir: $!"; | |
73 | ||
74 | foreach my $file (grep(!/\.{1,2}$/, readdir(S))) { | |
75 | my $sfilename = $sfulldir . $file; | |
76 | my $dfilename = $dfulldir . $file; | |
77 | ||
78 | compare_two_files($sfilename,$dfilename); | |
79 | ||
80 | } | |
81 | closedir(S); | |
82 | } | |
83 | ||
84 | sub compare_two_files($$) { | |
85 | my ($sfilename, $dfilename) = @_; | |
86 | ||
87 | # Perl's stat returns relevant information as follows: | |
88 | # 0 = dev number | |
89 | # 1 = inode number | |
90 | # 7 = size | |
91 | my @sstatinfo = stat($sfilename); | |
92 | my @dstatinfo = stat($dfilename); | |
93 | ||
94 | if (@sstatinfo == 0 && @dstatinfo == 0) { | |
95 | die sprintf("Stat of both %s and %s failed: %s\n",$sfilename, $dfilename, $!); | |
96 | ||
97 | } elsif (@dstatinfo == 0) { | |
98 | return; | |
99 | } | |
100 | ||
101 | if ( ($sstatinfo[0] == $dstatinfo[0]) && | |
102 | ($sstatinfo[1] != $dstatinfo[1])) { | |
103 | if ($sstatinfo[7] == $dstatinfo[7]) { | |
104 | link_two_files($sfilename, $dfilename); | |
105 | ||
106 | } else { | |
107 | my $err = sprintf("ERROR: File sizes are not the same, cannot relink %s to %s.\n", | |
108 | $sfilename, $dfilename); | |
109 | if ($fail_on_different_sizes) { | |
110 | die $err; | |
111 | } else { | |
112 | warn $err; | |
113 | } | |
114 | } | |
115 | ||
116 | } elsif ( ($sstatinfo[0] == $dstatinfo[0]) && | |
117 | ($sstatinfo[1] == $dstatinfo[1])) { | |
118 | $already++; | |
119 | } | |
120 | } | |
121 | ||
122 | sub get_canonical_form($) { | |
123 | my $dir = shift; | |
124 | my $original = $dir; | |
125 | ||
126 | die "$dir is not a directory." unless -d $dir; | |
127 | ||
128 | $dir .= "/" unless $dir =~ m#/$#; | |
129 | $dir .= ".git/" unless $dir =~ m#\.git/$#; | |
130 | ||
131 | die "$original does not have a .git/ subdirectory.\n" unless -d $dir; | |
132 | ||
133 | return $dir; | |
134 | } | |
135 | ||
136 | sub link_two_files($$) { | |
137 | my ($sfilename, $dfilename) = @_; | |
138 | my $tmpdname = sprintf("%s.old",$dfilename); | |
139 | rename($dfilename,$tmpdname) | |
140 | or die sprintf("Failure renaming %s to %s: %s", | |
141 | $dfilename, $tmpdname, $!); | |
142 | ||
143 | if (! link($sfilename,$dfilename)) { | |
144 | my $failtxt = ""; | |
145 | unless (rename($tmpdname,$dfilename)) { | |
146 | $failtxt = sprintf( | |
147 | "Git Repository containing %s is probably corrupted, " . | |
148 | "please copy '%s' to '%s' to fix.\n", | |
149 | $tmpdname, $dfilename); | |
150 | } | |
151 | ||
152 | die sprintf("Failed to link %s to %s: %s\n%s" . | |
153 | $sfilename, $dfilename, | |
154 | $!, $dfilename, $failtxt); | |
155 | } | |
156 | ||
157 | unlink($tmpdname) | |
158 | or die sprintf("Unlink of %s failed: %s\n", | |
159 | $dfilename, $!); | |
160 | ||
161 | $linked++; | |
162 | } | |
163 | ||
164 | ||
165 | sub usage() { | |
166 | print("Usage: $0 [--safe] <dir> [<dir> ...] <master_dir> \n"); | |
167 | print("All directories should contain a .git/objects/ subdirectory.\n"); | |
168 | print("Options\n"); | |
169 | print("\t--safe\t" . | |
170 | "Stops if two objects with the same hash exist but " . | |
171 | "have different sizes. Default is to warn and continue.\n"); | |
172 | exit(1); | |
173 | } |