]>
Commit | Line | Data |
---|---|---|
b492bbd8 | 1 | #!/usr/bin/perl |
73f89360 BD |
2 | # |
3 | # This tool will print vaguely pretty information about a pack. It | |
3b1eb124 | 4 | # expects the output of "git verify-pack -v" as input on stdin. |
73f89360 | 5 | # |
3b1eb124 | 6 | # $ git verify-pack -v | packinfo.pl |
73f89360 BD |
7 | # |
8 | # This prints some full-pack statistics; currently "all sizes", "all | |
9 | # path sizes", "tree sizes", "tree path sizes", and "depths". | |
10 | # | |
11 | # * "all sizes" stats are across every object size in the file; | |
12 | # full sizes for base objects, and delta size for deltas. | |
13 | # * "all path sizes" stats are across all objects' "path sizes". | |
14 | # A path size is the sum of the size of the delta chain, including the | |
15 | # base object. In other words, it's how many bytes need be read to | |
16 | # reassemble the file from deltas. | |
17 | # * "tree sizes" are object sizes grouped into delta trees. | |
18 | # * "tree path sizes" are path sizes grouped into delta trees. | |
19 | # * "depths" should be obvious. | |
20 | # | |
21 | # When run as: | |
22 | # | |
3b1eb124 | 23 | # $ git verify-pack -v | packinfo.pl -tree |
73f89360 BD |
24 | # |
25 | # the trees of objects are output along with the stats. This looks | |
26 | # like: | |
27 | # | |
28 | # 0 commit 031321c6... 803 803 | |
29 | # | |
30 | # 0 blob 03156f21... 1767 1767 | |
31 | # 1 blob f52a9d7f... 10 1777 | |
32 | # 2 blob a8cc5739... 51 1828 | |
33 | # 3 blob 660e90b1... 15 1843 | |
34 | # 4 blob 0cb8e3bb... 33 1876 | |
35 | # 2 blob e48607f0... 311 2088 | |
36 | # size: count 6 total 2187 min 10 max 1767 mean 364.50 median 51 std_dev 635.85 | |
37 | # path size: count 6 total 11179 min 1767 max 2088 mean 1863.17 median 1843 std_dev 107.26 | |
38 | # | |
39 | # The first number after the sha1 is the object size, the second | |
40 | # number is the path size. The statistics are across all objects in | |
41 | # the previous delta tree. Obviously they are omitted for trees of | |
42 | # one object. | |
43 | # | |
44 | # When run as: | |
45 | # | |
3b1eb124 | 46 | # $ git verify-pack -v | packinfo.pl -tree -filenames |
73f89360 BD |
47 | # |
48 | # it adds filenames to the tree. Getting this information is slow: | |
49 | # | |
50 | # 0 blob 03156f21... 1767 1767 Documentation/git-lost-found.txt @ tags/v1.2.0~142 | |
51 | # 1 blob f52a9d7f... 10 1777 Documentation/git-lost-found.txt @ tags/v1.5.0-rc1~74 | |
52 | # 2 blob a8cc5739... 51 1828 Documentation/git-lost+found.txt @ tags/v0.99.9h^0 | |
53 | # 3 blob 660e90b1... 15 1843 Documentation/git-lost+found.txt @ master~3222^2~2 | |
54 | # 4 blob 0cb8e3bb... 33 1876 Documentation/git-lost+found.txt @ master~3222^2~3 | |
55 | # 2 blob e48607f0... 311 2088 Documentation/git-lost-found.txt @ tags/v1.5.2-rc3~4 | |
56 | # size: count 6 total 2187 min 10 max 1767 mean 364.50 median 51 std_dev 635.85 | |
57 | # path size: count 6 total 11179 min 1767 max 2088 mean 1863.17 median 1843 std_dev 107.26 | |
58 | # | |
59 | # When run as: | |
60 | # | |
3b1eb124 | 61 | # $ git verify-pack -v | packinfo.pl -dump |
73f89360 BD |
62 | # |
63 | # it prints out "sha1 size pathsize depth" for each sha1 in lexical | |
64 | # order. | |
65 | # | |
66 | # 000079a2eaef17b7eae70e1f0f635557ea67b644 30 472 7 | |
67 | # 00013cafe6980411aa6fdd940784917b5ff50f0a 44 1542 4 | |
68 | # 000182eacf99cde27d5916aa415921924b82972c 499 499 0 | |
69 | # ... | |
70 | # | |
71 | # This is handy for comparing two packs. Adding "-filenames" will add | |
72 | # filenames, as per "-tree -filenames" above. | |
73 | ||
74 | use strict; | |
75 | use Getopt::Long; | |
76 | ||
# Command-line switches; all default to off.
#   -tree       print every delta tree along with the statistics
#   -filenames  label objects with "path @ revision" (only consulted
#               together with -tree or -dump)
#   -dump       print one line per object instead of the summary statistics
my $filenames = 0;
my $tree = 0;
my $dump = 0;

# GetOptions returns false when it meets an unknown or malformed option.
# Stop right away in that case rather than reading the whole pack listing
# and reporting in a mode the user did not ask for.
GetOptions("tree" => \$tree,
           "filenames" => \$filenames,
           "dump" => \$dump)
    or die "usage: git verify-pack -v <pack>.idx | $0 [-tree] [-filenames] [-dump]\n";
83 | ||
# Tables built from the "git verify-pack -v" listing on stdin (and, with
# -filenames, from git itself).  Hashes are keyed by object id.
my %parents;    # object => delta base (undef for non-delta objects)
my %children;   # delta base => list of objects stored as deltas against it
my %sizes;      # object => size column of the listing
my @roots;      # objects that have no delta base
my %paths;      # object => "path @ revision" label (only with -filenames)
my %types;      # object => type column (commit, tree, blob, ...)
my @commits;    # ids of all commit objects, in input order
my %names;      # commit => name reported by "git name-rev"
my %depths;     # object => delta depth (0 for non-delta objects)
my @depths;     # every delta depth, in input order, for the statistics

# parse_pack_line(LINE) - split one line of "git verify-pack -v" output.
#
# Returns (sha1, type, size, depth, parent) for an object line; depth is 0
# and parent is undef when the object is not a delta.  Returns the empty
# list for anything else (summary lines, blank lines, ...).
sub parse_pack_line {
    my ($line) = @_;
    # split ' ' ignores leading whitespace, unlike split /\s+/.
    my ($sha1, $type, $size, $space, $offset, $depth, $parent) = split(' ', $line);
    # Object ids are 40 hex digits for SHA-1 and 64 for SHA-256 repositories.
    return unless defined $sha1 && $sha1 =~ /^(?:[0-9a-f]{40}|[0-9a-f]{64})$/;
    return ($sha1, $type, $size, $depth || 0, $parent);
}

while (my $line = <STDIN>) {
    my ($sha1, $type, $size, $depth, $parent) = parse_pack_line($line)
        or next;
    $depths{$sha1} = $depth;
    push(@depths, $depth);
    push(@commits, $sha1) if ($type eq 'commit');
    $parents{$sha1} = $parent;
    $types{$sha1} = $type;
    $sizes{$sha1} = $size;
    if ($parent) {
        push(@{$children{$parent}}, $sha1);
    } else {
        # No delta base: this object is the root of its own delta tree.
        # (Previously it was also filed under an undef key in %children.)
        push(@roots, $sha1);
    }
}
107 | ||
# With -filenames (and one of -tree / -dump), label every object that is
# reachable from a commit in the pack with "path @ revision-name".  This
# runs "git ls-tree" once per commit, so it is slow on large packs.
if ($filenames && ($tree || $dump)) {
    # List-form pipe opens run git directly, without a shell, and the
    # result is checked so a missing git does not silently produce an
    # unlabelled report.
    open(my $names_fh, '-|', 'git', 'name-rev', '--all')
        or die "cannot run git name-rev: $!\n";
    while (my $line = <$names_fh>) {
        if ($line =~ /^(\S+)\s+(.*)$/) {
            my ($sha1, $name) = ($1, $2);
            $names{$sha1} = $name;
        }
    }
    # close() on a pipe reports the child's exit status; labels are
    # best-effort, so only warn.
    close($names_fh)
        or warn "git name-rev --all exited with status $?\n";

    for my $commit (@commits) {
        my $name = $names{$commit};
        open(my $tree_fh, '-|', 'git', 'ls-tree', '-t', '-r', $commit)
            or die "cannot run git ls-tree: $!\n";
        print STDERR "Plumbing tree $name\n";
        while (my $line = <$tree_fh>) {
            # Each entry reads "mode type sha1 path"; only the object id
            # and the path are needed.
            if ($line =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(.*)$/) {
                my ($sha1, $path) = ($3, $4);
                # An object seen in several commits keeps the label of the
                # last commit processed.
                $paths{$sha1} = "$path @ $name";
            }
        }
        close($tree_fh)
            or warn "git ls-tree $commit exited with status $?\n";
    }
}
131 | ||
# stats(LIST) - summary statistics for a list of numbers.
#
# Returns the seven-element list
#   (count, total, min, max, mean, median, std_dev)
# where median is the element at index int(count / 2) of the sorted data
# (the upper of the two middle values for an even count) and std_dev is
# the population standard deviation (the divisor is count).
#
# An empty LIST yields seven zeros.  It used to die with "Illegal division
# by zero", which aborted the whole report when stdin held no object lines.
sub stats {
    my @data = sort {$a <=> $b} @_;
    my $count = scalar @data;
    return (0, 0, 0, 0, 0, 0, 0) unless $count;

    my $min = $data[0];
    my $max = $data[-1];
    my $total = 0;
    for my $datum (@data) {
        $total += $datum;
    }
    my $mean = $total / $count;
    my $median = $data[int($count / 2)];
    my $diff_sum = 0;
    for my $datum (@data) {
        $diff_sum += ($datum - $mean)**2;
    }
    my $std_dev = sqrt($diff_sum / $count);
    return ($count, $total, $min, $max, $mean, $median, $std_dev);
}
150 | ||
# print_stats(NAME, LIST) - print a one-line summary of LIST to stdout,
# prefixed with NAME.  The figures come from stats(), in the order
# count, total, min, max, mean, median, std_dev.
sub print_stats {
    my ($name, @values) = @_;
    my @summary = stats(@values);
    printf("%s: count %s total %s min %s max %s mean %.2f median %s std_dev %.2f\n",
           $name, @summary);
}
157 | ||
# Accumulators filled in by dig().
my @sizes;            # object sizes collected since the caller last reset it
my @path_sizes;       # path sizes collected since the caller last reset it
my @all_sizes;        # object sizes of every object visited
my @all_path_sizes;   # path sizes of every object visited
my %path_sizes;       # object id => path size

# dig(SHA1, DEPTH, PATH_SIZE) - walk the delta tree below SHA1 depth-first.
#
# DEPTH is the distance from the tree's root and PATH_SIZE the summed size
# of the objects between the root and SHA1, not counting SHA1 itself.
# Each visited object's size and path size are recorded in the
# accumulators above; with -tree, one line per object is also printed,
# showing its depth number, $depth spaces, its type, id, size, path size
# and (when known) its path label.
sub dig {
    my ($sha1, $depth, $path_size) = @_;
    my $own_size = $sizes{$sha1};
    my $own_path_size = $path_size + $own_size;

    push(@sizes, $own_size);
    push(@path_sizes, $own_path_size);
    push(@all_sizes, $own_size);
    push(@all_path_sizes, $own_path_size);
    $path_sizes{$sha1} = $own_path_size;

    printf("%3d%s %6s %s %8d %8d %s\n",
           $depth, (" " x $depth), $types{$sha1},
           $sha1, $own_size, $own_path_size, $paths{$sha1})
        if $tree;

    # Every object stored as a delta against this one sits one level down.
    dig($_, $depth + 1, $own_path_size) for @{$children{$sha1}};
}
181 | ||
# One entry per delta tree: the summed object sizes and the summed path
# sizes of all objects in that tree.
my @tree_sizes;
my @tree_path_sizes;

# Walk every delta tree from its root, collecting the per-tree totals and,
# with -tree, printing the tree followed by its own statistics.
foreach my $root (@roots) {
    # Start with empty per-tree accumulators.
    @sizes = ();
    @path_sizes = ();
    dig($root, 0, 0);

    # Element 1 of the list returned by stats() is the total.
    push(@tree_sizes, (stats(@sizes))[1]);
    push(@tree_path_sizes, (stats(@path_sizes))[1]);

    next unless $tree;
    # Statistics are omitted for a tree that holds a single object.
    if (@sizes > 1) {
        print_stats(" size", @sizes);
        print_stats("path size", @path_sizes);
    }
    print "\n";
}
201 | ||
# Final output: with -dump, one "sha1 size pathsize depth path" line per
# object in lexical order of the id; otherwise the full-pack statistics.
if ($dump) {
    foreach my $sha1 (sort keys %sizes) {
        print join(' ', $sha1, $sizes{$sha1}, $path_sizes{$sha1},
                   $depths{$sha1}, $paths{$sha1}) . "\n";
    }
} else {
    my @reports = (
        [" all sizes",      \@all_sizes],
        [" all path sizes", \@all_path_sizes],
        [" tree sizes",     \@tree_sizes],
        ["tree path sizes", \@tree_path_sizes],
        [" depths",         \@depths],
    );
    foreach my $report (@reports) {
        my ($label, $values) = @$report;
        print_stats($label, @$values);
    }
}