]>
Commit | Line | Data |
---|---|---|
e002a16b NP |
1 | #!/bin/bash |
2 | ||
83ba99bc | 3 | # Example script to deltify an entire GIT repository based on the commit list. |
e002a16b NP |
4 | # The most recent version of a file is the reference and previous versions |
5 | # are made delta against the best earlier version available. And so on for | |
53d4b460 NP |
6 | # successive versions going back in time. This way the increasing delta |
7 | # overhead is pushed towards older versions of any given file. | |
e002a16b NP |
8 | # |
9 | # The -d argument lets you set a limit on the delta chain depth. | |
53d4b460 NP |
10 | # If 0 is passed then everything is undeltafied. Limiting the delta |
11 | # depth is meaningful for subsequent access performance to old revisions. | |
12 | # A value of 16 might be a good compromise between performance and good | |
13 | # space saving. Current default is unbounded. | |
14 | # | |
15 | # The --max-behind=30 argument is passed to git-mkdelta so as to keep | |
16 | # combinations and memory usage bounded a bit. If you have lots of memory | |
17 | # and CPU power you may remove it (or set to 0) to let git-mkdelta find the | |
18 | # best delta match regardless of the number of revisions for a given file. | |
19 | # You can also make the value smaller to make it faster and less | |
20 | # memory hungry. A value of 5 ought to still give pretty good results. | |
21 | # When set to 0 or omitted then look behind is unbounded. Note that | |
22 | # git-mkdelta might die with a segmentation fault in that case if it | |
23 | # runs out of memory. Note that the GIT repository will still be consistent | |
24 | # even if git-mkdelta dies unexpectedly. | |
e002a16b NP |
25 | |
set -e

# Optional "-d <depth>": forwarded to git-mkdelta as --max-depth=<depth>.
# When -d is absent, max_depth stays empty and no depth option is passed.
max_depth=
if [ "${1-}" = "-d" ]; then
	# Without this check a bare "-d" made "shift 2" fail and, because of
	# "set -e", the script exited without any explanation.
	if [ "$#" -lt 2 ]; then
		echo "usage: ${0##*/} [-d <max-depth>]" >&2
		exit 1
	fi
	max_depth="--max-depth=$2"
	shift 2
fi

# Look-behind window handed to git-mkdelta; the same number is used as the
# count of revisions shared between two consecutive batches.
overlap=30
max_behind="--max-behind=$overlap"
e002a16b | 33 | |
53d4b460 NP |
# Run git-mkdelta on the objects collected for one path.
#
# Globals read:
#   list        whitespace-separated object ids to hand to git-mkdelta
#   curr_file   label printed in the progress message
#   max_depth   "--max-depth=N" option string, or empty for none
#   max_behind  "--max-behind=N" option string, or empty for none
# Outputs: a "Processing <label>" line followed by git-mkdelta's own output.
# Does nothing when list is empty.
process_list() {
	if [ -n "$list" ]; then
		printf 'Processing %s\n' "$curr_file"
		# xargs turns the ids read from stdin into git-mkdelta arguments.
		# ${var:+"$var"} drops an empty option entirely instead of
		# passing an empty argument.
		printf '%s\n' "$list" |
		xargs git-mkdelta ${max_depth:+"$max_depth"} ${max_behind:+"$max_behind"} -v
	fi
}
40 | ||
# Walk the history in batches and, path by path, hand every object id that
# git-diff-tree reports as modified ("M") to process_list.
#
# Revisions are batched in groups of $batch_size to give this a chance to
# scale with repositories containing long revision lists. Each batch also
# starts with the last $overlap revisions of the previous one (the size of
# git-mkdelta's look-behind value) to account for the processing
# discontinuity at batch boundaries.
batch_size=1000
revs=()
at_eof=

git-rev-list HEAD |
while [ -z "$at_eof" ]; do
	# Keep only the tail of the previous batch. An array is used so that
	# the carried-over revisions and the new ones stay one per line; the
	# former string-based version glued the last carried-over id to the
	# first new id because $(...) strips the trailing newline.
	keep_from=$(( ${#revs[@]} > overlap ? ${#revs[@]} - overlap : 0 ))
	revs=("${revs[@]:keep_from}")

	fresh=0
	for (( n = 0; n < batch_size; n++ )); do
		if ! read -r rev; then
			at_eof=1
			break
		fi
		revs+=("$rev")
		fresh=$(( fresh + 1 ))
	done

	# Nothing new was read: the remaining entries are overlap that has
	# already been processed with the previous batch.
	[ "$fresh" -gt 0 ] || break

	printf '%s\n' "${revs[@]}" |
	git-diff-tree -r -t --stdin |
	awk '/^:/ { if ($5 == "M") printf "%s %s\n%s %s\n", $4, $6, $3, $6 }' |
	LC_ALL=C sort -s -k 2 | uniq |
	{
		# This group is the last stage of a pipeline and therefore runs in
		# a subshell: list/curr_file set here are not visible afterwards.
		# The final path of the batch must be flushed from inside the
		# group, otherwise it would never reach git-mkdelta.
		curr_file=""
		list=""
		while read -r sha1 file; do
			if [ "$file" = "$curr_file" ]; then
				list="$list $sha1"
			else
				process_list
				curr_file="$file"
				list="$sha1"
			fi
		done
		process_list
	}
done
71 | ||
# Last pass: collect the tree id recorded in every commit reachable from
# HEAD and deltify those trees as one list labelled "root directory".
# NOTE(review): presumably needed because the git-diff-tree pass only names
# entries inside trees, never the top-level tree itself -- confirm.
curr_file="root directory"
list="$(
	git-rev-list HEAD |
	while read -r commit; do
		# Only the first line of the commit object is examined: print it
		# with the "tree " prefix removed, then quit. Under -n the portable
		# "q" behaves like the GNU-only "Q" used previously.
		git-cat-file commit "$commit" |
		sed -n -e 's/^tree //p' -e q
	done
)"
process_list
81 |