]> git.ipfire.org Git - thirdparty/git.git/blame - git-deltafy-script
git-unpack-objects: start parsing the actual packed data
[thirdparty/git.git] / git-deltafy-script
CommitLineData
e002a16b
NP
1#!/bin/bash
2
83ba99bc 3# Example script to deltify an entire GIT repository based on the commit list.
e002a16b
NP
4# The most recent version of a file is the reference and previous versions
5# are made delta against the best earlier version available. And so on for
53d4b460
NP
6# successive versions going back in time. This way the increasing delta
7# overhead is pushed towards older versions of any given file.
e002a16b
NP
8#
9# The -d argument sets a limit on the delta chain depth.
53d4b460
NP
10# If 0 is passed then everything is undeltafied. Limiting the delta
11# depth is meaningful for subsequent access performance to old revisions.
12# A value of 16 might be a good compromise between performance and good
13# space saving. Current default is unbounded.
14#
15# The --max-behind=30 argument is passed to git-mkdelta so as to keep
16# combinations and memory usage bounded a bit. If you have lots of memory
17# and CPU power you may remove it (or set to 0) to let git-mkdelta find the
18# best delta match regardless of the number of revisions for a given file.
19# You can also make the value smaller to make it faster and less
20# memory hungry. A value of 5 ought to still give pretty good results.
21# When set to 0 or omitted then look behind is unbounded. Note that
22# git-mkdelta might die with a segmentation fault in that case if it
23# runs out of memory. Note that the GIT repository will still be consistent
24# even if git-mkdelta dies unexpectedly.
e002a16b
NP
25
set -e

# Optional "-d <depth>": forwarded to git-mkdelta as --max-depth=<depth>.
# When -d is not given, max_depth stays empty and no depth option is passed.
max_depth=
if [ "$1" = "-d" ]; then
	# Without this check a bare "-d" would build "--max-depth=" and the
	# following "shift 2" would abort the script under "set -e" with no
	# explanation at all.
	if [ -z "$2" ]; then
		echo "usage: git-deltafy-script [-d <max-depth>]" >&2
		exit 1
	fi
	max_depth="--max-depth=$2"
	shift 2
fi

# Look-behind window handed to git-mkdelta; the same number is used as the
# count of revisions shared between consecutive batches of the history walk.
overlap=30
max_behind="--max-behind=$overlap"
e002a16b 33
53d4b460
NP
# Hand the objects collected for the current file to git-mkdelta.
#
# Globals (read):
#   list        whitespace-separated object ids to pass to git-mkdelta
#   curr_file   label shown in the progress message
#   max_depth   optional "--max-depth=N" option, may be empty
#   max_behind  optional "--max-behind=N" option, may be empty
#
# Does nothing when $list is empty. Otherwise prints a progress line and
# runs git-mkdelta (through xargs) with the ids as arguments.
process_list() {
	[ -n "$list" ] || return 0
	echo "Processing $curr_file"
	# Each option is passed only when it is set, so an empty value never
	# turns into an empty argument.
	printf '%s\n' "$list" |
	xargs git-mkdelta ${max_depth:+"$max_depth"} ${max_behind:+"$max_behind"} -v
}
40
curr_file=""
list=""

# Read commit ids from stdin and deltify, file by file, every blob/tree that
# git-diff-tree reports as modified ("M").
#
# Arguments: $1 - number of new revisions per batch (default 1000)
# Globals:   overlap (read)  - revisions carried over from the previous batch
#            curr_file, list - set for process_list, only inside the pipeline
# Calls:     git-diff-tree, process_list
deltify_history() {
	local batch_size=${1:-1000}
	local batch="" rev="" i sha1 file

	while true; do
		# Batch the revisions to give the script a chance to scale with
		# long revision lists. Each batch starts with the last $overlap
		# revisions of the previous one to account for the processing
		# discontinuity.
		batch="$(printf '%s' "$batch" | tail -n "$overlap")"
		# Command substitution strips the trailing newline; put it back,
		# otherwise the first new revision would be glued onto the last
		# carried-over one.
		[ -z "$batch" ] || batch+=$'\n'

		for ((i = 0; i < batch_size; i++)); do
			read -r rev || break
			batch+="$rev"$'\n'
		done

		printf '%s' "$batch" |
		git-diff-tree -r -t --stdin |
		awk '/^:/ { if ($5 == "M") printf "%s %s\n%s %s\n", $4, $6, $3, $6 }' |
		LC_ALL=C sort -s -k 2 | uniq |
		{
			# This group is a pipeline stage and therefore runs in a
			# subshell: whatever is still in $list when the input ends
			# must be flushed here, it is not visible afterwards.
			curr_file=""
			list=""
			while read -r sha1 file; do
				if [ "$file" = "$curr_file" ]; then
					list="$list $sha1"
				else
					process_list
					curr_file="$file"
					list="$sha1"
				fi
			done
			process_list
		}

		# An empty $rev means the last read hit end of input.
		[ "$rev" ] || break
	done
}

git-rev-list HEAD | deltify_history
71
# Read commit ids from stdin and print, one per line, the id found on the
# "tree" header that opens each commit object.
# Calls: git-cat-file
list_root_trees() {
	local commit
	while read -r commit; do
		# Only the first line of the commit is examined; the anchored
		# pattern and plain "q" avoid relying on GNU sed's "Q".
		git-cat-file commit "$commit" |
		sed -n '1{s/^tree //p;q;}'
	done
}

# Finally deltify the top-level tree objects of all commits as one list.
curr_file="root directory"
list="$(git-rev-list HEAD | list_root_trees)"
process_list
81