]>
Commit | Line | Data |
---|---|---|
65416758 JH |
1 | #include "cache.h" |
2 | #include "diff.h" | |
3 | #include "diffcore.h" | |
ba23bbc8 JH |
4 | |
5 | /* | |
6 | * Idea here is very simple. | |
7 | * | |
8 | * We have total of (sz-N+1) N-byte overlapping sequences in buf whose | |
9 | * size is sz. If the same N-byte sequence appears in both source and | |
10 | * destination, we say the byte that starts that sequence is shared | |
11 | * between them (i.e. copied from source to destination). | |
12 | * | |
13 | * For each possible N-byte sequence, if the source buffer has more | |
14 | * instances of it than the destination buffer, that means the | |
15 | * difference are the number of bytes not copied from source to | |
16 | * destination. If the counts are the same, everything was copied | |
17 | * from source to destination. If the destination has more, | |
18 | * everything was copied, and destination added more. | |
19 | * | |
20 | * We are doing an approximation so we do not really have to waste | |
21 | * memory by actually storing the sequence. We just hash them into | |
22 | * somewhere around 2^16 hashbuckets and count the occurrences. | |
23 | * | |
24 | * The length of the sequence is arbitrarily set to 8 for now. | |
25 | */ | |
26 | ||
27 | #define HASHBASE 65537 /* next_prime(2^16) */ | |
28 | ||
29 | static void hash_chars(unsigned char *buf, unsigned long sz, int *count) | |
65416758 | 30 | { |
ba23bbc8 | 31 | unsigned int accum1, accum2, i; |
65416758 | 32 | |
ba23bbc8 JH |
33 | /* an 8-byte shift register made of accum1 and accum2. New |
34 | * bytes come at LSB of accum2, and shifted up to accum1 | |
35 | */ | |
36 | for (i = accum1 = accum2 = 0; i < 7; i++, sz--) { | |
37 | accum1 = (accum1 << 8) | (accum2 >> 24); | |
38 | accum2 = (accum2 << 8) | *buf++; | |
39 | } | |
40 | while (sz) { | |
41 | accum1 = (accum1 << 8) | (accum2 >> 24); | |
42 | accum2 = (accum2 << 8) | *buf++; | |
43 | /* We want something that hashes permuted byte | |
44 | * sequences nicely; simpler hash like (accum1 ^ | |
45 | * accum2) does not perform as well. | |
46 | */ | |
47 | i = (accum1 + accum2 * 0x61) % HASHBASE; | |
48 | count[i]++; | |
49 | sz--; | |
65416758 | 50 | } |
65416758 JH |
51 | } |
52 | ||
53 | int diffcore_count_changes(void *src, unsigned long src_size, | |
54 | void *dst, unsigned long dst_size, | |
55 | unsigned long delta_limit, | |
56 | unsigned long *src_copied, | |
57 | unsigned long *literal_added) | |
58 | { | |
ba23bbc8 JH |
59 | int *src_count, *dst_count, i; |
60 | unsigned long sc, la; | |
61 | ||
62 | if (src_size < 8 || dst_size < 8) | |
63 | return -1; | |
64 | ||
65 | src_count = xcalloc(HASHBASE * 2, sizeof(int)); | |
66 | dst_count = src_count + HASHBASE; | |
67 | hash_chars(src, src_size, src_count); | |
68 | hash_chars(dst, dst_size, dst_count); | |
69 | ||
70 | sc = la = 0; | |
71 | for (i = 0; i < HASHBASE; i++) { | |
72 | if (src_count[i] < dst_count[i]) { | |
73 | la += dst_count[i] - src_count[i]; | |
74 | sc += src_count[i]; | |
75 | } | |
76 | else /* i.e. if (dst_count[i] <= src_count[i]) */ | |
77 | sc += dst_count[i]; | |
78 | } | |
79 | *src_copied = sc; | |
80 | *literal_added = la; | |
81 | free(src_count); | |
82 | return 0; | |
65416758 | 83 | } |