]>
Commit | Line | Data |
---|---|---|
2834bc27 VM |
1 | #ifndef PACK_OBJECTS_H |
2 | #define PACK_OBJECTS_H | |
3 | ||
43fa44fa | 4 | #include "object-store.h" |
9ac3f0e5 | 5 | #include "thread-utils.h" |
ef3ca954 | 6 | #include "pack.h" |
43fa44fa | 7 | |
7c141127 NTND |
8 | struct repository; |
9 | ||
9806f5a7 NTND |
10 | #define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024) |
11 | ||
0c6804ab | 12 | #define OE_DFS_STATE_BITS 2 |
b5c0cbd8 | 13 | #define OE_DEPTH_BITS 12 |
43fa44fa | 14 | #define OE_IN_PACK_BITS 10 |
0cb3c142 | 15 | #define OE_Z_DELTA_BITS 20 |
ac77d0c3 NTND |
16 | /* |
17 | * Note that oe_set_size() becomes expensive when the given size is | |
18 | * above this limit. Don't lower it too much. | |
19 | */ | |
20 | #define OE_SIZE_BITS 31 | |
9ac3f0e5 | 21 | #define OE_DELTA_SIZE_BITS 23 |
0c6804ab NTND |
22 | |
23 | /* | |
24 | * State flags for depth-first search used for analyzing delta cycles. | |
25 | * | |
26 | * The depth is measured in delta-links to the base (so if A is a delta | |
27 | * against B, then A has a depth of 1, and B a depth of 0). | |
28 | */ | |
29 | enum dfs_state { | |
30 | DFS_NONE = 0, | |
31 | DFS_ACTIVE, | |
32 | DFS_DONE, | |
33 | DFS_NUM_STATES | |
34 | }; | |
35 | ||
8d6ccce1 | 36 | /* |
3b13a5f2 NTND |
37 | * The size of struct nearly determines pack-objects's memory |
38 | * consumption. This struct is packed tight for that reason. When you | |
39 | * add or reorder something in this struct, think a bit about this. | |
40 | * | |
8d6ccce1 NTND |
41 | * basic object info |
42 | * ----------------- | |
43 | * idx.oid is filled up before delta searching starts. idx.crc32 is | |
44 | * only valid after the object is written out and will be used for | |
45 | * generating the index. idx.offset will be both gradually set and | |
46 | * used in writing phase (base objects get offset first, then deltas | |
47 | * refer to them) | |
48 | * | |
49 | * "size" is the uncompressed object size. Compressed size of the raw | |
50 | * data for an object in a pack is not stored anywhere but is computed | |
27a7d067 NTND |
51 | * and made available when reverse .idx is made. Note that when a |
52 | * delta is reused, "size" is the uncompressed _delta_ size, not the | |
53 | * canonical one after the delta has been applied. | |
8d6ccce1 NTND |
54 | * |
55 | * "hash" contains a path name hash which is used for sorting the | |
56 | * delta list and also during delta searching. Once prepare_pack() | |
57 | * returns it's no longer needed. | |
58 | * | |
59 | * source pack info | |
60 | * ---------------- | |
61 | * The (in_pack, in_pack_offset) tuple contains the location of the | |
62 | * object in the source pack. in_pack_header_size allows quickly | |
63 | * skipping the header and going straight to the zlib stream. | |
64 | * | |
65 | * "type" and "in_pack_type" both describe object type. in_pack_type | |
66 | * may contain a delta type, while type is always the canonical type. | |
67 | * | |
68 | * deltas | |
69 | * ------ | |
70 | * Delta links (delta, delta_child and delta_sibling) are created to | |
71 | * reflect that delta graph from the source pack then updated or added | |
72 | * during delta searching phase when we find better deltas. | |
73 | * | |
74 | * delta_child and delta_sibling are last needed in | |
75 | * compute_write_order(). "delta" and "delta_size" must remain valid | |
76 | * at object writing phase in case the delta is not cached. | |
77 | * | |
78 | * If a delta is cached in memory and is compressed, delta_data points | |
79 | * to the data and z_delta_size contains the compressed size. If it's | |
80 | * uncompressed [1], z_delta_size must be zero. delta_size is always | |
81 | * the uncompressed size and must be valid even if the delta is not | |
82 | * cached. | |
83 | * | |
84 | * [1] during try_delta phase we don't bother with compressing because | |
85 | * the delta could be quickly replaced with a better one. | |
86 | */ | |
2834bc27 VM |
87 | struct object_entry { |
88 | struct pack_idx_entry idx; | |
2834bc27 | 89 | void *delta_data; /* cached delta (uncompressed) */ |
3b13a5f2 | 90 | off_t in_pack_offset; |
2834bc27 | 91 | uint32_t hash; /* name hint hash */ |
ac77d0c3 NTND |
92 | unsigned size_:OE_SIZE_BITS; |
93 | unsigned size_valid:1; | |
898eba5e NTND |
94 | uint32_t delta_idx; /* delta base object */ |
95 | uint32_t delta_child_idx; /* deltified objects who bases me */ | |
96 | uint32_t delta_sibling_idx; /* other deltified objects who | |
97 | * uses the same base as me | |
98 | */ | |
0aca34e8 NTND |
99 | unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */ |
100 | unsigned delta_size_valid:1; | |
9ac3f0e5 | 101 | unsigned char in_pack_header_size; |
3b13a5f2 | 102 | unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */ |
0cb3c142 | 103 | unsigned z_delta_size:OE_Z_DELTA_BITS; |
3b13a5f2 | 104 | unsigned type_valid:1; |
3b13a5f2 | 105 | unsigned no_try_delta:1; |
9ac3f0e5 | 106 | unsigned type_:TYPE_BITS; |
fd9b1bae | 107 | unsigned in_pack_type:TYPE_BITS; /* could be delta */ |
c8d521fa | 108 | |
2834bc27 VM |
109 | unsigned preferred_base:1; /* |
110 | * we do not pack this, but is available | |
111 | * to be used as the base object to delta | |
112 | * objects against. | |
113 | */ | |
2834bc27 VM |
114 | unsigned tagged:1; /* near the very tip of refs */ |
115 | unsigned filled:1; /* assigned write-order */ | |
0c6804ab | 116 | unsigned dfs_state:OE_DFS_STATE_BITS; |
b5c0cbd8 | 117 | unsigned depth:OE_DEPTH_BITS; |
6a1e32d5 | 118 | unsigned ext_base:1; /* delta_idx points outside packlist */ |
4cf2143e JK |
119 | |
120 | /* | |
3b13a5f2 | 121 | * pahole results on 64-bit linux (gcc and clang) |
7dbabbbe | 122 | * |
9ac3f0e5 | 123 | * size: 80, bit_padding: 9 bits |
3b13a5f2 NTND |
124 | * |
125 | * and on 32-bit (gcc) | |
126 | * | |
9ac3f0e5 | 127 | * size: 76, bit_padding: 9 bits |
4cf2143e | 128 | */ |
2834bc27 VM |
129 | }; |
130 | ||
131 | struct packing_data { | |
7c141127 | 132 | struct repository *repo; |
2834bc27 VM |
133 | struct object_entry *objects; |
134 | uint32_t nr_objects, nr_alloc; | |
135 | ||
136 | int32_t *index; | |
137 | uint32_t index_size; | |
06af3bba NTND |
138 | |
139 | unsigned int *in_pack_pos; | |
9ac3f0e5 | 140 | unsigned long *delta_size; |
43fa44fa NTND |
141 | |
142 | /* | |
143 | * Only one of these can be non-NULL and they have different | |
144 | * sizes. if in_pack_by_idx is allocated, oe_in_pack() returns | |
145 | * the pack of an object using in_pack_idx field. If not, | |
146 | * in_pack[] array is used the same way as in_pack_pos[] | |
147 | */ | |
148 | struct packed_git **in_pack_by_idx; | |
149 | struct packed_git **in_pack; | |
ac77d0c3 | 150 | |
edb673cf PH |
151 | /* |
152 | * During packing with multiple threads, protect the in-core | |
153 | * object database from concurrent accesses. | |
154 | */ | |
155 | pthread_mutex_t odb_lock; | |
9ac3f0e5 | 156 | |
6a1e32d5 JK |
157 | /* |
158 | * This list contains entries for bases which we know the other side | |
159 | * has (e.g., via reachability bitmaps), but which aren't in our | |
160 | * "objects" list. | |
161 | */ | |
162 | struct object_entry *ext_bases; | |
163 | uint32_t nr_ext, alloc_ext; | |
164 | ||
ac77d0c3 | 165 | uintmax_t oe_size_limit; |
9ac3f0e5 | 166 | uintmax_t oe_delta_size_limit; |
108f5303 CC |
167 | |
168 | /* delta islands */ | |
169 | unsigned int *tree_depth; | |
fe0ac2fb | 170 | unsigned char *layer; |
5dfaf49a TB |
171 | |
172 | /* | |
173 | * Used when writing cruft packs. | |
174 | * | |
175 | * Object mtimes are stored in pack order when writing, but | |
176 | * written out in lexicographic (index) order. | |
177 | */ | |
178 | uint32_t *cruft_mtime; | |
2834bc27 VM |
179 | }; |
180 | ||
7c141127 | 181 | void prepare_packing_data(struct repository *r, struct packing_data *pdata); |
9ac3f0e5 | 182 | |
edb673cf | 183 | /* Protect access to object database */ |
9ac3f0e5 NTND |
184 | static inline void packing_data_lock(struct packing_data *pdata) |
185 | { | |
edb673cf | 186 | pthread_mutex_lock(&pdata->odb_lock); |
9ac3f0e5 NTND |
187 | } |
188 | static inline void packing_data_unlock(struct packing_data *pdata) | |
189 | { | |
edb673cf | 190 | pthread_mutex_unlock(&pdata->odb_lock); |
9ac3f0e5 NTND |
191 | } |
192 | ||
2834bc27 | 193 | struct object_entry *packlist_alloc(struct packing_data *pdata, |
3a37876b | 194 | const struct object_id *oid); |
2834bc27 VM |
195 | |
196 | struct object_entry *packlist_find(struct packing_data *pdata, | |
3a37876b | 197 | const struct object_id *oid); |
2834bc27 | 198 | |
68fb36eb VM |
199 | static inline uint32_t pack_name_hash(const char *name) |
200 | { | |
201 | uint32_t c, hash = 0; | |
202 | ||
203 | if (!name) | |
204 | return 0; | |
205 | ||
206 | /* | |
207 | * This effectively just creates a sortable number from the | |
208 | * last sixteen non-whitespace characters. Last characters | |
209 | * count "most", so things that end in ".c" sort together. | |
210 | */ | |
211 | while ((c = *name++) != 0) { | |
212 | if (isspace(c)) | |
213 | continue; | |
214 | hash = (hash >> 2) + (c << 24); | |
215 | } | |
216 | return hash; | |
217 | } | |
218 | ||
fd9b1bae NTND |
219 | static inline enum object_type oe_type(const struct object_entry *e) |
220 | { | |
221 | return e->type_valid ? e->type_ : OBJ_BAD; | |
222 | } | |
223 | ||
224 | static inline void oe_set_type(struct object_entry *e, | |
225 | enum object_type type) | |
226 | { | |
227 | if (type >= OBJ_ANY) | |
228 | BUG("OBJ_ANY cannot be set in pack-objects code"); | |
229 | ||
230 | e->type_valid = type >= OBJ_NONE; | |
231 | e->type_ = (unsigned)type; | |
232 | } | |
233 | ||
06af3bba NTND |
234 | static inline unsigned int oe_in_pack_pos(const struct packing_data *pack, |
235 | const struct object_entry *e) | |
236 | { | |
237 | return pack->in_pack_pos[e - pack->objects]; | |
238 | } | |
239 | ||
240 | static inline void oe_set_in_pack_pos(const struct packing_data *pack, | |
241 | const struct object_entry *e, | |
242 | unsigned int pos) | |
243 | { | |
244 | pack->in_pack_pos[e - pack->objects] = pos; | |
245 | } | |
246 | ||
43fa44fa NTND |
247 | static inline struct packed_git *oe_in_pack(const struct packing_data *pack, |
248 | const struct object_entry *e) | |
249 | { | |
250 | if (pack->in_pack_by_idx) | |
251 | return pack->in_pack_by_idx[e->in_pack_idx]; | |
252 | else | |
253 | return pack->in_pack[e - pack->objects]; | |
254 | } | |
255 | ||
c409d108 JK |
256 | void oe_map_new_pack(struct packing_data *pack); |
257 | ||
43fa44fa NTND |
258 | static inline void oe_set_in_pack(struct packing_data *pack, |
259 | struct object_entry *e, | |
260 | struct packed_git *p) | |
261 | { | |
f66e0401 JK |
262 | if (pack->in_pack_by_idx) { |
263 | if (p->index) { | |
264 | e->in_pack_idx = p->index; | |
265 | return; | |
266 | } | |
267 | /* | |
268 | * We're accessing packs by index, but this pack doesn't have | |
269 | * an index (e.g., because it was added since we created the | |
270 | * in_pack_by_idx array). Bail to oe_map_new_pack(), which | |
271 | * will convert us to using the full in_pack array, and then | |
272 | * fall through to our in_pack handling. | |
273 | */ | |
c409d108 | 274 | oe_map_new_pack(pack); |
f66e0401 JK |
275 | } |
276 | pack->in_pack[e - pack->objects] = p; | |
43fa44fa NTND |
277 | } |
278 | ||
6a1e32d5 JK |
279 | void oe_set_delta_ext(struct packing_data *pack, |
280 | struct object_entry *e, | |
a93c141d | 281 | const struct object_id *oid); |
6a1e32d5 | 282 | |
108f5303 CC |
283 | static inline unsigned int oe_tree_depth(struct packing_data *pack, |
284 | struct object_entry *e) | |
285 | { | |
286 | if (!pack->tree_depth) | |
287 | return 0; | |
288 | return pack->tree_depth[e - pack->objects]; | |
289 | } | |
290 | ||
fe0ac2fb CC |
291 | static inline void oe_set_layer(struct packing_data *pack, |
292 | struct object_entry *e, | |
293 | unsigned char layer) | |
294 | { | |
295 | if (!pack->layer) | |
e159b810 | 296 | CALLOC_ARRAY(pack->layer, pack->nr_alloc); |
fe0ac2fb CC |
297 | pack->layer[e - pack->objects] = layer; |
298 | } | |
299 | ||
5dfaf49a TB |
300 | static inline uint32_t oe_cruft_mtime(struct packing_data *pack, |
301 | struct object_entry *e) | |
302 | { | |
303 | if (!pack->cruft_mtime) | |
304 | return 0; | |
305 | return pack->cruft_mtime[e - pack->objects]; | |
306 | } | |
307 | ||
308 | static inline void oe_set_cruft_mtime(struct packing_data *pack, | |
309 | struct object_entry *e, | |
310 | uint32_t mtime) | |
311 | { | |
312 | if (!pack->cruft_mtime) | |
313 | CALLOC_ARRAY(pack->cruft_mtime, pack->nr_alloc); | |
314 | pack->cruft_mtime[e - pack->objects] = mtime; | |
315 | } | |
316 | ||
2834bc27 | 317 | #endif |