]>
Commit | Line | Data |
---|---|---|
90c62155 SB |
1 | #ifndef OBJECT_STORE_H |
2 | #define OBJECT_STORE_H | |
3 | ||
ef3ca954 | 4 | #include "cache.h" |
d88f9fdf | 5 | #include "oidmap.h" |
14727b7f | 6 | #include "list.h" |
fe299ec5 | 7 | #include "oid-array.h" |
14727b7f | 8 | #include "strbuf.h" |
31877c9a | 9 | #include "thread-utils.h" |
cf2dc1c2 EW |
10 | #include "khash.h" |
11 | #include "dir.h" | |
92d8ed8a | 12 | #include "oidtree.h" |
09ef6617 | 13 | #include "oidset.h" |
d88f9fdf | 14 | |
263db403 JK |
15 | struct object_directory { |
16 | struct object_directory *next; | |
0d4a1321 | 17 | |
0d4a1321 | 18 | /* |
3a2e0824 JK |
19 | * Used to store the results of readdir(3) calls when we are OK |
20 | * sacrificing accuracy due to races for speed. That includes | |
61c7711c | 21 | * object existence with OBJECT_INFO_QUICK, as well as |
3a2e0824 JK |
22 | * our search for unique abbreviated hashes. Don't use it for tasks |
23 | * requiring greater accuracy! | |
24 | * | |
25 | * Be sure to call odb_loose_cache() before using. | |
0d4a1321 | 26 | */ |
33f379ee | 27 | uint32_t loose_objects_subdir_seen[8]; /* 256 bits */ |
92d8ed8a | 28 | struct oidtree *loose_objects_cache; |
0d4a1321 | 29 | |
77f012e8 SB |
30 | /* |
31 | * Path to the alternative object store. If this is a relative path, | |
32 | * it is relative to the current working directory. | |
33 | */ | |
f0eaf638 | 34 | char *path; |
031dc927 | 35 | }; |
f0eaf638 | 36 | |
cf2dc1c2 | 37 | KHASH_INIT(odb_path_map, const char * /* key: odb_path */, |
dd3c8a72 | 38 | struct object_directory *, 1, fspathhash, fspatheq) |
cf2dc1c2 | 39 | |
13068bf0 | 40 | void prepare_alt_odb(struct repository *r); |
0d4a1321 | 41 | char *compute_alternate_path(const char *path, struct strbuf *err); |
f57a7396 | 42 | struct object_directory *find_odb(struct repository *r, const char *obj_dir); |
263db403 | 43 | typedef int alt_odb_fn(struct object_directory *, void *); |
0d4a1321 | 44 | int foreach_alt_odb(alt_odb_fn, void*); |
709dfa69 JK |
45 | typedef void alternate_ref_fn(const struct object_id *oid, void *); |
46 | void for_each_alternate_ref(alternate_ref_fn, void *); | |
0d4a1321 | 47 | |
0d4a1321 SB |
48 | /* |
49 | * Add the directory to the on-disk alternates file; the new entry will also | |
50 | * take effect in the current process. | |
51 | */ | |
52 | void add_to_alternates_file(const char *dir); | |
53 | ||
54 | /* | |
55 | * Add the directory to the in-memory list of alternates (along with any | |
56 | * recursive alternates it points to), but do not modify the on-disk alternates | |
57 | * file. | |
58 | */ | |
59 | void add_to_alternates_memory(const char *dir); | |
60 | ||
0000d654 RS |
61 | /* |
62 | * Populate and return the loose object cache corresponding to the | |
63 | * given object ID. | |
64 | */ | |
92d8ed8a | 65 | struct oidtree *odb_loose_cache(struct object_directory *odb, |
0000d654 RS |
66 | const struct object_id *oid); |
67 | ||
d4e19e51 RS |
68 | /* Empty the loose object cache for the specified object directory. */ |
69 | void odb_clear_loose_cache(struct object_directory *odb); | |
70 | ||
a80d72db | 71 | struct packed_git { |
ec48540f | 72 | struct hashmap_entry packmap_ent; |
a80d72db SB |
73 | struct packed_git *next; |
74 | struct list_head mru; | |
75 | struct pack_window *windows; | |
76 | off_t pack_size; | |
77 | const void *index_data; | |
78 | size_t index_size; | |
79 | uint32_t num_objects; | |
629dffc4 | 80 | uint32_t crc_offset; |
09ef6617 | 81 | struct oidset bad_objects; |
a80d72db SB |
82 | int index_version; |
83 | time_t mtime; | |
84 | int pack_fd; | |
43fa44fa | 85 | int index; /* for builtin/pack-objects.c */ |
a80d72db SB |
86 | unsigned pack_local:1, |
87 | pack_keep:1, | |
ed7e5fc3 | 88 | pack_keep_in_core:1, |
a80d72db SB |
89 | freshened:1, |
90 | do_not_close:1, | |
af96fe33 DS |
91 | pack_promisor:1, |
92 | multi_pack_index:1; | |
538b1523 | 93 | unsigned char hash[GIT_MAX_RAWSZ]; |
a80d72db | 94 | struct revindex_entry *revindex; |
2f4ba2a8 TB |
95 | const uint32_t *revindex_data; |
96 | const uint32_t *revindex_map; | |
97 | size_t revindex_size; | |
a80d72db SB |
98 | /* something like ".git/objects/pack/xxxxx.pack" */ |
99 | char pack_name[FLEX_ARRAY]; /* more */ | |
100 | }; | |
101 | ||
4d80560c DS |
102 | struct multi_pack_index; |
103 | ||
ec48540f CS |
104 | static inline int pack_map_entry_cmp(const void *unused_cmp_data, |
105 | const struct hashmap_entry *entry, | |
106 | const struct hashmap_entry *entry2, | |
107 | const void *keydata) | |
108 | { | |
109 | const char *key = keydata; | |
110 | const struct packed_git *pg1, *pg2; | |
111 | ||
112 | pg1 = container_of(entry, const struct packed_git, packmap_ent); | |
113 | pg2 = container_of(entry2, const struct packed_git, packmap_ent); | |
114 | ||
115 | return strcmp(pg1->pack_name, key ? key : pg2->pack_name); | |
116 | } | |
117 | ||
90c62155 SB |
118 | struct raw_object_store { |
119 | /* | |
f0eaf638 JK |
120 | * Set of all object directories; the main directory is first (and |
121 | * cannot be NULL after initialization). Subsequent directories are | |
122 | * alternates. | |
90c62155 | 123 | */ |
f0eaf638 JK |
124 | struct object_directory *odb; |
125 | struct object_directory **odb_tail; | |
cf2dc1c2 EW |
126 | kh_odb_path_map_t *odb_by_path; |
127 | ||
f0eaf638 | 128 | int loaded_alternates; |
90c62155 | 129 | |
f0eaf638 JK |
130 | /* |
131 | * A list of alternate object directories loaded from the environment; | |
132 | * this should not generally need to be accessed directly, but will | |
133 | * populate the "odb" list when prepare_alt_odb() is run. | |
134 | */ | |
90c62155 | 135 | char *alternate_db; |
031dc927 | 136 | |
d88f9fdf SB |
137 | /* |
138 | * Objects that should be substituted by other objects | |
139 | * (see git-replace(1)). | |
140 | */ | |
c1274495 | 141 | struct oidmap *replace_map; |
b1fc9da1 MT |
142 | unsigned replace_map_initialized : 1; |
143 | pthread_mutex_t replace_mutex; /* protect object replace functions */ | |
d88f9fdf | 144 | |
85277506 JT |
145 | struct commit_graph *commit_graph; |
146 | unsigned commit_graph_attempted : 1; /* if loading has been attempted */ | |
147 | ||
c4d25228 DS |
148 | /* |
149 | * private data | |
150 | * | |
151 | * should only be accessed directly by packfile.c and midx.c | |
152 | */ | |
153 | struct multi_pack_index *multi_pack_index; | |
154 | ||
a80d72db SB |
155 | /* |
156 | * private data | |
157 | * | |
158 | * should only be accessed directly by packfile.c | |
159 | */ | |
160 | ||
161 | struct packed_git *packed_git; | |
162 | /* A most-recently-used ordered version of the packed_git list. */ | |
163 | struct list_head packed_git_mru; | |
5508f693 | 164 | |
20b031fe JK |
165 | struct { |
166 | struct packed_git **packs; | |
167 | unsigned flags; | |
168 | } kept_pack_cache; | |
169 | ||
ec48540f CS |
170 | /* |
171 | * A map of packfiles to packed_git structs for tracking which | |
172 | * packs have been loaded already. | |
173 | */ | |
174 | struct hashmap pack_map; | |
175 | ||
9a00580d SB |
176 | /* |
177 | * A fast, rough count of the number of objects in the repository. | |
178 | * These two fields are not meant for direct access. Use | |
179 | * approximate_object_count() instead. | |
180 | */ | |
181 | unsigned long approximate_object_count; | |
182 | unsigned approximate_object_count_valid : 1; | |
183 | ||
5508f693 SB |
184 | /* |
185 | * Whether packed_git has already been populated with this repository's | |
186 | * packs. | |
187 | */ | |
188 | unsigned packed_git_initialized : 1; | |
90c62155 SB |
189 | }; |
190 | ||
191 | struct raw_object_store *raw_object_store_new(void); | |
192 | void raw_object_store_clear(struct raw_object_store *o); | |
193 | ||
cf78ae4f SB |
194 | /* |
195 | * Put in `buf` the name of the file in the local object database that | |
514c5fdd | 196 | * would be used to store a loose object with the specified oid. |
cf78ae4f | 197 | */ |
514c5fdd JK |
198 | const char *loose_object_path(struct repository *r, struct strbuf *buf, |
199 | const struct object_id *oid); | |
cf78ae4f | 200 | |
514c5fdd JK |
201 | void *map_loose_object(struct repository *r, const struct object_id *oid, |
202 | unsigned long *size); | |
e35454fa | 203 | |
55454427 | 204 | void *read_object_file_extended(struct repository *r, |
ad6dad09 DL |
205 | const struct object_id *oid, |
206 | enum object_type *type, | |
207 | unsigned long *size, int lookup_replace); | |
afd69dcc SB |
208 | static inline void *repo_read_object_file(struct repository *r, |
209 | const struct object_id *oid, | |
210 | enum object_type *type, | |
211 | unsigned long *size) | |
cbd53a21 | 212 | { |
afd69dcc | 213 | return read_object_file_extended(r, oid, type, size, 1); |
cbd53a21 | 214 | } |
afd69dcc SB |
215 | #ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS |
216 | #define read_object_file(oid, type, size) repo_read_object_file(the_repository, oid, type, size) | |
217 | #endif | |
cbd53a21 SB |
218 | |
219 | /* Read and unpack an object file into memory, write memory to an object file */ | |
220 | int oid_object_info(struct repository *r, const struct object_id *, unsigned long *); | |
221 | ||
2dcde20e MT |
222 | int hash_object_file(const struct git_hash_algo *algo, const void *buf, |
223 | unsigned long len, const char *type, | |
224 | struct object_id *oid); | |
cbd53a21 | 225 | |
55454427 | 226 | int write_object_file(const void *buf, unsigned long len, |
ad6dad09 | 227 | const char *type, struct object_id *oid); |
cbd53a21 | 228 | |
55454427 | 229 | int hash_object_file_literally(const void *buf, unsigned long len, |
ad6dad09 DL |
230 | const char *type, struct object_id *oid, |
231 | unsigned flags); | |
cbd53a21 | 232 | |
60440d72 JN |
233 | /* |
234 | * Add an object file to the in-memory object store, without writing it | |
235 | * to disk. | |
236 | * | |
237 | * Callers are responsible for calling write_object_file to record the | |
238 | * object in persistent storage before writing any other new objects | |
239 | * that reference it. | |
240 | */ | |
55454427 | 241 | int pretend_object_file(void *, unsigned long, enum object_type, |
ad6dad09 | 242 | struct object_id *oid); |
cbd53a21 | 243 | |
55454427 | 244 | int force_object_loose(const struct object_id *oid, time_t mtime); |
cbd53a21 SB |
245 | |
246 | /* | |
247 | * Open the loose object at path, check its hash, and return the contents, | |
248 | * type, and size. If the object is a blob, then "contents" may return NULL, | |
249 | * to allow streaming of large blobs. | |
250 | * | |
251 | * Returns 0 on success, negative on error (details may be written to stderr). | |
252 | */ | |
253 | int read_loose_object(const char *path, | |
254 | const struct object_id *expected_oid, | |
255 | enum object_type *type, | |
256 | unsigned long *size, | |
257 | void **contents); | |
258 | ||
1d8d9cb6 JT |
259 | /* Retry packed storage after checking packed and loose storage */ |
260 | #define HAS_OBJECT_RECHECK_PACKED 1 | |
261 | ||
262 | /* | |
263 | * Returns 1 if the object exists. This function will not lazily fetch objects | |
264 | * in a partial clone. | |
265 | */ | |
266 | int has_object(struct repository *r, const struct object_id *oid, | |
267 | unsigned flags); | |
268 | ||
269 | /* | |
270 | * These macros and functions are deprecated. If checking existence for an | |
271 | * object that is likely to be missing and/or whose absence is relatively | |
272 | * inconsequential (or is consequential but the caller is prepared to handle | |
273 | * it), use has_object(), which has better defaults (no lazy fetch in a partial | |
274 | * clone and no rechecking of packed storage). In the unlikely event that a | |
275 | * caller needs to assert existence of an object that it fully expects to | |
276 | * exist, and wants to trigger a lazy fetch in a partial clone, use | |
277 | * oid_object_info_extended() with a NULL struct object_info. | |
278 | * | |
279 | * These functions can be removed once all callers have migrated to | |
280 | * has_object() and/or oid_object_info_extended(). | |
281 | */ | |
9b45f499 SB |
282 | #ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS |
283 | #define has_sha1_file_with_flags(sha1, flags) repo_has_sha1_file_with_flags(the_repository, sha1, flags) | |
284 | #define has_sha1_file(sha1) repo_has_sha1_file(the_repository, sha1) | |
285 | #endif | |
9b45f499 SB |
286 | int repo_has_object_file(struct repository *r, const struct object_id *oid); |
287 | int repo_has_object_file_with_flags(struct repository *r, | |
288 | const struct object_id *oid, int flags); | |
289 | #ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS | |
290 | #define has_object_file(oid) repo_has_object_file(the_repository, oid) | |
291 | #define has_object_file_with_flags(oid, flags) repo_has_object_file_with_flags(the_repository, oid, flags) | |
292 | #endif | |
cbd53a21 SB |
293 | |
294 | /* | |
295 | * Return true iff an alternate object database has a loose object | |
296 | * with the specified name. This function does not respect replace | |
297 | * references. | |
298 | */ | |
55454427 | 299 | int has_loose_object_nonlocal(const struct object_id *); |
cbd53a21 | 300 | |
55454427 | 301 | void assert_oid_type(const struct object_id *oid, enum object_type expect); |
cbd53a21 | 302 | |
31877c9a MT |
303 | /* |
304 | * Enabling the object read lock allows multiple threads to safely call the | |
305 | * following functions in parallel: repo_read_object_file(), read_object_file(), | |
306 | * read_object_file_extended(), read_object_with_reference(), read_object(), | |
307 | * oid_object_info() and oid_object_info_extended(). | |
308 | * | |
309 | * obj_read_lock() and obj_read_unlock() may also be used to protect other | |
310 | * sections which cannot execute in parallel with object reading. Since the used | |
311 | * lock is a recursive mutex, these sections can even contain calls to object | |
312 | * reading functions. However, beware that in these cases zlib inflation won't | |
313 | * be performed in parallel, losing performance. | |
314 | * | |
315 | * TODO: oid_object_info_extended()'s call stack has a recursive behavior. If | |
316 | * any of its callees end up calling it, this recursive call won't benefit from | |
317 | * parallel inflation. | |
318 | */ | |
319 | void enable_obj_read_lock(void); | |
320 | void disable_obj_read_lock(void); | |
321 | ||
322 | extern int obj_read_use_lock; | |
323 | extern pthread_mutex_t obj_read_mutex; | |
324 | ||
325 | static inline void obj_read_lock(void) | |
326 | { | |
327 | if(obj_read_use_lock) | |
328 | pthread_mutex_lock(&obj_read_mutex); | |
329 | } | |
330 | ||
331 | static inline void obj_read_unlock(void) | |
332 | { | |
333 | if(obj_read_use_lock) | |
334 | pthread_mutex_unlock(&obj_read_mutex); | |
335 | } | |
336 | ||
cbd53a21 SB |
337 | struct object_info { |
338 | /* Request */ | |
339 | enum object_type *typep; | |
340 | unsigned long *sizep; | |
341 | off_t *disk_sizep; | |
b99b6bcc | 342 | struct object_id *delta_base_oid; |
cbd53a21 SB |
343 | struct strbuf *type_name; |
344 | void **contentp; | |
345 | ||
346 | /* Response */ | |
347 | enum { | |
348 | OI_CACHED, | |
349 | OI_LOOSE, | |
350 | OI_PACKED, | |
351 | OI_DBCACHED | |
352 | } whence; | |
353 | union { | |
354 | /* | |
355 | * struct { | |
356 | * ... Nothing to expose in this case | |
357 | * } cached; | |
358 | * struct { | |
359 | * ... Nothing to expose in this case | |
360 | * } loose; | |
361 | */ | |
362 | struct { | |
363 | struct packed_git *pack; | |
364 | off_t offset; | |
365 | unsigned int is_delta; | |
366 | } packed; | |
367 | } u; | |
368 | }; | |
369 | ||
370 | /* | |
371 | * Initializer for a "struct object_info" that wants no items. You may | |
372 | * also memset() the memory to all-zeroes. | |
373 | */ | |
9865b6e6 | 374 | #define OBJECT_INFO_INIT { 0 } |
cbd53a21 SB |
375 | |
376 | /* Invoke lookup_replace_object() on the given hash */ | |
377 | #define OBJECT_INFO_LOOKUP_REPLACE 1 | |
378 | /* Allow reading from a loose object file of unknown/bogus type */ | |
379 | #define OBJECT_INFO_ALLOW_UNKNOWN_TYPE 2 | |
cbd53a21 SB |
380 | /* Do not retry packed storage after checking packed and loose storage */ |
381 | #define OBJECT_INFO_QUICK 8 | |
382 | /* Do not check loose object */ | |
383 | #define OBJECT_INFO_IGNORE_LOOSE 16 | |
0f4a4fb1 JT |
384 | /* |
385 | * Do not attempt to fetch the object if missing (even if fetch_is_missing is | |
31f5256c | 386 | * nonzero). |
0f4a4fb1 | 387 | */ |
31f5256c DS |
388 | #define OBJECT_INFO_SKIP_FETCH_OBJECT 32 |
389 | /* | |
390 | * This is meant for bulk prefetching of missing blobs in a partial | |
391 | * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. | |
392 | */ | |
393 | #define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK) | |
cbd53a21 SB |
394 | |
395 | int oid_object_info_extended(struct repository *r, | |
396 | const struct object_id *, | |
397 | struct object_info *, unsigned flags); | |
398 | ||
0889aae1 JK |
399 | /* |
400 | * Iterate over the files in the loose-object parts of the object | |
401 | * directory "path", triggering the following callbacks: | |
402 | * | |
403 | * - loose_object is called for each loose object we find. | |
404 | * | |
405 | * - loose_cruft is called for any files that do not appear to be | |
406 | * loose objects. Note that we only look in the loose object | |
407 | * directories "objects/[0-9a-f]{2}/", so we will not report | |
408 | * "objects/foobar" as cruft. | |
409 | * | |
410 | * - loose_subdir is called for each top-level hashed subdirectory | |
411 | * of the object directory (e.g., "$OBJDIR/f0"). It is called | |
412 | * after the objects in the directory are processed. | |
413 | * | |
414 | * Any callback that is NULL will be ignored. Callbacks returning non-zero | |
415 | * will end the iteration. | |
416 | * | |
417 | * In the "buf" variant, "path" is a strbuf which will also be used as a | |
418 | * scratch buffer, but restored to its original contents before | |
419 | * the function returns. | |
420 | */ | |
421 | typedef int each_loose_object_fn(const struct object_id *oid, | |
422 | const char *path, | |
423 | void *data); | |
424 | typedef int each_loose_cruft_fn(const char *basename, | |
425 | const char *path, | |
426 | void *data); | |
427 | typedef int each_loose_subdir_fn(unsigned int nr, | |
428 | const char *path, | |
429 | void *data); | |
430 | int for_each_file_in_obj_subdir(unsigned int subdir_nr, | |
431 | struct strbuf *path, | |
432 | each_loose_object_fn obj_cb, | |
433 | each_loose_cruft_fn cruft_cb, | |
434 | each_loose_subdir_fn subdir_cb, | |
435 | void *data); | |
436 | int for_each_loose_file_in_objdir(const char *path, | |
437 | each_loose_object_fn obj_cb, | |
438 | each_loose_cruft_fn cruft_cb, | |
439 | each_loose_subdir_fn subdir_cb, | |
440 | void *data); | |
441 | int for_each_loose_file_in_objdir_buf(struct strbuf *path, | |
442 | each_loose_object_fn obj_cb, | |
443 | each_loose_cruft_fn cruft_cb, | |
444 | each_loose_subdir_fn subdir_cb, | |
445 | void *data); | |
446 | ||
447 | /* Flags for for_each_*_object() below. */ | |
448 | enum for_each_object_flags { | |
449 | /* Iterate only over local objects, not alternates. */ | |
450 | FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), | |
451 | ||
452 | /* Only iterate over packs obtained from the promisor remote. */ | |
453 | FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), | |
454 | ||
455 | /* | |
456 | * Visit objects within a pack in packfile order rather than .idx order | |
457 | */ | |
458 | FOR_EACH_OBJECT_PACK_ORDER = (1<<2), | |
a241878a TB |
459 | |
460 | /* Only iterate over packs that are not marked as kept in-core. */ | |
461 | FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), | |
462 | ||
463 | /* Only iterate over packs that do not have .keep files. */ | |
464 | FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), | |
0889aae1 JK |
465 | }; |
466 | ||
467 | /* | |
468 | * Iterate over all accessible loose objects without respect to | |
469 | * reachability. By default, this includes both local and alternate objects. | |
470 | * The order in which objects are visited is unspecified. | |
471 | * | |
472 | * Any flags specific to packs are ignored. | |
473 | */ | |
474 | int for_each_loose_object(each_loose_object_fn, void *, | |
475 | enum for_each_object_flags flags); | |
476 | ||
477 | /* | |
478 | * Iterate over all accessible packed objects without respect to reachability. | |
479 | * By default, this includes both local and alternate packs. | |
480 | * | |
481 | * Note that some objects may appear twice if they are found in multiple packs. | |
482 | * Each pack is visited in an unspecified order. By default, objects within a | |
483 | * pack are visited in pack-idx order (i.e., sorted by oid). | |
484 | */ | |
485 | typedef int each_packed_object_fn(const struct object_id *oid, | |
486 | struct packed_git *pack, | |
487 | uint32_t pos, | |
488 | void *data); | |
489 | int for_each_object_in_pack(struct packed_git *p, | |
490 | each_packed_object_fn, void *data, | |
491 | enum for_each_object_flags flags); | |
492 | int for_each_packed_object(each_packed_object_fn, void *, | |
493 | enum for_each_object_flags flags); | |
494 | ||
90c62155 | 495 | #endif /* OBJECT_STORE_H */ |