]>
Commit | Line | Data |
---|---|---|
a034e910 EN |
1 | #ifndef OBJECT_STORE_LL_H |
2 | #define OBJECT_STORE_LL_H | |
3 | ||
4 | #include "hashmap.h" | |
5 | #include "object.h" | |
6 | #include "list.h" | |
7 | #include "thread-utils.h" | |
8 | #include "oidset.h" | |
9 | ||
10 | struct oidmap; | |
11 | struct oidtree; | |
12 | struct strbuf; | |
13 | ||
14 | struct object_directory { /* One object directory; directories are chained through `next`. */ | |
15 | struct object_directory *next; /* following directory in the chain; presumably NULL at the end -- confirm */ | |
16 | ||
17 | /* | |
18 | * Used to store the results of readdir(3) calls when we are OK | |
19 | * sacrificing accuracy due to races for speed. That includes | |
20 | * object existence with OBJECT_INFO_QUICK, as well as | |
21 | * our search for unique abbreviated hashes. Don't use it for tasks | |
22 | * requiring greater accuracy! | |
23 | * | |
24 | * Be sure to call odb_load_loose_cache() before using. NOTE(review): this header only declares odb_loose_cache() -- confirm the function name. | |
25 | */ | |
26 | uint32_t loose_objects_subdir_seen[8]; /* 8 x 32 = 256 bits; presumably one bit per two-hex-digit subdirectory -- confirm */ | |
27 | struct oidtree *loose_objects_cache; | |
28 | ||
23b2c7e9 | 29 | /* Map between object IDs for loose objects. */ |
30 | struct loose_object_map *loose_map; | |
31 | ||
a034e910 EN |
32 | /* |
33 | * This is a temporary object store created by the tmp_objdir | |
34 | * facility. Disable ref updates since the objects in the store | |
35 | * might be discarded on rollback. | |
36 | */ | |
37 | int disable_ref_updates; /* used as a boolean flag */ | |
38 | ||
39 | /* | |
40 | * This object store is ephemeral, so there is no need to fsync. | |
41 | */ | |
42 | int will_destroy; /* used as a boolean flag; see also the will_destroy argument of set_temporary_primary_odb() */ | |
43 | ||
44 | /* | |
45 | * Path to the alternative object store. If this is a relative path, | |
46 | * it is relative to the current working directory. | |
47 | */ | |
48 | char *path; | |
49 | }; | |
50 | ||
51 | struct input_stream { /* Callback-driven data source; this is the input type taken by stream_loose_object(). */ | |
52 | const void *(*read)(struct input_stream *, unsigned long *len); /* presumably returns the next chunk and stores its size in *len -- confirm against the implementation */ | |
53 | void *data; /* opaque pointer; presumably private state for the read callback -- confirm */ | |
54 | int is_finished; /* presumably set non-zero once the input is exhausted -- confirm */ | |
55 | }; | |
56 | ||
57 | void prepare_alt_odb(struct repository *r); | |
58 | int has_alt_odb(struct repository *r); | |
59 | char *compute_alternate_path(const char *path, struct strbuf *err); | |
60 | struct object_directory *find_odb(struct repository *r, const char *obj_dir); | |
61 | typedef int alt_odb_fn(struct object_directory *, void *); | |
62 | int foreach_alt_odb(alt_odb_fn, void*); | |
63 | typedef void alternate_ref_fn(const struct object_id *oid, void *); | |
64 | void for_each_alternate_ref(alternate_ref_fn, void *); | |
65 | ||
66 | /* | |
67 | * Add the directory to the on-disk alternates file; the new entry will also | |
68 | * take effect in the current process. | |
69 | */ | |
70 | void add_to_alternates_file(const char *dir); | |
71 | ||
72 | /* | |
73 | * Add the directory to the in-memory list of alternates (along with any | |
74 | * recursive alternates it points to), but do not modify the on-disk alternates | |
75 | * file. | |
76 | */ | |
77 | void add_to_alternates_memory(const char *dir); | |
78 | ||
79 | /* | |
80 | * Replace the current writable object directory with the specified temporary | |
81 | * object directory; returns the former primary object directory. | |
82 | */ | |
83 | struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy); | |
84 | ||
85 | /* | |
86 | * Restore a previous ODB replaced by set_temporary_primary_odb(). | |
87 | */ | |
88 | void restore_primary_odb(struct object_directory *restore_odb, const char *old_path); | |
89 | ||
90 | /* | |
91 | * Populate and return the loose object cache array corresponding to the | |
92 | * given object ID. | |
93 | */ | |
94 | struct oidtree *odb_loose_cache(struct object_directory *odb, | |
95 | const struct object_id *oid); | |
96 | ||
97 | /* Empty the loose object cache for the specified object directory. */ | |
98 | void odb_clear_loose_cache(struct object_directory *odb); | |
99 | ||
100 | /* Clear and free the specified object directory */ | |
101 | void free_object_directory(struct object_directory *odb); | |
102 | ||
103 | struct packed_git { /* Per-pack record; the pack's path is stored inline in the trailing pack_name member. */ | |
104 | struct hashmap_entry packmap_ent; /* hashmap linkage; pack_map_entry_cmp() recovers the packed_git from it via container_of() */ | |
105 | struct packed_git *next; /* next pack in a singly linked list */ | |
106 | struct list_head mru; /* presumably linkage for raw_object_store.packed_git_mru -- confirm */ | |
107 | struct pack_window *windows; | |
108 | off_t pack_size; | |
109 | const void *index_data; | |
110 | size_t index_size; /* presumably the size of the data behind index_data -- confirm */ | |
111 | uint32_t num_objects; | |
4488bb3b | 112 | size_t crc_offset; |
a034e910 EN |
113 | struct oidset bad_objects; |
114 | int index_version; | |
115 | time_t mtime; | |
116 | int pack_fd; | |
117 | int index; /* for builtin/pack-objects.c */ | |
118 | unsigned pack_local:1, /* start of eight one-bit flags */ | |
119 | pack_keep:1, | |
120 | pack_keep_in_core:1, | |
121 | freshened:1, | |
122 | do_not_close:1, | |
123 | pack_promisor:1, | |
124 | multi_pack_index:1, | |
125 | is_cruft:1; | |
126 | unsigned char hash[GIT_MAX_RAWSZ]; | |
127 | struct revindex_entry *revindex; | |
128 | const uint32_t *revindex_data; | |
129 | const uint32_t *revindex_map; | |
130 | size_t revindex_size; | |
131 | /* | |
132 | * mtimes_map points at the beginning of the memory mapped region of | |
133 | * this pack's corresponding .mtimes file, and mtimes_size is the size | |
134 | * of that .mtimes file | |
135 | */ | |
136 | const uint32_t *mtimes_map; | |
137 | size_t mtimes_size; | |
138 | /* something like ".git/objects/pack/xxxxx.pack" */ | |
139 | char pack_name[FLEX_ARRAY]; /* more */ | |
140 | }; | |
141 | ||
142 | struct multi_pack_index; | |
143 | ||
144 | static inline int pack_map_entry_cmp(const void *cmp_data UNUSED, /* Hashmap comparison callback for packed_git entries, ordered by pack_name; cmp_data is ignored. */ | |
145 | const struct hashmap_entry *entry, /* embedded packmap_ent of the first pack */ | |
146 | const struct hashmap_entry *entry2, /* embedded packmap_ent of the second pack; its name is used only when keydata is NULL */ | |
147 | const void *keydata) /* optional NUL-terminated pack name to compare against instead of entry2 */ | |
148 | { | |
149 | const char *key = keydata; | |
150 | const struct packed_git *pg1, *pg2; | |
151 | ||
152 | pg1 = container_of(entry, const struct packed_git, packmap_ent); /* step back from the embedded member to its enclosing packed_git */ | |
153 | pg2 = container_of(entry2, const struct packed_git, packmap_ent); | |
154 | ||
155 | return strcmp(pg1->pack_name, key ? key : pg2->pack_name); /* strcmp() result: 0 when the names match, non-zero otherwise */ | |
156 | } | |
157 | ||
158 | struct raw_object_store { /* Object directories, replace map, commit-graph and packfile bookkeeping; created by raw_object_store_new() and cleared by raw_object_store_clear(). */ | |
159 | /* | |
160 | * Set of all object directories; the main directory is first (and | |
161 | * cannot be NULL after initialization). Subsequent directories are | |
162 | * alternates. | |
163 | */ | |
164 | struct object_directory *odb; | |
165 | struct object_directory **odb_tail; /* presumably the slot where the next directory gets appended -- confirm */ | |
166 | struct kh_odb_path_map *odb_by_path; /* presumably a lookup of directories keyed by path -- confirm */ | |
167 | ||
168 | int loaded_alternates; /* presumably non-zero once the alternates have been loaded -- confirm */ | |
169 | ||
170 | /* | |
171 | * A list of alternate object directories loaded from the environment; | |
172 | * this should not generally need to be accessed directly, but will | |
173 | * populate the "odb" list when prepare_alt_odb() is run. | |
174 | */ | |
175 | char *alternate_db; | |
176 | ||
177 | /* | |
178 | * Objects that should be substituted by other objects | |
179 | * (see git-replace(1)). | |
180 | */ | |
181 | struct oidmap *replace_map; | |
182 | unsigned replace_map_initialized : 1; | |
183 | pthread_mutex_t replace_mutex; /* protect object replace functions */ | |
184 | ||
185 | struct commit_graph *commit_graph; | |
186 | unsigned commit_graph_attempted : 1; /* if loading has been attempted */ | |
187 | ||
188 | /* | |
189 | * private data | |
190 | * | |
191 | * should only be accessed directly by packfile.c and midx.c | |
192 | */ | |
193 | struct multi_pack_index *multi_pack_index; | |
194 | ||
195 | /* | |
196 | * private data | |
197 | * | |
198 | * should only be accessed directly by packfile.c | |
199 | */ | |
200 | ||
201 | struct packed_git *packed_git; /* head of the pack list linked through packed_git.next */ | |
202 | /* A most-recently-used ordered version of the packed_git list. */ | |
203 | struct list_head packed_git_mru; | |
204 | ||
205 | struct { | |
206 | struct packed_git **packs; | |
207 | unsigned flags; /* meaning of the bits is not visible in this header */ | |
208 | } kept_pack_cache; | |
209 | ||
210 | /* | |
211 | * A map of packfiles to packed_git structs for tracking which | |
212 | * packs have been loaded already. | |
213 | */ | |
214 | struct hashmap pack_map; | |
215 | ||
216 | /* | |
217 | * A fast, rough count of the number of objects in the repository. | |
218 | * These two fields are not meant for direct access. Use | |
219 | * repo_approximate_object_count() instead. | |
220 | */ | |
221 | unsigned long approximate_object_count; | |
222 | unsigned approximate_object_count_valid : 1; | |
223 | ||
224 | /* | |
225 | * Whether packed_git has already been populated with this repository's | |
226 | * packs. | |
227 | */ | |
228 | unsigned packed_git_initialized : 1; | |
229 | }; | |
230 | ||
231 | struct raw_object_store *raw_object_store_new(void); | |
232 | void raw_object_store_clear(struct raw_object_store *o); | |
233 | ||
234 | /* | |
235 | * Put in `buf` the name of the file in the local object database that | |
236 | * would be used to store a loose object with the specified oid. | |
237 | */ | |
238 | const char *loose_object_path(struct repository *r, struct strbuf *buf, | |
239 | const struct object_id *oid); | |
240 | ||
241 | void *map_loose_object(struct repository *r, const struct object_id *oid, | |
242 | unsigned long *size); | |
243 | ||
244 | void *repo_read_object_file(struct repository *r, | |
245 | const struct object_id *oid, | |
246 | enum object_type *type, | |
247 | unsigned long *size); | |
248 | ||
249 | /* Read and unpack an object file into memory, write memory to an object file */ | |
250 | int oid_object_info(struct repository *r, const struct object_id *, unsigned long *); | |
251 | ||
252 | void hash_object_file(const struct git_hash_algo *algo, const void *buf, | |
253 | unsigned long len, enum object_type type, | |
254 | struct object_id *oid); | |
255 | ||
256 | int write_object_file_flags(const void *buf, unsigned long len, | |
257 | enum object_type type, struct object_id *oid, | |
c2538492 | 258 | struct object_id *compat_oid_in, unsigned flags); |
a034e910 EN |
259 | static inline int write_object_file(const void *buf, unsigned long len, /* Convenience wrapper around write_object_file_flags(). */ |
260 | enum object_type type, struct object_id *oid) | |
261 | { | |
c2538492 | 262 | return write_object_file_flags(buf, len, type, oid, NULL, 0); /* no compat OID, no flags; the callee's return value is passed through unchanged */ |
a034e910 EN |
263 | } |
264 | ||
265 | int write_object_file_literally(const void *buf, unsigned long len, | |
266 | const char *type, struct object_id *oid, | |
267 | unsigned flags); | |
268 | int stream_loose_object(struct input_stream *in_stream, size_t len, | |
269 | struct object_id *oid); | |
270 | ||
271 | /* | |
272 | * Add an object file to the in-memory object store, without writing it | |
273 | * to disk. | |
274 | * | |
275 | * Callers are responsible for calling write_object_file to record the | |
276 | * object in persistent storage before writing any other new objects | |
277 | * that reference it. | |
278 | */ | |
279 | int pretend_object_file(void *, unsigned long, enum object_type, | |
280 | struct object_id *oid); | |
281 | ||
282 | int force_object_loose(const struct object_id *oid, time_t mtime); | |
283 | ||
284 | struct object_info { /* Request/response record passed to oid_object_info_extended() and read_loose_object(). */ | |
285 | /* Request: output pointers; the all-zero OBJECT_INFO_INIT value "wants no items" */ | |
286 | enum object_type *typep; | |
287 | unsigned long *sizep; | |
288 | off_t *disk_sizep; | |
289 | struct object_id *delta_base_oid; | |
290 | struct strbuf *type_name; | |
291 | void **contentp; | |
292 | ||
293 | /* Response */ | |
294 | enum { | |
295 | OI_CACHED, | |
296 | OI_LOOSE, | |
297 | OI_PACKED, | |
298 | OI_DBCACHED | |
299 | } whence; /* presumably reports where the object was found -- confirm */ | |
300 | union { | |
301 | /* | |
302 | * struct { | |
303 | * ... Nothing to expose in this case | |
304 | * } cached; | |
305 | * struct { | |
306 | * ... Nothing to expose in this case | |
307 | * } loose; | |
308 | */ | |
309 | struct { | |
310 | struct packed_git *pack; | |
311 | off_t offset; | |
312 | unsigned int is_delta; | |
313 | } packed; /* presumably meaningful only when whence is OI_PACKED -- confirm */ | |
314 | } u; | |
315 | }; | |
316 | ||
317 | /* | |
318 | * Initializer for a "struct object_info" that wants no items. You may | |
319 | * also memset() the memory to all-zeroes. | |
320 | */ | |
321 | #define OBJECT_INFO_INIT { 0 } | |
322 | ||
323 | /* Invoke lookup_replace_object() on the given hash */ | |
324 | #define OBJECT_INFO_LOOKUP_REPLACE 1 | |
325 | /* Allow reading from a loose object file of unknown/bogus type */ | |
326 | #define OBJECT_INFO_ALLOW_UNKNOWN_TYPE 2 | |
327 | /* Do not retry packed storage after checking packed and loose storage */ | |
328 | #define OBJECT_INFO_QUICK 8 | |
329 | /* | |
330 | * Do not attempt to fetch the object if missing (even if fetch_is_missing is | |
331 | * nonzero). | |
332 | */ | |
333 | #define OBJECT_INFO_SKIP_FETCH_OBJECT 16 | |
334 | /* | |
335 | * This is meant for bulk prefetching of missing blobs in a partial | |
336 | * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK | |
337 | */ | |
338 | #define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK) | |
339 | ||
340 | /* Die if object corruption (not just an object being missing) was detected. */ | |
341 | #define OBJECT_INFO_DIE_IF_CORRUPT 32 | |
342 | ||
343 | int oid_object_info_extended(struct repository *r, | |
344 | const struct object_id *, | |
345 | struct object_info *, unsigned flags); | |
346 | ||
347 | /* | |
348 | * Open the loose object at path, check its hash, and return the contents, | |
349 | * use the "oi" argument to assert things about the object, or e.g. populate its | |
350 | * type, and size. If the object is a blob, then "contents" may return NULL, | |
351 | * to allow streaming of large blobs. | |
352 | * | |
353 | * Returns 0 on success, negative on error (details may be written to stderr). | |
354 | */ | |
355 | int read_loose_object(const char *path, | |
356 | const struct object_id *expected_oid, | |
357 | struct object_id *real_oid, | |
358 | void **contents, | |
359 | struct object_info *oi); | |
360 | ||
361 | /* Retry packed storage after checking packed and loose storage */ | |
362 | #define HAS_OBJECT_RECHECK_PACKED 1 | |
363 | ||
364 | /* | |
365 | * Returns 1 if the object exists. This function will not lazily fetch objects | |
366 | * in a partial clone. | |
367 | */ | |
368 | int has_object(struct repository *r, const struct object_id *oid, | |
369 | unsigned flags); | |
370 | ||
371 | /* | |
372 | * These macros and functions are deprecated. If checking existence for an | |
373 | * object that is likely to be missing and/or whose absence is relatively | |
374 | * inconsequential (or is consequential but the caller is prepared to handle | |
375 | * it), use has_object(), which has better defaults (no lazy fetch in a partial | |
376 | * clone and no rechecking of packed storage). In the unlikely event that a | |
377 | * caller needs to assert existence of an object that it fully expects to | |
378 | * exist, and wants to trigger a lazy fetch in a partial clone, use | |
379 | * oid_object_info_extended() with a NULL struct object_info. | |
380 | * | |
381 | * These functions can be removed once all callers have migrated to | |
382 | * has_object() and/or oid_object_info_extended(). | |
383 | */ | |
384 | int repo_has_object_file(struct repository *r, const struct object_id *oid); | |
385 | int repo_has_object_file_with_flags(struct repository *r, | |
386 | const struct object_id *oid, int flags); | |
387 | ||
388 | /* | |
389 | * Return true iff an alternate object database has a loose object | |
390 | * with the specified name. This function does not respect replace | |
391 | * references. | |
392 | */ | |
393 | int has_loose_object_nonlocal(const struct object_id *); | |
394 | ||
395 | int has_loose_object(const struct object_id *); | |
396 | ||
397 | /** | |
398 | * format_object_header() is a thin wrapper around xsnprintf() that | |
399 | * writes the initial "<type> <obj-len>" part of the loose object | |
400 | * header. It returns the size that snprintf() returns + 1. | |
401 | */ | |
402 | int format_object_header(char *str, size_t size, enum object_type type, | |
403 | size_t objsize); | |
404 | ||
405 | void assert_oid_type(const struct object_id *oid, enum object_type expect); | |
406 | ||
407 | /* | |
408 | * Enabling the object read lock allows multiple threads to safely call the | |
409 | * following functions in parallel: repo_read_object_file(), | |
410 | * read_object_with_reference(), oid_object_info() and oid_object_info_extended(). | |
411 | * | |
412 | * obj_read_lock() and obj_read_unlock() may also be used to protect other | |
413 | * sections which cannot execute in parallel with object reading. Since the used | |
414 | * lock is a recursive mutex, these sections can even contain calls to object | |
415 | * reading functions. However, beware that in these cases zlib inflation won't | |
416 | * be performed in parallel, losing performance. | |
417 | * | |
418 | * TODO: oid_object_info_extended()'s call stack has a recursive behavior. If | |
419 | * any of its callees end up calling it, this recursive call won't benefit from | |
420 | * parallel inflation. | |
421 | */ | |
422 | void enable_obj_read_lock(void); | |
423 | void disable_obj_read_lock(void); | |
424 | ||
425 | extern int obj_read_use_lock; | |
426 | extern pthread_mutex_t obj_read_mutex; | |
427 | ||
428 | static inline void obj_read_lock(void) /* Acquire obj_read_mutex, but only while obj_read_use_lock is non-zero. */ | |
429 | { | |
430 | if(obj_read_use_lock) /* when the flag is zero this function does nothing */ | |
431 | pthread_mutex_lock(&obj_read_mutex); /* return value is not checked */ | |
432 | } | |
433 | ||
434 | static inline void obj_read_unlock(void) /* Release obj_read_mutex, but only while obj_read_use_lock is non-zero. */ | |
435 | { | |
436 | if(obj_read_use_lock) /* same flag test as obj_read_lock(), so the flag must not change between a lock and its unlock */ | |
437 | pthread_mutex_unlock(&obj_read_mutex); /* return value is not checked */ | |
438 | } | |
439 | ||
440 | /* | |
441 | * Iterate over the files in the loose-object parts of the object | |
442 | * directory "path", triggering the following callbacks: | |
443 | * | |
444 | * - loose_object is called for each loose object we find. | |
445 | * | |
446 | * - loose_cruft is called for any files that do not appear to be | |
447 | * loose objects. Note that we only look in the loose object | |
448 | * directories "objects/[0-9a-f]{2}/", so we will not report | |
449 | * "objects/foobar" as cruft. | |
450 | * | |
451 | * - loose_subdir is called for each top-level hashed subdirectory | |
452 | * of the object directory (e.g., "$OBJDIR/f0"). It is called | |
453 | * after the objects in the directory are processed. | |
454 | * | |
455 | * Any callback that is NULL will be ignored. Callbacks returning non-zero | |
456 | * will end the iteration. | |
457 | * | |
458 | * In the "buf" variant, "path" is a strbuf which will also be used as a | |
459 | * scratch buffer, but restored to its original contents before | |
460 | * the function returns. | |
461 | */ | |
462 | typedef int each_loose_object_fn(const struct object_id *oid, | |
463 | const char *path, | |
464 | void *data); | |
465 | typedef int each_loose_cruft_fn(const char *basename, | |
466 | const char *path, | |
467 | void *data); | |
468 | typedef int each_loose_subdir_fn(unsigned int nr, | |
469 | const char *path, | |
470 | void *data); | |
471 | int for_each_file_in_obj_subdir(unsigned int subdir_nr, | |
472 | struct strbuf *path, | |
473 | each_loose_object_fn obj_cb, | |
474 | each_loose_cruft_fn cruft_cb, | |
475 | each_loose_subdir_fn subdir_cb, | |
476 | void *data); | |
477 | int for_each_loose_file_in_objdir(const char *path, | |
478 | each_loose_object_fn obj_cb, | |
479 | each_loose_cruft_fn cruft_cb, | |
480 | each_loose_subdir_fn subdir_cb, | |
481 | void *data); | |
482 | int for_each_loose_file_in_objdir_buf(struct strbuf *path, | |
483 | each_loose_object_fn obj_cb, | |
484 | each_loose_cruft_fn cruft_cb, | |
485 | each_loose_subdir_fn subdir_cb, | |
486 | void *data); | |
487 | ||
488 | /* Flags for for_each_*_object() below. */ | |
489 | enum for_each_object_flags { /* each enumerator is a distinct single bit (1<<0 .. 1<<4) */ | |
490 | /* Iterate only over local objects, not alternates. */ | |
491 | FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0), | |
492 | ||
493 | /* Only iterate over packs obtained from the promisor remote. */ | |
494 | FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1), | |
495 | ||
496 | /* | |
497 | * Visit objects within a pack in packfile order rather than .idx order | |
498 | */ | |
499 | FOR_EACH_OBJECT_PACK_ORDER = (1<<2), | |
500 | ||
501 | /* Only iterate over packs that are not marked as kept in-core. */ | |
502 | FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3), | |
503 | ||
504 | /* Only iterate over packs that do not have .keep files. */ | |
505 | FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), | |
506 | }; | |
507 | ||
508 | /* | |
509 | * Iterate over all accessible loose objects without respect to | |
510 | * reachability. By default, this includes both local and alternate objects. | |
511 | * The order in which objects are visited is unspecified. | |
512 | * | |
513 | * Any flags specific to packs are ignored. | |
514 | */ | |
515 | int for_each_loose_object(each_loose_object_fn, void *, | |
516 | enum for_each_object_flags flags); | |
517 | ||
518 | /* | |
519 | * Iterate over all accessible packed objects without respect to reachability. | |
520 | * By default, this includes both local and alternate packs. | |
521 | * | |
522 | * Note that some objects may appear twice if they are found in multiple packs. | |
523 | * Each pack is visited in an unspecified order. By default, objects within a | |
524 | * pack are visited in pack-idx order (i.e., sorted by oid). | |
525 | */ | |
526 | typedef int each_packed_object_fn(const struct object_id *oid, | |
527 | struct packed_git *pack, | |
528 | uint32_t pos, | |
529 | void *data); | |
530 | int for_each_object_in_pack(struct packed_git *p, | |
531 | each_packed_object_fn, void *data, | |
532 | enum for_each_object_flags flags); | |
533 | int for_each_packed_object(each_packed_object_fn, void *, | |
534 | enum for_each_object_flags flags); | |
535 | ||
536 | #endif /* OBJECT_STORE_LL_H */ |