]> git.ipfire.org Git - thirdparty/git.git/blame - list-objects-filter.c
Merge branch 'nd/the-index' into md/list-objects-filter-by-depth
[thirdparty/git.git] / list-objects-filter.c
CommitLineData
25ec7bca
JH
1#include "cache.h"
2#include "dir.h"
3#include "tag.h"
4#include "commit.h"
5#include "tree.h"
6#include "blob.h"
7#include "diff.h"
8#include "tree-walk.h"
9#include "revision.h"
10#include "list-objects.h"
11#include "list-objects-filter.h"
12#include "list-objects-filter-options.h"
13#include "oidset.h"
cbd53a21 14#include "object-store.h"
25ec7bca
JH
15
/*
 * Object flag bit used privately by the filters in this file.
 * Remember to update the object flag allocation in object.h
 * whenever this bit is changed.
 *
 * FILTER_SHOWN_BUT_REVISIT is set on a tree object that has already
 * been shown, but that should still be revisited if it appears again
 * in the traversal (until it is marked SEEN).  It lets the filter
 * silently de-dup calls to show() in the caller.
 *
 * This is subtly different from the "revision.h:SHOWN" and the
 * "sha1-name.c:ONELINE_SEEN" bits, and also different from the
 * non-de-dup usage in pack-bitmap.c.
 */
#define FILTER_SHOWN_BUT_REVISIT (1<<21)
27
/*
 * State for the list-objects filter that omits ALL blobs from the
 * traversal.
 *
 * omits: optional set that receives the OID of every omitted blob;
 *        NULL when the caller does not want the list collected.
 */
struct filter_blobs_none_data {
	struct oidset *omits;
};
35
36static enum list_objects_filter_result filter_blobs_none(
01d40c84 37 struct repository *r,
25ec7bca
JH
38 enum list_objects_filter_situation filter_situation,
39 struct object *obj,
40 const char *pathname,
41 const char *filename,
42 void *filter_data_)
43{
44 struct filter_blobs_none_data *filter_data = filter_data_;
45
46 switch (filter_situation) {
47 default:
696aa739 48 BUG("unknown filter_situation: %d", filter_situation);
25ec7bca
JH
49
50 case LOFS_BEGIN_TREE:
51 assert(obj->type == OBJ_TREE);
52 /* always include all tree objects */
53 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
54
55 case LOFS_END_TREE:
56 assert(obj->type == OBJ_TREE);
57 return LOFR_ZERO;
58
59 case LOFS_BLOB:
60 assert(obj->type == OBJ_BLOB);
61 assert((obj->flags & SEEN) == 0);
62
63 if (filter_data->omits)
64 oidset_insert(filter_data->omits, &obj->oid);
65 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
66 }
67}
68
69static void *filter_blobs_none__init(
70 struct oidset *omitted,
71 struct list_objects_filter_options *filter_options,
72 filter_object_fn *filter_fn,
73 filter_free_fn *filter_free_fn)
74{
75 struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
76 d->omits = omitted;
77
78 *filter_fn = filter_blobs_none;
79 *filter_free_fn = free;
80 return d;
81}
82
bc5975d2
MD
/*
 * State for the list-objects filter that omits ALL trees and blobs
 * from the traversal.
 *
 * omits: optional set that receives the OIDs of the omitted objects;
 *        NULL when the caller does not want the list collected.
 */
struct filter_trees_none_data {
	struct oidset *omits;
};
90
91static enum list_objects_filter_result filter_trees_none(
01d40c84 92 struct repository *r,
bc5975d2
MD
93 enum list_objects_filter_situation filter_situation,
94 struct object *obj,
95 const char *pathname,
96 const char *filename,
97 void *filter_data_)
98{
99 struct filter_trees_none_data *filter_data = filter_data_;
100
101 switch (filter_situation) {
102 default:
103 BUG("unknown filter_situation: %d", filter_situation);
104
105 case LOFS_BEGIN_TREE:
106 case LOFS_BLOB:
8b10a206 107 if (filter_data->omits) {
bc5975d2 108 oidset_insert(filter_data->omits, &obj->oid);
8b10a206
MD
109 /* _MARK_SEEN but not _DO_SHOW (hard omit) */
110 return LOFR_MARK_SEEN;
111 } else {
112 /*
113 * Not collecting omits so no need to to traverse tree.
114 */
115 return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
116 }
bc5975d2
MD
117
118 case LOFS_END_TREE:
119 assert(obj->type == OBJ_TREE);
120 return LOFR_ZERO;
121
122 }
123}
124
125static void* filter_trees_none__init(
126 struct oidset *omitted,
127 struct list_objects_filter_options *filter_options,
128 filter_object_fn *filter_fn,
129 filter_free_fn *filter_free_fn)
130{
131 struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
132 d->omits = omitted;
133
134 *filter_fn = filter_trees_none;
135 *filter_free_fn = free;
136 return d;
137}
138
25ec7bca
JH
/*
 * State for the list-objects filter that omits large blobs.
 *
 * omits:     optional set that receives the OIDs of the omitted blobs;
 *            NULL when the caller does not want the list collected.
 * max_bytes: size threshold; blobs whose size is NOT strictly below
 *            this value are omitted.
 */
struct filter_blobs_limit_data {
	struct oidset *omits;
	unsigned long max_bytes;
};
147
148static enum list_objects_filter_result filter_blobs_limit(
01d40c84 149 struct repository *r,
25ec7bca
JH
150 enum list_objects_filter_situation filter_situation,
151 struct object *obj,
152 const char *pathname,
153 const char *filename,
154 void *filter_data_)
155{
156 struct filter_blobs_limit_data *filter_data = filter_data_;
157 unsigned long object_length;
158 enum object_type t;
159
160 switch (filter_situation) {
161 default:
696aa739 162 BUG("unknown filter_situation: %d", filter_situation);
25ec7bca
JH
163
164 case LOFS_BEGIN_TREE:
165 assert(obj->type == OBJ_TREE);
166 /* always include all tree objects */
167 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
168
169 case LOFS_END_TREE:
170 assert(obj->type == OBJ_TREE);
171 return LOFR_ZERO;
172
173 case LOFS_BLOB:
174 assert(obj->type == OBJ_BLOB);
175 assert((obj->flags & SEEN) == 0);
176
01d40c84 177 t = oid_object_info(r, &obj->oid, &object_length);
25ec7bca
JH
178 if (t != OBJ_BLOB) { /* probably OBJ_NONE */
179 /*
180 * We DO NOT have the blob locally, so we cannot
181 * apply the size filter criteria. Be conservative
182 * and force show it (and let the caller deal with
183 * the ambiguity).
184 */
185 goto include_it;
186 }
187
188 if (object_length < filter_data->max_bytes)
189 goto include_it;
190
191 if (filter_data->omits)
192 oidset_insert(filter_data->omits, &obj->oid);
193 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
194 }
195
196include_it:
197 if (filter_data->omits)
198 oidset_remove(filter_data->omits, &obj->oid);
199 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
200}
201
202static void *filter_blobs_limit__init(
203 struct oidset *omitted,
204 struct list_objects_filter_options *filter_options,
205 filter_object_fn *filter_fn,
206 filter_free_fn *filter_free_fn)
207{
208 struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
209 d->omits = omitted;
210 d->max_bytes = filter_options->blob_limit_value;
211
212 *filter_fn = filter_blobs_limit;
213 *filter_free_fn = free;
214 return d;
215}
216
/*
 * A filter driven by a sparse-checkout specification, so that only
 * blobs a sparse checkout would populate are included.
 *
 * The sparse-checkout spec can be loaded from a blob with the given
 * OID or from a local pathname.  An OID is allowed because the repo
 * may be bare or the filtering may be happening on the server.
 *
 * One "struct frame" is kept per directory level currently being
 * traversed.
 */
struct frame {
	/*
	 * The usual default include/exclude value that is inherited
	 * as we recurse into directories, based upon pattern matching
	 * of the directory itself or of a containing directory.
	 */
	int defval;

	/*
	 * 1 if the directory (recursively) contains any provisionally
	 *   omitted objects.
	 *
	 * 0 if everything (recursively) contained in this directory
	 *   has been explicitly included (SHOWN) in the result, so the
	 *   directory may be short-cut later in the traversal.
	 */
	unsigned child_prov_omit : 1;
};
245
246struct filter_sparse_data {
247 struct oidset *omits;
248 struct exclude_list el;
249
250 size_t nr, alloc;
251 struct frame *array_frame;
252};
253
254static enum list_objects_filter_result filter_sparse(
01d40c84 255 struct repository *r,
25ec7bca
JH
256 enum list_objects_filter_situation filter_situation,
257 struct object *obj,
258 const char *pathname,
259 const char *filename,
260 void *filter_data_)
261{
262 struct filter_sparse_data *filter_data = filter_data_;
263 int val, dtype;
264 struct frame *frame;
265
266 switch (filter_situation) {
267 default:
696aa739 268 BUG("unknown filter_situation: %d", filter_situation);
25ec7bca
JH
269
270 case LOFS_BEGIN_TREE:
271 assert(obj->type == OBJ_TREE);
272 dtype = DT_DIR;
273 val = is_excluded_from_list(pathname, strlen(pathname),
274 filename, &dtype, &filter_data->el,
01d40c84 275 r->index);
25ec7bca
JH
276 if (val < 0)
277 val = filter_data->array_frame[filter_data->nr].defval;
278
279 ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
280 filter_data->alloc);
281 filter_data->nr++;
282 filter_data->array_frame[filter_data->nr].defval = val;
283 filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
284
285 /*
286 * A directory with this tree OID may appear in multiple
287 * places in the tree. (Think of a directory move or copy,
288 * with no other changes, so the OID is the same, but the
289 * full pathnames of objects within this directory are new
290 * and may match is_excluded() patterns differently.)
291 * So we cannot mark this directory as SEEN (yet), since
292 * that will prevent process_tree() from revisiting this
293 * tree object with other pathname prefixes.
294 *
295 * Only _DO_SHOW the tree object the first time we visit
296 * this tree object.
297 *
298 * We always show all tree objects. A future optimization
299 * may want to attempt to narrow this.
300 */
301 if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
302 return LOFR_ZERO;
303 obj->flags |= FILTER_SHOWN_BUT_REVISIT;
304 return LOFR_DO_SHOW;
305
306 case LOFS_END_TREE:
307 assert(obj->type == OBJ_TREE);
308 assert(filter_data->nr > 0);
309
310 frame = &filter_data->array_frame[filter_data->nr];
311 filter_data->nr--;
312
313 /*
314 * Tell our parent directory if any of our children were
315 * provisionally omitted.
316 */
317 filter_data->array_frame[filter_data->nr].child_prov_omit |=
318 frame->child_prov_omit;
319
320 /*
321 * If there are NO provisionally omitted child objects (ALL child
322 * objects in this folder were INCLUDED), then we can mark the
323 * folder as SEEN (so we will not have to revisit it again).
324 */
325 if (!frame->child_prov_omit)
326 return LOFR_MARK_SEEN;
327 return LOFR_ZERO;
328
329 case LOFS_BLOB:
330 assert(obj->type == OBJ_BLOB);
331 assert((obj->flags & SEEN) == 0);
332
333 frame = &filter_data->array_frame[filter_data->nr];
334
335 dtype = DT_REG;
336 val = is_excluded_from_list(pathname, strlen(pathname),
337 filename, &dtype, &filter_data->el,
01d40c84 338 r->index);
25ec7bca
JH
339 if (val < 0)
340 val = frame->defval;
341 if (val > 0) {
342 if (filter_data->omits)
343 oidset_remove(filter_data->omits, &obj->oid);
344 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
345 }
346
347 /*
348 * Provisionally omit it. We've already established that
349 * this pathname is not in the sparse-checkout specification
350 * with the CURRENT pathname, so we *WANT* to omit this blob.
351 *
352 * However, a pathname elsewhere in the tree may also
353 * reference this same blob, so we cannot reject it yet.
354 * Leave the LOFR_ bits unset so that if the blob appears
355 * again in the traversal, we will be asked again.
356 */
357 if (filter_data->omits)
358 oidset_insert(filter_data->omits, &obj->oid);
359
360 /*
361 * Remember that at least 1 blob in this tree was
362 * provisionally omitted. This prevents us from short
363 * cutting the tree in future iterations.
364 */
365 frame->child_prov_omit = 1;
366 return LOFR_ZERO;
367 }
368}
369
370
371static void filter_sparse_free(void *filter_data)
372{
373 struct filter_sparse_data *d = filter_data;
374 /* TODO free contents of 'd' */
375 free(d);
376}
377
378static void *filter_sparse_oid__init(
379 struct oidset *omitted,
380 struct list_objects_filter_options *filter_options,
381 filter_object_fn *filter_fn,
382 filter_free_fn *filter_free_fn)
383{
384 struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
385 d->omits = omitted;
386 if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
387 NULL, 0, &d->el) < 0)
388 die("could not load filter specification");
389
390 ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
391 d->array_frame[d->nr].defval = 0; /* default to include */
392 d->array_frame[d->nr].child_prov_omit = 0;
393
394 *filter_fn = filter_sparse;
395 *filter_free_fn = filter_sparse_free;
396 return d;
397}
398
399static void *filter_sparse_path__init(
400 struct oidset *omitted,
401 struct list_objects_filter_options *filter_options,
402 filter_object_fn *filter_fn,
403 filter_free_fn *filter_free_fn)
404{
405 struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
406 d->omits = omitted;
407 if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
408 NULL, 0, &d->el, NULL) < 0)
409 die("could not load filter specification");
410
411 ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
412 d->array_frame[d->nr].defval = 0; /* default to include */
413 d->array_frame[d->nr].child_prov_omit = 0;
414
415 *filter_fn = filter_sparse;
416 *filter_free_fn = filter_sparse_free;
417 return d;
418}
419
420typedef void *(*filter_init_fn)(
421 struct oidset *omitted,
422 struct list_objects_filter_options *filter_options,
423 filter_object_fn *filter_fn,
424 filter_free_fn *filter_free_fn);
425
426/*
427 * Must match "enum list_objects_filter_choice".
428 */
429static filter_init_fn s_filters[] = {
430 NULL,
431 filter_blobs_none__init,
432 filter_blobs_limit__init,
bc5975d2 433 filter_trees_none__init,
25ec7bca
JH
434 filter_sparse_oid__init,
435 filter_sparse_path__init,
436};
437
438void *list_objects_filter__init(
439 struct oidset *omitted,
440 struct list_objects_filter_options *filter_options,
441 filter_object_fn *filter_fn,
442 filter_free_fn *filter_free_fn)
443{
444 filter_init_fn init_fn;
445
446 assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
447
448 if (filter_options->choice >= LOFC__COUNT)
696aa739 449 BUG("invalid list-objects filter choice: %d",
25ec7bca
JH
450 filter_options->choice);
451
452 init_fn = s_filters[filter_options->choice];
453 if (init_fn)
454 return init_fn(omitted, filter_options,
455 filter_fn, filter_free_fn);
456 *filter_fn = NULL;
457 *filter_free_fn = NULL;
458 return NULL;
459}