11 #include "list-objects.h"
12 #include "list-objects-filter.h"
13 #include "list-objects-filter-options.h"
16 #include "object-store.h"
18 /* Remember to update object flag allocation in object.h */
20 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
21 * that have been shown, but should be revisited if they appear
22 * in the traversal (until we mark it SEEN). This is a way to
23 * let us silently de-dup calls to show() in the caller. This
24 * is subtly different from the "revision.h:SHOWN" and the
25 * "object-name.c:ONELINE_SEEN" bits. And also different from
26 * the non-de-dup usage in pack-bitmap.c
28 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
31 struct filter
*filter
;
34 struct object_id skip_tree
;
35 unsigned is_skipping_tree
: 1;
39 enum list_objects_filter_result (*filter_object_fn
)(
41 enum list_objects_filter_situation filter_situation
,
49 * Optional. If this function is supplied and the filter needs
50 * to collect omits, then this function is called once before
53 * This is required because the following two conditions hold:
55 * a. A tree filter can add and remove objects as an object
57 * b. A combine filter's omit set is the union of all its
58 * subfilters, which may include tree: filters.
60 * As such, the omits sets must be separate sets, and can only
61 * be unioned after the traversal is completed.
63 void (*finalize_omits_fn
)(struct oidset
*omits
, void *filter_data
);
65 void (*free_fn
)(void *filter_data
);
69 /* If non-NULL, the filter collects a list of the omitted OIDs here. */
/*
 * Filter callback that omits every blob.
 *
 * Tag, commit and tree objects are returned as
 * LOFR_MARK_SEEN | LOFR_DO_SHOW.  A blob is asserted to not carry the SEEN
 * flag yet, its OID is inserted into omits, and LOFR_MARK_SEEN alone is
 * returned so that the blob is never shown.  An unrecognized
 * filter_situation is reported through BUG().
 *
 * r, pathname, filename and filter_data_ are unused.
 *
 * NOTE(review): the second OBJ_TREE assertion has no return value next to
 * it, and any NULL guard around the omits insertion is not visible here;
 * confirm both.
 */
73 static enum list_objects_filter_result
filter_blobs_none(
74 struct repository
*r UNUSED
,
75 enum list_objects_filter_situation filter_situation
,
77 const char *pathname UNUSED
,
78 const char *filename UNUSED
,
80 void *filter_data_ UNUSED
)
82 switch (filter_situation
) {
84 BUG("unknown filter_situation: %d", filter_situation
);
87 assert(obj
->type
== OBJ_TAG
);
88 /* always include all tag objects */
89 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
92 assert(obj
->type
== OBJ_COMMIT
);
93 /* always include all commit objects */
94 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
97 assert(obj
->type
== OBJ_TREE
);
98 /* always include all tree objects */
99 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
102 assert(obj
->type
== OBJ_TREE
);
106 assert(obj
->type
== OBJ_BLOB
);
107 assert((obj
->flags
& SEEN
) == 0);
110 oidset_insert(omits
, &obj
->oid
);
111 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
/*
 * Set up the filter that omits all blobs: install filter_blobs_none as the
 * per-object callback and free as free_fn.  filter_options is not used.
 */
115 static void filter_blobs_none__init(
116 struct list_objects_filter_options
*filter_options UNUSED
,
117 struct filter
*filter
)
119 filter
->filter_object_fn
= filter_blobs_none
;
120 filter
->free_fn
= free
;
124 * A filter for list-objects to omit trees and blobs that lie at or beyond a
125 * given depth. Can OPTIONALLY collect a list of the omitted OIDs.
/*
 * State of the tree-depth filter.
 *
 * seen_at_depth holds one seen_map_entry per tree, keyed by OID.
 * exclude_depth is copied from filter_options->tree_exclude_depth when the
 * filter is initialized; filter_trees_depth() includes an object only while
 * current_depth < exclude_depth.  current_depth starts at 0 and is
 * incremented in the LOFS_BEGIN_TREE handling of filter_trees_depth() and
 * decremented in another tree situation (presumably LOFS_END_TREE; confirm).
 */
127 struct filter_trees_depth_data
{
129 * Maps trees to the minimum depth at which they were seen. It is not
130 * necessary to re-traverse a tree at deeper or equal depths than it has
131 * already been traversed.
133 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
134 * it from being traversed at shallower depths.
136 struct oidmap seen_at_depth
;
138 unsigned long exclude_depth
;
139 unsigned long current_depth
;
/*
 * Entry stored in filter_trees_depth_data.seen_at_depth.  base.oid is set
 * to the OID of the tree, and filter_trees_depth() records the traversal
 * depth for that tree in the depth member.
 */
142 struct seen_map_entry
{
143 struct oidmap_entry base
;
147 /* Returns 1 if the oid was in the omits set before it was invoked. */
/*
 * Update the omits set for one object and return the result of the
 * oidset call that was made: either oidset_remove() or oidset_insert()
 * on the OID of the object.  Callers pass their include_it value as the
 * third argument.
 *
 * NOTE(review): the condition that selects between removal and insertion
 * is not visible here; presumably an included object is removed and an
 * excluded one inserted -- confirm.
 */
148 static int filter_trees_update_omits(
150 struct oidset
*omits
,
157 return oidset_remove(omits
, &obj
->oid
);
159 return oidset_insert(omits
, &obj
->oid
);
/*
 * Filter callback for the tree-depth filter.
 *
 * include_it is computed on entry as current_depth < exclude_depth.
 * Tags and commits are always returned as LOFR_MARK_SEEN | LOFR_DO_SHOW.
 *
 * For LOFS_BEGIN_TREE the tree is looked up in seen_at_depth.  A new entry
 * records current_depth; otherwise current_depth is compared against the
 * recorded depth.  The result for the tree is one of LOFR_SKIP_TREE,
 * LOFR_DO_SHOW or LOFR_ZERO, the omits set is updated through
 * filter_trees_update_omits(), the recorded depth is refreshed, and
 * current_depth is incremented.  Another tree situation decrements
 * current_depth again.
 *
 * A further branch calls filter_trees_update_omits() and returns
 * LOFR_MARK_SEEN | LOFR_DO_SHOW when include_it is set, LOFR_ZERO
 * otherwise.
 *
 * r, pathname and filename are unused.
 *
 * NOTE(review): several conditions guarding these branches are not
 * spelled out next to them; confirm the exact branch selection before
 * relying on this summary.
 */
162 static enum list_objects_filter_result
filter_trees_depth(
163 struct repository
*r UNUSED
,
164 enum list_objects_filter_situation filter_situation
,
166 const char *pathname UNUSED
,
167 const char *filename UNUSED
,
168 struct oidset
*omits
,
171 struct filter_trees_depth_data
*filter_data
= filter_data_
;
172 struct seen_map_entry
*seen_info
;
173 int include_it
= filter_data
->current_depth
<
174 filter_data
->exclude_depth
;
179 * Note that we do not use _MARK_SEEN in order to allow re-traversal in
180 * case we encounter a tree or blob again at a shallower depth.
183 switch (filter_situation
) {
185 BUG("unknown filter_situation: %d", filter_situation
);
188 assert(obj
->type
== OBJ_TAG
);
189 /* always include all tag objects */
190 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
193 assert(obj
->type
== OBJ_COMMIT
);
194 /* always include all commit objects */
195 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
198 assert(obj
->type
== OBJ_TREE
);
199 filter_data
->current_depth
--;
203 filter_trees_update_omits(obj
, omits
, include_it
);
204 return include_it
? LOFR_MARK_SEEN
| LOFR_DO_SHOW
: LOFR_ZERO
;
206 case LOFS_BEGIN_TREE
:
207 seen_info
= oidmap_get(
208 &filter_data
->seen_at_depth
, &obj
->oid
);
210 CALLOC_ARRAY(seen_info
, 1);
211 oidcpy(&seen_info
->base
.oid
, &obj
->oid
);
212 seen_info
->depth
= filter_data
->current_depth
;
213 oidmap_put(&filter_data
->seen_at_depth
, seen_info
);
217 filter_data
->current_depth
>= seen_info
->depth
;
221 filter_res
= LOFR_SKIP_TREE
;
223 int been_omitted
= filter_trees_update_omits(
224 obj
, omits
, include_it
);
225 seen_info
->depth
= filter_data
->current_depth
;
228 filter_res
= LOFR_DO_SHOW
;
229 else if (omits
&& !been_omitted
)
231 * Must update omit information of children
232 * recursively; they have not been omitted yet.
234 filter_res
= LOFR_ZERO
;
236 filter_res
= LOFR_SKIP_TREE
;
239 filter_data
->current_depth
++;
/*
 * free_fn of the tree-depth filter: interprets filter_data as a
 * struct filter_trees_depth_data and releases its seen_at_depth map with
 * oidmap_free().
 */
244 static void filter_trees_free(void *filter_data
) {
245 struct filter_trees_depth_data
*d
= filter_data
;
248 oidmap_free(&d
->seen_at_depth
, 1);
/*
 * Set up the tree-depth filter.
 *
 * Allocates zeroed filter data with xcalloc(), initializes its
 * seen_at_depth map, copies filter_options->tree_exclude_depth into
 * exclude_depth and starts current_depth at 0.  The data is then attached
 * to the filter together with filter_trees_depth as the per-object callback
 * and filter_trees_free as free_fn.
 */
252 static void filter_trees_depth__init(
253 struct list_objects_filter_options
*filter_options
,
254 struct filter
*filter
)
256 struct filter_trees_depth_data
*d
= xcalloc(1, sizeof(*d
));
257 oidmap_init(&d
->seen_at_depth
, 0);
258 d
->exclude_depth
= filter_options
->tree_exclude_depth
;
259 d
->current_depth
= 0;
261 filter
->filter_data
= d
;
262 filter
->filter_object_fn
= filter_trees_depth
;
263 filter
->free_fn
= filter_trees_free
;
267 * A filter for list-objects to omit large blobs.
268 * And to OPTIONALLY collect a list of the omitted OIDs.
/*
 * State of the blob-size filter.  max_bytes is copied from
 * filter_options->blob_limit_value at init time and is the threshold that
 * filter_blobs_limit() compares each blob length against
 * (object_length < max_bytes).
 */
270 struct filter_blobs_limit_data
{
271 unsigned long max_bytes
;
/*
 * Filter callback for the blob-size filter.
 *
 * Tags, commits and trees (LOFS_BEGIN_TREE) are always returned as
 * LOFR_MARK_SEEN | LOFR_DO_SHOW.  For a blob, which is asserted to not
 * carry the SEEN flag yet, the type and length are queried with
 * oid_object_info().  Two exits exist for blobs:
 *   - omit: the OID is inserted into omits and LOFR_MARK_SEEN is returned
 *     without LOFR_DO_SHOW;
 *   - include: the OID is removed from omits and
 *     LOFR_MARK_SEEN | LOFR_DO_SHOW is returned.
 * A blob whose type lookup does not yield OBJ_BLOB is force-shown, as the
 * inline comment explains.
 *
 * pathname and filename are unused.
 *
 * NOTE(review): presumably blobs with object_length < max_bytes take the
 * include exit and all others the omit exit; the branch targets are not
 * visible next to the comparison -- confirm.
 */
274 static enum list_objects_filter_result
filter_blobs_limit(
275 struct repository
*r
,
276 enum list_objects_filter_situation filter_situation
,
278 const char *pathname UNUSED
,
279 const char *filename UNUSED
,
280 struct oidset
*omits
,
283 struct filter_blobs_limit_data
*filter_data
= filter_data_
;
284 unsigned long object_length
;
287 switch (filter_situation
) {
289 BUG("unknown filter_situation: %d", filter_situation
);
292 assert(obj
->type
== OBJ_TAG
);
293 /* always include all tag objects */
294 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
297 assert(obj
->type
== OBJ_COMMIT
);
298 /* always include all commit objects */
299 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
301 case LOFS_BEGIN_TREE
:
302 assert(obj
->type
== OBJ_TREE
);
303 /* always include all tree objects */
304 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
307 assert(obj
->type
== OBJ_TREE
);
311 assert(obj
->type
== OBJ_BLOB
);
312 assert((obj
->flags
& SEEN
) == 0);
314 t
= oid_object_info(r
, &obj
->oid
, &object_length
);
315 if (t
!= OBJ_BLOB
) { /* probably OBJ_NONE */
317 * We DO NOT have the blob locally, so we cannot
318 * apply the size filter criteria. Be conservative
319 * and force show it (and let the caller deal with
325 if (object_length
< filter_data
->max_bytes
)
329 oidset_insert(omits
, &obj
->oid
);
330 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
335 oidset_remove(omits
, &obj
->oid
);
336 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
/*
 * Set up the blob-size filter.
 *
 * Allocates zeroed filter data with xcalloc(), copies
 * filter_options->blob_limit_value into max_bytes and attaches the data to
 * the filter together with filter_blobs_limit as the per-object callback
 * and free as free_fn.
 */
339 static void filter_blobs_limit__init(
340 struct list_objects_filter_options
*filter_options
,
341 struct filter
*filter
)
343 struct filter_blobs_limit_data
*d
= xcalloc(1, sizeof(*d
));
344 d
->max_bytes
= filter_options
->blob_limit_value
;
346 filter
->filter_data
= d
;
347 filter
->filter_object_fn
= filter_blobs_limit
;
348 filter
->free_fn
= free
;
352 * A filter driven by a sparse-checkout specification to only
353 * include blobs that a sparse checkout would populate.
355 * The sparse-checkout spec can be loaded from a blob with the
356 * given OID or from a local pathname. We allow an OID because
357 * the repo may be bare or we may be doing the filtering on the
362 * default_match is the usual default include/exclude value that
363 * should be inherited as we recurse into directories based
364 * upon pattern matching of the directory itself or of a
365 * containing directory.
367 enum pattern_match_result default_match
;
370 * 1 if the directory (recursively) contains any provisionally
373 * 0 if everything (recursively) contained in this directory
374 * has been explicitly included (SHOWN) in the result and
375 * the directory may be short-cut later in the traversal.
377 unsigned child_prov_omit
: 1;
/*
 * State of the sparse filter.
 *
 * pl is the pattern list that pathnames are matched against.  array_frame
 * is a growable array of struct frame, one per directory level:
 * filter_sparse() fills a new frame in its LOFS_BEGIN_TREE handling, reads
 * array_frame[nr - 1] as the current frame, and steps back with --nr when
 * a tree is finished.
 */
380 struct filter_sparse_data
{
381 struct pattern_list pl
;
384 struct frame
*array_frame
;
/*
 * Filter callback for the sparse filter.
 *
 * Tags and commits are always returned as LOFR_MARK_SEEN | LOFR_DO_SHOW.
 *
 * LOFS_BEGIN_TREE: the pathname is matched against the pattern list; an
 * UNDECIDED result inherits default_match from the current frame.  A new
 * frame is then filled with that match and child_prov_omit = 0.  The tree
 * is tracked with the FILTER_SHOWN_BUT_REVISIT flag rather than being
 * marked SEEN, for the reasons given in the inline comment.
 *
 * When a tree is finished, its frame is taken off the end of array_frame
 * and its child_prov_omit bit is OR-ed into the parent frame.  If no child
 * was provisionally omitted, LOFR_MARK_SEEN is returned.
 *
 * Blobs, asserted to not carry the SEEN flag yet, are matched the same
 * way.  On MATCHED the OID is removed from omits and
 * LOFR_MARK_SEEN | LOFR_DO_SHOW is returned.  Otherwise the OID is inserted
 * into omits and child_prov_omit is set on the current frame.
 *
 * NOTE(review): the values returned on the FILTER_SHOWN_BUT_REVISIT
 * branches and after a provisional omit are not visible next to those
 * statements; confirm them.
 */
387 static enum list_objects_filter_result
filter_sparse(
388 struct repository
*r
,
389 enum list_objects_filter_situation filter_situation
,
391 const char *pathname
,
392 const char *filename
,
393 struct oidset
*omits
,
396 struct filter_sparse_data
*filter_data
= filter_data_
;
399 enum pattern_match_result match
;
401 switch (filter_situation
) {
403 BUG("unknown filter_situation: %d", filter_situation
);
406 assert(obj
->type
== OBJ_TAG
);
407 /* always include all tag objects */
408 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
411 assert(obj
->type
== OBJ_COMMIT
);
412 /* always include all commit objects */
413 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
415 case LOFS_BEGIN_TREE
:
416 assert(obj
->type
== OBJ_TREE
);
418 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
419 filename
, &dtype
, &filter_data
->pl
,
421 if (match
== UNDECIDED
)
422 match
= filter_data
->array_frame
[filter_data
->nr
- 1].default_match
;
424 ALLOC_GROW(filter_data
->array_frame
, filter_data
->nr
+ 1,
426 filter_data
->array_frame
[filter_data
->nr
].default_match
= match
;
427 filter_data
->array_frame
[filter_data
->nr
].child_prov_omit
= 0;
431 * A directory with this tree OID may appear in multiple
432 * places in the tree. (Think of a directory move or copy,
433 * with no other changes, so the OID is the same, but the
434 * full pathnames of objects within this directory are new
435 * and may match is_excluded() patterns differently.)
436 * So we cannot mark this directory as SEEN (yet), since
437 * that will prevent process_tree() from revisiting this
438 * tree object with other pathname prefixes.
440 * Only _DO_SHOW the tree object the first time we visit
443 * We always show all tree objects. A future optimization
444 * may want to attempt to narrow this.
446 if (obj
->flags
& FILTER_SHOWN_BUT_REVISIT
)
448 obj
->flags
|= FILTER_SHOWN_BUT_REVISIT
;
452 assert(obj
->type
== OBJ_TREE
);
453 assert(filter_data
->nr
> 1);
455 frame
= &filter_data
->array_frame
[--filter_data
->nr
];
458 * Tell our parent directory if any of our children were
459 * provisionally omitted.
461 filter_data
->array_frame
[filter_data
->nr
- 1].child_prov_omit
|=
462 frame
->child_prov_omit
;
465 * If there are NO provisionally omitted child objects (ALL child
466 * objects in this folder were INCLUDED), then we can mark the
467 * folder as SEEN (so we will not have to revisit it again).
469 if (!frame
->child_prov_omit
)
470 return LOFR_MARK_SEEN
;
474 assert(obj
->type
== OBJ_BLOB
);
475 assert((obj
->flags
& SEEN
) == 0);
477 frame
= &filter_data
->array_frame
[filter_data
->nr
- 1];
480 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
481 filename
, &dtype
, &filter_data
->pl
,
483 if (match
== UNDECIDED
)
484 match
= frame
->default_match
;
485 if (match
== MATCHED
) {
487 oidset_remove(omits
, &obj
->oid
);
488 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
492 * Provisionally omit it. We've already established that
493 * this pathname is not in the sparse-checkout specification
494 * with the CURRENT pathname, so we *WANT* to omit this blob.
496 * However, a pathname elsewhere in the tree may also
497 * reference this same blob, so we cannot reject it yet.
498 * Leave the LOFR_ bits unset so that if the blob appears
499 * again in the traversal, we will be asked again.
502 oidset_insert(omits
, &obj
->oid
);
505 * Remember that at least 1 blob in this tree was
506 * provisionally omitted. This prevents us from short
507 * cutting the tree in future iterations.
509 frame
->child_prov_omit
= 1;
/*
 * free_fn of the sparse filter: interprets filter_data as a
 * struct filter_sparse_data, clears its pattern list and frees the
 * array_frame storage.
 */
515 static void filter_sparse_free(void *filter_data
)
517 struct filter_sparse_data
*d
= filter_data
;
518 clear_pattern_list(&d
->pl
);
519 free(d
->array_frame
);
/*
 * Set up the sparse filter from a blob named by
 * filter_options->sparse_oid_name.
 *
 * The name is resolved in the_repository with get_oid_with_context() and
 * GET_OID_BLOB; failure is fatal via die().  The patterns are then loaded
 * from that blob into the pattern list, and a parse failure is fatal as
 * well.  The first frame of array_frame is seeded with default_match = 0
 * and child_prov_omit = 0.  Finally the data is attached to the filter
 * together with filter_sparse as the per-object callback and
 * filter_sparse_free as free_fn.
 */
523 static void filter_sparse_oid__init(
524 struct list_objects_filter_options
*filter_options
,
525 struct filter
*filter
)
527 struct filter_sparse_data
*d
= xcalloc(1, sizeof(*d
));
528 struct object_context oc
;
529 struct object_id sparse_oid
;
531 if (get_oid_with_context(the_repository
,
532 filter_options
->sparse_oid_name
,
533 GET_OID_BLOB
, &sparse_oid
, &oc
))
534 die(_("unable to access sparse blob in '%s'"),
535 filter_options
->sparse_oid_name
);
536 if (add_patterns_from_blob_to_list(&sparse_oid
, "", 0, &d
->pl
) < 0)
537 die(_("unable to parse sparse filter data in %s"),
538 oid_to_hex(&sparse_oid
));
540 ALLOC_GROW(d
->array_frame
, d
->nr
+ 1, d
->alloc
);
541 d
->array_frame
[d
->nr
].default_match
= 0; /* default to include */
542 d
->array_frame
[d
->nr
].child_prov_omit
= 0;
545 filter
->filter_data
= d
;
546 filter
->filter_object_fn
= filter_sparse
;
547 filter
->free_fn
= filter_sparse_free
;
551 * A filter for list-objects to show only objects of a single given type.
552 * This filter does not collect a list of the omitted OIDs.
/*
 * State of the object-type filter.  object_type is copied from
 * filter_options->object_type at init time; filter_object_type() shows an
 * object only when its type equals this value.
 */
554 struct filter_object_type_data
{
555 enum object_type object_type
;
/*
 * Filter callback for the object-type filter.
 *
 * Tags, commits and blobs are always marked seen; LOFR_DO_SHOW is added
 * only when the wanted object_type matches the type of the object.
 *
 * LOFS_BEGIN_TREE: when the wanted type is OBJ_COMMIT or OBJ_TAG the tree
 * is not walked at all (LOFR_SKIP_TREE).  When the wanted type is OBJ_TREE
 * the tree is shown and marked seen.  Otherwise it is only marked seen.
 *
 * r, pathname, filename and omits are unused.  An unrecognized
 * filter_situation is reported through BUG().
 */
558 static enum list_objects_filter_result
filter_object_type(
559 struct repository
*r UNUSED
,
560 enum list_objects_filter_situation filter_situation
,
562 const char *pathname UNUSED
,
563 const char *filename UNUSED
,
564 struct oidset
*omits UNUSED
,
567 struct filter_object_type_data
*filter_data
= filter_data_
;
569 switch (filter_situation
) {
571 BUG("unknown filter_situation: %d", filter_situation
);
574 assert(obj
->type
== OBJ_TAG
);
575 if (filter_data
->object_type
== OBJ_TAG
)
576 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
577 return LOFR_MARK_SEEN
;
580 assert(obj
->type
== OBJ_COMMIT
);
581 if (filter_data
->object_type
== OBJ_COMMIT
)
582 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
583 return LOFR_MARK_SEEN
;
585 case LOFS_BEGIN_TREE
:
586 assert(obj
->type
== OBJ_TREE
);
589 * If we only want to show commits or tags, then there is no
590 * need to walk down trees.
592 if (filter_data
->object_type
== OBJ_COMMIT
||
593 filter_data
->object_type
== OBJ_TAG
)
594 return LOFR_SKIP_TREE
;
596 if (filter_data
->object_type
== OBJ_TREE
)
597 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
599 return LOFR_MARK_SEEN
;
602 assert(obj
->type
== OBJ_BLOB
);
604 if (filter_data
->object_type
== OBJ_BLOB
)
605 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
606 return LOFR_MARK_SEEN
;
/*
 * Set up the object-type filter.
 *
 * Allocates zeroed filter data with xcalloc(), copies
 * filter_options->object_type into it and attaches the data to the filter
 * together with filter_object_type as the per-object callback and free as
 * free_fn.
 */
613 static void filter_object_type__init(
614 struct list_objects_filter_options
*filter_options
,
615 struct filter
*filter
)
617 struct filter_object_type_data
*d
= xcalloc(1, sizeof(*d
));
618 d
->object_type
= filter_options
->object_type
;
620 filter
->filter_data
= d
;
621 filter
->filter_object_fn
= filter_object_type
;
622 filter
->free_fn
= free
;
625 /* A filter which only shows objects shown by all sub-filters. */
/*
 * State of the combine filter.  sub is an array of subfilter records that
 * the combine functions index from 0 up to (but not including) the nr
 * member.
 */
626 struct combine_filter_data
{
627 struct subfilter
*sub
;
/*
 * Run one subfilter of a combine filter on an object.
 *
 * While sub->is_skipping_tree is set, the flag is cleared again once
 * LOFS_END_TREE arrives for the tree whose OID was saved in
 * sub->skip_tree.  Membership of the object in sub->seen is checked before
 * the subfilter is consulted.  Otherwise the object is passed to
 * list_objects_filter__filter_object() with sub->filter.  A result carrying
 * LOFR_MARK_SEEN adds the OID to sub->seen.  A result carrying
 * LOFR_SKIP_TREE sets is_skipping_tree and remembers the OID in skip_tree.
 *
 * NOTE(review): the values returned while a tree is being skipped and for
 * objects already in sub->seen are not visible next to those checks;
 * confirm them.
 */
631 static enum list_objects_filter_result
process_subfilter(
632 struct repository
*r
,
633 enum list_objects_filter_situation filter_situation
,
635 const char *pathname
,
636 const char *filename
,
637 struct subfilter
*sub
)
639 enum list_objects_filter_result result
;
642 * Check and update is_skipping_tree before oidset_contains so
643 * that is_skipping_tree gets unset even when the object is
644 * marked as seen. As of this writing, no filter uses
645 * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
646 * ordering is only theoretically important. Be cautious if you
647 * change the order of the below checks and more filters have
650 if (sub
->is_skipping_tree
) {
651 if (filter_situation
== LOFS_END_TREE
&&
652 oideq(&obj
->oid
, &sub
->skip_tree
))
653 sub
->is_skipping_tree
= 0;
657 if (oidset_contains(&sub
->seen
, &obj
->oid
))
660 result
= list_objects_filter__filter_object(
661 r
, filter_situation
, obj
, pathname
, filename
, sub
->filter
);
663 if (result
& LOFR_MARK_SEEN
)
664 oidset_insert(&sub
->seen
, &obj
->oid
);
666 if (result
& LOFR_SKIP_TREE
) {
667 sub
->is_skipping_tree
= 1;
668 sub
->skip_tree
= obj
->oid
;
/*
 * Filter callback for the combine filter.
 *
 * The combined result starts with LOFR_DO_SHOW, LOFR_MARK_SEEN and
 * LOFR_SKIP_TREE all set.  Every subfilter is run through
 * process_subfilter(), and a bit is cleared as soon as one subfilter
 * disagrees:
 *   - LOFR_DO_SHOW when a subfilter result lacks LOFR_DO_SHOW;
 *   - LOFR_MARK_SEEN when a subfilter result lacks LOFR_MARK_SEEN;
 *   - LOFR_SKIP_TREE when a subfilter is not currently skipping a tree.
 * Each bit therefore survives only if every subfilter agrees on it.
 *
 * The omits argument is unused here.
 */
674 static enum list_objects_filter_result
filter_combine(
675 struct repository
*r
,
676 enum list_objects_filter_situation filter_situation
,
678 const char *pathname
,
679 const char *filename
,
680 struct oidset
*omits UNUSED
,
683 struct combine_filter_data
*d
= filter_data
;
684 enum list_objects_filter_result combined_result
=
685 LOFR_DO_SHOW
| LOFR_MARK_SEEN
| LOFR_SKIP_TREE
;
688 for (sub
= 0; sub
< d
->nr
; sub
++) {
689 enum list_objects_filter_result sub_result
= process_subfilter(
690 r
, filter_situation
, obj
, pathname
, filename
,
692 if (!(sub_result
& LOFR_DO_SHOW
))
693 combined_result
&= ~LOFR_DO_SHOW
;
694 if (!(sub_result
& LOFR_MARK_SEEN
))
695 combined_result
&= ~LOFR_MARK_SEEN
;
696 if (!d
->sub
[sub
].is_skipping_tree
)
697 combined_result
&= ~LOFR_SKIP_TREE
;
700 return combined_result
;
/*
 * free_fn of the combine filter.
 *
 * For every subfilter: release the nested filter with
 * list_objects_filter__free() and clear its seen set.  The omits set of
 * each subfilter is expected to be empty at this point; a non-empty set is
 * reported through BUG().
 */
703 static void filter_combine__free(void *filter_data
)
705 struct combine_filter_data
*d
= filter_data
;
707 for (sub
= 0; sub
< d
->nr
; sub
++) {
708 list_objects_filter__free(d
->sub
[sub
].filter
);
709 oidset_clear(&d
->sub
[sub
].seen
);
710 if (d
->sub
[sub
].omits
.set
.size
)
711 BUG("expected oidset to be cleared already");
/*
 * Insert every OID found in src into dest.  src is only iterated, never
 * modified here.
 */
717 static void add_all(struct oidset
*dest
, struct oidset
*src
) {
718 struct oidset_iter iter
;
719 struct object_id
*src_oid
;
721 oidset_iter_init(src
, &iter
);
722 while ((src_oid
= oidset_iter_next(&iter
)) != NULL
)
723 oidset_insert(dest
, src_oid
);
/*
 * finalize_omits_fn of the combine filter: merge the per-subfilter omits
 * sets into the caller-supplied omits set.  Each subfilter set is copied
 * with add_all() and then cleared.
 */
726 static void filter_combine__finalize_omits(
727 struct oidset
*omits
,
730 struct combine_filter_data
*d
= filter_data
;
733 for (sub
= 0; sub
< d
->nr
; sub
++) {
734 add_all(omits
, &d
->sub
[sub
].omits
);
735 oidset_clear(&d
->sub
[sub
].omits
);
/*
 * Set up the combine filter.
 *
 * Allocates zeroed filter data, sizes the sub array from
 * filter_options->sub_nr and creates one nested filter per entry of
 * filter_options->sub with list_objects_filter__init().  A nested filter
 * gets its own omits set (the one inside its subfilter record) only when
 * the combine filter itself was given an omits set; otherwise it gets
 * NULL.  The data is attached to the filter together with filter_combine,
 * filter_combine__free and filter_combine__finalize_omits.
 */
739 static void filter_combine__init(
740 struct list_objects_filter_options
*filter_options
,
741 struct filter
* filter
)
743 struct combine_filter_data
*d
= xcalloc(1, sizeof(*d
));
746 d
->nr
= filter_options
->sub_nr
;
747 CALLOC_ARRAY(d
->sub
, d
->nr
);
748 for (sub
= 0; sub
< d
->nr
; sub
++)
749 d
->sub
[sub
].filter
= list_objects_filter__init(
750 filter
->omits
? &d
->sub
[sub
].omits
: NULL
,
751 &filter_options
->sub
[sub
]);
753 filter
->filter_data
= d
;
754 filter
->filter_object_fn
= filter_combine
;
755 filter
->free_fn
= filter_combine__free
;
756 filter
->finalize_omits_fn
= filter_combine__finalize_omits
;
/*
 * Signature shared by all per-filter init functions: given the parsed
 * filter options, fill in the callbacks and data of the filter.
 */
759 typedef void (*filter_init_fn
)(
760 struct list_objects_filter_options
*filter_options
,
761 struct filter
*filter
);
764 * Must match "enum list_objects_filter_choice".
/*
 * Table of init functions.  list_objects_filter__init() indexes it with
 * filter_options->choice and asserts that it has LOFC__COUNT entries.
 */
766 static filter_init_fn s_filters
[] = {
768 filter_blobs_none__init
,
769 filter_blobs_limit__init
,
770 filter_trees_depth__init
,
771 filter_sparse_oid__init
,
772 filter_object_type__init
,
773 filter_combine__init
,
/*
 * Create a filter for the given options.
 *
 * Asserts that s_filters has LOFC__COUNT entries and reports an
 * out-of-range filter_options->choice through BUG().  The init function
 * for the choice is looked up in s_filters, a zeroed struct filter is
 * allocated, its omits member is pointed at the omitted argument, and the
 * init function is run on it.
 *
 * NOTE(review): the return statements are not visible next to this code;
 * presumably the new filter is returned -- confirm, including what happens
 * for a choice without an init function.
 */
776 struct filter
*list_objects_filter__init(
777 struct oidset
*omitted
,
778 struct list_objects_filter_options
*filter_options
)
780 struct filter
*filter
;
781 filter_init_fn init_fn
;
783 assert((sizeof(s_filters
) / sizeof(s_filters
[0])) == LOFC__COUNT
);
788 if (filter_options
->choice
>= LOFC__COUNT
)
789 BUG("invalid list-objects filter choice: %d",
790 filter_options
->choice
);
792 init_fn
= s_filters
[filter_options
->choice
];
796 CALLOC_ARRAY(filter
, 1);
797 filter
->omits
= omitted
;
798 init_fn(filter_options
, filter
);
/*
 * Ask a filter what to do with one object.
 *
 * When a filter is given and the object carries the NOT_USER_GIVEN flag,
 * the decision is delegated to filter->filter_object_fn.  Otherwise (no
 * filter, or the object was given explicitly by the user) the object is
 * shown and marked seen, except for LOFS_END_TREE, which is handled
 * separately as the inline comment explains.
 *
 * NOTE(review): the value returned for LOFS_END_TREE is not visible next
 * to that check; confirm it.
 */
802 enum list_objects_filter_result
list_objects_filter__filter_object(
803 struct repository
*r
,
804 enum list_objects_filter_situation filter_situation
,
806 const char *pathname
,
807 const char *filename
,
808 struct filter
*filter
)
810 if (filter
&& (obj
->flags
& NOT_USER_GIVEN
))
811 return filter
->filter_object_fn(r
, filter_situation
, obj
,
814 filter
->filter_data
);
816 * No filter is active or user gave object explicitly. In this case,
817 * always show the object (except when LOFS_END_TREE, since this tree
818 * had already been shown when LOFS_BEGIN_TREE).
820 if (filter_situation
== LOFS_END_TREE
)
822 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
/*
 * Release a filter.
 *
 * If the filter has both a finalize_omits_fn and an omits set, the
 * finalizer runs first so that collected omits reach the caller-supplied
 * set.  The filter-specific data is then released through free_fn.
 */
825 void list_objects_filter__free(struct filter
*filter
)
829 if (filter
->finalize_omits_fn
&& filter
->omits
)
830 filter
->finalize_omits_fn(filter
->omits
, filter
->filter_data
);
831 filter
->free_fn(filter
->filter_data
);