TEST_BUILTINS_OBJS += test-mktemp.o
TEST_BUILTINS_OBJS += test-oid-array.o
TEST_BUILTINS_OBJS += test-oidmap.o
+TEST_BUILTINS_OBJS += test-oidtree.o
TEST_BUILTINS_OBJS += test-online-cpus.o
TEST_BUILTINS_OBJS += test-parse-options.o
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
LIB_OBJS += bulk-checkin.o
LIB_OBJS += bundle.o
LIB_OBJS += cache-tree.o
+LIB_OBJS += cbtree.o
LIB_OBJS += chdir-notify.o
LIB_OBJS += checkout.o
LIB_OBJS += chunk-format.o
LIB_OBJS += oid-array.o
LIB_OBJS += oidmap.o
LIB_OBJS += oidset.o
+LIB_OBJS += oidtree.o
LIB_OBJS += pack-bitmap-write.o
LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-check.o
--- /dev/null
+/*
+ * crit-bit tree implementation, does no allocations internally
+ * For more information on crit-bit trees: https://cr.yp.to/critbit.html
+ * Based on Adam Langley's adaptation of Dan Bernstein's public domain code
+ * git clone https://github.com/agl/critbit.git
+ */
+#include "cbtree.h"
+
+static struct cb_node *cb_node_of(const void *p)
+{
+ return (struct cb_node *)((uintptr_t)p - 1);
+}
+
+/* locate the best match, does not do a final comparision */
+static struct cb_node *cb_internal_best_match(struct cb_node *p,
+ const uint8_t *k, size_t klen)
+{
+ while (1 & (uintptr_t)p) {
+ struct cb_node *q = cb_node_of(p);
+ uint8_t c = q->byte < klen ? k[q->byte] : 0;
+ size_t direction = (1 + (q->otherbits | c)) >> 8;
+
+ p = q->child[direction];
+ }
+ return p;
+}
+
+/* returns NULL if successful, existing cb_node if duplicate */
+struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen)
+{
+ size_t newbyte, newotherbits;
+ uint8_t c;
+ int newdirection;
+ struct cb_node **wherep, *p;
+
+ assert(!((uintptr_t)node & 1)); /* allocations must be aligned */
+
+ if (!t->root) { /* insert into empty tree */
+ t->root = node;
+ return NULL; /* success */
+ }
+
+ /* see if a node already exists */
+ p = cb_internal_best_match(t->root, node->k, klen);
+
+ /* find first differing byte */
+ for (newbyte = 0; newbyte < klen; newbyte++) {
+ if (p->k[newbyte] != node->k[newbyte])
+ goto different_byte_found;
+ }
+ return p; /* element exists, let user deal with it */
+
+different_byte_found:
+ newotherbits = p->k[newbyte] ^ node->k[newbyte];
+ newotherbits |= newotherbits >> 1;
+ newotherbits |= newotherbits >> 2;
+ newotherbits |= newotherbits >> 4;
+ newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255;
+ c = p->k[newbyte];
+ newdirection = (1 + (newotherbits | c)) >> 8;
+
+ node->byte = newbyte;
+ node->otherbits = newotherbits;
+ node->child[1 - newdirection] = node;
+
+ /* find a place to insert it */
+ wherep = &t->root;
+ for (;;) {
+ struct cb_node *q;
+ size_t direction;
+
+ p = *wherep;
+ if (!(1 & (uintptr_t)p))
+ break;
+ q = cb_node_of(p);
+ if (q->byte > newbyte)
+ break;
+ if (q->byte == newbyte && q->otherbits > newotherbits)
+ break;
+ c = q->byte < klen ? node->k[q->byte] : 0;
+ direction = (1 + (q->otherbits | c)) >> 8;
+ wherep = q->child + direction;
+ }
+
+ node->child[newdirection] = *wherep;
+ *wherep = (struct cb_node *)(1 + (uintptr_t)node);
+
+ return NULL; /* success */
+}
+
+struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen)
+{
+ struct cb_node *p = cb_internal_best_match(t->root, k, klen);
+
+ return p && !memcmp(p->k, k, klen) ? p : NULL;
+}
+
+struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen)
+{
+ struct cb_node **wherep = &t->root;
+ struct cb_node **whereq = NULL;
+ struct cb_node *q = NULL;
+ size_t direction = 0;
+ uint8_t c;
+ struct cb_node *p = t->root;
+
+ if (!p) return NULL; /* empty tree, nothing to delete */
+
+ /* traverse to find best match, keeping link to parent */
+ while (1 & (uintptr_t)p) {
+ whereq = wherep;
+ q = cb_node_of(p);
+ c = q->byte < klen ? k[q->byte] : 0;
+ direction = (1 + (q->otherbits | c)) >> 8;
+ wherep = q->child + direction;
+ p = *wherep;
+ }
+
+ if (memcmp(p->k, k, klen))
+ return NULL; /* no match, nothing unlinked */
+
+ /* found an exact match */
+ if (whereq) /* update parent */
+ *whereq = q->child[1 - direction];
+ else
+ t->root = NULL;
+ return p;
+}
+
+static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg)
+{
+ if (1 & (uintptr_t)p) {
+ struct cb_node *q = cb_node_of(p);
+ enum cb_next n = cb_descend(q->child[0], fn, arg);
+
+ return n == CB_BREAK ? n : cb_descend(q->child[1], fn, arg);
+ } else {
+ return fn(p, arg);
+ }
+}
+
+void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen,
+ cb_iter fn, void *arg)
+{
+ struct cb_node *p = t->root;
+ struct cb_node *top = p;
+ size_t i = 0;
+
+ if (!p) return; /* empty tree */
+
+ /* Walk tree, maintaining top pointer */
+ while (1 & (uintptr_t)p) {
+ struct cb_node *q = cb_node_of(p);
+ uint8_t c = q->byte < klen ? kpfx[q->byte] : 0;
+ size_t direction = (1 + (q->otherbits | c)) >> 8;
+
+ p = q->child[direction];
+ if (q->byte < klen)
+ top = p;
+ }
+
+ for (i = 0; i < klen; i++) {
+ if (p->k[i] != kpfx[i])
+ return; /* "best" match failed */
+ }
+ cb_descend(top, fn, arg);
+}
--- /dev/null
+/*
+ * crit-bit tree implementation, does no allocations internally
+ * For more information on crit-bit trees: https://cr.yp.to/critbit.html
+ * Based on Adam Langley's adaptation of Dan Bernstein's public domain code
+ * git clone https://github.com/agl/critbit.git
+ *
+ * This is adapted to store arbitrary data (not just NUL-terminated C strings
+ * and allocates no memory internally. The user needs to allocate
+ * "struct cb_node" and fill cb_node.k[] with arbitrary match data
+ * for memcmp.
+ * If "klen" is variable, then it should be embedded into "c_node.k[]"
+ * Recursion is bound by the maximum value of "klen" used.
+ */
+#ifndef CBTREE_H
+#define CBTREE_H
+
+#include "git-compat-util.h"
+
+struct cb_node;
+struct cb_node {
+ struct cb_node *child[2];
+ /*
+ * n.b. uint32_t for `byte' is excessive for OIDs,
+ * we may consider shorter variants if nothing else gets stored.
+ */
+ uint32_t byte;
+ uint8_t otherbits;
+ uint8_t k[FLEX_ARRAY]; /* arbitrary data */
+};
+
+struct cb_tree {
+ struct cb_node *root;
+};
+
+enum cb_next {
+ CB_CONTINUE = 0,
+ CB_BREAK = 1
+};
+
+#define CBTREE_INIT { .root = NULL }
+
+static inline void cb_init(struct cb_tree *t)
+{
+ t->root = NULL;
+}
+
+struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen);
+struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen);
+struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen);
+
+typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg);
+
+void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen,
+ cb_iter, void *arg);
+
+#endif /* CBTREE_H */
prepare_alt_odb(r);
for (odb = r->objects->odb; odb; odb = odb->next) {
- if (oid_array_lookup(odb_loose_cache(odb, oid), oid) >= 0)
+ if (oidtree_contains(odb_loose_cache(odb, oid), oid))
return 1;
}
return 0;
static int append_loose_object(const struct object_id *oid, const char *path,
void *data)
{
- oid_array_append(data, oid);
+ oidtree_insert(data, oid);
return 0;
}
-struct oid_array *odb_loose_cache(struct object_directory *odb,
+struct oidtree *odb_loose_cache(struct object_directory *odb,
const struct object_id *oid)
{
int subdir_nr = oid->hash[0];
bitmap = &odb->loose_objects_subdir_seen[word_index];
if (*bitmap & mask)
- return &odb->loose_objects_cache[subdir_nr];
-
+ return odb->loose_objects_cache;
+ if (!odb->loose_objects_cache) {
+ ALLOC_ARRAY(odb->loose_objects_cache, 1);
+ oidtree_init(odb->loose_objects_cache);
+ }
strbuf_addstr(&buf, odb->path);
for_each_file_in_obj_subdir(subdir_nr, &buf,
append_loose_object,
NULL, NULL,
- &odb->loose_objects_cache[subdir_nr]);
+ odb->loose_objects_cache);
*bitmap |= mask;
strbuf_release(&buf);
- return &odb->loose_objects_cache[subdir_nr];
+ return odb->loose_objects_cache;
}
void odb_clear_loose_cache(struct object_directory *odb)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(odb->loose_objects_cache); i++)
- oid_array_clear(&odb->loose_objects_cache[i]);
+ oidtree_clear(odb->loose_objects_cache);
+ FREE_AND_NULL(odb->loose_objects_cache);
memset(&odb->loose_objects_subdir_seen, 0,
sizeof(odb->loose_objects_subdir_seen));
}
static int match_hash(unsigned, const unsigned char *, const unsigned char *);
+static enum cb_next match_prefix(const struct object_id *oid, void *arg)
+{
+ struct disambiguate_state *ds = arg;
+ /* no need to call match_hash, oidtree_each did prefix match */
+ update_candidates(ds, oid);
+ return ds->ambiguous ? CB_BREAK : CB_CONTINUE;
+}
+
static void find_short_object_filename(struct disambiguate_state *ds)
{
struct object_directory *odb;
- for (odb = ds->repo->objects->odb; odb && !ds->ambiguous; odb = odb->next) {
- int pos;
- struct oid_array *loose_objects;
-
- loose_objects = odb_loose_cache(odb, &ds->bin_pfx);
- pos = oid_array_lookup(loose_objects, &ds->bin_pfx);
- if (pos < 0)
- pos = -1 - pos;
- while (!ds->ambiguous && pos < loose_objects->nr) {
- const struct object_id *oid;
- oid = loose_objects->oid + pos;
- if (!match_hash(ds->len, ds->bin_pfx.hash, oid->hash))
- break;
- update_candidates(ds, oid);
- pos++;
- }
- }
+ for (odb = ds->repo->objects->odb; odb && !ds->ambiguous; odb = odb->next)
+ oidtree_each(odb_loose_cache(odb, &ds->bin_pfx),
+ &ds->bin_pfx, ds->len, match_prefix, ds);
}
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
#include "thread-utils.h"
#include "khash.h"
#include "dir.h"
+#include "oidtree.h"
struct object_directory {
struct object_directory *next;
* Be sure to call odb_load_loose_cache() before using.
*/
uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
- struct oid_array loose_objects_cache[256];
+ struct oidtree *loose_objects_cache;
/*
* Path to the alternative object store. If this is a relative path,
* Populate and return the loose object cache array corresponding to the
* given object ID.
*/
-struct oid_array *odb_loose_cache(struct object_directory *odb,
+struct oidtree *odb_loose_cache(struct object_directory *odb,
const struct object_id *oid);
/* Empty the loose object cache for the specified object directory. */
--- /dev/null
+/*
+ * A wrapper around cbtree which stores oids
+ * May be used to replace oid-array for prefix (abbreviation) matches
+ */
+#include "oidtree.h"
+#include "alloc.h"
+#include "hash.h"
+
+struct oidtree_node {
+ /* n.k[] is used to store "struct object_id" */
+ struct cb_node n;
+};
+
+struct oidtree_iter_data {
+ oidtree_iter fn;
+ void *arg;
+ size_t *last_nibble_at;
+ int algo;
+ uint8_t last_byte;
+};
+
+void oidtree_init(struct oidtree *ot)
+{
+ cb_init(&ot->tree);
+ mem_pool_init(&ot->mem_pool, 0);
+}
+
+void oidtree_clear(struct oidtree *ot)
+{
+ if (ot) {
+ mem_pool_discard(&ot->mem_pool, 0);
+ oidtree_init(ot);
+ }
+}
+
+void oidtree_insert(struct oidtree *ot, const struct object_id *oid)
+{
+ struct oidtree_node *on;
+
+ if (!oid->algo)
+ BUG("oidtree_insert requires oid->algo");
+
+ on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid));
+ oidcpy_with_padding((struct object_id *)on->n.k, oid);
+
+ /*
+ * n.b. Current callers won't get us duplicates, here. If a
+ * future caller causes duplicates, there'll be a a small leak
+ * that won't be freed until oidtree_clear. Currently it's not
+ * worth maintaining a free list
+ */
+ cb_insert(&ot->tree, &on->n, sizeof(*oid));
+}
+
+
+int oidtree_contains(struct oidtree *ot, const struct object_id *oid)
+{
+ struct object_id k;
+ size_t klen = sizeof(k);
+
+ oidcpy_with_padding(&k, oid);
+
+ if (oid->algo == GIT_HASH_UNKNOWN)
+ klen -= sizeof(oid->algo);
+
+ /* cb_lookup relies on memcmp on the struct, so order matters: */
+ klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) <
+ offsetof(struct object_id, algo));
+
+ return cb_lookup(&ot->tree, (const uint8_t *)&k, klen) ? 1 : 0;
+}
+
+static enum cb_next iter(struct cb_node *n, void *arg)
+{
+ struct oidtree_iter_data *x = arg;
+ const struct object_id *oid = (const struct object_id *)n->k;
+
+ if (x->algo != GIT_HASH_UNKNOWN && x->algo != oid->algo)
+ return CB_CONTINUE;
+
+ if (x->last_nibble_at) {
+ if ((oid->hash[*x->last_nibble_at] ^ x->last_byte) & 0xf0)
+ return CB_CONTINUE;
+ }
+
+ return x->fn(oid, x->arg);
+}
+
+void oidtree_each(struct oidtree *ot, const struct object_id *oid,
+ size_t oidhexsz, oidtree_iter fn, void *arg)
+{
+ size_t klen = oidhexsz / 2;
+ struct oidtree_iter_data x = { 0 };
+ assert(oidhexsz <= GIT_MAX_HEXSZ);
+
+ x.fn = fn;
+ x.arg = arg;
+ x.algo = oid->algo;
+ if (oidhexsz & 1) {
+ x.last_byte = oid->hash[klen];
+ x.last_nibble_at = &klen;
+ }
+ cb_each(&ot->tree, (const uint8_t *)oid, klen, iter, &x);
+}
--- /dev/null
+#ifndef OIDTREE_H
+#define OIDTREE_H
+
+#include "cbtree.h"
+#include "hash.h"
+#include "mem-pool.h"
+
+struct oidtree {
+ struct cb_tree tree;
+ struct mem_pool mem_pool;
+};
+
+void oidtree_init(struct oidtree *);
+void oidtree_clear(struct oidtree *);
+void oidtree_insert(struct oidtree *, const struct object_id *);
+int oidtree_contains(struct oidtree *, const struct object_id *);
+
+typedef enum cb_next (*oidtree_iter)(const struct object_id *, void *data);
+void oidtree_each(struct oidtree *, const struct object_id *,
+ size_t oidhexsz, oidtree_iter, void *data);
+
+#endif /* OIDTREE_H */
--- /dev/null
+#include "test-tool.h"
+#include "cache.h"
+#include "oidtree.h"
+
+static enum cb_next print_oid(const struct object_id *oid, void *data)
+{
+ puts(oid_to_hex(oid));
+ return CB_CONTINUE;
+}
+
+int cmd__oidtree(int argc, const char **argv)
+{
+ struct oidtree ot;
+ struct strbuf line = STRBUF_INIT;
+ int nongit_ok;
+ int algo = GIT_HASH_UNKNOWN;
+
+ oidtree_init(&ot);
+ setup_git_directory_gently(&nongit_ok);
+
+ while (strbuf_getline(&line, stdin) != EOF) {
+ const char *arg;
+ struct object_id oid;
+
+ if (skip_prefix(line.buf, "insert ", &arg)) {
+ if (get_oid_hex_any(arg, &oid) == GIT_HASH_UNKNOWN)
+ die("insert not a hexadecimal oid: %s", arg);
+ algo = oid.algo;
+ oidtree_insert(&ot, &oid);
+ } else if (skip_prefix(line.buf, "contains ", &arg)) {
+ if (get_oid_hex(arg, &oid))
+ die("contains not a hexadecimal oid: %s", arg);
+ printf("%d\n", oidtree_contains(&ot, &oid));
+ } else if (skip_prefix(line.buf, "each ", &arg)) {
+ char buf[GIT_MAX_HEXSZ + 1] = { '0' };
+ memset(&oid, 0, sizeof(oid));
+ memcpy(buf, arg, strlen(arg));
+ buf[hash_algos[algo].hexsz] = '\0';
+ get_oid_hex_any(buf, &oid);
+ oid.algo = algo;
+ oidtree_each(&ot, &oid, strlen(arg), print_oid, NULL);
+ } else if (!strcmp(line.buf, "clear")) {
+ oidtree_clear(&ot);
+ } else {
+ die("unknown command: %s", line.buf);
+ }
+ }
+ return 0;
+}
{ "mktemp", cmd__mktemp },
{ "oid-array", cmd__oid_array },
{ "oidmap", cmd__oidmap },
+ { "oidtree", cmd__oidtree },
{ "online-cpus", cmd__online_cpus },
{ "parse-options", cmd__parse_options },
{ "parse-pathspec-file", cmd__parse_pathspec_file },
int cmd__mergesort(int argc, const char **argv);
int cmd__mktemp(int argc, const char **argv);
int cmd__oidmap(int argc, const char **argv);
+int cmd__oidtree(int argc, const char **argv);
int cmd__online_cpus(int argc, const char **argv);
int cmd__parse_options(int argc, const char **argv);
int cmd__parse_pathspec_file(int argc, const char** argv);
--- /dev/null
+#!/bin/sh
+
+test_description='basic tests for the oidtree implementation'
+. ./test-lib.sh
+
+maxhexsz=$(test_oid hexsz)
+echoid () {
+ prefix="${1:+$1 }"
+ shift
+ while test $# -gt 0
+ do
+ shortoid="$1"
+ shift
+ difference=$(($maxhexsz - ${#shortoid}))
+ printf "%s%s%0${difference}d\\n" "$prefix" "$shortoid" "0"
+ done
+}
+
+test_expect_success 'oidtree insert and contains' '
+ cat >expect <<-\EOF &&
+ 0
+ 0
+ 0
+ 1
+ 1
+ 0
+ EOF
+ {
+ echoid insert 444 1 2 3 4 5 a b c d e &&
+ echoid contains 44 441 440 444 4440 4444
+ echo clear
+ } | test-tool oidtree >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'oidtree each' '
+ echoid "" 123 321 321 >expect &&
+ {
+ echoid insert f 9 8 123 321 a b c d e
+ echo each 12300
+ echo each 3211
+ echo each 3210
+ echo each 32100
+ echo clear
+ } | test-tool oidtree >actual &&
+ test_cmp expect actual
+'
+
+test_done