git.ipfire.org Git - thirdparty/git.git/blobdiff - refs.c
refs: speed up is_refname_available
[thirdparty/git.git] / refs.c
diff --git a/refs.c b/refs.c
index 84b9070b8dca5eb94fbe9b7d7c463a0ea2af92eb..eb2262ac246f553cd98e943a626233b7e9c55c7e 100644 (file)
--- a/refs.c
+++ b/refs.c
@@ -7,27 +7,21 @@
 
 /*
  * How to handle various characters in refnames:
- * This table is used by both the SIMD and non-SIMD code.  It has
- * some cases that are only useful for the SIMD; these are handled
- * equivalently to the listed disposition in the non-SIMD code.
  * 0: An acceptable character for refs
- * 1: @, look for a following { to reject @{ in refs (SIMD or = 0)
- * 2: \0: End-of-component and string
- * 3: /: End-of-component (SIMD or = 2)
- * 4: ., look for a preceding . to reject .. in refs
- * 5: {, look for a preceding @ to reject @{ in refs
- * 6: *, usually a bad character except, once as a wildcard (SIMD or = 7)
- * 7: A bad character except * (see check_refname_component below)
+ * 1: End-of-component
+ * 2: ., look for a preceding . to reject .. in refs
+ * 3: {, look for a preceding @ to reject @{ in refs
+ * 4: A bad character: ASCII control characters, "~", "^", ":", "?", "*", "[", "\" or SP
  */
 static unsigned char refname_disposition[256] = {
-       2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-       7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-       7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 4, 3,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7,
-       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 0, 7, 0,
+       1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+       4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 7
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4
 };
 
 /*
@@ -39,9 +33,8 @@ static unsigned char refname_disposition[256] = {
  * - any path component of it begins with ".", or
  * - it has double dots "..", or
  * - it has ASCII control character, "~", "^", ":" or SP, anywhere, or
- * - it has pattern-matching notation "*", "?", "[", anywhere, or
- * - it ends with a "/", or
- * - it ends with ".lock", or
+ * - it ends with a "/", or
+ * - it ends with ".lock", or
  * - it contains a "\" (backslash)
  */
 static int check_refname_component(const char *refname, int flags)
@@ -53,19 +46,17 @@ static int check_refname_component(const char *refname, int flags)
                int ch = *cp & 255;
                unsigned char disp = refname_disposition[ch];
                switch (disp) {
-               case 2: /* fall-through */
-               case 3:
+               case 1:
                        goto out;
-               case 4:
+               case 2:
                        if (last == '.')
                                return -1; /* Refname contains "..". */
                        break;
-               case 5:
+               case 3:
                        if (last == '@')
                                return -1; /* Refname contains "@{". */
                        break;
-               case 6: /* fall-through */
-               case 7:
+               case 4:
                        return -1;
                }
                last = ch;
@@ -88,7 +79,7 @@ out:
        return cp - refname;
 }
 
-static int check_refname_format_bytewise(const char *refname, int flags)
+int check_refname_format(const char *refname, int flags)
 {
        int component_len, component_count = 0;
 
@@ -124,196 +115,6 @@ static int check_refname_format_bytewise(const char *refname, int flags)
        return 0;
 }
 
-#if defined(__GNUC__) && defined(__x86_64__)
-#define SSE_VECTOR_BYTES 16
-
-/* Vectorized version of check_refname_format. */
-int check_refname_format(const char *refname, int flags)
-{
-       const char *cp = refname;
-
-       const __m128i dot = _mm_set1_epi8('.');
-       const __m128i at = _mm_set1_epi8('@');
-       const __m128i curly = _mm_set1_epi8('{');
-       const __m128i slash = _mm_set1_epi8('/');
-       const __m128i zero = _mm_set1_epi8('\000');
-       const __m128i el = _mm_set1_epi8('l');
-
-       /* below '*', all characters are forbidden or rare */
-       const __m128i star_ub = _mm_set1_epi8('*' + 1);
-
-       const __m128i colon = _mm_set1_epi8(':');
-       const __m128i question = _mm_set1_epi8('?');
-
-       /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */
-       const __m128i bracket_lb = _mm_set1_epi8('[' - 1);
-       const __m128i caret_ub = _mm_set1_epi8('^' + 1);
-
-       /* '~' and above are forbidden */
-       const __m128i tilde_lb = _mm_set1_epi8('~' - 1);
-
-       int component_count = 0;
-       int orig_flags = flags;
-
-       if (refname[0] == 0 || refname[0] == '/') {
-               /* entirely empty ref or initial ref component */
-               return -1;
-       }
-
-       /*
-        * Initial ref component of '.'; below we look for /. so we'll
-        * miss this.
-        */
-       if (refname[0] == '.') {
-               if (refname[1] == '/' || refname[1] == '\0')
-                       return -1;
-               if (!(flags & REFNAME_DOT_COMPONENT))
-                       return -1;
-       }
-       while(1) {
-               __m128i tmp, tmp1, result;
-               uint64_t mask;
-
-               if ((uintptr_t) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES  - 1)
-                       /*
-                        * End-of-page; fall back to slow method for
-                        * this entire ref.
-                        */
-                       return check_refname_format_bytewise(refname, orig_flags);
-
-               tmp = _mm_loadu_si128((__m128i *)cp);
-               tmp1 = _mm_loadu_si128((__m128i *)(cp + 1));
-
-               /*
-                * This range (note the lt) contains some
-                * permissible-but-rare characters (including all
-                * characters >= 128), which we handle later.  It also
-                * includes \000.
-                */
-               result = _mm_cmplt_epi8(tmp, star_ub);
-
-               result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, question));
-               result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, colon));
-
-               /* This range contains the permissible ] as bycatch */
-               result = _mm_or_si128(result, _mm_and_si128(
-                                             _mm_cmpgt_epi8(tmp, bracket_lb),
-                                             _mm_cmplt_epi8(tmp, caret_ub)));
-
-               result = _mm_or_si128(result, _mm_cmpgt_epi8(tmp, tilde_lb));
-
-               /* .. */
-               result = _mm_or_si128(result, _mm_and_si128(
-                                             _mm_cmpeq_epi8(tmp, dot),
-                                             _mm_cmpeq_epi8(tmp1, dot)));
-               /* @{ */
-               result = _mm_or_si128(result, _mm_and_si128(
-                                             _mm_cmpeq_epi8(tmp, at),
-                                             _mm_cmpeq_epi8(tmp1, curly)));
-               /* // */
-               result = _mm_or_si128(result, _mm_and_si128(
-                                             _mm_cmpeq_epi8(tmp, slash),
-                                             _mm_cmpeq_epi8(tmp1, slash)));
-               /* trailing / */
-               result = _mm_or_si128(result, _mm_and_si128(
-                                             _mm_cmpeq_epi8(tmp, slash),
-                                             _mm_cmpeq_epi8(tmp1, zero)));
-               /* .l, beginning of .lock */
-               result = _mm_or_si128(result, _mm_and_si128(
-                                             _mm_cmpeq_epi8(tmp, dot),
-                                             _mm_cmpeq_epi8(tmp1, el)));
-               /*
-                * Even though /. is not necessarily an error, we flag
-                * it anyway. If we find it, we'll check if it's valid
-                * and if so we'll advance just past it.
-                */
-               result = _mm_or_si128(result, _mm_and_si128(
-                                             _mm_cmpeq_epi8(tmp, slash),
-                                             _mm_cmpeq_epi8(tmp1, dot)));
-
-               mask = _mm_movemask_epi8(result);
-               if (mask) {
-                       /*
-                        * We've found either end-of-string, or some
-                        * probably-bad character or substring.
-                        */
-                       int i = __builtin_ctz(mask);
-                       switch (refname_disposition[cp[i] & 255]) {
-                       case 0: /* fall-through */
-                       case 5:
-                               /*
-                                * bycatch: a good character that's in
-                                * one of the ranges of mostly-forbidden
-                                * characters
-                                */
-                               cp += i + 1;
-                               break;
-                       case 1:
-                               if (cp[i + 1] == '{')
-                                       return -1;
-                               cp += i + 1;
-                               break;
-                       case 2:
-                               if (!(flags & REFNAME_ALLOW_ONELEVEL)
-                                   && !component_count && !strchr(refname, '/'))
-                                       /* Refname has only one component. */
-                                       return -1;
-                               return 0;
-                       case 3:
-                               component_count ++;
-                               /*
-                                * Even if leading dots are allowed, don't
-                                * allow "." as a component (".." is
-                                * prevented by case 4 below).
-                                */
-                               if (cp[i + 1] == '.') {
-                                       if (cp[i + 2] == '\0')
-                                               return -1;
-                                       if (flags & REFNAME_DOT_COMPONENT) {
-                                               /* skip to just after the /. */
-                                               cp += i + 2;
-                                               break;
-                                       }
-                                       return -1;
-                               } else if (cp[i + 1] == '/' || cp[i + 1] == '\0')
-                                       return -1;
-                               break;
-                       case 4:
-                               if (cp[i + 1] == '.' || cp[i + 1] == '\0')
-                                       return -1;
-                               /* .lock as end-of-component or end-of-string */
-                               if ((!strncmp(cp + i, ".lock", 5))
-                                   && (cp[i + 5] == '/' || cp[i + 5] == 0))
-                                       return -1;
-                               cp += 1;
-                               break;
-                       case 6:
-                               if (((cp == refname + i) || cp[i - 1] == '/')
-                                   && (cp[i + 1] == '/' || cp[i + 1] == 0))
-                                       if (flags & REFNAME_REFSPEC_PATTERN) {
-                                               flags &= ~REFNAME_REFSPEC_PATTERN;
-                                               /* restart after the * */
-                                               cp += i + 1;
-                                               continue;
-                                       }
-                               /* fall-through */
-                       case 7:
-                               return -1;
-                       }
-               } else
-                       cp += SSE_VECTOR_BYTES;
-       }
-}
-
-#else
-
-int check_refname_format (const char *refname, int flags)
-{
-       return check_refname_format_bytewise(refname, flags);
-}
-
-#endif
-
 struct ref_entry;
 
 /*
@@ -978,37 +779,32 @@ static void prime_ref_dir(struct ref_dir *dir)
                        prime_ref_dir(get_ref_dir(entry));
        }
 }
-/*
- * Return true iff refname1 and refname2 conflict with each other.
- * Two reference names conflict if one of them exactly matches the
- * leading components of the other; e.g., "foo/bar" conflicts with
- * both "foo" and with "foo/bar/baz" but not with "foo/bar" or
- * "foo/barbados".
- */
-static int names_conflict(const char *refname1, const char *refname2)
+
+static int entry_matches(struct ref_entry *entry, const char *refname)
 {
-       for (; *refname1 && *refname1 == *refname2; refname1++, refname2++)
-               ;
-       return (*refname1 == '\0' && *refname2 == '/')
-               || (*refname1 == '/' && *refname2 == '\0');
+       return refname && !strcmp(entry->name, refname);
 }
 
-struct name_conflict_cb {
-       const char *refname;
-       const char *oldrefname;
-       const char *conflicting_refname;
+struct nonmatching_ref_data {
+       const char *skip;
+       struct ref_entry *found;
 };
 
-static int name_conflict_fn(struct ref_entry *entry, void *cb_data)
+static int nonmatching_ref_fn(struct ref_entry *entry, void *vdata)
 {
-       struct name_conflict_cb *data = (struct name_conflict_cb *)cb_data;
-       if (data->oldrefname && !strcmp(data->oldrefname, entry->name))
+       struct nonmatching_ref_data *data = vdata;
+
+       if (entry_matches(entry, data->skip))
                return 0;
-       if (names_conflict(data->refname, entry->name)) {
-               data->conflicting_refname = entry->name;
-               return 1;
-       }
-       return 0;
+
+       data->found = entry;
+       return 1;
+}
+
+static void report_refname_conflict(struct ref_entry *entry,
+                                   const char *refname)
+{
+       error("'%s' exists; cannot create '%s'", entry->name, refname);
 }
 
 /*
@@ -1017,21 +813,84 @@ static int name_conflict_fn(struct ref_entry *entry, void *cb_data)
  * oldrefname is non-NULL, ignore potential conflicts with oldrefname
  * (e.g., because oldrefname is scheduled for deletion in the same
  * operation).
+ *
+ * Two reference names conflict if one of them exactly matches the
+ * leading components of the other; e.g., "foo/bar" conflicts with
+ * both "foo" and with "foo/bar/baz" but not with "foo/bar" or
+ * "foo/barbados".
  */
 static int is_refname_available(const char *refname, const char *oldrefname,
                                struct ref_dir *dir)
 {
-       struct name_conflict_cb data;
-       data.refname = refname;
-       data.oldrefname = oldrefname;
-       data.conflicting_refname = NULL;
+       const char *slash;
+       size_t len;
+       int pos;
+       char *dirname;
 
-       sort_ref_dir(dir);
-       if (do_for_each_entry_in_dir(dir, 0, name_conflict_fn, &data)) {
-               error("'%s' exists; cannot create '%s'",
-                     data.conflicting_refname, refname);
+       for (slash = strchr(refname, '/'); slash; slash = strchr(slash + 1, '/')) {
+               /*
+                * We are still at a leading dir of the refname; we are
+                * looking for a conflict with a leaf entry.
+                *
+                * If we find one, we still must make sure it is
+                * not "oldrefname".
+                */
+               pos = search_ref_dir(dir, refname, slash - refname);
+               if (pos >= 0) {
+                       struct ref_entry *entry = dir->entries[pos];
+                       if (entry_matches(entry, oldrefname))
+                               return 1;
+                       report_refname_conflict(entry, refname);
+                       return 0;
+               }
+
+
+               /*
+                * Otherwise, we can try to continue our search with
+                * the next component; if we come up empty, we know
+                * there is nothing under this whole prefix.
+                */
+               pos = search_ref_dir(dir, refname, slash + 1 - refname);
+               if (pos < 0)
+                       return 1;
+
+               dir = get_ref_dir(dir->entries[pos]);
+       }
+
+       /*
+        * We are at the leaf of our refname; we want to
+        * make sure there are no directories which match it.
+        */
+       len = strlen(refname);
+       dirname = xmallocz(len + 1);
+       sprintf(dirname, "%s/", refname);
+       pos = search_ref_dir(dir, dirname, len + 1);
+       free(dirname);
+
+       if (pos >= 0) {
+               /*
+                * We found a directory named "refname". It is a
+                * problem iff it contains any ref that is not
+                * "oldrefname".
+                */
+               struct ref_entry *entry = dir->entries[pos];
+               struct ref_dir *dir = get_ref_dir(entry);
+               struct nonmatching_ref_data data;
+
+               data.skip = oldrefname;
+               sort_ref_dir(dir);
+               if (!do_for_each_entry_in_dir(dir, 0, nonmatching_ref_fn, &data))
+                       return 1;
+
+               report_refname_conflict(data.found, refname);
                return 0;
        }
+
+       /*
+        * There is no point in searching for another leaf
+        * node which matches it; such an entry would be the
+        * ref we are looking for, not a conflict.
+        */
        return 1;
 }