shorten_unambiguous_ref(): avoid sscanf()

author Jeff King <peff@peff.net>

Wed, 15 Feb 2023 15:16:21 +0000 (10:16 -0500)

committer Junio C Hamano <gitster@pobox.com>

Wed, 15 Feb 2023 16:53:17 +0000 (08:53 -0800)
author Jeff King <peff@peff.net>
Wed, 15 Feb 2023 15:16:21 +0000 (10:16 -0500)
committer Junio C Hamano <gitster@pobox.com>
Wed, 15 Feb 2023 16:53:17 +0000 (08:53 -0800)
diff --git a/refs.c b/refs.c

index 75fd74b06c636c131d500e4394b6f7c46e5c9bb7..a5e546244de257c82f2708e718a417163b432483 100644 (file)
--- a/refs.c
+++ b/refs.c
@@ -1310,53 +1310,62 @@ int update_ref(const char *msg, const char *refname,
                                old_oid, flags, onerr);
  }
  
-char *refs_shorten_unambiguous_ref(struct ref_store *refs,
-                                  const char *refname, int strict)
+/*
+ * Check that the string refname matches a rule of the form
+ * "{prefix}%.*s{suffix}". So "foo/bar/baz" would match the rule
+ * "foo/%.*s/baz", and return the string "bar".
+ */
+static const char *match_parse_rule(const char *refname, const char *rule,
+                                   size_t *len)
  {
-       int i;
-       static char **scanf_fmts;
-       char *short_name;
-       struct strbuf resolved_buf = STRBUF_INIT;
-
-       if (!scanf_fmts) {
-               /*
-                * Pre-generate scanf formats from ref_rev_parse_rules[].
-                * Generate a format suitable for scanf from a
-                * ref_rev_parse_rules rule by interpolating "%s" at the
-                * location of the "%.*s".
-                */
-               size_t total_len = 0;
-               size_t offset = 0;
+       /*
+        * Check that rule matches refname up to the first percent in the rule.
+        * We can bail immediately if not, but otherwise we leave "rule" at the
+        * %-placeholder, and "refname" at the start of the potential matched
+        * name.
+        */
+       while (*rule != '%') {
+               if (!*rule)
+                       BUG("rev-parse rule did not have percent");
+               if (*refname++ != *rule++)
+                       return NULL;
+       }
  
-               for (i = 0; i < NUM_REV_PARSE_RULES; i++)
-                       /* -2 for strlen("%.*s") - strlen("%s"); +1 for NUL */
-                       total_len += strlen(ref_rev_parse_rules[i]) - 2 + 1;
+       /*
+        * Check that our "%" is the expected placeholder. This assumes there
+        * are no other percents (placeholder or quoted) in the string, but
+        * that is sufficient for our rev-parse rules.
+        */
+       if (!skip_prefix(rule, "%.*s", &rule))
+               return NULL;
  
-               scanf_fmts = xmalloc(st_add(st_mult(sizeof(char *), NUM_REV_PARSE_RULES), total_len));
+       /*
+        * And now check that our suffix (if any) matches.
+        */
+       if (!strip_suffix(refname, rule, len))
+               return NULL;
  
-               offset = 0;
-               for (i = 0; i < NUM_REV_PARSE_RULES; i++) {
-                       assert(offset < total_len);
-                       scanf_fmts[i] = (char *)&scanf_fmts[NUM_REV_PARSE_RULES] + offset;
-                       offset += xsnprintf(scanf_fmts[i], total_len - offset,
-                                           ref_rev_parse_rules[i], 2, "%s") + 1;
-               }
-       }
+       return refname; /* len set by strip_suffix() */
+}
  
-       /* buffer for scanf result, at most refname must fit */
-       short_name = xstrdup(refname);
+char *refs_shorten_unambiguous_ref(struct ref_store *refs,
+                                  const char *refname, int strict)
+{
+       int i;
+       struct strbuf resolved_buf = STRBUF_INIT;
  
         /* skip first rule, it will always match */
         for (i = NUM_REV_PARSE_RULES - 1; i > 0 ; --i) {
                 int j;
                 int rules_to_fail = i;
+               const char *short_name;
                 size_t short_name_len;
  
-               if (1 != sscanf(refname, scanf_fmts[i], short_name))
+               short_name = match_parse_rule(refname, ref_rev_parse_rules[i],
+                                             &short_name_len);
+               if (!short_name)
                         continue;
  
-               short_name_len = strlen(short_name);
-
                 /*
                  * in strict mode, all (except the matched one) rules
                  * must fail to resolve to a valid non-ambiguous ref
@@ -1394,12 +1403,11 @@ char *refs_shorten_unambiguous_ref(struct ref_store *refs,
                  */
                 if (j == rules_to_fail) {
                         strbuf_release(&resolved_buf);
-                       return short_name;
+                       return xmemdupz(short_name, short_name_len);
                 }
         }
  
         strbuf_release(&resolved_buf);
-       free(short_name);
         return xstrdup(refname);
  }
  
diff --git a/t/t1401-symbolic-ref.sh b/t/t1401-symbolic-ref.sh

index d708acdb819536083cc0f8bc5e41d2196d90aee3..be23be30c768304d35d79cf6905d576293125801 100755 (executable)
--- a/t/t1401-symbolic-ref.sh
+++ b/t/t1401-symbolic-ref.sh
@@ -189,4 +189,38 @@ test_expect_success 'symbolic-ref pointing at another' '
         test_cmp expect actual
  '
  
+test_expect_success 'symbolic-ref --short handles complex utf8 case' '
+       name="测试-加-增加-加-增加" &&
+       git symbolic-ref TEST_SYMREF "refs/heads/$name" &&
+       # In the real world, we saw problems with this case only
+       # when the locale includes UTF-8. Set it here to try to make things as
+       # hard as possible for us to pass, but in practice we should do the
+       # right thing regardless (and of course some platforms may not even
+       # have this locale).
+       LC_ALL=en_US.UTF-8 git symbolic-ref --short TEST_SYMREF >actual &&
+       echo "$name" >expect &&
+       test_cmp expect actual
+'
+
+test_expect_success 'symbolic-ref --short handles name with suffix' '
+       git symbolic-ref TEST_SYMREF "refs/remotes/origin/HEAD" &&
+       git symbolic-ref --short TEST_SYMREF >actual &&
+       echo "origin" >expect &&
+       test_cmp expect actual
+'
+
+test_expect_success 'symbolic-ref --short handles almost-matching name' '
+       git symbolic-ref TEST_SYMREF "refs/headsXfoo" &&
+       git symbolic-ref --short TEST_SYMREF >actual &&
+       echo "headsXfoo" >expect &&
+       test_cmp expect actual
+'
+
+test_expect_success 'symbolic-ref --short handles name with percent' '
+       git symbolic-ref TEST_SYMREF "refs/heads/%foo" &&
+       git symbolic-ref --short TEST_SYMREF >actual &&
+       echo "%foo" >expect &&
+       test_cmp expect actual
+'
+
  test_done
author	Jeff King <peff@peff.net>
	Wed, 15 Feb 2023 15:16:21 +0000 (10:16 -0500)
committer	Junio C Hamano <gitster@pobox.com>
	Wed, 15 Feb 2023 16:53:17 +0000 (08:53 -0800)
refs.c		patch \| blob \| blame \| history
t/t1401-symbolic-ref.sh		patch \| blob \| blame \| history