git.ipfire.org Git - thirdparty/git.git/commitdiff
string-list: multi-delimiter `string_list_split_in_place()`
author Taylor Blau <me@ttaylorr.com>
Mon, 24 Apr 2023 22:20:10 +0000 (18:20 -0400)
committer Junio C Hamano <gitster@pobox.com>
Mon, 24 Apr 2023 23:01:28 +0000 (16:01 -0700)
Enhance `string_list_split_in_place()` to accept multiple characters as
delimiters instead of a single character.

Instead of using `strchr(3)` to locate the first occurrence of the given
delimiter character, `string_list_split_in_place()` now uses
`strpbrk(3)` to locate the first occurrence of any character in the
delimiting set.

When only a single delimiting character is provided, `strpbrk(3)` (which
is implemented with `strcspn(3)`) has equivalent performance to
`strchr(3)`. Modern `strcspn(3)` implementations treat an empty
delimiter or the singleton delimiter as a special case and fall back to
calling `strchrnul()`. Both glibc[1] and musl[2] implement `strcspn(3)`
this way.

This change is one step toward removing `strtok(3)` from the tree. Note
that `string_list_split_in_place()` is not a strict replacement for
`strtok()`, since it will happily turn sequential delimiter characters
into empty entries in the resulting string_list. For example:

    string_list_split_in_place(&xs, "foo:;:bar:;:baz", ":;", -1)

would yield a string list of:

    ["foo", "", "", "bar", "", "", "baz"]

Callers that wish to emulate the behavior of `strtok(3)` more directly
should call `string_list_remove_empty_items()` after splitting.

To avoid regressions for the new multi-character delimiter cases, update
t0063 in this patch as well.

[1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=string/strcspn.c;hb=glibc-2.37#l35
[2]: https://git.musl-libc.org/cgit/musl/tree/src/string/strcspn.c?h=v1.2.3#n11

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/gc.c
diff.c
notes.c
refs/packed-backend.c
string-list.c
string-list.h
t/helper/test-string-list.c
t/t0063-string-list.sh

index edd98d35a5a460a06bd812e455be201a0309d334..f68e9767043ecdbb66992718e13757cd667b2ab7 100644 (file)
@@ -1687,11 +1687,11 @@ static int get_schedule_cmd(const char **cmd, int *is_available)
        if (is_available)
                *is_available = 0;
 
-       string_list_split_in_place(&list, testing, ',', -1);
+       string_list_split_in_place(&list, testing, ",", -1);
        for_each_string_list_item(item, &list) {
                struct string_list pair = STRING_LIST_INIT_NODUP;
 
-               if (string_list_split_in_place(&pair, item->string, ':', 2) != 2)
+               if (string_list_split_in_place(&pair, item->string, ":", 2) != 2)
                        continue;
 
                if (!strcmp(*cmd, pair.items[0].string)) {
diff --git a/diff.c b/diff.c
index 78b0fdd8caa2664337e25572abc2e698197357cc..378a0248e18e01b0c3faef17241845b6dff1e1b6 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -134,7 +134,7 @@ static int parse_dirstat_params(struct diff_options *options, const char *params
        int i;
 
        if (*params_copy)
-               string_list_split_in_place(&params, params_copy, ',', -1);
+               string_list_split_in_place(&params, params_copy, ",", -1);
        for (i = 0; i < params.nr; i++) {
                const char *p = params.items[i].string;
                if (!strcmp(p, "changes")) {
diff --git a/notes.c b/notes.c
index 45fb7f22d1df011396ec86e56f1f1bd3bd1645b9..eee806f6262245fdd44fb24ff745c86b25d303cc 100644 (file)
--- a/notes.c
+++ b/notes.c
@@ -963,7 +963,7 @@ void string_list_add_refs_from_colon_sep(struct string_list *list,
        char *globs_copy = xstrdup(globs);
        int i;
 
-       string_list_split_in_place(&split, globs_copy, ':', -1);
+       string_list_split_in_place(&split, globs_copy, ":", -1);
        string_list_remove_empty_items(&split, 0);
 
        for (i = 0; i < split.nr; i++)
index 1eba1015dd5aa8378bf722b50a2b466774b43f0d..cc903baa7eeee21ce2d451c29fba17f0e6a92ca7 100644 (file)
@@ -650,7 +650,7 @@ static struct snapshot *create_snapshot(struct packed_ref_store *refs)
                                         snapshot->buf,
                                         snapshot->eof - snapshot->buf);
 
-               string_list_split_in_place(&traits, p, ' ', -1);
+               string_list_split_in_place(&traits, p, " ", -1);
 
                if (unsorted_string_list_has_string(&traits, "fully-peeled"))
                        snapshot->peeled = PEELED_FULLY;
index db473f273e1f77c45f2a5b0cf7a70b106bc19d02..5f5b60fe1c5d4afb2aa4a9a61b6a19cf6ed2ace0 100644 (file)
@@ -301,7 +301,7 @@ int string_list_split(struct string_list *list, const char *string,
 }
 
 int string_list_split_in_place(struct string_list *list, char *string,
-                              int delim, int maxsplit)
+                              const char *delim, int maxsplit)
 {
        int count = 0;
        char *p = string, *end;
@@ -315,7 +315,7 @@ int string_list_split_in_place(struct string_list *list, char *string,
                        string_list_append(list, p);
                        return count;
                }
-               end = strchr(p, delim);
+               end = strpbrk(p, delim);
                if (end) {
                        *end = '\0';
                        string_list_append(list, p);
index c7b0d5d0008efb906ba034c632adb69b53c4f545..77854840f7543545f16c4a3e510b46d4852aaeef 100644 (file)
@@ -270,5 +270,5 @@ int string_list_split(struct string_list *list, const char *string,
  * list->strdup_strings must *not* be set.
  */
 int string_list_split_in_place(struct string_list *list, char *string,
-                              int delim, int maxsplit);
+                              const char *delim, int maxsplit);
 #endif /* STRING_LIST_H */
index 2123dda85bf10033dcbf0d801028b3705e73a507..63df88575ca4c7391c5dc8e191c6b8581834dc8b 100644 (file)
@@ -62,7 +62,7 @@ int cmd__string_list(int argc, const char **argv)
                struct string_list list = STRING_LIST_INIT_NODUP;
                int i;
                char *s = xstrdup(argv[2]);
-               int delim = *argv[3];
+               const char *delim = argv[3];
                int maxsplit = atoi(argv[4]);
 
                i = string_list_split_in_place(&list, s, delim, maxsplit);
@@ -111,7 +111,7 @@ int cmd__string_list(int argc, const char **argv)
                 */
                if (sb.len && sb.buf[sb.len - 1] == '\n')
                        strbuf_setlen(&sb, sb.len - 1);
-               string_list_split_in_place(&list, sb.buf, '\n', -1);
+               string_list_split_in_place(&list, sb.buf, "\n", -1);
 
                string_list_sort(&list);
 
index 46d4839194bb2322b2a8cd9b80618a855e0fb070..1fee6d90102cc6f9b90786a6324f56e7086b91bf 100755 (executable)
@@ -18,6 +18,14 @@ test_split () {
        "
 }
 
+test_split_in_place() {
+       cat >expected &&
+       test_expect_success "split (in place) $1 at $2, max $3" "
+               test-tool string-list split_in_place '$1' '$2' '$3' >actual &&
+               test_cmp expected actual
+       "
+}
+
 test_split "foo:bar:baz" ":" "-1" <<EOF
 3
 [0]: "foo"
@@ -61,6 +69,49 @@ test_split ":" ":" "-1" <<EOF
 [1]: ""
 EOF
 
+test_split_in_place "foo:;:bar:;:baz:;:" ":;" "-1" <<EOF
+10
+[0]: "foo"
+[1]: ""
+[2]: ""
+[3]: "bar"
+[4]: ""
+[5]: ""
+[6]: "baz"
+[7]: ""
+[8]: ""
+[9]: ""
+EOF
+
+test_split_in_place "foo:;:bar:;:baz" ":;" "0" <<EOF
+1
+[0]: "foo:;:bar:;:baz"
+EOF
+
+test_split_in_place "foo:;:bar:;:baz" ":;" "1" <<EOF
+2
+[0]: "foo"
+[1]: ";:bar:;:baz"
+EOF
+
+test_split_in_place "foo:;:bar:;:baz" ":;" "2" <<EOF
+3
+[0]: "foo"
+[1]: ""
+[2]: ":bar:;:baz"
+EOF
+
+test_split_in_place "foo:;:bar:;:" ":;" "-1" <<EOF
+7
+[0]: "foo"
+[1]: ""
+[2]: ""
+[3]: "bar"
+[4]: ""
+[5]: ""
+[6]: ""
+EOF
+
 test_expect_success "test filter_string_list" '
        test "x-" = "x$(test-tool string-list filter - y)" &&
        test "x-" = "x$(test-tool string-list filter no y)" &&