]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
strv: add new helper strv_rebreak_lines() with a simple line breaking algorithm
author: Lennart Poettering <lennart@poettering.net>
Fri, 26 Apr 2024 15:40:32 +0000 (17:40 +0200)
committer: Lennart Poettering <lennart@poettering.net>
Mon, 17 Jun 2024 07:20:21 +0000 (09:20 +0200)
src/basic/strv.c
src/basic/strv.h
src/test/test-strv.c

index d081821a8625a378c8445a73e2dd6419c28b11e7..4709dfaf2464d26f98f05f4013c4b0dc92d9c54c 100644 (file)
 #include "escape.h"
 #include "extract-word.h"
 #include "fileio.h"
+#include "gunicode.h"
 #include "memory-util.h"
 #include "nulstr-util.h"
 #include "sort-util.h"
 #include "string-util.h"
 #include "strv.h"
+#include "utf8.h"
 
 char* strv_find(char * const *l, const char *name) {
         assert(name);
@@ -967,3 +969,91 @@ int _string_strv_ordered_hashmap_put(OrderedHashmap **h, const char *key, const
 }
 
 DEFINE_HASH_OPS_FULL(string_strv_hash_ops, char, string_hash_func, string_compare_func, free, char*, strv_free);
+
+int strv_rebreak_lines(char **l, size_t width, char ***ret) {
+        _cleanup_strv_free_ char **broken = NULL;
+        int r;
+
+        assert(ret);
+
+        /* Implements a simple UTF-8 line breaking algorithm.
+         *
+         * Goes through all entries in l, and line-breaks each line that is wider than the specified
+         * console width. Breaks at the end of words/beginning of whitespace. Lines that do not contain
+         * whitespace are not broken. Retains whitespace at beginning of lines, removes it at end of
+         * lines. Newline characters embedded in an entry reset the width counter and the prefix
+         * tracking, but do not by themselves split the entry.
+         *
+         * If width is SIZE_MAX no breaking takes place and a plain copy of l is returned.
+         *
+         * Returns 0 on success and stores the newly allocated strv in *ret. Returns -ENOMEM, or
+         * whatever error strv_consume()/strv_extend() report, on failure; *ret is left untouched
+         * in that case. */
+
+        if (width == SIZE_MAX) { /* NOP? */
+                broken = strv_copy(l);
+                if (!broken)
+                        return -ENOMEM;
+
+                *ret = TAKE_PTR(broken);
+                return 0;
+        }
+
+        STRV_FOREACH(i, l) {
+                /* start: beginning of the output line currently being assembled.
+                 * whitespace_begin/whitespace_end: the most recent whitespace run after a word; the
+                 * end pointer is only set once a non-whitespace character followed the run. */
+                const char *start = *i, *whitespace_begin = NULL, *whitespace_end = NULL;
+                bool in_prefix = true; /* still in the whitespace in the beginning of the line? */
+                size_t w = 0; /* console width accumulated for the current output line */
+
+                for (const char *p = start; *p != 0; p = utf8_next_char(p)) {
+                        if (strchr(NEWLINE, *p)) {
+                                /* Embedded newline: a fresh visual line begins, forget what we tracked */
+                                in_prefix = true;
+                                whitespace_begin = whitespace_end = NULL;
+                                w = 0;
+                        } else if (strchr(WHITESPACE, *p)) {
+                                /* Open a new whitespace run, unless we are in the leading whitespace
+                                 * or already inside a run */
+                                if (!in_prefix && (!whitespace_begin || whitespace_end)) {
+                                        whitespace_begin = p;
+                                        whitespace_end = NULL;
+                                }
+                        } else {
+                                /* First non-whitespace character after a run closes the run */
+                                if (whitespace_begin && !whitespace_end)
+                                        whitespace_end = p;
+
+                                in_prefix = false;
+                        }
+
+                        int cw = utf8_char_console_width(p);
+                        if (cw < 0) {
+                                log_debug_errno(cw, "Comment to line break contains invalid UTF-8, ignoring.");
+                                cw = 1;
+                        }
+
+                        w += cw;
+
+                        /* Only break if there is a complete whitespace run to break at */
+                        if (w > width && whitespace_begin && whitespace_end) {
+                                _cleanup_free_ char *truncated = NULL;
+
+                                /* Emit everything up to (excluding) the whitespace run */
+                                truncated = strndup(start, whitespace_begin - start);
+                                if (!truncated)
+                                        return -ENOMEM;
+
+                                r = strv_consume(&broken, TAKE_PTR(truncated));
+                                if (r < 0)
+                                        return r;
+
+                                /* Continue with the word that follows the whitespace run. The loop
+                                 * increment steps over the first character of the new line, hence seed
+                                 * the width counter with the width of that very character, rather than
+                                 * with the width of the character that triggered the break, which may
+                                 * be a different one. */
+                                p = start = whitespace_end;
+                                whitespace_begin = whitespace_end = NULL;
+
+                                cw = utf8_char_console_width(start);
+                                w = cw < 0 ? 1 : cw; /* invalid UTF-8 counts as one cell, as above */
+                        }
+                }
+
+                if (start) { /* Process rest of the line */
+                        /* NOTE(review): in_prefix is also set again by an embedded newline, hence a
+                         * remainder whose last newline is followed only by whitespace is replaced by an
+                         * empty line as a whole, including any text before that newline. Confirm this
+                         * is intended. */
+                        if (in_prefix) /* Never seen anything non-whitespace? Generate empty line! */
+                                r = strv_extend(&broken, "");
+                        else if (whitespace_begin && !whitespace_end) { /* Ends in whitespace? Chop it off! */
+                                _cleanup_free_ char *truncated = strndup(start, whitespace_begin - start);
+                                if (!truncated)
+                                        return -ENOMEM;
+
+                                r = strv_consume(&broken, TAKE_PTR(truncated));
+                        } else /* Otherwise use line as is */
+                                r = strv_extend(&broken, start);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        *ret = TAKE_PTR(broken);
+        return 0;
+}
index 169737d1d8c18f72cdb3991cf7994b278168796b..c828bd612f521390ceaa14c5074d725e54ff33c0 100644 (file)
@@ -257,3 +257,5 @@ int _string_strv_hashmap_put(Hashmap **h, const char *key, const char *value  HA
 int _string_strv_ordered_hashmap_put(OrderedHashmap **h, const char *key, const char *value  HASHMAP_DEBUG_PARAMS);
 #define string_strv_hashmap_put(h, k, v) _string_strv_hashmap_put(h, k, v  HASHMAP_DEBUG_SRC_ARGS)
 #define string_strv_ordered_hashmap_put(h, k, v) _string_strv_ordered_hashmap_put(h, k, v  HASHMAP_DEBUG_SRC_ARGS)
+
+int strv_rebreak_lines(char **l, size_t width, char ***ret);
index 28b8b2270c099bfafda3b77203fe9f2fc0f566a9..65afefed3be2559fb72c7700d39558dc58a7b819 100644 (file)
@@ -1055,4 +1055,62 @@ TEST(strv_extend_many) {
         assert_se(strv_equal(l, STRV_MAKE("foo", "bar", "waldo", "quux", "1", "2", "3", "4", "yes", "no")));
 }
 
+TEST(strv_rebreak_lines) {
+        _cleanup_strv_free_ char **out = NULL;
+
+        /* With SIZE_MAX as width the input is expected to come back unchanged */
+        assert_se(strv_rebreak_lines(NULL, SIZE_MAX, &out) >= 0);
+        assert_se(strv_equal(out, NULL));
+        out = strv_free(out);
+
+        assert_se(strv_rebreak_lines(STRV_MAKE(""), SIZE_MAX, &out) >= 0);
+        assert_se(strv_equal(out, STRV_MAKE("")));
+        out = strv_free(out);
+
+        assert_se(strv_rebreak_lines(STRV_MAKE("", ""), SIZE_MAX, &out) >= 0);
+        assert_se(strv_equal(out, STRV_MAKE("", "")));
+        out = strv_free(out);
+
+        assert_se(strv_rebreak_lines(STRV_MAKE("foo"), SIZE_MAX, &out) >= 0);
+        assert_se(strv_equal(out, STRV_MAKE("foo")));
+        out = strv_free(out);
+
+        assert_se(strv_rebreak_lines(STRV_MAKE("foo", "bar"), SIZE_MAX, &out) >= 0);
+        assert_se(strv_equal(out, STRV_MAKE("foo", "bar")));
+        out = strv_free(out);
+
+        /* Plain words, broken at a width of 10 */
+        assert_se(strv_rebreak_lines(STRV_MAKE("Foo fOo foO FOo", "bar Bar bAr baR BAr"), 10, &out) >= 0);
+        assert_se(strv_equal(out, STRV_MAKE("Foo fOo", "foO FOo", "bar Bar", "bAr baR", "BAr")));
+        out = strv_free(out);
+
+        /* Leading whitespace is expected to survive, trailing whitespace to be dropped */
+        assert_se(strv_rebreak_lines(
+                                  STRV_MAKE("           foo               ",
+                                            "             foo bar               waldo quux         "),
+                                  10,
+                                  &out) >= 0);
+        assert_se(strv_equal(out,
+                             STRV_MAKE("           foo",
+                                       "             foo",
+                                       "bar",
+                                       "waldo quux")));
+        out = strv_free(out);
+
+        /* Whitespace-only entries and tabs */
+        assert_se(strv_rebreak_lines(
+                                  STRV_MAKE("            ",
+                                            "\tfoo bar\t",
+                                            "FOO\tBAR"),
+                                  10,
+                                  &out) >= 0);
+        assert_se(strv_equal(out,
+                             STRV_MAKE("",
+                                       "\tfoo",
+                                       "bar",
+                                       "FOO",
+                                       "BAR")));
+        out = strv_free(out);
+
+        /* Re-breaking already broken output at the same width must be a NOP, for each width 1…99 */
+        for (size_t width = 1; width <= 99; width++) {
+                _cleanup_strv_free_ char **once = NULL, **twice = NULL;
+
+                assert_se(strv_rebreak_lines(STRV_MAKE("foobar waldo waldo quux piep\tschnurz    pimm"), width, &once) >= 0);
+                assert_se(strv_rebreak_lines(once, width, &twice) >= 0);
+
+                assert_se(strv_equal(once, twice));
+        }
+}
+
 DEFINE_TEST_MAIN(LOG_INFO);