test-ellipsize: add tests for ellipsize_mem, fix bugs

author Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>

Sat, 2 Jun 2018 15:08:46 +0000 (17:08 +0200)

committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>

Sat, 2 Jun 2018 19:53:25 +0000 (21:53 +0200)
author Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Sat, 2 Jun 2018 15:08:46 +0000 (17:08 +0200)
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Sat, 2 Jun 2018 19:53:25 +0000 (21:53 +0200)
diff --git a/src/basic/string-util.c b/src/basic/string-util.c

index 882683c767526b7ad5127754dbccad57d539513b..a9362bf8bdd7db4761cde133ee61ce5f15b58e29 100644 (file)
--- a/src/basic/string-util.c
+++ b/src/basic/string-util.c
@@ -469,8 +469,8 @@ static int write_ellipsis(char *buf, bool unicode) {
  }
  
  static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
-        size_t x, need_space;
-        char *r;
+        size_t x, need_space, suffix_len;
+        char *t;
  
          assert(s);
          assert(percent <= 100);
@@ -506,8 +506,8 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le
           * either for the UTF-8 encoded character or for three ASCII characters. */
          need_space = is_locale_utf8() ? 1 : 3;
  
-        r = new(char, new_length+3);
-        if (!r)
+        t = new(char, new_length+3);
+        if (!t)
                  return NULL;
  
          assert(new_length >= need_space);
@@ -515,13 +515,13 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le
          x = ((new_length - need_space) * percent + 50) / 100;
          assert(x <= new_length - need_space);
  
-        memcpy(r, s, x);
-        write_ellipsis(r + x, false);
-        memcpy(r + x + 3,
-               s + old_length - (new_length - x - need_space),
-               new_length - x - need_space + 1);
+        memcpy(t, s, x);
+        write_ellipsis(t + x, false);
+        suffix_len = new_length - x - need_space;
+        memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
+        *(t + x + 3 + suffix_len) = '\0';
  
-        return r;
+        return t;
  }
  
  char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
@@ -559,35 +559,49 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
          assert(x <= new_length - 1);
  
          k = 0;
-        for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
+        for (i = s; i < s + old_length; i = utf8_next_char(i)) {
                  char32_t c;
+                int w;
  
                  r = utf8_encoded_to_unichar(i, &c);
                  if (r < 0)
                          return NULL;
-                k += unichar_iswide(c) ? 2 : 1;
-        }
  
-        if (k > x) /* last character was wide and went over quota */
-                x++;
+                w = unichar_iswide(c) ? 2 : 1;
+                if (k + w <= x)
+                        k += w;
+                else
+                        break;
+        }
  
-        for (j = s + old_length; k < new_length && j > i; ) {
+        for (j = s + old_length; j > i; ) {
                  char32_t c;
+                int w;
+                const char *jj;
  
-                j = utf8_prev_char(j);
-                r = utf8_encoded_to_unichar(j, &c);
+                jj = utf8_prev_char(j);
+                r = utf8_encoded_to_unichar(jj, &c);
                  if (r < 0)
                          return NULL;
-                k += unichar_iswide(c) ? 2 : 1;
+
+                w = unichar_iswide(c) ? 2 : 1;
+                if (k + w <= new_length) {
+                        k += w;
+                        j = jj;
+                } else
+                        break;
          }
          assert(i <= j);
  
          /* we don't actually need to ellipsize */
          if (i == j)
-                return memdup(s, old_length + 1);
+                return memdup_suffix0(s, old_length);
  
-        /* make space for ellipsis */
-        j = utf8_next_char(j);
+        /* make space for ellipsis, if possible */
+        if (j < s + old_length)
+                j = utf8_next_char(j);
+        else if (i > s)
+                i = utf8_prev_char(i);
  
          len = i - s;
          len2 = s + old_length - j;
@@ -602,7 +616,8 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
  
          memcpy(e, s, len);
          write_ellipsis(e + len, true);
-        memcpy(e + len + 3, j, len2 + 1);
+        memcpy(e + len + 3, j, len2);
+        *(e + len + 3 + len2) = '\0';
  
          return e;
  }
diff --git a/src/test/test-ellipsize.c b/src/test/test-ellipsize.c

index 902bc3342f12a1e9dde67ab65233169092268207..7d6b2b5449cdba53662716551d2b80393efe3425 100644 (file)
--- a/src/test/test-ellipsize.c
+++ b/src/test/test-ellipsize.c
@@ -10,10 +10,80 @@
  #include "alloc-util.h"
  #include "def.h"
  #include "string-util.h"
+#include "strv.h"
  #include "terminal-util.h"
  #include "util.h"
+#include "utf8.h"
  
-static void test_one(const char *p) {
+static void test_ellipsize_mem_one(const char *s, size_t old_length, size_t new_length) {
+        _cleanup_free_ char *n = NULL;
+        _cleanup_free_ char *t1 = NULL, *t2 = NULL, *t3 = NULL;
+        char buf[LINE_MAX];
+        bool has_wide_chars;
+        size_t max_width;
+
+        n = memdup_suffix0(s, old_length);
+
+        if (!utf8_is_valid(n))
+                /* We don't support invalid sequences… */
+                return;
+
+        /* Report our inputs. We duplicate the data so that cellescape
+         * can properly report truncated multibyte sequences. */
+        log_info("%s \"%s\" old_length=%zu/%zu new_length=%zu", __func__,
+                 cellescape(buf, sizeof buf, n),
+                 old_length, utf8_console_width(n),
+                 new_length);
+
+        /* To keep this test simple, any case with wide chars starts with this glyph */
+        has_wide_chars = startswith(s, "你");
+        max_width = MIN(utf8_console_width(n), new_length);
+
+        t1 = ellipsize_mem(n, old_length, new_length, 30);
+        log_info("30%% → %s utf8_console_width=%zu", t1, utf8_console_width(t1));
+        if (!has_wide_chars)
+                assert_se(utf8_console_width(t1) == max_width);
+        else
+                assert_se(utf8_console_width(t1) <= max_width);
+
+        t2 = ellipsize_mem(n, old_length, new_length, 90);
+        log_info("90%% → %s utf8_console_width=%zu", t2, utf8_console_width(t2));
+        if (!has_wide_chars)
+                assert_se(utf8_console_width(t2) == max_width);
+        else
+                assert_se(utf8_console_width(t2) <= max_width);
+
+        t3 = ellipsize_mem(n, old_length, new_length, 100);
+        log_info("100%% → %s utf8_console_width=%zu", t3, utf8_console_width(t3));
+        if (!has_wide_chars)
+                assert_se(utf8_console_width(t3) == max_width);
+        else
+                assert_se(utf8_console_width(t3) <= max_width);
+
+        if (new_length >= old_length) {
+                assert_se(streq(t1, n));
+                assert_se(streq(t2, n));
+                assert_se(streq(t3, n));
+        }
+}
+
+static void test_ellipsize_mem(void) {
+        const char *s;
+        ssize_t l, k;
+
+        FOREACH_STRING(s,
+                       "_XXXXXXXXXXX_", /* ASCII */
+                       "_aąęółśćńżźć_", /* two-byte utf-8 */
+                       "გამარჯობა",     /* multi-byte utf-8 */
+                       "你好世界",       /* wide characters */
+                       "你გą世óoó界")    /* a mix */
+
+                for (l = strlen(s); l >= 0; l--)
+                        for (k = strlen(s) + 1; k >= 0; k--)
+                                test_ellipsize_mem_one(s, l, k);
+}
+
+static void test_ellipsize_one(const char *p) {
          _cleanup_free_ char *t;
          t = ellipsize(p, columns(), 70);
          puts(t);
@@ -43,15 +113,20 @@ static void test_one(const char *p) {
          puts(t);
  }
  
+static void test_ellipsize(void) {
+        test_ellipsize_one(DIGITS LETTERS DIGITS LETTERS);
+        test_ellipsize_one("한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어");
+        test_ellipsize_one("-日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国");
+        test_ellipsize_one("中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国-中国中国中国中国中国中国中国中国中国中国中国中国中国");
+        test_ellipsize_one("sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd");
+        test_ellipsize_one("🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮");
+        test_ellipsize_one("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
+        test_ellipsize_one("shórt");
+}
+
  int main(int argc, char *argv[]) {
-        test_one(DIGITS LETTERS DIGITS LETTERS);
-        test_one("한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어한국어");
-        test_one("-日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国日本国");
-        test_one("中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国中国-中国中国中国中国中国中国中国中国中国中国中国中国中国");
-        test_one("sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd sÿstëmd");
-        test_one("🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮🐮");
-        test_one("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
-        test_one("shórt");
+        test_ellipsize_mem();
+        test_ellipsize();
  
          return 0;
  }
diff --git a/src/test/test-string-util.c b/src/test/test-string-util.c

index 8b176781de900270dd38c8491f9499eef6a33b59..d6eca393ee0348c9625e6e7cbde89b04b7c2a3eb 100644 (file)
--- a/src/test/test-string-util.c
+++ b/src/test/test-string-util.c
@@ -10,6 +10,7 @@
  #include "macro.h"
  #include "string-util.h"
  #include "strv.h"
+#include "utf8.h"
  
  static void test_string_erase(void) {
          char *x;
diff --git a/test/fuzz-regressions/fuzz-journal-remote/oss-fuzz-8686 b/test/fuzz-regressions/fuzz-journal-remote/oss-fuzz-8686

new file mode 100644 (file)

index 0000000..7c73c8c
--- /dev/null
+++ b/test/fuzz-regressions/fuzz-journal-remote/oss-fuzz-8686
@@ -0,0 +1,6 @@
+__REALTIME_TIMESTAMP=  6
+SYSLOG_IDENTIFIER=             
+MESSAGE=                        ᅟ                                                                                                                                                                                                                                                                                
+SYSLOG_PID=            
+
+  
+\ No newline at end of file
author	Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
	Sat, 2 Jun 2018 15:08:46 +0000 (17:08 +0200)
committer	Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
	Sat, 2 Jun 2018 19:53:25 +0000 (21:53 +0200)
src/basic/string-util.c		patch | blob | blame | history
src/test/test-ellipsize.c		patch | blob | blame | history
src/test/test-string-util.c		patch | blob | blame | history
test/fuzz-regressions/fuzz-journal-remote/oss-fuzz-8686	[new file with mode: 0644]	patch | blob