git.ipfire.org Git - thirdparty/systemd.git/commitdiff
basic/escape: allow truncation mode where "…" is always appended
author Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Wed, 5 May 2021 10:53:53 +0000 (12:53 +0200)
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Wed, 5 May 2021 11:59:23 +0000 (13:59 +0200)
So far we would append "…" or "..." only when the string was wider than the
specified output width. Let's add a mode (XESCAPE_FORCE_ELLIPSIS) for callers
that know the string being passed is already truncated, in which the ellipsis
is always appended.

The condition for jumping back in utf8_escape_non_printable_full() was
off-by-one. But we only jumped to that label after doing a check with a
stronger condition, so I think it didn't matter. Now it matters because we'd
output the forced ellipsis one column too early.

src/basic/escape.c
src/basic/escape.h
src/basic/utf8.c
src/basic/utf8.h
src/test/test-escape.c
src/test/test-utf8.c

index f579f15d87e20aa0d3d40558612ace3c4a285926..2a3a0e31a1ec24291a85bfd8ada22b28fa51af89 100644 (file)
@@ -368,7 +368,8 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape
          * reversed with cunescape(). If XESCAPE_8_BIT is specified, characters >= 127 are let through
          * unchanged. This corresponds to non-ASCII printable characters in pre-unicode encodings.
          *
-         * If console_width is reached, output is truncated and "..." is appended. */
+         * If console_width is reached, or XESCAPE_FORCE_ELLIPSIS is set, output is truncated and "..." is
+         * appended. */
 
         if (console_width == 0)
                 return strdup("");
@@ -380,10 +381,15 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape
         memset(ans, '_', MIN(strlen(s), console_width) * 4);
         ans[MIN(strlen(s), console_width) * 4] = 0;
 
+        bool force_ellipsis = FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS);
+
         for (f = s, t = prev = prev2 = ans; ; f++) {
                 char *tmp_t = t;
 
                 if (!*f) {
+                        if (force_ellipsis)
+                                break;
+
                         *t = 0;
                         return ans;
                 }
@@ -391,7 +397,7 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape
                 if ((unsigned char) *f < ' ' ||
                     (!FLAGS_SET(flags, XESCAPE_8_BIT) && (unsigned char) *f >= 127) ||
                     *f == '\\' || strchr(bad, *f)) {
-                        if ((size_t) (t - ans) + 4 > console_width)
+                        if ((size_t) (t - ans) + 4 + 3 * force_ellipsis > console_width)
                                 break;
 
                         *(t++) = '\\';
@@ -399,7 +405,7 @@ char* xescape_full(const char *s, const char *bad, size_t console_width, XEscape
                         *(t++) = hexchar(*f >> 4);
                         *(t++) = hexchar(*f);
                 } else {
-                        if ((size_t) (t - ans) + 1 > console_width)
+                        if ((size_t) (t - ans) + 1 + 3 * force_ellipsis > console_width)
                                 break;
 
                         *(t++) = *f;
@@ -432,7 +438,9 @@ char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFl
         if (FLAGS_SET(flags, XESCAPE_8_BIT))
                 return xescape_full(str, "", console_width, flags);
         else
-                return utf8_escape_non_printable_full(str, console_width);
+                return utf8_escape_non_printable_full(str,
+                                                      console_width,
+                                                      FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS));
 }
 
 char* octescape(const char *s, size_t len) {
index 945e7dc82c81b399290aeb6d3c75419c8d0b6843..907b572bd4ad8d7a21b2cf8a1545b4f13bf7465e 100644 (file)
@@ -56,6 +56,7 @@ int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit,
 
 typedef enum XEscapeFlags {
         XESCAPE_8_BIT          = 1 << 0,
+        XESCAPE_FORCE_ELLIPSIS = 1 << 1,
 } XEscapeFlags;
 
 char* xescape_full(const char *s, const char *bad, size_t console_width, XEscapeFlags flags);
index 244b8ade932de0b8e65b79c39ffc885256810d48..63fc9f71d1e4601d53f4937d1fb55286dfd0d2e6 100644 (file)
@@ -212,7 +212,7 @@ static int utf8_char_console_width(const char *str) {
         return unichar_iswide(c) ? 2 : 1;
 }
 
-char *utf8_escape_non_printable_full(const char *str, size_t console_width) {
+char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis) {
         char *p, *s, *prev_s;
         size_t n = 0; /* estimated print width */
 
@@ -229,8 +229,12 @@ char *utf8_escape_non_printable_full(const char *str, size_t console_width) {
                 int len;
                 char *saved_s = s;
 
-                if (!*str) /* done! */
-                        goto finish;
+                if (!*str) { /* done! */
+                        if (force_ellipsis)
+                                goto truncation;
+                        else
+                                goto finish;
+                }
 
                 len = utf8_encoded_valid_unichar(str, SIZE_MAX);
                 if (len > 0) {
@@ -274,7 +278,7 @@ char *utf8_escape_non_printable_full(const char *str, size_t console_width) {
 
  truncation:
         /* Try to go back one if we don't have enough space for the ellipsis */
-        if (n + 1 >= console_width)
+        if (n + 1 > console_width)
                 s = prev_s;
 
         s = mempcpy(s, "…", strlen("…"));
index 219ca89184b6046010606e5cbf8df51f1790a8c1..b0e969f655dbfec865e007c81fa231a0e45bb0c2 100644 (file)
@@ -25,9 +25,9 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newlin
 #define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
 
 char *utf8_escape_invalid(const char *s);
-char *utf8_escape_non_printable_full(const char *str, size_t console_width);
+char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis);
 static inline char *utf8_escape_non_printable(const char *str) {
-        return utf8_escape_non_printable_full(str, SIZE_MAX);
+        return utf8_escape_non_printable_full(str, SIZE_MAX, false);
 }
 
 size_t utf8_encode_unichar(char *out_utf8, char32_t g);
index 63f9306fb4dbcbfad2355e9ab7a4a2a126d90e4f..991b135a332e00a35125cb3ccf90ebfdc3cc5e10 100644 (file)
@@ -27,11 +27,11 @@ static void test_xescape_full(bool eight_bits) {
         XEscapeFlags flags = eight_bits * XESCAPE_8_BIT;
 
         for (unsigned i = 0; i < 60; i++) {
-                _cleanup_free_ char *t;
+                _cleanup_free_ char *t, *q;
 
                 assert_se(t = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i, flags));
 
-                log_info("%02d: %s", i, t);
+                log_info("%02d: <%s>", i, t);
 
                 if (i >= full_fit)
                         assert_se(streq(t, escaped));
@@ -45,6 +45,15 @@ static void test_xescape_full(bool eight_bits) {
                         assert_se(strlen(t) == i);
                         assert_se(strneq(t, "...", i));
                 }
+
+                assert_se(q = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i,
+                                           flags | XESCAPE_FORCE_ELLIPSIS));
+
+                log_info("%02d: <%s>", i, q);
+                if (i > 0)
+                        assert_se(endswith(q, "."));
+                assert(strlen(q) <= i);
+                assert(strlen(q) + 3 >= strlen(t));
         }
 }
 
index 042b94634b01372007be7eb56967e9a0fcfdb906..cdbdfcb054f9ae3798e53e09618586b18e269b1e 100644 (file)
@@ -136,32 +136,29 @@ static void test_utf8_escape_non_printable(void) {
 static void test_utf8_escape_non_printable_full(void) {
         log_info("/* %s */", __func__);
 
-        for (size_t i = 0; i < 20; i++) {
-                _cleanup_free_ char *p;
-
-                p = utf8_escape_non_printable_full("goo goo goo", i);
-                puts(p);
-                assert_se(utf8_is_valid(p));
-                assert_se(utf8_console_width(p) <= i);
-        }
-
-        for (size_t i = 0; i < 20; i++) {
-                _cleanup_free_ char *p;
-
-                p = utf8_escape_non_printable_full("\001 \019\20\a", i);
-                puts(p);
-                assert_se(utf8_is_valid(p));
-                assert_se(utf8_console_width(p) <= i);
-        }
-
-        for (size_t i = 0; i < 20; i++) {
-                _cleanup_free_ char *p;
-
-                p = utf8_escape_non_printable_full("\xef\xbf\x30\x13", i);
-                puts(p);
-                assert_se(utf8_is_valid(p));
-                assert_se(utf8_console_width(p) <= i);
-        }
+        const char *s;
+        FOREACH_STRING(s,
+                       "goo goo goo",       /* ASCII */
+                       "\001 \019\20\a",    /* control characters */
+                       "\xef\xbf\x30\x13")  /* misplaced continuation bytes followed by a digit and cc */
+                for (size_t cw = 0; cw < 22; cw++) {
+                        _cleanup_free_ char *p, *q;
+                        size_t ew;
+
+                        p = utf8_escape_non_printable_full(s, cw, false);
+                        ew = utf8_console_width(p);
+                        log_debug("%02zu \"%s\" (%zu wasted)", cw, p, cw - ew);
+                        assert_se(utf8_is_valid(p));
+                        assert_se(ew <= cw);
+
+                        q = utf8_escape_non_printable_full(s, cw, true);
+                        ew = utf8_console_width(q);
+                        log_debug("   \"%s\" (%zu wasted)", q, cw - ew);
+                        assert_se(utf8_is_valid(q));
+                        assert_se(ew <= cw);
+                        if (cw > 0)
+                                assert_se(endswith(q, "…"));
+                }
 }
 
 static void test_utf16_to_utf8(void) {