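Until now we appended "…" or "..." only when the string was wider than the
specified output width. Add a mode (XESCAPE_FORCE_ELLIPSIS for xescape_full(),
and a force_ellipsis parameter for utf8_escape_non_printable_full()) in which
the ellipsis is always appended, for callers that know that the string being
passed is already truncated.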
The condition for stepping back one character at the "truncation" label in
utf8_escape_non_printable_full() was off by one (n + 1 >= console_width
instead of n + 1 > console_width). Previously we only jumped to that label
after doing a check with a stronger condition, so I think it didn't matter.
Now it matters, because otherwise we'd output the forced ellipsis one column
too early.
* reversed with cunescape(). If XESCAPE_8_BIT is specified, characters >= 127 are let through
* unchanged. This corresponds to non-ASCII printable characters in pre-unicode encodings.
*
- * If console_width is reached, output is truncated and "..." is appended. */
+ * If console_width is reached, or XESCAPE_FORCE_ELLIPSIS is set, output is truncated and "..." is
+ * appended. */
if (console_width == 0)
return strdup("");
memset(ans, '_', MIN(strlen(s), console_width) * 4);
ans[MIN(strlen(s), console_width) * 4] = 0;
+ bool force_ellipsis = FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS);
+
for (f = s, t = prev = prev2 = ans; ; f++) {
char *tmp_t = t;
if (!*f) {
+ if (force_ellipsis)
+ break;
+
*t = 0;
return ans;
}
if ((unsigned char) *f < ' ' ||
(!FLAGS_SET(flags, XESCAPE_8_BIT) && (unsigned char) *f >= 127) ||
*f == '\\' || strchr(bad, *f)) {
- if ((size_t) (t - ans) + 4 > console_width)
+ if ((size_t) (t - ans) + 4 + 3 * force_ellipsis > console_width)
break;
*(t++) = '\\';
*(t++) = hexchar(*f >> 4);
*(t++) = hexchar(*f);
} else {
- if ((size_t) (t - ans) + 1 > console_width)
+ if ((size_t) (t - ans) + 1 + 3 * force_ellipsis > console_width)
break;
*(t++) = *f;
if (FLAGS_SET(flags, XESCAPE_8_BIT))
return xescape_full(str, "", console_width, flags);
else
- return utf8_escape_non_printable_full(str, console_width);
+ return utf8_escape_non_printable_full(str,
+ console_width,
+ FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS));
}
char* octescape(const char *s, size_t len) {
typedef enum XEscapeFlags {
XESCAPE_8_BIT = 1 << 0,
+ XESCAPE_FORCE_ELLIPSIS = 1 << 1,
} XEscapeFlags;
char* xescape_full(const char *s, const char *bad, size_t console_width, XEscapeFlags flags);
return unichar_iswide(c) ? 2 : 1;
}
-char *utf8_escape_non_printable_full(const char *str, size_t console_width) {
+char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis) {
char *p, *s, *prev_s;
size_t n = 0; /* estimated print width */
int len;
char *saved_s = s;
- if (!*str) /* done! */
- goto finish;
+ if (!*str) { /* done! */
+ if (force_ellipsis)
+ goto truncation;
+ else
+ goto finish;
+ }
len = utf8_encoded_valid_unichar(str, SIZE_MAX);
if (len > 0) {
truncation:
/* Try to go back one if we don't have enough space for the ellipsis */
- if (n + 1 >= console_width)
+ if (n + 1 > console_width)
s = prev_s;
s = mempcpy(s, "…", strlen("…"));
#define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
char *utf8_escape_invalid(const char *s);
-char *utf8_escape_non_printable_full(const char *str, size_t console_width);
+char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool force_ellipsis);
static inline char *utf8_escape_non_printable(const char *str) {
- return utf8_escape_non_printable_full(str, SIZE_MAX);
+ return utf8_escape_non_printable_full(str, SIZE_MAX, false);
}
size_t utf8_encode_unichar(char *out_utf8, char32_t g);
XEscapeFlags flags = eight_bits * XESCAPE_8_BIT;
for (unsigned i = 0; i < 60; i++) {
- _cleanup_free_ char *t;
+ _cleanup_free_ char *t, *q;
assert_se(t = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i, flags));
- log_info("%02d: %s", i, t);
+ log_info("%02d: <%s>", i, t);
if (i >= full_fit)
assert_se(streq(t, escaped));
assert_se(strlen(t) == i);
assert_se(strneq(t, "...", i));
}
+
+ assert_se(q = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i,
+ flags | XESCAPE_FORCE_ELLIPSIS));
+
+ log_info("%02d: <%s>", i, q);
+ if (i > 0)
+ assert_se(endswith(q, "."));
+ assert(strlen(q) <= i);
+ assert(strlen(q) + 3 >= strlen(t));
}
}
static void test_utf8_escape_non_printable_full(void) {
log_info("/* %s */", __func__);
- for (size_t i = 0; i < 20; i++) {
- _cleanup_free_ char *p;
-
- p = utf8_escape_non_printable_full("goo goo goo", i);
- puts(p);
- assert_se(utf8_is_valid(p));
- assert_se(utf8_console_width(p) <= i);
- }
-
- for (size_t i = 0; i < 20; i++) {
- _cleanup_free_ char *p;
-
- p = utf8_escape_non_printable_full("\001 \019\20\a", i);
- puts(p);
- assert_se(utf8_is_valid(p));
- assert_se(utf8_console_width(p) <= i);
- }
-
- for (size_t i = 0; i < 20; i++) {
- _cleanup_free_ char *p;
-
- p = utf8_escape_non_printable_full("\xef\xbf\x30\x13", i);
- puts(p);
- assert_se(utf8_is_valid(p));
- assert_se(utf8_console_width(p) <= i);
- }
+ const char *s;
+ FOREACH_STRING(s,
+ "goo goo goo", /* ASCII */
+ "\001 \019\20\a", /* control characters */
+ "\xef\xbf\x30\x13") /* misplaced continuation bytes followed by a digit and cc */
+ for (size_t cw = 0; cw < 22; cw++) {
+ _cleanup_free_ char *p, *q;
+ size_t ew;
+
+ p = utf8_escape_non_printable_full(s, cw, false);
+ ew = utf8_console_width(p);
+ log_debug("%02zu \"%s\" (%zu wasted)", cw, p, cw - ew);
+ assert_se(utf8_is_valid(p));
+ assert_se(ew <= cw);
+
+ q = utf8_escape_non_printable_full(s, cw, true);
+ ew = utf8_console_width(q);
+ log_debug(" \"%s\" (%zu wasted)", q, cw - ew);
+ assert_se(utf8_is_valid(q));
+ assert_se(ew <= cw);
+ if (cw > 0)
+ assert_se(endswith(q, "…"));
+ }
}
static void test_utf16_to_utf8(void) {