basic/string-util: add a convenience function to cescape mostly-ascii fields

author Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>

Thu, 17 May 2018 08:55:21 +0000 (10:55 +0200)

committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>

Thu, 31 May 2018 12:27:07 +0000 (14:27 +0200)
author Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Thu, 17 May 2018 08:55:21 +0000 (10:55 +0200)
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Thu, 31 May 2018 12:27:07 +0000 (14:27 +0200)
diff --git a/src/basic/string-util.c b/src/basic/string-util.c

index 07c9938a3f1e264533928535e21dba06a925014b..b06b50397ee95e2bede52eee250969bc1bfa2c5c 100644 (file)
--- a/src/basic/string-util.c
+++ b/src/basic/string-util.c
@@ -14,6 +14,7 @@
  #include <string.h>
  
  #include "alloc-util.h"
+#include "escape.h"
  #include "gunicode.h"
  #include "locale-util.h"
  #include "macro.h"
@@ -453,6 +454,20 @@ bool string_has_cc(const char *p, const char *ok) {
          return false;
  }
  
+static int write_ellipsis(char *buf, bool unicode) { /* Stores a 3-byte ellipsis at buf; no NUL is written. */
+        if (unicode || is_locale_utf8()) { /* UTF-8 form if the caller forces it or is_locale_utf8() is true */
+                buf[0] = 0xe2; /* tri-dot ellipsis: … (U+2026, UTF-8 bytes e2 80 a6) */
+                buf[1] = 0x80;
+                buf[2] = 0xa6;
+        } else { /* otherwise three plain ASCII dots */
+                buf[0] = '.';
+                buf[1] = '.';
+                buf[2] = '.';
+        }
+        /* Both variants occupy exactly three bytes, so the returned byte count is constant. */
+        return 3;
+}
+
  static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
          size_t x, need_space;
          char *r;
@@ -501,17 +516,7 @@ static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_le
          assert(x <= new_length - need_space);
  
          memcpy(r, s, x);
-
-        if (is_locale_utf8()) {
-                r[x+0] = 0xe2; /* tri-dot ellipsis: … */
-                r[x+1] = 0x80;
-                r[x+2] = 0xa6;
-        } else {
-                r[x+0] = '.';
-                r[x+1] = '.';
-                r[x+2] = '.';
-        }
-
+        write_ellipsis(r + x, false);
          memcpy(r + x + 3,
                 s + old_length - (new_length - x - need_space),
                 new_length - x - need_space + 1);
@@ -596,23 +601,56 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
          */
  
          memcpy(e, s, len);
-        e[len + 0] = 0xe2; /* tri-dot ellipsis: … */
-        e[len + 1] = 0x80;
-        e[len + 2] = 0xa6;
-
+        write_ellipsis(e + len, true);
          memcpy(e + len + 3, j, len2 + 1);
  
          return e;
  }
  
  char *ellipsize(const char *s, size_t length, unsigned percent) {
-
          if (length == (size_t) -1)
                  return strdup(s);
  
          return ellipsize_mem(s, strlen(s), length, percent);
  }
  
+char *cellescape(char *buf, size_t len, const char *s) {
+        /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
+         * characters are copied as they are, everything else is escaped. The result
+         * is different than if escaping and ellipsization were performed in two
+         * separate steps, because each sequence is either stored in full or skipped.
+         *
+         * This function should be used for logging strings which are expected to
+         * be plain ASCII, in a safe way.
+         *
+         * An ellipsis will be used if s is too long. It is always placed at the
+         * very end. len must be at least 10 (asserted below); buf is returned.
+         */
+
+        size_t i;
+        const char *t = s;
+
+        assert(len > 4 + 4 + 1); /* two chars (sizing assumes an escaped char takes at most 4 bytes) and the terminator */
+        /* Escape whole chars while i < len - 9; leave early through "done" when s ends first. */
+        for (i = 0; i < len - 9; t++) {
+                if (!*t)
+                        goto done;
+                i += cescape_char(*t, buf + i);
+        }
+
+        /* We have space for one more char and terminating nul at this point: store the last char, or an ellipsis if more input follows */
+        if (*t) {
+                if (*(t+1))
+                        i += write_ellipsis(buf + i, false); /* false: do not force UTF-8, the locale decides */
+                else
+                        i += cescape_char(*t, buf + i);
+        }
+
+ done:
+        buf[i] = '\0';
+        return buf;
+}
+
  bool nulstr_contains(const char *nulstr, const char *needle) {
          const char *i;
  
diff --git a/src/basic/string-util.h b/src/basic/string-util.h

index aa007242663240f0f79d08e4455788bdbac8464d..3c5e2a9ef69f1c51c5e120ab15396e2d6d28346c 100644 (file)
--- a/src/basic/string-util.h
+++ b/src/basic/string-util.h
@@ -157,6 +157,7 @@ bool string_has_cc(const char *p, const char *ok) _pure_;
  
  char *ellipsize_mem(const char *s, size_t old_length_bytes, size_t new_length_columns, unsigned percent);
  char *ellipsize(const char *s, size_t length, unsigned percent);
+char *cellescape(char *buf, size_t len, const char *s);
  
  bool nulstr_contains(const char *nulstr, const char *needle);
  
diff --git a/src/test/test-string-util.c b/src/test/test-string-util.c

index eac12ac7afde48911ab1bbcf64957facc24a47e4..413adfda7de75264a95e7f7aad2d374cf4a80cd2 100644 (file)
--- a/src/test/test-string-util.c
+++ b/src/test/test-string-util.c
@@ -6,6 +6,7 @@
  ***/
  
  #include "alloc-util.h"
+#include "locale-util.h"
  #include "macro.h"
  #include "string-util.h"
  #include "strv.h"
@@ -77,6 +78,29 @@ static void test_ascii_strcasecmp_nn(void) {
          assert_se(ascii_strcasecmp_nn("BBbb", 4, "aaaa", 4) > 0);
  }
  
+static void test_cellescape(void) {
+        char buf[40];
+        /* len 10: one char is escaped by the loop, then either the last char or an ellipsis follows */
+        assert_se(streq(cellescape(buf, 10, "1"), "1"));
+        assert_se(streq(cellescape(buf, 10, "12"), "12"));
+        assert_se(streq(cellescape(buf, 10, "123"), is_locale_utf8() ? "1…" : "1..."));
+        /* control characters come out escaped: a short form for tab, a four-byte octal form for 020 */
+        assert_se(streq(cellescape(buf, 10, "1\011"), "1\\t"));
+        assert_se(streq(cellescape(buf, 10, "1\020"), "1\\020"));
+        assert_se(streq(cellescape(buf, 10, "1\020x"), is_locale_utf8() ? "1…" : "1..."));
+        /* len 40: enough room, so the same inputs are not ellipsized */
+        assert_se(streq(cellescape(buf, 40, "1\020"), "1\\020"));
+        assert_se(streq(cellescape(buf, 40, "1\020x"), "1\\020x"));
+        /* run of two-byte escapes: len 10 and 11 fit one escape before the ellipsis, len 12 fits two */
+        assert_se(streq(cellescape(buf, 40, "\a\b\f\n\r\t\v\\\"'"), "\\a\\b\\f\\n\\r\\t\\v\\\\\\\"\\'"));
+        assert_se(streq(cellescape(buf, 10, "\a\b\f\n\r\t\v\\\"'"), is_locale_utf8() ? "\\a…" : "\\a..."));
+        assert_se(streq(cellescape(buf, 11, "\a\b\f\n\r\t\v\\\"'"), is_locale_utf8() ? "\\a…" : "\\a..."));
+        assert_se(streq(cellescape(buf, 12, "\a\b\f\n\r\t\v\\\"'"), is_locale_utf8() ? "\\a\\b…" : "\\a\\b..."));
+        /* same checks as the len 40 case, passing the real size of buf */
+        assert_se(streq(cellescape(buf, sizeof buf, "1\020"), "1\\020"));
+        assert_se(streq(cellescape(buf, sizeof buf, "1\020x"), "1\\020x"));
+}
+
  static void test_streq_ptr(void) {
          assert_se(streq_ptr(NULL, NULL));
          assert_se(!streq_ptr("abc", "cdef"));
@@ -422,6 +446,7 @@ int main(int argc, char *argv[]) {
          test_string_erase();
          test_ascii_strcasecmp_n();
          test_ascii_strcasecmp_nn();
+        test_cellescape();
          test_streq_ptr();
          test_strstrip();
          test_strextend();
author	Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
	Thu, 17 May 2018 08:55:21 +0000 (10:55 +0200)
committer	Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
	Thu, 31 May 2018 12:27:07 +0000 (14:27 +0200)
src/basic/string-util.c		patch \| blob \| blame \| history
src/basic/string-util.h		patch \| blob \| blame \| history
src/test/test-string-util.c		patch \| blob \| blame \| history