]> git.ipfire.org Git - thirdparty/util-linux.git/commitdiff
lib/mbsalign: add mbs_invalid_encode()
author Karel Zak <kzak@redhat.com>
Wed, 22 Nov 2017 13:41:10 +0000 (14:41 +0100)
committer Karel Zak <kzak@redhat.com>
Thu, 14 Dec 2017 14:39:40 +0000 (15:39 +0100)
Like mbs_safe_encode(), but it does not care about control chars.

Signed-off-by: Karel Zak <kzak@redhat.com>
include/mbsalign.h
lib/mbsalign.c

index 6bdb50d3f5b74a4654ff558ba1b27eb9849317e9..0c28e6f69dd4ab22b63a402f33171e6127ff39ad 100644 (file)
@@ -57,4 +57,7 @@ extern char *mbs_safe_encode(const char *s, size_t *width);
 extern char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const char *safechars);
 extern size_t mbs_safe_encode_size(size_t bytes);
 
+extern char *mbs_invalid_encode(const char *s, size_t *width); /* malloc()ed string, or NULL */
+extern char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf); /* @buf, or NULL */
+
 #endif /* UTIL_LINUX_MBSALIGN_H */
index b0e1004a087c8af09b3095acad74404c45efeb5f..78ab12a0da91dec44ea8e0487661f2451ff78849 100644 (file)
@@ -194,6 +194,67 @@ char *mbs_safe_encode_to_buffer(const char *s, size_t *width, char *buf, const c
        return buf;
 }
 
+/*
+ * Copy @s to @buf; a byte of a broken multibyte sequence is replaced with a
+ * \x?? hex sequence unless isprint() accepts it. *@width is set to the number
+ * of cells. Returns @buf, or NULL if @s is NULL or empty or @buf is NULL.
+ *
+ * The @buf has to hold at least mbs_safe_encode_size(strlen(s)) bytes.
+ */
+char *mbs_invalid_encode_to_buffer(const char *s, size_t *width, char *buf)
+{
+       const char *p = s;
+       char *r;
+       size_t sz = s ? strlen(s) : 0;
+
+#ifdef HAVE_WIDECHAR
+       mbstate_t st;
+       memset(&st, 0, sizeof(st)); /* zeroed state == initial conversion state */
+#endif
+       if (!sz || !buf)
+               return NULL; /* *width is left untouched on this path */
+
+       r = buf;
+       *width = 0; /* @width is dereferenced unconditionally; must not be NULL */
+
+       while (p && *p) {
+#ifdef HAVE_WIDECHAR
+               wchar_t wc;
+               size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
+
+               if (len == 0)
+                       break;          /* end of string */
+
+               if (len == (size_t) -1 || len == (size_t) -2) { /* invalid or incomplete sequence */
+                       len = 1; /* consume one byte only; NOTE(review): st is not reset after the error -- confirm */
+                       /*
+                        * Not a valid multibyte sequence -- the byte may still
+                        * be printable in the current locale; escape it if not.
+                        */
+                       if (!isprint((unsigned char) *p)) {
+                               sprintf(r, "\\x%02x", (unsigned char) *p); /* writes 4 chars + NUL */
+                               r += 4; /* the NUL is overwritten by the next write */
+                               *width += 4;
+                       } else {
+                               (*width)++;
+                               *r++ = *p;
+                       }
+               } else {
+                       memcpy(r, p, len); /* valid character: copy its bytes unchanged */
+                       r += len;
+                       *width += wcwidth(wc); /* NOTE(review): wcwidth() may return -1 (non-printable wc); added to size_t -- confirm */
+               }
+               p += len;
+#else
+               *r++ = *p++; /* no widechar support: plain byte copy, nothing is escaped */
+               (*width)++;
+#endif
+       }
+
+       *r = '\0';
+       return buf;
+}
+
 size_t mbs_safe_encode_size(size_t bytes)
 {
        return (bytes * 4) + 1;
@@ -218,6 +279,25 @@ char *mbs_safe_encode(const char *s, size_t *width)
        return ret;
 }
 
+/*
+ * Returns malloc()ed copy of @s encoded by mbs_invalid_encode_to_buffer(), or
+ * NULL if @s is NULL or empty, or if the allocation or the encoding fails.
+ */
+char *mbs_invalid_encode(const char *s, size_t *width)
+{
+       size_t sz = s ? strlen(s) : 0;
+       char *buf, *ret = NULL;
+
+       if (!sz)
+               return NULL; /* *width is not written on this path */
+       buf = malloc(mbs_safe_encode_size(sz)); /* size helper shared with mbs_safe_encode*() */
+       if (buf)
+               ret = mbs_invalid_encode_to_buffer(s, width, buf);
+       if (!ret)
+               free(buf); /* buf may be NULL here (failed malloc); free(NULL) is a no-op */
+       return ret;
+}
+
 #ifdef HAVE_WIDECHAR
 
 static bool