git.ipfire.org Git - thirdparty/util-linux.git/commitdiff
col: make input tolerate invalid wide characters
author Sami Kerola <kerolasa@iki.fi>
Thu, 26 Nov 2020 22:01:39 +0000 (22:01 +0000)
committer Sami Kerola <kerolasa@iki.fi>
Wed, 2 Dec 2020 22:12:32 +0000 (22:12 +0000)
getwchar(3) fails with EILSEQ when it encounters an invalid multibyte
sequence, which made col(1) give up and exit. With this change col(1)
prints each broken multibyte byte as a \x{hex} string instead.

Reported-by: Vitaly Lipatov <lav@etersoft.ru>
Addresses: https://github.com/karelzak/util-linux/issues/1198
Signed-off-by: Sami Kerola <kerolasa@iki.fi>
tests/expected/col/multibyte [deleted file]
tests/expected/col/multibyte-invalid [new file with mode: 0644]
tests/expected/col/multibyte-valid [new file with mode: 0644]
tests/ts/col/multibyte
text-utils/col.c

diff --git a/tests/expected/col/multibyte b/tests/expected/col/multibyte
deleted file mode 100644 (file)
index abf6072..0000000
+++ /dev/null
@@ -1 +0,0 @@
-col: failed on line 1: EILSEQ
diff --git a/tests/expected/col/multibyte-invalid b/tests/expected/col/multibyte-invalid
new file mode 100644 (file)
index 0000000..ebae97c
--- /dev/null
@@ -0,0 +1 @@
+abc\x80\x80jkl
diff --git a/tests/expected/col/multibyte-valid b/tests/expected/col/multibyte-valid
new file mode 100644 (file)
index 0000000..e35d95c
--- /dev/null
@@ -0,0 +1 @@
+Dateiname der Versandh\xfclle
diff --git a/tests/ts/col/multibyte b/tests/ts/col/multibyte
index b9240d6b41a2de69c90dd35411189ca6e44e0d5b..543608cab8cd15510a7abdd6d962c1d281991121 100755 (executable)
@@ -24,8 +24,17 @@ ts_init "$*"
 ts_check_test_command "$TS_CMD_COL"
 ts_check_test_command "$TS_HELPER_STRERROR"
 
+ts_init_subtest "valid"
 cat $TS_SELF/multibyte.data |
-    LC_ALL=C.UTF-8 ts_run $TS_CMD_COL 2>&1 > /dev/null |
+    LC_ALL=C ts_run $TS_CMD_COL 2>&1 |
     sed -e "s@$($TS_HELPER_STRERROR EILSEQ)@EILSEQ@" > $TS_OUTPUT
+ts_finalize_subtest
+
+ts_init_subtest "invalid"
+printf '%s\n' $'abc\200\200jkl' |
+       LC_ALL=C ts_run $TS_CMD_COL 2>&1 |
+       sed -e "s@$($TS_HELPER_STRERROR EILSEQ)@EILSEQ@" > $TS_OUTPUT
+ts_finalize_subtest
+
 
 ts_finalize
diff --git a/text-utils/col.c b/text-utils/col.c
index 6963eb68f89a95ac91a165498dea662f4a09d905..f9e39a2261b1da79dcd606984b90d358b4747ef7 100644 (file)
@@ -599,6 +599,49 @@ static void free_line_allocations(struct col_alloc *root)
 }
 #endif
 
+static void process_char(struct col_ctl *ctl, struct col_lines *lns)
+{
+                /* Non-graphic input: done if handle_not_graphic() returns non-zero */
+                if (!iswgraph(lns->ch) && handle_not_graphic(ctl, lns))
+                        return;
+
+                /* ch must be stored in a line; switch lines if cur_line is not the target */
+                if ((size_t)lns->cur_line != lns->this_line - lns->adjust)
+                        update_cur_line(ctl, lns);
+
+                /* Grow the line buffer when nearly full: double it, or start at NALLOC */
+                if (ctl->l->l_lsize <= ctl->l->l_line_len + 1) {
+                        size_t need;
+
+                        need = ctl->l->l_lsize ? ctl->l->l_lsize * 2 : NALLOC;
+                        ctl->l->l_line = xrealloc(ctl->l->l_line, need * sizeof(struct col_char));
+                        ctl->l->l_lsize = need;
+                }
+
+                /* Append the character, tagged with the current character set */
+                lns->c = &ctl->l->l_line[ctl->l->l_line_len++];
+                lns->c->c_char = lns->ch;
+                lns->c->c_set = lns->cur_set;
+
+                if (0 < lns->cur_col)   /* stored column is never negative */
+                        lns->c->c_column = lns->cur_col;
+                else
+                        lns->c->c_column = 0;
+                lns->c->c_width = wcwidth(lns->ch);
+
+                /*
+                 * A character stored left of the line's maximum column is out
+                 * of order, so the line will need sorting when it is flushed.
+                 */
+                if (lns->cur_col < ctl->l->l_max_col)
+                        ctl->l->l_needs_sort = 1;
+                else
+                        ctl->l->l_max_col = lns->cur_col;
+                if (0 < lns->c->c_width)        /* only positive widths advance the column */
+                        lns->cur_col += lns->c->c_width;
+
+}
+
 int main(int argc, char **argv)
 {
        struct col_ctl ctl = {
@@ -623,52 +666,30 @@ int main(int argc, char **argv)
        while (feof(stdin) == 0) {
                errno = 0;
                /* Get character */
-               if ((lns.ch = getwchar()) == WEOF) {
-                       if (errno == EILSEQ) {
-                               warn(_("failed on line %lu"), lns.max_line + 1);
-                               ret = EXIT_FAILURE;
-                       }
-                       break;
-               }
-
-               /* Deal printable characters */
-               if (!iswgraph(lns.ch) && handle_not_graphic(&ctl, &lns))
-                       continue;
-
-               /* Must stuff ch in a line - are we at the right one? */
-               if ((size_t)lns.cur_line != lns.this_line - lns.adjust)
-                       update_cur_line(&ctl, &lns);
-
-               /* Does line buffer need to grow? */
-               if (ctl.l->l_lsize <= ctl.l->l_line_len + 1) {
-                       size_t need;
+               lns.ch = getwchar();
 
-                       need = ctl.l->l_lsize ? ctl.l->l_lsize * 2 : NALLOC;
-                       ctl.l->l_line = xrealloc(ctl.l->l_line, need * sizeof(struct col_char));
-                       ctl.l->l_lsize = need;
-               }
-
-               /* Store character */
-               lns.c = &ctl.l->l_line[ctl.l->l_line_len++];
-               lns.c->c_char = lns.ch;
-               lns.c->c_set = lns.cur_set;
-
-               if (0 < lns.cur_col)
-                       lns.c->c_column = lns.cur_col;
-               else
-                       lns.c->c_column = 0;
-               lns.c->c_width = wcwidth(lns.ch);
+               if (lns.ch == WEOF) {
+                       if (errno == EILSEQ) {
+                               /* Illegal multibyte sequence */
+                               int c;
+                               char buf[5];
+                               size_t len, i;
 
-               /*
-                * If things are put in out of order, they will need sorting
-                * when it is flushed.
-                */
-               if (lns.cur_col < ctl.l->l_max_col)
-                       ctl.l->l_needs_sort = 1;
-               else
-                       ctl.l->l_max_col = lns.cur_col;
-               if (0 < lns.c->c_width)
-                       lns.cur_col += lns.c->c_width;
+                               c = getchar();
+                               if (c == EOF)
+                                       break;
+                               sprintf(buf, "\\x%02x", (unsigned char) c);
+                               len = strlen(buf);
+                               for (i = 0; i < len; i++) {
+                                       lns.ch = buf[i];
+                                       process_char(&ctl, &lns);
+                               }
+                       } else
+                               /* end of file */
+                               break;
+               } else
+                       /* the common case */
+                       process_char(&ctl, &lns);
        }
 
        /* goto the last line that had a character on it */