From 14a36f96ee671c2d3a20fa15437844a711e0b29c Mon Sep 17 00:00:00 2001 From: Sami Kerola Date: Thu, 26 Nov 2020 22:01:39 +0000 Subject: [PATCH] col: make input to tolerate invalid wide characters getwchar(3) fails with EILSEQ when it encounters an invalid multibyte sequence, and col(1) used to print a warning and exit at that point. With this change col(1) keeps going and prints each byte of a broken multibyte sequence as a \x{hex} string. Reported-by: Vitaly Lipatov Addresses: https://github.com/karelzak/util-linux/issues/1198 Signed-off-by: Sami Kerola --- tests/expected/col/multibyte | 1 - tests/expected/col/multibyte-invalid | 1 + tests/expected/col/multibyte-valid | 1 + tests/ts/col/multibyte | 11 ++- text-utils/col.c | 109 ++++++++++++++++----------- 5 files changed, 77 insertions(+), 46 deletions(-) delete mode 100644 tests/expected/col/multibyte create mode 100644 tests/expected/col/multibyte-invalid create mode 100644 tests/expected/col/multibyte-valid diff --git a/tests/expected/col/multibyte b/tests/expected/col/multibyte deleted file mode 100644 index abf607249a..0000000000 --- a/tests/expected/col/multibyte +++ /dev/null @@ -1 +0,0 @@ -col: failed on line 1: EILSEQ diff --git a/tests/expected/col/multibyte-invalid b/tests/expected/col/multibyte-invalid new file mode 100644 index 0000000000..ebae97c72f --- /dev/null +++ b/tests/expected/col/multibyte-invalid @@ -0,0 +1 @@ +abc\x80\x80jkl diff --git a/tests/expected/col/multibyte-valid b/tests/expected/col/multibyte-valid new file mode 100644 index 0000000000..e35d95cda5 --- /dev/null +++ b/tests/expected/col/multibyte-valid @@ -0,0 +1 @@ +Dateiname der Versandh\xfclle diff --git a/tests/ts/col/multibyte b/tests/ts/col/multibyte index b9240d6b41..543608cab8 100755 --- a/tests/ts/col/multibyte +++ b/tests/ts/col/multibyte @@ -24,8 +24,17 @@ ts_init "$*" ts_check_test_command "$TS_CMD_COL" ts_check_test_command "$TS_HELPER_STRERROR" +ts_init_subtest "valid" cat $TS_SELF/multibyte.data | - LC_ALL=C.UTF-8 ts_run $TS_CMD_COL 2>&1 > /dev/null | + LC_ALL=C ts_run $TS_CMD_COL 2>&1 | sed -e 
"s@$($TS_HELPER_STRERROR EILSEQ)@EILSEQ@" > $TS_OUTPUT +ts_finalize_subtest + +ts_init_subtest "invalid" +printf '%s\n' $'abc\200\200jkl' | + LC_ALL=C ts_run $TS_CMD_COL 2>&1 | + sed -e "s@$($TS_HELPER_STRERROR EILSEQ)@EILSEQ@" > $TS_OUTPUT +ts_finalize_subtest + ts_finalize diff --git a/text-utils/col.c b/text-utils/col.c index 6963eb68f8..f9e39a2261 100644 --- a/text-utils/col.c +++ b/text-utils/col.c @@ -599,6 +599,49 @@ static void free_line_allocations(struct col_alloc *root) } #endif +static void process_char(struct col_ctl *ctl, struct col_lines *lns) +{ + /* Deal printable characters */ + if (!iswgraph(lns->ch) && handle_not_graphic(ctl, lns)) + return; + + /* Must stuff ch in a line - are we at the right one? */ + if ((size_t)lns->cur_line != lns->this_line - lns->adjust) + update_cur_line(ctl, lns); + + /* Does line buffer need to grow? */ + if (ctl->l->l_lsize <= ctl->l->l_line_len + 1) { + size_t need; + + need = ctl->l->l_lsize ? ctl->l->l_lsize * 2 : NALLOC; + ctl->l->l_line = xrealloc(ctl->l->l_line, need * sizeof(struct col_char)); + ctl->l->l_lsize = need; + } + + /* Store character */ + lns->c = &ctl->l->l_line[ctl->l->l_line_len++]; + lns->c->c_char = lns->ch; + lns->c->c_set = lns->cur_set; + + if (0 < lns->cur_col) + lns->c->c_column = lns->cur_col; + else + lns->c->c_column = 0; + lns->c->c_width = wcwidth(lns->ch); + + /* + * If things are put in out of order, they will need sorting + * when it is flushed. 
+ */ + if (lns->cur_col < ctl->l->l_max_col) + ctl->l->l_needs_sort = 1; + else + ctl->l->l_max_col = lns->cur_col; + if (0 < lns->c->c_width) + lns->cur_col += lns->c->c_width; + +} + int main(int argc, char **argv) { struct col_ctl ctl = { @@ -623,52 +666,30 @@ int main(int argc, char **argv) while (feof(stdin) == 0) { errno = 0; /* Get character */ - if ((lns.ch = getwchar()) == WEOF) { - if (errno == EILSEQ) { - warn(_("failed on line %lu"), lns.max_line + 1); - ret = EXIT_FAILURE; - } - break; - } - - /* Deal printable characters */ - if (!iswgraph(lns.ch) && handle_not_graphic(&ctl, &lns)) - continue; - - /* Must stuff ch in a line - are we at the right one? */ - if ((size_t)lns.cur_line != lns.this_line - lns.adjust) - update_cur_line(&ctl, &lns); - - /* Does line buffer need to grow? */ - if (ctl.l->l_lsize <= ctl.l->l_line_len + 1) { - size_t need; + lns.ch = getwchar(); - need = ctl.l->l_lsize ? ctl.l->l_lsize * 2 : NALLOC; - ctl.l->l_line = xrealloc(ctl.l->l_line, need * sizeof(struct col_char)); - ctl.l->l_lsize = need; - } - - /* Store character */ - lns.c = &ctl.l->l_line[ctl.l->l_line_len++]; - lns.c->c_char = lns.ch; - lns.c->c_set = lns.cur_set; - - if (0 < lns.cur_col) - lns.c->c_column = lns.cur_col; - else - lns.c->c_column = 0; - lns.c->c_width = wcwidth(lns.ch); + if (lns.ch == WEOF) { + if (errno == EILSEQ) { + /* Illegal multibyte sequence */ + int c; + char buf[5]; + size_t len, i; - /* - * If things are put in out of order, they will need sorting - * when it is flushed. 
- */ - if (lns.cur_col < ctl.l->l_max_col) - ctl.l->l_needs_sort = 1; - else - ctl.l->l_max_col = lns.cur_col; - if (0 < lns.c->c_width) - lns.cur_col += lns.c->c_width; + c = getchar(); + if (c == EOF) + break; + sprintf(buf, "\\x%02x", (unsigned char) c); + len = strlen(buf); + for (i = 0; i < len; i++) { + lns.ch = buf[i]; + process_char(&ctl, &lns); + } + } else + /* end of file */ + break; + } else + /* the common case */ + process_char(&ctl, &lns); } /* goto the last line that had a character on it */ -- 2.47.3