From 0f5413048acea94d7bc30038ef556d42335b0337 Mon Sep 17 00:00:00 2001
From: Karel Zak
Date: Tue, 7 Aug 2012 11:19:54 +0200
Subject: [PATCH] include/tt: improve work with non-utf8 chars
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Do not give up on strings that are not valid in the current multibyte
encoding. Bytes rejected by mbrtowc() are now handled one at a time:
a byte that is printable according to isprint() is kept as is, any
other byte is hex-encoded as \x??.

Reported-by: Pádraig Brady
Signed-off-by: Karel Zak
---
Review notes (ignored by git-am, not part of the commit message):

 * mbs_safe_width(): the invalid-sequence branch is now chained with
   "else if". Without the "else", wc (which mbrtowc() does not set on
   failure) was tested too and the same byte was counted twice.
 * mbs_safe_encode(): "width++" moved the pointer parameter instead of
   the counter it points to; it is now "(*width)++".
 * NOTE(review): the new byte-wise branch of mbs_safe_encode() does not
   advance p inside this hunk -- confirm that the code following the
   hunk does, otherwise the loop cannot make progress.
 * NOTE(review): POSIX leaves the mbstate_t unspecified after mbrtowc()
   returns (size_t) -1 -- consider resetting st before continuing.

 lib/tt.c | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/lib/tt.c b/lib/tt.c
index 51b8c2e074..7063760a91 100644
--- a/lib/tt.c
+++ b/lib/tt.c
@@ -53,7 +53,7 @@ static const struct tt_symbols utf8_tt_symbols = {
 	list_last_entry(&(_cl)->cl_columns, &(_tb)->tb_columns)
 
 /*
- * Counts number of cells in multibyte string. For all control and
+ * Counts number of cells in multibyte string. For all control and
  * non-printable chars is the result width enlarged to store \x?? hex
  * sequence. See mbs_safe_encode().
  */
@@ -77,10 +77,12 @@ static size_t mbs_safe_width(const char *s)
 
 			if (len == 0)
 				break;
-			if (len == (size_t) -1 || len == (size_t) -2)
-				return (size_t) -1;
 
-			if (!iswprint(wc))
+			if (len == (size_t) -1 || len == (size_t) -2) {
+				len = 1;
+				width += (isprint((unsigned char) *p) ? 1 : 4);
+
+			} else if (!iswprint(wc))
 				width += len * 4;	/* hex encode whole sequence */
 			else
 				width += wcwidth(wc);	/* number of cells */
@@ -137,11 +139,23 @@ static char *mbs_safe_encode(const char *s, size_t *width)
 			size_t len = mbrtowc(&wc, p, MB_CUR_MAX, &st);
 
 			if (len == 0)
-				break;
-			if (len == (size_t) -1 || len == (size_t) -2)
-				return NULL;
-
-			if (!iswprint(wc)) {
+				break;		/* end of string */
+
+			if (len == (size_t) -1 || len == (size_t) -2) {
+				len = 1;
+				/*
+				 * Not valid multibyte sequence -- maybe it's
+				 * printable char according to the current locales.
+				 */
+				if (!isprint((unsigned char) *p)) {
+					sprintf(r, "\\x%02x", (unsigned char) *p);
+					r += 4;
+					*width += 4;
+				} else {
+					(*width)++;
+					*r++ = *p;
+				}
+			} else if (!iswprint(wc)) {
 				size_t i;
 				for (i = 0; i < len; i++) {
 					sprintf(r, "\\x%02x", (unsigned char) *p);
-- 
2.47.3