From: Paul Eggert
Date: Sun, 22 Jul 2018 16:50:20 +0000 (-0700)
Subject: df: avoid multibyte character corruption on macOS
X-Git-Tag: v8.31~82
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1ecf7abe4a688b3a1f8a0a81f49af8134a1273ba;p=thirdparty%2Fcoreutils.git

df: avoid multibyte character corruption on macOS

This improves on the earlier fix for the problem reported by
Chih-Hsuan Yen (Bug#32236), by also looking for other control
characters and for encoding errors.
* src/df.c: Include wchar.h and wctype.h instead of c-ctype.h.
(hide_problematic_chars): Process the string as multibyte.
Use iswcntrl, not c_iscntrl.
---

diff --git a/src/df.c b/src/df.c
index c851fcc682..d27ba029c4 100644
--- a/src/df.c
+++ b/src/df.c
@@ -23,7 +23,8 @@
 #include
 #include
 #include
-#include <c-ctype.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "system.h"
 #include "canonicalize.h"
@@ -272,21 +273,41 @@ static struct option const long_options[] =
   {NULL, 0, NULL, 0}
 };
 
-/* Replace problematic chars with '?'.
-   Since only control characters are currently considered,
-   this should work in all encodings. */
+/* Replace problematic chars with '?'. */
 
-static char*
+static void
 hide_problematic_chars (char *cell)
 {
-  char *p = cell;
-  while (*p)
+  char *srcend = cell + strlen (cell);
+  char *dst = cell;
+  mbstate_t mbstate = { 0, };
+  size_t n;
+
+  for (char *src = cell; src != srcend; src += n)
     {
-      if (c_iscntrl (to_uchar (*p)))
-        *p = '?';
-      p++;
+      wchar_t wc;
+      size_t srcbytes = srcend - src;
+      n = mbrtowc (&wc, src, srcbytes, &mbstate);
+      bool ok = 0 < n && n <= srcbytes;
+
+      if (ok)
+        ok = !iswcntrl (wc);
+      else
+        n = 1;
+
+      if (ok)
+        {
+          memmove (dst, src, n);
+          dst += n;
+        }
+      else
+        {
+          *dst++ = '?';
+          memset (&mbstate, 0, sizeof mbstate);
+        }
     }
-  return cell;
+
+  *dst = '\0';
 }
 
 /* Dynamically allocate a row of pointers in TABLE, which