From: aizu-m Date: Fri, 19 Jun 2026 06:53:03 +0000 (+0530) Subject: wc: fix out-of-bounds read for single-byte wide characters X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;p=thirdparty%2Fcoreutils.git wc: fix out-of-bounds read for single-byte wide characters In legacy CJK locales a single input byte can decode to a wide character above UCHAR_MAX, e.g. SHIFT-JIS bytes 0xA1..0xDF become U+FF61..U+FF9F. wc then indexed its 256-entry wc_isprint/wc_isspace tables with that value and read past their end. * src/wc.c (wc): Take the byte-indexed fast path only when the decoded character fits in a byte; wider values fall through to c32width and c32isspace. * tests/wc/wc-sjis.sh: New test. * tests/local.mk (all_tests): Add it. * NEWS: Mention the fix. Link: https://github.com/coreutils/coreutils/pull/298 --- diff --git a/NEWS b/NEWS index 58beb738ee..4ffd690def 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,10 @@ GNU coreutils NEWS -*- outline -*- 'uniq -w' no longer overruns the read buffer in multibyte locales. [bug introduced in coreutils-9.5] + 'wc' no longer reads past the end of a lookup table in legacy multibyte + locales like SHIFT-JIS where a single byte can decode to a wide character. + [bug introduced in coreutils-9.5] + ** Changes in behavior 'ls' -w,--width no longer includes '\n' in the width of a line. 
diff --git a/src/wc.c b/src/wc.c index 2e2a9b03cb..4a31189e64 100644 --- a/src/wc.c +++ b/src/wc.c @@ -581,7 +581,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus) default:; bool in_word2; - if (single_byte) + if (single_byte && wide_char <= UCHAR_MAX) { linepos += wc_isprint[wide_char]; in_word2 = !wc_isspace[wide_char]; diff --git a/tests/local.mk b/tests/local.mk index b56e5dcfb6..154b591cea 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -308,6 +308,7 @@ all_tests = \ tests/wc/wc-nbsp.sh \ tests/wc/wc-parallel.sh \ tests/wc/wc-proc.sh \ + tests/wc/wc-sjis.sh \ tests/wc/wc-total.sh \ tests/cat/cat-E.sh \ tests/cat/cat-proc.sh \ diff --git a/tests/wc/wc-sjis.sh b/tests/wc/wc-sjis.sh new file mode 100755 index 0000000000..39ca1358f4 --- /dev/null +++ b/tests/wc/wc-sjis.sh @@ -0,0 +1,54 @@ +#!/bin/sh +# A single input byte that decodes to a wide character must not overrun +# wc's byte-indexed isprint/isspace tables. + +# Copyright (C) 2026 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ wc printf + +# In SHIFT-JIS the bytes 0xA1..0xDF are half-width katakana: each is a +# single input byte, yet it decodes to U+FF61..U+FF9F, i.e., > UCHAR_MAX. 
+# In coreutils v9.5 to 9.11 on glibc where SHIFT_JIS is available, +# wc indexed its 256-entry wc_isprint[]/wc_isspace[] tables with the +# decoded value, so it read past their end and crashed. + +export LC_ALL=ja_JP.SHIFT_JIS + +# Try to generate the locale if not available +# as it's generally not available by default. +if test "$(locale charmap 2>/dev/null)" != SHIFT_JIS; then + mkdir locale || framework_failure_ + localedef --no-archive --no-warnings=ascii \ + -f SHIFT_JIS -i ja_JP "$PWD/locale/ja_JP.SHIFT_JIS" \ + >/dev/null 2>&1 || skip_ "SHIFT-JIS locale not available" + export LOCPATH="$PWD/locale" +fi + +test "$(locale charmap 2>/dev/null)" = SHIFT_JIS \ + || skip_ "SHIFT-JIS locale not available" + +env printf 'a\xa1b\n' > in || framework_failure_ + +# 0xA1 is one byte and one non-space character: 1 line, 1 word, 4 bytes. +wc in > out || fail=1 +printf '1 1 4 in\n' > exp || framework_failure_ +compare exp out || fail=1 + +# -L reaches the same table through wc_isprint; ensure it too completes. +wc -L in > /dev/null || fail=1 + +Exit $fail