From 3a81d44d43b078ee20f1ce2b907c23d0926070b3 Mon Sep 17 00:00:00 2001
From: Collin Funk
Date: Tue, 2 Sep 2025 20:08:20 -0700
Subject: [PATCH] fold: check that characters are not non-breaking spaces when -s is used

NetBSD 10 and Solaris 11.4 treat non-breaking spaces as blank characters
unlike glibc.

* src/system.h: Include uchar.h.
(c32isnbspace): New function based on iswnbspace from src/wc.c.
* src/fold.c (fold_file): Use it.
* src/wc.c (iswnbspace): Remove function.
(maybe_c32isnbspace): New function.
(wc, main): Use it.
Fixes https://bugs.gnu.org/79300
---
 src/fold.c   |  2 +-
 src/system.h |  9 +++++++++
 src/wc.c     | 15 +++++++--------
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/fold.c b/src/fold.c
index 5f71d5c553..b90bc7d80a 100644
--- a/src/fold.c
+++ b/src/fold.c
@@ -216,7 +216,7 @@ fold_file (char const *filename, size_t width)
                   for (mcel_t g2; logical_p < logical_lim; logical_p += g2.len)
                     {
                       g2 = mcel_scan (logical_p, logical_lim);
-                      if (c32isblank (g2.ch))
+                      if (c32isblank (g2.ch) && ! c32isnbspace (g2.ch))
                         {
                           space_length = g2.len;
                           logical_end = logical_p - line_out;
diff --git a/src/system.h b/src/system.h
index 5cb751cc89..2296c8bbb9 100644
--- a/src/system.h
+++ b/src/system.h
@@ -70,6 +70,7 @@
 #include
 #include
 #include
+#include <uchar.h>
 #include
 
 /* Some systems don't define this; POSIX mentions it but says it is
@@ -148,6 +149,14 @@ enum
    errors that the cast doesn't. */
 static inline unsigned char to_uchar (char ch) { return ch; }
 
+/* Return non zero if a non breaking space. */
+ATTRIBUTE_PURE
+static inline int
+c32isnbspace (char32_t wc)
+{
+  return wc == 0x00A0 || wc == 0x2007 || wc == 0x202F || wc == 0x2060;
+}
+
 #include
 
 /* Take care of NLS matters. */
diff --git a/src/wc.c b/src/wc.c
index 05e78676e3..f22f658b4f 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -191,14 +191,13 @@ the following order: newline, word, character, byte, maximum line length.\n\
   exit (status);
 }
 
-/* Return non zero if a non breaking space. */
+/* Return non zero if POSIXLY_CORRECT is not set and WC is a non breaking
+   space. */
 ATTRIBUTE_PURE
 static int
-iswnbspace (wint_t wc)
+maybe_c32isnbspace (char32_t wc)
 {
-  return ! posixly_correct
-    && (wc == 0x00A0 || wc == 0x2007
-        || wc == 0x202F || wc == 0x2060);
+  return ! posixly_correct && c32isnbspace (wc);
 }
 
 /* FILE is the name of the file (or null for standard input)
@@ -525,8 +524,8 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
                       if (width > 0)
                         linepos += width;
                     }
-                  in_word2 = ! iswspace (wide_char)
-                             && ! iswnbspace (wide_char);
+                  in_word2 = (! iswspace (wide_char)
+                              && ! maybe_c32isnbspace (wide_char));
                 }
 
               /* Count words by counting word starts, i.e., each
@@ -798,7 +797,7 @@ main (int argc, char **argv)
       wc_isprint[i] = !!isprint (i);
   if (print_words)
     for (int i = 0; i <= UCHAR_MAX; i++)
-      wc_isspace[i] = isspace (i) || iswnbspace (btoc32 (i));
+      wc_isspace[i] = isspace (i) || maybe_c32isnbspace (btoc32 (i));
 
   bool read_tokens = false;
   struct argv_iterator *ai;
-- 
2.47.3