From feda512e97ab988ba866aa82895e6647cb68917e Mon Sep 17 00:00:00 2001
From: Aearil <aearil@paranoici.org>
Date: Sat, 24 Feb 2024 21:44:24 +0100
Subject: [PATCH] wc: fix -w with breaking space over UCHAR_MAX

* src/wc.c (wc): Fix regression introduced in commit v9.4-48-gf40c6b5cf.
* tests/wc/wc-nbsp.sh: Add test cases for "standard" spaces.
Fixes https://bugs.gnu.org/69369
---
 src/wc.c            | 3 ++-
 tests/wc/wc-nbsp.sh | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/wc.c b/src/wc.c
index f5a921534c..d70ad39363 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -528,7 +528,8 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
                           if (width > 0)
                             linepos += width;
                         }
-                      in_word2 = !iswnbspace (wide_char);
+                      in_word2 = ! iswspace (wide_char)
+                                 && ! iswnbspace (wide_char);
                     }
 
                   /* Count words by counting word starts, i.e., each
diff --git a/tests/wc/wc-nbsp.sh b/tests/wc/wc-nbsp.sh
index 371cc8b5b1..39a8baccc6 100755
--- a/tests/wc/wc-nbsp.sh
+++ b/tests/wc/wc-nbsp.sh
@@ -38,10 +38,15 @@ fi
 
 export LC_ALL=en_US.UTF-8
 if test "$(locale charmap 2>/dev/null)" = UTF-8; then
+  # non-breaking space class
   check_word_sep '\u00A0'
   check_word_sep '\u2007'
   check_word_sep '\u202F'
   check_word_sep '\u2060'
+
+  # a sampling of the "standard" space class
+  check_word_sep '\u0020'
+  check_word_sep '\u2003'
 fi
 
 export LC_ALL=ru_RU.KOI8-R
-- 
2.47.2