'install' now allows the combination of the --compare and
--preserve-timestamps options.
+ 'fold', 'join', 'numfmt', 'uniq' now determine blank characters more
+ consistently on non-GLIBC platforms. For example, \u3000 (ideographic space)
+ is now considered a blank character on all platforms.
+
'nl' now supports multi-byte --section-delimiter characters.
'shuf -i' now operates up to two times faster on systems with unlocked stdio
if (convert)
{
- convert &= convert_entire_line
- || !! (c32isblank (g.ch) && ! c32isnbspace (g.ch));
+ convert &= convert_entire_line || c32issep (g.ch);
if (g.ch == '\t')
{
for (mcel_t g2; logical_p < logical_lim; logical_p += g2.len)
{
g2 = mcel_scan (logical_p, logical_lim);
- if (c32isblank (g2.ch) && ! c32isnbspace (g2.ch))
+ if (c32issep (g2.ch))
{
space_length = g2.len;
logical_end = logical_p - line_out;
/* Return true if the character in G is a newline or a separator
   character as classified by c32issep.  The removed ('-') line is the
   previous test, which called c32isblank directly.  */
static bool
newline_or_blank (mcel_t g)
{
- return g.ch == '\n' || c32isblank (g.ch);
+ return g.ch == '\n' || c32issep (g.ch);
}
/* Fill in the 'fields' structure in LINE. */
/* Return true if the character in G is a comma or a separator
   character as classified by c32issep.  The removed ('-') line is the
   previous test, which called c32isblank directly.  */
static bool
comma_or_blank (mcel_t g)
{
- return g.ch == ',' || c32isblank (g.ch);
+ return g.ch == ',' || c32issep (g.ch);
}
/* Add the comma or blank separated field spec(s) in STR to 'outlist'. */
/* Return true if the character in G is a newline or a separator
   character as classified by c32issep.  The removed ('-') lines are the
   previous test, which combined c32isblank with an explicit exclusion
   of c32isnbspace characters.  */
static bool
newline_or_blank (mcel_t g)
{
- return g.ch == '\n'
- || (c32isblank (g.ch) && ! c32isnbspace (g.ch));
+ return g.ch == '\n' || c32issep (g.ch);
}
static inline int
if (!matched_unit_sep)
{
mcel_t g = mcel_scanz (*endptr);
- if (c32isblank (g.ch) || c32isnbspace (g.ch))
+ if (c32issep (g.ch) || c32isnbspace (g.ch))
(*endptr) += g.len;
}
return wc == 0x00A0 || wc == 0x2007 || wc == 0x202F || wc == 0x2060;
}
+/* Return 1 if WC is a vertical white-space character, 0 otherwise.
+   The recognized code points are U+000A (line feed), U+000B (vertical
+   tab), U+000C (form feed), U+000D (carriage return), U+2028 (line
+   separator) and U+2029 (paragraph separator).  */
+ATTRIBUTE_PURE
+static inline int
+c32isvertspace (char32_t wc)
+{
+  switch (wc)
+    {
+    case 0x000A: case 0x000B: case 0x000C: case 0x000D:
+    case 0x2028: case 0x2029:
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+
+/* Return true if WC is a separator (horizontal blank) character.
+   c32isblank() is too variable on non-GLIBC platforms.
+   E.g., it does not include \u3000 (ideographic space) on musl.
+   E.g., it does include non-breaking space on Solaris and NetBSD.
+   This equivalent is more consistent across systems: on GLIBC it
+   defers to c32isblank(); elsewhere it accepts any c32isspace()
+   character that is neither vertical space nor a non-breaking space.  */
+ATTRIBUTE_PURE
+static inline bool
+c32issep (char32_t wc)
+{
+  /* Use #else rather than falling through, so that exactly one return
+     statement is compiled and no unreachable code is left on GLIBC.  */
+#if defined __GLIBC__
+  return !! c32isblank (wc);
+#else
+  return !! (c32isspace (wc) && ! c32isvertspace (wc) && ! c32isnbspace (wc));
+#endif
+}
+
#include <locale.h>
/* Take care of NLS matters. */
if (convert)
{
- bool blank = !! (c32isblank (g.ch) && ! c32isnbspace (g.ch));
+ bool blank = c32issep (g.ch);
if (blank)
{
/* Return true if the character in G is a newline or a separator
   character as classified by c32issep.  The removed ('-') line is the
   previous test, which called c32isblank directly.  */
static bool
newline_or_blank (mcel_t g)
{
- return g.ch == '\n' || c32isblank (g.ch);
+ return g.ch == '\n' || c32issep (g.ch);
}
/* Given a linebuffer LINE,