From 01f986b921d988ae51de6c937cc374b50a8b23b0 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 12 Sep 2023 00:15:52 -0500 Subject: [PATCH] Parse in a more locale-independent way MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit update submodules to latest * gnulib.modules: Add c-ctype. * lib/wordsplit.c, src/buffer.c, src/exclist.c, src/incremen.c: * src/list.c, src/misc.c, src/names.c, src/sparse.c, src/tar.c: * src/xheader.c: Include c-ctype.h, and use its API rather than ctype.h’s. This is more likely to work when oddball locales are used. * src/transform.c: Include ctype.h, since this module still uses tolower and toupper (this is probably wrong - should be multi-byte). --- gnulib | 2 +- gnulib.modules | 1 + lib/wordsplit.c | 35 ++++++++++++++--------------------- paxutils | 2 +- src/buffer.c | 3 ++- src/exclist.c | 9 +++++---- src/incremen.c | 3 ++- src/list.c | 5 +++-- src/misc.c | 9 +++++---- src/names.c | 3 ++- src/sparse.c | 3 ++- src/tar.c | 5 +++-- src/transform.c | 1 + src/xheader.c | 9 +++++---- 14 files changed, 47 insertions(+), 43 deletions(-) diff --git a/gnulib b/gnulib index 78b62f83..0e8fd2c0 160000 --- a/gnulib +++ b/gnulib @@ -1 +1 @@ -Subproject commit 78b62f8320f8dffb813222c1480563ed14251ca3 +Subproject commit 0e8fd2c0eed44dba1d39840a8733c9159220d671 diff --git a/gnulib.modules b/gnulib.modules index 8bcd8096..bd703a8a 100644 --- a/gnulib.modules +++ b/gnulib.modules @@ -24,6 +24,7 @@ argp argp-version-etc attribute backupfile +c-ctype closeout configmake dirname diff --git a/lib/wordsplit.c b/lib/wordsplit.c index f0c26a9b..86683a43 100644 --- a/lib/wordsplit.c +++ b/lib/wordsplit.c @@ -18,8 +18,9 @@ # include #endif +#include + #include -#include #include #include #include @@ -28,6 +29,8 @@ #include #include +#include + #if ENABLE_NLS # include #else @@ -36,22 +39,12 @@ #define _(msgid) gettext (msgid) #define N_(msgid) msgid -#include - #define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n') #define ISDELIM(ws,c) \ (strchr ((ws)->ws_delim, (c)) != NULL) -#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL) -#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z') -#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z') -#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c)) -#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9') -#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL) -#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c)) -#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127) - -#define ISVARBEG(c) (ISALPHA(c) || c == '_') -#define ISVARCHR(c) (ISALNUM(c) || c == '_') + +#define ISVARBEG(c) (c_isalpha (c) || c == '_') +#define ISVARCHR(c) (c_isalnum (c) || c == '_') #define WSP_RETURN_DELIMS(wsp) \ ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS)) @@ -1891,7 +1884,7 @@ skip_sed_expr (const char *command, size_t i, size_t len) if (command[i] == ';') i++; - if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))) + if (!(command[i] == 's' && i + 3 < len && c_ispunct (command[i + 1]))) break; delim = command[++i]; @@ -1900,7 +1893,7 @@ skip_sed_expr (const char *command, size_t i, size_t len) { if (state == 3) { - if (command[i] == delim || !ISALNUM (command[i])) + if (command[i] == delim || !c_isalnum (command[i])) break; } else if (command[i] == '\\') @@ -1987,7 +1980,7 @@ scan_word (struct wordsplit *wsp, size_t start, int consume_all) start = i; if (wsp->ws_flags & WRDSF_SED_EXPR - && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])) + && command[i] == 's' && i + 3 < len && c_ispunct (command[i + 1])) { flags = _WSNF_SEXP; i = skip_sed_expr (command, i, len); @@ -2080,7 +2073,7 @@ scan_word (struct wordsplit *wsp, size_t start, int consume_all) } #define to_num(c) \ - (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 )) + (c_isdigit(c) ? c - '0' : c_isxdigit (c) ? c_toupper (c) - 'A' + 10 : 255) static int xtonum (int *pval, const char *src, int base, int cnt) @@ -2113,7 +2106,7 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote) len++; else if (*str == '"') len += 2; - else if (*str != '\t' && *str != '\\' && ISPRINT (*str)) + else if (*str != '\t' && *str != '\\' && c_isprint (*str)) len++; else if (quote_hex) len += 3; @@ -2201,7 +2194,7 @@ wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote, } } else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC) - && (unsigned char) src[i] < 128 && ISDIGIT (src[i])) + && (unsigned char) src[i] < 128 && c_isdigit (src[i])) { if (n - i < 1) { @@ -2251,7 +2244,7 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex) *dst++ = '\\'; *dst++ = *src; } - else if (*src != '\t' && *src != '\\' && ISPRINT (*src)) + else if (*src != '\t' && *src != '\\' && c_isprint (*src)) *dst++ = *src; else { diff --git a/paxutils b/paxutils index 481bae11..d56df683 160000 --- a/paxutils +++ b/paxutils @@ -1 +1 @@ -Subproject commit 481bae11050fcbdca67a66eb57390267b280a312 +Subproject commit d56df6838db922686bdfb3325f6368d295ae7f15 diff --git a/src/buffer.c b/src/buffer.c index 7f353fa4..e4319bd3 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -23,6 +23,7 @@ #include +#include #include #include #include @@ -1573,7 +1574,7 @@ drop_volume_label_suffix (const char *label) idx_t prefix_len = 0; for (idx_t i = 0; label[i]; i++) - if (!isdigit ((unsigned char) label[i])) + if (!c_isdigit (label[i])) prefix_len = i + 1; ptrdiff_t len = prefix_len - VOLUME_TEXT_LEN; diff --git a/src/exclist.c b/src/exclist.c index 92b814fe..486ebe79 100644 --- a/src/exclist.c +++ b/src/exclist.c @@ -18,6 +18,7 @@ along with this program. If not, see . */ #include +#include #include #include #include @@ -219,7 +220,7 @@ static void git_addfn (struct exclude *ex, char const *pattern, int options, MAYBE_UNUSED void *data) { - while (isspace (*pattern)) + while (c_isspace (*pattern)) ++pattern; if (*pattern == 0 || *pattern == '#') return; @@ -232,7 +233,7 @@ static void bzr_addfn (struct exclude *ex, char const *pattern, int options, MAYBE_UNUSED void *data) { - while (isspace (*pattern)) + while (c_isspace (*pattern)) ++pattern; if (*pattern == 0 || *pattern == '#') return; @@ -269,13 +270,13 @@ hg_addfn (struct exclude *ex, char const *pattern, int options, void *data) int *hgopt = data; size_t len; - while (isspace (*pattern)) + while (c_isspace (*pattern)) ++pattern; if (*pattern == 0 || *pattern == '#') return; if (strncmp (pattern, "syntax:", 7) == 0) { - for (pattern += 7; isspace (*pattern); ++pattern) + for (pattern += 7; c_isspace (*pattern); ++pattern) ; if (strcmp (pattern, "regexp") == 0) /* FIXME: Regexps must be perl-style */ diff --git a/src/incremen.c b/src/incremen.c index 7bcfdb93..98f82f6e 100644 --- a/src/incremen.c +++ b/src/incremen.c @@ -18,6 +18,7 @@ along with this program. If not, see . */ #include +#include #include #include #include "common.h" @@ -1132,7 +1133,7 @@ read_num (FILE *fp, char const *fieldname, int c = getc (fp); bool negative = c == '-'; - for (i = 0; (i == 0 && negative) || ISDIGIT (c); i++) + for (i = 0; (i == 0 && negative) || c_isdigit (c); i++) { buf[i] = c; if (i == sizeof buf - 1) diff --git a/src/list.c b/src/list.c index a6a40b3f..96f72dc4 100644 --- a/src/list.c +++ b/src/list.c @@ -20,6 +20,7 @@ Written by John Gilmore, on 1985-08-26. */ #include +#include #include #include #include @@ -780,7 +781,7 @@ from_header (char const *where0, size_t digs, char const *type, type)); return -1; } - if (!isspace ((unsigned char) *where)) + if (!c_isspace (*where)) break; where++; } @@ -916,7 +917,7 @@ from_header (char const *where0, size_t digs, char const *type, value = -value; } - if (where != lim && *where && !isspace ((unsigned char) *where)) + if (where != lim && *where && !c_isspace (*where)) { if (type) { diff --git a/src/misc.c b/src/misc.c index 73c04646..3adfb358 100644 --- a/src/misc.c +++ b/src/misc.c @@ -19,6 +19,7 @@ #include #include #include "common.h" +#include #include #include #include @@ -414,7 +415,7 @@ strtosysint (char const *arg, char **arglim, intmax_t minval, uintmax_t maxval) errno = 0; if (maxval <= INTMAX_MAX) { - if (ISDIGIT (arg[*arg == '-'])) + if (c_isdigit (arg[*arg == '-'])) { intmax_t i = strtoimax (arg, arglim, 10); intmax_t imaxval = maxval; @@ -426,7 +427,7 @@ strtosysint (char const *arg, char **arglim, intmax_t minval, uintmax_t maxval) } else { - if (ISDIGIT (*arg)) + if (c_isdigit (*arg)) { uintmax_t i = strtoumax (arg, arglim, 10); if (i <= maxval) @@ -506,7 +507,7 @@ decode_timespec (char const *arg, char **arg_lim, bool parse_fraction) bool negative = *arg == '-'; struct timespec r; - if (! ISDIGIT (arg[negative])) + if (! c_isdigit (arg[negative])) errno = EINVAL; else { @@ -537,7 +538,7 @@ decode_timespec (char const *arg, char **arg_lim, bool parse_fraction) int digits = 0; bool trailing_nonzero = false; - while (ISDIGIT (*++p)) + while (c_isdigit (*++p)) if (digits < LOG10_BILLION) digits++, ns = 10 * ns + (*p - '0'); else diff --git a/src/names.c b/src/names.c index d3027379..06342d8c 100644 --- a/src/names.c +++ b/src/names.c @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -990,7 +991,7 @@ handle_option (const char *str, struct name_elt const *ent) int i; struct option_locus loc; - while (*str && isspace (*str)) + while (*str && c_isspace (*str)) ++str; if (*str != '-') return 1; diff --git a/src/sparse.c b/src/sparse.c index ec19636c..b612cad7 100644 --- a/src/sparse.c +++ b/src/sparse.c @@ -16,6 +16,7 @@ with this program. If not, see . */ #include +#include #include #include #include "common.h" @@ -1251,7 +1252,7 @@ decode_num (uintmax_t *num, char const *arg, uintmax_t maxval) uintmax_t u; char *arg_lim; - if (!ISDIGIT (*arg)) + if (!c_isdigit (*arg)) return false; errno = 0; diff --git a/src/tar.c b/src/tar.c index 98132e79..db7c7212 100644 --- a/src/tar.c +++ b/src/tar.c @@ -38,6 +38,7 @@ #include "common.h" #include +#include #include #include #include @@ -1223,7 +1224,7 @@ expand_pax_option (struct tar_args *targs, const char *arg) size_t len = p - arg + 1; obstack_grow (&stk, arg, len); len = seglen - len; - for (++p; *p && isspace ((unsigned char) *p); p++) + for (++p; *p && c_isspace (*p); p++) len--; if (*p == '{' && p[len-1] == '}') { @@ -1720,7 +1721,7 @@ parse_opt (int key, char *arg, struct argp_state *state) case SET_MTIME_FORMAT_OPTION: set_mtime_format = arg; break; - + case SPARSE_VERSION_OPTION: sparse_option = true; { diff --git a/src/transform.c b/src/transform.c index e15e002b..65dba791 100644 --- a/src/transform.c +++ b/src/transform.c @@ -15,6 +15,7 @@ with this program. If not, see . */ #include +#include #include #include "common.h" diff --git a/src/xheader.c b/src/xheader.c index c82222ed..89feca64 100644 --- a/src/xheader.c +++ b/src/xheader.c @@ -19,6 +19,7 @@ #include +#include #include #include #include @@ -192,12 +193,12 @@ xheader_set_keyword_equal (char *kw, char *eq) global = false; } - while (p > kw && isspace ((unsigned char) *p)) + while (p > kw && c_isspace (*p)) p--; *p = 0; - for (p = eq + 1; *p && isspace ((unsigned char) *p); p++) + for (p = eq + 1; *p && c_isspace (*p); p++) ; if (strcmp (kw, "delete") == 0) @@ -616,7 +617,7 @@ decode_record (struct xheader *xhdr, while (*p == ' ' || *p == '\t') p++; - if (! ISDIGIT (*p)) + if (! c_isdigit (*p)) { if (*p) ERROR ((0, 0, _("Malformed extended header: missing length"))); @@ -1445,7 +1446,7 @@ sparse_map_decoder (struct tar_stat_info *st, intmax_t u; char *delim; - if (!ISDIGIT (*arg)) + if (!c_isdigit (*arg)) { ERROR ((0, 0, _("Malformed extended header: invalid %s=%s"), keyword, arg)); -- 2.47.2