From cbb6dfec557e7c8ac2dd728e4a943beb6ccfd53c Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 29 Aug 2023 23:52:07 -0700 Subject: [PATCH] maint: remove need for mbsalign MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This simplifies memory allocation a bit, and removes an arbitrary limitation from numfmt, which formerly limited cell output to 127 bytes. * bootstrap.conf (gnulib_modules): Remove mbsalign, strncat. Add strnlen (the code already used strnlen directly, and we were saved only because Gnulib used the module indirectly) * gl/lib/mbsalign.c, gl/lib/mbsalign.h, gl/modules/mbsalign: * gl/modules/mbsalign-tests, gl/tests/test-mbsalign.c: Remove. * src/df.c, src/ls.c: Do not include mbsalign.h. (MBSWIDTH_FLAGS): New constant, now used for all mbswidth calls. All callers changed to check for -1 return. * src/df.c (struct field_data_t): ‘width’ is now int not size_t, since mbswidth can’t do widths greater than INT_MAX anyway. Replace ‘align’ with ‘align_right’. All uses changed. (print_table): Redo to avoid the need for ambsalign. (get_header, get_dev): mbswidth returns int, not size_t. * src/ls.c (MAX_MON_WIDTH): Remove; no longer used. (abmon_init): Use strnlen to cheaply discard too-long month names. Align by hand instead of using mbsalign. * src/numfmt.c: Include stdckdint.h, mbswidth.h. Do not include mbsalign.h. (padding_buffer_size): Now idx_t. All uses changed. (padding_width): Now intmax_t, since it’s no longer an object size. Its sign now records alignment. All uses changed. (zero_padding_width): Now int, since it’s given to sprintf. All uses changed. (padding_alignment): Remove; it’s now taken from padding_width’s sign. (double_to_human): Return string length. BUF_SIZE arg is now idx_t. Include suffix in output. All callers changed. Simplify by not calling strncat or stpcpy. Calculate fmt size bound more carefully. (setup_padding_buffer): Remove. All uses removed. 
(parse_format_string): Use intmax_t, not long, for pad. On overflow, set widths to large values that cause later code to do the right thing, rather than separately checking for overflow here. (prepare_padded_number): Return bool, not int 0/1. New arg PADDING. All uses changed. Do not limit padded output to 127 bytes; instead, use xpalloc to expand the output buffer. (print_padded_number): New arg PADDING. All uses changed. (process_suffixed_number): Simplify. (main): Take extremum if xstrtoimax overflows, as this does the right thing. * tests/misc/numfmt.pl: New test suf-20 to test for truncation bug. Remove tests pad-3.2, fmt-err-7, as they’re no longer invalid but are quite expensive. --- bootstrap.conf | 3 +- gl/lib/mbsalign.c | 275 -------------------------------------- gl/lib/mbsalign.h | 63 --------- gl/modules/mbsalign | 28 ---- gl/modules/mbsalign-tests | 11 -- gl/tests/test-mbsalign.c | 147 -------------------- src/df.c | 53 ++++---- src/ls.c | 97 ++++++++------ src/numfmt.c | 194 ++++++++++++--------------- tests/misc/numfmt.pl | 10 +- 10 files changed, 172 insertions(+), 709 deletions(-) delete mode 100644 gl/lib/mbsalign.c delete mode 100644 gl/lib/mbsalign.h delete mode 100644 gl/modules/mbsalign delete mode 100644 gl/modules/mbsalign-tests delete mode 100644 gl/tests/test-mbsalign.c diff --git a/bootstrap.conf b/bootstrap.conf index bd73ff2fef..742d4b2f4b 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -169,7 +169,6 @@ gnulib_modules=" manywarnings mbrlen mbrtowc - mbsalign mbschr mbslen mbswidth @@ -259,7 +258,7 @@ gnulib_modules=" stpcpy stpncpy strdup-posix - strncat + strnlen strnumcmp strsignal strtoimax diff --git a/gl/lib/mbsalign.c b/gl/lib/mbsalign.c deleted file mode 100644 index d6f1bb34c9..0000000000 --- a/gl/lib/mbsalign.c +++ /dev/null @@ -1,275 +0,0 @@ -/* Align/Truncate a string in a given screen width - Copyright (C) 2009-2023 Free Software Foundation, Inc. 
- - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Pádraig Brady. */ - -#include -#include "mbsalign.h" - -#include "minmax.h" - -#include -#include -#include -#include -#include -#include -#include - -/* Replace non printable chars. - Note \t and \n etc. are non printable. - Return 1 if replacement made, 0 otherwise. */ - -static bool -wc_ensure_printable (wchar_t *wchars) -{ - bool replaced = false; - wchar_t *wc = wchars; - while (*wc) - { - if (!iswprint ((wint_t) *wc)) - { - *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */ - replaced = true; - } - wc++; - } - return replaced; -} - -/* Truncate wchar string to width cells. - * Returns number of cells used. */ - -static size_t -wc_truncate (wchar_t *wc, size_t width) -{ - size_t cells = 0; - int next_cells = 0; - - while (*wc) - { - next_cells = wcwidth (*wc); - if (next_cells == -1) /* non printable */ - { - *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */ - next_cells = 1; - } - if (cells + next_cells > width) - break; - cells += next_cells; - wc++; - } - *wc = L'\0'; - return cells; -} - -/* Write N_SPACES space characters to DEST while ensuring - nothing is written beyond DEST_END. A terminating NUL - is always added to DEST. - A pointer to the terminating NUL is returned. */ - -static char * -mbs_align_pad (char *dest, char const *dest_end, size_t n_spaces) -{ - /* FIXME: Should we pad with "figure space" (\u2007) - if non ascii data present? 
*/ - while (n_spaces-- && (dest < dest_end)) - *dest++ = ' '; - *dest = '\0'; - return dest; -} - -/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte - characters; write the result into the DEST_SIZE-byte buffer, DEST. - ALIGNMENT specifies whether to left- or right-justify or to center. - If SRC requires more than *WIDTH columns, truncate it to fit. - When centering, the number of trailing spaces may be one less than the - number of leading spaces. - Return the length in bytes required for the final result, not counting - the trailing NUL. A return value of DEST_SIZE or larger means there - wasn't enough space. DEST will be NUL terminated in any case. - Return SIZE_MAX upon error (invalid multi-byte sequence in SRC, - or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified. - Update *WIDTH to indicate how many columns were used before padding. */ - -size_t -mbsalign (char const *src, char *dest, size_t dest_size, - size_t *width, mbs_align_t align, int flags) -{ - size_t ret = SIZE_MAX; - size_t src_size = strlen (src) + 1; - char *newstr = nullptr; - wchar_t *str_wc = nullptr; - char const *str_to_print = src; - size_t n_cols = src_size - 1; - size_t n_used_bytes = n_cols; /* Not including NUL */ - size_t n_spaces = 0; - bool conversion = false; - bool wc_enabled = false; - - /* In multi-byte locales convert to wide characters - to allow easy truncation. Also determine number - of screen columns used. 
*/ - if (!(flags & MBA_UNIBYTE_ONLY) && MB_CUR_MAX > 1) - { - size_t src_chars = mbstowcs (nullptr, src, 0); - if (src_chars == SIZE_MAX) - { - if (flags & MBA_UNIBYTE_FALLBACK) - goto mbsalign_unibyte; - else - goto mbsalign_cleanup; - } - src_chars += 1; /* make space for NUL */ - str_wc = malloc (src_chars * sizeof (wchar_t)); - if (str_wc == nullptr) - { - if (flags & MBA_UNIBYTE_FALLBACK) - goto mbsalign_unibyte; - else - goto mbsalign_cleanup; - } - if (mbstowcs (str_wc, src, src_chars) != 0) - { - str_wc[src_chars - 1] = L'\0'; - wc_enabled = true; - conversion = wc_ensure_printable (str_wc); - n_cols = wcswidth (str_wc, src_chars); - } - } - - /* If we transformed or need to truncate the source string - then create a modified copy of it. */ - if (wc_enabled && (conversion || (n_cols > *width))) - { - if (conversion) - { - /* May have increased the size by converting - \t to \uFFFD for example. */ - src_size = wcstombs (nullptr, str_wc, 0) + 1; - } - newstr = malloc (src_size); - if (newstr == nullptr) - { - if (flags & MBA_UNIBYTE_FALLBACK) - goto mbsalign_unibyte; - else - goto mbsalign_cleanup; - } - str_to_print = newstr; - n_cols = wc_truncate (str_wc, *width); - n_used_bytes = wcstombs (newstr, str_wc, src_size); - } - -mbsalign_unibyte: - - if (n_cols > *width) /* Unibyte truncation required. */ - { - n_cols = *width; - n_used_bytes = n_cols; - } - - if (*width > n_cols) /* Padding required. */ - n_spaces = *width - n_cols; - - /* indicate to caller how many cells needed (not including padding). 
*/ - *width = n_cols; - - { - size_t start_spaces, end_spaces; - - switch (align) - { - case MBS_ALIGN_LEFT: - start_spaces = 0; - end_spaces = n_spaces; - break; - case MBS_ALIGN_RIGHT: - start_spaces = n_spaces; - end_spaces = 0; - break; - case MBS_ALIGN_CENTER: - default: - start_spaces = n_spaces / 2 + n_spaces % 2; - end_spaces = n_spaces / 2; - break; - } - - if (flags & MBA_NO_LEFT_PAD) - start_spaces = 0; - if (flags & MBA_NO_RIGHT_PAD) - end_spaces = 0; - - /* Write as much NUL terminated output to DEST as possible. */ - if (dest_size != 0) - { - size_t space_left; - char *dest_end = dest + dest_size - 1; - - dest = mbs_align_pad (dest, dest_end, start_spaces); - space_left = dest_end - dest; - dest = mempcpy (dest, str_to_print, MIN (n_used_bytes, space_left)); - mbs_align_pad (dest, dest_end, end_spaces); - } - - /* indicate to caller how many bytes needed (not including NUL). */ - ret = n_used_bytes + ((start_spaces + end_spaces) * 1); - } - -mbsalign_cleanup: - - free (str_wc); - free (newstr); - - return ret; -} - -/* A wrapper around mbsalign() to dynamically allocate the - minimum amount of memory to store the result. - Return nullptr on failure. */ - -char * -ambsalign (char const *src, size_t *width, mbs_align_t align, int flags) -{ - size_t orig_width = *width; - size_t size = *width; /* Start with enough for unibyte mode. */ - size_t req = size; - char *buf = nullptr; - - while (req >= size) - { - char *nbuf; - size = req + 1; /* Space for NUL. 
*/ - nbuf = realloc (buf, size); - if (nbuf == nullptr) - { - free (buf); - buf = nullptr; - break; - } - buf = nbuf; - *width = orig_width; - req = mbsalign (src, buf, size, width, align, flags); - if (req == SIZE_MAX) - { - free (buf); - buf = nullptr; - break; - } - } - - return buf; -} diff --git a/gl/lib/mbsalign.h b/gl/lib/mbsalign.h deleted file mode 100644 index 37a9af5786..0000000000 --- a/gl/lib/mbsalign.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Align/Truncate a string in a given screen width - Copyright (C) 2009-2023 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include -#include - -typedef enum { MBS_ALIGN_LEFT, MBS_ALIGN_RIGHT, MBS_ALIGN_CENTER } mbs_align_t; - -enum { - /* Use unibyte mode for invalid multibyte strings - or when heap memory is exhausted. */ - MBA_UNIBYTE_FALLBACK = 0x0001, - - /* As an optimization, don't do multibyte processing - when we know no multibyte characters are present. */ - MBA_UNIBYTE_ONLY = 0x0002, - - /* Don't add leading padding. */ - MBA_NO_LEFT_PAD = 0x0004, - - /* Don't add trailing padding. */ - MBA_NO_RIGHT_PAD = 0x0008 - -#if 0 /* Other possible options. */ - /* Skip invalid multibyte chars rather than failing. */ - MBA_IGNORE_INVALID - - /* Align multibyte strings using "figure space" (\u2007). */ - MBA_USE_FIGURE_SPACE - - /* Don't truncate. */ - MBA_NO_TRUNCATE - - /* Ensure no leading whitespace. 
*/ - MBA_LSTRIP - - /* Ensure no trailing whitespace. */ - MBA_RSTRIP -#endif -}; - -size_t -mbsalign (char const *src, char *dest, size_t dest_size, - size_t *width, mbs_align_t align, int flags) - _GL_ATTRIBUTE_NONNULL (); - -char * -ambsalign (char const *src, size_t *width, mbs_align_t align, int flags) - _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE - _GL_ATTRIBUTE_NONNULL (); diff --git a/gl/modules/mbsalign b/gl/modules/mbsalign deleted file mode 100644 index c27c21225a..0000000000 --- a/gl/modules/mbsalign +++ /dev/null @@ -1,28 +0,0 @@ -Description: -Align/Truncate a string in a given screen width. - -Files: -lib/mbsalign.c -lib/mbsalign.h - -Depends-on: -minmax -wchar -wctype -wcwidth -mempcpy -stdint - -configure.ac: - -Makefile.am: -lib_SOURCES += mbsalign.c mbsalign.h - -Include: -"mbsalign.h" - -License: -LGPL - -Maintainer: -Pádraig Brady diff --git a/gl/modules/mbsalign-tests b/gl/modules/mbsalign-tests deleted file mode 100644 index 8e0d138fe1..0000000000 --- a/gl/modules/mbsalign-tests +++ /dev/null @@ -1,11 +0,0 @@ -Files: -tests/test-mbsalign.c -tests/macros.h - -Depends-on: - -configure.ac: - -Makefile.am: -TESTS += test-mbsalign -check_PROGRAMS += test-mbsalign diff --git a/gl/tests/test-mbsalign.c b/gl/tests/test-mbsalign.c deleted file mode 100644 index 151dbef13e..0000000000 --- a/gl/tests/test-mbsalign.c +++ /dev/null @@ -1,147 +0,0 @@ -/* Test that mbsalign works as advertised. - Copyright (C) 2010-2023 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* Written by Pádraig Brady. */ - -#include - -#include "mbsalign.h" -#include "macros.h" -#include -#include - -int -main (void) -{ - char dest[4 * 16 + 1]; - size_t width, n; - -#ifdef __ANDROID__ - /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the - "C" locale. Furthermore, when you attempt to set the "C" or "POSIX" - locale via setlocale(), what you get is a "C" locale with UTF-8 encoding, - that is, effectively the "C.UTF-8" locale. */ - if (MB_CUR_MAX == 1) -#endif - { - /* Test unibyte truncation. */ - width = 4; - n = mbsalign ("t\tés", dest, sizeof dest, &width, MBS_ALIGN_LEFT, 0); - ASSERT (n == 4); - } - - /* Test center alignment. */ - width = 4; - n = mbsalign ("es", dest, sizeof dest, &width, MBS_ALIGN_CENTER, 0); - ASSERT (*dest == ' ' && *(dest + n - 1) == ' '); - ASSERT (n == 4); - - /* Test center alignment, with no trailing padding. */ - width = 4; - n = mbsalign ("es", dest, sizeof dest, &width, MBS_ALIGN_CENTER, - MBA_NO_RIGHT_PAD); - ASSERT (n == 3); - ASSERT (*dest == ' ' && *(dest + n - 1) == 's'); - - /* Test left alignment, with no trailing padding. (truncate only). */ - width = 4; - n = mbsalign ("es", dest, sizeof dest, &width, MBS_ALIGN_LEFT, - MBA_NO_RIGHT_PAD); - ASSERT (n == 2); - ASSERT (*dest == 'e' && *(dest + n - 1) == 's'); - - /* Test center alignment, with no padding. (truncate only). */ - width = 4; - n = mbsalign ("es", dest, sizeof dest, &width, MBS_ALIGN_CENTER, - MBA_NO_LEFT_PAD | MBA_NO_RIGHT_PAD); - ASSERT (n == 2); - ASSERT (*dest == 'e' && *(dest + n - 1) == 's'); - - /* Test center alignment, with no left padding. (may be useful for RTL?) 
*/ - width = 4; - n = mbsalign ("es", dest, sizeof dest, &width, MBS_ALIGN_CENTER, - MBA_NO_LEFT_PAD); - ASSERT (n == 3); - ASSERT (*dest == 'e' && *(dest + n - 1) == ' '); - - if (setlocale (LC_ALL, "en_US.UTF8")) - { - /* Check invalid input is flagged. */ - width = 4; - n = mbsalign ("t\xe1\xe2s", dest, sizeof dest, &width, MBS_ALIGN_LEFT, 0); - ASSERT (n == (size_t) -1); - - /* Check invalid input is treated as unibyte */ - width = 4; - n = mbsalign ("t\xe1\xe2s", dest, sizeof dest, &width, - MBS_ALIGN_LEFT, MBA_UNIBYTE_FALLBACK); - ASSERT (n == 4); - - /* Test multibyte center alignment. */ - width = 4; - n = mbsalign ("és", dest, sizeof dest, &width, MBS_ALIGN_CENTER, 0); - ASSERT (n == 5); - ASSERT (*dest == ' ' && *(dest + n - 1) == ' '); - - /* Test multibyte left alignment. */ - width = 4; - n = mbsalign ("és", dest, sizeof dest, &width, MBS_ALIGN_LEFT, 0); - ASSERT (n == 5); - ASSERT (*(dest + n - 1) == ' ' && *(dest + n - 2) == ' '); - - /* Test multibyte right alignment. */ - width = 4; - n = mbsalign ("és", dest, sizeof dest, &width, MBS_ALIGN_RIGHT, 0); - ASSERT (n == 5); - ASSERT (*(dest) == ' ' && *(dest + 1) == ' '); - - /* multibyte multicell truncation. */ - width = 4; /* cells */ - n = mbsalign ("日月火水", dest, sizeof dest, &width, - MBS_ALIGN_LEFT, 0); - ASSERT (n == 6); /* 2 characters */ - - /* multibyte unicell truncation. */ - width = 3; /* cells */ - n = mbsalign ("¹²³⁴", dest, sizeof dest, &width, MBS_ALIGN_LEFT, 0); - ASSERT (n == 6); /* 3 characters */ - - /* Check independence from dest buffer. */ - width = 4; /* cells */ - n = mbsalign ("¹²³⁴", dest, 0, &width, MBS_ALIGN_LEFT, 0); - ASSERT (n == 9); /* 4 characters */ - - /* Check that width is updated with cells required before padding. */ - width = 4; /* cells */ - n = mbsalign ("¹²³", dest, 0, &width, MBS_ALIGN_LEFT, 0); - ASSERT (width == 3); - - /* Test case where output is larger than input - (as tab converted to multi byte replacement char). 
*/ - width = 4; - n = mbsalign ("t\tés" /* 6 including NUL */ , dest, sizeof dest, - &width, MBS_ALIGN_LEFT, 0); - ASSERT (n == 7); - - /* Test forced unibyte truncation. */ - width = 4; - n = mbsalign ("t\tés", dest, sizeof dest, &width, MBS_ALIGN_LEFT, - MBA_UNIBYTE_ONLY); - ASSERT (n == 4); - } - - return 0; -} diff --git a/src/df.c b/src/df.c index 31b5184783..e127fc2dbc 100644 --- a/src/df.c +++ b/src/df.c @@ -31,7 +31,6 @@ #include "canonicalize.h" #include "fsusage.h" #include "human.h" -#include "mbsalign.h" #include "mbswidth.h" #include "mountlist.h" #include "quote.h" @@ -169,48 +168,48 @@ struct field_data_t char const *arg; field_type_t field_type; char const *caption;/* nullptr means use default header of this field. */ - size_t width; /* Auto adjusted (up) widths used to align columns. */ - mbs_align_t align; /* Alignment for this field. */ + int width; /* Auto adjusted (up) widths used to align columns. */ + bool align_right; /* Whether to right-align columns, not left-align. */ bool used; }; /* Header strings, minimum width and alignment for the above fields. 
*/ static struct field_data_t field_data[] = { [SOURCE_FIELD] = { SOURCE_FIELD, - "source", OTHER_FLD, N_("Filesystem"), 14, MBS_ALIGN_LEFT, false }, + "source", OTHER_FLD, N_("Filesystem"), 14, false, false }, [FSTYPE_FIELD] = { FSTYPE_FIELD, - "fstype", OTHER_FLD, N_("Type"), 4, MBS_ALIGN_LEFT, false }, + "fstype", OTHER_FLD, N_("Type"), 4, false, false }, [SIZE_FIELD] = { SIZE_FIELD, - "size", BLOCK_FLD, N_("blocks"), 5, MBS_ALIGN_RIGHT, false }, + "size", BLOCK_FLD, N_("blocks"), 5, true, false }, [USED_FIELD] = { USED_FIELD, - "used", BLOCK_FLD, N_("Used"), 5, MBS_ALIGN_RIGHT, false }, + "used", BLOCK_FLD, N_("Used"), 5, true, false }, [AVAIL_FIELD] = { AVAIL_FIELD, - "avail", BLOCK_FLD, N_("Available"), 5, MBS_ALIGN_RIGHT, false }, + "avail", BLOCK_FLD, N_("Available"), 5, true, false }, [PCENT_FIELD] = { PCENT_FIELD, - "pcent", BLOCK_FLD, N_("Use%"), 4, MBS_ALIGN_RIGHT, false }, + "pcent", BLOCK_FLD, N_("Use%"), 4, true, false }, [ITOTAL_FIELD] = { ITOTAL_FIELD, - "itotal", INODE_FLD, N_("Inodes"), 5, MBS_ALIGN_RIGHT, false }, + "itotal", INODE_FLD, N_("Inodes"), 5, true, false }, [IUSED_FIELD] = { IUSED_FIELD, - "iused", INODE_FLD, N_("IUsed"), 5, MBS_ALIGN_RIGHT, false }, + "iused", INODE_FLD, N_("IUsed"), 5, true, false }, [IAVAIL_FIELD] = { IAVAIL_FIELD, - "iavail", INODE_FLD, N_("IFree"), 5, MBS_ALIGN_RIGHT, false }, + "iavail", INODE_FLD, N_("IFree"), 5, true, false }, [IPCENT_FIELD] = { IPCENT_FIELD, - "ipcent", INODE_FLD, N_("IUse%"), 4, MBS_ALIGN_RIGHT, false }, + "ipcent", INODE_FLD, N_("IUse%"), 4, true, false }, [TARGET_FIELD] = { TARGET_FIELD, - "target", OTHER_FLD, N_("Mounted on"), 0, MBS_ALIGN_LEFT, false }, + "target", OTHER_FLD, N_("Mounted on"), 0, false, false }, [FILE_FIELD] = { FILE_FIELD, - "file", OTHER_FLD, N_("File"), 0, MBS_ALIGN_LEFT, false } + "file", OTHER_FLD, N_("File"), 0, false, false } }; static char const *all_args_string = @@ -295,6 +294,8 @@ automount_stat_err (char const *file, struct stat *st) } } +enum { 
MBSWIDTH_FLAGS = MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE }; + /* Replace problematic chars with '?'. Since only control characters are currently considered, this should work in all encodings. */ @@ -392,15 +393,15 @@ print_table (void) if (col != 0) putchar (' '); - int flags = 0; - if (col == ncolumns - 1) /* The last one. */ - flags = MBA_NO_RIGHT_PAD; - - size_t width = columns[col]->width; - cell = ambsalign (cell, &width, columns[col]->align, flags); - /* When ambsalign fails, output unaligned data. */ - fputs (cell ? cell : table[row][col], stdout); - free (cell); + int width = mbswidth (cell, MBSWIDTH_FLAGS); + int fill = width < 0 ? 0 : columns[col]->width - width; + if (columns[col]->align_right) + for (; 0 < fill; fill--) + putchar (' '); + fputs (cell, stdout); + if (col + 1 < ncolumns) + for (; 0 < fill; fill--) + putchar (' '); } putchar ('\n'); } @@ -638,7 +639,7 @@ get_header (void) table[nrows - 1][col] = cell; - size_t cell_width = mbswidth (cell, 0); + int cell_width = mbswidth (cell, MBSWIDTH_FLAGS); columns[col]->width = MAX (columns[col]->width, cell_width); } } @@ -1252,7 +1253,7 @@ get_dev (char const *device, char const *mount_point, char const *file, affirm (cell); replace_problematic_chars (cell); - size_t cell_width = mbswidth (cell, 0); + int cell_width = mbswidth (cell, MBSWIDTH_FLAGS); columns[col]->width = MAX (columns[col]->width, cell_width); table[nrows - 1][col] = cell; } diff --git a/src/ls.c b/src/ls.c index 336d5bd97b..d5ef093eb9 100644 --- a/src/ls.c +++ b/src/ls.c @@ -108,7 +108,6 @@ #include "xstrtol.h" #include "xstrtol-error.h" #include "areadlink.h" -#include "mbsalign.h" #include "dircolors.h" #include "xgethostname.h" #include "c-ctype.h" @@ -1282,6 +1281,8 @@ file_escape_init (void) RFC3986[i] |= c_isalnum (i) || i == '~' || i == '-' || i == '.' 
|| i == '_'; } +enum { MBSWIDTH_FLAGS = MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE }; + /* Read the abbreviated month names from the locale, to align them and to determine the max width of the field and to truncate names greater than our max allowed. @@ -1290,11 +1291,6 @@ file_escape_init (void) variable width abbreviated months and also precomputing/caching the names was seen to increase the performance of ls significantly. */ -/* max number of display cells to use. - As of 2018 the abmon for Arabic has entries with width 12. - It doesn't make much sense to support wider than this - and locales should aim for abmon entries of width <= 5. */ -enum { MAX_MON_WIDTH = 12 }; /* abformat[RECENT][MON] is the format to use for timestamps with recentness RECENT and month MON. */ enum { ABFORMAT_SIZE = 128 }; @@ -1313,28 +1309,41 @@ abmon_init (char abmon[12][ABFORMAT_SIZE]) #ifndef HAVE_NL_LANGINFO return false; #else - size_t required_mon_width = MAX_MON_WIDTH; - size_t curr_max_width; - do + int max_mon_width = 0; + int mon_width[12]; + int mon_len[12]; + + for (int i = 0; i < 12; i++) { - curr_max_width = required_mon_width; - required_mon_width = 0; - for (int i = 0; i < 12; i++) + char const *abbr = nl_langinfo (ABMON_1 + i); + mon_len[i] = strnlen (abbr, ABFORMAT_SIZE); + if (mon_len[i] == ABFORMAT_SIZE) + return false; + if (strchr (abbr, '%')) + return false; + mon_width[i] = mbswidth (strcpy (abmon[i], abbr), MBSWIDTH_FLAGS); + if (mon_width[i] < 0) + return false; + max_mon_width = MAX (max_mon_width, mon_width[i]); + } + + for (int i = 0; i < 12; i++) + { + int fill = max_mon_width - mon_width[i]; + if (ABFORMAT_SIZE - mon_len[i] <= fill) + return false; + bool align_left = !isdigit (to_uchar (abmon[i][0])); + int fill_offset; + if (align_left) + fill_offset = mon_len[i]; + else { - size_t width = curr_max_width; - char const *abbr = nl_langinfo (ABMON_1 + i); - if (strchr (abbr, '%')) - return false; - mbs_align_t alignment = isdigit (to_uchar (*abbr)) - ? 
MBS_ALIGN_RIGHT : MBS_ALIGN_LEFT; - size_t req = mbsalign (abbr, abmon[i], ABFORMAT_SIZE, - &width, alignment, 0); - if (! (req < ABFORMAT_SIZE)) - return false; - required_mon_width = MAX (required_mon_width, width); + memmove (abmon[i] + fill, abmon[i], mon_len[i]); + fill_offset = 0; } + memset (abmon[i] + fill_offset, ' ', fill); + abmon[i][mon_len[i] + fill] = '\0'; } - while (curr_max_width > required_mon_width); return true; #endif @@ -3607,7 +3616,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode, char buf[LONGEST_HUMAN_READABLE + 1]; int len = mbswidth (human_readable (blocks, buf, human_output_opts, ST_NBLOCKSIZE, output_block_size), - 0); + MBSWIDTH_FLAGS); if (block_size_width < len) block_size_width = len; } @@ -3670,7 +3679,7 @@ gobble_file (char const *name, enum filetype type, ino_t inode, int len = mbswidth (human_readable (size, buf, file_human_output_opts, 1, file_output_block_size), - 0); + MBSWIDTH_FLAGS); if (file_size_width < len) file_size_width = len; } @@ -4208,7 +4217,7 @@ long_time_expected_width (void) size_t len = align_nstrftime (buf, sizeof buf, false, &tm, localtz, 0); if (len != 0) - width = mbsnwidth (buf, len, 0); + width = mbsnwidth (buf, len, MBSWIDTH_FLAGS); } if (width < 0) @@ -4226,7 +4235,8 @@ format_user_or_group (char const *name, uintmax_t id, int width) { if (name) { - int width_gap = width - mbswidth (name, 0); + int name_width = mbswidth (name, MBSWIDTH_FLAGS); + int width_gap = name_width < 0 ? 0 : width - name_width; int pad = MAX (0, width_gap); dired_outstring (name); @@ -4257,21 +4267,19 @@ format_group (gid_t g, int width, bool stat_ok) (numeric_ids ? nullptr : getgroup (g)), g, width); } -/* Return the number of columns that format_user_or_group will print. */ +/* Return the number of columns that format_user_or_group will print, + or -1 if unknown. 
*/ static int format_user_or_group_width (char const *name, uintmax_t id) { - if (name) - { - int len = mbswidth (name, 0); - return MAX (0, len); - } - else - return snprintf (nullptr, 0, "%"PRIuMAX, id); + return (name + ? mbswidth (name, MBSWIDTH_FLAGS) + : snprintf (nullptr, 0, "%"PRIuMAX, id)); } -/* Return the number of columns that format_user will print. */ +/* Return the number of columns that format_user will print, + or -1 if unknown. */ static int format_user_width (uid_t u) @@ -4372,8 +4380,9 @@ print_long_format (const struct fileinfo *f) ? "?" : human_readable (ST_NBLOCKS (f->stat), hbuf, human_output_opts, ST_NBLOCKSIZE, output_block_size)); - int pad; - for (pad = block_size_width - mbswidth (blocks, 0); 0 < pad; pad--) + int blocks_width = mbswidth (blocks, MBSWIDTH_FLAGS); + for (int pad = blocks_width < 0 ? 0 : block_size_width - blocks_width; + 0 < pad; pad--) *p++ = ' '; while ((*p++ = *blocks++)) continue; @@ -4432,8 +4441,9 @@ print_long_format (const struct fileinfo *f) : human_readable (unsigned_file_size (f->stat.st_size), hbuf, file_human_output_opts, 1, file_output_block_size)); - int pad; - for (pad = file_size_width - mbswidth (size, 0); 0 < pad; pad--) + int size_width = mbswidth (size, MBSWIDTH_FLAGS); + for (int pad = size_width < 0 ? 
0 : file_size_width - size_width; + 0 < pad; pad--) *p++ = ' '; while ((*p++ = *size++)) continue; @@ -4677,7 +4687,10 @@ quote_name_buf (char **inbuf, size_t bufsize, char *name, else if (width != nullptr) { if (MB_CUR_MAX > 1) - displayed_width = mbsnwidth (buf, len, 0); + { + displayed_width = mbsnwidth (buf, len, MBSWIDTH_FLAGS); + displayed_width = MAX (0, displayed_width); + } else { char const *p = buf; diff --git a/src/numfmt.c b/src/numfmt.c index ca2eaffc39..a242f0efb7 100644 --- a/src/numfmt.c +++ b/src/numfmt.c @@ -17,13 +17,14 @@ #include #include #include +#include #include #include #include -#include "mbsalign.h" #include "argmatch.h" #include "c-ctype.h" +#include "mbswidth.h" #include "quote.h" #include "system.h" #include "xstrtol.h" @@ -173,9 +174,9 @@ static uintmax_t from_unit_size = 1; static uintmax_t to_unit_size = 1; static int grouping = 0; static char *padding_buffer = nullptr; -static size_t padding_buffer_size = 0; -static long int padding_width = 0; -static long int zero_padding_width = 0; +static idx_t padding_buffer_size = 0; +static intmax_t padding_width = 0; +static int zero_padding_width = 0; static long int user_precision = -1; static char const *format_str = nullptr; static char *format_str_prefix = nullptr; @@ -187,7 +188,6 @@ static int conv_exit_code = EXIT_CONVERSION_WARNINGS; /* auto-pad each line based on skipped whitespace. */ static int auto_padding = 0; -static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT; /* field delimiter */ static int delimiter = DELIMITER_DEFAULT; @@ -728,19 +728,17 @@ simple_strtod_fatal (enum simple_strtod_error err, char const *input_str) error (conv_exit_code, 0, gettext (msgid), quote (input_str)); } -/* Convert VAL to a human format string in BUF. */ -static void +/* Convert VAL to a human format string using PRECISION in BUF of size + BUF_SIZE. Use SCALE, GROUP, and ROUND to format. Return + the number of bytes needed to represent VAL. 
If this number is not + less than BUF_SIZE, the buffer is too small; if it is negative, the + formatting failed for some reason. */ +static int double_to_human (long double val, int precision, - char *buf, size_t buf_size, + char *buf, idx_t buf_size, enum scale_type scale, int group, enum round_type round) { - int num_size; - char fmt[64]; - static_assert ((INT_BUFSIZE_BOUND (zero_padding_width) - + INT_BUFSIZE_BOUND (precision) - + 10 /* for %.Lf etc. */) - < sizeof fmt); - + char fmt[sizeof "%'0.*Lfi%s%s%s" + INT_STRLEN_BOUND (zero_padding_width)]; char *pfmt = fmt; *pfmt++ = '%'; @@ -748,7 +746,7 @@ double_to_human (long double val, int precision, *pfmt++ = '\''; if (zero_padding_width) - pfmt += snprintf (pfmt, sizeof (fmt) - 2, "0%ld", zero_padding_width); + pfmt += sprintf (pfmt, "0%d", zero_padding_width); devmsg ("double_to_human:\n"); @@ -762,13 +760,10 @@ double_to_human (long double val, int precision, " no scaling, returning (grouped) value: %'.*Lf\n" : " no scaling, returning value: %.*Lf\n", precision, val); - stpcpy (pfmt, ".*Lf"); + strcpy (pfmt, ".*Lf%s"); - num_size = snprintf (buf, buf_size, fmt, precision, val); - if (num_size < 0 || num_size >= (int) buf_size) - error (EXIT_FAILURE, 0, - _("failed to prepare value '%Lf' for printing"), val); - return; + return snprintf (buf, buf_size, fmt, precision, val, + suffix ? suffix : ""); } /* Scaling requested by user. */ @@ -810,23 +805,14 @@ double_to_human (long double val, int precision, devmsg (" after rounding, value=%Lf * %0.f ^ %d\n", val, scale_base, power); - stpcpy (pfmt, ".*Lf%s"); + strcpy (pfmt, ".*Lf%s%s%s"); int prec = user_precision == -1 ? show_decimal_point : user_precision; - /* buf_size - 1 used here to ensure place for possible scale_IEC_I suffix. 
*/ - num_size = snprintf (buf, buf_size - 1, fmt, prec, val, - suffix_power_char (power)); - if (num_size < 0 || num_size >= (int) buf_size - 1) - error (EXIT_FAILURE, 0, - _("failed to prepare value '%Lf' for printing"), val); - - if (scale == scale_IEC_I && power > 0) - strncat (buf, "i", buf_size - num_size - 1); - - devmsg (" returning value: %s\n", quote (buf)); - - return; + return snprintf (buf, buf_size, fmt, prec, val, + suffix_power_char (power), + &"i"[! (scale == scale_IEC_I && 0 < power)], + suffix ? suffix : ""); } /* Convert a string of decimal digits, N_STRING, with an optional suffix @@ -876,17 +862,6 @@ unit_to_umax (char const *n_string) return n; } - -static void -setup_padding_buffer (size_t min_size) -{ - if (padding_buffer_size > min_size) - return; - - padding_buffer_size = min_size + 1; - padding_buffer = xrealloc (padding_buffer, padding_buffer_size); -} - void usage (int status) { @@ -1052,7 +1027,7 @@ Examples:\n\ NOTES: 1. This function sets the global variables: - padding_width, padding_alignment, grouping, + padding_width, grouping, format_str_prefix, format_str_suffix 2. The function aborts on any errors. */ static void @@ -1061,7 +1036,6 @@ parse_format_string (char const *fmt) size_t i; size_t prefix_len = 0; size_t suffix_pos; - long int pad = 0; char *endptr = nullptr; bool zero_padding = false; @@ -1092,30 +1066,25 @@ parse_format_string (char const *fmt) break; } - errno = 0; - pad = strtol (fmt + i, &endptr, 10); - if (errno == ERANGE || pad < -LONG_MAX) - error (EXIT_FAILURE, 0, - _("invalid format %s (width overflow)"), quote (fmt)); + intmax_t pad = strtoimax (fmt + i, &endptr, 10); - if (endptr != (fmt + i) && pad != 0) + if (pad != 0) { if (debug && padding_width && !(zero_padding && pad > 0)) error (0, 0, _("--format padding overriding --padding")); + /* Set padding width and alignment. On overflow, set widths to + large values that cause later code to avoid undefined behavior + and fail at a reasonable point. 
*/ if (pad < 0) - { - padding_alignment = MBS_ALIGN_LEFT; - padding_width = -pad; - } + padding_width = pad; else { if (zero_padding) - zero_padding_width = pad; + zero_padding_width = MIN (pad, INT_MAX); else padding_width = pad; } - } i = endptr - fmt; @@ -1159,11 +1128,10 @@ parse_format_string (char const *fmt) format_str_suffix = xstrdup (fmt + suffix_pos); devmsg ("format String:\n input: %s\n grouping: %s\n" - " padding width: %ld\n alignment: %s\n" + " padding width: %jd\n" " prefix: %s\n suffix: %s\n", quote_n (0, fmt), (grouping) ? "yes" : "no", padding_width, - (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right", quote_n (1, format_str_prefix ? format_str_prefix : ""), quote_n (2, format_str_suffix ? format_str_suffix : "")); } @@ -1203,12 +1171,11 @@ parse_human_number (char const *str, long double /*output */ *value, /* Print the given VAL, using the requested representation. The number is printed to STDOUT, with padding and alignment. */ -static int -prepare_padded_number (const long double val, size_t precision) +static bool +prepare_padded_number (const long double val, size_t precision, + intmax_t *padding) { /* Generate Output. */ - char buf[128]; - size_t precision_used = user_precision == -1 ? precision : user_precision; /* Can't reliably print too-large values without auto-scaling. 
*/ @@ -1229,7 +1196,7 @@ prepare_padded_number (const long double val, size_t precision) _("value too large to be printed: '%Lg'" " (consider using --to)"), val); } - return 0; + return false; } if (x > MAX_ACCEPTABLE_DIGITS - 1) @@ -1237,42 +1204,65 @@ prepare_padded_number (const long double val, size_t precision) if (inval_style != inval_ignore) error (conv_exit_code, 0, _("value too large to be printed: '%Lg'" " (cannot handle values > 999Q)"), val); - return 0; + return false; } - double_to_human (val, precision_used, buf, sizeof (buf), - scale_to, grouping, round_style); - if (suffix) - strncat (buf, suffix, sizeof (buf) - strlen (buf) -1); + while (true) + { + int numlen = double_to_human (val, precision_used, + padding_buffer, padding_buffer_size, + scale_to, grouping, round_style); + ptrdiff_t growth; + if (numlen < 0 || ckd_sub (&growth, numlen, padding_buffer_size - 1)) + error (EXIT_FAILURE, 0, + _("failed to prepare value '%Lf' for printing"), val); + if (growth <= 0) + break; + padding_buffer = xpalloc (padding_buffer, &padding_buffer_size, + growth, -1, 1); + } devmsg ("formatting output:\n value: %Lf\n humanized: %s\n", - val, quote (buf)); + val, quote (padding_buffer)); - if (padding_width && strlen (buf) < padding_width) + intmax_t pad = 0; + if (padding_width) { - size_t w = padding_width; - mbsalign (buf, padding_buffer, padding_buffer_size, &w, - padding_alignment, MBA_UNIBYTE_ONLY); - - devmsg (" After padding: %s\n", quote (padding_buffer)); - } - else - { - setup_padding_buffer (strlen (buf) + 1); - strcpy (padding_buffer, buf); + int buf_width = mbswidth (padding_buffer, + MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE); + if (0 <= buf_width) + { + if (padding_width < 0) + { + if (padding_width < -buf_width) + pad = padding_width + buf_width; + } + else + { + if (buf_width < padding_width) + pad = padding_width - buf_width; + } + } } - return 1; + *padding = pad; + return true; } static void -print_padded_number (void) +print_padded_number 
(intmax_t padding) { if (format_str_prefix) fputs (format_str_prefix, stdout); + for (intmax_t p = padding; 0 < p; p--) + putchar (' '); + fputs (padding_buffer, stdout); + for (intmax_t p = padding; p < 0; p++) + putchar (' '); + if (format_str_suffix) fputs (format_str_suffix, stdout); } @@ -1305,16 +1295,8 @@ process_suffixed_number (char *text, long double *result, /* setup auto-padding. */ if (auto_padding) { - if (text < p || field > 1) - { - padding_width = strlen (text); - setup_padding_buffer (padding_width); - } - else - { - padding_width = 0; - } - devmsg ("setting Auto-Padding to %ld characters\n", padding_width); + padding_width = text < p || 1 < field ? strlen (text) : 0; + devmsg ("setting Auto-Padding to %jd characters\n", padding_width); } long double val = 0; @@ -1393,11 +1375,12 @@ process_field (char *text, uintmax_t field) valid_number = process_suffixed_number (text, &val, &precision, field); + intmax_t padding; if (valid_number) - valid_number = prepare_padded_number (val, precision); + valid_number = prepare_padded_number (val, precision, &padding); if (valid_number) - print_padded_number (); + print_padded_number (padding); else fputs (text, stdout); } @@ -1508,15 +1491,12 @@ main (int argc, char **argv) break; case PADDING_OPTION: - if (xstrtol (optarg, nullptr, 10, &padding_width, "") != LONGINT_OK - || padding_width == 0 || padding_width < -LONG_MAX) + if (((xstrtoimax (optarg, nullptr, 10, &padding_width, "") + & ~LONGINT_OVERFLOW) + != LONGINT_OK) + || padding_width == 0) error (EXIT_FAILURE, 0, _("invalid padding value %s"), quote (optarg)); - if (padding_width < 0) - { - padding_alignment = MBS_ALIGN_LEFT; - padding_width = -padding_width; - } /* TODO: We probably want to apply a specific --padding to --header lines too. 
*/ break; @@ -1605,8 +1585,6 @@ main (int argc, char **argv) error (0, 0, _("grouping has no effect in this locale")); } - - setup_padding_buffer (padding_width); auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT); if (inval_style != inval_abort) diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl index 86fb78f1e0..86698dde59 100755 --- a/tests/misc/numfmt.pl +++ b/tests/misc/numfmt.pl @@ -159,6 +159,9 @@ my @Tests = # space(s) between number and suffix. Note only field 1 is used # by default so specify the NUL delimiter to consider the whole "line". ['suf-19', "-d '' --from=si '4.0 K'", {OUT => "4000"}], + ['suf-20', + '--suffix=Fooxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy 0', + {OUT => "0Fooxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy"}], ## GROUPING @@ -178,9 +181,6 @@ my @Tests = ['pad-3.1', '--padding=0 5', {ERR => "$prog: invalid padding value '0'\n"}, {EXIT => '1'}], - ['pad-3.2', "--padding=$limits->{LONG_MIN} 0", - {ERR => "$prog: invalid padding value '$limits->{LONG_MIN}'\n"}, - {EXIT => '1'}], ['pad-4', '--padding=10 --to=si 50000', {OUT=>' 50K'}], ['pad-5', '--padding=-10 --to=si 50000', {OUT=>'50K '}], @@ -675,10 +675,6 @@ my @Tests = ['fmt-err-6', '--format "%f %f"', {ERR=>"$prog: format '%f %f' has too many % directives\n"}, {EXIT=>1}], - ['fmt-err-7', '--format "%'.$limits->{LONG_OFLOW}.'f"', - {ERR=>"$prog: invalid format '%$limits->{LONG_OFLOW}f'". - " (width overflow)\n"}, - {EXIT=>1}], ['fmt-err-9', '--format "%f" --grouping', {ERR=>"$prog: --grouping cannot be combined with --format\n"}, {EXIT=>1}], -- 2.47.2