From: Pádraig Brady Date: Sun, 29 Aug 2021 18:34:32 +0000 (+0100) Subject: digest: refactor sum(1) into digest.c X-Git-Tag: v9.0~42 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=eb17c610d442cc8180c21b382a806bff40cea1f4;p=thirdparty%2Fcoreutils.git digest: refactor sum(1) into digest.c Since digest will be providing all digest functionality, refactor sum.c into it. * po/POTFILES.in: sum.c no longer has translatable strings so remove. * src/digest.c: Call out to new stream interfaces in sum.c * src/local.mk: Adjust sources for the sum binary. * src/sum.c: Provide a stream interface for BSD and SYSV digests. * src/sum.h: A new file to declare the exported functions in sum.c --- diff --git a/po/POTFILES.in b/po/POTFILES.in index de70201a04..aca7c260fc 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -120,7 +120,6 @@ src/split.c src/stat.c src/stdbuf.c src/stty.c -src/sum.c src/sync.c src/system.h src/tac-pipe.c diff --git a/src/digest.c b/src/digest.c index 97804de0f0..090ba73d74 100644 --- a/src/digest.c +++ b/src/digest.c @@ -27,6 +27,9 @@ #include "xdectoint.h" #include "xstrtol.h" +#if HASH_ALGO_SUM +# include "sum.h" +#endif #if HASH_ALGO_BLAKE2 # include "blake2/b2sum.h" #endif @@ -49,7 +52,14 @@ #include "xbinary-io.h" /* The official name of this program (e.g., no 'g' prefix). */ -#if HASH_ALGO_MD5 +#if HASH_ALGO_SUM +# define PROGRAM_NAME "sum" +# define DIGEST_TYPE_STRING "BSD" +# define DIGEST_STREAM sumfns[sum_algorithm] +# define DIGEST_OUT sum_output_fns[sum_algorithm] +# define DIGEST_BITS 16 +# define DIGEST_ALIGN 4 +#elif HASH_ALGO_MD5 # define PROGRAM_NAME "md5sum" # define DIGEST_TYPE_STRING "MD5" # define DIGEST_STREAM md5_stream @@ -101,8 +111,15 @@ #else # error "Can't decide which hash algorithm to compile." 
#endif +#if !HASH_ALGO_SUM +# define DIGEST_OUT output_file +#endif -#if HASH_ALGO_BLAKE2 +#if HASH_ALGO_SUM +# define AUTHORS \ + proper_name ("Kayvan Aghaiepour"), \ + proper_name ("David MacKenzie") +#elif HASH_ALGO_BLAKE2 # define AUTHORS \ proper_name ("Padraig Brady"), \ proper_name ("Samuel Neves") @@ -111,6 +128,8 @@ proper_name ("Ulrich Drepper"), \ proper_name ("Scott Miller"), \ proper_name ("David Madore") +#endif +#if !HASH_ALGO_BLAKE2 # define DIGEST_HEX_BYTES (DIGEST_BITS / 4) #endif #define DIGEST_BIN_BYTES (DIGEST_BITS / 8) @@ -188,6 +207,28 @@ static uintmax_t blake2_max_len[]= }; #endif /* HASH_ALGO_BLAKE2 */ +typedef void (*digest_output_fn)(char const*, int, void const*, + bool, bool, uintmax_t); +#if HASH_ALGO_SUM +enum Algorithm +{ + bsd, + sysv, +}; + +static enum Algorithm sum_algorithm; +static sumfn sumfns[]= +{ + bsd_sum_stream, + sysv_sum_stream, +}; +static digest_output_fn sum_output_fns[]= +{ + output_bsd, + output_sysv, +}; +#endif + /* For long options that have no equivalent short option, use a non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ enum @@ -204,6 +245,7 @@ static struct option const long_options[] = #if HASH_ALGO_BLAKE2 { "length", required_argument, NULL, 'l'}, #endif +#if !HASH_ALGO_SUM { "binary", no_argument, NULL, 'b' }, { "check", no_argument, NULL, 'c' }, { "ignore-missing", no_argument, NULL, IGNORE_MISSING_OPTION}, @@ -214,6 +256,9 @@ static struct option const long_options[] = { "strict", no_argument, NULL, STRICT_OPTION }, { "tag", no_argument, NULL, TAG_OPTION }, { "zero", no_argument, NULL, 'z' }, +#else + {"sysv", no_argument, NULL, 's'}, +#endif { GETOPT_HELP_OPTION_DECL }, { GETOPT_VERSION_OPTION_DECL }, { NULL, 0, NULL, 0 } @@ -235,6 +280,13 @@ Print or check %s (%d-bit) checksums.\n\ DIGEST_BITS); emit_stdin_note (); +#if HASH_ALGO_SUM + fputs (_("\ +\n\ + -r use BSD sum algorithm (the default), use 1K blocks\n\ + -s, --sysv use System V sum algorithm, use 512 bytes blocks\n\ +"), stdout); +#else if (O_BINARY) fputs (_("\ \n\ @@ -249,12 +301,12 @@ Print or check %s (%d-bit) checksums.\n\ printf (_("\ -c, --check read %s sums from the FILEs and check them\n"), DIGEST_TYPE_STRING); -#if HASH_ALGO_BLAKE2 +# if HASH_ALGO_BLAKE2 fputs (_("\ -l, --length digest length in bits; must not exceed the maximum for\n\ the blake2 algorithm and must be a multiple of 8\n\ "), stdout); -#endif +# endif fputs (_("\ --tag create a BSD-style checksum\n\ "), stdout); @@ -280,8 +332,10 @@ The following five options are useful only when verifying checksums:\n\ -w, --warn warn about improperly formatted checksum lines\n\ \n\ "), stdout); +#endif fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); +#if !HASH_ALGO_SUM printf (_("\ \n\ The sums are computed as described in %s. 
When checking, the input\n\ @@ -292,6 +346,7 @@ line with checksum, a space, a character indicating input mode ('*' for binary,\ Note: There is no difference between binary mode and text mode on GNU systems.\ \n"), DIGEST_REFERENCE); +#endif emit_ancillary_info (PROGRAM_NAME); } @@ -626,7 +681,9 @@ digest_file (char const *filename, int *binary, unsigned char *bin_result, fadvise (fp, FADVISE_SEQUENTIAL); -#if HASH_ALGO_BLAKE2 +#if HASH_ALGO_SUM + err = DIGEST_STREAM (fp, bin_result, length); +#elif HASH_ALGO_BLAKE2 err = DIGEST_STREAM (fp, bin_result, b2_length / 8); #else err = DIGEST_STREAM (fp, bin_result); @@ -646,6 +703,60 @@ digest_file (char const *filename, int *binary, unsigned char *bin_result, return true; } +#if !HASH_ALGO_SUM +static void +output_file (char const *file, int binary_file, void const *digest, + bool tagged, bool args _GL_UNUSED, uintmax_t length _GL_UNUSED) +{ + unsigned char const *bin_buffer = digest; + /* We don't really need to escape, and hence detect, the '\\' + char, and not doing so should be both forwards and backwards + compatible, since only escaped lines would have a '\\' char at + the start. However just in case users are directly comparing + against old (hashed) outputs, in the presence of files + containing '\\' characters, we decided to not simplify the + output in this case. */ + bool needs_escape = (strchr (file, '\\') || strchr (file, '\n')) + && delim == '\n'; + + if (tagged) + { + if (needs_escape) + putchar ('\\'); + +# if HASH_ALGO_BLAKE2 + fputs (algorithm_out_string[b2_algorithm], stdout); + if (b2_length < blake2_max_len[b2_algorithm] * 8) + printf ("-%"PRIuMAX, b2_length); +# else + fputs (DIGEST_TYPE_STRING, stdout); +# endif + fputs (" (", stdout); + print_filename (file, needs_escape); + fputs (") = ", stdout); + } + + /* Output a leading backslash if the file name contains + a newline or backslash. 
*/ + if (!tagged && needs_escape) + putchar ('\\'); + + for (size_t i = 0; i < (digest_hex_bytes / 2); ++i) + printf ("%02x", bin_buffer[i]); + + if (!tagged) + { + putchar (' '); + + putchar (binary_file ? '*' : ' '); + + print_filename (file, needs_escape); + } + + putchar (delim); +} +#endif + static bool digest_check (char const *checkfile_name) { @@ -881,7 +992,9 @@ main (int argc, char **argv) so that processes running in parallel do not intersperse their output. */ setvbuf (stdout, NULL, _IOLBF, 0); -#if HASH_ALGO_BLAKE2 +#if HASH_ALGO_SUM + const char* short_opts = "rs"; +#elif HASH_ALGO_BLAKE2 const char* short_opts = "l:bctwz"; const char* b2_length_str = ""; #else @@ -903,6 +1016,7 @@ main (int argc, char **argv) } break; #endif +#if !HASH_ALGO_SUM case 'b': binary = 1; break; @@ -940,6 +1054,15 @@ main (int argc, char **argv) case 'z': delim = '\0'; break; +#else + case 'r': /* For SysV compatibility. */ + sum_algorithm = bsd; + break; + + case 's': + sum_algorithm = sysv; + break; +#endif case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: @@ -1041,63 +1164,20 @@ main (int argc, char **argv) for (char **operandp = argv + optind; operandp < operand_lim; operandp++) { char *file = *operandp; - if (do_check) ok &= digest_check (file); else { - int file_is_binary = binary; + int binary_file = binary; bool missing; + uintmax_t length; - if (! digest_file (file, &file_is_binary, bin_buffer, &missing, NULL)) + if (! digest_file (file, &binary_file, bin_buffer, &missing, &length)) ok = false; else { - /* We don't really need to escape, and hence detect, the '\\' - char, and not doing so should be both forwards and backwards - compatible, since only escaped lines would have a '\\' char at - the start. However just in case users are directly comparing - against old (hashed) outputs, in the presence of files - containing '\\' characters, we decided to not simplify the - output in this case. 
*/ - bool needs_escape = (strchr (file, '\\') || strchr (file, '\n')) - && delim == '\n'; - - if (prefix_tag) - { - if (needs_escape) - putchar ('\\'); - -#if HASH_ALGO_BLAKE2 - fputs (algorithm_out_string[b2_algorithm], stdout); - if (b2_length < blake2_max_len[b2_algorithm] * 8) - printf ("-%"PRIuMAX, b2_length); -#else - fputs (DIGEST_TYPE_STRING, stdout); -#endif - fputs (" (", stdout); - print_filename (file, needs_escape); - fputs (") = ", stdout); - } - - /* Output a leading backslash if the file name contains - a newline or backslash. */ - if (!prefix_tag && needs_escape) - putchar ('\\'); - - for (size_t i = 0; i < (digest_hex_bytes / 2); ++i) - printf ("%02x", bin_buffer[i]); - - if (!prefix_tag) - { - putchar (' '); - - putchar (file_is_binary ? '*' : ' '); - - print_filename (file, needs_escape); - } - - putchar (delim); + DIGEST_OUT (file, binary_file, bin_buffer, prefix_tag, + optind != argc, length); } } } diff --git a/src/local.mk b/src/local.mk index bb5b722f2f..b1fce9c279 100644 --- a/src/local.mk +++ b/src/local.mk @@ -355,6 +355,9 @@ src___SOURCES = src/lbracket.c nodist_src_coreutils_SOURCES = src/coreutils.h src_coreutils_SOURCES = src/coreutils.c +src_sum_SOURCES = src/sum.c src/sum.h src/digest.c +src_sum_CPPFLAGS = -DHASH_ALGO_SUM=1 $(AM_CPPFLAGS) + src_cksum_SOURCES = src/cksum.c src/cksum.h if USE_PCLMUL_CRC32 noinst_LIBRARIES += src/libcksum_pclmul.a diff --git a/src/sum.c b/src/sum.c index c66147edc3..1633c86c52 100644 --- a/src/sum.c +++ b/src/sum.c @@ -22,251 +22,171 @@ #include #include -#include #include "system.h" -#include "die.h" -#include "error.h" -#include "fadvise.h" #include "human.h" -#include "safe-read.h" -#include "xbinary-io.h" +#include "sum.h" -/* The official name of this program (e.g., no 'g' prefix). */ -#define PROGRAM_NAME "sum" +/* Calculate the checksum and the size in bytes of stream STREAM. + Return -1 on error, 0 on success. 
*/ -#define AUTHORS \ - proper_name ("Kayvan Aghaiepour"), \ - proper_name ("David MacKenzie") +int +bsd_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + int ret = -1; + size_t sum, n; + int checksum = 0; /* The checksum mod 2^16. */ + uintmax_t total_bytes = 0; /* The number of bytes. */ + static const size_t buffer_length = 32768; + uint8_t *buffer = malloc (buffer_length); -/* True if any of the files read were the standard input. */ -static bool have_read_stdin; + if (! buffer) + return -1; -static struct option const longopts[] = -{ - {"sysv", no_argument, NULL, 's'}, - {GETOPT_HELP_OPTION_DECL}, - {GETOPT_VERSION_OPTION_DECL}, - {NULL, 0, NULL, 0} -}; + /* Process file */ + while (true) + { + sum = 0; -void -usage (int status) -{ - if (status != EXIT_SUCCESS) - emit_try_help (); - else + /* Read block */ + while (true) { - printf (_("\ -Usage: %s [OPTION]... [FILE]...\n\ -"), - program_name); - fputs (_("\ -Print checksum and block counts for each FILE.\n\ -"), stdout); - - emit_stdin_note (); - - fputs (_("\ -\n\ - -r use BSD sum algorithm (the default), use 1K blocks\n\ - -s, --sysv use System V sum algorithm, use 512 bytes blocks\n\ -"), stdout); - fputs (HELP_OPTION_DESCRIPTION, stdout); - fputs (VERSION_OPTION_DESCRIPTION, stdout); - emit_ancillary_info (PROGRAM_NAME); - } - exit (status); -} - -/* Calculate and print the rotated checksum and the size in 1K blocks - of file FILE, or of the standard input if FILE is "-". - If PRINT_NAME is >0, print FILE next to the checksum and size. - The checksum varies depending on sizeof (int). - Return true if successful. */ + n = fread (buffer + sum, 1, buffer_length - sum, stream); + sum += n; -static bool -bsd_sum_file (char const *file, int print_name) -{ - FILE *fp; - int checksum = 0; /* The checksum mod 2^16. */ - uintmax_t total_bytes = 0; /* The number of bytes. */ - int ch; /* Each character read. 
*/ - char hbuf[LONGEST_HUMAN_READABLE + 1]; - bool is_stdin = STREQ (file, "-"); + if (buffer_length == sum) + break; - if (is_stdin) - { - fp = stdin; - have_read_stdin = true; - xset_binary_mode (STDIN_FILENO, O_BINARY); - } - else - { - fp = fopen (file, (O_BINARY ? "rb" : "r")); - if (fp == NULL) + if (n == 0) { - error (0, errno, "%s", quotef (file)); - return false; + if (ferror (stream)) + goto cleanup_buffer; + goto final_process; } + + if (feof (stream)) + goto final_process; } - fadvise (fp, FADVISE_SEQUENTIAL); + for (size_t i = 0; i < sum; i++) + { + checksum = (checksum >> 1) + ((checksum & 1) << 15); + checksum += buffer[i]; + checksum &= 0xffff; /* Keep it within bounds. */ + } + total_bytes += sum; + } - while ((ch = getc (fp)) != EOF) +final_process:; + + for (size_t i = 0; i < sum; i++) { - total_bytes++; checksum = (checksum >> 1) + ((checksum & 1) << 15); - checksum += ch; + checksum += buffer[i]; checksum &= 0xffff; /* Keep it within bounds. */ } - - int err = errno; - if (!ferror (fp)) - err = 0; - if (is_stdin) - clearerr (fp); - else if (fclose (fp) != 0 && !err) - err = errno; - if (err) - { - error (0, err, "%s", quotef (file)); - return false; - } - - printf ("%05d %5s", checksum, - human_readable (total_bytes, hbuf, human_ceiling, 1, 1024)); - if (print_name) - printf (" %s", file); - putchar ('\n'); - - return true; + total_bytes += sum; + + memcpy (resstream, &checksum, sizeof checksum); + *length = total_bytes; + ret = 0; +cleanup_buffer: + free (buffer); + return ret; } -/* Calculate and print the checksum and the size in 512-byte blocks - of file FILE, or of the standard input if FILE is "-". - If PRINT_NAME is >0, print FILE next to the checksum and size. - Return true if successful. */ +/* Calculate the checksum and the size in bytes of stream STREAM. + Return -1 on error, 0 on success. 
*/ -static bool -sysv_sum_file (char const *file, int print_name) +int +sysv_sum_stream (FILE *stream, void *resstream, uintmax_t *length) { - int fd; - unsigned char buf[8192]; + int ret = -1; + size_t sum, n; uintmax_t total_bytes = 0; - char hbuf[LONGEST_HUMAN_READABLE + 1]; - int r; - int checksum; + static const size_t buffer_length = 32768; + uint8_t *buffer = malloc (buffer_length); + + if (! buffer) + return -1; /* The sum of all the input bytes, modulo (UINT_MAX + 1). */ unsigned int s = 0; - bool is_stdin = STREQ (file, "-"); - - if (is_stdin) - { - fd = STDIN_FILENO; - have_read_stdin = true; - xset_binary_mode (STDIN_FILENO, O_BINARY); - } - else - { - fd = open (file, O_RDONLY | O_BINARY); - if (fd == -1) - { - error (0, errno, "%s", quotef (file)); - return false; - } - } - + /* Process file */ while (true) + { + sum = 0; + + /* Read block */ + while (true) { - size_t bytes_read = safe_read (fd, buf, sizeof buf); + n = fread (buffer + sum, 1, buffer_length - sum, stream); + sum += n; - if (bytes_read == 0) + if (buffer_length == sum) break; - if (bytes_read == SAFE_READ_ERROR) + if (n == 0) { - error (0, errno, "%s", quotef (file)); - if (!is_stdin) - close (fd); - return false; + if (ferror (stream)) + goto cleanup_buffer; + goto final_process; } - for (size_t i = 0; i < bytes_read; i++) - s += buf[i]; - total_bytes += bytes_read; - } - - if (!is_stdin && close (fd) != 0) - { - error (0, errno, "%s", quotef (file)); - return false; + if (feof (stream)) + goto final_process; } - r = (s & 0xffff) + ((s & 0xffffffff) >> 16); - checksum = (r & 0xffff) + (r >> 16); - - printf ("%d %s", checksum, - human_readable (total_bytes, hbuf, human_ceiling, 1, 512)); - if (print_name) - printf (" %s", file); - putchar ('\n'); - - return true; -} - -int -main (int argc, char **argv) -{ - bool ok; - int optc; - int files_given; - bool (*sum_func) (char const *, int) = bsd_sum_file; - - initialize_main (&argc, &argv); - set_program_name (argv[0]); - setlocale (LC_ALL, 
""); - bindtextdomain (PACKAGE, LOCALEDIR); - textdomain (PACKAGE); + for (size_t i = 0; i < sum; i++) + s += buffer[i]; + total_bytes += sum; + } - atexit (close_stdout); +final_process:; - /* Line buffer stdout to ensure lines are written atomically and immediately - so that processes running in parallel do not intersperse their output. */ - setvbuf (stdout, NULL, _IOLBF, 0); + for (size_t i = 0; i < sum; i++) + s += buffer[i]; + total_bytes += sum; - have_read_stdin = false; + int r = (s & 0xffff) + ((s & 0xffffffff) >> 16); + int checksum = (r & 0xffff) + (r >> 16); - while ((optc = getopt_long (argc, argv, "rs", longopts, NULL)) != -1) - { - switch (optc) - { - case 'r': /* For SysV compatibility. */ - sum_func = bsd_sum_file; - break; + memcpy (resstream, &checksum, sizeof checksum); + *length = total_bytes; + ret = 0; +cleanup_buffer: + free (buffer); + return ret; +} - case 's': - sum_func = sysv_sum_file; - break; +/* Print the checksum and size (in 1024 byte blocks) to stdout. + If ARGS is true, also print the FILE name. */ - case_GETOPT_HELP_CHAR; +void +output_bsd (char const *file, int binary_file, void const *digest, + bool tagged, bool args _GL_UNUSED, uintmax_t length _GL_UNUSED) +{ - case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + char hbuf[LONGEST_HUMAN_READABLE + 1]; + printf ("%05d %5s", *(int *)digest, + human_readable (length, hbuf, human_ceiling, 1, 1024)); + if (args) + printf (" %s", file); + putchar ('\n'); +} - default: - usage (EXIT_FAILURE); - } - } +/* Print the checksum and size (in 512 byte blocks) to stdout. + If ARGS is true, also print the FILE name. 
*/ - files_given = argc - optind; - if (files_given <= 0) - ok = sum_func ("-", files_given); - else - for (ok = true; optind < argc; optind++) - ok &= sum_func (argv[optind], files_given); +void +output_sysv (char const *file, int binary_file, void const *digest, + bool tagged, bool args _GL_UNUSED, uintmax_t length _GL_UNUSED) +{ - if (have_read_stdin && fclose (stdin) == EOF) - die (EXIT_FAILURE, errno, "%s", quotef ("-")); - return ok ? EXIT_SUCCESS : EXIT_FAILURE; + char hbuf[LONGEST_HUMAN_READABLE + 1]; + printf ("%d %s", *(int *)digest, + human_readable (length, hbuf, human_ceiling, 1, 512)); + if (args) + printf (" %s", file); + putchar ('\n'); } diff --git a/src/sum.h b/src/sum.h new file mode 100644 index 0000000000..ca68f750a2 --- /dev/null +++ b/src/sum.h @@ -0,0 +1,16 @@ +extern int +bsd_sum_stream (FILE *stream, void *resstream, uintmax_t *length); + +extern int +sysv_sum_stream (FILE *stream, void *resstream, uintmax_t *length); + +typedef int (*sumfn)(FILE *, void *, uintmax_t*); + + +extern void +output_bsd (char const *file, int binary_file, void const *digest, + bool tagged, bool args _GL_UNUSED, uintmax_t length _GL_UNUSED); + +extern void +output_sysv (char const *file, int binary_file, void const *digest, + bool tagged, bool args _GL_UNUSED, uintmax_t length _GL_UNUSED);