seq: use faster processing for integer steps from 2 to 200

author Pádraig Brady <pbrady@fb.com>

Tue, 3 Sep 2019 09:14:48 +0000 (10:14 +0100)

committer Pádraig Brady <P@draigBrady.com>

Sun, 8 Sep 2019 17:59:24 +0000 (18:59 +0100)
author Pádraig Brady <pbrady@fb.com>
Tue, 3 Sep 2019 09:14:48 +0000 (10:14 +0100)
committer Pádraig Brady <P@draigBrady.com>
Sun, 8 Sep 2019 17:59:24 +0000 (18:59 +0100)
diff --git a/src/seq.c b/src/seq.c

index 8efe929e1ac6e8b0eb1bac7eafb66ed2607c60e0..7a05142f9c9bc2ab5a3ad2dd0b6c35b6c268d9fc 100644 (file)
--- a/src/seq.c
+++ b/src/seq.c
@@ -37,6 +37,11 @@
  # define isnan(x) ((x) != (x))
  #endif
  
+/* Limit below which seq_fast has more throughput.
+   Determined with: seq 0 200 inf | pv > /dev/null  */
+#define SEQ_FAST_STEP_LIMIT 200
+#define SEQ_FAST_STEP_LIMIT_DIGITS 3
+
  /* The official name of this program (e.g., no 'g' prefix).  */
  #define PROGRAM_NAME "seq"
  
@@ -431,7 +436,7 @@ cmp (char const *a, size_t a_len, char const *b, size_t b_len)
      return -1;
    if (b_len < a_len)
      return 1;
-  return (strcmp (a, b));
+  return (memcmp (a, b, a_len));
  }
  
  /* Trim leading 0's from S, but if S is all 0's, leave one.
@@ -453,7 +458,7 @@ trim_leading_zeros (char const *s)
     followed by a newline.  If B < A, return false and print nothing.
     Otherwise, return true.  */
  static bool
-seq_fast (char const *a, char const *b)
+seq_fast (char const *a, char const *b, uintmax_t step)
  {
    bool inf = STREQ (b, "inf");
  
@@ -467,7 +472,10 @@ seq_fast (char const *a, char const *b)
  
    /* Allow for at least 31 digits without realloc.
       1 more than p_len is needed for the inf case.  */
-  size_t inc_size = MAX (MAX (p_len + 1, q_len), 31);
+#define INITIAL_ALLOC_DIGITS 31
+  size_t inc_size = MAX (MAX (p_len + 1, q_len), INITIAL_ALLOC_DIGITS);
+  /* Ensure we only increase by at most 1 digit at buffer boundaries.  */
+  verify (SEQ_FAST_STEP_LIMIT_DIGITS < INITIAL_ALLOC_DIGITS - 1);
  
    /* Copy input strings (incl NUL) to end of new buffers.  */
    char *p0 = xmalloc (inc_size + 1);
@@ -498,10 +506,15 @@ seq_fast (char const *a, char const *b)
        bufp = mempcpy (bufp, p, p_len);
  
        /* Append separator then number.  */
-      while (inf || cmp (p, p_len, q, q_len) < 0)
+      while (true)
          {
+          for (uintmax_t n_incr = step; n_incr; n_incr--)
+            incr (&p, &p_len);
+
+          if (! inf && 0 < cmp (p, p_len, q, q_len))
+            break;
+
            *bufp++ = *separator;
-          incr (&p, &p_len);
  
            /* Double up the buffers when needed for the inf case.  */
            if (p_len == inc_size)
@@ -641,17 +654,25 @@ main (int argc, char **argv)
       - no format string, [FIXME: relax this, eventually]
       - integer start (or no start)
       - integer end
-     - increment == 1 or not specified [FIXME: relax this, eventually]
-     then use the much more efficient integer-only code.  */
+     - integer increment <= SEQ_FAST_STEP_LIMIT
+     then use the much more efficient integer-only code,
+     operating on arbitrarily large numbers.  */
+  bool fast_step_ok = false;
+  if (n_args != 3
+      || (all_digits_p (argv[optind + 1])
+          && xstrtold (argv[optind + 1], NULL, &step.value, cl_strtold)
+          && 0 < step.value && step.value <= SEQ_FAST_STEP_LIMIT))
+    fast_step_ok = true;
+
    if (all_digits_p (argv[optind])
        && (n_args == 1 || all_digits_p (argv[optind + 1]))
-      && (n_args < 3 || (STREQ ("1", argv[optind + 1])
+      && (n_args < 3 || (fast_step_ok
                           && all_digits_p (argv[optind + 2])))
        && !equal_width && !format_str && strlen (separator) == 1)
      {
        char const *s1 = n_args == 1 ? "1" : argv[optind];
        char const *s2 = argv[optind + (n_args - 1)];
-      if (seq_fast (s1, s2))
+      if (seq_fast (s1, s2, step.value))
          return EXIT_SUCCESS;
  
        /* Upon any failure, let the more general code deal with it.  */
@@ -678,9 +699,11 @@ main (int argc, char **argv)
          }
      }
  
-  if ((isfinite (first.value) && first.precision == 0)
-      && step.precision == 0 && last.precision == 0
-      && 0 <= first.value && step.value == 1 && 0 <= last.value
+  /* Try the fast method again, for integers of the form 1e1 etc.,
+     or "inf" end value.  */
+  if (first.precision == 0 && step.precision == 0 && last.precision == 0
+      && isfinite (first.value) && 0 <= first.value && 0 <= last.value
+      && 0 < step.value && step.value <= SEQ_FAST_STEP_LIMIT
        && !equal_width && !format_str && strlen (separator) == 1)
      {
        char *s1;
@@ -692,7 +715,7 @@ main (int argc, char **argv)
        else if (asprintf (&s2, "%0.Lf", last.value) < 0)
          xalloc_die ();
  
-      if (*s1 != '-' && *s2 != '-' && seq_fast (s1, s2))
+      if (*s1 != '-' && *s2 != '-' && seq_fast (s1, s2, step.value))
          {
            IF_LINT (free (s1));
            IF_LINT (free (s2));
diff --git a/tests/misc/seq.pl b/tests/misc/seq.pl

index f143f6a8496ebbb9894a355b17880627e7475c4c..b20e43fe21e765503b970ad3dae262fd2086f1d4 100755 (executable)
--- a/tests/misc/seq.pl
+++ b/tests/misc/seq.pl
@@ -153,6 +153,9 @@ my @Tests =
     ['fast-1', qw(4), {OUT => [qw(1 2 3 4)]}],
     ['fast-2', qw(1 4), {OUT => [qw(1 2 3 4)]}],
     ['fast-3', qw(1 1 4), {OUT => [qw(1 2 3 4)]}],
+   ['fast-4', qw(1 2 4), {OUT => [qw(1 3)]}],
+   ['fast-5', qw(1 4 4), {OUT => [qw(1)]}],
+   ['fast-6', qw(1 1e0 4), {OUT => [qw(1 2 3 4)]}],
  
     # Ensure an INCREMENT of Zero is rejected.
     ['inc-zero-1',      qw(1 0 10), {EXIT => 1}, {ERR => $err_inc_zero}],
author	Pádraig Brady <pbrady@fb.com>
	Tue, 3 Sep 2019 09:14:48 +0000 (10:14 +0100)
committer	Pádraig Brady <P@draigBrady.com>
	Sun, 8 Sep 2019 17:59:24 +0000 (18:59 +0100)
src/seq.c		patch \| blob \| blame \| history
tests/misc/seq.pl		patch \| blob \| blame \| history