ptx: fix whitespace trimming with multiple files

author Pádraig Brady <P@draigBrady.com>

Mon, 28 Apr 2014 12:29:41 +0000 (13:29 +0100)

committer Pádraig Brady <P@draigBrady.com>

Tue, 29 Apr 2014 08:50:58 +0000 (09:50 +0100)
author Pádraig Brady <P@draigBrady.com>
Mon, 28 Apr 2014 12:29:41 +0000 (13:29 +0100)
committer Pádraig Brady <P@draigBrady.com>
Tue, 29 Apr 2014 08:50:58 +0000 (09:50 +0100)
diff --git a/NEWS b/NEWS

index 55f0b5881063815ca5e4f35ea23d07fc5523e1ed..7855a4811962cd669079205a9513e20b8ed3a4be 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -43,6 +43,9 @@ GNU coreutils NEWS                                    -*- outline -*-
    ptx --format long option parsing no longer falls through into the --help case.
    [bug introduced in TEXTUTILS-1_22i]
  
+  ptx now consistently trims whitespace when processing multiple files.
+  [This bug was present in "the beginning".]
+
    shuf --repeat no longer dumps core if the input is empty.
    [bug introduced with the --repeat feature in coreutils-8.22]
  
diff --git a/src/ptx.c b/src/ptx.c

index be342eefc54b1c64ac8811b53e1fa11f58654500..d165e966589d63cf4f94835638faac0ff66818f1 100644 (file)
--- a/src/ptx.c
+++ b/src/ptx.c
@@ -166,7 +166,7 @@ static int total_line_count;        /* total number of lines seen so far */
  static const char **input_file_name;   /* array of text input file names */
  static int *file_line_count;   /* array of 'total_line_count' values at end */
  
-static BLOCK text_buffer;      /* file to study */
+static BLOCK *text_buffers;    /* files to study */
  
  /* SKIP_NON_WHITE used only for getting or skipping the reference.  */
  
@@ -232,6 +232,7 @@ typedef struct
      DELTA left;                        /* distance to left context start */
      DELTA right;               /* distance to right context end */
      int reference;             /* reference descriptor */
+    size_t file_index;         /* corresponding file  */
    }
  OCCURS;
  
@@ -744,7 +745,7 @@ digest_word_file (const char *file_name, WORD_TABLE *table)
  `----------------------------------------------------------------------*/
  
  static void
-find_occurs_in_text (void)
+find_occurs_in_text (size_t file_index)
  {
    char *cursor;                        /* for scanning the source text */
    char *scan;                  /* for scanning the source text also */
@@ -760,6 +761,8 @@ find_occurs_in_text (void)
    char *word_end;              /* end of word */
    char *next_context_start;    /* next start of left context */
  
+  const BLOCK *text_buffer = &text_buffers[file_index];
+
    /* reference_length is always used within 'if (input_reference)'.
       However, GNU C diagnoses that it may be used uninitialized.  The
       following assignment is merely to shut it up.  */
@@ -775,19 +778,19 @@ find_occurs_in_text (void)
       found inside it.  Also, unconditionally assigning these variable has
       the happy effect of shutting up lint.  */
  
-  line_start = text_buffer.start;
+  line_start = text_buffer->start;
    line_scan = line_start;
    if (input_reference)
      {
-      SKIP_NON_WHITE (line_scan, text_buffer.end);
+      SKIP_NON_WHITE (line_scan, text_buffer->end);
        reference_length = line_scan - line_start;
-      SKIP_WHITE (line_scan, text_buffer.end);
+      SKIP_WHITE (line_scan, text_buffer->end);
      }
  
    /* Process the whole buffer, one line or one sentence at a time.  */
  
-  for (cursor = text_buffer.start;
-       cursor < text_buffer.end;
+  for (cursor = text_buffer->start;
+       cursor < text_buffer->end;
         cursor = next_context_start)
      {
  
@@ -805,11 +808,11 @@ find_occurs_in_text (void)
           This test also accounts for the case of an incomplete line or
           sentence at the end of the buffer.  */
  
-      next_context_start = text_buffer.end;
+      next_context_start = text_buffer->end;
        if (context_regex.string)
          switch (re_search (&context_regex.pattern, cursor,
-                           text_buffer.end - cursor,
-                           0, text_buffer.end - cursor, &context_regs))
+                           text_buffer->end - cursor,
+                           0, text_buffer->end - cursor, &context_regs))
            {
            case -2:
              matcher_error ();
@@ -915,7 +918,7 @@ find_occurs_in_text (void)
                      total_line_count++;
                      line_scan++;
                      line_start = line_scan;
-                    SKIP_NON_WHITE (line_scan, text_buffer.end);
+                    SKIP_NON_WHITE (line_scan, text_buffer->end);
                      reference_length = line_scan - line_start;
                    }
                  else
@@ -956,7 +959,7 @@ find_occurs_in_text (void)
  
            occurs_cursor = occurs_table[0] + number_of_occurs[0];
  
-          /* Define the refence field, if any.  */
+          /* Define the reference field, if any.  */
  
            if (auto_reference)
              {
@@ -973,7 +976,7 @@ find_occurs_in_text (void)
                      total_line_count++;
                      line_scan++;
                      line_start = line_scan;
-                    SKIP_NON_WHITE (line_scan, text_buffer.end);
+                    SKIP_NON_WHITE (line_scan, text_buffer->end);
                    }
                  else
                    line_scan++;
@@ -1007,6 +1010,7 @@ find_occurs_in_text (void)
            occurs_cursor->key = possible_key;
            occurs_cursor->left = context_start - possible_key.start;
            occurs_cursor->right = context_end - possible_key.start;
+          occurs_cursor->file_index = file_index;
  
            number_of_occurs[0]++;
          }
@@ -1356,9 +1360,10 @@ define_all_fields (OCCURS *occurs)
    char *left_context_start;    /* start of left context */
    char *right_context_end;     /* end of right context */
    char *left_field_start;      /* conservative start for 'head'/'before' */
-  int file_index;              /* index in text input file arrays */
    const char *file_name;       /* file name for reference */
    int line_ordinal;            /* line ordinal for reference */
+  const char *buffer_start;    /* start of buffered file for this occurs */
+  const char *buffer_end;      /* end of buffered file for this occurs */
  
    /* Define 'keyafter', start of left context and end of right context.
       'keyafter' starts at the saved position for keyword and extend to the
@@ -1371,6 +1376,9 @@ define_all_fields (OCCURS *occurs)
    left_context_start = keyafter.start + occurs->left;
    right_context_end = keyafter.start + occurs->right;
  
+  buffer_start = text_buffers[occurs->file_index].start;
+  buffer_end = text_buffers[occurs->file_index].end;
+
    cursor = keyafter.end;
    while (cursor < right_context_end
           && cursor <= keyafter.start + keyafter_max_width)
@@ -1422,13 +1430,13 @@ define_all_fields (OCCURS *occurs)
    if (truncation_string)
      {
        cursor = before.start;
-      SKIP_WHITE_BACKWARDS (cursor, text_buffer.start);
+      SKIP_WHITE_BACKWARDS (cursor, buffer_start);
        before_truncation = cursor > left_context_start;
      }
    else
      before_truncation = 0;
  
-  SKIP_WHITE (before.start, text_buffer.end);
+  SKIP_WHITE (before.start, buffer_end);
  
    /* The tail could not take more columns than what has been left in the
       left context field, and a gap is mandatory.  It starts after the
@@ -1443,7 +1451,7 @@ define_all_fields (OCCURS *occurs)
    if (tail_max_width > 0)
      {
        tail.start = keyafter.end;
-      SKIP_WHITE (tail.start, text_buffer.end);
+      SKIP_WHITE (tail.start, buffer_end);
  
        tail.end = tail.start;
        cursor = tail.end;
@@ -1489,7 +1497,7 @@ define_all_fields (OCCURS *occurs)
    if (head_max_width > 0)
      {
        head.end = before.start;
-      SKIP_WHITE_BACKWARDS (head.end, text_buffer.start);
+      SKIP_WHITE_BACKWARDS (head.end, buffer_start);
  
        head.start = left_field_start;
        while (head.start + head_max_width < head.end)
@@ -1520,21 +1528,16 @@ define_all_fields (OCCURS *occurs)
      {
  
        /* Construct the reference text in preallocated space from the file
-         name and the line number.  Find out in which file the reference
-         occurred.  Standard input yields an empty file name.  Insure line
-         numbers are one based, even if they are computed zero based.  */
-
-      file_index = 0;
-      while (file_line_count[file_index] < occurs->reference)
-        file_index++;
+         name and the line number.  Standard input yields an empty file name.
+         Ensure line numbers are 1 based, even if they are computed 0 based.  */
  
-      file_name = input_file_name[file_index];
+      file_name = input_file_name[occurs->file_index];
        if (!file_name)
          file_name = "";
  
        line_ordinal = occurs->reference + 1;
-      if (file_index > 0)
-        line_ordinal -= file_line_count[file_index - 1];
+      if (occurs->file_index > 0)
+        line_ordinal -= file_line_count[occurs->file_index - 1];
  
        sprintf (reference.start, "%s:%d", file_name, line_ordinal);
        reference.end = reference.start + strlen (reference.start);
@@ -2034,6 +2037,7 @@ main (int argc, char **argv)
  
        input_file_name = xmalloc (sizeof *input_file_name);
        file_line_count = xmalloc (sizeof *file_line_count);
+      text_buffers =    xmalloc (sizeof *text_buffers);
        number_input_files = 1;
        input_file_name[0] = NULL;
      }
@@ -2042,6 +2046,7 @@ main (int argc, char **argv)
        number_input_files = argc - optind;
        input_file_name = xmalloc (number_input_files * sizeof *input_file_name);
        file_line_count = xmalloc (number_input_files * sizeof *file_line_count);
+      text_buffers    = xmalloc (number_input_files * sizeof *text_buffers);
  
        for (file_index = 0; file_index < number_input_files; file_index++)
          {
@@ -2060,6 +2065,7 @@ main (int argc, char **argv)
        number_input_files = 1;
        input_file_name = xmalloc (sizeof *input_file_name);
        file_line_count = xmalloc (sizeof *file_line_count);
+      text_buffers    = xmalloc (sizeof *text_buffers);
        if (!*argv[optind] || STREQ (argv[optind], "-"))
          input_file_name[0] = NULL;
        else
@@ -2126,11 +2132,12 @@ main (int argc, char **argv)
  
    for (file_index = 0; file_index < number_input_files; file_index++)
      {
+      BLOCK *text_buffer = text_buffers + file_index;
  
-      /* Read the file in core, than study it.  */
+      /* Read the file in core, then study it.  */
  
-      swallow_file_in_memory (input_file_name[file_index], &text_buffer);
-      find_occurs_in_text ();
+      swallow_file_in_memory (input_file_name[file_index], text_buffer);
+      find_occurs_in_text (file_index);
  
        /* Maintain for each file how many lines has been read so far when its
           end is reached.  Incrementing the count first is a simple kludge to
diff --git a/tests/misc/ptx-overrun.sh b/tests/misc/ptx-overrun.sh

index 693a1800a84948ccc637df1d3aa0f9ae49923bb7..be9fb524f49e773721b8b3ef3d5778728a90c1b9 100755 (executable)
--- a/tests/misc/ptx-overrun.sh
+++ b/tests/misc/ptx-overrun.sh
@@ -1,5 +1,4 @@
  #!/bin/sh
-# Trigger a heap-clobbering bug in ptx from coreutils-6.10 and earlier.
  
  # Copyright (C) 2008-2014 Free Software Foundation, Inc.
  
@@ -19,12 +18,12 @@
  . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
  print_ver_ ptx
  
+# Trigger a heap-clobbering bug in ptx from coreutils-6.10 and earlier.
  # Using a long file name makes an abort more likely.
  # Even with no file name, valgrind detects the buffer overrun.
  f=01234567890123456789012345678901234567890123456789
  touch $f empty || framework_failure_
  
-
  # Specifying a regular expression ending in a lone backslash
  # would cause ptx to write beyond the end of a malloc'd buffer.
  ptx -F '\'      $f < /dev/null  > out || fail=1
@@ -32,4 +31,14 @@ ptx -S 'foo\'   $f < /dev/null >> out || fail=1
  ptx -W 'bar\\\' $f < /dev/null >> out || fail=1
  compare out empty || fail=1
  
+
+# Trigger an invalid heap reference noticed by gcc -fsanitize=address
+# from coreutils-8.22 and earlier.  As well as an invalid memory reference,
+# the issue can be seen in the output, with non deterministice whitespace
+# trimming when multiple files are specified.
+printf '%s\n' 'This is a ptx whitespace Trimming test' > ws.in
+ptx ws.in ws.in | sort | uniq -u > out
+compare /dev/null out || fail=1
+
+
  Exit $fail
author	Pádraig Brady <P@draigBrady.com>
	Mon, 28 Apr 2014 12:29:41 +0000 (13:29 +0100)
committer	Pádraig Brady <P@draigBrady.com>
	Tue, 29 Apr 2014 08:50:58 +0000 (09:50 +0100)
NEWS		patch \| blob \| blame \| history
src/ptx.c		patch \| blob \| blame \| history
tests/misc/ptx-overrun.sh		patch \| blob \| blame \| history