2005-01-26 Jakub Jelinek <jakub@redhat.com>

author Roland McGrath <roland@gnu.org>

Wed, 16 Feb 2005 11:09:25 +0000 (11:09 +0000)

committer Roland McGrath <roland@gnu.org>

Wed, 16 Feb 2005 11:09:25 +0000 (11:09 +0000)
author Roland McGrath <roland@gnu.org>
Wed, 16 Feb 2005 11:09:25 +0000 (11:09 +0000)
committer Roland McGrath <roland@gnu.org>
Wed, 16 Feb 2005 11:09:25 +0000 (11:09 +0000)
diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c

index 4000b19b4d539a08c4b8c9493bd9cefa130d8aa2..3a6391cb01aef7f88efdef599a0c46630de40274 100644 (file)
--- a/posix/bug-regex19.c
+++ b/posix/bug-regex19.c
@@ -170,22 +170,22 @@ static struct test_s
    {ERE, "[^k]\\B[^k]", "kBk", 0, -1},
    {ERE, "[^C]\\B[^C]", "CCCABA", 0, 3},
    {ERE, "[^C]\\B[^C]", "CBC", 0, -1},
-  {ERE, ".(\\b|\\B).", "=~AB", 0, 1},
+  {ERE, ".(\\b|\\B).", "=~AB", 0, 0},
    {ERE, ".(\\b|\\B).", "A=C", 0, 0},
    {ERE, ".(\\b|\\B).", "ABC", 0, 0},
-  {ERE, ".(\\b|\\B).", "=~\\!", 0, -1},
-  {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 1},
+  {ERE, ".(\\b|\\B).", "=~\\!", 0, 0},
+  {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 0},
    {ERE, "[^k](\\b|\\B)[^k]", "A=C", 0, 0},
    {ERE, "[^k](\\b|\\B)[^k]", "ABC", 0, 0},
-  {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 3},
-  {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, -1},
-  {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, -1},
-  {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 1},
+  {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 0},
+  {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, 0},
+  {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, 0},
+  {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 0},
    {ERE, "[^C](\\b|\\B)[^C]", "A=C", 0, 0},
    {ERE, "[^C](\\b|\\B)[^C]", "ABC", 0, 0},
-  {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 3},
-  {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, -1},
-  {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, -1},
+  {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 0},
+  {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, 0},
+  {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, 0},
    {ERE, "\\b([A]|[!]|.B)", "A=AC", 0, 0},
    {ERE, "\\b([A]|[!]|.B)", "=AC", 0, 1},
    {ERE, "\\b([A]|[!]|.B)", "!AC", 0, 1},
diff --git a/posix/regcomp.c b/posix/regcomp.c

index 5de5bf725ac66f00400d9d82d6e03b449618ff97..72bf187b14be96f7edcaa98a0639216ebca5ba6a 100644 (file)
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1859,7 +1859,7 @@ peek_token (token, input, syntax)
           if (!(syntax & RE_NO_GNU_OPS))
             {
               token->type = ANCHOR;
-             token->opr.ctx_type = INSIDE_WORD;
+             token->opr.ctx_type = NOT_WORD_DELIM;
             }
           break;
         case 'w':
@@ -2349,15 +2349,25 @@ parse_expression (regexp, preg, token, syntax, nest, err)
        break;
      case ANCHOR:
        if ((token->opr.ctx_type
-          & (WORD_DELIM | INSIDE_WORD | WORD_FIRST | WORD_LAST))
+          & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
           && dfa->word_ops_used == 0)
         init_word_char (dfa);
-      if (token->opr.ctx_type == WORD_DELIM)
+      if (token->opr.ctx_type == WORD_DELIM
+          || token->opr.ctx_type == NOT_WORD_DELIM)
         {
           bin_tree_t *tree_first, *tree_last;
-         token->opr.ctx_type = WORD_FIRST;
-         tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token);
-         token->opr.ctx_type = WORD_LAST;
+         if (token->opr.ctx_type == WORD_DELIM)
+           {
+             token->opr.ctx_type = WORD_FIRST;
+             tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+             token->opr.ctx_type = WORD_LAST;
+            }
+          else
+            {
+             token->opr.ctx_type = INSIDE_WORD;
+             tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+             token->opr.ctx_type = INSIDE_NOTWORD;
+            }
           tree_last = re_dfa_add_tree_node (dfa, NULL, NULL, token);
           token->type = OP_ALT;
           tree = re_dfa_add_tree_node (dfa, tree_first, tree_last, token);
diff --git a/posix/regex_internal.h b/posix/regex_internal.h

index 0ccd8d3665fd5db7b9d152c30ccf46bc20d52cb2..18865a7266447e6c0dfe274c2bbb0480ab3ef026 100644 (file)
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -143,18 +143,21 @@ static inline void bitset_mask (bitset dest, const bitset src);
  #define NEXT_NEWLINE_CONSTRAINT 0x0020
  #define PREV_BEGBUF_CONSTRAINT 0x0040
  #define NEXT_ENDBUF_CONSTRAINT 0x0080
-#define DUMMY_CONSTRAINT 0x0100
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
  
  typedef enum
  {
    INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
    WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
    WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
    LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
    LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
    BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
    BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
-  WORD_DELIM = DUMMY_CONSTRAINT
+  WORD_DELIM = WORD_DELIM_CONSTRAINT,
+  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
  } re_context_type;
  
  typedef struct
diff --git a/posix/rxspencer/tests b/posix/rxspencer/tests

index a724252d8c9724b37da948291d1a289d5f079c36..a8b6e4baa82085967a8c63bbdfb200ac27164fec 100644 (file)
--- a/posix/rxspencer/tests
+++ b/posix/rxspencer/tests
@@ -526,3 +526,12 @@ a((b+|((c)*)))+d   -       abcd    abcd    c,c,c,c
  (((\b))){0}    -       x       @x      -,-,-
  a(((.*)))b((\2)){0}c   -       abc     abc     @bc,@bc,@bc,-,-
  a(((.*)))b((\1)){0}c   -       axbc    axbc    x,x,x,-,-
+
+\b     &       SaT     @aT
+\b     &       aT      @aT
+a.*\b  &       abT     ab
+\b     &       STSS
+\B     &       abc     @bc
+\B     &       aSbTc
+\B     &       SaT     @SaT
+\B     &       aSTSb   @TSb
diff --git a/posix/tst-rxspencer.c b/posix/tst-rxspencer.c

index cb40421797bf2f8fb3e7151880d25f433006b4f9..3febc01cb201d3482293629c3210abfa1cff31e8 100644 (file)
--- a/posix/tst-rxspencer.c
+++ b/posix/tst-rxspencer.c
@@ -127,14 +127,15 @@ mb_frob_string (const char *str, const char *letters)
  }
  
  /* Like mb_frob_string, but don't replace anything between
-   [: and :], [. and .] or [= and =].  */
+   [: and :], [. and .] or [= and =] or characters escaped
+   with a backslash.  */
  
  static char *
  mb_frob_pattern (const char *str, const char *letters)
  {
    char *ret, *dst;
    const char *src;
-  int in_class = 0;
+  int in_class = 0, escaped = 0;
  
    if (str == NULL)
      return NULL;
@@ -144,7 +145,18 @@ mb_frob_pattern (const char *str, const char *letters)
      return NULL;
  
    for (src = str, dst = ret; *src; ++src)
-    if (!in_class && strchr (letters, *src))
+    if (*src == '\\')
+      {
+       escaped ^= 1;
+       *dst++ = *src;
+      }
+    else if (escaped)
+      {
+       escaped = 0;
+       *dst++ = *src;
+       continue;
+      }
+    else if (!in_class && strchr (letters, *src))
        dst = mb_replace (dst, *src);
      else
        {
author	Roland McGrath <roland@gnu.org>
	Wed, 16 Feb 2005 11:09:25 +0000 (11:09 +0000)
committer	Roland McGrath <roland@gnu.org>
	Wed, 16 Feb 2005 11:09:25 +0000 (11:09 +0000)
posix/bug-regex19.c		patch | blob | blame | history
posix/regcomp.c		patch | blob | blame | history
posix/regex_internal.h		patch | blob | blame | history
posix/rxspencer/tests		patch | blob | blame | history
posix/tst-rxspencer.c		patch | blob | blame | history