Latest from Jeffrey Ollie.

author Guido van Rossum <guido@python.org>

Fri, 9 May 1997 02:35:58 +0000 (02:35 +0000)

committer Guido van Rossum <guido@python.org>

Fri, 9 May 1997 02:35:58 +0000 (02:35 +0000)
author Guido van Rossum <guido@python.org>
Fri, 9 May 1997 02:35:58 +0000 (02:35 +0000)
committer Guido van Rossum <guido@python.org>
Fri, 9 May 1997 02:35:58 +0000 (02:35 +0000)
diff --git a/Modules/regexpr.c b/Modules/regexpr.c

index 1eebdb88fa4c6f64a1b7e7c508ec7d2de2686a8f..c74ece324dd2c532b025a5d4f669bed12f997a3f 100644 (file)
--- a/Modules/regexpr.c
+++ b/Modules/regexpr.c
@@ -1,34 +1,43 @@
-/*
-
-regexpr.c
-
-Author: Tatu Ylonen <ylo@ngs.fi>
-
-Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
-
-Permission to use, copy, modify, distribute, and sell this software
-and its documentation for any purpose is hereby granted without fee,
-provided that the above copyright notice appear in all copies.  This
-software is provided "as is" without express or implied warranty.
-
-Created: Thu Sep 26 17:14:05 1991 ylo
-Last modified: Mon Nov  4 17:06:48 1991 ylo
-Ported to Think C: 19 Jan 1992 guido@cwi.nl
-
-This code draws many ideas from the regular expression packages by
-Henry Spencer of the University of Toronto and Richard Stallman of the
-Free Software Foundation.
-
-Emacs-specific code and syntax table code is almost directly borrowed
-from GNU regexp.
-
-*/
+/* regexpr.c
+ *
+ * Author: Tatu Ylonen <ylo@ngs.fi>
+ *
+ * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
+ *
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without
+ * fee, provided that the above copyright notice appear in all copies.
+ * This software is provided "as is" without express or implied
+ * warranty.
+ *
+ * Created: Thu Sep 26 17:14:05 1991 ylo
+ * Last modified: Mon Nov  4 17:06:48 1991 ylo
+ * Ported to Think C: 19 Jan 1992 guido@cwi.nl
+ *
+ * This code draws many ideas from the regular expression packages by
+ * Henry Spencer of the University of Toronto and Richard Stallman of
+ * the Free Software Foundation.
+ *
+ * Emacs-specific code and syntax table code is almost directly borrowed
+ * from GNU regexp.
+ *
+ * Bugs fixed and lots of reorganization by Jeffrey C. Ollie, April
+ * 1997.  Thanks for bug reports and ideas to Andrew Kuchling, Tim
+ * Peters, Guido van Rossum, Ka-Ping Yee, Sjoerd Mullender, and
+ * probably one or two others that I'm forgetting.
+ *
+ * $Id$ */
  
  #include "config.h" /* For Win* specific redefinition of printf c.s. */
  
-#include "myproto.h" /* For Py_PROTO macro --Guido */
+#include "myproto.h" /* For PROTO macro --Guido */
  
  #include <stdio.h>
+
+#ifndef NDEBUG
+#define NDEBUG 1
+#endif
+
  #include <assert.h>
  #include "regexpr.h"
  
@@ -48,8 +57,316 @@ char *realloc();
  #endif /* __STDC__ */
  #endif /* THINK_C */
  
-#define MACRO_BEGIN do {
-#define MACRO_END } while (0)
+/* The stack implementation is taken from an idea by Andrew Kuchling.
+ * It's a doubly linked list of arrays. The advantage of this over a
+ * simple linked list is that the number of mallocs required is
+ * reduced. It also makes it possible to statically allocate enough
+ * space so that small patterns don't ever need to call malloc.
+ *
+ * The advantage over a single array that is periodically
+ * realloced when more space is needed is that we avoid ever copying
+ * the stack. */
+
+/* item_t is the basic stack element.  Defined as a union of
+ * structures so that registers, failure points, and counters can all
+ * be pushed/popped from the stack.  There's nothing built into the
+ * item to keep track of whether a certain stack item is a register, a
+ * failure point, or a counter. */
+
+typedef union item_t
+{
+      struct
+      {
+           int num;
+           int level;
+           char *start;
+           char *end;
+      } reg;
+      struct
+      {
+           int count;
+           int level;
+           int phantom;
+           char *code;
+           char *text;
+      } fail;
+      struct
+      {
+           int num;
+           int level;
+           int count;
+      } cntr;
+} item_t;
+
+#define STACK_PAGE_SIZE 256
+#define NUM_REGISTERS 256
+
+/* A 'page' of stack items. */
+
+typedef struct item_page_t
+{
+      item_t items[STACK_PAGE_SIZE];
+      struct item_page_t *prev;
+      struct item_page_t *next;
+} item_page_t;
+
+
+typedef struct match_state
+{
+      /* Structure to encapsulate the stack. */
+      struct
+      {
+           /* Index into the current page.  If index == 0 and you need
+            * to pop an item, move to the previous page and set
+            * index = STACK_PAGE_SIZE - 1.  Otherwise decrement index
+            * to pop an item. If index == STACK_PAGE_SIZE and you
+            * need to push an item, move to the next page and set index
+            * = 0. If there is no next page, allocate a new page
+            * and link it in. Otherwise, increment index to push an
+            * item. */
+           int index;
+           item_page_t *current; /* Pointer to the current page. */
+           item_page_t first; /* First page is statically allocated. */
+      } stack;
+      char *start[NUM_REGISTERS];
+      char *end[NUM_REGISTERS];
+
+      int changed[NUM_REGISTERS];
+      /* The number of registers that have been pushed onto the stack
+       * since the last failure point. */
+      int count;
+      /* Used to control when registers need to be pushed onto the
+       * stack. */
+      int level;
+      /* The number of failure points on the stack. */
+      int point;
+} match_state;
+
+/* Discard the top 'count' stack items. */
+
+#define STACK_DISCARD(stack, count, on_error) \
+stack.index -= count; \
+while (stack.index < 0) \
+{ \
+   if (stack.current->prev == NULL) \
+      on_error; \
+   stack.current = stack.current->prev; \
+   stack.index += STACK_PAGE_SIZE; \
+}
+
+/* Store a pointer to the previous item on the stack. Used to pop an
+ * item off of the stack. */
+
+#define STACK_PREV(stack, top, on_error) \
+if (stack.index == 0) \
+{ \
+   if (stack.current->prev == NULL) \
+      on_error; \
+   stack.current = stack.current->prev; \
+   stack.index = STACK_PAGE_SIZE - 1; \
+} \
+else \
+   stack.index--; \
+top = &(stack.current->items[stack.index])
+
+/* Store a pointer to the next item on the stack. Used to push an item
+ * on to the stack. */
+
+#define STACK_NEXT(stack, top, on_error) \
+if (stack.index == STACK_PAGE_SIZE) \
+{ \
+   if (stack.current->next == NULL) \
+   { \
+      stack.current->next = malloc(sizeof(item_page_t)); \
+      if (stack.current->next == NULL) \
+         on_error; \
+      stack.current->next->prev = stack.current; \
+      stack.current->next->next = NULL; \
+   } \
+   stack.current = stack.current->next; \
+   stack.index = 0; \
+} \
+top = &(stack.current->items[stack.index++])
+
+/* Store a pointer to the item that is 'count' items back in the
+ * stack. STACK_BACK(stack, top, 1, on_error) is equivalent to
+ * STACK_TOP(stack, top, on_error).  */
+
+#define STACK_BACK(stack, top, count, on_error) \
+{ \
+   int index; \
+   item_page_t *current; \
+   current = stack.current; \
+   index = stack.index - (count); \
+   while (index < 0) \
+   { \
+      if (current->prev == NULL) \
+        on_error; \
+      current = current->prev; \
+      index += STACK_PAGE_SIZE; \
+   } \
+   top = &(current->items[index]); \
+}
+
+/* Store a pointer to the top item on the stack. Execute the
+ * 'on_error' code if there are no items on the stack. */
+
+#define STACK_TOP(stack, top, on_error) \
+if (stack.index == 0) \
+{ \
+   if (stack.current->prev == NULL) \
+      on_error; \
+   top = &(stack.current->prev->items[STACK_PAGE_SIZE - 1]); \
+} \
+else \
+   top = &(stack.current->items[stack.index - 1])
+
+/* Test to see if the stack is empty */
+
+#define STACK_EMPTY(stack) ((stack.index == 0) && \
+                           (stack.current->prev == NULL))
+
+
+/* Initialize a state object */
+
+#define NEW_STATE(state) \
+memset(&state, 0, sizeof(match_state)); \
+state.stack.current = &state.stack.first; \
+state.level = 1
+
+/* Free any memory that might have been malloc'd */
+
+#define FREE_STATE(state) \
+while(state.stack.first.next != NULL) \
+{ \
+   state.stack.current = state.stack.first.next; \
+   state.stack.first.next = state.stack.current->next; \
+   free(state.stack.current); \
+}
+
+/* Return the start of register 'reg' */
+
+#define GET_REG_START(state, reg) (state.start[reg])
+
+/* Return the end of register 'reg' */
+
+#define GET_REG_END(state, reg) (state.end[reg])
+
+/* Set the start of register 'reg'. If the state of the register needs
+ * saving, push it on the stack. */
+
+#define SET_REG_START(state, reg, text, on_error) \
+if(state.changed[reg] < state.level) \
+{ \
+   item_t *item; \
+   STACK_NEXT(state.stack, item, on_error); \
+   item->reg.num = reg; \
+   item->reg.start = state.start[reg]; \
+   item->reg.end = state.end[reg]; \
+   item->reg.level = state.changed[reg]; \
+   state.changed[reg] = state.level; \
+   state.count++; \
+} \
+state.start[reg] = text
+
+/* Set the end of register 'reg'. If the state of the register needs
+ * saving, push it on the stack. */
+
+#define SET_REG_END(state, reg, text, on_error) \
+if(state.changed[reg] < state.level) \
+{ \
+   item_t *item; \
+   STACK_NEXT(state.stack, item, on_error); \
+   item->reg.num = reg; \
+   item->reg.start = state.start[reg]; \
+   item->reg.end = state.end[reg]; \
+   item->reg.level = state.changed[reg]; \
+   state.changed[reg] = state.level; \
+   state.count++; \
+} \
+state.end[reg] = text
+
+#define PUSH_FAILURE(state, xcode, xtext, on_error) \
+{ \
+   item_t *item; \
+   STACK_NEXT(state.stack, item, on_error); \
+   item->fail.code = xcode; \
+   item->fail.text = xtext; \
+   item->fail.count = state.count; \
+   item->fail.level = state.level; \
+   item->fail.phantom = 0; \
+   state.count = 0; \
+   state.level++; \
+   state.point++; \
+}
+
+/* Update the last failure point with a new position in the text. */
+
+/* #define UPDATE_FAILURE(state, xtext, on_error) \ */
+/* { \ */
+/*    item_t *item; \ */
+/*    STACK_DISCARD(state.stack, state.count, on_error); \ */
+/*    STACK_TOP(state.stack, item, on_error); \ */
+/*    item->fail.text = xtext; \ */
+/*    state.count = 0; \ */
+/* } */
+
+/* #define UPDATE_FAILURE(state, xtext, on_error) \ */
+/* { \ */
+/*    item_t *item; \ */
+/*    STACK_BACK(state.stack, item, state.count + 1, on_error); \ */
+/*    item->fail.text = xtext; \ */
+/* } */
+
+#define UPDATE_FAILURE(state, xtext, on_error) \
+{ \
+   item_t *item; \
+   STACK_BACK(state.stack, item, state.count + 1, on_error); \
+   if (!item->fail.phantom) \
+   { \
+      item_t *item2; \
+      STACK_NEXT(state.stack, item2, on_error); \
+      item2->fail.code = item->fail.code; \
+      item2->fail.text = xtext; \
+      item2->fail.count = state.count; \
+      item2->fail.level = state.level; \
+      item2->fail.phantom = 1; \
+      state.count = 0; \
+      state.level++; \
+      state.point++; \
+   } \
+   else \
+   { \
+      STACK_DISCARD(state.stack, state.count, on_error); \
+      STACK_TOP(state.stack, item, on_error); \
+      item->fail.text = xtext; \
+      state.count = 0; \
+      state.level++; \
+   } \
+}
+
+#define POP_FAILURE(state, xcode, xtext, on_empty, on_error) \
+{ \
+   item_t *item; \
+   do \
+   { \
+      while(state.count > 0) \
+      { \
+         STACK_PREV(state.stack, item, on_error); \
+         state.start[item->reg.num] = item->reg.start; \
+         state.end[item->reg.num] = item->reg.end; \
+         state.changed[item->reg.num] = item->reg.level; \
+         state.count--; \
+      } \
+      STACK_PREV(state.stack, item, on_empty); \
+      xcode = item->fail.code; \
+      xtext = item->fail.text; \
+      state.count = item->fail.count; \
+      state.level = item->fail.level; \
+      state.point--; \
+   } \
+   while (item->fail.text == NULL); \
+}
  
  enum regexp_compiled_ops /* opcodes for compiled regexp */
  {
@@ -73,9 +390,6 @@ enum regexp_compiled_ops /* opcodes for compiled regexp */
    Cwordend,            /* match at end of word */
    Cwordbound,          /* match if at word boundary */
    Cnotwordbound,       /* match if not at word boundary */
-#ifdef emacs
-  Cemacs_at_dot,       /* emacs only: matches at dot */
-#endif /* emacs */
    Csyntaxspec,         /* matches syntax code (1 byte follows) */
    Cnotsyntaxspec       /* matches if syntax code does not match (1 byte foll)*/
  };
@@ -106,11 +420,6 @@ enum regexp_syntax_op      /* syntax codes for plain and quoted characters */
    Rwordend,            /* end of word */
    Rwordbound,          /* word bound */
    Rnotwordbound,       /* not word bound */
-#ifdef emacs
-  Remacs_at_dot,       /* emacs: at dot */
-  Remacs_syntaxspec,   /* syntaxspec */
-  Remacs_notsyntaxspec,        /* notsyntaxspec */
-#endif /* emacs */
    Rnum_ops
  };
  
@@ -126,99 +435,70 @@ static int regexp_ansi_sequences;
  #define NUM_LEVELS  5    /* number of precedence levels in use */
  #define MAX_NESTING 100  /* max nesting level of operators */
  
-#ifdef emacs
-
-/* This code is for emacs compatibility only. */
-
-#include "config.h"
-#include "lisp.h"
-#include "buffer.h"
-#include "syntax.h"
-
-/* emacs defines NULL in some strange way? */
-#undef NULL
-#define NULL 0
-
-#else /* emacs */
-
  #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
  #define Sword 1
  
-#ifdef SYNTAX_TABLE
-char *re_syntax_table;
-#else
  static char re_syntax_table[256];
-#endif /* SYNTAX_TABLE */
  
-#endif /* emacs */
-
-static void re_compile_initialize Py_PROTO((void));
-static void re_compile_initialize()
+static void re_compile_initialize(void)
  {
-  int a;
-  
-#if !defined(emacs) && !defined(SYNTAX_TABLE)
-  static int syntax_table_inited = 0;
+   int a;
    
-  if (!syntax_table_inited)
-    {
+   static int syntax_table_inited = 0;
+   
+   if (!syntax_table_inited)
+   {
        syntax_table_inited = 1;
        memset(re_syntax_table, 0, 256);
        for (a = 'a'; a <= 'z'; a++)
-       re_syntax_table[a] = Sword;
+        re_syntax_table[a] = Sword;
        for (a = 'A'; a <= 'Z'; a++)
-       re_syntax_table[a] = Sword;
+        re_syntax_table[a] = Sword;
        for (a = '0'; a <= '9'; a++)
-       re_syntax_table[a] = Sword;
-    }
-#endif /* !emacs && !SYNTAX_TABLE */
-  re_compile_initialized = 1;
-  for (a = 0; a < 256; a++)
-    {
+        re_syntax_table[a] = Sword;
+   }
+   re_compile_initialized = 1;
+   for (a = 0; a < 256; a++)
+   {
        regexp_plain_ops[a] = Rnormal;
        regexp_quoted_ops[a] = Rnormal;
-    }
-  for (a = '0'; a <= '9'; a++)
-    regexp_quoted_ops[a] = Rmemory;
-  regexp_plain_ops['\134'] = Rquote;
-  if (regexp_syntax & RE_NO_BK_PARENS)
-    {
+   }
+   for (a = '0'; a <= '9'; a++)
+      regexp_quoted_ops[a] = Rmemory;
+   regexp_plain_ops['\134'] = Rquote;
+   if (regexp_syntax & RE_NO_BK_PARENS)
+   {
        regexp_plain_ops['('] = Ropenpar;
        regexp_plain_ops[')'] = Rclosepar;
-    }
-  else
-    {
+   }
+   else
+   {
        regexp_quoted_ops['('] = Ropenpar;
        regexp_quoted_ops[')'] = Rclosepar;
-    }
-  if (regexp_syntax & RE_NO_BK_VBAR)
-    regexp_plain_ops['\174'] = Ror;
-  else
-    regexp_quoted_ops['\174'] = Ror;
-  regexp_plain_ops['*'] = Rstar;
-  if (regexp_syntax & RE_BK_PLUS_QM)
-    {
+   }
+   if (regexp_syntax & RE_NO_BK_VBAR)
+      regexp_plain_ops['\174'] = Ror;
+   else
+      regexp_quoted_ops['\174'] = Ror;
+   regexp_plain_ops['*'] = Rstar;
+   if (regexp_syntax & RE_BK_PLUS_QM)
+   {
        regexp_quoted_ops['+'] = Rplus;
        regexp_quoted_ops['?'] = Roptional;
-    }
-  else
-    {
+   }
+   else
+   {
        regexp_plain_ops['+'] = Rplus;
        regexp_plain_ops['?'] = Roptional;
-    }
-  if (regexp_syntax & RE_NEWLINE_OR)
-    regexp_plain_ops['\n'] = Ror;
-  regexp_plain_ops['\133'] = Ropenset;
-  regexp_plain_ops['\136'] = Rbol;
-  regexp_plain_ops['$'] = Reol;
-  regexp_plain_ops['.'] = Ranychar;
-  if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
-    {
-#ifdef emacs
-      regexp_quoted_ops['='] = Remacs_at_dot;
-      regexp_quoted_ops['s'] = Remacs_syntaxspec;
-      regexp_quoted_ops['S'] = Remacs_notsyntaxspec;
-#endif /* emacs */
+   }
+   if (regexp_syntax & RE_NEWLINE_OR)
+      regexp_plain_ops['\n'] = Ror;
+   regexp_plain_ops['\133'] = Ropenset;
+   regexp_plain_ops['\136'] = Rbol;
+   regexp_plain_ops['$'] = Reol;
+   regexp_plain_ops['.'] = Ranychar;
+   if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
+   {
        regexp_quoted_ops['w'] = Rwordchar;
        regexp_quoted_ops['W'] = Rnotwordchar;
        regexp_quoted_ops['<'] = Rwordbeg;
@@ -227,86 +507,469 @@ static void re_compile_initialize()
        regexp_quoted_ops['B'] = Rnotwordbound;
        regexp_quoted_ops['`'] = Rbegbuf;
        regexp_quoted_ops['\''] = Rendbuf;
-    }
-  if (regexp_syntax & RE_ANSI_HEX)
-    regexp_quoted_ops['v'] = Rextended_memory;
-  for (a = 0; a < Rnum_ops; a++)
-    regexp_precedences[a] = 4;
-  if (regexp_syntax & RE_TIGHT_VBAR)
-    {
+   }
+   if (regexp_syntax & RE_ANSI_HEX)
+      regexp_quoted_ops['v'] = Rextended_memory;
+   for (a = 0; a < Rnum_ops; a++)
+      regexp_precedences[a] = 4;
+   if (regexp_syntax & RE_TIGHT_VBAR)
+   {
        regexp_precedences[Ror] = 3;
        regexp_precedences[Rbol] = 2;
        regexp_precedences[Reol] = 2;
-    }
-  else
-    {
+   }
+   else
+   {
        regexp_precedences[Ror] = 2;
        regexp_precedences[Rbol] = 3;
        regexp_precedences[Reol] = 3;
-    }
-  regexp_precedences[Rclosepar] = 1;
-  regexp_precedences[Rend] = 0;
-  regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
-  regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
+   }
+   regexp_precedences[Rclosepar] = 1;
+   regexp_precedences[Rend] = 0;
+   regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
+   regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
  }
  
-int re_set_syntax(syntax)
-int syntax;
+int re_set_syntax(int syntax)
  {
-  int ret;
+   int ret;
+   
+   ret = regexp_syntax;
+   regexp_syntax = syntax;
+   re_syntax = syntax; /* Exported copy */
+   re_compile_initialize();
+   return ret;
+}
+
+static int hex_char_to_decimal(int ch)
+{
+   if (ch >= '0' && ch <= '9')
+      return ch - '0';
+   if (ch >= 'a' && ch <= 'f')
+      return ch - 'a' + 10;
+   if (ch >= 'A' && ch <= 'F')
+      return ch - 'A' + 10;
+   return 16;
+}
  
-  ret = regexp_syntax;
-  regexp_syntax = syntax;
-  re_syntax = syntax; /* Exported copy */
-  re_compile_initialize();
-  return ret;
+static void re_compile_fastmap_aux(char *code,
+                                  int pos,
+                                  char *visited,
+                                  char *can_be_null,
+                                  char *fastmap)
+{
+   int a;
+   int b;
+   int syntaxcode;
+   
+   if (visited[pos])
+      return;  /* we have already been here */
+   visited[pos] = 1;
+   for (;;)
+      switch (code[pos++])
+      {
+        case Cend:
+        {
+           *can_be_null = 1;
+           return;
+        }
+        case Cbol:
+        case Cbegbuf:
+        case Cendbuf:
+        case Cwordbeg:
+        case Cwordend:
+        case Cwordbound:
+        case Cnotwordbound:
+        {
+           break;
+        }
+        case Csyntaxspec:
+        {
+           syntaxcode = code[pos++];
+           for (a = 0; a < 256; a++)
+              if (SYNTAX(a) == syntaxcode)
+                 fastmap[a] = 1;
+           return;
+        }
+        case Cnotsyntaxspec:
+        {
+           syntaxcode = code[pos++];
+           for (a = 0; a < 256; a++)
+              if (SYNTAX(a) != syntaxcode)
+                 fastmap[a] = 1;
+           return;
+        }
+        case Ceol:
+        {
+           fastmap['\n'] = 1;
+           if (*can_be_null == 0)
+              *can_be_null = 2; /* can match null, but only at end of buffer*/
+           return;
+        }
+        case Cset:
+        {
+           for (a = 0; a < 256/8; a++)
+              if (code[pos + a] != 0)
+                 for (b = 0; b < 8; b++)
+                    if (code[pos + a] & (1 << b))
+                       fastmap[(a << 3) + b] = 1;
+           pos += 256/8;
+           return;
+        }
+        case Cexact:
+        {
+           fastmap[(unsigned char)code[pos]] = 1;
+           return;
+        }
+        case Canychar:
+        {
+           for (a = 0; a < 256; a++)
+              if (a != '\n')
+                 fastmap[a] = 1;
+           return;
+        }
+        case Cstart_memory:
+        case Cend_memory:
+        {
+           pos++;
+           break;
+        }
+        case Cmatch_memory:
+        {
+           for (a = 0; a < 256; a++)
+              fastmap[a] = 1;
+           *can_be_null = 1;
+           return;
+        }
+        case Cjump:
+        case Cdummy_failure_jump:
+        case Cupdate_failure_jump:
+        case Cstar_jump:
+        {
+           a = (unsigned char)code[pos++];
+           a |= (unsigned char)code[pos++] << 8;
+           pos += (int)(short)a;
+           if (visited[pos])
+           {
+              /* argh... the regexp contains empty loops.  This is not
+                 good, as this may cause a failure stack overflow when
+                 matching.  Oh well. */
+              /* this path leads nowhere; pursue other paths. */
+              return;
+           }
+           visited[pos] = 1;
+           break;
+        }
+        case Cfailure_jump:
+        {
+           a = (unsigned char)code[pos++];
+           a |= (unsigned char)code[pos++] << 8;
+           a = pos + (int)(short)a;
+           re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
+           break;
+        }
+        default:
+        {
+           abort();  /* probably some opcode is missing from this switch */
+           /*NOTREACHED*/
+        }
+      }
  }
  
-static int hex_char_to_decimal Py_PROTO((int));
-static int hex_char_to_decimal(ch)
-int ch;
+static int re_do_compile_fastmap(char *buffer,
+                                int used,
+                                int pos,
+                                char *can_be_null,
+                                char *fastmap)
  {
-  if (ch >= '0' && ch <= '9')
-    return ch - '0';
-  if (ch >= 'a' && ch <= 'f')
-    return ch - 'a' + 10;
-  if (ch >= 'A' && ch <= 'F')
-    return ch - 'A' + 10;
-  return 16;
+   char small_visited[512], *visited;
+   
+   if (used <= sizeof(small_visited))
+      visited = small_visited;
+   else
+   {
+      visited = malloc(used);
+      if (!visited)
+        return 0;
+   }
+   *can_be_null = 0;
+   memset(fastmap, 0, 256);
+   memset(visited, 0, used);
+   re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
+   if (visited != small_visited)
+      free(visited);
+   return 1;
  }
  
-char *re_compile_pattern(regex, size, bufp)
-char *regex;
-int size;
-regexp_t bufp;
+void re_compile_fastmap(regexp_t bufp)
  {
-  int a, pos, op, current_level, level, opcode;
-  int pattern_offset = 0, alloc;
-  int starts[NUM_LEVELS * MAX_NESTING], starts_base;
-  int future_jumps[MAX_NESTING], num_jumps;
-  unsigned char ch = '\0';
-  char *pattern, *translate;
-  int next_register, paren_depth, num_open_registers, open_registers[RE_NREGS];
-  int beginning_context;
-
-#define NEXTCHAR(var)                  \
-  MACRO_BEGIN                          \
-    if (pos >= size)                   \
-      goto ends_prematurely;           \
-    (var) = regex[pos];                        \
-    pos++;                             \
-  MACRO_END
-
-#define ALLOC(amount)                          \
-  MACRO_BEGIN                                  \
-    if (pattern_offset+(amount) > alloc)       \
-      {                                                \
-       alloc += 256 + (amount);                \
-       pattern = realloc(pattern, alloc);      \
-       if (!pattern)                           \
-         goto out_of_memory;                   \
-      }                                                \
-  MACRO_END
+   if (!bufp->fastmap || bufp->fastmap_accurate)
+      return;
+   assert(bufp->used > 0);
+   if (!re_do_compile_fastmap(bufp->buffer,
+                             bufp->used,
+                             0,
+                             &bufp->can_be_null,
+                             bufp->fastmap))
+      return;
+   if (bufp->buffer[0] == Cbol)
+      bufp->anchor = 1;   /* begline */
+   else
+      if (bufp->buffer[0] == Cbegbuf)
+        bufp->anchor = 2; /* begbuf */
+      else
+        bufp->anchor = 0; /* none */
+   bufp->fastmap_accurate = 1;
+}
+
+/* 
+ * star is coded as:
+ * 1: failure_jump 2
+ *    ... code for operand of star
+ *    star_jump 1
+ * 2: ... code after star
+ *
+ * We change the star_jump to update_failure_jump if we can determine
+ * that it is safe to do so; otherwise we change it to an ordinary
+ * jump.
+ *
+ * plus is coded as
+ *
+ *    jump 2
+ * 1: failure_jump 3
+ * 2: ... code for operand of plus
+ *    star_jump 1
+ * 3: ... code after plus
+ *
+ * For star_jump considerations this is processed identically to star.
+ *
+ */
+
+/* Rewrite one Cstar_jump as either Cupdate_failure_jump or Cjump.
+ *
+ * bufp - compiled pattern; bufp->buffer and bufp->used are handed to
+ *        re_do_compile_fastmap().
+ * code - points at the two displacement bytes that follow a
+ *        Cstar_jump opcode inside bufp->buffer.  The displacement is
+ *        read low byte first and interpreted as a signed 16-bit value.
+ *
+ * The opcode three bytes before the end of the displacement (i.e. the
+ * Cstar_jump itself) is overwritten in place.  It becomes
+ * Cupdate_failure_jump (with the displacement advanced by 3 so that it
+ * lands after the Cfailure_jump) when the first character-consuming
+ * opcode of the loop body cannot match any character that may start
+ * the code following the loop, and the rest of the loop body contains
+ * no jump opcodes.  Otherwise it becomes a plain Cjump with the
+ * displacement left unchanged.
+ *
+ * Returns 1 after rewriting the opcode, 0 if Cend or an unrecognized
+ * opcode is met while scanning the remainder of the loop body. */
+static int re_optimize_star_jump(regexp_t bufp, char *code)
+{
+   char map[256];
+   char can_be_null;
+   char *p1;
+   char *p2;
+   /* Must be unsigned: it indexes map[].  With a plain (possibly
+    * signed) char, pattern bytes >= 0x80 would become negative and
+    * read memory in front of the array. */
+   unsigned char ch;
+   int a;
+   int b;
+
+   /* Decode the signed 16-bit displacement stored low byte first. */
+   a = (unsigned char)*code++;
+   a |= (unsigned char)*code++ << 8;
+   a = (int)(short)a;
+
+   p1 = code + a + 3; /* skip the failure_jump */
+   assert(p1[-3] == Cfailure_jump);
+   p2 = code;
+   /* p1 points inside loop, p2 points to after loop */
+   /* map[] receives the set of characters that can start a match of
+    * the code following the loop. */
+   if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
+                             p2 - bufp->buffer, &can_be_null, map))
+      goto make_normal_jump;
+   
+   /* If we might introduce a new update point inside the
+    * loop, we can't optimize because then update_jump would
+    * update a wrong failure point.  Thus we have to be
+    * quite careful here.
+    */
+      
+   /* loop until we find something that consumes a character */
+  loop_p1:
+   switch (*p1++)
+   {
+      case Cbol:
+      case Ceol:
+      case Cbegbuf:
+      case Cendbuf:
+      case Cwordbeg:
+      case Cwordend:
+      case Cwordbound:
+      case Cnotwordbound:
+      {
+        goto loop_p1;
+      }
+      case Cstart_memory:
+      case Cend_memory:
+      {
+        p1++;
+        goto loop_p1;
+      }
+      case Cexact:
+      {
+        ch = (unsigned char)*p1++;
+        if (map[ch])
+           goto make_normal_jump;
+        break;
+      }
+      case Canychar:
+      {
+        for (b = 0; b < 256; b++)
+           if (b != '\n' && map[b])
+              goto make_normal_jump;
+        break;
+      }
+      case Cset:
+      {
+        /* The set is a 256-bit bitmap; test every member against map. */
+        for (b = 0; b < 256; b++)
+           if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
+              goto make_normal_jump;
+        p1 += 256/8;
+        break;
+      }
+      default:
+      {
+        goto make_normal_jump;
+      }
+   }
+   /* now we know that we can't backtrack. */
+   /* Walk the rest of the loop body up to the star_jump opcode itself
+    * (3 bytes before p2), skipping each opcode's operand bytes. */
+   while (p1 != p2 - 3)
+   {
+      switch (*p1++)
+      {
+        case Cend:
+        {
+           return 0;
+        }
+        case Cbol:
+        case Ceol:
+        case Canychar:
+        case Cbegbuf:
+        case Cendbuf:
+        case Cwordbeg:
+        case Cwordend:
+        case Cwordbound:
+        case Cnotwordbound:
+        {
+           break;
+        }
+        case Cset:
+        {
+           p1 += 256/8;
+           break;
+        }
+        case Cexact:
+        case Cstart_memory:
+        case Cend_memory:
+        case Cmatch_memory:
+        case Csyntaxspec:
+        case Cnotsyntaxspec:
+        {
+           p1++;
+           break;
+        }
+        case Cjump:
+        case Cstar_jump:
+        case Cfailure_jump:
+        case Cupdate_failure_jump:
+        case Cdummy_failure_jump:
+        {
+           goto make_normal_jump;
+        }
+        default:
+        {
+           return 0;
+           break;
+        }
+      }
+   }
+
+  make_update_jump:
+   code -= 3;
+   a += 3;  /* jump to after the Cfailure_jump */
+   code[0] = Cupdate_failure_jump;
+   code[1] = a & 0xff;
+   code[2] = a >> 8;
+   return 1;
+
+  make_normal_jump:
+   code -= 3;
+   *code = Cjump;
+   return 1;
+}
+
+static int re_optimize(regexp_t bufp)
+{
+   char *code;
+
+   code = bufp->buffer;
+
+   while(1)
+   {
+      switch (*code++)
+      {
+        case Cend:
+        {
+           return 1;
+        }
+        case Canychar:
+        case Cbol:
+        case Ceol:
+        case Cbegbuf:
+        case Cendbuf:
+        case Cwordbeg:
+        case Cwordend:
+        case Cwordbound:
+        case Cnotwordbound:
+        {
+           break;
+        }
+        case Cset:
+        {
+           code += 256/8;
+           break;
+        }
+        case Cexact:
+        case Cstart_memory:
+        case Cend_memory:
+        case Cmatch_memory:
+        case Csyntaxspec:
+        case Cnotsyntaxspec:
+        {
+           code++;
+           break;
+        }
+        case Cstar_jump:
+        {
+           if (!re_optimize_star_jump(bufp, code))
+           {
+              return 0;
+           }
+           /* fall through */
+        }
+        case Cupdate_failure_jump:
+        case Cjump:
+        case Cdummy_failure_jump:
+        case Cfailure_jump:
+        {
+           code += 2;
+           break;
+        }
+        default:
+        {
+           return 0;
+        }
+      }
+   }
+}
+
+#define NEXTCHAR(var) \
+{ \
+   if (pos >= size) \
+      goto ends_prematurely; \
+   (var) = regex[pos]; \
+   pos++; \
+}
+
+#define ALLOC(amount) \
+{ \
+   if (pattern_offset+(amount) > alloc) \
+   { \
+      alloc += 256 + (amount); \
+      pattern = realloc(pattern, alloc); \
+      if (!pattern) \
+        goto out_of_memory; \
+   } \
+}
  
  #define STORE(ch) pattern[pattern_offset++] = (ch)
  
@@ -314,450 +977,522 @@ regexp_t bufp;
  
  /* SET_LEVEL_START: record the current output offset as the start position for current_level in the active frame (starts_base) of starts[]. */
  #define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
  
-#define PUSH_LEVEL_STARTS if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
-                           starts_base += NUM_LEVELS;                  \
-                          else                                         \
-                           goto too_complex
+#define PUSH_LEVEL_STARTS /* move starts_base to a fresh frame of NUM_LEVELS slots */ \
+   if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) /* still below the last frame? */ \
+      starts_base += NUM_LEVELS; \
+   else \
+      goto too_complex /* NOTE(review): bare if/else, not wrapped in a block -- unsafe directly under an unbraced if */
  
  /* POP_LEVEL_STARTS: step starts_base back by one frame of NUM_LEVELS slots; no underflow check is made here. */
  #define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
  
-#define PUT_ADDR(offset,addr)                          \
-  MACRO_BEGIN                                          \
-    int disp = (addr) - (offset) - 2;                  \
-    pattern[(offset)] = disp & 0xff;                   \
-    pattern[(offset)+1] = (disp>>8) & 0xff;            \
-  MACRO_END
-
-#define INSERT_JUMP(pos,type,addr)                     \
-  MACRO_BEGIN                                          \
-    int a, p = (pos), t = (type), ad = (addr);         \
-    for (a = pattern_offset - 1; a >= p; a--)          \
-      pattern[a + 3] = pattern[a];                     \
-    pattern[p] = t;                                    \
-    PUT_ADDR(p+1,ad);                                  \
-    pattern_offset += 3;                               \
-  MACRO_END
+#define PUT_ADDR(offset,addr) /* write a 2-byte jump displacement at pattern[offset] */ \
+{ \
+   int disp = (addr) - (offset) - 2; /* measured from the byte after the 2-byte operand */ \
+   pattern[(offset)] = disp & 0xff; /* low byte first */ \
+   pattern[(offset)+1] = (disp>>8) & 0xff; /* then the high byte */ \
+}
  
+#define INSERT_JUMP(pos,type,addr) /* insert a 3-byte jump (opcode + displacement) at pos; does not allocate, so ALLOC(3) first */ \
+{ \
+   int a, p = (pos), t = (type), ad = (addr); /* arguments are captured once; this a is local to the block */ \
+   for (a = pattern_offset - 1; a >= p; a--) /* shift bytes p..pattern_offset-1 up by 3, last byte first */ \
+      pattern[a + 3] = pattern[a]; \
+   pattern[p] = t; \
+   PUT_ADDR(p+1,ad); \
+   pattern_offset += 3; \
+}
  /* SETBIT(buf,offset,bit): set bit number bit in the bitmap that starts at buf[offset]; the bit argument is evaluated twice. */
  #define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
  
-#define SET_FIELDS                             \
-  MACRO_BEGIN                                  \
-    bufp->allocated = alloc;                   \
-    bufp->buffer = pattern;                    \
-    bufp->used = pattern_offset;               \
-  MACRO_END
+#define SET_FIELDS /* copy the working locals alloc, pattern and pattern_offset back into *bufp */ \
+{ \
+   bufp->allocated = alloc; \
+   bufp->buffer = pattern; \
+   bufp->used = pattern_offset; \
+}
      
-#define GETHEX(var)                                            \
-  MACRO_BEGIN                                                  \
-    char gethex_ch, gethex_value;                              \
-    NEXTCHAR(gethex_ch);                                       \
-    gethex_value = hex_char_to_decimal(gethex_ch);             \
-    if (gethex_value == 16)                                    \
-      goto hex_error;                                          \
-    NEXTCHAR(gethex_ch);                                       \
-    gethex_ch = hex_char_to_decimal(gethex_ch);                        \
-    if (gethex_ch == 16)                                       \
-      goto hex_error;                                          \
-    (var) = gethex_value * 16 + gethex_ch;                     \
-  MACRO_END
-
-#define ANSI_TRANSLATE(ch)                             \
-  MACRO_BEGIN                                          \
-    switch (ch)                                                \
-      {                                                        \
-      case 'a':                                                \
-      case 'A':                                                \
-       ch = 7; /* audible bell */                      \
-       break;                                          \
-      case 'b':                                                \
-      case 'B':                                                \
-       ch = 8; /* backspace */                         \
-       break;                                          \
-      case 'f':                                                \
-      case 'F':                                                \
-       ch = 12; /* form feed */                        \
-       break;                                          \
-      case 'n':                                                \
-      case 'N':                                                \
-       ch = 10; /* line feed */                        \
-       break;                                          \
-      case 'r':                                                \
-      case 'R':                                                \
-       ch = 13; /* carriage return */                  \
-       break;                                          \
-      case 't':                                                \
-      case 'T':                                                \
-       ch = 9; /* tab */                               \
-       break;                                          \
-      case 'v':                                                \
-      case 'V':                                                \
-       ch = 11; /* vertical tab */                     \
-       break;                                          \
-      case 'x': /* hex code */                         \
-      case 'X':                                                \
-       GETHEX(ch);                                     \
-       break;                                          \
-      default:                                         \
-       /* other characters passed through */           \
-       if (translate)                                  \
-         ch = translate[(unsigned char)ch];            \
-       break;                                          \
-      }                                                        \
-  MACRO_END
-
-  if (!re_compile_initialized)
-    re_compile_initialize();
-  bufp->used = 0;
-  bufp->fastmap_accurate = 0;
-  bufp->uses_registers = 0;
-  translate = bufp->translate;
-  pattern = bufp->buffer;
-  alloc = bufp->allocated;
-  if (alloc == 0 || pattern == NULL)
-    {
+#define GETHEX(var) /* read two hex digits from the pattern and store their value in var */ \
+{ \
+   char gethex_ch, gethex_value; \
+   NEXTCHAR(gethex_ch); \
+   gethex_value = hex_char_to_decimal(gethex_ch); /* high nibble */ \
+   if (gethex_value == 16) /* 16 is treated as "not a hex digit" */ \
+      goto hex_error; \
+   NEXTCHAR(gethex_ch); \
+   gethex_ch = hex_char_to_decimal(gethex_ch); /* low nibble, reusing gethex_ch */ \
+   if (gethex_ch == 16) \
+      goto hex_error; \
+   (var) = gethex_value * 16 + gethex_ch; \
+}
+/* ANSI_TRANSLATE(ch): rewrite ch in place when it is the letter of a
+ * backslash escape.  a/b/f/n/r/t/v (either case) become the control
+ * codes 7, 8, 12, 10, 13, 9 and 11; x or X reads two further hex digits
+ * through GETHEX (which may jump to hex_error or ends_prematurely).
+ * Any other character is passed through translate[] when translate is
+ * non-NULL and otherwise left unchanged.  ch must be an lvalue.
+ */
+#define ANSI_TRANSLATE(ch)  \
+{ \
+   switch (ch) \
+   { \
+      case 'a': \
+      case 'A': \
+      { \
+        ch = 7; /* audible bell */ \
+        break; \
+      } \
+      case 'b': \
+      case 'B': \
+      { \
+        ch = 8; /* backspace */ \
+        break; \
+      } \
+      case 'f': \
+      case 'F': \
+      { \
+        ch = 12; /* form feed */ \
+        break; \
+      } \
+      case 'n': \
+      case 'N': \
+      { \
+        ch = 10; /* line feed */ \
+        break; \
+      } \
+      case 'r': \
+      case 'R': \
+      { \
+        ch = 13; /* carriage return */ \
+        break; \
+      } \
+      case 't': \
+      case 'T': \
+      { \
+        ch = 9; /* tab */ \
+        break; \
+      } \
+      case 'v': \
+      case 'V': \
+      { \
+        ch = 11; /* vertical tab */ \
+        break; \
+      } \
+      case 'x': /* hex code */ \
+      case 'X': \
+      { \
+        GETHEX(ch); \
+        break; \
+      } \
+      default: \
+      { \
+        /* other characters passed through */ \
+        if (translate) \
+           ch = translate[(unsigned char)ch]; \
+        break; \
+      } \
+   } \
+}
+/* re_compile_pattern: compile the size bytes at regex into the opcode
+ * program held in bufp->buffer.
+ *
+ * Returns NULL on success, otherwise a constant error-message string.
+ * bufp->used, fastmap_accurate and uses_registers are reset on entry;
+ * bufp->buffer/allocated supply the initial output buffer (a 256-byte
+ * one is malloc'ed when there is none) and bufp->translate, when set,
+ * is applied to pattern bytes as they are read.  Every exit label at
+ * the bottom runs SET_FIELDS to write buffer, allocated and used back
+ * into *bufp; the success path also runs re_optimize() on the result
+ * and reports "Optimization error" if that fails.
+ *
+ * starts[] remembers, per nesting frame and precedence level, the
+ * output offset recorded by SET_LEVEL_START, which is where the
+ * repetition and alternation operators insert their jumps.
+ * future_jumps[] holds the operand offsets of the Cjump instructions
+ * emitted for alternation; they are patched when the precedence level
+ * drops back.
+ */
+char *re_compile_pattern(char *regex, int size, regexp_t bufp)
+{
+   int a;
+   int pos;
+   int op;
+   int current_level;
+   int level;
+   int opcode;
+   int pattern_offset, alloc;
+   int starts[NUM_LEVELS * MAX_NESTING];
+   int starts_base;
+   int future_jumps[MAX_NESTING];
+   int num_jumps;
+   unsigned char ch;
+   char *pattern;
+   char *translate;
+   int next_register;
+   int paren_depth;
+   int num_open_registers;
+   int open_registers[RE_NREGS];
+   int beginning_context;
+
+   if (!re_compile_initialized)
+      re_compile_initialize();
+   bufp->used = 0;
+   bufp->fastmap_accurate = 0;
+   bufp->uses_registers = 0;
+   translate = bufp->translate;
+   pattern = bufp->buffer;
+   alloc = bufp->allocated;
+   if (alloc == 0 || pattern == NULL)
+   {
        alloc = 256;
        pattern = malloc(alloc);
        if (!pattern)
-       goto out_of_memory;
-    }
-  pattern_offset = 0;
-  starts_base = 0;
-  num_jumps = 0;
-  current_level = 0;
-  SET_LEVEL_START;
-  num_open_registers = 0;
-  next_register = 1;
-  paren_depth = 0;
-  beginning_context = 1;
-  op = -1;
-  /* we use Rend dummy to ensure that pending jumps are updated (due to
-     low priority of Rend) before exiting the loop. */
-  pos = 0;
-  while (op != Rend)
-    {
+        goto out_of_memory;
+   }
+   pattern_offset = 0;
+   starts_base = 0;
+   num_jumps = 0;
+   current_level = 0;
+   SET_LEVEL_START;
+   num_open_registers = 0;
+   next_register = 1;
+   paren_depth = 0;
+   beginning_context = 1;
+   op = -1;
+   /* we use Rend dummy to ensure that pending jumps are updated (due to
+      low priority of Rend) before exiting the loop. */
+   pos = 0;
+   while (op != Rend)
+   {
        if (pos >= size)
-       op = Rend;
+        op = Rend;
        else
-       {
-         NEXTCHAR(ch);
-         if (translate)
+      {
+        NEXTCHAR(ch);
+        if (translate)
             ch = translate[(unsigned char)ch];
-         op = regexp_plain_ops[(unsigned char)ch];
-         if (op == Rquote)
-           {
-             NEXTCHAR(ch);
-             op = regexp_quoted_ops[(unsigned char)ch];
-             if (op == Rnormal && regexp_ansi_sequences)
-               ANSI_TRANSLATE(ch);
-           }
-       }
+        op = regexp_plain_ops[(unsigned char)ch];
+        if (op == Rquote) /* quote character: classify the byte after it with the quoted table */
+        {
+           NEXTCHAR(ch);
+           op = regexp_quoted_ops[(unsigned char)ch];
+           if (op == Rnormal && regexp_ansi_sequences)
+              ANSI_TRANSLATE(ch);
+        }
+      }
        level = regexp_precedences[op];
        /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n",
-            ch, op, level, current_level, CURRENT_LEVEL_START); */
+        ch, op, level, current_level, CURRENT_LEVEL_START); */
        if (level > current_level)
-       {
-         for (current_level++; current_level < level; current_level++)
+      {
+        for (current_level++; current_level < level; current_level++)
             SET_LEVEL_START;
-         SET_LEVEL_START;
-       }
+        SET_LEVEL_START;
+      }
        else
-       if (level < current_level)
-         {
+        if (level < current_level)
+        {
             current_level = level;
             for (;num_jumps > 0 &&
-                future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
+                   future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
                  num_jumps--)
-             PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
-         }
+              PUT_ADDR(future_jumps[num_jumps-1], pattern_offset); /* point each pending Cjump at the current end of output */
+        }
        switch (op)
-       {
-       case Rend:
-         break;
-       case Rnormal:
-       normal_char:
-         opcode = Cexact;
-       store_opcode_and_arg: /* opcode & ch must be set */
-         SET_LEVEL_START;
-         ALLOC(2);
-         STORE(opcode);
-         STORE(ch);
-         break;
-       case Ranychar:
-         opcode = Canychar;
-       store_opcode:
-         SET_LEVEL_START;
-         ALLOC(1);
-         STORE(opcode);
-         break;
-       case Rquote:
-         abort();
-         /*NOTREACHED*/
-       case Rbol:
-         if (!beginning_context)
-           if (regexp_context_indep_ops)
-             goto op_error;
-           else
-             goto normal_char;
-         opcode = Cbol;
-         goto store_opcode;
-       case Reol:
-         if (!((pos >= size) ||
-               ((regexp_syntax & RE_NO_BK_VBAR) ?
-                (regex[pos] == '\174') :
-                (pos+1 < size && regex[pos] == '\134' &&
-                 regex[pos+1] == '\174')) ||
-               ((regexp_syntax & RE_NO_BK_PARENS)?
-                (regex[pos] == ')'):
-                (pos+1 < size && regex[pos] == '\134' &&
-                 regex[pos+1] == ')'))))
-           if (regexp_context_indep_ops)
-             goto op_error;
-           else
-             goto normal_char;
-         opcode = Ceol;
-         goto store_opcode;
-         /* NOTREACHED */
-         break;
-       case Roptional:
-         if (beginning_context)
-           if (regexp_context_indep_ops)
-             goto op_error;
-           else
-             goto normal_char;
-         if (CURRENT_LEVEL_START == pattern_offset)
-           break; /* ignore empty patterns for ? */
-         ALLOC(3);
-         INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
-                     pattern_offset + 3);
-         break;
-       case Rstar:
-       case Rplus:
-         if (beginning_context)
-           if (regexp_context_indep_ops)
-             goto op_error;
-           else
-             goto normal_char;
-         if (CURRENT_LEVEL_START == pattern_offset)
-           break; /* ignore empty patterns for + and * */
-         ALLOC(9);
-         INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
-                     pattern_offset + 6);
-         INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
-         if (op == Rplus)  /* jump over initial failure_jump */
-           INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
-                       CURRENT_LEVEL_START + 6);
-         break;
-       case Ror:
-         ALLOC(6);
-         INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
-                     pattern_offset + 6);
-         if (num_jumps >= MAX_NESTING)
-           goto too_complex;
-         STORE(Cjump);
-         future_jumps[num_jumps++] = pattern_offset;
-         STORE(0);
-         STORE(0);
-         SET_LEVEL_START;
-         break;
-       case Ropenpar:
-         SET_LEVEL_START;
-         if (next_register < RE_NREGS)
+      {
+        case Rend:
+        {
+           break;
+        }
+        case Rnormal:
+        {
+          normal_char:
+           opcode = Cexact;
+          store_opcode_and_arg: /* opcode & ch must be set */
+           SET_LEVEL_START;
+           ALLOC(2);
+           STORE(opcode);
+           STORE(ch);
+           break;
+        }
+        case Ranychar:
+        {
+           opcode = Canychar;
+          store_opcode:
+           SET_LEVEL_START;
+           ALLOC(1);
+           STORE(opcode);
+           break;
+        }
+        case Rquote:
+        {
+           abort();
+           /*NOTREACHED*/
+        }
+        case Rbol:
+        {
+           if (!beginning_context)
+              if (regexp_context_indep_ops)
+                 goto op_error;
+              else
+                 goto normal_char;
+           opcode = Cbol;
+           goto store_opcode;
+        }
+        case Reol:
+        {
+           if (!((pos >= size) ||
+                 ((regexp_syntax & RE_NO_BK_VBAR) ?
+                  (regex[pos] == '\174') :
+                  (pos+1 < size && regex[pos] == '\134' &&
+                   regex[pos+1] == '\174')) ||
+                 ((regexp_syntax & RE_NO_BK_PARENS)?
+                  (regex[pos] == ')'):
+                  (pos+1 < size && regex[pos] == '\134' &&
+                   regex[pos+1] == ')'))))
+              if (regexp_context_indep_ops)
+                 goto op_error;
+              else
+                 goto normal_char;
+           opcode = Ceol;
+           goto store_opcode;
+           /* NOTREACHED */
+           break;
+        }
+        case Roptional:
+        {
+           if (beginning_context)
+              if (regexp_context_indep_ops)
+                 goto op_error;
+              else
+                 goto normal_char;
+           if (CURRENT_LEVEL_START == pattern_offset)
+              break; /* ignore empty patterns for ? */
+           ALLOC(3);
+           INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
+                       pattern_offset + 3);
+           break;
+        }
+        case Rstar:
+        case Rplus:
+        {
+           if (beginning_context)
+              if (regexp_context_indep_ops)
+                 goto op_error;
+              else
+                 goto normal_char;
+           if (CURRENT_LEVEL_START == pattern_offset)
+              break; /* ignore empty patterns for + and * */
+           ALLOC(9); /* room for up to three 3-byte jumps */
+           INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
+                       pattern_offset + 6);
+           INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
+           if (op == Rplus)  /* jump over initial failure_jump */
+              INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
+                          CURRENT_LEVEL_START + 6);
+           break;
+        }
+        case Ror:
+        {
+           ALLOC(6); /* one inserted jump plus the Cjump stored below */
+           INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
+                       pattern_offset + 6);
+           if (num_jumps >= MAX_NESTING)
+              goto too_complex;
+           STORE(Cjump);
+           future_jumps[num_jumps++] = pattern_offset; /* remember the operand offset for later PUT_ADDR */
+           STORE(0);
+           STORE(0);
+           SET_LEVEL_START;
+           break;
+        }
+        case Ropenpar:
+        {
+           SET_LEVEL_START;
+           if (next_register < RE_NREGS)
             {
-             bufp->uses_registers = 1;
-             ALLOC(2);
-             STORE(Cstart_memory);
-             STORE(next_register);
-             open_registers[num_open_registers++] = next_register;
-             next_register++;
+              bufp->uses_registers = 1;
+              ALLOC(2);
+              STORE(Cstart_memory);
+              STORE(next_register);
+              open_registers[num_open_registers++] = next_register;
+              next_register++;
             }
-         paren_depth++;
-         PUSH_LEVEL_STARTS;
-         current_level = 0;
-         SET_LEVEL_START;
-         break;
-       case Rclosepar:
-         if (paren_depth <= 0)
-           goto parenthesis_error;
-         POP_LEVEL_STARTS;
-         current_level = regexp_precedences[Ropenpar];
-         paren_depth--;
-         if (paren_depth < num_open_registers)
+           paren_depth++;
+           PUSH_LEVEL_STARTS;
+           current_level = 0;
+           SET_LEVEL_START;
+           break;
+        }
+        case Rclosepar:
+        {
+           if (paren_depth <= 0)
+              goto parenthesis_error;
+           POP_LEVEL_STARTS;
+           current_level = regexp_precedences[Ropenpar];
+           paren_depth--;
+           if (paren_depth < num_open_registers)
             {
-             bufp->uses_registers = 1;
-             ALLOC(2);
-             STORE(Cend_memory);
-             num_open_registers--;
-             STORE(open_registers[num_open_registers]);
+              bufp->uses_registers = 1;
+              ALLOC(2);
+              STORE(Cend_memory);
+              num_open_registers--;
+              STORE(open_registers[num_open_registers]);
             }
-         break;
-       case Rmemory:
-         if (ch == '0')
-           goto bad_match_register;
-         assert(ch >= '0' && ch <= '9');
-         bufp->uses_registers = 1;
-         opcode = Cmatch_memory;
-         ch -= '0';
-         goto store_opcode_and_arg;
-       case Rextended_memory:
-         NEXTCHAR(ch);
-         if (ch < '0' || ch > '9')
-           goto bad_match_register;
-         NEXTCHAR(a);
-         if (a < '0' || a > '9')
-           goto bad_match_register;
-         ch = 10 * (a - '0') + ch - '0';
-         if (ch <= 0 || ch >= RE_NREGS)
-           goto bad_match_register;
-         bufp->uses_registers = 1;
-         opcode = Cmatch_memory;
-         goto store_opcode_and_arg;
-       case Ropenset:
-         {
-           int complement,prev,offset,range,firstchar;
+           break;
+        }
+        case Rmemory:
+        {
+           if (ch == '0')
+              goto bad_match_register;
+           assert(ch >= '0' && ch <= '9');
+           bufp->uses_registers = 1;
+           opcode = Cmatch_memory;
+           ch -= '0';
+           goto store_opcode_and_arg;
+        }
+        case Rextended_memory:
+        {
+           NEXTCHAR(ch);
+           if (ch < '0' || ch > '9')
+              goto bad_match_register;
+           NEXTCHAR(a);
+           if (a < '0' || a > '9')
+              goto bad_match_register;
+           ch = 10 * (a - '0') + ch - '0'; /* NOTE(review): ch was read first and a second, yet a is the digit weighted by 10 -- confirm the intended digit order */
+           if (ch <= 0 || ch >= RE_NREGS) /* ch is unsigned char, so ch <= 0 can only mean ch == 0 */
+              goto bad_match_register;
+           bufp->uses_registers = 1;
+           opcode = Cmatch_memory;
+           goto store_opcode_and_arg;
+        }
+        case Ropenset:
+        {
+           int complement;
+           int prev;
+           int offset;
+           int range;
+           int firstchar;
             
            SET_LEVEL_START;
            ALLOC(1+256/8);
            STORE(Cset);
            offset = pattern_offset;
            for (a = 0; a < 256/8; a++)
-             STORE(0);
+              STORE(0);
            NEXTCHAR(ch);
            if (translate)
-             ch = translate[(unsigned char)ch];
+              ch = translate[(unsigned char)ch];
            if (ch == '\136')
-             {
-               complement = 1;
-               NEXTCHAR(ch);
-               if (translate)
+           {
+              complement = 1;
+              NEXTCHAR(ch);
+              if (translate)
                   ch = translate[(unsigned char)ch];
-             }
+           }
            else
-             complement = 0;
+              complement = 0;
            prev = -1;
            range = 0;
            firstchar = 1;
            while (ch != '\135' || firstchar)
-             {
-               firstchar = 0;
-               if (regexp_ansi_sequences && ch == '\134')
-                 {
-                   NEXTCHAR(ch);
-                   ANSI_TRANSLATE(ch);
-                 }
-               if (range)
-                 {
-                   for (a = prev; a <= (int)ch; a++)
-                     SETBIT(pattern, offset, a);
-                   prev = -1;
-                   range = 0;
-                 }
-               else
+           {
+              firstchar = 0;
+              if (regexp_ansi_sequences && ch == '\134')
+              {
+                 NEXTCHAR(ch);
+                 ANSI_TRANSLATE(ch);
+              }
+              if (range)
+              {
+                 for (a = prev; a <= (int)ch; a++) /* sets nothing when the range end is below its start */
+                    SETBIT(pattern, offset, a);
+                 prev = -1;
+                 range = 0;
+              }
+              else
                   if (prev != -1 && ch == '-')
-                   range = 1;
+                    range = 1;
                   else
-                   {
-                     SETBIT(pattern, offset, ch);
-                     prev = ch;
-                   }
-               NEXTCHAR(ch);
-               if (translate)
+                 {
+                    SETBIT(pattern, offset, ch);
+                    prev = ch;
+                 }
+              NEXTCHAR(ch);
+              if (translate)
                   ch = translate[(unsigned char)ch];
-             }
+           }
            if (range)
-             SETBIT(pattern, offset, '-');
+              SETBIT(pattern, offset, '-'); /* a '-' left pending when the set closed counts as a literal '-' */
            if (complement)
-             {
-               for (a = 0; a < 256/8; a++)
+           {
+              for (a = 0; a < 256/8; a++)
                   pattern[offset+a] ^= 0xff;
-             }
+           }
            break;
-         }
-       case Rbegbuf:
-         opcode = Cbegbuf;
-         goto store_opcode;
-       case Rendbuf:
-         opcode = Cendbuf;
-         goto store_opcode;
-       case Rwordchar:
-         opcode = Csyntaxspec;
-         ch = Sword;
-         goto store_opcode_and_arg;
-       case Rnotwordchar:
-         opcode = Cnotsyntaxspec;
-         ch = Sword;
-         goto store_opcode_and_arg;
-       case Rwordbeg:
-         opcode = Cwordbeg;
-         goto store_opcode;
-       case Rwordend:
-         opcode = Cwordend;
-         goto store_opcode;
-       case Rwordbound:
-         opcode = Cwordbound;
-         goto store_opcode;
-       case Rnotwordbound:
-         opcode = Cnotwordbound;
-         goto store_opcode;
-#ifdef emacs
-       case Remacs_at_dot:
-         opcode = Cemacs_at_dot;
-         goto store_opcode;
-       case Remacs_syntaxspec:
-         NEXTCHAR(ch);
-         if (translate)
-           ch = translate[(unsigned char)ch];
-         opcode = Csyntaxspec;
-         ch = syntax_spec_code[(unsigned char)ch];
-         goto store_opcode_and_arg;
-       case Remacs_notsyntaxspec:
-         NEXTCHAR(ch);
-         if (translate)
-           ch = translate[(unsigned char)ch];
-         opcode = Cnotsyntaxspec;
-         ch = syntax_spec_code[(unsigned char)ch];
-         goto store_opcode_and_arg;
-#endif /* emacs */
-       default:
-         abort();
-       }
+        }
+        case Rbegbuf:
+        {
+           opcode = Cbegbuf;
+           goto store_opcode;
+        }
+        case Rendbuf:
+        {
+           opcode = Cendbuf;
+           goto store_opcode;
+        }
+        case Rwordchar:
+        {
+           opcode = Csyntaxspec;
+           ch = Sword;
+           goto store_opcode_and_arg;
+        }
+        case Rnotwordchar:
+        {
+           opcode = Cnotsyntaxspec;
+           ch = Sword;
+           goto store_opcode_and_arg;
+        }
+        case Rwordbeg:
+        {
+           opcode = Cwordbeg;
+           goto store_opcode;
+        }
+        case Rwordend:
+        {
+           opcode = Cwordend;
+           goto store_opcode;
+        }
+        case Rwordbound:
+        {
+           opcode = Cwordbound;
+           goto store_opcode;
+        }
+        case Rnotwordbound:
+        {
+           opcode = Cnotwordbound;
+           goto store_opcode;
+        }
+        default:
+        {
+           abort();
+        }
+      }
        beginning_context = (op == Ropenpar || op == Ror);
-    }
-  if (starts_base != 0)
-    goto parenthesis_error;
-  assert(num_jumps == 0);
-  ALLOC(1);
-  STORE(Cend);
-  SET_FIELDS;
-  return NULL;
-
- op_error:
-  SET_FIELDS;
-  return "Badly placed special character";
-
- bad_match_register:
-  SET_FIELDS;
-  return "Bad match register number";
-
- hex_error:
-  SET_FIELDS;
-  return "Bad hexadecimal number";
-
- parenthesis_error:
-  SET_FIELDS;
-  return "Badly placed parenthesis";
-
- out_of_memory:
-  SET_FIELDS;
-  return "Out of memory";
-
- ends_prematurely:
-  SET_FIELDS;
-  return "Regular expression ends prematurely";
-
- too_complex:
-  SET_FIELDS;
-  return "Regular expression too complex";
+   }
+   if (starts_base != 0) /* a nesting frame is still open: unbalanced parenthesis */
+      goto parenthesis_error;
+   assert(num_jumps == 0);
+   ALLOC(1);
+   STORE(Cend);
+   SET_FIELDS;
+   if(!re_optimize(bufp)) /* runs on the buffer just published by SET_FIELDS */
+      return "Optimization error";
+   return NULL;
+
+  op_error:
+   SET_FIELDS;
+   return "Badly placed special character";
+
+  bad_match_register:
+   SET_FIELDS;
+   return "Bad match register number";
+   
+  hex_error:
+   SET_FIELDS;
+   return "Bad hexadecimal number";
+   
+  parenthesis_error:
+   SET_FIELDS;
+   return "Badly placed parenthesis";
+   
+  out_of_memory:
+   SET_FIELDS;
+   return "Out of memory";
+   
+  ends_prematurely:
+   SET_FIELDS;
+   return "Regular expression ends prematurely";
+
+  too_complex:
+   SET_FIELDS;
+   return "Regular expression too complex";
  }
+
  #undef CHARAT
  #undef NEXTCHAR
  #undef GETHEX
@@ -772,933 +1507,423 @@ regexp_t bufp;
  #undef SETBIT
  #undef SET_FIELDS
  
-static void re_compile_fastmap_aux
-       Py_PROTO((char *, int, char *, char *, char *));
-static void re_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap)
-char *code, *visited, *can_be_null, *fastmap;
-int pos;
-{
-  int a, b, syntaxcode;
+#define PREFETCH if (text == textend) goto fail
  
-  if (visited[pos])
-    return;  /* we have already been here */
-  visited[pos] = 1;
-  for (;;)
-    switch (code[pos++])
-      {
-      case Cend:
-       *can_be_null = 1;
-       return;
-      case Cbol:
-      case Cbegbuf:
-      case Cendbuf:
-      case Cwordbeg:
-      case Cwordend:
-      case Cwordbound:
-      case Cnotwordbound:
-#ifdef emacs
-      case Cemacs_at_dot:
-#endif /* emacs */
-       break;
-      case Csyntaxspec:
-       syntaxcode = code[pos++];
-       for (a = 0; a < 256; a++)
-         if (SYNTAX(a) == syntaxcode)
-           fastmap[a] = 1;
-       return;
-      case Cnotsyntaxspec:
-       syntaxcode = code[pos++];
-       for (a = 0; a < 256; a++)
-         if (SYNTAX(a) != syntaxcode)
-           fastmap[a] = 1;
-       return;
-      case Ceol:
-       fastmap['\n'] = 1;
-       if (*can_be_null == 0)
-         *can_be_null = 2;  /* can match null, but only at end of buffer*/
-       return;
-      case Cset:
-       for (a = 0; a < 256/8; a++)
-         if (code[pos + a] != 0)
-           for (b = 0; b < 8; b++)
-             if (code[pos + a] & (1 << b))
-               fastmap[(a << 3) + b] = 1;
-       pos += 256/8;
-       return;
-      case Cexact:
-       fastmap[(unsigned char)code[pos]] = 1;
-       return;
-      case Canychar:
-       for (a = 0; a < 256; a++)
-         if (a != '\n')
-           fastmap[a] = 1;
-       return;
-      case Cstart_memory:
-      case Cend_memory:
-       pos++;
-       break;
-      case Cmatch_memory:
-       /* should this ever happen for sensible patterns??? */
-       *can_be_null = 1;
-       return;
-      case Cjump:
-      case Cdummy_failure_jump:
-      case Cupdate_failure_jump:
-      case Cstar_jump:
-       a = (unsigned char)code[pos++];
-       a |= (unsigned char)code[pos++] << 8;
-       pos += (int)(short)a;
-       if (visited[pos])
-         {
-           /* argh... the regexp contains empty loops.  This is not
-              good, as this may cause a failure stack overflow when
-              matching.  Oh well. */
-           /* this path leads nowhere; pursue other paths. */
-           return;
-         }
-       visited[pos] = 1;
-       break;
-      case Cfailure_jump:
-       a = (unsigned char)code[pos++];
-       a |= (unsigned char)code[pos++] << 8;
-       a = pos + (int)(short)a;
-       re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
-       break;
-      default:
-       abort();  /* probably some opcode is missing from this switch */
-       /*NOTREACHED*/
-      }
-}
+#define NEXTCHAR(var) /* next input byte, translated; jumps to fail at end */ \
+PREFETCH; \
+var = (unsigned char)*text++; \
+if (translate) \
+   var = (unsigned char)translate[var]
  
-static int re_do_compile_fastmap Py_PROTO((char *, int, int, char *, char *));
-static int re_do_compile_fastmap(buffer, used, pos, can_be_null, fastmap)
-char *buffer, *fastmap, *can_be_null;
-int used, pos;
+int re_match(regexp_t bufp,
+            char *string,
+            int size,
+            int pos,
+            regexp_registers_t old_regs)
  {
-  char small_visited[512], *visited;
-
-  if (used <= sizeof(small_visited))
-    visited = small_visited;
-  else
-    {
-      visited = malloc(used);
-      if (!visited)
-       return 0;
-    }
-  *can_be_null = 0;
-  memset(fastmap, 0, 256);
-  memset(visited, 0, used);
-  re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
-  if (visited != small_visited)
-    free(visited);
-  return 1;
-}
-
-void re_compile_fastmap(bufp)
-regexp_t bufp;
-{
-  if (!bufp->fastmap || bufp->fastmap_accurate)
-    return;
-  assert(bufp->used > 0);
-  if (!re_do_compile_fastmap(bufp->buffer, bufp->used, 0, &bufp->can_be_null,
-                            bufp->fastmap))
-    return;
-  if (bufp->buffer[0] == Cbol)
-    bufp->anchor = 1;   /* begline */
-  else
-    if (bufp->buffer[0] == Cbegbuf)
-      bufp->anchor = 2; /* begbuf */
-    else
-      bufp->anchor = 0; /* none */
-  bufp->fastmap_accurate = 1;
-}
-
-#define INITIAL_FAILURES  128  /* initial # failure points to allocate */
-#define MAX_FAILURES     4100L /* max # of failure points before failing */
-
-int re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop)
-regexp_t bufp;
-char *string1, *string2;
-int size1, size2, pos, mstop;
-regexp_registers_t regs;
-{
-  struct failure_point { char *text, *partend, *code; }
-    *failure_stack_start, *failure_sp, *failure_stack_end,
-    initial_failure_stack[INITIAL_FAILURES];
-  char *code, *translate, *text, *textend, *partend, *part_2_end;
-  char *regstart_text[RE_NREGS], *regstart_partend[RE_NREGS];
-  char *regend_text[RE_NREGS], *regend_partend[RE_NREGS];
-  int a, b, ch, reg, regch, match_end;
-  char *regtext, *regpartend, *regtextend;
-
-#define PREFETCH                                       \
-  MACRO_BEGIN                                          \
-    if (text == partend)                               \
-      {                                                        \
-       if (text == textend)                            \
-         goto fail;                                    \
-       text = string2;                                 \
-       partend = part_2_end;                           \
-      }                                                        \
-  MACRO_END
-
-#define NEXTCHAR(var)                          \
-  MACRO_BEGIN                                  \
-    PREFETCH;                                  \
-    (var) = (unsigned char)*text++;            \
-    if (translate)                             \
-      (var) = (unsigned char)translate[(var)]; \
-  MACRO_END
-
-  assert(pos >= 0 && size1 >= 0 && size2 >= 0 && mstop >= 0);
-  assert(mstop <= size1 + size2);
-  assert(pos <= mstop);
-
-  if (pos <= size1)
-    {
-      text = string1 + pos;
-      if (mstop <= size1)
-       {
-         partend = string1 + mstop;
-         textend = partend;
-       }
-      else
-       {
-         partend = string1 + size1;
-         textend = string2 + mstop - size1;
-       }
-      part_2_end = string2 + mstop - size1;
-    }
-  else
-    {
-      text = string2 + pos - size1;
-      partend = string2 + mstop - size1;
-      textend = partend;
-      part_2_end = partend;
-    }
-
-  if (bufp->uses_registers && regs != NULL)
-    for (a = 0; a < RE_NREGS; a++)
-      regend_text[a] = NULL;
-
+  char *code;
+  char *translate;
+  char *text;
+  char *textstart;
+  char *textend;
+  int a;
+  int b;
+  int ch;
+  int reg;
+  int match_end;
+  char *regstart;
+  char *regend;
+  int regsize;
+  match_state state;
+  
+  assert(pos >= 0 && size >= 0);
+  assert(pos <= size);
+  
+  text = string + pos;
+  textstart = string;
+  textend = string + size;
+  
    code = bufp->buffer;
+  
    translate = bufp->translate;
-  failure_stack_start = failure_sp = initial_failure_stack;
-  failure_stack_end = initial_failure_stack + INITIAL_FAILURES;
-
-#if 0
-  /* re_search_2 has already done this, and otherwise we get little benefit
-     from this.  So I'll leave this out. */
-  if (bufp->fastmap_accurate && !bufp->can_be_null &&
-      text != textend &&
-      !bufp->fastmap[translate ?
-                    (unsigned char)translate[(unsigned char)*text] :
-                    (unsigned char)*text])
-    return -1;  /* it can't possibly match */
-#endif
-
- continue_matching:
-  for (;;)
-    {
-      switch (*code++)
+/*   translated = NULL; */
+/*   if (bufp->translate) */
+/*   { */
+/*      char *t1; */
+/*      char *t2; */
+     
+/*      translated = malloc(size); */
+/*      if (translated == NULL) */
+/*     goto error; */
+
+/*      t1 = string; */
+/*      t2 = translated; */
+/*      while(t1 < textend) */
+/*     *t2++ = bufp->translate[*t1++]; */
+     
+/*      text = translated + pos; */
+/*      textstart = translated; */
+/*      textend = translated + size; */
+/*   } */
+  
+  NEW_STATE(state);
+  
+  continue_matching:
+  switch (*code++)
+  {
+     case Cend:
+     {
+       match_end = text - textstart;
+       if (old_regs)
         {
-       case Cend:
-         if (partend != part_2_end)
-           match_end = text - string1;
-         else
-           match_end = text - string2 + size1;
-         if (regs)
-           {
-             regs->start[0] = pos;
-             regs->end[0] = match_end;
-             if (!bufp->uses_registers)
-               {
-                 for (a = 1; a < RE_NREGS; a++)
-                   {
-                     regs->start[a] = -1;
-                     regs->end[a] = -1;
-                   }
-               }
-             else
-               {
-                 for (a = 1; a < RE_NREGS; a++)
-                   {
-                     if (regend_text[a] == NULL)
-                       {
-                         regs->start[a] = -1;
-                         regs->end[a] = -1;
-                         continue;
-                       }
-                     if (regstart_partend[a] != part_2_end)
-                       regs->start[a] = regstart_text[a] - string1;
-                     else
-                       regs->start[a] = regstart_text[a] - string2 + size1;
-                     if (regend_partend[a] != part_2_end)
-                       regs->end[a] = regend_text[a] - string1;
-                     else
-                       regs->end[a] = regend_text[a] - string2 + size1;
-                   }
-               }
-           }
-         if (failure_stack_start != initial_failure_stack)
-           free((char *)failure_stack_start);
-         return match_end - pos;
-       case Cbol:
-         if (text == string1 || text[-1] == '\n') /* text[-1] always valid */
-           break;
-         goto fail;
-       case Ceol:
-         if (text == string2 + size2 ||
-             (text == string1 + size1 ?
-              (size2 == 0 || *string2 == '\n') :
-              *text == '\n'))
-           break;
-         goto fail;
-       case Cset:
-         NEXTCHAR(ch);
-         if (code[ch/8] & (1<<(ch & 7)))
-           {
-             code += 256/8;
-             break;
-           }
-         goto fail;
-       case Cexact:
-         NEXTCHAR(ch);
-         if (ch != (unsigned char)*code++)
-           goto fail;
-         break;
-       case Canychar:
-         NEXTCHAR(ch);
-         if (ch == '\n')
-           goto fail;
-         break;
-       case Cstart_memory:
-         reg = *code++;
-         regstart_text[reg] = text;
-         regstart_partend[reg] = partend;
-         break;
-       case Cend_memory:
-         reg = *code++;
-         regend_text[reg] = text;
-         regend_partend[reg] = partend;
-         break;
-       case Cmatch_memory:
-         reg = *code++;
-         if (regend_text[reg] == NULL)
-           goto fail;  /* or should we just match nothing? */
-         regtext = regstart_text[reg];
-         regtextend = regend_text[reg];
-         if (regstart_partend[reg] == regend_partend[reg])
-           regpartend = regtextend;
-         else
-           regpartend = string1 + size1;
-         
-         for (;regtext != regtextend;)
-           {
-             NEXTCHAR(ch);
-             if (regtext == regpartend)
-               regtext = string2;
-             regch = (unsigned char)*regtext++;
-             if (translate)
-               regch = (unsigned char)translate[regch];
-             if (regch != ch)
-               goto fail;
-           }
-         break;
-       case Cstar_jump:
-         /* star is coded as:
-              1: failure_jump 2
-                 ... code for operand of star
-                 star_jump 1
-              2: ... code after star
-            We change the star_jump to update_failure_jump if we can determine
-            that it is safe to do so; otherwise we change it to an ordinary
-            jump.
-            plus is coded as
-                 jump 2
-              1: failure_jump 3
-              2: ... code for operand of plus
-                 star_jump 1
-              3: ... code after plus
-            For star_jump considerations this is processed identically
-            to star. */
-         a = (unsigned char)*code++;
-         a |= (unsigned char)*code++ << 8;
-         a = (int)(short)a;
-         {
-           char map[256], can_be_null;
-           char *p1, *p2;
-
-           p1 = code + a + 3; /* skip the failure_jump */
-           assert(p1[-3] == Cfailure_jump);
-           p2 = code;
-           /* p1 points inside loop, p2 points to after loop */
-           if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
-                                      p2 - bufp->buffer, &can_be_null, map))
-             goto make_normal_jump;
-           /* If we might introduce a new update point inside the loop,
-              we can't optimize because then update_jump would update a
-              wrong failure point.  Thus we have to be quite careful here. */
-         loop_p1:
-           /* loop until we find something that consumes a character */
-           switch (*p1++)
+          old_regs->start[0] = pos;
+          old_regs->end[0] = match_end;
+          if (!bufp->uses_registers)
+          {
+             for (a = 1; a < RE_NREGS; a++)
               {
-              case Cbol:
-              case Ceol:
-              case Cbegbuf:
-              case Cendbuf:
-              case Cwordbeg:
-              case Cwordend:
-              case Cwordbound:
-              case Cnotwordbound:
-#ifdef emacs
-              case Cemacs_at_dot:
-#endif /* emacs */
-                goto loop_p1;
-              case Cstart_memory:
-              case Cend_memory:
-                p1++;
-                goto loop_p1;
-             case Cexact:
-               ch = (unsigned char)*p1++;
-               if (map[ch])
-                 goto make_normal_jump;
-               break;
-             case Canychar:
-               for (b = 0; b < 256; b++)
-                 if (b != '\n' && map[b])
-                   goto make_normal_jump;
-               break;
-             case Cset:
-               for (b = 0; b < 256; b++)
-                 if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
-                   goto make_normal_jump;
-               p1 += 256/8;
-               break;
-             default:
-               goto make_normal_jump;
+                old_regs->start[a] = -1;
+                old_regs->end[a] = -1;
               }
-           /* now we know that we can't backtrack. */
-           while (p1 != p2 - 3)
+          }
+          else
+          {
+             for (a = 1; a < RE_NREGS; a++)
               {
-               switch (*p1++)
-                 {
-                 case Cend:
-                   abort();  /* we certainly shouldn't get this inside loop */
-                   /*NOTREACHED*/
-                 case Cbol:
-                 case Ceol:
-                 case Canychar:
-                 case Cbegbuf:
-                 case Cendbuf:
-                 case Cwordbeg:
-                 case Cwordend:
-                 case Cwordbound:
-                 case Cnotwordbound:
-#ifdef emacs
-                 case Cemacs_at_dot:
-#endif /* emacs */
-                   break;
-                 case Cset:
-                   p1 += 256/8;
-                   break;
-                 case Cexact:
-                 case Cstart_memory:
-                 case Cend_memory:
-                 case Cmatch_memory:
-                 case Csyntaxspec:
-                 case Cnotsyntaxspec:
-                   p1++;
-                   break;
-                 case Cjump:
-                 case Cstar_jump:
-                 case Cfailure_jump:
-                 case Cupdate_failure_jump:
-                 case Cdummy_failure_jump:
-                   goto make_normal_jump;
-                 default:
-                   printf("regexpr.c: processing star_jump: unknown op %d\n", p1[-1]);
-                   break;
-                 }
+                if ((GET_REG_START(state, a) == NULL) ||
+                    (GET_REG_END(state, a) == NULL))
+                {
+                   old_regs->start[a] = -1;
+                   old_regs->end[a] = -1;
+                   continue;
+                }
+                old_regs->start[a] = GET_REG_START(state, a) - textstart;
+                old_regs->end[a] = GET_REG_END(state, a) - textstart;
               }
-           goto make_update_jump;
-         }
-       make_normal_jump:
-         /* printf("changing to normal jump\n"); */
-         code -= 3;
-         *code = Cjump;
-         break;
-       make_update_jump:
-         /* printf("changing to update jump\n"); */
-         code -= 2;
-         a += 3;  /* jump to after the Cfailure_jump */
-         code[-1] = Cupdate_failure_jump;
-         code[0] = a & 0xff;
-         code[1] = a >> 8;
-         /* fall to next case */
-       case Cupdate_failure_jump:
-         failure_sp[-1].text = text;
-         failure_sp[-1].partend = partend;
-         /* fall to next case */
-       case Cjump:
-         a = (unsigned char)*code++;
-         a |= (unsigned char)*code++ << 8;
-         code += (int)(short)a;
-         break;
-       case Cdummy_failure_jump:
-       case Cfailure_jump:
-         if (failure_sp == failure_stack_end)
-           {
-             if (failure_stack_start != initial_failure_stack)
-               goto error;
-             failure_stack_start = (struct failure_point *)
-               malloc(MAX_FAILURES * sizeof(*failure_stack_start));
-             if (failure_stack_start == NULL)
-               {
-                 failure_stack_start = initial_failure_stack;
-                 goto error;
-               }
-             failure_stack_end = failure_stack_start + MAX_FAILURES;
-             memcpy((char *)failure_stack_start, (char *)initial_failure_stack,
-                    INITIAL_FAILURES * sizeof(*failure_stack_start));
-             failure_sp = failure_stack_start + INITIAL_FAILURES;
-           }
-         a = (unsigned char)*code++;
-         a |= (unsigned char)*code++ << 8;
-         a = (int)(short)a;
-         if (code[-3] == Cdummy_failure_jump)
-           { /* this is only used in plus */
-             assert(*code == Cfailure_jump);
-             b = (unsigned char)code[1];
-             b |= (unsigned char)code[2] << 8;
-             failure_sp->code = code + (int)(short)b + 3;
-             failure_sp->text = NULL;
-             code += a;
-           }
-         else
-           {
-             failure_sp->code = code + a;
-             failure_sp->text = text;
-             failure_sp->partend = partend;
-           }
-         failure_sp++;
-         break;
-       case Cbegbuf:
-         if (text == string1)
-           break;
-         goto fail;
-       case Cendbuf:
-         if (size2 == 0 ? text == string1 + size1 : text == string2 + size2)
-           break;
-         goto fail;
-       case Cwordbeg:
-         if (text == string2 + size2)
-           goto fail;
-         if (size2 == 0 && text == string1 + size1)
-           goto fail;
-         if (SYNTAX(text == string1 + size1 ? *string1 : *text) != Sword)
-           goto fail;
-         if (text == string1)
-           break;
-         if (SYNTAX(text[-1]) != Sword)
-           break;
-         goto fail;
-       case Cwordend:
-         if (text == string1)
-           goto fail;
-         if (SYNTAX(text[-1]) != Sword)
-           goto fail;
-         if (text == string2 + size2)
-           break;
-         if (size2 == 0 && text == string1 + size1)
-           break;
-         if (SYNTAX(*text) == Sword)
-           goto fail;
-         break;
-       case Cwordbound:
-         /* Note: as in gnu regexp, this also matches at the beginning
-            and end of buffer. */
-         if (text == string1 || text == string2 + size2 ||
-             (size2 == 0 && text == string1 + size1))
-           break;
-         if ((SYNTAX(text[-1]) == Sword) ^
-             (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword))
-           break;
-         goto fail;
-       case Cnotwordbound:
-         /* Note: as in gnu regexp, this never matches at the beginning
-            and end of buffer. */
-         if (text == string1 || text == string2 + size2 ||
-             (size2 == 0 && text == string1 + size1))
-           goto fail;
-         if (!((SYNTAX(text[-1]) == Sword) ^
-               (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword)))
-           goto fail;
-         break;
-       case Csyntaxspec:
-         NEXTCHAR(ch);
-         if (SYNTAX(ch) != (unsigned char)*code++)
-           goto fail;
-         break;
-       case Cnotsyntaxspec:
-         NEXTCHAR(ch);
-         if (SYNTAX(ch) != (unsigned char)*code++)
-           break;
-         goto fail;
-#ifdef emacs
-       case Cemacs_at_dot:
-         if (PTR_CHAR_POS((unsigned char *)text) + 1 != point)
-           goto fail;
-         break;
-#endif /* emacs */
-       default:
-         abort();
-         /*NOTREACHED*/
+          }
         }
-    }
+/*     if(translated) */
+/*        free(translated); */
+       FREE_STATE(state);
+       return match_end - pos;
+     }
+     case Cbol:
+     {
+       if (text == textstart || text[-1] == '\n')
+          goto continue_matching;
+       goto fail;
+     }
+     case Ceol:
+     {
+       if (text == textend || *text == '\n')
+          goto continue_matching;
+       goto fail;
+     }
+     case Cset:
+     {
+       NEXTCHAR(ch);
+       if (code[ch/8] & (1<<(ch & 7)))
+       {
+          code += 256/8;
+          goto continue_matching;
+       }
+       goto fail;
+     }
+     case Cexact:
+     {
+       NEXTCHAR(ch);
+       if (ch != (unsigned char)*code++)
+          goto fail;
+/*     { */
+/*        char *p1 = code - 2; */
+/*        ch = *(code - 1); */
+/*        POP_FAILURE(state, code, text, goto done_matching, goto error); */
+/*        while ((code == p1) && (*text != ch)) */
+/*           POP_FAILURE(state, code, text, goto done_matching, goto error); */
+/*        if ((code == p1) && (*text == ch)) */
+/*        { */
+/*           code += 2; */
+/*           text++; */
+/*        } */
+/*     } */
+       goto continue_matching;
+     }
+     case Canychar:
+     {
+       NEXTCHAR(ch);
+       if (ch == '\n')
+          goto fail;
+       goto continue_matching;
+     }
+     case Cstart_memory:
+     {
+       reg = *code++;
+       SET_REG_START(state, reg, text, goto error);
+       goto continue_matching;
+     }
+     case Cend_memory:
+     {
+       reg = *code++;
+       SET_REG_END(state, reg, text, goto error);
+       goto continue_matching;
+     }
+     case Cmatch_memory:
+     {
+       /* back-reference: input must repeat the text held in register reg */
+       reg = *code++;
+       regstart = GET_REG_START(state, reg);
+       regend = GET_REG_END(state, reg);
+       if ((regstart == NULL) || (regend == NULL))
+          goto fail;  /* or should we just match nothing? */
+       regsize = regend - regstart;
+       if (regsize > (textend - text))
+          goto fail;  /* not enough input left to hold the copy */
+       if(translate)
+       {
+          /* index as unsigned char: plain char may be negative */
+          for (; regstart < regend; regstart++, text++)
+             if (translate[(unsigned char)*regstart] !=
+                 translate[(unsigned char)*text])
+                goto fail;
+       }
+       else
+          for (; regstart < regend; regstart++, text++)
+             if (*regstart != *text)
+                goto fail;
+       /* on success text has been advanced past the repeated span */
+       goto continue_matching;
+     }
+     case Cupdate_failure_jump:
+     {
+       UPDATE_FAILURE(state, text, goto error);
+       /* fall to next case */
+     }
+     /* treat Cstar_jump just like Cjump if it hasn't been optimized */
+     case Cstar_jump:
+     case Cjump:
+     {
+       a = (unsigned char)*code++;
+       a |= (unsigned char)*code++ << 8;
+       code += (int)(short)a;
+       goto continue_matching;
+     }
+     case Cdummy_failure_jump:
+     {
+       a = (unsigned char)*code++;
+       a |= (unsigned char)*code++ << 8;
+       a = (int)(short)a;
+       assert(*code == Cfailure_jump);
+       b = (unsigned char)code[1];
+       b |= (unsigned char)code[2] << 8;
+       PUSH_FAILURE(state, code + (int)(short)b + 3, NULL, goto error);
+       code += a;
+       goto continue_matching;
+     }
+     case Cfailure_jump:
+     {
+       a = (unsigned char)*code++;
+       a |= (unsigned char)*code++ << 8;
+       a = (int)(short)a;
+       PUSH_FAILURE(state, code + a, text, goto error);
+       goto continue_matching;
+     }
+     case Cbegbuf:
+     {
+       if (text == textstart)
+          goto continue_matching;
+       goto fail;
+     }
+     case Cendbuf:
+     {
+       if (text == textend)
+          goto continue_matching;
+       goto fail;
+     }
+     case Cwordbeg:
+     {
+       if (text == textend)
+          goto fail;
+       if (SYNTAX(*text) != Sword)
+          goto fail;
+       if (text == textstart)
+          goto continue_matching;
+       if (SYNTAX(text[-1]) != Sword)
+          goto continue_matching;
+       goto fail;
+     }
+     case Cwordend:
+     {
+       if (text == textstart)
+          goto fail;
+       if (SYNTAX(text[-1]) != Sword)
+          goto fail;
+       if (text == textend)
+          goto continue_matching;
+       if (SYNTAX(*text) == Sword)
+          goto fail;
+       goto continue_matching;
+     }
+     case Cwordbound:
+     {
+       /* Note: as in gnu regexp, this also matches at the beginning
+        * and end of buffer.  */
+
+       if (text == textstart || text == textend)
+          goto continue_matching;
+       if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))
+          goto continue_matching;
+       goto fail;
+     }
+     case Cnotwordbound:
+     {
+       /* Note: as in gnu regexp, this never matches at the beginning
+        * and end of buffer.  */
+       if (text == textstart || text == textend)
+          goto fail;
+       if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)))
+          goto fail;
+       goto continue_matching;
+     }
+     case Csyntaxspec:  /* next char's syntax class must equal the operand */
+     {
+       NEXTCHAR(ch);
+       if (SYNTAX(ch) == (unsigned char)*code++)
+          goto continue_matching;
+       goto fail;
+     }
+     case Cnotsyntaxspec:  /* next char's syntax class must differ from the operand */
+     {
+       NEXTCHAR(ch);
+       if (SYNTAX(ch) == (unsigned char)*code++)
+          goto fail;
+       goto continue_matching;
+     }
+     default:
+     {
+       abort();
+       /*NOTREACHED*/
+     }
+  }
+
  #if 0 /* This line is never reached --Guido */
    abort();
  #endif
-  /*NOTREACHED*/
-
- fail:
-  if (failure_sp != failure_stack_start)
-    {
-      failure_sp--;
-      text = failure_sp->text;
-      if (text == NULL)
-       goto fail;
-      partend = failure_sp->partend;
-      code = failure_sp->code;
-      goto continue_matching;
-    }
-  if (failure_stack_start != initial_failure_stack)
-    free((char *)failure_stack_start);
+  /*
+   *NOTREACHED
+   */
+  
+  fail:
+  POP_FAILURE(state, code, text, goto done_matching, goto error);
+  goto continue_matching;
+  
+  done_matching:
+/*   if(translated != NULL) */
+/*      free(translated); */
+  FREE_STATE(state);
    return -1;
  
- error:
-  if (failure_stack_start != initial_failure_stack)
-    free((char *)failure_stack_start);
+  error:
+/*   if (translated != NULL) */
+/*      free(translated); */
+  FREE_STATE(state);
    return -2;
  }
  
  #undef PREFETCH
  #undef NEXTCHAR
-#undef PUSH_FAILURE
  
-int re_match(bufp, string, size, pos, regs)
-regexp_t bufp;
-char *string;
-int size, pos;
-regexp_registers_t regs;
+int re_search(regexp_t bufp,
+             char *string,
+             int size,
+             int pos,
+             int range,
+             regexp_registers_t regs)
  {
-  return re_match_2(bufp, string, size, (char *)NULL, 0, pos, regs, size);
-}
-
-int re_search_2(bufp, string1, size1, string2, size2, pos, range, regs,
-               mstop)
-regexp_t bufp;
-char *string1, *string2;
-int size1, size2, pos, range, mstop;
-regexp_registers_t regs;
-{
-  char *fastmap, *translate, *text, *partstart, *partend;
-  int dir, ret;
+  char *fastmap;
+  char *translate;
+  char *text;
+  char *partstart;
+  char *partend;
+  int dir;
+  int ret;
    char anchor;
    
-  assert(size1 >= 0 && size2 >= 0 && pos >= 0 && mstop >= 0);
-  assert(pos + range >= 0 && pos + range <= size1 + size2); /* Bugfix by ylo */
-  assert(pos <= mstop);
+  assert(size >= 0 && pos >= 0);
+  assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */
    
    fastmap = bufp->fastmap;
    translate = bufp->translate;
    if (fastmap && !bufp->fastmap_accurate)
-    re_compile_fastmap(bufp);
+     re_compile_fastmap(bufp);
    anchor = bufp->anchor;
    if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
-    fastmap = NULL;
+     fastmap = NULL;
+
    if (range < 0)
-    {
-      dir = -1;
-      range = -range;
-    }
+  {
+     dir = -1;
+     range = -range;
+  }
    else
-    dir = 1;
+     dir = 1;
+
    if (anchor == 2)
-    if (pos != 0)
-      return -1;
-    else
-      range = 0;
+     if (pos != 0)
+       return -1;
+     else
+       range = 0;
+
    for (; range >= 0; range--, pos += dir)
-    {
-      if (fastmap)
-       {
-         if (dir == 1)
-           { /* searching forwards */
-             if (pos < size1)
-               {
-                 text = string1 + pos;
-                 if (pos + range > size1)
-                   partend = string1 + size1;
-                 else
-                   partend = string1 + pos + range;
-               }
-             else
-               {
-                 text = string2 + pos - size1;
-                 partend = string2 + pos + range - size1;
-               }
-             partstart = text;
-             if (translate)
-               while (text != partend &&
-                      !fastmap[(unsigned char)
-                               translate[(unsigned char)*text]])
-                 text++;
-             else
-               while (text != partend && !fastmap[(unsigned char)*text])
-                 text++;
-             pos += text - partstart;
-             range -= text - partstart;
-             if (pos == size1 + size2 && bufp->can_be_null == 0)
-               return -1;
-           }
-         else
-           { /* searching backwards */
-             if (pos <= size1)
-               {
-                 text = string1 + pos;
-                 partstart = string1 + pos - range;
-               }
-             else
-               {
-                 text = string2 + pos - size1;
-                 if (range < pos - size1)
-                   partstart = string2 + pos - size1 - range;
-                 else
-                   partstart = string2;
-               }
-             partend = text;
-             if (translate)
-               while (text != partstart &&
-                      !fastmap[(unsigned char)
-                               translate[(unsigned char)*text]])
-                 text--;
-             else
-               while (text != partstart &&
-                      !fastmap[(unsigned char)*text])
-                 text--;
-             pos -= partend - text;
-             range -= partend - text;
-           }
+  {
+     if (fastmap)
+     {
+       if (dir == 1)
+       { /* searching forwards */
+
+          text = string + pos;
+          partend = string + size;
+          partstart = text;
+          if (translate)
+             while (text != partend &&
+                    !fastmap[(unsigned char) translate[(unsigned char)*text]])
+                text++;
+          else
+             while (text != partend && !fastmap[(unsigned char)*text])
+                text++;
+          pos += text - partstart;
+          range -= text - partstart;
+          if (pos == size && bufp->can_be_null == 0)
+             return -1;
         }
-      if (anchor == 1)
-       { /* anchored to begline */
-         if (pos > 0 &&
-             (pos <= size1 ? string1[pos - 1] :
-              string2[pos - size1 - 1]) != '\n')
-           continue;
+       else
+       { /* searching backwards */
+          text = string + pos;
+          partstart = string + pos - range;
+          partend = text;
+          if (translate)
+             while (text != partstart &&
+                    !fastmap[(unsigned char)
+                            translate[(unsigned char)*text]])
+                text--;
+          else
+             while (text != partstart &&
+                    !fastmap[(unsigned char)*text])
+                text--;
+          pos -= partend - text;
+          range -= partend - text;
         }
-      assert(pos >= 0 && pos <= size1 + size2);
-      ret = re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop);
-      if (ret >= 0)
+     }
+     if (anchor == 1)
+     { /* anchored to begline */
+       if (pos > 0 && string[pos - 1])
+          continue;
+     }
+     assert(pos >= 0 && pos <= size);
+     ret = re_match(bufp, string, size, pos, regs);
+     if (ret >= 0)
         return pos;
-      if (ret == -2)
+     if (ret == -2)
         return -2;
-    }
+  }
    return -1;
  }
-
-int re_search(bufp, string, size, startpos, range, regs)
-regexp_t bufp;
-char *string;
-int size, startpos, range;
-regexp_registers_t regs;
-{
-  return re_search_2(bufp, string, size, (char *)NULL, 0,
-                    startpos, range, regs, size);
-}
-
-#ifdef UNUSED
-
-static struct re_pattern_buffer re_comp_buf;
-
-char *re_comp(s)
-char *s;
-{
-  if (s == NULL)
-    {
-      if (!re_comp_buf.buffer)
-       return "Out of memory";
-      return NULL;
-    }
-  if (!re_comp_buf.buffer)
-    {
-      /* the buffer will be allocated automatically */
-      re_comp_buf.fastmap = malloc(256);
-      re_comp_buf.translate = NULL;
-      if (re_comp_buf.fastmap == NULL)
-       return "Out of memory";
-    }
-  return re_compile_pattern(s, strlen(s), &re_comp_buf);
-}
-
-int re_exec(s)
-char *s;
-{
-  int len = strlen(s);
-  
-  return re_search(&re_comp_buf, s, len, 0, len, (regexp_registers_t)NULL) >= 0;
-}
-
-#endif
-
-#ifdef TEST_REGEXP
-
-int main()
-{
-  char buf[500];
-  char *cp;
-  struct re_pattern_buffer exp;
-  struct re_registers regs;
-  int a,pos;
-  char fastmap[256];
-
-  exp.allocated = 0;
-  exp.buffer = 0;
-  exp.translate = NULL;
-  exp.fastmap = fastmap;
-
-  /* re_set_syntax(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_ANSI_HEX); */
-
-  while (1)
-    {
-      printf("Enter regexp:\n");
-      gets(buf);
-      cp=re_compile_pattern(buf, strlen(buf), &exp);
-      if (cp)
-       {
-         printf("Error: %s\n", cp);
-         continue;
-       }
-      re_compile_fastmap(&exp);
-      printf("dump:\n");
-      for (pos = 0; pos < exp.used;)
-       {
-         printf("%d: ", pos);
-         switch (exp.buffer[pos++])
-           {
-           case Cend:
-             strcpy(buf, "end");
-             break;
-           case Cbol:
-             strcpy(buf, "bol");
-             break;
-           case Ceol:
-             strcpy(buf, "eol");
-             break;
-           case Cset:
-             strcpy(buf, "set ");
-             for (a = 0; a < 256/8; a++)
-               sprintf(buf+strlen(buf)," %02x",
-                       (unsigned char)exp.buffer[pos++]);
-             break;
-           case Cexact:
-             sprintf(buf, "exact '%c' 0x%x", exp.buffer[pos],
-                     (unsigned char)exp.buffer[pos]);
-             pos++;
-             break;
-           case Canychar:
-             strcpy(buf, "anychar");
-             break;
-           case Cstart_memory:
-             sprintf(buf, "start_memory %d", exp.buffer[pos++]);
-             break;
-           case Cend_memory:
-             sprintf(buf, "end_memory %d", exp.buffer[pos++]);
-             break;
-           case Cmatch_memory:
-             sprintf(buf, "match_memory %d", exp.buffer[pos++]);
-             break;
-           case Cjump:
-           case Cdummy_failure_jump:
-           case Cstar_jump:
-           case Cfailure_jump:
-           case Cupdate_failure_jump:
-             a = (unsigned char)exp.buffer[pos++];
-             a += (unsigned char)exp.buffer[pos++] << 8;
-             a = (int)(short)a;
-             switch (exp.buffer[pos-3])
-               {
-               case Cjump:
-                 cp = "jump";
-                 break;
-               case Cstar_jump:
-                 cp = "star_jump";
-                 break;
-               case Cfailure_jump:
-                 cp = "failure_jump";
-                 break;
-               case Cupdate_failure_jump:
-                 cp = "update_failure_jump";
-                 break;
-               case Cdummy_failure_jump:
-                 cp = "dummy_failure_jump";
-                 break;
-               default:
-                 cp = "unknown jump";
-                 break;
-               }
-             sprintf(buf, "%s %d", cp, a + pos);
-             break;
-           case Cbegbuf:
-             strcpy(buf,"begbuf");
-             break;
-           case Cendbuf:
-             strcpy(buf,"endbuf");
-             break;
-           case Cwordbeg:
-             strcpy(buf,"wordbeg");
-             break;
-           case Cwordend:
-             strcpy(buf,"wordend");
-             break;
-           case Cwordbound:
-             strcpy(buf,"wordbound");
-             break;
-           case Cnotwordbound:
-             strcpy(buf,"notwordbound");
-             break;
-           default:
-             sprintf(buf, "unknown code %d",
-                     (unsigned char)exp.buffer[pos - 1]);
-             break;
-           }
-         printf("%s\n", buf);
-       }
-      printf("can_be_null = %d uses_registers = %d anchor = %d\n",
-            exp.can_be_null, exp.uses_registers, exp.anchor);
-      
-      printf("fastmap:");
-      for (a = 0; a < 256; a++)
-       if (exp.fastmap[a])
-         printf(" %d", a);
-      printf("\n");
-      printf("Enter strings.  An empty line terminates.\n");
-      while (fgets(buf, sizeof(buf), stdin))
-       {
-         if (buf[0] == '\n')
-           break;
-         a = re_search(&exp, buf, strlen(buf), 0, strlen(buf), &regs);
-         printf("search returns %d\n", a);
-         if (a != -1)
-           {
-             for (a = 0; a < RE_NREGS; a++)
-               {
-                 printf("buf %d: %d to %d\n", a, regs.start[a], regs.end[a]);
-               }
-           }
-       }
-    }
-}
-
-#endif /* TEST_REGEXP */
diff --git a/Modules/regexpr.h b/Modules/regexpr.h

index 0ddb3a184de1f249df4b26e4d09576ccd2b72909..e623362e1e95864d0a51d250b650dcd7f5ecf82a 100644 (file)
--- a/Modules/regexpr.h
+++ b/Modules/regexpr.h
@@ -69,9 +69,7 @@ typedef struct re_registers
  #define re_set_syntax _Py_re_set_syntax
  #define re_compile_pattern _Py_re_compile_pattern
  #define re_match _Py_re_match
-#define re_match_2 _Py_re_match_2
  #define re_search _Py_re_search
-#define re_search_2 _Py_re_search_2
  #define re_compile_fastmap _Py_re_compile_fastmap
  #define re_comp _Py_re_comp
  #define re_exec _Py_re_exec
@@ -96,20 +94,12 @@ char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
     translation table, or NULL if it is not used. */
  
  int re_match(regexp_t compiled, char *string, int size, int pos,
-            regexp_registers_t regs);
+            regexp_registers_t old_regs);
  /* This tries to match the regexp against the string.  This returns the
     length of the matched portion, or -1 if the pattern could not be
     matched and -2 if an error (such as failure stack overflow) is
     encountered. */
  
-int re_match_2(regexp_t compiled, char *string1, int size1,
-             char *string2, int size2, int pos, regexp_registers_t regs,
-              int mstop);
-/* This tries to match the regexp to the concatenation of string1 and
-   string2.  This returns the length of the matched portion, or -1 if the
-   pattern could not be matched and -2 if an error (such as failure stack
-   overflow) is encountered. */
-
  int re_search(regexp_t compiled, char *string, int size, int startpos,
               int range, regexp_registers_t regs);
  /* This searches for a substring matching the regexp.  This returns the first
@@ -119,12 +109,6 @@ int re_search(regexp_t compiled, char *string, int size, int startpos,
     which a match must not go.  This returns -1 if no match is found, and
     -2 if an error (such as failure stack overflow) is encountered. */
  
-int re_search_2(regexp_t compiled, char *string1, int size1,
-               char *string2, int size2, int startpos, int range,
-               regexp_registers_t regs, int mstop);
-/* This is like re_search, but search from the concatenation of string1 and
-   string2.  */
-
  void re_compile_fastmap(regexp_t compiled);
  /* This computes the fastmap for the regexp.  For this to have any effect,
     the calling program must have initialized the fastmap field to point
@@ -146,9 +130,7 @@ extern int re_syntax;
  int re_set_syntax();
  char *re_compile_pattern();
  int re_match();
-int re_match_2();
  int re_search();
-int re_search_2();
  void re_compile_fastmap();
  char *re_comp();
  int re_exec();
author	Guido van Rossum <guido@python.org>
	Fri, 9 May 1997 02:35:58 +0000 (02:35 +0000)
committer	Guido van Rossum <guido@python.org>
	Fri, 9 May 1997 02:35:58 +0000 (02:35 +0000)
Modules/regexpr.c		patch \| blob \| blame \| history
Modules/regexpr.h		patch \| blob \| blame \| history