Bring SRE up to date with Python 2.1

author Guido van Rossum <guido@python.org>

Wed, 13 Jun 2001 15:15:02 +0000 (15:15 +0000)

committer Guido van Rossum <guido@python.org>

Wed, 13 Jun 2001 15:15:02 +0000 (15:15 +0000)
author Guido van Rossum <guido@python.org>
Wed, 13 Jun 2001 15:15:02 +0000 (15:15 +0000)
committer Guido van Rossum <guido@python.org>
Wed, 13 Jun 2001 15:15:02 +0000 (15:15 +0000)
diff --git a/Lib/sre.py b/Lib/sre.py

index 6dea5c40456f23a7dcd0e04eadfda16cad631630..6706fac8692e09cf723f76b744428d8046440665 100644 (file)
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -3,7 +3,7 @@
  #
  # re-compatible interface for the sre matching engine
  #
-# Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
  #
  # This version of the SRE library can be redistributed under CNRI's
  # Python 1.6 license.  For any other use, please contact Secret Labs
@@ -14,23 +14,31 @@
  # other compatibility work.
  #
  
-# FIXME: change all FIXME's to XXX ;-)
-
  import sre_compile
  import sre_parse
  
+# public symbols
+__all__ = [ "match", "search", "sub", "subn", "split", "findall",
+    "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
+    "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
+    "UNICODE", "error" ]
+
+__version__ = "2.1b2"
+
+# this module works under 1.5.2 and later.  don't use string methods
  import string
  
  # flags
-I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
-L = LOCALE = sre_compile.SRE_FLAG_LOCALE
-M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
-S = DOTALL = sre_compile.SRE_FLAG_DOTALL
-X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
+I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
+L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
+U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
+M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
+S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
+X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
  
-# sre extensions (may or may not be in 1.6/2.0 final)
-T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
-U = UNICODE = sre_compile.SRE_FLAG_UNICODE
+# sre extensions (experimental, don't rely on these)
+T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
+DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
  
  # sre exception
  error = sre_compile.error
@@ -38,36 +46,60 @@ error = sre_compile.error
  # --------------------------------------------------------------------
  # public interface
  
-# FIXME: add docstrings
-
  def match(pattern, string, flags=0):
+    """Try to apply the pattern at the start of the string, returning
+    a match object, or None if no match was found."""
      return _compile(pattern, flags).match(string)
  
  def search(pattern, string, flags=0):
+    """Scan through string looking for a match to the pattern, returning
+    a match object, or None if no match was found."""
      return _compile(pattern, flags).search(string)
  
  def sub(pattern, repl, string, count=0):
+    """Return the string obtained by replacing the leftmost
+    non-overlapping occurrences of the pattern in string by the
+    replacement repl"""
      return _compile(pattern, 0).sub(repl, string, count)
  
  def subn(pattern, repl, string, count=0):
+    """Return a 2-tuple containing (new_string, number).
+    new_string is the string obtained by replacing the leftmost
+    non-overlapping occurrences of the pattern in the source
+    string by the replacement repl.  number is the number of
+    substitutions that were made."""
      return _compile(pattern, 0).subn(repl, string, count)
  
  def split(pattern, string, maxsplit=0):
+    """Split the source string by the occurrences of the pattern,
+    returning a list containing the resulting substrings."""
      return _compile(pattern, 0).split(string, maxsplit)
  
  def findall(pattern, string, maxsplit=0):
+    """Return a list of all non-overlapping matches in the string.
+
+    If one or more groups are present in the pattern, return a
+    list of groups; this will be a list of tuples if the pattern
+    has more than one group.
+
+    Empty matches are included in the result."""
      return _compile(pattern, 0).findall(string, maxsplit)
  
  def compile(pattern, flags=0):
+    "Compile a regular expression pattern, returning a pattern object."
      return _compile(pattern, flags)
  
  def purge():
+    "Clear the regular expression cache"
      _cache.clear()
+    _cache_repl.clear()
  
  def template(pattern, flags=0):
+    "Compile a template pattern, returning a pattern object"
      return _compile(pattern, flags|T)
  
  def escape(pattern):
+    "Escape all non-alphanumeric characters in pattern."
      s = list(pattern)
      for i in range(len(pattern)):
          c = pattern[i]
@@ -82,6 +114,8 @@ def escape(pattern):
  # internals
  
  _cache = {}
+_cache_repl = {}
+
  _MAXCACHE = 100
  
  def _join(seq, sep):
@@ -105,6 +139,21 @@ def _compile(*key):
      _cache[key] = p
      return p
  
+def _compile_repl(*key):
+    # internal: compile replacement pattern
+    p = _cache_repl.get(key)
+    if p is not None:
+        return p
+    repl, pattern = key
+    try:
+        p = sre_parse.parse_template(repl, pattern)
+    except error, v:
+        raise error, v # invalid expression
+    if len(_cache_repl) >= _MAXCACHE:
+        _cache_repl.clear()
+    _cache_repl[key] = p
+    return p
+
  def _expand(pattern, match, template):
      # internal: match.expand implementation hook
      template = sre_parse.parse_template(template, pattern)
@@ -119,7 +168,7 @@ def _subn(pattern, template, string, count=0):
      if callable(template):
          filter = template
      else:
-        template = sre_parse.parse_template(template, pattern)
+        template = _compile_repl(template, pattern)
          def filter(match, template=template):
              return sre_parse.expand_template(template, match)
      n = i = 0
@@ -158,7 +207,7 @@ def _split(pattern, string, maxsplit=0):
              continue
          append(string[i:b])
          if g and b != e:
-            extend(m.groups())
+            extend(list(m.groups()))
          i = e
          n = n + 1
      append(string[i:])
@@ -204,7 +253,7 @@ class Scanner:
                  break
              action = self.lexicon[m.lastindex][1]
              if callable(action):
-                self.match = match
+                self.match = m
                  action = action(self, m.group())
              if action is not None:
                  append(action)
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py

index dc508e57cdc4d1c1beab725feac59b17cc709cf8..44cb23e6a4a78c26621c7a3df356f36e0729b7bc 100644 (file)
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -3,7 +3,7 @@
  #
  # convert template to internal format
  #
-# Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  #
  # See the sre.py file for information on usage and redistribution.
  #
@@ -12,6 +12,8 @@ import _sre
  
  from sre_constants import *
  
+assert _sre.MAGIC == MAGIC, "SRE module mismatch"
+
  MAXCODE = 65535
  
  def _compile(code, pattern, flags):
@@ -21,9 +23,10 @@ def _compile(code, pattern, flags):
          if op in (LITERAL, NOT_LITERAL):
              if flags & SRE_FLAG_IGNORECASE:
                  emit(OPCODES[OP_IGNORE[op]])
+                emit(_sre.getlower(av, flags))
              else:
                  emit(OPCODES[op])
-            emit(av)
+                emit(av)
          elif op is IN:
              if flags & SRE_FLAG_IGNORECASE:
                  emit(OPCODES[OP_IGNORE[op]])
@@ -102,9 +105,12 @@ def _compile(code, pattern, flags):
          elif op is AT:
              emit(OPCODES[op])
              if flags & SRE_FLAG_MULTILINE:
-                emit(ATCODES[AT_MULTILINE.get(av, av)])
-            else:
-                emit(ATCODES[av])
+                av = AT_MULTILINE.get(av, av)
+            if flags & SRE_FLAG_LOCALE:
+                av = AT_LOCALE.get(av, av)
+            elif flags & SRE_FLAG_UNICODE:
+                av = AT_UNICODE.get(av, av)
+            emit(ATCODES[av])
          elif op is BRANCH:
              emit(OPCODES[op])
              tail = []
@@ -121,11 +127,10 @@ def _compile(code, pattern, flags):
          elif op is CATEGORY:
              emit(OPCODES[op])
              if flags & SRE_FLAG_LOCALE:
-                emit(CHCODES[CH_LOCALE[av]])
+                av = CH_LOCALE[av]
              elif flags & SRE_FLAG_UNICODE:
-                emit(CHCODES[CH_UNICODE[av]])
-            else:
-                emit(CHCODES[av])
+                av = CH_UNICODE[av]
+            emit(CHCODES[av])
          elif op is GROUPREF:
              if flags & SRE_FLAG_IGNORECASE:
                  emit(OPCODES[OP_IGNORE[op]])
@@ -176,7 +181,7 @@ def _optimize_charset(charset, fixup):
                  for i in range(fixup(av[0]), fixup(av[1])+1):
                      charmap[i] = 1
              elif op is CATEGORY:
-                # FIXME: could append to charmap tail
+                # XXX: could append to charmap tail
                  return charset # cannot compress
      except IndexError:
          # character set contains unicode characters
@@ -364,7 +369,7 @@ def compile(p, flags=0):
  
      # print code
  
-    # FIXME: <fl> get rid of this limitation!
+    # XXX: <fl> get rid of this limitation!
      assert p.pattern.groups <= 100,\
             "sorry, but this version only supports 100 named groups"
  
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py

index ea649c048293dfc32949387693120911732c7655..bbe7880a1d55f023cad702ca151ec7ba8ae25c36 100644 (file)
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -4,13 +4,20 @@
  # various symbols used by the regular expression engine.
  # run this script to update the _sre include files!
  #
-# Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
  #
  # See the sre.py file for information on usage and redistribution.
  #
  
+# update when constants are added or removed
+
+MAGIC = 20010320
+
+# max code word in this release
+
  MAXREPEAT = 65535
  
+# SRE standard exception (access as sre.error)
  # should this really be here?
  
  class error(Exception):
@@ -54,10 +61,16 @@ SUBPATTERN = "subpattern"
  # positions
  AT_BEGINNING = "at_beginning"
  AT_BEGINNING_LINE = "at_beginning_line"
+AT_BEGINNING_STRING = "at_beginning_string"
  AT_BOUNDARY = "at_boundary"
  AT_NON_BOUNDARY = "at_non_boundary"
  AT_END = "at_end"
  AT_END_LINE = "at_end_line"
+AT_END_STRING = "at_end_string"
+AT_LOC_BOUNDARY = "at_loc_boundary"
+AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
+AT_UNI_BOUNDARY = "at_uni_boundary"
+AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
  
  # categories
  CATEGORY_DIGIT = "category_digit"
@@ -109,8 +122,10 @@ OPCODES = [
  ]
  
  ATCODES = [
-    AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
-    AT_NON_BOUNDARY, AT_END, AT_END_LINE
+    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
+    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
+    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
+    AT_UNI_NON_BOUNDARY
  ]
  
  CHCODES = [
@@ -148,6 +163,16 @@ AT_MULTILINE = {
      AT_END: AT_END_LINE
  }
  
+AT_LOCALE = {
+    AT_BOUNDARY: AT_LOC_BOUNDARY,
+    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
+}
+
+AT_UNICODE = {
+    AT_BOUNDARY: AT_UNI_BOUNDARY,
+    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
+}
+
  CH_LOCALE = {
      CATEGORY_DIGIT: CATEGORY_DIGIT,
      CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
@@ -178,6 +203,7 @@ SRE_FLAG_MULTILINE = 8 # treat target as multiline string
  SRE_FLAG_DOTALL = 16 # treat target as a single string
  SRE_FLAG_UNICODE = 32 # use unicode locale
  SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
+SRE_FLAG_DEBUG = 128 # debugging
  
  # flags for INFO primitive
  SRE_INFO_PREFIX = 1 # has prefix
@@ -201,13 +227,15 @@ if __name__ == "__main__":
   * NOTE: This file is generated by sre_constants.py.  If you need
   * to change anything in here, edit sre_constants.py and run it.
   *
- * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+ * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
   *
   * See the _sre.c file for information on usage and redistribution.
   */
  
  """)
  
+    f.write("#define SRE_MAGIC %d\n" % MAGIC)
+
      dump(f, OPCODES, "SRE_OP")
      dump(f, ATCODES, "SRE")
      dump(f, CHCODES, "SRE")
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py

index 7c36d4f2dcb2bb673d34d333c1b03407c8954efb..44626bd5e82fbd30f07a2a7e4db99a6cec0b8c60 100644 (file)
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -3,11 +3,14 @@
  #
  # convert re-style regular expression to sre pattern
  #
-# Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
+# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
  #
  # See the sre.py file for information on usage and redistribution.
  #
  
+# XXX: show string offset and offending character for all errors
+
+# this module works under 1.5.2 and later.  don't use string methods
  import string, sys
  
  from sre_constants import *
@@ -23,18 +26,18 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF")
  WHITESPACE = tuple(" \t\n\r\v\f")
  
  ESCAPES = {
-    r"\a": (LITERAL, 7),
-    r"\b": (LITERAL, 8),
-    r"\f": (LITERAL, 12),
-    r"\n": (LITERAL, 10),
-    r"\r": (LITERAL, 13),
-    r"\t": (LITERAL, 9),
-    r"\v": (LITERAL, 11),
+    r"\a": (LITERAL, ord("\a")),
+    r"\b": (LITERAL, ord("\b")),
+    r"\f": (LITERAL, ord("\f")),
+    r"\n": (LITERAL, ord("\n")),
+    r"\r": (LITERAL, ord("\r")),
+    r"\t": (LITERAL, ord("\t")),
+    r"\v": (LITERAL, ord("\v")),
      r"\\": (LITERAL, ord("\\"))
  }
  
  CATEGORIES = {
-    r"\A": (AT, AT_BEGINNING), # start of string
+    r"\A": (AT, AT_BEGINNING_STRING), # start of string
      r"\b": (AT, AT_BOUNDARY),
      r"\B": (AT, AT_NON_BOUNDARY),
      r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
@@ -43,7 +46,7 @@ CATEGORIES = {
      r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
      r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
      r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
-    r"\Z": (AT, AT_END), # end of string
+    r"\Z": (AT, AT_END_STRING), # end of string
  }
  
  FLAGS = {
@@ -58,18 +61,31 @@ FLAGS = {
      "u": SRE_FLAG_UNICODE,
  }
  
+# figure out best way to convert hex/octal numbers to integers
+try:
+    int("10", 8)
+    atoi = int # 2.0 and later
+except TypeError:
+    atoi = string.atoi # 1.5.2
+
  class Pattern:
      # master pattern object.  keeps track of global attributes
      def __init__(self):
          self.flags = 0
+        self.open = []
          self.groups = 1
          self.groupdict = {}
-    def getgroup(self, name=None):
+    def opengroup(self, name=None):
          gid = self.groups
          self.groups = gid + 1
          if name:
              self.groupdict[name] = gid
+        self.open.append(gid)
          return gid
+    def closegroup(self, gid):
+        self.open.remove(gid)
+    def checkgroup(self, gid):
+        return gid < self.groups and gid not in self.open
  
  class SubPattern:
      # a subpattern, in intermediate form
@@ -208,7 +224,7 @@ def isname(name):
  def _group(escape, groups):
      # check if the escape string represents a valid group
      try:
-        gid = int(escape[1:])
+        gid = atoi(escape[1:])
          if gid and gid < groups:
              return gid
      except ValueError:
@@ -231,13 +247,13 @@ def _class_escape(source, escape):
              escape = escape[2:]
              if len(escape) != 2:
                  raise error, "bogus escape: %s" % repr("\\" + escape)
-            return LITERAL, int(escape, 16) & 0xff
+            return LITERAL, atoi(escape, 16) & 0xff
          elif str(escape[1:2]) in OCTDIGITS:
              # octal escape (up to three digits)
              while source.next in OCTDIGITS and len(escape) < 5:
                  escape = escape + source.get()
              escape = escape[1:]
-            return LITERAL, int(escape, 8) & 0xff
+            return LITERAL, atoi(escape, 8) & 0xff
          if len(escape) == 2:
              return LITERAL, ord(escape[1])
      except ValueError:
@@ -259,12 +275,12 @@ def _escape(source, escape, state):
                  escape = escape + source.get()
              if len(escape) != 4:
                  raise ValueError
-            return LITERAL, int(escape[2:], 16) & 0xff
+            return LITERAL, atoi(escape[2:], 16) & 0xff
          elif escape[1:2] == "0":
              # octal escape
              while source.next in OCTDIGITS and len(escape) < 4:
                  escape = escape + source.get()
-            return LITERAL, int(escape[1:], 8) & 0xff
+            return LITERAL, atoi(escape[1:], 8) & 0xff
          elif escape[1:2] in DIGITS:
              # octal escape *or* decimal group reference (sigh)
              here = source.tell()
@@ -274,10 +290,12 @@ def _escape(source, escape, state):
                      source.next in OCTDIGITS):
                      # got three octal digits; this is an octal escape
                      escape = escape + source.get()
-                    return LITERAL, int(escape[1:], 8) & 0xff
+                    return LITERAL, atoi(escape[1:], 8) & 0xff
              # got at least one decimal digit; this is a group reference
              group = _group(escape, state.groups)
              if group:
+                if not state.checkgroup(group):
+                    raise error, "cannot refer to open group"
                  return GROUPREF, group
              raise ValueError
          if len(escape) == 2:
@@ -402,24 +420,24 @@ def _parse(source, state):
                          else:
                              code2 = LITERAL, ord(this)
                          if code1[0] != LITERAL or code2[0] != LITERAL:
-                            raise error, "illegal range"
+                            raise error, "bad character range"
                          lo = code1[1]
                          hi = code2[1]
                          if hi < lo:
-                            raise error, "illegal range"
+                            raise error, "bad character range"
                          set.append((RANGE, (lo, hi)))
                  else:
                      if code1[0] is IN:
                          code1 = code1[1][0]
                      set.append(code1)
  
-            # FIXME: <fl> move set optimization to compiler!
+            # XXX: <fl> should move set optimization to compiler!
              if len(set)==1 and set[0][0] is LITERAL:
                  subpattern.append(set[0]) # optimization
              elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
                  subpattern.append((NOT_LITERAL, set[1][1])) # optimization
              else:
-                # FIXME: <fl> add charmap optimization
+                # XXX: <fl> should add charmap optimization here
                  subpattern.append((IN, set))
  
          elif this and this[0] in REPEAT_CHARS:
@@ -428,6 +446,7 @@ def _parse(source, state):
                  min, max = 0, 1
              elif this == "*":
                  min, max = 0, MAXREPEAT
+
              elif this == "+":
                  min, max = 1, MAXREPEAT
              elif this == "{":
@@ -446,17 +465,22 @@ def _parse(source, state):
                      source.seek(here)
                      continue
                  if lo:
-                    min = int(lo)
+                    min = atoi(lo)
                  if hi:
-                    max = int(hi)
-                # FIXME: <fl> check that hi >= lo!
+                    max = atoi(hi)
+                if max < min:
+                    raise error, "bad repeat interval"
              else:
                  raise error, "not supported"
              # figure out which item to repeat
              if subpattern:
                  item = subpattern[-1:]
              else:
+                item = None
+            if not item or (len(item) == 1 and item[0][0] == AT):
                  raise error, "nothing to repeat"
+            if item[0][0] in (MIN_REPEAT, MAX_REPEAT):
+                raise error, "multiple repeat"
              if source.match("?"):
                  subpattern[-1] = (MIN_REPEAT, (min, max, item))
              else:
@@ -485,7 +509,7 @@ def _parse(source, state):
                              name = name + char
                          group = 1
                          if not isname(name):
-                            raise error, "illegal character in group name"
+                            raise error, "bad character in group name"
                      elif source.match("="):
                          # named backreference
                          name = ""
@@ -497,7 +521,7 @@ def _parse(source, state):
                                  break
                              name = name + char
                          if not isname(name):
-                            raise error, "illegal character in group name"
+                            raise error, "bad character in group name"
                          gid = state.groupdict.get(name)
                          if gid is None:
                              raise error, "unknown group name"
@@ -539,6 +563,8 @@ def _parse(source, state):
                      continue
                  else:
                      # flags
+                    if not FLAGS.has_key(source.next):
+                        raise error, "unexpected end of pattern"
                      while FLAGS.has_key(source.next):
                          state.flags = state.flags | FLAGS[source.get()]
              if group:
@@ -547,15 +573,19 @@ def _parse(source, state):
                      # anonymous group
                      group = None
                  else:
-                    group = state.getgroup(name)
+                    group = state.opengroup(name)
                  p = _parse_sub(source, state)
                  if not source.match(")"):
                      raise error, "unbalanced parenthesis"
+                if group is not None:
+                    state.closegroup(group)
                  subpattern.append((SUBPATTERN, (group, p)))
              else:
                  while 1:
                      char = source.get()
-                    if char is None or char == ")":
+                    if char is None:
+                        raise error, "unexpected end of pattern"
+                    if char == ")":
                          break
                      raise error, "unknown extension"
  
@@ -582,6 +612,7 @@ def parse(str, flags=0, pattern=None):
      if pattern is None:
          pattern = Pattern()
      pattern.flags = flags
+    pattern.str = str
  
      p = _parse_sub(source, pattern, 0)
  
@@ -591,7 +622,8 @@ def parse(str, flags=0, pattern=None):
      elif tail:
          raise error, "bogus characters at end of regular expression"
  
-    # p.dump()
+    if flags & SRE_FLAG_DEBUG:
+        p.dump()
  
      if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
          # the VERBOSE flag was switched on inside the pattern.  to be
@@ -606,6 +638,16 @@ def parse_template(source, pattern):
      s = Tokenizer(source)
      p = []
      a = p.append
+    def literal(literal, p=p):
+        if p and p[-1][0] is LITERAL:
+            p[-1] = LITERAL, p[-1][1] + literal
+        else:
+            p.append((LITERAL, literal))
+    sep = source[:0]
+    if type(sep) is type(""):
+        char = chr
+    else:
+        char = unichr
      while 1:
          this = s.get()
          if this is None:
@@ -625,10 +667,10 @@ def parse_template(source, pattern):
                  if not name:
                      raise error, "bad group name"
                  try:
-                    index = int(name)
+                    index = atoi(name)
                  except ValueError:
                      if not isname(name):
-                        raise error, "illegal character in group name"
+                        raise error, "bad character in group name"
                      try:
                          index = pattern.groupindex[name]
                      except KeyError:
@@ -641,7 +683,7 @@ def parse_template(source, pattern):
                      if group:
                          if (s.next not in DIGITS or
                              not _group(this + s.next, pattern.groups+1)):
-                            code = MARK, int(group)
+                            code = MARK, group
                              break
                      elif s.next in OCTDIGITS:
                          this = this + s.get()
@@ -649,34 +691,42 @@ def parse_template(source, pattern):
                          break
                  if not code:
                      this = this[1:]
-                    code = LITERAL, int(this[-6:], 8) & 0xff
-                a(code)
+                    code = LITERAL, char(atoi(this[-6:], 8) & 0xff)
+                if code[0] is LITERAL:
+                    literal(code[1])
+                else:
+                    a(code)
              else:
                  try:
-                    a(ESCAPES[this])
+                    this = char(ESCAPES[this][1])
                  except KeyError:
-                    for c in this:
-                        a((LITERAL, ord(c)))
+                    pass
+                literal(this)
          else:
-            a((LITERAL, ord(this)))
-    return p
+            literal(this)
+    # convert template to groups and literals lists
+    i = 0
+    groups = []
+    literals = []
+    for c, s in p:
+        if c is MARK:
+            groups.append((i, s))
+            literals.append(None)
+        else:
+            literals.append(s)
+        i = i + 1
+    return groups, literals
  
  def expand_template(template, match):
-    # FIXME: <fl> this is sooooo slow.  drop in the slicelist
-    # code instead
-    p = []
-    a = p.append
+    g = match.group
      sep = match.string[:0]
-    if type(sep) is type(""):
-        char = chr
-    else:
-        char = unichr
-    for c, s in template:
-        if c is LITERAL:
-            a(char(s))
-        elif c is MARK:
-            s = match.group(s)
+    groups, literals = template
+    literals = literals[:]
+    try:
+        for index, group in groups:
+            literals[index] = s = g(group)
              if s is None:
-                raise error, "empty group"
-            a(s)
-    return string.join(p, sep)
+                raise IndexError
+    except IndexError:
+        raise error, "empty group"
+    return string.join(literals, sep)
diff --git a/Misc/NEWS b/Misc/NEWS

index e0a85dde6a149ad5ad3aa58624407b8a6a9a27d4..6a88fa778c300d1ab484b229034a8f3079193509 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,8 @@ http://sourceforge.net/bugs/?func=detailbug&bug_id=<id>&group_id=5470
  More recent bugs are accessed as
  http://sourceforge.net/tracker/index.php?func=detail&aid=<id>&group_id=5470&atid=105470
  
+- Brought SRE up to date with Python 2.1
+
  - #117278, #117167: _tkinter
  
  - #116172, curses module fails to build on SGI, _curses
diff --git a/Modules/_sre.c b/Modules/_sre.c

index b72b8b2c1a6de4903f226abdfdc148f3b682772a..308b7260b57f96fa7c4e3fd93e02f03849cc397d 100644 (file)
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -5,14 +5,14 @@
   *
   * partial history:
   * 1999-10-24 fl  created (based on existing template matcher code)
- * 2000-03-06 fl  first alpha, sort of (0.5)
- * 2000-06-30 fl  added fast search optimization (0.9.3)
- * 2000-06-30 fl  added assert (lookahead) primitives, etc (0.9.4)
- * 2000-07-02 fl  added charset optimizations, etc (0.9.5)
+ * 2000-03-06 fl  first alpha, sort of
+ * 2000-06-30 fl  added fast search optimization
+ * 2000-06-30 fl  added assert (lookahead) primitives, etc
+ * 2000-07-02 fl  added charset optimizations, etc
   * 2000-07-03 fl  store code in pattern object, lookbehind, etc
   * 2000-07-08 fl  added regs attribute
- * 2000-07-21 fl  reset lastindex in scanner methods (0.9.6)
- * 2000-08-01 fl  fixes for 1.6b1 (0.9.8)
+ * 2000-07-21 fl  reset lastindex in scanner methods
+ * 2000-08-01 fl  fixes for 1.6b1
   * 2000-08-03 fl  added recursion limit
   * 2000-08-07 fl  use PyOS_CheckStack() if available
   * 2000-08-08 fl  changed findall to return empty strings instead of None
@@ -21,8 +21,15 @@
   * 2000-09-20 fl  added expand method
   * 2000-09-21 fl  don't use the buffer interface for unicode strings
   * 2000-10-03 fl  fixed assert_not primitive; support keyword arguments
+ * 2000-10-24 fl  really fixed assert_not; reset groups in findall
+ * 2000-12-21 fl  fixed memory leak in groupdict
+ * 2001-01-02 fl  properly reset pointer after failed assertion in MIN_UNTIL
+ * 2001-01-15 fl  avoid recursion for MIN_UNTIL; fixed uppercase literal bug
+ * 2001-01-16 fl  fixed memory leak in pattern destructor
+ * 2001-03-20 fl  lots of fixes for 2.1b2
+ * 2001-04-15 fl  export copyright as Python attribute, not global
   *
- * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+ * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
   *
   * This version of the SRE library can be redistributed under CNRI's
   * Python 1.6 license.  For any other use, please contact Secret Labs
@@ -35,7 +42,8 @@
  
  #ifndef SRE_RECURSIVE
  
-char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
+static char copyright[] =
+    " SRE 2.1b2 Copyright (c) 1997-2001 by Secret Labs AB ";
  
  #include "Python.h"
  
@@ -44,7 +52,9 @@ char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
  #include <ctype.h>
  
  /* name of this module, minus the leading underscore */
-#define MODULE "sre"
+#if !defined(SRE_MODULE)
+#define SRE_MODULE "sre"
+#endif
  
  /* defining this one enables tracing */
  #undef VERBOSE
@@ -76,6 +86,10 @@ char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
  /* enables aggressive inlining (always on for Visual C) */
  #undef USE_INLINE
  
+#if PY_VERSION_HEX < 0x01060000
+#define PyObject_DEL(op) PyMem_DEL((op))
+#endif
+
  /* -------------------------------------------------------------------- */
  
  #if defined(_MSC_VER)
@@ -130,11 +144,6 @@ static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
  106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
  120, 121, 122, 123, 124, 125, 126, 127 };
  
-static unsigned int sre_lower(unsigned int ch)
-{
-    return ((ch) < 128 ? sre_char_lower[ch] : ch);
-}
-
  #define SRE_IS_DIGIT(ch)\
      ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
  #define SRE_IS_SPACE(ch)\
@@ -146,30 +155,39 @@ static unsigned int sre_lower(unsigned int ch)
  #define SRE_IS_WORD(ch)\
      ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
  
-/* locale-specific character predicates */
-
-static unsigned int sre_lower_locale(unsigned int ch)
+static unsigned int sre_lower(unsigned int ch)
  {
-    return ((ch) < 256 ? tolower((ch)) : ch);
+    return ((ch) < 128 ? sre_char_lower[ch] : ch);
  }
+
+/* locale-specific character predicates */
+
  #define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
  #define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
  #define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
  #define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
  #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
  
+static unsigned int sre_lower_locale(unsigned int ch)
+{
+    return ((ch) < 256 ? tolower((ch)) : ch);
+}
+
  /* unicode-specific character predicates */
  
  #if defined(HAVE_UNICODE)
-static unsigned int sre_lower_unicode(unsigned int ch)
-{
-    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
-}
+
  #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
  #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
  #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
  #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
  #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
+
+static unsigned int sre_lower_unicode(unsigned int ch)
+{
+    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
+}
+
  #endif
  
  LOCAL(int)
@@ -216,6 +234,23 @@ sre_category(SRE_CODE category, unsigned int ch)
          return SRE_UNI_IS_LINEBREAK(ch);
      case SRE_CATEGORY_UNI_NOT_LINEBREAK:
          return !SRE_UNI_IS_LINEBREAK(ch);
+#else
+    case SRE_CATEGORY_UNI_DIGIT:
+        return SRE_IS_DIGIT(ch);
+    case SRE_CATEGORY_UNI_NOT_DIGIT:
+        return !SRE_IS_DIGIT(ch);
+    case SRE_CATEGORY_UNI_SPACE:
+        return SRE_IS_SPACE(ch);
+    case SRE_CATEGORY_UNI_NOT_SPACE:
+        return !SRE_IS_SPACE(ch);
+    case SRE_CATEGORY_UNI_WORD:
+        return SRE_LOC_IS_WORD(ch);
+    case SRE_CATEGORY_UNI_NOT_WORD:
+        return !SRE_LOC_IS_WORD(ch);
+    case SRE_CATEGORY_UNI_LINEBREAK:
+        return SRE_IS_LINEBREAK(ch);
+    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
+        return !SRE_IS_LINEBREAK(ch);
  #endif
      }
      return 0;
@@ -354,6 +389,7 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
      switch (at) {
  
      case SRE_AT_BEGINNING:
+    case SRE_AT_BEGINNING_STRING:
          return ((void*) ptr == state->beginning);
  
      case SRE_AT_BEGINNING_LINE:
@@ -369,6 +405,9 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
          return ((void*) ptr == state->end ||
                  SRE_IS_LINEBREAK((int) ptr[0]));
  
+    case SRE_AT_END_STRING:
+        return ((void*) ptr == state->end);
+
      case SRE_AT_BOUNDARY:
          if (state->beginning == state->end)
              return 0;
@@ -386,6 +425,42 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
          this = ((void*) ptr < state->end) ?
              SRE_IS_WORD((int) ptr[0]) : 0;
          return this == that;
+
+    case SRE_AT_LOC_BOUNDARY:
+        if (state->beginning == state->end)
+            return 0;
+        that = ((void*) ptr > state->beginning) ?
+            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
+        this = ((void*) ptr < state->end) ?
+            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
+        return this != that;
+
+    case SRE_AT_LOC_NON_BOUNDARY:
+        if (state->beginning == state->end)
+            return 0;
+        that = ((void*) ptr > state->beginning) ?
+            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
+        this = ((void*) ptr < state->end) ?
+            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
+        return this == that;
+
+    case SRE_AT_UNI_BOUNDARY:
+        if (state->beginning == state->end)
+            return 0;
+        that = ((void*) ptr > state->beginning) ?
+            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
+        this = ((void*) ptr < state->end) ?
+            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
+        return this != that;
+
+    case SRE_AT_UNI_NON_BOUNDARY:
+        if (state->beginning == state->end)
+            return 0;
+        that = ((void*) ptr > state->beginning) ?
+            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
+        this = ((void*) ptr < state->end) ?
+            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
+        return this == that;
      }
  
      return 0;
@@ -783,13 +858,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
              /* <ASSERT_NOT> <skip> <back> <pattern> */
              TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
              state->ptr = ptr - pattern[1];
-            if (state->ptr < state->beginning)
-                return 0;
-            i = SRE_MATCH(state, pattern + 2, level + 1);
-            if (i < 0)
-                return i;
-            if (i)
-                return 0;
+            if (state->ptr >= state->beginning) {
+                i = SRE_MATCH(state, pattern + 2, level + 1);
+                if (i < 0)
+                    return i;
+                if (i)
+                    return 0;
+            }
              pattern += pattern[0];
              break;
  
@@ -825,7 +900,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
              /* this operator only works if the repeated item is
                 exactly one character wide, and we're not already
                 collecting backtracking points.  for other cases,
-               use the MAX_REPEAT operator instead */
+               use the MAX_REPEAT operator */
  
              /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
  
@@ -899,7 +974,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
  
          case SRE_OP_REPEAT:
              /* create repeat context.  all the hard work is done
-               by the UNTIL operator */
+               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
              /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
              TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
                     pattern[1], pattern[2]));
@@ -973,6 +1048,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
              if (i)
                  return i;
              state->repeat = rp;
+            state->ptr = ptr;
              return 0;
  
          case SRE_OP_MIN_UNTIL:
@@ -985,7 +1061,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
  
              count = rp->count + 1;
  
-            TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
+            TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
+                   rp->pattern));
  
              state->ptr = ptr;
  
@@ -1003,11 +1080,23 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
  
              /* see if the tail matches */
              state->repeat = rp->prev;
-            i = SRE_MATCH(state, pattern, level + 1);
+            /* FIXME: the following fix doesn't always work (#133283) */
+            if (0 && rp->pattern[2] == 65535) {
+                /* unbounded repeat */
+                for (;;) {
+                    i = SRE_MATCH(state, pattern, level + 1);
+                    if (i || ptr >= end)
+                        break;
+                    state->ptr = ++ptr;
+                }
+            } else
+                i = SRE_MATCH(state, pattern, level + 1);
              if (i) {
                  /* free(rp); */
                  return i;
              }
+
+            state->ptr = ptr;
              state->repeat = rp;
  
              if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
@@ -1019,6 +1108,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
              if (i)
                  return i;
              rp->count = count - 1;
+            state->ptr = ptr;
              return 0;
  
          default:
@@ -1185,35 +1275,26 @@ _compile(PyObject* self_, PyObject* args)
      int groups = 0;
      PyObject* groupindex = NULL;
      PyObject* indexgroup = NULL;
-    if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
-                          &groups, &groupindex, &indexgroup))
-        return NULL;
-
-    code = PySequence_Fast(code, "code argument must be a sequence");
-    if (!code)
+    if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
+                          &PyList_Type, &code, &groups,
+                          &groupindex, &indexgroup))
          return NULL;
  
-#if PY_VERSION_HEX >= 0x01060000
-    n = PySequence_Size(code);
-#else
-    n = PySequence_Length(code);
-#endif
+    n = PyList_GET_SIZE(code);
  
-    self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
-    if (!self) {
-        Py_DECREF(code);
+    self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
+    if (!self)
          return NULL;
-    }
  
      for (i = 0; i < n; i++) {
-        PyObject *o = PySequence_Fast_GET_ITEM(code, i);
+        PyObject *o = PyList_GET_ITEM(code, i);
          self->code[i] = (SRE_CODE) PyInt_AsLong(o);
      }
  
-    Py_DECREF(code);
-
-    if (PyErr_Occurred())
+    if (PyErr_Occurred()) {
+        PyObject_DEL(self);
          return NULL;
+    }
  
      Py_INCREF(pattern);
      self->pattern = pattern;
@@ -1245,9 +1326,11 @@ sre_getlower(PyObject* self, PyObject* args)
          return NULL;
      if (flags & SRE_FLAG_LOCALE)
          return Py_BuildValue("i", sre_lower_locale(character));
-#if defined(HAVE_UNICODE)
      if (flags & SRE_FLAG_UNICODE)
+#if defined(HAVE_UNICODE)
          return Py_BuildValue("i", sre_lower_unicode(character));
+#else
+        return Py_BuildValue("i", sre_lower_locale(character));
  #endif
      return Py_BuildValue("i", sre_lower(character));
  }
@@ -1355,9 +1438,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
  
      if (pattern->flags & SRE_FLAG_LOCALE)
          state->lower = sre_lower_locale;
-#if defined(HAVE_UNICODE)
      else if (pattern->flags & SRE_FLAG_UNICODE)
+#if defined(HAVE_UNICODE)
          state->lower = sre_lower_unicode;
+#else
+        state->lower = sre_lower_locale;
  #endif
      else
          state->lower = sre_lower;
@@ -1495,7 +1580,7 @@ pattern_scanner(PatternObject* pattern, PyObject* args)
  
      string = state_init(&self->state, pattern, string, start, end);
      if (!string) {
-        PyObject_Del(self);
+        PyObject_DEL(self);
          return NULL;
      }
  
@@ -1510,6 +1595,7 @@ pattern_dealloc(PatternObject* self)
  {
      Py_XDECREF(self->pattern);
      Py_XDECREF(self->groupindex);
+    Py_XDECREF(self->indexgroup);
      PyObject_DEL(self);
  }
  
@@ -1593,7 +1679,7 @@ call(char* function, PyObject* args)
      PyObject* func;
      PyObject* result;
  
-    name = PyString_FromString(MODULE);
+    name = PyString_FromString(SRE_MODULE);
      if (!name)
          return NULL;
      module = PyImport_Import(name);
@@ -1680,6 +1766,8 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
  
          PyObject* item;
          
+        state_reset(&state);
+
          state.ptr = state.start;
  
          if (state.charsize == 1) {
@@ -1962,7 +2050,7 @@ match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
  
      PyObject* def = Py_None;
      static char* kwlist[] = { "default", NULL };
-    if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
+    if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
          return NULL;
  
      result = PyDict_New();
@@ -1970,34 +2058,35 @@ match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
          return result;
  
      keys = PyMapping_Keys(self->pattern->groupindex);
-    if (!keys) {
-        Py_DECREF(result);
-        return NULL;
-    }
+    if (!keys)
+        goto failed;
  
      for (index = 0; index < PyList_GET_SIZE(keys); index++) {
+        int status;
          PyObject* key;
-        PyObject* item;
+        PyObject* value;
          key = PyList_GET_ITEM(keys, index);
-        if (!key) {
-            Py_DECREF(keys);
-            Py_DECREF(result);
-            return NULL;
-        }
-        item = match_getslice(self, key, def);
-        if (!item) {
+        if (!key)
+            goto failed;
+        value = match_getslice(self, key, def);
+        if (!value) {
              Py_DECREF(key);
-            Py_DECREF(keys);
-            Py_DECREF(result);
-            return NULL;
+            goto failed;
          }
-        /* FIXME: <fl> this can fail, right? */
-        PyDict_SetItem(result, key, item);
+        status = PyDict_SetItem(result, key, value);
+        Py_DECREF(value);
+        if (status < 0)
+            goto failed;
      }
  
      Py_DECREF(keys);
  
      return result;
+
+failed:
+    Py_DECREF(keys);
+    Py_DECREF(result);
+    return NULL;
  }
  
  static PyObject*
@@ -2324,17 +2413,27 @@ static PyMethodDef _functions[] = {
      {NULL, NULL}
  };
  
-void
-#if defined(WIN32)
-__declspec(dllexport)
-#endif
+DL_EXPORT(void)
  init_sre(void)
  {
+    PyObject* m;
+    PyObject* d;
+
      /* Patch object types */
      Pattern_Type.ob_type = Match_Type.ob_type =
          Scanner_Type.ob_type = &PyType_Type;
  
-    Py_InitModule("_" MODULE, _functions);
+    m = Py_InitModule("_" SRE_MODULE, _functions);
+    d = PyModule_GetDict(m);
+
+    PyDict_SetItemString(
+        d, "MAGIC", (PyObject*) PyInt_FromLong(SRE_MAGIC)
+        );
+
+    PyDict_SetItemString(
+        d, "copyright", (PyObject*) PyString_FromString(copyright)
+        );
+
  }
  
  #endif /* !defined(SRE_RECURSIVE) */
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h

index 5c55c3dbd91721114b83bd45618fbea823f14568..73bcb349711d020770d84c867a09051381424ba3 100644 (file)
--- a/Modules/sre_constants.h
+++ b/Modules/sre_constants.h
@@ -6,11 +6,12 @@
   * NOTE: This file is generated by sre_constants.py.  If you need
   * to change anything in here, edit sre_constants.py and run it.
   *
- * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+ * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
   *
   * See the _sre.c file for information on usage and redistribution.
   */
  
+#define SRE_MAGIC 20010320
  #define SRE_OP_FAILURE 0
  #define SRE_OP_SUCCESS 1
  #define SRE_OP_ANY 2
@@ -42,10 +43,16 @@
  #define SRE_OP_SUBPATTERN 28
  #define SRE_AT_BEGINNING 0
  #define SRE_AT_BEGINNING_LINE 1
-#define SRE_AT_BOUNDARY 2
-#define SRE_AT_NON_BOUNDARY 3
-#define SRE_AT_END 4
-#define SRE_AT_END_LINE 5
+#define SRE_AT_BEGINNING_STRING 2
+#define SRE_AT_BOUNDARY 3
+#define SRE_AT_NON_BOUNDARY 4
+#define SRE_AT_END 5
+#define SRE_AT_END_LINE 6
+#define SRE_AT_END_STRING 7
+#define SRE_AT_LOC_BOUNDARY 8
+#define SRE_AT_LOC_NON_BOUNDARY 9
+#define SRE_AT_UNI_BOUNDARY 10
+#define SRE_AT_UNI_NON_BOUNDARY 11
  #define SRE_CATEGORY_DIGIT 0
  #define SRE_CATEGORY_NOT_DIGIT 1
  #define SRE_CATEGORY_SPACE 2
author	Guido van Rossum <guido@python.org>
	Wed, 13 Jun 2001 15:15:02 +0000 (15:15 +0000)
committer	Guido van Rossum <guido@python.org>
	Wed, 13 Jun 2001 15:15:02 +0000 (15:15 +0000)
Lib/sre.py		patch \| blob \| blame \| history
Lib/sre_compile.py		patch \| blob \| blame \| history
Lib/sre_constants.py		patch \| blob \| blame \| history
Lib/sre_parse.py		patch \| blob \| blame \| history
Misc/NEWS		patch \| blob \| blame \| history
Modules/_sre.c		patch \| blob \| blame \| history
Modules/sre_constants.h		patch \| blob \| blame \| history