POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
}
+_CHARSET_ALL = [(NEGATE, None)]
+
def _combine_flags(flags, add_flags, del_flags,
TYPE_FLAGS=_parser.TYPE_FLAGS):
if add_flags & TYPE_FLAGS:
code[skip] = _len(code) - skip
elif op is IN:
charset, hascased = _optimize_charset(av, iscased, tolower, fixes)
- if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
- emit(IN_LOC_IGNORE)
- elif not hascased:
- emit(IN)
- elif not fixes: # ascii
- emit(IN_IGNORE)
+ if not charset:
+ emit(FAILURE)
+ elif charset == _CHARSET_ALL:
+ emit(ANY_ALL)
else:
- emit(IN_UNI_IGNORE)
- skip = _len(code); emit(0)
- _compile_charset(charset, flags, code)
- code[skip] = _len(code) - skip
+ if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
+ emit(IN_LOC_IGNORE)
+ elif not hascased:
+ emit(IN)
+ elif not fixes: # ascii
+ emit(IN_IGNORE)
+ else:
+ emit(IN_UNI_IGNORE)
+ skip = _len(code); emit(0)
+ _compile_charset(charset, flags, code)
+ code[skip] = _len(code) - skip
elif op is ANY:
if flags & SRE_FLAG_DOTALL:
emit(ANY_ALL)
charmap[i] = 1
elif op is NEGATE:
out.append((op, av))
+ elif op is CATEGORY and tail and (CATEGORY, CH_NEGATE[av]) in tail:
+ # Optimize [\s\S] etc.
+ out = [] if out else _CHARSET_ALL
+ return out, False
else:
tail.append((op, av))
except IndexError:
# look for a literal prefix
prefix = []
prefix_skip = 0
- charset = [] # not used
+ charset = None # not used
if not (flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE):
# look for literal prefix
prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags)
# if no prefix, look for charset prefix
if not prefix:
charset = _get_charset_prefix(pattern, flags)
+ if charset:
+ charset, hascased = _optimize_charset(charset)
+ assert not hascased
+ if charset == _CHARSET_ALL:
+ charset = None
## if prefix:
## print("*** PREFIX", prefix, prefix_skip)
## if charset:
# generate overlap table
code.extend(_generate_overlap_table(prefix))
elif charset:
- charset, hascased = _optimize_charset(charset)
- assert not hascased
_compile_charset(charset, flags, code)
code[skip] = len(code) - skip
def test_fail(self):
self.assertEqual(re.search(r'12(?!)|3', '123')[0], '3')
+ def test_character_set_any(self):
+ # The union of complementary character sets matches any character
+ # and is equivalent to "(?s:.)".
+ s = '1x\n'
+ for p in r'[\s\S]', r'[\d\D]', r'[\w\W]', r'[\S\s]', r'\s|\S':
+ with self.subTest(pattern=p):
+ self.assertEqual(re.findall(p, s), list(s))
+ self.assertEqual(re.fullmatch('(?:' + p + ')+', s).group(), s)
+
+ def test_character_set_none(self):
+ # Negation of the union of complementary character sets does not match
+ # any character.
+ s = '1x\n'
+ for p in r'[^\s\S]', r'[^\d\D]', r'[^\w\W]', r'[^\S\s]':
+ with self.subTest(pattern=p):
+ self.assertIsNone(re.search(p, s))
+ self.assertIsNone(re.search('(?s:.)' + p, s))
+
def get_debug_out(pat):
with captured_stdout() as out: