p.terminate()
p.join()
+ def test_fail(self):
+ self.assertEqual(re.search(r'12(?!)|3', '123')[0], '3')
+
+ def test_character_set_any(self):
+ # The union of complementary character sets matches any character
+ # and is equivalent to "(?s:.)".
+ s = '1x\n'
+ for p in r'[\s\S]', r'[\d\D]', r'[\w\W]', r'[\S\s]', r'\s|\S':
+ with self.subTest(pattern=p):
+ self.assertEqual(re.findall(p, s), list(s))
+ self.assertEqual(re.fullmatch('(?:' + p + ')+', s).group(), s)
+
+ def test_character_set_none(self):
+ # Negation of the union of complementary character sets does not match
+ # any character.
+ s = '1x\n'
+ for p in r'[^\s\S]', r'[^\d\D]', r'[^\w\W]', r'[^\S\s]':
+ with self.subTest(pattern=p):
+ self.assertIsNone(re.search(p, s))
+ self.assertIsNone(re.search('(?s:.)' + p, s))
+
+ def check_interrupt(self, pattern, string, maxcount):
+ class Interrupt(Exception):
+ pass
+ p = re.compile(pattern)
+ for n in range(maxcount):
+ try:
+ p._fail_after(n, Interrupt)
+ p.match(string)
+ return n
+ except Interrupt:
+ pass
+ finally:
+ p._fail_after(-1, None)
+
+ @unittest.skipUnless(hasattr(re.Pattern, '_fail_after'), 'requires debug build')
+ def test_memory_leaks(self):
+ self.check_interrupt(r'(.)*:', 'abc:', 100)
+ self.check_interrupt(r'([^:])*?:', 'abc:', 100)
+ self.check_interrupt(r'([^:])*+:', 'abc:', 100)
+ self.check_interrupt(r'(.){2,4}:', 'abc:', 100)
+ self.check_interrupt(r'([^:]){2,4}?:', 'abc:', 100)
+ self.check_interrupt(r'([^:]){2,4}+:', 'abc:', 100)
+
def get_debug_out(pat):
with captured_stdout() as out:
--- /dev/null
+Fix memory leaks when :mod:`regular expression <re>` matching terminates
+abruptly, either because of a signal or because memory allocation fails.
#define _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF \
{"__deepcopy__", (PyCFunction)_sre_SRE_Pattern___deepcopy__, METH_O, _sre_SRE_Pattern___deepcopy____doc__},
+#if defined(Py_DEBUG)
+
+PyDoc_STRVAR(_sre_SRE_Pattern__fail_after__doc__,
+"_fail_after($self, count, exception, /)\n"
+"--\n"
+"\n"
+"For debugging.");
+
+#define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF \
+ {"_fail_after", _PyCFunction_CAST(_sre_SRE_Pattern__fail_after), METH_FASTCALL, _sre_SRE_Pattern__fail_after__doc__},
+
+static PyObject *
+_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
+ PyObject *exception);
+
+static PyObject *
+_sre_SRE_Pattern__fail_after(PatternObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ int count;
+ PyObject *exception;
+
+ if (!_PyArg_CheckPositional("_fail_after", nargs, 2, 2)) {
+ goto exit;
+ }
+ count = _PyLong_AsInt(args[0]);
+ if (count == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ exception = args[1];
+ return_value = _sre_SRE_Pattern__fail_after_impl(self, count, exception);
+
+exit:
+ return return_value;
+}
+
+#endif /* defined(Py_DEBUG) */
+
PyDoc_STRVAR(_sre_compile__doc__,
"compile($module, /, pattern, flags, code, groups, groupindex,\n"
" indexgroup)\n"
}
return _sre_SRE_Scanner_search_impl(self, cls);
}
-/*[clinic end generated code: output=045de53cfe02dee0 input=a9049054013a1b77]*/
+
+#ifndef _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
+ #define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
+#endif /* !defined(_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF) */
+/*[clinic end generated code: output=2165ecf43a7c20e8 input=a9049054013a1b77]*/
return 0;
}
+/* Memory pool functions for SRE_REPEAT.  The pool avoids memory
+   leaks when the SRE(match) function terminates abruptly.
+   state->repeat_pool_used is a doubly-linked list, so that an
+   SRE_REPEAT node can be removed from anywhere in it.
+   state->repeat_pool_unused is a singly-linked list; nodes are
+   put and taken at its head. */
+/* Hand out an SRE_REPEAT node that is tracked in state->repeat_pool_used.
+   A node from state->repeat_pool_unused is recycled when one is available;
+   otherwise a new one is obtained from PyObject_Malloc().
+   Returns NULL if that allocation fails; this function does not set an
+   exception itself.  Only the pool links of the node are initialised. */
+static SRE_REPEAT *
+repeat_pool_malloc(SRE_STATE *state)
+{
+    SRE_REPEAT *node = state->repeat_pool_unused;
+
+    if (node != NULL) {
+        /* recycle: pop the head of the unused (singly-linked) list */
+        state->repeat_pool_unused = node->pool_next;
+    }
+    else {
+        node = PyObject_Malloc(sizeof(SRE_REPEAT));
+        if (node == NULL) {
+            return NULL;
+        }
+    }
+
+    /* push the node on the front of the used (doubly-linked) list */
+    SRE_REPEAT *old_head = state->repeat_pool_used;
+    if (old_head != NULL) {
+        old_head->pool_prev = node;
+    }
+    node->pool_next = old_head;
+    node->pool_prev = NULL;
+    state->repeat_pool_used = node;
+
+    return node;
+}
+
+/* Give `repeat` back to the pool: unlink it from state->repeat_pool_used
+   and push it on state->repeat_pool_unused.  The memory itself is not
+   released here. */
+static void
+repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
+{
+    SRE_REPEAT *before = repeat->pool_prev;
+    SRE_REPEAT *after = repeat->pool_next;
+
+    /* unlink from the used list; a node without a predecessor is the head */
+    if (before == NULL) {
+        state->repeat_pool_used = after;
+    }
+    else {
+        before->pool_next = after;
+    }
+    if (after != NULL) {
+        after->pool_prev = before;
+    }
+
+    /* push on the head of the unused list; only pool_next is updated */
+    repeat->pool_next = state->repeat_pool_unused;
+    state->repeat_pool_unused = repeat;
+}
+
+/* Release every SRE_REPEAT node owned by the pool, first the used list and
+   then the unused list, leaving both list heads NULL. */
+static void
+repeat_pool_clear(SRE_STATE *state)
+{
+    SRE_REPEAT **heads[2] = {
+        &state->repeat_pool_used,
+        &state->repeat_pool_unused,
+    };
+
+    for (int i = 0; i < 2; i++) {
+        /* detach the whole list before freeing its nodes */
+        SRE_REPEAT *node = *heads[i];
+        *heads[i] = NULL;
+        while (node != NULL) {
+            /* read the link before the node is freed */
+            SRE_REPEAT *following = node->pool_next;
+            PyObject_Free(node);
+            node = following;
+        }
+    }
+}
+
/* generate 8-bit version */
#define SRE_CHAR Py_UCS1
state->pos = start;
state->endpos = end;
+#ifdef Py_DEBUG
+ state->fail_after_count = pattern->fail_after_count;
+ state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
+#endif
+
return string;
err:
/* We add an explicit cast here because MSVC has a bug when
/* See above PyMem_Del for why we explicitly cast here. */
PyMem_Free((void*) state->mark);
state->mark = NULL;
+ /* SRE_REPEAT pool */
+ repeat_pool_clear(state);
}
/* calculate offset from start of string */
Py_VISIT(self->groupindex);
Py_VISIT(self->indexgroup);
Py_VISIT(self->pattern);
+#ifdef Py_DEBUG
+ Py_VISIT(self->fail_after_exc);
+#endif
return 0;
}
Py_CLEAR(self->groupindex);
Py_CLEAR(self->indexgroup);
Py_CLEAR(self->pattern);
+#ifdef Py_DEBUG
+ Py_CLEAR(self->fail_after_exc);
+#endif
return 0;
}
Py_ssize_t status;
PyObject *match;
- if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
+ if (!state_init(&state, self, string, pos, endpos))
return NULL;
state.ptr = state.start;
return Py_NewRef(self);
}
+#ifdef Py_DEBUG
+/*[clinic input]
+_sre.SRE_Pattern._fail_after
+
+ count: int
+ exception: object
+ /
+
+For debugging.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
+                                  PyObject *exception)
+/*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
+{
+    /* Debug-only hook: store the failure-injection settings on the pattern.
+       `count` goes to fail_after_count and a new strong reference to
+       `exception` replaces fail_after_exc.  Always returns None. */
+    PyObject *replaced = self->fail_after_exc;
+
+    self->fail_after_count = count;
+    self->fail_after_exc = Py_NewRef(exception);
+    /* drop the previous value (possibly NULL) only after the new one is
+       in place */
+    Py_XDECREF(replaced);
+    Py_RETURN_NONE;
+}
+#endif /* Py_DEBUG */
+
static PyObject *
pattern_repr(PatternObject *obj)
{
self->pattern = NULL;
self->groupindex = NULL;
self->indexgroup = NULL;
+#ifdef Py_DEBUG
+ self->fail_after_count = -1;
+ self->fail_after_exc = NULL;
+#endif
self->codesize = n;
if (!match)
return NULL;
- match->pattern = (PatternObject*)Py_NewRef(pattern);
+ Py_INCREF(pattern);
+ match->pattern = pattern;
match->string = Py_NewRef(state->string);
return NULL;
}
- match = pattern_new_match(module_state, (PatternObject*) self->pattern,
+ match = pattern_new_match(module_state, self->pattern,
state, status);
if (status == 0)
return NULL;
}
- match = pattern_new_match(module_state, (PatternObject*) self->pattern,
+ match = pattern_new_match(module_state, self->pattern,
state, status);
if (status == 0)
return NULL;
}
- scanner->pattern = Py_NewRef(self);
+ Py_INCREF(self);
+ scanner->pattern = self;
PyObject_GC_Track(scanner);
return (PyObject*) scanner;
_SRE_SRE_PATTERN_SCANNER_METHODDEF
_SRE_SRE_PATTERN___COPY___METHODDEF
_SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
+ _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
{"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
PyDoc_STR("See PEP 585")},
{NULL, NULL}
int flags; /* flags used when compiling pattern source */
PyObject *weakreflist; /* List of weak references */
int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
+#ifdef Py_DEBUG
+ /* for simulation of user interruption */
+ int fail_after_count;
+ PyObject *fail_after_exc;
+#endif
/* pattern code */
Py_ssize_t codesize;
SRE_CODE code[1];
const SRE_CODE* pattern; /* points to REPEAT operator arguments */
const void* last_ptr; /* helper to check for infinite loops */
struct SRE_REPEAT_T *prev; /* points to previous repeat context */
+ /* for SRE_REPEAT pool */
+ struct SRE_REPEAT_T *pool_prev;
+ struct SRE_REPEAT_T *pool_next;
} SRE_REPEAT;
typedef struct {
size_t data_stack_base;
/* current repeat context */
SRE_REPEAT *repeat;
+ /* SRE_REPEAT pool */
+ SRE_REPEAT *repeat_pool_used;
+ SRE_REPEAT *repeat_pool_unused;
unsigned int sigcount;
+#ifdef Py_DEBUG
+ int fail_after_count;
+ PyObject *fail_after_exc;
+#endif
} SRE_STATE;
typedef struct {
PyObject_HEAD
- PyObject* pattern;
+ /* Pattern object stored when the scanner is created.  Declared as
+    PatternObject* so that calls such as pattern_new_match() can take it
+    without a cast. */
+ PatternObject* pattern;
SRE_STATE state;
int executing;
} ScannerObject;
Py_ssize_t last_ctx_pos;
} SRE(match_context);
-#define MAYBE_CHECK_SIGNALS \
+/* Increment sigcount and, each time its low 12 bits wrap to zero (every
+   0x1000 evaluations), call PyErr_CheckSignals(); if that returns non-zero,
+   leave the matcher through RETURN_ERROR(SRE_ERROR_INTERRUPTED). */
+#define _MAYBE_CHECK_SIGNALS \
do { \
if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
} \
} while (0)
+#ifdef Py_DEBUG
+/* Debug builds add failure injection on top of the signal check: while
+   state->fail_after_count is non-negative it is decremented on every
+   evaluation, and the evaluation that finds it at zero sets
+   state->fail_after_exc as the current exception and returns
+   SRE_ERROR_INTERRUPTED.  The counter is negative afterwards, so the
+   injection does not fire a second time. */
+# define MAYBE_CHECK_SIGNALS \
+ do { \
+ _MAYBE_CHECK_SIGNALS; \
+ if (state->fail_after_count >= 0) { \
+ if (state->fail_after_count-- == 0) { \
+ PyErr_SetNone(state->fail_after_exc); \
+ RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
+ } \
+ } \
+ } while (0)
+#else
+/* Other builds: signal polling only. */
+# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
+#endif /* Py_DEBUG */
+
#ifdef HAVE_COMPUTED_GOTOS
#ifndef USE_COMPUTED_GOTOS
#define USE_COMPUTED_GOTOS 1
pattern[1], pattern[2]));
/* install new repeat context */
- /* TODO(https://github.com/python/cpython/issues/67877): Fix this
- * potential memory leak. */
- ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep));
+ ctx->u.rep = repeat_pool_malloc(state);
if (!ctx->u.rep) {
- PyErr_NoMemory();
- RETURN_FAILURE;
+ RETURN_ERROR(SRE_ERROR_MEMORY);
}
ctx->u.rep->count = -1;
ctx->u.rep->pattern = pattern;
state->ptr = ptr;
DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
state->repeat = ctx->u.rep->prev;
- PyObject_Free(ctx->u.rep);
+ repeat_pool_free(state, ctx->u.rep);
if (ret) {
RETURN_ON_ERROR(ret);