From: Serhiy Storchaka Date: Thu, 6 Mar 2014 09:36:15 +0000 (+0200) Subject: Issue #20283: RE pattern methods now accept the string keyword parameters X-Git-Tag: v3.4.1rc1~233^2~140 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a537eb45fd964dea378e78efa6c576dc00666f2a;p=thirdparty%2FPython%2Fcpython.git Issue #20283: RE pattern methods now accept the string keyword parameters as documented. The pattern and source keyword parameters are left as deprecated aliases. --- a537eb45fd964dea378e78efa6c576dc00666f2a diff --cc Lib/test/test_re.py index a229e235ca20,5466b2065395..33ccd1539830 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@@ -1205,84 -1076,23 +1205,102 @@@ class ReTests(unittest.TestCase) self.assertEqual(out.getvalue().splitlines(), ['literal 102 ', 'literal 111 ', 'literal 111 ']) + def test_keyword_parameters(self): + # Issue #20283: Accepting the string keyword parameter. + pat = re.compile(r'(ab)') + self.assertEqual( + pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9)) ++ self.assertEqual( ++ pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9)) + self.assertEqual( + pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9)) + self.assertEqual( + pat.findall(string='abracadabra', pos=3, endpos=10), ['ab']) + self.assertEqual( + pat.split(string='abracadabra', maxsplit=1), + ['', 'ab', 'racadabra']) + self.assertEqual( + pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(), + (7, 9)) + +class PatternReprTests(unittest.TestCase): + def check(self, pattern, expected): + self.assertEqual(repr(re.compile(pattern)), expected) + + def check_flags(self, pattern, flags, expected): + self.assertEqual(repr(re.compile(pattern, flags)), expected) + + def test_without_flags(self): + self.check('random pattern', + "re.compile('random pattern')") + + def test_single_flag(self): + self.check_flags('random pattern', re.IGNORECASE, + "re.compile('random pattern', re.IGNORECASE)") + + def test_multiple_flags(self): + self.check_flags('random pattern', re.I|re.S|re.X, + "re.compile('random pattern', " + "re.IGNORECASE|re.DOTALL|re.VERBOSE)") + + def test_unicode_flag(self): + self.check_flags('random pattern', re.U, + "re.compile('random pattern')") + self.check_flags('random pattern', re.I|re.S|re.U, + "re.compile('random pattern', " + "re.IGNORECASE|re.DOTALL)") + + def test_inline_flags(self): + self.check('(?i)pattern', + "re.compile('(?i)pattern', re.IGNORECASE)") + + def test_unknown_flags(self): + self.check_flags('random pattern', 0x123000, + "re.compile('random pattern', 0x123000)") + self.check_flags('random pattern', 0x123000|re.I, + "re.compile('random pattern', re.IGNORECASE|0x123000)") + + def test_bytes(self): + self.check(b'bytes pattern', + "re.compile(b'bytes pattern')") + self.check_flags(b'bytes pattern', re.A, + "re.compile(b'bytes pattern', re.ASCII)") + + def test_quotes(self): + self.check('random "double quoted" pattern', + '''re.compile('random "double quoted" pattern')''') + self.check("random 'single quoted' pattern", + '''re.compile("random 'single quoted' pattern")''') + self.check('''both 'single' and "double" quotes''', + '''re.compile('both \\'single\\' and "double" quotes')''') + + def test_long_pattern(self): + pattern = 'Very %spattern' % ('long ' * 1000) + r = repr(re.compile(pattern)) + self.assertLess(len(r), 300) + self.assertEqual(r[:30], "re.compile('Very long long lon") + r = repr(re.compile(pattern, re.I)) + self.assertLess(len(r), 300) + self.assertEqual(r[:30], "re.compile('Very long long lon") + self.assertEqual(r[-16:], ", re.IGNORECASE)") + + +class ImplementationTest(unittest.TestCase): + """ + Test implementation details of the re module. + """ + + def test_overlap_table(self): + f = sre_compile._generate_overlap_table + self.assertEqual(f(""), []) + self.assertEqual(f("a"), [0]) + self.assertEqual(f("abcd"), [0, 0, 0, 0]) + self.assertEqual(f("aaaa"), [0, 1, 2, 3]) + self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1]) + self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0]) + + def run_re_tests(): from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: diff --cc Misc/NEWS index 1da5d6430f20,62211f47113c..1b6516f68331 --- a/Misc/NEWS +++ b/Misc/NEWS @@@ -20,10 -22,10 +20,14 @@@ Core and Builtin Library ------- + - Issue #20283: RE pattern methods now accept the string keyword parameters + as documented. The pattern and source keyword parameters are left as + deprecated aliases. + +- Issue #20839: Don't trigger a DeprecationWarning in the still supported + pkgutil.get_loader() API when __loader__ isn't set on a module (nor + when pkgutil.find_loader() is called directly). + - Issue #20778: Fix modulefinder to work with bytecode-only modules. - Issue #20791: copy.copy() now doesn't make a copy when the input is diff --cc Modules/_sre.c index d4d1d9d0eb3c,b1258eefc497..eb1106ad8055 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@@ -504,111 -1876,46 +504,105 @@@ pattern_dealloc(PatternObject* self PyObject_DEL(self); } -static PyObject* +LOCAL(Py_ssize_t) +sre_match(SRE_STATE* state, SRE_CODE* pattern) +{ + if (state->charsize == 1) + return sre_ucs1_match(state, pattern); + if (state->charsize == 2) + return sre_ucs2_match(state, pattern); + assert(state->charsize == 4); + return sre_ucs4_match(state, pattern); +} + +LOCAL(Py_ssize_t) +sre_search(SRE_STATE* state, SRE_CODE* pattern) +{ + if (state->charsize == 1) + return sre_ucs1_search(state, pattern); + if (state->charsize == 2) + return sre_ucs2_search(state, pattern); + assert(state->charsize == 4); + return sre_ucs4_search(state, pattern); +} + - /*[clinic input] - module _sre - class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type" - - _sre.SRE_Pattern.match as pattern_match - - pattern: object - pos: Py_ssize_t = 0 - endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize - - Matches zero or more characters at the beginning of the string. - [clinic start generated code]*/ - - PyDoc_STRVAR(pattern_match__doc__, - "match($self, /, pattern, pos=0, endpos=sys.maxsize)\n" - "--\n" - "\n" - "Matches zero or more characters at the beginning of the string."); - - #define PATTERN_MATCH_METHODDEF \ - {"match", (PyCFunction)pattern_match, METH_VARARGS|METH_KEYWORDS, pattern_match__doc__}, - +static PyObject * - pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos); + fix_string_param(PyObject *string, PyObject *string2, const char *oldname) + { + if (string2 != NULL) { + if (string != NULL) { + PyErr_Format(PyExc_TypeError, + "Argument given by name ('%s') and position (1)", + oldname); + return NULL; + } + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "The '%s' keyword parameter name is deprecated. " + "Use 'string' instead.", oldname) < 0) + return NULL; + return string2; + } + if (string == NULL) { + PyErr_SetString(PyExc_TypeError, + "Required argument 'string' (pos 1) not found"); + return NULL; + } + return string; + } +static PyObject * +pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs) +{ - PyObject *return_value = NULL; - static char *_keywords[] = {"pattern", "pos", "endpos", NULL}; - PyObject *pattern; ++ static char *_keywords[] = {"string", "pos", "endpos", "pattern", NULL}; ++ PyObject *string = NULL; + Py_ssize_t pos = 0; + Py_ssize_t endpos = PY_SSIZE_T_MAX; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "O|nn:match", _keywords, - &pattern, &pos, &endpos)) - goto exit; - return_value = pattern_match_impl(self, pattern, pos, endpos); - - exit: - return return_value; - } - - static PyObject * - pattern_match_impl(PatternObject *self, PyObject *pattern, Py_ssize_t pos, Py_ssize_t endpos) - /*[clinic end generated code: output=1528eafdb8b025ad input=26f9fd31befe46b9]*/ - { ++ PyObject *pattern = NULL; + SRE_STATE state; + Py_ssize_t status; - PyObject *string; + - string = state_init(&state, (PatternObject *)self, pattern, pos, endpos); ++ if (!PyArg_ParseTupleAndKeywords(args, kwargs, ++ "|Onn$O:match", _keywords, ++ &string, &pos, &endpos, &pattern)) ++ return NULL; ++ string = fix_string_param(string, pattern, "pattern"); ++ if (!string) ++ return NULL; ++ string = state_init(&state, (PatternObject *)self, string, pos, endpos); + if (!string) + return NULL; + + state.ptr = state.start; + + TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); + + status = sre_match(&state, PatternObject_GetCode(self)); + + TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); + if (PyErr_Occurred()) + return NULL; + + state_fini(&state); + + return (PyObject *)pattern_new_match(self, &state, status); +} + static PyObject* -pattern_match(PatternObject* self, PyObject* args, PyObject* kw) +pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw) { SRE_STATE state; - int status; + Py_ssize_t status; - PyObject* string; + PyObject *string = NULL, *string2 = NULL; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - static char* kwlist[] = { "pattern", "pos", "endpos", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:fullmatch", kwlist, - &string, &start, &end)) + static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:match", kwlist, ++ if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:fullmatch", kwlist, + &string, &start, &end, &string2)) + return NULL; + + string = fix_string_param(string, string2, "pattern"); + if (!string) return NULL; string = state_init(&state, self, string, start, end); @@@ -635,14 -1945,18 +629,18 @@@ static PyObject pattern_search(PatternObject* self, PyObject* args, PyObject* kw) { SRE_STATE state; - int status; + Py_ssize_t status; - PyObject* string; + PyObject *string = NULL, *string2 = NULL; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - static char* kwlist[] = { "pattern", "pos", "endpos", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist, - &string, &start, &end)) + static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:search", kwlist, + &string, &start, &end, &string2)) + return NULL; + + string = fix_string_param(string, string2, "pattern"); + if (!string) return NULL; string = state_init(&state, self, string, start, end); @@@ -715,15 -2081,19 +713,19 @@@ pattern_findall(PatternObject* self, Py { SRE_STATE state; PyObject* list; - int status; + Py_ssize_t status; Py_ssize_t i, b, e; - PyObject* string; + PyObject *string = NULL, *string2 = NULL; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - static char* kwlist[] = { "source", "pos", "endpos", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist, - &string, &start, &end)) + static char* kwlist[] = { "string", "pos", "endpos", "source", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:findall", kwlist, + &string, &start, &end, &string2)) + return NULL; + + string = fix_string_param(string, string2, "source"); + if (!string) return NULL; string = state_init(&state, self, string, start, end); @@@ -1212,90 -2581,10 +1218,94 @@@ pattern_deepcopy(PatternObject* self, P #endif } +static PyObject * +pattern_repr(PatternObject *obj) +{ + static const struct { + const char *name; + int value; + } flag_names[] = { + {"re.TEMPLATE", SRE_FLAG_TEMPLATE}, + {"re.IGNORECASE", SRE_FLAG_IGNORECASE}, + {"re.LOCALE", SRE_FLAG_LOCALE}, + {"re.MULTILINE", SRE_FLAG_MULTILINE}, + {"re.DOTALL", SRE_FLAG_DOTALL}, + {"re.UNICODE", SRE_FLAG_UNICODE}, + {"re.VERBOSE", SRE_FLAG_VERBOSE}, + {"re.DEBUG", SRE_FLAG_DEBUG}, + {"re.ASCII", SRE_FLAG_ASCII}, + }; + PyObject *result = NULL; + PyObject *flag_items; + int i; + int flags = obj->flags; + + /* Omit re.UNICODE for valid string patterns. */ + if (obj->isbytes == 0 && + (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) == + SRE_FLAG_UNICODE) + flags &= ~SRE_FLAG_UNICODE; + + flag_items = PyList_New(0); + if (!flag_items) + return NULL; + + for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) { + if (flags & flag_names[i].value) { + PyObject *item = PyUnicode_FromString(flag_names[i].name); + if (!item) + goto done; + + if (PyList_Append(flag_items, item) < 0) { + Py_DECREF(item); + goto done; + } + Py_DECREF(item); + flags &= ~flag_names[i].value; + } + } + if (flags) { + PyObject *item = PyUnicode_FromFormat("0x%x", flags); + if (!item) + goto done; + + if (PyList_Append(flag_items, item) < 0) { + Py_DECREF(item); + goto done; + } + Py_DECREF(item); + } + + if (PyList_Size(flag_items) > 0) { + PyObject *flags_result; + PyObject *sep = PyUnicode_FromString("|"); + if (!sep) + goto done; + flags_result = PyUnicode_Join(sep, flag_items); + Py_DECREF(sep); + if (!flags_result) + goto done; + result = PyUnicode_FromFormat("re.compile(%.200R, %S)", + obj->pattern, flags_result); + Py_DECREF(flags_result); + } + else { + result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern); + } + +done: + Py_DECREF(flag_items); + return result; +} + + PyDoc_STRVAR(pattern_match_doc, + "match(string[, pos[, endpos]]) -> match object or None.\n\ + Matches zero or more characters at the beginning of the string"); + +PyDoc_STRVAR(pattern_fullmatch_doc, +"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\ + Matches against all of the string"); + PyDoc_STRVAR(pattern_search_doc, "search(string[, pos[, endpos]]) -> match object or None.\n\ Scan through string looking for a match, and return a corresponding\n\ @@@ -1329,9 -2618,8 +1339,10 @@@ PyDoc_STRVAR(pattern_subn_doc PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); static PyMethodDef pattern_methods[] = { - PATTERN_MATCH_METHODDEF + {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS, + pattern_match_doc}, + {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS, + pattern_fullmatch_doc}, {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS, pattern_search_doc}, {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,