From: Pablo Galindo Date: Wed, 3 Apr 2019 18:34:59 +0000 (-0400) Subject: [3.7] bpo-36440: include node names in ParserError messages, instead of numeric IDs... X-Git-Tag: v3.7.4rc1~264 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=513d142993bb8c13e6803727fa086e44eafc360f;p=thirdparty%2FPython%2Fcpython.git [3.7] bpo-36440: include node names in ParserError messages, instead of numeric IDs (GH-12565) (GH-12671) The error messages in the parser module are referring to numeric IDs for the nodes. To improve readability, use the node names when reporting errors. (cherry picked from commit cb0748d3939c31168ab5d3b80e3677494497d5e3) Co-authored-by: tyomitch --- diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py index 94e454663573..e49afd2ba1d8 100644 --- a/Lib/test/test_parser.py +++ b/Lib/test/test_parser.py @@ -713,6 +713,22 @@ class IllegalSyntaxTestCase(unittest.TestCase): with self.assertRaises(UnicodeEncodeError): parser.sequence2st(tree) + def test_invalid_node_id(self): + tree = (257, (269, (-7, ''))) + self.check_bad_tree(tree, "negative node id") + tree = (257, (269, (99, ''))) + self.check_bad_tree(tree, "invalid token id") + tree = (257, (269, (9999, (0, '')))) + self.check_bad_tree(tree, "invalid symbol id") + + def test_ParserError_message(self): + try: + parser.sequence2st((257,(269,(257,(0,''))))) + except parser.ParserError as why: + self.assertIn("simple_stmt", str(why)) # Expected + self.assertIn("file_input", str(why)) # Got + + class CompileTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst new file mode 100644 index 000000000000..372b1f771009 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-25-13-45-19.bpo-36440.gkvzhi.rst @@ -0,0 +1,2 @@ +Include node names in ``ParserError`` messages, instead of numeric IDs. +Patch by A. Skrobov. 
diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index 67c874267f24..799a813468f1 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -24,10 +24,6 @@ * Py_[X]DECREF() and Py_[X]INCREF() macros. The lint annotations * look like "NOTE(...)". * - * To debug parser errors like - * "parser.ParserError: Expected node type 12, got 333." - * decode symbol numbers using the automatically-generated files - * Lib/symbol.h and Include/token.h. */ #include "Python.h" /* general Python API */ @@ -663,6 +659,13 @@ validate_node(node *tree) for (pos = 0; pos < nch; ++pos) { node *ch = CHILD(tree, pos); int ch_type = TYPE(ch); + if ((ch_type >= NT_OFFSET + _PyParser_Grammar.g_ndfas) + || (ISTERMINAL(ch_type) && (ch_type >= N_TOKENS)) + || (ch_type < 0) + ) { + PyErr_Format(parser_error, "Unrecognized node type %d.", ch_type); + return 0; + } for (arc = 0; arc < dfa_state->s_narcs; ++arc) { short a_label = dfa_state->s_arc[arc].a_lbl; assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels); @@ -691,8 +694,10 @@ validate_node(node *tree) const char *expected_str = _PyParser_Grammar.g_ll.ll_label[a_label].lb_str; if (ISNONTERMINAL(next_type)) { - PyErr_Format(parser_error, "Expected node type %d, got %d.", - next_type, ch_type); + PyErr_Format(parser_error, "Expected %s, got %s.", + _PyParser_Grammar.g_dfa[next_type - NT_OFFSET].d_name, + ISTERMINAL(ch_type) ? _PyParser_TokenNames[ch_type] : + _PyParser_Grammar.g_dfa[ch_type - NT_OFFSET].d_name); } else if (expected_str != NULL) { PyErr_Format(parser_error, "Illegal terminal: expected '%s'.",