bpo-40847: Consider a line with only a LINECONT a blank line (GH-20769)

author Lysandros Nikolaou <lisandrosnik@gmail.com>

Wed, 10 Jun 2020 23:56:08 +0000 (02:56 +0300)

committer GitHub <noreply@github.com>

Wed, 10 Jun 2020 23:56:08 +0000 (00:56 +0100)
author Lysandros Nikolaou <lisandrosnik@gmail.com>
Wed, 10 Jun 2020 23:56:08 +0000 (02:56 +0300)
committer GitHub <noreply@github.com>
Wed, 10 Jun 2020 23:56:08 +0000 (00:56 +0100)
diff --git a/Lib/test/test_peg_parser.py b/Lib/test/test_peg_parser.py

index 6ccb2573176bb5e2d5051db58c20d21cdb76a329..fae85e323da04455e7978ec31d40b981db537d97 100644 (file)
--- a/Lib/test/test_peg_parser.py
+++ b/Lib/test/test_peg_parser.py
@@ -153,6 +153,13 @@ TEST_CASES = [
      ('dict_comp', '{x:1 for x in a}'),
      ('dict_comp_if', '{x:1+2 for x in a if b}'),
      ('dict_empty', '{}'),
+    ('empty_line_after_linecont',
+     r'''
+        pass
+        \
+
+        pass
+     '''),
      ('for',
       '''
          for i in a:
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py

index f41426a4e9d2dade6e6ca12a05007957c86b743a..0c207ec8fc07cc7969a079290c9c83b7e9bb731f 100644 (file)
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -858,6 +858,20 @@ class SyntaxTestCase(unittest.TestCase):
                            "iterable argument unpacking follows "
                            "keyword argument unpacking")
  
+    def test_empty_line_after_linecont(self):
+        # See issue-40847
+        s = r"""\
+pass
+        \
+
+pass
+"""
+        try:
+            compile(s, '<string>', 'exec')
+        except SyntaxError:
+            self.fail("Empty line after a line continuation character is valid.")
+
+
  def test_main():
      support.run_unittest(SyntaxTestCase)
      from test import test_syntax
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst

new file mode 100644 (file)

index 0000000..0b489f2
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst
@@ -0,0 +1,4 @@
+Fix a bug where a line with only a line continuation character was not considered a blank line at the tokenizer level.
+In such cases, more than a single ``NEWLINE`` token was emitted. The old parser was working around the issue,
+but the new parser threw a :exc:`SyntaxError` for valid input due to this. For example, an empty line following
+a line continuation character was interpreted as a :exc:`SyntaxError`.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index cebfadc8e89f3040cdb685a9761dbfd9428623fb..d461e4e24e721cd3e3161d84a21bd00096d63515 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1203,8 +1203,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
              }
          }
          tok_backup(tok, c);
-        if (c == '#' || c == '\n') {
+        if (c == '#' || c == '\n' || c == '\\') {
              /* Lines with only whitespace and/or comments
+               and/or a line continuation character
                 shouldn't affect the indentation and are
                 not passed to the parser as NEWLINE tokens,
                 except *totally* empty lines in interactive
author	Lysandros Nikolaou <lisandrosnik@gmail.com>
	Wed, 10 Jun 2020 23:56:08 +0000 (02:56 +0300)
committer	GitHub <noreply@github.com>
	Wed, 10 Jun 2020 23:56:08 +0000 (00:56 +0100)
Lib/test/test_peg_parser.py		patch \| blob \| blame \| history
Lib/test/test_syntax.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core and Builtins/2020-06-09-23-52-32.bpo-40847.4XAACw.rst	[new file with mode: 0644]	patch \| blob
Parser/tokenizer.c		patch \| blob \| blame \| history