Split lines in lexer only by \r\n, \r and \n

author Matěj Volf <mat.volfik@gmail.com>

Tue, 9 Mar 2021 13:19:58 +0000 (14:19 +0100)

committer David Lord <davidism@gmail.com>

Mon, 5 Apr 2021 18:46:36 +0000 (11:46 -0700)
author Matěj Volf <mat.volfik@gmail.com>
Tue, 9 Mar 2021 13:19:58 +0000 (14:19 +0100)
committer David Lord <davidism@gmail.com>
Mon, 5 Apr 2021 18:46:36 +0000 (11:46 -0700)
diff --git a/CHANGES.rst b/CHANGES.rst

index dad0d0efcb47953629670c7764117e5172463948..dd3739fb5dedf767a054e980043dc157a92eb808 100644 (file)
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -56,6 +56,8 @@ Unreleased
      instead of a ``TypeError``. :issue:`1198`
  -   ``Undefined`` is iterable in an async environment. :issue:`1294`
  -   ``NativeEnvironment`` supports async mode. :issue:`1362`
+-   Template rendering only treats ``\n``, ``\r\n`` and ``\r`` as line
+    breaks. Other characters are left unchanged. :issue:`769, 952, 1313`
  
  
  Version 2.11.3
diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py

index d992f0d166782002bbe9c4b442bafb2fbbeb0563..0cade7a3503d10c563f90e084e2b701c4fb1b8bc 100644 (file)
--- a/src/jinja2/lexer.py
+++ b/src/jinja2/lexer.py
@@ -638,12 +638,17 @@ class Lexer:
  
      def tokeniter(self, source, name, filename=None, state=None):
          """This method tokenizes the text and returns the tokens in a
-        generator.  Use this method if you just want to tokenize a template.
+        generator. Use this method if you just want to tokenize a template.
+
+        .. versionchanged:: 3.0
+            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
+            breaks.
          """
-        lines = source.splitlines()
-        if self.keep_trailing_newline and source:
-            if source.endswith(("\r\n", "\r", "\n")):
-                lines.append("")
+        lines = newline_re.split(source)[::2]
+
+        if not self.keep_trailing_newline and lines[-1] == "":
+            del lines[-1]
+
          source = "\n".join(lines)
          pos = 0
          lineno = 1
diff --git a/tests/test_regression.py b/tests/test_regression.py

index a49356b3e0ec1c1c45ec6e2f8443e54872c5035f..29caee52e7dfaa43e89b34a2e6d16946c10149d4 100644 (file)
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -745,3 +745,10 @@ End"""
  
          tmpl = env.get_template("base")
          assert tmpl.render() == "42 y"
+
+
+@pytest.mark.parametrize("unicode_char", ["\N{FORM FEED}", "\x85"])
+def test_unicode_whitespace(env, unicode_char):
+    content = "Lorem ipsum\n" + unicode_char + "\nMore text"
+    tmpl = env.from_string(content)
+    assert tmpl.render() == content
author	Matěj Volf <mat.volfik@gmail.com>
	Tue, 9 Mar 2021 13:19:58 +0000 (14:19 +0100)
committer	David Lord <davidism@gmail.com>
	Mon, 5 Apr 2021 18:46:36 +0000 (11:46 -0700)
CHANGES.rst		patch | blob | blame | history
src/jinja2/lexer.py		patch | blob | blame | history
tests/test_regression.py		patch | blob | blame | history