From 9dae67bcc8dbc389e0ceef12f1fa04352ef75b8e Mon Sep 17 00:00:00 2001
From: =?utf8?q?Mat=C4=9Bj=20Volf?= <mat.volfik@gmail.com>
Date: Tue, 9 Mar 2021 14:19:58 +0100
Subject: [PATCH] Split lines in lexer only by \r\n, \r and \n

Python's str.splitlines() also splits on other characters[1], which
causes problems when those special characters need to be kept in
processed templates; see bug reports #769, #952 and #1313.

The keep_trailing_newline logic is reworked because splitlines()
already removes the trailing newline (so it had to be added back),
while re.split() does not, so it now has to be removed.

[1] https://docs.python.org/3/library/stdtypes.html#str.splitlines
---
 CHANGES.rst              |  2 ++
 src/jinja2/lexer.py      | 15 ++++++++++-----
 tests/test_regression.py |  7 +++++++
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index dad0d0ef..dd3739fb 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -56,6 +56,8 @@ Unreleased
     instead of a ``TypeError``. :issue:`1198`
 -   ``Undefined`` is iterable in an async environment. :issue:`1294`
 -   ``NativeEnvironment`` supports async mode. :issue:`1362`
+-   Template rendering only treats ``\n``, ``\r\n`` and ``\r`` as line
+    breaks. Other characters are left unchanged. :issue:`769, 952, 1313`
 
 
 Version 2.11.3
diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py
index d992f0d1..0cade7a3 100644
--- a/src/jinja2/lexer.py
+++ b/src/jinja2/lexer.py
@@ -638,12 +638,17 @@ class Lexer:
 
     def tokeniter(self, source, name, filename=None, state=None):
         """This method tokenizes the text and returns the tokens in a
-        generator.  Use this method if you just want to tokenize a template.
+        generator. Use this method if you just want to tokenize a template.
+
+        .. versionchanged:: 3.0
+            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
+            breaks.
         """
-        lines = source.splitlines()
-        if self.keep_trailing_newline and source:
-            if source.endswith(("\r\n", "\r", "\n")):
-                lines.append("")
+        lines = newline_re.split(source)[::2]
+
+        if not self.keep_trailing_newline and lines[-1] == "":
+            del lines[-1]
+
         source = "\n".join(lines)
         pos = 0
         lineno = 1
diff --git a/tests/test_regression.py b/tests/test_regression.py
index a49356b3..29caee52 100644
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -745,3 +745,10 @@ End"""
 
         tmpl = env.get_template("base")
         assert tmpl.render() == "42 y"
+
+
+@pytest.mark.parametrize("unicode_char", ["\N{FORM FEED}", "\x85"])
+def test_unicode_whitespace(env, unicode_char):
+    content = "Lorem ipsum\n" + unicode_char + "\nMore text"
+    tmpl = env.from_string(content)
+    assert tmpl.render() == content
-- 
2.47.2