From 37249c08101060bf5a4a230e412575aa741989db Mon Sep 17 00:00:00 2001
From: David Lord
Date: Sun, 6 Oct 2019 23:37:16 -0700
Subject: [PATCH] add comments about lstrip code

---
 jinja2/lexer.py | 57 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/jinja2/lexer.py b/jinja2/lexer.py
index e625c2e9..0aea1c35 100644
--- a/jinja2/lexer.py
+++ b/jinja2/lexer.py
@@ -23,8 +23,6 @@ from jinja2._compat import implements_iterator, intern, iteritems, text_type
 from jinja2.exceptions import TemplateSyntaxError
 from jinja2.utils import LRUCache
 
-from ast import literal_eval # to support scientific notation
-
 # cache for the lexers. Exists in order to be able to have multiple
 # environments with the same lexer
 _lexer_cache = LRUCache(50)
@@ -424,6 +422,12 @@ def get_lexer(environment):
 
 
 class OptionalLStrip(tuple):
+    """A special tuple for marking a point in the state that can have
+    lstrip applied.
+    """
+
+    # Even though it looks like a no-op, creating instances fails
+    # without this.
     def __new__(cls, *members, **kwargs):
         return super(OptionalLStrip, cls).__new__(cls, members)
 
@@ -462,9 +466,9 @@ class Lexer(object):
         # block suffix if trimming is enabled
         block_suffix_re = environment.trim_blocks and '\\n?' or ''
 
-        # strip leading spaces in blocks/comments if lstrip_blocks is enabled
-        # use for example '{%+' to disable lstrip_blocks behavior
-        self.lstrip_blocks_unless_re = c(r'[^ \t]') if environment.lstrip_blocks else None
+        # If lstrip is enabled, it should not be applied if there is any
+        # non-whitespace between the newline and block.
+        self.lstrip_unless_re = c(r"[^ \t]") if environment.lstrip_blocks else None
 
         self.newline_sequence = environment.newline_sequence
         self.keep_trailing_newline = environment.keep_trailing_newline
@@ -586,7 +590,6 @@ class Lexer(object):
         """This method tokenizes the text and returns the tokens in a
         generator. Use this method if you just want to tokenize a template.
         """
-        neg_lstrip_re = self.lstrip_blocks_unless_re
         source = text_type(source)
         lines = source.splitlines()
         if self.keep_trailing_newline and source:
@@ -601,12 +604,10 @@ class Lexer(object):
         if state is not None and state != 'root':
             assert state in ('variable', 'block'), 'invalid state'
             stack.append(state + '_begin')
-        else:
-            state = 'root'
 
         statetokens = self.rules[stack[-1]]
         source_length = len(source)
-        balancing_stack = []
+        lstrip_unless_re = self.lstrip_unless_re
 
         while 1:
             # tokenizer loop
@@ -630,17 +631,33 @@ class Lexer(object):
                 groups = m.groups()
 
                 if isinstance(tokens, OptionalLStrip):
-                    # state supports lstrip, determine override (-/+)
-                    strip_sign = next(group for group in groups[2::2]
-                                      if group is not None)
-                    if strip_sign == '-':
-                        groups = (groups[0].rstrip(),) + groups[1:]
-                    elif strip_sign != '+' and neg_lstrip_re is not None \
-                            and not m.groupdict().get('variable_begin'):
-                        # no override, but lstrip_blocks enabled
-                        l_pos = groups[0].rfind('\n') + 1
-                        if not neg_lstrip_re.search(groups[0], l_pos):
-                            groups = (groups[0][:l_pos],) + groups[1:]
+                    # Rule supports lstrip. Match will look like
+                    # text, block type, whitespace control, type, control, ...
+                    text = groups[0]
+
+                    # Skipping the text and first type, every other group is the
+                    # whitespace control for each type. One of the groups will be
+                    # -, +, or empty string instead of None.
+                    strip_sign = next(g for g in groups[2::2] if g is not None)
+
+                    if strip_sign == "-":
+                        # Strip all whitespace between the text and the tag.
+                        groups = (text.rstrip(),) + groups[1:]
+                    elif (
+                        # Not marked for preserving whitespace.
+                        strip_sign != "+"
+                        # lstrip is enabled.
+                        and lstrip_unless_re is not None
+                        # Not a variable expression.
+                        and not m.groupdict().get("variable_begin")
+                    ):
+                        # The start of text between the last newline and the tag.
+                        l_pos = text.rfind('\n') + 1
+
+                        # If there's only whitespace between the newline and the
+                        # tag, strip it.
+                        if not lstrip_unless_re.search(text, l_pos):
+                            groups = (text[:l_pos],) + groups[1:]
 
                 for idx, token in enumerate(tokens):
                     # failure group
-- 
2.47.2
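
For reviewers who want to check the commented behavior against the public API, here is a minimal sketch (not part of the patch) of what the lstrip code above does at the template level. It assumes a Jinja2 version with the lstrip_blocks and +/- whitespace-control handling described in the diff; the template strings are illustrative only.

from jinja2 import Environment

env = Environment(lstrip_blocks=True)

# With lstrip_blocks enabled, whitespace between the start of a line and a
# block tag is stripped (lstrip_unless_re finds no non-whitespace before it).
assert env.from_string("    {% if True %}x{% endif %}").render() == "x"

# "{%+" opts a single tag out of lstrip; this is the strip_sign == "+" case.
assert env.from_string("    {%+ if True %}x{% endif %}").render() == "    x"

# "{%-" strips all whitespace before the tag; the strip_sign == "-" case.
assert env.from_string("x    {%- if True %}y{% endif %}").render() == "xy"

# Variable expressions are never lstripped, matching the variable_begin check.
assert env.from_string("    {{ 1 }}").render() == "    1"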