From: Kevin Brown
Date: Fri, 15 May 2020 16:27:10 +0000 (-0400)
Subject: Allow symbols to be overwritten by the environment
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a0364dd0190fd50551e1ee27f4f2cc2c32c7c5b3;p=thirdparty%2Fjinja.git

Allow symbols to be overwritten by the environment

This introduces a change to both the grammar and the parsing
environment that allows people to override the start/end symbols in
the grammar through the environment. This finally brings the parser to
the same level as the old parser and lexer when it comes to handling
those customizations.

This means that the grammar must be compiled dynamically to account
for these customizations per environment. A module-level LRU cache has
been added so that compiled grammars are cached instead of being
recompiled every time. This should cover most cases other than the
unit tests, since most applications do not change their environment
configuration frequently.

This also adds proper handling to the closing line block statement so
that it accepts either the end of the line or the end of the input.
---
diff --git a/grammar.ebnf b/grammar.ebnf
index 11d26e6e..0f8a6ada 100644
--- a/grammar.ebnf
+++ b/grammar.ebnf
@@ -67,14 +67,24 @@ block_end
 
 block_open
     =
-    | ( {SP}* @:"{%-" {SP}* )
-    | @:"{%" {SP}*
+    | ( {SP}* block_open_symbol "-" {SP}* )
+    | block_open_symbol {SP}*
+    ;
+
+block_open_symbol
+    =
+    "{%"
     ;
 
 block_close
     =
-    | ( @:"-%}" {SP}* )
-    | @:"%}"
+    | ( "-" block_close_symbol {SP}* )
+    | block_close_symbol
+    ;
+
+block_close_symbol
+    =
+    "%}"
     ;
 
 line_block_expression
@@ -90,12 +100,17 @@ line_block_start
 
 line_block_end
     =
-    line_block_open "end" name:IDENTIFIER "\n"
+    line_block_open "end" name:IDENTIFIER ("\n" | $)
     ;
 
 line_block_open
     =
-    "\n" {SP}* @:"# " {SP}*
+    "\n" {SP}* line_block_open_symbol {SP}*
+    ;
+
+line_block_open_symbol
+    =
+    "#"
     ;
 
 line_block_parameters
@@ -148,15 +163,27 @@ variable_expression
     =
     variable_open type:`variable` name:conditional_expression variable_close
     ;
+
 variable_open
     =
-    | ( {SP}* @:"{{-" {SP}* )
-    | ( @:"{{" {SP}* )
+    | ( {SP}* variable_open_symbol "-" {SP}* )
+    | ( variable_open_symbol {SP}* )
+    ;
+
+variable_open_symbol
+    =
+    "{{"
     ;
+
 variable_close
     =
-    | ( {SP}* @:"-}}" {SP}* )
-    | ( {SP}* @:"}}" )
+    | ( {SP}* "-" variable_close_symbol {SP}* )
+    | ( {SP}* variable_close_symbol )
+    ;
+
+variable_close_symbol
+    =
+    "}}"
     ;
 
 variable_identifier
@@ -406,12 +433,21 @@ comment_expression
     comment_open comment:comment_content comment_close
     ;
 
-comment_open
+comment_open =
+    comment_open_symbol
+    ;
+
+comment_open_symbol
     =
     "{#"
     ;
 
 comment_close
+    =
+    comment_close_symbol
+    ;
+
+comment_close_symbol
     =
     "#}"
     ;
@@ -428,7 +464,12 @@ line_comment_expression
 
 line_comment_open
     =
-    {SP}* "## "
+    {SP}* line_comment_open_symbol
+    ;
+
+line_comment_open_symbol
+    =
+    '##'
     ;
 
 line_comment_content
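
The environment-level overrides added in the next file rely on TatSu's
@override decorator: when two rules share a name, a later definition
marked @override replaces the earlier one. A minimal sketch of that
mechanism (not part of this patch; the rule names and delimiters here
are made up, and it assumes TatSu's default of starting from the first
rule in the grammar):

    import tatsu

    BASE = '''
        start = block_open "name" block_close $ ;
        block_open = "{%" ;
        block_close = "%}" ;
    '''

    # Appending @override rules replaces the earlier definitions of the
    # same names; this is how the per-environment symbols take effect.
    CUSTOM = BASE + '''
        @override
        block_open = "<%" ;

        @override
        block_close = "%>" ;
    '''

    print(tatsu.compile(BASE).parse("{% name %}"))    # default delimiters
    print(tatsu.compile(CUSTOM).parse("<% name %>"))  # overridden delimiters
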
diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py
index 3c93c484..94ace534 100644
--- a/src/jinja2/environment.py
+++ b/src/jinja2/environment.py
@@ -50,6 +50,7 @@ from .utils import missing
 
 # for direct template usage we have up to ten living environments
 _spontaneous_environments = LRUCache(10)
+_grammar_cache = LRUCache(10)
 
 
 def get_spontaneous_environment(cls, *args):
@@ -527,6 +528,69 @@ class Environment:
         """Internal parsing function used by `parse` and `compile`."""
         return Parser(self, source, name, filename).parse()
 
+    def get_grammar(self):
+        import tatsu
+
+        grammar_extensions = ''
+
+        with open('grammar.ebnf', 'r') as grammar_file:
+            base_grammar = grammar_file.read()
+
+        if self.block_start_string:
+            grammar_extensions += '''
+                @override
+                block_open_symbol = %r;
+            ''' % (self.block_start_string)
+
+        if self.block_end_string:
+            grammar_extensions += '''
+                @override
+                block_close_symbol = %r;
+            ''' % (self.block_end_string)
+
+        if self.variable_start_string:
+            grammar_extensions += '''
+                @override
+                variable_open_symbol = %r;
+            ''' % (self.variable_start_string)
+
+        if self.variable_end_string:
+            grammar_extensions += '''
+                @override
+                variable_close_symbol = %r;
+            ''' % (self.variable_end_string)
+
+        if self.comment_start_string:
+            grammar_extensions += '''
+                @override
+                comment_open_symbol = %r;
+            ''' % (self.comment_start_string)
+
+        if self.comment_end_string:
+            grammar_extensions += '''
+                @override
+                comment_close_symbol = %r;
+            ''' % (self.comment_end_string)
+
+        if self.line_statement_prefix:
+            grammar_extensions += '''
+                @override
+                line_block_open_symbol = %r;
+            ''' % (self.line_statement_prefix)
+
+        if self.line_comment_prefix:
+            grammar_extensions += '''
+                @override
+                line_comment_open_symbol = %r;
+            ''' % (self.line_comment_prefix)
+
+        final_grammar = base_grammar + grammar_extensions
+
+        if final_grammar not in _grammar_cache:
+            _grammar_cache[final_grammar] = tatsu.compile(final_grammar)
+
+        return _grammar_cache[final_grammar]
+
     def lex(self, source, name=None, filename=None):
         """Lex the given sourcecode and return a generator that yields
         tokens as tuples in the form ``(lineno, token_type, value)``.
diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py
index 721988cd..47a11b02 100644
--- a/src/jinja2/parser.py
+++ b/src/jinja2/parser.py
@@ -5,11 +5,6 @@ from .exceptions import TemplateSyntaxError
 from .lexer import describe_token
 from .lexer import describe_token_expr
 
-import tatsu
-
-with open('grammar.ebnf', 'r') as grammar_file:
-    grammar = tatsu.compile(grammar_file.read())
-
 
 _statement_keywords = frozenset(
     [
@@ -47,6 +42,7 @@ class Parser:
     def __init__(self, environment, source, name=None, filename=None, state=None):
         self.environment = environment
         self.source = source
+        self.grammar = environment.get_grammar()
         self.stream = environment._tokenize(source, name, filename, state)
         self.name = name
         self.filename = filename
@@ -944,7 +940,7 @@ class Parser:
         from .new_parser import JinjaSemantics, parse_template
 
         result = parse_template(
-            grammar.parse(
+            self.grammar.parse(
                 self.source.rstrip('\n'),
                 whitespace='',
                 parseinfo=True,
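
A rough usage sketch of the new override path (not part of this patch):
the delimiter values are arbitrary, and it assumes the process runs
from the repository root, since get_grammar() opens grammar.ebnf by a
relative path.

    from jinja2 import Environment

    # An environment with custom delimiters; the matching *_symbol rules
    # are overridden when the grammar for this configuration is compiled.
    env = Environment(
        block_start_string="<%",
        block_end_string="%>",
        variable_start_string="${",
        variable_end_string="}",
    )

    grammar = env.get_grammar()           # compiled once for this configuration
    assert env.get_grammar() is grammar   # later calls hit the module-level _grammar_cache
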