From: Kevin Brown
Date: Fri, 15 May 2020 16:27:10 +0000 (-0400)
Subject: Allow symbols to be overwritten by the environment
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a0364dd0190fd50551e1ee27f4f2cc2c32c7c5b3;p=thirdparty%2Fjinja.git

Allow symbols to be overwritten by the environment

This introduces a change to both the grammar and the parsing
environment that allows people to override the start/end symbols in
the grammar through the environment. This finally brings the parser to
the same level as the old parser and lexer when it comes to handling
those customizations.

This means that the grammar must be compiled dynamically to account
for these customizations per environment. A module-level LRU cache has
been added so that compiled grammars are cached instead of being
recompiled every time. This should cover most cases other than the
unit tests, since most applications do not change their environment
configuration frequently.

This also adds proper handling to the closing line block statement so
that it accepts either the end of the line or the end of the input.
---
diff --git a/grammar.ebnf b/grammar.ebnf
index 11d26e6e..0f8a6ada 100644
--- a/grammar.ebnf
+++ b/grammar.ebnf
@@ -67,14 +67,24 @@ block_end
 
 block_open
     =
-    | ( {SP}* @:"{%-" {SP}* )
-    | @:"{%" {SP}*
+    | ( {SP}* block_open_symbol "-" {SP}* )
+    | block_open_symbol {SP}*
+    ;
+
+block_open_symbol
+    =
+    "{%"
     ;
 
 block_close
     =
-    | ( @:"-%}" {SP}* )
-    | @:"%}"
+    | ( "-" block_close_symbol {SP}* )
+    | block_close_symbol
+    ;
+
+block_close_symbol
+    =
+    "%}"
     ;
 
 line_block_expression
@@ -90,12 +100,17 @@ line_block_start
 
 line_block_end
     =
-    line_block_open "end" name:IDENTIFIER "\n"
+    line_block_open "end" name:IDENTIFIER ("\n" | $)
     ;
 
 line_block_open
     =
-    "\n" {SP}* @:"# " {SP}*
+    "\n" {SP}* line_block_open_symbol {SP}*
+    ;
+
+line_block_open_symbol
+    =
+    "#"
     ;
 
 line_block_parameters
@@ -148,15 +163,27 @@ variable_expression
     =
     variable_open type:`variable` name:conditional_expression variable_close
     ;
+
 variable_open
     =
-    | ( {SP}* @:"{{-" {SP}* )
-    | ( @:"{{" {SP}* )
+    | ( {SP}* variable_open_symbol "-" {SP}* )
+    | ( variable_open_symbol {SP}* )
+    ;
+
+variable_open_symbol
+    =
+    "{{"
     ;
+
 variable_close
     =
-    | ( {SP}* @:"-}}" {SP}* )
-    | ( {SP}* @:"}}" )
+    | ( {SP}* "-" variable_close_symbol {SP}* )
+    | ( {SP}* variable_close_symbol )
+    ;
+
+variable_close_symbol
+    =
+    "}}"
     ;
 
 variable_identifier
@@ -406,12 +433,21 @@ comment_expression
     comment_open comment:comment_content comment_close
     ;
 
-comment_open
+comment_open =
+    comment_open_symbol
+    ;
+
+comment_open_symbol
     =
     "{#"
     ;
 
 comment_close
+    =
+    comment_close_symbol
+    ;
+
+comment_close_symbol
     =
     "#}"
     ;
@@ -428,7 +464,12 @@ line_comment_expression
 
 line_comment_open
     =
-    {SP}* "## "
+    {SP}* line_comment_open_symbol
+    ;
+
+line_comment_open_symbol
+    =
+    '##'
     ;
 
 line_comment_content
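
The environment-level overrides added in the next file rely on TatSu's
@override decorator: when two rules share a name, a later definition
marked @override replaces the earlier one. A minimal sketch of that
mechanism (not part of this patch; the rule names and delimiters here
are made up, and it assumes TatSu's default of starting from the first
rule in the grammar):

    import tatsu

    BASE = '''
        start = block_open "name" block_close $ ;
        block_open = "{%" ;
        block_close = "%}" ;
    '''

    # Appending @override rules replaces the earlier definitions of the
    # same names; this is how the per-environment symbols take effect.
    CUSTOM = BASE + '''
        @override
        block_open = "<%" ;

        @override
        block_close = "%>" ;
    '''

    print(tatsu.compile(BASE).parse("{% name %}"))    # default delimiters
    print(tatsu.compile(CUSTOM).parse("<% name %>"))  # overridden delimiters
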
diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py
index 3c93c484..94ace534 100644
--- a/src/jinja2/environment.py
+++ b/src/jinja2/environment.py
@@ -50,6 +50,7 @@ from .utils import missing
 
 # for direct template usage we have up to ten living environments
 _spontaneous_environments = LRUCache(10)
+_grammar_cache = LRUCache(10)
 
 
 def get_spontaneous_environment(cls, *args):
@@ -527,6 +528,69 @@ class Environment:
         """Internal parsing function used by `parse` and `compile`."""
         return Parser(self, source, name, filename).parse()
 
+    def get_grammar(self):
+        import tatsu
+
+        grammar_extensions = ''
+
+        with open('grammar.ebnf', 'r') as grammar_file:
+            base_grammar = grammar_file.read()
+
+        if self.block_start_string:
+            grammar_extensions += '''
+                @override
+                block_open_symbol = %r;
+            ''' % (self.block_start_string)
+
+        if self.block_end_string:
+            grammar_extensions += '''
+                @override
+                block_close_symbol = %r;
+            ''' % (self.block_end_string)
+
+        if self.variable_start_string:
+            grammar_extensions += '''
+                @override
+                variable_open_symbol = %r;
+            ''' % (self.variable_start_string)
+
+        if self.variable_end_string:
+            grammar_extensions += '''
+                @override
+                variable_close_symbol = %r;
+            ''' % (self.variable_end_string)
+
+        if self.comment_start_string:
+            grammar_extensions += '''
+                @override
+                comment_open_symbol = %r;
+            ''' % (self.comment_start_string)
+
+        if self.comment_end_string:
+            grammar_extensions += '''
+                @override
+                comment_close_symbol = %r;
+            ''' % (self.comment_end_string)
+
+        if self.line_statement_prefix:
+            grammar_extensions += '''
+                @override
+                line_block_open_symbol = %r;
+            ''' % (self.line_statement_prefix)
+
+        if self.line_comment_prefix:
+            grammar_extensions += '''
+                @override
+                line_comment_open_symbol = %r;
+            ''' % (self.line_comment_prefix)
+
+        final_grammar = base_grammar + grammar_extensions
+
+        if final_grammar not in _grammar_cache:
+            _grammar_cache[final_grammar] = tatsu.compile(final_grammar)
+
+        return _grammar_cache[final_grammar]
+
     def lex(self, source, name=None, filename=None):
         """Lex the given sourcecode and return a generator that yields
         tokens as tuples in the form ``(lineno, token_type, value)``.
diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py
index 721988cd..47a11b02 100644
--- a/src/jinja2/parser.py
+++ b/src/jinja2/parser.py
@@ -5,11 +5,6 @@ from .exceptions import TemplateSyntaxError
 from .lexer import describe_token
 from .lexer import describe_token_expr
 
-import tatsu
-
-with open('grammar.ebnf', 'r') as grammar_file:
-    grammar = tatsu.compile(grammar_file.read())
-
 
 _statement_keywords = frozenset(
     [
@@ -47,6 +42,7 @@ class Parser:
     def __init__(self, environment, source, name=None, filename=None, state=None):
         self.environment = environment
         self.source = source
+        self.grammar = environment.get_grammar()
         self.stream = environment._tokenize(source, name, filename, state)
         self.name = name
         self.filename = filename
@@ -944,7 +940,7 @@ class Parser:
         from .new_parser import JinjaSemantics, parse_template
 
         result = parse_template(
-            grammar.parse(
+            self.grammar.parse(
                 self.source.rstrip('\n'),
                 whitespace='',
                 parseinfo=True,
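
A rough usage sketch of the new override path (not part of this patch):
the delimiter values are arbitrary, and it assumes the process runs
from the repository root, since get_grammar() opens grammar.ebnf by a
relative path.

    from jinja2 import Environment

    # An environment with custom delimiters; the matching *_symbol rules
    # are overridden when the grammar for this configuration is compiled.
    env = Environment(
        block_start_string="<%",
        block_end_string="%>",
        variable_start_string="${",
        variable_end_string="}",
    )

    grammar = env.get_grammar()           # compiled once for this configuration
    assert env.get_grammar() is grammar   # later calls hit the module-level _grammar_cache
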