git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-104016: Fixed off by 1 error in f string tokenizer (#104047)
author: jx124 <64946984+jx124@users.noreply.github.com>
Mon, 1 May 2023 19:15:47 +0000 (03:15 +0800)
committer: GitHub <noreply@github.com>
Mon, 1 May 2023 19:15:47 +0000 (19:15 +0000)
Co-authored-by: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Co-authored-by: Ken Jin <kenjin@python.org>
Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
Lib/test/test_fstring.py
Parser/tokenizer.c
Parser/tokenizer.h

index 5e94c99ae65af10825e494760a1314b78b2dade7..5c5176dc54a6d9ee5eddabf27b09eab39e5aa145 100644 (file)
@@ -565,7 +565,23 @@ x = (
         self.assertAllRaise(SyntaxError,
                             "f-string: expressions nested too deeply",
                             ['f"{1+2:{1+2:{1+1:{1}}}}"'])
+        
+        def create_nested_fstring(n):
+            if n == 0:
+                return "1+1"
+            prev = create_nested_fstring(n-1)
+            return f'f"{{{prev}}}"'
 
+        self.assertAllRaise(SyntaxError,
+                            "too many nested f-strings",
+                            [create_nested_fstring(160)])
+    
+    def test_syntax_error_in_nested_fstring(self):
+        # See gh-104016 for more information on this crash
+        self.assertAllRaise(SyntaxError,
+                            "invalid syntax",
+                            ['f"{1 1:' + ('{f"1:' * 199)])
+    
     def test_double_braces(self):
         self.assertEqual(f'{{', '{')
         self.assertEqual(f'a{{', 'a{')
index 8fb9be7bfd0182ba1b3ff29fcb71ab437cc78b81..d2f9fee110ebf5595e9de51a8454b3b692c504a3 100644 (file)
 #ifdef Py_DEBUG
 static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
     assert(tok->tok_mode_stack_index >= 0);
-    assert(tok->tok_mode_stack_index < MAXLEVEL);
+    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
     return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
 }
 static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
     assert(tok->tok_mode_stack_index >= 0);
-    assert(tok->tok_mode_stack_index < MAXLEVEL);
+    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL); 
     return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
 }
 #else
@@ -2235,6 +2235,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
 
         p_start = tok->start;
         p_end = tok->cur;
+        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
+            return MAKE_TOKEN(syntaxerror(tok, "too many nested f-strings"));
+        }
         tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
         the_current_tok->kind = TOK_FSTRING_MODE;
         the_current_tok->f_string_quote = quote;
index 8b4213c4ce3b5aefc41861ed3ca6b7ecfc95de15..5e2171885ac75b4ee37c164828b07d4d756da7de 100644 (file)
@@ -10,8 +10,9 @@ extern "C" {
 
 #include "pycore_token.h" /* For token types */
 
-#define MAXINDENT 100   /* Max indentation level */
-#define MAXLEVEL 200    /* Max parentheses level */
+#define MAXINDENT 100       /* Max indentation level */
+#define MAXLEVEL 200        /* Max parentheses level */
+#define MAXFSTRINGLEVEL 150 /* Max f-string nesting level */
 
 enum decoding_state {
     STATE_INIT,
@@ -123,7 +124,7 @@ struct tok_state {
     enum interactive_underflow_t interactive_underflow;
     int report_warnings;
     // TODO: Factor this into its own thing
-    tokenizer_mode tok_mode_stack[MAXLEVEL];
+    tokenizer_mode tok_mode_stack[MAXFSTRINGLEVEL];
     int tok_mode_stack_index;
     int tok_report_warnings;
 #ifdef Py_DEBUG