git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-43950: Distinguish errors happening on character offset decoding (GH-27217)
author: Batuhan Taskaya <batuhan@python.org>
Tue, 20 Jul 2021 15:42:12 +0000 (18:42 +0300)
committer: GitHub <noreply@github.com>
Tue, 20 Jul 2021 15:42:12 +0000 (16:42 +0100)
Parser/pegen.c
Python/traceback.c

index 3e8ddfbf53cf751a55257d6e93d39594d02672d9..106dba9ab49ad3ca41fd31fb5c5010dfa389c5cf 100644 (file)
@@ -402,7 +402,7 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
 {
     const char *str = PyUnicode_AsUTF8(line);
     if (!str) {
-        return 0;
+        return -1;
     }
     Py_ssize_t len = strlen(str);
     if (col_offset > len + 1) {
@@ -411,7 +411,7 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
     assert(col_offset >= 0);
     PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
     if (!text) {
-        return 0;
+        return -1;
     }
     Py_ssize_t size = PyUnicode_GET_LENGTH(text);
     Py_DECREF(text);
@@ -499,9 +499,17 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
 
     if (p->tok->encoding != NULL) {
         col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
-        end_col_number = end_col_number > 0 ?
-                         _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset) :
-                         end_col_number;
+        if (col_number < 0) {
+            goto error;
+        }
+        if (end_col_number > 0) {
+            Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
+            if (end_col_offset < 0) {
+                goto error;
+            } else {
+                end_col_number = end_col_offset;
+            }
+        }
     }
     tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
     if (!tmp) {
index 643096c81fc8f50861cc1b594706adc1fde68288..e02caef6f9bce7cdf4a3ce210b5b8f1aeb1fbfa7 100644 (file)
@@ -745,7 +745,17 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
     assert(source_line);
     Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
+    if (start_offset < 0) {
+        err = ignore_source_errors() < 0;
+        goto done;
+    }
+
     Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
+    if (end_offset < 0) {
+        err = ignore_source_errors() < 0;
+        goto done;
+    }
+
     Py_ssize_t left_end_offset = -1;
     Py_ssize_t right_start_offset = -1;