git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-108590: Fix sqlite3.iterdump for invalid Unicode in TEXT columns (#108657)
author: Corvin <corvin@corvin.dev>
Wed, 30 Aug 2023 09:06:21 +0000 (05:06 -0400)
committer: GitHub <noreply@github.com>
Wed, 30 Aug 2023 09:06:21 +0000 (09:06 +0000)
Co-authored-by: Erlend E. Aasland <erlend@python.org>
Lib/sqlite3/dump.py
Lib/test/test_sqlite3/test_dump.py
Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst [new file with mode: 0644]

index ead3360ce676086c722076d3783fd970a505321b..481d605194c7feada587254645692740eb469a17 100644 (file)
@@ -7,6 +7,10 @@
 # future enhancements, you should normally quote any identifier that
 # is an English language word, even if you do not have to."
 
+
+from contextlib import contextmanager
+
+
 def _quote_name(name):
     return '"{0}"'.format(name.replace('"', '""'))
 
@@ -15,6 +19,24 @@ def _quote_value(value):
     return "'{0}'".format(value.replace("'", "''"))
 
 
+def _force_decode(bs, *args, **kwargs):
+    # gh-108590: Don't fail if the database contains invalid Unicode data.
+    try:
+        return bs.decode(*args, **kwargs)
+    except UnicodeDecodeError:
+        return "".join([chr(c) for c in bs])
+
+
+@contextmanager
+def _text_factory(con, factory):
+    saved_factory = con.text_factory
+    con.text_factory = factory
+    try:
+        yield
+    finally:
+        con.text_factory = saved_factory
+
+
 def _iterdump(connection):
     """
     Returns an iterator to the dump of the database in an SQL text format.
@@ -74,8 +96,9 @@ def _iterdump(connection):
             )
         )
         query_res = cu.execute(q)
-        for row in query_res:
-            yield("{0};".format(row[0]))
+        with _text_factory(connection, bytes):
+            for row in query_res:
+                yield("{0};".format(_force_decode(row[0])))
 
     # Now when the type is 'index', 'trigger', or 'view'
     q = """
index 3107e1b165d9503f4037443c6b1f382dd273ceb9..0279ce68eeb5f1fa28192820eba1c8791d9a1676 100644 (file)
@@ -133,6 +133,21 @@ class DumpTests(MemoryDatabaseMixin, unittest.TestCase):
         actual = list(self.cx.iterdump())
         self.assertEqual(expected, actual)
 
+    def test_dump_unicode_invalid(self):
+        # gh-108590
+        expected = [
+            "BEGIN TRANSACTION;",
+            "CREATE TABLE foo (data TEXT);",
+            "INSERT INTO \"foo\" VALUES('a\x9f');",
+            "COMMIT;",
+        ]
+        self.cu.executescript("""
+            CREATE TABLE foo (data TEXT);
+            INSERT INTO foo VALUES (CAST(X'619f' AS TEXT));
+        """)
+        actual = list(self.cx.iterdump())
+        self.assertEqual(expected, actual)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst
new file mode 100644 (file)
index 0000000..50b41f2
--- /dev/null
@@ -0,0 +1 @@
+Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an incomplete SQL dump if a table includes invalid Unicode sequences. Patch by Corvin McPherson.