]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
improve sqlite reflection regex for unusual names/formats
authorMike Bayer <mike_mp@zzzcomputing.com>
Sat, 18 Oct 2025 20:43:22 +0000 (16:43 -0400)
committerMike Bayer <mike_mp@zzzcomputing.com>
Fri, 24 Oct 2025 13:54:44 +0000 (09:54 -0400)
A series of improvements have been made for reflection of CHECK constraints
on SQLite. The reflection logic now correctly handles table names
containing the strings "CHECK" or "CONSTRAINT", properly supports all four
SQLite identifier quoting styles (double quotes, single quotes, brackets,
and backticks) for constraint names, and accurately parses CHECK constraint
expressions containing parentheses within string literals using balanced
parenthesis matching with string context tracking.    Big thanks to
GruzdevAV for new test cases and implementation ideas.

Fixes: #12924
Change-Id: I0390ac334c98e934c7e0353f47c9f43204791af5

doc/build/changelog/unreleased_20/12924.rst [new file with mode: 0644]
lib/sqlalchemy/dialects/sqlite/base.py
test/dialect/sqlite/test_reflection.py

diff --git a/doc/build/changelog/unreleased_20/12924.rst b/doc/build/changelog/unreleased_20/12924.rst
new file mode 100644 (file)
index 0000000..0183af9
--- /dev/null
@@ -0,0 +1,12 @@
+.. change::
+    :tags: bug, sqlite, reflection
+    :tickets: 12924
+
+    A series of improvements have been made for reflection of CHECK constraints
+    on SQLite. The reflection logic now correctly handles table names
+    containing the strings "CHECK" or "CONSTRAINT", properly supports all four
+    SQLite identifier quoting styles (double quotes, single quotes, brackets,
+    and backticks) for constraint names, and accurately parses CHECK constraint
+    expressions containing parentheses within string literals using balanced
+    parenthesis matching with string context tracking.    Big thanks to
+    GruzdevAV for new test cases and implementation ideas.
index edd973fb512149ad23f663acd5ca41f1abfb167a..a05d2c3602cc6ca60ce3ed4197e543945b73916c 100644 (file)
@@ -2786,27 +2786,88 @@ class SQLiteDialect(default.DefaultDialect):
             connection, table_name, schema=schema, **kw
         )
 
-        # NOTE NOTE NOTE
-        # DO NOT CHANGE THIS REGULAR EXPRESSION.   There is no known way
-        # to parse CHECK constraints that contain newlines themselves using
-        # regular expressions, and the approach here relies upon each
-        # individual
-        # CHECK constraint being on a single line by itself.   This
-        # necessarily makes assumptions as to how the CREATE TABLE
-        # was emitted.   A more comprehensive DDL parsing solution would be
-        # needed to improve upon the current situation. See #11840 for
-        # background
-        CHECK_PATTERN = r"(?:CONSTRAINT (.+) +)?CHECK *\( *(.+) *\),? *"
-        cks = []
+        # Extract CHECK constraints by properly handling balanced parentheses
+        # and avoiding false matches when CHECK/CONSTRAINT appear in table
+        # names. See #12924 for context.
+        #
+        # SQLite supports 4 identifier quote styles (see
+        # sqlite.org/lang_keywords.html):
+        # - Double quotes "..." (standard SQL)
+        # - Brackets [...] (MS Access/SQL Server compatibility)
+        # - Backticks `...` (MySQL compatibility)
+        # - Single quotes '...' (SQLite extension)
+        #
+        # NOTE: there is not currently a way to parse CHECK constraints that
+        # contain newlines as the approach here relies upon each individual
+        # CHECK constraint being on a single line by itself.   This necessarily
+        # makes assumptions as to how the CREATE TABLE was emitted.
+        CHECK_PATTERN = re.compile(
+            r"""
+            (?<![A-Za-z0-9_])   # Negative lookbehind: ensure CHECK is not
+                                # part of an identifier (e.g., table name
+                                # like "tableCHECK")
+
+            (?:                 # Optional CONSTRAINT clause
+                CONSTRAINT\s+
+                (               # Group 1: Constraint name (quoted or unquoted)
+                    "(?:[^"]|"")+"        # Double-quoted: "name" or "na""me"
+                    |'(?:[^']|'')+'  # Single-quoted: 'name' or 'na''me'
+                    |\[(?:[^\]]|\]\])+\]  # Bracket-quoted: [name] or [na]]me]
+                    |`(?:[^`]|``)+`       # Backtick-quoted: `name` or `na``me`
+                    |\S+                  # Unquoted: simple_name
+                )
+                \s+
+            )?
 
-        for match in re.finditer(CHECK_PATTERN, table_data or "", re.I):
+            CHECK\s*\(          # CHECK keyword followed by opening paren
+            """,
+            re.VERBOSE | re.IGNORECASE,
+        )
+        cks = []
 
-            name = match.group(1)
+        for match in re.finditer(CHECK_PATTERN, table_data or ""):
+            constraint_name = match.group(1)
+
+            if constraint_name:
+                # Remove surrounding quotes if present
+                # Double quotes: "name" -> name
+                # Single quotes: 'name' -> name
+                # Brackets: [name] -> name
+                # Backticks: `name` -> name
+                constraint_name = re.sub(
+                    r'^(["\'`])(.+)\1$|^\[(.+)\]$',
+                    lambda m: m.group(2) or m.group(3),
+                    constraint_name,
+                    flags=re.DOTALL,
+                )
 
-            if name:
-                name = re.sub(r'^"|"$', "", name)
+            # Find the matching closing parenthesis by counting balanced parens
+            # Must track string context to ignore parens inside string literals
+            start = match.end()  # Position after 'CHECK ('
+            paren_count = 1
+            in_single_quote = False
+            in_double_quote = False
+
+            for pos, char in enumerate(table_data[start:], start):
+                # Track string literal context
+                if char == "'" and not in_double_quote:
+                    in_single_quote = not in_single_quote
+                elif char == '"' and not in_single_quote:
+                    in_double_quote = not in_double_quote
+                # Only count parens when not inside a string literal
+                elif not in_single_quote and not in_double_quote:
+                    if char == "(":
+                        paren_count += 1
+                    elif char == ")":
+                        paren_count -= 1
+                        if paren_count == 0:
+                            # Successfully found matching closing parenthesis
+                            sqltext = table_data[start:pos].strip()
+                            cks.append(
+                                {"sqltext": sqltext, "name": constraint_name}
+                            )
+                            break
 
-            cks.append({"sqltext": match.group(2), "name": name})
         cks.sort(key=lambda d: d["name"] or "~")  # sort None as last
         if cks:
             return cks
index dfd3f481bc3386025c8f85555456aac44fe030ef..804116f3fe6b60f985d32254f2c04619927066ed 100644 (file)
@@ -279,6 +279,66 @@ class ConstraintReflectionTest(fixtures.TestBase):
                 "REFERENCES fake_table)"
             )
 
+            # tables for issue #12924 - table names with CHECK/CONSTRAINT
+            conn.exec_driver_sql(
+                "CREATE TABLE oneline ( field INTEGER CHECK(field>0))"
+            )
+            conn.exec_driver_sql(
+                "CREATE TABLE oneline_nested ( field INTEGER "
+                "CHECK((field>0 and field<22) or (field>99 and field<1010)))"
+            )
+            conn.exec_driver_sql(
+                "CREATE TABLE oneline_2constraints ( pk INTEGER "
+                "CONSTRAINT pkname PRIMARY KEY, field INTEGER "
+                "CONSTRAINT chname CHECK((field>0 and field<22) or "
+                "(field>99 and field<1010)))"
+            )
+            conn.exec_driver_sql(
+                "CREATE TABLE oneline_nameCHECK ( pk INTEGER "
+                "CONSTRAINT pkname PRIMARY KEY )"
+            )
+            conn.exec_driver_sql(
+                "CREATE TABLE oneline_nameCONSTRAINT "
+                "( field INTEGER CHECK (field IN (1, 0, -1)) )"
+            )
+            conn.exec_driver_sql(
+                "CREATE TABLE twochecks_oneline (\n"
+                "field INTEGER,\n"
+                "CHECK (field>1), CHECK(  field<9)\n"
+                ")"
+            )
+
+            # Test all SQLite quote styles for constraint names
+            conn.exec_driver_sql(
+                "CREATE TABLE quote_styles ( "
+                "field INTEGER, "
+                'CONSTRAINT "double_quoted" CHECK (field > 0), '
+                "CONSTRAINT 'single_quoted' CHECK (field < 100), "
+                "CONSTRAINT [bracket_quoted] CHECK (field != 50), "
+                "CONSTRAINT `backtick_quoted` CHECK (field >= 10)"
+                ")"
+            )
+
+            # Test CHECK constraints with parentheses in string literals
+            # These cases have unbalanced parens if we naively count all parens
+            conn.exec_driver_sql(
+                "CREATE TABLE parens_in_strings ("
+                " field TEXT,"
+                " CHECK (field != '('),"
+                " CHECK (field != ')'),"
+                " CHECK (field NOT LIKE '%('),"
+                " CHECK (field IN (')', '(', 'test')),"
+                # Escaped quotes (SQLite uses '' to escape quotes)
+                " CHECK (field != 'it''s (not) valid'),"
+                ' CHECK (field != "say ""(hello)"" "),'
+                " CHECK (field NOT IN ('()', 'a''b''c', ')')),"
+                # Complex nested cases with lots of unbalanced parens in
+                # strings
+                " CHECK (field != '((' OR field = ')))'),"
+                ' CHECK (field LIKE "%))(%" OR field LIKE "%)(%")'
+                ")"
+            )
+
     @classmethod
     def teardown_test_class(cls):
         with testing.db.begin() as conn:
@@ -305,6 +365,14 @@ class ConstraintReflectionTest(fixtures.TestBase):
                 "a1",
                 "a2",
                 "r",
+                "oneline",
+                "oneline_nested",
+                "oneline_2constraints",
+                "oneline_nameCHECK",
+                "oneline_nameCONSTRAINT",
+                "twochecks_oneline",
+                "quote_styles",
+                "parens_in_strings",
             ]:
                 conn.exec_driver_sql("drop table %s" % name)
 
@@ -881,7 +949,6 @@ class ConstraintReflectionTest(fixtures.TestBase):
             ],
         )
 
-    @testing.fails("need to come up with new regex and/or DDL parsing")
     def test_check_constraint_multiline(self):
         """test for #11677"""
 
@@ -966,6 +1033,116 @@ class ConstraintReflectionTest(fixtures.TestBase):
         else:
             assert False
 
+    def test_check_constraint_oneline(self):
+        inspector = inspect(testing.db)
+        eq_(
+            inspector.get_check_constraints("oneline"),
+            [
+                {"sqltext": "field>0", "name": None},
+            ],
+        )
+
+    def test_check_constraint_oneline_nested(self):
+        inspector = inspect(testing.db)
+        eq_(
+            inspector.get_check_constraints("oneline_nested"),
+            [
+                {
+                    "sqltext": "(field>0 and field<22) "
+                    "or (field>99 and field<1010)",
+                    "name": None,
+                },
+            ],
+        )
+
+    def test_check_constraint_oneline_2constraints(self):
+        inspector = inspect(testing.db)
+        eq_(
+            inspector.get_check_constraints("oneline_2constraints"),
+            [
+                {
+                    "sqltext": "(field>0 and field<22) "
+                    "or (field>99 and field<1010)",
+                    "name": "chname",
+                },
+            ],
+        )
+
+    def test_check_constraint_oneline_nameCHECK(self):
+        inspector = inspect(testing.db)
+        eq_(
+            inspector.get_check_constraints("oneline_nameCHECK"),
+            [],
+        )
+
+    def test_check_constraint_oneline_nameCONSTRAINT(self):
+        inspector = inspect(testing.db)
+        eq_(
+            inspector.get_check_constraints("oneline_nameCONSTRAINT"),
+            [
+                {"sqltext": "field IN (1, 0, -1)", "name": None},
+            ],
+        )
+
+    def test_check_constraint_twochecks_oneline(self):
+        inspector = inspect(testing.db)
+        eq_(
+            inspector.get_check_constraints("twochecks_oneline"),
+            [
+                {"sqltext": "field>1", "name": None},
+                {"sqltext": "field<9", "name": None},
+            ],
+        )
+
+    def test_check_constraint_quote_styles(self):
+        """Test all SQLite identifier quote styles for constraint names.
+
+        SQLite supports 4 quote styles: double quotes, single quotes,
+        brackets, and backticks (for compatibility with other databases).
+        """
+        inspector = inspect(testing.db)
+        eq_(
+            inspector.get_check_constraints("quote_styles"),
+            [
+                {"sqltext": "field >= 10", "name": "backtick_quoted"},
+                {"sqltext": "field != 50", "name": "bracket_quoted"},
+                {"sqltext": "field > 0", "name": "double_quoted"},
+                {"sqltext": "field < 100", "name": "single_quoted"},
+            ],
+        )
+
+    def test_check_constraint_parens_in_strings(self):
+        """Test CHECK constraints with parentheses inside string literals.
+
+        Parentheses inside quoted strings should not be counted when
+        matching balanced parentheses in the constraint expression.
+        These test cases have unbalanced parens if strings are not handled,
+        and include escaped quotes (SQLite uses '' to escape, not backslash).
+        """
+        inspector = inspect(testing.db)
+        eq_(
+            inspector.get_check_constraints("parens_in_strings"),
+            [
+                {"sqltext": "field != '('", "name": None},
+                {"sqltext": "field != ')'", "name": None},
+                {"sqltext": "field NOT LIKE '%('", "name": None},
+                {"sqltext": "field IN (')', '(', 'test')", "name": None},
+                # Escaped quotes with parens
+                {"sqltext": "field != 'it''s (not) valid'", "name": None},
+                {"sqltext": 'field != "say ""(hello)"" "', "name": None},
+                {
+                    "sqltext": "field NOT IN ('()', 'a''b''c', ')')",
+                    "name": None,
+                },
+                # Complex nested cases
+                {"sqltext": "field != '((' OR field = ')))'", "name": None},
+                {
+                    "sqltext": 'field LIKE "%))(%" OR field LIKE "%)(%"',
+                    "name": None,
+                },
+            ],
+        )
+
 
 class TypeReflectionTest(fixtures.TestBase):
     __only_on__ = "sqlite"