From: Mike Bayer Date: Sat, 18 Oct 2025 20:43:22 +0000 (-0400) Subject: improve sqlite reflection regex for unusual names/formats X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=189907be93392246ed5948506f1ca3921b8379f0;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git improve sqlite reflection regex for unusual names/formats A series of improvements have been made for reflection of CHECK constraints on SQLite. The reflection logic now correctly handles table names containing the strings "CHECK" or "CONSTRAINT", properly supports all four SQLite identifier quoting styles (double quotes, single quotes, brackets, and backticks) for constraint names, and accurately parses CHECK constraint expressions containing parentheses within string literals using balanced parenthesis matching with string context tracking. Big thanks to GruzdevAV for new test cases and implementation ideas. Fixes: #12924 Change-Id: I0390ac334c98e934c7e0353f47c9f43204791af5 --- diff --git a/doc/build/changelog/unreleased_20/12924.rst b/doc/build/changelog/unreleased_20/12924.rst new file mode 100644 index 0000000000..0183af9199 --- /dev/null +++ b/doc/build/changelog/unreleased_20/12924.rst @@ -0,0 +1,12 @@ +.. change:: + :tags: bug, sqlite, reflection + :tickets: 12924 + + A series of improvements have been made for reflection of CHECK constraints + on SQLite. The reflection logic now correctly handles table names + containing the strings "CHECK" or "CONSTRAINT", properly supports all four + SQLite identifier quoting styles (double quotes, single quotes, brackets, + and backticks) for constraint names, and accurately parses CHECK constraint + expressions containing parentheses within string literals using balanced + parenthesis matching with string context tracking. Big thanks to + GruzdevAV for new test cases and implementation ideas. diff --git a/lib/sqlalchemy/dialects/sqlite/base.py b/lib/sqlalchemy/dialects/sqlite/base.py index edd973fb51..a05d2c3602 100644 --- a/lib/sqlalchemy/dialects/sqlite/base.py +++ b/lib/sqlalchemy/dialects/sqlite/base.py @@ -2786,27 +2786,88 @@ class SQLiteDialect(default.DefaultDialect): connection, table_name, schema=schema, **kw ) - # NOTE NOTE NOTE - # DO NOT CHANGE THIS REGULAR EXPRESSION. There is no known way - # to parse CHECK constraints that contain newlines themselves using - # regular expressions, and the approach here relies upon each - # individual - # CHECK constraint being on a single line by itself. This - # necessarily makes assumptions as to how the CREATE TABLE - # was emitted. A more comprehensive DDL parsing solution would be - # needed to improve upon the current situation. See #11840 for - # background - CHECK_PATTERN = r"(?:CONSTRAINT (.+) +)?CHECK *\( *(.+) *\),? *" - cks = [] + # Extract CHECK constraints by properly handling balanced parentheses + # and avoiding false matches when CHECK/CONSTRAINT appear in table + # names. See #12924 for context. + # + # SQLite supports 4 identifier quote styles (see + # sqlite.org/lang_keywords.html): + # - Double quotes "..." (standard SQL) + # - Brackets [...] (MS Access/SQL Server compatibility) + # - Backticks `...` (MySQL compatibility) + # - Single quotes '...' (SQLite extension) + # + # NOTE: there is not currently a way to parse CHECK constraints that + # contain newlines as the approach here relies upon each individual + # CHECK constraint being on a single line by itself. This necessarily + # makes assumptions as to how the CREATE TABLE was emitted. + CHECK_PATTERN = re.compile( + r""" + (? name + # Single quotes: 'name' -> name + # Brackets: [name] -> name + # Backticks: `name` -> name + constraint_name = re.sub( + r'^(["\'`])(.+)\1$|^\[(.+)\]$', + lambda m: m.group(2) or m.group(3), + constraint_name, + flags=re.DOTALL, + ) - if name: - name = re.sub(r'^"|"$', "", name) + # Find the matching closing parenthesis by counting balanced parens + # Must track string context to ignore parens inside string literals + start = match.end() # Position after 'CHECK (' + paren_count = 1 + in_single_quote = False + in_double_quote = False + + for pos, char in enumerate(table_data[start:], start): + # Track string literal context + if char == "'" and not in_double_quote: + in_single_quote = not in_single_quote + elif char == '"' and not in_single_quote: + in_double_quote = not in_double_quote + # Only count parens when not inside a string literal + elif not in_single_quote and not in_double_quote: + if char == "(": + paren_count += 1 + elif char == ")": + paren_count -= 1 + if paren_count == 0: + # Successfully found matching closing parenthesis + sqltext = table_data[start:pos].strip() + cks.append( + {"sqltext": sqltext, "name": constraint_name} + ) + break - cks.append({"sqltext": match.group(2), "name": name}) cks.sort(key=lambda d: d["name"] or "~") # sort None as last if cks: return cks diff --git a/test/dialect/sqlite/test_reflection.py b/test/dialect/sqlite/test_reflection.py index dfd3f481bc..804116f3fe 100644 --- a/test/dialect/sqlite/test_reflection.py +++ b/test/dialect/sqlite/test_reflection.py @@ -279,6 +279,66 @@ class ConstraintReflectionTest(fixtures.TestBase): "REFERENCES fake_table)" ) + # tables for issue #12924 - table names with CHECK/CONSTRAINT + conn.exec_driver_sql( + "CREATE TABLE oneline ( field INTEGER CHECK(field>0))" + ) + conn.exec_driver_sql( + "CREATE TABLE oneline_nested ( field INTEGER " + "CHECK((field>0 and field<22) or (field>99 and field<1010)))" + ) + conn.exec_driver_sql( + "CREATE TABLE oneline_2constraints ( pk INTEGER " + "CONSTRAINT pkname PRIMARY KEY, field INTEGER " + "CONSTRAINT chname CHECK((field>0 and field<22) or " + "(field>99 and field<1010)))" + ) + conn.exec_driver_sql( + "CREATE TABLE oneline_nameCHECK ( pk INTEGER " + "CONSTRAINT pkname PRIMARY KEY )" + ) + conn.exec_driver_sql( + "CREATE TABLE oneline_nameCONSTRAINT " + "( field INTEGER CHECK (field IN (1, 0, -1)) )" + ) + conn.exec_driver_sql( + "CREATE TABLE twochecks_oneline (\n" + "field INTEGER,\n" + "CHECK (field>1), CHECK( field<9)\n" + ")" + ) + + # Test all SQLite quote styles for constraint names + conn.exec_driver_sql( + "CREATE TABLE quote_styles ( " + "field INTEGER, " + 'CONSTRAINT "double_quoted" CHECK (field > 0), ' + "CONSTRAINT 'single_quoted' CHECK (field < 100), " + "CONSTRAINT [bracket_quoted] CHECK (field != 50), " + "CONSTRAINT `backtick_quoted` CHECK (field >= 10)" + ")" + ) + + # Test CHECK constraints with parentheses in string literals + # These cases have unbalanced parens if we naively count all parens + conn.exec_driver_sql( + "CREATE TABLE parens_in_strings (" + " field TEXT," + " CHECK (field != '(')," + " CHECK (field != ')')," + " CHECK (field NOT LIKE '%(')," + " CHECK (field IN (')', '(', 'test'))," + # Escaped quotes (SQLite uses '' to escape quotes) + " CHECK (field != 'it''s (not) valid')," + ' CHECK (field != "say ""(hello)"" "),' + " CHECK (field NOT IN ('()', 'a''b''c', ')'))," + # Complex nested cases with lots of unbalanced parens in + # strings + " CHECK (field != '((' OR field = ')))')," + ' CHECK (field LIKE "%))(%" OR field LIKE "%)(%")' + ")" + ) + @classmethod def teardown_test_class(cls): with testing.db.begin() as conn: @@ -305,6 +365,14 @@ class ConstraintReflectionTest(fixtures.TestBase): "a1", "a2", "r", + "oneline", + "oneline_nested", + "oneline_2constraints", + "oneline_nameCHECK", + "oneline_nameCONSTRAINT", + "twochecks_oneline", + "quote_styles", + "parens_in_strings", ]: conn.exec_driver_sql("drop table %s" % name) @@ -881,7 +949,6 @@ class ConstraintReflectionTest(fixtures.TestBase): ], ) - @testing.fails("need to come up with new regex and/or DDL parsing") def test_check_constraint_multiline(self): """test for #11677""" @@ -966,6 +1033,116 @@ class ConstraintReflectionTest(fixtures.TestBase): else: assert False + def test_check_constraint_oneline(self): + inspector = inspect(testing.db) + eq_( + inspector.get_check_constraints("oneline"), + [ + {"sqltext": "field>0", "name": None}, + ], + ) + + def test_check_constraint_oneline_nested(self): + inspector = inspect(testing.db) + eq_( + inspector.get_check_constraints("oneline_nested"), + [ + { + "sqltext": "(field>0 and field<22) " + "or (field>99 and field<1010)", + "name": None, + }, + ], + ) + + def test_check_constraint_oneline_2constraints(self): + inspector = inspect(testing.db) + eq_( + inspector.get_check_constraints("oneline_2constraints"), + [ + { + "sqltext": "(field>0 and field<22) " + "or (field>99 and field<1010)", + "name": "chname", + }, + ], + ) + + def test_check_constraint_oneline_nameCHECK(self): + inspector = inspect(testing.db) + eq_( + inspector.get_check_constraints("oneline_nameCHECK"), + [], + ) + + def test_check_constraint_oneline_nameCONSTRAINT(self): + inspector = inspect(testing.db) + eq_( + inspector.get_check_constraints("oneline_nameCONSTRAINT"), + [ + {"sqltext": "field IN (1, 0, -1)", "name": None}, + ], + ) + + def test_check_constraint_twochecks_oneline(self): + inspector = inspect(testing.db) + eq_( + inspector.get_check_constraints("twochecks_oneline"), + [ + {"sqltext": "field>1", "name": None}, + {"sqltext": "field<9", "name": None}, + ], + ) + + def test_check_constraint_quote_styles(self): + """Test all SQLite identifier quote styles for constraint names. + + SQLite supports 4 quote styles: double quotes, single quotes, + brackets, and backticks (for compatibility with other databases). + """ + inspector = inspect(testing.db) + eq_( + inspector.get_check_constraints("quote_styles"), + [ + {"sqltext": "field >= 10", "name": "backtick_quoted"}, + {"sqltext": "field != 50", "name": "bracket_quoted"}, + {"sqltext": "field > 0", "name": "double_quoted"}, + {"sqltext": "field < 100", "name": "single_quoted"}, + ], + ) + + def test_check_constraint_parens_in_strings(self): + """Test CHECK constraints with parentheses inside string literals. + + Parentheses inside quoted strings should not be counted when + matching balanced parentheses in the constraint expression. + These test cases have unbalanced parens if strings are not handled, + and include escaped quotes (SQLite uses '' to escape, not backslash). + """ + inspector = inspect(testing.db) + eq_( + inspector.get_check_constraints("parens_in_strings"), + [ + {"sqltext": "field != '('", "name": None}, + {"sqltext": "field != ')'", "name": None}, + {"sqltext": "field NOT LIKE '%('", "name": None}, + {"sqltext": "field IN (')', '(', 'test')", "name": None}, + # Escaped quotes with parens + {"sqltext": "field != 'it''s (not) valid'", "name": None}, + {"sqltext": 'field != "say ""(hello)"" "', "name": None}, + { + "sqltext": "field NOT IN ('()', 'a''b''c', ')')", + "name": None, + }, + # Complex nested cases + {"sqltext": "field != '((' OR field = ')))'", "name": None}, + { + "sqltext": 'field LIKE "%))(%" OR field LIKE "%)(%"', + "name": None, + }, + ], + ) + class TypeReflectionTest(fixtures.TestBase): __only_on__ = "sqlite"