From 0c81b8fa1b8510ae70f9a372c6f7c054d56808b8 Mon Sep 17 00:00:00 2001 From: Gord Thompson Date: Mon, 19 Jan 2026 05:34:28 -0700 Subject: [PATCH] make qtoken for PostgreSQL _fk_regex_pattern less restrictive Improved the foreign key reflection regular expression pattern used by the PostgreSQL dialect to be more permissive in matching identifier characters, allowing it to correctly handle unicode characters in table and column names. This change improves compatibility with PostgreSQL variants such as CockroachDB that may use different quoting patterns in combination with unicode characters in their identifiers. Pull request courtesy Gord Thompson. Change-Id: Iaee340879400e01df2f776417e8b1018f1801cfe (cherry picked from commit d0d9f1b71115471b0a6918075383c2bddf2212b1) --- .../changelog/unreleased_20/pg_fk_reflect.rst | 10 +++++++ lib/sqlalchemy/dialects/postgresql/base.py | 2 +- test/dialect/postgresql/test_reflection.py | 30 +++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 doc/build/changelog/unreleased_20/pg_fk_reflect.rst diff --git a/doc/build/changelog/unreleased_20/pg_fk_reflect.rst b/doc/build/changelog/unreleased_20/pg_fk_reflect.rst new file mode 100644 index 0000000000..02db2cc613 --- /dev/null +++ b/doc/build/changelog/unreleased_20/pg_fk_reflect.rst @@ -0,0 +1,10 @@ +.. change:: + :tags: bug, postgresql + + Improved the foreign key reflection regular expression pattern used by the + PostgreSQL dialect to be more permissive in matching identifier characters, + allowing it to correctly handle unicode characters in table and column + names. This change improves compatibility with PostgreSQL variants such as + CockroachDB that may use different quoting patterns in combination with + unicode characters in their identifiers. Pull request courtesy Gord + Thompson. diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py index 77f25142c7..9904c6d467 100644 --- a/lib/sqlalchemy/dialects/postgresql/base.py +++ b/lib/sqlalchemy/dialects/postgresql/base.py @@ -4499,7 +4499,7 @@ class PGDialect(default.DefaultDialect): @util.memoized_property def _fk_regex_pattern(self): # optionally quoted token - qtoken = '(?:"[^"]+"|[A-Za-z0-9_]+?)' + qtoken = r'(?:"[^"]+"|[\w]+?)' # https://www.postgresql.org/docs/current/static/sql-createtable.html return re.compile( diff --git a/test/dialect/postgresql/test_reflection.py b/test/dialect/postgresql/test_reflection.py index 4380026e48..d353cfd335 100644 --- a/test/dialect/postgresql/test_reflection.py +++ b/test/dialect/postgresql/test_reflection.py @@ -980,6 +980,36 @@ class ReflectionTest( ) ) + @testing.combinations( + "FOREIGN KEY (tid) REFERENCES some_table(id)", + 'FOREIGN KEY ("tid") REFERENCES some_table(id)', + 'FOREIGN KEY (tid) REFERENCES "some_table"(id)', + 'FOREIGN KEY ("測試") REFERENCES unitable1("méil")', + "FOREIGN KEY (測試) REFERENCES unitable1(méil)", + 'FOREIGN KEY ("col_名前") REFERENCES "table_テーブル"("ref_參考")', + "FOREIGN KEY (普通_column) REFERENCES 普通_table(普通_ref)", + ( + 'FOREIGN KEY ("tid1", tid2) ' + 'REFERENCES some_schema.some_table("id1", id2)' + ), + ( + 'FOREIGN KEY ("測試1", 測試2) ' + 'REFERENCES "schema_スキーマ"."table_表"("ref1_參考", ref2_参考)' + ), + ) + def test_fk_regex_unicode_patterns(self, condef): + """Test that FK regex pattern handles unicode identifiers. + + This specifically tests the improved qtoken regex pattern to + support PostgreSQL variants such as CockroachDB that deliver + unquoted unicode identifiers in FK constraint definitions, whereas + PostgreSQL itself delivers the same identifiers with quotes. + + """ + FK_REGEX = postgresql.dialect()._fk_regex_pattern + match = re.search(FK_REGEX, condef) + self.assert_(match is not None, f"Expected regex to match: {condef}") + def test_reflect_default_over_128_chars(self, metadata, connection): Table( "t", -- 2.47.3