pg_upgrade check for Unicode-dependent relations.

author Jeff Davis <jdavis@postgresql.org>

Thu, 3 Apr 2025 17:45:38 +0000 (10:45 -0700)

committer Jeff Davis <jdavis@postgresql.org>

Thu, 3 Apr 2025 17:45:38 +0000 (10:45 -0700)
author Jeff Davis <jdavis@postgresql.org>
Thu, 3 Apr 2025 17:45:38 +0000 (10:45 -0700)
committer Jeff Davis <jdavis@postgresql.org>
Thu, 3 Apr 2025 17:45:38 +0000 (10:45 -0700)
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c

index 02d9146e5ed7b2ac0e2b98bd25175faeb2a62cb9..3189de4df96610c7b692ef03fc0a178d586f810b 100644 (file)
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -13,6 +13,7 @@
  #include "catalog/pg_class_d.h"
  #include "fe_utils/string_utils.h"
  #include "pg_upgrade.h"
+#include "common/unicode_version.h"
  
  static void check_new_cluster_is_empty(void);
  static void check_is_install_user(ClusterInfo *cluster);
@@ -25,6 +26,7 @@ static void check_for_tables_with_oids(ClusterInfo *cluster);
  static void check_for_pg_role_prefix(ClusterInfo *cluster);
  static void check_for_new_tablespace_dir(void);
  static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
+static void check_for_unicode_update(ClusterInfo *cluster);
  static void check_new_cluster_logical_replication_slots(void);
  static void check_new_cluster_subscription_configuration(void);
  static void check_old_cluster_for_valid_slots(void);
@@ -633,6 +635,12 @@ check_and_dump_old_cluster(void)
  
         check_for_data_types_usage(&old_cluster);
  
+       /*
+        * Unicode updates can affect some objects that use expressions with
+        * functions dependent on Unicode.
+        */
+       check_for_unicode_update(&old_cluster);
+
         /*
          * PG 14 changed the function signature of encoding conversion functions.
          * Conversions from older versions cannot be upgraded automatically
@@ -1754,6 +1762,178 @@ check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
                 check_ok();
  }
  
+/*
+ * Callback function for processing results of query for
+ * check_for_unicode_update()'s UpgradeTask.  If the query returned any rows
+ * (i.e., the check failed), write the details to the report file.
+ */
+static void
+process_unicode_update(DbInfo *dbinfo, PGresult *res, void *arg)
+{
+       UpgradeTaskReport *report = (UpgradeTaskReport *) arg;
+       int                     ntups = PQntuples(res);
+       int                     i_reloid = PQfnumber(res, "reloid");
+       int                     i_nspname = PQfnumber(res, "nspname");
+       int                     i_relname = PQfnumber(res, "relname");
+
+       if (ntups == 0)
+               return;
+
+       if (report->file == NULL &&
+               (report->file = fopen_priv(report->path, "w")) == NULL)
+               pg_fatal("could not open file \"%s\": %m", report->path);
+
+       fprintf(report->file, "In database: %s\n", dbinfo->db_name);
+
+       for (int rowno = 0; rowno < ntups; rowno++)
+               fprintf(report->file, "  (oid=%s) %s.%s\n",
+                               PQgetvalue(res, rowno, i_reloid),
+                               PQgetvalue(res, rowno, i_nspname),
+                               PQgetvalue(res, rowno, i_relname));
+}
+
+/*
+ * Check if the Unicode version built into Postgres changed between the old
+ * cluster and the new cluster.
+ */
+static bool
+unicode_version_changed(ClusterInfo *cluster)
+{
+       PGconn     *conn_template1 = connectToServer(cluster, "template1");
+       PGresult   *res;
+       char       *old_unicode_version;
+       bool            unicode_updated;
+
+       res = executeQueryOrDie(conn_template1, "SELECT unicode_version()");
+       old_unicode_version = PQgetvalue(res, 0, 0);
+       unicode_updated = (strcmp(old_unicode_version, PG_UNICODE_VERSION) != 0);
+
+       PQclear(res);
+       PQfinish(conn_template1);
+
+       return unicode_updated;
+}
+
+/*
+ * check_for_unicode_update()
+ *
+ * Check if the version of Unicode in the old server and the new server
+ * differ. If so, check for indexes, partitioned tables, or constraints that
+ * use expressions with functions dependent on Unicode behavior.
+ */
+static void
+check_for_unicode_update(ClusterInfo *cluster)
+{
+       UpgradeTaskReport report;
+       UpgradeTask *task = upgrade_task_create();
+       const char *query;
+
+       /*
+        * The builtin provider did not exist prior to version 17. While there are
+        * still problems that could potentially be caught from earlier versions,
+        * such as an index on NORMALIZE(), we don't check for that here.
+        */
+       if (GET_MAJOR_VERSION(cluster->major_version) < 1700)
+               return;
+
+       prep_status("Checking for objects affected by Unicode update");
+
+       if (!unicode_version_changed(cluster))
+       {
+               check_ok();
+               return;
+       }
+
+       report.file = NULL;
+       snprintf(report.path, sizeof(report.path), "%s/%s",
+                        log_opts.basedir,
+                        "unicode_dependent_rels.txt");
+
+       query =
+       /* collations that use built-in Unicode for character semantics */
+               "WITH collations(collid) AS ( "
+               "  SELECT oid FROM pg_collation "
+               "  WHERE collprovider='b' AND colllocale IN ('C.UTF-8','PG_UNICODE_FAST') "
+       /* include default collation, if appropriate */
+               "  UNION "
+               "  SELECT 'pg_catalog.default'::regcollation FROM pg_database "
+               "  WHERE datname = current_database() AND "
+               "  datlocprovider='b' AND datlocale IN ('C.UTF-8','PG_UNICODE_FAST') "
+               "), "
+       /* functions that use built-in Unicode */
+               "functions(procid) AS ( "
+               "  SELECT proc.oid FROM pg_proc proc "
+               "  WHERE proname IN ('normalize','unicode_assigned','unicode_version','is_normalized') AND "
+               "        pronamespace='pg_catalog'::regnamespace "
+               "), "
+       /* operators that use the input collation for character semantics */
+               "coll_operators(operid, procid, collid) AS ( "
+               "  SELECT oper.oid, oper.oprcode, collid FROM pg_operator oper, collations "
+               "  WHERE oprname IN ('~', '~*', '!~', '!~*', '~~*', '!~~*') AND "
+               "        oprnamespace='pg_catalog'::regnamespace AND "
+               "        oprright='text'::regtype "
+               "), "
+       /* functions that use the input collation for character semantics */
+               "coll_functions(procid, collid) AS ( "
+               "  SELECT proc.oid, collid FROM pg_proc proc, collations "
+               "  WHERE proname IN ('lower','initcap','upper') AND "
+               "        pronamespace='pg_catalog'::regnamespace AND "
+               "        proargtypes[0] = 'text'::regtype "
+       /* include functions behind the operators listed above */
+               "  UNION "
+               "  SELECT procid, collid FROM coll_operators "
+               "), "
+
+       /*
+        * Generate patterns to search a pg_node_tree for the above functions and
+        * operators.
+        */
+               "patterns(p) AS ( "
+               "  SELECT '{FUNCEXPR :funcid ' || procid::text || '[ }]' FROM functions "
+               "  UNION "
+               "  SELECT '{OPEXPR :opno ' || operid::text || ' (:\\w+ \\w+ )*' || "
+               "         ':inputcollid ' || collid::text || '[ }]' FROM coll_operators "
+               "  UNION "
+               "  SELECT '{FUNCEXPR :funcid ' || procid::text || ' (:\\w+ \\w+ )*' || "
+               "         ':inputcollid ' || collid::text || '[ }]' FROM coll_functions "
+               ") "
+
+       /*
+        * Match the patterns against expressions used for relation contents.
+        */
+               "SELECT reloid, relkind, nspname, relname "
+               "  FROM ( "
+               "    SELECT conrelid "
+               "    FROM pg_constraint, patterns WHERE conbin::text ~ p "
+               "  UNION "
+               "    SELECT indexrelid "
+               "    FROM pg_index, patterns WHERE indexprs::text ~ p OR indpred::text ~ p "
+               "  UNION "
+               "    SELECT partrelid "
+               "    FROM pg_partitioned_table, patterns WHERE partexprs::text ~ p "
+               "  ) s(reloid), pg_class c, pg_namespace n, pg_database d "
+               "  WHERE s.reloid = c.oid AND c.relnamespace = n.oid AND "
+               "        d.datname = current_database() AND "
+               "        d.encoding = pg_char_to_encoding('UTF8');";
+
+       upgrade_task_add_step(task, query,
+                                                 process_unicode_update,
+                                                 true, &report);
+       upgrade_task_run(task, cluster);
+       upgrade_task_free(task);
+
+       if (report.file)
+       {
+               fclose(report.file);
+               report_status(PG_WARNING, "warning");
+               pg_log(PG_WARNING, "Your installation contains relations that may be affected by a new version of Unicode.\n"
+                          "A list of potentially-affected relations is in the file:\n"
+                          "    %s", report.path);
+       }
+       else
+               check_ok();
+}
+
  /*
   * check_new_cluster_logical_replication_slots()
   *
author	Jeff Davis <jdavis@postgresql.org>
	Thu, 3 Apr 2025 17:45:38 +0000 (10:45 -0700)
committer	Jeff Davis <jdavis@postgresql.org>
	Thu, 3 Apr 2025 17:45:38 +0000 (10:45 -0700)