From: Daniele Varrazzo <daniele.varrazzo@gmail.com>
Date: Sun, 31 Mar 2024 20:57:42 +0000 (+0000)
Subject: fix: more careful stripping of error prefixes
X-Git-Tag: 3.1.19~6^2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e9e62a4455422591f82340267636743af38c727b;p=thirdparty%2Fpsycopg.git

fix: more careful stripping of error prefixes

Only strip the known prefixes, both in English and in the currently
known localizations.

Added script to generate regexp to match every backend localization. The
script was executed on PostgreSQL commit f4ad0021af (on master branch,
before v17).

Close #752.
---

diff --git a/docs/news.rst b/docs/news.rst
index 31a5f4cde..820d91cd6 100644
--- a/docs/news.rst
+++ b/docs/news.rst
@@ -7,6 +7,15 @@
 ``psycopg`` release notes
 =========================
 
+Future releases
+---------------
+
+Psycopg 3.1.19
+^^^^^^^^^^^^^^
+
+- Fix excessive stripping of error message prefixes (:ticket:`#752`).
+
+
 Current release
 ---------------
 
diff --git a/psycopg/psycopg/pq/misc.py b/psycopg/psycopg/pq/misc.py
index 3a43133ce..17f14d323 100644
--- a/psycopg/psycopg/pq/misc.py
+++ b/psycopg/psycopg/pq/misc.py
@@ -4,6 +4,7 @@ Various functionalities to make easier to work with the libpq.
 
 # Copyright (C) 2020 The Psycopg Team
 
+import re
 import os
 import sys
 import logging
@@ -90,31 +91,61 @@ def error_message(obj: Union[PGconn, PGresult], encoding: str = "utf8") -> str:
         obj = cast(PGresult, obj)
         bmsg = obj.error_message
 
-        # strip severity and whitespaces
-        if bmsg:
-            bmsg = bmsg.split(b":", 1)[-1].strip()
-
     elif hasattr(obj, "error_message"):
         # obj is a PGconn
         if obj.status == OK:
             encoding = pgconn_encoding(obj)
         bmsg = obj.error_message
 
-        # strip severity and whitespaces
-        if bmsg:
-            bmsg = bmsg.split(b":", 1)[-1].strip()
-
     else:
         raise TypeError(f"PGconn or PGresult expected, got {type(obj).__name__}")
 
     if bmsg:
-        msg = bmsg.decode(encoding, "replace")
+        msg = strip_severity(bmsg.decode(encoding, "replace"))
     else:
         msg = "no details available"
 
     return msg
 
 
+# Possible prefixes to strip for error messages, in the known localizations.
+# This regular expression is generated from PostgreSQL sources using the
+# `tools/update_error_prefixes.py` script
+PREFIXES = re.compile(
+    # autogenerated: start
+    r"""
+    ^ (?:
+      DEBUG | INFO | HINWEIS | WARNUNG | FEHLER | LOG | FATAL | PANIK  # de
+    | DEBUG | INFO | NOTICE | WARNING | ERROR | LOG | FATAL | PANIC  # en
+    | DEBUG | INFO | NOTICE | WARNING | ERROR | LOG | FATAL | PANIC  # es
+    | DEBUG | INFO | NOTICE | ATTENTION | ERREUR | LOG | FATAL | PANIC  # fr
+    | DEBUG | INFO | NOTICE | PERINGATAN | ERROR | LOG | FATAL | PANIK  # id
+    | DEBUG | INFO | NOTIFICA | ATTENZIONE | ERRORE | LOG | FATALE | PANICO  # it
+    | DEBUG | INFO | NOTICE | WARNING | ERROR | LOG | FATAL | PANIC  # ja
+    | 디버그 | 정보 | 알림 | 경고 | 오류 | 로그 | 치명적오류 | 손상  # ko
+    | DEBUG | INFORMACJA | UWAGA | OSTRZEŻENIE | BŁĄD | DZIENNIK | KATASTROFALNY | PANIKA  # pl
+    | DEPURAÇÃO | INFO | NOTA | AVISO | ERRO | LOG | FATAL | PÂNICO  # pt_BR
+    | ОТЛАДКА | ИНФОРМАЦИЯ | ЗАМЕЧАНИЕ | ПРЕДУПРЕЖДЕНИЕ | ОШИБКА | СООБЩЕНИЕ | ВАЖНО | ПАНИКА  # ru
+    | DEBUG | INFO | NOTIS | VARNING | FEL | LOGG | FATALT | PANIK  # sv
+    | DEBUG | BİLGİ | NOT | UYARI | HATA | LOG | ÖLÜMCÜL\ \(FATAL\) | KRİTİK  # tr
+    | НАЛАГОДЖЕННЯ | ІНФОРМАЦІЯ | ПОВІДОМЛЕННЯ | ПОПЕРЕДЖЕННЯ | ПОМИЛКА | ЗАПИСУВАННЯ | ФАТАЛЬНО | ПАНІКА  # uk
+    | 调试 | 信息 | 注意 | 警告 | 错误 | 日志 | 致命错误 | 比致命错误还过分的错误  # zh_CN
+    ) : \s+
+    """,  # noqa: E501
+    # autogenerated: end
+    re.VERBOSE | re.MULTILINE,
+)
+
+
+def strip_severity(msg: str) -> str:
+    """Strip severity and whitespaces from error message."""
+    m = PREFIXES.match(msg)
+    if m:
+        msg = msg[m.span()[1] :]
+
+    return msg.strip()
+
+
 def connection_summary(pgconn: PGconn) -> str:
     """
     Return summary information on a connection.
diff --git a/tests/test_errors.py b/tests/test_errors.py
index a5016ae32..78961eea7 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -323,3 +323,32 @@ def test_pgresult_pickle(conn):
 
 def test_blank_sqlstate(conn):
     assert e.get_base_exception("") is e.DatabaseError
+
+
+@pytest.mark.parametrize(
+    "msg",
+    [
+        'connection to server at "2001:1488:fffe:20::229", port 5432 failed',
+        "HORROR: foo\n",
+    ],
+)
+def test_strip_severity_unstripped(msg):
+    from psycopg.pq.misc import strip_severity
+
+    out = strip_severity(msg)
+    assert out == msg.strip()
+
+
+@pytest.mark.parametrize(
+    "msg",
+    [
+        "ERROR: foo\n",
+        "ERRORE: foo\nbar\n",
+        "오류: foo: bar",
+    ],
+)
+def test_strip_severity_l10n(msg):
+    from psycopg.pq.misc import strip_severity
+
+    out = strip_severity(msg)
+    assert out == msg.split(":", 1)[1].strip()
diff --git a/tools/update_error_prefixes.py b/tools/update_error_prefixes.py
new file mode 100755
index 000000000..af63ae240
--- /dev/null
+++ b/tools/update_error_prefixes.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+"""Find the error prefixes in various l10n used for precise prefix stripping.
+"""
+
+import re
+import logging
+from pathlib import Path
+from argparse import ArgumentParser, Namespace
+from collections import defaultdict
+
+import polib
+
+HERE = Path(__file__).parent
+
+logger = logging.getLogger()
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
+
+
+def main() -> None:
+    args = parse_cmdline()
+    regexp = make_regexp(args.pgroot)
+    update_file(args.dest, regexp)
+
+
+def make_regexp(pgroot: Path) -> str:
+    logger.info("looking for translations in %s", pgroot)
+    msgids = "DEBUG INFO NOTICE WARNING ERROR LOG FATAL PANIC".split()
+    bylang = defaultdict[str, list[str]](list)
+    bylang["en"].extend(msgids)
+    for fn in (pgroot / "src/backend/po").glob("*.po"):
+        lang = fn.name.rsplit(".")[0]
+        pofile = polib.pofile(str(fn))
+        for msgid in msgids:
+            if not (entry := pofile.find(msgid)):
+                continue
+            bylang[lang].append(entry.msgstr)
+
+    pattern = "\n    | ".join(
+        "%s  # %s" % (" | ".join(re.escape(msg) for msg in msgs), lang)
+        for lang, msgs in sorted(bylang.items())
+    )
+    return rf'''    r"""
+    ^ (?:
+      {pattern}
+    ) : \s+
+    """,  # noqa: E501'''
+
+
+def update_file(fn: Path, content: str) -> None:
+    logger.info("updating %s", fn)
+
+    with open(fn, "r") as f:
+        lines = f.read().splitlines()
+
+    istart, iend = [
+        i
+        for i, line in enumerate(lines)
+        if re.match(r"\s*(#|\.\.)\s*autogenerated:\s+(start|end)", line)
+    ]
+
+    lines[istart + 1 : iend] = [content]
+
+    with open(fn, "w") as f:
+        for line in lines:
+            f.write(line + "\n")
+
+
+def parse_cmdline() -> Namespace:
+    for default_pgroot in (
+        HERE / "../../fs/postgres",  # it happens to be my laptop
+        HERE / "../../postgres",  # the last entry is the default if none found
+    ):
+        if default_pgroot.exists():
+            break
+
+    default_pgroot = default_pgroot.resolve()
+    default_dest = (HERE / "../psycopg/psycopg/pq/misc.py").resolve()
+
+    parser = ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--pgroot",
+        metavar="DIR",
+        default=default_pgroot,
+        type=Path,
+        help="root PostgreSQL source directory [default: %(default)s]",
+    )
+    parser.add_argument(
+        "--dest",
+        default=default_dest,
+        type=Path,
+        help="the file to change [default: %(default)s]",
+    )
+
+    opt = parser.parse_args()
+    if not opt.pgroot.is_dir():
+        parser.error(f"not a valid directory: {opt.pgroot}")
+
+    return opt
+
+
+if __name__ == "__main__":
+    main()