From: Daniele Varrazzo Date: Sun, 31 Mar 2024 20:57:42 +0000 (+0000) Subject: fix: more careful stripping of error prefixes X-Git-Tag: 3.1.19~6^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e9e62a4455422591f82340267636743af38c727b;p=thirdparty%2Fpsycopg.git fix: more careful stripping of error prefixes Only strip the known prefixes, both in English and in the currently known localizations. Added script to generate regexp to match every backend localization. The script was executed on PostgreSQL commit f4ad0021af (on master branch, before v17). Close #752. --- diff --git a/docs/news.rst b/docs/news.rst index 31a5f4cde..820d91cd6 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -7,6 +7,15 @@ ``psycopg`` release notes ========================= +Future releases +--------------- + +Psycopg 3.1.19 +^^^^^^^^^^^^^^ + +- Fix excessive stripping of error message prefixes (:ticket:`#752`). + + Current release --------------- diff --git a/psycopg/psycopg/pq/misc.py b/psycopg/psycopg/pq/misc.py index 3a43133ce..17f14d323 100644 --- a/psycopg/psycopg/pq/misc.py +++ b/psycopg/psycopg/pq/misc.py @@ -4,6 +4,7 @@ Various functionalities to make easier to work with the libpq. 
# Copyright (C) 2020 The Psycopg Team +import re import os import sys import logging @@ -90,31 +91,61 @@ def error_message(obj: Union[PGconn, PGresult], encoding: str = "utf8") -> str: obj = cast(PGresult, obj) bmsg = obj.error_message - # strip severity and whitespaces - if bmsg: - bmsg = bmsg.split(b":", 1)[-1].strip() - elif hasattr(obj, "error_message"): # obj is a PGconn if obj.status == OK: encoding = pgconn_encoding(obj) bmsg = obj.error_message - # strip severity and whitespaces - if bmsg: - bmsg = bmsg.split(b":", 1)[-1].strip() - else: raise TypeError(f"PGconn or PGresult expected, got {type(obj).__name__}") if bmsg: - msg = bmsg.decode(encoding, "replace") + msg = strip_severity(bmsg.decode(encoding, "replace")) else: msg = "no details available" return msg +# Possible prefixes to strip for error messages, in the known localizations. +# This regular expression is generated from PostgreSQL sources using the +# `tools/update_error_prefixes.py` script +PREFIXES = re.compile( + # autogenerated: start + r""" + ^ (?: + DEBUG | INFO | HINWEIS | WARNUNG | FEHLER | LOG | FATAL | PANIK # de + | DEBUG | INFO | NOTICE | WARNING | ERROR | LOG | FATAL | PANIC # en + | DEBUG | INFO | NOTICE | WARNING | ERROR | LOG | FATAL | PANIC # es + | DEBUG | INFO | NOTICE | ATTENTION | ERREUR | LOG | FATAL | PANIC # fr + | DEBUG | INFO | NOTICE | PERINGATAN | ERROR | LOG | FATAL | PANIK # id + | DEBUG | INFO | NOTIFICA | ATTENZIONE | ERRORE | LOG | FATALE | PANICO # it + | DEBUG | INFO | NOTICE | WARNING | ERROR | LOG | FATAL | PANIC # ja + | 디버그 | 정보 | 알림 | 경고 | 오류 | 로그 | 치명적오류 | 손상 # ko + | DEBUG | INFORMACJA | UWAGA | OSTRZEŻENIE | BŁĄD | DZIENNIK | KATASTROFALNY | PANIKA # pl + | DEPURAÇÃO | INFO | NOTA | AVISO | ERRO | LOG | FATAL | PÂNICO # pt_BR + | ОТЛАДКА | ИНФОРМАЦИЯ | ЗАМЕЧАНИЕ | ПРЕДУПРЕЖДЕНИЕ | ОШИБКА | СООБЩЕНИЕ | ВАЖНО | ПАНИКА # ru + | DEBUG | INFO | NOTIS | VARNING | FEL | LOGG | FATALT | PANIK # sv + | DEBUG | BİLGİ | NOT | UYARI | HATA | LOG | ÖLÜMCÜL\ \(FATAL\) 
| KRİTİK # tr + | НАЛАГОДЖЕННЯ | ІНФОРМАЦІЯ | ПОВІДОМЛЕННЯ | ПОПЕРЕДЖЕННЯ | ПОМИЛКА | ЗАПИСУВАННЯ | ФАТАЛЬНО | ПАНІКА # uk + | 调试 | 信息 | 注意 | 警告 | 错误 | 日志 | 致命错误 | 比致命错误还过分的错误 # zh_CN + ) : \s+ + """, # noqa: E501 + # autogenerated: end + re.VERBOSE | re.MULTILINE, +) + + +def strip_severity(msg: str) -> str: + """Strip severity and whitespaces from error message.""" + m = PREFIXES.match(msg) + if m: + msg = msg[m.span()[1] :] + + return msg.strip() + + def connection_summary(pgconn: PGconn) -> str: """ Return summary information on a connection. diff --git a/tests/test_errors.py b/tests/test_errors.py index a5016ae32..78961eea7 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -323,3 +323,32 @@ def test_pgresult_pickle(conn): def test_blank_sqlstate(conn): assert e.get_base_exception("") is e.DatabaseError + + +@pytest.mark.parametrize( + "msg", + [ + 'connection to server at "2001:1488:fffe:20::229", port 5432 failed', + "HORROR: foo\n", + ], +) +def test_strip_severity_unstripped(msg): + from psycopg.pq.misc import strip_severity + + out = strip_severity(msg) + assert out == msg.strip() + + +@pytest.mark.parametrize( + "msg", + [ + "ERROR: foo\n", + "ERRORE: foo\nbar\n", + "오류: foo: bar", + ], +) +def test_strip_severity_l10n(msg): + from psycopg.pq.misc import strip_severity + + out = strip_severity(msg) + assert out == msg.split(":", 1)[1].strip() diff --git a/tools/update_error_prefixes.py b/tools/update_error_prefixes.py new file mode 100755 index 000000000..af63ae240 --- /dev/null +++ b/tools/update_error_prefixes.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +"""Find the error prefixes in various l10n used for precise prefixstripping. 
+"""
+
+import re
+import logging
+from pathlib import Path
+from argparse import ArgumentParser, Namespace
+from collections import defaultdict
+
+import polib
+
+HERE = Path(__file__).parent
+
+logger = logging.getLogger()
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
+
+
+def main() -> None:
+    """Regenerate the error prefixes regexp in the destination file."""
+    args = parse_cmdline()
+    regexp = make_regexp(args.pgroot)
+    update_file(args.dest, regexp)
+
+
+def make_regexp(pgroot: Path) -> str:
+    """Return the source of a regexp matching the translated severity prefixes."""
+    logger.info("looking for translations in %s", pgroot)
+    msgids = "DEBUG INFO NOTICE WARNING ERROR LOG FATAL PANIC".split()
+    bylang = defaultdict[str, list[str]](list)
+    bylang["en"].extend(msgids)
+    for fn in (pgroot / "src/backend/po").glob("*.po"):
+        lang = fn.name.rsplit(".")[0]
+        pofile = polib.pofile(str(fn))
+        for msgid in msgids:
+            if (entry := pofile.find(msgid)) and entry.msgstr:  # skip untranslated
+                bylang[lang].append(entry.msgstr)
+
+    pattern = "\n      | ".join(
+        "%s  # %s" % (" | ".join(re.escape(msg) for msg in msgs), lang)
+        for lang, msgs in sorted(bylang.items())
+    )
+    return rf'''    r"""
+    ^ (?:
+        {pattern}
+    ) : \s+
+    """,  # noqa: E501'''
+
+
+def update_file(fn: Path, content: str) -> None:
+    """Replace the lines between the autogenerated markers of `fn` with `content`."""
+    logger.info("updating %s", fn)
+
+    lines = fn.read_text(encoding="utf-8").splitlines()  # prefixes are non-ASCII
+
+    istart, iend = [
+        i
+        for i, line in enumerate(lines)
+        if re.match(r"\s*(#|\.\.)\s*autogenerated:\s+(start|end)", line)
+    ]
+
+    lines[istart + 1 : iend] = [content]
+
+    fn.write_text("\n".join(lines) + "\n", encoding="utf-8")
+
+
+def parse_cmdline() -> Namespace:
+    """Parse the command line; exit with an error if --pgroot is not a directory."""
+    for default_pgroot in (
+        HERE / "../../fs/postgres",  # it happens to be my laptop
+        HERE / "../../postgres",  # the last entry is the default if none found
+    ):
+        if default_pgroot.exists():
+            break
+
+    default_pgroot = default_pgroot.resolve()
+    default_dest = (HERE / "../psycopg/psycopg/pq/misc.py").resolve()
+
+    parser = ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--pgroot",
+        metavar="DIR",
+        default=default_pgroot,
+        type=Path,
+        help="root PostgreSQL source directory [default: %(default)s]",
+    )
+    parser.add_argument(
+        "--dest",
+        default=default_dest,
+        type=Path,
+        help="the file to change [default: %(default)s]",
+    )
+
+    opt = parser.parse_args()
+    if not opt.pgroot.is_dir():
+        parser.error(f"not a valid directory: {opt.pgroot}")
+
+    return opt
+
+
+if __name__ == "__main__":
+    main()