From: henadzit Date: Sun, 2 Feb 2025 18:29:01 +0000 (+0100) Subject: perf(c): Use hex_to_int_map in UUIDLoader X-Git-Tag: 3.2.5~2^2~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=811cb51123f367144482014d1c0db5b719af3833;p=thirdparty%2Fpsycopg.git perf(c): Use hex_to_int_map in UUIDLoader --- diff --git a/psycopg_c/psycopg_c/types/uuid.pyx b/psycopg_c/psycopg_c/types/uuid.pyx index 6f2a99d34..4e8cd8b18 100644 --- a/psycopg_c/psycopg_c/types/uuid.pyx +++ b/psycopg_c/psycopg_c/types/uuid.pyx @@ -2,6 +2,7 @@ cimport cython from types import ModuleType from cpython.bytes cimport PyBytes_AsString +from cpython.long cimport PyLong_FromUnsignedLongLong cdef extern from "Python.h": # PyUnicode_AsUTF8 was added to cpython.unicode in 3.1.x but we still @@ -36,6 +37,30 @@ cdef class UUIDBinaryDumper(CDumper): return 16 +cdef extern from *: + """ +static const int8_t hex_to_int_map[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0-15 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 16-31 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 32-47 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 48-63 ('0'-'9') + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 64-79 ('A'-'F') + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 80-95 + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 96-111 ('a'-'f') + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 112-127 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 128-143 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 144-159 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 160-175 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 176-191 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 192-207 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 208-223 
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 224-239 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 // 240-255 +}; +""" + const int8_t[256] hex_to_int_map + + @cython.final cdef class UUIDLoader(CLoader): format = PQ_TEXT @@ -47,19 +72,32 @@ cdef class UUIDLoader(CLoader): import uuid cdef object cload(self, const char *data, size_t length): - cdef char[33] hex_str + cdef uint64_t high = 0 + cdef uint64_t low = 0 cdef size_t i - cdef int j = 0 - for i in range(36): - if data[i] == b'-': + cdef int ndigits = 0 + cdef int8_t c + + for i in range(length): + c = data[i] + if hex_to_int_map[c] == -1: continue - hex_str[j] = data[i] - j += 1 - hex_str[32] = 0 + + if ndigits < 16: + high = (high << 4) | hex_to_int_map[c] + else: + low = (low << 4) | hex_to_int_map[c] + ndigits += 1 + + if ndigits != 32: + raise ValueError("Invalid UUID string") + + cdef object py_high = PyLong_FromUnsignedLongLong(high) + cdef object py_low = PyLong_FromUnsignedLongLong(low) u = uuid.UUID.__new__(uuid.UUID) object.__setattr__(u, 'is_safe', uuid.SafeUUID.unknown) - object.__setattr__(u, 'int', PyLong_FromString(hex_str, NULL, 16)) + object.__setattr__(u, 'int', (py_high << 64) | py_low) return u diff --git a/tests/types/test_uuid.py b/tests/types/test_uuid.py index a9c46544d..6dd823292 100644 --- a/tests/types/test_uuid.py +++ b/tests/types/test_uuid.py @@ -19,7 +19,14 @@ def test_uuid_dump(conn, fmt_in): @pytest.mark.crdb_skip("copy") @pytest.mark.parametrize("fmt_out", pq.Format) @pytest.mark.parametrize( - "val", ["12345678123456781234567812345679", "12345678-1234-5678-1234-567812345679"] + "val", + [ + "12345678123456781234567812345679", + "12345678-1234-5678-1234-567812345679", + "0123456789abcdef0123456789abcdef", + "01234567-89ab-cdef-0123-456789abcdef", + "{a0eebc99-9c0b4ef8-bb6d6bb9-bd380a11}", + ], ) def test_uuid_load(conn, fmt_out, val): cur = conn.cursor(binary=fmt_out) @@ -33,7 +40,13 @@ def test_uuid_load(conn, fmt_out, val): 
copy.set_types(["uuid"]) (res,) = copy.read_row() - assert res == UUID(val) + uuid_val = UUID(val) + assert res == uuid_val + # the C module bypasses __init__, so checking the state of the UUID object + assert res.hex == uuid_val.hex + assert res.int == uuid_val.int + assert res.bytes == uuid_val.bytes + assert res.is_safe == uuid_val.is_safe @pytest.mark.slow