git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
instrumentation: Standardize ticks to nanosecond conversion method
author: Andres Freund <andres@anarazel.de>
Tue, 7 Apr 2026 16:48:07 +0000 (12:48 -0400)
committer: Andres Freund <andres@anarazel.de>
Tue, 7 Apr 2026 17:00:24 +0000 (13:00 -0400)
The timing infrastructure (INSTR_* macros) measures time elapsed using
clock_gettime() on POSIX systems, which returns the time as nanoseconds,
and QueryPerformanceCounter() on Windows, which is a specialized timing
clock source that returns a tick counter that needs to be converted to
nanoseconds using the result of QueryPerformanceFrequency().

This conversion currently happens ad-hoc on Windows, e.g. when calling
INSTR_TIME_GET_NANOSEC, which calls QueryPerformanceFrequency() on every
invocation, despite the frequency being stable after program start,
incurring unnecessary overhead. It also causes a fractured implementation
where macros are defined differently between platforms.

To ease code readability, and prepare for a future change that intends
to use a ticks-to-nanosecond conversion on x86-64 for TSC use, introduce
new pg_ticks_to_ns() / pg_ns_to_ticks() functions that get called from
INSTR_* macros on all platforms.

These functions rely on a separately initialized ticks_per_ns_scaled
value that represents the conversion ratio. This value is initialized
from QueryPerformanceFrequency() on Windows, and set to zero on x86-64
POSIX systems, which results in the ticks being treated as nanoseconds.
Other architectures always directly return the original ticks.

To support this, pg_initialize_timing() is introduced, and is now
mandatory for both the backend and any frontend programs to call before
utilizing INSTR_* macros.

In passing, fix variable names in comment documenting INSTR_TIME_ADD_NANOSEC().

Author: Lukas Fittl <lukas@fittl.com>
Author: David Geier <geidav.pg@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: David Geier <geidav.pg@gmail.com>
Reviewed-by: Lukas Fittl <lukas@fittl.com>
Reviewed-by: Zsolt Parragi <zsolt.parragi@percona.com>
Discussion: https://www.postgresql.org/message-id/flat/20200612232810.f46nbqkdhbutzqdg%40alap3.anarazel.de

12 files changed:
src/backend/postmaster/postmaster.c
src/bin/pg_test_timing/pg_test_timing.c
src/bin/pgbench/pgbench.c
src/bin/psql/startup.c
src/common/Makefile
src/common/instr_time.c [new file with mode: 0644]
src/common/meson.build
src/include/portability/instr_time.h
src/test/regress/expected/misc_functions.out
src/test/regress/pg_regress.c
src/test/regress/regress.c
src/test/regress/sql/misc_functions.sql

index 6f13e8f40a0be00802001e618acc20ddab1d95d9..ae8297470043a6e8d782ec86e3fae183a00549c3 100644 (file)
@@ -1954,6 +1954,9 @@ InitProcessGlobals(void)
        MyStartTimestamp = GetCurrentTimestamp();
        MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
 
+       /* initialize timing infrastructure (required for INSTR_* calls) */
+       pg_initialize_timing();
+
        /*
         * Set a different global seed in every process.  We want something
         * unpredictable, so if possible, use high-quality random bits for the
index aee41dbe3f9b7185d86b67da9f8ca5fec29d8d5f..513ae88cafc66d5c585540a7bffb8ec0bdb71a92 100644 (file)
@@ -43,6 +43,9 @@ main(int argc, char *argv[])
 
        handle_args(argc, argv);
 
+       /* initialize timing infrastructure (required for INSTR_* calls) */
+       pg_initialize_timing();
+
        loop_count = test_timing(test_duration);
 
        output(loop_count);
index 1dae918cc09d2d14fd05dc78567fdf9605344801..c969afab3a595dd5ea8fc30c820c1c80fe3cd6c7 100644 (file)
@@ -6820,6 +6820,9 @@ main(int argc, char **argv)
        int                     exit_code = 0;
        struct timeval tv;
 
+       /* initialize timing infrastructure (required for INSTR_* calls) */
+       pg_initialize_timing();
+
        /*
         * Record difference between Unix time and instr_time time.  We'll use
         * this for logging and aggregation.
index 9a397ec87b7365c130d4410520cd5cd600126008..69d044d405d5bc2d72b805b6ca5504327f22250a 100644 (file)
@@ -24,6 +24,7 @@
 #include "help.h"
 #include "input.h"
 #include "mainloop.h"
+#include "portability/instr_time.h"
 #include "settings.h"
 
 /*
@@ -327,6 +328,9 @@ main(int argc, char *argv[])
 
        PQsetNoticeProcessor(pset.db, NoticeProcessor, NULL);
 
+       /* initialize timing infrastructure (required for INSTR_* calls) */
+       pg_initialize_timing();
+
        SyncVariables();
 
        if (options.list_dbs)
index 2c720caa50972ed6b19944325018f5d0d34025f0..1a2fbbe887f22777f27f6618e814c885caa70452 100644 (file)
@@ -59,6 +59,7 @@ OBJS_COMMON = \
        file_perm.o \
        file_utils.o \
        hashfn.o \
+       instr_time.o \
        ip.o \
        jsonapi.o \
        keywords.o \
diff --git a/src/common/instr_time.c b/src/common/instr_time.c
new file mode 100644 (file)
index 0000000..9271113
--- /dev/null
@@ -0,0 +1,106 @@
+/*-------------------------------------------------------------------------
+ *
+ * instr_time.c
+ *        Non-inline parts of the portable high-precision interval timing
+ *      implementation
+ *
+ * Portions Copyright (c) 2026, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *       src/common/instr_time.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "portability/instr_time.h"
+
+/*
+ * Stores what the number of ticks needs to be multiplied with to end up
+ * with nanoseconds using integer math.
+ *
+ * On certain platforms (currently Windows) the ticks to nanoseconds conversion
+ * requires floating point math because:
+ *
+ * sec = ticks / frequency_hz
+ * ns  = ticks / frequency_hz * 1,000,000,000
+ * ns  = ticks * (1,000,000,000 / frequency_hz)
+ * ns  = ticks * (1,000,000 / frequency_khz) <-- now in kilohertz
+ *
+ * Here, 'ns' is usually a floating point number. For example for a 2.5 GHz CPU
+ * the scaling factor becomes 1,000,000 / 2,500,000 = 0.4.
+ *
+ * To be able to use integer math we work around the lack of precision. We
+ * first scale the integer up (left shift by TICKS_TO_NS_SHIFT) and after the
+ * multiplication by the number of ticks in pg_ticks_to_ns() we shift right by
+ * the same amount.
+ *
+ * We remember the maximum number of ticks that can be multiplied by the scale
+ * factor without overflowing so we can check via a * b > max <=> a > max / b.
+ *
+ * However, as this is meant for interval measurements, it is unlikely that the
+ * overflow path is actually taken in typical scenarios, since overflows would
+ * only occur for intervals longer than 6.5 days.
+ *
+ * Note we utilize unsigned integers even though ticks are stored as a signed
+ * value to encourage compilers to generate better assembly, since we can be
+ * sure these values are not negative.
+ *
+ * On all other platforms we are using clock_gettime(), which uses nanoseconds
+ * as ticks. Hence, we set the multiplier to zero, which causes pg_ticks_to_ns
+ * to return the original value.
+ */
+uint64         ticks_per_ns_scaled = 0;        /* 0 means ticks are nanoseconds */
+uint64         max_ticks_no_overflow = 0;      /* overflow threshold for pg_ticks_to_ns() */
+bool           timing_initialized = false;     /* set by pg_initialize_timing() */
+
+static void set_ticks_per_ns_system(void);     /* platform-specific setup */
+
+/*
+ * Initializes timing infrastructure; repeated calls are no-ops. Must be
+ * called before making any use of the INSTR_* macros.
+ */
+void
+pg_initialize_timing(void)
+{
+       if (timing_initialized)
+               return;                                 /* already set up */
+
+       set_ticks_per_ns_system();
+       timing_initialized = true;
+}
+
+#ifndef WIN32
+
+static void
+set_ticks_per_ns_system(void)
+{
+       ticks_per_ns_scaled = 0;        /* zero: ticks are already nanoseconds */
+       max_ticks_no_overflow = 0;      /* not consulted when no scaling is applied */
+}
+
+#else                                                  /* WIN32 */
+
+/* Returns the QueryPerformanceFrequency() result, i.e. counts per second */
+static inline double
+GetTimerFrequency(void)
+{
+       LARGE_INTEGER f;
+
+       QueryPerformanceFrequency(&f);  /* NOTE(review): return value not checked */
+       return (double) f.QuadPart;
+}
+
+static void
+set_ticks_per_ns_system(void)
+{
+       ticks_per_ns_scaled = (NS_PER_S << TICKS_TO_NS_SHIFT) / GetTimerFrequency();   /* ns per tick as fixed point, scaled by 2^TICKS_TO_NS_SHIFT */
+       max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled;     /* largest tick count whose scaled product still fits in int64 */
+}
+
+#endif                                                 /* WIN32 */
index 4f9b8b8263d55d866e35732eb5a2af5f2ffceed1..9bd55cda95b102fe08a8fe62124b544a1ebf0227 100644 (file)
@@ -13,6 +13,7 @@ common_sources = files(
   'file_perm.c',
   'file_utils.c',
   'hashfn.c',
+  'instr_time.c',
   'ip.c',
   'jsonapi.c',
   'keywords.c',
index 0a1fff7c487aec8f7693e430adc9214b83c925ee..115f5176317ec7c21d9fb738c8d3e32d05d0bb55 100644 (file)
@@ -22,7 +22,7 @@
  *
  * INSTR_TIME_ADD(x, y)                                x += y
  *
- * INSTR_TIME_ADD_NANOSEC(t, n)                x += y in nanoseconds (converts to ticks)
+ * INSTR_TIME_ADD_NANOSEC(t, n)                t += n in nanoseconds (converts to ticks)
  *
  * INSTR_TIME_SUBTRACT(x, y)           x -= y
  *
@@ -80,11 +80,37 @@ typedef struct instr_time
 #define NS_PER_MS      INT64CONST(1000000)
 #define NS_PER_US      INT64CONST(1000)
 
+/* Shift amount for fixed-point ticks-to-nanoseconds conversion. */
+#define TICKS_TO_NS_SHIFT 14
 
-#ifndef WIN32
+/*
+ * PG_INSTR_TICKS_TO_NS controls whether pg_ticks_to_ns/pg_ns_to_ticks needs to
+ * check ticks_per_ns_scaled and potentially convert ticks <=> nanoseconds.
+ */
+#ifdef WIN32
+#define PG_INSTR_TICKS_TO_NS 1
+#else
+#define PG_INSTR_TICKS_TO_NS 0
+#endif
 
+/*
+ * Variables used to translate ticks to nanoseconds, initialized by
+ * pg_initialize_timing.
+ */
+extern PGDLLIMPORT uint64 ticks_per_ns_scaled;
+extern PGDLLIMPORT uint64 max_ticks_no_overflow;
+extern PGDLLIMPORT bool timing_initialized;
 
-/* Use clock_gettime() */
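+/*
+ * Initialize timing infrastructure.  Must be called at least once before
+ * using INSTR_TIME_SET_CURRENT*, INSTR_TIME_GET_NANOSEC (and macros built on
+ * it) or INSTR_TIME_ADD_NANOSEC; their helpers assert timing_initialized.
+ */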
+extern void pg_initialize_timing(void);
+
+#ifndef WIN32
+
+/* On POSIX, use clock_gettime() for system clock source */
 
 #include <time.h>
 
@@ -108,67 +134,119 @@ typedef struct instr_time
 #define PG_INSTR_CLOCK CLOCK_REALTIME
 #endif
 
-/* helper for INSTR_TIME_SET_CURRENT */
 static inline instr_time
-pg_clock_gettime_ns(void)
+pg_get_ticks(void)
 {
        instr_time      now;
        struct timespec tmp;
 
+       Assert(timing_initialized);
+
        clock_gettime(PG_INSTR_CLOCK, &tmp);
        now.ticks = tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
 
        return now;
 }
 
-#define INSTR_TIME_SET_CURRENT(t) \
-       ((t) = pg_clock_gettime_ns())
-
-#define INSTR_TIME_GET_NANOSEC(t) \
-       ((int64) (t).ticks)
-
-#define INSTR_TIME_ADD_NANOSEC(t, n) \
-       ((t).ticks += (n))
-
-
 #else                                                  /* WIN32 */
 
+/* On Windows, use QueryPerformanceCounter() for system clock source */
 
-/* Use QueryPerformanceCounter() */
-
-/* helper for INSTR_TIME_SET_CURRENT */
 static inline instr_time
-pg_query_performance_counter(void)
+pg_get_ticks(void)
 {
        instr_time      now;
        LARGE_INTEGER tmp;
 
+       Assert(timing_initialized);
+
        QueryPerformanceCounter(&tmp);
        now.ticks = tmp.QuadPart;
 
        return now;
 }
 
-static inline double
-GetTimerFrequency(void)
+#endif                                                 /* WIN32 */
+
+static inline int64
+pg_ticks_to_ns(int64 ticks)
 {
-       LARGE_INTEGER f;
+#if PG_INSTR_TICKS_TO_NS
+       int64           ns = 0;
+
+       Assert(timing_initialized);
+
+       /*
+        * Avoid doing work if we don't use scaled ticks, e.g. system clock on
+        * Unix (in that case ticks is counted in nanoseconds)
+        */
+       if (ticks_per_ns_scaled == 0)
+               return ticks;
+
+       /*
+        * Would multiplication overflow? If so perform computation in two parts.
+        */
+       if (unlikely(ticks > (int64) max_ticks_no_overflow))
+       {
+               /*
+                * To avoid overflow, first scale total ticks down by the fixed
+                * factor, and *afterwards* multiply them by the frequency-based scale
+                * factor.
+                *
+                * The remaining ticks can follow the regular formula, since they
+                * won't overflow.
+                */
+               int64           count = ticks >> TICKS_TO_NS_SHIFT;
+
+               ns = count * ticks_per_ns_scaled;
+               ticks -= (count << TICKS_TO_NS_SHIFT);
+       }
+
+       ns += (ticks * ticks_per_ns_scaled) >> TICKS_TO_NS_SHIFT;
+
+       return ns;
+#else
+       Assert(timing_initialized);
 
-       QueryPerformanceFrequency(&f);
-       return (double) f.QuadPart;
+       return ticks;
+#endif                                                 /* PG_INSTR_TICKS_TO_NS */
 }
 
-#define INSTR_TIME_SET_CURRENT(t) \
-       ((t) = pg_query_performance_counter())
+static inline int64
+pg_ns_to_ticks(int64 ns)
+{
+#if PG_INSTR_TICKS_TO_NS
+       int64           ticks = 0;
 
-#define INSTR_TIME_GET_NANOSEC(t) \
-       ((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency())))
+       Assert(timing_initialized);
 
-#define INSTR_TIME_ADD_NANOSEC(t, n) \
-       ((t).ticks += ((n) / ((double) NS_PER_S / GetTimerFrequency())))
+       /*
+        * If ticks_per_ns_scaled is zero, ticks are already in nanoseconds (e.g.
+        * system clock on Unix).
+        */
+       if (ticks_per_ns_scaled == 0)
+               return ns;
 
-#endif                                                 /* WIN32 */
+       /*
+        * The reverse of pg_ticks_to_ns to avoid a similar overflow problem.
+        */
+       if (unlikely(ns > (INT64_MAX >> TICKS_TO_NS_SHIFT)))
+       {
+               int64           count = ns / ticks_per_ns_scaled;
+
+               ticks = count << TICKS_TO_NS_SHIFT;
+               ns -= count * ticks_per_ns_scaled;
+       }
 
+       ticks += (ns << TICKS_TO_NS_SHIFT) / ticks_per_ns_scaled;
+
+       return ticks;
+#else
+       Assert(timing_initialized);
+
+       return ns;
+#endif                                                 /* PG_INSTR_TICKS_TO_NS */
+}
 
 /*
  * Common macros
@@ -178,10 +256,16 @@ GetTimerFrequency(void)
 
 #define INSTR_TIME_SET_ZERO(t) ((t).ticks = 0)
 
+#define INSTR_TIME_SET_CURRENT(t) \
+       ((t) = pg_get_ticks())
+
 
 #define INSTR_TIME_ADD(x,y) \
        ((x).ticks += (y).ticks)
 
+#define INSTR_TIME_ADD_NANOSEC(t, n) \
+       ((t).ticks += pg_ns_to_ticks(n))
+
 #define INSTR_TIME_SUBTRACT(x,y) \
        ((x).ticks -= (y).ticks)
 
@@ -191,6 +275,9 @@ GetTimerFrequency(void)
 #define INSTR_TIME_GT(x,y) \
        ((x).ticks > (y).ticks)
 
+#define INSTR_TIME_GET_NANOSEC(t) \
+       (pg_ticks_to_ns((t).ticks))
+
 #define INSTR_TIME_GET_DOUBLE(t) \
        ((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)
 
index cf55cdf3688d972bba52975b28680d839e9b25f4..c3261bff209fb1f05b851850da0dc0154f651b27 100644 (file)
@@ -850,3 +850,14 @@ SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats();
 
 RESET ROLE;
 DROP ROLE regress_multixact_funcs;
+-- test instr_time nanosecond<->ticks conversion
+CREATE FUNCTION test_instr_time()
+    RETURNS bool
+    AS :'regresslib'
+    LANGUAGE C;
+SELECT test_instr_time();
+ test_instr_time 
+-----------------
+ t
+(1 row)
+
index 9a918156437b2e67a5221c741c894c195d5aa8af..0c0620569829bd5b85b3b88a21ac078b18a65d22 100644 (file)
@@ -2181,6 +2181,8 @@ regression_main(int argc, char *argv[],
        progname = get_progname(argv[0]);
        set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_regress"));
 
+       pg_initialize_timing();
+
        get_restricted_token();
 
        atexit(stop_postmaster);
index 68a01a1dde01483d18c0c967c025194a8eb9aa0e..c2eaa96f08605ec667c97bae762ab46f72c04bb7 100644 (file)
@@ -38,6 +38,7 @@
 #include "optimizer/plancat.h"
 #include "parser/parse_coerce.h"
 #include "port/atomics.h"
+#include "portability/instr_time.h"
 #include "postmaster/postmaster.h"     /* for MAX_BACKENDS */
 #include "storage/spin.h"
 #include "tcop/tcopprot.h"
@@ -1384,3 +1385,38 @@ test_translation(PG_FUNCTION_ARGS)
 
        PG_RETURN_VOID();
 }
+
+/* Verify that the ns -> ticks -> ns round trip behaves correctly, including overflow */
+PG_FUNCTION_INFO_V1(test_instr_time);
+Datum
+test_instr_time(PG_FUNCTION_ARGS)
+{
+       instr_time      t;
+       int64           test_ns[] = {0, 1000, INT64CONST(1000000000000000)};
+       int64           max_err;
+
+       /*
+        * The ns-to-ticks-to-ns roundtrip may lose precision due to integer
+        * truncation in the fixed-point conversion. The result may be smaller
+        * than the input by at most max_err, and must never be larger.
+        */
+       max_err = (ticks_per_ns_scaled >> TICKS_TO_NS_SHIFT) + 1;
+
+       for (int i = 0; i < lengthof(test_ns); i++)
+       {
+               int64           result;
+
+               INSTR_TIME_SET_ZERO(t);
+               INSTR_TIME_ADD_NANOSEC(t, test_ns[i]);  /* ns -> ticks */
+               result = INSTR_TIME_GET_NANOSEC(t);     /* ticks -> ns */
+
+               if (result < test_ns[i] - max_err || result > test_ns[i])
+                       elog(ERROR,
+                                "INSTR_TIME_GET_NANOSEC(t) yielded " INT64_FORMAT
+                                ", expected " INT64_FORMAT " (max_err " INT64_FORMAT
+                                ") in file \"%s\" line %u",
+                                result, test_ns[i], max_err, __FILE__, __LINE__);
+       }
+
+       PG_RETURN_BOOL(true);
+}
index c8226652f2c94e645ce472117061bcc96144e831..946ee5726cdd790a8fe1d5854159fe6519af6b38 100644 (file)
@@ -349,3 +349,10 @@ SET ROLE regress_multixact_funcs;
 SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats();
 RESET ROLE;
 DROP ROLE regress_multixact_funcs;
+
+-- test instr_time nanosecond<->ticks conversion
+CREATE FUNCTION test_instr_time()
+    RETURNS bool
+    AS :'regresslib'
+    LANGUAGE C;
+SELECT test_instr_time();