instrumentation: Standardize ticks to nanosecond conversion method

author Andres Freund <andres@anarazel.de>

Tue, 7 Apr 2026 16:48:07 +0000 (12:48 -0400)

committer Andres Freund <andres@anarazel.de>

Tue, 7 Apr 2026 17:00:24 +0000 (13:00 -0400)
author Andres Freund <andres@anarazel.de>
Tue, 7 Apr 2026 16:48:07 +0000 (12:48 -0400)
committer Andres Freund <andres@anarazel.de>
Tue, 7 Apr 2026 17:00:24 +0000 (13:00 -0400)
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c

index 6f13e8f40a0be00802001e618acc20ddab1d95d9..ae8297470043a6e8d782ec86e3fae183a00549c3 100644 (file)
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1954,6 +1954,9 @@ InitProcessGlobals(void)
         MyStartTimestamp = GetCurrentTimestamp();
         MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
  
+       /* initialize timing infrastructure (required for INSTR_* calls) */
+       pg_initialize_timing();
+
         /*
          * Set a different global seed in every process.  We want something
          * unpredictable, so if possible, use high-quality random bits for the
diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c

index aee41dbe3f9b7185d86b67da9f8ca5fec29d8d5f..513ae88cafc66d5c585540a7bffb8ec0bdb71a92 100644 (file)
--- a/src/bin/pg_test_timing/pg_test_timing.c
+++ b/src/bin/pg_test_timing/pg_test_timing.c
@@ -43,6 +43,9 @@ main(int argc, char *argv[])
  
         handle_args(argc, argv);
  
+       /* initialize timing infrastructure (required for INSTR_* calls) */
+       pg_initialize_timing();
+
         loop_count = test_timing(test_duration);
  
         output(loop_count);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c

index 1dae918cc09d2d14fd05dc78567fdf9605344801..c969afab3a595dd5ea8fc30c820c1c80fe3cd6c7 100644 (file)
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -6820,6 +6820,9 @@ main(int argc, char **argv)
         int                     exit_code = 0;
         struct timeval tv;
  
+       /* initialize timing infrastructure (required for INSTR_* calls) */
+       pg_initialize_timing();
+
         /*
          * Record difference between Unix time and instr_time time.  We'll use
          * this for logging and aggregation.
diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c

index 9a397ec87b7365c130d4410520cd5cd600126008..69d044d405d5bc2d72b805b6ca5504327f22250a 100644 (file)
--- a/src/bin/psql/startup.c
+++ b/src/bin/psql/startup.c
@@ -24,6 +24,7 @@
  #include "help.h"
  #include "input.h"
  #include "mainloop.h"
+#include "portability/instr_time.h"
  #include "settings.h"
  
  /*
@@ -327,6 +328,9 @@ main(int argc, char *argv[])
  
         PQsetNoticeProcessor(pset.db, NoticeProcessor, NULL);
  
+       /* initialize timing infrastructure (required for INSTR_* calls) */
+       pg_initialize_timing();
+
         SyncVariables();
  
         if (options.list_dbs)
diff --git a/src/common/Makefile b/src/common/Makefile

index 2c720caa50972ed6b19944325018f5d0d34025f0..1a2fbbe887f22777f27f6618e814c885caa70452 100644 (file)
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -59,6 +59,7 @@ OBJS_COMMON = \
         file_perm.o \
         file_utils.o \
         hashfn.o \
+       instr_time.o \
         ip.o \
         jsonapi.o \
         keywords.o \
diff --git a/src/common/instr_time.c b/src/common/instr_time.c

new file mode 100644 (file)

index 0000000..9271113
--- /dev/null
+++ b/src/common/instr_time.c
@@ -0,0 +1,106 @@
+/*-------------------------------------------------------------------------
+ *
+ * instr_time.c
+ *        Non-inline parts of the portable high-precision interval timing
+ *      implementation
+ *
+ * Portions Copyright (c) 2026, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *       src/common/instr_time.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "portability/instr_time.h"
+
+/*
+ * Stores the factor by which a number of ticks must be multiplied to end up
+ * with nanoseconds using integer math.
+ *
+ * On certain platforms (currently Windows) the ticks to nanoseconds conversion
+ * requires floating point math because:
+ *
+ * sec = ticks / frequency_hz
+ * ns  = ticks / frequency_hz * 1,000,000,000
+ * ns  = ticks * (1,000,000,000 / frequency_hz)
+ * ns  = ticks * (1,000,000 / frequency_khz) <-- now in kilohertz
+ *
+ * Here, the scaling factor is usually not an integer. For example, for a
+ * 2.5 GHz CPU it becomes 1,000,000 / 2,500,000 = 0.4.
+ *
+ * To be able to use integer math we work around the lack of precision. We
+ * first scale the integer up (left shift by TICKS_TO_NS_SHIFT) and after the
+ * multiplication by the number of ticks in pg_ticks_to_ns() we shift right by
+ * the same amount.
+ *
+ * We remember the maximum number of ticks that can be multiplied by the scale
+ * factor without overflowing so we can check via a * b > max <=> a > max / b.
+ *
+ * However, as this is meant for interval measurements, it is unlikely that the
+ * overflow path is actually taken in typical scenarios, since overflows would
+ * only occur for intervals longer than 6.5 days.
+ *
+ * Note we utilize unsigned integers even though ticks are stored as a signed
+ * value to encourage compilers to generate better assembly, since we can be
+ * sure these values are not negative.
+ *
+ * On all other platforms we are using clock_gettime(), which uses nanoseconds
+ * as ticks. Hence, we set the multiplier to zero, which causes pg_ticks_to_ns
+ * to return the original value.
+ */
+uint64         ticks_per_ns_scaled = 0;
+uint64         max_ticks_no_overflow = 0;
+bool           timing_initialized = false;
+
+static void set_ticks_per_ns_system(void);
+
+/*
+ * Initializes timing infrastructure; repeated calls are no-ops. Must be
+ * called before making any use of the INSTR_TIME_* macros.
+ */
+void
+pg_initialize_timing(void)
+{
+       if (timing_initialized)
+               return;
+
+       set_ticks_per_ns_system();
+       timing_initialized = true;
+}
+
+#ifndef WIN32
+
+static void
+set_ticks_per_ns_system(void)
+{
+       ticks_per_ns_scaled = 0;
+       max_ticks_no_overflow = 0;
+}
+
+#else                                                  /* WIN32 */
+
+/* GetTimerFrequency returns counts per second */
+static inline double
+GetTimerFrequency(void)
+{
+       LARGE_INTEGER f;
+
+       QueryPerformanceFrequency(&f);
+       return (double) f.QuadPart;
+}
+
+static void
+set_ticks_per_ns_system(void)
+{
+       ticks_per_ns_scaled = (NS_PER_S << TICKS_TO_NS_SHIFT) / GetTimerFrequency();
+       max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled;
+}
+
+#endif                                                 /* WIN32 */
diff --git a/src/common/meson.build b/src/common/meson.build

index 4f9b8b8263d55d866e35732eb5a2af5f2ffceed1..9bd55cda95b102fe08a8fe62124b544a1ebf0227 100644 (file)
--- a/src/common/meson.build
+++ b/src/common/meson.build
@@ -13,6 +13,7 @@ common_sources = files(
    'file_perm.c',
    'file_utils.c',
    'hashfn.c',
+  'instr_time.c',
    'ip.c',
    'jsonapi.c',
    'keywords.c',
diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h

index 0a1fff7c487aec8f7693e430adc9214b83c925ee..115f5176317ec7c21d9fb738c8d3e32d05d0bb55 100644 (file)
--- a/src/include/portability/instr_time.h
+++ b/src/include/portability/instr_time.h
@@ -22,7 +22,7 @@
   *
   * INSTR_TIME_ADD(x, y)                                x += y
   *
- * INSTR_TIME_ADD_NANOSEC(t, n)                x += y in nanoseconds (converts to ticks)
+ * INSTR_TIME_ADD_NANOSEC(t, n)                t += n in nanoseconds (converts to ticks)
   *
   * INSTR_TIME_SUBTRACT(x, y)           x -= y
   *
@@ -80,11 +80,37 @@ typedef struct instr_time
  #define NS_PER_MS      INT64CONST(1000000)
  #define NS_PER_US      INT64CONST(1000)
  
+/* Shift amount for fixed-point ticks-to-nanoseconds conversion. */
+#define TICKS_TO_NS_SHIFT 14
  
-#ifndef WIN32
+/*
+ * PG_INSTR_TICKS_TO_NS controls whether pg_ticks_to_ns/pg_ns_to_ticks need to
+ * check ticks_per_ns_scaled and potentially convert ticks <=> nanoseconds.
+ */
+#ifdef WIN32
+#define PG_INSTR_TICKS_TO_NS 1
+#else
+#define PG_INSTR_TICKS_TO_NS 0
+#endif
  
+/*
+ * Variables used to translate ticks to nanoseconds, initialized by
+ * pg_initialize_timing.
+ */
+extern PGDLLIMPORT uint64 ticks_per_ns_scaled;
+extern PGDLLIMPORT uint64 max_ticks_no_overflow;
+extern PGDLLIMPORT bool timing_initialized;
  
-/* Use clock_gettime() */
+/*
+ * Initialize timing infrastructure
+ *
+ * This must be called at least once before using INSTR_TIME_SET_CURRENT* macros.
+ */
+extern void pg_initialize_timing(void);
+
+#ifndef WIN32
+
+/* On POSIX, use clock_gettime() for system clock source */
  
  #include <time.h>
  
@@ -108,67 +134,119 @@ typedef struct instr_time
  #define PG_INSTR_CLOCK CLOCK_REALTIME
  #endif
  
-/* helper for INSTR_TIME_SET_CURRENT */
  static inline instr_time
-pg_clock_gettime_ns(void)
+pg_get_ticks(void)
  {
         instr_time      now;
         struct timespec tmp;
  
+       Assert(timing_initialized);
+
         clock_gettime(PG_INSTR_CLOCK, &tmp);
         now.ticks = tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
  
         return now;
  }
  
-#define INSTR_TIME_SET_CURRENT(t) \
-       ((t) = pg_clock_gettime_ns())
-
-#define INSTR_TIME_GET_NANOSEC(t) \
-       ((int64) (t).ticks)
-
-#define INSTR_TIME_ADD_NANOSEC(t, n) \
-       ((t).ticks += (n))
-
-
  #else                                                  /* WIN32 */
  
+/* On Windows, use QueryPerformanceCounter() for system clock source */
  
-/* Use QueryPerformanceCounter() */
-
-/* helper for INSTR_TIME_SET_CURRENT */
  static inline instr_time
-pg_query_performance_counter(void)
+pg_get_ticks(void)
  {
         instr_time      now;
         LARGE_INTEGER tmp;
  
+       Assert(timing_initialized);
+
         QueryPerformanceCounter(&tmp);
         now.ticks = tmp.QuadPart;
  
         return now;
  }
  
-static inline double
-GetTimerFrequency(void)
+#endif                                                 /* WIN32 */
+
+static inline int64
+pg_ticks_to_ns(int64 ticks)
  {
-       LARGE_INTEGER f;
+#if PG_INSTR_TICKS_TO_NS
+       int64           ns = 0;
+
+       Assert(timing_initialized);
+
+       /*
+        * Avoid doing work if we don't use scaled ticks, e.g. system clock on
+        * Unix (in that case ticks is counted in nanoseconds)
+        */
+       if (ticks_per_ns_scaled == 0)
+               return ticks;
+
+       /*
+        * Would multiplication overflow? If so perform computation in two parts.
+        */
+       if (unlikely(ticks > (int64) max_ticks_no_overflow))
+       {
+               /*
+                * To avoid overflow, first scale total ticks down by the fixed
+                * factor, and *afterwards* multiply them by the frequency-based scale
+                * factor.
+                *
+                * The remaining ticks can follow the regular formula, since they
+                * won't overflow.
+                */
+               int64           count = ticks >> TICKS_TO_NS_SHIFT;
+
+               ns = count * ticks_per_ns_scaled;
+               ticks -= (count << TICKS_TO_NS_SHIFT);
+       }
+
+       ns += (ticks * ticks_per_ns_scaled) >> TICKS_TO_NS_SHIFT;
+
+       return ns;
+#else
+       Assert(timing_initialized);
  
-       QueryPerformanceFrequency(&f);
-       return (double) f.QuadPart;
+       return ticks;
+#endif                                                 /* PG_INSTR_TICKS_TO_NS */
  }
  
-#define INSTR_TIME_SET_CURRENT(t) \
-       ((t) = pg_query_performance_counter())
+static inline int64
+pg_ns_to_ticks(int64 ns)
+{
+#if PG_INSTR_TICKS_TO_NS
+       int64           ticks = 0;
  
-#define INSTR_TIME_GET_NANOSEC(t) \
-       ((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency())))
+       Assert(timing_initialized);
  
-#define INSTR_TIME_ADD_NANOSEC(t, n) \
-       ((t).ticks += ((n) / ((double) NS_PER_S / GetTimerFrequency())))
+       /*
+        * If ticks_per_ns_scaled is zero, ticks are already in nanoseconds (e.g.
+        * system clock on Unix).
+        */
+       if (ticks_per_ns_scaled == 0)
+               return ns;
  
-#endif                                                 /* WIN32 */
+       /*
+        * The reverse of pg_ticks_to_ns to avoid a similar overflow problem.
+        */
+       if (unlikely(ns > (INT64_MAX >> TICKS_TO_NS_SHIFT)))
+       {
+               int64           count = ns / ticks_per_ns_scaled;
+
+               ticks = count << TICKS_TO_NS_SHIFT;
+               ns -= count * ticks_per_ns_scaled;
+       }
  
+       ticks += (ns << TICKS_TO_NS_SHIFT) / ticks_per_ns_scaled;
+
+       return ticks;
+#else
+       Assert(timing_initialized);
+
+       return ns;
+#endif                                                 /* PG_INSTR_TICKS_TO_NS */
+}
  
  /*
   * Common macros
@@ -178,10 +256,16 @@ GetTimerFrequency(void)
  
  #define INSTR_TIME_SET_ZERO(t) ((t).ticks = 0)
  
+#define INSTR_TIME_SET_CURRENT(t) \
+       ((t) = pg_get_ticks())
+
  
  #define INSTR_TIME_ADD(x,y) \
         ((x).ticks += (y).ticks)
  
+#define INSTR_TIME_ADD_NANOSEC(t, n) \
+       ((t).ticks += pg_ns_to_ticks(n))
+
  #define INSTR_TIME_SUBTRACT(x,y) \
         ((x).ticks -= (y).ticks)
  
@@ -191,6 +275,9 @@ GetTimerFrequency(void)
  #define INSTR_TIME_GT(x,y) \
         ((x).ticks > (y).ticks)
  
+#define INSTR_TIME_GET_NANOSEC(t) \
+       (pg_ticks_to_ns((t).ticks))
+
  #define INSTR_TIME_GET_DOUBLE(t) \
         ((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)
  
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out

index cf55cdf3688d972bba52975b28680d839e9b25f4..c3261bff209fb1f05b851850da0dc0154f651b27 100644 (file)
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -850,3 +850,14 @@ SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats();
  
  RESET ROLE;
  DROP ROLE regress_multixact_funcs;
+-- test instr_time nanosecond<->ticks conversion
+CREATE FUNCTION test_instr_time()
+    RETURNS bool
+    AS :'regresslib'
+    LANGUAGE C;
+SELECT test_instr_time();
+ test_instr_time 
+-----------------
+ t
+(1 row)
+
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c

index 9a918156437b2e67a5221c741c894c195d5aa8af..0c0620569829bd5b85b3b88a21ac078b18a65d22 100644 (file)
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -2181,6 +2181,8 @@ regression_main(int argc, char *argv[],
         progname = get_progname(argv[0]);
         set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_regress"));
  
+       pg_initialize_timing();
+
         get_restricted_token();
  
         atexit(stop_postmaster);
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c

index 68a01a1dde01483d18c0c967c025194a8eb9aa0e..c2eaa96f08605ec667c97bae762ab46f72c04bb7 100644 (file)
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -38,6 +38,7 @@
  #include "optimizer/plancat.h"
  #include "parser/parse_coerce.h"
  #include "port/atomics.h"
+#include "portability/instr_time.h"
  #include "postmaster/postmaster.h"     /* for MAX_BACKENDS */
  #include "storage/spin.h"
  #include "tcop/tcopprot.h"
@@ -1384,3 +1385,38 @@ test_translation(PG_FUNCTION_ARGS)
  
         PG_RETURN_VOID();
  }
+
+/* Verify that pg_ticks_to_ns behaves correctly, including overflow */
+PG_FUNCTION_INFO_V1(test_instr_time);
+Datum
+test_instr_time(PG_FUNCTION_ARGS)
+{
+       instr_time      t;
+       int64           test_ns[] = {0, 1000, INT64CONST(1000000000000000)};
+       int64           max_err;
+
+       /*
+        * The ns-to-ticks-to-ns roundtrip may lose precision due to integer
+        * truncation in the fixed-point conversion. The maximum error depends on
+        * ticks_per_ns_scaled relative to the shift factor.
+        */
+       max_err = (ticks_per_ns_scaled >> TICKS_TO_NS_SHIFT) + 1;
+
+       for (int i = 0; i < lengthof(test_ns); i++)
+       {
+               int64           result;
+
+               INSTR_TIME_SET_ZERO(t);
+               INSTR_TIME_ADD_NANOSEC(t, test_ns[i]);
+               result = INSTR_TIME_GET_NANOSEC(t);
+
+               if (result < test_ns[i] - max_err || result > test_ns[i])
+                       elog(ERROR,
+                                "INSTR_TIME_GET_NANOSEC(t) yielded " INT64_FORMAT
+                                ", expected " INT64_FORMAT " (max_err " INT64_FORMAT
+                                ") in file \"%s\" line %u",
+                                result, test_ns[i], max_err, __FILE__, __LINE__);
+       }
+
+       PG_RETURN_BOOL(true);
+}
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql

index c8226652f2c94e645ce472117061bcc96144e831..946ee5726cdd790a8fe1d5854159fe6519af6b38 100644 (file)
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -349,3 +349,10 @@ SET ROLE regress_multixact_funcs;
  SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats();
  RESET ROLE;
  DROP ROLE regress_multixact_funcs;
+
+-- test instr_time nanosecond<->ticks conversion
+CREATE FUNCTION test_instr_time()
+    RETURNS bool
+    AS :'regresslib'
+    LANGUAGE C;
+SELECT test_instr_time();
author	Andres Freund <andres@anarazel.de>
	Tue, 7 Apr 2026 16:48:07 +0000 (12:48 -0400)
committer	Andres Freund <andres@anarazel.de>
	Tue, 7 Apr 2026 17:00:24 +0000 (13:00 -0400)
src/backend/postmaster/postmaster.c		patch \| blob \| blame \| history
src/bin/pg_test_timing/pg_test_timing.c		patch \| blob \| blame \| history
src/bin/pgbench/pgbench.c		patch \| blob \| blame \| history
src/bin/psql/startup.c		patch \| blob \| blame \| history
src/common/Makefile		patch \| blob \| blame \| history
src/common/instr_time.c	[new file with mode: 0644]	patch \| blob
src/common/meson.build		patch \| blob \| blame \| history
src/include/portability/instr_time.h		patch \| blob \| blame \| history
src/test/regress/expected/misc_functions.out		patch \| blob \| blame \| history
src/test/regress/pg_regress.c		patch \| blob \| blame \| history
src/test/regress/regress.c		patch \| blob \| blame \| history
src/test/regress/sql/misc_functions.sql		patch \| blob \| blame \| history