The timing infrastructure (INSTR_* macros) measures time elapsed using
clock_gettime() on POSIX systems, which returns the time as nanoseconds,
and QueryPerformanceCounter() on Windows, which is a specialized timing
clock source that returns a tick counter that needs to be converted to
nanoseconds using the result of QueryPerformanceFrequency().
This conversion currently happens ad-hoc on Windows, e.g. when calling
INSTR_TIME_GET_NANOSEC, which calls QueryPerformanceFrequency() on every
invocation, despite the frequency being stable after program start,
incurring unnecessary overhead. It also causes a fractured implementation
where macros are defined differently between platforms.
To ease code readability, and prepare for a future change that intends
to use a ticks-to-nanosecond conversion on x86-64 for TSC use, introduce
new pg_ticks_to_ns() / pg_ns_to_ticks() functions that get called from
INSTR_* macros on all platforms.
These functions rely on a separately initialized ticks_per_ns_scaled
value that represents the conversion ratio. This value is initialized
from QueryPerformanceFrequency() on Windows, and set to zero on x86-64
POSIX systems, which results in the ticks being treated as nanoseconds.
Other architectures always directly return the original ticks.
To support this, pg_initialize_timing() is introduced, and is now
mandatory for both the backend and any frontend programs to call before
utilizing INSTR_* macros.
In passing, fix variable names in comment documenting INSTR_TIME_ADD_NANOSEC().
Author: Lukas Fittl <lukas@fittl.com>
Author: David Geier <geidav.pg@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: David Geier <geidav.pg@gmail.com>
Reviewed-by: Lukas Fittl <lukas@fittl.com>
Reviewed-by: Zsolt Parragi <zsolt.parragi@percona.com>
Discussion: https://www.postgresql.org/message-id/flat/20200612232810.f46nbqkdhbutzqdg%40alap3.anarazel.de
MyStartTimestamp = GetCurrentTimestamp();
MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
+ /* initialize timing infrastructure (required for INSTR_* calls) */
+ pg_initialize_timing();
+
/*
* Set a different global seed in every process. We want something
* unpredictable, so if possible, use high-quality random bits for the
handle_args(argc, argv);
+ /* initialize timing infrastructure (required for INSTR_* calls) */
+ pg_initialize_timing();
+
loop_count = test_timing(test_duration);
output(loop_count);
int exit_code = 0;
struct timeval tv;
+ /* initialize timing infrastructure (required for INSTR_* calls) */
+ pg_initialize_timing();
+
/*
* Record difference between Unix time and instr_time time. We'll use
* this for logging and aggregation.
#include "help.h"
#include "input.h"
#include "mainloop.h"
+#include "portability/instr_time.h"
#include "settings.h"
/*
PQsetNoticeProcessor(pset.db, NoticeProcessor, NULL);
+ /* initialize timing infrastructure (required for INSTR_* calls) */
+ pg_initialize_timing();
+
SyncVariables();
if (options.list_dbs)
file_perm.o \
file_utils.o \
hashfn.o \
+ instr_time.o \
ip.o \
jsonapi.o \
keywords.o \
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * instr_time.c
+ * Non-inline parts of the portable high-precision interval timing
+ * implementation
+ *
+ * Portions Copyright (c) 2026, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/common/instr_time.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "portability/instr_time.h"
+
+/*
+ * Stores what the number of ticks needs to be multiplied with to end up
+ * with nanoseconds using integer math.
+ *
+ * On certain platforms (currently Windows) the ticks to nanoseconds conversion
+ * requires floating point math because:
+ *
+ * sec = ticks / frequency_hz
+ * ns = ticks / frequency_hz * 1,000,000,000
+ * ns = ticks * (1,000,000,000 / frequency_hz)
+ * ns = ticks * (1,000,000 / frequency_khz) <-- now in kilohertz
+ *
+ * Here, 'ns' is usually a floating point number. For example for a 2.5 GHz CPU
+ * the scaling factor becomes 1,000,000 / 2,500,000 = 0.4.
+ *
+ * To be able to use integer math we work around the lack of precision. We
+ * first scale the integer up (left shift by TICKS_TO_NS_SHIFT) and after the
+ * multiplication by the number of ticks in pg_ticks_to_ns() we shift right by
+ * the same amount.
+ *
+ * We remember the maximum number of ticks that can be multiplied by the scale
+ * factor without overflowing so we can check via a * b > max <=> a > max / b.
+ *
+ * However, as this is meant for interval measurements, it is unlikely that the
+ * overflow path is actually taken in typical scenarios, since overflows would
+ * only occur for intervals longer than 6.5 days.
+ *
+ * Note we utilize unsigned integers even though ticks are stored as a signed
+ * value to encourage compilers to generate better assembly, since we can be
+ * sure these values are not negative.
+ *
+ * On all other platforms we are using clock_gettime(), which uses nanoseconds
+ * as ticks. Hence, we set the multiplier to zero, which causes pg_ticks_to_ns
+ * to return the original value.
+ */
+uint64 ticks_per_ns_scaled = 0;
+uint64 max_ticks_no_overflow = 0;
+bool timing_initialized = false;
+
+static void set_ticks_per_ns_system(void);
+
+/*
+ * Initializes timing infrastructure by setting up the platform-specific
+ * ticks <=> nanoseconds conversion state (ticks_per_ns_scaled and
+ * max_ticks_no_overflow). Must be called before making any use of INSTR_*
+ * macros.
+ *
+ * Calling this more than once is harmless: once timing_initialized is set,
+ * later calls return immediately without redoing the setup.
+ */
+void
+pg_initialize_timing(void)
+{
+ if (timing_initialized)
+ return;
+
+ set_ticks_per_ns_system();
+ timing_initialized = true;
+}
+
+#ifndef WIN32
+
+static void
+set_ticks_per_ns_system(void)
+{
+ ticks_per_ns_scaled = 0; /* zero: ticks are already nanoseconds, no scaling */
+ max_ticks_no_overflow = 0; /* not consulted when no scaling is applied */
+}
+
+#else /* WIN32 */
+
+/*
+ * GetTimerFrequency returns the QueryPerformanceCounter() frequency in
+ * counts per second, converted to double.
+ *
+ * The return value of QueryPerformanceFrequency() itself is not checked.
+ */
+static inline double
+GetTimerFrequency(void)
+{
+ LARGE_INTEGER f;
+
+ QueryPerformanceFrequency(&f);
+ return (double) f.QuadPart;
+}
+
+static void
+set_ticks_per_ns_system(void)
+{
+ ticks_per_ns_scaled = (NS_PER_S << TICKS_TO_NS_SHIFT) / GetTimerFrequency(); /* nanoseconds per tick, scaled up by 2^TICKS_TO_NS_SHIFT */
+ max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled; /* above this, ticks * ticks_per_ns_scaled exceeds PG_INT64_MAX */
+}
+
+#endif /* WIN32 */
'file_perm.c',
'file_utils.c',
'hashfn.c',
+ 'instr_time.c',
'ip.c',
'jsonapi.c',
'keywords.c',
*
* INSTR_TIME_ADD(x, y) x += y
*
- * INSTR_TIME_ADD_NANOSEC(t, n) x += y in nanoseconds (converts to ticks)
+ * INSTR_TIME_ADD_NANOSEC(t, n) t += n in nanoseconds (converts to ticks)
*
* INSTR_TIME_SUBTRACT(x, y) x -= y
*
#define NS_PER_MS INT64CONST(1000000)
#define NS_PER_US INT64CONST(1000)
+/* Shift amount for fixed-point ticks-to-nanoseconds conversion. */
+#define TICKS_TO_NS_SHIFT 14
-#ifndef WIN32
+/*
+ * PG_INSTR_TICKS_TO_NS controls whether pg_ticks_to_ns/pg_ns_to_ticks need to
+ * check ticks_per_ns_scaled and potentially convert ticks <=> nanoseconds.
+ */
+#ifdef WIN32
+#define PG_INSTR_TICKS_TO_NS 1
+#else
+#define PG_INSTR_TICKS_TO_NS 0
+#endif
+/*
+ * Variables used to translate ticks to nanoseconds, initialized by
+ * pg_initialize_timing.
+ */
+extern PGDLLIMPORT uint64 ticks_per_ns_scaled;
+extern PGDLLIMPORT uint64 max_ticks_no_overflow;
+extern PGDLLIMPORT bool timing_initialized;
-/* Use clock_gettime() */
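+/*
+ * Initialize timing infrastructure
+ *
+ * This must be called at least once before using INSTR_TIME_SET_CURRENT*
+ * macros, INSTR_TIME_ADD_NANOSEC() or INSTR_TIME_GET_NANOSEC() (and macros
+ * built on them, such as INSTR_TIME_GET_DOUBLE()); the helper functions
+ * behind those macros assert that initialization has happened.
+ */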
+extern void pg_initialize_timing(void);
+
+#ifndef WIN32
+
+/* On POSIX, use clock_gettime() for system clock source */
#include <time.h>
#define PG_INSTR_CLOCK CLOCK_REALTIME
#endif
-/* helper for INSTR_TIME_SET_CURRENT */
static inline instr_time
-pg_clock_gettime_ns(void)
+pg_get_ticks(void)
{
instr_time now;
struct timespec tmp;
+ Assert(timing_initialized); /* pg_initialize_timing() must have been called first */
+
clock_gettime(PG_INSTR_CLOCK, &tmp);
now.ticks = tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
return now;
}
-#define INSTR_TIME_SET_CURRENT(t) \
- ((t) = pg_clock_gettime_ns())
-
-#define INSTR_TIME_GET_NANOSEC(t) \
- ((int64) (t).ticks)
-
-#define INSTR_TIME_ADD_NANOSEC(t, n) \
- ((t).ticks += (n))
-
-
#else /* WIN32 */
+/*
+ * On Windows, use QueryPerformanceCounter() for system clock source.
+ *
+ * pg_get_ticks() stores the raw counter value; it is not in nanoseconds
+ * until converted with pg_ticks_to_ns().
+ */
-/* Use QueryPerformanceCounter() */
-
-/* helper for INSTR_TIME_SET_CURRENT */
static inline instr_time
-pg_query_performance_counter(void)
+pg_get_ticks(void)
{
instr_time now;
LARGE_INTEGER tmp;
+ Assert(timing_initialized);
+
QueryPerformanceCounter(&tmp);
now.ticks = tmp.QuadPart;
return now;
}
-static inline double
-GetTimerFrequency(void)
+#endif /* WIN32 */
+
+static inline int64
+pg_ticks_to_ns(int64 ticks)
{
- LARGE_INTEGER f;
+#if PG_INSTR_TICKS_TO_NS
+ int64 ns = 0;
+
+ Assert(timing_initialized);
+
+ /*
+ * Avoid doing work if we don't use scaled ticks, e.g. system clock on
+ * Unix (in that case ticks is counted in nanoseconds)
+ */
+ if (ticks_per_ns_scaled == 0)
+ return ticks;
+
+ /*
+ * Would ticks * ticks_per_ns_scaled overflow? max_ticks_no_overflow is
+ * the largest tick count for which it does not. If so perform the
+ * computation in two parts.
+ *
+ * NOTE(review): negative tick counts get no special handling in this
+ * function; presumably callers only pass non-negative intervals --
+ * confirm.
+ */
+ if (unlikely(ticks > (int64) max_ticks_no_overflow))
+ {
+ /*
+ * To avoid overflow, first scale total ticks down by the fixed
+ * factor, and *afterwards* multiply them by the frequency-based scale
+ * factor.
+ *
+ * The remaining ticks can follow the regular formula, since they
+ * won't overflow.
+ */
+ int64 count = ticks >> TICKS_TO_NS_SHIFT;
+
+ ns = count * ticks_per_ns_scaled;
+ ticks -= (count << TICKS_TO_NS_SHIFT);
+ }
+
+ ns += (ticks * ticks_per_ns_scaled) >> TICKS_TO_NS_SHIFT;
+
+ return ns;
+#else
+ Assert(timing_initialized);
- QueryPerformanceFrequency(&f);
- return (double) f.QuadPart;
+ return ticks;
+#endif /* PG_INSTR_TICKS_TO_NS */
}
-#define INSTR_TIME_SET_CURRENT(t) \
- ((t) = pg_query_performance_counter())
+static inline int64
+pg_ns_to_ticks(int64 ns)
+{
+#if PG_INSTR_TICKS_TO_NS
+ int64 ticks = 0;
-#define INSTR_TIME_GET_NANOSEC(t) \
- ((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency())))
+ Assert(timing_initialized);
-#define INSTR_TIME_ADD_NANOSEC(t, n) \
- ((t).ticks += ((n) / ((double) NS_PER_S / GetTimerFrequency())))
+ /*
+ * If ticks_per_ns_scaled is zero, ticks are already in nanoseconds (e.g.
+ * system clock on Unix).
+ */
+ if (ticks_per_ns_scaled == 0)
+ return ns;
-#endif /* WIN32 */
+ /*
+ * The reverse of pg_ticks_to_ns. Shifting ns left by TICKS_TO_NS_SHIFT
+ * would overflow for large values, so in that case first convert the
+ * whole multiples of ticks_per_ns_scaled contained in ns (each one is
+ * worth 1 << TICKS_TO_NS_SHIFT ticks) and leave only the remainder for
+ * the regular formula below.
+ */
+ if (unlikely(ns > (INT64_MAX >> TICKS_TO_NS_SHIFT)))
+ {
+ int64 count = ns / ticks_per_ns_scaled;
+
+ ticks = count << TICKS_TO_NS_SHIFT;
+ ns -= count * ticks_per_ns_scaled;
+ }
+ ticks += (ns << TICKS_TO_NS_SHIFT) / ticks_per_ns_scaled;
+
+ return ticks;
+#else
+ Assert(timing_initialized);
+
+ return ns;
+#endif /* PG_INSTR_TICKS_TO_NS */
+}
/*
* Common macros
#define INSTR_TIME_SET_ZERO(t) ((t).ticks = 0)
+#define INSTR_TIME_SET_CURRENT(t) \
+ ((t) = pg_get_ticks())
+
#define INSTR_TIME_ADD(x,y) \
((x).ticks += (y).ticks)
+#define INSTR_TIME_ADD_NANOSEC(t, n) \
+ ((t).ticks += pg_ns_to_ticks(n))
+
#define INSTR_TIME_SUBTRACT(x,y) \
((x).ticks -= (y).ticks)
#define INSTR_TIME_GT(x,y) \
((x).ticks > (y).ticks)
+#define INSTR_TIME_GET_NANOSEC(t) \
+ (pg_ticks_to_ns((t).ticks))
+
#define INSTR_TIME_GET_DOUBLE(t) \
((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)
RESET ROLE;
DROP ROLE regress_multixact_funcs;
+-- test instr_time nanosecond<->ticks conversion
+CREATE FUNCTION test_instr_time()
+ RETURNS bool
+ AS :'regresslib'
+ LANGUAGE C;
+SELECT test_instr_time();
+ test_instr_time
+-----------------
+ t
+(1 row)
+
progname = get_progname(argv[0]);
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_regress"));
+ pg_initialize_timing();
+
get_restricted_token();
atexit(stop_postmaster);
#include "optimizer/plancat.h"
#include "parser/parse_coerce.h"
#include "port/atomics.h"
+#include "portability/instr_time.h"
#include "postmaster/postmaster.h" /* for MAX_BACKENDS */
#include "storage/spin.h"
#include "tcop/tcopprot.h"
PG_RETURN_VOID();
}
+
+/*
+ * Verify that the nanoseconds -> ticks -> nanoseconds round trip through
+ * INSTR_TIME_ADD_NANOSEC() and INSTR_TIME_GET_NANOSEC() behaves correctly,
+ * including a value (10^15 ns) that is larger than
+ * INT64_MAX >> TICKS_TO_NS_SHIFT and therefore takes the overflow path when
+ * ticks are scaled.
+ *
+ * Raises an ERROR if a result falls outside the accepted range, otherwise
+ * returns true.
+ */
+PG_FUNCTION_INFO_V1(test_instr_time);
+Datum
+test_instr_time(PG_FUNCTION_ARGS)
+{
+ instr_time t;
+ int64 test_ns[] = {0, 1000, INT64CONST(1000000000000000)};
+ int64 max_err;
+
+ /*
+ * The ns-to-ticks-to-ns roundtrip may lose precision due to integer
+ * truncation in the fixed-point conversion. The maximum error depends on
+ * ticks_per_ns_scaled relative to the shift factor.
+ *
+ * The check below accepts a result that undershoots the input by at most
+ * max_err, but never one that exceeds the input.
+ */
+ max_err = (ticks_per_ns_scaled >> TICKS_TO_NS_SHIFT) + 1;
+
+ for (int i = 0; i < lengthof(test_ns); i++)
+ {
+ int64 result;
+
+ INSTR_TIME_SET_ZERO(t);
+ INSTR_TIME_ADD_NANOSEC(t, test_ns[i]);
+ result = INSTR_TIME_GET_NANOSEC(t);
+
+ if (result < test_ns[i] - max_err || result > test_ns[i])
+ elog(ERROR,
+ "INSTR_TIME_GET_NANOSEC(t) yielded " INT64_FORMAT
+ ", expected " INT64_FORMAT " (max_err " INT64_FORMAT
+ ") in file \"%s\" line %u",
+ result, test_ns[i], max_err, __FILE__, __LINE__);
+ }
+
+ PG_RETURN_BOOL(true);
+}
SELECT oldest_multixact IS NULL AS null_result FROM pg_get_multixact_stats();
RESET ROLE;
DROP ROLE regress_multixact_funcs;
+
+-- test instr_time nanosecond<->ticks conversion
+CREATE FUNCTION test_instr_time()
+ RETURNS bool
+ AS :'regresslib'
+ LANGUAGE C;
+SELECT test_instr_time();