arch/x86/entry/vdso/vclock_gettime.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright 2006 Andi Kleen, SUSE Labs.
   4  *
   5  * Fast user context implementation of clock_gettime, gettimeofday, and time.
   6  *
   7  * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
   8  *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
   9  *
  10  * The code should have no internal unresolved relocations.
  11  * Check with readelf after changing.
  12  */
  13
  14 #include <uapi/linux/time.h>
  15 #include <asm/vgtod.h>
  16 #include <asm/vvar.h>
  17 #include <asm/unistd.h>
  18 #include <asm/msr.h>
  19 #include <asm/pvclock.h>
  20 #include <asm/mshyperv.h>
  21 #include <linux/math64.h>
  22 #include <linux/time.h>
  23 #include <linux/kernel.h>
  24
  25 #define gtod (&VVAR(vsyscall_gtod_data))
  26
  27 extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
  28 extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
  29 extern time_t __vdso_time(time_t *t);
  30
  31 #ifdef CONFIG_PARAVIRT_CLOCK
  32 extern u8 pvclock_page[PAGE_SIZE]
  33         __attribute__((visibility("hidden")));
  34 #endif
  35
  36 #ifdef CONFIG_HYPERV_TSCPAGE
  37 extern u8 hvclock_page[PAGE_SIZE]
  38         __attribute__((visibility("hidden")));
  39 #endif
  40
  41 #ifndef BUILD_VDSO32
  42
  43 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
  44 {
  45         long ret;
  46         asm ("syscall" : "=a" (ret), "=m" (*ts) :
  47              "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
  48              "rcx", "r11");
  49         return ret;
  50 }
  51
  52 #else
  53
  54 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
  55 {
  56         long ret;
  57
  58         asm (
  59                 "mov %%ebx, %%edx \n"
  60                 "mov %[clock], %%ebx \n"
  61                 "call __kernel_vsyscall \n"
  62                 "mov %%edx, %%ebx \n"
  63                 : "=a" (ret), "=m" (*ts)
  64                 : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
  65                 : "edx");
  66         return ret;
  67 }
  68
  69 #endif
  70
  71 #ifdef CONFIG_PARAVIRT_CLOCK
  72 static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
  73 {
  74         return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
  75 }
  76
  77 static notrace u64 vread_pvclock(void)
  78 {
  79         const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
  80         u32 version;
  81         u64 ret;
  82
  83         /*
  84          * Note: The kernel and hypervisor must guarantee that cpu ID
  85          * number maps 1:1 to per-CPU pvclock time info.
  86          *
  87          * Because the hypervisor is entirely unaware of guest userspace
  88          * preemption, it cannot guarantee that per-CPU pvclock time
  89          * info is updated if the underlying CPU changes or that that
  90          * version is increased whenever underlying CPU changes.
  91          *
  92          * On KVM, we are guaranteed that pvti updates for any vCPU are
  93          * atomic as seen by *all* vCPUs.  This is an even stronger
  94          * guarantee than we get with a normal seqlock.
  95          *
  96          * On Xen, we don't appear to have that guarantee, but Xen still
  97          * supplies a valid seqlock using the version field.
  98          *
  99          * We only do pvclock vdso timing at all if
 100          * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
 101          * mean that all vCPUs have matching pvti and that the TSC is
 102          * synced, so we can just look at vCPU 0's pvti.
 103          */
 104
 105         do {
 106                 version = pvclock_read_begin(pvti);
 107
 108                 if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
 109                         return U64_MAX;
 110
 111                 ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
 112         } while (pvclock_read_retry(pvti, version));
 113
 114         return ret;
 115 }
 116 #endif
 117 #ifdef CONFIG_HYPERV_TSCPAGE
 118 static notrace u64 vread_hvclock(void)
 119 {
 120         const struct ms_hyperv_tsc_page *tsc_pg =
 121                 (const struct ms_hyperv_tsc_page *)&hvclock_page;
 122
 123         return hv_read_tsc_page(tsc_pg);
 124 }
 125 #endif
 126
 127 notrace static inline u64 vgetcyc(int mode)
 128 {
 129         if (mode == VCLOCK_TSC)
 130                 return (u64)rdtsc_ordered();
 131
 132         /*
 133          * For any memory-mapped vclock type, we need to make sure that gcc
 134          * doesn't cleverly hoist a load before the mode check.  Otherwise we
 135          * might end up touching the memory-mapped page even if the vclock in
 136          * question isn't enabled, which will segfault.  Hence the barriers.
 137          */
 138 #ifdef CONFIG_PARAVIRT_CLOCK
 139         if (mode == VCLOCK_PVCLOCK) {
 140                 barrier();
 141                 return vread_pvclock();
 142         }
 143 #endif
 144 #ifdef CONFIG_HYPERV_TSCPAGE
 145         if (mode == VCLOCK_HVCLOCK) {
 146                 barrier();
 147                 return vread_hvclock();
 148         }
 149 #endif
 150         return U64_MAX;
 151 }
 152
 153 notrace static int do_hres(clockid_t clk, struct timespec *ts)
 154 {
 155         struct vgtod_ts *base = &gtod->basetime[clk];
 156         u64 cycles, last, sec, ns;
 157         unsigned int seq;
 158
 159         do {
 160                 seq = gtod_read_begin(gtod);
 161                 cycles = vgetcyc(gtod->vclock_mode);
 162                 ns = base->nsec;
 163                 last = gtod->cycle_last;
 164                 if (unlikely((s64)cycles < 0))
 165                         return vdso_fallback_gettime(clk, ts);
 166                 if (cycles > last)
 167                         ns += (cycles - last) * gtod->mult;
 168                 ns >>= gtod->shift;
 169                 sec = base->sec;
 170         } while (unlikely(gtod_read_retry(gtod, seq)));
 171
 172         /*
 173          * Do this outside the loop: a race inside the loop could result
 174          * in __iter_div_u64_rem() being extremely slow.
 175          */
 176         ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
 177         ts->tv_nsec = ns;
 178
 179         return 0;
 180 }
 181
 182 notrace static void do_coarse(clockid_t clk, struct timespec *ts)
 183 {
 184         struct vgtod_ts *base = &gtod->basetime[clk];
 185         unsigned int seq;
 186
 187         do {
 188                 seq = gtod_read_begin(gtod);
 189                 ts->tv_sec = base->sec;
 190                 ts->tv_nsec = base->nsec;
 191         } while (unlikely(gtod_read_retry(gtod, seq)));
 192 }
 193
 194 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 195 {
 196         unsigned int msk;
 197
 198         /* Sort out negative (CPU/FD) and invalid clocks */
 199         if (unlikely((unsigned int) clock >= MAX_CLOCKS))
 200                 return vdso_fallback_gettime(clock, ts);
 201
 202         /*
 203          * Convert the clockid to a bitmask and use it to check which
 204          * clocks are handled in the VDSO directly.
 205          */
 206         msk = 1U << clock;
 207         if (likely(msk & VGTOD_HRES)) {
 208                 return do_hres(clock, ts);
 209         } else if (msk & VGTOD_COARSE) {
 210                 do_coarse(clock, ts);
 211                 return 0;
 212         }
 213         return vdso_fallback_gettime(clock, ts);
 214 }
 215
 216 int clock_gettime(clockid_t, struct timespec *)
 217         __attribute__((weak, alias("__vdso_clock_gettime")));
 218
 219 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 220 {
 221         if (likely(tv != NULL)) {
 222                 struct timespec *ts = (struct timespec *) tv;
 223
 224                 do_hres(CLOCK_REALTIME, ts);
 225                 tv->tv_usec /= 1000;
 226         }
 227         if (unlikely(tz != NULL)) {
 228                 tz->tz_minuteswest = gtod->tz_minuteswest;
 229                 tz->tz_dsttime = gtod->tz_dsttime;
 230         }
 231
 232         return 0;
 233 }
 234 int gettimeofday(struct timeval *, struct timezone *)
 235         __attribute__((weak, alias("__vdso_gettimeofday")));
 236
 237 /*
 238  * This will break when the xtime seconds get inaccurate, but that is
 239  * unlikely
 240  */
 241 notrace time_t __vdso_time(time_t *t)
 242 {
 243         /* This is atomic on x86 so we don't need any locks. */
 244         time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);
 245
 246         if (t)
 247                 *t = result;
 248         return result;
 249 }
 250 time_t time(time_t *t)
 251         __attribute__((weak, alias("__vdso_time")));