/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <asm/mshyperv.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>

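/*
 * vsyscall_gtod_data lives in the vvar page, which the kernel maps
 * read-only into every process alongside the vDSO text, so the
 * functions below can read the timekeeping state without entering
 * the kernel.
 */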
#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
        __attribute__((visibility("hidden")));
#endif

#ifdef CONFIG_HYPERV_TSCPAGE
extern u8 hvclock_page
        __attribute__((visibility("hidden")));
#endif

#ifndef BUILD_VDSO32

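/*
 * Fallbacks that make the real system call when the vDSO fast path
 * cannot be used.  On x86-64, the SYSCALL instruction itself clobbers
 * rcx and r11, which is why they appear in the clobber lists below.
 */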
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
        long ret;

        asm ("syscall" : "=a" (ret), "=m" (*ts) :
             "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
             "memory", "rcx", "r11");
        return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
        long ret;

        asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) :
             "0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
             "memory", "rcx", "r11");
        return ret;
}

#else

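/*
 * On 32-bit, the first syscall argument lives in ebx, but ebx is also
 * the GOT pointer in PIC code, so it cannot simply be handed to GCC as
 * an asm constraint; the sequences below shuffle it through edx by hand.
 */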
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
        long ret;

        asm (
                "mov %%ebx, %%edx \n"
                "mov %[clock], %%ebx \n"
                "call __kernel_vsyscall \n"
                "mov %%edx, %%ebx \n"
                : "=a" (ret), "=m" (*ts)
                : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
                : "memory", "edx");
        return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
        long ret;

        asm (
                "mov %%ebx, %%edx \n"
                "mov %[tv], %%ebx \n"
                "call __kernel_vsyscall \n"
                "mov %%edx, %%ebx \n"
                : "=a" (ret), "=m" (*tv), "=m" (*tz)
                : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz)
                : "memory", "edx");
        return ret;
}

#endif

#ifdef CONFIG_PARAVIRT_CLOCK
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
        return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}

static notrace u64 vread_pvclock(int *mode)
{
        const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
        u64 ret;
        u64 last;
        u32 version;

        /*
         * Note: The kernel and hypervisor must guarantee that cpu ID
         * number maps 1:1 to per-CPU pvclock time info.
         *
         * Because the hypervisor is entirely unaware of guest userspace
         * preemption, it cannot guarantee that per-CPU pvclock time
         * info is updated if the underlying CPU changes or that the
         * version is increased whenever the underlying CPU changes.
         *
         * On KVM, we are guaranteed that pvti updates for any vCPU are
         * atomic as seen by *all* vCPUs.  This is an even stronger
         * guarantee than we get with a normal seqlock.
         *
         * On Xen, we don't appear to have that guarantee, but Xen still
         * supplies a valid seqlock using the version field.
         *
         * We only do pvclock vdso timing at all if
         * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
         * mean that all vCPUs have matching pvti and that the TSC is
         * synced, so we can just look at vCPU 0's pvti.
         */

        do {
                version = pvclock_read_begin(pvti);

                if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
                        *mode = VCLOCK_NONE;
                        return 0;
                }

                ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
        } while (pvclock_read_retry(pvti, version));

        /* refer to vread_tsc() comment for rationale */
        last = gtod->cycle_last;

        if (likely(ret >= last))
                return ret;

        return last;
}
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
static notrace u64 vread_hvclock(int *mode)
{
        const struct ms_hyperv_tsc_page *tsc_pg =
                (const struct ms_hyperv_tsc_page *)&hvclock_page;
        u64 current_tick = hv_read_tsc_page(tsc_pg);

        if (current_tick != U64_MAX)
                return current_tick;

        *mode = VCLOCK_NONE;
        return 0;
}
#endif

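/*
 * The vread_*() helpers above signal an unusable clocksource by
 * storing VCLOCK_NONE through *mode; vgetsns() propagates that so the
 * do_*() loops can tell their callers to fall back to a real syscall.
 * vread_tsc() below has no failure mode and thus takes no mode pointer.
 */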
notrace static u64 vread_tsc(void)
{
        u64 ret = (u64)rdtsc_ordered();
        u64 last = gtod->cycle_last;

        if (likely(ret >= last))
                return ret;

        /*
         * GCC likes to generate cmov here, but this branch is extremely
         * predictable (it's just a function of time and the likely is
         * very likely) and there's a data dependence, so force GCC
         * to generate a branch instead.  I don't barrier() because
         * we don't actually need a barrier, and if this function
         * ever gets inlined it will generate worse code.
         */
        asm volatile ("");
        return last;
}

notrace static inline u64 vgetsns(int *mode)
{
        u64 v;
        cycles_t cycles;

        if (gtod->vclock_mode == VCLOCK_TSC)
                cycles = vread_tsc();
#ifdef CONFIG_PARAVIRT_CLOCK
        else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
                cycles = vread_pvclock(mode);
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
        else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
                cycles = vread_hvclock(mode);
#endif
        else
                return 0;
        v = cycles - gtod->cycle_last;
        return v * gtod->mult;
}

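/*
 * For reference (not from this file): the timekeeping core picks mult
 * and shift so that (cycles * mult) >> shift approximates
 * cycles * NSEC_PER_SEC / freq.  With an illustrative 1 GHz counter
 * and shift = 24, mult would be 1 << 24 = 16777216, so 1000 cycles
 * scale to (1000 * 16777216) >> 24 = 1000 ns.  vgetsns() returns the
 * product still left-shifted; the do_*() functions below keep the
 * base nanoseconds in the same shifted form and apply ">> gtod->shift"
 * once, after adding the two.
 */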
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
        struct vgtod_ts *base = &gtod->basetime[CLOCK_REALTIME];
        unsigned int seq;
        u64 ns;
        int mode;

        do {
                seq = gtod_read_begin(gtod);
                mode = gtod->vclock_mode;
                ts->tv_sec = base->sec;
                ns = base->nsec;
                ns += vgetsns(&mode);
                ns >>= gtod->shift;
        } while (unlikely(gtod_read_retry(gtod, seq)));

        ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
        ts->tv_nsec = ns;

        return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
        struct vgtod_ts *base = &gtod->basetime[CLOCK_MONOTONIC];
        unsigned int seq;
        u64 ns;
        int mode;

        do {
                seq = gtod_read_begin(gtod);
                mode = gtod->vclock_mode;
                ts->tv_sec = base->sec;
                ns = base->nsec;
                ns += vgetsns(&mode);
                ns >>= gtod->shift;
        } while (unlikely(gtod_read_retry(gtod, seq)));

        ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
        ts->tv_nsec = ns;

        return mode;
}

notrace static void do_realtime_coarse(struct timespec *ts)
{
        struct vgtod_ts *base = &gtod->basetime[CLOCK_REALTIME_COARSE];
        unsigned int seq;

        do {
                seq = gtod_read_begin(gtod);
                ts->tv_sec = base->sec;
                ts->tv_nsec = base->nsec;
        } while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
        struct vgtod_ts *base = &gtod->basetime[CLOCK_MONOTONIC_COARSE];
        unsigned int seq;

        do {
                seq = gtod_read_begin(gtod);
                ts->tv_sec = base->sec;
                ts->tv_nsec = base->nsec;
        } while (unlikely(gtod_read_retry(gtod, seq)));
}

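/*
 * The coarse variants above never touch the clocksource: they copy the
 * tick-granularity base values published by the timekeeping core,
 * trading resolution for an even cheaper read.
 */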
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
        switch (clock) {
        case CLOCK_REALTIME:
                if (do_realtime(ts) == VCLOCK_NONE)
                        goto fallback;
                break;
        case CLOCK_MONOTONIC:
                if (do_monotonic(ts) == VCLOCK_NONE)
                        goto fallback;
                break;
        case CLOCK_REALTIME_COARSE:
                do_realtime_coarse(ts);
                break;
        case CLOCK_MONOTONIC_COARSE:
                do_monotonic_coarse(ts);
                break;
        default:
                goto fallback;
        }

        return 0;
fallback:
        return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
        __attribute__((weak, alias("__vdso_clock_gettime")));

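/*
 * Illustrative only, not part of this file: userspace reaches the fast
 * path simply by calling the libc wrapper, which the dynamic linker
 * resolves to __vdso_clock_gettime.  A minimal sketch, assuming glibc:
 *
 *	#include <stdio.h>
 *	#include <time.h>
 *
 *	int main(void)
 *	{
 *		struct timespec ts;
 *
 *		clock_gettime(CLOCK_MONOTONIC, &ts); // no syscall here
 *		printf("%ld.%09ld\n", ts.tv_sec, ts.tv_nsec);
 *		return 0;
 *	}
 */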
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
        if (likely(tv != NULL)) {
                if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
                        return vdso_fallback_gtod(tv, tz);
                tv->tv_usec /= 1000;
        }
        if (unlikely(tz != NULL)) {
                tz->tz_minuteswest = gtod->tz_minuteswest;
                tz->tz_dsttime = gtod->tz_dsttime;
        }

        return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
        __attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely.
 */
notrace time_t __vdso_time(time_t *t)
{
        /* This is atomic on x86 so we don't need any locks. */
        time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);

        if (t)
                *t = result;
        return result;
}
time_t time(time_t *t)
        __attribute__((weak, alias("__vdso_time")));