]>
Commit | Line | Data |
---|---|---|
2aae950b AK |
1 | /* |
2 | * Copyright 2006 Andi Kleen, SUSE Labs. | |
3 | * Subject to the GNU Public License, v.2 | |
4 | * | |
f144a6b4 | 5 | * Fast user context implementation of clock_gettime, gettimeofday, and time. |
2aae950b | 6 | * |
7a59ed41 SS |
7 | * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> |
8 | * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany | |
9 | * | |
2aae950b AK |
10 | * The code should have no internal unresolved relocations. |
11 | * Check with readelf after changing. | |
2aae950b AK |
12 | */ |
13 | ||
7a59ed41 | 14 | #include <uapi/linux/time.h> |
2aae950b | 15 | #include <asm/vgtod.h> |
7c03156f | 16 | #include <asm/vvar.h> |
2aae950b | 17 | #include <asm/unistd.h> |
7c03156f | 18 | #include <asm/msr.h> |
76480a6a | 19 | #include <asm/pvclock.h> |
90b20432 | 20 | #include <asm/mshyperv.h> |
7c03156f SS |
21 | #include <linux/math64.h> |
22 | #include <linux/time.h> | |
76480a6a | 23 | #include <linux/kernel.h> |
2aae950b | 24 | |
8c49d9a7 | 25 | #define gtod (&VVAR(vsyscall_gtod_data)) |
2aae950b | 26 | |
7a59ed41 SS |
27 | extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); |
28 | extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); | |
29 | extern time_t __vdso_time(time_t *t); | |
30 | ||
dac16fba AL |
31 | #ifdef CONFIG_PARAVIRT_CLOCK |
32 | extern u8 pvclock_page | |
33 | __attribute__((visibility("hidden"))); | |
34 | #endif | |
35 | ||
90b20432 VK |
36 | #ifdef CONFIG_HYPERV_TSCPAGE |
37 | extern u8 hvclock_page | |
38 | __attribute__((visibility("hidden"))); | |
39 | #endif | |
40 | ||
7a59ed41 SS |
41 | #ifndef BUILD_VDSO32 |
42 | ||
411f790c SS |
43 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
44 | { | |
45 | long ret; | |
715bd9d1 AL |
46 | asm ("syscall" : "=a" (ret), "=m" (*ts) : |
47 | "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : | |
89fe0a1f | 48 | "rcx", "r11"); |
411f790c | 49 | return ret; |
98d0ac38 AL |
50 | } |
51 | ||
76480a6a AL |
52 | #else |
53 | ||
54 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) | |
55 | { | |
56 | long ret; | |
57 | ||
715bd9d1 | 58 | asm ( |
76480a6a | 59 | "mov %%ebx, %%edx \n" |
02e42566 | 60 | "mov %[clock], %%ebx \n" |
76480a6a AL |
61 | "call __kernel_vsyscall \n" |
62 | "mov %%edx, %%ebx \n" | |
715bd9d1 | 63 | : "=a" (ret), "=m" (*ts) |
02e42566 | 64 | : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) |
89fe0a1f | 65 | : "edx"); |
76480a6a AL |
66 | return ret; |
67 | } | |
68 | ||
76480a6a AL |
69 | #endif |
70 | ||
71 | #ifdef CONFIG_PARAVIRT_CLOCK | |
dac16fba | 72 | static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) |
51c19b4f | 73 | { |
dac16fba | 74 | return (const struct pvclock_vsyscall_time_info *)&pvclock_page; |
51c19b4f MT |
75 | } |
76 | ||
4f72adc5 | 77 | static notrace u64 vread_pvclock(void) |
51c19b4f | 78 | { |
dac16fba | 79 | const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; |
abe9efa7 | 80 | u32 version; |
3e89bf35 | 81 | u64 ret; |
51c19b4f MT |
82 | |
83 | /* | |
6b078f5d AL |
84 | * Note: The kernel and hypervisor must guarantee that cpu ID |
85 | * number maps 1:1 to per-CPU pvclock time info. | |
86 | * | |
87 | * Because the hypervisor is entirely unaware of guest userspace | |
88 | * preemption, it cannot guarantee that per-CPU pvclock time | |
89 | * info is updated if the underlying CPU changes or that the | |
90 | * version is increased whenever the underlying CPU changes. | |
91 | * | |
92 | * On KVM, we are guaranteed that pvti updates for any vCPU are | |
93 | * atomic as seen by *all* vCPUs. This is an even stronger | |
94 | * guarantee than we get with a normal seqlock. | |
73459e2a | 95 | * |
6b078f5d AL |
96 | * On Xen, we don't appear to have that guarantee, but Xen still |
97 | * supplies a valid seqlock using the version field. | |
78fd8c72 | 98 | * |
6b078f5d AL |
99 | * We only do pvclock vdso timing at all if |
100 | * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to | |
101 | * mean that all vCPUs have matching pvti and that the TSC is | |
102 | * synced, so we can just look at vCPU 0's pvti. | |
51c19b4f | 103 | */ |
6b078f5d | 104 | |
6b078f5d | 105 | do { |
3aed64f6 | 106 | version = pvclock_read_begin(pvti); |
6b078f5d | 107 | |
4f72adc5 TG |
108 | if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) |
109 | return U64_MAX; | |
78fd8c72 | 110 | |
108b249c | 111 | ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); |
3aed64f6 | 112 | } while (pvclock_read_retry(pvti, version)); |
6b078f5d | 113 | |
3e89bf35 | 114 | return ret; |
51c19b4f MT |
115 | } |
116 | #endif | |
90b20432 | 117 | #ifdef CONFIG_HYPERV_TSCPAGE |
4f72adc5 | 118 | static notrace u64 vread_hvclock(void) |
90b20432 VK |
119 | { |
120 | const struct ms_hyperv_tsc_page *tsc_pg = | |
121 | (const struct ms_hyperv_tsc_page *)&hvclock_page; | |
90b20432 | 122 | |
4f72adc5 | 123 | return hv_read_tsc_page(tsc_pg); |
90b20432 VK |
124 | } |
125 | #endif | |
51c19b4f | 126 | |
4f72adc5 | 127 | notrace static inline u64 vgetcyc(int mode) |
2aae950b | 128 | { |
4f72adc5 | 129 | if (mode == VCLOCK_TSC) |
3e89bf35 | 130 | return (u64)rdtsc_ordered(); |
51c19b4f | 131 | #ifdef CONFIG_PARAVIRT_CLOCK |
4f72adc5 TG |
132 | else if (mode == VCLOCK_PVCLOCK) |
133 | return vread_pvclock(); | |
90b20432 VK |
134 | #endif |
135 | #ifdef CONFIG_HYPERV_TSCPAGE | |
4f72adc5 TG |
136 | else if (mode == VCLOCK_HVCLOCK) |
137 | return vread_hvclock(); | |
51c19b4f | 138 | #endif |
4f72adc5 | 139 | return U64_MAX; |
2aae950b AK |
140 | } |
141 | ||
e9a62f76 | 142 | notrace static int do_hres(clockid_t clk, struct timespec *ts) |
2aae950b | 143 | { |
e9a62f76 | 144 | struct vgtod_ts *base = &gtod->basetime[clk];
99c19e6a | 145 | u64 cycles, last, sec, ns; |
77e9c678 | 146 | unsigned int seq; |
a939e817 | 147 | |
2aae950b | 148 | do { |
7c03156f | 149 | seq = gtod_read_begin(gtod); |
99c19e6a | 150 | cycles = vgetcyc(gtod->vclock_mode); |
49116f20 | 151 | ns = base->nsec; |
3e89bf35 | 152 | last = gtod->cycle_last; |
4f72adc5 TG |
153 | if (unlikely((s64)cycles < 0)) |
154 | return vdso_fallback_gettime(clk, ts); | |
3e89bf35 TG |
155 | if (cycles > last) |
156 | ns += (cycles - last) * gtod->mult; | |
7c03156f | 157 | ns >>= gtod->shift; |
99c19e6a | 158 | sec = base->sec; |
7c03156f SS |
159 | } while (unlikely(gtod_read_retry(gtod, seq))); |
160 | ||
99c19e6a AL |
161 | /* |
162 | * Do this outside the loop: a race inside the loop could result | |
163 | * in __iter_div_u64_rem() being extremely slow. | |
164 | */ | |
165 | ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); | |
7c03156f | 166 | ts->tv_nsec = ns; |
0f51f285 | 167 | |
4f72adc5 | 168 | return 0; |
2aae950b AK |
169 | } |
170 | ||
6deec5bd | 171 | notrace static void do_coarse(clockid_t clk, struct timespec *ts) |
da15cfda | 172 | { |
6deec5bd | 173 | struct vgtod_ts *base = &gtod->basetime[clk];
77e9c678 | 174 | unsigned int seq; |
49116f20 | 175 | |
da15cfda | 176 | do { |
7c03156f | 177 | seq = gtod_read_begin(gtod); |
49116f20 TG |
178 | ts->tv_sec = base->sec; |
179 | ts->tv_nsec = base->nsec; | |
7c03156f | 180 | } while (unlikely(gtod_read_retry(gtod, seq))); |
da15cfda JS |
181 | } |
182 | ||
23adec55 | 183 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
2aae950b | 184 | { |
f3e83938 | 185 | unsigned int msk; |
0d7b8547 | 186 | |
f3e83938 TG |
187 | /* Sort out negative (CPU/FD) and invalid clocks */ |
188 | if (unlikely((unsigned int) clock >= MAX_CLOCKS)) | |
189 | return vdso_fallback_gettime(clock, ts); | |
190 | ||
191 | /* | |
192 | * Convert the clockid to a bitmask and use it to check which | |
193 | * clocks are handled in the VDSO directly. | |
194 | */ | |
195 | msk = 1U << clock; | |
196 | if (likely(msk & VGTOD_HRES)) { | |
4f72adc5 | 197 | return do_hres(clock, ts); |
f3e83938 TG |
198 | } else if (msk & VGTOD_COARSE) { |
199 | do_coarse(clock, ts); | |
200 | return 0; | |
201 | } | |
ce39c640 | 202 | return vdso_fallback_gettime(clock, ts); |
2aae950b | 203 | } |
f3e83938 | 204 | |
2aae950b AK |
205 | int clock_gettime(clockid_t, struct timespec *) |
206 | __attribute__((weak, alias("__vdso_clock_gettime"))); | |
207 | ||
23adec55 | 208 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
2aae950b | 209 | { |
a939e817 | 210 | if (likely(tv != NULL)) { |
e9a62f76 TG |
211 | struct timespec *ts = (struct timespec *) tv; |
212 | ||
4f72adc5 | 213 | do_hres(CLOCK_REALTIME, ts); |
a939e817 | 214 | tv->tv_usec /= 1000; |
2aae950b | 215 | } |
a939e817 | 216 | if (unlikely(tz != NULL)) { |
7c03156f SS |
217 | tz->tz_minuteswest = gtod->tz_minuteswest; |
218 | tz->tz_dsttime = gtod->tz_dsttime; | |
a939e817 JS |
219 | } |
220 | ||
a939e817 | 221 | return 0; |
2aae950b AK |
222 | } |
223 | int gettimeofday(struct timeval *, struct timezone *) | |
224 | __attribute__((weak, alias("__vdso_gettimeofday"))); | |
f144a6b4 | 225 | |
0d7b8547 AL |
226 | /* |
227 | * This will break when the xtime seconds get inaccurate, but that is | |
228 | * unlikely | |
229 | */ | |
f144a6b4 AL |
230 | notrace time_t __vdso_time(time_t *t) |
231 | { | |
7a59ed41 | 232 | /* This is atomic on x86 so we don't need any locks. */ |
49116f20 | 233 | time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec); |
f144a6b4 AL |
234 | |
235 | if (t) | |
236 | *t = result; | |
237 | return result; | |
238 | } | |
88edb57d | 239 | time_t time(time_t *t) |
f144a6b4 | 240 | __attribute__((weak, alias("__vdso_time"))); |