Source: git.ipfire.org — people/teissler/ipfire-2.x.git
Path:   src/patches/suse-2.6.27.31/patches.arch/ppc-clock_gettime-nanoseconds.patch
1 From: Tony Breeds <tony@bakeyournoodle.com>
2 Subject: [PATCH] powerpc: Improve resolution of VDSO clock_gettime
3 References: 439908 - LTC49499
4
5 Currently the clock_gettime implementation in the VDSO produces a
6 result with microsecond resolution for the cases that are handled
7 without a system call, i.e. CLOCK_REALTIME and CLOCK_MONOTONIC. The
8 nanoseconds field of the result is obtained by computing a
9 microseconds value and multiplying by 1000.
10
11 This changes the code in the VDSO to do the computation for
12 clock_gettime with nanosecond resolution. That means that the
13 resolution of the result will ultimately depend on the timebase
14 frequency.
15
16 Because the timestamp in the VDSO datapage (stamp_xsec, the real time
17 corresponding to the timebase count in tb_orig_stamp) is in units of
18 2^-20 seconds, it doesn't have sufficient resolution for computing a
19 result with nanosecond resolution. Therefore this adds a copy of
20 xtime to the VDSO datapage and updates it in update_gtod() along with
21 the other time-related fields.
22
23 Signed-off-by: Paul Mackerras <paulus@samba.org>
24 Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
25 Signed-off-by: Olaf Hering <olh@suse.de>
26 ---
27 arch/powerpc/include/asm/vdso_datapage.h | 3
28 arch/powerpc/kernel/asm-offsets.c | 1
29 arch/powerpc/kernel/time.c | 1
30 arch/powerpc/kernel/vdso32/gettimeofday.S | 196 ++++++++++++++++++------------
31 arch/powerpc/kernel/vdso64/gettimeofday.S | 143 +++++++++++----------
32 5 files changed, 205 insertions(+), 139 deletions(-)
33
34 --- a/arch/powerpc/include/asm/vdso_datapage.h
35 +++ b/arch/powerpc/include/asm/vdso_datapage.h
36 @@ -39,6 +39,7 @@
37 #ifndef __ASSEMBLY__
38
39 #include <linux/unistd.h>
40 +#include <linux/time.h>
41
42 #define SYSCALL_MAP_SIZE ((__NR_syscalls + 31) / 32)
43
44 @@ -83,6 +84,7 @@ struct vdso_data {
45 __u32 icache_log_block_size; /* L1 i-cache log block size */
46 __s32 wtom_clock_sec; /* Wall to monotonic clock */
47 __s32 wtom_clock_nsec;
48 + struct timespec stamp_xtime; /* xtime value for tb_orig_stamp */
49 __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */
50 __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
51 };
52 @@ -102,6 +104,7 @@ struct vdso_data {
53 __u32 tz_dsttime; /* Type of dst correction 0x5C */
54 __s32 wtom_clock_sec; /* Wall to monotonic clock */
55 __s32 wtom_clock_nsec;
56 + struct timespec stamp_xtime; /* xtime value for tb_orig_stamp */
57 __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
58 __u32 dcache_block_size; /* L1 d-cache block size */
59 __u32 icache_block_size; /* L1 i-cache block size */
60 --- a/arch/powerpc/kernel/asm-offsets.c
61 +++ b/arch/powerpc/kernel/asm-offsets.c
62 @@ -304,6 +304,7 @@ int main(void)
63 DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
64 DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
65 DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
66 + DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
67 DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
68 DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
69 DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));
70 --- a/arch/powerpc/kernel/time.c
71 +++ b/arch/powerpc/kernel/time.c
72 @@ -456,6 +456,7 @@ static inline void update_gtod(u64 new_t
73 vdso_data->tb_to_xs = new_tb_to_xs;
74 vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
75 vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
76 + vdso_data->stamp_xtime = xtime;
77 smp_wmb();
78 ++(vdso_data->tb_update_count);
79 }
80 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
81 +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
82 @@ -16,6 +16,13 @@
83 #include <asm/asm-offsets.h>
84 #include <asm/unistd.h>
85
86 +/* Offset for the low 32-bit part of a field of long type */
87 +#ifdef CONFIG_PPC64
88 +#define LOPART 4
89 +#else
90 +#define LOPART 0
91 +#endif
92 +
93 .text
94 /*
95 * Exact prototype of gettimeofday
96 @@ -90,101 +97,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
97
98 mflr r12 /* r12 saves lr */
99 .cfi_register lr,r12
100 - mr r10,r3 /* r10 saves id */
101 mr r11,r4 /* r11 saves tp */
102 bl __get_datapage@local /* get data page */
103 mr r9,r3 /* datapage ptr in r9 */
104 - beq cr1,50f /* if monotonic -> jump there */
105 -
106 - /*
107 - * CLOCK_REALTIME
108 - */
109 -
110 - bl __do_get_xsec@local /* get xsec from tb & kernel */
111 - bne- 98f /* out of line -> do syscall */
112 -
113 - /* seconds are xsec >> 20 */
114 - rlwinm r5,r4,12,20,31
115 - rlwimi r5,r3,12,0,19
116 - stw r5,TSPC32_TV_SEC(r11)
117
118 - /* get remaining xsec and convert to nsec. we scale
119 - * up remaining xsec by 12 bits and get the top 32 bits
120 - * of the multiplication, then we multiply by 1000
121 - */
122 - rlwinm r5,r4,12,0,19
123 - lis r6,1000000@h
124 - ori r6,r6,1000000@l
125 - mulhwu r5,r5,r6
126 - mulli r5,r5,1000
127 - stw r5,TSPC32_TV_NSEC(r11)
128 - mtlr r12
129 - crclr cr0*4+so
130 - li r3,0
131 - blr
132 +50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
133 + bne cr1,80f /* not monotonic -> all done */
134
135 /*
136 * CLOCK_MONOTONIC
137 */
138
139 -50: bl __do_get_xsec@local /* get xsec from tb & kernel */
140 - bne- 98f /* out of line -> do syscall */
141 -
142 - /* seconds are xsec >> 20 */
143 - rlwinm r6,r4,12,20,31
144 - rlwimi r6,r3,12,0,19
145 -
146 - /* get remaining xsec and convert to nsec. we scale
147 - * up remaining xsec by 12 bits and get the top 32 bits
148 - * of the multiplication, then we multiply by 1000
149 - */
150 - rlwinm r7,r4,12,0,19
151 - lis r5,1000000@h
152 - ori r5,r5,1000000@l
153 - mulhwu r7,r7,r5
154 - mulli r7,r7,1000
155 -
156 /* now we must fixup using wall to monotonic. We need to snapshot
157 * that value and do the counter trick again. Fortunately, we still
158 * have the counter value in r8 that was returned by __do_get_xsec.
159 - * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5
160 - * can be used
161 + * At this point, r3,r4 contain our sec/nsec values, r5 and r6
162 + * can be used, r7 contains NSEC_PER_SEC.
163 */
164
165 - lwz r3,WTOM_CLOCK_SEC(r9)
166 - lwz r4,WTOM_CLOCK_NSEC(r9)
167 + lwz r5,WTOM_CLOCK_SEC(r9)
168 + lwz r6,WTOM_CLOCK_NSEC(r9)
169
170 - /* We now have our result in r3,r4. We create a fake dependency
171 - * on that result and re-check the counter
172 + /* We now have our offset in r5,r6. We create a fake dependency
173 + * on that value and re-check the counter
174 */
175 - or r5,r4,r3
176 - xor r0,r5,r5
177 + or r0,r6,r5
178 + xor r0,r0,r0
179 add r9,r9,r0
180 -#ifdef CONFIG_PPC64
181 - lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
182 -#else
183 - lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
184 -#endif
185 + lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
186 cmpl cr0,r8,r0 /* check if updated */
187 bne- 50b
188
189 - /* Calculate and store result. Note that this mimmics the C code,
190 + /* Calculate and store result. Note that this mimics the C code,
191 * which may cause funny results if nsec goes negative... is that
192 * possible at all ?
193 */
194 - add r3,r3,r6
195 - add r4,r4,r7
196 - lis r5,NSEC_PER_SEC@h
197 - ori r5,r5,NSEC_PER_SEC@l
198 - cmpl cr0,r4,r5
199 - cmpli cr1,r4,0
200 + add r3,r3,r5
201 + add r4,r4,r6
202 + cmpw cr0,r4,r7
203 + cmpwi cr1,r4,0
204 blt 1f
205 - subf r4,r5,r4
206 + subf r4,r7,r4
207 addi r3,r3,1
208 -1: bge cr1,1f
209 +1: bge cr1,80f
210 addi r3,r3,-1
211 - add r4,r4,r5
212 -1: stw r3,TSPC32_TV_SEC(r11)
213 + add r4,r4,r7
214 +
215 +80: stw r3,TSPC32_TV_SEC(r11)
216 stw r4,TSPC32_TV_NSEC(r11)
217
218 mtlr r12
219 @@ -195,10 +154,6 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
220 /*
221 * syscall fallback
222 */
223 -98:
224 - mtlr r12
225 - mr r3,r10
226 - mr r4,r11
227 99:
228 li r0,__NR_clock_gettime
229 sc
230 @@ -322,3 +277,98 @@ __do_get_xsec:
231 */
232 3: blr
233 .cfi_endproc
234 +
235 +/*
236 + * This is the core of clock_gettime(), it returns the current
237 + * time in seconds and nanoseconds in r3 and r4.
238 + * It expects the datapage ptr in r9 and doesn't clobber it.
239 + * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7.
240 + * On return, r8 contains the counter value that can be reused.
241 + * This clobbers cr0 but not any other cr field.
242 + */
243 +__do_get_tspec:
244 + .cfi_startproc
245 + /* Check for update count & load values. We use the low
246 + * order 32 bits of the update count
247 + */
248 +1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
249 + andi. r0,r8,1 /* pending update ? loop */
250 + bne- 1b
251 + xor r0,r8,r8 /* create dependency */
252 + add r9,r9,r0
253 +
254 + /* Load orig stamp (offset to TB) */
255 + lwz r5,CFG_TB_ORIG_STAMP(r9)
256 + lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
257 +
258 + /* Get a stable TB value */
259 +2: mftbu r3
260 + mftbl r4
261 + mftbu r0
262 + cmpl cr0,r3,r0
263 + bne- 2b
264 +
265 + /* Subtract tb orig stamp and shift left 12 bits.
266 + */
267 + subfc r7,r6,r4
268 + subfe r0,r5,r3
269 + slwi r0,r0,12
270 + rlwimi. r0,r7,12,20,31
271 + slwi r7,r7,12
272 +
273 + /* Load scale factor & do multiplication */
274 + lwz r5,CFG_TB_TO_XS(r9) /* load values */
275 + lwz r6,(CFG_TB_TO_XS+4)(r9)
276 + mulhwu r3,r7,r6
277 + mullw r10,r7,r5
278 + mulhwu r4,r7,r5
279 + addc r10,r3,r10
280 + li r3,0
281 +
282 + beq+ 4f /* skip high part computation if 0 */
283 + mulhwu r3,r0,r5
284 + mullw r7,r0,r5
285 + mulhwu r5,r0,r6
286 + mullw r6,r0,r6
287 + adde r4,r4,r7
288 + addze r3,r3
289 + addc r4,r4,r5
290 + addze r3,r3
291 + addc r10,r10,r6
292 +
293 +4: addze r4,r4 /* add in carry */
294 + lis r7,NSEC_PER_SEC@h
295 + ori r7,r7,NSEC_PER_SEC@l
296 + mulhwu r4,r4,r7 /* convert to nanoseconds */
297 +
298 + /* At this point, we have seconds & nanoseconds since the xtime
299 + * stamp in r3+CA and r4. Load & add the xtime stamp.
300 + */
301 +#ifdef CONFIG_PPC64
302 + lwz r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9)
303 + lwz r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9)
304 +#else
305 + lwz r5,STAMP_XTIME+TSPC32_TV_SEC(r9)
306 + lwz r6,STAMP_XTIME+TSPC32_TV_NSEC(r9)
307 +#endif
308 + add r4,r4,r6
309 + adde r3,r3,r5
310 +
311 + /* We now have our result in r3,r4. We create a fake dependency
312 + * on that result and re-check the counter
313 + */
314 + or r6,r4,r3
315 + xor r0,r6,r6
316 + add r9,r9,r0
317 + lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
318 + cmpl cr0,r8,r0 /* check if updated */
319 + bne- 1b
320 +
321 + /* check for nanosecond overflow and adjust if necessary */
322 + cmpw r4,r7
323 + bltlr /* all done if no overflow */
324 + subf r4,r7,r4 /* adjust if overflow */
325 + addi r3,r3,1
326 +
327 + blr
328 + .cfi_endproc
329 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
330 +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
331 @@ -75,90 +75,49 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
332
333 mflr r12 /* r12 saves lr */
334 .cfi_register lr,r12
335 - mr r10,r3 /* r10 saves id */
336 mr r11,r4 /* r11 saves tp */
337 bl V_LOCAL_FUNC(__get_datapage) /* get data page */
338 - beq cr1,50f /* if monotonic -> jump there */
339 -
340 - /*
341 - * CLOCK_REALTIME
342 - */
343 -
344 - bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
345 -
346 - lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
347 - ori r7,r7,16960
348 - rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
349 - rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
350 - std r5,TSPC64_TV_SEC(r11) /* store sec in tv */
351 - subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
352 - mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
353 - * XSEC_PER_SEC
354 - */
355 - rldicl r0,r0,44,20
356 - mulli r0,r0,1000 /* nsec = usec * 1000 */
357 - std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */
358 -
359 - mtlr r12
360 - crclr cr0*4+so
361 - li r3,0
362 - blr
363 +50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
364 + bne cr1,80f /* if not monotonic, all done */
365
366 /*
367 * CLOCK_MONOTONIC
368 */
369
370 -50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
371 -
372 - lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
373 - ori r7,r7,16960
374 - rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
375 - rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
376 - subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
377 - mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
378 - * XSEC_PER_SEC
379 - */
380 - rldicl r6,r0,44,20
381 - mulli r6,r6,1000 /* nsec = usec * 1000 */
382 -
383 /* now we must fixup using wall to monotonic. We need to snapshot
384 * that value and do the counter trick again. Fortunately, we still
385 - * have the counter value in r8 that was returned by __do_get_xsec.
386 - * At this point, r5,r6 contain our sec/nsec values.
387 - * can be used
388 + * have the counter value in r8 that was returned by __do_get_tspec.
389 + * At this point, r4,r5 contain our sec/nsec values.
390 */
391
392 - lwa r4,WTOM_CLOCK_SEC(r3)
393 - lwa r7,WTOM_CLOCK_NSEC(r3)
394 + lwa r6,WTOM_CLOCK_SEC(r3)
395 + lwa r9,WTOM_CLOCK_NSEC(r3)
396
397 - /* We now have our result in r4,r7. We create a fake dependency
398 + /* We now have our result in r6,r9. We create a fake dependency
399 * on that result and re-check the counter
400 */
401 - or r9,r4,r7
402 - xor r0,r9,r9
403 + or r0,r6,r9
404 + xor r0,r0,r0
405 add r3,r3,r0
406 ld r0,CFG_TB_UPDATE_COUNT(r3)
407 cmpld cr0,r0,r8 /* check if updated */
408 bne- 50b
409
410 - /* Calculate and store result. Note that this mimmics the C code,
411 - * which may cause funny results if nsec goes negative... is that
412 - * possible at all ?
413 - */
414 - add r4,r4,r5
415 - add r7,r7,r6
416 - lis r9,NSEC_PER_SEC@h
417 - ori r9,r9,NSEC_PER_SEC@l
418 - cmpl cr0,r7,r9
419 - cmpli cr1,r7,0
420 + /* Add wall->monotonic offset and check for overflow or underflow.
421 + */
422 + add r4,r4,r6
423 + add r5,r5,r9
424 + cmpd cr0,r5,r7
425 + cmpdi cr1,r5,0
426 blt 1f
427 - subf r7,r9,r7
428 + subf r5,r7,r5
429 addi r4,r4,1
430 -1: bge cr1,1f
431 +1: bge cr1,80f
432 addi r4,r4,-1
433 - add r7,r7,r9
434 -1: std r4,TSPC64_TV_SEC(r11)
435 - std r7,TSPC64_TV_NSEC(r11)
436 + add r5,r5,r7
437 +
438 +80: std r4,TSPC64_TV_SEC(r11)
439 + std r5,TSPC64_TV_NSEC(r11)
440
441 mtlr r12
442 crclr cr0*4+so
443 @@ -168,10 +127,6 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
444 /*
445 * syscall fallback
446 */
447 -98:
448 - mtlr r12
449 - mr r3,r10
450 - mr r4,r11
451 99:
452 li r0,__NR_clock_gettime
453 sc
454 @@ -253,3 +208,59 @@ V_FUNCTION_BEGIN(__do_get_xsec)
455 blr
456 .cfi_endproc
457 V_FUNCTION_END(__do_get_xsec)
458 +
459 +/*
460 + * This is the core of clock_gettime(), it returns the current
461 + * time in seconds and nanoseconds in r4 and r5.
462 + * It expects the datapage ptr in r3 and doesn't clobber it.
463 + * It clobbers r0 and r6 and returns NSEC_PER_SEC in r7.
464 + * On return, r8 contains the counter value that can be reused.
465 + * This clobbers cr0 but not any other cr field.
466 + */
467 +V_FUNCTION_BEGIN(__do_get_tspec)
468 + .cfi_startproc
469 + /* check for update count & load values */
470 +1: ld r8,CFG_TB_UPDATE_COUNT(r3)
471 + andi. r0,r8,1 /* pending update ? loop */
472 + bne- 1b
473 + xor r0,r8,r8 /* create dependency */
474 + add r3,r3,r0
475 +
476 + /* Get TB & offset it. We use the MFTB macro which will generate
477 + * workaround code for Cell.
478 + */
479 + MFTB(r7)
480 + ld r9,CFG_TB_ORIG_STAMP(r3)
481 + subf r7,r9,r7
482 +
483 + /* Scale result */
484 + ld r5,CFG_TB_TO_XS(r3)
485 + sldi r7,r7,12 /* compute time since stamp_xtime */
486 + mulhdu r6,r7,r5 /* in units of 2^-32 seconds */
487 +
488 + /* Add stamp since epoch */
489 + ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
490 + ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
491 + or r0,r4,r5
492 + or r0,r0,r6
493 + xor r0,r0,r0
494 + add r3,r3,r0
495 + ld r0,CFG_TB_UPDATE_COUNT(r3)
496 + cmpld r0,r8 /* check if updated */
497 + bne- 1b /* reload if so */
498 +
499 + /* convert to seconds & nanoseconds and add to stamp */
500 + lis r7,NSEC_PER_SEC@h
501 + ori r7,r7,NSEC_PER_SEC@l
502 + mulhwu r0,r6,r7 /* compute nanoseconds and */
503 + srdi r6,r6,32 /* seconds since stamp_xtime */
504 + clrldi r0,r0,32
505 + add r5,r5,r0 /* add nanoseconds together */
506 + cmpd r5,r7 /* overflow? */
507 + add r4,r4,r6
508 + bltlr /* all done if no overflow */
509 + subf r5,r7,r5 /* if overflow, adjust */
510 + addi r4,r4,1
511 + blr
512 + .cfi_endproc
513 +V_FUNCTION_END(__do_get_tspec)