From: Tony Breeds <tony@bakeyournoodle.com>
Subject: [PATCH] powerpc: Improve resolution of VDSO clock_gettime
References: 439908 - LTC49499

Currently the clock_gettime implementation in the VDSO produces a
result with microsecond resolution for the cases that are handled
without a system call, i.e. CLOCK_REALTIME and CLOCK_MONOTONIC. The
nanoseconds field of the result is obtained by computing a
microseconds value and multiplying by 1000.
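
(To restate the removed fast path arithmetically: with xsec denoting
time in units of 2^-20 seconds, the old code effectively stored

    tv_nsec = 1000 * (((xsec mod 2^20) * 10^6) >> 20)

so tv_nsec was always a multiple of 1000.)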

This changes the code in the VDSO to do the computation for
clock_gettime with nanosecond resolution. That means that the
resolution of the result will ultimately depend on the timebase
frequency.
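
(Worked example, not part of the original changelog: with a 512 MHz
timebase, a common value on 64-bit IBM machines, one timebase tick is
1/512000000 s, about 1.95 ns, so the fast path can now report time in
roughly 2 ns steps instead of multiples of 1000 ns.)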

Because the timestamp in the VDSO datapage (stamp_xsec, the real time
corresponding to the timebase count in tb_orig_stamp) is in units of
2^-20 seconds, it doesn't have sufficient resolution for computing a
result with nanosecond resolution. Therefore this adds a copy of
xtime to the VDSO datapage and updates it in update_gtod() along with
the other time-related fields.
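
As an aid to reading the assembly below, here is a minimal standalone C
model of the new fast path. It assumes tb_to_xs is scaled so that
mulhdu(tb_delta << 12, tb_to_xs) yields fractional seconds in units of
2^-32 s (as the comments in the 64-bit code state); the timebase
frequency and timestamps are invented sample values, not kernel data:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ull

/* High 64 bits of a 64x64-bit multiply, i.e. what mulhdu computes
 * (uses the GCC/Clang unsigned __int128 extension). */
static uint64_t mulhdu(uint64_t a, uint64_t b)
{
	return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

int main(void)
{
	/* Invented datapage snapshot: 512 MHz timebase, with tb_to_xs
	 * chosen as 2^84 / tb_freq so that the shift-by-12 plus mulhdu
	 * below lands in 2^-32 second units. */
	uint64_t tb_freq       = 512000000ull;
	uint64_t tb_orig_stamp = 0x123456789ull;
	uint64_t stamp_sec     = 1226000000ull;	/* stamp_xtime.tv_sec */
	uint64_t stamp_nsec    = 999999500ull;	/* stamp_xtime.tv_nsec */
	uint64_t tb_to_xs =
		(uint64_t)(((unsigned __int128)1 << 84) / tb_freq);

	uint64_t tb = tb_orig_stamp + 1024;	/* 1024 ticks = 2 us later */

	/* Core of __do_get_tspec: elapsed time in 2^-32 s units (the
	 * delta since the last update is small, so << 12 cannot
	 * overflow in practice)... */
	uint64_t t32  = mulhdu((tb - tb_orig_stamp) << 12, tb_to_xs);
	/* ...split into whole seconds and nanoseconds, then add the
	 * xtime stamp, mirroring the srdi/mulhwu sequence. */
	uint64_t sec  = stamp_sec + (t32 >> 32);
	uint64_t nsec = stamp_nsec +
		(((t32 & 0xffffffffull) * NSEC_PER_SEC) >> 32);

	if (nsec >= NSEC_PER_SEC) {	/* nanosecond carry, as in the asm */
		nsec -= NSEC_PER_SEC;
		sec++;
	}
	/* Prints 1226000001.000001499: 2 us later, with the carry. */
	printf("%llu.%09llu\n",
	       (unsigned long long)sec, (unsigned long long)nsec);
	return 0;
}

The real __do_get_tspec additionally wraps these reads in the lockless
tb_update_count retry loop, unchanged in spirit from __do_get_xsec.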

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Signed-off-by: Olaf Hering <olh@suse.de>
---
 arch/powerpc/include/asm/vdso_datapage.h | 3
 arch/powerpc/kernel/asm-offsets.c | 1
 arch/powerpc/kernel/time.c | 1
 arch/powerpc/kernel/vdso32/gettimeofday.S | 196 ++++++++++++++++++------------
 arch/powerpc/kernel/vdso64/gettimeofday.S | 143 +++++++++++----------
 5 files changed, 205 insertions(+), 139 deletions(-)

--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -39,6 +39,7 @@
 #ifndef __ASSEMBLY__

 #include <linux/unistd.h>
+#include <linux/time.h>

 #define SYSCALL_MAP_SIZE ((__NR_syscalls + 31) / 32)

@@ -83,6 +84,7 @@ struct vdso_data {
 __u32 icache_log_block_size; /* L1 i-cache log block size */
 __s32 wtom_clock_sec; /* Wall to monotonic clock */
 __s32 wtom_clock_nsec;
+ struct timespec stamp_xtime; /* xtime value for tb_orig_stamp */
 __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */
 __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
 };
@@ -102,6 +104,7 @@ struct vdso_data {
 __u32 tz_dsttime; /* Type of dst correction 0x5C */
 __s32 wtom_clock_sec; /* Wall to monotonic clock */
 __s32 wtom_clock_nsec;
+ struct timespec stamp_xtime; /* xtime value for tb_orig_stamp */
 __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
 __u32 dcache_block_size; /* L1 d-cache block size */
 __u32 icache_block_size; /* L1 i-cache block size */
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -304,6 +304,7 @@ int main(void)
 DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
 DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
 DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+ DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
 DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
 DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
 DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -456,6 +456,7 @@ static inline void update_gtod(u64 new_t
 vdso_data->tb_to_xs = new_tb_to_xs;
 vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
 vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+ vdso_data->stamp_xtime = xtime;
 smp_wmb();
 ++(vdso_data->tb_update_count);
 }
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -16,6 +16,13 @@
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>

+/* Offset for the low 32-bit part of a field of long type */
+#ifdef CONFIG_PPC64
+#define LOPART 4
+#else
+#define LOPART 0
+#endif
+
 .text
 /*
 * Exact prototype of gettimeofday
@@ -90,101 +97,53 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)

 mflr r12 /* r12 saves lr */
 .cfi_register lr,r12
- mr r10,r3 /* r10 saves id */
 mr r11,r4 /* r11 saves tp */
 bl __get_datapage@local /* get data page */
 mr r9,r3 /* datapage ptr in r9 */
- beq cr1,50f /* if monotonic -> jump there */
-
- /*
- * CLOCK_REALTIME
- */
-
- bl __do_get_xsec@local /* get xsec from tb & kernel */
- bne- 98f /* out of line -> do syscall */
-
- /* seconds are xsec >> 20 */
- rlwinm r5,r4,12,20,31
- rlwimi r5,r3,12,0,19
- stw r5,TSPC32_TV_SEC(r11)

- /* get remaining xsec and convert to nsec. we scale
- * up remaining xsec by 12 bits and get the top 32 bits
- * of the multiplication, then we multiply by 1000
- */
- rlwinm r5,r4,12,0,19
- lis r6,1000000@h
- ori r6,r6,1000000@l
- mulhwu r5,r5,r6
- mulli r5,r5,1000
- stw r5,TSPC32_TV_NSEC(r11)
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
+50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
+ bne cr1,80f /* not monotonic -> all done */

 /*
 * CLOCK_MONOTONIC
 */

-50: bl __do_get_xsec@local /* get xsec from tb & kernel */
- bne- 98f /* out of line -> do syscall */
-
- /* seconds are xsec >> 20 */
- rlwinm r6,r4,12,20,31
- rlwimi r6,r3,12,0,19
-
- /* get remaining xsec and convert to nsec. we scale
- * up remaining xsec by 12 bits and get the top 32 bits
- * of the multiplication, then we multiply by 1000
- */
- rlwinm r7,r4,12,0,19
- lis r5,1000000@h
- ori r5,r5,1000000@l
- mulhwu r7,r7,r5
- mulli r7,r7,1000
-
 /* now we must fixup using wall to monotonic. We need to snapshot
 * that value and do the counter trick again. Fortunately, we still
 * have the counter value in r8 that was returned by __do_get_xsec.
- * At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5
- * can be used
+ * At this point, r3,r4 contain our sec/nsec values, r5 and r6
+ * can be used, r7 contains NSEC_PER_SEC.
 */

- lwz r3,WTOM_CLOCK_SEC(r9)
- lwz r4,WTOM_CLOCK_NSEC(r9)
+ lwz r5,WTOM_CLOCK_SEC(r9)
+ lwz r6,WTOM_CLOCK_NSEC(r9)

- /* We now have our result in r3,r4. We create a fake dependency
- * on that result and re-check the counter
+ /* We now have our offset in r5,r6. We create a fake dependency
+ * on that value and re-check the counter
 */
- or r5,r4,r3
- xor r0,r5,r5
+ or r0,r6,r5
+ xor r0,r0,r0
 add r9,r9,r0
-#ifdef CONFIG_PPC64
- lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
-#else
- lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
-#endif
+ lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
 cmpl cr0,r8,r0 /* check if updated */
 bne- 50b

- /* Calculate and store result. Note that this mimmics the C code,
+ /* Calculate and store result. Note that this mimics the C code,
 * which may cause funny results if nsec goes negative... is that
 * possible at all ?
 */
- add r3,r3,r6
- add r4,r4,r7
- lis r5,NSEC_PER_SEC@h
- ori r5,r5,NSEC_PER_SEC@l
- cmpl cr0,r4,r5
- cmpli cr1,r4,0
+ add r3,r3,r5
+ add r4,r4,r6
+ cmpw cr0,r4,r7
+ cmpwi cr1,r4,0
 blt 1f
- subf r4,r5,r4
+ subf r4,r7,r4
 addi r3,r3,1
-1: bge cr1,1f
+1: bge cr1,80f
 addi r3,r3,-1
- add r4,r4,r5
-1: stw r3,TSPC32_TV_SEC(r11)
+ add r4,r4,r7
+
+80: stw r3,TSPC32_TV_SEC(r11)
 stw r4,TSPC32_TV_NSEC(r11)

 mtlr r12
@@ -195,10 +154,6 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 /*
 * syscall fallback
 */
-98:
- mtlr r12
- mr r3,r10
- mr r4,r11
 99:
 li r0,__NR_clock_gettime
 sc
@@ -322,3 +277,98 @@ __do_get_xsec:
 */
 3: blr
 .cfi_endproc
+
+/*
+ * This is the core of clock_gettime(), it returns the current
+ * time in seconds and nanoseconds in r3 and r4.
+ * It expects the datapage ptr in r9 and doesn't clobber it.
+ * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7.
+ * On return, r8 contains the counter value that can be reused.
+ * This clobbers cr0 but not any other cr field.
+ */
+__do_get_tspec:
+ .cfi_startproc
+ /* Check for update count & load values. We use the low
+ * order 32 bits of the update count
+ */
+1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r8,r8 /* create dependency */
+ add r9,r9,r0
+
+ /* Load orig stamp (offset to TB) */
+ lwz r5,CFG_TB_ORIG_STAMP(r9)
+ lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
+
+ /* Get a stable TB value */
+2: mftbu r3
+ mftbl r4
+ mftbu r0
+ cmpl cr0,r3,r0
+ bne- 2b
+
+ /* Subtract tb orig stamp and shift left 12 bits.
+ */
+ subfc r7,r6,r4
+ subfe r0,r5,r3
+ slwi r0,r0,12
+ rlwimi. r0,r7,12,20,31
+ slwi r7,r7,12
+
+ /* Load scale factor & do multiplication */
+ lwz r5,CFG_TB_TO_XS(r9) /* load values */
+ lwz r6,(CFG_TB_TO_XS+4)(r9)
+ mulhwu r3,r7,r6
+ mullw r10,r7,r5
+ mulhwu r4,r7,r5
+ addc r10,r3,r10
+ li r3,0
+
+ beq+ 4f /* skip high part computation if 0 */
+ mulhwu r3,r0,r5
+ mullw r7,r0,r5
+ mulhwu r5,r0,r6
+ mullw r6,r0,r6
+ adde r4,r4,r7
+ addze r3,r3
+ addc r4,r4,r5
+ addze r3,r3
+ addc r10,r10,r6
+
+4: addze r4,r4 /* add in carry */
+ lis r7,NSEC_PER_SEC@h
+ ori r7,r7,NSEC_PER_SEC@l
+ mulhwu r4,r4,r7 /* convert to nanoseconds */
+
+ /* At this point, we have seconds & nanoseconds since the xtime
+ * stamp in r3+CA and r4. Load & add the xtime stamp.
+ */
+#ifdef CONFIG_PPC64
+ lwz r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9)
+ lwz r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9)
+#else
+ lwz r5,STAMP_XTIME+TSPC32_TV_SEC(r9)
+ lwz r6,STAMP_XTIME+TSPC32_TV_NSEC(r9)
+#endif
+ add r4,r4,r6
+ adde r3,r3,r5
+
+ /* We now have our result in r3,r4. We create a fake dependency
+ * on that result and re-check the counter
+ */
+ or r6,r4,r3
+ xor r0,r6,r6
+ add r9,r9,r0
+ lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
+ cmpl cr0,r8,r0 /* check if updated */
+ bne- 1b
+
+ /* check for nanosecond overflow and adjust if necessary */
+ cmpw r4,r7
+ bltlr /* all done if no overflow */
+ subf r4,r7,r4 /* adjust if overflow */
+ addi r3,r3,1
+
+ blr
+ .cfi_endproc
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -75,90 +75,49 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)

 mflr r12 /* r12 saves lr */
 .cfi_register lr,r12
- mr r10,r3 /* r10 saves id */
 mr r11,r4 /* r11 saves tp */
 bl V_LOCAL_FUNC(__get_datapage) /* get data page */
- beq cr1,50f /* if monotonic -> jump there */
-
- /*
- * CLOCK_REALTIME
- */
-
- bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
-
- lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
- ori r7,r7,16960
- rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
- rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
- std r5,TSPC64_TV_SEC(r11) /* store sec in tv */
- subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
- mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
- * XSEC_PER_SEC
- */
- rldicl r0,r0,44,20
- mulli r0,r0,1000 /* nsec = usec * 1000 */
- std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */
-
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
+50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
+ bne cr1,80f /* if not monotonic, all done */

 /*
 * CLOCK_MONOTONIC
 */

-50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
-
- lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
- ori r7,r7,16960
- rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
- rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
- subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
- mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
- * XSEC_PER_SEC
- */
- rldicl r6,r0,44,20
- mulli r6,r6,1000 /* nsec = usec * 1000 */
-
 /* now we must fixup using wall to monotonic. We need to snapshot
 * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_xsec.
- * At this point, r5,r6 contain our sec/nsec values.
- * can be used
+ * have the counter value in r8 that was returned by __do_get_tspec.
+ * At this point, r4,r5 contain our sec/nsec values.
 */

- lwa r4,WTOM_CLOCK_SEC(r3)
- lwa r7,WTOM_CLOCK_NSEC(r3)
+ lwa r6,WTOM_CLOCK_SEC(r3)
+ lwa r9,WTOM_CLOCK_NSEC(r3)

- /* We now have our result in r4,r7. We create a fake dependency
+ /* We now have our result in r6,r9. We create a fake dependency
 * on that result and re-check the counter
 */
- or r9,r4,r7
- xor r0,r9,r9
+ or r0,r6,r9
+ xor r0,r0,r0
 add r3,r3,r0
 ld r0,CFG_TB_UPDATE_COUNT(r3)
 cmpld cr0,r0,r8 /* check if updated */
 bne- 50b

- /* Calculate and store result. Note that this mimmics the C code,
- * which may cause funny results if nsec goes negative... is that
- * possible at all ?
- */
- add r4,r4,r5
- add r7,r7,r6
- lis r9,NSEC_PER_SEC@h
- ori r9,r9,NSEC_PER_SEC@l
- cmpl cr0,r7,r9
- cmpli cr1,r7,0
+ /* Add wall->monotonic offset and check for overflow or underflow.
+ */
+ add r4,r4,r6
+ add r5,r5,r9
+ cmpd cr0,r5,r7
+ cmpdi cr1,r5,0
 blt 1f
- subf r7,r9,r7
+ subf r5,r7,r5
 addi r4,r4,1
-1: bge cr1,1f
+1: bge cr1,80f
 addi r4,r4,-1
- add r7,r7,r9
-1: std r4,TSPC64_TV_SEC(r11)
- std r7,TSPC64_TV_NSEC(r11)
+ add r5,r5,r7
+
+80: std r4,TSPC64_TV_SEC(r11)
+ std r5,TSPC64_TV_NSEC(r11)

 mtlr r12
 crclr cr0*4+so
@@ -168,10 +127,6 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 /*
 * syscall fallback
 */
-98:
- mtlr r12
- mr r3,r10
- mr r4,r11
 99:
 li r0,__NR_clock_gettime
 sc
@@ -253,3 +208,59 @@ V_FUNCTION_BEGIN(__do_get_xsec)
 blr
 .cfi_endproc
 V_FUNCTION_END(__do_get_xsec)
+
+/*
+ * This is the core of clock_gettime(), it returns the current
+ * time in seconds and nanoseconds in r4 and r5.
+ * It expects the datapage ptr in r3 and doesn't clobber it.
+ * It clobbers r0 and r6 and returns NSEC_PER_SEC in r7.
+ * On return, r8 contains the counter value that can be reused.
+ * This clobbers cr0 but not any other cr field.
+ */
+V_FUNCTION_BEGIN(__do_get_tspec)
+ .cfi_startproc
+ /* check for update count & load values */
+1: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 1b
+ xor r0,r8,r8 /* create dependency */
+ add r3,r3,r0
+
+ /* Get TB & offset it. We use the MFTB macro which will generate
+ * workaround code for Cell.
+ */
+ MFTB(r7)
+ ld r9,CFG_TB_ORIG_STAMP(r3)
+ subf r7,r9,r7
+
+ /* Scale result */
+ ld r5,CFG_TB_TO_XS(r3)
+ sldi r7,r7,12 /* compute time since stamp_xtime */
+ mulhdu r6,r7,r5 /* in units of 2^-32 seconds */
+
+ /* Add stamp since epoch */
+ ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+ or r0,r4,r5
+ or r0,r0,r6
+ xor r0,r0,r0
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld r0,r8 /* check if updated */
+ bne- 1b /* reload if so */
+
+ /* convert to seconds & nanoseconds and add to stamp */
+ lis r7,NSEC_PER_SEC@h
+ ori r7,r7,NSEC_PER_SEC@l
+ mulhwu r0,r6,r7 /* compute nanoseconds and */
+ srdi r6,r6,32 /* seconds since stamp_xtime */
+ clrldi r0,r0,32
+ add r5,r5,r0 /* add nanoseconds together */
+ cmpd r5,r7 /* overflow? */
+ add r4,r4,r6
+ bltlr /* all done if no overflow */
+ subf r5,r7,r5 /* if overflow, adjust */
+ addi r4,r4,1
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__do_get_tspec)