]> git.ipfire.org Git - ipfire-2.x.git/blame - src/patches/suse-2.6.27.39/patches.arch/s390-17-perf-11-csum-fold.patch
Fix oinkmaster patch.
[ipfire-2.x.git] / src / patches / suse-2.6.27.39 / patches.arch / s390-17-perf-11-csum-fold.patch
CommitLineData
82094b55
AF
1From: Gerald Schaefer <geraldsc@de.ibm.com>
2Subject: [PATCH] convert/optimize csum_fold() to C
3References: bnc#532063,LTC#55526
4
5From: Heiko Carstens <heiko.carstens@de.ibm.com>
6
7By now gcc generates better code than the old inline assembly
8does. The original inline assembly results in:
9
10lr %r1,%r2
11sr %r3,%r3
12lr %r2,%r1
13srdl %r2,16
14alr %r2,%r3
15alr %r1,%r2
16srl %r1,16
17xilf %r1,65535
18llghr %r2,%r1
19br %r14
20
21From the C code, gcc generates this:
22
23rll %r1,%r2,16
24ar %r1,%r2
25srl %r1,16
26xilf %r1,65535
27llghr %r2,%r1
28br %r14
29
30In addition we don't have any static register allocations anymore and
31gcc is free to shuffle instructions around for better pipeline usage.
32
33Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
34Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
35
36Acked-by: John Jolly <jjolly@suse.de>
37---
38
39 arch/s390/include/asm/checksum.h | 25 ++++---------------------
40 1 file changed, 4 insertions(+), 21 deletions(-)
41
42diff -urpN linux-2.6/arch/s390/include/asm/checksum.h linux-2.6-patched/arch/s390/include/asm/checksum.h
43--- linux-2.6/arch/s390/include/asm/checksum.h 2009-06-10 05:05:27.000000000 +0200
44+++ linux-2.6-patched/arch/s390/include/asm/checksum.h 2009-06-30 09:21:36.000000000 +0200
45@@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *s
46 */
47 static inline __sum16 csum_fold(__wsum sum)
48 {
49-#ifndef __s390x__
50- register_pair rp;
51+ u32 csum = (__force u32) sum;
52
53- asm volatile(
54- " slr %N1,%N1\n" /* %0 = H L */
55- " lr %1,%0\n" /* %0 = H L, %1 = H L 0 0 */
56- " srdl %1,16\n" /* %0 = H L, %1 = 0 H L 0 */
57- " alr %1,%N1\n" /* %0 = H L, %1 = L H L 0 */
58- " alr %0,%1\n" /* %0 = H+L+C L+H */
59- " srl %0,16\n" /* %0 = H+L+C */
60- : "+&d" (sum), "=d" (rp) : : "cc");
61-#else /* __s390x__ */
62- asm volatile(
63- " sr 3,3\n" /* %0 = H*65536 + L */
64- " lr 2,%0\n" /* %0 = H L, 2/3 = H L / 0 0 */
65- " srdl 2,16\n" /* %0 = H L, 2/3 = 0 H / L 0 */
66- " alr 2,3\n" /* %0 = H L, 2/3 = L H / L 0 */
67- " alr %0,2\n" /* %0 = H+L+C L+H */
68- " srl %0,16\n" /* %0 = H+L+C */
69- : "+&d" (sum) : : "cc", "2", "3");
70-#endif /* __s390x__ */
71- return (__force __sum16) ~sum;
72+ csum += (csum >> 16) + (csum << 16);
73+ csum >>= 16;
74+ return (__force __sum16) ~csum;
75 }
76
77 /*