]>
Commit | Line | Data |
---|---|---|
82094b55 AF |
1 | From: Gerald Schaefer <geraldsc@de.ibm.com> |
2 | Subject: [PATCH] convert/optimize csum_fold() to C | |
3 | References: bnc#532063,LTC#55526 | |
4 | ||
5 | From: Heiko Carstens <heiko.carstens@de.ibm.com> | |
6 | ||
7 | In the meantime, gcc generates better code than the old inline | |
8 | assembly does. The original inline assembly results in: | |
9 | ||
10 | lr %r1,%r2 | |
11 | sr %r3,%r3 | |
12 | lr %r2,%r1 | |
13 | srdl %r2,16 | |
14 | alr %r2,%r3 | |
15 | alr %r1,%r2 | |
16 | srl %r1,16 | |
17 | xilf %r1,65535 | |
18 | llghr %r2,%r1 | |
19 | br %r14 | |
20 | ||
21 | From the C code, gcc generates this: | |
22 | ||
23 | rll %r1,%r2,16 | |
24 | ar %r1,%r2 | |
25 | srl %r1,16 | |
26 | xilf %r1,65535 | |
27 | llghr %r2,%r1 | |
28 | br %r14 | |
29 | ||
30 | In addition, we no longer have any static register allocations, and | |
31 | gcc is free to shuffle instructions around for better pipeline usage. | |
32 | ||
33 | Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> | |
34 | Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> | |
35 | ||
36 | Acked-by: John Jolly <jjolly@suse.de> | |
37 | --- | |
38 | ||
39 | arch/s390/include/asm/checksum.h | 25 ++++--------------------- | |
40 | 1 file changed, 4 insertions(+), 21 deletions(-) | |
41 | ||
42 | diff -urpN linux-2.6/arch/s390/include/asm/checksum.h linux-2.6-patched/arch/s390/include/asm/checksum.h | |
43 | --- linux-2.6/arch/s390/include/asm/checksum.h 2009-06-10 05:05:27.000000000 +0200 | |
44 | +++ linux-2.6-patched/arch/s390/include/asm/checksum.h 2009-06-30 09:21:36.000000000 +0200 | |
45 | @@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *s | |
46 | */ | |
47 | static inline __sum16 csum_fold(__wsum sum) | |
48 | { | |
49 | -#ifndef __s390x__ | |
50 | - register_pair rp; | |
51 | + u32 csum = (__force u32) sum; | |
52 | ||
53 | - asm volatile( | |
54 | - " slr %N1,%N1\n" /* %0 = H L */ | |
55 | - " lr %1,%0\n" /* %0 = H L, %1 = H L 0 0 */ | |
56 | - " srdl %1,16\n" /* %0 = H L, %1 = 0 H L 0 */ | |
57 | - " alr %1,%N1\n" /* %0 = H L, %1 = L H L 0 */ | |
58 | - " alr %0,%1\n" /* %0 = H+L+C L+H */ | |
59 | - " srl %0,16\n" /* %0 = H+L+C */ | |
60 | - : "+&d" (sum), "=d" (rp) : : "cc"); | |
61 | -#else /* __s390x__ */ | |
62 | - asm volatile( | |
63 | - " sr 3,3\n" /* %0 = H*65536 + L */ | |
64 | - " lr 2,%0\n" /* %0 = H L, 2/3 = H L / 0 0 */ | |
65 | - " srdl 2,16\n" /* %0 = H L, 2/3 = 0 H / L 0 */ | |
66 | - " alr 2,3\n" /* %0 = H L, 2/3 = L H / L 0 */ | |
67 | - " alr %0,2\n" /* %0 = H+L+C L+H */ | |
68 | - " srl %0,16\n" /* %0 = H+L+C */ | |
69 | - : "+&d" (sum) : : "cc", "2", "3"); | |
70 | -#endif /* __s390x__ */ | |
71 | - return (__force __sum16) ~sum; | |
72 | + csum += (csum >> 16) + (csum << 16); | |
73 | + csum >>= 16; | |
74 | + return (__force __sum16) ~csum; | |
75 | } | |
76 | ||
77 | /* |