1 From: Gerald Schaefer <geraldsc@de.ibm.com>
2 Subject: [PATCH] convert/optimize csum_fold() to C
3 References: bnc#532063,LTC#55526
5 From: Heiko Carstens <heiko.carstens@de.ibm.com>
7 By now gcc generates better code than the old inline assembly
8 does. The original inline assembly results in:
21 Out of the C code gcc generates this:
30 In addition, we no longer have any static register allocations, and
31 gcc is free to shuffle instructions around for better pipeline usage.
33 Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
34 Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
36 Acked-by: John Jolly <jjolly@suse.de>
39 arch/s390/include/asm/checksum.h | 25 ++++---------------------
40 1 file changed, 4 insertions(+), 21 deletions(-)
42 diff -urpN linux-2.6/arch/s390/include/asm/checksum.h linux-2.6-patched/arch/s390/include/asm/checksum.h
43 --- linux-2.6/arch/s390/include/asm/checksum.h 2009-06-10 05:05:27.000000000 +0200
44 +++ linux-2.6-patched/arch/s390/include/asm/checksum.h 2009-06-30 09:21:36.000000000 +0200
45 @@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *s
47 static inline __sum16 csum_fold(__wsum sum)
51 + u32 csum = (__force u32) sum;
54 - " slr %N1,%N1\n" /* %0 = H L */
55 - " lr %1,%0\n" /* %0 = H L, %1 = H L 0 0 */
56 - " srdl %1,16\n" /* %0 = H L, %1 = 0 H L 0 */
57 - " alr %1,%N1\n" /* %0 = H L, %1 = L H L 0 */
58 - " alr %0,%1\n" /* %0 = H+L+C L+H */
59 - " srl %0,16\n" /* %0 = H+L+C */
60 - : "+&d" (sum), "=d" (rp) : : "cc");
61 -#else /* __s390x__ */
63 - " sr 3,3\n" /* %0 = H*65536 + L */
64 - " lr 2,%0\n" /* %0 = H L, 2/3 = H L / 0 0 */
65 - " srdl 2,16\n" /* %0 = H L, 2/3 = 0 H / L 0 */
66 - " alr 2,3\n" /* %0 = H L, 2/3 = L H / L 0 */
67 - " alr %0,2\n" /* %0 = H+L+C L+H */
68 - " srl %0,16\n" /* %0 = H+L+C */
69 - : "+&d" (sum) : : "cc", "2", "3");
70 -#endif /* __s390x__ */
71 - return (__force __sum16) ~sum;
72 + csum += (csum >> 16) + (csum << 16);
74 + return (__force __sum16) ~csum;