src/patches/suse-2.6.27.39/patches.arch/s390-17-perf-11-csum-fold.patch

   1 From: Gerald Schaefer <geraldsc@de.ibm.com>
   2 Subject: [PATCH] convert/optimize csum_fold() to C
   3 References: bnc#532063,LTC#55526
   4
   5 From: Heiko Carstens <heiko.carstens@de.ibm.com>
   6
   7 By now gcc generates better code than the old inline assembly
   8 does. The original inline assembly results in:
   9
  10 lr      %r1,%r2
  11 sr      %r3,%r3
  12 lr      %r2,%r1
  13 srdl    %r2,16
  14 alr     %r2,%r3
  15 alr     %r1,%r2
  16 srl     %r1,16
  17 xilf    %r1,65535
  18 llghr   %r2,%r1
  19 br      %r14
  20
  21 From the C code, gcc generates this:
  22
  23 rll     %r1,%r2,16
  24 ar      %r1,%r2
  25 srl     %r1,16
  26 xilf    %r1,65535
  27 llghr   %r2,%r1
  28 br      %r14
  29
  30 In addition, we no longer have any hard-coded register allocations, and
  31 gcc is free to shuffle instructions around for better pipeline usage.
  32
  33 Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
  34 Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
  35
  36 Acked-by: John Jolly <jjolly@suse.de>
  37 ---
  38
  39  arch/s390/include/asm/checksum.h |   25 ++++---------------------
  40  1 file changed, 4 insertions(+), 21 deletions(-)
  41
  42 diff -urpN linux-2.6/arch/s390/include/asm/checksum.h linux-2.6-patched/arch/s390/include/asm/checksum.h
  43 --- linux-2.6/arch/s390/include/asm/checksum.h  2009-06-10 05:05:27.000000000 +0200
  44 +++ linux-2.6-patched/arch/s390/include/asm/checksum.h  2009-06-30 09:21:36.000000000 +0200
  45 @@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *s
  46   */
  47  static inline __sum16 csum_fold(__wsum sum)
  48  {
  49 -#ifndef __s390x__
  50 -       register_pair rp;
  51 +       u32 csum = (__force u32) sum;
  52
  53 -       asm volatile(
  54 -               "       slr     %N1,%N1\n"      /* %0 = H L */
  55 -               "       lr      %1,%0\n"        /* %0 = H L, %1 = H L 0 0 */
  56 -               "       srdl    %1,16\n"        /* %0 = H L, %1 = 0 H L 0 */
  57 -               "       alr     %1,%N1\n"       /* %0 = H L, %1 = L H L 0 */
  58 -               "       alr     %0,%1\n"        /* %0 = H+L+C L+H */
  59 -               "       srl     %0,16\n"        /* %0 = H+L+C */
  60 -               : "+&d" (sum), "=d" (rp) : : "cc");
  61 -#else /* __s390x__ */
  62 -       asm volatile(
  63 -               "       sr      3,3\n"          /* %0 = H*65536 + L */
  64 -               "       lr      2,%0\n"         /* %0 = H L, 2/3 = H L / 0 0 */
  65 -               "       srdl    2,16\n"         /* %0 = H L, 2/3 = 0 H / L 0 */
  66 -               "       alr     2,3\n"          /* %0 = H L, 2/3 = L H / L 0 */
  67 -               "       alr     %0,2\n"         /* %0 = H+L+C L+H */
  68 -               "       srl     %0,16\n"        /* %0 = H+L+C */
  69 -               : "+&d" (sum) : : "cc", "2", "3");
  70 -#endif /* __s390x__ */
  71 -       return (__force __sum16) ~sum;
  72 +       csum += (csum >> 16) + (csum << 16);
  73 +       csum >>= 16;
  74 +       return (__force __sum16) ~csum;
  75  }
  76
  77  /*