git.ipfire.org Git - thirdparty/glibc.git/commitdiff
S390: Optimize iso-8859-1 to ibm037 iconv-module.
author Stefan Liebler <stli@linux.vnet.ibm.com>
Wed, 25 May 2016 15:18:05 +0000 (17:18 +0200)
committer Stefan Liebler <stli@linux.vnet.ibm.com>
Wed, 25 May 2016 15:18:05 +0000 (17:18 +0200)
This patch reworks the s390-specific module, which used the z900
Translate One to One (troo) instruction. The g5 Translate (tr) instruction
is now used instead, because it outperforms the troo instruction.

ChangeLog:

* sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c (TROO_LOOP):
Rename to TR_LOOP and use the tr instruction instead of troo.

ChangeLog
sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c

index 285f4fb8ddb0ee53dcdd5cd3db045e254e0d76ef..f303dea9d51e06e596fe1a9a244ca48888f83cb0 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2016-05-25  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
+       * sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c (TROO_LOOP):
+       Rename to TR_LOOP and usage of tr instead of troo instruction.
+
 2016-05-25  Stefan Liebler  <stli@linux.vnet.ibm.com>
 
        * sysdeps/s390/multiarch/gconv_simple.c: New File.
index c59f87f18da346d657a8b02ed55f75ea4fd61301..3b63e6a94f1b9bafd848375f29b8606f4fbd3ee9 100644 (file)
@@ -1,7 +1,6 @@
 /* Conversion between ISO 8859-1 and IBM037.
 
-   This module uses the Z900 variant of the Translate One To One
-   instruction.
+   This module uses the translate instruction.
    Copyright (C) 1997-2016 Free Software Foundation, Inc.
 
    Author: Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
@@ -176,50 +175,70 @@ __attribute__ ((aligned (8))) =
 #define MIN_NEEDED_FROM                1
 #define MIN_NEEDED_TO          1
 
-/* The Z900 variant of troo forces us to always specify a test
-   character which ends the translation.  So if we run into the
-   situation where the translation has been interrupted due to the
-   test character we translate the character by hand and jump back
-   into the instruction.  */
-
-#define TROO_LOOP(TABLE)                                               \
+#define TR_LOOP(TABLE)                                                 \
   {                                                                    \
-    register const unsigned char test __asm__ ("0") = 0;               \
-    register const unsigned char *pTable __asm__ ("1") = TABLE;                \
-    register unsigned char *pOutput __asm__ ("2") = outptr;            \
-    register uint64_t length __asm__ ("3");                            \
-    const unsigned char* pInput = inptr;                               \
-    uint64_t tmp;                                                      \
-                                                                       \
-    length = (inend - inptr < outend - outptr                          \
-             ? inend - inptr : outend - outptr);                       \
+    size_t length = (inend - inptr < outend - outptr                   \
+                    ? inend - inptr : outend - outptr);                \
                                                                        \
-    __asm__ volatile ("0:                        \n\t"                 \
-                     "  troo    %0,%1           \n\t"                  \
-                     "  jz      1f              \n\t"                  \
-                     "  jo      0b              \n\t"                  \
-                     "  llgc    %3,0(%1)        \n\t"                  \
-                     "  la      %3,0(%3,%4)     \n\t"                  \
-                     "  mvc     0(1,%0),0(%3)   \n\t"                  \
-                     "  aghi    %1,1            \n\t"                  \
-                     "  aghi    %0,1            \n\t"                  \
-                     "  aghi    %2,-1           \n\t"                  \
-                     "  j       0b              \n\t"                  \
-                     "1:                        \n"                    \
+    /* Process in 256 byte blocks.  */                                 \
+    if (__builtin_expect (length >= 256, 0))                           \
+      {                                                                        \
+       size_t blocks = length / 256;                                   \
+       __asm__ __volatile__("0: mvc 0(256,%[R_OUT]),0(%[R_IN])\n\t"    \
+                            "   tr 0(256,%[R_OUT]),0(%[R_TBL])\n\t"    \
+                            "   la %[R_IN],256(%[R_IN])\n\t"           \
+                            "   la %[R_OUT],256(%[R_OUT])\n\t"         \
+                            "   brctg %[R_LI],0b\n\t"                  \
+                            : /* outputs */ [R_IN] "+a" (inptr)        \
+                              , [R_OUT] "+a" (outptr), [R_LI] "+d" (blocks) \
+                            : /* inputs */ [R_TBL] "a" (TABLE)         \
+                            : /* clobber list */ "memory"              \
+                            );                                         \
+       length = length % 256;                                          \
+      }                                                                        \
                                                                        \
-     : "+a" (pOutput), "+a" (pInput), "+d" (length), "=&a" (tmp)        \
-     : "a" (pTable), "d" (test)                                                \
-     : "cc");                                                          \
+    /* Process remaining 0...248 bytes in 8byte blocks.  */            \
+    if (length >= 8)                                                   \
+      {                                                                        \
+       size_t blocks = length / 8;                                     \
+       for (int i = 0; i < blocks; i++)                                \
+         {                                                             \
+           outptr[0] = TABLE[inptr[0]];                                \
+           outptr[1] = TABLE[inptr[1]];                                \
+           outptr[2] = TABLE[inptr[2]];                                \
+           outptr[3] = TABLE[inptr[3]];                                \
+           outptr[4] = TABLE[inptr[4]];                                \
+           outptr[5] = TABLE[inptr[5]];                                \
+           outptr[6] = TABLE[inptr[6]];                                \
+           outptr[7] = TABLE[inptr[7]];                                \
+           inptr += 8;                                                 \
+           outptr += 8;                                                \
+         }                                                             \
+       length = length % 8;                                            \
+      }                                                                        \
                                                                        \
-    inptr = pInput;                                                    \
-    outptr = pOutput;                                                  \
+    /* Process remaining 0...7 bytes.  */                              \
+    switch (length)                                                    \
+      {                                                                        \
+      case 7: outptr[6] = TABLE[inptr[6]];                             \
+      case 6: outptr[5] = TABLE[inptr[5]];                             \
+      case 5: outptr[4] = TABLE[inptr[4]];                             \
+      case 4: outptr[3] = TABLE[inptr[3]];                             \
+      case 3: outptr[2] = TABLE[inptr[2]];                             \
+      case 2: outptr[1] = TABLE[inptr[1]];                             \
+      case 1: outptr[0] = TABLE[inptr[0]];                             \
+      case 0: break;                                                   \
+      }                                                                        \
+    inptr += length;                                                   \
+    outptr += length;                                                  \
   }
 
+
 /* First define the conversion function from ISO 8859-1 to CP037.  */
 #define MIN_NEEDED_INPUT       MIN_NEEDED_FROM
 #define MIN_NEEDED_OUTPUT      MIN_NEEDED_TO
 #define LOOPFCT                        FROM_LOOP
-#define BODY TROO_LOOP (table_iso8859_1_to_cp037)
+#define BODY                   TR_LOOP (table_iso8859_1_to_cp037)
 
 #include <iconv/loop.c>
 
@@ -228,7 +247,7 @@ __attribute__ ((aligned (8))) =
 #define MIN_NEEDED_INPUT       MIN_NEEDED_TO
 #define MIN_NEEDED_OUTPUT      MIN_NEEDED_FROM
 #define LOOPFCT                        TO_LOOP
-#define BODY TROO_LOOP (table_cp037_iso8859_1);
+#define BODY                   TR_LOOP (table_cp037_iso8859_1);
 
 #include <iconv/loop.c>