Replace FSF snail mail address with URLs.

[thirdparty/glibc.git] / iconv / loop.c
diff --git a/iconv/loop.c b/iconv/loop.c

index ae83894dc9dea3a90220e16b1d12a79e281cc72b..e11e86b5ecd4abd95ec0ea125181b298299d0ade 100644 (file)
--- a/iconv/loop.c
+++ b/iconv/loop.c
@@ -1,5 +1,5 @@
  /* Conversion loop frame work.
-   Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1998-2003, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
     This file is part of the GNU C Library.
     Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
  
@@ -14,9 +14,8 @@
     Lesser General Public License for more details.
  
     You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
  
  /* This file provides a frame for the reader loop in all conversion modules.
     The actual code must (of course) be provided in the actual module source
@@ -43,6 +42,9 @@
  
       INIT_PARAMS       code to define and initialize variables from params.
       UPDATE_PARAMS     code to store result in params.
+
+     ONEBYTE_BODY      body of the specialized conversion function for a
+                       single byte from the current character set to INTERNAL.
  */
  
  #include <assert.h>
@@ -63,8 +65,8 @@
  #undef FCTNAME2
  #if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED
  /* We can handle unaligned memory access.  */
-# define get16(addr) *((__const uint16_t *) (addr))
-# define get32(addr) *((__const uint32_t *) (addr))
+# define get16(addr) *((const uint16_t *) (addr))
+# define get32(addr) *((const uint32_t *) (addr))
  
  /* We need no special support for writing values either.  */
  # define put16(addr, val) *((uint16_t *) (addr)) = (val)
@@ -75,13 +77,13 @@
  /* Distinguish between big endian and little endian.  */
  # if __BYTE_ORDER == __LITTLE_ENDIAN
  #  define get16(addr) \
-     (((__const unsigned char *) (addr))[1] << 8                             \
-      | ((__const unsigned char *) (addr))[0])
+     (((const unsigned char *) (addr))[1] << 8                               \
+      | ((const unsigned char *) (addr))[0])
  #  define get32(addr) \
-     (((((__const unsigned char *) (addr))[3] << 8                           \
-       | ((__const unsigned char *) (addr))[2]) << 8                         \
-       | ((__const unsigned char *) (addr))[1]) << 8                         \
-      | ((__const unsigned char *) (addr))[0])
+     (((((const unsigned char *) (addr))[3] << 8                             \
+       | ((const unsigned char *) (addr))[2]) << 8                           \
+       | ((const unsigned char *) (addr))[1]) << 8                           \
+      | ((const unsigned char *) (addr))[0])
  
  #  define put16(addr, val) \
       ({ uint16_t __val = (val);                                                      \
@@ -100,13 +102,13 @@
         (void) 0; })
  # else
  #  define get16(addr) \
-     (((__const unsigned char *) (addr))[0] << 8                             \
-      | ((__const unsigned char *) (addr))[1])
+     (((const unsigned char *) (addr))[0] << 8                               \
+      | ((const unsigned char *) (addr))[1])
  #  define get32(addr) \
-     (((((__const unsigned char *) (addr))[0] << 8                           \
-       | ((__const unsigned char *) (addr))[1]) << 8                         \
-       | ((__const unsigned char *) (addr))[2]) << 8                         \
-      | ((__const unsigned char *) (addr))[3])
+     (((((const unsigned char *) (addr))[0] << 8                             \
+       | ((const unsigned char *) (addr))[1]) << 8                           \
+       | ((const unsigned char *) (addr))[2]) << 8                           \
+      | ((const unsigned char *) (addr))[3])
  
  #  define put16(addr, val) \
       ({ uint16_t __val = (val);                                                      \
@@ -133,6 +135,8 @@
  /* We need at least one byte for the next round.  */
  #ifndef MIN_NEEDED_INPUT
  # error "MIN_NEEDED_INPUT definition missing"
+#elif MIN_NEEDED_INPUT < 1
+# error "MIN_NEEDED_INPUT must be >= 1"
  #endif
  
  /* Let's see how many bytes we produce.  */
@@ -143,6 +147,8 @@
  /* We produce at least one byte in the next round.  */
  #ifndef MIN_NEEDED_OUTPUT
  # error "MIN_NEEDED_OUTPUT definition missing"
+#elif MIN_NEEDED_OUTPUT < 1
+# error "MIN_NEEDED_OUTPUT must be >= 1"
  #endif
  
  /* Let's see how many bytes we produce.  */
@@ -167,6 +173,15 @@
  # define EXTRA_LOOP_DECLS
  #endif
  
+/* Allow using UPDATE_PARAMS and REINIT_PARAMS in macros where an #ifdef
+   test isn't possible.  */
+#ifndef UPDATE_PARAMS
+# define UPDATE_PARAMS do { } while (0)
+#endif
+#ifndef REINIT_PARAMS
+# define REINIT_PARAMS do { } while (0)
+#endif
+
  
  /* To make it easier for the writers of the modules, we define a macro
     to test whether we have to ignore errors.  */
@@ -174,10 +189,29 @@
    (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS))
  
  
-/* Error handling with transliteration/transcription function use and
-   ignoring of errors.  Note that we cannot use the do while (0) trick
-   since `break' and `continue' must reach certain points.  */
-#define STANDARD_ERR_HANDLER(Incr) \
+/* Error handling for the FROM_LOOP direction, with ignoring of errors.
+   Note that we cannot use the do while (0) trick since `break' and
+   `continue' must reach certain points.  */
+#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
+  {                                                                          \
+    result = __GCONV_ILLEGAL_INPUT;                                          \
+                                                                             \
+    if (! ignore_errors_p ())                                                \
+      break;                                                                 \
+                                                                             \
+    /* We ignore the invalid input byte sequence.  */                        \
+    inptr += (Incr);                                                         \
+    ++*irreversible;                                                         \
+    /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+       that "iconv -c" must give the same exitcode as "iconv".  */           \
+    continue;                                                                \
+  }
+
+/* Error handling for the TO_LOOP direction, with use of transliteration/
+   transcription functions and ignoring of errors.  Note that we cannot use
+   the do while (0) trick since `break' and `continue' must reach certain
+   points.  */
+#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
    {                                                                          \
      struct __gconv_trans_data *trans;                                        \
                                                                               \
@@ -188,6 +222,10 @@
          case we are not doing any error recovery outself.  */                \
        break;                                                                 \
                                                                               \
+    /* If needed, flush any conversion state, so that __gconv_transliterate   \
+       starts with current shift state.  */                                  \
+    UPDATE_PARAMS;                                                           \
+                                                                             \
      /* First try the transliteration methods.  */                            \
      for (trans = step_data->__trans; trans != NULL; trans = trans->__next)    \
        {                                                                              \
@@ -197,9 +235,17 @@
         if (result != __GCONV_ILLEGAL_INPUT)                                  \
           break;                                                              \
        }                                                                              \
+                                                                             \
+    REINIT_PARAMS;                                                           \
+                                                                             \
      /* If any of them recognized the input continue with the loop.  */       \
      if (result != __GCONV_ILLEGAL_INPUT)                                     \
-      continue;                                                                      \
+      {                                                                              \
+       if (__builtin_expect (result == __GCONV_FULL_OUTPUT, 0))              \
+         break;                                                              \
+                                                                             \
+       continue;                                                             \
+      }                                                                              \
                                                                               \
      /* Next see whether we have to ignore the error.  If not, stop.  */              \
      if (! ignore_errors_p ())                                                \
@@ -208,14 +254,16 @@
      /* When we come here it means we ignore the character.  */               \
      ++*irreversible;                                                         \
      inptr += Incr;                                                           \
+    /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+       that "iconv -c" must give the same exitcode as "iconv".  */           \
      continue;                                                                \
    }
  
  
  /* Handling of Unicode 3.1 TAG characters.  Unicode recommends
     "If language codes are not relevant to the particular processing
-    operation, then they should be ignored."
-   This macro is usually called right before STANDARD_ERR_HANDLER (Incr).  */
+    operation, then they should be ignored."  This macro is usually
+   called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr).  */
  #define UNICODE_TAG_HANDLER(Character, Incr) \
    {                                                                          \
      /* TAG characters are those in the range U+E0000..U+E007F.  */           \
@@ -229,6 +277,7 @@
  
  /* The function returns the status, as defined in gconv.h.  */
  static inline int
+__attribute ((always_inline))
  FCTNAME (LOOPFCT) (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
@@ -257,6 +306,14 @@ FCTNAME (LOOPFCT) (struct __gconv_step *step,
        /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the
          compiler generating better code.  They will be optimized away
          since MIN_NEEDED_OUTPUT is always a constant.  */
+      if (MIN_NEEDED_INPUT > 1
+         && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
+       {
+         /* We don't have enough input for another complete input
+            character.  */
+         result = __GCONV_INCOMPLETE_INPUT;
+         break;
+       }
        if ((MIN_NEEDED_OUTPUT != 1
            && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
           || (MIN_NEEDED_OUTPUT == 1
@@ -266,14 +323,6 @@ FCTNAME (LOOPFCT) (struct __gconv_step *step,
           result = __GCONV_FULL_OUTPUT;
           break;
         }
-      if (MIN_NEEDED_INPUT > 1
-         && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
-       {
-         /* We don't have enough input for another complete input
-            character.  */
-         result = __GCONV_INCOMPLETE_INPUT;
-         break;
-       }
  
        /* Here comes the body the user provides.  It can stop with
          RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
@@ -285,9 +334,7 @@ FCTNAME (LOOPFCT) (struct __gconv_step *step,
    /* Update the pointers pointed to by the parameters.  */
    *inptrp = inptr;
    *outptrp = outptr;
-#ifdef UPDATE_PARAMS
    UPDATE_PARAMS;
-#endif
  
    return result;
  }
@@ -296,8 +343,8 @@ FCTNAME (LOOPFCT) (struct __gconv_step *step,
  /* Include the file a second time to define the function to handle
     unaligned access.  */
  #if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \
-    && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
-    && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
+    && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \
+    && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0
  # undef get16
  # undef get32
  # undef put16
@@ -314,6 +361,7 @@ FCTNAME (LOOPFCT) (struct __gconv_step *step,
  # define SINGLE(fct) SINGLE2 (fct)
  # define SINGLE2(fct) fct##_single
  static inline int
+__attribute ((always_inline))
  SINGLE(LOOPFCT) (struct __gconv_step *step,
                  struct __gconv_step_data *step_data,
                  const unsigned char **inptrp, const unsigned char *inend,
@@ -341,15 +389,20 @@ SINGLE(LOOPFCT) (struct __gconv_step *step,
    UNPACK_BYTES
  #else
    /* Add the bytes from the state to the input buffer.  */
+  assert ((state->__count & 7) <= sizeof (state->__value));
    for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
      bytebuf[inlen] = state->__value.__wchb[inlen];
  #endif
  
    /* Are there enough bytes in the input buffer?  */
-  if (__builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
+  if (MIN_NEEDED_INPUT > 1
+      && __builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
      {
        *inptrp = inend;
  #ifdef STORE_REST
+      while (inptr < inend)
+       bytebuf[inlen++] = *inptr++;
+
        inptr = bytebuf;
        inptrp = &inptr;
        inend = &bytebuf[inlen];
@@ -400,7 +453,11 @@ SINGLE(LOOPFCT) (struct __gconv_step *step,
        result = __GCONV_OK;
  
        /* Clear the state buffer.  */
+#ifdef CLEAR_STATE
+      CLEAR_STATE;
+#else
        state->__count &= ~7;
+#endif
      }
    else if (result == __GCONV_INCOMPLETE_INPUT)
      {
@@ -416,6 +473,10 @@ SINGLE(LOOPFCT) (struct __gconv_step *step,
  #else
        /* We don't have enough input for another complete input
          character.  */
+      assert (inend - inptr > (state->__count & ~7));
+      assert (inend - inptr <= sizeof (state->__value));
+      state->__count = (state->__count & ~7) | (inend - inptr);
+      inlen = 0;
        while (inptr < inend)
         state->__value.__wchb[inlen++] = *inptr++;
  #endif
@@ -428,6 +489,15 @@ SINGLE(LOOPFCT) (struct __gconv_step *step,
  #endif
  
  
+#ifdef ONEBYTE_BODY
+/* Define the shortcut function for btowc.  */
+static wint_t
+gconv_btowc (struct __gconv_step *step, unsigned char c)
+  ONEBYTE_BODY
+# define FROM_ONEBYTE gconv_btowc
+#endif
+
+
  /* We remove the macro definitions so that we can include this file again
     for the definition of another function.  */
  #undef MIN_NEEDED_INPUT
@@ -440,7 +510,10 @@ SINGLE(LOOPFCT) (struct __gconv_step *step,
  #undef EXTRA_LOOP_DECLS
  #undef INIT_PARAMS
  #undef UPDATE_PARAMS
+#undef REINIT_PARAMS
+#undef ONEBYTE_BODY
  #undef UNPACK_BYTES
+#undef CLEAR_STATE
  #undef LOOP_NEED_STATE
  #undef LOOP_NEED_FLAGS
  #undef LOOP_NEED_DATA