Replace FSF snail mail address with URLs.

[thirdparty/glibc.git] / iconv / loop.c
diff --git a/iconv/loop.c b/iconv/loop.c

index c01e52040e236ab8201ea7c1a4a08c69e28ba133..e11e86b5ecd4abd95ec0ea125181b298299d0ade 100644 (file)
--- a/iconv/loop.c
+++ b/iconv/loop.c
@@ -1,22 +1,21 @@
  /* Conversion loop frame work.
-   Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1998-2003, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
     This file is part of the GNU C Library.
     Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
  
     The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
  
     The GNU C Library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
  
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
  
  /* This file provides a frame for the reader loop in all conversion modules.
     The actual code must (of course) be provided in the actual module source
@@ -43,6 +42,9 @@
  
       INIT_PARAMS       code to define and initialize variables from params.
       UPDATE_PARAMS     code to store result in params.
+
+     ONEBYTE_BODY      body of the specialized conversion function for a
+                       single byte from the current character set to INTERNAL.
  */
  
  #include <assert.h>
@@ -63,8 +65,8 @@
  #undef FCTNAME2
  #if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED
  /* We can handle unaligned memory access.  */
-# define get16(addr) *((uint16_t *) (addr))
-# define get32(addr) *((uint32_t *) (addr))
+# define get16(addr) *((const uint16_t *) (addr))
+# define get32(addr) *((const uint32_t *) (addr))
  
  /* We need no special support for writing values either.  */
  # define put16(addr, val) *((uint16_t *) (addr)) = (val)
@@ -75,13 +77,13 @@
  /* Distinguish between big endian and little endian.  */
  # if __BYTE_ORDER == __LITTLE_ENDIAN
  #  define get16(addr) \
-     (((__const unsigned char *) (addr))[1] << 8                             \
-      | ((__const unsigned char *) (addr))[0])
+     (((const unsigned char *) (addr))[1] << 8                               \
+      | ((const unsigned char *) (addr))[0])
  #  define get32(addr) \
-     (((((__const unsigned char *) (addr))[3] << 8                           \
-       | ((__const unsigned char *) (addr))[2]) << 8                         \
-       | ((__const unsigned char *) (addr))[1]) << 8                         \
-      | ((__const unsigned char *) (addr))[0])
+     (((((const unsigned char *) (addr))[3] << 8                             \
+       | ((const unsigned char *) (addr))[2]) << 8                           \
+       | ((const unsigned char *) (addr))[1]) << 8                           \
+      | ((const unsigned char *) (addr))[0])
  
  #  define put16(addr, val) \
       ({ uint16_t __val = (val);                                                      \
@@ -100,18 +102,18 @@
         (void) 0; })
  # else
  #  define get16(addr) \
-     (((__const unsigned char *) (addr))[0] << 8                             \
-      | ((__const unsigned char *) (addr))[1])
+     (((const unsigned char *) (addr))[0] << 8                               \
+      | ((const unsigned char *) (addr))[1])
  #  define get32(addr) \
-     (((((__const unsigned char *) (addr))[0] << 8                           \
-       | ((__const unsigned char *) (addr))[1]) << 8                         \
-       | ((__const unsigned char *) (addr))[2]) << 8                         \
-      | ((__const unsigned char *) (addr))[3])
+     (((((const unsigned char *) (addr))[0] << 8                             \
+       | ((const unsigned char *) (addr))[1]) << 8                           \
+       | ((const unsigned char *) (addr))[2]) << 8                           \
+      | ((const unsigned char *) (addr))[3])
  
  #  define put16(addr, val) \
       ({ uint16_t __val = (val);                                                      \
         ((unsigned char *) (addr))[1] = __val;                                \
-       ((unsigned char *) (addr))[2] = __val >> 8;                           \
+       ((unsigned char *) (addr))[0] = __val >> 8;                           \
         (void) 0; })
  #  define put32(addr, val) \
       ({ uint32_t __val = (val);                                                      \
@@ -133,6 +135,8 @@
  /* We need at least one byte for the next round.  */
  #ifndef MIN_NEEDED_INPUT
  # error "MIN_NEEDED_INPUT definition missing"
+#elif MIN_NEEDED_INPUT < 1
+# error "MIN_NEEDED_INPUT must be >= 1"
  #endif
  
  /* Let's see how many bytes we produce.  */
@@ -143,6 +147,8 @@
  /* We produce at least one byte in the next round.  */
  #ifndef MIN_NEEDED_OUTPUT
  # error "MIN_NEEDED_OUTPUT definition missing"
+#elif MIN_NEEDED_OUTPUT < 1
+# error "MIN_NEEDED_OUTPUT must be >= 1"
  #endif
  
  /* Let's see how many bytes we produce.  */
@@ -167,104 +173,168 @@
  # define EXTRA_LOOP_DECLS
  #endif
  
+/* Allow using UPDATE_PARAMS and REINIT_PARAMS in macros where an #ifdef
+   test isn't possible.  */
+#ifndef UPDATE_PARAMS
+# define UPDATE_PARAMS do { } while (0)
+#endif
+#ifndef REINIT_PARAMS
+# define REINIT_PARAMS do { } while (0)
+#endif
+
  
  /* To make it easier for the writers of the modules, we define a macro
     to test whether we have to ignore errors.  */
-#define ignore_errors_p() (flags & __GCONV_IGNORE_ERRORS)
+#define ignore_errors_p() \
+  (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS))
+
+
+/* Error handling for the FROM_LOOP direction, with ignoring of errors.
+   Note that we cannot use the do while (0) trick since `break' and
+   `continue' must reach certain points.  */
+#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
+  {                                                                          \
+    result = __GCONV_ILLEGAL_INPUT;                                          \
+                                                                             \
+    if (! ignore_errors_p ())                                                \
+      break;                                                                 \
+                                                                             \
+    /* We ignore the invalid input byte sequence.  */                        \
+    inptr += (Incr);                                                         \
+    ++*irreversible;                                                         \
+    /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+       that "iconv -c" must give the same exitcode as "iconv".  */           \
+    continue;                                                                \
+  }
+
+/* Error handling for the TO_LOOP direction, with use of transliteration/
+   transcription functions and ignoring of errors.  Note that we cannot use
+   the do while (0) trick since `break' and `continue' must reach certain
+   points.  */
+#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
+  {                                                                          \
+    struct __gconv_trans_data *trans;                                        \
+                                                                             \
+    result = __GCONV_ILLEGAL_INPUT;                                          \
+                                                                             \
+    if (irreversible == NULL)                                                \
+      /* This means we are in a call from __gconv_transliterate.  In this    \
+        case we are not doing any error recovery ourselves.  */              \
+      break;                                                                 \
+                                                                             \
+    /* If needed, flush any conversion state, so that __gconv_transliterate   \
+       starts with current shift state.  */                                  \
+    UPDATE_PARAMS;                                                           \
+                                                                             \
+    /* First try the transliteration methods.  */                            \
+    for (trans = step_data->__trans; trans != NULL; trans = trans->__next)    \
+      {                                                                              \
+       result = DL_CALL_FCT (trans->__trans_fct,                             \
+                             (step, step_data, trans->__data, *inptrp,       \
+                              &inptr, inend, &outptr, irreversible));        \
+       if (result != __GCONV_ILLEGAL_INPUT)                                  \
+         break;                                                              \
+      }                                                                              \
+                                                                             \
+    REINIT_PARAMS;                                                           \
+                                                                             \
+    /* If any of them recognized the input continue with the loop.  */       \
+    if (result != __GCONV_ILLEGAL_INPUT)                                     \
+      {                                                                              \
+       if (__builtin_expect (result == __GCONV_FULL_OUTPUT, 0))              \
+         break;                                                              \
+                                                                             \
+       continue;                                                             \
+      }                                                                              \
+                                                                             \
+    /* Next see whether we have to ignore the error.  If not, stop.  */              \
+    if (! ignore_errors_p ())                                                \
+      break;                                                                 \
+                                                                             \
+    /* When we come here it means we ignore the character.  */               \
+    ++*irreversible;                                                         \
+    inptr += Incr;                                                           \
+    /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+       that "iconv -c" must give the same exitcode as "iconv".  */           \
+    continue;                                                                \
+  }
+
+
+/* Handling of Unicode 3.1 TAG characters.  Unicode recommends
+   "If language codes are not relevant to the particular processing
+    operation, then they should be ignored."  This macro is usually
+   called right before  STANDARD_TO_LOOP_ERR_HANDLER (Incr).  */
+#define UNICODE_TAG_HANDLER(Character, Incr) \
+  {                                                                          \
+    /* TAG characters are those in the range U+E0000..U+E007F.  */           \
+    if (((Character) >> 7) == (0xe0000 >> 7))                                \
+      {                                                                              \
+       inptr += Incr;                                                        \
+       continue;                                                             \
+      }                                                                              \
+  }
  
  
  /* The function returns the status, as defined in gconv.h.  */
  static inline int
-FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
-                  unsigned char **outptrp, unsigned char *outend,
-                  mbstate_t *state, int flags, void *data,
+__attribute ((always_inline))
+FCTNAME (LOOPFCT) (struct __gconv_step *step,
+                  struct __gconv_step_data *step_data,
+                  const unsigned char **inptrp, const unsigned char *inend,
+                  unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible EXTRA_LOOP_DECLS)
  {
-  int result = __GCONV_OK;
+#ifdef LOOP_NEED_STATE
+  mbstate_t *state = step_data->__statep;
+#endif
+#ifdef LOOP_NEED_FLAGS
+  int flags = step_data->__flags;
+#endif
+#ifdef LOOP_NEED_DATA
+  void *data = step->__data;
+#endif
+  int result = __GCONV_EMPTY_INPUT;
    const unsigned char *inptr = *inptrp;
    unsigned char *outptr = *outptrp;
  
-  /* We run one loop where we avoid checks for underflow/overflow of the
-     buffers to speed up the conversion a bit.  */
-  size_t min_in_rounds = (inend - inptr) / MAX_NEEDED_INPUT;
-  size_t min_out_rounds = (outend - outptr) / MAX_NEEDED_OUTPUT;
-  size_t min_rounds = MIN (min_in_rounds, min_out_rounds);
-
  #ifdef INIT_PARAMS
    INIT_PARAMS;
  #endif
  
-#undef NEED_LENGTH_TEST
-#define NEED_LENGTH_TEST       0
-  while (min_rounds-- > 0)
-    {
-      /* Here comes the body the user provides.  It can stop with RESULT
-        set to GCONV_INCOMPLETE_INPUT (if the size of the input characters
-        vary in size), GCONV_ILLEGAL_INPUT, or GCONV_FULL_OUTPUT (if the
-        output characters vary in size.  */
-      BODY
-    }
-
-  if (result == __GCONV_OK)
+  while (inptr != inend)
      {
-#if MIN_NEEDED_INPUT == MAX_NEEDED_INPUT \
-    && MIN_NEEDED_OUTPUT == MAX_NEEDED_OUTPUT
-      /* We don't need to start another loop since we were able to determine
-        the maximal number of characters to copy in advance.  What remains
-        to be determined is the status.  */
-      if (inptr == inend)
-       /* No more input.  */
-       result = __GCONV_EMPTY_INPUT;
-      else if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend)
-              || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
-       /* Overflow in the output buffer.  */
-       result = __GCONV_FULL_OUTPUT;
-      else
-       /* We have something left in the input buffer.  */
-       result = __GCONV_INCOMPLETE_INPUT;
-#else
-      result = __GCONV_EMPTY_INPUT;
-
-# undef NEED_LENGTH_TEST
-# define NEED_LENGTH_TEST      1
-      while (inptr != inend)
+      /* The `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are written to help
+        the compiler generate better code.  They will be optimized away
+        since MIN_NEEDED_OUTPUT is always a constant.  */
+      if (MIN_NEEDED_INPUT > 1
+         && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
+       {
+         /* We don't have enough input for another complete input
+            character.  */
+         result = __GCONV_INCOMPLETE_INPUT;
+         break;
+       }
+      if ((MIN_NEEDED_OUTPUT != 1
+          && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
+         || (MIN_NEEDED_OUTPUT == 1
+             && __builtin_expect (outptr >= outend, 0)))
         {
-         /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the
-            compiler generating better code.  It will optimized away
-            since MIN_NEEDED_OUTPUT is always a constant.  */
-         if ((MIN_NEEDED_OUTPUT != 1
-              && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
-             || (MIN_NEEDED_OUTPUT == 1
-                 && __builtin_expect (outptr >= outend, 0)))
-           {
-             /* Overflow in the output buffer.  */
-             result = __GCONV_FULL_OUTPUT;
-             break;
-           }
-         if (MIN_NEEDED_INPUT > 1
-             && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
-           {
-             /* We don't have enough input for another complete input
-                character.  */
-             result = __GCONV_INCOMPLETE_INPUT;
-             break;
-           }
-
-         /* Here comes the body the user provides.  It can stop with
-            RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
-            input characters vary in size), GCONV_ILLEGAL_INPUT, or
-            GCONV_FULL_OUTPUT (if the output characters vary in size).  */
-         BODY
+         /* Overflow in the output buffer.  */
+         result = __GCONV_FULL_OUTPUT;
+         break;
         }
-#endif /* Input and output charset are not both fixed width.  */
+
+      /* Here comes the body the user provides.  It can stop with
+        RESULT set to __GCONV_INCOMPLETE_INPUT (if the input
+        characters vary in size), __GCONV_ILLEGAL_INPUT, or
+        __GCONV_FULL_OUTPUT (if the output characters vary in size).  */
+      BODY
      }
  
    /* Update the pointers pointed to by the parameters.  */
    *inptrp = inptr;
    *outptrp = outptr;
-#ifdef UPDATE_PARAMS
    UPDATE_PARAMS;
-#endif
  
    return result;
  }
@@ -273,8 +343,8 @@ FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
  /* Include the file a second time to define the function to handle
     unaligned access.  */
  #if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \
-    && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
-    && MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
+    && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \
+    && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0
  # undef get16
  # undef get32
  # undef put16
@@ -291,11 +361,20 @@ FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
  # define SINGLE(fct) SINGLE2 (fct)
  # define SINGLE2(fct) fct##_single
  static inline int
-SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
+__attribute ((always_inline))
+SINGLE(LOOPFCT) (struct __gconv_step *step,
+                struct __gconv_step_data *step_data,
+                const unsigned char **inptrp, const unsigned char *inend,
                  unsigned char **outptrp, unsigned char *outend,
-                mbstate_t *state, int flags, void *data, size_t *irreversible
-                EXTRA_LOOP_DECLS)
+                size_t *irreversible EXTRA_LOOP_DECLS)
  {
+  mbstate_t *state = step_data->__statep;
+#ifdef LOOP_NEED_FLAGS
+  int flags = step_data->__flags;
+#endif
+#ifdef LOOP_NEED_DATA
+  void *data = step->__data;
+#endif
    int result = __GCONV_OK;
    unsigned char bytebuf[MAX_NEEDED_INPUT];
    const unsigned char *inptr = *inptrp;
@@ -310,15 +389,20 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
    UNPACK_BYTES
  #else
    /* Add the bytes from the state to the input buffer.  */
-  for (inlen = 0; inlen < (state->__count & 7); ++ inlen)
+  assert ((state->__count & 7) <= sizeof (state->__value));
+  for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
      bytebuf[inlen] = state->__value.__wchb[inlen];
  #endif
  
    /* Are there enough bytes in the input buffer?  */
-  if (__builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
+  if (MIN_NEEDED_INPUT > 1
+      && __builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
      {
        *inptrp = inend;
  #ifdef STORE_REST
+      while (inptr < inend)
+       bytebuf[inlen++] = *inptr++;
+
        inptr = bytebuf;
        inptrp = &inptr;
        inend = &bytebuf[inlen];
@@ -347,8 +431,7 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
  
    inptr = bytebuf;
    inend = &bytebuf[inlen];
-#undef NEED_LENGTH_TEST
-#define NEED_LENGTH_TEST       1
+
    do
      {
        BODY
@@ -370,7 +453,11 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
        result = __GCONV_OK;
  
        /* Clear the state buffer.  */
+#ifdef CLEAR_STATE
+      CLEAR_STATE;
+#else
        state->__count &= ~7;
+#endif
      }
    else if (result == __GCONV_INCOMPLETE_INPUT)
      {
@@ -386,6 +473,10 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
  #else
        /* We don't have enough input for another complete input
          character.  */
+      assert (inend - inptr > (state->__count & ~7));
+      assert (inend - inptr <= sizeof (state->__value));
+      state->__count = (state->__count & ~7) | (inend - inptr);
+      inlen = 0;
        while (inptr < inend)
         state->__value.__wchb[inlen++] = *inptr++;
  #endif
@@ -398,6 +489,15 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
  #endif
  
  
+#ifdef ONEBYTE_BODY
+/* Define the shortcut function for btowc.  */
+static wint_t
+gconv_btowc (struct __gconv_step *step, unsigned char c)
+  ONEBYTE_BODY
+# define FROM_ONEBYTE gconv_btowc
+#endif
+
+
  /* We remove the macro definitions so that we can include this file again
     for the definition of another function.  */
  #undef MIN_NEEDED_INPUT
@@ -410,9 +510,15 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
  #undef EXTRA_LOOP_DECLS
  #undef INIT_PARAMS
  #undef UPDATE_PARAMS
+#undef REINIT_PARAMS
+#undef ONEBYTE_BODY
+#undef UNPACK_BYTES
+#undef CLEAR_STATE
+#undef LOOP_NEED_STATE
+#undef LOOP_NEED_FLAGS
+#undef LOOP_NEED_DATA
  #undef get16
  #undef get32
  #undef put16
  #undef put32
  #undef unaligned
-#undef UNPACK_BYTES