git.ipfire.org Git - thirdparty/glibc.git/commitdiff
iconv: make utf-7.c able to use variants
author: Max Gautier <mg@max.gautier.name>
Mon, 21 Mar 2022 12:12:05 +0000 (09:12 -0300)
committer: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Mon, 21 Mar 2022 16:23:56 +0000 (13:23 -0300)
Add infrastructure in utf-7.c to handle variants.  The approach comes from
iso646.c.
The variant is determined at gconv_init time and is passed to the
conversion loops as an extra argument.

Signed-off-by: Max Gautier <mg@max.gautier.name>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
iconvdata/utf-7.c

index 815b1891c7600e4e29ebe315ef3d3c1df7ac4b3d..393fd3583f28d117ec29848cc0c4632f216221f4 100644 (file)
 #include <gconv.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <string.h>
 
 
+enum variant
+{
+  UTF7,  /* Value 0; corresponds to the first entry of `names'.  */
+};
+
+/* Must match the order of enum variant; an empty name ends the list.  */
+static const char names[] =
+  "UTF-7//\0"
+  "\0";
+
+static uint32_t
+shift_character (enum variant const var)
+{
+  if (var == UTF7)
+    return '+';  /* Character that introduces a base64 sequence.  */
+  else
+    abort ();    /* No other variant is defined.  */
+}
 
 static bool
 between (uint32_t const ch,
@@ -42,30 +61,38 @@ between (uint32_t const ch,
 */
 
 static bool
-isdirect (uint32_t ch)
+isdirect (uint32_t ch, enum variant var)
 {
-  return (between (ch, 'A', 'Z')
-         || between (ch, 'a', 'z')
-         || between (ch, '0', '9')
-         || ch == '\'' || ch == '(' || ch == ')'
-         || between (ch, ',', '/')
-         || ch == ':' || ch == '?'
-         || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
+  if (var == UTF7)
+    return (between (ch, 'A', 'Z')
+           || between (ch, 'a', 'z')
+           || between (ch, '0', '9')
+           || ch == '\'' || ch == '(' || ch == ')'
+           || between (ch, ',', '/')
+           || ch == ':' || ch == '?'
+           || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
+  abort ();  /* Only reached for a variant not handled above.  */
 }
 
 
 /* The set of "direct and optional direct characters":
    A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
+   and, for the UTF-7 variant only:
    ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
 */
 
 static bool
-isxdirect (uint32_t ch)
+isxdirect (uint32_t ch, enum variant var)
 {
-  return (ch == '\t'
-         || ch == '\n'
-         || ch == '\r'
-         || (between (ch, ' ', '}') && ch != '+' && ch != '\\'));
+  if (isdirect (ch, var))
+    return true;  /* Every direct character is also "xdirect".  */
+  if (var != UTF7)
+    return false;  /* Optional direct characters are UTF-7 only.  */
+  return between (ch, '!', '&')
+    || ch == '*'
+    || between (ch, ';', '@')
+    || (between (ch, '[', '`') && ch != '\\')
+    || between (ch, '{', '}');
 }
 
 
@@ -85,7 +112,7 @@ needs_explicit_shift (uint32_t ch)
 
 /* Converts a value in the range 0..63 to a base64 encoded char.  */
 static unsigned char
-base64 (unsigned int i)
+base64 (unsigned int i, enum variant var)
 {
   if (i < 26)
     return i + 'A';
@@ -95,7 +122,7 @@ base64 (unsigned int i)
     return i - 52 + '0';
   else if (i == 62)
     return '+';
-  else if (i == 63)
+  else if (i == 63 && var == UTF7)
     return '/';
   else
     abort ();
@@ -103,9 +130,8 @@ base64 (unsigned int i)
 
 
 /* Definitions used in the body of the `gconv' function.  */
-#define CHARSET_NAME           "UTF-7//"
-#define DEFINE_INIT            1
-#define DEFINE_FINI            1
+#define DEFINE_INIT            0  /* gconv_init is hand-written below.  */
+#define DEFINE_FINI            0  /* gconv_end is hand-written below.  */
 #define FROM_LOOP              from_utf7_loop
 #define TO_LOOP                        to_utf7_loop
 #define MIN_NEEDED_FROM                1
@@ -113,11 +139,27 @@ base64 (unsigned int i)
 #define MIN_NEEDED_TO          4
 #define MAX_NEEDED_TO          4
 #define ONE_DIRECTION          0
+#define FROM_DIRECTION      (dir == from_utf7)
 #define PREPARE_LOOP \
   mbstate_t saved_state;                                                     \
-  mbstate_t *statep = data->__statep;
-#define EXTRA_LOOP_ARGS                , statep
+  mbstate_t *statep = data->__statep;                                        \
+  enum direction dir = ((struct utf7_data *) step->__data)->dir;             \
+  enum direction var = ((struct utf7_data *) step->__data)->var;
+#define EXTRA_LOOP_ARGS                , statep, var
+
 
+enum direction
+{
+  illegal_dir,  /* Neither step name matched a known variant.  */
+  from_utf7,    /* The step's from-name is a UTF-7 variant.  */
+  to_utf7       /* The step's to-name is a UTF-7 variant.  */
+};
+
+struct utf7_data
+{
+  enum direction dir;  /* Conversion direction chosen in gconv_init.  */
+  enum variant var;    /* Variant chosen in gconv_init.  */
+};
 
 /* Since we might have to reset input pointer we must be able to save
    and restore the state.  */
@@ -127,6 +169,70 @@ base64 (unsigned int i)
   else                                                                       \
     *statep = saved_state
 
+int
+gconv_init (struct __gconv_step *step)
+{
+  /* Determine the direction and variant from the step's charset names.  */
+  struct utf7_data *new_data;
+  enum direction dir = illegal_dir;
+
+  enum variant var = 0;
+  for (const char *name = names; *name != '\0';
+       name = __rawmemchr (name, '\0') + 1)
+    {
+      if (__strcasecmp (step->__from_name, name) == 0)
+       {
+         dir = from_utf7;
+         break;
+       }
+      else if (__strcasecmp (step->__to_name, name) == 0)
+       {
+         dir = to_utf7;
+         break;
+       }
+      ++var;  /* The next name belongs to the next enum variant.  */
+    }
+
+  if (__glibc_likely (dir != illegal_dir))
+    {
+      new_data = malloc (sizeof (*new_data));  /* Freed in gconv_end.  */
+      if (new_data == NULL)
+       return __GCONV_NOMEM;
+
+      new_data->dir = dir;
+      new_data->var = var;
+      step->__data = new_data;
+
+      if (dir == from_utf7)
+       {
+         step->__min_needed_from = MIN_NEEDED_FROM;
+         step->__max_needed_from = MAX_NEEDED_FROM;
+         step->__min_needed_to = MIN_NEEDED_TO;
+         step->__max_needed_to = MAX_NEEDED_TO;
+       }
+      else  /* to_utf7: the from/to requirements are swapped.  */
+       {
+         step->__min_needed_from = MIN_NEEDED_TO;
+         step->__max_needed_from = MAX_NEEDED_TO;
+         step->__min_needed_to = MIN_NEEDED_FROM;
+         step->__max_needed_to = MAX_NEEDED_FROM;
+       }
+    }
+  else
+    return __GCONV_NOCONV;  /* Neither name matched a known variant.  */
+
+  step->__stateful = 1;
+
+  return __GCONV_OK;
+}
+
+void
+gconv_end (struct __gconv_step *data)
+{
+  free (data->__data);  /* The utf7_data allocated in gconv_init.  */
+}
+
+
 
 /* First define the conversion function from UTF-7 to UCS4.
    The state is structured as follows:
@@ -154,13 +260,13 @@ base64 (unsigned int i)
     if ((statep->__count >> 3) == 0)                                         \
       {                                                                              \
        /* base64 encoding inactive.  */                                      \
-       if (isxdirect (ch))                                                   \
+       if (isxdirect (ch, var))                                              \
          {                                                                   \
            inptr++;                                                          \
            put32 (outptr, ch);                                               \
            outptr += 4;                                                      \
          }                                                                   \
-       else if (__glibc_likely (ch == '+'))                                  \
+       else if (__glibc_likely (ch == shift_character (var)))                \
          {                                                                   \
            if (__glibc_unlikely (inptr + 2 > inend))                         \
              {                                                               \
@@ -285,7 +391,7 @@ base64 (unsigned int i)
       }                                                                              \
   }
 #define LOOP_NEED_FLAGS
-#define EXTRA_LOOP_DECLS       , mbstate_t *statep
+#define EXTRA_LOOP_DECLS       , mbstate_t *statep, enum variant var
 #include <iconv/loop.c>
 
 
@@ -316,7 +422,7 @@ base64 (unsigned int i)
     if ((statep->__count & 0x18) == 0)                                       \
       {                                                                              \
        /* base64 encoding inactive */                                        \
-       if (isdirect (ch))                                                    \
+       if (isdirect (ch, var))                                               \
          {                                                                   \
            *outptr++ = (unsigned char) ch;                                   \
          }                                                                   \
@@ -324,7 +430,7 @@ base64 (unsigned int i)
          {                                                                   \
            size_t count;                                                     \
                                                                              \
-           if (ch == '+')                                                    \
+           if (ch == shift_character (var))                                  \
              count = 2;                                                      \
            else if (ch < 0x10000)                                            \
              count = 3;                                                      \
@@ -339,13 +445,13 @@ base64 (unsigned int i)
                break;                                                        \
              }                                                               \
                                                                              \
-           *outptr++ = '+';                                                  \
-           if (ch == '+')                                                    \
+           *outptr++ = shift_character (var);                                \
+           if (ch == shift_character (var))                                  \
              *outptr++ = '-';                                                \
            else if (ch < 0x10000)                                            \
              {                                                               \
-               *outptr++ = base64 (ch >> 10);                                \
-               *outptr++ = base64 ((ch >> 4) & 0x3f);                        \
+               *outptr++ = base64 (ch >> 10, var);                           \
+               *outptr++ = base64 ((ch >> 4) & 0x3f, var);                   \
                statep->__count = ((ch & 15) << 5) | (3 << 3);                \
              }                                                               \
            else if (ch < 0x110000)                                           \
@@ -354,11 +460,11 @@ base64 (unsigned int i)
                uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff);             \
                                                                              \
                ch = (ch1 << 16) | ch2;                                       \
-               *outptr++ = base64 (ch >> 26);                                \
-               *outptr++ = base64 ((ch >> 20) & 0x3f);                       \
-               *outptr++ = base64 ((ch >> 14) & 0x3f);                       \
-               *outptr++ = base64 ((ch >> 8) & 0x3f);                        \
-               *outptr++ = base64 ((ch >> 2) & 0x3f);                        \
+               *outptr++ = base64 (ch >> 26, var);                           \
+               *outptr++ = base64 ((ch >> 20) & 0x3f, var);                  \
+               *outptr++ = base64 ((ch >> 14) & 0x3f, var);                  \
+               *outptr++ = base64 ((ch >> 8) & 0x3f, var);                   \
+               *outptr++ = base64 ((ch >> 2) & 0x3f, var);                   \
                statep->__count = ((ch & 3) << 7) | (2 << 3);                 \
              }                                                               \
            else                                                              \
@@ -368,7 +474,7 @@ base64 (unsigned int i)
     else                                                                     \
       {                                                                              \
        /* base64 encoding active */                                          \
-       if (isdirect (ch))                                                    \
+       if (isdirect (ch, var))                                               \
          {                                                                   \
            /* deactivate base64 encoding */                                  \
            size_t count;                                                     \
@@ -382,7 +488,7 @@ base64 (unsigned int i)
              }                                                               \
                                                                              \
            if ((statep->__count & 0x18) >= 0x10)                             \
-             *outptr++ = base64 ((statep->__count >> 3) & ~3);               \
+             *outptr++ = base64 ((statep->__count >> 3) & ~3, var);          \
            if (needs_explicit_shift (ch))                                    \
              *outptr++ = '-';                                                \
            *outptr++ = (unsigned char) ch;                                   \
@@ -410,22 +516,24 @@ base64 (unsigned int i)
                switch ((statep->__count >> 3) & 3)                           \
                  {                                                           \
                  case 1:                                                     \
-                   *outptr++ = base64 (ch >> 10);                            \
-                   *outptr++ = base64 ((ch >> 4) & 0x3f);                    \
+                   *outptr++ = base64 (ch >> 10, var);                       \
+                   *outptr++ = base64 ((ch >> 4) & 0x3f, var);               \
                    statep->__count = ((ch & 15) << 5) | (3 << 3);            \
                    break;                                                    \
                  case 2:                                                     \
                    *outptr++ =                                               \
-                     base64 (((statep->__count >> 3) & ~3) | (ch >> 12));    \
-                   *outptr++ = base64 ((ch >> 6) & 0x3f);                    \
-                   *outptr++ = base64 (ch & 0x3f);                           \
+                     base64 (((statep->__count >> 3) & ~3) | (ch >> 12),     \
+                             var);                                           \
+                   *outptr++ = base64 ((ch >> 6) & 0x3f, var);               \
+                   *outptr++ = base64 (ch & 0x3f, var);                      \
                    statep->__count = (1 << 3);                               \
                    break;                                                    \
                  case 3:                                                     \
                    *outptr++ =                                               \
-                     base64 (((statep->__count >> 3) & ~3) | (ch >> 14));    \
-                   *outptr++ = base64 ((ch >> 8) & 0x3f);                    \
-                   *outptr++ = base64 ((ch >> 2) & 0x3f);                    \
+                     base64 (((statep->__count >> 3) & ~3) | (ch >> 14),     \
+                             var);                                           \
+                   *outptr++ = base64 ((ch >> 8) & 0x3f, var);               \
+                   *outptr++ = base64 ((ch >> 2) & 0x3f, var);               \
                    statep->__count = ((ch & 3) << 7) | (2 << 3);             \
                    break;                                                    \
                  default:                                                    \
@@ -441,30 +549,32 @@ base64 (unsigned int i)
                switch ((statep->__count >> 3) & 3)                           \
                  {                                                           \
                  case 1:                                                     \
-                   *outptr++ = base64 (ch >> 26);                            \
-                   *outptr++ = base64 ((ch >> 20) & 0x3f);                   \
-                   *outptr++ = base64 ((ch >> 14) & 0x3f);                   \
-                   *outptr++ = base64 ((ch >> 8) & 0x3f);                    \
-                   *outptr++ = base64 ((ch >> 2) & 0x3f);                    \
+                   *outptr++ = base64 (ch >> 26, var);                       \
+                   *outptr++ = base64 ((ch >> 20) & 0x3f, var);              \
+                   *outptr++ = base64 ((ch >> 14) & 0x3f, var);              \
+                   *outptr++ = base64 ((ch >> 8) & 0x3f, var);               \
+                   *outptr++ = base64 ((ch >> 2) & 0x3f, var);               \
                    statep->__count = ((ch & 3) << 7) | (2 << 3);             \
                    break;                                                    \
                  case 2:                                                     \
                    *outptr++ =                                               \
-                     base64 (((statep->__count >> 3) & ~3) | (ch >> 28));    \
-                   *outptr++ = base64 ((ch >> 22) & 0x3f);                   \
-                   *outptr++ = base64 ((ch >> 16) & 0x3f);                   \
-                   *outptr++ = base64 ((ch >> 10) & 0x3f);                   \
-                   *outptr++ = base64 ((ch >> 4) & 0x3f);                    \
+                     base64 (((statep->__count >> 3) & ~3) | (ch >> 28),     \
+                             var);                                           \
+                   *outptr++ = base64 ((ch >> 22) & 0x3f, var);              \
+                   *outptr++ = base64 ((ch >> 16) & 0x3f, var);              \
+                   *outptr++ = base64 ((ch >> 10) & 0x3f, var);              \
+                   *outptr++ = base64 ((ch >> 4) & 0x3f, var);               \
                    statep->__count = ((ch & 15) << 5) | (3 << 3);            \
                    break;                                                    \
                  case 3:                                                     \
                    *outptr++ =                                               \
-                     base64 (((statep->__count >> 3) & ~3) | (ch >> 30));    \
-                   *outptr++ = base64 ((ch >> 24) & 0x3f);                   \
-                   *outptr++ = base64 ((ch >> 18) & 0x3f);                   \
-                   *outptr++ = base64 ((ch >> 12) & 0x3f);                   \
-                   *outptr++ = base64 ((ch >> 6) & 0x3f);                    \
-                   *outptr++ = base64 (ch & 0x3f);                           \
+                     base64 (((statep->__count >> 3) & ~3) | (ch >> 30),     \
+                             var);                                           \
+                   *outptr++ = base64 ((ch >> 24) & 0x3f, var);              \
+                   *outptr++ = base64 ((ch >> 18) & 0x3f, var);              \
+                   *outptr++ = base64 ((ch >> 12) & 0x3f, var);              \
+                   *outptr++ = base64 ((ch >> 6) & 0x3f, var);               \
+                   *outptr++ = base64 (ch & 0x3f, var);                      \
                    statep->__count = (1 << 3);                               \
                    break;                                                    \
                  default:                                                    \
@@ -480,7 +590,7 @@ base64 (unsigned int i)
     inptr += 4;                                                                      \
   }
 #define LOOP_NEED_FLAGS
-#define EXTRA_LOOP_DECLS       , mbstate_t *statep
+#define EXTRA_LOOP_DECLS       , mbstate_t *statep, enum variant var
 #include <iconv/loop.c>
 
 
@@ -510,7 +620,7 @@ base64 (unsigned int i)
            {                                                                 \
              /* Write out the shift sequence.  */                            \
              if ((state & 0x18) >= 0x10)                                     \
-               *outbuf++ = base64 ((state >> 3) & ~3);                       \
+               *outbuf++ = base64 ((state >> 3) & ~3, var);                  \
              *outbuf++ = '-';                                                \
                                                                              \
              data->__statep->__count = 0;                                    \