git.ipfire.org Git - thirdparty/samba.git/commitdiff
changed the iconv interface to go via ucs2 for all conversions. This
author Andrew Tridgell <tridge@samba.org>
Sun, 22 Jul 2001 07:38:32 +0000 (07:38 +0000)
committer Andrew Tridgell <tridge@samba.org>
Sun, 22 Jul 2001 07:38:32 +0000 (07:38 +0000)
fixes some problems with some character sets and allows for using
internal charsets in conjunction with iconv charsets.
This makes us slower but more correct. Speed will come later.

source/include/includes.h
source/include/smb.h
source/lib/charcnv.c
source/lib/iconv.c

index 3d6147f587f1c7ecc5e7b8d9e2e214b4b38a1835..51f2ca9ee852d5cc59e448c5239b110e1373ad28 100644 (file)
@@ -1029,6 +1029,11 @@ extern int DEBUGLEVEL;
 #define RTLD_NOW 0
 #endif
 
+/* needed for some systems without iconv */
+#ifndef EILSEQ
+#define EILSEQ EINVAL
+#endif
+
 /* add varargs prototypes with printf checking */
 int fdprintf(int , char *, ...) PRINTF_ATTRIBUTE(2,3);
 #ifndef HAVE_SNPRINTF_DECL
index 091ed2ac6639e9b85f33781873fa63359fcf0b13..2e83e1c6375765d63b6e1d6a1e4d19a559e0aa43 100644 (file)
@@ -1687,15 +1687,13 @@ typedef struct {
 
 /* generic iconv conversion structure */
 typedef struct {
-       size_t (*direct)(char **inbuf, size_t *inbytesleft,
+       size_t (*direct)(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft);
-       size_t (*pull)(char **inbuf, size_t *inbytesleft,
+       size_t (*pull)(void *cd, char **inbuf, size_t *inbytesleft,
                       char **outbuf, size_t *outbytesleft);
-       size_t (*push)(char **inbuf, size_t *inbytesleft,
+       size_t (*push)(void *cd, char **inbuf, size_t *inbytesleft,
                       char **outbuf, size_t *outbytesleft);
-#ifdef HAVE_NATIVE_ICONV
-       iconv_t cd;
-#endif
+       void *cd_direct, *cd_pull, *cd_push;
 } *smb_iconv_t;
 
 #endif /* _SMB_H */
index 6c7646673d8964cad5bdc4459813e88c6c441d8d..49c4ba4063e4d3481c3b7037faba5be2ae5ee2bd 100644 (file)
@@ -97,9 +97,7 @@ static size_t convert_string(smb_iconv_t *descriptor,
                               DEBUG(0, ("Required %d, available %d\n",
                               srclen, destlen));       
                               break;
-#ifdef EILSEQ
                  case EILSEQ: reason="Illegal myltybyte sequence"; break;
-#endif
                }
                DEBUG(0,("Conversion error:%s(%s)\n",reason,inbuf));
                /* smb_panic(reason); */
index b73ff6ff395fdf71ecd72b0cf3cc6f625ef06da8..2285d8debfef7ce25a843f716887e3b938ffc102 100644 (file)
 
 #include "includes.h"
 
-static size_t ascii_pull(char **, size_t *, char **, size_t *);
-static size_t ascii_push(char **, size_t *, char **, size_t *);
-static size_t  utf8_pull(char **, size_t *, char **, size_t *);
-static size_t  utf8_push(char **, size_t *, char **, size_t *);
-static size_t weird_pull(char **, size_t *, char **, size_t *);
-static size_t weird_push(char **, size_t *, char **, size_t *);
-static size_t iconv_copy(char **, size_t *, char **, size_t *);
+static size_t ascii_pull(void *,char **, size_t *, char **, size_t *);
+static size_t ascii_push(void *,char **, size_t *, char **, size_t *);
+static size_t  utf8_pull(void *,char **, size_t *, char **, size_t *);
+static size_t  utf8_push(void *,char **, size_t *, char **, size_t *);
+static size_t weird_pull(void *,char **, size_t *, char **, size_t *);
+static size_t weird_push(void *,char **, size_t *, char **, size_t *);
+static size_t ucs2hex_pull(void *,char **, size_t *, char **, size_t *);
+static size_t ucs2hex_push(void *,char **, size_t *, char **, size_t *);
+static size_t iconv_copy(void *,char **, size_t *, char **, size_t *);
 
 /*
   for each charset we have a function that pulls from that charset to 
@@ -35,18 +37,39 @@ static size_t iconv_copy(char **, size_t *, char **, size_t *);
 */
 static struct {
        char *name;
-       size_t (*pull)(char **inbuf, size_t *inbytesleft,
+       size_t (*pull)(void *, char **inbuf, size_t *inbytesleft,
                       char **outbuf, size_t *outbytesleft);
-       size_t (*push)(char **inbuf, size_t *inbytesleft,
+       size_t (*push)(void *, char **inbuf, size_t *inbytesleft,
                       char **outbuf, size_t *outbytesleft);
 } charsets[] = {
        {"UCS-2LE",  iconv_copy, iconv_copy},
        {"UTF8",   utf8_pull,  utf8_push},
        {"ASCII", ascii_pull, ascii_push},
        {"WEIRD", weird_pull, weird_push},
+       {"UCS2-HEX", ucs2hex_pull, ucs2hex_push},
        {NULL, NULL, NULL}
 };
 
+
+/* if there was an error then reset the internal state,
+   this ensures that we don't have a shift state remaining for
+   character sets like SJIS */
+static size_t sys_iconv(void *cd, 
+                       char **inbuf, size_t *inbytesleft,
+                       char **outbuf, size_t *outbytesleft)
+{
+#ifdef HAVE_NATIVE_ICONV
+       size_t ret = iconv((iconv_t)cd, 
+                          inbuf, inbytesleft, 
+                          outbuf, outbytesleft);
+       if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL);
+       return ret;
+#else
+       errno = EINVAL;
+       return -1;
+#endif
+}
+
 /*
   this is a simple portable iconv() implementaion. It only knows about
   a very small number of character sets - just enough that Samba works
@@ -60,36 +83,28 @@ size_t smb_iconv(smb_iconv_t cd,
        char *bufp = cvtbuf;
        size_t bufsize;
 
-#ifdef HAVE_NATIVE_ICONV
-       if (cd->cd) {
-               size_t ret;
-               ret = iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft);
-
-               /* if there was an error then reset the internal state,
-                  this ensures that we don't have a shift state remaining for
-                  character sets like SJIS */
-               if (ret == (size_t)-1) {
-                       iconv(cd->cd, NULL, NULL, NULL, NULL);
-               }
-               return ret;
-       }
-#endif
-
-       /* in most cases we can go direct */
+       /* in many cases we can go direct */
        if (cd->direct) {
-               return cd->direct(inbuf, inbytesleft, outbuf, outbytesleft);
+               return cd->direct(cd->cd_direct, 
+                                 inbuf, inbytesleft, outbuf, outbytesleft);
        }
 
+
        /* otherwise we have to do it chunks at a time */
        while (*inbytesleft > 0) {
                bufp = cvtbuf;
                bufsize = sizeof(cvtbuf);
-               if (cd->pull(inbuf, inbytesleft, &bufp, &bufsize) == -1 &&
-                   errno != E2BIG) return -1;
+               
+               if (cd->pull(cd->cd_pull, 
+                            inbuf, inbytesleft, &bufp, &bufsize) == -1
+                   && errno != E2BIG) return -1;
 
                bufp = cvtbuf;
                bufsize = sizeof(cvtbuf) - bufsize;
-               if (cd->push(&bufp, &bufsize, outbuf, outbytesleft) == -1) return -1;
+
+               if (cd->push(cd->cd_push, 
+                            &bufp, &bufsize, 
+                            outbuf, outbytesleft) == -1) return -1;
        }
 
        return 0;
@@ -102,9 +117,19 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
 {
        smb_iconv_t ret;
        int from, to;
-#ifdef HAVE_NATIVE_ICONV
-       iconv_t cd = NULL;
-#endif
+
+       ret = (smb_iconv_t)malloc(sizeof(*ret));
+       if (!ret) {
+               errno = ENOMEM;
+               return (smb_iconv_t)-1;
+       }
+       memset(ret, 0, sizeof(*ret));
+
+       /* check for the simplest null conversion */
+       if (strcmp(fromcode, tocode) == 0) {
+               ret->direct = iconv_copy;
+               return ret;
+       }
 
        for (from=0; charsets[from].name; from++) {
                if (strcasecmp(charsets[from].name, fromcode) == 0) break;
@@ -113,53 +138,57 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
                if (strcasecmp(charsets[to].name, tocode) == 0) break;
        }
 
-       if (!charsets[from].name || !charsets[to].name) {
 #ifdef HAVE_NATIVE_ICONV
-               /* its not builtin - see if iconv() has it */
-               cd = iconv_open(tocode, fromcode);
-               if (!cd)
-#endif
-               {
-                       errno = EINVAL;
-                       return (smb_iconv_t)-1;
-               }
+       if (!charsets[from].name) {
+               ret->pull = sys_iconv;
+               ret->cd_pull = iconv_open("UCS-2LE", fromcode);
+               if (!ret->cd_pull) goto failed;
        }
-
-       ret = (smb_iconv_t)malloc(sizeof(*ret));
-       if (!ret) {
-               errno = ENOMEM;
-               return (smb_iconv_t)-1;
+       if (!charsets[to].name) {
+               ret->push = sys_iconv;
+               ret->cd_push = iconv_open(tocode, "UCS-2LE");
+               if (!ret->cd_push) goto failed;
        }
-       memset(ret, 0, sizeof(*ret));
-
-#ifdef HAVE_NATIVE_ICONV
-       /* see if we will be using the native iconv */
-       if (cd) {
-               ret->cd = cd;
-               return ret;
+#else
+       if (!charsets[from].name || !charsets[to].name) {
+               goto failed;
        }
 #endif
 
-       /* check for the simplest null conversion */
-       if (from == to) {
-               ret->direct = iconv_copy;
+       /* check for conversion to/from ucs2 */
+       if (from == 0 && charsets[to].name) {
+               ret->direct = charsets[to].push;
+               return ret;
+       }
+       if (to == 0 && charsets[from].name) {
+               ret->direct = charsets[from].pull;
                return ret;
        }
 
-       /* check for conversion to/from ucs2 */
+#ifdef HAVE_NATIVE_ICONV
        if (from == 0) {
-               ret->direct = charsets[to].push;
+               ret->direct = sys_iconv;
+               ret->cd_direct = ret->cd_push;
+               ret->cd_push = NULL;
                return ret;
        }
        if (to == 0) {
-               ret->direct = charsets[from].pull;
+               ret->direct = sys_iconv;
+               ret->cd_direct = ret->cd_pull;
+               ret->cd_pull = NULL;
                return ret;
        }
+#endif
 
        /* the general case has to go via a buffer */
-       ret->pull = charsets[from].pull;
-       ret->push = charsets[to].push;
+       if (!ret->pull) ret->pull = charsets[from].pull;
+       if (!ret->push) ret->push = charsets[to].push;
        return ret;
+
+failed:
+       free(ret);
+       errno = EINVAL;
+       return (smb_iconv_t)-1;
 }
 
 /*
@@ -168,10 +197,11 @@ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
 int smb_iconv_close (smb_iconv_t cd)
 {
 #ifdef HAVE_NATIVE_ICONV
-       if (cd->cd) {
-               iconv_close(cd->cd);
-       }
+       if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct);
+       if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull);
+       if (cd->cd_push) iconv_close((iconv_t)cd->cd_push);
 #endif
+
        memset(cd, 0, sizeof(*cd));
        free(cd);
        return 0;
@@ -184,7 +214,7 @@ int smb_iconv_close (smb_iconv_t cd)
  multi-byte character set support for english users
 ***********************************************************************/
 
-static size_t ascii_pull(char **inbuf, size_t *inbytesleft,
+static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
@@ -204,7 +234,7 @@ static size_t ascii_pull(char **inbuf, size_t *inbytesleft,
        return 0;
 }
 
-static size_t ascii_push(char **inbuf, size_t *inbytesleft,
+static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        int ir_count=0;
@@ -232,6 +262,91 @@ static size_t ascii_push(char **inbuf, size_t *inbytesleft,
 }
 
 
+static size_t ucs2hex_pull(void *cd, char **inbuf, size_t *inbytesleft,
+                        char **outbuf, size_t *outbytesleft)
+{
+       while (*inbytesleft >= 1 && *outbytesleft >= 2) {
+               unsigned v;
+
+               if ((*inbuf)[0] != '@') {
+                       /* seven bit ascii case */
+                       (*outbuf)[0] = (*inbuf)[0];
+                       (*outbuf)[1] = 0;
+                       (*inbytesleft)  -= 1;
+                       (*outbytesleft) -= 2;
+                       (*inbuf)  += 1;
+                       (*outbuf) += 2;
+                       continue;
+               }
+               /* it's a hex character */
+               if (*inbytesleft < 5) {
+                       errno = EINVAL;
+                       return -1;
+               }
+               
+               if (sscanf(&(*inbuf)[1], "%04x", &v) != 1) {
+                       errno = EILSEQ;
+                       return -1;
+               }
+
+               (*outbuf)[0] = v&0xff;
+               (*outbuf)[1] = v>>8;
+               (*inbytesleft)  -= 5;
+               (*outbytesleft) -= 2;
+               (*inbuf)  += 5;
+               (*outbuf) += 2;
+       }
+
+       if (*inbytesleft > 0) {
+               errno = E2BIG;
+               return -1;
+       }
+       
+       return 0;
+}
+
+static size_t ucs2hex_push(void *cd, char **inbuf, size_t *inbytesleft,
+                          char **outbuf, size_t *outbytesleft)
+{
+       while (*inbytesleft >= 2 && *outbytesleft >= 1) {
+               char buf[6];
+
+               if ((*inbuf)[1] == 0 && 
+                   ((*inbuf)[0] & 0x80) == 0 &&
+                   (*inbuf)[0] != '@') {
+                       (*outbuf)[0] = (*inbuf)[0];
+                       (*inbytesleft)  -= 2;
+                       (*outbytesleft) -= 1;
+                       (*inbuf)  += 2;
+                       (*outbuf) += 1;
+                       continue;
+               }
+               if (*outbytesleft < 5) {
+                       errno = E2BIG;
+                       return -1;
+               }
+               snprintf(buf, 6, "@%04x", SVAL(*inbuf, 0));
+               memcpy(*outbuf, buf, 5);
+               (*inbytesleft)  -= 2;
+               (*outbytesleft) -= 5;
+               (*inbuf)  += 2;
+               (*outbuf) += 5;
+       }
+
+       if (*inbytesleft == 1) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (*inbytesleft > 1) {
+               errno = E2BIG;
+               return -1;
+       }
+       
+       return 0;
+}
+
+
 /* the "weird" character set is very useful for testing multi-byte
    support and finding bugs. Don't use on a production system! 
 */
@@ -245,7 +360,7 @@ static struct {
        {0, NULL}
 };
 
-static size_t weird_pull(char **inbuf, size_t *inbytesleft,
+static size_t weird_pull(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
@@ -288,7 +403,7 @@ static size_t weird_pull(char **inbuf, size_t *inbytesleft,
        return 0;
 }
 
-static size_t weird_push(char **inbuf, size_t *inbytesleft,
+static size_t weird_push(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        int ir_count=0;
@@ -337,7 +452,7 @@ static size_t weird_push(char **inbuf, size_t *inbytesleft,
        return ir_count;
 }
 
-static size_t iconv_copy(char **inbuf, size_t *inbytesleft,
+static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        int n;
@@ -359,7 +474,7 @@ static size_t iconv_copy(char **inbuf, size_t *inbytesleft,
        return 0;
 }
 
-static size_t utf8_pull(char **inbuf, size_t *inbytesleft,
+static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {
@@ -406,7 +521,7 @@ badseq:
        return -1;
 }
 
-static size_t utf8_push(char **inbuf, size_t *inbytesleft,
+static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft)
 {
        while (*inbytesleft >= 2 && *outbytesleft >= 1) {