# Checks for libraries.
AC_CHECK_LIB(socket,socket)
AC_CHECK_LIB(nsl,gethostbyname)
+# If a separate libiconv provides iconv_open, AC_CHECK_LIB adds it to LIBS.
+AC_CHECK_LIB(iconv,iconv_open)
# Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS([arpa/inet.h fcntl.h netinet/in.h stddef.h stdlib.h string.h])
AC_CHECK_HEADERS([sys/socket.h syslog.h unistd.h time.h])
+# Use the plural macro, like the checks above, so HAVE_ICONV_H gets defined.
+AC_CHECK_HEADERS([iconv.h])
# Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
#include <sys/stat.h>
#include <sys/wait.h>
#include <ctype.h>
+#include <iconv.h>
#include "mlmmj.h"
#include "unistr.h"
#include "log_error.h"
#include "memory.h"
+/*
+ * Size in bytes of the temporary output buffer that unistr_append_iconv()
+ * hands to each iconv() call.  This is allocated on the stack, so it can't
+ * be too big.
+ */
+#define ICONV_BUFFER_SIZE 160
+
unistr *unistr_new(void)
{
}
+/*
+ * Convert bin_len bytes starting at binary from the named charset to UTF-8
+ * with iconv and append the result to str.
+ *
+ * The input is converted in chunks through a stack buffer of
+ * ICONV_BUFFER_SIZE bytes.  Conversion problems never abort silently; they
+ * are marked in the output instead:
+ *   - charset not supported by iconv_open():  "???" is appended
+ *   - illegal input sequence (EILSEQ):         "?" is appended, one input
+ *                                              byte is skipped, and the
+ *                                              conversion continues
+ *   - incomplete trailing sequence (EINVAL):   "?" is appended, then stop
+ *   - any other iconv() failure:               "???" is appended, then stop
+ *
+ * binary is not const because iconv() takes a char ** input pointer.
+ */
+void unistr_append_iconv(unistr *str, char *binary, size_t bin_len,
+		const char * charset)
+{
+	char bytes[ICONV_BUFFER_SIZE];
+	char *buffer;
+	size_t bufferleft;
+	size_t produced;
+	size_t result;
+	int saved_errno;
+	iconv_t cd;
+
+	cd = iconv_open("UTF-8", charset);
+	if (cd == (iconv_t)-1) {
+		unistr_append_usascii(str, "???", 3);
+		return;
+	}
+
+	while (bin_len > 0) {
+		buffer = bytes;
+		bufferleft = ICONV_BUFFER_SIZE;
+		result = iconv(cd, &binary, &bin_len, &buffer, &bufferleft);
+		/* save errno before the append helpers get a chance to change it */
+		saved_errno = errno;
+		produced = ICONV_BUFFER_SIZE - bufferleft;
+
+		/*
+		 * Whatever the outcome, keep the bytes iconv() managed to convert
+		 * before it stopped, so that an error late in the chunk does not
+		 * discard the valid text in front of it.
+		 */
+		unistr_append_utf8(str, bytes, produced);
+
+		if (result != (size_t)-1) {
+			/* success; bin_len is re-checked by the loop condition */
+			continue;
+		}
+
+		if (saved_errno == E2BIG && produced > 0) {
+			/* output buffer full; go round again with an empty one */
+			continue;
+		}
+
+		if (saved_errno == EILSEQ) {
+			/* illegal sequence; skip one input byte and try to recover */
+			unistr_append_usascii(str, "?", 1);
+			bin_len--;
+			binary++;
+			continue;
+		}
+
+		if (saved_errno == EINVAL) {
+			/* incomplete sequence at end of input; we're done */
+			unistr_append_usascii(str, "?", 1);
+		} else {
+			/*
+			 * Some other error, or E2BIG without any progress (which
+			 * would otherwise loop forever); abort.
+			 */
+			unistr_append_usascii(str, "???", 3);
+		}
+		break;
+	}
+	iconv_close(cd);
+}
+
+
void unistr_dump(const unistr *str)
{
unsigned int i;
} else if (strcasecmp(charset, "iso-8859-1") == 0) {
unistr_append_iso88591(ret, binary, bin_len);
} else {
- /* unknown charset */
- unistr_append_usascii(ret, "???", 3);
+ unistr_append_iconv(ret, binary, bin_len, charset);
}
myfree(my_word);