]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
Move string utilities to a separate module.
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 17 Jul 2015 14:39:46 +0000 (15:39 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 17 Jul 2015 14:39:46 +0000 (15:39 +0100)
src/libutil/CMakeLists.txt
src/libutil/str_util.c [new file with mode: 0644]
src/libutil/str_util.h [new file with mode: 0644]
src/libutil/util.c
src/libutil/util.h

index 29c3b24293bc6b91f66374445295545684e1b8a0..61e5d6d1590a3be3562f7625e51c2eb57c9b0023 100644 (file)
@@ -19,6 +19,7 @@ SET(LIBRSPAMDUTILSRC
                                                                ${CMAKE_CURRENT_SOURCE_DIR}/rrd.c
                                                                ${CMAKE_CURRENT_SOURCE_DIR}/shingles.c
                                                                ${CMAKE_CURRENT_SOURCE_DIR}/sqlite_utils.c
+                                                               ${CMAKE_CURRENT_SOURCE_DIR}/str_util.c
                                                                ${CMAKE_CURRENT_SOURCE_DIR}/upstream.c
                                                                ${CMAKE_CURRENT_SOURCE_DIR}/util.c)
 # Rspamdutil
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
new file mode 100644 (file)
index 0000000..58105be
--- /dev/null
@@ -0,0 +1,709 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "util.h"
+#include "mem_pool.h"
+#include "xxhash.h"
+
+/*
+ * ASCII lowercase translation table indexed by byte value: entries 0x41-0x5a
+ * ('A'-'Z') hold 0x61-0x7a ('a'-'z'), every other entry holds its own index.
+ */
+static const guchar lc_map[256] = {
+               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+               0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+               0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+               0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+               0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+               0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+               0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+               0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+               0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+               0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+               0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+               0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+               0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+               0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+               0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+               0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+               0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+               0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+               0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+               0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+               0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+               0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+               0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+               0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+               0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+               0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+               0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+               0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+               0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+               0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+               0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+               0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+/*
+ * Lowercase the first `size` bytes of `str` in place through the lc_map
+ * table, so only ASCII 'A'-'Z' are changed.
+ */
+void
+rspamd_str_lc (gchar *str, guint size)
+{
+       guchar *bytes = (guchar *) str;
+       guint pos;
+
+       for (pos = 0; pos < size; pos++) {
+               bytes[pos] = lc_map[bytes[pos]];
+       }
+}
+
+/*
+ * The purpose of this function is fast, in-place conversion of a UTF-8
+ * string to lower case, so some locale peculiarities are simply ignored.
+ * If the lowercased string would be longer than the initial one, it is
+ * simply trimmed.
+ *
+ * @param str buffer that is both read and overwritten
+ * @param size number of bytes of str to process
+ *
+ * NOTE(review): characters are decoded with g_utf8_get_char without any
+ * validation here - presumably callers pass valid UTF-8; confirm.
+ */
+void
+rspamd_str_lc_utf8 (gchar *str, guint size)
+{
+       const gchar *s = str, *p;
+       gchar *d = str, tst[6];
+       gint remain = size;
+       gint r;
+       gunichar uc;
+
+       while (remain > 0) {
+               /* Decode one character at the read cursor and lowercase it */
+               uc = g_utf8_get_char (s);
+               uc = g_unichar_tolower (uc);
+               p = g_utf8_next_char (s);
+
+               /* The current character would extend past the remaining bytes */
+               if (p - s > remain) {
+                       break;
+               }
+
+               if (remain >= 6) {
+                       /* tst is 6 bytes, so with 6+ bytes left we encode straight into d */
+                       r = g_unichar_to_utf8 (uc, d);
+               }
+               else {
+                       /* We must be cautious here to avoid appending broken unicode */
+                       r = g_unichar_to_utf8 (uc, tst);
+                       if (r > remain) {
+                               break;
+                       }
+                       else {
+                               memcpy (d, tst, r);
+                       }
+               }
+               /*
+                * NOTE(review): remain shrinks by the encoded length r while the read
+                * cursor advances by p - s; confirm this is intended when they differ.
+                */
+               remain -= r;
+               s = p;
+               d += r;
+       }
+}
+
+/*
+ * Equality callback for NUL-terminated string keys, ignoring ASCII case
+ * (g_ascii_strcasecmp).
+ */
+gboolean
+rspamd_strcase_equal (gconstpointer v, gconstpointer v2)
+{
+       const gchar *lhs = v, *rhs = v2;
+
+       return g_ascii_strcasecmp (lhs, rhs) == 0 ? TRUE : FALSE;
+}
+
+/*
+ * Case-insensitive hash of len bytes of in: every byte is mapped through
+ * lc_map and fed to a seeded XXH64 state; the 64-bit digest is returned
+ * converted to guint.
+ */
+static guint
+rspamd_icase_hash (const gchar *in, gsize len)
+{
+       guint leftover = len % 4;
+       guint fp, i;
+       const uint8_t* s = (const uint8_t*) in;
+       /* Scratch group of four lowercased bytes that is passed to XXH64_update */
+       union {
+               struct {
+                       guchar c1, c2, c3, c4;
+               } c;
+               guint32 pp;
+       } u;
+       XXH64_state_t st;
+
+       /* fp is the length of the prefix that consists of whole 4-byte groups */
+       fp = len - leftover;
+       XXH64_reset (&st, rspamd_hash_seed ());
+
+       for (i = 0; i != fp; i += 4) {
+               u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
+               u.c.c1 = lc_map[u.c.c1];
+               u.c.c2 = lc_map[u.c.c2];
+               u.c.c3 = lc_map[u.c.c3];
+               u.c.c4 = lc_map[u.c.c4];
+               XXH64_update (&st, &u.pp, sizeof (u));
+       }
+
+       /*
+        * Tail of 1-3 bytes: the cases fall through on purpose, filling c3, c2,
+        * c1 in that order, so the tail bytes land in the union in reverse order.
+        */
+       u.pp = 0;
+       switch (leftover) {
+       case 3:
+               u.c.c3 = lc_map[(guchar)s[i++]];
+               /* fallthrough */
+       case 2:
+               u.c.c2 = lc_map[(guchar)s[i++]];
+               /* fallthrough */
+       case 1:
+               u.c.c1 = lc_map[(guchar)s[i]];
+               XXH64_update (&st, &u.pp, leftover);
+               break;
+       }
+
+       return XXH64_digest (&st);
+}
+
+/*
+ * Case-insensitive hash of a NUL-terminated string: the key is measured with
+ * strlen and handed to rspamd_icase_hash.
+ */
+guint
+rspamd_strcase_hash (gconstpointer key)
+{
+       const gchar *str = key;
+
+       return rspamd_icase_hash (str, strlen (str));
+}
+
+/*
+ * Case-sensitive hash of a NUL-terminated string: XXH64 over strlen (key)
+ * bytes with the seed from rspamd_hash_seed.
+ */
+guint
+rspamd_str_hash (gconstpointer key)
+{
+       const gchar *str = key;
+       gsize nbytes = strlen (str);
+
+       return XXH64 (str, nbytes, rspamd_hash_seed ());
+}
+
+/*
+ * Equality callback for NUL-terminated string keys (exact strcmp match).
+ */
+gboolean
+rspamd_str_equal (gconstpointer v, gconstpointer v2)
+{
+       const gchar *lhs = v, *rhs = v2;
+
+       if (strcmp (lhs, rhs) != 0) {
+               return FALSE;
+       }
+
+       return TRUE;
+}
+
+/*
+ * Equality callback for rspamd_fstring_t keys: equal when the lengths match
+ * and the bytes compare equal ignoring ASCII case.
+ */
+gboolean
+rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2)
+{
+       const rspamd_fstring_t *lhs = v, *rhs = v2;
+
+       if (lhs->len != rhs->len) {
+               return FALSE;
+       }
+
+       return g_ascii_strncasecmp (lhs->begin, rhs->begin, lhs->len) == 0 ?
+               TRUE : FALSE;
+}
+
+
+/*
+ * Case-insensitive hash of an rspamd_fstring_t key (begin/len pair).
+ */
+guint
+rspamd_fstring_icase_hash (gconstpointer key)
+{
+       const rspamd_fstring_t *fstr = (const rspamd_fstring_t *) key;
+       guint hash_val = rspamd_icase_hash (fstr->begin, fstr->len);
+
+       return hash_val;
+}
+
+/*
+ * Equality callback for GString keys: equal when the lengths match and the
+ * bytes compare equal ignoring ASCII case.
+ */
+gboolean
+rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2)
+{
+       const GString *lhs = v, *rhs = v2;
+
+       if (lhs->len != rhs->len) {
+               return FALSE;
+       }
+
+       return g_ascii_strncasecmp (lhs->str, rhs->str, lhs->len) == 0 ?
+               TRUE : FALSE;
+}
+
+
+/*
+ * Case-insensitive hash of a GString key (str/len pair).
+ */
+guint
+rspamd_gstring_icase_hash (gconstpointer key)
+{
+       const GString *gstr = (const GString *) key;
+       guint hash_val = rspamd_icase_hash (gstr->str, gstr->len);
+
+       return hash_val;
+}
+
+/*
+ * Copy src into dst, writing at most siz bytes including the terminating
+ * NUL. At most siz - 1 bytes of src are read, so src does not have to be
+ * NUL-terminated as long as that many bytes are readable.
+ *
+ * @param dst destination buffer
+ * @param src source string
+ * @param siz size of the destination buffer; nothing is written when it is 0
+ * @return number of bytes copied, not counting the terminating NUL
+ */
+gsize
+rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz)
+{
+       gchar *d = dst;
+       gsize nleft = siz;
+
+       /* Copy as many bytes as will fit */
+       if (nleft != 0) {
+               while (--nleft != 0) {
+                       if ((*d++ = *src++) == '\0') {
+                               /* Step back so that the NUL is not counted below */
+                               d--;
+                               break;
+                       }
+               }
+       }
+
+       /* Ran out of room before the source ended: terminate explicitly */
+       if (nleft == 0 && siz != 0) {
+               *d = '\0';
+       }
+
+       /*
+        * Derive the count from the write cursor: it is correct both for
+        * truncated copies and for siz == 0 (where it is 0).
+        */
+       return (d - dst);
+}
+
+/*
+ * Lowercasing variant of rspamd_strlcpy: copy src into dst through
+ * g_ascii_tolower, writing at most siz bytes including the terminating NUL.
+ *
+ * @param dst destination buffer
+ * @param src source string
+ * @param siz size of the destination buffer; nothing is written when it is 0
+ * @return number of bytes copied, not counting the terminating NUL
+ */
+gsize
+rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz)
+{
+       gchar *d = dst;
+       gsize nleft = siz;
+
+       /* Copy as many bytes as will fit */
+       if (nleft != 0) {
+               while (--nleft != 0) {
+                       if ((*d++ = g_ascii_tolower (*src++)) == '\0') {
+                               /* Step back so that the NUL is not counted below */
+                               d--;
+                               break;
+                       }
+               }
+       }
+
+       /* Ran out of room before the source ended: terminate explicitly */
+       if (nleft == 0 && siz != 0) {
+               *d = '\0';
+       }
+
+       /*
+        * Derive the count from the write cursor: it is correct both for
+        * truncated copies and for siz == 0 (where it is 0).
+        */
+       return (d - dst);
+}
+
+
+/*
+ * Find the first occurrence of find in s, ignoring ASCII case.
+ *
+ * @param s haystack; the search also stops at its first NUL byte
+ * @param find NUL-terminated needle; an empty needle matches at s
+ * @param len number of bytes of s that may be examined; a match has to lie
+ *            completely inside them. A negative value means "no limit", in
+ *            which case s must be NUL-terminated
+ * @return pointer to the start of the match inside s or NULL if there is none
+ */
+gchar *
+rspamd_strncasestr (const gchar *s, const gchar *find, gint len)
+{
+       gchar c, sc;
+       gsize mlen;
+       gboolean bounded = (len >= 0);
+
+       if ((c = *find++) != 0) {
+               c = g_ascii_tolower (c);
+               mlen = strlen (find);
+               do {
+                       do {
+                               /* Check the budget before touching the next byte */
+                               if (bounded && len-- == 0) {
+                                       return (NULL);
+                               }
+                               if ((sc = *s++) == 0) {
+                                       return (NULL);
+                               }
+                       } while (g_ascii_tolower (sc) != c);
+
+                       /*
+                        * The rest of the needle must fit into the remaining bytes;
+                        * if it does not, no later position can match either.
+                        */
+                       if (bounded && mlen > (gsize)len) {
+                               return (NULL);
+                       }
+               } while (g_ascii_strncasecmp (s, find, mlen) != 0);
+               s--;
+       }
+       return ((gchar *)s);
+}
+
+/*
+ * Try to convert the first len bytes of s to a long.
+ *
+ * The input is an optional leading '-' followed by decimal digits only.
+ *
+ * @param s input bytes (does not have to be NUL-terminated)
+ * @param len number of bytes to parse
+ * @param value receives the result; on a range error it is set to G_MINLONG
+ *              or G_MAXLONG, on a non-digit byte it is left untouched
+ * @return TRUE if all len bytes were consumed (an input without digits
+ *         yields 0), FALSE on a non-digit byte or a range error
+ */
+gboolean
+rspamd_strtol (const gchar *s, gsize len, glong *value)
+{
+       const gchar *p = s, *end = s + len;
+       gchar c;
+       glong v = 0;
+       /* v may only take another digit while v * 10 + digit <= G_MAXLONG */
+       const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10;
+       gboolean neg = FALSE;
+
+       /* Negative values; never look at s[0] when the input is empty */
+       if (p < end && *p == '-') {
+               neg = TRUE;
+               p++;
+       }
+
+       while (p < end) {
+               c = *p;
+               if (c >= '0' && c <= '9') {
+                       c -= '0';
+                       if (v > cutoff || (v == cutoff && c > cutlim)) {
+                               /* Range error */
+                               *value = neg ? G_MINLONG : G_MAXLONG;
+                               return FALSE;
+                       }
+                       else {
+                               v *= 10;
+                               v += c;
+                       }
+               }
+               else {
+                       return FALSE;
+               }
+               p++;
+       }
+
+       *value = neg ? -(v) : v;
+       return TRUE;
+}
+
+/*
+ * Try to convert the first len bytes of s (decimal digits only) to an
+ * unsigned long. Returns FALSE on a non-digit byte (*value untouched) or on
+ * overflow (*value set to G_MAXULONG), TRUE otherwise.
+ */
+gboolean
+rspamd_strtoul (const gchar *s, gsize len, gulong *value)
+{
+       const gulong max_prefix = G_MAXULONG / 10, max_last = G_MAXULONG % 10;
+       const gchar *cur, *stop = s + len;
+       gulong acc = 0;
+       gchar digit;
+
+       for (cur = s; cur < stop; cur++) {
+               digit = *cur;
+
+               if (digit < '0' || digit > '9') {
+                       return FALSE;
+               }
+
+               digit -= '0';
+
+               if (acc > max_prefix ||
+                               (acc == max_prefix && (guint8)digit > max_last)) {
+                       /* Taking this digit would not fit into gulong */
+                       *value = G_MAXULONG;
+                       return FALSE;
+               }
+
+               acc *= 10;
+               acc += digit;
+       }
+
+       *value = acc;
+       return TRUE;
+}
+
+/**
+ * Copy callback for rspamd_hash_table_copy that duplicates a string into a
+ * memory pool
+ * @param data string to copy (may be NULL)
+ * @param ud memory pool to use
+ * @return pool-allocated copy of data or NULL if data is NULL
+ */
+gpointer
+rspamd_str_pool_copy (gconstpointer data, gpointer ud)
+{
+       rspamd_mempool_t *pool = ud;
+
+       if (data == NULL) {
+               return NULL;
+       }
+
+       return rspamd_mempool_strdup (pool, data);
+}
+
+/*
+ * We use here z-base32 encoding described here:
+ * http://philzimmermann.com/docs/human-oriented-base-32-encoding.txt
+ *
+ * Encodes inlen bytes of in into a freshly g_malloc'ed, NUL-terminated
+ * string. Bits are consumed starting from the low bits of each input byte:
+ * the first output character encodes in[0] & 0x1F. No padding characters
+ * are emitted.
+ */
+
+gchar *
+rspamd_encode_base32 (const guchar *in, gsize inlen)
+{
+       /* remain holds the bits not yet emitted; -1 means there are none */
+       gint remain = -1, x;
+       gsize i, r;
+       gsize allocated_len = inlen * 8 / 5 + 2;
+       gchar *out;
+       static const char b32[]="ybndrfg8ejkmcpqxot1uwisza345h769";
+
+       out = g_malloc (allocated_len);
+       for (i = 0, r = 0; i < inlen; i++) {
+               /* The bit layout repeats every 5 input bytes (40 bits, 8 characters) */
+               switch (i % 5) {
+               case 0:
+                       /* 8 bits of input and 3 to remain */
+                       x = in[i];
+                       remain = in[i] >> 5;
+                       out[r++] = b32[x & 0x1F];
+                       break;
+               case 1:
+                       /* 11 bits of input, 1 to remain */
+                       x = remain | in[i] << 3;
+                       out[r++] = b32[x & 0x1F];
+                       out[r++] = b32[x >> 5 & 0x1F];
+                       remain = x >> 10;
+                       break;
+               case 2:
+                       /* 9 bits of input, 4 to remain */
+                       x = remain | in[i] << 1;
+                       out[r++] = b32[x & 0x1F];
+                       remain = x >> 5;
+                       break;
+               case 3:
+                       /* 12 bits of input, 2 to remain */
+                       x = remain | in[i] << 4;
+                       out[r++] = b32[x & 0x1F];
+                       out[r++] = b32[x >> 5 & 0x1F];
+                       remain = x >> 10 & 0x3;
+                       break;
+               case 4:
+                       /* 10 bits of input, nothing to remain */
+                       x = remain | in[i] << 2;
+                       out[r++] = b32[x & 0x1F];
+                       out[r++] = b32[x >> 5 & 0x1F];
+                       remain = -1;
+                       break;
+               default:
+                       /* Should never happen */
+                       break;
+               }
+
+       }
+       /* Emit the leftover bits as one final character */
+       if (remain >= 0) {
+               out[r++] = b32[remain];
+       }
+
+       out[r] = 0;
+       /* NOTE(review): this bound is checked only after out[r] has been written */
+       g_assert (r < allocated_len);
+
+       return out;
+}
+
+/*
+ * Reverse lookup table for base32 decoding, indexed by input byte: holds the
+ * 5-bit value of the character or 0xff for bytes that are not part of the
+ * alphabet. The rows for upper-case and lower-case letters are identical.
+ */
+static const guchar b32_dec[] = {
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0x12, 0xff, 0x19, 0x1a, 0x1b, 0x1e, 0x1d,
+       0x07, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06,
+       0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10,
+       0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14,
+       0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06,
+       0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10,
+       0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14,
+       0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+/*
+ * Decode inlen base32 characters from in (looked up through b32_dec).
+ * Returns a g_malloc'ed buffer and stores the number of decoded bytes in
+ * *outlen, or frees the buffer and returns NULL (leaving *outlen untouched)
+ * when a byte outside the alphabet is met.
+ */
+guchar*
+rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen)
+{
+       gsize capacity = inlen * 5 / 8 + 2, written = 0, pos;
+       guint bitbuf = 0U, nbits = 0;
+       guchar *buf, sym;
+
+       buf = g_malloc (capacity);
+
+       for (pos = 0; pos < inlen; pos++) {
+               /* Flush one complete byte before taking in five more bits */
+               if (nbits >= 8) {
+                       buf[written++] = bitbuf & 0xFF;
+                       bitbuf >>= 8;
+                       nbits -= 8;
+               }
+
+               sym = b32_dec[(guchar)in[pos]];
+
+               if (sym == 0xff) {
+                       /* Not a character of the alphabet */
+                       g_free (buf);
+                       return NULL;
+               }
+
+               /* New bits go above the ones that are already collected */
+               bitbuf = bitbuf | (sym << nbits);
+               nbits += 5;
+       }
+
+       /* Whatever is still collected ends up in one final byte */
+       if (nbits > 0) {
+               buf[written++] = (bitbuf & 0xFF);
+       }
+
+       g_assert (written <= capacity);
+       *outlen = written;
+
+       return buf;
+}
+
+
+/*
+ * Encode inlen bytes of in using base64 with '=' padding.
+ *
+ * @param in input bytes
+ * @param inlen input length
+ * @param str_len if positive, a CRLF pair is inserted once a line has reached
+ *                str_len characters; must then be greater than 8. If <= 0,
+ *                the output is a single line
+ * @param outlen if not NULL, receives the output length without the final NUL
+ * @return NUL-terminated string allocated with g_malloc
+ */
+gchar *
+rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, gsize *outlen)
+{
+#define CHECK_SPLIT \
+       do { if (str_len > 0 && cols >= str_len) { \
+                               *o++ = '\r'; \
+                               *o++ = '\n'; \
+                               cols = 0; \
+       } } \
+while (0)
+
+       /*
+        * Up to (inlen / 3) * 4 + 4 characters are produced when inlen is not a
+        * multiple of 3, and one more byte is needed for the terminating NUL.
+        */
+       gsize allocated_len = (inlen / 3) * 4 + 5;
+       gchar *out, *o;
+       guint64 n;
+       guint32 rem, t, carry;
+       gint cols, shift;
+       static const char b64_enc[] =
+               "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+               "abcdefghijklmnopqrstuvwxyz"
+               "0123456789+/";
+
+       if (str_len > 0) {
+               g_assert (str_len > 8);
+               /* Room for the CRLF pairs */
+               allocated_len += (allocated_len / str_len + 1) * 2 + 1;
+       }
+
+       out = g_malloc (allocated_len);
+       o = out;
+       cols = 0;
+
+       /*
+        * Fast path: load 8 bytes, encode the leading 6 of them as 8 characters.
+        * It is only taken while 8 bytes are really available, and the load goes
+        * through memcpy so that no alignment of in is required.
+        */
+       while (inlen >= sizeof (n)) {
+               memcpy (&n, in, sizeof (n));
+               n = GUINT64_TO_BE (n);
+
+               if (str_len <= 0 || cols <= str_len - 8) {
+                       *o++ = b64_enc[(n >> 58) & 0x3F];
+                       *o++ = b64_enc[(n >> 52) & 0x3F];
+                       *o++ = b64_enc[(n >> 46) & 0x3F];
+                       *o++ = b64_enc[(n >> 40) & 0x3F];
+                       *o++ = b64_enc[(n >> 34) & 0x3F];
+                       *o++ = b64_enc[(n >> 28) & 0x3F];
+                       *o++ = b64_enc[(n >> 22) & 0x3F];
+                       *o++ = b64_enc[(n >> 16) & 0x3F];
+                       cols += 8;
+               }
+               else {
+                       /* The line ends inside this group: fill it up first */
+                       cols = str_len - cols;
+                       shift = 58;
+                       while (cols) {
+                               *o++ = b64_enc[(n >> shift) & 0x3F];
+                               shift -= 6;
+                               cols --;
+                       }
+
+                       *o++ = '\r';
+                       *o++ = '\n';
+
+                       /* Remaining characters of the group start the next line */
+                       while (shift >= 16) {
+                               *o++ = b64_enc[(n >> shift) & 0x3F];
+                               shift -= 6;
+                               cols ++;
+                       }
+               }
+
+               in += 6;
+               inlen -= 6;
+       }
+
+       CHECK_SPLIT;
+
+       rem = 0;
+       carry = 0;
+
+       for (;;) {
+               /*
+                * Remaining data byte by byte; rem is the position inside the
+                * current 3-byte group, the cases fall through on purpose.
+                */
+               switch (rem) {
+               case 0:
+                       if (inlen-- == 0) {
+                               goto end;
+                       }
+                       t = *in++;
+                       *o++ = b64_enc[t >> 2];
+                       carry = (t << 4) & 0x30;
+                       rem = 1;
+                       cols ++;
+                       /* fallthrough */
+               case 1:
+                       if (inlen-- == 0) {
+                               goto end;
+                       }
+                       CHECK_SPLIT;
+                       t = *in++;
+                       *o++ = b64_enc[carry | (t >> 4)];
+                       carry = (t << 2) & 0x3C;
+                       rem = 2;
+                       cols ++;
+                       /* fallthrough */
+               default:
+                       if (inlen-- == 0) {
+                               goto end;
+                       }
+                       CHECK_SPLIT;
+                       t = *in ++;
+                       *o++ = b64_enc[carry | (t >> 6)];
+                       cols ++;
+                       CHECK_SPLIT;
+                       *o++ = b64_enc[t & 0x3F];
+                       cols ++;
+                       CHECK_SPLIT;
+                       rem = 0;
+               }
+       }
+
+end:
+       /* Flush the pending bits of an incomplete group and pad it with '=' */
+       if (rem == 1) {
+               *o++ = b64_enc[carry];
+               cols ++;
+               CHECK_SPLIT;
+               *o++ = '=';
+               cols ++;
+               CHECK_SPLIT;
+               *o++ = '=';
+               cols ++;
+               CHECK_SPLIT;
+       }
+       else if (rem == 2) {
+               *o++ = b64_enc[carry];
+               cols ++;
+               CHECK_SPLIT;
+               *o++ = '=';
+               cols ++;
+       }
+
+       CHECK_SPLIT;
+
+       *o = '\0';
+
+       if (outlen != NULL) {
+               *outlen = o - out;
+       }
+
+       return out;
+}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
new file mode 100644 (file)
index 0000000..a9c920c
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef SRC_LIBUTIL_STR_UTIL_H_
+#define SRC_LIBUTIL_STR_UTIL_H_
+
+#include "config.h"
+
+/**
+ * Convert string to lowercase in-place using ASCII conversion
+ */
+void rspamd_str_lc (gchar *str, guint size);
+/**
+ * Convert string to lowercase in-place using utf (limited) conversion
+ */
+void rspamd_str_lc_utf8 (gchar *str, guint size);
+
+/*
+ * Hash table utility functions for case insensitive hashing
+ */
+guint rspamd_strcase_hash (gconstpointer key);
+gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2);
+
+/*
+ * Hash table utility functions for case sensitive hashing
+ */
+guint rspamd_str_hash (gconstpointer key);
+gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2);
+
+
+/*
+ * Hash table utility functions for hashing fixed strings
+ */
+guint rspamd_fstring_icase_hash (gconstpointer key);
+gboolean rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2);
+guint rspamd_gstring_icase_hash (gconstpointer key);
+gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2);
+
+/**
+ * Copy src to dst, writing at most siz bytes including the terminating NUL.
+ * In contrast to the standard strlcpy(3), rspamd_strlcpy does not traverse
+ * the whole source string, so it can also be used for strings that are not
+ * NUL-terminated. This is more like memccpy(dst, src, '\0', siz).
+ *
+ * @param dst destination string
+ * @param src source string
+ * @param siz length of destination buffer
+ * @return bytes copied
+ */
+gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz);
+
+/**
+ * Lowercase strlcpy variant
+ * @param dst
+ * @param src
+ * @param siz
+ * @return
+ */
+gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz);
+
+/*
+ * Find string find in string s ignoring case
+ */
+gchar * rspamd_strncasestr (const gchar *s, const gchar *find, gint len);
+
+/*
+ * Try to convert string of length to long
+ */
+gboolean rspamd_strtol (const gchar *s, gsize len, glong *value);
+
+/*
+ * Try to convert string of length to unsigned long
+ */
+gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value);
+
+/**
+ * Utility function to provide mem_pool copy for rspamd_hash_table_copy function
+ * @param data string to copy
+ * @param ud memory pool to use
+ * @return
+ */
+gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud);
+
+/**
+ * Encode string using base32 encoding
+ * @param in input
+ * @param inlen input length
+ * @return freshly allocated base32 encoding of a specified string
+ */
+gchar * rspamd_encode_base32 (const guchar *in, gsize inlen);
+
+/**
+ * Decode string using base32 encoding
+ * @param in input
+ * @param inlen input length
+ * @param outlen filled with the number of decoded bytes on success
+ * @return freshly allocated base32 decoded value or NULL if input is invalid
+ */
+guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen);
+
+/**
+ * Encode string using base64 encoding
+ * @param in input
+ * @param inlen input length
+ * @param str_len maximum line length; must be greater than 8 when positive
+ * (if <= 0 then the output is not split into lines)
+ * @param outlen if not NULL, filled with the length of the encoded string
+ * @return freshly allocated, NUL-terminated base64 encoded value
+ */
+gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
+               gsize *outlen);
+
+/*
+ * Fallback ASCII-only lowercase macro, defined only if g_tolower is not
+ * already defined as a macro. The argument is evaluated more than once, so
+ * it must not have side effects.
+ */
+#ifndef g_tolower
+#   define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
+#endif
+
+#endif /* SRC_LIBUTIL_STR_UTIL_H_ */
index bc58fc70a5ba510c6f27801f05949c21de93b961..02f1721ff225a930b04466da3d7c9ffc731ed484 100644 (file)
@@ -660,114 +660,6 @@ rspamd_pass_signal (GHashTable * workers, gint signo)
        g_hash_table_foreach (workers, pass_signal_cb, GINT_TO_POINTER (signo));
 }
 
-static const guchar lc_map[256] = {
-               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-               0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
-               0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
-               0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
-               0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
-               0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
-               0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
-               0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
-               0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
-               0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
-               0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
-               0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
-               0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
-               0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
-               0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
-               0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
-               0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-               0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
-               0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
-               0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
-               0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-               0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
-               0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
-               0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
-               0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
-               0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
-               0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
-               0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
-               0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
-               0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
-               0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
-               0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
-};
-
-void
-rspamd_str_lc (gchar *str, guint size)
-{
-       guint leftover = size % 4;
-       guint fp, i;
-       const uint8_t* s = (const uint8_t*) str;
-       gchar *dest = str;
-       guchar c1, c2, c3, c4;
-
-       fp = size - leftover;
-
-       for (i = 0; i != fp; i += 4) {
-               c1 = s[i], c2 = s[i + 1], c3 = s[i + 2], c4 = s[i + 3];
-               dest[0] = lc_map[c1];
-               dest[1] = lc_map[c2];
-               dest[2] = lc_map[c3];
-               dest[3] = lc_map[c4];
-               dest += 4;
-       }
-
-       switch (leftover) {
-       case 3:
-               *dest++ = lc_map[(guchar)str[i++]];
-       case 2:
-               *dest++ = lc_map[(guchar)str[i++]];
-       case 1:
-               *dest++ = lc_map[(guchar)str[i]];
-       }
-
-}
-
-/*
- * The purpose of this function is fast and in place conversion of a unicode
- * string to lower case, so some locale peculiarities are simply ignored
- * If the target string is longer than initial one, then we just trim it
- */
-void
-rspamd_str_lc_utf8 (gchar *str, guint size)
-{
-       const gchar *s = str, *p;
-       gchar *d = str, tst[6];
-       gint remain = size;
-       gint r;
-       gunichar uc;
-
-       while (remain > 0) {
-               uc = g_utf8_get_char (s);
-               uc = g_unichar_tolower (uc);
-               p = g_utf8_next_char (s);
-
-               if (p - s > remain) {
-                       break;
-               }
-
-               if (remain >= 6) {
-                       r = g_unichar_to_utf8 (uc, d);
-               }
-               else {
-                       /* We must be cautious here to avoid broken unicode being append */
-                       r = g_unichar_to_utf8 (uc, tst);
-                       if (r > remain) {
-                               break;
-                       }
-                       else {
-                               memcpy (d, tst, r);
-                       }
-               }
-               remain -= r;
-               s = p;
-               d += r;
-       }
-}
-
 #ifndef HAVE_SETPROCTITLE
 
 static gchar *title_buffer = 0;
@@ -1208,130 +1100,6 @@ rspamd_log_check_time (gdouble start_real, gdouble start_virtual, gint resolutio
        return (const gchar *)res;
 }
 
-#ifndef g_tolower
-#   define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? (x) - 'A' + 'a' : (x))
-#endif
-
-
-gboolean
-rspamd_strcase_equal (gconstpointer v, gconstpointer v2)
-{
-       if (g_ascii_strcasecmp ((const gchar *)v, (const gchar *)v2) == 0) {
-               return TRUE;
-       }
-
-       return FALSE;
-}
-
-static guint
-rspamd_icase_hash (const gchar *in, gsize len)
-{
-       guint leftover = len % 4;
-       guint fp, i;
-       const uint8_t* s = (const uint8_t*) in;
-       union {
-               struct {
-                       guchar c1, c2, c3, c4;
-               } c;
-               guint32 pp;
-       } u;
-       XXH64_state_t st;
-
-       fp = len - leftover;
-       XXH64_reset (&st, rspamd_hash_seed ());
-
-       for (i = 0; i != fp; i += 4) {
-               u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
-               u.c.c1 = lc_map[u.c.c1];
-               u.c.c2 = lc_map[u.c.c2];
-               u.c.c3 = lc_map[u.c.c3];
-               u.c.c4 = lc_map[u.c.c4];
-               XXH64_update (&st, &u.pp, sizeof (u));
-       }
-
-       u.pp = 0;
-       switch (leftover) {
-       case 3:
-               u.c.c3 = lc_map[(guchar)s[i++]];
-       case 2:
-               u.c.c2 = lc_map[(guchar)s[i++]];
-       case 1:
-               u.c.c1 = lc_map[(guchar)s[i]];
-               XXH64_update (&st, &u.pp, leftover);
-               break;
-       }
-
-       return XXH64_digest (&st);
-}
-
-guint
-rspamd_strcase_hash (gconstpointer key)
-{
-       const gchar *p = key;
-       gsize len;
-
-       len = strlen (p);
-
-       return rspamd_icase_hash (p, len);
-}
-
-guint
-rspamd_str_hash (gconstpointer key)
-{
-       gsize len;
-
-       len = strlen ((const gchar *)key);
-
-       return XXH64 (key, len, rspamd_hash_seed ());
-}
-
-gboolean
-rspamd_str_equal (gconstpointer v, gconstpointer v2)
-{
-       return strcmp ((const gchar *)v, (const gchar *)v2) == 0;
-}
-
-gboolean
-rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2)
-{
-       const rspamd_fstring_t *f1 = v, *f2 = v2;
-       if (f1->len == f2->len &&
-               g_ascii_strncasecmp (f1->begin, f2->begin, f1->len) == 0) {
-               return TRUE;
-       }
-
-       return FALSE;
-}
-
-
-guint
-rspamd_fstring_icase_hash (gconstpointer key)
-{
-       const rspamd_fstring_t *f = key;
-
-       return rspamd_icase_hash (f->begin, f->len);
-}
-
-gboolean
-rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2)
-{
-       const GString *f1 = v, *f2 = v2;
-       if (f1->len == f2->len &&
-               g_ascii_strncasecmp (f1->str, f2->str, f1->len) == 0) {
-               return TRUE;
-       }
-
-       return FALSE;
-}
-
-
-guint
-rspamd_gstring_icase_hash (gconstpointer key)
-{
-       const GString *f = key;
-
-       return rspamd_icase_hash (f->str, f->len);
-}
 
 void
 gperf_profiler_init (struct rspamd_config *cfg, const gchar *descr)
@@ -1494,53 +1262,6 @@ g_ptr_array_new_full (guint reserved_size,
 }
 #endif
 
-
-gsize
-rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz)
-{
-       gchar *d = dst;
-       const gchar *s = src;
-       gsize n = siz;
-
-       /* Copy as many bytes as will fit */
-       if (n != 0) {
-               while (--n != 0) {
-                       if ((*d++ = *s++) == '\0') {
-                               break;
-                       }
-               }
-       }
-
-       if (n == 0 && siz != 0) {
-               *d = '\0';
-       }
-
-       return (s - src - 1);    /* count does not include NUL */
-}
-
-gsize
-rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz)
-{
-       gchar *d = dst;
-       const gchar *s = src;
-       gsize n = siz;
-
-       /* Copy as many bytes as will fit */
-       if (n != 0) {
-               while (--n != 0) {
-                       if ((*d++ = g_ascii_tolower (*s++)) == '\0') {
-                               break;
-                       }
-               }
-       }
-
-       if (n == 0 && siz != 0) {
-               *d = '\0';
-       }
-
-       return (s - src - 1);    /* count does not include NUL */
-}
-
 guint
 rspamd_url_hash (gconstpointer u)
 {
@@ -1607,111 +1328,6 @@ rspamd_urls_cmp (gconstpointer a, gconstpointer b)
        return r == 0;
 }
 
-/*
- * Find the first occurrence of find in s, ignore case.
- */
-gchar *
-rspamd_strncasestr (const gchar *s, const gchar *find, gint len)
-{
-       gchar c, sc;
-       gsize mlen;
-
-       if ((c = *find++) != 0) {
-               c = g_ascii_tolower (c);
-               mlen = strlen (find);
-               do {
-                       do {
-                               if ((sc = *s++) == 0 || len-- == 0)
-                                       return (NULL);
-                       } while (g_ascii_tolower (sc) != c);
-               } while (g_ascii_strncasecmp (s, find, mlen) != 0);
-               s--;
-       }
-       return ((gchar *)s);
-}
-
-/*
- * Try to convert string of length to long
- */
-gboolean
-rspamd_strtol (const gchar *s, gsize len, glong *value)
-{
-       const gchar *p = s, *end = s + len;
-       gchar c;
-       glong v = 0;
-       const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10;
-       gboolean neg;
-
-       /* Case negative values */
-       if (*p == '-') {
-               neg = TRUE;
-               p++;
-       }
-       else {
-               neg = FALSE;
-       }
-       /* Some preparations for range errors */
-
-       while (p < end) {
-               c = *p;
-               if (c >= '0' && c <= '9') {
-                       c -= '0';
-                       if (v > cutoff || (v == cutoff && c > cutlim)) {
-                               /* Range error */
-                               *value = neg ? G_MINLONG : G_MAXLONG;
-                               return FALSE;
-                       }
-                       else {
-                               v *= 10;
-                               v += c;
-                       }
-               }
-               else {
-                       return FALSE;
-               }
-               p++;
-       }
-
-       *value = neg ? -(v) : v;
-       return TRUE;
-}
-
-/*
- * Try to convert string of length to long
- */
-gboolean
-rspamd_strtoul (const gchar *s, gsize len, gulong *value)
-{
-       const gchar *p = s, *end = s + len;
-       gchar c;
-       gulong v = 0;
-       const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10;
-
-       /* Some preparations for range errors */
-       while (p < end) {
-               c = *p;
-               if (c >= '0' && c <= '9') {
-                       c -= '0';
-                       if (v > cutoff || (v == cutoff && (guint8)c > cutlim)) {
-                               /* Range error */
-                               *value = G_MAXULONG;
-                               return FALSE;
-                       }
-                       else {
-                               v *= 10;
-                               v += c;
-                       }
-               }
-               else {
-                       return FALSE;
-               }
-               p++;
-       }
-
-       *value = v;
-       return TRUE;
-}
-
 gint
 rspamd_fallocate (gint fd, off_t offset, off_t len)
 {
@@ -1978,20 +1594,6 @@ rspamd_hash_table_copy (GHashTable *src, GHashTable *dst,
        }
 }
 
-/**
- * Utility function to provide mem_pool copy for rspamd_hash_table_copy function
- * @param data string to copy
- * @param ud memory pool to use
- * @return
- */
-gpointer
-rspamd_str_pool_copy (gconstpointer data, gpointer ud)
-{
-       rspamd_mempool_t *pool = ud;
-
-       return data ? rspamd_mempool_strdup (pool, data) : NULL;
-}
-
 static volatile sig_atomic_t saved_signo[NSIG];
 
 static
@@ -2190,292 +1792,6 @@ rspamd_ucl_emit_gstring (ucl_object_t *obj,
        ucl_object_emit_full (obj, emit_type, &func);
 }
 
-/*
- * We use here z-base32 encoding described here:
- * http://philzimmermann.com/docs/human-oriented-base-32-encoding.txt
- */
-
-gchar *
-rspamd_encode_base32 (const guchar *in, gsize inlen)
-{
-       gint remain = -1, x;
-       gsize i, r;
-       gsize allocated_len = inlen * 8 / 5 + 2;
-       gchar *out;
-       static const char b32[]="ybndrfg8ejkmcpqxot1uwisza345h769";
-
-       out = g_malloc (allocated_len);
-       for (i = 0, r = 0; i < inlen; i++) {
-               switch (i % 5) {
-               case 0:
-                       /* 8 bits of input and 3 to remain */
-                       x = in[i];
-                       remain = in[i] >> 5;
-                       out[r++] = b32[x & 0x1F];
-                       break;
-               case 1:
-                       /* 11 bits of input, 1 to remain */
-                       x = remain | in[i] << 3;
-                       out[r++] = b32[x & 0x1F];
-                       out[r++] = b32[x >> 5 & 0x1F];
-                       remain = x >> 10;
-                       break;
-               case 2:
-                       /* 9 bits of input, 4 to remain */
-                       x = remain | in[i] << 1;
-                       out[r++] = b32[x & 0x1F];
-                       remain = x >> 5;
-                       break;
-               case 3:
-                       /* 12 bits of input, 2 to remain */
-                       x = remain | in[i] << 4;
-                       out[r++] = b32[x & 0x1F];
-                       out[r++] = b32[x >> 5 & 0x1F];
-                       remain = x >> 10 & 0x3;
-                       break;
-               case 4:
-                       /* 10 bits of output, nothing to remain */
-                       x = remain | in[i] << 2;
-                       out[r++] = b32[x & 0x1F];
-                       out[r++] = b32[x >> 5 & 0x1F];
-                       remain = -1;
-                       break;
-               default:
-                       /* Not to be happen */
-                       break;
-               }
-
-       }
-       if (remain >= 0) {
-               out[r++] = b32[remain];
-       }
-
-       out[r] = 0;
-       g_assert (r < allocated_len);
-
-       return out;
-}
-
-static const guchar b32_dec[] = {
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0x12, 0xff, 0x19, 0x1a, 0x1b, 0x1e, 0x1d,
-       0x07, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06,
-       0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10,
-       0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14,
-       0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0x18, 0x01, 0x0c, 0x03, 0x08, 0x05, 0x06,
-       0x1c, 0x15, 0x09, 0x0a, 0xff, 0x0b, 0x02, 0x10,
-       0x0d, 0x0e, 0x04, 0x16, 0x11, 0x13, 0xff, 0x14,
-       0x0f, 0x00, 0x17, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-};
-
-guchar*
-rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen)
-{
-       guchar *res, decoded;
-       guchar c;
-       guint acc = 0U;
-       guint processed_bits = 0;
-       gsize olen = 0, i, allocated_len = inlen * 5 / 8 + 2;
-
-       res = g_malloc (allocated_len);
-
-       for (i = 0; i < inlen; i ++) {
-               c = (guchar)in[i];
-
-               if (processed_bits >= 8) {
-                       processed_bits -= 8;
-                       res[olen++] = acc & 0xFF;
-                       acc >>= 8;
-               }
-
-               decoded = b32_dec[c];
-               if (decoded == 0xff) {
-                       g_free (res);
-                       return NULL;
-               }
-
-               acc = (decoded << processed_bits) | acc;
-               processed_bits += 5;
-       }
-
-       if (processed_bits > 0) {
-               res[olen++] = (acc & 0xFF);
-       }
-
-       g_assert (olen <= allocated_len);
-
-       *outlen = olen;
-
-       return res;
-}
-
-
-gchar *
-rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len, gsize *outlen)
-{
-#define CHECK_SPLIT \
-       do { if (str_len > 0 && cols >= str_len) { \
-                               *o++ = '\r'; \
-                               *o++ = '\n'; \
-                               cols = 0; \
-       } } \
-while (0)
-
-       gsize allocated_len = (inlen / 3) * 4 + 4;
-       gchar *out, *o;
-       guint64 n;
-       guint32 rem, t, carry;
-       gint cols, shift;
-       static const char b64_enc[] =
-               "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-               "abcdefghijklmnopqrstuvwxyz"
-               "0123456789+/";
-
-       if (str_len > 0) {
-               g_assert (str_len > 8);
-               allocated_len += (allocated_len / str_len + 1) * 2 + 1;
-       }
-
-       out = g_malloc (allocated_len);
-       o = out;
-       cols = 0;
-
-       while (inlen > 6) {
-               n = *(guint64 *)in;
-               n = GUINT64_TO_BE (n);
-
-               if (str_len <= 0 || cols <= str_len - 8) {
-                       *o++ = b64_enc[(n >> 58) & 0x3F];
-                       *o++ = b64_enc[(n >> 52) & 0x3F];
-                       *o++ = b64_enc[(n >> 46) & 0x3F];
-                       *o++ = b64_enc[(n >> 40) & 0x3F];
-                       *o++ = b64_enc[(n >> 34) & 0x3F];
-                       *o++ = b64_enc[(n >> 28) & 0x3F];
-                       *o++ = b64_enc[(n >> 22) & 0x3F];
-                       *o++ = b64_enc[(n >> 16) & 0x3F];
-                       cols += 8;
-               }
-               else {
-                       cols = str_len - cols;
-                       shift = 58;
-                       while (cols) {
-                               *o++ = b64_enc[(n >> shift) & 0x3F];
-                               shift -= 6;
-                               cols --;
-                       }
-
-                       *o++ = '\r';
-                       *o++ = '\n';
-
-                       /* Remaining bytes */
-                       while (shift >= 16) {
-                               *o++ = b64_enc[(n >> shift) & 0x3F];
-                               shift -= 6;
-                               cols ++;
-                       }
-               }
-
-               in += 6;
-               inlen -= 6;
-       }
-
-       CHECK_SPLIT;
-
-       rem = 0;
-       carry = 0;
-
-       for (;;) {
-               /* Padding + remaining data (0 - 2 bytes) */
-               switch (rem) {
-               case 0:
-                       if (inlen-- == 0) {
-                               goto end;
-                       }
-                       t = *in++;
-                       *o++ = b64_enc[t >> 2];
-                       carry = (t << 4) & 0x30;
-                       rem = 1;
-                       cols ++;
-               case 1:
-                       if (inlen-- == 0) {
-                               goto end;
-                       }
-                       CHECK_SPLIT;
-                       t = *in++;
-                       *o++ = b64_enc[carry | (t >> 4)];
-                       carry = (t << 2) & 0x3C;
-                       rem = 2;
-                       cols ++;
-               default:
-                       if (inlen-- == 0) {
-                               goto end;
-                       }
-                       CHECK_SPLIT;
-                       t = *in ++;
-                       *o++ = b64_enc[carry | (t >> 6)];
-                       cols ++;
-                       CHECK_SPLIT;
-                       *o++ = b64_enc[t & 0x3F];
-                       cols ++;
-                       CHECK_SPLIT;
-                       rem = 0;
-               }
-       }
-
-end:
-       if (rem == 1) {
-               *o++ = b64_enc[carry];
-               cols ++;
-               CHECK_SPLIT;
-               *o++ = '=';
-               cols ++;
-               CHECK_SPLIT;
-               *o++ = '=';
-               cols ++;
-               CHECK_SPLIT;
-       }
-       else if (rem == 2) {
-               *o++ = b64_enc[carry];
-               cols ++;
-               CHECK_SPLIT;
-               *o++ = '=';
-               cols ++;
-       }
-
-       CHECK_SPLIT;
-
-       *o = '\0';
-
-       if (outlen != NULL) {
-               *outlen = o - out;
-       }
-
-       return out;
-}
-
 gdouble
 rspamd_get_ticks (void)
 {
index d3dcdbcf7299c86e56f606220c9f7481e96129d4..ee7bf01f8532898c6d8527ea3c6880c4abb63a0c 100644 (file)
@@ -7,6 +7,7 @@
 #include "fstring.h"
 #include "ucl.h"
 #include "addr.h"
+#include "str_util.h"
 
 struct rspamd_config;
 struct rspamd_main;
@@ -105,11 +106,6 @@ void rspamd_signals_init (struct sigaction *sa, void (*sig_handler)(gint));
  * Send specified signal to each worker
  */
 void rspamd_pass_signal (GHashTable *, gint );
-/*
- * Convert string to lowercase
- */
-void rspamd_str_lc (gchar *str, guint size);
-void rspamd_str_lc_utf8 (gchar *str, guint size);
 
 #ifndef HAVE_SETPROCTITLE
 /*
@@ -166,27 +162,6 @@ rspamd_log_check_time (gdouble start_real, gdouble start_virtual, gint resolutio
 gboolean rspamd_file_lock (gint fd, gboolean async);
 gboolean rspamd_file_unlock (gint fd, gboolean async);
 
-/*
- * Hash table utility functions for case insensitive hashing
- */
-guint rspamd_strcase_hash (gconstpointer key);
-gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2);
-
-/*
- * Hash table utility functions for case sensitive hashing
- */
-guint rspamd_str_hash (gconstpointer key);
-gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2);
-
-
-/*
- * Hash table utility functions for hashing fixed strings
- */
-guint rspamd_fstring_icase_hash (gconstpointer key);
-gboolean rspamd_fstring_icase_equal (gconstpointer v, gconstpointer v2);
-guint rspamd_gstring_icase_hash (gconstpointer key);
-gboolean rspamd_gstring_icase_equal (gconstpointer v, gconstpointer v2);
-
 /*
  * Google perf-tools initialization function
  */
@@ -203,27 +178,6 @@ void g_queue_clear (GQueue *queue);
 #endif
 
 
-/**
- * Copy src to dest limited to len, in compare with standart strlcpy(3) rspamd strlcpy does not
- * traverse the whole string and it is possible to use it for non NULL terminated strings. This is
- * more like memccpy(dst, src, size, '\0')
- *
- * @param dst destination string
- * @param src source string
- * @param siz length of destination buffer
- * @return bytes copied
- */
-gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz);
-
-/**
- * Lowercase strlcpy variant
- * @param dst
- * @param src
- * @param siz
- * @return
- */
-gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz);
-
 /*
  * Convert milliseconds to timeval fields
  */
@@ -245,21 +199,6 @@ gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
 /* Compare two urls for building emails hash */
 gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
 
-/*
- * Find string find in string s ignoring case
- */
-gchar * rspamd_strncasestr (const gchar *s, const gchar *find, gint len);
-
-/*
- * Try to convert string of length to long
- */
-gboolean rspamd_strtol (const gchar *s, gsize len, glong *value);
-
-/*
- * Try to convert string of length to unsigned long
- */
-gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value);
-
 /**
  * Try to allocate a file on filesystem (using fallocate or posix_fallocate)
  * @param fd descriptor
@@ -385,13 +324,6 @@ void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst,
        gpointer (*value_copy_func)(gconstpointer data, gpointer ud),
        gpointer ud);
 
-/**
- * Utility function to provide mem_pool copy for rspamd_hash_table_copy function
- * @param data string to copy
- * @param ud memory pool to use
- * @return
- */
-gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud);
 
 /**
  * Read passphrase from tty
@@ -413,32 +345,6 @@ void rspamd_ucl_emit_gstring (ucl_object_t *obj,
        enum ucl_emitter emit_type,
        GString *target);
 
-/**
- * Encode string using base32 encoding
- * @param in input
- * @param inlen input length
- * @return freshly allocated base32 encoding of a specified string
- */
-gchar * rspamd_encode_base32 (const guchar *in, gsize inlen);
-
-/**
- * Decode string using base32 encoding
- * @param in input
- * @param inlen input length
- * @return freshly allocated base32 decoded value or NULL if input is invalid
- */
-guchar* rspamd_decode_base32 (const gchar *in, gsize inlen, gsize *outlen);
-
-/**
- * Encode string using base64 encoding
- * @param in input
- * @param inlen input length
- * @param str_len maximum string length (if <= 0 then no lines are split)
- * @return freshly allocated base64 encoded value or NULL if input is invalid
- */
-gchar * rspamd_encode_base64 (const guchar *in, gsize inlen, gint str_len,
-               gsize *outlen);
-
 /**
  * Portably return the current clock ticks as seconds
  * @return