]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib: base64 - Properly implement incremental encoding.
authorStephan Bosch <stephan.bosch@dovecot.fi>
Sat, 16 Mar 2019 20:19:49 +0000 (21:19 +0100)
committerStephan Bosch <stephan.bosch@open-xchange.com>
Mon, 26 Aug 2019 19:31:39 +0000 (21:31 +0200)
src/lib/base64.c
src/lib/base64.h
src/lib/test-base64.c

index 3e7716170ab07cb19125450632fa5de419fb386b..6537feded7200d5e5adb51ca485445a408381a61 100644 (file)
 #include "buffer.h"
 
 /*
- * Common Base 64
+ * Low-level Base64 encoder
  */
 
-void base64_scheme_encode(const struct base64_scheme *b64,
-                         const void *src, size_t src_size,
-                         buffer_t *dest)
+/* Compute how many encoded bytes become available for output when src_size
+   more input bytes are consumed. The count starts from the bytes still
+   pending in enc->w_buf and only includes output characters that are fully
+   determined by the input seen so far. */
+static size_t
+base64_encode_get_out_size(struct base64_encoder *enc, size_t src_size)
 {
+       size_t out_size, remain = src_size;
+
+       out_size = enc->w_buf_len;
+       i_assert(enc->w_buf_len <= 4);
+
+       if (remain == 0)
+               return out_size;
+
+       /* First complete the 3-byte input group left open by earlier calls */
+       if (enc->sub_pos > 2)
+               i_unreached();
+       if (enc->sub_pos == 1) {
+               /* the second byte of the group completes one character */
+               out_size += 1;
+               remain -= 1;
+               if (remain == 0)
+                       return out_size;
+       }
+       if (enc->sub_pos != 0) {
+               /* the third byte of the group completes two characters */
+               out_size += 2;
+               remain -= 1;
+       }
+
+       /* From here on the input is aligned to a 3-byte boundary */
+       if (remain == 0)
+               return out_size;
+
+       /* Four characters per full group; a trailing group of one or two
+          bytes completes one or two characters respectively */
+       out_size += (remain / 3) * 4 + (remain % 3);
+       return out_size;
+}
+
+/* Size of the encoded output for the next src_size input bytes. When src_size
+   is zero, the size of the closing output (pending bytes plus the final
+   character and padding of an open input group) is returned instead. */
+size_t base64_encode_get_size(struct base64_encoder *enc, size_t src_size)
+{
+       size_t total = base64_encode_get_out_size(enc, src_size);
+
+       if (src_size != 0)
+               return total;
+
+       /* last block: account for the final character and the padding */
+       if (enc->sub_pos == 1)
+               total += 3;
+       else if (enc->sub_pos == 2)
+               total += 2;
+       else if (enc->sub_pos != 0)
+               i_unreached();
+
+       return total;
+}
+
+static void
+base64_encode_more_data(struct base64_encoder *enc,
+                       const unsigned char *src_c, size_t src_size,
+                       size_t *src_pos_r, size_t dst_avail, buffer_t *dest)
+{
+       const struct base64_scheme *b64 = enc->b64;
        const char *b64enc = b64->encmap;
-       const size_t res_size = MAX_BASE64_ENCODED_SIZE(src_size);
-       unsigned char *start = buffer_append_space_unsafe(dest, res_size);
-       unsigned char *ptr = start;
-       const unsigned char *src_c = src;
+       size_t res_size;
+       unsigned char *start, *ptr, *end;
        size_t src_pos;
 
-       for (src_pos = 0; src_size - src_pos > 2; src_pos += 3, ptr += 4) {
+       /* determine how much we can write in destination buffer */
+       if (dst_avail == 0) {
+               *src_pos_r = 0;
+               return;
+       }
+
+       /* pre-allocate space in the destination buffer */
+       res_size = base64_encode_get_out_size(enc, src_size);
+       if (res_size > dst_avail)
+               res_size = dst_avail;
+
+       start = buffer_append_space_unsafe(dest, res_size);
+       end = start + res_size;
+       ptr = start;
+
+       /* write bytes not written in previous call */
+       i_assert(enc->w_buf_len <= 4);
+       if (enc->w_buf_len > res_size) {
+               memcpy(ptr, enc->w_buf, res_size);
+               ptr += res_size;
+               enc->w_buf_len -= res_size;
+               memmove(enc->w_buf, enc->w_buf + res_size, enc->w_buf_len);
+       } else if (enc->w_buf_len > 0) {
+               memcpy(ptr, enc->w_buf, enc->w_buf_len);
+               ptr += enc->w_buf_len;
+               enc->w_buf_len = 0;
+       }
+       if (ptr == end) {
+               *src_pos_r = 0;
+               return;
+       }
+       i_assert(enc->w_buf_len == 0);
+       i_assert(src_size != 0);
+
+       /* Handle sub-position */
+       src_pos = 0;
+       switch (enc->sub_pos) {
+       case 0:
+               break;
+       case 1:
+               i_assert(ptr < end);
+               ptr[0] = b64enc[enc->buf | (src_c[src_pos] >> 4)];
+               ptr++;
+               enc->buf = (src_c[src_pos] & 0x0f) << 2;
+               src_pos++;
+               if (src_pos == src_size || ptr == end) {
+                       enc->sub_pos = 2;
+                       *src_pos_r = src_pos;
+                       return;
+               }
+               /* fall through */
+       case 2:
+               ptr[0] = b64enc[enc->buf | ((src_c[src_pos] & 0xc0) >> 6)];
+               enc->w_buf[0] = b64enc[src_c[src_pos] & 0x3f];
+               ptr++;
+               src_pos++;
+               if (ptr < end) {
+                       ptr[0] = enc->w_buf[0];
+                       ptr++;
+                       enc->w_buf_len = 0;
+               } else {
+                       enc->sub_pos = 0;
+                       enc->w_buf_len = 1;
+                       *src_pos_r = src_pos;
+                       return;
+               }
+               break;
+       default:
+               i_unreached();
+       }
+       enc->sub_pos = 0;
+
+       /* We're now at a 3-byte boundary */
+       if (src_pos == src_size) {
+               i_assert(ptr == end);
+               *src_pos_r = src_pos;
+               return;
+       }
+
+       /* Convert the bulk */
+       for (; src_size - src_pos > 2 && &ptr[3] < end;
+            src_pos += 3, ptr += 4) {
                ptr[0] = b64enc[src_c[src_pos] >> 2];
                ptr[1] = b64enc[((src_c[src_pos] & 0x03) << 4) |
                                (src_c[src_pos+1] >> 4)];
@@ -28,29 +179,147 @@ void base64_scheme_encode(const struct base64_scheme *b64,
                ptr[3] = b64enc[src_c[src_pos+2] & 0x3f];
        }
 
-       i_assert(ptr <= start + res_size);
-
+       /* Convert the bytes beyond the last 3-byte boundary and update state
+          for next call */
        switch (src_size - src_pos) {
        case 0:
+               enc->sub_pos = 0;
+               enc->buf = 0;
                break;
        case 1:
-               ptr[0] = b64enc[src_c[src_pos] >> 2];
-               ptr[1] = b64enc[(src_c[src_pos] & 0x03) << 4];
-               ptr[2] = '=';
-               ptr[3] = '=';
+               enc->sub_pos = 1;
+               enc->w_buf[0] = b64enc[src_c[src_pos] >> 2];
+               enc->w_buf_len = 1;
+               enc->buf = (src_c[src_pos] & 0x03) << 4;
+               src_pos++;
                break;
        case 2:
-               ptr[0] = b64enc[src_c[src_pos] >> 2];
-               ptr[1] = b64enc[((src_c[src_pos] & 0x03) << 4) |
-                               (src_c[src_pos+1] >> 4)];
-               ptr[2] = b64enc[((src_c[src_pos+1] & 0x0f) << 2)];
-               ptr[3] = '=';
+               enc->sub_pos = 2;
+               enc->w_buf[0] = b64enc[src_c[src_pos] >> 2];
+               enc->w_buf[1] = b64enc[((src_c[src_pos] & 0x03) << 4) |
+                                      (src_c[src_pos+1] >> 4)];
+               enc->w_buf_len = 2;
+               enc->buf = (src_c[src_pos+1] & 0x0f) << 2;
+               src_pos += 2;
+               res_size = end - ptr;
+               break;
+       default:
+               /* hit the end of the destination buffer */
+               enc->sub_pos = 0;
+               enc->w_buf[0] = b64enc[src_c[src_pos] >> 2];
+               enc->w_buf[1] = b64enc[((src_c[src_pos] & 0x03) << 4) |
+                                      (src_c[src_pos+1] >> 4)];
+               enc->w_buf[2] = b64enc[((src_c[src_pos+1] & 0x0f) << 2) |
+                                      ((src_c[src_pos+2] & 0xc0) >> 6)];
+               enc->w_buf[3] = b64enc[src_c[src_pos+2] & 0x3f];
+               enc->w_buf_len = 4;
+               enc->buf = 0;
+               src_pos += 3;
+       }
+
+       /* fill the remaining allocated space */
+       i_assert(ptr <= end);
+       res_size = end - ptr;
+       i_assert(enc->w_buf_len <= 4);
+       if (enc->w_buf_len > res_size) {
+               memcpy(ptr, enc->w_buf, res_size);
+               ptr += res_size;
+               enc->w_buf_len -= res_size;
+               memmove(enc->w_buf, enc->w_buf + res_size, enc->w_buf_len);
+       } else if (enc->w_buf_len > 0) {
+               memcpy(ptr, enc->w_buf, enc->w_buf_len);
+               ptr += enc->w_buf_len;
+               enc->w_buf_len = 0;
+       }
+
+       i_assert(ptr == end);
+       *src_pos_r = src_pos;
+}
+
+/* Encode as much of src as fits in dest. Returns TRUE when all src_size bytes
+   were consumed. When src_pos_r is non-NULL it receives the number of input
+   bytes consumed. */
+bool base64_encode_more(struct base64_encoder *enc,
+                       const void *src, size_t src_size, size_t *src_pos_r,
+                       buffer_t *dest)
+{
+       size_t consumed, space;
+
+       i_assert(!enc->finished);
+
+       /* nothing can be done without any room in the destination buffer */
+       space = buffer_get_avail_size(dest);
+       if (space == 0) {
+               i_assert(src_pos_r != NULL);
+               *src_pos_r = 0;
+               return FALSE;
+       }
+
+       base64_encode_more_data(enc, (const unsigned char *)src, src_size,
+                               &consumed, space, dest);
+
+       if (src_pos_r != NULL)
+               *src_pos_r = consumed;
+       return consumed == src_size;
+}
+
+/* Finish the Base64 stream: write the encoded bytes still pending in
+   enc->w_buf, followed by the final character and '=' padding of an input
+   group that was left incomplete. Returns TRUE and marks the encoder finished
+   when everything was written (or nothing needed writing). Returns FALSE
+   without appending anything when dest lacks the space; the encoder state is
+   left untouched in that case, so the call can be repeated once more space is
+   available. dest may be NULL only when there is nothing left to write. */
+bool base64_encode_finish(struct base64_encoder *enc, buffer_t *dest)
+{
+       const struct base64_scheme *b64 = enc->b64;
+       const char *b64enc = b64->encmap;
+       size_t dst_avail;
+       /* up to 4 pending bytes plus at most 3 closing bytes */
+       unsigned char w_buf[7];
+       unsigned int w_buf_len = 0;
+
+       dst_avail = 0;
+       if (dest != NULL)
+               dst_avail = buffer_get_avail_size(dest);
+
+       i_assert(!enc->finished);
+
+       /* output produced by earlier calls that did not fit yet */
+       if (enc->w_buf_len > 0) {
+               if (dst_avail == 0)
+                       return FALSE;
+               i_assert(enc->w_buf_len <= 4);
+               memcpy(w_buf, enc->w_buf, enc->w_buf_len);
+               w_buf_len += enc->w_buf_len;
+       }
+
+       /* final character and padding of an incomplete input group */
+       switch (enc->sub_pos) {
+       case 0:
+               break;
+       case 1:
+               w_buf[w_buf_len + 0] = b64enc[enc->buf];
+               w_buf[w_buf_len + 1] =  '=';
+               w_buf[w_buf_len + 2] =  '=';
+               w_buf_len += 3;
+               break;
+       case 2:
+               w_buf[w_buf_len + 0] = b64enc[enc->buf];
+               w_buf[w_buf_len + 1] =  '=';
+               w_buf_len += 2;
                break;
        default:
                i_unreached();
        }
+
+       if (w_buf_len == 0) {
+               /* sub_pos is necessarily 0 here */
+               enc->finished = TRUE;
+               return TRUE;
+       }
+
+       i_assert(dest != NULL);
+       if (dst_avail < w_buf_len)
+               return FALSE;
+
+       buffer_append(dest, w_buf, w_buf_len);
+       /* Only drop the sub-position once the tail is really written;
+          resetting it before the space check would make a retry after a
+          FALSE return lose the final character and the padding. */
+       enc->sub_pos = 0;
+       enc->finished = TRUE;
+       return TRUE;
 }
 
+/*
+ * Generic Base64 API
+ */
+
 #define IS_EMPTY(c) \
        ((c) == '\n' || (c) == '\r' || (c) == ' ' || (c) == '\t') /* true for LF, CR, space or tab; c may be evaluated up to four times */
 
index 6c4e5a1c7394ffaf87a458d537ad5ccb2d57c3dd..142de87584ef0456ab644a77e87213a44d032a5a 100644 (file)
@@ -17,6 +17,72 @@ struct base64_scheme {
        const unsigned char decmap[256];
 };
 
+/*
+ * Low-level Base64 encoder
+ */
+
+struct base64_encoder {
+       const struct base64_scheme *b64; /* encoding scheme whose encmap is used */
+
+       /* state */
+       unsigned int sub_pos; /* number of input bytes (0, 1 or 2) already
+                                consumed from the current 3-byte group */
+       unsigned char buf; /* leftover bits of the last consumed input byte,
+                             already shifted into place for the next output
+                             character */
+
+       unsigned char w_buf[4]; /* encoded bytes produced but not yet written
+                                  to the destination buffer */
+       unsigned int w_buf_len; /* number of valid bytes in w_buf (at most 4) */
+
+       bool finished:1; /* set when base64_encode_finish() returned TRUE */
+};
+
+/* Returns TRUE once base64_encode_finish() has completed successfully on this
+   encoder. A base64_encode_finish() call that returned FALSE does not mark
+   the encoder as finished. */
+static inline bool
+base64_encode_is_finished(struct base64_encoder *enc)
+{
+       return enc->finished;
+}
+
+/* Initialize the Base64 encoder. The b64 parameter is the definition of the
+   particular Base64 encoding scheme that is used. Whatever *enc contained
+   before is discarded: the struct is reset with i_zero() before the scheme
+   pointer is stored.
+ */
+static inline void
+base64_encode_init(struct base64_encoder *enc,
+                  const struct base64_scheme *b64)
+{
+       i_zero(enc);
+       enc->b64 = b64;
+}
+
+/* Bring the encoder back to the state it had right after
+   base64_encode_init(), keeping the encoding scheme it was set up with. */
+static inline void
+base64_encode_reset(struct base64_encoder *enc)
+{
+       /* enc->b64 is read for the argument before init resets the struct */
+       base64_encode_init(enc, enc->b64);
+}
+
+/* Translate the size of the next input to the size of the output once encoded.
+   This yields the amount of data appended to the dest buffer by
+   base64_encode_more() with the indicated src_size, including encoded output
+   still pending from earlier calls. With src_size == 0 it instead yields the
+   amount of data that finishing the encoding appends: the pending output plus
+   the final character and padding of an incomplete input group. */
+size_t base64_encode_get_size(struct base64_encoder *enc, size_t src_size);
+
+/* Translates binary data into some form of Base64. The src must not point to
+   dest buffer. Returns TRUE when all the provided data is encoded. Returns
+   FALSE when the space in the provided buffer is insufficient. The return value
+   may be ignored. If src_pos_r is non-NULL, it's updated to the position of
+   the first non-translated character in src. Two conditions are checked by
+   assertion: the encoder must not be finished yet, and src_pos_r must be
+   non-NULL when dest has no space available at all.
+ */
+bool ATTR_NOWARN_UNUSED_RESULT
+base64_encode_more(struct base64_encoder *enc, const void *src, size_t src_size,
+                  size_t *src_pos_r, buffer_t *dest) ATTR_NULL(4);
+
+/* Finishes Base64 encoding. Returns TRUE when all the provided data is encoded.
+   Returns FALSE when the space in the provided buffer is insufficient. The
+   return value may be ignored. dest may be NULL when the encoder has nothing
+   left to write; with a NULL dest, pending encoded output makes the call
+   return FALSE and an incomplete input group fails an assertion.
+ */
+bool ATTR_NOWARN_UNUSED_RESULT
+base64_encode_finish(struct base64_encoder *enc, buffer_t *dest) ATTR_NULL(2);
+
 /*
  * Generic Base64 API
  */
@@ -27,9 +93,16 @@ struct base64_scheme {
    The b64 parameter is the definition of the particular Base 64 encoding scheme
    that is used. See below for specific functions.
  */
-void base64_scheme_encode(const struct base64_scheme *b64,
-                         const void *src, size_t src_size,
-                         buffer_t *dest);
+static inline void
+base64_scheme_encode(const struct base64_scheme *b64,
+                    const void *src, size_t src_size, buffer_t *dest)
+{
+       struct base64_encoder encoder;
+
+       /* one-shot use of the incremental encoder: feed the whole input in a
+          single call, then write the final character and padding */
+       base64_encode_init(&encoder, b64);
+       (void)base64_encode_more(&encoder, src, src_size, NULL, dest);
+       (void)base64_encode_finish(&encoder, dest);
+}
 
 /* Translates some variant of Base64 data into binary and appends it to dest
    buffer. dest may point to same buffer as src. Returns 1 if all ok, 0 if end
index e1392047b9d71281eabbf2d9b561e619225c3320..d4242fc28b04a0b918839b91df234fb58922382f 100644 (file)
@@ -254,6 +254,103 @@ static void test_base64url_random(void)
        test_end();
 }
 
+struct test_base64_encode_lowlevel { /* one low-level encoder test case */
+       const struct base64_scheme *scheme; /* scheme given to base64_encode_init() */
+       const char *input; /* NUL-terminated data to encode */
+       const char *output; /* expected encoded text */
+};
+
+static const struct test_base64_encode_lowlevel
+tests_base64_encode_lowlevel[] = { /* Every input is listed twice, once per
+          scheme. The expected outputs differ only where the base64_scheme
+          entry has '/' or '+' and the base64url_scheme entry has '_' or '-';
+          '=' padding is expected for both schemes. */
+       {
+               .scheme = &base64_scheme,
+               .input = "hello world",
+               .output = "aGVsbG8gd29ybGQ=",
+       },
+       {
+               .scheme = &base64url_scheme,
+               .input = "hello world",
+               .output = "aGVsbG8gd29ybGQ=",
+       },
+       {
+               .scheme = &base64_scheme,
+               .input = "foo barits",
+               .output = "Zm9vIGJhcml0cw==",
+       },
+       {
+               .scheme = &base64url_scheme,
+               .input = "foo barits",
+               .output = "Zm9vIGJhcml0cw==",
+       },
+       {
+               .scheme = &base64_scheme,
+               .input = "just niin",
+               .output = "anVzdCBuaWlu",
+       },
+       {
+               .scheme = &base64url_scheme,
+               .input = "just niin",
+               .output = "anVzdCBuaWlu",
+       },
+       {
+               .scheme = &base64_scheme,
+               .input =
+                       "\xe7\x8c\xbf\xe3\x82\x82\xe6\x9c\xa8\xe3\x81\x8b"
+                       "\xe3\x82\x89\xe8\x90\xbd\xe3\x81\xa1\xe3\x82\x8b",
+               .output = "54y/44KC5pyo44GL44KJ6JC944Gh44KL",
+       },
+       {
+               .scheme = &base64url_scheme,
+               .input =
+                       "\xe7\x8c\xbf\xe3\x82\x82\xe6\x9c\xa8\xe3\x81\x8b"
+                       "\xe3\x82\x89\xe8\x90\xbd\xe3\x81\xa1\xe3\x82\x8b",
+               .output = "54y_44KC5pyo44GL44KJ6JC944Gh44KL",
+       },
+       {
+               .scheme = &base64_scheme,
+               .input =
+                       "\xe8\xa7\x92\xe3\x82\x92\xe7\x9f\xaf\xe3\x82\x81\xe3"
+                       "\x81\xa6\xe7\x89\x9b\xe3\x82\x92\xe6\xae\xba\xe3\x81"
+                       "\x99",
+               .output = "6KeS44KS55+v44KB44Gm54mb44KS5q6644GZ",
+       },
+       {
+               .scheme = &base64url_scheme,
+               .input =
+                       "\xe8\xa7\x92\xe3\x82\x92\xe7\x9f\xaf\xe3\x82\x81\xe3"
+                       "\x81\xa6\xe7\x89\x9b\xe3\x82\x92\xe6\xae\xba\xe3\x81"
+                       "\x99",
+               .output = "6KeS44KS55-v44KB44Gm54mb44KS5q6644GZ",
+       },
+};
+
+static void test_base64_encode_lowlevel(void)
+{ /* Encodes every table entry with the low-level encoder (one
+     base64_encode_more() call followed by base64_encode_finish()) and checks
+     both the produced text and its length. */
+       string_t *str;
+       unsigned int i;
+
+       test_begin("base64 encode low-level");
+       str = t_str_new(256); /* output string, reused for every test case */
+       for (i = 0; i < N_ELEMENTS(tests_base64_encode_lowlevel); i++) {
+               const struct test_base64_encode_lowlevel *test =
+                       &tests_base64_encode_lowlevel[i];
+               struct base64_encoder enc;
+
+               str_truncate(str, 0); /* drop the output of the previous case */
+
+               base64_encode_init(&enc, test->scheme);
+               base64_encode_more(&enc, test->input, strlen(test->input),
+                                  NULL, str); /* whole input in a single call */
+               base64_encode_finish(&enc, str);
+
+               test_assert_idx(strcmp(test->output, str_c(str)) == 0, i);
+               test_assert_idx(
+                       str_len(str) == MAX_BASE64_ENCODED_SIZE(
+                               strlen(test->input)), i); /* length must match the
+                                                            size macro */
+       }
+       test_end();
+}
+
 void test_base64(void)
 {
        test_base64_encode();
@@ -262,4 +359,5 @@ void test_base64(void)
        test_base64url_encode();
        test_base64url_decode();
        test_base64url_random();
+       test_base64_encode_lowlevel();
 }