From: Stephan Bosch Date: Sat, 16 Mar 2019 20:19:49 +0000 (+0100) Subject: lib: base64 - Properly implement incremental encoding. X-Git-Tag: 2.3.9~279 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2a739f0af19a08efa27c1384847f796cb8bfd462;p=thirdparty%2Fdovecot%2Fcore.git lib: base64 - Properly implement incremental encoding. --- diff --git a/src/lib/base64.c b/src/lib/base64.c index 3e7716170a..6537feded7 100644 --- a/src/lib/base64.c +++ b/src/lib/base64.c @@ -5,21 +5,172 @@ #include "buffer.h" /* - * Common Base 64 + * Low-level Base64 encoder */ -void base64_scheme_encode(const struct base64_scheme *b64, - const void *src, size_t src_size, - buffer_t *dest) +static size_t +base64_encode_get_out_size(struct base64_encoder *enc, size_t src_size) { + size_t res_size = enc->w_buf_len; + + i_assert(enc->w_buf_len <= 4); + + if (src_size == 0) + return res_size; + + /* Handle sub-position */ + switch (enc->sub_pos) { + case 0: + break; + case 1: + res_size++; + src_size--; + if (src_size == 0) + return res_size; + /* fall through */ + case 2: + res_size += 2; + src_size--; + break; + default: + i_unreached(); + } + + /* We're now at a 3-byte boundary */ + if (src_size == 0) + return res_size; + + /* Calculate size we can append to the output from remaining input */ + res_size += ((src_size) / 3) * 4; + switch (src_size % 3) { + case 0: + break; + case 1: + res_size += 1; + break; + case 2: + res_size += 2; + break; + } + return res_size; +} + +size_t base64_encode_get_size(struct base64_encoder *enc, size_t src_size) +{ + size_t out_size = base64_encode_get_out_size(enc, src_size); + + if (src_size == 0) { + /* last block */ + switch (enc->sub_pos) { + case 0: + break; + case 1: + out_size += 3; + break; + case 2: + out_size += 2; + break; + default: + i_unreached(); + } + } + + return out_size; +} + +static void +base64_encode_more_data(struct base64_encoder *enc, + const unsigned char *src_c, size_t src_size, + size_t *src_pos_r, 
size_t dst_avail, buffer_t *dest) +{ + const struct base64_scheme *b64 = enc->b64; const char *b64enc = b64->encmap; - const size_t res_size = MAX_BASE64_ENCODED_SIZE(src_size); - unsigned char *start = buffer_append_space_unsafe(dest, res_size); - unsigned char *ptr = start; - const unsigned char *src_c = src; + size_t res_size; + unsigned char *start, *ptr, *end; size_t src_pos; - for (src_pos = 0; src_size - src_pos > 2; src_pos += 3, ptr += 4) { + /* determine how much we can write in destination buffer */ + if (dst_avail == 0) { + *src_pos_r = 0; + return; + } + + /* pre-allocate space in the destination buffer */ + res_size = base64_encode_get_out_size(enc, src_size); + if (res_size > dst_avail) + res_size = dst_avail; + + start = buffer_append_space_unsafe(dest, res_size); + end = start + res_size; + ptr = start; + + /* write bytes not written in previous call */ + i_assert(enc->w_buf_len <= 4); + if (enc->w_buf_len > res_size) { + memcpy(ptr, enc->w_buf, res_size); + ptr += res_size; + enc->w_buf_len -= res_size; + memmove(enc->w_buf, enc->w_buf + res_size, enc->w_buf_len); + } else if (enc->w_buf_len > 0) { + memcpy(ptr, enc->w_buf, enc->w_buf_len); + ptr += enc->w_buf_len; + enc->w_buf_len = 0; + } + if (ptr == end) { + *src_pos_r = 0; + return; + } + i_assert(enc->w_buf_len == 0); + i_assert(src_size != 0); + + /* Handle sub-position */ + src_pos = 0; + switch (enc->sub_pos) { + case 0: + break; + case 1: + i_assert(ptr < end); + ptr[0] = b64enc[enc->buf | (src_c[src_pos] >> 4)]; + ptr++; + enc->buf = (src_c[src_pos] & 0x0f) << 2; + src_pos++; + if (src_pos == src_size || ptr == end) { + enc->sub_pos = 2; + *src_pos_r = src_pos; + return; + } + /* fall through */ + case 2: + ptr[0] = b64enc[enc->buf | ((src_c[src_pos] & 0xc0) >> 6)]; + enc->w_buf[0] = b64enc[src_c[src_pos] & 0x3f]; + ptr++; + src_pos++; + if (ptr < end) { + ptr[0] = enc->w_buf[0]; + ptr++; + enc->w_buf_len = 0; + } else { + enc->sub_pos = 0; + enc->w_buf_len = 1; + *src_pos_r = src_pos; 
+ return; + } + break; + default: + i_unreached(); + } + enc->sub_pos = 0; + + /* We're now at a 3-byte boundary */ + if (src_pos == src_size) { + i_assert(ptr == end); + *src_pos_r = src_pos; + return; + } + + /* Convert the bulk */ + for (; src_size - src_pos > 2 && &ptr[3] < end; + src_pos += 3, ptr += 4) { ptr[0] = b64enc[src_c[src_pos] >> 2]; ptr[1] = b64enc[((src_c[src_pos] & 0x03) << 4) | (src_c[src_pos+1] >> 4)]; @@ -28,29 +179,147 @@ void base64_scheme_encode(const struct base64_scheme *b64, ptr[3] = b64enc[src_c[src_pos+2] & 0x3f]; } - i_assert(ptr <= start + res_size); - + /* Convert the bytes beyond the last 3-byte boundary and update state + for next call */ switch (src_size - src_pos) { case 0: + enc->sub_pos = 0; + enc->buf = 0; break; case 1: - ptr[0] = b64enc[src_c[src_pos] >> 2]; - ptr[1] = b64enc[(src_c[src_pos] & 0x03) << 4]; - ptr[2] = '='; - ptr[3] = '='; + enc->sub_pos = 1; + enc->w_buf[0] = b64enc[src_c[src_pos] >> 2]; + enc->w_buf_len = 1; + enc->buf = (src_c[src_pos] & 0x03) << 4; + src_pos++; break; case 2: - ptr[0] = b64enc[src_c[src_pos] >> 2]; - ptr[1] = b64enc[((src_c[src_pos] & 0x03) << 4) | - (src_c[src_pos+1] >> 4)]; - ptr[2] = b64enc[((src_c[src_pos+1] & 0x0f) << 2)]; - ptr[3] = '='; + enc->sub_pos = 2; + enc->w_buf[0] = b64enc[src_c[src_pos] >> 2]; + enc->w_buf[1] = b64enc[((src_c[src_pos] & 0x03) << 4) | + (src_c[src_pos+1] >> 4)]; + enc->w_buf_len = 2; + enc->buf = (src_c[src_pos+1] & 0x0f) << 2; + src_pos += 2; + res_size = end - ptr; + break; + default: + /* hit the end of the destination buffer */ + enc->sub_pos = 0; + enc->w_buf[0] = b64enc[src_c[src_pos] >> 2]; + enc->w_buf[1] = b64enc[((src_c[src_pos] & 0x03) << 4) | + (src_c[src_pos+1] >> 4)]; + enc->w_buf[2] = b64enc[((src_c[src_pos+1] & 0x0f) << 2) | + ((src_c[src_pos+2] & 0xc0) >> 6)]; + enc->w_buf[3] = b64enc[src_c[src_pos+2] & 0x3f]; + enc->w_buf_len = 4; + enc->buf = 0; + src_pos += 3; + } + + /* fill the remaining allocated space */ + i_assert(ptr <= end); + 
res_size = end - ptr; + i_assert(enc->w_buf_len <= 4); + if (enc->w_buf_len > res_size) { + memcpy(ptr, enc->w_buf, res_size); + ptr += res_size; + enc->w_buf_len -= res_size; + memmove(enc->w_buf, enc->w_buf + res_size, enc->w_buf_len); + } else if (enc->w_buf_len > 0) { + memcpy(ptr, enc->w_buf, enc->w_buf_len); + ptr += enc->w_buf_len; + enc->w_buf_len = 0; + } + + i_assert(ptr == end); + *src_pos_r = src_pos; +} + +bool base64_encode_more(struct base64_encoder *enc, + const void *src, size_t src_size, size_t *src_pos_r, + buffer_t *dest) +{ + const unsigned char *src_c = src; + size_t src_pos, dst_avail; + + i_assert(!enc->finished); + + /* determine how much we can write in destination buffer */ + dst_avail = buffer_get_avail_size(dest); + if (dst_avail == 0) { + i_assert(src_pos_r != NULL); + *src_pos_r = 0; + return FALSE; + } + + base64_encode_more_data(enc, src_c, src_size, &src_pos, + dst_avail, dest); + + if (src_pos_r != NULL) + *src_pos_r = src_pos; + return (src_pos == src_size); +} + +bool base64_encode_finish(struct base64_encoder *enc, buffer_t *dest) +{ + const struct base64_scheme *b64 = enc->b64; + const char *b64enc = b64->encmap; + size_t dst_avail; + unsigned char w_buf[7]; + unsigned int w_buf_len = 0; + + dst_avail = 0; + if (dest != NULL) + dst_avail = buffer_get_avail_size(dest); + + i_assert(!enc->finished); + + if (enc->w_buf_len > 0) { + if (dst_avail == 0) + return FALSE; + i_assert(enc->w_buf_len <= 4); + memcpy(w_buf, enc->w_buf, enc->w_buf_len); + w_buf_len += enc->w_buf_len; + } + + switch (enc->sub_pos) { + case 0: + break; + case 1: + w_buf[w_buf_len + 0] = b64enc[enc->buf]; + w_buf[w_buf_len + 1] = '='; + w_buf[w_buf_len + 2] = '='; + w_buf_len += 3; + break; + case 2: + w_buf[w_buf_len + 0] = b64enc[enc->buf]; + w_buf[w_buf_len + 1] = '='; + w_buf_len += 2; break; default: i_unreached(); } + enc->sub_pos = 0; + + if (w_buf_len == 0) { + enc->finished = TRUE; + return TRUE; + } + + i_assert(dest != NULL); + if (dst_avail < 
w_buf_len) + return FALSE; + + buffer_append(dest, w_buf, w_buf_len); + enc->finished = TRUE; + return TRUE; } +/* + * Generic Base64 API + */ + #define IS_EMPTY(c) \ ((c) == '\n' || (c) == '\r' || (c) == ' ' || (c) == '\t') diff --git a/src/lib/base64.h b/src/lib/base64.h index 6c4e5a1c73..142de87584 100644 --- a/src/lib/base64.h +++ b/src/lib/base64.h @@ -17,6 +17,72 @@ struct base64_scheme { const unsigned char decmap[256]; }; +/* + * Low-level Base64 encoder + */ + +struct base64_encoder { + const struct base64_scheme *b64; + + /* state */ + unsigned int sub_pos; + unsigned char buf; + + unsigned char w_buf[4]; + unsigned int w_buf_len; + + bool finished:1; +}; + +/* Returns TRUE when base64_encode_finish() was called on this encoder. */ +static inline bool +base64_encode_is_finished(struct base64_encoder *enc) +{ + return enc->finished; +} + +/* Initialize the Base64 encoder. The b64 parameter is the definition of the + particular Base64 encoding scheme that is used. + */ +static inline void +base64_encode_init(struct base64_encoder *enc, + const struct base64_scheme *b64) +{ + i_zero(enc); + enc->b64 = b64; +} + +/* Reset the Base64 encoder to its initial state. */ +static inline void +base64_encode_reset(struct base64_encoder *enc) +{ + const struct base64_scheme *b64 = enc->b64; + + base64_encode_init(enc, b64); +} + +/* Translate the size of the next input to the size of the output once encoded. + This yields the amount of data appended to the dest buffer by + base64_encode_more() with the indicated src_size. */ +size_t base64_encode_get_size(struct base64_encoder *enc, size_t src_size); + +/* Translates binary data into some form of Base64. The src must not point to + dest buffer. Returns TRUE when all the provided data is encoded. Returns + FALSE when the space in the provided buffer is insufficient. The return value + may be ignored. If src_pos_r is non-NULL, it's updated to first + non-translated character in src. 
+ */ +bool ATTR_NOWARN_UNUSED_RESULT +base64_encode_more(struct base64_encoder *enc, const void *src, size_t src_size, + size_t *src_pos_r, buffer_t *dest) ATTR_NULL(4); + +/* Finishes Base64 encoding. Returns TRUE when all the provided data is encoded. + Returns FALSE when the space in the provided buffer is insufficient. The + return value may be ignored. + */ +bool ATTR_NOWARN_UNUSED_RESULT +base64_encode_finish(struct base64_encoder *enc, buffer_t *dest) ATTR_NULL(2); + /* * Generic Base64 API */ @@ -27,9 +93,16 @@ struct base64_scheme { The b64 parameter is the definition of the particular Base 64 encoding scheme that is used. See below for specific functions. */ -void base64_scheme_encode(const struct base64_scheme *b64, - const void *src, size_t src_size, - buffer_t *dest); +static inline void +base64_scheme_encode(const struct base64_scheme *b64, + const void *src, size_t src_size, buffer_t *dest) +{ + struct base64_encoder enc; + + base64_encode_init(&enc, b64); + base64_encode_more(&enc, src, src_size, NULL, dest); + base64_encode_finish(&enc, dest); +} /* Translates some variant of Base64 data into binary and appends it to dest buffer. dest may point to same buffer as src. 
Returns 1 if all ok, 0 if end diff --git a/src/lib/test-base64.c b/src/lib/test-base64.c index e1392047b9..d4242fc28b 100644 --- a/src/lib/test-base64.c +++ b/src/lib/test-base64.c @@ -254,6 +254,103 @@ static void test_base64url_random(void) test_end(); } +struct test_base64_encode_lowlevel { + const struct base64_scheme *scheme; + const char *input; + const char *output; +}; + +static const struct test_base64_encode_lowlevel +tests_base64_encode_lowlevel[] = { + { + .scheme = &base64_scheme, + .input = "hello world", + .output = "aGVsbG8gd29ybGQ=", + }, + { + .scheme = &base64url_scheme, + .input = "hello world", + .output = "aGVsbG8gd29ybGQ=", + }, + { + .scheme = &base64_scheme, + .input = "foo barits", + .output = "Zm9vIGJhcml0cw==", + }, + { + .scheme = &base64url_scheme, + .input = "foo barits", + .output = "Zm9vIGJhcml0cw==", + }, + { + .scheme = &base64_scheme, + .input = "just niin", + .output = "anVzdCBuaWlu", + }, + { + .scheme = &base64url_scheme, + .input = "just niin", + .output = "anVzdCBuaWlu", + }, + { + .scheme = &base64_scheme, + .input = + "\xe7\x8c\xbf\xe3\x82\x82\xe6\x9c\xa8\xe3\x81\x8b" + "\xe3\x82\x89\xe8\x90\xbd\xe3\x81\xa1\xe3\x82\x8b", + .output = "54y/44KC5pyo44GL44KJ6JC944Gh44KL", + }, + { + .scheme = &base64url_scheme, + .input = + "\xe7\x8c\xbf\xe3\x82\x82\xe6\x9c\xa8\xe3\x81\x8b" + "\xe3\x82\x89\xe8\x90\xbd\xe3\x81\xa1\xe3\x82\x8b", + .output = "54y_44KC5pyo44GL44KJ6JC944Gh44KL", + }, + { + .scheme = &base64_scheme, + .input = + "\xe8\xa7\x92\xe3\x82\x92\xe7\x9f\xaf\xe3\x82\x81\xe3" + "\x81\xa6\xe7\x89\x9b\xe3\x82\x92\xe6\xae\xba\xe3\x81" + "\x99", + .output = "6KeS44KS55+v44KB44Gm54mb44KS5q6644GZ", + }, + { + .scheme = &base64url_scheme, + .input = + "\xe8\xa7\x92\xe3\x82\x92\xe7\x9f\xaf\xe3\x82\x81\xe3" + "\x81\xa6\xe7\x89\x9b\xe3\x82\x92\xe6\xae\xba\xe3\x81" + "\x99", + .output = "6KeS44KS55-v44KB44Gm54mb44KS5q6644GZ", + }, +}; + +static void test_base64_encode_lowlevel(void) +{ + string_t *str; + unsigned int i; + + 
test_begin("base64 encode low-level"); + str = t_str_new(256); + for (i = 0; i < N_ELEMENTS(tests_base64_encode_lowlevel); i++) { + const struct test_base64_encode_lowlevel *test = + &tests_base64_encode_lowlevel[i]; + struct base64_encoder enc; + + str_truncate(str, 0); + + base64_encode_init(&enc, test->scheme); + base64_encode_more(&enc, test->input, strlen(test->input), + NULL, str); + base64_encode_finish(&enc, str); + + test_assert_idx(strcmp(test->output, str_c(str)) == 0, i); + test_assert_idx( + str_len(str) == MAX_BASE64_ENCODED_SIZE( + strlen(test->input)), i); + } + test_end(); +} + void test_base64(void) { test_base64_encode(); @@ -262,4 +359,5 @@ void test_base64(void) test_base64url_encode(); test_base64url_decode(); test_base64url_random(); + test_base64_encode_lowlevel(); }