From: Stephan Bosch Date: Sat, 30 Mar 2019 18:08:10 +0000 (+0100) Subject: lib: base64 - Properly implement incremental decoding. X-Git-Tag: 2.3.9~278 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=747a828a08f75e5958a4d841d6342593baefdb08;p=thirdparty%2Fdovecot%2Fcore.git lib: base64 - Properly implement incremental decoding. --- diff --git a/src/lib/base64.c b/src/lib/base64.c index 6537feded7..fe0c62ee98 100644 --- a/src/lib/base64.c +++ b/src/lib/base64.c @@ -317,12 +317,212 @@ bool base64_encode_finish(struct base64_encoder *enc, buffer_t *dest) } /* - * Generic Base64 API + * Low-level Base64 decoder */ #define IS_EMPTY(c) \ ((c) == '\n' || (c) == '\r' || (c) == ' ' || (c) == '\t') +static inline void +base64_skip_whitespace(struct base64_decoder *dec ATTR_UNUSED, + const unsigned char *src_c, + size_t src_size, size_t *src_pos) +{ + /* skip any whitespace in the padding */ + while ((*src_pos) < src_size && IS_EMPTY(src_c[(*src_pos)])) + (*src_pos)++; +} + +int base64_decode_more(struct base64_decoder *dec, + const void *src, size_t src_size, size_t *src_pos_r, + buffer_t *dest) +{ + const struct base64_scheme *b64 = dec->b64; + const unsigned char *src_c = src; + bool expect_boundary = HAS_ALL_BITS(dec->flags, + BASE64_DECODE_FLAG_EXPECT_BOUNDARY); + size_t src_pos, dst_avail; + int ret = 1; + + i_assert(!dec->finished); + i_assert(!dec->failed); + + if (dec->seen_boundary) { + /* already seen the boundary/end of base64 data */ + if (src_pos_r != NULL) + *src_pos_r = 0; + dec->failed = TRUE; + return -1; + } + + src_pos = 0; + if (dec->seen_end) { + /* skip any whitespace at the end */ + base64_skip_whitespace(dec, src_c, src_size, &src_pos); + if (src_pos_r != NULL) + *src_pos_r = src_pos; + if (src_pos < src_size) { + if (!expect_boundary) { + dec->failed = TRUE; + return -1; + } + dec->seen_boundary = TRUE; + return 0; + } + /* more whitespace may follow */ + return 1; + } + + if (src_size == 0) { + if (src_pos_r != NULL) + *src_pos_r = 0; + 
return 1; + } + + dst_avail = buffer_get_avail_size(dest); + if (dst_avail == 0) { + i_assert(src_pos_r != NULL); + *src_pos_r = 0; + return 1; + } + + for (; !dec->seen_padding && src_pos < src_size; src_pos++) { + unsigned char in = src_c[src_pos]; + unsigned char dm = b64->decmap[in]; + + if (dm == 0xff) { + if (unlikely(!IS_EMPTY(in))) { + ret = -1; + break; + } + continue; + } + + switch (dec->sub_pos) { + case 0: + dec->buf = dm; + dec->sub_pos++; + break; + case 1: + dec->buf = (dec->buf << 2) | (dm >> 4); + buffer_append_c(dest, dec->buf); + dst_avail--; + dec->buf = dm; + dec->sub_pos++; + break; + case 2: + dec->buf = (dec->buf << 4) | (dm >> 2); + buffer_append_c(dest, dec->buf); + dst_avail--; + dec->buf = dm; + dec->sub_pos++; + break; + case 3: + dec->buf = ((dec->buf << 6) & 0xc0) | dm; + buffer_append_c(dest, dec->buf); + dst_avail--; + dec->buf = 0; + dec->sub_pos = 0; + break; + default: + i_unreached(); + } + if (dst_avail == 0) { + i_assert(src_pos_r != NULL); + *src_pos_r = src_pos + 1; + return 1; + } + } + + if (dec->seen_padding) { + /* skip any whitespace in or after the padding */ + base64_skip_whitespace(dec, src_c, src_size, &src_pos); + if (src_pos == src_size) { + if (src_pos_r != NULL) + *src_pos_r = src_pos; + return 1; + } + } + + if (dec->seen_padding || ret < 0) { + /* try to parse the end (padding) of the base64 input */ + i_assert(src_pos < src_size); + + switch (dec->sub_pos) { + case 0: + case 1: + /* no padding expected */ + ret = -1; + break; + case 2: + if (unlikely(src_c[src_pos] != '=')) { + /* invalid character */ + ret = -1; + break; + } + dec->seen_padding = TRUE; + dec->sub_pos++; + src_pos++; + if (src_pos == src_size) { + ret = 1; + break; + } + /* skip any whitespace in the padding */ + base64_skip_whitespace(dec, src_c, src_size, + &src_pos); + if (src_pos == src_size) { + ret = 1; + break; + } + /* fall through */ + case 3: + if (unlikely(src_c[src_pos] != '=')) { + /* invalid character */ + ret = -1; + break; + 
} + dec->seen_padding = TRUE; + dec->seen_end = TRUE; + dec->sub_pos = 0; + src_pos++; + /* skip any trailing whitespace */ + base64_skip_whitespace(dec, src_c, src_size, + &src_pos); + if (src_pos < src_size) { + ret = -1; + break; + } + /* more whitespace may follow */ + ret = 1; + break; + } + } + + if (ret < 0) { + if (!expect_boundary) { + dec->failed = TRUE; + } else { + dec->seen_boundary = TRUE; + ret = 0; + } + } + if (src_pos_r != NULL) + *src_pos_r = src_pos; + return ret; +} + +int base64_decode_finish(struct base64_decoder *dec) +{ + i_assert(!dec->finished); + dec->finished = TRUE; + + return (!dec->failed && dec->sub_pos == 0 ? 0 : -1); +} + +/* + * Generic Base64 API + */ + int base64_scheme_decode(const struct base64_scheme *b64, const void *src, size_t src_size, size_t *src_pos_r, buffer_t *dest) diff --git a/src/lib/base64.h b/src/lib/base64.h index 142de87584..ed455f6191 100644 --- a/src/lib/base64.h +++ b/src/lib/base64.h @@ -83,6 +83,82 @@ base64_encode_more(struct base64_encoder *enc, const void *src, size_t src_size, bool ATTR_NOWARN_UNUSED_RESULT base64_encode_finish(struct base64_encoder *enc, buffer_t *dest) ATTR_NULL(2); +/* + * Low-level Base64 decoder + */ + +enum base64_decode_flags { + /* Decode input until a boundary is reached. This boundary is a + non-Base64 input sequence that would normally trigger a decode error; + e.g., Base64 data followed by a ':'. With this flag, it is possible + to decode such a Base64 prefix. The base64_decode_finish() function + will still check that the Base64 data ends properly (padding). */ + BASE64_DECODE_FLAG_EXPECT_BOUNDARY = BIT(0), +}; + +struct base64_decoder { + const struct base64_scheme *b64; + enum base64_decode_flags flags; + + /* state */ + unsigned int sub_pos; + unsigned char buf; + + bool seen_padding:1; + bool seen_end:1; + bool seen_boundary:1; + bool finished:1; + bool failed:1; +}; + +/* Returns TRUE when base64_decode_finish() was called on this decoder. 
*/ +static inline bool +base64_decode_is_finished(struct base64_decoder *dec) +{ + return dec->finished; +} + +/* Initialize the Base64 decoder. The b64 parameter is the definition of the + particular Base64 encoding scheme that is expected. + */ +static inline void +base64_decode_init(struct base64_decoder *dec, + const struct base64_scheme *b64, + enum base64_decode_flags flags) +{ + i_zero(dec); + dec->b64 = b64; + dec->flags = flags; +} + +/* Reset the Base64 decoder to its initial state. */ +static inline void +base64_decode_reset(struct base64_decoder *dec) +{ + const struct base64_scheme *b64 = dec->b64; + enum base64_decode_flags flags = dec->flags; + + base64_decode_init(dec, b64, flags); +} + +/* Translates some form of Base64 data into binary and appends it to dest + buffer. dest may point to same buffer as src. Returns 1 if all ok, 0 if end + of base64 data found, -1 if data is invalid. + + Any CR, LF characters are ignored, as well as whitespace at beginning or end + of line. + + If src_pos_r is non-NULL, it's updated to the first non-translated character in + src. + */ +int base64_decode_more(struct base64_decoder *dec, + const void *src, size_t src_size, size_t *src_pos_r, + buffer_t *dest) ATTR_NULL(4); +/* Finishes Base64 decoding. This function checks whether the encoded data ends + in the proper padding. Returns 0 if all ok, and -1 if data is invalid. 
+ */ +int base64_decode_finish(struct base64_decoder *dec); + /* * Generic Base64 API */ diff --git a/src/lib/test-base64.c b/src/lib/test-base64.c index d4242fc28b..38873816c2 100644 --- a/src/lib/test-base64.c +++ b/src/lib/test-base64.c @@ -351,6 +351,410 @@ static void test_base64_encode_lowlevel(void) test_end(); } +struct test_base64_decode_lowlevel { + const struct base64_scheme *scheme; + enum base64_decode_flags flags; + + const char *input; + const char *output; + int ret; + unsigned int src_pos; +}; + +static const struct test_base64_decode_lowlevel +tests_base64_decode_lowlevel[] = { + { + .scheme = &base64_scheme, + .input = "\taGVsbG8gd29ybGQ=", + .output = "hello world", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64url_scheme, + .input = "\taGVsbG8gd29ybGQ=", + .output = "hello world", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64_scheme, + .input = "\taGVsbG8gd29ybGQ=\t", + .output = "hello world", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64_scheme, + .input = "aGVsbG8gd29ybGQ=:frop", + .output = "hello world", + .ret = 0, + .src_pos = 16, + .flags = BASE64_DECODE_FLAG_EXPECT_BOUNDARY, + }, + { + .scheme = &base64_scheme, + .input = "\taGVsbG8gd29ybGQ=\t:frop", + .output = "hello world", + .ret = 0, + .src_pos = 18, + .flags = BASE64_DECODE_FLAG_EXPECT_BOUNDARY, + }, + { + .scheme = &base64_scheme, + .input = "\nZm9v\n \tIGJh \t\ncml0cw==", + .output = "foo barits", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64url_scheme, + .input = "\nZm9v\n \tIGJh \t\ncml0cw==", + .output = "foo barits", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64_scheme, + .input = "\nZm9v\n \tIGJh \t\ncml0cw==\n ", + .output = "foo barits", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64_scheme, + .input = "\nZm9v\n \tIGJh \t\ncml0cw= =\n ", + .output = "foo barits", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64_scheme, + .input = "\nZm9v\n \tIGJh 
\t\ncml0cw\n= =\n ", + .output = "foo barits", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64_scheme, + .input = " anVzdCBuaWlu \n", + .output = "just niin", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64url_scheme, + .input = " anVzdCBuaWlu \n", + .output = "just niin", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64_scheme, + .input = "aGVsb", + .output = "hel", + .ret = -1, + .src_pos = 5, + }, + { + .scheme = &base64url_scheme, + .input = "aGVsb", + .output = "hel", + .ret = -1, + .src_pos = 5, + }, + { + .scheme = &base64_scheme, + .input = "aGVsb!!!!!", + .output = "hel", + .ret = -1, + .src_pos = 5, + }, + { + .scheme = &base64url_scheme, + .input = "aGVsb!!!!!", + .output = "hel", + .ret = -1, + .src_pos = 5, + }, + { + .scheme = &base64_scheme, + .input = "aGVs!!!!!", + .output = "hel", + .ret = -1, + .src_pos = 4, + }, + { + .scheme = &base64url_scheme, + .input = "aGVs!!!!!", + .output = "hel", + .ret = -1, + .src_pos = 4, + }, + { + .scheme = &base64_scheme, + .input = + "0JPQvtCy0L7RgNGPzIHRgiwg0YfRgt" + "C+INC60YPRgCDQtNC+0Y/MgdGCLg==", + .output = + "\xd0\x93\xd0\xbe\xd0\xb2\xd0\xbe\xd1\x80\xd1\x8f\xcc" + "\x81\xd1\x82\x2c\x20\xd1\x87\xd1\x82\xd0\xbe\x20\xd0" + "\xba\xd1\x83\xd1\x80\x20\xd0\xb4\xd0\xbe\xd1\x8f\xcc" + "\x81\xd1\x82\x2e", + .ret = 0, + .src_pos = UINT_MAX, + }, + { + .scheme = &base64url_scheme, + .input = + "0JPQvtCy0L7RgNGPzIHRgiwg0YfRgt" + "C-INC60YPRgCDQtNC-0Y_MgdGCLg==", + .output = + "\xd0\x93\xd0\xbe\xd0\xb2\xd0\xbe\xd1\x80\xd1\x8f\xcc" + "\x81\xd1\x82\x2c\x20\xd1\x87\xd1\x82\xd0\xbe\x20\xd0" + "\xba\xd1\x83\xd1\x80\x20\xd0\xb4\xd0\xbe\xd1\x8f\xcc" + "\x81\xd1\x82\x2e", + .ret = 0, + .src_pos = UINT_MAX, + }, +}; + +static void test_base64_decode_lowlevel(void) +{ + string_t *str; + buffer_t buf; + unsigned int i; + + test_begin("base64 decode low-level"); + for (i = 0; i < N_ELEMENTS(tests_base64_decode_lowlevel); i++) { + const struct test_base64_decode_lowlevel *test = + 
&tests_base64_decode_lowlevel[i]; + struct base64_decoder dec; + size_t src_pos; + int ret; + + /* Some of the base64_decode() callers use fixed size buffers. + Use a fixed size buffer here as well to test that + base64_decode() can't allocate any extra space even + temporarily. */ + size_t max_decoded_size = + MAX_BASE64_DECODED_SIZE(strlen(test->input)); + + buffer_create_from_data(&buf, t_malloc0(max_decoded_size), + max_decoded_size); + str = &buf; + base64_decode_init(&dec, test->scheme, test->flags); + ret = base64_decode_more(&dec, test->input, strlen(test->input), + &src_pos, str); + if (ret >= 0) + ret = base64_decode_finish(&dec); + + test_assert_idx(ret == test->ret, i); + test_assert_idx(strlen(test->output) == str_len(str) && + memcmp(test->output, str_data(str), + str_len(str)) == 0, i); + test_assert_idx(src_pos == test->src_pos || + (test->src_pos == UINT_MAX && + src_pos == strlen(test->input)), i); + if (ret >= 0) { + test_assert_idx( + str_len(str) <= MAX_BASE64_DECODED_SIZE( + strlen(test->input)), i); + } + } + test_end(); +} + +static void +test_base64_random_lowlevel_one_block(const struct base64_scheme *b64, + enum base64_decode_flags dec_flags, + unsigned int test_idx, + const unsigned char *in_buf, + size_t in_buf_size, + buffer_t *buf1, buffer_t *buf2) +{ + struct base64_encoder enc; + struct base64_decoder dec; + int ret; + + buffer_set_used_size(buf1, 0); + buffer_set_used_size(buf2, 0); + + base64_encode_init(&enc, b64); + base64_encode_more(&enc, in_buf, in_buf_size, NULL, buf1); + base64_encode_finish(&enc, buf1); + + base64_decode_init(&dec, b64, dec_flags); + ret = base64_decode_more(&dec, buf1->data, buf1->used, + NULL, buf2); + if (ret >= 0) + ret = base64_decode_finish(&dec); + + test_assert_idx(ret >= 0, test_idx); + test_assert_idx(buf2->used == in_buf_size && + memcmp(in_buf, buf2->data, in_buf_size) == 0, test_idx); +} + +static void +test_base64_random_lowlevel_stream(const struct base64_scheme *b64, + enum 
base64_decode_flags dec_flags, + unsigned int test_idx, + const unsigned char *in_buf, + size_t in_buf_size, + buffer_t *buf1, buffer_t *buf2, + size_t chunk_size) +{ + struct base64_encoder enc; + struct base64_decoder dec; + const unsigned char *buf_p, *buf_begin, *buf_end; + int ret; + size_t out_space; + void *out_data; + buffer_t out; + + buf_begin = in_buf; + buf_end = buf_begin + in_buf_size; + + buffer_set_used_size(buf1, 0); + buffer_set_used_size(buf2, 0); + + base64_encode_init(&enc, b64); + out_space = 0; + for (buf_p = buf_begin; buf_p < buf_end; ) { + size_t buf_ch, out_ch; + size_t left = (buf_end - buf_p); + size_t used = buf1->used; + size_t src_pos; + + if (chunk_size == 0) { + buf_ch = i_rand_limit(32); + out_ch = i_rand_limit(32); + } else { + buf_ch = chunk_size; + out_ch = chunk_size; + } + + out_space += out_ch; + out_data = buffer_append_space_unsafe(buf1, out_space); + buffer_create_from_data(&out, out_data, out_space); + + if (buf_ch > left) + buf_ch = left; + + base64_encode_more(&enc, buf_p, buf_ch, &src_pos, &out); + buf_p += src_pos; + i_assert(out_space >= out.used); + out_space -= out.used; + buffer_set_used_size(buf1, used + out.used); + } + base64_encode_finish(&enc, buf1); + + buf_begin = buf1->data; + buf_end = buf_begin + buf1->used; + + base64_decode_init(&dec, b64, dec_flags); + ret = 1; + out_space = 0; + for (buf_p = buf_begin; buf_p < buf_end; ) { + size_t buf_ch, out_ch; + size_t left = (buf_end - buf_p); + size_t used = buf2->used; + size_t src_pos; + + if (chunk_size == 0) { + buf_ch = i_rand_limit(32); + out_ch = i_rand_limit(32); + } else { + buf_ch = chunk_size; + out_ch = chunk_size; + } + + out_space += out_ch; + out_data = buffer_append_space_unsafe(buf2, out_space); + buffer_create_from_data(&out, out_data, out_space); + + if (buf_ch > left) + buf_ch = left; + ret = base64_decode_more(&dec, buf_p, buf_ch, + &src_pos, &out); + test_assert_idx(ret >= 0, test_idx); + if (ret < 0) { + break; + } + buf_p += src_pos; + 
i_assert(out_space >= out.used); + out_space -= out.used; + buffer_set_used_size(buf2, used + out.used); + } + test_assert_idx(ret >= 0, test_idx); + + if (ret > 0) { + ret = base64_decode_finish(&dec); + test_assert_idx(ret == 0, test_idx); + test_assert_idx(buf2->used == in_buf_size && + memcmp(in_buf, buf2->data, in_buf_size) == 0, + test_idx); + } +} + +static void +test_base64_random_lowlevel_case(const struct base64_scheme *b64, + enum base64_decode_flags dec_flags) +{ + unsigned char in_buf[512]; + size_t in_buf_size; + buffer_t *buf1, *buf2; + unsigned int i, j; + + buf1 = t_buffer_create(MAX_BASE64_ENCODED_SIZE(sizeof(in_buf))); + buf2 = t_buffer_create(sizeof(in_buf)); + + /* one block */ + for (i = 0; i < 1000; i++) { + in_buf_size = i_rand_limit(sizeof(in_buf)); + for (j = 0; j < in_buf_size; j++) + in_buf[j] = i_rand(); + + test_base64_random_lowlevel_one_block(b64, dec_flags, i, + in_buf, in_buf_size, + buf1, buf2); + } + + /* streaming; single-byte trickle */ + for (i = 0; i < 1000; i++) { + in_buf_size = i_rand_limit(sizeof(in_buf)); + for (j = 0; j < in_buf_size; j++) + in_buf[j] = i_rand(); + + test_base64_random_lowlevel_stream(b64, dec_flags, i, + in_buf, in_buf_size, + buf1, buf2, 1); + } + + /* streaming; random chunks */ + for (i = 0; i < 1000; i++) { + in_buf_size = i_rand_limit(sizeof(in_buf)); + for (j = 0; j < in_buf_size; j++) + in_buf[j] = i_rand(); + + test_base64_random_lowlevel_stream(b64, dec_flags, i, + in_buf, in_buf_size, + buf1, buf2, 0); + } +} + +static void +test_base64_random_lowlevel(void) +{ + test_begin("base64 encode/decode low-level with random input"); + test_base64_random_lowlevel_case(&base64_scheme, 0); + test_base64_random_lowlevel_case(&base64url_scheme, 0); + test_base64_random_lowlevel_case(&base64_scheme, + BASE64_DECODE_FLAG_EXPECT_BOUNDARY); + test_base64_random_lowlevel_case(&base64url_scheme, + BASE64_DECODE_FLAG_EXPECT_BOUNDARY); + test_end(); +} void test_base64(void) { test_base64_encode(); @@ -360,4 
+764,6 @@ void test_base64(void) test_base64url_decode(); test_base64url_random(); test_base64_encode_lowlevel(); + test_base64_decode_lowlevel(); + test_base64_random_lowlevel(); }