From: Aki Tuomi Date: Tue, 7 Sep 2021 11:37:34 +0000 (+0300) Subject: lib: Add i_memspn() and i_memcspn() X-Git-Tag: 2.3.17~44 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0009fd1edf213c01ff8c46e70a137c6f225993eb;p=thirdparty%2Fdovecot%2Fcore.git lib: Add i_memspn() and i_memcspn() Binary data safe variants of strspn() and strcspn() --- diff --git a/src/lib/strfuncs.c b/src/lib/strfuncs.c index 9c5d80eef0..56e45765ba 100644 --- a/src/lib/strfuncs.c +++ b/src/lib/strfuncs.c @@ -633,6 +633,46 @@ str_match(const char *p1, const char *p2) return i; } +size_t i_memspn(const void *data, size_t data_len, + const void *accept, size_t accept_len) +{ + const unsigned char *start = data; + i_assert(data != NULL || data_len == 0); + i_assert(accept != NULL || accept_len == 0); + size_t pos = 0; + /* nothing to accept, so the matching prefix is empty */ + if (accept_len == 0) + return 0; + for (; pos < data_len; pos++) { + if (memchr(accept, start[pos], accept_len) == NULL) + break; + } + return pos; +} + +size_t i_memcspn(const void *data, size_t data_len, + const void *reject, size_t reject_len) +{ + const unsigned char *start = data; + const unsigned char *r = reject; + const unsigned char *ptr = CONST_PTR_OFFSET(data, data_len); + i_assert(data != NULL || data_len == 0); + i_assert(reject != NULL || reject_len == 0); + /* nothing to reject (or no data): the whole input qualifies */ + if (reject_len == 0 || data_len == 0) + return data_len; + /* Doing repeated memchr's over the data is faster than + going over it once byte by byte, as long as reject + is reasonably short. 
*/ + for (size_t i = 0; i < reject_len; i++) { + const unsigned char *kand = + memchr(start, r[i], data_len); + if (kand != NULL && kand < ptr) + ptr = kand; + } + return ptr - start; +} + static char ** split_str_slow(pool_t pool, const char *data, const char *separators, bool spaces) { diff --git a/src/lib/strfuncs.h b/src/lib/strfuncs.h index 0fef770ac8..1ddb82f8b5 100644 --- a/src/lib/strfuncs.h +++ b/src/lib/strfuncs.h @@ -97,6 +97,21 @@ static inline ATTR_PURE bool str_begins(const char *haystack, const char *needle # define str_begins(h, n) (__builtin_constant_p(n) ? strncmp((h), (n), strlen(n))==0 : (str_begins)((h), (n))) #endif +/* Get length of a prefix segment. Binary data safe variant of strspn(). + + Calculates the length (in bytes) of the initial segment of data which consists + entirely of bytes in accept. +*/ +size_t i_memspn(const void *data, size_t data_len, + const void *accept, size_t accept_len); +/* Get length of a prefix segment. Binary data safe variant of strcspn(). + + Calculates the length (in bytes) of the initial segment of data which consists entirely of + bytes not in reject. 
+*/ +size_t i_memcspn(const void *data, size_t data_len, + const void *reject, size_t reject_len); + static inline char *i_strchr_to_next(const char *str, char chr) { char *tmp = (char *)strchr(str, chr); diff --git a/src/lib/test-strfuncs.c b/src/lib/test-strfuncs.c index b3c84fee7c..f9154709f9 100644 --- a/src/lib/test-strfuncs.c +++ b/src/lib/test-strfuncs.c @@ -495,6 +495,109 @@ test_str_match(void) test_end(); } +static void test_memspn(void) +{ +#undef TEST_CASE +/* we subtract 1 to ensure we don't include the final \0 byte */ +#define TEST_CASE(a, b, r) { \ + .input = (const unsigned char*)((a)), .input_len = sizeof((a))-1, \ + .accept = (const unsigned char*)((b)), .accept_len = sizeof((b))-1, \ + .result = r, \ +} + + static struct { + const unsigned char *input; + size_t input_len; + const unsigned char *accept; + size_t accept_len; + size_t result; + } tests[] = { + TEST_CASE("", "", 0), + TEST_CASE("", "123456789", 0), + TEST_CASE("123456789", "", 0), + TEST_CASE("hello, world", "helo", 5), + TEST_CASE("hello, uuuuu", "helo", 5), + TEST_CASE("\0\0\0\0\0hello", "\0", 5), + TEST_CASE("\r\r\r\r", "\r", 4), + TEST_CASE("aaa", "a", 3), + TEST_CASE("bbb", "a", 0), + /* null safety test */ + { + .input = NULL, .accept = NULL, + .input_len = 0, .accept_len = 0, + .result = 0, + } + }; + + test_begin("i_memspn"); + + for (unsigned int i = 0; i < N_ELEMENTS(tests); i++) { + size_t a = i_memspn(tests[i].input, tests[i].input_len, + tests[i].accept, tests[i].accept_len); + test_assert_ucmp_idx(a, ==, tests[i].result, i); + if (tests[i].input == NULL) + continue; + a = i_memspn(tests[i].input, strlen((const char*)tests[i].input), + tests[i].accept, strlen((const char*)tests[i].accept)); + size_t b = strspn((const char*)tests[i].input, + (const char*)tests[i].accept); + test_assert_ucmp_idx(a, ==, b, i); + } + + test_end(); +} + +static void test_memcspn(void) +{ +#undef TEST_CASE +/* we subtract 1 to ensure we don't include the final \0 byte */ +#define 
TEST_CASE(a, b, r) { \ + .input = (const unsigned char*)((a)), .input_len = sizeof((a))-1, \ + .reject = (const unsigned char*)((b)), .reject_len = sizeof((b))-1, \ + .result = r, \ +} + + static struct { + const unsigned char *input; + size_t input_len; + const unsigned char *reject; + size_t reject_len; + size_t result; + } tests[] = { + TEST_CASE("", "", 0), + TEST_CASE("hello", "", 5), + TEST_CASE("uuuuu, hello", "helo", 7), + TEST_CASE("\0\0\0\0\0\0hello", "u", 11), + TEST_CASE("this\0is\0test", "\0", 4), + TEST_CASE("hello, world\r", "\r", 12), + TEST_CASE("aaa", "a", 0), + TEST_CASE("bbb", "a", 3), + /* null safety test */ + { + .input = NULL, .reject = NULL, + .input_len = 0, .reject_len = 0, + .result = 0, + } + }; + + test_begin("i_memcspn"); + + for (unsigned int i = 0; i < N_ELEMENTS(tests); i++) { + size_t a = i_memcspn(tests[i].input, tests[i].input_len, + tests[i].reject, tests[i].reject_len); + test_assert_ucmp_idx(a, ==, tests[i].result, i); + if (tests[i].input == NULL) + continue; + a = i_memcspn(tests[i].input, strlen((const char*)tests[i].input), + tests[i].reject, strlen((const char*)tests[i].reject)); + size_t b = strcspn((const char*)tests[i].input, + (const char*)tests[i].reject); + test_assert_ucmp_idx(a, ==, b, i); + } + + test_end(); +} + void test_strfuncs(void) { test_p_strdup(); @@ -515,6 +618,8 @@ void test_strfuncs(void) test_str_equals_timing_almost_safe(); test_dec2str_buf(); test_str_match(); + test_memspn(); + test_memcspn(); } enum fatal_test_state fatal_strfuncs(unsigned int stage)