From: Aki Tuomi
Date: Wed, 26 Jun 2024 19:44:32 +0000 (+0300)
Subject: lib: Add punycode decoder
X-Git-Tag: 2.4.0~1556
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=eabb4115a76f4ba3beb193f4fdb2f484bdf4da48;p=thirdparty%2Fdovecot%2Fcore.git

lib: Add punycode decoder
---

diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index 26158edb15..a87d7b7415 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -161,6 +161,7 @@ liblib_la_SOURCES = \
 	process-stat.c \
 	process-title.c \
 	priorityq.c \
+	punycode.c \
 	randgen.c \
 	rand.c \
 	read-full.c \
@@ -321,6 +322,7 @@ headers = \
 	process-stat.h \
 	process-title.h \
 	priorityq.h \
+	punycode.h \
 	randgen.h \
 	read-full.h \
 	restrict-access.h \
@@ -445,6 +447,7 @@ test_lib_SOURCES = \
 	test-primes.c \
 	test-printf-format-fix.c \
 	test-priorityq.c \
+	test-punycode.c \
 	test-random.c \
 	test-seq-range-array.c \
 	test-seq-set-builder.c \
diff --git a/src/lib/punycode.c b/src/lib/punycode.c
new file mode 100644
index 0000000000..b9a802d4d9
--- /dev/null
+++ b/src/lib/punycode.c
@@ -0,0 +1,173 @@
+/* Copyright (c) 2024 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "unichar.h"
+#include "punycode.h"
+
+/* Bootstring parameters for Punycode (RFC 3492, section 5) */
+
+static const unsigned int base = 36; /* number of digit values */
+static const unsigned int tmin = 1;
+static const unsigned int tmax = 26;
+static const unsigned int skew = 38;
+static const unsigned int damp = 700;
+static const unsigned int initialBias = 72;
+static const unsigned int initialN = 0x80;
+static const unsigned int delimiter = '-';
+
+/*
+   Map a basic code point to its digit value. Returns base when the
+   code point is not a valid digit.
+
+   code points    digit-values
+   ------------   ----------------------
+   41..5A (A-Z) =  0 to 25, respectively
+   61..7A (a-z) =  0 to 25, respectively
+   30..39 (0-9) = 26 to 35, respectively
+*/
+static inline unsigned int decode_digit(unsigned char cp)
+{
+	if (cp >= '0' && cp <= '9')
+		return cp - '0' + 26;
+	else if (cp >= 'A' && cp <= 'Z')
+		return cp - 'A';
+	else if (cp >= 'a' && cp <= 'z')
+		return cp - 'a';
+	else
+		return base;
+}
+
+/* Returns TRUE if cp is a Unicode scalar value: at most U+10FFFF and not
+   a UTF-16 surrogate. Only such values have a valid UTF-8 encoding. */
+static inline bool is_unicode_scalar(unsigned int cp)
+{
+	return cp <= 0x10ffff && (cp < 0xd800 || cp > 0xdfff);
+}
+
+/* Bias adaptation function (RFC 3492, section 6.1). numpoints must be
+   non-zero. */
+static unsigned int adapt(unsigned int delta, unsigned int numpoints, bool firsttime)
+{
+	unsigned int k;
+
+	delta = firsttime ? delta / damp : delta >> 1;
+	/* delta >> 1 is a faster way of doing delta / 2 */
+	delta += delta / numpoints;
+
+	for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base)
+		delta /= base - tmin;
+
+	return k + (base - tmin + 1) * delta / (delta + skew);
+}
+
+/* Decodes the first len bytes of input as a punycoded string and appends
+   the result to output, or returns -1 on error. The input does not have
+   to be NUL-terminated. Nothing is appended to output on error. */
+int punycode_decode(const char *input, size_t len, string_t *output)
+{
+	ARRAY(unichar_t) label;
+	size_t i = 0;
+	size_t out = 0;
+	unsigned int n = initialN, bias = initialBias;
+	const char *end = CONST_PTR_OFFSET(input, len);
+	const char *delim = input;
+	const char *ptr;
+
+	/* An empty input decodes to an empty string. */
+	if (len == 0)
+		return 0;
+
+	t_array_init(&label, len);
+
+	/* Find the rightmost delimiter within the first len bytes. If there
+	   is none, or it is the very first byte, delim stays at the start of
+	   the input and there are no basic code points to copy. */
+	for (ptr = end; ptr > input; ptr--) {
+		if ((unsigned char)ptr[-1] == delimiter) {
+			delim = ptr - 1;
+			break;
+		}
+	}
+
+	for (ptr = input; ptr < delim; ptr++) {
+		if ((unsigned char)*ptr >= 0x80) {
+			/* Has non-ascii input, this cannot be punycoded. */
+			return -1;
+		}
+		/* Add basic code points to label */
+		unichar_t ch = (unsigned char)*ptr;
+		array_push_back(&label, &ch);
+	}
+
+	out = array_count(&label);
+
+	/* Main decoding loop: start from after delimiter. When everything
+	   after the delimiter is empty (e.g. "abc-") the loop body never
+	   runs and the basic code points are the whole result. */
+	ptr = delim != input ? delim + 1 : input;
+
+	while (ptr < end) {
+		unsigned int oldi, w, k, digit, t;
+		unichar_t ch;
+
+		/* Decode a generalized variable-length integer into delta,
+		   which gets added to i. The overflow checking is easier if
+		   we increase i as we go, then subtract off its starting
+		   value at the end to obtain delta. */
+
+		oldi = i;
+		w = 1;
+		k = base;
+
+		for (;;) {
+			/* The input ended in the middle of an integer. */
+			if (ptr >= end)
+				return -1;
+			/* ptr points to next digit to decode */
+			digit = decode_digit(*ptr++);
+			if (digit >= base)
+				return -1;
+			if (digit > (UINT_MAX - i) / w)
+				return -1;
+			i += digit * w;
+			t = k <= bias ? tmin :
+				k >= bias + tmax ? tmax : k - bias;
+			if (digit < t)
+				break;
+			if (w > UINT_MAX / (base - t))
+				return -1;
+			w *= (base - t);
+			k += base;
+		}
+
+		bias = adapt(i - oldi, out + 1, oldi == 0);
+
+		/* i was supposed to wrap around from out+1 to 0, incrementing
+		   n each time, so we'll fix that now: */
+
+		if (i / (out + 1) > UINT_MAX - n)
+			return -1;
+
+		n += i / (out + 1);
+		i %= (out + 1);
+
+		/* The decoded code point must be non-basic and must be
+		   representable as valid UTF-8. */
+		if (n < initialN || !is_unicode_scalar(n))
+			return -1;
+
+		/* Insert n at position i of the output: */
+		if (i > out)
+			return -1;
+		ch = n;
+		array_insert(&label, i, &ch, 1);
+		out++;
+
+		i++;
+	}
+
+	uni_ucs4_to_utf8(array_front(&label), out, output);
+	return 0;
+}
diff --git a/src/lib/punycode.h b/src/lib/punycode.h
new file mode 100644
index 0000000000..40adf3b78f
--- /dev/null
+++ b/src/lib/punycode.h
@@ -0,0 +1,10 @@
+#ifndef PUNYCODE_H
+#define PUNYCODE_H
+
+/* Decode the first len bytes of input as a Punycode (RFC 3492) string and
+   append the result to output as UTF-8. The input does not need to be
+   NUL-terminated. Returns 0 on success and -1 if the input is not valid
+   Punycode, in which case nothing is appended to output. */
+int punycode_decode(const char *input, size_t len, string_t *output);
+
+#endif
diff --git a/src/lib/test-lib.inc b/src/lib/test-lib.inc
index 59e7042850..34ec63225b 100644
--- a/src/lib/test-lib.inc
+++ b/src/lib/test-lib.inc
@@ -88,6 +88,7 @@ TEST(test_primes)
 TEST(test_printf_format_fix)
 FATAL(fatal_printf_format_fix)
 TEST(test_priorityq)
+TEST(test_punycode)
 TEST(test_random)
 FATAL(fatal_random)
 TEST(test_seq_range_array)
diff --git a/src/lib/test-punycode.c b/src/lib/test-punycode.c
new file mode 100644
index 0000000000..485a5cfa42
--- /dev/null
+++ b/src/lib/test-punycode.c
@@ -0,0 +1,54 @@
+/* Copyright (c) 2024 Dovecot authors, see the included COPYING file */
+
+#include "test-lib.h"
+#include "punycode.h"
+#include "str.h"
+
+static void test_punycode_decode(void)
+{
+	const struct test_case {
+		const char *in;
+		const char *out;
+		int ret;
+	} cases[] = {
+		/* has ASCII, appends */
+		{ .in = "gr-zia", .out = "\x67\x72\xc3\xa5", .ret = 0 },
+		/* has ASCII, inserts */
+		{ .in = "bl-yia", .out = "\x62\xc3\xa5\x6c", .ret = 0 },
+		/* has ASCII, inserts AND appends */
+		{ .in = "stlbl-nrad",
+		  .out = "\x73\x74\xc3\xa5\x6c\x62\x6c\xc3\xa5", .ret = 0 },
+		/* has no ASCII, appends */
+		{ .in = "--7sbabjsrp6aymef",
+		  .out = "\xd0\xb0\xd0\xba\xd1\x82\xd1\x80\xd0\xb8\xd1\x81\xd0"
+			 "\xb0\x2d\xd0\xb2\xd0\xb5\xd1\x81\xd0\xbd\xd0\xb0",
+		  .ret = 0 },
+		/* only ASCII, nothing after the delimiter */
+		{ .in = "abc-", .out = "abc", .ret = 0 },
+		/* empty input */
+		{ .in = "", .out = "", .ret = 0 },
+		/* broken */
+		{ .in = "zz-zzzz", .out = "", .ret = -1 },
+		/* input ends in the middle of an integer */
+		{ .in = "gr-zi", .out = "", .ret = -1 },
+		/* decodes to a value above U+10FFFF */
+		{ .in = "99999a", .out = "", .ret = -1 },
+	};
+
+	unsigned int i;
+	string_t *r = t_str_new(42);
+
+	test_begin("punycode decoding");
+	for (i = 0; i < N_ELEMENTS(cases); i ++) {
+		str_truncate(r, 0);
+		int ret = punycode_decode(cases[i].in, strlen(cases[i].in), r);
+		test_assert_idx(ret == cases[i].ret, i);
+		test_assert_strcmp_idx(str_c(r), cases[i].out, i);
+	}
+	test_end();
+}
+
+void test_punycode(void)
+{
+	test_punycode_decode();
+}