From: Daniel Stenberg Date: Mon, 17 Feb 2025 21:34:21 +0000 (+0100) Subject: strparse: speed up the hex parser somewhat X-Git-Tag: curl-8_13_0~429 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ad700a091788612017a4760ea4f81cbe8fb413ce;p=thirdparty%2Fcurl.git strparse: speed up the hex parser somewhat Around 2.3x speed-up parsing many large hexadecimal numbers. The decimal and octal parsers get marginally faster. Still very readable, compact and easy to follow code. Tweaks - combine the max and the overflow check, gains 3ns/num (use a separate check outside of the loop instead for max < base) - one less indirection in the pointer, gains 3ns/num - using the table lookup for hex nums, gains 5ns/num - unfold the num_digit() macro, gains 3ns/num - use the hexasciitable unconditionally, gains 2ns/num - use post-increment pointer in the table lookup, gains 1ns/num - improved valid_digit() using the table for the hex case, gains 26ns/num - use "max char" in valid_digit(), gains 3ns/num Behavior changes: - no longer returns STRE_BIG - only STRE_OVERFLOW - does not move the char ** on error, which is probably better Updated and extended test 1664 (significantly). 
Closes #16374 --- diff --git a/lib/cf-socket.c b/lib/cf-socket.c index abd485a971..967a27fc2e 100644 --- a/lib/cf-socket.c +++ b/lib/cf-socket.c @@ -995,7 +995,7 @@ static CURLcode cf_socket_ctx_init(struct cf_socket_ctx *ctx, p = getenv("CURL_DBG_SOCK_RMAX"); if(p) { curl_off_t l; - if(!Curl_str_number(&p, &l, SIZE_T_MAX)) + if(!Curl_str_number(&p, &l, CURL_OFF_T_MAX)) ctx->recv_max = (size_t)l; } } diff --git a/lib/cfilters.c b/lib/cfilters.c index fa0abc46d9..3cc1a41840 100644 --- a/lib/cfilters.c +++ b/lib/cfilters.c @@ -882,14 +882,14 @@ CURLcode Curl_conn_send(struct Curl_easy *data, int sockindex, DEBUGASSERT(data->conn); conn = data->conn; #ifdef DEBUGBUILD - { + if(write_len) { /* Allow debug builds to override this logic to force short sends */ const char *p = getenv("CURL_SMALLSENDS"); if(p) { curl_off_t altsize; - if(!Curl_str_number(&p, &altsize, SIZE_T_MAX)) - write_len = CURLMIN(write_len, (size_t)altsize); + if(!Curl_str_number(&p, &altsize, write_len)) + write_len = (size_t)altsize; } } #endif diff --git a/lib/request.c b/lib/request.c index 6e98879c31..667880fa76 100644 --- a/lib/request.c +++ b/lib/request.c @@ -195,11 +195,13 @@ static CURLcode xfer_send(struct Curl_easy *data, /* Allow debug builds to override this logic to force short initial sends */ size_t body_len = blen - hds_len; - const char *p = getenv("CURL_SMALLREQSEND"); - if(p) { - curl_off_t body_small; - if(!Curl_str_number(&p, &body_small, body_len)) - blen = hds_len + (size_t)body_small; + if(body_len) { + const char *p = getenv("CURL_SMALLREQSEND"); + if(p) { + curl_off_t body_small; + if(!Curl_str_number(&p, &body_small, body_len)) + blen = hds_len + (size_t)body_small; + } } } #endif diff --git a/lib/strparse.c b/lib/strparse.c index 0ba661446e..30a20f7367 100644 --- a/lib/strparse.c +++ b/lib/strparse.c @@ -104,40 +104,57 @@ int Curl_str_singlespace(const char **linep) return Curl_str_single(linep, ' '); } -/* given an ASCII hexadecimal character, return the value */ 
-#define HEXDIGIT2NUM(x) \ - (((x) > '9') ? Curl_raw_tolower(x) - 'a' + 10 : x - '0') - -/* given an ASCII character and a given base, return TRUE if valid */ -#define valid_digit(digit, base) \ - (((base == 10) && ISDIGIT(digit)) || \ - ((base == 16) && ISXDIGIT(digit)) || \ - ((base == 8) && ISODIGIT(digit))) - -/* given an ASCII character and a given base, return the value */ -#define num_digit(digit, base) \ - ((base != 16) ? digit - '0' : HEXDIGIT2NUM(digit)) +/* given an ASCII character and max ascii, return TRUE if valid */ +#define valid_digit(x,m) \ + (((x) >= '0') && ((x) <= m) && hexasciitable[(x)-'0']) /* no support for 0x prefix nor leading spaces */ static int str_num_base(const char **linep, curl_off_t *nump, curl_off_t max, int base) /* 8, 10 or 16, nothing else */ { + /* We use 16 for the zero index (and the necessary bitwise AND in the loop) + to be able to have a non-zero value there to make valid_digit() able to + use the info */ + static const unsigned char hexasciitable[] = { + 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* 0x30: 0 - 9 */ + 0, 0, 0, 0, 0, 0, 0, + 10, 11, 12, 13, 14, 15, /* 0x41: A - F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 10, 11, 12, 13, 14, 15 /* 0x61: a - f */ + }; + curl_off_t num = 0; + const char *p; + int m = (base == 10) ? '9' : /* the largest digit possible */ + (base == 16) ? 
'f' : '7'; DEBUGASSERT(linep && *linep && nump); DEBUGASSERT((base == 8) || (base == 10) || (base == 16)); + DEBUGASSERT(max >= 0); /* mostly to catch SIZE_T_MAX, which is too large */ *nump = 0; - if(!valid_digit(**linep, base)) + p = *linep; + if(!valid_digit(*p, m)) return STRE_NO_NUM; - do { - int n = num_digit(**linep, base); - if(num > ((CURL_OFF_T_MAX - n) / base)) - return STRE_OVERFLOW; - num = num * base + n; - if(num > max) - return STRE_BIG; /** too big */ - (*linep)++; - } while(valid_digit(**linep, base)); + if(max < base) { + /* special-case low max scenario because check needs to be different */ + do { + int n = hexasciitable[*p++ - '0'] & 0x0f; + num = num * base + n; + if(num > max) + return STRE_OVERFLOW; + } while(valid_digit(*p, m)); + } + else { + do { + int n = hexasciitable[*p++ - '0'] & 0x0f; + if(num > ((max - n) / base)) + return STRE_OVERFLOW; + num = num * base + n; + } while(valid_digit(*p, m)); + } *nump = num; + *linep = p; return STRE_OK; } diff --git a/tests/data/test1664 b/tests/data/test1664 index 0c52b11afb..0800ca2842 100644 --- a/tests/data/test1664 +++ b/tests/data/test1664 @@ -85,10 +85,10 @@ Curl_str_single 6: ("") 5, line 0 Curl_str_number 0: ("1") 0, [1] line 1 -1: ("10000") 1, [0] line 4 +1: ("10000") 7, [0] line 0 2: ("1234") 0, [1234] line 4 3: ("1235") 0, [1235] line 4 -4: ("1236") 1, [0] line 3 +4: ("1236") 7, [0] line 0 5: ("01234") 0, [1234] line 5 6: ("00000000000000000000000000001234") 0, [1234] line 32 7: ("0123 345") 0, [123] line 4 @@ -96,31 +96,95 @@ Curl_str_number 9: ("-12") 8, [0] line 0 10: (" 123") 8, [0] line 0 11: ("") 8, [0] line 0 +Curl_str_number varying max +0: ("00") max 8 == 0, [0] +1: ("1") max 8 == 0, [1] +2: ("1") max 1 == 0, [1] +3: ("2") max 1 == 7, [0] +4: ("2") max 2 == 0, [2] +5: ("5") max 6 == 0, [5] +6: ("000000000000000000000006") max 6 == 0, [6] +7: ("7") max 6 == 7, [0] +8: ("8") max 6 == 7, [0] +9: ("9") max 8 == 7, [0] +10: ("10") max 10 == 0, [10] +11: ("11") max 10 == 7, [0] +12: 
("12") max 10 == 7, [0] +Curl_str_hex varying max +0: ("00") max 8 == 0, [0] +1: ("1") max 8 == 0, [1] +2: ("1") max 1 == 0, [1] +3: ("2") max 1 == 7, [0] +4: ("2") max 2 == 0, [2] +5: ("5") max 6 == 0, [5] +6: ("000000000000000000000006") max 6 == 0, [6] +7: ("7") max 6 == 7, [0] +8: ("8") max 6 == 7, [0] +9: ("9") max 8 == 7, [0] +10: ("a") max 14 == 0, [10] +11: ("b") max 14 == 0, [11] +12: ("c") max 14 == 0, [12] +13: ("d") max 14 == 0, [13] +14: ("e") max 14 == 0, [14] +15: ("f") max 14 == 7, [0] +16: ("f") max 15 == 0, [15] +17: ("10") max 16 == 0, [16] +18: ("11") max 16 == 7, [0] +19: ("12") max 16 == 7, [0] +Curl_str_octal varying max +0: ("00") max 4 == 0, [0] +1: ("1") max 4 == 0, [1] +2: ("1") max 4 == 0, [1] +3: ("2") max 4 == 0, [2] +4: ("3") max 4 == 0, [3] +5: ("4") max 4 == 0, [4] +6: ("5") max 4 == 7, [0] +7: ("000000000000000000000006") max 6 == 0, [6] +8: ("7") max 7 == 0, [7] +9: ("10") max 8 == 0, [8] +10: ("11") max 8 == 7, [0] +11: ("11") max 9 == 0, [9] +12: ("12") max 9 == 7, [0] +13: ("13") max 9 == 7, [0] +14: ("8") max 10 == 8, [0] Curl_str_number / max 0: ("9223372036854775807") 0, [9223372036854775807] line 19 -1: ("9223372036854775808") 7, [0] line 18 -2: ("18446744073709551615") 7, [0] line 19 -3: ("18446744073709551616") 7, [0] line 19 -4: ("18446744073709551617") 7, [0] line 19 +1: ("9223372036854775808") 7, [0] line 0 +2: ("18446744073709551615") 7, [0] line 0 +3: ("18446744073709551616") 7, [0] line 0 +4: ("18446744073709551617") 7, [0] line 0 +5: ("0123456799a") 0, [123456799] line 10 +6: ("0123456789") 0, [123456789] line 10 +7: ("123498760b") 0, [123498760] line 9 +8: ("1234987607611298232") 0, [1234987607611298232] line 19 +9: ("1111111111111111111") 0, [1111111111111111111] line 19 +10: ("2222222222222222222") 0, [2222222222222222222] line 19 +11: ("00000000000000000000000000000009223372036854775807") 0, [9223372036854775807] line 50 +12: ("3333333333333333333") 0, [3333333333333333333] line 19 +13: ("4444444444444444444") 
0, [4444444444444444444] line 19 +14: ("5555555555555555555") 0, [5555555555555555555] line 19 +15: ("6666666666666666666") 0, [6666666666666666666] line 19 +16: ("7777777777777777777") 0, [7777777777777777777] line 19 +17: ("8888888888888888888") 0, [8888888888888888888] line 19 +18: ("999999999999999999") 0, [999999999999999999] line 18 Curl_str_newline -0: ("a") 6, line 0 -1: ("aa") 6, line 0 -2: ("A") 6, line 0 -3: ("b") 6, line 0 -4: ("\") 6, line 0 -5: (" ") 6, line 0 -6: (" -") 0, line 1 -7: (" ") 0, line 1 -8: (" -") 0, line 1 -9: ("") 6, line 0 +0: (%61) 6, line 0 +1: (%61) 6, line 0 +2: (%41) 6, line 0 +3: (%62) 6, line 0 +4: (%5c) 6, line 0 +5: (%20) 6, line 0 +6: (%0a) 0, line 1 +7: (%0d) 0, line 1 +8: (%0d) 0, line 1 +9: (%0c) 6, line 0 +10: (%00) 6, line 0 Curl_str_hex 0: ("1") 0, [1] line 1 1: ("1000") 0, [4096] line 4 2: ("1234") 0, [4660] line 4 3: ("1235") 0, [4661] line 4 -4: ("1236") 1, [0] line 3 +4: ("1236") 7, [0] line 0 5: ("01234") 0, [4660] line 5 6: ("00000000000000000000000000001234") 0, [4660] line 32 7: ("0123 345") 0, [291] line 4 @@ -133,7 +197,7 @@ Curl_str_octal 1: ("1000") 0, [512] line 4 2: ("1234") 0, [668] line 4 3: ("1235") 0, [669] line 4 -4: ("1236") 1, [0] line 3 +4: ("1236") 7, [0] line 0 5: ("01234") 0, [668] line 5 6: ("00000000000000000000000000001234") 0, [668] line 32 7: ("0123 345") 0, [83] line 4 @@ -143,10 +207,34 @@ Curl_str_octal 11: ("") 8, [0] line 0 Curl_str_octal / max 0: ("777777777777777777777") 0, [9223372036854775807] line 21 -1: ("1000000000000000000000") 7, [0] line 21 +1: ("1000000000000000000000") 7, [0] line 0 +2: ("111111111111111111111") 0, [1317624576693539401] line 21 +3: ("222222222222222222222") 0, [2635249153387078802] line 21 +4: ("333333333333333333333") 0, [3952873730080618203] line 21 +5: ("444444444444444444444") 0, [5270498306774157604] line 21 +6: ("555555555555555555555") 0, [6588122883467697005] line 21 +7: ("666666666666666666666") 0, [7905747460161236406] line 21 Curl_str_hex / max 
0: ("7FFFFFFFFFFFFFFF") 0, [9223372036854775807] line 16 -1: ("8000000000000000") 7, [0] line 15 +1: ("8000000000000000") 7, [0] line 0 +2: ("1111111111111111") 0, [1229782938247303441] line 16 +3: ("2222222222222222") 0, [2459565876494606882] line 16 +4: ("3333333333333333") 0, [3689348814741910323] line 16 +5: ("4444444444444444") 0, [4919131752989213764] line 16 +6: ("5555555555555555") 0, [6148914691236517205] line 16 +7: ("6666666666666666") 0, [7378697629483820646] line 16 +8: ("7777777777777777") 0, [8608480567731124087] line 16 +9: ("888888888888888") 0, [614891469123651720] line 15 +10: ("999999999999999") 0, [691752902764108185] line 15 +11: ("aaaaaaaaAAAAAAA") 0, [768614336404564650] line 15 +12: ("bbbbbbbbBBBBBBB") 0, [845475770045021115] line 15 +13: ("BBBBBBBBbbbbbbb") 0, [845475770045021115] line 15 +14: ("ccccccccCCCCCCC") 0, [922337203685477580] line 15 +15: ("ddddddddDDDDDDD") 0, [999198637325934045] line 15 +16: ("eeeeeeeeEEEEEEE") 0, [1076060070966390510] line 15 +17: ("ffffffffFFFFFFF") 0, [1152921504606846975] line 15 +18: ("abcdef") 0, [11259375] line 6 +19: ("ABCDEF") 0, [11259375] line 6 diff --git a/tests/unit/unit1664.c b/tests/unit/unit1664.c index 4fbe89da13..786cf76868 100644 --- a/tests/unit/unit1664.c +++ b/tests/unit/unit1664.c @@ -205,6 +205,114 @@ UNITTEST_START } } + { + struct t { + const char *str; + curl_off_t max; + }; + static struct t nums[] = { + { "00", 8}, + { "1", 8}, + { "1", 1}, + { "2", 1}, + { "2", 2}, + { "5", 6}, + { "000000000000000000000006", 6}, + { "7", 6}, + { "8", 6}, + { "9", 8}, + { "10", 10}, + { "11", 10}, + { "12", 10}, + {NULL, 0} + }; + printf("Curl_str_number varying max\n"); + for(i = 0; nums[i].str; i++) { + curl_off_t num; + const char *line = nums[i].str; + const char *orgline = line; + int rc = Curl_str_number(&line, &num, nums[i].max); + printf("%u: (\"%s\") max %" CURL_FORMAT_CURL_OFF_T + " == %d, [%" CURL_FORMAT_CURL_OFF_T "]\n", + i, orgline, nums[i].max, rc, num); + } + } + + { + struct t { 
+ const char *str; + curl_off_t max; + }; + static struct t nums[] = { + { "00", 8}, + { "1", 8}, + { "1", 1}, + { "2", 1}, + { "2", 2}, + { "5", 6}, + { "000000000000000000000006", 6}, + { "7", 6}, + { "8", 6}, + { "9", 8}, + { "a", 14}, + { "b", 14}, + { "c", 14}, + { "d", 14}, + { "e", 14}, + { "f", 14}, + { "f", 15}, + { "10", 16}, + { "11", 16}, + { "12", 16}, + {NULL, 0} + }; + printf("Curl_str_hex varying max\n"); + for(i = 0; nums[i].str; i++) { + curl_off_t num; + const char *line = nums[i].str; + const char *orgline = line; + int rc = Curl_str_hex(&line, &num, nums[i].max); + printf("%u: (\"%s\") max %" CURL_FORMAT_CURL_OFF_T + " == %d, [%" CURL_FORMAT_CURL_OFF_T "]\n", + i, orgline, nums[i].max, rc, num); + } + } + + { + struct t { + const char *str; + curl_off_t max; + }; + static struct t nums[] = { + { "00", 4}, + { "1", 4}, + { "1", 4}, + { "2", 4}, + { "3", 4}, + { "4", 4}, + { "5", 4}, + { "000000000000000000000006", 6}, + { "7", 7}, + { "10", 8}, + { "11", 8}, + { "11", 9}, + { "12", 9}, + { "13", 9}, + { "8", 10}, + {NULL, 0} + }; + printf("Curl_str_octal varying max\n"); + for(i = 0; nums[i].str; i++) { + curl_off_t num; + const char *line = nums[i].str; + const char *orgline = line; + int rc = Curl_str_octal(&line, &num, nums[i].max); + printf("%u: (\"%s\") max %" CURL_FORMAT_CURL_OFF_T + " == %d, [%" CURL_FORMAT_CURL_OFF_T "]\n", + i, orgline, nums[i].max, rc, num); + } + } + { /* CURL_OFF_T is typically 9223372036854775807 */ static const char *nums[] = { @@ -213,6 +321,20 @@ UNITTEST_START "18446744073709551615", /* 2^64 - 1 */ "18446744073709551616", /* 2^64 */ "18446744073709551617", /* 2^64 + 1 */ + "0123456799a", + "0123456789", + "123498760b", + "1234987607611298232", + "1111111111111111111", + "2222222222222222222", + "00000000000000000000000000000009223372036854775807", + "3333333333333333333", + "4444444444444444444", + "5555555555555555555", + "6666666666666666666", + "7777777777777777777", + "8888888888888888888", + 
"999999999999999999", NULL }; printf("Curl_str_number / max\n"); @@ -237,6 +359,7 @@ UNITTEST_START "\n", "\r", "\r\n", + "\x0c", "", NULL }; @@ -245,8 +368,8 @@ UNITTEST_START const char *line = newl[i]; const char *orgline = line; int rc = Curl_str_newline(&line); - printf("%u: (\"%s\") %d, line %d\n", - i, orgline, rc, (int)(line - orgline)); + printf("%u: (%%%02x) %d, line %d\n", + i, *orgline, rc, (int)(line - orgline)); } } @@ -309,6 +432,12 @@ UNITTEST_START static const char *nums[] = { "777777777777777777777", /* 2^63 -1 */ "1000000000000000000000", /* 2^63 */ + "111111111111111111111", + "222222222222222222222", + "333333333333333333333", + "444444444444444444444", + "555555555555555555555", + "666666666666666666666", NULL }; printf("Curl_str_octal / max\n"); @@ -327,6 +456,24 @@ UNITTEST_START static const char *nums[] = { "7FFFFFFFFFFFFFFF", /* 2^63 -1 */ "8000000000000000", /* 2^63 */ + "1111111111111111", + "2222222222222222", + "3333333333333333", + "4444444444444444", + "5555555555555555", + "6666666666666666", + "7777777777777777", + "888888888888888", + "999999999999999", + "aaaaaaaaAAAAAAA", + "bbbbbbbbBBBBBBB", + "BBBBBBBBbbbbbbb", + "ccccccccCCCCCCC", + "ddddddddDDDDDDD", + "eeeeeeeeEEEEEEE", + "ffffffffFFFFFFF", + "abcdef", + "ABCDEF", NULL }; printf("Curl_str_hex / max\n");