From: Daniel Stenberg
Date: Fri, 17 Nov 2023 12:04:13 +0000 (+0100)
Subject: url: find scheme with a "perfect hash"
X-Git-Tag: curl-8_5_0~78
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b2d8f3f7f9bbfe7e3ad88519f068d5ce3e686048;p=thirdparty%2Fcurl.git

url: find scheme with a "perfect hash"

Instead of a loop to scan over the potentially 30+ scheme names, this
uses a "perfect hash" table. This works fine because the set of schemes
is known and cannot change in a build. The hash algorithm and table size
is made to only make a single scheme index per table entry.

The perfect hash is generated by a separate tool (scripts/schemetable.c)

Closes #12347
---

diff --git a/lib/url.c b/lib/url.c
index 187dcda9af..fc35a84b99 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -168,130 +168,6 @@ static curl_prot_t get_protocol_family(const struct Curl_handler *h)
   return h->family;
 }
 
-
-/*
- * Protocol table. Schemes (roughly) in 2019 popularity order:
- *
- * HTTPS, HTTP, FTP, FTPS, SFTP, FILE, SCP, SMTP, LDAP, IMAPS, TELNET, IMAP,
- * LDAPS, SMTPS, TFTP, SMB, POP3, GOPHER POP3S, RTSP, RTMP, SMBS, DICT
- */
-static const struct Curl_handler * const protocols[] = {
-
-#if defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
-  &Curl_handler_https,
-#endif
-
-#ifndef CURL_DISABLE_HTTP
-  &Curl_handler_http,
-#endif
-
-#ifdef USE_WEBSOCKETS
-#if defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
-  &Curl_handler_wss,
-#endif
-
-#ifndef CURL_DISABLE_HTTP
-  &Curl_handler_ws,
-#endif
-#endif
-
-#ifndef CURL_DISABLE_FTP
-  &Curl_handler_ftp,
-#endif
-
-#if defined(USE_SSL) && !defined(CURL_DISABLE_FTP)
-  &Curl_handler_ftps,
-#endif
-
-#if defined(USE_SSH)
-  &Curl_handler_sftp,
-#endif
-
-#ifndef CURL_DISABLE_FILE
-  &Curl_handler_file,
-#endif
-
-#if defined(USE_SSH) && !defined(USE_WOLFSSH)
-  &Curl_handler_scp,
-#endif
-
-#ifndef CURL_DISABLE_SMTP
-  &Curl_handler_smtp,
-#ifdef USE_SSL
-  &Curl_handler_smtps,
-#endif
-#endif
-
-#ifndef CURL_DISABLE_LDAP
-  &Curl_handler_ldap,
-#if !defined(CURL_DISABLE_LDAPS) && \
-    ((defined(USE_OPENLDAP) && defined(USE_SSL)) || \
-     (!defined(USE_OPENLDAP) && defined(HAVE_LDAP_SSL)))
-  &Curl_handler_ldaps,
-#endif
-#endif
-
-#ifndef CURL_DISABLE_IMAP
-  &Curl_handler_imap,
-#ifdef USE_SSL
-  &Curl_handler_imaps,
-#endif
-#endif
-
-#ifndef CURL_DISABLE_TELNET
-  &Curl_handler_telnet,
-#endif
-
-#ifndef CURL_DISABLE_TFTP
-  &Curl_handler_tftp,
-#endif
-
-#ifndef CURL_DISABLE_POP3
-  &Curl_handler_pop3,
-#ifdef USE_SSL
-  &Curl_handler_pop3s,
-#endif
-#endif
-
-#if !defined(CURL_DISABLE_SMB) && defined(USE_CURL_NTLM_CORE) && \
-   (SIZEOF_CURL_OFF_T > 4)
-  &Curl_handler_smb,
-#ifdef USE_SSL
-  &Curl_handler_smbs,
-#endif
-#endif
-
-#ifndef CURL_DISABLE_RTSP
-  &Curl_handler_rtsp,
-#endif
-
-#ifndef CURL_DISABLE_MQTT
-  &Curl_handler_mqtt,
-#endif
-
-#ifndef CURL_DISABLE_GOPHER
-  &Curl_handler_gopher,
-#ifdef USE_SSL
-  &Curl_handler_gophers,
-#endif
-#endif
-
-#ifdef USE_LIBRTMP
-  &Curl_handler_rtmp,
-  &Curl_handler_rtmpt,
-  &Curl_handler_rtmpe,
-  &Curl_handler_rtmpte,
-  &Curl_handler_rtmps,
-  &Curl_handler_rtmpts,
-#endif
-
-#ifndef CURL_DISABLE_DICT
-  &Curl_handler_dict,
-#endif
-
-  NULL
-};
-
 
 void Curl_freeset(struct Curl_easy *data)
 {
   /* Free all dynamic strings stored in the data->set substructure. */
@@ -1593,17 +1469,216 @@ const struct Curl_handler *Curl_get_scheme_handler(const char *scheme)
 const struct Curl_handler *Curl_getn_scheme_handler(const char *scheme,
                                                     size_t len)
 {
-  size_t i;
-  /* Scan protocol handler table and match against 'scheme'. The handler may
-     be changed later when the protocol specific setup function is called. */
-  for(i = 0; i < ARRAYSIZE(protocols) - 1; ++i)
-    if(strncasecompare(protocols[i]->scheme, scheme, len) &&
-       !protocols[i]->scheme[len])
-      /* Protocol found in table. */
-      return protocols[i];
-  return NULL; /* not found */
-}
+  /* Map the first 'len' bytes of 'scheme' to its handler, or NULL if none
+     matches. 'protocols' is a perfect-hash table generated by schemetable.c;
+     slots that are unused or compiled out by the #ifdefs hold NULL.
+     To regenerate:
+     1. gcc schemetable.c && ./a.out
+     2. check how small the table gets
+     3. tweak the hash algorithm, then rerun from 1
+     4. when the table is good enough, copy it into this source code
+     5. make sure this function uses the same hash function as schemetable.c
+     6. if needed, adjust the #ifdefs in schemetable.c and rerun */
+  static const struct Curl_handler * const protocols[67] = {
+#ifndef CURL_DISABLE_FILE
+    &Curl_handler_file,
+#else
+    NULL,
+#endif
+    NULL, NULL,
+#if defined(USE_SSL) && !defined(CURL_DISABLE_GOPHER)
+    &Curl_handler_gophers,
+#else
+    NULL,
+#endif
+    NULL,
+#ifdef USE_LIBRTMP
+    &Curl_handler_rtmpe,
+#else
+    NULL,
+#endif
+#ifndef CURL_DISABLE_SMTP
+    &Curl_handler_smtp,
+#else
+    NULL,
+#endif
+#if defined(USE_SSH)
+    &Curl_handler_sftp,
+#else
+    NULL,
+#endif
+#if !defined(CURL_DISABLE_SMB) && defined(USE_CURL_NTLM_CORE) && \
+  (SIZEOF_CURL_OFF_T > 4)
+    &Curl_handler_smb,
+#else
+    NULL,
+#endif
+#if defined(USE_SSL) && !defined(CURL_DISABLE_SMTP)
+    &Curl_handler_smtps,
+#else
+    NULL,
+#endif
+#ifndef CURL_DISABLE_TELNET
+    &Curl_handler_telnet,
+#else
+    NULL,
+#endif
+#ifndef CURL_DISABLE_GOPHER
+    &Curl_handler_gopher,
+#else
+    NULL,
+#endif
+#ifndef CURL_DISABLE_TFTP
+    &Curl_handler_tftp,
+#else
+    NULL,
+#endif
+    NULL, NULL, NULL,
+#if defined(USE_SSL) && !defined(CURL_DISABLE_FTP)
+    &Curl_handler_ftps,
+#else
+    NULL,
+#endif
+#ifndef CURL_DISABLE_HTTP
+    &Curl_handler_http,
+#else
+    NULL,
+#endif
+#ifndef CURL_DISABLE_IMAP
+    &Curl_handler_imap,
+#else
+    NULL,
+#endif
+#ifdef USE_LIBRTMP
+    &Curl_handler_rtmps,
+#else
+    NULL,
+#endif
+#ifdef USE_LIBRTMP
+    &Curl_handler_rtmpt,
+#else
+    NULL,
+#endif
+    NULL, NULL, NULL,
+#if !defined(CURL_DISABLE_LDAP) && \
+  !defined(CURL_DISABLE_LDAPS) && \
+  ((defined(USE_OPENLDAP) && defined(USE_SSL)) || \
+   (!defined(USE_OPENLDAP) && defined(HAVE_LDAP_SSL)))
+    &Curl_handler_ldaps,
+#else
+    NULL,
+#endif
+#if defined(USE_WEBSOCKETS) && \
+  defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
+    &Curl_handler_wss,
+#else
+    NULL,
+#endif
+#if defined(USE_SSL) && !defined(CURL_DISABLE_HTTP)
+    &Curl_handler_https,
+#else
+    NULL,
+#endif
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+#ifndef CURL_DISABLE_RTSP
+    &Curl_handler_rtsp,
+#else
+    NULL,
+#endif
+#if defined(USE_SSL) && !defined(CURL_DISABLE_SMB) && \
+  defined(USE_CURL_NTLM_CORE) && (SIZEOF_CURL_OFF_T > 4)
+    &Curl_handler_smbs,
+#else
+    NULL,
+#endif
+#if defined(USE_SSH) && !defined(USE_WOLFSSH)
+    &Curl_handler_scp,
+#else
+    NULL,
+#endif
+    NULL, NULL, NULL,
+#ifndef CURL_DISABLE_POP3
+    &Curl_handler_pop3,
+#else
+    NULL,
+#endif
+    NULL, NULL,
+#ifdef USE_LIBRTMP
+    &Curl_handler_rtmp,
+#else
+    NULL,
+#endif
+    NULL, NULL, NULL,
+#ifdef USE_LIBRTMP
+    &Curl_handler_rtmpte,
+#else
+    NULL,
+#endif
+    NULL, NULL, NULL,
+#ifndef CURL_DISABLE_DICT
+    &Curl_handler_dict,
+#else
+    NULL,
+#endif
+    NULL, NULL, NULL,
+#ifndef CURL_DISABLE_MQTT
+    &Curl_handler_mqtt,
+#else
+    NULL,
+#endif
+#if defined(USE_SSL) && !defined(CURL_DISABLE_POP3)
+    &Curl_handler_pop3s,
+#else
+    NULL,
+#endif
+#if defined(USE_SSL) && !defined(CURL_DISABLE_IMAP)
+    &Curl_handler_imaps,
+#else
+    NULL,
+#endif
+    NULL,
+#if defined(USE_WEBSOCKETS) && !defined(CURL_DISABLE_HTTP)
+    &Curl_handler_ws,
+#else
+    NULL,
+#endif
+    NULL,
+#ifdef USE_LIBRTMP
+    &Curl_handler_rtmpts,
+#else
+    NULL,
+#endif
+#ifndef CURL_DISABLE_LDAP
+    &Curl_handler_ldap,
+#else
+    NULL,
+#endif
+    NULL, NULL,
+#ifndef CURL_DISABLE_FTP
+    &Curl_handler_ftp,
+#else
+    NULL,
+#endif
+  };
 
+  if(len && (len <= 7)) { /* presumably no scheme name exceeds 7; confirm */
+    const char *s = scheme;
+    size_t l = len;
+    const struct Curl_handler *h;
+    unsigned int c = 978; /* initial hash value; keep as in schemetable.c */
+    while(l) {
+      c <<= 5; /* hash = (hash << 5) + lowercased byte; unsigned, so wraps */
+      c += Curl_raw_tolower(*s);
+      s++;
+      l--;
+    }
+    /* the slot is only a candidate: other input can hash to the same index */
+    h = protocols[c % 67]; /* 67 is the number of slots in the table */
+    if(h && strncasecompare(scheme, h->scheme, len) && !h->scheme[len])
+      return h; /* !h->scheme[len]: the handler's name ends after 'len' */
+  }
+  return NULL;
+}
 
 static CURLcode findprotocol(struct Curl_easy *data,
                              struct connectdata *conn,