From: Amaury Denoyelle Date: Tue, 6 Jul 2021 09:02:22 +0000 (+0200) Subject: MINOR: http: use http uri parser for authority X-Git-Tag: v2.5-dev2~29 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=69294b20ac03497e33c99464a0050951bdfff737;p=thirdparty%2Fhaproxy.git MINOR: http: use http uri parser for authority Replace http_get_authority by the http_uri_parser API. The new function is renamed http_parse_authority. Replace duplicated scheme parsing code by http_parse_scheme invocation. A new http_uri_parser state is declared to mark the authority parsing as done. --- diff --git a/include/haproxy/http-t.h b/include/haproxy/http-t.h index ddc0f2350e..db0ce2379e 100644 --- a/include/haproxy/http-t.h +++ b/include/haproxy/http-t.h @@ -135,6 +135,7 @@ enum http_etag_type { enum http_uri_parser_state { URI_PARSER_STATE_BEFORE = 0, URI_PARSER_STATE_SCHEME_DONE, + URI_PARSER_STATE_AUTHORITY_DONE, }; /* HTTP URI format as described in rfc 7230 5.3. diff --git a/include/haproxy/http.h b/include/haproxy/http.h index 60e101ebd6..2b736498b8 100644 --- a/include/haproxy/http.h +++ b/include/haproxy/http.h @@ -37,7 +37,7 @@ enum http_meth_t find_http_meth(const char *str, const int len); int http_get_status_idx(unsigned int status); const char *http_get_reason(unsigned int status); struct ist http_parse_scheme(struct http_uri_parser *parser); -struct ist http_get_authority(const struct ist uri, int no_userinfo); +struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo); struct ist http_get_path(const struct ist uri); int http_header_match2(const char *hdr, const char *end, const char *name, int len); diff --git a/src/h1.c b/src/h1.c index 3a6c1c3309..4b13cab86d 100644 --- a/src/h1.c +++ b/src/h1.c @@ -871,8 +871,9 @@ int h1_headers_to_hdr_list(char *start, const char *stop, else if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP)) && isteqi(n, ist("host"))) { if (host_idx == -1) { struct ist authority; + struct http_uri_parser parser = 
http_uri_parser_init(sl.rq.u); - authority = http_get_authority(sl.rq.u, 1); + authority = http_parse_authority(&parser, 1); if (authority.len && !isteqi(v, authority)) { if (h1m->err_pos < -1) { state = H1_MSG_HDR_L2_LWS; diff --git a/src/http.c b/src/http.c index 2899232149..8b3d20b598 100644 --- a/src/http.c +++ b/src/http.c @@ -523,55 +523,40 @@ struct ist http_parse_scheme(struct http_uri_parser *parser) * path. if no_userinfo is not zero, the part before the '@' (including it) is * skipped. If not found, an empty ist is returned. Otherwise, the ist pointing * on the authority is returned. + * + * <parser> must have been initialized via http_uri_parser_init. See the + * related http_uri_parser documentation for the specific API usage. */ -struct ist http_get_authority(const struct ist uri, int no_userinfo) +struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo) { const char *ptr, *start, *end; - if (!uri.len) + if (parser->state >= URI_PARSER_STATE_AUTHORITY_DONE) goto not_found; - ptr = uri.ptr; - start = ptr; - end = ptr + uri.len; - - /* RFC7230, par. 2.7 : - * Request-URI = "*" | absuri | abspath | authority - */ - - if (*ptr == '*' || *ptr == '/') + if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY) goto not_found; - if (isalpha((unsigned char)*ptr)) { - /* this is a scheme as described by RFC3986, par. 3.1, or only - * an authority (in case of a CONNECT method). 
- */ - ptr++; - while (ptr < end && - (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.')) - ptr++; - /* skip '://' or take the whole as authority if not found */ - if (ptr == end || *ptr++ != ':') - goto authority; - if (ptr == end || *ptr++ != '/') - goto authority; - if (ptr == end || *ptr++ != '/') - goto authority; - } + if (parser->state < URI_PARSER_STATE_SCHEME_DONE) + http_parse_scheme(parser); + + ptr = start = istptr(parser->uri); + end = istend(parser->uri); - start = ptr; while (ptr < end && *ptr != '/') { if (*ptr++ == '@' && no_userinfo) start = ptr; } /* OK, ptr point on the '/' or the end */ - end = ptr; authority: - return ist2(start, end - start); + parser->uri = ist2(ptr, end - ptr); + parser->state = URI_PARSER_STATE_AUTHORITY_DONE; + return ist2(start, ptr - start); not_found: + parser->state = URI_PARSER_STATE_AUTHORITY_DONE; return IST_NULL; } diff --git a/src/http_htx.c b/src/http_htx.c index 5a62d0a21c..2e39191882 100644 --- a/src/http_htx.c +++ b/src/http_htx.c @@ -691,9 +691,11 @@ int http_update_authority(struct htx *htx, struct htx_sl *sl, const struct ist h { struct buffer *temp = get_trash_chunk(); struct ist meth, vsn, uri, authority; + struct http_uri_parser parser; uri = htx_sl_req_uri(sl); - authority = http_get_authority(uri, 1); + parser = http_uri_parser_init(uri); + authority = http_parse_authority(&parser, 1); if (!authority.len) return 0; @@ -728,9 +730,11 @@ int http_update_host(struct htx *htx, struct htx_sl *sl, const struct ist uri) { struct ist authority; struct http_hdr_ctx ctx; + struct http_uri_parser parser = http_uri_parser_init(uri); - if (!uri.len || uri.ptr[0] == '/' || uri.ptr[0] == '*') { - // origin-form or a asterisk-form (RFC7320 #5.3.1 and #5.3.4) + if (parser.format == URI_PARSER_FORMAT_EMPTY || + parser.format == URI_PARSER_FORMAT_ASTERISK || + parser.format == URI_PARSER_FORMAT_ABSPATH) { sl->flags &= ~HTX_SL_F_HAS_AUTHORITY; } else { @@ -741,7 +745,7 @@ int 
http_update_host(struct htx *htx, struct htx_sl *sl, const struct ist uri) if (uri.len > 4 && (uri.ptr[0] | 0x20) == 'h') sl->flags |= ((uri.ptr[4] == ':') ? HTX_SL_F_SCHM_HTTP : HTX_SL_F_SCHM_HTTPS); - authority = http_get_authority(uri, 1); + authority = http_parse_authority(&parser, 1); if (!authority.len) goto fail; } @@ -1759,7 +1763,7 @@ int http_scheme_based_normalize(struct htx *htx) * hostnames, do a reverse search on the last ':' separator as long as * digits are found. */ - authority = http_get_authority(uri, 0); + authority = http_parse_authority(&parser, 0); start = istptr(authority); end = istend(authority); for (ptr = end; ptr > start && isdigit((unsigned char)*--ptr); )