From: Amaury Denoyelle Date: Wed, 7 Jul 2021 08:49:26 +0000 (+0200) Subject: MEDIUM: http: implement scheme-based normalization X-Git-Tag: v2.5-dev2~36 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4c0882b1b42790258a2153dad9fc57a69b32bc2d;p=thirdparty%2Fhaproxy.git MEDIUM: http: implement scheme-based normalization Implement the scheme-based URI normalization as described in RFC 3986 section 6.3.2. Its purpose is to remove the port of a URI if the default one is used according to the URI scheme: 80/http and 443/https. All other ports are not touched. This method uses an htx message as an input. It requires that the target URI is in absolute-form with an http/https scheme. This represents most h2 requests except CONNECT. On the contrary, most h1 requests won't be eligible, as origin-form is the standard case. The normalization is first applied on the target URL of the start line. Then, it is conducted on every Host header present, assuming that they are equivalent to the target URL. This change will be notably useful to avoid confusing users who are accustomed to using the host for routing without specifying default ports. This problem was recently encountered with Firefox, which specifies the 443 default port for http2 websocket Extended CONNECT. 
---

diff --git a/include/haproxy/http_htx.h b/include/haproxy/http_htx.h
index d807584dec..f7becff268 100644
--- a/include/haproxy/http_htx.h
+++ b/include/haproxy/http_htx.h
@@ -66,6 +66,8 @@ int http_check_http_reply(struct http_reply *reply, struct proxy*px, char **errm
 struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struct proxy *px,
                                          int default_status, char **errmsg);
 
+int http_scheme_based_normalize(struct htx *htx);
+
 struct buffer *http_load_errorfile(const char *file, char **errmsg);
 struct buffer *http_load_errormsg(const char *key, const struct ist msg, char **errmsg);
 struct buffer *http_parse_errorfile(int status, const char *file, char **errmsg);
diff --git a/src/http_htx.c b/src/http_htx.c
index f8a72f5046..6c288f9f58 100644
--- a/src/http_htx.c
+++ b/src/http_htx.c
@@ -11,6 +11,7 @@
  */
 #include
 #include
+#include <ctype.h>
 #include
 #include
 
@@ -1718,6 +1719,120 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc
 	return NULL;
 }
 
+/* Returns non-zero if <port> is the default port for <scheme>: "443" for
+ * "https://" or "80" for "http://". The port must match exactly and the scheme
+ * is expected with its trailing "://".
+ */
+static int uri_is_default_port(const struct ist scheme, const struct ist port)
+{
+	return (isteq(port, ist("443")) && isteqi(scheme, ist("https://"))) ||
+	       (isteq(port, ist("80")) && isteqi(scheme, ist("http://")));
+}
+
+/* Apply scheme-based normalization as described in RFC 3986 section 6.3.2.
+ * Returns 0 if no error has been found else non-zero.
+ *
+ * The normalization is processed on the target-uri at the condition that it is
+ * in absolute-form. In the case where the target-uri was normalized, every
+ * host header value found is also replaced by the normalized hostname. This
+ * assumes that the target-uri and host headers were properly identified as
+ * similar before calling this function.
+ *
+ * Nothing is changed and 0 is returned when the target-uri has no scheme, no
+ * authority, no port, or a port which is not the default one of its scheme.
+ * An error is reported when the start line or a host header cannot be rewritten.
+ */
+int http_scheme_based_normalize(struct htx *htx)
+{
+	struct http_hdr_ctx ctx;
+	struct htx_sl *sl;
+	struct ist uri, scheme, authority, host, port;
+	char *start, *end, *ptr;
+
+	sl = http_get_stline(htx);
+
+	if (!sl || !(sl->flags & (HTX_SL_F_HAS_SCHM|HTX_SL_F_HAS_AUTHORITY)))
+		return 0;
+
+	uri = htx_sl_req_uri(sl);
+
+	scheme = http_get_scheme(uri);
+	/* if no scheme found, no normalization to proceed */
+	if (!isttest(scheme))
+		return 0;
+
+	/* An absent or empty authority cannot carry a port. Returning here also
+	 * guarantees that the reverse scan below reads at least one character
+	 * and never dereferences the end of the authority or a NULL pointer.
+	 */
+	authority = http_get_authority(uri, 0);
+	if (!isttest(authority) || !istlen(authority))
+		return 0;
+
+	/* Extract the port if present in authority. To properly support ipv6
+	 * hostnames, do a reverse search on the last ':' separator as long as
+	 * digits are found. The cast to unsigned char is needed because
+	 * isdigit() is undefined for negative values other than EOF.
+	 */
+	start = istptr(authority);
+	end = istend(authority);
+	for (ptr = end; ptr > start && isdigit((unsigned char)*--ptr); )
+		;
+
+	/* if no port found, no normalization to proceed */
+	if (likely(*ptr != ':'))
+		return 0;
+
+	/* split host/port on the ':' separator found */
+	host = ist2(start, ptr - start);
+	port = istnext(ist2(ptr, end - ptr));
+
+	if (istlen(port) && uri_is_default_port(scheme, port)) {
+		/* reconstruct the uri with removal of the port */
+		struct buffer *temp = get_trash_chunk();
+		struct ist meth, vsn, path;
+
+		/* meth */
+		chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl));
+		meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+		/* vsn */
+		chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl));
+		vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+		/* reconstruct uri without port */
+		path = http_get_path(uri);
+		chunk_istcat(temp, scheme);
+		chunk_istcat(temp, host);
+		chunk_istcat(temp, path);
+		uri = ist2(temp->area + meth.len + vsn.len,
+		           scheme.len + host.len + path.len);
+
+		/* <meth>, <uri> and <vsn> now all point into the trash chunk.
+		 * A failed rewrite must be reported, otherwise the host headers
+		 * would lose the port while the target-uri still carries it.
+		 */
+		if (!http_replace_stline(htx, meth, uri, vsn))
+			goto fail;
+
+		/* Replace every host header value by the normalized host.
+		 * NOTE(review): <host> still points into the authority parsed
+		 * before the start line was rewritten - confirm it remains
+		 * valid at this point.
+		 */
+		ctx.blk = NULL;
+		while (http_find_header(htx, ist("host"), &ctx, 0)) {
+			if (!http_replace_header_value(htx, &ctx, host))
+				goto fail;
+		}
+	}
+
+	return 0;
+
+ fail:
+	return 1;
+}
+
 /* 
Parses the "errorloc[302|303]" proxy keyword */ static int proxy_parse_errorloc(char **args, int section, struct proxy *curpx, const struct proxy *defpx, const char *file, int line,