From: Amaury Denoyelle Date: Tue, 6 Jul 2021 09:40:12 +0000 (+0200) Subject: MINOR: http: use http uri parser for path X-Git-Tag: v2.5-dev2~27 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c453f9547e14c563f7bdf03d68979a5083c0372b;p=thirdparty%2Fhaproxy.git MINOR: http: use http uri parser for path Replace http_get_path by the http_uri_parser API. The new function is named http_parse_path. Replace duplicated code for scheme and authority parsing by invocations to http_parse_scheme/authority. If no scheme is found for a URI detected as an absolute-uri/authority, consider it to be in authority format: no path will be found. For an absolute-uri or absolute-path, use the remainder of the string as the path. A new http_uri_parser state is declared to mark the path parsing as done. --- diff --git a/include/haproxy/h1_htx.h b/include/haproxy/h1_htx.h index 5afe53dde0..0990558464 100644 --- a/include/haproxy/h1_htx.h +++ b/include/haproxy/h1_htx.h @@ -49,7 +49,8 @@ static inline struct ist h1_get_uri(const struct htx_sl *sl) uri = htx_sl_req_uri(sl); if (sl->flags & HTX_SL_F_NORMALIZED_URI) { - uri = http_get_path(uri); + struct http_uri_parser parser = http_uri_parser_init(uri); + uri = http_parse_path(&parser); if (unlikely(!uri.len)) { if (sl->info.req.meth == HTTP_METH_OPTIONS) uri = ist("*"); diff --git a/include/haproxy/http-t.h b/include/haproxy/http-t.h index db0ce2379e..1b4caf9d1a 100644 --- a/include/haproxy/http-t.h +++ b/include/haproxy/http-t.h @@ -136,6 +136,7 @@ enum http_uri_parser_state { URI_PARSER_STATE_BEFORE = 0, URI_PARSER_STATE_SCHEME_DONE, URI_PARSER_STATE_AUTHORITY_DONE, + URI_PARSER_STATE_PATH_DONE, }; /* HTTP URI format as described in rfc 7230 5.3. 
diff --git a/include/haproxy/http.h b/include/haproxy/http.h index 2b736498b8..150ebdb7f9 100644 --- a/include/haproxy/http.h +++ b/include/haproxy/http.h @@ -38,7 +38,7 @@ int http_get_status_idx(unsigned int status); const char *http_get_reason(unsigned int status); struct ist http_parse_scheme(struct http_uri_parser *parser); struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo); -struct ist http_get_path(const struct ist uri); +struct ist http_parse_path(struct http_uri_parser *parser); int http_header_match2(const char *hdr, const char *end, const char *name, int len); char *http_find_hdr_value_end(char *s, const char *e); diff --git a/src/backend.c b/src/backend.c index f5fec1d8ad..ceb24a7096 100644 --- a/src/backend.c +++ b/src/backend.c @@ -719,7 +719,10 @@ int assign_server(struct stream *s) uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf))); if (s->be->lbprm.arg_opt1 & 2) { - uri = http_get_path(uri); + struct http_uri_parser parser = + http_uri_parser_init(uri); + + uri = http_parse_path(&parser); if (!isttest(uri)) uri = ist(""); } diff --git a/src/hlua.c b/src/hlua.c index 0c0895e536..2ad3d19bd1 100644 --- a/src/hlua.c +++ b/src/hlua.c @@ -4125,6 +4125,7 @@ static int hlua_applet_http_new(lua_State *L, struct appctx *ctx) struct ist path; unsigned long long len = 0; int32_t pos; + struct http_uri_parser parser; /* Check stack size. 
*/ if (!lua_checkstack(L, 3)) @@ -4193,7 +4194,8 @@ static int hlua_applet_http_new(lua_State *L, struct appctx *ctx) return 0; lua_settable(L, -3); - path = http_get_path(htx_sl_req_uri(sl)); + parser = http_uri_parser_init(htx_sl_req_uri(sl)); + path = http_parse_path(&parser); if (isttest(path)) { char *p, *q, *end; diff --git a/src/http.c b/src/http.c index 8b3d20b598..75c899d774 100644 --- a/src/http.c +++ b/src/http.c @@ -563,50 +563,52 @@ struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo) /* Parse the URI from the given transaction (which is assumed to be in request * phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is * returned. Otherwise the pointer and length are returned. + * + * The parser must have been initialized via http_uri_parser_init. See the + * related http_uri_parser documentation for the specific API usage. */ -struct ist http_get_path(const struct ist uri) +struct ist http_parse_path(struct http_uri_parser *parser) { const char *ptr, *end; - if (!uri.len) + if (parser->state >= URI_PARSER_STATE_PATH_DONE) goto not_found; - ptr = uri.ptr; - end = ptr + uri.len; + if (parser->format == URI_PARSER_FORMAT_EMPTY || + parser->format == URI_PARSER_FORMAT_ASTERISK) { + goto not_found; + } + + ptr = istptr(parser->uri); + end = istend(parser->uri); - /* RFC7230, par. 2.7 : - * Request-URI = "*" | absuri | abspath | authority + /* If the uri is in absolute-uri format, first skip the scheme and + * authority parts. No scheme will be found if the uri is in authority + * format, which indicates that the path won't be present. */ + if (parser->format == URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY) { + if (parser->state < URI_PARSER_STATE_SCHEME_DONE) { + /* If no scheme found, uri is in authority format. No + * path is present. 
+ */ + if (!isttest(http_parse_scheme(parser))) + goto not_found; + } - if (*ptr == '*') - goto not_found; + if (parser->state < URI_PARSER_STATE_AUTHORITY_DONE) + http_parse_authority(parser, 1); - if (isalpha((unsigned char)*ptr)) { - /* this is a scheme as described by RFC3986, par. 3.1 */ - ptr++; - while (ptr < end && - (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.')) - ptr++; - /* skip '://' */ - if (ptr == end || *ptr++ != ':') - goto not_found; - if (ptr == end || *ptr++ != '/') - goto not_found; - if (ptr == end || *ptr++ != '/') + ptr = istptr(parser->uri); + + if (ptr == end) goto not_found; } - /* skip [user[:passwd]@]host[:[port]] */ - - while (ptr < end && *ptr != '/') - ptr++; - - if (ptr == end) - goto not_found; - /* OK, we got the '/' ! */ + parser->state = URI_PARSER_STATE_PATH_DONE; return ist2(ptr, end - ptr); not_found: + parser->state = URI_PARSER_STATE_PATH_DONE; return IST_NULL; } diff --git a/src/http_act.c b/src/http_act.c index f16b6390b2..c2fee04d64 100644 --- a/src/http_act.c +++ b/src/http_act.c @@ -216,7 +216,8 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p switch ((enum act_normalize_uri) rule->action) { case ACT_NORMALIZE_URI_PATH_MERGE_SLASHES: { - const struct ist path = http_get_path(uri); + struct http_uri_parser parser = http_uri_parser_init(uri); + const struct ist path = http_parse_path(&parser); struct ist newpath = ist2(replace->area, replace->size); if (!isttest(path)) @@ -233,7 +234,8 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p break; } case ACT_NORMALIZE_URI_PATH_STRIP_DOT: { - const struct ist path = http_get_path(uri); + struct http_uri_parser parser = http_uri_parser_init(uri); + const struct ist path = http_parse_path(&parser); struct ist newpath = ist2(replace->area, replace->size); if (!isttest(path)) @@ -251,7 +253,8 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p } case 
ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT: case ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL: { - const struct ist path = http_get_path(uri); + struct http_uri_parser parser = http_uri_parser_init(uri); + const struct ist path = http_parse_path(&parser); struct ist newpath = ist2(replace->area, replace->size); if (!isttest(path)) @@ -268,7 +271,8 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p break; } case ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME: { - const struct ist path = http_get_path(uri); + struct http_uri_parser parser = http_uri_parser_init(uri); + const struct ist path = http_parse_path(&parser); struct ist newquery = ist2(replace->area, replace->size); if (!isttest(path)) @@ -286,7 +290,8 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p } case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE: case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT: { - const struct ist path = http_get_path(uri); + struct http_uri_parser parser = http_uri_parser_init(uri); + const struct ist path = http_parse_path(&parser); struct ist newpath = ist2(replace->area, replace->size); if (!isttest(path)) @@ -304,7 +309,8 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p } case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED: case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT: { - const struct ist path = http_get_path(uri); + struct http_uri_parser parser = http_uri_parser_init(uri); + const struct ist path = http_parse_path(&parser); struct ist newpath = ist2(replace->area, replace->size); if (!isttest(path)) @@ -321,7 +327,8 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p break; } case ACT_NORMALIZE_URI_FRAGMENT_STRIP: { - const struct ist path = http_get_path(uri); + struct http_uri_parser parser = http_uri_parser_init(uri); + const struct ist path = http_parse_path(&parser); struct ist newpath = ist2(replace->area, replace->size); if (!isttest(path)) @@ -338,7 
+345,8 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p break; } case ACT_NORMALIZE_URI_FRAGMENT_ENCODE: { - const struct ist path = http_get_path(uri); + struct http_uri_parser parser = http_uri_parser_init(uri); + const struct ist path = http_parse_path(&parser); struct ist newpath = ist2(replace->area, replace->size); if (!isttest(path)) @@ -517,10 +525,14 @@ static enum act_return http_action_replace_uri(struct act_rule *rule, struct pro goto fail_alloc; uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf))); - if (rule->action == 1) // replace-path - uri = iststop(http_get_path(uri), '?'); - else if (rule->action == 4) // replace-pathq - uri = http_get_path(uri); + if (rule->action == 1) { // replace-path + struct http_uri_parser parser = http_uri_parser_init(uri); + uri = iststop(http_parse_path(&parser), '?'); + } + else if (rule->action == 4) { // replace-pathq + struct http_uri_parser parser = http_uri_parser_init(uri); + uri = http_parse_path(&parser); + } if (!regex_exec_match2(rule->arg.http.re, uri.ptr, uri.len, MAX_MATCH, pmatch, 0)) goto leave; diff --git a/src/http_ana.c b/src/http_ana.c index da436e7932..5eca741562 100644 --- a/src/http_ana.c +++ b/src/http_ana.c @@ -206,9 +206,10 @@ int http_wait_for_request(struct stream *s, struct channel *req, int an_bit) if (unlikely(sess->fe->monitor_uri_len != 0)) { const struct ist monitor_uri = ist2(sess->fe->monitor_uri, sess->fe->monitor_uri_len); + struct http_uri_parser parser = http_uri_parser_init(htx_sl_req_uri(sl)); if ((istptr(monitor_uri)[0] == '/' && - isteq(http_get_path(htx_sl_req_uri(sl)), monitor_uri)) || + isteq(http_parse_path(&parser), monitor_uri)) || isteq(htx_sl_req_uri(sl), monitor_uri)) { /* * We have found the monitor URI @@ -622,6 +623,7 @@ int http_process_request(struct stream *s, struct channel *req, int an_bit) if ((s->be->options & PR_O_HTTP_PROXY) && !(s->flags & SF_ADDR_SET)) { struct htx_sl *sl; struct ist uri, path; + struct 
http_uri_parser parser = http_uri_parser_init(uri); if (!sockaddr_alloc(&s->target_addr, NULL, 0)) { if (!(s->flags & SF_ERR_MASK)) @@ -630,7 +632,7 @@ int http_process_request(struct stream *s, struct channel *req, int an_bit) } sl = http_get_stline(htx); uri = htx_sl_req_uri(sl); - path = http_get_path(uri); + path = http_parse_path(&parser); if (url2sa(uri.ptr, uri.len - path.len, s->target_addr, NULL) == -1) goto return_bad_req; @@ -2409,6 +2411,7 @@ int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struc case REDIRECT_TYPE_SCHEME: { struct http_hdr_ctx ctx; struct ist path, host; + struct http_uri_parser parser; host = ist(""); ctx.blk = NULL; @@ -2416,7 +2419,8 @@ int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struc host = ctx.value; sl = http_get_stline(htx); - path = http_get_path(htx_sl_req_uri(sl)); + parser = http_uri_parser_init(htx_sl_req_uri(sl)); + path = http_parse_path(&parser); /* build message using path */ if (isttest(path)) { if (rule->flags & REDIRECT_FLAG_DROP_QS) { @@ -2462,9 +2466,11 @@ int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struc case REDIRECT_TYPE_PREFIX: { struct ist path; + struct http_uri_parser parser; sl = http_get_stline(htx); - path = http_get_path(htx_sl_req_uri(sl)); + parser = http_uri_parser_init(htx_sl_req_uri(sl)); + path = http_parse_path(&parser); /* build message using path */ if (isttest(path)) { if (rule->flags & REDIRECT_FLAG_DROP_QS) { @@ -3858,8 +3864,10 @@ static int http_stats_check_uri(struct stream *s, struct http_txn *txn, struct p htx = htxbuf(&s->req.buf); sl = http_get_stline(htx); uri = htx_sl_req_uri(sl); - if (*uri_auth->uri_prefix == '/') - uri = http_get_path(uri); + if (*uri_auth->uri_prefix == '/') { + struct http_uri_parser parser = http_uri_parser_init(uri); + uri = http_parse_path(&parser); + } /* check URI size */ if (uri_auth->uri_len > uri.len) @@ -4173,6 +4181,7 @@ void http_perform_server_redirect(struct 
stream *s, struct stream_interface *si) struct htx_sl *sl; struct ist path, location; unsigned int flags; + struct http_uri_parser parser; /* * Create the location @@ -4190,7 +4199,8 @@ void http_perform_server_redirect(struct stream *s, struct stream_interface *si) /* 2: add the request Path */ htx = htxbuf(&req->buf); sl = http_get_stline(htx); - path = http_get_path(htx_sl_req_uri(sl)); + parser = http_uri_parser_init(htx_sl_req_uri(sl)); + path = http_parse_path(&parser); if (!isttest(path)) return; diff --git a/src/http_fetch.c b/src/http_fetch.c index a28cbaf4b3..d3192aa6c4 100644 --- a/src/http_fetch.c +++ b/src/http_fetch.c @@ -1039,17 +1039,18 @@ static int smp_fetch_path(const struct arg *args, struct sample *smp, const char struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1); struct htx_sl *sl; struct ist path; + struct http_uri_parser parser; if (!htx) return 0; sl = http_get_stline(htx); - path = http_get_path(htx_sl_req_uri(sl)); + parser = http_uri_parser_init(htx_sl_req_uri(sl)); if (kw[4] == 'q' && (kw[0] == 'p' || kw[0] == 'b')) // pathq or baseq - path = http_get_path(htx_sl_req_uri(sl)); + path = http_parse_path(&parser); else - path = iststop(http_get_path(htx_sl_req_uri(sl)), '?'); + path = iststop(http_parse_path(&parser), '?'); if (!isttest(path)) return 0; @@ -1077,6 +1078,7 @@ static int smp_fetch_base(const struct arg *args, struct sample *smp, const char struct buffer *temp; struct http_hdr_ctx ctx; struct ist path; + struct http_uri_parser parser; if (!htx) return 0; @@ -1091,7 +1093,8 @@ static int smp_fetch_base(const struct arg *args, struct sample *smp, const char /* now retrieve the path */ sl = http_get_stline(htx); - path = http_get_path(htx_sl_req_uri(sl)); + parser = http_uri_parser_init(htx_sl_req_uri(sl)); + path = http_parse_path(&parser); if (isttest(path)) { size_t len; @@ -1128,6 +1131,7 @@ static int smp_fetch_base32(const struct arg *args, struct sample *smp, const ch struct http_hdr_ctx ctx; struct ist path; unsigned 
int hash = 0; + struct http_uri_parser parser; if (!htx) return 0; @@ -1141,7 +1145,8 @@ static int smp_fetch_base32(const struct arg *args, struct sample *smp, const ch /* now retrieve the path */ sl = http_get_stline(htx); - path = http_get_path(htx_sl_req_uri(sl)); + parser = http_uri_parser_init(htx_sl_req_uri(sl)); + path = http_parse_path(&parser); if (isttest(path)) { size_t len; @@ -1486,6 +1491,7 @@ static int smp_fetch_capture_req_uri(const struct arg *args, struct sample *smp, struct http_txn *txn; struct ist path; const char *ptr; + struct http_uri_parser parser; if (!smp->strm) return 0; @@ -1508,7 +1514,8 @@ static int smp_fetch_capture_req_uri(const struct arg *args, struct sample *smp, ptr++; path.len = ptr - path.ptr; - path = http_get_path(path); + parser = http_uri_parser_init(path); + path = http_parse_path(&parser); if (!isttest(path)) return 0; @@ -1952,6 +1959,7 @@ static int smp_fetch_url32(const struct arg *args, struct sample *smp, const cha struct htx_sl *sl; struct ist path; unsigned int hash = 0; + struct http_uri_parser parser; if (!htx) return 0; @@ -1965,7 +1973,8 @@ static int smp_fetch_url32(const struct arg *args, struct sample *smp, const cha /* now retrieve the path */ sl = http_get_stline(htx); - path = http_get_path(htx_sl_req_uri(sl)); + parser = http_uri_parser_init(htx_sl_req_uri(sl)); + path = http_parse_path(&parser); if (path.len && *(path.ptr) == '/') { while (path.len--) hash = *(path.ptr++) + (hash << 6) + (hash << 16) - hash; diff --git a/src/http_htx.c b/src/http_htx.c index 2e39191882..bbbac4a90d 100644 --- a/src/http_htx.c +++ b/src/http_htx.c @@ -407,12 +407,14 @@ int http_replace_req_path(struct htx *htx, const struct ist path, int with_qs) struct htx_sl *sl = http_get_stline(htx); struct ist meth, uri, vsn, p; size_t plen = 0; + struct http_uri_parser parser; if (!sl) return 0; uri = htx_sl_req_uri(sl); - p = http_get_path(uri); + parser = http_uri_parser_init(uri); + p = http_parse_path(&parser); if 
(!isttest(p)) p = uri; if (with_qs) @@ -1791,7 +1793,7 @@ int http_scheme_based_normalize(struct htx *htx) vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl)); /* reconstruct uri without port */ - path = http_get_path(uri); + path = http_parse_path(&parser); chunk_istcat(temp, scheme); chunk_istcat(temp, host); chunk_istcat(temp, path); diff --git a/src/log.c b/src/log.c index 41810f574e..2ba645a9f2 100644 --- a/src/log.c +++ b/src/log.c @@ -1961,6 +1961,7 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t struct timeval tv; struct strm_logs tmp_strm_log; struct ist path; + struct http_uri_parser parser; /* FIXME: let's limit ourselves to frontend logging for now. */ @@ -2790,7 +2791,8 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t path = ist2(uri, spc - uri); // extract relative path without query params from url - path = iststop(http_get_path(path), '?'); + parser = http_uri_parser_init(path); + path = iststop(http_parse_path(&parser), '?'); if (!txn || !txn->uri || nspaces == 0) { chunk.area = ""; chunk.data = strlen(""); diff --git a/src/mux_fcgi.c b/src/mux_fcgi.c index 18a9ff5cde..1be7b96f84 100644 --- a/src/mux_fcgi.c +++ b/src/mux_fcgi.c @@ -1327,7 +1327,8 @@ static int fcgi_set_default_param(struct fcgi_conn *fconn, struct fcgi_strm *fst #endif if ((params->mask & FCGI_SP_URI_MASK) != FCGI_SP_URI_MASK) { /* one of scriptname, pathinfo or query_string is no set */ - struct ist path = http_get_path(params->uri); + struct http_uri_parser parser = http_uri_parser_init(params->uri); + struct ist path = http_parse_path(&parser); int len; /* No scrit_name set but no valid path ==> error */