git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MINOR: uri_normalizer: Add `fragment-encode` normalizer
author: Tim Duesterhus <tim@bastelstu.be>
Mon, 10 May 2021 15:28:26 +0000 (17:28 +0200)
committer: Willy Tarreau <w@1wt.eu>
Tue, 11 May 2021 15:24:32 +0000 (17:24 +0200)
This normalizer encodes '#' as '%23'.

See GitHub Issue #714.

doc/configuration.txt
include/haproxy/action-t.h
include/haproxy/uri_normalizer.h
reg-tests/http-rules/normalize_uri.vtc
src/http_act.c
src/uri_normalizer.c

index 7ab7baadcecf325831546fba445a65ee83b55b60..442b61e17089f713ed8b1c3fe1301b41d16a2a49 100644 (file)
@@ -6172,6 +6172,7 @@ http-request early-hint <name> <fmt> [ { if | unless } <condition> ]
   See RFC 8297 for more information.
 
 http-request normalize-uri <normalizer> [ { if | unless } <condition> ]
+http-request normalize-uri fragment-encode [ { if | unless } <condition> ]
 http-request normalize-uri fragment-strip [ { if | unless } <condition> ]
 http-request normalize-uri path-merge-slashes [ { if | unless } <condition> ]
 http-request normalize-uri path-strip-dot [ { if | unless } <condition> ]
@@ -6210,6 +6211,14 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
 
   The following normalizers are available:
 
+  - fragment-encode: Encodes "#" as "%23".
+
+      The "fragment-strip" normalizer should be preferred, unless it is known
+      that broken clients do not correctly encode '#' within the path component.
+
+      Example:
+      - /#foo  -> /%23foo
+
   - fragment-strip: Removes the URI's "fragment" component.
 
       According to RFC 3986#3.5 the "fragment" component of an URI should not
index 56ac32f7fa49bae44d38f34eb7c887bcc9d10398..d4fc3f6dabad777c8d9d5c4fff64d0891c9a51ac 100644 (file)
@@ -112,6 +112,7 @@ enum act_normalize_uri {
        ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED,
        ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT,
        ACT_NORMALIZE_URI_FRAGMENT_STRIP,
+       ACT_NORMALIZE_URI_FRAGMENT_ENCODE,
 };
 
 /* NOTE: if <.action_ptr> is defined, the referenced function will always be
index fa5d89dd0d1f4eeff245ea6bf0cef8a3937b93bb..b384007f5d4c5f4fa01cf30d8f0c7566f8a4d46a 100644 (file)
@@ -26,6 +26,7 @@ static inline enum uri_normalizer_err uri_normalizer_fragment_strip(const struct
        return URI_NORMALIZER_ERR_NONE;
 }
 
+enum uri_normalizer_err uri_normalizer_fragment_encode(const struct ist input, struct ist *dst);
 enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst);
 enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst);
 enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst);
index 792bea5d4ff248c648b26edb2d6a793f21fafc0a..7e2d7491f12fdb8925cbf390ecb45cf8da5c7cbe 100644 (file)
@@ -8,7 +8,7 @@ feature ignore_unknown_macro
 server s1 {
     rxreq
     txresp
-} -repeat 66 -start
+} -repeat 70 -start
 
 haproxy h1 -conf {
     global
@@ -137,6 +137,18 @@ haproxy h1 -conf {
 
         default_backend be
 
+    frontend fe_fragment_encode
+        bind "fd@${fe_fragment_encode}"
+
+        http-request set-var(txn.before) url
+        http-request normalize-uri fragment-encode
+        http-request set-var(txn.after) url
+
+        http-response add-header before  %[var(txn.before)]
+        http-response add-header after  %[var(txn.after)]
+
+        default_backend be
+
     backend be
         server s1 ${s1_addr}:${s1_port}
 
@@ -500,3 +512,25 @@ client c9 -connect ${h1_fe_fragment_strip_sock} {
     expect resp.http.before == "*"
     expect resp.http.after == "*"
 } -run
+
+client c10 -connect ${h1_fe_fragment_encode_sock} {
+    txreq -url "/#foo"
+    rxresp
+    expect resp.http.before == "/#foo"
+    expect resp.http.after == "/%23foo"
+
+    txreq -url "/#foo/#foo"
+    rxresp
+    expect resp.http.before == "/#foo/#foo"
+    expect resp.http.after == "/%23foo/%23foo"
+
+    txreq -url "/%23foo"
+    rxresp
+    expect resp.http.before == "/%23foo"
+    expect resp.http.after == "/%23foo"
+
+    txreq -req OPTIONS -url "*"
+    rxresp
+    expect resp.http.before == "*"
+    expect resp.http.after == "*"
+} -run
index f30694e5f439b97194242ecb44a5b5da84b96b5c..f61362475993e29d1d8f235febc598bd6dc3b34a 100644 (file)
@@ -329,6 +329,23 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p
 
                        err = uri_normalizer_fragment_strip(path, &newpath);
 
+                       if (err != URI_NORMALIZER_ERR_NONE)
+                               break;
+
+                       if (!http_replace_req_path(htx, newpath, 1))
+                               goto fail_rewrite;
+
+                       break;
+               }
+               case ACT_NORMALIZE_URI_FRAGMENT_ENCODE: {
+                       const struct ist path = http_get_path(uri);
+                       struct ist newpath = ist2(replace->area, replace->size);
+
+                       if (!isttest(path))
+                               goto leave;
+
+                       err = uri_normalizer_fragment_encode(path, &newpath);
+
                        if (err != URI_NORMALIZER_ERR_NONE)
                                break;
 
@@ -462,6 +479,11 @@ static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_
 
                rule->action = ACT_NORMALIZE_URI_FRAGMENT_STRIP;
        }
+       else if (strcmp(args[cur_arg], "fragment-encode") == 0) {
+               cur_arg++;
+
+               rule->action = ACT_NORMALIZE_URI_FRAGMENT_ENCODE;
+       }
        else {
                memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
                return ACT_RET_PRS_ERR;
index 4fd783d4a664f7644f067089e5fe00c53f6322b6..bc793f2f17a051a2a277b31f3b3a09a5055d484a 100644 (file)
 #include <haproxy/tools.h>
 #include <haproxy/uri_normalizer.h>
 
+/* Encodes '#' as '%23'.
+ *
+ * Copies <input> into <dst> piece by piece, appending the literal "%23" after
+ * each piece that istsplit() cut off at a '#'. The value returned by
+ * istclear(dst) is passed to every istcat() call as the size limit of the
+ * destination (presumably the capacity of the buffer behind <dst> -- to be
+ * confirmed against the caller).
+ *
+ * Returns URI_NORMALIZER_ERR_NONE and assigns the result to <*dst> on success.
+ * Returns URI_NORMALIZER_ERR_ALLOC as soon as an istcat() call returns a
+ * negative value; in that case the locally assembled output is not assigned
+ * to <*dst>.
+ */
+enum uri_normalizer_err uri_normalizer_fragment_encode(const struct ist input, struct ist *dst)
+{
+       enum uri_normalizer_err err;
+
+       const size_t size = istclear(dst); /* limit handed to every istcat() below */
+       struct ist output = *dst; /* work on a copy; <*dst> is only assigned on success */
+
+       struct ist scanner = input;
+
+       while (istlen(scanner)) { /* loop until nothing of the input is left to scan */
+               const struct ist before_hash = istsplit(&scanner, '#'); /* next piece; <scanner> is passed by address so it can be advanced */
+
+               if (istcat(&output, before_hash, size) < 0) {
+                       err = URI_NORMALIZER_ERR_ALLOC;
+                       goto fail;
+               }
+
+               if (istend(before_hash) != istend(scanner)) { /* presumably: ends differ only when a '#' was consumed -- verify against istsplit() */
+                       if (istcat(&output, ist("%23"), size) < 0) {
+                               err = URI_NORMALIZER_ERR_ALLOC;
+                               goto fail;
+                       }
+               }
+       }
+
+       *dst = output;
+
+       return URI_NORMALIZER_ERR_NONE;
+
+  fail:
+
+       return err;
+}
+
 /* Returns 1 if the given character is part of the 'unreserved' set in the
  * RFC 3986 ABNF.
  * Returns 0 if not.