git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MINOR: log: Logging HTTP path only with %HPO
author: Maciej Zdeb <maciej@zdeb.pl>
Mon, 30 Nov 2020 18:27:47 +0000 (18:27 +0000)
committer: Willy Tarreau <w@1wt.eu>
Tue, 1 Dec 2020 08:32:44 +0000 (09:32 +0100)
This patch adds a new logging variable '%HPO' that logs only the HTTP path
(without the host or the query string) of a relative or absolute URI.

For example:
log-format "hpo=%HPO hp=%HP hu=%HU hq=%HQ"

GET /r/1 HTTP/1.1
=>
hpo=/r/1 hp=/r/1 hu=/r/1 hq=

GET /r/2?q=2 HTTP/1.1
=>
hpo=/r/2 hp=/r/2 hu=/r/2?q=2 hq=?q=2

GET http://host/r/3 HTTP/1.1
=>
hpo=/r/3 hp=http://host/r/3 hu=http://host/r/3 hq=

GET http://host/r/4?q=4 HTTP/1.1
=>
hpo=/r/4 hp=http://host/r/4 hu=http://host/r/4?q=4 hq=?q=4

doc/configuration.txt
include/haproxy/log-t.h
reg-tests/log/log_uri.vtc [new file with mode: 0644]
src/log.c

index d30048da0ffc7fb46851416062b400fa4f4da04c..bc2ad0168af14b6225ad3f3e58474c72ac9568c3 100644 (file)
@@ -19635,6 +19635,7 @@ Please refer to the table below for currently defined variables :
   |   | %H   | hostname                                      | string      |
   | H | %HM  | HTTP method (ex: POST)                        | string      |
   | H | %HP  | HTTP request URI without query string         | string      |
+  | H | %HPO | HTTP path only (without host or query string) | string      |
   | H | %HQ  | HTTP request URI query string (ex: ?bar=baz)  | string      |
   | H | %HU  | HTTP request URI (ex: /foo?bar=baz)           | string      |
   | H | %HV  | HTTP version (ex: HTTP/1.0)                   | string      |
index c8f52f554cf573244cdbb0cc5037000cda16d307..9146b77ba2fcf20d99abb9b8fcb8fedb34e46fc9 100644 (file)
@@ -169,6 +169,7 @@ enum {
        LOG_FMT_HTTP_METHOD,
        LOG_FMT_HTTP_URI,
        LOG_FMT_HTTP_PATH,
+       LOG_FMT_HTTP_PATH_ONLY,
        LOG_FMT_HTTP_QUERY,
        LOG_FMT_HTTP_VERSION,
        LOG_FMT_HOSTNAME,
diff --git a/reg-tests/log/log_uri.vtc b/reg-tests/log/log_uri.vtc
new file mode 100644 (file)
index 0000000..934a3ef
--- /dev/null
@@ -0,0 +1,59 @@
+varnishtest "Verify logging of relative/aboslute URI path"
+feature ignore_unknown_macro
+
+server s1 {
+    rxreq
+    txresp
+} -repeat 4 -start
+
+syslog Slg_1 -level info {
+    recv
+    expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/1 hp=/r/1 hu=/r/1 hq="
+    recv
+    expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/2 hp=/r/2 hu=/r/2\\?q=2 hq=\\?q=2"
+    recv
+    expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/3 hp=http://localhost/r/3 hu=http://localhost/r/3 hq="
+    recv
+    expect ~ "[^:\\[ ]\\[${h1_pid}\\]: .* hpo=/r/4 hp=http://localhost/r/4 hu=http://localhost/r/4\\?q=4 hq=\\?q=4"
+} -start
+
+haproxy h1 -conf {
+    global
+        nbthread 1
+
+    defaults
+        mode http
+        option httplog
+        timeout connect 1000
+        timeout client  1000
+        timeout server  1000
+
+    frontend fe1
+        bind "fd@${fe_1}"
+        log ${Slg_1_addr}:${Slg_1_port} local0
+        log-format "ci:%cp [%tr] hpo=%HPO hp=%HP hu=%HU hq=%HQ"
+        default_backend be
+
+    backend be
+        server app1 ${s1_addr}:${s1_port}
+} -start
+
+# The following client is started in the background and synchronized
+client c1 -connect ${h1_fe_1_sock} {
+    txreq -url "/r/1"
+    rxresp
+    expect resp.status == 200
+    txreq -url "/r/2?q=2"
+    rxresp
+    expect resp.status == 200
+    txreq -url "http://localhost/r/3" -hdr "host: localhost"
+    rxresp
+    expect resp.status == 200
+    txreq -url "http://localhost/r/4?q=4" -hdr "host: localhost"
+    rxresp
+    expect resp.status == 200
+} -start
+
+syslog Slg_1 -wait
+
+client c1 -wait
index 6014bfc2d272ff17d4a263cc1fcb33c4aaa91dbb..4e6dc30084b342eb16b39f0f449c341c0a66de2a 100644 (file)
--- a/src/log.c
+++ b/src/log.c
@@ -169,7 +169,8 @@ static const struct logformat_type logformat_keywords[] = {
        { "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL },  /* header response */
        { "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL },  /* header response list */
        { "HM", LOG_FMT_HTTP_METHOD, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP method */
-       { "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP path */
+       { "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP relative or absolute path */
+       { "HPO", LOG_FMT_HTTP_PATH_ONLY, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP path only (without host nor query string) */
        { "HQ", LOG_FMT_HTTP_QUERY, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP query */
        { "HU", LOG_FMT_HTTP_URI, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP full URI */
        { "HV", LOG_FMT_HTTP_VERSION, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP version */
@@ -2102,6 +2103,7 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t
        struct logformat_node *tmp;
        struct timeval tv;
        struct strm_logs tmp_strm_log;
+       struct ist path;
 
        /* FIXME: let's limit ourselves to frontend logging for now. */
 
@@ -2855,6 +2857,52 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t
                                last_isspace = 0;
                                break;
 
+                       case LOG_FMT_HTTP_PATH_ONLY: // %HPO
+                               uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+
+                               if (tmp->options & LOG_OPT_QUOTE)
+                                       LOGCHAR('"');
+
+                               end = uri + strlen(uri);
+
+                               // look for the first whitespace character
+                               while (uri < end && !HTTP_IS_SPHT(*uri))
+                                       uri++;
+
+                               // keep advancing past multiple spaces
+                               while (uri < end && HTTP_IS_SPHT(*uri)) {
+                                       uri++; nspaces++;
+                               }
+
+                               // look for first space after url
+                               spc = uri;
+                               while (spc < end && !HTTP_IS_SPHT(*spc))
+                                       spc++;
+
+                               path.ptr = uri;
+                               path.len = spc - uri;
+
+                               // extract relative path without query params from url
+                               path = iststop(http_get_path(path), '?');
+                               if (!txn || !txn->uri || nspaces == 0) {
+                                       chunk.area = "<BADREQ>";
+                                       chunk.data = strlen("<BADREQ>");
+                               } else {
+                                       chunk.area = path.ptr;
+                                       chunk.data = path.len;
+                               }
+
+                               ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+                               if (ret == NULL || *ret != '\0')
+                                       goto out;
+
+                               tmplog = ret;
+                               if (tmp->options & LOG_OPT_QUOTE)
+                                       LOGCHAR('"');
+
+                               last_isspace = 0;
+                               break;
+
                        case LOG_FMT_HTTP_QUERY: // %HQ
                                if (tmp->options & LOG_OPT_QUOTE)
                                        LOGCHAR('"');