]> git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib-http: Added support for parsing request target URLs.
authorStephan Bosch <stephan@rename-it.nl>
Sun, 15 Sep 2013 00:47:29 +0000 (03:47 +0300)
committerStephan Bosch <stephan@rename-it.nl>
Sun, 15 Sep 2013 00:47:29 +0000 (03:47 +0300)
src/lib-http/http-request.h
src/lib-http/http-url.c
src/lib-http/http-url.h

index 5f1d064ccc2c14652ea8adf10cb508b64da571d9..501cc96c4b7a5170cce298db8fc3fe6dd1fd5e2c 100644 (file)
@@ -3,6 +3,20 @@
 
 #include "http-header.h"
 
+struct http_url;
+
+enum http_request_target_format {
+       HTTP_REQUEST_TARGET_FORMAT_ORIGIN = 0,
+       HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE,
+       HTTP_REQUEST_TARGET_FORMAT_AUTHORITY,
+       HTTP_REQUEST_TARGET_FORMAT_ASTERISK
+};
+
+struct http_request_target {
+       enum http_request_target_format format;
+       struct http_url *url;
+};
+
 struct http_request {
        const char *method;
 
index 80a9ae39d404c92c87686b0396023ca3516a6495..7454f68d2bc5f9a4d21c249943eb27d8a33b4e47 100644 (file)
@@ -5,7 +5,9 @@
 #include "strfuncs.h"
 #include "net.h"
 #include "uri-util.h"
+
 #include "http-url.h"
+#include "http-request.h"
 
 /*
  * HTTP URL parser
@@ -19,33 +21,122 @@ struct http_url_parser {
        struct http_url *url;
        struct http_url *base;
 
-       unsigned int relative:1;
+       enum http_request_target_format req_format;
+
+       unsigned int relative:1;
+       unsigned int request_target:1;
 };
 
+static bool http_url_parse_authority(struct http_url_parser *url_parser)
+{
+       struct uri_parser *parser = &url_parser->parser;
+       struct http_url *url = url_parser->url;
+       struct uri_authority auth;
+       int ret;
+
+       if ((ret = uri_parse_authority(parser, &auth)) < 0)
+               return FALSE;
+       if (ret > 0) {
+               if (auth.enc_userinfo != NULL) {
+                       /* http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-20
+
+                                Section 2.8.1:
+
+                          {...} Senders MUST NOT include a userinfo subcomponent (and its "@"
+                          delimiter) when transmitting an "http" URI in a message. Recipients
+                          of HTTP messages that contain a URI reference SHOULD parse for the
+                          existence of userinfo and treat its presence as an error, likely
+                          indicating that the deprecated subcomponent is being used to
+                          obscure the authority for the sake of phishing attacks.
+                        */
+                       parser->error = "HTTP URL does not allow `userinfo@' part";
+                       return FALSE;
+               }
+       }
+       if (url != NULL) {
+               url->host_name = p_strdup(parser->pool, auth.host_literal);
+               url->host_ip = auth.host_ip;
+               url->have_host_ip = auth.have_host_ip;
+               url->port = auth.port;
+               url->have_port = auth.have_port;
+       }
+       return TRUE;
+}
+
+static bool http_url_parse_authority_form(struct http_url_parser *url_parser)
+{
+       struct uri_parser *parser = &url_parser->parser;
+
+       if (!http_url_parse_authority(url_parser))
+               return FALSE;
+       if (parser->cur != parser->end)
+               return FALSE;
+       url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_AUTHORITY;
+       return TRUE;
+}
+
 static bool http_url_do_parse(struct http_url_parser *url_parser)
 {
        struct uri_parser *parser = &url_parser->parser;
        struct http_url *url = url_parser->url, *base = url_parser->base;
-       struct uri_authority auth;
        const char *const *path;
-       bool relative = TRUE, have_path = FALSE;
+       bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE,
+               have_path = FALSE;
        int path_relative;
        const char *part;
        int ret;
 
-       /* RFC 2616 - Hypertext Transfer Protocol, Section 3.2:
-        *   
-        * http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]]
-        * 
-        * Translated to RFC 3986:
-        *
-        * absolute-http-URL = "http:" "//" host [ ":" port ] path-absolute
-        *                       ["?" query] [ "#" fragment ]
-        * relative-http-ref = relative-http-part [ "?" query ] [ "#" fragment ]
-        * relative-http-part = "//" host [ ":" port ] path-abempty
-        *                      / path-absolute
-        *                      / path-noscheme
-        *                      / path-empty
+       /*
+          http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23
+            Appendix C:
+
+          http-URI       = "http://" authority path-abempty [ "?" query ]
+                             [ "#" fragment ]
+          https-URI      = "https://" authority path-abempty [ "?" query ]
+                             [ "#" fragment ]
+          partial-URI    = relative-part [ "?" query ]
+
+          request-target = origin-form / absolute-form / authority-form /
+                           asterisk-form
+
+          origin-form    = absolute-path [ "?" query ]
+          absolute-form  = absolute-URI
+          authority-form = authority
+          asterisk-form  = "*"
+                           ; Not parsed here
+
+          absolute-path  = 1*( "/" segment )
+
+          http://tools.ietf.org/html/rfc3986
+            Appendix A: (implemented in uri-util.h)
+
+          absolute-URI   = scheme ":" hier-part [ "?" query ]
+
+          hier-part      = "//" authority path-abempty
+                         / path-absolute
+                         / path-rootless
+                         / path-empty
+
+          relative-part  = "//" authority path-abempty
+                         / path-absolute
+                         / path-noscheme
+                         / path-empty
+
+          authority     = [ userinfo "@" ] host [ ":" port ]
+
+          path-abempty   = *( "/" segment )
+          path-absolute  = "/" [ segment-nz *( "/" segment ) ]
+          path-noscheme  = segment-nz-nc *( "/" segment )
+          path-rootless  = segment-nz *( "/" segment )
+          path-empty     = 0<pchar>
+
+          segment        = *pchar
+          segment-nz     = 1*pchar
+          segment-nz-nc  = 1*( unreserved / pct-encoded / sub-delims / "@" )
+                    ; non-zero-length segment without any colon ":"
+
+          query          = *( pchar / "/" / "?" )
+          fragment       = *( pchar / "/" / "?" )
         */
 
        /* "http:" / "https:" */
@@ -59,46 +150,54 @@ static bool http_url_do_parse(struct http_url_parser *url_parser)
                                if (url != NULL)
                                        url->have_ssl = TRUE;
                        } else if (strcasecmp(scheme, "http") != 0) {
+                               if (url_parser->request_target) {
+                                       /* valid as non-HTTP scheme, but also try to parse as authority */
+                                       parser->cur = parser->begin;
+                                       if (!http_url_parse_authority_form(url_parser)) {
+                                               url_parser->url = NULL; /* indicate non-http-url */
+                                               url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
+                                       }
+                                       return TRUE;
+                               }
                                parser->error = "Not an HTTP URL";
                                return FALSE;
                        }
                        relative = FALSE;
+                       have_scheme = TRUE;
                }
        } else {
                relative = FALSE;
+               have_scheme = TRUE;
        }
 
-       /* "//" host [ ":" port ] */
-       if ((ret = uri_parse_slashslash_authority(parser, &auth)) < 0)
-               return FALSE;
-       if (ret > 0) {
-               if (auth.enc_userinfo != NULL) {
-                       /* http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-20
-
-                                Section 2.8.1:
-
-                          {...} Senders MUST NOT include a userinfo subcomponent (and its "@"
-                          delimiter) when transmitting an "http" URI in a message. Recipients
-                          of HTTP messages that contain a URI reference SHOULD parse for the
-                          existence of userinfo and treat its presence as an error, likely
-                          indicating that the deprecated subcomponent is being used to
-                          obscure the authority for the sake of phishing attacks.
-                        */
-                       parser->error = "HTTP URL does not allow `userinfo@' part";
+       /* "//" authority   ; or
+        * ["//"] authority ; when parsing a request target
+        */
+       if (parser->cur < parser->end && parser->cur[0] == '/') {
+               if (parser->cur+1 < parser->end && parser->cur[1] == '/') {
+                       parser->cur += 2;
+                       relative = FALSE;
+                       have_authority = TRUE;
+               } else {
+                       /* start of absolute-path */
+               }
+       } else if (url_parser->request_target && !have_scheme) {
+               if (!http_url_parse_authority_form(url_parser)) {
+                       /* not non-HTTP scheme and invalid as authority-form */
+                       parser->error = "Request target is invalid";
                        return FALSE;
                }
-               relative = FALSE;
-       } else if (!relative) {
+               return TRUE;
+       }
+
+       if (have_scheme && !have_authority) {
                parser->error = "Absolute HTTP URL requires `//' after `http:'";
-               return FALSE;
+               return FALSE;
        }
 
-       if (ret > 0 && url != NULL) {
-               url->host_name = p_strdup(parser->pool, auth.host_literal);
-               url->host_ip = auth.host_ip;
-               url->have_host_ip = auth.have_host_ip;
-               url->port = auth.port;
-               url->have_port = auth.have_port;
+       if (have_authority) {
+               if (!http_url_parse_authority(url_parser))
+                       return FALSE;
        }
 
        /* path-abempty / path-absolute / path-noscheme / path-empty */
@@ -108,14 +207,15 @@ static bool http_url_do_parse(struct http_url_parser *url_parser)
        /* Relative URLs are only valid when we have a base URL */
        if (relative) {
                if (base == NULL) {
-                       parser->error = "Relative URL not allowed";
+                       parser->error = "Relative HTTP URL not allowed";
                        return FALSE;
-               } else if (url != NULL) {
-                       url->host_name = p_strdup_empty(parser->pool, base->host_name); 
+               } else if (!have_authority && url != NULL) {
+                       url->host_name = p_strdup(parser->pool, base->host_name); 
                        url->host_ip = base->host_ip;
                        url->have_host_ip = base->have_host_ip;
                        url->port = base->port;
                        url->have_port = base->have_port;
+                       url->have_ssl = base->have_ssl;
                }
 
                url_parser->relative = TRUE;
@@ -152,7 +252,7 @@ static bool http_url_do_parse(struct http_url_parser *url_parser)
                        if (url != NULL && pend > pbegin)
                                str_append_n(fullpath, pbegin, pend-pbegin);
                }
-       
+
                /* append relative path */
                while (*path != NULL) {
                        if (!uri_data_decode(parser, *path, NULL, &part))
@@ -161,7 +261,7 @@ static bool http_url_do_parse(struct http_url_parser *url_parser)
                        if (url != NULL) {
                                str_append_c(fullpath, '/');
                                str_append(fullpath, part);
-                       }       
+                       }
                        path++;
                }
 
@@ -170,7 +270,7 @@ static bool http_url_do_parse(struct http_url_parser *url_parser)
        } else if (relative && url != NULL) {
                url->path = p_strdup(parser->pool, base->path);
        }
-       
+
        /* [ "?" query ] */
        if ((ret = uri_parse_query(parser, &part)) < 0)
                return FALSE;
@@ -180,13 +280,13 @@ static bool http_url_do_parse(struct http_url_parser *url_parser)
                if (url != NULL)
                        url->enc_query = p_strdup(parser->pool, part);
        } else if (relative && !have_path && url != NULL) {
-               url->enc_query = p_strdup(parser->pool, base->enc_query);               
+               url->enc_query = p_strdup(parser->pool, base->enc_query);
        }
 
        /* [ "#" fragment ] */
-       if ((ret = uri_parse_fragment(parser, &part)) < 0) 
+       if ((ret = uri_parse_fragment(parser, &part)) < 0)
                return FALSE;
-       if (ret > 0) {  
+       if (ret > 0) {
                if ((url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) {
                        parser->error = "URL fragment not allowed for HTTP URL in this context";
                        return FALSE;
@@ -196,13 +296,16 @@ static bool http_url_do_parse(struct http_url_parser *url_parser)
                if (url != NULL)
                        url->enc_fragment =  p_strdup(parser->pool, part);
        } else if (relative && !have_path && url != NULL) {
-               url->enc_fragment = p_strdup(parser->pool, base->enc_fragment);         
+               url->enc_fragment = p_strdup(parser->pool, base->enc_fragment);
        }
 
        if (parser->cur != parser->end) {
-               parser->error = "HTTP URL contains invalid character.";
+               parser->error = "HTTP URL contains invalid character";
                return FALSE;
        }
+
+       if (have_scheme)
+               url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
        return TRUE;
 }
 
@@ -233,6 +336,67 @@ int http_url_parse(const char *url, struct http_url *base,
        return 0;
 }
 
+int http_url_request_target_parse(const char *request_target,
+       const char *host_header, pool_t pool, struct http_request_target *target,
+       const char **error_r)
+{
+       struct http_url_parser url_parser;
+       struct uri_parser *parser;
+       struct uri_authority host;
+       struct http_url base;
+
+       memset(&url_parser, '\0', sizeof(url_parser));
+       parser = &url_parser.parser;
+       uri_parser_init(parser, pool, host_header);
+
+       if (uri_parse_authority(parser, &host) <= 0) {
+               parser->error = t_strdup_printf("Invalid Host header: %s", parser->error);
+               return -1;
+       }
+
+       if (parser->cur != parser->end || host.enc_userinfo != NULL) {
+               parser->error = "Invalid Host header: Contains invalid character";
+               return -1;
+       }
+
+       if (request_target[0] == '*' && request_target[1] == '\0') {
+               struct http_url *url = p_new(pool, struct http_url, 1);
+               url->host_name = p_strdup(pool, host.host_literal);
+               url->host_ip = host.host_ip;
+               url->port = host.port;
+               url->have_host_ip = host.have_host_ip;
+               url->have_port = host.have_port;
+               target->url = url;
+               target->format = HTTP_REQUEST_TARGET_FORMAT_ASTERISK;
+               return 0;
+       }
+
+       memset(&base, 0, sizeof(base));
+       base.host_name = host.host_literal;
+       base.host_ip = host.host_ip;
+       base.port = host.port;
+       base.have_host_ip = host.have_host_ip;
+       base.have_port = host.have_port;
+
+       memset(parser, '\0', sizeof(*parser));
+       uri_parser_init(parser, pool, request_target);
+
+       url_parser.url = p_new(pool, struct http_url, 1);
+       url_parser.request_target = TRUE;
+       url_parser.req_format = HTTP_REQUEST_TARGET_FORMAT_ORIGIN;
+       url_parser.base = &base;
+       url_parser.flags = 0;
+
+       if (!http_url_do_parse(&url_parser)) {
+               *error_r = url_parser.parser.error;
+               return -1;
+       }
+
+       target->url = url_parser.url;
+       target->format = url_parser.req_format;
+       return 0;
+}
+
 /*
  * HTTP URL construction
  */
index 76067d1fce23b65be6481ccbc6ce0c1a50a578eb..f4d85c9f40b0066373c82a9ca052826eee0bd146 100644 (file)
@@ -3,6 +3,8 @@
 
 #include "net.h"
 
+struct http_request_target;
+
 struct http_url {
        /* server */
        const char *host_name;
@@ -39,6 +41,10 @@ int http_url_parse(const char *url, struct http_url *base,
                   enum http_url_parse_flags flags, pool_t pool,
                   struct http_url **url_r, const char **error_r);
 
+int http_url_request_target_parse(const char *request_target,
+       const char *host_header, pool_t pool,
+       struct http_request_target *target, const char **error_r);
+
 /*
  * HTTP URL construction
  */