From: Gregor Jasny Date: Sat, 17 Jul 2021 14:43:22 +0000 (+0200) Subject: HTTP storage: Fix IPv6 address handling (#898) X-Git-Tag: v4.4~125 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d068911695250dfa118d0e89ecafc980ad9f6d9b;p=thirdparty%2Fccache.git HTTP storage: Fix IPv6 address handling (#898) --- diff --git a/LICENSE.adoc b/LICENSE.adoc index d4b0029d0..1116ff872 100644 --- a/LICENSE.adoc +++ b/LICENSE.adoc @@ -678,7 +678,7 @@ src/third_party/httplib.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cpp-httplib - A C++11 single-file header-only cross platform HTTP/HTTPS library. -Copied from commit 6b9ffc8b of https://github.com/yhirose/cpp-httplib[cpp-httplib]. +Copied from https://github.com/yhirose/cpp-httplib[cpp-httplib] v0.9.1. The library has the following license text: ------------------------------------------------------------------------------- diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index 87ad74493..e08ca2d41 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -955,7 +955,6 @@ Optional attributes: Known issues and limitations: * HTTPS is not yet supported. -* URLs containing IPv6 addresses like `http://[::1]/` are not supported. === Redis storage backend diff --git a/src/storage/secondary/HttpStorage.cpp b/src/storage/secondary/HttpStorage.cpp index 07c35f93c..1928d5916 100644 --- a/src/storage/secondary/HttpStorage.cpp +++ b/src/storage/secondary/HttpStorage.cpp @@ -85,18 +85,41 @@ get_url_path(const Url& url) return path; } +std::string +get_host_header_value(const Url& url) +{ + // We need to construct an HTTP Host header that follows the same IPv6 + // escaping rules as a URL. To avoid code duplication we re-use the + // Url class to render that string. + + Url host_and_port_only; + host_and_port_only.host(url.host(), url.ip_version()).port(url.port()); + + // The rendered_value now contains a string like '//[::1]:8080'. The leading + // slashes must be stripped. 
+ const auto rendered_value = host_and_port_only.str(); + const auto prefix = nonstd::string_view{"//"}; + if (!Util::starts_with(rendered_value, prefix)) { + throw Error( + "Expected partial URL to start with '{}': '{}'", prefix, rendered_value); + } + return rendered_value.substr(prefix.size()); +} + std::unique_ptr make_client(const Url& url) { - std::string scheme_host_port; - - if (url.port().empty()) { - scheme_host_port = FMT("{}://{}", url.scheme(), url.host()); - } else { - scheme_host_port = FMT("{}://{}:{}", url.scheme(), url.host(), url.port()); + if (url.host().empty()) { + throw Error("A host is required in HTTP storage URL: '{}'", url.str()); } - auto client = std::make_unique(scheme_host_port.c_str()); + // the httplib requires a partial URL with just scheme, host and port + Url destination; + destination.scheme(url.scheme()) + .host(url.host(), url.ip_version()) + .port(url.port()); + + auto client = std::make_unique(destination.str().c_str()); if (!url.user_info().empty()) { const auto pair = util::split_once(url.user_info(), ':'); if (!pair.second) { @@ -132,7 +155,10 @@ HttpStorage::HttpStorage(const Url& url, const AttributeMap& attributes) m_http_client(make_client(url)) { m_http_client->set_default_headers( - {{"User-Agent", FMT("{}/{}", CCACHE_NAME, CCACHE_VERSION)}}); + {// explicit setting of the Host header is required due to IPv6 address + // handling issues in httplib + {"Host", get_host_header_value(url)}, + {"User-Agent", FMT("{}/{}", CCACHE_NAME, CCACHE_VERSION)}}); m_http_client->set_keep_alive(true); configure_timeouts(attributes); } diff --git a/src/third_party/httplib.h b/src/third_party/httplib.h index b18f54e4d..ee9a947f4 100644 --- a/src/third_party/httplib.h +++ b/src/third_party/httplib.h @@ -4520,25 +4520,58 @@ inline void Server::stop() { } inline bool Server::parse_request_line(const char *s, Request &req) { - const static std::regex re( - "(GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH|PRI) " - "(([^? 
]+)(?:\\?([^ ]*?))?) (HTTP/1\\.[01])\r\n"); + auto len = strlen(s); + if (len < 2 || s[len - 2] != '\r' || s[len - 1] != '\n') { return false; } + len -= 2; - std::cmatch m; - if (std::regex_match(s, m, re)) { - req.version = std::string(m[5]); - req.method = std::string(m[1]); - req.target = std::string(m[2]); - req.path = detail::decode_url(m[3], false); + { + size_t count = 0; + + detail::split(s, s + len, ' ', [&](const char *b, const char *e) { + switch (count) { + case 0: req.method = std::string(b, e); break; + case 1: req.target = std::string(b, e); break; + case 2: req.version = std::string(b, e); break; + default: break; + } + count++; + }); - // Parse query text - auto len = std::distance(m[4].first, m[4].second); - if (len > 0) { detail::parse_query_text(m[4], req.params); } + if (count != 3) { return false; } + } - return true; + static const std::set methods{ + "GET", "HEAD", "POST", "PUT", "DELETE", + "CONNECT", "OPTIONS", "TRACE", "PATCH", "PRI"}; + + if (methods.find(req.method) == methods.end()) { return false; } + + if (req.version != "HTTP/1.1" && req.version != "HTTP/1.0") { return false; } + + { + size_t count = 0; + + detail::split(req.target.data(), req.target.data() + req.target.size(), '?', + [&](const char *b, const char *e) { + switch (count) { + case 0: + req.path = detail::decode_url(std::string(b, e), false); + break; + case 1: { + if (e - b > 0) { + detail::parse_query_text(std::string(b, e), req.params); + } + break; + } + default: break; + } + count++; + }); + + if (count > 2) { return false; } } - return false; + return true; } inline bool Server::write_response(Stream &strm, bool close_connection, @@ -4615,8 +4648,7 @@ inline bool Server::write_response_core(Stream &strm, bool close_connection, if (!res.body.empty()) { if (!strm.write(res.body)) { ret = false; } } else if (res.content_provider_) { - if (write_content_with_provider(strm, req, res, boundary, - content_type)) { + if (write_content_with_provider(strm, req, res, 
boundary, content_type)) { res.content_provider_success_ = true; } else { res.content_provider_success_ = false; @@ -5551,8 +5583,8 @@ inline bool ClientImpl::handle_request(Stream &strm, Request &req, if (detail::parse_www_authenticate(res, auth, is_proxy)) { Request new_req = req; new_req.authorization_count_ += 1; - auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; - new_req.headers.erase(key); + new_req.headers.erase(is_proxy ? "Proxy-Authorization" + : "Authorization"); new_req.headers.insert(detail::make_digest_authentication_header( req, auth, new_req.authorization_count_, detail::random_string(10), username, password, is_proxy)); @@ -5579,7 +5611,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) { if (location.empty()) { return false; } const static std::regex re( - R"(^(?:(https?):)?(?://([^:/?#]*)(?::(\d+))?)?([^?#]*(?:\?[^#]*)?)(?:#.*)?)"); + R"((?:(https?):)?(?://(?:\[([\d:]+)\]|([^:/?#]+))(?::(\d+))?)?([^?#]*(?:\?[^#]*)?)(?:#.*)?)"); std::smatch m; if (!std::regex_match(location, m, re)) { return false; } @@ -5588,8 +5620,9 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) { auto next_scheme = m[1].str(); auto next_host = m[2].str(); - auto port_str = m[3].str(); - auto next_path = m[4].str(); + if (next_host.empty()) { next_host = m[3].str(); } + auto port_str = m[4].str(); + auto next_path = m[5].str(); auto next_port = port_; if (!port_str.empty()) { @@ -5649,7 +5682,11 @@ inline bool ClientImpl::write_content_with_provider(Stream &strm, inline bool ClientImpl::write_request(Stream &strm, Request &req, bool close_connection, Error &error) { // Prepare additional headers - if (close_connection) { req.headers.emplace("Connection", "close"); } + if (close_connection) { + if (!req.has_header("Connection")) { + req.headers.emplace("Connection", "close"); + } + } if (!req.has_header("Host")) { if (is_ssl()) { @@ -5676,8 +5713,10 @@ inline bool ClientImpl::write_request(Stream &strm, 
Request &req, if (req.body.empty()) { if (req.content_provider_) { if (!req.is_chunked_content_provider_) { - auto length = std::to_string(req.content_length_); - req.headers.emplace("Content-Length", length); + if (!req.has_header("Content-Length")) { + auto length = std::to_string(req.content_length_); + req.headers.emplace("Content-Length", length); + } } } else { if (req.method == "POST" || req.method == "PUT" || @@ -5697,24 +5736,32 @@ inline bool ClientImpl::write_request(Stream &strm, Request &req, } if (!basic_auth_password_.empty() || !basic_auth_username_.empty()) { - req.headers.insert(make_basic_authentication_header( - basic_auth_username_, basic_auth_password_, false)); + if (!req.has_header("Authorization")) { + req.headers.insert(make_basic_authentication_header( + basic_auth_username_, basic_auth_password_, false)); + } } if (!proxy_basic_auth_username_.empty() && !proxy_basic_auth_password_.empty()) { - req.headers.insert(make_basic_authentication_header( - proxy_basic_auth_username_, proxy_basic_auth_password_, true)); + if (!req.has_header("Proxy-Authorization")) { + req.headers.insert(make_basic_authentication_header( + proxy_basic_auth_username_, proxy_basic_auth_password_, true)); + } } if (!bearer_token_auth_token_.empty()) { - req.headers.insert(make_bearer_token_authentication_header( - bearer_token_auth_token_, false)); + if (!req.has_header("Authorization")) { + req.headers.insert(make_bearer_token_authentication_header( + bearer_token_auth_token_, false)); + } } if (!proxy_bearer_token_auth_token_.empty()) { - req.headers.insert(make_bearer_token_authentication_header( - proxy_bearer_token_auth_token_, true)); + if (!req.has_header("Proxy-Authorization")) { + req.headers.insert(make_bearer_token_authentication_header( + proxy_bearer_token_auth_token_, true)); + } } // Request line and headers @@ -6687,8 +6734,9 @@ inline ssize_t SSLSocketStream::read(char *ptr, size_t size) { auto err = SSL_get_error(ssl_, ret); int n = 1000; #ifdef 
_WIN32 - while (--n >= 0 && (err == SSL_ERROR_WANT_READ || - err == SSL_ERROR_SYSCALL && WSAGetLastError() == WSAETIMEDOUT)) { + while (--n >= 0 && + (err == SSL_ERROR_WANT_READ || + err == SSL_ERROR_SYSCALL && WSAGetLastError() == WSAETIMEDOUT)) { #else while (--n >= 0 && err == SSL_ERROR_WANT_READ) { #endif @@ -7219,7 +7267,8 @@ inline Client::Client(const char *scheme_host_port) inline Client::Client(const char *scheme_host_port, const std::string &client_cert_path, const std::string &client_key_path) { - const static std::regex re(R"(^(?:([a-z]+)://)?([^:/?#]+)(?::(\d+))?)"); + const static std::regex re( + R"((?:([a-z]+):\/\/)?(?:\[([\d:]+)\]|([^:/?#]+))(?::(\d+))?)"); std::cmatch m; if (std::regex_match(scheme_host_port, m, re)) { @@ -7238,8 +7287,9 @@ inline Client::Client(const char *scheme_host_port, auto is_ssl = scheme == "https"; auto host = m[2].str(); + if (host.empty()) { host = m[3].str(); } - auto port_str = m[3].str(); + auto port_str = m[4].str(); auto port = !port_str.empty() ? std::stoi(port_str) : (is_ssl ? 443 : 80); if (is_ssl) { diff --git a/test/suites/secondary_http.bash b/test/suites/secondary_http.bash index fa4a8b994..99a70ca62 100644 --- a/test/suites/secondary_http.bash +++ b/test/suites/secondary_http.bash @@ -12,6 +12,20 @@ start_http_server() { || test_failed_internal "Cannot connect to server" } +maybe_start_ipv6_http_server() { + local port="$1" + local cache_dir="$2" + local credentials="$3" # optional parameter + + mkdir -p "${cache_dir}" + "${HTTP_SERVER}" --bind "::1" --directory "${cache_dir}" "${port}" \ + ${credentials:+--basic-auth ${credentials}} \ + &>http-server.log & + "${HTTP_CLIENT}" "http://[::1]:${port}" &>http-client.log \ + ${credentials:+--basic-auth ${credentials}} \ + || return 1 +} + SUITE_secondary_http_PROBE() { if ! 
"${HTTP_SERVER}" --help >/dev/null 2>&1; then echo "cannot execute ${HTTP_SERVER} - Python 3 might be missing" @@ -92,4 +106,34 @@ SUITE_secondary_http() { expect_stat 'files in cache' 2 expect_file_count 0 '*' secondary # result + manifest expect_contains test.o.ccache-log "status code: 401" + + # ------------------------------------------------------------------------- + TEST "IPv6 address" + + if maybe_start_ipv6_http_server 12780 secondary; then + export CCACHE_SECONDARY_STORAGE="http://[::1]:12780" + + $CCACHE_COMPILE -c test.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache miss' 1 + expect_stat 'files in cache' 2 + expect_file_count 2 '*' secondary # result + manifest + + $CCACHE_COMPILE -c test.c + expect_stat 'cache hit (direct)' 1 + expect_stat 'cache miss' 1 + expect_stat 'files in cache' 2 + expect_file_count 2 '*' secondary # result + manifest + + $CCACHE -C >/dev/null + expect_stat 'files in cache' 0 + expect_file_count 2 '*' secondary # result + manifest + + $CCACHE_COMPILE -c test.c + expect_stat 'cache hit (direct)' 2 + expect_stat 'cache miss' 1 + expect_stat 'files in cache' 0 + expect_stat 'files in cache' 0 + expect_file_count 2 '*' secondary # result + manifest + fi }