]> git.ipfire.org Git - thirdparty/git.git/commitdiff
credential: treat "?" and "#" in URLs as end of host
authorJeff King <peff@peff.net>
Tue, 14 Apr 2020 21:43:04 +0000 (17:43 -0400)
committerJunio C Hamano <gitster@pobox.com>
Wed, 15 Apr 2020 17:31:03 +0000 (10:31 -0700)
It's unusual to see:

  https://example.com?query-parameters

without an intervening slash, like:

  https://example.com/some-path?query-parameters

or even:

  https://example.com/?query-parameters

but it is a valid end to the hostname (actually "authority component")
according to RFC 3986. Likewise for "#".

And curl will parse the URL according to the standard, meaning it will
contact example.com, but our credential code would ask about a bogus
hostname with a "?" in it. Let's make sure we follow the standard, and
more importantly ask about the same hosts that curl will be talking to.

It would be nice if we could just ask curl to parse the URL for us. But
it didn't grow a URL-parsing API until 7.62, so we'd be stuck with
fallback code either way. Plus we'd need this code in the main Git
binary, where we've tried to avoid having a link dependency on libcurl.

But let's at least fix our parser. Moving to curl's parser would prevent
other potential discrepancies, but this gives us immediate relief for
the known problem, and would help our fallback code if we eventually use
curl.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
credential.c
t/t0300-credentials.sh

index 21b3ba152fe23f62d5a6e8d9e66065b43f53fe0a..8aa977754886841805bcb42b558c5246fcc1b359 100644 (file)
@@ -388,7 +388,14 @@ int credential_from_url_gently(struct credential *c, const char *url,
        cp = proto_end + 3;
        at = strchr(cp, '@');
        colon = strchr(cp, ':');
-       slash = strchrnul(cp, '/');
+
+       /*
+        * A query or fragment marker before the slash ends the host portion.
+        * We'll just continue to call this "slash" for simplicity. Notably our
+        * "trim leading slashes" part won't skip over this part of the path,
+        * but that's what we'd want.
+        */
+       slash = cp + strcspn(cp, "/?#");
 
        if (!at || slash <= at) {
                /* Case (1) */
index 5b78ebbc3f223580e287889fd1d37e8cfb7bf724..b6ec6769899d4b78e85b6c13d2c06df5e508785b 100755 (executable)
@@ -443,11 +443,45 @@ test_expect_success 'url parser ignores embedded newlines' '
        username=askpass-username
        password=askpass-password
        --
-       warning: url contains a newline in its host component: https://one.example.com?%0ahost=two.example.com/
+       warning: url contains a newline in its path component: https://one.example.com?%0ahost=two.example.com/
        warning: skipping credential lookup for url: https://one.example.com?%0ahost=two.example.com/
        askpass: Username:
        askpass: Password:
        EOF
 '
 
+# usage: check_host_and_path <url> <expected-host> <expected-path>
+check_host_and_path () {
+       # we always parse the path component, but we need this to make sure it
+       # is passed to the helper
+       test_config credential.useHTTPPath true &&
+       check fill "verbatim user pass" <<-EOF
+       url=$1
+       --
+       protocol=https
+       host=$2
+       path=$3
+       username=user
+       password=pass
+       --
+       verbatim: get
+       verbatim: protocol=https
+       verbatim: host=$2
+       verbatim: path=$3
+       EOF
+}
+
+test_expect_success 'url parser handles bare query marker' '
+       check_host_and_path https://example.com?foo.git example.com ?foo.git
+'
+
+test_expect_success 'url parser handles bare fragment marker' '
+       check_host_and_path https://example.com#foo.git example.com "#foo.git"
+'
+
+test_expect_success 'url parser not confused by encoded markers' '
+       check_host_and_path https://example.com%23%3f%2f/foo.git \
+               "example.com#?/" foo.git
+'
+
 test_done