From: Daniel Stenberg Date: Sun, 25 Jan 2026 15:35:53 +0000 (+0100) Subject: tool_cb_hdr: with -J, use the redirect name as a backup X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fa6a46473e6d4f97aff56a0bd823229052c8c2d2;p=thirdparty%2Fcurl.git tool_cb_hdr: with -J, use the redirect name as a backup The -J / --remote-header-name logic now records the file name part used in the redirects so that it can use the last one as a name if no Content-Disposition header arrives. Add tests to verify: 1641: -J with a redirect and extract the CD contents in the second response 1642: -J with a redirect but no Content-Disposition, use the name from the Location: header 1643: -J with two redirects, using the last file name and also use queries and fragments to verify them stripped off Closes #20430 --- diff --git a/docs/TODO.md b/docs/TODO.md index 7c7c065baf..6946f453e0 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -819,25 +819,6 @@ already transferred before the retry. See [curl issue 1084](https://github.com/curl/curl/issues/1084) -## consider filename from the redirected URL with `-O` ? - -When a user gives a URL and uses `-O`, and curl follows a redirect to a new -URL, the filename is not extracted and used from the newly redirected-to URL -even if the new URL may have a much more sensible filename. - -This is clearly documented and helps for security since there is no surprise -to users which filename that might get overwritten, but maybe a new option -could allow for this or maybe `-J` should imply such a treatment as well as -`-J` already allows for the server to decide what filename to use so it -already provides the "may overwrite any file" risk. - -This is extra tricky if the original URL has no filename part at all since -then the current code path does error out with an error message, and we cannot -*know* already at that point if curl is redirected to a URL that has a -filename... - -See [curl issue 1241](https://github.com/curl/curl/issues/1241) - ## retry on network is unreachable The `--retry` option retries transfers on *transient failures*. We later added diff --git a/docs/cmdline-opts/remote-header-name.md b/docs/cmdline-opts/remote-header-name.md index 88c2808a3f..52ae98b01c 100644 --- a/docs/cmdline-opts/remote-header-name.md +++ b/docs/cmdline-opts/remote-header-name.md @@ -34,6 +34,9 @@ this option may provide you with rather unexpected filenames. This feature uses the name from the `filename` field, it does not yet support the `filename*` field (filenames with explicit character sets). +Starting in 8.19.0, curl falls back and uses the filename extracted from the +last redirect header if no `Content-Disposition:` header provides a filename. + **WARNING**: Exercise judicious use of this option, especially on Windows. A rogue server could send you the name of a DLL or other file that could be loaded automatically by Windows or some third party software. diff --git a/src/tool_cb_hdr.c b/src/tool_cb_hdr.c index 876a599049..eb35c5c383 100644 --- a/src/tool_cb_hdr.c +++ b/src/tool_cb_hdr.c @@ -148,30 +148,40 @@ locdone: /* * Copies a filename part and returns an ALLOCATED data buffer. */ -static char *parse_filename(const char *ptr, size_t len) +static char *parse_filename(const char *ptr, size_t len, char stop) { char *copy; char *p; char *q; - char stop = '\0'; copy = memdup0(ptr, len); if(!copy) return NULL; p = copy; - if(*p == '\'' || *p == '"') { - /* store the starting quote */ - stop = *p; - p++; - } - else - stop = ';'; + if(stop) { + /* a Content-Disposition: header */ + if(*p == '\'' || *p == '"') { + /* store the starting quote */ + stop = *p; + p++; + } - /* scan for the end letter and stop there */ - q = strchr(p, stop); - if(q) - *q = '\0'; + /* scan for the end letter and stop there */ + q = strchr(p, stop); + if(q) + *q = '\0'; + } + else { + /* this is a Location: header, so we need to trim off any queries and + fragments present */ + q = strchr(p, '?'); /* trim off query, if present */ + if(q) + *q = '\0'; + q = strchr(p, '#'); /* trim off fragment, if present */ + if(q) + *q = '\0'; + } /* if the filename contains a path, only use filename portion */ q = strrchr(p, '/'); @@ -283,12 +293,44 @@ static size_t save_etag(const char *etag_h, const char *endp, * Content-Disposition header specifying a filename property. */ static size_t content_disposition(const char *str, const char *end, - size_t cb, struct per_transfer *per) + size_t cb, struct per_transfer *per, + long response) /* response code */ { struct HdrCbData *hdrcbdata = &per->hdrcbdata; struct OutStruct *outs = &per->outs; - if((cb > 20) && checkprefix("Content-disposition:", str)) { + if((cb > 9) && checkprefix("Location:", str) && (response/100 == 3)) { + /* Get the name off the location header as a temporary measure in case + there is no Content-Disposition */ + const char *p = &str[9]; + curlx_str_passblanks(&p); + if(p < end) { /* as a precaution */ + char *filename = parse_filename(p, cb - (p - str), 0); + if(filename) { + if(outs->stream) { + /* indication of problem, get out! */ + curlx_free(filename); + return CURL_WRITEFUNC_ERROR; + } + if(outs->alloc_filename) + curlx_free(outs->filename); + + if(per->config->output_dir) { + outs->filename = curl_maprintf("%s/%s", per->config->output_dir, + filename); + curlx_free(filename); + if(!outs->filename) + return CURL_WRITEFUNC_ERROR; + } + else + outs->filename = filename; + outs->alloc_filename = TRUE; + outs->is_cd_filename = TRUE; /* set to avoid clobbering existing files + by default */ + } + } + } + else if((cb > 20) && checkprefix("Content-disposition:", str)) { const char *p = str + 20; /* look for the 'filename=' parameter (encoded filenames (*=) are not supported) */ @@ -312,14 +354,17 @@ static size_t content_disposition(const char *str, const char *end, } p += 9; + curlx_str_passblanks(&p); len = cb - (size_t)(p - str); - filename = parse_filename(p, len); + filename = parse_filename(p, len, ';'); if(filename) { if(outs->stream) { /* indication of problem, get out! */ curlx_free(filename); return CURL_WRITEFUNC_ERROR; } + if(outs->alloc_filename) + curlx_free(outs->filename); if(per->config->output_dir) { outs->filename = curl_maprintf("%s/%s", per->config->output_dir, @@ -440,13 +485,13 @@ size_t tool_header_cb(char *ptr, size_t size, size_t nmemb, void *userdata) return rc; } - /* Parse the content-disposition header. When honor_cd_filename is true - other headers may be stored until the content-disposition header is - reached, at which point the saved headers can be written. That means - the content_disposition() may return an rc when it has saved a - different header for writing later. */ + /* Parse the content-disposition and location headers. When + honor_cd_filename is true, other headers may be stored until the + content-disposition header is reached, at which point the saved headers + can be written. That means the content_disposition() may return an rc + when it has saved a different header for writing later. */ else if(hdrcbdata->honor_cd_filename) { - size_t rc = content_disposition(str, end, cb, per); + size_t rc = content_disposition(str, end, cb, per, response); if(rc) return rc; } diff --git a/src/tool_sdecls.h b/src/tool_sdecls.h index abee355d17..9058233a68 100644 --- a/src/tool_sdecls.h +++ b/src/tool_sdecls.h @@ -36,7 +36,8 @@ * dynamically allocated and 'belongs' to this OutStruct, otherwise FALSE. * * 'is_cd_filename' member is TRUE when string pointed by 'filename' has been - * set using a server-specified Content-Disposition filename, otherwise FALSE. + * set using a server-specified Content-Disposition or Location filename, + * otherwise FALSE. * * 'regular_file' member is TRUE when output goes to a regular file, this also * implies that output is 'seekable' and 'appendable' and also that member diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am index 7de9f5f595..d7115003bf 100644 --- a/tests/data/Makefile.am +++ b/tests/data/Makefile.am @@ -218,7 +218,7 @@ test1620 test1621 test1622 test1623 \ \ test1630 test1631 test1632 test1633 test1634 test1635 test1636 \ \ -test1640 \ +test1640 test1641 test1642 test1643 \ \ test1650 test1651 test1652 test1653 test1654 test1655 test1656 test1657 \ test1658 \ diff --git a/tests/data/test1641 b/tests/data/test1641 new file mode 100644 index 0000000000..a71a59b056 --- /dev/null +++ b/tests/data/test1641 @@ -0,0 +1,62 @@ + + + + +HTTP +HTTP GET +-J + + + + + +HTTP/1.1 301 OK +Location: %TESTNUMBER0002 + + + +HTTP/1.1 200 OK +Date: Tue, 09 Nov 2010 14:49:00 GMT +Server: test-server/fake +Content-Length: 6 +Connection: close +Content-Type: text/html +Content-Disposition: filename=name%TESTNUMBER; charset=funny; option=strange + +12345 + + + +# Client-side + + +http + + +HTTP GET with -J, a redirect and Content-Disposition in the second response + + +http://%HOSTIP:%HTTPPORT/%TESTNUMBER -J -L -O --output-dir %LOGDIR + + + +# Verify data after the test has been "shot" + + +GET /%TESTNUMBER HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + +GET /%TESTNUMBER0002 HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + + + +12345 + + + + diff --git a/tests/data/test1642 b/tests/data/test1642 new file mode 100644 index 0000000000..81f0ae8fc6 --- /dev/null +++ b/tests/data/test1642 @@ -0,0 +1,61 @@ + + + + +HTTP +HTTP GET +-J + + + + + +HTTP/1.1 301 OK +Location: go/here/%TESTNUMBER0002 + + + +HTTP/1.1 200 OK +Date: Tue, 09 Nov 2010 14:49:00 GMT +Server: test-server/fake +Content-Length: 6 +Connection: close +Content-Type: text/html + +12345 + + + +# Client-side + + +http + + +HTTP GET with -J, redirect, no Content-Disposition use the Location: name + + +http://%HOSTIP:%HTTPPORT/%TESTNUMBER -J -L -O --output-dir %LOGDIR + + + +# Verify data after the test has been "shot" + + +GET /%TESTNUMBER HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + +GET /go/here/%TESTNUMBER0002 HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + + + +12345 + + + + diff --git a/tests/data/test1643 b/tests/data/test1643 new file mode 100644 index 0000000000..17c368344e --- /dev/null +++ b/tests/data/test1643 @@ -0,0 +1,71 @@ + + + + +HTTP +HTTP GET +-J + + + + + +HTTP/1.1 301 OK +Location: go/here/%TESTNUMBER0002#first + + + +HTTP/1.1 301 OK +Location: too/%TESTNUMBER0003?fooo#second/%TESTNUMBER0003 + + + +HTTP/1.1 200 OK +Date: Tue, 09 Nov 2010 14:49:00 GMT +Server: test-server/fake +Content-Length: 6 +Connection: close +Content-Type: text/html + +12345 + + + +# Client-side + + +http + + +HTTP -J, two redirects, use the last Location: name + + +http://%HOSTIP:%HTTPPORT/%TESTNUMBER -J -L -O --output-dir %LOGDIR + + + +# Verify data after the test has been "shot" + + +GET /%TESTNUMBER HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + +GET /go/here/%TESTNUMBER0002 HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + +GET /go/here/too/%TESTNUMBER0003?fooo HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + + + +12345 + + + +