From: Daniel Stenberg Date: Sat, 22 Feb 2025 09:36:10 +0000 (+0100) Subject: tool_getparam: make --url support a file with URLs X-Git-Tag: curl-8_13_0~356 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6306476fc3f23a0eba833161f76503ea923b567b;p=thirdparty%2Fcurl.git tool_getparam: make --url support a file with URLs It implies -O used for each URL. Mention in the --url documentation. Test 488 and 489 verify. Closes #16099 --- diff --git a/docs/cmdline-opts/url.md b/docs/cmdline-opts/url.md index d19c73ecbc..34b9c71a3d 100644 --- a/docs/cmdline-opts/url.md +++ b/docs/cmdline-opts/url.md @@ -2,16 +2,19 @@ c: Copyright (C) Daniel Stenberg, , et al. SPDX-License-Identifier: curl Long: url -Arg: -Help: URL to work with +Arg: +Help: URL(s) to work with Category: curl Added: 7.5 Multi: append See-also: - next - config + - path-as-is + - disallow-username-in-url Example: - --url $URL + - --url @file --- # `--url` @@ -32,3 +35,15 @@ destination option unless --remote-name-all is used. On Windows, `file://` accesses can be converted to network accesses by the operating system. + +Starting in curl 8.13.0, curl can be told to download URLs provided in a text +file, one URL per line. It is done with `--url @filename`: so instead of a +URL, you specify a filename prefixed with the `@` symbol. It can be told to +load the list of URLs from stdin by providing an argument like `@-`. + +When downloading URLs given in a file, it implies using --remote-name for each +provided URL. The URLs are full, there is no globbing applied or done on +these. Features such as --skip-existing work fine in combination with this. + +Lines in the URL file that start with `#` are treated as comments and are +skipped. 
diff --git a/src/tool_getparam.c b/src/tool_getparam.c index 2b3bf04730..5364098d2f 100644 --- a/src/tool_getparam.c +++ b/src/tool_getparam.c @@ -1020,9 +1020,10 @@ const struct LongShort *findlongopt(const char *opt) sizeof(aliases[0]), findarg); } -static ParameterError parse_url(struct GlobalConfig *global, - struct OperationConfig *config, - const char *nextarg) +static ParameterError add_url(struct GlobalConfig *global, + struct OperationConfig *config, + const char *thisurl, + int extraflags) { ParameterError err = PARAM_OK; struct getout *url; @@ -1050,10 +1051,10 @@ static ParameterError parse_url(struct GlobalConfig *global, return PARAM_NO_MEM; else { /* fill in the URL */ - err = getstr(&url->url, nextarg, DENY_BLANK); - url->flags |= GETOUT_URL; - if(!err && (++config->num_urls > 1) && (config->etag_save_file || - config->etag_compare_file)) { + err = getstr(&url->url, thisurl, DENY_BLANK); + url->flags |= GETOUT_URL | extraflags; + if(!err && (++config->num_urls > 1) && + (config->etag_save_file || config->etag_compare_file)) { errorf(global, "The etag options only work on a single URL"); return PARAM_BAD_USE; } @@ -1061,6 +1062,65 @@ static ParameterError parse_url(struct GlobalConfig *global, return err; } +static ParameterError parse_url(struct GlobalConfig *global, + struct OperationConfig *config, + const char *nextarg) +{ + if(nextarg && (nextarg[0] == '@')) { + /* read URLs from a file, treat all as -O */ + struct curlx_dynbuf line; + ParameterError err = PARAM_OK; + bool error = FALSE; + bool fromstdin = !strcmp("-", &nextarg[1]); + FILE *f; + + if(fromstdin) + f = stdin; + else + f = fopen(&nextarg[1], FOPEN_READTEXT); + if(f) { + curlx_dyn_init(&line, 8092); + while(my_get_line(f, &line, &error)) { + /* every line has a newline that we strip off */ + size_t len = curlx_dyn_len(&line); + const char *ptr; + if(len) + curlx_dyn_setlen(&line, len - 1); + ptr = curlx_dyn_ptr(&line); + /* line with # in the first non-blank column is a comment! 
*/ + while(*ptr && ISSPACE(*ptr)) + ptr++; + + switch(*ptr) { + case '#': + case '/': + case '\r': + case '\n': + case '*': + case '\0': + /* comment or weird line, skip it */ + break; + default: + err = add_url(global, config, ptr, GETOUT_USEREMOTE | GETOUT_NOGLOB); + break; + } + if(err) + break; + curlx_dyn_reset(&line); + } + if(!fromstdin) + fclose(f); + curlx_dyn_free(&line); + if(error || err) + return PARAM_READ_ERROR; + return PARAM_OK; + } + return PARAM_READ_ERROR; /* file not found */ + } + return add_url(global, config, nextarg, 0); +} + + static ParameterError parse_localport(struct OperationConfig *config, char *nextarg) { diff --git a/src/tool_listhelp.c b/src/tool_listhelp.c index 70b5e37817..c79859d2d2 100644 --- a/src/tool_listhelp.c +++ b/src/tool_listhelp.c @@ -808,8 +808,8 @@ const struct helptxt helptext[] = { {"-T, --upload-file ", "Transfer local FILE to destination", CURLHELP_IMPORTANT | CURLHELP_UPLOAD}, - {" --url ", - "URL to work with", + {" --url ", + "URL(s) to work with", CURLHELP_CURL}, {" --url-query ", "Add a URL query part", diff --git a/src/tool_operate.c b/src/tool_operate.c index 95ce8a4234..95a1d0e1e2 100644 --- a/src/tool_operate.c +++ b/src/tool_operate.c @@ -1889,7 +1889,7 @@ static CURLcode single_transfer(struct GlobalConfig *global, } if(!state->urlnum) { - if(!config->globoff) { + if(!config->globoff && !(urlnode->flags & GETOUT_NOGLOB)) { /* Unless explicitly shut off, we expand '{...}' and '[...]' expressions and return total number of URLs in pattern set */ result = glob_url(&state->urls, urlnode->url, &state->urlnum, diff --git a/src/tool_sdecls.h b/src/tool_sdecls.h index 1d1fb3e27d..2f46d1a073 100644 --- a/src/tool_sdecls.h +++ b/src/tool_sdecls.h @@ -96,6 +96,7 @@ struct getout { #define GETOUT_USEREMOTE (1<<2) /* use remote filename locally */ #define GETOUT_UPLOAD (1<<3) /* if set, -T has been used */ #define GETOUT_NOUPLOAD (1<<4) /* if set, -T "" has been used */ +#define GETOUT_NOGLOB (1<<5) /* disable 
globbing for this URL */ /* * 'trace' enumeration represents curl's output look'n feel possibilities. diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am index 695160b8fd..015d568c53 100644 --- a/tests/data/Makefile.am +++ b/tests/data/Makefile.am @@ -78,30 +78,30 @@ test444 test445 test446 test447 test448 test449 test450 test451 test452 \ test453 test454 test455 test456 test457 test458 test459 test460 test461 \ test462 test463 test467 test468 test469 test470 test471 test472 test473 \ test474 test475 test476 test477 test478 test479 test480 test481 test482 \ -test483 test484 test485 test486 test487 \ -test490 test491 test492 test493 test494 test495 test496 test497 test498 \ -test499 test500 test501 test502 test503 test504 test505 test506 test507 \ -test508 test509 test510 test511 test512 test513 test514 test515 test516 \ -test517 test518 test519 test520 test521 test522 test523 test524 test525 \ -test526 test527 test528 test529 test530 test531 test532 test533 test534 \ -test535 test536 test537 test538 test539 test540 test541 test542 test543 \ -test544 test545 test546 test547 test548 test549 test550 test551 test552 \ -test553 test554 test555 test556 test557 test558 test559 test560 test561 \ -test562 test563 test564 test565 test566 test567 test568 test569 test570 \ -test571 test572 test573 test574 test575 test576 test577 test578 test579 \ -test580 test581 test582 test583 test584 test585 test586 test587 test588 \ -test589 test590 test591 test592 test593 test594 test595 test596 test597 \ -test598 test599 test600 test601 test602 test603 test604 test605 test606 \ -test607 test608 test609 test610 test611 test612 test613 test614 test615 \ -test616 test617 test618 test619 test620 test621 test622 test623 test624 \ -test625 test626 test627 test628 test629 test630 test631 test632 test633 \ -test634 test635 test636 test637 test638 test639 test640 test641 test642 \ -test643 test644 test645 test646 test647 test648 test649 test650 test651 \ -test652 test653 test654 test655 
test656 test658 test659 test660 test661 \ -test662 test663 test664 test665 test666 test667 test668 test669 test670 \ -test671 test672 test673 test674 test675 test676 test677 test678 test679 \ -test680 test681 test682 test683 test684 test685 test686 test687 test688 \ -test689 test690 test691 test692 test693 test694 test695 test696 test697 \ +test483 test484 test485 test486 test487 test488 test489 test490 test491 \ +test492 test493 test494 test495 test496 test497 test498 test499 test500 \ +test501 test502 test503 test504 test505 test506 test507 test508 test509 \ +test510 test511 test512 test513 test514 test515 test516 test517 test518 \ +test519 test520 test521 test522 test523 test524 test525 test526 test527 \ +test528 test529 test530 test531 test532 test533 test534 test535 test536 \ +test537 test538 test539 test540 test541 test542 test543 test544 test545 \ +test546 test547 test548 test549 test550 test551 test552 test553 test554 \ +test555 test556 test557 test558 test559 test560 test561 test562 test563 \ +test564 test565 test566 test567 test568 test569 test570 test571 test572 \ +test573 test574 test575 test576 test577 test578 test579 test580 test581 \ +test582 test583 test584 test585 test586 test587 test588 test589 test590 \ +test591 test592 test593 test594 test595 test596 test597 test598 test599 \ +test600 test601 test602 test603 test604 test605 test606 test607 test608 \ +test609 test610 test611 test612 test613 test614 test615 test616 test617 \ +test618 test619 test620 test621 test622 test623 test624 test625 test626 \ +test627 test628 test629 test630 test631 test632 test633 test634 test635 \ +test636 test637 test638 test639 test640 test641 test642 test643 test644 \ +test645 test646 test647 test648 test649 test650 test651 test652 test653 \ +test654 test655 test656 test658 test659 test660 test661 test662 test663 \ +test664 test665 test666 test667 test668 test669 test670 test671 test672 \ +test673 test674 test675 test676 test677 test678 test679 test680 test681 \ 
+test682 test683 test684 test685 test686 test687 test688 test689 test690 \ +test691 test692 test693 test694 test695 test696 test697 \ \ test700 test701 test702 test703 test704 test705 test706 test707 test708 \ test709 test710 test711 test712 test713 test714 test715 test716 test717 \ diff --git a/tests/data/test488 b/tests/data/test488 new file mode 100644 index 0000000000..e0bf4dc2ec --- /dev/null +++ b/tests/data/test488 @@ -0,0 +1,63 @@ + + + +HTTP +HTTP GET +--url + + + +# +# Server-side + + +HTTP/1.1 200 OK +Date: Tue, 09 Nov 2010 14:49:00 GMT +Server: test-server/fake +Last-Modified: Tue, 13 Jun 2000 12:10:00 GMT +ETag: "21025-dc7-39462498" +Accept-Ranges: bytes +Content-Length: 6 +Connection: close +Content-Type: text/html +Funny-head: yesyes + +-foo- + + + +# +# Client-side + + +http + + +Download two URLs provided on stdin + + +--url @- + + +http://%HOSTIP:%HTTPPORT/a +http://%HOSTIP:%HTTPPORT/b + + + +# +# Verify data after the test has been "shot" + + +GET /a HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + +GET /b HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + + + + diff --git a/tests/data/test489 b/tests/data/test489 new file mode 100644 index 0000000000..4d015cdd50 --- /dev/null +++ b/tests/data/test489 @@ -0,0 +1,63 @@ + + + +HTTP +HTTP GET +--url + + + +# +# Server-side + + +HTTP/1.1 200 OK +Date: Tue, 09 Nov 2010 14:49:00 GMT +Server: test-server/fake +Last-Modified: Tue, 13 Jun 2000 12:10:00 GMT +ETag: "21025-dc7-39462498" +Accept-Ranges: bytes +Content-Length: 6 +Connection: close +Content-Type: text/html +Funny-head: yesyes + +-foo- + + + +# +# Client-side + + +http + + +Download two URLs provided in a file + + +--url @%LOGDIR/urls + + +http://%HOSTIP:%HTTPPORT/a +http://%HOSTIP:%HTTPPORT/b + + + +# +# Verify data after the test has been "shot" + + +GET /a HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: curl/%VERSION +Accept: */* + +GET /b HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +User-Agent: 
curl/%VERSION +Accept: */* + + + +