wcurl: import v2025.04.20 script + docs

author Daniel Stenberg <daniel@haxx.se>

Fri, 11 Apr 2025 21:17:03 +0000 (23:17 +0200)

committer Daniel Stenberg <daniel@haxx.se>

Mon, 21 Apr 2025 09:06:44 +0000 (11:06 +0200)
author Daniel Stenberg <daniel@haxx.se>
Fri, 11 Apr 2025 21:17:03 +0000 (23:17 +0200)
committer Daniel Stenberg <daniel@haxx.se>
Mon, 21 Apr 2025 09:06:44 +0000 (11:06 +0200)
diff --git a/.github/scripts/spellcheck.yaml b/.github/scripts/spellcheck.yaml

index 04dfbfaf1beccfd53db0003108911e20fb888ac5..05ddf0d937f11963d6c710f682d0ba65d703def1 100644 (file)
--- a/.github/scripts/spellcheck.yaml
+++ b/.github/scripts/spellcheck.yaml
@@ -29,4 +29,4 @@ matrix:
              - 'strong'
              - 'em'
      sources:
-      - '**/*.md|!docs/BINDINGS.md|!docs/DISTROS.md|!docs/CIPHERS-TLS12.md'
+      - '**/*.md|!docs/BINDINGS.md|!docs/DISTROS.md|!docs/CIPHERS-TLS12.md|!docs/wcurl.md'
diff --git a/.github/workflows/checksrc.yml b/.github/workflows/checksrc.yml

index f172e55341c53e834bfa2e06d13788a00f3fd3c1..e170d3f660043cd825f0d8b975b0e22060766fda 100644 (file)
--- a/.github/workflows/checksrc.yml
+++ b/.github/workflows/checksrc.yml
@@ -69,6 +69,7 @@ jobs:
            codespell \
              --skip scripts/mk-ca-bundle.pl \
              --skip src/tool_hugehelp.c \
+            --skip scripts/wcurl \
              -I .github/scripts/codespell-ignore.txt \
              CMake include m4 scripts src lib
  
diff --git a/CMakeLists.txt b/CMakeLists.txt

index f93a082f116cb6fced34b67f1c12eb3be58e1081..de72bb7f3409f2f3ffb5f79dea0734e86b0b6a6a 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2503,6 +2503,13 @@ if(NOT CURL_DISABLE_INSTALL)
        COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/CMake/cmake_uninstall.cmake")
    endif()
  
+  install(FILES "${PROJECT_SOURCE_DIR}/scripts/wcurl"
+    DESTINATION ${CMAKE_INSTALL_BINDIR}
+    PERMISSIONS
+      OWNER_READ OWNER_WRITE OWNER_EXECUTE
+      GROUP_READ GROUP_EXECUTE
+      WORLD_READ WORLD_EXECUTE)
+
    # The `-DEV` part is important
    string(REGEX REPLACE "([0-9]+\.[0-9]+)\.([0-9]+.*)" "\\2" CPACK_PACKAGE_VERSION_PATCH "${_curl_version}")
    set(CPACK_GENERATOR "TGZ")
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt

index fd5f1522a2804eadcb89523237e1a1389687f0ac..f5293c5a291839bdc1a1a6c202e936fc62ee7c35 100644 (file)
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -30,7 +30,7 @@ if(ENABLE_CURL_MANUAL AND BUILD_CURL_EXE)
  endif()
  
  if(BUILD_MISC_DOCS)
-  foreach(_man_misc IN ITEMS "curl-config" "mk-ca-bundle")
+  foreach(_man_misc IN ITEMS "curl-config" "mk-ca-bundle" "wcurl")
      set(_man_target "${CMAKE_CURRENT_BINARY_DIR}/${_man_misc}.1")
      add_custom_command(OUTPUT "${_man_target}"
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
diff --git a/docs/Makefile.am b/docs/Makefile.am

index 0c92dd8a3369e3dff68e384fcbb88d93ec4321dc..80d446217f6acf433970d07f866562f439b3384d 100644 (file)
--- a/docs/Makefile.am
+++ b/docs/Makefile.am
@@ -28,16 +28,16 @@ if BUILD_DOCS
  # if we disable man page building, ignore these
  MK_CA_DOCS = mk-ca-bundle.1
  CURLCONF_DOCS = curl-config.1
-man_MANS = curl-config.1
+man_MANS = curl-config.1 wcurl.1
  endif
  
-CURLPAGES = curl-config.md mk-ca-bundle.md
+CURLPAGES = curl-config.md mk-ca-bundle.md wcurl.md
  
  SUBDIRS = . cmdline-opts libcurl
  DIST_SUBDIRS = $(SUBDIRS) examples
  
  if BUILD_DOCS
-CLEANFILES = mk-ca-bundle.1 curl-config.1
+CLEANFILES = mk-ca-bundle.1 curl-config.1 wcurl.1
  endif
  
  INTERNALDOCS =                                  \
@@ -134,5 +134,7 @@ curl-config.1: curl-config.md
  
  mk-ca-bundle.1: mk-ca-bundle.md
  
+wcurl.1: wcurl.md
+
  distclean:
         rm -f $(CLEANFILES)
diff --git a/docs/wcurl.md b/docs/wcurl.md

new file mode 100644 (file)

index 0000000..4111af5
--- /dev/null
+++ b/docs/wcurl.md
@@ -0,0 +1,145 @@
+---
+c: Copyright (C) Samuel Henrique <samueloph@debian.org>, Sergio Durigan Junior <sergiodj@debian.org> and many contributors, see the AUTHORS file.
+SPDX-License-Identifier: curl
+Title: wcurl
+Section: 1
+Source: wcurl
+See-also:
+  - curl (1)
+  - trurl (1)
+Added-in: n/a
+---
+
+# NAME
+
+**wcurl** - a simple wrapper around curl to easily download files.
+
+# SYNOPSIS
+
+**wcurl \<URL\>...**
+
+**wcurl [--curl-options \<CURL_OPTIONS\>]... [--dry-run] [--no-decode-filename] [-o|-O|--output \<PATH\>] [--] \<URL\>...**
+
+**wcurl [--curl-options=\<CURL_OPTIONS\>]... [--dry-run] [--no-decode-filename] [--output=\<PATH\>] [--] \<URL\>...**
+
+**wcurl -V|--version**
+
+**wcurl -h|--help**
+
+# DESCRIPTION
+
+**wcurl** is a simple curl wrapper which lets you use curl to download files
+without having to remember any parameters.
+
+Simply call **wcurl** with a list of URLs you want to download and **wcurl**
+picks sane defaults.
+
+If you need anything more complex, you can provide any of curl's supported
+parameters via the **--curl-options** option. Just beware that you likely
+should be using curl directly if your use case is not covered.
+
+By default, **wcurl** does:
+
+## * Percent-encode whitespaces in URLs;
+
+## * Download multiple URLs in parallel
+    if the installed curl's version is \>= 7.66.0 (--parallel);
+
+## * Follow redirects;
+
+## * Automatically choose a filename as output;
+
+## * Avoid overwriting files
+     if the installed curl's version is \>= 7.83.0 (--no-clobber);
+
+## * Perform retries;
+
+## * Set the downloaded file timestamp
+    to the value provided by the server, if available;
+
+## * Default to https
+    if the URL does not contain any scheme;
+
+## * Disable curl's URL globbing parser
+    so {} and [] characters in URLs are not treated specially;
+
+## * Percent-decode the resulting filename;
+
+## * Use 'index.html' as the default filename
+    if there is none in the URL.
+
+# OPTIONS
+
+## --curl-options, --curl-options=\<CURL_OPTIONS\>...
+
+Specify extra options to be passed when invoking curl. May be specified more
+than once.
+
+## -o, -O, --output, --output=\<PATH\>
+
+Use the provided output path instead of getting it from the URL. If multiple
+URLs are provided, resulting files share the same name with a number appended to
+the end (curl \>= 7.83.0). If this option is provided multiple times, only the
+last value is considered.
+
+## --no-decode-filename
+
+Don't percent-decode the output filename, even if the percent-encoding in the
+URL was done by **wcurl**, e.g.: The URL contained whitespaces.
+
+## --dry-run
+
+Do not actually execute curl, just print what would be invoked.
+
+## -V, \--version
+
+Print version information.
+
+## -h, \--help
+
+Print help message.
+
+# CURL_OPTIONS
+
+Any option supported by curl can be set here. This is not used by **wcurl**; it
+is instead forwarded to the curl invocation.
+
+# URL
+
+URL to be downloaded. Anything that is not a parameter is considered
+a URL. Whitespaces are percent-encoded and the URL is passed to curl, which
+then performs the parsing. May be specified more than once.
+
+# EXAMPLES
+
+Download a single file:
+
+**wcurl example.com/filename.txt**
+
+Download two files in parallel:
+
+**wcurl example.com/filename1.txt example.com/filename2.txt**
+
+Download a file passing the **--progress-bar** and **--http2** flags to curl:
+
+**wcurl --curl-options="--progress-bar --http2" example.com/filename.txt**
+
+Resume from an interrupted download (if more options are used, this needs to
+be the last one in the list):
+
+**wcurl --curl-options="--continue-at -" example.com/filename.txt**
+
+# AUTHORS
+
+    Samuel Henrique \<samueloph@debian.org\>
+    Sergio Durigan Junior \<sergiodj@debian.org\>
+    and many contributors, see the AUTHORS file.
+
+# REPORTING BUGS
+
+If you experience any problems with **wcurl** that you do not experience with
+curl, submit an issue on GitHub: https://github.com/curl/wcurl
+
+# COPYRIGHT
+
+**wcurl** is licensed under the curl license.
diff --git a/scripts/Makefile.am b/scripts/Makefile.am

index dfee81e0552cc307b1d576567271d29e26556480..705a77a09e75a881bee97e62bef2345e6c38c7af 100644 (file)
--- a/scripts/Makefile.am
+++ b/scripts/Makefile.am
@@ -25,7 +25,9 @@
  EXTRA_DIST = coverage.sh completion.pl firefox-db2pem.sh checksrc.pl              \
    mk-ca-bundle.pl mk-unity.pl schemetable.c cd2nroff nroff2cd cdall cd2cd managen \
    dmaketgz maketgz release-tools.sh verify-release cmakelint.sh mdlinkcheck       \
-  CMakeLists.txt randdisable
+  CMakeLists.txt randdisable wcurl
+
+dist_bin_SCRIPTS = wcurl
  
  ZSH_FUNCTIONS_DIR = @ZSH_FUNCTIONS_DIR@
  FISH_FUNCTIONS_DIR = @FISH_FUNCTIONS_DIR@
diff --git a/scripts/wcurl b/scripts/wcurl

new file mode 100755 (executable)

index 0000000..35fcb8a
--- /dev/null
+++ b/scripts/wcurl
@@ -0,0 +1,324 @@
+#!/bin/sh
+
+# wcurl - a simple wrapper around curl to easily download files.
+#
+# Requires curl >= 7.46.0 (2015)
+#
+# Copyright (C) Samuel Henrique <samueloph@debian.org>, Sergio Durigan
+# Junior <sergiodj@debian.org> and many contributors, see the AUTHORS
+# file.
+#
+# Permission to use, copy, modify, and distribute this software for any purpose
+# with or without fee is hereby granted, provided that the above copyright
+# notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
+# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+# OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of a copyright holder shall not be
+# used in advertising or otherwise to promote the sale, use or other dealings in
+# this Software without prior written authorization of the copyright holder.
+#
+# SPDX-License-Identifier: curl
+
+# Stop on errors and on usage of unset variables.
+set -eu
+
+VERSION="2025.04.20"
+
+PROGRAM_NAME="$(basename "$0")"
+readonly PROGRAM_NAME
+
+# Display the version.
+print_version()
+{
+    cat << _EOF_
+${VERSION}
+_EOF_
+}
+
+# Display the program usage.
+usage()
+{
+    cat << _EOF_
+${PROGRAM_NAME} -- a simple wrapper around curl to easily download files.
+
+Usage: ${PROGRAM_NAME} <URL>...
+       ${PROGRAM_NAME} [--curl-options <CURL_OPTIONS>]... [--no-decode-filename] [-o|-O|--output <PATH>] [--dry-run] [--] <URL>...
+       ${PROGRAM_NAME} [--curl-options=<CURL_OPTIONS>]... [--no-decode-filename] [--output=<PATH>] [--dry-run] [--] <URL>...
+       ${PROGRAM_NAME} -h|--help
+       ${PROGRAM_NAME} -V|--version
+
+Options:
+
+  --curl-options <CURL_OPTIONS>: Specify extra options to be passed when invoking curl. May be
+                                 specified more than once.
+
+  -o, -O, --output <PATH>: Use the provided output path instead of getting it from the URL. If
+                           multiple URLs are provided, resulting files share the same name with a
+                           number appended to the end (curl >= 7.83.0). If this option is provided
+                           multiple times, only the last value is considered.
+
+  --no-decode-filename: Don't percent-decode the output filename, even if the percent-encoding in
+                        the URL was done by wcurl, e.g.: The URL contained whitespaces.
+
+  --dry-run: Don't actually execute curl, just print what would be invoked.
+
+  -V, --version: Print version information.
+
+  -h, --help: Print this usage message.
+
+  <CURL_OPTIONS>: Any option supported by curl can be set here. This is not used by wcurl; it is
+                 instead forwarded to the curl invocation.
+
+  <URL>: URL to be downloaded. Anything that is not a parameter is considered
+         an URL. Whitespaces are percent-encoded and the URL is passed to curl, which
+         then performs the parsing. May be specified more than once.
+_EOF_
+}
+
+# Display an error message on stderr and bail out with exit status 1.
+error()
+{
+    printf "%s\n" "$*" >&2  # write via fd 2 directly; no /dev/stderr node needed
+    exit 1
+}
+
+# Extra curl options provided by the user.
+# This is set per-URL for every URL provided.
+# Some options are global, but we are erring on the side of needlessly setting
+# them multiple times instead of causing issues with parameters that need to
+# be set per-URL.
+CURL_OPTIONS=""
+
+# The URLs to be downloaded.
+URLS=""
+
+# Set to the percent-decoded filename parsed from the URL, unless --output or
+# --no-decode-filename is used.
+OUTPUT_PATH=""
+HAS_USER_SET_OUTPUT="false"
+
+# The parameters that are passed per-URL to curl.
+readonly PER_URL_PARAMETERS="\
+    --fail \
+    --globoff \
+    --location \
+    --proto-default https \
+    --remote-time \
+    --retry 10 \
+    --retry-max-time 10 "
+
+# Whether to invoke curl or not.
+DRY_RUN="false"
+
+# Sanitize parameters.
+sanitize()
+{
+    if [ -z "${URLS}" ]; then
+        error "You must provide at least one URL to download."
+    fi
+
+    readonly CURL_OPTIONS URLS DRY_RUN HAS_USER_SET_OUTPUT
+}
+
+# Indicate via exit code whether the string given in the first parameter
+# consists solely of characters from the string given in the second parameter.
+# In other words, it returns 0 if the first parameter only contains characters
+# from the second parameter, e.g.: Are $1 characters a subset of $2 characters?
+is_subset_of()
+{
+    case "${1}" in
+        *[!${2}]*|'') return 1;;
+    esac
+}
+
+# Print the given string percent-decoded.
+percent_decode()
+{
+    # Encodings of control characters (00-1F) are passed through without decoding.
+    # Iterate on the input character-by-character, decoding it.
+    printf "%s\n" "${1}" | fold -w1 | while IFS= read -r decode_out; do
+        # If character is a "%", read the next character as decode_hex1.
+        if [ "${decode_out}" = % ] && IFS= read -r decode_hex1; then
+            decode_out="${decode_out}${decode_hex1}"
+            # If there's one more character, read it as decode_hex2.
+            if IFS= read -r decode_hex2; then
+                decode_out="${decode_out}${decode_hex2}"
+                # Skip decoding if this is a control character (00-1F).
+                # Skip decoding if DECODE_FILENAME is not "true".
+                if is_subset_of "${decode_hex1}" "23456789abcdefABCDEF" && \
+                    is_subset_of "${decode_hex2}" "0123456789abcdefABCDEF" && \
+                    [ "${DECODE_FILENAME}" = "true" ]; then
+                    # Use printf to decode it into octal and then decode it to the final format.
+                    decode_out="$(printf "%b" "\\$(printf %o "0x${decode_hex1}${decode_hex2}")")"
+                fi
+            fi
+        fi
+        printf %s "${decode_out}"
+    done
+}
+
+# Print the percent-decoded filename portion of the given URL.
+get_url_filename()
+{
+    # Remove protocol and query string if present.
+    hostname_and_path="$(printf %s "${1}" | sed -e 's,^[^/]*//,,' -e 's,?.*$,,')"
+    # If what remains contains a slash, there's a path; return it percent-decoded.
+    case "${hostname_and_path}" in
+        # sed to remove everything preceding the last '/', e.g.: "example/something" becomes "something"
+        */*) percent_decode "$(printf %s "${hostname_and_path}" | sed -e 's,^.*/,,')";;
+    esac
+    # No slash means there was just a hostname and no path; return empty string.
+}
+
+# Execute curl with the list of URLs provided by the user.
+exec_curl()
+{
+    CMD="curl "
+
+    # Store version to check if it supports --no-clobber and --parallel.
+    curl_version=$($CMD --version | cut -f2 -d' ' | head -n1)
+    curl_version_major=$(echo "$curl_version" | cut -f1 -d.)
+    curl_version_minor=$(echo "$curl_version" | cut -f2 -d.)
+
+    CURL_HAS_NO_CLOBBER=""
+    CURL_HAS_PARALLEL=""
+    # --no-clobber is only supported since 7.83.0.
+    # --parallel is only supported since 7.66.0.
+    if [ "${curl_version_major}" -ge 8 ]; then
+        CURL_HAS_NO_CLOBBER="--no-clobber"
+        CURL_HAS_PARALLEL="--parallel"
+    elif [ "${curl_version_major}" -eq 7 ];then
+        if [ "${curl_version_minor}" -ge 83 ]; then
+            CURL_HAS_NO_CLOBBER="--no-clobber"
+        fi
+        if [ "${curl_version_minor}" -ge 66 ]; then
+            CURL_HAS_PARALLEL="--parallel"
+        fi
+    fi
+
+    # Detecting whether we need --parallel.  It's easier to rely on
+    # the shell's argument parsing.
+    # shellcheck disable=SC2086
+    set -- $URLS
+
+    if [ "$#" -gt 1 ]; then
+        CURL_PARALLEL="$CURL_HAS_PARALLEL"
+    else
+        CURL_PARALLEL=""
+    fi
+
+    # Start assembling the command.
+    #
+    # We use 'set --' here (again) because (a) we don't have arrays on
+    # POSIX shell, and (b) we need better control over the way we
+    # split arguments.
+    #
+    # shellcheck disable=SC2086
+    set -- ${CMD} ${CURL_PARALLEL}
+
+    NEXT_PARAMETER=""
+    for url in ${URLS}; do
+        # If the user did not provide an output path, define one.
+        if [ "${HAS_USER_SET_OUTPUT}" = "false" ]; then
+            OUTPUT_PATH="$(get_url_filename "${url}")"
+            # If we could not get a path from the URL, use the default: index.html.
+            [ -z "${OUTPUT_PATH}" ] && OUTPUT_PATH=index.html
+        fi
+        # shellcheck disable=SC2086
+        set -- "$@" ${NEXT_PARAMETER} ${PER_URL_PARAMETERS} ${CURL_HAS_NO_CLOBBER} ${CURL_OPTIONS} --output "${OUTPUT_PATH}" "${url}"
+        NEXT_PARAMETER="--next"
+    done
+
+    if [ "${DRY_RUN}" = "false" ]; then
+        exec "$@"
+    else
+        printf "%s\n" "$@"
+    fi
+}
+
+# Default to decoding the output filename
+DECODE_FILENAME="true"
+
+# Use "${1-}" in order to avoid errors because of 'set -u'.
+while [ -n "${1-}" ]; do
+    case "${1}" in
+        --curl-options=*)
+            opt=$(printf "%s\n" "${1}" | sed 's/^--curl-options=//')
+            CURL_OPTIONS="${CURL_OPTIONS} ${opt}"
+            ;;
+
+        --curl-options)
+            shift
+            CURL_OPTIONS="${CURL_OPTIONS} ${1}"
+            ;;
+
+        --dry-run)
+            DRY_RUN="true"
+            ;;
+
+        --output=*)
+            opt=$(printf "%s\n" "${1}" | sed 's/^--output=//')
+            HAS_USER_SET_OUTPUT="true"
+            OUTPUT_PATH="${opt}"
+            ;;
+
+        -o|-O|--output)
+            shift
+            HAS_USER_SET_OUTPUT="true"
+            OUTPUT_PATH="${1}"
+            ;;
+
+        -o*|-O*)
+            opt=$(printf "%s\n" "${1}" | sed 's/^-[oO]//')
+            HAS_USER_SET_OUTPUT="true"
+            OUTPUT_PATH="${opt}"
+            ;;
+
+        --no-decode-filename)
+            DECODE_FILENAME="false"
+            ;;
+
+        -h|--help)
+            usage
+            exit 0
+            ;;
+
+        -V|--version)
+            print_version
+            exit 0
+            ;;
+
+        --)
+            # This is the start of the list of URLs.
+            shift
+            for url in "$@"; do
+                # Encode whitespaces into %20, since wget supports those URLs.
+                newurl=$(printf "%s\n" "${url}" | sed 's/ /%20/g')
+                URLS="${URLS} ${newurl}"
+            done
+            break
+            ;;
+
+        -*)
+            error "Unknown option: '$1'."
+            ;;
+
+        *)
+            # This must be a URL.
+            # Encode whitespaces into %20, since wget supports those URLs.
+            newurl=$(printf "%s\n" "${1}" | sed 's/ /%20/g')
+            URLS="${URLS} ${newurl}"
+            ;;
+    esac
+    shift
+done
+
+sanitize
+exec_curl
author	Daniel Stenberg <daniel@haxx.se>
	Fri, 11 Apr 2025 21:17:03 +0000 (23:17 +0200)
committer	Daniel Stenberg <daniel@haxx.se>
	Mon, 21 Apr 2025 09:06:44 +0000 (11:06 +0200)
.github/scripts/spellcheck.yaml		patch \| blob \| blame \| history
.github/workflows/checksrc.yml		patch \| blob \| blame \| history
CMakeLists.txt		patch \| blob \| blame \| history
docs/CMakeLists.txt		patch \| blob \| blame \| history
docs/Makefile.am		patch \| blob \| blame \| history
docs/wcurl.md	[new file with mode: 0644]	patch \| blob
scripts/Makefile.am		patch \| blob \| blame \| history
scripts/wcurl	[new file with mode: 0755]	patch \| blob