git.ipfire.org Git - thirdparty/curl.git/commitdiff
tool_operate: keep failed partial download for retry auto-resume
authorJay Satiro <raysatiro@yahoo.com>
Fri, 18 Oct 2024 18:12:31 +0000 (14:12 -0400)
committerJay Satiro <raysatiro@yahoo.com>
Thu, 9 Oct 2025 18:39:27 +0000 (14:39 -0400)
- Keep data from a failed download instead of discarding it on retry in
  some limited cases when we know it's ok (currently only HTTP 200/206).

Prior to this change, on a failed transfer the tool truncated any
outfile data written before retrying the transfer. This change adds an
exception for HTTP downloads when the user requested auto-resume,
because in that case we can keep the outfile data and resume from the
new position.

Reported-by: tkzv@users.noreply.github.com
Fixes https://github.com/curl/curl/issues/18035
Closes https://github.com/curl/curl/pull/18665

src/tool_operate.c
tests/data/Makefile.am
tests/data/test3035 [new file with mode: 0644]

index ca0d2e77f59361aaa1cb40827aee0b14d6785187..1901ab3ac1787e3df431b03758dda6fa2a2c2127 100644 (file)
@@ -336,6 +336,32 @@ void single_transfer_cleanup(void)
   glob_cleanup(&state->inglob);
 }
 
+/* Helper function for retrycheck.
+ *
+ * This function is a prerequisite check used to determine whether or not some
+ * already downloaded data (ie out->bytes written) can be safely resumed in a
+ * subsequent transfer. The conditions are somewhat pedantic to avoid any risk
+ * of data corruption.
+ *
+ * Specific HTTP limitations (scheme, method, response code, etc) are checked
+ * in retrycheck if this prerequisite check is met.
+ */
+static bool is_outfile_auto_resumable(struct OperationConfig *config,
+                                      struct per_transfer *per,
+                                      CURLcode result)
+{
+  struct OutStruct *outs = &per->outs;
+  return config->use_resume && config->resume_from_current &&
+         config->resume_from >= 0 && outs->init == config->resume_from &&
+         outs->bytes > 0 && outs->filename && outs->s_isreg && outs->fopened &&
+         outs->stream && !ferror(outs->stream) &&
+         !config->customrequest && !per->uploadfile &&
+         (config->httpreq == TOOL_HTTPREQ_UNSPEC ||
+          config->httpreq == TOOL_HTTPREQ_GET) &&
+         /* CURLE_WRITE_ERROR could mean outs->bytes is not accurate */
+         result != CURLE_WRITE_ERROR && result != CURLE_RANGE_ERROR;
+}
+
 static CURLcode retrycheck(struct OperationConfig *config,
                            struct per_transfer *per,
                            CURLcode result,
@@ -353,7 +379,6 @@ static CURLcode retrycheck(struct OperationConfig *config,
     RETRY_FTP,
     RETRY_LAST /* not used */
   } retry = RETRY_NO;
-  long response = 0;
   if((CURLE_OPERATION_TIMEDOUT == result) ||
      (CURLE_COULDNT_RESOLVE_HOST == result) ||
      (CURLE_COULDNT_RESOLVE_PROXY == result) ||
@@ -378,6 +403,7 @@ static CURLcode retrycheck(struct OperationConfig *config,
     scheme = proto_token(scheme);
     if(scheme == proto_http || scheme == proto_https) {
       /* This was HTTP(S) */
+      long response = 0;
       curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);
 
       switch(response) {
@@ -404,6 +430,7 @@ static CURLcode retrycheck(struct OperationConfig *config,
   } /* if CURLE_OK */
   else if(result) {
     const char *scheme;
+    long response = 0;
 
     curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);
     curl_easy_getinfo(curl, CURLINFO_SCHEME, &scheme);
@@ -432,6 +459,7 @@ static CURLcode retrycheck(struct OperationConfig *config,
       ": HTTP error",
       ": FTP error"
     };
+    bool truncate = TRUE; /* truncate output file */
 
     if(RETRY_HTTP == retry) {
       curl_easy_getinfo(curl, CURLINFO_RETRY_AFTER, &retry_after);
@@ -482,7 +510,49 @@ static CURLcode retrycheck(struct OperationConfig *config,
 
     per->retry_remaining--;
 
-    if(outs->bytes && outs->filename && outs->stream) {
+    /* Skip truncation of outfile if auto-resume is enabled for download and
+       the partially received data is good. Only for HTTP GET requests in
+       limited circumstances. */
+    if(is_outfile_auto_resumable(config, per, result)) {
+      long response = 0;
+      struct curl_header *header = NULL;
+      const char *method = NULL, *scheme = NULL;
+
+      curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_METHOD, &method);
+      curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response);
+      curl_easy_getinfo(curl, CURLINFO_SCHEME, &scheme);
+      scheme = proto_token(scheme);
+
+      if((scheme == proto_http || scheme == proto_https) &&
+         method && !strcmp(method, "GET") &&
+         ((response == 206 && config->resume_from) ||
+          (response == 200 &&
+           !curl_easy_header(curl, "Accept-Ranges", 0,
+                             CURLH_HEADER, -1, &header) &&
+           !strcmp(header->value, "bytes")))) {
+
+        notef("Keeping %" CURL_FORMAT_CURL_OFF_T " bytes", outs->bytes);
+        if(fflush(outs->stream)) {
+          errorf("Failed to flush output file stream");
+          return CURLE_WRITE_ERROR;
+        }
+        if(outs->bytes >= CURL_OFF_T_MAX - outs->init) {
+          errorf("Exceeded maximum supported file size ("
+                 "%" CURL_FORMAT_CURL_OFF_T " + "
+                 "%" CURL_FORMAT_CURL_OFF_T ")",
+                 outs->init, outs->bytes);
+          return CURLE_WRITE_ERROR;
+        }
+        truncate = FALSE;
+        outs->init += outs->bytes;
+        outs->bytes = 0;
+        config->resume_from = outs->init;
+        curl_easy_setopt(curl, CURLOPT_RESUME_FROM_LARGE,
+                         config->resume_from);
+      }
+    }
+
+    if(truncate && outs->bytes && outs->filename && outs->stream) {
 #ifndef __MINGW32CE__
       struct_stat fileinfo;
 
index ce97740b9af059b8320468c10c869d7c13ad7d7e..99c287507f764bd8e6ce2ede7f67391c8fef26fd 100644 (file)
@@ -275,7 +275,7 @@ test3000 test3001 test3002 test3003 test3004 test3005 test3006 test3007 \
 test3008 test3009 test3010 test3011 test3012 test3013 test3014 test3015 \
 test3016 test3017 test3018 test3019 test3020 test3021 test3022 test3023 \
 test3024 test3025 test3026 test3027 test3028 test3029 test3030 test3031 \
-test3032 test3033 test3034 \
+test3032 test3033 test3034 test3035 \
 \
 test3100 test3101 test3102 test3103 test3104 test3105 \
 \
diff --git a/tests/data/test3035 b/tests/data/test3035
new file mode 100644 (file)
index 0000000..19fefa2
--- /dev/null
@@ -0,0 +1,127 @@
+<testcase>
+<info>
+<keywords>
+HTTP
+HTTP GET
+Content-Range
+Resume
+retry
+</keywords>
+</info>
+
+# Server-side
+<reply>
+
+#
+# the first chunk
+#
+<data nocheck="yes" nonewline="yes">
+HTTP/1.1 200 OK swsbounce swsclose\r
+Accept-Ranges: bytes\r
+Content-Type: text/html\r
+Content-Length: 26\r
+\r
+abcde
+</data>
+
+#
+# the second chunk
+#
+<data1 nocheck="yes" nonewline="yes">
+HTTP/1.1 206 Partial Content swsbounce swsclose\r
+Content-Type: text/html\r
+Content-Length: 21\r
+Content-Range: bytes 5-25/26\r
+\r
+fghijk
+</data1>
+
+#
+# some nonsense that curl should ignore as unresumable
+#
+<data2 nocheck="yes">
+HTTP/1.1 404 Not Found swsbounce\r
+Content-Type: text/html\r
+Content-Length: 5\r
+\r
+body
+</data2>
+
+#
+# some more nonsense that curl should ignore as unresumable
+#
+<data3 nocheck="yes">
+HTTP/1.1 200 OK swsbounce\r
+Accept-Ranges: bytes\r
+Content-Type: text/html\r
+Content-Length: 30\r
+\r
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+</data3>
+
+#
+# the third chunk
+#
+<data4 nocheck="yes" nonewline="yes">
+HTTP/1.1 206 Partial Content swsbounce swsclose\r
+Content-Type: text/html\r
+Content-Length: 15\r
+Content-Range: bytes 11-25/26\r
+\r
+lmnopqrstuvwxyz
+</data4>
+</reply>
+
+# Client-side
+<client>
+<server>
+http
+</server>
+<name>
+HTTP retry failed download with keep data and auto-resume
+</name>
+<command option="no-output,no-include">
+--continue-at - --retry 4 --retry-delay 1 --retry-all-errors -o %LOGDIR/outfile%TESTNUMBER http://%HOSTIP:%HTTPPORT/%TESTNUMBER
+</command>
+</client>
+
+# Verify data after the test has been "shot"
+<verify>
+<protocol>
+GET /%TESTNUMBER HTTP/1.1\r
+Host: %HOSTIP:%HTTPPORT\r
+User-Agent: curl/%VERSION\r
+Accept: */*\r
+\r
+GET /%TESTNUMBER HTTP/1.1\r
+Host: %HOSTIP:%HTTPPORT\r
+Range: bytes=5-\r
+User-Agent: curl/%VERSION\r
+Accept: */*\r
+\r
+GET /%TESTNUMBER HTTP/1.1\r
+Host: %HOSTIP:%HTTPPORT\r
+Range: bytes=11-\r
+User-Agent: curl/%VERSION\r
+Accept: */*\r
+\r
+GET /%TESTNUMBER HTTP/1.1\r
+Host: %HOSTIP:%HTTPPORT\r
+Range: bytes=11-\r
+User-Agent: curl/%VERSION\r
+Accept: */*\r
+\r
+GET /%TESTNUMBER HTTP/1.1\r
+Host: %HOSTIP:%HTTPPORT\r
+Range: bytes=11-\r
+User-Agent: curl/%VERSION\r
+Accept: */*\r
+\r
+</protocol>
+
+<file1 name="%LOGDIR/outfile%TESTNUMBER" nonewline="yes">
+abcdefghijklmnopqrstuvwxyz
+</file1>
+
+</verify>
+</testcase>