From: Stefan Eissing Date: Tue, 21 Nov 2023 10:24:18 +0000 (+0100) Subject: transfer: cleanup done+excess handling X-Git-Tag: curl-8_5_0~43 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5b65e7d1aed1770c84f9a179dbf96591ea149f46;p=thirdparty%2Fcurl.git transfer: cleanup done+excess handling - add `SingleRequest->download_done` as indicator that all download bytes have been received - remove `stop_reading` bool from readwrite functions - move excess body handling into client download writer Closes #12371 --- diff --git a/lib/c-hyper.c b/lib/c-hyper.c index e3f7d6d444..787d6bbdbe 100644 --- a/lib/c-hyper.c +++ b/lib/c-hyper.c @@ -882,6 +882,7 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done) may be parts of the request that is not yet sent, since we can deal with the rest of the request in the PERFORM phase. */ *done = TRUE; + Curl_client_cleanup(data); infof(data, "Time for the Hyper dance"); memset(h, 0, sizeof(struct hyptransfer)); @@ -892,6 +893,8 @@ CURLcode Curl_http(struct Curl_easy *data, bool *done) Curl_http_method(data, conn, &method, &httpreq); + DEBUGASSERT(data->req.bytecount == 0); + /* setup the authentication headers */ { char *pq = NULL; diff --git a/lib/http.c b/lib/http.c index 091a056023..45748dd293 100644 --- a/lib/http.c +++ b/lib/http.c @@ -3996,8 +3996,7 @@ CURLcode Curl_bump_headersize(struct Curl_easy *data, CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, struct connectdata *conn, const char *buf, size_t blen, - size_t *pconsumed, - bool *stop_reading) + size_t *pconsumed) { CURLcode result; struct SingleRequest *k = &data->req; @@ -4005,7 +4004,6 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, char *end_ptr; /* header line within buffer loop */ - *stop_reading = FALSE; *pconsumed = 0; do { size_t line_length; @@ -4358,7 +4356,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, * out and return home. 
*/ if(data->req.no_body) - *stop_reading = TRUE; + k->download_done = TRUE; #ifndef CURL_DISABLE_RTSP else if((conn->handler->protocol & CURLPROTO_RTSP) && (data->set.rtspreq == RTSPREQ_DESCRIBE) && @@ -4367,7 +4365,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, absent, a length 0 must be assumed. It will prevent libcurl from hanging on DESCRIBE request that got refused for whatever reason */ - *stop_reading = TRUE; + k->download_done = TRUE; #endif /* If max download size is *zero* (nothing) we already have @@ -4378,12 +4376,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, if(0 == k->maxdownload && !Curl_conn_is_http2(data, conn, FIRSTSOCKET) && !Curl_conn_is_http3(data, conn, FIRSTSOCKET)) - *stop_reading = TRUE; - - if(*stop_reading) { - /* we make sure that this socket isn't read more now */ - k->keepon &= ~KEEP_RECV; - } + k->download_done = TRUE; Curl_debug(data, CURLINFO_HEADER_IN, Curl_dyn_ptr(&data->state.headerb), diff --git a/lib/http.h b/lib/http.h index 0f5218bdee..56b091301f 100644 --- a/lib/http.h +++ b/lib/http.h @@ -228,8 +228,7 @@ CURLcode Curl_http_size(struct Curl_easy *data); CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, struct connectdata *conn, const char *buf, size_t blen, - size_t *pconsumed, - bool *stop_reading); + size_t *pconsumed); /** * Curl_http_output_auth() setups the authentication headers for the diff --git a/lib/rtsp.c b/lib/rtsp.c index bcadfd2dda..e673bb8dc0 100644 --- a/lib/rtsp.c +++ b/lib/rtsp.c @@ -793,11 +793,9 @@ static CURLcode rtsp_rtp_readwrite(struct Curl_easy *data, /* we want to parse headers, do so */ if(data->req.header && blen) { - bool stop_reading; - rtspc->in_header = TRUE; result = Curl_http_readwrite_headers(data, conn, buf, blen, - &consumed, &stop_reading); + &consumed); if(result) goto out; diff --git a/lib/sendf.c b/lib/sendf.c index ac5f9197d0..f39ffe91ab 100644 --- a/lib/sendf.c +++ b/lib/sendf.c @@ -467,7 +467,8 @@ void Curl_client_cleanup(struct 
Curl_easy *data) Curl_dyn_free(&data->state.tempwrite[i].b); } data->state.tempcount = 0; - + data->req.bytecount = 0; + data->req.headerline = 0; } /* Write data using an unencoding writer stack. "nbytes" is not @@ -525,6 +526,28 @@ static const struct Curl_cwtype cw_client = { sizeof(struct Curl_cwriter) }; +static size_t get_max_body_write_len(struct Curl_easy *data, curl_off_t limit) +{ + if(limit != -1) { + /* How much more are we allowed to write? */ + curl_off_t remain_diff; + remain_diff = limit - data->req.bytecount; + if(remain_diff < 0) { + /* already written too much! */ + return 0; + } +#if SIZEOF_CURL_OFF_T > SIZEOF_SIZE_T + else if(remain_diff > SSIZE_T_MAX) { + return SIZE_T_MAX; + } +#endif + else { + return (size_t)remain_diff; + } + } + return SIZE_T_MAX; +} + /* Download client writer in phase CURL_CW_PROTOCOL that * sees the "real" download body data. */ static CURLcode cw_download_write(struct Curl_easy *data, @@ -532,7 +555,8 @@ static CURLcode cw_download_write(struct Curl_easy *data, const char *buf, size_t nbytes) { CURLcode result; - size_t nwrite; + size_t nwrite, excess_len = 0; + const char *excess_data = NULL; if(!(type & CLIENTWRITE_BODY)) { if((type & CLIENTWRITE_CONNECT) && data->set.suppress_connect_headers) @@ -541,22 +565,28 @@ static CURLcode cw_download_write(struct Curl_easy *data, } nwrite = nbytes; - data->req.bytecount += nbytes; - ++data->req.bodywrites; - /* Enforce `max_filesize` also for downloads where we ignore the body. - * Also, write body data up to the max size. This ensures that we - * always produce the same result, even when buffers vary due to - * connection timings. test457 fails in CI randomly otherwise. 
*/ - if(data->set.max_filesize && - (data->req.bytecount > data->set.max_filesize)) { - curl_off_t nexcess; - failf(data, "Exceeded the maximum allowed file size " - "(%" CURL_FORMAT_CURL_OFF_T ")", - data->set.max_filesize); - nexcess = data->req.bytecount - data->set.max_filesize; - nwrite = (nexcess >= (curl_off_t)nbytes)? 0 : (nbytes - (size_t)nexcess); + if(-1 != data->req.maxdownload) { + size_t wmax = get_max_body_write_len(data, data->req.maxdownload); + if(nwrite > wmax) { + excess_len = nbytes - wmax; + nwrite = wmax; + excess_data = buf + nwrite; + } + + if(nwrite == wmax) { + data->req.download_done = TRUE; + } + } + + if(data->set.max_filesize) { + size_t wmax = get_max_body_write_len(data, data->set.max_filesize); + if(nwrite > wmax) { + nwrite = wmax; + } } + data->req.bytecount += nwrite; + ++data->req.bodywrites; if(!data->req.ignorebody && nwrite) { result = Curl_cwriter_write(data, writer->next, type, buf, nwrite); if(result) @@ -566,7 +596,44 @@ static CURLcode cw_download_write(struct Curl_easy *data, if(result) return result; - return (nwrite == nbytes)? 
CURLE_OK : CURLE_FILESIZE_EXCEEDED; + if(excess_len) { + if(data->conn->handler->readwrite) { + /* RTSP hack moved from transfer loop to here */ + bool readmore = FALSE; /* indicates data is incomplete, need more */ + size_t consumed = 0; + result = data->conn->handler->readwrite(data, data->conn, + excess_data, excess_len, + &consumed, &readmore); + if(result) + return result; + DEBUGASSERT(consumed <= excess_len); + excess_len -= consumed; + if(readmore) { + data->req.download_done = FALSE; + data->req.keepon |= KEEP_RECV; /* we're not done reading */ + } + } + if(excess_len && !data->req.ignorebody) { + infof(data, + "Excess found writing body:" + " excess = %zu" + ", size = %" CURL_FORMAT_CURL_OFF_T + ", maxdownload = %" CURL_FORMAT_CURL_OFF_T + ", bytecount = %" CURL_FORMAT_CURL_OFF_T, + excess_len, data->req.size, data->req.maxdownload, + data->req.bytecount); + connclose(data->conn, "excess found in a read"); + } + } + else if(nwrite < nbytes) { + failf(data, "Exceeded the maximum allowed file size " + "(%" CURL_FORMAT_CURL_OFF_T ") with %" + CURL_FORMAT_CURL_OFF_T " bytes", + data->set.max_filesize, data->req.bytecount); + return CURLE_FILESIZE_EXCEEDED; + } + + return CURLE_OK; } static const struct Curl_cwtype cw_download = { diff --git a/lib/transfer.c b/lib/transfer.c index cfb17f7442..e7158aa3ab 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -413,28 +413,6 @@ bool Curl_meets_timecondition(struct Curl_easy *data, time_t timeofdoc) return TRUE; } -static size_t get_max_body_write_len(struct Curl_easy *data) -{ - if(data->req.maxdownload != -1) { - /* How much more are we allowed to write? */ - curl_off_t remain_diff; - remain_diff = data->req.maxdownload - data->req.bytecount; - if(remain_diff < 0) { - /* already written too much! 
*/ - return 0; - } -#if SIZEOF_CURL_OFF_T > SIZEOF_SIZE_T - else if(remain_diff > SSIZE_T_MAX) { - return SIZE_T_MAX; - } -#endif - else { - return (size_t)remain_diff; - } - } - return SIZE_T_MAX; -} - /* * Go ahead and do a read if we have a readable socket or if * the stream was rewound (in which case we have data in a @@ -450,8 +428,8 @@ static CURLcode readwrite_data(struct Curl_easy *data, bool *comeback) { CURLcode result = CURLE_OK; - char *buf, *excess_data; - size_t blen, hd_data_len, excess_len; + char *buf; + size_t blen; size_t consumed; int maxloops = 100; curl_off_t max_recv = data->set.max_recv_speed? @@ -479,8 +457,6 @@ static CURLcode readwrite_data(struct Curl_easy *data, * all data read into it. */ buf = data->state.buffer; blen = 0; - excess_data = NULL; - excess_len = 0; /* If we are reading BODY data and the connection does NOT handle EOF * and we know the size of the BODY data, limit the read amount */ @@ -553,17 +529,28 @@ static CURLcode readwrite_data(struct Curl_easy *data, break; buf += consumed; blen -= consumed; + if(k->download_done) { + /* We've stopped dealing with input, get out of the do-while loop */ + if(blen > 0) { + infof(data, + "Excess found:" + " excess = %zu" + " url = %s (zero-length body)", + blen, data->state.up.path); + } + + /* we make sure that this socket isn't read more now */ + k->keepon &= ~KEEP_RECV; + break; + } } #ifndef CURL_DISABLE_HTTP /* Since this is a two-state thing, we check if we are parsing headers at the moment or not. 
*/ if(k->header) { - bool stop_reading = FALSE; - consumed = 0; - result = Curl_http_readwrite_headers(data, conn, buf, blen, - &consumed, &stop_reading); + result = Curl_http_readwrite_headers(data, conn, buf, blen, &consumed); if(result) goto out; buf += consumed; @@ -583,7 +570,7 @@ static CURLcode readwrite_data(struct Curl_easy *data, blen -= consumed; } - if(stop_reading) { + if(k->download_done) { /* We've stopped dealing with input, get out of the do-while loop */ if(blen > 0) { infof(data, @@ -593,6 +580,8 @@ static CURLcode readwrite_data(struct Curl_easy *data, blen, data->state.up.path); } + /* we make sure that this socket isn't read more now */ + k->keepon &= ~KEEP_RECV; break; } } @@ -671,56 +660,6 @@ static CURLcode readwrite_data(struct Curl_easy *data, } #endif /* CURL_DISABLE_HTTP */ - /* If we know how much to download, have we reached the last bytes? */ - if(-1 != k->maxdownload) { - size_t max_write_len = get_max_body_write_len(data); - - /* Account for body content stillin the header buffer */ - hd_data_len = k->badheader? Curl_dyn_len(&data->state.headerb) : 0; - if(blen + hd_data_len >= max_write_len) { - /* We have all download bytes, but do we have too many? */ - excess_len = (blen + hd_data_len) - max_write_len; - if(excess_len > 0 && !k->ignorebody) { - infof(data, - "Excess found in a read:" - " excess = %zu" - ", size = %" CURL_FORMAT_CURL_OFF_T - ", maxdownload = %" CURL_FORMAT_CURL_OFF_T - ", bytecount = %" CURL_FORMAT_CURL_OFF_T, - excess_len, k->size, k->maxdownload, k->bytecount); - connclose(conn, "excess found in a read"); - } - - if(!excess_len) { - /* no excess bytes, perfect! 
*/ - excess_data = NULL; - } - else if(hd_data_len >= excess_len) { - /* uh oh, header body data already exceeds, the whole `buf` - * is excess data */ - excess_len = blen; - excess_data = buf; - blen = 0; - } - else { - /* `buf` bytes exceed, shorten and set `excess_data` */ - excess_len -= hd_data_len; - DEBUGASSERT(blen >= excess_len); - blen -= excess_len; - excess_data = buf + blen; - } - - /* HTTP/3 over QUIC should keep reading until QUIC connection - is closed. In contrast to HTTP/2 which can stop reading - from TCP connection, HTTP/3 over QUIC needs ACK from server - to ensure stream closure. It should keep reading. */ - if(!is_http3) { - k->keepon &= ~KEEP_RECV; /* we're done reading */ - } - k->download_done = TRUE; - } - } - max_recv -= blen; if(!k->chunk && (blen || k->badheader || is_empty_data)) { @@ -758,22 +697,15 @@ static CURLcode readwrite_data(struct Curl_easy *data, goto out; } + if(k->download_done && !is_http3) { + /* HTTP/3 over QUIC should keep reading until QUIC connection + is closed. In contrast to HTTP/2 which can stop reading + from TCP connection, HTTP/3 over QUIC needs ACK from server + to ensure stream closure. It should keep reading. 
*/ + k->keepon &= ~KEEP_RECV; /* we're done reading */ + } } /* if(!header and data to read) */ - if(conn->handler->readwrite && excess_data) { - bool readmore = FALSE; /* indicates data is incomplete, need more */ - - consumed = 0; - result = conn->handler->readwrite(data, conn, excess_data, excess_len, - &consumed, &readmore); - if(result) - goto out; - - if(readmore) - k->keepon |= KEEP_RECV; /* we're not done reading */ - break; - } - if(is_empty_data) { /* if we received nothing, the server closed the connection and we are done */ diff --git a/lib/url.c b/lib/url.c index c8c99cb6c7..b81785fe2e 100644 --- a/lib/url.c +++ b/lib/url.c @@ -3954,6 +3954,7 @@ CURLcode Curl_init_do(struct Curl_easy *data, struct connectdata *conn) k->bytecount = 0; k->ignorebody = FALSE; + Curl_client_cleanup(data); Curl_speedinit(data); Curl_pgrsSetUploadCounter(data, 0); Curl_pgrsSetDownloadCounter(data, 0);