]> git.ipfire.org Git - thirdparty/apache/httpd.git/commitdiff
Backport to 2.4:
authorGraham Leggett <minfrin@apache.org>
Sat, 16 Jan 2021 14:08:29 +0000 (14:08 +0000)
committerGraham Leggett <minfrin@apache.org>
Sat, 16 Jan 2021 14:08:29 +0000 (14:08 +0000)
  *) core: output filtering improvements (ease following patches, align trunk/2.4)
     trunk patch: https://svn.apache.org/r1836032
                  https://svn.apache.org/r1884295
                  https://svn.apache.org/r1884296
                  https://svn.apache.org/r1884304
                  https://svn.apache.org/r1836237
                  https://svn.apache.org/r1836258
                  https://svn.apache.org/r1836354
                  https://svn.apache.org/r1843939
     2.4.x patch: http://people.apache.org/~ylavic/patches/2.4.x-core_output_filtering-2on5.patch
                  https://github.com/apache/httpd/pull/156
     +1: ylavic, covener, minfrin
     ylavic: These core output filter changes are needed for the proxy
             tunneling loop to work properly/non-blocking (PR 158 below). They
             do not include the major filter setaside/reinstate changes from
             trunk, reluing on existing 2.4 c->data_in_{input,output}_filter
             flags only.

git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1885573 13f79535-47bb-0310-9956-ffa450edef68

CHANGES
STATUS
docs/manual/mod/core.xml
include/ap_mmn.h
include/http_core.h
modules/ssl/ssl_engine_io.c
server/core.c
server/core_filters.c

diff --git a/CHANGES b/CHANGES
index c11fef28f382c93cf43652b26604f14e966b74c6..b1b71e96b68d729277652d7baf1d957c788e7c18 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,12 @@
                                                          -*- coding: utf-8 -*-
 Changes with Apache 2.4.47
 
+  *) core: add ReadBufferSize, FlushMaxThreshold and FlushMaxPipelined
+     directives.  [Yann Ylavic]
+
+  *) core: Ensure that aborted connections are logged as such. PR 62823
+     [Arnaud Grandville <contact@grandville.net>]
+
   *) http: Allow unknown response status' lines returned in the form of
      "HTTP/x.x xxx Status xxx".  [Yann Ylavic]
 
diff --git a/STATUS b/STATUS
index 0887c966e84200ba5d25712c8f80006e0d3c387a..1e42b69eff187dc3c4f410bbdfe419989a197d8b 100644 (file)
--- a/STATUS
+++ b/STATUS
@@ -138,23 +138,6 @@ RELEASE SHOWSTOPPERS:
 PATCHES ACCEPTED TO BACKPORT FROM TRUNK:
   [ start all new proposals below, under PATCHES PROPOSED. ]
 
-  *) core: output filtering improvements (ease following patches, align trunk/2.4)
-     trunk patch: https://svn.apache.org/r1836032
-                  https://svn.apache.org/r1884295
-                  https://svn.apache.org/r1884296
-                  https://svn.apache.org/r1884304
-                  https://svn.apache.org/r1836237
-                  https://svn.apache.org/r1836258
-                  https://svn.apache.org/r1836354
-                  https://svn.apache.org/r1843939
-     2.4.x patch: http://people.apache.org/~ylavic/patches/2.4.x-core_output_filtering-2on5.patch
-                  https://github.com/apache/httpd/pull/156
-     +1: ylavic, covener, minfrin
-     ylavic: These core output filter changes are needed for the proxy
-             tunneling loop to work properly/non-blocking (PR 158 below). They
-             do not include the major filter setaside/reinstate changes from
-             trunk, reluing on existing 2.4 c->data_in_{input,output}_filter
-             flags only.
 
 
 PATCHES PROPOSED TO BACKPORT FROM TRUNK:
index 80ac9ce727fa270ae76ef9c428e93ab3036a52b1..62737f602443b083d1a57bf07cf072b96f2b4b08 100644 (file)
@@ -2012,6 +2012,52 @@ filenames</description>
     different sections are combined when a request is received</seealso>
 </directivesynopsis>
 
+<directivesynopsis>
+<name>FlushMaxPipelined</name>
+<description>Maximum number of pipelined responses above which they are flushed
+to the network</description>
+<syntax>FlushMaxPipelined <var>number</var></syntax>
+<default>FlushMaxPipelined 5</default>
+<contextlist><context>server config</context><context>virtual host</context>
+</contextlist>
+<compatibility>2.4.47 and later</compatibility>
+
+<usage>
+    <p>This directive allows to configure the maximum number of pipelined
+    responses, which remain pending so long as pipelined request are received.
+    When the limit is reached, reponses are forcibly flushed to the network in
+    blocking mode, until passing under the limit again.</p>
+
+    <p><directive>FlushMaxPipelined</directive> helps constraining memory
+    usage. When set to <var>0</var> pipelining is disabled, when set to
+    <var>-1</var> there is no limit (<directive>FlushMaxThreshold</directive>
+    still applies).</p>
+</usage>
+</directivesynopsis>
+
+<directivesynopsis>
+<name>FlushMaxThreshold</name>
+<description>Threshold above which pending data are flushed to the
+network</description>
+<syntax>FlushMaxThreshold<var>number-of-bytes</var></syntax>
+<default>FlushMaxThreshold 65536</default>
+<contextlist><context>server config</context><context>virtual host</context>
+</contextlist>
+<compatibility>2.4.47 and later</compatibility>
+
+<usage>
+    <p>This directive allows to configure the threshold for pending output
+    data (in bytes). When the limit is reached, data are forcibly flushed to
+    the network in blocking mode, until passing under the limit again.</p>
+
+    <p><directive>FlushMaxThreshold</directive> helps constraining memory
+    usage. When set to <var>0</var> or a too small value there are actually
+    no pending data, but for threaded MPMs there can be more threads busy
+    waiting for the network thus less ones available to handle the other
+    simultaneous connections.</p>
+</usage>
+</directivesynopsis>
+
 <directivesynopsis>
 <name>ForceType</name>
 <description>Forces all matching files to be served with the specified
@@ -4057,6 +4103,26 @@ Protocols h2 http/1.1
 </directivesynopsis>
 
 
+<directivesynopsis>
+<name>ReadBufferSize</name>
+<description>Size of the buffers used to read data</description>
+<syntax>ReadBufferSize <var>bytes</var></syntax>
+<default>ReadBufferSize 8192</default>
+<contextlist><context>server config</context><context>virtual host</context>
+<context>directory</context></contextlist>
+<compatibility>2.4.27 and later</compatibility>
+
+<usage>
+    <p>This directive allows to configure the size (in bytes) of the memory
+    buffer used to read data from the network or files.</p>
+
+    <p>A larger buffer can increase peformances with larger data, but consumes
+    more memory per connection. The minimum configurable size is
+    <var>1024</var>.</p>
+</usage>
+</directivesynopsis>
+
 <directivesynopsis>
     <name>RegexDefaultOptions</name>
     <description>Allow to configure global/default options for regexes</description>
index af464772f3f9b68a299b4804bedda5b5a4b2469d..aec47df5e6c3430c2c35a426610715d951741841 100644 (file)
  * 20120211.94 (2.4.47-dev) Add ap_proxy_define_match_worker()
  * 20120211.95 (2.4.47-dev) Add proxy check_trans hook
  * 20120211.96 (2.4.47-dev) Add ap_get_status_line_ex()
+ * 20120211.97 (2.4.47-dev) Add read_buf_size member to core_dir_config,
+ *                          flush_max_threshold and flush_max_pipelined to
+ *                          core_server_config, and ap_get_read_buf_size().
  */
 
 #define MODULE_MAGIC_COOKIE 0x41503234UL /* "AP24" */
 #ifndef MODULE_MAGIC_NUMBER_MAJOR
 #define MODULE_MAGIC_NUMBER_MAJOR 20120211
 #endif
-#define MODULE_MAGIC_NUMBER_MINOR 96                  /* 0...n */
+#define MODULE_MAGIC_NUMBER_MINOR 97                  /* 0...n */
 
 /**
  * Determine if the server's current MODULE_MAGIC_NUMBER is at least a
index 8e1098822445c722231815944b57e155da1c0b62..c1612c18a127ca5e3859c4bf07a3dabe6d243842 100644 (file)
@@ -253,6 +253,13 @@ AP_DECLARE(const char *) ap_get_server_name_for_url(request_rec *r);
  */
 AP_DECLARE(apr_port_t) ap_get_server_port(const request_rec *r);
 
+/**
+ * Get the size of read buffers
+ * @param r The current request
+ * @return The read buffers size
+ */
+AP_DECLARE(apr_size_t) ap_get_read_buf_size(const request_rec *r);
+
 /**
  * Return the limit on bytes in request msg body
  * @param r The current request
@@ -672,6 +679,8 @@ typedef struct {
 
     /** Table of rules for building CGI variables, NULL if none configured */
     apr_hash_t *cgi_var_rules;
+
+    apr_size_t read_buf_size;
 } core_dir_config;
 
 /* macro to implement off by default behaviour */
@@ -741,6 +750,9 @@ typedef struct {
 #define AP_HTTP_METHODS_REGISTERED    2
     char http_methods;
     unsigned int merge_slashes;
+    apr_size_t   flush_max_threshold;
+    apr_int32_t  flush_max_pipelined;
 } core_server_config;
 
 /* for AddOutputFiltersByType in core.c */
index 2a01549ddaf500f21ab52daa1932cee8672dedc7..18a1f29d332b584a646f30064f692dc93f6ac112 100644 (file)
@@ -152,6 +152,9 @@ static int bio_filter_out_flush(BIO *bio)
     bio_filter_out_ctx_t *outctx = (bio_filter_out_ctx_t *)BIO_get_data(bio);
     apr_bucket *e;
 
+    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, outctx->c,
+                  "bio_filter_out_write: flush");
+
     AP_DEBUG_ASSERT(APR_BRIGADE_EMPTY(outctx->bb));
 
     e = apr_bucket_flush_create(outctx->bb->bucket_alloc);
@@ -208,6 +211,9 @@ static int bio_filter_out_write(BIO *bio, const char *in, int inl)
         return -1;
     }
 
+    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, outctx->c,
+                  "bio_filter_out_write: %i bytes", inl);
+
     /* Use a transient bucket for the output data - any downstream
      * filter must setaside if necessary. */
     e = apr_bucket_transient_create(in, inl, outctx->bb->bucket_alloc);
@@ -846,6 +852,9 @@ static apr_status_t ssl_filter_write(ap_filter_t *f,
         return APR_EGENERAL;
     }
 
+    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, f->c,
+                  "ssl_filter_write: %"APR_SIZE_T_FMT" bytes", len);
+
     /* We rely on SSL_get_error() after the write, which requires an empty error
      * queue before the write in order to work properly.
      */
@@ -1633,8 +1642,11 @@ static apr_status_t ssl_io_filter_coalesce(ap_filter_t *f,
              && (ctx == NULL
                  || bytes + ctx->bytes + e->length < COALESCE_BYTES);
          e = APR_BUCKET_NEXT(e)) {
-        if (e->length) count++; /* don't count zero-length buckets */
-        bytes += e->length;
+        /* don't count zero-length buckets */
+        if (e->length) {
+            bytes += e->length;
+            count++;
+        }
     }
     upto = e;
 
index 79b5c2954686d954e9bb2ae03ddb690b3beaf87a..91ae7c2f31c2cbf04acc4b3dabd30ea66ae457c3 100644 (file)
 #include "apr_thread_proc.h"    /* for RLIMIT stuff */
 #include "apr_random.h"
 
+#include "apr_version.h"
+#if APR_MAJOR_VERSION < 2
+#include "apu_version.h"
+#endif
+
 #define APR_WANT_IOVEC
 #define APR_WANT_STRFUNC
 #define APR_WANT_MEMFUNC
@@ -81,6 +86,9 @@
 #define AP_CONTENT_MD5_ON    1
 #define AP_CONTENT_MD5_UNSET 2
 
+#define AP_FLUSH_MAX_THRESHOLD 65535
+#define AP_FLUSH_MAX_PIPELINED 4
+
 APR_HOOK_STRUCT(
     APR_HOOK_LINK(get_mgmt_items)
     APR_HOOK_LINK(insert_network_bucket)
@@ -394,6 +402,13 @@ static void *merge_core_dir_configs(apr_pool_t *a, void *basev, void *newv)
     if (new->enable_sendfile != ENABLE_SENDFILE_UNSET) {
         conf->enable_sendfile = new->enable_sendfile;
     }
+    if (new->read_buf_size) {
+        conf->read_buf_size = new->read_buf_size;
+    }
+    else {
+        conf->read_buf_size = base->read_buf_size;
+    }
 
     conf->allow_encoded_slashes = new->allow_encoded_slashes;
     conf->decode_encoded_slashes = new->decode_encoded_slashes;
@@ -466,14 +481,13 @@ static void *create_core_server_config(apr_pool_t *a, server_rec *s)
         apr_table_setn(conf->accf_map, "http", "data");
         apr_table_setn(conf->accf_map, "https", "data");
 #endif
+
+        conf->flush_max_threshold = AP_FLUSH_MAX_THRESHOLD;
+        conf->flush_max_pipelined = AP_FLUSH_MAX_PIPELINED;
     }
-    /* pcalloc'ed - we have NULL's/0's
-    else ** is_virtual ** {
-        conf->ap_document_root = NULL;
-        conf->access_name = NULL;
-        conf->accf_map = NULL;
+    else {
+        conf->flush_max_pipelined = -1;
     }
-     */
 
     /* initialization, no special case for global context */
 
@@ -562,6 +576,14 @@ static void *merge_core_server_configs(apr_pool_t *p, void *basev, void *virtv)
                                        virt->protocols_honor_order);
     AP_CORE_MERGE_FLAG(merge_slashes, conf, base, virt);
     
+
+    conf->flush_max_threshold = (virt->flush_max_threshold)
+                                  ? virt->flush_max_threshold
+                                  : base->flush_max_threshold;
+    conf->flush_max_pipelined = (virt->flush_max_pipelined >= 0)
+                                  ? virt->flush_max_pipelined
+                                  : base->flush_max_pipelined;
+
     return conf;
 }
 
@@ -1223,6 +1245,13 @@ AP_DECLARE(apr_off_t) ap_get_limit_req_body(const request_rec *r)
     return d->limit_req_body;
 }
 
+AP_DECLARE(apr_size_t) ap_get_read_buf_size(const request_rec *r)
+{
+    core_dir_config *d = ap_get_core_module_config(r->per_dir_config);
+
+    return d->read_buf_size ? d->read_buf_size : AP_IOBUFSIZE;
+}
+
 
 /*****************************************************************
  *
@@ -2231,6 +2260,64 @@ static const char *set_enable_sendfile(cmd_parms *cmd, void *d_,
     return NULL;
 }
 
+static const char *set_read_buf_size(cmd_parms *cmd, void *d_,
+                                     const char *arg)
+{
+    core_dir_config *d = d_;
+    apr_off_t size;
+    char *end;
+
+    if (apr_strtoff(&size, arg, &end, 10)
+            || *end || size < 0 || size > APR_UINT32_MAX)
+        return apr_pstrcat(cmd->pool,
+                           "parameter must be a number between 0 and "
+                           APR_STRINGIFY(APR_UINT32_MAX) "): ",
+                           arg, NULL);
+
+    d->read_buf_size = (apr_size_t)size;
+
+    return NULL;
+}
+
+static const char *set_flush_max_threshold(cmd_parms *cmd, void *d_,
+                                           const char *arg)
+{
+    core_server_config *conf =
+        ap_get_core_module_config(cmd->server->module_config);
+    apr_off_t size;
+    char *end;
+
+    if (apr_strtoff(&size, arg, &end, 10)
+            || *end || size < 0 || size > APR_UINT32_MAX)
+        return apr_pstrcat(cmd->pool,
+                           "parameter must be a number between 0 and "
+                           APR_STRINGIFY(APR_UINT32_MAX) "): ",
+                           arg, NULL);
+
+    conf->flush_max_threshold = (apr_size_t)size;
+
+    return NULL;
+}
+
+static const char *set_flush_max_pipelined(cmd_parms *cmd, void *d_,
+                                           const char *arg)
+{
+    core_server_config *conf =
+        ap_get_core_module_config(cmd->server->module_config);
+    apr_off_t num;
+    char *end;
+
+    if (apr_strtoff(&num, arg, &end, 10)
+            || *end || num < -1 || num > APR_INT32_MAX)
+        return apr_pstrcat(cmd->pool,
+                           "parameter must be a number between -1 and "
+                           APR_STRINGIFY(APR_INT32_MAX) ": ",
+                           arg, NULL);
+
+    conf->flush_max_pipelined = (apr_int32_t)num;
+
+    return NULL;
+}
 
 /*
  * Report a missing-'>' syntax error.
@@ -4398,6 +4485,13 @@ AP_INIT_TAKE1("EnableMMAP", set_enable_mmap, NULL, OR_FILEINFO,
   "Controls whether memory-mapping may be used to read files"),
 AP_INIT_TAKE1("EnableSendfile", set_enable_sendfile, NULL, OR_FILEINFO,
   "Controls whether sendfile may be used to transmit files"),
+AP_INIT_TAKE1("ReadBufferSize", set_read_buf_size, NULL, ACCESS_CONF|RSRC_CONF,
+  "Size (in bytes) of the memory buffers used to read data"),
+AP_INIT_TAKE1("FlushMaxThreshold", set_flush_max_threshold, NULL, RSRC_CONF,
+  "Maximum threshold above which pending data are flushed to the network"),
+AP_INIT_TAKE1("FlushMaxPipelined", set_flush_max_pipelined, NULL, RSRC_CONF,
+  "Maximum number of pipelined responses (pending) above which they are "
+  "flushed to the network"),
 
 /* Old server config file commands */
 
@@ -4842,6 +4936,11 @@ static int default_handler(request_rec *r)
             if (d->enable_mmap == ENABLE_MMAP_OFF) {
                 (void)apr_bucket_file_enable_mmap(e, 0);
             }
+#endif
+#if APR_MAJOR_VERSION > 1 || (APU_MAJOR_VERSION == 1 && APU_MINOR_VERSION >= 6)
+            if (d->read_buf_size) {
+                apr_bucket_file_set_buf_size(e, d->read_buf_size);
+            }
 #endif
         }
 
index 290a1eb9ddbc9f9a2a334deee89be293dd30513b..7cd49f6df3d47838b9afbca43440588d39ca82c0 100644 (file)
@@ -79,9 +79,10 @@ do { \
 
 struct core_output_filter_ctx {
     apr_bucket_brigade *buffered_bb;
-    apr_bucket_brigade *tmp_flush_bb;
     apr_pool_t *deferred_write_pool;
     apr_size_t bytes_written;
+    struct iovec *vec;
+    apr_size_t nvec;
 };
 
 struct core_filter_ctx {
@@ -335,50 +336,132 @@ static void setaside_remaining_output(ap_filter_t *f,
 
 static apr_status_t send_brigade_nonblocking(apr_socket_t *s,
                                              apr_bucket_brigade *bb,
-                                             apr_size_t *bytes_written,
+                                             core_output_filter_ctx_t *ctx,
                                              conn_rec *c);
 
-static void remove_empty_buckets(apr_bucket_brigade *bb);
-
-static apr_status_t send_brigade_blocking(apr_socket_t *s,
-                                          apr_bucket_brigade *bb,
-                                          apr_size_t *bytes_written,
-                                          conn_rec *c);
-
 static apr_status_t writev_nonblocking(apr_socket_t *s,
-                                       struct iovec *vec, apr_size_t nvec,
                                        apr_bucket_brigade *bb,
-                                       apr_size_t *cumulative_bytes_written,
+                                       core_output_filter_ctx_t *ctx,
+                                       apr_size_t bytes_to_write,
+                                       apr_size_t nvec,
                                        conn_rec *c);
 
 #if APR_HAS_SENDFILE
 static apr_status_t sendfile_nonblocking(apr_socket_t *s,
                                          apr_bucket *bucket,
-                                         apr_size_t *cumulative_bytes_written,
+                                         core_output_filter_ctx_t *ctx,
                                          conn_rec *c);
 #endif
 
 /* XXX: Should these be configurable parameters? */
 #define THRESHOLD_MIN_WRITE 4096
-#define THRESHOLD_MAX_BUFFER 65536
-#define MAX_REQUESTS_IN_PIPELINE 5
 
 /* Optional function coming from mod_logio, used for logging of output
  * traffic
  */
 extern APR_OPTIONAL_FN_TYPE(ap_logio_add_bytes_out) *ap__logio_add_bytes_out;
 
+static apr_status_t should_send_brigade(apr_bucket_brigade *bb,
+                                        conn_rec *c, int *flush)
+{
+    core_server_config *conf =
+        ap_get_core_module_config(c->base_server->module_config);
+    apr_size_t total_bytes = 0, non_file_bytes = 0;
+    apr_uint32_t eor_buckets = 0;
+    apr_bucket *bucket;
+    int need_flush = 0;
+
+    /* Scan through the brigade and decide whether we need to flush it,
+     * based on the following rules:
+     *
+     *  a) The brigade contains a flush bucket: Do a blocking write
+     *     of everything up that point.
+     *
+     *  b) The request is in CONN_STATE_HANDLER state, and the brigade
+     *     contains at least flush_max_threshold bytes in non-file
+     *     buckets: Do blocking writes until the amount of data in the
+     *     buffer is less than flush_max_threshold.  (The point of this
+     *     rule is to provide flow control, in case a handler is
+     *     streaming out lots of data faster than the data can be
+     *     sent to the client.)
+     *
+     *  c) The request is in CONN_STATE_HANDLER state, and the brigade
+     *     contains at least flush_max_pipelined EOR buckets:
+     *     Do blocking writes until less than flush_max_pipelined EOR
+     *     buckets are left. (The point of this rule is to prevent too many
+     *     FDs being kept open by pipelined requests, possibly allowing a
+     *     DoS).
+     *
+     *  d) The brigade contains a morphing bucket: otherwise ap_save_brigade()
+     *     could read the whole bucket into memory.
+     */
+    for (bucket = APR_BRIGADE_FIRST(bb);
+         bucket != APR_BRIGADE_SENTINEL(bb);
+         bucket = APR_BUCKET_NEXT(bucket)) {
+
+        if (!APR_BUCKET_IS_METADATA(bucket)) {
+            if (bucket->length == (apr_size_t)-1) {
+                if (flush) {
+                    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c,
+                                  "core_output_filter: flushing because "
+                                  "of morphing bucket");
+                }
+                need_flush = 1;
+                break;
+            }
+
+            total_bytes += bucket->length;
+            if (!APR_BUCKET_IS_FILE(bucket)) {
+                non_file_bytes += bucket->length;
+                if (non_file_bytes > conf->flush_max_threshold) {
+                    if (flush) {
+                        ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c,
+                                      "core_output_filter: flushing because "
+                                      "of max threshold");
+                    }
+                    need_flush = 1;
+                    break;
+                }
+            }
+        }
+        else if (APR_BUCKET_IS_FLUSH(bucket)) {
+            if (flush) {
+                ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c,
+                              "core_output_filter: flushing because "
+                              "of FLUSH bucket");
+            }
+            need_flush = 1;
+            break;
+        }
+        else if (AP_BUCKET_IS_EOR(bucket)
+                 && conf->flush_max_pipelined >= 0
+                 && ++eor_buckets > conf->flush_max_pipelined) {
+            if (flush) {
+                ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c,
+                              "core_output_filter: flushing because "
+                              "of max pipelined");
+            }
+            need_flush = 1;
+            break;
+        }
+    }
+    if (flush) {
+        *flush = need_flush;
+    }
+
+    /* Also send if above flush_min_threshold, or if there are FILE buckets */
+    return (need_flush
+            || total_bytes >= THRESHOLD_MIN_WRITE
+            || total_bytes > non_file_bytes);
+}
+
 apr_status_t ap_core_output_filter(ap_filter_t *f, apr_bucket_brigade *new_bb)
 {
     conn_rec *c = f->c;
     core_net_rec *net = f->ctx;
     core_output_filter_ctx_t *ctx = net->out_ctx;
     apr_bucket_brigade *bb = NULL;
-    apr_bucket *bucket, *next, *flush_upto = NULL;
-    apr_size_t bytes_in_brigade, non_file_bytes_in_brigade;
-    int eor_buckets_in_brigade, morphing_bucket_in_brigade;
-    apr_status_t rv;
-    int loglevel = ap_get_conn_module_loglevel(c, APLOG_MODULE_INDEX);
+    apr_status_t rv = APR_SUCCESS;
 
     /* Fail quickly if the connection has already been aborted. */
     if (c->aborted) {
@@ -392,12 +475,10 @@ apr_status_t ap_core_output_filter(ap_filter_t *f, apr_bucket_brigade *new_bb)
         ctx = apr_pcalloc(c->pool, sizeof(*ctx));
         net->out_ctx = (core_output_filter_ctx_t *)ctx;
         /*
-         * Need to create tmp brigade with correct lifetime. Passing
-         * NULL to apr_brigade_split_ex would result in a brigade
+         * Need to create buffered_bb brigade with correct lifetime. Passing
+         * NULL to ap_save_brigade() would result in a brigade
          * allocated from bb->pool which might be wrong.
          */
-        ctx->tmp_flush_bb = apr_brigade_create(c->pool, c->bucket_alloc);
-        /* same for buffered_bb and ap_save_brigade */
         ctx->buffered_bb = apr_brigade_create(c->pool, c->bucket_alloc);
     }
 
@@ -412,191 +493,59 @@ apr_status_t ap_core_output_filter(ap_filter_t *f, apr_bucket_brigade *new_bb)
         else {
             bb = ctx->buffered_bb;
         }
-        c->data_in_output_filters = 0;
     }
     else if (new_bb == NULL) {
+        c->data_in_output_filters = 0;
         return APR_SUCCESS;
     }
 
-    /* Scan through the brigade and decide whether to attempt a write,
-     * and how much to write, based on the following rules:
-     *
-     *  1) The new_bb is null: Do a nonblocking write of as much as
-     *     possible: do a nonblocking write of as much data as possible,
-     *     then save the rest in ctx->buffered_bb.  (If new_bb == NULL,
-     *     it probably means that the MPM is doing asynchronous write
-     *     completion and has just determined that this connection
-     *     is writable.)
-     *
-     *  2) Determine if and up to which bucket we need to do a blocking
-     *     write:
-     *
-     *  a) The brigade contains a flush bucket: Do a blocking write
-     *     of everything up that point.
-     *
-     *  b) The request is in CONN_STATE_HANDLER state, and the brigade
-     *     contains at least THRESHOLD_MAX_BUFFER bytes in non-file
-     *     buckets: Do blocking writes until the amount of data in the
-     *     buffer is less than THRESHOLD_MAX_BUFFER.  (The point of this
-     *     rule is to provide flow control, in case a handler is
-     *     streaming out lots of data faster than the data can be
-     *     sent to the client.)
-     *
-     *  c) The request is in CONN_STATE_HANDLER state, and the brigade
-     *     contains at least MAX_REQUESTS_IN_PIPELINE EOR buckets:
-     *     Do blocking writes until less than MAX_REQUESTS_IN_PIPELINE EOR
-     *     buckets are left. (The point of this rule is to prevent too many
-     *     FDs being kept open by pipelined requests, possibly allowing a
-     *     DoS).
-     *
-     *  d) The brigade contains a morphing bucket: If there was no other
-     *     reason to do a blocking write yet, try reading the bucket. If its
-     *     contents fit into memory before THRESHOLD_MAX_BUFFER is reached,
-     *     everything is fine. Otherwise we need to do a blocking write the
-     *     up to and including the morphing bucket, because ap_save_brigade()
-     *     would read the whole bucket into memory later on.
-     *
-     *  3) Actually do the blocking write up to the last bucket determined
-     *     by rules 2a-d. The point of doing only one flush is to make as
-     *     few calls to writev() as possible.
-     *
-     *  4) If the brigade contains at least THRESHOLD_MIN_WRITE
-     *     bytes: Do a nonblocking write of as much data as possible,
-     *     then save the rest in ctx->buffered_bb.
-     */
-
-    if (new_bb == NULL) {
-        rv = send_brigade_nonblocking(net->client_socket, bb,
-                                      &(ctx->bytes_written), c);
-        if (rv != APR_SUCCESS && !APR_STATUS_IS_EAGAIN(rv)) {
-            /* The client has aborted the connection */
-            ap_log_cerror(APLOG_MARK, APLOG_TRACE1, rv, c,
-                          "core_output_filter: writing data to the network");
-            apr_brigade_cleanup(bb);
-            c->aborted = 1;
-            return rv;
-        }
-        setaside_remaining_output(f, ctx, bb, c);
-        return APR_SUCCESS;
-    }
-
-    bytes_in_brigade = 0;
-    non_file_bytes_in_brigade = 0;
-    eor_buckets_in_brigade = 0;
-    morphing_bucket_in_brigade = 0;
+    if (!new_bb || should_send_brigade(bb, c, NULL)) {
+        apr_socket_t *sock = net->client_socket;
+        apr_interval_time_t sock_timeout = 0;
+        int flush;
 
-    for (bucket = APR_BRIGADE_FIRST(bb); bucket != APR_BRIGADE_SENTINEL(bb);
-         bucket = next) {
-        next = APR_BUCKET_NEXT(bucket);
+        /* Non-blocking writes on the socket in any case. */
+        apr_socket_timeout_get(sock, &sock_timeout);
+        apr_socket_timeout_set(sock, 0);
 
-        if (!APR_BUCKET_IS_METADATA(bucket)) {
-            if (bucket->length == (apr_size_t)-1) {
-                /*
-                 * A setaside of morphing buckets would read everything into
-                 * memory. Instead, we will flush everything up to and
-                 * including this bucket.
-                 */
-                morphing_bucket_in_brigade = 1;
-            }
-            else {
-                bytes_in_brigade += bucket->length;
-                if (!APR_BUCKET_IS_FILE(bucket))
-                    non_file_bytes_in_brigade += bucket->length;
+        do {
+            rv = send_brigade_nonblocking(sock, bb, ctx, c);
+            if (!new_bb || !APR_STATUS_IS_EAGAIN(rv)) {
+                break;
             }
-        }
-        else if (AP_BUCKET_IS_EOR(bucket)) {
-            eor_buckets_in_brigade++;
-        }
 
-        if (APR_BUCKET_IS_FLUSH(bucket)
-            || non_file_bytes_in_brigade >= THRESHOLD_MAX_BUFFER
-            || morphing_bucket_in_brigade
-            || eor_buckets_in_brigade > MAX_REQUESTS_IN_PIPELINE) {
-            /* this segment of the brigade MUST be sent before returning. */
-
-            if (loglevel >= APLOG_TRACE6) {
-                char *reason = APR_BUCKET_IS_FLUSH(bucket) ?
-                               "FLUSH bucket" :
-                               (non_file_bytes_in_brigade >= THRESHOLD_MAX_BUFFER) ?
-                               "THRESHOLD_MAX_BUFFER" :
-                               morphing_bucket_in_brigade ? "morphing bucket" :
-                               "MAX_REQUESTS_IN_PIPELINE";
-                ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c,
-                              "will flush because of %s", reason);
-                ap_log_cerror(APLOG_MARK, APLOG_TRACE8, 0, c,
-                              "seen in brigade%s: bytes: %" APR_SIZE_T_FMT
-                              ", non-file bytes: %" APR_SIZE_T_FMT ", eor "
-                              "buckets: %d, morphing buckets: %d",
-                              flush_upto == NULL ? " so far"
-                                                 : " since last flush point",
-                              bytes_in_brigade,
-                              non_file_bytes_in_brigade,
-                              eor_buckets_in_brigade,
-                              morphing_bucket_in_brigade);
+            should_send_brigade(bb, c, &flush);
+            if (flush) {
+                apr_int32_t nfd;
+                apr_pollfd_t pfd;
+                memset(&pfd, 0, sizeof(pfd));
+                pfd.reqevents = APR_POLLOUT;
+                pfd.desc_type = APR_POLL_SOCKET;
+                pfd.desc.s = sock;
+                pfd.p = c->pool;
+                do {
+                    rv = apr_poll(&pfd, 1, &nfd, sock_timeout);
+                } while (APR_STATUS_IS_EINTR(rv));
             }
-            /*
-             * Defer the actual blocking write to avoid doing many writes.
-             */
-            flush_upto = next;
+        } while (flush);
 
-            bytes_in_brigade = 0;
-            non_file_bytes_in_brigade = 0;
-            eor_buckets_in_brigade = 0;
-            morphing_bucket_in_brigade = 0;
-        }
+        /* Restore original socket timeout before leaving. */
+        apr_socket_timeout_set(sock, sock_timeout);
     }
 
-    if (flush_upto != NULL) {
-        ctx->tmp_flush_bb = apr_brigade_split_ex(bb, flush_upto,
-                                                 ctx->tmp_flush_bb);
-        if (loglevel >= APLOG_TRACE8) {
-                ap_log_cerror(APLOG_MARK, APLOG_TRACE8, 0, c,
-                              "flushing now");
-        }
-        rv = send_brigade_blocking(net->client_socket, bb,
-                                   &(ctx->bytes_written), c);
-        if (rv != APR_SUCCESS) {
-            /* The client has aborted the connection */
-            ap_log_cerror(APLOG_MARK, APLOG_TRACE1, rv, c,
-                          "core_output_filter: writing data to the network");
-            apr_brigade_cleanup(bb);
-            c->aborted = 1;
-            return rv;
-        }
-        if (loglevel >= APLOG_TRACE8) {
-                ap_log_cerror(APLOG_MARK, APLOG_TRACE8, 0, c,
-                              "total bytes written: %" APR_SIZE_T_FMT,
-                              ctx->bytes_written);
-        }
-        APR_BRIGADE_CONCAT(bb, ctx->tmp_flush_bb);
-    }
-
-    if (loglevel >= APLOG_TRACE8) {
-        ap_log_cerror(APLOG_MARK, APLOG_TRACE8, 0, c,
-                      "brigade contains: bytes: %" APR_SIZE_T_FMT
-                      ", non-file bytes: %" APR_SIZE_T_FMT
-                      ", eor buckets: %d, morphing buckets: %d",
-                      bytes_in_brigade, non_file_bytes_in_brigade,
-                      eor_buckets_in_brigade, morphing_bucket_in_brigade);
-    }
-
-    if (bytes_in_brigade >= THRESHOLD_MIN_WRITE) {
-        rv = send_brigade_nonblocking(net->client_socket, bb,
-                                      &(ctx->bytes_written), c);
-        if ((rv != APR_SUCCESS) && (!APR_STATUS_IS_EAGAIN(rv))) {
-            /* The client has aborted the connection */
-            ap_log_cerror(APLOG_MARK, APLOG_TRACE1, rv, c,
-                          "core_output_filter: writing data to the network");
-            apr_brigade_cleanup(bb);
-            c->aborted = 1;
-            return rv;
-        }
-        if (loglevel >= APLOG_TRACE8) {
-                ap_log_cerror(APLOG_MARK, APLOG_TRACE8, 0, c,
-                              "tried nonblocking write, total bytes "
-                              "written: %" APR_SIZE_T_FMT,
-                              ctx->bytes_written);
-        }
+    if (rv != APR_SUCCESS && !APR_STATUS_IS_EAGAIN(rv)) {
+        /* The client has aborted the connection */
+        ap_log_cerror(
+                APLOG_MARK, APLOG_TRACE1, rv, c,
+                "core_output_filter: writing data to the network");
+        /*
+         * Set c->aborted before apr_brigade_cleanup to have the correct status
+         * when logging the request as apr_brigade_cleanup triggers the logging
+         * of the request if it contains an EOR bucket.
+         */
+        c->aborted = 1;
+        apr_brigade_cleanup(bb);
+        return rv;
     }
 
     setaside_remaining_output(f, ctx, bb, c);
@@ -612,10 +561,17 @@ static void setaside_remaining_output(ap_filter_t *f,
                                       apr_bucket_brigade *bb,
                                       conn_rec *c)
 {
-    if (bb == NULL) {
-        return;
+    apr_bucket *bucket;
+
+    /* Don't set aside leading empty buckets, all previous data have been
+     * consumed so it's safe to delete them now.
+     */
+    while (((bucket = APR_BRIGADE_FIRST(bb)) != APR_BRIGADE_SENTINEL(bb)) &&
+           (APR_BUCKET_IS_METADATA(bucket) || (bucket->length == 0))) {
+        apr_bucket_delete(bucket);
     }
-    remove_empty_buckets(bb);
+
+    c->data_in_output_filters = 0;
     if (!APR_BRIGADE_EMPTY(bb)) {
         c->data_in_output_filters = 1;
         if (bb != ctx->buffered_bb) {
@@ -637,285 +593,259 @@ static void setaside_remaining_output(ap_filter_t *f,
 }
 
 #ifndef APR_MAX_IOVEC_SIZE
-#define MAX_IOVEC_TO_WRITE 16
+#define NVEC_MIN 16
+#define NVEC_MAX NVEC_MIN
 #else
 #if APR_MAX_IOVEC_SIZE > 16
-#define MAX_IOVEC_TO_WRITE 16
+#define NVEC_MIN 16
 #else
-#define MAX_IOVEC_TO_WRITE APR_MAX_IOVEC_SIZE
+#define NVEC_MIN APR_MAX_IOVEC_SIZE
 #endif
+#define NVEC_MAX APR_MAX_IOVEC_SIZE
+#endif
+
+static APR_INLINE int is_in_memory_bucket(apr_bucket *b)
+{
+    /* These buckets' data are already in memory. */
+    return APR_BUCKET_IS_HEAP(b)
+           || APR_BUCKET_IS_POOL(b)
+           || APR_BUCKET_IS_TRANSIENT(b)
+           || APR_BUCKET_IS_IMMORTAL(b);
+}
+
+#if APR_HAS_SENDFILE
+static APR_INLINE int can_sendfile_bucket(apr_bucket *b)
+{
+    /* Use sendfile to send the bucket unless:
+     *   - the bucket is not a file bucket, or
+     *   - the file is too small for sendfile to be useful, or
+     *   - sendfile is disabled in the httpd config via "EnableSendfile off".
+     */
+    if (APR_BUCKET_IS_FILE(b) && b->length >= AP_MIN_SENDFILE_BYTES) {
+        apr_file_t *file = ((apr_bucket_file *)b->data)->fd;
+        return apr_file_flags_get(file) & APR_SENDFILE_ENABLED;
+    }
+    else {
+        return 0;
+    }
+}
 #endif
 
 static apr_status_t send_brigade_nonblocking(apr_socket_t *s,
                                              apr_bucket_brigade *bb,
-                                             apr_size_t *bytes_written,
+                                             core_output_filter_ctx_t *ctx,
                                              conn_rec *c)
 {
+    apr_status_t rv = APR_SUCCESS;
+    core_server_config *conf =
+        ap_get_core_module_config(c->base_server->module_config);
+    apr_size_t nvec = 0, nbytes = 0;
     apr_bucket *bucket, *next;
-    apr_status_t rv;
-    struct iovec vec[MAX_IOVEC_TO_WRITE];
-    apr_size_t nvec = 0;
-
-    remove_empty_buckets(bb);
+    const char *data;
+    apr_size_t length;
 
     for (bucket = APR_BRIGADE_FIRST(bb);
          bucket != APR_BRIGADE_SENTINEL(bb);
          bucket = next) {
         next = APR_BUCKET_NEXT(bucket);
-#if APR_HAS_SENDFILE
-        if (APR_BUCKET_IS_FILE(bucket)) {
-            apr_bucket_file *file_bucket = (apr_bucket_file *)(bucket->data);
-            apr_file_t *fd = file_bucket->fd;
-            /* Use sendfile to send this file unless:
-             *   - the platform doesn't support sendfile,
-             *   - the file is too small for sendfile to be useful, or
-             *   - sendfile is disabled in the httpd config via "EnableSendfile off"
-             */
 
-            if ((apr_file_flags_get(fd) & APR_SENDFILE_ENABLED) &&
-                (bucket->length >= AP_MIN_SENDFILE_BYTES)) {
-                if (nvec > 0) {
-                    (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 1);
-                    rv = writev_nonblocking(s, vec, nvec, bb, bytes_written, c);
-                    if (rv != APR_SUCCESS) {
-                        (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 0);
-                        return rv;
-                    }
-                }
-                rv = sendfile_nonblocking(s, bucket, bytes_written, c);
-                if (nvec > 0) {
-                    (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 0);
-                    nvec = 0;
-                }
+#if APR_HAS_SENDFILE
+        if (can_sendfile_bucket(bucket)) {
+            if (nvec > 0) {
+                (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 1);
+                rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
                 if (rv != APR_SUCCESS) {
-                    return rv;
+                    goto cleanup;
                 }
-                break;
+                nbytes = 0;
+                nvec = 0;
             }
+            rv = sendfile_nonblocking(s, bucket, ctx, c);
+            if (rv != APR_SUCCESS) {
+                goto cleanup;
+            }
+            continue;
         }
 #endif /* APR_HAS_SENDFILE */
-        /* didn't sendfile */
-        if (!APR_BUCKET_IS_METADATA(bucket)) {
-            const char *data;
-            apr_size_t length;
-            
+
+        if (bucket->length) {
             /* Non-blocking read first, in case this is a morphing
              * bucket type. */
             rv = apr_bucket_read(bucket, &data, &length, APR_NONBLOCK_READ);
             if (APR_STATUS_IS_EAGAIN(rv)) {
                 /* Read would block; flush any pending data and retry. */
                 if (nvec) {
-                    rv = writev_nonblocking(s, vec, nvec, bb, bytes_written, c);
-                    if (rv) {
-                        return rv;
+                    rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+                    if (rv != APR_SUCCESS) {
+                        goto cleanup;
                     }
+                    nbytes = 0;
                     nvec = 0;
                 }
-                
+                (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 0);
+
                 rv = apr_bucket_read(bucket, &data, &length, APR_BLOCK_READ);
             }
             if (rv != APR_SUCCESS) {
-                return rv;
+                goto cleanup;
             }
 
             /* reading may have split the bucket, so recompute next: */
             next = APR_BUCKET_NEXT(bucket);
-            vec[nvec].iov_base = (char *)data;
-            vec[nvec].iov_len = length;
-            nvec++;
-            if (nvec == MAX_IOVEC_TO_WRITE) {
-                rv = writev_nonblocking(s, vec, nvec, bb, bytes_written, c);
-                nvec = 0;
-                if (rv != APR_SUCCESS) {
-                    return rv;
-                }
-                break;
-            }
         }
-    }
 
-    if (nvec > 0) {
-        rv = writev_nonblocking(s, vec, nvec, bb, bytes_written, c);
-        if (rv != APR_SUCCESS) {
-            return rv;
+        if (!bucket->length) {
+            /* Don't delete empty buckets until all the previous ones have been
+             * sent (nvec == 0); this must happen in sequence since metabuckets
+             * like EOR could free the data still pointed to by the iovec. So
+             * unless the latter is empty, let writev_nonblocking() cleanup the
+             * brigade in order.
+             */
+            if (!nvec) {
+                apr_bucket_delete(bucket);
+            }
+            continue;
         }
-    }
 
-    remove_empty_buckets(bb);
-
-    return APR_SUCCESS;
-}
-
-static void remove_empty_buckets(apr_bucket_brigade *bb)
-{
-    apr_bucket *bucket;
-    while (((bucket = APR_BRIGADE_FIRST(bb)) != APR_BRIGADE_SENTINEL(bb)) &&
-           (APR_BUCKET_IS_METADATA(bucket) || (bucket->length == 0))) {
-        apr_bucket_delete(bucket);
-    }
-}
-
-static apr_status_t send_brigade_blocking(apr_socket_t *s,
-                                          apr_bucket_brigade *bb,
-                                          apr_size_t *bytes_written,
-                                          conn_rec *c)
-{
-    apr_status_t rv;
-
-    rv = APR_SUCCESS;
-    while (!APR_BRIGADE_EMPTY(bb)) {
-        rv = send_brigade_nonblocking(s, bb, bytes_written, c);
-        if (rv != APR_SUCCESS) {
-            if (APR_STATUS_IS_EAGAIN(rv)) {
-                /* Wait until we can send more data */
-                apr_int32_t nsds;
-                apr_interval_time_t timeout;
-                apr_pollfd_t pollset;
-
-                pollset.p = c->pool;
-                pollset.desc_type = APR_POLL_SOCKET;
-                pollset.reqevents = APR_POLLOUT;
-                pollset.desc.s = s;
-                apr_socket_timeout_get(s, &timeout);
-                do {
-                    rv = apr_poll(&pollset, 1, &nsds, timeout);
-                } while (APR_STATUS_IS_EINTR(rv));
+        /* Make sure that these new data fit in our iovec. */
+        if (nvec == ctx->nvec) {
+            if (nvec == NVEC_MAX) {
+                (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 1);
+                rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
                 if (rv != APR_SUCCESS) {
-                    break;
+                    goto cleanup;
                 }
+                nbytes = 0;
+                nvec = 0;
             }
             else {
-                break;
+                struct iovec *newvec;
+                apr_size_t newn = nvec * 2;
+                if (newn < NVEC_MIN) {
+                    newn = NVEC_MIN;
+                }
+                else if (newn > NVEC_MAX) {
+                    newn = NVEC_MAX;
+                }
+                newvec = apr_palloc(c->pool, newn * sizeof(struct iovec));
+                if (nvec) {
+                    memcpy(newvec, ctx->vec, nvec * sizeof(struct iovec));
+                }
+                ctx->vec = newvec;
+                ctx->nvec = newn;
             }
         }
+        nbytes += length;
+        ctx->vec[nvec].iov_base = (void *)data;
+        ctx->vec[nvec].iov_len = length;
+        nvec++;
+
+        /* Flush above max threshold, unless the brigade still contains in
+         * memory buckets which we want to try writing in the same pass (if
+         * we are at the end of the brigade, the write will happen outside
+         * the loop anyway).
+         */
+        if (nbytes > conf->flush_max_threshold
+                && next != APR_BRIGADE_SENTINEL(bb)
+                && !is_in_memory_bucket(next)) {
+            (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 1);
+            rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+            if (rv != APR_SUCCESS) {
+                goto cleanup;
+            }
+            nbytes = 0;
+            nvec = 0;
+        }
     }
+    if (nvec > 0) {
+        rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+    }
+
+cleanup:
+    (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 0);
     return rv;
 }
 
 static apr_status_t writev_nonblocking(apr_socket_t *s,
-                                       struct iovec *vec, apr_size_t nvec,
                                        apr_bucket_brigade *bb,
-                                       apr_size_t *cumulative_bytes_written,
+                                       core_output_filter_ctx_t *ctx,
+                                       apr_size_t bytes_to_write,
+                                       apr_size_t nvec,
                                        conn_rec *c)
 {
-    apr_status_t rv = APR_SUCCESS, arv;
-    apr_size_t bytes_written = 0, bytes_to_write = 0;
-    apr_size_t i, offset;
-    apr_interval_time_t old_timeout;
-
-    arv = apr_socket_timeout_get(s, &old_timeout);
-    if (arv != APR_SUCCESS) {
-        return arv;
-    }
-    arv = apr_socket_timeout_set(s, 0);
-    if (arv != APR_SUCCESS) {
-        return arv;
-    }
+    apr_status_t rv;
+    struct iovec *vec = ctx->vec;
+    apr_size_t bytes_written = 0;
+    apr_size_t i, offset = 0;
 
-    for (i = 0; i < nvec; i++) {
-        bytes_to_write += vec[i].iov_len;
-    }
-    offset = 0;
-    while (bytes_written < bytes_to_write) {
+    do {
         apr_size_t n = 0;
         rv = apr_socket_sendv(s, vec + offset, nvec - offset, &n);
-        if (n > 0) {
-            bytes_written += n;
-            for (i = offset; i < nvec; ) {
-                apr_bucket *bucket = APR_BRIGADE_FIRST(bb);
-                if (APR_BUCKET_IS_METADATA(bucket)) {
-                    apr_bucket_delete(bucket);
-                }
-                else if (n >= vec[i].iov_len) {
-                    apr_bucket_delete(bucket);
-                    offset++;
-                    n -= vec[i++].iov_len;
-                }
-                else {
+        bytes_written += n;
+
+        for (i = offset; i < nvec; ) {
+            apr_bucket *bucket = APR_BRIGADE_FIRST(bb);
+            if (!bucket->length) {
+                apr_bucket_delete(bucket);
+            }
+            else if (n >= vec[i].iov_len) {
+                apr_bucket_delete(bucket);
+                n -= vec[i++].iov_len;
+                offset++;
+            }
+            else {
+                if (n) {
                     apr_bucket_split(bucket, n);
                     apr_bucket_delete(bucket);
                     vec[i].iov_len -= n;
                     vec[i].iov_base = (char *) vec[i].iov_base + n;
-                    break;
                 }
+                break;
             }
         }
-        if (rv != APR_SUCCESS) {
-            break;
-        }
-    }
+    } while (rv == APR_SUCCESS && bytes_written < bytes_to_write);
+
     if ((ap__logio_add_bytes_out != NULL) && (bytes_written > 0)) {
         ap__logio_add_bytes_out(c, bytes_written);
     }
-    *cumulative_bytes_written += bytes_written;
+    ctx->bytes_written += bytes_written;
 
-    arv = apr_socket_timeout_set(s, old_timeout);
-    if ((arv != APR_SUCCESS) && (rv == APR_SUCCESS)) {
-        return arv;
-    }
-    else {
-        return rv;
-    }
+    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, rv, c,
+                  "writev_nonblocking: %"APR_SIZE_T_FMT"/%"APR_SIZE_T_FMT,
+                  bytes_written, bytes_to_write);
+    return rv;
 }
 
 #if APR_HAS_SENDFILE
 
 static apr_status_t sendfile_nonblocking(apr_socket_t *s,
                                          apr_bucket *bucket,
-                                         apr_size_t *cumulative_bytes_written,
+                                         core_output_filter_ctx_t *ctx,
                                          conn_rec *c)
 {
-    apr_status_t rv = APR_SUCCESS;
-    apr_bucket_file *file_bucket;
-    apr_file_t *fd;
-    apr_size_t file_length;
-    apr_off_t file_offset;
-    apr_size_t bytes_written = 0;
+    apr_status_t rv;
+    apr_file_t *file = ((apr_bucket_file *)bucket->data)->fd;
+    apr_size_t bytes_written = bucket->length; /* bytes_to_write for now */
+    apr_off_t file_offset = bucket->start;
 
-    if (!APR_BUCKET_IS_FILE(bucket)) {
-        ap_log_error(APLOG_MARK, APLOG_ERR, rv, c->base_server, APLOGNO(00006)
-                     "core_filter: sendfile_nonblocking: "
-                     "this should never happen");
-        return APR_EGENERAL;
-    }
-    file_bucket = (apr_bucket_file *)(bucket->data);
-    fd = file_bucket->fd;
-    file_length = bucket->length;
-    file_offset = bucket->start;
-
-    if (bytes_written < file_length) {
-        apr_size_t n = file_length - bytes_written;
-        apr_status_t arv;
-        apr_interval_time_t old_timeout;
-
-        arv = apr_socket_timeout_get(s, &old_timeout);
-        if (arv != APR_SUCCESS) {
-            return arv;
-        }
-        arv = apr_socket_timeout_set(s, 0);
-        if (arv != APR_SUCCESS) {
-            return arv;
-        }
-        rv = apr_socket_sendfile(s, fd, NULL, &file_offset, &n, 0);
-        if (rv == APR_SUCCESS) {
-            bytes_written += n;
-            file_offset += n;
-        }
-        arv = apr_socket_timeout_set(s, old_timeout);
-        if ((arv != APR_SUCCESS) && (rv == APR_SUCCESS)) {
-            rv = arv;
-        }
-    }
+    rv = apr_socket_sendfile(s, file, NULL, &file_offset, &bytes_written, 0);
     if ((ap__logio_add_bytes_out != NULL) && (bytes_written > 0)) {
         ap__logio_add_bytes_out(c, bytes_written);
     }
-    *cumulative_bytes_written += bytes_written;
-    if ((bytes_written < file_length) && (bytes_written > 0)) {
-        apr_bucket_split(bucket, bytes_written);
+    ctx->bytes_written += bytes_written;
+
+    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, rv, c,
+                  "sendfile_nonblocking: %" APR_SIZE_T_FMT "/%" APR_SIZE_T_FMT,
+                  bytes_written, bucket->length);
+    if (bytes_written >= bucket->length) {
         apr_bucket_delete(bucket);
     }
-    else if (bytes_written == file_length) {
+    else if (bytes_written > 0) {
+        apr_bucket_split(bucket, bytes_written);
         apr_bucket_delete(bucket);
+        if (rv == APR_SUCCESS) {
+            rv = APR_EAGAIN;
+        }
     }
     return rv;
 }