From: Graham Leggett <minfrin@apache.org>
Date: Wed, 25 Oct 2006 13:44:47 +0000 (+0000)
Subject: mod_cache: Fix an out of memory condition that occurs when the
X-Git-Tag: 2.3.0~2047
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=48a692a9cde611536e1afb56843befd4310bd42e;p=thirdparty%2Fapache%2Fhttpd.git

mod_cache: Fix an out of memory condition that occurs when the
cache tries to save huge files (greater than RAM). Buckets bigger
than a tuneable threshold are split into smaller buckets before
being passed to mod_disk_cache, etc. PR 39380


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@467655 13f79535-47bb-0310-9956-ffa450edef68
---

diff --git a/CHANGES b/CHANGES
index 44a73e2807a..66c518eb77b 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,11 @@
 Changes with Apache 2.3.0
   [Remove entries to the current 2.0 and 2.2 section below, when backported]
 
+  *) mod_cache: Fix an out of memory condition that occurs when the
+     cache tries to save huge files (greater than RAM). Buckets bigger
+     than a tuneable threshold are split into smaller buckets before
+     being passed to mod_disk_cache, etc. PR 39380 [Graham Leggett]
+
   *) Fix issue which could cause error messages to be written to access logs
      on Win32.  PR 40476.  [Tom Donovan <Tom.Donovan acm.org>]
 
diff --git a/docs/manual/mod/mod_cache.xml b/docs/manual/mod/mod_cache.xml
index 322ab824182..4b8937ff331 100644
--- a/docs/manual/mod/mod_cache.xml
+++ b/docs/manual/mod/mod_cache.xml
@@ -346,6 +346,44 @@ LastModified date.</description>
 </usage>
 </directivesynopsis>
 
+<directivesynopsis>
+<name>CacheMaxBucketSize</name>
+<description>This tuneable value specifies the maximum bucket size in bytes 
+that the cache modules can be expected to process in one go.
+</description>
+<syntax>CacheMaxBucketSize <var>integer</var></syntax>
+<default>CacheMaxBucketSize 16777216</default>
+<contextlist><context>server config</context><context>virtual host</context>
+</contextlist>
+
+<usage>
+<p>When caching large objects, such as disk images or video files, the
+cache modules may be expected to process a large bucket of the response
+in one go. This could lead to an out of memory condition, or could result
+in very slow response times to the client, as the large object is stored
+to disk.</p>
+
+<p>This will typically happen when an attempt is made to improve
+performance of an archive of large files stored on a slow disk, by
+caching often accessed files to a fast disk. To prevent this problem,
+large objects are split up before being processed into buckets of a
+maximum size set by this parameter.</p>
+
+<p>When the cache is used in front of a forward or reverse proxy, bucket
+sizes will typically be determined by the underlying network, and this
+option will have little or no effect.</p>
+
+<p>Setting this option to a low value will result in more buckets being
+processed by the server, which may lead to increased memory usage.
+Setting this option too high may cause an out of memory condition, or
+may cause long pauses during download as the large buckets are written
+to the cache disk.</p>
+
+<p>The default of 16MB should be sufficient for most applications. Setting the value to 0 disables the splitting of buckets.</p>
+
+</usage>
+</directivesynopsis>
+
 <directivesynopsis>
 <name>CacheIgnoreHeaders</name>
 <description>Do not store the given HTTP header(s) in the cache.
diff --git a/modules/cache/mod_cache.c b/modules/cache/mod_cache.c
index 0d0ba0f6fec..483b2bb5623 100644
--- a/modules/cache/mod_cache.c
+++ b/modules/cache/mod_cache.c
@@ -299,6 +299,92 @@ static int cache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
 }
 
 
+/*
+ * CACHE_SAVE - Do Store Body
+ *
+ * Run the store body hook on "in", and pass it on to the next filter.
+ *
+ * Buckets (specifically file buckets) can be extremely large, greater for
+ * example than available memory, and may take so long to be processed by
+ * store_body() that the client request times out before seeing any data.
+ * So that no provider has to care whether it might try to load a huge
+ * bucket into memory or save it to disk at once, buckets longer than
+ * conf->maxbucketsize are split, and each chunk is stored and passed on
+ * before the next is dealt with. A maxbucketsize of 0 disables splitting.
+ *
+ * Where possible inside the provider, the split brigade will be replaced
+ * by a file bucket from the cache. As file buckets are sent to the network
+ * using non blocking writes and setaside / queued if the network client is
+ * too slow, a slow network client will not hold up the writing to the cache.
+ *
+ * A store_body() failure is logged and removes this filter, but the data
+ * is still passed on. Returns the first ap_pass_brigade() failure seen,
+ * otherwise the status of passing on the final brigade.
+ */
+static int do_store_body(cache_request_rec *cache, ap_filter_t *f,
+                         apr_bucket_brigade *in) {
+    apr_bucket *e;
+    apr_bucket_brigade *bb;
+    apr_status_t rv, rv2;
+    cache_server_conf *conf;
+
+    conf = (cache_server_conf *) ap_get_module_config(f->r->server->module_config,
+                                                      &cache_module);
+
+    /* try to split any buckets larger than the threshold */
+    rv = APR_SUCCESS;  /* store_body() status, successful unless found otherwise */
+    rv2 = APR_SUCCESS; /* ap_pass_brigade() status for the split off chunks */
+    if (conf->maxbucketsize > 0) {
+        e = APR_BRIGADE_FIRST(in);
+        while (e != APR_BRIGADE_SENTINEL(in)) {
+            /* Split only buckets longer than the threshold; splitting at
+             * exactly the bucket's length could leave an empty bucket. */
+            if (e->length > conf->maxbucketsize
+                && APR_SUCCESS == apr_bucket_split(e, conf->maxbucketsize)) {
+                /* bb keeps the head up to the split, in becomes the rest */
+                e = APR_BUCKET_NEXT(e);
+                bb = in;
+                in = apr_brigade_split(bb, e);
+
+                /* if store body fails, don't try store body again */
+                if (APR_SUCCESS == rv) {
+                    rv = cache->provider->store_body(cache->handle, f->r, bb);
+                }
+
+                /* try to write the chunk to the filter stack and network */
+                if (APR_SUCCESS == rv2) {
+                    rv2 = ap_pass_brigade(f->next, bb);
+                }
+                /* empty bb, never destroy it: at first it is the caller's */
+                apr_brigade_cleanup(bb);
+            }
+            else {
+                e = APR_BUCKET_NEXT(e);
+            }
+        }
+    }
+
+    /* send whatever is left over to the cache */
+    if (APR_SUCCESS == rv) {
+        rv = cache->provider->store_body(cache->handle, f->r, in);
+    }
+
+    /* log any store body error we may have found */
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, f->r->server,
+                     "cache: store_body failed");
+        ap_remove_output_filter(f);
+    }
+
+    /* did our attempt to write to the network fail? */
+    if (APR_SUCCESS != rv2) {
+        return rv2;
+    }
+
+    return ap_pass_brigade(f->next, in);
+}
+
+
 /*
  * CACHE_SAVE filter
  * ---------------
@@ -821,6 +907,7 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in)
         return ap_pass_brigade(f->next, bb);
     }
 
+    /* Otherwise, if store_headers() failed on a fresh entry, bail out cleanly */
     if(rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
                      "cache: store_headers failed");
@@ -829,14 +916,9 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in)
         return ap_pass_brigade(f->next, in);
     }
 
-    rv = cache->provider->store_body(cache->handle, r, in);
-    if (rv != APR_SUCCESS) {
-        ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
-                     "cache: store_body failed");
-        ap_remove_output_filter(f);
-    }
+    /* It's now time to store the body of the request in the cache (finally!). */
+    return do_store_body(cache, f, in);
 
-    return ap_pass_brigade(f->next, in);
 }
 
 /*
@@ -920,6 +1002,8 @@ static void * create_cache_config(apr_pool_t *p, server_rec *s)
     /* array of headers that should not be stored in cache */
     ps->ignore_headers = apr_array_make(p, 10, sizeof(char *));
     ps->ignore_headers_set = CACHE_IGNORE_HEADERS_UNSET;
+    ps->maxbucketsize = CACHE_MAX_BUCKET_SIZE;
+    ps->maxbucketsize_set = 0;
     return ps;
 }
 
@@ -966,6 +1050,7 @@ static void * merge_cache_config(apr_pool_t *p, void *basev, void *overridesv)
         (overrides->ignore_headers_set == CACHE_IGNORE_HEADERS_UNSET)
         ? base->ignore_headers
         : overrides->ignore_headers;
+    ps->maxbucketsize = (overrides->maxbucketsize_set == 0) ? base->maxbucketsize : overrides->maxbucketsize;
     return ps;
 }
 static const char *set_cache_ignore_no_last_mod(cmd_parms *parms, void *dummy,
@@ -1157,6 +1242,23 @@ static const char *set_cache_factor(cmd_parms *parms, void *dummy,
     return NULL;
 }
 
+/* CacheMaxBucketSize: arg is a byte count fitting apr_size_t; 0 = no splitting. */
+static const char
+*set_cache_maxbucketsize(cmd_parms *parms, void *in_struct_ptr, const char *arg)
+{
+    cache_server_conf *conf = ap_get_module_config(parms->server->module_config,
+                                                 &cache_module);
+    apr_off_t size;
+    char *end; /* must stop at the NUL, so that "16MB" is not read as 16 */
+    if (apr_strtoff(&size, arg, &end, 0) != APR_SUCCESS || end == arg ||
+        *end != '\0' || size < 0 || (apr_off_t)(apr_size_t)size != size) {
+        return "CacheMaxBucketSize argument must be a non-negative integer in bytes. Set to 0 to disable.";
+    }
+    conf->maxbucketsize = (apr_size_t)size;
+    conf->maxbucketsize_set = 1;
+    return NULL;
+}
+
 static int cache_post_config(apr_pool_t *p, apr_pool_t *plog,
                              apr_pool_t *ptemp, server_rec *s)
 {
@@ -1211,6 +1313,11 @@ static const command_rec cache_cmds[] =
     AP_INIT_TAKE1("CacheLastModifiedFactor", set_cache_factor, NULL, RSRC_CONF,
                   "The factor used to estimate Expires date from "
                   "LastModified date"),
+    AP_INIT_TAKE1("CacheMaxBucketSize", set_cache_maxbucketsize, NULL, RSRC_CONF,
+                  "A tuneable safety threshold to stop the cache trying to process "
+                  "whole responses larger than RAM, or to to slow storage "
+                  "in one go. Specified as bytes, defaults to 16MB. Set to zero "
+                  "to disable."),
     {NULL}
 };
 
diff --git a/modules/cache/mod_cache.h b/modules/cache/mod_cache.h
index 2f6f8b05711..398db281c5d 100644
--- a/modules/cache/mod_cache.h
+++ b/modules/cache/mod_cache.h
@@ -154,6 +154,10 @@ typedef struct {
     /* Minimum time to keep cached files in msecs */
     apr_time_t minex;
     int minex_set;
+    /* max size of buckets to process in one go, default 16MB */
+    #define CACHE_MAX_BUCKET_SIZE AP_MAX_SENDFILE
+    apr_size_t maxbucketsize;
+    int maxbucketsize_set;
 } cache_server_conf;
 
 /* cache info information */