From: Justin Erenkrantz Date: Tue, 21 Sep 2004 22:56:23 +0000 (+0000) Subject: Fix Expires (freshness) handling in mod_cache. X-Git-Tag: 2.1.1~225 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0427774f8d3055317f9c5d7f36fc911062cf3007;p=thirdparty%2Fapache%2Fhttpd.git Fix Expires (freshness) handling in mod_cache. Previously, if the cached copy was stale, the response would go into an indeterminate state. Therefore, the freshness check must be done before we 'accept' the response and, if it fails (i.e. stale), we can't allow any side effects. This caused a number of changes to how mod_disk_cache reads its headers as ap_scan_script_header_err() purposely has side-effects and that's unacceptable. So, factor out only what we need. Also, remove the broken conditional filter code as you can't reliably alter the filter list once the response is started. (Regardless, cache_select_url() has the freshness checks now.) Assist to Sascha Schumann for reporting mod_cache was busted. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@105236 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/CHANGES b/CHANGES index 8d5f8c50c4d..ff51cce73f7 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,11 @@ Changes with Apache 2.1.0-dev [Remove entries to the current 2.0 section below, when backported] + *) Fix Expires handling in mod_cache. [Justin Erenkrantz] + + *) Alter mod_expires to run at a different filter priority to allow + proper Expires storage by mod_cache. [Justin Erenkrantz] + *) Fix the global mutex crash when the global mutex is never allocated due to disabled/empty caches. [Jess Holle ] diff --git a/modules/experimental/cache_storage.c b/modules/experimental/cache_storage.c index 1e87c6fdccf..c49d9e25d8b 100644 --- a/modules/experimental/cache_storage.c +++ b/modules/experimental/cache_storage.c @@ -98,6 +98,57 @@ int cache_create_entity(request_rec *r, char *url, apr_off_t size) return DECLINED; } +static int set_cookie_doo_doo(void *v, const char *key, const char *val) +{ + apr_table_addn(v, key, val); + return 1; +} + +static void accept_headers(cache_handle_t *h, request_rec *r) +{ + apr_table_t *cookie_table; + const char *v; + + v = apr_table_get(h->resp_hdrs, "Content-Type"); + if (v) { + ap_set_content_type(r, v); + apr_table_unset(h->resp_hdrs, "Content-Type"); + } + + /* If the cache gave us a Last-Modified header, we can't just + * pass it on blindly because of restrictions on future values. + */ + v = apr_table_get(h->resp_hdrs, "Last-Modified"); + if (v) { + ap_update_mtime(r, apr_date_parse_http(v)); + ap_set_last_modified(r); + apr_table_unset(h->resp_hdrs, "Last-Modified"); + } + + /* The HTTP specification says that it is legal to merge duplicate + * headers into one. Some browsers that support Cookies don't like + * merged headers and prefer that each Set-Cookie header is sent + * separately. Lets humour those browsers by not merging. + * Oh what a pain it is. + */ + cookie_table = apr_table_make(r->pool, 2); + apr_table_do(set_cookie_doo_doo, cookie_table, r->err_headers_out, + "Set-Cookie", NULL); + apr_table_do(set_cookie_doo_doo, cookie_table, h->resp_hdrs, + "Set-Cookie", NULL); + apr_table_unset(r->err_headers_out, "Set-Cookie"); + apr_table_unset(h->resp_hdrs, "Set-Cookie"); + + apr_table_overlap(r->headers_out, h->resp_hdrs, + APR_OVERLAP_TABLES_SET); + apr_table_overlap(r->err_headers_out, h->resp_err_hdrs, + APR_OVERLAP_TABLES_SET); + if (!apr_is_empty_table(cookie_table)) { + r->err_headers_out = apr_table_overlay(r->pool, r->err_headers_out, + cookie_table); + } +} + /* * select a specific URL entity in the cache * @@ -118,12 +169,12 @@ int cache_select_url(request_rec *r, char *url) cache_request_rec *cache = (cache_request_rec *) ap_get_module_config(r->request_config, &cache_module); - rv = cache_generate_key(r,r->pool,&key); + rv = cache_generate_key(r, r->pool, &key); if (rv != APR_SUCCESS) { return rv; } /* go through the cache types till we get a match */ - h = cache->handle = apr_palloc(r->pool, sizeof(cache_handle_t)); + h = apr_palloc(r->pool, sizeof(cache_handle_t)); list = cache->providers; @@ -132,32 +183,33 @@ int cache_select_url(request_rec *r, char *url) case OK: { char *vary = NULL; const char *varyhdr = NULL; + int fresh; + if (list->provider->recall_headers(h, r) != APR_SUCCESS) { /* TODO: Handle this error */ return DECLINED; } - r->filename = apr_pstrdup(r->pool, h->cache_obj->info.filename); - /* * Check Content-Negotiation - Vary * - * At this point we need to make sure that the object we found in the cache - * is the same object that would be delivered to the client, when the - * effects of content negotiation are taken into effect. - * + * At this point we need to make sure that the object we found in + * the cache is the same object that would be delivered to the + * client, when the effects of content negotiation are taken into + * effect. + * * In plain english, we want to make sure that a language-negotiated * document in one language is not given to a client asking for a * language negotiated document in a different language by mistake. - * + * * This code makes the assumption that the storage manager will * cache the req_hdrs if the response contains a Vary * header. - * + * * RFC2616 13.6 and 14.44 describe the Vary mechanism. */ - if ((varyhdr = apr_table_get(r->err_headers_out, "Vary")) == NULL) { - varyhdr = apr_table_get(r->headers_out, "Vary"); + if ((varyhdr = apr_table_get(h->resp_err_hdrs, "Vary")) == NULL) { + varyhdr = apr_table_get(h->resp_hdrs, "Vary"); } vary = apr_pstrdup(r->pool, varyhdr); while (vary && *vary) { @@ -186,16 +238,28 @@ int cache_select_url(request_rec *r, char *url) } else { /* headers do not match, so Vary failed */ - ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server, - "cache_select_url(): Vary header mismatch - Cached document cannot be used. \n"); - apr_table_clear(r->headers_out); - r->status_line = NULL; - cache->handle = NULL; + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, + r->server, + "cache_select_url(): Vary header mismatch."); return DECLINED; } } + + /* Is our cached response fresh enough? */ + fresh = ap_cache_check_freshness(h, r); + if (!fresh) { + return DECLINED; + } + + /* Okay, this response looks okay. Merge in our stuff and go. */ + apr_table_setn(r->headers_out, "Content-Type", + ap_make_content_type(r, h->content_type)); + r->filename = apr_pstrdup(r->pool, h->cache_obj->info.filename); + accept_headers(h, r); + cache->provider = list->provider; cache->provider_name = list->provider_name; + cache->handle = h; return OK; } case DECLINED: { @@ -205,12 +269,10 @@ int cache_select_url(request_rec *r, char *url) } default: { /* oo-er! an error */ - cache->handle = NULL; return rv; } } } - cache->handle = NULL; return DECLINED; } @@ -224,3 +286,4 @@ apr_status_t cache_generate_key_default( request_rec *r, apr_pool_t*p, char**key } return APR_SUCCESS; } + diff --git a/modules/experimental/cache_util.c b/modules/experimental/cache_util.c index ac377ca2aa9..bfb0c740db2 100644 --- a/modules/experimental/cache_util.c +++ b/modules/experimental/cache_util.c @@ -118,7 +118,7 @@ CACHE_DECLARE(apr_int64_t) ap_cache_current_age(cache_info *info, return apr_time_sec(current_age); } -CACHE_DECLARE(int) ap_cache_check_freshness(cache_request_rec *cache, +CACHE_DECLARE(int) ap_cache_check_freshness(cache_handle_t *h, request_rec *r) { apr_int64_t age, maxage_req, maxage_cresp, maxage, smaxage, maxstale; @@ -129,7 +129,7 @@ CACHE_DECLARE(int) ap_cache_check_freshness(cache_request_rec *cache, const char *expstr = NULL; char *val; apr_time_t age_c = 0; - cache_info *info = &(cache->handle->cache_obj->info); + cache_info *info = &(h->cache_obj->info); /* * We now want to check if our cached data is still fresh. This depends @@ -163,20 +163,20 @@ CACHE_DECLARE(int) ap_cache_check_freshness(cache_request_rec *cache, * entity, and it's value is in the past, it has expired. * */ - cc_cresp = apr_table_get(r->headers_out, "Cache-Control"); - cc_ceresp = apr_table_get(r->err_headers_out, "Cache-Control"); - cc_req = apr_table_get(r->headers_in, "Cache-Control"); - - if ((agestr = apr_table_get(r->headers_out, "Age"))) { + cc_cresp = apr_table_get(h->resp_hdrs, "Cache-Control"); + cc_ceresp = apr_table_get(h->resp_err_hdrs, "Cache-Control"); + cc_req = apr_table_get(h->req_hdrs, "Cache-Control"); + + if ((agestr = apr_table_get(h->resp_hdrs, "Age"))) { age_c = apr_atoi64(agestr); } - else if ((agestr = apr_table_get(r->err_headers_out, "Age"))) { + else if ((agestr = apr_table_get(h->resp_err_hdrs, "Age"))) { age_c = apr_atoi64(agestr); age_in_errhdr = 1; } - if (!(expstr = apr_table_get(r->err_headers_out, "Expires"))) { - expstr = apr_table_get(r->headers_out, "Expires"); + if (!(expstr = apr_table_get(h->resp_err_hdrs, "Expires"))) { + expstr = apr_table_get(h->resp_hdrs, "Expires"); } /* calculate age of object */ @@ -267,23 +267,23 @@ CACHE_DECLARE(int) ap_cache_check_freshness(cache_request_rec *cache, const char *warn_head; apr_table_t *head_ptr; - warn_head = apr_table_get(r->headers_out, "Warning"); + warn_head = apr_table_get(h->resp_hdrs, "Warning"); if (warn_head != NULL) { - head_ptr = r->headers_out; + head_ptr = h->resp_hdrs; } else { - warn_head = apr_table_get(r->err_headers_out, "Warning"); - head_ptr = r->err_headers_out; + warn_head = apr_table_get(h->resp_err_hdrs, "Warning"); + head_ptr = h->resp_err_hdrs; } /* it's fresh darlings... */ /* set age header on response */ if (age_in_errhdr) { - apr_table_set(r->err_headers_out, "Age", + apr_table_set(h->resp_err_hdrs, "Age", apr_psprintf(r->pool, "%lu", (unsigned long)age)); } else { - apr_table_set(r->headers_out, "Age", + apr_table_set(h->resp_hdrs, "Age", apr_psprintf(r->pool, "%lu", (unsigned long)age)); } diff --git a/modules/experimental/mod_cache.c b/modules/experimental/mod_cache.c index a4de02d5d16..bb2273798c4 100644 --- a/modules/experimental/mod_cache.c +++ b/modules/experimental/mod_cache.c @@ -28,7 +28,6 @@ APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key; */ static ap_filter_rec_t *cache_save_filter_handle; static ap_filter_rec_t *cache_out_filter_handle; -static ap_filter_rec_t *cache_conditional_filter_handle; /* * CACHE handler @@ -131,14 +130,10 @@ static int cache_url_handler(request_rec *r, int lookup) * If no existing cache file (DECLINED) * add cache_save filter * If cached file (OK) - * If fresh cache file - * clear filter stack - * add cache_out filter - * return OK - * If stale cache file - * add cache_conditional filter (which updates cache) + * clear filter stack + * add cache_out filter + * return OK */ - rv = cache_select_url(r, url); if (rv != OK) { if (rv == DECLINED) { @@ -158,38 +153,6 @@ static int cache_url_handler(request_rec *r, int lookup) } /* We have located a suitable cache file now. */ - - /* RFC2616 13.2 - Check cache object expiration */ - cache->fresh = ap_cache_check_freshness(cache, r); - - /* What we have in our cache isn't fresh. */ - if (!cache->fresh) { - /* If our stale cached response was conditional... */ - if (!lookup && ap_cache_request_is_conditional(r)) { - info = &(cache->handle->cache_obj->info); - - /* fudge response into a conditional */ - if (info && info->etag) { - /* if we have a cached etag */ - apr_table_set(r->headers_in, "If-None-Match", info->etag); - } - else if (info && info->lastmods) { - /* if we have a cached IMS */ - apr_table_set(r->headers_in, "If-Modified-Since", - info->lastmods); - } - } - - /* Add cache_conditional_filter to see if we can salvage - * later. - */ - ap_add_output_filter_handle(cache_conditional_filter_handle, - NULL, r, r->connection); - return DECLINED; - } - - /* fresh data available */ - info = &(cache->handle->cache_obj->info); if (info && info->lastmod) { @@ -268,39 +231,6 @@ static int cache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) } -/* - * CACHE_CONDITIONAL filter - * ------------------------ - * - * Decide whether or not cached content should be delivered - * based on our fudged conditional request. - * If response HTTP_NOT_MODIFIED - * replace ourselves with cache_out filter - * Otherwise - * replace ourselves with cache_save filter - */ - -static int cache_conditional_filter(ap_filter_t *f, apr_bucket_brigade *in) -{ - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, f->r->server, - "cache: running CACHE_CONDITIONAL filter"); - - if (f->r->status == HTTP_NOT_MODIFIED) { - /* replace ourselves with CACHE_OUT filter */ - ap_add_output_filter_handle(cache_out_filter_handle, NULL, - f->r, f->r->connection); - } - else { - /* replace ourselves with CACHE_SAVE filter */ - ap_add_output_filter_handle(cache_save_filter_handle, NULL, - f->r, f->r->connection); - } - ap_remove_output_filter(f); - - return ap_pass_brigade(f->next, in); -} - - /* * CACHE_SAVE filter * --------------- @@ -973,11 +903,6 @@ static void register_hooks(apr_pool_t *p) cache_out_filter, NULL, AP_FTYPE_CONTENT_SET-1); - cache_conditional_filter_handle = - ap_register_output_filter("CACHE_CONDITIONAL", - cache_conditional_filter, - NULL, - AP_FTYPE_CONTENT_SET); ap_hook_post_config(cache_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST); } diff --git a/modules/experimental/mod_cache.h b/modules/experimental/mod_cache.h index 6fb289a221c..e519940e668 100644 --- a/modules/experimental/mod_cache.h +++ b/modules/experimental/mod_cache.h @@ -203,7 +203,11 @@ struct cache_provider_list { struct cache_handle { cache_object_t *cache_obj; - apr_table_t *req_hdrs; /* These are the original request headers */ + apr_table_t *req_hdrs; /* cached request headers */ + apr_table_t *resp_hdrs; /* cached response headers */ + apr_table_t *resp_err_hdrs; /* cached response err headers */ + const char *content_type; /* cached content type */ + int status; /* cached status */ }; /* per request cache information */ @@ -229,11 +233,11 @@ CACHE_DECLARE(apr_time_t) ap_cache_current_age(cache_info *info, const apr_time_ /** * Check the freshness of the cache object per RFC2616 section 13.2 (Expiration Model) - * @param cache cache_request_rec + * @param h cache_handle_t * @param r request_rec * @return 0 ==> cache object is stale, 1 ==> cache object is fresh */ -CACHE_DECLARE(int) ap_cache_check_freshness(cache_request_rec *cache, request_rec *r); +CACHE_DECLARE(int) ap_cache_check_freshness(cache_handle_t *h, request_rec *r); CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x); CACHE_DECLARE(void) ap_cache_usec2hex(apr_time_t j, char *y); CACHE_DECLARE(char *) generate_name(apr_pool_t *p, int dirlevels, diff --git a/modules/experimental/mod_disk_cache.c b/modules/experimental/mod_disk_cache.c index a3efeee0803..ce4b11a30b6 100644 --- a/modules/experimental/mod_disk_cache.c +++ b/modules/experimental/mod_disk_cache.c @@ -415,6 +415,88 @@ static int remove_url(const char *key) return OK; } +static apr_status_t read_table(cache_handle_t *handle, request_rec *r, + apr_table_t *table, apr_file_t *file) +{ + char w[MAX_STRING_LEN]; + char *l; + int p; + apr_status_t rv; + + while (1) { + + /* ### What about APR_EOF? */ + rv = apr_file_gets(w, MAX_STRING_LEN - 1, file); + if (rv != APR_SUCCESS) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, + "Premature end of cache headers."); + return rv; + } + + /* Delete terminal (CR?)LF */ + + p = strlen(w); + /* Indeed, the host's '\n': + '\012' for UNIX; '\015' for MacOS; '\025' for OS/390 + -- whatever the script generates. + */ + if (p > 0 && w[p - 1] == '\n') { + if (p > 1 && w[p - 2] == CR) { + w[p - 2] = '\0'; + } + else { + w[p - 1] = '\0'; + } + } + + /* If we've finished reading the headers, break out of the loop. */ + if (w[0] == '\0') { + break; + } + +#if APR_CHARSET_EBCDIC + /* Chances are that we received an ASCII header text instead of + * the expected EBCDIC header lines. Try to auto-detect: + */ + if (!(l = strchr(w, ':'))) { + int maybeASCII = 0, maybeEBCDIC = 0; + unsigned char *cp, native; + apr_size_t inbytes_left, outbytes_left; + + for (cp = w; *cp != '\0'; ++cp) { + native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp); + if (apr_isprint(*cp) && !apr_isprint(native)) + ++maybeEBCDIC; + if (!apr_isprint(*cp) && apr_isprint(native)) + ++maybeASCII; + } + if (maybeASCII > maybeEBCDIC) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)", + r->filename); + inbytes_left = outbytes_left = cp - w; + apr_xlate_conv_buffer(ap_hdrs_from_ascii, + w, &inbytes_left, w, &outbytes_left); + } + } +#endif /*APR_CHARSET_EBCDIC*/ + + /* if we see a bogus header don't ignore it. Shout and scream */ + if (!(l = strchr(w, ':'))) { + return APR_EGENERAL; + } + + *l++ = '\0'; + while (*l && apr_isspace(*l)) { + ++l; + } + + apr_table_add(table, w, l); + } + + return APR_SUCCESS; +} + /* * Reads headers from a buffer and returns an array of headers. * Returns NULL on file error @@ -433,34 +515,19 @@ static apr_status_t recall_headers(cache_handle_t *h, request_rec *r) return APR_NOTFOUND; } - if(!r->headers_out) { - r->headers_out = apr_table_make(r->pool, 20); - } - - /* - * Call routine to read the header lines/status line - */ - r->status = dobj->disk_info.status; - ap_scan_script_header_err(r, dobj->hfd, NULL); - - apr_table_setn(r->headers_out, "Content-Type", - ap_make_content_type(r, r->content_type)); - h->req_hdrs = apr_table_make(r->pool, 20); + h->resp_hdrs = apr_table_make(r->pool, 20); + h->resp_err_hdrs = apr_table_make(r->pool, 20); - /* - * Call routine to read the header lines/status line - * - * Note that ap_scan_script_header_err sets to r->err_headers_out, - * so we must set the real one aside. - */ - tmp = r->err_headers_out; - r->err_headers_out = h->req_hdrs; - ap_scan_script_header_err(r, dobj->hfd, NULL); - r->err_headers_out = tmp; + /* Call routine to read the header lines/status line */ + read_table(h, r, h->resp_hdrs, dobj->hfd); + read_table(h, r, h->req_hdrs, dobj->hfd); apr_file_close(dobj->hfd); + h->status = dobj->disk_info.status; + h->content_type = apr_table_get(h->resp_hdrs, "Content-Type"); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "disk_cache: Recalled headers for URL %s", dobj->name); return APR_SUCCESS; diff --git a/modules/experimental/mod_mem_cache.c b/modules/experimental/mod_mem_cache.c index e76ec486c1c..108522f4981 100644 --- a/modules/experimental/mod_mem_cache.c +++ b/modules/experimental/mod_mem_cache.c @@ -667,8 +667,9 @@ static apr_status_t recall_headers(cache_handle_t *h, request_rec *r) mem_cache_object_t *mobj = (mem_cache_object_t*) h->cache_obj->vobj; h->req_hdrs = apr_table_make(r->pool, mobj->num_req_hdrs); - r->headers_out = apr_table_make(r->pool, mobj->num_header_out); - r->err_headers_out = apr_table_make(r->pool, mobj->num_err_header_out); + h->resp_hdrs = apr_table_make(r->pool, mobj->num_header_out); + h->resp_err_hdrs = apr_table_make(r->pool, mobj->num_err_header_out); + /* ### FIXME: These two items should not be saved. */ r->subprocess_env = apr_table_make(r->pool, mobj->num_subprocess_env); r->notes = apr_table_make(r->pool, mobj->num_notes); @@ -677,10 +678,10 @@ static apr_status_t recall_headers(cache_handle_t *h, request_rec *r) h->req_hdrs); rc = unserialize_table( mobj->header_out, mobj->num_header_out, - r->headers_out); + h->resp_hdrs); rc = unserialize_table( mobj->err_header_out, mobj->num_err_header_out, - r->err_headers_out); + h->resp_err_hdrs); rc = unserialize_table( mobj->subprocess_env, mobj->num_subprocess_env, r->subprocess_env); @@ -691,7 +692,7 @@ static apr_status_t recall_headers(cache_handle_t *h, request_rec *r) /* Content-Type: header may not be set if content is local since * CACHE_IN runs before header filters.... */ - ap_set_content_type(r, apr_pstrdup(r->pool, h->cache_obj->info.content_type)); + h->content_type = h->cache_obj->info.content_type; return rc; }