From: André Malo Date: Mon, 12 Jan 2004 00:59:03 +0000 (+0000) Subject: finally bring mod_include's parser bump into 2.1. X-Git-Tag: 2.0.49~227 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b42d27ab6f729c7666c994976304851b5f6c8f06;p=thirdparty%2Fapache%2Fhttpd.git finally bring mod_include's parser bump into 2.1. Thanks folks! PR: a lot Obtained from: 2.1 Reviewed by: Brian Pane, Jeff Trawick (, Justin Erenkrantz) git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/APACHE_2_0_BRANCH@102277 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/CHANGES b/CHANGES index 509e3e4262a..53c56e74367 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,11 @@ Changes with Apache 2.0.49 + *) Backport major overhaul of mod_include's filter parser from 2.1. + The new parser code is expected to be more robust and should + catch all of the edge cases that were not handled by the previous one. + The 2.1 external API changes were hidden by a wrapper which is + expected to keep the API backwards compatible. [André Malo] + *) Add a hook (insert_error_filter) to allow filters to re-insert themselves during processing of error responses. Enable mod_expires to use the new hook to include Expires headers in valid error diff --git a/STATUS b/STATUS index 21424588010..4febae276a3 100644 --- a/STATUS +++ b/STATUS @@ -1,5 +1,5 @@ APACHE 2.0 STATUS: -*-text-*- -Last modified at [$Date: 2004/01/12 00:04:37 $] +Last modified at [$Date: 2004/01/12 00:59:02 $] Release: @@ -294,31 +294,6 @@ PATCHES TO BACKPORT FROM 2.1 modules/mappers/mod_rewrite.c: r1.228 +1: nd - * Overhaul of mod_include's filter parser. - modules/filters/mod_include.c: r1.235, r1.236 - modules/filters/mod_include.h: r1.35 - jerenkrantz asks: Doesn't this require a MMN bump? - nd replies: The rewrite contains a wrapper around the new API that - emulates the old one. The only difference is that - foo_start_bucket/_index fields are not filled as before - (and ssi_tag_brigade is always NULL). 
Since they are only of - internal meaning I do not think that the change requires a - bump. - +1: nd, brianp, trawick - +1 (concept): jerenkrantz (on r2 below) - jwoolley suggests: Hang onto it for 2.2 series. - brianp asks: Do we have an alternate fix to eliminate 2.0's segfaults? - nd adds: I've found some time over the weekend to create the following - patch: . - It creates for backwards compat an ssi_tag_brigade on the fly - before executing the tag's handler function and fills that way - *_start_bucket/_index fields and ssi_tag_brigade. So nothing - should have been changed outside the module. - I'd really like to patch this one into 2.0. - - Success with this patch has been reported in PR 22946. Upcoming - issues were resolved and are included in the -r2.patch. - * mod_include's expression tokenizer: don't skip the first character of a string, it may be a backslash. (2.0 + 1.3) (patch for 2.0: http://cvs.apache.org/~nd/ssi-escape.patch) diff --git a/modules/filters/mod_include.c b/modules/filters/mod_include.c index bdca6663b9e..18207bea7df 100644 --- a/modules/filters/mod_include.c +++ b/modules/filters/mod_include.c @@ -73,6 +73,7 @@ #include "apr_optional.h" #define APR_WANT_STRFUNC +#define APR_WANT_MEMFUNC #include "apr_want.h" #define CORE_PRIVATE @@ -89,6 +90,8 @@ #include "http_main.h" #include "util_script.h" #include "http_core.h" + +#define MOD_INCLUDE_REDESIGN #include "mod_include.h" #include "util_ebcdic.h" @@ -126,6 +129,58 @@ typedef struct { int undefinedEchoLen; } include_server_config; +/* main parser states */ +typedef enum { + PARSE_PRE_HEAD, + PARSE_HEAD, + PARSE_DIRECTIVE, + PARSE_DIRECTIVE_POSTNAME, + PARSE_DIRECTIVE_TAIL, + PARSE_DIRECTIVE_POSTTAIL, + PARSE_PRE_ARG, + PARSE_ARG, + PARSE_ARG_NAME, + PARSE_ARG_POSTNAME, + PARSE_ARG_EQ, + PARSE_ARG_PREVAL, + PARSE_ARG_VAL, + PARSE_ARG_VAL_ESC, + PARSE_ARG_POSTVAL, + PARSE_TAIL, + PARSE_TAIL_SEQ, + PARSE_EXECUTE +} parse_state_t; + +typedef struct ssi_arg_item { + struct ssi_arg_item 
*next; + char *name; + apr_size_t name_len; + char *value; + apr_size_t value_len; +} ssi_arg_item_t; + +typedef struct { + parse_state_t state; + int seen_eos; + int error; + char quote; /* quote character value (or \0) */ + + apr_bucket_brigade *tmp_bb; + + apr_size_t end_seq_len; + char *directive; /* name of the current directive */ + + unsigned argc; /* argument counter (of the current + * directive) + */ + ssi_arg_item_t *argv; /* all arguments */ + ssi_arg_item_t *current_arg; /* currently parsed argument */ + request_rec *r; + include_ctx_t *ctx; /* public part of the context structure */ + + apr_pool_t *dpool; +} ssi_ctx_t; + #ifdef XBITHACK #define DEFAULT_XBITHACK xbithack_full #else @@ -134,6 +189,11 @@ typedef struct { #define BYTE_COUNT_THRESHOLD AP_MIN_BYTES_TO_WRITE +#define SSI_CREATE_ERROR_BUCKET(ctx, f, bb) APR_BRIGADE_INSERT_TAIL((bb), \ + apr_bucket_pool_create(apr_pstrdup((ctx)->pool, (ctx)->error_str), \ + strlen((ctx)->error_str), (ctx)->pool, \ + (f)->c->bucket_alloc)) + /* ------------------------ Environment function -------------------------- */ /* Sentinel value to store in subprocess_env for items that @@ -316,463 +376,6 @@ static apr_size_t bndm(const char *n, apr_size_t nl, const char *h, return hl; } -/* We've now found a start sequence tag... */ -static apr_bucket* found_start_sequence(apr_bucket *dptr, - include_ctx_t *ctx, - apr_size_t tagStart, - apr_size_t len) -{ - /* We want to split the bucket at the '<'. 
*/ - ctx->state = PARSE_DIRECTIVE; - ctx->tag_length = 0; - ctx->parse_pos = 0; - - /* If tagStart indexes the end of the bucket, then tag_start_bucket - * should be the next bucket - */ - if (tagStart < len) { - ctx->tag_start_bucket = dptr; - ctx->tag_start_index = tagStart; - } - else { - ctx->tag_start_bucket = APR_BUCKET_NEXT(dptr); - ctx->tag_start_index = 0; - } - - if (ctx->head_start_index > 0) { - apr_bucket *tmp_bkt; - - /* Split the bucket with the start of the tag in it */ - apr_bucket_split(ctx->head_start_bucket, ctx->head_start_index); - tmp_bkt = APR_BUCKET_NEXT(ctx->head_start_bucket); - /* If it was a one bucket match */ - if ((tagStart < len) && (dptr == ctx->head_start_bucket)) { - ctx->tag_start_bucket = tmp_bkt; - ctx->tag_start_index = tagStart - ctx->head_start_index; - } - ctx->head_start_bucket = tmp_bkt; - ctx->head_start_index = 0; - } - return ctx->head_start_bucket; -} - -/* This function returns either a pointer to the split bucket containing the - * first byte of the BEGINNING_SEQUENCE (after finding a complete match) or it - * returns NULL if no match found. 
- */ -static apr_bucket *find_start_sequence(apr_bucket *dptr, include_ctx_t *ctx, - apr_bucket_brigade *bb, int *do_cleanup) -{ - apr_size_t len; - const char *c; - const char *buf; - const char *str = ctx->start_seq ; - apr_size_t slen = ctx->start_seq_len; - apr_size_t pos; - - *do_cleanup = 0; - - do { - apr_status_t rv = 0; - int read_done = 0; - - if (APR_BUCKET_IS_EOS(dptr)) { - break; - } - -#if 0 - /* XXX the bucket flush support is commented out for now - * because it was causing a segfault */ - if (APR_BUCKET_IS_FLUSH(dptr)) { - apr_bucket *old = dptr; - dptr = APR_BUCKET_NEXT(old); - APR_BUCKET_REMOVE(old); - ctx->output_now = 1; - ctx->output_flush = 1; - } - else -#endif /* 0 */ - if (ctx->bytes_parsed >= BYTE_COUNT_THRESHOLD) { - ctx->output_now = 1; - } - else if (ctx->bytes_parsed > 0) { - rv = apr_bucket_read(dptr, &buf, &len, APR_NONBLOCK_READ); - read_done = 1; - if (APR_STATUS_IS_EAGAIN(rv)) { - ctx->output_now = 1; - } - } - - if (ctx->output_now) { - apr_bucket *start_bucket; - if (ctx->head_start_index > 0) { - start_bucket = ctx->head_start_bucket; - apr_bucket_split(start_bucket, ctx->head_start_index); - start_bucket = APR_BUCKET_NEXT(start_bucket); - ctx->head_start_index = 0; - ctx->head_start_bucket = start_bucket; - ctx->parse_pos = 0; - ctx->state = PRE_HEAD; - } - else { - start_bucket = dptr; - } - return start_bucket; - } - - if (!read_done) { - rv = apr_bucket_read(dptr, &buf, &len, APR_BLOCK_READ); - } - if (!APR_STATUS_IS_SUCCESS(rv)) { - ctx->status = rv; - return NULL; - } - - if (len == 0) { /* end of pipe? */ - dptr = APR_BUCKET_NEXT(dptr); - continue; - } - - /* Set our buffer to use. */ - c = buf; - - /* The last bucket had a left over partial match that we need to - * complete. - */ - if (ctx->state == PARSE_HEAD) - { - apr_size_t tmpLen; - tmpLen = (len < (slen - 1)) ? 
len : (slen - 1); - - while (c < buf + tmpLen && *c == str[ctx->parse_pos]) - { - c++; - ctx->parse_pos++; - } - - if (str[ctx->parse_pos] == '\0') - { - ctx->bytes_parsed += c - buf; - return found_start_sequence(dptr, ctx, c - buf, len); - } - else if (c == buf + tmpLen) { - dptr = APR_BUCKET_NEXT(dptr); - continue; - } - - /* False alarm... - */ - APR_BRIGADE_PREPEND(bb, ctx->ssi_tag_brigade); - - /* We know we are at the beginning of this bucket so - * we can just prepend the saved bytes from the - * ssi_tag_brigade (which empties the ssi_tag_brigade) - * and continue processing. - * We do not need to set do_cleanup beacuse the - * prepend takes care of that. - */ - ctx->state = PRE_HEAD; - ctx->head_start_bucket = NULL; - ctx->head_start_index = 0; - } - - if (len) - { - pos = bndm(str, slen, buf, len, ctx->start_seq_pat); - if (pos != len) - { - ctx->head_start_bucket = dptr; - ctx->head_start_index = pos; - ctx->bytes_parsed += pos + slen; - return found_start_sequence(dptr, ctx, pos + slen, len); - } - } - - /* Consider the case where we have . This makes the - * second check after the original check fails. - * If parse_pos was already 0 then we already checked - * this. 
- */ - ctx->tag_length += ctx->parse_pos; - - if (*c == str[0]) { - ctx->state = PARSE_TAIL; - ctx->tail_start_bucket = dptr; - ctx->tail_start_index = c - buf; - ctx->parse_pos = 1; - } - else { - ctx->tag_length++; - if (ctx->tag_length > ctx->directive_length) { - ctx->state = PARSE_TAG; - } - else { - ctx->state = PARSE_DIRECTIVE; - ctx->directive_length += ctx->parse_pos; - } - ctx->tail_start_bucket = NULL; - ctx->tail_start_index = 0; - ctx->parse_pos = 0; - } - } - } - } - c++; - } - ctx->bytes_parsed += (c - start); - dptr = APR_BUCKET_NEXT(dptr); - } while (dptr != APR_BRIGADE_SENTINEL(bb)); - return NULL; -} - -/* This function culls through the buckets that have been set aside in the - * ssi_tag_brigade and copies just the directive part of the SSI tag (none - * of the start and end delimiter bytes are copied). - */ -static apr_status_t get_combined_directive (include_ctx_t *ctx, - request_rec *r, - apr_bucket_brigade *bb, - char *tmp_buf, - apr_size_t tmp_buf_size) -{ - int done = 0; - apr_bucket *dptr; - const char *tmp_from; - apr_size_t tmp_from_len; - - /* If the tag length is longer than the tmp buffer, allocate space. */ - if (ctx->tag_length > tmp_buf_size-1) { - if ((ctx->combined_tag = apr_pcalloc(r->pool, - ctx->tag_length + 1)) == NULL) { - return (APR_ENOMEM); - } - } /* Else, just use the temp buffer. */ - else { - ctx->combined_tag = tmp_buf; - } - - /* Prime the pump. Start at the beginning of the tag... */ - dptr = ctx->tag_start_bucket; - /* Read the bucket... */ - apr_bucket_read (dptr, &tmp_from, &tmp_from_len, 0); - - /* Adjust the pointer to start at the tag within the bucket... 
*/ - if (dptr == ctx->tail_start_bucket) { - tmp_from_len -= (tmp_from_len - ctx->tail_start_index); - } - tmp_from = &tmp_from[ctx->tag_start_index]; - tmp_from_len -= ctx->tag_start_index; - ctx->curr_tag_pos = ctx->combined_tag; - - /* Loop through the buckets from the tag_start_bucket until before - * the tail_start_bucket copying the contents into the buffer. - */ - do { - memcpy (ctx->curr_tag_pos, tmp_from, tmp_from_len); - ctx->curr_tag_pos += tmp_from_len; - - if (dptr == ctx->tail_start_bucket) { - done = 1; - } - else { - dptr = APR_BUCKET_NEXT (dptr); - apr_bucket_read (dptr, &tmp_from, &tmp_from_len, 0); - /* Adjust the count to stop at the beginning of the tail. */ - if (dptr == ctx->tail_start_bucket) { - tmp_from_len -= (tmp_from_len - ctx->tail_start_index); - } - } - } while ((!done) && - (ctx->curr_tag_pos < ctx->combined_tag + ctx->tag_length)); - - ctx->combined_tag[ctx->tag_length] = '\0'; - ctx->curr_tag_pos = ctx->combined_tag; - - return (APR_SUCCESS); -} - /* * decodes a string containing html entities or numeric character references. * 's' is overwritten with the decoded string. @@ -890,106 +493,34 @@ otilde\365oslash\370ugrave\371uacute\372yacute\375" /* 6 */ static void ap_ssi_get_tag_and_value(include_ctx_t *ctx, char **tag, char **tag_val, int dodecode) { - char *c = ctx->curr_tag_pos; - int shift_val = 0; - char term = '\0'; - *tag_val = NULL; - if (ctx->curr_tag_pos > ctx->combined_tag + ctx->tag_length) { + if (ctx->curr_tag_pos >= ctx->combined_tag + ctx->tag_length) { *tag = NULL; return; } - SKIP_TAG_WHITESPACE(c); - *tag = c; /* First non-whitespace character (could be NULL). */ - - while (apr_islower(*c)) { - c++; /* Optimization for the common case where the tag */ - } /* is already lowercase */ - while ((*c != '=') && (!apr_isspace(*c)) && (*c != '\0')) { - *c = apr_tolower(*c); /* find end of tag, lowercasing as we go... 
*/ - c++; + *tag = ctx->curr_tag_pos; + if (!**tag) { + *tag = NULL; + /* finitio */ + ctx->curr_tag_pos = ctx->combined_tag + ctx->tag_length; + return; } - if ((*c == '\0') || (**tag == '=')) { - if ((**tag == '\0') || (**tag == '=')) { - *tag = NULL; - } - ctx->curr_tag_pos = c; - return; /* We have found the end of the buffer. */ - } /* We might have a tag, but definitely no value. */ - - if (*c == '=') { - *c++ = '\0'; /* Overwrite the '=' with a terminating byte after tag. */ - } - else { /* Try skipping WS to find the '='. */ - *c++ = '\0'; /* Terminate the tag... */ - SKIP_TAG_WHITESPACE(c); - - /* There needs to be an equal sign if there's a value. */ - if (*c != '=') { - ctx->curr_tag_pos = c; - return; /* There apparently was no value. */ - } - else { - c++; /* Skip the equals sign. */ - } + *tag_val = ap_strchr(*tag, '='); + if (!*tag_val) { + ctx->curr_tag_pos = ctx->combined_tag + ctx->tag_length; + return; } - SKIP_TAG_WHITESPACE(c); - if (*c == '"' || *c == '\'' || *c == '`') { - /* Allow quoted values for space inclusion. - * NOTE: This does not pass the quotes on return. - */ - term = *c++; - } - - *tag_val = c; - if (!term) { - while (!apr_isspace(*c) && (*c != '\0')) { - c++; - } + /* if it starts with '=' there was no tag name, just a value */ + if (*tag_val == *tag) { + *tag = NULL; } - else { - while ((*c != term) && (*c != '\0') && (*c != '\\')) { - /* Quickly scan past the string until we reach - * either the end of the tag or a backslash. If - * we find a backslash, we have to switch to the - * more complicated parser loop that follows. - */ - c++; - } - if (*c == '\\') { - do { - /* Accept \" (or ' or `) as valid quotation of string. - */ - if (*c == '\\') { - /* Overwrite the "\" during the embedded - * escape sequence of '"'. "\'" or '`'. - * Shift bytes from here to next delimiter. 
- */ - c++; - if (*c == term) { - shift_val++; - } - if (shift_val > 0) { - *(c-shift_val) = *c; - } - if (*c == '\0') { - break; - } - } - c++; - if (shift_val > 0) { - *(c-shift_val) = *c; - } - } while ((*c != term) && (*c != '\0')); - } - } - - *(c-shift_val) = '\0'; /* Overwrites delimiter (term or WS) with NULL. */ - ctx->curr_tag_pos = ++c; + *(*tag_val)++ = '\0'; + ctx->curr_tag_pos = *tag_val + strlen(*tag_val) + 1; /* skip \0 byte */ + if (dodecode) { decodehtml(*tag_val); } @@ -1354,6 +885,7 @@ static int handle_include(include_ctx_t *ctx, apr_bucket_brigade **bb, "unknown parameter \"%s\" to tag include in %s", tag, r->filename); CREATE_ERROR_BUCKET(ctx, tmp_buck, head_ptr, *inserted_head); + return 1; } } } @@ -1432,6 +964,7 @@ static int handle_echo(include_ctx_t *ctx, apr_bucket_brigade **bb, "tag echo in %s", tag_val, r->filename); CREATE_ERROR_BUCKET(ctx, tmp_buck, head_ptr, *inserted_head); + return 1; } } else { @@ -1439,6 +972,7 @@ static int handle_echo(include_ctx_t *ctx, apr_bucket_brigade **bb, "unknown parameter \"%s\" in tag echo of %s", tag, r->filename); CREATE_ERROR_BUCKET(ctx, tmp_buck, head_ptr, *inserted_head); + return 1; } } @@ -1514,6 +1048,7 @@ static int handle_config(include_ctx_t *ctx, apr_bucket_brigade **bb, "unknown parameter \"%s\" to tag config in %s", tag, r->filename); CREATE_ERROR_BUCKET(ctx, tmp_buck, head_ptr, *inserted_head); + return 1; } } } @@ -1663,6 +1198,7 @@ static int handle_fsize(include_ctx_t *ctx, apr_bucket_brigade **bb, else { CREATE_ERROR_BUCKET(ctx, tmp_buck, head_ptr, *inserted_head); + return 1; } } } @@ -1712,6 +1248,7 @@ static int handle_flastmod(include_ctx_t *ctx, apr_bucket_brigade **bb, else { CREATE_ERROR_BUCKET(ctx, tmp_buck, head_ptr, *inserted_head); + return 1; } } } @@ -2642,6 +2179,7 @@ static int handle_if(include_ctx_t *ctx, apr_bucket_brigade **bb, "unknown parameter \"%s\" to tag if in %s", tag, r->filename); CREATE_ERROR_BUCKET(ctx, tmp_buck, head_ptr, *inserted_head); + return 1; 
} } @@ -2678,7 +2216,7 @@ static int handle_elif(include_ctx_t *ctx, apr_bucket_brigade **bb, r->filename); CREATE_ERROR_BUCKET(ctx, tmp_buck, head_ptr, *inserted_head); - return (1); + return 1; } expr_ret = parse_expr(r, ctx, expr, &was_error, &was_unmatched, debug_buf); @@ -2725,6 +2263,7 @@ static int handle_elif(include_ctx_t *ctx, apr_bucket_brigade **bb, "unknown parameter \"%s\" to tag if in %s", tag, r->filename); CREATE_ERROR_BUCKET(ctx, tmp_buck, head_ptr, *inserted_head); + return 1; } } } @@ -2914,362 +2453,1032 @@ static int handle_printenv(include_ctx_t *ctx, apr_bucket_brigade **bb, /* -------------------------- The main function --------------------------- */ -static apr_status_t send_parsed_content(apr_bucket_brigade **bb, - request_rec *r, ap_filter_t *f) +/* + * returns the index position of the first byte of start_seq (or the len of + * the buffer as non-match) + */ +static apr_size_t find_start_sequence(ssi_ctx_t *ctx, const char *data, + apr_size_t len) { - include_ctx_t *ctx = f->ctx; - apr_bucket *dptr = APR_BRIGADE_FIRST(*bb); - apr_bucket *tmp_dptr; - apr_bucket_brigade *tag_and_after; - apr_status_t rv = APR_SUCCESS; + apr_size_t slen = ctx->ctx->start_seq_len; + apr_size_t index; + const char *p, *ep; - if (r->args) { /* add QUERY stuff to env cause it ain't yet */ - char *arg_copy = apr_pstrdup(r->pool, r->args); + if (len < slen) { + p = data; /* try partial match at the end of the buffer (below) */ + } + else { + /* try fast bndm search over the buffer + * (hopefully the whole start sequence can be found in this buffer) + */ + index = bndm(ctx->ctx->start_seq, ctx->ctx->start_seq_len, data, len, + ctx->ctx->start_seq_pat); - apr_table_setn(r->subprocess_env, "QUERY_STRING", r->args); - ap_unescape_url(arg_copy); - apr_table_setn(r->subprocess_env, "QUERY_STRING_UNESCAPED", - ap_escape_shell_cmd(r->pool, arg_copy)); + /* wow, found it. ready. 
*/ + if (index < len) { + ctx->state = PARSE_DIRECTIVE; + return index; + } + else { + /* ok, the pattern can't be found as whole in the buffer, + * check the end for a partial match + */ + p = data + len - slen + 1; + } } - while (dptr != APR_BRIGADE_SENTINEL(*bb) && !APR_BUCKET_IS_EOS(dptr)) { - /* State to check for the STARTING_SEQUENCE. */ - if ((ctx->state == PRE_HEAD) || (ctx->state == PARSE_HEAD)) { - int do_cleanup = 0; - apr_size_t cleanup_bytes = ctx->parse_pos; + ep = data + len; + do { + while (p < ep && *p != *ctx->ctx->start_seq) { + ++p; + } - tmp_dptr = find_start_sequence(dptr, ctx, *bb, &do_cleanup); - if (!APR_STATUS_IS_SUCCESS(ctx->status)) { - return ctx->status; - } + index = p - data; - /* The few bytes stored in the ssi_tag_brigade turned out not to - * be a tag after all. This can only happen if the starting - * tag actually spans brigades. This should be very rare. - */ - if ((do_cleanup) && (!APR_BRIGADE_EMPTY(ctx->ssi_tag_brigade))) { - apr_bucket *tmp_bkt; + /* found a possible start_seq start */ + if (p < ep) { + apr_size_t pos = 1; - tmp_bkt = apr_bucket_immortal_create(ctx->start_seq, - cleanup_bytes, - r->connection->bucket_alloc); - APR_BRIGADE_INSERT_HEAD(*bb, tmp_bkt); - apr_brigade_cleanup(ctx->ssi_tag_brigade); + ++p; + while (p < ep && *p == ctx->ctx->start_seq[pos]) { + ++p; + ++pos; } - /* If I am inside a conditional (if, elif, else) that is false - * then I need to throw away anything contained in it. - */ - if ((!(ctx->flags & FLAG_PRINTING)) && - (dptr != APR_BRIGADE_SENTINEL(*bb))) { - apr_bucket *stop = (!tmp_dptr && ctx->state == PARSE_HEAD) - ? ctx->head_start_bucket - : tmp_dptr; - - while ((dptr != APR_BRIGADE_SENTINEL(*bb)) && (dptr != stop)) { - apr_bucket *free_bucket = dptr; - - dptr = APR_BUCKET_NEXT(dptr); - if (!APR_BUCKET_IS_METADATA(free_bucket)) { - apr_bucket_delete(free_bucket); - } - } + /* partial match found. 
Store the info for the next round */ + if (p == ep) { + ctx->state = PARSE_HEAD; + ctx->ctx->parse_pos = pos; + return index; } + } - /* Adjust the current bucket position based on what was found... */ - if ((tmp_dptr != NULL) && (ctx->state == PARSE_DIRECTIVE)) { - if (ctx->tag_start_bucket != NULL) { - dptr = ctx->tag_start_bucket; - } - else { - dptr = APR_BRIGADE_SENTINEL(*bb); - } - } - else if ((tmp_dptr != NULL) && - (ctx->output_now || - (ctx->bytes_parsed >= BYTE_COUNT_THRESHOLD))) { - /* Send the large chunk of pre-tag bytes... */ - tag_and_after = apr_brigade_split(*bb, tmp_dptr); - if (ctx->output_flush) { - APR_BRIGADE_INSERT_TAIL(*bb, apr_bucket_flush_create((*bb)->bucket_alloc)); - } + /* we must try all combinations; consider (e.g.) SSIStartTag "--->" + * and a string data of "--.-" and the end of the buffer + */ + p = data + index + 1; + } while (p < ep); - rv = ap_pass_brigade(f->next, *bb); - if (rv != APR_SUCCESS) { - return rv; - } - *bb = tag_and_after; - dptr = tmp_dptr; - ctx->output_flush = 0; - ctx->bytes_parsed = 0; - ctx->output_now = 0; - } - else if (tmp_dptr == NULL) { - /* There was no possible SSI tag in the - * remainder of this brigade... */ - dptr = APR_BRIGADE_SENTINEL(*bb); - } - } + /* no match */ + return len; +} - /* State to check for the ENDING_SEQUENCE. */ - if (((ctx->state == PARSE_DIRECTIVE) || - (ctx->state == PARSE_TAG) || - (ctx->state == PARSE_TAIL)) && - (dptr != APR_BRIGADE_SENTINEL(*bb))) { - tmp_dptr = find_end_sequence(dptr, ctx, *bb); - if (!APR_STATUS_IS_SUCCESS(ctx->status)) { - return ctx->status; - } +/* + * returns the first byte *after* the partial (or final) match. + * + * If we had to trick with the start_seq start, 'release' returns the + * number of chars of the start_seq which appeared not to be part of a + * full tag and may have to be passed down the filter chain. 
+ */ +static apr_size_t find_partial_start_sequence(ssi_ctx_t *ctx, + const char *data, + apr_size_t len, + apr_size_t *release) +{ + apr_size_t pos, spos = 0; + apr_size_t slen = ctx->ctx->start_seq_len; + const char *p, *ep; - if (tmp_dptr != NULL) { - dptr = tmp_dptr; /* Adjust bucket pos... */ - - /* If some of the tag has already been set aside then set - * aside remainder of tag. Now the full tag is in - * ssi_tag_brigade. - * If none has yet been set aside, then leave it all where it - * is. - * In any event after this the entire set of tag buckets will - * be in one place or another. - */ - if (!APR_BRIGADE_EMPTY(ctx->ssi_tag_brigade)) { - tag_and_after = apr_brigade_split(*bb, dptr); - APR_BRIGADE_CONCAT(ctx->ssi_tag_brigade, *bb); - *bb = tag_and_after; - } - else if (ctx->output_now || - (ctx->bytes_parsed >= BYTE_COUNT_THRESHOLD)) { - SPLIT_AND_PASS_PRETAG_BUCKETS(*bb, ctx, f->next, rv); - if (rv != APR_SUCCESS) { - return rv; - } - ctx->output_flush = 0; - ctx->output_now = 0; - } - } - else { - /* remainder of this brigade... */ - dptr = APR_BRIGADE_SENTINEL(*bb); - } + pos = ctx->ctx->parse_pos; + ep = data + len; + *release = 0; + + do { + p = data; + + while (p < ep && pos < slen && *p == ctx->ctx->start_seq[pos]) { + ++p; + ++pos; } - /* State to processed the directive... */ - if (ctx->state == PARSED) { - apr_bucket *content_head = NULL, *tmp_bkt; - apr_size_t tmp_i; - char tmp_buf[TMP_BUF_SIZE]; - int (*handle_func)(include_ctx_t *, apr_bucket_brigade **, - request_rec *, ap_filter_t *, apr_bucket *, - apr_bucket **); + /* full match */ + if (pos == slen) { + ctx->state = PARSE_DIRECTIVE; + return (p - data); + } - /* By now the full tag (all buckets) should either be set aside into - * ssi_tag_brigade or contained within the current bb. All tag - * processing from here on can assume that. 
- */ + /* the whole buffer is a partial match */ + if (p == ep) { + ctx->ctx->parse_pos = pos; + return (p - data); + } - /* At this point, everything between ctx->head_start_bucket and - * ctx->tail_start_bucket is an SSI - * directive, we just have to deal with it now. - */ - if (get_combined_directive(ctx, r, *bb, tmp_buf, - TMP_BUF_SIZE) != APR_SUCCESS) { - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "mod_include: error copying directive in %s", - r->filename); - CREATE_ERROR_BUCKET(ctx, tmp_bkt, dptr, content_head); + /* No match so far, but again: + * We must try all combinations, since the start_seq is a random + * user supplied string + * + * So: look if the first char of start_seq appears somewhere within + * the current partial match. If it does, try to start a match that + * begins with this offset. (This can happen, if a strange + * start_seq like "---->" spans buffers) + */ + if (spos < ctx->ctx->parse_pos) { + do { + ++spos; + ++*release; + p = ctx->ctx->start_seq + spos; + pos = ctx->ctx->parse_pos - spos; - /* DO CLEANUP HERE!!!!! */ - tmp_dptr = ctx->head_start_bucket; - if (!APR_BRIGADE_EMPTY(ctx->ssi_tag_brigade)) { - apr_brigade_cleanup(ctx->ssi_tag_brigade); - } - else { - do { - tmp_bkt = tmp_dptr; - tmp_dptr = APR_BUCKET_NEXT (tmp_dptr); - apr_bucket_delete(tmp_bkt); - } while ((tmp_dptr != dptr) && - (tmp_dptr != APR_BRIGADE_SENTINEL(*bb))); + while (pos && *p != *ctx->ctx->start_seq) { + ++p; + ++spos; + ++*release; + --pos; } - return APR_SUCCESS; - } - - /* Can't destroy the tag buckets until I'm done processing - * because the combined_tag might just be pointing to - * the contents of a single bucket! - */ + /* if a matching beginning char was found, try to match the + * remainder of the old buffer. + */ + if (pos > 1) { + apr_size_t t = 1; - /* Retrieve the handler function to be called for this directive - * from the functions registered in the hash table. - * Need to lower case the directive for proper matching. 
Also need - * to have it NULL terminated for proper hash matching. - */ - for (tmp_i = 0; tmp_i < ctx->directive_length; tmp_i++) { - ctx->combined_tag[tmp_i] = - apr_tolower(ctx->combined_tag[tmp_i]); - } - ctx->combined_tag[ctx->directive_length] = '\0'; - ctx->curr_tag_pos = &ctx->combined_tag[ctx->directive_length+1]; + ++p; + while (t < pos && *p == ctx->ctx->start_seq[t]) { + ++p; + ++t; + } - handle_func = - (include_handler_fn_t *)apr_hash_get(include_hash, - ctx->combined_tag, - ctx->directive_length); - if (handle_func != NULL) { - rv = (*handle_func)(ctx, bb, r, f, dptr, &content_head); - if ((rv != 0) && (rv != 1)) { - return (rv); + if (t == pos) { + /* yeah, another partial match found in the *old* + * buffer, now test the *current* buffer for + * continuing match + */ + break; + } } + } while (pos > 1); + + if (pos) { + continue; } - else { - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "unknown directive \"%s\" in parsed doc %s", - ctx->combined_tag, r->filename); - CREATE_ERROR_BUCKET(ctx, tmp_bkt, dptr, content_head); - } - - /* This chunk of code starts at the first bucket in the chain - * of tag buckets (assuming that by this point the bucket for - * the STARTING_SEQUENCE has been split) and loops through to - * the end of the tag buckets freeing them all. - * - * Remember that some part of this may have been set aside - * into the ssi_tag_brigade and the remainder (possibly as - * little as one byte) will be in the current brigade. - * - * The value of dptr should have been set during the - * PARSE_TAIL state to the first bucket after the - * ENDING_SEQUENCE. - * - * The value of content_head may have been set during processing - * of the directive. If so, the content was inserted in front - * of the dptr bucket. The inserted buckets should not be thrown - * away here, but they should also not be parsed later. 
+ } + + break; + } while (1); /* work hard to find a match ;-) */ + + /* no match at all, release all (wrongly) matched chars so far */ + *release = ctx->ctx->parse_pos; + ctx->state = PARSE_PRE_HEAD; + return 0; +} + +/* + * returns the position after the directive + */ +static apr_size_t find_directive(ssi_ctx_t *ctx, const char *data, + apr_size_t len, char ***store, + apr_size_t **store_len) +{ + const char *p = data; + const char *ep = data + len; + apr_size_t pos; + + switch (ctx->state) { + case PARSE_DIRECTIVE: + while (p < ep && !apr_isspace(*p)) { + /* we have to consider the case of missing space between directive + * and end_seq (be somewhat lenient), e.g. */ - if (content_head == NULL) { - content_head = dptr; + if (*p == *ctx->ctx->end_seq) { + ctx->state = PARSE_DIRECTIVE_TAIL; + ctx->ctx->parse_pos = 1; + ++p; + return (p - data); } - tmp_dptr = ctx->head_start_bucket; - if (!APR_BRIGADE_EMPTY(ctx->ssi_tag_brigade)) { - apr_brigade_cleanup(ctx->ssi_tag_brigade); + ++p; + } + + if (p < ep) { /* found delimiter whitespace */ + ctx->state = PARSE_DIRECTIVE_POSTNAME; + *store = &ctx->directive; + *store_len = &ctx->ctx->directive_length; + } + + break; + + case PARSE_DIRECTIVE_TAIL: + pos = ctx->ctx->parse_pos; + + while (p < ep && pos < ctx->end_seq_len && + *p == ctx->ctx->end_seq[pos]) { + ++p; + ++pos; + } + + /* full match, we're done */ + if (pos == ctx->end_seq_len) { + ctx->state = PARSE_DIRECTIVE_POSTTAIL; + *store = &ctx->directive; + *store_len = &ctx->ctx->directive_length; + break; + } + + /* partial match, the buffer is too small to match fully */ + if (p == ep) { + ctx->ctx->parse_pos = pos; + break; + } + + /* no match. 
continue normal parsing */ + ctx->state = PARSE_DIRECTIVE; + return 0; + + case PARSE_DIRECTIVE_POSTTAIL: + ctx->state = PARSE_EXECUTE; + ctx->ctx->directive_length -= ctx->end_seq_len; + /* continue immediately with the next state */ + + case PARSE_DIRECTIVE_POSTNAME: + if (PARSE_DIRECTIVE_POSTNAME == ctx->state) { + ctx->state = PARSE_PRE_ARG; + } + ctx->argc = 0; + ctx->argv = NULL; + + if (!ctx->ctx->directive_length) { + ctx->error = 1; + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->r, "missing directive " + "name in parsed document %s", ctx->r->filename); + } + else { + char *sp = ctx->directive; + char *sep = ctx->directive + ctx->ctx->directive_length; + + /* normalize directive name */ + for (; sp < sep; ++sp) { + *sp = apr_tolower(*sp); } - else { - do { - tmp_bkt = tmp_dptr; - tmp_dptr = APR_BUCKET_NEXT (tmp_dptr); - apr_bucket_delete(tmp_bkt); - } while ((tmp_dptr != content_head) && - (tmp_dptr != APR_BRIGADE_SENTINEL(*bb))); + } + + return 0; + + default: + /* get a rid of a gcc warning about unhandled enumerations */ + break; + } + + return (p - data); +} + +/* + * find out whether the next token is (a possible) end_seq or an argument + */ +static apr_size_t find_arg_or_tail(ssi_ctx_t *ctx, const char *data, + apr_size_t len) +{ + const char *p = data; + const char *ep = data + len; + + /* skip leading WS */ + while (p < ep && apr_isspace(*p)) { + ++p; + } + + /* buffer doesn't consist of whitespaces only */ + if (p < ep) { + ctx->state = (*p == *ctx->ctx->end_seq) ? PARSE_TAIL : PARSE_ARG; + } + + return (p - data); +} + +/* + * test the stream for end_seq. 
If it doesn't match at all, it must be an + * argument + */ +static apr_size_t find_tail(ssi_ctx_t *ctx, const char *data, + apr_size_t len) +{ + const char *p = data; + const char *ep = data + len; + apr_size_t pos = ctx->ctx->parse_pos; + + if (PARSE_TAIL == ctx->state) { + ctx->state = PARSE_TAIL_SEQ; + pos = ctx->ctx->parse_pos = 0; + } + + while (p < ep && pos < ctx->end_seq_len && *p == ctx->ctx->end_seq[pos]) { + ++p; + ++pos; + } + + /* bingo, full match */ + if (pos == ctx->end_seq_len) { + ctx->state = PARSE_EXECUTE; + return (p - data); + } + + /* partial match, the buffer is too small to match fully */ + if (p == ep) { + ctx->ctx->parse_pos = pos; + return (p - data); + } + + /* no match. It must be an argument string then */ + ctx->state = PARSE_ARG; + return 0; +} + +/* + * extract name=value from the buffer + * A pcre-pattern could look (similar to): + * name\s*(?:=\s*(["'`]?)value\1(?>\s*))? + */ +static apr_size_t find_argument(ssi_ctx_t *ctx, const char *data, + apr_size_t len, char ***store, + apr_size_t **store_len) +{ + const char *p = data; + const char *ep = data + len; + + switch (ctx->state) { + case PARSE_ARG: + /* + * create argument structure and append it to the current list + */ + ctx->current_arg = apr_palloc(ctx->dpool, + sizeof(*ctx->current_arg)); + ctx->current_arg->next = NULL; + + ++(ctx->argc); + if (!ctx->argv) { + ctx->argv = ctx->current_arg; + } + else { + ssi_arg_item_t *newarg = ctx->argv; + + while (newarg->next) { + newarg = newarg->next; } - if (ctx->combined_tag == tmp_buf) { - ctx->combined_tag = NULL; + newarg->next = ctx->current_arg; + } + + /* check whether it's a valid one. 
If it begins with a quote, we + * can safely assume, someone forgot the name of the argument + */ + switch (*p) { + case '"': case '\'': case '`': + *store = NULL; + + ctx->state = PARSE_ARG_VAL; + ctx->quote = *p++; + ctx->current_arg->name = NULL; + ctx->current_arg->name_len = 0; + ctx->error = 1; + + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->r, "missing argument " + "name for value to tag %s in %s", + apr_pstrmemdup(ctx->r->pool, ctx->directive, + ctx->ctx->directive_length), + ctx->r->filename); + + return (p - data); + + default: + ctx->state = PARSE_ARG_NAME; + } + /* continue immediately with next state */ + + case PARSE_ARG_NAME: + while (p < ep && !apr_isspace(*p) && *p != '=') { + ++p; + } + + if (p < ep) { + ctx->state = PARSE_ARG_POSTNAME; + *store = &ctx->current_arg->name; + *store_len = &ctx->current_arg->name_len; + return (p - data); + } + break; + + case PARSE_ARG_POSTNAME: + ctx->current_arg->name = apr_pstrmemdup(ctx->dpool, + ctx->current_arg->name, + ctx->current_arg->name_len); + if (!ctx->current_arg->name_len) { + ctx->error = 1; + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->r, "missing argument " + "name for value to tag %s in %s", + apr_pstrmemdup(ctx->r->pool, ctx->directive, + ctx->ctx->directive_length), + ctx->r->filename); + } + else { + char *sp = ctx->current_arg->name; + + /* normalize the name */ + while (*sp) { + *sp = apr_tolower(*sp); + ++sp; } + } - /* Don't reset the flags or the nesting level!!! 
*/ - ctx->parse_pos = 0; - ctx->head_start_bucket = NULL; - ctx->head_start_index = 0; - ctx->tag_start_bucket = NULL; - ctx->tag_start_index = 0; - ctx->tail_start_bucket = NULL; - ctx->tail_start_index = 0; - ctx->curr_tag_pos = NULL; - ctx->tag_length = 0; - ctx->directive_length = 0; + ctx->state = PARSE_ARG_EQ; + /* continue with next state immediately */ - if (!APR_BRIGADE_EMPTY(ctx->ssi_tag_brigade)) { - apr_brigade_cleanup(ctx->ssi_tag_brigade); + case PARSE_ARG_EQ: + *store = NULL; + + while (p < ep && apr_isspace(*p)) { + ++p; + } + + if (p < ep) { + if (*p == '=') { + ctx->state = PARSE_ARG_PREVAL; + ++p; + } + else { /* no value */ + ctx->current_arg->value = NULL; + ctx->state = PARSE_PRE_ARG; } - ctx->state = PRE_HEAD; + return (p - data); + } + break; + + case PARSE_ARG_PREVAL: + *store = NULL; + + while (p < ep && apr_isspace(*p)) { + ++p; + } + + /* buffer doesn't consist of whitespaces only */ + if (p < ep) { + ctx->state = PARSE_ARG_VAL; + switch (*p) { + case '"': case '\'': case '`': + ctx->quote = *p++; + break; + default: + ctx->quote = '\0'; + break; + } + + return (p - data); + } + break; + + case PARSE_ARG_VAL_ESC: + if (*p == ctx->quote) { + ++p; + } + ctx->state = PARSE_ARG_VAL; + /* continue with next state immediately */ + + case PARSE_ARG_VAL: + for (; p < ep; ++p) { + if (ctx->quote && *p == '\\') { + ++p; + if (p == ep) { + ctx->state = PARSE_ARG_VAL_ESC; + break; + } + + if (*p != ctx->quote) { + --p; + } + } + else if (ctx->quote && *p == ctx->quote) { + ++p; + *store = &ctx->current_arg->value; + *store_len = &ctx->current_arg->value_len; + ctx->state = PARSE_ARG_POSTVAL; + break; + } + else if (!ctx->quote && apr_isspace(*p)) { + ++p; + *store = &ctx->current_arg->value; + *store_len = &ctx->current_arg->value_len; + ctx->state = PARSE_ARG_POSTVAL; + break; + } } - } - /* We have nothing more to send, stop now. 
*/ - if (dptr != APR_BRIGADE_SENTINEL(*bb) && - APR_BUCKET_IS_EOS(dptr)) { - /* We might have something saved that we never completed, but send - * down unparsed. This allows for as last + * or only string within the stream). This state, however, + * just cleans up and turns itself to PARSE_EXECUTE, which + * will be passed through within the next (and actually + * last) round. + */ + if (PARSE_EXECUTE == ctx->state || + PARSE_DIRECTIVE_POSTTAIL == ctx->state) { + APR_BUCKET_INSERT_BEFORE(newb, b); + } + else { + break; /* END OF STREAM */ + } + } + else { + APR_BRIGADE_INSERT_TAIL(pass_bb, b); + + if (APR_BUCKET_IS_FLUSH(b)) { + ctx->ctx->output_now = 1; + } + + b = newb; + continue; } - ctx->bytes_parsed = 0; } - } - else if (ctx->state == PARSED) { /* Invalid internal condition... */ - apr_bucket *content_head = NULL, *tmp_bkt; - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "Invalid mod_include state during file %s", r->filename); - CREATE_ERROR_BUCKET(ctx, tmp_bkt, APR_BRIGADE_FIRST(*bb), content_head); - } - else { /* Entire brigade is middle chunk of SSI tag... */ - if (!APR_BRIGADE_EMPTY(ctx->ssi_tag_brigade)) { - APR_BRIGADE_CONCAT(ctx->ssi_tag_brigade, *bb); - } - else { /* End of brigade contains part of SSI tag... */ - apr_bucket *last; - if (ctx->head_start_index > 0) { - apr_bucket_split(ctx->head_start_bucket, ctx->head_start_index); - ctx->head_start_bucket = - APR_BUCKET_NEXT(ctx->head_start_bucket); - ctx->head_start_index = 0; - } - /* Set aside tag, pass pre-tag... */ - tag_and_after = apr_brigade_split(*bb, ctx->head_start_bucket); - rv = ap_pass_brigade(f->next, *bb); - if (rv != APR_SUCCESS) { + + /* enough is enough ... 
*/ + if (ctx->ctx->output_now || + ctx->ctx->bytes_parsed > AP_MIN_BYTES_TO_WRITE) { + + if (!APR_BRIGADE_EMPTY(pass_bb)) { + rv = ap_pass_brigade(f->next, pass_bb); + if (!APR_STATUS_IS_SUCCESS(rv)) { + apr_brigade_destroy(pass_bb); + return rv; + } + } + + ctx->ctx->output_now = 0; + ctx->ctx->bytes_parsed = 0; + } + + /* read the current bucket data */ + len = 0; + if (!ctx->seen_eos) { + if (ctx->ctx->bytes_parsed > 0) { + rv = apr_bucket_read(b, &data, &len, APR_NONBLOCK_READ); + if (APR_STATUS_IS_EAGAIN(rv)) { + ctx->ctx->output_now = 1; + continue; + } + } + + if (!len || !APR_STATUS_IS_SUCCESS(rv)) { + rv = apr_bucket_read(b, &data, &len, APR_BLOCK_READ); + } + + if (!APR_STATUS_IS_SUCCESS(rv)) { + apr_brigade_destroy(pass_bb); return rv; } - - /* Set aside the partial tag - * Exception: if there's an EOS at the end of this brigade, - * the tag will never be completed, so send an error and EOS - */ - last = APR_BRIGADE_LAST(tag_and_after); - if (APR_BUCKET_IS_EOS(last)) { - /* Remove everything before the EOS (i.e., the partial tag) - * and replace it with an error msg */ - apr_bucket *b; - apr_bucket *err_bucket = NULL; - for (b = APR_BRIGADE_FIRST(tag_and_after); - !APR_BUCKET_IS_EOS(b); - b = APR_BRIGADE_FIRST(tag_and_after)) { + + ctx->ctx->bytes_parsed += len; + } + + /* zero length bucket, fetch next one */ + if (!len && !ctx->seen_eos) { + b = APR_BUCKET_NEXT(b); + continue; + } + + /* + * it's actually a data containing bucket, start/continue parsing + */ + + switch (ctx->state) { + /* no current tag; search for start sequence */ + case PARSE_PRE_HEAD: + index = find_start_sequence(ctx, data, len); + + if (index < len) { + apr_bucket_split(b, index); + } + + newb = APR_BUCKET_NEXT(b); + if (ctx->ctx->flags & FLAG_PRINTING) { + APR_BUCKET_REMOVE(b); + APR_BRIGADE_INSERT_TAIL(pass_bb, b); + } + else { + apr_bucket_delete(b); + } + + if (index < len) { + /* now delete the start_seq stuff from the remaining bucket */ + if (PARSE_DIRECTIVE == ctx->state) 
{ /* full match */ + apr_bucket_split(newb, ctx->ctx->start_seq_len); + ctx->ctx->output_now = 1; /* pass pre-tag stuff */ + } + + b = APR_BUCKET_NEXT(newb); + apr_bucket_delete(newb); + } + else { + b = newb; + } + + break; + + /* we're currently looking for the end of the start sequence */ + case PARSE_HEAD: + index = find_partial_start_sequence(ctx, data, len, &release); + + /* check if we mismatched earlier and have to release some chars */ + if (release && (ctx->ctx->flags & FLAG_PRINTING)) { + char *to_release = apr_palloc(ctx->ctx->pool, release); + + memcpy(to_release, ctx->ctx->start_seq, release); + newb = apr_bucket_pool_create(to_release, release, + ctx->ctx->pool, + f->c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(pass_bb, newb); + } + + if (index) { /* any match */ + /* now delete the start_seq stuff from the remaining bucket */ + if (PARSE_DIRECTIVE == ctx->state) { /* final match */ + apr_bucket_split(b, index); + ctx->ctx->output_now = 1; /* pass pre-tag stuff */ + } + newb = APR_BUCKET_NEXT(b); + apr_bucket_delete(b); + b = newb; + } + + break; + + /* we're currently grabbing the directive name */ + case PARSE_DIRECTIVE: + case PARSE_DIRECTIVE_POSTNAME: + case PARSE_DIRECTIVE_TAIL: + case PARSE_DIRECTIVE_POSTTAIL: + index = find_directive(ctx, data, len, &store, &store_len); + + if (index) { + apr_bucket_split(b, index); + newb = APR_BUCKET_NEXT(b); + } + + if (store) { + if (index) { + APR_BUCKET_REMOVE(b); + APR_BRIGADE_INSERT_TAIL(ctx->tmp_bb, b); + b = newb; + } + + /* time for cleanup? 
*/ + if (store != &magic) { + apr_brigade_pflatten(ctx->tmp_bb, store, store_len, + ctx->dpool); + apr_brigade_cleanup(ctx->tmp_bb); + } + } + else if (index) { + apr_bucket_delete(b); + b = newb; + } + + break; + + /* skip WS and find out what comes next (arg or end_seq) */ + case PARSE_PRE_ARG: + index = find_arg_or_tail(ctx, data, len); + + if (index) { /* skipped whitespaces */ + if (index < len) { + apr_bucket_split(b, index); + } + newb = APR_BUCKET_NEXT(b); + apr_bucket_delete(b); + b = newb; + } + + break; + + /* currently parsing name[=val] */ + case PARSE_ARG: + case PARSE_ARG_NAME: + case PARSE_ARG_POSTNAME: + case PARSE_ARG_EQ: + case PARSE_ARG_PREVAL: + case PARSE_ARG_VAL: + case PARSE_ARG_VAL_ESC: + case PARSE_ARG_POSTVAL: + index = find_argument(ctx, data, len, &store, &store_len); + + if (index) { + apr_bucket_split(b, index); + newb = APR_BUCKET_NEXT(b); + } + + if (store) { + if (index) { APR_BUCKET_REMOVE(b); - apr_bucket_destroy(b); + APR_BRIGADE_INSERT_TAIL(ctx->tmp_bb, b); + b = newb; + } + + /* time for cleanup? */ + if (store != &magic) { + apr_brigade_pflatten(ctx->tmp_bb, store, store_len, + ctx->dpool); + apr_brigade_cleanup(ctx->tmp_bb); + } + } + else if (index) { + apr_bucket_delete(b); + b = newb; + } + + break; + + /* try to match end_seq at current pos. 
*/ + case PARSE_TAIL: + case PARSE_TAIL_SEQ: + index = find_tail(ctx, data, len); + + switch (ctx->state) { + case PARSE_EXECUTE: /* full match */ + apr_bucket_split(b, index); + newb = APR_BUCKET_NEXT(b); + apr_bucket_delete(b); + b = newb; + break; + + case PARSE_ARG: /* no match */ + /* PARSE_ARG must reparse at the beginning */ + APR_BRIGADE_PREPEND(bb, ctx->tmp_bb); + b = APR_BRIGADE_FIRST(bb); + break; + + default: /* partial match */ + newb = APR_BUCKET_NEXT(b); + APR_BUCKET_REMOVE(b); + APR_BRIGADE_INSERT_TAIL(ctx->tmp_bb, b); + b = newb; + break; + } + + break; + + /* now execute the parsed directive, cleanup the space and + * start again with PARSE_PRE_HEAD + */ + case PARSE_EXECUTE: + /* if there was an error, it was already logged; just stop here */ + if (ctx->error) { + if (ctx->ctx->flags & FLAG_PRINTING) { + SSI_CREATE_ERROR_BUCKET(ctx->ctx, f, pass_bb); + ctx->error = 0; } - CREATE_ERROR_BUCKET(ctx, err_bucket, b, err_bucket); - rv = ap_pass_brigade(f->next, tag_and_after); } else { - ap_save_brigade(f, &ctx->ssi_tag_brigade, - &tag_and_after, r->pool); + include_handler_fn_t *handle_func; + + handle_func = + (include_handler_fn_t *) apr_hash_get(include_hash, + ctx->directive, + ctx->ctx->directive_length); + if (handle_func) { + apr_bucket *dummy; + char *tag; + apr_size_t tag_len = 0; + ssi_arg_item_t *carg = ctx->argv; + + /* legacy wrapper code */ + while (carg) { + /* +1 \0 byte (either after tag or value) + * +1 = byte (before value) + */ + tag_len += (carg->name ? carg->name_len : 0) + + (carg->value ? 
carg->value_len + 1 : 0) + 1; + carg = carg->next; + } + + tag = ctx->ctx->combined_tag = ctx->ctx->curr_tag_pos = + apr_palloc(ctx->dpool, tag_len); + + carg = ctx->argv; + while (carg) { + if (carg->name) { + memcpy(tag, carg->name, carg->name_len); + tag += carg->name_len; + } + if (carg->value) { + *tag++ = '='; + memcpy(tag, carg->value, carg->value_len); + tag += carg->value_len; + } + *tag++ = '\0'; + carg = carg->next; + } + ctx->ctx->tag_length = tag_len; + + /* create dummy buckets for backwards compat */ + ctx->ctx->head_start_bucket = + apr_bucket_pool_create(apr_pmemdup(ctx->ctx->pool, + ctx->ctx->start_seq, + ctx->ctx->start_seq_len), + ctx->ctx->start_seq_len, + ctx->ctx->pool, + f->c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(ctx->ctx->ssi_tag_brigade, + ctx->ctx->head_start_bucket); + ctx->ctx->tag_start_bucket = + apr_bucket_pool_create(apr_pmemdup(ctx->ctx->pool, + ctx->ctx->combined_tag, + ctx->ctx->tag_length), + ctx->ctx->tag_length, + ctx->ctx->pool, + f->c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(ctx->ctx->ssi_tag_brigade, + ctx->ctx->tag_start_bucket); + ctx->ctx->tail_start_bucket = + apr_bucket_pool_create(apr_pmemdup(ctx->ctx->pool, + ctx->ctx->end_seq, + ctx->end_seq_len), + ctx->end_seq_len, + ctx->ctx->pool, + f->c->bucket_alloc); + APR_BRIGADE_INSERT_TAIL(ctx->ctx->ssi_tag_brigade, + ctx->ctx->tail_start_bucket); + + rv = handle_func(ctx->ctx, &bb, r, f, b, &dummy); + + apr_brigade_cleanup(ctx->ctx->ssi_tag_brigade); + + if (rv != 0 && rv != 1 && rv != -1) { + apr_brigade_destroy(pass_bb); + return rv; + } + + if (dummy) { + apr_bucket_brigade *remain; + + remain = apr_brigade_split(bb, b); + APR_BRIGADE_CONCAT(pass_bb, bb); + bb = remain; + } + } + else { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, + "unknown directive \"%s\" in parsed doc %s", + apr_pstrmemdup(r->pool, ctx->directive, + ctx->ctx->directive_length), + r->filename); + if (ctx->ctx->flags & FLAG_PRINTING) { + SSI_CREATE_ERROR_BUCKET(ctx->ctx, f, pass_bb); + } + } } -
if (rv != APR_SUCCESS) { - return rv; + + /* cleanup */ + apr_pool_clear(ctx->dpool); + apr_brigade_cleanup(ctx->tmp_bb); + + /* Oooof. Done here, start next round */ + ctx->state = PARSE_PRE_HEAD; + break; + } + + } /* while (brigade) */ + + /* End of stream. Final cleanup */ + if (ctx->seen_eos) { + if (PARSE_HEAD == ctx->state) { + if (ctx->ctx->flags & FLAG_PRINTING) { + char *to_release = apr_palloc(ctx->ctx->pool, + ctx->ctx->parse_pos); + + memcpy(to_release, ctx->ctx->start_seq, ctx->ctx->parse_pos); + APR_BRIGADE_INSERT_TAIL(pass_bb, + apr_bucket_pool_create(to_release, + ctx->ctx->parse_pos, ctx->ctx->pool, + f->c->bucket_alloc)); + } + } + else if (PARSE_PRE_HEAD != ctx->state) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, + "SSI directive was not properly finished at the end " + "of parsed document %s", r->filename); + if (ctx->ctx->flags & FLAG_PRINTING) { + SSI_CREATE_ERROR_BUCKET(ctx->ctx, f, pass_bb); } - ctx->bytes_parsed = 0; } + + if (!(ctx->ctx->flags & FLAG_PRINTING)) { + ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, + "missing closing endif directive in parsed document" + " %s", r->filename); + } + + /* cleanup our temporary memory */ + apr_brigade_destroy(ctx->tmp_bb); + apr_pool_destroy(ctx->dpool); + + /* don't forget to finally insert the EOS bucket */ + APR_BRIGADE_INSERT_TAIL(pass_bb, b); } - return APR_SUCCESS; + + /* if something's left over, pass it along */ + if (!APR_BRIGADE_EMPTY(pass_bb)) { + rv = ap_pass_brigade(f->next, pass_bb); + } + else { + rv = APR_SUCCESS; + } + + apr_brigade_destroy(pass_bb); + return rv; } static void *create_includes_dir_config(apr_pool_t *p, char *dummy) @@ -3340,7 +3549,7 @@ static int includes_setup(ap_filter_t *f) * We don't know if we are going to be including a file or executing * a program - in either case a strong ETag header will likely be invalid. 
*/ - apr_table_setn(f->r->notes, "no-etag", ""); + apr_table_setn(f->r->notes, "no-etag", ""); return OK; } @@ -3348,7 +3557,7 @@ static int includes_setup(ap_filter_t *f) static apr_status_t includes_filter(ap_filter_t *f, apr_bucket_brigade *b) { request_rec *r = f->r; - include_ctx_t *ctx = f->ctx; + ssi_ctx_t *ctx = f->ctx; request_rec *parent; include_dir_config *conf = (include_dir_config *)ap_get_module_config(r->per_dir_config, @@ -3366,26 +3575,47 @@ static apr_status_t includes_filter(ap_filter_t *f, apr_bucket_brigade *b) } if (!f->ctx) { - f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx)); - ctx->state = PRE_HEAD; - ctx->flags = (FLAG_PRINTING | FLAG_COND_TRUE); - if (ap_allow_options(r) & OPT_INCNOEXEC) { - ctx->flags |= FLAG_NO_EXEC; - } - ctx->ssi_tag_brigade = apr_brigade_create(f->c->pool, - f->c->bucket_alloc); - ctx->status = APR_SUCCESS; - - ctx->error_str = conf->default_error_msg; - ctx->time_str = conf->default_time_fmt; - ctx->pool = f->c->pool; - ctx->start_seq_pat = &sconf->start_seq_pat; - ctx->start_seq = sconf->default_start_tag; - ctx->start_seq_len = sconf->start_tag_len; - ctx->end_seq = sconf->default_end_tag; + /* create context for this filter */ + f->ctx = ctx = apr_palloc(f->c->pool, sizeof(*ctx)); + ctx->ctx = apr_pcalloc(f->c->pool, sizeof(*ctx->ctx)); + ctx->ctx->pool = f->r->pool; + apr_pool_create(&ctx->dpool, ctx->ctx->pool); + + /* configuration data */ + ctx->end_seq_len = strlen(sconf->default_end_tag); + ctx->r = f->r; + + /* runtime data */ + ctx->tmp_bb = apr_brigade_create(ctx->ctx->pool, f->c->bucket_alloc); + ctx->seen_eos = 0; + ctx->state = PARSE_PRE_HEAD; + ctx->ctx->flags = (FLAG_PRINTING | FLAG_COND_TRUE); + if (ap_allow_options(f->r) & OPT_INCNOEXEC) { + ctx->ctx->flags |= FLAG_NO_EXEC; + } + ctx->ctx->if_nesting_level = 0; + ctx->ctx->re_string = NULL; + ctx->ctx->error_str_override = NULL; + ctx->ctx->time_str_override = NULL; + + ctx->ctx->error_str = conf->default_error_msg; + ctx->ctx->time_str = 
conf->default_time_fmt; + ctx->ctx->start_seq_pat = &sconf->start_seq_pat; + ctx->ctx->start_seq = sconf->default_start_tag; + ctx->ctx->start_seq_len = sconf->start_tag_len; + ctx->ctx->end_seq = sconf->default_end_tag; + + /* legacy compat stuff */ + ctx->ctx->state = PARSED; /* dummy */ + ctx->ctx->ssi_tag_brigade = apr_brigade_create(f->c->pool, + f->c->bucket_alloc); + ctx->ctx->status = APR_SUCCESS; + ctx->ctx->head_start_index = 0; + ctx->ctx->tag_start_index = 0; + ctx->ctx->tail_start_index = 0; } else { - ctx->bytes_parsed = 0; + ctx->ctx->bytes_parsed = 0; } if ((parent = ap_get_module_config(r->request_config, &include_module))) { @@ -3434,7 +3664,17 @@ static apr_status_t includes_filter(ap_filter_t *f, apr_bucket_brigade *b) apr_table_unset(f->r->headers_out, "Last-Modified"); } - return send_parsed_content(&b, r, f); + /* add QUERY stuff to env cause it ain't yet */ + if (r->args) { + char *arg_copy = apr_pstrdup(r->pool, r->args); + + apr_table_setn(r->subprocess_env, "QUERY_STRING", r->args); + ap_unescape_url(arg_copy); + apr_table_setn(r->subprocess_env, "QUERY_STRING_UNESCAPED", + ap_escape_shell_cmd(r->pool, arg_copy)); + } + + return send_parsed_content(f, b); } static void ap_register_include_handler(char *tag, include_handler_fn_t *func) diff --git a/modules/filters/mod_include.h b/modules/filters/mod_include.h index 57483b1ab2d..e691804646d 100644 --- a/modules/filters/mod_include.h +++ b/modules/filters/mod_include.h @@ -131,7 +131,17 @@ * ssi_tag_brigade: The temporary brigade used by this filter to set aside * the buckets containing parts of the ssi tag and headers. */ -typedef enum {PRE_HEAD, PARSE_HEAD, PARSE_DIRECTIVE, PARSE_TAG, PARSE_TAIL, PARSED} states; + +/* I keep this stuff here, because of binary compat. It probably doesn't care, + * but who knows ...? 
+ */ +#ifdef MOD_INCLUDE_REDESIGN +typedef enum {PRE_HEAD, BLOW_PARSE_HEAD, BLOW_PARSE_DIRECTIVE, PARSE_TAG, + BLOW_PARSE_TAIL, PARSED} states; +#else +typedef enum {PRE_HEAD, PARSE_HEAD, PARSE_DIRECTIVE, PARSE_TAG, PARSE_TAIL, + PARSED} states; +#endif /** forward referenced as it needs to be held on the context */ typedef struct bndm_t bndm_t;