From: Graham Leggett
Date: Sun, 19 Sep 2010 13:02:54 +0000 (+0000)
Subject: mod_include: Reinstate support for UTF-8 character sets by allowing a
X-Git-Tag: 2.3.9~487
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b9d81442c550fc022e685c12e55404878f1b8a5b;p=thirdparty%2Fapache%2Fhttpd.git
mod_include: Reinstate support for UTF-8 character sets by allowing a
variable being echoed or set to be decoded and then encoded as separate
steps. PR47686
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@998651 13f79535-47bb-0310-9956-ffa450edef68
---
diff --git a/CHANGES b/CHANGES
index 4b78432f5fc..771c007f3e2 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,10 @@
Changes with Apache 2.3.9
+ *) mod_include: Reinstate support for UTF-8 character sets by allowing a
+ variable being echoed or set to be decoded and then encoded as separate
+ steps. PR47686 [Graham Leggett]
+
*) mod_cache: Add a discrete commit_entity() provider function within the
mod_cache provider interface which is called to indicate to the
provider that caching is complete, giving the provider the opportunity
diff --git a/docs/manual/mod/mod_include.xml b/docs/manual/mod/mod_include.xml
index 1934435abc1..9c479f4460e 100644
--- a/docs/manual/mod/mod_include.xml
+++ b/docs/manual/mod/mod_include.xml
@@ -180,13 +180,32 @@
var
The value is the name of the variable to print.
+ decoding
+ Specifies whether Apache should strip an encoding from
+ the variable before processing the variable further. The default
+ is none, where no decoding will be done. If set to
+ url, then URL decoding (the removal of %-encoding,
+ as used within URLs in links, etc.) will be
+ performed. If set to base64, base64 will be decoded,
+ and if set to entity, HTML entity encoding will be
+ stripped. Decoding is done prior to any further encoding on the
+ variable. Multiple encodings can be stripped by specifying more
+ than one comma-separated encoding. The decoding setting will
+ remain in effect until the next decoding attribute is encountered,
+ or the element ends.
+
+ The decoding attribute must precede the
+ corresponding var attribute to be effective.
+
+
encoding
Specifies how Apache should encode special characters
contained in the variable before outputting them. If set
to none, no encoding will be done. If set to
url, then URL encoding (also known as %-encoding;
this is appropriate for use within URLs in links, etc.) will be
- performed. At the start of an echo element,
+ performed. If set to base64, base64 encoding will
+ be performed. At the start of an echo element,
the default is set to entity, resulting in entity
encoding (which is appropriate in the context of a block-level
HTML element, e.g. a paragraph of text). This can be
@@ -195,10 +214,7 @@
is encountered or the element ends, whichever comes first.
The encoding attribute must precede the
- corresponding var attribute to be effective, and
- only special characters as defined in the ISO-8859-1 character
- encoding will be encoded. This encoding process may not have the
- desired result if a different character encoding is in use.
+ corresponding var attribute to be effective.
In order to avoid cross-site scripting issues, you should
@@ -383,6 +399,35 @@
value
The value to give a variable.
+
+ decoding
+ Specifies whether Apache should strip an encoding from
+ the variable before processing the variable further. The default
+ is none, where no decoding will be done. If set to
+ url, base64 or entity,
+ URL decoding, base64 decoding or HTML entity decoding will be
+ performed respectively. More than one decoding can be specified
+ by separating with commas. The decoding setting will remain in
+ effect until the next decoding attribute is encountered, or the
+ element ends. The decoding attribute must
+ precede the corresponding value attribute to
+ be effective.
+
+
+ encoding
+ Specifies how Apache should encode special characters
+ contained in the variable before setting them. The default is
+ none, where no encoding will be done. If set to
+ url, base64 or entity,
+ URL encoding, base64 encoding or HTML entity encoding will be
+ performed respectively. More than one encoding can be specified
+ by separating with commas. The encoding setting will remain in
+ effect until the next encoding attribute is encountered, or the
+ element ends. The encoding attribute must
+ precede the corresponding value attribute
+ to be effective. Encodings are applied after all decodings have
+ been stripped.
+
Example
diff --git a/modules/filters/mod_include.c b/modules/filters/mod_include.c
index 6c12586961f..ba97f41fa89 100644
--- a/modules/filters/mod_include.c
+++ b/modules/filters/mod_include.c
@@ -1136,13 +1136,15 @@ static apr_status_t handle_include(include_ctx_t *ctx, ap_filter_t *f,
}
/*
- *
+ *
*/
static apr_status_t handle_echo(include_ctx_t *ctx, ap_filter_t *f,
apr_bucket_brigade *bb)
{
- enum {E_NONE, E_URL, E_ENTITY} encode;
+ const char *encoding = "entity", *decoding = "none";
request_rec *r = f->r;
+ int error = 0;
if (!ctx->argc) {
ap_log_rerror(APLOG_MARK,
@@ -1161,8 +1163,6 @@ static apr_status_t handle_echo(include_ctx_t *ctx, ap_filter_t *f,
return APR_SUCCESS;
}
- encode = E_ENTITY;
-
while (1) {
char *tag = NULL;
char *tag_val = NULL;
@@ -1182,17 +1182,69 @@ static apr_status_t handle_echo(include_ctx_t *ctx, ap_filter_t *f,
ctx);
if (val) {
- switch(encode) {
- case E_NONE:
- echo_text = val;
- break;
- case E_URL:
- echo_text = ap_escape_uri(ctx->dpool, val);
- break;
- case E_ENTITY:
- /* PR#25202: escape anything non-ascii here */
- echo_text = ap_escape_html2(ctx->dpool, val, 1);
- break;
+ char *last = NULL;
+ char *e, *d, *token;
+
+ echo_text = val;
+
+ d = apr_pstrdup(ctx->pool, decoding);
+ token = apr_strtok(d, ", \t", &last);
+
+ while(token) {
+ if (!strcasecmp(token, "none")) {
+ /* do nothing */
+ }
+ else if (!strcasecmp(token, "url")) {
+ char *buf = apr_pstrdup(ctx->pool, echo_text);
+ ap_unescape_url(buf);
+ echo_text = buf;
+ }
+ else if (!strcasecmp(token, "entity")) {
+ char *buf = apr_pstrdup(ctx->pool, echo_text);
+ decodehtml(buf);
+ echo_text = buf;
+ }
+ else if (!strcasecmp(token, "base64")) {
+ echo_text = ap_pbase64decode(ctx->dpool, echo_text);
+ }
+ else {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "unknown value "
+ "\"%s\" to parameter \"decoding\" of tag echo in "
+ "%s", token, r->filename);
+ SSI_CREATE_ERROR_BUCKET(ctx, f, bb);
+ error = 1;
+ break;
+ }
+ token = apr_strtok(NULL, ", \t", &last);
+ }
+
+ e = apr_pstrdup(ctx->pool, encoding);
+ token = apr_strtok(e, ", \t", &last);
+
+ while(token) {
+ if (!strcasecmp(token, "none")) {
+ /* do nothing */
+ }
+ else if (!strcasecmp(token, "url")) {
+ echo_text = ap_escape_uri(ctx->dpool, echo_text);
+ }
+ else if (!strcasecmp(token, "entity")) {
+ echo_text = ap_escape_html2(ctx->dpool, echo_text, 0);
+ }
+ else if (!strcasecmp(token, "base64")) {
+ char *buf;
+ buf = ap_pbase64encode(ctx->dpool, (char *)echo_text);
+ echo_text = buf;
+ }
+ else {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "unknown value "
+ "\"%s\" to parameter \"encoding\" of tag echo in "
+ "%s", token, r->filename);
+ SSI_CREATE_ERROR_BUCKET(ctx, f, bb);
+ error = 1;
+ break;
+ }
+ token = apr_strtok(NULL, ", \t", &last);
}
e_len = strlen(echo_text);
@@ -1202,27 +1254,19 @@ static apr_status_t handle_echo(include_ctx_t *ctx, ap_filter_t *f,
e_len = ctx->intern->undefined_echo_len;
}
+ if (error) {
+ break;
+ }
+
APR_BRIGADE_INSERT_TAIL(bb, apr_bucket_pool_create(
apr_pmemdup(ctx->pool, echo_text, e_len),
e_len, ctx->pool, f->c->bucket_alloc));
}
+ else if (!strcmp(tag, "decoding")) {
+ decoding = tag_val;
+ }
else if (!strcmp(tag, "encoding")) {
- if (!strcasecmp(tag_val, "none")) {
- encode = E_NONE;
- }
- else if (!strcasecmp(tag_val, "url")) {
- encode = E_URL;
- }
- else if (!strcasecmp(tag_val, "entity")) {
- encode = E_ENTITY;
- }
- else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "unknown value "
- "\"%s\" to parameter \"encoding\" of tag echo in "
- "%s", tag_val, r->filename);
- SSI_CREATE_ERROR_BUCKET(ctx, f, bb);
- break;
- }
+ encoding = tag_val;
}
else {
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "unknown parameter "
@@ -1690,10 +1734,12 @@ static apr_status_t handle_endif(include_ctx_t *ctx, ap_filter_t *f,
static apr_status_t handle_set(include_ctx_t *ctx, ap_filter_t *f,
apr_bucket_brigade *bb)
{
+ const char *encoding = "none", *decoding = "none";
char *var = NULL;
request_rec *r = f->r;
request_rec *sub = r->main;
apr_pool_t *p = r->pool;
+ int error = 0;
if (ctx->argc < 2) {
ap_log_rerror(APLOG_MARK,
@@ -1724,16 +1770,23 @@ static apr_status_t handle_set(include_ctx_t *ctx, ap_filter_t *f,
char *tag = NULL;
char *tag_val = NULL;
- ap_ssi_get_tag_and_value(ctx, &tag, &tag_val, SSI_VALUE_DECODED);
+ ap_ssi_get_tag_and_value(ctx, &tag, &tag_val, SSI_VALUE_RAW);
if (!tag || !tag_val) {
break;
}
if (!strcmp(tag, "var")) {
+ decodehtml(tag_val);
var = ap_ssi_parse_string(ctx, tag_val, NULL, 0,
SSI_EXPAND_DROP_NAME);
}
+ else if (!strcmp(tag, "decoding")) {
+ decoding = tag_val;
+ }
+ else if (!strcmp(tag, "encoding")) {
+ encoding = tag_val;
+ }
else if (!strcmp(tag, "value")) {
char *parsed_string;
@@ -1747,6 +1800,77 @@ static apr_status_t handle_set(include_ctx_t *ctx, ap_filter_t *f,
parsed_string = ap_ssi_parse_string(ctx, tag_val, NULL, 0,
SSI_EXPAND_DROP_NAME);
+
+ if (parsed_string) {
+ char *last = NULL;
+ char *e, *d, *token;
+
+ d = apr_pstrdup(ctx->pool, decoding);
+ token = apr_strtok(d, ", \t", &last);
+
+ while(token) {
+ if (!strcasecmp(token, "none")) {
+ /* do nothing */
+ }
+ else if (!strcasecmp(token, "url")) {
+ char *buf = apr_pstrdup(ctx->pool, parsed_string);
+ ap_unescape_url(buf);
+ parsed_string = buf;
+ }
+ else if (!strcasecmp(token, "entity")) {
+ char *buf = apr_pstrdup(ctx->pool, parsed_string);
+ decodehtml(buf);
+ parsed_string = buf;
+ }
+ else if (!strcasecmp(token, "base64")) {
+ parsed_string = ap_pbase64decode(ctx->dpool, parsed_string);
+ }
+ else {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "unknown value "
+ "\"%s\" to parameter \"decoding\" of tag set in "
+ "%s", token, r->filename);
+ SSI_CREATE_ERROR_BUCKET(ctx, f, bb);
+ error = 1;
+ break;
+ }
+ token = apr_strtok(NULL, ", \t", &last);
+ }
+
+ e = apr_pstrdup(ctx->pool, encoding);
+ token = apr_strtok(e, ", \t", &last);
+
+ while(token) {
+ if (!strcasecmp(token, "none")) {
+ /* do nothing */
+ }
+ else if (!strcasecmp(token, "url")) {
+ parsed_string = ap_escape_uri(ctx->dpool, parsed_string);
+ }
+ else if (!strcasecmp(token, "entity")) {
+ parsed_string = ap_escape_html2(ctx->dpool, parsed_string, 0);
+ }
+ else if (!strcasecmp(token, "base64")) {
+ char *buf;
+ buf = ap_pbase64encode(ctx->dpool, (char *)parsed_string);
+ parsed_string = buf;
+ }
+ else {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "unknown value "
+ "\"%s\" to parameter \"encoding\" of tag set in "
+ "%s", token, r->filename);
+ SSI_CREATE_ERROR_BUCKET(ctx, f, bb);
+ error = 1;
+ break;
+ }
+ token = apr_strtok(NULL, ", \t", &last);
+ }
+
+ }
+
+ if (error) {
+ break;
+ }
+
apr_table_setn(r->subprocess_env, apr_pstrdup(p, var),
apr_pstrdup(p, parsed_string));
}