From: Joe Orton Date: Tue, 19 Mar 2024 08:35:12 +0000 (+0000) Subject: Merge r1884505, r1915625 from trunk: X-Git-Tag: 2.4.59-rc1-candidate~33 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fb6dd556674f2b102dbd9dd95568257fe8a519f3;p=thirdparty%2Fapache%2Fhttpd.git Merge r1884505, r1915625 from trunk: The Microsoft OOXML format uses xml packaged into a zip file, and has mimetypes like: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet This mimetype contains 'xml', but the file is unfortunately not an xml file. xml2enc processes these files (in particular, when mod_proxy_html is used), typically resulting in them being corrupted as it seems to attempt to perform an ISO-8859-1 to UTF-8 conversion on them. * modules/filters/mod_xml2enc.c (xml2enc_ffunc): Restrict test for XML types to matching "+xml". Submitted by: Joseph Heenan , jorton PR: 64339 Reviewed by: jorton, ylavic, gbechis Github: closes #410 git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1916412 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/changes-entries/pr64339.txt b/changes-entries/pr64339.txt new file mode 100644 index 00000000000..9d88bc38941 --- /dev/null +++ b/changes-entries/pr64339.txt @@ -0,0 +1,4 @@ + *) mod_xml2enc: Update check to accept any text/ media type + or any XML media type per RFC 7303, avoiding + corruption of Microsoft OOXML formats. PR 64339. 
+ [Joseph Heenan , Joe Orton] diff --git a/modules/filters/mod_xml2enc.c b/modules/filters/mod_xml2enc.c index 9e3bc314f4e..eb05c183a01 100644 --- a/modules/filters/mod_xml2enc.c +++ b/modules/filters/mod_xml2enc.c @@ -323,7 +323,7 @@ static apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb) apr_bucket* bstart; apr_size_t insz = 0; int pending_meta = 0; - char *ctype; + char *mtype; char *p; if (!ctx || !f->r->content_type) { @@ -332,13 +332,17 @@ static apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb) return ap_pass_brigade(f->next, bb) ; } - ctype = apr_pstrdup(f->r->pool, f->r->content_type); - for (p = ctype; *p; ++p) - if (isupper(*p)) - *p = tolower(*p); - - /* only act if starts-with "text/" or contains "xml" */ - if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml")) { + /* Extract the media type, ignoring parameters in content-type. */ + mtype = apr_pstrdup(f->r->pool, f->r->content_type); + if ((p = ap_strchr(mtype, ';')) != NULL) *p = '\0'; + ap_str_tolower(mtype); + + /* Accept text/ types, plus any XML media type per RFC 7303. */ + if (!(strncmp(mtype, "text/", 5) == 0 + || strcmp(mtype, "application/xml") == 0 + || (strlen(mtype) > 7 /* minimum 'a/b+xml' length */ + && (p = strstr(mtype, "+xml")) != NULL + && strlen(p) == 4 /* ensures +xml is a suffix */))) { ap_remove_output_filter(f); return ap_pass_brigade(f->next, bb) ; }