From cb46ff13436ea9a3c22700ea0ea7076c822e2bd9 Mon Sep 17 00:00:00 2001
From: Jim Jagielski 
Date: Mon, 29 Oct 2007 13:08:43 +0000
Subject: [PATCH] Merge r573831, r589343 from trunk:
Add option to escape backreferences in RewriteRule.
PR 34602 and PR 39746
Patch by Guenther Gsenger
Update r573831 to avoid duplicating URL-escaping code.
Ref. http://www.mail-archive.com/dev@httpd.apache.org/msg38532.html
Submitted by: niq
Reviewed by: jim
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.2.x@589615 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES                         |  5 +++-
 STATUS                          |  6 ----
 docs/manual/mod/mod_rewrite.xml | 18 ++++++++++--
 modules/mappers/mod_rewrite.c   | 49 ++++++++++++++++++++++++---------
 4 files changed, 56 insertions(+), 22 deletions(-)
diff --git a/CHANGES b/CHANGES
index 713f595e094..ed78ed5ebbc 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,9 @@
-                                                        -*- coding: utf-8 -*-
+                                                        -*- coding: utf-8 -*-
 Changes with Apache 2.2.7
 
+  *) mod_rewrite: Add option to suppress URL unescaping
+     PR 34602 [Guenther Gsenger]
+
   *) mpm_winnt: Eliminate wait_for_many_objects.  Allows the clean 
      shutdown of the server when the MaxClients is higher then 257,
      in a more responsive manner [Mladen Turk, William Rowe]
diff --git a/STATUS b/STATUS
index 50b4fda7894..e77f4b88aad 100644
--- a/STATUS
+++ b/STATUS
@@ -79,12 +79,6 @@ RELEASE SHOWSTOPPERS:
 PATCHES ACCEPTED TO BACKPORT FROM TRUNK:
   [ start all new proposals below, under PATCHES PROPOSED. ]
 
-    * mod_rewrite: Add option to suppress URL unescaping
-      PR 34602
-      http://svn.apache.org/viewvc?view=rev&revision=573831
-      http://svn.apache.org/viewvc?view=rev&revision=589343
-      +1: niq, rpluem, jim
-
    * HTTP protocol: Add "DefaultType none" option.
      PR 13986 and PR 16139
      http://svn.apache.org/viewvc?view=rev&revision=579991 (code)
diff --git a/docs/manual/mod/mod_rewrite.xml b/docs/manual/mod/mod_rewrite.xml
index 8e0248d87b8..51e7d2beefe 100644
--- a/docs/manual/mod/mod_rewrite.xml
+++ b/docs/manual/mod/mod_rewrite.xml
@@ -1223,6 +1223,21 @@ cannot use $N in the substitution string!
       brackets, of any of the following flags: 
 
       
+        <dt>'<code>B</code>' (escape backreferences)</dt>
+        <dd><p>Apache has to unescape URLs before mapping them,
+        so backreferences will be unescaped at the time they are applied.
+        Using the B flag, non-alphanumeric characters in backreferences
+        will be escaped.  For example, consider the rule:</p>
+        <example>RewriteRule ^(.*)$   index.php?show=$1</example>
+        <p>This will map <code>/C++</code> to <code>index.php?show=C++</code>.
+        But it will also map <code>/C%2b%2b</code> to
+        <code>index.php?show=C++</code>, because the <code>%2b</code>
+        has been unescaped.  With the B flag, it will instead map to
+        <code>index.php?show=/C%2b%2b</code>.</p>
+        <p>This escaping is particularly necessary in a proxy situation,
+        when the backend may break if presented with an unescaped URL.</p>
+        </dd>
+
+- 'chain|C'
         (chained with next rule)
- 
          This flag chains the current rule with the next rule
@@ -1236,8 +1251,7 @@ cannot use $N in the substitution string!
         when you let an external redirect happen (where the
         ``.www'' part should not occur!).
-- 
-		'cookie|CO=NAME:VAL:domain[:lifetime[:path]]'
+
- 'cookie|CO=NAME:VAL:domain[:lifetime[:path]]'
         (set cookie)
- 
         This sets a cookie in the client's browser.  The cookie's name
         is specified by NAME and the value is
diff --git a/modules/mappers/mod_rewrite.c b/modules/mappers/mod_rewrite.c
index 39dc11fbb6e..609ad3354f1 100644
--- a/modules/mappers/mod_rewrite.c
+++ b/modules/mappers/mod_rewrite.c
@@ -145,6 +145,7 @@
 #define RULEFLAG_NOESCAPE           1<<11
 #define RULEFLAG_NOSUB              1<<12
 #define RULEFLAG_STATUS             1<<13
+#define RULEFLAG_ESCAPEBACKREF      1<<14
 
 /* return code of the rewrite rule
  * the result may be escaped - or not
@@ -2079,7 +2080,7 @@ static APR_INLINE char *find_char_in_curlies(char *s, int c)
  * are interpreted by a later expansion, producing results that
  * were not intended by the administrator.
  */
-static char *do_expand(char *input, rewrite_ctx *ctx)
+static char *do_expand(char *input, rewrite_ctx *ctx, rewriterule_entry *entry)
 {
     result_list *result, *current;
     result_list sresult[SMALL_EXPANSION];
@@ -2191,10 +2192,10 @@ static char *do_expand(char *input, rewrite_ctx *ctx)
                     }
 
                     /* reuse of key variable as result */
-                    key = lookup_map(ctx->r, map, do_expand(key, ctx));
+                    key = lookup_map(ctx->r, map, do_expand(key, ctx, entry));
 
                     if (!key && dflt && *dflt) {
-                        key = do_expand(dflt, ctx);
+                        key = do_expand(dflt, ctx, entry);
                     }
 
                     if (key) {
@@ -2218,9 +2219,22 @@ static char *do_expand(char *input, rewrite_ctx *ctx)
             if (bri->source && n < AP_MAX_REG_MATCH
                 && bri->regmatch[n].rm_eo > bri->regmatch[n].rm_so) {
                 span = bri->regmatch[n].rm_eo - bri->regmatch[n].rm_so;
-
-                current->len = span;
-                current->string = bri->source + bri->regmatch[n].rm_so;
+                if (entry && (entry->flags & RULEFLAG_ESCAPEBACKREF)) {
+                    /* escape the backreference */
+                    char *tmp2, *tmp;
+                    tmp = apr_pstrndup(pool, bri->source + bri->regmatch[n].rm_so, span);
+                    tmp2 = ap_escape_path_segment(pool, tmp);
+                    rewritelog((ctx->r, 5, ctx->perdir, "escaping backreference '%s' to '%s'",
+                            tmp, tmp2));
+
+                    current->len = span = strlen(tmp2);
+                    current->string = tmp2;
+                }
+                else {
+                    current->len = span;
+                    current->string = bri->source + bri->regmatch[n].rm_so;
+                }
+                
                 outlen += span;
             }
 
@@ -2280,7 +2294,7 @@ static void do_expand_env(data_item *env, rewrite_ctx *ctx)
     char *name, *val;
 
     while (env) {
-        name = do_expand(env->data, ctx);
+        name = do_expand(env->data, ctx, NULL);
         if ((val = ap_strchr(name, ':')) != NULL) {
             *val++ = '\0';
 
@@ -2369,7 +2383,7 @@ static void add_cookie(request_rec *r, char *s)
 static void do_expand_cookie(data_item *cookie, rewrite_ctx *ctx)
 {
     while (cookie) {
-        add_cookie(ctx->r, do_expand(cookie->data, ctx));
+        add_cookie(ctx->r, do_expand(cookie->data, ctx, NULL));
         cookie = cookie->next;
     }
 
@@ -3149,6 +3163,15 @@ static const char *cmd_rewriterule_setflag(apr_pool_t *p, void *_cfg,
     int error = 0;
 
     switch (*key++) {
+    case 'b':
+    case 'B':
+        if (!*key || !strcasecmp(key, "ackrefescaping")) {
+            cfg->flags |= RULEFLAG_ESCAPEBACKREF;
+        } 
+        else {
+            ++error;
+        }
+        break;
     case 'c':
     case 'C':
         if (!*key || !strcasecmp(key, "hain")) {           /* chain */
@@ -3350,7 +3373,6 @@ static const char *cmd_rewriterule_setflag(apr_pool_t *p, void *_cfg,
             ++error;
         }
         break;
-
     default:
         ++error;
         break;
@@ -3486,7 +3508,7 @@ static APR_INLINE int compare_lexicography(char *a, char *b)
  */
 static int apply_rewrite_cond(rewritecond_entry *p, rewrite_ctx *ctx)
 {
-    char *input = do_expand(p->input, ctx);
+    char *input = do_expand(p->input, ctx, NULL);
     apr_finfo_t sb;
     request_rec *rsub, *r = ctx->r;
     ap_regmatch_t regmatch[AP_MAX_REG_MATCH];
@@ -3609,7 +3631,7 @@ static APR_INLINE void force_type_handler(rewriterule_entry *p,
     char *expanded;
 
     if (p->forced_mimetype) {
-        expanded = do_expand(p->forced_mimetype, ctx);
+        expanded = do_expand(p->forced_mimetype, ctx, p);
 
         if (*expanded) {
             ap_str_tolower(expanded);
@@ -3623,7 +3645,7 @@ static APR_INLINE void force_type_handler(rewriterule_entry *p,
     }
 
     if (p->forced_handler) {
-        expanded = do_expand(p->forced_handler, ctx);
+        expanded = do_expand(p->forced_handler, ctx, p);
 
         if (*expanded) {
             ap_str_tolower(expanded);
@@ -3755,7 +3777,7 @@ static int apply_rewrite_rule(rewriterule_entry *p, rewrite_ctx *ctx)
 
     /* expand the result */
     if (!(p->flags & RULEFLAG_NOSUB)) {
-        newuri = do_expand(p->output, ctx);
+        newuri = do_expand(p->output, ctx, p);
         rewritelog((r, 2, ctx->perdir, "rewrite '%s' -> '%s'", ctx->uri,
                     newuri));
     }
@@ -3802,6 +3824,7 @@ static int apply_rewrite_rule(rewriterule_entry *p, rewrite_ctx *ctx)
      * ourself).
      */
     if (p->flags & RULEFLAG_PROXY) {
+        /* PR#39746: do not escape the URI here; mod_proxy would escape it again */
         fully_qualify_uri(r);
 
         rewritelog((r, 2, ctx->perdir, "forcing proxy-throughput with %s",
-- 
2.47.3