Backport mod_substitute from trunk:

author Ruediger Pluem <rpluem@apache.org>

Sun, 9 Dec 2007 15:19:40 +0000 (15:19 +0000)

committer Ruediger Pluem <rpluem@apache.org>

Sun, 9 Dec 2007 15:19:40 +0000 (15:19 +0000)
author Ruediger Pluem <rpluem@apache.org>
Sun, 9 Dec 2007 15:19:40 +0000 (15:19 +0000)
committer Ruediger Pluem <rpluem@apache.org>
Sun, 9 Dec 2007 15:19:40 +0000 (15:19 +0000)
diff --git a/CHANGES b/CHANGES

index d6c85713e96b2b9217e929ab43558e3045a23c43..ef4f8cee29eec7ffddf5749f26bbfb42cd3309f4 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -34,6 +34,10 @@ Changes with Apache 2.2.7
       configurable in case something breaks on it.
       PR 16518 [Nick Kew]
  
+  *) mod_substitute: Added a new experimental output filter, which
+     performs inline response content pattern matching (including
+     regex) and substitution.  [Jim Jagielski, Ruediger Pluem]
+
    *) rotatelogs: Change command-line parsing to report more types
       of errors.  Allow local timestamps to be used when rotating based
       on file size.  [Jeff Trawick]
diff --git a/STATUS b/STATUS

index ad49828094490322b1a4a6495b69fa0bf778e17d..d90395ff44045f3b3709e823168c96a17c8ee7bd 100644 (file)
--- a/STATUS
+++ b/STATUS
@@ -79,16 +79,6 @@ RELEASE SHOWSTOPPERS:
  PATCHES ACCEPTED TO BACKPORT FROM TRUNK:
    [ start all new proposals below, under PATCHES PROPOSED. ]
  
-  * mod_substitute: New module for on-the-fly response rewrite-like
-    capability.
-    trunk:
-       http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/experimental/mod_substitute.c?view=log
-       Don't even bother...
-    2.2.x:
-       http://people.apache.org/~jim/patches/mod_substitute-2.2rev2.txt
-       (NWGNUsubstitute need also be copied over from trunk, is missing in your patch)
-    +1: jim, rpluem, fuankg
-
  PATCHES PROPOSED TO BACKPORT FROM TRUNK:
    [ New proposals should be added at the end of the list ]
  
diff --git a/docs/manual/mod/mod_substitute.xml b/docs/manual/mod/mod_substitute.xml

new file mode 100644 (file)

index 0000000..0bad52b
--- /dev/null
+++ b/docs/manual/mod/mod_substitute.xml
@@ -0,0 +1,90 @@
+<?xml version="1.0"?>
+<!DOCTYPE modulesynopsis SYSTEM "../style/modulesynopsis.dtd">
+<?xml-stylesheet type="text/xsl" href="../style/manual.en.xsl"?>
+<!-- $LastChangedRevision: 587150 $ -->
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<modulesynopsis metafile="mod_substitute.xml.meta">
+
+<name>mod_substitute</name>
+<description>Perform search and replace operations on response bodies</description>
+<status>Extension</status>
+<sourcefile>mod_substitute.c</sourcefile>
+<identifier>substitute_module</identifier>
+
+<summary>
+    <p><module>mod_substitute</module> provides a mechanism to perform
+    both regular expression and fixed string substitutions on
+    response bodies.</p>
+    <p>This is an <strong>experimental</strong> module and should
+    be used with care.</p>
+</summary>
+
+<directivesynopsis>
+<name>Substitute</name>
+<description>Pattern to filter the response content</description>
+<syntax>Substitute <var>s/pattern/substitution/[inf]</var></syntax>
+<contextlist><context>directory</context>
+<context>.htaccess</context></contextlist>
+<override>FileInfo</override>
+
+<usage>
+    <p>The <directive>Substitute</directive> directive specifies a
+    search and replace pattern to apply to the response body.</p>
+    
+    <p>The meaning of the pattern can be modified by using any
+    combination of these flags:</p>
+    
+    <dl>
+        <dt><code>i</code></dt>
+        <dd>Perform a case-insensitive match.</dd>
+        <dt><code>n</code></dt>
+        <dd>By default the pattern is treated as a regular expression.
+        Using the <code>n</code> flag forces the pattern to be treated
+        as a fixed string.</dd>
+        <dt><code>f</code></dt>
+        <dd>The <code>f</code> flag causes mod_substitute to flatten the
+        result of a substitution allowing for later substitutions to
+        take place on the boundary of this one.</dd>
+    </dl>
+    
+    <example><title>Example</title>
+        &lt;Location /&gt;
+        <indent>
+            AddOutputFilterByType SUBSTITUTE text/html<br />
+            Substitute s/foo/bar/ni<br />
+        </indent>
+        &lt;/Location&gt;
+    </example>
+    
+    <p>If either the pattern or the substitution contains a slash
+    character then an alternative delimiter should be used:</p>
+    
+    <example><title>Example of using an alternate delimiter</title>
+        &lt;Location /&gt;
+        <indent>
+            AddOutputFilterByType SUBSTITUTE text/html<br />
+            Substitute "s|&lt;BR */?&gt;|&lt;br /&gt;|i"
+        </indent>
+        &lt;/Location&gt;
+    </example>
+</usage>
+</directivesynopsis>
+
+</modulesynopsis>
diff --git a/docs/manual/mod/mod_substitute.xml.meta b/docs/manual/mod/mod_substitute.xml.meta

new file mode 100644 (file)

index 0000000..8fab2f3
--- /dev/null
+++ b/docs/manual/mod/mod_substitute.xml.meta
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+
+<metafile>
+  <basename>mod_substitute</basename>
+  <path>/mod/</path>
+  <relpath>..</relpath>
+
+  <variants>
+    <variant>en</variant>
+  </variants>
+</metafile>
diff --git a/modules/filters/NWGNUmakefile b/modules/filters/NWGNUmakefile

index 4433689ee568f94ce38729b9e371afb0458fae02..04762d6054f9ddd66cf5aae406bd1443eff839b8 100644 (file)
--- a/modules/filters/NWGNUmakefile
+++ b/modules/filters/NWGNUmakefile
@@ -155,6 +155,7 @@ TARGET_nlm = \
         $(OBJDIR)/extfiltr.nlm \
         $(OBJDIR)/charsetl.nlm \
         $(OBJDIR)/mod_filter.nlm \
+       $(OBJDIR)/substitute.nlm \
         $(EOLIST)
  
  # If the zlib libraries source exists then build the mod_deflate module
diff --git a/modules/filters/NWGNUsubstitute b/modules/filters/NWGNUsubstitute

new file mode 100644 (file)

index 0000000..ca19eb1
--- /dev/null
+++ b/modules/filters/NWGNUsubstitute
@@ -0,0 +1,258 @@
+#
+# Declare the sub-directories to be built here
+#
+
+SUBDIRS = \
+       $(EOLIST)
+
+#
+# Get the 'head' of the build environment.  This includes default targets and
+# paths to tools
+#
+
+include $(AP_WORK)\build\NWGNUhead.inc
+
+#
+# build this level's files
+
+#
+# Make sure all needed macros are defined
+#
+
+#
+# These directories will be at the beginning of the include list, followed by
+# INCDIRS
+#
+XINCDIRS       += \
+                       $(AP_WORK)/include \
+                       $(NWOS) \
+                       $(AP_WORK)/modules/arch/netware \
+                       $(APR)/include \
+                       $(APRUTIL)/include \
+                       $(APR) \
+                       $(EOLIST)
+
+#
+# These flags will come after CFLAGS
+#
+XCFLAGS                += \
+                       $(EOLIST)
+
+#
+# These defines will come after DEFINES
+#
+XDEFINES       += \
+                       $(EOLIST)
+
+#
+# These flags will be added to the link.opt file
+#
+XLFLAGS                += \
+                       $(EOLIST)
+
+#
+# These values will be appended to the correct variables based on the value of
+# RELEASE
+#
+ifeq "$(RELEASE)" "debug"
+XINCDIRS       += \
+                       $(EOLIST)
+
+XCFLAGS                += \
+                       $(EOLIST)
+
+XDEFINES       += \
+                       $(EOLIST)
+
+XLFLAGS                += \
+                       $(EOLIST)
+endif
+
+ifeq "$(RELEASE)" "noopt"
+XINCDIRS       += \
+                       $(EOLIST)
+
+XCFLAGS                += \
+                       $(EOLIST)
+
+XDEFINES       += \
+                       $(EOLIST)
+
+XLFLAGS                += \
+                       $(EOLIST)
+endif
+
+ifeq "$(RELEASE)" "release"
+XINCDIRS       += \
+                       $(EOLIST)
+
+XCFLAGS                += \
+                       $(EOLIST)
+
+XDEFINES       += \
+                       $(EOLIST)
+
+XLFLAGS                += \
+                       $(EOLIST)
+endif
+
+#
+# These are used by the link target if an NLM is being generated
+# This is used by the link 'name' directive to name the nlm.  If left blank
+# TARGET_nlm (see below) will be used.
+#
+NLM_NAME       = substitute
+
+#
+# This is used by the link '-desc ' directive.
+# If left blank, NLM_NAME will be used.
+#
+NLM_DESCRIPTION        = Apache $(VERSION_STR) Substitute Module
+
+#
+# This is used by the '-threadname' directive.  If left blank,
+# NLM_NAME Thread will be used.
+#
+NLM_THREAD_NAME        = Substitute Module
+
+#
+# If this is specified, it will override VERSION value in
+# $(AP_WORK)\build\NWGNUenvironment.inc
+#
+NLM_VERSION    =
+
+#
+# If this is specified, it will override the default of 64K
+#
+NLM_STACK_SIZE = 8192
+
+
+#
+# If this is specified it will be used by the link '-entry' directive
+#
+NLM_ENTRY_SYM  = _LibCPrelude
+
+#
+# If this is specified it will be used by the link '-exit' directive
+#
+NLM_EXIT_SYM   = _LibCPostlude
+
+#
+# If this is specified it will be used by the link '-check' directive
+#
+NLM_CHECK_SYM  =
+
+#
+# If these are specified it will be used by the link '-flags' directive
+#
+NLM_FLAGS      = AUTOUNLOAD, PSEUDOPREEMPTION
+
+#
+# If this is specified it will be linked in with the XDCData option in the def
+# file instead of the default of $(NWOS)/apache.xdc.  XDCData can be disabled
+# by setting APACHE_UNIPROC in the environment
+#
+XDCDATA                =
+
+#
+# If there is an NLM target, put it here
+#
+TARGET_nlm = \
+       $(OBJDIR)/substitute.nlm \
+       $(EOLIST)
+
+#
+# If there is a LIB target, put it here
+#
+TARGET_lib = \
+       $(EOLIST)
+
+#
+# These are the OBJ files needed to create the NLM target above.
+# Paths must all use the '/' character
+#
+FILES_nlm_objs = \
+       $(OBJDIR)/mod_substitute.o \
+       $(EOLIST)
+
+#
+# These are the LIB files needed to create the NLM target above.
+# These will be added as a library command in the link.opt file.
+#
+FILES_nlm_libs = \
+       libcpre.o \
+       $(EOLIST)
+
+#
+# These are the modules that the above NLM target depends on to load.
+# These will be added as a module command in the link.opt file.
+#
+FILES_nlm_modules = \
+       aprlib \
+       libc \
+       $(EOLIST)
+
+#
+# If the nlm has a msg file, put its path here
+#
+FILE_nlm_msg =
+
+#
+# If the nlm has a hlp file, put its path here
+#
+FILE_nlm_hlp =
+
+#
+# If this is specified, it will override $(NWOS)\copyright.txt.
+#
+FILE_nlm_copyright =
+
+#
+# Any additional imports go here
+#
+FILES_nlm_Ximports = \
+       @$(APR)/aprlib.imp \
+       @$(NWOS)/httpd.imp \
+       @libc.imp \
+       $(EOLIST)
+
+#
+# Any symbols to be exported go here
+#
+FILES_nlm_exports = \
+       substitute_module \
+       $(EOLIST)
+
+#
+# These are the OBJ files needed to create the LIB target above.
+# Paths must all use the '/' character
+#
+FILES_lib_objs = \
+       $(EOLIST)
+
+#
+# implement targets and dependencies (leave this section alone)
+#
+
+libs :: $(OBJDIR) $(TARGET_lib)
+
+nlms :: libs $(TARGET_nlm)
+
+#
+# Updated this target to create necessary directories and copy files to the
+# correct place.  (See $(AP_WORK)\build\NWGNUhead.inc for examples)
+#
+install :: nlms FORCE
+
+#
+# Any specialized rules here
+#
+
+#
+# Include the 'tail' makefile that has targets that depend on variables defined
+# in this makefile
+#
+
+include $(AP_WORK)\build\NWGNUtail.inc
+
+
diff --git a/modules/filters/config.m4 b/modules/filters/config.m4

index c9c8f386c9eb4a3b020fbbfb23feab485a6a9019..9c1f608350ad301c0ff0f9eeb0ec8560d7d82788 100644 (file)
--- a/modules/filters/config.m4
+++ b/modules/filters/config.m4
@@ -7,6 +7,7 @@ APACHE_MODPATH_INIT(filters)
  APACHE_MODULE(ext_filter, external filter module, , , most)
  APACHE_MODULE(include, Server Side Includes, , , yes)
  APACHE_MODULE(filter, Smart Filtering, , , yes)
+APACHE_MODULE(substitute, response content rewrite-like filtering, , , most)
  
  if test "$ac_cv_ebcdic" = "yes"; then
  # mod_charset_lite can be very useful on an ebcdic system,
diff --git a/modules/filters/mod_substitute.c b/modules/filters/mod_substitute.c

new file mode 100644 (file)

index 0000000..592d140
--- /dev/null
+++ b/modules/filters/mod_substitute.c
@@ -0,0 +1,584 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * mod_substitute.c: Perform content rewriting on the fly
+ */
+
+#include "httpd.h"
+#include "http_config.h"
+#include "http_core.h"
+#include "apr_general.h"
+#include "apr_strings.h"
+#include "apr_strmatch.h"
+#include "apr_lib.h"
+#include "util_filter.h"
+#include "apr_buckets.h"
+#include "http_request.h"
+#define APR_WANT_STRFUNC
+#include "apr_want.h"
+
+static const char substitute_filter_name[] = "SUBSTITUTE";
+
+module AP_MODULE_DECLARE_DATA substitute_module;
+
+typedef struct subst_pattern_t {
+    const apr_strmatch_pattern *pattern; /* fixed-string matcher; tried first */
+    const ap_regex_t *regexp;            /* regex, used when pattern is NULL */
+    const char *replacement;             /* replacement text / ap_pregsub input */
+    apr_size_t replen;                   /* length used for the replacement bucket */
+    apr_size_t patlen;                   /* bytes removed per fixed-string match */
+    int flatten;                         /* non-zero: rebuild bucket as one string */
+} subst_pattern_t;
+
+typedef struct {
+    apr_array_header_t *patterns; /* array of subst_pattern_t, applied in order */
+} subst_dir_conf;
+
+typedef struct {
+    apr_bucket_brigade *linebb;  /* partial line seen so far (no newline yet) */
+    apr_bucket_brigade *linesbb; /* spare brigade swapped with linebb on save */
+    apr_bucket_brigade *passbb;  /* output queued for the next filter */
+    apr_bucket_brigade *pattbb;  /* per-line work brigade for do_pattmatch */
+    apr_pool_t *tpool;           /* scratch pool, cleared after each pass */
+} substitute_module_ctx;
+
+/* Build an empty per-directory config; the pattern array starts with 10 slots. */
+static void *create_substitute_dcfg(apr_pool_t *p, char *d)
+{
+    subst_dir_conf *conf = apr_pcalloc(p, sizeof(*conf));
+
+    conf->patterns = apr_array_make(p, 10, sizeof(subst_pattern_t));
+    return conf;
+}
+
+/* Merge configs: the overriding section's patterns come first, then base's. */
+static void *merge_substitute_dcfg(apr_pool_t *p, void *basev, void *overv)
+{
+    subst_dir_conf *parent = basev;
+    subst_dir_conf *child = overv;
+    subst_dir_conf *merged = apr_pcalloc(p, sizeof(*merged));
+
+    merged->patterns = apr_array_append(p, child->patterns,
+                                        parent->patterns);
+    return merged;
+}
+
+#define AP_MAX_BUCKETS 1000 /* passbb is flushed once it holds more than this */
+/* Append blen bytes of buff, then repl, to pool string s1 (s2 is a temp). */
+#define SEDSCAT(s1, s2, pool, buff, blen, repl) do { \
+    if (!s1) {                                       \
+        s1 = apr_pstrmemdup(pool, buff, blen);       \
+    }                                                \
+    else {                                           \
+        s2 = apr_pstrmemdup(pool, buff, blen);       \
+        s1 = apr_pstrcat(pool, s1, s2, NULL);        \
+    }                                                \
+    s1 = apr_pstrcat(pool, s1, repl, NULL);          \
+} while (0)
+/* Cut patlen bytes at offset out of bucket b; b ends up just after the cut. */
+#define SEDRMPATBCKT(b, offset, tmp_b, patlen) do {  \
+    apr_bucket_split(b, offset);                     \
+    tmp_b = APR_BUCKET_NEXT(b);                      \
+    apr_bucket_split(tmp_b, patlen);                 \
+    b = APR_BUCKET_NEXT(tmp_b);                      \
+    apr_bucket_delete(tmp_b);                        \
+} while (0)
+
+/*
+ * Apply every configured pattern, in order, to the data bucket inb.  inb is
+ * appended to mybb and rewritten there.  Buckets left in mybb may point into
+ * tmp_pool, so it must not be cleared until they have been consumed.
+ */
+static void do_pattmatch(ap_filter_t *f, apr_bucket *inb,
+                         apr_bucket_brigade *mybb,
+                         apr_pool_t *tmp_pool)
+{
+    int i;
+    ap_regmatch_t regm[AP_MAX_REG_MATCH];
+    apr_size_t bytes;
+    apr_size_t len;
+    apr_size_t fbytes;
+    const char *buff;
+    const char *repl;
+    char *scratch;
+    char *p;
+    char *s1;
+    char *s2;
+    apr_bucket *b;
+    apr_bucket *tmp_b;
+    apr_pool_t *tpool;
+
+    subst_dir_conf *cfg =
+    (subst_dir_conf *) ap_get_module_config(f->r->per_dir_config,
+                                             &substitute_module);
+    subst_pattern_t *script;
+
+    APR_BRIGADE_INSERT_TAIL(mybb, inb);
+
+    script = (subst_pattern_t *) cfg->patterns->elts;
+    apr_pool_create(&tpool, tmp_pool);
+    scratch = NULL;
+    fbytes = 0;
+    for (i = 0; i < cfg->patterns->nelts; i++) {
+        for (b = APR_BRIGADE_FIRST(mybb);
+             b != APR_BRIGADE_SENTINEL(mybb);
+             b = APR_BUCKET_NEXT(b)) {
+            if (APR_BUCKET_IS_METADATA(b)) {
+                /*
+                 * we should NEVER see this, because we should never
+                 * be passed any, but "handle" it just in case.
+                 */
+                continue;
+            }
+            if (apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ)
+                    == APR_SUCCESS) {
+                s1 = NULL;
+                if (script->pattern) {
+                    while ((repl = apr_strmatch(script->pattern, buff, bytes)))
+                    {
+                        /* get offset into buff for pattern */
+                        len = (apr_size_t) (repl - buff);
+                        if (script->flatten) {
+                            /*
+                             * We are flattening the buckets here, meaning
+                             * that we don't do the fast bucket splits.
+                             * Instead we copy over what the buckets would
+                             * contain and use them. This is slow, since we
+                             * are constantly allocating space and copying
+                             * strings.
+                             */
+                            SEDSCAT(s1, s2, tmp_pool, buff, len,
+                                    script->replacement);
+                        }
+                        else {
+                            /*
+                             * We now split off the stuff before the pattern
+                             * as its own bucket, then isolate the pattern
+                             * and delete it.
+                             */
+                            SEDRMPATBCKT(b, len, tmp_b, script->patlen);
+                            /*
+                             * Finally, we create a bucket that contains the
+                             * replacement...
+                             */
+                            tmp_b = apr_bucket_transient_create(script->replacement,
+                                      script->replen,
+                                      f->r->connection->bucket_alloc);
+                            /* ... and insert it */
+                            APR_BUCKET_INSERT_BEFORE(b, tmp_b);
+                        }
+                        /* now we need to adjust buff for all these changes */
+                        len += script->patlen;
+                        bytes -= len;
+                        buff += len;
+                    }
+                    if (script->flatten && s1) {
+                        /*
+                         * swap the old bucket for the flattened copy and leave
+                         * b on the copy, so the loop's NEXT steps past it
+                         */
+                        s2 = apr_pstrmemdup(tmp_pool, buff, bytes);
+                        s1 = apr_pstrcat(tmp_pool, s1, s2, NULL);
+                        tmp_b = apr_bucket_transient_create(s1, strlen(s1),
+                                            f->r->connection->bucket_alloc);
+                        APR_BUCKET_INSERT_BEFORE(b, tmp_b);
+                        apr_bucket_delete(b);
+                        b = tmp_b;
+                    }
+                }
+                else if (script->regexp) {
+                    /*
+                     * we need a null terminated string here :(. To hopefully
+                     * save time and memory, we don't alloc for each run
+                     * through, but only if we need to have a larger chunk
+                     * to save the string to. fbytes is the size of scratch;
+                     * it has to hold bytes + 1 (data plus the terminating
+                     * NUL), so re-alloc as soon as bytes reaches fbytes.
+                     */
+                    if (!scratch || (bytes >= fbytes)) {
+                        fbytes = bytes + 1;
+                        scratch = apr_palloc(tpool, fbytes);
+                    }
+                    /* reset pointer to the scratch space */
+                    p = scratch;
+                    memcpy(p, buff, bytes);
+                    p[bytes] = '\0';
+                    while (!ap_regexec(script->regexp, p,
+                                       AP_MAX_REG_MATCH, regm, 0)) {
+                        /* first, grab the replacement string */
+                        repl = ap_pregsub(tmp_pool, script->replacement, p,
+                                          AP_MAX_REG_MATCH, regm);
+                        if (script->flatten) {
+                            SEDSCAT(s1, s2, tmp_pool, p, regm[0].rm_so, repl);
+                        }
+                        else {
+                            len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so);
+                            SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len);
+                            tmp_b = apr_bucket_transient_create(repl,
+                                                                strlen(repl),
+                                             f->r->connection->bucket_alloc);
+                            APR_BUCKET_INSERT_BEFORE(b, tmp_b);
+                        }
+                        /*
+                         * reset to past what we just did. buff now maps to b
+                         * again
+                         */
+                        p += regm[0].rm_eo;
+                    }
+                    if (script->flatten && s1) {
+                        /* same swap as in the fixed-string branch above */
+                        s1 = apr_pstrcat(tmp_pool, s1, p, NULL);
+                        tmp_b = apr_bucket_transient_create(s1, strlen(s1),
+                                            f->r->connection->bucket_alloc);
+                        APR_BUCKET_INSERT_BEFORE(b, tmp_b);
+                        apr_bucket_delete(b);
+                        b = tmp_b;
+                    }
+                }
+                else {
+                    /* huh? */
+                    continue;
+                }
+            }
+        }
+        script++;
+    }
+
+    apr_pool_destroy(tpool);
+}
+
+/*
+ * Output filter: cut the body into newline-terminated lines, substitute in
+ * each and pass them on; an unfinished last line waits in ctx->linebb.
+ */
+static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb)
+{
+    apr_size_t bytes;
+    apr_size_t len;
+    apr_size_t fbytes;
+    const char *buff;
+    const char *nl = NULL;
+    char *bflat;
+    apr_bucket *b;
+    apr_bucket *tmp_b;
+    apr_bucket_brigade *tmp_bb = NULL;
+    apr_status_t rv;
+
+    substitute_module_ctx *ctx = f->ctx;
+
+    /*
+     * First time around? Create the saved bb that we use for each pass
+     * through. Note that we can also get here when we explicitly clear ctx,
+     * for error handling
+     */
+    if (!ctx) {
+        f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
+        /*
+         * Create the temporary brigades once and reuse them; creating new
+         * ones from r->pool on every call could cost a lot of memory.
+         */
+        ctx->linebb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
+        ctx->linesbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
+        ctx->pattbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
+        /*
+         * Everything to be passed to the next filter goes in
+         * here, our pass brigade.
+         */
+        ctx->passbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
+        /* Create our temporary pool only once */
+        apr_pool_create(&(ctx->tpool), f->r->pool);
+        apr_table_unset(f->r->headers_out, "Content-Length");
+    }
+
+    /*
+     * Shortcircuit processing
+     */
+    if (APR_BRIGADE_EMPTY(bb))
+        return APR_SUCCESS;
+
+    /*
+     * Here's the concept:
+     *  Read in the data and look for newlines. Once we
+     *  find a full "line", add it to our working brigade.
+     *  If we've finished reading the brigade and we have
+     *  any left over data (not a "full" line), store that
+     *  for the next pass.
+     *
+     * Note: anything stored in ctx->linebb for sure does not have
+     * a newline char, so we don't concat that bb with the
+     * new bb, since we would be spending time searching for the newline
+     * in data we know it doesn't exist. So instead, we simply scan
+     * our current bb and, if we see a newline, prepend ctx->linebb
+     * to the front of it. This makes the code much less straight-
+     * forward (otherwise we could APR_BRIGADE_CONCAT(ctx->linebb, bb)
+     * and just scan for newlines and not bother with needing to know
+     * when ctx->linebb needs to be reset) but also faster. We'll take
+     * the speed.
+     *
+     * Note: apr_brigade_split_line would be nice here, but we
+     * really can't use it since we need more control and we want
+     * to re-use already read bucket data.
+     *
+     * See mod_include if still confused :)
+     */
+
+    while ((b = APR_BRIGADE_FIRST(bb)) && (b != APR_BRIGADE_SENTINEL(bb))) {
+        if (APR_BUCKET_IS_EOS(b)) {
+            /*
+             * EOS: pass along everything we have.  A partial line still in
+             * ctx->linebb is substituted and queued ahead of the EOS bucket.
+             */
+            if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
+                rv = apr_brigade_pflatten(ctx->linebb, &bflat,
+                                          &fbytes, ctx->tpool);
+                tmp_b = apr_bucket_transient_create(bflat, fbytes,
+                                                f->r->connection->bucket_alloc);
+                do_pattmatch(f, tmp_b, ctx->pattbb, ctx->tpool);
+                APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
+            }
+            apr_brigade_cleanup(ctx->linebb);
+            APR_BUCKET_REMOVE(b);
+            APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
+        }
+        /*
+         * No need to handle FLUSH buckets separately as we call
+         * ap_pass_brigade anyway at the end of the loop.
+         */
+        else if (APR_BUCKET_IS_METADATA(b)) {
+            APR_BUCKET_REMOVE(b);
+            APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
+        }
+        else {
+            /*
+             * Actual "data": read the bucket, then scan and split the read
+             * buffer.  Unreadable or empty buckets are destroyed, not leaked.
+             */
+            rv = apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ);
+            if (rv != APR_SUCCESS || bytes == 0) {
+                apr_bucket_delete(b);
+            }
+            else {
+                int num = 0;
+                while (bytes > 0) {
+                    nl = memchr(buff, APR_ASCII_LF, bytes);
+                    if (nl) {
+                        len = (apr_size_t) (nl - buff) + 1;
+                        /* split *after* the newline */
+                        apr_bucket_split(b, len);
+                        /*
+                         * We've likely read more data, so bypass rereading
+                         * bucket data and continue scanning through this
+                         * buffer
+                         */
+                        bytes -= len;
+                        buff += len;
+                        /*
+                         * we need b to be updated for future potential
+                         * splitting
+                         */
+                        tmp_b = APR_BUCKET_NEXT(b);
+                        APR_BUCKET_REMOVE(b);
+                        /*
+                         * Hey, we found a newline! Don't forget the old
+                         * stuff that needs to be added to the front. So we
+                         * add the split bucket to the end, flatten the whole
+                         * bb, morph the whole shebang into a bucket which is
+                         * then added to the tail of the newline bb.
+                         */
+                        if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
+                            APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
+                            rv = apr_brigade_pflatten(ctx->linebb, &bflat,
+                                                      &fbytes, ctx->tpool);
+                            b = apr_bucket_transient_create(bflat, fbytes,
+                                            f->r->connection->bucket_alloc);
+                            apr_brigade_cleanup(ctx->linebb);
+                        }
+                        do_pattmatch(f, b, ctx->pattbb, ctx->tpool);
+                        /*
+                         * Add this line's buckets to num, the running count of
+                         * what is queued in ctx->passbb (num is not reset per line).
+                         */
+                        for (b = APR_BRIGADE_FIRST(ctx->pattbb);
+                             b != APR_BRIGADE_SENTINEL(ctx->pattbb);
+                             b = APR_BUCKET_NEXT(b)) {
+                            num++;
+                        }
+                        APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
+                        /*
+                         * If the number of buckets in ctx->passbb reaches an
+                         * "insane" level, we consume much memory for all the
+                         * buckets as such. So lets flush them down the chain
+                         * in this case and thus clear ctx->passbb. This frees
+                         * the buckets memory for further processing.
+                         * Usually this condition should not become true, but
+                         * it is a safety measure for edge cases.
+                         */
+                        if (num > AP_MAX_BUCKETS) {
+                            b = apr_bucket_flush_create(
+                                                f->r->connection->bucket_alloc);
+                            APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
+                            rv = ap_pass_brigade(f->next, ctx->passbb);
+                            apr_brigade_cleanup(ctx->passbb);
+                            num = 0;
+                            apr_pool_clear(ctx->tpool);
+                            if (rv != APR_SUCCESS)
+                                return rv;
+                        }
+                        b = tmp_b;
+                    }
+                    else {
+                        /*
+                         * no newline in whatever is left of this buffer so
+                         * tuck data away and get next bucket
+                         */
+                        APR_BUCKET_REMOVE(b);
+                        APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
+                        bytes = 0;
+                    }
+                }
+            }
+        }
+        if (!APR_BRIGADE_EMPTY(ctx->passbb)) {
+            rv = ap_pass_brigade(f->next, ctx->passbb);
+            apr_brigade_cleanup(ctx->passbb);
+            if (rv != APR_SUCCESS) {
+                apr_pool_clear(ctx->tpool);
+                return rv;
+            }
+        }
+        apr_pool_clear(ctx->tpool);
+    }
+
+    /* Anything left we want to save/setaside for the next go-around */
+    if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
+        /*
+         * Provide ap_save_brigade with an existing empty brigade
+         * (ctx->linesbb) to avoid creating a new one.
+         */
+        ap_save_brigade(f, &(ctx->linesbb), &(ctx->linebb), f->r->pool);
+        tmp_bb = ctx->linebb;
+        ctx->linebb = ctx->linesbb;
+        ctx->linesbb = tmp_bb;
+    }
+
+    return APR_SUCCESS;
+}
+
+/*
+ * Handler for the "Substitute" directive.
+ *
+ * Parses one sed-like rule of the form  s<delim>from<delim>to<delim>[flags]
+ * and appends the result to the "patterns" array of the per-directory
+ * configuration.
+ *
+ * cmd  - directive context; cmd->pool holds the private copy of the rule
+ *        text and the compiled matcher.
+ * cfg  - per-directory configuration, used as a subst_dir_conf.
+ * line - the raw directive argument.
+ *
+ * Flags are case-insensitive:
+ *   i - match case-insensitively
+ *   n - "from" is a fixed string (apr_strmatch) instead of a regex
+ *   f - sets the entry's "flatten" field
+ *
+ * An empty replacement (s/foo//) is accepted, and the closing delimiter may
+ * be omitted when no flags follow.  An empty search pattern is rejected.
+ *
+ * Returns NULL on success, or a static error string when the rule is
+ * malformed, uses an unknown flag, or its regex does not compile.
+ */
+static const char *set_pattern(cmd_parms *cmd, void *cfg, const char *line)
+{
+    char *from = NULL;
+    char *to = NULL;
+    char *flags = NULL;
+    char *ourline;
+    char delim;
+    subst_pattern_t *nscript;
+    int is_pattern = 0;
+    int ignore_case = 0;
+    int flatten = 0;
+    ap_regex_t *r = NULL;
+
+    if (apr_tolower(*line) != 's') {
+        return "Bad Substitute format, must be an s/// pattern";
+    }
+
+    /* Work on a private copy: the fields are split in place with NULs. */
+    ourline = apr_pstrdup(cmd->pool, line);
+    delim = *++ourline;
+    if (delim)
+        from = ++ourline;
+    if (from) {
+        /*
+         * Test the current character before advancing.  This recognises an
+         * empty field (delimiter right at the start) and guarantees the
+         * scan never steps past the terminating NUL.
+         */
+        while (*ourline && *ourline != delim)
+            ourline++;
+        if (*ourline) {
+            *ourline = '\0';
+            to = ++ourline;
+        }
+    }
+    if (to) {
+        while (*ourline && *ourline != delim)
+            ourline++;
+        if (*ourline) {
+            *ourline = '\0';
+            flags = ++ourline;
+        }
+    }
+
+    /* A rule needs a delimiter, a non-empty search pattern and a "to" part. */
+    if (!delim || !from || !*from || !to) {
+        return "Bad Substitute format, must be a complete s/// pattern";
+    }
+
+    /* flags stays NULL when the closing delimiter was left off. */
+    if (flags) {
+        while (*flags) {
+            delim = apr_tolower(*flags);    /* re-use */
+            if (delim == 'i')
+                ignore_case = 1;
+            else if (delim == 'n')
+                is_pattern = 1;
+            else if (delim == 'f')
+                flatten = 1;
+            else
+                return "Bad Substitute flag, only s///[inf] are supported";
+            flags++;
+        }
+    }
+
+    /*
+     * Compile the regex before touching the pattern array, so a bad
+     * expression does not leave a half-initialised entry behind.
+     */
+    if (!is_pattern) {
+        r = ap_pregcomp(cmd->pool, from, AP_REG_EXTENDED |
+                        (ignore_case ? AP_REG_ICASE : 0));
+        if (!r)
+            return "Substitute could not compile regex";
+    }
+    nscript = apr_array_push(((subst_dir_conf *) cfg)->patterns);
+    /* init the new entries */
+    nscript->pattern = NULL;
+    nscript->regexp = NULL;
+    nscript->replacement = NULL;
+    nscript->patlen = 0;
+
+    if (is_pattern) {
+        nscript->patlen = strlen(from);
+        nscript->pattern = apr_strmatch_precompile(cmd->pool, from,
+                                                   !ignore_case);
+    }
+    else {
+        nscript->regexp = r;
+    }
+
+    nscript->replacement = to;
+    nscript->replen = strlen(to);
+    nscript->flatten = flatten;
+
+    return NULL;
+}
+/*
+ * Hook registration: registers substitute_filter as an output filter under
+ * the name substitute_filter_name, with filter type AP_FTYPE_RESOURCE.
+ * The pool argument is not used.
+ *
+ * NOTE(review): PROTO_FLAGS is defined below but is not referenced by
+ * register_hooks or by anything after it in this file -- confirm whether it
+ * was meant to be supplied at registration time or can be dropped.
+ */
+#define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH
+static void register_hooks(apr_pool_t *pool)
+{
+    ap_register_output_filter(substitute_filter_name, substitute_filter,
+                              NULL, AP_FTYPE_RESOURCE);
+}
+/*
+ * Directive table for this module.  It declares a single one-argument
+ * directive, "Substitute", handled by set_pattern and allowed wherever
+ * OR_ALL permits.  The trailing {NULL} entry terminates the table.
+ */
+static const command_rec substitute_cmds[] = {
+    AP_INIT_TAKE1("Substitute", set_pattern, NULL, OR_ALL,
+                  "Pattern to filter the response content (s/foo/bar/[inf])"),
+    {NULL}
+};
+/*
+ * Module record: ties together the per-directory configuration callbacks,
+ * the directive table and the hook registration function.  Both
+ * server-configuration slots are NULL, so only per-directory config is used.
+ */
+module AP_MODULE_DECLARE_DATA substitute_module = {
+    STANDARD20_MODULE_STUFF,
+    create_substitute_dcfg,     /* dir config creator */
+    merge_substitute_dcfg,      /* dir merger --- default is to override */
+    NULL,                       /* server config */
+    NULL,                       /* merge server config */
+    substitute_cmds,            /* command table */
+    register_hooks              /* register hooks */
+};
author	Ruediger Pluem <rpluem@apache.org>
	Sun, 9 Dec 2007 15:19:40 +0000 (15:19 +0000)
committer	Ruediger Pluem <rpluem@apache.org>
	Sun, 9 Dec 2007 15:19:40 +0000 (15:19 +0000)
CHANGES		patch \| blob \| blame \| history
STATUS		patch \| blob \| blame \| history
docs/manual/mod/mod_substitute.xml	[new file with mode: 0644]	patch \| blob
docs/manual/mod/mod_substitute.xml.meta	[new file with mode: 0644]	patch \| blob
modules/filters/NWGNUmakefile		patch \| blob \| blame \| history
modules/filters/NWGNUsubstitute	[new file with mode: 0644]	patch \| blob
modules/filters/config.m4		patch \| blob \| blame \| history
modules/filters/mod_substitute.c	[new file with mode: 0644]	patch \| blob