From: Jim Jagielski Date: Tue, 2 Jun 2026 20:29:00 +0000 (+0000) Subject: support: add Python ports of the Perl support scripts X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d626b31a2cef21a070b6b53bf591e9e40344aa3f;p=thirdparty%2Fapache%2Fhttpd.git support: add Python ports of the Perl support scripts git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1934905 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/configure.in b/configure.in index 2ba4c6f710..3f51b79dc3 100644 --- a/configure.in +++ b/configure.in @@ -1071,6 +1071,15 @@ if test "x$perlbin" = "x"; then fi AC_SUBST(perlbin) +pythonbin=`$ac_aux_dir/PrintPath python3` +if test "x$pythonbin" = "x"; then + pythonbin=`$ac_aux_dir/PrintPath python` +fi +if test "x$pythonbin" = "x"; then + pythonbin="/replace/with/path/to/python/interpreter" +fi +AC_SUBST(pythonbin) + dnl If we are running on a BSD variant, see if we need to use the BSD .include syntax. ap_make_include=include @@ -1117,7 +1126,7 @@ AC_SUBST(ap_make_delimiter) dnl Ensure that docs/conf is created. 
test -d docs/conf||$mkdir_p docs/conf -AC_CONFIG_FILES(docs/conf/httpd.conf docs/conf/extra/httpd-autoindex.conf docs/conf/extra/httpd-dav.conf docs/conf/extra/httpd-default.conf docs/conf/extra/httpd-info.conf docs/conf/extra/httpd-languages.conf docs/conf/extra/httpd-manual.conf docs/conf/extra/httpd-mpm.conf docs/conf/extra/httpd-multilang-errordoc.conf docs/conf/extra/httpd-policy.conf docs/conf/extra/httpd-ssl.conf docs/conf/extra/httpd-userdir.conf docs/conf/extra/httpd-vhosts.conf docs/conf/extra/proxy-html.conf docs/conf/extra/httpd-macro.conf include/ap_config_layout.h support/apxs support/apachectl support/dbmmanage support/envvars-std support/log_server_status support/logresolve.pl support/phf_abuse_log.cgi support/split-logfile build/rules.mk build/pkg/pkginfo build/config_vars.sh) +AC_CONFIG_FILES(docs/conf/httpd.conf docs/conf/extra/httpd-autoindex.conf docs/conf/extra/httpd-dav.conf docs/conf/extra/httpd-default.conf docs/conf/extra/httpd-info.conf docs/conf/extra/httpd-languages.conf docs/conf/extra/httpd-manual.conf docs/conf/extra/httpd-mpm.conf docs/conf/extra/httpd-multilang-errordoc.conf docs/conf/extra/httpd-policy.conf docs/conf/extra/httpd-ssl.conf docs/conf/extra/httpd-userdir.conf docs/conf/extra/httpd-vhosts.conf docs/conf/extra/proxy-html.conf docs/conf/extra/httpd-macro.conf include/ap_config_layout.h support/apxs support/apxs-ng support/apachectl support/dbmmanage support/dbmmanage-ng support/envvars-std support/log_server_status support/log_server_status-ng support/logresolve.pl support/logresolve.py support/phf_abuse_log.cgi support/phf_abuse_log-ng.cgi support/split-logfile support/split-logfile-ng build/rules.mk build/pkg/pkginfo build/config_vars.sh) AC_CONFIG_COMMANDS([default], [$SHELL $srcdir/build/fastgen.sh $srcdir $ac_cv_mkdir_p $BSD_MAKEFILE $APACHE_FAST_OUTPUT_FILES >&AS_MESSAGE_FD], [ac_cv_mkdir_p=$ac_cv_mkdir_p diff --git a/support/Makefile.in b/support/Makefile.in index 4836cc2e57..2e0afd99de 100644 --- 
a/support/Makefile.in +++ b/support/Makefile.in @@ -1,5 +1,7 @@ -DISTCLEAN_TARGETS = apxs apachectl dbmmanage log_server_status \ - logresolve.pl phf_abuse_log.cgi split-logfile envvars-std +DISTCLEAN_TARGETS = apxs apxs-ng apachectl dbmmanage dbmmanage-ng \ + log_server_status log_server_status-ng logresolve.pl logresolve.py \ + phf_abuse_log.cgi phf_abuse_log-ng.cgi split-logfile split-logfile-ng \ + envvars-std CLEAN_TARGETS = suexec @@ -17,7 +19,7 @@ install: @test -d $(DESTDIR)$(sbindir) || $(MKINSTALLDIRS) $(DESTDIR)$(sbindir) @test -d $(DESTDIR)$(libexecdir) || $(MKINSTALLDIRS) $(DESTDIR)$(libexecdir) @cp -p $(top_builddir)/server/httpd.exp $(DESTDIR)$(libexecdir) - @for i in apxs dbmmanage; do \ + @for i in apxs apxs-ng dbmmanage dbmmanage-ng; do \ if test -f "$(builddir)/$$i"; then \ cp -p $$i $(DESTDIR)$(bindir); \ chmod 755 $(DESTDIR)$(bindir)/$$i; \ diff --git a/support/apxs-ng.in b/support/apxs-ng.in new file mode 100644 index 0000000000..44e78a8abb --- /dev/null +++ b/support/apxs-ng.in @@ -0,0 +1,775 @@ +#!@pythonbin@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#
# apxs-ng -- APache eXtenSion tool (Python port of the historical Perl apxs)
#
# This is a behavior-compatible reimplementation of support/apxs.in in
# Python 3.  It is intended to produce byte-identical output and side
# effects to the Perl version for all documented options (-g, -q, -c,
# -i, -e, -a, -A).

import os
import re
import sys
import subprocess

#
# Configuration
#

# The @...@ placeholders are filled in by ./configure (AC_CONFIG_FILES),
# the same way the Perl apxs.in is processed.
EXP_BINDIR = "@exp_bindir@"
EXP_INSTALLBUILD = "@exp_installbuilddir@"
MOD_SO_ENABLED = "@MOD_SO_ENABLED@"

PROG = "apxs"  # historical program name, used in every diagnostic


def error(msg):
    """Report ``msg`` on stderr in the ``apxs:Error: <msg>.`` format."""
    print("%s:Error: %s." % (PROG, msg), file=sys.stderr)


def notice(msg):
    """Echo ``msg`` on stderr followed by a newline."""
    print("%s" % msg, file=sys.stderr)


def compute_destdir():
    """Return the cross-compile base directory, or "" when there is none.

    The base directory is whatever precedes the last occurrence of the
    configured bindir inside argv[0].
    """
    invoked_as = sys.argv[0]
    position = invoked_as.rfind(EXP_BINDIR)
    return "" if position < 0 else invoked_as[:position]


config_vars = {}


def get_config_vars(path, dest):
    """Load ``name = value`` lines from the file at ``path`` into ``dest``.

    Lines that contain no ``=`` are ignored.  When the file cannot be
    opened a message is written to stderr and the process exits with
    status 2.
    """
    assignment = re.compile(r"^\s*(.*?)\s*=\s*(.*)$")
    try:
        handle = open(path, "r")
    except IOError as exc:
        sys.stderr.write("cannot open %s: %s\n" % (path, exc.strerror))
        sys.exit(2)
    with handle:
        for raw in handle:
            found = assignment.match(raw.rstrip("\n"))
            if found is None:
                continue
            key, value = found.groups()
            dest[key] = value


# internal (CFG_*) variables -- uppercase aliases overridable via -S.
# Values live in the ``cfg`` dict, keyed by the uppercase name without
# the CFG_ prefix (the Perl version used $CFG_ lexicals).
internal_var_names = {
    "TARGET", "CC", "CFLAGS", "CFLAGS_SHLIB", "LD_SHLIB",
    "LDFLAGS_SHLIB", "LIBS_SHLIB", "PREFIX", "SBINDIR", "INCLUDEDIR",
    "LIBEXECDIR", "SYSCONFDIR",
}

cfg = {}


def _expand(value, seen=None):
    """Interpolate make-style references in ``value`` against config_vars.

    Parentheses are removed first, so ``$(foo)`` becomes ``$foo``; then
    every ``$foo`` / ``${foo}`` is replaced by the recursively expanded
    value of ``config_vars["foo"]``.  Unknown names, and names that are
    already being expanded (self-reference), become the empty string.
    ``seen`` is the set of names currently being expanded; callers
    normally leave it unset.
    """
    in_progress = set() if seen is None else seen
    flattened = re.sub(r"[()]", "", value)

    def substitute(match):
        name = match.group(1) or match.group(2)
        if name in in_progress or name not in config_vars:
            return ""
        in_progress.add(name)
        expanded = _expand(config_vars[name], in_progress)
        in_progress.discard(name)
        return expanded

    return re.sub(r"\$\{(\w+)\}|\$(\w+)", substitute, flattened)
def get_vars(*args):
    """Resolve each query name and return the values joined by ``;;``.

    A name is looked up (as given, then lowercased) in ``config_vars``
    and expanded with ``_expand``; failing that, and only while nothing
    has resolved yet, it is looked up among the internal CFG_* variables
    held in ``cfg``.  If the very first unresolved name matches neither,
    an error is printed and the process exits with status 1.
    """
    result = ""
    # NOTE: `ok` is intentionally NOT reset per-argument.  The historical
    # Perl get_vars() declared `my $ok = 0;` once outside the loop, so once
    # any argument resolves, every *later* argument that would need the
    # internal branch is silently skipped and a later invalid name raises
    # no error.  The quirk is preserved for byte-identical -q output.
    ok = False
    for arg in args:
        if arg in config_vars or arg.lower() in config_vars:
            val = config_vars[arg] if arg in config_vars else config_vars[arg.lower()]
            result += _expand(val)
            result += ";;"
            ok = True
        if not ok:
            if arg in internal_var_names or arg.lower() in internal_var_names:
                key = arg if arg in internal_var_names else arg.lower()
                key = key.upper()
                val = cfg.get(key)
                if val is not None:
                    result += val
                result += ";;"
                ok = True
            if not ok:
                error("Invalid query string `%s'" % arg)
                sys.exit(1)
    if result.endswith(";;"):
        result = result[:-2]
    return result


#
# argument parsing -- faithful port of the Perl Getopts() that supports
# ':' (single value) and '+' (appended list) option suffixes, clustering,
# and "-Xvalue" / "-X value" forms.
#

class Opts:
    """Parsed command-line options, one attribute per option letter."""

    def __init__(self):
        # single-value options
        self.n = ""
        self.o = ""
        # list options (may be given several times)
        self.D = []
        self.I = []
        self.L = []
        self.l = []
        self.W = []
        self.S = []
        # boolean flags
        self.g = False
        self.c = False
        self.e = False
        self.i = False
        self.a = False
        self.A = False
        self.q = False
        self.h = False
        self.p = False
        self.v = False


def getopts(spec, argv):
    """Parse leading options from ``argv``.

    ``spec`` is the collection of accepted option letters.  Letters in
    "no" take one value, letters in "DILlWS" append to a list, all others
    are boolean flags that may be clustered ("-ia").  Parsing stops at
    the first non-option argument or after a literal "--".

    Returns (ok, opts, remaining_argv); ``ok`` is False when an unknown
    option or a missing option argument was reported via error().
    """
    opts = Opts()
    errs = 0
    argv = list(argv)
    list_opts = set("DILlWS")   # options with '+' suffix
    value_opts = set("no")      # options with ':' suffix

    while argv:
        cur = argv[0]
        m = re.match(r"^-(.)(.*)", cur)
        if not m:
            break
        if cur == "--":
            argv.pop(0)
            break
        first, rest = m.group(1), m.group(2)

        if first not in spec:
            error("Unknown option: %s" % first)
            errs += 1
            # keep scanning the remainder of a cluster such as "-Zq"
            if rest != "":
                argv[0] = "-" + rest
            else:
                argv.pop(0)
            continue

        if first in value_opts or first in list_opts:
            argv.pop(0)
            if rest == "":
                # "-X value" form: the value is the next argument
                if argv:
                    rest = argv.pop(0)
                else:
                    error("Incomplete option: %s (needs an argument)" % first)
                    errs += 1
            if first in value_opts:
                setattr(opts, first, rest)
            else:
                getattr(opts, first).append(rest)
        else:
            setattr(opts, first, True)
            if rest == "":
                argv.pop(0)
            else:
                argv[0] = "-" + rest
    return (errs == 0, opts, argv)


def usage():
    """Print the synopsis on stderr and exit with status 1."""
    # The <placeholder> names follow the synopsis of the Perl apxs.
    sys.stderr.write("Usage: apxs -g [-S <var>=<val>] -n <modname>\n")
    sys.stderr.write("       apxs -q [-v] [-S <var>=<val>] [<query> ...]\n")
    sys.stderr.write("       apxs -c [-S <var>=<val>] [-o <dsofile>] [-D <name>[=<value>]]\n")
    sys.stderr.write("               [-I <incdir>] [-L <libdir>] [-l <libname>] [-Wc,<flags>]\n")
    sys.stderr.write("               [-Wl,<flags>] [-p] <files> ...\n")
    sys.stderr.write("       apxs -i [-S <var>=<val>] [-a] [-A] [-n <modname>] <dsofile> ...\n")
    sys.stderr.write("       apxs -e [-S <var>=<val>] [-a] [-A] [-n <modname>] <dsofile> ...\n")
    sys.exit(1)


# helper: execute a list of system commands with return code checks
def execute_cmds(cmds):
    """Run each shell command in order; exit(1) on the first failure."""
    for cmd in cmds:
        notice(cmd)
        rc = subprocess.call(cmd, shell=True)
        if rc:
            # Perl's `system` returns the raw wait status ($?): the exit
            # code shifted left by 8, or the signal number when the child
            # was killed.  The historical apxs then prints that value
            # shifted left by 8 *again*, so a child exiting 1 is reported
            # as rc=65536.  subprocess reports a signal death as -signum,
            # hence the negation.
            status = (rc << 8) if rc > 0 else -rc
            error("Command failed with rc=%d\n" % (status << 8))
            sys.exit(1)


def backtick(cmd):
    """Run ``cmd`` via the shell and return its stdout (like Perl ``).

    As with Perl backticks, a non-zero exit status is not an error: the
    output captured so far is returned.
    """
    completed = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE)
    return completed.stdout.decode("utf-8", "replace")


# (main() continues past this span; only its opening statements fall
# inside it.)
def main():
    destdir = compute_destdir()

    installbuilddir = EXP_INSTALLBUILD
    get_config_vars(destdir + installbuilddir + "/config_vars.mk", config_vars)

    # read the configuration variables once (module-level lexicals)
    prefix = get_vars("prefix")
    CFG_PREFIX = prefix
    # exec_prefix, datadir, localstatedir are read for parity though the
    # original only uses a subset directly.
+ get_vars("exec_prefix") + get_vars("datadir") + get_vars("localstatedir") + CFG_TARGET = get_vars("progname") + CFG_SYSCONFDIR = get_vars("sysconfdir") + CFG_CFLAGS = " ".join(get_vars(x) for x in + ("SHLTCFLAGS", "CFLAGS", "NOTEST_CPPFLAGS", "EXTRA_CPPFLAGS", "EXTRA_CFLAGS")) + CFG_LDFLAGS = " ".join(get_vars(x) for x in + ("LDFLAGS", "NOTEST_LDFLAGS", "SH_LDFLAGS")) + includedir = destdir + get_vars("includedir") + CFG_INCLUDEDIR = includedir + CFG_CC = get_vars("CC") + libexecdir = destdir + get_vars("libexecdir") + CFG_LIBEXECDIR = libexecdir + sbindir = get_vars("sbindir") + CFG_SBINDIR = sbindir + + ltflags = os.environ.get("LTFLAGS") or "--silent" + + # seed the CFG_* (internal var) namespace used by get_vars and -S + cfg["PREFIX"] = CFG_PREFIX + cfg["TARGET"] = CFG_TARGET + cfg["SYSCONFDIR"] = CFG_SYSCONFDIR + cfg["CFLAGS"] = CFG_CFLAGS + cfg["LDFLAGS"] = CFG_LDFLAGS + cfg["INCLUDEDIR"] = CFG_INCLUDEDIR + cfg["CC"] = CFG_CC + cfg["LIBEXECDIR"] = CFG_LIBEXECDIR + cfg["SBINDIR"] = CFG_SBINDIR + + # option handling. The spec just enumerates valid option letters; + # which take a value (':') or append to a list ('+') is encoded by + # the value_opts/list_opts sets inside getopts(). 
+ spec = set("qngcoIDLlWSeiaApv") + ok, opt, args = getopts(spec, sys.argv[1:]) + + if not ok: + usage() + if len(args) == 0 and not opt.g and not opt.q: + usage() + if not opt.q and not (opt.g and opt.n) and not opt.i and not opt.c and not opt.e: + usage() + + name = "unknown" + if opt.n != "": + name = opt.n + + # -S var=val overrides + for s in opt.S: + m = re.match(r"^([^=]+)=(.*)$", s) + if m: + var = m.group(1) + val = m.group(2) + oldval = cfg.get(var) + if not (var and oldval): + error("no config variable %s" % var) + usage() + cfg[var] = val + else: + error("malformatted -S option") + usage() + + # refresh local copies that may have been overridden via -S + CFG_PREFIX = cfg["PREFIX"] + CFG_TARGET = cfg["TARGET"] + CFG_SYSCONFDIR = cfg["SYSCONFDIR"] + CFG_CFLAGS = cfg["CFLAGS"] + CFG_LDFLAGS = cfg["LDFLAGS"] + CFG_INCLUDEDIR = cfg["INCLUDEDIR"] + CFG_CC = cfg["CC"] + CFG_LIBEXECDIR = cfg["LIBEXECDIR"] + CFG_SBINDIR = cfg["SBINDIR"] + + # + # Initial shared object support check + # + if MOD_SO_ENABLED != "yes": + error("Sorry, no shared object support for Apache") + error("available under your platform. Make sure") + error("the Apache module mod_so is compiled into") + error("the server binary") + sys.exit(1) + + # + # -g : SAMPLE MODULE SOURCE GENERATION + # + if opt.g: + if os.path.isdir(name): + error("Directory `%s' already exists. 
Remove first" % name) + sys.exit(1) + + data = DATA + data = data.replace("%NAME%", name) + data = data.replace("%TARGET%", CFG_TARGET) + data = data.replace("%PREFIX%", prefix) + data = data.replace("%INSTALLBUILDDIR%", installbuilddir) + + m = re.match(r"(.+)-=#=-\n(.+)-=#=-\n(.+)", data, re.S) + mkf, mods, src = m.group(1), m.group(2), m.group(3) + + notice("Creating [DIR] %s" % name) + os.system("mkdir %s" % name) + notice("Creating [FILE] %s/Makefile" % name) + with open("%s/Makefile" % name, "w") as fp: + fp.write(mkf) + notice("Creating [FILE] %s/modules.mk" % name) + with open("%s/modules.mk" % name, "w") as fp: + fp.write(mods) + notice("Creating [FILE] %s/mod_%s.c" % (name, name)) + with open("%s/mod_%s.c" % (name, name), "w") as fp: + fp.write(src) + notice("Creating [FILE] %s/.deps" % name) + os.system("touch %s/.deps" % name) + sys.exit(0) + + # + # -q : QUERY INFORMATION + # + if opt.q: + if len(args) >= 1: + result = get_vars(*args) + sys.stdout.write("%s\n" % result) + else: + # -q without var name prints all variables and their values + if opt.v: + vars_sorted = sorted(config_vars.keys(), key=lambda s: s.upper()) + width = 0 + for k in vars_sorted: + if len(k) > width: + width = len(k) + for k in vars_sorted: + sys.stdout.write("%-*s = %s\n" % (width, k, config_vars[k])) + else: + for k in config_vars: + sys.stdout.write("%s=%s\n" % (k, config_vars[k])) + + apr_config = destdir + get_vars("APR_CONFIG") + if not (os.path.isfile(apr_config) and os.access(apr_config, os.X_OK)): + error("%s not found!" % apr_config) + sys.exit(1) + + apr_major_version = int(backtick("%s --version" % apr_config).split(".")[0]) + + apu_config = "" + if apr_major_version < 2: + apu_config = destdir + get_vars("APU_CONFIG") + if not (os.path.isfile(apu_config) and os.access(apu_config, os.X_OK)): + error("%s not found!" 
% apu_config) + sys.exit(1) + + libtool = backtick("%s --apr-libtool" % apr_config).rstrip("\n") + apr_includedir = backtick("%s --includes" % apr_config).rstrip("\n") + apu_includedir = "" + if apr_major_version < 2: + apu_includedir = backtick("%s --includes" % apu_config).rstrip("\n") + + # + # -c : SHARED OBJECT COMPILATION + # + if opt.c: + srcs = [] + objs = [] + for f in args: + if f.endswith(".c"): + srcs.append(f) + else: + objs.append(f) + + # determine output file + if opt.o == "": + if srcs: + dso_file = re.sub(r"\.[^.]+$", ".la", srcs[0]) + elif objs: + dso_file = re.sub(r"\.[^.]+$", ".la", objs[0]) + else: + dso_file = "mod_unknown.la" + else: + dso_file = re.sub(r"\.[^.]+$", ".la", opt.o) + + cmds = [] + compile_opt = "" + for w in opt.W: + m = re.match(r"^\s*c,(.*)$", w) + if m: + compile_opt += m.group(1) + " " + for inc in opt.I: + compile_opt += "-I%s " % inc + for d in opt.D: + compile_opt += "-D%s " % d + + cflags = CFG_CFLAGS + for s in srcs: + slo = re.sub(r"\.c$", ".slo", s) + lo = re.sub(r"\.c$", ".lo", s) + cmds.append("%s %s --mode=compile %s %s -I%s %s %s %s -c -o %s %s && touch %s" % + (libtool, ltflags, CFG_CC, cflags, CFG_INCLUDEDIR, + apr_includedir, apu_includedir, compile_opt, lo, s, slo)) + objs.insert(0, lo) + + # create link command + lo_all = "" + for o in objs: + lo_all += " " + o + + link_opt = "" + for w in opt.W: + m = re.match(r"^\s*l,(.*)$", w) + if m: + link_opt += m.group(1) + " " + for L in opt.L: + link_opt += " -L%s" % L + for l in opt.l: + link_opt += " -l%s" % l + + ldflags = CFG_LDFLAGS + if opt.p: + apr_libs = backtick("%s --cflags --ldflags --link-libtool --libs" % apr_config).rstrip("\n") + apu_libs = "" + if apr_major_version < 2: + apu_libs = backtick("%s --ldflags --link-libtool --libs" % apu_config).rstrip("\n") + link_opt += " " + apu_libs + " " + apr_libs + else: + apr_ldflags = backtick("%s --ldflags" % apr_config).rstrip("\n") + link_opt += " -rpath %s -module -avoid-version %s" % (CFG_LIBEXECDIR, 
apr_ldflags) + + cmds.append("%s %s --mode=link %s %s -o %s %s %s" % + (libtool, ltflags, CFG_CC, ldflags, dso_file, link_opt, lo_all)) + + execute_cmds(cmds) + + # allow one-step compilation and installation + if opt.i or opt.e: + args = [dso_file] + + # + # -i / -e : SHARED OBJECT INSTALLATION + # + if opt.i or opt.e: + lmd = [] + cmds = [] + for f in args: + # ack all potential gcc, hp/ux, win32+os2+aix and os/x extensions + if not re.search(r"(\.so$|\.la$|\.sl$|\.dll$|\.dylib$|)", f): + error("file %s is not a shared object" % f) + sys.exit(1) + t = re.sub(r"^.+/([^/]+)$", r"\1", f) + # use .so unambigiously for installed shared library modules + t = re.sub(r"\.[^./\\]+$", ".so", t) + if opt.i: + cmds.append(destdir + "%s/instdso.sh SH_LIBTOOL='%s' %s %s" % + (installbuilddir, libtool, f, CFG_LIBEXECDIR)) + cmds.append("chmod 755 %s/%s" % (CFG_LIBEXECDIR, t)) + + # determine module symbolname and filename + filename = "" + if name == "unknown": + name = "" + base = re.sub(r"\.[^.]+$", "", f) + if os.path.isfile("%s.c" % base): + with open("%s.c" % base, "r") as cfp: + content = cfp.read() + m = re.search(r".*AP_DECLARE_MODULE\s*\(\s*([a-zA-Z0-9_]+)\s*\)\s*=.*", content, re.S) + if not m: + m = re.search(r".*module\s+(?:AP_MODULE_DECLARE_DATA\s+)?([a-zA-Z0-9_]+)_module\s*=\s*.*", content, re.S) + if m: + name = m.group(1) + filename = "%s.c" % base + filename = re.sub(r"^[^/]+/", "", filename) + if name == "": + m = re.match(r".*mod_([a-zA-Z0-9_]+)(\..+|$)", base) + if m: + name = m.group(1) + filename = base + filename = re.sub(r"^[^/]+/", "", filename) + if name == "": + error("Sorry, cannot determine bootstrap symbol name") + error("Please specify one with option `-n'") + sys.exit(1) + if filename == "": + filename = "mod_%s.c" % name + dir_ = CFG_LIBEXECDIR + dir_ = re.sub(r"^%s/?" 
% re.escape(CFG_PREFIX), "", dir_) + dir_ = re.sub(r"(.)$", r"\1/", dir_) + t = re.sub(r"\.la$", ".so", t) + lmd.append("LoadModule %-18s %s" % ("%s_module" % name, "%s%s" % (dir_, t))) + + execute_cmds(cmds) + + # activate module via LoadModule/AddModule directive + if opt.a or opt.A: + conf = "%s/%s.conf" % (CFG_SYSCONFDIR, CFG_TARGET) + if not os.path.isfile(conf): + error("Config file %s not found" % conf) + sys.exit(1) + + with open(conf, "r") as fp: + content = fp.read() + + if not re.search(r"\n#?\s*LoadModule\s+", content): + error("Activation failed for custom %s file." % conf) + error("At least one `LoadModule' directive already has to exist") + sys.exit(1) + + c = "#" if opt.A else "" + for entry in lmd: + what = "preparing" if opt.A else "activating" + lmd_re = re.sub(r"\s+", r"\\s+", entry) + + if not re.search(r"\n#?\s*" + lmd_re, content): + # find everything up to and including the LAST LoadModule + # (greedy), to count open/closed before it. + bm = re.search(r"^(.*\n)#?\s*LoadModule\s+[^\n]+\n", content, re.S) + before = bm.group(1) if bm else "" + + cntopen = len(re.findall(r"^\s*<[^/].*$", before, re.M)) + cntclose = len(re.findall(r"^\s* +** SetHandler %NAME% +** +** +** Then after restarting Apache via +** +** $ apachectl restart +** +** you immediately can request the URL /%NAME% and watch for the +** output of this module. 
This can be achieved for instance via: +** +** $ lynx -mime_header http://localhost/%NAME% +** +** The output should be similar to the following one: +** +** HTTP/1.1 200 OK +** Date: Tue, 31 Mar 1998 14:42:22 GMT +** Server: Apache/1.3.4 (Unix) +** Connection: close +** Content-Type: text/html +** +** The sample page from mod_%NAME%.c +*/ + +#include "httpd.h" +#include "http_config.h" +#include "http_protocol.h" +#include "ap_config.h" + +/* The sample content handler */ +static int %NAME%_handler(request_rec *r) +{ + if (strcmp(r->handler, "%NAME%")) { + return DECLINED; + } + r->content_type = "text/html"; + + if (!r->header_only) + ap_rputs("The sample page from mod_%NAME%.c\n", r); + return OK; +} + +static void %NAME%_register_hooks(apr_pool_t *p) +{ + ap_hook_handler(%NAME%_handler, NULL, NULL, APR_HOOK_MIDDLE); +} + +/* Dispatch list for API hooks */ +module AP_MODULE_DECLARE_DATA %NAME%_module = { + STANDARD20_MODULE_STUFF, + NULL, /* create per-dir config structures */ + NULL, /* merge per-dir config structures */ + NULL, /* create per-server config structures */ + NULL, /* merge per-server config structures */ + NULL, /* table of config file commands */ + %NAME%_register_hooks /* register hooks */ +}; + +""" + + +if __name__ == "__main__": + main() diff --git a/support/dbmmanage-ng.in b/support/dbmmanage-ng.in new file mode 100644 index 0000000000..3c42003597 --- /dev/null +++ b/support/dbmmanage-ng.in @@ -0,0 +1,417 @@ +#!@pythonbin@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# dbmmanage-ng -- Python port of the historical Perl dbmmanage.
#
# This is a behavior-compatible reimplementation of support/dbmmanage.in
# in Python 3.  It manages user/password DBM files for Apache auth.
#
# usage: dbmmanage <DBMfile> <command> <user> <password> <groups> <comment>

import os
import re
import sys
import base64
import hashlib
import getpass
import random
import secrets
import dbm

# Python's crypt module is deprecated in 3.11+ and removed in 3.13.  The
# Perl original uses crypt(), so we use it too -- but degrade gracefully
# if it is unavailable on this interpreter.
try:
    import crypt as _crypt
except ImportError:
    _crypt = None


# The set of characters used to build salts: [./0-9A-Za-z], matching the
# Perl genseed()/randchar() range.
_SALT_CHARS = "./0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Default hashing method, like the Perl original (crypt on Unix).
hash_method = "crypt"


def usage():
    """Print the synopsis on stderr and exit with status 1."""
    # Commands are listed sorted, like "join '|', sort keys %dbmc::".
    cmds = "|".join(sorted(COMMANDS.keys()))
    # NOTE(review): runs of spaces inside these literals look collapsed;
    # confirm the intended column alignment against dbmmanage.in.
    sys.stderr.write(
        "Usage: dbmmanage [enc] dbname command "
        "[username [pw [group[,group] [comment]]]]\n"
        "\n"
        " where enc is -d for crypt hashing (default except on Win32, Netware)\n"
        " -m for MD5 hashing (default on Win32, Netware)\n"
        " -s for SHA1 hashing\n"
        " -p for plaintext\n"
        "\n"
        " command is one of: " + cmds + "\n"
        "\n"
        " pw of . for update command retains the old password\n"
        " pw of - (or blank) for update command prompts for the password\n"
        "\n"
        " groups or comment of . (or blank) for update command retains old values\n"
        " groups or comment of - for update command clears the existing value\n"
        " groups or comment of - for add and adduser commands is the empty value\n"
    )
    sys.exit(1)


def die(msg):
    """Write ``msg`` verbatim to stderr and exit with status 1."""
    sys.stderr.write(msg)
    sys.exit(1)


def randchar(n=1):
    """Return ``n`` random characters drawn from the salt alphabet.

    Salts protect stored password hashes, so the characters come from
    the OS CSPRNG (``secrets``) instead of the predictable Mersenne
    Twister behind ``random``.
    """
    return "".join(secrets.choice(_SALT_CHARS) for _ in range(n))


def saltpw_crypt():
    """Return a fresh traditional 2-character crypt() salt."""
    # The Perl original optionally uses a "newstyle" salt on bsdos; that is
    # an obscure edge case, so we always use the traditional 2-char salt.
    return randchar(2)


def hashpw_crypt(pw, salt=None):
    """Hash ``pw`` with crypt(); a salt is generated when none is given.

    Exits via die() when the crypt module is not available on this
    interpreter.
    """
    if _crypt is None:
        die("dbmmanage: crypt hashing is not available on this Python "
            "interpreter.\nPlease use a different hashing option (-m, -s, -p).\n")
    if not salt:
        salt = saltpw_crypt()
    return _crypt.crypt(pw, salt)


def saltpw_md5():
    """Return a fresh 8-character salt for the apr1 (MD5) scheme."""
    return randchar(8)
# Apache's apr1 / md5crypt algorithm.  The Python standard library has no
# apr1 implementation, so it is written out here: the classic md5crypt
# routine with the magic string "$apr1$".
def apache_md5_crypt(pw, salt):
    """Return the ``$apr1$<salt>$<hash>`` string for ``pw`` and ``salt``.

    ``salt`` may be a bare salt or a complete ``$apr1$salt$hash`` string
    (as when verifying); in the latter case only the salt part is used.
    At most 8 salt characters are significant.
    """
    magic = "$apr1$"
    secret = pw.encode("utf-8", "surrogateescape")

    # Reduce a full "$apr1$salt$..." string to its salt component.
    if salt.startswith(magic):
        salt = salt[len(magic):]
    salt = salt.split("$", 1)[0][:8]
    salt_bytes = salt.encode("ascii")

    # Outer digest: password + magic + salt.
    outer = hashlib.md5(secret + magic.encode("ascii") + salt_bytes)
    # Inner digest: password + salt + password.
    inner = hashlib.md5(secret + salt_bytes + secret).digest()

    # Feed len(password) bytes of the inner digest, 16 at a time.
    for remaining in range(len(secret), 0, -16):
        outer.update(inner[:min(16, remaining)])

    # One byte per bit of the password length: NUL for a set bit, the
    # first password byte otherwise.
    bits = len(secret)
    while bits:
        outer.update(b"\x00" if bits & 1 else secret[:1])
        bits >>= 1

    digest = outer.digest()

    # 1000 strengthening rounds.
    for round_no in range(1000):
        odd = round_no & 1
        pieces = [secret if odd else digest]
        if round_no % 3:
            pieces.append(salt_bytes)
        if round_no % 7:
            pieces.append(secret)
        pieces.append(digest if odd else secret)
        digest = hashlib.md5(b"".join(pieces)).digest()

    # md5crypt's own base64 variant (alphabet ./0-9A-Za-z, low bits first).
    itoa64 = "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

    def _to64(v, n):
        return "".join(itoa64[(v >> (6 * k)) & 0x3f] for k in range(n))

    byte_order = ((0, 6, 12), (1, 7, 13), (2, 8, 14), (3, 9, 15), (4, 10, 5))
    encoded = "".join(
        _to64((digest[a] << 16) | (digest[b] << 8) | digest[c], 4)
        for a, b, c in byte_order
    )
    encoded += _to64(digest[11], 2)

    return magic + salt + "$" + encoded


def hashpw_md5(pw, salt=None):
    """Hash ``pw`` with apr1; a salt is generated when none is given."""
    return apache_md5_crypt(pw, salt if salt else saltpw_md5())


def hashpw_sha1(pw, salt=None):
    """Return ``{SHA}`` followed by the padded base64 SHA-1 of ``pw``.

    ``salt`` is accepted for signature parity with the other hashers and
    is not used.
    """
    # The Perl original used unpadded base64 and appended a literal "=";
    # standard padded base64 of a 20-byte digest is the same 28-character
    # string, which b64encode produces directly.
    raw = hashlib.sha1(pw.encode("utf-8", "surrogateescape")).digest()
    return "{SHA}" + base64.b64encode(raw).decode("ascii")


def hashpw(pw, salt=None):
    """Hash ``pw`` with the currently selected ``hash_method``.

    Any method other than md5, sha1 or crypt returns ``pw`` unchanged
    (plaintext).
    """
    if hash_method == "md5":
        return hashpw_md5(pw, salt)
    if hash_method == "sha1":
        return hashpw_sha1(pw, salt)
    if hash_method == "crypt":
        return hashpw_crypt(pw, salt)
    return pw


def prompt_pass(prompt="Enter password:"):
    """Read a password without echo; exit via die() when it is empty."""
    entered = getpass.getpass(prompt)
    if not entered:
        die("Can't use empty password!\n")
    return entered


# ---------------------------------------------------------------------------
# DBM access helpers.  dbm stores bytes; latin-1 is used consistently so
# that every byte value round-trips through str unchanged.
# ---------------------------------------------------------------------------

_ENC = "latin-1"


def db_has(db, key):
    """Return True when ``key`` is present in ``db``."""
    encoded_key = key.encode(_ENC)
    return encoded_key in db


def db_get(db, key):
    """Return the value stored under ``key`` as str."""
    stored = db[key.encode(_ENC)]
    return stored.decode(_ENC)


def db_set(db, key, value):
    """Store ``value`` under ``key``."""
    encoded_key = key.encode(_ENC)
    db[encoded_key] = value.encode(_ENC)


def db_del(db, key):
    """Remove ``key`` from ``db``."""
    encoded_key = key.encode(_ENC)
    del db[encoded_key]


def db_keys(db):
    """Return every key of ``db`` as a list of str."""
    decoded = []
    for raw_key in db.keys():
        decoded.append(raw_key.decode(_ENC))
    return decoded


# ---------------------------------------------------------------------------
# Commands (the Perl dbmc:: subs).  These operate on the module-level state
# (db, key, hashed_pwd, groups, comment, is_update).
# ---------------------------------------------------------------------------

class State:
    """Shared state the command functions read and update."""

    db = None            # open DBM handle
    key = None           # username the command acts on
    hashed_pwd = None    # password field (already hashed for add/import)
    groups = None        # groups field
    comment = None       # comment field
    is_update = False    # True while running the "update" command


S = State()


def cmd_add():
    """Store S.key with S.hashed_pwd and optional groups/comment."""
    if not S.hashed_pwd:
        die("Can't use empty password!\n")
    if not S.is_update:
        if db_has(S.db, S.key):
            die("Sorry, user `%s' already exists!\n" % S.key)
    groups = S.groups or ""
    comment = S.comment or ""
    # "-" means "empty value" for both optional fields
    if groups == "-":
        groups = ""
    if comment == "-":
        comment = ""
    if comment:
        groups = groups + ":" + comment
    value = S.hashed_pwd
    if groups:
        value = value + ":" + groups
    db_set(S.db, S.key, value)
    action = "updated" if S.is_update else "added"
    print("User %s %s with password hashed to %s using %s"
          % (S.key, action, value, hash_method))


def cmd_adduser():
    """Prompt twice for a password, hash it, then behave like add."""
    value = prompt_pass("New password:")
    if prompt_pass("Re-type new password:") != value:
        die("They don't match, sorry.\n")
    S.hashed_pwd = hashpw(value)
    cmd_add()


def cmd_update():
    """Update an existing user, keeping fields given as "." or blank."""
    if not db_has(S.db, S.key):
        die("Sorry, user `%s' doesn't exist!\n" % S.key)
    # stored record is "pw[:groups[:comment]]"; pad to three fields
    parts = (db_get(S.db, S.key).split(":", 2) + ["", "", ""])[:3]
    if S.hashed_pwd == ".":
        S.hashed_pwd = parts[0]
    if not S.groups or S.groups == ".":
        S.groups = parts[1]
    if not S.comment or S.comment == ".":
        S.comment = parts[2]
    if not S.hashed_pwd or S.hashed_pwd == "-":
        cmd_adduser()
    else:
        cmd_add()


def cmd_delete():
    """Remove S.key from the database."""
    if not db_has(S.db, S.key):
        die("Sorry, user `%s' doesn't exist!\n" % S.key)
    db_del(S.db, S.key)
    print("`%s' deleted" % S.key)


def cmd_view():
    """Print one record (when a key was given) or every non-empty record."""
    if S.key:
        # Like Perl's $DB{$key}, a missing key yields the empty value.
        val = db_get(S.db, S.key) if db_has(S.db, S.key) else ""
        print("%s:%s" % (S.key, val))
    else:
        for k in db_keys(S.db):
            v = db_get(S.db, k)
            if v:
                print("%s:%s" % (k, v))


def cmd_check():
    """Prompt for a password and report whether it matches the stored one.

    The hashing scheme is inferred from the stored value and written to
    the module-level ``hash_method``.
    """
    global hash_method
    if not db_has(S.db, S.key):
        die("Sorry, user `%s' doesn't exist!\n" % S.key)
    chkpass = (db_get(S.db, S.key).split(":", 2) + ["", "", ""])[0]
    testpass = prompt_pass()
    if chkpass[:6] == "$apr1$":
        hash_method = "md5"
    elif chkpass[:5] == "{SHA}":
        hash_method = "sha1"
    elif len(chkpass) == 13 and chkpass != testpass:
        hash_method = "crypt"
    else:
        hash_method = "plain"
    ok = hashpw(testpass, chkpass) == chkpass
    print(hash_method + (" password ok" if ok else " password mismatch"))


def cmd_import():
    """Add one user per ``key:pw[:groups[:comment]]`` line read from stdin."""
    for line in sys.stdin:
        line = line.rstrip("\n").rstrip("\r")
        if not line:
            continue
        fields = (line.split(":", 3) + ["", "", "", ""])[:4]
        S.key, S.hashed_pwd, S.groups, S.comment = fields
        cmd_add()


COMMANDS = {
    "add": cmd_add,
    "adduser": cmd_adduser,
    "check": cmd_check,
    "delete": cmd_delete,
    "import": cmd_import,
    "update": cmd_update,
    "view": cmd_view,
}

# Leading "enc" flag -> hashing method name.
_ENC_FLAGS = {"-d": "crypt", "-m": "md5", "-p": "plain", "-s": "sha1"}


def _parse_args(argv):
    """Split argv into (method, [file, command, key, pw, groups, comment]).

    ``method`` is the hashing method selected by a leading -d/-m/-p/-s
    flag, or None when no such flag is present.  Missing positionals are
    None, except the key, which defaults to "" so that key lookups behave
    like Perl's undef-as-empty-string instead of failing on None.
    Arguments beyond the sixth positional are ignored.
    """
    argv = list(argv)
    method = None
    if argv and argv[0] in _ENC_FLAGS:
        method = _ENC_FLAGS[argv.pop(0)]
    fields = argv[:6]
    fields += [None] * (6 - len(fields))
    if fields[2] is None:
        fields[2] = ""
    return method, fields


def main():
    """Entry point: parse argv, open the DBM file, run the command."""
    global hash_method
    method, fields = _parse_args(sys.argv[1:])
    if method is not None:
        hash_method = method

    file, command = fields[0], fields[1]
    S.key, S.hashed_pwd, S.groups, S.comment = fields[2:]

    if not file or not command or command not in COMMANDS:
        usage()

    # remove extension if any: .db, .db?, .pag, .dir
    file = re.sub(r"\.(db.?|pag|dir)$", "", file)

    S.is_update = command == "update"

    # view/check open read-only; everything else read-write (create).
    if command in ("view", "check"):
        flag = "r"
    else:
        flag = "c"

    try:
        S.db = dbm.open(file, flag)
    except Exception as e:
        # top-level boundary: report any open failure as a one-line error
        die("Can't open %s: %s\n" % (file, e))

    try:
        COMMANDS[command]()
    finally:
        S.db.close()


if __name__ == "__main__":
    main()
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# log_server_status-ng -- Python port of the historical Perl log_server_status.
#
# Log Server Status
# Mark J Cox, UK Web Ltd 1996, mark ukweb.com
#
# This script is designed to be run at a frequent interval by something
# like cron.  It connects to the server and downloads the status
# information.  It reformats the information to a single line and logs
# it to a file.  Make sure the directory named by wherelog is writable by
# the user who runs this script.
#

import re
import socket
import sys
import time

wherelog = "@exp_logfiledir@/"    # Logs will be like "@exp_logfiledir@/19960312"
server = "localhost"              # Name of server, could be "www.foo.com"
port = "@PORT@"                   # Port on server
request = "/server-status/?auto"  # Request to send
timeout = 30                      # Seconds to wait on the server before giving up

ltime = time.localtime(time.time())

# day = year + zero-padded month + zero-padded day
day = "%d%02d%02d" % (ltime.tm_year, ltime.tm_mon, ltime.tm_mday)

# time = zero-padded hours + minutes + seconds, concatenated
now = "%02d%02d%02d" % (ltime.tm_hour, ltime.tm_min, ltime.tm_sec)

logfile = wherelog + day


def _fail(summary, err):
    """Append a "-1" record carrying *err* to the log, then exit with *summary*."""
    with open(logfile, "a") as out:
        out.write("%s:-1:-1:-1:-1:%s\n" % (now, err))
    sys.exit(summary)


try:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # This runs unattended from cron: without a timeout a server that
    # accepts the connection but never answers would hang the job forever.
    sock.settimeout(timeout)
    sock.connect((server, int(port)))
except OSError as err:
    _fail("Couldn't connect to %s:%s : %s\n" % (server, port, err), err)

# Send the request and read the whole response.  The socket is closed on
# every path, and a network error (including a timeout) is logged the same
# way as a failed connect instead of escaping as a traceback.
try:
    with sock:
        sock.sendall(
            ("GET %s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n\r\n"
             % (request, server)).encode("latin-1")
        )
        # recv() returns b"" once the server closes the connection.
        response = b"".join(iter(lambda: sock.recv(4096), b""))
except OSError as err:
    _fail("Couldn't read status from %s:%s : %s\n" % (server, port, err), err)

# Default to empty strings: Perl's undef interpolates to "" (this only
# matters when a field is absent from the response).
fields = {"requests": "", "idle": "", "number": "", "cpu": ""}
patterns = (
    ("requests", re.compile(r"^BusyWorkers: (\S+)")),
    ("idle", re.compile(r"^IdleWorkers: (\S+)")),
    ("number", re.compile(r"sses: (\S+)")),
    ("cpu", re.compile(r"^CPULoad: (\S+)")),
)
for line in response.decode("latin-1", "replace").splitlines():
    for name, pattern in patterns:
        m = pattern.search(line)
        if m:
            fields[name] = m.group(1)

with open(logfile, "a") as out:
    out.write("%s:%s:%s:%s:%s\n" % (now, fields["requests"], fields["idle"],
                                    fields["number"], fields["cpu"]))
# diff --git a/support/logresolve.py.in b/support/logresolve.py.in
# new file mode 100644
# index 0000000000..034c388958
# --- /dev/null
# +++ b/support/logresolve.py.in
# @@ -0,0 +1,136 @@
#!@pythonbin@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+# +# logresolve.py -- Python port of the historical Perl logresolve.pl. +# +# usage: logresolve.py <infile >outfile +# +# input = Apache/NCSA/.. logfile with IP numbers at start of lines +# output = same logfile with IP addresses resolved to hostnames where +# name lookups succeeded. +# +# this differs from the C based 'logresolve' in that this script +# resolves a number (CHILDREN) of addresses concurrently and sets a +# short timeout (TIMEOUT) for each lookup in order to keep things moving +# quickly. +# +# The original Perl version forked CHILDREN subprocesses and exchanged +# IPs/hostnames over Unix sockets, with a per-lookup alarm(TIMEOUT). +# This Python port reproduces the *semantics* without the fork/socket +# IPC machinery: a concurrent.futures.ThreadPoolExecutor with +# max_workers=CHILDREN performs up to CHILDREN reverse lookups in +# parallel. socket.gethostbyaddr() has no timeout argument of its own, +# and socket.setdefaulttimeout() only applies to socket objects, not to +# resolver calls, so the time limit is applied by this script while it +# waits for each lookup rather than inside the resolver. +# +# Results are cached in a dict so each unique IP is resolved only once, +# and output lines are emitted in the exact same order as the input +# (logfiles must stay in order). +# +# Concurrency / timeout / order-preservation: +# * Concurrency: ThreadPoolExecutor(max_workers=CHILDREN). Each unique +# IP is submitted exactly once; duplicates are filtered out before +# submission. +# * Timeout: the wait for each lookup is bounded; a lookup that has not +# answered in time is given up on and its IP is emitted unchanged. +# * Order: we buffer all input lines (remembering each line's leading +# IP), resolve the unique IPs concurrently, then walk the buffered +# lines in their original order substituting the cached hostname. 

import socket
import sys
import threading
from concurrent.futures import ThreadPoolExecutor

CHILDREN = 40
TIMEOUT = 5


def nslookup(ip):
    """Reverse-resolve an IP to a hostname.

    Equivalent to the Perl gethostbyaddr(gethostbyname($ip), AF_INET).
    Returns the resolved hostname, or the original IP on any failure
    (matching the Perl behaviour of leaving the IP as-is).  This call
    cannot time out by itself; see _bounded_lookup().
    """
    try:
        hostname = socket.gethostbyaddr(ip)[0]
    except Exception:
        # Deliberate best effort: the token comes straight from a log line
        # and may be anything, so every failure means "leave it alone".
        return ip
    return hostname if hostname else ip


def _bounded_lookup(resolver, ip, timeout):
    """Return resolver(ip), or ip itself if no answer arrives within timeout.

    socket.gethostbyaddr() takes no timeout and is not affected by
    socket.setdefaulttimeout(), so the blocking call runs in a throwaway
    daemon thread that is simply abandoned when it overruns.  Being a
    daemon, an abandoned thread never delays interpreter exit.
    """
    answer = []

    def run():
        try:
            answer.append(resolver(ip))
        except Exception:
            # Leaving `answer` empty is how a failed lookup is reported.
            return

    worker = threading.Thread(target=run, daemon=True)
    worker.start()
    worker.join(timeout)
    return answer[0] if answer and answer[0] else ip


def _split_ip(line):
    """Split a log line into (ip, tail) such that ip + tail == line."""
    # split on the FIRST space only; a line with no space is all IP.
    ip, space, rest = line.partition(" ")
    if space:
        return ip, space + rest
    # No space: the whole line is the IP.  Keep the line ending apart so
    # the token resolves cleanly and the ending is reproduced verbatim.
    ip = line.rstrip("\r\n")
    return ip, line[len(ip):]


def main(infile=None, outfile=None, *, resolver=nslookup,
         children=CHILDREN, timeout=TIMEOUT):
    """Copy a logfile, replacing each line's leading IP with its hostname.

    infile / outfile default to sys.stdin / sys.stdout.  resolver maps an
    IP string to a hostname; children caps the number of concurrent
    lookups and timeout is the per-lookup limit in seconds.
    """
    infile = sys.stdin if infile is None else infile
    outfile = sys.stdout if outfile is None else outfile

    # Read the whole logfile, buffering each line as (ip, tail).  Order is
    # preserved by replaying this buffer at the end.
    records = [_split_ip(line) for line in infile]

    # Unique IPs in first-seen order; blank tokens are never looked up.
    unique_ips = list(dict.fromkeys(ip for ip, _ in records if ip))

    # Resolve all unique IPs concurrently, up to `children` at a time.
    # Every worker is bounded by `timeout`, so the pool always drains.
    cache = {}
    if unique_ips:
        with ThreadPoolExecutor(max_workers=children) as pool:
            answers = pool.map(
                lambda ip: _bounded_lookup(resolver, ip, timeout),
                unique_ips)
            cache = dict(zip(unique_ips, answers))

    # Emit lines in the original input order, substituting hostnames.
    outfile.writelines(cache.get(ip, ip) + tail for ip, tail in records)


if __name__ == "__main__":
    main()
# diff --git a/support/phf_abuse_log-ng.cgi.in b/support/phf_abuse_log-ng.cgi.in
# new file mode 100644
# index 0000000000..b017f3c7b8
# --- /dev/null
# +++ b/support/phf_abuse_log-ng.cgi.in
# @@ -0,0 +1,62 @@
#!@pythonbin@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# phf_abuse_log-ng.cgi -- Python port of the historical Perl phf_abuse_log.cgi.
#
# This script is used to detect people trying to abuse the security hole which
# existed in A CGI script distributed with Apache 1.0.3 and earlier versions.
# You can redirect them to here using the "" suggestion
# in httpd.conf.
#
# The format logged to is
#    "[date] remote_addr remote_host referrer user_agent".

import os
import sys
import time

LOG = "/var/log/phf_log"

# Perl's ctime() produces e.g.
# "Wed Jun  2 14:30:00 2026\n"; time.ctime()
# matches that format and has no trailing newline to strip.
when = time.ctime()

remote_addr = os.environ.get("REMOTE_ADDR", "")
remote_host = os.environ.get("REMOTE_HOST", "")
# NOTE: the Perl original had a bug here -- it referenced $ENV{$HTTP_REFERER}
# (with a stray leading $), so it looked up the env var *named by* the value of
# the undefined Perl variable $HTTP_REFERER, which resolved to $ENV{""} and thus
# always logged an empty string.  We do the correct thing and read HTTP_REFERER.
http_referer = os.environ.get("HTTP_REFERER", "")
http_user_agent = os.environ.get("HTTP_USER_AGENT", "")

http_via = os.environ.get("HTTP_VIA", "")
if http_via:
    http_user_agent += " via " + http_via

entry = "[%s] %s %s %s %s\n" % (when, remote_addr, remote_host,
                                http_referer, http_user_agent)

# The request headers are attacker-controlled and may hold bytes that are
# not valid in the current encoding.  os.environ decodes such bytes with
# the surrogateescape handler, and writing the result to a text-mode file
# raises UnicodeEncodeError.  os.fsencode() is the exact inverse of that
# decoding, so append the record as bytes.
try:
    log = open(LOG, "ab")
except OSError as e:
    sys.exit("boo hoo, phf_log " + str(e))

with log:
    log.write(os.fsencode(entry))

sys.stdout.write("Content-type: text/html\r\n\r\n"
                 "Smile, you're on Candid Camera.\n")
# diff --git a/support/split-logfile-ng.in b/support/split-logfile-ng.in
# new file mode 100644
# index 0000000000..3379daa171
# --- /dev/null
# +++ b/support/split-logfile-ng.in
# @@ -0,0 +1,92 @@
#!@pythonbin@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# split-logfile-ng -- Python port of the historical Perl split-logfile.
#
# This script will take a combined Web server access
# log file and break its contents into separate files.
# It assumes that the first field of each line is the
# virtual host identity (put there by "%v"), and that
# the logfiles should be named that+".log" in the current
# directory.
#
# The combined log file is read from stdin.  Records read
# will be appended to any existing log files.

import re
import sys


def main():
    """Split the combined log on stdin into per-vhost "<vhost>.log" files.

    Each record is appended, minus its first token, to the log named after
    that token (lowercased) in the current directory.  Exits with status 0
    when stdin is exhausted and with status 1 if a log cannot be opened.
    """
    #
    # Operate on raw bytes, not decoded text, to match the Perl original
    # exactly: Perl reads STDIN as bytes, so its \s matches only ASCII
    # whitespace and lc() lowercases only ASCII A-Z.  Python's bytes
    # regexes are inherently ASCII-only and bytes.lower() lowercases only
    # ASCII, so reading sys.stdin.buffer reproduces Perl's behavior on
    # non-ASCII input (Unicode whitespace / case folding would otherwise
    # diverge).
    #
    first_space = re.compile(rb"\s")
    leading_token = re.compile(rb"^\S*\s+")
    # Bytes that must never reach open(): the two directory separators,
    # plus NUL, which open() rejects with ValueError rather than OSError.
    illegal = re.compile(rb"[/\\\x00]")

    log_file = {}
    try:
        for log_line in sys.stdin.buffer:
            #
            # Get the first token from the log record; it's the
            # identity of the virtual host to which the record
            # applies.
            #
            vhost = first_space.split(log_line, maxsplit=1)[0]
            #
            # Normalize the virtual host name to all lowercase.
            # If it's blank, the request was handled by the default
            # server, so supply a default name.  This shouldn't
            # happen, but caution rocks.
            #
            vhost = vhost.lower() or b"access"
            #
            # If the vhost contains a "/", a "\" or a NUL byte it is
            # illegal, so just use the default log to avoid any security
            # issues if it were interpreted as a directory separator (or,
            # for NUL, an uncaught error from open()).
            #
            if illegal.search(vhost):
                vhost = b"access"
            #
            # If the log file for this virtual host isn't opened
            # yet, do it now.
            #
            if vhost not in log_file:
                try:
                    log_file[vhost] = open(vhost + b".log", "ab")
                except OSError:
                    sys.stderr.write("Can't open %s.log\n"
                                     % vhost.decode("latin-1"))
                    sys.exit(1)
            #
            # Strip off the first token (which may be null in the
            # case of the default server), and write the edited
            # record to the current log file.
            #
            log_file[vhost].write(leading_token.sub(b"", log_line))
    finally:
        # Runs on the error exit too, so buffered records are flushed.
        for fh in log_file.values():
            fh.close()
    sys.exit(0)


if __name__ == "__main__":
    main()