From: (no author) <(no author)@unknown> Date: Fri, 18 Jun 1999 18:39:40 +0000 (+0000) Subject: This commit was manufactured by cvs2svn to create branch 'avendor'. X-Git-Tag: mpm-3~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=68d677993ab7f2e7bc8e293d4a646102080a2339;p=thirdparty%2Fapache%2Fhttpd.git This commit was manufactured by cvs2svn to create branch 'avendor'. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/avendor@83346 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/build/binbuild.sh b/build/binbuild.sh new file mode 100755 index 00000000000..3279148fcbc --- /dev/null +++ b/build/binbuild.sh @@ -0,0 +1,221 @@ +#!/bin/sh +# +# binbuild.sh - Builds an Apache binary distribution. +# Initially written by Lars Eilebrecht . +# +# This script falls under the Apache License. +# See http://www.apache.org/docs/LICENSE + + +APDIR=$(basename $(pwd)) +VER=$(echo $APDIR |sed s/apache-//) +OS=$(src/helpers/GuessOS) +USER="$(src/helpers/buildinfo.sh -n %u@%h%d)" +TAR="$(src/helpers/findprg.sh tar)" +GTAR="$(src/helpers/findprg.sh gtar)" +GZIP="$(src/helpers/findprg.sh gzip)" +CONFIGPARAM="--with-layout=BinaryDistribution --enable-module=most --enable-shared=max" + +if [ ! -f ./ABOUT_APACHE ] +then + echo "ERROR: The current directory contains no valid Apache distribution." + echo "Please change the directory to the top level directory of a freshly" + echo "unpacked Apache 1.3 source distribution and re-execute the script" + echo "'./src/helpers/bindbuild.sh'." + exit 1; +fi + +if [ -d ./CVS ] +then + echo "ERROR: The current directory is a CVS checkout of Apache." + echo "Only a standard Apache 1.3 source distribution should be used to" + echo "create a binary distribution." + exit 1; +fi + +echo "Building Apache $VER binary distribution..." +echo "Platform is \"$OS\"..." 
+ +( echo "Build log for Apache binary distribution" && \ + echo "----------------------------------------------------------------------" && \ + ./configure $CONFIGPARAM && \ + echo "----------------------------------------------------------------------" && \ + make clean && \ + rm -rf bindist install-bindist.sh *.bindist + echo "----------------------------------------------------------------------" && \ + make && \ + echo "----------------------------------------------------------------------" && \ + make install-quiet root="bindist/" && \ + echo "----------------------------------------------------------------------" && \ + make clean && \ + echo "----------------------------------------------------------------------" && \ + echo "[EOF]" \ +) > build.log 2>&1 + +if [ ! -f ./bindist/bin/httpd ] +then + echo "ERROR: Failed to build Apache. See \"build.log\" for details." + exit 1; +fi + +echo "Binary images successfully created..." +echo "Creating supplementary files..." + +( echo " " && \ + echo "Apache $VER binary distribution" && \ + echo "================================" && \ + echo " " && \ + echo "This binary distribution is usable on a \"$OS\"" && \ + echo "system and was built by \"$USER\"." && \ + echo "" && \ + echo "The distribution contains all standard Apache modules as shared" && \ + echo "objects. This allows you to enable or disable particular modules" && \ + echo "with the LoadModule/AddModule directives in the configuration file" && \ + echo "without the need to re-compile Apache." && \ + echo "" && \ + echo "See \"INSTALL.bindist\" on how to install the distribution." && \ + echo " " && \ + echo "NOTE: Please do not send support-related mails to the address mentioned" && \ + echo " above or to any member of the Apache Group! 
Support questions" && \ + echo " should be directed to the \"comp.infosystems.www.servers.unix\"" && \ + echo " or \"comp.infosystems.www.servers.ms-windows\" newsgroup" && \ + echo " (as appropriate for the platform you use), where some of the" && \ + echo " Apache team lurk, in the company of many other Apache gurus" && \ + echo " who should be able to help." && \ + echo " If you think you found a bug in Apache or have a suggestion please" && \ + echo " visit the bug report page at http://www.apache.org/bug_report.html" && \ + echo " " && \ + echo "----------------------------------------------------------------------" && \ + ./bindist/bin/httpd -V && \ + echo "----------------------------------------------------------------------" \ +) > README.bindist +cp README.bindist ../apache-$VER-$OS.README + +( echo " " && \ + echo "Apache $VER binary installation" && \ + echo "================================" && \ + echo " " && \ + echo "To install this binary distribution you have to execute the installation" && \ + echo "script \"install-bindist.sh\" in the top-level directory of the distribution." && \ + echo " " && \ + echo "The script takes the ServerRoot directory into which you want to install" && \ + echo "Apache as an option. If you ommit the option the default path" && \ + echo "\"/usr/local/apache\" is used." && \ + echo "Make sure you have write permissions in the target directory, e.g. switch" && \ + echo "to user \"root\" before you execute the script." && \ + echo " " && \ + echo "See \"README.bindist\" for further details about this distribution." && \ + echo " " && \ + echo "Please note that this distribution includes the complete Apache source code." && \ + echo "Therefore you may compile Apache yourself at any time if you have a compiler" && \ + echo "installation on your system." && \ + echo "See \"INSTALL\" for details on how to accomplish this." 
&& \ + echo " " \ +) > INSTALL.bindist + +( echo "#!/bin/sh" && \ + echo "#" && \ + echo "# Usage: install-bindist.sh [ServerRoot]" && \ + echo "# This script installs the Apache binary distribution and" && \ + echo "# was automatically created by binbuild.sh." && \ + echo " " && \ + echo "if [ .\$1 = . ]" && \ + echo "then" && \ + echo " SR=/usr/local/apache" && \ + echo "else" && \ + echo " SR=\$1" && \ + echo "fi" && \ + echo "echo \"Installing binary distribution for platform $OS\"" && \ + echo "echo \"into directory \$SR ...\"" && \ + echo "./src/helpers/mkdir.sh \$SR" && \ + echo "cp -r bindist/proxy \$SR/proxy" && \ + echo "cp -r bindist/man \$SR/man" && \ + echo "cp -r bindist/logs \$SR/logs" && \ + echo "cp -r bindist/libexec \$SR/libexec" && \ + echo "cp -r bindist/include \$SR/include" && \ + echo "cp -r bindist/icons \$SR/icons" && \ + echo "cp -r bindist/cgi-bin \$SR/cgi-bin" && \ + echo "cp -r bindist/bin \$SR/bin" && \ + echo "if [ -d \$SR/conf ]" && \ + echo "then" && \ + echo " echo \"[Preserving existing configuration files.]\"" && \ + echo " cp -r bindist/conf/*.default \$SR/conf/" && \ + echo "else" && \ + echo " cp -r bindist/conf \$SR/conf" && \ + echo "fi" && \ + echo "if [ -d \$SR/htdocs ]" && \ + echo "then" && \ + echo " echo \"[Preserving existing htdocs directory.]\"" && \ + echo "else" && \ + echo " cp -r bindist/htdocs \$SR/htdocs" && \ + echo "fi" && \ + echo "sed -e s%/usr/local/apache%\$SR/% \$SR/conf/httpd.conf.default > \$SR/conf/httpd.conf" && \ + echo "sed -e s%PIDFILE=%PIDFILE=\$SR/% -e s%HTTPD=%HTTPD=\\\"\$SR/% -e \"s%/httpd$%/httpd -d \$SR\\\"%\" bindist/bin/apachectl > \$SR/bin/apachectl" && \ + echo " " && \ + echo "echo \"Ready.\"" && \ + echo "echo \" +--------------------------------------------------------+\"" && \ + echo "echo \" | You now have successfully installed the Apache $VER |\"" && \ + echo "echo \" | HTTP server. 
To verify that Apache actually works |\"" && \ + echo "echo \" | correctly you now should first check the (initially |\"" && \ + echo "echo \" | created or preserved) configuration files |\"" && \ + echo "echo \" | |\"" && \ + echo "echo \" | \$SR/conf/httpd.conf\"" && \ + echo "echo \" | |\"" && \ + echo "echo \" | and then you should be able to immediately fire up |\"" && \ + echo "echo \" | Apache the first time by running: |\"" && \ + echo "echo \" | |\"" && \ + echo "echo \" | \$SR/bin/apachectl start \"" &&\ + echo "echo \" | |\"" && \ + echo "echo \" | Thanks for using Apache. The Apache Group |\"" && \ + echo "echo \" | http://www.apache.org/ |\"" && \ + echo "echo \" +--------------------------------------------------------+\"" && \ + echo "echo \" \"" \ +) > install-bindist.sh +chmod 755 install-bindist.sh + +sed -e "s%\"/htdocs%\"/usr/local/apache/htdocs%" \ + -e "s%\"/icons%\"/usr/local/apache/icons%" \ + -e "s%\"/cgi-bin%\"/usr/local/apache/cgi-bin%" \ + -e "s%^ServerAdmin.*%ServerAdmin you@your.address%" \ + -e "s%#ServerName.*%#ServerName localhost%" \ + -e "s%Port 8080%Port 80%" \ + bindist/conf/httpd.conf.default > bindist/conf/httpd.conf +cp bindist/conf/httpd.conf bindist/conf/httpd.conf.default + +echo "Creating distribution archive and readme file..." + +if [ ".`grep -i error build.log > /dev/null`" != . ] +then + echo "ERROR: Failed to build Apache. See \"build.log\" for details." + exit 1; +else + if [ ".$GTAR" != . ] + then + $GTAR -zcf ../apache-$VER-$OS.tar.gz -C .. --owner=root --group=root apache-$VER + else + if [ ".$TAR" != . ] + then + $TAR -cf ../apache-$VER-$OS.tar -C .. apache-$VER + if [ ".$GZIP" != . ] + then + $GZIP ../apache-$VER-$OS.tar + fi + else + echo "ERROR: Could not find a 'tar' program!" + echo " Please execute the following commands manually:" + echo " tar -cf ../apache-$VER-$OS.tar ." 
+ echo " gzip ../apache-$VER-$OS.tar" + fi + fi + + if [ -f ../apache-$VER-$OS.tar.gz ] && [ -f ../apache-$VER-$OS.README ] + then + echo "Ready." + echo "You can find the binary archive (apache-$VER-$OS.tar.gz)" + echo "and the readme file (apache-$VER-$OS.README) in the" + echo "parent directory." + exit 0; + else + exit 1; + fi +fi diff --git a/docs/manual/cgi_path.html b/docs/manual/cgi_path.html new file mode 100644 index 00000000000..81bb314ead0 --- /dev/null +++ b/docs/manual/cgi_path.html @@ -0,0 +1,84 @@ + +PATH_INFO Changes in the CGI Environment + + + +

PATH_INFO Changes in the CGI Environment

+ +
+ +

Overview

+ +

As implemented in Apache 1.1.1 and earlier versions, the method +Apache used to create PATH_INFO in the CGI environment was +counterintuitive, and could result in crashes in certain cases. In +Apache 1.2 and beyond, this behavior has changed. Although this +results in some compatibility problems with certain legacy CGI +applications, the Apache 1.2 behavior is still compatible with the +CGI/1.1 specification, and CGI scripts can be easily modified (see below). + +

The Problem

+ +

Apache 1.1.1 and earlier implemented the PATH_INFO and SCRIPT_NAME +environment variables by looking at the filename, not the URL. While +this resulted in the correct values in many cases, when the filesystem +path was overloaded to contain path information, it could result in +errant behavior. For example, if the following appeared in a config +file: +

+     Alias /cgi-ralph /usr/local/httpd/cgi-bin/user.cgi/ralph
+
+

In this case, user.cgi is the CGI script, the "/ralph" +is information to be passed onto the CGI. If this configuration was in +place, and a request came for "/cgi-ralph/script/", the +code would set PATH_INFO to "/ralph/script", and +SCRIPT_NAME to "/cgi-". Obviously, the latter is +incorrect. In certain cases, this could even cause the server to +crash.

+ +

The Solution

+ +

Apache 1.2 and later now determine SCRIPT_NAME and PATH_INFO by +looking directly at the URL, and determining how much of the URL is +client-modifiable, and setting PATH_INFO to it. To use the above +example, PATH_INFO would be set to "/script", and +SCRIPT_NAME to "/cgi-ralph". This makes sense and results +in no server behavior problems. It also permits the script to be +guaranteed that +"http://$SERVER_NAME:$SERVER_PORT$SCRIPT_NAME$PATH_INFO" +will always be an accessible URL that points to the current script, +something which was not necessarily true with previous versions of +Apache. + +

However, the "/ralph" +information from the Alias directive is lost. This is +unfortunate, but we feel that using the filesystem to pass along this +sort of information is not a recommended method, and a script making +use of it "deserves" not to work. Apache 1.2b3 and later, however, do +provide a workaround. + +

Compatibility with Previous Servers

+ +

It may be necessary for a script that was designed for earlier +versions of Apache or other servers to need the information that the +old PATH_INFO variable provided. For this purpose, Apache 1.2 (1.2b3 +and later) sets an additional variable, FILEPATH_INFO. This +environment variable contains the value that PATH_INFO would have had +with Apache 1.1.1.

+ +

A script that wishes to work with both Apache 1.2 and earlier +versions can simply test for the existence of FILEPATH_INFO, and use +it if available. Otherwise, it can use PATH_INFO. For example, in +Perl, one might use: +

+    $path_info = $ENV{'FILEPATH_INFO'} || $ENV{'PATH_INFO'};
+
+ +

By doing this, a script can work with all servers supporting the +CGI/1.1 specification, including all versions of Apache.

+ + + + + diff --git a/docs/manual/cgi_path.html.en b/docs/manual/cgi_path.html.en new file mode 100644 index 00000000000..81bb314ead0 --- /dev/null +++ b/docs/manual/cgi_path.html.en @@ -0,0 +1,84 @@ + +PATH_INFO Changes in the CGI Environment + + + +

PATH_INFO Changes in the CGI Environment

+ +
+ +

Overview

+ +

As implemented in Apache 1.1.1 and earlier versions, the method +Apache used to create PATH_INFO in the CGI environment was +counterintuitive, and could result in crashes in certain cases. In +Apache 1.2 and beyond, this behavior has changed. Although this +results in some compatibility problems with certain legacy CGI +applications, the Apache 1.2 behavior is still compatible with the +CGI/1.1 specification, and CGI scripts can be easily modified (see below). + +

The Problem

+ +

Apache 1.1.1 and earlier implemented the PATH_INFO and SCRIPT_NAME +environment variables by looking at the filename, not the URL. While +this resulted in the correct values in many cases, when the filesystem +path was overloaded to contain path information, it could result in +errant behavior. For example, if the following appeared in a config +file: +

+     Alias /cgi-ralph /usr/local/httpd/cgi-bin/user.cgi/ralph
+
+

In this case, user.cgi is the CGI script, the "/ralph" +is information to be passed onto the CGI. If this configuration was in +place, and a request came for "/cgi-ralph/script/", the +code would set PATH_INFO to "/ralph/script", and +SCRIPT_NAME to "/cgi-". Obviously, the latter is +incorrect. In certain cases, this could even cause the server to +crash.

+ +

The Solution

+ +

Apache 1.2 and later now determine SCRIPT_NAME and PATH_INFO by +looking directly at the URL, and determining how much of the URL is +client-modifiable, and setting PATH_INFO to it. To use the above +example, PATH_INFO would be set to "/script", and +SCRIPT_NAME to "/cgi-ralph". This makes sense and results +in no server behavior problems. It also permits the script to be +guaranteed that +"http://$SERVER_NAME:$SERVER_PORT$SCRIPT_NAME$PATH_INFO" +will always be an accessible URL that points to the current script, +something which was not necessarily true with previous versions of +Apache. + +

However, the "/ralph" +information from the Alias directive is lost. This is +unfortunate, but we feel that using the filesystem to pass along this +sort of information is not a recommended method, and a script making +use of it "deserves" not to work. Apache 1.2b3 and later, however, do +provide a workaround. + +

Compatibility with Previous Servers

+ +

It may be necessary for a script that was designed for earlier +versions of Apache or other servers to need the information that the +old PATH_INFO variable provided. For this purpose, Apache 1.2 (1.2b3 +and later) sets an additional variable, FILEPATH_INFO. This +environment variable contains the value that PATH_INFO would have had +with Apache 1.1.1.

+ +

A script that wishes to work with both Apache 1.2 and earlier +versions can simply test for the existence of FILEPATH_INFO, and use +it if available. Otherwise, it can use PATH_INFO. For example, in +Perl, one might use: +

+    $path_info = $ENV{'FILEPATH_INFO'} || $ENV{'PATH_INFO'};
+
+ +

By doing this, a script can work with all servers supporting the +CGI/1.1 specification, including all versions of Apache.

+ + + + + diff --git a/docs/manual/dns-caveats.html b/docs/manual/dns-caveats.html new file mode 100644 index 00000000000..e535a36187a --- /dev/null +++ b/docs/manual/dns-caveats.html @@ -0,0 +1,190 @@ + + +Issues Regarding DNS and Apache + + + +

Issues Regarding DNS and Apache

+ +

This page could be summarized with the statement: don't require +Apache to use DNS for any parsing of the configuration files. +If Apache has to use DNS to parse the configuration files then your +server may be subject to reliability problems (it might not boot), or +denial and theft of service attacks (including users able to steal hits +from other users). + +

A Simple Example

+ +Consider this configuration snippet: + +
+    <VirtualHost www.abc.dom>
+    ServerAdmin webgirl@abc.dom
+    DocumentRoot /www/abc
+    </VirtualHost>
+
+ +

In order for Apache to function properly it absolutely needs +to have two pieces of information about each virtual host: the +ServerName +and at least one ip address that the server +responds to. This example does not include the ip address, so Apache +must use DNS to find the address of www.abc.dom. If for +some reason DNS is not available at the time your server is parsing its +config file, then this virtual host will not be configured. It +won't be able to respond to any hits to this virtual host (prior to +Apache version 1.2 the server would not even boot). + +

Suppose that www.abc.dom has address 10.0.0.1. Then +consider this configuration snippet: + +

+    <VirtualHost 10.0.0.1>
+    ServerAdmin webgirl@abc.dom
+    DocumentRoot /www/abc
+    </VirtualHost>
+
+ +

Now Apache needs to use reverse DNS to find the ServerName +for this virtualhost. If that reverse lookup fails then it will partially +disable the virtualhost (prior to Apache version 1.2 the server would not +even boot). If the virtual host is name-based then it will effectively +be totally disabled, but if it is ip-based then it will mostly work. +However if Apache should ever have to generate a full URL for the server +which includes the server name then it will fail to generate a valid URL. + +

Here is a snippet that avoids both of these problems. + +

+    <VirtualHost 10.0.0.1>
+    ServerName www.abc.dom
+    ServerAdmin webgirl@abc.dom
+    DocumentRoot /www/abc
+    </VirtualHost>
+
+ +

Denial of Service

+ +

There are (at least) two forms that denial of service can come in. +If you are running a version of Apache prior to version 1.2 then your +server will not even boot if one of the two DNS lookups mentioned above +fails for any of your virtual hosts. In some cases this DNS lookup may +not even be under your control. For example, if abc.dom +is one of your customers and they control their own DNS then they +can force your (pre-1.2) server to fail while booting simply by deleting the +www.abc.dom record. + +

Another form is far more insidious. Consider this configuration +snippet: + +

+    <VirtualHost www.abc.dom>
+    ServerAdmin webgirl@abc.dom
+    DocumentRoot /www/abc
+    </VirtualHost>
+
+ +
+    <VirtualHost www.def.dom>
+    ServerAdmin webguy@def.dom
+    DocumentRoot /www/def
+    </VirtualHost>
+
+ +

Suppose that you've assigned 10.0.0.1 to www.abc.dom and +10.0.0.2 to www.def.dom. Furthermore, suppose that +def.dom has control of their own DNS. With this config +you have put def.dom into a position where they can steal +all traffic destined to abc.dom. To do so, all they have to +do is set www.def.dom to 10.0.0.1. +Since they control their own DNS you can't stop them from pointing the +www.def.dom record wherever they wish. + +

Requests coming in to 10.0.0.1 (including all those where users typed +in URLs of the form http://www.abc.dom/whatever) will all be +served by the def.dom virtual host. To better understand why +this happens requires a more in-depth discussion of how Apache matches +up incoming requests with the virtual host that will serve it. A rough +document describing this is available. + +

The "main server" Address

+ +

The addition of non-IP-based virtual host +support in Apache 1.1 requires Apache to know the IP address(es) of +the host that httpd is running on. To get this address it uses either +the global ServerName (if present) or calls the C function +gethostname (which should return the same as typing +"hostname" at the command prompt). Then it performs a DNS lookup on +this address. At present there is no way to avoid this lookup. + +

If you fear that this lookup might fail because your DNS server is down +then you can insert the hostname in /etc/hosts (where you +probably already have it so that the machine can boot properly). Then +ensure that your machine is configured to use /etc/hosts +in the event that DNS fails. Depending on what OS you are using this +might be accomplished by editing /etc/resolv.conf, or maybe +/etc/nsswitch.conf. + +

If your server doesn't have to perform DNS for any other reason +then you might be able to get away with running Apache with the +HOSTRESORDER environment variable set to "local". This all +depends on what OS and resolver libraries you are using. It also affects +CGIs unless you use mod_env +to control the environment. It's best to consult the man pages or FAQs +for your OS. + +

The _default_ Address

+ +

Any address that happens to go to your webserver which doesn't match +the ip address of any of the webservers will be served from the "main" or +"default" server configurations. The "main" server configuration consists +of all those definitions appearing outside of any VirtualHost section. +You may want instead to define a <VirtualHost _default_:*> +which returns 403 or 404 for all hits. + +

Tips to Avoid these problems

+ + + +

Appendix: Future Directions

+ +

The situation regarding DNS is highly undesirable. For Apache +1.2 we've attempted to make the server at least continue booting +in the event of failed DNS, but it might not be the best we +can do. In any event requiring the use of explicit ip addresses in +configuration files is highly undesirable in today's Internet where renumbering + is a necessity. + +

A possible work around to the theft of service attack described above +would be to perform a reverse DNS lookup on the ip address returned by +the forward lookup and compare the two names. In the event of a mismatch +the virtualhost would be disabled. This would require reverse DNS to be +configured properly (which is something that most admins are familiar with +because of the common use of "double-reverse" DNS lookups by FTP servers +and TCP wrappers). + +

In any event it doesn't seem possible to reliably boot a virtual-hosted +web server when DNS has failed unless IP addresses are used. Partial +solutions such as disabling portions of the configuration might be worse +than not booting at all depending on what the webserver is supposed +to accomplish. + +

As HTTP/1.1 is deployed and browsers and proxies start issuing the +Host header it will become possible to avoid the use of +ip-based virtual hosts entirely. In this event a webserver has no requirement +to do DNS lookups during configuration. But as of March 1997 these +features have not been deployed widely enough to be put into use on +critical webservers. + + + + + diff --git a/docs/manual/dso.html b/docs/manual/dso.html new file mode 100644 index 00000000000..ecaf33f90fc --- /dev/null +++ b/docs/manual/dso.html @@ -0,0 +1,337 @@ + + +Apache 1.3 Dynamic Shared Object (DSO) support + + + + + + +

Apache 1.3 Dynamic Shared Object (DSO) support

+ +


+ +

Originally written by Ralf S. Engelschall, April 1998
+ +

Background

+ +

On modern Unix derivatives there exists a nifty mechanism usually + called dynamic linking/loading of Dynamic Shared Objects (DSO) which + provides a way to build a piece of program code in a special format + for loading it at run-time into the address space of an executable + program. + +

This loading can usually be done in two ways: Automatically by a + system program called ld.so when an executable program + is started or manually from within the executing program via a + programmatic system interface to the Unix loader through the system + calls dlopen()/dlsym(). + +

In the first way the DSO's are usually called "shared libraries" or + "DSO libraries" and named libfoo.so or + libfoo.so.1.2. They reside in a system directory + (usually /usr/lib) and the link to the executable + program is established at link-time by specifying -lfoo + to the linker command. This hardcodes library references into the + executable program file so that at start-time the Unix loader is able + to locate libfoo.so in /usr/lib or in paths + configured via the environment variable + LD_LIBRARY_PATH. It then resolves any (yet unresolved) + symbols in the executable program which are available in the DSO. + +

Symbols in the executable program are usually not referenced by the + DSO (because it's a reusable library of general code) and hence no + further resolving has to be done. The executable program has no need + to do anything on its own to use the symbols from the DSO because the + complete resolving is done by the Unix loader. (In fact, the code to + invoke ld.so is part of the run-time startup code which + is linked into every executable program which has been bound + non-static). The advantage of dynamic loading of common library code + is obvious: the library code needs to be stored only once, in a + system library like libc.so, saving disk space for every + program. + +

In the second way the DSO's are usually called "shared objects" or + "DSO files" and can be named with an arbitrary extension (although + the canonical name is foo.so). These files usually stay + inside a program-specific directory and there is no automatically + established link to the executable program where they are + used. Instead the executable program manually loads the DSO at + run-time into its address space via dlopen(). At this + time no resolving of symbols from the DSO for the executable program + is done. But instead the Unix loader automatically resolves any (yet + unresolved) symbols in the DSO from the set of symbols exported by + the executable program and its already loaded DSO libraries + (especially all symbols from the ubiquitous libc.so). + This way the DSO gets knowledge of the executable program's symbol + set as if it had been statically linked with it in the first place. + +

Finally, to take advantage of the DSO's API the executable program + has to resolve particular symbols from the DSO via + dlsym() for later use inside dispatch tables etc. In + other words: The executable program has to manually resolve every + symbol it needs to be able to use it. The advantage of such a + mechanism is that optional program parts need not be loaded (and thus + do not spend memory) until they are needed by the program in + question. When required, these program parts can be loaded + dynamically to extend the base program's functionality. + +

Although this DSO mechanism sounds straightforward there is at least one + difficult step here: The resolving of symbols from the executable program for + the DSO when using a DSO to extend a program (the second way). Why? Because + `reverse resolving' DSO symbols from the executable program's symbol set is + against the library design (where the library has no knowledge about the + programs it is used by) and is neither available under all platforms nor + standardized. In practice the executable program's global symbols are often + not re-exported and thus not available for use in a DSO. Finding a way to + force the linker to export all global symbols is the main problem one has to + solve when using DSO for extending a program at run-time. + +

Practical Usage

+ +

The shared library approach is the typical one, because it is what the DSO + mechanism was designed for, hence it is used for nearly all types of + libraries the operating system provides. On the other hand using shared + objects for extending a program is not used by a lot of programs. + +

As of 1998 there are only a few software packages available which use the DSO + mechanism to actually extend their functionality at run-time: Perl 5 (via its + XS mechanism and the DynaLoader module), GIMP, Netscape Server, etc. + Starting with version 1.3, Apache joined the crew, because Apache already + uses a module concept to extend its functionality and internally uses a + dispatch-list-based approach to link external modules into the Apache core + functionality. So, Apache is really predestined for using DSO to load its + modules at run-time. + +

As of Apache 1.3, the configuration system supports two optional features for + taking advantage of the modular DSO approach: compilation of the Apache core + program into a DSO library for shared usage and compilation of the Apache + modules into DSO files for explicit loading at run-time. + +

Implementation

+ +

The DSO support for loading individual Apache modules is based on a module + named mod_so.c which has to be statically compiled into the Apache core. It + is the only module besides http_core.c which cannot be put into a DSO itself + (bootstrapping!). Practically all other distributed Apache modules can + then be placed into a DSO by individually enabling the DSO build for them via + configure's --enable-shared option (see ../INSTALL file) or by changing the + `AddModule' command in src/Configuration.tmpl into a `SharedModule' command + (see ./INSTALL file). After a module is compiled into a DSO named mod_foo.so + you can use mod_so's `LoadModule' command in your httpd.conf file to load + this module at server startup or restart. + +

To simplify this creation of DSO files for Apache modules (especially for + third-party modules) a new support program named `apxs' is available. It can + be used to build DSO based modules _outside of_ the Apache source tree. The + idea is simple: When installing Apache the configure's "make install" + procedure installs the Apache C header files and puts the platform-dependent + compiler and linker flags for building DSO files into the `apxs' program. + This way the user can use `apxs' to compile his Apache module sources without + the Apache distribution source tree and without having to fiddle with the + platform-dependent compiler and linker flags for DSO support. + +

To place the complete Apache core program into a DSO library (only required + on some of the supported platforms to force the linker to export the apache + core symbols -- a prerequisite for the DSO modularization) the rule + SHARED_CORE has to be enabled via configure's --enable-rule=SHARED_CORE + option (see ../INSTALL file) or by changing the Rule command in + Configuration.tmpl to "Rule SHARED_CORE=yes" (see ./INSTALL file). The Apache + core code is then placed into a DSO library named libhttpd.so. Because one + cannot link a DSO against static libraries, an additional executable program + named libhttpd.ep is created which both binds this static code and provides a + stub for the main() function. Finally the httpd executable program itself is + replaced by a bootstrapping code which automatically makes sure the Unix + loader is able to load and start libhttpd.ep by providing the LD_LIBRARY_PATH + to libhttpd.so. + +

Supported Platforms

+ +

Apache's src/Configure script currently has only limited built-in knowledge + on how to compile DSO files because (as already mentioned) this is heavily + platform-dependent. Nevertheless all major Unix platforms are supported. The + definitive current state (May 1998) is this: + +

+ Out-of-the-box supported platforms:
+ (actually tested versions in parenthesis)
+
+   o  FreeBSD            (2.1.5, 2.2.5, 2.2.6)
+   o  OpenBSD            (2.x)
+   o  NetBSD             (1.3.1)
+   o  Linux              (Debian/1.3.1, RedHat/4.2)
+   o  Solaris            (2.4, 2.5.1, 2.6)
+   o  SunOS              (4.1.3)
+   o  OSF1               (4.0)
+   o  IRIX               (6.2)
+   o  HP/UX              (10.20)
+   o  UnixWare           (2.01, 2.1.2)
+   o  AIX                (3.2, 4.1.5, 4.2, 4.3)
+   o  ReliantUNIX/SINIX  (5.43)
+   o  SVR4               (-)
+
+ Explicitly unsupported platforms:
+
+   o  Ultrix: There is no dlopen-style interface under this platform.
+
+ + +

Usage Summary

+ +

To give you an overview of the DSO features of Apache 1.3, here is + a short and concise summary: + +

    + +
  1. Placing the Apache core code (all the stuff which usually forms + the httpd binary) into a DSO libhttpd.so, an executable program + libhttpd.ep and a bootstrapping executable program httpd (Notice: + this is only required on some of the supported platforms to force + the linker to export the Apache core symbols, which in turn is a + prerequisite for the DSO modularization): + +
    +   o Build and install via configure (preferred):
    +     $ ./configure --prefix=/path/to/install
    +                   --enable-rule=SHARED_CORE ...
    +     $ make install
    +
    +   o Build and install manually: 
    +     - Edit src/Configuration:
    +       << "Rule SHARED_CORE=default"
    +       >> "Rule SHARED_CORE=yes"
    +       << "EXTRA_CFLAGS= "
    +       >> "EXTRA_CFLAGS= -DSHARED_CORE_DIR=\"/path/to/install/libexec\""
    +     $ make 
    +     $ cp src/libhttpd.so* /path/to/install/libexec/
    +     $ cp src/libhttpd.ep  /path/to/install/libexec/
    +     $ cp src/httpd        /path/to/install/bin/
    +
    + +
  2. Build and install a distributed Apache module, say mod_foo.c, + into its own DSO mod_foo.so: + +
    +   o Build and install via configure (preferred):
    +     $ ./configure --prefix=/path/to/install
    +             --enable-shared=foo
    +     $ make install
    +
    +   o Build and install manually: 
    +     - Edit src/Configuration:
    +       << "AddModule    modules/xxxx/mod_foo.o" 
    +       >> "SharedModule modules/xxxx/mod_foo.so"
    +     $ make
    +     $ cp src/xxxx/mod_foo.so /path/to/install/libexec
    +     - Edit /path/to/install/etc/httpd.conf
    +       >> "LoadModule foo_module /path/to/install/libexec/mod_foo.so"
    +
    + +
  3. Build and install a third-party Apache module, say mod_foo.c, + into its own DSO mod_foo.so + +
    +   o Build and install via configure (preferred):
    +     $ ./configure --add-module=/path/to/3rdparty/mod_foo.c 
    +             --enable-shared=foo
    +     $ make install
    +
    +   o Build and install manually: 
    +     $ cp /path/to/3rdparty/mod_foo.c /path/to/apache-1.3/src/modules/extra/
    +     - Edit src/Configuration:
    +       >> "SharedModule modules/extra/mod_foo.so"
    +     $ make
    +     $ cp src/xxxx/mod_foo.so /path/to/install/libexec
    +     - Edit /path/to/install/etc/httpd.conf
    +       >> "LoadModule foo_module /path/to/install/libexec/mod_foo.so"
    +
    + +
  4. Build and install a third-party Apache module, say mod_foo.c, + into its own DSO mod_foo.so _outside of_ the Apache source tree: + +
    +   o Build and install via APXS: 
    +     $ cd /path/to/3rdparty
    +     $ apxs -c mod_foo.c
    +     $ apxs -i -a -n foo mod_foo.so
    +
    + + + +

    Advantages & Disadvantages

    + +

    The above DSO based features of Apache 1.3 have the following advantages: + +

      +
    • The server package is more flexible at run-time because the actual server + process can be assembled at run-time via LoadModule httpd.conf + configuration commands instead of Configuration AddModule commands at + build-time. For instance this way one is able to run different server + instances (standard & SSL version, minimalistic & powered up version + [mod_perl, PHP3], etc.) with only one Apache installation. + +
    • The server package can be easily extended with third-party modules even + after installation. This is at least a great benefit for vendor package + maintainers who can create an Apache core package and additional packages + containing extensions like PHP3, mod_perl, mod_fastcgi, etc. + +
    • Easier Apache module prototyping because with the DSO/APXS pair you can + both work outside the Apache source tree and only need an `apxs -i' + command followed by an `apachectl restart' to bring a new version of your + currently developed module into the running Apache server. +
    + +

    DSO has the following disadvantages: + +

      +
    • The DSO mechanism cannot be used on every platform because not all + operating systems support dynamic loading. + +
    • The server is approximately 20% slower at startup time because of the + symbol resolving overhead the Unix loader now has to do. + +
    • The server is approximately 5% slower at execution time under some + platforms because position-independent code (PIC) sometimes needs + complicated assembler tricks for relative addressing which are not + necessarily as fast as absolute addressing. + +
    • Because DSO modules cannot be linked against other DSO-based libraries + (ld -lfoo) on all platforms (for instance a.out-based platforms usually + don't provide this functionality while ELF-based platforms do) you cannot + use the DSO mechanism for all types of modules. Or in other words, + modules compiled as DSO files are restricted to only use symbols from the + Apache core, from the C library (libc) and all other dynamic or static + libraries used by the Apache core, or from static library archives + (libfoo.a) containing position-independent code. The only chance to use + other code is to either make sure the Apache core itself already contains + a reference to it or load the code yourself via dlopen(). + +
    • Under some platforms (many SVR4 systems) there is no way to force the + linker to export all global symbols for use in DSO's when linking the + Apache httpd executable program. But without the visibility of the Apache + core symbols no standard Apache module could be used as a DSO. The only + chance here is to use the SHARED_CORE feature because this way the global + symbols are forced to be exported. As a consequence the Apache + src/Configure script automatically enforces SHARED_CORE on these + platforms when DSO features are used in the Configuration file or on the + configure command line. +
    + +
    +                     Ralf S. Engelschall
    +                     rse@engelschall.com
    +                     www.engelschall.com
    +
    + + + + + + + diff --git a/docs/manual/dso.html.en b/docs/manual/dso.html.en new file mode 100644 index 00000000000..ecaf33f90fc --- /dev/null +++ b/docs/manual/dso.html.en @@ -0,0 +1,337 @@ + + +Apache 1.3 Dynamic Shared Object (DSO) support + + + + + + +

    Apache 1.3 Dynamic Shared Object (DSO) support

    + +


    + +

    Originally written by Ralf S. Engelschall, April 1998
    + +

    Background

    + +

    On modern Unix derivatives there exists a nifty mechanism usually + called dynamic linking/loading of Dynamic Shared Objects (DSO) which + provides a way to build a piece of program code in a special format + for loading it at run-time into the address space of an executable + program. + +

    This loading can usually be done in two ways: Automatically by a + system program called ld.so when an executable program + is started or manually from within the executing program via a + programmatic system interface to the Unix loader through the system + calls dlopen()/dlsym(). + +

    In the first way the DSO's are usually called "shared libraries" or + "DSO libraries" and named libfoo.so or + libfoo.so.1.2. They reside in a system directory + (usually /usr/lib) and the link to the executable + program is established at link-time by specifying -lfoo + to the linker command. This hardcodes library references into the + executable program file so that at start-time the Unix loader is able + to locate libfoo.so in /usr/lib or in paths + configured via the environment variable + LD_LIBRARY_PATH. It then resolves any (yet unresolved) + symbols in the executable program which are available in the DSO. + +

    Symbols in the executable program are usually not referenced by the + DSO (because it's a reusable library of general code) and hence no + further resolving has to be done. The executable program has no need + to do anything on its own to use the symbols from the DSO because the + complete resolving is done by the Unix loader. (In fact, the code to + invoke ld.so is part of the run-time startup code which + is linked into every executable program which has been bound + non-static). The advantage of dynamic loading of common library code + is obvious: the library code needs to be stored only once, in a + system library like libc.so, saving disk space for every + program. + +

    In the second way the DSO's are usually called "shared objects" or + "DSO files" and can be named with an arbitrary extension (although + the canonical name is foo.so). These files usually stay + inside a program-specific directory and there is no automatically + established link to the executable program where they are + used. Instead the executable program manually loads the DSO at + run-time into its address space via dlopen(). At this + time no resolving of symbols from the DSO for the executable program + is done. But instead the Unix loader automatically resolves any (yet + unresolved) symbols in the DSO from the set of symbols exported by + the executable program and its already loaded DSO libraries + (especially all symbols from the ubiquitous libc.so). + This way the DSO gets knowledge of the executable program's symbol + set as if it had been statically linked with it in the first place. + +

    Finally, to take advantage of the DSO's API the executable program + has to resolve particular symbols from the DSO via + dlsym() for later use inside dispatch tables etc. In + other words: The executable program has to manually resolve every + symbol it needs to be able to use it. The advantage of such a + mechanism is that optional program parts need not be loaded (and thus + do not spend memory) until they are needed by the program in + question. When required, these program parts can be loaded + dynamically to extend the base program's functionality. + +

    Although this DSO mechanism sounds straightforward there is at least one + difficult step here: The resolving of symbols from the executable program for + the DSO when using a DSO to extend a program (the second way). Why? Because + `reverse resolving' DSO symbols from the executable program's symbol set is + against the library design (where the library has no knowledge about the + programs it is used by) and is neither available under all platforms nor + standardized. In practice the executable program's global symbols are often + not re-exported and thus not available for use in a DSO. Finding a way to + force the linker to export all global symbols is the main problem one has to + solve when using DSO for extending a program at run-time. + +

    Practical Usage

    + +

    The shared library approach is the typical one, because it is what the DSO + mechanism was designed for, hence it is used for nearly all types of + libraries the operating system provides. On the other hand using shared + objects for extending a program is not used by a lot of programs. + +

    As of 1998 there are only a few software packages available which use the DSO + mechanism to actually extend their functionality at run-time: Perl 5 (via its + XS mechanism and the DynaLoader module), GIMP, Netscape Server, etc. + Starting with version 1.3, Apache joined the crew, because Apache already + uses a module concept to extend its functionality and internally uses a + dispatch-list-based approach to link external modules into the Apache core + functionality. So, Apache is really predestined for using DSO to load its + modules at run-time. + +

    As of Apache 1.3, the configuration system supports two optional features for + taking advantage of the modular DSO approach: compilation of the Apache core + program into a DSO library for shared usage and compilation of the Apache + modules into DSO files for explicit loading at run-time. + +

    Implementation

    + +

    The DSO support for loading individual Apache modules is based on a module + named mod_so.c which has to be statically compiled into the Apache core. It + is the only module besides http_core.c which cannot be put into a DSO itself + (bootstrapping!). Practically all other distributed Apache modules can + then be placed into a DSO by individually enabling the DSO build for them via + configure's --enable-shared option (see ../INSTALL file) or by changing the + `AddModule' command in src/Configuration.tmpl into a `SharedModule' command + (see ./INSTALL file). After a module is compiled into a DSO named mod_foo.so + you can use mod_so's `LoadModule' command in your httpd.conf file to load + this module at server startup or restart. + +

    To simplify this creation of DSO files for Apache modules (especially for + third-party modules) a new support program named `apxs' is available. It can + be used to build DSO based modules _outside of_ the Apache source tree. The + idea is simple: When installing Apache the configure's "make install" + procedure installs the Apache C header files and puts the platform-dependent + compiler and linker flags for building DSO files into the `apxs' program. + This way the user can use `apxs' to compile his Apache module sources without + the Apache distribution source tree and without having to fiddle with the + platform-dependent compiler and linker flags for DSO support. + +

    To place the complete Apache core program into a DSO library (only required + on some of the supported platforms to force the linker to export the apache + core symbols -- a prerequisite for the DSO modularization) the rule + SHARED_CORE has to be enabled via configure's --enable-rule=SHARED_CORE + option (see ../INSTALL file) or by changing the Rule command in + Configuration.tmpl to "Rule SHARED_CORE=yes" (see ./INSTALL file). The Apache + core code is then placed into a DSO library named libhttpd.so. Because one + cannot link a DSO against static libraries, an additional executable program + named libhttpd.ep is created which both binds this static code and provides a + stub for the main() function. Finally the httpd executable program itself is + replaced by a bootstrapping code which automatically makes sure the Unix + loader is able to load and start libhttpd.ep by providing the LD_LIBRARY_PATH + to libhttpd.so. + +

    Supported Platforms

    + +

    Apache's src/Configure script currently has only limited built-in knowledge + on how to compile DSO files because (as already mentioned) this is heavily + platform-dependent. Nevertheless all major Unix platforms are supported. The + definitive current state (May 1998) is this: + +

    + Out-of-the-box supported platforms:
    + (actually tested versions in parentheses)
    +
    +   o  FreeBSD            (2.1.5, 2.2.5, 2.2.6)
    +   o  OpenBSD            (2.x)
    +   o  NetBSD             (1.3.1)
    +   o  Linux              (Debian/1.3.1, RedHat/4.2)
    +   o  Solaris            (2.4, 2.5.1, 2.6)
    +   o  SunOS              (4.1.3)
    +   o  OSF1               (4.0)
    +   o  IRIX               (6.2)
    +   o  HP/UX              (10.20)
    +   o  UnixWare           (2.01, 2.1.2)
    +   o  AIX                (3.2, 4.1.5, 4.2, 4.3)
    +   o  ReliantUNIX/SINIX  (5.43)
    +   o  SVR4               (-)
    +
    + Explicitly unsupported platforms:
    +
    +   o  Ultrix: There is no dlopen-style interface under this platform.
    +
    + + +

    Usage Summary

    + +

    To give you an overview of the DSO features of Apache 1.3, here is + a short and concise summary: + +

      + +
    1. Placing the Apache core code (all the stuff which usually forms + the httpd binary) into a DSO libhttpd.so, an executable program + libhttpd.ep and a bootstrapping executable program httpd (Notice: + this is only required on some of the supported platforms to force + the linker to export the Apache core symbols, which in turn is a + prerequisite for the DSO modularization): + +
      +   o Build and install via configure (preferred):
      +     $ ./configure --prefix=/path/to/install
      +                   --enable-rule=SHARED_CORE ...
      +     $ make install
      +
      +   o Build and install manually: 
      +     - Edit src/Configuration:
      +       << "Rule SHARED_CORE=default"
      +       >> "Rule SHARED_CORE=yes"
      +       << "EXTRA_CFLAGS= "
      +       >> "EXTRA_CFLAGS= -DSHARED_CORE_DIR=\"/path/to/install/libexec\""
      +     $ make 
      +     $ cp src/libhttpd.so* /path/to/install/libexec/
      +     $ cp src/libhttpd.ep  /path/to/install/libexec/
      +     $ cp src/httpd        /path/to/install/bin/
      +
      + +
    2. Build and install a distributed Apache module, say mod_foo.c, + into its own DSO mod_foo.so: + +
      +   o Build and install via configure (preferred):
      +     $ ./configure --prefix=/path/to/install
      +             --enable-shared=foo
      +     $ make install
      +
      +   o Build and install manually: 
      +     - Edit src/Configuration:
      +       << "AddModule    modules/xxxx/mod_foo.o" 
      +       >> "SharedModule modules/xxxx/mod_foo.so"
      +     $ make
      +     $ cp src/xxxx/mod_foo.so /path/to/install/libexec
      +     - Edit /path/to/install/etc/httpd.conf
      +       >> "LoadModule foo_module /path/to/install/libexec/mod_foo.so"
      +
      + +
    3. Build and install a third-party Apache module, say mod_foo.c, + into its own DSO mod_foo.so + +
      +   o Build and install via configure (preferred):
      +     $ ./configure --add-module=/path/to/3rdparty/mod_foo.c 
      +             --enable-shared=foo
      +     $ make install
      +
      +   o Build and install manually: 
      +     $ cp /path/to/3rdparty/mod_foo.c /path/to/apache-1.3/src/modules/extra/
      +     - Edit src/Configuration:
      +       >> "SharedModule modules/extra/mod_foo.so"
      +     $ make
      +     $ cp src/xxxx/mod_foo.so /path/to/install/libexec
      +     - Edit /path/to/install/etc/httpd.conf
      +       >> "LoadModule foo_module /path/to/install/libexec/mod_foo.so"
      +
      + +
    4. Build and install a third-party Apache module, say mod_foo.c, + into its own DSO mod_foo.so _outside of_ the Apache source tree: + +
      +   o Build and install via APXS: 
      +     $ cd /path/to/3rdparty
      +     $ apxs -c mod_foo.c
      +     $ apxs -i -a -n foo mod_foo.so
      +
      + + + +

      Advantages & Disadvantages

      + +

      The above DSO based features of Apache 1.3 have the following advantages: + +

        +
      • The server package is more flexible at run-time because the actual server + process can be assembled at run-time via LoadModule httpd.conf + configuration commands instead of Configuration AddModule commands at + build-time. For instance this way one is able to run different server + instances (standard & SSL version, minimalistic & powered up version + [mod_perl, PHP3], etc.) with only one Apache installation. + +
      • The server package can be easily extended with third-party modules even + after installation. This is at least a great benefit for vendor package + maintainers who can create an Apache core package and additional packages + containing extensions like PHP3, mod_perl, mod_fastcgi, etc. + +
      • Easier Apache module prototyping because with the DSO/APXS pair you can + both work outside the Apache source tree and only need an `apxs -i' + command followed by an `apachectl restart' to bring a new version of your + currently developed module into the running Apache server. +
      + +

      DSO has the following disadvantages: + +

        +
      • The DSO mechanism cannot be used on every platform because not all + operating systems support dynamic loading. + +
      • The server is approximately 20% slower at startup time because of the + symbol resolving overhead the Unix loader now has to do. + +
      • The server is approximately 5% slower at execution time under some + platforms because position-independent code (PIC) sometimes needs + complicated assembler tricks for relative addressing which are not + necessarily as fast as absolute addressing. + +
      • Because DSO modules cannot be linked against other DSO-based libraries + (ld -lfoo) on all platforms (for instance a.out-based platforms usually + don't provide this functionality while ELF-based platforms do) you cannot + use the DSO mechanism for all types of modules. Or in other words, + modules compiled as DSO files are restricted to only use symbols from the + Apache core, from the C library (libc) and all other dynamic or static + libraries used by the Apache core, or from static library archives + (libfoo.a) containing position-independent code. The only chance to use + other code is to either make sure the Apache core itself already contains + a reference to it or load the code yourself via dlopen(). + +
      • Under some platforms (many SVR4 systems) there is no way to force the + linker to export all global symbols for use in DSO's when linking the + Apache httpd executable program. But without the visibility of the Apache + core symbols no standard Apache module could be used as a DSO. The only + chance here is to use the SHARED_CORE feature because this way the global + symbols are forced to be exported. As a consequence the Apache + src/Configure script automatically enforces SHARED_CORE on these + platforms when DSO features are used in the Configuration file or on the + configure command line. +
      + +
      +                     Ralf S. Engelschall
      +                     rse@engelschall.com
      +                     www.engelschall.com
      +
      + + + + + + + diff --git a/docs/manual/env.html b/docs/manual/env.html new file mode 100644 index 00000000000..fbecf41bb7f --- /dev/null +++ b/docs/manual/env.html @@ -0,0 +1,30 @@ + + + +Special Purpose Environment Variables + + + + +

      Special Purpose Environment Variables

      +

      Interoperability problems have led to the introduction of mechanisms to modify +the way Apache behaves when talking to particular clients. To make these +mechanisms as flexible as possible, they are invoked by defining environment +variables, typically with BrowserMatch, though SetEnv and +PassEnv could also be used, for example.

      +

      nokeepalive

      +This disables KeepAlive when set. Because +of problems with Netscape 2.x and KeepAlive, we recommend the following +directive be used: +
      +BrowserMatch Mozilla/2 nokeepalive +
      +

      force-response-1.0

      +This forces an HTTP/1.0 response when set. It was originally implemented as a +result of a problem with AOL's proxies. Some clients may not behave correctly +when given an HTTP/1.1 response, and this can be used to interoperate with +them. + + + + diff --git a/docs/manual/env.html.en b/docs/manual/env.html.en new file mode 100644 index 00000000000..fbecf41bb7f --- /dev/null +++ b/docs/manual/env.html.en @@ -0,0 +1,30 @@ + + + +Special Purpose Environment Variables + + + + +

      Special Purpose Environment Variables

      +

      Interoperability problems have led to the introduction of mechanisms to modify +the way Apache behaves when talking to particular clients. To make these +mechanisms as flexible as possible, they are invoked by defining environment +variables, typically with BrowserMatch, though SetEnv and +PassEnv could also be used, for example.

      +

      nokeepalive

      +This disables KeepAlive when set. Because +of problems with Netscape 2.x and KeepAlive, we recommend the following +directive be used: +
      +BrowserMatch Mozilla/2 nokeepalive +
      +

      force-response-1.0

      +This forces an HTTP/1.0 response when set. It was originally implemented as a +result of a problem with AOL's proxies. Some clients may not behave correctly +when given an HTTP/1.1 response, and this can be used to interoperate with +them. + + + + diff --git a/docs/manual/images/mod_rewrite_fig1.gif b/docs/manual/images/mod_rewrite_fig1.gif new file mode 100644 index 00000000000..664ac1e7bb7 Binary files /dev/null and b/docs/manual/images/mod_rewrite_fig1.gif differ diff --git a/docs/manual/images/mod_rewrite_fig2.gif b/docs/manual/images/mod_rewrite_fig2.gif new file mode 100644 index 00000000000..3ea8cb65a3f Binary files /dev/null and b/docs/manual/images/mod_rewrite_fig2.gif differ diff --git a/docs/manual/location.html b/docs/manual/location.html deleted file mode 100644 index a1f4c9ea3ff..00000000000 --- a/docs/manual/location.html +++ /dev/null @@ -1,58 +0,0 @@ - - - -Access Control by URL - - - - -

      Access Control by URL

      - -

      The <Location> Directive

      - -Syntax: <Location URL prefix>
      -Context: server config, virtual host
      -Status: core
      - -

      The <Location> directive provides for access control by -URL. It is comparable to the <Directory> directive, and -should be matched with a </Location> directive. Directives that -apply to the URL given should be listed -within. <Location> sections are processed in the -order they appear in the configuration file, after the -<Directory> sections and .htaccess files are -read.

      - -

      Note that, due to the way HTTP functions, URL prefix -should, save for proxy requests, be of the form /path/, -and should not include the http://servername. It doesn't -necessarily have to protect a directory (it can be an individual -file, or a number of files), and can include wildcards. In a wildcard -string, `?' matches any single character, and `*' matches any -sequence of characters. - -

      This functionality is especially useful when combined with the -SetHandler -directive. For example, to enable status requests, but allow them only -from browsers at foo.com, you might use: - -

      -    <Location /status>
      -    SetHandler server-status
      -    
      -    order deny,allow
      -    deny from all
      -    allow from .foo.com
      -    
      -    </Location>
      -
      - -


      - -Home -Index - - - - diff --git a/docs/manual/misc/API.html b/docs/manual/misc/API.html deleted file mode 100644 index f860996e472..00000000000 --- a/docs/manual/misc/API.html +++ /dev/null @@ -1,988 +0,0 @@ - - -Apache API notes - - - -

      Apache API notes

      - -These are some notes on the Apache API and the data structures you -have to deal with, etc. They are not yet nearly complete, but -hopefully, they will help you get your bearings. Keep in mind that -the API is still subject to change as we gain experience with it. -(See the TODO file for what might be coming). However, -it will be easy to adapt modules to any changes that are made. -(We have more modules to adapt than you do). -

      - -A few notes on general pedagogical style here. In the interest of -conciseness, all structure declarations here are incomplete --- the -real ones have more slots that I'm not telling you about. For the -most part, these are reserved to one component of the server core or -another, and should be altered by modules with caution. However, in -some cases, they really are things I just haven't gotten around to -yet. Welcome to the bleeding edge.

      - -Finally, here's an outline, to give you some bare idea of what's -coming up, and in what order: - -

      - -

      Basic concepts.

      - -We begin with an overview of the basic concepts behind the -API, and how they are manifested in the code. - -

      Handlers, Modules, and Requests

      - -Apache breaks down request handling into a series of steps, more or -less the same way the Netscape server API does (although this API has -a few more stages than NetSite does, as hooks for stuff I thought -might be useful in the future). These are: - -
        -
      • URI -> Filename translation -
      • Auth ID checking [is the user who they say they are?] -
      • Auth access checking [is the user authorized here?] -
      • Access checking other than auth -
      • Determining MIME type of the object requested -
      • `Fixups' --- there aren't any of these yet, but the phase is - intended as a hook for possible extensions like - SetEnv, which don't really fit well elsewhere. -
      • Actually sending a response back to the client. -
      • Logging the request -
      - -These phases are handled by looking at each of a succession of -modules, looking to see if each of them has a handler for the -phase, and attempting to invoke it if so. The handler can typically do -one of three things: - -
        -
      • Handle the request, and indicate that it has done so - by returning the magic constant OK. -
      • Decline to handle the request, by returning the magic - integer constant DECLINED. In this case, the - server behaves in all respects as if the handler simply hadn't - been there. -
      • Signal an error, by returning one of the HTTP error codes. - This terminates normal handling of the request, although an - ErrorDocument may be invoked to try to mop up, and it will be - logged in any case. -
      - -Most phases are terminated by the first module that handles them; -however, for logging, `fixups', and non-access authentication -checking, all handlers always run (barring an error). Also, the -response phase is unique in that modules may declare multiple handlers -for it, via a dispatch table keyed on the MIME type of the requested -object. Modules may declare a response-phase handler which can handle -any request, by giving it the key */* (i.e., a -wildcard MIME type specification). However, wildcard handlers are -only invoked if the server has already tried and failed to find a more -specific response handler for the MIME type of the requested object -(either none existed, or they all declined).

      - -The handlers themselves are functions of one argument (a -request_rec structure; vide infra), which return an -integer, as above.

      - -

      A brief tour of a module

      - -At this point, we need to explain the structure of a module. Our -candidate will be one of the messier ones, the CGI module --- this -handles both CGI scripts and the ScriptAlias config file -command. It's actually a great deal more complicated than most -modules, but if we're going to have only one example, it might as well -be the one with its fingers in every place.

      - -Let's begin with handlers. In order to handle the CGI scripts, the -module declares a response handler for them. Because of -ScriptAlias, it also has handlers for the name -translation phase (to recognise ScriptAliased URIs), and the -type-checking phase (any ScriptAliased request is typed -as a CGI script).

      - -The module needs to maintain some per (virtual) -server information, namely, the ScriptAliases in effect; -the module structure therefore contains pointers to a function which -builds these structures, and to another which combines two of them (in -case the main server and a virtual server both have -ScriptAliases declared).

      - -Finally, this module contains code to handle the -ScriptAlias command itself. This particular module only -declares one command, but there could be more, so modules have -command tables which declare their commands, and describe -where they are permitted, and how they are to be invoked.

      - -A final note on the declared types of the arguments of some of these -commands: a pool is a pointer to a resource pool -structure; these are used by the server to keep track of the memory -which has been allocated, files opened, etc., either to service a -particular request, or to handle the process of configuring itself. -That way, when the request is over (or, for the configuration pool, -when the server is restarting), the memory can be freed, and the files -closed, en masse, without anyone having to write explicit code to -track them all down and dispose of them. Also, a -cmd_parms structure contains various information about -the config file being read, and other status information, which is -sometimes of use to the function which processes a config-file command -(such as ScriptAlias). - -With no further ado, the module itself: - -

      -/* Declarations of handlers; each one takes the request_rec being served. */
      -
      -int translate_scriptalias (request_rec *);   /* name translation: recognise ScriptAliased URIs */
      -int type_scriptalias (request_rec *);        /* type checking: ScriptAliased requests are typed as CGI */
      -int cgi_handler (request_rec *);             /* response handler for CGI scripts */
      -
      -/* Subsidiary dispatch table for response-phase handlers, by MIME type */
      -
      -handler_rec cgi_handlers[] = {
      -{ "application/x-httpd-cgi", cgi_handler },
      -{ NULL }                                     /* closing NULL entry */
      -};
      -
      -/* Declarations of routines to manipulate the module's configuration
      - * info.  Note that these are returned, and passed in, as void *'s;
      - * the server core keeps track of them, but it doesn't, and can't,
      - * know their internal structure.
      - */
      -
      -void *make_cgi_server_config (pool *);                    /* builds the per-server ScriptAlias info */
      -void *merge_cgi_server_config (pool *, void *, void *);   /* combines two such structures */
      -
      -/* Declarations of routines to handle config-file commands */
      -
      -extern char *script_alias(cmd_parms *, void *per_dir_config, char *fake,
      -                          char *real);
      -
      -command_rec cgi_cmds[] = {
      -{ "ScriptAlias", script_alias, NULL, RSRC_CONF, TAKE2,   /* server config files only; two pre-parsed args */
      -    "a fakename and a realname"},                        /* describes the expected arguments */
      -{ NULL }                                                 /* closing NULL entry */
      -};
      -
      -module cgi_module = {
      -   STANDARD_MODULE_STUFF,
      -   NULL,                     /* initializer */
      -   NULL,                     /* dir config creator */
      -   NULL,                     /* dir merger --- default is to override */
      -   make_cgi_server_config,   /* server config creator */
      -   merge_cgi_server_config,  /* merge server config */
      -   cgi_cmds,                 /* command table */
      -   cgi_handlers,             /* response handlers, by MIME type */
      -   translate_scriptalias,    /* filename translation */
      -   NULL,                     /* check_user_id */
      -   NULL,                     /* check auth */
      -   NULL,                     /* check access */
      -   type_scriptalias,         /* type_checker */
      -   NULL,                     /* fixups */
      -   NULL                      /* logger */
      -};
      -
      - -

      How handlers work

      - -The sole argument to handlers is a request_rec structure. -This structure describes a particular request which has been made to -the server, on behalf of a client. In most cases, each connection to -the client generates only one request_rec structure.

      - -

      A brief tour of the request_rec

      - -The request_rec contains pointers to a resource pool -which will be cleared when the server is finished handling the -request; to structures containing per-server and per-connection -information, and most importantly, information on the request itself.

      - -The most important such information is a small set of character -strings describing attributes of the object being requested, including -its URI, filename, content-type and content-encoding (these being filled -in by the translation and type-check handlers which handle the -request, respectively).

      - -Other commonly used data items are tables giving the MIME headers on -the client's original request, MIME headers to be sent back with the -response (which modules can add to at will), and environment variables -for any subprocesses which are spawned off in the course of servicing -the request. These tables are manipulated using the -table_get and table_set routines.

      - -Finally, there are pointers to two data structures which, in turn, -point to per-module configuration structures. Specifically, these -hold pointers to the data structures which the module has built to -describe the way it has been configured to operate in a given -directory (via .htaccess files or -<Directory> sections), and for private data it has -built in the course of servicing the request (so modules' handlers for -one phase can pass `notes' to their handlers for other phases). There -is another such configuration vector in the server_rec -data structure pointed to by the request_rec, which -contains per (virtual) server configuration data.

      - -Here is an abridged declaration, giving the fields most commonly used:

      - -

      -struct request_rec {
      -
      -  pool *pool;             /* resource pool, cleared when the request has been handled */
      -  conn_rec *connection;   /* per-connection information */
      -  server_rec *server;     /* per-server information, incl. per (virtual) server config */
      -
      -  /* What object is being requested */
      -  
      -  char *uri;
      -  char *filename;       /* filled in by the translation handler */
      -  char *path_info;
      -  char *args;           /* QUERY_ARGS, if any */
      -  struct stat finfo;    /* Set by server core;
      -                         * st_mode set to zero if no such file */
      -  
      -  char *content_type;       /* filled in by the type-check handler */
      -  char *content_encoding;   /* filled in by the type-check handler */
      -  
      -  /* MIME header environments, in and out.  Also, an array containing
      -   * environment variables to be passed to subprocesses, so people can
      -   * write modules to add to that environment.
      -   *
      -   * The difference between headers_out and err_headers_out is that
      -   * the latter are printed even on error, and persist across internal
      -   * redirects (so the headers printed for ErrorDocument handlers will
      -   * have them).
      -   */
      -  
      -  table *headers_in;        /* MIME headers from the client's original request */
      -  table *headers_out;       /* MIME headers to send back with the response */
      -  table *err_headers_out;
      -  table *subprocess_env;    /* environment for subprocesses spawned for this request */
      -
      -  /* Info about the request itself... */
      -  
      -  int header_only;     /* HEAD request, as opposed to GET */
      -  char *protocol;      /* Protocol, as given to us, or HTTP/0.9 */
      -  char *method;        /* GET, HEAD, POST, etc. */
      -  int method_number;   /* M_GET, M_POST, etc. */
      -
      -  /* Info for logging */
      -
      -  char *the_request;   /* NOTE(review): presumably the request line as received --- confirm */
      -  int bytes_sent;      /* only correct in the last request of an internal-redirect chain */
      -
      -  /* A flag which modules can set, to indicate that the data being
      -   * returned is volatile, and clients should be told not to cache it.
      -   */
      -
      -  int no_cache;
      -
      -  /* Various other config info which may change with .htaccess files
      -   * These are config vectors, with one void* pointer for each module
      -   * (the thing pointed to being the module's business).
      -   */
      -  
      -  void *per_dir_config;   /* Options set in config files, etc. */
      -  void *request_config;   /* Notes on *this* request */
      -  
      -};
      -
      -
      - -

      Where request_rec structures come from

      - -Most request_rec structures are built by reading an HTTP -request from a client, and filling in the fields. However, there are -a few exceptions: - -
        -
      • If the request is to an imagemap, a type map (i.e., a - *.var file), or a CGI script which returned a - local `Location:', then the resource which the user requested - is going to be ultimately located by some URI other than what - the client originally supplied. In this case, the server does - an internal redirect, constructing a new - request_rec for the new URI, and processing it - almost exactly as if the client had requested the new URI - directly.

        - -

      • If some handler signaled an error, and an - ErrorDocument is in scope, the same internal - redirect machinery comes into play.

        - -

      • Finally, a handler occasionally needs to investigate `what - would happen if' some other request were run. For instance, - the directory indexing module needs to know what MIME type - would be assigned to a request for each directory entry, in - order to figure out what icon to use.

        - - Such handlers can construct a sub-request, using the - functions sub_req_lookup_file and - sub_req_lookup_uri; this constructs a new - request_rec structure and processes it as you - would expect, up to but not including the point of actually - sending a response. (These functions skip over the access - checks if the sub-request is for a file in the same directory - as the original request).

        - - (Server-side includes work by building sub-requests and then - actually invoking the response handler for them, via the - function run_sub_request). -

      - -

      Handling requests, declining, and returning error codes

      - -As discussed above, each handler, when invoked to handle a particular -request_rec, has to return an int to -indicate what happened. That can either be - -
        -
      • OK --- the request was handled successfully. This may or may - not terminate the phase. -
      • DECLINED --- no erroneous condition exists, but the module - declines to handle the phase; the server tries to find another. -
      • an HTTP error code, which aborts handling of the request. -
      - -Note that if the error code returned is REDIRECT, then -the module should put a Location in the request's -headers_out, to indicate where the client should be -redirected to.

      - -

      Special considerations for response handlers

      - -Handlers for most phases do their work by simply setting a few fields -in the request_rec structure (or, in the case of access -checkers, simply by returning the correct error code). However, -response handlers have to actually send a response back to the client.

      - -They should begin by sending an HTTP response header, using the -function send_http_header. (You don't have to do -anything special to skip sending the header for HTTP/0.9 requests; the -function figures out on its own that it shouldn't do anything). If -the request is marked header_only, that's all they should -do; they should return after that, without attempting any further -output.

      - -Otherwise, they should produce a response body which responds to the -client as appropriate. The primitives for this are rputc -and rprintf, for internally generated output, and -send_fd, to copy the contents of some FILE * -straight to the client.

      - -At this point, you should more or less understand the following piece -of code, which is the handler which handles GET requests -which have no more specific handler; it also shows how conditional -GETs can be handled, if it's desirable to do so in a -particular response handler --- set_last_modified checks -against the If-modified-since value supplied by the -client, if any, and returns an appropriate code (which will, if -nonzero, be USE_LOCAL_COPY). No similar considerations apply for -set_content_length, but it returns an error code for -symmetry.

      - -

      -/*
      - * Default response handler: sends the file named by r->filename in answer
      - * to a GET request which no more specific handler has claimed.
      - *
      - * Returns DECLINED for methods other than GET, NOT_FOUND when the server
      - * core found no such file (finfo.st_mode is zero), whatever nonzero status
      - * set_content_length or set_last_modified reports, FORBIDDEN when the
      - * file cannot be opened, and OK once the response has been sent.
      - */
      -int default_handler (request_rec *r)
      -{
      -    int errstatus;
      -    FILE *f;
      -
      -    if (r->method_number != M_GET) return DECLINED;
      -    if (r->finfo.st_mode == 0) return NOT_FOUND;
      -
      -    if ((errstatus = set_content_length (r, r->finfo.st_size))
      -        || (errstatus = set_last_modified (r, r->finfo.st_mtime)))
      -        return errstatus;
      -
      -    /* Open through the request pool, so that the file is tied to the pool
      -     * and gets closed when the pool is cleared even if we never reach the
      -     * pfclose below; pfclose is the matching close for pfopen.
      -     */
      -    f = pfopen (r->pool, r->filename, "r");
      -
      -    if (f == NULL) {
      -        log_reason("file permissions deny server access",
      -                   r->filename, r);
      -        return FORBIDDEN;
      -    }
      -
      -    register_timeout ("send", r);
      -    send_http_header (r);
      -
      -    /* A header_only request gets the header and nothing else. */
      -    if (!r->header_only) send_fd (f, r);
      -    pfclose (r->pool, f);
      -    return OK;
      -}
      -
      - -Finally, if all of this is too much of a challenge, there are a few -ways out of it. First off, as shown above, a response handler which -has not yet produced any output can simply return an error code, in -which case the server will automatically produce an error response. -Secondly, it can punt to some other handler by invoking -internal_redirect, which is how the internal redirection -machinery discussed above is invoked. A response handler which has -internally redirected should always return OK.

      - -(Invoking internal_redirect from handlers which are -not response handlers will lead to serious confusion). - -

      Special considerations for authentication handlers

      - -Stuff that should be discussed here in detail: - -
        -
      • Authentication-phase handlers not invoked unless auth is - configured for the directory. -
      • Common auth configuration stored in the core per-dir - configuration; it has accessors auth_type, - auth_name, and requires. -
      • Common routines, to handle the protocol end of things, at least - for HTTP basic authentication (get_basic_auth_pw, - which sets the connection->user structure field - automatically, and note_basic_auth_failure, which - arranges for the proper WWW-Authenticate: header - to be sent back). -
      - -

      Special considerations for logging handlers

      - -When a request has internally redirected, there is the question of -what to log. Apache handles this by bundling the entire chain of -redirects into a list of request_rec structures which are -threaded through the r->prev and r->next -pointers. The request_rec which is passed to the logging -handlers in such cases is the one which was originally built for the -initial request from the client; note that the bytes_sent field will -only be correct in the last request in the chain (the one for which a -response was actually sent). - -

      Resource allocation and resource pools

      - -One of the problems of writing and designing a server-pool server is -that of preventing leakage, that is, allocating resources (memory, -open files, etc.), without subsequently releasing them. The resource -pool machinery is designed to make it easy to prevent this from -happening, by allowing resources to be allocated in such a way that -they are automatically released when the server is done with -them.

      - -The way this works is as follows: the memory which is allocated, files -opened, etc., to deal with a particular request are tied to a -resource pool which is allocated for the request. The pool -is a data structure which itself tracks the resources in question.

      - -When the request has been processed, the pool is cleared. At -that point, all the memory associated with it is released for reuse, -all files associated with it are closed, and any other clean-up -functions which are associated with the pool are run. When this is -over, we can be confident that all the resources tied to the pool have -been released, and that none of them have leaked.

      - -Server restarts, and allocation of memory and resources for per-server -configuration, are handled in a similar way. There is a -configuration pool, which keeps track of resources which were -allocated while reading the server configuration files, and handling -the commands therein (for instance, the memory that was allocated for -per-server module configuration, log files and other files that were -opened, and so forth). When the server restarts, and has to reread -the configuration files, the configuration pool is cleared, and so the -memory and file descriptors which were taken up by reading them the -last time are made available for reuse.

      - -It should be noted that use of the pool machinery isn't generally -obligatory, except for situations like logging handlers, where you -really need to register cleanups to make sure that the log file gets -closed when the server restarts (this is most easily done by using the -function pfopen, which also -arranges for the underlying file descriptor to be closed before any -child processes, such as for CGI scripts, are execed), or -in case you are using the timeout machinery (which isn't yet even -documented here). However, there are two benefits to using it: -resources allocated to a pool never leak (even if you allocate a -scratch string, and just forget about it); also, for memory -allocation, palloc is generally faster than -malloc.

      - -We begin here by describing how memory is allocated to pools, and then -discuss how other resources are tracked by the resource pool -machinery. - -

      Allocation of memory in pools

      - -Memory is allocated to pools by calling the function -palloc, which takes two arguments, one being a pointer to -a resource pool structure, and the other being the amount of memory to -allocate (in chars). Within handlers for handling -requests, the most common way of getting a resource pool structure is -by looking at the pool slot of the relevant -request_rec; hence the repeated appearance of the -following idiom in module code: - -
      -/* The usual idiom: take the pool from the request_rec and palloc from it. */
      -int my_handler(request_rec *r)
      -{
      -    struct my_structure *foo;
      -    ...
      -
      -    /* Allocated from the request's pool; it is freed only when that pool
      -     * is cleared.
      -     */
      -    foo = (struct my_structure *)palloc (r->pool, sizeof(struct my_structure));
      -}
      -
      - -Note that there is no pfree --- -palloced memory is freed only when the associated -resource pool is cleared. This means that palloc does not -have to do as much accounting as malloc(); all it does in -the typical case is to round up the size, bump a pointer, and do a -range check.

      - -(It also raises the possibility that heavy use of palloc -could cause a server process to grow excessively large. There are -two ways to deal with this, which are dealt with below; briefly, you -can use malloc, and try to be sure that all of the memory -gets explicitly freed, or you can allocate a sub-pool of -the main pool, allocate your memory in the sub-pool, and clear it out -periodically. The latter technique is discussed in the section on -sub-pools below, and is used in the directory-indexing code, in order -to avoid excessive storage allocation when listing directories with -thousands of files). - -

      Allocating initialized memory

      - -There are functions which allocate initialized memory, and are -frequently useful. The function pcalloc has the same -interface as palloc, but clears out the memory it -allocates before it returns it. The function pstrdup -takes a resource pool and a char * as arguments, and -allocates memory for a copy of the string the pointer points to, -returning a pointer to the copy. Finally pstrcat is a -varargs-style function, which takes a pointer to a resource pool, and -at least two char * arguments, the last of which must be -NULL. It allocates enough memory to fit copies of each -of the strings, as a unit; for instance: - -
      -     pstrcat (r->pool, "foo", "/", "bar", NULL);   /* the last argument must be NULL */
      -
      - -returns a pointer to 8 bytes worth of memory, initialized to -"foo/bar". - -

      Tracking open files, etc.

      - -As indicated above, resource pools are also used to track other sorts -of resources besides memory. The most common are open files. The -routine which is typically used for this is pfopen, which -takes a resource pool and two strings as arguments; the strings are -the same as the typical arguments to fopen, e.g., - -
      -     ...
      -     FILE *f = pfopen (r->pool, r->filename, "r");   /* name and mode as for fopen; tied to r->pool */
      -
      -     if (f == NULL) { ... } else { ... }   /* NOTE(review): presumably NULL on failure, as with fopen --- confirm */
      -
      - -There is also a popenf routine, which parallels the -lower-level open system call. Both of these routines -arrange for the file to be closed when the resource pool in question -is cleared.

      - -Unlike the case for memory, there are functions to close -files allocated with pfopen, and popenf, -namely pfclose and pclosef. (This is -because, on many systems, the number of files which a single process -can have open is quite limited). It is important to use these -functions to close files allocated with pfopen and -popenf, since to do otherwise could cause fatal errors on -systems such as Linux, which react badly if the same -FILE* is closed more than once.

      - -(Using the close functions is not mandatory, since the -file will eventually be closed regardless, but you should consider it -in cases where your module is opening, or could open, a lot of files). - -

      Other sorts of resources --- cleanup functions

      - -More text goes here. Describe the cleanup primitives in terms of -which the file stuff is implemented; also, spawn_process. - -

      Fine control --- creating and dealing with sub-pools, with a note -on sub-requests

      - -On rare occasions, too-free use of palloc() and the -associated primitives may result in undesirably profligate resource -allocation. You can deal with such a case by creating a -sub-pool, allocating within the sub-pool rather than the main -pool, and clearing or destroying the sub-pool, which releases the -resources which were associated with it. (This really is a -rare situation; the only case in which it comes up in the standard -module set is in case of listing directories, and then only with -very large directories. Unnecessary use of the primitives -discussed here can hair up your code quite a bit, with very little -gain).

      - -The primitive for creating a sub-pool is make_sub_pool, -which takes another pool (the parent pool) as an argument. When the -main pool is cleared, the sub-pool will be destroyed. The sub-pool -may also be cleared or destroyed at any time, by calling the functions -clear_pool and destroy_pool, respectively. -(The difference is that clear_pool frees resources -associated with the pool, while destroy_pool also -deallocates the pool itself. In the former case, you can allocate new -resources within the pool, and clear it again, and so forth; in the -latter case, it is simply gone).

      - -One final note --- sub-requests have their own resource pools, which -are sub-pools of the resource pool for the main request. The polite -way to reclaim the resources associated with a sub request which you -have allocated (using the sub_req_lookup_... functions) -is destroy_sub_request, which frees the resource pool. -Before calling this function, be sure to copy anything that you care -about which might be allocated in the sub-request's resource pool into -someplace a little less volatile (for instance, the filename in its -request_rec structure).

      - -(Again, under most circumstances, you shouldn't feel obliged to call -this function; only 2K of memory or so are allocated for a typical sub -request, and it will be freed anyway when the main request pool is -cleared. It is only when you are allocating many, many sub-requests -for a single main request that you should seriously consider the -destroy... functions). - -

      Configuration, commands and the like

      - -One of the design goals for this server was to maintain external -compatibility with the NCSA 1.3 server --- that is, to read the same -configuration files, to process all the directives therein correctly, -and in general to be a drop-in replacement for NCSA. On the other -hand, another design goal was to move as much of the server's -functionality into modules which have as little as possible to do with -the monolithic server core. The only way to reconcile these goals is -to move the handling of most commands from the central server into the -modules.

      - -However, just giving the modules command tables is not enough to -divorce them completely from the server core. The server has to -remember the commands in order to act on them later. That involves -maintaining data which is private to the modules, and which can be -either per-server, or per-directory. Most things are per-directory, -including in particular access control and authorization information, -but also information on how to determine file types from suffixes, -which can be modified by AddType and -DefaultType directives, and so forth. In general, the -governing philosophy is that anything which can be made -configurable by directory should be; per-server information is -generally used in the standard set of modules for information like -Aliases and Redirects which come into play -before the request is tied to a particular place in the underlying -file system.

      - -Another requirement for emulating the NCSA server is being able to -handle the per-directory configuration files, generally called -.htaccess files, though even in the NCSA server they can -contain directives which have nothing at all to do with access -control. Accordingly, after URI -> filename translation, but before -performing any other phase, the server walks down the directory -hierarchy of the underlying filesystem, following the translated -pathname, to read any .htaccess files which might be -present. The information which is read in then has to be -merged with the applicable information from the server's own -config files (either from the <Directory> sections -in access.conf, or from defaults in -srm.conf, which actually behaves for most purposes almost -exactly like <Directory />).

      - -Finally, after having served a request which involved reading -.htaccess files, we need to discard the storage allocated -for handling them. That is solved the same way it is solved wherever -else similar problems come up, by tying those structures to the -per-transaction resource pool.

      - -

      Per-directory configuration structures

      - -Let's look at how all of this plays out in mod_mime.c, -which defines the file typing handler which emulates the NCSA server's -behavior of determining file types from suffixes. What we'll be -looking at, here, is the code which implements the -AddType and AddEncoding commands. These -commands can appear in .htaccess files, so they must be -handled in the module's private per-directory data, which in fact, -consists of two separate tables for MIME types and -encoding information, and is declared as follows: - -
      -typedef struct {
      -    table *forced_types;      /* file extension -> MIME type, as set by AddType */
      -    table *encoding_types;    /* file extension -> encoding, as set by AddEncoding */
      -} mime_dir_config;
      -
      - -When the server is reading a configuration file, or -<Directory> section, which includes one of the MIME -module's commands, it needs to create a mime_dir_config -structure, so those commands have something to act on. It does this -by invoking the function it finds in the module's `create per-dir -config slot', with two arguments: the name of the directory to which -this configuration information applies (or NULL for -srm.conf), and a pointer to a resource pool in which the -allocation should happen.

      - -(If we are reading a .htaccess file, that resource pool -is the per-request resource pool for the request; otherwise it is a -resource pool which is used for configuration data, and cleared on -restarts. Either way, it is important for the structure being created -to vanish when the pool is cleared, by registering a cleanup on the -pool if necessary).

      - -For the MIME module, the per-dir config creation function just -pallocs the structure above, and creates a couple of -tables to fill it. That looks like this: - -

      -/*
      - * Per-directory config creator for the MIME module: allocates an empty
      - * mime_dir_config in pool p and gives it two fresh tables.  The second
      - * argument is not used.
      - */
      -void *create_mime_dir_config (pool *p, char *dummy)
      -{
      -    mime_dir_config *conf;
      -
      -    conf = (mime_dir_config *) palloc (p, sizeof(mime_dir_config));
      -
      -    /* Both tables start out with room for four entries. */
      -    conf->forced_types = make_table (p, 4);
      -    conf->encoding_types = make_table (p, 4);
      -
      -    return conf;
      -}
      -
      - -Now, suppose we've just read in a .htaccess file. We -already have the per-directory configuration structure for the next -directory up in the hierarchy. If the .htaccess file we -just read in didn't have any AddType or -AddEncoding commands, its per-directory config structure -for the MIME module is still valid, and we can just use it. -Otherwise, we need to merge the two structures somehow.

      - -To do that, the server invokes the module's per-directory config merge -function, if one is present. That function takes three arguments: -the two structures being merged, and a resource pool in which to -allocate the result. For the MIME module, all that needs to be done -is overlay the tables from the new per-directory config structure with -those from the parent: - -

      -/*
      - * Per-directory config merger for the MIME module: builds a new
      - * mime_dir_config in pool p whose tables are the subdirectory's tables
      - * overlaid on the parent's.
      - */
      -void *merge_mime_dir_configs (pool *p, void *parent_dirv, void *subdirv)
      -{
      -    mime_dir_config *parent = (mime_dir_config *)parent_dirv;
      -    mime_dir_config *child = (mime_dir_config *)subdirv;
      -    mime_dir_config *merged;
      -
      -    merged = (mime_dir_config *)palloc (p, sizeof(mime_dir_config));
      -
      -    /* The subdirectory's table is passed first, the parent's second. */
      -    merged->forced_types =
      -        overlay_tables (p, child->forced_types, parent->forced_types);
      -    merged->encoding_types =
      -        overlay_tables (p, child->encoding_types, parent->encoding_types);
      -
      -    return merged;
      -}
      -
      - -As a note --- if there is no per-directory merge function present, the -server will just use the subdirectory's configuration info, and ignore -the parent's. For some modules, that works just fine (e.g., for the -includes module, whose per-directory configuration information -consists solely of the state of the XBITHACK), and for -those modules, you can just not declare one, and leave the -corresponding structure slot in the module itself NULL.

      - -

      Command handling

      - -Now that we have these structures, we need to be able to figure out -how to fill them. That involves processing the actual -AddType and AddEncoding commands. To find -commands, the server looks in the module's command table. -That table contains information on how many arguments the commands -take, and in what formats, where they are permitted, and so forth. That -information is sufficient to allow the server to invoke most -command-handling functions with pre-parsed arguments. Without further -ado, let's look at the AddType command handler, which -looks like this (the AddEncoding command looks basically -the same, and won't be shown here): - -
      -/*
      - * Handler for the AddType command: records content type ct for file
      - * extension ext in the per-directory forced_types table.  A leading '.'
      - * on the extension is dropped.  Always returns NULL (no error).
      - */
      -char *add_type(cmd_parms *cmd, mime_dir_config *m, char *ct, char *ext)
      -{
      -    char *key = (*ext == '.') ? ext + 1 : ext;
      -
      -    table_set (m->forced_types, key, ct);
      -
      -    return NULL;
      -}
      -
      - -This command handler is unusually simple. As you can see, it takes -four arguments: the first is a pointer to a cmd_parms structure, -the second is the per-directory configuration structure for the module in question, -and the last two are the pre-parsed arguments. -The cmd_parms structure contains a bunch of arguments which are frequently of -use to some, but not all, commands, including a resource pool (from -which memory can be allocated, and to which cleanups should be tied), -and the (virtual) server being configured, from which the module's -per-server configuration data can be obtained if required.

      - -Another way in which this particular command handler is unusually -simple is that there are no error conditions which it can encounter. -If there were, it could return an error message instead of -NULL; this causes an error to be printed out on the -server's stderr, followed by a quick exit, if it is in -the main config files; for a .htaccess file, the syntax -error is logged in the server error log (along with an indication of -where it came from), and the request is bounced with a server error -response (HTTP error status, code 500).

      - -The MIME module's command table has entries for these commands, which -look like this: - -

      -command_rec mime_cmds[] = {
      -{ "AddType", add_type, NULL, OR_FILEINFO, TAKE2,   /* name, handler, extra data, where allowed, argument style */
      -    "a mime type followed by a file extension" },  /* description of the expected arguments */
      -{ "AddEncoding", add_encoding, NULL, OR_FILEINFO, TAKE2, 
      -    "an encoding (e.g., gzip), followed by a file extension" },
      -{ NULL }                                           /* closing NULL entry */
      -};
      -
      - -The entries in these tables are: - -
        -
      • The name of the command -
      • The function which handles it -
      • a (void *) pointer, which is passed in the - cmd_parms structure to the command handler --- - this is useful in case many similar commands are handled by the - same function. -
      • A bit mask indicating where the command may appear. There are - mask bits corresponding to each AllowOverride - option, and an additional mask bit, RSRC_CONF, - indicating that the command may appear in the server's own - config files, but not in any .htaccess - file. -
      • A flag indicating how many arguments the command handler wants - pre-parsed, and how they should be passed in. - TAKE2 indicates two pre-parsed arguments. Other - options are TAKE1, which indicates one pre-parsed - argument, FLAG, which indicates that the argument - should be On or Off, and is passed in - as a boolean flag, RAW_ARGS, which causes the - server to give the command the raw, unparsed arguments - (everything but the command name itself). There is also - ITERATE, which means that the handler looks the - same as TAKE1, but that if multiple arguments are - present, it should be called multiple times, and finally - ITERATE2, which indicates that the command handler - looks like a TAKE2, but if more arguments are - present, then it should be called multiple times, holding the - first argument constant. -
      • Finally, we have a string which describes the arguments that - should be present. If the arguments in the actual config file - are not as required, this string will be used to help give a - more specific error message. (You can safely leave this - NULL). -
      - -Finally, having set this all up, we have to use it. This is -ultimately done in the module's handlers, specifically for its -file-typing handler, which looks more or less like this; note that the -per-directory configuration structure is extracted from the -request_rec's per-directory configuration vector by using -the get_module_config function. - -
      -int find_ct(request_rec *r)
      -{
      -    int i;
      -    char *fn = pstrdup (r->pool, r->filename);
      -    mime_dir_config *conf = (mime_dir_config *)
      -             get_module_config(r->per_dir_config, &mime_module);
      -    char *type;
      -
      -    if (S_ISDIR(r->finfo.st_mode)) {
      -        r->content_type = DIR_MAGIC_TYPE;
      -        return OK;
      -    }
      -    
      -    if((i=rind(fn,'.')) < 0) return DECLINED;
      -    ++i;
      -
      -    if ((type = table_get (conf->encoding_types, &fn[i])))
      -    {
      -        r->content_encoding = type;
      -
      -        /* go back to previous extension to try to use it as a type */
      -
      -        fn[i-1] = '\0';
      -        if((i=rind(fn,'.')) < 0) return OK;
      -        ++i;
      -    }
      -
      -    if ((type = table_get (conf->forced_types, &fn[i])))
      -    {
      -        r->content_type = type;
      -    }
      -    
      -    return OK;
      -}
      -
      -
      - -

      Side notes --- per-server configuration, virtual servers, etc.

      - -The basic ideas behind per-server module configuration are basically -the same as those for per-directory configuration; there is a creation -function and a merge function, the latter being invoked where a -virtual server has partially overridden the base server configuration, -and a combined structure must be computed. (As with per-directory -configuration, the default if no merge function is specified, and a -module is configured in some virtual server, is that the base -configuration is simply ignored).

      - -The only substantial difference is that when a command needs to -configure the per-server private module data, it needs to go to the -cmd_parms data to get at it. Here's an example, from the -alias module, which also indicates how a syntax error can be returned -(note that the per-directory configuration argument to the command -handler is declared as a dummy, since the module doesn't actually have -per-directory config data): - -

      -char *add_redirect(cmd_parms *cmd, void *dummy, char *f, char *url)
      -{
      -    server_rec *s = cmd->server;
      -    alias_server_conf *conf = (alias_server_conf *)
      -            get_module_config(s->module_config,&alias_module);
      -    alias_entry *new = push_array (conf->redirects);
      -
      -    if (!is_url (url)) return "Redirect to non-URL";
      -    
      -    new->fake = f; new->real = url;
      -    return NULL;
      -}
      -
      - - - diff --git a/docs/manual/misc/FAQ.html b/docs/manual/misc/FAQ.html deleted file mode 100644 index b630a283f00..00000000000 --- a/docs/manual/misc/FAQ.html +++ /dev/null @@ -1,162 +0,0 @@ - - - -Apache server Frequently Asked Questions - - - - -

      Apache server Frequently Asked Questions

      - -

      The Questions

      -
        -
      1. What is Apache ? -
      2. Why was Apache created ? -
      3. How does the Apache group relate to other servers ? -
      4. Why the name "Apache" ? -
      5. How compatible is Apache with my existing NCSA 1.3 setup ? -
      6. OK, so how does Apache compare to other servers ? -
      7. How thoroughly tested is Apache? -
      8. Does or will Apache act as a Proxy server? -
      9. What are the future plans for Apache ? -
      10. Who do I contact for support ? -
      11. Is there any more information on Apache ? -
      12. Where can I get Apache ? -
      - -
      - -

      The Answers

      -
        -
      1. What is Apache ? -

        - Apache was originally based on code and ideas found in the most -popular HTTP server of the time.. NCSA httpd 1.3 (early 1995). It has -since evolved into a far superior system which can rival (and probably -surpass) almost any other UNIX based HTTP server in terms of functionality, -efficiency and speed. -

        Since it began, it has been completely rewritten, and includes many new -features. Apache is, as of June 1996, the most popular WWW server on -the Internet, according to the Netcraft Survey. - -

        -
        -
      2. How does the Apache group relate to other -server efforts, such as NCSA's? -

        -We, of course, owe a great debt to NCSA and their programmers for -making the server Apache was based on. We now, however, have our own -server, and our project is mostly our own. The Apache Project is an -entirely independent venture. -

        -
        - -
      3. Why was Apache created ? -

        to address concerns of a group of www providers and part time httpd -programmers, that httpd didn't behave as they wanted it -to. Apache is an entirely volunteer effort, completely funded by its -members, not by commercial sales. -

        - -
        - -
      4. Why the name "Apache" ? -

        A cute name which stuck. Apache is "A PAtCHy server". It was - based on some existing code and a series of "patch files". -

        -
        - - -
      5. How compatible is Apache with my existing NCSA 1.3 -setup ?

        - -Apache attempts to offer all the features and configuration options -of NCSA httpd 1.3, as well as many of the additional features found in -NCSA httpd 1.4 and NCSA httpd 1.5.

        - -NCSA httpd appears to be moving toward adding experimental features -which are not generally required at the moment. Some of the experiments -will succeed while others will inevitably be dropped. The Apache philosophy is -to add what's needed as and when it is needed.

        - -Friendly interaction between Apache and NCSA developers should ensure -that fundamental feature enhancements stay consistent between the two -servers for the foreseeable future.

        - -


        - -
      6. OK, so how does Apache compare to other servers ? -

        -For an independent assessment, see http://www.webcompare.com/server-main.html -

        - -

        Apache has been shown to be substantially faster than many other -free servers. Although certain commercial servers have claimed to -surpass Apache's speed (it has not been demonstrated that any of these -"benchmarks" are a good way of measuring WWW server speed at any -rate), we feel that it is better to have a mostly-fast free server -than an extremely-fast server that costs thousands of dollars. Apache -is run on sites that get millions of hits per day, and they have -experienced no performance difficulties.

        - -
        -
      7. How thoroughly tested is Apache? - -

        Apache is run on over 100,000 Internet servers (as of July 1996). It has -been tested thoroughly by both developers and users. The Apache Group -maintains rigorous standards before releasing new versions of their -server, and our server runs without a hitch on over one third of all -WWW servers. When bugs do show up, we release patches and new -versions, as soon as they are available. - -

        See http://www.apache.org/info/apache_users.html for an incomplete list of sites running Apache.

        - -
        - -
      8. Does or will Apache act as a Proxy server? -

        Apache version 1.1 -and above will come with a proxy module. If compiled in, this will make -Apache act as a caching-proxy server -

        -


        - -
      9. What are the future plans for Apache ? -

          -
        • to continue as a public domain HTTP server, -
        • to keep up with advances in HTTP protocol and web developments in general -
        • to collect suggestions for fixes/improvements from its users, -
        • to respond to needs of large volume providers as well as occasional users. -
        -


        - -
      10. Who do I contact for support ? -

        There is no official support for Apache. None of the developers want to -be swamped by a flood of trivial questions that can be resolved elsewhere. -Bug reports and suggestions should be sent via the bug report page. -Other questions should be directed to -comp.infosystems.www.servers.unix, where some of the Apache team lurk, -in the company of many other httpd gurus who should be able -to help. -

        -Commercial support for Apache is, however, available from a number of -third parties. -

        -
        - -
      11. Is there any more information on Apache ? -

        Indeed there is. See http://www.apache.org/. -

        -
        - -
      12. Where can I get Apache ? -

        -You can find the source for Apache at http://www.apache.org/. -

        -
        -
      - -Home -Index - - diff --git a/docs/manual/misc/client_block_api.html b/docs/manual/misc/client_block_api.html deleted file mode 100644 index c70ee37a662..00000000000 --- a/docs/manual/misc/client_block_api.html +++ /dev/null @@ -1,70 +0,0 @@ - - - -Reading Client Input in Apache 1.2 - - - - -

      Reading Client Input in Apache 1.2

      - -
      - -

      Apache 1.1 and earlier let modules handle POST and PUT requests by -themselves. The module would, on its own, determine whether the -request had an entity, how many bytes it was, and then called a -function (read_client_block) to get the data. - -

      However, HTTP/1.1 requires several things of POST and PUT request -handlers that did not fit into this module, and all existing modules -have to be rewritten. The API calls for handling this have been -further abstracted, so that future HTTP protocol changes can be -accomplished while remaining backwards-compatible.

      - -
      - -

      The New API Functions

      - -
      -   int setup_client_block (request_rec *);
      -   int should_client_block (request_rec *);
      -   long get_client_block (request_rec *, char *buffer, int buffer_size);
      -
      - -
        -
      1. Call setup_client_block() near the beginning of the request - handler. This will set up all the necessary properties, and - will return either OK, or an error code. If the latter, - the module should return that error code. - -
      2. When you are ready to possibly accept input, call - should_client_block(). - This will tell the module whether or not to read input. If it is 0, - the module should assume that the input is of a non-entity type - (e.g. a GET request). A nonzero response indicates that the module - should proceed (to step 3). - This step also sends a 100 Continue response - to HTTP/1.1 clients, so should not be called until the module - is *definitely* ready to read content. (otherwise, the point of the - 100 response is defeated). Never call this function more than once. - -
      3. Finally, call get_client_block in a loop. Pass it a - buffer and its - size. It will put data into the buffer (not necessarily the full - buffer, in the case of chunked inputs), and return the length of - the input block. When it is done reading, it will return 0, and - the module should proceed. - -
      - -

      As an example, please look at the code in -mod_cgi.c. This is properly written to the new API -guidelines.

      - -
      - -Home -Index - - - diff --git a/docs/manual/misc/compat_notes.html b/docs/manual/misc/compat_notes.html deleted file mode 100644 index efa641f8b70..00000000000 --- a/docs/manual/misc/compat_notes.html +++ /dev/null @@ -1,108 +0,0 @@ - -Apache HTTP Server: Compatibility Notes with NCSA's Server - - - -

      Compatibility Notes with NCSA's Server

      - -
      - -While Apache 0.8.x and beyond are for the most part a drop-in -replacement for NCSA's httpd and earlier versions of Apache, there are -a couple gotcha's to watch out for. These are mostly due to the fact -that the parser for config and access control files was rewritten from -scratch, so certain liberties the earlier servers took may not be -available here. These are all easily fixable. If you know of other -non-fatal problems that belong here, let us know. - -

      Please also check the known bugs page. - - - -

        - -
      1. AddType only accepts one file extension per line, without -any dots (.) in the extension, and does not take full filenames. -If you need multiple extensions per type, use multiple lines, e.g. -
        -AddType application/foo foo
        -AddType application/foo bar -
        -To map .foo and .bar to application/foo -

        - - - -

      2. If you follow the NCSA guidelines for setting up access restrictions - based on client domain, you may well have added entries for, - AuthType, AuthName, AuthUserFile or AuthGroupFile. - None of these are needed (or appropriate) for restricting access - based on client domain. - -

        When Apache sees AuthType it (reasonably) assumes you - are using some authorization type based on username and password. - -

        Please remove AuthType, it's unnecessary even for NCSA. - -

        - -

      3. AuthUserFile requires a full pathname. In earlier - versions of NCSA httpd and Apache, you could use a filename - relative to the .htaccess file. This could be a major security hole, - as it made it trivially easy to make a ".htpass" file in a - directory easily accessible by the world. We recommend you store - your passwords outside your document tree. - -

        - -

      4. OldScriptAlias is no longer supported. - -

        - -

      5. exec cgi="" produces reasonable malformed header - responses when used to invoke non-CGI scripts.
        - The NCSA code ignores the missing header. (bad idea)
        - Solution: write CGI to the CGI spec or use exec cmd="" instead. -

        We might add virtual support to exec cmd to - make up for this difference. - -

        - -

      6. <Limit> silliness - in the old Apache 0.6.5, a - directive of <Limit GET> would also restrict POST methods - Apache 0.8.8's new - core is correct in not presuming a limit on a GET is the same limit on a POST, - so if you are relying on that behavior you need to change your access configurations - to reflect that. - -

        - -

      7. Icons for FancyIndexing broken - well, no, they're not broken, we've just upgraded the - icons from flat .xbm files to pretty and much smaller .gif files, courtesy of -Kevin Hughes at -EIT. - If you are using the same srm.conf from an old distribution, make sure you add the new - AddIcon, AddIconByType, and DefaultIcon commands. - -

        - -

      8. Under IRIX, the "Group" directive in httpd.conf needs to be a valid group name - (i.e. "nogroup") not the numeric group ID. The distribution httpd.conf, and earlier - ones, had the default Group be "#-1", which was causing silent exits at startup.

        - -

      9. .asis files: Apache 0.6.5 did not require a Status header; -it added one automatically if the .asis file contained a Location header. -0.8.14 requires a Status header.

        - -

      - -More to come when we notice them.... - - -
      - -Home -Index - - - diff --git a/docs/manual/misc/index.html b/docs/manual/misc/index.html new file mode 100644 index 00000000000..b8d7fbe27a1 --- /dev/null +++ b/docs/manual/misc/index.html @@ -0,0 +1,112 @@ + + + + Apache Miscellaneous Documentation + + + + +

      Apache Miscellaneous Documentation

      + +

      + Below is a list of additional documentation pages that apply to the + Apache web server development project. +

      +
      +
      API +
      +
      Description of Apache's Application Programming Interface. +
      +
      FAQ +
      +
      Frequently-Asked Questions concerning the Apache project and server +
      +
      Reading Client Input in Apache 1.2 +
      +
      Describes differences between Apache 1.1 and 1.2 in how modules + read information from the client +
      +
      Compatibility with NCSA +
      +
      Notes about Apache's compatibility with the NCSA server +
      +
      FIN_WAIT_2 +
      +
      A description of the causes of Apache processes going into the + FIN_WAIT_2 state, and what you can do about it +
      +
      "How-To" +
      +
      Instructions about how to accomplish some commonly-desired server + functionality changes +
      +
      Known Bugs +
      +
      Just what it says - a list of known bugs in each of the Apache releases +
      +
      No PGP +
      +
      Why we took PEM and PGP support out of the base Apache distribution +
      +
      Performance Notes (BSD 4.4) +
      +
      Some notes about ways to improve/optimise Apache performance on + BSD 4.4 systems +
      +
      Performance Notes (Digital UNIX) +
      +
      Extracts of USENET postings describing how to optimise Apache + performance on Digital UNIX systems +
      +
      Performance Notes (General) +
      +
      Some generic notes about how to improve Apache performance +
      +
      Security Tips +
      +
      Some "do"s - and "don't"s - for keeping your + Apache web site secure +
      +
      Virtual Hosts (IP-based) +
      +
      Excerpts and notes about configuring and using Apache IP-based virtual + hosts +
      +
      Windows Bug with Web Keepalive +
      +
      A brief description of a known problem with Microsoft Windows and + web sites accessed using keepalive connections +
      +
      + + + + diff --git a/docs/manual/misc/perf-tuning.html b/docs/manual/misc/perf-tuning.html new file mode 100644 index 00000000000..16f8c835ef2 --- /dev/null +++ b/docs/manual/misc/perf-tuning.html @@ -0,0 +1,820 @@ + + +Apache Performance Notes + + + +

      Apache Performance Notes

      + +

      Author: Dean Gaudet + +

      Introduction

      +

      Apache is a general webserver, which is designed to be correct first, and +fast second. Even so, its performance is quite satisfactory. Most +sites have less than 10Mbits of outgoing bandwidth, which Apache can +fill using only a low end Pentium-based webserver. In practice sites +with more bandwidth require more than one machine to fill the bandwidth +due to other constraints (such as CGI or database transaction overhead). +For these reasons the development focus has been mostly on correctness +and configurability. + +

      Unfortunately many folks overlook these facts and cite raw performance +numbers as if they are some indication of the quality of a web server +product. There is a bare minimum performance that is acceptable, beyond +that extra speed only caters to a much smaller segment of the market. +But in order to avoid this hurdle to the acceptance of Apache in some +markets, effort was put into Apache 1.3 to bring performance up to a +point where the difference with other high-end webservers is minimal. + +

      Finally there are the folks who just plain want to see how fast something +can go. The author falls into this category. The rest of this document +is dedicated to these folks who want to squeeze every last bit of +performance out of Apache's current model, and want to understand why +it does some things which slow it down. + +

      Note that this is tailored towards Apache 1.3 on Unix. Some of it applies +to Apache on NT. Apache on NT has not been tuned for performance yet, +in fact it probably performs very poorly because NT performance requires +a different programming model. + +

      Hardware and Operating System Issues

      + +

      The single biggest hardware issue affecting webserver performance +is RAM. A webserver should never ever have to swap, swapping increases +the latency of each request beyond a point that users consider "fast +enough". This causes users to hit stop and reload, further increasing +the load. You can, and should, control the MaxClients +setting so that your server does not spawn so many children it starts +swapping. + +

      Beyond that the rest is mundane: get a fast enough CPU, a fast enough +network card, and fast enough disks, where "fast enough" is something +that needs to be determined by experimentation. + +

      Operating system choice is largely a matter of local concerns. But +a general guideline is to always apply the latest vendor TCP/IP patches. +HTTP serving completely breaks many of the assumptions built into Unix +kernels up through 1994 and even 1995. Good choices include +recent FreeBSD, and Linux. + +

      Run-Time Configuration Issues

      + +

      HostnameLookups

      +

      Prior to Apache 1.3, HostnameLookups defaulted to On. +This adds latency +to every request because it requires a DNS lookup to complete before +the request is finished. In Apache 1.3 this setting defaults to Off. +However (1.3 or later), if you use any allow from domain or +deny from domain directives then you will pay for a +double reverse DNS lookup (a reverse, followed by a forward to make sure +that the reverse is not being spoofed). So for the highest performance +avoid using these directives (it's fine to use IP addresses rather than +domain names). + +

      Note that it's possible to scope the directives, such as within +a <Location /server-status> section. In this +case the DNS lookups are only performed on requests matching the +criteria. Here's an example which disables +lookups except for .html and .cgi files: + +

      +HostnameLookups off
      +<Files ~ "\.(html|cgi)$">
      +    HostnameLookups on
      +</Files>
      +
      + +But even still, if you just need DNS names +in some CGIs you could consider doing the +gethostbyname call in the specific CGIs that need it. + +

      FollowSymLinks and SymLinksIfOwnerMatch

      +

      Wherever in your URL-space you do not have an +Options FollowSymLinks, or you do have an +Options SymLinksIfOwnerMatch Apache will have to +issue extra system calls to check up on symlinks. One extra call per +filename component. For example, if you had: + +

      +DocumentRoot /www/htdocs
      +<Directory />
      +    Options SymLinksIfOwnerMatch
      +</Directory>
      +
      + +and a request is made for the URI /index.html. +Then Apache will perform lstat(2) on /www, +/www/htdocs, and /www/htdocs/index.html. The +results of these lstats are never cached, +so they will occur on every single request. If you really desire the +symlinks security checking you can do something like this: + +
      +DocumentRoot /www/htdocs
      +<Directory />
      +    Options FollowSymLinks
      +</Directory>
      +<Directory /www/htdocs>
      +    Options -FollowSymLinks +SymLinksIfOwnerMatch
      +</Directory>
      +
      + +This at least avoids the extra checks for the DocumentRoot +path. Note that you'll need to add similar sections if you have any +Alias or RewriteRule paths outside of your +document root. For highest performance, and no symlink protection, +set FollowSymLinks everywhere, and never set +SymLinksIfOwnerMatch. + +

      AllowOverride

      + +

      Wherever in your URL-space you allow overrides (typically +.htaccess files) Apache will attempt to open +.htaccess for each filename component. For example, + +

      +DocumentRoot /www/htdocs
      +<Directory />
      +    AllowOverride all
      +</Directory>
      +
      + +and a request is made for the URI /index.html. Then +Apache will attempt to open /.htaccess, +/www/.htaccess, and /www/htdocs/.htaccess. +The solutions are similar to the previous case of Options +FollowSymLinks. For highest performance use +AllowOverride None everywhere in your filesystem. + +

      Negotiation

      + +

      If at all possible, avoid content-negotiation if you're really +interested in every last ounce of performance. In practice the +benefits of negotiation outweigh the performance penalties. There's +one case where you can speed up the server. Instead of using +a wildcard such as: + +

      +DirectoryIndex index
      +
      + +Use a complete list of options: + +
      +DirectoryIndex index.cgi index.pl index.shtml index.html
      +
      + +where you list the most common choice first. + +

      Process Creation

      + +

      Prior to Apache 1.3 the MinSpareServers, +MaxSpareServers, and StartServers settings +all had drastic effects on benchmark results. In particular, Apache +required a "ramp-up" period in order to reach a number of children +sufficient to serve the load being applied. After the initial +spawning of StartServers children, only one child per +second would be created to satisfy the MinSpareServers +setting. So a server being accessed by 100 simultaneous clients, +using the default StartServers of 5 would take on +the order of 95 seconds to spawn enough children to handle the load. This +works fine in practice on real-life servers, because they aren't restarted +frequently. But it does really poorly on benchmarks which might only run +for ten minutes. + +

      The one-per-second rule was implemented in an effort to avoid +swamping the machine with the startup of new children. If the machine +is busy spawning children it can't service requests. But it has such +a drastic effect on the perceived performance of Apache that it had +to be replaced. As of Apache 1.3, +the code will relax the one-per-second rule. It +will spawn one, wait a second, then spawn two, wait a second, then spawn +four, and it will continue exponentially until it is spawning 32 children +per second. It will stop whenever it satisfies the +MinSpareServers setting. + +

      This appears to be responsive enough that it's +almost unnecessary to twiddle the MinSpareServers, +MaxSpareServers and StartServers knobs. When +more than 4 children are spawned per second, a message will be emitted +to the ErrorLog. If you see a lot of these errors then +consider tuning these settings. Use the mod_status output +as a guide. + +

      Related to process creation is process death induced by the +MaxRequestsPerChild setting. By default this is 30, which +is probably far too low unless your server is using a module such as +mod_perl which causes children to have bloated memory +images. If your server is serving mostly static pages then consider +raising this value to something like 10000. The code is robust enough +that this shouldn't be a problem. + +

      When keep-alives are in use, children will be kept busy +doing nothing waiting for more requests on the already open +connection. The default KeepAliveTimeout of +15 seconds attempts to minimize this effect. The tradeoff +here is between network bandwidth and server resources. +In no event should you raise this above about 60 seconds, as + +most of the benefits are lost. + +

      Compile-Time Configuration Issues

      + +

      mod_status and Rule STATUS=yes

      + +

      If you include mod_status +and you also set Rule STATUS=yes when building +Apache, then on every request Apache will perform two calls to +gettimeofday(2) (or times(2) depending +on your operating system), and (pre-1.3) several extra calls to +time(2). This is all done so that the status report +contains timing indications. For highest performance, set Rule +STATUS=no. + +

      accept Serialization - multiple sockets

      + +

      This discusses a shortcoming in the Unix socket API. +Suppose your +web server uses multiple Listen statements to listen on +either multiple ports or multiple addresses. In order to test each +socket to see if a connection is ready Apache uses select(2). +select(2) indicates that a socket has none or +at least one connection waiting on it. Apache's model includes +multiple children, and all the idle ones test for new connections at the +same time. A naive implementation looks something like this +(these examples do not match the code, they're contrived for +pedagogical purposes): + +

      +    for (;;) {
      +	for (;;) {
      +	    fd_set accept_fds;
      +
      +	    FD_ZERO (&accept_fds);
      +	    for (i = first_socket; i <= last_socket; ++i) {
      +		FD_SET (i, &accept_fds);
      +	    }
      +	    rc = select (last_socket+1, &accept_fds, NULL, NULL, NULL);
      +	    if (rc < 1) continue;
      +	    new_connection = -1;
      +	    for (i = first_socket; i <= last_socket; ++i) {
      +		if (FD_ISSET (i, &accept_fds)) {
      +		    new_connection = accept (i, NULL, NULL);
      +		    if (new_connection != -1) break;
      +		}
      +	    }
      +	    if (new_connection != -1) break;
      +	}
      +	process the new_connection;
      +    }
      +
      + +But this naive implementation has a serious starvation problem. Recall +that multiple children execute this loop at the same time, and so multiple +children will block at select when they are in between +requests. All those blocked children will awaken and return from +select when a single request appears on any socket +(the number of children which awaken varies depending on the operating +system and timing issues). +They will all then fall down into the loop and try to accept +the connection. But only one will succeed (assuming there's still only +one connection ready), the rest will be blocked in accept. +This effectively locks those children into serving requests from that +one socket and no other sockets, and they'll be stuck there until enough +new requests appear on that socket to wake them all up. +This starvation problem was first documented in +PR#467. There +are at least two solutions. + +

      One solution is to make the sockets non-blocking. In this case the +accept won't block the children, and they will be allowed +to continue immediately. But this wastes CPU time. Suppose you have +ten idle children in select, and one connection arrives. +Then nine of those children will wake up, try to accept the +connection, fail, and loop back into select, accomplishing +nothing. Meanwhile none of those children are servicing requests that +occurred on other sockets until they get back up to the select +again. Overall this solution does not seem very fruitful unless you +have as many idle CPUs (in a multiprocessor box) as you have idle children, +not a very likely situation. + +

      Another solution, the one used by Apache, is to serialize entry into +the inner loop. The loop looks like this (differences highlighted): + +

      +    for (;;) {
      +	accept_mutex_on ();
      +	for (;;) {
      +	    fd_set accept_fds;
      +
      +	    FD_ZERO (&accept_fds);
      +	    for (i = first_socket; i <= last_socket; ++i) {
      +		FD_SET (i, &accept_fds);
      +	    }
      +	    rc = select (last_socket+1, &accept_fds, NULL, NULL, NULL);
      +	    if (rc < 1) continue;
      +	    new_connection = -1;
      +	    for (i = first_socket; i <= last_socket; ++i) {
      +		if (FD_ISSET (i, &accept_fds)) {
      +		    new_connection = accept (i, NULL, NULL);
      +		    if (new_connection != -1) break;
      +		}
      +	    }
      +	    if (new_connection != -1) break;
      +	}
      +	accept_mutex_off ();
      +	process the new_connection;
      +    }
      +
      + + +The functions accept_mutex_on and accept_mutex_off +implement a mutual exclusion semaphore. Only one child can have the +mutex at any time. There are several choices for implementing these +mutexes. The choice is defined in src/conf.h (pre-1.3) or +src/main/conf.h (1.3 or later). Some architectures +do not have any locking choice made, on these architectures it is unsafe +to use multiple Listen directives. + +
      +
      USE_FLOCK_SERIALIZED_ACCEPT +
      This method uses the flock(2) system call to lock a +lock file (located by the LockFile directive). + +
      USE_FCNTL_SERIALIZED_ACCEPT +
      This method uses the fcntl(2) system call to lock a +lock file (located by the LockFile directive). + +
      USE_SYSVSEM_SERIALIZED_ACCEPT +
      (1.3 or later) This method uses SysV-style semaphores to implement the +mutex. Unfortunately SysV-style semaphores have some bad side-effects. +One is that it's possible Apache will die without cleaning up the semaphore +(see the ipcs(8) man page). The other is that the semaphore +API allows for a denial of service attack by any CGIs running under the +same uid as the webserver (i.e. all CGIs unless you use something +like suexec or cgiwrapper). For these reasons this method is not used +on any architecture except IRIX (where the previous two are prohibitively +expensive on most IRIX boxes). + +
      USE_USLOCK_SERIALIZED_ACCEPT +
      (1.3 or later) This method is only available on IRIX, and uses +usconfig(2) to create a mutex. While this method avoids +the hassles of SysV-style semaphores, it is not the default for IRIX. +This is because on single processor IRIX boxes (5.3 or 6.2) the +uslock code is two orders of magnitude slower than the SysV-semaphore +code. On multi-processor IRIX boxes the uslock code is an order of magnitude +faster than the SysV-semaphore code. Kind of a messed up situation. +So if you're using a multiprocessor IRIX box then you should rebuild your +webserver with -DUSE_USLOCK_SERIALIZED_ACCEPT on the +EXTRA_CFLAGS. + +
      USE_PTHREADS_SERIALIZED_ACCEPT +
      (1.3 or later) This method uses POSIX mutexes and should work on +any architecture implementing the full POSIX threads specification, +however appears to only work on Solaris (2.5 or later). This is the +default for Solaris 2.5 or later. +
      + +

      If your system has another method of serialization which isn't in the +above list then it may be worthwhile adding code for it (and submitting +a patch back to Apache). + +

      Another solution that has been considered but never implemented is +to partially serialize the loop -- that is, let in a certain number +of processes. This would only be of interest on multiprocessor boxes +where it's possible multiple children could run simultaneously, and the +serialization actually doesn't take advantage of the full bandwidth. +This is a possible area of future investigation, but priority remains +low because highly parallel web servers are not the norm. + +

      Ideally you should run servers without multiple Listen +statements if you want the highest performance. But read on. + +

      accept Serialization - single socket

      + +

      The above is fine and dandy for multiple socket servers, but what +about single socket servers? In theory they shouldn't experience +any of these same problems because all children can just block in +accept(2) until a connection arrives, and no starvation +results. In practice this hides almost the same "spinning" behaviour +discussed above in the non-blocking solution. The way that most TCP +stacks are implemented, the kernel actually wakes up all processes blocked +in accept when a single connection arrives. One of those +processes gets the connection and returns to user-space, the rest spin in +the kernel and go back to sleep when they discover there's no connection +for them. This spinning is hidden from the user-land code, but it's +there nonetheless. This can result in the same load-spiking wasteful +behaviour that a non-blocking solution to the multiple sockets case can. + +

      For this reason we have found that many architectures behave more +"nicely" if we serialize even the single socket case. So this is +actually the default in almost all cases. Crude experiments under +Linux (2.0.30 on a dual Pentium pro 166 w/128Mb RAM) have shown that +the serialization of the single socket case causes less than a 3% +decrease in requests per second over unserialized single-socket. +But unserialized single-socket showed an extra 100ms latency on +each request. This latency is probably a wash on long haul lines, +and only an issue on LANs. If you want to override the single socket +serialization you can define SAFE_UNSERIALIZED_ACCEPT +and then single-socket servers will not serialize at all. + +

      Lingering Close

      + +

      As discussed in +draft-ietf-http-connection-00.txt section 8, +in order for an HTTP server to reliably implement the protocol +it needs to shut down each direction of the communication independently +(recall that a TCP connection is bi-directional; each half is independent +of the other). This fact is often overlooked by other servers, but +is correctly implemented in Apache as of 1.2. + +

      When this feature was added to Apache it caused a flurry of +problems on various versions of Unix because of a shortsighted assumption. +The TCP specification does not state that the FIN_WAIT_2 state has a +timeout, but it doesn't prohibit it. On systems without the timeout, +Apache 1.2 induces many sockets stuck forever in the FIN_WAIT_2 state. +In many cases this can be avoided by simply upgrading to the latest +TCP/IP patches supplied by the vendor. In cases where the vendor has +never released patches (i.e. SunOS4 -- although folks with a source +license can patch it themselves), we have decided to disable this feature. + +

      There are two ways of accomplishing this. One is the +socket option SO_LINGER. But as fate would have it, +this has never been implemented properly in most TCP/IP stacks. Even +on those stacks with a proper implementation (e.g. Linux 2.0.31) this +method proves to be more expensive (cputime) than the next solution. + +

      For the most part, Apache implements this in a function called +lingering_close (in http_main.c). The +function looks roughly like this: + +

      +    void lingering_close (int s)
      +    {
      +	char junk_buffer[2048];
      +
      +	/* shutdown the sending side */
      +	shutdown (s, 1);
      +
      +	signal (SIGALRM, lingering_death);
      +	alarm (30);
      +
      +	for (;;) {
      +	    select (s for reading, 2 second timeout);
      +	    if (error or timeout) break;
      +	    if (s is ready for reading) {
      +		if (read (s, junk_buffer, sizeof (junk_buffer)) <= 0) break;
      +		/* just toss away whatever is here */
      +	    }
      +	}
      +
      +	close (s);
      +    }
      +
      + +This naturally adds some expense at the end of a connection, but it +is required for a reliable implementation. As HTTP/1.1 becomes more +prevalent, and all connections are persistent, this expense will be +amortized over more requests. If you want to play with fire and +disable this feature you can define NO_LINGCLOSE, but +this is not recommended at all. In particular, as HTTP/1.1 pipelined +persistent connections come into use lingering_close +is an absolute necessity (and + +pipelined connections are faster, so you +want to support them). + +

      Scoreboard File

      + +

      Apache's parent and children communicate with each other through +something called the scoreboard. Ideally this should be implemented +in shared memory. For those operating systems that we either have +access to, or have been given detailed ports for, it typically is +implemented using shared memory. The rest default to using an +on-disk file. The on-disk file is not only slow, but it is unreliable +(and less featured). Peruse the src/main/conf.h file +for your architecture and look for either HAVE_MMAP or +HAVE_SHMGET. Defining one of those two enables the +supplied shared memory code. If your system has another type of +shared memory then edit the file src/main/http_main.c and +add the hooks necessary to use it in Apache. (Send us back a patch +too please.) + +

      Historical note: The Linux port of Apache didn't start to use +shared memory until version 1.2 of Apache. This oversight resulted +in really poor and unreliable behaviour of earlier versions of Apache +on Linux. + +

      DYNAMIC_MODULE_LIMIT

      + +

      If you have no intention of using dynamically loaded modules +(you probably don't if you're reading this and tuning your +server for every last ounce of performance) then you should add +-DDYNAMIC_MODULE_LIMIT=0 when building your server. +This will save RAM that's allocated only for supporting dynamically +loaded modules. + +

      Appendix: Detailed Analysis of a Trace

      + +Here is a system call trace of Apache 1.3 running on Linux. The run-time +configuration file is essentially the default plus: + +
      +<Directory />
      +    AllowOverride none
      +    Options FollowSymLinks
      +</Directory>
      +
      + +The file being requested is a static 6K file of no particular content. +Traces of non-static requests or requests with content negotiation +look wildly different (and quite ugly in some cases). First the +entire trace, then we'll examine details. (This was generated by +the strace program, other similar programs include +truss, ktrace, and par.) + +
      +accept(15, {sin_family=AF_INET, sin_port=htons(22283), sin_addr=inet_addr("127.0.0.1")}, [16]) = 3
      +flock(18, LOCK_UN)                      = 0
      +sigaction(SIGUSR1, {SIG_IGN}, {0x8059954, [], SA_INTERRUPT}) = 0
      +getsockname(3, {sin_family=AF_INET, sin_port=htons(8080), sin_addr=inet_addr("127.0.0.1")}, [16]) = 0
      +setsockopt(3, IPPROTO_TCP1, [1], 4)     = 0
      +read(3, "GET /6k HTTP/1.0\r\nUser-Agent: "..., 4096) = 60
      +sigaction(SIGUSR1, {SIG_IGN}, {SIG_IGN}) = 0
      +time(NULL)                              = 873959960
      +gettimeofday({873959960, 404935}, NULL) = 0
      +stat("/home/dgaudet/ap/apachen/htdocs/6k", {st_mode=S_IFREG|0644, st_size=6144, ...}) = 0
      +open("/home/dgaudet/ap/apachen/htdocs/6k", O_RDONLY) = 4
      +mmap(0, 6144, PROT_READ, MAP_PRIVATE, 4, 0) = 0x400ee000
      +writev(3, [{"HTTP/1.1 200 OK\r\nDate: Thu, 11"..., 245}, {"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 6144}], 2) = 6389
      +close(4)                                = 0
      +time(NULL)                              = 873959960
      +write(17, "127.0.0.1 - - [10/Sep/1997:23:39"..., 71) = 71
      +gettimeofday({873959960, 417742}, NULL) = 0
      +times({tms_utime=5, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 446747
      +shutdown(3, 1 /* send */)               = 0
      +oldselect(4, [3], NULL, [3], {2, 0})    = 1 (in [3], left {2, 0})
      +read(3, "", 2048)                       = 0
      +close(3)                                = 0
      +sigaction(SIGUSR1, {0x8059954, [], SA_INTERRUPT}, {SIG_IGN}) = 0
      +munmap(0x400ee000, 6144)                = 0
      +flock(18, LOCK_EX)                      = 0
      +
      + +

      Notice the accept serialization: + +

      +flock(18, LOCK_UN)                      = 0
      +...
      +flock(18, LOCK_EX)                      = 0
      +
      + +These two calls can be removed by defining +SAFE_UNSERIALIZED_ACCEPT as described earlier. + +

      Notice the SIGUSR1 manipulation: + +

      +sigaction(SIGUSR1, {SIG_IGN}, {0x8059954, [], SA_INTERRUPT}) = 0
      +...
      +sigaction(SIGUSR1, {SIG_IGN}, {SIG_IGN}) = 0
      +...
      +sigaction(SIGUSR1, {0x8059954, [], SA_INTERRUPT}, {SIG_IGN}) = 0
      +
      + +This is caused by the implementation of graceful restarts. When the +parent receives a SIGUSR1 it sends a SIGUSR1 +to all of its children (and it also increments a "generation counter" +in shared memory). Any children that are idle (between connections) +will immediately die +off when they receive the signal. Any children that are in keep-alive +connections, but are in between requests will die off immediately. But +any children that have a connection and are still waiting for the first +request will not die off immediately. + +

      To see why this is necessary, consider how a browser reacts to a closed +connection. If the connection was a keep-alive connection and the request +being serviced was not the first request then the browser will quietly +reissue the request on a new connection. It has to do this because the +server is always free to close a keep-alive connection in between requests +(i.e. due to a timeout or because of a maximum number of requests). +But, if the connection is closed before the first response has been +received the typical browser will display a "document contains no data" +dialogue (or a broken image icon). This is done on the assumption that +the server is broken in some way (or maybe too overloaded to respond +at all). So Apache tries to avoid ever deliberately closing the connection +before it has sent a single response. This is the cause of those +SIGUSR1 manipulations. + +

      Note that it is theoretically possible to eliminate all three of +these calls. But in rough tests the gain proved to be almost unnoticeable. + +

      In order to implement virtual hosts, Apache needs to know the +local socket address used to accept the connection: + +

      +getsockname(3, {sin_family=AF_INET, sin_port=htons(8080), sin_addr=inet_addr("127.0.0.1")}, [16]) = 0
      +
      + +It is possible to eliminate this call in many situations (such as when +there are no virtual hosts, or when Listen directives are +used which do not have wildcard addresses). But no effort has yet been +made to do these optimizations. + +

      Apache turns off the Nagle algorithm: + +

      +setsockopt(3, IPPROTO_TCP1, [1], 4)     = 0
      +
      + +because of problems described in +a +paper by John Heidemann. + +

      Notice the two time calls: + +

      +time(NULL)                              = 873959960
      +...
      +time(NULL)                              = 873959960
      +
      + +One of these occurs at the beginning of the request, and the other occurs +as a result of writing the log. At least one of these is required to +properly implement the HTTP protocol. The second occurs because the +Common Log Format dictates that the log record include a timestamp of the +end of the request. A custom logging module could eliminate one of the +calls. + +

      As described earlier, Rule STATUS=yes causes two +gettimeofday calls and a call to times: + +

      +gettimeofday({873959960, 404935}, NULL) = 0
      +...
      +gettimeofday({873959960, 417742}, NULL) = 0
      +times({tms_utime=5, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 446747
      +
      + +These can be removed by either removing mod_status or +setting Rule STATUS=no. + +

      It might seem odd to call stat: + +

      +stat("/home/dgaudet/ap/apachen/htdocs/6k", {st_mode=S_IFREG|0644, st_size=6144, ...}) = 0
      +
      + +This is part of the algorithm which calculates the +PATH_INFO for use by CGIs. In fact if the request had been +for the URI /cgi-bin/printenv/foobar then there would be +two calls to stat. The first for +/home/dgaudet/ap/apachen/cgi-bin/printenv/foobar +which does not exist, and the second for +/home/dgaudet/ap/apachen/cgi-bin/printenv, which does exist. +Regardless, at least one stat call is necessary when +serving static files because the file size and modification times are +used to generate HTTP headers (such as Content-Length, +Last-Modified) and implement protocol features (such +as If-Modified-Since). A somewhat more clever server +could avoid the stat when serving non-static files, +however doing so in Apache is very difficult given the modular structure. + +

      All static files are served using mmap: + +

      +mmap(0, 6144, PROT_READ, MAP_PRIVATE, 4, 0) = 0x400ee000
      +...
      +munmap(0x400ee000, 6144)                = 0
      +
      + +On some architectures it's slower to mmap small +files than it is to simply read them. The define +MMAP_THRESHOLD can be set to the minimum size required before +using mmap. By default it's set to 0 (except on SunOS4 +where experimentation has shown 8192 to be a better value). Using a +tool such as +lmbench +you can determine the optimal setting for your +environment. It may even be the case that mmap isn't used +on your architecture; if so, then defining USE_MMAP_FILES +might work (if it works then report back to us). + + +

      Apache does its best to avoid copying bytes around in memory. The +first write of any request typically is turned into a writev +which combines both the headers and the first hunk of data: + +

      +writev(3, [{"HTTP/1.1 200 OK\r\nDate: Thu, 11"..., 245}, {"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 6144}], 2) = 6389
      +
      + +When doing HTTP/1.1 chunked encoding Apache will generate up to four +element writevs. The goal is to push the byte copying +into the kernel, where it typically has to happen anyhow (to assemble +network packets). On testing, various Unixes (BSDI 2.x, Solaris 2.5, +Linux 2.0.31+) properly combine the elements into network packets. +Pre-2.0.31 Linux will not combine, and will create a packet for +each element, so upgrading is a good idea. Defining NO_WRITEV +will disable this combining, but result in very poor chunked encoding +performance. + +

      The log write: + +

      +write(17, "127.0.0.1 - - [10/Sep/1997:23:39"..., 71) = 71
      +
      + +can be deferred by defining BUFFERED_LOGS. In this case +up to PIPE_BUF bytes (a POSIX defined constant) of log entries +are buffered before writing. At no time does it split a log entry +across a PIPE_BUF boundary because those writes may not +be atomic (i.e. entries from multiple children could become mixed together). +The code does its best to flush this buffer when a child dies. + +

      The lingering close code causes four system calls: + +

      +shutdown(3, 1 /* send */)               = 0
      +oldselect(4, [3], NULL, [3], {2, 0})    = 1 (in [3], left {2, 0})
      +read(3, "", 2048)                       = 0
      +close(3)                                = 0
      +
      + +which were described earlier. + +

      Let's apply some of these optimizations: +-DSAFE_UNSERIALIZED_ACCEPT -DBUFFERED_LOGS and +Rule STATUS=no. Here's the final trace: + +

      +accept(15, {sin_family=AF_INET, sin_port=htons(22286), sin_addr=inet_addr("127.0.0.1")}, [16]) = 3
      +sigaction(SIGUSR1, {SIG_IGN}, {0x8058c98, [], SA_INTERRUPT}) = 0
      +getsockname(3, {sin_family=AF_INET, sin_port=htons(8080), sin_addr=inet_addr("127.0.0.1")}, [16]) = 0
      +setsockopt(3, IPPROTO_TCP1, [1], 4)     = 0
      +read(3, "GET /6k HTTP/1.0\r\nUser-Agent: "..., 4096) = 60
      +sigaction(SIGUSR1, {SIG_IGN}, {SIG_IGN}) = 0
      +time(NULL)                              = 873961916
      +stat("/home/dgaudet/ap/apachen/htdocs/6k", {st_mode=S_IFREG|0644, st_size=6144, ...}) = 0
      +open("/home/dgaudet/ap/apachen/htdocs/6k", O_RDONLY) = 4
      +mmap(0, 6144, PROT_READ, MAP_PRIVATE, 4, 0) = 0x400e3000
      +writev(3, [{"HTTP/1.1 200 OK\r\nDate: Thu, 11"..., 245}, {"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 6144}], 2) = 6389
      +close(4)                                = 0
      +time(NULL)                              = 873961916
      +shutdown(3, 1 /* send */)               = 0
      +oldselect(4, [3], NULL, [3], {2, 0})    = 1 (in [3], left {2, 0})
      +read(3, "", 2048)                       = 0
      +close(3)                                = 0
      +sigaction(SIGUSR1, {0x8058c98, [], SA_INTERRUPT}, {SIG_IGN}) = 0
      +munmap(0x400e3000, 6144)                = 0
      +
      + +That's 19 system calls, of which 4 remain relatively easy to remove, +but don't seem worth the effort. + +

      Appendix: The Pre-Forking Model

      + +

      Apache (on Unix) is a pre-forking model server. The +parent process is responsible only for forking child +processes; it does not serve any requests or service any network +sockets. The child processes actually process connections; they serve +multiple connections (one at a time) before dying. +The parent spawns new or kills off old +children in response to changes in the load on the server (it does so +by monitoring a scoreboard which the children keep up to date). + +

      This model for servers offers a robustness that other models do +not. In particular, the parent code is very simple, and with a high +degree of confidence the parent will continue to do its job without +error. The children are complex, and when you add in third party +code via modules, you risk segmentation faults and other forms of +corruption. Even should such a thing happen, it only affects one +connection and the server continues serving requests. The parent +quickly replaces the dead child. + +

      Pre-forking is also very portable across dialects of Unix. +Historically this has been an important goal for Apache, and it continues +to remain so. + +

      The pre-forking model comes under criticism for various +performance aspects. Of particular concern are the overhead +of forking a process, the overhead of context switches between +processes, and the memory overhead of having multiple processes. +Furthermore it does not offer as many opportunities for data-caching +between requests (such as a pool of mmapped files). +Various other models exist and extensive analysis can be found in the + papers +of the JAWS project. In practice all of these costs vary drastically +depending on the operating system. + +

      Apache's core code is already multithread aware, and Apache version +1.3 is multithreaded on NT. There have been at least two other experimental +implementations of threaded Apache (one using the 1.3 code base on DCE, +and one using a custom user-level threads package and the 1.0 code base, +neither is available publicly). Part of our redesign for version 2.0 +of Apache will include abstractions of the server model so that we +can continue to support the pre-forking model, and also support various +threaded models. + + + diff --git a/docs/manual/mod/directive-dict.html b/docs/manual/mod/directive-dict.html new file mode 100644 index 00000000000..780ac3a6270 --- /dev/null +++ b/docs/manual/mod/directive-dict.html @@ -0,0 +1,262 @@ + + + + Definitions of terms used to describe Apache directives + + + + + +

      Terms Used to Describe Apache Directives

      + +

      + Each Apache configuration directive is described using a common format + that looks like this: +

      +
      +
      Syntax: directive-name some args +
      + Default: + directive-name default-value +
      + Context: context-list +
      + Override: override +
      + Status: status +
      + Module: module-name +
      + Compatibility: compatibility notes +
      +
      +

      + Each of the directive's attributes, complete with possible values + where applicable, is described in this document. +

      + +

      Directive Terms

      + + +
      +

      Syntax

      +

      + This indicates the format of the directive as it would appear in a + configuration file. This syntax is extremely directive-specific, so + refer to the text of the directive's description for details. +

      + +
      +

      Default

      +

      + If the directive has a default value (i.e., if you omit it + from your configuration entirely, the Apache Web server will behave as + though you set it to a particular value), it is described here. If + there is no default value, this section should say + "None". +

      + +
      +

      Context

      +

      + This indicates where in the server's configuration files the directive + is legal. It's a comma-separated list of one or more of the following + values: +

      +
      +
      server config +
      +
      This means that the directive may be used in the server + configuration files (e.g., httpd.conf, + srm.conf, and access.conf), but + not within any <VirtualHost> or + <Directory> containers. It is not allowed in + .htaccess files at all. +

      +

      +
      +
      virtual host +
      +
      This context means that the directive may appear inside + <VirtualHost> containers in the server + configuration files. +

      +

      +
      +
      directory +
      +
      A directive marked as being valid in this context may be used + inside <Directory> containers in the server + configuration files. +

      +

      +
      +
      .htaccess +
      +
      If a directive is valid in this context, it means that it can + appear inside per-directory .htaccess files. + It may not be processed, though, depending upon the + overrides + currently active. +

      +

      +
      +
      +

      + The directive is only allowed within the designated context; + if you try to use it elsewhere, you'll get a configuration error that + will either prevent the server from handling requests in that context + correctly, or will keep the server from operating at all -- + i.e., the server won't even start. +

      +

      + The valid locations for the directive are actually the result of a + Boolean OR of all of the listed contexts. In other words, a directive + that is marked as being valid in "server config, + .htaccess" can be used in the httpd.conf file + and in .htaccess files, but not within any + <Directory> or <VirtualHost> containers. +

      + +
      +

      Override

      +

      + This directive attribute indicates which configuration override must + be active in order for the directive to be processed when it appears + in a .htaccess file. If the directive's + context + doesn't permit it to appear in .htaccess files, this + attribute should say "Not applicable". +

      +

      + Overrides are activated by the + AllowOverride + directive, and apply to a particular scope (such as a directory) and + all descendants, unless further modified by other + AllowOverride directives at lower levels. The + documentation for that directive also lists the possible override + names available. +

      + +
      +

      Status

      +

      + This indicates how tightly bound into the Apache Web server the + directive is; in other words, you may need to recompile the server + with an enhanced set of modules in order to gain access to the + directive and its functionality. Possible values for this attribute + are: +

      +
      +
      Core +
      +
      If a directive is listed as having "Core" status, that + means it is part of the innermost portions of the Apache Web server, + and is always available. +

      +

      +
      +
      Base +
      +
      A directive labeled as having "Base" status is + supported by one of the standard Apache modules which is compiled + into the server by default, and is therefore normally available + unless you've taken steps to remove the module from your configuration. +

      +

      +
      +
      Extension +
      +
      A directive with "Extension" status is provided by one + of the modules included with the Apache server kit, but the module + isn't normally compiled into the server. To enable the directive + and its functionality, you will need to change the server build + configuration files and re-compile Apache. +

      +

      +
      +
      Experimental +
      +
      "Experimental" status indicates that the directive is + available as part of the Apache kit, but you're on your own if you + try to use it. The directive is being documented for completeness, + and is not necessarily supported. The module which provides the + directive may or may not be compiled in by default; check the top of + the page which describes the directive and its module to see if it + remarks on the availability. +

      +

      +
      +
      + +
      +

      Module

      +

      + This quite simply lists the name of the source module which defines + the directive. +

      + +
      +

      Compatibility

      +

      + If the directive wasn't part of the original Apache version 1 + distribution, the version in which it was introduced should be listed + here. If the directive has the same name as one from the NCSA HTTPd + server, any inconsistencies in behaviour between the two should also + be mentioned. Otherwise, this attribute should say "No + compatibility issues." +

      + + + diff --git a/docs/manual/mod/directive-dict.html.en b/docs/manual/mod/directive-dict.html.en new file mode 100644 index 00000000000..780ac3a6270 --- /dev/null +++ b/docs/manual/mod/directive-dict.html.en @@ -0,0 +1,262 @@ + + + + Definitions of terms used to describe Apache directives + + + + + +

      Terms Used to Describe Apache Directives

      + +

      + Each Apache configuration directive is described using a common format + that looks like this: +

      +
      +
      Syntax: directive-name some args +
      + Default: + directive-name default-value +
      + Context: context-list +
      + Override: override +
      + Status: status +
      + Module: module-name +
      + Compatibility: compatibility notes +
      +
      +

      + Each of the directive's attributes, complete with possible values + where applicable, is described in this document. +

      + +

      Directive Terms

      + + +
      +

      Syntax

      +

      + This indicates the format of the directive as it would appear in a + configuration file. This syntax is extremely directive-specific, so + refer to the text of the directive's description for details. +

      + +
      +

      Default

      +

      + If the directive has a default value (i.e., if you omit it + from your configuration entirely, the Apache Web server will behave as + though you set it to a particular value), it is described here. If + there is no default value, this section should say + "None". +

      + +
      +

      Context

      +

      + This indicates where in the server's configuration files the directive + is legal. It's a comma-separated list of one or more of the following + values: +

      +
      +
      server config +
      +
      This means that the directive may be used in the server + configuration files (e.g., httpd.conf, + srm.conf, and access.conf), but + not within any <VirtualHost> or + <Directory> containers. It is not allowed in + .htaccess files at all. +

      +

      +
      +
      virtual host +
      +
      This context means that the directive may appear inside + <VirtualHost> containers in the server + configuration files. +

      +

      +
      +
      directory +
      +
      A directive marked as being valid in this context may be used + inside <Directory> containers in the server + configuration files. +

      +

      +
      +
      .htaccess +
      +
      If a directive is valid in this context, it means that it can + appear inside per-directory .htaccess files. + It may not be processed, though, depending upon the + overrides + currently active. +

      +

      +
      +
      +

      + The directive is only allowed within the designated context; + if you try to use it elsewhere, you'll get a configuration error that + will either prevent the server from handling requests in that context + correctly, or will keep the server from operating at all -- + i.e., the server won't even start. +

      +

      + The valid locations for the directive are actually the result of a + Boolean OR of all of the listed contexts. In other words, a directive + that is marked as being valid in "server config, + .htaccess" can be used in the httpd.conf file + and in .htaccess files, but not within any + <Directory> or <VirtualHost> containers. +

      + +
      +

      Override

      +

      + This directive attribute indicates which configuration override must + be active in order for the directive to be processed when it appears + in a .htaccess file. If the directive's + context + doesn't permit it to appear in .htaccess files, this + attribute should say "Not applicable". +

      +

      + Overrides are activated by the + AllowOverride + directive, and apply to a particular scope (such as a directory) and + all descendants, unless further modified by other + AllowOverride directives at lower levels. The + documentation for that directive also lists the possible override + names available. +

      + +
      +

      Status

      +

      + This indicates how tightly bound into the Apache Web server the + directive is; in other words, you may need to recompile the server + with an enhanced set of modules in order to gain access to the + directive and its functionality. Possible values for this attribute + are: +

      +
      +
      Core +
      +
      If a directive is listed as having "Core" status, that + means it is part of the innermost portions of the Apache Web server, + and is always available. +

      +

      +
      +
      Base +
      +
      A directive labeled as having "Base" status is + supported by one of the standard Apache modules which is compiled + into the server by default, and is therefore normally available + unless you've taken steps to remove the module from your configuration. +

      +

      +
      +
      Extension +
      +
      A directive with "Extension" status is provided by one + of the modules included with the Apache server kit, but the module + isn't normally compiled into the server. To enable the directive + and its functionality, you will need to change the server build + configuration files and re-compile Apache. +

      +

      +
      +
      Experimental +
      +
      "Experimental" status indicates that the directive is + available as part of the Apache kit, but you're on your own if you + try to use it. The directive is being documented for completeness, + and is not necessarily supported. The module which provides the + directive may or may not be compiled in by default; check the top of + the page which describes the directive and its module to see if it + remarks on the availability. +

      +

      +
      +
      + +
      +

      Module

      +

      + This quite simply lists the name of the source module which defines + the directive. +

      + +
      +

      Compatibility

      +

      + If the directive wasn't part of the original Apache version 1 + distribution, the version in which it was introduced should be listed + here. If the directive has the same name as one from the NCSA HTTPd + server, any inconsistencies in behaviour between the two should also + be mentioned. Otherwise, this attribute should say "No + compatibility issues." +

      + + + diff --git a/docs/manual/mod/directives.html b/docs/manual/mod/directives.html new file mode 100644 index 00000000000..3e55089f535 --- /dev/null +++ b/docs/manual/mod/directives.html @@ -0,0 +1,121 @@ + + + +Apache directives + + + + +

      Apache directives

      + + + + + + diff --git a/docs/manual/mod/mod_access.html b/docs/manual/mod/mod_access.html deleted file mode 100644 index b93c6e30a87..00000000000 --- a/docs/manual/mod/mod_access.html +++ /dev/null @@ -1,125 +0,0 @@ - - - - -Apache module mod_access - - - - - -

      Module mod_access

      - -This module is contained in the mod_access.c file, and -is compiled in by default. It provides access control based on client -hostname or IP address. - - - -
    5. allow -
    6. deny -
    7. order -
    8. -
      - - -

      allow

      - -Syntax: allow from host host ...
      -Context: directory, .htaccess
      -Override: Limit
      -Status: Base
      -Module: mod_access

      - -The allow directive affects which hosts can access a given directory; it is -typically used within a <Limit> section. -Host is one of the following: -

      -
      all -
      all hosts are allowed access -
      A (partial) domain-name -
      hosts whose names are, or end in, this string are allowed access. -
      A full IP address -
      An IP address of a host allowed access -
      A partial IP address -
      The first 1 to 3 bytes of an IP address, for subnet restriction. -
      - -Example:
      allow from .ncsa.uiuc.edu
      -All hosts in the specified domain are allowed access.

      - -Note that this compares whole components; bar.edu -would not match foobar.edu.

      - -See also deny and order.


      - -

      deny

      - -Syntax: deny from host host ...
      -Context: directory, .htaccess
      -Override: Limit
      -Status: Base
      -Module: mod_access

      - -The deny directive affects which hosts can access a given directory; it is -typically used within a <Limit> section. -Host is one of the following: -

      -
      all -
      all hosts are denied access -
      A (partial) domain-name -
      hosts whose names are, or end in, this string are denied access. -
      A full IP address -
      An IP address of a host denied access -
      A partial IP address -
      The first 1 to 3 bytes of an IP address, for subnet restriction. -
      - -Example:
      deny from 16
      -All hosts in the specified network are denied access.

      - -Note that this compares whole components; bar.edu -would not match foobar.edu.

      - -See also allow and order.


      - -

      order

      - -Syntax: order ordering
      -Default: order deny,allow
      -Context: directory, .htaccess
      -Override: Limit
      -Status: Base
      -Module: mod_access

      - -The order directive controls the order in which allow and -deny directives are evaluated. Ordering is one -of -

      -
      deny,allow -
      the deny directives are evaluated before the allow directives. -
      allow,deny -
      the allow directives are evaluated before the deny directives. -
      mutual-failure -
      Only those hosts which appear on the allow list and do not appear -on the deny list are granted access. -
      - -Example: -
      -order deny,allow -deny from all -allow from .ncsa.uiuc.edu -
      -Hosts in the ncsa.uiuc.edu domain are allowed access; all other hosts are -denied access. - - -
      - -Home -Index - - - - diff --git a/docs/manual/mod/mod_auth.html b/docs/manual/mod/mod_auth.html deleted file mode 100644 index ac522f549a6..00000000000 --- a/docs/manual/mod/mod_auth.html +++ /dev/null @@ -1,84 +0,0 @@ - - - - -Apache module mod_auth - - - - - -

      Module mod_auth

      - -This module is contained in the mod_auth.c file, and -is compiled in by default. It provides for user authentication using -textual files. - - - -
    9. AuthGroupFile -
    10. AuthUserFile -
    11. -
      - - -

      AuthGroupFile

      - -Syntax: AuthGroupFile filename
      -Context: directory, .htaccess
      -Override: AuthConfig
      -Status: Base
      -Module: mod_auth

      - -The AuthGroupFile directive sets the name of a textual file containing the list -of user groups for user authentication. Filename is the absolute path -to the group file.

      -Each line of the group file contains a groupname followed by a colon, followed -by the member usernames separated by spaces. Example: -

      mygroup: bob joe anne
      -Note that searching large group files is very inefficient; -AuthDBMGroupFile should -be used instead.

      - -Security: make sure that the AuthGroupFile is stored outside the -document tree of the webserver; do not put it in the directory that -it protects. Otherwise, clients will be able to download the AuthGroupFile.

      - -See also AuthName, -AuthType and -AuthUserFile.


      - -

      AuthUserFile

      - -Syntax: AuthUserFile filename
      -Context: directory, .htaccess
      -Override: AuthConfig
      -Status: Base
      -Module: mod_auth

      - -The AuthUserFile directive sets the name of a textual file containing the list -of users and passwords for user authentication. Filename is the -absolute path to the user file.

      -Each line of the user file contains a username followed by a colon, -followed by the crypt() encrypted password. The behaviour of multiple -occurrences of the same user is undefined.

      -Note that searching large user files is inefficient; -AuthDBMUserFile should -be used instead.

      - -Security: make sure that the AuthUserFile is stored outside the -document tree of the webserver; do not put it in the directory that -it protects. Otherwise, clients will be able to download the AuthUserFile.

      - -See also AuthName, -AuthType and -AuthGroupFile.

      - - -


      -Home -Index - - - - diff --git a/docs/manual/mod/mod_auth_anon.html b/docs/manual/mod/mod_auth_anon.html deleted file mode 100644 index 2a299a1d7e2..00000000000 --- a/docs/manual/mod/mod_auth_anon.html +++ /dev/null @@ -1,192 +0,0 @@ - - -4.1a Module mod_auth_anon.c - - -

      4.1a Module mod_auth_anon.

      - -This module is contained in the mod_auth_anon.c file and -is compiled in by default. It is only available in Apache 1.1 and -later. -

      -It does access control in a manner similar to anonymous-ftp sites; i.e. -have a 'magic' user id 'anonymous' and the email address as a password. -These email addresses can be logged. -

      -Combined with other (database) access control methods, this allows for -effective user tracking and customization according to a user profile -while still keeping the site open for 'unregistered' users. One advantage -of using Auth-based user tracking is that, unlike magic-cookies and -funny URL pre/postfixes, it is completely browser independent and it -allows users to share URLs. -

      - -Full description / -Example / -Compile time options / -RevisionHistory / -Person to blame / -Sourcecode -

      - -


      Full description of all tokens

      -
      - -
      -Anonymous < Space separated list > -
      - A list of one or more 'magic' userIDs which are allowed access - without password verification. The userIDs are space separated. - It is possible to use the ' and " quotes to allow a space in - a userID as well as the \ escape character. -

      - Please note that the comparison is case-IN-sensitive. -
      - I strongly suggest that the magic username 'anonymous' - is always one of the allowed userIDs. -

      - Example:
      - - Anonymous: anonymous "Not Registered" 'I don\'t know' -

      - This would allow the user to enter without password verification - by using the userId's 'anonymous', 'AnonyMous','Not Registered' and - 'I Don't Know'. -

      - -
      -Anonymous_LogEmail < on | off > -
      - When set 'on', the default, the 'password' entered (which hopefully - contains a sensible email address) is logged in the httpd-log file. -
      - -
      -Anonymous_VerifyEmail < on | off > -
      - When set 'on', the default is 'off', the 'password' entered is - checked for at least one '@' and a '.' to encourage users to enter - valid email addresses (see Anonymous_LogEmail above). -
      - -
      -Anonymous_NoUserID < on | off > -
      - When set 'on', the default is 'off', users can leave - the userID (and perhaps the password field) empty. This - can be very convenient for MS-Explorer users who can - just hit return or click directly on the OK button; which - seems a natural reaction. -
      - -
      -Anonymous_Authorative < on | off > -
      - Default is 'off'. When set 'on', there is no - fall-through to other authorization methods. So if a - userID does not match the values specified in the - Anonymous directive, access is denied. -

      - Be sure you know what you are doing when you decide to switch - it on. And remember that it is the linking order of the modules - (in the Configuration / Make file) which details the order - in which the Authorization modules are queried. -

      - -
      - - -

      Example

      - -The example below (when combined with the Auth directives -of a htpasswd-file based (or GDM, mSQL etc) base access -control system) allows users in as 'guests' with the -following properties: -
        -
      • -It insists that the user enters a userId. (Anonymous_NoUserId) -
      • -It insists that the user enters a password. (Anonymous_MustGiveEmail) -
      • -The password entered must be a valid email address, i.e. contain at least one '@' and a '.'. -(Anonymous_VerifyEmail) -
      • -The userID must be one of anonymous guest www test welcome -and comparison is not case sensitive. -<directory /web/docs/public> -
      • -And the Email addresses entered in the password field are logged to -the httpd-log file -(Anonymous_LogEmail) -
      -

      -Excerpt of access.conf: -

      -
      -Anonymous anonymous guest www test welcome

      -Anonymous_MustGiveEmail on
      -Anonymous_VerifyEmail on
      -Anonymous_NoUserId off
      -Anonymous_LogEmail on
      -

      -AuthName Use 'anonymous' & Email address for guest entry
      -AuthType basic

      - -

      -
      - Normal Apache/NCSA tokens for access control -

      - <limit get post head>
      - order deny,allow
      - allow from all
      -

      - require valid-user
      - </limit>
      -

      -
      - - -

      Compile Time Options

      - -Currently there are no Compile options. - -

      Revision History

      - -This version: 23 Nov 1995, 24 Feb 1996, 16 May 1996. - -
      - -
      Version 0.4
      -
      First release -
      -
      Version 0.5
      -
      Added 'VerifyEmail' and 'LogEmail' options. Multiple - 'anonymous' tokens allowed. More docs. Added Authorative - functionality. -
      -
      - - -

      Contact/person to blame

      - -This module was written for the -European Wide Service Exchange by -<Dirk.vanGulik@jrc.it>. -Feel free to contact me if you have any problems, icecreams or bugs. This -documentation, courtesy of Nick Himba, -<himba@cs.utwente.nl>. -

      - - -


      Sourcecode

      - -The source code can be found at -http://www.apache.org. A snapshot of a development version -usually resides at -http://me-www.jrc.it/~dirkx/mod_auth_anon.c. Please make sure -that you always quote the version you use when filing a bug report. -

      - - - - diff --git a/docs/manual/mod/mod_auth_db.html b/docs/manual/mod/mod_auth_db.html deleted file mode 100644 index 8c4faffd656..00000000000 --- a/docs/manual/mod/mod_auth_db.html +++ /dev/null @@ -1,116 +0,0 @@ - - - - -Apache module mod_auth_db - - - - - -

      Module mod_auth_db

      - -This module is contained in the mod_auth_db.c file, and -is not compiled in by default. It provides for user authentication using -Berkeley DB files. It is an alternative to DBM -files for those systems which support DB and not DBM. It is only -available in Apache 1.1 and later. - - - -
    12. AuthDBGroupFile -
    13. AuthDBUserFile -
    14. -
      - - -

      AuthDBGroupFile

      - -Syntax: AuthDBGroupFile filename
      -Context: directory, .htaccess
      -Override: AuthConfig
      -Status: Extension
      -Module: mod_auth_db

      - -The AuthDBGroupFile directive sets the name of a DB file containing the list -of user groups for user authentication. Filename is the absolute path -to the group file.

      - -The group file is keyed on the username. The value for a user is a -comma-separated list of the groups to which the user belongs. There must -be no whitespace within the value, and it must never contain any colons.

      - -Security: make sure that the AuthDBGroupFile is stored outside the -document tree of the webserver; do not put it in the directory that -it protects. Otherwise, clients will be able to download the -AuthDBGroupFile unless otherwise protected.

      - -Combining Group and Password DB files: In some cases it is easier to -manage a single database which contains both the password and group -details for each user. This simplifies any support programs that need -to be written: they now only have to deal with writing to and locking -a single DB file. This can be accomplished by first setting the group -and password files to point to the same DB file:

      - -

      -AuthDBGroupFile /www/userbase
      -AuthDBUserFile /www/userbase -
      - -The key for the single DB record is the username. The value consists of

      - -

      -Unix Crypted Password : List of Groups [ : (ignored) ] -
      - -The password section contains the Unix crypt() password as before. This is -followed by a colon and the comma separated list of groups. Other data may -optionally be left in the DB file after another colon; it is ignored by the -authentication module.

      - -See also AuthName, -AuthType and -AuthDBUserFile.


      - -

      AuthDBUserFile

      - -Syntax: AuthDBUserFile filename
      -Context: directory, .htaccess
      -Override: AuthConfig
      -Status: Extension
      -Module: mod_auth_db

      - -The AuthDBUserFile directive sets the name of a DB file containing the list -of users and passwords for user authentication. Filename is the -absolute path to the user file.

      - -The user file is keyed on the username. The value for a user is the -crypt() encrypted password, optionally followed by a colon and -arbitrary data. The colon and the data following it will be ignored -by the server.

      - -Security: make sure that the AuthDBUserFile is stored outside the -document tree of the webserver; do not put it in the directory that -it protects. Otherwise, clients will be able to download the -AuthDBUserFile.

      - -Important compatibility note: The implementation of "dbmopen" in the -apache modules reads the string length of the hashed values from the -DB data structures, rather than relying upon the string being -NULL-appended. Some applications, such as the Netscape web server, -rely upon the string being NULL-appended, so if you are having trouble -using DB files interchangeably between applications this may be a -part of the problem.

      - -See also AuthName, -AuthType and -AuthDBGroupFile.

      - - -


      -Home -Index - - - - diff --git a/docs/manual/mod/mod_auth_dbm.html b/docs/manual/mod/mod_auth_dbm.html deleted file mode 100644 index e36fc12485a..00000000000 --- a/docs/manual/mod/mod_auth_dbm.html +++ /dev/null @@ -1,115 +0,0 @@ - - - - -Apache module mod_auth_dbm - - - - - -

      Module mod_auth_dbm

      - -This module is contained in the mod_auth_dbm.c file, and -is not compiled in by default. It provides for user authentication using -DBM files. See the DBM user documentation. - - - -
    15. AuthDBMGroupFile -
    16. AuthDBMUserFile -
    17. -
      - - -

      AuthDBMGroupFile

      - -Syntax: AuthDBMGroupFile filename
      -Context: directory, .htaccess
      -Override: AuthConfig
      -Status: Extension
      -Module: mod_auth_dbm

      - -The AuthDBMGroupFile directive sets the name of a DBM file containing the list -of user groups for user authentication. Filename is the absolute path -to the group file.

      - -The group file is keyed on the username. The value for a user is a -comma-separated list of the groups to which the user belongs. There must -be no whitespace within the value, and it must never contain any colons.

      - -Security: make sure that the AuthDBMGroupFile is stored outside the -document tree of the webserver; do not put it in the directory that -it protects. Otherwise, clients will be able to download the -AuthDBMGroupFile unless otherwise protected.

      - -Combining Group and Password DBM files: In some cases it is easier to -manage a single database which contains both the password and group -details for each user. This simplifies any support programs that need -to be written: they now only have to deal with writing to and locking -a single DBM file. This can be accomplished by first setting the group -and password files to point to the same DBM:

      - -

      -AuthDBMGroupFile /www/userbase
      -AuthDBMUserFile /www/userbase -
      - -The key for the single DBM is the username. The value consists of

      - -

      -Unix Crypted Password : List of Groups [ : (ignored) ] -
      - -The password section contains the Unix crypt() password as before. This is -followed by a colon and the comma separated list of groups. Other data may -optionally be left in the DBM file after another colon; it is ignored by the -authentication module. This is what www.telescope.org uses for its combined -password and group database.

      - -See also AuthName, -AuthType and -AuthDBMUserFile.


      - -

      AuthDBMUserFile

      - -Syntax: AuthDBMUserFile filename
      -Context: directory, .htaccess
      -Override: AuthConfig
      -Status: Extension
      -Module: mod_auth_dbm

      - -The AuthDBMUserFile directive sets the name of a DBM file containing the list -of users and passwords for user authentication. Filename is the -absolute path to the user file.

      - -The user file is keyed on the username. The value for a user is the -crypt() encrypted password, optionally followed by a colon and -arbitrary data. The colon and the data following it will be ignored -by the server.

      - -Security: make sure that the AuthDBMUserFile is stored outside the -document tree of the webserver; do not put it in the directory that -it protects. Otherwise, clients will be able to download the -AuthDBMUserFile.

      - -Important compatibility note: The implementation of "dbmopen" in the -apache modules reads the string length of the hashed values from the -DBM data structures, rather than relying upon the string being -NULL-appended. Some applications, such as the Netscape web server, -rely upon the string being NULL-appended, so if you are having trouble -using DBM files interchangeably between applications this may be a -part of the problem.

      - -See also AuthName, -AuthType and -AuthDBMGroupFile.

      - - -


      -Home -Index - - - - diff --git a/docs/manual/mod/mod_example.html b/docs/manual/mod/mod_example.html new file mode 100644 index 00000000000..05fb8ef7211 --- /dev/null +++ b/docs/manual/mod/mod_example.html @@ -0,0 +1,133 @@ + + + + Apache module mod_example + + + +

      Module mod_example

      +

      + This module is contained in the modules/mod_example.c file, and + is not compiled in by default. It illustrates many of + the aspects of the + Apache 1.2 API + and, when used, demonstrates the manner in which module callbacks are + triggered by the server. +

      +

      Summary

      +

      + The files in the src/modules/example directory under the + Apache distribution directory tree are provided as an example to those + that wish to write modules that use the Apache API. +

      +

      + The main file is mod_example.c, which illustrates all + the different callback mechanisms and call syntaxes. By no means does + an add-on module need to include routines for all of the callbacks - + quite the contrary! +

      +

      + The example module is an actual working module. If you link it into + your server, enable the "example-handler" handler for a location, and + then browse to that location, you will see a display of + some of the tracing the example module did as the various callbacks + were made. +

      +

      + To include the example module in your server, follow the steps below: +

      +
        +
      1. Uncomment the "Module example_module" line near the bottom of + the src/Configuration file. If there isn't one, add + it; it should look like this: +
        +     Module example_module        modules/example/mod_example.o
        +    
        +
      2. +
      3. Run the src/Configure script + ("cd src; ./Configure"). This will + build the Makefile for the server itself, and update the + src/modules/Makefile for any additional modules you + have requested from beneath that subdirectory. +
      4. +
      5. Make the server (run "make" in the src + directory). +
      6. +
      +

      + To add another module of your own: +

      +
        +
      1. mkdir src/modules/mymodule +
      2. +
      3. cp src/modules/example/* src/modules/mymodule +
      4. +
      5. Modify the files in the new directory. +
      6. +
      7. Follow steps [1] through [3] above, with appropriate changes. +
      8. +
      +

      + Using the mod_example Module +

      +

      + To activate the example module, include a block similar to the + following in your srm.conf file: +

      +
      +   <Location /example-info>
      +       SetHandler example-handler
      +   </Location>
      +  
      +

      + As an alternative, you can put the following into a + .htaccess + file and then request the file "test.example" from that + location: +

      +
      +   AddHandler example-handler .example
      +  
      +

      + After reloading/restarting your server, you should be able to browse + to this location and see the brief display mentioned earlier. +

      +

      Directives

      +

      +

      +

      +
      + +

      Example

      +
      +

      + Syntax: Example +
      + Default: None +
      + Context: server config, virtual host, directory, .htaccess +
      + Override: Options +
      + Status: Extension +
      + Module: mod_example +

      +

      + The Example directive activates the example module's content handler + for a particular location or file type. It takes no arguments. If + you browse to a URL to which the example content-handler applies, you + will get a display of the routines within the module and how and in + what order they were called to service the document request. +

      + + + diff --git a/docs/manual/mod/mod_expires.html b/docs/manual/mod/mod_expires.html new file mode 100644 index 00000000000..4dbab5f2469 --- /dev/null +++ b/docs/manual/mod/mod_expires.html @@ -0,0 +1,178 @@ + + + + Apache module mod_expires + + + +

      Module mod_expires

      +

      + This module is contained in the mod_expires.c file, and + is not compiled in by default. It provides for the + generation of Expires headers according to user-specified + criteria. +

      +

      Summary

      +

      + This module controls the setting of the Expires HTTP + header in server responses. The expiration date can be set to be + relative to either the time the source file was last modified, or to + the time of the client access. +

      +

      + The Expires HTTP header is an instruction to the client + about the document's validity and persistence. If cached, the document + may be fetched from the cache rather than from the source until this + time has passed. After that, the cache copy is considered + "expired" and invalid, and a new copy must be obtained from + the source. +

      +

      Directives

      +

      +

      +
    18. ExpiresActive +
    19. +
    20. ExpiresByType +
    21. +
    22. ExpiresDefault +
    23. +
      +
      + +

      ExpiresActive directive

      +
      + +

      + Syntax: ExpiresActive boolean +
      + Context: server config, virtual host, directory, .htaccess +
      + Override: Indexes +
      + Status: Extension +
      + Module: mod_expires +

      +

      + This directive enables or disables the generation of the + Expires header for the document realm in question. (That + is, if found in an .htaccess file, for instance, it + applies only to documents generated from that directory.) If set to + Off, no Expires header will be + generated for any document in the realm (unless overridden at a lower + level, such as an .htaccess file overriding a server + config file). If set to On, the header will be + added to served documents according to the criteria defined by the + ExpiresByType + and + ExpiresDefault + directives (q.v.). +

      +

      + Note that this directive does not guarantee that an + Expires header will be generated. If the criteria aren't + met, no header will be sent, and the effect will be as though this + directive wasn't even specified. +

      +
      + +

      ExpiresByType directive

      +
      + +

      + Syntax: ExpiresByType mime-type <code>seconds +
      + Context: server config, virtual host, directory, .htaccess +
      + Override: Indexes +
      + Status: Extension +
      + Module: mod_expires +

      +

      + This directive defines the value of the Expires header + generated for documents of the specified type (e.g., + text/html). The second argument sets the number of + seconds that will be added to a base time to construct the expiration + date. +

      +

      + The base time is either the last modification time of the file, or the + time of the client's access to the document. Which should be used is + specified by the <code> field; + M means that the file's last modification time should + be used as the base time, and A means the client's + access time should be used. +

      +

      + The difference in effect is subtle. If M is used, all current + copies of the document in all caches will expire at the same time, + which can be good for something like a weekly notice that's always + found at the same URL. If A is used, the date of expiration + is different for each client; this can be good for image files that + don't change very often, particularly for a set of related documents + that all refer to the same images (i.e., the images will be + accessed repeatedly within a relatively short timespan). +

      +

      + Example: +

      +

      +

      +   ExpiresActive On                  # enable expirations
      +   ExpiresByType image/gif A2592000  # expire GIF images after a month
      +                                     #  in the client's cache
      +   ExpiresByType text/html M604800   # HTML documents are good for a
      +                                     #  week from the time they were
      +                                     #  changed, period
      +  
      +

      +

      + Note that this directive only has effect if ExpiresActive + On has been specified. It overrides, for the specified MIME + type only, any expiration date set by the + ExpiresDefault + directive. +

      +
      + +

      ExpiresDefault directive

      +
      + +

      + Syntax: ExpiresDefault <code>seconds +
      + Context: server config, virtual host, directory, .htaccess +
      + Override: Indexes +
      + Status: Extension +
      + Module: mod_expires +

      +

      + This directive sets the default algorithm for calculating the + expiration time for all documents in the affected realm. It can be + overridden on a type-by-type basis by the + ExpiresByType + directive. See the description of that directive for details about + the syntax of the argument. +

      + + + diff --git a/docs/manual/mod/mod_headers.html b/docs/manual/mod/mod_headers.html new file mode 100644 index 00000000000..db25271583b --- /dev/null +++ b/docs/manual/mod/mod_headers.html @@ -0,0 +1,97 @@ + + + +Apache module mod_headers + + + + +

      Headers Module

      + +The optional headers module allows for the customisation of HTTP +response headers. Headers can be merged, replaced or removed. The +directives described in this document are only available if Apache is +compiled with mod_headers.c. + +
      + +

      Directive

      + + +
      + +

      Header

      +Syntax: Header [ set | append | add ] header value
      +Syntax: Header unset header
      +Context: server config, virtual host, access.conf, .htaccess
      +Status: optional
      +Module: mod_headers

      + +This directive can replace, merge or remove HTTP response headers. The +action it performs is determined by the first argument. This can be one +of the following values: + +

        +
      • set
        + The response header is set, replacing any previous header with this name + +
      • append
        + The response header is appended to any existing header of the same + name. When a new value is merged onto an existing header it is + separated from the existing header with a comma. This is the HTTP standard + way of giving a header multiple values. + +
      • add
        + The response header is added to the existing set of headers, even if + this header already exists. This can result in two (or more) headers + having the same name. This can lead to unforeseen consequences, and in + general "append" should be used instead. + +
      • unset
        + The response header of this name is removed, if it exists. If there are + multiple headers of the same name, only the first one set will be removed. +
      + +This argument is followed by a header name, which can include the +final colon, but it is not required. Case is ignored. For +add, append and set a value is given as the third argument. If this +value contains spaces, it should be surrounded by double quotes. +For unset, no value should be given. + +

      Order of Processing

      + +The Header directive can occur almost anywhere within the server +configuration. It is valid in the main server config and virtual host +sections, inside <Directory>, <Location> and <Files> +sections, and within .htaccess files. +

      +The Header directives are processed in the following order: +

        +
      1. main server +
      2. virtual host +
      3. <Directory> sections and .htaccess +
      4. <Location> +
      5. <Files> +
      + +Order is important. These two headers have a different effect if reversed: +
      +Header append Author "John P. Doe"
      +Header unset Author
      +
      + +This way round, the Author header is not set. If reversed, the Author +header is set to "John P. Doe". +

      + +The Header directives are processed just before the response is sent +by its handler. This means that some headers that are added just +before the response is sent cannot be unset or overridden. This +includes headers such as "Date" and "Server". +

      + + + + diff --git a/docs/manual/mod/mod_isapi.html b/docs/manual/mod/mod_isapi.html new file mode 100644 index 00000000000..5fde08c1a7d --- /dev/null +++ b/docs/manual/mod/mod_isapi.html @@ -0,0 +1,73 @@ + + + +Apache module mod_isapi + + + + + + +

      Module mod_isapi

      + +

      This module is contained in the mod_isapi.c file, and is + compiled in by default. It provides support for ISAPI Extensions when + running under Microsoft Windows. Any document with a handler of + isapi-isa will be processed by this module. + +

      Purpose

      + +

      This module implements the ISAPI + Extension API. It allows Internet Server Applications (i.e., ISAPI + Extensions) to be used with Apache for Windows. + +

      Usage

      + +

      In the server configuration file, add a handler called + isapi-isa, and map it to files with a .DLL + extension. In other words:

      +
      +    AddHandler isapi-isa dll
      +
      +

      Now simply place the ISA DLLs into your document root, and they will + be loaded when their URLs are accessed.

      + +

      ISAPI Extensions are governed by the same restrictions as CGI + scripts. That is, Options ExecCGI must be active in the + directory that contains the ISA.

      + +

      Notes

      + +

      Apache's ISAPI implementation conforms to all of the ISAPI 2.0 + specification, except for the "Microsoft-specific" extensions dealing + with asynchronous I/O. Apache's I/O model does not allow asynchronous + reading and writing in a manner that the ISAPI could access. If an ISA + tries to access async I/O, a message will be placed in the error log, + to help with debugging. + +

      Some servers, like Microsoft IIS, load the ISA into the server, and + keep it loaded until memory usage is too high, and it is + unloaded. Apache currently loads and unloads the ISA for each + request. This is inefficient, but Apache's request model makes this + method the only method that currently works. A future release may use + a more effective loading method. + +

      Apache 1.3a1 currently limits POST and PUT input to 48k per + request. This is to work around a problem with the ISAPI implementation + that could result in a denial of service attack. It is expected that + support for larger uploads will be added soon. + +

      Also, remember that while Apache supports ISAPI Extensions, it does + not support ISAPI Filters. Support for filters may be added at a later + date, but no support is planned at this time.

      + + + + diff --git a/docs/manual/mod/mod_log_agent.html b/docs/manual/mod/mod_log_agent.html deleted file mode 100644 index 47de105b5a6..00000000000 --- a/docs/manual/mod/mod_log_agent.html +++ /dev/null @@ -1,54 +0,0 @@ - - - - -Apache module mod_log_agent - - - - - -

      Module mod_log_agent

      - -This module is contained in the mod_log_agent.c file, and is not -compiled in by default. It provides for logging of the client user agents. - - - -
      - - -

      AgentLog

      - -Syntax: AgentLog file-pipe
      -Default: AgentLog logs/agent_log
      -Context: server config, virtual host
      -Status: Extension
      -Module: mod_log_agent

      - -The AgentLog directive sets the name of the file to which the server will -log the UserAgent header of incoming requests. File-pipe is one -of -

      A filename -
      A filename relative to the ServerRoot. -
      `|' followed by a command -
      A program to receive the agent log information on its standard input. -Note that a new program will not be started for a VirtualHost if it inherits -the AgentLog from the main server. -
      -Security: if a program is used, then it will be -run under the user who started httpd. This will be root if the server -was started by root; be sure that the program is secure.

      - -This directive is provided for compatibility with NCSA 1.4.

      - -


      - -Home -Index - - - - diff --git a/docs/manual/mod/mod_log_referer.html b/docs/manual/mod/mod_log_referer.html deleted file mode 100644 index c16e60a0a63..00000000000 --- a/docs/manual/mod/mod_log_referer.html +++ /dev/null @@ -1,80 +0,0 @@ - - - - -Apache module mod_log_referer - - - - - -

      Module mod_log_referer

      - -This module is contained in the mod_log_referer.c file, and is not -compiled in by default. It provides for logging of the documents which -reference documents on the server. - -

      Log file format

      -The log file contains a separate line for each refer. Each line has the -format -
      uri -> document
      -where uri is the (%-escaped) URI for the document that references -the one requested by the client, and document is the (%-decoded) -local URL to the document being referred to. - - -

      Directives

      - -
      - - -

      RefererIgnore

      - -Syntax: RefererIgnore string string ...
      -Context: server config, virtual host
      -Status: Extension
      -Module: mod_log_referer

      - -The RefererIgnore directive adds to the list of strings to ignore in -Referer headers. If any of the strings in the list is contained in -the Referer header, then no referrer information will be logged for the -request. Example: -

      RefererIgnore www.ncsa.uiuc.edu
      -This avoids logging references from www.ncsa.uiuc.edu. -


      - - -

      RefererLog

      - -Syntax: RefererLog file-pipe
      -Default: RefererLog logs/referer_log
      -Context: server config, virtual host
      -Status: Extension
      -Module: mod_log_referer

      - -The RefererLog directive sets the name of the file to which the server will -log the Referer header of incoming requests. File-pipe is one -of -

      A filename -
      A filename relative to the ServerRoot. -
      `|' followed by a command -
      A program to receive the referrer log information on its standard input. -Note that a new program will not be started for a VirtualHost if it inherits -the RefererLog from the main server. -
      -Security: if a program is used, then it will be -run under the user who started httpd. This will be root if the server -was started by root; be sure that the program is secure.

      - -This directive is provided for compatibility with NCSA 1.4.

      - -


      -Home -Index - - - - diff --git a/docs/manual/mod/mod_mime_magic.html b/docs/manual/mod/mod_mime_magic.html new file mode 100644 index 00000000000..56fade84863 --- /dev/null +++ b/docs/manual/mod/mod_mime_magic.html @@ -0,0 +1,251 @@ + + + + Apache module mod_mime_magic + + + +

      Module mod_mime_magic

      + + This module is an optional extension to the Apache HTTPD server. + The current version can be obtained from + + http://www.employees.org/~ikluft/apache/mod_mime_magic/. + +

      Summary

      + + This module is derived from a free version of the file(1) + command for Unix, + which uses "magic numbers" and other hints from a file's contents to + figure out what the contents are. + In the case of this module, + it tries to figure out the MIME type of the file. +

      + This module is only active if the magic file exists and + was successfully opened at server-configuration time. + The magic file can be named by the + MimeMagicFile + directive or defaults to conf/magic. +

      + The contents of the file are plain ASCII text in 4-5 columns. + Blank lines are allowed but ignored. + Commented lines use a hash mark "#". + The remaining lines are parsed for the following columns: + + + + + + + + + + + + + + + + + + + + + +
      ColumnDescription
      1byte number to begin checking from +
      + ">" indicates a dependency upon the previous non-">" line
      2type of data to match + + + + + + + + + + + + +
      bytesingle character
      shortmachine-order 16-bit integer
      longmachine-order 32-bit integer
      stringarbitrary-length string
      datelong integer date + (seconds since Unix epoch/1970)
      beshortbig-endian 16-bit integer
      belongbig-endian 32-bit integer
      bedatebig-endian 32-bit integer date
      leshortlittle-endian 16-bit integer
      lelonglittle-endian 32-bit integer
      ledatelittle-endian 32-bit integer date
      +
      3contents of data to match
      4MIME type if matched
      5MIME encoding if matched (optional)
      + +

      + For example, the following magic file lines + would recognize some audio formats. + +

      +# Sun/NeXT audio data
      +0       string          .snd
      +>12     belong          1               audio/basic
      +>12     belong          2               audio/basic
      +>12     belong          3               audio/basic
      +>12     belong          4               audio/basic
      +>12     belong          5               audio/basic
      +>12     belong          6               audio/basic
      +>12     belong          7               audio/basic
      +>12     belong          23              audio/x-adpcm
      +
      + + Or these would recognize the difference between "*.doc" files containing + Microsoft Word or FrameMaker documents. (These are incompatible file + formats which use the same file suffix.) + +
      +# Frame
      +0       string          \<MakerFile     application/x-frame
      +0       string          \<MIFFile       application/x-frame
      +0       string          \<MakerDictionary       application/x-frame
      +0       string          \<MakerScreenFon        application/x-frame
      +0       string          \<MML           application/x-frame
      +0       string          \<Book          application/x-frame
      +0       string          \<Maker         application/x-frame
      +
      +# MS-Word
      +0       string          \376\067\0\043                  application/msword
      +0       string          \320\317\021\340\241\261        application/msword
      +0       string          \333\245-\0\0\0                 application/msword
      +
      + + An optional MIME encoding can be included as a fifth column. + For example, this can recognize gzipped files and set the encoding + for them. + +
      +# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver)
      +0       string          \037\213        application/octet-stream        x-gzip
      +
      + +

      Performance Issues

      + + This module is not for every system. If your system is barely keeping + up with its load or if you're performing a web server benchmark, + you may not want to enable this because the processing is not free. +

      + However, an effort was made to improve the performance of the original + file(1) code to make it fit in a busy web server. + It was designed for a server where there are thousands of users who + publish their own documents. + This is probably very common on intranets. + Many times, it's helpful + if the server can make more intelligent decisions about a file's + contents than the file name allows + ...even if just to reduce the "why doesn't my page work" calls + when users improperly name their own files. + You have to decide if the extra work suits your environment. +

      + When compiling an Apache server, this module should be at or near the + top of the list of modules in the Configuration file. The modules are + listed in increasing priority so that will mean this one is used only + as a last resort, just like it was designed to. + +

      Directives

      +

      +

      +

      +
      +

      + MimeMagicFile +

      +

      + Syntax: MimeMagicFile magic-file-name +
      + Default: conf/magic +
      + Context: server config, virtual host +
      + Status: Extension +
      + Module: mod_mime_magic +

      + + The MimeMagicFile directive can be used to change the location of the + magic file from its default location at conf/magic. + Non-rooted paths are relative to the ServerRoot. +

      +


      + +

      Notes

      + + Patches and suggestions for mod_mime_magic should be sent to + Ian Kluft <ikluft@cisco.com>. + Note that enhancements are done on a volunteer basis so no timetable can + be committed for any particular request. + Obviously, patches are given much higher priority over plain requests. +

      + The following notes apply to the mod_mime_magic module and are + included here for compliance with contributors' copyright restrictions + that require their acknowledgement. + +

      +/*
      + * mod_mime_magic: MIME type lookup via file magic numbers
      + * Copyright (c) 1996-1997 Cisco Systems, Inc.
      + *
      + * This software was submitted by Cisco Systems to the Apache Group in July
      + * 1997.  Future revisions and derivatives of this source code must
      + * acknowledge Cisco Systems as the original contributor of this module.
      + * All other licensing and usage conditions are those of the Apache Group.
      + *
      + * Some of this code is derived from the free version of the file command
      + * originally posted to comp.sources.unix.  Copyright info for that program
      + * is included below as required.
      + * ---------------------------------------------------------------------------
      + * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin.
      + *
      + * This software is not subject to any license of the American Telephone and
      + * Telegraph Company or of the Regents of the University of California.
      + *
      + * Permission is granted to anyone to use this software for any purpose on any
      + * computer system, and to alter it and redistribute it freely, subject to
      + * the following restrictions:
      + *
      + * 1. The author is not responsible for the consequences of use of this
      + * software, no matter how awful, even if they arise from flaws in it.
      + *
      + * 2. The origin of this software must not be misrepresented, either by
      + * explicit claim or by omission.  Since few users ever read sources, credits
      + * must appear in the documentation.
      + *
      + * 3. Altered versions must be plainly marked as such, and must not be
      + * misrepresented as being the original software.  Since few users ever read
      + * sources, credits must appear in the documentation.
      + *
      + * 4. This notice may not be removed or altered.
      + * -------------------------------------------------------------------------
      + *
      + * For compliance with Mr Darwin's terms: this has been very significantly
      + * modified from the free "file" command.
      + * - all-in-one file for compilation convenience when moving from one
      + *   version of Apache to the next.
      + * - Memory allocation is done through the Apache API's pool structure.
      + * - All functions have had necessary Apache API request or server
      + *   structures passed to them where necessary to call other Apache API
      + *   routines.  (i.e. usually for logging, files, or memory allocation in
      + *   itself or a called function.)
      + * - struct magic has been converted from an array to a single-ended linked
      + *   list because it only grows one record at a time, it's only accessed
      + *   sequentially, and the Apache API has no equivalent of realloc().
      + * - Functions have been changed to get their parameters from the server
      + *   configuration instead of globals.  (It should be reentrant now but has
      + *   not been tested in a threaded environment.)
      + * - Places where it used to print results to stdout now saves them in a
      + *   list where they're used to set the MIME type in the Apache request
      + *   record.
      + * - Command-line flags have been removed since they will never be used here.
      + *
      + */
      +
      + + + diff --git a/docs/manual/mod/mod_rewrite.html b/docs/manual/mod/mod_rewrite.html new file mode 100644 index 00000000000..b7c4e633a00 --- /dev/null +++ b/docs/manual/mod/mod_rewrite.html @@ -0,0 +1,1083 @@ + + + + + +Apache module mod_rewrite + + + + +

      Module mod_rewrite (Version 2.3)

      + +This module is contained in the mod_rewrite.c file, with +Apache 1.2 and later. It provides +a rule-based rewriting engine to rewrite requested URLs on the fly. +mod_rewrite is not compiled into the server by +default. To use mod_rewrite you have to enable the following +line in the server build Configuration file: +
      +    Module  rewrite_module   mod_rewrite.o
      +
      + +

      Summary

      + +This module uses a rule-based rewriting engine (based on a +regular-expression parser) to rewrite requested URLs on the fly. + +

      +It supports an unlimited number of additional rule conditions (which can +operate on a lot of variables, including HTTP headers) for granular +matching and external database lookups (either via plain text +tables, DBM hash files or external processes) for advanced URL +substitution. + +

      +It operates on the full URLs (including the PATH_INFO part) both in +per-server context (httpd.conf) and per-dir context (.htaccess) and even +can generate QUERY_STRING parts on result. The rewritten result can lead to internal sub-processing, external request redirection or to internal proxy throughput. + + +

      +The latest version can be found on
      + +http://www.engelschall.com/sw/mod_rewrite/ + +

      +Copyright © 1996 The Apache Group, All rights reserved.
      +Copyright © 1996 Ralf S. Engelschall, All rights reserved. +

      +Written for The Apache Group by +

      + Ralf S. Engelschall
      + rse@engelschall.com
      + http://www.engelschall.com/~rse +
      + + +
      + + +

      +

      Directives

      + + + + +
      + + + +
      + +

      Configuration Directives

      +
      +
      + +

      RewriteEngine

      +Syntax: RewriteEngine {on,off}
      +Default: RewriteEngine off
      +Context: server config, virtual host, per-directory config
      +

      + +The RewriteEngine directive enables or disables the +runtime rewriting engine. If it is set to off this module does +no runtime processing at all. It does not even update the SCRIPT_URx +environment variables. + +

      +Use this directive to disable the module instead of commenting out +all RewriteRule directives! + +

      +


      +

      + +

      RewriteOptions

      +Syntax: RewriteOptions Option ...
      +Default: -None-
      +Context: server config, virtual host, per-directory config
      +

      + +The RewriteOptions directive sets some special options for the +current per-server or per-directory configuration. The Option +strings can be one of the following: + +

        +
      • 'inherit'
        + This forces the current configuration to inherit the configuration of the + parent. In per-virtual-server context this means that the maps, + conditions and rules of the main server get inherited. In per-directory + context this means that conditions and rules of the parent directory's + .htaccess configuration get inherited. +

        +

      + +

      +


      +

      + +

      RewriteLog

      +Syntax: RewriteLog Filename
      +Default: -None-
      +Context: server config, virtual host
      +

      + +The RewriteLog directive sets the name of the file to which the +server logs any rewriting actions it performs. If the name does not begin +with a slash ('/') then it is assumed to be relative to the +Server Root. The directive should occur only once per server +config. + +

      + + +
      +To disable the logging of rewriting actions it is not recommended +to set Filename +to /dev/null, because although the rewriting engine does +not create output to a logfile it still creates the logfile +output internally. This will slow down the server with no advantage to the +administrator! +To disable logging either remove or comment out the +RewriteLog directive or use RewriteLogLevel 0! +
      + +

      +Example: +

      +
      +RewriteLog "/usr/local/var/apache/logs/rewrite.log"
      +
      +
      + +

      +


      +

      + +

      RewriteLogLevel

      +Syntax: RewriteLogLevel Level
      +Default: RewriteLogLevel 0
      +Context: server config, virtual host
      +

      + +The RewriteLogLevel directive sets the verbosity level of the rewriting +logfile. The default level 0 means no logging, while 9 or more means +that practically all actions are logged. + +

      +To disable the logging of rewriting actions simply set Level to 0. +This disables all rewrite action logs. + +

      + + +
      +Notice: Using a high value for Level will slow down your Apache +server dramatically! Use the rewriting logfile only for debugging or at least +at Level not greater than 2! +
      + + +

      +Example: +

      +
      +RewriteLogLevel 3
      +
      +
      + +

      +


      +

      + +

      RewriteMap

      +Syntax: RewriteMap Mapname {txt,dbm,prg}:Filename
      +Default: not used per default
      +Context: server config, virtual host
      +

      + +The RewriteMap directive defines an external Rewriting Map +which can be used inside rule substitution strings by the mapping-functions +to insert/substitute fields through a key lookup. +

      + +The Mapname is the name of the map and will +be used to specify a mapping-function for the substitution strings of a +rewriting rule via + +

      +${ Mapname : LookupKey +| DefaultValue } +
      + +When such a directive occurs the map Mapname +is consulted and the key LookupKey is looked-up. If the key is +found, the map-function directive is substituted by SubstValue. If +the key is not found then it is substituted by DefaultValue. + +

      +The Filename must be a valid Unix filepath, containing one +of the following formats: + +

        +
      1. Plain Text Format +

        + This is an ASCII file which contains either blank lines, comment lines + (starting with a '#' character) or + +

        + MatchingKey SubstValue +
        + + pairs - one per line. You can create such files either manually, + using your favorite editor, or by using the programs + mapcollect and mapmerge from the support + directory of the mod_rewrite distribution. +

        + To declare such a map, prefix Filename with a txt: + string as in the following example: + +

        + + +
        +#
        +#   map.real-to-user -- maps realnames to usernames
        +#
        +
        +Ralf.S.Engelschall    rse   # Bastard Operator From Hell 
        +Dr.Fred.Klabuster     fred  # Mr. DAU
        +
        + +

        + + +
        +RewriteMap real-to-host txt:/path/to/file/map.real-to-user
        +
        + +

        +

      2. DBM Hashfile Format +

        + This is a binary NDBM format file containing the + same contents as the Plain Text Format files. You can create + such a file with any NDBM tool or with the dbmmanage program + from the support directory of the Apache distribution. +

        + To declare such a map, prefix Filename with a dbm: + string. +

        +

      3. Program Format +

        + This is a Unix executable, not a lookup file. To create it you can use + the language of your choice, but the result has to be a runnable Unix + binary (i.e. either object-code or a script with the + magic cookie trick '#!/path/to/interpreter' as the first line). +

        + This program gets started once at startup of the Apache servers and then + communicates with the rewriting engine over its stdin and + stdout filehandles. For each map-function lookup it will + receive the key to look up as a newline-terminated string on + stdin. It then has to give back the looked-up value as a + newline-terminated string on stdout or the four-character string + ``NULL'' if it fails (i.e. there is no corresponding value + for the given key). A trivial program which will implement a 1:1 map + (i.e. key == value) could be: +

        + + +
        +#!/usr/bin/perl
        +$| = 1;
        +while (<STDIN>) {
        +    # ...here any transformations 
        +    # or lookups should occur...
        +    print $_;
        +}
        +
        +

        + But be very careful:
        +

          +
        1. ``Keep the program simple, stupid'' (KISS), because + if this program hangs it will lead to a hang of the Apache server + when the rule occurs. +
        2. Avoid one common mistake: never do buffered I/O on stdout! + This will cause a deadloop! Hence the ``$|=1'' in the above + example... +
        +

        + To declare such a map, prefix Filename with a prg: + string. +

      + +The RewriteMap directive can occur more than once. For each +mapping-function use one RewriteMap directive to declare its +rewriting mapfile. While you cannot declare a map in per-directory +context it is of course possible to use this map in per-directory +context. + +

      + + +
      +For plain text and DBM format files the looked-up keys are cached in-core +until the mtime of the mapfile changes or the server does a +restart. This way you can have map-functions in rules which are used +for every request. This is no problem, because the external lookup +only happens once! +
      + + +

      +


      +

      + +

      RewriteBase

      +Syntax: RewriteBase BaseURL
      +Default: default is the physical directory path
      +Context: per-directory config
      +

      + +The RewriteBase directive explicitly sets the base URL for +per-directory rewrites. As you will see below, RewriteRule can be +used in per-directory config files (.htaccess). There it will act +locally, i.e. the local directory prefix is stripped at this stage of +processing and your rewriting rules act only on the remainder. At the end +it is automatically added. + +

      +When a substitution occurs for a new URL, this module has to +re-inject the URL into the server processing. To be able to do this it needs +to know what the corresponding URL-prefix or URL-base is. By default this +prefix is the corresponding filepath itself. But at most websites URLs are +NOT directly related to physical filename paths, so this assumption +will usually be wrong! There you have to use the RewriteBase +directive to specify the correct URL-prefix. + +

      + + +
      +So, if your webserver's URLs are not directly +related to physical file paths, you have to use RewriteBase in every +.htaccess file where you want to use RewriteRule +directives. +
      + +

      +Example: + +

      + Assume the following per-directory config file: + +

      + + +
      +#
      +#  /abc/def/.htaccess -- per-dir config file for directory /abc/def
      +#  Remember: /abc/def is the physical path of /xyz, i.e. the server
      +#            has a 'Alias /xyz /abc/def' directive e.g.
      +#
      +
      +RewriteEngine On
      +
      +#  let the server know that we are reached via /xyz and not 
      +#  via the physical path prefix /abc/def
      +RewriteBase   /xyz
      +
      +#  now the rewriting rules
      +RewriteRule   ^oldstuff\.html$  newstuff.html
      +
      + +

      +In the above example, a request to /xyz/oldstuff.html gets correctly +rewritten to the physical file /abc/def/newstuff.html. + +

      + + +
      + +For the Apache hackers:
      +The following list gives detailed information about the internal +processing steps: + +

      +

      +Request:
      +  /xyz/oldstuff.html
      +
      +Internal Processing:
      +  /xyz/oldstuff.html     -> /abc/def/oldstuff.html    (per-server Alias)
      +  /abc/def/oldstuff.html -> /abc/def/newstuff.html    (per-dir    RewriteRule)
      +  /abc/def/newstuff.html -> /xyz/newstuff.html        (per-dir    RewriteBase)
      +  /xyz/newstuff.html     -> /abc/def/newstuff.html    (per-server Alias)
      +
      +Result:
      +  /abc/def/newstuff.html
      +
      + +This seems very complicated but is the correct Apache internal processing, +because the per-directory rewriting comes too late in the process. So, +when it occurs the (rewritten) request has to be re-injected into the Apache +kernel! BUT: While this seems like a serious overhead, it really isn't, because +this re-injection happens fully internal to the Apache server and the same +procedure is used by many other operations inside Apache. So, you can be +sure the design and implementation is correct. +
      +
      + +

      + + +

      +


      +

      + +

      RewriteCond

      +Syntax: RewriteCond TestString CondPattern
      +Default: -None-
      +Context: server config, virtual host, per-directory config
      +

      + +The RewriteCond directive defines a rule condition. Precede a +RewriteRule directive with one or more RewriteCond +directives. + +The following rewriting rule is only used if its pattern matches the current +state of the URI AND if these additional conditions apply, too. + +

      +TestString is a string which contains server-variables of the form + +

      +%{ NAME_OF_VARIABLE } +
      + +where NAME_OF_VARIABLE can be a string +of the following list: + +

      + + + + + + + + + + + + + + + +
      +HTTP headers:

      + +HTTP_USER_AGENT
      +HTTP_REFERER
      +HTTP_COOKIE
      +HTTP_FORWARDED
      +HTTP_HOST
      +HTTP_PROXY_CONNECTION
      +HTTP_ACCEPT
      +
      +

      +connection & request:

      + +REMOTE_ADDR
      +REMOTE_HOST
      +REMOTE_USER
      +REMOTE_IDENT
      +REQUEST_METHOD
      +SCRIPT_FILENAME
      +PATH_INFO
      +QUERY_STRING
      +AUTH_TYPE
      +
      +

      +server internals:

      + +DOCUMENT_ROOT
      +SERVER_ADMIN
      +SERVER_NAME
      +SERVER_PORT
      +SERVER_PROTOCOL
      +SERVER_SOFTWARE
      +SERVER_VERSION
      +
      +

      +system stuff:

      + +TIME_YEAR
      +TIME_MON
      +TIME_DAY
      +TIME_HOUR
      +TIME_MIN
      +TIME_SEC
      +TIME_WDAY
      +
      +

      +specials:

      + +API_VERSION
      +THE_REQUEST
      +REQUEST_URI
      +REQUEST_FILENAME
      +
      +

      + +

      + + +
      +These variables all correspond to the similarly named HTTP MIME-headers, C +variables of the Apache server or struct tm fields of the Unix +system. +
      + +

      +Special Notes: +

        +
      1. The variables SCRIPT_FILENAME and REQUEST_FILENAME contain the same +value, i.e. the value of the filename field of the internal +request_rec structure of the Apache server. The first name is just the +commonly known CGI variable name while the second is the consistent +counterpart to REQUEST_URI (which contains the value of the uri +field of request_rec). + +

        +

      2. There is an additional format: %{ENV:variable} where +variable can be any Unix environment variable. This is looked-up +via getenv() from the Apache server process. + +

        +

      3. There is one more additional format: %{HTTP:header} where +header can be any HTTP MIME-header name. This is looked-up +from the HTTP request. Example: %{HTTP:Proxy-Connection} +is the value of the HTTP header ``Proxy-Connection:''. +
      + + +

      +CondPattern is the condition pattern, i.e. a regular expression +which gets applied to the current instance of the TestString, i.e. +TestString gets evaluated and then matched against +CondPattern. + +

      +Remember: CondPattern is a standard +Extended Regular Expression with some additions: + +

        +
      1. You can precede the pattern string with a '!' character +(exclamation mark) to specify a non-matching pattern. + +

        +

      2. +There are some special variants of CondPatterns. Instead of real +regular expression strings you can also use one of the following: +

        +

          +
        • '-d' (is directory)
          +Treats the TestString as a pathname and +tests if it exists and is a directory. +

          +

        • '-f' (is regular file)
          +Treats the TestString as a pathname and +tests if it exists and is a regular file. +

          +

        • '-s' (is regular file with size)
          +Treats the TestString as a pathname and +tests if it exists and is a regular file with size greater than zero. +

          +

        • '-l' (is symbolic link)
          +Treats the TestString as a pathname and +tests if it exists and is a symbolic link. +
        +

        +Notice: All of these tests can also be prefixed by a not ('!') character +to negate their meaning. +

      + +

      +Additionally you can set special flags for CondPattern by appending + +

      +[flags] +
      + +as the third argument to the RewriteCond directive. Flags +is a comma-separated list of the following flags: + +
        +
      • 'nocase|NC' (no case)
        + This makes the condition test case-insensitive, i.e. there is + no difference between 'A-Z' and 'a-z' both in the expanded + TestString and the CondPattern. +

        +

      • 'ornext|OR' (or next condition)
        + Use this to combine rule conditions with a local OR instead of the + implicit AND. Typical example: +

        +

        +RewriteCond %{REMOTE_HOST}  ^host1.*  [OR]
        +RewriteCond %{REMOTE_HOST}  ^host2.*  [OR]
        +RewriteCond %{REMOTE_HOST}  ^host3.*  
        +RewriteRule ...some special stuff for any of these hosts...
        +
        + Without this flag you would have to write down the cond/rule three times. +

        +

      + +

      +Example: +

      + +To rewrite the Homepage of a site according to the ``User-Agent:'' +header of the request, you can use the following: + +
      +RewriteCond  %{HTTP_USER_AGENT}  ^Mozilla.*
      +RewriteRule  ^/$                 /homepage.max.html  [L]
      +
      +RewriteCond  %{HTTP_USER_AGENT}  ^Lynx.*
      +RewriteRule  ^/$                 /homepage.min.html  [L]
      +
      +RewriteRule  ^/$                 /homepage.std.html  [L]
      +
      + +Interpretation: If you use Netscape Navigator as your browser (which identifies +itself as 'Mozilla'), then you get the max homepage, which includes +Frames, etc. If you use the Lynx browser (which is Terminal-based), then you +get the min homepage, which contains no images, no tables, etc. If you +use any other browser you get the standard homepage. +
      +

      + +

      +


      +

      + +

      RewriteRule

      +Syntax: RewriteRule Pattern Substitution
      +Default: -None-
      +Context: server config, virtual host, per-directory config
      + +

      +The RewriteRule directive is the real rewriting workhorse. The +directive can occur more than once. Each directive then defines one single +rewriting rule. The definition order of these rules is +important, because this order is used when applying the rules at +run-time. + +

      +Pattern can be (for Apache 1.1.x a System +V8 and for Apache 1.2.x a POSIX) regular expression +which gets applied to the current URL. Here ``current'' means the value of the +URL when this rule gets applied. This may not be the original requested +URL, because there could be any number of rules before which already matched +and made alterations to it. + +

      +Some hints about the syntax of regular expressions: + +

      + + + + +
      +
      +^           Start of line
      +$           End of line
      +.           Any single character
      +[chars]     One of chars 
      +[^chars]    None of chars 
      +
      +?           0 or 1 of the preceding char
      +*           0 or N of the preceding char
      ++           1 or N of the preceding char
      +
      +\char       escape that specific char 
      +            (e.g. for specifying the chars ".[]()" etc.)
      +
      +(string)    Grouping of chars (the Nth group can be used on the RHS with $N)
      +
      +
      + +

      +Additionally the NOT character ('!') is a possible pattern +prefix. This gives you the ability to negate a pattern; to say, for instance: ``if +the current URL does NOT match to this pattern''. This can be used +for special cases where it is better to match the negative pattern or as a +last default rule. + +

      + + +
      +Notice! When using the NOT character to negate a pattern you cannot +have grouped wildcard parts in the pattern. This is impossible because when +the pattern does NOT match, there are no contents for the groups. In +consequence, if negated patterns are used, you cannot use $N in the +substitution string! +
      + +

      +Substitution of a rewriting rule is the string +which is substituted for (or replaces) the original URL for which +Pattern matched. Beside plain text you can use + +

        +
      1. pattern-group back-references ($N) +
      2. server-variables as in rule condition test-strings (%{VARNAME}) +
      3. mapping-function calls (${mapname:key|default}) +
      + +Back-references are $N (N=1..9) identifiers which +will be replaced by the contents of the Nth group of the matched +Pattern. The server-variables are the same as for the +TestString of a RewriteCond directive. The +mapping-functions come from the RewriteMap directive and are +explained there. These three types of variables are expanded in the order of +the above list. + +

      +As already mentioned above, all the rewriting rules are applied to the +Substitution (in the order of definition in the config file). The +URL is completely replaced by the Substitution and the +rewriting process goes on until there are no more rules (unless explicitly +terminated by a L flag - see below). + +

      +There is a special substitution string named '-' which means: +NO substitution! Sounds silly? No, it is useful to provide rewriting +rules which only match some URLs but do no substitution, e.g. in +conjunction with the C (chain) flag to be able to have more than one +pattern to be applied before a substitution occurs. + +

      + + +
      +Notice: There is a special feature. When you prefix a substitution +field with http://thishost[:thisport] then +mod_rewrite automatically strips it out. This auto-reduction on +implicit external redirect URLs is a useful and important feature when +used in combination with a mapping-function which generates the hostname +part. Have a look at the first example in the example section below to +understand this. +

      +Remember: An unconditional external redirect to your own server will +not work with the prefix http://thishost because of this feature. +To achieve such a self-redirect, you have to use the R-flag (see +below). +

      + +

      +Additionally you can set special flags for Substitution by appending + +

      +[flags] +
      + +as the third argument to the RewriteRule directive. Flags is a +comma-separated list of the following flags: + +
        +
      • 'redirect|R' (force redirect)
        + Prefix Substitution + with http://thishost/ (which makes the new URL a URI) to + force an external redirection. Use it for rules which should + canonicalize the URL and give it back to the client, e.g. translate + ``/~'' into ``/u/'' or always append a slash to + /u/user, etc.
        + Notice: When you use this flag, make sure that the + substitution field is a valid URL! If not, you are redirecting to an invalid + location! +

        +

      • 'proxy|P' (force proxy)
        + This flag forces the substitution part to be internally forced as a proxy + request and immediately (i.e. rewriting rule processing stops here) put + through the proxy module. You have to make sure that the substitution + string is a valid URI (e.g. typically http://) which can + be handled by the Apache proxy module. If not you get an error from + the proxy module. Use this flag to achieve a more powerful implementation + of the mod_proxy directive ProxyPass, to map + some remote stuff into the namespace of the local server. +

        +

      • 'last|L' (last rule)
        + Stop the rewriting process here and + don't apply any more rewriting rules. This corresponds to the Perl + last command or the break command from the C + language. Use this flag to prevent the currently rewritten URL from being + rewritten further by following rules which may be wrong. For + example, use it to rewrite the root-path URL ('/') to a real + one, e.g. '/e/www/'. +

        +

      • 'next|N' (next round)
        + Re-run the rewriting process (starting again with the first rewriting + rule). Here the URL to match is again not the original URL but the URL + from the last rewriting rule. This corresponds to the Perl + next command or the continue command from the C + language. Use this flag to restart the rewriting process, i.e. to + immediately go to the top of the loop.
        + But be careful not to create a deadloop! +

        +

      • 'chain|C' (chained with next rule)
        + This flag chains the current rule with the next rule (which itself can + also be chained with its following rule, etc.). This has the following + effect: if a rule matches, then processing continues as usual, i.e. the + flag has no effect. If the rule does not match, then all following + chained rules are skipped. For instance, use it to remove the + ``.www'' part inside a per-directory rule set when you let an + external redirect happen (where the ``.www'' part should not + occur!). +

        +

      • 'type|T=mime-type' (force MIME type)
        + Force the MIME-type of the target file to be mime-type. For + instance, this can be used to simulate the old mod_alias + directive ScriptAlias which internally forces all files inside + the mapped directory to have a MIME type of + ``application/x-httpd-cgi''. +

        +

      • 'nosubreq|NS' (used only if no internal sub-request)
        + This flag forces the rewriting engine to skip a rewriting rule if the + current request is an internal sub-request. For instance, sub-requests + occur internally in Apache when mod_include tries to find out + information about possible directory default files (index.xxx). + On sub-requests it is not always useful and even sometimes causes a failure + if the complete set of rules is applied. Use this flag to exclude some rules.
        +

        + Use the following rule for your decision: whenever you prefix some URLs + with CGI-scripts to force them to be processed by the CGI-script, the + chance is high that you will run into problems (or even overhead) on sub-requests. + In these cases, use this flag. +

        +

      • 'passthrough|PT' (pass through to next handler)
        + This flag forces the rewriting engine to set the uri field + of the internal request_rec structure to the value + of the filename field. This flag is just a hack to be able + to post-process the output of RewriteRule directives by + Alias, ScriptAlias, Redirect, etc. directives + from other URI-to-filename translators. A trivial example to show the + semantics: + If you want to rewrite /abc to /def via the rewriting + engine of mod_rewrite and then /def to /ghi + with mod_alias: +
        +    RewriteRule ^/abc(.*)  /def$1 [PT]
        +    Alias       /def       /ghi   
        +    
        + If you omit the PT flag then mod_rewrite + will do its job fine, i.e. it rewrites uri=/abc/... to + filename=/def/... as a full API-compliant URI-to-filename + translator should do. Then mod_alias comes and tries to do a + URI-to-filename transition which will not work. +

        + Notice: You have to use this flag if you want to intermix directives + of different modules which contain URL-to-filename translators. The + typical example is the use of mod_alias and + mod_rewrite. + +

        + + +
        + + For the Apache hackers:
        + If the current Apache API had a + filename-to-filename hook additionally to the URI-to-filename hook then + we wouldn't need this flag! But without such a hook this flag is the + only solution. The Apache Group has discussed this problem and will + add such hooks into Apache version 2.0. +
        +
        +

        +

      • 'skip|S=num' (skip next rule(s))
        + This flag forces the rewriting engine to skip the next num rules + in sequence when the current rule matches. Use this to make pseudo + if-then-else constructs: The last rule of the then-clause becomes + a skip=N where N is the number of rules in the else-clause. + (This is not the same as the 'chain|C' flag!) +

        +

      + +

      + + +
      +Remember: Never forget that Pattern gets applied to a complete URL +in per-server configuration files. But in per-directory configuration +files, the per-directory prefix (which always is the same for a specific +directory!) gets automatically removed for the pattern matching and +automatically added after the substitution has been done. This feature is +essential for many sorts of rewriting, because without this prefix stripping +you have to match the parent directory which is not always possible. +

      +There is one exception: If a substitution string starts with +``http://'' then the directory prefix will not be added and an +external redirect or proxy throughput (if flag P is used!) is forced! +

      + +

      + + +
      +Notice! To enable the rewriting engine for per-directory configuration files +you need to set ``RewriteEngine On'' in these files and +``Option FollowSymLinks'' enabled. If your administrator has +disabled override of FollowSymLinks for a user's directory, then +you cannot use the rewriting engine. This restriction is needed for +security reasons. +
      + +

      +Here are all possible substitution combinations and their meanings: + +

      +Inside per-server configuration (httpd.conf)
      +for request ``GET /somepath/pathinfo'':

      + +

      + + + + +
      +
      +Given Rule                                      Resulting Substitution
      +----------------------------------------------  ----------------------------------
      +^/somepath(.*) otherpath$1                      not supported, because invalid!
      +
      +^/somepath(.*) otherpath$1  [R]                 not supported, because invalid!
      +
      +^/somepath(.*) otherpath$1  [P]                 not supported, because invalid!
      +----------------------------------------------  ----------------------------------
      +^/somepath(.*) /otherpath$1                     /otherpath/pathinfo
      +
      +^/somepath(.*) /otherpath$1 [R]                 http://thishost/otherpath/pathinfo
      +                                                via external redirection
      +
      +^/somepath(.*) /otherpath$1 [P]                 not supported, because silly!
      +----------------------------------------------  ----------------------------------
      +^/somepath(.*) http://thishost/otherpath$1      /otherpath/pathinfo
      +
      +^/somepath(.*) http://thishost/otherpath$1 [R]  http://thishost/otherpath/pathinfo
      +                                                via external redirection
      +
      +^/somepath(.*) http://thishost/otherpath$1 [P]  not supported, because silly!
      +----------------------------------------------  ----------------------------------
      +^/somepath(.*) http://otherhost/otherpath$1     http://otherhost/otherpath/pathinfo
      +                                                via external redirection
      +
      +^/somepath(.*) http://otherhost/otherpath$1 [R] http://otherhost/otherpath/pathinfo
      +                                                via external redirection
      +                                                (the [R] flag is redundant)
      +
      +^/somepath(.*) http://otherhost/otherpath$1 [P] http://otherhost/otherpath/pathinfo
      +                                                via internal proxy
      +
      +
      + +

      +Inside per-directory configuration for /somepath
      +(i.e. file .htaccess in dir /physical/path/to/somepath containing +RewriteBase /somepath)
      for +request ``GET /somepath/localpath/pathinfo'':

      + +

      + + + + +
      +
      +Given Rule                                      Resulting Substitution
      +----------------------------------------------  ----------------------------------
      +^localpath(.*) otherpath$1                      /somepath/otherpath/pathinfo
      +
      +^localpath(.*) otherpath$1  [R]                 http://thishost/somepath/otherpath/pathinfo
      +                                                via external redirection
      +
      +^localpath(.*) otherpath$1  [P]                 not supported, because silly!
      +----------------------------------------------  ----------------------------------
      +^localpath(.*) /otherpath$1                     /otherpath/pathinfo
      +
      +^localpath(.*) /otherpath$1 [R]                 http://thishost/otherpath/pathinfo
      +                                                via external redirection
      +
      +^localpath(.*) /otherpath$1 [P]                 not supported, because silly!
      +----------------------------------------------  ----------------------------------
      +^localpath(.*) http://thishost/otherpath$1      /otherpath/pathinfo
      +
      +^localpath(.*) http://thishost/otherpath$1 [R]  http://thishost/otherpath/pathinfo
      +                                                via external redirection
      +
      +^localpath(.*) http://thishost/otherpath$1 [P]  not supported, because silly!
      +----------------------------------------------  ----------------------------------
      +^localpath(.*) http://otherhost/otherpath$1     http://otherhost/otherpath/pathinfo
      +                                                via external redirection
      +
      +^localpath(.*) http://otherhost/otherpath$1 [R] http://otherhost/otherpath/pathinfo
      +                                                via external redirection
      +                                                (the [R] flag is redundant)
      +
      +^localpath(.*) http://otherhost/otherpath$1 [P] http://otherhost/otherpath/pathinfo
      +                                                via internal proxy
      +
      +
      + + + + + + +

      +Example: +

      +

      +We want to rewrite URLs of the form +
      +/ Language +/~ Realname +/.../ File +
      +into +
      +/u/ Username +/.../ File +. Language +
      +

      +We take the rewrite mapfile from above and save it under +/anywhere/map.real-to-user. Then we only have to add the +following lines to the Apache server configuration file: + +

      +
      +RewriteLog   /anywhere/rewrite.log
      +RewriteMap   real-to-user               txt:/anywhere/map.real-to-user
      +RewriteRule  ^/([^/]+)/~([^/]+)/(.*)$   /u/${real-to-user:$2|nobody}/$3.$1
      +
      +
      +
      + + + + + \ No newline at end of file diff --git a/docs/manual/mod/mod_setenvif.html b/docs/manual/mod/mod_setenvif.html new file mode 100644 index 00000000000..d089fa37dba --- /dev/null +++ b/docs/manual/mod/mod_setenvif.html @@ -0,0 +1,275 @@ + + + + Apache module mod_setenvif + + + + +

      Module mod_setenvif

      +

      + This module is contained in the mod_setenvif.c file, and + is compiled in by default. It provides for + the ability to set environment variables based upon attributes of the + request. +

      +

      Summary

      +

      + The mod_setenvif module allows you to set environment + variables according to whether different aspects of the request match + regular expressions you specify. These envariables can be used by + other parts of the server to make decisions about actions to be taken. +

      +

      Directives

      + + +
      +

      The BrowserMatch Directive

      +

      + Syntax: BrowserMatch regex envar[=value] [...] +
      + Default: none +
      + Context: server config +
      + Override: none +
      + Status: Base +
      + Module: mod_setenvif +
      + Compatibility: Apache 1.2 and above +

      +

      + The BrowserMatch directive defines environment variables based on the + User-Agent HTTP request header field. The first argument + should be a POSIX.2 extended regular expression (similar to an + egrep-style regex). The rest of the arguments give the + names of variables to set, and optionally values to which they should + be set. These take the form of +

      +
        +
      1. varname, or +
      2. +
      3. !varname, or +
      4. +
      5. varname=value +
      6. +
      +

      + In the first form, the value will be set to "1". The second + will remove the given variable if already defined, and the third will + set the variable to the value given by value. If a + User-Agent string matches more than one entry, they will + be merged. Entries are processed in the order in which they appear, + and later entries can override earlier ones. +

      +

      + For example: +

      +
      +    BrowserMatch ^Mozilla forms jpeg=yes browser=netscape
      +    BrowserMatch "^Mozilla/[2-3]" tables agif frames javascript
      +    BrowserMatch MSIE !javascript
      +  
      +

      + Note that the regular expression string is + case-sensitive. For case-insensitive matching, see + the + BrowserMatchNoCase + directive. +

      +

      + The BrowserMatch and BrowserMatchNoCase + directives are special cases of the + SetEnvIf + and + SetEnvIfNoCase + directives. The following two lines have the same effect: +

      +
      +   BrowserMatchNoCase Robot is_a_robot
      +   SetEnvIfNoCase User-Agent Robot is_a_robot
      +  
      + +
      +

      + + The BrowserMatchNoCase Directive + +

      +

      + Syntax: BrowserMatchNoCase regex envar[=value] [...] +
      + Default: none +
      + Context: server config +
      + Override: none +
      + Status: Base +
      + Module: mod_setenvif +
      + Compatibility: Apache 1.2 and above +

      +

      + The BrowserMatchNoCase directive is semantically identical to + the + BrowserMatch + directive. However, it provides for case-insensitive matching. For + example: +

      +
      +    BrowserMatchNoCase mac platform=macintosh
      +    BrowserMatchNoCase win platform=windows
      +  
      +

      + The BrowserMatch and BrowserMatchNoCase + directives are special cases of the + SetEnvIf + and + SetEnvIfNoCase + directives. The following two lines have the same effect: +

      +
      +   BrowserMatchNoCase Robot is_a_robot
      +   SetEnvIfNoCase User-Agent Robot is_a_robot
      +  
      + +
      +

      + + The SetEnvIf Directive + +

      +

      + Syntax: SetEnvIf attribute regex envar[=value] [...] +
      + Default: none +
      + Context: server config +
      + Override: none +
      + Status: Base +
      + Module: mod_setenvif +
      + Compatibility: Apache 1.3 and above +

      +

      + The SetEnvIf directive defines environment variables + based on attributes of the request. These attributes can be the + values of various HTTP request header fields (see + RFC2068 + for more information about these), or of other aspects of the request, + including the following: +

      +
        +
      • Remote_Host - the hostname (if available) of the + client making the request +
      • +
      • Remote_Addr - the IP address of the client making + the request +
      • +
      • Remote_User - the authenticated username (if + available) +
      • +
      • Request_Method - the name of the method being used + (GET, POST, et cetera) +
      • +
      • Request_URI - the portion of the URL following the + scheme and host portion +
      • +
      +

      + Some of the more commonly used request header field names include + Host, User-Agent, and Referer. +

      +

      + Example: +

      +
      +   SetEnvIf Request_URI "\.(gif|jpg|xbm)$" object_is_image
      +   SetEnvIf Referer www\.mydomain\.com intra_site_referral
      +  
      +

      + The first will set the envariable object_is_image if the + request was for an image file, and the second sets + intra_site_referral if the referring page was somewhere + on the www.mydomain.com Web site. +

      + +
      +

      + + The SetEnvIfNoCase Directive + +

      +

      + Syntax: SetEnvIfNoCase + attribute regex envar[=value] [...] +
      + Default: none +
      + Context: server config +
      + Override: none +
      + Status: Base +
      + Module: mod_setenvif +
      + Compatibility: Apache 1.3 and above +

      +

      + The SetEnvIfNoCase is semantically identical to the + SetEnvIf + directive, and differs only in that the regular expression matching is + performed in a case-insensitive manner. For example: +

      +
      +   SetEnvIfNoCase Host Apache\.Org site=apache
      +  
      +

      + This will cause the site envariable to be set to + "apache" if the HTTP request header field + Host: was included and contained Apache.Org, + apache.org, or any other combination. +

      + + + + diff --git a/docs/manual/mod/mod_so.html b/docs/manual/mod/mod_so.html new file mode 100644 index 00000000000..9ceefee7d12 --- /dev/null +++ b/docs/manual/mod/mod_so.html @@ -0,0 +1,95 @@ + + + +Apache module mod_so + + + + + +

      Module mod_so

      + +This module is contained in the mod_so.c file, and is not +compiled in by default. It provides for loading of executable code and +modules into the server at start-up time, on Unix systems. Win32 +systems use mod_dll instead. This module is +only available in Apache 1.3 and up. + +

      Summary

      + +This is an experimental module. On selected operating systems it can +be used to load modules into Apache at runtime, rather than requiring +a recompilation. + + +

      Directives

      + +
      + + +

      LoadFile

      + +Syntax: LoadFile filename filename ...
      +Context: server config
      +Status: Experimental
      +Module: mod_so

      + +The LoadFile directive links in the named object files or libraries when +the server is started; this is used to load additional code which +may be required for some module to work. Filename is relative +to ServerRoot.


      + +

      LoadModule

      + +Syntax: LoadModule module filename
      +Context: server config
      +Status: Experimental
      +Module: mod_so

      + +The LoadModule directive links in the object file or library filename +and adds the module structure named module to the list of active +modules. Module is the name of the external variable of type +module in the file. Example: +

      +LoadModule status_module modules/mod_status.so +
      +loads the module in the modules subdirectory of the ServerRoot.

      + + + + + + diff --git a/docs/manual/mod/mod_speling.html b/docs/manual/mod/mod_speling.html new file mode 100644 index 00000000000..2db6c4773a9 --- /dev/null +++ b/docs/manual/mod/mod_speling.html @@ -0,0 +1,88 @@ + + + + Apache module mod_speling + + + + +

      Module mod_speling

      +

      + This module is contained in the mod_speling.c file, + and is not compiled in by default. + It attempts to correct misspellings of + URLs that users might have entered, by ignoring capitalization + and by allowing up to one misspelling.
      + This catches the majority of misspelled requests. An automatic + "spelling corrected" redirection is returned if only one matching + document was found, and a list of matches is returned if more than + one document with a sufficiently similar name is found. +

      + +

      Summary

      +

      + Requests to documents sometimes cannot be served by the core apache + server because the request was misspelled or miscapitalized. This + module addresses this problem by trying to find a matching document, + even after all other modules gave up. It does its work by comparing + each document name in the requested directory against the requested + document name without regard to case, and allowing + up to one misspelling (character insertion / omission + / transposition or wrong character). A list is built with all document + names which were matched using this strategy. +

      +

      + If, after scanning the directory, +

        +
      • no matching document was found, Apache will proceed as usual + and return a "document not found" error. +
      • only one document is found that "almost" matches the request, + then it is returned in the form of a redirection response. +
      • more than one document with a close match was found, then + the list of the matches is returned to the client, and the client + can select the correct candidate. +
      +

      + +

      Directives

      + + +
    24. CheckSpelling +
    25. + +
      +

      CheckSpelling

      + + Syntax: CheckSpelling on/off
      + Default: CheckSpelling Off
      + Context: server config, virtual host
      + Status: Base
      + Module: mod_speling
      + Compatibility: CheckSpelling was available as a separately + available module for Apache 1.1, but was limited to miscapitalizations. + As of Apache 1.3, it is part of the apache distribution.

      + + This directive enables or disables the spelling module. When enabled, + keep in mind that +

        +
      • the directory scan which is necessary for the spelling + correction will have an impact on the server's performance + when many spelling corrections have to be performed at the same time. +
      • the document trees should not contain sensitive files which could + be matched inadvertently by a spelling "correction". +
      • the module is unable to correct misspelled user names + (as in http://my.host/~apahce/), just file names or + directory names. +
      + + + + + diff --git a/docs/manual/mod/mod_unique_id.html b/docs/manual/mod/mod_unique_id.html new file mode 100644 index 00000000000..ff71fce9342 --- /dev/null +++ b/docs/manual/mod/mod_unique_id.html @@ -0,0 +1,180 @@ + + + +Apache module mod_unique_id + + + + + +

      Module mod_unique_id

      + +This module provides a magic token for each request which is guaranteed +to be unique across "all" requests under very specific conditions. +The unique identifier is even unique across multiple machines in a +properly configured cluster of machines. The environment variable +UNIQUE_ID is set to the identifier for each request. +Unique identifiers are useful for various reasons which are beyond the +scope of this document. + +

      Theory

      + +

      +First a brief recap of how the Apache server works on Unix machines. +This feature currently isn't supported on Windows NT. On Unix machines, +Apache creates several children; the children process requests one at +a time. Each child can serve multiple requests in its lifetime. For the +purpose of this discussion, the children don't share any data +with each other. We'll refer to the children as httpd processes. + +

      +Your website has one or more machines under your administrative control; +together we'll call them a cluster of machines. Each machine can +possibly run multiple instances of Apache. All of these collectively +are considered "the universe", and with certain assumptions we'll +show that in this universe we can generate unique identifiers for each +request, without extensive communication between machines in the cluster. + +

      +The machines in your cluster should satisfy these requirements. +(Even if you have only one machine you should synchronize its clock +with NTP.) + +

        +
      • The machines' times are synchronized via NTP or other network time + protocol. + +
      • The machines' hostnames all differ, such that the module can do a + hostname lookup on the hostname and receive a different IP address + for each machine in the cluster. +
      + +

      +As far as operating system assumptions go, we assume that pids (process +ids) fit in 32-bits. If the operating system uses more than 32-bits +for a pid, the fix is trivial but must be performed in the code. + +

      +Given those assumptions, at a single point in time we can identify +any httpd process on any machine in the cluster from all other httpd +processes. The machine's IP address and the pid of the httpd process +are sufficient to do this. So in order to generate unique identifiers +for requests we need only distinguish between different points in time. + +

      +To distinguish time we will use a Unix timestamp (seconds since January +1, 1970 UTC), and a 16-bit counter. The timestamp has only one second +granularity, so the counter is used to represent up to 65536 values +during a single second. The quadruple ( ip_addr, pid, time_stamp, +counter ) is sufficient to enumerate 65536 requests per second per +httpd process. There are issues however with pid reuse over +time, and the counter is used to alleviate this issue. + +

      +When an httpd child is created, the counter is initialized with ( +current microseconds divided by 10 ) modulo 65536 (this formula was +chosen to eliminate some variance problems with the low order bits of +the microsecond timers on some systems). When a unique identifier is +generated, the time stamp used is the time the request arrived at the +web server. The counter is incremented every time an identifier is +generated (and allowed to roll over). + +

      +The kernel generates a pid for each process as it forks the process, and +pids are allowed to roll over (they're 16-bits on many Unixes, but newer +systems have expanded to 32-bits). So over time the same pid will be +reused. However unless it is reused within the same second, it does not +destroy the uniqueness of our quadruple. That is, we assume the system +does not spawn 65536 processes in a one second interval (it may even be +32768 processes on some Unixes, but even this isn't likely to happen). + +

      +Suppose that time repeats itself for some reason. That is, suppose that +the system's clock is screwed up and it revisits a past time (or it is +too far forward, is reset correctly, and then revisits the future time). +In this case we can easily show that we can get pid and time stamp reuse. +The choice of initializer for the counter is intended to help defeat this. +Note that we really want a random number to initialize the counter, +but there aren't any readily available numbers on most systems (i.e. you +can't use rand() because you need to seed the generator, and can't seed +it with the time because time, at least at one second resolution, has +repeated itself). This is not a perfect defense. + +

      +How good a defense is it? Well suppose that one of your machines serves +at most 500 requests per second (which is a very reasonable upper bound +at this writing, because systems generally do more than just shovel out +static files). To do that it will require a number of children which +depends on how many concurrent clients you have. But we'll be pessimistic +and suppose that a single child is able to serve 500 requests per second. +There are 1000 possible starting counter values such that two sequences +of 500 requests overlap. So there is a 1.5% chance that if time (at one +second resolution) repeats itself this child will repeat a counter value, +and uniqueness will be broken. This was a very pessimistic example, +and with real world values it's even less likely to occur. If your +system is such that it's still likely to occur, then perhaps you should +make the counter 32 bits (by editing the code). + +

      +You may be concerned about the clock being "set back" during summer +daylight savings. However this isn't an issue because the times used here +are UTC, which "always" go forward. Note that x86 based Unixes may need +proper configuration for this to be true -- they should be configured to +assume that the motherboard clock is on UTC and compensate appropriately. +But even still, if you're running NTP then your UTC time will be correct +very shortly after reboot. + +

      +The UNIQUE_ID environment variable is constructed by +encoding the 112-bit (32-bit IP address, 32 bit pid, 32 bit time stamp, +16 bit counter) quadruple using the alphabet [A-Za-z0-9@-] +in a manner similar to MIME base64 encoding, producing 19 characters. +The MIME base64 alphabet is actually [A-Za-z0-9+/] however ++ and / need to be specially encoded in URLs, +which makes them less desirable. All values are encoded in network +byte ordering so that the encoding is comparable across architectures of +different byte ordering. The actual ordering of the encoding is: time +stamp, IP address, pid, counter. This ordering has a purpose, but it +should be emphasized that applications should not dissect the encoding. +Applications should treat the entire encoded UNIQUE_ID as an +opaque token, which can be compared against other UNIQUE_IDs +for equality only. + +

      +The ordering was chosen such that it's possible to change the encoding +in the future without worrying about collision with an existing database +of UNIQUE_IDs. The new encodings should also keep the time +stamp as the first element, and can otherwise use the same alphabet and +bit length. Since the time stamps are essentially an increasing sequence, +it's sufficient to have a flag second in which all machines in the +cluster stop serving any request, and stop using the old encoding format. +Afterwards they can resume requests and begin issuing the new encodings. + +

      +This we believe is a relatively portable solution to this problem. It can +be extended to multithreaded systems like Windows NT, and can grow with +future needs. The identifiers generated have essentially an infinite +life-time because future identifiers can be made longer as required. +Essentially no communication is required between machines in the cluster +(only NTP synchronization is required, which is low overhead), and no +communication between httpd processes is required (the communication is +implicit in the pid value assigned by the kernel). In very specific +situations the identifier can be shortened, but more information needs +to be assumed (for example the 32-bit IP address is overkill for any +site, but there is no portable shorter replacement for it). + +


      + +

      Directives

      + +mod_unique_id has no directives. + + + + diff --git a/docs/manual/platform/perf-bsd44.html b/docs/manual/platform/perf-bsd44.html deleted file mode 100644 index 1f3a6010c8f..00000000000 --- a/docs/manual/platform/perf-bsd44.html +++ /dev/null @@ -1,215 +0,0 @@ - - -Running a High-Performance Web Server for BSD - - - - - - -

      Running a High-Performance Web Server for BSD

      - -Like other OS's, the listen queue is often the first limit hit. The -following are comments from "Aaron Gifford <agifford@InfoWest.COM>" -on how to fix this on BSDI 1.x, 2.x, and FreeBSD 2.0 (and earlier): - -

      - -Edit the following two files: -

      /usr/include/sys/socket.h
      - /usr/src/sys/sys/socket.h
      -In each file, look for the following: -
      -    /*
      -     * Maximum queue length specifiable by listen.
      -     */
      -    #define SOMAXCONN       5
      -
      - -Just change the "5" to whatever appears to work. I bumped the two -machines I was having problems with up to 32 and haven't noticed the -problem since. - -

      - -After the edit, recompile the kernel and recompile the Apache server -then reboot. - -

      - -FreeBSD 2.1 seems to be perfectly happy, with SOMAXCONN -set to 32 already. - -

      - - -Addendum for very heavily loaded BSD servers
      -
      -from Chuck Murcko <chuck@telebase.com> - -

      - -If you're running a really busy BSD Apache server, the following are useful -things to do if the system is acting sluggish:

      - -

        - -
      • Run vmstat to check memory usage, page/swap rates, etc. - -
      • Run netstat -m to check mbuf usage - -
      • Run fstat to check file descriptor usage - -
      - -These utilities give you an idea what you'll need to tune in your kernel, -and whether it'll help to buy more RAM. - -Here are some BSD kernel config parameters (actually BSDI, but pertinent to -FreeBSD and other 4.4-lite derivatives) from a system getting heavy usage. -The tools mentioned above were used, and the system memory was increased to -48 MB before these tuneups. Other system parameters remained unchanged. - -

      - -

      -maxusers        256
      -
      - -Maxusers drives a lot of other kernel parameters: - -
        - -
      • Maximum # of processes - -
      • Maximum # of processes per user - -
      • System wide open files limit - -
      • Per-process open files limit - -
      • Maximum # of mbuf clusters - -
      • Proc/pgrp hash table size - -
      - -The actual formulae for these derived parameters are in -/usr/src/sys/conf/param.c. -These calculated parameters can also be overridden (in part) by specifying -your own values in the kernel configuration file: - -
      -# Network options. NMBCLUSTERS defines the number of mbuf clusters and
      -# defaults to 256. This machine is a server that handles lots of traffic,
      -# so we crank that value.
      -options         SOMAXCONN=256           # max pending connects
      -options         NMBCLUSTERS=4096        # mbuf clusters at 4096
      -
      -#
      -# Misc. options
      -#
      -options         CHILD_MAX=512           # maximum number of child processes
      -options         OPEN_MAX=512            # maximum fds (breaks RPC svcs)
      -
      - -SOMAXCONN is not derived from maxusers, so you'll always need to increase -that yourself. We used a value guaranteed to be larger than Apache's -current default listen() backlog of 128. - -

      - -In many cases, NMBCLUSTERS must be set much larger than would appear -necessary at first glance. The reason for this is that if the browser -disconnects in mid-transfer, the socket fd associated with that particular -connection ends up in the TIME_WAIT state for several minutes, during -which time its mbufs are not yet freed. - -

      - -Some more info on mbuf clusters (from sys/mbuf.h): -

      -/*
      - * Mbufs are of a single size, MSIZE (machine/machparam.h), which
      - * includes overhead.  An mbuf may add a single "mbuf cluster" of size
      - * MCLBYTES (also in machine/machparam.h), which has no additional overhead
      - * and is used instead of the internal data area; this is done when
      - * at least MINCLSIZE of data must be stored.
      - */
      -
      - -

      - -CHILD_MAX and OPEN_MAX are set to allow up to 512 child processes (different -than the maximum value for processes per user ID) and file descriptors. -These values may change for your particular configuration (a higher OPEN_MAX -value if you've got modules or CGI scripts opening lots of connections or -files). If you've got a lot of other activity besides httpd on the same -machine, you'll have to set NPROC higher still. In this example, the NPROC -value derived from maxusers proved sufficient for our load. - -

      - -Caveats - -

      - -Be aware that your system may not boot with a kernel that is configured -to use more resources than you have available system RAM. ALWAYS -have a known bootable kernel available when tuning your system this way, -and use the system tools beforehand to learn if you need to buy more -memory before tuning. - -

      - -RPC services will fail when the value of OPEN_MAX is larger than 256. -This is a function of the original implementations of the RPC library, -which used a byte value for holding file descriptors. BSDI has partially -addressed this limit in its 2.1 release, but a real fix may well await -the redesign of RPC itself. - -

      - -Finally, there's the hard limit of child processes configured in Apache. - -

      - -For versions of Apache later than 1.0.5 you'll need to change the -definition for HARD_SERVER_LIMIT in httpd.h and recompile -if you need to run more than the default 150 instances of httpd. - -

      - -From conf/httpd.conf-dist: - -

      -# Limit on total number of servers running, i.e., limit on the number
      -# of clients who can simultaneously connect --- if this limit is ever
      -# reached, clients will be LOCKED OUT, so it should NOT BE SET TOO LOW.
      -# It is intended mainly as a brake to keep a runaway server from taking
      -# Unix with it as it spirals down...
      -
      -MaxClients 150
      -
      - -Know what you're doing if you bump this value up, and make sure you've -done your system monitoring, RAM expansion, and kernel tuning beforehand. -Then you're ready to service some serious hits! - -

      - -Thanks to Tony Sanders and Chris Torek at BSDI for their -helpful suggestions and information. - -


      - -

      More welcome!

      - -If you have tips to contribute, send mail to brian@organic.com - -


      -Home -Index - - diff --git a/docs/manual/platform/perf-dec.html b/docs/manual/platform/perf-dec.html deleted file mode 100644 index cd027bfc607..00000000000 --- a/docs/manual/platform/perf-dec.html +++ /dev/null @@ -1,267 +0,0 @@ - -Performance Tuning Tips for Digital Unix - - -

      Performance Tuning Tips for Digital Unix

      - -Below is a set of newsgroup posts made by an engineer from DEC in -response to queries about how to modify DEC's Digital Unix OS for more -heavily loaded web sites. Copied with permission. - -
      - -

      Update

      -From: Jeffrey Mogul
      -Date: Fri, 28 Jun 96 16:07:56 MDT
      - -
        -
      1. The advice given in the README file regarding the - "tcbhashsize" variable is incorrect. The largest value - this should be set to is 1024. Setting it any higher - will have the perverse result of disabling the hashing - mechanism. - -
      2. Patch ID OSF350-146 has been superseded by -
        - Patch ID OSF350-195 for V3.2C
        - Patch ID OSF360-350195 for V3.2D -
        - Patch IDs for V3.2E and V3.2F should be available soon. - There is no known reason why the Patch ID OSF360-350195 - won't work on these releases, but such use is not officially - supported by Digital. This patch kit will not be needed for - V3.2G when it is released. - - -
        - - -
        -From           mogul@pa.dec.com (Jeffrey Mogul)
        -Organization   DEC Western Research
        -Date           30 May 1996 00:50:25 GMT
        -Newsgroups     comp.unix.osf.osf1
        -Message-ID     <4oirch$bc8@usenet.pa.dec.com>
        -Subject        Re: Web Site Performance
        -References     1
        -
        -
        -
        -In article <skoogDs54BH.9pF@netcom.com> skoog@netcom.com (Jim Skoog) writes:
        ->Where are the performance bottlenecks for Alpha AXP running the
        ->Netscape Commerce Server 1.12 with high volume internet traffic?
        ->We are evaluating network performance for a variety of Alpha AXP
        ->runing DEC UNIX 3.2C, which run DEC's seal firewall and behind
        ->that Alpha 1000 and 2100 webservers.
        -
        -Our experience (running such Web servers as altavista.digital.com
        -and www.digital.com) is that there is one important kernel tuning
        -knob to adjust in order to get good performance on V3.2C.  You
        -need to patch the kernel global variable "somaxconn" (use dbx -k
        -to do this) from its default value of 8 to something much larger.
        -
        -How much larger?  Well, no larger than 32767 (decimal).  And
        -probably no less than about 2048, if you have a really high volume
        -(millions of hits per day), like AltaVista does.
        -
        -This change allows the system to maintain more than 8 TCP
        -connections in the SYN_RCVD state for the HTTP server.  (You
        -can use "netstat -An |grep SYN_RCVD" to see how many such
        -connections exist at any given instant).
        -
        -If you don't make this change, you might find that as the load gets
        -high, some connection attempts take a very long time.  And if a lot
        -of your clients disconnect from the Internet during the process of
        -TCP connection establishment (this happens a lot with dialup
        -users), these "embryonic" connections might tie up your somaxconn
        -quota of SYN_RCVD-state connections.  Until the kernel times out
        -these embryonic connections, no other connections will be accepted,
        -and it will appear as if the server has died.
        -
        -The default value for somaxconn in Digital UNIX V4.0 will be quite
        -a bit larger than it has been in previous versions (we inherited
        -this default from 4.3BSD).
        -
        -Digital UNIX V4.0 includes some other performance-related changes
        -that significantly improve its maximum HTTP connection rate.  However,
        -we've been using V3.2C systems to front-end for altavista.digital.com
        -with no obvious performance bottlenecks at the millions-of-hits-per-day
        -level.
        -
        -We have some Webstone performance results available at
        -        http://www.digital.com/info/alphaserver/news/webff.html
        -I'm not sure if these were done using V4.0 or an earlier version
        -of Digital UNIX, although I suspect they were done using a test
        -version of V4.0.
        -
        --Jeff
        -
        -
        - ----------------------------------------------------------------------------- - -From mogul@pa.dec.com (Jeffrey Mogul) -Organization DEC Western Research -Date 31 May 1996 21:01:01 GMT -Newsgroups comp.unix.osf.osf1 -Message-ID <4onmmd$mmd@usenet.pa.dec.com> -Subject Digital UNIX V3.2C Internet tuning patch info - ----------------------------------------------------------------------------- - -Something that probably few people are aware of is that Digital -has a patch kit available for Digital UNIX V3.2C that may improve -Internet performance, especially for busy web servers. - -This patch kit is one way to increase the value of somaxconn, -which I discussed in a message here a day or two ago. - -I've included in this message the revised README file for this -patch kit below. Note that the original README file in the patch -kit itself may be an earlier version; I'm told that the version -below is the right one. - -Sorry, this patch kit is NOT available for other versions of Digital -UNIX. Most (but not quite all) of these changes also made it into V4.0, -so the description of the various tuning parameters in this README -file might be useful to people running V4.0 systems. - -This patch kit does not appear to be available (yet?) from - http://www.service.digital.com/html/patch_service.html -so I guess you'll have to call Digital's Customer Support to get it. - --Jeff - -DESCRIPTION: Digital UNIX Network tuning patch - - Patch ID: OSF350-146 - - SUPERSEDED PATCHES: OSF350-151, OSF350-158 - - This set of files improves the performance of the network - subsystem on a system being used as a web server. There are - additional tunable parameters included here, to be used - cautiously by an informed system administrator. - -TUNING - - To tune the web server, the number of simultaneous socket - connection requests are limited by: - - somaxconn Sets the maximum number of pending requests - allowed to wait on a listening socket. 
The - default value in Digital UNIX V3.2 is 8. - This patch kit increases the default to 1024, - which matches the value in Digital UNIX V4.0. - - sominconn Sets the minimum number of pending connections - allowed on a listening socket. When a user - process calls listen with a backlog less - than sominconn, the backlog will be set to - sominconn. sominconn overrides somaxconn. - The default value is 1. - - The effectiveness of tuning these parameters can be monitored by - the sobacklog variables available in the kernel: - - sobacklog_hiwat Tracks the maximum pending requests to any - socket. The initial value is 0. - - sobacklog_drops Tracks the number of drops exceeding the - socket set backlog limit. The initial - value is 0. - - somaxconn_drops Tracks the number of drops exceeding the - somaxconn limit. When sominconn is larger - than somaxconn, tracks the number of drops - exceeding sominconn. The initial value is 0. - - TCP timer parameters also affect performance. Tuning the following - requires some knowledge of the characteristics of the network. - - tcp_msl Sets the tcp maximum segment lifetime. - This is the maximum lifetime in half - seconds that a packet can be in transit - on the network. This value, when doubled, - is the length of time a connection remains - in the TIME_WAIT state after an incoming - close request is processed. The unit is - specified in 1/2 seconds, the initial - value is 60. - - tcp_rexmit_interval_min - Sets the minimum TCP retransmit interval. - For some WAN networks the default value may - be too short, causing unnecessary duplicate - packets to be sent. The unit is specified - in 1/2 seconds, the initial value is 1. - - tcp_keepinit This is the amount of time a partially - established connection will sit on the listen - queue before timing out (e.g. if a client - sends a SYN but never answers our SYN/ACK). - Partially established connections tie up slots - on the listen queue. 
If the queue starts to - fill with connections in SYN_RCVD state, - tcp_keepinit can be decreased to make those - partial connects time out sooner. This should - be used with caution, since there might be - legitimate clients that are taking a while - to respond to SYN/ACK. The unit is specified - in 1/2 seconds, the default value is 150 - (i.e. 75 seconds). - - The hashlist size for the TCP inpcb lookup table is regulated by: - - tcbhashsize The number of hash buckets used for the - TCP connection table used in the kernel. - The initial value is 32. For best results, - should be specified as a power of 2. For - busy Web servers, set this to 2048 or more. - - The hashlist size for the interface alias table is regulated by: - - inifaddr_hsize The number of hash buckets used for the - interface alias table used in the kernel. - The initial value is 32. For best results, - should be specified as a power of 2. - - ipport_userreserved The maximum number of concurrent non-reserved, - dynamically allocated ports. Default range - is 1025-5000. The maximum value is 65535. - This limits the number of times you can - simultaneously telnet or ftp out to connect - to other systems. - - tcpnodelack Don't delay acknowledging TCP data; this - can sometimes improve performance of locally - run CAD packages. Default value is 0, - the enabled value is 1. - - Digital UNIX version: - - V3.2C -Feature V3.2C patch V4.0 - ======= ===== ===== ==== -somaxconn X X X -sominconn - X X -sobacklog_hiwat - X - -sobacklog_drops - X - -somaxconn_drops - X - -tcpnodelack X X X -tcp_keepidle X X X -tcp_keepintvl X X X -tcp_keepcnt - X X -tcp_keepinit - X X -TCP keepalive per-socket - - X -tcp_msl - X - -tcp_rexmit_interval_min - X - -TCP inpcb hashing - X X -tcbhashsize - X X -interface alias hashing - X X -inifaddr_hsize - X X -ipport_userreserved - X - -sysconfig -q inet - - X -sysconfig -q socket - - X - -
        diff --git a/docs/manual/platform/perf-hp.html b/docs/manual/platform/perf-hp.html new file mode 100644 index 00000000000..0bf286747df --- /dev/null +++ b/docs/manual/platform/perf-hp.html @@ -0,0 +1,124 @@ + + + +Running a High-Performance Web Server on HPUX + + + + + +
        + [APACHE DOCUMENTATION] +

        + Apache HTTP Server Version 1.3 +

        +
        + +
        +

        Running a High-Performance Web Server for HPUX

        + +
        +Date: Wed, 05 Nov 1997 16:59:34 -0800
        +From: Rick Jones <raj@cup.hp.com>
        +Reply-To: raj@cup.hp.com
        +Organization: Network Performance
        +Subject: HP-UX tuning tips
        +
        + +Here are some tuning tips for HP-UX to add to the tuning page. + +

        + +For HP-UX 9.X: Upgrade to 10.20
        +For HP-UX 10.[00|01|10]: Upgrade to 10.20 + +

        + +For HP-UX 10.20: + +

        + +Install the latest cumulative ARPA Transport Patch. This will allow you +to configure the size of the TCP connection lookup hash table. The +default is 256 buckets and must be set to a power of two. This is +accomplished with adb against the *disc* image of the kernel. The +variable name is tcp_hash_size. + +

        + +How to pick the value? Examine the output of + +ftp://ftp.cup.hp.com/dist/networking/tools/connhist and see how many +total TCP connections exist on the system. You probably want that number +divided by the hash table size to be reasonably small, say less than 10. +Folks can look at HP's SPECweb96 disclosures for some common settings. +These can be found at +http://www.specbench.org/. If an HP-UX system was +performing at 1000 SPECweb96 connections per second, the TIME_WAIT time +of 60 seconds would mean 60,000 TCP "connections" being tracked. + +

        + +Folks can check their listen queue depths with + +ftp://ftp.cup.hp.com/dist/networking/misc/listenq. + +

        + +If folks are running Apache on a PA-8000 based system, they should +consider "chatr'ing" the Apache executable to have a large page size. +This would be "chatr +pi L <BINARY>." The GID of the running executable +must have MLOCK privileges. Setprivgrp(1m) should be consulted for +assigning MLOCK. The change can be validated by running Glance and +examining the memory regions of the server(s) to make sure that they +show a non-trivial fraction of the text segment being locked. + +

        + +If folks are running Apache on MP systems, they might consider writing a +small program that uses mpctl() to bind processes to processors. A +simple pid % numcpu algorithm is probably sufficient. This might even go +into the source code. + +

        + +If folks are concerned about the number of FIN_WAIT_2 connections, they +can use nettune to shrink the value of tcp_keepstart. However, they +should be careful there - certainly do not make it less than oh two to +four minutes. If tcp_hash_size has been set well, it is probably OK to +let the FIN_WAIT_2's take longer to timeout (perhaps even the default +two hours) - they will not on average have a big impact on performance. + +

        + +There are other things that could go into the code base, but that might +be left for another email. Feel free to drop me a message if you or +others are interested. + +

        + +sincerely, + +

        + +rick jones
        + +http://www.cup.hp.com/netperf/NetperfPage.html + +


        + +

        + Apache HTTP Server Version 1.3 +

        + +Index +Home + + + diff --git a/docs/manual/platform/perf.html b/docs/manual/platform/perf.html deleted file mode 100644 index d2a88e23b3e..00000000000 --- a/docs/manual/platform/perf.html +++ /dev/null @@ -1,134 +0,0 @@ - - - -Hints on Running a High-Performance Web Server - - - - -

        Hints on Running a High-Performance Web Server

        - -Running Apache on a heavily loaded web server, one often encounters -problems related to the machine and OS configuration. "Heavy" is -relative, of course - but if you are seeing more than a couple hits -per second on a sustained basis you should consult the pointers on -this page. In general the suggestions involve how to tune your kernel -for the heavier TCP load, hardware/software conflicts that arise, etc. - - - -
        - - -

        A/UX (Apple's UNIX)

        -
        - -If you are running Apache on A/UX, a page that gives some helpful -performance hints (concerning the listen() queue and using -virtual hosts) -can be found here - -


        - - -

        BSD-based (BSDI, FreeBSD, etc)

        -
        - -Quick and -detailed -performance tuning hints for BSD-derived systems. - -


        - - -

        Digital UNIX

        -
        - -We have some newsgroup postings on how to -tune Digital UNIX 3.2 and 4.0. - -


        - - -

        Hewlett-Packard

        -
        - -Some documentation on tuning HP machines can be found at http://www.software.hp.com/internet/perf/tuning.html. - -


        - - -

        Linux

        -
        - -The most common problem on Linux shows up on heavily-loaded systems -where the whole server will appear to freeze for a couple of minutes -at a time, and then come back to life. This has been traced to a -listen() queue overload - certain Linux implementations have a low -value set for the incoming connection queue which can cause problems. -Please see our Using Apache on -Linux page for more info on how to fix this. - -


        - - -

        SGI

        - -
        - -


        - - -

        Solaris 2.4

        -
        - -The Solaris 2.4 TCP implementation has a few inherent limitations that -only became apparent under heavy loads. This has been fixed to some -extent in 2.5 (and completely revamped in 2.6), but for now consult -the following URL for tips on how to expand the capabilities if you -are finding slowdowns and lags are hurting performance. - - - -


        - - -

        SunOS 4.x

        -
        - -More information on tuning SOMAXCONN on SunOS can be found at - -http://www.islandnet.com/~mark/somaxconn.html. - -


        - -

        More welcome!

        - -If you have tips to contribute, send mail to brian@organic.com - -


        -Home -Index - - diff --git a/docs/manual/platform/windows.html b/docs/manual/platform/windows.html new file mode 100644 index 00000000000..43889ca9089 --- /dev/null +++ b/docs/manual/platform/windows.html @@ -0,0 +1,229 @@ + + + +Using Apache with Microsoft Windows + + + + + + +

        Using Apache With Microsoft Windows

        + +

        This document explains how to compile, install, configure and run + Apache 1.3a1 (or later) under Microsoft Windows. Please note that at + this time, Windows support is entirely experimental, and is + recommended only for experienced users. The Apache Group does not + guarantee that this software will work as documented, or even at + all. If you find any bugs, or wish to contribute in other ways, please + use our bug reporting + page.

        + +
        + + + +
        + +

        Requirements

        + +

        Apache 1.3a1 requires the following:

        + +
          +
        • Microsoft Windows NT 4.0*, or Windows 95. +
        • An Intel-based PC-compatible capable of running the above OS (exact + requirements unknown) with a connection to a TCP/IP network. +
        • Microsoft Visual C++ 5.0 or later. +
        + +

        * Apache may run with Windows NT 3.5.1, but + has not been tested.

        + +

        Apache 1.3a1 is available only in source form. Future releases will + contain prebuilt binaries for use by those without compilers (which we + understand are the vast majority of Windows users), however the + current release requires Microsoft Visual C++ 5.0 or later. The Apache + Group is releasing 1.3a1 only as source to limit the alpha release to + those who have the tools and knowledge to assist with the development + processes.

        + +

        This documentation assumes good working knowledge of Microsoft + Windows, Microsoft Visual C++, and the Apache web server (for + Unix).

        + +

        Downloading Apache for Windows

        + +

        Information on the latest version of Apache can be found on the Apache +web server at http://www.apache.org/. This will +list the current release, any more recent alpha or beta-test release, +together with details of mirror web and anonymous ftp sites.

        + +

        You will be able to download Apache 1.3a1 or a later release in + several forms, including a WinZip (.zip) + archive. Although this contains the same files as the others (likely + .tar.gz and .tar.Z), it is recommended for + Windows use, as all the files contained therein will contain Windows + line breaks. The other archives may contain files with Unix line + breaks, which may not function correctly on Windows.

        + +

        Compiling Apache for Windows

        + +

        Compiling Apache requires Microsoft Visual C++ 5.0 to be properly + installed. It is easiest to compile with the command-line tools + (nmake, etc...). Consult the VC++ manual to determine how to install + them.

        + +

        First, unpack the Apache distribution into an appropriate + directory. Open a command-line prompt, and change to the + src subdirectory of the Apache distribution.

        + +

        The master Apache makefile instructions are contained in the + Makefile.nt file. To compile Apache, simply use one of + the following commands: +

          +
        • nmake /f Makefile.nt release +
        • nmake /f Makefile.nt debug +
        + +

        These will both compile Apache. The latter will include debugging + information in the resulting files, making it easier to find bugs and + track down problems.

        + +

        Apache can also be compiled using VC++'s Visual Studio development + environment. Although compiling Apache in this manner is not as simple, + it makes it possible to easily modify the Apache source, or to compile + Apache if the command-line tools are not installed.

        + +

        Project files (.DSP) are included for each of the + portions of Apache. The two projects that are necessary for Apache to + run are Apache.dsp and ApacheCore.dsp. The + src\nt subdirectory also contains project files for the + optional modules (see below).

        + +

        Installing Apache for Windows

        + +

        Once Apache has been compiled, it needs to be installed in its server + root directory. The hard-coded default is the \Apache + directory, on the current hard drive. Another directory may be used, + but the files will need to be installed manually.

        + +

        To install the files into the \Apache directory + automatically, use one of the following nmake commands (see above):

        +
          +
        • nmake /f Makefile.nt installr (for release build) +
        • nmake /f Makefile.nt installd (for debug build) +
        + +

        This will install the following:

        + +
          +
        • \Apache\Apache.exe - Apache executable +
        • \Apache\ApacheCore.dll - Main Apache shared library +
        • \Apache\modules\ApacheModule*.dll - Optional Apache + modules (7 files) +
        • \Apache\conf - Empty configuration directory +
        • \Apache\logs - Empty logging directory +
        + +

        If you do not have nmake, or wish to install in a different directory, + be sure to use a similar naming scheme.

        + +

        Using Apache for Windows

        + +

        The first step is to set up Apache's configuration files. Default + configuration files for Windows are located in the conf + subdirectory in the Apache distribution, and are named + httpd.conf-dist-win, access.conf-dist-win + and srm.conf-dist-win. Move these into + \Apache\conf, and rename them httpd.conf, + access.conf and srm.conf, respectively.

        + +

        Configuring Apache is nearly identical to the Unix version of Apache, + so most of the standard Apache documentation is + applicable. A few things are, however, different:

        + +
          +
        • Because Apache for Windows is multithreaded, it does not use a + separate process for each request, as Apache does with + Unix. Therefore, the "process"-management directives are different: +

          StartServers - This + tells the server how many processes to use. Unlike Unix, there + will never be more than this number, and only one will be used + at a time (the others will be held in reserve in case the main + process crashes or otherwise dies). The recommended default is + StartServers 3. +

          MaxRequestsPerChild + - Like the Unix directive, this controls how many requests a + process will serve before exiting. However, unlike Unix, a + process serves all the requests at once, not just one, so if + this is set, it is recommended that a very high number is + used. The recommended default, MaxRequestsPerChild + 0, does not cause the process to ever exit. +

          ThreadsPerChild - + This directive is new, and tells the server how many threads it + should use. This is the maximum number of connections the server + can handle at once; be sure and set this number high enough for + your site if you get a lot of hits. The recommended default is + ThreadsPerChild 20.

          +
        • The directives that accept filenames as arguments now must use + Windows filenames instead of Unix ones. However, because Apache + uses Unix-style names internally, you must use forward slashes, not + backslashes. Drive letters can be used; if omitted, the drive with + the Apache executable will be assumed.

          +
        • Apache for Windows contains the ability to load modules at runtime, + without recompiling the server. If Apache is compiled normally, it + will install a number of optional modules in the + \Apache\modules directory. To activate these, or other + modules, the new LoadModule + directive must be used. For example, to activate the status module, + use the following (in addition to the status-activating directives + in access.conf):

          +
          +    LoadModule status_module modules/ApacheModuleStatus.dll
          +
          +

          Information on creating module + DLLs is also available.

          +
        + +

        Once Apache is configured correctly, it is nearly ready to be +run. However, we recommend you copy the icons and +htdocs subdirectories from the Apache distribution to +\Apache. The latter is especially important, as it contains +the document root (what the server actually serves). + +

        Apache can be executed in one of two ways, directly from the command + line, or as a Windows NT service. To run it from the command line, use + the following command: +

        +    C:\Apache> apache -s
        +
        + +

        Apache will then execute, and will remain running until it is + exited. To use Apache as a Windows NT service, use the following:

        +
        +    C:\Apache> apache -i
        +
        +

        Then open the Services control panel, and start the Apache service.

        + +

        If you installed Apache in a server root other than + \Apache, you must use the -f command-line + option to specify the httpd.conf file, or the -d option + to specify the server root.

        + + + + diff --git a/docs/manual/process-model.html b/docs/manual/process-model.html deleted file mode 100644 index f2c22f9ccc7..00000000000 --- a/docs/manual/process-model.html +++ /dev/null @@ -1,50 +0,0 @@ - -Server Pool Management with MinSpareServers and MaxSpareServers - - - -

        Server Pool Management with MinSpareServers and MaxSpareServers

        - -
        -

        -We found that many people were using values for "MaxServers" either -too high or too low, and were hanging themselves on it. The model we -adopted is still based on long-lived minimal-forking processes, but -instead of specifying one number of persistant processes, the -webmaster specifies a maximum and minimum number of processes to be -"spare" - every couple of seconds the parent checks the actual number -of spare servers and adjusts accordingly. This should keep the number -of servers concurrently running relatively low while still ensuring -minimal forking. - -

        - -We renamed the current StartServers to MinSpareServers, created -separate StartServers parameter which means what it says, and renamed -MaxServers to MaxSpareServers (though the old name still works, for -NCSA 1.4 back-combatibility). The old names were generally regarded -as too confusing. - -

        - -The defaults for each variable are: - -

        -MinSpareServers		5
        -MaxSpareServers		10
        -StartServers		10
        -
        - -There is a compile-time limit of 150 absolute maximum number of -simultaneous children that will be allowed, which can be overruled by -"MaxClients", though we don't recommend changing that number unless - -
          -
        1. You know you have the server resources to handle more -
        2. You use the machine for other purposes and must limit the amount of memory -Apache uses -
        - - - - diff --git a/docs/manual/sections.html b/docs/manual/sections.html new file mode 100644 index 00000000000..a145a565314 --- /dev/null +++ b/docs/manual/sections.html @@ -0,0 +1,139 @@ + + +How Directory, Location and Files sections work + + + + + +

        How Directory, Location and Files sections work

        + +The sections <Directory>, <Location> and <Files> can contain +directives which only apply to specified directories, URLs or files +respectively. Also htaccess files can be used inside a directory to +apply directives to that directory. This document explains how these +different sections differ and how they relate to each other when +Apache decides which directives apply for a particular directory or +request URL. + +

        Directives allowed in the sections

        + +Everything that is syntactically allowed in +<Directory> is also allowed in +<Location> (except a sub-<Files> +section, but the code doesn't test for that; Lars has an open bug +report on that). Semantically, however, some things, most +notably AllowOverride, make no sense in +<Location>. The same holds for +<Files> -- syntactically everything is fine, but +semantically some things are different. + +

        How the sections are merged

        + +The order of merging is: + +
          + +
        1. + + <Directory> (except regular expressions) and + .htaccess done simultaneously (with .htaccess overriding + <Directory>) + +
        2. + +
        3. + <DirectoryMatch>, and + <Directory> with regular expressions + +
        4. + +
        5. <Files> and <FilesMatch> done simultaneously +
        6. + +
        7. <Location> and <LocationMatch> done simultaneously +
        8. + +
        + +Apart from <Directory>, each group is processed in +the order that they appear in the configuration +files. <Directory> (group 1 above) is processed in +the order shortest directory component to longest. If multiple +<Directory> sections apply to the same directory +then they are processed in the configuration file order. The +configuration files are read in the order httpd.conf, srm.conf and +access.conf. Configurations included via the Include +directive will be treated as if they were inside the including file +at the location of the Include directive. + +

        + +Sections inside <VirtualHost> sections are applied +after the corresponding sections outside the virtual host +definition. This allows virtual hosts to override the main server +configuration. (Note: this only works correctly from 1.2.2 and 1.3a2 +onwards. Before those releases sections inside virtual hosts were +applied before the main server). + +

        Notes about using sections

        + +The general guidelines are: + +

        + +

          +
        • + If you are attempting to match objects at the filesystem level + then you must use <Directory> and/or + <Files>. +
        • + +
        • + If you are attempting to match objects at the URL level then you + must use <Location> +
        • +
        + +But a notable exception is: + +
          +
        • + proxy control is done via <Directory>. This is + a legacy mistake because the proxy existed prior to + <Location>. A future version of the config + language should probably switch this to + <Location>. +
        • +
        + +Note also that modifying .htaccess parsing during Location doesn't do +anything because .htaccess parsing has already occurred. + +

        + +Another note: +

        + +

          +
        • + There is actually a + <Location>/<LocationMatch> + sequence performed just before the name translation phase (where + Aliases and DocumentRoots are used to + map URLs to filenames). The results of this sequence are + completely thrown away after the translation has completed. +
        • +
        + + + diff --git a/docs/manual/sections.html.en b/docs/manual/sections.html.en new file mode 100644 index 00000000000..a145a565314 --- /dev/null +++ b/docs/manual/sections.html.en @@ -0,0 +1,139 @@ + + +How Directory, Location and Files sections work + + + + + +

        How Directory, Location and Files sections work

        + +The sections <Directory>, <Location> and <Files> can contain +directives which only apply to specified directories, URLs or files +respectively. Also htaccess files can be used inside a directory to +apply directives to that directory. This document explains how these +different sections differ and how they relate to each other when +Apache decides which directives apply for a particular directory or +request URL. + +

        Directives allowed in the sections

        + +Everything that is syntactically allowed in +<Directory> is also allowed in +<Location> (except a sub-<Files> +section, but the code doesn't test for that; Lars has an open bug +report on that). Semantically, however, some things, most +notably AllowOverride, make no sense in +<Location>. The same holds for +<Files> -- syntactically everything is fine, but +semantically some things are different. + +

        How the sections are merged

        + +The order of merging is: + +
          + +
        1. + + <Directory> (except regular expressions) and + .htaccess done simultaneously (with .htaccess overriding + <Directory>) + +
        2. + +
        3. + <DirectoryMatch>, and + <Directory> with regular expressions + +
        4. + +
        5. <Files> and <FilesMatch> done simultaneously +
        6. + +
        7. <Location> and <LocationMatch> done simultaneously +
        8. + +
        + +Apart from <Directory>, each group is processed in +the order that they appear in the configuration +files. <Directory> (group 1 above) is processed in +the order shortest directory component to longest. If multiple +<Directory> sections apply to the same directory +then they are processed in the configuration file order. The +configuration files are read in the order httpd.conf, srm.conf and +access.conf. Configurations included via the Include +directive will be treated as if they were inside the including file +at the location of the Include directive. + +

        + +Sections inside <VirtualHost> sections are applied +after the corresponding sections outside the virtual host +definition. This allows virtual hosts to override the main server +configuration. (Note: this only works correctly from 1.2.2 and 1.3a2 +onwards. Before those releases sections inside virtual hosts were +applied before the main server). + +

        Notes about using sections

        + +The general guidelines are: + +

        + +

          +
        • + If you are attempting to match objects at the filesystem level + then you must use <Directory> and/or + <Files>. +
        • + +
        • + If you are attempting to match objects at the URL level then you + must use <Location> +
        • +
        + +But a notable exception is: + +
          +
        • + proxy control is done via <Directory>. This is + a legacy mistake because the proxy existed prior to + <Location>. A future version of the config + language should probably switch this to + <Location>. +
        • +
        + +Note also that modifying .htaccess parsing during Location doesn't do +anything because .htaccess parsing has already occurred. + +

        + +Another note: +

        + +

          +
        • + There is actually a + <Location>/<LocationMatch> + sequence performed just before the name translation phase (where + Aliases and DocumentRoots are used to + map URLs to filenames). The results of this sequence are + completely thrown away after the translation has completed. +
        • +
        + + + diff --git a/docs/manual/stopping.html b/docs/manual/stopping.html new file mode 100644 index 00000000000..898acca3ca5 --- /dev/null +++ b/docs/manual/stopping.html @@ -0,0 +1,137 @@ + + + +Stopping and Restarting Apache + + + + +

        Stopping and Restarting Apache

        + +

        You will notice many httpd executables running on your system, +but you should not send signals to any of them except the parent, whose +pid is in the PidFile. That is to +say you shouldn't ever need to send signals to any process except the +parent. There are three signals that you can send the parent: +TERM, HUP, and USR1, which will +be described in a moment. + +

        To send a signal to the parent you should issue a command such as: +

        +    kill -TERM `cat /usr/local/etc/httpd/logs/httpd.pid`
        +
        + +You can read about its progress by issuing: + +
        +    tail -f /usr/local/etc/httpd/logs/error_log
        +
        + +Modify those examples to match your +ServerRoot and +PidFile settings. + +

        TERM Signal: stop now

        + +

        Sending the TERM signal to the parent causes it to +immediately attempt to kill off all of its children. It may take it +several seconds to complete killing off its children. Then the +parent itself exits. Any requests in progress are terminated, and no +further requests are served. + +

        HUP Signal: restart now

        + +

        Sending the HUP signal to the parent causes it to kill off +its children like in TERM but the parent doesn't exit. It +re-reads its configuration files, and re-opens any log files. +Then it spawns a new set of children and continues +serving hits. + +

        Users of the +status module +will notice that the server statistics are +set to zero when a HUP is sent. + +

        USR1 Signal: graceful restart

        + +

        Note: prior to release 1.2b9 this code is quite unstable and +shouldn't be used at all. + +

        The USR1 signal causes the parent process to advise +the children to exit after their current request (or to exit immediately +if they're not serving anything). The parent re-reads its configuration +files and re-opens its log files. As each child dies off the parent +replaces it with a child from the new generation of the +configuration, which begins serving new requests immediately. + +

        This code is designed to always respect the +MaxClients, +MinSpareServers, +and MaxSpareServers settings. +Furthermore, it respects StartServers +in the following manner: if after one second at least StartServers new +children have not been created, then create enough to pick up the slack. +This is to say that the code tries to maintain both the number of children +appropriate for the current load on the server, and respect your wishes +with the StartServers parameter. + +

        Users of the +status module +will notice that the server statistics +are not set to zero when a USR1 is sent. The code +was written to both minimize the time in which the server is unable to serve +new requests (they will be queued up by the operating system, so they're +not lost in any event) and to respect your tuning parameters. In order +to do this it has to keep the scoreboard used to keep track +of all children across generations. + +

        The status module will also use a G to indicate those +children which are still serving requests started before the graceful +restart was given. + +

        At present there is no way for a log rotation script using +USR1 to know for certain that all children writing the +pre-restart log have finished. We suggest that you use a suitable delay +after sending the USR1 signal before you do anything with the +old log. For example if most of your hits take less than 10 minutes to +complete for users on low bandwidth links then you could wait 15 minutes +before doing anything with the old log. + +

        Appendix: signals and race conditions

        + +

        Prior to Apache 1.2b9 there were several race conditions +involving the restart and die signals (a simple description of race +condition is: a time-sensitive problem, as in if something happens at just +the wrong time it won't behave as expected). For those architectures that +have the "right" feature set we have eliminated as many as we can. +But it should be noted that there still do exist race conditions on +certain architectures. + +

        Architectures that use an on disk ScoreBoardFile have the potential +to lose track of a child during graceful restart (you'll see an ErrorLog message saying something about +a long lost child). The ScoreBoardFile directive explains how +to figure out if your server uses a file, and possibly how to avoid it. +There is also the potential that the scoreboard will be corrupted during +any signalling, but this only has bad effects on graceful restart. + +

        NEXT and MACHTEN have small race conditions +which can cause a restart/die signal to be lost, but should not cause the +server to do anything otherwise problematic. + + +

        All architectures have a small race condition in each child involving +the second and subsequent requests on a persistent HTTP connection +(KeepAlive). It may exit after reading the request line but before +reading any of the request headers. There is a fix that was discovered +too late to make 1.2. In theory this isn't an issue because the KeepAlive +client has to expect these events because of network latencies and +server timeouts. In practice it doesn't seem to affect anything either +-- in a test case the server was restarted twenty times per second and +clients successfully browsed the site without getting broken images or +empty documents. + + + + diff --git a/docs/manual/stopping.html.en b/docs/manual/stopping.html.en new file mode 100644 index 00000000000..898acca3ca5 --- /dev/null +++ b/docs/manual/stopping.html.en @@ -0,0 +1,137 @@ + + + +Stopping and Restarting Apache + + + + +

        Stopping and Restarting Apache

        + +

        You will notice many httpd executables running on your system, +but you should not send signals to any of them except the parent, whose +pid is in the PidFile. That is to +say you shouldn't ever need to send signals to any process except the +parent. There are three signals that you can send the parent: +TERM, HUP, and USR1, which will +be described in a moment. + +

        To send a signal to the parent you should issue a command such as: +

        +    kill -TERM `cat /usr/local/etc/httpd/logs/httpd.pid`
        +
        + +You can read about its progress by issuing: + +
        +    tail -f /usr/local/etc/httpd/logs/error_log
        +
        + +Modify those examples to match your +ServerRoot and +PidFile settings. + +

        TERM Signal: stop now

        + +

        Sending the TERM signal to the parent causes it to +immediately attempt to kill off all of its children. It may take it +several seconds to complete killing off its children. Then the +parent itself exits. Any requests in progress are terminated, and no +further requests are served. + +

        HUP Signal: restart now

        + +

        Sending the HUP signal to the parent causes it to kill off +its children like in TERM but the parent doesn't exit. It +re-reads its configuration files, and re-opens any log files. +Then it spawns a new set of children and continues +serving hits. + +

        Users of the +status module +will notice that the server statistics are +set to zero when a HUP is sent. + +

        USR1 Signal: graceful restart

        + +

        Note: prior to release 1.2b9 this code is quite unstable and +shouldn't be used at all. + +

        The USR1 signal causes the parent process to advise +the children to exit after their current request (or to exit immediately +if they're not serving anything). The parent re-reads its configuration +files and re-opens its log files. As each child dies off the parent +replaces it with a child from the new generation of the +configuration, which begins serving new requests immediately. + +

        This code is designed to always respect the +MaxClients, +MinSpareServers, +and MaxSpareServers settings. +Furthermore, it respects StartServers +in the following manner: if after one second at least StartServers new +children have not been created, then create enough to pick up the slack. +This is to say that the code tries to maintain both the number of children +appropriate for the current load on the server, and respect your wishes +with the StartServers parameter. + +

        Users of the +status module +will notice that the server statistics +are not set to zero when a USR1 is sent. The code +was written to both minimize the time in which the server is unable to serve +new requests (they will be queued up by the operating system, so they're +not lost in any event) and to respect your tuning parameters. In order +to do this it has to keep the scoreboard used to keep track +of all children across generations. + +

        The status module will also use a G to indicate those +children which are still serving requests started before the graceful +restart was given. + +

        At present there is no way for a log rotation script using +USR1 to know for certain that all children writing the +pre-restart log have finished. We suggest that you use a suitable delay +after sending the USR1 signal before you do anything with the +old log. For example if most of your hits take less than 10 minutes to +complete for users on low bandwidth links then you could wait 15 minutes +before doing anything with the old log. + +

        Appendix: signals and race conditions

        + +

        Prior to Apache 1.2b9 there were several race conditions +involving the restart and die signals (a simple description of race +condition is: a time-sensitive problem, as in if something happens at just +the wrong time it won't behave as expected). For those architectures that +have the "right" feature set we have eliminated as many as we can. +But it should be noted that there still do exist race conditions on +certain architectures. + +

        Architectures that use an on disk ScoreBoardFile have the potential +to lose track of a child during graceful restart (you'll see an ErrorLog message saying something about +a long lost child). The ScoreBoardFile directive explains how +to figure out if your server uses a file, and possibly how to avoid it. +There is also the potential that the scoreboard will be corrupted during +any signalling, but this only has bad effects on graceful restart. + +

        NEXT and MACHTEN have small race conditions +which can cause a restart/die signal to be lost, but should not cause the +server to do anything otherwise problematic. + + +

        All architectures have a small race condition in each child involving +the second and subsequent requests on a persistent HTTP connection +(KeepAlive). It may exit after reading the request line but before +reading any of the request headers. There is a fix that was discovered +too late to make 1.2. In theory this isn't an issue because the KeepAlive +client has to expect these events because of network latencies and +server timeouts. In practice it doesn't seem to affect anything either +-- in a test case the server was restarted twenty times per second and +clients successfully browsed the site without getting broken images or +empty documents. + + + + diff --git a/docs/manual/suexec.html b/docs/manual/suexec.html new file mode 100644 index 00000000000..c1207ac01a5 --- /dev/null +++ b/docs/manual/suexec.html @@ -0,0 +1,20 @@ + +Apache SetUserID Support + + + +

        Apache SetUserID Support

        + +
        + +

        What is SUExec?

        + +

        Enabling SUExec Support

        + +

        When SUExec Is Used

        + + + + + + diff --git a/docs/manual/suexec.html.en b/docs/manual/suexec.html.en new file mode 100644 index 00000000000..c1207ac01a5 --- /dev/null +++ b/docs/manual/suexec.html.en @@ -0,0 +1,20 @@ + +Apache SetUserID Support + + + +

        Apache SetUserID Support

        + +
        + +

        What is SUExec?

        + +

        Enabling SUExec Support

        + +

        When SUExec Is Used

        + + + + + + diff --git a/docs/manual/vhosts/details.html b/docs/manual/vhosts/details.html new file mode 100644 index 00000000000..0e992170c37 --- /dev/null +++ b/docs/manual/vhosts/details.html @@ -0,0 +1,367 @@ + + +An In-Depth Discussion of Virtual Host Matching + + + + + +

        An In-Depth Discussion of Virtual Host Matching

        + +

        The virtual host code was completely rewritten in Apache 1.3. +This document attempts to explain exactly what Apache does when +deciding what virtual host to serve a hit from. With the help of the +new NameVirtualHost +directive virtual host configuration should be a lot easier and safer +than with versions prior to 1.3. + +

        If you just want to make it work without understanding +how, here are some examples. + +

        Config File Parsing

        + +

        There is a main_server which consists of all +the definitions appearing outside of <VirtualHost> sections. +There are virtual servers, called vhosts, which are defined by +<VirtualHost> +sections. + +

        The directives +Port, +ServerName, +ServerPath, +and +ServerAlias +can appear anywhere within the definition of +a server. However, each appearance overrides the previous appearance +(within that server). + +

        The default value of the Port field for main_server +is 80. The main_server has no default ServerPath, or +ServerAlias. The default ServerName is +deduced from the server's IP address. + +

        The main_server Port directive has two functions due to legacy +compatibility with NCSA configuration files. One function is +to determine the default network port Apache will bind to. This +default is overridden by the existence of any +Listen directives. +The second function is to specify the port number which is used +in absolute URIs during redirects. + +

        Unlike the main_server, vhost ports do not affect what +ports Apache listens for connections on. + +

        Each address appearing in the VirtualHost directive +can have an optional port. If the port is unspecified it defaults to +the value of the main_server's most recent Port statement. +The special port * indicates a wildcard that matches any port. +Collectively the entire set of addresses (including multiple +A record +results from DNS lookups) are called the vhost's address set. + +

        Unless a NameVirtualHost +directive is used for a specific IP address the first vhost with +that address is treated as an IP-based vhost. + +

        If name-based vhosts should be used a NameVirtualHost +directive must appear with the IP address set to be used for the +name-based vhosts. In other words, you must specify the IP address that +holds the hostname aliases (CNAMEs) for your name-based vhosts via a +NameVirtualHost directive in your configuration file. + +

        Multiple NameVirtualHost directives can be used each +with a set of VirtualHost directives. + +

        The ordering of NameVirtualHost and +VirtualHost directives is not important which makes the +following two examples identical (only the order of the +VirtualHost directives for one address set +is important, see below): + +

        +                                |
        +  NameVirtualHost 111.22.33.44  | <VirtualHost 111.22.33.44>
        +  <VirtualHost 111.22.33.44>    | # server A
        +  # server A  		        | </VirtualHost>
        +  ... 			        | <VirtualHost 111.22.33.55>
        +  </VirtualHost>	        | # server C
        +  <VirtualHost 111.22.33.44>    | ...
        +  # server B  		        | </VirtualHost>
        +  ... 			        | <VirtualHost 111.22.33.44>
        +  </VirtualHost>	        | # server B
        +                                | ...
        +  NameVirtualHost 111.22.33.55  | </VirtualHost>
        +  <VirtualHost 111.22.33.55>    | <VirtualHost 111.22.33.55>
        +  # server C  		        | # server D
        +  ... 			        | ...
        +  </VirtualHost>	        | </VirtualHost>
        +  <VirtualHost 111.22.33.55>    |
        +  # server D  		        | NameVirtualHost 111.22.33.44
        +  ... 			        | NameVirtualHost 111.22.33.55
        +  </VirtualHost>	        |
        +                                |
        +
        + +

        (To aid the readability of your configuration you should prefer the +left variant.) + +

        After parsing the VirtualHost directive, the vhost server +is given a default Port equal to the port assigned to the +first name in its VirtualHost directive. + +

        The complete list of names in the VirtualHost directive +are treated just like a ServerAlias (but are not overridden by any +ServerAlias statement) if all names resolve to the same address set. +Note that subsequent Port statements for this vhost will not affect +the ports assigned in the address set. + +

        During initialization a list for each IP address +is generated and inserted into a hash table. If the IP address is +used in a NameVirtualHost directive the list contains +all name-based vhosts for the given IP address. If there are no +vhosts defined for that address the NameVirtualHost directive +is ignored and an error is logged. For an IP-based vhost the list in the +hash table is empty. + +

        Due to a fast hashing function the overhead of hashing an IP address +during a request is minimal and almost non-existent. Additionally +the table is optimized for IP addresses which vary in the last octet. + +

        For every vhost various default values are set. In particular: + +

          +
        1. If a vhost has no + ServerAdmin, + ResourceConfig, + AccessConfig, + Timeout, + KeepAliveTimeout, + KeepAlive, + MaxKeepAliveRequests, + or + SendBufferSize + directive then the respective value is + inherited from the main_server. (That is, inherited from whatever + the final setting of that value is in the main_server.) + +
        2. The "lookup defaults" that define the default directory + permissions + for a vhost are merged with those of the main_server. This includes + any per-directory configuration information for any module. + +
        3. The per-server configs for each module from the main_server are + merged into the vhost server. +
        + +Essentially, the main_server is treated as "defaults" or a +"base" on which to build each vhost. +But the positioning of these main_server +definitions in the config file is largely irrelevant -- the entire +config of the main_server has been parsed when this final merging occurs. +So even if a main_server definition appears after a vhost definition +it might affect the vhost definition. + +

        If the main_server has no ServerName at this point, +then the hostname of the machine that httpd is running on is used +instead. We will call the main_server address set those IP +addresses returned by a DNS lookup on the ServerName of +the main_server. + +

        For any undefined ServerName fields, a name-based vhost +defaults to the address given first in the VirtualHost +statement defining the vhost. + +

        Any vhost that includes the magic _default_ wildcard +is given the same ServerName as the main_server. + + +

        Virtual Host Matching

        + +

        The server determines which vhost to use for a request as follows: + +

        Hash table lookup

        + +

        When the connection is first made by a client, the IP address to +which the client connected is looked up in the internal IP hash table. + +

        If the lookup fails (the IP address wasn't found) the request is +served from the _default_ vhost if there is such a vhost +for the port to which the client sent the request. If there is no +matching _default_ vhost the request is served from the +main_server. + +

        If the lookup succeeded (a corresponding list for the IP address was +found) the next step is to decide if we have to deal with an IP-based +or a name-based vhost. + +

        IP-based vhost

        + +

        If the entry we found has an empty name list then we have found an +IP-based vhost, no further actions are performed and the request is +served from that vhost. + +

        Name-based vhost

        + +

        If the entry corresponds to a name-based vhost the name list contains +one or more vhost structures. This list contains the vhosts in the same +order as the VirtualHost directives appear in the config +file. + +

        The first vhost on this list (the first vhost that appears after the +corresponding NameVirtualHost directive in the config file) +has the highest priority and catches any request to an unknown +server name or a request without a Host: header. + +

        If the client provided a Host: header the list is +searched for a matching vhost and the first hit on a ServerName +or ServerAlias is taken and the request is served from +that vhost. A Host: header can contain a port number, but +Apache always matches against the real port to which the client sent +the request. + +

        If the client submitted an HTTP/1.0 request without a Host: +header we don't know to what server the client tried to connect and +any existing ServerPath is matched against the URI +from the request. The first matching path on the list is used and the +request is served from that vhost. + +

        If no matching vhost could be found the request is served from the +first vhost with a matching port number that is on the list for the IP +to which the client connected (as already mentioned before). + +

        Persistent connections

        +The IP lookup described above is only done once for a particular +TCP/IP session while the name lookup is done on every request +during a KeepAlive/persistent connection. In other words a client may +request pages from different name-based vhosts during a single +persistent connection. + + +

        Absolute URI

        + +

        If the URI from the request is an absolute URI, and its hostname and +port match the main server or one of the configured virtual hosts +and match the address and port to which the client sent the request, +then the scheme/hostname/port prefix is stripped off and the remaining +relative URI is served by the corresponding main server or virtual host. +If it does not match, then the URI remains untouched and the request is +taken to be a proxy request. + + +

        Observations

        + +
          + +
        • A name-based vhost can never interfere with an IP-based vhost and + vice versa. IP-based vhosts can only be reached through an IP address + of its own address set and never through any other address. + The same applies to name-based vhosts, they can only be reached + through an IP address of the corresponding address set which must + be defined with a NameVirtualHost directive. +

          + +

        • ServerAlias and ServerPath checks are never + performed for an IP-based vhost. +

          + +

        • The order of name-/IP-based, the _default_ + vhost and the NameVirtualHost directive within the config + file is not important. Only the ordering + of name-based vhosts for a specific address set is significant. The + name-based vhost that comes first in the configuration file has + the highest priority for its corresponding address set. +

          + +

        • For security reasons the port number given in a Host: + header is never used during the matching process. Apache always + uses the real port to which the client sent the request. +

          + +

        • If a ServerPath directive exists which is a prefix of + another ServerPath directive that appears later in + the configuration file, then the former will always be matched + and the latter will never be matched. (That is assuming that no + Host header was available to disambiguate the two.) +

          + +

        • If two IP-based vhosts have an address in common, the vhost appearing + first in the config file is always matched. Such a thing might happen + inadvertently. The server will give a warning in the error + logfile when it detects this. +

          + +

        • A _default_ vhost catches a request only if there is no + other vhost with a matching IP address and a matching port + number for the request. The request is only caught if the port number + to which the client sent the request matches the port number of your + _default_ vhost which is your standard Port + by default. A wildcard port can be specified (i.e. + _default_:*) to catch requests to any available port. +

          + +

        • The main_server is only used to serve a request if the IP address + and port number to which the client connected is unspecified + and does not match any other vhost (including a _default_ + vhost). In other words the main_server only catches a request for an + unspecified address/port combination (unless there is a _default_ + vhost which matches that port). +

          + +

        • A _default_ vhost or the main_server is never + matched for a request with an unknown or missing Host: header + if the client connected to an address (and port) which is used + for name-based vhosts, e.g. in a NameVirtualHost directive. +

          + +

        • You should never specify DNS names in VirtualHost + directives because it will force your server to rely on DNS to boot. + Furthermore it poses a security threat if you do not control the + DNS for all the domains listed. + There's more information + available on this and the next two topics. +

          + +

        • ServerName should always be set for each vhost. Otherwise + a DNS lookup is required for each vhost. +

          + +

        + +

        Tips

        + +

        In addition to the tips on the DNS +Issues page, here are some further tips: + +

          + +
        • Place all main_server definitions before any VirtualHost + definitions. (This is to aid the readability of the configuration -- + the post-config merging process makes it non-obvious that definitions + mixed in around virtual hosts might affect all virtual hosts.) +

          + +

        • Group corresponding NameVirtualHost and + VirtualHost definitions in your configuration to ensure + better readability. +

          + +

        • Avoid ServerPaths which are prefixes of other + ServerPaths. If you cannot avoid this then you have to + ensure that the longer (more specific) prefix vhost appears earlier in + the configuration file than the shorter (less specific) prefix + (i.e., "ServerPath /abc" should appear after + "ServerPath /abc/def"). +

          + +

        + + + + diff --git a/docs/manual/vhosts/examples.html b/docs/manual/vhosts/examples.html new file mode 100644 index 00000000000..c537f06dbb0 --- /dev/null +++ b/docs/manual/vhosts/examples.html @@ -0,0 +1,512 @@ + + +VirtualHost Examples + + + + + +

        Virtual Host examples for common setups

        + + +

        Base configuration

        + + + +

        Additional features

        + + + +
        + +

        IP-based vhosts only

        + +
          + +
        • Setup 1: + The server machine has two IP addresses (111.22.33.44 + and 111.22.33.55) + which resolve to the names server.domain.tld and + www.otherdomain.tld respectively. + The hostname www.domain.tld is an alias (CNAME) + for server.domain.tld and will represent the + main server. +

          + Server configuration: + + +

          +    ...
          +    Port 80
          +    DocumentRoot /www/domain
          +    ServerName www.domain.tld
          +
          +    <VirtualHost 111.22.33.55>
          +    DocumentRoot /www/otherdomain
          +    ServerName www.otherdomain.tld
          +    ...
          +    </VirtualHost>
          +    
          + www.otherdomain.tld can only be reached through the + address 111.22.33.55, while www.domain.tld + can only be reached through 111.22.33.44 + (which represents our main server). +
          +

          + +

        • Setup 2: + Same as setup 1, but we don't want to have a dedicated main server. +

          + Server configuration: + +

          +    ...
          +    Port 80
          +    ServerName server.domain.tld
          +    
          +    <VirtualHost 111.22.33.44>
          +    DocumentRoot /www/domain
          +    ServerName www.domain.tld
          +    ...
          +    </VirtualHost>
          +
          +    <VirtualHost 111.22.33.55>
          +    DocumentRoot /www/otherdomain
          +    ServerName www.otherdomain.tld
          +    ...
          +    </VirtualHost>
          +    
          + The main server can never catch a request, because all IP addresses + of our machine are in use for IP-based virtual hosts + (only localhost requests can hit the main server). +
          +

          + +

        • Setup 3: + The server machine has two IP addresses (111.22.33.44 + and 111.22.33.55) + which resolve to the names server.domain.tld and + www-cache.domain.tld respectively. + The hostname www.domain.tld is an alias (CNAME) + for server.domain.tld and will represent the + main server. + www-cache.domain.tld will become our proxy-cache + listening on port 8080, while the web server itself uses the default + port 80. +

          + Server configuration: + +

          +    ...
          +    Port 80
          +    Listen 111.22.33.44:80
          +    Listen 111.22.33.55:8080
          +    ServerName server.domain.tld
          +    
          +    <VirtualHost 111.22.33.44:80>
          +    DocumentRoot /www/domain
          +    ServerName www.domain.tld
          +    ...
          +    </VirtualHost>
          +
          +    <VirtualHost 111.22.33.55:8080>
          +    ServerName www-cache.domain.tld
          +    ...
          +      <Directory proxy:>
          +      order deny,allow
          +      deny from all
          +      allow from 111.22.33
          +      </Directory>
          +    </VirtualHost>
          +    
          + The main server can never catch a request, because all IP addresses + (apart from localhost) of our machine are in use for IP-based + virtual hosts. The web server can only be reached on the first address + through port 80 and the proxy only on the second address through port 8080. +
          +
        +
        + +

        Name-based vhosts only

        + +
          + +
        • Setup 1: + The server machine has one IP address (111.22.33.44) + which resolves to the name server.domain.tld. + There are two aliases (CNAMEs) www.domain.tld and + www.sub.domain.tld for the address 111.22.33.44. +

          + Server configuration: + +

          +    ...
          +    Port 80
          +    ServerName server.domain.tld
          +
          +    NameVirtualHost 111.22.33.44 
          +
          +    <VirtualHost 111.22.33.44>
          +    DocumentRoot /www/domain
          +    ServerName www.domain.tld
          +    ...
          +    </VirtualHost>
          +    
          +    <VirtualHost 111.22.33.44>
          +    DocumentRoot /www/subdomain
          +    ServerName www.sub.domain.tld
          +    ...
          +    </VirtualHost> 
          +    
          + Apart from localhost there are no unspecified + addresses/ports, therefore the main server only serves + localhost requests. Due to the fact + that www.domain.tld has the highest priority + it can be seen as the default or + primary server. +
          +

          + +

        • Setup 2: + The server machine has two IP addresses (111.22.33.44 + and 111.22.33.55) + which resolve to the names server1.domain.tld and + server2.domain.tld respectively. + The alias www.domain.tld should be used for the + main server which should also catch any unspecified addresses. + We want to use a virtual host for the alias + www.otherdomain.tld and one virtual host should + catch any request to hostnames of the form + *.sub.domain.tld with www.sub.domain.tld + as its server name. The address 111.22.33.55 should be + used for the virtual hosts. +

          + Server configuration: + +

          +    ...
          +    Port 80
          +    ServerName www.domain.tld
          +    DocumentRoot /www/domain
          +
          +    NameVirtualHost 111.22.33.55
          +
          +    <VirtualHost 111.22.33.55>
          +    DocumentRoot /www/otherdomain
          +    ServerName www.otherdomain.tld
          +    ...
          +    </VirtualHost>
          +   
          +    <VirtualHost 111.22.33.55>
          +    DocumentRoot /www/subdomain
          +    ServerName www.sub.domain.tld
          +    ServerAlias *.sub.domain.tld
          +    ...
          +    </VirtualHost> 
          +    
          + Any request to an address other than 111.22.33.55 + will be served from the main server. A request to + 111.22.33.55 with an unknown or no Host: + header will be served from www.otherdomain.tld. +
          +
        + +
        + +

        Mixed name-/IP-based vhosts

        + +
          + +
        • Setup: + The server machine has three IP addresses (111.22.33.44, + 111.22.33.55 and 111.22.33.66) + which resolve to the names server.domain.tld, + www.otherdomain1.tld and www.otherdomain2.tld + respectively. + The address 111.22.33.44 should be used for a couple + of name-based vhosts and the other addresses for IP-based vhosts. +

          + Server configuration: + +

          +    ...
          +    Port 80
          +    ServerName server.domain.tld
          +
          +    NameVirtualHost 111.22.33.44
          +
          +    <VirtualHost 111.22.33.44>
          +    DocumentRoot /www/domain
          +    ServerName www.domain.tld
          +    ...
          +    </VirtualHost>
          +   
          +    <VirtualHost 111.22.33.44>
          +    DocumentRoot /www/subdomain1
          +    ServerName www.sub1.domain.tld
          +    ...
          +    </VirtualHost> 
          +    
          +    <VirtualHost 111.22.33.44>
          +    DocumentRoot /www/subdomain2
          +    ServerName www.sub2.domain.tld
          +    ...
          +    </VirtualHost> 
          + 
          +    <VirtualHost 111.22.33.55>
          +    DocumentRoot /www/otherdomain1
          +    ServerName www.otherdomain1.tld
          +    ...
          +    </VirtualHost> 
          +    
          +    <VirtualHost 111.22.33.66>
          +    DocumentRoot /www/otherdomain2
          +    ServerName www.otherdomain2.tld
          +    ...
          +    </VirtualHost>     
          +    
          + +
        + +
        + +

        Port-based vhosts

        + +
          + +
        • Setup: + The server machine has one IP address (111.22.33.44) + which resolves to the name www.domain.tld. + If we don't have the option to get another address or alias + for our server we can use port-based vhosts if we need + a virtual host with a different configuration. +

          + Server configuration: + +

          +    ...
          +    Listen 80
          +    Listen 8080
          +    ServerName www.domain.tld
          +    DocumentRoot /www/domain
          +
          +    <VirtualHost 111.22.33.44:8080>
          +    DocumentRoot /www/domain2
          +    ...
          +    </VirtualHost>
          +    
          + A request to www.domain.tld on port 80 is served + from the main server and a request to port 8080 is served from + the virtual host. +
          +
        + +
        + +

        Using _default_ vhosts

        + +
          + +
        • Setup 1: + Catching every request to any unspecified IP address and port, + i.e. an address/port combination that is not used for any other + virtual host. +

          + Server configuration: + +

          +    ...
          +    <VirtualHost _default_:*>
          +    DocumentRoot /www/default
          +    ...
          +    </VirtualHost>
          +    
          + Using such a default vhost with a wildcard port effectively + prevents any request going to the main server.
          + A default vhost never serves a request that was sent to an + address/port that is used for name-based vhosts. If the request + contained an unknown or no Host: header it is + always served from the primary name-based vhost (the + vhost for that address/port appearing first in the configuration + file).
          + You can use + AliasMatch + or + RewriteRule + to rewrite any request to a single information page (or script). +
          +

          + +

        • Setup 2: + Same as setup 1, but the server listens on several ports and + we want to use a second _default_ vhost for port 80. +

          + Server configuration: + +

          +    ...
          +    <VirtualHost _default_:80>
          +    DocumentRoot /www/default80
          +    ...
          +    </VirtualHost>
          +    
          +    <VirtualHost _default_:*>
          +    DocumentRoot /www/default
          +    ...
          +    </VirtualHost>    
          +    
          + The default vhost for port 80 (which must appear before + any default vhost with a wildcard port) catches all requests that + were sent to an unspecified IP address. The main server is + never used to serve a request. +
          +

          + +

        • Setup 3: + We want to have a default vhost for port 80, but no other default vhosts. +

          + Server configuration: + +

          +    ...
          +    <VirtualHost _default_:80>
          +    DocumentRoot /www/default
          +    ...
          +    </VirtualHost>
          +    
          + A request to an unspecified address on port 80 is served from the + default vhost; any other request to an unspecified address and port + is served from the main server. +
          + +
        + +
        + +

        Migrating a name-based vhost to an IP-based vhost

        + +
          + +
        • Setup: + The name-based vhost with the hostname + www.otherdomain.tld (from our name-based + example, setup 2) should get its own IP address. + To avoid problems with name servers or proxies who cached the old + IP address for the name-based vhost we want to provide both variants + during a migration phase.
          + The solution is easy, because we can simply add the new IP address + (111.22.33.66) to the VirtualHost directive. +

          + Server configuration: + +

          +    ...
          +    Port 80
          +    ServerName www.domain.tld
          +    DocumentRoot /www/domain
          +
          +    NameVirtualHost 111.22.33.55
          +
          +    <VirtualHost 111.22.33.55 111.22.33.66>
          +    DocumentRoot /www/otherdomain
          +    ServerName www.otherdomain.tld
          +    ...
          +    </VirtualHost>
          +   
          +    <VirtualHost 111.22.33.55>
          +    DocumentRoot /www/subdomain
          +    ServerName www.sub.domain.tld
          +    ServerAlias *.sub.domain.tld
          +    ...
          +    </VirtualHost>
          +    
          + The vhost can now be accessed through the new address (as an IP-based + vhost) and through the old address (as a name-based vhost). +
          + +
        + +
        + +

        Using the ServerPath directive

        + +
          + +
        • Setup: + We have a server with two name-based vhosts. In order to match the correct + virtual host a client must send the correct Host: header. + Old HTTP/1.0 clients do not send such a header and Apache has no clue + what vhost the client tried to reach (and serves the request from + the primary vhost). To provide as much backward compatibility + as possible we create a primary vhost which returns a single page + containing links with a URL prefix to the name-based virtual hosts. +

          + Server configuration: + +

          +    ...
          +    NameVirtualHost 111.22.33.44
          +
          +    <VirtualHost 111.22.33.44>
          +    # primary vhost
          +    DocumentRoot /www/subdomain
          +    RewriteEngine On
          +    RewriteRule ^/.* /www/subdomain/index.html
          +    ...
          +    </VirtualHost>
          +
          +    <VirtualHost 111.22.33.44>
          +    DocumentRoot /www/subdomain/sub1
          +    ServerName www.sub1.domain.tld
          +    ServerPath /sub1/
          +    RewriteEngine On
          +    RewriteRule ^(/sub1/.*) /www/subdomain$1 
          +    ...
          +    </VirtualHost>
          +
          +    <VirtualHost 111.22.33.44>
          +    DocumentRoot /www/subdomain/sub2
          +    ServerName www.sub2.domain.tld
          +    ServerPath /sub2/
          +    RewriteEngine On
          +    RewriteRule ^(/sub2/.*) /www/subdomain$1 
          +    ...
          +    </VirtualHost>
          +    
          + Due to the ServerPath + directive a request to the + URL http://www.sub1.domain.tld/sub1/ is always + served from the sub1-vhost.
          + A request to the URL http://www.sub1.domain.tld/ + is only served from the sub1-vhost if the client sent a correct + Host: header. + If no Host: header is sent the client gets the + information page from the primary host.
          + Please note that there is one oddity: A request to + http://www.sub2.domain.tld/sub1/ is also served from + the sub1-vhost if the client sent no Host: header.
          + The RewriteRule directives are used to make sure that + a client which sent a correct Host: header can use + both URL variants, i.e. with or without URL prefix. +
          + +
        + + + + diff --git a/docs/manual/vhosts/fd-limits.html b/docs/manual/vhosts/fd-limits.html new file mode 100644 index 00000000000..77f4d8254d9 --- /dev/null +++ b/docs/manual/vhosts/fd-limits.html @@ -0,0 +1,59 @@ + + + +Apache Server Virtual Host Support + + + + + +

        File Descriptor Limits

        + +

        +When using a large number of Virtual Hosts, Apache may run out of available +file descriptors (sometimes called file handles) if each Virtual +Host specifies different log files. +The total number of file descriptors used by Apache is one for each distinct +error log file, one for every other log file directive, plus 10-20 for +internal use. Unix operating systems limit the number of file descriptors that +may be used by a process; the limit is typically 64, and may usually be +increased up to a large hard-limit. +

        +Although Apache attempts to increase the limit as required, this +may not work if: +

          +
        1. Your system does not provide the setrlimit() system call. +
        2. The setrlimit(RLIMIT_NOFILE) call does not function on your system + (such as Solaris 2.3) +
        3. The number of file descriptors required exceeds the hard limit. +
        4. Your system imposes other limits on file descriptors, such as a limit +on stdio streams only using file descriptors below 256. (Solaris 2) +
        + +In the event of problems you can: +
          +
        • Reduce the number of log files; don't specify log files in the VirtualHost +sections, but only log to the main log files. +
        • If your system falls into 1 or 2 (above), then increase the file descriptor +limit before starting Apache, using a script like +
          +#!/bin/sh
          +ulimit -S -n 100
          +exec httpd
          +
        +

        +Please see the +Descriptors and Apache +document containing further details about file descriptor problems and how +they can be solved on your operating system. +

        + + + + diff --git a/docs/manual/vhosts/fd-limits.html.en b/docs/manual/vhosts/fd-limits.html.en new file mode 100644 index 00000000000..77f4d8254d9 --- /dev/null +++ b/docs/manual/vhosts/fd-limits.html.en @@ -0,0 +1,59 @@ + + + +Apache Server Virtual Host Support + + + + + +

        File Descriptor Limits

        + +

        +When using a large number of Virtual Hosts, Apache may run out of available +file descriptors (sometimes called file handles) if each Virtual +Host specifies different log files. +The total number of file descriptors used by Apache is one for each distinct +error log file, one for every other log file directive, plus 10-20 for +internal use. Unix operating systems limit the number of file descriptors that +may be used by a process; the limit is typically 64, and may usually be +increased up to a large hard-limit. +

        +Although Apache attempts to increase the limit as required, this +may not work if: +

          +
        1. Your system does not provide the setrlimit() system call. +
        2. The setrlimit(RLIMIT_NOFILE) call does not function on your system + (such as Solaris 2.3) +
        3. The number of file descriptors required exceeds the hard limit. +
        4. Your system imposes other limits on file descriptors, such as a limit +on stdio streams only using file descriptors below 256. (Solaris 2) +
        + +In the event of problems you can: +
          +
        • Reduce the number of log files; don't specify log files in the VirtualHost +sections, but only log to the main log files. +
        • If your system falls into 1 or 2 (above), then increase the file descriptor +limit before starting Apache, using a script like +
          +#!/bin/sh
          +ulimit -S -n 100
          +exec httpd
          +
        +

        +Please see the +Descriptors and Apache +document containing further details about file descriptor problems and how +they can be solved on your operating system. +

        + + + + diff --git a/docs/manual/vhosts/index.html b/docs/manual/vhosts/index.html new file mode 100644 index 00000000000..0b1a22678ee --- /dev/null +++ b/docs/manual/vhosts/index.html @@ -0,0 +1,58 @@ + + + +Apache Virtual Host documentation + + + + + +

        Apache Virtual Host documentation

        + +

        The term Virtual Host refers to the practice of maintaining +more than one server on one machine, as differentiated by their apparent +hostname. For example, it is often desirable for companies sharing a +web server to have their own domains, with web servers accessible as +www.company1.com and www.company2.com, +without requiring the user to know any extra path information.

        + +

        Apache was one of the first servers to support IP-based +virtual hosts right out of the box. Versions 1.1 and later of +Apache support both IP-based and name-based virtual hosts (vhosts). +The latter variant of virtual hosts is sometimes also called host-based or +non-IP virtual hosts.

        + +

        Below is a list of documentation pages which explain all details +of virtual host support in Apache version 1.3 and later.

        + +
        + +

        Virtual Host Support

        + + + +

        Configuration directives

        + + + + + + diff --git a/docs/manual/vhosts/index.html.en b/docs/manual/vhosts/index.html.en new file mode 100644 index 00000000000..0b1a22678ee --- /dev/null +++ b/docs/manual/vhosts/index.html.en @@ -0,0 +1,58 @@ + + + +Apache Virtual Host documentation + + + + + +

        Apache Virtual Host documentation

        + +

        The term Virtual Host refers to the practice of maintaining +more than one server on one machine, as differentiated by their apparent +hostname. For example, it is often desirable for companies sharing a +web server to have their own domains, with web servers accessible as +www.company1.com and www.company2.com, +without requiring the user to know any extra path information.

        + +

        Apache was one of the first servers to support IP-based +virtual hosts right out of the box. Versions 1.1 and later of +Apache support both IP-based and name-based virtual hosts (vhosts). +The latter variant of virtual hosts is sometimes also called host-based or +non-IP virtual hosts.

        + +

        Below is a list of documentation pages which explain all details +of virtual host support in Apache version 1.3 and later.

        + +
        + +

        Virtual Host Support

        + + + +

        Configuration directives

        + + + + + + diff --git a/docs/manual/vhosts/ip-based.html b/docs/manual/vhosts/ip-based.html new file mode 100644 index 00000000000..e1cc14ff056 --- /dev/null +++ b/docs/manual/vhosts/ip-based.html @@ -0,0 +1,129 @@ + + + +Apache IP-based Virtual Host Support + + + + + +

        Apache IP-based Virtual Host Support

        + +See also: +Name-based Virtual Hosts Support + +
        + +

        System requirements

        +As the term IP-based indicates, the server must have a +different IP address for each IP-based virtual host. +This can be achieved by the machine having several physical network connections, +or by use of virtual interfaces which are supported by most modern +operating systems (see system documentation for details). + +

        How to set up Apache

        +There are two ways of configuring Apache to support multiple hosts: +either by running a separate httpd daemon for each hostname, or by running a +single daemon which supports all the virtual hosts. +

        +Use multiple daemons when: +

          +
        • The different virtual hosts need very different httpd configurations, such + as different values for: ServerType, + User, + Group, + TypesConfig or + ServerRoot. +
        • The machine does not process a very high request rate. +
        +Use a single daemon when: +
          +
        • Sharing of the httpd configuration between virtual hosts is acceptable. +
        • The machine services a large number of requests, and so the performance + loss in running separate daemons may be significant. +
        + +

        Setting up multiple daemons

        +Create a separate httpd installation for each virtual host. +For each installation, use the +Listen directive in the configuration +file to select which IP address (or virtual host) that daemon services. +e.g. +
        +    Listen www.smallco.com:80
        +
        +It is recommended that you use an IP address instead of a hostname +(see DNS page). + +

        Setting up a single daemon with virtual hosts

        +For this case, a single httpd will service requests for the main server +and all the virtual hosts. +The VirtualHost directive in the + configuration file is used to set the values of +ServerAdmin, +ServerName, +DocumentRoot, +ErrorLog and +TransferLog or +CustomLog +configuration directives to different values for each virtual host. +e.g. +
        +    <VirtualHost www.smallco.com>
        +    ServerAdmin webmaster@mail.smallco.com
        +    DocumentRoot /groups/smallco/www
        +    ServerName www.smallco.com
        +    ErrorLog /groups/smallco/logs/error_log
        +    TransferLog /groups/smallco/logs/access_log
        +    </VirtualHost>
        +
        +    <VirtualHost www.baygroup.org>
        +    ServerAdmin webmaster@mail.baygroup.org
        +    DocumentRoot /groups/baygroup/www
        +    ServerName www.baygroup.org
        +    ErrorLog /groups/baygroup/logs/error_log
        +    TransferLog /groups/baygroup/logs/access_log
        +    </VirtualHost>
        +
        + +It is recommended that you use an IP address instead of a hostname +(see DNS page). + +

        + +Almost any configuration directive can be put +in the VirtualHost directive, with the exception of +ServerType, +StartServers, +MaxSpareServers, +MinSpareServers, +MaxRequestsPerChild, +BindAddress, +Listen, +PidFile, +TypesConfig, +ServerRoot and +NameVirtualHost. +

        +User and +Group may be used inside a VirtualHost +directive if the suEXEC wrapper is used. +

        + +SECURITY: When specifying where to write log files, be aware +of some security risks which are present if anyone other than the +user that starts Apache has write access to the directory where they +are written. See the security +tips document for details. +

        + + + + + diff --git a/docs/manual/vhosts/mass.html b/docs/manual/vhosts/mass.html new file mode 100644 index 00000000000..4ecd952e2d2 --- /dev/null +++ b/docs/manual/vhosts/mass.html @@ -0,0 +1,330 @@ + + +Dynamically configured mass virtual hosting + + + + + +

        Dynamically configured mass virtual hosting

        + +

        This document describes how to efficiently serve an arbitrary number +of virtual hosts with Apache 1.3. Some familiarity with +mod_rewrite is +useful.

        + + + +

        Contents:

        + + + +

        Motivation

        + +

        The techniques described here are of interest if your +httpd.conf contains hundreds of +<VirtualHost> sections that are substantially the +same, for example: +

        +NameVirtualHost 111.22.33.44
        +<VirtualHost 111.22.33.44>
        +	ServerName		           www.customer-1.com
        +	DocumentRoot		/www/hosts/www.customer-1.com/docs
        +	ScriptAlias  /cgi-bin/  /www/hosts/www.customer-1.com/cgi-bin
        +</VirtualHost>
        +<VirtualHost 111.22.33.44>
        +	ServerName		           www.customer-2.com
        +	DocumentRoot		/www/hosts/www.customer-2.com/docs
        +	ScriptAlias  /cgi-bin/  /www/hosts/www.customer-2.com/cgi-bin
        +</VirtualHost>
        +# blah blah blah
        +<VirtualHost 111.22.33.44>
        +	ServerName		           www.customer-N.com
        +	DocumentRoot		/www/hosts/www.customer-N.com/docs
        +	ScriptAlias  /cgi-bin/  /www/hosts/www.customer-N.com/cgi-bin
        +</VirtualHost>
        +
        +

        + +

        The basic idea is to replace all of the static +<VirtualHost> configuration with a mechanism that +works it out dynamically. This has a number of advantages: +

          +
        1. Your configuration file is smaller so Apache starts faster and + uses less memory. +
        2. Adding virtual hosts is simply a matter of creating the + appropriate directories in the filesystem and entries in the DNS - + you don't need to reconfigure or restart Apache. +
        +

        + +

        The main disadvantage is that you cannot have a different log file +for each server; however if you have very many virtual hosts then +doing this is dubious anyway because it eats file descriptors. It's +better to log to a pipe or a fifo and arrange for the process at the +other end to distribute the logs (and perhaps accumulate statistics, +etc.). A LogFormat directive that includes +%v for the virtual host makes it easy to do this.

        + + +

        Overview of the technique

        + +

        All of the dynamic virtual hosts will either be configured as part +of the main server configuration, or within a +<VirtualHost> section. For a simple (very uniform) +setup, <VirtualHost> sections aren't needed at all.

        + +

        A couple of things need to be `faked' to make the dynamic virtual +host look like a normal one. The most important is the server name +(configured with ServerName and available to CGIs via the +SERVER_NAME environment variable). The way it is +determined is controlled by the UseCanonicalName +directive: with UseCanonicalName off the server name +comes from the contents of the Host: header in the +request. If there is no Host: header then the value +configured with ServerName is used instead.

        + +

        The other one is the document root (configured with +DocumentRoot and available to CGIs via the +DOCUMENT_ROOT environment variable). This is used by the +core module when mapping URIs to filenames, but in the context of +dynamic virtual hosting its value only matters if any CGIs or SSI +documents make use of the DOCUMENT_ROOT environment +variable. This is an Apache extension to the CGI specification and as +such shouldn't really be relied upon, especially because this +technique breaks it: there isn't currently a way of setting +DOCUMENT_ROOT dynamically.

        + +

        The meat of the mechanism works via Apache's URI-to-filename +translation API phase. This is used by a number of modules: +mod_rewrite, +mod_alias, +mod_userdir, +and the core module. +In the default configuration these modules are called in that order +and given a chance to say that they know what the filename is. Most of +these modules do it in a fairly simple fashion (e.g. the core module +concatenates the document root and the URI) except for +mod_rewrite, which provides enough functionality to do +all sorts of sick and twisted things (like dynamic virtual hosting). +Note that because of the order in which the modules are called, using +a mod_rewrite configuration that matches any URI means +that the other modules (particularly mod_alias) will +cease to function. The examples below show how to deal with this.

        + +

        The dynamic virtual hosting idea is very simple: use the +server name as well as the URI to determine the corresponding +filename.

        + + +

        Simple name-based dynamic virtual hosts

        + +

        This extract from httpd.conf implements the virtual +host arrangement outlined in the Motivation +section above, but in a generic fashion.

        + +

        The first half shows some other configuration options that are +needed to make the mod_rewrite part work as expected; the +second half uses mod_rewrite to do the actual work. Some +care is taken to do a per-dynamic-virtual-host equivalent of +ScriptAlias.

        + +
        +# dynamic ServerName
        +UseCanonicalName Off
        +
        +# splittable logs
        +LogFormat "%v %h %l %u %t \"%r\" %s %b" vcommon
        +CustomLog logs/access_log vcommon
        +
        +<Directory /www/hosts>
        +	# ExecCGI is needed here because we can't force
        +	# CGI execution in the way that ScriptAlias does
        +	Options FollowSymLinks ExecCGI
        +</Directory>
        +
        +# now for the hard bit
        +
        +RewriteEngine On
        +
        +# a ServerName derived from a Host: header may be any case at all
        +RewriteMap  lowercase  int:tolower
        +
        +## deal with normal documents first:
        +# allow Alias /icons/ to work - repeat for other aliases
        +RewriteCond  %{REQUEST_URI}  !^/icons/
        +# allow CGIs to work
        +RewriteCond  %{REQUEST_URI}  !^/cgi-bin/
        +# do the magic
        +RewriteRule  ^/(.*)$  /www/hosts/${lowercase:%{SERVER_NAME}}/docs/$1
        +
        +## and now deal with CGIs - we have to force a MIME type
        +RewriteCond  %{REQUEST_URI}  ^/cgi-bin/
        +RewriteRule  ^/(.*)$  /www/hosts/${lowercase:%{SERVER_NAME}}/cgi-bin/$1  [T=application/x-httpd-cgi]
        +
        +# that's it!
        +
        + + +

        A virtually hosted homepages system

        + +

        This is an adjustment of the above system tailored for an ISP's +homepages server. Using slightly more complicated rewriting rules we +can select substrings of the server name to use in the filename so +that e.g. the documents for www.user.isp.com are found in +/home/user/. It uses a single cgi-bin +directory instead of one per virtual host.

        + +
        +RewriteEngine on
        +
        +RewriteMap   lowercase  int:tolower
        +
        +# allow CGIs to work
        +RewriteCond  %{REQUEST_URI}  !^/cgi-bin/
        +
        +# check the hostname is right so that the RewriteRule works
        +RewriteCond  ${lowercase:%{HTTP_HOST}}  ^www\.[a-z-]+\.isp\.com$
        +
        +# concatenate the virtual host name onto the start of the URI
        +# the [C] means do the next rewrite on the result of this one
        +RewriteRule  ^(.+)  ${lowercase:%{HTTP_HOST}}$1  [C]
        +
        +# now create the real file name
        +RewriteRule  ^www\.([a-z-]+)\.isp\.com/(.*) /home/$1/$2
        +
        +# define the global CGI directory
        +ScriptAlias  /cgi-bin/  /www/std-cgi/
        +
        + + +

        Using a separate virtual host configuration file

        + +

        This arrangement uses a separate configuration file to specify the +translation from virtual host to document root. This provides more +flexibility but requires more configuration.

        + +

        The vhost.map file contains something like this: +

        +www.customer-1.com  /www/customers/1
        +www.customer-2.com  /www/customers/2
        +# ...
        +www.customer-N.com  /www/customers/N
        +
        +

        + +

        The httpd.conf contains this: +

        +RewriteEngine on
        +
        +RewriteMap   lowercase  int:tolower
        +
        +# define the map file
        +RewriteMap   vhost      txt:/www/conf/vhost.map
        +
        +# deal with aliases as above
        +RewriteCond  %{REQUEST_URI}               !^/icons/
        +RewriteCond  %{REQUEST_URI}               !^/cgi-bin/
        +RewriteCond  ${lowercase:%{SERVER_NAME}}  ^(.+)$
        +# this does the file-based remap
        +RewriteCond  ${vhost:%1}                  ^(/.*)$
        +RewriteRule  ^/(.*)$                      %1/docs/$1
        +
        +RewriteCond  %{REQUEST_URI}               ^/cgi-bin/
        +RewriteCond  ${lowercase:%{SERVER_NAME}}  ^(.+)$
        +RewriteCond  ${vhost:%1}                  ^(/.*)$
        +RewriteRule  ^/(.*)$                      %1/cgi-bin/$1
        +
        +

        + + +

        Using more than one virtual hosting system on the same server

        + +

        With more complicated setups, you can use Apache's normal +<VirtualHost> directives to control the scope of +the various rewrite configurations. For example, you could have one IP +address for homepages customers and another for commercial customers +with the following setup. This can of course be combined with +conventional <VirtualHost> configuration +sections.

        + +
        +UseCanonicalName Off
        +
        +LogFormat "%v %h %l %u %t \"%r\" %s %b" vcommon
        +CustomLog logs/access_log vcommon
        +
        +<Directory /www/commercial>
        +	Options FollowSymLinks ExecCGI
        +	AllowOverride All
        +</Directory>
        +
        +<Directory /www/homepages>
        +	Options FollowSymLinks
        +	AllowOverride None
        +</Directory>
        +
        +<VirtualHost 111.22.33.44>
        +	ServerName www.commercial.isp.com
        +
        +	RewriteEngine On
        +	RewriteMap    lowercase  int:tolower
        +
        +	RewriteCond   %{REQUEST_URI}  !^/icons/
        +	RewriteCond   %{REQUEST_URI}  !^/cgi-bin/
        +	RewriteRule   ^/(.*)$  /www/commercial/${lowercase:%{SERVER_NAME}}/docs/$1
        +
        +	RewriteCond   %{REQUEST_URI}  ^/cgi-bin/
        +	RewriteRule   ^/(.*)$  /www/commercial/${lowercase:%{SERVER_NAME}}/cgi-bin/$1  [T=application/x-httpd-cgi]
        +</VirtualHost>
        +
        +<VirtualHost 111.22.33.45>
        +	ServerName www.homepages.isp.com
        +
        +	RewriteEngine on
        +	RewriteMap    lowercase  int:tolower
        +
        +	RewriteCond   %{REQUEST_URI}  !^/cgi-bin/
        +
        +	RewriteCond   ${lowercase:%{HTTP_HOST}}  ^www\.[a-z-]+\.isp\.com$
        +	RewriteRule   ^(.+)  ${lowercase:%{HTTP_HOST}}$1  [C]
        +	RewriteRule   ^www\.([a-z-]+)\.isp\.com/(.*) /www/homepages/$1/$2
        +
        +	ScriptAlias   /cgi-bin/ /www/std-cgi/
        +</VirtualHost>
        +
        + + +
        + +

        + Apache HTTP Server Version 1.3 +

        + +Index +Home + + + diff --git a/docs/manual/vhosts/name-based.html b/docs/manual/vhosts/name-based.html new file mode 100644 index 00000000000..f26dd5f8ed7 --- /dev/null +++ b/docs/manual/vhosts/name-based.html @@ -0,0 +1,141 @@ + + +Apache name-based Virtual Hosts + + + + + +

        Apache name-based Virtual Host Support

        + +See Also: +IP-based Virtual Host Support + +
        + +

        Name-based vs. IP-based virtual hosts

        + +

        While the approach with IP-based virtual hosts still works very well, +it is not the most elegant solution, because a dedicated IP address +is needed for every virtual host and it is hard to implement on some +machines. The HTTP/1.1 protocol contains a method for the +server to identify what name it is being addressed as. Apache 1.1 and +later support this approach as well as the traditional +IP-address-per-hostname method.

        + +

        The benefits of using the new name-based virtual host support are a +practically unlimited number of servers, ease of configuration and use, and +no need for additional hardware or software. +The main disadvantage is that the client must support this part of the +protocol. The latest versions of most browsers do, but there are still +old browsers in use that do not. This can cause problems, although a possible +solution is addressed below.

        + +

        Using non-IP Virtual Hosts

        + +

        Using the new virtual hosts is quite easy, and superficially looks +like the old method. You simply add to one of the Apache configuration +files (most likely httpd.conf or srm.conf) +code similar to the following:

        +
        +    NameVirtualHost 111.22.33.44
        +
        +    <VirtualHost 111.22.33.44>
        +    ServerName www.domain.tld
        +    DocumentRoot /web/domain
        +    </VirtualHost>
        +
        + +

        The notable difference between IP-based and name-based virtual host +configuration is the +NameVirtualHost +directive which specifies any IP address that should be used as a target for +name-based virtual hosts. + +

        Of course, any additional directives can (and should) be placed +into the <VirtualHost> section. To make this work, +all that is needed is to make sure that the name +www.domain.tld is an alias (CNAME) pointing to the IP address +111.22.33.44.

        + +

        Additionally, many servers may wish to be accessible by more than +one name. For example, the example server might want to be accessible +as domain.tld, or www2.domain.tld, assuming +the IP addresses pointed to the same server. In fact, one might want it +so that all addresses at domain.tld were picked up by the +server. This is possible with the +ServerAlias +directive, placed inside the <VirtualHost> section. For +example:

        + +
        +    ServerAlias domain.tld *.domain.tld
        +
        + +

        Note that you can use * and ? as wild-card +characters.

        + +

        You also might need ServerAlias if you are +serving local users who do not always include the domain name. +For example, if local users are +familiar with typing "www" or "www.foobar" then you will need to add +ServerAlias www www.foobar. It isn't possible for the +server to know what domain the client uses for their name resolution +because the client doesn't provide that information in the request.

        + +

        Compatibility with Older Browsers

        + +

        As mentioned earlier, there are still some clients in use who +do not send the required data for the name-based virtual hosts to work +properly. These clients will always be sent the pages from the +primary name-based virtual host (the first virtual host +appearing in the configuration file for a specific IP address).

        + +

        There is a possible workaround with the +ServerPath +directive, albeit a slightly cumbersome one:

        + +

        Example configuration: + +

        +    NameVirtualHost 111.22.33.44
        +
        +    <VirtualHost 111.22.33.44>
        +    ServerName www.domain.tld
        +    ServerPath /domain
        +    DocumentRoot /web/domain
        +    </VirtualHost>
        +
        + +

        What does this mean? It means that a request for any URI beginning +with "/domain" will be served from the virtual host +www.domain.tld. This means that the pages can be accessed as +http://www.domain.tld/domain/ for all clients, although +clients sending a Host: header can also access it as +http://www.domain.tld/.

        + +

        In order to make this work, put a link on your primary virtual host's page +to http://www.domain.tld/domain/. +Then, in the virtual host's pages, be sure to use either purely +relative links (e.g. "file.html" or +"../icons/image.gif") or links containing the prefacing +/domain/ +(e.g. "http://www.domain.tld/domain/misc/file.html" or +"/domain/misc/file.html").

        + +

        This requires a bit of +discipline, but adherence to these guidelines will, for the most part, +ensure that your pages will work with all browsers, new and old.

        + +

        See also: ServerPath configuration +example

        + + + + diff --git a/docs/manual/vhosts/name-based.html.en b/docs/manual/vhosts/name-based.html.en new file mode 100644 index 00000000000..f26dd5f8ed7 --- /dev/null +++ b/docs/manual/vhosts/name-based.html.en @@ -0,0 +1,141 @@ + + +Apache name-based Virtual Hosts + + + + + +

        Apache name-based Virtual Host Support

        + +See Also: +IP-based Virtual Host Support + +
        + +

        Name-based vs. IP-based virtual hosts

        + +

        While the approach with IP-based virtual hosts still works very well, +it is not the most elegant solution, because a dedicated IP address +is needed for every virtual host and it is hard to implement on some +machines. The HTTP/1.1 protocol contains a method for the +server to identify what name it is being addressed as. Apache 1.1 and +later support this approach as well as the traditional +IP-address-per-hostname method.

        + +

        The benefits of using the new name-based virtual host support are a +practically unlimited number of servers, ease of configuration and use, and +no need for additional hardware or software. +The main disadvantage is that the client must support this part of the +protocol. The latest versions of most browsers do, but there are still +old browsers in use that do not. This can cause problems, although a possible +solution is addressed below.

        + +

        Using non-IP Virtual Hosts

        + +

        Using the new virtual hosts is quite easy, and superficially looks +like the old method. You simply add to one of the Apache configuration +files (most likely httpd.conf or srm.conf) +code similar to the following:

        +
        +    NameVirtualHost 111.22.33.44
        +
        +    <VirtualHost 111.22.33.44>
        +    ServerName www.domain.tld
        +    DocumentRoot /web/domain
        +    </VirtualHost>
        +
        + +

        The notable difference between IP-based and name-based virtual host +configuration is the +NameVirtualHost +directive which specifies any IP address that should be used as a target for +name-based virtual hosts. + +

        Of course, any additional directives can (and should) be placed +into the <VirtualHost> section. To make this work, +all that is needed is to make sure that the name +www.domain.tld is an alias (CNAME) pointing to the IP address +111.22.33.44.

        + +

        Additionally, many servers may wish to be accessible by more than +one name. For example, the example server might want to be accessible +as domain.tld, or www2.domain.tld, assuming +the IP addresses pointed to the same server. In fact, one might want it +so that all addresses at domain.tld were picked up by the +server. This is possible with the +ServerAlias +directive, placed inside the <VirtualHost> section. For +example:

        + +
        +    ServerAlias domain.tld *.domain.tld
        +
        + +

        Note that you can use * and ? as wild-card +characters.

        + +

        You also might need ServerAlias if you are +serving local users who do not always include the domain name. +For example, if local users are +familiar with typing "www" or "www.foobar" then you will need to add +ServerAlias www www.foobar. It isn't possible for the +server to know what domain the client uses for their name resolution +because the client doesn't provide that information in the request.

        + +

        Compatibility with Older Browsers

        + +

        As mentioned earlier, there are still some clients in use who +do not send the required data for the name-based virtual hosts to work +properly. These clients will always be sent the pages from the +primary name-based virtual host (the first virtual host +appearing in the configuration file for a specific IP address).

        + +

        There is a possible workaround with the +ServerPath +directive, albeit a slightly cumbersome one:

        + +

        Example configuration: + +

        +    NameVirtualHost 111.22.33.44
        +
        +    <VirtualHost 111.22.33.44>
        +    ServerName www.domain.tld
        +    ServerPath /domain
        +    DocumentRoot /web/domain
        +    </VirtualHost>
        +
        + +

        What does this mean? It means that a request for any URI beginning +with "/domain" will be served from the virtual host +www.domain.tld. This means that the pages can be accessed as +http://www.domain.tld/domain/ for all clients, although +clients sending a Host: header can also access it as +http://www.domain.tld/.

        + +

        In order to make this work, put a link on your primary virtual host's page +to http://www.domain.tld/domain/. +Then, in the virtual host's pages, be sure to use either purely +relative links (e.g. "file.html" or +"../icons/image.gif") or links containing the prefacing +/domain/ +(e.g. "http://www.domain.tld/domain/misc/file.html" or +"/domain/misc/file.html").

        + +

        This requires a bit of +discipline, but adherence to these guidelines will, for the most part, +ensure that your pages will work with all browsers, new and old.

        + +

        See also: ServerPath configuration +example

        + + + + diff --git a/include/ap_mpm.h b/include/ap_mpm.h new file mode 100644 index 00000000000..e025bbc075e --- /dev/null +++ b/include/ap_mpm.h @@ -0,0 +1,109 @@ +/* ==================================================================== + * Copyright (c) 1995-1999 The Apache Group. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." + * + * 4. The names "Apache Server" and "Apache Group" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." + * + * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE APACHE GROUP OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Group and was originally based + * on public domain software written at the National Center for + * Supercomputing Applications, University of Illinois, Urbana-Champaign. + * For more information on the Apache Group and the Apache HTTP server + * project, please see . + * + */ + +#ifndef AP_MMN_H +#define AP_MMN_H + +/* run until a restart/shutdown is indicated, return 1 for shutdown + 0 otherwise */ +API_EXPORT(int) ap_mpm_run(pool *pconf, pool *plog, server_rec *server_conf); + +/* predicate indicating if a graceful stop has been requested ... + used by the connection loop */ +API_EXPORT(int) ap_mpm_graceful_stop(void); + +#ifdef HAS_OTHER_CHILD +/* + * register an other_child -- a child which the main loop keeps track of + * and knows it is different than the rest of the scoreboard. + * + * pid is the pid of the child. + * + * maintenance is a function that is invoked with a reason, the data + * pointer passed here, and when appropriate a status result from waitpid(). + * + * write_fd is an fd that is probed for writing by select() if it is ever + * unwritable, then maintenance is invoked with reason OC_REASON_UNWRITABLE. + * This is useful for log pipe children, to know when they've blocked. To + * disable this feature, use -1 for write_fd. 
+ */ +API_EXPORT(void) ap_register_other_child(int pid, + void (*maintenance) (int reason, void *data, ap_wait_t status), void *data, + int write_fd); +#define OC_REASON_DEATH 0 /* child has died, caller must call + * unregister still */ +#define OC_REASON_UNWRITABLE 1 /* write_fd is unwritable */ +#define OC_REASON_RESTART 2 /* a restart is occuring, perform + * any necessary cleanup (including + * sending a special signal to child) + */ +#define OC_REASON_UNREGISTER 3 /* unregister has been called, do + * whatever is necessary (including + * kill the child) */ +#define OC_REASON_LOST 4 /* somehow the child exited without + * us knowing ... buggy os? */ + +/* + * unregister an other_child. Note that the data pointer is used here, and + * is assumed to be unique per other_child. This is because the pid and + * write_fd are possibly killed off separately. + */ +API_EXPORT(void) ap_unregister_other_child(void *data); + +#endif + +#endif diff --git a/include/http_connection.h b/include/http_connection.h new file mode 100644 index 00000000000..cbe8e74f4e7 --- /dev/null +++ b/include/http_connection.h @@ -0,0 +1,71 @@ +/* ==================================================================== + * Copyright (c) 1995-1999 The Apache Group. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." + * + * 4. The names "Apache Server" and "Apache Group" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." + * + * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Group and was originally based + * on public domain software written at the National Center for + * Supercomputing Applications, University of Illinois, Urbana-Champaign. + * For more information on the Apache Group and the Apache HTTP server + * project, please see <http://www.apache.org/>. + * + */ + +#ifndef APACHE_HTTP_CONNECTION_H +#define APACHE_HTTP_CONNECTION_H + +#ifdef __cplusplus +extern "C" { +#endif + +CORE_EXPORT(void) ap_process_connection(conn_rec *); + +#ifdef __cplusplus +} +#endif + +#endif /* !APACHE_HTTP_CONNECTION_H */ diff --git a/modules/filters/mod_include.exp b/modules/filters/mod_include.exp new file mode 100644 index 00000000000..335da742da4 --- /dev/null +++ b/modules/filters/mod_include.exp @@ -0,0 +1 @@ +includes_module diff --git a/modules/generators/mod_asis.exp b/modules/generators/mod_asis.exp new file mode 100644 index 00000000000..4f347d921e0 --- /dev/null +++ b/modules/generators/mod_asis.exp @@ -0,0 +1 @@ +asis_module diff --git a/modules/generators/mod_autoindex.exp b/modules/generators/mod_autoindex.exp new file mode 100644 index 00000000000..90f4057e9c2 --- /dev/null +++ b/modules/generators/mod_autoindex.exp @@ -0,0 +1 @@ +autoindex_module diff --git a/modules/generators/mod_cgi.exp b/modules/generators/mod_cgi.exp new file mode 100644 index 00000000000..96ea0c23480 --- /dev/null +++ b/modules/generators/mod_cgi.exp @@ -0,0 +1 @@ +cgi_module diff --git a/modules/generators/mod_info.exp b/modules/generators/mod_info.exp new file mode 100644 index 00000000000..c304fa776db --- /dev/null +++ b/modules/generators/mod_info.exp @@ -0,0 +1 @@ +info_module diff --git a/modules/generators/mod_status.exp b/modules/generators/mod_status.exp new file mode 100644 index 00000000000..5438093686c --- /dev/null +++ b/modules/generators/mod_status.exp @@ -0,0 +1 @@ +status_module diff
--git a/modules/http/mod_mime.exp b/modules/http/mod_mime.exp new file mode 100644 index 00000000000..f2e38dbddad --- /dev/null +++ b/modules/http/mod_mime.exp @@ -0,0 +1 @@ +mime_module diff --git a/modules/loggers/mod_log_config.exp b/modules/loggers/mod_log_config.exp new file mode 100644 index 00000000000..01b926f4bb0 --- /dev/null +++ b/modules/loggers/mod_log_config.exp @@ -0,0 +1 @@ +config_log_module diff --git a/modules/mappers/mod_actions.exp b/modules/mappers/mod_actions.exp new file mode 100644 index 00000000000..815dff29a8c --- /dev/null +++ b/modules/mappers/mod_actions.exp @@ -0,0 +1 @@ +action_module diff --git a/modules/mappers/mod_alias.exp b/modules/mappers/mod_alias.exp new file mode 100644 index 00000000000..ac386ec3faa --- /dev/null +++ b/modules/mappers/mod_alias.exp @@ -0,0 +1 @@ +alias_module diff --git a/modules/mappers/mod_dir.exp b/modules/mappers/mod_dir.exp new file mode 100644 index 00000000000..5fbf7729919 --- /dev/null +++ b/modules/mappers/mod_dir.exp @@ -0,0 +1 @@ +dir_module diff --git a/modules/mappers/mod_imap.exp b/modules/mappers/mod_imap.exp new file mode 100644 index 00000000000..1e0e0b83d09 --- /dev/null +++ b/modules/mappers/mod_imap.exp @@ -0,0 +1 @@ +imap_module diff --git a/modules/mappers/mod_negotiation.exp b/modules/mappers/mod_negotiation.exp new file mode 100644 index 00000000000..a7c18da1de8 --- /dev/null +++ b/modules/mappers/mod_negotiation.exp @@ -0,0 +1 @@ +negotiation_module diff --git a/modules/mappers/mod_rewrite.exp b/modules/mappers/mod_rewrite.exp new file mode 100644 index 00000000000..8f2165bfe05 --- /dev/null +++ b/modules/mappers/mod_rewrite.exp @@ -0,0 +1 @@ +rewrite_module diff --git a/modules/mappers/mod_speling.exp b/modules/mappers/mod_speling.exp new file mode 100644 index 00000000000..a6ee8b50340 --- /dev/null +++ b/modules/mappers/mod_speling.exp @@ -0,0 +1 @@ +speling_module diff --git a/modules/mappers/mod_userdir.exp b/modules/mappers/mod_userdir.exp new file mode 100644 index 
00000000000..6b8b81d5c33 --- /dev/null +++ b/modules/mappers/mod_userdir.exp @@ -0,0 +1 @@ +userdir_module diff --git a/modules/metadata/mod_cern_meta.exp b/modules/metadata/mod_cern_meta.exp new file mode 100644 index 00000000000..d36e2be6a85 --- /dev/null +++ b/modules/metadata/mod_cern_meta.exp @@ -0,0 +1 @@ +cern_meta_module diff --git a/modules/metadata/mod_env.exp b/modules/metadata/mod_env.exp new file mode 100644 index 00000000000..b487bf09c8f --- /dev/null +++ b/modules/metadata/mod_env.exp @@ -0,0 +1 @@ +env_module diff --git a/modules/metadata/mod_expires.exp b/modules/metadata/mod_expires.exp new file mode 100644 index 00000000000..863a96878e1 --- /dev/null +++ b/modules/metadata/mod_expires.exp @@ -0,0 +1 @@ +expires_module diff --git a/modules/metadata/mod_headers.exp b/modules/metadata/mod_headers.exp new file mode 100644 index 00000000000..3f3063808ab --- /dev/null +++ b/modules/metadata/mod_headers.exp @@ -0,0 +1 @@ +headers_module diff --git a/modules/metadata/mod_mime_magic.exp b/modules/metadata/mod_mime_magic.exp new file mode 100644 index 00000000000..42068a43427 --- /dev/null +++ b/modules/metadata/mod_mime_magic.exp @@ -0,0 +1 @@ +mime_magic_module diff --git a/modules/metadata/mod_setenvif.exp b/modules/metadata/mod_setenvif.exp new file mode 100644 index 00000000000..4f3800e3a8a --- /dev/null +++ b/modules/metadata/mod_setenvif.exp @@ -0,0 +1 @@ +setenvif_module diff --git a/modules/metadata/mod_unique_id.exp b/modules/metadata/mod_unique_id.exp new file mode 100644 index 00000000000..93000f1ee61 --- /dev/null +++ b/modules/metadata/mod_unique_id.exp @@ -0,0 +1 @@ +unique_id_module diff --git a/modules/metadata/mod_usertrack.exp b/modules/metadata/mod_usertrack.exp new file mode 100644 index 00000000000..234a5f759dc --- /dev/null +++ b/modules/metadata/mod_usertrack.exp @@ -0,0 +1 @@ +usertrack_module diff --git a/modules/proxy/libproxy.exp b/modules/proxy/libproxy.exp new file mode 100644 index 00000000000..a20f2378f59 --- /dev/null +++ 
b/modules/proxy/libproxy.exp @@ -0,0 +1 @@ +proxy_module diff --git a/server/connection.c b/server/connection.c new file mode 100644 index 00000000000..16ee8d494f6 --- /dev/null +++ b/server/connection.c @@ -0,0 +1,236 @@ +/* ==================================================================== + * Copyright (c) 1995-1999 The Apache Group. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." + * + * 4. The names "Apache Server" and "Apache Group" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the Apache Group + * for use in the Apache HTTP server project (http://www.apache.org/)." 
+ * + * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Group and was originally based + * on public domain software written at the National Center for + * Supercomputing Applications, University of Illinois, Urbana-Champaign. + * For more information on the Apache Group and the Apache HTTP server + * project, please see <http://www.apache.org/>. + * + */ + +#define CORE_PRIVATE +#include "httpd.h" +#include "http_connection.h" +#include "http_request.h" +#include "http_protocol.h" +#include "ap_mpm.h" +#include "http_config.h" +#include "http_vhost.h" + +#include <poll.h> + +/* + * More machine-dependent networking gooo... on some systems, + * you've got to be *really* sure that all the packets are acknowledged + * before closing the connection, since the client will not be able + * to see the last response if their TCP buffer is flushed by a RST + * packet from us, which is what the server's TCP stack will send + * if it receives any request data after closing the connection.
+ *
+ * In an ideal world, this function would be accomplished by simply
+ * setting the socket option SO_LINGER and handling it within the
+ * server's TCP stack while the process continues on to the next request.
+ * Unfortunately, it seems that most (if not all) operating systems
+ * block the server process on close() when SO_LINGER is used.
+ * For those that don't, see USE_SO_LINGER below. For the rest,
+ * we have created a home-brew lingering_close.
+ *
+ * Many operating systems tend to block, puke, or otherwise mishandle
+ * calls to shutdown only half of the connection. You should define
+ * NO_LINGCLOSE in ap_config.h if such is the case for your system.
+ */
+#ifndef MAX_SECS_TO_LINGER
+#define MAX_SECS_TO_LINGER 30
+#endif
+
+#ifdef USE_SO_LINGER
+#define NO_LINGCLOSE /* The two lingering options are exclusive */
+
+/*
+ * Turn on SO_LINGER for socket s with a linger period of
+ * MAX_SECS_TO_LINGER seconds.  A setsockopt() failure is logged at
+ * warning level and otherwise ignored.
+ */
+static void sock_enable_linger(int s) /* ZZZZZ abstract the socket, s */
+{
+    struct linger li; /* ZZZZZ SocketOptions... */
+
+    li.l_onoff = 1;
+    li.l_linger = MAX_SECS_TO_LINGER;
+
+    /* ZZZZZ abstract, return SUCCESS or not */
+    if (setsockopt(s, SOL_SOCKET, SO_LINGER,
+                   (char *) &li, sizeof(struct linger)) < 0) {
+        ap_log_error(APLOG_MARK, APLOG_WARNING, server_conf,
+                     "setsockopt: (SO_LINGER)");
+        /* not a fatal error */
+    }
+}
+
+#else
+#define sock_enable_linger(s) /* NOOP */
+#endif /* USE_SO_LINGER */
+
+#ifndef NO_LINGCLOSE
+
+/* Since many clients will abort a connection instead of closing it,
+ * attempting to log an error message from this routine will only
+ * confuse the webmaster. There doesn't seem to be any portable way to
+ * distinguish between a dropped connection and something that might be
+ * worth logging.
+ */
+/*
+ * Close the client connection of request r politely: flush pending
+ * output, shut down the sending half of the socket, then read and
+ * discard whatever the client still sends until it goes quiet, hits
+ * EOF or an error occurs, and only then close the connection.
+ */
+static void lingering_close(request_rec *r)
+{
+    /*ZZZ remove the hardwired 512. This is an IO Buffer Size */
+    char dummybuf[512];
+    struct pollfd pd;
+    int lsd;
+    int max_wait;
+
+    /* Prevent a slow-drip client from holding us here indefinitely */
+
+    max_wait = MAX_SECS_TO_LINGER;
+    ap_bsetopt(r->connection->client, BO_TIMEOUT, &max_wait);
+
+    /* Send any leftover data to the client, but never try to again */
+
+    if (ap_bflush(r->connection->client) == -1) {
+        ap_bclose(r->connection->client);
+        return;
+    }
+    ap_bsetflag(r->connection->client, B_EOUT, 1);
+
+    /* Close our half of the connection --- send the client a FIN */
+
+    lsd = r->connection->client->fd;
+
+    if ((shutdown(lsd, 1) != 0) /* ZZZ abstract shutdown */
+        || ap_is_aborted(r->connection)) {
+        ap_bclose(r->connection->client);
+        return;
+    }
+
+    /* Set up to wait for readable data on socket... */
+    pd.fd = lsd;
+    pd.events = POLLIN;
+
+    /* Wait for readable data or error condition on socket;
+     * slurp up any data that arrives...  We exit when we go for a
+     * 2 second interval without getting any more data, get an error
+     * from poll(), or get an error or EOF on a read.
+     */
+    /* We use a 2 second timeout because current (Feb 97) browsers
+     * fail to close a connection after the server closes it.  Thus,
+     * to avoid keeping the child busy, we are only lingering long enough
+     * for a client that is actively sending data on a connection.
+     * This should be sufficient unless the connection is massively
+     * losing packets, in which case we might have missed the RST anyway.
+     * These parameters are reset on each pass, since they might be
+     * changed by poll.
+     */
+    /* poll() takes its timeout in milliseconds, hence 2 * 1000.  The
+     * read() result is compared against 0 so that both EOF (0) and an
+     * error (-1) end the loop instead of only EOF.
+     */
+    do {
+        pd.revents = 0;
+    } while ((poll(&pd, 1, 2 * 1000) == 1)
+             && (read(lsd, dummybuf, sizeof(dummybuf)) > 0));
+    /* ZZZZ nothing in this loop enforces an overall deadline of
+     * max_wait seconds; only the idle interval between reads is bounded.
+     */
+
+    /* Should now have seen final ack. Safe to finally kill socket */
+    ap_bclose(r->connection->client);
+}
+#endif /* ndef NO_LINGCLOSE */
+
+
+/*
+ * Serve the requests arriving on connection c one after another.  The
+ * loop ends when ap_read_request() returns NULL, when keepalive is off
+ * or the connection is aborted after a request, or when
+ * ap_mpm_graceful_stop() returns true; the connection is then closed.
+ */
+CORE_EXPORT(void) ap_process_connection(conn_rec *c)
+{
+    request_rec *r;
+
+    ap_update_vhost_given_ip(c);
+
+    /*
+     * Read and process each request found on our connection
+     * until no requests are left or we decide to close.
+     */
+
+    while ((r = ap_read_request(c)) != NULL) {
+
+        /* process the request if it was read without error */
+
+        if (r->status == HTTP_OK) {
+            ap_process_request(r);
+        }
+
+        /* r is deliberately left alive here: the close path below
+         * still needs it for lingering_close().
+         */
+        if (!c->keepalive || c->aborted) {
+            break;
+        }
+
+        ap_destroy_pool(r->pool);
+
+        if (ap_mpm_graceful_stop()) {
+            /* XXX: hey wait, this should do a lingering_close! */
+            ap_bclose(c->client);
+            return;
+        }
+    }
+
+    /*
+     * Close the connection, being careful to send out whatever is still
+     * in our buffers.  If possible, try to avoid a hard close until the
+     * client has ACKed our FIN and/or has stopped sending us data.
+     */
+
+#ifdef NO_LINGCLOSE
+    ap_bclose(c->client); /* just close it */
+#else
+    if (r && r->connection
+        && !r->connection->aborted
+        && r->connection->client
+        && (r->connection->client->fd >= 0)) {
+
+        lingering_close(r);
+    }
+    else {
+        /* no usable request/socket: mark output as finished and close */
+        ap_bsetflag(c->client, B_EOUT, 1);
+        ap_bclose(c->client);
+    }
+#endif
+}