From 05df66223c914d6d272b4195c314562ac114a92a Mon Sep 17 00:00:00 2001 From: "(no author)" <(no author)@unknown> Date: Fri, 9 Feb 2001 15:05:28 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create tag 'APACHE_2_0_2001_02_09'. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/tags/APACHE_2_0_2001_02_09@88036 13f79535-47bb-0310-9956-ffa450edef68 --- docs/manual/bind.html.en | 78 - docs/manual/cgi_path.html.en | 93 - docs/manual/configuring.html.en | 248 --- docs/manual/content-negotiation.html.en | 590 ------ docs/manual/custom-error.html.en | 177 -- docs/manual/developer/API.html | 1161 ----------- docs/manual/developer/modules.html.en | 203 -- docs/manual/dso.html.en | 394 ---- docs/manual/handler.html.en | 156 -- docs/manual/howto/ssi.html.en | 519 ----- docs/manual/index.html.en | 174 -- docs/manual/install.html.en | 199 -- docs/manual/invoking.html.en | 118 -- docs/manual/mod/directive-dict.html.en | 283 --- docs/manual/mod/module-dict.html.en | 144 -- docs/manual/mpm.html.en | 90 - docs/manual/new_features_2_0.html.en | 94 - docs/manual/sections.html.en | 163 -- docs/manual/server-wide.html.en | 113 - docs/manual/stopping.html.en | 192 -- docs/manual/suexec.html.en | 516 ----- docs/manual/upgrading.html.en | 143 -- docs/manual/vhosts/fd-limits.html.en | 59 - docs/manual/vhosts/index.html.en | 65 - docs/manual/vhosts/name-based.html.en | 169 -- modules/arch/win32/mod_isapi.c | 1296 ------------ modules/test/mod_optional_fn_export.c | 86 - modules/test/mod_optional_fn_export.h | 3 - modules/test/mod_optional_fn_import.c | 93 - server/mpm/experimental/perchild/.cvsignore | 5 - server/mpm/experimental/perchild/Makefile.in | 5 - server/mpm/experimental/perchild/mpm.h | 88 - .../mpm/experimental/perchild/mpm_default.h | 144 -- server/mpm/experimental/perchild/perchild.c | 1834 ----------------- server/mpm/winnt/Win9xConHook.c | 738 ------- server/mpm/winnt/Win9xConHook.def | 10 - server/mpm/winnt/Win9xConHook.dsp | 103 - 
server/mpm/winnt/Win9xConHook.h | 91 - 38 files changed, 10637 deletions(-) delete mode 100644 docs/manual/bind.html.en delete mode 100644 docs/manual/cgi_path.html.en delete mode 100644 docs/manual/configuring.html.en delete mode 100644 docs/manual/content-negotiation.html.en delete mode 100644 docs/manual/custom-error.html.en delete mode 100644 docs/manual/developer/API.html delete mode 100644 docs/manual/developer/modules.html.en delete mode 100644 docs/manual/dso.html.en delete mode 100644 docs/manual/handler.html.en delete mode 100644 docs/manual/howto/ssi.html.en delete mode 100644 docs/manual/index.html.en delete mode 100644 docs/manual/install.html.en delete mode 100644 docs/manual/invoking.html.en delete mode 100644 docs/manual/mod/directive-dict.html.en delete mode 100644 docs/manual/mod/module-dict.html.en delete mode 100644 docs/manual/mpm.html.en delete mode 100644 docs/manual/new_features_2_0.html.en delete mode 100644 docs/manual/sections.html.en delete mode 100644 docs/manual/server-wide.html.en delete mode 100644 docs/manual/stopping.html.en delete mode 100644 docs/manual/suexec.html.en delete mode 100644 docs/manual/upgrading.html.en delete mode 100644 docs/manual/vhosts/fd-limits.html.en delete mode 100644 docs/manual/vhosts/index.html.en delete mode 100644 docs/manual/vhosts/name-based.html.en delete mode 100644 modules/arch/win32/mod_isapi.c delete mode 100644 modules/test/mod_optional_fn_export.c delete mode 100644 modules/test/mod_optional_fn_export.h delete mode 100644 modules/test/mod_optional_fn_import.c delete mode 100644 server/mpm/experimental/perchild/.cvsignore delete mode 100644 server/mpm/experimental/perchild/Makefile.in delete mode 100644 server/mpm/experimental/perchild/mpm.h delete mode 100644 server/mpm/experimental/perchild/mpm_default.h delete mode 100644 server/mpm/experimental/perchild/perchild.c delete mode 100644 server/mpm/winnt/Win9xConHook.c delete mode 100644 server/mpm/winnt/Win9xConHook.def delete mode 100644 
server/mpm/winnt/Win9xConHook.dsp delete mode 100644 server/mpm/winnt/Win9xConHook.h diff --git a/docs/manual/bind.html.en b/docs/manual/bind.html.en deleted file mode 100644 index 208c6c54a21..00000000000 --- a/docs/manual/bind.html.en +++ /dev/null @@ -1,78 +0,0 @@ - - -Setting which addresses and ports Apache uses - - - - - -

Setting which addresses and ports Apache uses

- -

When Apache starts, it connects to some port and address on the -local machine and waits for incoming requests. By default, it -listens to all addresses on the machine, and to the port -as specified by the Port directive in the server configuration. -However, it can be told to listen to more than one port, or to listen -to only selected addresses, or a combination. This is often combined -with the Virtual Host feature which determines how Apache -responds to different IP addresses, hostnames and ports.

- -

The Listen directive tells the server to accept -incoming requests only on the specified port or address-and-port -combinations. If only a port number is specified in the -Listen directive, the server listens to the given port on -all interfaces, instead of the port given by the Port -directive. If an IP address is given as well as a port, the server -will listen on the given port and interface. Multiple Listen -directives may be used to specify a number of addresses and ports to -listen to. The server will respond to requests from any of the listed -addresses and ports.

- -

For example, to make the server accept connections on both port -80 and port 8000, use: -

-   Listen 80
-   Listen 8000
-
- -To make the server accept connections on two specified -interfaces and port numbers, use -
-   Listen 192.170.2.1:80
-   Listen 192.170.2.5:8000
-
- -

How this works with Virtual Hosts

- -

Listen does not implement Virtual Hosts. It only tells the -main server what addresses and ports to listen to. If no -<VirtualHost> directives are used, the server will behave the -same for all accepted requests. However, <VirtualHost> can be -used to specify a different behavior for one or more of the addresses -and ports. To implement a VirtualHost, the server must first be told -to listen to the address and port to be used. Then a -<VirtualHost> section should be created for a specified address -and port to set the behavior of this virtual host. Note that if the -<VirtualHost> is set for an address and port that the server is -not listening to, it cannot be accessed. - -

See also

- -See also the documentation on -Listen directive, -Virtual Hosts, -Port directive, -DNS Issues -and -<VirtualHost> section. - - - - - diff --git a/docs/manual/cgi_path.html.en b/docs/manual/cgi_path.html.en deleted file mode 100644 index 2b7bd963b15..00000000000 --- a/docs/manual/cgi_path.html.en +++ /dev/null @@ -1,93 +0,0 @@ - - -PATH_INFO Changes in the CGI Environment - - - - - -

PATH_INFO Changes in the CGI Environment

- -
- -

Overview

- -

As implemented in Apache 1.1.1 and earlier versions, the method -Apache used to create PATH_INFO in the CGI environment was -counterintuitive, and could result in crashes in certain cases. In -Apache 1.2 and beyond, this behavior has changed. Although this -results in some compatibility problems with certain legacy CGI -applications, the Apache 1.2 behavior is still compatible with the -CGI/1.1 specification, and CGI scripts can be easily modified (see below). - -

The Problem

- -

Apache 1.1.1 and earlier implemented the PATH_INFO and SCRIPT_NAME -environment variables by looking at the filename, not the URL. While -this resulted in the correct values in many cases, when the filesystem -path was overloaded to contain path information, it could result in -errant behavior. For example, if the following appeared in a config -file: -

-     Alias /cgi-ralph /usr/local/httpd/cgi-bin/user.cgi/ralph
-
-

In this case, user.cgi is the CGI script, the "/ralph" -is information to be passed onto the CGI. If this configuration was in -place, and a request came for "/cgi-ralph/script/", the -code would set PATH_INFO to "/ralph/script", and -SCRIPT_NAME to "/cgi-". Obviously, the latter is -incorrect. In certain cases, this could even cause the server to -crash.

- -

The Solution

- -

Apache 1.2 and later now determine SCRIPT_NAME and PATH_INFO by -looking directly at the URL, and determining how much of the URL is -client-modifiable, and setting PATH_INFO to it. To use the above -example, PATH_INFO would be set to "/script", and -SCRIPT_NAME to "/cgi-ralph". This makes sense and results -in no server behavior problems. It also permits the script to be -guaranteed that -"http://$SERVER_NAME:$SERVER_PORT$SCRIPT_NAME$PATH_INFO" -will always be an accessible URL that points to the current script, -something which was not necessarily true with previous versions of -Apache. - -

However, the "/ralph" -information from the Alias directive is lost. This is -unfortunate, but we feel that using the filesystem to pass along this -sort of information is not a recommended method, and a script making -use of it "deserves" not to work. Apache 1.2b3 and later, however, do -provide a workaround. - -

Compatibility with Previous Servers

- -

It may be necessary for a script that was designed for earlier -versions of Apache or other servers to need the information that the -old PATH_INFO variable provided. For this purpose, Apache 1.2 (1.2b3 -and later) sets an additional variable, FILEPATH_INFO. This -environment variable contains the value that PATH_INFO would have had -with Apache 1.1.1.

- -

A script that wishes to work with both Apache 1.2 and earlier -versions can simply test for the existence of FILEPATH_INFO, and use -it if available. Otherwise, it can use PATH_INFO. For example, in -Perl, one might use: -

-    $path_info = $ENV{'FILEPATH_INFO'} || $ENV{'PATH_INFO'};
-
- -

By doing this, a script can work with all servers supporting the -CGI/1.1 specification, including all versions of Apache.

- - - - - diff --git a/docs/manual/configuring.html.en b/docs/manual/configuring.html.en deleted file mode 100644 index f626637083a..00000000000 --- a/docs/manual/configuring.html.en +++ /dev/null @@ -1,248 +0,0 @@ - - - -Configuration Files - - - - - -

Configuration Files

- - - -
- -

Main Configuration Files

- - - -
-Related Modules

-mod_mime
-
-Related Directives

-<IfDefine>
-Include
-TypesConfig
-
- -

Apache is configured by placing directives in plain text configuration files. The main -configuration file is usually called httpd.conf. The -location of this file is set at compile-time, but may be overridden -with the -f command line flag. In addition, other -configuration files may be added using the Include directive. Any -directive may be placed in any of these configuration files. Changes -to the main configuration files are only recognized by Apache when it -is started or restarted.

- -

New with Apache 1.3.13 is a feature where if any configuration -file is actually a directory, Apache will enter that directory -and parse any files (and subdirectories) found there as configuration -files. One possible use for this would be to add VirtualHosts -by creating small configuration files for each host, and placing -them in such a configuration directory. Thus, you can add or -remove VirtualHosts without editing any files at all, simply -adding or deleting them. This makes automating such processes -much easier. - -

-The server also reads a file containing mime document types; the -filename is set by the TypesConfig directive, and is mime.types by default. - -


- -

Syntax of the Configuration Files

- -

Apache configuration files contain one directive per line. The -back-slash "\" may be used as the last character on a line to indicate -that the directive continues onto the next line. There must be no -other characters or white space between the back-slash and the end of -the line. - -

Directives in the configuration files are case-insensitive, but -arguments to directives are often case sensitive. Lines which begin -with the hash character "#" are considered comments, and are ignored. -Comments may not be included on a line after a -configuration directive. Blank lines and white space occurring before -a directive are ignored, so you may indent directives for clarity. - -

You can check your configuration files for syntax errors without -starting the server by using apachectl configtest -or the -t command line option. - -


- -

Modules

- - -
-Related Modules

-mod_so
-
-Related Directives

-AddModule
-ClearModuleList
-<IfModule>
-LoadModule
-
- -

Apache is a modular server. This implies that only the most basic -functionality is included in the core server. Extended features are -available through modules which -can be loaded into Apache. By default, a base set of modules is -included in the server at compile-time. If the server is compiled to -use dynamically loaded modules, then modules -can be compiled separately and added at any time using the LoadModule directive. -Otherwise, Apache must be recompiled to add or remove modules. -Configuration directives may be included conditional on a presence of -a particular module by enclosing them in an <IfModule> block. - -

To see which modules are currently compiled into the server, -you can use the -l command line option. - -


- -

Scope of Directives

- -
-Related Directives

-<Directory>
-<DirectoryMatch>
-<Files>
-<FilesMatch>
-<Location>
-<LocationMatch>
-<VirtualHost>
-
- -

Directives placed in the main configuration files apply to the entire -server. If you wish to change the configuration for only a part of -the server, you can scope your directives by placing them in -<Directory>, -<DirectoryMatch>, -<Files>, -<FilesMatch>, -<Location>, - and -<LocationMatch> - -sections. These sections limit the application of the directives -which they enclose to particular filesystem locations or URLs. They -can also be nested, allowing for very fine grained configuration. - -

Apache has the capability to serve many different websites -simultaneously. This is called Virtual Hosting. -Directives can also be scoped by placing them inside -<VirtualHost> -sections, so that they will only apply to requests for a particular -website. - -

Although most directives can be placed in any of these sections, -some directives do not make sense in some contexts. For example, -directives controlling process creation can only be placed in the main -server context. To find which directives can be placed in which -sections, check the Context of the directive. -For further information, we provide details on How Directory, Location and Files sections -work. - -


- -

.htaccess Files

- -
-Related Directives

-AccessFileName
-AllowOverride
-
- -

Apache allows for decentralized management of configuration via -special files placed inside the web tree. The special files are -usually called .htaccess, but any name can be specified -in the AccessFileName directive. Directives placed in -.htaccess files apply to the directory where you place -the file, and all sub-directories. The .htaccess files -follow the same syntax as the main configuration files. Since -.htaccess files are read on every request, changes made -in these files take immediate effect. - -

To find which directives can be placed in .htaccess -files, check the Context -of the directive. The server administrator further controls what -directives may be placed in .htaccess files by -configuring the AllowOverride -directive in the main configuration files. - -


- -

Log files

- -

security warning

-Anyone who can write to the directory where Apache is writing a -log file can almost certainly gain access to the uid that the server is -started as, which is normally root. Do NOT give people write -access to the directory the logs are stored in without being aware of -the consequences; see the security tips -document for details. - -

pid file

- -

On startup, Apache saves the process id of the parent httpd process to -the file logs/httpd.pid. This filename can be changed -with the PidFile directive. The -process-id is for use by the administrator in restarting and -terminating the daemon: on Unix, a HUP or USR1 signal causes the -daemon to re-read its configuration files and a TERM signal causes it -to die gracefully; on Windows, use the -k command line option instead. -For more information see the Stopping and -Restarting page. - -

-If the process dies (or is killed) abnormally, then it will be necessary to -kill the children httpd processes. - -

Error log

- -

The server will log error messages to a log file, by default -logs/error_log on Unix or logs/error.log on -Windows and OS/2. The filename can be set using the ErrorLog directive; different error -logs can be set for different virtual hosts. - -

Transfer log

- -

The server will typically log each request to a transfer file, by -default logs/access_log on Unix or -logs/access.log on Windows and OS/2. The filename can be -set using a CustomLog -directive; different transfer logs can be set for different virtual hosts. - - - - - diff --git a/docs/manual/content-negotiation.html.en b/docs/manual/content-negotiation.html.en deleted file mode 100644 index d1b4ab20abc..00000000000 --- a/docs/manual/content-negotiation.html.en +++ /dev/null @@ -1,590 +0,0 @@ - - - -Apache Content Negotiation - - - - - -

Content Negotiation

- -

-Apache's support for content negotiation has been updated to meet the -HTTP/1.1 specification. It can choose the best representation of a -resource based on the browser-supplied preferences for media type, -languages, character set and encoding. It also implements a -couple of features to give more intelligent handling of requests from -browsers which send incomplete negotiation information.

- -Content negotiation is provided by the -mod_negotiation module, -which is compiled in by default. - -


- -

About Content Negotiation

- -

-A resource may be available in several different representations. For -example, it might be available in different languages or different -media types, or a combination. One way of selecting the most -appropriate choice is to give the user an index page, and let them -select. However it is often possible for the server to choose -automatically. This works because browsers can send as part of each -request information about what representations they prefer. For -example, a browser could indicate that it would like to see -information in French, if possible, else English will do. Browsers -indicate their preferences by headers in the request. To request only -French representations, the browser would send - -

-  Accept-Language: fr
-
- -

-Note that this preference will only be applied when there is a choice -of representations and they vary by language. -

- -As an example of a more complex request, this browser has been -configured to accept French and English, but prefer French, and to -accept various media types, preferring HTML over plain text or other -text types, and preferring GIF or JPEG over other media types, but also -allowing any other media type as a last resort: - -

-  Accept-Language: fr; q=1.0, en; q=0.5
-  Accept: text/html; q=1.0, text/*; q=0.8, image/gif; q=0.6,
-        image/jpeg; q=0.6, image/*; q=0.5, */*; q=0.1
-
- -Apache 1.2 supports 'server driven' content negotiation, as defined in -the HTTP/1.1 specification. It fully supports the Accept, -Accept-Language, Accept-Charset and Accept-Encoding request headers. -Apache 1.3.4 also supports 'transparent' content negotiation, which is -an experimental negotiation protocol defined in RFC 2295 and RFC 2296. -It does not offer support for 'feature negotiation' as defined in -these RFCs. -

- -A resource is a conceptual entity identified by a URI -(RFC 2396). An HTTP server like Apache provides access to -representations of the resource(s) within its namespace, -with each representation in the form of a sequence of bytes with a -defined media type, character set, encoding, etc. Each resource may be -associated with zero, one, or more than one representation -at any given time. If multiple representations are available, -the resource is referred to as negotiable and each of its -representations is termed a variant. The ways in which the -variants for a negotiable resource vary are called the -dimensions of negotiation. - -

Negotiation in Apache

- -

-In order to negotiate a resource, the server needs to be given -information about each of the variants. This is done in one of two -ways: - -

- -

Using a type-map file

- -

-A type map is a document which is associated with the handler -named type-map (or, for backwards-compatibility with -older Apache configurations, the mime type -application/x-type-map). Note that to use this feature, -you must have a handler set in the configuration that defines a -file suffix as type-map; this is best done with a - -

-  AddHandler type-map .var
-
- -in the server configuration file. See the comments in the sample config -file for more details.

- -Type map files have an entry for each available variant; these entries -consist of contiguous HTTP-format header lines. Entries for -different variants are separated by blank lines. Blank lines are -illegal within an entry. It is conventional to begin a map file with -an entry for the combined entity as a whole (although this -is not required, and if present will be ignored). An example -map file is: - -

-  URI: foo
-
-  URI: foo.en.html
-  Content-type: text/html
-  Content-language: en
-
-  URI: foo.fr.de.html
-  Content-type: text/html;charset=iso-8859-2
-  Content-language: fr, de
-
- -If the variants have different source qualities, that may be indicated -by the "qs" parameter to the media type, as in this picture (available -as jpeg, gif, or ASCII-art): - -
-  URI: foo
-
-  URI: foo.jpeg
-  Content-type: image/jpeg; qs=0.8
-
-  URI: foo.gif
-  Content-type: image/gif; qs=0.5
-
-  URI: foo.txt
-  Content-type: text/plain; qs=0.01
-
-

- -qs values can vary in the range 0.000 to 1.000. Note that any variant with -a qs value of 0.000 will never be chosen. Variants with no 'qs' -parameter value are given a qs factor of 1.0. The qs parameter indicates -the relative 'quality' of this variant compared to the other available -variants, independent of the client's capabilities. For example, a jpeg -file is usually of higher source quality than an ascii file if it is -attempting to represent a photograph. However, if the resource being -represented is an original ascii art, then an ascii representation would -have a higher source quality than a jpeg representation. A qs value -is therefore specific to a given variant depending on the nature of -the resource it represents. - -

-The full list of headers recognized is: - -

-
URI: -
uri of the file containing the variant (of the given media - type, encoded with the given content encoding). These are - interpreted as URLs relative to the map file; they must be on - the same server (!), and they must refer to files to which the - client would be granted access if they were to be requested - directly. -
Content-Type: -
media type --- charset, level and "qs" parameters may be given. These - are often referred to as MIME types; typical media types are - image/gif, text/plain, or - text/html; level=3. -
Content-Language: -
The languages of the variant, specified as an Internet standard - language tag from RFC 1766 (e.g., en for English, - ko for Korean, etc.). -
Content-Encoding: -
If the file is compressed, or otherwise encoded, rather than - containing the actual raw data, this says how that was done. - Apache only recognizes encodings that are defined by an - AddEncoding directive. - This normally includes the encodings x-compress - for compress'd files, and x-gzip for gzip'd files. - The x- prefix is ignored for encoding comparisons. -
Content-Length: -
The size of the file. Specifying content - lengths in the type-map allows the server to compare file sizes - without checking the actual files. -
Description: -
A human-readable textual description of the variant. If Apache cannot - find any appropriate variant to return, it will return an error - response which lists all available variants instead. Such a variant - list will include the human-readable variant descriptions. -
- -

Multiviews

- -

-MultiViews is a per-directory option, meaning it can be set with -an Options directive within a <Directory>, -<Location> or <Files> -section in access.conf, or (if AllowOverride -is properly set) in .htaccess files. Note that -Options All does not set MultiViews; you -have to ask for it by name. - -

-The effect of MultiViews is as follows: if the server -receives a request for /some/dir/foo, if -/some/dir has MultiViews enabled, and -/some/dir/foo does not exist, then the server reads the -directory looking for files named foo.*, and effectively fakes up a -type map which names all those files, assigning them the same media -types and content-encodings it would have if the client had asked for -one of them by name. It then chooses the best match to the client's -requirements. - -

-MultiViews may also apply to searches for the file named by the -DirectoryIndex directive, if the server is trying to -index a directory. If the configuration files specify - -

-  DirectoryIndex index
-
- -then the server will arbitrate between index.html -and index.html3 if both are present. If neither are -present, and index.cgi is there, the server will run it. - -

-If one of the files found when reading the directory is a CGI script, -it's not obvious what should happen. The code gives that case -special treatment --- if the request was a POST, or a GET with -QUERY_ARGS or PATH_INFO, the script is given an extremely high quality -rating, and generally invoked; otherwise it is given an extremely low -quality rating, which generally causes one of the other views (if any) -to be retrieved. - -

The Negotiation Methods

- -After Apache has obtained a list of the variants for a given resource, -either from a type-map file or from the filenames in the directory, it -invokes one of two methods to decide on the 'best' variant to -return, if any. It is not necessary to know any of the details of how -negotiation actually takes place in order to use Apache's content -negotiation features. However the rest of this document explains the -methods used for those interested. -

- -There are two negotiation methods: - -

    - -
  1. Server driven negotiation with the Apache -algorithm is used in the normal case. The Apache algorithm is -explained in more detail below. When this algorithm is used, Apache -can sometimes 'fiddle' the quality factor of a particular dimension to -achieve a better result. The ways Apache can fiddle quality factors are -explained in more detail below. - -
  2. Transparent content negotiation is used when the -browser specifically requests this through the mechanism defined in RFC -2295. This negotiation method gives the browser full control over -deciding on the 'best' variant, the result is therefore dependent on -the specific algorithms used by the browser. As part of the -transparent negotiation process, the browser can ask Apache to run the -'remote variant selection algorithm' defined in RFC 2296. - -
- - -

Dimensions of Negotiation

- - - - - - - -
Dimension -Notes -
Media Type -Browser indicates preferences with the Accept header field. Each item -can have an associated quality factor. Variant description can also -have a quality factor (the "qs" parameter). -
Language -Browser indicates preferences with the Accept-Language header field. -Each item can have a quality factor. Variants can be associated with none, one -or more than one language. -
Encoding -Browser indicates preference with the Accept-Encoding header field. -Each item can have a quality factor. -
Charset -Browser indicates preference with the Accept-Charset header field. -Each item can have a quality factor. -Variants can indicate a charset as a parameter of the media type. -
- -

Apache Negotiation Algorithm

- -

-Apache can use the following algorithm to select the 'best' variant -(if any) to return to the browser. This algorithm is not -further configurable. It operates as follows: - -

    -
  1. First, for each dimension of the negotiation, check the appropriate -Accept* header field and assign a quality to each -variant. If the Accept* header for any dimension implies that this -variant is not acceptable, eliminate it. If no variants remain, go -to step 4. - -
  2. Select the 'best' variant by a process of elimination. Each of the -following tests is applied in order. Any variants not selected at each -test are eliminated. After each test, if only one variant remains, -select it as the best match and proceed to step 3. If more than one -variant remains, move on to the next test. - -
      -
    1. Multiply the quality factor from the Accept header with the - quality-of-source factor for this variant's media type, and select - the variants with the highest value. - -
    2. Select the variants with the highest language quality factor. - -
    3. Select the variants with the best language match, using either the - order of languages in the Accept-Language header (if present), or else - the order of languages in the LanguagePriority - directive (if present). - -
    4. Select the variants with the highest 'level' media parameter - (used to give the version of text/html media types). - -
    5. Select variants with the best charset media parameters, - as given on the Accept-Charset header line. Charset ISO-8859-1 - is acceptable unless explicitly excluded. Variants with a - text/* media type but not explicitly associated - with a particular charset are assumed to be in ISO-8859-1. - -
    6. Select those variants which have associated - charset media parameters that are not ISO-8859-1. - If there are no such variants, select all variants instead. - -
    7. Select the variants with the best encoding. If there are - variants with an encoding that is acceptable to the user-agent, - select only these variants. Otherwise if there is a mix of encoded - and non-encoded variants, select only the unencoded variants. - If either all variants are encoded or all variants are not encoded, - select all variants. - -
    8. Select the variants with the smallest content length. - -
    9. Select the first variant of those remaining. This will be either the - first listed in the type-map file, or when variants are read from - the directory, the one whose file name comes first when sorted using - ASCII code order. - -
    - -
  3. The algorithm has now selected one 'best' variant, so return - it as the response. The HTTP response header Vary is set to indicate the - dimensions of negotiation (browsers and caches can use this - information when caching the resource). End. - -
  4. To get here means no variant was selected (because none are acceptable - to the browser). Return a 406 status (meaning "No acceptable representation") - with a response body consisting of an HTML document listing the - available variants. Also set the HTTP Vary header to indicate the - dimensions of variance. - -
- -

Fiddling with Quality Values

- -

-Apache sometimes changes the quality values from what would be -expected by a strict interpretation of the Apache negotiation -algorithm above. This is to get a better result from the algorithm for -browsers which do not send full or accurate information. Some of the -most popular browsers send Accept header information which would -otherwise result in the selection of the wrong variant in many -cases. If a browser sends full and correct information these fiddles -will not be applied. -

- -

Media Types and Wildcards

- -

-The Accept: request header indicates preferences for media types. It -can also include 'wildcard' media types, such as "image/*" or "*/*" -where the * matches any string. So a request including: -

-  Accept: image/*, */*
-
- -would indicate that any type starting "image/" is acceptable, -as is any other type (so the first "image/*" is redundant). Some -browsers routinely send wildcards in addition to explicit types they -can handle. For example: -
-  Accept: text/html, text/plain, image/gif, image/jpeg, */*
-
- -The intention of this is to indicate that the explicitly -listed types are preferred, but if a different representation is -available, that is ok too. However, under the basic algorithm, as given -above, the */* wildcard has exactly equal preference to all the other -types, so they are not being preferred. The browser should really have -sent a request with a lower quality (preference) value for */*, such -as: -
-  Accept: text/html, text/plain, image/gif, image/jpeg, */*; q=0.01
-
- -The explicit types have no quality factor, so they default to a -preference of 1.0 (the highest). The wildcard */* is given -a low preference of 0.01, so other types will only be returned if -no variant matches an explicitly listed type. -

- -If the Accept: header contains no q factors at all, Apache sets -the q value of "*/*", if present, to 0.01 to emulate the desired -behavior. It also sets the q value of wildcards of the format -"type/*" to 0.02 (so these are preferred over matches against -"*/*"). If any media type on the Accept: header contains a q factor, -these special values are not applied, so requests from browsers -which send the correct information to start with work as expected. - -

Variants with no Language

- -

-If some of the variants for a particular resource have a language -attribute, and some do not, those variants with no language -are given a very low language quality factor of 0.001.

- -The reason for setting this language quality factor for -variants with no language to a very low value is to allow -for a default variant which can be supplied if none of the -other variants match the browser's language preferences. - -For example, consider the situation with three variants: - -

- -

-The meaning of a variant with no language is that it is -always acceptable to the browser. If the request Accept-Language -header includes either en or fr (or both) one of foo.en.html -or foo.fr.html will be returned. If the browser does not list -either en or fr as acceptable, foo.html will be returned instead. - -

Extensions to Transparent Content Negotiation

- -Apache extends the transparent content negotiation protocol (RFC 2295) -as follows. A new {encoding ..} element is used in -variant lists to label variants which are available with a specific -content-encoding only. The implementation of the -RVSA/1.0 algorithm (RFC 2296) is extended to recognize encoded -variants in the list, and to use them as candidate variants whenever -their encodings are acceptable according to the Accept-Encoding -request header. The RVSA/1.0 implementation does not round computed -quality factors to 5 decimal places before choosing the best variant. - -

Note on hyperlinks and naming conventions

- -

-If you are using language negotiation you can choose between -different naming conventions, because files can have more than one -extension, and the order of the extensions is normally irrelevant -(see mod_mime documentation for details). -

-A typical file has a MIME-type extension (e.g., html), -maybe an encoding extension (e.g., gz), and of course a -language extension (e.g., en) when we have different -language variants of this file. - -

-Examples: -

- -

-Here are some more examples of filenames together with valid and invalid -hyperlinks: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FilenameValid hyperlinkInvalid hyperlink
foo.html.enfoo
- foo.html
-
foo.en.htmlfoofoo.html
foo.html.en.gzfoo
- foo.html
foo.gz
- foo.html.gz
foo.en.html.gzfoofoo.html
- foo.html.gz
- foo.gz
foo.gz.html.enfoo
- foo.gz
- foo.gz.html
foo.html
foo.html.gz.enfoo
- foo.html
- foo.html.gz
foo.gz
- -

-Looking at the table above you will notice that it is always possible to -use the name without any extensions in a hyperlink (e.g., foo). -The advantage is that you can hide the actual type of a -document or file and can change it later, e.g., from html -to shtml or cgi without changing any -hyperlink references. - -

-If you want to continue to use a MIME-type in your hyperlinks (e.g. -foo.html) the language extension (including an encoding extension -if there is one) must be on the right hand side of the MIME-type extension -(e.g., foo.html.en). - - -

Note on Caching

- -

-When a cache stores a representation, it associates it with the request URL. -The next time that URL is requested, the cache can use the stored -representation. But, if the resource is negotiable at the server, -this might result in only the first requested variant being cached and -subsequent cache hits might return the wrong response. To prevent this, -Apache normally marks all responses that are returned after content negotiation -as non-cacheable by HTTP/1.0 clients. Apache also supports the HTTP/1.1 -protocol features to allow caching of negotiated responses.

- -For requests which come from a HTTP/1.0 compliant client (either a -browser or a cache), the directive CacheNegotiatedDocs can be -used to allow caching of responses which were subject to negotiation. -This directive can be given in the server config or virtual host, and -takes no arguments. It has no effect on requests from HTTP/1.1 clients. - - - - diff --git a/docs/manual/custom-error.html.en b/docs/manual/custom-error.html.en deleted file mode 100644 index 09604ea972b..00000000000 --- a/docs/manual/custom-error.html.en +++ /dev/null @@ -1,177 +0,0 @@ - - - -Custom error responses - - - - - -

Custom error responses

- -
- -
Purpose - -
Additional functionality. Allows webmasters to configure the response of - Apache to some error or problem. - -

Customizable responses can be defined to be activated in the - event of a server detected error or problem. - -

e.g. if a script crashes and produces a "500 Server Error" - response, then this response can be replaced with either some - friendlier text or by a redirection to another URL (local or - external). -

- -

Old behavior - -
NCSA httpd 1.3 would return some boring old error/problem message - which would often be meaningless to the user, and would provide no - means of logging the symptoms which caused it.
- -

- -

New behavior - -
The server can be asked to: -
    -
  1. Display some other text, instead of the NCSA hard coded messages, or -
  2. redirect to a local URL, or -
  3. redirect to an external URL. -
- -

Redirecting to another URL can be useful, but only if some information - can be passed which can then be used to explain and/or log the - error/problem - more clearly. - -

To achieve this, Apache will define new CGI-like environment - variables, e.g. - -

-REDIRECT_HTTP_ACCEPT=*/*, image/gif, image/x-xbitmap, image/jpeg
-REDIRECT_HTTP_USER_AGENT=Mozilla/1.1b2 (X11; I; HP-UX A.09.05 9000/712)
-REDIRECT_PATH=.:/bin:/usr/local/bin:/etc
-REDIRECT_QUERY_STRING=
-REDIRECT_REMOTE_ADDR=121.345.78.123
-REDIRECT_REMOTE_HOST=ooh.ahhh.com
-REDIRECT_SERVER_NAME=crash.bang.edu
-REDIRECT_SERVER_PORT=80
-REDIRECT_SERVER_SOFTWARE=Apache/0.8.15
-REDIRECT_URL=/cgi-bin/buggy.pl
-
- -

note the REDIRECT_ prefix. - -

At least REDIRECT_URL and REDIRECT_QUERY_STRING - will - be passed to the new URL (assuming it's a cgi-script or a cgi-include). - The - other variables will exist only if they existed prior to the - error/problem. - None of these will be set if your ErrorDocument is an - external redirect (i.e., anything starting with a - scheme name - like http:, even if it refers to the same host as the - server).

- -

Configuration - -
Use of "ErrorDocument" is enabled for .htaccess files when the - "FileInfo" override is - allowed. - -

Here are some examples... - -

-ErrorDocument 500 /cgi-bin/crash-recover
-ErrorDocument 500 "Sorry, our script crashed. Oh dear
-ErrorDocument 500 http://xxx/
-ErrorDocument 404 /Lame_excuses/not_found.html
-ErrorDocument 401 /Subscription/how_to_subscribe.html -
- -

The syntax is, - -

ErrorDocument -<3-digit-code> action - -

where the action can be, - -

    -
  1. Text to be displayed. Prefix the text with a quote ("). Whatever - follows the quote is displayed. Note: the (") prefix isn't - displayed. - -
  2. An external URL to redirect to. - -
  3. A local URL to redirect to. - -
-
- -


- -

Custom error responses and redirects

- -
- -
Purpose - -
Apache's behavior to redirected URLs has been modified so that additional - environment variables are available to a script/server-include.

- -

Old behavior - -
Standard CGI vars were made available to a script which has been - redirected to. No indication of where the redirection came from was - provided. - -

- -

New behavior -
- -A new batch of environment variables will be initialized for use by a -script which has been redirected to. Each new variable will have the -prefix REDIRECT_. REDIRECT_ environment -variables are created from the CGI environment variables which existed -prior to the redirect, they are renamed with a REDIRECT_ -prefix, i.e., HTTP_USER_AGENT becomes -REDIRECT_HTTP_USER_AGENT. In addition to these new -variables, Apache will define REDIRECT_URL and -REDIRECT_STATUS to help the script trace its origin. -Both the original URL and the URL being redirected to can be logged in -the access log. - -
-

-If the ErrorDocument specifies a local redirect to a CGI script, the script -should include a "Status:" header field in its output -in order to ensure the propagation all the way back to the client -of the error condition that caused it to be invoked. For instance, a Perl -ErrorDocument script might include the following: -

-
-      :
-    print  "Content-type: text/html\n";
-    printf "Status: %s Condition Intercepted\n", $ENV{"REDIRECT_STATUS"};
-      :
-
-

-If the script is dedicated to handling a particular error condition, such as -404 Not Found, it can use the specific code and -error text instead. -

- - - - diff --git a/docs/manual/developer/API.html b/docs/manual/developer/API.html deleted file mode 100644 index 496be760c97..00000000000 --- a/docs/manual/developer/API.html +++ /dev/null @@ -1,1161 +0,0 @@ - - -Apache API notes - - - - - -
Warning: -This document has not been updated to take into account changes -made in the 2.0 version of the Apache HTTP Server. Some of the -information may still be relevant, but please use it -with care. -
- -

Apache API notes

- -These are some notes on the Apache API and the data structures you -have to deal with, etc. They are not yet nearly complete, but -hopefully, they will help you get your bearings. Keep in mind that -the API is still subject to change as we gain experience with it. -(See the TODO file for what might be coming). However, -it will be easy to adapt modules to any changes that are made. -(We have more modules to adapt than you do). -

- -A few notes on general pedagogical style here. In the interest of -conciseness, all structure declarations here are incomplete --- the -real ones have more slots that I'm not telling you about. For the -most part, these are reserved to one component of the server core or -another, and should be altered by modules with caution. However, in -some cases, they really are things I just haven't gotten around to -yet. Welcome to the bleeding edge.

- -Finally, here's an outline, to give you some bare idea of what's -coming up, and in what order: - -

- -

Basic concepts.

- -We begin with an overview of the basic concepts behind the -API, and how they are manifested in the code. - -

Handlers, Modules, and Requests

- -Apache breaks down request handling into a series of steps, more or -less the same way the Netscape server API does (although this API has -a few more stages than NetSite does, as hooks for stuff I thought -might be useful in the future). These are: - - - -These phases are handled by looking at each of a succession of -modules, looking to see if each of them has a handler for the -phase, and attempting to invoke it if so. The handler can typically do -one of three things: - - - -Most phases are terminated by the first module that handles them; -however, for logging, `fixups', and non-access authentication -checking, all handlers always run (barring an error). Also, the -response phase is unique in that modules may declare multiple handlers -for it, via a dispatch table keyed on the MIME type of the requested -object. Modules may declare a response-phase handler which can handle -any request, by giving it the key */* (i.e., a -wildcard MIME type specification). However, wildcard handlers are -only invoked if the server has already tried and failed to find a more -specific response handler for the MIME type of the requested object -(either none existed, or they all declined).

- -The handlers themselves are functions of one argument (a -request_rec structure, vide infra), which return an -integer, as above.

- -

A brief tour of a module

- -At this point, we need to explain the structure of a module. Our -candidate will be one of the messier ones, the CGI module --- this -handles both CGI scripts and the ScriptAlias config file -command. It's actually a great deal more complicated than most -modules, but if we're going to have only one example, it might as well -be the one with its fingers in every place.

- -Let's begin with handlers. In order to handle the CGI scripts, the -module declares a response handler for them. Because of -ScriptAlias, it also has handlers for the name -translation phase (to recognize ScriptAliased URIs), and the -type-checking phase (any ScriptAliased request is typed -as a CGI script).

- -The module needs to maintain some per (virtual) -server information, namely, the ScriptAliases in effect; -the module structure therefore contains pointers to a function which -builds these structures, and to another which combines two of them (in -case the main server and a virtual server both have -ScriptAliases declared).

- -Finally, this module contains code to handle the -ScriptAlias command itself. This particular module only -declares one command, but there could be more, so modules have -command tables which declare their commands, and describe -where they are permitted, and how they are to be invoked.

- -A final note on the declared types of the arguments of some of these -commands: a pool is a pointer to a resource pool -structure; these are used by the server to keep track of the memory -which has been allocated, files opened, etc., either to service a -particular request, or to handle the process of configuring itself. -That way, when the request is over (or, for the configuration pool, -when the server is restarting), the memory can be freed, and the files -closed, en masse, without anyone having to write explicit code to -track them all down and dispose of them. Also, a -cmd_parms structure contains various information about -the config file being read, and other status information, which is -sometimes of use to the function which processes a config-file command -(such as ScriptAlias). - -With no further ado, the module itself: - -

-/* Declarations of handlers. */
-
-int translate_scriptalias (request_rec *);
-int type_scriptalias (request_rec *);
-int cgi_handler (request_rec *);
-
-/* Subsidiary dispatch table for response-phase handlers, by MIME type */
-
-handler_rec cgi_handlers[] = {
-{ "application/x-httpd-cgi", cgi_handler },
-{ NULL }
-};
-
-/* Declarations of routines to manipulate the module's configuration
- * info.  Note that these are returned, and passed in, as void *'s;
- * the server core keeps track of them, but it doesn't, and can't,
- * know their internal structure.
- */
-
-void *make_cgi_server_config (pool *);
-void *merge_cgi_server_config (pool *, void *, void *);
-
-/* Declarations of routines to handle config-file commands */
-
-extern char *script_alias(cmd_parms *, void *per_dir_config, char *fake,
-                          char *real);
-
-command_rec cgi_cmds[] = {
-{ "ScriptAlias", script_alias, NULL, RSRC_CONF, TAKE2,
-    "a fakename and a realname"},
-{ NULL }
-};
-
-module cgi_module = {
-   STANDARD_MODULE_STUFF,
-   NULL,                     /* initializer */
-   NULL,                     /* dir config creator */
-   NULL,                     /* dir merger --- default is to override */
-   make_cgi_server_config,   /* server config */
-   merge_cgi_server_config,  /* merge server config */
-   cgi_cmds,                 /* command table */
-   cgi_handlers,             /* handlers */
-   translate_scriptalias,    /* filename translation */
-   NULL,                     /* check_user_id */
-   NULL,                     /* check auth */
-   NULL,                     /* check access */
-   type_scriptalias,         /* type_checker */
-   NULL,                     /* fixups */
-   NULL,                     /* logger */
-   NULL                      /* header parser */
-};
-
- -

How handlers work

- -The sole argument to handlers is a request_rec structure. -This structure describes a particular request which has been made to -the server, on behalf of a client. In most cases, each connection to -the client generates only one request_rec structure.

- -

A brief tour of the request_rec

- -The request_rec contains pointers to a resource pool -which will be cleared when the server is finished handling the -request; to structures containing per-server and per-connection -information, and most importantly, information on the request itself.

- -The most important such information is a small set of character -strings describing attributes of the object being requested, including -its URI, filename, content-type and content-encoding (these being filled -in by the translation and type-check handlers which handle the -request, respectively).

- -Other commonly used data items are tables giving the MIME headers on -the client's original request, MIME headers to be sent back with the -response (which modules can add to at will), and environment variables -for any subprocesses which are spawned off in the course of servicing -the request. These tables are manipulated using the -ap_table_get and ap_table_set routines.

-

- Note that the Content-type header value cannot be - set by module content-handlers using the ap_table_*() - routines. Rather, it is set by pointing the content_type - field in the request_rec structure to an appropriate - string. E.g., -
-  r->content_type = "text/html";
- 
-
-Finally, there are pointers to two data structures which, in turn, -point to per-module configuration structures. Specifically, these -hold pointers to the data structures which the module has built to -describe the way it has been configured to operate in a given -directory (via .htaccess files or -<Directory> sections), for private data it has -built in the course of servicing the request (so modules' handlers for -one phase can pass `notes' to their handlers for other phases). There -is another such configuration vector in the server_rec -data structure pointed to by the request_rec, which -contains per (virtual) server configuration data.

- -Here is an abridged declaration, giving the fields most commonly used:

- -

-struct request_rec {
-
-  pool *pool;
-  conn_rec *connection;
-  server_rec *server;
-
-  /* What object is being requested */
-
-  char *uri;
-  char *filename;
-  char *path_info;
-  char *args;           /* QUERY_ARGS, if any */
-  struct stat finfo;    /* Set by server core;
-                         * st_mode set to zero if no such file */
-
-  char *content_type;
-  char *content_encoding;
-
-  /* MIME header environments, in and out.  Also, an array containing
-   * environment variables to be passed to subprocesses, so people can
-   * write modules to add to that environment.
-   *
-   * The difference between headers_out and err_headers_out is that
-   * the latter are printed even on error, and persist across internal
-   * redirects (so the headers printed for ErrorDocument handlers will
-   * have them).
-   */
-
-  table *headers_in;
-  table *headers_out;
-  table *err_headers_out;
-  table *subprocess_env;
-
-  /* Info about the request itself... */
-
-  int header_only;     /* HEAD request, as opposed to GET */
-  char *protocol;      /* Protocol, as given to us, or HTTP/0.9 */
-  char *method;        /* GET, HEAD, POST, etc. */
-  int method_number;   /* M_GET, M_POST, etc. */
-
-  /* Info for logging */
-
-  char *the_request;
-  int bytes_sent;
-
-  /* A flag which modules can set, to indicate that the data being
-   * returned is volatile, and clients should be told not to cache it.
-   */
-
-  int no_cache;
-
-  /* Various other config info which may change with .htaccess files
-   * These are config vectors, with one void* pointer for each module
-   * (the thing pointed to being the module's business).
-   */
-
-  void *per_dir_config;   /* Options set in config files, etc. */
-  void *request_config;   /* Notes on *this* request */
-
-};
-
-
- -

Where request_rec structures come from

- -Most request_rec structures are built by reading an HTTP -request from a client, and filling in the fields. However, there are -a few exceptions: - - - -

Handling requests, declining, and returning error - codes

- -As discussed above, each handler, when invoked to handle a particular -request_rec, has to return an int to -indicate what happened. That can either be - - - -Note that if the error code returned is REDIRECT, then -the module should put a Location in the request's -headers_out, to indicate where the client should be -redirected to.

- -

Special considerations for response - handlers

- -Handlers for most phases do their work by simply setting a few fields -in the request_rec structure (or, in the case of access -checkers, simply by returning the correct error code). However, -response handlers have to actually send a response back to the client.

- -They should begin by sending an HTTP response header, using the -function ap_send_http_header. (You don't have to do -anything special to skip sending the header for HTTP/0.9 requests; the -function figures out on its own that it shouldn't do anything). If -the request is marked header_only, that's all they should -do; they should return after that, without attempting any further -output.

- -Otherwise, they should produce a response body which responds to the -client as appropriate. The primitives for this are ap_rputc -and ap_rprintf, for internally generated output, and -ap_send_fd, to copy the contents of some FILE * -straight to the client.

- -At this point, you should more or less understand the following piece -of code, which is the handler which handles GET requests -which have no more specific handler; it also shows how conditional -GETs can be handled, if it's desirable to do so in a -particular response handler --- ap_set_last_modified checks -against the If-modified-since value supplied by the -client, if any, and returns an appropriate code (which will, if -nonzero, be USE_LOCAL_COPY). No similar considerations apply for -ap_set_content_length, but it returns an error code for -symmetry.

- -

-int default_handler (request_rec *r)
-{
-    int errstatus;
-    FILE *f;
-
-    if (r->method_number != M_GET) return DECLINED;
-    if (r->finfo.st_mode == 0) return NOT_FOUND;
-
-    if ((errstatus = ap_set_content_length (r, r->finfo.st_size))
-	|| (errstatus = ap_set_last_modified (r, r->finfo.st_mtime)))
-        return errstatus;
-
-    f = fopen (r->filename, "r");
-
-    if (f == NULL) {
-        log_reason("file permissions deny server access",
-                   r->filename, r);
-        return FORBIDDEN;
-    }
-
-    register_timeout ("send", r);
-    ap_send_http_header (r);
-
-    if (!r->header_only) send_fd (f, r);
-    ap_pfclose (r->pool, f);
-    return OK;
-}
-
- -Finally, if all of this is too much of a challenge, there are a few -ways out of it. First off, as shown above, a response handler which -has not yet produced any output can simply return an error code, in -which case the server will automatically produce an error response. -Secondly, it can punt to some other handler by invoking -ap_internal_redirect, which is how the internal redirection -machinery discussed above is invoked. A response handler which has -internally redirected should always return OK.

- -(Invoking ap_internal_redirect from handlers which are -not response handlers will lead to serious confusion). - -

Special considerations for authentication - handlers

- -Stuff that should be discussed here in detail: - - - -

Special considerations for logging handlers

- -When a request has internally redirected, there is the question of -what to log. Apache handles this by bundling the entire chain of -redirects into a list of request_rec structures which are -threaded through the r->prev and r->next -pointers. The request_rec which is passed to the logging -handlers in such cases is the one which was originally built for the -initial request from the client; note that the bytes_sent field will -only be correct in the last request in the chain (the one for which a -response was actually sent). - -

Resource allocation and resource pools

-

-One of the problems of writing and designing a server-pool server is -that of preventing leakage, that is, allocating resources (memory, -open files, etc.), without subsequently releasing them. The resource -pool machinery is designed to make it easy to prevent this from -happening, by allowing resources to be allocated in such a way that -they are automatically released when the server is done with -them. -

-

-The way this works is as follows: the memory which is allocated, files -opened, etc., to deal with a particular request are tied to a -resource pool which is allocated for the request. The pool -is a data structure which itself tracks the resources in question. -

-

-When the request has been processed, the pool is cleared. At -that point, all the memory associated with it is released for reuse, -all files associated with it are closed, and any other clean-up -functions which are associated with the pool are run. When this is -over, we can be confident that all the resources tied to the pool have -been released, and that none of them have leaked. -

-

-Server restarts, and allocation of memory and resources for per-server -configuration, are handled in a similar way. There is a -configuration pool, which keeps track of resources which were -allocated while reading the server configuration files, and handling -the commands therein (for instance, the memory that was allocated for -per-server module configuration, log files and other files that were -opened, and so forth). When the server restarts, and has to reread -the configuration files, the configuration pool is cleared, and so the -memory and file descriptors which were taken up by reading them the -last time are made available for reuse. -

-

-It should be noted that use of the pool machinery isn't generally -obligatory, except for situations like logging handlers, where you -really need to register cleanups to make sure that the log file gets -closed when the server restarts (this is most easily done by using the -function ap_pfopen, which also -arranges for the underlying file descriptor to be closed before any -child processes, such as for CGI scripts, are execed), or -in case you are using the timeout machinery (which isn't yet even -documented here). However, there are two benefits to using it: -resources allocated to a pool never leak (even if you allocate a -scratch string, and just forget about it); also, for memory -allocation, ap_palloc is generally faster than -malloc. -

-

-We begin here by describing how memory is allocated to pools, and then -discuss how other resources are tracked by the resource pool -machinery. -

-

Allocation of memory in pools

-

-Memory is allocated to pools by calling the function -ap_palloc, which takes two arguments, one being a pointer to -a resource pool structure, and the other being the amount of memory to -allocate (in chars). Within handlers for handling -requests, the most common way of getting a resource pool structure is -by looking at the pool slot of the relevant -request_rec; hence the repeated appearance of the -following idiom in module code: -

-
-int my_handler(request_rec *r)
-{
-    struct my_structure *foo;
-    ...
-
-    foo = (foo *)ap_palloc (r->pool, sizeof(my_structure));
-}
-
-

-Note that there is no ap_pfree --- -ap_palloced memory is freed only when the associated -resource pool is cleared. This means that ap_palloc does not -have to do as much accounting as malloc(); all it does in -the typical case is to round up the size, bump a pointer, and do a -range check. -

-

-(It also raises the possibility that heavy use of ap_palloc -could cause a server process to grow excessively large. There are -two ways to deal with this, which are dealt with below; briefly, you -can use malloc, and try to be sure that all of the memory -gets explicitly freed, or you can allocate a sub-pool of -the main pool, allocate your memory in the sub-pool, and clear it out -periodically. The latter technique is discussed in the section on -sub-pools below, and is used in the directory-indexing code, in order -to avoid excessive storage allocation when listing directories with -thousands of files). -

-

Allocating initialized memory

-

-There are functions which allocate initialized memory, and are -frequently useful. The function ap_pcalloc has the same -interface as ap_palloc, but clears out the memory it -allocates before it returns it. The function ap_pstrdup -takes a resource pool and a char * as arguments, and -allocates memory for a copy of the string the pointer points to, -returning a pointer to the copy. Finally ap_pstrcat is a -varargs-style function, which takes a pointer to a resource pool, and -at least two char * arguments, the last of which must be -NULL. It allocates enough memory to fit copies of each -of the strings, as a unit; for instance: -

-
-     ap_pstrcat (r->pool, "foo", "/", "bar", NULL);
-
-

-returns a pointer to 8 bytes worth of memory, initialized to -"foo/bar". -

-

Commonly-used pools in the Apache Web server

-

-A pool is really defined by its lifetime more than anything else. There -are some static pools in http_main which are passed to various -non-http_main functions as arguments at opportune times. Here they are: -

-
-
permanent_pool -
-
- -
-
pconf -
-
- -
-
ptemp -
-
- -
-
pchild -
-
- -
-
ptrans -
-
- -
-
r->pool -
-
- -
-
-

-For almost everything folks do, r->pool is the pool to use. But you -can see how other lifetimes, such as pchild, are useful to some -modules... such as modules that need to open a database connection once -per child, and wish to clean it up when the child dies. -

-

-You can also see how some bugs have manifested themselves, such as setting -connection->user to a value from r->pool -- in this case -connection exists -for the lifetime of ptrans, which is longer than r->pool (especially if -r->pool is a subrequest!). So the correct thing to do is to allocate -from connection->pool. -

-

-And there was another interesting bug in mod_include/mod_cgi. You'll see -in those that they do this test to decide if they should use r->pool -or r->main->pool. In this case the resource that they are registering -for cleanup is a child process. If it were registered in r->pool, -then the code would wait() for the child when the subrequest finishes. -With mod_include this could be any old #include, and the delay can be up -to 3 seconds... and happened quite frequently. Instead the subprocess -is registered in r->main->pool which causes it to be cleaned up when -the entire request is done -- i.e., after the output has been sent to -the client and logging has happened. -

-

Tracking open files, etc.

-

-As indicated above, resource pools are also used to track other sorts -of resources besides memory. The most common are open files. The -routine which is typically used for this is ap_pfopen, which -takes a resource pool and two strings as arguments; the strings are -the same as the typical arguments to fopen, e.g., -

-
-     ...
-     FILE *f = ap_pfopen (r->pool, r->filename, "r");
-
-     if (f == NULL) { ... } else { ... }
-
-

-There is also an ap_popenf routine, which parallels the -lower-level open system call. Both of these routines -arrange for the file to be closed when the resource pool in question -is cleared. -

-

-Unlike the case for memory, there are functions to close -files allocated with ap_pfopen, and ap_popenf, -namely ap_pfclose and ap_pclosef. (This is -because, on many systems, the number of files which a single process -can have open is quite limited). It is important to use these -functions to close files allocated with ap_pfopen and -ap_popenf, since to do otherwise could cause fatal errors on -systems such as Linux, which react badly if the same -FILE* is closed more than once. -

-

-(Using the close functions is not mandatory, since the -file will eventually be closed regardless, but you should consider it -in cases where your module is opening, or could open, a lot of files). -

-

Other sorts of resources --- cleanup functions

-
-More text goes here. Describe the cleanup primitives in terms of -which the file stuff is implemented; also, spawn_process. -
-

-Pool cleanups live until clear_pool() is called: clear_pool(a) recursively -calls destroy_pool() on all subpools of a; then calls all the cleanups for a; -then releases all the memory for a. destroy_pool(a) calls clear_pool(a) -and then releases the pool structure itself. i.e., clear_pool(a) doesn't -delete a, it just frees up all the resources and you can start using it -again immediately. -

-

Fine control --- creating and dealing with sub-pools, with a note -on sub-requests

- -On rare occasions, too-free use of ap_palloc() and the -associated primitives may result in undesirably profligate resource -allocation. You can deal with such a case by creating a -sub-pool, allocating within the sub-pool rather than the main -pool, and clearing or destroying the sub-pool, which releases the -resources which were associated with it. (This really is a -rare situation; the only case in which it comes up in the standard -module set is in case of listing directories, and then only with -very large directories. Unnecessary use of the primitives -discussed here can hair up your code quite a bit, with very little -gain).

- -The primitive for creating a sub-pool is ap_make_sub_pool, -which takes another pool (the parent pool) as an argument. When the -main pool is cleared, the sub-pool will be destroyed. The sub-pool -may also be cleared or destroyed at any time, by calling the functions -ap_clear_pool and ap_destroy_pool, respectively. -(The difference is that ap_clear_pool frees resources -associated with the pool, while ap_destroy_pool also -deallocates the pool itself. In the former case, you can allocate new -resources within the pool, and clear it again, and so forth; in the -latter case, it is simply gone).

- -One final note --- sub-requests have their own resource pools, which -are sub-pools of the resource pool for the main request. The polite -way to reclaim the resources associated with a sub request which you -have allocated (using the ap_sub_req_... functions) -is ap_destroy_sub_req, which frees the resource pool. -Before calling this function, be sure to copy anything that you care -about which might be allocated in the sub-request's resource pool into -someplace a little less volatile (for instance, the filename in its -request_rec structure).

- -(Again, under most circumstances, you shouldn't feel obliged to call -this function; only 2K of memory or so are allocated for a typical sub -request, and it will be freed anyway when the main request pool is -cleared. It is only when you are allocating many, many sub-requests -for a single main request that you should seriously consider the -ap_destroy_... functions). - -

Configuration, commands and the like

- -One of the design goals for this server was to maintain external -compatibility with the NCSA 1.3 server --- that is, to read the same -configuration files, to process all the directives therein correctly, -and in general to be a drop-in replacement for NCSA. On the other -hand, another design goal was to move as much of the server's -functionality into modules which have as little as possible to do with -the monolithic server core. The only way to reconcile these goals is -to move the handling of most commands from the central server into the -modules.

- -However, just giving the modules command tables is not enough to -divorce them completely from the server core. The server has to -remember the commands in order to act on them later. That involves -maintaining data which is private to the modules, and which can be -either per-server, or per-directory. Most things are per-directory, -including in particular access control and authorization information, -but also information on how to determine file types from suffixes, -which can be modified by AddType and -DefaultType directives, and so forth. In general, the -governing philosophy is that anything which can be made -configurable by directory should be; per-server information is -generally used in the standard set of modules for information like -Aliases and Redirects which come into play -before the request is tied to a particular place in the underlying -file system.

- -Another requirement for emulating the NCSA server is being able to -handle the per-directory configuration files, generally called -.htaccess files, though even in the NCSA server they can -contain directives which have nothing at all to do with access -control. Accordingly, after URI -> filename translation, but before -performing any other phase, the server walks down the directory -hierarchy of the underlying filesystem, following the translated -pathname, to read any .htaccess files which might be -present. The information which is read in then has to be -merged with the applicable information from the server's own -config files (either from the <Directory> sections -in access.conf, or from defaults in -srm.conf, which actually behaves for most purposes almost -exactly like <Directory />).

- -Finally, after having served a request which involved reading -.htaccess files, we need to discard the storage allocated -for handling them. That is solved the same way it is solved wherever -else similar problems come up, by tying those structures to the -per-transaction resource pool.

- -

Per-directory configuration structures

- -Let's look at how all of this plays out in mod_mime.c, -which defines the file typing handler which emulates the NCSA server's -behavior of determining file types from suffixes. What we'll be -looking at, here, is the code which implements the -AddType and AddEncoding commands. These -commands can appear in .htaccess files, so they must be -handled in the module's private per-directory data, which in fact, -consists of two separate tables for MIME types and -encoding information, and is declared as follows: - -
-typedef struct {
-    table *forced_types;      /* Additional AddTyped stuff */
-    table *encoding_types;    /* Added with AddEncoding... */
-} mime_dir_config;
-
- -When the server is reading a configuration file, or -<Directory> section, which includes one of the MIME -module's commands, it needs to create a mime_dir_config -structure, so those commands have something to act on. It does this -by invoking the function it finds in the module's `create per-dir -config slot', with two arguments: the name of the directory to which -this configuration information applies (or NULL for -srm.conf), and a pointer to a resource pool in which the -allocation should happen.

- -(If we are reading a .htaccess file, that resource pool -is the per-request resource pool for the request; otherwise it is a -resource pool which is used for configuration data, and cleared on -restarts. Either way, it is important for the structure being created -to vanish when the pool is cleared, by registering a cleanup on the -pool if necessary).

- -For the MIME module, the per-dir config creation function just -ap_pallocs the structure above, and creates a couple of -tables to fill it. That looks like this: - -

-void *create_mime_dir_config (pool *p, char *dummy)
-{
-    mime_dir_config *new =
-      (mime_dir_config *) ap_palloc (p, sizeof(mime_dir_config));
-
-    new->forced_types = ap_make_table (p, 4);
-    new->encoding_types = ap_make_table (p, 4);
-
-    return new;
-}
-
- -Now, suppose we've just read in a .htaccess file. We -already have the per-directory configuration structure for the next -directory up in the hierarchy. If the .htaccess file we -just read in didn't have any AddType or -AddEncoding commands, its per-directory config structure -for the MIME module is still valid, and we can just use it. -Otherwise, we need to merge the two structures somehow.

- -To do that, the server invokes the module's per-directory config merge -function, if one is present. That function takes three arguments: -the two structures being merged, and a resource pool in which to -allocate the result. For the MIME module, all that needs to be done -is overlay the tables from the new per-directory config structure with -those from the parent: - -

-void *merge_mime_dir_configs (pool *p, void *parent_dirv, void *subdirv)
-{
-    mime_dir_config *parent_dir = (mime_dir_config *)parent_dirv;
-    mime_dir_config *subdir = (mime_dir_config *)subdirv;
-    mime_dir_config *new =
-      (mime_dir_config *)ap_palloc (p, sizeof(mime_dir_config));
-
-    new->forced_types = ap_overlay_tables (p, subdir->forced_types,
-                                        parent_dir->forced_types);
-    new->encoding_types = ap_overlay_tables (p, subdir->encoding_types,
-                                          parent_dir->encoding_types);
-
-    return new;
-}
-
- -As a note --- if there is no per-directory merge function present, the -server will just use the subdirectory's configuration info, and ignore -the parent's. For some modules, that works just fine (e.g., for the -includes module, whose per-directory configuration information -consists solely of the state of the XBITHACK), and for -those modules, you can just not declare one, and leave the -corresponding structure slot in the module itself NULL.

- -

Command handling

- -Now that we have these structures, we need to be able to figure out -how to fill them. That involves processing the actual -AddType and AddEncoding commands. To find -commands, the server looks in the module's command table. -That table contains information on how many arguments the commands -take, and in what formats, where it is permitted, and so forth. That -information is sufficient to allow the server to invoke most -command-handling functions with pre-parsed arguments. Without further -ado, let's look at the AddType command handler, which -looks like this (the AddEncoding command looks basically -the same, and won't be shown here): - -
-char *add_type(cmd_parms *cmd, mime_dir_config *m, char *ct, char *ext)
-{
-    if (*ext == '.') ++ext;
-    ap_table_set (m->forced_types, ext, ct);
-    return NULL;
-}
-
- -This command handler is unusually simple. As you can see, it takes -four arguments, two of which are pre-parsed arguments, the third being -the per-directory configuration structure for the module in question, -and the fourth being a pointer to a cmd_parms structure. -That structure contains a bunch of arguments which are frequently of -use to some, but not all, commands, including a resource pool (from -which memory can be allocated, and to which cleanups should be tied), -and the (virtual) server being configured, from which the module's -per-server configuration data can be obtained if required.

- -Another way in which this particular command handler is unusually -simple is that there are no error conditions which it can encounter. -If there were, it could return an error message instead of -NULL; this causes an error to be printed out on the -server's stderr, followed by a quick exit, if it is in -the main config files; for a .htaccess file, the syntax -error is logged in the server error log (along with an indication of -where it came from), and the request is bounced with a server error -response (HTTP error status, code 500).

- -The MIME module's command table has entries for these commands, which -look like this: - -

-command_rec mime_cmds[] = {
-{ "AddType", add_type, NULL, OR_FILEINFO, TAKE2,
-    "a mime type followed by a file extension" },
-{ "AddEncoding", add_encoding, NULL, OR_FILEINFO, TAKE2,
-    "an encoding (e.g., gzip), followed by a file extension" },
-{ NULL }
-};
-
- -The entries in these tables are: - - - -Finally, having set this all up, we have to use it. This is -ultimately done in the module's handlers, specifically for its -file-typing handler, which looks more or less like this; note that the -per-directory configuration structure is extracted from the -request_rec's per-directory configuration vector by using -the ap_get_module_config function. - -
-int find_ct(request_rec *r)
-{
-    int i;
-    char *fn = ap_pstrdup (r->pool, r->filename);
-    mime_dir_config *conf = (mime_dir_config *)
-             ap_get_module_config(r->per_dir_config, &mime_module);
-    char *type;
-
-    if (S_ISDIR(r->finfo.st_mode)) {
-        r->content_type = DIR_MAGIC_TYPE;
-        return OK;
-    }
-
-    if((i=ap_rind(fn,'.')) < 0) return DECLINED;
-    ++i;
-
-    if ((type = ap_table_get (conf->encoding_types, &fn[i])))
-    {
-        r->content_encoding = type;
-
-        /* go back to previous extension to try to use it as a type */
-
-        fn[i-1] = '\0';
-        if((i=ap_rind(fn,'.')) < 0) return OK;
-        ++i;
-    }
-
-    if ((type = ap_table_get (conf->forced_types, &fn[i])))
-    {
-        r->content_type = type;
-    }
-
-    return OK;
-}
-
-
- -

Side notes --- per-server configuration, virtual - servers, etc.

- -The basic ideas behind per-server module configuration are basically -the same as those for per-directory configuration; there is a creation -function and a merge function, the latter being invoked where a -virtual server has partially overridden the base server configuration, -and a combined structure must be computed. (As with per-directory -configuration, the default if no merge function is specified, and a -module is configured in some virtual server, is that the base -configuration is simply ignored).

- -The only substantial difference is that when a command needs to -configure the per-server private module data, it needs to go to the -cmd_parms data to get at it. Here's an example, from the -alias module, which also indicates how a syntax error can be returned -(note that the per-directory configuration argument to the command -handler is declared as a dummy, since the module doesn't actually have -per-directory config data): - -

-char *add_redirect(cmd_parms *cmd, void *dummy, char *f, char *url)
-{
-    server_rec *s = cmd->server;
-    alias_server_conf *conf = (alias_server_conf *)
-            ap_get_module_config(s->module_config,&alias_module);
-    alias_entry *new = ap_push_array (conf->redirects);
-
-    if (!ap_is_url (url)) return "Redirect to non-URL";
-
-    new->fake = f; new->real = url;
-    return NULL;
-}
-
- - diff --git a/docs/manual/developer/modules.html.en b/docs/manual/developer/modules.html.en deleted file mode 100644 index 8a265f5ec15..00000000000 --- a/docs/manual/developer/modules.html.en +++ /dev/null @@ -1,203 +0,0 @@ - - - -Converting Modules from Apache 1.3 to Apache 2.0 - - - - - - - -

From Apache 1.3 to Apache 2.0
Modules

- -

-This is a first attempt at writing the lessons I learned when trying to convert the mod_mmap_static module to Apache 2.0. It's by no means definitive and probably won't even be correct in some ways, but it's a start. -

-
-

The easier changes... -

- -

Cleanup Routines

-

-These now need to be of type ap_status_t and return a value of that type. Normally the return value will be APR_SUCCESS unless there is some need to signal an error in the cleanup. Be aware that even though you signal an error not all code yet checks and acts upon the error. -

- -

Initialisation Routines

- -

-These should now be renamed to better signify where they sit in the overall process. So the name gets a small change from mmap_init to mmap_post_config. The arguments passed have undergone a radical change and now look like -

- -

-Throughout Apache the old pools have been replaced by the ap_context_t, though their use remains remarkably similar. -

- -

Data Types

-

-A lot of the data types have been moved into the APR. This means that some have had a name change, such as the one shown above. The following is a brief list of some of the changes that you are likely to have to make. -

-
- -

-The messier changes... -

-

Register Hooks

-

-The new architecture uses a series of hooks to provide for calling your functions. These you'll need to add to your module by way of a new function, static void register_hooks(void). The function is really reasonably straightforward once you understand what needs to be done. Each function that needs calling at some stage in the processing of a request needs to be registered, handlers do not. There are a number of phases where functions can be added, and for each you can specify with a high degree of control the relative order that the function will be called in. -

-

-This is the code that was added to mod_mmap_static -

-
-static void register_hooks(void)
-{
-    static const char * const aszPre[]={ "http_core.c",NULL };
-    ap_hook_post_config(mmap_post_config,NULL,NULL,HOOK_MIDDLE);
-    ap_hook_translate_name(mmap_static_xlat,aszPre,NULL,HOOK_LAST);
-};
-
-

-This registers 2 functions that need to be called, one in the post_config stage (virtually every module will need this one) and one for the translate_name phase. Note that while there are different function names the format of each is identical. So what is the format? -

-

-ap_hook_[phase_name](function_name, predecessors, successors, position); -

-

-There are 3 hook positions defined... -

- -

-To define the position you use the position and then modify it with the predecessors and successors. Each of the modifiers can be a list of functions that should be called, either before the function is run (predecessors) or after the function has run (successors). -

-

-In the mod_mmap_static case I didn't care about the post_config stage, but the mmap_static_xlat MUST be called after the core module had done its name translation, hence the use of the aszPre to define a modifier to the position HOOK_LAST. -

-

Module Definition

- -

-There are now a lot fewer stages to worry about when creating your module definition. The old definition looked like -

-
-module MODULE_VAR_EXPORT [module_name]_module =
-{
-    STANDARD_MODULE_STUFF,
-    /* initializer */
-    /* dir config creater */
-    /* dir merger --- default is to override */
-    /* server config */
-    /* merge server config */
-    /* command handlers */
-    /* handlers */
-    /* filename translation */
-    /* check_user_id */
-    /* check auth */
-    /* check access */
-    /* type_checker */
-    /* fixups */
-    /* logger */
-    /* header parser */
-    /* child_init */
-    /* child_exit */
-    /* post read-request */
-};
-
-

-The new structure is a great deal simpler... -

-
-module MODULE_VAR_EXPORT [module_name]_module =
-{
-    STANDARD20_MODULE_STUFF,
-    /* create per-directory config structures */
-    /* merge per-directory config structures  */
-    /* create per-server config structures    */
-    /* merge per-server config structures     */
-    /* command handlers */
-    /* handlers */
-    /* register hooks */
- };
-
-

-Some of these read directly across, some don't. I'll try to summarise what should be done below. -

-

-The stages that read directly across : -

- -

-The remainder of the old functions should be registered as hooks. There are the following hook stages defined so far... -

- -