git.ipfire.org Git - thirdparty/suricata.git/commitdiff
http: Use libhtp-rs.
author: Todd Mortimer <richard.mortimer@cyber.gc.ca>
Mon, 26 Jun 2023 20:01:50 +0000 (20:01 +0000)
committer: Victor Julien <victor@inliniac.net>
Fri, 4 Apr 2025 00:35:12 +0000 (02:35 +0200)
Ticket: #2696

There are a lot of changes here, which are described below.

In general these changes are renaming constants to conform to the
libhtp-rs versions (which are generated by cbindgen); making all htp
types opaque and changing struct->member references to
htp_struct_member() function calls; and a handful of changes to offload
functionality onto libhtp-rs from suricata, such as URI normalization
and transaction cleanup.

Functions introduced to handle opaque htp_tx_t:
- tx->parsed_uri => htp_tx_parsed_uri(tx)
- tx->parsed_uri->path => htp_uri_path(htp_tx_parsed_uri(tx))
- tx->parsed_uri->hostname => htp_uri_hostname(htp_tx_parsed_uri(tx))
- htp_tx_get_user_data() => htp_tx_user_data(tx)
- htp_tx_is_http_2_upgrade(tx) convenience function introduced to detect response status 101
  and "Upgrade: h2c" header.

Functions introduced to handle opaque htp_tx_data_t:
- d->len => htp_tx_data_len()
- d->data => htp_tx_data_data()
- htp_tx_data_tx(data) function to get the htp_tx_t from the htp_tx_data_t
- htp_tx_data_is_empty(data) convenience function introduced to test if the data is empty.

Other changes:

Build libhtp-rs as a crate inside rust. Update autoconf to no longer
use libhtp as an external dependency. Remove HAVE_HTP feature defines
since they are no longer needed.

Make function arguments and return values const where possible

htp_tx_destroy(tx) will now free an incomplete transaction

htp_time_t replaced with standard struct timeval

Callbacks from libhtp now provide the htp_connp_t and the htp_tx_data_t
as separate arguments. This means the connection parser is no longer
fetched from the transaction inside callbacks.

SCHTPGenerateNormalizedUri() functionality moved inside libhtp-rs, which
now provides normalized URI values.
The normalized URI is available with accessor function: htp_tx_normalized_uri()
Configuration settings added to control the behaviour of the URI normalization:
- htp_config_set_normalized_uri_include_all()
- htp_config_set_plusspace_decode()
- htp_config_set_convert_lowercase()
- htp_config_set_double_decode_normalized_query()
- htp_config_set_double_decode_normalized_path()
- htp_config_set_backslash_convert_slashes()
- htp_config_set_bestfit_replacement_byte()
- htp_config_set_nul_encoded_terminates()
- htp_config_set_nul_raw_terminates()
- htp_config_set_path_separators_compress()
- htp_config_set_path_separators_decode()
- htp_config_set_u_encoding_decode()
- htp_config_set_url_encoding_invalid_handling()
- htp_config_set_utf8_convert_bestfit()
Constants related to configuring uri normalization:
- HTP_URL_DECODE_PRESERVE_PERCENT => HTP_URL_ENCODING_HANDLING_PRESERVE_PERCENT
- HTP_URL_DECODE_REMOVE_PERCENT => HTP_URL_ENCODING_HANDLING_REMOVE_PERCENT
- HTP_URL_DECODE_PROCESS_INVALID => HTP_URL_ENCODING_HANDLING_PROCESS_INVALID

htp_config_set_field_limits(soft_limit, hard_limit) changed to
htp_config_set_field_limit(limit) because libhtp didn't implement soft
limits.

libhtp logging API updated to provide HTP_LOG_CODE constants along with
the message. This eliminates the need to perform string matching on
message text to map log messages to HTTP_DECODER_EVENT values, and the
HTP_LOG_CODE values can be used directly. In support of this,
HTTP_DECODER_EVENT values are mapped to their corresponding HTP_LOG_CODE
values.

New log events to describe additional anomalies:
HTP_LOG_CODE_REQUEST_TOO_MANY_LZMA_LAYERS
HTP_LOG_CODE_RESPONSE_TOO_MANY_LZMA_LAYERS
HTP_LOG_CODE_PROTOCOL_CONTAINS_EXTRA_DATA
HTP_LOG_CODE_CONTENT_LENGTH_EXTRA_DATA_START
HTP_LOG_CODE_CONTENT_LENGTH_EXTRA_DATA_END
HTP_LOG_CODE_SWITCHING_PROTO_WITH_CONTENT_LENGTH
HTP_LOG_CODE_DEFORMED_EOL
HTP_LOG_CODE_PARSER_STATE_ERROR
HTP_LOG_CODE_MISSING_OUTBOUND_TRANSACTION_DATA
HTP_LOG_CODE_MISSING_INBOUND_TRANSACTION_DATA
HTP_LOG_CODE_ZERO_LENGTH_DATA_CHUNKS
HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD
HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL
HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL
HTP_LOG_CODE_REQUEST_LINE_NO_PROTOCOL
HTP_LOG_CODE_RESPONSE_LINE_INVALID_PROTOCOL
HTP_LOG_CODE_RESPONSE_LINE_INVALID_RESPONSE_STATUS
HTP_LOG_CODE_RESPONSE_BODY_INTERNAL_ERROR
HTP_LOG_CODE_REQUEST_BODY_DATA_CALLBACK_ERROR
HTP_LOG_CODE_RESPONSE_INVALID_EMPTY_NAME
HTP_LOG_CODE_REQUEST_INVALID_EMPTY_NAME
HTP_LOG_CODE_RESPONSE_INVALID_LWS_AFTER_NAME
HTP_LOG_CODE_RESPONSE_HEADER_NAME_NOT_TOKEN
HTP_LOG_CODE_REQUEST_INVALID_LWS_AFTER_NAME
HTP_LOG_CODE_LZMA_DECOMPRESSION_DISABLED
HTP_LOG_CODE_CONNECTION_ALREADY_OPEN
HTP_LOG_CODE_COMPRESSION_BOMB_DOUBLE_LZMA
HTP_LOG_CODE_INVALID_CONTENT_ENCODING
HTP_LOG_CODE_INVALID_GAP
HTP_LOG_CODE_ERROR

The new htp_log API supports consuming log messages more easily than
walking a list and tracking the current offset. Internally, libhtp-rs
now provides log messages as a queue of htp_log_t, which means the
application can simply call htp_conn_next_log() to fetch the next log
message until the queue is empty. Once the application is done with a
log message, they can call htp_log_free() to dispose of it.

Functions supporting htp_log_t:
htp_conn_next_log(conn) - Get the next log message
htp_log_message(log) - To get the text of the message
htp_log_code(log) - To get the HTP_LOG_CODE value
htp_log_free(log) - To free the htp_log_t

258 files changed:
.github/PULL_REQUEST_TEMPLATE.md
.github/workflows/builds.yml
.github/workflows/codeql.yml
.github/workflows/commits.yml
.github/workflows/docs.yml
.github/workflows/formatting.yml
.github/workflows/prepare-deps.yml
.github/workflows/rust-checks.yml
.github/workflows/rust.yml
.github/workflows/scan-build.yml
.gitignore
Makefile.am
configure.ac
doc/userguide/devguide/codebase/installation-from-git.rst
doc/userguide/devguide/codebase/testing.rst
doc/userguide/lua/lua-functions.rst
doc/userguide/upgrade.rst
doxygen.cfg
examples/lib/custom/Makefile.am
examples/lib/simple/Makefile.am
libsuricata-config.in
requirements.txt
rules/http-events.rules
rust/Cargo.lock.in
rust/Cargo.toml.in
rust/Makefile.am
rust/htp/.gitignore [new file with mode: 0644]
rust/htp/Cargo.toml [new file with mode: 0644]
rust/htp/LICENSE [new file with mode: 0644]
rust/htp/README.md [new file with mode: 0644]
rust/htp/cbindgen.toml [new file with mode: 0644]
rust/htp/fuzz/Cargo.toml [new file with mode: 0644]
rust/htp/fuzz/fuzz_targets/fuzz_htp.rs [new file with mode: 0644]
rust/htp/src/bstr.rs [new file with mode: 0644]
rust/htp/src/c_api/bstr.rs [new file with mode: 0644]
rust/htp/src/c_api/config.rs [new file with mode: 0644]
rust/htp/src/c_api/connection.rs [new file with mode: 0644]
rust/htp/src/c_api/connection_parser.rs [new file with mode: 0644]
rust/htp/src/c_api/header.rs [new file with mode: 0644]
rust/htp/src/c_api/log.rs [new file with mode: 0644]
rust/htp/src/c_api/mod.rs [new file with mode: 0644]
rust/htp/src/c_api/transaction.rs [new file with mode: 0644]
rust/htp/src/c_api/uri.rs [new file with mode: 0644]
rust/htp/src/config.rs [new file with mode: 0644]
rust/htp/src/connection.rs [new file with mode: 0644]
rust/htp/src/connection_parser.rs [new file with mode: 0644]
rust/htp/src/decompressors.rs [new file with mode: 0644]
rust/htp/src/error.rs [new file with mode: 0644]
rust/htp/src/headers.rs [new file with mode: 0644]
rust/htp/src/hook.rs [new file with mode: 0644]
rust/htp/src/lib.rs [new file with mode: 0644]
rust/htp/src/log.rs [new file with mode: 0644]
rust/htp/src/parsers.rs [new file with mode: 0644]
rust/htp/src/request.rs [new file with mode: 0644]
rust/htp/src/request_generic.rs [new file with mode: 0644]
rust/htp/src/response.rs [new file with mode: 0644]
rust/htp/src/response_generic.rs [new file with mode: 0644]
rust/htp/src/table.rs [new file with mode: 0644]
rust/htp/src/test/common.rs [new file with mode: 0644]
rust/htp/src/test/files/00-adhoc.t [new file with mode: 0644]
rust/htp/src/test/files/01-get.t [new file with mode: 0644]
rust/htp/src/test/files/02-header-test-apache2.t [new file with mode: 0644]
rust/htp/src/test/files/03-post-urlencoded.t [new file with mode: 0644]
rust/htp/src/test/files/04-post-urlencoded-chunked.t [new file with mode: 0644]
rust/htp/src/test/files/05-expect.t [new file with mode: 0644]
rust/htp/src/test/files/06-uri-normal.t [new file with mode: 0644]
rust/htp/src/test/files/07-pipelined-connection.t [new file with mode: 0644]
rust/htp/src/test/files/08-not-pipelined-connection.t [new file with mode: 0644]
rust/htp/src/test/files/09-multi-packet-request-head.t [new file with mode: 0644]
rust/htp/src/test/files/10-host-in-headers.t [new file with mode: 0644]
rust/htp/src/test/files/100-auth-digest-escaped-quote.t [new file with mode: 0644]
rust/htp/src/test/files/101-request-cookies-2.t [new file with mode: 0644]
rust/htp/src/test/files/102-request-cookies-3.t [new file with mode: 0644]
rust/htp/src/test/files/103-request-cookies-4.t [new file with mode: 0644]
rust/htp/src/test/files/104-request-cookies-5.t [new file with mode: 0644]
rust/htp/src/test/files/105-expect-100.t [new file with mode: 0644]
rust/htp/src/test/files/106-tunnelled-1.t [new file with mode: 0644]
rust/htp/src/test/files/107-response_unknown_status.t [new file with mode: 0644]
rust/htp/src/test/files/108-response-headers-cr-only.t [new file with mode: 0644]
rust/htp/src/test/files/109-response-headers-deformed-eol.t [new file with mode: 0644]
rust/htp/src/test/files/11-response-stream-closure.t [new file with mode: 0644]
rust/htp/src/test/files/110-response-folded-headers-2.t [new file with mode: 0644]
rust/htp/src/test/files/111-response-headers-chunked.t [new file with mode: 0644]
rust/htp/src/test/files/112-response-headers-chunked-2.t [new file with mode: 0644]
rust/htp/src/test/files/113-response-multipart-byte-ranges.t [new file with mode: 0644]
rust/htp/src/test/files/114-http-2-upgrade.t [new file with mode: 0644]
rust/htp/src/test/files/115-auth-bearer.t [new file with mode: 0644]
rust/htp/src/test/files/116-request-compression.t [new file with mode: 0644]
rust/htp/src/test/files/117-request-response-compression.t [new file with mode: 0644]
rust/htp/src/test/files/118-post.t [new file with mode: 0644]
rust/htp/src/test/files/119-ambiguous-eol.t [new file with mode: 0644]
rust/htp/src/test/files/12-connect-request.t [new file with mode: 0644]
rust/htp/src/test/files/120-request-gap.t [new file with mode: 0644]
rust/htp/src/test/files/121-response-gap.t [new file with mode: 0644]
rust/htp/src/test/files/122-response-body-data.t [new file with mode: 0644]
rust/htp/src/test/files/123-response-header-bug.t [new file with mode: 0644]
rust/htp/src/test/files/124-response-incomplete.t [new file with mode: 0644]
rust/htp/src/test/files/13-compressed-response-gzip-ct.t [new file with mode: 0644]
rust/htp/src/test/files/14-compressed-response-gzip-chunked.t [new file with mode: 0644]
rust/htp/src/test/files/15-connect-complete.t [new file with mode: 0644]
rust/htp/src/test/files/16-connect-extra.t [new file with mode: 0644]
rust/htp/src/test/files/17-multipart-1.t [new file with mode: 0644]
rust/htp/src/test/files/18-compressed-response-deflate.t [new file with mode: 0644]
rust/htp/src/test/files/19-urlencoded-test.t [new file with mode: 0644]
rust/htp/src/test/files/20-ambiguous-host.t [new file with mode: 0644]
rust/htp/src/test/files/21-http09.t [new file with mode: 0644]
rust/htp/src/test/files/22-http_1_1-host_missing [new file with mode: 0644]
rust/htp/src/test/files/22-php-param-processing.t [new file with mode: 0644]
rust/htp/src/test/files/23-http09-multiple.t [new file with mode: 0644]
rust/htp/src/test/files/24-http09-explicit.t [new file with mode: 0644]
rust/htp/src/test/files/25-small-chunks.t [new file with mode: 0644]
rust/htp/src/test/files/26-request-headers-raw.t [new file with mode: 0644]
rust/htp/src/test/files/27-request-trailer-raw.t [new file with mode: 0644]
rust/htp/src/test/files/28-response-headers-raw.t [new file with mode: 0644]
rust/htp/src/test/files/29-response-trailer-raw.t [new file with mode: 0644]
rust/htp/src/test/files/30-get-ipv6.t [new file with mode: 0644]
rust/htp/src/test/files/31-get-request-line-nul.t [new file with mode: 0644]
rust/htp/src/test/files/32-invalid-hostname.t [new file with mode: 0644]
rust/htp/src/test/files/33-invalid-hostname.t [new file with mode: 0644]
rust/htp/src/test/files/34-invalid-hostname.t [new file with mode: 0644]
rust/htp/src/test/files/35-early-response.t [new file with mode: 0644]
rust/htp/src/test/files/36-invalid-request-1-invalid-c-l.t [new file with mode: 0644]
rust/htp/src/test/files/37-invalid-request-2-t-e-and-c-l.t [new file with mode: 0644]
rust/htp/src/test/files/38-invalid-request-3-invalid-t-e.t [new file with mode: 0644]
rust/htp/src/test/files/39-auto-destroy-crash.t [new file with mode: 0644]
rust/htp/src/test/files/40-auth-basic.t [new file with mode: 0644]
rust/htp/src/test/files/41-auth-digest.t [new file with mode: 0644]
rust/htp/src/test/files/42-unknown-method_only.t [new file with mode: 0644]
rust/htp/src/test/files/43-invalid-protocol.t [new file with mode: 0644]
rust/htp/src/test/files/44-auth-basic-invalid.t [new file with mode: 0644]
rust/htp/src/test/files/45-auth-digest-unquoted-username.t [new file with mode: 0644]
rust/htp/src/test/files/46-auth-digest-invalid-username.t [new file with mode: 0644]
rust/htp/src/test/files/47-auth-unrecognized.t [new file with mode: 0644]
rust/htp/src/test/files/48-invalid-response-headers-1.t [new file with mode: 0644]
rust/htp/src/test/files/49-invalid-response-headers-2.t [new file with mode: 0644]
rust/htp/src/test/files/51-get-ipv6-invalid.t [new file with mode: 0644]
rust/htp/src/test/files/52-invalid-path.t [new file with mode: 0644]
rust/htp/src/test/files/53-path-utf8-none.t [new file with mode: 0644]
rust/htp/src/test/files/54-path-utf8-valid.t [new file with mode: 0644]
rust/htp/src/test/files/55-path-utf8-overlong-2.t [new file with mode: 0644]
rust/htp/src/test/files/56-path-utf8-overlong-3.t [new file with mode: 0644]
rust/htp/src/test/files/57-path-utf8-overlong-4.t [new file with mode: 0644]
rust/htp/src/test/files/58-path-utf8-invalid.t [new file with mode: 0644]
rust/htp/src/test/files/59-path-utf8-fullwidth.t [new file with mode: 0644]
rust/htp/src/test/files/60-request-cookies-1.t [new file with mode: 0644]
rust/htp/src/test/files/61-empty-line-between-requests.t [new file with mode: 0644]
rust/htp/src/test/files/62-post-no-body.t [new file with mode: 0644]
rust/htp/src/test/files/63-post-chunked-invalid-1.t [new file with mode: 0644]
rust/htp/src/test/files/64-post-chunked-invalid-2.t [new file with mode: 0644]
rust/htp/src/test/files/65-post-chunked-invalid-3.t [new file with mode: 0644]
rust/htp/src/test/files/66-post-chunked-split-chunk.t [new file with mode: 0644]
rust/htp/src/test/files/67-long-request-line.t [new file with mode: 0644]
rust/htp/src/test/files/68-invalid-request-header.t [new file with mode: 0644]
rust/htp/src/test/files/69-long-response-header.t [new file with mode: 0644]
rust/htp/src/test/files/70-response-invalid-chunk-length.t [new file with mode: 0644]
rust/htp/src/test/files/71-response-split-chunk.t [new file with mode: 0644]
rust/htp/src/test/files/72-response-split-body.t [new file with mode: 0644]
rust/htp/src/test/files/73-response-te-and-cl.t [new file with mode: 0644]
rust/htp/src/test/files/74-response-multiple-cl.t [new file with mode: 0644]
rust/htp/src/test/files/75-response-invalid-cl.t [new file with mode: 0644]
rust/htp/src/test/files/76-response-no-body.t [new file with mode: 0644]
rust/htp/src/test/files/77-response-folded-headers.t [new file with mode: 0644]
rust/htp/src/test/files/78-response-no-status-headers.t [new file with mode: 0644]
rust/htp/src/test/files/79-connect-invalid-hostport.t [new file with mode: 0644]
rust/htp/src/test/files/80-hostname-invalid-1.t [new file with mode: 0644]
rust/htp/src/test/files/81-hostname-invalid-2.t [new file with mode: 0644]
rust/htp/src/test/files/82-put.t [new file with mode: 0644]
rust/htp/src/test/files/83-auth-digest-invalid-username-2.t [new file with mode: 0644]
rust/htp/src/test/files/84-response-no-status-headers-2.t [new file with mode: 0644]
rust/htp/src/test/files/85-zero-byte-request-timeout.t [new file with mode: 0644]
rust/htp/src/test/files/86-partial-request-timeout.t [new file with mode: 0644]
rust/htp/src/test/files/87-issue-55-incorrect-host-ambiguous-warning.t [new file with mode: 0644]
rust/htp/src/test/files/88-response-multiple-cl-mismatch.t [new file with mode: 0644]
rust/htp/src/test/files/89-get-whitespace.t [new file with mode: 0644]
rust/htp/src/test/files/90-request-uri-too-large.t [new file with mode: 0644]
rust/htp/src/test/files/91-request-unexpected-body.t [new file with mode: 0644]
rust/htp/src/test/files/92-http_0_9-method_only.t [new file with mode: 0644]
rust/htp/src/test/files/93-compressed-response-deflateasgzip.t [new file with mode: 0644]
rust/htp/src/test/files/94-compressed-response-multiple.t [new file with mode: 0644]
rust/htp/src/test/files/95-compressed-response-gzipasdeflate.t [new file with mode: 0644]
rust/htp/src/test/files/96-compressed-response-lzma.t [new file with mode: 0644]
rust/htp/src/test/files/97-requests-cut.t [new file with mode: 0644]
rust/htp/src/test/files/98-responses-cut.t [new file with mode: 0644]
rust/htp/src/test/files/99-get.t [new file with mode: 0644]
rust/htp/src/test/files/anchor.empty [new file with mode: 0644]
rust/htp/src/test/files/generate-gzip-tests.php [new file with mode: 0755]
rust/htp/src/test/files/gztest-01-minimal.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-02-fname.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-03-fcomment.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-04-fhcrc.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-05-fextra.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-06-ftext.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-07-freserved1.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-08-freserved2.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-09-freserved3.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-10-multipart.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-11-invalid-method.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-12-invalid-crc32.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-13-invalid-isize.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-14-invalid-xfl.gz [new file with mode: 0644]
rust/htp/src/test/files/gztest-15-invalid-fhcrc.gz [new file with mode: 0644]
rust/htp/src/test/files/http-close-headers.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-017.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-018.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-044.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-059.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-060.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-061.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-078.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-118.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-130.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-195.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-274.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-284.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-286.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-287.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-297.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-300.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-303.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-307.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-318.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-320.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-321.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-390.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-402.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-405.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-411.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-416.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-419.t [new file with mode: 0644]
rust/htp/src/test/files/http-evader-423.t [new file with mode: 0644]
rust/htp/src/test/files/http-start-from-response.t [new file with mode: 0644]
rust/htp/src/test/gunzip.rs [new file with mode: 0644]
rust/htp/src/test/hybrid.rs [new file with mode: 0644]
rust/htp/src/test/main.rs [new file with mode: 0644]
rust/htp/src/test/mod.rs [new file with mode: 0644]
rust/htp/src/transaction.rs [new file with mode: 0644]
rust/htp/src/transactions.rs [new file with mode: 0644]
rust/htp/src/unicode_bestfit_map.rs [new file with mode: 0644]
rust/htp/src/uri.rs [new file with mode: 0644]
rust/htp/src/urlencoded.rs [new file with mode: 0644]
rust/htp/src/utf8_decoder.rs [new file with mode: 0644]
rust/htp/src/util.rs [new file with mode: 0644]
rust/src/lib.rs
scripts/bundle.sh
scripts/check-setup.sh
scripts/clang-format.sh
src/Makefile.am
src/app-layer-htp-libhtp.c [deleted file]
src/app-layer-htp-libhtp.h [deleted file]
src/app-layer-htp.c
src/app-layer-htp.h
src/detect-http-headers-stub.h
src/detect-http-host.c
src/detect-http-protocol.c
src/detect-http-uri.c
src/output-json-http.c
src/suricata.c
src/util-lua-http.c

index f1f0ba439dac5f69b9f1e5dad12512b33dca3e01..e0bf26470dfd41ca48ecea601d838689d5a47588 100644 (file)
@@ -24,7 +24,7 @@ Describe changes:
 
 ### Provide values to any of the below to override the defaults.
 
-- To use an LibHTP, Suricata-Verify or Suricata-Update pull request,
+- To use a Suricata-Verify or Suricata-Update pull request,
   link to the pull request in the respective `_BRANCH` variable.
 - Leave unused overrides blank or remove.
 
@@ -32,5 +32,3 @@ SV_REPO=
 SV_BRANCH=
 SU_REPO=
 SU_BRANCH=
-LIBHTP_REPO=
-LIBHTP_BRANCH=
index 900fb5c8179a4a536ccdaf89c4ca53afda9b5e4e..add1c0a25ee95c64c0c566c78e196ea48925e7bc 100644 (file)
@@ -13,8 +13,6 @@ on:
       - "doc/**"
   workflow_dispatch:
     inputs:
-      LIBHTP_REPO:
-      LIBHTP_BRANCH:
       SU_REPO:
       SU_BRANCH:
       SV_REPO:
@@ -145,7 +143,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xvf prep/libhtp.tar.gz
       - run: tar xvf prep/suricata-update.tar.gz
       - run: tar xvf prep/suricata-verify.tar.gz
       - name: Configuring
@@ -293,7 +290,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xvf prep/libhtp.tar.gz
       - run: tar xvf prep/suricata-update.tar.gz
       - run: tar xvf prep/suricata-verify.tar.gz
       - name: Build
@@ -311,74 +307,6 @@ jobs:
         run: cargo clippy --all-features
         working-directory: rust
 
-  almalinux-9-non-bundled-libhtp:
-    name: AlmaLinux 9 Non-Bundled LibHTP
-    runs-on: ubuntu-latest
-    container: almalinux:9
-    needs: [prepare-deps, ubuntu-22-04-dist]
-    steps:
-      # Cache Rust stuff.
-      - name: Cache cargo registry
-        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57
-        with:
-          path: ~/.cargo/registry
-          key: cargo-registry
-
-      - name: Determine number of CPUs
-        run: echo CPUS=$(nproc --all) >> $GITHUB_ENV
-
-      - name: Install system packages
-        run: |
-          dnf -y install dnf-plugins-core epel-release
-          dnf config-manager --set-enabled crb
-          dnf -y install \
-                autoconf \
-                automake \
-                cargo-vendor \
-                cbindgen \
-                diffutils \
-                numactl-devel \
-                dpdk-devel \
-                file-devel \
-                gcc \
-                gcc-c++ \
-                git \
-                jansson-devel \
-                jq \
-                libtool \
-                libyaml-devel \
-                libnfnetlink-devel \
-                libnetfilter_queue-devel \
-                libnet-devel \
-                libcap-ng-devel \
-                libevent-devel \
-                libmaxminddb-devel \
-                libpcap-devel \
-                libtool \
-                lz4-devel \
-                make \
-                pcre2-devel \
-                pkgconfig \
-                python3-devel \
-                python3-sphinx \
-                python3-yaml \
-                rust-toolset \
-                sudo \
-                which \
-                zlib-devel
-
-      - name: Download suricata.tar.gz
-        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e
-        with:
-          name: dist
-
-      - run: tar xf suricata-*.tar.gz --strip-components=1
-      - run: cd libhtp && ./configure --prefix=/usr/local
-      - run: cd libhtp && make -j ${{ env.CPUS }}
-      - run: cd libhtp && make install
-
-      - run: PKG_CONFIG_PATH=/usr/local/lib/pkgconfig ./configure --enable-non-bundled-htp --with-libhtp-includes=/usr/local/include --with-libhtp-libraries=/usr/local/lib
-
   rpms:
     name: Build RPMs
     runs-on: ubuntu-latest
@@ -391,7 +319,7 @@ jobs:
           - almalinux:9
           - fedora:41
     env:
-      skip: false
+      skip: true
     steps:
       - name: Cache cargo registry
         uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57
@@ -526,7 +454,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xvf prep/libhtp.tar.gz
       - run: tar xvf prep/suricata-update.tar.gz
       - run: tar xvf prep/suricata-verify.tar.gz
       - uses: ./.github/actions/install-cbindgen
@@ -704,7 +631,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - run: ./autogen.sh
       - run: ./configure --enable-warnings --disable-shared
@@ -793,7 +719,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - run: ./autogen.sh
       - run: CC="clang" CFLAGS="$DEFAULT_CFLAGS -Wshadow" ./configure --disable-shared
@@ -887,7 +812,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - run: ./autogen.sh
       - run: ./configure --enable-warnings --enable-debug --enable-unittests --disable-shared --enable-rust-strict --enable-hiredis --enable-nfqueue
@@ -973,7 +897,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - run: tar xf prep/suricata-verify.tar.gz
       - run: mkdir /home/suricata/suricata
@@ -1058,7 +981,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: ./autogen.sh
       - run: |
           if ./configure; then
@@ -1113,7 +1035,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: ./autogen.sh
       - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure
       - run: make -j ${{ env.CPUS }}
@@ -1181,7 +1102,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - run: tar xf prep/suricata-verify.tar.gz
       - run: ./autogen.sh
@@ -1258,7 +1178,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - run: ./autogen.sh
       - run: ./configure --enable-warnings --disable-shared --enable-unittests
@@ -1314,14 +1233,6 @@ jobs:
           CARGO_INCREMENTAL: 0
       - run: llvm-profdata-14 merge -o ct.profdata /tmp/ct.profraw
       - run: llvm-cov-14 show $(find rust/target/debug/deps/ -type f -regex 'rust/target/debug/deps/suricata\-[a-z0-9]+$') -instr-profile=ct.profdata --show-instantiations --ignore-filename-regex="^/root/.*" >> coverage.txt
-      - run: |
-          cd libhtp
-          make test
-          cd ..
-        env:
-          LLVM_PROFILE_FILE: "/tmp/htp-test.profraw"
-      - run: llvm-profdata-14 merge -o htp-test.profdata /tmp/htp-test.profraw
-      - run: llvm-cov-14 show libhtp/test/test_all -instr-profile=htp-test.profdata --show-instantiations --ignore-filename-regex="^/root/.*" >> coverage.txt
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@0565863a31f2c772f9f0395002a31e3f06189574
         with:
@@ -1399,7 +1310,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-verify.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - name: Fix kernel mmap rnd bits
@@ -1505,7 +1415,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - name: Fix kernel mmap rnd bits
       # Asan in llvm 14 provided in ubuntu 22.04 is incompatible with
@@ -1640,7 +1549,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - name: Extracting suricata-verify
         run: tar xf prep/suricata-verify.tar.gz
       - name: Fix kernel mmap rnd bits
@@ -1728,7 +1636,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - name: Fix kernel mmap rnd bits
         run: sudo sysctl vm.mmap_rnd_bits=28
       - run: ./autogen.sh
@@ -1831,7 +1738,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - name: Fix kernel mmap rnd bits
       # Asan in llvm 14 provided in ubuntu 22.04 is incompatible with
@@ -1922,7 +1828,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - run: ./autogen.sh
@@ -2072,7 +1977,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - name: Fix kernel mmap rnd bits
       # Asan in llvm 14 provided in ubuntu 22.04 is incompatible with
@@ -2150,7 +2054,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - run: ./autogen.sh
       - run: AFL_HARDEN=1 ac_cv_func_realloc_0_nonnull=yes ac_cv_func_malloc_0_nonnull=yes CFLAGS="-fsanitize=address -fno-omit-frame-pointer" CXXFLAGS=$CFLAGS CC=afl-clang-fast CXX=afl-clang-fast++ LDFLAGS="-fsanitize=address" ./configure --enable-warnings --enable-fuzztargets --disable-shared
@@ -2246,7 +2149,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - run: ./autogen.sh
       - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-netmap
@@ -2281,7 +2183,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - run: tar xf prep/suricata-verify.tar.gz
       - run: ./autogen.sh
@@ -2382,7 +2283,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - run: ./autogen.sh
       - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-dpdk
@@ -2463,7 +2363,6 @@ jobs:
         run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain $(grep rust-version rust/Cargo.toml.in|sed 's/\"//g'|awk '{print $3}') -y
       - run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH
       - uses: ./.github/actions/install-cbindgen
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - run: tar xf prep/suricata-verify.tar.gz
       - run: ./autogen.sh
@@ -2548,7 +2447,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - run: ./autogen.sh
@@ -2634,7 +2532,6 @@ jobs:
         run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain $(grep rust-version rust/Cargo.toml.in|sed 's/\"//g'|awk '{print $3}') -y
       - run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH
       - uses: ./.github/actions/install-cbindgen
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - run: tar xf prep/suricata-verify.tar.gz
       - run: ./autogen.sh
@@ -2710,7 +2607,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - run: ./autogen.sh
@@ -2781,7 +2677,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - uses: ./.github/actions/install-cbindgen
       - run: ./autogen.sh
@@ -2832,7 +2727,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xvf prep/libhtp.tar.gz
       - run: tar xvf prep/suricata-update.tar.gz
       - name: Create Python virtual environment
         run: python3 -m venv ./testenv
@@ -2843,14 +2737,13 @@ jobs:
       - run: ./autogen.sh
       - run: CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" ./configure  --enable-warnings --enable-unittests --prefix="$HOME/.local/"
       - run: CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" make -j2
-      # somehow it gets included by some C++ stdlib header (case unsensitive)
-      - run: rm libhtp/VERSION && make check
+      - run: CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" make check
       - run: tar xf prep/suricata-verify.tar.gz
       - name: Running suricata-verify
         run: |
           . ./testenv/bin/activate
           python3 ./suricata-verify/run.py -q --debug-failed
-      - run: make install
+      - run: CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" make install
       - name: Check Suricata-Update
         run: |
           . ./testenv/bin/activate
@@ -2886,7 +2779,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - name: Npcap DLL
         run: |
@@ -2989,7 +2881,6 @@ jobs:
         with:
           name: prep
           path: prep
-      - run: tar xf prep/libhtp.tar.gz
       - name: WinDivert
         run: |
           curl -sL -O https://github.com/basil00/Divert/releases/download/v1.4.3/WinDivert-1.4.3-A.zip
index 2ffccea46b0d550950a9886fb2012e6bf7bcb4d1..f92505a06f8bd6d4845cb21df87b68a66eb257d6 100644 (file)
@@ -54,7 +54,6 @@ jobs:
        sudo apt-get install libjansson-dev
        sudo apt-get install libpcap-dev
        sudo apt-get install libnuma-dev
-       git clone --depth 1 https://github.com/OISF/libhtp.git
        cargo install cbindgen
        export PATH=/opt/work/.cargo/bin:$PATH
         chmod +x autogen.sh
index 51fde97b6c300e3c26bd18531c9b3240c4f6e5af..56d82152320f57befd3e075a92dae2d46a6fd58d 100644 (file)
@@ -80,7 +80,6 @@ jobs:
       # The action above is supposed to do this for us, but it doesn't appear to stick.
       - run: /usr/bin/git config --global --add safe.directory /__w/suricata/suricata
       - run: git fetch
-      - run: git clone https://github.com/OISF/libhtp -b 0.5.x
       - name: Building all commits
         run: |
           echo "Building commits from ${GITHUB_BASE_REF}."
index 7f16b5044d7ae1189865ae1aaa1b27661dfb0aa2..53a74718597801fea5548e022a2ca8a770b590de 100644 (file)
@@ -8,8 +8,6 @@ on:
   pull_request:
   workflow_dispatch:
     inputs:
-      LIBHTP_REPO:
-      LIBHTP_BRANCH:
       SU_REPO:
       SU_BRANCH:
       SV_REPO:
@@ -130,7 +128,6 @@ jobs:
           cp prep/cbindgen $HOME/.cargo/bin
           chmod 755 $HOME/.cargo/bin/cbindgen
           echo "$HOME/.cargo/bin" >> $GITHUB_PATH
-      - run: tar xf prep/libhtp.tar.gz
       - run: tar xf prep/suricata-update.tar.gz
       - run: ./autogen.sh
       - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure
index 508317adf222982c5af4a58ef58d98d6af9ef364..4be9d3c684fe4d0e8720d4ccd07c2eeba6f09a31 100644 (file)
@@ -128,7 +128,6 @@ jobs:
               echo "No github merge commit found"
           fi
         shell: bash {0}
-      - run: git clone https://github.com/OISF/libhtp -b 0.5.x
       - run: ./autogen.sh
       - run: ./configure --enable-warnings --enable-unittests
       - name: Check formatting
index 32f1e43288060d1f9e9a4de1dd0e78a94e70b1d7..a9692db309a2feafc200cffe4561406c1fece6fb 100644 (file)
@@ -29,9 +29,6 @@ jobs:
               echo "Parsing branch and PR info from:"
               echo "${body}"
 
-              LIBHTP_REPO=$(echo "${body}" | awk -F = '/^LIBHTP_REPO=/ { print $2 }')
-              LIBHTP_BRANCH=$(echo "${body}" | awk -F = '/^LIBHTP_BRANCH=/ { print $2 }')
-
               SU_REPO=$(echo "${body}" | awk -F = '/^SU_REPO=/ { print $2 }')
               SU_BRANCH=$(echo "${body}" | awk -F = '/^SU_BRANCH=/ { print $2 }')
 
@@ -39,8 +36,6 @@ jobs:
               SV_BRANCH=$(echo "${body}" | awk -F = '/^SV_BRANCH=/ { print $2 }')
           else
               echo "No pull request body, will use inputs or defaults."
-              LIBHTP_REPO=${{ inputs.LIBHTP_REPO }}
-              LIBHTP_BRANCH=${{ inputs.LIBHTP_BRANCH }}
               SU_REPO=${{ inputs.SU_REPO }}
               SU_BRANCH=${{ inputs.SU_BRANCH }}
               SV_REPO=${{ inputs.SV_REPO }}
@@ -48,9 +43,6 @@ jobs:
           fi
 
           # If the _REPO variables don't contain a full URL, add GitHub.
-          if [ "${LIBHTP_REPO}" ] && ! echo "${LIBHTP_REPO}" | grep -q '^https://'; then
-              LIBHTP_REPO="https://github.com/${LIBHTP_REPO}"
-          fi
           if [ "${SU_REPO}" ] && ! echo "${SU_REPO}" | grep -q '^https://'; then
               SU_REPO="https://github.com/${SU_REPO}"
           fi
@@ -58,9 +50,6 @@ jobs:
               SV_REPO="https://github.com/${SV_REPO}"
           fi
 
-          echo LIBHTP_REPO=${LIBHTP_REPO} | tee -a ${GITHUB_ENV}
-          echo LIBHTP_BRANCH=${LIBHTP_BRANCH} | tee -a ${GITHUB_ENV}
-
           echo SU_REPO=${SU_REPO} | tee -a ${GITHUB_ENV}
           echo SU_BRANCH=${SU_BRANCH} | tee -a ${GITHUB_ENV}
 
@@ -69,8 +58,6 @@ jobs:
 
       - name: Annotate output
         run: |
-          echo "::notice:: LIBHTP_REPO=${LIBHTP_REPO}"
-          echo "::notice:: LIBHTP_BRANCH=${LIBHTP_BRANCH}"
           echo "::notice:: SU_REPO=${SU_REPO}"
           echo "::notice:: SU_BRANCH=${SU_BRANCH}"
           echo "::notice:: SV_REPO=${SV_REPO}"
@@ -81,10 +68,6 @@ jobs:
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
       - run: git config --global --add safe.directory /__w/suricata/suricata
 
-      - name: Fetching libhtp
-        run: |
-          DESTDIR=./bundle ./scripts/bundle.sh libhtp
-          tar zcf libhtp.tar.gz -C bundle libhtp
       - name: Fetching suricata-update
         run: |
           DESTDIR=./bundle ./scripts/bundle.sh suricata-update
@@ -116,6 +99,5 @@ jobs:
         with:
           name: prep
           path: |
-            libhtp.tar.gz
             suricata-update.tar.gz
             suricata-verify.tar.gz
index 1ee648e7bca74b8696ae8a95160dc85ffdef93fe..0c17346f380cfe7aa08ed2c4f308dcf337e1a57e 100644 (file)
@@ -76,7 +76,6 @@ jobs:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
       - name: Configure Suricata
         run: |
-          ./scripts/bundle.sh libhtp
           ./autogen.sh
           ./configure --enable-warnings
       - name: Run Cargo Audit
@@ -151,7 +150,6 @@ jobs:
           echo "$HOME/.cargo/bin" >> $GITHUB_PATH
       - name: Configure Suricata
         run: |
-          ./scripts/bundle.sh libhtp
           ./autogen.sh
           ./configure --enable-warnings
       - name: Cargo Update and Build
index 2a674407dcdcaa8c71ec7c98f52a1e20036d1b46..f0a0d098b5e6cc5dff7cff520924b86bf253317a 100644 (file)
@@ -107,3 +107,6 @@ jobs:
               echo "::error ::Cargo.lock.in needs to be updated"
               exit 1
           fi
+      # does not work in other subdirectories for now
+      - run: cargo fmt
+        working-directory: rust/htp
index 5c0f7989a7b9644c1b90a3b24b6ac1a4de6d105b..e3046fffa62b2885067e84a972fb0f8964485a70 100644 (file)
@@ -74,11 +74,10 @@ jobs:
       - run: scan-build-18 ./configure --enable-warnings --enable-dpdk --enable-nfqueue --enable-nflog
         env:
           CC: clang-18
-      # exclude libhtp from the analysis
       # disable security.insecureAPI.DeprecatedOrUnsafeBufferHandling explicitly as
       # this will require significant effort to address.
       - run: |
-          scan-build-18 --status-bugs --exclude libhtp/ --exclude rust \
+          scan-build-18 --status-bugs --exclude rust \
                 -enable-checker valist.Uninitialized \
                 -enable-checker valist.CopyToSelf \
                 -enable-checker valist.Unterminated \
index ed57b1b3ec3b1cf2b46482b28a21c7d79db23f4f..f6d1086b2dd208e2e50606f90f3772c7a71455cd 100644 (file)
@@ -32,19 +32,6 @@ doc/userguide/suricata.1
 etc/suricata.logrotate
 etc/suricata.service
 install-sh
-libhtp/TAGS
-libhtp/aclocal.m4
-libhtp/autom4te.cache/
-libhtp/config.h
-libhtp/config.log
-libhtp/config.status
-libhtp/configure
-libhtp/htp.pc
-libhtp/htp/TAGS
-libhtp/htp/libhtp.la
-libhtp/libtool
-libhtp/stamp-h1
-libhtp/test/TAGS
 libtool
 ltmain.sh
 missing
index 348ef59ba01dec29cc463ab1f0f594b7f0ac5674..ac3addea017425c28e57f4a55e7be9e9596e0607 100644 (file)
@@ -12,7 +12,7 @@ EXTRA_DIST = ChangeLog COPYING LICENSE suricata.yaml.in \
             scripts/docs-ubuntu-debian-minimal-build.sh \
        scripts/evedoc.py \
             examples/plugins
-SUBDIRS = $(HTP_DIR) rust src plugins qa rules doc etc python ebpf \
+SUBDIRS = rust src plugins qa rules doc etc python ebpf \
           $(SURICATA_UPDATE_DIR)
 DIST_SUBDIRS = $(SUBDIRS) examples/lib/simple examples/lib/custom
 
@@ -54,7 +54,7 @@ endif
        @echo "You can now start suricata by running as root something like:"
        @echo "  $(DESTDIR)$(bindir)/suricata -c $(DESTDIR)$(e_sysconfdir)suricata.yaml -i eth0"
        @echo ""
-       @echo "If a library like libhtp.so is not found, you can run suricata with:"
+       @echo "If a shared library is not found, you can add library paths with:"
        @echo "  LD_LIBRARY_PATH="$(DESTDIR)$(prefix)/lib" "$(DESTDIR)$(bindir)/suricata" -c "$(DESTDIR)$(e_sysconfdir)suricata.yaml" -i eth0"
        @echo ""
        @echo "The Emerging Threats Open rules are now installed. Rules can be"
index 2a6c92007f7cd1f544b7d65030e77117e4d8ccb4..23b6f97c57f43afc1f10fdd55a03fd8c39257ad4 100644 (file)
         [test "x$install_suricata_update" = "xyes"])
     AC_SUBST([install_suricata_update_reason])
 
-  # libhtp
-    AC_ARG_ENABLE(non-bundled-htp,
-           AS_HELP_STRING([--enable-non-bundled-htp], [Enable the use of an already installed version of htp]),[enable_non_bundled_htp=$enableval],[enable_non_bundled_htp=no])
-    AS_IF([test "x$enable_non_bundled_htp" = "xyes"], [
-        PKG_CHECK_MODULES([libhtp], htp,, [with_pkgconfig_htp=no])
-        if test "$with_pkgconfig_htp" != "no"; then
-            CPPFLAGS="${CPPFLAGS} ${libhtp_CFLAGS}"
-            LIBS="${LIBS} ${libhtp_LIBS}"
-        fi
-
-        AC_ARG_WITH(libhtp_includes,
-                [  --with-libhtp-includes=DIR  libhtp include directory],
-                [with_libhtp_includes="$withval"],[with_libhtp_includes=no])
-        AC_ARG_WITH(libhtp_libraries,
-                [  --with-libhtp-libraries=DIR    libhtp library directory],
-                [with_libhtp_libraries="$withval"],[with_libhtp_libraries="no"])
-
-        if test "$with_libhtp_includes" != "no"; then
-            CPPFLAGS="-I${with_libhtp_includes} ${CPPFLAGS}"
-        fi
-
-        if test "$with_libhtp_libraries" != "no"; then
-            LDFLAGS="${LDFLAGS} -L${with_libhtp_libraries}"
-        fi
-
-        AC_CHECK_HEADER(htp/htp.h,,[AC_MSG_ERROR(htp/htp.h not found ...)])
-
-        LIBHTP=""
-        AC_CHECK_LIB(htp, htp_conn_create,, LIBHTP="no")
-        if test "$LIBHTP" = "no"; then
-            echo
-            echo "   ERROR! libhtp library not found"
-            echo
-            exit 1
-        fi
-        PKG_CHECK_MODULES(LIBHTPMINVERSION, [htp >= 0.5.45],[libhtp_minver_found="yes"],[libhtp_minver_found="no"])
-        if test "$libhtp_minver_found" = "no"; then
-            PKG_CHECK_MODULES(LIBHTPDEVVERSION, [htp = 0.5.X],[libhtp_devver_found="yes"],[libhtp_devver_found="no"])
-            if test "$libhtp_devver_found" = "no"; then
-                echo
-                echo "   ERROR! libhtp was found but it is neither >= 0.5.45, nor the dev 0.5.X"
-                echo
-                exit 1
-            fi
-        fi
-
-        AC_CHECK_LIB([htp], [htp_config_register_request_uri_normalize],AC_DEFINE_UNQUOTED([HAVE_HTP_URI_NORMALIZE_HOOK],[1],[Found htp_config_register_request_uri_normalize function in libhtp]) ,,[-lhtp])
-        # check for htp_tx_get_response_headers_raw
-        AC_CHECK_LIB([htp], [htp_tx_get_response_headers_raw],AC_DEFINE_UNQUOTED([HAVE_HTP_TX_GET_RESPONSE_HEADERS_RAW],[1],[Found htp_tx_get_response_headers_raw in libhtp]) ,,[-lhtp])
-        AC_CHECK_LIB([htp], [htp_decode_query_inplace],AC_DEFINE_UNQUOTED([HAVE_HTP_DECODE_QUERY_INPLACE],[1],[Found htp_decode_query_inplace function in libhtp]) ,,[-lhtp])
-        AC_CHECK_LIB([htp], [htp_config_set_response_decompression_layer_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_RESPONSE_DECOMPRESSION_LAYER_LIMIT],[1],[Found htp_config_set_response_decompression_layer_limit function in libhtp]) ,,[-lhtp])
-        AC_CHECK_LIB([htp], [htp_config_set_allow_space_uri],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI],[1],[Found htp_config_set_allow_space_uri function in libhtp]) ,,[-lhtp])
-        AC_EGREP_HEADER(htp_config_set_path_decode_u_encoding, htp/htp.h, AC_DEFINE_UNQUOTED([HAVE_HTP_SET_PATH_DECODE_U_ENCODING],[1],[Found usable htp_config_set_path_decode_u_encoding function in libhtp]) )
-        AC_CHECK_LIB([htp], [htp_config_set_lzma_memlimit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT],[1],[Found htp_config_set_lzma_memlimit function in libhtp]) ,,[-lhtp])
-        AC_CHECK_LIB([htp], [htp_config_set_lzma_layers],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_LAYERS],[1],[Found htp_config_set_lzma_layers function in libhtp]) ,,[-lhtp])
-        AC_CHECK_LIB([htp], [htp_config_set_compression_bomb_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT],[1],[Found htp_config_set_compression_bomb_limit function in libhtp]) ,,[-lhtp])
-        AC_CHECK_LIB([htp], [htp_config_set_compression_time_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT],[1],[Found htp_config_set_compression_time_limit function in libhtp]) ,,[-lhtp])
-        AC_CHECK_LIB([htp], [htp_config_set_max_tx],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_MAX_TX],[1],[Found htp_config_set_max_tx function in libhtp]) ,,[-lhtp])
-        AC_CHECK_LIB([htp], [htp_config_set_number_headers_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_HEADERS_LIMIT],[1],[Found htp_config_set_number_headers_limit function in libhtp]) ,,[-lhtp])
-    ])
-
-    if test "x$enable_non_bundled_htp" = "xno"; then
-        # test if we have a bundled htp
-        if test -d "$srcdir/libhtp"; then
-            AC_CONFIG_SUBDIRS([libhtp])
-            HTP_DIR="libhtp"
-            AC_SUBST(HTP_DIR)
-            HTP_LDADD="../libhtp/htp/libhtp.la"
-            AC_SUBST(HTP_LDADD)
-            # make sure libhtp is added to the includes
-            CPPFLAGS="-I\${srcdir}/../libhtp/ ${CPPFLAGS}"
-
-            AC_CHECK_HEADER(iconv.h,,[AC_MSG_ERROR(iconv.h not found ...)])
-            AC_CHECK_LIB(iconv, libiconv_close)
-            AC_DEFINE_UNQUOTED([HAVE_HTP_URI_NORMALIZE_HOOK],[1],[Assuming htp_config_register_request_uri_normalize function in bundled libhtp])
-            AC_DEFINE_UNQUOTED([HAVE_HTP_TX_GET_RESPONSE_HEADERS_RAW],[1],[Assuming htp_tx_get_response_headers_raw function in bundled libhtp])
-            AC_DEFINE_UNQUOTED([HAVE_HTP_DECODE_QUERY_INPLACE],[1],[Assuming htp_decode_query_inplace function in bundled libhtp])
-            # enable when libhtp has been updated
-            AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_RESPONSE_DECOMPRESSION_LAYER_LIMIT],[1],[Assuming htp_config_set_response_decompression_layer_limit function in bundled libhtp])
-            AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI],[1],[Assuming htp_config_set_allow_space_uri function in bundled libhtp])
-            AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT],[1],[Assuming htp_config_set_lzma_memlimit function in bundled libhtp])
-            AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_LAYERS],[1],[Assuming htp_config_set_lzma_layers function in bundled libhtp])
-            AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT],[1],[Assuming htp_config_set_compression_bomb_limit function in bundled libhtp])
-            AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT],[1],[Assuming htp_config_set_compression_time_limit function in bundled libhtp])
-            AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_MAX_TX],[1],[Assuming htp_config_set_max_tx function in bundled libhtp])
-            AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_HEADERS_LIMIT],[1],[Assuming htp_config_set_number_headers_limit function in bundled libhtp])
-        else
-            echo
-            echo "  ERROR: Libhtp is not bundled. Get libhtp by doing:"
-            echo "     git clone https://github.com/OISF/libhtp"
-            echo "  Then re-run Suricata's autogen.sh and configure script."
-            echo "  Or, if libhtp is installed in a different location,"
-            echo "  pass --enable-non-bundled-htp to Suricata's configure script."
-            echo "  Add --with-libhtp-includes=<dir> and --with-libhtp-libraries=<dir> if"
-            echo "  libhtp is not installed in the include and library paths."
-            echo
-            exit 1
-        fi
-    fi
-
-    AM_CONDITIONAL([HTP_LDADD], [test "x${HTP_LDADD}" != "x"])
-
   # Check for libcap-ng
     case $host in
     *-*-linux*)
@@ -2582,7 +2480,6 @@ AC_SUBST(MAJOR_MINOR)
 AC_SUBST(RUST_FEATURES)
 AC_SUBST(RUST_SURICATA_LIBDIR)
 AC_SUBST(RUST_SURICATA_LIBNAME)
-AC_SUBST(enable_non_bundled_htp)
 
 AM_CONDITIONAL([BUILD_SHARED_LIBRARY], [test "x$enable_shared" = "xyes"] && [test "x$can_build_shared_library" = "xyes"])
 
@@ -2634,7 +2531,6 @@ SURICATA_BUILD_CONF="Suricata Configuration:
   GeoIP2 support:                          ${enable_geoip}
   JA3 support:                             ${enable_ja3}
   JA4 support:                             ${enable_ja4}
-  Non-bundled htp:                         ${enable_non_bundled_htp}
   Hyperscan support:                       ${enable_hyperscan}
   Libnet support:                          ${enable_libnet}
   liblz4 support:                          ${enable_liblz4}
index 9d7a45a5439259f5f10ecae7842d5eebc059ad28..373a6e4fe968c8c6b05644c26bc5f58d53f81d89 100644 (file)
@@ -72,7 +72,7 @@ Next, enter the following line in the terminal:
   git clone https://github.com/OISF/suricata.git
   cd suricata
 
-Libhtp and suricata-update are not bundled. Get them by doing:
+Suricata-update is not bundled. Get it by doing:
 
 .. code-block:: bash
 
index c712e90a99b88d49b16c98e1223f4b64f86fd2b9..41cd88c81047c2155dff5f80b6f566d8b0756ae7 100644 (file)
@@ -30,7 +30,7 @@ Use these to check that specific functions behave as expected, in success and in
 during development, for nom parsers in the Rust codebase, for instance, or for checking that messages
 or message parts of a protocol/stream are processed as they should.
 
-To execute all unit tests (both from C and Rust code), as well as ``libhtp`` ones, from the Suricata main directory, run::
+To execute all unit tests (both from C and Rust code) from the Suricata main directory, run::
 
     make check
 
index 8570a029ec7ebe47d1de2b393e590b0e113d9081..bcc0008bad923292edf61143bf34220c3719be9e 100644 (file)
@@ -103,7 +103,7 @@ Example:
 HttpGetRequestHost
 ~~~~~~~~~~~~~~~~~~
 
-Get the host from libhtp's tx->request_hostname, which can either be
+Get the host from libhtp's htp_tx_request_hostname(tx), which can either be
 the host portion of the url or the host portion of the Host header.
 
 Example:
index 205bcd3d967fa95e9ac78f0c801a956124ebebd0..33443886af216d404a601b13ae44e0150694261e 100644 (file)
@@ -171,6 +171,10 @@ Logging changes
   live transaction, to reduce the chances of logging unrelated data.** This may
   lead to what looks like a regression in behavior, but it is a considered choice.
 
+Other Changes
+~~~~~~~~~~~~~
+- libhtp has been replaced with a rust version. This means libhtp is no longer built and linked as a shared library, and the libhtp dependency is now built directly into suricata.
+
 Upgrading 6.0 to 7.0
 --------------------
 
index 22fc4543a34df65726fcc58d397558bacdd62b03..c110daa439a58c734a3c9516326316ed136a4356 100644 (file)
@@ -829,7 +829,7 @@ WARN_LOGFILE           =
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = src/ libhtp/htp/ examples/
+INPUT                  = src/ examples/
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
index fc9899dade2c9a325e9645c3e8fdd8c4c7496fdf..f503cafa7d88c71a8f06655e1fd8c3e0b1371d5b 100644 (file)
@@ -6,7 +6,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/src
 
 custom_LDFLAGS = $(all_libraries) $(SECLDFLAGS)
 custom_LDADD = "-Wl,--start-group,$(top_builddir)/src/libsuricata_c.a,../../$(RUST_SURICATA_LIB),--end-group" $(RUST_LDADD)
-if HTP_LDADD
-custom_LDADD += ../../$(HTP_LDADD)
-endif
 custom_DEPENDENCIES = $(top_builddir)/src/libsuricata_c.a ../../$(RUST_SURICATA_LIB)
index c4004b9446c9d3d63f6c62a10ab409e3231f2c57..32821827392aeedcc3de220725ade777470f348c 100644 (file)
@@ -6,7 +6,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/src
 
 simple_LDFLAGS = $(all_libraries) $(SECLDFLAGS)
 simple_LDADD = "-Wl,--start-group,$(top_builddir)/src/libsuricata_c.a,../../$(RUST_SURICATA_LIB),--end-group" $(RUST_LDADD)
-if HTP_LDADD
-simple_LDADD += ../../$(HTP_LDADD)
-endif
 simple_DEPENDENCIES = $(top_builddir)/src/libsuricata_c.a ../../$(RUST_SURICATA_LIB)
index 1fabe076526886ce3e73026c76c6ff2e01c3bfc7..94bbf9a81b98550bd613426e685da8b90d4aa2c3 100644 (file)
@@ -9,8 +9,6 @@ LIBS="@LIBS@ @RUST_LDADD@"
 shared_lib="-lsuricata"
 static_lib="-lsuricata_c -lsuricata_rust"
 
-enable_non_bundled_htp="@enable_non_bundled_htp@"
-
 lib="$shared_lib"
 
 show_libs="no"
@@ -47,12 +45,6 @@ if [ "$use_static" = "no" ]; then
     fi
 fi
 
-# If we're using a bundled htp, add it to the libs as well. It will
-# already be present if we're use a non-bundled libhtp.
-if [ "$enable_non_bundled_htp" = "no" ]; then
-    lib="${lib} -lhtp"
-fi
-
 output=""
 
 if [ "$show_cflags" = "yes" ]; then
index 6df1358f075fca14efdcc0e93970a46d800f1a46..537f896bfd7913e9633c4cc04cdf116ecee373eb 100644 (file)
@@ -1,7 +1,6 @@
-# Specify libhtp and suricata-update requirements.
+# Specify suricata-update requirements.
 #
 # Format:
 #
 #   name {repo} {branch|tag}
-libhtp https://github.com/OISF/libhtp 0.5.x
 suricata-update https://github.com/OISF/suricata-update master
index e08d84eb46cdb9d2baabac47979aaa1366041dfb..7e4d1fd258e0431760db4ebb1b8b15cc925c5303 100644 (file)
@@ -96,4 +96,5 @@ alert http any any -> any any (msg:"SURICATA HTTP request missing protocol"; flo
 alert http any any -> any any (msg:"SURICATA HTTP request too many headers"; flow:established,to_server; app-layer-event:http.request_too_many_headers; classtype:protocol-command-decode; sid:2221056; rev:1;)
 alert http any any -> any any (msg:"SURICATA HTTP response too many headers"; flow:established,to_client; app-layer-event:http.response_too_many_headers; classtype:protocol-command-decode; sid:2221057; rev:1;)
 
-# next sid 2221058
+#alert http any any -> any any (msg:"SURICATA HTTP response chunk extension"; flow:established; app-layer-event:http.response_chunk_extension; classtype:protocol-command-decode; sid:2221058; rev:1;)
+# next sid 2221059
index faa4f5bdc2303d4986be5afc89ac77384b6cf046..7e20af651934b1ecb849f54305c73febc16779bb 100644 (file)
@@ -175,6 +175,17 @@ dependencies = [
  "alloc-stdlib",
 ]
 
+[[package]]
+name = "bstr"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
+dependencies = [
+ "memchr",
+ "regex-automata",
+ "serde",
+]
+
 [[package]]
 name = "build_const"
 version = "0.2.2"
@@ -187,6 +198,21 @@ version = "1.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
 
+[[package]]
+name = "cc"
+version = "1.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af"
+dependencies = [
+ "shlex",
+]
+
+[[package]]
+name = "cdylib-link-lines"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d98eabef08bbdf5afd0b9c0cabb1ac335f7c70447ef095eed85dffd9628b20bc"
+
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
@@ -450,6 +476,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c"
 dependencies = [
  "crc32fast",
+ "libz-sys",
  "miniz_oxide",
 ]
 
@@ -465,6 +492,101 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
 
+[[package]]
+name = "futures"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
+
+[[package]]
+name = "futures-executor"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-io"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
+
+[[package]]
+name = "futures-macro"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.98",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
+
+[[package]]
+name = "futures-task"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
+
+[[package]]
+name = "futures-timer"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
+
+[[package]]
+name = "futures-util"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "pin-utils",
+ "slab",
+]
+
 [[package]]
 name = "generic-array"
 version = "0.14.7"
@@ -537,6 +659,22 @@ dependencies = [
  "digest",
 ]
 
+[[package]]
+name = "htp"
+version = "2.0.0"
+dependencies = [
+ "base64",
+ "bstr",
+ "cdylib-link-lines",
+ "flate2",
+ "lazy_static",
+ "libc",
+ "lzma-rs",
+ "nom",
+ "rstest",
+ "time",
+]
+
 [[package]]
 name = "indexmap"
 version = "2.7.1"
@@ -607,6 +745,18 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "libz-sys"
+version = "1.1.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df9b68e50e6e0b26f672573834882eb57759f6db9b3be2ea3c35c91188bb4eaa"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.4.15"
@@ -945,6 +1095,18 @@ version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
 
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
+
 [[package]]
 name = "polyval"
 version = "0.5.3"
@@ -1062,12 +1224,53 @@ dependencies = [
  "regex-syntax",
 ]
 
+[[package]]
+name = "regex-automata"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"
+
 [[package]]
 name = "regex-syntax"
 version = "0.6.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
 
+[[package]]
+name = "rstest"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de1bb486a691878cd320c2f0d319ba91eeaa2e894066d8b5f8f117c000e9d962"
+dependencies = [
+ "futures",
+ "futures-timer",
+ "rstest_macros",
+ "rustc_version",
+]
+
+[[package]]
+name = "rstest_macros"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "290ca1a1c8ca7edb7c3283bd44dc35dd54fdec6253a3912e201ba1072018fca8"
+dependencies = [
+ "cfg-if",
+ "proc-macro2",
+ "quote",
+ "rustc_version",
+ "syn 1.0.109",
+ "unicode-ident",
+]
+
+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
 [[package]]
 name = "rusticata-macros"
 version = "4.1.0"
@@ -1184,6 +1387,12 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
+[[package]]
+name = "semver"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03"
+
 [[package]]
 name = "serde"
 version = "1.0.218"
@@ -1248,12 +1457,27 @@ dependencies = [
  "lazy_static",
 ]
 
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
 [[package]]
 name = "siphasher"
 version = "0.3.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
 
+[[package]]
+name = "slab"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "smallvec"
 version = "1.14.0"
@@ -1302,6 +1526,7 @@ dependencies = [
  "flate2",
  "hex",
  "hkdf",
+ "htp",
  "ipsec-parser",
  "kerberos-parser",
  "lazy_static",
@@ -1510,9 +1735,9 @@ dependencies = [
 
 [[package]]
 name = "time"
-version = "0.3.37"
+version = "0.3.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21"
+checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
 dependencies = [
  "deranged",
  "itoa",
@@ -1531,9 +1756,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
 
 [[package]]
 name = "time-macros"
-version = "0.2.19"
+version = "0.2.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de"
+checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf"
 dependencies = [
  "num-conv",
  "time-core",
@@ -1677,6 +1902,12 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
 
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
 [[package]]
 name = "version_check"
 version = "0.9.5"
index a15da70b2c4a183af3e05aa0c871f7e2cf2d5d75..c8d52f8a0b56d912f2afe24e864b100380fd1c13 100644 (file)
@@ -13,6 +13,7 @@ members = [
     "suricatactl",
     "suricatasc",
     "sys",
+    "htp",
 ]
 
 default-members = [
@@ -84,5 +85,7 @@ suricata-sys = { path = "./sys", version = "@PACKAGE_VERSION@" }
 
 suricata-lua-sys = { version = "0.1.0-alpha.6" }
 
+htp = { path = "./htp", version = "2.0.0" }
+
 [dev-dependencies]
 test-case = "~3.3.1"
index dce105165d85f7fc359530784647c379758519e7..521bac2d8c9c7ba7ee7a2f73b2c30918ef8c792b 100644 (file)
@@ -2,17 +2,19 @@ SUBDIRS =     sys \
                suricatasc \
                suricatactl
 
-EXTRA_DIST =   src derive \
+EXTRA_DIST =   src derive htp \
                .cargo/config.toml.in \
                cbindgen.toml \
                dist/rust-bindings.h \
+               dist/htp/htp_rs.h \
                vendor \
                Cargo.toml Cargo.lock \
                derive/Cargo.toml \
                sys \
                sys/Cargo.toml \
                suricatasc \
-               suricatactl
+               suricatactl \
+               htp/Cargo.toml
 
 if !DEBUG
 RELEASE = --release
@@ -71,6 +73,7 @@ all-local: Cargo.toml
                        $(RUST_SURICATA_LIBDIR)/${RUST_SURICATA_LIBNAME}; \
        fi
        $(MAKE) gen/rust-bindings.h
+       $(MAKE) gen/htp/htp_rs.h
 
 install-exec-local:
        install -d -m 0755 "$(DESTDIR)$(bindir)"
@@ -152,6 +155,15 @@ else
 gen/rust-bindings.h:
 endif
 
+if HAVE_CBINDGEN
+gen/htp/htp_rs.h: $(RUST_SURICATA_LIB) htp/cbindgen.toml
+       cd $(abs_top_srcdir)/rust/htp && \
+               cbindgen --config $(abs_top_srcdir)/rust/htp/cbindgen.toml \
+               --quiet --verify --output $(abs_top_builddir)/rust/gen/htp/htp_rs.h || true
+else
+gen/htp/htp_rs.h:
+endif
+
 doc:
        CARGO_HOME=$(CARGO_HOME) $(CARGO) doc --all-features --no-deps
 
@@ -163,6 +175,15 @@ else
 dist/rust-bindings.h:
 endif
 
+if HAVE_CBINDGEN
+dist/htp/htp_rs.h:
+       cd $(abs_top_srcdir)/rust/htp && \
+       cbindgen --config cbindgen.toml \
+               --quiet --output $(abs_top_builddir)/rust/dist/htp/htp_rs.h
+else
+dist/htp/htp_rs.h:
+endif
+
 Cargo.toml: Cargo.toml.in
 
 update-lock: Cargo.toml
diff --git a/rust/htp/.gitignore b/rust/htp/.gitignore
new file mode 100644 (file)
index 0000000..01c3566
--- /dev/null
@@ -0,0 +1 @@
+!Cargo.toml
diff --git a/rust/htp/Cargo.toml b/rust/htp/Cargo.toml
new file mode 100644 (file)
index 0000000..6d9a75f
--- /dev/null
@@ -0,0 +1,40 @@
+[package]
+name = "htp"
+authors = ["ivanr = Ivan Ristic <ivanr@webkreator.com>", "cccs = Canadian Centre for Cyber Security"]
+version = "2.0.0"
+publish = false
+edition = "2021"
+autobins = false
+license-file = "LICENSE"
+description = "Security Aware HTP Protocol parsing library"
+readme = "README.md"
+repository = "https://github.com/CybercentreCanada/libhtp-rs-internal"
+homepage = "https://github.com/CybercentreCanada/libhtp-rs-internal"
+keywords = ["parser", "HTTP", "protocol", "network", "api"]
+categories = ["parsing", "network-programming"]
+include = [
+    "Cargo.toml",
+    "LICENSE",
+    "README.md",
+    "src/**/*.rs",
+    "cbindgen.toml",
+]
+
+[lib]
+crate-type = ["staticlib", "rlib", "cdylib"]
+
+[dependencies]
+base64 = "0.22.1"
+bstr = "1.6.0"
+libc = "0.2"
+nom = "7.1.1"
+lzma-rs = { version = "0.2.0", features = ["stream"] }
+flate2 = { version = "~1.0.35", features = ["zlib-default"], default-features = false }
+lazy_static = "1.4.0"
+time = "=0.3.36"
+
+[dev-dependencies]
+rstest = "0.17.0"
+
+[build-dependencies]
+cdylib-link-lines = "0.1.5"
diff --git a/rust/htp/LICENSE b/rust/htp/LICENSE
new file mode 100644 (file)
index 0000000..3d4227e
--- /dev/null
@@ -0,0 +1,31 @@
+Copyright (c) 2009-2010 Open Information Security Foundation
+Copyright (c) 2010-2013 Qualys, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+- Neither the name of the Qualys, Inc. nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/rust/htp/README.md b/rust/htp/README.md
new file mode 100644 (file)
index 0000000..40aac9b
--- /dev/null
@@ -0,0 +1,46 @@
+# LibHTP
+
+---
+
+Copyright 2009-2010 Open Information Security Foundation  
+Copyright 2010-2013 Qualys, Inc.
+
+---
+
+LibHTP is a security-aware parser for the HTTP protocol and the related bits
+and pieces. The goal of the project is mainly to support the Suricata use case.
+Other use cases might not be fully supported, and we encourage contributions to cover these.
+
+See the LICENSE file distributed with this work for information
+regarding licensing, copying and copyright ownership.
+
+
+# Usage
+Start using libHTP by including it in your project's `Cargo.toml`
+dependencies. The base library will also be required for using common
+types.
+
+**The minimum supported version of `rustc` is `1.58.1`.**
+
+## Example
+```
+[dependencies]
+htp = "2.0.0"
+```
+
+## FFI Support
+LibHTP has a foreign function interface for use in C/C++ projects.
+FFI Support can be enabled by building with the `cbindgen` feature.
+
+```
+# Install cbindgen which is required to generate headers
+cargo install --force cbindgen
+
+# Build headers and shared objects
+make
+```
+
+## LICENSE
+
+LibHTP is licensed under the BSD 3-Clause license (also known as "BSD New" and
+"BSD Simplified".) The complete text of the license is enclosed in the file LICENSE.
diff --git a/rust/htp/cbindgen.toml b/rust/htp/cbindgen.toml
new file mode 100644 (file)
index 0000000..40c42c1
--- /dev/null
@@ -0,0 +1,64 @@
+language = "C"
+
+# Header wrapping options
+#header = "LICENSE here"
+#trailer = ""
+include_guard = "_HTP_H"
+autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Do NOT modify manually */"
+#include_version = true
+#sys_includes = [] # Sys headers
+includes = []
+no_includes = false
+cpp_compat = true
+#after_includes = ""
+
+# Code style
+#braces = "SameLine"
+#line_length = 100
+#tab_width = 2
+#documentation_style = auto
+
+# Codegen
+style = "both"
+
+after_includes = """
+#define htp_url_encoding_handling_t HtpUrlEncodingHandling
+#define htp_log_code_t HtpLogCode
+"""
+
+[export.rename]
+"HtpFlags" = "HTP_FLAGS"
+"Config" = "htp_cfg_t"
+"Connection" = "htp_conn_t"
+"ConnectionParser" = "htp_connp_t"
+"Header" = "htp_header_t"
+"Headers" = "htp_headers_t"
+"Param" = "htp_param_t"
+"Data" = "htp_tx_data_t"
+"Transaction" = "htp_tx_t"
+"Transactions" = "htp_txs_t"
+"Uri" = "htp_uri_t"
+"Bstr" = "bstr"
+"Table" = "htp_table_t"
+"Log" = "htp_log_t"
+"timeval" = "struct timeval"
+"Logs" = "htp_logs_t"
+
+[export]
+include = [
+"HtpUrlEncodingHandling",
+"HtpLogCode",
+"HtpFlags",
+]
+
+[enum]
+rename_variants = "QualifiedScreamingSnakeCase"
+prefix_with_name = false
+
+[macro_expansion]
+bitflags = true
+
+# Rust parsing options
+[parse]
+parse_deps = false
+clean = false
diff --git a/rust/htp/fuzz/Cargo.toml b/rust/htp/fuzz/Cargo.toml
new file mode 100644 (file)
index 0000000..4ef6ca6
--- /dev/null
@@ -0,0 +1,25 @@
+
+[package]
+name = "htp-fuzz"
+version = "0.0.1"
+authors = ["Automatically generated"]
+publish = false
+edition = "2018"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies.htp]
+path = ".."
+[dependencies.libfuzzer-sys]
+git = "https://github.com/rust-fuzz/libfuzzer-sys.git"
+
+[dependencies]
+
+# Prevent this from interfering with workspaces
+[workspace]
+members = ["."]
+
+[[bin]]
+name = "fuzz_htp_rs"
+path = "fuzz_targets/fuzz_htp.rs"
diff --git a/rust/htp/fuzz/fuzz_targets/fuzz_htp.rs b/rust/htp/fuzz/fuzz_targets/fuzz_htp.rs
new file mode 100644 (file)
index 0000000..fb8ff54
--- /dev/null
@@ -0,0 +1,14 @@
+#![allow(non_snake_case)]
+#![no_main]
+#[macro_use] extern crate libfuzzer_sys;
+
+extern crate htp;
+
+use htp::test::{Test, TestConfig};
+use std::env;
+
+
+fuzz_target!(|data: &[u8]| {
+    let mut t = Test::new(TestConfig());
+    t.run_slice(data);
+});
diff --git a/rust/htp/src/bstr.rs b/rust/htp/src/bstr.rs
new file mode 100644 (file)
index 0000000..22b4961
--- /dev/null
@@ -0,0 +1,475 @@
+use bstr::{BString, ByteSlice};
+use core::cmp::Ordering;
+use std::ops::{Deref, DerefMut};
+
+/// Bstr is a convenience wrapper around binary data that adds string-like functions.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Bstr {
+    // Wrap a BString under the hood. We want to be able to
+    // implement behaviours on top of this if needed, so we wrap
+    // it instead of exposing it directly in our public API.
+    s: BString,
+}
+
+impl Default for Bstr {
+    fn default() -> Self {
+        Self {
+            s: BString::from(Vec::new()),
+        }
+    }
+}
+
+impl Bstr {
+    /// Make a new owned Bstr
+    pub(crate) fn new() -> Self {
+        Bstr {
+            s: BString::from(Vec::new()),
+        }
+    }
+
+    /// Make a new owned Bstr with given capacity
+    pub(crate) fn with_capacity(len: usize) -> Self {
+        Bstr {
+            s: BString::from(Vec::with_capacity(len)),
+        }
+    }
+
+    /// Compare this bstr with the given slice
+    pub(crate) fn cmp_slice<B: AsRef<[u8]>>(&self, other: B) -> Ordering {
+        self.as_slice().cmp(other.as_ref())
+    }
+
+    /// Return true if self is equal to other
+    pub(crate) fn eq_slice<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        self.cmp_slice(other) == Ordering::Equal
+    }
+
+    /// Compare bstr with the given slice, ignoring ascii case.
+    pub(crate) fn cmp_nocase<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        let lefts: &[u8] = self.as_ref();
+        let mut lefts = lefts.iter();
+        let mut rights = other.as_ref().iter();
+        loop {
+            match (lefts.next(), rights.next()) {
+                (None, None) => {
+                    return true;
+                }
+                (Some(l), Some(r)) => {
+                    if !l.eq_ignore_ascii_case(r) {
+                        return false;
+                    }
+                }
+                _ => {
+                    return false;
+                }
+            }
+        }
+    }
+
+    /// Case insensitive comparison between self and other, ignoring any zeros in self
+    pub(crate) fn cmp_nocase_nozero<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        let lefts: &[u8] = self.as_ref();
+        let mut lefts = lefts.iter().filter(|c| (**c) > 0);
+        let mut rights = other.as_ref().iter();
+        loop {
+            match (lefts.next(), rights.next()) {
+                (None, None) => {
+                    return true;
+                }
+                (Some(l), Some(r)) => {
+                    if !l.eq_ignore_ascii_case(r) {
+                        return false;
+                    }
+                }
+                _ => {
+                    // TODO trim ?
+                    return false;
+                }
+            }
+        }
+    }
+
+    /// Extend this bstr with the given slice
+    pub(crate) fn add<B: AsRef<[u8]>>(&mut self, other: B) {
+        self.extend_from_slice(other.as_ref())
+    }
+
+    /// Extend the bstr as much as possible without growing
+    #[cfg(test)]
+    pub(crate) fn add_noex<B: AsRef<[u8]>>(&mut self, other: B) {
+        let len = std::cmp::min(self.capacity() - self.len(), other.as_ref().len());
+        self.add(&other.as_ref()[..len]);
+    }
+
+    /// Return true if this bstr starts with other
+    #[cfg(test)]
+    pub(crate) fn starts_with<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        self.as_slice().starts_with(other.as_ref())
+    }
+
+    /// Return true if this bstr starts with other, ignoring ascii case
+    pub(crate) fn starts_with_nocase<B: AsRef<[u8]>>(&self, other: B) -> bool {
+        if self.len() < other.as_ref().len() {
+            return false;
+        }
+        let len: usize = std::cmp::min(self.len(), other.as_ref().len());
+        self.as_slice()[..len].eq_ignore_ascii_case(&other.as_ref()[..len])
+    }
+
+    /// Find the index of the given slice
+    #[cfg(test)]
+    pub(crate) fn index_of<B: AsRef<[u8]>>(&self, other: B) -> Option<usize> {
+        self.find(other.as_ref())
+    }
+
+    /// Find the index of the given slice ignoring ascii case
+    pub(crate) fn index_of_nocase<B: AsRef<[u8]>>(&self, other: B) -> Option<usize> {
+        let src = self.as_slice();
+        let mut haystack = LowercaseIterator::new(&src);
+        let needle = other.as_ref().to_ascii_lowercase();
+        haystack.index_of(&needle)
+    }
+
+    /// Find the index of the given slice ignoring ascii case and any zeros in self
+    pub(crate) fn index_of_nocase_nozero<B: AsRef<[u8]>>(&self, other: B) -> Option<usize> {
+        let src = self.as_slice();
+        let mut haystack = LowercaseNoZeroIterator::new(&src);
+        let needle = other.as_ref().to_ascii_lowercase();
+        haystack.index_of(&needle)
+    }
+}
+
+// Trait Implementations for Bstr
+
+/// Let callers access BString functions
+impl Deref for Bstr {
+    type Target = BString;
+
+    fn deref(&self) -> &Self::Target {
+        &self.s
+    }
+}
+
+/// Let callers access mutable BString functions
+impl DerefMut for Bstr {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.s
+    }
+}
+
+impl From<&[u8]> for Bstr {
+    fn from(src: &[u8]) -> Self {
+        Bstr {
+            s: BString::from(src),
+        }
+    }
+}
+
+impl From<&str> for Bstr {
+    fn from(src: &str) -> Self {
+        src.as_bytes().into()
+    }
+}
+
+impl From<Vec<u8>> for Bstr {
+    fn from(src: Vec<u8>) -> Self {
+        Bstr {
+            s: BString::from(src),
+        }
+    }
+}
+
+/// Compare a Bstr to a &str byte for byte
+impl PartialEq<&str> for Bstr {
+    fn eq(&self, rhs: &&str) -> bool {
+        self.as_bytes() == rhs.as_bytes()
+    }
+}
+
+/// A trait that lets us find the byte index of slices in a generic way.
+///
+/// This layer of abstraction is motivated by the need to find needle in
+/// haystack when we want to perform case sensitive, case insensitive, and
+/// case insensitive + zero skipping. All of these algorithms are identical
+/// except we compare the needle bytes with the src bytes in different ways,
+/// and in the case of zero skipping we want to pretend that zero bytes in
+/// the haystack do not exist. So we define iterators for each of lowercase
+/// and lowercase + zero skipping, and then implement this trait for both of
+/// those, and then define the search function in terms of this trait.
+trait SubIterator: Iterator<Item = u8> {
+    /// Return a new iterator of the same type starting at the current byte index
+    fn subiter(&self) -> Self;
+    /// Return the current byte index into the iterator
+    fn index(&self) -> usize;
+    /// Find the given needle in self and return the byte index
+    fn index_of(&mut self, needle: impl AsRef<[u8]>) -> Option<usize>;
+}
+
+/// Find the byte index of the given slice in the source.
+///
+/// Someday an enterprising soul can implement this function inside SubIterator
+/// directly (where it arguably belongs), but this involves handling dyn Self,
+/// and implementing it this way lets monomorphization emit concrete
+/// implementations for each of the two types we actually have.
+fn index_of<T: SubIterator, S: AsRef<[u8]>>(haystack: &mut T, needle: &S) -> Option<usize> {
+    let first = needle.as_ref().first()?;
+    while let Some(s) = haystack.next() {
+        if s == *first {
+            let mut test = haystack.subiter();
+            let mut equal = false;
+            for cmp_byte in needle.as_ref().as_bytes() {
+                equal = Some(*cmp_byte) == test.next();
+                if !equal {
+                    break;
+                }
+            }
+            if equal {
+                return Some(haystack.index());
+            }
+        }
+    }
+    None
+}
+
+/// A convenience iterator for anything that satisfies AsRef<[u8]>
+/// that yields lowercase ascii bytes and skips null bytes
+struct LowercaseNoZeroIterator<'a, T: AsRef<[u8]>> {
+    src: &'a T,
+    idx: usize,
+    first: bool,
+}
+
+impl<'a, T: AsRef<[u8]>> LowercaseNoZeroIterator<'a, T> {
+    fn new(src: &'a T) -> Self {
+        LowercaseNoZeroIterator {
+            src,
+            idx: 0,
+            first: true,
+        }
+    }
+}
+
+impl<T: AsRef<[u8]>> Iterator for LowercaseNoZeroIterator<'_, T> {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if self.first {
+                self.first = false;
+            } else {
+                self.idx += 1;
+            }
+            let next = self
+                .src
+                .as_ref()
+                .get(self.idx)
+                .map(|c| c.to_ascii_lowercase());
+            if next != Some(0) {
+                break next;
+            }
+        }
+    }
+}
+
+impl<T: AsRef<[u8]>> SubIterator for LowercaseNoZeroIterator<'_, T> {
+    fn subiter(&self) -> Self {
+        LowercaseNoZeroIterator {
+            src: self.src,
+            idx: self.idx,
+            first: true,
+        }
+    }
+
+    fn index(&self) -> usize {
+        self.idx
+    }
+
+    fn index_of(&mut self, needle: impl AsRef<[u8]>) -> Option<usize> {
+        index_of(self, &needle)
+    }
+}
+
+/// A convenience iterator for anything that satisfies AsRef<[u8]>
+/// that yields lowercase ascii bytes
+struct LowercaseIterator<'a, T: AsRef<[u8]>> {
+    src: &'a T,
+    idx: usize,
+    first: bool,
+}
+
+impl<'a, T: AsRef<[u8]>> LowercaseIterator<'a, T> {
+    fn new(src: &'a T) -> Self {
+        LowercaseIterator {
+            src,
+            idx: 0,
+            first: true,
+        }
+    }
+}
+
+impl<T: AsRef<[u8]>> Iterator for LowercaseIterator<'_, T> {
+    type Item = u8;
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.first {
+            self.first = false;
+        } else {
+            self.idx += 1;
+        }
+        self.src
+            .as_ref()
+            .get(self.idx)
+            .map(|c| c.to_ascii_lowercase())
+    }
+}
+
+impl<T: AsRef<[u8]>> SubIterator for LowercaseIterator<'_, T> {
+    fn subiter(&self) -> Self {
+        LowercaseIterator {
+            src: self.src,
+            idx: self.idx,
+            first: true,
+        }
+    }
+
+    fn index(&self) -> usize {
+        self.idx
+    }
+
+    fn index_of(&mut self, needle: impl AsRef<[u8]>) -> Option<usize> {
+        index_of(self, &needle)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::bstr::*;
+    use core::cmp::Ordering;
+    use rstest::rstest;
+
+    #[test]
+    fn Compare() {
+        let b = Bstr::from("ABCDefgh");
+        // direct equality
+        assert_eq!(Ordering::Equal, b.cmp_slice("ABCDefgh"));
+        // case sensitive
+        assert_ne!(Ordering::Equal, b.cmp_slice("abcdefgh"));
+        // src shorter than dst
+        assert_eq!(Ordering::Less, b.cmp_slice("ABCDefghi"));
+        // src longer than dst
+        assert_eq!(Ordering::Greater, b.cmp_slice("ABCDefg"));
+        // case less
+        assert_eq!(Ordering::Less, b.cmp_slice("abcdefgh"));
+        // case greater
+        assert_eq!(Ordering::Greater, b.cmp_slice("ABCDEFGH"));
+    }
+
+    #[test]
+    fn CompareNocase() {
+        let b = Bstr::from("ABCDefgh");
+        assert!(b.cmp_nocase("ABCDefgh"));
+        assert!(b.cmp_nocase("abcdefgh"));
+        assert!(b.cmp_nocase("ABCDEFGH"));
+        assert!(!b.cmp_nocase("ABCDefghi"));
+        assert!(!b.cmp_nocase("ABCDefg"));
+    }
+
+    #[test]
+    fn CompareNocaseNozero() {
+        // nocase_nozero only applies to the source string. The caller
+        // is not expected to pass in a search string with nulls in it.
+        let b = Bstr::from("A\x00B\x00\x00C\x00Defg\x00h");
+        assert!(b.cmp_nocase_nozero("ABCDefgh"));
+        assert!(b.cmp_nocase_nozero("abcdefgh"));
+        assert!(b.cmp_nocase_nozero("ABCDEFGH"));
+        assert!(!b.cmp_nocase_nozero("ABCDefghi"));
+        assert!(!b.cmp_nocase_nozero("ABCDefg"));
+    }
+
+    #[rstest]
+    #[case("abc", "defgh", "abcdefgh")]
+    #[case("ABC", "DEFGH", "ABCDEFGH")]
+    #[case("aBc", "Defgh", "aBcDefgh")]
+    #[case(
+        "TestLongerDataBc",
+        "Defghikjlmnopqrstuvwxyz",
+        "TestLongerDataBcDefghikjlmnopqrstuvwxyz"
+    )]
+    fn test_add(#[case] input: &str, #[case] input_add: &str, #[case] expected: &str) {
+        let mut b = Bstr::from(input);
+        b.add(input_add);
+        assert_eq!(b.cmp_slice(expected), Ordering::Equal);
+    }
+
+    #[rstest]
+    #[case(10, "abcd", "efghij", "abcdefghij")]
+    #[case(5, "ABcd", "efgh", "ABcde")]
+    #[case(4, "AbCd", "EFGH", "AbCd")]
+    #[case(20, "abcd", "efGHij", "abcdefGHij")]
+    fn test_add_no_ex(
+        #[case] capacity: usize, #[case] input: &str, #[case] input_add: &str,
+        #[case] expected: &str,
+    ) {
+        let mut b = Bstr::with_capacity(capacity);
+        b.add_noex(input);
+        b.add_noex(input_add);
+        assert_eq!(b.cmp_slice(expected), Ordering::Equal);
+    }
+
+    #[test]
+    fn StartsWith() {
+        let b = Bstr::from("ABCD");
+        assert!(b.starts_with("AB"));
+        assert!(!b.starts_with("ab"));
+        assert!(!b.starts_with("Ab"));
+        assert!(!b.starts_with("aB"));
+        assert!(!b.starts_with("CD"));
+    }
+
+    #[test]
+    fn StartsWithNocase() {
+        let b = Bstr::from("ABCD");
+        assert!(b.starts_with_nocase("AB"));
+        assert!(b.starts_with_nocase("ab"));
+        assert!(b.starts_with_nocase("Ab"));
+        assert!(b.starts_with_nocase("aB"));
+        assert!(!b.starts_with_nocase("CD"));
+    }
+
+    #[test]
+    fn IndexOf() {
+        let b = Bstr::from("ABCDefgh");
+        assert_eq!(Some(4), b.index_of("e"));
+        assert_eq!(Some(0), b.index_of("A"));
+        assert_eq!(Some(7), b.index_of("h"));
+        assert_eq!(Some(3), b.index_of("De"));
+        assert_eq!(None, b.index_of("z"));
+        assert_eq!(None, b.index_of("a"));
+        assert_eq!(None, b.index_of("hi"));
+    }
+
+    #[test]
+    fn IndexOfNocase() {
+        let b = Bstr::from("ABCDefgh");
+        assert_eq!(Some(4), b.index_of_nocase("E"));
+        assert_eq!(Some(0), b.index_of_nocase("a"));
+        assert_eq!(Some(0), b.index_of_nocase("A"));
+        assert_eq!(Some(7), b.index_of_nocase("H"));
+        assert_eq!(Some(3), b.index_of_nocase("dE"));
+        assert_eq!(None, b.index_of_nocase("z"));
+        assert_eq!(None, b.index_of_nocase("Hi"));
+    }
+
+    #[test]
+    fn IndexOfNocaseNozero() {
+        let b = Bstr::from("A\x00B\x00\x00C\x00Defg\x00h");
+        assert_eq!(Some(8), b.index_of_nocase_nozero("E"));
+        assert_eq!(Some(0), b.index_of_nocase_nozero("a"));
+        assert_eq!(Some(0), b.index_of_nocase_nozero("A"));
+        assert_eq!(Some(12), b.index_of_nocase_nozero("H"));
+        assert_eq!(Some(7), b.index_of_nocase_nozero("dE"));
+        assert_eq!(Some(2), b.index_of_nocase_nozero("bc"));
+        assert_eq!(None, b.index_of_nocase_nozero("z"));
+        assert_eq!(None, b.index_of_nocase_nozero("Hi"));
+        assert_eq!(None, b.index_of_nocase_nozero("ghi"));
+    }
+}
diff --git a/rust/htp/src/c_api/bstr.rs b/rust/htp/src/c_api/bstr.rs
new file mode 100644 (file)
index 0000000..af0dd61
--- /dev/null
@@ -0,0 +1,196 @@
+use crate::bstr::Bstr;
+use core::cmp::Ordering;
+use std::{boxed::Box, ffi::CStr};
+
+/// Allocate a zero-length bstring, reserving space for at least len bytes.
+fn bstr_alloc(len: libc::size_t) -> *mut Bstr {
+    let b = Bstr::with_capacity(len);
+    let boxed = Box::new(b);
+    Box::into_raw(boxed)
+}
+
+/// Deallocate the supplied bstring instance. Allows NULL on input.
+/// # Safety
+/// This function is unsafe because improper use may lead to memory problems. For example, a double-free may occur if the function is called twice on the same raw pointer.
+#[no_mangle]
+pub unsafe extern "C" fn bstr_free(b: *mut Bstr) {
+    if !b.is_null() {
+        drop(Box::from_raw(b));
+    }
+}
+
+/// Return the length of the string
+/// # Safety
+/// x must be properly initialized: not NULL, dangling, or misaligned
+#[no_mangle]
+pub unsafe extern "C" fn bstr_len(x: *const Bstr) -> libc::size_t {
+    (*x).len()
+}
+
+/// Return a pointer to the bstr payload
+/// # Safety
+/// x must be properly initialized: not NULL, dangling, or misaligned
+#[no_mangle]
+pub unsafe extern "C" fn bstr_ptr(x: *const Bstr) -> *mut libc::c_uchar {
+    (*x).as_ptr() as *mut u8
+}
+
+/// Return the capacity of the string
+/// # Safety
+/// x must be properly initialized: not NULL, dangling, or misaligned
+#[no_mangle]
+pub unsafe extern "C" fn bstr_size(x: *const Bstr) -> libc::size_t {
+    (*x).capacity()
+}
+
+/// Case-sensitive comparison of a bstring and a NUL-terminated string.
+/// returns -1 if b is less than c
+///          0 if b is equal to c
+///          1 if b is greater than c
+/// # Safety
+/// b and c must be properly initialized: not NULL, dangling, or misaligned.
+/// c must point to memory that contains a valid nul terminator byte at the end of the string
+#[no_mangle]
+pub unsafe extern "C" fn bstr_cmp_c(b: *const Bstr, c: *const libc::c_char) -> libc::c_int {
+    let cs = CStr::from_ptr(c);
+    match (*b).cmp_slice(cs.to_bytes()) {
+        Ordering::Less => -1,
+        Ordering::Equal => 0,
+        Ordering::Greater => 1,
+    }
+}
+
+/// Case-insensitive comparison of a bstring and a NUL-terminated string.
+/// returns true if b is equal to c, ignoring case
+///         false otherwise
+/// Note: unlike bstr_cmp_c, this returns a boolean rather than an ordering.
+/// # Safety
+/// b and c must be properly initialized: not NULL, dangling, or misaligned.
+/// c must point to memory that contains a valid nul terminator byte at the end of the string
+#[no_mangle]
+pub unsafe extern "C" fn bstr_cmp_c_nocase(b: *const Bstr, c: *const libc::c_char) -> bool {
+    let cs = CStr::from_ptr(c);
+    (*b).cmp_nocase(cs.to_bytes())
+}
+
+/// Create a new bstring by copying the provided NUL-terminated string
+/// # Safety
+/// cstr must be properly initialized: not NULL, dangling, or misaligned.
+/// cstr must point to memory that contains a valid nul terminator byte at the end of the string
+#[no_mangle]
+pub unsafe extern "C" fn bstr_dup_c(cstr: *const libc::c_char) -> *mut Bstr {
+    let cs = CStr::from_ptr(cstr).to_bytes();
+    let new = bstr_alloc(cs.len());
+    (*new).add(cs);
+    new
+}
+
+/// Create a new NUL-terminated string out of the provided bstring. If NUL bytes
+/// are contained in the bstring, each will be replaced with "\0" (two characters).
+/// The caller is responsible to keep track of the allocated memory area and free
+/// it once it is no longer needed.
+/// returns The newly created NUL-terminated string, or NULL in case of memory
+///         allocation failure.
+/// # Safety
+/// b must be properly initialized, not dangling, and not misaligned; NULL is allowed.
+#[no_mangle]
+pub unsafe extern "C" fn bstr_util_strdup_to_c(b: *const Bstr) -> *mut libc::c_char {
+    if b.is_null() {
+        return std::ptr::null_mut();
+    }
+    let src = std::slice::from_raw_parts(bstr_ptr(b), bstr_len(b));
+
+    // Since the memory returned here is just a char* and the caller will
+    // free() it we have to use malloc() here.
+    // So we allocate enough space for doubled NULL bytes plus the trailing NULL.
+    let mut null_count = 1;
+    for byte in src {
+        if *byte == 0 {
+            null_count += 1;
+        }
+    }
+    let newlen = bstr_len(b) + null_count;
+    let mem = libc::malloc(newlen) as *mut libc::c_char;
+    if mem.is_null() {
+        return std::ptr::null_mut();
+    }
+    let dst: &mut [libc::c_char] = std::slice::from_raw_parts_mut(mem, newlen);
+    let mut dst_idx = 0;
+    for byte in src {
+        if *byte == 0 {
+            dst[dst_idx] = '\\' as libc::c_char;
+            dst_idx += 1;
+            dst[dst_idx] = '0' as libc::c_char;
+        } else {
+            dst[dst_idx] = *byte as libc::c_char;
+        }
+        dst_idx += 1;
+    }
+    dst[dst_idx] = 0;
+
+    mem
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::ffi::CString;
+
+    macro_rules! cstr {
+        ( $x:expr ) => {{
+            CString::new($x).unwrap()
+        }};
+    }
+
+    #[test]
+    fn Bstr_Alloc() {
+        unsafe {
+            let p1 = bstr_alloc(10);
+            assert_eq!(10, bstr_size(p1));
+            assert_eq!(0, bstr_len(p1));
+            bstr_free(p1);
+        }
+    }
+
+    #[test]
+    fn Bstr_DupC() {
+        unsafe {
+            let p1 = bstr_dup_c(cstr!("arfarf").as_ptr());
+
+            assert_eq!(6, bstr_size(p1));
+            assert_eq!(6, bstr_len(p1));
+            assert_eq!(
+                0,
+                libc::memcmp(
+                    cstr!("arfarf").as_ptr() as *const core::ffi::c_void,
+                    bstr_ptr(p1) as *const core::ffi::c_void,
+                    6
+                )
+            );
+            bstr_free(p1);
+        }
+    }
+
+    #[test]
+    fn Bstr_UtilDupToC() {
+        unsafe {
+            let s = Bstr::from(b"ABCDEFGHIJKL\x00NOPQRST" as &[u8]);
+            let c = bstr_util_strdup_to_c(&s);
+            let e = CString::new("ABCDEFGHIJKL\\0NOPQRST").unwrap();
+            assert_eq!(0, libc::strcmp(e.as_ptr(), c));
+
+            libc::free(c as *mut core::ffi::c_void);
+        }
+    }
+
+    #[test]
+    fn Bstr_CmpC() {
+        unsafe {
+            let p1 = Bstr::from("arfarf");
+            assert_eq!(0, bstr_cmp_c(&p1, cstr!("arfarf").as_ptr()));
+            assert_eq!(-1, bstr_cmp_c(&p1, cstr!("arfarf2").as_ptr()));
+            assert_eq!(1, bstr_cmp_c(&p1, cstr!("arf").as_ptr()));
+            assert_eq!(-1, bstr_cmp_c(&p1, cstr!("not equal").as_ptr()));
+        }
+    }
+}
diff --git a/rust/htp/src/c_api/config.rs b/rust/htp/src/c_api/config.rs
new file mode 100644 (file)
index 0000000..0b85b7c
--- /dev/null
@@ -0,0 +1,498 @@
+#![deny(missing_docs)]
+use crate::{
+    config::{Config, HtpServerPersonality, HtpUrlEncodingHandling},
+    hook::{DataExternalCallbackFn, TxExternalCallbackFn},
+    HtpStatus,
+};
+use std::convert::TryInto;
+
+/// Creates a new configuration structure. Configuration structures created at
+/// configuration time must not be changed afterwards in order to support lock-less
+/// copying.
+#[no_mangle]
+pub extern "C" fn htp_config_create() -> *mut Config {
+    let cfg: Config = Config::default();
+    let b = Box::new(cfg);
+    Box::into_raw(b)
+}
+
+/// Destroy a configuration structure.
+/// # Safety
+/// This function is unsafe because improper use may lead to memory problems. For example, a double-free may occur if the function is called twice on the same raw pointer.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_destroy(cfg: *mut Config) {
+    if !cfg.is_null() {
+        drop(Box::from_raw(cfg));
+    }
+}
+
+/// Registers a REQUEST_BODY_DATA callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_body_data(
+    cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_request_body_data.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a REQUEST_COMPLETE callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_complete(
+    cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_request_complete.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a REQUEST_HEADER_DATA callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_header_data(
+    cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_request_header_data.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a REQUEST_LINE callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_line(
+    cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_request_line.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a REQUEST_START callback, which is invoked every time a new
+/// request begins and before any parsing is done.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_start(
+    cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_request_start.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a HTP_REQUEST_TRAILER callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_trailer(
+    cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_request_trailer.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a REQUEST_TRAILER_DATA callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_request_trailer_data(
+    cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_request_trailer_data.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a RESPONSE_BODY_DATA callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_body_data(
+    cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_response_body_data.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a RESPONSE_COMPLETE callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_complete(
+    cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_response_complete.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a RESPONSE_HEADER_DATA callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_header_data(
+    cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_response_header_data.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a RESPONSE_START callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_start(
+    cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_response_start.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a RESPONSE_TRAILER callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_trailer(
+    cfg: *mut Config, cbk_fn: TxExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_response_trailer.register_extern(cbk_fn)
+    }
+}
+
+/// Registers a RESPONSE_TRAILER_DATA callback.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_register_response_trailer_data(
+    cfg: *mut Config, cbk_fn: DataExternalCallbackFn,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.hook_response_trailer_data.register_extern(cbk_fn)
+    }
+}
+
+/// Configures whether backslash characters are treated as path segment separators. They
+/// are not on Unix systems, but are on Windows systems. If this setting is enabled, a path
+/// such as "/one\two/three" will be converted to "/one/two/three".
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_backslash_convert_slashes(
+    cfg: *mut Config, enabled: libc::c_int,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_backslash_convert_slashes(enabled == 1)
+    }
+}
+
+/// Sets the replacement character that will be used in the lossy best-fit
+/// mapping from multi-byte to single-byte streams. The question mark character
+/// is used as the default replacement byte.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_bestfit_replacement_byte(cfg: *mut Config, b: libc::c_int) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_bestfit_replacement_byte(b as u8)
+    }
+}
+
+/// Configures the maximum compression bomb size LibHTP will decompress.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_compression_bomb_limit(
+    cfg: *mut Config, bomblimit: libc::size_t,
+) {
+    if let Ok(bomblimit) = bomblimit.try_into() {
+        if let Some(cfg) = cfg.as_mut() {
+            cfg.compression_options.set_bomb_limit(bomblimit)
+        }
+    }
+}
+
+/// Configures the maximum compression time LibHTP will allow.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_compression_time_limit(
+    cfg: *mut Config, timelimit: libc::c_uint,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.compression_options.set_time_limit(timelimit)
+    }
+}
+
+/// Configures whether input data will be converted to lowercase. Useful for handling servers with
+/// case-insensitive filesystems.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_convert_lowercase(cfg: *mut Config, enabled: libc::c_int) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_convert_lowercase(enabled == 1)
+    }
+}
+
+/// Configures the maximum size of the buffer LibHTP will use when all data is not available
+/// in the current buffer (e.g., a very long header line that might span several packets). This
+/// limit is controlled by the field_limit parameter.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_field_limit(cfg: *mut Config, field_limit: libc::size_t) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_field_limit(field_limit)
+    }
+}
+
+/// Configures the maximum memlimit LibHTP will pass to liblzma.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_lzma_memlimit(cfg: *mut Config, memlimit: libc::size_t) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.compression_options.set_lzma_memlimit(memlimit)
+    }
+}
+
+/// Configures the maximum number of lzma layers to pass to the decompressor.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_lzma_layers(cfg: *mut Config, limit: libc::c_int) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.compression_options.set_lzma_layers(if limit <= 0 {
+            None
+        } else {
+            limit.try_into().ok()
+        })
+    }
+}
+
+/// Configures the maximum number of live transactions per connection
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_max_tx(cfg: *mut Config, limit: u32) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.max_tx = limit;
+    }
+}
+
+/// Configures the maximum number of headers in one transaction
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_number_headers_limit(cfg: *mut Config, limit: u32) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.number_headers_limit = limit;
+    }
+}
+
+/// Configures how the server reacts to encoded NUL bytes. Some servers will stop
+/// at the NUL, while some will respond with 400 or 404. When the termination option is not
+/// used, the NUL byte will remain in the path.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_nul_encoded_terminates(
+    cfg: *mut Config, enabled: libc::c_int,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_nul_encoded_terminates(enabled == 1)
+    }
+}
+
+/// Configures the handling of raw NUL bytes. If enabled, raw NUL terminates strings.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_nul_raw_terminates(cfg: *mut Config, enabled: libc::c_int) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_nul_raw_terminates(enabled == 1)
+    }
+}
+
+/// No-op kept for API compatibility: request cookie parsing is always enabled.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_parse_request_cookies(
+    _cfg: *mut Config, _parse_request_cookies: libc::c_int,
+) {
+    // do nothing, but keep API
+}
+
+/// Configures whether consecutive path segment separators will be compressed. When enabled, a path
+/// such as "/one//two" will be normalized to "/one/two". Backslash conversion and path segment separator
+/// decoding are carried out before compression. For example, the path "/one\\/two\/%5cthree/%2f//four"
+/// will be converted to "/one/two/three/four" (assuming all 3 options are enabled).
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_path_separators_compress(
+    cfg: *mut Config, enabled: libc::c_int,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_path_separators_compress(enabled == 1)
+    }
+}
+
+/// Configures whether plus characters are converted to spaces when decoding URL-encoded strings. This
+/// is appropriate to do for parameters, but not for URLs. Only applies to contexts where decoding
+/// is taking place.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_plusspace_decode(cfg: *mut Config, enabled: libc::c_int) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_plusspace_decode(enabled == 1)
+    }
+}
+
+/// Configures whether encoded path segment separators will be decoded. Apache does not do
+/// this by default, but IIS does. If enabled, a path such as "/one%2ftwo" will be normalized
+/// to "/one/two". If the backslash_separators option is also enabled, encoded backslash
+/// characters will be converted too (and subsequently normalized to forward slashes).
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_path_separators_decode(
+    cfg: *mut Config, enabled: libc::c_int,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_path_separators_decode(enabled == 1)
+    }
+}
+
+/// Configures whether request data is decompressed
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_request_decompression(
+    cfg: *mut Config, enabled: libc::c_int,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_request_decompression(enabled == 1)
+    }
+}
+
+/// Configures how many layers of compression we will attempt to decompress.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_decompression_layer_limit(
+    cfg: *mut Config, limit: libc::c_int,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_decompression_layer_limit(if limit <= 0 {
+            None
+        } else {
+            limit.try_into().ok()
+        })
+    }
+}
+
+/// Enable or disable allowing spaces in URIs. Disabled by default.
+/// # Safety
+/// When calling this method the given cfg must be initialized or NULL.
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_allow_space_uri(cfg: *mut Config, allow_space: bool) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_allow_space_uri(allow_space)
+    }
+}
+
+/// Configure desired server personality.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_server_personality(
+    cfg: *mut Config, personality: HtpServerPersonality,
+) -> HtpStatus {
+    cfg.as_mut()
+        .map(|cfg| cfg.set_server_personality(personality).into())
+        .unwrap_or(HtpStatus::ERROR)
+}
+
+/// Configures whether %u-encoded sequences are decoded. Such sequences
+/// will be treated as invalid URL encoding if decoding is not desirable.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_u_encoding_decode(cfg: *mut Config, enabled: libc::c_int) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_u_encoding_decode(enabled == 1)
+    }
+}
+
+/// Configures how the server handles invalid URL encoding.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_url_encoding_invalid_handling(
+    cfg: *mut Config, handling: HtpUrlEncodingHandling,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_url_encoding_invalid_handling(handling)
+    }
+}
+
+/// Controls whether the data should be treated as UTF-8 and converted to a single-byte
+/// stream using best-fit mapping.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_utf8_convert_bestfit(
+    cfg: *mut Config, enabled: libc::c_int,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_utf8_convert_bestfit(enabled == 1)
+    }
+}
+
+/// Configures whether to attempt to decode a double encoded query in the normalized uri
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_double_decode_normalized_query(
+    cfg: *mut Config, set: bool,
+) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_double_decode_normalized_query(set)
+    }
+}
+
+/// Configures whether to attempt to decode a double encoded path in the normalized uri
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_double_decode_normalized_path(cfg: *mut Config, set: bool) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_double_decode_normalized_path(set)
+    }
+}
+
+/// Configures whether to normalize URIs into a complete or partial form.
+/// Pass `true` to use complete normalized URI or `false` to use partials.
+/// # Safety
+/// When calling this method, you have to ensure that cfg is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_config_set_normalized_uri_include_all(cfg: *mut Config, set: bool) {
+    if let Some(cfg) = cfg.as_mut() {
+        cfg.set_normalized_uri_include_all(set)
+    }
+}
diff --git a/rust/htp/src/c_api/connection.rs b/rust/htp/src/c_api/connection.rs
new file mode 100644 (file)
index 0000000..d254410
--- /dev/null
@@ -0,0 +1,36 @@
+#![deny(missing_docs)]
+use crate::{connection::Connection, log::Log};
+
+/// Returns the request_data_counter
+/// # Safety
+/// When calling this method, you have to ensure that conn is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_conn_request_data_counter(conn: *const Connection) -> u64 {
+    conn.as_ref()
+        .map(|conn| conn.request_data_counter)
+        .unwrap_or(0)
+}
+
+/// Returns the response_data_counter
+/// # Safety
+/// When calling this method, you have to ensure that conn is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_conn_response_data_counter(conn: *const Connection) -> u64 {
+    conn.as_ref()
+        .map(|conn| conn.response_data_counter)
+        .unwrap_or(0)
+}
+
+/// Get the next logged message from the connection
+///
+/// Returns the next log or NULL on error.
+/// The caller must free this result with htp_log_free
+/// # Safety
+/// When calling this method, you have to ensure that conn is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_conn_next_log(conn: *const Connection) -> *mut Log {
+    conn.as_ref()
+        .and_then(|conn| conn.get_next_log())
+        .map(|log| Box::into_raw(Box::new(log)))
+        .unwrap_or(std::ptr::null_mut())
+}
diff --git a/rust/htp/src/c_api/connection_parser.rs b/rust/htp/src/c_api/connection_parser.rs
new file mode 100644 (file)
index 0000000..017e67d
--- /dev/null
@@ -0,0 +1,308 @@
+#![deny(missing_docs)]
+use crate::{
+    config::Config,
+    connection::Connection,
+    connection_parser::{ConnectionParser, HtpStreamState, ParserData},
+    transaction::Transaction,
+};
+use std::{
+    convert::{TryFrom, TryInto},
+    ffi::CStr,
+};
+use time::{Duration, OffsetDateTime};
+
+/// Take seconds and microseconds and return a OffsetDateTime
+fn datetime_from_sec_usec(sec: i64, usec: i64) -> Option<OffsetDateTime> {
+    match OffsetDateTime::from_unix_timestamp(sec) {
+        Ok(date) => Some(date + Duration::microseconds(usec)),
+        Err(_) => None,
+    }
+}
+
+/// Closes the connection associated with the supplied parser.
+///
+/// timestamp is optional
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_close(
+    connp: *mut ConnectionParser, timestamp: *const libc::timeval,
+) {
+    if let Some(connp) = connp.as_mut() {
+        connp.close(
+            timestamp
+                .as_ref()
+                .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into()))
+                .unwrap_or(None),
+        )
+    }
+}
+
+/// Creates a new connection parser using the provided configuration or a default configuration if NULL provided.
+/// Note the provided config will be copied into the created connection parser. Therefore, subsequent modification
+/// to the original config will have no effect.
+///
+/// Returns a new connection parser instance, or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_create(cfg: *const Config) -> *mut ConnectionParser {
+    Box::into_raw(Box::new(ConnectionParser::new(cfg.as_ref().unwrap())))
+}
+
+/// Destroys the connection parser, its data structures, as well
+/// as the connection and its transactions.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_destroy_all(connp: *mut ConnectionParser) {
+    drop(Box::from_raw(connp));
+}
+
+/// Returns the connection associated with the connection parser.
+///
+/// Returns Connection instance, or NULL if one is not available.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_connection(connp: *const ConnectionParser) -> *const Connection {
+    connp
+        .as_ref()
+        .map(|val| &val.conn as *const Connection)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Retrieve the user data associated with this connection parser.
+/// Returns user data, or NULL if there isn't any.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_user_data(connp: *const ConnectionParser) -> *mut libc::c_void {
+    connp
+        .as_ref()
+        .and_then(|val| val.user_data::<*mut libc::c_void>())
+        .copied()
+        .unwrap_or(std::ptr::null_mut())
+}
+
+/// Associate user data with the supplied parser.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_set_user_data(
+    connp: *mut ConnectionParser, user_data: *mut libc::c_void,
+) {
+    if let Some(connp) = connp.as_mut() {
+        connp.set_user_data(Box::new(user_data))
+    }
+}
+
+/// Opens connection.
+///
+/// timestamp is optional
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_open(
+    connp: *mut ConnectionParser, client_addr: *const libc::c_char, client_port: libc::c_int,
+    server_addr: *const libc::c_char, server_port: libc::c_int, timestamp: *const libc::timeval,
+) {
+    if let Some(connp) = connp.as_mut() {
+        connp.open(
+            client_addr.as_ref().and_then(|client_addr| {
+                CStr::from_ptr(client_addr)
+                    .to_str()
+                    .ok()
+                    .and_then(|val| val.parse().ok())
+            }),
+            client_port.try_into().ok(),
+            server_addr.as_ref().and_then(|server_addr| {
+                CStr::from_ptr(server_addr)
+                    .to_str()
+                    .ok()
+                    .and_then(|val| val.parse().ok())
+            }),
+            server_port.try_into().ok(),
+            timestamp
+                .as_ref()
+                .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into()))
+                .unwrap_or(None),
+        )
+    }
+}
+
+/// Closes the connection associated with the supplied parser.
+///
+/// timestamp is optional
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_request_close(
+    connp: *mut ConnectionParser, timestamp: *const libc::timeval,
+) {
+    if let Some(connp) = connp.as_mut() {
+        connp.request_close(
+            timestamp
+                .as_ref()
+                .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into()))
+                .unwrap_or(None),
+        )
+    }
+}
+
+/// Process a chunk of inbound client request data
+///
+/// timestamp is optional
+/// Returns HTP_STREAM_STATE_DATA, HTP_STREAM_STATE_ERROR or HTP_STREAM_STATE_DATA_OTHER (see QUICK_START).
+///         HTP_STREAM_STATE_CLOSED and HTP_STREAM_STATE_TUNNEL are also possible.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_request_data(
+    connp: *mut ConnectionParser, timestamp: *const libc::timeval, data: *const libc::c_void,
+    len: libc::size_t,
+) -> HtpStreamState {
+    connp
+        .as_mut()
+        .map(|connp| {
+            connp.request_data(
+                ParserData::from((data as *const u8, len)),
+                timestamp
+                    .as_ref()
+                    .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into()))
+                    .unwrap_or(None),
+            )
+        })
+        .unwrap_or(HtpStreamState::ERROR)
+}
+
+/// Process a chunk of outbound (server or response) data.
+///
+/// timestamp is optional.
+/// Returns HTP_STREAM_STATE_OK on state change, HTP_STREAM_STATE_ERROR on error, or HTP_STREAM_STATE_DATA when more data is needed
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+#[allow(clippy::useless_conversion)]
+pub unsafe extern "C" fn htp_connp_response_data(
+    connp: *mut ConnectionParser, timestamp: *const libc::timeval, data: *const libc::c_void,
+    len: libc::size_t,
+) -> HtpStreamState {
+    connp
+        .as_mut()
+        .map(|connp| {
+            connp.response_data(
+                ParserData::from((data as *const u8, len)),
+                timestamp
+                    .as_ref()
+                    .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into()))
+                    .unwrap_or(None),
+            )
+        })
+        .unwrap_or(HtpStreamState::ERROR)
+}
+
+/// Get the number of transactions processed on this connection.
+///
+/// Returns the number of transactions or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_tx_size(connp: *const ConnectionParser) -> isize {
+    connp
+        .as_ref()
+        .map(|connp| isize::try_from(connp.tx_size()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get a transaction.
+///
+/// Returns the transaction or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_tx(
+    connp: *mut ConnectionParser, tx_id: usize,
+) -> *const Transaction {
+    connp
+        .as_ref()
+        .map(|connp| {
+            connp
+                .tx(tx_id)
+                .map(|tx| {
+                    if tx.is_started() {
+                        tx as *const Transaction
+                    } else {
+                        std::ptr::null()
+                    }
+                })
+                .unwrap_or(std::ptr::null())
+        })
+        .unwrap_or(std::ptr::null())
+}
+
+/// Retrieves the pointer to the active response transaction. In connection
+/// parsing mode there can be many open transactions, and up to 2 active
+/// transactions at any one time. This is due to HTTP pipelining. Can be NULL.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_get_response_tx(
+    connp: *mut ConnectionParser,
+) -> *const Transaction {
+    if let Some(connp) = connp.as_mut() {
+        if let Some(req) = connp.response() {
+            return req;
+        }
+    }
+    std::ptr::null()
+}
+
+/// Retrieves the pointer to the active request transaction. In connection
+/// parsing mode there can be many open transactions, and up to 2 active
+/// transactions at any one time. This is due to HTTP pipelining. Can be NULL.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_get_request_tx(
+    connp: *mut ConnectionParser,
+) -> *const Transaction {
+    if let Some(connp) = connp.as_mut() {
+        if let Some(req) = connp.request() {
+            return req;
+        }
+    }
+    std::ptr::null()
+}
+
+/// Returns the number of bytes consumed from the current data chunks so far or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_request_data_consumed(connp: *const ConnectionParser) -> i64 {
+    connp
+        .as_ref()
+        .map(|connp| connp.request_data_consumed().try_into().ok().unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Returns the number of bytes consumed from the most recent outbound data chunk. Normally, an invocation
+/// of htp_connp_response_data() will consume all data from the supplied buffer, but there are circumstances
+/// where only partial consumption is possible. In such cases HTP_STREAM_DATA_OTHER will be returned.
+/// Consumed bytes are no longer necessary, but the remainder of the buffer will be need to be saved
+/// for later.
+/// Returns the number of bytes consumed from the last data chunk sent for outbound processing
+/// or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that connp is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_connp_response_data_consumed(connp: *const ConnectionParser) -> i64 {
+    connp
+        .as_ref()
+        .map(|connp| connp.response_data_consumed().try_into().ok().unwrap_or(-1))
+        .unwrap_or(-1)
+}
diff --git a/rust/htp/src/c_api/header.rs b/rust/htp/src/c_api/header.rs
new file mode 100644 (file)
index 0000000..d47cccc
--- /dev/null
@@ -0,0 +1,158 @@
+#![deny(missing_docs)]
+use crate::{
+    bstr::Bstr,
+    c_api::bstr::bstr_ptr,
+    transaction::{Header, Headers},
+};
+use std::convert::TryFrom;
+
+/// Get the first header value matching the key.
+///
+/// headers: Header table.
+/// ckey: Header name to match.
+///
+/// Returns the header or NULL when not found or on error
+/// # Safety
+/// When calling this method, you have to ensure that headers is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_headers_get(
+    headers: *const Headers, ckey: *const libc::c_char,
+) -> *const Header {
+    if let (Some(headers), Some(ckey)) = (headers.as_ref(), ckey.as_ref()) {
+        headers
+            .get_nocase_nozero(std::ffi::CStr::from_ptr(ckey).to_bytes())
+            .map(|value| value as *const Header)
+            .unwrap_or(std::ptr::null())
+    } else {
+        std::ptr::null()
+    }
+}
+
+/// Get the header at a given index.
+///
+/// headers: Header table.
+/// index: Index into the table.
+///
+/// Returns the header or NULL when not found or on error
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_headers_get_index(
+    headers: *const Headers, index: usize,
+) -> *const Header {
+    headers
+        .as_ref()
+        .map(|headers| {
+            headers
+                .elements
+                .get(index)
+                .map(|value| value as *const Header)
+                .unwrap_or(std::ptr::null())
+        })
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the size of the headers table.
+///
+/// headers: Headers table.
+///
+/// Returns the size or -1 on error
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_headers_size(headers: *const Headers) -> isize {
+    headers
+        .as_ref()
+        .map(|headers| isize::try_from(headers.size()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get the name of a header.
+///
+/// header: Header pointer.
+///
+/// Returns the name or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_name(header: *const Header) -> *const Bstr {
+    header
+        .as_ref()
+        .map(|header| &header.name as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the name of a header as a ptr.
+///
+/// header: Header pointer.
+///
+/// Returns the pointer or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_name_ptr(header: *const Header) -> *const u8 {
+    header
+        .as_ref()
+        .map(|header| bstr_ptr(&header.name) as *const u8)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the length of a header name.
+///
+/// header: Header pointer.
+///
+/// Returns the length or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_name_len(header: *const Header) -> isize {
+    header
+        .as_ref()
+        .map(|header| isize::try_from(header.name.len()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get the value of a header.
+///
+/// header: Header pointer.
+///
+/// Returns the value or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_value(header: *const Header) -> *const Bstr {
+    header
+        .as_ref()
+        .map(|header| &header.value as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the value of a header as a ptr.
+///
+/// header: Header pointer.
+///
+/// Returns the pointer or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_value_ptr(header: *const Header) -> *const u8 {
+    header
+        .as_ref()
+        .map(|header| bstr_ptr(&header.value) as *const u8)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the length of a header value.
+///
+/// header: Header pointer.
+///
+/// Returns the length or 0 on a NULL pointer.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_value_len(header: *const Header) -> usize {
+    header
+        .as_ref()
+        .map(|header| header.value.len())
+        .unwrap_or(0)
+}
diff --git a/rust/htp/src/c_api/log.rs b/rust/htp/src/c_api/log.rs
new file mode 100644 (file)
index 0000000..fa6aa56
--- /dev/null
@@ -0,0 +1,39 @@
+#![deny(missing_docs)]
+use crate::log::{HtpLogCode, Log};
+use std::{ffi::CString, os::raw::c_char};
+
+/// Get the log's message string
+///
+/// Returns the log message as a cstring or NULL on error
+/// The caller must free this result with htp_free_cstring
+/// # Safety
+/// When calling this method, you have to ensure that log is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_message(log: *const Log) -> *mut c_char {
+    log.as_ref()
+        .and_then(|log| CString::new(log.msg.msg.clone()).ok())
+        .map(|msg| msg.into_raw())
+        .unwrap_or(std::ptr::null_mut())
+}
+
+/// Get a log's message code
+///
+/// Returns a code or HTP_LOG_CODE_ERROR on error
+/// # Safety
+/// When calling this method, you have to ensure that log is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_code(log: *const Log) -> HtpLogCode {
+    log.as_ref()
+        .map(|log| log.msg.code)
+        .unwrap_or(HtpLogCode::ERROR)
+}
+
+/// Free log
+/// # Safety
+/// This function is unsafe because improper use may lead to memory problems. For example, a double-free may occur if the function is called twice on the same raw pointer.
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_free(log: *mut Log) {
+    if !log.is_null() {
+        drop(Box::from_raw(log));
+    }
+}
diff --git a/rust/htp/src/c_api/mod.rs b/rust/htp/src/c_api/mod.rs
new file mode 100644 (file)
index 0000000..bc5c982
--- /dev/null
@@ -0,0 +1,35 @@
+#![deny(missing_docs)]
+use crate::util::get_version;
+use std::ffi::CString;
+
+/// Functions for working with Bstr.
+pub mod bstr;
+/// Functions for working with config.
+pub mod config;
+/// Functions for working with connection.
+pub mod connection;
+/// Functions for working with connection parser.
+pub mod connection_parser;
+/// Functions for working with headers.
+pub mod header;
+/// Functions for working with logs.
+pub mod log;
+/// Functions for working with transactions.
+pub mod transaction;
+/// Functions for working with request uri.
+pub mod uri;
+
+/// Returns the LibHTP version string.
+///
+/// NOTE(review): the raw pointer is handed to C without a length, so this
+/// assumes get_version() yields a NUL-terminated static string — confirm in util::get_version.
+#[no_mangle]
+pub extern "C" fn htp_get_version() -> *const libc::c_char {
+    get_version().as_ptr() as *const libc::c_char
+}
+
+/// Free rust allocated cstring
+///
+/// # Safety
+/// This should only ever be called with a pointer that was earlier obtained by calling [CString::into_raw].
+#[no_mangle]
+pub unsafe extern "C" fn htp_free_cstring(input: *mut libc::c_char) {
+    input.as_mut().map(|input| CString::from_raw(input));
+}
diff --git a/rust/htp/src/c_api/transaction.rs b/rust/htp/src/c_api/transaction.rs
new file mode 100644 (file)
index 0000000..6c0fc9b
--- /dev/null
@@ -0,0 +1,522 @@
+use crate::{
+    bstr::Bstr, c_api::header::htp_headers_get, connection_parser::ConnectionParser,
+    request::HtpMethod, transaction::*, uri::Uri,
+};
+use std::convert::{TryFrom, TryInto};
+
+/// Destroys the supplied transaction.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_destroy(connp: *mut ConnectionParser, tx: *const Transaction) {
+    if let (Some(connp), Some(tx)) = (connp.as_mut(), tx.as_ref()) {
+        connp.remove_tx(tx.index)
+    }
+}
+
+/// Get a transaction's normalized parsed uri.
+///
+/// tx: Transaction pointer.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_normalized_uri(tx: *const Transaction) -> *const Bstr {
+    if (*tx).cfg.decoder_cfg.normalized_uri_include_all {
+        tx.as_ref()
+            .and_then(|tx| tx.complete_normalized_uri.as_ref())
+            .map(|uri| uri as *const Bstr)
+            .unwrap_or(std::ptr::null())
+    } else {
+        tx.as_ref()
+            .and_then(|tx| tx.partial_normalized_uri.as_ref())
+            .map(|uri| uri as *const Bstr)
+            .unwrap_or(std::ptr::null())
+    }
+}
+
+/// Returns the user data associated with this transaction or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_get_user_data(tx: *const Transaction) -> *mut libc::c_void {
+    tx.as_ref()
+        .and_then(|val| val.user_data::<*mut libc::c_void>())
+        .copied()
+        .unwrap_or(std::ptr::null_mut())
+}
+
+/// Associates user data with this transaction.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_set_user_data(tx: *mut Transaction, user_data: *mut libc::c_void) {
+    if let Some(tx) = tx.as_mut() {
+        tx.set_user_data(Box::new(user_data))
+    }
+}
+
+/// Get a transaction's request line.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request line or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_line(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_line.as_ref())
+        .map(|line| line as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request method.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request method or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_method(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_method.as_ref())
+        .map(|method| method as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the transaction's request method number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request method number or ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_method_number(tx: *const Transaction) -> HtpMethod {
+    tx.as_ref()
+        .map(|tx| tx.request_method_number)
+        .unwrap_or(HtpMethod::ERROR)
+}
+
+/// Get a transaction's request uri.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request uri or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_uri(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_uri.as_ref())
+        .map(|uri| uri as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request protocol.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the protocol or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_protocol(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_protocol.as_ref())
+        .map(|protocol| protocol as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request protocol number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the protocol number or ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_protocol_number(tx: *const Transaction) -> HtpProtocol {
+    tx.as_ref()
+        .map(|tx| tx.request_protocol_number)
+        .unwrap_or(HtpProtocol::Error)
+}
+
+/// Get whether a transaction's protocol is version 0.9.
+///
+/// tx: Transaction pointer.
+///
+/// Returns 1 if the version is 0.9 or 0 otherwise. A NULL argument will
+/// also result in a return value of 0.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_is_protocol_0_9(tx: *const Transaction) -> i32 {
+    tx.as_ref().map(|tx| tx.is_protocol_0_9 as i32).unwrap_or(0)
+}
+
+/// Get a transaction's parsed uri.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the parsed uri or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_parsed_uri(tx: *const Transaction) -> *const Uri {
+    tx.as_ref()
+        .and_then(|tx| tx.parsed_uri.as_ref())
+        .map(|uri| uri as *const Uri)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request headers.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request headers or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_headers(tx: *const Transaction) -> *const Headers {
+    tx.as_ref()
+        .map(|tx| &tx.request_headers as *const Headers)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request headers size.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the size or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_headers_size(tx: *const Transaction) -> isize {
+    tx.as_ref()
+        .map(|tx| isize::try_from(tx.request_headers.size()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get the first request header value matching the key from a transaction.
+///
+/// tx: Transaction pointer.
+/// ckey: Header name to match.
+///
+/// Returns the header or NULL when not found or on error
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_header(
+    tx: *const Transaction, ckey: *const libc::c_char,
+) -> *const Header {
+    tx.as_ref()
+        .map(|tx| htp_headers_get(&tx.request_headers, ckey))
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the request header at the given index.
+///
+/// tx: Transaction pointer.
+/// index: request header table index.
+///
+/// Returns the header or NULL on error
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_header_index(
+    tx: *const Transaction, index: usize,
+) -> *const Header {
+    tx.as_ref()
+        .map(|tx| {
+            tx.request_headers
+                .elements
+                .get(index)
+                .map(|value| value as *const Header)
+                .unwrap_or(std::ptr::null())
+        })
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the transaction's request authentication type.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the auth type or HTP_AUTH_ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_auth_type(tx: *const Transaction) -> HtpAuthType {
+    tx.as_ref()
+        .map(|tx| tx.request_auth_type)
+        .unwrap_or(HtpAuthType::ERROR)
+}
+
+/// Get a transaction's request hostname.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request hostname or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_hostname(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_hostname.as_ref())
+        .map(|hostname| hostname as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the transaction's request port number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request port number or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_port_number(tx: *const Transaction) -> i32 {
+    tx.as_ref()
+        .and_then(|tx| tx.request_port_number.as_ref())
+        .map(|port| *port as i32)
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's request message length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request message length or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_message_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map(|tx| tx.request_message_len.try_into().ok().unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's response line.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response line or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_line(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.response_line.as_ref())
+        .map(|response_line| response_line as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's response protocol.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response protocol or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_protocol(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.response_protocol.as_ref())
+        .map(|response_protocol| response_protocol as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's response protocol number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the protocol number or ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_protocol_number(tx: *const Transaction) -> HtpProtocol {
+    tx.as_ref()
+        .map(|tx| tx.response_protocol_number)
+        .unwrap_or(HtpProtocol::Error)
+}
+
+/// Get the transaction's response status.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response status or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_status(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.response_status.as_ref())
+        .map(|response_status| response_status as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the transaction's response status number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response status number or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_status_number(tx: *const Transaction) -> i32 {
+    tx.as_ref()
+        .map(|tx| match tx.response_status_number {
+            HtpResponseNumber::Unknown => 0,
+            HtpResponseNumber::Invalid => -1,
+            HtpResponseNumber::Valid(status) => status as i32,
+        })
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's response message.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response message or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_message(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.response_message.as_ref())
+        .map(|response_message| response_message as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's response headers.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response headers or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_headers(tx: *const Transaction) -> *const Headers {
+    tx.as_ref()
+        .map(|tx| &tx.response_headers as *const Headers)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the first response header value matching the key from a transaction.
+///
+/// tx: Transaction pointer.
+/// ckey: Header name to match.
+///
+/// Returns the header or NULL when not found or on error
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_header(
+    tx: *const Transaction, ckey: *const libc::c_char,
+) -> *const Header {
+    tx.as_ref()
+        .map(|tx| htp_headers_get(&tx.response_headers, ckey))
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's response message length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response message length or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_message_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map(|tx| tx.response_message_len.try_into().ok().unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get the transaction's bit flags.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the flags represented as an integer or 0 if the flags are empty
+/// or a NULL ptr is passed as an argument.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_flags(tx: *const Transaction) -> u64 {
+    tx.as_ref().map(|tx| tx.flags).unwrap_or(0)
+}
+
+/// Get the transaction's request progress.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the progress or HTP_REQUEST_ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_progress(tx: *const Transaction) -> HtpRequestProgress {
+    tx.as_ref()
+        .map(|tx| tx.request_progress)
+        .unwrap_or(HtpRequestProgress::ERROR)
+}
+
+/// Get the transaction's response progress.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the progress or ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_progress(tx: *const Transaction) -> HtpResponseProgress {
+    tx.as_ref()
+        .map(|tx| tx.response_progress)
+        .unwrap_or(HtpResponseProgress::ERROR)
+}
+
+/// Get the data's transaction.
+///
+/// Returns the transaction or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_tx(data: *const Data) -> *const Transaction {
+    data.as_ref()
+        .map(|data| data.tx() as *const Transaction)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the data pointer.
+///
+/// Returns the data or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_data(data: *const Data) -> *const u8 {
+    data.as_ref()
+        .map(|data| data.data())
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the length of the data.
+///
+/// Returns the length or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_len(data: *const Data) -> isize {
+    data.as_ref()
+        .map(|data| isize::try_from(data.len()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get whether this data is empty.
+///
+/// Returns true if data is NULL or zero-length.
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_is_empty(data: *const Data) -> bool {
+    data.as_ref().map(|data| data.is_empty()).unwrap_or(true)
+}
diff --git a/rust/htp/src/c_api/uri.rs b/rust/htp/src/c_api/uri.rs
new file mode 100644 (file)
index 0000000..d1efb21
--- /dev/null
@@ -0,0 +1,27 @@
+use crate::{bstr::Bstr, uri::Uri};
+
+/// Get the hostname of a uri.
+///
+/// Returns the hostname for uri or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_hostname(uri: *const Uri) -> *const Bstr {
+    uri.as_ref()
+        .and_then(|uri| uri.hostname.as_ref())
+        .map(|hostname| hostname as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the path of a uri.
+///
+/// Returns the path for uri or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_path(uri: *const Uri) -> *const Bstr {
+    uri.as_ref()
+        .and_then(|uri| uri.path.as_ref())
+        .map(|path| path as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
diff --git a/rust/htp/src/config.rs b/rust/htp/src/config.rs
new file mode 100644 (file)
index 0000000..fc62965
--- /dev/null
@@ -0,0 +1,575 @@
+use crate::decompressors::Options;
+use crate::{
+    error::Result,
+    hook::{DataHook, TxHook},
+    unicode_bestfit_map::UnicodeBestfitMap,
+    HtpStatus,
+};
+
+#[cfg(test)]
+use crate::hook::{DataNativeCallbackFn, TxNativeCallbackFn};
+
+/// Configuration for libhtp parsing.
+#[derive(Clone)]
+pub struct Config {
+    /// The maximum size of the buffer that is used when the current
+    /// input chunk does not contain all the necessary data (e.g., a header
+    /// line that spans several packets).
+    pub(crate) field_limit: usize,
+    /// Server personality identifier.
+    pub(crate) server_personality: HtpServerPersonality,
+    /// Decoder configuration for url path.
+    pub(crate) decoder_cfg: DecoderConfig,
+    /// Request start hook, invoked when the parser receives the first byte of a new
+    /// request. Because an HTTP transaction always starts with a request, this hook
+    /// doubles as a transaction start hook.
+    pub(crate) hook_request_start: TxHook,
+    /// Request line hook, invoked after a request line has been parsed.
+    pub(crate) hook_request_line: TxHook,
+    /// Receives raw request header data, starting immediately after the request line,
+    /// including all headers as they are seen on the TCP connection, and including the
+    /// terminating empty line. Not available on genuine HTTP/0.9 requests (because
+    /// they don't use headers).
+    pub(crate) hook_request_header_data: DataHook,
+    /// Request headers hook, invoked after all request headers are seen.
+    #[cfg(test)]
+    pub(crate) hook_request_headers: TxHook,
+    /// Request body data hook, invoked every time body data is available. Each
+    /// invocation will provide a Data instance. Chunked data
+    /// will be dechunked before the data is passed to this hook. Decompression
+    /// is not currently implemented. At the end of the request body
+    /// there will be a call with the data set to None.
+    pub(crate) hook_request_body_data: DataHook,
+    /// Receives raw request trailer data, which can be available on requests that have
+    /// chunked bodies. The data starts immediately after the zero-length chunk
+    /// and includes the terminating empty line.
+    pub(crate) hook_request_trailer_data: DataHook,
+    /// Request trailer hook, invoked after all trailer headers are seen,
+    /// and if they are seen (not invoked otherwise).
+    pub(crate) hook_request_trailer: TxHook,
+    /// Request hook, invoked after a complete request is seen.
+    pub(crate) hook_request_complete: TxHook,
+    /// Response startup hook, invoked when a response transaction is found and
+    /// processing started.
+    pub(crate) hook_response_start: TxHook,
+    /// Response line hook, invoked after a response line has been parsed.
+    #[cfg(test)]
+    pub(crate) hook_response_line: TxHook,
+    /// Receives raw response header data, starting immediately after the status line
+    /// and including all headers as they are seen on the TCP connection, and including the
+    /// terminating empty line. Not available on genuine HTTP/0.9 responses (because
+    /// they don't have response headers).
+    pub(crate) hook_response_header_data: DataHook,
+    /// Response headers hook, invoked after all response headers have been seen.
+    #[cfg(test)]
+    pub(crate) hook_response_headers: TxHook,
+    /// Response body data hook, invoked every time body data is available. Each
+    /// invocation will provide a Data instance. Chunked data
+    /// will be dechunked before the data is passed to this hook. By default,
+    /// compressed data will be decompressed, but decompression can be disabled
+    /// in configuration. At the end of the response body there will be a call
+    /// with the data set to None.
+    pub(crate) hook_response_body_data: DataHook,
+    /// Receives raw response trailer data, which can be available on responses that have
+    /// chunked bodies. The data starts immediately after the zero-length chunk
+    /// and includes the terminating empty line.
+    pub(crate) hook_response_trailer_data: DataHook,
+    /// Response trailer hook, invoked after all trailer headers have been processed,
+    /// and only if the trailer exists.
+    pub(crate) hook_response_trailer: TxHook,
+    /// Response hook, invoked after a response has been seen. Because sometimes servers
+    /// respond before receiving complete requests, a response_complete callback may be
+    /// invoked prior to a request_complete callback.
+    pub(crate) hook_response_complete: TxHook,
+    /// Transaction complete hook, which is invoked once the entire transaction is
+    /// considered complete (request and response are both complete). This is always
+    /// the last hook to be invoked.
+    #[cfg(test)]
+    pub(crate) hook_transaction_complete: TxHook,
+    /// Reaction to leading whitespace on the request line
+    pub(crate) requestline_leading_whitespace_unwanted: HtpUnwanted,
+    /// Whether to decompress compressed request bodies.
+    pub(crate) request_decompression_enabled: bool,
+    /// Configuration options for decompression.
+    pub(crate) compression_options: Options,
+    /// Maximum number of transactions
+    pub(crate) max_tx: u32,
+    /// Maximum number of headers
+    pub(crate) number_headers_limit: u32,
+}
+
+impl Default for Config {
+    /// Returns a `Config` with conservative defaults: MINIMAL server
+    /// personality, request decompression disabled, and no hooks registered.
+    fn default() -> Self {
+        Self {
+            // Maximum bytes buffered for a field that spans input chunks.
+            field_limit: 18000,
+            server_personality: HtpServerPersonality::MINIMAL,
+            decoder_cfg: Default::default(),
+            hook_request_start: TxHook::default(),
+            hook_request_line: TxHook::default(),
+            hook_request_header_data: DataHook::default(),
+            #[cfg(test)]
+            hook_request_headers: TxHook::default(),
+            hook_request_body_data: DataHook::default(),
+            hook_request_trailer_data: DataHook::default(),
+            hook_request_trailer: TxHook::default(),
+            hook_request_complete: TxHook::default(),
+            hook_response_start: TxHook::default(),
+            #[cfg(test)]
+            hook_response_line: TxHook::default(),
+            hook_response_header_data: DataHook::default(),
+            #[cfg(test)]
+            hook_response_headers: TxHook::default(),
+            hook_response_body_data: DataHook::default(),
+            hook_response_trailer_data: DataHook::default(),
+            hook_response_trailer: TxHook::default(),
+            hook_response_complete: TxHook::default(),
+            #[cfg(test)]
+            hook_transaction_complete: TxHook::default(),
+            requestline_leading_whitespace_unwanted: HtpUnwanted::Ignore,
+            request_decompression_enabled: false,
+            compression_options: Options::default(),
+            // Caps on per-connection transactions and per-message headers.
+            max_tx: 512,
+            number_headers_limit: 1024,
+        }
+    }
+}
+
+/// Configuration options for decoding.
+#[derive(Copy, Clone)]
+pub(crate) struct DecoderConfig {
+    /// Whether to double decode the path in the normalized uri
+    pub(crate) double_decode_normalized_path: bool,
+    /// Whether to double decode the query in the normalized uri
+    pub(crate) double_decode_normalized_query: bool,
+    // Path-specific decoding options.
+    /// Convert backslash characters to slashes.
+    pub(crate) backslash_convert_slashes: bool,
+    /// Convert to lowercase.
+    pub(crate) convert_lowercase: bool,
+    /// Compress slash characters.
+    pub(crate) path_separators_compress: bool,
+    /// Should we URL-decode encoded path segment separators?
+    pub(crate) path_separators_decode: bool,
+    /// Should we decode '+' characters to spaces?
+    pub(crate) plusspace_decode: bool,
+    // Special characters options.
+    /// Controls how raw NUL bytes are handled.
+    pub(crate) nul_raw_terminates: bool,
+    /// Determines server response to a raw NUL byte in the path.
+    pub(crate) nul_raw_unwanted: HtpUnwanted,
+    /// Reaction to control characters.
+    pub(crate) control_chars_unwanted: HtpUnwanted,
+    /// Allow whitespace characters in request uri path
+    pub(crate) allow_space_uri: bool,
+    // URL encoding options.
+    /// Should we decode %u-encoded characters?
+    pub(crate) u_encoding_decode: bool,
+    /// Reaction to %u encoding.
+    pub(crate) u_encoding_unwanted: HtpUnwanted,
+    /// Handling of invalid URL encodings.
+    pub(crate) url_encoding_invalid_handling: HtpUrlEncodingHandling,
+    /// Reaction to invalid URL encoding.
+    pub(crate) url_encoding_invalid_unwanted: HtpUnwanted,
+    /// Controls how encoded NUL bytes are handled.
+    pub(crate) nul_encoded_terminates: bool,
+    /// How are we expected to react to an encoded NUL byte?
+    pub(crate) nul_encoded_unwanted: HtpUnwanted,
+    // Normalized URI preference
+    /// Controls whether the client wants the complete or partial normalized URI.
+    pub(crate) normalized_uri_include_all: bool,
+    // UTF-8 options.
+    /// Controls how invalid UTF-8 characters are handled.
+    pub(crate) utf8_invalid_unwanted: HtpUnwanted,
+    /// Convert UTF-8 characters into bytes using best-fit mapping.
+    pub(crate) utf8_convert_bestfit: bool,
+    /// Best-fit map for UTF-8 decoding.
+    pub(crate) bestfit_map: UnicodeBestfitMap,
+}
+
+impl Default for DecoderConfig {
+    /// Returns a lenient decoder configuration: all optional decoding
+    /// steps are off except '+'-to-space decoding, and every "unwanted"
+    /// reaction is Ignore.
+    fn default() -> Self {
+        Self {
+            double_decode_normalized_path: false,
+            double_decode_normalized_query: false,
+            backslash_convert_slashes: false,
+            convert_lowercase: false,
+            path_separators_compress: false,
+            path_separators_decode: false,
+            plusspace_decode: true,
+            nul_raw_terminates: false,
+            nul_raw_unwanted: HtpUnwanted::Ignore,
+            control_chars_unwanted: HtpUnwanted::Ignore,
+            allow_space_uri: false,
+            u_encoding_decode: false,
+            u_encoding_unwanted: HtpUnwanted::Ignore,
+            url_encoding_invalid_handling: HtpUrlEncodingHandling::PRESERVE_PERCENT,
+            url_encoding_invalid_unwanted: HtpUnwanted::Ignore,
+            nul_encoded_terminates: false,
+            nul_encoded_unwanted: HtpUnwanted::Ignore,
+            normalized_uri_include_all: false,
+            utf8_invalid_unwanted: HtpUnwanted::Ignore,
+            utf8_convert_bestfit: false,
+            bestfit_map: UnicodeBestfitMap::default(),
+        }
+    }
+}
+
+/// Enumerates the possible server personalities.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpServerPersonality {
+    /// Minimal personality that performs as little work as possible. All optional
+    /// features are disabled. This personality is a good starting point for customization.
+    MINIMAL,
+    /// A generic personality that aims to work reasonably well for all server types.
+    GENERIC,
+    /// The IDS personality tries to perform as much decoding as possible.
+    IDS,
+    /// Mimics the behavior of IIS 4.0, as shipped with Windows NT 4.0.
+    /// NOTE: not accepted by `Config::set_server_personality`, which
+    /// returns an error for this variant.
+    IIS_4_0,
+    /// Mimics the behavior of IIS 5.0, as shipped with Windows 2000.
+    /// NOTE: not accepted by `Config::set_server_personality`, which
+    /// returns an error for this variant.
+    IIS_5_0,
+    /// Mimics the behavior of IIS 5.1, as shipped with Windows XP Professional.
+    IIS_5_1,
+    /// Mimics the behavior of IIS 6.0, as shipped with Windows 2003.
+    IIS_6_0,
+    /// Mimics the behavior of IIS 7.0, as shipped with Windows 2008.
+    IIS_7_0,
+    /// Mimics the behavior of IIS 7.5, as shipped with Windows 7.
+    IIS_7_5,
+    /// Mimics the behavior of Apache 2.x.
+    APACHE_2,
+}
+
+/// Enumerates the ways in which servers respond to malformed data.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub(crate) enum HtpUnwanted {
+    /// Ignores problem. (Discriminant 0.)
+    Ignore,
+    /// Responds with HTTP 400 status code. The discriminant matches the
+    /// HTTP status code it represents.
+    Code400 = 400,
+}
+
+/// Enumerates the possible approaches to handling invalid URL-encodings.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpUrlEncodingHandling {
+    /// Ignore invalid URL encodings and leave the % in the data.
+    /// This is the default used by `DecoderConfig::default()`.
+    PRESERVE_PERCENT,
+    /// Ignore invalid URL encodings, but remove the % from the data.
+    REMOVE_PERCENT,
+    /// Decode invalid URL encodings.
+    PROCESS_INVALID,
+}
+
+impl Config {
+    /// Registers a request_complete callback, which is invoked after a
+    /// complete request has been seen.
+    #[cfg(test)]
+    pub(crate) fn register_request_complete(&mut self, cbk_fn: TxNativeCallbackFn) {
+        self.hook_request_complete.register(cbk_fn);
+    }
+
+    /// Registers a request_body_data callback, which is invoked whenever we see
+    /// bytes of request body data.
+    #[cfg(test)]
+    pub(crate) fn register_request_body_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+        self.hook_request_body_data.register(cbk_fn);
+    }
+
+    /// Registers a request_header_data callback, which is invoked when we see header
+    /// data. This callback receives raw header data as seen on the connection, including
+    /// the terminating line and anything seen after the request line.
+    #[cfg(test)]
+    pub(crate) fn register_request_header_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+        self.hook_request_header_data.register(cbk_fn);
+    }
+
+    /// Registers a request_headers callback, which is invoked after we see all the
+    /// request headers.
+    #[cfg(test)]
+    pub(crate) fn register_request_headers(&mut self, cbk_fn: TxNativeCallbackFn) {
+        self.hook_request_headers.register(cbk_fn);
+    }
+
+    /// Registers a request_line callback, which is invoked after we parse the entire
+    /// request line.
+    #[cfg(test)]
+    pub(crate) fn register_request_line(&mut self, cbk_fn: TxNativeCallbackFn) {
+        self.hook_request_line.register(cbk_fn);
+    }
+
+    /// Registers a request_start callback, which is invoked every time a new
+    /// request begins and before any parsing is done.
+    #[cfg(test)]
+    pub(crate) fn register_request_start(&mut self, cbk_fn: TxNativeCallbackFn) {
+        self.hook_request_start.register(cbk_fn);
+    }
+
+    /// Registers a request_trailer_data callback, which may be invoked on requests with
+    /// chunked bodies. This callback receives the raw request trailer data after the
+    /// zero-length chunk including the terminating line.
+    #[cfg(test)]
+    pub(crate) fn register_request_trailer_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+        self.hook_request_trailer_data.register(cbk_fn);
+    }
+
+    /// Registers a response_body_data callback, which is invoked whenever we see
+    /// bytes of response body data.
+    #[cfg(test)]
+    pub(crate) fn register_response_body_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+        self.hook_response_body_data.register(cbk_fn);
+    }
+
+    /// Registers a response_complete callback, which is invoked after a
+    /// complete response has been seen.
+    #[cfg(test)]
+    pub(crate) fn register_response_complete(&mut self, cbk_fn: TxNativeCallbackFn) {
+        self.hook_response_complete.register(cbk_fn);
+    }
+
+    /// Registers a response_header_data callback, which is invoked when we see header
+    /// data. This callback receives raw header data as seen on the connection, including
+    /// the terminating line and anything seen after the response line.
+    #[cfg(test)]
+    pub(crate) fn register_response_header_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+        self.hook_response_header_data.register(cbk_fn);
+    }
+
+    /// Registers a response_headers callback, which is invoked after we see all the
+    /// response headers.
+    #[cfg(test)]
+    pub(crate) fn register_response_headers(&mut self, cbk_fn: TxNativeCallbackFn) {
+        self.hook_response_headers.register(cbk_fn);
+    }
+
+    /// Registers a response_line callback, which is invoked after we parse the entire
+    /// response line.
+    #[cfg(test)]
+    pub(crate) fn register_response_line(&mut self, cbk_fn: TxNativeCallbackFn) {
+        self.hook_response_line.register(cbk_fn);
+    }
+
+    /// Registers a response_start callback, which is invoked when we see the
+    /// first bytes of data from a response.
+    #[cfg(test)]
+    pub(crate) fn register_response_start(&mut self, cbk_fn: TxNativeCallbackFn) {
+        self.hook_response_start.register(cbk_fn);
+    }
+
+    /// Registers a response_trailer_data callback, which may be invoked on responses with
+    /// chunked bodies. This callback receives the raw response trailer data after the zero-length
+    /// chunk and including the terminating line.
+    #[cfg(test)]
+    pub(crate) fn register_response_trailer_data(&mut self, cbk_fn: DataNativeCallbackFn) {
+        self.hook_response_trailer_data.register(cbk_fn);
+    }
+
+    /// Registers a transaction_complete callback, which is invoked once the request and response
+    /// are both complete.
+    #[cfg(test)]
+    pub(crate) fn register_transaction_complete(&mut self, cbk_fn: TxNativeCallbackFn) {
+        self.hook_transaction_complete.register(cbk_fn);
+    }
+
+    /// Enable or disable the double decoding of the path in the normalized uri
+    pub(crate) fn set_double_decode_normalized_path(
+        &mut self, double_decode_normalized_path: bool,
+    ) {
+        self.decoder_cfg.double_decode_normalized_path = double_decode_normalized_path;
+    }
+
+    /// Enable or disable the double decoding of the query in the normalized uri
+    pub(crate) fn set_double_decode_normalized_query(
+        &mut self, double_decode_normalized_query: bool,
+    ) {
+        self.decoder_cfg.double_decode_normalized_query = double_decode_normalized_query;
+    }
+
+    /// Configures the maximum size of the buffer LibHTP will use when all data is not available
+    /// in the current buffer (e.g., a very long header line that might span several packets). This
+    /// limit is controlled by the field_limit parameter.
+    pub(crate) fn set_field_limit(&mut self, field_limit: usize) {
+        self.field_limit = field_limit;
+    }
+
+    /// Enable or disable spaces in URIs. Disabled by default.
+    pub(crate) fn set_allow_space_uri(&mut self, allow_space: bool) {
+        self.decoder_cfg.allow_space_uri = allow_space;
+    }
+
+    /// Configure desired server personality.
+    /// Returns an Error if the personality is not supported
+    /// (IIS_4_0 and IIS_5_0 are currently unimplemented).
+    pub(crate) fn set_server_personality(
+        &mut self, personality: HtpServerPersonality,
+    ) -> Result<()> {
+        match personality {
+            HtpServerPersonality::MINIMAL => {}
+            HtpServerPersonality::GENERIC => {
+                self.set_backslash_convert_slashes(true);
+                self.set_path_separators_decode(true);
+                self.set_path_separators_compress(true);
+            }
+            HtpServerPersonality::IDS => {
+                self.set_backslash_convert_slashes(true);
+                self.set_path_separators_decode(true);
+                self.set_path_separators_compress(true);
+                self.set_convert_lowercase(true);
+                self.set_utf8_convert_bestfit(true);
+                self.set_u_encoding_decode(true);
+                self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::Ignore);
+            }
+            HtpServerPersonality::APACHE_2 => {
+                self.set_backslash_convert_slashes(false);
+                self.set_path_separators_decode(false);
+                self.set_path_separators_compress(true);
+                self.set_u_encoding_decode(false);
+                self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
+                self.set_url_encoding_invalid_unwanted(HtpUnwanted::Code400);
+                self.set_control_chars_unwanted(HtpUnwanted::Ignore);
+                self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::Code400);
+            }
+            HtpServerPersonality::IIS_5_1 => {
+                self.set_backslash_convert_slashes(true);
+                self.set_path_separators_decode(true);
+                self.set_path_separators_compress(true);
+                self.set_u_encoding_decode(false);
+                self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
+                self.set_control_chars_unwanted(HtpUnwanted::Ignore);
+                self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::Ignore);
+            }
+            HtpServerPersonality::IIS_6_0 => {
+                self.set_backslash_convert_slashes(true);
+                self.set_path_separators_decode(true);
+                self.set_path_separators_compress(true);
+                self.set_u_encoding_decode(true);
+                self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
+                self.set_u_encoding_unwanted(HtpUnwanted::Code400);
+                self.set_control_chars_unwanted(HtpUnwanted::Code400);
+                self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::Ignore);
+            }
+            HtpServerPersonality::IIS_7_0 | HtpServerPersonality::IIS_7_5 => {
+                self.set_backslash_convert_slashes(true);
+                self.set_path_separators_decode(true);
+                self.set_path_separators_compress(true);
+                self.set_u_encoding_decode(true);
+                self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
+                self.set_url_encoding_invalid_unwanted(HtpUnwanted::Code400);
+                self.set_control_chars_unwanted(HtpUnwanted::Code400);
+                self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::Ignore);
+            }
+            // Remaining personalities (e.g. IIS_4_0, IIS_5_0) are not supported.
+            _ => return Err(HtpStatus::ERROR),
+        }
+        // Remember the personality
+        self.server_personality = personality;
+        Ok(())
+    }
+
+    /// Sets the replacement character that will be used in the lossy best-fit
+    /// mapping from multi-byte to single-byte streams. The question mark character
+    /// is used as the default replacement byte.
+    pub(crate) fn set_bestfit_replacement_byte(&mut self, b: u8) {
+        self.decoder_cfg.bestfit_map.replacement_byte = b;
+    }
+
+    /// Configures how the server handles invalid URL encoding.
+    pub(crate) fn set_url_encoding_invalid_handling(&mut self, handling: HtpUrlEncodingHandling) {
+        self.decoder_cfg.url_encoding_invalid_handling = handling;
+    }
+
+    /// Configures the handling of raw NUL bytes. If enabled, raw NUL terminates strings.
+    pub(crate) fn set_nul_raw_terminates(&mut self, enabled: bool) {
+        self.decoder_cfg.nul_raw_terminates = enabled;
+    }
+
+    /// Configures how the server reacts to encoded NUL bytes. Some servers will stop at
+    /// a NUL, while others will respond with 400 or 404. When the termination option is not
+    /// used, the NUL byte will remain in the path.
+    pub(crate) fn set_nul_encoded_terminates(&mut self, enabled: bool) {
+        self.decoder_cfg.nul_encoded_terminates = enabled;
+    }
+
+    /// Configures whether %u-encoded sequences are decoded. Such sequences
+    /// will be treated as invalid URL encoding if decoding is not desirable.
+    pub(crate) fn set_u_encoding_decode(&mut self, enabled: bool) {
+        self.decoder_cfg.u_encoding_decode = enabled;
+    }
+
+    /// Configures whether backslash characters are treated as path segment separators. They
+    /// are not on Unix systems, but are on Windows systems. If this setting is enabled, a path
+    /// such as "/one\two/three" will be converted to "/one/two/three".
+    pub(crate) fn set_backslash_convert_slashes(&mut self, enabled: bool) {
+        self.decoder_cfg.backslash_convert_slashes = enabled;
+    }
+
+    /// Configures whether encoded path segment separators will be decoded. Apache does not do
+    /// this by default, but IIS does. If enabled, a path such as "/one%2ftwo" will be normalized
+    /// to "/one/two". If the backslash_separators option is also enabled, encoded backslash
+    /// characters will be converted too (and subsequently normalized to forward slashes).
+    pub(crate) fn set_path_separators_decode(&mut self, enabled: bool) {
+        self.decoder_cfg.path_separators_decode = enabled;
+    }
+
+    /// Configures whether consecutive path segment separators will be compressed. When enabled, a path
+    /// such as "/one//two" will be normalized to "/one/two". Backslash conversion and path segment separator
+    /// decoding are carried out before compression. For example, the path "/one\\/two\/%5cthree/%2f//four"
+    /// will be converted to "/one/two/three/four" (assuming all 3 options are enabled).
+    pub(crate) fn set_path_separators_compress(&mut self, enabled: bool) {
+        self.decoder_cfg.path_separators_compress = enabled;
+    }
+
+    /// Configures whether plus characters are converted to spaces when decoding URL-encoded strings. This
+    /// is appropriate to do for parameters, but not for URLs. Only applies to contexts where decoding
+    /// is taking place.
+    pub(crate) fn set_plusspace_decode(&mut self, enabled: bool) {
+        self.decoder_cfg.plusspace_decode = enabled;
+    }
+
+    /// Configures whether input data will be converted to lowercase. Useful for handling servers with
+    /// case-insensitive filesystems.
+    pub(crate) fn set_convert_lowercase(&mut self, enabled: bool) {
+        self.decoder_cfg.convert_lowercase = enabled;
+    }
+
+    /// Controls whether the data should be treated as UTF-8 and converted to a single-byte
+    /// stream using best-fit mapping.
+    pub(crate) fn set_utf8_convert_bestfit(&mut self, enabled: bool) {
+        self.decoder_cfg.utf8_convert_bestfit = enabled;
+    }
+
+    /// Configures reaction to %u-encoded sequences in input data.
+    pub(crate) fn set_u_encoding_unwanted(&mut self, unwanted: HtpUnwanted) {
+        self.decoder_cfg.u_encoding_unwanted = unwanted;
+    }
+
+    /// Controls reaction to raw control characters in the data.
+    pub(crate) fn set_control_chars_unwanted(&mut self, unwanted: HtpUnwanted) {
+        self.decoder_cfg.control_chars_unwanted = unwanted;
+    }
+
+    /// Controls whether to use complete or partial URI normalization
+    pub(crate) fn set_normalized_uri_include_all(&mut self, set: bool) {
+        self.decoder_cfg.normalized_uri_include_all = set;
+    }
+
+    /// Configures how the server reacts to invalid URL encoding.
+    pub(crate) fn set_url_encoding_invalid_unwanted(&mut self, unwanted: HtpUnwanted) {
+        self.decoder_cfg.url_encoding_invalid_unwanted = unwanted;
+    }
+
+    /// Configures how the server reacts to leading whitespace on the request line.
+    pub(crate) fn set_requestline_leading_whitespace_unwanted(&mut self, unwanted: HtpUnwanted) {
+        self.requestline_leading_whitespace_unwanted = unwanted;
+    }
+
+    /// Configures whether request data is decompressed.
+    pub(crate) fn set_request_decompression(&mut self, set: bool) {
+        self.request_decompression_enabled = set;
+    }
+
+    /// Configures how many layers of compression we try to decompress.
+    pub(crate) fn set_decompression_layer_limit(&mut self, limit: Option<u32>) {
+        self.compression_options.set_layer_limit(limit);
+    }
+}
diff --git a/rust/htp/src/connection.rs b/rust/htp/src/connection.rs
new file mode 100644 (file)
index 0000000..35aded4
--- /dev/null
@@ -0,0 +1,128 @@
+use crate::log::Log;
+use std::{cell::RefCell, collections::VecDeque, net::IpAddr, rc::Rc, time::SystemTime};
+use time::OffsetDateTime;
+
+/// Export Connection ConnectionFlags
+///
+/// Unit struct used purely as a namespace for the bit-flag constants that
+/// are stored in `Connection::flags` (a `u8` bitmask).
+#[repr(C)]
+pub(crate) struct ConnectionFlags;
+
+/// `Connection` Flags
+impl ConnectionFlags {
+    /// Seen pipelined requests.
+    pub(crate) const PIPELINED: u8 = 0x01;
+    /// Seen extra data after a HTTP 0.9 communication.
+    pub(crate) const HTTP_0_9_EXTRA: u8 = 0x02;
+}
+
+/// Stores information about the session.
+pub struct Connection {
+    /// Client IP address.
+    pub(crate) client_addr: Option<IpAddr>,
+    /// Client port.
+    pub(crate) client_port: Option<u16>,
+    /// Server IP address.
+    pub(crate) server_addr: Option<IpAddr>,
+    /// Server port.
+    pub(crate) server_port: Option<u16>,
+
+    /// Messages channel associated with this connection.
+    /// Shared (via `get_sender`) with the logger that produces the messages;
+    /// `Rc<RefCell<..>>` makes this single-threaded by construction.
+    log_channel: Rc<RefCell<VecDeque<Log>>>,
+
+    /// Parsing flags: a bitmask of `ConnectionFlags` constants.
+    pub(crate) flags: u8,
+    /// When was this connection opened?
+    pub(crate) open_timestamp: OffsetDateTime,
+    /// When was this connection closed?
+    pub(crate) close_timestamp: OffsetDateTime,
+    /// Inbound data counter.
+    pub(crate) request_data_counter: u64,
+    /// Outbound data counter.
+    pub(crate) response_data_counter: u64,
+}
+
+impl Default for Connection {
+    /// Returns a new Connection instance with default values.
+    fn default() -> Self {
+        Self {
+            client_addr: None,
+            client_port: None,
+            server_addr: None,
+            server_port: None,
+            log_channel: Rc::new(RefCell::new(VecDeque::new())),
+            flags: 0,
+            open_timestamp: OffsetDateTime::from(SystemTime::now()),
+            close_timestamp: OffsetDateTime::from(SystemTime::now()),
+            request_data_counter: 0,
+            response_data_counter: 0,
+        }
+    }
+}
+
+impl Connection {
+    /// Opens a connection. This function will essentially only store the provided data
+    /// for future reference.
+    pub(crate) fn open(
+        &mut self, client_addr: Option<IpAddr>, client_port: Option<u16>,
+        server_addr: Option<IpAddr>, server_port: Option<u16>, timestamp: Option<OffsetDateTime>,
+    ) {
+        self.client_addr = client_addr;
+        self.client_port = client_port;
+        self.server_addr = server_addr;
+        self.server_port = server_port;
+
+        // Remember when the connection was opened.
+        if let Some(timestamp) = timestamp {
+            self.open_timestamp = timestamp;
+        }
+    }
+
+    /// Closes the connection.
+    pub(crate) fn close(&mut self, timestamp: Option<OffsetDateTime>) {
+        // Update timestamp.
+        if let Some(timestamp) = timestamp {
+            self.close_timestamp = timestamp;
+        }
+    }
+
+    /// Keeps track of inbound packets and data.
+    pub(crate) fn track_inbound_data(&mut self, len: usize) {
+        self.request_data_counter = (self.request_data_counter).wrapping_add(len as u64);
+    }
+
+    /// Keeps track of outbound packets and data.
+    pub(crate) fn track_outbound_data(&mut self, len: usize) {
+        self.response_data_counter = (self.response_data_counter).wrapping_add(len as u64);
+    }
+
+    /// Return the log channel sender
+    pub(crate) fn get_sender(&self) -> &Rc<RefCell<VecDeque<Log>>> {
+        &self.log_channel
+    }
+
+    /// Drains and returns a vector of all current logs received by the log channel
+    #[cfg(test)]
+    pub(crate) fn get_logs(&self) -> Vec<Log> {
+        let mut lc = self.log_channel.borrow_mut();
+        let mut r = Vec::with_capacity(lc.len());
+        while let Some(e) = lc.pop_front() {
+            r.push(e)
+        }
+        r
+    }
+
+    /// Returns the next logged message received by the log channel
+    pub(crate) fn get_next_log(&self) -> Option<Log> {
+        let mut lc = self.log_channel.borrow_mut();
+        lc.pop_front()
+    }
+}
+
+impl PartialEq for Connection {
+    /// Returns true if connections are the same, false otherwise.
+    fn eq(&self, rhs: &Self) -> bool {
+        self.client_addr == rhs.client_addr
+            && self.client_port == rhs.client_port
+            && self.server_addr == rhs.server_addr
+            && self.server_port == rhs.server_port
+    }
+}
diff --git a/rust/htp/src/connection_parser.rs b/rust/htp/src/connection_parser.rs
new file mode 100644 (file)
index 0000000..90c172c
--- /dev/null
@@ -0,0 +1,946 @@
+use crate::{
+    bstr::Bstr,
+    config::Config,
+    connection::{Connection, ConnectionFlags},
+    decompressors::HtpContentEncoding,
+    error::Result,
+    hook::DataHook,
+    log::Logger,
+    transaction::{HtpRequestProgress, HtpResponseProgress, HtpTransferCoding, Transaction},
+    transactions::Transactions,
+    util::{FlagOperations, HtpFlags},
+    HtpStatus,
+};
+use std::{any::Any, borrow::Cow, cell::Cell, net::IpAddr, time::SystemTime};
+use time::OffsetDateTime;
+
+/// Enumerates parsing state.
+///
+/// One state machine type is shared by the request and the response parser;
+/// the section comments below mark which variants belong to only one side.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub(crate) enum State {
+    /// Default state.
+    None,
+    /// State once a transaction is processed or about to be processed.
+    Idle,
+    /// State for request/response line parsing.
+    Line,
+    /// State for header parsing.
+    Headers,
+    /// State for finalizing chunked body data parsing.
+    BodyChunkedDataEnd,
+    /// State for chunked body data.
+    BodyChunkedData,
+    /// Parse the chunked length state.
+    BodyChunkedLength,
+    /// State to determine encoding of body data.
+    BodyDetermine,
+    /// State for finalizing transaction side.
+    Finalize,
+    // Used by request_state only
+    /// State for determining the request protocol.
+    Protocol,
+    /// State to determine if there is a CONNECT request.
+    ConnectCheck,
+    /// State to determine if inbound parsing needs to be suspended.
+    ConnectProbeData,
+    /// State to determine if inbound parsing can continue if it was suspended.
+    ConnectWaitResponse,
+    /// State to process request body data.
+    BodyIdentity,
+    /// State to consume remaining data in request buffer for the HTTP 0.9 case.
+    IgnoreDataAfterHTTP09,
+    // Used by response_state only
+    /// State to consume response remaining body data when content-length is unknown.
+    BodyIdentityStreamClose,
+    /// State to consume response body data when content-length is known.
+    BodyIdentityCLKnown,
+}
+
+/// Enumerates all stream states. Each connection has two streams, one
+/// inbound and one outbound. Their states are tracked separately.
+///
+/// NOTE: `#[repr(C)]` means the variant order fixes the C discriminant
+/// values (this type is exported via cbindgen) — do not reorder variants.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpStreamState {
+    /// Default stream state.
+    NEW,
+    /// State when connection is open.
+    OPEN,
+    /// State when connection is closed.
+    CLOSED,
+    /// State when stream produces a fatal error.
+    ERROR,
+    /// State for a tunnelled stream.
+    TUNNEL,
+    /// State when parsing is suspended and not consumed in order. This is to
+    /// allow processing on another stream.
+    DATA_OTHER,
+    /// State when we should stop parsing the associated connection.
+    STOP,
+    /// State when all current data in the stream has been processed.
+    DATA,
+}
+
+#[derive(Debug, Default, Clone)]
+/// This structure is used to pass data (for example
+/// request and response body buffers or gaps) to parsers.
+pub(crate) struct ParserData<'a> {
+    /// Ref to the data buffer. `None` when this chunk represents a gap
+    /// (and in the default, empty value).
+    data: Option<Cow<'a, [u8]>>,
+    /// Length of data gap. Only set if is a gap.
+    gap_len: Option<usize>,
+    /// Current position offset of the data to parse.
+    /// Kept in a `Cell` so parsing can advance it through a shared reference.
+    position: Cell<usize>,
+    /// Current callback data position (see `callback_data`).
+    callback_position: usize,
+}
+
+impl ParserData<'_> {
+    /// Returns a pointer to the raw data associated with Data.
+    /// This points at the *unconsumed* remainder of the chunk (via `data()`),
+    /// and is null when there is no data or the position ran past the end.
+    pub(crate) fn data_ptr(&self) -> *const u8 {
+        self.data()
+            .as_ref()
+            .map(|data| data.as_ptr())
+            .unwrap_or(std::ptr::null())
+    }
+
+    /// Returns the unconsumed data.
+    /// `None` for gaps, and when the position has moved past the buffer end.
+    pub(crate) fn data(&self) -> Option<&[u8]> {
+        let data = self.data.as_ref()?;
+        if self.position.get() <= data.len() {
+            Some(&data[self.position.get()..])
+        } else {
+            None
+        }
+    }
+
+    /// Returns the length of the unconsumed data.
+    /// For gaps this is the remaining (virtual) gap length; otherwise the
+    /// remaining slice length.
+    pub(crate) fn len(&self) -> usize {
+        if let Some(gap_len) = self.gap_len {
+            if self.position.get() >= gap_len {
+                0
+            } else {
+                gap_len - self.position.get()
+            }
+        } else {
+            self.as_slice().len()
+        }
+    }
+
+    /// Returns how much data has been consumed so far
+    fn consumed_len(&self) -> usize {
+        self.position.get()
+    }
+
+    /// Return an immutable slice view of the unconsumed data.
+    /// Unlike `data()`, this never fails: out-of-range positions and gaps
+    /// both yield an empty slice.
+    pub(crate) fn as_slice(&self) -> &[u8] {
+        if let Some(data) = self.data.as_ref() {
+            if self.position.get() <= data.len() {
+                return &data[self.position.get()..];
+            }
+        }
+        b""
+    }
+
+    /// Determines if this chunk is a gap or not
+    pub(crate) fn is_gap(&self) -> bool {
+        self.gap_len.is_some()
+    }
+
+    /// Determine whether there is no more data to consume.
+    pub(crate) fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Set the position offset into the data for parsing
+    fn set_position(&self, position: usize) {
+        self.position.set(position);
+    }
+
+    /// Advances the internal position where we are parsing.
+    /// Takes `&self`: the position lives in a `Cell`.
+    pub(crate) fn consume(&self, consumed: usize) {
+        self.set_position(self.position.get() + consumed);
+    }
+
+    /// Decrements the internal position where we are parsing.
+    /// Unwinding past the start clamps the position to 0.
+    fn unconsume(&self, unconsume: usize) {
+        if unconsume < self.position.get() {
+            self.set_position(self.position.get() - unconsume);
+        } else {
+            self.set_position(0);
+        }
+    }
+
+    /// Make an owned version of this data.
+    #[cfg(test)]
+    pub(crate) fn into_owned(self) -> ParserData<'static> {
+        ParserData {
+            data: self.data.map(|d| Cow::Owned(d.into_owned())),
+            gap_len: self.gap_len,
+            position: self.position,
+            callback_position: self.callback_position,
+        }
+    }
+
+    /// Callback data is raw data buffer content that is passed to the
+    /// application via the header and trailer data hooks.
+    ///
+    /// This function will return any data that has been consumed but not
+    /// yet returned from this function. Each call advances
+    /// `callback_position` to the current parse position, so repeated calls
+    /// never return the same bytes twice.
+    pub(crate) fn callback_data(&mut self) -> &[u8] {
+        if let Some(data) = self.data.as_ref() {
+            if self.position.get() <= data.len() && self.callback_position <= self.position.get() {
+                let d = &data[self.callback_position..self.position.get()];
+                self.callback_position = self.position.get();
+                return d;
+            }
+        }
+        b""
+    }
+
+    /// Sets the callback start location to the current parsing location
+    pub(crate) fn reset_callback_start(&mut self) {
+        self.callback_position = self.position.get();
+    }
+}
+
+impl<'a> From<Option<&'a [u8]>> for ParserData<'a> {
+    fn from(data: Option<&'a [u8]>) -> Self {
+        ParserData {
+            data: data.map(Cow::Borrowed),
+            gap_len: None,
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+impl<'a> From<&'a [u8]> for ParserData<'a> {
+    fn from(data: &'a [u8]) -> Self {
+        ParserData {
+            data: Some(Cow::Borrowed(data)),
+            gap_len: None,
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+impl From<Vec<u8>> for ParserData<'static> {
+    fn from(data: Vec<u8>) -> Self {
+        ParserData {
+            data: Some(Cow::Owned(data)),
+            gap_len: None,
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+impl<'a> From<&'a Vec<u8>> for ParserData<'a> {
+    fn from(data: &'a Vec<u8>) -> Self {
+        ParserData {
+            data: Some(Cow::Borrowed(data.as_slice())),
+            gap_len: None,
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+impl From<usize> for ParserData<'_> {
+    fn from(gap_len: usize) -> Self {
+        ParserData {
+            data: None,
+            gap_len: Some(gap_len),
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+impl From<(*const u8, usize)> for ParserData<'_> {
+    fn from((data, len): (*const u8, usize)) -> Self {
+        if data.is_null() {
+            if len > 0 {
+                ParserData::from(len)
+            } else {
+                ParserData::from(b"".as_ref())
+            }
+        } else {
+            unsafe { ParserData::from(std::slice::from_raw_parts(data, len)) }
+        }
+    }
+}
+
+/// Stores information about the parsing process and associated transactions.
+pub struct ConnectionParser {
+    // General fields
+    /// The logger structure associated with this parser
+    pub(crate) logger: Logger,
+    /// A reference to the current parser configuration structure.
+    pub(crate) cfg: &'static Config,
+    /// The connection structure associated with this parser.
+    pub(crate) conn: Connection,
+    /// Opaque user data associated with this parser.
+    pub(crate) user_data: Option<Box<dyn Any>>,
+    // Request parser fields
+    /// Parser inbound status. Starts as NEW, but may turn into ERROR.
+    pub(crate) request_status: HtpStreamState,
+    /// Parser outbound status. Starts as NEW, but may turn into ERROR.
+    pub(crate) response_status: HtpStreamState,
+    /// When true, this field indicates that there is unprocessed inbound data, and
+    /// that the response parsing code should stop at the end of the current request
+    /// in order to allow more requests to be produced.
+    pub(crate) response_data_other_at_tx_end: bool,
+    /// The time when the last request data chunk was received.
+    pub(crate) request_timestamp: OffsetDateTime,
+    /// How many bytes from the last input chunk have we consumed
+    /// This is mostly used from callbacks, where the caller
+    /// wants to know how far into the last chunk the parser is.
+    pub(crate) request_bytes_consumed: usize,
+    /// How many data chunks does the inbound connection stream consist of?
+    pub(crate) request_chunk_count: usize,
+    /// The index of the first chunk used in the current request.
+    pub(crate) request_chunk_request_index: usize,
+    /// Used to buffer a line of inbound data when buffering cannot be avoided.
+    pub(crate) request_buf: Bstr,
+    /// Stores the current value of a folded request header. Such headers span
+    /// multiple lines, and are processed only when all data is available.
+    pub(crate) request_header: Option<Bstr>,
+    /// The request body length declared in a valid request header. The key here
+    /// is "valid". This field will not be populated if the request contains both
+    /// a Transfer-Encoding header and a Content-Length header.
+    pub(crate) request_content_length: Option<u64>,
+    /// Holds the remaining request body length that we expect to read. This
+    /// field will be available only when the length of a request body is known
+    /// in advance, i.e. when request headers contain a Content-Length header.
+    pub(crate) request_body_data_left: Option<u64>,
+    /// Holds the amount of data that needs to be read from the
+    /// current data chunk. Only used with chunked request bodies.
+    pub(crate) request_chunked_length: Option<u64>,
+    /// Current request parser state.
+    pub(crate) request_state: State,
+    /// Previous request parser state. Used to detect state changes.
+    pub(crate) request_state_previous: State,
+    /// The hook that should be receiving raw connection data.
+    pub(crate) request_data_receiver_hook: Option<DataHook>,
+
+    // Response parser fields
+    /// The time when the last response data chunk was received.
+    pub(crate) response_timestamp: OffsetDateTime,
+    /// How many bytes from the last input chunk have we consumed
+    /// This is mostly used from callbacks, where the caller
+    /// wants to know how far into the last chunk the parser is.
+    pub(crate) response_bytes_consumed: usize,
+    /// Used to buffer a line of outbound data when buffering cannot be avoided.
+    pub(crate) response_buf: Bstr,
+    /// Stores the current value of a folded response header. Such headers span
+    /// multiple lines, and are processed only when all data is available.
+    pub(crate) response_header: Option<Bstr>,
+    /// The length of the current response body as presented in the
+    /// Content-Length response header.
+    pub(crate) response_content_length: Option<u64>,
+    /// The remaining length of the current response body, if known. Set to None otherwise.
+    pub(crate) response_body_data_left: Option<u64>,
+    /// Holds the amount of data that needs to be read from the
+    /// current response data chunk. Only used with chunked response bodies.
+    pub(crate) response_chunked_length: Option<u64>,
+    /// Current response parser state.
+    pub(crate) response_state: State,
+    /// Previous response parser state.
+    pub(crate) response_state_previous: State,
+    /// The hook that should be receiving raw connection data.
+    pub(crate) response_data_receiver_hook: Option<DataHook>,
+
+    /// Transactions processed by this parser
+    transactions: Transactions,
+}
+
+impl std::fmt::Debug for ConnectionParser {
+    /// Compact debug view: the two stream statuses plus the current
+    /// request/response transaction indices.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.debug_struct("ConnectionParser")
+            .field("request_status", &self.request_status)
+            .field("response_status", &self.response_status)
+            .field("request_index", &self.request_index())
+            .field("response_index", &self.response_index())
+            .finish()
+    }
+}
+
+impl ConnectionParser {
+    /// Creates a new ConnectionParser with a preconfigured `Config` struct.
+    ///
+    /// The logger is cloned into the transaction list, so the parser and its
+    /// transactions all feed the same per-connection log channel.
+    pub(crate) fn new(cfg: &'static Config) -> Self {
+        let conn = Connection::default();
+        let logger = Logger::new(conn.get_sender());
+        Self {
+            logger: logger.clone(),
+            cfg,
+            conn,
+            user_data: None,
+            request_status: HtpStreamState::NEW,
+            response_status: HtpStreamState::NEW,
+            response_data_other_at_tx_end: false,
+            request_timestamp: OffsetDateTime::from(SystemTime::now()),
+            request_bytes_consumed: 0,
+            request_chunk_count: 0,
+            request_chunk_request_index: 0,
+            request_buf: Bstr::new(),
+            request_header: None,
+            request_content_length: None,
+            request_body_data_left: None,
+            request_chunked_length: None,
+            // Both state machines start out Idle; None is only used as the
+            // "no previous state" marker.
+            request_state: State::Idle,
+            request_state_previous: State::None,
+            request_data_receiver_hook: None,
+            response_timestamp: OffsetDateTime::from(SystemTime::now()),
+            response_bytes_consumed: 0,
+            response_buf: Bstr::new(),
+            response_header: None,
+            response_content_length: None,
+            response_body_data_left: None,
+            response_chunked_length: None,
+            response_state: State::Idle,
+            response_state_previous: State::None,
+            response_data_receiver_hook: None,
+            transactions: Transactions::new(cfg, &logger),
+        }
+    }
+
+    /// Get the current request transaction
+    pub(crate) fn request(&mut self) -> Option<&Transaction> {
+        self.transactions.request()
+    }
+
+    /// Get a mutable reference to the current request transaction
+    pub(crate) fn request_mut(&mut self) -> Option<&mut Transaction> {
+        self.transactions.request_mut()
+    }
+
+    /// Get the current response transaction
+    pub(crate) fn response(&mut self) -> Option<&Transaction> {
+        self.transactions.response()
+    }
+
+    /// Get a mutable reference to the current response transaction
+    pub(crate) fn response_mut(&mut self) -> Option<&mut Transaction> {
+        self.transactions.response_mut()
+    }
+
+    /// Advance to the next request
+    /// Returns the next request transaction id
+    pub(crate) fn request_next(&mut self) -> usize {
+        // Detect pipelining: a new request arriving before the previous
+        // response has completed.
+        if self.transactions.request_index() > self.transactions.response_index() {
+            self.conn.flags.set(ConnectionFlags::PIPELINED)
+        }
+        self.transactions.request_next()
+    }
+
+    /// Advance to the next response
+    /// Returns the next response transaction id
+    pub(crate) fn response_next(&mut self) -> usize {
+        self.transactions.response_next()
+    }
+
+    /// Get the index of the request transaction
+    pub(crate) fn request_index(&self) -> usize {
+        self.transactions.request_index()
+    }
+
+    /// Get the index of the response transaction
+    pub(crate) fn response_index(&self) -> usize {
+        self.transactions.response_index()
+    }
+
+    /// Get the number of transactions processed up to now
+    pub(crate) fn tx_size(&self) -> usize {
+        self.transactions.size()
+    }
+
+    /// Get a specific transaction
+    pub(crate) fn tx(&self, index: usize) -> Option<&Transaction> {
+        self.transactions.get(index)
+    }
+
+    /// Get a specific transaction (mutable)
+    pub(crate) fn tx_mut(&mut self, index: usize) -> Option<&mut Transaction> {
+        self.transactions.get_mut(index)
+    }
+
+    /// Handle the current state to be processed.
+    /// Dispatches on `request_state`; response-only states (and `None`)
+    /// are an error here.
+    pub(crate) fn handle_request_state(&mut self, data: &mut ParserData) -> Result<()> {
+        match self.request_state {
+            State::None => Err(HtpStatus::ERROR),
+            State::Idle => self.request_idle(data),
+            State::IgnoreDataAfterHTTP09 => self.request_ignore_data_after_http_0_9(data),
+            State::Line => self.request_line(data),
+            State::Protocol => self.request_protocol(data),
+            State::Headers => self.request_headers(data),
+            State::ConnectWaitResponse => self.request_connect_wait_response(),
+            State::ConnectCheck => self.request_connect_check(),
+            State::ConnectProbeData => self.request_connect_probe_data(data),
+            State::BodyDetermine => self.request_body_determine(),
+            State::BodyChunkedData => self.request_body_chunked_data(data),
+            State::BodyChunkedLength => self.request_body_chunked_length(data),
+            State::BodyChunkedDataEnd => self.request_body_chunked_data_end(data),
+            State::BodyIdentity => self.request_body_identity(data),
+            State::Finalize => self.request_finalize(data),
+            // These are only used by response_state
+            _ => Err(HtpStatus::ERROR),
+        }
+    }
+
+    /// Handle the current state to be processed.
+    /// Dispatches on `response_state`; request-only states (and `None`)
+    /// are an error here.
+    pub(crate) fn handle_response_state(&mut self, data: &mut ParserData) -> Result<()> {
+        match self.response_state {
+            State::None => Err(HtpStatus::ERROR),
+            State::Idle => self.response_idle(data),
+            State::Line => self.response_line(data),
+            State::Headers => self.response_headers(data),
+            State::BodyDetermine => self.response_body_determine(data),
+            State::BodyChunkedData => self.response_body_chunked_data(data),
+            State::BodyChunkedLength => self.response_body_chunked_length(data),
+            State::BodyChunkedDataEnd => self.response_body_chunked_data_end(data),
+            State::Finalize => self.response_finalize(data),
+            State::BodyIdentityStreamClose => self.response_body_identity_stream_close(data),
+            State::BodyIdentityCLKnown => self.response_body_identity_cl_known(data),
+            // These are only used by request_state
+            _ => Err(HtpStatus::ERROR),
+        }
+    }
+
+    /// Closes only the request (inbound) stream of the connection: marks it
+    /// CLOSED (unless already in ERROR) and runs the request parser one last
+    /// time with empty input so stream-closure events can fire.
+    pub(crate) fn request_close(&mut self, timestamp: Option<OffsetDateTime>) {
+        // Update internal flags
+        if self.request_status != HtpStreamState::ERROR {
+            self.request_status = HtpStreamState::CLOSED
+        }
+        // Call the parsers one last time, which will allow them
+        // to process the events that depend on stream closure
+        self.request_data(ParserData::default(), timestamp);
+    }
+
+    /// Closes the connection associated with the supplied parser:
+    /// both streams are marked CLOSED and both parsers run one last time.
+    pub(crate) fn close(&mut self, timestamp: Option<OffsetDateTime>) {
+        // Close the underlying connection.
+        self.conn.close(timestamp);
+        // Update internal flags
+        if self.request_status != HtpStreamState::ERROR {
+            self.request_status = HtpStreamState::CLOSED
+        }
+        if self.response_status != HtpStreamState::ERROR {
+            self.response_status = HtpStreamState::CLOSED
+        }
+        // Call the parsers one last time, which will allow them
+        // to process the events that depend on stream closure
+        self.request_data(ParserData::default(), timestamp);
+        self.response_data(ParserData::default(), timestamp);
+    }
+
+    /// This function is most likely not used and/or not needed.
+    /// Resets per-request length tracking and records the current chunk
+    /// index as the start of the next request.
+    pub(crate) fn request_reset(&mut self) {
+        self.request_content_length = None;
+        self.request_body_data_left = None;
+        self.request_chunk_request_index = self.request_chunk_count;
+    }
+
+    /// Returns the number of bytes consumed from the current data chunks so far.
+    pub(crate) fn request_data_consumed(&self) -> usize {
+        self.request_bytes_consumed
+    }
+
+    /// Consume the given number of bytes from the ParserData and update
+    /// the internal counter for how many bytes consumed so far.
+    pub(crate) fn request_data_consume(&mut self, input: &ParserData, consumed: usize) {
+        input.consume(consumed);
+        self.request_bytes_consumed = input.consumed_len();
+    }
+
+    /// Unconsume the given number of bytes from the ParserData and update
+    /// the internal counter for how many bytes are consumed.
+    /// If the requested number of bytes is larger than the number of bytes
+    /// already consumed then the parser will be unwound to the beginning.
+    pub(crate) fn request_data_unconsume(&mut self, input: &mut ParserData, unconsume: usize) {
+        input.unconsume(unconsume);
+        self.request_bytes_consumed = input.consumed_len();
+    }
+
+    /// Consume the given number of bytes from the ParserData and update
+    /// the internal counter for how many bytes consumed so far.
+    pub(crate) fn response_data_consume(&mut self, input: &ParserData, consumed: usize) {
+        input.consume(consumed);
+        self.response_bytes_consumed = input.consumed_len();
+    }
+
+    /// Unconsume the given number of bytes from the ParserData and update
+    /// the internal counter for how many bytes are consumed.
+    /// If the requested number of bytes is larger than the number of bytes
+    /// already consumed then the parser will be unwound to the beginning.
+    pub(crate) fn response_data_unconsume(&mut self, input: &mut ParserData, unconsume: usize) {
+        input.unconsume(unconsume);
+        self.response_bytes_consumed = input.consumed_len();
+    }
+
+    /// Returns the number of bytes consumed from the most recent outbound data chunk. Normally, an invocation
+    /// of response_data() will consume all data from the supplied buffer, but there are circumstances
+    /// where only partial consumption is possible. In such cases DATA_OTHER will be returned.
+    /// Consumed bytes are no longer necessary, but the remainder of the buffer will be saved
+    /// for later.
+    pub(crate) fn response_data_consumed(&self) -> usize {
+        self.response_bytes_consumed
+    }
+
+    /// Opens connection.
+    ///
+    /// Logs an error and does nothing if either stream has already left the
+    /// NEW state (i.e. the connection was opened before).
+    pub(crate) fn open(
+        &mut self, client_addr: Option<IpAddr>, client_port: Option<u16>,
+        server_addr: Option<IpAddr>, server_port: Option<u16>, timestamp: Option<OffsetDateTime>,
+    ) {
+        // Check connection parser state first.
+        if self.request_status != HtpStreamState::NEW || self.response_status != HtpStreamState::NEW
+        {
+            htp_error!(
+                self.logger,
+                HtpLogCode::CONNECTION_ALREADY_OPEN,
+                "Connection is already open"
+            );
+            return;
+        }
+        self.conn.open(
+            client_addr,
+            client_port,
+            server_addr,
+            server_port,
+            timestamp,
+        );
+        self.request_status = HtpStreamState::OPEN;
+        self.response_status = HtpStreamState::OPEN;
+    }
+
+    /// Set the user data. Replaces any previously stored value.
+    pub(crate) fn set_user_data(&mut self, data: Box<dyn Any + 'static>) {
+        self.user_data = Some(data);
+    }
+
+    /// Get a reference to the user data.
+    /// Returns None when no data was set or when the stored value is not a `T`.
+    pub(crate) fn user_data<T: 'static>(&self) -> Option<&T> {
+        self.user_data
+            .as_ref()
+            .and_then(|ud| ud.downcast_ref::<T>())
+    }
+
+    /// Initialize request parsing, change state to LINE,
+    /// and invoke all registered callbacks.
+    ///
+    /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP if one of the
+    /// callbacks does not want to follow the transaction any more.
+    pub(crate) fn state_request_start(&mut self) -> Result<()> {
+        // Change state into request line parsing.
+        self.request_state = State::Line;
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        req.unwrap().request_progress = HtpRequestProgress::LINE;
+        // Run hook REQUEST_START.
+        self.cfg
+            .hook_request_start
+            .clone()
+            .run_all(self, self.request_index())?;
+        Ok(())
+    }
+
+    /// Change transaction state to HEADERS and invoke all
+    /// registered callbacks.
+    ///
+    /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP if one of the
+    /// callbacks does not want to follow the transaction any more.
+    pub(crate) fn state_request_headers(&mut self, input: &mut ParserData) -> Result<()> {
+        // Finalize sending raw header data
+        self.request_receiver_finalize_clear(input)?;
+        // If we're in HTP_REQ_HEADERS that means that this is the
+        // first time we're processing headers in a request. Otherwise,
+        // we're dealing with trailing headers.
+        let req = self.request();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let request_progress = req.unwrap().request_progress;
+        if request_progress > HtpRequestProgress::HEADERS {
+            // Request trailers.
+            // Run hook HTP_REQUEST_TRAILER.
+            self.cfg
+                .hook_request_trailer
+                .clone()
+                .run_all(self, self.request_index())?;
+            // Completed parsing this request; finalize it now.
+            self.request_state = State::Finalize;
+        } else if request_progress >= HtpRequestProgress::LINE {
+            // Request headers.
+            // Did this request arrive in multiple data chunks?
+            let req = self.transactions.request_mut().unwrap();
+            if self.request_chunk_count != self.request_chunk_request_index {
+                req.flags.set(HtpFlags::MULTI_PACKET_HEAD)
+            }
+            req.process_request_headers()?;
+            // Run hook REQUEST_HEADERS.
+            // NOTE(review): this hook invocation is gated with #[cfg(test)],
+            // so in non-test builds the REQUEST_HEADERS hook never runs here.
+            // Confirm this is intentional (e.g. the embedding application
+            // drives header callbacks itself) and not a leftover test gate.
+            #[cfg(test)]
+            self.cfg
+                .hook_request_headers
+                .clone()
+                .run_all(self, self.request_index())?;
+            self.request_initialize_decompressors()?;
+
+            // We still proceed if the request is invalid.
+            self.request_state = State::ConnectCheck;
+        } else {
+            // NOTE(review): this is a request-side path but logs the
+            // RESPONSE_BODY_INTERNAL_ERROR code — confirm the code is meant
+            // to be shared between both directions.
+            htp_warn!(
+                self.logger,
+                HtpLogCode::RESPONSE_BODY_INTERNAL_ERROR,
+                format!(
+                    "[Internal Error] Invalid tx progress: {:?}",
+                    request_progress
+                )
+            );
+            return Err(HtpStatus::ERROR);
+        }
+        Ok(())
+    }
+
+    /// Change transaction state to PROTOCOL and invoke all
+    /// registered callbacks.
+    ///
+    /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP if one of the
+    /// callbacks does not want to follow the transaction any more.
+    pub(crate) fn state_request_line(&mut self) -> Result<()> {
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        // Parse the buffered request line into its method/uri/protocol parts.
+        req.unwrap().parse_request_line()?;
+        // Run hook REQUEST_LINE.
+        self.cfg
+            .hook_request_line
+            .clone()
+            .run_all(self, self.request_index())?;
+        let logger = self.logger.clone();
+        // unwrap: presence was checked above; assumes the REQUEST_LINE hooks
+        // cannot remove the current transaction — TODO confirm.
+        let req = self.request_mut().unwrap();
+        if let Some(parsed_uri) = req.parsed_uri.as_mut() {
+            // Pre-compute the normalized URI forms stored on the transaction.
+            let (partial_normalized_uri, complete_normalized_uri) =
+                parsed_uri.generate_normalized_uri(Some(logger));
+            req.partial_normalized_uri = partial_normalized_uri;
+            req.complete_normalized_uri = complete_normalized_uri;
+        }
+        // Move on to the next phase.
+        self.request_state = State::Protocol;
+        Ok(())
+    }
+
+    /// Change transaction state to COMPLETE, finalize the request body,
+    /// run the REQUEST_COMPLETE hook, and advance the parser to the next
+    /// request transaction.
+    ///
+    /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP
+    /// if one of the callbacks does not want to follow the transaction any more.
+    pub(crate) fn state_request_complete(&mut self, input: &mut ParserData) -> Result<()> {
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let req = req.unwrap();
+        if req.request_progress != HtpRequestProgress::COMPLETE {
+            // Finalize request body.
+            if req.request_has_body() {
+                // None signals end-of-body to the body-data machinery.
+                self.request_body_data(None)?;
+            }
+            self.request_mut().unwrap().request_progress = HtpRequestProgress::COMPLETE;
+            // Run hook REQUEST_COMPLETE.
+            self.cfg
+                .hook_request_complete
+                .clone()
+                .run_all(self, self.request_index())?;
+
+            // Clear request data
+            self.request_receiver_finalize_clear(input)?;
+        }
+        // Determine what happens next, and remove this transaction from the parser.
+        // HTTP/0.9 cannot carry further requests on the same connection.
+        self.request_state = if self.request().unwrap().is_protocol_0_9 {
+            State::IgnoreDataAfterHTTP09
+        } else {
+            State::Idle
+        };
+        // Check if the entire transaction is complete.
+        self.finalize(self.request_index())?;
+        self.request_next();
+        Ok(())
+    }
+
+    /// Determine if the transaction is complete and run any hooks.
+    ///
+    /// NOTE(review): the entire body is compiled only under #[cfg(test)], so in
+    /// non-test builds this is a no-op that returns Ok(()). Presumably the
+    /// embedding application handles transaction completion itself — confirm.
+    fn finalize(&mut self, _tx_index: usize) -> Result<()> {
+        #[cfg(test)]
+        if let Some(tx) = self.tx(_tx_index) {
+            if !tx.is_complete() {
+                return Ok(());
+            }
+            // Disconnect transaction from the parser.
+            // Run hook TRANSACTION_COMPLETE.
+            self.cfg
+                .hook_transaction_complete
+                .clone()
+                .run_all(self, _tx_index)?;
+        }
+        Ok(())
+    }
+
+    /// Advance state to LINE, or BODY if http version is 0.9.
+    ///
+    /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP
+    /// if one of the callbacks does not want to follow the transaction any more.
+    pub(crate) fn state_response_start(&mut self) -> Result<()> {
+        // Change state into response line parsing, except if we're following
+        // a HTTP/0.9 request (no status line or response headers).
+        let tx = self.response_mut();
+        if tx.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let tx = tx.unwrap();
+
+        if tx.is_protocol_0_9 {
+            // HTTP/0.9: the body starts immediately and runs until the
+            // stream is closed, with no encoding applied.
+            tx.response_transfer_coding = HtpTransferCoding::Identity;
+            tx.response_content_encoding_processing = HtpContentEncoding::None;
+            tx.response_progress = HtpResponseProgress::BODY;
+            self.response_state = State::BodyIdentityStreamClose;
+            self.response_body_data_left = None
+        } else {
+            tx.response_progress = HtpResponseProgress::LINE;
+            self.response_state = State::Line
+        }
+        // Run hook RESPONSE_START.
+        self.cfg
+            .hook_response_start
+            .clone()
+            .run_all(self, self.response_index())?;
+        // If at this point we have no method and no uri and our status
+        // is still REQ_LINE, we likely have timed out request
+        // or an overly long request
+        let tx = self.response_mut().unwrap();
+        if tx.request_method.is_none()
+            && tx.request_uri.is_none()
+            && self.request_state == State::Line
+        {
+            htp_warn!(
+                self.logger,
+                HtpLogCode::REQUEST_LINE_INCOMPLETE,
+                "Request line incomplete"
+            );
+        }
+        Ok(())
+    }
+
+    /// Advance state after processing response headers.
+    ///
+    /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP
+    /// if one of the callbacks does not want to follow the transaction any more.
+    pub(crate) fn state_response_headers(&mut self, input: &mut ParserData) -> Result<()> {
+        // Finalize sending raw header data.
+        self.response_receiver_finalize_clear(input)?;
+        // Run hook RESPONSE_HEADERS.
+        // NOTE(review): hook only runs in test builds, mirroring the
+        // request-side handling — confirm this is intentional.
+        #[cfg(test)]
+        self.cfg
+            .hook_response_headers
+            .clone()
+            .run_all(self, self.response_index())?;
+        // Set up decompressors from the response headers just parsed.
+        self.response_initialize_decompressors()
+    }
+
+    /// Change transaction state to RESPONSE_LINE and invoke registered callbacks.
+    ///
+    /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP
+    /// if one of the callbacks does not want to follow the transaction any more.
+    pub(crate) fn state_response_line(&mut self) -> Result<()> {
+        // Is the response line valid?
+        let tx = self.response_mut();
+        if tx.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let tx = tx.unwrap();
+
+        // Validation records any problems on the transaction; nothing to
+        // propagate from this call.
+        tx.validate_response_line();
+        #[cfg(test)]
+        let index = tx.index;
+        // Run hook HTP_RESPONSE_LINE (test builds only).
+        #[cfg(test)]
+        return self.cfg.hook_response_line.clone().run_all(self, index);
+        #[cfg(not(test))]
+        return Ok(());
+    }
+
+    /// Change transaction state to COMPLETE and invoke registered callbacks.
+    ///
+    /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP
+    /// if one of the callbacks does not want to follow the transaction any more.
+    pub(crate) fn state_response_complete(&mut self, input: &mut ParserData) -> Result<()> {
+        let response_index = self.response_index();
+        let tx = self.response_mut();
+        if tx.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let tx = tx.unwrap();
+        if tx.response_progress != HtpResponseProgress::COMPLETE {
+            tx.response_progress = HtpResponseProgress::COMPLETE;
+            // Run the last RESPONSE_BODY_DATA HOOK, but only if there was a response body present.
+            // An error from this final body-data call is deliberately discarded.
+            if tx.response_transfer_coding != HtpTransferCoding::NoBody {
+                let _ = self.response_body_data(None);
+            }
+            // Run hook RESPONSE_COMPLETE.
+            self.cfg
+                .hook_response_complete
+                .clone()
+                .run_all(self, response_index)?;
+
+            // Clear the data receivers hook if any
+            self.response_receiver_finalize_clear(input)?;
+        }
+        // Check if we want to signal the caller to send request data
+        // (propagates DATA_OTHER before the transaction is finalized).
+        self.request_parser_check_waiting()?;
+        // Otherwise finalize the transaction
+        self.finalize(response_index)?;
+        self.response_next();
+        self.response_state = State::Idle;
+        Ok(())
+    }
+
+    /// Check if we had previously signalled the caller to give us response
+    /// data, and now we are ready to receive it.
+    ///
+    /// Returns Err(HtpStatus::DATA_OTHER) when the caller should yield and
+    /// feed request (inbound) data instead; Ok(()) otherwise.
+    fn request_parser_check_waiting(&mut self) -> Result<()> {
+        // Check if the inbound parser is waiting on us. If it is, that means that
+        // there might be request data that the inbound parser hasn't consumed yet.
+        // If we don't stop parsing we might encounter a response without a request,
+        // which is why we want to return straight away before processing any data.
+        //
+        // This situation will occur any time the parser needs to see the server
+        // respond to a particular situation before it can decide how to proceed. For
+        // example, when a CONNECT is sent, different paths are used when it is accepted
+        // and when it is not accepted.
+        //
+        // It is not enough to check only in_status here. Because of pipelining, it's possible
+        // that many inbound transactions have been processed, and that the parser is
+        // waiting on a response that we have not seen yet.
+        if self.response_status == HtpStreamState::DATA_OTHER
+            && self.response_index() == self.request_index()
+        {
+            return Err(HtpStatus::DATA_OTHER);
+        }
+
+        // Do we have a signal to yield to inbound processing at
+        // the end of the next transaction?
+        if self.response_data_other_at_tx_end {
+            // We do. Let's yield then. The signal is one-shot, so clear it.
+            self.response_data_other_at_tx_end = false;
+            return Err(HtpStatus::DATA_OTHER);
+        }
+        Ok(())
+    }
+
+    /// Remove the given transaction from the parser's transaction store.
+    pub(crate) fn remove_tx(&mut self, tx_id: usize) {
+        self.transactions.remove(tx_id);
+    }
+}
diff --git a/rust/htp/src/decompressors.rs b/rust/htp/src/decompressors.rs
new file mode 100644 (file)
index 0000000..6fe55eb
--- /dev/null
@@ -0,0 +1,1032 @@
+use std::{
+    io::{Cursor, Write},
+    time::Instant,
+};
+
+/// Buffer compression output to this chunk size.
+const ENCODING_CHUNK_SIZE: usize = 8192;
+
+/// Default LZMA dictionary memory limit in bytes.
+const DEFAULT_LZMA_MEMLIMIT: usize = 1_048_576;
+/// Default number of LZMA layers to pass to the decompressor.
+const DEFAULT_LZMA_LAYERS: u32 = 1;
+/// Default max output size for a compression bomb in bytes (1 MB default).
+const DEFAULT_BOMB_LIMIT: u64 = 1_048_576;
+/// Default compressed-to-decompressed ratio that should not be exceeded during decompression.
+const DEFAULT_BOMB_RATIO: u64 = 2048;
+/// Default time limit for a decompression bomb in microseconds.
+const DEFAULT_TIME_LIMIT: u32 = 100_000;
+/// Default number of iterations before checking the time limit.
+const DEFAULT_TIME_FREQ_TEST: u32 = 256;
+/// Default number of layers that will be decompressed.
+const DEFAULT_LAYER_LIMIT: u32 = 2;
+
+#[derive(Copy, Clone)]
+/// Decompression options
+pub(crate) struct Options {
+    /// lzma options or None to disable lzma.
+    lzma: Option<lzma_rs::decompress::Options>,
+    /// Max number of LZMA layers to pass to the decompressor.
+    lzma_layers: Option<u32>,
+    /// Max output size for a compression bomb.
+    bomb_limit: u64,
+    /// Max compressed-to-decompressed ratio that should not be exceeded during decompression.
+    bomb_ratio: u64,
+    /// Max time for a decompression bomb in microseconds.
+    time_limit: u32,
+    /// Number of iterations before checking the time_limit.
+    time_test_freq: u32,
+    /// Max number of layers of compression we will decompress.
+    layer_limit: Option<u32>,
+}
+
+impl Options {
+    /// Set the lzma memlimit.
+    ///
+    /// A value of 0 will disable lzma.
+    pub(crate) fn set_lzma_memlimit(&mut self, memlimit: usize) {
+        self.lzma = if memlimit == 0 {
+            None
+        } else {
+            Some(lzma_rs::decompress::Options {
+                memlimit: Some(memlimit),
+                ..Default::default()
+            })
+        }
+    }
+
+    /// Configures the maximum layers passed to lzma-rs.
+    pub(crate) fn set_lzma_layers(&mut self, layers: Option<u32>) {
+        self.lzma_layers = layers;
+    }
+
+    /// Gets the maximum layers passed to lzma-rs.
+    pub(crate) fn get_lzma_layers(&self) -> Option<u32> {
+        self.lzma_layers
+    }
+
+    /// Get the compression bomb limit.
+    pub(crate) fn get_bomb_limit(&self) -> u64 {
+        self.bomb_limit
+    }
+
+    /// Set the compression bomb limit.
+    pub(crate) fn set_bomb_limit(&mut self, bomblimit: u64) {
+        self.bomb_limit = bomblimit;
+    }
+
+    /// Get the bomb ratio.
+    pub(crate) fn get_bomb_ratio(&self) -> u64 {
+        self.bomb_ratio
+    }
+
+    /// Set the bomb ratio (only needed by tests).
+    #[cfg(test)]
+    pub(crate) fn set_bomb_ratio(&mut self, bomb_ratio: u64) {
+        self.bomb_ratio = bomb_ratio;
+    }
+
+    /// Get the compression time limit in microseconds.
+    pub(crate) fn get_time_limit(&self) -> u32 {
+        self.time_limit
+    }
+
+    /// Set the compression time limit in microseconds.
+    pub(crate) fn set_time_limit(&mut self, time_limit: u32) {
+        self.time_limit = time_limit
+    }
+
+    /// Get the time test frequency.
+    pub(crate) fn get_time_test_freq(&self) -> u32 {
+        self.time_test_freq
+    }
+
+    /// Get the decompression layer limit.
+    pub(crate) fn get_layer_limit(&self) -> Option<u32> {
+        self.layer_limit
+    }
+
+    /// Set the decompression layer limit.
+    pub(crate) fn set_layer_limit(&mut self, layer_limit: Option<u32>) {
+        self.layer_limit = layer_limit;
+    }
+}
+
+impl Default for Options {
+    /// Build the default options from the DEFAULT_* module constants;
+    /// lzma is enabled with the default memory limit.
+    fn default() -> Self {
+        Self {
+            lzma: Some(lzma_rs::decompress::Options {
+                memlimit: Some(DEFAULT_LZMA_MEMLIMIT),
+                ..Default::default()
+            }),
+            lzma_layers: Some(DEFAULT_LZMA_LAYERS),
+            bomb_limit: DEFAULT_BOMB_LIMIT,
+            bomb_ratio: DEFAULT_BOMB_RATIO,
+            time_limit: DEFAULT_TIME_LIMIT,
+            time_test_freq: DEFAULT_TIME_FREQ_TEST,
+            layer_limit: Some(DEFAULT_LAYER_LIMIT),
+        }
+    }
+}
+
+/// Describes a decompressor that is able to restart and passthrough data.
+/// Actual decompression is done using the `Write` trait.
+pub(crate) trait Decompress: Write {
+    /// Restarts the decompressor to try the same one again or a different one.
+    fn restart(&mut self) -> std::io::Result<()>;
+
+    /// Tells all decompressors to passthrough their data instead of
+    /// decompressing to directly call the callback
+    fn set_passthrough(&mut self, passthrough: bool);
+
+    /// Indicates that we have reached the end of data. This would be equivalent
+    /// to sending a NULL pointer in C and may be used by the hooks.
+    fn finish(&mut self) -> std::io::Result<()>;
+}
+
+/// Type alias for callback function.
+/// The argument is Some(chunk) for decompressed data and None at end of stream.
+pub(crate) type CallbackFn = Box<dyn FnMut(Option<&[u8]>) -> Result<usize, std::io::Error>>;
+
+/// Simple wrapper around a closure to chain it to the other decompressors
+pub(crate) struct CallbackWriter(CallbackFn);
+
+impl CallbackWriter {
+    /// Create a new CallbackWriter.
+    pub(crate) fn new(cbk: CallbackFn) -> Self {
+        CallbackWriter(cbk)
+    }
+}
+
+impl Write for CallbackWriter {
+    /// Forward a decompressed chunk to the callback as Some(data).
+    fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
+        (self.0)(Some(data))
+    }
+
+    /// Nothing is buffered here, so flushing is a no-op.
+    fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
+        Ok(())
+    }
+}
+
+impl Decompress for CallbackWriter {
+    /// The callback terminates the chain; there is no state to restart.
+    fn restart(&mut self) -> std::io::Result<()> {
+        Ok(())
+    }
+
+    /// Passthrough has no meaning for the terminal callback.
+    fn set_passthrough(&mut self, _passthrough: bool) {}
+
+    /// Signal end of stream to the callback by passing None.
+    fn finish(&mut self) -> std::io::Result<()> {
+        (self.0)(None)?;
+        Ok(())
+    }
+}
+
+/// Type of compression.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub(crate) enum HtpContentEncoding {
+    /// No compression.
+    None,
+    /// Gzip compression.
+    Gzip,
+    /// Deflate compression (RFC 1951).
+    Deflate,
+    /// Deflate compression with zlib header (RFC 1950)
+    Zlib,
+    /// LZMA compression.
+    Lzma,
+}
+
+/// The outer decompressor tracks the number of callbacks and time spent
+/// decompressing.
+pub(crate) struct Decompressor {
+    /// First decompressor to call
+    inner: Box<dyn Decompress>,
+    /// Time we started decompression (None while the timer is not running)
+    time_before: Option<Instant>,
+    /// Time spent decompressing so far in microseconds (usec)
+    time_spent: u64,
+    /// Number of times the callback was called
+    nb_callbacks: u32,
+}
+
+impl Decompressor {
+    /// Creates a new decompressor from a struct implementing the Decompress trait.
+    fn new(inner: Box<dyn Decompress>) -> Self {
+        Self {
+            inner,
+            time_before: None,
+            time_spent: 0,
+            nb_callbacks: 0,
+        }
+    }
+
+    /// Creates a new decompressor from a callback to call when decompressed
+    /// data is ready.
+    fn callback(callback: CallbackFn) -> Self {
+        Self::new(Box::new(CallbackWriter::new(callback)))
+    }
+
+    /// Prepends a decompressor to this chain by consuming `self.inner`
+    /// and creating a new Decompressor.
+    ///
+    /// Note that decompressors should be added in the same order the data was
+    /// compressed, starting with the callback.
+    ///
+    pub(crate) fn prepend(
+        self, encoding: HtpContentEncoding, options: Options,
+    ) -> std::io::Result<Self> {
+        match encoding {
+            // No encoding: keep the existing chain unchanged.
+            HtpContentEncoding::None => Ok(Decompressor::new(self.inner)),
+            HtpContentEncoding::Gzip
+            | HtpContentEncoding::Deflate
+            | HtpContentEncoding::Zlib
+            | HtpContentEncoding::Lzma => Ok(Decompressor::new(Box::new(InnerDecompressor::new(
+                encoding, self.inner, options,
+            )?))),
+        }
+    }
+
+    /// Creates a new decompressor with `encoding` and adds a callback to be called
+    /// when data is ready.
+    pub(crate) fn new_with_callback(
+        encoding: HtpContentEncoding, callback: CallbackFn, options: Options,
+    ) -> std::io::Result<Self> {
+        Self::callback(callback).prepend(encoding, options)
+    }
+
+    /// Starts the decompression timer.
+    fn timer_start(&mut self) {
+        self.time_before.replace(Instant::now());
+    }
+
+    /// Stops the decompression timer, updates and returns the time spent
+    /// decompressing in microseconds (usec). Returns None if the timer
+    /// was never started.
+    pub(crate) fn timer_reset(&mut self) -> Option<u64> {
+        let now = Instant::now();
+        if let Some(time_before) = self.time_before.replace(now) {
+            // it is unlikely that more than 2^64 will be spent on a single stream
+            self.time_spent = self
+                .time_spent
+                .wrapping_add(now.duration_since(time_before).as_micros() as u64);
+            Some(self.time_spent)
+        } else {
+            None
+        }
+    }
+
+    /// Increments the number of times the callback was called.
+    pub(crate) fn callback_inc(&mut self) -> u32 {
+        self.nb_callbacks = self.nb_callbacks.wrapping_add(1);
+        self.nb_callbacks
+    }
+
+    /// Returns the time spent decompressing in microseconds (usec).
+    pub(crate) fn time_spent(&self) -> u64 {
+        self.time_spent
+    }
+
+    /// Decompress the input `data` by calling the chain of decompressors and
+    /// the data callback.
+    ///
+    /// This will reset the number of callbacks called and restart the
+    /// decompression timer.
+    pub(crate) fn decompress(&mut self, data: &[u8]) -> std::io::Result<()> {
+        self.nb_callbacks = 0;
+        self.timer_start();
+
+        // Time both the write and the flush; the timer is always stopped,
+        // even on error.
+        let result = self.inner.write_all(data).and_then(|_| self.inner.flush());
+
+        self.timer_reset();
+        result
+    }
+
+    /// Notify decompressors that the end of stream was reached. This is equivalent
+    /// to sending a NULL data pointer.
+    pub(crate) fn finish(&mut self) -> std::io::Result<()> {
+        self.inner.finish()
+    }
+
+    /// Set this decompressor to passthrough
+    pub(crate) fn set_passthrough(&mut self, passthrough: bool) {
+        self.inner.set_passthrough(passthrough)
+    }
+}
+
+impl std::fmt::Debug for Decompressor {
+    /// Manual Debug impl: the boxed `inner` chain is not Debug, so only the
+    /// counters are shown.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.debug_struct("Decompressor")
+            .field("time_spent", &self.time_spent)
+            .field("nb_callbacks", &self.nb_callbacks)
+            .finish()
+    }
+}
+
+/// Trait that represents the decompression writers (gzip, deflate, etc.) and
+/// methods needed to write to a temporary buffer.
+pub(crate) trait BufWriter: Write {
+    /// Get a mutable reference to the buffer.
+    fn get_mut(&mut self) -> Option<&mut Cursor<Box<[u8]>>>;
+    /// Notify end of data, consuming the writer and returning its buffer.
+    fn finish(self: Box<Self>) -> std::io::Result<Cursor<Box<[u8]>>>;
+    /// Attempt to finish this output stream, writing out final chunks of data.
+    fn try_finish(&mut self) -> std::io::Result<()>;
+}
+
+/// A BufWriter that doesn't consume any data.
+///
+/// This should be used exclusively with passthrough mode.
+struct NullBufWriter(Cursor<Box<[u8]>>);
+
+impl Write for NullBufWriter {
+    /// Consumes nothing. NOTE(review): returning Ok(0) makes `write_all`
+    /// report WriteZero if this is ever driven directly — presumably
+    /// passthrough mode never routes data through this writer; confirm.
+    fn write(&mut self, _: &[u8]) -> std::io::Result<usize> {
+        Ok(0)
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        Ok(())
+    }
+}
+
+impl BufWriter for NullBufWriter {
+    fn get_mut(&mut self) -> Option<&mut Cursor<Box<[u8]>>> {
+        Some(&mut self.0)
+    }
+
+    /// Hand back the (untouched) buffer.
+    fn finish(self: Box<Self>) -> std::io::Result<Cursor<Box<[u8]>>> {
+        Ok(self.0)
+    }
+
+    fn try_finish(&mut self) -> std::io::Result<()> {
+        Ok(())
+    }
+}
+
+/// States of the incremental gzip member-header parser, in the order the
+/// header fields appear on the wire (RFC 1952).
+#[derive(Debug, PartialEq)]
+enum GzState {
+    Start,
+    Xlen,
+    Extra,
+    Filename,
+    Comment,
+    Crc,
+    AfterHeader,
+}
+
+/// Wrapper around a gzip header parser and a deflate decoder.
+/// We parse the header separately because we want to be tolerant of
+/// checksum or other gzip errors that do not affect our ability
+/// to decompress the data stream but would cause 'correct' gzip decoders
+/// to fail. We want to be tolerant of gzip errors because browsers
+/// are apparently tolerant of gzip errors
+///
+/// https://noxxi.de/research/http-evader-explained-5-gzip.html
+struct GzipBufWriter {
+    /// Holds header bytes across write() calls until the fixed header parses.
+    buffer: Vec<u8>,
+    /// FLG byte from the gzip header; selects the optional fields.
+    flags: u8,
+    /// Remaining length of the FEXTRA field still to skip.
+    xlen: u16,
+    /// Deflate decoder fed with everything after the header.
+    inner: flate2::write::DeflateDecoder<Cursor<Box<[u8]>>>,
+    state: GzState,
+}
+
+impl GzipBufWriter {
+    fn new(buf: Cursor<Box<[u8]>>) -> Self {
+        GzipBufWriter {
+            // 10 bytes is the size of the fixed gzip member header.
+            buffer: Vec::with_capacity(10),
+            flags: 0,
+            xlen: 0,
+            inner: flate2::write::DeflateDecoder::new(buf),
+            state: GzState::Start,
+        }
+    }
+
+    /// Parse the fixed 10-byte gzip header (magic + CM=8, FLG, MTIME, XFL, OS),
+    /// returning the FLG byte.
+    fn parse_start(data: &[u8]) -> nom::IResult<&[u8], u8> {
+        use nom::bytes::streaming::tag;
+        use nom::number::streaming::{le_i32, le_u8};
+        use nom::sequence::tuple;
+
+        let (rest, (_, flags, _mtime, _xfl, _operating_system)) =
+            tuple((tag(b"\x1f\x8b\x08"), le_u8, le_i32, le_u8, le_u8))(data)?;
+        Ok((rest, flags))
+    }
+}
+
+impl Write for GzipBufWriter {
+    /// Incrementally parse the gzip member header, then forward the remaining
+    /// bytes to the deflate decoder. Returns the number of input bytes consumed.
+    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
+        use nom::bytes::streaming::{tag, take_until};
+        use nom::number::streaming::le_u16;
+        use nom::sequence::tuple;
+
+        // Gzip FLG bits (RFC 1952).
+        const FHCRC: u8 = 1 << 1;
+        const FEXTRA: u8 = 1 << 2;
+        const FNAME: u8 = 1 << 3;
+        const FCOMMENT: u8 = 1 << 4;
+
+        // If the fixed header was previously incomplete, continue parsing from
+        // the internal buffer (old bytes + new data); otherwise parse `data`
+        // directly.
+        // NOTE(review): in buffered mode the later `data.len() - parse.len()`
+        // returns mix buffer-relative and data-relative lengths — confirm the
+        // accounting cannot underflow when the buffer outgrows `data`.
+        let (mut parse, direct) = if !self.buffer.is_empty() && self.state == GzState::Start {
+            self.buffer.extend_from_slice(data);
+            (self.buffer.as_ref(), false)
+        } else {
+            (data, true)
+        };
+
+        loop {
+            match self.state {
+                GzState::Start => match GzipBufWriter::parse_start(parse) {
+                    Ok((rest, flags)) => {
+                        parse = rest;
+                        self.flags = flags;
+                        self.state = GzState::Xlen;
+                    }
+                    Err(nom::Err::Incomplete(_)) => {
+                        // Not enough bytes for the fixed header yet; stash them.
+                        if direct {
+                            self.buffer.extend_from_slice(data);
+                        }
+                        return Ok(data.len());
+                    }
+                    Err(_) => {
+                        return Err(std::io::Error::new(
+                            std::io::ErrorKind::InvalidInput,
+                            "Could not parse gzip header",
+                        ));
+                    }
+                },
+                GzState::Xlen => {
+                    // Read the 2-byte extra-field length if FEXTRA is set.
+                    if self.flags & FEXTRA != 0 {
+                        match le_u16::<&[u8], nom::error::Error<&[u8]>>(parse) {
+                            Ok((rest, xlen)) => {
+                                parse = rest;
+                                self.xlen = xlen;
+                            }
+                            Err(nom::Err::Incomplete(_)) => {
+                                return Ok(data.len() - parse.len());
+                            }
+                            Err(_) => {
+                                return Err(std::io::Error::new(
+                                    std::io::ErrorKind::InvalidInput,
+                                    "Could not parse gzip header",
+                                )); // this one is unreachable
+                            }
+                        }
+                    }
+                    self.state = GzState::Extra;
+                }
+                GzState::Extra => {
+                    // Skip the extra field, possibly across several writes.
+                    if self.xlen > 0 {
+                        if parse.len() < self.xlen as usize {
+                            self.xlen -= parse.len() as u16;
+                            return Ok(data.len());
+                        }
+                        parse = &parse[self.xlen as usize..];
+                    }
+                    self.state = GzState::Filename;
+                }
+                GzState::Filename => {
+                    // Skip the NUL-terminated original file name if FNAME is set.
+                    if self.flags & FNAME != 0 {
+                        match tuple((
+                            take_until::<&[u8], &[u8], nom::error::Error<&[u8]>>(b"\0" as &[u8]),
+                            tag(b"\0"),
+                        ))(parse)
+                        {
+                            Ok((rest, _)) => {
+                                parse = rest;
+                            }
+                            Err(nom::Err::Incomplete(_)) => {
+                                return Ok(data.len());
+                            }
+                            Err(_) => {
+                                return Err(std::io::Error::new(
+                                    std::io::ErrorKind::InvalidInput,
+                                    "Could not parse gzip header",
+                                )); // this one is unreachable
+                            }
+                        }
+                    }
+                    self.state = GzState::Comment;
+                }
+                GzState::Comment => {
+                    // Skip the NUL-terminated comment if FCOMMENT is set.
+                    if self.flags & FCOMMENT != 0 {
+                        match tuple((
+                            take_until::<&[u8], &[u8], nom::error::Error<&[u8]>>(b"\0" as &[u8]),
+                            tag(b"\0"),
+                        ))(parse)
+                        {
+                            Ok((rest, _)) => {
+                                parse = rest;
+                            }
+                            Err(nom::Err::Incomplete(_)) => {
+                                return Ok(data.len());
+                            }
+                            Err(_) => {
+                                return Err(std::io::Error::new(
+                                    std::io::ErrorKind::InvalidInput,
+                                    "Could not parse gzip header",
+                                )); // this one is unreachable
+                            }
+                        }
+                    }
+                    self.state = GzState::Crc;
+                }
+                GzState::Crc => {
+                    // Skip the 2-byte header CRC if FHCRC is set (not verified,
+                    // by design — see the tolerance note on the struct).
+                    if self.flags & FHCRC != 0 {
+                        match le_u16::<&[u8], nom::error::Error<&[u8]>>(parse) {
+                            Ok((rest, _)) => {
+                                parse = rest;
+                            }
+                            Err(nom::Err::Incomplete(_)) => {
+                                return Ok(data.len() - parse.len());
+                            }
+                            Err(_) => {
+                                return Err(std::io::Error::new(
+                                    std::io::ErrorKind::InvalidInput,
+                                    "Could not parse gzip header",
+                                )); // this one is unreachable
+                            }
+                        }
+                    }
+                    self.state = GzState::AfterHeader;
+                    return Ok(data.len() - parse.len());
+                }
+                GzState::AfterHeader => {
+                    // Header fully consumed: everything else is deflate data.
+                    return self.inner.write(parse);
+                }
+            }
+        }
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        self.inner.flush()
+    }
+}
+
+impl BufWriter for GzipBufWriter {
+    /// Expose the deflate decoder's output buffer.
+    fn get_mut(&mut self) -> Option<&mut Cursor<Box<[u8]>>> {
+        Some(self.inner.get_mut())
+    }
+
+    fn finish(self: Box<Self>) -> std::io::Result<Cursor<Box<[u8]>>> {
+        self.inner.finish()
+    }
+
+    fn try_finish(&mut self) -> std::io::Result<()> {
+        self.inner.try_finish()
+    }
+}
+
+/// Simple wrapper around a deflate implementation (raw RFC 1951 stream,
+/// no zlib/gzip framing).
+struct DeflateBufWriter(flate2::write::DeflateDecoder<Cursor<Box<[u8]>>>);
+
+impl Write for DeflateBufWriter {
+    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
+        self.0.write(data)
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        self.0.flush()
+    }
+}
+
+impl BufWriter for DeflateBufWriter {
+    fn get_mut(&mut self) -> Option<&mut Cursor<Box<[u8]>>> {
+        Some(self.0.get_mut())
+    }
+
+    fn finish(self: Box<Self>) -> std::io::Result<Cursor<Box<[u8]>>> {
+        self.0.finish()
+    }
+
+    fn try_finish(&mut self) -> std::io::Result<()> {
+        self.0.try_finish()
+    }
+}
+
+/// Simple wrapper around a zlib implementation (deflate with RFC 1950 framing).
+struct ZlibBufWriter(flate2::write::ZlibDecoder<Cursor<Box<[u8]>>>);
+
+impl Write for ZlibBufWriter {
+    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
+        self.0.write(data)
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        self.0.flush()
+    }
+}
+
+impl BufWriter for ZlibBufWriter {
+    fn get_mut(&mut self) -> Option<&mut Cursor<Box<[u8]>>> {
+        Some(self.0.get_mut())
+    }
+
+    fn finish(self: Box<Self>) -> std::io::Result<Cursor<Box<[u8]>>> {
+        self.0.finish()
+    }
+
+    fn try_finish(&mut self) -> std::io::Result<()> {
+        self.0.try_finish()
+    }
+}
+
+/// Simple wrapper around an lzma implementation (lzma_rs streaming decoder
+/// writing into a fixed-size cursor).
+struct LzmaBufWriter(lzma_rs::decompress::Stream<Cursor<Box<[u8]>>>);
+
+impl Write for LzmaBufWriter {
+    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
+        self.0.write(data)
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        self.0.flush()
+    }
+}
+
+impl BufWriter for LzmaBufWriter {
+    fn get_mut(&mut self) -> Option<&mut Cursor<Box<[u8]>>> {
+        self.0.get_output_mut()
+    }
+
+    // Consumes the stream and maps lzma_rs errors onto std::io errors so the
+    // trait signature stays uniform across all decoder wrappers.
+    fn finish(self: Box<Self>) -> std::io::Result<Cursor<Box<[u8]>>> {
+        self.0.finish().map_err(|e| match e {
+            lzma_rs::error::Error::IoError(e) => e,
+            lzma_rs::error::Error::HeaderTooShort(e) => {
+                std::io::Error::new(std::io::ErrorKind::Other, format!("{}", e))
+            }
+            lzma_rs::error::Error::LzmaError(e) | lzma_rs::error::Error::XzError(e) => {
+                std::io::Error::new(std::io::ErrorKind::Other, e)
+            }
+        })
+    }
+
+    // NOTE(review): intentional no-op — lzma_rs::decompress::Stream appears to
+    // offer no non-consuming finalize; confirm against the lzma-rs API.
+    fn try_finish(&mut self) -> std::io::Result<()> {
+        Ok(())
+    }
+}
+
+/// Structure that represents each decompressor in the chain.
+///
+/// Decompressors form a singly linked chain via `inner`; decoded output from
+/// `writer` is forwarded to the next element (or the final callback).
+struct InnerDecompressor {
+    /// Decoder implementation that will write to a temporary buffer.
+    /// `None` only transiently, while the writer is taken out of its slot.
+    writer: Option<Box<dyn BufWriter>>,
+    /// Next decompressor to call.
+    inner: Option<Box<dyn Decompress>>,
+    /// Encoding type of the decompressor.
+    encoding: HtpContentEncoding,
+    /// Indicates whether to pass through the data without calling the writer.
+    passthrough: bool,
+    /// Tracks the number of restarts (bounded; see `restart`).
+    restarts: u8,
+    /// Options for decompression
+    options: Options,
+}
+
+impl InnerDecompressor {
+    /// Returns a new writer according to the content encoding type and whether to passthrough.
+    ///
+    /// The boolean in the returned tuple is the passthrough flag: it is true
+    /// only for LZMA when no lzma options are configured, in which case a
+    /// `NullBufWriter` is returned and data is forwarded undecoded.
+    fn writer(
+        encoding: HtpContentEncoding, options: &Options,
+    ) -> std::io::Result<(Box<dyn BufWriter>, bool)> {
+        // Fixed-size scratch buffer each decoder writes its decoded output into.
+        let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+
+        match encoding {
+            HtpContentEncoding::Gzip => Ok((Box::new(GzipBufWriter::new(buf)), false)),
+            HtpContentEncoding::Deflate => Ok((
+                Box::new(DeflateBufWriter(flate2::write::DeflateDecoder::new(buf))),
+                false,
+            )),
+            HtpContentEncoding::Zlib => Ok((
+                Box::new(ZlibBufWriter(flate2::write::ZlibDecoder::new(buf))),
+                false,
+            )),
+            HtpContentEncoding::Lzma => {
+                if let Some(options) = options.lzma {
+                    Ok((
+                        Box::new(LzmaBufWriter(
+                            lzma_rs::decompress::Stream::new_with_options(&options, buf),
+                        )),
+                        false,
+                    ))
+                } else {
+                    // LZMA decoding disabled by configuration: pass data through.
+                    Ok((Box::new(NullBufWriter(buf)), true))
+                }
+            }
+            HtpContentEncoding::None => Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "expected a valid encoding",
+            )),
+        }
+    }
+
+    /// Create a new `InnerDecompressor` given a content encoding type and the
+    /// next (`inner`) decompressor to call.
+    fn new(
+        encoding: HtpContentEncoding, inner: Box<dyn Decompress>, options: Options,
+    ) -> std::io::Result<Self> {
+        let (writer, passthrough) = Self::writer(encoding, &options)?;
+        Ok(Self {
+            inner: Some(inner),
+            encoding,
+            writer: Some(writer),
+            passthrough,
+            restarts: 0,
+            options,
+        })
+    }
+
+    /// Tries to pass data to the callback instead of calling the writers.
+    ///
+    /// This will set passthrough mode on success or revert on error.
+    /// With no inner decompressor the data is silently accepted as consumed.
+    fn try_passthrough(&mut self, data: &[u8]) -> std::io::Result<usize> {
+        self.set_passthrough(true);
+        if let Some(inner) = &mut self.inner {
+            let result = inner.write(data);
+            if result.is_err() {
+                self.set_passthrough(false);
+            }
+            result
+        } else {
+            Ok(data.len())
+        }
+    }
+
+    /// Flushes the writer and the temporary buffer it writes to.
+    ///
+    /// The writer should be taken out of its slot and passed directly instead of
+    /// `self.writer` to avoid holding multiple mutable references.
+    ///
+    /// Returns an error if there is no inner decompressor to flush to.
+    fn flush_writer(&mut self, writer: &mut Box<dyn BufWriter>) -> std::io::Result<()> {
+        if let Some(mut inner) = self.inner.take() {
+            loop {
+                let result = writer.flush();
+
+                // Flush all of the bytes the writer has written to our temporary
+                // buffer of fixed size.
+                if let Some(cursor) = writer.get_mut() {
+                    inner.write_all(&cursor.get_ref()[0..cursor.position() as usize])?;
+                    cursor.set_position(0);
+                }
+
+                // Continue flushing if the flush resulted in a `WriteZero`. This
+                // error indicates that the writer was unable to write all bytes
+                // to our temporary buffer, likely because it was full.
+                if let Err(e) = result {
+                    match e.kind() {
+                        std::io::ErrorKind::WriteZero => {}
+                        _ => {
+                            // Any other flush error: restart with a new decoder.
+                            self.restart()?;
+                            break;
+                        }
+                    }
+                } else {
+                    break;
+                }
+            }
+            self.inner.replace(inner);
+            Ok(())
+        } else {
+            Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "nothing to flush to",
+            ))
+        }
+    }
+
+    /// Attempts to finalize the writer, draining any decoded bytes it produced
+    /// into the inner decompressor. Repeats while the writer reports
+    /// `WriteZero` (temporary buffer full). Returns true if any data was
+    /// forwarded downstream, false otherwise.
+    fn try_finish(&mut self, writer: &mut Box<dyn BufWriter>) -> bool {
+        loop {
+            let redo = match writer.try_finish() {
+                Err(e) => e.kind() == std::io::ErrorKind::WriteZero,
+                _ => false,
+            };
+            if let Some(cursor) = writer.get_mut() {
+                if cursor.position() > 0 {
+                    if let Some(mut inner) = self.inner.take() {
+                        _ = inner.write_all(&cursor.get_ref()[0..cursor.position() as usize]);
+                        cursor.set_position(0);
+                        self.inner.replace(inner);
+                        if redo {
+                            continue;
+                        }
+                        return true;
+                    }
+                }
+            }
+            return false;
+        }
+    }
+}
+
+impl Write for InnerDecompressor {
+    /// Feeds `data` through this decompressor stage.
+    ///
+    /// In passthrough mode data is forwarded to the inner stage untouched.
+    /// Otherwise the writer decodes into the temporary buffer, flushing and
+    /// retrying on `WriteZero`, and falling back to restart/passthrough on
+    /// other decode errors.
+    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
+        // Passthrough mode
+        if self.passthrough {
+            if let Some(inner) = &mut self.inner {
+                inner.write(data)
+            } else {
+                Ok(data.len())
+            }
+
+        // Take the writer out of its slot to avoid holding multiple mutable
+        // references. Any calls using `self.writer` should be avoided while the
+        // writer is in this state.
+        } else if let Some(mut writer) = self.writer.take() {
+            match writer.write(data) {
+                Ok(consumed) => {
+                    let result = if consumed == 0 {
+                        // This could indicate that we have reached the end
+                        // of the stream. Any data after the first end of
+                        // stream (such as in multipart gzip) is ignored and
+                        // we pretend to have consumed this data.
+                        Ok(data.len())
+                    } else {
+                        Ok(consumed)
+                    };
+                    self.writer.replace(writer);
+                    result
+                }
+                Err(e) => {
+                    match e.kind() {
+                        std::io::ErrorKind::WriteZero => {
+                            self.flush_writer(&mut writer)?;
+                            // Recursion: the buffer was flushed until `WriteZero`
+                            // stopped occurring.
+                            self.writer.replace(writer);
+                            self.write(data)
+                        }
+                        _ => {
+                            if self.restarts == 0 {
+                                let written = self.try_finish(&mut writer);
+                                if written {
+                                    // error, but some data has been written, stop here
+                                    return Err(e);
+                                }
+                            }
+                            // try to restart, any data in the temp buffer will be
+                            // discarded
+                            if self.restart().is_err() {
+                                self.try_passthrough(data)
+                            } else {
+                                // Recursion: restart will fail after a small
+                                // number of attempts
+                                self.write(data)
+                            }
+                        }
+                    }
+                }
+            }
+        } else {
+            // Writer slot is empty: signal the caller rather than panic.
+            Err(std::io::Error::new(
+                std::io::ErrorKind::WriteZero,
+                "writer was not initialized",
+            ))
+        }
+    }
+
+    /// Flushes this stage's writer and then the inner stage, in order.
+    fn flush(&mut self) -> std::io::Result<()> {
+        if let Some(mut writer) = self.writer.take() {
+            self.flush_writer(&mut writer)?;
+            self.writer.replace(writer);
+        }
+        if let Some(inner) = &mut self.inner {
+            inner.flush()
+        } else {
+            Ok(())
+        }
+    }
+}
+
+impl Decompress for InnerDecompressor {
+    /// Replaces the writer with one for the next candidate encoding.
+    ///
+    /// On decode failure the encoding is cycled (gzip -> deflate -> zlib ->
+    /// gzip, lzma -> deflate) for up to 3 attempts, after which an error is
+    /// returned. Any data in the temporary buffer is discarded.
+    fn restart(&mut self) -> std::io::Result<()> {
+        if self.restarts < 3 {
+            // cycle to the next candidate encoding type
+            self.encoding = match self.encoding {
+                HtpContentEncoding::Gzip => HtpContentEncoding::Deflate,
+                HtpContentEncoding::Deflate => HtpContentEncoding::Zlib,
+                HtpContentEncoding::Zlib => HtpContentEncoding::Gzip,
+                HtpContentEncoding::Lzma => HtpContentEncoding::Deflate,
+                HtpContentEncoding::None => {
+                    return Err(std::io::Error::new(
+                        std::io::ErrorKind::Other,
+                        "expected a valid encoding",
+                    ))
+                }
+            };
+            let (writer, passthrough) = Self::writer(self.encoding, &self.options)?;
+            self.writer = Some(writer);
+            // Only escalate to passthrough; never clear an already-set flag here.
+            if passthrough {
+                self.passthrough = passthrough;
+            }
+            self.restarts += 1;
+            Ok(())
+        } else {
+            Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "too many restart attempts",
+            ))
+        }
+    }
+
+    // Tell all the decompressors to pass through the data instead of calling
+    // the writer.
+    fn set_passthrough(&mut self, passthrough: bool) {
+        self.passthrough = passthrough;
+        if let Some(inner) = &mut self.inner {
+            inner.set_passthrough(passthrough);
+        }
+    }
+
+    // Tell all decompressors that there is no more data to receive.
+    // Flushes and finalizes this stage's writer, forwards any remaining
+    // decoded bytes downstream, then finishes the inner stage.
+    fn finish(&mut self) -> std::io::Result<()> {
+        let output = if let Some(mut writer) = self.writer.take() {
+            self.flush_writer(&mut writer)?;
+            Some(writer.finish()?)
+        } else {
+            None
+        };
+
+        if let Some(mut inner) = self.inner.take() {
+            if let Some(output) = output {
+                inner.write_all(&output.get_ref()[..output.position() as usize])?;
+            }
+            inner.finish()
+        } else {
+            Ok(())
+        }
+    }
+}
+
+#[test]
+// Exercises GzipBufWriter's incremental gzip header parsing: each case feeds a
+// header with a different FLG combination (FEXTRA/FNAME/FCOMMENT/FHCRC) and
+// checks the consumed byte count and resulting parser state.
+fn test_gz_header() {
+    // No flags or other bits
+    let input = b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x00";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+
+    // Just CRC
+    let input = b"\x1f\x8b\x08\x02\x00\x00\x00\x00\x00\x00\x11\x22";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+
+    // Just extra
+    let input = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\x00\x04\x00abcd";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+
+    // Just filename
+    let input = b"\x1f\x8b\x08\x08\x00\x00\x00\x00\x00\x00variable\x00";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+
+    // Just comment
+    let input = b"\x1f\x8b\x08\x10\x00\x00\x00\x00\x00\x00also variable\x00";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+
+    // Extra and Filename
+    let input = b"\x1f\x8b\x08\x0c\x00\x00\x00\x00\x00\x00\x05\x00extrafilename\x00";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+
+    // Extra and Comment and CRC
+    let input = b"\x1f\x8b\x08\x16\x00\x00\x00\x00\x00\x00\x05\x00extracomment\x00\x34\x12";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+
+    // Filename and Comment
+    let input = b"\x1f\x8b\x08\x18\x00\x00\x00\x00\x00\x00filename\x00comment\x00";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+
+    // Extra Filename and Comment and CRC
+    let input =
+        b"\x1f\x8b\x08\x1e\x00\x00\x00\x00\x00\x00\x05\x00extrafilename\x00comment\x00\x34\x12";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+
+    // Too short: header CRC is truncated by one byte, parser stalls in Crc
+    let input = b"\x1f\x8b\x08\x1e\x00\x00\x00\x00\x00\x00\x05\x00extrafilename\x00comment\x00\x34";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len() - 1);
+    assert_eq!(gzw.state, GzState::Crc);
+    // final missing CRC in header
+    let input = b"\x34\xee";
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::AfterHeader);
+    // One byte short of the fixed 10-byte header prefix: parser stays in Start
+    let input = b"\x1f\x8b\x08\x01\x00\x00\x00\x00\x00";
+    let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>);
+    let mut gzw = GzipBufWriter::new(buf);
+    assert_eq!(gzw.write(input).unwrap(), input.len());
+    assert_eq!(gzw.state, GzState::Start);
+}
diff --git a/rust/htp/src/error.rs b/rust/htp/src/error.rs
new file mode 100644 (file)
index 0000000..e9421f0
--- /dev/null
@@ -0,0 +1,47 @@
+use crate::HtpStatus;
+use nom::error::ErrorKind as NomErrorKind;
+
+/// Helper for nom's default error type
+pub(crate) type NomError<I> = nom::error::Error<I>;
+
+/// Alias for libhtp Result type. Result types are classified by `HtpStatus`.
+pub(crate) type Result<T> = std::result::Result<T, HtpStatus>;
+
+impl<T> From<Result<T>> for HtpStatus {
+    /// Collapses a `Result` into its status code: any `Ok` becomes
+    /// `HtpStatus::OK`, errors carry their own status through.
+    fn from(res: Result<T>) -> HtpStatus {
+        match res {
+            Ok(_) => HtpStatus::OK,
+            Err(e) => e,
+        }
+    }
+}
+
+impl From<HtpStatus> for Result<()> {
+    /// Inverse of the conversion above: `HtpStatus::OK` becomes `Ok(())`,
+    /// every other status becomes `Err(status)`.
+    fn from(status: HtpStatus) -> Result<()> {
+        if status == HtpStatus::OK {
+            Ok(())
+        } else {
+            Err(status)
+        }
+    }
+}
+
+// Lossy conversions: external error types collapse to the generic
+// `HtpStatus::ERROR` code (the original error detail is discarded).
+impl From<std::io::Error> for HtpStatus {
+    fn from(_: std::io::Error) -> Self {
+        HtpStatus::ERROR
+    }
+}
+
+impl<I: std::fmt::Debug> From<nom::Err<NomError<I>>> for HtpStatus {
+    fn from(_: nom::Err<NomError<I>>) -> Self {
+        HtpStatus::ERROR
+    }
+}
+
+impl From<NomErrorKind> for HtpStatus {
+    fn from(_: NomErrorKind) -> Self {
+        HtpStatus::ERROR
+    }
+}
diff --git a/rust/htp/src/headers.rs b/rust/htp/src/headers.rs
new file mode 100644 (file)
index 0000000..5c65306
--- /dev/null
@@ -0,0 +1,968 @@
+use crate::util::{is_token, trimmed, FlagOperations};
+use nom::{
+    branch::alt,
+    bytes::complete::tag as complete_tag,
+    bytes::streaming::{tag, take_till, take_while, take_while1},
+    character::{is_space, streaming::space0},
+    combinator::{complete, map, not, opt, peek},
+    sequence::tuple,
+    Err::Incomplete,
+    IResult, Needed,
+};
+
+/// Helper for Parsed bytes and corresponding HeaderFlags
+pub(crate) type ParsedBytes<'a> = (&'a [u8], u64);
+// Helper for Parsed Headers and corresponding termination
+pub(crate) type ParsedHeaders = (Vec<Header>, bool);
+// Helper for matched eol+ folding bytes + flags
+pub(crate) type FoldingBytes<'a> = (&'a [u8], &'a [u8], u64);
+// Helper for folding or terminator bytes
+pub(crate) type FoldingOrTerminator<'a> = (ParsedBytes<'a>, Option<&'a [u8]>);
+// Helper for value bytes and the value terminator
+pub(crate) type ValueBytes<'a> = (&'a [u8], FoldingOrTerminator<'a>);
+
+/// Namespace for bit flags recording anomalies observed while parsing a
+/// header name/value pair.
+#[repr(C)]
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) struct HeaderFlags;
+
+impl HeaderFlags {
+    pub(crate) const FOLDING: u64 = 0x0001;
+    // Composite: setting FOLDING_SPECIAL_CASE also sets FOLDING.
+    pub(crate) const FOLDING_SPECIAL_CASE: u64 = (0x0002 | Self::FOLDING);
+    pub(crate) const NAME_EMPTY: u64 = 0x0004;
+    pub(crate) const VALUE_EMPTY: u64 = 0x0008;
+    pub(crate) const NAME_NON_TOKEN_CHARS: u64 = 0x0010;
+    pub(crate) const FIELD_REPEATED: u64 = 0x0020;
+    pub(crate) const NAME_TRAILING_WHITESPACE: u64 = 0x0040;
+    pub(crate) const NAME_LEADING_WHITESPACE: u64 = 0x0080;
+    pub(crate) const NULL_TERMINATED: u64 = 0x0100;
+    // Composite: setting MISSING_COLON also sets NAME_EMPTY.
+    pub(crate) const MISSING_COLON: u64 = (0x0200 | Self::NAME_EMPTY);
+    pub(crate) const DEFORMED_EOL: u64 = 0x0400;
+}
+
+/// A parsed header name plus the `HeaderFlags` bits collected while parsing it.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct Name {
+    /// Name bytes, stored trimmed of surrounding whitespace.
+    pub(crate) name: Vec<u8>,
+    /// `HeaderFlags` bits observed for this name.
+    pub(crate) flags: u64,
+}
+
+impl Name {
+    /// Builds a `Name`, trimming the raw bytes before storing them.
+    pub(crate) fn new(name: &[u8], flags: u64) -> Self {
+        Self {
+            name: trimmed(name).to_vec(),
+            flags,
+        }
+    }
+}
+
+/// A parsed header value plus the `HeaderFlags` bits collected while parsing it.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct Value {
+    /// Value bytes, stored trimmed of surrounding whitespace.
+    pub(crate) value: Vec<u8>,
+    /// `HeaderFlags` bits observed for this value.
+    pub(crate) flags: u64,
+}
+
+impl Value {
+    /// Builds a `Value`, trimming the raw bytes before storing them.
+    pub(crate) fn new(value: &[u8], flags: u64) -> Self {
+        Self {
+            value: trimmed(value).to_vec(),
+            flags,
+        }
+    }
+}
+
+/// A complete parsed header: name and value, each with their own flags.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct Header {
+    pub(crate) name: Name,
+    pub(crate) value: Value,
+}
+
+impl Header {
+    pub(crate) fn new(name: Name, value: Value) -> Self {
+        Self { name, value }
+    }
+
+    /// Convenience constructor from raw byte slices and per-part flag sets.
+    pub(crate) fn new_with_flags(
+        name_bytes: &[u8], name_flags: u64, value_bytes: &[u8], value_flags: u64,
+    ) -> Self {
+        Self::new(
+            Name::new(name_bytes, name_flags),
+            Value::new(value_bytes, value_flags),
+        )
+    }
+}
+
+/// Enumerates possible parser types. The side changes which bytes are
+/// accepted as line endings and how deformed EOLs are handled.
+#[derive(PartialEq, Eq, Copy, Clone, Debug)]
+pub(crate) enum Side {
+    /// Request Parser: null terminates
+    Request,
+    /// Response Parser: accepts CR as a line ending
+    Response,
+}
+
+/// HTTP header parser, specialized by `Side` (request vs. response rules).
+pub(crate) struct Parser {
+    /// Which side's line-ending rules to apply.
+    side: Side,
+    /// When true, parse assuming no further data will arrive (complete mode).
+    complete: bool,
+}
+
+impl Parser {
+    /// Creates a parser for the given side, in streaming (incomplete) mode.
+    pub(crate) fn new(side: Side) -> Self {
+        Self {
+            side,
+            complete: false,
+        }
+    }
+
+    /// Sets the parser complete state.
+    ///
+    /// If set to true, parser operates under the assumption that no more data
+    /// is incoming (so trailing partial matches are treated as final).
+    pub(crate) fn set_complete(&mut self, complete: bool) {
+        self.complete = complete;
+    }
+
+    /// Returns a predicate matching end-of-line bytes: LF always, and
+    /// additionally CR on the response side.
+    fn is_eol(&self) -> impl Fn(u8) -> bool + '_ {
+        move |c| c == b'\n' || (self.side == Side::Response && c == b'\r')
+    }
+
+    /// Parse one complete end of line character or character set.
+    ///
+    /// Requests accept CRLF or LF; responses additionally accept the
+    /// permissive LFCR and bare CR forms.
+    fn complete_eol_regular(&self) -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> + '_ {
+        move |input| {
+            if self.side == Side::Response {
+                alt((
+                    complete_tag("\r\n"),
+                    complete_tag("\n\r"),
+                    complete_tag("\n"),
+                    complete_tag("\r"),
+                ))(input)
+            } else {
+                alt((complete_tag("\r\n"), complete_tag("\n")))(input)
+            }
+        }
+    }
+
+    /// Parse one complete deformed end of line character set.
+    ///
+    /// Matches malformed EOL sequences (e.g. "\n\r\r\n" followed by another
+    /// line ending) and tags them with `HeaderFlags::DEFORMED_EOL`.
+    fn complete_eol_deformed(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ {
+        move |input| {
+            if self.side == Side::Response {
+                alt((
+                    map(
+                        tuple((
+                            complete_tag("\n\r\r\n"),
+                            peek(alt((complete_tag("\n"), complete_tag("\r\n")))),
+                        )),
+                        |(eol, _)| (eol, HeaderFlags::DEFORMED_EOL),
+                    ),
+                    map(
+                        tuple((
+                            complete_tag("\r\n\r"),
+                            take_while1(|c| c == b'\r' || c == b' ' || c == b'\t'),
+                            opt(complete_tag("\n")),
+                            not(alt((complete_tag("\n"), complete_tag("\r\n")))),
+                        )),
+                        // Reconstruct the full matched span from the pieces.
+                        |(eol1, eol2, eol3, _): (&[u8], &[u8], Option<&[u8]>, _)| {
+                            (
+                                &input[..(eol1.len() + eol2.len() + eol3.unwrap_or(b"").len())],
+                                HeaderFlags::DEFORMED_EOL,
+                            )
+                        },
+                    ),
+                ))(input)
+            } else {
+                map(
+                    alt((
+                        tuple((
+                            complete_tag("\n\r\r\n"),
+                            peek(alt((complete_tag("\n"), complete_tag("\r\n")))),
+                        )),
+                        tuple((complete_tag("\n\r"), peek(complete_tag("\r\n")))),
+                    )),
+                    |(eol, _)| (eol, HeaderFlags::DEFORMED_EOL),
+                )(input)
+            }
+        }
+    }
+
+    /// Parse one complete end of line character or character set,
+    /// trying the deformed forms first, then the regular ones (flags = 0).
+    fn complete_eol(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ {
+        move |input| {
+            alt((
+                self.complete_eol_deformed(),
+                map(self.complete_eol_regular(), |eol| (eol, 0)),
+            ))(input)
+        }
+    }
+
+    /// Parse one header end of line, and guarantee that it is not folding
+    /// (i.e. not followed by leading whitespace on the next line).
+    fn eol(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ {
+        move |input| {
+            map(
+                tuple((self.complete_eol(), not(folding_lws))),
+                |(end, _)| end,
+            )(input)
+        }
+    }
+
+    /// Parse one null byte or one end of line, and guarantee that it is not
+    /// folding. Streaming variant (may return Incomplete).
+    fn null_or_eol(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ {
+        move |input| alt((null, self.eol()))(input)
+    }
+
+    /// Parse one null byte or complete end of line (no folding check;
+    /// used when no more input is expected).
+    fn complete_null_or_eol(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ {
+        move |input| alt((null, self.complete_eol()))(input)
+    }
+
+    /// Parse header folding bytes (eol + whitespace or eol + special cases).
+    ///
+    /// Returns the eol bytes, the folding whitespace bytes, and the combined
+    /// flags. The response side only folds after a regular eol.
+    fn folding(&self) -> impl Fn(&[u8]) -> IResult<&[u8], FoldingBytes> + '_ {
+        move |input| {
+            if self.side == Side::Response {
+                map(
+                    tuple((
+                        map(self.complete_eol_regular(), |eol| (eol, 0)),
+                        folding_lws,
+                    )),
+                    |((eol, flags), (lws, other_flags))| (eol, lws, flags | other_flags),
+                )(input)
+            } else {
+                map(
+                    tuple((self.complete_eol(), folding_lws)),
+                    |((eol, flags), (lws, other_flags))| (eol, lws, flags | other_flags),
+                )(input)
+            }
+        }
+    }
+
+    /// Parse complete folding bytes or a value terminator (eol or null),
+    /// treating the input as final (no Incomplete results from folding).
+    fn complete_folding_or_terminator(
+        &self,
+    ) -> impl Fn(&[u8]) -> IResult<&[u8], FoldingOrTerminator> + '_ {
+        move |input| {
+            alt((
+                complete(map(self.folding(), |(end, fold, flags)| {
+                    ((end, flags), Some(fold))
+                })),
+                map(self.complete_null_or_eol(), |end| (end, None)),
+            ))(input)
+        }
+    }
+
+    /// Parse folding bytes or a value terminator (eol or null) in streaming
+    /// mode: may return Incomplete if more data could change the outcome.
+    fn streaming_folding_or_terminator(
+        &self,
+    ) -> impl Fn(&[u8]) -> IResult<&[u8], FoldingOrTerminator> + '_ {
+        move |input| {
+            alt((
+                map(self.folding(), |(end, fold, flags)| {
+                    ((end, flags), Some(fold))
+                }),
+                map(self.null_or_eol(), |end| (end, None)),
+            ))(input)
+        }
+    }
+
+    /// Parse folding bytes or a value terminator (eol or null), dispatching
+    /// to the complete or streaming variant based on the parser state.
+    fn folding_or_terminator(&self) -> impl Fn(&[u8]) -> IResult<&[u8], FoldingOrTerminator> + '_ {
+        move |input| {
+            if self.complete {
+                self.complete_folding_or_terminator()(input)
+            } else {
+                self.streaming_folding_or_terminator()(input)
+            }
+        }
+    }
+
+    /// Parse a header value.
+    /// Returns the bytes and the value terminator; null, eol or folding
+    /// eg. (bytes, (eol_bytes, Option<fold_bytes>))
+    fn value_bytes(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ValueBytes> + '_ {
+        move |input| {
+            let (mut remaining, mut value) = take_till(self.is_eol())(input)?;
+            // A trailing CR before the eol belongs to the terminator, not
+            // the value: strip it and rewind `remaining` to include it.
+            if value.last() == Some(&b'\r') {
+                value = &value[..value.len() - 1];
+                remaining = &input[value.len()..];
+            }
+            let (remaining, result) = self.folding_or_terminator()(remaining)?;
+            Ok((remaining, (value, result)))
+        }
+    }
+
+    /// Parse a complete header value, including any folded headers.
+    ///
+    /// Folded continuation lines are joined into a single value, separated by
+    /// the first folding byte (or a space), and the flags from every fragment
+    /// are merged.
+    fn value(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Value> + '_ {
+        move |input| {
+            let (mut rest, (val_bytes, ((_eol, mut flags), fold))) = self.value_bytes()(input)?;
+            let mut value = val_bytes.to_vec();
+            if let Some(fold) = fold {
+                let mut i = rest;
+                let mut ofold = fold;
+                loop {
+                    if self.side == Side::Response {
+                        // Peek ahead for ambiguous name with lws vs. value with folding
+                        match tuple((token_chars, separator_regular))(i) {
+                            Ok((_, ((_, tokens, _), (_, _)))) if !tokens.is_empty() => {
+                                // Looks like the start of a new header: stop folding.
+                                flags.unset(HeaderFlags::FOLDING_SPECIAL_CASE);
+                                if value.is_empty() {
+                                    flags.set(HeaderFlags::VALUE_EMPTY);
+                                }
+                                // i is now the latest rest
+                                return Ok((i, Value::new(&value, flags)));
+                            }
+                            Err(Incomplete(_)) => {
+                                return Err(Incomplete(Needed::new(1)));
+                            }
+                            _ => {}
+                        }
+                    }
+                    let (rest2, (val_bytes, ((eol, other_flags), fold))) = self.value_bytes()(i)?;
+                    i = rest2;
+                    flags.set(other_flags);
+                    //If the value is empty, the value started with a fold and we don't want to push back a space
+                    if !value.is_empty() {
+                        if !ofold.is_empty() {
+                            value.push(ofold[0]);
+                        } else {
+                            value.push(b' ');
+                        }
+                    }
+                    if !val_bytes.is_empty() || eol.len() > 1 {
+                        // we keep empty folding as a future new eol
+                        rest = rest2;
+                        value.extend(val_bytes);
+                    } else if val_bytes.is_empty()
+                        && eol.len() == 1
+                        && !rest2.is_empty()
+                        && rest2[0] == b'\n'
+                    {
+                        // eol + empty fold + double eol is end of headers
+                        rest = rest2;
+                    }
+                    if let Some(fold) = fold {
+                        ofold = fold;
+                    } else {
+                        return Ok((rest, Value::new(&value, flags)));
+                    }
+                }
+            } else {
+                if value.is_empty() {
+                    flags.set(HeaderFlags::VALUE_EMPTY);
+                }
+                Ok((rest, Value::new(&value, flags)))
+            }
+        }
+    }
+
+    /// Parse one header name.
+    ///
+    /// Scans forward to the colon, tracking embedded line endings so that a
+    /// terminator followed by non-space data ends the name instead. Flags
+    /// leading/trailing whitespace, non-token characters and empty names.
+    fn name(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Name> + '_ {
+        move |input| {
+            // `terminated` holds the last eol byte seen (0 = none yet).
+            let mut terminated = 0;
+            let mut offset = 0;
+            for (i, c) in input.iter().enumerate() {
+                if terminated == 0 {
+                    if *c == b':' {
+                        offset = i;
+                        break;
+                    } else if *c == b'\n' || (self.side == Side::Response && *c == b'\r') {
+                        terminated = *c;
+                    }
+                } else if *c == b' ' {
+                    // Space after an eol: treat it as folding, keep scanning.
+                    terminated = 0;
+                } else if *c == b'\n' && terminated == b'\r' {
+                    terminated = *c;
+                } else {
+                    // Non-space after an eol: the name ended before the eol.
+                    offset = i - 1;
+                    break;
+                }
+            }
+            let (name, rem) = input.split_at(offset);
+            let mut flags = 0;
+            if !name.is_empty() {
+                if is_space(name[0]) {
+                    flags.set(HeaderFlags::NAME_LEADING_WHITESPACE)
+                }
+                if let Some(end) = name.last() {
+                    if is_space(*end) {
+                        flags.set(HeaderFlags::NAME_TRAILING_WHITESPACE);
+                    }
+                }
+                if let Ok((rem, _)) = token_chars(name) {
+                    if !rem.is_empty() {
+                        flags.set(HeaderFlags::NAME_NON_TOKEN_CHARS);
+                    }
+                }
+            } else {
+                flags.set(HeaderFlags::NAME_EMPTY)
+            }
+            Ok((rem, Name::new(name, flags)))
+        }
+    }
+
+    /// Parse a separator between header name and value
+    fn separator(&self) -> impl Fn(&[u8]) -> IResult<&[u8], u64> + '_ {
+        move |input| map(separator_regular, |_| 0)(input)
+    }
+
+    /// Parse data before an eol with no colon as an empty name with the data as the value
+    ///
+    /// An immediate CRLF is rejected (that is end-of-headers, not a header);
+    /// otherwise the rest of the line becomes the value of a nameless header.
+    fn header_sans_colon(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Header> + '_ {
+        move |input| {
+            let (remaining, (_, value)) = tuple((not(complete_tag("\r\n")), self.value()))(input)?;
+
+            // Mark both the (empty) name and the value as missing the colon.
+            let flags = value.flags | HeaderFlags::MISSING_COLON;
+            Ok((
+                remaining,
+                Header::new_with_flags(b"", flags, &value.value, flags),
+            ))
+        }
+    }
+
+    /// Parse a header name separator value
+    fn header_with_colon(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Header> + '_ {
+        move |input| {
+            map(
+                tuple((self.name(), self.separator(), self.value())),
+                |(mut name, flag, mut value)| {
+                    name.flags |= flag;
+                    value.flags |= flag;
+                    Header::new(name, value)
+                },
+            )(input)
+        }
+    }
+
+    /// Parses a header name and value with, or without a colon separator
+    ///
+    /// The colon form is tried first; `complete` converts its Incomplete
+    /// results into errors so the colon-less fallback can be attempted.
+    fn header(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Header> + '_ {
+        move |input| alt((complete(self.header_with_colon()), self.header_sans_colon()))(input)
+    }
+
+    /// Parse multiple headers and indicate if end of headers or null was found
+    ///
+    /// Returns the remaining input and `(headers, eoh)` where `eoh` is true
+    /// when an end-of-headers terminator (eol or NUL) was consumed.
+    pub(crate) fn headers(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedHeaders> + '_ {
+        move |input| {
+            // Pre-size for a typical header count to limit reallocations.
+            let mut out = Vec::with_capacity(16);
+            let mut i = input;
+            loop {
+                match self.header()(i) {
+                    Ok((rest, head)) => {
+                        i = rest;
+                        // A NUL-terminated value ends the header block immediately.
+                        let is_null_terminated =
+                            head.value.flags.is_set(HeaderFlags::NULL_TERMINATED);
+                        out.push(head);
+                        if is_null_terminated {
+                            return Ok((rest, (out, true)));
+                        }
+                        // A bare eol right after a header marks end of headers.
+                        if let Ok((rest2, _eoh)) = self.complete_eol_regular()(rest) {
+                            return Ok((rest2, (out, true)));
+                        }
+                    }
+                    Err(Incomplete(x)) => {
+                        // Nothing parsed yet: propagate so the caller can buffer
+                        // more data.
+                        if out.is_empty() {
+                            return Err(Incomplete(x));
+                        }
+                        // Otherwise hand back what we have; eoh not seen yet.
+                        return Ok((i, (out, false)));
+                    }
+                    Err(e) => {
+                        // An error before any header may still be a lone
+                        // end-of-headers eol; check before failing.
+                        if out.is_empty() {
+                            if let Ok((rest2, _eoh)) = self.complete_eol()(i) {
+                                return Ok((rest2, (out, true)));
+                            }
+                        }
+                        return Err(e);
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Parse one null character and return it and the NULL_TERMINATED flag
+fn null(input: &[u8]) -> IResult<&[u8], ParsedBytes> {
+    map(complete_tag("\0"), |null| {
+        (null, HeaderFlags::NULL_TERMINATED)
+    })(input)
+}
+
+/// Extracts folding lws (whitespace only)
+fn folding_lws(input: &[u8]) -> IResult<&[u8], ParsedBytes> {
+    map(alt((tag(" "), tag("\t"), tag("\0"))), |fold| {
+        (fold, HeaderFlags::FOLDING)
+    })(input)
+}
+
+/// Parse a regular separator (colon followed by optional spaces) between header name and value
+fn separator_regular(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+    tuple((complete_tag(":"), space0))(input)
+}
+
+type leading_token_trailing<'a> = (&'a [u8], &'a [u8], &'a [u8]);
+/// Parse token characters with leading and trailing whitespace
+fn token_chars(input: &[u8]) -> IResult<&[u8], leading_token_trailing> {
+    tuple((space0, take_while(is_token), space0))(input)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::error::NomError;
+    use nom::{
+        error::ErrorKind::{Not, Tag},
+        Err::{Error, Incomplete},
+        Needed,
+    };
+    use rstest::rstest;
+    // Shorthand: convert a string literal into a &[u8] byte slice.
+    macro_rules! b {
+        ($b: literal) => {
+            $b.as_bytes()
+        };
+    }
+    // Helper for matched leading whitespace, bytes, and trailing whitespace
+    pub(crate) type SurroundedBytes<'a> = (&'a [u8], &'a [u8], &'a [u8]);
+
+    // headers(): full multi-header inputs. The third case argument supplies a
+    // response-side expectation only when it differs from the request side.
+    #[rstest]
+    #[case::null_does_not_terminate(b"k1:v1\r\nk2:v2 before\0v2 after\r\n\r\n",Ok((b!(""), (vec![Header::new_with_flags(b"k1", 0, b"v1", 0), Header::new_with_flags(b"k2", 0, b"v2 before\0v2 after", 0)], true))), None)]
+    #[case::flags(b"k1:v1\r\n:v2\r\n v2+\r\nk3: v3\r\nk4 v4\r\nk\r5:v\r5\n\rmore\r\n\r\n", Ok((b!(""), (
+            vec![
+                Header::new_with_flags(b"k1", 0, b"v1", 0),
+                Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"v2 v2+", HeaderFlags::FOLDING),
+                Header::new_with_flags(b"k3", 0, b"v3", 0),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"k4 v4", HeaderFlags::MISSING_COLON),
+                Header::new_with_flags(b"k\r5", HeaderFlags::NAME_NON_TOKEN_CHARS, b"v\r5", 0),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"more", HeaderFlags::MISSING_COLON),
+                ], true))), Some(Ok((b!(""), (
+            vec![
+                Header::new_with_flags(b"k1", 0, b"v1", 0),
+                Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"v2 v2+", HeaderFlags::FOLDING),
+                Header::new_with_flags(b"k3", 0, b"v3", 0),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"k4 v4", HeaderFlags::MISSING_COLON),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"k", HeaderFlags::MISSING_COLON),
+                Header::new_with_flags(b"5", 0, b"v", 0),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"5", HeaderFlags::MISSING_COLON),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"more", HeaderFlags::MISSING_COLON),
+                ], true)))))]
+    #[case::incomplete_eoh(b"k1:v1\r\nk2:v2\r", Ok((b!("k2:v2\r"), (vec![Header::new_with_flags(b"k1", 0, b"v1", 0)], false))), None)]
+    #[case::incomplete_eoh_null(b"k1:v1\nk2:v2\0v2\r\nk3:v3\r", Ok((b!("k3:v3\r"), (vec![Header::new_with_flags(b"k1", 0, b"v1", 0), Header::new_with_flags(b"k2", 0, b"v2\0v2", 0)], false))), None)]
+    fn test_headers(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], ParsedHeaders>,
+        #[case] diff_res_expected: Option<IResult<&[u8], ParsedHeaders>>,
+    ) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.headers()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.headers()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.headers()(input), expected);
+        }
+    }
+
+    // headers(): end-of-header detection across eol styles. The second case
+    // argument overrides the request-side expectation when it differs; the
+    // response side always uses the shared expectation built in the body.
+    #[rstest]
+    #[case::only_lf_eoh(
+        b"Name1: Value1\nName2:Value2\nName3: Val\n ue3\nName4: Value4\n Value4.1\n Value4.2\n\n",
+        None
+    )]
+    #[case::only_crlf_eoh(b"Name1: Value1\r\nName2:Value2\r\nName3: Val\r\n ue3\r\nName4: Value4\r\n Value4.1\r\n Value4.2\r\n\r\n", None)]
+    #[case::crlf_lf_eoh(b"Name1: Value1\r\nName2:Value2\nName3: Val\r\n ue3\r\nName4: Value4\r\n Value4.1\n Value4.2\r\n\n", None)]
+    #[case::only_cr(b"Name1: Value1\rName2:Value2\rName3: Val\r\n ue3\rName4: Value4\r\n Value4.1\r\n Value4.2\r\r\n", Some(Err(Incomplete(Needed::new(1)))))]
+    #[case::cr_lf_crlf_eoh(b"Name1: Value1\rName2:Value2\rName3: Val\r\n ue3\r\nName4: Value4\r\n Value4.1\n Value4.2\r\n\n", Some(Ok((b!(""),
+        (
+            vec![
+                Header::new_with_flags(b"Name1", 0, b"Value1\rName2:Value2\rName3: Val ue3", HeaderFlags::FOLDING),
+                Header::new_with_flags(b"Name4", 0, b"Value4 Value4.1 Value4.2", HeaderFlags::FOLDING)
+                ],
+                true
+        )))))]
+    #[case::crlf_lfcr_lf(b"Name1: Value1\r\nName2:Value2\nName3: Val\n\r ue3\n\rName4: Value4\r\n Value4.1\n Value4.2\r\n\n", Some(Ok((b!(""),
+        (
+            vec![
+                Header::new_with_flags(b"Name1", 0, b"Value1", 0),
+                Header::new_with_flags(b"Name2", 0, b"Value2", 0),
+                Header::new_with_flags(b"Name3", 0, b"Val", 0),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"ue3", HeaderFlags::MISSING_COLON),
+                Header::new_with_flags(b"Name4", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value4 Value4.1 Value4.2", HeaderFlags::FOLDING),
+                ],
+                true
+        )))))]
+    #[case::lfcr_eoh(b"Name1: Value1\n\rName2:Value2\n\rName3: Val\n\r ue3\n\rName4: Value4\n\r Value4.1\n\r Value4.2\n\r\n\r", Some(Ok((b!("\r"),
+        (
+            vec![
+                Header::new_with_flags(b"Name1", 0, b"Value1", 0),
+                Header::new_with_flags(b"Name2", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value2", 0),
+                Header::new_with_flags(b"Name3", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Val", 0),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"ue3", HeaderFlags::MISSING_COLON),
+                Header::new_with_flags(b"Name4", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value4", 0),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"Value4.1", HeaderFlags::MISSING_COLON),
+                Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"Value4.2", HeaderFlags::MISSING_COLON),
+                ],
+            true
+        )))))]
+    fn test_headers_eoh(
+        #[case] input: &[u8], #[case] diff_req_expected: Option<IResult<&[u8], ParsedHeaders>>,
+    ) {
+        // Shared expectation: four headers, two of them folded, eoh reached.
+        let expected = Ok((
+            b!(""),
+            (
+                vec![
+                    Header::new_with_flags(b"Name1", 0, b"Value1", 0),
+                    Header::new_with_flags(b"Name2", 0, b"Value2", 0),
+                    Header::new_with_flags(b"Name3", 0, b"Val ue3", HeaderFlags::FOLDING),
+                    Header::new_with_flags(
+                        b"Name4",
+                        0,
+                        b"Value4 Value4.1 Value4.2",
+                        HeaderFlags::FOLDING,
+                    ),
+                ],
+                true,
+            ),
+        ));
+        let req_parser = Parser::new(Side::Request);
+        let res_parser = Parser::new(Side::Response);
+        if let Some(req_expected) = diff_req_expected {
+            assert_eq!(req_parser.headers()(input), req_expected);
+        } else {
+            assert_eq!(req_parser.headers()(input), expected);
+        }
+        assert_eq!(res_parser.headers()(input), expected);
+    }
+
+    // header_sans_colon(): colon-less lines become MISSING_COLON headers with
+    // an empty name. Third argument overrides the response-side expectation.
+    #[rstest]
+    #[case::incomplete(b"K V", Err(Incomplete(Needed::new(1))), None)]
+    #[case::contains_colon_1(b"K:V\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::contains_colon_2(b"K:V\r\nK2: V2", Ok((b!("K2: V2"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K:V", HeaderFlags::MISSING_COLON))), None)]
+    #[case::empty_name_value(b"\r\n", Err(Error(NomError::new(b!("\r\n"), Not))), None)]
+    #[case::contains_null(b"K V\0alue\r\nk", Ok((b!("k"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K V\0alue", HeaderFlags::MISSING_COLON))), None)]
+    #[case::folding(b"K V\ralue\r\nk", Ok((b!("k"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K V\ralue", HeaderFlags::MISSING_COLON))), Some(Ok((b!("alue\r\nk"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K V", HeaderFlags::MISSING_COLON)))))]
+    #[case::crlf(b"K V\r\nk1:v1\r\n", Ok((b!("k1:v1\r\n"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K V", HeaderFlags::MISSING_COLON))), None)]
+    #[case::lf(b"K V\nk1:v1\r\n", Ok((b!("k1:v1\r\n"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K V", HeaderFlags::MISSING_COLON))), None)]
+    fn test_header_sans_colon(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], Header>,
+        #[case] response_parser_expected: Option<IResult<&[u8], Header>>,
+    ) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.header_sans_colon()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        let res_expected = if let Some(response_expected) = response_parser_expected {
+            response_expected
+        } else {
+            expected
+        };
+        assert_eq!(res_parser.header_sans_colon()(input), res_expected);
+    }
+
+    // header_with_colon(): colon-separated headers; request and response
+    // parsers are expected to agree on every case here.
+    #[rstest]
+    #[case::incomplete(b"K: V", Err(Incomplete(Needed::new(1))))]
+    #[case::contains_colon(b"K: V\r\n", Err(Incomplete(Needed::new(1))))]
+    #[case::missing_colon(b"K V\nK:V\r\n", Err(Error(NomError::new(b!("\nK:V\r\n"), Tag))))]
+    #[case::contains_null(b":\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"", HeaderFlags::VALUE_EMPTY))))]
+    #[case::folding(b"K:\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"", HeaderFlags::VALUE_EMPTY))))]
+    #[case::crlf(b":V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"V", 0))))]
+    #[case::lf_1(b"K:folded\r\n\rV\r\n\r\n", Ok((b!("\rV\r\n\r\n"), Header::new_with_flags(b"K", 0, b"folded", 0))))]
+    #[case::lf_2(b"K: V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V", 0))))]
+    #[case::lf_3(b"K: V before\0 V after\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V before\0 V after", 0))))]
+    #[case::lf_4(b"K: V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V", 0))))]
+    #[case::lf_5(b"K: V before\0 V after\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V before\0 V after", 0))))]
+    #[case::lf_6(b"K: V\r\n a\r\n l\r\n u\r\n\te\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V a l u\te", HeaderFlags::FOLDING))))]
+    fn test_header_with_colon(#[case] input: &[u8], #[case] expected: IResult<&[u8], Header>) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.header_with_colon()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        assert_eq!(res_parser.header_with_colon()(input), expected);
+    }
+
+    // header(): combined with/without-colon parsing, including deformed
+    // folding. Third argument overrides the response-side expectation.
+    #[rstest]
+    #[case::incomplete(b"K: V", Err(Incomplete(Needed::new(1))), None)]
+    #[case::contains_colon(b"K: V\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::missing_colon_1(b"K V\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::missing_colon_2(b"K1 V1\r\nK2:V2\n\r\n", Ok((b!("K2:V2\n\r\n"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K1 V1", HeaderFlags::MISSING_COLON))), None)]
+    #[case::empty_name_value(b"K1:V1\nK2:V2\n\r\n", Ok((b!("K2:V2\n\r\n"), Header::new_with_flags(b"K1", 0, b"V1", 0))), None)]
+    #[case::contains_null(b":\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"", HeaderFlags::VALUE_EMPTY))), None)]
+    #[case::folding(b"K:\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"", HeaderFlags::VALUE_EMPTY))), None)]
+    #[case::empty_name(b":V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"V", 0))), None)]
+    #[case::special_folding(b"K:folded\r\n\rV\r\n\r\n", Ok((b!("\rV\r\n\r\n"), Header::new_with_flags(b"K", 0, b"folded", 0))), None)]
+    #[case::regular_eoh(b"K: V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V", 0))), None)]
+    #[case::folding(b"K: V\n a\r\n l\n u\r\n\te\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V a l u\te", HeaderFlags::FOLDING))), None)]
+    #[case::cr_in_name(b"Host:www.google.com\rName: Value\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"Host", 0, b"www.google.com\rName: Value", 0))), Some(Ok((b!("Name: Value\r\n\r\n"), Header::new_with_flags(b"Host", 0, b"www.google.com", 0)))))]
+    #[case::null_in_value(b"K: V before\0 V after\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V before\0 V after", 0))), None)]
+    #[case::folding(b"K: V\r a\r\n l\n u\r\n\te\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V\r a l u\te", HeaderFlags::FOLDING))), Some(Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V a l u\te", HeaderFlags::FOLDING)))))]
+    #[case::deformed_folding_1(b"K:deformed folded\n\r V\n\r\r\n\n", Ok((b!("\r V\n\r\r\n\n"), Header::new_with_flags(b"K", 0, b"deformed folded", 0))), Some(Ok((b!("\n"), Header::new_with_flags(b"K", 0, b"deformed folded V", HeaderFlags::FOLDING | HeaderFlags::DEFORMED_EOL)))))]
+    #[case::deformed_folding_2(b"K:deformed folded\n\r V\r\n\r\n", Ok(( b!("\r V\r\n\r\n"), Header::new_with_flags(b"K", 0, b"deformed folded", 0))), Some(Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"deformed folded V", HeaderFlags::FOLDING)))))]
+    #[case::deformed_folding_3(b"K:deformed folded\n\r\r V\r\n\r\n", Ok(( b!("\r\r V\r\n\r\n"), Header::new_with_flags(b"K", 0, b"deformed folded", 0))), Some(Ok((b!("\r V\r\n\r\n"), Header::new_with_flags(b"K", 0, b"deformed folded", 0)))))]
+    #[case::non_token_trailing_ws(b"K\r \r :\r V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K\r \r ", HeaderFlags::NAME_NON_TOKEN_CHARS | HeaderFlags::NAME_TRAILING_WHITESPACE, b"\r V", 0))), Some(Ok((b!("\r\n"), Header::new_with_flags(b"K", HeaderFlags::NAME_NON_TOKEN_CHARS | HeaderFlags::NAME_TRAILING_WHITESPACE, b"V", HeaderFlags::FOLDING)))))]
+    #[case::non_token(b"K\x0c:Value\r\n V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K\x0c", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value V", HeaderFlags::FOLDING))), None)]
+    #[case::non_token_trailing(b"K\r :Value\r\n V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K\r ", HeaderFlags::NAME_TRAILING_WHITESPACE | HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value V", HeaderFlags::FOLDING))), None)]
+    fn test_header(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], Header>,
+        #[case] diff_res_expected: Option<IResult<&[u8], Header>>,
+    ) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.header()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.header()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.header()(input), expected);
+        }
+    }
+
+    // separator(): colon plus optional whitespace; same result on both sides.
+    #[rstest]
+    #[case::not_a_separator(b"\n", Err(Error(NomError::new(b!("\n"), Tag))), None)]
+    #[case::colon(b":value", Ok((b!("value"), 0)), None)]
+    #[case::colon_whitespace(b": value", Ok((b!("value"), 0)), None)]
+    #[case::colon_tab(b":\t value", Ok((b!("value"), 0)), None)]
+    fn test_separators(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], u64>,
+        #[case] diff_res_expected: Option<IResult<&[u8], u64>>,
+    ) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.separator()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.separator()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.separator()(input), expected);
+        }
+    }
+
+    // token_chars(): splits into (leading ws, token bytes, trailing ws).
+    #[rstest]
+    #[case::incomplete(b"name", Err(Incomplete(Needed::new(1))))]
+    #[case::token(b"name:", Ok((b!(":"), (b!(""), b!("name"), b!("")))))]
+    #[case::trailing_whitespace(b"name :", Ok((b!(":"), (b!(""), b!("name"), b!(" ")))))]
+    #[case::surrounding_whitespace(b" name :", Ok((b!(":"), (b!(" "), b!("name"), b!(" ")))))]
+    fn test_token_chars(#[case] input: &[u8], #[case] expected: IResult<&[u8], SurroundedBytes>) {
+        assert_eq!(token_chars(input), expected);
+    }
+
+    // name(): header-name extraction and whitespace/non-token flagging.
+    #[rstest]
+    #[case::name(b"Hello: world", Ok((b!(": world"), Name {name: b"Hello".to_vec(), flags: 0})), None)]
+    #[case::name(b"Host:www.google.com\rName: Value", Ok((b!(":www.google.com\rName: Value"), Name {name: b"Host".to_vec(), flags: 0})), None)]
+    #[case::trailing_whitespace(b"Hello : world", Ok((b!(": world"), Name {name: b"Hello".to_vec(), flags: HeaderFlags::NAME_TRAILING_WHITESPACE})), None)]
+    #[case::surrounding_whitespace(b" Hello : world", Ok((b!(": world"), Name {name: b"Hello".to_vec(), flags: HeaderFlags::NAME_LEADING_WHITESPACE | HeaderFlags::NAME_TRAILING_WHITESPACE})), None)]
+    #[case::semicolon(b"Hello;invalid: world", Ok((b!(": world"), Name {name: b"Hello;invalid".to_vec(), flags: HeaderFlags::NAME_NON_TOKEN_CHARS})), None)]
+    #[case::space(b"Hello invalid: world", Ok((b!(": world"), Name {name: b"Hello invalid".to_vec(), flags: HeaderFlags::NAME_NON_TOKEN_CHARS})), None)]
+    #[case::surrounding_internal_space(b" Hello invalid : world", Ok((b!(": world"), Name {name: b"Hello invalid".to_vec(), flags: HeaderFlags::NAME_LEADING_WHITESPACE | HeaderFlags::NAME_TRAILING_WHITESPACE | HeaderFlags::NAME_NON_TOKEN_CHARS})), None)]
+    #[case::only_space_name(b"   : world", Ok((b!(": world"), Name {name: b"".to_vec(), flags: HeaderFlags::NAME_LEADING_WHITESPACE | HeaderFlags::NAME_TRAILING_WHITESPACE })), None)]
+    fn test_name(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], Name>,
+        #[case] diff_res_expected: Option<IResult<&[u8], Name>>,
+    ) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.name()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.name()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.name()(input), expected);
+        }
+    }
+
+    // null(): matches a single NUL byte and sets NULL_TERMINATED.
+    #[rstest]
+    #[case(b"test", Err(Error(NomError::new(b!("test"), Tag))))]
+    #[case(b"\r\n", Err(Error(NomError::new(b!("\r\n"), Tag))))]
+    #[case(b"\n", Err(Error(NomError::new(b!("\n"), Tag))))]
+    #[case(b"\0a", Ok((b!("a"), (b!("\0"), HeaderFlags::NULL_TERMINATED))))]
+    fn test_null(#[case] input: &[u8], #[case] expected: IResult<&[u8], ParsedBytes>) {
+        assert_eq!(null(input), expected);
+    }
+
+    // eol(): line-ending recognition; the response side additionally accepts
+    // bare CR endings. Third argument overrides the response expectation.
+    #[rstest]
+    #[case::not_eol(b"test", Err(Error(NomError::new(b!("test"), Tag))), None)]
+    #[case::incomplete_eol(b"\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_eol(b"\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_eol(b"\r\n\t", Err(Error(NomError::new(b!("\t"), Not))), None)]
+    #[case::complete_cr(b"\ra", Err(Error(NomError::new(b!("\ra"), Tag))), Some(Ok((b!("a"), (b!("\r"), 0)))))]
+    #[case::incomplete_crcr(b"\r\r", Err(Error(NomError::new(b!("\r\r"), Tag))), Some(Ok((b!("\r"), (b!("\r"), 0)))))]
+    #[case::incomplete_lfcr(b"\n\r", Ok((b!("\r"), (b!("\n"), 0))), Some(Err(Incomplete(Needed::new(1)))))]
+    #[case::complete_lfcr(b"\n\ra", Ok((b!("\ra"), (b!("\n"), 0))), Some(Ok((b!("a"), (b!("\n\r"), 0)))))]
+    #[case::lfcrlf(b"\n\r\n", Ok((b!("\r\n"), (b!("\n"), 0))), Some(Ok((b!("\n"), (b!("\n\r"), 0)))))]
+    #[case::lfcrlfcr(b"\n\r\n\r", Ok((b!("\r\n\r"), (b!("\n"), 0))), Some(Ok((b!("\n\r"), (b!("\n\r"), 0)))))]
+    #[case::complete_lf(b"\na", Ok((b!("a"), (b!("\n"), 0))), None)]
+    #[case::complete_lfcrcrlf(b"\n\r\r\na", Ok((b!("\r\na"), (b!("\n\r"), HeaderFlags::DEFORMED_EOL))), Some(Ok((b!("\r\na"), (b!("\n\r"), 0)))))]
+    #[case::complete_crlfcrlf(b"\r\n\r\na", Ok((b!("\r\na"), (b!("\r\n"), 0))), None)]
+    #[case::incomplete_crlf(b"\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_lf(b"\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::lfcrcrlf(b"\n\r\r\n", Ok((b!("\r\n"), (b!("\n\r"), HeaderFlags::DEFORMED_EOL))), Some(Ok((b!("\r\n"), (b!("\n\r"), 0)))))]
+    #[case::crlfcrlf(b"\r\n\r\n", Ok((b!("\r\n"), (b!("\r\n"), 0))), None)]
+    #[case::null(b"\0a", Err(Error(NomError::new(b!("\0a"), Tag))), None)]
+    fn test_eol(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], ParsedBytes>,
+        #[case] diff_res_expected: Option<IResult<&[u8], ParsedBytes>>,
+    ) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.eol()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.eol()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.eol()(input), expected);
+        }
+    }
+
+    // null_or_eol(): same cases as eol() but a NUL byte also terminates.
+    #[rstest]
+    #[case::not_eol(b"test", Err(Error(NomError::new(b!("test"), Tag))), None)]
+    #[case::incomplete_eol(b"\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_eol(b"\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_eol(b"\r\n\t", Err(Error(NomError::new(b!("\t"), Not))), None)]
+    #[case::complete_cr(b"\ra", Err(Error(NomError::new(b!("\ra"), Tag))), Some(Ok((b!("a"), (b!("\r"), 0)))))]
+    #[case::incomplete_crcr(b"\r\r", Err(Error(NomError::new(b!("\r\r"), Tag))), Some(Ok((b!("\r"), (b!("\r"), 0)))))]
+    #[case::incomplete_lfcr(b"\n\r", Ok((b!("\r"), (b!("\n"), 0))), Some(Err(Incomplete(Needed::new(1)))))]
+    #[case::complete_lfcr(b"\n\ra", Ok((b!("\ra"), (b!("\n"), 0))), Some(Ok((b!("a"), (b!("\n\r"), 0)))))]
+    #[case::lfcrlf(b"\n\r\n", Ok((b!("\r\n"), (b!("\n"), 0))), Some(Ok((b!("\n"), (b!("\n\r"), 0)))))]
+    #[case::lfcrlfcr(b"\n\r\n\r", Ok((b!("\r\n\r"), (b!("\n"), 0))), Some(Ok((b!("\n\r"), (b!("\n\r"), 0)))))]
+    #[case::complete_lf(b"\na", Ok((b!("a"), (b!("\n"), 0))), None)]
+    #[case::complete_lfcrcrlf(b"\n\r\r\na", Ok((b!("\r\na"), (b!("\n\r"), HeaderFlags::DEFORMED_EOL))), Some(Ok((b!("\r\na"), (b!("\n\r"), 0)))))]
+    #[case::complete_crlfcrlf(b"\r\n\r\na", Ok((b!("\r\na"), (b!("\r\n"), 0))), None)]
+    #[case::incomplete_crlf(b"\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_lf(b"\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::lfcrcrlf(b"\n\r\r\n", Ok((b!("\r\n"), (b!("\n\r"), HeaderFlags::DEFORMED_EOL))), Some(Ok((b!("\r\n"), (b!("\n\r"), 0)))))]
+    #[case::crlfcrlf(b"\r\n\r\n", Ok((b!("\r\n"), (b!("\r\n"), 0))), None)]
+    #[case::null(b"\0a", Ok((b!("a"), (b!("\0"), HeaderFlags::NULL_TERMINATED))), None)]
+    fn test_null_or_eol(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], ParsedBytes>,
+        #[case] diff_res_expected: Option<IResult<&[u8], ParsedBytes>>,
+    ) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.null_or_eol()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.null_or_eol()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.null_or_eol()(input), expected);
+        }
+    }
+
+    // folding(): eol followed by folding whitespace; response side also folds
+    // after a bare CR. Third argument overrides the response expectation.
+    #[rstest]
+    #[case::no_fold_tag(b"test", Err(Error(NomError::new(b!("test"), Tag))), None)]
+    #[case::cr(b"\r", Err(Error(NomError::new(b!("\r"), Tag))), Some(Err(Incomplete(Needed::new(1)))))]
+    #[case::crcr(b"\r\r",  Err(Error(NomError::new(b!("\r\r"), Tag))), Some(Err(Error(NomError::new(b!("\r"), Tag)))))]
+    #[case::incomplete_crlf(b"\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_crlf_ws(b"\r\n\t", Ok((b!(""), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)]
+    #[case::incomplete_crlf_ws(b"\r\n \t", Ok((b!("\t"), (b!("\r\n"), b!(" "), HeaderFlags::FOLDING))), None)]
+    #[case::incomplete_crlfcr(b"\r\n\r", Err(Error(NomError::new(b!("\r"), Tag))), None)]
+    #[case::not_fold_1(b"\r\n\r\n", Err(Error(NomError::new(b!("\r\n"), Tag))), None)]
+    #[case::not_fold_2(b"\r\n\r\r", Err(Error(NomError::new(b!("\r\r"), Tag))), None)]
+    #[case::fold(b"\r\n next", Ok((b!("next"), (b!("\r\n"), b!(" "), HeaderFlags::FOLDING))), None)]
+    #[case::fold(b"\r\n\tnext", Ok((b!("next"), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)]
+    #[case::fold(b"\r\n\t next", Ok((b!(" next"), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)]
+    #[case::fold_not_res(b"\r\n\t\t\r\n", Ok((b!("\t\r\n"), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)]
+    #[case::fold_not_res(b"\r\n\t \t\r", Ok((b!(" \t\r"), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)]
+    #[case::fold_not_res(b"\r\n     \n", Ok((b!("    \n"), (b!("\r\n"), b!(" "), HeaderFlags::FOLDING))), None)]
+    #[case::special_fold_not_res(b"\n\r     \n", Err(Error(NomError::new(b!("\r     \n"), Tag))), Some( Ok((b!("    \n"), (b!("\n\r"), b!(" "), HeaderFlags::FOLDING)))))]
+    #[case::special_fold_1(b"\r\n\rnext", Err(Error(NomError::new(b!("\rnext"), Tag))), None)]
+    #[case::special_fold_2(b"\r\n\r\t next", Err(Error(NomError::new(b!("\r\t next"), Tag))), None)]
+    #[case::fold_res(b"\r    hello \n", Err(Error(NomError::new(b!("\r    hello \n"), Tag))), Some(Ok((b!("   hello \n"), (b!("\r"), b!(" "), HeaderFlags::FOLDING)))))]
+    fn test_folding(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], FoldingBytes>,
+        #[case] diff_res_expected: Option<IResult<&[u8], FoldingBytes>>,
+    ) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.folding()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.folding()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.folding()(input), expected);
+        }
+    }
+
+    // folding_or_terminator(): distinguishes a folded continuation (Some(ws))
+    // from a line terminator (None). Third argument overrides the response
+    // expectation when the two sides disagree.
+    #[rstest]
+    #[case::incomplete_1(b"\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_2(b"\r\n\t", Ok((b!(""), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!("\t"))))), None)]
+    #[case::incomplete_3(b"\r\n ", Ok((b!(""), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!(" "))))), None)]
+    #[case::incomplete_4(b"\r\n\r", Ok((b!("\r"),((b!("\r\n"), 0), None))), Some(Err(Incomplete(Needed::new(1)))))]
+    #[case::crcr(b"\r\r", Err(Error(NomError::new(b!("\r\r"), Tag))), Some(Ok((b!("\r"), ((b!("\r"), 0), None)))))]
+    #[case::fold(b"\r\n\ta", Ok((b!("a"), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!("\t"))))), None)]
+    #[case::special_fold(b"\r\n\ra", Ok((b!("\ra"),((b!("\r\n"), 0), None))), None)]
+    #[case::fold(b"\r\n a", Ok((b!("a"), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!(" "))))), None)]
+    #[case::crlf_eol(b"\r\na", Ok((b!("a"), ((b!("\r\n"), 0), None))), None)]
+    #[case::lflf_eol(b"\n\na", Ok((b!("\na"), ((b!("\n"), 0), None))), None)]
+    #[case::crlfcrlf_eol(b"\r\n\r\na", Ok((b!("\r\na"), ((b!("\r\n"), 0), None))), None)]
+    #[case::req_deformed_eol(b"\n\r\r\na", Ok((b!("\r\na"), ((b!("\n\r"), HeaderFlags::DEFORMED_EOL), None))), Some(Ok((b!("\r\na"), ((b!("\n\r"), 0), None)))))]
+    #[case::null_terminated(b"\0a", Ok((b!("a"), ((b!("\0"), HeaderFlags::NULL_TERMINATED), None))), None)]
+    #[case::res_fold(b"\r a", Err(Error(NomError::new(b!("\r a"), Tag))), Some(Ok((b!("a"), ((b!("\r"), HeaderFlags::FOLDING), Some(b!(" ")))))))]
+    #[case::multi_space_line(b"\n  \r\n\n", Ok((b!(" \r\n\n"), ((b!("\n"), HeaderFlags::FOLDING), Some(b!(" "))))), None)]
+    fn test_folding_or_terminator(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], FoldingOrTerminator>,
+        #[case] diff_res_expected: Option<IResult<&[u8], FoldingOrTerminator>>,
+    ) {
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.folding_or_terminator()(input), expected);
+
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.folding_or_terminator()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.folding_or_terminator()(input), expected);
+        }
+    }
+
+    #[rstest]
+    #[case::incomplete_1(b" ", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_2(b"value", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_3(b"\tvalue", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_4(b" value", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_5(b"value\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete_6(b"\r\r", Err(Incomplete(Needed::new(1))), Some(Ok((b!("\r"), (b!(""), ((b!("\r"), 0), None))))))]
+    #[case::diff_values_1(b"www.google.com\rName: Value\r\n\r\n", Ok((b!("\r\n"), (b!("www.google.com\rName: Value"), ((b!("\r\n"), 0), None)))), Some(Ok((b!("Name: Value\r\n\r\n"), (b!("www.google.com"), ((b!("\r"), 0), None))))))]
+    #[case::diff_values_2(b"www.google.com\rName: Value\n\r\n", Ok((b!("\r\n"), (b!("www.google.com\rName: Value"), ((b!("\n"), 0), None)))), Some(Ok((b!("Name: Value\n\r\n"), (b!("www.google.com"), ((b!("\r"), 0), None))))))]
+    #[case::diff_values_3(b"www.google.com\rName: Value\r\n\n", Ok((b!("\n"), (b!("www.google.com\rName: Value"), ((b!("\r\n"), 0), None)))), Some(Ok((b!("Name: Value\r\n\n"), (b!("www.google.com"), ((b!("\r"), 0), None))))))]
+    #[case::value_1(b"\r\nnext", Ok((b!("next"), (b!(""), ((b!("\r\n"), 0), None)))), None)]
+    #[case::value_2(b"value\r\nname2", Ok((b!("name2"), (b!("value"), ((b!("\r\n"), 0), None)))), None)]
+    #[case::fold_value_1(b"value\n more", Ok((b!("more"), (b!("value"), ((b!("\n"), HeaderFlags::FOLDING), Some(b!(" ")))))), None)]
+    #[case::fold_value_2(b"value\r\n\t more", Ok((b!(" more"), (b!("value"), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!("\t")))))), None)]
+    #[case::req_special_fold_res_value_1(b"value\r\n\t more", Ok((b!(" more"), (b!("value"), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!("\t")))))), None)]
+    #[case::req_special_fold_res_value_2(b"value\n\rmore", Ok((b!("\rmore"), (b!("value"), ((b!("\n"), 0), None)))), Some(Ok((b!("more"), (b!("value"), ((b!("\n\r"), 0), None))))))]
+    #[case::special_fold(b"value\r\n\rmore", Ok((b!("\rmore"), (b!("value"), ((b!("\r\n"), 0), None)))), None)]
+    fn test_value_bytes(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], ValueBytes>,
+        #[case] diff_res_expected: Option<IResult<&[u8], ValueBytes>>,
+    ) {
+        // The request-side parser must always produce `expected`.
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.value_bytes()(input), expected);
+
+        // The response-side parser differs only for cases that supply
+        // `diff_res_expected`; otherwise it must agree with the request side.
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.value_bytes()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.value_bytes()(input), expected);
+        }
+    }
+
+    #[rstest]
+    #[case::incomplete(b"value\r\n more\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete(b"value\r\n ", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete(b"value\r\n more", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete(b"value\r\n more\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::incomplete(b"value\n more\r\n", Err(Incomplete(Needed::new(1))), None)]
+    #[case::fold(b"\r\n value    \r\nnext:", Ok((b!("next:"), Value {value: b"value".to_vec(), flags: HeaderFlags::FOLDING})), None)]
+    #[case::fold(b"\r\n value\r\nnext:", Ok((b!("next:"), Value {value: b"value".to_vec(), flags: HeaderFlags::FOLDING})), None)]
+    #[case::fold(b"value\r\n more\r\n\r\n", Ok((b!("\r\n"), Value {value: b"value more".to_vec(), flags: HeaderFlags::FOLDING})), None)]
+    #[case::fold(b"value\r\n more\r\n\tand more\r\nnext:", Ok((b!("next:"), Value {value: b"value more\tand more".to_vec(), flags: HeaderFlags::FOLDING})), None)]
+    #[case::fold(b"value\n\t\tmore\r\n  and\r\n more\r\nnext:", Ok((b!("next:"), Value {value: b"value\t\tmore  and more".to_vec(), flags: HeaderFlags::FOLDING})), None)]
+    #[case::req_special_res_fold_1(b"value\n more\n\r\tand more\r\n\r\n", Ok((b!("\r\tand more\r\n\r\n"), Value {value: b"value more".to_vec(), flags: HeaderFlags::FOLDING})), Some(Ok((b!("\r\n"), Value {value: b"value more\tand more".to_vec(), flags: HeaderFlags::FOLDING}))))]
+    #[case::req_special_res_fold_2(b"value\n\r\t\tmore\r\n  and\r\n more\r\nnext:", Ok((b!("\r\t\tmore\r\n  and\r\n more\r\nnext:"), Value {value: b"value".to_vec(), flags: 0})), Some(Ok((b!("next:"), Value {value: b"value\t\tmore  and more".to_vec(), flags: HeaderFlags::FOLDING}))))]
+    #[case::req_special_res_value(b"value\n\r\t\tmore\r\n  and\r\n more\r\nnext:", Ok((b!("\r\t\tmore\r\n  and\r\n more\r\nnext:"), Value {value: b"value".to_vec(), flags: 0})), Some(Ok((b!("next:"), Value {value: b"value\t\tmore  and more".to_vec(), flags: HeaderFlags::FOLDING}))))]
+    #[case::req_special_deformed_res_fold(b"value1\n\r next: value2\r\n  and\r\n more\r\nnext3:", Ok((b!("\r next: value2\r\n  and\r\n more\r\nnext3:"), Value {value: b"value1".to_vec(), flags: 0})), Some(Ok((b!("next: value2\r\n  and\r\n more\r\nnext3:"), Value {value: b"value1".to_vec(), flags: 0}))))]
+    #[case::value(b"value\r\nnext:", Ok((b!("next:"), Value {value: b"value".to_vec(), flags: 0})), None)]
+    #[case::value_empty(b"\r\nnext:", Ok((b!("next:"), Value {value: b"".to_vec(), flags: HeaderFlags::VALUE_EMPTY})), None)]
+    #[case::value_wrapping_with_colon(b"b\r\n c: d\r\nAAA", Ok((b!("AAA"), Value {value: b"b c: d".to_vec(), flags: HeaderFlags::FOLDING})), Some(Ok((b!("c: d\r\nAAA"), Value {value: b"b".to_vec(), flags: 0}))))]
+    #[case::value_wrapping_with_colon_no_tokens(b"b\r\n : d\r\nAAA", Ok((b!("AAA"), Value {value: b"b : d".to_vec(), flags: HeaderFlags::FOLDING})), Some(Ok((b!("AAA"), Value {value: b"b : d".to_vec(), flags: HeaderFlags::FOLDING}))))]
+    fn test_value(
+        #[case] input: &[u8], #[case] expected: IResult<&[u8], Value>,
+        #[case] diff_res_expected: Option<IResult<&[u8], Value>>,
+    ) {
+        // The request-side parser must always produce `expected`.
+        let req_parser = Parser::new(Side::Request);
+        assert_eq!(req_parser.value()(input), expected);
+
+        // The response-side parser differs only for cases that supply
+        // `diff_res_expected`; otherwise it must agree with the request side.
+        let res_parser = Parser::new(Side::Response);
+        if let Some(res_expected) = diff_res_expected {
+            assert_eq!(res_parser.value()(input), res_expected);
+        } else {
+            assert_eq!(res_parser.value()(input), expected);
+        }
+    }
+}
diff --git a/rust/htp/src/hook.rs b/rust/htp/src/hook.rs
new file mode 100644 (file)
index 0000000..29d02c0
--- /dev/null
@@ -0,0 +1,120 @@
+use crate::{
+    connection_parser::{ConnectionParser, ParserData},
+    error::Result,
+    transaction::{Data, Transaction},
+    HtpStatus,
+};
+
+/// External (C) callback function prototype for transaction hooks.
+pub(crate) type TxExternalCallbackFn =
+    unsafe extern "C" fn(connp: *const ConnectionParser, tx: *mut Transaction) -> HtpStatus;
+
+/// Native (rust) callback function prototype for transaction hooks.
+pub(crate) type TxNativeCallbackFn = fn(tx: &mut Transaction) -> Result<()>;
+
+/// Hook for Transaction
+pub(crate) type TxHook = Hook<TxExternalCallbackFn, TxNativeCallbackFn>;
+
+/// External (C) callback function prototype for data hooks.
+pub(crate) type DataExternalCallbackFn =
+    unsafe extern "C" fn(connp: *const ConnectionParser, data: *mut Data) -> HtpStatus;
+
+/// Native (rust) callback function prototype for data hooks.
+pub(crate) type DataNativeCallbackFn = fn(&mut Transaction, data: &ParserData) -> Result<()>;
+
+/// Hook for Data
+pub(crate) type DataHook = Hook<DataExternalCallbackFn, DataNativeCallbackFn>;
+
+/// Callback list shared by transaction and data hooks.
+#[derive(Clone)]
+pub struct Hook<E, N> {
+    /// List of all callbacks, run in registration order.
+    pub(crate) callbacks: Vec<Callback<E, N>>,
+}
+
+impl<E, N> Default for Hook<E, N> {
+    /// Create a new callback list
+    fn default() -> Self {
+        Hook {
+            callbacks: Vec::new(),
+        }
+    }
+}
+impl<E, N> Hook<E, N> {
+    /// Register a native (rust) callback function.
+    /// Only exercised by the test suite, hence the cfg gate.
+    #[cfg(test)]
+    pub(crate) fn register(&mut self, cbk_fn: N) {
+        self.callbacks.push(Callback::Native(cbk_fn))
+    }
+
+    /// Register an external (C) callback function
+    pub(crate) fn register_extern(&mut self, cbk_fn: E) {
+        self.callbacks.push(Callback::External(cbk_fn))
+    }
+}
+
+impl TxHook {
+    /// Run all callbacks on the list
+    ///
+    /// This function will exit early if a callback fails to return HtpStatus::OK
+    /// or HtpStatus::DECLINED.
+    pub(crate) fn run_all(&self, connp: &mut ConnectionParser, tx_index: usize) -> Result<()> {
+        // Raw pointer to the parser, handed to C callbacks while `tx` below
+        // mutably borrows from the same parser.
+        // NOTE(review): this aliases `connp`; sound only if external callbacks
+        // do not mutate the transaction list through the parser -- confirm.
+        let connp_ptr: *mut ConnectionParser = connp as *mut ConnectionParser;
+        if let Some(tx) = connp.tx_mut(tx_index) {
+            for cbk_fn in &self.callbacks {
+                match cbk_fn {
+                    Callback::External(cbk_fn) => {
+                        let result = unsafe { cbk_fn(connp_ptr, tx) };
+                        // Any status other than OK/DECLINED aborts the run.
+                        if result != HtpStatus::OK && result != HtpStatus::DECLINED {
+                            return Err(result);
+                        }
+                    }
+                    Callback::Native(cbk_fn) => {
+                        // DECLINED from a native callback is not treated as an error.
+                        if let Err(e) = cbk_fn(tx) {
+                            if e != HtpStatus::DECLINED {
+                                return Err(e);
+                            }
+                        }
+                    }
+                };
+            }
+        }
+        Ok(())
+    }
+}
+
+impl DataHook {
+    /// Run all callbacks on the list
+    ///
+    /// This function will exit early if a callback fails to return HtpStatus::OK
+    /// or HtpStatus::DECLINED.
+    pub(crate) fn run_all(&self, connp: &ConnectionParser, data: &mut Data) -> Result<()> {
+        for cbk_fn in &self.callbacks {
+            match cbk_fn {
+                Callback::External(cbk_fn) => {
+                    // The C callback receives the parser and the data block directly.
+                    let result = unsafe { cbk_fn(connp, data) };
+                    if result != HtpStatus::OK && result != HtpStatus::DECLINED {
+                        return Err(result);
+                    }
+                }
+                Callback::Native(cbk_fn) => {
+                    // NOTE(review): dereferences the raw transaction pointer held
+                    // by `data`; assumes it remains valid for the callback -- confirm.
+                    if let Err(e) = cbk_fn(unsafe { &mut *data.tx() }, data.parser_data()) {
+                        if e != HtpStatus::DECLINED {
+                            return Err(e);
+                        }
+                    }
+                }
+            };
+        }
+        Ok(())
+    }
+}
+
+/// Type of callbacks.
+/// The derives apply whenever the stored function types are Copy/Clone.
+#[derive(Copy, Clone)]
+pub enum Callback<E, N> {
+    /// External (C) callback function
+    External(E),
+    /// Native (rust) callback function
+    Native(N),
+}
diff --git a/rust/htp/src/lib.rs b/rust/htp/src/lib.rs
new file mode 100644 (file)
index 0000000..2bec55e
--- /dev/null
@@ -0,0 +1,93 @@
+//! Root crate for libhtp.
+
+#![deny(warnings)]
+#![deny(missing_docs)]
+#![deny(unused_lifetimes)]
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+#![allow(non_upper_case_globals)]
+#[repr(C)]
+#[derive(PartialEq, Eq, Debug)]
+
+/// Status codes used by LibHTP internally.
+pub enum HtpStatus {
+    /// The lowest value LibHTP will use internally.
+    ERROR_RESERVED = -1000,
+    /// General-purpose error code.
+    ERROR = -1,
+    /// No processing or work was done. This is typically used by callbacks
+    /// to indicate that they were not interested in doing any work in the
+    /// given context.
+    DECLINED = 0,
+    /// Returned by a function when its work was successfully completed.
+    OK = 1,
+    /// Returned when processing a connection stream, after consuming all
+    /// provided data. The caller should call again with more data.
+    DATA = 2,
+    /// Returned when processing a connection stream, after encountering
+    /// a situation where processing needs to continue on the alternate
+    /// stream (e.g., the inbound parser needs to observe some outbound
+    /// data). The data provided was not completely consumed. On the next
+    /// invocation the caller should supply only the data that has not
+    /// been processed already. Use request_data_consumed() and response_data_consumed()
+    /// to determine how much of the most recent data chunk was consumed.
+    DATA_OTHER = 3,
+    /// Used by callbacks to indicate that the processing should stop. For example,
+    /// returning HtpStatus::STOP from a connection callback indicates that LibHTP should
+    /// stop following that particular connection.
+    STOP = 4,
+    /// Same as DATA, but indicates that any non-consumed part of the data chunk
+    /// should be preserved (buffered) for later.
+    DATA_BUFFER = 5,
+    /// The highest value LibHTP will use internally.
+    STATUS_RESERVED = 1000,
+}
+
+/// Module for providing logging functions.
+#[macro_use]
+pub mod log;
+/// Module for bstr (binary string) functions.
+pub mod bstr;
+/// Module for all functions facing the C API.
+pub mod c_api;
+/// Module for all decompressor functions.
+pub mod decompressors;
+/// Module for all error types.
+pub mod error;
+/// Module for header parsing.
+mod headers;
+/// Module for hooks.
+pub mod hook;
+/// Module for providing unicode bestfit mappings.
+#[macro_use]
+mod unicode_bestfit_map;
+/// Module for libhtp configurations.
+pub mod config;
+/// Module for connection handling.
+pub mod connection;
+/// Module for the connection parser.
+pub mod connection_parser;
+/// Module for extra utility parsers. (only public for doc tests)
+pub mod parsers;
+/// Module for request parsing.
+pub mod request;
+/// Module for response parsing.
+pub mod response;
+/// Module for the custom table type.
+pub mod table;
+/// Module for transaction parsing.
+pub mod transaction;
+/// Module to track multiple transactions.
+pub mod transactions;
+/// Module for uri parsing.
+pub mod uri;
+/// Module for url decoding.
+pub mod urlencoded;
+/// Module for utf8 decoding.
+mod utf8_decoder;
+/// Module for utility functions.
+pub mod util;
+
+/// Test harness
+#[cfg(test)]
+pub mod test;
diff --git a/rust/htp/src/log.rs b/rust/htp/src/log.rs
new file mode 100644 (file)
index 0000000..38ac21c
--- /dev/null
@@ -0,0 +1,293 @@
+use std::cell::RefCell;
+use std::collections::VecDeque;
+use std::rc::Rc;
+
+/// Different codes used for logging.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpLogCode {
+    /// Default
+    UNKNOWN = 0,
+    /// Gzip Decompression Failed
+    GZIP_DECOMPRESSION_FAILED,
+    /// Request field missing a colon.
+    REQUEST_FIELD_MISSING_COLON,
+    /// Response field missing a colon.
+    RESPONSE_FIELD_MISSING_COLON,
+    /// Request chunk length parsing failed.
+    INVALID_REQUEST_CHUNK_LEN,
+    /// Response chunked-length parsing failed.
+    INVALID_RESPONSE_CHUNK_LEN,
+    /// Request chunk extension.
+    REQUEST_CHUNK_EXTENSION,
+    /// Response chunk extension.
+    RESPONSE_CHUNK_EXTENSION,
+    /// Request has too many headers.
+    REQUEST_TOO_MANY_HEADERS,
+    /// Response has too many headers.
+    RESPONSE_TOO_MANY_HEADERS,
+    /// Request transfer-encoding invalid.
+    INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST,
+    /// Response transfer-encoding invalid.
+    INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE,
+    /// Request content-length parsing failed.
+    INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST,
+    /// Response content-length parsing failed.
+    INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE,
+    /// Request has a duplicate content-length field.
+    DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST,
+    /// Response has a duplicate content-length field.
+    DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE,
+    /// 100 Continue response status already seen.
+    CONTINUE_ALREADY_SEEN,
+    /// Unable to match response to a request.
+    UNABLE_TO_MATCH_RESPONSE_TO_REQUEST,
+    /// Request server port is invalid.
+    INVALID_SERVER_PORT_IN_REQUEST,
+    /// Authority port is invalid.
+    INVALID_AUTHORITY_PORT,
+    /// Request header name is incorrectly formed.
+    REQUEST_HEADER_INVALID,
+    /// Response header name is incorrectly formed.
+    RESPONSE_HEADER_INVALID,
+    /// Host header is missing.
+    MISSING_HOST_HEADER,
+    /// Host header is ambiguous.
+    HOST_HEADER_AMBIGUOUS,
+    /// Request has invalid line folding.
+    INVALID_REQUEST_FIELD_FOLDING,
+    /// Response has invalid line folding.
+    INVALID_RESPONSE_FIELD_FOLDING,
+    /// Request buffer field is over the limit.
+    REQUEST_FIELD_TOO_LONG,
+    /// Response buffer field is over the limit.
+    RESPONSE_FIELD_TOO_LONG,
+    /// Mismatch between request server port and tcp port.
+    REQUEST_SERVER_PORT_TCP_PORT_MISMATCH,
+    /// Uri hostname is invalid.
+    URI_HOST_INVALID,
+    /// Header hostname is invalid.
+    HEADER_HOST_INVALID,
+    /// Non compliant delimiter between method and URI in request line.
+    METHOD_DELIM_NON_COMPLIANT,
+    /// Parsed request-uri contains a non compliant delimiter.
+    URI_DELIM_NON_COMPLIANT,
+    /// Request line has leading whitespace.
+    REQUEST_LINE_LEADING_WHITESPACE,
+    /// Response content encoding lzma layers is greater than limit.
+    RESPONSE_TOO_MANY_LZMA_LAYERS,
+    /// Request content encoding lzma layers is greater than limit.
+    REQUEST_TOO_MANY_LZMA_LAYERS,
+    /// Too many request or response encoding layers
+    TOO_MANY_ENCODING_LAYERS,
+    /// Response header content-encoding header is invalid
+    ABNORMAL_CE_HEADER,
+    /// Request authorization header unrecognized
+    AUTH_UNRECOGNIZED,
+    /// Request header has been seen more than once.
+    REQUEST_HEADER_REPETITION,
+    /// Response header has been seen more than once.
+    RESPONSE_HEADER_REPETITION,
+    /// Response content-type is multipart-byteranges (unsupported).
+    RESPONSE_MULTIPART_BYTERANGES,
+    /// Response transfer-encoding has an abnormal chunked value.
+    RESPONSE_ABNORMAL_TRANSFER_ENCODING,
+    /// Response chunked transfer-encoding on HTTP/0.9 or HTTP/1.0.
+    RESPONSE_CHUNKED_OLD_PROTO,
+    /// Response protocol invalid.
+    RESPONSE_INVALID_PROTOCOL,
+    /// Response status invalid.
+    RESPONSE_INVALID_STATUS,
+    /// Request line is incomplete.
+    REQUEST_LINE_INCOMPLETE,
+    /// Request uri has double encoding.
+    DOUBLE_ENCODED_URI,
+    /// Request line is invalid.
+    REQUEST_LINE_INVALID,
+    /// Unexpected request body present.
+    REQUEST_BODY_UNEXPECTED,
+    /// Reached LZMA memory limit.
+    LZMA_MEMLIMIT_REACHED,
+    /// Reached configured time limit for decompression or reached bomb limit.
+    COMPRESSION_BOMB,
+    /// Unexpected response body present.
+    RESPONSE_BODY_UNEXPECTED,
+    /// Content-length parsing contains extra leading characters.
+    CONTENT_LENGTH_EXTRA_DATA_START,
+    /// Content-length parsing contains extra trailing characters
+    CONTENT_LENGTH_EXTRA_DATA_END,
+    /// 101 Switching Protocol seen with a content-length.
+    SWITCHING_PROTO_WITH_CONTENT_LENGTH,
+    /// End of line is deformed.
+    DEFORMED_EOL,
+    /// Parsing error encountered in request or response.
+    PARSER_STATE_ERROR,
+    /// Missing outbound transaction while state is not idle.
+    MISSING_OUTBOUND_TRANSACTION_DATA,
+    /// Missing inbound transaction while state is not idle.
+    MISSING_INBOUND_TRANSACTION_DATA,
+    /// Supplied data chunk has a length of zero.
+    ZERO_LENGTH_DATA_CHUNKS,
+    /// Request Line method is unknown.
+    REQUEST_LINE_UNKNOWN_METHOD,
+    /// Request line method is unknown and no protocol information was found.
+    REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL,
+    /// Request line method is unknown and protocol is invalid.
+    REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL,
+    /// Request line protocol information was not found.
+    REQUEST_LINE_NO_PROTOCOL,
+    /// Response line protocol is invalid.
+    RESPONSE_LINE_INVALID_PROTOCOL,
+    /// Response line status number is out of range.
+    RESPONSE_LINE_INVALID_RESPONSE_STATUS,
+    /// Response parsing progress is at an invalid state.
+    RESPONSE_BODY_INTERNAL_ERROR,
+    /// Request body data callback produced an error.
+    REQUEST_BODY_DATA_CALLBACK_ERROR,
+    /// Response header name is empty.
+    RESPONSE_INVALID_EMPTY_NAME,
+    /// Request header name is empty.
+    REQUEST_INVALID_EMPTY_NAME,
+    /// Response header name has extra whitespace after name.
+    RESPONSE_INVALID_LWS_AFTER_NAME,
+    /// Response header name is not a valid token.
+    RESPONSE_HEADER_NAME_NOT_TOKEN,
+    /// Request header name has extra whitespace after name.
+    REQUEST_INVALID_LWS_AFTER_NAME,
+    /// LZMA decompression is disabled.
+    LZMA_DECOMPRESSION_DISABLED,
+    /// Tried to open a connection that is already open.
+    CONNECTION_ALREADY_OPEN,
+    /// Protocol parsing detected leading or trailing data.
+    PROTOCOL_CONTAINS_EXTRA_DATA,
+    /// Invalid gap detected.
+    INVALID_GAP,
+    /// Compression bomb due to double lzma encoding.
+    COMPRESSION_BOMB_DOUBLE_LZMA,
+    /// Invalid content-encoding detected.
+    INVALID_CONTENT_ENCODING,
+    /// Error retrieving a log message's code
+    ERROR,
+}
+
+/// Enumerates all log levels.
+/// Variants are declared from most to least severe, so the derived
+/// `PartialOrd` lets `Logger::log` filter with a simple comparison.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
+pub(crate) enum HtpLogLevel {
+    /// Designates fatal error.
+    Error,
+    /// Designates hazardous situations.
+    Warning,
+    /// Default log level value.
+    Notice,
+    /// Designates useful information.
+    Info,
+}
+#[derive(Clone)]
+/// Logger struct
+pub(crate) struct Logger {
+    /// The sender half of a logging channel:
+    /// a shared queue that the log consumer drains.
+    pub(crate) sender: Rc<RefCell<VecDeque<Log>>>,
+    /// Log level used when deciding whether to store or
+    /// ignore the messages issued by the parser.
+    pub(crate) level: HtpLogLevel,
+}
+
+impl Logger {
+    /// Returns a new logger instance sharing the given queue,
+    /// with the default `Notice` level.
+    pub(crate) fn new(sender: &Rc<RefCell<VecDeque<Log>>>) -> Logger {
+        Self {
+            sender: sender.clone(),
+            level: HtpLogLevel::Notice,
+        }
+    }
+    /// Logs a message to the logger channel.
+    /// `_file` and `_line` are currently unused but kept for API stability.
+    pub(crate) fn log(
+        &mut self, _file: &str, _line: u32, level: HtpLogLevel, code: HtpLogCode, msg: String,
+    ) {
+        // Ignore messages below our log level. Variant order is
+        // Error < Warning < Notice < Info, so `<=` keeps messages at or
+        // above the configured severity.
+        if level <= self.level {
+            let mut sender = self.sender.borrow_mut();
+            sender.push_back(Log::new(Message::new(code, msg)));
+        }
+    }
+}
+
+#[derive(Clone)]
+/// Represents a single Message entry for a log
+pub(crate) struct Message {
+    /// Log message string.
+    pub(crate) msg: String,
+    //level: HtpLogLevel,
+    /// Message code.
+    pub(crate) code: HtpLogCode,
+    // NOTE(review): the commented-out fields look reserved for richer
+    // log records (level/file/line) -- confirm before removing.
+    //pub(crate) file: String,
+    //line: u32,
+}
+
+impl Message {
+    /// Returns a new Message instance
+    pub(crate) fn new(code: HtpLogCode, msg: String) -> Message {
+        Self { code, msg }
+    }
+}
+
+/// Represents a single log entry.
+#[derive(Clone)]
+pub struct Log {
+    /// Log message (code plus text).
+    pub(crate) msg: Message,
+}
+
+impl Log {
+    /// Returns a new Log instance.
+    pub(crate) fn new(msg: Message) -> Log {
+        Self { msg }
+    }
+}
+
+/// Logs a message at the given level.
+/// Captures the call site via `file!()`/`line!()` and stringifies `$msg`.
+#[macro_export]
+macro_rules! htp_log {
+    ($logger:expr, $level:expr, $code:expr, $msg:expr) => {{
+        // The `use` lets callers pass bare `HtpLogCode::...` / `HtpLogLevel::...`
+        // paths, resolved at the expansion site.
+        use $crate::log::{HtpLogCode, HtpLogLevel};
+        $logger.log(file!(), line!(), $level, $code, $msg.to_string());
+    }};
+}
+
+/// Logs a message at the info level.
+/// Thin wrapper delegating to `htp_log!`.
+#[macro_export]
+macro_rules! htp_info {
+    ($logger:expr, $code:expr, $msg:expr) => {
+        htp_log!($logger, HtpLogLevel::Info, $code, $msg);
+    };
+}
+
+/// Logs a message at the warning level.
+/// Thin wrapper delegating to `htp_log!`.
+#[macro_export]
+macro_rules! htp_warn {
+    ($logger:expr, $code:expr, $msg:expr) => {
+        htp_log!($logger, HtpLogLevel::Warning, $code, $msg);
+    };
+}
+
+/// Logs a message at the error level.
+/// Thin wrapper delegating to `htp_log!`.
+#[macro_export]
+macro_rules! htp_error {
+    ($logger:expr, $code:expr, $msg:expr) => {
+        htp_log!($logger, HtpLogLevel::Error, $code, $msg);
+    };
+}
+
+/// Logs a message at the warning level, ensuring that it ones logs the message once.
+#[macro_export]
+macro_rules! htp_warn_once {
+    ($logger:expr, $code:expr, $msg:expr, $tx_flags:expr, $flags:expr, $flag:expr) => {
+        // Log only once per transaction.
+        if !$tx_flags.is_set($flag) {
+            htp_warn!($logger, $code, $msg);
+        }
+        $tx_flags.set($flag);
+        $flags.set($flag);
+    };
+}
diff --git a/rust/htp/src/parsers.rs b/rust/htp/src/parsers.rs
new file mode 100644 (file)
index 0000000..9ab1e49
--- /dev/null
@@ -0,0 +1,629 @@
+use crate::{
+    bstr::Bstr,
+    error::Result,
+    log::Logger,
+    transaction::{Header, HtpAuthType, HtpProtocol, HtpResponseNumber, Transaction},
+    util::{
+        ascii_digits, convert_port, hex_digits, take_ascii_whitespace, take_chunked_ctl_chars,
+        validate_hostname,
+    },
+    HtpStatus,
+};
+use base64::{engine::general_purpose::STANDARD, Engine};
+use nom::{
+    branch::alt,
+    bytes::complete::{is_not, tag, tag_no_case, take_till, take_until, take_while},
+    combinator::{map, not, opt, peek},
+    error::ErrorKind,
+    multi::many0,
+    sequence::tuple,
+    IResult,
+};
+
+/// Parses the content type header, trimming any leading whitespace.
+/// Finds the end of the MIME type, using the same approach PHP 5.4.3 uses.
+///
+/// Returns a tuple of the remaining unparsed header data and the content type
+fn content_type() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| {
+        map(
+            tuple((
+                take_ascii_whitespace(),
+                take_till(|c| c == b';' || c == b',' || c == b' '),
+            )),
+            |(_, content_type)| content_type,
+        )(input)
+    }
+}
+
+/// Parses the content type header from the given header value, lowercases it, and stores it in the provided ct bstr.
+/// Finds the end of the MIME type, using the same approach PHP 5.4.3 uses.
+pub(crate) fn parse_content_type(header: &[u8]) -> Result<Bstr> {
+    let (_, content_type) = content_type()(header)?;
+    let mut ct = Bstr::from(content_type);
+    ct.make_ascii_lowercase();
+    Ok(ct)
+}
+
+/// Parses Content-Length string (positive decimal number). White space is
+/// allowed before and after the number.
+///
+/// Returns content length, or None if input is not valid.
+pub(crate) fn parse_content_length(input: &[u8], logger: Option<&mut Logger>) -> Option<u64> {
+    let (trailing_data, (leading_data, content_length)) = ascii_digits(input).ok()?;
+    if let Some(logger) = logger {
+        if !leading_data.is_empty() {
+            // Contains invalid characters! But still attempt to process
+            htp_warn!(
+                logger,
+                HtpLogCode::CONTENT_LENGTH_EXTRA_DATA_START,
+                "C-L value with extra data in the beginning"
+            );
+        }
+
+        if !trailing_data.is_empty() {
+            // Ok to have junk afterwards
+            htp_warn!(
+                logger,
+                HtpLogCode::CONTENT_LENGTH_EXTRA_DATA_END,
+                "C-L value with extra data in the end"
+            );
+        }
+    }
+    std::str::from_utf8(content_length)
+        .ok()?
+        .parse::<u64>()
+        .ok()
+}
+
+/// Parses chunked length (positive hexadecimal number). White space is allowed before
+/// and after the number.
+///
+/// Returns the parsed length (None when empty or over the internal limit) and
+/// a flag telling whether a chunk extension (';') follows the digits.
+pub(crate) fn parse_chunked_length(input: &[u8]) -> Result<(Option<u64>, bool)> {
+    let (rest, _) = take_chunked_ctl_chars(input)?;
+    let (trailing_data, chunked_length) = hex_digits()(rest)?;
+    // No digits and nothing after them: treat as "no length present".
+    if trailing_data.is_empty() && chunked_length.is_empty() {
+        return Ok((None, false));
+    }
+    let chunked_len = u64::from_str_radix(
+        std::str::from_utf8(chunked_length).map_err(|_| HtpStatus::ERROR)?,
+        16,
+    )
+    .map_err(|_| HtpStatus::ERROR)?;
+    //TODO: remove this limit and update appropriate tests after differential fuzzing
+    if chunked_len > i32::MAX as u64 {
+        return Ok((None, false));
+    }
+    // A ';' in the trailing data marks a chunk extension.
+    let has_ext = trailing_data.contains(&b';');
+    Ok((Some(chunked_len), has_ext))
+}
+
+/// Attempts to extract the scheme from a given input URI.
+///
+/// Returns a tuple of the unconsumed data and the matched scheme.
+pub(crate) fn scheme() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| {
+        // Scheme test: if it doesn't start with a forward slash character (which it must
+        // for the contents to be a path or an authority), then it must be the scheme part
+        map(
+            tuple((peek(not(tag("/"))), take_until(":"), tag(":"))),
+            |(_, scheme, _)| scheme,
+        )(input)
+    }
+}
+
+/// Helper for parsed credentials: (username bytes, Option<password bytes>)
+pub(crate) type ParsedCredentials<'a> = (&'a [u8], Option<&'a [u8]>);
+
+/// Attempts to extract the credentials from a given input URI, assuming the scheme has already been extracted.
+///
+/// Returns a tuple of the remaining unconsumed data and a tuple of the matched username and password.
+pub(crate) fn credentials() -> impl Fn(&[u8]) -> IResult<&[u8], ParsedCredentials> {
+    move |input| {
+        // Authority test: two forward slash characters and it's an authority.
+        // One, three or more slash characters, and it's a path.
+        // Note: we only attempt to parse authority if we've seen a scheme.
+        let (input, (_, _, credentials, _)) =
+            tuple((tag("//"), peek(not(tag("/"))), take_until("@"), tag("@")))(input)?;
+        // Split "user:pass"; with no ':' the whole blob is the username.
+        let (password, username) = opt(tuple((take_until(":"), tag(":"))))(credentials)?;
+        if let Some((username, _)) = username {
+            Ok((input, (username, Some(password))))
+        } else {
+            Ok((input, (credentials, None)))
+        }
+    }
+}
+
+/// Attempts to extract an IPv6 hostname from a given input URI,
+/// assuming any scheme, credentials, hostname, port, and path have been already parsed out.
+///
+/// Returns a tuple of the remaining unconsumed data and the matched ipv6 hostname.
+pub(crate) fn ipv6() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| -> IResult<&[u8], &[u8]> {
+        // The closing ']' is optional. The returned slice is everything the
+        // bracketed expression consumed, recovered via length arithmetic.
+        let (rest, _) = tuple((tag("["), is_not("/?#]"), opt(tag("]"))))(input)?;
+        Ok((rest, &input[..input.len() - rest.len()]))
+    }
+}
+
+/// Attempts to extract the hostname from a given input URI
+///
+/// Returns a tuple of the remaining unconsumed data and the matched hostname.
+pub(crate) fn hostname() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| {
+        let (input, mut hostname) = map(
+            tuple((
+                opt(tag("//")), //If it starts with "//", skip (might have parsed a scheme and no creds)
+                peek(not(tag("/"))), //If it starts with '/', this is a path, not a hostname
+                many0(tag(" ")),
+                alt((ipv6(), is_not("/?#:"))),
+            )),
+            |(_, _, _, hostname)| hostname,
+        )(input)?;
+        //There may be spaces in the middle of a hostname, so we must trim only at the end
+        while hostname.ends_with(b" ") {
+            hostname = &hostname[..hostname.len() - 1];
+        }
+        Ok((input, hostname))
+    }
+}
+
+/// Attempts to extract the port from a given input URI,
+/// assuming any scheme, credentials, or hostname have been already parsed out.
+///
+/// Returns a tuple of the remaining unconsumed data and the matched port.
+pub(crate) fn port() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| {
+        // Must start with ":" for there to be a port to parse
+        let (input, (_, _, port, _)) =
+            tuple((tag(":"), many0(tag(" ")), is_not("/?#"), many0(tag(" "))))(input)?;
+        let (_, port) = is_not(" ")(port)?; //we assume there never will be a space in the middle of a port
+        Ok((input, port))
+    }
+}
+
+/// Attempts to extract the path from a given input URI,
+/// assuming any scheme, credentials, hostname, and port have been already parsed out.
+///
+/// Returns a tuple of the remaining unconsumed data and the matched path.
+pub(crate) fn path() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| is_not("#?")(input)
+}
+
+/// Attempts to extract the query from a given input URI,
+/// assuming any scheme, credentials, hostname, port, and path have been already parsed out.
+///
+/// Returns a tuple of the remaining unconsumed data and the matched query.
+pub(crate) fn query() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| {
+        // Skip the starting '?'
+        map(tuple((tag("?"), take_till(|c| c == b'#'))), |(_, query)| {
+            query
+        })(input)
+    }
+}
+
+/// Attempts to extract the fragment from a given input URI,
+/// assuming any other components have been parsed out.
+///
+/// Returns a tuple of the remaining unconsumed data and the matched fragment.
+pub(crate) fn fragment() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| {
+        // Skip the starting '#'
+        let (input, _) = tag("#")(input)?;
+        Ok((b"", input))
+    }
+}
+
+type parsed_port<'a> = Option<(&'a [u8], Option<u16>)>;
+type parsed_hostport<'a> = (&'a [u8], parsed_port<'a>, bool);
+
+/// Parses an authority string, which consists of a hostname with an optional port number
+///
+/// Returns a remaining unparsed data, parsed hostname, parsed port, converted port number,
+/// and a flag indicating whether the parsed data is valid.
+pub(crate) fn parse_hostport(input: &[u8]) -> IResult<&[u8], parsed_hostport> {
+    let (input, host) = hostname()(input)?;
+    let mut valid = validate_hostname(host);
+    if let Ok((_, p)) = port()(input) {
+        if let Some(port) = convert_port(p) {
+            return Ok((input, (host, Some((p, Some(port))), valid)));
+        } else {
+            return Ok((input, (host, Some((p, None)), false)));
+        }
+    } else if !input.is_empty() {
+        //Trailing data after the hostname that is invalid e.g. [::1]xxxxx
+        valid = false;
+    }
+    Ok((input, (host, None, valid)))
+}
+
+/// Extracts the version protocol from the input slice.
+///
+/// Recognizes "HTTP/<version>" where <version> is ".9" (after any leading zeros
+/// are consumed, so "0.9" matches), "1.0" or "1.1". Whitespace is tolerated
+/// before "HTTP", around the "/", and after the version.
+///
+/// Returns (any unparsed trailing data, (version_number, flag indicating whether input contains trailing and/or leading whitespace and/or leading zeros))
+fn protocol_version(input: &[u8]) -> IResult<&[u8], (&[u8], bool)> {
+    map(
+        tuple((
+            take_ascii_whitespace(), // whitespace before "HTTP" (not flagged)
+            tag_no_case("HTTP"),
+            take_ascii_whitespace(), // whitespace between "HTTP" and "/" (flagged below)
+            tag("/"),
+            take_while(|c: u8| c.is_ascii_whitespace() || c == b'0'), // ws / leading zeros after "/"
+            alt((tag(".9"), tag("1.0"), tag("1.1"))),
+            take_ascii_whitespace(), // trailing whitespace (not flagged)
+        )),
+        |(_, _, leading, _, trailing, version, _)| {
+            // NOTE: `leading` is the whitespace between "HTTP" and "/", and
+            // `trailing` the whitespace/zeros after "/"; either being non-empty
+            // marks the version string as sloppily formatted.
+            (version, !leading.is_empty() || !trailing.is_empty())
+        },
+    )(input)
+}
+
+/// Determines protocol number from a textual representation (i.e., "HTTP/1.1"). This
+/// function tries to be flexible, allowing whitespace before and after the forward slash,
+/// as well as allowing leading zeros in the version number. If such leading/trailing
+/// characters are discovered, however, a warning will be logged.
+///
+/// Returns HtpProtocol version or invalid.
+pub(crate) fn parse_protocol(input: &[u8], logger: &mut Logger) -> HtpProtocol {
+    if let Ok((remaining, (version, contains_trailing))) = protocol_version(input) {
+        if !remaining.is_empty() {
+            return HtpProtocol::Invalid;
+        }
+        if contains_trailing {
+            htp_warn!(
+                    logger,
+                    HtpLogCode::PROTOCOL_CONTAINS_EXTRA_DATA,
+                    "HtpProtocol version contains leading and/or trailing whitespace and/or leading zeros"
+                );
+        }
+        match version {
+            b".9" => HtpProtocol::V0_9,
+            b"1.0" => HtpProtocol::V1_0,
+            b"1.1" => HtpProtocol::V1_1,
+            _ => HtpProtocol::Invalid,
+        }
+    } else {
+        HtpProtocol::Invalid
+    }
+}
+
+/// Determines the numerical value of a response status given as a string.
+pub(crate) fn parse_status(status: &[u8]) -> HtpResponseNumber {
+    if let Ok((trailing_data, (leading_data, status_code))) = ascii_digits(status) {
+        if !trailing_data.is_empty() || !leading_data.is_empty() {
+            //There are invalid characters in the status code
+            return HtpResponseNumber::Invalid;
+        }
+        if let Ok(status_code) = std::str::from_utf8(status_code) {
+            if let Ok(status_code) = status_code.parse::<u16>() {
+                if (100..=999).contains(&status_code) {
+                    return HtpResponseNumber::Valid(status_code);
+                }
+            }
+        }
+    }
+    HtpResponseNumber::Invalid
+}
+
+/// Parses Digest Authorization request header.
+///
+/// Extracts the quoted `username` value, unescaping any `\"` sequences.
+/// Returns the input remaining after the closing quote and the username bytes.
+fn parse_authorization_digest(auth_header_value: &[u8]) -> IResult<&[u8], Vec<u8>> {
+    // Extract the username
+    let (mut remaining_input, _) = tuple((
+        take_until("username="),
+        tag("username="),
+        take_ascii_whitespace(), // allow lws
+        tag("\""),               // First character after LWS must be a double quote
+    ))(auth_header_value)?;
+    let mut result = Vec::new();
+    // Unescape any escaped double quotes and find the closing quote
+    loop {
+        let (remaining, (auth_header, _)) = tuple((take_until("\""), tag("\"")))(remaining_input)?;
+        remaining_input = remaining;
+        result.extend_from_slice(auth_header);
+        if result.last() == Some(&(b'\\')) {
+            // Remove the escape and push back the double quote
+            // NOTE(review): a literal backslash immediately before the closing
+            // quote (e.g. username="a\\") is also treated as an escape here —
+            // confirm against the intended quoted-string semantics.
+            result.pop();
+            result.push(b'\"');
+        } else {
+            // We found the closing double quote!
+            break;
+        }
+    }
+    Ok((remaining_input, result))
+}
+
+/// Parses Basic Authorization request header.
+fn parse_authorization_basic(request_tx: &mut Transaction, auth_header: &Header) -> Result<()> {
+    // Skip 'Basic<lws>'
+    let (remaining_input, _) =
+        tuple((tag_no_case("basic"), take_ascii_whitespace()))(auth_header.value.as_slice())
+            .map_err(|_| HtpStatus::DECLINED)?;
+    // Decode base64-encoded data
+    let decoded = STANDARD
+        .decode(remaining_input)
+        .map_err(|_| HtpStatus::DECLINED)?;
+    let (password, (username, _)) =
+        tuple::<_, _, (&[u8], ErrorKind), _>((take_until(":"), tag(":")))(decoded.as_slice())
+            .map_err(|_| HtpStatus::DECLINED)?;
+    request_tx.request_auth_username = Some(Bstr::from(username));
+    request_tx.request_auth_password = Some(Bstr::from(password));
+    Ok(())
+}
+
+/// Parses Authorization request header.
+///
+/// Dispatches on the scheme keyword (basic / digest / bearer) and records the
+/// detected auth type and extracted credentials on the transaction. Absence of
+/// the header sets HtpAuthType::NONE; an unknown scheme sets UNRECOGNIZED.
+pub(crate) fn parse_authorization(request_tx: &mut Transaction) -> Result<()> {
+    // Clone the header so the transaction can be mutated below.
+    let auth_header = if let Some(auth_header) = request_tx
+        .request_headers
+        .get_nocase_nozero("authorization")
+    {
+        auth_header.clone()
+    } else {
+        request_tx.request_auth_type = HtpAuthType::NONE;
+        return Ok(());
+    };
+    // TODO Need a flag to raise when failing to parse authentication headers.
+    if auth_header.value.starts_with_nocase("basic") {
+        // Basic authentication
+        request_tx.request_auth_type = HtpAuthType::BASIC;
+        return parse_authorization_basic(request_tx, &auth_header);
+    } else if auth_header.value.starts_with_nocase("digest") {
+        // Digest authentication
+        request_tx.request_auth_type = HtpAuthType::DIGEST;
+        let (_, auth_username) = parse_authorization_digest(auth_header.value.as_slice())
+            .map_err(|_| HtpStatus::DECLINED)?;
+        // Reuse the existing Bstr allocation when one is already present.
+        if let Some(username) = &mut request_tx.request_auth_username {
+            username.clear();
+            username.add(auth_username);
+        } else {
+            request_tx.request_auth_username = Some(Bstr::from(auth_username));
+        }
+    } else if auth_header.value.starts_with_nocase("bearer") {
+        request_tx.request_auth_type = HtpAuthType::BEARER;
+        // Everything after "Bearer" + LWS is the opaque token.
+        let (token, _) = tuple((
+            tag_no_case("bearer"),
+            take_ascii_whitespace(), // allow lws
+        ))(auth_header.value.as_slice())
+        .map_err(|_| HtpStatus::DECLINED)?;
+        request_tx.request_auth_token = Some(Bstr::from(token));
+    } else {
+        // Unrecognized authentication method
+        request_tx.request_auth_type = HtpAuthType::UNRECOGNIZED
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod test {
+    // Unit tests for the URI-component and header parsers above, driven by
+    // rstest cases; #[should_panic] cases assert that parsing fails (unwrap panics).
+    use super::*;
+    use rstest::rstest;
+
+    #[rstest]
+    #[case("   username=   \"ivan\\\"r\\\"\"", "ivan\"r\"", "")]
+    #[case("username=\"ivan\\\"r\\\"\"", "ivan\"r\"", "")]
+    #[case("username=\"ivan\\\"r\\\"\"   ", "ivan\"r\"", "   ")]
+    #[case("username=\"ivanr\"   ", "ivanr", "   ")]
+    #[case("username=   \"ivanr\"   ", "ivanr", "   ")]
+    #[should_panic]
+    #[case("username=ivanr\"   ", "", "")]
+    #[should_panic]
+    #[case("username=\"ivanr   ", "", "")]
+    fn test_parse_authorization_digest(
+        #[case] input: &str, #[case] username: &str, #[case] remaining: &str,
+    ) {
+        assert_eq!(
+            parse_authorization_digest(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), username.as_bytes().to_vec())
+        );
+    }
+
+    #[rstest]
+    #[case("   200    ", HtpResponseNumber::Valid(200))]
+    #[case("  \t 404    ", HtpResponseNumber::Valid(404))]
+    #[case("123", HtpResponseNumber::Valid(123))]
+    #[case("99", HtpResponseNumber::Invalid)]
+    #[case("1000", HtpResponseNumber::Invalid)]
+    #[case("200 OK", HtpResponseNumber::Invalid)]
+    #[case("NOT 200", HtpResponseNumber::Invalid)]
+    fn test_parse_status(#[case] input: &str, #[case] expected: HtpResponseNumber) {
+        assert_eq!(parse_status(&Bstr::from(input)), expected);
+    }
+
+    #[rstest]
+    #[case(
+        "http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag",
+        "http",
+        "//user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag"
+    )]
+    #[should_panic]
+    #[case(
+        "/http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag",
+        "",
+        ""
+    )]
+    fn test_scheme(#[case] input: &str, #[case] s: &str, #[case] remaining: &str) {
+        assert_eq!(
+            scheme()(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), s.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case(
+        "//user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag",
+        "user",
+        Some("pass"),
+        "www.example.com:1234/path1/path2?a=b&c=d#frag"
+    )]
+    #[case(
+        "//user@www.example.com:1234/path1/path2?a=b&c=d#frag",
+        "user",
+        None,
+        "www.example.com:1234/path1/path2?a=b&c=d#frag"
+    )]
+    #[should_panic]
+    #[case(
+        "http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag",
+        "",
+        None,
+        ""
+    )]
+    fn test_credentials(
+        #[case] input: &str, #[case] username: &str, #[case] password: Option<&str>,
+        #[case] remaining: &str,
+    ) {
+        assert_eq!(
+            credentials()(input.as_bytes()).unwrap(),
+            (
+                remaining.as_bytes(),
+                (username.as_bytes(), password.map(|i| i.as_bytes()))
+            )
+        );
+    }
+
+    #[rstest]
+    #[case(
+        "www.example.com:1234/path1/path2?a=b&c=d#frag",
+        "www.example.com",
+        ":1234/path1/path2?a=b&c=d#frag"
+    )]
+    #[case(
+        "www.example.com/path1/path2?a=b&c=d#frag",
+        "www.example.com",
+        "/path1/path2?a=b&c=d#frag"
+    )]
+    #[case("www.example.com?a=b&c=d#frag", "www.example.com", "?a=b&c=d#frag")]
+    #[case("www.example.com#frag", "www.example.com", "#frag")]
+    #[case("[::1]:8080", "[::1]", ":8080")]
+    #[case("[::1", "[::1", "")]
+    #[case("[::1/path1[0]", "[::1", "/path1[0]")]
+    #[case("[::1]xxxx", "[::1]", "xxxx")]
+    #[should_panic]
+    #[case("/www.example.com/path1/path2?a=b&c=d#frag", "", "")]
+    fn test_hostname(#[case] input: &str, #[case] host: &str, #[case] remaining: &str) {
+        assert_eq!(
+            hostname()(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), host.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case(":1234/path1/path2?a=b&c=d#frag", "1234", "/path1/path2?a=b&c=d#frag")]
+    #[case(":1234?a=b&c=d#frag", "1234", "?a=b&c=d#frag")]
+    #[case(":1234#frag", "1234", "#frag")]
+    #[should_panic]
+    #[case("1234/path1/path2?a=b&c=d#frag", "", "")]
+    fn test_port(#[case] input: &str, #[case] p: &str, #[case] remaining: &str) {
+        assert_eq!(
+            port()(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), p.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case("/path1/path2?a=b&c=d#frag", "/path1/path2", "?a=b&c=d#frag")]
+    #[case("/path1/path2#frag", "/path1/path2", "#frag")]
+    #[case("path1/path2?a=b&c=d#frag", "path1/path2", "?a=b&c=d#frag")]
+    #[case("//", "//", "")]
+    #[case(
+        "/uid=0(root) gid=0(root) groups=0(root)asdf",
+        "/uid=0(root) gid=0(root) groups=0(root)asdf",
+        ""
+    )]
+    fn test_path(#[case] input: &str, #[case] p: &str, #[case] remaining: &str) {
+        assert_eq!(
+            path()(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), p.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case("?a=b&c=d#frag", "a=b&c=d", "#frag")]
+    #[case("?a=b&c=d", "a=b&c=d", "")]
+    #[case("?", "", "")]
+    fn test_query(#[case] input: &str, #[case] q: &str, #[case] remaining: &str) {
+        assert_eq!(
+            query()(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), q.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case("#frag", "frag")]
+    #[case("##frag", "#frag")]
+    #[should_panic]
+    #[case("frag", "")]
+    #[should_panic]
+    #[case("/path#frag", "")]
+    fn test_fragment(#[case] input: &str, #[case] frag: &str) {
+        assert_eq!(
+            fragment()(input.as_bytes()).unwrap(),
+            ("".as_bytes(), frag.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case("www.example.com", "www.example.com", None, true, "")]
+    #[case(" www.example.com ", "www.example.com", None, true, "")]
+    #[case(" www.example.com:8001 ", "www.example.com", Some(("8001", Some(8001))), true, ":8001 ")]
+    #[case(" www.example.com :  8001 ", "www.example.com", Some(("8001", Some(8001))), true, ":  8001 ")]
+    #[case("www.example.com.", "www.example.com.", None, true, "")]
+    #[case("www.example.com.", "www.example.com.", None, true, "")]
+    #[case("www.example.com:", "www.example.com", None, false, ":")]
+    #[case("www.example.com:ff", "www.example.com", Some(("ff", None)), false, ":ff")]
+    #[case("www.example.com:0", "www.example.com", Some(("0", None)), false, ":0")]
+    #[case("www.example.com:65536", "www.example.com", Some(("65536", None)), false, ":65536")]
+    #[case("[::1]:8080", "[::1]", Some(("8080", Some(8080))), true, ":8080")]
+    #[case("[::1]:", "[::1]", None, false, ":")]
+    #[case("[::1]x", "[::1]", None, false, "x")]
+    #[case("[::1", "[::1", None, false, "")]
+    fn test_parse_hostport(
+        #[case] input: &str, #[case] hostname: &str,
+        #[case] parsed_port: Option<(&str, Option<u16>)>, #[case] valid: bool,
+        #[case] remaining: &str,
+    ) {
+        assert_eq!(
+            parse_hostport(input.as_bytes()).unwrap(),
+            (
+                remaining.as_bytes(),
+                (
+                    hostname.as_bytes(),
+                    parsed_port.map(|(port, port_nmb)| (port.as_bytes(), port_nmb)),
+                    valid
+                )
+            )
+        );
+    }
+
+    #[rstest]
+    #[case("134", Some(134))]
+    #[case("    \t134    ", Some(134))]
+    #[case("abcd134    ", Some(134))]
+    #[case("abcd    ", None)]
+    fn test_parse_content_length(#[case] input: &str, #[case] expected: Option<u64>) {
+        assert_eq!(parse_content_length(input.as_bytes(), None), expected);
+    }
+
+    #[rstest]
+    #[case("0 ; qw3=asd3; zc3=\"rt\"y3\"", (Some(0), true))]
+    #[case("12a5", (Some(0x12a5), false))]
+    #[case("12a5;ext=value", (Some(0x12a5), true))]
+    #[case("    \t12a5    ", (Some(0x12a5), false))]
+    #[case("    \t    ", (None, false))]
+    fn test_parse_chunked_length(#[case] input: &str, #[case] expected: (Option<u64>, bool)) {
+        assert_eq!(parse_chunked_length(input.as_bytes()).unwrap(), expected);
+    }
+
+    #[rstest]
+    #[case("multipart/form-data", "multipart/form-data")]
+    #[case("multipart/form-data;boundary=X", "multipart/form-data")]
+    #[case("multipart/form-data boundary=X", "multipart/form-data")]
+    #[case("multipart/form-data,boundary=X", "multipart/form-data")]
+    #[case("multipart/FoRm-data", "multipart/form-data")]
+    #[case("multipart/form-data\t boundary=X", "multipart/form-data\t")]
+    #[case("   \tmultipart/form-data boundary=X", "multipart/form-data")]
+    #[case("", "")]
+    fn test_parse_content_type(#[case] input: &str, #[case] expected: &str) {
+        assert_eq!(
+            parse_content_type(input.as_bytes()).unwrap(),
+            Bstr::from(expected)
+        );
+    }
+}
diff --git a/rust/htp/src/request.rs b/rust/htp/src/request.rs
new file mode 100644 (file)
index 0000000..c26dfe7
--- /dev/null
@@ -0,0 +1,1624 @@
+use crate::{
+    bstr::Bstr,
+    config::{HtpServerPersonality, HtpUnwanted},
+    connection::ConnectionFlags,
+    connection_parser::{ConnectionParser, HtpStreamState, ParserData, State},
+    decompressors::{Decompressor, HtpContentEncoding},
+    error::Result,
+    headers::HeaderFlags,
+    hook::DataHook,
+    parsers::{parse_chunked_length, parse_content_length, parse_protocol},
+    transaction::{
+        Data, Header, HtpProtocol, HtpRequestProgress, HtpResponseProgress, HtpTransferCoding,
+    },
+    util::{
+        chomp, is_chunked_ctl_line, is_line_ignorable, is_space, is_valid_chunked_length_data,
+        split_on_predicate, take_is_space, take_not_is_space, take_till_lf, take_till_lf_null,
+        take_until_null, trimmed, FlagOperations, HtpFlags,
+    },
+    HtpStatus,
+};
+use nom::sequence::tuple;
+use std::{
+    cmp::{min, Ordering},
+    mem::take,
+};
+use time::OffsetDateTime;
+
+/// Maximum number of "junk" bytes tolerated during HTTP/0.9 handling
+/// (usage is outside this view — presumably bounds request-line probing; verify).
+const HTTP09_MAX_JUNK_LEN: usize = 16;
+
+/// Enumerate HTTP methods, including the WebDAV and versioning extensions.
+///
+/// `#[repr(C)]` keeps the layout stable for the cbindgen-generated C API.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpMethod {
+    /// Used by default, until the method is determined (e.g., before
+    /// the request line is processed).
+    Unknown,
+    /// HEAD
+    HEAD,
+    /// GET
+    GET,
+    /// PUT
+    PUT,
+    /// POST
+    POST,
+    /// DELETE
+    DELETE,
+    /// CONNECT
+    CONNECT,
+    /// OPTIONS
+    OPTIONS,
+    /// TRACE
+    TRACE,
+    /// PATCH
+    PATCH,
+    /// PROPFIND
+    PROPFIND,
+    /// PROPPATCH
+    PROPPATCH,
+    /// MKCOL
+    MKCOL,
+    /// COPY
+    COPY,
+    /// MOVE
+    MOVE,
+    /// LOCK
+    LOCK,
+    /// UNLOCK
+    UNLOCK,
+    /// VERSION_CONTROL
+    VERSION_CONTROL,
+    /// CHECKOUT
+    CHECKOUT,
+    /// UNCHECKOUT
+    UNCHECKOUT,
+    /// CHECKIN
+    CHECKIN,
+    /// UPDATE
+    UPDATE,
+    /// LABEL
+    LABEL,
+    /// REPORT
+    REPORT,
+    /// MKWORKSPACE
+    MKWORKSPACE,
+    /// MKACTIVITY
+    MKACTIVITY,
+    /// BASELINE_CONTROL
+    BASELINE_CONTROL,
+    /// MERGE
+    MERGE,
+    /// INVALID
+    Invalid,
+    /// ERROR
+    ERROR,
+}
+
+impl HtpMethod {
+    /// Creates a new HtpMethod from the slice.
+    ///
+    /// The match is exact and case-sensitive; anything unmatched maps to
+    /// `HtpMethod::Unknown`. Note the hyphenated wire forms "VERSION-CONTROL"
+    /// and "BASELINE-CONTROL" map to the underscored variants.
+    fn new(method: &[u8]) -> Self {
+        match method {
+            b"GET" => HtpMethod::GET,
+            b"PUT" => HtpMethod::PUT,
+            b"POST" => HtpMethod::POST,
+            b"DELETE" => HtpMethod::DELETE,
+            b"CONNECT" => HtpMethod::CONNECT,
+            b"OPTIONS" => HtpMethod::OPTIONS,
+            b"TRACE" => HtpMethod::TRACE,
+            b"PATCH" => HtpMethod::PATCH,
+            b"PROPFIND" => HtpMethod::PROPFIND,
+            b"PROPPATCH" => HtpMethod::PROPPATCH,
+            b"MKCOL" => HtpMethod::MKCOL,
+            b"COPY" => HtpMethod::COPY,
+            b"MOVE" => HtpMethod::MOVE,
+            b"LOCK" => HtpMethod::LOCK,
+            b"UNLOCK" => HtpMethod::UNLOCK,
+            b"VERSION-CONTROL" => HtpMethod::VERSION_CONTROL,
+            b"CHECKOUT" => HtpMethod::CHECKOUT,
+            b"UNCHECKOUT" => HtpMethod::UNCHECKOUT,
+            b"CHECKIN" => HtpMethod::CHECKIN,
+            b"UPDATE" => HtpMethod::UPDATE,
+            b"LABEL" => HtpMethod::LABEL,
+            b"REPORT" => HtpMethod::REPORT,
+            b"MKWORKSPACE" => HtpMethod::MKWORKSPACE,
+            b"MKACTIVITY" => HtpMethod::MKACTIVITY,
+            b"BASELINE-CONTROL" => HtpMethod::BASELINE_CONTROL,
+            b"MERGE" => HtpMethod::MERGE,
+            b"INVALID" => HtpMethod::Invalid,
+            b"HEAD" => HtpMethod::HEAD,
+            _ => HtpMethod::Unknown,
+        }
+    }
+}
+impl ConnectionParser {
+    /// Sends outstanding connection data to the currently active data receiver hook.
+    ///
+    /// No-op (Ok) when no receiver hook is installed; errors when there is no
+    /// current request transaction.
+    fn request_receiver_send_data(&mut self, data: &mut ParserData) -> Result<()> {
+        // Re-wrap the callback slice of the input (presumably the portion not
+        // yet delivered to hooks — see ParserData::callback_data).
+        let data = ParserData::from(data.callback_data());
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let mut tx_data = Data::new(req.unwrap(), &data);
+        if let Some(hook) = &self.request_data_receiver_hook {
+            hook.run_all(self, &mut tx_data)?;
+        } else {
+            return Ok(());
+        };
+        Ok(())
+    }
+
+    /// Configures the data receiver hook.
+    fn request_receiver_set(&mut self, data_receiver_hook: Option<DataHook>) -> Result<()> {
+        self.request_data_receiver_hook = data_receiver_hook;
+        Ok(())
+    }
+
+    /// Finalizes an existing data receiver hook by sending any outstanding data to it. The
+    /// hook is then removed so that it receives no more data.
+    pub(crate) fn request_receiver_finalize_clear(&mut self, input: &mut ParserData) -> Result<()> {
+        if self.request_data_receiver_hook.is_none() {
+            return Ok(());
+        }
+        let rc = self.request_receiver_send_data(input);
+        self.request_data_receiver_hook = None;
+        rc
+    }
+
+    /// Handles request parser state changes. At the moment, this function is used only
+    /// to configure data receivers, which are sent raw connection data.
+    fn request_handle_state_change(&mut self, input: &mut ParserData) -> Result<()> {
+        // Only act when the state actually changed since the previous call.
+        if self.request_state_previous == self.request_state {
+            return Ok(());
+        }
+
+        if self.request_state == State::Headers {
+            // ensured by caller
+            let req = self.request().unwrap();
+            // Clone the hooks up front so the immutable borrow of `req` can end
+            // before request_receiver_set() mutably borrows self.
+            let header_fn = Some(req.cfg.hook_request_header_data.clone());
+            let trailer_fn = Some(req.cfg.hook_request_trailer_data.clone());
+            input.reset_callback_start();
+
+            // Install the receiver matching current progress: header-data hooks
+            // during header parsing, trailer-data hooks during trailer parsing.
+            match req.request_progress {
+                HtpRequestProgress::HEADERS => self.request_receiver_set(header_fn),
+                HtpRequestProgress::TRAILER => self.request_receiver_set(trailer_fn),
+                _ => Ok(()),
+            }?;
+        }
+        // Initially, I had the finalization of raw data sending here, but that
+        // caused the last REQUEST_HEADER_DATA hook to be invoked after the
+        // REQUEST_HEADERS hook -- which I thought made no sense. For that reason,
+        // the finalization is now initiated from the request header processing code,
+        // which is less elegant but provides a better user experience. Having some
+        // (or all) hooks to be invoked on state change might work better.
+        self.request_state_previous = self.request_state;
+        Ok(())
+    }
+
+    /// If there is any data left in the inbound data chunk, this function will preserve
+    /// it for later consumption. The maximum amount accepted for buffering is controlled
+    /// by Config::field_limit.
+    fn check_request_buffer_limit(&mut self, len: usize) -> Result<()> {
+        if len == 0 {
+            return Ok(());
+        }
+        // Check the hard (buffering) limit.
+        let mut newlen: usize = self.request_buf.len().wrapping_add(len);
+        // When calculating the size of the buffer, take into account the
+        // space we're using for the request header buffer.
+        if let Some(header) = &self.request_header {
+            newlen = newlen.wrapping_add(header.len())
+        }
+        let field_limit = self.cfg.field_limit;
+        if newlen > field_limit {
+            htp_error!(
+                self.logger,
+                HtpLogCode::REQUEST_FIELD_TOO_LONG,
+                format!(
+                    "Request buffer over the limit: size {} limit {}.",
+                    newlen, field_limit
+                )
+            );
+            return Err(HtpStatus::ERROR);
+        }
+        Ok(())
+    }
+
+    /// Performs a check for a CONNECT transaction to decide whether inbound
+    /// parsing needs to be suspended.
+    ///
+    /// Returns OK if the request does not use CONNECT, or HtpStatus::DATA_OTHER if
+    /// inbound parsing needs to be suspended until we hear from the
+    /// other side.
+    pub(crate) fn request_connect_check(&mut self) -> Result<()> {
+        let req = self.request();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+
+        // If the request uses the CONNECT method, then there will
+        // not be a request body, but first we need to wait to see the
+        // response in order to determine if the tunneling request
+        // was a success.
+        if req.unwrap().request_method_number == HtpMethod::CONNECT {
+            self.request_state = State::ConnectWaitResponse;
+            self.request_status = HtpStreamState::DATA_OTHER;
+            return Err(HtpStatus::DATA_OTHER);
+        }
+        // Continue to the next step to determine
+        // the presence of request body
+        self.request_state = State::BodyDetermine;
+        Ok(())
+    }
+
+    /// Determines whether inbound parsing needs to continue or stop. In
+    /// case the data appears to be plain text HTTP, we try to continue.
+    ///
+    /// Returns OK if the parser can resume parsing, HtpStatus::DATA_BUFFER if
+    /// we need more data.
+    pub(crate) fn request_connect_probe_data(&mut self, input: &mut ParserData) -> Result<()> {
+        // We need a full line (up to LF or NUL) before we can decide.
+        let data = if let Ok((_, data)) = take_till_lf_null(input.as_slice()) {
+            data
+        } else {
+            return self.handle_request_absent_lf(input);
+        };
+
+        if !self.request_buf.is_empty() {
+            self.check_request_buffer_limit(data.len())?;
+        }
+        // copy, will still need buffer data for next state.
+        let mut buffered = self.request_buf.clone();
+        buffered.add(data);
+
+        // The request method starts at the beginning of the
+        // line and ends with the first whitespace character.
+        // We skip leading whitespace as IIS allows this.
+        let res = tuple((take_is_space, take_not_is_space))(buffered.as_slice());
+        if let Ok((_, (_, method))) = res {
+            if HtpMethod::new(method) == HtpMethod::Unknown {
+                // Not recognizable HTTP: treat both directions as an opaque tunnel.
+                self.request_status = HtpStreamState::TUNNEL;
+                self.response_status = HtpStreamState::TUNNEL
+            } else {
+                // Looks like plain-text HTTP; complete the CONNECT transaction
+                // and keep parsing the stream.
+                return self.state_request_complete(input);
+            }
+        };
+        Ok(())
+    }
+
+    /// Determines whether inbound parsing, which was suspended after
+    /// encountering a CONNECT transaction, can proceed (after receiving
+    /// the response).
+    ///
+    /// Returns OK if the parser can resume parsing, HtpStatus::DATA_OTHER if
+    /// it needs to continue waiting.
+    pub(crate) fn request_connect_wait_response(&mut self) -> Result<()> {
+        let req = self.request();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let req = req.unwrap();
+
+        // Check that we saw the response line of the current inbound transaction.
+        if req.response_progress <= HtpResponseProgress::LINE {
+            return Err(HtpStatus::DATA_OTHER);
+        }
+        // A 2xx response means a tunnel was established. Anything
+        // else means we continue to follow the HTTP stream.
+        if req.response_status_number.in_range(200, 299) {
+            // TODO Check that the server did not accept a connection to itself.
+            // The requested tunnel was established: we are going
+            // to probe the remaining data on this stream to see
+            // if we need to ignore it or parse it
+            self.request_state = State::ConnectProbeData;
+        } else {
+            // No tunnel; continue to the next transaction
+            self.request_state = State::Finalize
+        }
+        Ok(())
+    }
+
+    /// Consumes bytes until the end of the current line.
+    ///
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    pub(crate) fn request_body_chunked_data_end(&mut self, input: &mut ParserData) -> Result<()> {
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let req = req.unwrap();
+
+        // TODO We shouldn't really see anything apart from CR and LF,
+        //      so we should warn about anything else.
+        if let Ok((_, line)) = take_till_lf(input.as_slice()) {
+            // Full line available: account for it, consume it, and move on to
+            // the next chunk's length line.
+            let len = line.len();
+            req.request_message_len = req.request_message_len.wrapping_add(len as u64);
+            self.request_data_consume(input, len);
+            self.request_state = State::BodyChunkedLength;
+            Ok(())
+        } else {
+            // No LF yet: count what we have and defer until more data arrives.
+            req.request_message_len = req.request_message_len.wrapping_add(input.len() as u64);
+            self.handle_request_absent_lf(input)
+        }
+    }
+
+    /// Processes a chunk of data.
+    ///
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    pub(crate) fn request_body_chunked_data(&mut self, input: &mut ParserData) -> Result<()> {
+        // Determine how many bytes we can consume: no more than the input
+        // holds, and no more than remains of the current chunk.
+        let bytes_to_consume: usize = min(
+            input.len(),
+            self.request_chunked_length.unwrap_or(0) as usize,
+        );
+        // If the input buffer is empty, ask for more data.
+        if bytes_to_consume == 0 {
+            return Err(HtpStatus::DATA);
+        }
+        // Consume the data.
+        self.request_body_data(Some(&input.as_slice()[0..bytes_to_consume]))?;
+
+        // Adjust counters.
+        self.request_data_consume(input, bytes_to_consume);
+        if let Some(len) = self.request_chunked_length.as_mut() {
+            // Safe subtraction: bytes_to_consume was clamped to *len above.
+            *len -= bytes_to_consume as u64;
+            if *len == 0 {
+                // End of the chunk.
+                self.request_state = State::BodyChunkedDataEnd;
+                return Ok(());
+            }
+        }
+        // Ask for more data.
+        Err(HtpStatus::DATA)
+    }
+
+    /// Extracts chunk length.
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    pub(crate) fn request_body_chunked_length(&mut self, input: &mut ParserData) -> Result<()> {
+        let mut data = input.as_slice();
+        loop {
+            // A chunk-length line must end with LF; otherwise buffer and wait.
+            if let Ok((remaining, line)) = take_till_lf(data) {
+                self.request_data_consume(input, line.len());
+                if !self.request_buf.is_empty() {
+                    self.check_request_buffer_limit(line.len())?;
+                }
+
+                // Join any previously buffered bytes with the newly seen line.
+                let mut data2 = take(&mut self.request_buf);
+                data2.add(line);
+                if is_chunked_ctl_line(&data2) {
+                    let req = self.request_mut().unwrap();
+                    req.request_message_len =
+                        req.request_message_len.wrapping_add(data2.len() as u64);
+                    //Empty chunk len. Try to continue parsing.
+                    data = remaining;
+                    continue;
+                }
+                let req = self.request_mut().unwrap();
+                req.request_message_len = req.request_message_len.wrapping_add(data2.len() as u64);
+                // Handle chunk length.
+                let (len, ext) = parse_chunked_length(&data2)?;
+                self.request_chunked_length = len;
+                if ext {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::REQUEST_CHUNK_EXTENSION,
+                        "Request chunk extension"
+                    );
+                }
+                let len = len.as_ref().ok_or(HtpStatus::ERROR).map_err(|e| {
+                    // Invalid chunk length
+                    htp_error!(
+                        self.logger,
+                        HtpLogCode::INVALID_REQUEST_CHUNK_LEN,
+                        format!("Request chunk encoding: Invalid chunk length ({:?})", e)
+                    );
+                    e
+                })?;
+                // Zero-length chunk terminates the body; trailers may follow.
+                match len.cmp(&0) {
+                    Ordering::Equal => {
+                        // End of data
+                        self.request_state = State::Headers;
+                        self.request_mut().unwrap().request_progress = HtpRequestProgress::TRAILER
+                    }
+                    Ordering::Greater => {
+                        // More data available.
+                        self.request_state = State::BodyChunkedData
+                    }
+                    _ => {}
+                }
+                return Ok(());
+            } else {
+                // Check if the data we have seen so far is invalid
+                return if !is_valid_chunked_length_data(data) {
+                    // Contains leading junk non hex_ascii data
+                    // Invalid chunk length
+                    htp_error!(
+                        self.logger,
+                        HtpLogCode::INVALID_REQUEST_CHUNK_LEN,
+                        "Request chunk encoding: Invalid chunk length"
+                    );
+                    Err(HtpStatus::ERROR)
+                } else {
+                    self.handle_request_absent_lf(input)
+                };
+            }
+        }
+    }
+
+    /// Processes identity (Content-Length delimited) request body data,
+    /// including gap (missing-data) input, forwarding it to the body hooks.
+    ///
+    /// Returns OK on state change (body complete), ERROR on error, or
+    /// HtpStatus::DATA when more input is needed.
+    pub(crate) fn request_body_identity(&mut self, data: &mut ParserData) -> Result<()> {
+        let left = self.request_body_data_left.ok_or(HtpStatus::ERROR)?;
+        // Determine how many bytes we can consume.
+        let bytes_to_consume: usize = min(data.len(), left as usize);
+        // If the input buffer is empty, ask for more data.
+        if bytes_to_consume == 0 {
+            return Err(HtpStatus::DATA);
+        }
+        // A gap means the bytes were lost upstream; account for them and
+        // notify the hooks without passing any actual data.
+        if data.is_gap() {
+            let req = self.request_mut();
+            if req.is_none() {
+                return Err(HtpStatus::ERROR);
+            }
+            let req = req.unwrap();
+            req.request_message_len = req
+                .request_message_len
+                .wrapping_add(bytes_to_consume as u64);
+            // Create a new gap of the appropriate length
+            let parser_data = ParserData::from(bytes_to_consume);
+            // Send the gap to the data hooks
+            let mut tx_data = Data::new(req, &parser_data);
+            self.request_run_hook_body_data(&mut tx_data)?;
+        } else {
+            // Consume the data.
+            self.request_body_data(Some(&data.as_slice()[0..bytes_to_consume]))?;
+        }
+
+        // Adjust the counters.
+        self.request_data_consume(data, bytes_to_consume);
+        self.request_body_data_left = Some(left - bytes_to_consume as u64);
+
+        // Have we seen the entire request body?
+        if self.request_body_data_left > Some(0) {
+            //Ask for more data;
+            return Err(HtpStatus::DATA);
+        }
+        // End of request body.
+        self.request_state = State::Finalize;
+        // Sends close signal to decompressors, outputting any partially decompressed data
+        self.request_body_data(None)
+    }
+
+    /// Determines presence (and encoding) of a request body.
+    ///
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    pub(crate) fn request_body_determine(&mut self) -> Result<()> {
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let req = req.unwrap();
+
+        // Determine the next state based on the presence of the request
+        // body, and the coding used.
+        match req.request_transfer_coding {
+            HtpTransferCoding::Chunked => {
+                req.request_progress = HtpRequestProgress::BODY;
+                self.request_state = State::BodyChunkedLength
+            }
+            HtpTransferCoding::Identity => {
+                if req.request_content_length > Some(0) {
+                    req.request_progress = HtpRequestProgress::BODY;
+                }
+                self.request_content_length = req.request_content_length;
+                self.request_body_data_left = self.request_content_length;
+                if self.request_content_length > Some(0) {
+                    self.request_state = State::BodyIdentity
+                } else {
+                    self.request_state = State::Finalize
+                }
+            }
+            HtpTransferCoding::NoBody => {
+                // This request does not have a body, which
+                // means that we're done with it
+                self.request_state = State::Finalize
+            }
+            _ => {
+                // Should not be here
+                return Err(HtpStatus::ERROR);
+            }
+        }
+        Ok(())
+    }
+
+    /// Parses request headers.
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    pub(crate) fn request_headers(&mut self, input: &mut ParserData) -> Result<()> {
+        let data = input.as_slice();
+        // Stream closed: no more input will arrive, so flush any buffered
+        // header bytes and finalize header processing now.
+        if self.request_status == HtpStreamState::CLOSED {
+            let req = self.request_mut();
+            if req.is_none() {
+                return Err(HtpStatus::ERROR);
+            }
+            let req = req.unwrap();
+
+            req.request_header_parser.set_complete(true);
+            // Parse previous header, if any.
+            req.request_progress = HtpRequestProgress::TRAILER;
+            if let Some(request_header) = self.request_header.take() {
+                self.parse_request_headers(request_header.as_slice())?;
+            }
+            self.request_buf.clear();
+            // We've seen all the request headers.
+            return self.state_request_headers(input);
+        }
+        let mut taken = false;
+        // libhtp.c did not take full data, but only till LF
+        if let Ok((_, line)) = take_till_lf(data) {
+            if self.request_header.is_some() {
+                self.check_request_buffer_limit(line.len())?;
+            }
+        } else {
+            // No LF in the input yet: buffer everything and ask for more.
+            self.request_data_consume(input, data.len());
+            self.check_request_buffer_limit(data.len())?;
+            if let Some(rh) = &mut self.request_header {
+                rh.extend_from_slice(data);
+            } else {
+                self.request_header = Some(Bstr::from(data));
+            }
+            return Err(HtpStatus::DATA_BUFFER);
+        }
+
+        // Prepend any previously buffered header bytes to the new data.
+        let request_header = if let Some(mut request_header) = self.request_header.take() {
+            request_header.add(data);
+            taken = true;
+            request_header
+        } else {
+            Bstr::new()
+        };
+        let data2 = if taken {
+            request_header.as_slice()
+        } else {
+            data
+        };
+
+        let (remaining, eoh) = self.parse_request_headers(data2)?;
+        //TODO: Update the request state machine so that we don't have to have this EOL check
+        let eol = remaining.len() == data2.len()
+            && (remaining.starts_with(b"\r\n") || remaining.starts_with(b"\n"));
+        if eoh
+            //If the input started with an EOL, we assume this is the end of the headers
+            || eol
+        {
+            if remaining.len() < data.len() {
+                self.request_data_consume(input, data.len() - remaining.len());
+            } else if eol {
+                // Consume only the EOL bytes that terminated the headers.
+                if remaining.starts_with(b"\r\n") {
+                    self.request_data_consume(input, min(data.len(), 2));
+                } else if remaining.starts_with(b"\n") {
+                    self.request_data_consume(input, min(data.len(), 1));
+                }
+            }
+            // We've seen all the request headers.
+            self.state_request_headers(input)
+        } else {
+            // Incomplete trailing header line: buffer it for the next call.
+            self.request_data_consume(input, data.len());
+            self.check_request_buffer_limit(remaining.len())?;
+            let remaining = Bstr::from(remaining);
+            self.request_header.replace(remaining);
+            Err(HtpStatus::DATA_BUFFER)
+        }
+    }
+
+    /// Determines request protocol.
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    pub(crate) fn request_protocol(&mut self, input: &mut ParserData) -> Result<()> {
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let req = req.unwrap();
+
+        // Is this a short-style HTTP/0.9 request? If it is,
+        // we will not want to parse request headers.
+        if !req.is_protocol_0_9 {
+            // Switch to request header parsing.
+            req.request_progress = HtpRequestProgress::HEADERS;
+            self.request_state = State::Headers
+        } else {
+            // Tentative 0.9: if anything beyond a bounded run of whitespace
+            // follows the request line, reclassify as a request with headers.
+            if let Ok((rem, sp)) = take_is_space(input.as_slice()) {
+                if !rem.is_empty() || sp.len() > HTTP09_MAX_JUNK_LEN {
+                    // we have more than spaces, no HTTP/0.9
+                    req.is_protocol_0_9 = false;
+                    req.request_progress = HtpRequestProgress::HEADERS;
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::REQUEST_LINE_NO_PROTOCOL,
+                        "Request line: missing protocol"
+                    );
+                    // Switch to request header parsing.
+                    self.request_state = State::Headers;
+                    return Ok(());
+                }
+            }
+            // We're done with this request.
+            self.request_state = State::Finalize;
+        }
+        Ok(())
+    }
+
+    /// Parse the request line.
+    ///
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    fn request_line_complete(&mut self, line: &[u8]) -> Result<()> {
+        self.check_request_buffer_limit(line.len())?;
+        if line.is_empty() {
+            return Err(HtpStatus::DATA);
+        }
+        let perso = self.cfg.server_personality;
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let req = req.unwrap();
+
+        // Is this a line that should be ignored?
+        if is_line_ignorable(perso, line) {
+            // We have an empty/whitespace line, which we'll note, ignore and move on.
+            req.request_ignored_lines = req.request_ignored_lines.wrapping_add(1);
+            return Ok(());
+        }
+        // Process request line.
+        let data = chomp(line);
+        self.parse_request_line(data)?;
+        // Finalize request line parsing.
+        self.state_request_line()?;
+        Ok(())
+    }
+
+    /// Parses request line.
+    ///
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    pub(crate) fn request_line(&mut self, input: &mut ParserData) -> Result<()> {
+        match take_till_lf(input.as_slice()) {
+            Ok((_, line)) => {
+                // We have a line ending, so consume the input
+                // and grab any buffered data
+                let mut data = take(&mut self.request_buf);
+                data.add(line);
+                self.request_data_consume(input, line.len());
+                self.request_line_complete(data.as_slice())
+            }
+            _ => {
+                // No LF found. On a closed stream the partial line is final;
+                // otherwise buffer it and wait for more input.
+                if self.request_status == HtpStreamState::CLOSED {
+                    let mut data = take(&mut self.request_buf);
+                    data.add(input.as_slice());
+                    self.request_data_consume(input, input.len());
+                    self.request_line_complete(data.as_slice())
+                } else {
+                    self.handle_request_absent_lf(input)
+                }
+            }
+        }
+    }
+
+    /// Extract one request header. A header can span multiple lines, in
+    /// which case they will be folded into one before parsing is attempted.
+    fn process_request_header(&mut self, header: Header) -> Result<()> {
+        // Try to parse the header.
+        // ensured by caller
+        let hl = self.cfg.number_headers_limit as usize;
+        let req = self.request_mut().unwrap();
+        let mut repeated = false;
+        let reps = req.request_header_repetitions;
+        let mut update_reps = false;
+        // Do we already have a header with the same name?
+        if let Some(h_existing) = req.request_headers.get_nocase_mut(header.name.as_slice()) {
+            if !h_existing.flags.is_set(HeaderFlags::FIELD_REPEATED) {
+                // This is the second occurrence for this header.
+                repeated = true;
+            } else if reps < 64 {
+                update_reps = true;
+            } else {
+                // Repetition cap reached; silently drop further duplicates.
+                return Ok(());
+            }
+            // For simplicity reasons, we count the repetitions of all headers
+            h_existing.flags.set(HeaderFlags::FIELD_REPEATED);
+            // Having multiple C-L headers is against the RFC but
+            // servers may ignore the subsequent headers if the values are the same.
+            if header.name.cmp_nocase("Content-Length") {
+                // Don't use string comparison here because we want to
+                // ignore small formatting differences.
+                let existing_cl = parse_content_length(&h_existing.value, None);
+                let new_cl = parse_content_length(&header.value, None);
+                // Ambiguous response C-L value.
+                if existing_cl.is_none() || new_cl.is_none() || existing_cl != new_cl {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST,
+                        "Ambiguous request C-L value"
+                    );
+                }
+            } else {
+                // Add to the existing header (comma-joined, per RFC folding rules).
+                h_existing.value.extend_from_slice(b", ");
+                h_existing.value.extend_from_slice(header.value.as_slice());
+            }
+        } else {
+            // New header name: enforce the configured header-count limit.
+            if req.request_headers.elements.len() > hl {
+                if !req.flags.is_set(HtpFlags::HEADERS_TOO_MANY) {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::REQUEST_TOO_MANY_HEADERS,
+                        "Too many request headers"
+                    );
+                    let req = self.request_mut().unwrap();
+                    req.flags.set(HtpFlags::HEADERS_TOO_MANY);
+                }
+                return Err(HtpStatus::ERROR);
+            }
+            req.request_headers.elements.push(header);
+        }
+        let req = self.request_mut().unwrap();
+        if update_reps {
+            req.request_header_repetitions = req.request_header_repetitions.wrapping_add(1)
+        }
+        if repeated {
+            htp_warn!(
+                self.logger,
+                HtpLogCode::REQUEST_HEADER_REPETITION,
+                "Repetition for header"
+            );
+        }
+        Ok(())
+    }
+
+    /// Parse request headers
+    ///
+    /// Runs the header sub-parser over `data`, logs anomalies found in the
+    /// parsed header names, and feeds each header to process_request_header().
+    /// Returns the unconsumed remainder and whether end-of-headers was seen.
+    fn parse_request_headers<'a>(&mut self, data: &'a [u8]) -> Result<(&'a [u8], bool)> {
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+
+        let rc = req.unwrap().request_header_parser.headers()(data);
+        if let Ok((remaining, (headers, eoh))) = rc {
+            for h in headers {
+                // NOTE(review): `flags` appears to accumulate per-header flag
+                // bits via the htp_warn_once! macro — confirm macro definition.
+                let mut flags = 0;
+                let name_flags = h.name.flags;
+                // Ignore LWS after field-name.
+                if name_flags.is_set(HeaderFlags::NAME_TRAILING_WHITESPACE) {
+                    // Log only once per transaction.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::REQUEST_INVALID_LWS_AFTER_NAME,
+                        "Request field invalid: LWS after name",
+                        self.request_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                //If name has leading whitespace, probably invalid folding
+                if name_flags.is_set(HeaderFlags::NAME_LEADING_WHITESPACE) {
+                    // Invalid folding.
+                    // Warn only once per transaction.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::INVALID_REQUEST_FIELD_FOLDING,
+                        "Invalid request field folding",
+                        self.request_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::INVALID_FOLDING
+                    );
+                }
+                // Check that field-name is a token
+                if name_flags.is_set(HeaderFlags::NAME_NON_TOKEN_CHARS) {
+                    // Incorrectly formed header name.
+                    // Log only once per transaction.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::REQUEST_HEADER_INVALID,
+                        "Request header name is not a token",
+                        self.request_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                // No colon?
+                if name_flags.is_set(HeaderFlags::MISSING_COLON) {
+                    // Log only once per transaction.
+                    // We handle this case as a header with an empty name, with the value equal
+                    // to the entire input string.
+                    // TODO Apache will respond to this problem with a 400.
+                    // Now extract the name and the value
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::REQUEST_FIELD_MISSING_COLON,
+                        "Request field invalid: colon missing",
+                        self.request_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::FIELD_UNPARSEABLE
+                    );
+                } else if name_flags.is_set(HeaderFlags::NAME_EMPTY) {
+                    // Empty header name.
+                    // Log only once per transaction.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::REQUEST_INVALID_EMPTY_NAME,
+                        "Request field invalid: empty name",
+                        self.request_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                self.process_request_header(Header::new_with_flags(
+                    h.name.name.into(),
+                    h.value.value.into(),
+                    flags,
+                ))?;
+            }
+            Ok((remaining, eoh))
+        } else {
+            // Sub-parser could not make progress: report nothing consumed.
+            Ok((data, false))
+        }
+    }
+
+    /// Parses a single request line.
+    ///
+    /// Splits the line into method, URI and protocol, recording each on the
+    /// transaction and logging anomalies (leading whitespace, non-compliant
+    /// delimiters, missing protocol implying HTTP/0.9, unknown method).
+    pub(crate) fn parse_request_line(&mut self, request_line: &[u8]) -> Result<()> {
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let req = req.unwrap();
+
+        req.request_line = Some(Bstr::from(request_line));
+        let mut mstart: bool = false;
+        let mut data: &[u8] = request_line;
+        if self.cfg.server_personality == HtpServerPersonality::APACHE_2 {
+            //Null terminates
+            if let Ok((_, before_null)) = take_until_null(data) {
+                data = before_null
+            }
+        }
+        // The request method starts at the beginning of the
+        // line and ends with the first whitespace character.
+        let mut method_parser = tuple
+                                // skip past leading whitespace. IIS allows this
+                               ((take_is_space,
+                               take_not_is_space,
+                                // Ignore whitespace after request method. The RFC allows
+                                 // for only one SP, but then suggests any number of SP and HT
+                                 // should be permitted. Apache uses isspace(), which is even
+                                 // more permitting, so that's what we use here.
+                               take_is_space
+                               ));
+
+        if let Ok((remaining, (ls, method, ws))) = method_parser(data) {
+            if !ls.is_empty() {
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::REQUEST_LINE_LEADING_WHITESPACE,
+                    "Request line: leading whitespace"
+                );
+
+                let requestline_leading_whitespace_unwanted =
+                    self.cfg.requestline_leading_whitespace_unwanted;
+                if requestline_leading_whitespace_unwanted != HtpUnwanted::Ignore {
+                    // reset mstart so that we copy the whitespace into the method
+                    mstart = true;
+                    // set expected response code to this anomaly
+                    let req = self.request_mut().unwrap();
+                    req.response_status_expected_number = requestline_leading_whitespace_unwanted
+                }
+            }
+
+            let req = self.request_mut().unwrap();
+            if mstart {
+                req.request_method = Some(Bstr::from([ls, method].concat()));
+            } else {
+                req.request_method = Some(Bstr::from(method));
+            }
+
+            if let Some(request_method) = &req.request_method {
+                req.request_method_number = HtpMethod::new(request_method.as_slice());
+            }
+
+            // Too much performance overhead for fuzzing
+            if ws.iter().any(|&c| c != 0x20) {
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::METHOD_DELIM_NON_COMPLIANT,
+                    "Request line: non-compliant delimiter between Method and URI"
+                );
+            }
+
+            if remaining.is_empty() {
+                // No, this looks like a HTTP/0.9 request.
+                let req = self.request_mut().unwrap();
+                req.is_protocol_0_9 = true;
+                req.request_protocol_number = HtpProtocol::V0_9;
+                if req.request_method_number == HtpMethod::Unknown {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD,
+                        "Request line: unknown method only"
+                    );
+                }
+                return Ok(());
+            }
+
+            let remaining = trimmed(remaining);
+
+            // First split attempt: URI ends at the first plain SP (0x20).
+            let (mut uri, mut protocol) =
+                split_on_predicate(remaining, self.cfg.decoder_cfg.allow_space_uri, true, |c| {
+                    *c == 0x20
+                });
+
+            if uri.len() == remaining.len() && uri.iter().any(|&c| is_space(c)) {
+                // warn regardless if we've seen non-compliant chars
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::URI_DELIM_NON_COMPLIANT,
+                    "Request line: URI contains non-compliant delimiter"
+                );
+                // if we've seen some 'bad' delimiters, we retry with those
+                let uri_protocol = split_on_predicate(
+                    remaining,
+                    self.cfg.decoder_cfg.allow_space_uri,
+                    true,
+                    |c| is_space(*c),
+                );
+                uri = uri_protocol.0;
+                protocol = uri_protocol.1;
+            }
+
+            let req = self.request_mut().unwrap();
+            req.request_uri = Some(Bstr::from(uri));
+
+            // Is there protocol information available?
+            if protocol.is_empty() {
+                // No, this looks like a HTTP/0.9 request.
+                req.is_protocol_0_9 = true;
+                req.request_protocol_number = HtpProtocol::V0_9;
+                if req.request_method_number == HtpMethod::Unknown {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL,
+                        "Request line: unknown method and no protocol"
+                    );
+                }
+                return Ok(());
+            }
+
+            // The protocol information continues until the end of the line.
+            req.request_protocol = Some(Bstr::from(protocol));
+            self.request_mut().unwrap().request_protocol_number =
+                parse_protocol(protocol, &mut self.logger);
+            let req = self.request().unwrap();
+            if req.request_method_number == HtpMethod::Unknown
+                && req.request_protocol_number == HtpProtocol::Invalid
+            {
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL,
+                    "Request line: unknown method and invalid protocol"
+                );
+            }
+        }
+        Ok(())
+    }
+
+    /// Consumes request body data.
+    /// This function assumes that handling of chunked encoding is implemented
+    /// by the container. When you're done submitting body data, invoke a state
+    /// change (to REQUEST) to finalize any processing that might be pending.
+    /// The supplied data is fully consumed and there is no expectation that it
+    /// will be available afterwards. The protocol parsing code makes no copies
+    /// of the data, but some parsers might.
+    ///
+    /// Returns HtpStatus::OK on success or HtpStatus::ERROR if the request transaction
+    /// is invalid or response body data hook fails.
+    pub(crate) fn request_body_data(&mut self, data: Option<&[u8]>) -> Result<()> {
+        // None data is used to indicate the end of request body.
+        // Keep track of body size before decompression.
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let req = req.unwrap();
+
+        req.request_message_len = req
+            .request_message_len
+            .wrapping_add(data.unwrap_or(b"").len() as u64);
+        match req.request_content_encoding_processing {
+            HtpContentEncoding::Gzip
+            | HtpContentEncoding::Deflate
+            | HtpContentEncoding::Zlib
+            | HtpContentEncoding::Lzma => {
+                // Send data buffer to the decompressor if it exists
+                if req.request_decompressor.is_none() && data.is_none() {
+                    return Ok(());
+                }
+                // Take the decompressor out of its slot so self is not borrowed
+                // while decompressing; it is put back below on the data path.
+                let mut decompressor = req.request_decompressor.take().ok_or(HtpStatus::ERROR)?;
+                if let Some(data) = data {
+                    let _ = decompressor.decompress(data);
+                    // Guard against decompression bombs: if too much time was
+                    // spent, stop decompressing and pass data through instead.
+                    if decompressor.time_spent()
+                        > self.cfg.compression_options.get_time_limit() as u64
+                    {
+                        htp_error!(
+                            self.logger,
+                            HtpLogCode::COMPRESSION_BOMB,
+                            format!(
+                                "Compression bomb: spent {} us decompressing",
+                                decompressor.time_spent(),
+                            )
+                        );
+                        decompressor.set_passthrough(true);
+                    }
+                    // put the decompressor back in its slot
+                    let req = self.request_mut().unwrap();
+                    req.request_decompressor.replace(decompressor);
+                } else {
+                    // don't put the decompressor back in its slot
+                    // ignore errors
+                    let _ = decompressor.finish();
+                }
+            }
+            HtpContentEncoding::None => {
+                // When there's no decompression, request_entity_len.
+                // is identical to request_message_len.
+                // None data is used to indicate the end of request body.
+                // Keep track of the body length.
+                req.request_entity_len += data.unwrap_or(b"").len() as u64;
+                // Send data to the callbacks.
+                let data = ParserData::from(data);
+                let mut data = Data::new(req, &data);
+                self.request_run_hook_body_data(&mut data).map_err(|e| {
+                    htp_error!(
+                        self.logger,
+                        HtpLogCode::REQUEST_BODY_DATA_CALLBACK_ERROR,
+                        format!("Request body data callback returned error ({:?})", e)
+                    );
+                    e
+                })?
+            }
+        }
+        Ok(())
+    }
+
+    /// Initialize the request decompression engine. We can deal with three
+    /// scenarios:
+    ///
+    /// 1. Decompression is enabled, compression indicated in headers, and we decompress.
+    ///
+    /// 2. As above, but the user disables decompression by setting request_content_encoding
+    ///    to COMPRESSION_NONE.
+    ///
+    /// 3. Decompression is disabled and we do not attempt to enable it, but the user
+    ///    forces decompression by setting request_content_encoding to one of the
+    ///    supported algorithms.
+    pub(crate) fn request_initialize_decompressors(&mut self) -> Result<()> {
+        let req = self.request_mut();
+        if req.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let req = req.unwrap();
+        // Clone the Content-Encoding value so it stays usable after `req`
+        // is dropped and `self` is re-borrowed below.
+        let ce = req
+            .request_headers
+            .get_nocase_nozero("content-encoding")
+            .map(|val| val.value.clone());
+        // Process multiple encodings if there is no match on fast path
+        let mut slow_path = false;
+
+        // Fast path - try to match directly on the encoding value
+        req.request_content_encoding = if let Some(ce) = &ce {
+            if ce.cmp_nocase_nozero(b"gzip") || ce.cmp_nocase_nozero(b"x-gzip") {
+                HtpContentEncoding::Gzip
+            } else if ce.cmp_nocase_nozero(b"deflate") || ce.cmp_nocase_nozero(b"x-deflate") {
+                HtpContentEncoding::Deflate
+            } else if ce.cmp_nocase_nozero(b"lzma") {
+                HtpContentEncoding::Lzma
+            } else if ce.cmp_nocase_nozero(b"inflate") || ce.cmp_nocase_nozero(b"none") {
+                HtpContentEncoding::None
+            } else {
+                // Unknown or multi-valued header; defer to the
+                // comma-separated slow path below.
+                slow_path = true;
+                HtpContentEncoding::None
+            }
+        } else {
+            HtpContentEncoding::None
+        };
+
+        // Configure decompression, if enabled in the configuration.
+        self.request_mut()
+            .unwrap()
+            .request_content_encoding_processing = if self.cfg.request_decompression_enabled {
+            self.request().unwrap().request_content_encoding
+        } else {
+            // Decompression disabled: the slow path is skipped as well.
+            slow_path = false;
+            HtpContentEncoding::None
+        };
+
+        let req = self.request_mut().unwrap();
+        let request_content_encoding_processing = req.request_content_encoding_processing;
+        let compression_options = self.cfg.compression_options;
+        match &request_content_encoding_processing {
+            HtpContentEncoding::Gzip
+            | HtpContentEncoding::Deflate
+            | HtpContentEncoding::Zlib
+            | HtpContentEncoding::Lzma => {
+                self.request_prepend_decompressor(request_content_encoding_processing)?;
+            }
+            HtpContentEncoding::None => {
+                if slow_path {
+                    if let Some(ce) = &ce {
+                        let mut layers = 0;
+                        for encoding in ce.split(|c| *c == b',' || *c == b' ') {
+                            if encoding.is_empty() {
+                                continue;
+                            }
+                            layers += 1;
+
+                            if let Some(limit) = compression_options.get_layer_limit() {
+                                // decompression layer depth check
+                                if layers > limit {
+                                    htp_warn!(
+                                        self.logger,
+                                        HtpLogCode::TOO_MANY_ENCODING_LAYERS,
+                                        "Too many request content encoding layers"
+                                    );
+                                    break;
+                                }
+                            }
+
+                            // Classify this individual encoding token, warning
+                            // about abnormal spellings (e.g. "gzipX").
+                            let encoding = Bstr::from(encoding);
+                            let encoding = if encoding.index_of_nocase(b"gzip").is_some() {
+                                if !(encoding.cmp_slice(b"gzip") == Ordering::Equal
+                                    || encoding.cmp_slice(b"x-gzip") == Ordering::Equal)
+                                {
+                                    htp_warn!(
+                                        self.logger,
+                                        HtpLogCode::ABNORMAL_CE_HEADER,
+                                        "C-E gzip has abnormal value"
+                                    );
+                                }
+                                HtpContentEncoding::Gzip
+                            } else if encoding.index_of_nocase(b"deflate").is_some() {
+                                if !(encoding.cmp_slice(b"deflate") == Ordering::Equal
+                                    || encoding.cmp_slice(b"x-deflate") == Ordering::Equal)
+                                {
+                                    htp_warn!(
+                                        self.logger,
+                                        HtpLogCode::ABNORMAL_CE_HEADER,
+                                        "C-E deflate has abnormal value"
+                                    );
+                                }
+                                HtpContentEncoding::Deflate
+                            } else if encoding.cmp_slice(b"lzma") == Ordering::Equal {
+                                if let Some(limit) = compression_options.get_lzma_layers() {
+                                    // LZMA decompression layer depth check
+                                    if layers > limit {
+                                        htp_warn!(
+                                            self.logger,
+                                            HtpLogCode::REQUEST_TOO_MANY_LZMA_LAYERS,
+                                            "Compression bomb: multiple encoding with lzma"
+                                        );
+                                        break;
+                                    }
+                                }
+                                HtpContentEncoding::Lzma
+                            } else if encoding.cmp_slice(b"inflate") == Ordering::Equal
+                                || encoding.cmp_slice(b"none") == Ordering::Equal
+                            {
+                                HtpContentEncoding::None
+                            } else {
+                                htp_warn!(
+                                    self.logger,
+                                    HtpLogCode::ABNORMAL_CE_HEADER,
+                                    "C-E unknown setting"
+                                );
+                                HtpContentEncoding::None
+                            };
+                            self.request_prepend_decompressor(encoding)?;
+                        }
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Prepend a decompressor to the request
+    ///
+    /// With multiple content encodings the decompressors form a chain; each
+    /// new one is prepended, and the first one installed also fixes
+    /// `request_content_encoding_processing`. A no-op for
+    /// `HtpContentEncoding::None`.
+    fn request_prepend_decompressor(&mut self, encoding: HtpContentEncoding) -> Result<()> {
+        let compression_options = self.cfg.compression_options;
+        if encoding != HtpContentEncoding::None {
+            // ensured by caller
+            let req = self.request_mut().unwrap();
+            if let Some(decompressor) = req.request_decompressor.take() {
+                req.request_decompressor
+                    .replace(decompressor.prepend(encoding, compression_options)?);
+            } else {
+                // The processing encoding will be the first one encountered
+                req.request_content_encoding_processing = encoding;
+
+                // Add the callback first because it will be called last in
+                // the chain of writers
+
+                // TODO: fix lifetime error and remove this line!
+                // SAFETY(review): the raw pointer lets the closure call back
+                // into this parser; assumes the parser outlives the
+                // decompressor and is not moved in between — TODO confirm.
+                let connp_ptr: *mut ConnectionParser = self as *mut ConnectionParser;
+                let decompressor = unsafe {
+                    Decompressor::new_with_callback(
+                        encoding,
+                        Box::new(move |data: Option<&[u8]>| -> std::io::Result<usize> {
+                            (*connp_ptr).request_decompressor_callback(data)
+                        }),
+                        compression_options,
+                    )?
+                };
+                let req = self.request_mut().unwrap();
+                req.request_decompressor.replace(decompressor);
+            }
+        }
+        Ok(())
+    }
+
+    /// Callback invoked by the decompressor with each chunk of decompressed
+    /// request body data (`None` signals the end of the body). Runs the
+    /// REQUEST_BODY_DATA hooks, tracks `request_entity_len`, and enforces the
+    /// compression-bomb time and output-ratio limits. Returns the chunk
+    /// length on success.
+    fn request_decompressor_callback(&mut self, data: Option<&[u8]>) -> std::io::Result<usize> {
+        // If no data is passed, call the hooks with NULL to signify the end of the
+        // request body.
+        let parser_data = ParserData::from(data);
+        // ensured by only caller
+        let req = self.request_mut().unwrap();
+        let mut tx_data = Data::new(req, &parser_data);
+
+        // Keep track of actual request body length.
+        req.request_entity_len = req.request_entity_len.wrapping_add(tx_data.len() as u64);
+
+        // Invoke all callbacks.
+        self.request_run_hook_body_data(&mut tx_data)
+            .map_err(|_| std::io::Error::new(std::io::ErrorKind::Other, "body data hook failed"))?;
+
+        // Periodically check how long decompression has been running; if it
+        // exceeds the configured budget, switch to passthrough mode.
+        let compression_options = self.cfg.compression_options;
+        let req = self.request_mut().unwrap();
+        if let Some(decompressor) = &mut req.request_decompressor {
+            if decompressor.callback_inc() % compression_options.get_time_test_freq() == 0 {
+                if let Some(time_spent) = decompressor.timer_reset() {
+                    if time_spent > compression_options.get_time_limit() as u64 {
+                        decompressor.set_passthrough(true);
+                        htp_error!(
+                            self.logger,
+                            HtpLogCode::COMPRESSION_BOMB,
+                            format!("Compression bomb: spent {} us decompressing", time_spent)
+                        );
+                    }
+                }
+            }
+        }
+
+        // output > ratio * input ?
+        let ratio = compression_options.get_bomb_ratio();
+        let req = self.request().unwrap();
+        let exceeds_ratio = if let Some(ratio) = req.request_message_len.checked_mul(ratio) {
+            req.request_entity_len > ratio
+        } else {
+            // overflow occurred
+            true
+        };
+
+        let bomb_limit = compression_options.get_bomb_limit();
+        let request_entity_len = req.request_entity_len;
+        let request_message_len = req.request_message_len;
+        if request_entity_len > bomb_limit && exceeds_ratio {
+            htp_error!(
+                self.logger,
+                HtpLogCode::COMPRESSION_BOMB,
+                format!(
+                    "Compression bomb: decompressed {} bytes out of {}",
+                    request_entity_len, request_message_len,
+                )
+            );
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "compression_bomb_limit reached",
+            ));
+        }
+        Ok(tx_data.len())
+    }
+
+    /// Finalizes request.
+    ///
+    /// Looks at leftover bytes at the end of a request: a line starting with
+    /// a known method token is unconsumed (so request-line parsing sees it),
+    /// while anything else is treated as trailing body data.
+    ///
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    pub(crate) fn request_finalize(&mut self, input: &mut ParserData) -> Result<()> {
+        if input.is_gap() {
+            return self.state_request_complete(input);
+        }
+        let mut work = input.as_slice();
+        let mut has_lf = false;
+        if self.request_status != HtpStreamState::CLOSED {
+            let request_next_byte = input.as_slice().first();
+            if request_next_byte.is_none() {
+                return self.state_request_complete(input);
+            }
+
+            // Consume up to, but not including, the LF; the LF itself is only
+            // consumed later if the line turns out to be body data.
+            if let Ok((_, line)) = take_till_lf(work) {
+                work = &line[..line.len() - 1];
+                has_lf = true;
+                self.request_data_consume(input, line.len() - 1);
+            } else {
+                return self.handle_request_absent_lf(input);
+            }
+        }
+
+        if !self.request_buf.is_empty() {
+            self.check_request_buffer_limit(work.len())?;
+        }
+        // Combine previously buffered bytes with the new line; `buf_len`
+        // remembers the split point in case we need to restore the buffer.
+        let mut data = take(&mut self.request_buf);
+        let buf_len = data.len();
+        data.add(work);
+
+        if data.is_empty() {
+            //closing
+            return self.state_request_complete(input);
+        }
+        let res = tuple((take_is_space, take_not_is_space))(&data);
+
+        if let Ok((_, (_, method))) = res {
+            if method.is_empty() {
+                // empty whitespace line
+                if has_lf {
+                    //Adds linefeed to the buffer if there was one
+                    self.request_data_consume(input, 1);
+                    data.add(b"\n");
+                }
+                let rc = self.request_body_data(Some(&data));
+                self.request_buf.clear();
+                return rc;
+            }
+            if HtpMethod::new(method) == HtpMethod::Unknown {
+                if self.request_body_data_left.unwrap_or(0) == 0 {
+                    // log only once per transaction
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::REQUEST_BODY_UNEXPECTED,
+                        "Unexpected request body"
+                    );
+                } else {
+                    self.request_body_data_left = Some(1);
+                }
+                if has_lf {
+                    //Adds linefeed to the buffer if there was one
+                    self.request_data_consume(input, 1);
+                    data.add(b"\n");
+                }
+                // Interpret remaining bytes as body data
+                let rc = self.request_body_data(Some(&data));
+                self.request_buf.clear();
+                return rc;
+            } // else continue
+            self.request_body_data_left = None;
+        }
+        // didnt use data, restore
+        self.request_buf.add(&data[0..buf_len]);
+        //unread last end of line so that request_line works
+        self.request_data_unconsume(input, data.len());
+        self.state_request_complete(input)
+    }
+
+    /// Consumes whatever is left in the buffer after detecting an http/0.9 session.
+    pub(crate) fn request_ignore_data_after_http_0_9(
+        &mut self, data: &mut ParserData,
+    ) -> Result<()> {
+        if !data.is_empty() {
+            self.conn.flags.set(ConnectionFlags::HTTP_0_9_EXTRA)
+        }
+        self.request_data_consume(data, data.len());
+        Err(HtpStatus::DATA)
+    }
+
+    /// The idle state is where the parser will end up after a transaction is processed.
+    /// If there is more data available, a new request will be started.
+    ///
+    /// Returns OK on state change, ERROR on error, or HTP_DATA when more data is needed.
+    pub(crate) fn request_idle(&mut self, data: &mut ParserData) -> Result<()> {
+        // We want to start parsing the next request (and change
+        // the state from IDLE) only if there's at least one
+        // byte of data available. Otherwise we could be creating
+        // new structures even if there's no more data on the
+        // connection.
+        if data.is_empty() {
+            // we may have buffered some data, if we are closing, we want to process it
+            if self.request_status != HtpStreamState::CLOSED || self.request_buf.is_empty() {
+                return Err(HtpStatus::DATA);
+            }
+        }
+        self.request_reset();
+        // Change state to TRANSACTION_START
+        // Ignore the result.
+        let _ = self.state_request_start();
+        Ok(())
+    }
+
+    /// Buffer incomplete request data and verify that field_limit
+    /// constraint is met.
+    fn handle_request_absent_lf(&mut self, data: &ParserData) -> Result<()> {
+        self.check_request_buffer_limit(data.len())?;
+        self.request_buf.add(data.as_slice());
+        self.request_data_consume(data, data.len());
+        Err(HtpStatus::DATA_BUFFER)
+    }
+
+    /// Run the REQUEST_BODY_DATA hook.
+    ///
+    /// Transaction-level hooks run first, then configuration-level hooks.
+    /// An empty-but-non-null chunk is skipped; a null chunk (the
+    /// end-of-body signal) is still delivered to the callbacks.
+    fn request_run_hook_body_data(&mut self, d: &mut Data) -> Result<()> {
+        // Do not invoke callbacks with an empty data chunk
+        let req = self.request_mut().unwrap();
+        if !d.data().is_null() && d.is_empty() {
+            return Ok(());
+        }
+        // Clone the hook list so `self` can be borrowed by the callbacks.
+        req.hook_request_body_data.clone().run_all(self, d)?;
+        // Run configuration hooks second
+        self.cfg.hook_request_body_data.run_all(self, d)?;
+        Ok(())
+    }
+
+    /// Process a chunk of inbound (client or request) data.
+    ///
+    /// Drives the request-side state machine in a loop until an error occurs
+    /// or the input is exhausted, then maps the outcome onto the returned
+    /// HtpStreamState.
+    pub(crate) fn request_data(
+        &mut self, mut chunk: ParserData, timestamp: Option<OffsetDateTime>,
+    ) -> HtpStreamState {
+        // Reset the bytes consumed counter
+        self.request_bytes_consumed = 0;
+
+        // Return if the connection is in stop state.
+        if self.request_status == HtpStreamState::STOP {
+            htp_info!(
+                self.logger,
+                HtpLogCode::PARSER_STATE_ERROR,
+                "Inbound parser is in STOP state"
+            );
+            return HtpStreamState::STOP;
+        }
+        // Return if the connection had a fatal error earlier
+        if self.request_status == HtpStreamState::ERROR {
+            htp_error!(
+                self.logger,
+                HtpLogCode::PARSER_STATE_ERROR,
+                "Inbound parser is in ERROR state"
+            );
+            return HtpStreamState::ERROR;
+        }
+
+        // If the length of the supplied data chunk is zero, proceed
+        // only if the stream has been closed. We do not allow zero-sized
+        // chunks in the API, but we use them internally to force the parsers
+        // to finalize parsing.
+        if chunk.is_empty() && self.request_status != HtpStreamState::CLOSED {
+            htp_error!(
+                self.logger,
+                HtpLogCode::ZERO_LENGTH_DATA_CHUNKS,
+                "Zero-length data chunks are not allowed"
+            );
+            return HtpStreamState::CLOSED;
+        }
+        // Remember the timestamp of the current request data chunk
+        if let Some(timestamp) = timestamp {
+            self.request_timestamp = timestamp;
+        }
+
+        // Store the current chunk information
+        self.request_chunk_count = self.request_chunk_count.wrapping_add(1);
+        self.conn.track_inbound_data(chunk.len());
+        // Return without processing any data if the stream is in tunneling
+        // mode (which it would be after an initial CONNECT transaction).
+        if self.request_status == HtpStreamState::TUNNEL {
+            return HtpStreamState::TUNNEL;
+        }
+        if self.response_status == HtpStreamState::DATA_OTHER {
+            self.response_status = HtpStreamState::DATA
+        }
+        //handle gap
+        if chunk.is_gap() {
+            // Mark the transaction as having a gap
+            let idx = self.request_index();
+            let req = self.request_mut();
+            if req.is_none() {
+                return HtpStreamState::ERROR;
+            }
+            let req = req.unwrap();
+
+            req.flags.set(HtpFlags::REQUEST_MISSING_BYTES);
+
+            if idx == 0 && req.request_progress == HtpRequestProgress::NOT_STARTED {
+                // We have a leading gap on the first transaction.
+                return HtpStreamState::CLOSED;
+            }
+        }
+
+        loop
+        // Invoke a processor, in a loop, until an error
+        // occurs or until we run out of data. Many processors
+        // will process a request, each pointing to the next
+        // processor that needs to run.
+        // Return if there's been an error or if we've run out of data. We are relying
+        // on processors to supply error messages, so we'll keep quiet here.
+        {
+            // handle gap: only body, HTTP/0.9 passthrough and finalize states
+            // can tolerate missing bytes.
+            if chunk.is_gap()
+                && self.request_state != State::BodyIdentity
+                && self.request_state != State::IgnoreDataAfterHTTP09
+                && self.request_state != State::Finalize
+            {
+                // go to request_connect_probe_data ?
+                htp_error!(
+                    self.logger,
+                    HtpLogCode::INVALID_GAP,
+                    "Gaps are not allowed during this state"
+                );
+                return HtpStreamState::CLOSED;
+            }
+            let mut rc = self.handle_request_state(&mut chunk);
+
+            if rc.is_ok() {
+                if self.request_status == HtpStreamState::TUNNEL {
+                    return HtpStreamState::TUNNEL;
+                }
+                rc = self.request_handle_state_change(&mut chunk)
+            }
+            match rc {
+                // Continue looping.
+                Ok(_) => {}
+                // Do we need more data?
+                Err(HtpStatus::DATA) | Err(HtpStatus::DATA_BUFFER) => {
+                    // Ignore result.
+                    let _ = self.request_receiver_send_data(&mut chunk);
+                    self.request_status = HtpStreamState::DATA;
+                    return HtpStreamState::DATA;
+                }
+                // Check for suspended parsing.
+                Err(HtpStatus::DATA_OTHER) => {
+                    // We might have actually consumed the entire data chunk?
+                    if chunk.is_empty() {
+                        // Do not send STREAM_DATA_OTHER if we've consumed the entire chunk.
+                        self.request_status = HtpStreamState::DATA;
+                        return HtpStreamState::DATA;
+                    } else {
+                        // Partial chunk consumption.
+                        self.request_status = HtpStreamState::DATA_OTHER;
+                        return HtpStreamState::DATA_OTHER;
+                    }
+                }
+                // Check for the stop signal.
+                Err(HtpStatus::STOP) => {
+                    self.request_status = HtpStreamState::STOP;
+                    return HtpStreamState::STOP;
+                }
+                // Permanent stream error.
+                Err(_) => {
+                    self.request_status = HtpStreamState::ERROR;
+                    return HtpStreamState::ERROR;
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use rstest::rstest;
+
+    #[rstest]
+    #[case(b"GET", HtpMethod::GET)]
+    #[case(b"PUT", HtpMethod::PUT)]
+    #[case(b"POST", HtpMethod::POST)]
+    #[case(b"PoST", HtpMethod::Unknown)]
+    #[case(b"post", HtpMethod::Unknown)]
+    #[case(b"NOT_METHOD", HtpMethod::Unknown)]
+    fn test_method(#[case] input: &[u8], #[case] expected: HtpMethod) {
+        assert_eq!(HtpMethod::new(input), expected);
+    }
+}
diff --git a/rust/htp/src/request_generic.rs b/rust/htp/src/request_generic.rs
new file mode 100644 (file)
index 0000000..3898579
--- /dev/null
@@ -0,0 +1,306 @@
+use crate::{
+    bstr::Bstr,
+    config::HtpUnwanted,
+    connection_parser::ConnectionParser,
+    error::Result,
+    headers::Flags as HeaderFlags,
+    parsers::{parse_content_length, parse_protocol},
+    request::HtpMethod,
+    transaction::{Header, HtpProtocol},
+    util::{
+        is_space, take_ascii_whitespace, take_is_space, take_not_is_space, take_until_null,
+        FlagOperations, HtpFlags,
+    },
+};
+use nom::{bytes::complete::take_while, error::ErrorKind, sequence::tuple};
+use std::cmp::Ordering;
+
+impl ConnectionParser {
+    /// Extract one request header. A header can span multiple lines, in
+    /// which case they will be folded into one before parsing is attempted.
+    ///
+    /// Repeated same-name headers are merged with ", ", except Content-Length
+    /// where mismatching values are only warned about. Repetition warnings
+    /// are capped per transaction.
+    fn process_request_header_generic(&mut self, header: Header) -> Result<()> {
+        // Try to parse the header.
+        let mut repeated = false;
+        let reps = self.request().request_header_repetitions;
+        let mut update_reps = false;
+        // Do we already have a header with the same name?
+        if let Some((_, h_existing)) = self
+            .request_mut()
+            .request_headers
+            .get_nocase_mut(header.name.as_slice())
+        {
+            // TODO Do we want to have a list of the headers that are
+            //      allowed to be combined in this way?
+            if !h_existing.flags.is_set(HtpFlags::FIELD_REPEATED) {
+                // This is the second occurence for this header.
+                repeated = true;
+            } else if reps < 64 {
+                update_reps = true;
+            } else {
+                // Repetition cap reached; drop the header silently.
+                return Ok(());
+            }
+            // For simplicity reasons, we count the repetitions of all headers
+            // Keep track of repeated same-name headers.
+            h_existing.flags.set(HtpFlags::FIELD_REPEATED);
+            // Having multiple C-L headers is against the RFC but
+            // servers may ignore the subsequent headers if the values are the same.
+            if header.name.cmp_nocase("Content-Length") == Ordering::Equal {
+                // Don't use string comparison here because we want to
+                // ignore small formatting differences.
+                let existing_cl = parse_content_length(&h_existing.value, None);
+                let new_cl = parse_content_length(&header.value, None);
+                // Ambiguous response C-L value.
+                if existing_cl.is_none() || new_cl.is_none() || existing_cl != new_cl {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST,
+                        "Ambiguous request C-L value"
+                    );
+                }
+            } else {
+                // Add to the existing header.
+                h_existing.value.extend_from_slice(b", ");
+                h_existing.value.extend_from_slice(header.value.as_slice());
+            }
+        } else {
+            self.request_mut()
+                .request_headers
+                .add(header.name.clone(), header);
+        }
+        if update_reps {
+            self.request_mut().request_header_repetitions =
+                self.request().request_header_repetitions.wrapping_add(1)
+        }
+        if repeated {
+            htp_warn!(
+                self.logger,
+                HtpLogCode::REQUEST_HEADER_REPETITION,
+                "Repetition for header"
+            );
+        }
+        Ok(())
+    }
+
+    /// Generic request header parser.
+    pub(crate) fn process_request_headers_generic<'a>(
+        &mut self,
+        data: &'a [u8],
+    ) -> Result<(&'a [u8], bool)> {
+        let rc = self.request_mut().request_header_parser.headers()(data);
+        if let Ok((remaining, (headers, eoh))) = rc {
+            for h in headers {
+                let mut flags = 0;
+                let name_flags = h.name.flags;
+                // Ignore LWS after field-name.
+                if name_flags.is_set(HeaderFlags::NAME_TRAILING_WHITESPACE) {
+                    // Log only once per transaction.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::REQUEST_INVALID_LWS_AFTER_NAME,
+                        "Request field invalid: LWS after name",
+                        self.request_mut().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                //If name has leading whitespace, probably invalid folding
+                if name_flags.is_set(HeaderFlags::NAME_LEADING_WHITESPACE) {
+                    // Invalid folding.
+                    // Warn only once per transaction.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::Invalid_REQUEST_FIELD_FOLDING,
+                        "Invalid request field folding",
+                        self.request_mut().flags,
+                        flags,
+                        HtpFlags::Invalid_FOLDING
+                    );
+                }
+                // Check that field-name is a token
+                if name_flags.is_set(HeaderFlags::NAME_NON_TOKEN_CHARS) {
+                    // Incorrectly formed header name.
+                    // Log only once per transaction.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::REQUEST_HEADER_INVALID,
+                        "Request header name is not a token",
+                        self.request_mut().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                // No colon?
+                if name_flags.is_set(HeaderFlags::MISSING_COLON) {
+                    // Log only once per transaction.
+                    // We handle this case as a header with an empty name, with the value equal
+                    // to the entire input string.
+                    // TODO Apache will respond to this problem with a 400.
+                    // Now extract the name and the value
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::REQUEST_FIELD_MISSING_COLON,
+                        "Request field invalid: colon missing",
+                        self.request_mut().flags,
+                        flags,
+                        HtpFlags::FIELD_UNPARSEABLE
+                    );
+                } else if name_flags.is_set(HeaderFlags::NAME_EMPTY) {
+                    // Empty header name.
+                    // Log only once per transaction.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::REQUEST_INVALID_EMPTY_NAME,
+                        "Request field invalid: empty name",
+                        self.request_mut().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                self.process_request_header_generic(Header::new_with_flags(
+                    h.name.name.into(),
+                    h.value.value.into(),
+                    flags,
+                ))?;
+            }
+            Ok((remaining, eoh))
+        } else {
+            Ok((data, false))
+        }
+    }
+
+    /// Parses a single request line.
+    ///
+    /// Splits the line into method, URI and protocol, populating the current
+    /// transaction. A line with no bytes after the method (or no protocol
+    /// token after the URI) is treated as HTTP/0.9. When `nul_terminates` is
+    /// set, parsing stops at the first NUL byte.
+    pub(crate) fn parse_request_line_generic_ex(
+        &mut self,
+        request_line: &[u8],
+        nul_terminates: bool,
+    ) -> Result<()> {
+        let mut mstart: bool = false;
+        let mut data: &[u8] = request_line;
+        if nul_terminates {
+            if let Ok((_, before_null)) = take_until_null(data) {
+                data = before_null
+            }
+        }
+        // The request method starts at the beginning of the
+        // line and ends with the first whitespace character.
+        let method_parser = tuple::<_, _, (_, ErrorKind), _>
+                                // skip past leading whitespace. IIS allows this
+                               ((take_is_space,
+                               take_not_is_space,
+                                // Ignore whitespace after request method. The RFC allows
+                                 // for only one SP, but then suggests any number of SP and HT
+                                 // should be permitted. Apache uses isspace(), which is even
+                                 // more permissive, so that's what we use here.
+                               take_ascii_whitespace()
+                               ));
+
+        if let Ok((remaining, (ls, method, ws))) = method_parser(data) {
+            if !ls.is_empty() {
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::REQUEST_LINE_LEADING_WHITESPACE,
+                    "Request line: leading whitespace"
+                );
+
+                let requestline_leading_whitespace_unwanted =
+                    self.cfg.requestline_leading_whitespace_unwanted;
+                if requestline_leading_whitespace_unwanted != HtpUnwanted::Ignore {
+                    // reset mstart so that we copy the whitespace into the method
+                    mstart = true;
+                    // set expected response code to this anomaly
+                    self.request_mut().response_status_expected_number =
+                        requestline_leading_whitespace_unwanted
+                }
+            }
+
+            if mstart {
+                // Fold the leading whitespace into the stored method.
+                self.request_mut().request_method =
+                    Some(Bstr::from([&ls[..], &method[..]].concat()));
+            } else {
+                self.request_mut().request_method = Some(Bstr::from(method));
+            }
+
+            if let Some(request_method) = &self.request().request_method {
+                self.request_mut().request_method_number =
+                    HtpMethod::new(request_method.as_slice());
+            }
+
+            // Too much performance overhead for fuzzing
+            if ws.iter().any(|&c| c != 0x20) {
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::METHOD_DELIM_NON_COMPLIANT,
+                    "Request line: non-compliant delimiter between Method and URI"
+                );
+            }
+
+            if remaining.is_empty() {
+                // No, this looks like a HTTP/0.9 request.
+                self.request_mut().is_protocol_0_9 = true;
+                self.request_mut().request_protocol_number = HtpProtocol::V0_9;
+                if self.request().request_method_number == HtpMethod::Unknown {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD,
+                        "Request line: unknown method only"
+                    );
+                }
+                return Ok(());
+            }
+
+            let uri_protocol_parser = tuple::<_, _, (_, ErrorKind), _>
+            // The URI ends with the first whitespace.
+            ((take_while(|c: u8| c != 0x20),
+              // Ignore whitespace after URI.
+              take_is_space)
+            );
+
+            if let Ok((mut protocol, (mut uri, _))) = uri_protocol_parser(remaining) {
+                if uri.len() == remaining.len() && uri.iter().any(|&c| is_space(c)) {
+                    // warn regardless if we've seen non-compliant chars
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::URI_DELIM_NON_COMPLIANT,
+                        "Request line: URI contains non-compliant delimiter"
+                    );
+                    // if we've seen some 'bad' delimiters, we retry with those
+                    let uri_protocol_parser2 =
+                        tuple::<_, _, (_, ErrorKind), _>((take_not_is_space, take_is_space));
+                    if let Ok((protocol2, (uri2, _))) = uri_protocol_parser2(remaining) {
+                        uri = uri2;
+                        protocol = protocol2;
+                    }
+                }
+                self.request_mut().request_uri = Some(Bstr::from(uri));
+                // Is there protocol information available?
+                if protocol.is_empty() {
+                    // No, this looks like a HTTP/0.9 request.
+                    self.request_mut().is_protocol_0_9 = true;
+                    self.request_mut().request_protocol_number = HtpProtocol::V0_9;
+                    if self.request().request_method_number == HtpMethod::Unknown {
+                        htp_warn!(
+                            self.logger,
+                            HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL,
+                            "Request line: unknown method and no protocol"
+                        );
+                    }
+                    return Ok(());
+                }
+                // The protocol information continues until the end of the line.
+                self.request_mut().request_protocol = Some(Bstr::from(protocol));
+                self.request_mut().request_protocol_number =
+                    parse_protocol(protocol, &mut self.logger);
+                if self.request().request_method_number == HtpMethod::Unknown
+                    && self.request().request_protocol_number == HtpProtocol::Invalid
+                {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL,
+                        "Request line: unknown method and invalid protocol"
+                    );
+                }
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/rust/htp/src/response.rs b/rust/htp/src/response.rs
new file mode 100644 (file)
index 0000000..820f878
--- /dev/null
@@ -0,0 +1,1602 @@
+use crate::{
+    bstr::Bstr,
+    connection_parser::{ConnectionParser, HtpStreamState, ParserData, State},
+    decompressors::{Decompressor, HtpContentEncoding},
+    error::Result,
+    headers::HeaderFlags,
+    hook::DataHook,
+    parsers::{parse_chunked_length, parse_content_length, parse_protocol, parse_status},
+    request::HtpMethod,
+    transaction::{
+        Data, Header, HtpProtocol, HtpRequestProgress, HtpResponseNumber, HtpResponseProgress,
+        HtpTransferCoding,
+    },
+    uri::Uri,
+    util::{
+        chomp, is_chunked_ctl_line, is_line_ignorable, is_space, is_valid_chunked_length_data,
+        take_ascii_whitespace, take_is_space, take_is_space_or_null, take_not_is_space,
+        take_till_eol, take_till_lf, treat_response_line_as_body, FlagOperations, HtpFlags,
+    },
+    HtpStatus,
+};
+use nom::{bytes::streaming::take_till as streaming_take_till, error::ErrorKind, sequence::tuple};
+use std::{
+    cmp::{min, Ordering},
+    mem::take,
+};
+use time::OffsetDateTime;
+
+impl ConnectionParser {
+    /// Sends outstanding connection data to the currently active data receiver hook.
+    ///
+    /// Wraps the not-yet-delivered portion of `data` (the slice returned by
+    /// `callback_data()`) in a transaction `Data` and runs the installed
+    /// response data receiver hook over it.
+    ///
+    /// Returns `Err(HtpStatus::ERROR)` when there is no current response
+    /// transaction; `Ok(())` otherwise (including when no hook is installed).
+    fn response_receiver_send_data(&mut self, data: &mut ParserData) -> Result<()> {
+        // Forward only the portion that has not yet been handed to callbacks.
+        let data = ParserData::from(data.callback_data());
+        let resp = self.response_mut();
+        if resp.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let mut tx_data = Data::new(resp.unwrap(), &data);
+        if let Some(hook) = &self.response_data_receiver_hook {
+            hook.run_all(self, &mut tx_data)?;
+        } else {
+            // No receiver installed: nothing to deliver.
+            return Ok(());
+        };
+        Ok(())
+    }
+
+    /// Finalizes an existing data receiver hook by sending any outstanding data to it. The
+    /// hook is then removed so that it receives no more data.
+    ///
+    /// No-op when no receiver hook is installed. The hook is cleared even if
+    /// the final send failed; the send result is propagated to the caller.
+    pub(crate) fn response_receiver_finalize_clear(
+        &mut self, input: &mut ParserData,
+    ) -> Result<()> {
+        if self.response_data_receiver_hook.is_none() {
+            return Ok(());
+        }
+        let rc = self.response_receiver_send_data(input);
+        // Remove the hook unconditionally so it receives no further data.
+        self.response_data_receiver_hook = None;
+        rc
+    }
+
+    /// Configures the data receiver hook.
+    ///
+    /// Installs (or clears, when `None`) the hook that will receive raw
+    /// response data. Always returns `Ok(())`; the `Result` return keeps the
+    /// signature uniform with the match arms in `response_handle_state_change`.
+    fn response_receiver_set(&mut self, data_receiver_hook: Option<DataHook>) -> Result<()> {
+        self.response_data_receiver_hook = data_receiver_hook;
+        Ok(())
+    }
+
+    /// Handles response parser state changes. At the moment, this function is used only
+    /// to configure data receivers, which are sent raw connection data.
+    ///
+    /// Returns `Ok(())` when the state is unchanged or after recording the new
+    /// state; `Err(HtpStatus::ERROR)` when entering `State::Headers` with no
+    /// current response transaction.
+    fn response_handle_state_change(&mut self, input: &mut ParserData) -> Result<()> {
+        // Nothing to do unless the state actually changed since the last call.
+        if self.response_state_previous == self.response_state {
+            return Ok(());
+        }
+
+        if self.response_state == State::Headers {
+            let resp = self.response_mut();
+            if resp.is_none() {
+                return Err(HtpStatus::ERROR);
+            }
+            let resp = resp.unwrap();
+            // Pick the per-config raw-data hook matching the current progress:
+            // header data while parsing headers, trailer data while parsing
+            // chunked trailers.
+            let header_fn = Some(resp.cfg.hook_response_header_data.clone());
+            let trailer_fn = Some(resp.cfg.hook_response_trailer_data.clone());
+            input.reset_callback_start();
+
+            match resp.response_progress {
+                HtpResponseProgress::HEADERS => self.response_receiver_set(header_fn),
+                HtpResponseProgress::TRAILER => self.response_receiver_set(trailer_fn),
+                _ => Ok(()),
+            }?;
+        }
+        // Same comment as in request_handle_state_change(). Below is a copy.
+        // Initially, I had the finalization of raw data sending here, but that
+        // caused the last REQUEST_HEADER_DATA hook to be invoked after the
+        // REQUEST_HEADERS hook -- which I thought made no sense. For that reason,
+        // the finalization is now initiated from the request header processing code,
+        // which is less elegant but provides a better user experience. Having some
+        // (or all) hooks to be invoked on state change might work better.
+        self.response_state_previous = self.response_state;
+        Ok(())
+    }
+
+    /// The maximum amount accepted for buffering is controlled
+    /// by htp_config_t::field_limit.
+    ///
+    /// Checks whether buffering `len` more bytes would push the total buffered
+    /// response data (line buffer plus any pending header buffer) over the
+    /// configured field limit. Returns `Err(HtpStatus::ERROR)` and logs
+    /// RESPONSE_FIELD_TOO_LONG when the limit would be exceeded.
+    fn check_response_buffer_limit(&mut self, len: usize) -> Result<()> {
+        if len == 0 {
+            return Ok(());
+        }
+        // Check the hard (buffering) limit.
+        let mut newlen: usize = self.response_buf.len().wrapping_add(len);
+        // When calculating the size of the buffer, take into account the
+        // space we're using for the response header buffer.
+        if let Some(response_header) = &self.response_header {
+            newlen = newlen.wrapping_add(response_header.len());
+        }
+        let field_limit = self.cfg.field_limit;
+        if newlen > field_limit {
+            // NOTE(review): the message wording reads oddly ("Response the
+            // buffer limit"); left unchanged since log text may be matched by
+            // consumers -- confirm against upstream before rewording.
+            htp_error!(
+                self.logger,
+                HtpLogCode::RESPONSE_FIELD_TOO_LONG,
+                format!(
+                    "Response the buffer limit: size {} limit {}.",
+                    newlen, field_limit
+                )
+            );
+            return Err(HtpStatus::ERROR);
+        }
+        Ok(())
+    }
+
+    /// Consumes bytes until the end of the current line.
+    ///
+    /// Returns Ok(()) after the line terminator is consumed (state moves to
+    /// BodyChunkedLength), HtpStatus::ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when no LF was found in the available input (all of it is consumed and
+    /// counted, and more data is needed).
+    pub(crate) fn response_body_chunked_data_end(&mut self, input: &ParserData) -> Result<()> {
+        // TODO We shouldn't really see anything apart from CR and LF,
+        //      so we should warn about anything else.
+        let resp = self.response_mut();
+        if resp.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let resp = resp.unwrap();
+
+        if let Ok((_, line)) = take_till_lf(input.as_slice()) {
+            // Found the end of the chunk-data line: consume it, count it
+            // toward the raw message length, and go parse the next chunk size.
+            let len = line.len();
+            self.response_data_consume(input, len);
+            let resp = self.response_mut().unwrap();
+            resp.response_message_len = resp.response_message_len.wrapping_add(len as u64);
+            self.response_state = State::BodyChunkedLength;
+            Ok(())
+        } else {
+            // Advance to end. Don't need to buffer.
+            resp.response_message_len = resp.response_message_len.wrapping_add(input.len() as u64);
+            self.response_data_consume(input, input.len());
+            Err(HtpStatus::DATA_BUFFER)
+        }
+    }
+
+    /// Processes a chunk of data.
+    ///
+    /// Consumes up to the remaining chunk length from `input`, feeding it to
+    /// the response body pipeline.
+    ///
+    /// Returns HtpStatus::OK on state change, HtpStatus::Error on error, or
+    /// HtpStatus::DATA when more data is needed.
+    pub(crate) fn response_body_chunked_data(&mut self, input: &ParserData) -> Result<()> {
+        if self.response_status == HtpStreamState::CLOSED {
+            self.response_state = State::Finalize;
+            // Sends close signal to decompressors
+            return self.response_body_data(input.data());
+        }
+        // Never consume more than what is left of the current chunk.
+        let bytes_to_consume = min(
+            input.len(),
+            self.response_chunked_length.unwrap_or(0) as usize,
+        );
+        if bytes_to_consume == 0 {
+            return Err(HtpStatus::DATA);
+        }
+        // Consume the data.
+        self.response_body_data(Some(&input.as_slice()[0..bytes_to_consume]))?;
+        // Adjust the counters.
+        self.response_data_consume(input, bytes_to_consume);
+        if let Some(len) = &mut self.response_chunked_length {
+            *len -= bytes_to_consume as u64;
+            // Have we seen the entire chunk?
+            if *len == 0 {
+                self.response_state = State::BodyChunkedDataEnd;
+                return Ok(());
+            }
+        }
+
+        // Chunk not finished yet; wait for more input.
+        Err(HtpStatus::DATA)
+    }
+
+    /// Extracts chunk length.
+    ///
+    /// Reads one line (combining it with any previously buffered partial
+    /// line), skips chunked control lines, and parses the chunk length.
+    /// On a valid length: 0 switches to trailer/header parsing, >0 switches
+    /// to chunk-data parsing. On an invalid length the remaining stream is
+    /// treated as an identity body until close.
+    ///
+    /// Returns Ok(()) on success, Err(HTP_ERROR) on error, or Err(HTP_DATA) when more data is needed.
+    pub(crate) fn response_body_chunked_length(&mut self, input: &mut ParserData) -> Result<()> {
+        let mut data = input.as_slice();
+        loop {
+            let buf_empty = self.response_buf.is_empty();
+            let resp = self.response_mut();
+            if resp.is_none() {
+                return Err(HtpStatus::ERROR);
+            }
+            let resp = resp.unwrap();
+
+            match take_till_lf(data) {
+                Ok((remaining, line)) => {
+                    self.response_data_consume(input, line.len());
+                    if !buf_empty {
+                        // Only enforce the buffer limit when we are appending
+                        // to previously buffered data.
+                        self.check_response_buffer_limit(line.len())?;
+                    }
+                    // Combine buffered bytes (taken, leaving the buffer empty)
+                    // with the newly read line.
+                    let mut data2 = take(&mut self.response_buf);
+                    data2.add(line);
+                    if is_chunked_ctl_line(&data2) {
+                        let resp = self.response_mut().unwrap();
+                        resp.response_message_len =
+                            (resp.response_message_len).wrapping_add(data2.len() as u64);
+                        // Empty chunk len. Try to continue parsing.
+                        data = remaining;
+                        continue;
+                    }
+                    let resp = self.response_mut().unwrap();
+                    resp.response_message_len =
+                        (resp.response_message_len).wrapping_add(data2.len() as u64);
+
+                    match parse_chunked_length(&data2) {
+                        Ok((len, ext)) => {
+                            self.response_chunked_length = len;
+                            if ext {
+                                htp_warn!(
+                                    self.logger,
+                                    HtpLogCode::RESPONSE_CHUNK_EXTENSION,
+                                    "Response chunk extension"
+                                );
+                            }
+                            // Handle chunk length
+                            if let Some(len) = len {
+                                match len.cmp(&0) {
+                                    Ordering::Equal => {
+                                        // End of data
+                                        self.response_state = State::Headers;
+                                        self.response_mut().unwrap().response_progress =
+                                            HtpResponseProgress::TRAILER
+                                    }
+                                    Ordering::Greater => {
+                                        // More data available.
+                                        self.response_state = State::BodyChunkedData
+                                    }
+                                    _ => {}
+                                }
+                            } else {
+                                return Ok(()); // empty chunk length line, lets try to continue
+                            }
+                        }
+                        Err(_) => {
+                            // unconsume so response_body_identity_stream_close doesn't miss the first bytes
+                            self.response_data_unconsume(input, line.len());
+                            self.response_state = State::BodyIdentityStreamClose;
+                            self.response_mut().unwrap().response_transfer_coding =
+                                HtpTransferCoding::Identity;
+                            htp_error!(
+                                self.logger,
+                                HtpLogCode::INVALID_RESPONSE_CHUNK_LEN,
+                                "Response chunk encoding: Invalid chunk length"
+                            );
+                        }
+                    }
+
+                    return Ok(());
+                }
+                _ => {
+                    // No LF in the available input yet.
+                    // Check if the data we have seen so far is invalid
+                    if buf_empty && !is_valid_chunked_length_data(data) {
+                        // Contains leading junk non hex_ascii data
+                        resp.response_transfer_coding = HtpTransferCoding::Identity;
+                        self.response_state = State::BodyIdentityStreamClose;
+                        htp_error!(
+                            self.logger,
+                            HtpLogCode::INVALID_RESPONSE_CHUNK_LEN,
+                            "Response chunk encoding: Invalid chunk length"
+                        );
+                        return Ok(());
+                    } else {
+                        // Buffer the partial line and wait for more data.
+                        return self.handle_response_absent_lf(input);
+                    }
+                }
+            }
+        }
+    }
+
+    /// Processes an identity response body of known length.
+    ///
+    /// Consumes up to the remaining Content-Length bytes from `data`. Gaps
+    /// (missing packets) are forwarded to the body-data hooks when no content
+    /// decoding is active, otherwise they terminate decompression.
+    ///
+    /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or
+    /// HtpStatus::DATA when more data is needed.
+    pub(crate) fn response_body_identity_cl_known(&mut self, data: &mut ParserData) -> Result<()> {
+        if self.response_status == HtpStreamState::CLOSED {
+            self.response_state = State::Finalize;
+            // Sends close signal to decompressors
+            return self.response_body_data(data.data());
+        }
+        // Remaining body length must be known at this point.
+        let left = self.response_body_data_left.ok_or(HtpStatus::ERROR)?;
+        let bytes_to_consume = std::cmp::min(data.len() as u64, left);
+        if bytes_to_consume == 0 {
+            return Err(HtpStatus::DATA);
+        }
+        if data.is_gap() {
+            let resp = self.response_mut();
+            if resp.is_none() {
+                return Err(HtpStatus::ERROR);
+            }
+            let resp = resp.unwrap();
+
+            if resp.response_content_encoding_processing == HtpContentEncoding::None {
+                resp.response_message_len =
+                    resp.response_message_len.wrapping_add(bytes_to_consume);
+                // Create a new gap of the appropriate length
+                let parser_data = ParserData::from(bytes_to_consume as usize);
+                // Send the gap to the data hooks
+                let mut tx_data = Data::new(resp, &parser_data);
+                self.response_run_hook_body_data(&mut tx_data)?;
+            } else {
+                // end decompression on gap
+                self.response_body_data(None)?;
+            }
+        } else {
+            // Consume the data.
+            self.response_body_data(Some(&data.as_slice()[0..bytes_to_consume as usize]))?;
+        }
+        // Adjust the counters.
+        self.response_data_consume(data, bytes_to_consume as usize);
+        self.response_body_data_left = Some(left - bytes_to_consume);
+        // Have we seen the entire response body?
+        if self.response_body_data_left > Some(0) {
+            return Err(HtpStatus::DATA);
+        }
+        // End of response body.
+        self.response_state = State::Finalize;
+        // Sends close signal to decompressors, outputting any partially decompressed data
+        self.response_body_data(None)
+    }
+
+    /// Processes identity response body of unknown length. In this case, we assume the
+    /// response body consumes all data until the end of the stream.
+    ///
+    /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or HtpStatus::DATA
+    /// when more data is needed.
+    pub(crate) fn response_body_identity_stream_close(&mut self, data: &ParserData) -> Result<()> {
+        if !data.is_empty() {
+            // Consume all data from the input buffer.
+            self.response_body_data(data.data())?;
+            // Adjust the counters.
+            self.response_data_consume(data, data.len());
+        }
+        // Have we seen the entire response body?
+        // Only stream close delimits this body.
+        if self.response_status == HtpStreamState::CLOSED {
+            self.response_state = State::Finalize;
+            return Ok(());
+        }
+
+        Err(HtpStatus::DATA)
+    }
+
+    /// Determines presence (and encoding) of a response body.
+    ///
+    /// Applies the message-body length rules (in the spirit of RFC 7230
+    /// section 3.3.3): handles CONNECT tunneling, 101 Switching Protocols,
+    /// interim 100 Continue, bodiless statuses (1xx/204/304 and HEAD),
+    /// chunked transfer coding, Content-Length, and fall-through
+    /// read-until-close. Finishes by running header-complete processing via
+    /// `state_response_headers`.
+    ///
+    /// Returns `Err(HtpStatus::ERROR)` when there is no current response
+    /// transaction, on an invalid Content-Length, or on an unsupported
+    /// multipart/byteranges content type.
+    pub(crate) fn response_body_determine(&mut self, input: &mut ParserData) -> Result<()> {
+        // If the request uses the CONNECT method, then not only are we
+        // to assume there's no body, but we need to ignore all
+        // subsequent data in the stream.
+        let response_tx = self.response_mut();
+        if response_tx.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let response_tx = response_tx.unwrap();
+
+        if response_tx.request_method_number == HtpMethod::CONNECT {
+            if response_tx.response_status_number.in_range(200, 299) {
+                // This is a successful CONNECT stream, which means
+                // we need to switch into tunneling mode: on the
+                // request side we'll now probe the tunnel data to see
+                // if we need to parse or ignore it. So on the response
+                // side we wrap up the tx and wait.
+                self.response_state = State::Finalize;
+                // we may have response headers
+                return self.state_response_headers(input);
+            } else if response_tx.response_status_number.eq_num(407) {
+                // proxy telling us to auth
+                if self.request_status != HtpStreamState::ERROR {
+                    self.request_status = HtpStreamState::DATA
+                }
+            } else {
+                // This is a failed CONNECT stream, which means that
+                // we can unblock request parsing
+                if self.request_status != HtpStreamState::ERROR {
+                    self.request_status = HtpStreamState::DATA
+                }
+                // We are going to continue processing this transaction,
+                // adding a note for ourselves to stop at the end (because
+                // we don't want to see the beginning of a new transaction).
+                self.response_data_other_at_tx_end = true
+            }
+        }
+        // Clone C-L and T-E headers up front so later borrows of self are free.
+        let response_tx = self.response_mut().unwrap();
+        let cl_opt = response_tx
+            .response_headers
+            .get_nocase_nozero("content-length")
+            .cloned();
+        let te_opt = response_tx
+            .response_headers
+            .get_nocase_nozero("transfer-encoding")
+            .cloned();
+        // Check for "101 Switching Protocol" response.
+        // If it's seen, it means that traffic after empty line following headers
+        // is no longer HTTP. We can treat it similarly to CONNECT.
+        // Unlike CONNECT, however, upgrades from HTTP to HTTP seem
+        // rather unlikely, so don't try to probe tunnel for nested HTTP,
+        // and switch to tunnel mode right away.
+        if response_tx.response_status_number.eq_num(101) {
+            // Flag HTTP/2 cleartext upgrades ("Upgrade: h2c").
+            if response_tx
+                .response_headers
+                .get_nocase_nozero("upgrade")
+                .map(|upgrade| upgrade.value.index_of_nocase_nozero("h2c").is_some())
+                .unwrap_or(false)
+            {
+                response_tx.is_http_2_upgrade = true;
+            }
+            if te_opt.is_none() && cl_opt.is_none() {
+                self.response_state = State::Finalize;
+                if self.request_status != HtpStreamState::ERROR {
+                    self.request_status = HtpStreamState::TUNNEL
+                }
+                self.response_status = HtpStreamState::TUNNEL;
+                // we may have response headers
+                return self.state_response_headers(input);
+            } else {
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::SWITCHING_PROTO_WITH_CONTENT_LENGTH,
+                    "Switching Protocol with Content-Length"
+                );
+            }
+        }
+        // Check for an interim "100 Continue" response. Ignore it if found, and revert back to RES_LINE.
+        else if response_tx.response_status_number.eq_num(100) && te_opt.is_none() {
+            match cl_opt
+                .as_ref()
+                .and_then(|cl| parse_content_length(cl.value.as_slice(), Some(&mut self.logger)))
+            {
+                // 100 Continue with a Content-Length > 0 isn't treated as a 100 Continue,
+                // so we do nothing here.
+                Some(x) if x > 0 => (),
+                // Otherwise we treat it as a continue and prep for the next response
+                _ => {
+                    let response_tx = self.response_mut().unwrap();
+                    if response_tx.seen_100continue {
+                        htp_error!(
+                            self.logger,
+                            HtpLogCode::CONTINUE_ALREADY_SEEN,
+                            "Already seen 100-Continue."
+                        );
+                    }
+                    // Expecting to see another response line next.
+                    self.response_state = State::Line;
+                    let response_tx = self.response_mut().unwrap();
+                    // Ignore any response headers seen so far.
+                    response_tx.response_headers.elements.clear();
+                    response_tx.response_progress = HtpResponseProgress::LINE;
+                    response_tx.seen_100continue = true;
+                    return Ok(());
+                }
+            }
+        }
+        // A request can indicate it waits for headers validation
+        // before sending its body cf
+        // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect
+        else if response_tx.response_status_number.in_range(400, 499)
+            && self.request_content_length > Some(0)
+            && self.request_body_data_left == self.request_content_length
+        {
+            let response_tx = self.response_mut().unwrap();
+            if let Some(expect) = response_tx.request_headers.get_nocase("expect") {
+                if expect.value.eq_slice("100-continue") {
+                    self.request_state = State::Finalize;
+                }
+            }
+        }
+
+        // 1. Any response message which MUST NOT include a message-body
+        //  (such as the 1xx, 204, and 304 responses and any response to a HEAD
+        //  request) is always terminated by the first empty line after the
+        //  header fields, regardless of the entity-header fields present in the
+        //  message.
+        let response_tx = self.response_mut().unwrap();
+        if response_tx.request_method_number == HtpMethod::HEAD {
+            // There's no response body whatsoever
+            response_tx.response_transfer_coding = HtpTransferCoding::NoBody;
+            self.response_state = State::Finalize
+        } else if response_tx.response_status_number.in_range(100, 199)
+            || response_tx.response_status_number.eq_num(204)
+            || response_tx.response_status_number.eq_num(304)
+        {
+            // There should be no response body
+            // but browsers interpret content sent by the server as such
+            if te_opt.is_none() && cl_opt.is_none() {
+                response_tx.response_transfer_coding = HtpTransferCoding::NoBody;
+                self.response_state = State::Finalize
+            } else {
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::RESPONSE_BODY_UNEXPECTED,
+                    "Unexpected Response body"
+                );
+            }
+        }
+        // Hack condition to check that we do not assume "no body"
+        let mut multipart_byteranges = false;
+        if self.response_state != State::Finalize {
+            // We have a response body
+            let response_tx = self.response_mut().unwrap();
+            // Extract the media type: everything before the first ';' or space.
+            let response_content_type = if let Some(ct) = response_tx
+                .response_headers
+                .get_nocase_nozero("content-type")
+            {
+                // TODO Some platforms may do things differently here.
+                let response_content_type = if let Ok((_, ct)) =
+                    streaming_take_till::<_, _, (&[u8], ErrorKind)>(|c| c == b';' || is_space(c))(
+                        &ct.value,
+                    ) {
+                    ct
+                } else {
+                    &ct.value
+                };
+
+                let mut response_content_type = Bstr::from(response_content_type);
+                response_content_type.make_ascii_lowercase();
+                if response_content_type
+                    .index_of_nocase("multipart/byteranges")
+                    .is_some()
+                {
+                    multipart_byteranges = true;
+                }
+                Some(response_content_type)
+            } else {
+                None
+            };
+
+            if response_content_type.is_some() {
+                response_tx.response_content_type = response_content_type;
+            }
+            // 2. If a Transfer-Encoding header field (section 14.40) is present and
+            //   indicates that the "chunked" transfer coding has been applied, then
+            //   the length is defined by the chunked encoding (section 3.6).
+            if let Some(te) =
+                te_opt.and_then(|te| te.value.index_of_nocase_nozero("chunked").and(Some(te)))
+            {
+                if !te.value.cmp_nocase("chunked") {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_ABNORMAL_TRANSFER_ENCODING,
+                        "Transfer-encoding has abnormal chunked value"
+                    );
+                }
+                // 3. If a Content-Length header field (section 14.14) is present, its
+                // spec says chunked is HTTP/1.1 only, but some browsers accept it
+                // with 1.0 as well
+                let response_tx = self.response_mut().unwrap();
+                if response_tx.response_protocol_number < HtpProtocol::V1_1 {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_CHUNKED_OLD_PROTO,
+                        "Chunked transfer-encoding on HTTP/0.9 or HTTP/1.0"
+                    );
+                }
+                // If the T-E header is present we are going to use it.
+                let response_tx = self.response_mut().unwrap();
+                response_tx.response_transfer_coding = HtpTransferCoding::Chunked;
+                // We are still going to check for the presence of C-L
+                if cl_opt.is_some() {
+                    // This is a violation of the RFC
+                    response_tx.flags.set(HtpFlags::REQUEST_SMUGGLING)
+                }
+                response_tx.response_progress = HtpResponseProgress::BODY;
+                self.response_state = State::BodyChunkedLength
+            } else if let Some(cl) = cl_opt {
+                //   value in bytes represents the length of the message-body.
+                // We know the exact length
+                response_tx.response_transfer_coding = HtpTransferCoding::Identity;
+                // Check for multiple C-L headers
+                if cl.flags.is_set(HtpFlags::FIELD_REPEATED) {
+                    response_tx.flags.set(HtpFlags::REQUEST_SMUGGLING)
+                }
+                // Get body length
+                let response_content_length =
+                    parse_content_length((*cl.value).as_slice(), Some(&mut self.logger));
+                self.response_mut().unwrap().response_content_length = response_content_length;
+                self.response_content_length = response_content_length;
+                self.response_body_data_left = response_content_length;
+                if let Some(len) = response_content_length {
+                    if len != 0 {
+                        self.response_state = State::BodyIdentityCLKnown;
+                        self.response_mut().unwrap().response_progress = HtpResponseProgress::BODY
+                    } else {
+                        self.response_state = State::Finalize
+                    }
+                } else {
+                    htp_error!(
+                        self.logger,
+                        HtpLogCode::INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE,
+                        "Invalid C-L field in response"
+                    );
+                    return Err(HtpStatus::ERROR);
+                }
+            } else {
+                // 4. If the message uses the media type "multipart/byteranges", which is
+                //   self-delimiting, then that defines the length. This media type MUST
+                //   NOT be used unless the sender knows that the recipient can parse it;
+                //   the presence in a request of a Range header with multiple byte-range
+                //   specifiers implies that the client can parse multipart/byteranges
+                //   responses.
+                // TODO Handle multipart/byteranges
+                if multipart_byteranges {
+                    htp_error!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_MULTIPART_BYTERANGES,
+                        "C-T multipart/byteranges in responses not supported"
+                    );
+                    return Err(HtpStatus::ERROR);
+                }
+                // 5. By the server closing the connection. (Closing the connection
+                //   cannot be used to indicate the end of a request body, since that
+                //   would leave no possibility for the server to send back a response.)
+                response_tx.response_transfer_coding = HtpTransferCoding::Identity;
+                response_tx.response_progress = HtpResponseProgress::BODY;
+                self.response_state = State::BodyIdentityStreamClose;
+                self.response_body_data_left = None
+            }
+        }
+        // NOTE We do not need to check for short-style HTTP/0.9 requests here because
+        //      that is done earlier, before response line parsing begins
+        self.state_response_headers(input)
+    }
+
+    /// Parses response line.
+    ///
+    /// Looks for a complete line (EOL-terminated) in `input`, prepending any
+    /// previously buffered partial line. On stream close the remaining bytes
+    /// are treated as the (final) line.
+    ///
+    /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or HtpStatus::DATA
+    /// when more data is needed.
+    pub(crate) fn response_line(&mut self, input: &ParserData) -> Result<()> {
+        match take_till_eol(input.as_slice()) {
+            Ok((_, (line, _))) => {
+                // We have a line ending, so consume the input
+                // and grab any buffered data.
+                let mut data = take(&mut self.response_buf);
+                data.add(line);
+                self.response_data_consume(input, line.len());
+                self.response_line_complete(data.as_slice(), input)
+            }
+            _ => {
+                if self.response_status == HtpStreamState::CLOSED {
+                    // No EOL, but the stream is closed: process what we have
+                    // as the complete line.
+                    let mut data = take(&mut self.response_buf);
+                    data.add(input.as_slice());
+                    self.response_data_consume(input, input.len());
+                    self.response_line_complete(data.as_slice(), input)
+                } else {
+                    // Incomplete line; buffer it and wait for more data.
+                    self.handle_response_absent_lf(input)
+                }
+            }
+        }
+    }
+
+    /// Parse the complete response line.
+    ///
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+    fn response_line_complete(&mut self, line: &[u8], input: &ParserData) -> Result<()> {
+        self.check_response_buffer_limit(line.len())?;
+        // An empty line means we still need more data to make progress.
+        if line.is_empty() {
+            return Err(HtpStatus::DATA);
+        }
+        // There must be a current response transaction to attach the line to.
+        let response_tx = self.response_mut();
+        if response_tx.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        if is_line_ignorable(self.cfg.server_personality, line) {
+            if self.response_status == HtpStreamState::CLOSED {
+                self.response_state = State::Finalize
+            }
+            // We have an empty/whitespace line, which we'll note, ignore and move on
+            let response_tx = self.response_mut().unwrap();
+            response_tx.response_ignored_lines = response_tx.response_ignored_lines.wrapping_add(1);
+            // TODO How many lines are we willing to accept?
+            // Start again
+            return Ok(());
+        }
+        // Deallocate previous response line allocations, which we would have on a 100 response.
+        let response_tx = self.response_mut().unwrap();
+        response_tx.response_line = None;
+        response_tx.response_protocol = None;
+        response_tx.response_status = None;
+        response_tx.response_message = None;
+
+        // Process response line.
+        // If the response line is invalid, determine if it _looks_ like
+        // a response line. If it does not look like a line, process the
+        // data as a response body because that is what browsers do.
+        if treat_response_line_as_body(line) {
+            // if we have a next line beginning with H, skip this one
+            if input.len() > 1 && (input.as_slice()[0] == b'H' || chomp(line).len() <= 2) {
+                response_tx.response_ignored_lines =
+                    response_tx.response_ignored_lines.wrapping_add(1);
+                return Ok(());
+            }
+            // No headers will be parsed on this path, so no content encoding applies.
+            response_tx.response_content_encoding_processing = HtpContentEncoding::None;
+            self.response_body_data(Some(line))?;
+            // Continue to process response body. Because we don't have
+            // any headers to parse, we assume the body continues until
+            // the end of the stream.
+            // Have we seen the entire response body?
+            if input.is_empty() {
+                let response_tx = self.response_mut().unwrap();
+                response_tx.response_transfer_coding = HtpTransferCoding::Identity;
+                response_tx.response_progress = HtpResponseProgress::BODY;
+                self.response_body_data_left = None;
+                self.response_state = State::Finalize
+            }
+            return Ok(());
+        }
+        // Looks like a real response line: parse it and advance to header parsing.
+        self.parse_response_line(line)?;
+        self.state_response_line()?;
+        // Move on to the next phase.
+        self.response_state = State::Headers;
+        self.response_mut().unwrap().response_progress = HtpResponseProgress::HEADERS;
+        Ok(())
+    }
+
+    /// Parses the response line.
+    ///
+    /// Splits the line into protocol, status code and message, storing each piece
+    /// on the current response transaction as it is successfully extracted. If a
+    /// later piece is missing (truncated/malformed line) the earlier pieces are
+    /// kept and Ok(()) is returned, leaving the remaining fields as initialized.
+    pub(crate) fn parse_response_line(&mut self, response_line: &[u8]) -> Result<()> {
+        let response_tx = self.response_mut();
+        if response_tx.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let response_tx = response_tx.unwrap();
+
+        // Reset all derived fields before (re)parsing; protocol/status start Invalid.
+        response_tx.response_line = Some(Bstr::from(response_line));
+        response_tx.response_protocol_number = HtpProtocol::Invalid;
+        response_tx.response_status = None;
+        response_tx.response_status_number = HtpResponseNumber::Invalid;
+        response_tx.response_message = None;
+
+        // Line shape: [leading space/NUL] protocol [space] status-code [whitespace] message
+        let mut response_line_parser = tuple((
+            take_is_space_or_null,
+            take_not_is_space,
+            take_is_space,
+            take_not_is_space,
+            take_ascii_whitespace(),
+        ));
+
+        let (message, (_ls, response_protocol, ws1, status_code, ws2)) =
+            response_line_parser(response_line)?;
+        if response_protocol.is_empty() {
+            return Ok(());
+        }
+
+        response_tx.response_protocol = Some(Bstr::from(response_protocol));
+        self.response_mut().unwrap().response_protocol_number =
+            parse_protocol(response_protocol, &mut self.logger);
+
+        if ws1.is_empty() || status_code.is_empty() {
+            return Ok(());
+        }
+
+        let response_tx = self.response_mut().unwrap();
+        response_tx.response_status = Some(Bstr::from(status_code));
+        response_tx.response_status_number = parse_status(status_code);
+
+        if ws2.is_empty() {
+            return Ok(());
+        }
+
+        // Whatever follows the status code (minus trailing EOL) is the reason phrase.
+        response_tx.response_message = Some(Bstr::from(chomp(message)));
+        Ok(())
+    }
+
+    /// Response header parser.
+    ///
+    /// Runs the header sub-parser over `data`, logging a warning (and, where
+    /// appropriate, setting FIELD_INVALID/FIELD_UNPARSEABLE flags) for each
+    /// malformed header before handing it to process_response_header().
+    ///
+    /// Returns a tuple of the unparsed data and a boolean indicating if the EOH was seen.
+    fn parse_response_headers<'a>(&mut self, data: &'a [u8]) -> Result<(&'a [u8], bool)> {
+        let resp = self.response_mut();
+        if resp.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+
+        let rc = resp.unwrap().response_header_parser.headers()(data);
+        if let Ok((remaining, (headers, eoh))) = rc {
+            for h in headers {
+                // Per-header flags accumulated here are attached to the Header below;
+                // the htp_warn_once! macros may also set transaction-level flags.
+                let mut flags = 0;
+                let name_flags = &h.name.flags;
+                let value_flags = &h.value.flags;
+                if value_flags.is_set(HeaderFlags::DEFORMED_EOL)
+                    || name_flags.is_set(HeaderFlags::DEFORMED_EOL)
+                {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::DEFORMED_EOL,
+                        "Weird response end of lines mix"
+                    );
+                }
+                // Ignore LWS after field-name.
+                if name_flags.is_set(HeaderFlags::NAME_TRAILING_WHITESPACE) {
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_INVALID_LWS_AFTER_NAME,
+                        "Request field invalid: LWS after name",
+                        self.response_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                //If there was leading whitespace, probably was invalid folding.
+                if name_flags.is_set(HeaderFlags::NAME_LEADING_WHITESPACE) {
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::INVALID_RESPONSE_FIELD_FOLDING,
+                        "Invalid response field folding",
+                        self.response_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::INVALID_FOLDING
+                    );
+                    flags.set(HtpFlags::FIELD_INVALID);
+                }
+                // Check that field-name is a token
+                if name_flags.is_set(HeaderFlags::NAME_NON_TOKEN_CHARS) {
+                    // Incorrectly formed header name.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_HEADER_NAME_NOT_TOKEN,
+                        "Response header name is not a token",
+                        self.response_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                // No colon?
+                if name_flags.is_set(HeaderFlags::MISSING_COLON) {
+                    // We handle this case as a header with an empty name, with the value equal
+                    // to the entire input string.
+                    // TODO Apache will respond to this problem with a 400.
+                    // Now extract the name and the value
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_FIELD_MISSING_COLON,
+                        "Response field invalid: colon missing",
+                        self.response_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::FIELD_UNPARSEABLE
+                    );
+                    flags.set(HtpFlags::FIELD_INVALID);
+                } else if name_flags.is_set(HeaderFlags::NAME_EMPTY) {
+                    // Empty header name.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_INVALID_EMPTY_NAME,
+                        "Response field invalid: empty name",
+                        self.response_mut().unwrap().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                self.process_response_header(Header::new_with_flags(
+                    h.name.name.into(),
+                    h.value.value.into(),
+                    flags,
+                ))?;
+            }
+            Ok((remaining, eoh))
+        } else {
+            // Sub-parser needs more data: report nothing consumed and no EOH.
+            Ok((data, false))
+        }
+    }
+
+    /// Response header line(s) processor, which assembles folded lines
+    /// into a single buffer before invoking the parsing function.
+    ///
+    /// Handles repeated header names: subsequent values are comma-joined onto
+    /// the existing header (except Content-Length, where differing values are
+    /// flagged as ambiguous), and repetitions are counted up to a cap of 64.
+    /// Returns ERROR when the configured header-count limit is exceeded.
+    fn process_response_header(&mut self, header: Header) -> Result<()> {
+        let mut repeated = false;
+        let hl = self.cfg.number_headers_limit as usize;
+        let resp = self.response_mut();
+        if resp.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let resp = resp.unwrap();
+
+        let reps = resp.response_header_repetitions;
+        let mut update_reps = false;
+        // Do we already have a header with the same name?
+        if let Some(h_existing) = resp.response_headers.get_nocase_mut(header.name.as_slice()) {
+            if !h_existing.flags.is_set(HeaderFlags::FIELD_REPEATED) {
+                // This is the second occurrence for this header.
+                repeated = true;
+            } else if reps < 64 {
+                update_reps = true;
+            } else {
+                // Repetition cap reached: silently drop further duplicates.
+                return Ok(());
+            }
+            h_existing.flags.set(HeaderFlags::FIELD_REPEATED);
+            // For simplicity reasons, we count the repetitions of all headers
+            // Having multiple C-L headers is against the RFC but many
+            // browsers ignore the subsequent headers if the values are the same.
+            if header.name.cmp_nocase("Content-Length") {
+                // Don't use string comparison here because we want to
+                // ignore small formatting differences.
+                let existing_cl = parse_content_length(&h_existing.value, None);
+                let new_cl = parse_content_length(&(header.value), None);
+                if existing_cl.is_none() || new_cl.is_none() || existing_cl != new_cl {
+                    // Ambiguous response C-L value.
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE,
+                        "Ambiguous response C-L value"
+                    );
+                }
+            } else {
+                // Add to the existing header.
+                h_existing.value.extend_from_slice(b", ");
+                h_existing.value.extend_from_slice(header.value.as_slice());
+            }
+        } else {
+            // New header name: enforce the configured header-count limit first.
+            if resp.response_headers.elements.len() > hl {
+                if !resp.flags.is_set(HtpFlags::HEADERS_TOO_MANY) {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_TOO_MANY_HEADERS,
+                        "Too many response headers"
+                    );
+                    let resp = self.response_mut().unwrap();
+                    resp.flags.set(HtpFlags::HEADERS_TOO_MANY);
+                }
+                return Err(HtpStatus::ERROR);
+            }
+            resp.response_headers.elements.push(header);
+        }
+        let resp = self.response_mut().unwrap();
+        if update_reps {
+            resp.response_header_repetitions = resp.response_header_repetitions.wrapping_add(1)
+        }
+        if repeated {
+            htp_warn!(
+                self.logger,
+                HtpLogCode::RESPONSE_HEADER_REPETITION,
+                "Repetition for header"
+            );
+        }
+        Ok(())
+    }
+    /// Parses response headers.
+    ///
+    /// Buffers incomplete header data across calls in `self.response_header`,
+    /// and on a closed stream finalizes parsing with whatever was buffered.
+    ///
+    /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or HtpStatus::DATA when more data is needed.
+    pub(crate) fn response_headers(&mut self, input: &mut ParserData) -> Result<()> {
+        let response_index = self.response_index();
+        if self.response_status == HtpStreamState::CLOSED {
+            // Stream closed: no more data will arrive, so force completion.
+            let resp = self.response_mut();
+            if resp.is_none() {
+                return Err(HtpStatus::ERROR);
+            }
+            let resp = resp.unwrap();
+            resp.response_header_parser.set_complete(true);
+            // Parse previous header, if any.
+            if let Some(response_header) = self.response_header.take() {
+                self.parse_response_headers(response_header.as_slice())?;
+            }
+            // Finalize sending raw trailer data.
+            self.response_receiver_finalize_clear(input)?;
+            // Run hook response_TRAILER
+            self.cfg
+                .hook_response_trailer
+                .clone()
+                .run_all(self, response_index)?;
+            self.response_state = State::Finalize;
+            return Ok(());
+        }
+        if let Ok((_, line)) = take_till_lf(input.as_slice()) {
+            if self.response_header.is_some() {
+                self.check_response_buffer_limit(line.len())?;
+            }
+        } else {
+            // No LF in the input: buffer everything and wait for more data.
+            let data = input.as_slice();
+            self.response_data_consume(input, data.len());
+            self.check_response_buffer_limit(data.len())?;
+            if let Some(rh) = &mut self.response_header {
+                rh.extend_from_slice(data);
+            } else {
+                self.response_header = Some(Bstr::from(data));
+            }
+            return Err(HtpStatus::DATA_BUFFER);
+        }
+        // Combine any previously buffered partial header with the current input.
+        let response_header = if let Some(mut response_header) = self.response_header.take() {
+            response_header.add(input.as_slice());
+            response_header
+        } else {
+            Bstr::from(input.as_slice())
+        };
+
+        let (remaining, eoh) = self.parse_response_headers(response_header.as_slice())?;
+        //TODO: Update the response state machine so that we don't have to have this EOL check
+        let eol = remaining.len() == response_header.len()
+            && (remaining.eq(b"\r\n") || remaining.eq(b"\n"));
+        // If remaining is EOL or header parsing saw EOH this is end of headers
+        if eoh || eol {
+            if eol {
+                //Consume the EOL so it isn't included in data processing
+                self.response_data_consume(input, input.len());
+            } else if remaining.len() <= input.len() {
+                self.response_data_consume(input, input.len() - remaining.len());
+            }
+            // We've seen all response headers. At terminator.
+            self.response_state =
+                if self.response().unwrap().response_progress == HtpResponseProgress::HEADERS {
+                    // Response headers.
+                    // The next step is to determine if this response has a body.
+                    State::BodyDetermine
+                } else {
+                    // Response trailer.
+                    // Finalize sending raw trailer data.
+                    self.response_receiver_finalize_clear(input)?;
+                    // Run hook response_TRAILER.
+                    self.cfg
+                        .hook_response_trailer
+                        .clone()
+                        .run_all(self, response_index)?;
+                    // The next step is to finalize this response.
+                    State::Finalize
+                };
+            Ok(())
+        } else {
+            // Headers incomplete: buffer the unparsed remainder for the next call.
+            self.response_data_consume(input, input.len());
+            self.check_response_buffer_limit(remaining.len())?;
+            let remaining = Bstr::from(remaining);
+            self.response_header.replace(remaining);
+            Err(HtpStatus::DATA_BUFFER)
+        }
+    }
+
+    /// Consumes response body data.
+    /// This function assumes that handling of chunked encoding is implemented
+    /// by the container. When you're done submitting body data, invoking a state
+    /// change (to RESPONSE) will finalize any processing that might be pending.
+    ///
+    /// The response body data will be decompressed if two conditions are met: one,
+    /// decompression is enabled in configuration and two, if the response headers
+    /// indicate compression. Alternatively, you can control decompression from
+    /// a RESPONSE_HEADERS callback, by setting tx->response_content_encoding either
+    /// to COMPRESSION_NONE (to disable compression), or to one of the supported
+    /// decompression algorithms.
+    ///
+    /// `data` of None signals the end of the response body.
+    ///
+    /// Returns HtpStatus::OK on success or HtpStatus::ERROR if the request transaction
+    /// is invalid or response body data hook fails.
+    pub(crate) fn response_body_data(&mut self, data: Option<&[u8]>) -> Result<()> {
+        // None data is used to indicate the end of response body.
+        // Keep track of body size before decompression.
+        let resp = self.response_mut();
+        if resp.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let resp = resp.unwrap();
+
+        resp.response_message_len = resp
+            .response_message_len
+            .wrapping_add(data.unwrap_or(b"").len() as u64);
+
+        match resp.response_content_encoding_processing {
+            HtpContentEncoding::Gzip
+            | HtpContentEncoding::Deflate
+            | HtpContentEncoding::Zlib
+            | HtpContentEncoding::Lzma => {
+                // Send data buffer to the decompressor if it exists
+                if resp.response_decompressor.is_none() && data.is_none() {
+                    return Ok(());
+                }
+                // Take the decompressor out of its slot so `self` can be borrowed
+                // by the decompression callback; it is put back below on the
+                // data path, and deliberately dropped on the end-of-body path.
+                let mut decompressor = resp.response_decompressor.take().ok_or(HtpStatus::ERROR)?;
+                if let Some(data) = data {
+                    let _ = decompressor.decompress(data);
+
+                    if decompressor.time_spent()
+                        > self.cfg.compression_options.get_time_limit() as u64
+                    {
+                        // Decompression took too long overall: log and switch to
+                        // passthrough so no further CPU is spent on this stream.
+                        htp_error!(
+                            self.logger,
+                            HtpLogCode::COMPRESSION_BOMB,
+                            format!(
+                                "Compression bomb: spent {} us decompressing",
+                                decompressor.time_spent(),
+                            )
+                        );
+                        decompressor.set_passthrough(true);
+                    }
+                    // put the decompressor back in its slot
+                    self.response_mut()
+                        .unwrap()
+                        .response_decompressor
+                        .replace(decompressor);
+                } else {
+                    // don't put the decompressor back in its slot
+                    // ignore errors
+                    let _ = decompressor.finish();
+                }
+            }
+            HtpContentEncoding::None => {
+                // When there's no decompression, response_entity_len.
+                // is identical to response_message_len.
+                let data = ParserData::from(data);
+                let mut tx_data = Data::new(resp, &data);
+                resp.response_entity_len =
+                    resp.response_entity_len.wrapping_add(tx_data.len() as u64);
+                self.response_run_hook_body_data(&mut tx_data)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Initialize the response decompression engine. We can deal with three
+    /// scenarios:
+    ///
+    /// 1. Decompression is enabled, compression indicated in headers, and we decompress.
+    ///
+    /// 2. As above, but the user disables decompression by setting response_content_encoding
+    ///    to COMPRESSION_NONE.
+    ///
+    /// 3. Decompression is disabled and we do not attempt to enable it, but the user
+    ///    forces decompression by setting response_content_encoding to one of the
+    ///    supported algorithms.
+    ///
+    /// Fast path: the Content-Encoding value names exactly one known encoding.
+    /// Slow path: the value is split on commas/spaces and a decompressor is
+    /// prepended per recognized layer, subject to configured layer limits.
+    pub(crate) fn response_initialize_decompressors(&mut self) -> Result<()> {
+        let resp = self.response_mut();
+        if resp.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let resp = resp.unwrap();
+
+        let ce = resp
+            .response_headers
+            .get_nocase_nozero("content-encoding")
+            .map(|val| val.value.clone());
+        // Process multiple encodings if there is no match on fast path
+        let mut slow_path = false;
+
+        // Fast path - try to match directly on the encoding value
+        resp.response_content_encoding = if let Some(ce) = &ce {
+            if ce.cmp_nocase_nozero(b"gzip") || ce.cmp_nocase_nozero(b"x-gzip") {
+                HtpContentEncoding::Gzip
+            } else if ce.cmp_nocase_nozero(b"deflate") || ce.cmp_nocase_nozero(b"x-deflate") {
+                HtpContentEncoding::Deflate
+            } else if ce.cmp_nocase_nozero(b"lzma") {
+                HtpContentEncoding::Lzma
+            } else if ce.cmp_nocase_nozero(b"inflate") || ce.cmp_nocase_nozero(b"none") {
+                HtpContentEncoding::None
+            } else {
+                // Unrecognized as a single token: fall through to per-layer parsing.
+                slow_path = true;
+                HtpContentEncoding::None
+            }
+        } else {
+            HtpContentEncoding::None
+        };
+
+        // Configure decompression, if enabled in the configuration.
+        resp.response_content_encoding_processing = resp.response_content_encoding;
+
+        let response_content_encoding_processing = resp.response_content_encoding_processing;
+        let compression_options = self.cfg.compression_options;
+        match &response_content_encoding_processing {
+            HtpContentEncoding::Gzip
+            | HtpContentEncoding::Deflate
+            | HtpContentEncoding::Zlib
+            | HtpContentEncoding::Lzma => {
+                self.response_prepend_decompressor(response_content_encoding_processing)?;
+            }
+            HtpContentEncoding::None => {
+                if slow_path {
+                    if let Some(ce) = &ce {
+                        let mut layers = 0;
+                        let mut lzma_layers = 0;
+                        // Each comma/space-separated token is one encoding layer.
+                        for encoding in ce.split(|c| *c == b',' || *c == b' ') {
+                            if encoding.is_empty() {
+                                continue;
+                            }
+                            layers += 1;
+
+                            if let Some(limit) = compression_options.get_layer_limit() {
+                                // decompression layer depth check
+                                if layers > limit {
+                                    htp_warn!(
+                                        self.logger,
+                                        HtpLogCode::TOO_MANY_ENCODING_LAYERS,
+                                        "Too many response content encoding layers"
+                                    );
+                                    break;
+                                }
+                            }
+
+                            // Substring matches (e.g. "gzipped") are accepted but
+                            // warned about as abnormal C-E values.
+                            let encoding = Bstr::from(encoding);
+                            let encoding = if encoding.index_of_nocase(b"gzip").is_some() {
+                                if !(encoding.cmp_slice(b"gzip") == Ordering::Equal
+                                    || encoding.cmp_slice(b"x-gzip") == Ordering::Equal)
+                                {
+                                    htp_warn!(
+                                        self.logger,
+                                        HtpLogCode::ABNORMAL_CE_HEADER,
+                                        "C-E gzip has abnormal value"
+                                    );
+                                }
+                                HtpContentEncoding::Gzip
+                            } else if encoding.index_of_nocase(b"deflate").is_some() {
+                                if !(encoding.cmp_slice(b"deflate") == Ordering::Equal
+                                    || encoding.cmp_slice(b"x-deflate") == Ordering::Equal)
+                                {
+                                    htp_warn!(
+                                        self.logger,
+                                        HtpLogCode::ABNORMAL_CE_HEADER,
+                                        "C-E deflate has abnormal value"
+                                    );
+                                }
+                                HtpContentEncoding::Deflate
+                            } else if encoding.cmp_slice(b"lzma") == Ordering::Equal {
+                                lzma_layers += 1;
+                                if let Some(limit) = compression_options.get_lzma_layers() {
+                                    // Lzma layer depth check
+                                    if lzma_layers > limit {
+                                        htp_warn!(
+                                            self.logger,
+                                            HtpLogCode::RESPONSE_TOO_MANY_LZMA_LAYERS,
+                                            "Too many response content encoding lzma layers"
+                                        );
+                                        break;
+                                    }
+                                }
+                                HtpContentEncoding::Lzma
+                            } else if encoding.cmp_slice(b"inflate") == Ordering::Equal
+                                || encoding.cmp_slice(b"none") == Ordering::Equal
+                            {
+                                HtpContentEncoding::None
+                            } else {
+                                htp_warn!(
+                                    self.logger,
+                                    HtpLogCode::ABNORMAL_CE_HEADER,
+                                    "C-E unknown setting"
+                                );
+                                HtpContentEncoding::None
+                            };
+
+                            self.response_prepend_decompressor(encoding)?;
+                        }
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Callback invoked by the response decompressor with decompressed output.
+    ///
+    /// Runs the body-data hooks over the decompressed chunk, periodically checks
+    /// decompression time against the configured limit, and aborts with an I/O
+    /// error when the decompressed/compressed size ratio exceeds the compression
+    /// bomb limits. Returns the number of bytes handled.
+    fn response_decompressor_callback(&mut self, data: Option<&[u8]>) -> std::io::Result<usize> {
+        // If no data is passed, call the hooks with NULL to signify the end of the
+        // response body.
+        let parser_data = ParserData::from(data);
+        let compression_options = self.cfg.compression_options;
+        let resp = self.response_mut().unwrap();
+        let mut tx_data = Data::new(resp, &parser_data);
+
+        // Keep track of actual response body length.
+        resp.response_entity_len = resp.response_entity_len.wrapping_add(tx_data.len() as u64);
+
+        // Invoke all callbacks.
+        self.response_run_hook_body_data(&mut tx_data)
+            .map_err(|_| std::io::Error::new(std::io::ErrorKind::Other, "body data hook failed"))?;
+        let resp = self.response_mut().unwrap();
+        if let Some(decompressor) = &mut resp.response_decompressor {
+            // Only test the timer every get_time_test_freq() callbacks to keep
+            // the per-chunk overhead low.
+            if decompressor.callback_inc() % compression_options.get_time_test_freq() == 0 {
+                if let Some(time_spent) = decompressor.timer_reset() {
+                    if time_spent > compression_options.get_time_limit() as u64 {
+                        decompressor.set_passthrough(true);
+                        htp_error!(
+                            self.logger,
+                            HtpLogCode::COMPRESSION_BOMB,
+                            format!("Compression bomb: spent {} us decompressing", time_spent)
+                        );
+                    }
+                }
+            }
+        }
+
+        // output > ratio * input ?
+        let ratio = compression_options.get_bomb_ratio();
+        let resp = self.response_mut().unwrap();
+        let exceeds_ratio = if let Some(ratio) = resp.response_message_len.checked_mul(ratio) {
+            resp.response_entity_len > ratio
+        } else {
+            // overflow occured
+            true
+        };
+
+        let bomb_limit = compression_options.get_bomb_limit();
+        let response_entity_len = resp.response_entity_len;
+        let response_message_len = resp.response_message_len;
+        if response_entity_len > bomb_limit && exceeds_ratio {
+            htp_error!(
+                self.logger,
+                HtpLogCode::COMPRESSION_BOMB,
+                format!(
+                    "Compression bomb: decompressed {} bytes out of {}",
+                    response_entity_len, response_message_len,
+                )
+            );
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "compression_bomb_limit reached",
+            ));
+        }
+        Ok(tx_data.len())
+    }
+
+    /// Prepend response decompressor
+    ///
+    /// If a decompressor already exists, a new layer for `encoding` is prepended
+    /// to it; otherwise a fresh decompressor is created whose output callback is
+    /// response_decompressor_callback(). HtpContentEncoding::None is a no-op.
+    fn response_prepend_decompressor(&mut self, encoding: HtpContentEncoding) -> Result<()> {
+        let compression_options = self.cfg.compression_options;
+        if encoding != HtpContentEncoding::None {
+            // ensured by caller
+            let resp = self.response_mut().unwrap();
+            if let Some(decompressor) = resp.response_decompressor.take() {
+                let decompressor = decompressor.prepend(encoding, compression_options)?;
+                resp.response_decompressor.replace(decompressor);
+            } else {
+                // The processing encoding will be the first one encountered
+                resp.response_content_encoding_processing = encoding;
+
+                // Add the callback first because it will be called last in
+                // the chain of writers
+
+                // TODO: fix lifetime error and remove this line!
+                // SAFETY(review): the closure captures a raw pointer to self,
+                // so the decompressor must never outlive this parser nor be
+                // invoked while another &mut self borrow is live — confirm.
+                let connp_ptr = self as *mut Self;
+                let decompressor = unsafe {
+                    Decompressor::new_with_callback(
+                        encoding,
+                        Box::new(move |data: Option<&[u8]>| -> std::io::Result<usize> {
+                            (*connp_ptr).response_decompressor_callback(data)
+                        }),
+                        compression_options,
+                    )?
+                };
+                self.response_mut()
+                    .unwrap()
+                    .response_decompressor
+                    .replace(decompressor);
+            }
+        }
+        Ok(())
+    }
+
+    /// Finalizes response parsing.
+    ///
+    /// Handles any leftover bytes at the end of a response: trailing data that
+    /// looks like body content is reported as an unexpected body; otherwise the
+    /// data is restored/unconsumed so the next response line can be parsed.
+    pub(crate) fn response_finalize(&mut self, input: &mut ParserData) -> Result<()> {
+        if input.is_gap() {
+            return self.state_response_complete(input);
+        }
+        let mut work = input.as_slice();
+        if self.response_status != HtpStreamState::CLOSED {
+            let response_next_byte = input.as_slice().first();
+            if response_next_byte.is_none() {
+                return self.state_response_complete(input);
+            }
+            let lf = response_next_byte
+                .map(|byte| *byte == b'\n')
+                .unwrap_or(false);
+            if !lf {
+                // Work on one full line if available; otherwise buffer and wait.
+                if let Ok((_, line)) = take_till_lf(work) {
+                    self.response_data_consume(input, line.len());
+                    work = line;
+                } else {
+                    return self.handle_response_absent_lf(input);
+                }
+            } else {
+                self.response_data_consume(input, work.len());
+            }
+        }
+        if !self.response_buf.is_empty() {
+            self.check_response_buffer_limit(work.len())?;
+        }
+        // Combine previously buffered bytes with the current line; remember how
+        // much came from the buffer so it can be restored if unused.
+        let mut data = take(&mut self.response_buf);
+        let buf_len = data.len();
+        data.add(work);
+
+        if data.is_empty() {
+            //closing
+            return self.state_response_complete(input);
+        }
+        if treat_response_line_as_body(&data) {
+            // Interpret remaining bytes as body data
+            htp_warn!(
+                self.logger,
+                HtpLogCode::RESPONSE_BODY_UNEXPECTED,
+                "Unexpected response body"
+            );
+            return self.response_body_data(Some(data.as_slice()));
+        }
+        // didnt use data, restore
+        self.response_buf.add(&data[0..buf_len]);
+        //unread last end of line so that RES_LINE works
+        self.response_data_unconsume(input, data.len());
+        self.state_response_complete(input)
+    }
+
+    /// The response idle state will initialize response processing, as well as
+    /// finalize each transaction after we are done with it.
+    ///
+    /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or HtpStatus::DATA
+    /// when more data is needed.
+    pub(crate) fn response_idle(&mut self, input: &ParserData) -> Result<()> {
+        // We want to start parsing the next response (and change
+        // the state from IDLE) only if there's at least one
+        // byte of data available. Otherwise we could be creating
+        // new structures even if there's no more data on the
+        // connection.
+        if input.is_empty() {
+            return Err(HtpStatus::DATA);
+        }
+
+        // Parsing a new response
+        // Log if we have not seen the corresponding request yet
+        let resp = self.response();
+        if resp.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        if resp.unwrap().request_progress == HtpRequestProgress::NOT_STARTED {
+            htp_error!(
+                self.logger,
+                HtpLogCode::UNABLE_TO_MATCH_RESPONSE_TO_REQUEST,
+                "Unable to match response to request"
+            );
+            // Finish off the previous request first if it was still waiting
+            // to be finalized.
+            if self.request_state == State::Finalize {
+                let _ = self.state_request_complete(&mut ParserData::from(None));
+            }
+            let tx = self.response_mut();
+            if tx.is_none() {
+                return Err(HtpStatus::ERROR);
+            }
+            let tx = tx.unwrap();
+
+            // Synthesize a placeholder request URI so this response can be
+            // paired with a (fabricated) completed request.
+            let uri = Uri {
+                path: Some(Bstr::from("/libhtp::request_uri_not_seen")),
+                ..Default::default()
+            };
+            tx.request_uri = uri.path.clone();
+            tx.parsed_uri = Some(uri);
+            tx.request_progress = HtpRequestProgress::COMPLETE;
+            self.request_next();
+        }
+        // Reset per-response tracking before starting the new response.
+        self.response_content_length = None;
+        self.response_body_data_left = None;
+        self.state_response_start()
+    }
+
+    /// Run the RESPONSE_BODY_DATA hook.
+    ///
+    /// Transaction-level hooks are invoked before configuration-level hooks.
+    fn response_run_hook_body_data(&mut self, d: &mut Data) -> Result<()> {
+        // Do not invoke callbacks with an empty data chunk.
+        if d.is_empty() {
+            return Ok(());
+        }
+        // NOTE(review): assumes a current response transaction exists and
+        // panics otherwise — callers must guarantee this. TODO confirm.
+        let resp = self.response().unwrap();
+        // Run transaction hooks first
+        resp.hook_response_body_data.clone().run_all(self, d)?;
+        // Run configuration hooks second
+        self.cfg.hook_response_body_data.run_all(self, d)?;
+        Ok(())
+    }
+
+    /// Process a chunk of outbound (server or response) data.
+    ///
+    /// Drives the response state machine until it runs out of data or hits a
+    /// terminal condition, returning the resulting stream state.
+    pub(crate) fn response_data(
+        &mut self, mut chunk: ParserData, timestamp: Option<OffsetDateTime>,
+    ) -> HtpStreamState {
+        // Reset consumed data tracker
+        self.response_bytes_consumed = 0;
+
+        // Return if the connection is in stop state
+        if self.response_status == HtpStreamState::STOP {
+            htp_info!(
+                self.logger,
+                HtpLogCode::PARSER_STATE_ERROR,
+                "Outbound parser is in HTP_STREAM_STATE_STOP"
+            );
+            return HtpStreamState::STOP;
+        }
+        // Return if the connection has had a fatal error
+        if self.response_status == HtpStreamState::ERROR {
+            htp_error!(
+                self.logger,
+                HtpLogCode::PARSER_STATE_ERROR,
+                "Outbound parser is in HTP_STREAM_STATE_ERROR"
+            );
+            return HtpStreamState::ERROR;
+        }
+
+        // If the length of the supplied data chunk is zero, proceed
+        // only if the stream has been closed. We do not allow zero-sized
+        // chunks in the API, but we use it internally to force the parsers
+        // to finalize parsing.
+        if chunk.is_empty() && self.response_status != HtpStreamState::CLOSED {
+            htp_error!(
+                self.logger,
+                HtpLogCode::ZERO_LENGTH_DATA_CHUNKS,
+                "Zero-length data chunks are not allowed"
+            );
+            return HtpStreamState::CLOSED;
+        }
+        // Remember the timestamp of the current response data chunk
+        if let Some(timestamp) = timestamp {
+            self.response_timestamp = timestamp;
+        }
+
+        // Store the current chunk information
+        self.conn.track_outbound_data(chunk.len());
+        // Return without processing any data if the stream is in tunneling
+        // mode (which it would be after an initial CONNECT transaction).
+        if self.response_status == HtpStreamState::TUNNEL {
+            return HtpStreamState::TUNNEL;
+        }
+        if chunk.is_gap() {
+            // Mark the transaction as having a gap
+            let idx = self.request_index();
+            let resp = self.response_mut();
+            if resp.is_none() {
+                return HtpStreamState::ERROR;
+            }
+            let resp = resp.unwrap();
+
+            resp.flags.set(HtpFlags::RESPONSE_MISSING_BYTES);
+
+            if idx == 0 && resp.response_progress == HtpResponseProgress::NOT_STARTED {
+                // We have a leading gap on the first transaction.
+                return HtpStreamState::CLOSED;
+            }
+        }
+
+        loop
+        // Invoke a processor, in a loop, until an error
+        // occurs or until we run out of data. Many processors
+        // will process a request, each pointing to the next
+        // processor that needs to run.
+        // Return if there's been an error
+        // or if we've run out of data. We are relying
+        // on processors to add error messages, so we'll
+        // keep quiet here.
+        {
+            // Gaps are only tolerated in body/finalize states.
+            if chunk.is_gap()
+                && self.response_state != State::BodyIdentityCLKnown
+                && self.response_state != State::BodyIdentityStreamClose
+                && self.response_state != State::Finalize
+            {
+                htp_error!(
+                    self.logger,
+                    HtpLogCode::INVALID_GAP,
+                    "Gaps are not allowed during this state"
+                );
+                return HtpStreamState::CLOSED;
+            }
+            let mut rc = self.handle_response_state(&mut chunk);
+
+            if rc.is_ok() {
+                if self.response_status == HtpStreamState::TUNNEL {
+                    return HtpStreamState::TUNNEL;
+                }
+                rc = self.response_handle_state_change(&mut chunk);
+            }
+            match rc {
+                // Continue looping.
+                Ok(_) => {}
+                // Do we need more data?
+                Err(HtpStatus::DATA) | Err(HtpStatus::DATA_BUFFER) => {
+                    // Ignore result.
+                    let _ = self.response_receiver_send_data(&mut chunk);
+                    self.response_status = HtpStreamState::DATA;
+                    return HtpStreamState::DATA;
+                }
+                // Check for stop
+                Err(HtpStatus::STOP) => {
+                    self.response_status = HtpStreamState::STOP;
+                    return HtpStreamState::STOP;
+                }
+                // Check for suspended parsing
+                Err(HtpStatus::DATA_OTHER) => {
+                    // We might have actually consumed the entire data chunk?
+                    if chunk.is_empty() {
+                        self.response_status = HtpStreamState::DATA;
+                        // Do not send STREAM_STATE_DATA_OTHER if we've
+                        // consumed the entire chunk
+                        return HtpStreamState::DATA;
+                    } else {
+                        self.response_status = HtpStreamState::DATA_OTHER;
+                        // Partial chunk consumption
+                        return HtpStreamState::DATA_OTHER;
+                    }
+                }
+                // Permanent stream error.
+                Err(_) => {
+                    self.response_status = HtpStreamState::ERROR;
+                    return HtpStreamState::ERROR;
+                }
+            }
+        }
+    }
+
+    /// Advance our buffer cursor and buffer data.
+    ///
+    /// Buffers the whole remaining chunk and reports that more data is needed.
+    fn handle_response_absent_lf(&mut self, data: &ParserData) -> Result<()> {
+        self.check_response_buffer_limit(data.len())?;
+        self.response_buf.add(data.as_slice());
+        self.response_data_consume(data, data.len());
+        Err(HtpStatus::DATA_BUFFER)
+    }
+}
diff --git a/rust/htp/src/response_generic.rs b/rust/htp/src/response_generic.rs
new file mode 100644 (file)
index 0000000..45fa0e9
--- /dev/null
@@ -0,0 +1,220 @@
+use crate::{
+    bstr::Bstr,
+    connection_parser::ConnectionParser,
+    error::Result,
+    headers::Flags as HeaderFlags,
+    parsers::{parse_content_length, parse_protocol, parse_status},
+    transaction::{Header, HtpProtocol, HtpResponseNumber},
+    util::{
+        take_ascii_whitespace, take_is_space, take_is_space_or_null, take_not_is_space,
+        FlagOperations, HtpFlags,
+    },
+    HtpStatus,
+};
+use nom::{error::ErrorKind, sequence::tuple};
+use std::cmp::Ordering;
+
+impl ConnectionParser {
+    /// Generic response line parser.
+    ///
+    /// Splits the status line into protocol, status code and message. Parsing
+    /// is permissive: if a later part is missing, earlier fields are kept and
+    /// Ok(()) is returned; only an unparseable line yields an error.
+    pub(crate) fn parse_response_line_generic(&mut self, response_line: &[u8]) -> Result<()> {
+        let response_tx = self.response_mut();
+        // Reset all derived fields before parsing.
+        response_tx.response_protocol_number = HtpProtocol::Invalid;
+        response_tx.response_status = None;
+        response_tx.response_status_number = HtpResponseNumber::Invalid;
+        response_tx.response_message = None;
+
+        // Line layout: [space] protocol space status-code space message
+        let response_line_parser = tuple::<_, _, (_, ErrorKind), _>((
+            take_is_space_or_null,
+            take_not_is_space,
+            take_is_space,
+            take_not_is_space,
+            take_ascii_whitespace(),
+        ));
+
+        if let Ok((message, (_ls, response_protocol, ws1, status_code, ws2))) =
+            response_line_parser(response_line)
+        {
+            if response_protocol.is_empty() {
+                return Ok(());
+            }
+
+            response_tx.response_protocol = Some(Bstr::from(response_protocol));
+            self.response_mut().response_protocol_number =
+                parse_protocol(response_protocol, &mut self.logger);
+
+            if ws1.is_empty() || status_code.is_empty() {
+                return Ok(());
+            }
+
+            let response_tx = self.response_mut();
+            response_tx.response_status = Some(Bstr::from(status_code));
+            response_tx.response_status_number = parse_status(status_code);
+
+            if ws2.is_empty() {
+                return Ok(());
+            }
+
+            response_tx.response_message = Some(Bstr::from(message));
+        } else {
+            return Err(HtpStatus::ERROR);
+        }
+        Ok(())
+    }
+
+    /// Generic response header parser.
+    ///
+    ///Returns a tuple of the unparsed data and a boolean indicating if the EOH was seen.
+    pub(crate) fn process_response_headers_generic<'a>(
+        &mut self,
+        data: &'a [u8],
+    ) -> Result<(&'a [u8], bool)> {
+        let rc = self.response_mut().response_header_parser.headers()(data);
+        if let Ok((remaining, (headers, eoh))) = rc {
+            for h in headers {
+                let mut flags = 0;
+                let name_flags = &h.name.flags;
+                let value_flags = &h.value.flags;
+                if value_flags.is_set(HeaderFlags::DEFORMED_EOL)
+                    || name_flags.is_set(HeaderFlags::DEFORMED_EOL)
+                {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::DEFORMED_EOL,
+                        "Weird response end of lines mix"
+                    );
+                }
+                // Ignore LWS after field-name.
+                if name_flags.is_set(HeaderFlags::NAME_TRAILING_WHITESPACE) {
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_INVALID_LWS_AFTER_NAME,
+                        "Request field invalid: LWS after name",
+                        self.response_mut().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                //If there was leading whitespace, probably was invalid folding.
+                if name_flags.is_set(HeaderFlags::NAME_LEADING_WHITESPACE) {
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::Invalid_RESPONSE_FIELD_FOLDING,
+                        "Invalid response field folding",
+                        self.response_mut().flags,
+                        flags,
+                        HtpFlags::Invalid_FOLDING
+                    );
+                    flags.set(HtpFlags::FIELD_INVALID);
+                }
+                // Check that field-name is a token
+                if name_flags.is_set(HeaderFlags::NAME_NON_TOKEN_CHARS) {
+                    // Incorrectly formed header name.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_HEADER_NAME_NOT_TOKEN,
+                        "Response header name is not a token",
+                        self.response_mut().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                // No colon?
+                if name_flags.is_set(HeaderFlags::MISSING_COLON) {
+                    // We handle this case as a header with an empty name, with the value equal
+                    // to the entire input string.
+                    // TODO Apache will respond to this problem with a 400.
+                    // Now extract the name and the value
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_FIELD_MISSING_COLON,
+                        "Response field invalid: colon missing",
+                        self.response_mut().flags,
+                        flags,
+                        HtpFlags::FIELD_UNPARSEABLE
+                    );
+                    flags.set(HtpFlags::FIELD_INVALID);
+                } else if name_flags.is_set(HeaderFlags::NAME_EMPTY) {
+                    // Empty header name.
+                    htp_warn_once!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_INVALID_EMPTY_NAME,
+                        "Response field invalid: empty name",
+                        self.response_mut().flags,
+                        flags,
+                        HtpFlags::FIELD_INVALID
+                    );
+                }
+                self.process_response_header_generic(Header::new_with_flags(
+                    h.name.name.into(),
+                    h.value.value.into(),
+                    flags,
+                ))?;
+            }
+            Ok((remaining, eoh))
+        } else {
+            Ok((data, false))
+        }
+    }
+
+    /// Generic response header line(s) processor, which assembles folded lines
+    /// into a single buffer before invoking the parsing function.
+    fn process_response_header_generic(&mut self, header: Header) -> Result<()> {
+        let mut repeated = false;
+        let reps = self.response().response_header_repetitions;
+        let mut update_reps = false;
+        // Do we already have a header with the same name?
+        if let Some((_, h_existing)) = self
+            .response_mut()
+            .response_headers
+            .get_nocase_mut(header.name.as_slice())
+        {
+            // Keep track of repeated same-name headers.
+            if !h_existing.flags.is_set(HtpFlags::FIELD_REPEATED) {
+                // This is the second occurrence for this header.
+                repeated = true;
+            } else if reps < 64 {
+                update_reps = true;
+            } else {
+                // Repetition counter capped at 64; further repeats are dropped.
+                return Ok(());
+            }
+            h_existing.flags.set(HtpFlags::FIELD_REPEATED);
+            // For simplicity reasons, we count the repetitions of all headers
+            // Having multiple C-L headers is against the RFC but many
+            // browsers ignore the subsequent headers if the values are the same.
+            if header.name.cmp_nocase("Content-Length") == Ordering::Equal {
+                // Don't use string comparison here because we want to
+                // ignore small formatting differences.
+                let existing_cl = parse_content_length(&h_existing.value, None);
+                let new_cl = parse_content_length(&(header.value), None);
+                if existing_cl.is_none() || new_cl.is_none() || existing_cl != new_cl {
+                    // Ambiguous response C-L value.
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE,
+                        "Ambiguous response C-L value"
+                    );
+                }
+            } else {
+                // Add to the existing header.
+                h_existing.value.extend_from_slice(b", ");
+                h_existing.value.extend_from_slice(header.value.as_slice());
+            }
+        } else {
+            self.response_mut()
+                .response_headers
+                .add(header.name.clone(), header);
+        }
+        if update_reps {
+            self.response_mut().response_header_repetitions =
+                self.response().response_header_repetitions.wrapping_add(1)
+        }
+        if repeated {
+            htp_warn!(
+                self.logger,
+                HtpLogCode::RESPONSE_HEADER_REPETITION,
+                "Repetition for header"
+            );
+        }
+        Ok(())
+    }
+}
diff --git a/rust/htp/src/table.rs b/rust/htp/src/table.rs
new file mode 100644 (file)
index 0000000..6e2d571
--- /dev/null
@@ -0,0 +1,192 @@
+use crate::bstr::Bstr;
+use std::ops::Index;
+#[cfg(test)]
+use std::{cmp::Ordering, iter::Iterator, slice::SliceIndex};
+
+/// The table structure for key value pairs.
+///
+/// Backed by an insertion-ordered vector of (key, value) tuples.
+#[derive(Clone, Debug)]
+pub(crate) struct Table<T> {
+    /// Entries in the table.
+    elements: Vec<(Bstr, T)>,
+}
+
+// Read-only positional access: `table[i]` yields the i-th (key, value) tuple.
+impl<T> Index<usize> for Table<T> {
+    type Output = (Bstr, T);
+    fn index(&self, idx: usize) -> &(Bstr, T) {
+        &self.elements[idx]
+    }
+}
+
+// Borrowed iteration over (key, value) tuples in insertion order.
+impl<'a, T> IntoIterator for &'a Table<T> {
+    type Item = &'a (Bstr, T);
+    type IntoIter = std::slice::Iter<'a, (Bstr, T)>;
+
+    fn into_iter(self) -> std::slice::Iter<'a, (Bstr, T)> {
+        self.elements.iter()
+    }
+}
+
+// Mutable borrowed iteration over (key, value) tuples in insertion order.
+impl<'a, T> IntoIterator for &'a mut Table<T> {
+    type Item = &'a mut (Bstr, T);
+    type IntoIter = std::slice::IterMut<'a, (Bstr, T)>;
+
+    fn into_iter(self) -> std::slice::IterMut<'a, (Bstr, T)> {
+        self.elements.iter_mut()
+    }
+}
+
+// Consuming iteration: moves the (key, value) tuples out of the table.
+impl<T> IntoIterator for Table<T> {
+    type Item = (Bstr, T);
+    type IntoIter = std::vec::IntoIter<(Bstr, T)>;
+
+    fn into_iter(self) -> std::vec::IntoIter<(Bstr, T)> {
+        self.elements.into_iter()
+    }
+}
+
+impl<T> Table<T> {
+    #[cfg(test)]
+    /// Make a new owned Table with given capacity
+    pub(crate) fn with_capacity(size: usize) -> Self {
+        Self {
+            elements: Vec::with_capacity(size),
+        }
+    }
+
+    #[cfg(test)]
+    /// Add a new tuple (key, item) to the table
+    pub(crate) fn add(&mut self, key: Bstr, item: T) {
+        self.elements.push((key, item));
+    }
+
+    /// Retrieve an element from a specific index.
+    #[cfg(test)]
+    pub(crate) fn get<I>(&self, index: I) -> Option<&I::Output>
+    where
+        I: SliceIndex<[(Bstr, T)]>,
+    {
+        self.elements.get(index)
+    }
+
+    /// Retrieve a mutable reference to an element from a specific index.
+    #[cfg(test)]
+    pub(crate) fn get_mut<I>(&mut self, index: I) -> Option<&mut I::Output>
+    where
+        I: SliceIndex<[(Bstr, T)]>,
+    {
+        self.elements.get_mut(index)
+    }
+
+    /// Search the table for the first tuple with a key matching the given slice, ignoring ascii case in self
+    ///
+    /// Returns None if no match is found.
+    #[cfg(test)]
+    pub(crate) fn get_nocase<K: AsRef<[u8]>>(&self, key: K) -> Option<&(Bstr, T)> {
+        self.elements.iter().find(|x| x.0.cmp_nocase(key.as_ref()))
+    }
+
+    #[cfg(test)]
+    /// Returns the number of elements in the table
+    pub(crate) fn size(&self) -> usize {
+        self.elements.len()
+    }
+}
+
+// Tests
+
+#[test]
+fn Add() {
+    let mut t = Table::with_capacity(1);
+    let mut k = Bstr::from("Key");
+    assert_eq!(0, t.size());
+    t.add(k, "Value1");
+    assert_eq!(1, t.size());
+    k = Bstr::from("AnotherKey");
+    t.add(k, "Value2");
+    assert_eq!(2, t.size());
+}
+
+#[test]
+fn GetNoCase() {
+    // Lookups must ignore ASCII case but return the key as originally stored.
+    let mut t = Table::with_capacity(2);
+    let mut k = Bstr::from("Key1");
+    t.add(k, "Value1");
+    k = Bstr::from("KeY2");
+    t.add(k, "Value2");
+
+    let mut result = t.get_nocase("KEY1");
+    let mut res = result.unwrap();
+    assert_eq!(Ordering::Equal, res.0.cmp_slice("Key1"));
+    assert_eq!("Value1", res.1);
+
+    result = t.get_nocase("keY1");
+    res = result.unwrap();
+    assert_eq!(Ordering::Equal, res.0.cmp_slice("Key1"));
+    assert_eq!("Value1", res.1);
+
+    result = t.get_nocase("key2");
+    res = result.unwrap();
+    assert_eq!(Ordering::Equal, res.0.cmp_slice("KeY2"));
+    assert_eq!("Value2", res.1);
+
+    // Missing key yields None.
+    result = t.get_nocase("NotAKey");
+    assert!(result.is_none());
+}
+
+#[test]
+fn IndexAccess() {
+    // Positional access via Index and get/get_mut.
+    let mut t = Table::with_capacity(2);
+    let mut k = Bstr::from("Key1");
+    t.add(k, "Value1");
+    k = Bstr::from("KeY2");
+    t.add(k, "Value2");
+
+    let res = &t[1];
+    assert_eq!(Ordering::Equal, res.0.cmp_slice("KeY2"));
+    assert_eq!("Value2", res.1);
+    assert_eq!("Value2", t.get(1).unwrap().1);
+
+    // Mutation through get_mut is visible on subsequent reads.
+    let res_mut = t.get_mut(1).unwrap();
+    res_mut.1 = "Value3";
+    assert_eq!("Value3", t.get(1).unwrap().1);
+}
+
+#[test]
+fn Iterators() {
+    // Exercise the three IntoIterator impls: &Table, &mut Table, Table.
+    let mut table = Table::with_capacity(2);
+    table.add("1".into(), "abc".to_string());
+    table.add("2".into(), "def".to_string());
+
+    let mut iter_ref: std::slice::Iter<(Bstr, String)> = (&table).into_iter();
+    let (key1, _): &(Bstr, String) = iter_ref.next().unwrap();
+    assert_eq!(key1, &"1");
+    assert_eq!(table.get_nocase("1").unwrap().1, "abc");
+
+    // Mutable iteration can rewrite values in place.
+    let mut iter_mut_ref: std::slice::IterMut<(Bstr, String)> = (&mut table).into_iter();
+    let (key1, ref mut val1): &mut (Bstr, String) = iter_mut_ref.next().unwrap();
+    *val1 = "xyz".to_string();
+    assert_eq!(key1, &"1");
+    assert_eq!(table.get_nocase("1").unwrap().1, "xyz");
+
+    // Owned iteration consumes the table.
+    let mut iter_owned: std::vec::IntoIter<(Bstr, String)> = table.into_iter();
+    let (key1, val1) = iter_owned.next().unwrap();
+    assert_eq!(key1, "1");
+    assert_eq!(val1, "xyz");
+}
+
+#[test]
+fn Table_Misc() {
+    let mut t: Table<&str> = Table::with_capacity(2);
+
+    let mut pkey = Bstr::with_capacity(1);
+    pkey.add("p");
+
+    let mut qkey = Bstr::with_capacity(1);
+    qkey.add("q");
+
+    t.add(pkey, "1");
+    t.add(qkey, "2");
+
+    assert!(t.get_nocase("z").is_none());
+    assert_eq!("1", t.get_nocase("p").unwrap().1);
+}
diff --git a/rust/htp/src/test/common.rs b/rust/htp/src/test/common.rs
new file mode 100644 (file)
index 0000000..1a2335d
--- /dev/null
@@ -0,0 +1,598 @@
+#![allow(non_snake_case)]
+use crate::{
+    bstr::Bstr,
+    config::{Config, HtpServerPersonality},
+    connection_parser::{ConnectionParser, HtpStreamState, ParserData},
+    error::Result,
+    transaction::Transaction,
+};
+use std::{
+    env,
+    iter::IntoIterator,
+    net::{IpAddr, Ipv4Addr},
+    path::PathBuf,
+    time::SystemTime,
+};
+use time::OffsetDateTime;
+
+/// One direction-tagged piece of test traffic.
+#[derive(Debug)]
+enum Chunk {
+    /// Data (or a gap) sent by the client (request side).
+    Client(ParserData<'static>),
+    /// Data (or a gap) sent by the server (response side).
+    Server(ParserData<'static>),
+}
+
+/// A structure to hold callback data
+/// collected by the test hook callbacks during parsing.
+pub(super) struct MainUserData {
+    /// Call order of callbacks
+    pub order: Vec<String>,
+    /// Request data from callbacks
+    pub request_data: Vec<Bstr>,
+    /// Response data from callbacks
+    pub response_data: Vec<Bstr>,
+}
+
+impl Default for MainUserData {
+    /// Make a new user data struct
+    fn default() -> Self {
+        Self {
+            order: Vec::new(),
+            // Capacity 5 is a size hint only — presumably typical test
+            // traffic; vectors grow beyond it as needed.
+            request_data: Vec::with_capacity(5),
+            response_data: Vec::with_capacity(5),
+        }
+    }
+}
+
+/// A parsed test input: an ordered sequence of client/server chunks.
+#[derive(Debug)]
+struct TestInput {
+    // Chunks in the order they should be fed to the parser.
+    chunks: Vec<Chunk>,
+}
+
+// Consuming iteration over the chunks in recorded order.
+impl IntoIterator for TestInput {
+    type Item = Chunk;
+    type IntoIter = std::vec::IntoIter<Self::Item>;
+    fn into_iter(self) -> Self::IntoIter {
+        self.chunks.into_iter()
+    }
+}
+
+impl From<PathBuf> for TestInput {
+    fn from(file: PathBuf) -> Self {
+        let input = std::fs::read(file)
+            .expect("Could not read file {:?}. Do you need to set a base dir in env('srcdir')?");
+        TestInput::from(input.as_slice())
+    }
+}
+
+impl From<&[u8]> for TestInput {
+    /// Parse a raw test file body into direction-tagged chunks.
+    ///
+    /// Boundary lines ">>>" (client data), "<<<" (server data), "><>"
+    /// (client gap) and "<><" (server gap) switch the current direction;
+    /// everything between boundaries accumulates into the current chunk.
+    fn from(input: &[u8]) -> Self {
+        let mut test_input = TestInput { chunks: Vec::new() };
+        let mut current = Vec::<u8>::new();
+        let mut client = true;
+        let mut is_gap = false;
+        let mut start = true;
+        for line in input.split_inclusive(|c| *c == b'\n') {
+            // A boundary line is 4-5 bytes: marker, optional CR, then LF.
+            if line.len() >= 4
+                && line.len() <= 5
+                && (&line[0..3] == b"<<<"
+                    || &line[0..3] == b"<><"
+                    || &line[0..3] == b">>>"
+                    || &line[0..3] == b"><>")
+                && (line.len() == 4 || line[3] == b'\r')
+                && line[line.len() - 1] == b'\n'
+            {
+                if !current.is_empty() {
+                    // Pop off the CRLF from the last line, which
+                    // just separates the previous data from the
+                    // boundary <<< >>> chars and isn't actual data
+                    if let Some(b'\n') = current.last() {
+                        current.pop();
+                    }
+                    if let Some(b'\r') = current.last() {
+                        current.pop();
+                    }
+                    test_input.append(client, current, is_gap);
+                    current = Vec::<u8>::new();
+                }
+                // Client represented by first char is >
+                client = line[0] == b'>';
+                // Gaps represented by <>< or ><>
+                is_gap = line[0] != line[1];
+                start = false;
+            } else {
+                if start {
+                    // we need to start with an indicated direction
+                    return test_input;
+                }
+                current.append(&mut line.to_vec());
+            }
+        }
+        // Flush whatever remains after the final boundary.
+        test_input.append(client, current, is_gap);
+        test_input
+    }
+}
+
+impl TestInput {
+    /// Record one chunk, converting gaps to length-only ParserData.
+    fn append(&mut self, client: bool, data: Vec<u8>, is_gap: bool) {
+        let chunk = match (client, is_gap) {
+            // client gap
+            (true, true) => Chunk::Client(data.len().into()),
+            // client data
+            (true, false) => Chunk::Client(data.into()),
+            // server gap
+            (false, true) => Chunk::Server(data.len().into()),
+            // server data
+            (false, false) => Chunk::Server(data.into()),
+        };
+        self.chunks.push(chunk);
+    }
+}
+
+/// Error types
+#[derive(Debug)]
+pub(super) enum TestError {
+    /// The parser entered the Error state
+    StreamError,
+}
+
+/// Test harness
+#[derive(Debug)]
+pub(super) struct Test {
+    /// The connection parser
+    pub connp: ConnectionParser,
+    /// The base directory for the crate - used to find files.
+    pub basedir: Option<PathBuf>,
+}
+
+/// Return a default Config to use with tests
+pub(super) fn TestConfig() -> Config {
+    let mut cfg = Config::default();
+    cfg.set_server_personality(HtpServerPersonality::APACHE_2)
+        .unwrap();
+    // The default bomb limit may be slow in some development environments causing tests to fail.
+    cfg.compression_options
+        .set_time_limit(10 * cfg.compression_options.get_time_limit());
+
+    cfg
+}
+
+impl Test {
+    /// Make a new test with the given config
+    pub(super) fn new(cfg: Config) -> Self {
+        // Prefer an explicit srcdir; fall back to the crate's bundled files.
+        let basedir = if let Ok(dir) = std::env::var("srcdir") {
+            Some(PathBuf::from(dir))
+        } else if let Ok(dir) = env::var("CARGO_MANIFEST_DIR") {
+            let mut base = PathBuf::from(dir);
+            base.push("src");
+            base.push("test");
+            base.push("files");
+            Some(base)
+        } else {
+            None
+        };
+
+        // Intentionally leak the config to obtain a 'static reference for
+        // the parser; acceptable for test lifetimes.
+        let cfg = Box::leak(Box::new(cfg));
+        let connp = ConnectionParser::new(cfg);
+        Test { connp, basedir }
+    }
+
+    /// Make a new test with the default TestConfig and register body callbacks.
+    pub(super) fn new_with_callbacks() -> Self {
+        let mut cfg = TestConfig();
+        cfg.register_request_start(request_start);
+        cfg.register_request_complete(request_complete);
+        cfg.register_response_start(response_start);
+        cfg.register_response_complete(response_complete);
+        cfg.register_response_body_data(response_body_data);
+        cfg.register_request_body_data(request_body_data);
+        cfg.register_transaction_complete(transaction_complete);
+        let mut t = Test::new(cfg);
+        // Configure user data and callbacks
+        // NOTE(review): assumes a response transaction already exists on the
+        // fresh parser (unwrap would panic otherwise) — TODO confirm.
+        t.connp
+            .response_mut()
+            .unwrap()
+            .set_user_data(Box::<MainUserData>::default());
+        t
+    }
+
+    /// Open a connection on the underlying ConnectionParser. Useful if you
+    /// want to send data directly to the ConnectionParser after.
+    pub(super) fn open_connection(&mut self, tv_start: Option<OffsetDateTime>) {
+        // Fixed loopback endpoints: client 127.0.0.1:10000 -> server 127.0.0.1:80.
+        self.connp.open(
+            Some(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))),
+            Some(10000),
+            Some(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))),
+            Some(80),
+            tv_start,
+        );
+    }
+
+    fn run(&mut self, test: TestInput) -> std::result::Result<(), TestError> {
+        let tv_start = Some(OffsetDateTime::from(SystemTime::now()));
+        self.open_connection(tv_start);
+
+        let mut request_buf: Option<ParserData> = None;
+        let mut response_buf: Option<ParserData> = None;
+        for chunk in test {
+            match chunk {
+                Chunk::Client(data) => {
+                    let rc = self.connp.request_data(data.clone(), tv_start);
+
+                    if rc == HtpStreamState::ERROR {
+                        return Err(TestError::StreamError);
+                    }
+
+                    if rc == HtpStreamState::DATA_OTHER {
+                        let consumed = self.connp.request_data_consumed();
+                        let remaining = data.clone().into_owned();
+                        remaining.consume(consumed);
+                        request_buf = Some(remaining);
+                    }
+                }
+                Chunk::Server(data) => {
+                    // If we have leftover data from before then use it first
+                    if let Some(response_remaining) = response_buf {
+                        let rc = self
+                            .connp
+                            .response_data(response_remaining.as_slice().into(), tv_start);
+                        response_buf = None;
+                        if rc == HtpStreamState::ERROR {
+                            return Err(TestError::StreamError);
+                        }
+                    }
+
+                    // Now use up this data chunk
+                    let rc = self.connp.response_data(data.clone(), tv_start);
+                    if rc == HtpStreamState::ERROR {
+                        return Err(TestError::StreamError);
+                    }
+
+                    if rc == HtpStreamState::DATA_OTHER {
+                        let consumed = self.connp.response_data_consumed();
+                        let remaining = data.clone().into_owned();
+                        remaining.consume(consumed);
+                        response_buf = Some(remaining);
+                    }
+
+                    // And check if we also had some input data buffered
+                    if let Some(request_remaining) = request_buf {
+                        let rc = self
+                            .connp
+                            .request_data(request_remaining.as_slice().into(), tv_start);
+                        request_buf = None;
+                        if rc == HtpStreamState::ERROR {
+                            return Err(TestError::StreamError);
+                        }
+                    }
+                }
+            }
+        }
+
+        // Clean up any remaining server data
+        if let Some(response_remaining) = response_buf {
+            let rc = self
+                .connp
+                .response_data(response_remaining.as_slice().into(), tv_start);
+            if rc == HtpStreamState::ERROR {
+                return Err(TestError::StreamError);
+            }
+        }
+        self.connp
+            .close(Some(OffsetDateTime::from(SystemTime::now())));
+        Ok(())
+    }
+
+    /// Run on a slice of input data. Used with fuzzing.
+    pub(super) fn run_slice(&mut self, slice: &[u8]) -> std::result::Result<(), TestError> {
+        self.run(TestInput::from(slice))
+    }
+
+    /// Run on a file path. Used in integration tests.
+    pub(super) fn run_file(&mut self, file: &str) -> std::result::Result<(), TestError> {
+        let testfile = if let Some(base) = &self.basedir {
+            let mut path = base.clone();
+            path.push(file);
+            path
+        } else {
+            PathBuf::from(file)
+        };
+
+        self.run(TestInput::from(testfile))
+    }
+}
+
+fn request_start(tx: &mut Transaction) -> Result<()> {
+    let id = tx.index;
+    let user_data = tx.user_data_mut::<MainUserData>().unwrap();
+    user_data.order.push(format!("request_start {}", id));
+    Ok(())
+}
+
+fn request_complete(tx: &mut Transaction) -> Result<()> {
+    let id = tx.index;
+    let user_data = &mut tx.user_data_mut::<MainUserData>().unwrap();
+    user_data.order.push(format!("request_complete {}", id));
+    Ok(())
+}
+
+fn response_start(tx: &mut Transaction) -> Result<()> {
+    let id = tx.index;
+    let user_data = tx.user_data_mut::<MainUserData>().unwrap();
+    user_data.order.push(format!("response_start {}", id));
+    Ok(())
+}
+
+fn response_complete(tx: &mut Transaction) -> Result<()> {
+    let id = tx.index;
+    let user_data = tx.user_data_mut::<MainUserData>().unwrap();
+    user_data.order.push(format!("response_complete {}", id));
+    Ok(())
+}
+
+fn transaction_complete(tx: &mut Transaction) -> Result<()> {
+    let id = tx.index;
+    let user_data = tx.user_data_mut::<MainUserData>().unwrap();
+    user_data.order.push(format!("transaction_complete {}", id));
+    Ok(())
+}
+
+fn response_body_data(tx: &mut Transaction, d: &ParserData) -> Result<()> {
+    let user_data = tx.user_data_mut::<MainUserData>().unwrap();
+    let bstr = if d.is_gap() {
+        Bstr::with_capacity(d.len())
+    } else {
+        Bstr::from(d.as_slice())
+    };
+    user_data.response_data.push(bstr);
+    Ok(())
+}
+
+fn request_body_data(tx: &mut Transaction, d: &ParserData) -> Result<()> {
+    let user_data = tx.user_data_mut::<MainUserData>().unwrap();
+    let bstr = if d.is_gap() {
+        Bstr::with_capacity(d.len())
+    } else {
+        Bstr::from(d.as_slice())
+    };
+    user_data.request_data.push(bstr);
+    Ok(())
+}
+
+#[no_mangle]
+/// Creates a Fuzz test runner, and runs a byte slice on it
+/// # Safety
+/// Input pointer must be non-null.
+pub unsafe extern "C" fn libhtprsFuzzRun(
+    input: *const u8, input_len: u32,
+) -> *mut std::os::raw::c_void {
+    let mut cfg = TestConfig();
+    cfg.set_server_personality(HtpServerPersonality::IDS)
+        .unwrap();
+    let mut t = Test::new(cfg);
+    let data = std::slice::from_raw_parts(input, input_len as usize);
+    t.run_slice(data).ok();
+    let boxed = Box::new(t);
+    Box::into_raw(boxed) as *mut _
+}
+
+#[no_mangle]
+/// Frees a Fuzz test runner
+/// # Safety
+/// Input pointer must be non-null.
+pub unsafe extern "C" fn libhtprsFreeFuzzRun(state: *mut std::os::raw::c_void) {
+    //just unbox
+    std::mem::drop(Box::from_raw(state as *mut Test));
+}
+
+#[no_mangle]
+/// Gets connection parser out of a test runner
+/// # Safety
+/// Input pointer must be non-null.
+pub unsafe extern "C" fn libhtprsFuzzConnp(t: *mut std::os::raw::c_void) -> *mut ConnectionParser {
+    let state = t as *mut Test;
+    &mut (*state).connp
+}
+
+#[macro_export]
+/// Cstring converter
+macro_rules! cstr {
+    ( $x:expr ) => {{
+        CString::new($x).unwrap().as_ptr()
+    }};
+}
+
+/// Compares a transaction's header value to an expected value.
+///
+/// The `attr` argument is meant to be either `request_headers` or `response_headers`.
+///
+/// Example usage:
+/// assert_header_eq!(tx, request_headers, "host", ""www.example.com");
+macro_rules! assert_header_eq {
+    ($tx:expr, $attr:ident, $key:expr, $val:expr) => {{
+        let header = &(*$tx).$attr
+            .get_nocase_nozero($key)
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref());
+        assert_eq!(*header.value, $val);
+    }};
+    ($tx:expr, $attr:ident, $key:expr, $val:expr,) => {{
+        assert_header_eq!($tx, $attr, $key, $val);
+    }};
+    ($tx:expr, $attr:ident, $key:expr, $val:expr, $($arg:tt)+) => {{
+        let header = (*(*$tx).$attr)
+            .get_nocase_nozero($key)
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref())
+            .1
+            .as_ref()
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref());
+        assert_eq!(*header.value, $val, $($arg)*);
+    }};
+}
+pub(crate) use assert_header_eq;
+
+/// Compares a transaction's request header value to an expected value.
+///
+/// Example usage:
+/// assert_request_header_eq!(tx, "host", ""www.example.com");
+macro_rules! assert_request_header_eq {
+    ($tx:expr, $key:expr, $val:expr) => {{
+        assert_header_eq!($tx, request_headers, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr,) => {{
+        assert_header_eq!($tx, request_headers, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr, $($arg:tt)+) => {{
+        assert_header_eq!($tx, request_headers, $val, $($arg)*);
+    }};
+}
+pub(crate) use assert_request_header_eq;
+
+/// Compares a transaction's response header value to an expected value.
+///
+/// Example usage:
+/// assert_response_header_eq!(tx, "content-encoding", ""gzip");
+macro_rules! assert_response_header_eq {
+    ($tx:expr, $key:expr, $val:expr) => {{
+        assert_header_eq!($tx, response_headers, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr,) => {{
+        assert_header_eq!($tx, response_headers, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr, $($arg:tt)+) => {{
+        assert_header_eq!($tx, response_headers, $val, $($arg)*);
+    }};
+}
+pub(crate) use assert_response_header_eq;
+
+/// Asserts that a transaction's response contains a flag.
+///
+/// Example usage:
+/// assert_response_header_flag_contains!(tx, "Content-Length", Flags::FIELD_REPEATED);
+macro_rules! assert_response_header_flag_contains {
+    ($tx:expr, $key:expr, $val:expr) => {{
+        let header = &(*$tx).response_headers
+            .get_nocase_nozero($key)
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref());
+        assert!(header.flags.is_set($val));
+        }};
+    ($tx:expr, $key:expr, $val:expr,) => {{
+        assert_response_header_flag_contains!($tx, response_headers, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr, $($arg:tt)+) => {{
+        let header = (*(*$tx).response_headers)
+            .get_nocase_nozero($key)
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref())
+            .1
+            .as_ref()
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref());
+        assert_eq!(*header.value, $val, $($arg)*);
+        assert!((*header).flags.is_set($val), $($arg)*);
+    }};
+}
+pub(crate) use assert_response_header_flag_contains;
+
+/// Assert the common evader request values are as expected
+///
+/// Example usage:
+/// assert_evader_request!(tx, "url");
+macro_rules! assert_evader_request {
+    ($tx:expr, $url:expr) => {{
+        assert!(($tx).request_method.as_ref().unwrap().eq_slice("GET"));
+        assert!(($tx).request_uri.as_ref().unwrap().eq_slice($url));
+        assert_eq!(HtpProtocol::V1_1, ($tx).request_protocol_number);
+        assert_header_eq!($tx, request_headers, "host", "evader.example.com");
+    }};
+}
+pub(crate) use assert_evader_request;
+
+/// Assert the common evader response values are as expected
+///
+/// Example usage:
+/// assert_evader_response!(tx);
+macro_rules! assert_evader_response {
+    ($tx:expr) => {{
+        assert_eq!(HtpProtocol::V1_1, ($tx).response_protocol_number);
+        assert!(($tx).response_status_number.eq_num(200));
+        assert_response_header_eq!($tx, "Content-type", "application/octet-stream");
+        assert_response_header_eq!(
+            $tx,
+            "Content-disposition",
+            "attachment; filename=\"eicar.txt\""
+        );
+        assert!(($tx)
+            .response_headers
+            .get_nocase_nozero("Connection")
+            .is_some());
+    }};
+}
+pub(crate) use assert_evader_response;
+
+/// Assert the response transfer encoding is detected as chunked
+///
+/// Example usage:
+/// assert_evader_chunked_response!(tx);
+macro_rules! assert_evader_chunked {
+    ($tx:expr) => {{
+        assert_eq!($tx.response_transfer_coding, HtpTransferCoding::Chunked);
+        assert_response_header_eq!($tx, "Yet-Another-Header", "foo");
+        assert_eq!(68, ($tx).response_entity_len);
+        assert_eq!(156, ($tx).response_message_len);
+        let user_data = ($tx).user_data::<MainUserData>().unwrap();
+        assert!(user_data.request_data.is_empty());
+        assert_eq!(17, user_data.response_data.len());
+        assert_eq!(b"X5O!".as_ref(), (&user_data.response_data[0]).as_slice());
+        assert_eq!(b"P%@A".as_ref(), (&user_data.response_data[1]).as_slice());
+        assert_eq!(b"P[4\\".as_ref(), (&user_data.response_data[2]).as_slice());
+        assert_eq!(b"PZX5".as_ref(), (&user_data.response_data[3]).as_slice());
+        assert_eq!(b"4(P^".as_ref(), (&user_data.response_data[4]).as_slice());
+        assert_eq!(b")7CC".as_ref(), (&user_data.response_data[5]).as_slice());
+        assert_eq!(b")7}$".as_ref(), (&user_data.response_data[6]).as_slice());
+        assert_eq!(b"EICA".as_ref(), (&user_data.response_data[7]).as_slice());
+        assert_eq!(b"R-ST".as_ref(), (&user_data.response_data[8]).as_slice());
+        assert_eq!(b"ANDA".as_ref(), (&user_data.response_data[9]).as_slice());
+        assert_eq!(b"RD-A".as_ref(), (&user_data.response_data[10]).as_slice());
+        assert_eq!(b"NTIV".as_ref(), (&user_data.response_data[11]).as_slice());
+        assert_eq!(b"IRUS".as_ref(), (&user_data.response_data[12]).as_slice());
+        assert_eq!(b"-TES".as_ref(), (&user_data.response_data[13]).as_slice());
+        assert_eq!(b"T-FI".as_ref(), (&user_data.response_data[14]).as_slice());
+        assert_eq!(b"LE!$".as_ref(), (&user_data.response_data[15]).as_slice());
+        assert_eq!(b"H+H*".as_ref(), (&user_data.response_data[16]).as_slice());
+        assert_eq!(HtpRequestProgress::COMPLETE, ($tx).request_progress);
+        assert_eq!(HtpResponseProgress::COMPLETE, ($tx).response_progress);
+    }};
+}
+pub(crate) use assert_evader_chunked;
diff --git a/rust/htp/src/test/files/00-adhoc.t b/rust/htp/src/test/files/00-adhoc.t
new file mode 100644 (file)
index 0000000..36805de
--- /dev/null
@@ -0,0 +1,14 @@
+>>>
+GET /?p=%20 HTTP/1.0
+User-Agent: Mozilla
+
+
+<<<
+HTTP/1.0 200 OK
+Date: Mon, 31 Aug 2009 20:25:50 GMT
+Server: Apache
+Connection: close
+Content-Type: text/html
+Content-Length: 12
+
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/01-get.t b/rust/htp/src/test/files/01-get.t
new file mode 100644 (file)
index 0000000..e9edceb
--- /dev/null
@@ -0,0 +1,14 @@
+>>>\r
+GET /?p=%20 HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/02-header-test-apache2.t b/rust/htp/src/test/files/02-header-test-apache2.t
new file mode 100644 (file)
index 0000000..a7cb787
Binary files /dev/null and b/rust/htp/src/test/files/02-header-test-apache2.t differ
diff --git a/rust/htp/src/test/files/03-post-urlencoded.t b/rust/htp/src/test/files/03-post-urlencoded.t
new file mode 100644 (file)
index 0000000..052377e
--- /dev/null
@@ -0,0 +1,34 @@
+>>>\r
+POST /?qsp1=1&%20p%20q=2&u=Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_ HTTP/1.0\r
+Content-Length: 12\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+\r
+p=0123456789\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!\r
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Transfer-Encoding: chunked\r
+\r
+9\r
+012345678\r
+1\r
+9\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/04-post-urlencoded-chunked.t b/rust/htp/src/test/files/04-post-urlencoded-chunked.t
new file mode 100644 (file)
index 0000000..1d72e71
--- /dev/null
@@ -0,0 +1,26 @@
+>>>\r
+POST / HTTP/1.1\r
+Transfer-Encoding: chunked\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+Cookie: 1\r
+\r
+b\r
+p=012345678\r
+1\r
+9\r
+0\r
+Cookie:\r
+>>>\r
+ 2\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/05-expect.t b/rust/htp/src/test/files/05-expect.t
new file mode 100644 (file)
index 0000000..0ad3090
--- /dev/null
@@ -0,0 +1,39 @@
+>>>
+POST / HTTP/1.1
+User-Agent: curl/7.18.2 (i486-pc-linux-gnu) libcurl/7.18.2 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.8 libssh2/0.18
+Accept: */*
+Content-Length: 216
+Expect: 100-continue
+Content-Type: multipart/form-data; boundary=----------------------------07869933ca1b
+
+
+<<<
+HTTP/1.1 100 Continue
+Header1: This
+Header2: That
+
+
+>>>
+------------------------------07869933ca1b
+Content-Disposition: form-data; name="file"; filename="404.php"
+Content-Type: application/octet-stream
+
+
+>>>
+<? echo "404"; ?>
+>>>
+
+------------------------------07869933ca1b--
+
+<<<
+HTTP/1.1 200 OK
+Date: Tue, 03 Nov 2009 09:27:47 GMT
+Server: Apache
+Last-Modified: Thu, 30 Apr 2009 12:20:49 GMT
+ETag: "2dcada-2d-468c4b9ec6a40"
+Accept-Ranges: bytes
+Content-Length: 45
+Vary: Accept-Encoding
+Content-Type: text/html
+
+<html><body><h1>It works!</h1></body></html>
diff --git a/rust/htp/src/test/files/06-uri-normal.t b/rust/htp/src/test/files/06-uri-normal.t
new file mode 100644 (file)
index 0000000..78a138c
--- /dev/null
@@ -0,0 +1,9 @@
+>>>\r
+GET http://username:password@www.example.com:8080/sub/folder/file.jsp?p=q#f HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/07-pipelined-connection.t b/rust/htp/src/test/files/07-pipelined-connection.t
new file mode 100644 (file)
index 0000000..07ef603
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /first HTTP/1.1\r
+\r
+GET /second HTTP/1.1\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/08-not-pipelined-connection.t b/rust/htp/src/test/files/08-not-pipelined-connection.t
new file mode 100644 (file)
index 0000000..2a1bac3
--- /dev/null
@@ -0,0 +1,18 @@
+>>>\r
+GET /first HTTP/1.1\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!\r
+>>>\r
+GET /second HTTP/1.1\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/09-multi-packet-request-head.t b/rust/htp/src/test/files/09-multi-packet-request-head.t
new file mode 100644 (file)
index 0000000..08a16f2
--- /dev/null
@@ -0,0 +1,14 @@
+>>>\r
+GET / HTTP/1.0\r
+\r
+>>>\r
+Host: www.example.com\r
+\r
+>>>\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/10-host-in-headers.t b/rust/htp/src/test/files/10-host-in-headers.t
new file mode 100644 (file)
index 0000000..b892b0a
--- /dev/null
@@ -0,0 +1,34 @@
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!\r
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com.\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+>>>\r
+GET / HTTP/1.1\r
+Host: WwW.ExamPle.cOm\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com:80\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12
\ No newline at end of file
diff --git a/rust/htp/src/test/files/100-auth-digest-escaped-quote.t b/rust/htp/src/test/files/100-auth-digest-escaped-quote.t
new file mode 100644 (file)
index 0000000..f0bed0b
--- /dev/null
@@ -0,0 +1,8 @@
+>>>
+GET / HTTP/1.1
+Host: www.example.com
+Authorization: Digest username="ivan\"r\"", realm="Book Review",
+ nonce="OgmPjb/jAwA=7c5a49c2ed9416dba1b04b5307d6d935f74a859d",
+ uri="/review/", algorithm=MD5, response="3c430d26043cc306e0282635929d57cb",
+ qop=auth, nc=00000004, cnonce="c3bcee9534c051a0"
+
diff --git a/rust/htp/src/test/files/101-request-cookies-2.t b/rust/htp/src/test/files/101-request-cookies-2.t
new file mode 100644 (file)
index 0000000..4554c6d
--- /dev/null
@@ -0,0 +1,16 @@
+>>>
+GET / HTTP/1.0
+Host: www.example.com
+User-Agent: Mozilla
+Cookie: =0; p=1; q=2; =; ; z=
+
+
+<<<
+HTTP/1.0 200 OK
+Date: Mon, 31 Aug 2009 20:25:50 GMT
+Server: Apache
+Connection: close
+Content-Type: text/html
+Content-Length: 12
+
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/102-request-cookies-3.t b/rust/htp/src/test/files/102-request-cookies-3.t
new file mode 100644 (file)
index 0000000..cfe2d68
--- /dev/null
@@ -0,0 +1,16 @@
+>>>
+GET / HTTP/1.0
+Host: www.example.com
+User-Agent: Mozilla
+Cookie: = ; a=1; b=2  ;    =7; c=double=equal
+
+
+<<<
+HTTP/1.0 200 OK
+Date: Mon, 31 Aug 2009 20:25:50 GMT
+Server: Apache
+Connection: close
+Content-Type: text/html
+Content-Length: 12
+
+Hello World!
diff --git a/rust/htp/src/test/files/103-request-cookies-4.t b/rust/htp/src/test/files/103-request-cookies-4.t
new file mode 100644 (file)
index 0000000..78b63ec
--- /dev/null
@@ -0,0 +1,16 @@
+>>>
+GET / HTTP/1.0
+Host: www.example.com
+User-Agent: Mozilla
+Cookie: = ;;c=1; a=1  ;   b=2;
+
+
+<<<
+HTTP/1.0 200 OK
+Date: Mon, 31 Aug 2009 20:25:50 GMT
+Server: Apache
+Connection: close
+Content-Type: text/html
+Content-Length: 12
+
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/104-request-cookies-5.t b/rust/htp/src/test/files/104-request-cookies-5.t
new file mode 100644 (file)
index 0000000..e39a3c3
--- /dev/null
@@ -0,0 +1,16 @@
+>>>
+GET / HTTP/1.0
+Host: www.example.com
+User-Agent: Mozilla
+Cookie:             
+
+
+<<<
+HTTP/1.0 200 OK
+Date: Mon, 31 Aug 2009 20:25:50 GMT
+Server: Apache
+Connection: close
+Content-Type: text/html
+Content-Length: 12
+
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/105-expect-100.t b/rust/htp/src/test/files/105-expect-100.t
new file mode 100644 (file)
index 0000000..422f97f
--- /dev/null
@@ -0,0 +1,27 @@
+>>>
+PUT /forbidden HTTP/1.1
+Content-Length: 14
+Expect: 100-continue
+
+
+<<<
+HTTP/1.0 401 Forbidden
+Content-Length: 0
+
+
+>>>
+POST /ok HTTP/1.1
+Content-Length: 14
+Expect: 100-continue
+
+
+<<<
+HTTP/1.0 100 continue
+Content-Length: 0
+
+
+>>>
+Hello People!
+
+<<<
+HTTP/1.0 200 OK
diff --git a/rust/htp/src/test/files/106-tunnelled-1.t b/rust/htp/src/test/files/106-tunnelled-1.t
new file mode 100644 (file)
index 0000000..dc67ea9
--- /dev/null
@@ -0,0 +1,15 @@
+>>>
+CONNECT abc:443 HTTP/1.1
+User-Agent: Victor/1.0
+
+GET / HTTP/1.1
+User-Agent: Victor/1.0
+
+
+<<<
+HTTP/1.1 200 OK
+Server: VictorServer/1.0
+
+
+
+
diff --git a/rust/htp/src/test/files/107-response_unknown_status.t b/rust/htp/src/test/files/107-response_unknown_status.t
new file mode 100644 (file)
index 0000000..26ab8b1
--- /dev/null
@@ -0,0 +1,12 @@
+>>>
+GET /ld/index.php?id=412784631&cid=0064&version=4&name=try HTTP/1.1
+Accept: */*
+User-Agent:LD-agent
+Host: 209.405.196.16
+
+<<<
+NOTHTTP REALLY
+SOMEOTHERDATA
+STILLNOTHTTP
+SOMEMOREOTHERDATA
+
diff --git a/rust/htp/src/test/files/108-response-headers-cr-only.t b/rust/htp/src/test/files/108-response-headers-cr-only.t
new file mode 100644 (file)
index 0000000..bead617
--- /dev/null
@@ -0,0 +1,9 @@
+>>>\r
+GET /index.html HTTP/1.0\r
+Host: www.google.org\r
+User-Agent: Mozilla/5.0\r
+\r
+<<<\r
+HTTP/1.0 200 message\r
+Content-Type: text/html\rContent-Length: 7\r
+\r
diff --git a/rust/htp/src/test/files/109-response-headers-deformed-eol.t b/rust/htp/src/test/files/109-response-headers-deformed-eol.t
new file mode 100644 (file)
index 0000000..9650121
--- /dev/null
@@ -0,0 +1,12 @@
+>>>
+GET /index.html HTTP/1.0\r
+Host: www.google.org\r
+User-Agent: Mozilla/5.0\r
+\r
+<<<
+HTTP/1.0 200 message\r
+Content-Type: text/html\r
+Content-Length: 6
+\r\r
+\r
+abcdef
diff --git a/rust/htp/src/test/files/11-response-stream-closure.t b/rust/htp/src/test/files/11-response-stream-closure.t
new file mode 100644 (file)
index 0000000..8bc167c
--- /dev/null
@@ -0,0 +1,13 @@
+>>>\r
+GET / HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/110-response-folded-headers-2.t b/rust/htp/src/test/files/110-response-folded-headers-2.t
new file mode 100644 (file)
index 0000000..2e74d0d
--- /dev/null
@@ -0,0 +1,14 @@
+>>>
+POST / HTTP/1.0
+Content-Length: 12
+Content-Type: application/x-www-form-urlencoded
+User-Agent: Mozilla
+
+p=0123456789
+<<<
+HTTP/1.0 200 OK
+Date: Mon, 31 Aug 2009 20:25:50 GMT
+Server: Apache
+ Server
+Connection: close
+
diff --git a/rust/htp/src/test/files/111-response-headers-chunked.t b/rust/htp/src/test/files/111-response-headers-chunked.t
new file mode 100644 (file)
index 0000000..b1ca3eb
--- /dev/null
@@ -0,0 +1,13 @@
+>>>
+GET /index.html HTTP/1.0
+Host: www.google.org
+User-Agent: Mozilla/5.0
+
+<<<
+HTTP/1.0 200 message
+Content-
+<<<
+Type: text/html
+Content-Length: 12
+
+Hello World!
diff --git a/rust/htp/src/test/files/112-response-headers-chunked-2.t b/rust/htp/src/test/files/112-response-headers-chunked-2.t
new file mode 100644 (file)
index 0000000..21f2ef4
--- /dev/null
@@ -0,0 +1,15 @@
+>>>
+GET /index.html HTTP/1.0
+Host: www.google.org
+User-Agent: Mozilla/5.0
+
+<<<
+HTTP/1.0 200 message
+Content-
+<<<
+Type: text/html
+Content-Length:
+<<<
+12
+
+Hello World!
diff --git a/rust/htp/src/test/files/113-response-multipart-byte-ranges.t b/rust/htp/src/test/files/113-response-multipart-byte-ranges.t
new file mode 100644 (file)
index 0000000..57d7a2d
--- /dev/null
@@ -0,0 +1,23 @@
+>>>
+GET / HTTP/1.1
+Host: www.example.com
+Range: bytes=0-50, 100-150
+
+
+<<<
+HTTP/1.1 206 Partial content
+Date: Wed, 15 Nov 1995 06:25:24 GMT
+Last-modified: Wed, 15 Nov 1995 04:58:08 GMT
+Content-type: multipart/byteranges; boundary=THIS_STRING_SEPARATES
+
+--THIS_STRING_SEPARATES
+Content-type: application/pdf
+Content-range: bytes 500-999/8000
+
+...the first range...
+--THIS_STRING_SEPARATES
+Content-type: application/pdf
+Content-range: bytes 7000-7999/8000
+
+...the second range
+--THIS_STRING_SEPARATES--
\ No newline at end of file
diff --git a/rust/htp/src/test/files/114-http-2-upgrade.t b/rust/htp/src/test/files/114-http-2-upgrade.t
new file mode 100644 (file)
index 0000000..f15d819
--- /dev/null
@@ -0,0 +1,33 @@
+>>>
+GET /robots.txt HTTP/1.1
+Host: nghttp2.org
+User-Agent: curl/7.61.0
+Accept: */*
+Connection: Upgrade, HTTP2-Settings
+Upgrade: h2c
+HTTP2-Settings: AAMAAABkAARAAAAAAAIAAAAA
+
+
+<<<
+HTTP/1.1 200 OK
+Content-Type: text/html
+Content-Length: 12
+
+Hello World!
+
+
+>>>
+GET /robots.txt HTTP/1.1
+Host: nghttp2.org
+User-Agent: curl/7.61.0
+Accept: */*
+Connection: Upgrade, HTTP2-Settings
+Upgrade: h2c
+HTTP2-Settings: AAMAAABkAARAAAAAAAIAAAAA
+
+
+<<<
+HTTP/1.1 101 Switching Protocols
+Connection: Upgrade
+Upgrade: h2c
+
diff --git a/rust/htp/src/test/files/115-auth-bearer.t b/rust/htp/src/test/files/115-auth-bearer.t
new file mode 100644 (file)
index 0000000..b0c6e77
--- /dev/null
@@ -0,0 +1,8 @@
+>>>
+GET /resource HTTP/1.1
+Host: server.example.com
+Authorization: Bearer mF_9.B5f-4.1JqM
+
+
+
+
diff --git a/rust/htp/src/test/files/116-request-compression.t b/rust/htp/src/test/files/116-request-compression.t
new file mode 100644 (file)
index 0000000..6bcc208
Binary files /dev/null and b/rust/htp/src/test/files/116-request-compression.t differ
diff --git a/rust/htp/src/test/files/117-request-response-compression.t b/rust/htp/src/test/files/117-request-response-compression.t
new file mode 100644 (file)
index 0000000..66856f3
Binary files /dev/null and b/rust/htp/src/test/files/117-request-response-compression.t differ
diff --git a/rust/htp/src/test/files/118-post.t b/rust/htp/src/test/files/118-post.t
new file mode 100644 (file)
index 0000000..1044d56
--- /dev/null
@@ -0,0 +1,16 @@
+>>>\r
+POST / HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+Content-Length: 12\r
+\r
+Hello World!\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/119-ambiguous-eol.t b/rust/htp/src/test/files/119-ambiguous-eol.t
new file mode 100644 (file)
index 0000000..ea520b8
--- /dev/null
@@ -0,0 +1,11 @@
+>>>
+POST / HTTP/1.0
+\r\r
+>>>
+
+<<<
+<<<
+HTTP/1.0 200 OK
+\r\r
+<<<
+
diff --git a/rust/htp/src/test/files/12-connect-request.t b/rust/htp/src/test/files/12-connect-request.t
new file mode 100644 (file)
index 0000000..89faf8e
--- /dev/null
@@ -0,0 +1,21 @@
+>>>\r
+CONNECT www.ssllabs.com:443 HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.1 405 Method Not Allowed\r
+Date: Sat, 12 Dec 2009 05:08:45 GMT\r
+Server: Apache/2.2.14 (Unix) mod_ssl/2.2.14 OpenSSL/0.9.8g PHP/5.3.0\r
+Allow: GET,HEAD,POST,OPTIONS,TRACE\r
+Vary: Accept-Encoding\r
+Content-Length: 230\r
+Connection: close\r
+Content-Type: text/html; charset=iso-8859-1\r
+\r
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">\r
+<html><head>\r
+<title>405 Method Not Allowed</title>\r
+</head><body>\r
+<h1>Method Not Allowed</h1>\r
+<p>The requested method CONNECT is not allowed for the URL /.</p>\r
+</body></html>
\ No newline at end of file
diff --git a/rust/htp/src/test/files/120-request-gap.t b/rust/htp/src/test/files/120-request-gap.t
new file mode 100644 (file)
index 0000000..451d7d4
--- /dev/null
@@ -0,0 +1,43 @@
+>>>
+POST / HTTP/1.1
+User-Agent: curl/7.18.2 (i486-pc-linux-gnu) libcurl/7.18.2 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.8 libssh2/0.18
+Accept: */*
+Content-Length: 216
+Expect: 100-continue
+Content-Type: multipart/form-data; boundary=----------------------------07869933ca1b
+
+
+<<<
+HTTP/1.1 100 Continue
+Header1: This
+Header2: That
+
+
+>>>
+------------------------------07869933ca1b
+Content-Disposition: form-data; name="file"; filename="404.php"
+Content-Type: application/octet-stream
+
+
+>>>
+<? echo 
+><>
+"404"
+>>>
+; ?>
+>>>
+
+------------------------------07869933ca1b--
+
+<<<
+HTTP/1.1 200 OK
+Date: Tue, 03 Nov 2009 09:27:47 GMT
+Server: Apache
+Last-Modified: Thu, 30 Apr 2009 12:20:49 GMT
+ETag: "2dcada-2d-468c4b9ec6a40"
+Accept-Ranges: bytes
+Content-Length: 45
+Vary: Accept-Encoding
+Content-Type: text/html
+
+<html><body><h1>It works!</h1></body></html>
diff --git a/rust/htp/src/test/files/121-response-gap.t b/rust/htp/src/test/files/121-response-gap.t
new file mode 100644 (file)
index 0000000..7b3cdf1
--- /dev/null
@@ -0,0 +1,17 @@
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hell\r
+<><\r
+o Wo\r
+<<<\r
+rld!\r
diff --git a/rust/htp/src/test/files/122-response-body-data.t b/rust/htp/src/test/files/122-response-body-data.t
new file mode 100644 (file)
index 0000000..020bebd
--- /dev/null
@@ -0,0 +1,6 @@
+<<<
+1
+2
+<<<
+3
+4
\ No newline at end of file
diff --git a/rust/htp/src/test/files/123-response-header-bug.t b/rust/htp/src/test/files/123-response-header-bug.t
new file mode 100644 (file)
index 0000000..dd980b8
--- /dev/null
@@ -0,0 +1,14 @@
+>>>\r
+GET http://www.example.com:XXX/?p=%20 HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r\1aHello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/124-response-incomplete.t b/rust/htp/src/test/files/124-response-incomplete.t
new file mode 100644 (file)
index 0000000..4730570
--- /dev/null
@@ -0,0 +1,10 @@
+>>>\r
+GET /index.html HTTP/1.0\r
+Content-Type: text/html\r
+Host: www.openinfosecfoundation.org\r
+User-Agent: This is dummy message body\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 7\r
+Content-Type: text/html\r
diff --git a/rust/htp/src/test/files/13-compressed-response-gzip-ct.t b/rust/htp/src/test/files/13-compressed-response-gzip-ct.t
new file mode 100644 (file)
index 0000000..d5a2e31
Binary files /dev/null and b/rust/htp/src/test/files/13-compressed-response-gzip-ct.t differ
diff --git a/rust/htp/src/test/files/14-compressed-response-gzip-chunked.t b/rust/htp/src/test/files/14-compressed-response-gzip-chunked.t
new file mode 100644 (file)
index 0000000..bae8a2d
Binary files /dev/null and b/rust/htp/src/test/files/14-compressed-response-gzip-chunked.t differ
diff --git a/rust/htp/src/test/files/15-connect-complete.t b/rust/htp/src/test/files/15-connect-complete.t
new file mode 100644 (file)
index 0000000..071d064
Binary files /dev/null and b/rust/htp/src/test/files/15-connect-complete.t differ
diff --git a/rust/htp/src/test/files/16-connect-extra.t b/rust/htp/src/test/files/16-connect-extra.t
new file mode 100644 (file)
index 0000000..9c08f17
--- /dev/null
@@ -0,0 +1,32 @@
+>>>\r
+CONNECT www.feistyduck.com:80 HTTP/1.1\r
+Host: www.feistyduck.com\r
+\r
+HEAD / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.1 301 Moved Permanently\r
+Date: Wed, 06 Jan 2010 17:41:34 GMT\r
+Server: Apache\r
+Location: https://www.feistyduck.com/\r
+Vary: Accept-Encoding\r
+Content-Length: 235\r
+Content-Type: text/html; charset=iso-8859-1\r
+\r
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
+<html><head>
+<title>301 Moved Permanently</title>
+</head><body>
+<h1>Moved Permanently</h1>
+<p>The document has moved <a href="https://www.feistyduck.com/">here</a>.</p>
+</body></html>
+\r
+HTTP/1.1 301 Moved Permanently\r
+Date: Wed, 06 Jan 2010 17:41:46 GMT\r
+Server: Apache\r
+Location: https://www.feistyduck.com/\r
+Vary: Accept-Encoding\r
+Connection: close\r
+Content-Type: text/html; charset=iso-8859-1\r
+\r
diff --git a/rust/htp/src/test/files/17-multipart-1.t b/rust/htp/src/test/files/17-multipart-1.t
new file mode 100644 (file)
index 0000000..7c083c6
--- /dev/null
@@ -0,0 +1,41 @@
+>>>\r
+POST /upload.php?qsp1=1&%20p%20q=2 HTTP/1.1\r
+Host: 192.168.3.100:8080\r
+User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 (.NET CLR 3.5.30729)\r
+Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r
+Accept-Language: en-us,en;q=0.5\r
+Accept-Encoding: gzip,deflate\r
+Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r
+Keep-Alive: 300\r
+Connection: keep-alive\r
+Content-Type: multipart/form-data; boundary=---------------------------41184676334\r
+Content-Length: 610\r
+\r
+-----------------------------41184676334\r
+Content-Disposition: form-data; name="field1"\r
+\r
+0123456789\r
+-----------------------------41184676334\r
+Content-Disposition: form-data; name="field2"\r
+\r
+9876543210\r
+-----------------------------41184676334\r
+Content-Disposition: form-data; name="file1"; filename="New Text Document.txt"\r
+Content-Type: text/plain\r
+\r
+FFFFFFFFFFFFFFFFFFFFFFFFFFFF\r
+-----------------------------41184676334\r
+Content-Disposition: form-data; name="file2"; filename="New Text Document.txt"\r
+Content-Type: text/plain\r
+\r
+FFFFFFFFFFFFFFFFFFFFFFFFFFFF\r
+-----------------------------41184676334--\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/18-compressed-response-deflate.t b/rust/htp/src/test/files/18-compressed-response-deflate.t
new file mode 100644 (file)
index 0000000..b70940e
Binary files /dev/null and b/rust/htp/src/test/files/18-compressed-response-deflate.t differ
diff --git a/rust/htp/src/test/files/19-urlencoded-test.t b/rust/htp/src/test/files/19-urlencoded-test.t
new file mode 100644 (file)
index 0000000..21d7f27
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+POST /?p=1&q=2 HTTP/1.0\r
+Content-Type: application/x-www-form-urlencoded\r
+Content-Length: 11\r
+\r
+p=3&q=4&z=5\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/20-ambiguous-host.t b/rust/htp/src/test/files/20-ambiguous-host.t
new file mode 100644 (file)
index 0000000..bb3e551
--- /dev/null
@@ -0,0 +1,58 @@
+>>>\r
+GET http://example.com/1 HTTP/1.1\r
+Host: example.com\r
+\r
+\r
+<<<\r
+HTTP/1.1 200 OK\r
+Date: Mon, 26 Apr 2010 13:56:31 GMT\r
+Content-Length: 8\r
+\r
+12345678\r
+>>>\r
+GET http://example.com/2 HTTP/1.1\r
+Host: foo.com\r
+\r
+\r
+<<<\r
+HTTP/1.1 200 OK\r
+Date: Mon, 26 Apr 2010 13:56:31 GMT\r
+Content-Length: 8\r
+\r
+12345678\r
+>>>\r
+POST http://www.example.com:8001/3 HTTP/1.1\r
+Host: www.example.com:8001\r
+Content-Length: 8\r
+\r
+12345678\r
+<<<\r
+HTTP/1.1 200 OK\r
+Date: Mon, 26 Apr 2010 13:56:31 GMT\r
+Content-Length: 8\r
+\r
+12345678\r
+>>>\r
+POST http://www.example.com:8002/4 HTTP/1.1\r
+Host: www.example.com:8003\r
+Content-Length: 8\r
+\r
+12345678\r
+<<<\r
+HTTP/1.1 200 OK\r
+Date: Mon, 26 Apr 2010 13:56:31 GMT\r
+Content-Length: 8\r
+\r
+12345678\r
+>>>\r
+POST http://www.example.com:80/5 HTTP/1.1\r
+Host: www.example.com\r
+Content-Length: 8\r
+\r
+12345678\r
+<<<\r
+HTTP/1.1 200 OK\r
+Date: Mon, 26 Apr 2010 13:56:31 GMT\r
+Content-Length: 8\r
+\r
+12345678
\ No newline at end of file
diff --git a/rust/htp/src/test/files/21-http09.t b/rust/htp/src/test/files/21-http09.t
new file mode 100644 (file)
index 0000000..5359a7f
--- /dev/null
@@ -0,0 +1,11 @@
+>>>\r
+GET /?foo=bar\r
+\r
+<<<\r
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">\r
+<html><head>\r
+<title>301 Moved Permanently</title>\r
+</head><body>\r
+<h1>Moved Permanently</h1>\r
+<p>The document has moved.</p>\r
+</body></html>\r
diff --git a/rust/htp/src/test/files/22-http_1_1-host_missing b/rust/htp/src/test/files/22-http_1_1-host_missing
new file mode 100644 (file)
index 0000000..53ca3e8
--- /dev/null
@@ -0,0 +1,14 @@
+>>>\r
+GET /?p=%20 HTTP/1.1\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/22-php-param-processing.t b/rust/htp/src/test/files/22-php-param-processing.t
new file mode 100644 (file)
index 0000000..772a6d1
--- /dev/null
@@ -0,0 +1,14 @@
+>>>\r
+GET /?%20p%20q%20=1&q=2&z%20w=3 HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/23-http09-multiple.t b/rust/htp/src/test/files/23-http09-multiple.t
new file mode 100644 (file)
index 0000000..5fcb04b
--- /dev/null
@@ -0,0 +1,12 @@
+>>>\r
+GET /?foo=bar\r
+GET /?foo=bar\r
+\r
+<<<\r
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">\r
+<html><head>\r
+<title>301 Moved Permanently</title>\r
+</head><body>\r
+<h1>Moved Permanently</h1>\r
+<p>The document has moved.</p>\r
+</body></html>\r
diff --git a/rust/htp/src/test/files/24-http09-explicit.t b/rust/htp/src/test/files/24-http09-explicit.t
new file mode 100644 (file)
index 0000000..0198991
--- /dev/null
@@ -0,0 +1,13 @@
+>>>\r
+GET /?foo=bar HTTP/0.9\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/25-small-chunks.t b/rust/htp/src/test/files/25-small-chunks.t
new file mode 100644 (file)
index 0000000..fdbfc9e
--- /dev/null
@@ -0,0 +1,41 @@
+>>>
+GET
+>>>
+ /?x=y
+>>>
+ HTTP/1.0
+User-Agent:
+>>>
+ Test
+>>>
+ User
+>>>
+ Agent
+Host: www.example.com
+
+
+<<<
+HTTP/1.0
+<<<
+ 200
+<<<
+ OK
+Date:
+<<<
+ Mon,
+<<<
+ 31
+<<<
+ Aug
+<<<
+ 2009
+<<<
+ 20:25:50
+<<<
+ GMT
+Server: Apache
+Connection: close
+Content-Type: text/html
+Content-Length: 12
+
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/26-request-headers-raw.t b/rust/htp/src/test/files/26-request-headers-raw.t
new file mode 100644 (file)
index 0000000..fdbfc9e
--- /dev/null
@@ -0,0 +1,41 @@
+>>>
+GET
+>>>
+ /?x=y
+>>>
+ HTTP/1.0
+User-Agent:
+>>>
+ Test
+>>>
+ User
+>>>
+ Agent
+Host: www.example.com
+
+
+<<<
+HTTP/1.0
+<<<
+ 200
+<<<
+ OK
+Date:
+<<<
+ Mon,
+<<<
+ 31
+<<<
+ Aug
+<<<
+ 2009
+<<<
+ 20:25:50
+<<<
+ GMT
+Server: Apache
+Connection: close
+Content-Type: text/html
+Content-Length: 12
+
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/27-request-trailer-raw.t b/rust/htp/src/test/files/27-request-trailer-raw.t
new file mode 100644 (file)
index 0000000..1d72e71
--- /dev/null
@@ -0,0 +1,26 @@
+>>>\r
+POST / HTTP/1.1\r
+Transfer-Encoding: chunked\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+Cookie: 1\r
+\r
+b\r
+p=012345678\r
+1\r
+9\r
+0\r
+Cookie:\r
+>>>\r
+ 2\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/28-response-headers-raw.t b/rust/htp/src/test/files/28-response-headers-raw.t
new file mode 100644 (file)
index 0000000..db1e07f
--- /dev/null
@@ -0,0 +1,33 @@
+>>>\r
+GET /?p=%20 HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date:\r
+<<<\r
+ Mon,\r
+<<<\r
+ 31 Aug 2009 20:25:50 GMT\r
+Server:\r
+<<<\r
+ Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Transfer-Encoding: chunked\r
+\r
+b\r
+Hello World\r
+1\r
+!\r
+0\r
+Set-Cookie:\r
+<<<\r
+ name=\r
+<<<\r
+value\r
+Another-Header:\r
+<<<\r
+ Header-Value\r
+\r
diff --git a/rust/htp/src/test/files/29-response-trailer-raw.t b/rust/htp/src/test/files/29-response-trailer-raw.t
new file mode 100644 (file)
index 0000000..db1e07f
--- /dev/null
@@ -0,0 +1,33 @@
+>>>\r
+GET /?p=%20 HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date:\r
+<<<\r
+ Mon,\r
+<<<\r
+ 31 Aug 2009 20:25:50 GMT\r
+Server:\r
+<<<\r
+ Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Transfer-Encoding: chunked\r
+\r
+b\r
+Hello World\r
+1\r
+!\r
+0\r
+Set-Cookie:\r
+<<<\r
+ name=\r
+<<<\r
+value\r
+Another-Header:\r
+<<<\r
+ Header-Value\r
+\r
diff --git a/rust/htp/src/test/files/30-get-ipv6.t b/rust/htp/src/test/files/30-get-ipv6.t
new file mode 100644 (file)
index 0000000..baf3920
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET http://[::1]:8080/?p=%20 HTTP/1.0\r
+Host: [::1]:8080\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/31-get-request-line-nul.t b/rust/htp/src/test/files/31-get-request-line-nul.t
new file mode 100644 (file)
index 0000000..3de2eb4
Binary files /dev/null and b/rust/htp/src/test/files/31-get-request-line-nul.t differ
diff --git a/rust/htp/src/test/files/32-invalid-hostname.t b/rust/htp/src/test/files/32-invalid-hostname.t
new file mode 100644 (file)
index 0000000..a034785
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET http://www..example.com/?p=%20 HTTP/1.0\r
+Host: www example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/33-invalid-hostname.t b/rust/htp/src/test/files/33-invalid-hostname.t
new file mode 100644 (file)
index 0000000..ad18d33
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET http://www.example.com:XXX/?p=%20 HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/34-invalid-hostname.t b/rust/htp/src/test/files/34-invalid-hostname.t
new file mode 100644 (file)
index 0000000..e886ebf
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /?p=%20 HTTP/1.0\r
+Host: www.example.com:\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/35-early-response.t b/rust/htp/src/test/files/35-early-response.t
new file mode 100644 (file)
index 0000000..4b205db
--- /dev/null
@@ -0,0 +1,18 @@
+>>>\r
+POST / HTTP/1.0\r
+Content-Length: 12\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 400 Bad Request\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 11\r
+\r
+Bad Request\r
+>>>\r
+p=0123456789
\ No newline at end of file
diff --git a/rust/htp/src/test/files/36-invalid-request-1-invalid-c-l.t b/rust/htp/src/test/files/36-invalid-request-1-invalid-c-l.t
new file mode 100644 (file)
index 0000000..42980f4
--- /dev/null
@@ -0,0 +1,17 @@
+>>>\r
+POST / HTTP/1.0\r
+Host: www.example.com\r
+Content-Length: ABC\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+\r
+p=0123456789\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/37-invalid-request-2-t-e-and-c-l.t b/rust/htp/src/test/files/37-invalid-request-2-t-e-and-c-l.t
new file mode 100644 (file)
index 0000000..8edab9c
--- /dev/null
@@ -0,0 +1,28 @@
+>>>\r
+POST / HTTP/1.1\r
+Transfer-Encoding: chunked\r
+Content-Length: 12\r
+Host: www.example.com\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+Cookie: 1\r
+\r
+b\r
+p=012345678\r
+1\r
+9\r
+0\r
+Cookie:\r
+>>>\r
+ 2\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/38-invalid-request-3-invalid-t-e.t b/rust/htp/src/test/files/38-invalid-request-3-invalid-t-e.t
new file mode 100644 (file)
index 0000000..ada8dd5
--- /dev/null
@@ -0,0 +1,27 @@
+>>>\r
+POST / HTTP/1.1\r
+Transfer-Encoding: ABC\r
+Host: www.example.com\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+Cookie: 1\r
+\r
+b\r
+p=012345678\r
+1\r
+9\r
+0\r
+Cookie:\r
+>>>\r
+ 2\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/39-auto-destroy-crash.t b/rust/htp/src/test/files/39-auto-destroy-crash.t
new file mode 100644 (file)
index 0000000..b892b0a
--- /dev/null
@@ -0,0 +1,34 @@
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!\r
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com.\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+>>>\r
+GET / HTTP/1.1\r
+Host: WwW.ExamPle.cOm\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com:80\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12
\ No newline at end of file
diff --git a/rust/htp/src/test/files/40-auth-basic.t b/rust/htp/src/test/files/40-auth-basic.t
new file mode 100644 (file)
index 0000000..8606001
--- /dev/null
@@ -0,0 +1,5 @@
+>>>\r
+GET / HTTP/1.0\r
+Host: www.example.com\r
+Authorization: Basic aXZhbnI6c2VjcmV0\r
+\r
diff --git a/rust/htp/src/test/files/41-auth-digest.t b/rust/htp/src/test/files/41-auth-digest.t
new file mode 100644 (file)
index 0000000..53065b0
--- /dev/null
@@ -0,0 +1,8 @@
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com\r
+Authorization: Digest username="ivanr", realm="Book Review",\r
+ nonce="OgmPjb/jAwA=7c5a49c2ed9416dba1b04b5307d6d935f74a859d",\r
+ uri="/review/", algorithm=MD5, response="3c430d26043cc306e0282635929d57cb",\r
+ qop=auth, nc=00000004, cnonce="c3bcee9534c051a0"\r
+\r
diff --git a/rust/htp/src/test/files/42-unknown-method_only.t b/rust/htp/src/test/files/42-unknown-method_only.t
new file mode 100644 (file)
index 0000000..8c9a603
--- /dev/null
@@ -0,0 +1,3 @@
+>>>\r
+HELLO\r
+\r
diff --git a/rust/htp/src/test/files/43-invalid-protocol.t b/rust/htp/src/test/files/43-invalid-protocol.t
new file mode 100644 (file)
index 0000000..0e4c0ad
--- /dev/null
@@ -0,0 +1,3 @@
+>>>\r
+GET / JUNK/1.0\r
+\r
diff --git a/rust/htp/src/test/files/44-auth-basic-invalid.t b/rust/htp/src/test/files/44-auth-basic-invalid.t
new file mode 100644 (file)
index 0000000..ed44445
--- /dev/null
@@ -0,0 +1,5 @@
+>>>\r
+GET / HTTP/1.0\r
+Host: www.example.com\r
+Authorization: Basic notBase64:EncodedStuff\r
+\r
diff --git a/rust/htp/src/test/files/45-auth-digest-unquoted-username.t b/rust/htp/src/test/files/45-auth-digest-unquoted-username.t
new file mode 100644 (file)
index 0000000..855e00e
--- /dev/null
@@ -0,0 +1,8 @@
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com\r
+Authorization: Digest username=ivanr, realm="Book Review",\r
+ nonce="OgmPjb/jAwA=7c5a49c2ed9416dba1b04b5307d6d935f74a859d",\r
+ uri="/review/", algorithm=MD5, response="3c430d26043cc306e0282635929d57cb",\r
+ qop=auth, nc=00000004, cnonce="c3bcee9534c051a0"\r
+\r
diff --git a/rust/htp/src/test/files/46-auth-digest-invalid-username.t b/rust/htp/src/test/files/46-auth-digest-invalid-username.t
new file mode 100644 (file)
index 0000000..dbd1c43
--- /dev/null
@@ -0,0 +1,8 @@
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com\r
+Authorization: Digest username = ivanr, realm="Book Review",\r
+ nonce="OgmPjb/jAwA=7c5a49c2ed9416dba1b04b5307d6d935f74a859d",\r
+ uri="/review/", algorithm=MD5, response="3c430d26043cc306e0282635929d57cb",\r
+ qop=auth, nc=00000004, cnonce="c3bcee9534c051a0"\r
+\r
diff --git a/rust/htp/src/test/files/47-auth-unrecognized.t b/rust/htp/src/test/files/47-auth-unrecognized.t
new file mode 100644 (file)
index 0000000..5d51455
--- /dev/null
@@ -0,0 +1,5 @@
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com\r
+Authorization: Turbo customAuthDataHere\r
+\r
diff --git a/rust/htp/src/test/files/48-invalid-response-headers-1.t b/rust/htp/src/test/files/48-invalid-response-headers-1.t
new file mode 100644 (file)
index 0000000..d10582a
--- /dev/null
@@ -0,0 +1,17 @@
+>>>\r
+GET /?p=%20 HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+No Colon\r
+Lws : After Header Name\r
+Header@Name: Not Token\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/49-invalid-response-headers-2.t b/rust/htp/src/test/files/49-invalid-response-headers-2.t
new file mode 100644 (file)
index 0000000..16970f9
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /?p=%20 HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+: Empty Name\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/51-get-ipv6-invalid.t b/rust/htp/src/test/files/51-get-ipv6-invalid.t
new file mode 100644 (file)
index 0000000..bde929c
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET http://[::1:8080/?p=%20 HTTP/1.0\r
+Host: [::1]:8080\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/52-invalid-path.t b/rust/htp/src/test/files/52-invalid-path.t
new file mode 100644 (file)
index 0000000..97528e7
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET invalid/path?p=%20 HTTP/1.0\r
+Host: [::1]:8080\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/53-path-utf8-none.t b/rust/htp/src/test/files/53-path-utf8-none.t
new file mode 100644 (file)
index 0000000..9234cd9
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /Ristic.txt HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/54-path-utf8-valid.t b/rust/htp/src/test/files/54-path-utf8-valid.t
new file mode 100644 (file)
index 0000000..518918e
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /Risti%C4%87.txt HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/55-path-utf8-overlong-2.t b/rust/htp/src/test/files/55-path-utf8-overlong-2.t
new file mode 100644 (file)
index 0000000..f78a088
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /%c0%a6.txt HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/56-path-utf8-overlong-3.t b/rust/htp/src/test/files/56-path-utf8-overlong-3.t
new file mode 100644 (file)
index 0000000..3184dc8
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /%e0%80%a6.txt HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/57-path-utf8-overlong-4.t b/rust/htp/src/test/files/57-path-utf8-overlong-4.t
new file mode 100644 (file)
index 0000000..cfccdbe
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /%f0%80%80%a6.txt HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/58-path-utf8-invalid.t b/rust/htp/src/test/files/58-path-utf8-invalid.t
new file mode 100644 (file)
index 0000000..f3d5803
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /Risti%C4%87%80.txt HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/59-path-utf8-fullwidth.t b/rust/htp/src/test/files/59-path-utf8-fullwidth.t
new file mode 100644 (file)
index 0000000..4321652
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /%EF%BC%86.txt HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/60-request-cookies-1.t b/rust/htp/src/test/files/60-request-cookies-1.t
new file mode 100644 (file)
index 0000000..51aca6f
--- /dev/null
@@ -0,0 +1,16 @@
+>>>\r
+GET / HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+Cookie: =0; p=1; q=2; =; z=\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/61-empty-line-between-requests.t b/rust/htp/src/test/files/61-empty-line-between-requests.t
new file mode 100644 (file)
index 0000000..47a8c21
--- /dev/null
@@ -0,0 +1,19 @@
+>>>\r
+GET /first HTTP/1.1\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!\r
+>>>\r
+\r
+GET /second HTTP/1.1\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/62-post-no-body.t b/rust/htp/src/test/files/62-post-no-body.t
new file mode 100644 (file)
index 0000000..10a8d4b
--- /dev/null
@@ -0,0 +1,34 @@
+>>>\r
+POST / HTTP/1.0\r
+Content-Length: 0\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!\r
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Transfer-Encoding: chunked\r
+\r
+9\r
+012345678\r
+1\r
+9\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/63-post-chunked-invalid-1.t b/rust/htp/src/test/files/63-post-chunked-invalid-1.t
new file mode 100644 (file)
index 0000000..eb5ef0c
--- /dev/null
@@ -0,0 +1,26 @@
+>>>\r
+POST / HTTP/1.1\r
+Transfer-Encoding: chunked\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+Cookie: 1\r
+\r
+80000000\r
+p=012345678\r
+1\r
+9\r
+0\r
+Cookie:\r
+>>>\r
+ 2\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/64-post-chunked-invalid-2.t b/rust/htp/src/test/files/64-post-chunked-invalid-2.t
new file mode 100644 (file)
index 0000000..f5fc91d
--- /dev/null
@@ -0,0 +1,26 @@
+>>>\r
+POST / HTTP/1.1\r
+Transfer-Encoding: chunked\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+Cookie: 1\r
+\r
+-1\r
+p=012345678\r
+1\r
+9\r
+0\r
+Cookie:\r
+>>>\r
+ 2\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/65-post-chunked-invalid-3.t b/rust/htp/src/test/files/65-post-chunked-invalid-3.t
new file mode 100644 (file)
index 0000000..4076e0b
--- /dev/null
@@ -0,0 +1,26 @@
+>>>\r
+POST / HTTP/1.1\r
+Transfer-Encoding: chunked\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+Cookie: 1\r
+\r
+\r
+p=012345678\r
+1\r
+9\r
+0\r
+Cookie:\r
+>>>\r
+ 2\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/66-post-chunked-split-chunk.t b/rust/htp/src/test/files/66-post-chunked-split-chunk.t
new file mode 100644 (file)
index 0000000..6f5dd48
--- /dev/null
@@ -0,0 +1,28 @@
+>>>\r
+POST / HTTP/1.1\r
+Transfer-Encoding: chunked\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+Cookie: 1\r
+\r
+b\r
+p=01234\r
+>>>\r
+5678\r
+1\r
+9\r
+0\r
+Cookie:\r
+>>>\r
+ 2\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/67-long-request-line.t b/rust/htp/src/test/files/67-long-request-line.t
new file mode 100644 (file)
index 0000000..fa3f984
--- /dev/null
@@ -0,0 +1,16 @@
+>>>\r
+GET /0123456789/\r
+>>>\r
+0123456789/ HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/68-invalid-request-header.t b/rust/htp/src/test/files/68-invalid-request-header.t
new file mode 100644 (file)
index 0000000..4e6d688
Binary files /dev/null and b/rust/htp/src/test/files/68-invalid-request-header.t differ
diff --git a/rust/htp/src/test/files/69-long-response-header.t b/rust/htp/src/test/files/69-long-response-header.t
new file mode 100644 (file)
index 0000000..822d3ca
--- /dev/null
@@ -0,0 +1,16 @@
+>>>\r
+GET / HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache Apache Apache Apache\r
+<<<\r
+Apache Apache Apache Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/70-response-invalid-chunk-length.t b/rust/htp/src/test/files/70-response-invalid-chunk-length.t
new file mode 100644 (file)
index 0000000..68b43e3
--- /dev/null
@@ -0,0 +1,18 @@
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Transfer-Encoding: chunked\r
+\r
+g\r
+012345678\r
+1\r
+9\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/71-response-split-chunk.t b/rust/htp/src/test/files/71-response-split-chunk.t
new file mode 100644 (file)
index 0000000..1d3f091
--- /dev/null
@@ -0,0 +1,20 @@
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Transfer-Encoding: chunked\r
+\r
+9\r
+01234\r
+<<<\r
+5678\r
+1\r
+9\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/72-response-split-body.t b/rust/htp/src/test/files/72-response-split-body.t
new file mode 100644 (file)
index 0000000..db5ab9f
--- /dev/null
@@ -0,0 +1,16 @@
+>>>\r
+GET /?p=%20 HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello\r
+<<<\r
+ World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/73-response-te-and-cl.t b/rust/htp/src/test/files/73-response-te-and-cl.t
new file mode 100644 (file)
index 0000000..46c646d
--- /dev/null
@@ -0,0 +1,19 @@
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 10\r
+Transfer-Encoding: chunked\r
+\r
+9\r
+012345678\r
+1\r
+9\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/74-response-multiple-cl.t b/rust/htp/src/test/files/74-response-multiple-cl.t
new file mode 100644 (file)
index 0000000..556fb8f
--- /dev/null
@@ -0,0 +1,14 @@
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/75-response-invalid-cl.t b/rust/htp/src/test/files/75-response-invalid-cl.t
new file mode 100644 (file)
index 0000000..8743d88
--- /dev/null
@@ -0,0 +1,13 @@
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: -1\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/76-response-no-body.t b/rust/htp/src/test/files/76-response-no-body.t
new file mode 100644 (file)
index 0000000..831571c
--- /dev/null
@@ -0,0 +1,34 @@
+>>>\r
+POST /?qsp1=1&%20p%20q=2&u=Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_ HTTP/1.0\r
+Content-Length: 12\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+\r
+p=0123456789\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 0\r
+\r
+\r
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Transfer-Encoding: chunked\r
+\r
+9\r
+012345678\r
+1\r
+9\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/77-response-folded-headers.t b/rust/htp/src/test/files/77-response-folded-headers.t
new file mode 100644 (file)
index 0000000..dd33c07
--- /dev/null
@@ -0,0 +1,35 @@
+>>>\r
+POST / HTTP/1.0\r
+Content-Length: 12\r
+Content-Type: application/x-www-form-urlencoded\r
+User-Agent: Mozilla\r
+\r
+p=0123456789\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+ Server\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 0\r
+\r
+\r
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apach2\r
+Connection: close\r
+Content-Type: text/html\r
+Transfer-Encoding: chunked\r
+\r
+9\r
+012345678\r
+1\r
+9\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/78-response-no-status-headers.t b/rust/htp/src/test/files/78-response-no-status-headers.t
new file mode 100644 (file)
index 0000000..82e8d2a
--- /dev/null
@@ -0,0 +1,8 @@
+>>>\r
+GET / HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+Hello\r
+World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/79-connect-invalid-hostport.t b/rust/htp/src/test/files/79-connect-invalid-hostport.t
new file mode 100644 (file)
index 0000000..9258b7b
--- /dev/null
@@ -0,0 +1,32 @@
+>>>\r
+CONNECT [:80 HTTP/1.1\r
+Host: www.feistyduck.com\r
+\r
+HEAD / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.1 301 Moved Permanently\r
+Date: Wed, 06 Jan 2010 17:41:34 GMT\r
+Server: Apache\r
+Location: https://www.feistyduck.com/\r
+Vary: Accept-Encoding\r
+Content-Length: 235\r
+Content-Type: text/html; charset=iso-8859-1\r
+\r
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">\r
+<html><head>\r
+<title>301 Moved Permanently</title>\r
+</head><body>\r
+<h1>Moved Permanently</h1>\r
+<p>The document has moved <a href="https://www.feistyduck.com/">here</a>.</p>\r
+</body></html>\r
+\r
+HTTP/1.1 301 Moved Permanently\r
+Date: Wed, 06 Jan 2010 17:41:46 GMT\r
+Server: Apache\r
+Location: https://www.feistyduck.com/\r
+Vary: Accept-Encoding\r
+Connection: close\r
+Content-Type: text/html; charset=iso-8859-1\r
+\r
diff --git a/rust/htp/src/test/files/80-hostname-invalid-1.t b/rust/htp/src/test/files/80-hostname-invalid-1.t
new file mode 100644 (file)
index 0000000..f5e28c1
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET http://www.example.com/?p=%20 HTTP/1.0\r
+Host: [:80\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/81-hostname-invalid-2.t b/rust/htp/src/test/files/81-hostname-invalid-2.t
new file mode 100644 (file)
index 0000000..d3065c9
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET http://[:80/?p=%20 HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/82-put.t b/rust/htp/src/test/files/82-put.t
new file mode 100644 (file)
index 0000000..9931462
--- /dev/null
@@ -0,0 +1,16 @@
+>>>\r
+PUT / HTTP/1.0\r
+Host: www.example.com\r
+User-Agent: Mozilla\r
+Content-Length: 12\r
+\r
+Hello World!\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/83-auth-digest-invalid-username-2.t b/rust/htp/src/test/files/83-auth-digest-invalid-username-2.t
new file mode 100644 (file)
index 0000000..2344a40
--- /dev/null
@@ -0,0 +1,5 @@
+>>>\r
+GET / HTTP/1.1\r
+Host: www.example.com\r
+Authorization: Digest username="ivanr\r
+\r
diff --git a/rust/htp/src/test/files/84-response-no-status-headers-2.t b/rust/htp/src/test/files/84-response-no-status-headers-2.t
new file mode 100644 (file)
index 0000000..239e08a
--- /dev/null
@@ -0,0 +1,7 @@
+>>>\r
+GET / HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/85-zero-byte-request-timeout.t b/rust/htp/src/test/files/85-zero-byte-request-timeout.t
new file mode 100644 (file)
index 0000000..0cc0e09
--- /dev/null
@@ -0,0 +1,16 @@
+<<<
+HTTP/1.0 408 Request Time-out
+Server: AkamaiGHost
+Mime-Version: 1.0
+Date: Fri, 27 Sep 2013 16:37:37 GMT
+Content-Type: text/html
+Content-Length: 218
+Expires: Fri, 27 Sep 2013 16:37:37 GMT
+
+<HTML><HEAD>
+<TITLE>Request Timeout</TITLE>
+</HEAD><BODY>
+<H1>Request Timeout</H1>
+The server timed out while waiting for the browser's request.<P>
+Reference&#32;&#35;2&#46;9efcd4d9&#46;1380708056&#46;0
+</BODY></HTML>
diff --git a/rust/htp/src/test/files/86-partial-request-timeout.t b/rust/htp/src/test/files/86-partial-request-timeout.t
new file mode 100644 (file)
index 0000000..97dc4bb
--- /dev/null
@@ -0,0 +1,18 @@
+>>>
+GET
+<<<
+HTTP/1.0 408 Request Time-out
+Server: AkamaiGHost
+Mime-Version: 1.0
+Date: Fri, 27 Sep 2013 16:37:37 GMT
+Content-Type: text/html
+Content-Length: 218
+Expires: Fri, 27 Sep 2013 16:37:37 GMT
+
+<HTML><HEAD>
+<TITLE>Request Timeout</TITLE>
+</HEAD><BODY>
+<H1>Request Timeout</H1>
+The server timed out while waiting for the browser's request.<P>
+Reference&#32;&#35;2&#46;9efcd4d9&#46;1380708056&#46;0
+</BODY></HTML>
diff --git a/rust/htp/src/test/files/87-issue-55-incorrect-host-ambiguous-warning.t b/rust/htp/src/test/files/87-issue-55-incorrect-host-ambiguous-warning.t
new file mode 100644 (file)
index 0000000..463ce5d
--- /dev/null
@@ -0,0 +1,8 @@
+>>>\r
+CONNECT www.example.com:443 HTTP/1.1\r
+Host: www.example.com:443\r
+Accept: */*\r
+Content-Type: text/html\r
+Proxy-Connection: Keep-Alive\r
+Content-length: 0\r
+\r
diff --git a/rust/htp/src/test/files/88-response-multiple-cl-mismatch.t b/rust/htp/src/test/files/88-response-multiple-cl-mismatch.t
new file mode 100644 (file)
index 0000000..a1c17c8
--- /dev/null
@@ -0,0 +1,14 @@
+>>>\r
+GET / HTTP/1.0\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+Content-Length: 11\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/89-get-whitespace.t b/rust/htp/src/test/files/89-get-whitespace.t
new file mode 100644 (file)
index 0000000..0bb5b2d
--- /dev/null
@@ -0,0 +1,14 @@
+>>>\r
+ GET /?p=%20 HTTP/1.0\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/90-request-uri-too-large.t b/rust/htp/src/test/files/90-request-uri-too-large.t
new file mode 100644 (file)
index 0000000..eef176a
--- /dev/null
@@ -0,0 +1,17 @@
+>>>
+GET /blaaaaaaaaaaaaaaaaaaaaaaaaa
+<<<
+HTTP/1.0 414 Request-URI Too Large
+Server: MyBigFatServer
+Mime-Version: 1.0
+Date: Fri, 27 Sep 2013 16:37:37 GMT
+Content-Type: text/html
+Content-Length: 139
+Expires: Fri, 27 Sep 2013 16:37:37 GMT
+
+<HTML><HEAD>
+<TITLE>Request-URI Too Large</TITLE>
+</HEAD><BODY>
+<H1>Request-URI Too Large</H1>
+The Request-URI is Too Large
+</BODY></HTML>
diff --git a/rust/htp/src/test/files/91-request-unexpected-body.t b/rust/htp/src/test/files/91-request-unexpected-body.t
new file mode 100644 (file)
index 0000000..358da12
--- /dev/null
@@ -0,0 +1,16 @@
+>>>\r
+POST / HTTP/1.1\r
+Host: localhost\r
+Content-Type: application/x-www-form-urlencoded\r
+\r
+login=foo&password=bar\r
+\r
+<<<\r
+HTTP/1.1 200 OK\r
+Content-Length: 0 \r
+\r
+\r
+>>>\r
+GET / HTTP/1.1\r
+Host: localhost\r
+\r
diff --git a/rust/htp/src/test/files/92-http_0_9-method_only.t b/rust/htp/src/test/files/92-http_0_9-method_only.t
new file mode 100644 (file)
index 0000000..5c7c9b2
--- /dev/null
@@ -0,0 +1,3 @@
+>>>\r
+GET /\r
+\r
diff --git a/rust/htp/src/test/files/93-compressed-response-deflateasgzip.t b/rust/htp/src/test/files/93-compressed-response-deflateasgzip.t
new file mode 100644 (file)
index 0000000..e6c2eb5
Binary files /dev/null and b/rust/htp/src/test/files/93-compressed-response-deflateasgzip.t differ
diff --git a/rust/htp/src/test/files/94-compressed-response-multiple.t b/rust/htp/src/test/files/94-compressed-response-multiple.t
new file mode 100644 (file)
index 0000000..4d0fdf7
Binary files /dev/null and b/rust/htp/src/test/files/94-compressed-response-multiple.t differ
diff --git a/rust/htp/src/test/files/95-compressed-response-gzipasdeflate.t b/rust/htp/src/test/files/95-compressed-response-gzipasdeflate.t
new file mode 100644 (file)
index 0000000..8076f83
Binary files /dev/null and b/rust/htp/src/test/files/95-compressed-response-gzipasdeflate.t differ
diff --git a/rust/htp/src/test/files/96-compressed-response-lzma.t b/rust/htp/src/test/files/96-compressed-response-lzma.t
new file mode 100644 (file)
index 0000000..a5ea306
Binary files /dev/null and b/rust/htp/src/test/files/96-compressed-response-lzma.t differ
diff --git a/rust/htp/src/test/files/97-requests-cut.t b/rust/htp/src/test/files/97-requests-cut.t
new file mode 100644 (file)
index 0000000..2d2da6c
--- /dev/null
@@ -0,0 +1,9 @@
+>>>\r
+GET /?p=%20 HTTP/1.1\r
+User-Agent: Mozilla\r
+\r
+G\r
+>>>\r
+ET /?p=%21 HTTP/1.1\r
+User-Agent: Mozilla
+
diff --git a/rust/htp/src/test/files/98-responses-cut.t b/rust/htp/src/test/files/98-responses-cut.t
new file mode 100644 (file)
index 0000000..5bd8164
--- /dev/null
@@ -0,0 +1,26 @@
+>>>\r
+GET /?p=%20 HTTP/1.1\r
+User-Agent: Mozilla\r
+\r
+GET /?p=%21 HTTP/1.1\r
+User-Agent: Mozilla\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 14\r
+\r
+Hello World!\r
+H\r
+<<<\r
+TTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 13\r
+\r
+Hello People!
\ No newline at end of file
diff --git a/rust/htp/src/test/files/99-get.t b/rust/htp/src/test/files/99-get.t
new file mode 100644 (file)
index 0000000..5c892a7
--- /dev/null
@@ -0,0 +1,15 @@
+>>>\r
+GET /%2e%2e/images.gif HTTP/1.1\r
+Host: www.ExAmPlE.cOM\r
+User-Agent: Mozilla\r
+\r
+\r
+<<<\r
+HTTP/1.0 200 OK\r
+Date: Mon, 31 Aug 2009 20:25:50 GMT\r
+Server: Apache\r
+Connection: close\r
+Content-Type: text/html\r
+Content-Length: 12\r
+\r
+Hello World!\r
diff --git a/rust/htp/src/test/files/anchor.empty b/rust/htp/src/test/files/anchor.empty
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/rust/htp/src/test/files/generate-gzip-tests.php b/rust/htp/src/test/files/generate-gzip-tests.php
new file mode 100755 (executable)
index 0000000..021c436
--- /dev/null
@@ -0,0 +1,322 @@
+#!/usr/bin/env php
+<?
+
+/*
+Copyright (c) 2009-2010 Open Information Security Foundation
+Copyright (c) 2010-2013 Qualys, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+- Neither the name of the Qualys, Inc. nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+/*
+
+ZLIB Compressed Data Format Specification version 3.3
+http://www.ietf.org/rfc/rfc1950.txt
+
+DEFLATE Compressed Data Format Specification version 1.3
+http://www.ietf.org/rfc/rfc1951.txt
+
+GZIP file format specification version 4.3
+http://www.ietf.org/rfc/rfc1952.txt
+
+*/
+
+class GzipTest {
+
+  private $compressionMethod = 0x08;
+
+  private $forcedFlags = false;
+
+  private $filename = false;
+  
+  private $comment = false;
+  
+  private $extra = false;
+  
+  private $textFlag = false;
+  
+  private $useHeaderCrc = false;
+  
+  private $headerCrc = false;
+  
+  private $crc32 = false;
+  
+  private $isize = false;
+  
+  private $data = "The five boxing wizards jump quickly.";
+  
+  private $xfl = 0;
+  
+  public function setCompressionMethod($m) {
+    $this->compressionMethod = $m;
+  }
+  
+  public function setCrc32($crc) {
+    $this->crc32 = $crc;
+  }
+  
+  public function setInputSize($len) {
+    $this->isize = $len;
+  }
+  
+  public function setXfl($xfl) {
+    $this->xfl = $xfl;
+  }
+  
+  public function setFilename($filename) {
+    $this->filename = $filename;
+  }
+  
+  public function setComment($comment) {
+    $this->comment = $comment;
+  }
+  
+  public function setExtra($extra) {
+    $this->extra = $extra;
+  }
+  
+  public function setTextFlag($b) {
+    $this->textFlag = $b;
+  }
+  
+  public function useHeaderCrc($b) {
+    $this->useHeaderCrc = $b;
+  }
+  
+  public function setHeaderCrc($crc) {
+    $this->headerCrc = $crc;
+  }
+  
+  public function setFlags($f) {
+    $this->forcedFlags = $f;
+  }
+  
+  public function getFlags() {
+    if ($this->forcedFlags !== false) {
+      return $this->forcedFlags;
+    }
+    
+    $flags = 0;
+    
+    // FTEXT
+    if ($this->textFlag) {
+      $flags = $flags | 0x01;
+    }
+    
+    // FHCRC
+    if ($this->useHeaderCrc) {
+      $flags = $flags | 0x02;
+    }
+    
+    // FEXTRA
+    if ($this->extra !== false) {
+      $flags = $flags | 0x04;
+    }
+    
+    // FNAME
+    if ($this->filename !== false) {
+      $flags = $flags | 0x08;
+    }
+    
+    // FCOMMENT
+    if ($this->comment !== false) {
+      $flags = $flags | 0x16;
+    }
+    
+    return $flags;
+  }
+  
+  public function setData($data) {
+    $this->data = $data;
+  }
+  
+  public function writeTo($filename) {
+    $fp = fopen($filename, "w+");
+    $this->write($fp);
+    fclose($fp);
+  }
+
+  public function write($fp) {
+    $header = "";
+    
+    // header (ID1 + ID2)
+    $header .= "\x1f\x8b";
+    
+    // compression method (CM)
+    $header .= pack("C", $this->compressionMethod);
+    
+    // flags (FLG)
+    $header .= pack("C", $this->getFlags());
+    
+    // mtime (MTIME)
+    $header .= "\x9c\x54\xf4\x50";
+    
+    // extra flags (XFL)
+    $header .= pack("C", $this->xfl);
+    
+    // operating system (OS)
+    $header .= "\xff";
+    
+    // FEXTRA
+    if ($this->extra !== false) {
+      $header .= pack("v", strlen($this->extra));
+      $header .= $this->extra;
+    }
+    
+    // FNAME
+    if ($this->filename !== false) {
+      $header .= $this->filename;
+      $header .= "\x00";
+    }
+    
+    // FCOMMENT
+    if ($this->comment !== false) {
+      $header .= $this->comment;
+      $header .= "\x00";
+    }
+    
+    fwrite($fp, $header);
+    
+    // FHCRC
+    if ($this->useHeaderCrc) {
+      if ($this->headerCrc !== false) {
+        // "The CRC16 consists of the two least significant bytes of the CRC32 [...]"
+        fwrite($fp, pack("v", crc32($header)));
+      } else {
+        fwrite($fp, pack("v", $this->headerCrc));
+      }
+    }
+    
+    // compressed blocks
+    $compressedData = gzcompress($this->data);
+    // The gzcompress() function does not produce output that's fully compatible with gzip,
+    // so we need to strip out the extra data: remove 2 bytes from the beginning
+    // (CMF and FLG) and 4 bytes from the end (Adler CRC).
+    $compressedData = substr($compressedData, 2, strlen($compressedData) - 6);
+    fwrite($fp, $compressedData);
+    
+    // CRC32
+    if ($this->crc32 === false) {
+      fwrite($fp, pack("V", crc32($this->data)));
+    } else {
+      fwrite($fp, pack("V", $this->crc32));
+    }
+    
+    // uncompressed size (ISIZE)
+    if ($this->isize === false) {
+      fwrite($fp, pack("V", strlen($this->data)));
+    } else {
+      fwrite($fp, pack("V", $this->isize));
+    }
+  }
+}
+
+// 01: minimal file
+$gz = new GzipTest();
+$gz->writeTo("gztest-01-minimal.gz");
+
+// 02: with FNAME
+$gz = new GzipTest();
+$gz->setFilename("file.txt");
+$gz->writeTo("gztest-02-fname.gz");
+
+// 03: with FCOMMENT
+$gz = new GzipTest();
+$gz->setComment("COMMENT");
+$gz->writeTo("gztest-03-fcomment.gz");
+
+// 04: with FHCRC
+$gz = new GzipTest();
+$gz->useHeaderCrc(true);
+$gz->writeTo("gztest-04-fhcrc.gz");
+
+// 05: with FEXTRA
+$gz = new GzipTest();
+$gz->setExtra("EXTRA");
+$gz->writeTo("gztest-05-fextra.gz");
+
+// 06: with FTEXT
+$gz = new GzipTest();
+$gz->setTextFlag(true);
+$gz->writeTo("gztest-06-ftext.gz");
+
+// 07: with FRESERVED1
+$gz = new GzipTest();
+$gz->setFlags($gz->getFlags() | 0x20);
+$gz->writeTo("gztest-07-freserved1.gz");
+
+// 08: with FRESERVED2
+$gz = new GzipTest();
+$gz->setFlags($gz->getFlags() | 0x40);
+$gz->writeTo("gztest-08-freserved2.gz");
+
+// 09: with FRESERVED3
+$gz = new GzipTest();
+$gz->setFlags($gz->getFlags() | 0x80);
+$gz->writeTo("gztest-09-freserved3.gz");
+
+// 10: Two parts (compressed streams) 
+$gz = new GzipTest();
+$fp = fopen("gztest-10-multipart.gz", "w+");
+$gz->setFilename("file1.txt");
+$gz->write($fp);
+$gz->setData("The quick brown fox jumps over the lazy dog.");
+$gz->setFilename("file2.txt");
+$gz->write($fp);
+fclose($fp);
+
+// 11: Invalid compression method
+$gz = new GzipTest();
+$gz->setCompressionMethod(0x07);
+$gz->writeTo("gztest-11-invalid-method.gz");
+
+// 12: Invalid CRC32
+$gz = new GzipTest();
+$gz->setCrc32(0xffffffff);
+$gz->writeTo("gztest-12-invalid-crc32.gz");
+
+// 13: Invalid ISIZE
+$gz = new GzipTest();
+$gz->setData("Grumpy Wizards make toxic brew for the Evil Queen and Jack.");
+$gz->setInputSize(0x10);
+$gz->writeTo("gztest-13-invalid-isize.gz");
+
+// 14: Invalid extra flags (XFL)
+$gz = new GzipTest();
+$gz->setXfl(0xff);
+$gz->writeTo("gztest-14-invalid-xfl.gz");
+
+// 15: Invalid header CRC (FHCRC)
+$gz = new GzipTest();
+$gz->useHeaderCrc(true);
+$gz->setHeaderCrc(0xffff);
+$gz->writeTo("gztest-15-invalid-fhcrc.gz");
+
+?>
diff --git a/rust/htp/src/test/files/gztest-01-minimal.gz b/rust/htp/src/test/files/gztest-01-minimal.gz
new file mode 100644 (file)
index 0000000..e82fcde
Binary files /dev/null and b/rust/htp/src/test/files/gztest-01-minimal.gz differ
diff --git a/rust/htp/src/test/files/gztest-02-fname.gz b/rust/htp/src/test/files/gztest-02-fname.gz
new file mode 100644 (file)
index 0000000..bb38b70
Binary files /dev/null and b/rust/htp/src/test/files/gztest-02-fname.gz differ
diff --git a/rust/htp/src/test/files/gztest-03-fcomment.gz b/rust/htp/src/test/files/gztest-03-fcomment.gz
new file mode 100644 (file)
index 0000000..fe55135
Binary files /dev/null and b/rust/htp/src/test/files/gztest-03-fcomment.gz differ
diff --git a/rust/htp/src/test/files/gztest-04-fhcrc.gz b/rust/htp/src/test/files/gztest-04-fhcrc.gz
new file mode 100644 (file)
index 0000000..cd0ce6b
Binary files /dev/null and b/rust/htp/src/test/files/gztest-04-fhcrc.gz differ
diff --git a/rust/htp/src/test/files/gztest-05-fextra.gz b/rust/htp/src/test/files/gztest-05-fextra.gz
new file mode 100644 (file)
index 0000000..72290b0
Binary files /dev/null and b/rust/htp/src/test/files/gztest-05-fextra.gz differ
diff --git a/rust/htp/src/test/files/gztest-06-ftext.gz b/rust/htp/src/test/files/gztest-06-ftext.gz
new file mode 100644 (file)
index 0000000..9d9aecc
Binary files /dev/null and b/rust/htp/src/test/files/gztest-06-ftext.gz differ
diff --git a/rust/htp/src/test/files/gztest-07-freserved1.gz b/rust/htp/src/test/files/gztest-07-freserved1.gz
new file mode 100644 (file)
index 0000000..bd365b5
Binary files /dev/null and b/rust/htp/src/test/files/gztest-07-freserved1.gz differ
diff --git a/rust/htp/src/test/files/gztest-08-freserved2.gz b/rust/htp/src/test/files/gztest-08-freserved2.gz
new file mode 100644 (file)
index 0000000..e240ec1
Binary files /dev/null and b/rust/htp/src/test/files/gztest-08-freserved2.gz differ
diff --git a/rust/htp/src/test/files/gztest-09-freserved3.gz b/rust/htp/src/test/files/gztest-09-freserved3.gz
new file mode 100644 (file)
index 0000000..4071cdc
Binary files /dev/null and b/rust/htp/src/test/files/gztest-09-freserved3.gz differ
diff --git a/rust/htp/src/test/files/gztest-10-multipart.gz b/rust/htp/src/test/files/gztest-10-multipart.gz
new file mode 100644 (file)
index 0000000..a2c0cd5
Binary files /dev/null and b/rust/htp/src/test/files/gztest-10-multipart.gz differ
diff --git a/rust/htp/src/test/files/gztest-11-invalid-method.gz b/rust/htp/src/test/files/gztest-11-invalid-method.gz
new file mode 100644 (file)
index 0000000..9c13768
Binary files /dev/null and b/rust/htp/src/test/files/gztest-11-invalid-method.gz differ
diff --git a/rust/htp/src/test/files/gztest-12-invalid-crc32.gz b/rust/htp/src/test/files/gztest-12-invalid-crc32.gz
new file mode 100644 (file)
index 0000000..1832ef8
Binary files /dev/null and b/rust/htp/src/test/files/gztest-12-invalid-crc32.gz differ
diff --git a/rust/htp/src/test/files/gztest-13-invalid-isize.gz b/rust/htp/src/test/files/gztest-13-invalid-isize.gz
new file mode 100644 (file)
index 0000000..55263bc
Binary files /dev/null and b/rust/htp/src/test/files/gztest-13-invalid-isize.gz differ
diff --git a/rust/htp/src/test/files/gztest-14-invalid-xfl.gz b/rust/htp/src/test/files/gztest-14-invalid-xfl.gz
new file mode 100644 (file)
index 0000000..a844957
Binary files /dev/null and b/rust/htp/src/test/files/gztest-14-invalid-xfl.gz differ
diff --git a/rust/htp/src/test/files/gztest-15-invalid-fhcrc.gz b/rust/htp/src/test/files/gztest-15-invalid-fhcrc.gz
new file mode 100644 (file)
index 0000000..b6fa5dd
Binary files /dev/null and b/rust/htp/src/test/files/gztest-15-invalid-fhcrc.gz differ
diff --git a/rust/htp/src/test/files/http-close-headers.t b/rust/htp/src/test/files/http-close-headers.t
new file mode 100644 (file)
index 0000000..e8afa09
--- /dev/null
@@ -0,0 +1,12 @@
+>>>
+GET / HTTP/1.1\r
+Host: 100.64.0.200\r
+Connection: keep-alive\r
+Accept-Encoding: gzip, deflate\r
+Accept: */*\r
+User-Agent: python-requests/2.21.0\r
+\r
+
+<<<
+HTTP/1.0 200 OK
+Server:ng1nx
diff --git a/rust/htp/src/test/files/http-evader-017.t b/rust/htp/src/test/files/http-evader-017.t
new file mode 100644 (file)
index 0000000..ed531c0
--- /dev/null
@@ -0,0 +1,23 @@
+>>>
+GET /chunked/eicar.txt/cr-size HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Transfer-Encoding: chunked\r
+Connection: close\r
+\r
+\rf\r
+X5O!P%@AP[4\PZX\r
+\rf\r
+54(P^)7CC)7}$EI\r
+\rf\r
+CAR-STANDARD-AN\r
+\rf\r
+TIVIRUS-TEST-FI\r
+\r8\r
+LE!$H+H*\r
+0\r
diff --git a/rust/htp/src/test/files/http-evader-018.t b/rust/htp/src/test/files/http-evader-018.t
new file mode 100644 (file)
index 0000000..db5a7a9
--- /dev/null
@@ -0,0 +1,28 @@
+>>>
+GET /chunked/eicar.txt/lf-size HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Transfer-Encoding: chunked\r
+Connection: close\r
+\r
+
+f\r
+X5O!P%@AP[4\PZX\r
+
+f\r
+54(P^)7CC)7}$EI\r
+
+f\r
+CAR-STANDARD-AN\r
+
+f\r
+TIVIRUS-TEST-FI\r
+
+8\r
+LE!$H+H*\r
+0\r
diff --git a/rust/htp/src/test/files/http-evader-044.t b/rust/htp/src/test/files/http-evader-044.t
new file mode 100644 (file)
index 0000000..d14489c
--- /dev/null
@@ -0,0 +1,13 @@
+>>>
+GET /chunked/eicar.txt/chunked,http10,do_clen HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.0 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Transfer-Encoding: chunked\r
+Connection: close\r
+\r
+X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*
\ No newline at end of file
diff --git a/rust/htp/src/test/files/http-evader-059.t b/rust/htp/src/test/files/http-evader-059.t
new file mode 100644 (file)
index 0000000..21d0506
--- /dev/null
@@ -0,0 +1,50 @@
+>>>
+GET /chunked/eicar.txt/chunkednl- HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Yet-another-header: foo\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Transfer-Encoding: chunked\r
\r
+Connection: close\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-060.t b/rust/htp/src/test/files/http-evader-060.t
new file mode 100644 (file)
index 0000000..b4dd8f7
--- /dev/null
@@ -0,0 +1,51 @@
+>>>\r
+GET /chunked/eicar.txt/nl-nl-chunked HTTP/1.1\r
+Host: evader.example.com\r
+\r
+\r
+<<<\r
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Yet-another-header: foo\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Transfer-Encoding: \r
\r
+ chunked\r
+Connection: close\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-061.t b/rust/htp/src/test/files/http-evader-061.t
new file mode 100644 (file)
index 0000000..63a77d0
--- /dev/null
@@ -0,0 +1,52 @@
+>>>\r
+GET /chunked/eicar.txt/nl-nl-chunked-nl- HTTP/1.1\r
+Host: evader.example.com\r
+\r
+\r
+<<<\r
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Yet-another-header: foo\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Transfer-Encoding: \r
\r
+ chunked \r
\r
+Connection: close\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-078.t b/rust/htp/src/test/files/http-evader-078.t
new file mode 100644 (file)
index 0000000..ae61150
--- /dev/null
@@ -0,0 +1,13 @@
+>>>
+GET /chunked/eicar.txt/chunkedcr-,do_clen HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Transfer-Encoding: chunked\r \r
+Connection: close\r
+\r
+X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*
\ No newline at end of file
diff --git a/rust/htp/src/test/files/http-evader-118.t b/rust/htp/src/test/files/http-evader-118.t
new file mode 100644 (file)
index 0000000..2cbcd6c
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-118.t differ
diff --git a/rust/htp/src/test/files/http-evader-130.t b/rust/htp/src/test/files/http-evader-130.t
new file mode 100644 (file)
index 0000000..6ea0730
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-130.t differ
diff --git a/rust/htp/src/test/files/http-evader-195.t b/rust/htp/src/test/files/http-evader-195.t
new file mode 100644 (file)
index 0000000..78097e5
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-195.t differ
diff --git a/rust/htp/src/test/files/http-evader-274.t b/rust/htp/src/test/files/http-evader-274.t
new file mode 100644 (file)
index 0000000..5045bea
--- /dev/null
@@ -0,0 +1,50 @@
+>>>
+GET /broken/eicar.txt/somehdr;space;chunked HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Connection: close\r
+X-Foo: bar\r
+ Transfer-Encoding: chunked\r
+Yet-another-header: foo\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-284.t b/rust/htp/src/test/files/http-evader-284.t
new file mode 100644 (file)
index 0000000..02bf5d6
--- /dev/null
@@ -0,0 +1,50 @@
+>>>
+GET /broken/eicar.txt/cr;chunked HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Connection: close\r
+\r\r
+Transfer-Encoding: chunked\r
+Yet-another-header: foo\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-286.t b/rust/htp/src/test/files/http-evader-286.t
new file mode 100644 (file)
index 0000000..fe450b9
--- /dev/null
@@ -0,0 +1,49 @@
+>>>
+GET /broken/eicar.txt/crcronly;chunked HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Connection: close\r
+\r\rTransfer-Encoding: chunked\r
+Yet-another-header: foo\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-287.t b/rust/htp/src/test/files/http-evader-287.t
new file mode 100644 (file)
index 0000000..e221868
--- /dev/null
@@ -0,0 +1,49 @@
+>>>
+GET /broken/eicar.txt/cr-cronly;chunked HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Connection: close\r
+\r \rTransfer-Encoding: chunked\r
+Yet-another-header: foo\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-297.t b/rust/htp/src/test/files/http-evader-297.t
new file mode 100644 (file)
index 0000000..7d74dcd
--- /dev/null
@@ -0,0 +1,49 @@
+>>>
+GET /broken/eicar.txt/te%5C015%5C040%3Achunked;do_chunked HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Connection: close\r
+Transfer-Encoding\r :chunked\r
+Yet-another-header: foo\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-300.t b/rust/htp/src/test/files/http-evader-300.t
new file mode 100644 (file)
index 0000000..cb4ecb1
--- /dev/null
@@ -0,0 +1,52 @@
+>>>
+GET /broken/eicar.txt/te%5C015%5C012%5C040%5C015%5C012%5C040%3A%5C015%5C012%5C040chunked;do_chunked HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Connection: close\r
+Transfer-Encoding\r
\r
+ :\r
+ chunked\r
+Yet-another-header: foo\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-303.t b/rust/htp/src/test/files/http-evader-303.t
new file mode 100644 (file)
index 0000000..81bf205
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-303.t differ
diff --git a/rust/htp/src/test/files/http-evader-307.t b/rust/htp/src/test/files/http-evader-307.t
new file mode 100644 (file)
index 0000000..ab60170
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-307.t differ
diff --git a/rust/htp/src/test/files/http-evader-318.t b/rust/htp/src/test/files/http-evader-318.t
new file mode 100644 (file)
index 0000000..aa99bca
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-318.t differ
diff --git a/rust/htp/src/test/files/http-evader-320.t b/rust/htp/src/test/files/http-evader-320.t
new file mode 100644 (file)
index 0000000..f9a8b5d
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-320.t differ
diff --git a/rust/htp/src/test/files/http-evader-321.t b/rust/htp/src/test/files/http-evader-321.t
new file mode 100644 (file)
index 0000000..80f21a8
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-321.t differ
diff --git a/rust/htp/src/test/files/http-evader-390.t b/rust/htp/src/test/files/http-evader-390.t
new file mode 100644 (file)
index 0000000..476c9a4
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-390.t differ
diff --git a/rust/htp/src/test/files/http-evader-402.t b/rust/htp/src/test/files/http-evader-402.t
new file mode 100644 (file)
index 0000000..1c52b78
--- /dev/null
@@ -0,0 +1,44 @@
+>>>
+GET /broken/eicar.txt/chunked;cr-no-crlf;end-crlflf HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\rContent-type: application/octet-stream\rContent-disposition: attachment; filename="eicar.txt"\rConnection: close\rTransfer-Encoding: chunked\rYet-another-header: foo\r
+
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-405.t b/rust/htp/src/test/files/http-evader-405.t
new file mode 100644 (file)
index 0000000..4c41e43
--- /dev/null
@@ -0,0 +1,49 @@
+>>>
+GET /broken/eicar.txt/chunked;lfcr-no-crlf;end-crlfcrlf HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok
+\rContent-type: application/octet-stream
+\rContent-disposition: attachment; filename="eicar.txt"
+\rConnection: close
+\rTransfer-Encoding: chunked
+\rYet-another-header: foo\r
+\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-411.t b/rust/htp/src/test/files/http-evader-411.t
new file mode 100644 (file)
index 0000000..e24e4d3
--- /dev/null
@@ -0,0 +1,49 @@
+>>>
+GET /broken/eicar.txt/end-lfcrcrlf;chunked HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Connection: close\r
+Transfer-Encoding: chunked\r
+Yet-another-header: foo
+\r\r
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-416.t b/rust/htp/src/test/files/http-evader-416.t
new file mode 100644 (file)
index 0000000..c1cb2bd
--- /dev/null
@@ -0,0 +1,14 @@
+>>>
+GET /broken/eicar.txt/end-lf%5C040lf HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Connection: close\r
+Yet-another-header: foo\r
+Content-length: 68
+X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*
diff --git a/rust/htp/src/test/files/http-evader-419.t b/rust/htp/src/test/files/http-evader-419.t
new file mode 100644 (file)
index 0000000..34c310f
--- /dev/null
@@ -0,0 +1,49 @@
+>>>
+GET /broken/eicar.txt/chunked;end-lf%5C040lf HTTP/1.1\r
+Host: evader.example.com\r
+\r
+
+<<<
+HTTP/1.1 200 ok\r
+Content-type: application/octet-stream\r
+Content-disposition: attachment; filename="eicar.txt"\r
+Connection: close\r
+Transfer-Encoding: chunked\r
+Yet-another-header: foo
+4\r
+X5O!\r
+4\r
+P%@A\r
+4\r
+P[4\\r
+4\r
+PZX5\r
+4\r
+4(P^\r
+4\r
+)7CC\r
+4\r
+)7}$\r
+4\r
+EICA\r
+4\r
+R-ST\r
+4\r
+ANDA\r
+4\r
+RD-A\r
+4\r
+NTIV\r
+4\r
+IRUS\r
+4\r
+-TES\r
+4\r
+T-FI\r
+4\r
+LE!$\r
+4\r
+H+H*\r
+0\r
+\r
diff --git a/rust/htp/src/test/files/http-evader-423.t b/rust/htp/src/test/files/http-evader-423.t
new file mode 100644 (file)
index 0000000..0f3ad0d
Binary files /dev/null and b/rust/htp/src/test/files/http-evader-423.t differ
diff --git a/rust/htp/src/test/files/http-start-from-response.t b/rust/htp/src/test/files/http-start-from-response.t
new file mode 100644 (file)
index 0000000..2763ea7
--- /dev/null
@@ -0,0 +1,41 @@
+<<<
+HTTP/1.1 200 OK\r
+Date: Wed, 04 Jul 2018 09:35:14 GMT\r
+Server: Apache/2.4.33 (Fedora) OpenSSL/1.1.0h-fips\r
+Last-Modified: Tue, 03 Jul 2018 10:54:38 GMT\r
+ETag: "b-5701623f27308"\r
+Accept-Ranges: bytes\r
+Content-Length: 11\r
+Keep-Alive: timeout=5, max=100\r
+Connection: Keep-Alive\r
+Content-Type: text/html; charset=UTF-8\r
+\r
+Hello GCX!
+
+>>>
+GET /favicon.ico HTTP/1.1\r
+Host: 172.16.9.189\r
+Connection: keep-alive\r
+User-Agent: Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36\r
+Accept: image/webp,image/apng,image/*,*/*;q=0.8\r
+Referer: http://172.16.9.189/\r
+Accept-Encoding: gzip, deflate\r
+Accept-Language: en-US,en;q=0.9\r
+\r
+
+<<<
+HTTP/1.1 404 Not Found\r
+Date: Wed, 04 Jul 2018 09:35:14 GMT\r
+Server: Apache/2.4.33 (Fedora) OpenSSL/1.1.0h-fips\r
+Content-Length: 209\r
+Keep-Alive: timeout=5, max=99\r
+Connection: Keep-Alive\r
+Content-Type: text/html; charset=iso-8859-1\r
+\r
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
+<html><head>
+<title>404 Not Found</title>
+</head><body>
+<h1>Not Found</h1>
+<p>The requested URL /favicon.ico was not found on this server.</p>
+</body></html>
diff --git a/rust/htp/src/test/gunzip.rs b/rust/htp/src/test/gunzip.rs
new file mode 100644 (file)
index 0000000..7f9dbac
--- /dev/null
@@ -0,0 +1,199 @@
+#![allow(non_snake_case)]
+use crate::{
+    bstr::*,
+    config::{Config, HtpServerPersonality},
+    connection_parser::{ConnectionParser, ParserData},
+    decompressors::{Decompressor, HtpContentEncoding},
+    transaction::Transaction,
+    HtpStatus,
+};
+use std::{env, path::PathBuf};
+
+#[derive(Debug)]
+struct Test {
+    connp: ConnectionParser,
+    expected: Bstr,
+    decompressor: Decompressor,
+}
+
+enum TestError {
+    Io(()),
+    Htp(()),
+}
+
+fn GUnzip_decompressor_callback(tx: &mut Transaction, d: &ParserData) -> HtpStatus {
+    tx.set_user_data(Box::new(Bstr::from(d.as_slice())));
+    HtpStatus::OK
+}
+
+impl Test {
+    fn new() -> Self {
+        let mut cfg = Config::default();
+        cfg.set_server_personality(HtpServerPersonality::APACHE_2)
+            .unwrap();
+        // The default bomb limit may be slow in some development environments causing tests to fail.
+        cfg.compression_options.set_time_limit(u32::MAX);
+        let cfg = Box::leak(Box::new(cfg));
+        let mut connp = ConnectionParser::new(cfg);
+
+        let expected = Bstr::from("The five boxing wizards jump quickly.");
+        let tx = connp.request_mut().unwrap() as *mut Transaction;
+        Test {
+            connp,
+            expected,
+            decompressor: Decompressor::new_with_callback(
+                HtpContentEncoding::Gzip,
+                Box::new(move |data: Option<&[u8]>| {
+                    let data = ParserData::from(data);
+                    GUnzip_decompressor_callback(unsafe { &mut *tx }, &data);
+                    Ok(data.len())
+                }),
+                Default::default(),
+            )
+            .unwrap(),
+        }
+    }
+
+    fn run(&mut self, filename: &str) -> Result<(), TestError> {
+        let mut filepath = if let Ok(dir) = std::env::var("srcdir") {
+            PathBuf::from(dir)
+        } else {
+            let mut base = PathBuf::from(
+                env::var("CARGO_MANIFEST_DIR").expect("Could not determine test file directory"),
+            );
+            base.push("src");
+            base.push("test");
+            base.push("files");
+            base
+        };
+        filepath.push(filename);
+
+        let data = std::fs::read(filepath).map_err(|_| TestError::Io(()))?;
+        self.decompressor
+            .decompress(&data)
+            .map(|_| ())
+            .map_err(|_| TestError::Htp(()))
+    }
+}
+
+#[test]
+fn GUnzip_Minimal() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-01-minimal.gz").is_ok());
+    let request_tx = t.connp.request().unwrap();
+    let output = request_tx.user_data::<Bstr>().unwrap();
+    assert_eq!(*output, t.expected);
+}
+
+#[test]
+fn GUnzip_FNAME() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-02-fname.gz").is_ok());
+    let request_tx = t.connp.request().unwrap();
+    let output = request_tx.user_data::<Bstr>().unwrap();
+    assert_eq!(*output, t.expected);
+}
+
+#[test]
+fn GUnzip_FEXTRA() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-05-fextra.gz").is_ok());
+    let request_tx = t.connp.request().unwrap();
+    let output = request_tx.user_data::<Bstr>().unwrap();
+    assert_eq!(*output, t.expected);
+}
+
+#[test]
+fn GUnzip_FTEXT() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-06-ftext.gz").is_ok());
+    let request_tx = t.connp.request().unwrap();
+    let output = request_tx.user_data::<Bstr>().unwrap();
+    assert_eq!(*output, t.expected);
+}
+
+#[test]
+fn GUnzip_Multipart() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-10-multipart.gz").is_ok());
+    let request_tx = t.connp.request().unwrap();
+    let output = request_tx.user_data::<Bstr>().unwrap();
+    assert_eq!(*output, t.expected);
+}
+
+#[test]
+fn GUnzip_InvalidExtraFlags() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-14-invalid-xfl.gz").is_ok());
+    let request_tx = t.connp.request().unwrap();
+    let output = request_tx.user_data::<Bstr>().unwrap();
+    assert_eq!(*output, t.expected);
+}
+
+#[test]
+fn GUnzip_InvalidHeaderCrc() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-15-invalid-fhcrc.gz").is_ok());
+    let request_tx = t.connp.request().unwrap();
+    let output = request_tx.user_data::<Bstr>().unwrap();
+    assert_eq!(*output, t.expected);
+}
+
+/*
+// These tests were disabled in libhtp
+#[test]
+fn GUnzip_FCOMMENT() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-03-fcomment.gz").is_ok());
+    assert_eq!(t.output, t.expected);
+}
+
+#[test]
+fn GUnzip_FHCRC() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-04-fhcrc.gz").is_ok());
+    assert_eq!(t.output, t.expected);
+}
+
+#[test]
+fn GUnzip_FRESERVED1() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-07-freserved1.gz").is_ok());
+    assert_eq!(t.output, t.expected);
+}
+
+#[test]
+fn GUnzip_FRESERVED2() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-08-freserved2.gz").is_ok());
+    assert_eq!(t.output, t.expected);
+}
+
+#[test]
+fn GUnzip_FRESERVED3() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-09-freserved3.gz").is_ok());
+    assert_eq!(t.output, t.expected);
+}
+
+#[test]
+fn GUnzip_InvalidMethod() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-11-invalid-method.gz.gz").is_ok());
+    assert_eq!(t.output, t.expected);
+}
+
+#[test]
+fn GUnzip_InvalidCrc() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-12-invalid-crc32.gz").is_ok());
+    assert_eq!(t.output, t.expected);
+}
+
+#[test]
+fn GUnzip_InvalidInputSize() {
+    let mut t = Test::new();
+    assert!(t.run("gztest-13-invalid-isize.gz").is_ok());
+    assert_eq!(t.output, t.expected);
+}
+*/
diff --git a/rust/htp/src/test/hybrid.rs b/rust/htp/src/test/hybrid.rs
new file mode 100644 (file)
index 0000000..4c2c9fa
--- /dev/null
@@ -0,0 +1,817 @@
+#![allow(non_snake_case)]
+#![allow(non_camel_case_types)]
+use crate::{
+    bstr::Bstr,
+    config::{Config, HtpServerPersonality},
+    connection_parser::{ConnectionParser, ParserData},
+    error::Result,
+    transaction::{Header, HtpProtocol, HtpResponseNumber, Transaction},
+    uri::Uri,
+    HtpStatus,
+};
+use base64::{engine::general_purpose::STANDARD, Engine};
+use std::net::{IpAddr, Ipv4Addr};
+
+use super::common::{assert_header_eq, assert_request_header_eq, assert_response_header_eq};
+
+struct HybridParsing_Get_User_Data {
+    // Request callback indicators.
+    callback_REQUEST_START_invoked: i32,
+    callback_REQUEST_LINE_invoked: i32,
+    callback_REQUEST_HEADERS_invoked: i32,
+    callback_REQUEST_COMPLETE_invoked: i32,
+
+    // Response callback indicators.
+    callback_RESPONSE_START_invoked: i32,
+    callback_RESPONSE_LINE_invoked: i32,
+    callback_RESPONSE_HEADERS_invoked: i32,
+    callback_RESPONSE_COMPLETE_invoked: i32,
+
+    // Transaction callback indicators.
+    callback_TRANSACTION_COMPLETE_invoked: i32,
+
+    // Response body handling fields.
+    response_body_chunks_seen: i32,
+    response_body_correctly_received: i32,
+}
+
+impl HybridParsing_Get_User_Data {
+    fn new() -> Self {
+        HybridParsing_Get_User_Data {
+            callback_REQUEST_START_invoked: 0,
+            callback_REQUEST_LINE_invoked: 0,
+            callback_REQUEST_HEADERS_invoked: 0,
+            callback_REQUEST_COMPLETE_invoked: 0,
+            callback_RESPONSE_START_invoked: 0,
+            callback_RESPONSE_LINE_invoked: 0,
+            callback_RESPONSE_HEADERS_invoked: 0,
+            callback_RESPONSE_COMPLETE_invoked: 0,
+            callback_TRANSACTION_COMPLETE_invoked: 0,
+            response_body_chunks_seen: 0,
+            response_body_correctly_received: 0,
+        }
+    }
+}
+
+fn HybridParsing_Get_Callback_REQUEST_START(tx: &mut Transaction) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+    user_data.callback_REQUEST_START_invoked += 1;
+    Ok(())
+}
+
+fn HybridParsing_Get_Callback_REQUEST_LINE(tx: &mut Transaction) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+    user_data.callback_REQUEST_LINE_invoked += 1;
+    Ok(())
+}
+
+fn HybridParsing_Get_Callback_REQUEST_HEADERS(tx: &mut Transaction) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+    user_data.callback_REQUEST_HEADERS_invoked += 1;
+    Ok(())
+}
+
+fn HybridParsing_Get_Callback_REQUEST_COMPLETE(tx: &mut Transaction) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+    user_data.callback_REQUEST_COMPLETE_invoked += 1;
+    Ok(())
+}
+
+fn HybridParsing_Get_Callback_RESPONSE_START(tx: &mut Transaction) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+    user_data.callback_RESPONSE_START_invoked += 1;
+    Ok(())
+}
+
+fn HybridParsing_Get_Callback_RESPONSE_LINE(tx: &mut Transaction) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+    user_data.callback_RESPONSE_LINE_invoked += 1;
+    Ok(())
+}
+
+fn HybridParsing_Get_Callback_RESPONSE_HEADERS(tx: &mut Transaction) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+    user_data.callback_RESPONSE_HEADERS_invoked += 1;
+    Ok(())
+}
+
+fn HybridParsing_Get_Callback_RESPONSE_COMPLETE(tx: &mut Transaction) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+    user_data.callback_RESPONSE_COMPLETE_invoked += 1;
+    Ok(())
+}
+
+fn HybridParsing_Get_Callback_TRANSACTION_COMPLETE(tx: &mut Transaction) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+    user_data.callback_TRANSACTION_COMPLETE_invoked += 1;
+    Ok(())
+}
+
+fn HybridParsing_Get_Callback_RESPONSE_BODY_DATA(
+    tx: &mut Transaction, d: &ParserData,
+) -> Result<()> {
+    let user_data = tx.user_data_mut::<HybridParsing_Get_User_Data>().unwrap();
+
+    // Don't do anything if in errored state.
+    if user_data.response_body_correctly_received == -1 {
+        return Err(HtpStatus::ERROR);
+    }
+
+    let data = d.as_slice();
+    match user_data.response_body_chunks_seen {
+        0 => {
+            if data == b"<h1>Hello" {
+                user_data.response_body_chunks_seen += 1;
+            } else {
+                eprintln!("Mismatch in 1st chunk");
+                user_data.response_body_correctly_received = -1;
+            }
+        }
+        1 => {
+            if data == b" " {
+                user_data.response_body_chunks_seen += 1;
+            } else {
+                eprintln!("Mismatch in 2nd chunk");
+                user_data.response_body_correctly_received = -1;
+            }
+        }
+        2 => {
+            if data == b"World!</h1>" {
+                user_data.response_body_chunks_seen += 1;
+                user_data.response_body_correctly_received = 1;
+            } else {
+                eprintln!("Mismatch in 3rd chunk");
+                user_data.response_body_correctly_received = -1;
+            }
+        }
+        _ => {
+            eprintln!("Seen more than 3 chunks");
+            user_data.response_body_correctly_received = -1;
+        }
+    }
+    Ok(())
+}
+
+// Set one request header.
+macro_rules! tx_set_header {
+    ($headers:expr, $name:expr, $value:expr) => {
+        $headers
+            .elements
+            .push(Header::new($name.into(), $value.into()))
+    };
+}
+
+fn TestConfig() -> Config {
+    let mut cfg = Config::default();
+    cfg.set_server_personality(HtpServerPersonality::APACHE_2)
+        .unwrap();
+    cfg
+}
+
+fn register_user_callbacks(cfg: &mut Config) {
+    // Request callbacks
+    cfg.register_request_start(HybridParsing_Get_Callback_REQUEST_START);
+    cfg.register_request_line(HybridParsing_Get_Callback_REQUEST_LINE);
+    cfg.register_request_headers(HybridParsing_Get_Callback_REQUEST_HEADERS);
+    cfg.register_request_complete(HybridParsing_Get_Callback_REQUEST_COMPLETE);
+
+    // Response callbacks
+    cfg.register_response_start(HybridParsing_Get_Callback_RESPONSE_START);
+    cfg.register_response_line(HybridParsing_Get_Callback_RESPONSE_LINE);
+    cfg.register_response_headers(HybridParsing_Get_Callback_RESPONSE_HEADERS);
+    cfg.register_response_body_data(HybridParsing_Get_Callback_RESPONSE_BODY_DATA);
+    cfg.register_response_complete(HybridParsing_Get_Callback_RESPONSE_COMPLETE);
+
+    // Transaction callbacks
+    cfg.register_transaction_complete(HybridParsing_Get_Callback_TRANSACTION_COMPLETE);
+}
+
+struct HybridParsingTest {
+    connp: ConnectionParser,
+}
+
+impl HybridParsingTest {
+    fn new(cfg: Config) -> Self {
+        let cfg = Box::leak(Box::new(cfg));
+        let mut connp = ConnectionParser::new(cfg);
+        connp.open(
+            Some(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))),
+            Some(32768),
+            Some(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))),
+            Some(80),
+            None,
+        );
+
+        HybridParsingTest { connp }
+    }
+}
+
+/// Test hybrid mode with one complete GET transaction; request then response
+/// with a body. Most features are tested, including query string parameters and callbacks.
+#[test]
+fn GetTest() {
+    let mut cfg = TestConfig();
+    // Register callbacks
+    register_user_callbacks(&mut cfg);
+    let mut t = HybridParsingTest::new(cfg);
+    let tx = t.connp.request_mut().unwrap();
+
+    // Configure user data and callbacks
+    tx.set_user_data(Box::new(HybridParsing_Get_User_Data::new()));
+    // We should be operating on the same transaction throughout
+    let tx_id = tx.index;
+
+    // Make dummy parser data to satisfy callbacks
+    let mut p = ParserData::from(b"" as &[u8]);
+
+    // Request begins
+    t.connp.state_request_start().unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_REQUEST_START_invoked);
+
+    // Request line data
+    t.connp
+        .parse_request_line(b"GET /?p=1&q=2 HTTP/1.1")
+        .unwrap();
+
+    // Request line complete
+    t.connp.state_request_line().unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_REQUEST_LINE_invoked);
+
+    // Check request line data
+    let tx = t.connp.tx_mut(tx_id).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=1&q=2"));
+    assert!(tx.request_protocol.as_ref().unwrap().eq_slice("HTTP/1.1"));
+    let parsed_uri = tx.parsed_uri.as_ref().unwrap();
+    assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/"));
+    assert!(parsed_uri.query.as_ref().unwrap().eq_slice("p=1&q=2"));
+
+    // Request headers
+    tx_set_header!(tx.request_headers, "Host", "www.example.com");
+    tx_set_header!(tx.request_headers, "Connection", "keep-alive");
+    tx_set_header!(tx.request_headers, "User-Agent", "Mozilla/5.0");
+
+    // Request headers complete
+    t.connp.state_request_headers(&mut p).unwrap();
+
+    // Check headers
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_REQUEST_HEADERS_invoked);
+
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert_request_header_eq!(tx, "host", "www.example.com");
+    assert_request_header_eq!(tx, "connection", "keep-alive");
+    assert_request_header_eq!(tx, "user-agent", "Mozilla/5.0");
+
+    // Request complete
+    t.connp.state_request_complete(&mut p).unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_REQUEST_COMPLETE_invoked);
+
+    // Response begins
+    t.connp.state_response_start().unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_RESPONSE_START_invoked);
+
+    // Response line data
+    t.connp.parse_response_line(b"HTTP/1.1 200 OK").unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert!(tx.response_protocol.as_ref().unwrap().eq_slice("HTTP/1.1"));
+    assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number);
+    assert!(tx.response_status.as_ref().unwrap().eq_slice("200"));
+    assert!(tx.response_status_number.eq_num(200));
+    assert!(tx.response_message.as_ref().unwrap().eq_slice("OK"));
+
+    // Response line complete
+    t.connp.state_response_line().unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+
+    assert_eq!(1, user_data.callback_RESPONSE_LINE_invoked);
+
+    // Response header data
+    let tx = t.connp.tx_mut(tx_id).unwrap();
+    tx_set_header!(tx.response_headers, "Content-Type", "text/html");
+    tx_set_header!(tx.response_headers, "Server", "Apache");
+
+    // Response headers complete
+    t.connp.state_response_headers(&mut p).unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_RESPONSE_HEADERS_invoked);
+
+    // Check response headers
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert_response_header_eq!(tx, "content-type", "text/html");
+    assert_response_header_eq!(tx, "server", "Apache");
+
+    // Response body data
+    t.connp.response_body_data(Some(b"<h1>Hello")).unwrap();
+    t.connp.response_body_data(Some(b" ")).unwrap();
+    t.connp.response_body_data(Some(b"World!</h1>")).unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.response_body_correctly_received);
+
+    let tx = t.connp.tx_mut(tx_id).unwrap();
+    tx_set_header!(tx.response_headers, "Content-Type", "text/html");
+    tx_set_header!(tx.response_headers, "Server", "Apache");
+
+    // Check trailing response headers
+    assert_response_header_eq!(tx, "content-type", "text/html");
+    assert_response_header_eq!(tx, "server", "Apache");
+
+    t.connp.state_response_complete(&mut p).unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_RESPONSE_COMPLETE_invoked);
+}
+
+/// Use a POST request in order to test request body processing and parameter parsing.
+#[test]
+fn PostUrlecodedTest() {
+    let mut t = HybridParsingTest::new(TestConfig());
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Make dummy parser data to satisfy callbacks
+    let mut p = ParserData::from(b"" as &[u8]);
+
+    // Request begins
+    t.connp.state_request_start().unwrap();
+
+    // Request line data
+    t.connp.parse_request_line(b"POST / HTTP/1.1").unwrap();
+
+    // Request line complete
+    t.connp.state_request_line().unwrap();
+
+    // Configure headers to trigger the URLENCODED parser
+    let tx = t.connp.tx_mut(tx_id).unwrap();
+    tx_set_header!(
+        tx.request_headers,
+        "Content-Type",
+        "application/x-www-form-urlencoded"
+    );
+    tx_set_header!(tx.request_headers, "Content-Length", "7");
+
+    // Request headers complete
+    t.connp.state_request_headers(&mut p).unwrap();
+
+    // Send request body
+    t.connp.request_body_data(Some(b"p=1")).unwrap();
+    t.connp.request_body_data(Some(b"")).unwrap();
+    t.connp.request_body_data(Some(b"&")).unwrap();
+    t.connp.request_body_data(Some(b"q=2")).unwrap();
+
+    let tx = t.connp.tx_mut(tx_id).unwrap();
+    tx_set_header!(tx.request_headers, "Host", "www.example.com");
+    tx_set_header!(tx.request_headers, "Connection", "keep-alive");
+    tx_set_header!(tx.request_headers, "User-Agent", "Mozilla/5.0");
+
+    assert_request_header_eq!(tx, "host", "www.example.com");
+    assert_request_header_eq!(tx, "connection", "keep-alive");
+    assert_request_header_eq!(tx, "user-agent", "Mozilla/5.0");
+
+    // Request complete
+    t.connp.state_request_complete(&mut p).unwrap();
+}
+
+/// Test with a compressed response body and decompression enabled.
+#[test]
+fn CompressedResponse() {
+    let mut t = HybridParsingTest::new(TestConfig());
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Make dummy parser data to satisfy callbacks
+    let mut p = ParserData::from(b"" as &[u8]);
+
+    t.connp.state_request_start().unwrap();
+
+    t.connp.parse_request_line(b"GET / HTTP/1.1").unwrap();
+
+    t.connp.state_request_line().unwrap();
+    t.connp.state_request_headers(&mut p).unwrap();
+    t.connp.state_request_complete(&mut p).unwrap();
+
+    t.connp.state_response_start().unwrap();
+
+    t.connp.parse_response_line(b"HTTP/1.1 200 OK").unwrap();
+    let tx = t.connp.tx_mut(tx_id).unwrap();
+    tx_set_header!(tx.response_headers, "Content-Encoding", "gzip");
+    tx_set_header!(tx.response_headers, "Content-Length", "187");
+
+    t.connp.state_response_headers(&mut p).unwrap();
+
+    let RESPONSE: &[u8] =
+        b"H4sIAAAAAAAAAG2PwQ6CMBBE73xFU++tXk2pASliAiEhPegRYUOJYEktEP5eqB6dy2ZnJ5O3LJFZ\
+      yj2WiCBah7zKVPBMT1AjCf2gTWnabmH0e/AY/QXDPLqj8HLO07zw8S52wkiKm1zXvRPeeg//2lbX\
+      kwpQrauxh5dFqnyj3uVYgJJCxD5W1g5HSud5Jo3WTQek0mR8UgNlDYZOLcz0ZMuH3y+YKzDAaMDJ\
+      SrihOVL32QceVXUy4QAAAA==";
+
+    let body = Bstr::from(STANDARD.decode(RESPONSE).unwrap());
+
+    t.connp.response_body_data(Some(body.as_slice())).unwrap();
+
+    t.connp.state_response_complete(&mut p).unwrap();
+
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert_eq!(187, tx.response_message_len);
+    assert_eq!(225, tx.response_entity_len);
+}
+
+#[test]
+fn ParamCaseSensitivity() {
+    let mut t = HybridParsingTest::new(TestConfig());
+
+    // Request begins
+    t.connp.state_request_start().unwrap();
+
+    // Request line data
+    t.connp
+        .parse_request_line(b"GET /?p=1&Q=2 HTTP/1.1")
+        .unwrap();
+
+    // Request line complete
+    t.connp.state_request_line().unwrap();
+}
+
+/// Use a POST request in order to test request body processing and parameter
+/// parsing. In hybrid mode, we expect that the body arrives to us dechunked.
+#[test]
+fn PostUrlecodedChunked() {
+    let mut t = HybridParsingTest::new(TestConfig());
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Make dummy parser data to satisfy callbacks
+    let mut p = ParserData::from(b"" as &[u8]);
+
+    // Request begins.
+    t.connp.state_request_start().unwrap();
+
+    // Request line data.
+    t.connp.parse_request_line(b"POST / HTTP/1.1").unwrap();
+    t.connp.state_request_line().unwrap();
+
+    // Configure headers to trigger the URLENCODED parser.
+    let tx = t.connp.tx_mut(tx_id).unwrap();
+    tx_set_header!(
+        tx.request_headers,
+        "Content-Type",
+        "application/x-www-form-urlencoded"
+    );
+    tx_set_header!(tx.request_headers, "Transfer-Encoding", "chunked");
+
+    // Request headers complete.
+    t.connp.state_request_headers(&mut p).unwrap();
+
+    // Send request body.
+    t.connp.request_body_data(Some(b"p=1")).unwrap();
+    t.connp.request_body_data(Some(b"&")).unwrap();
+    t.connp.request_body_data(Some(b"q=2")).unwrap();
+
+    // Request complete.
+    t.connp.state_request_complete(&mut p).unwrap();
+}
+
+/// Parse a simple HTTP/1.0 request line in hybrid mode and verify that the
+/// method, URI, protocol and parsed query component are extracted correctly.
+#[test]
+fn RequestLineParsing1() {
+    let mut t = HybridParsingTest::new(TestConfig());
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Request begins
+    t.connp.state_request_start().unwrap();
+
+    // Request line data
+    t.connp
+        .parse_request_line(b"GET /?p=1&q=2 HTTP/1.0")
+        .unwrap();
+
+    // Request line complete
+    t.connp.state_request_line().unwrap();
+
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=1&q=2"));
+    assert!(tx.request_protocol.as_ref().unwrap().eq_slice("HTTP/1.0"));
+    let parsed_uri = tx.parsed_uri.as_ref().unwrap();
+    assert!(parsed_uri.query.as_ref().unwrap().eq_slice("p=1&q=2"));
+}
+
+/// A request line with no protocol ("GET /") must be treated as HTTP/0.9:
+/// protocol number set to V0_9 and no request_protocol string recorded.
+#[test]
+fn RequestLineParsing2() {
+    let mut t = HybridParsingTest::new(TestConfig());
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Feed data to the parser.
+    t.connp.state_request_start().unwrap();
+    t.connp.parse_request_line(b"GET /").unwrap();
+    t.connp.state_request_line().unwrap();
+
+    // Check the results now.
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert!(tx.is_protocol_0_9);
+    assert_eq!(HtpProtocol::V0_9, tx.request_protocol_number);
+    assert!(tx.request_protocol.is_none());
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/"));
+}
+
+/// A protocol token with embedded spaces ("HTTP  / 01.1") is still recognized
+/// as HTTP/1.1, while the raw protocol string is preserved verbatim.
+#[test]
+fn RequestLineParsing3() {
+    let mut t = HybridParsingTest::new(TestConfig());
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Feed data to the parser.
+    t.connp.state_request_start().unwrap();
+    t.connp.parse_request_line(b"GET / HTTP  / 01.1").unwrap();
+    t.connp.state_request_line().unwrap();
+
+    // Check the results now.
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpProtocol::V1_1, tx.request_protocol_number);
+    assert!(tx
+        .request_protocol
+        .as_ref()
+        .unwrap()
+        .eq_slice("HTTP  / 01.1"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/"));
+}
+
+/// An unrecognizable version ("01.10") yields HtpProtocol::Invalid, but the
+/// raw protocol string is still captured.
+#[test]
+fn RequestLineParsing4() {
+    let mut t = HybridParsingTest::new(TestConfig());
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Feed data to the parser.
+    t.connp.state_request_start().unwrap();
+    t.connp.parse_request_line(b"GET / HTTP  / 01.10").unwrap();
+    t.connp.state_request_line().unwrap();
+
+    // Check the results now.
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpProtocol::Invalid, tx.request_protocol_number);
+    assert!(tx
+        .request_protocol
+        .as_ref()
+        .unwrap()
+        .eq_slice("HTTP  / 01.10"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/"));
+}
+
+/// Same input as RequestLineParsing4, but with allow_space_uri enabled the
+/// split changes: spaces fold into the URI and only the last token ("01.10")
+/// is taken as the protocol.
+#[test]
+fn RequestLineParsing5() {
+    let mut cfg = TestConfig();
+    cfg.set_allow_space_uri(true);
+    let mut t = HybridParsingTest::new(cfg);
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Feed data to the parser.
+    t.connp.state_request_start().unwrap();
+    t.connp.parse_request_line(b"GET / HTTP  / 01.10").unwrap();
+    t.connp.state_request_line().unwrap();
+
+    // Check the results now.
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpProtocol::Invalid, tx.request_protocol_number);
+    assert!(tx.request_protocol.as_ref().unwrap().eq_slice("01.10"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/ HTTP  /"));
+}
+
+/// Tab-separated request line with allow_space_uri: exercises the parser's
+/// "found bad chars" path; tabs remain part of the recorded URI.
+#[test]
+fn RequestLineParsing6() {
+    let mut cfg = TestConfig();
+    cfg.set_allow_space_uri(true);
+    let mut t = HybridParsingTest::new(cfg);
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Feed data to the parser.
+    t.connp.state_request_start().unwrap();
+    // Test the parser's "found bad chars" path
+    t.connp
+        .parse_request_line(b"GET\t/\tHTTP\t\t/\t01.10")
+        .unwrap();
+    t.connp.state_request_line().unwrap();
+
+    // Check the results now.
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpProtocol::Invalid, tx.request_protocol_number);
+    assert!(tx.request_protocol.as_ref().unwrap().eq_slice("01.10"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/\tHTTP\t\t/"));
+}
+
+/// If the caller supplies a parsed_uri before state_request_line(), the
+/// supplied URI must be kept rather than overwritten by the parser's own
+/// normalization of the raw request line.
+#[test]
+fn ParsedUriSupplied() {
+    let mut t = HybridParsingTest::new(TestConfig());
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Feed data to the parser.
+    t.connp.state_request_start().unwrap();
+    t.connp
+        .parse_request_line(b"GET /?p=1&q=2 HTTP/1.0")
+        .unwrap();
+
+    // Inject a caller-provided parsed URI before the request line completes.
+    let tx = t.connp.tx_mut(tx_id).unwrap();
+    let u = Uri {
+        path: Some(Bstr::from("/123")),
+        ..Default::default()
+    };
+    tx.parsed_uri = Some(u);
+    t.connp.state_request_line().unwrap();
+
+    // Check the results now.
+    let tx = t.connp.tx(tx_id).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpProtocol::V1_0, tx.request_protocol_number);
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=1&q=2"));
+    let parsed_uri = tx.parsed_uri.as_ref().unwrap();
+    assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/123"));
+}
+
+/// With double_decode_normalized_path enabled, "%2500" in the path decodes
+/// twice (%25 -> %, then %00 -> NUL) in the complete normalized URI, while
+/// parsed_uri.path shows the single-decoded form.
+#[test]
+fn DoubleEncodedUriPath() {
+    let mut cfg = TestConfig();
+    cfg.set_double_decode_normalized_path(true);
+    let mut t = HybridParsingTest::new(cfg);
+    // Feed data to the parser.
+
+    t.connp.state_request_start().unwrap();
+    t.connp.parse_request_line(b"GET /%2500 HTTP/1.0").unwrap();
+    t.connp.state_request_line().unwrap();
+
+    // Check the results now.
+
+    let tx = t.connp.request().unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpProtocol::V1_0, tx.request_protocol_number);
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/%2500"));
+    let parsed_uri = tx.parsed_uri.as_ref().unwrap();
+    assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/%00"));
+    assert!(tx.complete_normalized_uri.as_ref().unwrap().eq_slice("/\0"));
+}
+
+/// With double_decode_normalized_query enabled, "%2500" in the query decodes
+/// twice in the complete normalized URI; parsed_uri.query keeps the raw form.
+#[test]
+fn DoubleEncodedUriQuery() {
+    let mut cfg = TestConfig();
+    cfg.set_double_decode_normalized_query(true);
+    let mut t = HybridParsingTest::new(cfg);
+    // Feed data to the parser.
+
+    t.connp.state_request_start().unwrap();
+    t.connp
+        .parse_request_line(b"GET /?a=%2500 HTTP/1.0")
+        .unwrap();
+    t.connp.state_request_line().unwrap();
+
+    // Check the results now.
+
+    let tx = t.connp.request().unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpProtocol::V1_0, tx.request_protocol_number);
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?a=%2500"));
+    let parsed_uri = tx.parsed_uri.as_ref().unwrap();
+    assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/"));
+    assert!(parsed_uri.query.as_ref().unwrap().eq_slice("a=%2500"));
+    assert!(tx
+        .complete_normalized_uri
+        .as_ref()
+        .unwrap()
+        .eq_slice("/?a=\0"));
+}
+
+/// Test hybrid mode with one complete GET transaction; request then response
+/// with no body. Used to crash in htp_connp_close().
+/// Each user callback counter is checked immediately after the state change
+/// that should fire it, and the full set is re-checked at the end to prove no
+/// callback was invoked more than once.
+#[test]
+fn TestRepeatCallbacks() {
+    let mut cfg = TestConfig();
+    // Request callbacks
+    register_user_callbacks(&mut cfg);
+    let mut t = HybridParsingTest::new(cfg);
+
+    let tx_id = t.connp.request().unwrap().index;
+
+    // Configure user data and callbacks
+    let tx = t.connp.tx_mut(tx_id).unwrap();
+    tx.set_user_data(Box::new(HybridParsing_Get_User_Data::new()));
+
+    // Make dummy parser data to satisfy callbacks
+    let mut p = ParserData::from(b"" as &[u8]);
+
+    // Request begins
+    t.connp.state_request_start().unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_REQUEST_START_invoked);
+
+    // Request line data
+    t.connp.parse_request_line(b"GET / HTTP/1.0").unwrap();
+
+    // Request line complete
+    t.connp.state_request_line().unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_REQUEST_LINE_invoked);
+
+    let tx = t.connp.tx(tx_id).unwrap();
+    // Check request line data
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/"));
+    assert!(tx.request_protocol.as_ref().unwrap().eq_slice("HTTP/1.0"));
+    let parsed_uri = tx.parsed_uri.as_ref().unwrap();
+    assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/"));
+
+    // Request headers complete
+    t.connp.state_request_headers(&mut p).unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_REQUEST_HEADERS_invoked);
+
+    // Request complete
+    t.connp.state_request_complete(&mut p).unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_REQUEST_COMPLETE_invoked);
+
+    // Response begins
+    t.connp.state_response_start().unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_RESPONSE_START_invoked);
+
+    // Response line data
+    t.connp.parse_response_line(b"HTTP/1.1 200 OK\r\n").unwrap();
+
+    // Response line complete
+    t.connp.state_response_line().unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_RESPONSE_LINE_invoked);
+
+    // Response headers complete
+    t.connp.state_response_headers(&mut p).unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    assert_eq!(1, user_data.callback_RESPONSE_HEADERS_invoked);
+
+    // Response complete
+    t.connp.state_response_complete(&mut p).unwrap();
+    let tx = t.connp.tx(tx_id).unwrap();
+    let user_data = tx.user_data::<HybridParsing_Get_User_Data>().unwrap();
+    // Final sweep: every callback must have fired exactly once.
+    assert_eq!(1, user_data.callback_REQUEST_START_invoked);
+    assert_eq!(1, user_data.callback_REQUEST_LINE_invoked);
+    assert_eq!(1, user_data.callback_REQUEST_HEADERS_invoked);
+    assert_eq!(1, user_data.callback_REQUEST_COMPLETE_invoked);
+    assert_eq!(1, user_data.callback_RESPONSE_START_invoked);
+    assert_eq!(1, user_data.callback_RESPONSE_LINE_invoked);
+    assert_eq!(1, user_data.callback_RESPONSE_HEADERS_invoked);
+    assert_eq!(1, user_data.callback_RESPONSE_COMPLETE_invoked);
+    assert_eq!(1, user_data.callback_TRANSACTION_COMPLETE_invoked);
+}
+
+/// Try response line with missing response code and message: protocol is
+/// recognized, status number stays Invalid, status/message stay None.
+#[test]
+fn ResponseLineIncomplete() {
+    let mut t = HybridParsingTest::new(TestConfig());
+
+    // Make dummy parser data to satisfy callbacks
+    let mut p = ParserData::from(b"" as &[u8]);
+
+    t.connp.state_response_start().unwrap();
+    t.connp.parse_response_line(b"HTTP/1.1").unwrap();
+    let tx = t.connp.response().unwrap();
+    assert!(tx.response_protocol.as_ref().unwrap().eq_slice("HTTP/1.1"));
+    assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number);
+    assert!(tx.response_status.is_none());
+    assert_eq!(HtpResponseNumber::Invalid, tx.response_status_number);
+    assert!(tx.response_message.is_none());
+    t.connp.state_response_complete(&mut p).unwrap();
+}
+
+/// Try response line with missing response message: protocol and status code
+/// are parsed, the reason phrase stays None.
+#[test]
+fn ResponseLineIncomplete1() {
+    let mut t = HybridParsingTest::new(TestConfig());
+
+    // Make dummy parser data to satisfy callbacks
+    let mut p = ParserData::from(b"" as &[u8]);
+
+    t.connp.state_response_start().unwrap();
+    t.connp.parse_response_line(b"HTTP/1.1 200").unwrap();
+    let tx = t.connp.response().unwrap();
+    assert!(tx.response_protocol.as_ref().unwrap().eq_slice("HTTP/1.1"));
+    assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number);
+    assert!(tx.response_status.as_ref().unwrap().eq_slice("200"));
+    assert!(tx.response_status_number.eq_num(200));
+    assert!(tx.response_message.is_none());
+    t.connp.state_response_complete(&mut p).unwrap();
+}
diff --git a/rust/htp/src/test/main.rs b/rust/htp/src/test/main.rs
new file mode 100644 (file)
index 0000000..5e6b9d3
--- /dev/null
@@ -0,0 +1,2937 @@
+#![allow(non_snake_case)]
+use crate::{
+    bstr::Bstr,
+    config::HtpServerPersonality,
+    connection::ConnectionFlags,
+    connection_parser::ParserData,
+    error::Result,
+    log::HtpLogCode,
+    transaction::{
+        HtpAuthType, HtpProtocol, HtpRequestProgress, HtpResponseNumber, HtpResponseProgress,
+        HtpTransferCoding, Transaction,
+    },
+    util::{FlagOperations, HtpFlags},
+};
+
+use super::common::{
+    assert_evader_chunked, assert_evader_request, assert_evader_response, assert_header_eq,
+    assert_request_header_eq, assert_response_header_eq, assert_response_header_flag_contains,
+};
+use super::common::{MainUserData, Test, TestConfig};
+
+use std::iter::IntoIterator;
+
+/// Ad-hoc scratch test: just runs the 00-adhoc.t fixture and expects success.
+#[test]
+fn AdHoc() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("00-adhoc.t").is_ok());
+}
+
+/// Replay a simple GET from a fixture file and verify method, raw URI and the
+/// parsed (still percent-encoded) query string.
+#[test]
+fn Get() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("01-get.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=%20"));
+
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .query
+        .as_ref()
+        .unwrap()
+        .eq_slice("p=%20"));
+}
+
+/// Same check as Get, but fed from an inline byte slice (">>>"/"<<<" mark the
+/// request/response direction switches in the slice format).
+#[test]
+fn GetSlice() {
+    let mut t = Test::new(TestConfig());
+    assert!(t
+        .run_slice(
+            b">>>
+GET /?p=%20 HTTP/1.0
+User-Agent: Mozilla
+
+
+<<<
+HTTP/1.0 200 OK
+Date: Mon, 31 Aug 2009 20:25:50 GMT
+Server: Apache
+Connection: close
+Content-Type: text/html
+Content-Length: 12
+
+Hello World!"
+        )
+        .is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=%20"));
+
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .query
+        .as_ref()
+        .unwrap()
+        .eq_slice("p=%20"));
+}
+
+/// GET with a percent-encoded relative path: hostname comes from the Host
+/// header and the parsed path is the decoded form ("/images.gif").
+#[test]
+fn GetEncodedRelPath() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("99-get.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert!(tx
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/images.gif"));
+}
+
+/// Apache-personality header parsing: compares the full ordered list of
+/// (name, value) request headers against the expected results, including
+/// folding, invalid names, duplicates, empty values and embedded NULs.
+#[test]
+fn ApacheHeaderParsing() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("02-header-test-apache2.t").is_ok());
+
+    let tx = t.connp.tx(0).expect("expected tx to exist");
+
+    let actual: Vec<(&[u8], &[u8])> = (&tx.request_headers)
+        .into_iter()
+        .map(|val| (val.name.as_slice(), val.value.as_slice()))
+        .collect();
+
+    let expected: Vec<(&[u8], &[u8])> = [
+        ("Invalid-Folding", "1"),
+        ("Valid-Folding", "2 2"),
+        ("Normal-Header", "3"),
+        ("Invalid Header Name", "4"),
+        ("Same-Name-Headers", "5, 6"),
+        ("Empty-Value-Header", ""),
+        ("", "8, "),
+        ("Header-With-LWS-After", "9"),
+        ("Header-With-NUL", "BEFORE\0AFTER"),
+    ]
+    .iter()
+    .map(|(key, val)| (key.as_bytes(), val.as_bytes()))
+    .collect();
+    // On mismatch, print both sides as lossy UTF-8 for a readable diff.
+    assert_eq!(
+        actual,
+        expected,
+        "{:?} != {:?}",
+        actual
+            .clone()
+            .into_iter()
+            .map(|(key, val)| (
+                String::from_utf8_lossy(key).to_string(),
+                String::from_utf8_lossy(val).to_string()
+            ))
+            .collect::<Vec<(String, String)>>(),
+        expected
+            .clone()
+            .into_iter()
+            .map(|(key, val)| (
+                String::from_utf8_lossy(key).to_string(),
+                String::from_utf8_lossy(val).to_string()
+            ))
+            .collect::<Vec<(String, String)>>(),
+    );
+}
+
+/// Two pipelined urlencoded POSTs: both transactions must complete and both
+/// responses must carry the Apache Server header.
+#[test]
+fn PostUrlencoded() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("03-post-urlencoded.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    // Transaction 1
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(tx.request_progress, HtpRequestProgress::COMPLETE);
+    assert_eq!(tx.response_progress, HtpResponseProgress::COMPLETE);
+
+    assert_response_header_eq!(tx, "Server", "Apache");
+
+    // Transaction 2
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert_eq!(tx2.request_progress, HtpRequestProgress::COMPLETE);
+    assert_eq!(tx2.response_progress, HtpResponseProgress::COMPLETE);
+
+    assert_response_header_eq!(tx2, "Server", "Apache");
+}
+
+/// Chunked urlencoded POST: message length counts the chunked framing (25),
+/// entity length counts only the dechunked body (12).
+#[test]
+fn PostUrlencodedChunked() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("04-post-urlencoded-chunked.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(25, tx.request_message_len);
+    assert_eq!(12, tx.request_entity_len);
+}
+
+/// Expect: 100-continue handling.
+#[test]
+fn Expect() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("05-expect.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    // The interim header from the 100 response should not be among the final headers.
+    assert!(tx.request_headers.get_nocase_nozero("Header1").is_none());
+}
+
+/// URI normalization fixture: only checks that parsing succeeds and one
+/// transaction exists.
+#[test]
+fn UriNormal() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("06-uri-normal.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let _tx = t.connp.tx(0).unwrap();
+}
+
+/// Two requests in one inbound chunk must set the connection PIPELINED flag.
+#[test]
+fn PipelinedConn() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("07-pipelined-connection.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    assert!(t.connp.conn.flags.is_set(ConnectionFlags::PIPELINED));
+
+    let _tx = t.connp.tx(0).unwrap();
+}
+
+/// Two sequential (non-pipelined) requests: neither the connection PIPELINED
+/// flag nor the per-tx MULTI_PACKET_HEAD flag may be set.
+#[test]
+fn NotPipelinedConn() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("08-not-pipelined-connection.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    assert!(!t.connp.conn.flags.is_set(ConnectionFlags::PIPELINED));
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(!tx.flags.is_set(HtpFlags::MULTI_PACKET_HEAD));
+}
+
+/// A request head split across multiple packets must set MULTI_PACKET_HEAD.
+#[test]
+fn MultiPacketRequest() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("09-multi-packet-request-head.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::MULTI_PACKET_HEAD));
+}
+
+/// Host header parsing across four transactions: trailing dot preserved for
+/// tx2, otherwise the hostname normalizes to "www.example.com".
+#[test]
+fn HeaderHostParsing() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("10-host-in-headers.t").is_ok());
+    assert_eq!(4, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert!(tx1
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert!(tx2
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com."));
+
+    let tx3 = t.connp.tx(2).unwrap();
+
+    assert!(tx3
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+
+    let tx4 = t.connp.tx(3).unwrap();
+
+    assert!(tx4
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+}
+
+/// A response with no Content-Length is terminated by stream closure; the
+/// transaction must still reach the complete state.
+#[test]
+fn ResponseWithoutContentLength() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("11-response-stream-closure.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+}
+
+/// CONNECT that the server refuses with 405: the transaction completes and the
+/// error response's content type, message and status are recorded.
+#[test]
+fn FailedConnectRequest() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("12-connect-request.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("CONNECT"));
+    assert!(tx
+        .response_content_type
+        .as_ref()
+        .unwrap()
+        .eq_slice("text/html"));
+    assert!(tx
+        .response_message
+        .as_ref()
+        .unwrap()
+        .eq_slice("Method Not Allowed"));
+    assert!(tx.response_status_number.eq_num(405));
+}
+
+/// Gzip response identified by Content-Type: compressed wire length (187)
+/// versus decompressed entity length (225).
+#[test]
+fn CompressedResponseContentType() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("13-compressed-response-gzip-ct.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(187, tx.response_message_len);
+    assert_eq!(225, tx.response_entity_len);
+    assert!(tx
+        .response_message
+        .as_ref()
+        .unwrap()
+        .eq_slice("Moved Temporarily"));
+}
+
+/// Gzip response delivered with chunked transfer encoding: message length is
+/// the chunked+compressed wire size, entity length the decompressed size.
+#[test]
+fn CompressedResponseChunked() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("14-compressed-response-gzip-chunked.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(28261, tx.response_message_len);
+
+    assert_eq!(159_590, tx.response_entity_len);
+}
+
+/// CONNECT accepted with 200; completeness is not asserted (see TODO below).
+#[test]
+fn SuccessfulConnectRequest() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("15-connect-complete.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    // TODO: Update the test_run_file() function to provide better
+    //       simulation of real traffic. At the moment, it does not
+    //       invoke inbound parsing after outbound parsing returns
+    //       HTP_DATA_OTHER, which is why the check below fails.
+    //assert!(tx.is_complete());
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("CONNECT"));
+
+    assert!(tx.response_status_number.eq_num(200));
+}
+
+/// CONNECT followed by extra data on the same connection: both the CONNECT
+/// transaction and the follow-up transaction must complete.
+#[test]
+fn ConnectRequestWithExtraData() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("16-connect-extra.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert!(tx1.is_complete());
+    assert!(tx1
+        .response_content_type
+        .as_ref()
+        .unwrap()
+        .eq_slice("text/html"));
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert!(tx2.is_complete());
+}
+
+/// Multipart request body: only checks the transaction completes.
+#[test]
+fn Multipart() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("17-multipart-1.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+}
+
+/// Deflate-compressed response: wire length 755, decompressed length 1433.
+#[test]
+fn CompressedResponseDeflate() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("18-compressed-response-deflate.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(755, tx.response_message_len);
+
+    assert_eq!(1433, tx.response_entity_len);
+}
+
+/// Urlencoded POST with query parameters in the URI as well.
+#[test]
+fn UrlEncoded() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("19-urlencoded-test.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("POST"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=1&q=2"));
+}
+
+/// HOST_AMBIGUOUS detection: the flag is set only when the Host header and
+/// the request-URI authority disagree (tx2, tx4); matching host/port pairs
+/// (tx1, tx3, tx5) stay unflagged.
+#[test]
+fn AmbiguousHost() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("20-ambiguous-host.t").is_ok());
+
+    assert_eq!(5, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert!(tx1.is_complete());
+    assert!(!tx1.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert!(tx2.is_complete());
+    assert!(tx2.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+    assert!(tx2
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("example.com"));
+
+    let tx3 = t.connp.tx(2).unwrap();
+
+    assert!(tx3.is_complete());
+    assert!(!tx3.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+    assert!(tx3
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+    assert_eq!(Some(8001), tx3.request_port_number);
+
+    let tx4 = t.connp.tx(3).unwrap();
+
+    assert!(tx4.is_complete());
+    assert!(tx4.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+    assert!(tx4
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+    assert_eq!(Some(8002), tx4.request_port_number);
+
+    let tx5 = t.connp.tx(4).unwrap();
+
+    assert!(tx5.is_complete());
+    assert!(!tx5.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+    assert!(tx5
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+    assert_eq!(Some(80), tx5.request_port_number);
+}
+
+/// Plain HTTP/0.9 exchange: no HTTP_0_9_EXTRA connection flag expected.
+#[test]
+fn Http_0_9() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("21-http09.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+    assert!(!t.connp.conn.flags.is_set(ConnectionFlags::HTTP_0_9_EXTRA));
+
+    let _tx = t.connp.tx(0).unwrap();
+}
+
+/// HTTP/1.1 request with no Host header must raise the HOST_MISSING flag.
+#[test]
+fn Http11HostMissing() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("22-http_1_1-host_missing").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.flags.is_set(HtpFlags::HOST_MISSING));
+}
+
+/// Multiple HTTP/0.9-style requests still yield a single transaction.
+#[test]
+fn Http_0_9_Multiple() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("23-http09-multiple.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let _tx = t.connp.tx(0).unwrap();
+}
+
+/// Request explicitly declaring HTTP/0.9 in its request line is NOT treated
+/// as short-form 0.9 (is_protocol_0_9 stays false).
+#[test]
+fn Http_0_9_Explicit() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("24-http09-explicit.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(!tx.is_protocol_0_9);
+}
+
+/// Feed the stream in very small chunks; only parse success is asserted.
+#[test]
+fn SmallChunks() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("25-small-chunks.t").is_ok());
+}
+
+/// REQUEST_HEADER_DATA callback: checks that raw request header data arrives
+/// in exactly four expected chunks. The chunk counter is kept in the
+/// transaction's user data; any mismatch pins it to -1 so later chunks and the
+/// final assertion in RequestHeaderData() can detect the failure.
+fn ConnectionParsing_RequestHeaderData_REQUEST_HEADER_DATA(
+    tx: &mut Transaction, d: &ParserData,
+) -> Result<()> {
+    // First invocation has no user data yet; start counting at 0.
+    let mut counter = *tx.user_data::<i32>().unwrap_or(&0);
+    let data = d.as_slice();
+    match counter {
+        0 => {
+            if data != b"User-Agent:" {
+                eprintln!("Mismatch in chunk 0");
+                counter = -1;
+            }
+        }
+        1 => {
+            if data != b" Test" {
+                eprintln!("Mismatch in chunk 1");
+                counter = -1;
+            }
+        }
+        2 => {
+            if data != b" User" {
+                eprintln!("Mismatch in chunk 2");
+                counter = -1;
+            }
+        }
+        3 => {
+            if data != b" Agent\nHost: www.example.com\n\n" {
+                eprintln!("Mismatch in chunk 3");
+                counter = -1;
+            }
+        }
+        _ => {
+            if counter >= 0 {
+                eprintln!("Seen more than 4 chunks");
+                counter = -1;
+            }
+        }
+    }
+
+    // Only advance while no mismatch has been recorded.
+    if counter >= 0 {
+        counter += 1;
+    }
+    tx.set_user_data(Box::new(counter));
+    Ok(())
+}
+
+/// Drives the REQUEST_HEADER_DATA callback above; the final counter of 4
+/// proves all four chunks matched.
+#[test]
+fn RequestHeaderData() {
+    let mut cfg = TestConfig();
+    cfg.register_request_header_data(ConnectionParsing_RequestHeaderData_REQUEST_HEADER_DATA);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("26-request-headers-raw.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(4, *tx.user_data::<i32>().unwrap());
+}
+
+fn ConnectionParsing_RequestTrailerData_REQUEST_TRAILER_DATA(
+    tx: &mut Transaction, d: &ParserData,
+) -> Result<()> {
+    let mut counter = *tx.user_data::<i32>().unwrap_or(&0);
+    let data = d.as_slice();
+    match counter {
+        0 => {
+            if data != b"Cookie:" {
+                eprintln!("Mismatch in chunk 0");
+                counter = -1;
+            }
+        }
+        1 => {
+            if data != b" 2\r\n\r\n" {
+                eprintln!("Mismatch in chunk 1");
+                counter = -2;
+            }
+        }
+        _ => {
+            if counter >= 0 {
+                eprintln!("Seen more than 4 chunks");
+                counter = -3;
+            }
+        }
+    }
+
+    if counter >= 0 {
+        counter += 1;
+    }
+    tx.set_user_data(Box::new(counter));
+    Ok(())
+}
+
+/// Drives the REQUEST_TRAILER_DATA callback above; the final counter of 2
+/// proves both trailer chunks matched.
+#[test]
+fn RequestTrailerData() {
+    let mut cfg = TestConfig();
+    cfg.register_request_trailer_data(ConnectionParsing_RequestTrailerData_REQUEST_TRAILER_DATA);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("27-request-trailer-raw.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(2, *tx.user_data::<i32>().unwrap());
+}
+
+fn ConnectionParsing_ResponseHeaderData_RESPONSE_HEADER_DATA(
+    tx: &mut Transaction, d: &ParserData,
+) -> Result<()> {
+    let mut counter = *tx.user_data::<i32>().unwrap_or(&0);
+    let data = d.as_slice();
+    match counter {
+            0 => {
+                if data != b"Date:" {
+                    eprintln!("Mismatch in chunk 0");
+                    counter = -1;
+                }
+            }
+            1 => {
+                if data != b" Mon," {
+                    eprintln!("Mismatch in chunk 1");
+                    counter = -2;
+                }
+            }
+            2 => {
+                if data != b" 31 Aug 2009 20:25:50 GMT\r\nServer:" {
+                    eprintln!("Mismatch in chunk 2");
+                    counter = -3;
+                }
+            }
+            3 => {
+                if data != b" Apache\r\nConnection: close\r\nContent-Type: text/html\r\nTransfer-Encoding: chunked\r\n\r\n" {
+                    eprintln!("Mismatch in chunk 3");
+                    counter = -4;
+                }
+            }
+            _ => {
+                if counter >= 0 {
+                    eprintln!("Seen more than 4 chunks");
+                    counter = -5;
+                }
+            }
+        }
+
+    if counter >= 0 {
+        counter += 1;
+    }
+    tx.set_user_data(Box::new(counter));
+    Ok(())
+}
+
+/// Drives the RESPONSE_HEADER_DATA callback above; the final counter of 4
+/// proves all four chunks matched.
+#[test]
+fn ResponseHeaderData() {
+    let mut cfg = TestConfig();
+    cfg.register_response_header_data(ConnectionParsing_ResponseHeaderData_RESPONSE_HEADER_DATA);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("28-response-headers-raw.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(4, *tx.user_data::<i32>().unwrap());
+}
+
+fn ConnectionParsing_ResponseTrailerData_RESPONSE_TRAILER_DATA(
+    tx: &mut Transaction, d: &ParserData,
+) -> Result<()> {
+    let mut counter = *tx.user_data::<i32>().unwrap_or(&0);
+    let data = d.as_slice();
+    match counter {
+        0 => {
+            if data != b"Set-Cookie:" {
+                eprintln!("Mismatch in chunk 0");
+                counter = -1;
+            }
+        }
+
+        1 => {
+            if data != b" name=" {
+                eprintln!("Mismatch in chunk 1");
+                counter = -2;
+            }
+        }
+
+        2 => {
+            if data != b"value\r\nAnother-Header:" {
+                eprintln!("Mismatch in chunk 1");
+                counter = -3;
+            }
+        }
+
+        3 => {
+            if data != b" Header-Value\r\n\r\n" {
+                eprintln!("Mismatch in chunk 1");
+                counter = -4;
+            }
+        }
+
+        _ => {
+            if counter >= 0 {
+                eprintln!("Seen more than 4 chunks");
+                counter = -5;
+            }
+        }
+    }
+
+    if counter >= 0 {
+        counter += 1;
+    }
+    tx.set_user_data(Box::new(counter));
+    Ok(())
+}
+
+/// Drives the RESPONSE_TRAILER_DATA callback above; the final counter of 4
+/// proves all four trailer chunks matched.
+#[test]
+fn ResponseTrailerData() {
+    let mut cfg = TestConfig();
+    cfg.register_response_trailer_data(ConnectionParsing_ResponseTrailerData_RESPONSE_TRAILER_DATA);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("29-response-trailer-raw.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(4, *tx.user_data::<i32>().unwrap());
+}
+
+#[test]
+fn GetIPv6() {
+    // A GET with a bracketed IPv6 literal in the request URI: the parsed URI
+    // must keep the brackets in the hostname, split out the numeric port, and
+    // leave the query percent-encoded (no normalization here).
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("30-get-ipv6.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+
+    assert!(tx
+        .request_uri
+        .as_ref()
+        .unwrap()
+        .eq_slice("http://[::1]:8080/?p=%20"));
+
+    // Hostname retains the surrounding brackets of the IPv6 literal.
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("[::1]"));
+    assert_eq!(8080, tx.parsed_uri.as_ref().unwrap().port_number.unwrap());
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .query
+        .as_ref()
+        .unwrap()
+        .eq_slice("p=%20"));
+}
+
+#[test]
+fn GetRequestLineNul() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("31-get-request-line-nul.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=%20"));
+}
+
+#[test]
+fn InvalidHostname1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("32-invalid-hostname.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.flags.is_set(HtpFlags::HOSTH_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::HOSTU_INVALID));
+}
+
+#[test]
+fn InvalidHostname2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("33-invalid-hostname.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(!tx.flags.is_set(HtpFlags::HOSTH_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::HOSTU_INVALID));
+}
+
+#[test]
+fn InvalidHostname3() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("34-invalid-hostname.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::HOSTH_INVALID));
+    assert!(!tx.flags.is_set(HtpFlags::HOSTU_INVALID));
+}
+
+#[test]
+fn EarlyResponse() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("35-early-response.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.is_complete());
+}
+
+#[test]
+fn InvalidRequest1() {
+    // An invalid Content-Length must make the parse fail outright (is_err),
+    // leaving the request stuck in the HEADERS phase with both the generic
+    // REQUEST_INVALID flag and the specific REQUEST_INVALID_C_L flag set.
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("36-invalid-request-1-invalid-c-l.t").is_err());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::HEADERS, tx.request_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_INVALID_C_L));
+
+    // Host header was parsed before the failure.
+    assert!(tx.request_hostname.is_some());
+}
+
+#[test]
+fn InvalidRequest2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("37-invalid-request-2-t-e-and-c-l.t").is_ok());
+    // No error, flags only.
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+
+    assert!(tx.request_hostname.is_some());
+}
+
+#[test]
+fn InvalidRequest3() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("38-invalid-request-3-invalid-t-e.t").is_err());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::HEADERS, tx.request_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_INVALID_T_E));
+
+    assert!(tx.request_hostname.is_some());
+}
+
+#[test]
+fn AutoDestroyCrash() {
+    let cfg = TestConfig();
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("39-auto-destroy-crash.t").is_ok());
+
+    assert_eq!(4, t.connp.tx_size());
+}
+
+#[test]
+fn AuthBasic() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("40-auth-basic.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpAuthType::BASIC, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.as_ref().unwrap().eq_slice("ivanr"));
+    assert!(tx
+        .request_auth_password
+        .as_ref()
+        .unwrap()
+        .eq_slice("secret"));
+}
+
+#[test]
+fn AuthDigest() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("41-auth-digest.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.as_ref().unwrap().eq_slice("ivanr"));
+
+    assert!(tx.request_auth_password.is_none());
+}
+
+#[test]
+fn Unknown_MethodOnly() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("42-unknown-method_only.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("HELLO"));
+
+    assert!(tx.request_uri.is_none());
+
+    assert!(tx.is_protocol_0_9);
+}
+
+#[test]
+fn InvalidHtpProtocol() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("43-invalid-protocol.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpProtocol::Invalid, tx.request_protocol_number);
+}
+
+#[test]
+fn AuthBasicInvalid() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("44-auth-basic-invalid.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::BASIC, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.is_none());
+
+    assert!(tx.request_auth_password.is_none());
+
+    assert!(tx.flags.is_set(HtpFlags::AUTH_INVALID));
+}
+
+#[test]
+fn AuthDigestUnquotedUsername() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("45-auth-digest-unquoted-username.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.is_none());
+
+    assert!(tx.request_auth_password.is_none());
+
+    assert!(tx.flags.is_set(HtpFlags::AUTH_INVALID));
+}
+
+#[test]
+fn AuthDigestInvalidUsername1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("46-auth-digest-invalid-username.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.is_none());
+
+    assert!(tx.request_auth_password.is_none());
+
+    assert!(tx.flags.is_set(HtpFlags::AUTH_INVALID));
+}
+
+#[test]
+fn AuthUnrecognized() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("47-auth-unrecognized.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::UNRECOGNIZED, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.is_none());
+
+    assert!(tx.request_auth_password.is_none());
+}
+
+#[test]
+fn InvalidResponseHeaders1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("48-invalid-response-headers-1.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert_eq!(8, tx.response_headers.size());
+
+    assert_response_header_eq!(tx, "", "No Colon");
+    assert_response_header_flag_contains!(tx, "", HtpFlags::FIELD_INVALID);
+    assert_response_header_flag_contains!(tx, "", HtpFlags::FIELD_UNPARSEABLE);
+
+    assert_response_header_eq!(tx, "Lws", "After Header Name");
+    assert_response_header_flag_contains!(tx, "Lws", HtpFlags::FIELD_INVALID);
+
+    assert_response_header_eq!(tx, "Header@Name", "Not Token");
+    assert_response_header_flag_contains!(tx, "Header@Name", HtpFlags::FIELD_INVALID);
+}
+
+#[test]
+fn InvalidResponseHeaders2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("49-invalid-response-headers-2.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert_eq!(6, tx.response_headers.size());
+
+    assert_response_header_eq!(tx, "", "Empty Name");
+    assert_response_header_flag_contains!(tx, "", HtpFlags::FIELD_INVALID);
+}
+
+#[test]
+fn GetIPv6Invalid() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("51-get-ipv6-invalid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+
+    assert!(tx
+        .request_uri
+        .as_ref()
+        .unwrap()
+        .eq_slice("http://[::1:8080/?p=%20"));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("[::1:8080"));
+}
+
+#[test]
+fn InvalidPath() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("52-invalid-path.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+
+    assert!(tx
+        .request_uri
+        .as_ref()
+        .unwrap()
+        .eq_slice("invalid/path?p=%20"));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("invalid/path"));
+}
+
+#[test]
+fn PathUtf8_None() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("53-path-utf8-none.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(!tx.flags.is_set(HtpFlags::PATH_UTF8_VALID));
+    assert!(!tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+    assert!(!tx.flags.is_set(HtpFlags::PATH_HALF_FULL_RANGE));
+}
+
+#[test]
+fn PathUtf8_Valid() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("54-path-utf8-valid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_VALID));
+}
+
+#[test]
+fn PathUtf8_Overlong2() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("55-path-utf8-overlong-2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+}
+
+#[test]
+fn PathUtf8_Overlong3() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("56-path-utf8-overlong-3.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+}
+
+#[test]
+fn PathUtf8_Overlong4() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("57-path-utf8-overlong-4.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+}
+
+#[test]
+fn PathUtf8_Invalid() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("58-path-utf8-invalid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_INVALID));
+    assert!(!tx.flags.is_set(HtpFlags::PATH_UTF8_VALID));
+}
+
+#[test]
+fn PathUtf8_FullWidth() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("59-path-utf8-fullwidth.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_HALF_FULL_RANGE));
+}
+
+#[test]
+fn PathUtf8_Decode_Valid() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("54-path-utf8-valid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/Ristic.txt"));
+}
+
+#[test]
+fn PathUtf8_Decode_Overlong2() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("55-path-utf8-overlong-2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/&.txt"));
+}
+
+#[test]
+fn PathUtf8_Decode_Overlong3() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("56-path-utf8-overlong-3.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/&.txt"));
+}
+
+#[test]
+fn PathUtf8_Decode_Overlong4() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("57-path-utf8-overlong-4.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/&.txt"));
+}
+
+#[test]
+fn PathUtf8_Decode_Invalid() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("58-path-utf8-invalid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_INVALID));
+    assert!(!tx.flags.is_set(HtpFlags::PATH_UTF8_VALID));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/Ristic?.txt"));
+}
+
+#[test]
+fn PathUtf8_Decode_FullWidth() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("59-path-utf8-fullwidth.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_HALF_FULL_RANGE));
+
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/&.txt"));
+}
+
+#[test]
+fn EmptyLineBetweenRequests() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("61-empty-line-between-requests.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let _tx = t.connp.tx(1).unwrap();
+
+    /*part of previous request body assert_eq!(1, tx.request_ignored_lines);*/
+}
+
+#[test]
+fn PostNoBody() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("62-post-no-body.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx1.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx1.response_progress);
+    assert!(tx1
+        .response_content_type
+        .as_ref()
+        .unwrap()
+        .eq_slice("text/html"));
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx2.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx2.response_progress);
+    assert!(tx2
+        .response_content_type
+        .as_ref()
+        .unwrap()
+        .eq_slice("text/html"));
+}
+
+#[test]
+fn PostChunkedValid1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("63-post-chunked-invalid-1.t").is_err());
+}
+
+#[test]
+fn PostChunkedInvalid2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("64-post-chunked-invalid-2.t").is_err());
+}
+
+#[test]
+fn PostChunkedInvalid3() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("65-post-chunked-invalid-3.t").is_err());
+}
+
+#[test]
+fn PostChunkedSplitChunk() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("66-post-chunked-split-chunk.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+}
+
+#[test]
+fn LongRequestLine1() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("67-long-request-line.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx
+        .request_uri
+        .as_ref()
+        .unwrap()
+        .eq_slice("/0123456789/0123456789/"));
+}
+
+#[test]
+fn LongRequestLine2() {
+    let mut cfg = TestConfig();
+    cfg.set_field_limit(16);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("67-long-request-line.t").is_err());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::LINE, tx.request_progress);
+}
+
+#[test]
+fn InvalidRequestHeader() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("68-invalid-request-header.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).expect("expected at least one transaction");
+
+    assert_request_header_eq!(tx, "Header-With-NUL", "BEFORE  \0AFTER");
+}
+
+#[test]
+fn TestGenericPersonality() {
+    let mut cfg = TestConfig();
+    cfg.set_server_personality(HtpServerPersonality::IDS)
+        .unwrap();
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("02-header-test-apache2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let _tx = t.connp.tx(0).unwrap();
+}
+
+#[test]
+fn LongResponseHeader() {
+    let mut cfg = TestConfig();
+    cfg.set_field_limit(18);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("69-long-response-header.t").is_err());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::HEADERS, tx.response_progress);
+}
+
+#[test]
+fn ResponseInvalidChunkLength() {
+    // An invalid chunk length in the response must NOT abort parsing: the
+    // run is expected to complete without error (tolerant handling on the
+    // response side, unlike the request-side PostChunkedInvalid* tests).
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("70-response-invalid-chunk-length.t").is_ok());
+}
+
+#[test]
+fn ResponseSplitChunk() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("71-response-split-chunk.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn ResponseBody() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("72-response-split-body.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn ResponseContainsTeAndCl() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("73-response-te-and-cl.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+}
+
+#[test]
+fn ResponseMultipleCl() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("74-response-multiple-cl.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+
+    assert_response_header_eq!(tx, "Content-Length", "12");
+    assert_response_header_flag_contains!(tx, "Content-Length", HtpFlags::FIELD_REPEATED);
+}
+
+#[test]
+fn ResponseMultipleClMismatch() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("88-response-multiple-cl-mismatch.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+
+    assert_response_header_eq!(tx, "Content-Length", "12");
+    assert_response_header_flag_contains!(tx, "Content-Length", HtpFlags::FIELD_REPEATED);
+
+    let logs = t.connp.conn.get_logs();
+    assert_eq!(2, logs.len());
+    assert_eq!(
+        logs.first().unwrap().msg.msg,
+        "Ambiguous response C-L value"
+    );
+    assert_eq!(logs.get(1).unwrap().msg.msg, "Repetition for header");
+}
+
+#[test]
+fn ResponseInvalidCl() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("75-response-invalid-cl.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert!(!tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+}
+
+#[test]
+fn ResponseNoBody() {
+    // Two requests on one connection where the first response has headers but
+    // no body: both transactions must still complete, and they must be
+    // distinct transaction objects.
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("76-response-no-body.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx1.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx1.response_progress);
+
+    assert_response_header_eq!(tx1, "Server", "Apache");
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx2.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx2.response_progress);
+
+    // The bodiless response must not have been merged into a single tx.
+    assert!(tx1 != tx2);
+}
+
+#[test]
+fn ResponseFoldedHeaders() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("77-response-folded-headers.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx1.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx1.response_progress);
+
+    assert_response_header_eq!(tx1, "Server", "Apache Server");
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx2.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx2.response_progress);
+}
+
+#[test]
+fn ResponseNoStatusHeaders() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("78-response-no-status-headers.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn ConnectInvalidHostport() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("79-connect-invalid-hostport.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+}
+
+#[test]
+fn HostnameInvalid1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("80-hostname-invalid-1.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+}
+
+#[test]
+fn HostnameInvalid2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("81-hostname-invalid-2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+}
+
+#[test]
+fn AuthDigestInvalidUsername2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("83-auth-digest-invalid-username-2.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.is_none());
+
+    assert!(tx.request_auth_password.is_none());
+
+    assert!(tx.flags.is_set(HtpFlags::AUTH_INVALID));
+}
+
+#[test]
+fn ResponseNoStatusHeaders2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("84-response-no-status-headers-2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+// Test was commented out of libhtp
+//#[test]
+//fn ZeroByteRequestTimeout() {
+//    let mut t = Test::new(TestConfig());
+//unsafe {
+//    assert!(t.run_file("85-zero-byte-request-timeout.t").is_ok());
+//
+//    assert_eq!(1, t.connp.tx_size());
+//
+//    let tx = t.connp.conn.get_tx(0);
+//    assert!(!tx.is_null());
+//
+//    assert_eq!(HtpRequestProgress::NOT_STARTED, tx.request_progress);
+//    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+//}}
+
+#[test]
+fn PartialRequestTimeout() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("86-partial-request-timeout.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn IncorrectHostAmbiguousWarning() {
+    let mut t = Test::new(TestConfig());
+    assert!(t
+        .run_file("87-issue-55-incorrect-host-ambiguous-warning.t")
+        .is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx
+        .parsed_uri_raw
+        .as_ref()
+        .unwrap()
+        .port
+        .as_ref()
+        .unwrap()
+        .eq_slice("443"));
+    assert!(tx
+        .parsed_uri_raw
+        .as_ref()
+        .unwrap()
+        .hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+    assert_eq!(
+        443,
+        tx.parsed_uri_raw.as_ref().unwrap().port_number.unwrap()
+    );
+
+    assert!(tx
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+
+    assert!(!tx.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+}
+
+#[test]
+fn GetWhitespace() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("89-get-whitespace.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice(" GET"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=%20"));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .query
+        .as_ref()
+        .unwrap()
+        .eq_slice("p=%20"));
+}
+
+#[test]
+fn RequestUriTooLarge() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("90-request-uri-too-large.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn RequestInvalid() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("91-request-unexpected-body.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let mut tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("POST"));
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    tx = t.connp.tx(1).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::NOT_STARTED, tx.response_progress);
+}
+
+#[test]
+fn Http_0_9_MethodOnly() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("92-http_0_9-method_only.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/"));
+    assert!(tx.is_protocol_0_9);
+}
+
+#[test]
+fn CompressedResponseDeflateAsGzip() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("93-compressed-response-deflateasgzip.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(755, tx.response_message_len);
+    assert_eq!(1433, tx.response_entity_len);
+}
+
+#[test]
+fn CompressedResponseZlibAsDeflate() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-118.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.is_complete());
+
+    assert_response_header_eq!(
+        tx,
+        "content-disposition",
+        "attachment; filename=\"eicar.txt\""
+    );
+    assert_response_header_eq!(tx, "content-encoding", "deflate");
+    assert_eq!(68, tx.response_entity_len);
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+    assert!(user_data.request_data.is_empty());
+    assert_eq!(1, user_data.response_data.len());
+    let chunk = &user_data.response_data[0];
+    assert_eq!(
+        b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref(),
+        chunk.as_slice()
+    );
+}
+
+#[test]
+fn CompressedResponseMultiple() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("94-compressed-response-multiple.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(51, tx.response_message_len);
+    assert_eq!(25, tx.response_entity_len);
+}
+
+#[test]
+fn CompressedResponseBombLimitOkay() {
+    let mut cfg = TestConfig();
+    cfg.compression_options.set_bomb_limit(0);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("14-compressed-response-gzip-chunked.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(28261, tx.response_message_len);
+    assert_eq!(159_590, tx.response_entity_len);
+}
+
+#[test]
+fn CompressedResponseBombLimitExceeded() {
+    let mut cfg = TestConfig();
+    cfg.compression_options.set_bomb_limit(0);
+    cfg.compression_options.set_bomb_ratio(2);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("14-compressed-response-gzip-chunked.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.is_complete());
+
+    assert_eq!(28261, tx.response_message_len);
+    assert_eq!(2608, tx.response_entity_len);
+}
+
+#[test]
+fn CompressedResponseTimeLimitExceeded() {
+    let mut cfg = TestConfig();
+    cfg.compression_options.set_time_limit(0);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("14-compressed-response-gzip-chunked.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.is_complete());
+
+    assert_eq!(28261, tx.response_message_len);
+    assert_eq!(29656, tx.response_entity_len);
+}
+
+#[test]
+fn CompressedResponseGzipAsDeflate() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("95-compressed-response-gzipasdeflate.t").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(187, tx.response_message_len);
+    assert_eq!(225, tx.response_entity_len);
+}
+
+#[test]
+fn CompressedResponseLzma() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("96-compressed-response-lzma.t").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(90, tx.response_message_len);
+    assert_eq!(68, tx.response_entity_len);
+}
+
+#[test]
+fn CompressedResponseLzmaDisabled() {
+    let mut cfg = TestConfig();
+    cfg.compression_options.set_lzma_memlimit(0);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("96-compressed-response-lzma.t").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.is_complete());
+
+    assert_eq!(90, tx.response_message_len);
+    assert_eq!(90, tx.response_entity_len);
+}
+
+#[test]
+fn CompressedResponseLzmaMemlimit() {
+    let mut cfg = TestConfig();
+    cfg.compression_options.set_lzma_memlimit(1);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("96-compressed-response-lzma.t").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.is_complete());
+    assert_eq!(90, tx.response_message_len);
+    assert_eq!(72, tx.response_entity_len);
+    assert!(tx.response_message.as_ref().unwrap().eq_slice("ok"));
+}
+
+#[test]
+fn RequestsCut() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("97-requests-cut.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+    let mut tx = t.connp.tx(0).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    tx = t.connp.tx(1).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+}
+
+#[test]
+fn ResponsesCut() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("98-responses-cut.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+    let mut tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert!(tx.response_status_number.eq_num(200));
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    tx = t.connp.tx(1).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert!(tx.response_status_number.eq_num(200));
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn AuthDigest_EscapedQuote() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("100-auth-digest-escaped-quote.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type);
+
+    assert!(tx
+        .request_auth_username
+        .as_ref()
+        .unwrap()
+        .eq_slice("ivan\"r\""));
+
+    assert!(tx.request_auth_password.is_none());
+}
+
+#[test]
+fn Tunnelled1() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("106-tunnelled-1.t").is_ok());
+    assert_eq!(2, t.connp.tx_size());
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert!(tx1.request_method.as_ref().unwrap().eq_slice("CONNECT"));
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert!(tx2.request_method.as_ref().unwrap().eq_slice("GET"));
+}
+
+#[test]
+fn Expect100() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("105-expect-100.t").is_ok());
+    assert_eq!(2, t.connp.tx_size());
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("PUT"));
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert!(tx.response_status_number.eq_num(401));
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    let tx = t.connp.tx(1).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("POST"));
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert!(tx.response_status_number.eq_num(200));
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn UnknownStatusNumber() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("107-response_unknown_status.t").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(tx.response_status_number, HtpResponseNumber::Unknown);
+}
+
+#[test]
+fn ResponseHeaderCrOnly() {
+    // Content-Length terminated with \r only.
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("108-response-headers-cr-only.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(2, tx.response_headers.size());
+    // Check response headers
+    assert_response_header_eq!(tx, "content-type", "text/html");
+    assert_response_header_eq!(tx, "Content-Length", "7");
+}
+
+#[test]
+fn ResponseHeaderDeformedEOL() {
+    // Content-Length terminated with \n\r\r\n\r\n only.
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("109-response-headers-deformed-eol.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(2, tx.response_headers.size());
+    // Check response headers
+    assert_response_header_eq!(tx, "content-type", "text/html");
+    assert_response_header_eq!(tx, "content-length", "6");
+    let logs = t.connp.conn.get_logs();
+    let log_message_count = logs.len();
+    assert_eq!(log_message_count, 2);
+    assert_eq!(logs.first().unwrap().msg.code, HtpLogCode::DEFORMED_EOL);
+
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+    assert!(user_data.request_data.is_empty());
+    assert_eq!(2, user_data.response_data.len());
+    assert_eq!(b"abcdef".as_ref(), user_data.response_data[0].as_slice());
+}
+
+#[test]
+fn ResponseFoldedHeaders2() {
+    // Space folding char
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("110-response-folded-headers-2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert_response_header_eq!(tx, "Server", "Apache Server");
+    assert_eq!(3, tx.response_headers.size());
+}
+
+#[test]
+fn ResponseHeadersChunked() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("111-response-headers-chunked.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert_eq!(2, tx.response_headers.size());
+
+    assert_response_header_eq!(tx, "content-type", "text/html");
+    assert_response_header_eq!(tx, "content-length", "12");
+}
+
+#[test]
+fn ResponseHeadersChunked2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("112-response-headers-chunked-2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert_eq!(2, tx.response_headers.size());
+
+    assert_response_header_eq!(tx, "content-type", "text/html");
+    assert_response_header_eq!(tx, "content-length", "12");
+}
+
+#[test]
+fn ResponseMultipartRanges() {
+    // This should be is_ok() once multipart/byteranges is handled in response parsing
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("113-response-multipart-byte-ranges.t").is_err());
+}
+
+#[test]
+fn Http2Upgrade() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("114-http-2-upgrade.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+    assert!(!t.connp.tx(0).unwrap().is_http_2_upgrade);
+    assert!(t.connp.tx(1).unwrap().is_http_2_upgrade);
+}
+
+#[test]
+fn AuthBearer() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("115-auth-bearer.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpAuthType::BEARER, tx.request_auth_type);
+
+    assert!(tx
+        .request_auth_token
+        .as_ref()
+        .unwrap()
+        .eq_slice("mF_9.B5f-4.1JqM"));
+}
+
+#[test]
+fn HttpCloseHeaders() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("http-close-headers.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/"));
+
+    assert_eq!(HtpProtocol::V1_1, tx.request_protocol_number);
+    assert_eq!(HtpProtocol::V1_0, tx.response_protocol_number);
+
+    assert_request_header_eq!(tx, "Host", "100.64.0.200");
+    assert_request_header_eq!(tx, "Connection", "keep-alive");
+    assert_request_header_eq!(tx, "Accept-Encoding", "gzip, deflate");
+    assert_request_header_eq!(tx, "Accept", "*/*");
+    assert_request_header_eq!(tx, "User-Agent", "python-requests/2.21.0");
+    assert_response_header_eq!(tx, "Server", "ng1nx");
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn HttpStartFromResponse() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("http-start-from-response.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.is_none());
+    assert_eq!(
+        tx.request_uri,
+        Some(Bstr::from("/libhtp::request_uri_not_seen"))
+    );
+    assert!(tx.response_status_number.eq_num(200));
+
+    assert_eq!(HtpProtocol::Unknown, tx.request_protocol_number);
+    assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number);
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    let tx = t.connp.tx(1).unwrap();
+    assert_eq!(tx.request_method, Some(Bstr::from("GET")));
+    assert_eq!(tx.request_uri, Some(Bstr::from("/favicon.ico")));
+    assert!(tx.response_status_number.eq_num(404));
+
+    assert_eq!(HtpProtocol::V1_1, tx.request_protocol_number);
+    assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number);
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    let logs = t.connp.conn.get_logs();
+    assert_eq!(1, logs.len());
+    assert_eq!(
+        logs.first().unwrap().msg.msg,
+        "Unable to match response to request"
+    );
+}
+
+#[test]
+fn RequestCompression() {
+    let mut cfg = TestConfig();
+    cfg.set_request_decompression(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("116-request-compression.t").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(1355, tx.request_message_len);
+    assert_eq!(2614, tx.request_entity_len);
+}
+
+#[test]
+fn RequestResponseCompression() {
+    let mut cfg = TestConfig();
+    cfg.set_request_decompression(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("117-request-response-compression.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(1355, tx.request_message_len);
+    assert_eq!(2614, tx.request_entity_len);
+
+    assert_eq!(51, tx.response_message_len);
+    assert_eq!(25, tx.response_entity_len);
+}
+
+#[test]
+fn AmbiguousEOL() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("119-ambiguous-eol.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("POST"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/"));
+    assert_eq!(HtpProtocol::V1_0, tx.request_protocol_number);
+
+    assert_eq!(HtpProtocol::V1_0, tx.response_protocol_number);
+    assert!(tx.response_status_number.eq_num(200));
+}
+
+// Evader Tests
+#[test]
+fn HttpEvader017() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-017.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/chunked/eicar.txt/cr-size");
+    assert_evader_response!(tx);
+    assert_response_header_eq!(tx, "transfer-encoding", "chunked");
+    assert_eq!(68, tx.response_entity_len);
+    assert_eq!(101, tx.response_message_len);
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+    assert!(user_data.request_data.is_empty());
+    assert_eq!(5, user_data.response_data.len());
+    assert_eq!(
+        b"X5O!P%@AP[4\\PZX".as_ref(),
+        user_data.response_data[0].as_slice()
+    );
+    assert_eq!(
+        b"54(P^)7CC)7}$EI".as_ref(),
+        user_data.response_data[1].as_slice()
+    );
+    assert_eq!(
+        b"CAR-STANDARD-AN".as_ref(),
+        user_data.response_data[2].as_slice()
+    );
+    assert_eq!(
+        b"TIVIRUS-TEST-FI".as_ref(),
+        user_data.response_data[3].as_slice()
+    );
+    assert_eq!(b"LE!$H+H*".as_ref(), user_data.response_data[4].as_slice());
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn HttpEvader018() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-018.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/chunked/eicar.txt/lf-size");
+    assert_evader_response!(tx);
+    assert_response_header_eq!(tx, "transfer-encoding", "chunked");
+    assert_eq!(68, tx.response_entity_len);
+    assert_eq!(101, tx.response_message_len);
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+    assert!(user_data.request_data.is_empty());
+    assert_eq!(5, user_data.response_data.len());
+    assert_eq!(
+        b"X5O!P%@AP[4\\PZX".as_ref(),
+        user_data.response_data[0].as_slice()
+    );
+    assert_eq!(
+        b"54(P^)7CC)7}$EI".as_ref(),
+        user_data.response_data[1].as_slice()
+    );
+    assert_eq!(
+        b"CAR-STANDARD-AN".as_ref(),
+        user_data.response_data[2].as_slice()
+    );
+    assert_eq!(
+        b"TIVIRUS-TEST-FI".as_ref(),
+        user_data.response_data[3].as_slice()
+    );
+    assert_eq!(b"LE!$H+H*".as_ref(), user_data.response_data[4].as_slice());
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn HttpEvader044() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-044.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/chunked/eicar.txt/chunked,http10,do_clen");
+    assert_eq!(HtpProtocol::V1_0, tx.response_protocol_number);
+    assert!(tx.response_status_number.eq_num(200));
+    assert_response_header_eq!(tx, "content-type", "application/octet-stream");
+    assert_response_header_eq!(
+        tx,
+        "content-disposition",
+        "attachment; filename=\"eicar.txt\""
+    );
+    assert_response_header_eq!(tx, "transfer-encoding", "chunked");
+    assert_response_header_eq!(tx, "connection", "close");
+    assert_eq!(68, tx.response_entity_len);
+    assert_eq!(68, tx.response_message_len);
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+    assert!(user_data.request_data.is_empty());
+    assert_eq!(1, user_data.response_data.len());
+    let chunk = &user_data.response_data[0];
+    assert_eq!(
+        b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref(),
+        chunk.as_slice()
+    );
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn HttpEvader059() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-059.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/chunked/eicar.txt/chunkednl-");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader060() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-060.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/chunked/eicar.txt/nl-nl-chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader061() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-061.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/chunked/eicar.txt/nl-nl-chunked-nl-");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+#[test]
+fn HttpEvader078() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-078.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/chunked/eicar.txt/chunkedcr-,do_clen");
+    assert_evader_response!(tx);
+    assert_response_header_eq!(tx, "transfer-encoding", "chunked");
+    assert_eq!(68, tx.response_entity_len);
+    assert_eq!(68, tx.response_message_len);
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+    assert!(user_data.request_data.is_empty());
+    assert_eq!(1, user_data.response_data.len());
+    let chunk = &user_data.response_data[0];
+    assert_eq!(
+        b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref(),
+        chunk.as_slice()
+    );
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn HttpEvader130() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("http-evader-130.t").is_err());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(
+        tx,
+        "/compressed/eicar.txt/ce%3Adeflate-nl-,-nl-deflate-nl-;deflate;deflate"
+    );
+    assert_evader_response!(tx);
+    assert_response_header_eq!(tx, "Content-Encoding", "deflate , deflate");
+    assert_response_header_eq!(tx, "Content-Length", "75");
+    assert_eq!(68, tx.response_entity_len);
+    assert_eq!(76, tx.response_message_len);
+}
+
+#[test]
+fn HttpEvader195() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-195.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(
+        tx,
+        "/compressed/eicar.txt/ce%3Agzip;gzip;replace%3A3,1%7C02;replace%3A10,0=0000"
+    );
+    assert_response_header_eq!(tx, "Content-Encoding", "gzip");
+    assert_eq!(68, tx.response_entity_len);
+    assert_eq!(90, tx.response_message_len);
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+    assert!(user_data.request_data.is_empty());
+    assert_eq!(1, user_data.response_data.len());
+    assert_eq!(
+        user_data.response_data[0].as_slice(),
+        b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref()
+    );
+}
+
+#[test]
+fn HttpEvader274() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-274.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/somehdr;space;chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader284() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-284.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/cr;chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader286() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-286.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/crcronly;chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader287() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-287.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/cr-cronly;chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader297() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-297.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/te%5C015%5C040%3Achunked;do_chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader300() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-300.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/te%5C015%5C012%5C040%5C015%5C012%5C040%3A%5C015%5C012%5C040chunked;do_chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader303() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-303.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/te%3A%5C000chunked;do_chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader307() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-307.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/te%3A%5C012%5C000chunked;do_chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader318() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("http-evader-318.t").is_err());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/ce%5C015%5C012%5C040%3Agzip;do_gzip");
+    assert_evader_response!(tx);
+    assert_response_header_eq!(tx, "Content-Encoding", "gzip");
+    assert_eq!(68, tx.response_entity_len);
+    assert_eq!(89, tx.response_message_len);
+}
+
+#[test]
+fn HttpEvader320() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("http-evader-320.t").is_err());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/ce%5C013%3Agzip;do_gzip");
+    assert_evader_response!(tx);
+    assert_response_header_eq!(tx, "Content-Encoding", "gzip");
+    assert_response_header_eq!(tx, "Content-Length", "88");
+    assert_eq!(88, tx.response_entity_len);
+    assert_eq!(99, tx.response_message_len);
+}
+
+#[test]
+fn HttpEvader321() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("http-evader-321.t").is_err());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/ce%5C014%3Agzip;do_gzip");
+    assert_evader_response!(tx);
+    assert_response_header_eq!(tx, "Content-Encoding", "gzip");
+    assert_response_header_eq!(tx, "Content-Length", "88");
+    assert_eq!(88, tx.response_entity_len);
+    assert_eq!(99, tx.response_message_len);
+}
+
+#[test]
+fn HttpEvader390() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-390.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(
+        tx,
+        "/broken/eicar.txt/status%3A%5C000HTTP/1.1%28space%29200%28space%29ok;chunked"
+    );
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader402() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-402.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/chunked;cr-no-crlf;end-crlflf");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader405() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-405.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/chunked;lfcr-no-crlf;end-crlfcrlf");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader411() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-411.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/end-lfcrcrlf;chunked");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader416() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-416.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/end-lf%5C040lf");
+    assert_evader_response!(tx);
+    assert_response_header_eq!(tx, "Content-length", "68");
+    assert_eq!(69, tx.response_message_len);
+    assert_eq!(69, tx.response_entity_len);
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+    assert!(user_data.request_data.is_empty());
+    assert_eq!(2, user_data.response_data.len());
+    assert_eq!(
+        b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref(),
+        user_data.response_data[0].as_slice()
+    );
+    assert_eq!(b"\n".as_ref(), user_data.response_data[1].as_slice());
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+
+#[test]
+fn HttpEvader419() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("http-evader-419.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/chunked;end-lf%5C040lf");
+    assert_evader_response!(tx);
+    assert_evader_chunked!(tx);
+}
+
+#[test]
+fn HttpEvader423() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("http-evader-423.t").is_err());
+    let tx = t.connp.tx(0).unwrap();
+    assert_evader_request!(tx, "/broken/eicar.txt/gzip;end-lf%5C040lflf");
+    assert_evader_response!(tx);
+    assert_response_header_eq!(tx, "Content-Encoding", "gzip");
+    assert_response_header_eq!(tx, "Content-length", "88");
+    assert_eq!(89, tx.response_message_len);
+    assert_eq!(68, tx.response_entity_len);
+}
+
+#[test]
+fn RequestGap() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("120-request-gap.t").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_MISSING_BYTES));
+
+    // The interim header from the 100 response should not be among the final headers.
+    assert!(tx.request_headers.get_nocase_nozero("Header1").is_none());
+    assert_eq!(user_data.request_data[1].as_slice(), b"<? echo ".as_ref());
+    // Next chunk is a gap of size 5
+    assert_eq!(user_data.request_data[2].as_slice(), b"".as_ref());
+    assert_eq!(user_data.request_data[2].capacity(), 5);
+    assert_eq!(user_data.request_data[3].as_slice(), b"; ?>".as_ref());
+}
+
+#[test]
+fn ResponseGap() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("121-response-gap.t").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::RESPONSE_MISSING_BYTES));
+
+    assert_eq!(user_data.response_data[0].as_slice(), b"Hell".as_ref());
+    // Next chunk is a gap of size 4
+    assert_eq!(user_data.response_data[1].as_slice(), b"".as_ref());
+    assert_eq!(user_data.response_data[1].capacity(), 4);
+    assert_eq!(user_data.response_data[2].as_slice(), b"rld!".as_ref());
+}
+
+#[test]
+fn ResponseBodyData() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("122-response-body-data.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.is_complete());
+
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+    let response_data = &user_data.response_data;
+    assert_eq!(3, response_data.len());
+    assert_eq!(b"1\n", response_data[0].as_slice());
+    assert_eq!(b"23\n", response_data[1].as_slice());
+    assert_eq!(b"4", response_data[2].as_slice());
+}
+
+#[test]
+fn ResponseHeaderParsing() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("123-response-header-bug.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).expect("expected tx to exist");
+
+    let actual: Vec<(&[u8], &[u8])> = (&tx.response_headers)
+        .into_iter()
+        .map(|val| (val.name.as_slice(), val.value.as_slice()))
+        .collect();
+
+    let expected: Vec<(&[u8], &[u8])> = [
+        ("Date", "Mon, 31 Aug 2009 20:25:50 GMT"),
+        ("Server", "Apache"),
+        ("Connection", "close"),
+        ("Content-Type", "text/html"),
+        ("Content-Length", "12"),
+    ]
+    .iter()
+    .map(|(key, val)| (key.as_bytes(), val.as_bytes()))
+    .collect();
+    assert_eq!(
+        actual,
+        expected,
+        "{:?} != {:?}",
+        actual
+            .clone()
+            .into_iter()
+            .map(|(key, val)| (
+                String::from_utf8_lossy(key).to_string(),
+                String::from_utf8_lossy(val).to_string()
+            ))
+            .collect::<Vec<(String, String)>>(),
+        expected
+            .clone()
+            .into_iter()
+            .map(|(key, val)| (
+                String::from_utf8_lossy(key).to_string(),
+                String::from_utf8_lossy(val).to_string()
+            ))
+            .collect::<Vec<(String, String)>>(),
+    );
+}
+
+#[test]
+fn RequestSingleBytes() {
+    // Test input fed in one byte at a time
+    let input = b" GET / HTTP/1.0\r\nUser-Agent: Test/1.0\r\n\r\n";
+    let mut t = Test::new_with_callbacks();
+    t.open_connection(None);
+    for x in 0..input.len() {
+        t.connp
+            .request_data(ParserData::from(&input[x..(x + 1)]), None);
+    }
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    let h = tx.request_headers.get_nocase_nozero("User-Agent").unwrap();
+    assert!(h.value.eq_slice(b"Test/1.0"));
+}
+
+#[test]
+fn ResponseIncomplete() {
+    let mut t = Test::new_with_callbacks();
+    assert!(t.run_file("124-response-incomplete.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.is_complete());
+
+    let user_data = tx.user_data::<MainUserData>().unwrap();
+
+    assert_eq!(
+        vec![
+            "request_start 0",
+            "response_start 0",
+            "request_complete 0",
+            "response_complete 0",
+            "transaction_complete 0"
+        ],
+        user_data.order
+    );
+}
+
+#[test]
+fn RandomInput() {
+    let mut t = Test::new(TestConfig());
+    if let Ok(file) = std::env::var("LIBHTP_TEST") {
+        t.run_file(&file).ok();
+        println!("{:#?}", t.connp);
+        for x in 0..t.connp.tx_size() {
+            println!("{:#?}", t.connp.tx(x));
+        }
+    }
+}
diff --git a/rust/htp/src/test/mod.rs b/rust/htp/src/test/mod.rs
new file mode 100644 (file)
index 0000000..bc2fb69
--- /dev/null
@@ -0,0 +1,8 @@
+/// helper for tests
+pub mod common;
+/// gunzip tests
+pub mod gunzip;
+/// hybrid tests
+pub mod hybrid;
+/// main tests
+pub mod main;
diff --git a/rust/htp/src/transaction.rs b/rust/htp/src/transaction.rs
new file mode 100644 (file)
index 0000000..fd60c7b
--- /dev/null
@@ -0,0 +1,985 @@
+use crate::{
+    bstr::Bstr,
+    config::{Config, HtpUnwanted},
+    connection_parser::ParserData,
+    decompressors::{Decompressor, HtpContentEncoding},
+    error::Result,
+    headers::{Parser as HeaderParser, Side},
+    hook::DataHook,
+    log::Logger,
+    parsers::{parse_authorization, parse_content_length, parse_content_type, parse_hostport},
+    request::HtpMethod,
+    uri::Uri,
+    util::{validate_hostname, FlagOperations, HtpFlags},
+    HtpStatus,
+};
+
+use std::any::Any;
+#[cfg(test)]
+use std::cmp::Ordering;
+
+#[derive(Debug, Clone)]
+/// This structure is used to pass transaction data (for example
+/// request and response body buffers) to callbacks.
+pub struct Data<'a> {
+    /// Transaction pointer. NOTE(review): this is a raw pointer with no
+    /// lifetime tie to the transaction — the constructor's caller must
+    /// guarantee the Transaction outlives this Data.
+    tx: *mut Transaction,
+    /// Ref to the parser data.
+    data: &'a ParserData<'a>,
+}
+
+impl<'a> Data<'a> {
+    /// Construct a new Data wrapping a transaction pointer and a borrowed
+    /// chunk of parser data.
+    pub(crate) fn new(tx: *mut Transaction, data: &'a ParserData<'a>) -> Self {
+        Self { tx, data }
+    }
+
+    /// Returns the (raw) transaction pointer associated with the Data.
+    pub(crate) fn tx(&self) -> *mut Transaction {
+        self.tx
+    }
+
+    /// Returns a pointer to the raw data associated with Data.
+    pub(crate) fn data(&self) -> *const u8 {
+        self.data.data_ptr()
+    }
+
+    /// Returns the length of the data, in bytes.
+    pub(crate) fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Determine whether this data is empty (zero-length).
+    pub(crate) fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns a reference to the internal ParserData struct.
+    pub(crate) fn parser_data(&self) -> &ParserData {
+        self.data
+    }
+}
+
+/// Enumerates the possible request and response body codings.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub(crate) enum HtpTransferCoding {
+    /// Body coding not determined yet.
+    Unknown,
+    /// No body.
+    NoBody,
+    /// Identity coding is used, which means that the body was sent as is.
+    Identity,
+    /// Chunked encoding.
+    Chunked,
+    /// We could not recognize the encoding.
+    Invalid,
+}
+
+/// Enumerates the possible states of a parsed response status number.
+/// (The original comment said "server personalities", which was a
+/// copy-paste error.)
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub(crate) enum HtpResponseNumber {
+    /// Default: parsing has not been attempted yet.
+    Unknown,
+    /// Could not resolve response number
+    Invalid,
+    /// Valid response number
+    Valid(u16),
+}
+
+impl HtpResponseNumber {
+    /// Determine if the response status number is in the given range,
+    /// inclusive at both ends. Unknown/Invalid never match.
+    pub(crate) fn in_range(self, min: u16, max: u16) -> bool {
+        use HtpResponseNumber::*;
+        match self {
+            Unknown | Invalid => false,
+            Valid(ref status) => status >= &min && status <= &max,
+        }
+    }
+
+    /// Determine if the response status number matches the
+    /// given status number. Unknown/Invalid never match.
+    pub(crate) fn eq_num(self, num: u16) -> bool {
+        use HtpResponseNumber::*;
+        match self {
+            Unknown | Invalid => false,
+            Valid(ref status) => status == &num,
+        }
+    }
+}
+
+/// Represents a single request or response header (name/value pair plus
+/// parsing flags).
+#[derive(Clone, Debug)]
+pub struct Header {
+    /// Header name.
+    pub name: Bstr,
+    /// Header value.
+    pub value: Bstr,
+    /// Parsing flags; a combination of: HTP_FIELD_INVALID, HTP_FIELD_FOLDED, HTP_FIELD_REPEATED.
+    pub flags: u64,
+}
+
+/// Table of request or response headers, kept in insertion order.
+#[derive(Clone, Debug)]
+pub struct Headers {
+    /// Entries in the table.
+    pub elements: Vec<Header>,
+}
+
+impl Headers {
+    /// Make a new owned Headers Table with given capacity
+    pub(crate) fn with_capacity(size: usize) -> Self {
+        Self {
+            elements: Vec::with_capacity(size),
+        }
+    }
+
+    /// Search the Headers table for the first tuple with a tuple key matching the given slice, ignoring ascii case and any zeros in self.
+    /// Linear scan over all elements.
+    ///
+    /// Returns None if no match is found.
+    pub(crate) fn get_nocase_nozero<K: AsRef<[u8]>>(&self, key: K) -> Option<&Header> {
+        self.elements
+            .iter()
+            .find(|x| x.name.cmp_nocase_nozero(key.as_ref()))
+    }
+
+    /// Search the Headers table for the first tuple with a tuple key matching the given slice, ignoring ascii case and any zeros in self
+    ///
+    /// Returns None if no match is found.
+    pub(crate) fn get_nocase_nozero_mut<K: AsRef<[u8]>>(&mut self, key: K) -> Option<&mut Header> {
+        self.elements
+            .iter_mut()
+            .find(|x| x.name.cmp_nocase_nozero(key.as_ref()))
+    }
+
+    /// Search the Headers table for the first tuple with a key matching the given slice, ignoring ascii case in self
+    ///
+    /// Returns None if no match is found.
+    pub(crate) fn get_nocase_mut<K: AsRef<[u8]>>(&mut self, key: K) -> Option<&mut Header> {
+        self.elements
+            .iter_mut()
+            .find(|x| x.name.cmp_nocase(key.as_ref()))
+    }
+
+    /// Search the Headers table for the first tuple with a key matching the given slice, ignoring ascii case in self
+    ///
+    /// Returns None if no match is found.
+    pub(crate) fn get_nocase<K: AsRef<[u8]>>(&self, key: K) -> Option<&Header> {
+        self.elements
+            .iter()
+            .find(|x| x.name.cmp_nocase(key.as_ref()))
+    }
+
+    /// Returns the number of elements in the Headers table
+    pub(crate) fn size(&self) -> usize {
+        self.elements.len()
+    }
+}
+
+// Borrowing iteration: yields &Header in insertion order.
+impl<'a> IntoIterator for &'a Headers {
+    type Item = &'a Header;
+    type IntoIter = std::slice::Iter<'a, Header>;
+
+    fn into_iter(self) -> std::slice::Iter<'a, Header> {
+        self.elements.iter()
+    }
+}
+
+// Consuming iteration: yields owned Header values in insertion order.
+impl IntoIterator for Headers {
+    type Item = Header;
+    type IntoIter = std::vec::IntoIter<Header>;
+
+    fn into_iter(self) -> std::vec::IntoIter<Header> {
+        self.elements.into_iter()
+    }
+}
+
+impl Header {
+    /// Construct a new header with no parsing flags set (tests only).
+    #[cfg(test)]
+    pub(crate) fn new(name: Bstr, value: Bstr) -> Self {
+        Self::new_with_flags(name, value, 0)
+    }
+
+    /// Construct a new header with explicit parsing flags.
+    pub(crate) fn new_with_flags(name: Bstr, value: Bstr, flags: u64) -> Self {
+        Self { name, value, flags }
+    }
+}
+
+/// Possible states of a progressing transaction. Internally, progress will change
+/// to the next state when the processing activities associated with that state
+/// begin. For example, when we start to process response line bytes, the response
+/// state will change from NOT_STARTED to LINE.
+#[repr(C)]
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Debug)]
+pub enum HtpResponseProgress {
+    /// Default state.
+    NOT_STARTED,
+    /// Response Line.
+    LINE,
+    /// Response Headers.
+    HEADERS,
+    /// Response Body.
+    BODY,
+    /// Trailer data.
+    TRAILER,
+    /// Response completed.
+    COMPLETE,
+    /// Error involving response side of transaction.
+    ERROR,
+    /// Response gap.
+    GAP,
+}
+
+/// Possible states of a progressing transaction. Internally, progress will change
+/// to the next state when the processing activities associated with that state
+/// begin. For example, when we start to process request line bytes, the request
+/// state will change from NOT_STARTED to LINE.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
+pub enum HtpRequestProgress {
+    /// Default state.
+    NOT_STARTED,
+    /// In request line state.
+    LINE,
+    /// In request headers state.
+    HEADERS,
+    /// In request body state.
+    BODY,
+    /// Trailer data.
+    TRAILER,
+    /// Request is completed.
+    COMPLETE,
+    /// Error involving request side of transaction.
+    ERROR,
+    /// In request gap state.
+    GAP,
+}
+
+/// Enumerates the possible values for authentication type.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpAuthType {
+    /// This is the default value that is used before
+    /// the presence of authentication is determined (e.g.,
+    /// before request headers are seen).
+    Unknown,
+    /// No authentication.
+    NONE,
+    /// HTTP Basic authentication used.
+    BASIC,
+    /// HTTP Digest authentication used.
+    DIGEST,
+    /// HTTP Bearer authentication used.
+    BEARER,
+    /// Unrecognized authentication method.
+    /// Pinned at 9, leaving a gap after BEARER — presumably to keep the
+    /// repr(C) discriminants stable for FFI; confirm against cbindgen output.
+    UNRECOGNIZED = 9,
+    /// Error retrieving the auth type.
+    ERROR,
+}
+
+/// Protocol version constants. Encoded as major * 100 + minor (hence
+/// V1_0 = 100, V1_1 = 101); negative values are sentinel states.
+/// PartialOrd is derived so versions can be compared numerically.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
+pub enum HtpProtocol {
+    /// Error with the transaction side.
+    Error = -3,
+    /// Could not resolve protocol version number.
+    Invalid = -2,
+    /// Default protocol value.
+    Unknown = -1,
+    /// HTTP/0.9 version.
+    V0_9 = 9,
+    /// HTTP/1.0 version.
+    V1_0 = 100,
+    /// HTTP/1.1 version.
+    V1_1 = 101,
+}
+
+/// Represents a single HTTP transaction, which is a combination of a request and a response.
+pub struct Transaction {
+    /// The logger structure associated with this transaction
+    pub(crate) logger: Logger,
+    /// The configuration structure associated with this transaction.
+    pub(crate) cfg: &'static Config,
+    /// The user data associated with this transaction.
+    pub(crate) user_data: Option<Box<dyn Any>>,
+    // Request fields
+    /// Contains a count of how many empty lines were skipped before the request line.
+    pub(crate) request_ignored_lines: u32,
+    /// The first line of this request.
+    pub(crate) request_line: Option<Bstr>,
+    /// Request method.
+    pub(crate) request_method: Option<Bstr>,
+    /// Request method, as number. Available only if we were able to recognize the request method.
+    pub(crate) request_method_number: HtpMethod,
+    /// Request URI, raw, as given to us on the request line. This field can take different forms,
+    /// for example authority for CONNECT methods, absolute URIs for proxy requests, and the query
+    /// string when one is provided. Use Transaction::parsed_uri if you need to access to specific
+    /// URI elements. Can be NULL if the request line contains only a request method (which is
+    /// an extreme case of HTTP/0.9, but passes in practice).
+    pub(crate) request_uri: Option<Bstr>,
+    /// Request protocol, as text. Can be NULL if no protocol was specified.
+    pub(crate) request_protocol: Option<Bstr>,
+    /// Protocol version as a number. Multiply the high version number by 100, then add the low
+    /// version number. You should prefer to work the pre-defined HtpProtocol constants.
+    pub(crate) request_protocol_number: HtpProtocol,
+    /// Is this request using HTTP/0.9? We need a separate field for this purpose because
+    /// the protocol version alone is not sufficient to determine if HTTP/0.9 is used. For
+    /// example, if you submit "GET / HTTP/0.9" to Apache, it will not treat the request
+    /// as HTTP/0.9.
+    pub(crate) is_protocol_0_9: bool,
+    /// This structure holds the individual components parsed out of the request URI, with
+    /// appropriate normalization and transformation applied, per configuration. No information
+    /// is added. In extreme cases when no URI is provided on the request line, all fields
+    /// will be NULL. (Well, except for port_number, which will be -1.) To inspect raw data, use
+    /// Transaction::request_uri or Transaction::parsed_uri_raw.
+    pub(crate) parsed_uri: Option<Uri>,
+    /// This structure holds the individual components parsed out of the request URI, but
+    /// without any modification. The purpose of this field is to allow you to look at the data as it
+    /// was supplied on the request line. Fields can be NULL, depending on what data was supplied.
+    /// The port_number field is always -1.
+    pub(crate) parsed_uri_raw: Option<Uri>,
+    ///  This structure holds the whole normalized uri, including path, query, fragment, scheme, username, password, hostname, and port
+    pub(crate) complete_normalized_uri: Option<Bstr>,
+    ///  This structure holds the normalized uri, including path, query, and fragment
+    pub(crate) partial_normalized_uri: Option<Bstr>,
+    /// HTTP 1.1 RFC
+    ///
+    /// 4.3 Message Body
+    ///
+    /// The message-body (if any) of an HTTP message is used to carry the
+    /// entity-body associated with the request or response. The message-body
+    /// differs from the entity-body only when a transfer-coding has been
+    /// applied, as indicated by the Transfer-Encoding header field (section
+    /// 14.41).
+    ///
+    /// ```text
+    ///     message-body = entity-body
+    ///                  | <entity-body encoded as per Transfer-Encoding>
+    /// ```
+    ///
+    /// The length of the request message-body. In most cases, this value
+    /// will be the same as request_entity_len. The values will be different
+    /// if request compression or chunking were applied. In that case,
+    /// request_message_len contains the length of the request body as it
+    /// has been seen over TCP; request_entity_len contains length after
+    /// de-chunking and decompression.
+    pub(crate) request_message_len: u64,
+    /// The length of the request entity-body. In most cases, this value
+    /// will be the same as request_message_len. The values will be different
+    /// if request compression or chunking were applied. In that case,
+    /// request_message_len contains the length of the request body as it
+    /// has been seen over TCP; request_entity_len contains length after
+    /// de-chunking and decompression.
+    pub(crate) request_entity_len: u64,
+    /// Parsed request headers.
+    pub(crate) request_headers: Headers,
+    /// Request transfer coding. Can be one of Unknown (body presence not
+    /// determined yet), Identity, Chunked, NoBody,
+    /// and Invalid (see HtpTransferCoding).
+    pub(crate) request_transfer_coding: HtpTransferCoding,
+    /// Request body compression, which indicates if compression is used
+    /// for the request body. This field is an interpretation of the information
+    /// available in request headers.
+    pub(crate) request_content_encoding: HtpContentEncoding,
+    /// Request body compression processing information, which is related to how
+    /// the library is going to process (or has processed) a request body. Changing
+    /// this field mid-processing can influence library actions. For example, setting
+    /// this field to NONE in a request_headers callback will prevent
+    /// decompression.
+    pub(crate) request_content_encoding_processing: HtpContentEncoding,
+    /// This field will contain the request content type when that information
+    /// is available in request headers. The contents of the field will be converted
+    /// to lowercase and any parameters (e.g., character set information) removed.
+    pub(crate) request_content_type: Option<Bstr>,
+    /// Request decompressor used to decompress request body data.
+    pub(crate) request_decompressor: Option<Decompressor>,
+    /// Contains the value specified in the Content-Length header. The value of this
+    /// field will be None from the beginning of the transaction and until request
+    /// headers are processed. It will stay None if the C-L header was not provided,
+    /// or if the value in it cannot be parsed.
+    pub(crate) request_content_length: Option<u64>,
+    /// Transaction-specific REQUEST_BODY_DATA hook. Behaves as
+    /// the configuration hook with the same name.
+    pub(crate) hook_request_body_data: DataHook,
+    /// Transaction-specific RESPONSE_BODY_DATA hook. Behaves as
+    /// the configuration hook with the same name.
+    pub(crate) hook_response_body_data: DataHook,
+    /// Authentication type used in the request.
+    pub(crate) request_auth_type: HtpAuthType,
+    /// Authentication username.
+    pub(crate) request_auth_username: Option<Bstr>,
+    /// Authentication password. Available only when Transaction::request_auth_type is HTP_AUTH_BASIC.
+    pub(crate) request_auth_password: Option<Bstr>,
+    /// Authentication token. Available only when Transaction::request_auth_type is HTP_AUTH_BEARER.
+    pub(crate) request_auth_token: Option<Bstr>,
+    /// Request hostname. Per the RFC, the hostname will be taken from the Host header
+    /// when available. If the host information is also available in the URI, it is used
+    /// instead of whatever might be in the Host header. Can be NULL. This field does
+    /// not contain port information.
+    pub(crate) request_hostname: Option<Bstr>,
+    /// Request port number, if presented. The rules for Transaction::request_host apply. Set to
+    /// None by default.
+    pub(crate) request_port_number: Option<u16>,
+
+    // Response fields
+    /// How many empty lines did we ignore before reaching the status line?
+    pub(crate) response_ignored_lines: u32,
+    /// Response line.
+    pub(crate) response_line: Option<Bstr>,
+    /// Response protocol, as text. Can be NULL.
+    pub(crate) response_protocol: Option<Bstr>,
+    /// Response protocol as number. Available only if we were able to parse the protocol version,
+    /// INVALID otherwise. UNKNOWN until parsing is attempted.
+    pub(crate) response_protocol_number: HtpProtocol,
+    /// Response status code, as text. Starts as NULL and can remain NULL on
+    /// an invalid response that does not specify status code.
+    pub(crate) response_status: Option<Bstr>,
+    /// Response status code, available only if we were able to parse it, HTP_STATUS_INVALID
+    /// otherwise. HTP_STATUS_UNKNOWN until parsing is attempted.
+    pub(crate) response_status_number: HtpResponseNumber,
+    /// This field is set by the protocol decoder when it thinks that the
+    /// backend server will reject a request with a particular status code.
+    pub(crate) response_status_expected_number: HtpUnwanted,
+    /// The message associated with the response status code. Can be NULL.
+    pub(crate) response_message: Option<Bstr>,
+    /// Have we seen the server respond with a 100 response?
+    pub(crate) seen_100continue: bool,
+    /// Parsed response headers. Contains instances of Header.
+    pub(crate) response_headers: Headers,
+    /// Is this a response a HTTP/2.0 upgrade?
+    pub(crate) is_http_2_upgrade: bool,
+
+    /// HTTP 1.1 RFC
+    ///
+    /// 4.3 Message Body
+    ///
+    /// The message-body (if any) of an HTTP message is used to carry the
+    /// entity-body associated with the request or response. The message-body
+    /// differs from the entity-body only when a transfer-coding has been
+    /// applied, as indicated by the Transfer-Encoding header field (section
+    /// 14.41).
+    ///
+    /// ```text
+    ///     message-body = entity-body
+    ///                  | <entity-body encoded as per Transfer-Encoding>
+    /// ```
+    ///
+    /// The length of the response message-body. In most cases, this value
+    /// will be the same as response_entity_len. The values will be different
+    /// if response compression or chunking were applied. In that case,
+    /// response_message_len contains the length of the response body as it
+    /// has been seen over TCP; response_entity_len contains the length after
+    /// de-chunking and decompression.
+    pub(crate) response_message_len: u64,
+    /// The length of the response entity-body. In most cases, this value
+    /// will be the same as response_message_len. The values will be different
+    /// if request compression or chunking were applied. In that case,
+    /// response_message_len contains the length of the response body as it
+    /// has been seen over TCP; response_entity_len contains length after
+    /// de-chunking and decompression.
+    pub(crate) response_entity_len: u64,
+    /// Contains the value specified in the Content-Length header. The value of this
+    /// field will be None from the beginning of the transaction and until response
+    /// headers are processed. It will stay None if the C-L header was not provided,
+    /// or if the value in it cannot be parsed.
+    pub(crate) response_content_length: Option<u64>,
+    /// Response transfer coding, which indicates if there is a response body,
+    /// and how it is transported (e.g., as-is, or chunked).
+    pub(crate) response_transfer_coding: HtpTransferCoding,
+    /// Response body compression, which indicates if compression is used
+    /// for the response body. This field is an interpretation of the information
+    /// available in response headers.
+    pub(crate) response_content_encoding: HtpContentEncoding,
+    /// Response body compression processing information, which is related to how
+    /// the library is going to process (or has processed) a response body. Changing
+    /// this field mid-processing can influence library actions. For example, setting
+    /// this field to NONE in a RESPONSE_HEADERS callback will prevent
+    /// decompression.
+    pub(crate) response_content_encoding_processing: HtpContentEncoding,
+    /// This field will contain the response content type when that information
+    /// is available in response headers. The contents of the field will be converted
+    /// to lowercase and any parameters (e.g., character set information) removed.
+    pub(crate) response_content_type: Option<Bstr>,
+    /// Response decompressor used to decompress response body data.
+    pub(crate) response_decompressor: Option<Decompressor>,
+
+    // Common fields
+    /// Parsing flags; a combination of: HTP_REQUEST_INVALID_T_E, HTP_INVALID_FOLDING,
+    /// HTP_REQUEST_SMUGGLING, HTP_MULTI_PACKET_HEAD, and HTP_FIELD_UNPARSEABLE.
+    pub(crate) flags: u64,
+    /// Request progress.
+    pub(crate) request_progress: HtpRequestProgress,
+    /// Response progress.
+    pub(crate) response_progress: HtpResponseProgress,
+    /// Transaction index on the connection.
+    pub(crate) index: usize,
+    /// Total repetitions for headers in request.
+    pub(crate) request_header_repetitions: u16,
+    /// Total repetitions for headers in response.
+    pub(crate) response_header_repetitions: u16,
+    /// Request header parser
+    pub(crate) request_header_parser: HeaderParser,
+    /// Response header parser
+    pub(crate) response_header_parser: HeaderParser,
+}
+
+// Manual Debug implementation. Intentionally omits fields that are not
+// useful (or not Debug) in dumps: logger, cfg, user_data, the
+// decompressors, the body-data hooks, and the header parsers.
+impl std::fmt::Debug for Transaction {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Transaction")
+            .field("request_line", &self.request_line)
+            .field("request_method", &self.request_method)
+            .field("request_method_number", &self.request_method_number)
+            .field("request_uri", &self.request_uri)
+            .field("request_protocol", &self.request_protocol)
+            .field("request_protocol_number", &self.request_protocol_number)
+            .field("is_protocol_0_9", &self.is_protocol_0_9)
+            .field("parsed_uri", &self.parsed_uri)
+            .field("parsed_uri_raw", &self.parsed_uri_raw)
+            .field("complete_normalized_uri", &self.complete_normalized_uri)
+            .field("partial_normalized_uri", &self.partial_normalized_uri)
+            .field("request_message_len", &self.request_message_len)
+            .field("request_entity_len", &self.request_entity_len)
+            .field("request_headers", &self.request_headers)
+            .field("request_transfer_coding", &self.request_transfer_coding)
+            .field("request_content_encoding", &self.request_content_encoding)
+            .field(
+                "request_content_encoding_processing",
+                &self.request_content_encoding_processing,
+            )
+            .field("request_content_type", &self.request_content_type)
+            .field("request_content_length", &self.request_content_length)
+            .field("request_auth_type", &self.request_auth_type)
+            .field("request_auth_username", &self.request_auth_username)
+            .field("request_auth_password", &self.request_auth_password)
+            .field("request_auth_token", &self.request_auth_token)
+            .field("request_hostname", &self.request_hostname)
+            .field("request_port_number", &self.request_port_number)
+            .field("request_ignored_lines", &self.request_ignored_lines)
+            .field("response_ignored_lines", &self.response_ignored_lines)
+            .field("response_line", &self.response_line)
+            .field("response_protocol", &self.response_protocol)
+            .field("response_protocol_number", &self.response_protocol_number)
+            .field("response_status", &self.response_status)
+            .field("response_status_number", &self.response_status_number)
+            .field(
+                "response_status_expected_number",
+                &self.response_status_expected_number,
+            )
+            .field("response_message", &self.response_message)
+            .field("seen_100continue", &self.seen_100continue)
+            .field("response_headers", &self.response_headers)
+            .field("is_http_2_upgrade", &self.is_http_2_upgrade)
+            .field("response_message_len", &self.response_message_len)
+            .field("response_entity_len", &self.response_entity_len)
+            .field("response_content_length", &self.response_content_length)
+            .field("response_transfer_coding", &self.response_transfer_coding)
+            .field("response_content_encoding", &self.response_content_encoding)
+            .field(
+                "response_content_encoding_processing",
+                &self.response_content_encoding_processing,
+            )
+            .field("response_content_type", &self.response_content_type)
+            .field("flags", &self.flags)
+            .field("request_progress", &self.request_progress)
+            .field("response_progress", &self.response_progress)
+            .field("index", &self.index)
+            .field(
+                "request_header_repetitions",
+                &self.request_header_repetitions,
+            )
+            .field(
+                "response_header_repetitions",
+                &self.response_header_repetitions,
+            )
+            .finish()
+    }
+}
+
+impl Transaction {
+    /// Construct a new transaction with all fields in their default/empty
+    /// state, bound to the given config, a clone of the given logger, and
+    /// its index on the connection.
+    pub(crate) fn new(cfg: &'static Config, logger: &Logger, index: usize) -> Self {
+        Self {
+            logger: logger.clone(),
+            cfg,
+            user_data: None,
+            request_ignored_lines: 0,
+            request_line: None,
+            request_method: None,
+            request_method_number: HtpMethod::Unknown,
+            request_uri: None,
+            request_protocol: None,
+            request_protocol_number: HtpProtocol::Unknown,
+            is_protocol_0_9: false,
+            parsed_uri: None,
+            parsed_uri_raw: None,
+            complete_normalized_uri: None,
+            partial_normalized_uri: None,
+            request_message_len: 0,
+            request_entity_len: 0,
+            // Pre-size header tables; 32 covers typical header counts
+            // without reallocation.
+            request_headers: Headers::with_capacity(32),
+            request_transfer_coding: HtpTransferCoding::Unknown,
+            request_content_encoding: HtpContentEncoding::None,
+            request_content_encoding_processing: HtpContentEncoding::None,
+            request_content_type: None,
+            request_content_length: None,
+            request_decompressor: None,
+            hook_request_body_data: DataHook::default(),
+            hook_response_body_data: DataHook::default(),
+            request_auth_type: HtpAuthType::Unknown,
+            request_auth_username: None,
+            request_auth_password: None,
+            request_auth_token: None,
+            request_hostname: None,
+            request_port_number: None,
+            response_ignored_lines: 0,
+            response_line: None,
+            response_protocol: None,
+            response_protocol_number: HtpProtocol::Unknown,
+            response_status: None,
+            response_status_number: HtpResponseNumber::Unknown,
+            response_status_expected_number: HtpUnwanted::Ignore,
+            response_message: None,
+            seen_100continue: false,
+            response_headers: Headers::with_capacity(32),
+            is_http_2_upgrade: false,
+            response_message_len: 0,
+            response_entity_len: 0,
+            response_content_length: None,
+            response_transfer_coding: HtpTransferCoding::Unknown,
+            response_content_encoding: HtpContentEncoding::None,
+            response_content_encoding_processing: HtpContentEncoding::None,
+            response_content_type: None,
+            response_decompressor: None,
+            flags: 0,
+            request_progress: HtpRequestProgress::NOT_STARTED,
+            response_progress: HtpResponseProgress::NOT_STARTED,
+            index,
+            request_header_repetitions: 0,
+            response_header_repetitions: 0,
+            request_header_parser: HeaderParser::new(Side::Request),
+            response_header_parser: HeaderParser::new(Side::Response),
+        }
+    }
+
+    /// Has this transaction started? True once either side has progressed
+    /// past NOT_STARTED.
+    pub(crate) fn is_started(&self) -> bool {
+        !(self.request_progress == HtpRequestProgress::NOT_STARTED
+            && self.response_progress == HtpResponseProgress::NOT_STARTED)
+    }
+
+    /// Set the user data, replacing any previously stored value.
+    pub(crate) fn set_user_data(&mut self, data: Box<dyn Any + 'static>) {
+        self.user_data = Some(data);
+    }
+
+    /// Get a reference to the user data. Returns None if no user data is
+    /// set, or if it is not of type T (downcast failure).
+    pub(crate) fn user_data<T: 'static>(&self) -> Option<&T> {
+        self.user_data
+            .as_ref()
+            .and_then(|ud| ud.downcast_ref::<T>())
+    }
+
+    /// Get a mutable reference to the user data (same downcast semantics
+    /// as user_data). Tests only.
+    #[cfg(test)]
+    pub(crate) fn user_data_mut<T: 'static>(&mut self) -> Option<&mut T> {
+        self.user_data
+            .as_mut()
+            .and_then(|ud| ud.downcast_mut::<T>())
+    }
+
+    /// Determine if the request has a body: true only for Identity or
+    /// Chunked transfer coding (Unknown/NoBody/Invalid mean no body).
+    pub(crate) fn request_has_body(&self) -> bool {
+        self.request_transfer_coding == HtpTransferCoding::Identity
+            || self.request_transfer_coding == HtpTransferCoding::Chunked
+    }
+
+    /// Process the extracted request headers and set the appropriate flags
+    ///
+    /// Determines the request body transfer coding (chunked, identity, none,
+    /// or invalid) from the Transfer-Encoding and Content-Length headers,
+    /// resolves the request hostname and port from the URI and the Host
+    /// header (raising HOST_AMBIGUOUS / HOSTH_INVALID / HOST_MISSING flags
+    /// as appropriate), parses the Content-Type header, and parses
+    /// authentication information. An invalid Authorization header only sets
+    /// AUTH_INVALID rather than failing the stream; other parse failures are
+    /// propagated as Err.
+    pub(crate) fn process_request_headers(&mut self) -> Result<()> {
+        // Determine if we have a request body, and how it is packaged.
+        // (C-L is looked up first so it can be cross-checked against T-E below.)
+        let cl_opt = self.request_headers.get_nocase_nozero("content-length");
+        // Check for the Transfer-Encoding header, which would indicate a chunked request body.
+        if let Some(te) = self.request_headers.get_nocase_nozero("transfer-encoding") {
+            // Make sure it contains "chunked" only.
+            // TODO The HTTP/1.1 RFC also allows the T-E header to contain "identity", which
+            //      presumably should have the same effect as T-E header absence. However, Apache
+            //      (2.2.22 on Ubuntu 12.04 LTS) instead errors out with "Unknown Transfer-Encoding: identity".
+            //      And it behaves strangely, too, sending a 501 and proceeding to process the request
+            //      (e.g., PHP is run), but without the body. It then closes the connection.
+            if te.value.index_of_nocase_nozero("chunked").is_none() {
+                // Invalid T-E header value.
+                self.request_transfer_coding = HtpTransferCoding::Invalid;
+                self.flags.set(HtpFlags::REQUEST_INVALID_T_E);
+                self.flags.set(HtpFlags::REQUEST_INVALID)
+            } else {
+                // Chunked encoding is a HTTP/1.1 feature, so check that an earlier protocol
+                // version is not used. The flag will also be set if the protocol could not be parsed.
+                //
+                // TODO IIS 7.0, for example, would ignore the T-E header when it
+                //      it is used with a protocol below HTTP 1.1. This should be a
+                //      personality trait.
+                if self.request_protocol_number < HtpProtocol::V1_1 {
+                    self.flags.set(HtpFlags::REQUEST_INVALID_T_E);
+                    self.flags.set(HtpFlags::REQUEST_SMUGGLING);
+                }
+                // If the T-E header is present we are going to use it.
+                self.request_transfer_coding = HtpTransferCoding::Chunked;
+                // We are still going to check for the presence of C-L.
+                if cl_opt.is_some() {
+                    // According to the HTTP/1.1 RFC (section 4.4):
+                    //
+                    // "The Content-Length header field MUST NOT be sent
+                    //  if these two lengths are different (i.e., if a Transfer-Encoding
+                    //  header field is present). If a message is received with both a
+                    //  Transfer-Encoding header field and a Content-Length header field,
+                    //  the latter MUST be ignored."
+                    //
+                    self.flags.set(HtpFlags::REQUEST_SMUGGLING)
+                }
+            }
+        } else if let Some(cl) = cl_opt {
+            // Check for a folded C-L header.
+            if cl.flags.is_set(HtpFlags::FIELD_FOLDED) {
+                self.flags.set(HtpFlags::REQUEST_SMUGGLING)
+            }
+            // Check for multiple C-L headers.
+            if cl.flags.is_set(HtpFlags::FIELD_REPEATED) {
+                self.flags.set(HtpFlags::REQUEST_SMUGGLING)
+                // TODO Personality trait to determine which C-L header to parse.
+                //      At the moment we're parsing the combination of all instances,
+                //      which is bound to fail (because it will contain commas).
+            }
+            // Get the body length.
+            self.request_content_length =
+                parse_content_length(cl.value.as_slice(), Some(&mut self.logger));
+            if self.request_content_length.is_some() {
+                // We have a request body of known length.
+                self.request_transfer_coding = HtpTransferCoding::Identity
+            } else {
+                self.request_transfer_coding = HtpTransferCoding::Invalid;
+                self.flags.set(HtpFlags::REQUEST_INVALID_C_L);
+                self.flags.set(HtpFlags::REQUEST_INVALID)
+            }
+        } else {
+            // No body.
+            self.request_transfer_coding = HtpTransferCoding::NoBody
+        }
+        // If we could not determine the correct body handling,
+        // consider the request invalid.
+        if self.request_transfer_coding == HtpTransferCoding::Unknown {
+            self.request_transfer_coding = HtpTransferCoding::Invalid;
+            self.flags.set(HtpFlags::REQUEST_INVALID)
+        }
+
+        // Determine hostname.
+        // Use the hostname from the URI, when available.
+        if let Some(hostname) = self.get_parsed_uri_hostname() {
+            self.request_hostname = Some(Bstr::from(hostname.as_slice()));
+        }
+
+        if let Some(port_number) = self.get_parsed_uri_port_number() {
+            self.request_port_number = Some(*port_number);
+        }
+        // Examine the Host header.
+        if let Some(header) = self.request_headers.get_nocase_nozero_mut("host") {
+            // Host information available in the headers.
+            if let Ok((_, (hostname, port_nmb, valid))) = parse_hostport(&header.value) {
+                if !valid {
+                    self.flags.set(HtpFlags::HOSTH_INVALID)
+                }
+                // The host information in the headers is valid.
+                // Is there host information in the URI?
+                if self.request_hostname.is_none() {
+                    // There is no host information in the URI. Place the
+                    // hostname from the headers into the parsed_uri structure.
+                    let mut hostname = Bstr::from(hostname);
+                    hostname.make_ascii_lowercase();
+                    self.request_hostname = Some(hostname);
+                    if let Some((_, port)) = port_nmb {
+                        self.request_port_number = port;
+                    }
+                } else {
+                    // The host information appears in the URI and in the headers. The
+                    // HTTP RFC states that we should ignore the header copy.
+                    // Check for different hostnames.
+                    if let Some(host) = &self.request_hostname {
+                        if !host.cmp_nocase(hostname) {
+                            self.flags.set(HtpFlags::HOST_AMBIGUOUS)
+                        }
+                    }
+
+                    if let Some((_, port)) = port_nmb {
+                        // Check for different ports.
+                        if self.request_port_number.is_some() && self.request_port_number != port {
+                            self.flags.set(HtpFlags::HOST_AMBIGUOUS)
+                        }
+                    }
+                }
+            } else if self.request_hostname.is_some() {
+                // Invalid host information in the headers.
+                // Raise the flag, even though the host information in the headers is invalid.
+                self.flags.set(HtpFlags::HOST_AMBIGUOUS)
+            }
+        } else {
+            // No host information in the headers.
+            // HTTP/1.1 requires host information in the headers.
+            if self.request_protocol_number >= HtpProtocol::V1_1 {
+                self.flags.set(HtpFlags::HOST_MISSING)
+            }
+        }
+        // Determine Content-Type.
+        if let Some(ct) = self.request_headers.get_nocase_nozero("content-type") {
+            self.request_content_type = Some(parse_content_type(ct.value.as_slice())?);
+        }
+        // Parse authentication information.
+        parse_authorization(self).or_else(|rc| {
+            if rc == HtpStatus::DECLINED {
+                // Don't fail the stream if an authorization header is invalid, just set a flag.
+                self.flags.set(HtpFlags::AUTH_INVALID);
+                Ok(())
+            } else {
+                Err(rc)
+            }
+        })?;
+        Ok(())
+    }
+
+    /// Sanity check the response line, logging if there is an invalid protocol or status number.
+    ///
+    /// Sets STATUS_LINE_INVALID when the protocol could not be parsed or when
+    /// the status code falls outside 100..=999; an out-of-range status is also
+    /// reset to `HtpResponseNumber::Invalid`.
+    pub(crate) fn validate_response_line(&mut self) {
+        // Is the response line valid?
+        if self.response_protocol_number == HtpProtocol::Invalid {
+            htp_warn!(
+                self.logger,
+                HtpLogCode::RESPONSE_LINE_INVALID_PROTOCOL,
+                "Invalid response line: invalid protocol"
+            );
+            self.flags.set(HtpFlags::STATUS_LINE_INVALID)
+        }
+        // Status codes are three digits, so anything outside 100-999 is invalid.
+        if !self.response_status_number.in_range(100, 999) {
+            htp_warn!(
+                self.logger,
+                HtpLogCode::RESPONSE_LINE_INVALID_RESPONSE_STATUS,
+                "Invalid response line: invalid response status."
+            );
+            self.response_status_number = HtpResponseNumber::Invalid;
+            self.flags.set(HtpFlags::STATUS_LINE_INVALID)
+        }
+    }
+
+    /// Parse the raw request line
+    ///
+    /// For CONNECT requests the URI is parsed as an authority (host:port);
+    /// otherwise it is parsed as a full URI. The raw parse result is stored
+    /// in `parsed_uri_raw`, a normalized copy is built into `parsed_uri`
+    /// (unless one was already set), and the URI hostname is validated.
+    ///
+    /// Returns Err(HtpStatus::ERROR) if a CONNECT request has no URI.
+    pub(crate) fn parse_request_line(&mut self) -> Result<()> {
+        // Determine how to process the request URI.
+        let mut parsed_uri = Uri::with_config(self.cfg.decoder_cfg);
+        if self.request_method_number == HtpMethod::CONNECT {
+            // When CONNECT is used, the request URI contains an authority string.
+            parsed_uri.parse_uri_hostport(
+                self.request_uri.as_ref().ok_or(HtpStatus::ERROR)?,
+                &mut self.flags,
+            );
+        } else if let Some(uri) = self.request_uri.as_ref() {
+            parsed_uri.parse_uri(uri.as_slice());
+        }
+        // Keep the raw parse result in Transaction::parsed_uri_raw.
+        self.parsed_uri_raw = Some(parsed_uri);
+        // Build Transaction::parsed_uri, but only if it was not explicitly set already.
+        if self.parsed_uri.is_none() {
+            // Keep the original URI components, but create a copy which we can normalize and use internally.
+            self.normalize_parsed_uri();
+        }
+
+        // Check parsed_uri hostname.
+        if let Some(hostname) = self.get_parsed_uri_hostname() {
+            if !validate_hostname(hostname.as_slice()) {
+                self.flags.set(HtpFlags::HOSTU_INVALID)
+            }
+        }
+        Ok(())
+    }
+
+    #[cfg(test)]
+    /// Determines if both request and response are complete.
+    pub(crate) fn is_complete(&self) -> bool {
+        // A transaction is considered complete only when both the request and
+        // response are complete. (Sometimes a complete response can be seen
+        // even while the request is ongoing.)
+        self.request_progress == HtpRequestProgress::COMPLETE
+            && self.response_progress == HtpResponseProgress::COMPLETE
+    }
+
+    /// Return a reference to the uri hostname.
+    pub(crate) fn get_parsed_uri_hostname(&self) -> Option<&Bstr> {
+        self.parsed_uri
+            .as_ref()
+            .and_then(|parsed_uri| parsed_uri.hostname.as_ref())
+    }
+
+    /// Return a reference to the uri port_number.
+    pub(crate) fn get_parsed_uri_port_number(&self) -> Option<&u16> {
+        self.parsed_uri
+            .as_ref()
+            .and_then(|parsed_uri| parsed_uri.port_number.as_ref())
+    }
+
+    /// Normalize a previously-parsed request URI.
+    ///
+    /// Builds `parsed_uri` from `parsed_uri_raw`, delegating each component
+    /// to the raw URI's `normalized_*` helpers (which may raise flags in
+    /// `self.flags`). Path normalization may also update the expected
+    /// response status number. If no raw URI exists, an empty normalized URI
+    /// is stored.
+    pub(crate) fn normalize_parsed_uri(&mut self) {
+        let mut uri = Uri::with_config(self.cfg.decoder_cfg);
+        if let Some(incomplete) = &self.parsed_uri_raw {
+            uri.scheme = incomplete.normalized_scheme();
+            uri.username = incomplete.normalized_username(&mut self.flags);
+            uri.password = incomplete.normalized_password(&mut self.flags);
+            uri.hostname = incomplete.normalized_hostname(&mut self.flags);
+            uri.port_number = incomplete.normalized_port(&mut self.flags);
+            // The query string is carried over as-is; no normalization is applied here.
+            uri.query = incomplete.query.clone();
+            uri.fragment = incomplete.normalized_fragment(&mut self.flags);
+            uri.path = incomplete
+                .normalized_path(&mut self.flags, &mut self.response_status_expected_number);
+        }
+        self.parsed_uri = Some(uri);
+    }
+}
+
+impl PartialEq for Transaction {
+    /// Determines if other references the same transaction.
+    ///
+    /// Equality is based solely on the transaction index; no other fields
+    /// are compared.
+    fn eq(&self, other: &Self) -> bool {
+        self.index == other.index
+    }
+}
+
+#[test]
+// NOTE(review): non-snake-case test name kept to match the original libhtp
+// test naming; it would otherwise trigger a rustc naming warning — confirm
+// whether an #[allow(non_snake_case)] is applied elsewhere.
+fn GetNocaseNozero() {
+    // Header names contain embedded NUL bytes; get_nocase_nozero() must match
+    // case-insensitively while skipping NULs, whereas plain get_nocase() must not.
+    let mut t = Headers::with_capacity(2);
+    let v1 = Bstr::from("Value1");
+    let mut k = Bstr::from("K\x00\x00\x00\x00ey\x001");
+    let mut h = Header::new(k, v1.clone());
+    t.elements.push(h);
+    k = Bstr::from("K\x00e\x00\x00Y2");
+    let v2 = Bstr::from("Value2");
+    h = Header::new(k, v2.clone());
+    t.elements.push(h);
+
+    // Exact-case lookup with NULs skipped.
+    let mut result = t.get_nocase_nozero("key1");
+    let mut res = result.unwrap();
+    assert_eq!(
+        Ordering::Equal,
+        res.name.cmp_slice("K\x00\x00\x00\x00ey\x001")
+    );
+    assert_eq!(v1, res.value);
+
+    // Mixed-case lookup of the same header.
+    result = t.get_nocase_nozero("KeY1");
+    res = result.unwrap();
+    assert_eq!(
+        Ordering::Equal,
+        res.name.cmp_slice("K\x00\x00\x00\x00ey\x001")
+    );
+    assert_eq!(v1, res.value);
+
+    result = t.get_nocase_nozero("KEY2");
+    res = result.unwrap();
+    assert_eq!(Ordering::Equal, res.name.cmp_slice("K\x00e\x00\x00Y2"));
+    assert_eq!(v2, res.value);
+
+    // Plain get_nocase() does not skip NULs, so the lookup must fail.
+    result = t.get_nocase("key1");
+    assert!(result.is_none());
+}
diff --git a/rust/htp/src/transactions.rs b/rust/htp/src/transactions.rs
new file mode 100644 (file)
index 0000000..0b7e527
--- /dev/null
@@ -0,0 +1,163 @@
+use crate::{config::Config, log::Logger, transaction::Transaction};
+use std::collections::btree_map::Entry;
+use std::collections::BTreeMap;
+
+/// Transactions is a structure which tracks request and response
+/// transactions, and guarantees that the current request or
+/// response transaction will always exist.
+pub(crate) struct Transactions {
+    /// Shared parser configuration (lives for the program's lifetime).
+    config: &'static Config,
+    /// Logger cloned at construction time.
+    logger: Logger,
+    /// Index of the current request transaction.
+    request: usize,
+    /// Index of the current response transaction.
+    response: usize,
+    /// All live transactions, keyed by index.
+    transactions: BTreeMap<usize, Transaction>,
+}
+
+impl Transactions {
+    /// Make a new Transactions struct with the given config
+    pub(crate) fn new(cfg: &'static Config, logger: &Logger) -> Self {
+        Self {
+            config: cfg,
+            logger: logger.clone(),
+            request: 0,
+            response: 0,
+            transactions: BTreeMap::default(),
+        }
+    }
+
+    /// Return the number of transactions processed.
+    /// The value returned may wrap around if the number of transactions
+    /// exceeds the storage size available to `usize`.
+    pub(crate) fn size(&self) -> usize {
+        // The total number of transactions is just the maximum
+        // of the request or response transaction index + 1 (if
+        // that transaction is started), or zero if neither
+        // request or response transaction exist yet
+        let tx_to_check = std::cmp::max(self.request, self.response);
+        match self.transactions.get(&tx_to_check) {
+            // Transaction is created, check if it is started
+            Some(tx) => tx.index.wrapping_add(tx.is_started() as usize),
+            // Transaction doesn't exist yet, so the index is the size
+            None => tx_to_check,
+        }
+    }
+
+    /// Get the current request transaction index
+    pub(crate) fn request_index(&self) -> usize {
+        self.request
+    }
+
+    /// Get the current request transaction
+    pub(crate) fn request(&mut self) -> Option<&Transaction> {
+        match self.request_mut() {
+            Some(req) => Some(req),
+            None => None,
+        }
+    }
+
+    /// Get the current request transaction
+    pub(crate) fn request_mut(&mut self) -> Option<&mut Transaction> {
+        let cfg = &self.config;
+        let logger = &self.logger;
+        let request = self.request;
+        let nbtx = self.transactions.len();
+        match self.transactions.entry(request) {
+            Entry::Occupied(entry) => Some(entry.into_mut()),
+            Entry::Vacant(entry) => {
+                if nbtx >= cfg.max_tx as usize {
+                    return None;
+                }
+                Some(entry.insert(Transaction::new(cfg, logger, request)))
+            }
+        }
+    }
+
+    /// Get the current response transaction index
+    pub(crate) fn response_index(&self) -> usize {
+        self.response
+    }
+
+    /// Get the current response transaction
+    pub(crate) fn response(&mut self) -> Option<&Transaction> {
+        match self.response_mut() {
+            Some(resp) => Some(resp),
+            None => None,
+        }
+    }
+
+    /// Get the current response transaction
+    pub(crate) fn response_mut(&mut self) -> Option<&mut Transaction> {
+        let cfg = &self.config;
+        let logger = &self.logger;
+        let response = self.response;
+        let nbtx = self.transactions.len();
+        match self.transactions.entry(response) {
+            Entry::Occupied(entry) => Some(entry.into_mut()),
+            Entry::Vacant(entry) => {
+                if nbtx >= cfg.max_tx as usize {
+                    return None;
+                }
+                Some(entry.insert(Transaction::new(cfg, logger, response)))
+            }
+        }
+    }
+
+    /// Increment the request transaction number.
+    /// May cause the previous transaction to be freed if configured to auto-destroy.
+    /// Returns the new request transaction index
+    pub(crate) fn request_next(&mut self) -> usize {
+        self.request = self.request.wrapping_add(1);
+        self.request
+    }
+
+    /// Increment the response transaction number.
+    /// May cause the previous transaction to be freed if configured to auto-destroy.
+    /// Returns the new response transaction index
+    pub(crate) fn response_next(&mut self) -> usize {
+        self.response = self.response.wrapping_add(1);
+        self.response
+    }
+
+    /// Remove the transaction at the given index. If the transaction
+    /// existed, it is returned.
+    pub(crate) fn remove(&mut self, index: usize) -> Option<Transaction> {
+        self.transactions.remove(&index)
+    }
+
+    /// Get the given transaction by index number
+    pub(crate) fn get(&self, index: usize) -> Option<&Transaction> {
+        self.transactions.get(&index)
+    }
+
+    /// Get the given transaction by index number
+    pub(crate) fn get_mut(&mut self, index: usize) -> Option<&mut Transaction> {
+        self.transactions.get_mut(&index)
+    }
+}
+
+/// An iterator over Transactions
+pub(crate) struct TransactionsIterator<'a> {
+    /// Mutable iterator over the underlying index -> Transaction map.
+    iter: std::collections::btree_map::IterMut<'a, usize, Transaction>,
+}
+
+impl<'a> Iterator for TransactionsIterator<'a> {
+    type Item = &'a mut Transaction;
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some((_index, tx)) = self.iter.next() {
+            Some(tx)
+        } else {
+            None
+        }
+    }
+}
+
+impl<'a> IntoIterator for &'a mut Transactions {
+    type Item = &'a mut Transaction;
+    type IntoIter = TransactionsIterator<'a>;
+
+    /// Iterate mutably over all live transactions in ascending index order.
+    fn into_iter(self) -> Self::IntoIter {
+        TransactionsIterator {
+            iter: self.transactions.iter_mut(),
+        }
+    }
+}
diff --git a/rust/htp/src/unicode_bestfit_map.rs b/rust/htp/src/unicode_bestfit_map.rs
new file mode 100644 (file)
index 0000000..74572d5
--- /dev/null
@@ -0,0 +1,435 @@
+use lazy_static::lazy_static;
+use std::collections::HashMap;
+
+#[derive(Copy, Clone)]
+/// Configuration for mapping %u-encoded Unicode code points to single bytes.
+pub(crate) struct UnicodeBestfitMap {
+    // Best-fit mapping options.
+    /// The best-fit map to use to decode %u-encoded characters.
+    pub(crate) map: &'static HashMap<u32, u8>,
+    /// The replacement byte used when there is no best-fit mapping.
+    pub(crate) replacement_byte: u8,
+}
+
+impl Default for UnicodeBestfitMap {
+    /// Default to the Windows-1252 best-fit table with '?' as the
+    /// replacement byte.
+    fn default() -> Self {
+        Self {
+            map: &bestfit_1252,
+            replacement_byte: b'?',
+        }
+    }
+}
+
+impl UnicodeBestfitMap {
+    pub(crate) fn get(&self, unicode: u32) -> u8 {
+        self.map
+            .get(&unicode)
+            .copied()
+            .unwrap_or(self.replacement_byte)
+    }
+}
+
+/// Create bestfit key from two 8 bit bytes.
+///
+/// Produces ((c1 << 8) + c2) as a u32, i.e. the two bytes packed big-endian.
+/// NOTE(review): the `8 as i32` cast applies to the shift-amount literal and
+/// is redundant; the expression is kept byte-for-byte.
+#[macro_export]
+macro_rules! bestfit_key {
+    ($c1:expr, $c2:expr) => {
+        ((($c1 as i32) << 8 as i32) + $c2 as i32) as u32
+    };
+}
+
+lazy_static! {
+    /// Best-fit mapping from selected Unicode code points (keyed as
+    /// (high_byte << 8) + low_byte via `bestfit_key!`) to the closest
+    /// single-byte Windows-1252 character. Code points not in the table are
+    /// decoded to the configured replacement byte instead.
+    static ref bestfit_1252: HashMap<u32, u8> = [
+        (bestfit_key!(0x01, 0), 0x41),
+        (bestfit_key!(0x1, 0x1), 0x61),
+        (bestfit_key!(0x1, 0x2), 0x41),
+        (bestfit_key!(0x1, 0x3), 0x61),
+        (bestfit_key!(0x1, 0x4), 0x41),
+        (bestfit_key!(0x1, 0x5), 0x61),
+        (bestfit_key!(0x1, 0x6), 0x43),
+        (bestfit_key!(0x1, 0x7), 0x63),
+        (bestfit_key!(0x1, 0x8), 0x43),
+        (bestfit_key!(0x1, 0x9), 0x63),
+        (bestfit_key!(0x1, 0xa), 0x43),
+        (bestfit_key!(0x1, 0xb), 0x63),
+        (bestfit_key!(0x1, 0xc), 0x43),
+        (bestfit_key!(0x1, 0xd), 0x63),
+        (bestfit_key!(0x1, 0xe), 0x44),
+        (bestfit_key!(0x1, 0xf), 0x64),
+        (bestfit_key!(0x1, 0x11), 0x64),
+        (bestfit_key!(0x1, 0x12), 0x45),
+        (bestfit_key!(0x1, 0x13), 0x65),
+        (bestfit_key!(0x1, 0x14), 0x45),
+        (bestfit_key!(0x1, 0x15), 0x65),
+        (bestfit_key!(0x1, 0x16), 0x45),
+        (bestfit_key!(0x1, 0x17), 0x65),
+        (bestfit_key!(0x1, 0x18), 0x45),
+        (bestfit_key!(0x1, 0x19), 0x65),
+        (bestfit_key!(0x1, 0x1a), 0x45),
+        (bestfit_key!(0x1, 0x1b), 0x65),
+        (bestfit_key!(0x1, 0x1c), 0x47),
+        (bestfit_key!(0x1, 0x1d), 0x67),
+        (bestfit_key!(0x1, 0x1e), 0x47),
+        (bestfit_key!(0x1, 0x1f), 0x67),
+        (bestfit_key!(0x1, 0x20), 0x47),
+        (bestfit_key!(0x1, 0x21), 0x67),
+        (bestfit_key!(0x1, 0x22), 0x47),
+        (bestfit_key!(0x1, 0x23), 0x67),
+        (bestfit_key!(0x1, 0x24), 0x48),
+        (bestfit_key!(0x1, 0x25), 0x68),
+        (bestfit_key!(0x1, 0x26), 0x48),
+        (bestfit_key!(0x1, 0x27), 0x68),
+        (bestfit_key!(0x1, 0x28), 0x49),
+        (bestfit_key!(0x1, 0x29), 0x69),
+        (bestfit_key!(0x1, 0x2a), 0x49),
+        (bestfit_key!(0x1, 0x2b), 0x69),
+        (bestfit_key!(0x1, 0x2c), 0x49),
+        (bestfit_key!(0x1, 0x2d), 0x69),
+        (bestfit_key!(0x1, 0x2e), 0x49),
+        (bestfit_key!(0x1, 0x2f), 0x69),
+        (bestfit_key!(0x1, 0x30), 0x49),
+        (bestfit_key!(0x1, 0x31), 0x69),
+        (bestfit_key!(0x1, 0x34), 0x4a),
+        (bestfit_key!(0x1, 0x35), 0x6a),
+        (bestfit_key!(0x1, 0x36), 0x4b),
+        (bestfit_key!(0x1, 0x37), 0x6b),
+        (bestfit_key!(0x1, 0x39), 0x4c),
+        (bestfit_key!(0x1, 0x3a), 0x6c),
+        (bestfit_key!(0x1, 0x3b), 0x4c),
+        (bestfit_key!(0x1, 0x3c), 0x6c),
+        (bestfit_key!(0x1, 0x3d), 0x4c),
+        (bestfit_key!(0x1, 0x3e), 0x6c),
+        (bestfit_key!(0x1, 0x41), 0x4c),
+        (bestfit_key!(0x1, 0x42), 0x6c),
+        (bestfit_key!(0x1, 0x43), 0x4e),
+        (bestfit_key!(0x1, 0x44), 0x6e),
+        (bestfit_key!(0x1, 0x45), 0x4e),
+        (bestfit_key!(0x1, 0x46), 0x6e),
+        (bestfit_key!(0x1, 0x47), 0x4e),
+        (bestfit_key!(0x1, 0x48), 0x6e),
+        (bestfit_key!(0x1, 0x4c), 0x4f),
+        (bestfit_key!(0x1, 0x4d), 0x6f),
+        (bestfit_key!(0x1, 0x4e), 0x4f),
+        (bestfit_key!(0x1, 0x4f), 0x6f),
+        (bestfit_key!(0x1, 0x50), 0x4f),
+        (bestfit_key!(0x1, 0x51), 0x6f),
+        (bestfit_key!(0x1, 0x54), 0x52),
+        (bestfit_key!(0x1, 0x55), 0x72),
+        (bestfit_key!(0x1, 0x56), 0x52),
+        (bestfit_key!(0x1, 0x57), 0x72),
+        (bestfit_key!(0x1, 0x58), 0x52),
+        (bestfit_key!(0x1, 0x59), 0x72),
+        (bestfit_key!(0x1, 0x5a), 0x53),
+        (bestfit_key!(0x1, 0x5b), 0x73),
+        (bestfit_key!(0x1, 0x5c), 0x53),
+        (bestfit_key!(0x1, 0x5d), 0x73),
+        (bestfit_key!(0x1, 0x5e), 0x53),
+        (bestfit_key!(0x1, 0x5f), 0x73),
+        (bestfit_key!(0x1, 0x62), 0x54),
+        (bestfit_key!(0x1, 0x63), 0x74),
+        (bestfit_key!(0x1, 0x64), 0x54),
+        (bestfit_key!(0x1, 0x65), 0x74),
+        (bestfit_key!(0x1, 0x66), 0x54),
+        (bestfit_key!(0x1, 0x67), 0x74),
+        (bestfit_key!(0x1, 0x68), 0x55),
+        (bestfit_key!(0x1, 0x69), 0x75),
+        (bestfit_key!(0x1, 0x6a), 0x55),
+        (bestfit_key!(0x1, 0x6b), 0x75),
+        (bestfit_key!(0x1, 0x6c), 0x55),
+        (bestfit_key!(0x1, 0x6d), 0x75),
+        (bestfit_key!(0x1, 0x6e), 0x55),
+        (bestfit_key!(0x1, 0x6f), 0x75),
+        (bestfit_key!(0x1, 0x70), 0x55),
+        (bestfit_key!(0x1, 0x71), 0x75),
+        (bestfit_key!(0x1, 0x72), 0x55),
+        (bestfit_key!(0x1, 0x73), 0x75),
+        (bestfit_key!(0x1, 0x74), 0x57),
+        (bestfit_key!(0x1, 0x75), 0x77),
+        (bestfit_key!(0x1, 0x76), 0x59),
+        (bestfit_key!(0x1, 0x77), 0x79),
+        (bestfit_key!(0x1, 0x79), 0x5a),
+        (bestfit_key!(0x1, 0x7b), 0x5a),
+        (bestfit_key!(0x1, 0x7c), 0x7a),
+        (bestfit_key!(0x1, 0x80), 0x62),
+        (bestfit_key!(0x1, 0x97), 0x49),
+        (bestfit_key!(0x1, 0x9a), 0x6c),
+        (bestfit_key!(0x1, 0x9f), 0x4f),
+        (bestfit_key!(0x1, 0xa0), 0x4f),
+        (bestfit_key!(0x1, 0xa1), 0x6f),
+        (bestfit_key!(0x1, 0xab), 0x74),
+        (bestfit_key!(0x1, 0xae), 0x54),
+        (bestfit_key!(0x1, 0xaf), 0x55),
+        (bestfit_key!(0x1, 0xb0), 0x75),
+        (bestfit_key!(0x1, 0xb6), 0x7a),
+        (bestfit_key!(0x1, 0xc0), 0x7c),
+        (bestfit_key!(0x1, 0xc3), 0x21),
+        (bestfit_key!(0x1, 0xcd), 0x41),
+        (bestfit_key!(0x1, 0xce), 0x61),
+        (bestfit_key!(0x1, 0xcf), 0x49),
+        (bestfit_key!(0x1, 0xd0), 0x69),
+        (bestfit_key!(0x1, 0xd1), 0x4f),
+        (bestfit_key!(0x1, 0xd2), 0x6f),
+        (bestfit_key!(0x1, 0xd3), 0x55),
+        (bestfit_key!(0x1, 0xd4), 0x75),
+        (bestfit_key!(0x1, 0xd5), 0x55),
+        (bestfit_key!(0x1, 0xd6), 0x75),
+        (bestfit_key!(0x1, 0xd7), 0x55),
+        (bestfit_key!(0x1, 0xd8), 0x75),
+        (bestfit_key!(0x1, 0xd9), 0x55),
+        (bestfit_key!(0x1, 0xda), 0x75),
+        (bestfit_key!(0x1, 0xdb), 0x55),
+        (bestfit_key!(0x1, 0xdc), 0x75),
+        (bestfit_key!(0x1, 0xde), 0x41),
+        (bestfit_key!(0x1, 0xdf), 0x61),
+        (bestfit_key!(0x1, 0xe4), 0x47),
+        (bestfit_key!(0x1, 0xe5), 0x67),
+        (bestfit_key!(0x1, 0xe6), 0x47),
+        (bestfit_key!(0x1, 0xe7), 0x67),
+        (bestfit_key!(0x1, 0xe8), 0x4b),
+        (bestfit_key!(0x1, 0xe9), 0x6b),
+        (bestfit_key!(0x1, 0xea), 0x4f),
+        (bestfit_key!(0x1, 0xeb), 0x6f),
+        (bestfit_key!(0x1, 0xec), 0x4f),
+        (bestfit_key!(0x1, 0xed), 0x6f),
+        (bestfit_key!(0x1, 0xf0), 0x6a),
+        (bestfit_key!(0x2, 0x61), 0x67),
+        (bestfit_key!(0x2, 0xb9), 0x27),
+        (bestfit_key!(0x2, 0xba), 0x22),
+        (bestfit_key!(0x2, 0xbc), 0x27),
+        (bestfit_key!(0x2, 0xc4), 0x5e),
+        (bestfit_key!(0x2, 0xc8), 0x27),
+        (bestfit_key!(0x2, 0xcb), 0x60),
+        (bestfit_key!(0x2, 0xcd), 0x5f),
+        (bestfit_key!(0x3, 0x00), 0x60),
+        (bestfit_key!(0x3, 0x2), 0x5e),
+        (bestfit_key!(0x3, 0x3), 0x7e),
+        (bestfit_key!(0x3, 0xe), 0x22),
+        (bestfit_key!(0x3, 0x31), 0x5f),
+        (bestfit_key!(0x3, 0x32), 0x5f),
+        (bestfit_key!(0x3, 0x7e), 0x3b),
+        (bestfit_key!(0x3, 0x93), 0x47),
+        (bestfit_key!(0x3, 0x98), 0x54),
+        (bestfit_key!(0x3, 0xa3), 0x53),
+        (bestfit_key!(0x3, 0xa6), 0x46),
+        (bestfit_key!(0x3, 0xa9), 0x4f),
+        (bestfit_key!(0x3, 0xb1), 0x61),
+        (bestfit_key!(0x3, 0xb4), 0x64),
+        (bestfit_key!(0x3, 0xb5), 0x65),
+        (bestfit_key!(0x3, 0xc0), 0x70),
+        (bestfit_key!(0x3, 0xc3), 0x73),
+        (bestfit_key!(0x3, 0xc4), 0x74),
+        (bestfit_key!(0x3, 0xc6), 0x66),
+        (bestfit_key!(0x4, 0xbb), 0x68),
+        (bestfit_key!(0x5, 0x89), 0x3a),
+        (bestfit_key!(0x6, 0x6a), 0x25),
+        (bestfit_key!(0x20, 0), 0x20),
+        (bestfit_key!(0x20, 0x1), 0x20),
+        (bestfit_key!(0x20, 0x2), 0x20),
+        (bestfit_key!(0x20, 0x3), 0x20),
+        (bestfit_key!(0x20, 0x4), 0x20),
+        (bestfit_key!(0x20, 0x5), 0x20),
+        (bestfit_key!(0x20, 0x6), 0x20),
+        (bestfit_key!(0x20, 0x10), 0x2d),
+        (bestfit_key!(0x20, 0x11), 0x2d),
+        (bestfit_key!(0x20, 0x17), 0x3d),
+        (bestfit_key!(0x20, 0x32), 0x27),
+        (bestfit_key!(0x20, 0x35), 0x60),
+        (bestfit_key!(0x20, 0x44), 0x2f),
+        (bestfit_key!(0x20, 0x74), 0x34),
+        (bestfit_key!(0x20, 0x75), 0x35),
+        (bestfit_key!(0x20, 0x76), 0x36),
+        (bestfit_key!(0x20, 0x77), 0x37),
+        (bestfit_key!(0x20, 0x78), 0x38),
+        (bestfit_key!(0x20, 0x7f), 0x6e),
+        (bestfit_key!(0x20, 0x80), 0x30),
+        (bestfit_key!(0x20, 0x81), 0x31),
+        (bestfit_key!(0x20, 0x82), 0x32),
+        (bestfit_key!(0x20, 0x83), 0x33),
+        (bestfit_key!(0x20, 0x84), 0x34),
+        (bestfit_key!(0x20, 0x85), 0x35),
+        (bestfit_key!(0x20, 0x86), 0x36),
+        (bestfit_key!(0x20, 0x87), 0x37),
+        (bestfit_key!(0x20, 0x88), 0x38),
+        (bestfit_key!(0x20, 0x89), 0x39),
+        (bestfit_key!(0x20, 0xa7), 0x50),
+        (bestfit_key!(0x21, 0x2), 0x43),
+        (bestfit_key!(0x21, 0x7), 0x45),
+        (bestfit_key!(0x21, 0xa), 0x67),
+        (bestfit_key!(0x21, 0xb), 0x48),
+        (bestfit_key!(0x21, 0xc), 0x48),
+        (bestfit_key!(0x21, 0xd), 0x48),
+        (bestfit_key!(0x21, 0xe), 0x68),
+        (bestfit_key!(0x21, 0x10), 0x49),
+        (bestfit_key!(0x21, 0x11), 0x49),
+        (bestfit_key!(0x21, 0x12), 0x4c),
+        (bestfit_key!(0x21, 0x13), 0x6c),
+        (bestfit_key!(0x21, 0x15), 0x4e),
+        (bestfit_key!(0x21, 0x18), 0x50),
+        (bestfit_key!(0x21, 0x19), 0x50),
+        (bestfit_key!(0x21, 0x1a), 0x51),
+        (bestfit_key!(0x21, 0x1b), 0x52),
+        (bestfit_key!(0x21, 0x1c), 0x52),
+        (bestfit_key!(0x21, 0x1d), 0x52),
+        (bestfit_key!(0x21, 0x24), 0x5a),
+        (bestfit_key!(0x21, 0x28), 0x5a),
+        (bestfit_key!(0x21, 0x2a), 0x4b),
+        (bestfit_key!(0x21, 0x2c), 0x42),
+        (bestfit_key!(0x21, 0x2d), 0x43),
+        (bestfit_key!(0x21, 0x2e), 0x65),
+        (bestfit_key!(0x21, 0x2f), 0x65),
+        (bestfit_key!(0x21, 0x30), 0x45),
+        (bestfit_key!(0x21, 0x31), 0x46),
+        (bestfit_key!(0x21, 0x33), 0x4d),
+        (bestfit_key!(0x21, 0x34), 0x6f),
+        (bestfit_key!(0x22, 0x12), 0x2d),
+        (bestfit_key!(0x22, 0x15), 0x2f),
+        (bestfit_key!(0x22, 0x16), 0x5c),
+        (bestfit_key!(0x22, 0x17), 0x2a),
+        (bestfit_key!(0x22, 0x1a), 0x76),
+        (bestfit_key!(0x22, 0x1e), 0x38),
+        (bestfit_key!(0x22, 0x23), 0x7c),
+        (bestfit_key!(0x22, 0x29), 0x6e),
+        (bestfit_key!(0x22, 0x36), 0x3a),
+        (bestfit_key!(0x22, 0x3c), 0x7e),
+        (bestfit_key!(0x22, 0x61), 0x3d),
+        (bestfit_key!(0x22, 0x64), 0x3d),
+        (bestfit_key!(0x22, 0x65), 0x3d),
+        (bestfit_key!(0x23, 0x3), 0x5e),
+        (bestfit_key!(0x23, 0x20), 0x28),
+        (bestfit_key!(0x23, 0x21), 0x29),
+        (bestfit_key!(0x23, 0x29), 0x3c),
+        (bestfit_key!(0x23, 0x2a), 0x3e),
+        (bestfit_key!(0x25, 0), 0x2d),
+        (bestfit_key!(0x25, 0xc), 0x2b),
+        (bestfit_key!(0x25, 0x10), 0x2b),
+        (bestfit_key!(0x25, 0x14), 0x2b),
+        (bestfit_key!(0x25, 0x18), 0x2b),
+        (bestfit_key!(0x25, 0x1c), 0x2b),
+        (bestfit_key!(0x25, 0x2c), 0x2d),
+        (bestfit_key!(0x25, 0x34), 0x2d),
+        (bestfit_key!(0x25, 0x3c), 0x2b),
+        (bestfit_key!(0x25, 0x50), 0x2d),
+        (bestfit_key!(0x25, 0x52), 0x2b),
+        (bestfit_key!(0x25, 0x53), 0x2b),
+        (bestfit_key!(0x25, 0x54), 0x2b),
+        (bestfit_key!(0x25, 0x55), 0x2b),
+        (bestfit_key!(0x25, 0x56), 0x2b),
+        (bestfit_key!(0x25, 0x57), 0x2b),
+        (bestfit_key!(0x25, 0x58), 0x2b),
+        (bestfit_key!(0x25, 0x59), 0x2b),
+        (bestfit_key!(0x25, 0x5a), 0x2b),
+        (bestfit_key!(0x25, 0x5b), 0x2b),
+        (bestfit_key!(0x25, 0x5c), 0x2b),
+        (bestfit_key!(0x25, 0x5d), 0x2b),
+        (bestfit_key!(0x25, 0x64), 0x2d),
+        (bestfit_key!(0x25, 0x65), 0x2d),
+        (bestfit_key!(0x25, 0x66), 0x2d),
+        (bestfit_key!(0x25, 0x67), 0x2d),
+        (bestfit_key!(0x25, 0x68), 0x2d),
+        (bestfit_key!(0x25, 0x69), 0x2d),
+        (bestfit_key!(0x25, 0x6a), 0x2b),
+        (bestfit_key!(0x25, 0x6b), 0x2b),
+        (bestfit_key!(0x25, 0x6c), 0x2b),
+        (bestfit_key!(0x25, 0x84), 0x5f),
+        (bestfit_key!(0x27, 0x58), 0x7c),
+        (bestfit_key!(0x30, 0), 0x20),
+        (bestfit_key!(0x30, 0x8), 0x3c),
+        (bestfit_key!(0x30, 0x9), 0x3e),
+        (bestfit_key!(0x30, 0x1a), 0x5b),
+        (bestfit_key!(0x30, 0x1b), 0x5d),
+        (bestfit_key!(0xff, 0x1), 0x21),
+        (bestfit_key!(0xff, 0x2), 0x22),
+        (bestfit_key!(0xff, 0x3), 0x23),
+        (bestfit_key!(0xff, 0x4), 0x24),
+        (bestfit_key!(0xff, 0x5), 0x25),
+        (bestfit_key!(0xff, 0x6), 0x26),
+        (bestfit_key!(0xff, 0x7), 0x27),
+        (bestfit_key!(0xff, 0x8), 0x28),
+        (bestfit_key!(0xff, 0x9), 0x29),
+        (bestfit_key!(0xff, 0xa), 0x2a),
+        (bestfit_key!(0xff, 0xb), 0x2b),
+        (bestfit_key!(0xff, 0xc), 0x2c),
+        (bestfit_key!(0xff, 0xd), 0x2d),
+        (bestfit_key!(0xff, 0xe), 0x2e),
+        (bestfit_key!(0xff, 0xf), 0x2f),
+        (bestfit_key!(0xff, 0x10), 0x30),
+        (bestfit_key!(0xff, 0x11), 0x31),
+        (bestfit_key!(0xff, 0x12), 0x32),
+        (bestfit_key!(0xff, 0x13), 0x33),
+        (bestfit_key!(0xff, 0x14), 0x34),
+        (bestfit_key!(0xff, 0x15), 0x35),
+        (bestfit_key!(0xff, 0x16), 0x36),
+        (bestfit_key!(0xff, 0x17), 0x37),
+        (bestfit_key!(0xff, 0x18), 0x38),
+        (bestfit_key!(0xff, 0x19), 0x39),
+        (bestfit_key!(0xff, 0x1a), 0x3a),
+        (bestfit_key!(0xff, 0x1b), 0x3b),
+        (bestfit_key!(0xff, 0x1c), 0x3c),
+        (bestfit_key!(0xff, 0x1d), 0x3d),
+        (bestfit_key!(0xff, 0x1e), 0x3e),
+        (bestfit_key!(0xff, 0x20), 0x40),
+        (bestfit_key!(0xff, 0x21), 0x41),
+        (bestfit_key!(0xff, 0x22), 0x42),
+        (bestfit_key!(0xff, 0x23), 0x43),
+        (bestfit_key!(0xff, 0x24), 0x44),
+        (bestfit_key!(0xff, 0x25), 0x45),
+        (bestfit_key!(0xff, 0x26), 0x46),
+        (bestfit_key!(0xff, 0x27), 0x47),
+        (bestfit_key!(0xff, 0x28), 0x48),
+        (bestfit_key!(0xff, 0x29), 0x49),
+        (bestfit_key!(0xff, 0x2a), 0x4a),
+        (bestfit_key!(0xff, 0x2b), 0x4b),
+        (bestfit_key!(0xff, 0x2c), 0x4c),
+        (bestfit_key!(0xff, 0x2d), 0x4d),
+        (bestfit_key!(0xff, 0x2e), 0x4e),
+        (bestfit_key!(0xff, 0x2f), 0x4f),
+        (bestfit_key!(0xff, 0x30), 0x50),
+        (bestfit_key!(0xff, 0x31), 0x51),
+        (bestfit_key!(0xff, 0x32), 0x52),
+        (bestfit_key!(0xff, 0x33), 0x53),
+        (bestfit_key!(0xff, 0x34), 0x54),
+        (bestfit_key!(0xff, 0x35), 0x55),
+        (bestfit_key!(0xff, 0x36), 0x56),
+        (bestfit_key!(0xff, 0x37), 0x57),
+        (bestfit_key!(0xff, 0x38), 0x58),
+        (bestfit_key!(0xff, 0x39), 0x59),
+        (bestfit_key!(0xff, 0x3a), 0x5a),
+        (bestfit_key!(0xff, 0x3b), 0x5b),
+        (bestfit_key!(0xff, 0x3c), 0x5c),
+        (bestfit_key!(0xff, 0x3d), 0x5d),
+        (bestfit_key!(0xff, 0x3e), 0x5e),
+        (bestfit_key!(0xff, 0x3f), 0x5f),
+        (bestfit_key!(0xff, 0x40), 0x60),
+        (bestfit_key!(0xff, 0x41), 0x61),
+        (bestfit_key!(0xff, 0x42), 0x62),
+        (bestfit_key!(0xff, 0x43), 0x63),
+        (bestfit_key!(0xff, 0x44), 0x64),
+        (bestfit_key!(0xff, 0x45), 0x65),
+        (bestfit_key!(0xff, 0x46), 0x66),
+        (bestfit_key!(0xff, 0x47), 0x67),
+        (bestfit_key!(0xff, 0x48), 0x68),
+        (bestfit_key!(0xff, 0x49), 0x69),
+        (bestfit_key!(0xff, 0x4a), 0x6a),
+        (bestfit_key!(0xff, 0x4b), 0x6b),
+        (bestfit_key!(0xff, 0x4c), 0x6c),
+        (bestfit_key!(0xff, 0x4d), 0x6d),
+        (bestfit_key!(0xff, 0x4e), 0x6e),
+        (bestfit_key!(0xff, 0x4f), 0x6f),
+        (bestfit_key!(0xff, 0x50), 0x70),
+        (bestfit_key!(0xff, 0x51), 0x71),
+        (bestfit_key!(0xff, 0x52), 0x72),
+        (bestfit_key!(0xff, 0x53), 0x73),
+        (bestfit_key!(0xff, 0x54), 0x74),
+        (bestfit_key!(0xff, 0x55), 0x75),
+        (bestfit_key!(0xff, 0x56), 0x76),
+        (bestfit_key!(0xff, 0x57), 0x77),
+        (bestfit_key!(0xff, 0x58), 0x78),
+        (bestfit_key!(0xff, 0x59), 0x79),
+        (bestfit_key!(0xff, 0x5a), 0x7a),
+        (bestfit_key!(0xff, 0x5b), 0x7b),
+        (bestfit_key!(0xff, 0x5c), 0x7c),
+        (bestfit_key!(0xff, 0x5d), 0x7d),
+        (bestfit_key!(0xff, 0x5e), 0x7e),
+    ]
+    .iter()
+    .cloned()
+    .collect();
+}
diff --git a/rust/htp/src/uri.rs b/rust/htp/src/uri.rs
new file mode 100644 (file)
index 0000000..e832fbe
--- /dev/null
@@ -0,0 +1,674 @@
+use crate::{
+    bstr::Bstr,
+    config::{DecoderConfig, HtpUnwanted},
+    log::Logger,
+    parsers::{credentials, fragment, hostname, parse_hostport, path, port, query, scheme},
+    urlencoded::{decode_uri_inplace, decode_uri_with_flags, path_decode_uri_inplace},
+    utf8_decoder::decode_and_validate_inplace,
+    util::{convert_port, FlagOperations, HtpFlags},
+};
+use nom::{combinator::opt, sequence::tuple};
+
+/// URI structure. Each of the fields provides access to a single
+/// URI element. Where an element is not present in a URI, the
+/// corresponding `Option` field will be `None`.
+#[derive(Clone)]
+pub struct Uri {
+    /// Decoder configuration
+    pub(crate) cfg: DecoderConfig,
+    /// Scheme, e.g., "http".
+    pub(crate) scheme: Option<Bstr>,
+    /// Username.
+    pub(crate) username: Option<Bstr>,
+    /// Password.
+    pub(crate) password: Option<Bstr>,
+    /// Hostname.
+    pub(crate) hostname: Option<Bstr>,
+    /// Port, as string.
+    pub(crate) port: Option<Bstr>,
+    /// Port, as number. This field will be None if there was
+    /// no port information in the URI or the port information
+    /// was invalid (e.g., it's not a number or it falls out of range).
+    pub(crate) port_number: Option<u16>,
+    /// The path part of this URI.
+    pub(crate) path: Option<Bstr>,
+    /// Query string.
+    pub(crate) query: Option<Bstr>,
+    /// Fragment identifier. This field will rarely be available in a server-side
+    /// setting, but it's not impossible to see it.
+    pub(crate) fragment: Option<Bstr>,
+}
+
+// Manual Debug impl: prints all URI components but deliberately omits the
+// `cfg` field (DecoderConfig), which is configuration rather than URI data.
+impl std::fmt::Debug for Uri {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.debug_struct("Uri")
+            .field("scheme", &self.scheme)
+            .field("username", &self.username)
+            .field("password", &self.password)
+            .field("hostname", &self.hostname)
+            .field("port", &self.port)
+            .field("port_number", &self.port_number)
+            .field("path", &self.path)
+            .field("query", &self.query)
+            .field("fragment", &self.fragment)
+            .finish()
+    }
+}
+
+impl Default for Uri {
+    /// Create an empty Uri struct (all components `None`), using the
+    /// default decoder configuration. Equivalent to
+    /// `Uri::with_config(DecoderConfig::default())`.
+    fn default() -> Self {
+        Self {
+            cfg: DecoderConfig::default(),
+            scheme: None,
+            username: None,
+            password: None,
+            hostname: None,
+            port: None,
+            port_number: None,
+            path: None,
+            query: None,
+            fragment: None,
+        }
+    }
+}
+
+impl Uri {
+    /// Create an empty Uri struct but with the given DecoderCfg
+    pub(crate) fn with_config(cfg: DecoderConfig) -> Self {
+        Self {
+            cfg,
+            scheme: None,
+            username: None,
+            password: None,
+            hostname: None,
+            port: None,
+            port_number: None,
+            path: None,
+            query: None,
+            fragment: None,
+        }
+    }
+
+    /// Normalize uri scheme.
+    // Returns a lowercased clone; the stored scheme is not modified.
+    pub(crate) fn normalized_scheme(&self) -> Option<Bstr> {
+        if let Some(mut scheme) = self.scheme.clone() {
+            scheme.make_ascii_lowercase();
+            Some(scheme)
+        } else {
+            None
+        }
+    }
+
+    /// Normalize uri username.
+    // Percent-decodes the username; decoding anomalies are reported via `flags`.
+    // Returns None if decoding fails or no username is present.
+    pub(crate) fn normalized_username(&self, flags: &mut u64) -> Option<Bstr> {
+        if let Some(username) = self.username.as_ref() {
+            decode_uri_with_flags(&self.cfg, flags, username.as_slice()).ok()
+        } else {
+            None
+        }
+    }
+
+    /// Normalize uri password.
+    // Same contract as normalized_username, applied to the password component.
+    pub(crate) fn normalized_password(&self, flags: &mut u64) -> Option<Bstr> {
+        if let Some(password) = self.password.as_ref() {
+            decode_uri_with_flags(&self.cfg, flags, password.as_slice()).ok()
+        } else {
+            None
+        }
+    }
+
+    /// Normalize uri hostname.
+    // Percent-decodes, lowercases, and strips any trailing '.' characters.
+    pub(crate) fn normalized_hostname(&self, flags: &mut u64) -> Option<Bstr> {
+        if let Some(hostname) = self.hostname.as_ref() {
+            let mut normalized_hostname =
+                decode_uri_with_flags(&self.cfg, flags, hostname.as_slice()).ok()?;
+            normalized_hostname.make_ascii_lowercase();
+            // Remove dots from the end of the string.
+            while normalized_hostname.last() == Some(&(b'.')) {
+                normalized_hostname.pop();
+            }
+            Some(normalized_hostname)
+        } else {
+            None
+        }
+    }
+
+    /// Normalize uri port.
+    // Parses the port string into a number; sets HOSTU_INVALID in `flags`
+    // when a port string is present but cannot be parsed.
+    pub(crate) fn normalized_port(&self, flags: &mut u64) -> Option<u16> {
+        if let Some(port) = self.port.as_ref() {
+            let normalized_port = convert_port(port.as_slice());
+            if normalized_port.is_none() {
+                // Failed to parse the port number.
+                flags.set(HtpFlags::HOSTU_INVALID);
+            }
+            normalized_port
+        } else {
+            None
+        }
+    }
+
+    /// Normalize uri fragment.
+    pub(crate) fn normalized_fragment(&self, flags: &mut u64) -> Option<Bstr> {
+        if let Some(fragment) = self.fragment.as_ref() {
+            decode_uri_with_flags(&self.cfg, flags, fragment).ok()
+        } else {
+            None
+        }
+    }
+
+    /// Normalize uri path.
+    // Applies, in order: percent/%u decoding per config, UTF-8 validation
+    // (and optional conversion), then RFC 3986 dot-segment removal.
+    // `flags` and `status` accumulate anomalies found along the way.
+    pub(crate) fn normalized_path(
+        &self, flags: &mut u64, status: &mut HtpUnwanted,
+    ) -> Option<Bstr> {
+        if let Some(mut path) = self.path.clone() {
+            // Decode URL-encoded (and %u-encoded) characters, as well as lowercase,
+            // compress separators and convert backslashes.
+            // Ignore result.
+            path_decode_uri_inplace(&self.cfg, flags, status, &mut path);
+            // Handle UTF-8 in the path. Validate it first, and only save it if cfg specifies it
+            decode_and_validate_inplace(&self.cfg, flags, status, &mut path);
+            // RFC normalization.
+            normalize_uri_path_inplace(&mut path);
+            Some(path)
+        } else {
+            None
+        }
+    }
+
+    /// Parses request URI, making no attempt to validate the contents.
+    ///
+    /// It attempts, but is not guaranteed to successfully parse out a scheme, username, password, hostname, port, query, and fragment.
+    /// Note: only attempts to extract a username, password, and hostname and subsequently port if it successfully parsed a scheme.
+    // On parse failure the Uri is left unchanged; on success only the
+    // components actually present in the input are overwritten.
+    pub(crate) fn parse_uri(&mut self, input: &[u8]) {
+        let res = tuple((
+            opt(tuple((
+                scheme(),
+                opt(credentials()),
+                opt(tuple((hostname(), opt(port())))),
+            ))),
+            opt(path()),
+            opt(query()),
+            opt(fragment()),
+        ))(input);
+        if let Ok((_, (scheme_authority, path, query, fragment))) = res {
+            if let Some(path) = path {
+                self.path = Some(Bstr::from(path));
+            }
+            if let Some(query) = query {
+                self.query = Some(Bstr::from(query));
+            }
+            if let Some(fragment) = fragment {
+                self.fragment = Some(Bstr::from(fragment));
+            }
+            if let Some((scheme, authority, hostname_port)) = scheme_authority {
+                self.scheme = Some(Bstr::from(scheme));
+                if let Some((username, password)) = authority {
+                    self.username = Some(Bstr::from(username));
+                    if let Some(password) = password {
+                        self.password = Some(Bstr::from(password));
+                    }
+                }
+                if let Some((hostname, port)) = hostname_port {
+                    self.hostname = Some(Bstr::from(hostname));
+                    if let Some(port) = port {
+                        self.port = Some(Bstr::from(port));
+                    }
+                }
+            }
+        }
+    }
+
+    /// Parses hostport provided in the URI.
+    // Lowercases the host, stores the port string, and records the numeric
+    // port when it parses; HOSTU_INVALID is raised if the parser flagged the
+    // input as invalid or the port number was unparseable.
+    pub(crate) fn parse_uri_hostport(&mut self, hostport: &Bstr, flags: &mut u64) {
+        if let Ok((_, (host, port_nmb, mut valid))) = parse_hostport(hostport) {
+            let hostname = &host.to_ascii_lowercase();
+            self.hostname = Some(Bstr::from(hostname.as_slice()));
+            if let Some((port, port_nmb)) = port_nmb {
+                self.port = Some(Bstr::from(port));
+                if let Some(num) = port_nmb {
+                    self.port_number = Some(num);
+                } else {
+                    valid = false;
+                }
+            }
+            if !valid {
+                flags.set(HtpFlags::HOSTU_INVALID)
+            }
+        }
+    }
+
+    /// Generate a normalized uri string.
+    // Returns (partial, complete): `partial` is path + query + fragment only,
+    // `complete` additionally includes scheme://user:pass@host:port. Either
+    // is None when its portion of the URI is empty.
+    pub(crate) fn generate_normalized_uri(
+        &self, mut logger: Option<Logger>,
+    ) -> (Option<Bstr>, Option<Bstr>) {
+        // On the first pass determine the length of the final bstrs
+        let mut partial_len = 0usize;
+        let mut complete_len = 0usize;
+        complete_len = complete_len.wrapping_add(
+            self.scheme
+                .as_ref()
+                .map(|scheme| scheme.len() + 3)
+                .unwrap_or(0),
+        ); // '://'
+        complete_len = complete_len.wrapping_add(
+            self.username
+                .as_ref()
+                .map(|username| username.len())
+                .unwrap_or(0),
+        );
+        complete_len = complete_len.wrapping_add(
+            self.password
+                .as_ref()
+                .map(|password| password.len())
+                .unwrap_or(0),
+        );
+        if self.username.is_some() || self.password.is_some() {
+            complete_len = complete_len.wrapping_add(2); // ':' and '@'
+        }
+        complete_len = complete_len.wrapping_add(
+            self.hostname
+                .as_ref()
+                .map(|hostname| hostname.len())
+                .unwrap_or(0),
+        );
+        // NOTE(review): the ':' separator byte before the port is not counted
+        // here; since with_capacity is only a hint this merely risks a single
+        // extra reallocation, not incorrect output.
+        complete_len =
+            complete_len.wrapping_add(self.port.as_ref().map(|port| port.len()).unwrap_or(0)); // ':'
+        partial_len =
+            partial_len.wrapping_add(self.path.as_ref().map(|path| path.len()).unwrap_or(0));
+        partial_len = partial_len.wrapping_add(
+            self.query
+                .as_ref()
+                .map(|query| query.len() + 1)
+                .unwrap_or(0),
+        ); // ?
+        partial_len = partial_len.wrapping_add(
+            self.fragment
+                .as_ref()
+                .map(|fragment| fragment.len() + 1)
+                .unwrap_or(0),
+        ); // #
+        complete_len = complete_len.wrapping_add(partial_len);
+        // On the second pass construct the string
+        let mut normalized_uri = Bstr::with_capacity(complete_len);
+        let mut partial_normalized_uri = Bstr::with_capacity(partial_len);
+
+        if let Some(scheme) = self.scheme.as_ref() {
+            normalized_uri.add(scheme.as_slice());
+            normalized_uri.add("://");
+        }
+        // Credentials are emitted as "user:pass@" whenever either part is
+        // present; a missing part is simply left empty (e.g. "user:@").
+        if self.username.is_some() || self.password.is_some() {
+            if let Some(username) = self.username.as_ref() {
+                normalized_uri.add(username.as_slice());
+            }
+            normalized_uri.add(":");
+            if let Some(password) = self.password.as_ref() {
+                normalized_uri.add(password.as_slice());
+            }
+            normalized_uri.add("@");
+        }
+        if let Some(hostname) = self.hostname.as_ref() {
+            normalized_uri.add(hostname.as_slice());
+        }
+        if let Some(port) = self.port.as_ref() {
+            normalized_uri.add(":");
+            normalized_uri.add(port.as_slice());
+        }
+        if let Some(mut path) = self.path.clone() {
+            // Path is already decoded when we parsed the uri in transaction, only decode once more
+            if self.cfg.double_decode_normalized_path {
+                let path_len = path.len();
+                let _ = decode_uri_inplace(&self.cfg, &mut path);
+                // A shrinking length means another layer of encoding was
+                // removed, i.e. the original path was double encoded.
+                if path_len > path.len() {
+                    if let Some(logger) = logger.as_mut() {
+                        htp_warn!(
+                            logger,
+                            HtpLogCode::DOUBLE_ENCODED_URI,
+                            "URI path is double encoded"
+                        );
+                    }
+                }
+            }
+            partial_normalized_uri.add(path.as_slice());
+        }
+        if let Some(mut query) = self.query.clone() {
+            let _ = decode_uri_inplace(&self.cfg, &mut query);
+            if self.cfg.double_decode_normalized_query {
+                let query_len = query.len();
+                let _ = decode_uri_inplace(&self.cfg, &mut query);
+                if query_len > query.len() {
+                    if let Some(logger) = logger.as_mut() {
+                        htp_warn!(
+                            logger,
+                            HtpLogCode::DOUBLE_ENCODED_URI,
+                            "URI query is double encoded"
+                        );
+                    }
+                }
+            }
+            partial_normalized_uri.add("?");
+            partial_normalized_uri.add(query.as_slice());
+        }
+        if let Some(fragment) = self.fragment.as_ref() {
+            partial_normalized_uri.add("#");
+            partial_normalized_uri.add(fragment.as_slice());
+        }
+        normalized_uri.add(partial_normalized_uri.as_slice());
+        if !normalized_uri.is_empty() {
+            if !partial_normalized_uri.is_empty() {
+                (Some(partial_normalized_uri), Some(normalized_uri))
+            } else {
+                (None, Some(normalized_uri))
+            }
+        } else {
+            (None, None)
+        }
+    }
+}
+
+// State machine for normalize_uri_path_inplace (RFC 3986 section 5.2.4).
+// States named after the characters most recently seen at the current
+// position in the input: e.g. SlashDot means a "/." prefix of the current
+// segment has been consumed but not yet committed to the output.
+enum NormUriState {
+    Start,
+    DotStart,
+    TwoDotStart,
+    Slash,
+    SlashDot,
+    SlashDotDot,
+    SlashDotDotSlash,
+    Regular,
+}
+
+/// Normalize URI path in place. This function implements the remove dot segments algorithm
+/// specified in RFC 3986, section 5.2.4.
+// Implementation: one forward scan with a separate write index `w` (in-place
+// two-pointer rewrite). Dot segments are buffered in the state machine and
+// either dropped ("." and "./") or erase the previous segment (".."); the
+// buffer is truncated to `w` once the scan completes. `w` never overtakes
+// the read index `i`, so writes never clobber unread input.
+fn normalize_uri_path_inplace(s: &mut Bstr) {
+    let mut state = NormUriState::Start;
+    let slen = s.len();
+    let mut w = 0;
+    for i in 0..slen {
+        let c = s[i];
+        match state {
+            NormUriState::Start => match c {
+                b'.' => {
+                    state = NormUriState::DotStart;
+                }
+                b'/' => {
+                    state = NormUriState::Slash;
+                }
+                _ => {
+                    s[w] = c;
+                    w += 1;
+                    state = NormUriState::Regular;
+                }
+            },
+            NormUriState::DotStart => {
+                match c {
+                    b'.' => {
+                        state = NormUriState::TwoDotStart;
+                    }
+                    b'/' => {
+                        // If the input buffer begins with a prefix of "./", then remove that prefix
+                        state = NormUriState::Start;
+                    }
+                    _ => {
+                        // Not a dot segment after all: flush the buffered '.'.
+                        s[w] = b'.';
+                        w += 1;
+                        s[w] = c;
+                        w += 1;
+                        state = NormUriState::Regular;
+                    }
+                }
+            }
+            NormUriState::TwoDotStart => {
+                match c {
+                    b'/' => {
+                        // If the input buffer begins with a prefix of "../", then remove that prefix
+                        state = NormUriState::Start;
+                    }
+                    _ => {
+                        // Not a dot segment after all: flush the buffered "..".
+                        s[w] = b'.';
+                        w += 1;
+                        s[w] = b'.';
+                        w += 1;
+                        s[w] = c;
+                        w += 1;
+                        state = NormUriState::Regular;
+                    }
+                }
+            }
+            // SlashDotDotSlash behaves exactly like Slash: the pending '/' is
+            // only written once a regular segment character follows it, which
+            // also collapses runs produced by removed segments.
+            NormUriState::Slash | NormUriState::SlashDotDotSlash => match c {
+                b'.' => {
+                    state = NormUriState::SlashDot;
+                }
+                _ => {
+                    s[w] = b'/';
+                    w += 1;
+                    s[w] = c;
+                    w += 1;
+                    state = NormUriState::Regular;
+                }
+            },
+            NormUriState::SlashDot => match c {
+                b'/' => {
+                    // /./ turns into /
+                    state = NormUriState::SlashDotDotSlash;
+                }
+                b'.' => {
+                    state = NormUriState::SlashDotDot;
+                }
+                _ => {
+                    // "/.x" is a regular segment: flush the buffered "/.".
+                    s[w] = b'/';
+                    w += 1;
+                    s[w] = b'.';
+                    w += 1;
+                    s[w] = c;
+                    w += 1;
+                    state = NormUriState::Regular;
+                }
+            },
+            NormUriState::SlashDotDot => match c {
+                b'/' => {
+                    // "/segment/../": erase the previous segment and its
+                    // leading '/' from the output.
+                    while w > 0 && s[w - 1] != b'/' {
+                        w -= 1;
+                    }
+                    w = w.saturating_sub(1);
+                    state = NormUriState::SlashDotDotSlash;
+                }
+                _ => {
+                    // "/..x" is a regular segment: flush the buffered "/..".
+                    s[w] = b'/';
+                    w += 1;
+                    s[w] = b'.';
+                    w += 1;
+                    s[w] = b'.';
+                    w += 1;
+                    s[w] = c;
+                    w += 1;
+                    state = NormUriState::Regular;
+                }
+            },
+            NormUriState::Regular => match c {
+                b'/' => {
+                    state = NormUriState::Slash;
+                }
+                _ => {
+                    s[w] = c;
+                    w += 1;
+                }
+            },
+        }
+    }
+    // End of input: resolve whatever segment is still buffered in the state.
+    match state {
+        NormUriState::Slash => {
+            s[w] = b'/';
+            w += 1;
+        }
+        // if the input buffer consists only of "." or "..", then remove that
+        NormUriState::DotStart | NormUriState::TwoDotStart => {}
+        // if the input buffer ends with "/." or "/..", remove that
+        NormUriState::SlashDot => {}
+        // we already erased the previous part, and do not add the trailing slash
+        NormUriState::SlashDotDotSlash => {}
+        NormUriState::SlashDotDot => {
+            while w > 0 && s[w - 1] != b'/' {
+                w -= 1;
+            }
+            w = w.saturating_sub(1);
+        }
+        // nothing special to do
+        NormUriState::Start | NormUriState::Regular => {}
+    }
+    s.truncate(w);
+}
+
+// Tests
+#[cfg(test)]
+mod test {
+    use super::*;
+    use rstest::rstest;
+    // Each case supplies: raw input, expected complete normalized URI,
+    // expected partial (path+query+fragment) URI, and the expected parsed
+    // component breakdown.
+    // NOTE(review): the first case is named `no_port` but its input contains
+    // port 1234 — consider renaming the case for clarity.
+    #[rstest]
+    #[case::no_port(b"http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag",
+    Some("http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag"),
+    Some("/path1/path2?a=b&c=d#frag"),
+        Uri {
+                cfg: DecoderConfig::default(),
+                scheme: Some(Bstr::from("http")),
+                username: Some(Bstr::from("user")),
+                password: Some(Bstr::from("pass")),
+                hostname: Some(Bstr::from("www.example.com")),
+                port: Some(Bstr::from("1234")),
+                port_number: None,
+                path: Some(Bstr::from("/path1/path2")),
+                query: Some(Bstr::from("a=b&c=d")),
+                fragment: Some(Bstr::from("frag")),
+        })]
+    #[case::scheme_hostname_path(b"http://host.com/path",
+    Some("http://host.com/path"),
+    Some("/path"),
+            Uri {
+                cfg: DecoderConfig::default(),
+                scheme: Some(Bstr::from("http")),
+                username: None,
+                password: None,
+                hostname: Some(Bstr::from("host.com")),
+                port: None,
+                port_number: None,
+                path: Some(Bstr::from("/path")),
+                query: None,
+                fragment: None,
+            })]
+    #[case::scheme_hostname(b"http://host.com",
+    Some("http://host.com"),
+    None,
+            Uri {
+                cfg: DecoderConfig::default(),
+                scheme: Some(Bstr::from("http")),
+                username: None,
+                password: None,
+                hostname: Some(Bstr::from("host.com")),
+                port: None,
+                port_number: None,
+                path: None,
+                query: None,
+                fragment: None,
+            })]
+    #[case::scheme_path(b"http://",
+    Some("http:////"),
+    Some("//"),
+            Uri {
+                cfg: DecoderConfig::default(),
+                scheme: Some(Bstr::from("http")),
+                username: None,
+                password: None,
+                hostname: None,
+                port: None,
+                port_number: None,
+                path: Some(Bstr::from("//")),
+                query: None,
+                fragment: None,
+            })]
+    #[case::path(b"/path",
+    Some("/path"),
+    Some("/path"),
+            Uri {
+                cfg: DecoderConfig::default(),
+                scheme: None,
+                username: None,
+                password: None,
+                hostname: None,
+                port: None,
+                port_number: None,
+                path: Some(Bstr::from("/path")),
+                query: None,
+                fragment: None,
+            })]
+    #[case::empty_scheme_path(b"://",
+    Some(":////"),
+    Some("//"),
+            Uri {
+                cfg: DecoderConfig::default(),
+                scheme: Some(Bstr::from("")),
+                username: None,
+                password: None,
+                hostname: None,
+                port: None,
+                port_number: None,
+                path: Some(Bstr::from("//")),
+                query: None,
+                fragment: None,
+            })]
+    #[case::empty(b"", None, None, Uri::default())]
+    #[case::scheme_user_host(b"http://user@host.com",
+    Some("http://user:@host.com"),
+    None,
+            Uri {
+                cfg: DecoderConfig::default(),
+                scheme: Some(Bstr::from("http")),
+                username: Some(Bstr::from("user")),
+                password: None,
+                hostname: Some(Bstr::from("host.com")),
+                port: None,
+                port_number: None,
+                path: None,
+                query: None,
+                fragment: None,
+            })]
+    fn test_parse_uri(
+        #[case] input: &[u8], #[case] expected_normalized: Option<&str>,
+        #[case] expected_partial: Option<&str>, #[case] expected: Uri,
+    ) {
+        let mut uri = Uri::default();
+        uri.parse_uri(input);
+        assert_eq!(uri.scheme, expected.scheme);
+        assert_eq!(uri.username, expected.username);
+        assert_eq!(uri.password, expected.password);
+        assert_eq!(uri.hostname, expected.hostname);
+        assert_eq!(uri.port, expected.port);
+        assert_eq!(uri.path, expected.path);
+        assert_eq!(uri.query, expected.query);
+        assert_eq!(uri.fragment, expected.fragment);
+        assert_eq!(
+            uri.generate_normalized_uri(None),
+            (
+                expected_partial.map(Bstr::from),
+                expected_normalized.map(Bstr::from)
+            )
+        );
+    }
+
+    // Dot-segment removal cases, including the two worked examples from
+    // RFC 3986 section 5.2.4 (the first two cases below).
+    #[rstest]
+    #[case(b"/a/b/c/./../../g", b"/a/g")]
+    #[case(b"mid/content=5/../6", b"mid/6")]
+    #[case(b"./one", b"one")]
+    #[case(b"../one", b"one")]
+    #[case(b".", b"")]
+    #[case(b"..", b"")]
+    #[case(b"one/.", b"one")]
+    #[case(b"one/..", b"")]
+    #[case(b"/", b"/")]
+    #[case(b"one/../", b"")]
+    #[case(b"/../../../images.gif", b"/images.gif")]
+    fn test_normalize_uri_path(#[case] input: &[u8], #[case] expected: &[u8]) {
+        let mut s = Bstr::from(input);
+        normalize_uri_path_inplace(&mut s);
+        assert!(s.eq_slice(expected))
+    }
+}
diff --git a/rust/htp/src/urlencoded.rs b/rust/htp/src/urlencoded.rs
new file mode 100644 (file)
index 0000000..21a2be1
--- /dev/null
@@ -0,0 +1,1089 @@
+use crate::{
+    bstr::Bstr,
+    config::{DecoderConfig, HtpUnwanted, HtpUrlEncodingHandling},
+    error::Result,
+    util::{FlagOperations, HtpFlags},
+};
+
+use nom::{
+    branch::alt,
+    bytes::complete::{tag_no_case, take, take_while_m_n},
+    character::complete::char,
+    combinator::{map, not},
+    multi::fold_many0,
+    number::complete::be_u8,
+    sequence::tuple,
+    IResult,
+};
+
+/// Convert two input bytes, pointed to by the pointer parameter,
+/// into a single byte by assuming the input consists of hexadecimal
+/// characters. This function will happily convert invalid input.
+///
+/// Returns hex-decoded byte
+// `c & 0xdf` clears bit 5, folding lowercase 'a'-'f' onto 'A'-'F' before
+// computing the digit value. For non-hex input the result is garbage by
+// design (see doc comment above).
+fn x2c(input: &[u8]) -> IResult<&[u8], u8> {
+    let (input, (c1, c2)) = tuple((be_u8, be_u8))(input)?;
+    let mut decoded_byte = if c1 >= b'A' {
+        ((c1 & 0xdf) - b'A') + 10
+    } else {
+        c1 - b'0'
+    };
+    // Widen to i32 for the *16; the cast back to u8 performs the intended
+    // truncation without a debug-build overflow panic.
+    decoded_byte = (decoded_byte as i32 * 16) as u8;
+    decoded_byte += if c2 >= b'A' {
+        ((c2 & 0xdf) - b'A') + 10
+    } else {
+        c2 - b'0'
+    };
+    Ok((input, decoded_byte))
+}
+
+/// Decode a path %u-encoded character, using best-fit mapping as necessary.
+///
+/// Input is the four hex digits following "%u": `c1` is the high byte,
+/// `c2` the low byte of the code point. Returns the decoded byte, the
+/// flags raised while decoding, and the status code the configuration
+/// wants associated with %u usage (Ignore when none).
+fn path_decode_u_encoding<'a>(
+    i: &'a [u8], cfg: &DecoderConfig,
+) -> IResult<&'a [u8], (u8, u64, HtpUnwanted)> {
+    let mut flags = 0;
+    let mut expected_status_code = HtpUnwanted::Ignore;
+    let (i, c1) = x2c(i)?;
+    let (i, c2) = x2c(i)?;
+    let mut r = c2;
+    if c1 == 0 {
+        // High byte zero: an overlong %u encoding of a single byte; keep c2.
+        flags.set(HtpFlags::PATH_OVERLONG_U)
+    } else {
+        // Check for fullwidth form evasion
+        if c1 == 0xff {
+            flags.set(HtpFlags::PATH_HALF_FULL_RANGE)
+        }
+        expected_status_code = cfg.u_encoding_unwanted;
+        // Use best-fit mapping
+        r = cfg.bestfit_map.get(bestfit_key!(c1, c2));
+    }
+    // Check for encoded path separators
+    if r == b'/' || cfg.backslash_convert_slashes && r == b'\\' {
+        flags.set(HtpFlags::PATH_ENCODED_SEPARATOR)
+    }
+    Ok((i, (r, flags, expected_status_code)))
+}
+
+/// Decode a %u-encoded character, using best-fit mapping as necessary. Params version.
+///
+/// Returns decoded byte and the flags raised while decoding (URLEN_* rather
+/// than the PATH_* flags used by the path variant).
+fn decode_u_encoding_params<'a>(i: &'a [u8], cfg: &DecoderConfig) -> IResult<&'a [u8], (u8, u64)> {
+    let (i, c1) = x2c(i)?;
+    let (i, c2) = x2c(i)?;
+    let mut flags = 0;
+    // Check for overlong usage first.
+    if c1 == 0 {
+        flags.set(HtpFlags::URLEN_OVERLONG_U);
+        return Ok((i, (c2, flags)));
+    }
+    // Both bytes were used.
+    // Detect half-width and full-width range.
+    if c1 == 0xff && c2 <= 0xef {
+        flags.set(HtpFlags::URLEN_HALF_FULL_RANGE)
+    }
+    // Use best-fit mapping.
+    Ok((i, (cfg.bestfit_map.get(bestfit_key!(c1, c2)), flags)))
+}
+
+// Outcome of decoding one percent-encoded token in a path.
+struct UrlParseResult {
+    // The (possibly decoded) output byte.
+    byte: u8,
+    // Status code the configuration wants to flag for this decode, if any.
+    expected_status_code: HtpUnwanted,
+    // HtpFlags bits raised while decoding.
+    flags: u64,
+    // Whether `byte` should be placed in the output; false means the token
+    // is consumed without emitting (e.g. REMOVE_PERCENT or NUL termination).
+    decode: bool,
+}
+
+/// Decodes path valid uencoded params according to the given cfg settings.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+// Parser combinator: matches "u" + 4 hex digits after a '%'. When
+// cfg.u_encoding_decode is disabled, nothing past the '%' is consumed and a
+// literal '%' byte is emitted instead.
+fn path_decode_valid_u_encoding(
+    cfg: &DecoderConfig,
+) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
+    move |remaining_input| {
+        let (left, _) = tag_no_case("u")(remaining_input)?;
+        let mut output = remaining_input;
+        let mut byte = b'%';
+        let mut flags = 0;
+        let mut expected_status_code = HtpUnwanted::Ignore;
+        if cfg.u_encoding_decode {
+            let (left, hex) = take_while_m_n(4, 4, |c: u8| c.is_ascii_hexdigit())(left)?;
+            output = left;
+            expected_status_code = cfg.u_encoding_unwanted;
+            // Decode a valid %u encoding.
+            let (_, (b, f, c)) = path_decode_u_encoding(hex, cfg)?;
+            byte = b;
+            flags.set(f);
+            if c != HtpUnwanted::Ignore {
+                expected_status_code = c;
+            }
+            if byte == 0 {
+                flags.set(HtpFlags::PATH_ENCODED_NUL);
+                if cfg.nul_encoded_unwanted != HtpUnwanted::Ignore {
+                    expected_status_code = cfg.nul_encoded_unwanted
+                }
+                if cfg.nul_encoded_terminates {
+                    // Terminate the path at the raw NUL byte.
+                    return Ok((
+                        b"",
+                        UrlParseResult {
+                            byte,
+                            expected_status_code,
+                            flags,
+                            decode: false,
+                        },
+                    ));
+                }
+            }
+        }
+        // Apply control-character policy to the decoded byte.
+        let (byte, code) = path_decode_control(byte, cfg);
+        if code != HtpUnwanted::Ignore {
+            expected_status_code = code;
+        }
+        Ok((
+            output,
+            UrlParseResult {
+                byte,
+                expected_status_code,
+                flags,
+                decode: true,
+            },
+        ))
+    }
+}
+
+/// Decodes path invalid uencoded params according to the given cfg settings.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+// Invalid here means "u" followed by 4 bytes that are not all hex digits.
+// Handling depends on cfg.url_encoding_invalid_handling: keep the '%'
+// literally (default), drop the '%' (REMOVE_PERCENT), or decode anyway
+// (PROCESS_INVALID).
+fn path_decode_invalid_u_encoding(
+    cfg: &DecoderConfig,
+) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
+    move |remaining_input| {
+        let mut output = remaining_input;
+        let mut byte = b'%';
+        let mut flags = 0;
+        let mut expected_status_code = HtpUnwanted::Ignore;
+        let (left, _) = tag_no_case("u")(remaining_input)?;
+        if cfg.u_encoding_decode {
+            let (left, hex) = take(4usize)(left)?;
+            // Invalid %u encoding
+            flags = HtpFlags::PATH_INVALID_ENCODING;
+            expected_status_code = cfg.url_encoding_invalid_unwanted;
+            if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::REMOVE_PERCENT {
+                // Do not place anything in output; consume the %.
+                return Ok((
+                    remaining_input,
+                    UrlParseResult {
+                        byte,
+                        expected_status_code,
+                        flags,
+                        decode: false,
+                    },
+                ));
+            } else if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::PROCESS_INVALID {
+                // Best-effort decode of the malformed sequence.
+                let (_, (b, f, c)) = path_decode_u_encoding(hex, cfg)?;
+                if c != HtpUnwanted::Ignore {
+                    expected_status_code = c;
+                }
+                flags.set(f);
+                byte = b;
+                output = left;
+            }
+        }
+        // Apply control-character policy to the resulting byte.
+        let (byte, code) = path_decode_control(byte, cfg);
+        if code != HtpUnwanted::Ignore {
+            expected_status_code = code;
+        }
+        Ok((
+            output,
+            UrlParseResult {
+                byte,
+                expected_status_code,
+                flags,
+                decode: true,
+            },
+        ))
+    }
+}
+
+/// Decodes path valid hex according to the given cfg settings.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+// Handles a plain %XX escape (the not("u") guard excludes %u sequences,
+// which the %u parsers above handle). Encoded path separators may be kept
+// encoded depending on cfg.path_separators_decode.
+fn path_decode_valid_hex(
+    cfg: &DecoderConfig,
+) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
+    move |remaining_input| {
+        let original_remaining = remaining_input;
+        // Valid encoding (2 xbytes)
+        not(tag_no_case("u"))(remaining_input)?;
+        let (mut left, hex) = take_while_m_n(2, 2, |c: u8| c.is_ascii_hexdigit())(remaining_input)?;
+        let mut flags = 0;
+        // Convert from hex.
+        let (_, mut byte) = x2c(hex)?;
+        if byte == 0 {
+            flags.set(HtpFlags::PATH_ENCODED_NUL);
+            if cfg.nul_encoded_terminates {
+                // Terminate the path at the raw NUL byte.
+                return Ok((
+                    b"",
+                    UrlParseResult {
+                        byte,
+                        expected_status_code: cfg.nul_encoded_unwanted,
+                        flags,
+                        decode: false,
+                    },
+                ));
+            }
+        }
+        if byte == b'/' || (cfg.backslash_convert_slashes && byte == b'\\') {
+            flags.set(HtpFlags::PATH_ENCODED_SEPARATOR);
+            if !cfg.path_separators_decode {
+                // Leave encoded: emit a literal '%' and rewind so the hex
+                // digits are processed as ordinary characters.
+                byte = b'%';
+                left = original_remaining;
+            }
+        }
+        let (byte, expected_status_code) = path_decode_control(byte, cfg);
+        Ok((
+            left,
+            UrlParseResult {
+                byte,
+                expected_status_code,
+                flags,
+                decode: true,
+            },
+        ))
+    }
+}
+
+/// Decodes invalid path hex according to the given cfg settings.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
fn path_decode_invalid_hex(
    cfg: &DecoderConfig,
) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
    move |remaining_input| {
        let mut remaining = remaining_input;
        // Not a %u sequence; grab the 2 bytes that failed valid-hex parsing.
        not(tag_no_case("u"))(remaining_input)?;
        let (left, hex) = take(2usize)(remaining_input)?;
        let mut byte = b'%';
        // Invalid encoding
        let flags = HtpFlags::PATH_INVALID_ENCODING;
        let expected_status_code = cfg.url_encoding_invalid_unwanted;
        if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::REMOVE_PERCENT {
            // Do not place anything in output; consume the %.
            return Ok((
                remaining_input,
                UrlParseResult {
                    byte,
                    expected_status_code,
                    flags,
                    decode: false,
                },
            ));
        } else if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::PROCESS_INVALID {
            // Best-effort decode of the invalid digits; the 2 bytes are consumed.
            // (PRESERVE_PERCENT falls through: emit '%' without consuming them.)
            let (_, b) = x2c(hex)?;
            remaining = left;
            byte = b;
        }
        let (byte, expected_status_code) = path_decode_control(byte, cfg);
        Ok((
            remaining,
            UrlParseResult {
                byte,
                expected_status_code,
                flags,
                decode: true,
            },
        ))
    }
}
+
+/// If the first byte of the input path string is a '%', it attempts to decode according to the
+/// configuration specified by cfg. Various flags (HTP_PATH_*) might be set. If something in the
+/// input would cause a particular server to respond with an error, the appropriate status
+/// code will be set.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
fn path_decode_percent(
    cfg: &DecoderConfig,
) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
    move |i| {
        map(
            tuple((
                char('%'),
                // Order matters: %u forms are tried first (valid, invalid,
                // then truncated), followed by plain %XX hex (valid, invalid),
                // and finally a catch-all for input too short to classify.
                alt((
                    path_decode_valid_u_encoding(cfg),
                    path_decode_invalid_u_encoding(cfg),
                    move |remaining_input| {
                        let (_, _) = tag_no_case("u")(remaining_input)?;
                        // Incomplete invalid %u encoding: a 'u' with fewer
                        // than 4 bytes after it.
                        Ok((
                            remaining_input,
                            UrlParseResult {
                                byte: b'%',
                                expected_status_code: cfg.url_encoding_invalid_unwanted,
                                flags: HtpFlags::PATH_INVALID_ENCODING,
                                decode: cfg.url_encoding_invalid_handling
                                    != HtpUrlEncodingHandling::REMOVE_PERCENT,
                            },
                        ))
                    },
                    path_decode_valid_hex(cfg),
                    path_decode_invalid_hex(cfg),
                    move |remaining_input| {
                        // Invalid URL encoding (not even 2 bytes of data)
                        Ok((
                            remaining_input,
                            UrlParseResult {
                                byte: b'%',
                                expected_status_code: cfg.url_encoding_invalid_unwanted,
                                flags: HtpFlags::PATH_INVALID_ENCODING,
                                decode: cfg.url_encoding_invalid_handling
                                    != HtpUrlEncodingHandling::REMOVE_PERCENT,
                            },
                        ))
                    },
                )),
            )),
            |(_, result)| result,
        )(i)
    }
}
+
+/// Assumes the input is already decoded and checks if it is null byte or control character, handling each
+/// according to the decoder configurations settings.
+///
+/// Returns parsed byte, corresponding status code, appropriate flags and whether the byte should be output.
fn path_parse_other(cfg: &DecoderConfig) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
    move |i| {
        let (remaining_input, byte) = be_u8(i)?;
        // One non-encoded byte.
        // Did we get a raw NUL byte?
        if byte == 0 && cfg.nul_raw_terminates {
            // Terminate the path at the raw NUL byte: empty remainder stops
            // the enclosing fold.
            return Ok((
                b"",
                UrlParseResult {
                    byte,
                    expected_status_code: cfg.nul_raw_unwanted,
                    flags: 0,
                    decode: false,
                },
            ));
        }
        let (byte, expected_status_code) = path_decode_control(byte, cfg);
        Ok((
            remaining_input,
            UrlParseResult {
                byte,
                expected_status_code,
                flags: 0,
                decode: true,
            },
        ))
    }
}
+/// Checks for control characters and converts them according to the cfg settings
+///
+/// Returns decoded byte and expected_status_code
+fn path_decode_control(mut byte: u8, cfg: &DecoderConfig) -> (u8, HtpUnwanted) {
+    // Note: What if an invalid encoding decodes into a path
+    //       separator? This is theoretical at the moment, because
+    //       the only platform we know doesn't convert separators is
+    //       Apache, who will also respond with 400 if invalid encoding
+    //       is encountered. Thus no check for a separator here.
+    // Place the character into output
+    // Check for control characters
+    let expected_status_code = if byte < 0x20 {
+        cfg.control_chars_unwanted
+    } else {
+        HtpUnwanted::Ignore
+    };
+    // Convert backslashes to forward slashes, if necessary
+    if byte == b'\\' && cfg.backslash_convert_slashes {
+        byte = b'/'
+    }
+    // Lowercase characters, if necessary
+    if cfg.convert_lowercase {
+        byte = byte.to_ascii_lowercase()
+    }
+    (byte, expected_status_code)
+}
+
+/// Performs decoding of the input path uri string, according to the configuration specified
+/// by cfg. Various flags (HTP_PATH_*) might be set. If something in the input would
+/// cause a particular server to respond with an error, the appropriate status
+/// code will be set.
+///
+/// Returns decoded bytes, flags set during decoding, and corresponding status code
+fn path_decode_uri<'a>(
+    input: &'a [u8], cfg: &DecoderConfig,
+) -> IResult<&'a [u8], (Vec<u8>, u64, HtpUnwanted)> {
+    fold_many0(
+        alt((path_decode_percent(cfg), path_parse_other(cfg))),
+        || (Vec::new(), 0, HtpUnwanted::Ignore),
+        |mut acc: (Vec<_>, u64, HtpUnwanted), upr| {
+            // If we're compressing separators then we need
+            // to check if the previous character was a separator
+            if upr.decode {
+                // insert
+                if upr.byte == b'/' && cfg.path_separators_compress {
+                    if !acc.0.is_empty() {
+                        if acc.0[acc.0.len() - 1] != b'/' {
+                            acc.0.push(upr.byte);
+                        }
+                    } else {
+                        acc.0.push(upr.byte);
+                    }
+                } else {
+                    acc.0.push(upr.byte);
+                }
+            }
+            acc.1.set(upr.flags);
+            acc.2 = upr.expected_status_code;
+            acc
+        },
+    )(input)
+}
+
+/// Decode the parsed uri path inplace according to the settings in the
+/// transaction configuration structure.
+pub(crate) fn path_decode_uri_inplace(
+    decoder_cfg: &DecoderConfig, flag: &mut u64, status: &mut HtpUnwanted, path: &mut Bstr,
+) {
+    if let Ok((_, (consumed, flags, expected_status_code))) =
+        path_decode_uri(path.as_slice(), decoder_cfg)
+    {
+        path.clear();
+        path.add(consumed.as_slice());
+        *status = expected_status_code;
+        flag.set(flags);
+    }
+}
+
+/// Performs decoding of the input uri string, according to the configuration specified
+/// by cfg. Various flags (HTP_URLEN_*) might be set. If something in the input would
+/// cause a particular server to respond with an error, the appropriate status
+/// code will be set.
+///
+/// Returns decoded bytes, flags set during decoding, and corresponding status code
+fn decode_uri<'a>(
+    input: &'a [u8], cfg: &DecoderConfig,
+) -> IResult<&'a [u8], (Vec<u8>, u64, HtpUnwanted)> {
+    fold_many0(
+        alt((decode_percent(cfg), decode_plus(cfg), unencoded_byte(cfg))),
+        || (Vec::new(), 0, HtpUnwanted::Ignore),
+        |mut acc: (Vec<_>, u64, HtpUnwanted), upr| {
+            if upr.decode {
+                acc.0.push(upr.byte);
+            }
+            acc.1.set(upr.flags);
+            if upr.expected_status_code != HtpUnwanted::Ignore {
+                acc.2 = upr.expected_status_code;
+            }
+            acc
+        },
+    )(input)
+}
+
+/// Performs decoding of the uri string, according to the configuration specified
+/// by cfg. Various flags might be set.
+pub(crate) fn decode_uri_with_flags(
+    decoder_cfg: &DecoderConfig, flags: &mut u64, input: &[u8],
+) -> Result<Bstr> {
+    let (_, (consumed, f, _)) = decode_uri(input, decoder_cfg)?;
+    if f.is_set(HtpFlags::URLEN_INVALID_ENCODING) {
+        flags.set(HtpFlags::PATH_INVALID_ENCODING)
+    }
+    if f.is_set(HtpFlags::URLEN_ENCODED_NUL) {
+        flags.set(HtpFlags::PATH_ENCODED_NUL)
+    }
+    if f.is_set(HtpFlags::URLEN_RAW_NUL) {
+        flags.set(HtpFlags::PATH_RAW_NUL);
+    }
+    Ok(Bstr::from(consumed))
+}
+
+/// Performs in-place decoding of the input uri string, according to the configuration specified by cfg and ctx.
+///
+/// Returns OK on success, ERROR on failure.
+pub(crate) fn decode_uri_inplace(cfg: &DecoderConfig, input: &mut Bstr) -> Result<()> {
+    let (_, (consumed, _, _)) = decode_uri(input.as_slice(), cfg)?;
+    (*input).clear();
+    input.add(consumed.as_slice());
+    Ok(())
+}
+
+/// Decodes valid uencoded hex bytes according to the given cfg settings.
+/// e.g. "u0064" -> "d"
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
fn decode_valid_u_encoding(
    cfg: &DecoderConfig,
) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
    move |input| {
        let (left, _) = alt((char('u'), char('U')))(input)?;
        if cfg.u_encoding_decode {
            // Decode %uHHHH: four hex digits mapped through the best-fit table.
            let (input, hex) = take_while_m_n(4, 4, |c: u8| c.is_ascii_hexdigit())(left)?;
            let (_, (byte, flags)) = decode_u_encoding_params(hex, cfg)?;
            return Ok((
                input,
                UrlParseResult {
                    byte,
                    expected_status_code: cfg.u_encoding_unwanted,
                    flags,
                    decode: true,
                },
            ));
        }
        // %u decoding disabled: emit a literal '%' and deliberately return the
        // ORIGINAL input (the 'u' is not consumed), so "u0064" is re-parsed as
        // plain characters and "%u0064" survives intact in the output.
        Ok((
            input,
            UrlParseResult {
                byte: b'%',
                expected_status_code: HtpUnwanted::Ignore,
                flags: 0,
                decode: true,
            },
        ))
    }
}
+
+/// Decodes invalid uencoded params according to the given cfg settings.
+/// e.g. "u00}9" -> "i"
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
fn decode_invalid_u_encoding(
    cfg: &DecoderConfig,
) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
    move |mut input| {
        let (left, _) = alt((char('u'), char('U')))(input)?;
        // Defaults: emit a literal '%' without consuming past it.
        let mut byte = b'%';
        let mut code = HtpUnwanted::Ignore;
        let mut flags = 0;
        let mut insert = true;
        if cfg.u_encoding_decode {
            // Invalid %u encoding (could not find 4 xdigits).
            let (left, invalid_hex) = take(4usize)(left)?;
            flags.set(HtpFlags::URLEN_INVALID_ENCODING);
            // The invalid-encoding status code takes priority over the
            // generic %u status code when configured.
            code = if cfg.url_encoding_invalid_unwanted != HtpUnwanted::Ignore {
                cfg.url_encoding_invalid_unwanted
            } else {
                cfg.u_encoding_unwanted
            };
            if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::REMOVE_PERCENT {
                // Do not place anything in output; consume the %.
                insert = false;
            } else if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::PROCESS_INVALID {
                // Best-effort decode of the malformed digits; consume "uXXXX".
                let (_, (b, f)) = decode_u_encoding_params(invalid_hex, cfg)?;
                flags.set(f);
                byte = b;
                input = left;
            }
        }
        Ok((
            input,
            UrlParseResult {
                byte,
                expected_status_code: code,
                flags,
                decode: insert,
            },
        ))
    }
}
+
+/// Decodes valid hex byte.
+///  e.g. "2f" -> "/"
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+fn decode_valid_hex() -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> {
+    move |input| {
+        // Valid encoding (2 xbytes)
+        not(alt((char('u'), char('U'))))(input)?;
+        let (input, hex) = take_while_m_n(2, 2, |c: u8| c.is_ascii_hexdigit())(input)?;
+        let (_, byte) = x2c(hex)?;
+        Ok((
+            input,
+            UrlParseResult {
+                byte,
+                expected_status_code: HtpUnwanted::Ignore,
+                flags: 0,
+                decode: true,
+            },
+        ))
+    }
+}
+
+/// Decodes invalid hex byte according to the given cfg settings.
+/// e.g. "}9" -> "i"
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
fn decode_invalid_hex(
    cfg: &DecoderConfig,
) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
    move |mut input| {
        not(alt((char('u'), char('U'))))(input)?;
        // Invalid encoding (2 bytes, but not hexadecimal digits).
        // Defaults (PRESERVE_PERCENT): emit '%' and consume nothing here.
        let mut byte = b'%';
        let mut insert = true;
        if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::REMOVE_PERCENT {
            // Do not place anything in output; consume the %.
            insert = false;
        } else if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::PROCESS_INVALID {
            // Best-effort decode of the malformed digits; consumes what x2c takes.
            let (left, b) = x2c(input)?;
            input = left;
            byte = b;
        }
        Ok((
            input,
            UrlParseResult {
                byte,
                expected_status_code: cfg.url_encoding_invalid_unwanted,
                flags: HtpFlags::URLEN_INVALID_ENCODING,
                decode: insert,
            },
        ))
    }
}
+
+/// If the first byte of the input string is a '%', it attempts to decode according to the
+/// configuration specified by cfg. Various flags (HTP_URLEN_*) might be set. If something in the
+/// input would cause a particular server to respond with an error, the appropriate status
+/// code will be set.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
fn decode_percent(cfg: &DecoderConfig) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
    move |i| {
        let (input, _) = char('%')(i)?;
        // Order matters: %u forms first (valid, then invalid), then plain %XX
        // hex (valid, then invalid), finally a catch-all for truncated input.
        let (input, upr) = alt((
            decode_valid_u_encoding(cfg),
            decode_invalid_u_encoding(cfg),
            decode_valid_hex(),
            decode_invalid_hex(cfg),
            move |input| {
                // Invalid %u encoding; not enough data. (not even 2 bytes)
                // Do not place anything in output if REMOVE_PERCENT; consume the %.
                Ok((
                    input,
                    UrlParseResult {
                        byte: b'%',
                        expected_status_code: cfg.url_encoding_invalid_unwanted,
                        flags: HtpFlags::URLEN_INVALID_ENCODING,
                        decode: !(cfg.url_encoding_invalid_handling
                            == HtpUrlEncodingHandling::REMOVE_PERCENT),
                    },
                ))
            },
        ))(input)?;
        //Did we get an encoded NUL byte?
        if upr.byte == 0 {
            let flags = upr.flags | HtpFlags::URLEN_ENCODED_NUL;
            let mut expected_status_code = upr.expected_status_code;
            // The NUL-specific status code overrides the generic one.
            if cfg.nul_encoded_unwanted != HtpUnwanted::Ignore {
                expected_status_code = cfg.nul_encoded_unwanted
            }
            if cfg.nul_encoded_terminates {
                // Terminate the path at the encoded NUL byte.
                return Ok((
                    b"",
                    UrlParseResult {
                        byte: upr.byte,
                        expected_status_code,
                        flags,
                        decode: false,
                    },
                ));
            }
        }
        Ok((
            input,
            UrlParseResult {
                byte: upr.byte,
                expected_status_code: upr.expected_status_code,
                flags: upr.flags,
                decode: upr.decode,
            },
        ))
    }
}
+
/// Consumes the next byte if it is a '+', decoding it according to the cfg
///
/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
fn decode_plus(cfg: &DecoderConfig) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
    move |input| {
        let (input, byte) = map(char('+'), |byte| {
            // Decoding of the plus character is conditional on the configuration.
            if cfg.plusspace_decode {
                0x20
            } else {
                byte as u8
            }
        })(input)?;
        Ok((
            input,
            UrlParseResult {
                byte,
                expected_status_code: HtpUnwanted::Ignore,
                flags: 0,
                decode: true,
            },
        ))
    }
}
+
+/// Consumes the next byte in the input string and treats it as an unencoded byte.
+/// Handles raw null bytes according to the input cfg settings.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+fn unencoded_byte(cfg: &DecoderConfig) -> impl Fn(&[u8]) -> IResult<&[u8], UrlParseResult> + '_ {
+    move |input| {
+        let (input, byte) = be_u8(input)?;
+        // One non-encoded byte.
+        // Did we get a raw NUL byte?
+        if byte == 0 {
+            return Ok((
+                if cfg.nul_raw_terminates { b"" } else { input },
+                UrlParseResult {
+                    byte,
+                    expected_status_code: cfg.nul_raw_unwanted,
+                    flags: HtpFlags::URLEN_RAW_NUL,
+                    decode: !cfg.nul_raw_terminates,
+                },
+            ));
+        }
+        Ok((
+            input,
+            UrlParseResult {
+                byte,
+                expected_status_code: HtpUnwanted::Ignore,
+                flags: 0,
+                decode: true,
+            },
+        ))
+    }
+}
+
// Unit tests for URI percent/%u decoding under the three invalid-encoding
// handling modes: PROCESS_INVALID, PRESERVE_PERCENT and REMOVE_PERCENT.
#[cfg(test)]
mod test {
    use super::*;
    use crate::config::Config;
    use rstest::rstest;

    // Columns: input, expected output under PROCESS_INVALID, PRESERVE_PERCENT,
    // REMOVE_PERCENT. %u decoding is disabled here, so %uXXXX passes through.
    #[rstest]
    #[case("/dest", "/dest", "/dest", "/dest")]
    #[case("/%64est", "/dest", "/dest", "/dest")]
    #[case("/%xxest", "/1est", "/%xxest", "/xxest")]
    #[case("/%a", "/%a", "/%a", "/a")]
    #[case("/%00ABC", "/\0ABC", "/\0ABC", "/\0ABC")]
    #[case("/%u0064", "/%u0064", "/%u0064", "/%u0064")]
    #[case("/%u006", "/%u006", "/%u006", "/%u006")]
    #[case("/%uXXXX", "/%uXXXX", "/%uXXXX", "/%uXXXX")]
    #[case("/%u0000ABC", "/%u0000ABC", "/%u0000ABC", "/%u0000ABC")]
    #[case("/\0ABC", "/\0ABC", "/\0ABC", "/\0ABC")]
    #[case("/one%2ftwo", "/one/two", "/one/two", "/one/two")]
    fn test_decode_uri(
        #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str,
        #[case] expected_remove: &str,
    ) {
        let i = Bstr::from(input);
        let mut cfg = Config::default();

        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID);
        assert_eq!(
            decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0,
            expected_process.as_bytes()
        );

        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
        assert_eq!(
            decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0,
            expected_preserve.as_bytes()
        );

        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT);
        assert_eq!(
            decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0,
            expected_remove.as_bytes()
        );
    }

    // Same cases as above but with %u decoding enabled: %u0064 now decodes.
    #[rstest]
    #[case("/dest", "/dest", "/dest", "/dest")]
    #[case("/%64est", "/dest", "/dest", "/dest")]
    #[case("/%xxest", "/1est", "/%xxest", "/xxest")]
    #[case("/%a", "/%a", "/%a", "/a")]
    #[case("/%00ABC", "/\0ABC", "/\0ABC", "/\0ABC")]
    #[case("/%u0064", "/d", "/d", "/d")]
    #[case("/%U0064", "/d", "/d", "/d")]
    #[case("/%u006", "/%u006", "/%u006", "/u006")]
    #[case("/%uXXXX", "/?", "/%uXXXX", "/uXXXX")]
    #[case("/%u0000ABC", "/\0ABC", "/\0ABC", "/\0ABC")]
    #[case("/\0ABC", "/\0ABC", "/\0ABC", "/\0ABC")]
    #[case("/one%2ftwo", "/one/two", "/one/two", "/one/two")]
    fn test_decode_uri_decode(
        #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str,
        #[case] expected_remove: &str,
    ) {
        let i = Bstr::from(input);
        let mut cfg = Config::default();
        cfg.set_u_encoding_decode(true);

        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID);
        assert_eq!(
            decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0,
            expected_process.as_bytes()
        );

        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
        assert_eq!(
            decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0,
            expected_preserve.as_bytes()
        );

        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT);
        assert_eq!(
            decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0,
            expected_remove.as_bytes()
        );
    }

    // Encoded (%00, %u0000) and raw NUL bytes all truncate the output when the
    // respective *_terminates options are enabled.
    #[rstest]
    #[case("/%u0000ABC")]
    #[case("/%00ABC")]
    #[case("/\0ABC")]
    fn test_decode_uri_nul_terminates(#[case] input: &str) {
        let i = Bstr::from(input);
        let mut cfg = Config::default();
        cfg.set_u_encoding_decode(true);
        cfg.set_nul_encoded_terminates(true);
        cfg.set_nul_raw_terminates(true);
        assert_eq!(decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0, b"/");
    }

    // Path decoding without %u support; also checks the flags reported.
    #[rstest]
    #[case("/dest", "/dest", "/dest", "/dest", 0)]
    #[case("/%64est", "/dest", "/dest", "/dest", 0)]
    #[case(
        "/%xxest",
        "/1est",
        "/%xxest",
        "/xxest",
        HtpFlags::PATH_INVALID_ENCODING
    )]
    #[case("/%a", "/%a", "/%a", "/a", HtpFlags::PATH_INVALID_ENCODING)]
    #[case("/%00ABC", "/\0ABC", "/\0ABC", "/\0ABC", HtpFlags::PATH_ENCODED_NUL)]
    #[case("/%u0064", "/%u0064", "/%u0064", "/%u0064", 0)]
    #[case("/%u006", "/%u006", "/%u006", "/%u006", 0)]
    #[case("/%uXXXX", "/%uXXXX", "/%uXXXX", "/%uXXXX", 0)]
    #[case("/%u0000ABC", "/%u0000ABC", "/%u0000ABC", "/%u0000ABC", 0)]
    #[case("/\0ABC", "/\0ABC", "/\0ABC", "/\0ABC", 0)]
    #[case(
        "/one%2ftwo",
        "/one%2ftwo",
        "/one%2ftwo",
        "/one%2ftwo",
        HtpFlags::PATH_ENCODED_SEPARATOR
    )]
    fn test_path_decode_uri_inplace(
        #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str,
        #[case] expected_remove: &str, #[case] flags: u64,
    ) {
        let mut cfg = Config::default();
        let mut response_status_expected_number = HtpUnwanted::Ignore;

        let mut input_process = Bstr::from(input);
        let mut flags_process = 0;
        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID);
        path_decode_uri_inplace(
            &cfg.decoder_cfg,
            &mut flags_process,
            &mut response_status_expected_number,
            &mut input_process,
        );
        assert_eq!(input_process, Bstr::from(expected_process));
        assert_eq!(flags_process, flags);

        let mut input_preserve = Bstr::from(input);
        let mut flags_preserve = 0;
        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
        path_decode_uri_inplace(
            &cfg.decoder_cfg,
            &mut flags_preserve,
            &mut response_status_expected_number,
            &mut input_preserve,
        );
        assert_eq!(input_preserve, Bstr::from(expected_preserve));
        assert_eq!(flags_preserve, flags);

        let mut input_remove = Bstr::from(input);
        let mut flags_remove = 0;
        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT);
        path_decode_uri_inplace(
            &cfg.decoder_cfg,
            &mut flags_remove,
            &mut response_status_expected_number,
            &mut input_remove,
        );
        assert_eq!(input_remove, Bstr::from(expected_remove));
        assert_eq!(flags_remove, flags);
    }

    // Path decoding with %u support enabled; %u cases now decode and set
    // PATH_OVERLONG_U.
    #[rstest]
    #[case("/dest", "/dest", "/dest", "/dest", 0)]
    #[case("/%64est", "/dest", "/dest", "/dest", 0)]
    #[case(
        "/%xxest",
        "/1est",
        "/%xxest",
        "/xxest",
        HtpFlags::PATH_INVALID_ENCODING
    )]
    #[case("/%a", "/%a", "/%a", "/a", HtpFlags::PATH_INVALID_ENCODING)]
    #[case("/%00ABC", "/\0ABC", "/\0ABC", "/\0ABC", HtpFlags::PATH_ENCODED_NUL)]
    #[case("/%u0064", "/d", "/d", "/d", HtpFlags::PATH_OVERLONG_U)]
    #[case("/%U0064", "/d", "/d", "/d", HtpFlags::PATH_OVERLONG_U)]
    #[case("/%u006", "/%u006", "/%u006", "/u006", HtpFlags::PATH_INVALID_ENCODING)]
    #[case("/%uXXXX", "/?", "/%uXXXX", "/uXXXX", HtpFlags::PATH_INVALID_ENCODING)]
    #[case("/%u0000ABC", "/\0ABC", "/\0ABC", "/\0ABC", HtpFlags::PATH_ENCODED_NUL | HtpFlags::PATH_OVERLONG_U)]
    #[case("/\0ABC", "/\0ABC", "/\0ABC", "/\0ABC", 0)]
    #[case(
        "/one%2ftwo",
        "/one%2ftwo",
        "/one%2ftwo",
        "/one%2ftwo",
        HtpFlags::PATH_ENCODED_SEPARATOR
    )]
    fn test_path_decode_uri_inplace_decode(
        #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str,
        #[case] expected_remove: &str, #[case] flags: u64,
    ) {
        let mut cfg = Config::default();
        cfg.set_u_encoding_decode(true);
        let mut response_status_expected_number = HtpUnwanted::Ignore;

        let mut input_process = Bstr::from(input);
        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID);
        let mut flags_process = 0;
        path_decode_uri_inplace(
            &cfg.decoder_cfg,
            &mut flags_process,
            &mut response_status_expected_number,
            &mut input_process,
        );
        assert_eq!(input_process, Bstr::from(expected_process));
        assert_eq!(flags_process, flags);

        let mut input_preserve = Bstr::from(input);
        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
        let mut flags_preserve = 0;
        path_decode_uri_inplace(
            &cfg.decoder_cfg,
            &mut flags_preserve,
            &mut response_status_expected_number,
            &mut input_preserve,
        );
        assert_eq!(input_preserve, Bstr::from(expected_preserve));
        assert_eq!(flags_preserve, flags);

        let mut input_remove = Bstr::from(input);
        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT);
        let mut flags_remove = 0;
        path_decode_uri_inplace(
            &cfg.decoder_cfg,
            &mut flags_remove,
            &mut response_status_expected_number,
            &mut input_remove,
        );
        assert_eq!(input_remove, Bstr::from(expected_remove));
        assert_eq!(flags_remove, flags);
    }

    // NUL termination during path decoding, and the flags it leaves behind.
    #[rstest]
    #[case("/%u0000ABC", HtpFlags::PATH_ENCODED_NUL | HtpFlags::PATH_OVERLONG_U)]
    #[case("/%00ABC", HtpFlags::PATH_ENCODED_NUL)]
    #[case("/\0ABC", 0)]
    fn test_path_decode_inplace_nul_terminates(#[case] input: &str, #[case] expected_flags: u64) {
        let mut cfg = Config::default();
        cfg.set_u_encoding_decode(true);
        cfg.set_nul_encoded_terminates(true);
        cfg.set_nul_raw_terminates(true);
        let mut i = Bstr::from(input);
        let mut flags = 0;
        let mut response_status_expected_number = HtpUnwanted::Ignore;
        path_decode_uri_inplace(
            &cfg.decoder_cfg,
            &mut flags,
            &mut response_status_expected_number,
            &mut i,
        );
        assert_eq!(i, Bstr::from("/"));
        assert_eq!(flags, expected_flags);
    }

    // Separator handling: decode %2f, convert backslashes, and compress
    // duplicate slashes — all normalize to the same path.
    #[rstest]
    #[case::encoded("/one%2ftwo")]
    #[case::convert("/one\\two")]
    #[case::compress("/one//two")]
    fn test_path_decode_inplace_seps(#[case] input: &str) {
        let mut cfg = Config::default();
        cfg.set_backslash_convert_slashes(true);
        cfg.set_path_separators_decode(true);
        cfg.set_path_separators_compress(true);
        let mut i = Bstr::from(input);
        let mut flags = 0;
        let mut response_status_expected_number = HtpUnwanted::Ignore;
        path_decode_uri_inplace(
            &cfg.decoder_cfg,
            &mut flags,
            &mut response_status_expected_number,
            &mut i,
        );
        assert_eq!(i, Bstr::from("/one/two"));
    }

    // In-place query decoding with truncated/invalid %u tails.
    #[rstest]
    #[case(
        "/one/tw%u006f/three/%u123",
        "/one/two/three/%u123",
        "/one/two/three/%u123",
        "/one/two/three/u123"
    )]
    #[case(
        "/one/tw%u006f/three/%3",
        "/one/two/three/%3",
        "/one/two/three/%3",
        "/one/two/three/3"
    )]
    #[case(
        "/one/tw%u006f/three/%uXXXX",
        "/one/two/three/?",
        "/one/two/three/%uXXXX",
        "/one/two/three/uXXXX"
    )]
    fn test_decode_uri_inplace(
        #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str,
        #[case] expected_remove: &str,
    ) {
        let mut cfg = Config::default();
        cfg.set_u_encoding_decode(true);

        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID);
        let mut input_process = Bstr::from(input);
        decode_uri_inplace(&cfg.decoder_cfg, &mut input_process).unwrap();
        assert_eq!(input_process, Bstr::from(expected_process));

        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT);
        let mut input_preserve = Bstr::from(input);
        decode_uri_inplace(&cfg.decoder_cfg, &mut input_preserve).unwrap();
        assert_eq!(input_preserve, Bstr::from(expected_preserve));

        cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT);
        let mut input_remove = Bstr::from(input);
        decode_uri_inplace(&cfg.decoder_cfg, &mut input_remove).unwrap();
        assert_eq!(input_remove, Bstr::from(expected_remove));
    }
}
diff --git a/rust/htp/src/utf8_decoder.rs b/rust/htp/src/utf8_decoder.rs
new file mode 100644 (file)
index 0000000..eeaf5a6
--- /dev/null
@@ -0,0 +1,251 @@
+// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
+// and associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+use crate::{
+    bstr::Bstr,
+    config::{DecoderConfig, HtpUnwanted},
+    unicode_bestfit_map::UnicodeBestfitMap,
+    util::{FlagOperations, HtpFlags},
+};
+
+// DFA tables for Bjoern Hoehrmann's UTF-8 decoder
+// (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, see license header above).
+// Layout: the first 256 entries map each input byte to a character class;
+// the remaining entries encode the state-transition table indexed by
+// state*16 + class (see decode_byte_allow_overlong). State 0 is ACCEPT and
+// state 1 is REJECT.
+static utf8d: [u8; 400] = [
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, 0xb, 0x6, 0x6,
+    0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0, 0x1, 0x2, 0x3, 0x5, 0x8,
+    0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1,
+    1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+];
+// Variant of the table above whose byte-class section does not single out
+// the bytes that start overlong encodings (e.g. 0xC0/0xC1/0xE0/0xF0 receive
+// the same class as their neighbours), so overlong sequences are decoded
+// rather than rejected. Overlong use is flagged separately in decode_byte.
+static utf8d_allow_overlong: [u8; 400] = [
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, 0x6, 0x6, 0x6,
+    0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0, 0x1, 0x2, 0x3, 0x5, 0x8,
+    0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1,
+    1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+];
+
+/// Incremental UTF-8 decoder state plus the accumulated decode result.
+/// One instance is used per decode_and_validate() call; internals are
+/// reset at the start of each run.
+#[derive(Clone)]
+pub(crate) struct Utf8Decoder {
+    /// Best-fit map used to reduce multi-byte codepoints to a single byte.
+    bestfit_map: UnicodeBestfitMap,
+    /// Current DFA state: 0 = ACCEPT, 1 = REJECT, other = mid-sequence.
+    state: u32,
+    /// Number of bytes consumed so far in the current UTF-8 sequence.
+    seq: u32,
+    /// Codepoint being assembled from the current sequence.
+    codepoint: u32,
+    /// HtpFlags accumulated during decoding (overlong, invalid, etc.).
+    pub(crate) flags: u64,
+    /// True once at least one valid multi-byte UTF-8 character was decoded.
+    pub(crate) seen_valid: bool,
+    /// The decoded (single-byte) output stream.
+    pub(crate) decoded_bytes: Vec<u8>,
+}
+
+impl Utf8Decoder {
+    /// Make a new owned Utf8Decoder with all state zeroed and an empty
+    /// output buffer.
+    pub(crate) fn new(bestfit_map: UnicodeBestfitMap) -> Self {
+        Self {
+            bestfit_map,
+            state: 0,
+            seq: 0,
+            codepoint: 0,
+            flags: 0,
+            seen_valid: false,
+            decoded_bytes: Vec::new(),
+        }
+    }
+
+    /// Decode utf8 byte using best-fit map.
+    ///
+    /// Feeds one byte to the DFA and, depending on the resulting state,
+    /// either emits output (ACCEPT), emits a replacement byte and possibly
+    /// reprocesses the byte (REJECT), or waits for more bytes. Sets
+    /// PATH_UTF8_OVERLONG, PATH_HALF_FULL_RANGE and PATH_UTF8_INVALID in
+    /// self.flags as appropriate.
+    fn decode_byte(&mut self, encoded_byte: u8, is_last_byte: bool) {
+        self.seq = self.seq.wrapping_add(1);
+        self.decode_byte_allow_overlong(encoded_byte as u32);
+        match self.state {
+            0 => {
+                if self.seq == 1 {
+                    // ASCII character, which we just copy.
+                    self.decoded_bytes.push(self.codepoint as u8);
+                } else {
+                    // A valid UTF-8 character, which we need to convert.
+                    self.seen_valid = true;
+                    // Check for overlong characters and set the flag accordingly.
+                    // (The DFA itself accepts overlongs; they are only flagged here.)
+                    if (self.seq == 2 && self.codepoint < 0x80)
+                        || (self.seq == 3 && self.codepoint < 0x800)
+                        || (self.seq == 4 && self.codepoint < 0x10000)
+                    {
+                        self.flags.set(HtpFlags::PATH_UTF8_OVERLONG);
+                    }
+                    // Special flag for half-width/full-width evasion.
+                    if self.codepoint >= 0xff00 && self.codepoint <= 0xffef {
+                        self.flags.set(HtpFlags::PATH_HALF_FULL_RANGE)
+                    }
+                    // Use best-fit mapping to convert to a single byte.
+                    self.decoded_bytes.push(self.bestfit_codepoint());
+                }
+                self.seq = 0;
+            }
+            1 => {
+                // Invalid UTF-8 character.
+                self.flags.set(HtpFlags::PATH_UTF8_INVALID);
+                // Output the replacement byte, replacing one or more invalid bytes.
+                // If the invalid byte was first in a sequence, consume it. Otherwise,
+                // assume it's the starting byte of the next character.
+                self.state = 0;
+                self.codepoint = 0;
+                self.decoded_bytes.push(self.bestfit_map.replacement_byte);
+                if self.seq != 1 {
+                    // Mid-sequence failure: reprocess this byte as the start of
+                    // the next character (single level of recursion; the retry
+                    // runs with seq reset to 0 and state ACCEPT, so it cannot
+                    // land in this branch again for the same byte).
+                    self.seq = 0;
+                    self.decode_byte(encoded_byte, is_last_byte);
+                } else {
+                    self.seq = 0;
+                }
+            }
+            _ => {
+                // The character is not yet formed.
+                if is_last_byte {
+                    // If the last input chunk ended with an incomplete byte sequence for a code point,
+                    // this is an error and a replacement character is emitted hence starting from 1 not 0
+                    for _ in 1..self.seq {
+                        self.decoded_bytes.push(self.bestfit_map.replacement_byte);
+                    }
+                }
+            }
+        }
+    }
+
+    /// Decode a UTF-8 encoded path. Replaces a possibly-invalid utf8 byte stream
+    /// with an ascii stream, storing the result in self.decoded_bytes. Overlong
+    /// characters will be decoded and invalid characters will be replaced with
+    /// the replacement byte specified in the bestfit_map. Best-fit mapping will be used
+    /// to convert UTF-8 into a single-byte stream.
+    ///
+    /// Also sets PATH_UTF8_VALID when the whole input looked like valid UTF-8.
+    fn decode_and_validate(&mut self, input: &[u8]) {
+        //Reset all internals
+        self.state = 0;
+        self.seq = 0;
+        self.codepoint = 0;
+        self.flags = 0;
+        self.decoded_bytes.clear();
+        self.decoded_bytes.reserve(input.len());
+        self.seen_valid = false;
+        // Iterate bytes, telling decode_byte which one is the last so that a
+        // trailing incomplete sequence can be replaced.
+        for (byte, is_last) in input
+            .iter()
+            .enumerate()
+            .map(|(i, b)| (b, i + 1 == input.len()))
+        {
+            self.decode_byte(*byte, is_last);
+        }
+        // Did the input stream seem like a valid UTF-8 string?
+        if self.seen_valid && !self.flags.is_set(HtpFlags::PATH_UTF8_INVALID) {
+            self.flags.set(HtpFlags::PATH_UTF8_VALID)
+        }
+    }
+
+    /// Process one byte of UTF-8 data and set the code point if one is available. Allows
+    /// overlong characters in input.
+    ///
+    /// Sets the state to ACCEPT(0) for a valid character, REJECT(1) for an invalid character,
+    ///         or OTHER(u32) if the character has not yet been formed
+    ///
+    /// NOTE(review): the byte class is looked up in utf8d_allow_overlong but the
+    /// state transition uses utf8d. This works because both tables share the same
+    /// transition rows (indices 256..) and differ only in the class section —
+    /// confirm against the upstream Hoehrmann tables if either table is edited.
+    fn decode_byte_allow_overlong(&mut self, byte: u32) {
+        let type_0: u32 = utf8d_allow_overlong[byte as usize] as u32;
+        self.codepoint = if self.state != 0 {
+            // Continuation byte: append its low 6 bits to the codepoint.
+            (byte & 0x3f) | (self.codepoint << 6)
+        } else {
+            // Leading byte: mask off the length prefix based on its class.
+            (0xff >> type_0) & byte
+        };
+        self.state = utf8d[(256u32)
+            .wrapping_add((self.state).wrapping_mul(16))
+            .wrapping_add(type_0) as usize] as u32;
+    }
+
+    /// Convert a Unicode codepoint into a single-byte, using best-fit
+    /// mapping (as specified in the provided configuration structure).
+    ///
+    /// Returns converted single byte
+    fn bestfit_codepoint(&self) -> u8 {
+        // Is it a single-byte codepoint?
+        if self.codepoint < 0x100 {
+            return self.codepoint as u8;
+        }
+        self.bestfit_map.get(self.codepoint)
+    }
+}
+
+/// Decode a UTF-8 encoded path. Replaces a possibly-invalid utf8 byte stream with
+/// an ascii stream. Overlong characters will be decoded and invalid characters will
+/// be replaced with the replacement byte specified in the cfg. Best-fit mapping will
+/// be used to convert UTF-8 into a single-byte stream. The resulting decoded path will
+/// be stored in the input path if the transaction cfg indicates it
+pub(crate) fn decode_and_validate_inplace(
+    cfg: &DecoderConfig, flags: &mut u64, status: &mut HtpUnwanted, path: &mut Bstr,
+) {
+    let mut decoder = Utf8Decoder::new(cfg.bestfit_map);
+    decoder.decode_and_validate(path.as_slice());
+    // Only replace the path when the config asks for best-fit conversion;
+    // flags are accumulated into the caller's flag set either way.
+    if cfg.utf8_convert_bestfit {
+        path.clear();
+        path.add(decoder.decoded_bytes.as_slice());
+    }
+    flags.set(decoder.flags);
+
+    // Escalate to the configured unwanted status if invalid UTF-8 was seen
+    // and the config treats invalid UTF-8 as unwanted.
+    if flags.is_set(HtpFlags::PATH_UTF8_INVALID) && cfg.utf8_invalid_unwanted != HtpUnwanted::Ignore
+    {
+        *status = cfg.utf8_invalid_unwanted;
+    }
+}
+#[cfg(test)]
+mod tests {
+    use crate::{
+        bstr::Bstr, config::Config, config::HtpUnwanted, utf8_decoder::decode_and_validate_inplace,
+    };
+    use rstest::rstest;
+
+    // Each case pairs a raw byte stream with the expected best-fit output;
+    // '?' is the default replacement byte for invalid sequences. The binary
+    // comments above each case spell out the input bit patterns.
+    #[rstest]
+    #[case(b"\xf1.\xf1\xef\xbd\x9dabcd", "?.?}abcd")]
+    //1111 0000 1001 0000 1000 1101 1111 1111
+    #[case::invalid_incomplete_seq(b"\xf0\x90\x8d\xff", "??")]
+    //1110 0010 1000 0010
+    #[case::invalid_incomplete_seq(b"\xe2\x82", "?")]
+    //1100 0010 1111 1111 1111 0000
+    #[case::invalid_incomplete_seq(b"\xc2\xff\xf0", "??")]
+    //1111 0000 1001 0000 0010 1000 1011 1100
+    #[case::invalid_incomplete_seq(b"\xf0\x90\x28\xbc", "?(?")]
+    fn test_decode_and_validate_inplace(#[case] input: &[u8], #[case] expected: &str) {
+        let mut cfg = Config::default();
+        cfg.set_utf8_convert_bestfit(true);
+        let mut i = Bstr::from(input);
+        let mut flags = 0;
+        let mut response_status_expected_number = HtpUnwanted::Ignore;
+        decode_and_validate_inplace(
+            &cfg.decoder_cfg,
+            &mut flags,
+            &mut response_status_expected_number,
+            &mut i,
+        );
+        assert_eq!(i, Bstr::from(expected));
+    }
+}
diff --git a/rust/htp/src/util.rs b/rust/htp/src/util.rs
new file mode 100644 (file)
index 0000000..ace65f6
--- /dev/null
@@ -0,0 +1,806 @@
+//! Utility functions for http parsing.
+
+use crate::{config::HtpServerPersonality, error::NomError};
+use nom::{
+    branch::alt,
+    bytes::complete::{
+        is_not, tag, tag_no_case, take_till, take_until, take_while, take_while1, take_while_m_n,
+    },
+    bytes::streaming::{tag as streaming_tag, take_till as streaming_take_till},
+    character::complete::{char, digit1},
+    character::is_space as nom_is_space,
+    combinator::{map, opt},
+    sequence::tuple,
+    Err::Incomplete,
+    IResult, Needed,
+};
+
+use std::str::FromStr;
+
+/// String for the libhtp version.
+/// Note: carries an explicit trailing NUL so it can be handed to C callers
+/// as-is across the FFI boundary.
+pub const HTP_VERSION_STRING_FULL: &'_ str = concat!("LibHTP v", env!("CARGO_PKG_VERSION"), "\x00");
+
+/// Trait to allow interacting with flags.
+pub(crate) trait FlagOperations<T> {
+    /// Inserts the specified flags in-place.
+    fn set(&mut self, other: T);
+    /// Removes the specified flags in-place.
+    fn unset(&mut self, other: T);
+    /// Determine if the specified flags are set.
+    /// Note: returns true if ANY of the given bits are set, not all of them.
+    fn is_set(&self, other: T) -> bool;
+}
+
+impl FlagOperations<u8> for u8 {
+    /// Inserts the specified flags in-place.
+    fn set(&mut self, other: u8) {
+        *self |= other;
+    }
+    /// Removes the specified flags in-place.
+    fn unset(&mut self, other: u8) {
+        *self &= !other;
+    }
+    /// Determine if any of the specified flags are set.
+    fn is_set(&self, other: u8) -> bool {
+        self & other != 0
+    }
+}
+
+impl FlagOperations<u64> for u64 {
+    /// Inserts the specified flags in-place.
+    fn set(&mut self, other: u64) {
+        *self |= other;
+    }
+    /// Removes the specified flags in-place.
+    fn unset(&mut self, other: u64) {
+        *self &= !other;
+    }
+    /// Determine if any of the specified flags are set.
+    fn is_set(&self, other: u64) -> bool {
+        self & other != 0
+    }
+}
+
+/// Various flag bits. Even though we have a flag field in several places
+/// (header, transaction, connection), these fields are all in the same namespace
+/// because we may want to set the same flag in several locations. For example, we
+/// may set HTP_FIELD_FOLDED on the actual folded header, but also on the transaction
+/// that contains the header. Both uses are useful.
+///
+/// The struct is only a namespace for the associated constants; it carries
+/// no data.
+#[repr(C)]
+pub struct HtpFlags;
+
+impl HtpFlags {
+    /// Field cannot be parsed.
+    pub const FIELD_UNPARSEABLE: u64 = 0x0000_0000_0004;
+    /// Field is invalid.
+    pub const FIELD_INVALID: u64 = 0x0000_0000_0008;
+    /// Field is folded.
+    pub const FIELD_FOLDED: u64 = 0x0000_0000_0010;
+    /// Field has been seen more than once.
+    pub const FIELD_REPEATED: u64 = 0x0000_0000_0020;
+    // Field is too long. (Kept reserved; value currently unused.)
+    //pub const FIELD_LONG: u64 = 0x0000_0000_0040;
+    // Field contains raw null byte. (Kept reserved; value currently unused.)
+    //pub const FIELD_RAW_NUL: u64 = 0x0000_0000_0080;
+    /// Detect HTTP request smuggling.
+    pub const REQUEST_SMUGGLING: u64 = 0x0000_0000_0100;
+    /// Invalid header folding.
+    pub const INVALID_FOLDING: u64 = 0x0000_0000_0200;
+    /// Invalid request transfer-encoding.
+    pub const REQUEST_INVALID_T_E: u64 = 0x0000_0000_0400;
+    /// Multiple chunks.
+    pub const MULTI_PACKET_HEAD: u64 = 0x0000_0000_0800;
+    /// No host information in header.
+    pub const HOST_MISSING: u64 = 0x0000_0000_1000;
+    /// Inconsistent host or port information.
+    pub const HOST_AMBIGUOUS: u64 = 0x0000_0000_2000;
+    /// Encoded path contains null.
+    pub const PATH_ENCODED_NUL: u64 = 0x0000_0000_4000;
+    /// Url encoded contains raw null.
+    pub const PATH_RAW_NUL: u64 = 0x0000_0000_8000;
+    /// Url encoding is invalid.
+    pub const PATH_INVALID_ENCODING: u64 = 0x0000_0001_0000;
+    // Path is invalid. (Kept reserved; value currently unused.)
+    //pub const PATH_INVALID: u64 = 0x0000_0002_0000;
+    /// Overlong usage in path.
+    pub const PATH_OVERLONG_U: u64 = 0x0000_0004_0000;
+    /// Encoded path separators present.
+    pub const PATH_ENCODED_SEPARATOR: u64 = 0x0000_0008_0000;
+    /// At least one valid UTF-8 character and no invalid ones.
+    pub const PATH_UTF8_VALID: u64 = 0x0000_0010_0000;
+    /// Invalid utf8 in path.
+    pub const PATH_UTF8_INVALID: u64 = 0x0000_0020_0000;
+    /// Invalid utf8 overlong character.
+    pub const PATH_UTF8_OVERLONG: u64 = 0x0000_0040_0000;
+    /// Range U+FF00 - U+FFEF detected.
+    pub const PATH_HALF_FULL_RANGE: u64 = 0x0000_0080_0000;
+    /// Status line is invalid.
+    pub const STATUS_LINE_INVALID: u64 = 0x0000_0100_0000;
+    /// Invalid host in the URI.
+    pub const HOSTU_INVALID: u64 = 0x0000_0200_0000;
+    /// Invalid host in the Host header.
+    pub const HOSTH_INVALID: u64 = 0x0000_0400_0000;
+    /// URL-encoded data contains an encoded null.
+    pub const URLEN_ENCODED_NUL: u64 = 0x0000_0800_0000;
+    /// Invalid URL encoding.
+    pub const URLEN_INVALID_ENCODING: u64 = 0x0000_1000_0000;
+    /// Overlong usage in URL-encoded data.
+    pub const URLEN_OVERLONG_U: u64 = 0x0000_2000_0000;
+    /// Range U+FF00 - U+FFEF detected.
+    pub const URLEN_HALF_FULL_RANGE: u64 = 0x0000_4000_0000;
+    /// URL-encoded data contains a raw null byte.
+    pub const URLEN_RAW_NUL: u64 = 0x0000_8000_0000;
+    /// Request invalid.
+    pub const REQUEST_INVALID: u64 = 0x0001_0000_0000;
+    /// Request content-length invalid.
+    pub const REQUEST_INVALID_C_L: u64 = 0x0002_0000_0000;
+    /// Authorization is invalid.
+    pub const AUTH_INVALID: u64 = 0x0004_0000_0000;
+    /// Missing bytes in request and/or response data.
+    pub const MISSING_BYTES: u64 = 0x0008_0000_0000;
+    /// Missing bytes in request data. (Includes the MISSING_BYTES bit so a
+    /// single is_set(MISSING_BYTES) check covers both directions.)
+    pub const REQUEST_MISSING_BYTES: u64 = (0x0010_0000_0000 | Self::MISSING_BYTES);
+    /// Missing bytes in the response data. (Also includes MISSING_BYTES.)
+    pub const RESPONSE_MISSING_BYTES: u64 = (0x0020_0000_0000 | Self::MISSING_BYTES);
+    /// Too many headers, log only once.
+    pub const HEADERS_TOO_MANY: u64 = 0x0040_0000_0000;
+}
+
+#[allow(clippy::upper_case_acronyms)]
+/// Enumerates possible EOLs (line terminators) recognized by take_till_eol.
+#[derive(PartialEq, Eq, Copy, Clone, Debug)]
+pub(crate) enum Eol {
+    /// '\n' (bare line feed)
+    LF,
+    /// '\r' (bare carriage return)
+    CR,
+    /// "\r\n" (carriage return + line feed)
+    CRLF,
+}
+
+/// Determines if character is a separator (per RFC 2616 token grammar).
+/// separators = "(" | ")" | "<" | ">" | "@"
+/// | "," | ";" | ":" | "\" | <">
+/// | "/" | "[" | "]" | "?" | "="
+/// | "{" | "}" | SP | HT
+fn is_separator(c: u8) -> bool {
+    matches!(
+        c as char,
+        '(' | ')'
+            | '<'
+            | '>'
+            | '@'
+            | ','
+            | ';'
+            | ':'
+            | '\\'
+            | '"'
+            | '/'
+            | '['
+            | ']'
+            | '?'
+            | '='
+            | '{'
+            | '}'
+            | ' '
+            | '\t'
+    )
+}
+
+/// Determines if character is a token.
+/// token = 1*<any CHAR except CTLs or separators>
+/// CHAR  = <any US-ASCII character (octets 0 - 127)>
+pub(crate) fn is_token(c: u8) -> bool {
+    // Printable ASCII (space..tilde) excluding the separator set above.
+    (32..=126).contains(&c) && !is_separator(c)
+}
+
+/// This parser takes leading whitespace as defined by is_ascii_whitespace.
+/// Returned as a closure so it can be composed with other nom combinators.
+pub(crate) fn take_ascii_whitespace() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| take_while(|c: u8| c.is_ascii_whitespace())(input)
+}
+
+/// Remove all line terminators (LF, CR or CRLF) from
+/// the end of the line provided as input.
+/// Strips any run of trailing CR/LF bytes, in any order.
+pub(crate) fn chomp(mut data: &[u8]) -> &[u8] {
+    loop {
+        let last_char = data.last();
+        if last_char == Some(&(b'\n')) || last_char == Some(&(b'\r')) {
+            data = &data[..data.len() - 1];
+        } else {
+            break;
+        }
+    }
+    data
+}
+
+/// Trim the leading whitespace (as defined by util::is_space).
+fn trim_start(input: &[u8]) -> &[u8] {
+    let mut result = input;
+    while let Some(x) = result.first() {
+        if is_space(*x) {
+            result = &result[1..]
+        } else {
+            break;
+        }
+    }
+    result
+}
+
+/// Trim the trailing whitespace (as defined by util::is_space).
+fn trim_end(input: &[u8]) -> &[u8] {
+    let mut result = input;
+    while let Some(x) = result.last() {
+        if is_space(*x) {
+            result = &result[..(result.len() - 1)]
+        } else {
+            break;
+        }
+    }
+    result
+}
+
+/// Trim the leading and trailing whitespace from this byteslice.
+/// Returns a subslice of the input; no allocation takes place.
+pub(crate) fn trimmed(input: &[u8]) -> &[u8] {
+    trim_end(trim_start(input))
+}
+
+/// Splits the given input into two halves using the given predicate.
+/// The `reverse` parameter determines whether to split on the first
+/// match (false) or the last match (true).
+/// The `do_trim` parameter will return results with leading and trailing
+/// whitespace trimmed.
+/// If the predicate does not match, then the entire input is returned
+/// in the first predicate element and an empty binary string is returned
+/// in the second element.
+pub(crate) fn split_on_predicate<F>(
+    input: &[u8], reverse: bool, do_trim: bool, predicate: F,
+) -> (&[u8], &[u8])
+where
+    F: FnMut(&u8) -> bool,
+{
+    let (first, second) = if reverse {
+        // rsplitn yields pieces right-to-left, so the first item from the
+        // iterator is the piece AFTER the last match.
+        let mut iter = input.rsplitn(2, predicate);
+        let mut second = iter.next();
+        let mut first = iter.next();
+        // If we do not get two results, then put the only result first
+        if first.is_none() {
+            first = second;
+            second = None;
+        }
+        (first.unwrap_or(b""), second.unwrap_or(b""))
+    } else {
+        let mut iter = input.splitn(2, predicate);
+        let first = iter.next();
+        let second = iter.next();
+        (first.unwrap_or(b""), second.unwrap_or(b""))
+    };
+
+    if do_trim {
+        (trimmed(first), trimmed(second))
+    } else {
+        (first, second)
+    }
+}
+
+/// Determines if character is a whitespace character.
+/// whitespace = ' ' | '\t' | '\r' | '\n' | '\x0b' | '\x0c'
+pub(crate) fn is_space(c: u8) -> bool {
+    matches!(c as char, ' ' | '\t' | '\r' | '\n' | '\x0b' | '\x0c')
+}
+
+/// Is the given line empty?
+///
+/// Returns true only for a line that is exactly CR, LF, or CRLF.
+fn is_line_empty(data: &[u8]) -> bool {
+    matches!(data, b"\x0d" | b"\x0a" | b"\x0d\x0a")
+}
+
+/// Determine if entire line is whitespace as defined by
+/// util::is_space. (An empty slice also returns true.)
+fn is_line_whitespace(data: &[u8]) -> bool {
+    !data.iter().any(|c| !is_space(*c))
+}
+
+/// Searches for and extracts the next set of ascii digits from the input slice if present
+/// Parses over leading and trailing LWS characters.
+///
+/// Returns (any trailing non-LWS characters, (non-LWS leading characters, ascii digits))
+/// Errors if no digits are found (digit1 requires at least one digit).
+pub(crate) fn ascii_digits(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+    map(
+        tuple((
+            nom_take_is_space,
+            take_till(|c: u8| c.is_ascii_digit()),
+            digit1,
+            nom_take_is_space,
+        )),
+        |(_, leading_data, digits, _)| (leading_data, digits),
+    )(input)
+}
+
+/// Searches for and extracts the next set of hex digits from the input slice if present
+/// Parses over leading and trailing LWS characters.
+///
+/// Returns a tuple of any trailing non-LWS characters and the found hex digits.
+/// Note: uses take_while (zero or more), so the digits slice may be empty.
+pub(crate) fn hex_digits() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+    move |input| {
+        map(
+            tuple((
+                nom_take_is_space,
+                take_while(|c: u8| c.is_ascii_hexdigit()),
+                nom_take_is_space,
+            )),
+            |(_, digits, _)| digits,
+        )(input)
+    }
+}
+
+/// Determines if the given line is a request terminator.
+fn is_line_terminator(
+    server_personality: HtpServerPersonality, data: &[u8], next_no_lf: bool,
+) -> bool {
+    // Is this the end of request headers?
+    if server_personality == HtpServerPersonality::IIS_5_0 {
+        // IIS 5 will accept a whitespace line as a terminator
+        if is_line_whitespace(data) {
+            return true;
+        }
+    }
+
+    // Treat an empty line as terminator
+    if is_line_empty(data) {
+        return true;
+    }
+    // A two-byte "<space>\n" line only terminates when the caller says the
+    // next data does not continue with an LF (next_no_lf).
+    if data.len() == 2 && nom_is_space(data[0]) && data[1] == b'\n' {
+        return next_no_lf;
+    }
+    false
+}
+
+/// Determines if the given line can be ignored when it appears before a request.
+/// Delegates to is_line_terminator with next_no_lf = false.
+pub(crate) fn is_line_ignorable(server_personality: HtpServerPersonality, data: &[u8]) -> bool {
+    is_line_terminator(server_personality, data, false)
+}
+
+/// Attempts to convert the provided port slice to a u16
+///
+/// Returns port number if a valid one is found. None if fails to convert or the result is 0.
+/// Parsing is strict decimal via str::parse::<u16>, so values above 65535,
+/// embedded whitespace, or non-digit characters all yield None.
+pub(crate) fn convert_port(port: &[u8]) -> Option<u16> {
+    if port.is_empty() {
+        return None;
+    }
+    let port_number = std::str::from_utf8(port).ok()?.parse::<u16>().ok()?;
+    if port_number == 0 {
+        None
+    } else {
+        Some(port_number)
+    }
+}
+
+/// Determine if the information provided on the response line
+/// is good enough. Browsers are lax when it comes to response
+/// line parsing. In most cases they will only look for the
+/// words "http" at the beginning.
+///
+/// Returns true for good enough (treat as response body) or false for not good enough
+pub(crate) fn treat_response_line_as_body(data: &[u8]) -> bool {
+    // Browser behavior:
+    //      Firefox 3.5.x: (?i)^\s*http
+    //      IE: (?i)^\s*http\s*/
+    //      Safari: ^HTTP/\d+\.\d+\s+\d{3}
+
+    // "Good enough" = optional space/null padding followed by "http" (any case).
+    tuple((opt(take_is_space_or_null), tag_no_case("http")))(data).is_err()
+}
+
+/// Implements relaxed (not strictly RFC) hostname validation.
+///
+/// Returns true if the supplied hostname is valid; false if it is not.
+pub(crate) fn validate_hostname(input: &[u8]) -> bool {
+    if input.is_empty() || input.len() > 255 {
+        return false;
+    }
+
+    // Check IPv6: a bracketed literal is valid only if the bracketed part
+    // parses as an IPv6 address.
+    if let Ok((_rest, (_left_br, addr, _right_br))) = tuple((
+        char::<_, NomError<&[u8]>>('['),
+        is_not::<_, _, NomError<&[u8]>>("#?/]"),
+        char::<_, NomError<&[u8]>>(']'),
+    ))(input)
+    {
+        if let Ok(str) = std::str::from_utf8(addr) {
+            return std::net::Ipv6Addr::from_str(str).is_ok();
+        }
+    }
+
+    // Reject a leading dot or any ".." anywhere in the name.
+    if tag::<_, _, NomError<&[u8]>>(".")(input).is_ok()
+        || take_until::<_, _, NomError<&[u8]>>("..")(input).is_ok()
+    {
+        return false;
+    }
+    for section in input.split(|&c| c == b'.') {
+        if section.len() > 63 {
+            return false;
+        }
+        // According to the RFC, an underscore is not allowed in the label, but
+        // we allow it here because we think it's often seen in practice.
+        // NOTE(review): `c as char` maps bytes 0x80-0xFF to Latin-1 code
+        // points, some of which satisfy is_alphanumeric() — confirm that
+        // accepting such non-ASCII bytes in labels is intended.
+        if take_while_m_n::<_, _, NomError<&[u8]>>(section.len(), section.len(), |c| {
+            c == b'_' || c == b'-' || (c as char).is_alphanumeric()
+        })(section)
+        .is_err()
+        {
+            return false;
+        }
+    }
+    true
+}
+
+/// Returns the LibHTP version string (includes a trailing NUL for C callers;
+/// see HTP_VERSION_STRING_FULL).
+pub(crate) fn get_version() -> &'static str {
+    HTP_VERSION_STRING_FULL
+}
+
+/// Take leading whitespace as defined by nom_is_space (space and tab only).
+pub(crate) fn nom_take_is_space(data: &[u8]) -> IResult<&[u8], &[u8]> {
+    take_while(nom_is_space)(data)
+}
+
+/// Take data before the first null character if it exists.
+pub(crate) fn take_until_null(data: &[u8]) -> IResult<&[u8], &[u8]> {
+    take_while(|c| c != b'\0')(data)
+}
+
+/// Take leading space as defined by util::is_space.
+pub(crate) fn take_is_space(data: &[u8]) -> IResult<&[u8], &[u8]> {
+    take_while(is_space)(data)
+}
+
+/// Take leading null characters or spaces as defined by util::is_space
+pub(crate) fn take_is_space_or_null(data: &[u8]) -> IResult<&[u8], &[u8]> {
+    take_while(|c| is_space(c) || c == b'\0')(data)
+}
+
+/// Take any non-space character as defined by is_space.
+pub(crate) fn take_not_is_space(data: &[u8]) -> IResult<&[u8], &[u8]> {
+    take_while(|c: u8| !is_space(c))(data)
+}
+
+/// Returns all data up to and including the first new line or null
+/// Returns Err if not found
+pub(crate) fn take_till_lf_null(data: &[u8]) -> IResult<&[u8], &[u8]> {
+    // The streaming take_till returns Incomplete (propagated by `?`) when no
+    // matching byte exists, so indexing at line.len() below is in-bounds.
+    let (_, line) = streaming_take_till(|c| c == b'\n' || c == 0)(data)?;
+    Ok((&data[line.len() + 1..], &data[0..line.len() + 1]))
+}
+
+/// Returns all data up to and including the first new line
+/// Returns Err if not found
+pub(crate) fn take_till_lf(data: &[u8]) -> IResult<&[u8], &[u8]> {
+    // Same in-bounds reasoning as take_till_lf_null: streaming semantics
+    // guarantee a '\n' exists at index line.len() on success.
+    let (_, line) = streaming_take_till(|c| c == b'\n')(data)?;
+    Ok((&data[line.len() + 1..], &data[0..line.len() + 1]))
+}
+
+/// Returns all data up to and including the first EOL and which EOL was seen
+///
+/// Returns Err if not found
+pub(crate) fn take_till_eol(data: &[u8]) -> IResult<&[u8], (&[u8], Eol)> {
+    // alt tries "\r\n" before "\r", so CRLF is preferred over a bare CR.
+    // Streaming tags mean a trailing "\r" at end of input yields Incomplete
+    // (more input could turn it into CRLF).
+    let (_, (line, eol)) = tuple((
+        streaming_take_till(|c| c == b'\n' || c == b'\r'),
+        alt((
+            streaming_tag("\r\n"),
+            streaming_tag("\r"),
+            streaming_tag("\n"),
+        )),
+    ))(data)?;
+    match eol {
+        b"\n" => Ok((&data[line.len() + 1..], (&data[0..line.len() + 1], Eol::LF))),
+        b"\r" => Ok((&data[line.len() + 1..], (&data[0..line.len() + 1], Eol::CR))),
+        b"\r\n" => Ok((
+            &data[line.len() + 2..],
+            (&data[0..line.len() + 2], Eol::CRLF),
+        )),
+        // Unreachable given the alt above; kept as a defensive fallback.
+        _ => Err(Incomplete(Needed::new(1))),
+    }
+}
+
+/// Skip control characters allowed around chunked-encoding length lines.
+pub(crate) fn take_chunked_ctl_chars(data: &[u8]) -> IResult<&[u8], &[u8]> {
+    take_while(is_chunked_ctl_char)(data)
+}
+
+/// Check if the data contains valid chunked length chars, i.e. leading chunked ctl chars and ascii hexdigits
+///
+/// Returns true if valid, false otherwise.
+/// Valid means: after skipping leading ctl chars, the next byte (if any)
+/// is a hex digit — the inner take_while1 only succeeds (making the data
+/// invalid) when one or more NON-hexdigit bytes follow.
+pub(crate) fn is_valid_chunked_length_data(data: &[u8]) -> bool {
+    tuple((
+        take_chunked_ctl_chars,
+        take_while1(|c: u8| !c.is_ascii_hexdigit()),
+    ))(data)
+    .is_err()
+}
+
+/// Control characters tolerated in chunked length lines:
+/// CR, LF, space, tab, vertical tab, form feed.
+fn is_chunked_ctl_char(c: u8) -> bool {
+    matches!(c, 0x0d | 0x0a | 0x20 | 0x09 | 0x0b | 0x0c)
+}
+
+/// Check if the entire input line is chunked control characters
+/// (an empty line also returns true).
+pub(crate) fn is_chunked_ctl_line(l: &[u8]) -> bool {
+    for c in l {
+        if !is_chunked_ctl_char(*c) {
+            return false;
+        }
+    }
+    true
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::util::*;
+    use rstest::rstest;
+
+    #[rstest]
+    #[case("", "", "")]
+    #[case("hello world", "", "hello world")]
+    #[case("\0", "\0", "")]
+    #[case("hello_world  \0   ", "\0   ", "hello_world  ")]
+    #[case("hello\0\0\0\0", "\0\0\0\0", "hello")]
+    fn test_take_until_null(#[case] input: &str, #[case] remaining: &str, #[case] parsed: &str) {
+        assert_eq!(
+            take_until_null(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), parsed.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case("", "", "")]
+    #[case("   hell o", "hell o", "   ")]
+    #[case("   \thell o", "hell o", "   \t")]
+    #[case("hell o", "hell o", "")]
+    #[case("\r\x0b  \thell \to", "hell \to", "\r\x0b  \t")]
+    fn test_take_is_space(#[case] input: &str, #[case] remaining: &str, #[case] parsed: &str) {
+        assert_eq!(
+            take_is_space(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), parsed.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case("   http 1.1", false)]
+    #[case("\0 http 1.1", false)]
+    #[case("http", false)]
+    #[case("HTTP", false)]
+    #[case("    HTTP", false)]
+    #[case("test", true)]
+    #[case("     test", true)]
+    #[case("", true)]
+    #[case("kfgjl  hTtp ", true)]
+    fn test_treat_response_line_as_body(#[case] input: &str, #[case] expected: bool) {
+        assert_eq!(treat_response_line_as_body(input.as_bytes()), expected);
+    }
+
+    #[rstest]
+    #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")]
+    #[case("", "", "")]
+    #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")]
+    #[case("header:value\r\r", "", "")]
+    #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")]
+    #[case("header:value", "", "")]
+    #[case("\nheader:value\r\n", "header:value\r\n", "\n")]
+    #[case("header:value\r\n", "", "header:value\r\n")]
+    #[case("header:value\n\r", "\r", "header:value\n")]
+    #[case("header:value\n\n", "\n", "header:value\n")]
+    #[case("abcdefg\nhijk", "hijk", "abcdefg\n")]
+    fn test_take_till_lf(#[case] input: &str, #[case] remaining: &str, #[case] parsed: &str) {
+        assert_eq!(
+            take_till_lf(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), parsed.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")]
+    #[case("", "", "", Eol::CR)]
+    #[case("abcdefg\n", "", "abcdefg\n", Eol::LF)]
+    #[case("abcdefg\n\r", "\r", "abcdefg\n", Eol::LF)]
+    #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")]
+    #[case("abcdefg\r", "", "", Eol::CR)]
+    #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")]
+    #[case("abcdefg", "", "", Eol::CR)]
+    #[case("abcdefg\nhijk", "hijk", "abcdefg\n", Eol::LF)]
+    #[case("abcdefg\n\r\nhijk", "\r\nhijk", "abcdefg\n", Eol::LF)]
+    #[case("abcdefg\rhijk", "hijk", "abcdefg\r", Eol::CR)]
+    #[case("abcdefg\r\nhijk", "hijk", "abcdefg\r\n", Eol::CRLF)]
+    #[case("abcdefg\r\n", "", "abcdefg\r\n", Eol::CRLF)]
+    fn test_take_till_eol(
+        #[case] input: &str, #[case] remaining: &str, #[case] parsed: &str, #[case] eol: Eol,
+    ) {
+        assert_eq!(
+            take_till_eol(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), (parsed.as_bytes(), eol))
+        );
+    }
+
+    #[rstest]
+    #[case(b'a', false)]
+    #[case(b'^', false)]
+    #[case(b'-', false)]
+    #[case(b'_', false)]
+    #[case(b'&', false)]
+    #[case(b'(', true)]
+    #[case(b'\\', true)]
+    #[case(b'/', true)]
+    #[case(b'=', true)]
+    #[case(b'\t', true)]
+    fn test_is_separator(#[case] input: u8, #[case] expected: bool) {
+        assert_eq!(is_separator(input), expected);
+    }
+
+    #[rstest]
+    #[case(b'a', true)]
+    #[case(b'&', true)]
+    #[case(b'+', true)]
+    #[case(b'\t', false)]
+    #[case(b'\n', false)]
+    fn test_is_token(#[case] input: u8, #[case] expected: bool) {
+        assert_eq!(is_token(input), expected);
+    }
+
+    #[rstest]
+    #[case("", "")]
+    #[case("test\n", "test")]
+    #[case("test\r\n", "test")]
+    #[case("test\r\n\n", "test")]
+    #[case("test\n\r\r\n\r", "test")]
+    #[case("test", "test")]
+    #[case("te\nst", "te\nst")]
+    fn test_chomp(#[case] input: &str, #[case] expected: &str) {
+        assert_eq!(chomp(input.as_bytes()), expected.as_bytes());
+    }
+
+    #[rstest]
+    #[case::trimmed(b"notrim", b"notrim")]
+    #[case::trim_start(b"\t trim", b"trim")]
+    #[case::trim_both(b" trim ", b"trim")]
+    #[case::trim_both_ignore_middle(b" trim trim ", b"trim trim")]
+    #[case::trim_end(b"trim \t", b"trim")]
+    #[case::trim_empty(b"", b"")]
+    fn test_trim(#[case] input: &[u8], #[case] expected: &[u8]) {
+        assert_eq!(trimmed(input), expected);
+    }
+
+    #[rstest]
+    #[case::non_space(0x61, false)]
+    #[case::space(0x20, true)]
+    #[case::form_feed(0x0c, true)]
+    #[case::newline(0x0a, true)]
+    #[case::carriage_return(0x0d, true)]
+    #[case::tab(0x09, true)]
+    #[case::vertical_tab(0x0b, true)]
+    fn test_is_space(#[case] input: u8, #[case] expected: bool) {
+        assert_eq!(is_space(input), expected);
+    }
+
+    #[rstest]
+    #[case("", false)]
+    #[case("arfarf", false)]
+    #[case("\n\r", false)]
+    #[case("\rabc", false)]
+    #[case("\r\n", true)]
+    #[case("\r", true)]
+    #[case("\n", true)]
+    fn test_is_line_empty(#[case] input: &str, #[case] expected: bool) {
+        assert_eq!(is_line_empty(input.as_bytes()), expected);
+    }
+
+    #[rstest]
+    #[case("", false)]
+    #[case("www.ExAmplE-1984.com", true)]
+    #[case("[::]", true)]
+    #[case("[2001:3db8:0000:0000:0000:ff00:d042:8530]", true)]
+    #[case("www.example.com", true)]
+    #[case("www.exa-mple.com", true)]
+    #[case("www.exa_mple.com", true)]
+    #[case(".www.example.com", false)]
+    #[case("www..example.com", false)]
+    #[case("www.example.com..", false)]
+    #[case("www example com", false)]
+    #[case("[::", false)]
+    #[case("[::/path[0]", false)]
+    #[case("[::#garbage]", false)]
+    #[case("[::?]", false)]
+    #[case::over64_char(
+        "www.exampleexampleexampleexampleexampleexampleexampleexampleexampleexample.com",
+        false
+    )]
+    fn test_validate_hostname(#[case] input: &str, #[case] expected: bool) {
+        assert_eq!(validate_hostname(input.as_bytes()), expected);
+    }
+
+    #[rstest]
+    #[should_panic(
+        expected = "called `Result::unwrap()` on an `Err` value: Error(Error { input: [], code: Digit })"
+    )]
+    #[case("   garbage no ascii ", "", "", "")]
+    #[case("    a200 \t  bcd ", "bcd ", "a", "200")]
+    #[case("   555555555    ", "", "", "555555555")]
+    #[case("   555555555    500", "500", "", "555555555")]
+    fn test_ascii_digits(
+        #[case] input: &str, #[case] remaining: &str, #[case] leading: &str, #[case] digits: &str,
+    ) {
+        // Returns (any trailing non-LWS characters, (non-LWS leading characters, ascii digits))
+        assert_eq!(
+            ascii_digits(input.as_bytes()).unwrap(),
+            (
+                remaining.as_bytes(),
+                (leading.as_bytes(), digits.as_bytes())
+            )
+        );
+    }
+
+    #[rstest]
+    #[case("", "", "")]
+    #[case("12a5", "", "12a5")]
+    #[case("12a5   .....", ".....", "12a5")]
+    #[case("    \t12a5.....    ", ".....    ", "12a5")]
+    #[case(" 68656c6c6f   12a5", "12a5", "68656c6c6f")]
+    #[case("  .....", ".....", "")]
+    fn test_hex_digits(#[case] input: &str, #[case] remaining: &str, #[case] digits: &str) {
+        //(trailing non-LWS characters, found hex digits)
+        assert_eq!(
+            hex_digits()(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), digits.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case("", "", "")]
+    #[case("no chunked ctl chars here", "no chunked ctl chars here", "")]
+    #[case(
+        "\x0d\x0a\x20\x09\x0b\x0cno chunked ctl chars here",
+        "no chunked ctl chars here",
+        "\x0d\x0a\x20\x09\x0b\x0c"
+    )]
+    #[case(
+        "no chunked ctl chars here\x20\x09\x0b\x0c",
+        "no chunked ctl chars here\x20\x09\x0b\x0c",
+        ""
+    )]
+    #[case(
+        "\x20\x09\x0b\x0cno chunked ctl chars here\x20\x09\x0b\x0c",
+        "no chunked ctl chars here\x20\x09\x0b\x0c",
+        "\x20\x09\x0b\x0c"
+    )]
+    fn test_take_chunked_ctl_chars(
+        #[case] input: &str, #[case] remaining: &str, #[case] hex_digits: &str,
+    ) {
+        //(trailing non-LWS characters, found hex digits)
+        assert_eq!(
+            take_chunked_ctl_chars(input.as_bytes()).unwrap(),
+            (remaining.as_bytes(), hex_digits.as_bytes())
+        );
+    }
+
+    #[rstest]
+    #[case("", true)]
+    #[case("68656c6c6f", true)]
+    #[case("\x0d\x0a\x20\x09\x0b\x0c68656c6c6f", true)]
+    #[case("X5O!P%@AP", false)]
+    #[case("\x0d\x0a\x20\x09\x0b\x0cX5O!P%@AP", false)]
+    fn test_is_valid_chunked_length_data(#[case] input: &str, #[case] expected: bool) {
+        assert_eq!(is_valid_chunked_length_data(input.as_bytes()), expected);
+    }
+
+    #[rstest]
+    #[case("", false, true, ("", ""))]
+    #[case("ONE TWO THREE", false, true, ("ONE", "TWO THREE"))]
+    #[case("ONE TWO THREE", true, true, ("ONE TWO", "THREE"))]
+    #[case("ONE   TWO   THREE", false, true, ("ONE", "TWO   THREE"))]
+    #[case("ONE   TWO   THREE", true, true, ("ONE   TWO", "THREE"))]
+    #[case("ONE", false, true, ("ONE", ""))]
+    #[case("ONE", true, true, ("ONE", ""))]
+    fn test_split_on_predicate(
+        #[case] input: &str, #[case] reverse: bool, #[case] trim: bool,
+        #[case] expected: (&str, &str),
+    ) {
+        assert_eq!(
+            split_on_predicate(input.as_bytes(), reverse, trim, |c| *c == 0x20),
+            (expected.0.as_bytes(), expected.1.as_bytes())
+        );
+    }
+}
index bdf3d6b1c2d1e94e5f12ec8eda0e9288bc999bf6..ff4b099ea7423badd0c64de0ff6072b74e089df9 100644 (file)
@@ -140,3 +140,5 @@ pub mod direction;
 
 #[allow(unused_imports)]
 pub use suricata_lua_sys;
+// Re-export htp symbols.
+pub use htp::c_api::*;
index 06ea1ba9285d4539048037ce22f498112013a5c9..2b90684c3f10d1c17f59b185e1151bb7c7bb2738 100755 (executable)
@@ -1,12 +1,12 @@
 #! /usr/bin/env bash
 #
-# This script will bundle libhtp and/or suricata-update for you.
+# This script will bundle suricata-update for you.
 #
 # To use, run from the top Suricata source directory:
 #
-#    ./scripts/bundle.sh [suricata-update|libhtp]
+#    ./scripts/bundle.sh [suricata-update]
 #
-# If no arguments are provided, both suricata-update and libhtp will
+# If no arguments are provided, suricata-update will
 # be bundled.
 #
 # Environment variables:
 #   SU_BRANCH: Override the Suricata-Update branch to a branch, tag or
 #              {pull,merge}-request.
 #
-#   LIBHTP_REPO:   Overrides the libhtp git repo
-#   LIBHTP_BRANCH: Override the libhtp branch to a branch, tag or
-#                  {pull,merge}-request.
-#
 #   DESTDIR: Checkout to another directory instead of the current
 #            directory.
 #
@@ -39,8 +35,8 @@ what="$1"
 # For GitHub the following formats are allowed:
 # - pr/123
 # - pull/123
-# - https://github.com/OISF/libhtp/pull/123
-# - OISF/libhtp#123
+# - https://github.com/OISF/suricata-update/pull/123
+# - OISF/suricata-update#123
 #
 # For GibLab only the format "mr/123" is supported.
 transform_branch() {
@@ -96,14 +92,6 @@ while IFS= read -r requirement; do
             cp -a ${DESTDIR}/suricata-update.tmp/. ${DESTDIR}/suricata-update
             rm -rf ${DESTDIR}/suricata-update.tmp
             ;;
-        libhtp)
-            LIBHTP_REPO=${LIBHTP_REPO:-$2}
-            LIBHTP_BRANCH=$(transform_branch ${LIBHTP_BRANCH:-$3})
-            echo "===> Bundling ${LIBHTP_REPO} (${LIBHTP_BRANCH})"
-            rm -rf ${DESTDIR}/libhtp
-            fetch "${LIBHTP_REPO}" "${DESTDIR}/libhtp" "${LIBHTP_BRANCH}"
-            rm -rf libhtp/.git
-            ;;
         \#*)
             # Ignore comment.
             ;;
index 9abe8001f1db40ff0b72ca07695653a58018ebff..174ee8dd9d020fd45680187babab40c0ad272e0e 100755 (executable)
@@ -8,14 +8,6 @@ trap "rm -rf ${tmpdir}" EXIT
 
 (cd .. && tar cf - $(git ls-files)) | (cd ${tmpdir} && tar xf -)
 
-if [ -e ../libhtp ]; then
-    (cd ../libhtp && git archive --format=tar --prefix=libhtp/ HEAD) | \
-       (cd ${tmpdir} && tar xvf -)
-else
-    echo "error: this script required bundled libhtp..."
-    exit 1
-fi
-
 cd ${tmpdir}
 
 # Do initial build.
index fc69e49dbf31874bebe561a8ed7dd463e7dd2122..dc658fb82b9c157aab4e70f41cfadcc39a123b94 100755 (executable)
@@ -301,8 +301,7 @@ function RequireProgram {
 
 # Make sure we are running from the top-level git directory.
 # Same approach as for setup-decoder.sh. Good enough.
-# We could probably use git rev-parse --show-toplevel to do so, as long as we
-# handle the libhtp subfolder correctly.
+# We could probably use git rev-parse --show-toplevel to do so.
 function SetTopLevelDir {
     if [ -e ./src/suricata.c ]; then
         # Do nothing.
index 7b69b4f7247be169dcadfc2868ac41bae1fa8303..8746f25b8bcc7d4c992cbf80bcd8f83f5956e3f6 100755 (executable)
@@ -25,7 +25,6 @@ noinst_HEADERS = \
        app-layer-htp-body.h \
        app-layer-htp-file.h \
        app-layer-htp.h \
-       app-layer-htp-libhtp.h \
        app-layer-htp-mem.h \
        app-layer-htp-range.h \
        app-layer-htp-xff.h \
@@ -611,7 +610,6 @@ libsuricata_c_a_SOURCES = \
        app-layer-htp-body.c \
        app-layer-htp.c \
        app-layer-htp-file.c \
-       app-layer-htp-libhtp.c \
        app-layer-htp-mem.c \
        app-layer-htp-range.c \
        app-layer-htp-xff.c \
@@ -1240,9 +1238,9 @@ suricata_SOURCES = main.c
 suricata_LDFLAGS = $(all_libraries) ${SECLDFLAGS}
 # rust library depends also on c
 if LINKER_SUPPORTS_GROUP
-LDADD_GENERIC = "-Wl,--start-group,libsuricata_c.a,$(RUST_SURICATA_LIB),--end-group" $(HTP_LDADD) $(RUST_LDADD)
+LDADD_GENERIC = "-Wl,--start-group,libsuricata_c.a,$(RUST_SURICATA_LIB),--end-group" $(RUST_LDADD)
 else
-LDADD_GENERIC = libsuricata_c.a $(RUST_SURICATA_LIB) libsuricata_c.a $(RUST_SURICATA_LIB) $(HTP_LDADD) $(RUST_LDADD)
+LDADD_GENERIC = libsuricata_c.a $(RUST_SURICATA_LIB) libsuricata_c.a $(RUST_SURICATA_LIB) $(RUST_LDADD)
 endif
 suricata_LDADD = $(LDADD_GENERIC)
 suricata_DEPENDENCIES = libsuricata_c.a $(RUST_SURICATA_LIB)
diff --git a/src/app-layer-htp-libhtp.c b/src/app-layer-htp-libhtp.c
deleted file mode 100644 (file)
index dcc4a92..0000000
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * We are using this file to hold APIs copied from libhtp 0.5.x.
- */
-
-/***************************************************************************
- * Copyright (c) 2009-2010 Open Information Security Foundation
- * Copyright (c) 2010-2013 Qualys, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * - Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in the
- *   documentation and/or other materials provided with the distribution.
- *
- * - Neither the name of the Qualys, Inc. nor the names of its
- *   contributors may be used to endorse or promote products derived from
- *   this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- ***************************************************************************/
-
-/**
- * \file
- *
- * \author Anoop Saldanha <anoopsaldanha@gmail.com>
- *
- * APIs from libhtp 0.5.x.
- */
-
-#include "suricata-common.h"
-#include <htp/htp.h>
-#include "app-layer-htp-libhtp.h"
-
-/**
- * \brief Generates the normalized uri.
- *
- *        Libhtp doesn't recreate the whole normalized uri and save it.
- *        That duty has now been passed to us.  A lot of this code has been
- *        copied from libhtp.
- *
- *        Keep an eye out on the tx->parsed_uri struct and how the parameters
- *        in it are generated, just in case some modifications are made to
- *        them in the future.
- *
- * \param uri_include_all boolean to indicate if scheme, username/password,
-                          hostname and port should be part of the buffer
- */
-bstr *SCHTPGenerateNormalizedUri(htp_tx_t *tx, htp_uri_t *uri, bool uri_include_all)
-{
-    if (uri == NULL)
-        return NULL;
-
-    // On the first pass determine the length of the final string
-    size_t len = 0;
-
-    if (uri_include_all) {
-        if (uri->scheme != NULL) {
-            len += bstr_len(uri->scheme);
-            len += 3; // "://"
-        }
-
-        if ((uri->username != NULL) || (uri->password != NULL)) {
-            if (uri->username != NULL) {
-                len += bstr_len(uri->username);
-            }
-
-            len += 1; // ":"
-
-            if (uri->password != NULL) {
-                len += bstr_len(uri->password);
-            }
-
-            len += 1; // "@"
-        }
-
-        if (uri->hostname != NULL) {
-            len += bstr_len(uri->hostname);
-        }
-
-        if (uri->port != NULL) {
-            len += 1; // ":"
-            len += bstr_len(uri->port);
-        }
-    }
-
-    if (uri->path != NULL) {
-        len += bstr_len(uri->path);
-    }
-
-    if (uri->query != NULL) {
-        len += 1; // "?"
-        len += bstr_len(uri->query);
-    }
-
-    if (uri->fragment != NULL) {
-        len += 1; // "#"
-        len += bstr_len(uri->fragment);
-    }
-
-    // On the second pass construct the string
-    /* FIXME in memcap */
-    bstr *r = bstr_alloc(len);
-    if (r == NULL) {
-        return NULL;
-    }
-
-    if (uri_include_all) {
-        if (uri->scheme != NULL) {
-            bstr_add_noex(r, uri->scheme);
-            bstr_add_c_noex(r, "://");
-        }
-
-        if ((uri->username != NULL) || (uri->password != NULL)) {
-            if (uri->username != NULL) {
-                bstr_add_noex(r, uri->username);
-            }
-
-            bstr_add_c_noex(r, ":");
-
-            if (uri->password != NULL) {
-                bstr_add_noex(r, uri->password);
-            }
-
-            bstr_add_c_noex(r, "@");
-        }
-
-        if (uri->hostname != NULL) {
-            bstr_add_noex(r, uri->hostname);
-        }
-
-        if (uri->port != NULL) {
-            bstr_add_c_noex(r, ":");
-            bstr_add_noex(r, uri->port);
-        }
-    }
-
-    if (uri->path != NULL) {
-        bstr_add_noex(r, uri->path);
-    }
-
-    if (uri->query != NULL) {
-        bstr *query = bstr_dup(uri->query);
-        if (query) {
-            uint64_t flags = 0;
-            htp_urldecode_inplace(tx->cfg, HTP_DECODER_URLENCODED, query, &flags);
-            bstr_add_c_noex(r, "?");
-            bstr_add_noex(r, query);
-            bstr_free(query);
-        }
-    }
-
-    if (uri->fragment != NULL) {
-        bstr_add_c_noex(r, "#");
-        bstr_add_noex(r, uri->fragment);
-    }
-
-    return r;
-}
diff --git a/src/app-layer-htp-libhtp.h b/src/app-layer-htp-libhtp.h
deleted file mode 100644 (file)
index 3cc4c9b..0000000
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * We are using this file to hold APIs copied from libhtp 0.5.x.
- */
-
-/***************************************************************************
- * Copyright (c) 2009-2010 Open Information Security Foundation
- * Copyright (c) 2010-2013 Qualys, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * - Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in the
- *   documentation and/or other materials provided with the distribution.
- *
- * - Neither the name of the Qualys, Inc. nor the names of its
- *   contributors may be used to endorse or promote products derived from
- *   this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- ***************************************************************************/
-
-/**
- * \file
- *
- * \author Anoop Saldanha <anoopsaldanha@gmail.com>
- *
- * APIs from libhtp 0.5.x.
- */
-
-#ifndef SURICATA_APP_LAYER_HTP_LIBHTP__H
-#define SURICATA_APP_LAYER_HTP_LIBHTP__H
-
-#include "suricata-common.h"
-
-// Temporary alias definitions before switching to libhtp rust
-#define HTP_STATUS_OK    HTP_OK
-#define HTP_STATUS_ERROR HTP_ERROR
-
-#define HTP_SERVER_PERSONALITY_APACHE_2 HTP_SERVER_APACHE_2
-#define HTP_SERVER_PERSONALITY_MINIMAL  HTP_SERVER_MINIMAL
-#define HTP_SERVER_PERSONALITY_GENERIC  HTP_SERVER_GENERIC
-#define HTP_SERVER_PERSONALITY_IDS      HTP_SERVER_IDS
-#define HTP_SERVER_PERSONALITY_IIS_4_0  HTP_SERVER_IIS_4_0
-#define HTP_SERVER_PERSONALITY_IIS_5_0  HTP_SERVER_IIS_5_0
-#define HTP_SERVER_PERSONALITY_IIS_5_1  HTP_SERVER_IIS_5_1
-#define HTP_SERVER_PERSONALITY_IIS_6_0  HTP_SERVER_IIS_6_0
-#define HTP_SERVER_PERSONALITY_IIS_7_0  HTP_SERVER_IIS_7_0
-#define HTP_SERVER_PERSONALITY_IIS_7_5  HTP_SERVER_IIS_7_5
-
-#define HTP_FLAGS_REQUEST_INVALID_T_E HTP_REQUEST_INVALID_T_E
-#define HTP_FLAGS_REQUEST_INVALID_C_L HTP_REQUEST_INVALID_C_L
-#define HTP_FLAGS_HOST_MISSING        HTP_HOST_MISSING
-#define HTP_FLAGS_HOST_AMBIGUOUS      HTP_HOST_AMBIGUOUS
-#define HTP_FLAGS_HOSTU_INVALID       HTP_HOSTU_INVALID
-#define HTP_FLAGS_HOSTH_INVALID       HTP_HOSTH_INVALID
-
-#define HTP_AUTH_TYPE_UNRECOGNIZED HTP_AUTH_UNRECOGNIZED
-
-#define HTP_METHOD_UNKNOWN HTP_M_UNKNOWN
-#define HTP_METHOD_GET     HTP_M_GET
-#define HTP_METHOD_POST    HTP_M_POST
-#define HTP_METHOD_PUT     HTP_M_PUT
-#define HTP_METHOD_CONNECT HTP_M_CONNECT
-
-#define HTP_STREAM_STATE_ERROR  HTP_STREAM_ERROR
-#define HTP_STREAM_STATE_TUNNEL HTP_STREAM_TUNNEL
-
-#define HTP_PROTOCOL_V1_1 HTP_PROTOCOL_1_1
-#define HTP_PROTOCOL_V1_0 HTP_PROTOCOL_1_0
-#define HTP_PROTOCOL_V0_9 HTP_PROTOCOL_0_9
-
-#define HTP_REQUEST_PROGRESS_LINE      HTP_REQUEST_LINE
-#define HTP_REQUEST_PROGRESS_HEADERS   HTP_REQUEST_HEADERS
-#define HTP_REQUEST_PROGRESS_BODY      HTP_REQUEST_BODY
-#define HTP_REQUEST_PROGRESS_TRAILER   HTP_REQUEST_TRAILER
-#define HTP_REQUEST_PROGRESS_COMPLETE  HTP_REQUEST_COMPLETE
-#define HTP_RESPONSE_PROGRESS_LINE     HTP_RESPONSE_LINE
-#define HTP_RESPONSE_PROGRESS_HEADERS  HTP_RESPONSE_HEADERS
-#define HTP_RESPONSE_PROGRESS_BODY     HTP_RESPONSE_BODY
-#define HTP_RESPONSE_PROGRESS_TRAILER  HTP_RESPONSE_TRAILER
-#define HTP_RESPONSE_PROGRESS_COMPLETE HTP_RESPONSE_COMPLETE
-
-#define HTP_LOG_CODE_UNKNOWN                      HTTP_DECODER_EVENT_UNKNOWN_ERROR
-#define HTP_LOG_CODE_GZIP_DECOMPRESSION_FAILED    HTTP_DECODER_EVENT_GZIP_DECOMPRESSION_FAILED
-#define HTP_LOG_CODE_REQUEST_FIELD_MISSING_COLON  HTTP_DECODER_EVENT_REQUEST_FIELD_MISSING_COLON
-#define HTP_LOG_CODE_RESPONSE_FIELD_MISSING_COLON HTTP_DECODER_EVENT_RESPONSE_FIELD_MISSING_COLON
-#define HTP_LOG_CODE_INVALID_REQUEST_CHUNK_LEN    HTTP_DECODER_EVENT_INVALID_REQUEST_CHUNK_LEN
-#define HTP_LOG_CODE_INVALID_RESPONSE_CHUNK_LEN   HTTP_DECODER_EVENT_INVALID_RESPONSE_CHUNK_LEN
-#define HTP_LOG_CODE_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST                                    \
-    HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST
-#define HTP_LOG_CODE_INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE                                   \
-    HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE
-#define HTP_LOG_CODE_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST                                       \
-    HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST
-#define HTP_LOG_CODE_INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE                                      \
-    HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE
-#define HTP_LOG_CODE_DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST                                     \
-    HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST
-#define HTP_LOG_CODE_DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE                                    \
-    HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE
-#define HTP_LOG_CODE_CONTINUE_ALREADY_SEEN HTTP_DECODER_EVENT_100_CONTINUE_ALREADY_SEEN
-#define HTP_LOG_CODE_UNABLE_TO_MATCH_RESPONSE_TO_REQUEST                                           \
-    HTTP_DECODER_EVENT_UNABLE_TO_MATCH_RESPONSE_TO_REQUEST
-#define HTP_LOG_CODE_INVALID_SERVER_PORT_IN_REQUEST                                                \
-    HTTP_DECODER_EVENT_INVALID_SERVER_PORT_IN_REQUEST
-#define HTP_LOG_CODE_INVALID_AUTHORITY_PORT        HTTP_DECODER_EVENT_INVALID_AUTHORITY_PORT
-#define HTP_LOG_CODE_REQUEST_HEADER_INVALID        HTTP_DECODER_EVENT_REQUEST_HEADER_INVALID
-#define HTP_LOG_CODE_RESPONSE_HEADER_INVALID       HTTP_DECODER_EVENT_RESPONSE_HEADER_INVALID
-#define HTP_LOG_CODE_MISSING_HOST_HEADER           HTTP_DECODER_EVENT_MISSING_HOST_HEADER
-#define HTP_LOG_CODE_HOST_HEADER_AMBIGUOUS         HTTP_DECODER_EVENT_HOST_HEADER_AMBIGUOUS
-#define HTP_LOG_CODE_INVALID_REQUEST_FIELD_FOLDING HTTP_DECODER_EVENT_INVALID_REQUEST_FIELD_FOLDING
-#define HTP_LOG_CODE_INVALID_RESPONSE_FIELD_FOLDING                                                \
-    HTTP_DECODER_EVENT_INVALID_RESPONSE_FIELD_FOLDING
-#define HTP_LOG_CODE_REQUEST_FIELD_TOO_LONG  HTTP_DECODER_EVENT_REQUEST_FIELD_TOO_LONG
-#define HTP_LOG_CODE_RESPONSE_FIELD_TOO_LONG HTTP_DECODER_EVENT_RESPONSE_FIELD_TOO_LONG
-#define HTP_LOG_CODE_FILE_NAME_TOO_LONG      HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG
-#define HTP_LOG_CODE_REQUEST_LINE_INVALID    HTTP_DECODER_EVENT_REQUEST_LINE_INVALID
-#define HTP_LOG_CODE_REQUEST_BODY_UNEXPECTED HTTP_DECODER_EVENT_REQUEST_BODY_UNEXPECTED
-#define HTP_LOG_CODE_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH                                         \
-    HTTP_DECODER_EVENT_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH
-#define HTP_LOG_CODE_URI_HOST_INVALID           HTTP_DECODER_EVENT_URI_HOST_INVALID
-#define HTP_LOG_CODE_HEADER_HOST_INVALID        HTTP_DECODER_EVENT_HEADER_HOST_INVALID
-#define HTP_LOG_CODE_AUTH_UNRECOGNIZED          HTTP_DECODER_EVENT_AUTH_UNRECOGNIZED
-#define HTP_LOG_CODE_REQUEST_HEADER_REPETITION  HTTP_DECODER_EVENT_REQUEST_HEADER_REPETITION
-#define HTP_LOG_CODE_RESPONSE_HEADER_REPETITION HTTP_DECODER_EVENT_RESPONSE_HEADER_REPETITION
-#define HTP_LOG_CODE_DOUBLE_ENCODED_URI         HTTP_DECODER_EVENT_DOUBLE_ENCODED_URI
-#define HTP_LOG_CODE_URI_DELIM_NON_COMPLIANT    HTTP_DECODER_EVENT_URI_DELIM_NON_COMPLIANT
-#define HTP_LOG_CODE_METHOD_DELIM_NON_COMPLIANT HTTP_DECODER_EVENT_METHOD_DELIM_NON_COMPLIANT
-#define HTP_LOG_CODE_REQUEST_LINE_LEADING_WHITESPACE                                               \
-    HTTP_DECODER_EVENT_REQUEST_LINE_LEADING_WHITESPACE
-#define HTP_LOG_CODE_TOO_MANY_ENCODING_LAYERS      HTTP_DECODER_EVENT_TOO_MANY_ENCODING_LAYERS
-#define HTP_LOG_CODE_ABNORMAL_CE_HEADER            HTTP_DECODER_EVENT_ABNORMAL_CE_HEADER
-#define HTP_LOG_CODE_RESPONSE_MULTIPART_BYTERANGES HTTP_DECODER_EVENT_RESPONSE_MULTIPART_BYTERANGES
-#define HTP_LOG_CODE_RESPONSE_ABNORMAL_TRANSFER_ENCODING                                           \
-    HTTP_DECODER_EVENT_RESPONSE_ABNORMAL_TRANSFER_ENCODING
-#define HTP_LOG_CODE_RESPONSE_CHUNKED_OLD_PROTO HTTP_DECODER_EVENT_RESPONSE_CHUNKED_OLD_PROTO
-#define HTP_LOG_CODE_RESPONSE_INVALID_PROTOCOL  HTTP_DECODER_EVENT_RESPONSE_INVALID_PROTOCOL
-#define HTP_LOG_CODE_RESPONSE_INVALID_STATUS    HTTP_DECODER_EVENT_RESPONSE_INVALID_STATUS
-#define HTP_LOG_CODE_REQUEST_LINE_INCOMPLETE    HTTP_DECODER_EVENT_REQUEST_LINE_INCOMPLETE
-#define HTP_LOG_CODE_LZMA_MEMLIMIT_REACHED      HTTP_DECODER_EVENT_LZMA_MEMLIMIT_REACHED
-#define HTP_LOG_CODE_COMPRESSION_BOMB           HTTP_DECODER_EVENT_COMPRESSION_BOMB
-
-// Functions introduced to handle opaque htp_tx_t
-#define htp_tx_flags(tx)                    (tx)->flags
-#define htp_tx_is_protocol_0_9(tx)          (tx)->is_protocol_0_9
-#define htp_tx_request_auth_type(tx)        (tx)->request_auth_type
-#define htp_tx_request_hostname(tx)         (tx)->request_hostname
-#define htp_tx_request_line(tx)             (tx)->request_line
-#define htp_tx_request_message_len(tx)      (tx)->request_message_len
-#define htp_tx_request_method(tx)           (tx)->request_method
-#define htp_tx_request_method_number(tx)    tx->request_method_number
-#define htp_tx_request_port_number(tx)      (tx)->request_port_number
-#define htp_tx_request_progress(tx)         (tx)->request_progress
-#define htp_tx_request_protocol(tx)         (tx)->request_protocol
-#define htp_tx_request_protocol_number(tx)  (tx)->request_protocol_number
-#define htp_tx_request_uri(tx)              (tx)->request_uri
-#define htp_tx_request_headers(tx)          (tx)->request_headers
-#define htp_tx_response_headers(tx)         (tx)->response_headers
-#define htp_tx_response_protocol(tx)        (tx)->response_protocol
-#define htp_tx_response_line(tx)            (tx)->response_line
-#define htp_tx_response_message(tx)         (tx)->response_message
-#define htp_tx_response_message_len(tx)     (tx)->response_message_len
-#define htp_tx_response_status(tx)          (tx)->response_status
-#define htp_tx_response_status_number(tx)   (tx)->response_status_number
-#define htp_tx_response_progress(tx)        (tx)->response_progress
-#define htp_tx_response_protocol_number(tx) (tx)->response_protocol_number
-
-#define htp_tx_request_header(tx, header)  htp_table_get_c((tx)->request_headers, header)
-#define htp_tx_response_header(tx, header) htp_table_get_c((tx)->response_headers, header)
-
-// Functions introduced to handle opaque htp_header_t
-#define htp_header_name_len(h)  bstr_len((h)->name)
-#define htp_header_name_ptr(h)  bstr_ptr((h)->name)
-#define htp_header_name(h)      (h)->name
-#define htp_header_value_len(h) bstr_len((h)->value)
-#define htp_header_value_ptr(h) bstr_ptr((h)->value)
-#define htp_header_value(h)     (h)->value
-
-// Functions introduced to handle opaque htp_headers_t:
-#define htp_headers_size(headers)             htp_table_size(headers)
-#define htp_headers_get_index(headers, index) htp_table_get_index(headers, index, NULL)
-#define htp_tx_request_headers_size(tx)       htp_table_size((tx)->request_headers)
-#define htp_tx_request_header_index(tx, i)    htp_table_get_index((tx)->request_headers, i, NULL);
-#define htp_headers_t                         htp_table_t
-
-// Functions introduced to handle opaque htp_tx_data_t:
-#define htp_tx_data_len(d)  (d)->len
-#define htp_tx_data_data(d) (d)->data
-#define htp_tx_data_tx(d)   (d)->tx
-
-// Functions introduced to handle opaque htp_conn_t:
-#define htp_conn_request_data_counter(c)  (c)->in_data_counter
-#define htp_conn_response_data_counter(c) (c)->out_data_counter
-
-bstr *SCHTPGenerateNormalizedUri(htp_tx_t *tx, htp_uri_t *uri, bool uri_include_all);
-
-#endif /* SURICATA_APP_LAYER_HTP_LIBHTP__H */
index 6f43033dd411032843c58ae0f83ad315b0bb9a09..b76b717c2493d2cf73b2355ff1b0327f44ee5516 100644 (file)
@@ -52,7 +52,6 @@
 #include "app-layer-htp.h"
 #include "app-layer-htp-body.h"
 #include "app-layer-htp-file.h"
-#include "app-layer-htp-libhtp.h"
 #include "app-layer-htp-xff.h"
 #include "app-layer-htp-range.h"
 #include "app-layer-htp-mem.h"
@@ -133,9 +132,9 @@ SCEnumCharMap http_decoder_event_table[] = {
     { "INVALID_RESPONSE_FIELD_FOLDING", HTP_LOG_CODE_INVALID_RESPONSE_FIELD_FOLDING },
     { "REQUEST_FIELD_TOO_LONG", HTP_LOG_CODE_REQUEST_FIELD_TOO_LONG },
     { "RESPONSE_FIELD_TOO_LONG", HTP_LOG_CODE_RESPONSE_FIELD_TOO_LONG },
-    { "FILE_NAME_TOO_LONG", HTP_LOG_CODE_FILE_NAME_TOO_LONG },
     { "REQUEST_LINE_INVALID", HTP_LOG_CODE_REQUEST_LINE_INVALID },
     { "REQUEST_BODY_UNEXPECTED", HTP_LOG_CODE_REQUEST_BODY_UNEXPECTED },
+    { "RESPONSE_BODY_UNEXPECTED", HTP_LOG_CODE_RESPONSE_BODY_UNEXPECTED },
     { "REQUEST_SERVER_PORT_TCP_PORT_MISMATCH", HTP_LOG_CODE_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH },
     { "REQUEST_URI_HOST_INVALID", HTP_LOG_CODE_URI_HOST_INVALID },
     { "REQUEST_HEADER_HOST_INVALID", HTP_LOG_CODE_HEADER_HOST_INVALID },
@@ -147,6 +146,8 @@ SCEnumCharMap http_decoder_event_table[] = {
     { "METHOD_DELIM_NON_COMPLIANT", HTP_LOG_CODE_METHOD_DELIM_NON_COMPLIANT },
     { "REQUEST_LINE_LEADING_WHITESPACE", HTP_LOG_CODE_REQUEST_LINE_LEADING_WHITESPACE },
     { "TOO_MANY_ENCODING_LAYERS", HTP_LOG_CODE_TOO_MANY_ENCODING_LAYERS },
+    { "REQUEST_TOO_MANY_LZMA_LAYERS", HTP_LOG_CODE_REQUEST_TOO_MANY_LZMA_LAYERS },
+    { "RESPONSE_TOO_MANY_LZMA_LAYERS", HTP_LOG_CODE_RESPONSE_TOO_MANY_LZMA_LAYERS },
     { "ABNORMAL_CE_HEADER", HTP_LOG_CODE_ABNORMAL_CE_HEADER },
     { "RESPONSE_MULTIPART_BYTERANGES", HTP_LOG_CODE_RESPONSE_MULTIPART_BYTERANGES },
     { "RESPONSE_ABNORMAL_TRANSFER_ENCODING", HTP_LOG_CODE_RESPONSE_ABNORMAL_TRANSFER_ENCODING },
@@ -154,23 +155,63 @@ SCEnumCharMap http_decoder_event_table[] = {
     { "RESPONSE_INVALID_PROTOCOL", HTP_LOG_CODE_RESPONSE_INVALID_PROTOCOL },
     { "RESPONSE_INVALID_STATUS", HTP_LOG_CODE_RESPONSE_INVALID_STATUS },
     { "REQUEST_LINE_INCOMPLETE", HTP_LOG_CODE_REQUEST_LINE_INCOMPLETE },
+    { "PROTOCOL_CONTAINS_EXTRA_DATA", HTP_LOG_CODE_PROTOCOL_CONTAINS_EXTRA_DATA },
+    {
+            "CONTENT_LENGTH_EXTRA_DATA_START",
+            HTP_LOG_CODE_CONTENT_LENGTH_EXTRA_DATA_START,
+    },
+    {
+            "CONTENT_LENGTH_EXTRA_DATA_END",
+            HTP_LOG_CODE_CONTENT_LENGTH_EXTRA_DATA_END,
+    },
+    {
+            "CONTENT_LENGTH_EXTRA_DATA_END",
+            HTP_LOG_CODE_CONTENT_LENGTH_EXTRA_DATA_END,
+    },
+    { "SWITCHING_PROTO_WITH_CONTENT_LENGTH", HTP_LOG_CODE_SWITCHING_PROTO_WITH_CONTENT_LENGTH },
+    { "DEFORMED_EOL", HTP_LOG_CODE_DEFORMED_EOL },
+    { "PARSER_STATE_ERROR", HTP_LOG_CODE_PARSER_STATE_ERROR },
+    { "MISSING_OUTBOUND_TRANSACTION_DATA", HTP_LOG_CODE_MISSING_OUTBOUND_TRANSACTION_DATA },
+    { "MISSING_INBOUND_TRANSACTION_DATA", HTP_LOG_CODE_MISSING_INBOUND_TRANSACTION_DATA },
+    { "MISSING_INBOUND_TRANSACTION_DATA", HTP_LOG_CODE_MISSING_INBOUND_TRANSACTION_DATA },
+    { "ZERO_LENGTH_DATA_CHUNKS", HTP_LOG_CODE_ZERO_LENGTH_DATA_CHUNKS },
+    { "REQUEST_LINE_UNKNOWN_METHOD", HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD },
+    { "REQUEST_LINE_UNKNOWN_METHOD", HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD },
+    { "REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL",
+            HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL },
+    { "REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL",
+            HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL },
+    { "REQUEST_LINE_MISSING_PROTOCOL", HTP_LOG_CODE_REQUEST_LINE_NO_PROTOCOL },
+    { "RESPONSE_LINE_INVALID_PROTOCOL", HTP_LOG_CODE_RESPONSE_LINE_INVALID_PROTOCOL },
+    { "RESPONSE_LINE_INVALID_RESPONSE_STATUS", HTP_LOG_CODE_RESPONSE_LINE_INVALID_RESPONSE_STATUS },
+    { "RESPONSE_BODY_INTERNAL_ERROR", HTP_LOG_CODE_RESPONSE_BODY_INTERNAL_ERROR },
+    { "REQUEST_BODY_DATA_CALLBACK_ERROR", HTP_LOG_CODE_REQUEST_BODY_DATA_CALLBACK_ERROR },
+    { "RESPONSE_INVALID_EMPTY_NAME", HTP_LOG_CODE_RESPONSE_INVALID_EMPTY_NAME },
+    { "REQUEST_INVALID_EMPTY_NAME", HTP_LOG_CODE_REQUEST_INVALID_EMPTY_NAME },
+    { "RESPONSE_INVALID_LWS_AFTER_NAME", HTP_LOG_CODE_RESPONSE_INVALID_LWS_AFTER_NAME },
+    { "RESPONSE_HEADER_NAME_NOT_TOKEN", HTP_LOG_CODE_RESPONSE_HEADER_NAME_NOT_TOKEN },
+    { "REQUEST_INVALID_LWS_AFTER_NAME", HTP_LOG_CODE_REQUEST_INVALID_LWS_AFTER_NAME },
+    { "LZMA_DECOMPRESSION_DISABLED", HTP_LOG_CODE_LZMA_DECOMPRESSION_DISABLED },
+    { "CONNECTION_ALREADY_OPEN", HTP_LOG_CODE_CONNECTION_ALREADY_OPEN },
+    { "COMPRESSION_BOMB_DOUBLE_LZMA", HTP_LOG_CODE_COMPRESSION_BOMB_DOUBLE_LZMA },
+    { "INVALID_CONTENT_ENCODING", HTP_LOG_CODE_INVALID_CONTENT_ENCODING },
+    { "INVALID_GAP", HTP_LOG_CODE_INVALID_GAP },
+    { "REQUEST_CHUNK_EXTENSION", HTP_LOG_CODE_REQUEST_CHUNK_EXTENSION },
+    { "RESPONSE_CHUNK_EXTENSION", HTP_LOG_CODE_RESPONSE_CHUNK_EXTENSION },
 
     { "LZMA_MEMLIMIT_REACHED", HTP_LOG_CODE_LZMA_MEMLIMIT_REACHED },
     { "COMPRESSION_BOMB", HTP_LOG_CODE_COMPRESSION_BOMB },
 
-    { "RANGE_INVALID", HTTP_DECODER_EVENT_RANGE_INVALID },
-    { "REQUEST_CHUNK_EXTENSION", HTTP_DECODER_EVENT_REQUEST_CHUNK_EXTENSION },
-    { "REQUEST_LINE_MISSING_PROTOCOL", HTTP_DECODER_EVENT_REQUEST_LINE_MISSING_PROTOCOL },
-
-    { "REQUEST_TOO_MANY_HEADERS", HTTP_DECODER_EVENT_REQUEST_TOO_MANY_HEADERS },
-    { "RESPONSE_TOO_MANY_HEADERS", HTTP_DECODER_EVENT_RESPONSE_TOO_MANY_HEADERS },
+    { "REQUEST_TOO_MANY_HEADERS", HTP_LOG_CODE_REQUEST_TOO_MANY_HEADERS },
+    { "RESPONSE_TOO_MANY_HEADERS", HTP_LOG_CODE_RESPONSE_TOO_MANY_HEADERS },
 
     /* suricata warnings/errors */
     { "MULTIPART_GENERIC_ERROR", HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR },
     { "MULTIPART_NO_FILEDATA", HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA },
     { "MULTIPART_INVALID_HEADER", HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER },
-
     { "TOO_MANY_WARNINGS", HTTP_DECODER_EVENT_TOO_MANY_WARNINGS },
+    { "RANGE_INVALID", HTTP_DECODER_EVENT_RANGE_INVALID },
+    { "FILE_NAME_TOO_LONG", HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG },
     { "FAILED_PROTOCOL_CHANGE", HTTP_DECODER_EVENT_FAILED_PROTOCOL_CHANGE },
 
     { NULL, -1 },
@@ -319,7 +360,7 @@ static void HTPSetEvent(HtpState *s, HtpTxUserData *htud,
     if (tx == NULL && tx_id > 0)
         tx = HTPStateGetTx(s, tx_id - 1);
     if (tx != NULL) {
-        htud = (HtpTxUserData *) htp_tx_get_user_data(tx);
+        htud = (HtpTxUserData *)htp_tx_get_user_data(tx);
         if (htud != NULL) {
             AppLayerDecoderEventsSetEventRaw(&htud->tx_data.events, e);
             s->events++;
@@ -359,7 +400,6 @@ static void HtpTxUserDataFree(HtpState *state, HtpTxUserData *htud)
     if (likely(htud)) {
         HtpBodyFree(&htud->request_body);
         HtpBodyFree(&htud->response_body);
-        bstr_free(htud->request_uri_normalized);
         if (htud->request_headers_raw)
             HTPFree(htud->request_headers_raw, htud->request_headers_raw_len);
         if (htud->response_headers_raw)
@@ -397,10 +437,10 @@ void HTPStateFree(void *state)
         uint64_t total_txs = HTPStateGetTxCnt(state);
         /* free the list of body chunks */
         if (s->conn != NULL) {
-            for (tx_id = s->tx_freed; tx_id < total_txs; tx_id++) {
+            for (tx_id = 0; tx_id < total_txs; tx_id++) {
                 htp_tx_t *tx = HTPStateGetTx(s, tx_id);
                 if (tx != NULL) {
-                    HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx);
+                    HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx);
                     HtpTxUserDataFree(s, htud);
                     htp_tx_set_user_data(tx, NULL);
                 }
@@ -425,8 +465,6 @@ void HTPStateFree(void *state)
 /**
  *  \brief HTP transaction cleanup callback
  *
- *  \warning We cannot actually free the transactions here. It seems that
- *           HTP only accepts freeing of transactions in the response callback.
  */
 static void HTPStateTransactionFree(void *state, uint64_t id)
 {
@@ -439,23 +477,11 @@ static void HTPStateTransactionFree(void *state, uint64_t id)
     htp_tx_t *tx = HTPStateGetTx(s, id);
     if (tx != NULL) {
         /* This will remove obsolete body chunks */
-        HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx);
+        HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx);
         HtpTxUserDataFree(s, htud);
         htp_tx_set_user_data(tx, NULL);
-
-        /* hack: even if libhtp considers the tx incomplete, we want to
-         * free it here. htp_tx_destroy however, will refuse to do this.
-         * As htp_tx_destroy_incomplete isn't available in the public API,
-         * we hack around it here. */
-        if (unlikely(!(htp_tx_request_progress(tx) == HTP_REQUEST_PROGRESS_COMPLETE &&
-                       htp_tx_response_progress(tx) == HTP_RESPONSE_PROGRESS_COMPLETE))) {
-            htp_tx_request_progress(tx) = HTP_REQUEST_PROGRESS_COMPLETE;
-            htp_tx_response_progress(tx) = HTP_RESPONSE_PROGRESS_COMPLETE;
-        }
-        // replaces tx in the s->conn->transactions list by NULL
-        htp_tx_destroy(tx);
+        htp_tx_destroy(s->connp, tx);
     }
-    s->tx_freed += htp_connp_tx_freed(s->connp);
 }
 
 /**
@@ -502,7 +528,7 @@ void AppLayerHtpNeedFileInspection(void)
 
 static void AppLayerHtpSetStreamDepthFlag(void *tx, const uint8_t flags)
 {
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data((htp_tx_t *)tx);
+    HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data((htp_tx_t *)tx);
     if (tx_ud) {
         SCLogDebug("setting HTP_STREAM_DEPTH_SET, flags %02x", flags);
         if (flags & STREAM_TOCLIENT) {
@@ -551,136 +577,6 @@ static uint32_t AppLayerHtpComputeChunkLength(uint64_t content_len_so_far, uint3
     return (chunk_len == 0 ? data_len : chunk_len);
 }
 
-/* below error messages updated up to libhtp 0.5.7 (git 379632278b38b9a792183694a4febb9e0dbd1e7a) */
-struct {
-    const char *msg;
-    uint8_t de;
-} htp_errors[] = {
-    { "GZip decompressor: inflateInit2 failed", HTP_LOG_CODE_GZIP_DECOMPRESSION_FAILED },
-    { "Request field invalid: colon missing", HTP_LOG_CODE_REQUEST_FIELD_MISSING_COLON },
-    { "Response field invalid: missing colon", HTP_LOG_CODE_RESPONSE_FIELD_MISSING_COLON },
-    { "Request chunk encoding: Invalid chunk length", HTP_LOG_CODE_INVALID_REQUEST_CHUNK_LEN },
-    { "Response chunk encoding: Invalid chunk length", HTP_LOG_CODE_INVALID_RESPONSE_CHUNK_LEN },
-    /*  { "Invalid T-E value in request",
-       HTP_LOG_CODE_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST}, <- tx flag
-       HTP_FLAGS_REQUEST_INVALID_T_E { "Invalid T-E value in response",
-       HTP_LOG_CODE_INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE}, <- nothing to replace it */
-    /*  { "Invalid C-L field in request",
-       HTP_LOG_CODE_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST}, <- tx flag
-       HTP_FLAGS_REQUEST_INVALID_C_L */
-    { "Invalid C-L field in response", HTP_LOG_CODE_INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE },
-    { "Already seen 100-Continue", HTP_LOG_CODE_CONTINUE_ALREADY_SEEN },
-    { "Unable to match response to request", HTP_LOG_CODE_UNABLE_TO_MATCH_RESPONSE_TO_REQUEST },
-    { "Invalid server port information in request", HTP_LOG_CODE_INVALID_SERVER_PORT_IN_REQUEST },
-    /*    { "Invalid authority port", HTP_LOG_CODE_INVALID_AUTHORITY_PORT}, htp no longer
-       returns this error */
-    { "Request buffer over", HTP_LOG_CODE_REQUEST_FIELD_TOO_LONG },
-    { "Response buffer over", HTP_LOG_CODE_RESPONSE_FIELD_TOO_LONG },
-    { "C-T multipart/byteranges in responses not supported",
-            HTP_LOG_CODE_RESPONSE_MULTIPART_BYTERANGES },
-    { "Compression bomb:", HTP_LOG_CODE_COMPRESSION_BOMB },
-};
-
-struct {
-    const char *msg;
-    uint8_t de;
-} htp_warnings[] = {
-    { "GZip decompressor:", HTP_LOG_CODE_GZIP_DECOMPRESSION_FAILED },
-    { "Request field invalid", HTP_LOG_CODE_REQUEST_HEADER_INVALID },
-    { "Response field invalid", HTP_LOG_CODE_RESPONSE_HEADER_INVALID },
-    { "Request header name is not a token", HTP_LOG_CODE_REQUEST_HEADER_INVALID },
-    { "Response header name is not a token", HTP_LOG_CODE_RESPONSE_HEADER_INVALID },
-    /*  { "Host information in request headers required by HTTP/1.1",
-       HTP_LOG_CODE_MISSING_HOST_HEADER}, <- tx flag HTP_FLAGS_HOST_MISSING { "Host
-       information ambiguous", HTP_LOG_CODE_HOST_HEADER_AMBIGUOUS}, <- tx flag
-       HTP_FLAGS_HOST_AMBIGUOUS */
-    { "Invalid request field folding", HTP_LOG_CODE_INVALID_REQUEST_FIELD_FOLDING },
-    { "Invalid response field folding", HTP_LOG_CODE_INVALID_RESPONSE_FIELD_FOLDING },
-    /* line is now: htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request server port=%d number
-     * differs from the actual TCP port=%d", port, connp->conn->server_port); luckily, "Request
-     * server port=" is unique */
-    /*    { "Request server port number differs from the actual TCP port",
-       HTP_LOG_CODE_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH}, */
-    { "Request server port=", HTP_LOG_CODE_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH },
-    { "Request line: URI contains non-compliant delimiter", HTP_LOG_CODE_URI_DELIM_NON_COMPLIANT },
-    { "Request line: non-compliant delimiter between Method and URI",
-            HTP_LOG_CODE_METHOD_DELIM_NON_COMPLIANT },
-    { "Request line: leading whitespace", HTP_LOG_CODE_REQUEST_LINE_LEADING_WHITESPACE },
-    { "Too many response content encoding layers", HTP_LOG_CODE_TOO_MANY_ENCODING_LAYERS },
-    { "C-E gzip has abnormal value", HTP_LOG_CODE_ABNORMAL_CE_HEADER },
-    { "C-E deflate has abnormal value", HTP_LOG_CODE_ABNORMAL_CE_HEADER },
-    { "C-E unknown setting", HTP_LOG_CODE_ABNORMAL_CE_HEADER },
-    { "Excessive request header repetitions", HTP_LOG_CODE_REQUEST_HEADER_REPETITION },
-    { "Excessive response header repetitions", HTP_LOG_CODE_RESPONSE_HEADER_REPETITION },
-    { "Transfer-encoding has abnormal chunked value",
-            HTP_LOG_CODE_RESPONSE_ABNORMAL_TRANSFER_ENCODING },
-    { "Chunked transfer-encoding on HTTP/0.9 or HTTP/1.0",
-            HTP_LOG_CODE_RESPONSE_CHUNKED_OLD_PROTO },
-    { "Invalid response line: invalid protocol", HTP_LOG_CODE_RESPONSE_INVALID_PROTOCOL },
-    { "Invalid response line: invalid response status", HTP_LOG_CODE_RESPONSE_INVALID_STATUS },
-    { "Request line incomplete", HTP_LOG_CODE_REQUEST_LINE_INCOMPLETE },
-    { "Unexpected request body", HTP_LOG_CODE_REQUEST_BODY_UNEXPECTED },
-    { "LZMA decompressor: memory limit reached", HTP_LOG_CODE_LZMA_MEMLIMIT_REACHED },
-    { "Ambiguous request C-L value", HTP_LOG_CODE_DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST },
-    { "Ambiguous response C-L value", HTP_LOG_CODE_DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE },
-    { "Request chunk extension", HTTP_DECODER_EVENT_REQUEST_CHUNK_EXTENSION },
-    { "Request line: missing protocol", HTTP_DECODER_EVENT_REQUEST_LINE_MISSING_PROTOCOL },
-    { "Too many request headers", HTTP_DECODER_EVENT_REQUEST_TOO_MANY_HEADERS },
-    { "Too many response headers", HTTP_DECODER_EVENT_RESPONSE_TOO_MANY_HEADERS },
-};
-
-#define HTP_ERROR_MAX (sizeof(htp_errors) / sizeof(htp_errors[0]))
-#define HTP_WARNING_MAX (sizeof(htp_warnings) / sizeof(htp_warnings[0]))
-
-/**
- *  \internal
- *
- *  \brief Get the warning id for the warning msg.
- *
- *  \param msg warning message
- *
- *  \retval id the id or 0 in case of not found
- */
-static uint8_t HTPHandleWarningGetId(const char *msg)
-{
-    SCLogDebug("received warning \"%s\"", msg);
-    size_t idx;
-    for (idx = 0; idx < HTP_WARNING_MAX; idx++) {
-        if (strncmp(htp_warnings[idx].msg, msg,
-                    strlen(htp_warnings[idx].msg)) == 0)
-        {
-            return htp_warnings[idx].de;
-        }
-    }
-
-    return 0;
-}
-
-/**
- *  \internal
- *
- *  \brief Get the error id for the error msg.
- *
- *  \param msg error message
- *
- *  \retval id the id or 0 in case of not found
- */
-static uint8_t HTPHandleErrorGetId(const char *msg)
-{
-    SCLogDebug("received error \"%s\"", msg);
-
-    size_t idx;
-    for (idx = 0; idx < HTP_ERROR_MAX; idx++) {
-        if (strncmp(htp_errors[idx].msg, msg,
-                    strlen(htp_errors[idx].msg)) == 0)
-        {
-            return htp_errors[idx].de;
-        }
-    }
-
-    return 0;
-}
-
 /**
  *  \internal
  *
@@ -691,50 +587,39 @@ static uint8_t HTPHandleErrorGetId(const char *msg)
  */
 static void HTPHandleError(HtpState *s, const uint8_t dir)
 {
-    if (s == NULL || s->conn == NULL ||
-        s->conn->messages == NULL) {
-        return;
-    }
-
-    size_t size = htp_list_size(s->conn->messages);
-    size_t msg;
-    if(size >= HTP_MAX_MESSAGES) {
-        if (s->htp_messages_offset < HTP_MAX_MESSAGES) {
-            //only once per HtpState
-            HTPSetEvent(s, NULL, dir, HTTP_DECODER_EVENT_TOO_MANY_WARNINGS);
-            s->htp_messages_offset = HTP_MAX_MESSAGES;
-            //too noisy in fuzzing
-            //DEBUG_VALIDATE_BUG_ON("Too many libhtp messages");
-        }
+    if (s == NULL || s->conn == NULL || s->htp_messages_count >= HTP_MAX_MESSAGES) {
         // ignore further messages
         return;
     }
 
-    for (msg = s->htp_messages_offset; msg < size; msg++) {
-        htp_log_t *log = htp_list_get(s->conn->messages, msg);
-        if (log == NULL)
+    htp_log_t *log = htp_conn_next_log(s->conn);
+    while (log != NULL) {
+        char *msg = htp_log_message(log);
+        if (msg == NULL) {
+            htp_log_free(log);
+            log = htp_conn_next_log(s->conn);
             continue;
+        }
 
-        HtpTxUserData *htud = NULL;
-        htp_tx_t *tx = log->tx; // will be NULL in <=0.5.9
-        if (tx != NULL)
-            htud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-
-        SCLogDebug("message %s", log->msg);
+        SCLogDebug("message %s", msg);
 
-        uint8_t id = HTPHandleErrorGetId(log->msg);
-        if (id == 0) {
-            id = HTPHandleWarningGetId(log->msg);
-            if (id == 0)
-                id = HTP_LOG_CODE_UNKNOWN;
+        htp_log_code_t id = htp_log_code(log);
+        if (id != HTP_LOG_CODE_UNKNOWN && id != HTP_LOG_CODE_ERROR) {
+            HTPSetEvent(s, NULL, dir, (uint8_t)id);
         }
-
-        if (id > 0) {
-            HTPSetEvent(s, htud, dir, id);
+        htp_free_cstring(msg);
+        htp_log_free(log);
+        s->htp_messages_count++;
+        if (s->htp_messages_count >= HTP_MAX_MESSAGES) {
+            // only once per HtpState
+            HTPSetEvent(s, NULL, dir, HTTP_DECODER_EVENT_TOO_MANY_WARNINGS);
+            // too noisy in fuzzing
+            // DEBUG_VALIDATE_BUG_ON("Too many libhtp messages");
+            break;
         }
+        log = htp_conn_next_log(s->conn);
     }
-    s->htp_messages_offset = (uint16_t)msg;
-    SCLogDebug("s->htp_messages_offset %u", s->htp_messages_offset);
+    SCLogDebug("s->htp_messages_count %u", s->htp_messages_count);
 }
 
 static inline void HTPErrorCheckTxRequestFlags(HtpState *s, const htp_tx_t *tx)
@@ -745,7 +630,7 @@ static inline void HTPErrorCheckTxRequestFlags(HtpState *s, const htp_tx_t *tx)
     if (htp_tx_flags(tx) & (HTP_FLAGS_REQUEST_INVALID_T_E | HTP_FLAGS_REQUEST_INVALID_C_L |
                                    HTP_FLAGS_HOST_MISSING | HTP_FLAGS_HOST_AMBIGUOUS |
                                    HTP_FLAGS_HOSTU_INVALID | HTP_FLAGS_HOSTH_INVALID)) {
-        HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx);
+        HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx);
         if (htud == NULL)
             return;
 
@@ -765,7 +650,7 @@ static inline void HTPErrorCheckTxRequestFlags(HtpState *s, const htp_tx_t *tx)
             HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_HEADER_HOST_INVALID);
     }
     if (htp_tx_request_auth_type(tx) == HTP_AUTH_TYPE_UNRECOGNIZED) {
-        HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx);
+        HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx);
         if (htud == NULL)
             return;
         HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_AUTH_UNRECOGNIZED);
@@ -773,10 +658,10 @@ static inline void HTPErrorCheckTxRequestFlags(HtpState *s, const htp_tx_t *tx)
     if (htp_tx_is_protocol_0_9(tx) && htp_tx_request_method_number(tx) == HTP_METHOD_UNKNOWN &&
             (htp_tx_request_protocol_number(tx) == HTP_PROTOCOL_INVALID ||
                     htp_tx_request_protocol_number(tx) == HTP_PROTOCOL_UNKNOWN)) {
-        HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx);
+        HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx);
         if (htud == NULL)
             return;
-        HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_REQUEST_BODY_UNEXPECTED);
+        HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_REQUEST_LINE_INVALID);
     }
 }
 
@@ -824,7 +709,7 @@ static int Setup(Flow *f, HtpState *hstate)
         goto error;
     }
 
-    hstate->conn = htp_connp_get_connection(hstate->connp);
+    hstate->conn = (htp_conn_t *)htp_connp_connection(hstate->connp);
 
     htp_connp_set_user_data(hstate->connp, (void *)hstate);
     hstate->cfg = htp_cfg_rec;
@@ -875,10 +760,10 @@ static AppLayerResult HTPHandleRequestData(Flow *f, void *htp_state, AppLayerPar
     const uint8_t *input = StreamSliceGetData(&stream_slice);
     uint32_t input_len = StreamSliceGetDataLen(&stream_slice);
 
-    htp_time_t ts = { SCTIME_SECS(f->startts), SCTIME_USECS(f->startts) };
+    struct timeval ts = { SCTIME_SECS(f->startts), SCTIME_USECS(f->startts) };
     /* pass the new data to the htp parser */
     if (input_len > 0) {
-        const int r = htp_connp_req_data(hstate->connp, &ts, input, input_len);
+        const int r = htp_connp_request_data(hstate->connp, &ts, input, input_len);
         switch (r) {
             case HTP_STREAM_STATE_ERROR:
                 ret = -1;
@@ -893,7 +778,7 @@ static AppLayerResult HTPHandleRequestData(Flow *f, void *htp_state, AppLayerPar
     if (AppLayerParserStateIssetFlag(pstate, APP_LAYER_PARSER_EOF_TS) &&
         !(hstate->flags & HTP_FLAG_STATE_CLOSED_TS))
     {
-        htp_connp_req_close(hstate->connp, &ts);
+        htp_connp_request_close(hstate->connp, &ts);
         hstate->flags |= HTP_FLAG_STATE_CLOSED_TS;
         SCLogDebug("stream eof encountered, closing htp handle for ts");
     }
@@ -942,17 +827,17 @@ static AppLayerResult HTPHandleResponseData(Flow *f, void *htp_state, AppLayerPa
     DEBUG_VALIDATE_BUG_ON(hstate->connp == NULL);
     hstate->slice = &stream_slice;
 
-    htp_time_t ts = { SCTIME_SECS(f->startts), SCTIME_USECS(f->startts) };
-    htp_tx_t *tx = NULL;
+    struct timeval ts = { SCTIME_SECS(f->startts), SCTIME_USECS(f->startts) };
+    const htp_tx_t *tx = NULL;
     uint32_t consumed = 0;
     if (input_len > 0) {
-        const int r = htp_connp_res_data(hstate->connp, &ts, input, input_len);
+        const int r = htp_connp_response_data(hstate->connp, &ts, input, input_len);
         switch (r) {
             case HTP_STREAM_STATE_ERROR:
                 ret = -1;
                 break;
             case HTP_STREAM_STATE_TUNNEL:
-                tx = htp_connp_get_out_tx(hstate->connp);
+                tx = htp_connp_get_response_tx(hstate->connp);
                 if (tx != NULL && htp_tx_response_status_number(tx) == 101) {
                     const htp_header_t *h = htp_tx_response_header(tx, "Upgrade");
                     if (h == NULL) {
@@ -962,7 +847,7 @@ static AppLayerResult HTPHandleResponseData(Flow *f, void *htp_state, AppLayerPa
                     if (htp_tx_request_port_number(tx) != -1) {
                         dp = (uint16_t)htp_tx_request_port_number(tx);
                     }
-                    consumed = (uint32_t)htp_connp_res_data_consumed(hstate->connp);
+                    consumed = (uint32_t)htp_connp_response_data_consumed(hstate->connp);
                     if (bstr_cmp_c(htp_header_value(h), "h2c") == 0) {
                         if (AppLayerProtoDetectGetProtoName(ALPROTO_HTTP2) == NULL) {
                             // if HTTP2 is disabled, keep the HTP_STREAM_STATE_TUNNEL mode
@@ -979,7 +864,7 @@ static AppLayerResult HTPHandleResponseData(Flow *f, void *htp_state, AppLayerPa
                             SCReturnStruct(APP_LAYER_INCOMPLETE(consumed, input_len - consumed));
                         }
                         SCReturnStruct(APP_LAYER_OK);
-                    } else if (bstr_cmp_c_nocase(htp_header_value(h), "WebSocket") == 0) {
+                    } else if (bstr_cmp_c_nocase(htp_header_value(h), "WebSocket")) {
                         if (AppLayerProtoDetectGetProtoName(ALPROTO_WEBSOCKET) == NULL) {
                             // if WS is disabled, keep the HTP_STREAM_STATE_TUNNEL mode
                             break;
@@ -1280,16 +1165,16 @@ static int HtpRequestBodyHandlePOSTorPUT(HtpState *hstate, HtpTxUserData *htud,
         size_t filename_len = 0;
 
         /* get the name */
-        if (tx->parsed_uri != NULL && tx->parsed_uri->path != NULL) {
-            filename = (uint8_t *)bstr_ptr(tx->parsed_uri->path);
-            filename_len = bstr_len(tx->parsed_uri->path);
+        if (htp_uri_path(htp_tx_parsed_uri(tx)) != NULL) {
+            filename = (uint8_t *)bstr_ptr(htp_uri_path(htp_tx_parsed_uri(tx)));
+            filename_len = bstr_len(htp_uri_path(htp_tx_parsed_uri(tx)));
         }
 
         if (filename != NULL) {
             if (filename_len > SC_FILENAME_MAX) {
                 // explicitly truncate the file name if too long
                 filename_len = SC_FILENAME_MAX;
-                HTPSetEvent(hstate, htud, STREAM_TOSERVER, HTP_LOG_CODE_REQUEST_LINE_INVALID);
+                HTPSetEvent(hstate, htud, STREAM_TOSERVER, HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG);
             }
             result = HTPFileOpen(hstate, htud, filename, (uint16_t)filename_len, data, data_len,
                     STREAM_TOSERVER);
@@ -1346,16 +1231,15 @@ static int HtpResponseBodyHandle(HtpState *hstate, HtpTxUserData *htud, const ht
         if (h != NULL && htp_header_value_len(h) > 0) {
             /* parse content-disposition */
             (void)HTTPParseContentDispositionHeader((uint8_t *)"filename=", 9,
-                    (uint8_t *)htp_header_value_ptr(h), htp_header_value_len(h), &filename,
-                    &filename_len);
+                    htp_header_value_ptr(h), htp_header_value_len(h), &filename, &filename_len);
         }
 
         /* fall back to name from the uri */
         if (filename == NULL) {
             /* get the name */
-            if (tx->parsed_uri != NULL && tx->parsed_uri->path != NULL) {
-                filename = (uint8_t *)bstr_ptr(tx->parsed_uri->path);
-                filename_len = bstr_len(tx->parsed_uri->path);
+            if (htp_uri_path(htp_tx_parsed_uri(tx)) != NULL) {
+                filename = (uint8_t *)bstr_ptr(htp_uri_path(htp_tx_parsed_uri(tx)));
+                filename_len = bstr_len(htp_uri_path(htp_tx_parsed_uri(tx)));
             }
         }
 
@@ -1365,11 +1249,11 @@ static int HtpResponseBodyHandle(HtpState *hstate, HtpTxUserData *htud, const ht
             if (filename_len > SC_FILENAME_MAX) {
                 // explicitly truncate the file name if too long
                 filename_len = SC_FILENAME_MAX;
-                HTPSetEvent(hstate, htud, STREAM_TOSERVER, HTP_LOG_CODE_REQUEST_LINE_INVALID);
+                HTPSetEvent(hstate, htud, STREAM_TOSERVER, HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG);
             }
             if (h_content_range != NULL) {
                 result = HTPFileOpenWithRange(hstate, htud, filename, (uint16_t)filename_len, data,
-                        data_len, tx, h_content_range->value, htud);
+                        data_len, tx, htp_header_value(h_content_range), htud);
             } else {
                 result = HTPFileOpen(hstate, htud, filename, (uint16_t)filename_len, data, data_len,
                         STREAM_TOCLIENT);
@@ -1411,14 +1295,16 @@ end:
  * \param d pointer to the htp_tx_data_t structure (a chunk from htp lib)
  * \retval int HTP_STATUS_OK if all goes well
  */
-static int HTPCallbackRequestBodyData(htp_tx_data_t *d)
+static int HTPCallbackRequestBodyData(const htp_connp_t *connp, htp_tx_data_t *d)
 {
     SCEnter();
 
+    const htp_tx_t *tx = htp_tx_data_tx(d);
+
     if (!(SC_ATOMIC_GET(htp_config_flags) & HTP_REQUIRE_REQUEST_BODY))
         SCReturnInt(HTP_STATUS_OK);
 
-    if (htp_tx_data_len(d) == 0)
+    if (htp_tx_data_is_empty(d))
         SCReturnInt(HTP_STATUS_OK);
 
 #ifdef PRINT
@@ -1427,7 +1313,7 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d)
     printf("HTPBODY END: \n");
 #endif
 
-    HtpState *hstate = htp_connp_get_user_data(d->tx->connp);
+    HtpState *hstate = htp_connp_user_data(connp);
     if (hstate == NULL) {
         SCReturnInt(HTP_STATUS_ERROR);
     }
@@ -1436,7 +1322,7 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d)
                "%" PRIu32 "",
             hstate, d, htp_tx_data_data(d), (uint32_t)htp_tx_data_len(d));
 
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(d->tx);
+    HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
     if (tx_ud == NULL) {
         SCReturnInt(HTP_STATUS_OK);
     }
@@ -1446,16 +1332,16 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d)
     if (!tx_ud->response_body_init) {
         tx_ud->response_body_init = 1;
 
-        if (d->htp_tx_request_method_number(tx) == HTP_METHOD_POST) {
+        if (htp_tx_request_method_number(tx) == HTP_METHOD_POST) {
             SCLogDebug("POST");
-            int r = HtpRequestBodySetupMultipart(htp_tx_data_tx(d), tx_ud);
+            int r = HtpRequestBodySetupMultipart(tx, tx_ud);
             if (r == 1) {
                 tx_ud->request_body_type = HTP_BODY_REQUEST_MULTIPART;
             } else if (r == 0) {
                 tx_ud->request_body_type = HTP_BODY_REQUEST_POST;
                 SCLogDebug("not multipart");
             }
-        } else if (d->htp_tx_request_method_number(tx) == HTP_METHOD_PUT) {
+        } else if (htp_tx_request_method_number(tx) == HTP_METHOD_PUT) {
             tx_ud->request_body_type = HTP_BODY_REQUEST_PUT;
         }
     }
@@ -1501,7 +1387,7 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d)
         } else if (tx_ud->request_body_type == HTP_BODY_REQUEST_POST ||
                    tx_ud->request_body_type == HTP_BODY_REQUEST_PUT) {
             HtpRequestBodyHandlePOSTorPUT(
-                    hstate, tx_ud, htp_tx_data_tx(d), (uint8_t *)htp_tx_data_data(d), len);
+                    hstate, tx_ud, htp_tx_data_tx(d), htp_tx_data_data(d), len);
         }
 
     } else {
@@ -1531,7 +1417,7 @@ end:
                     (uint64_t)htp_conn_request_data_counter(hstate->conn) -
                                     hstate->last_request_data_stamp <
                             (uint64_t)UINT_MAX) {
-                const uint32_t data_size =
+                uint32_t data_size =
                         (uint32_t)((uint64_t)htp_conn_request_data_counter(hstate->conn) -
                                    hstate->last_request_data_stamp);
                 const uint32_t depth = MIN(data_size, hstate->cfg->request.inspect_min_size);
@@ -1553,17 +1439,19 @@ end:
  * \param d pointer to the htp_tx_data_t structure (a chunk from htp lib)
  * \retval int HTP_STATUS_OK if all goes well
  */
-static int HTPCallbackResponseBodyData(htp_tx_data_t *d)
+static int HTPCallbackResponseBodyData(const htp_connp_t *connp, htp_tx_data_t *d)
 {
     SCEnter();
 
+    const htp_tx_t *tx = htp_tx_data_tx(d);
+
     if (!(SC_ATOMIC_GET(htp_config_flags) & HTP_REQUIRE_RESPONSE_BODY))
         SCReturnInt(HTP_STATUS_OK);
 
-    if (htp_tx_data_len(d) == 0)
+    if (htp_tx_data_is_empty(d))
         SCReturnInt(HTP_STATUS_OK);
 
-    HtpState *hstate = htp_connp_get_user_data(d->tx->connp);
+    HtpState *hstate = htp_connp_user_data(connp);
     if (hstate == NULL) {
         SCReturnInt(HTP_STATUS_ERROR);
     }
@@ -1572,7 +1460,7 @@ static int HTPCallbackResponseBodyData(htp_tx_data_t *d)
                "%" PRIu32 "",
             hstate, d, htp_tx_data_data(d), (uint32_t)htp_tx_data_len(d));
 
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(d->tx);
+    HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
     if (tx_ud == NULL) {
         SCReturnInt(HTP_STATUS_OK);
     }
@@ -1598,8 +1486,7 @@ static int HTPCallbackResponseBodyData(htp_tx_data_t *d)
 
         HtpBodyAppendChunk(&tx_ud->response_body, htp_tx_data_data(d), len);
 
-        HtpResponseBodyHandle(
-                hstate, tx_ud, htp_tx_data_tx(d), (uint8_t *)htp_tx_data_data(d), len);
+        HtpResponseBodyHandle(hstate, tx_ud, htp_tx_data_tx(d), htp_tx_data_data(d), len);
     } else {
         if (tx_ud->tcflags & HTP_FILENAME_SET) {
             SCLogDebug("closing file that was being stored");
@@ -1625,7 +1512,7 @@ static int HTPCallbackResponseBodyData(htp_tx_data_t *d)
                     (uint64_t)htp_conn_response_data_counter(hstate->conn) -
                                     hstate->last_response_data_stamp <
                             (uint64_t)UINT_MAX) {
-                const uint32_t data_size =
+                uint32_t data_size =
                         (uint32_t)((uint64_t)htp_conn_response_data_counter(hstate->conn) -
                                    hstate->last_response_data_stamp);
                 const uint32_t depth = MIN(data_size, hstate->cfg->response.inspect_min_size);
@@ -1682,7 +1569,7 @@ void HTPFreeConfig(void)
     SCReturn;
 }
 
-static int HTPCallbackRequestHasTrailer(htp_tx_t *tx)
+static int HTPCallbackRequestHasTrailer(const htp_connp_t *connp, htp_tx_t *tx)
 {
     HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx);
     if (htud != NULL) {
@@ -1692,7 +1579,7 @@ static int HTPCallbackRequestHasTrailer(htp_tx_t *tx)
     return HTP_STATUS_OK;
 }
 
-static int HTPCallbackResponseHasTrailer(htp_tx_t *tx)
+static int HTPCallbackResponseHasTrailer(const htp_connp_t *connp, htp_tx_t *tx)
 {
     HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx);
     if (htud != NULL) {
@@ -1706,17 +1593,16 @@ static int HTPCallbackResponseHasTrailer(htp_tx_t *tx)
  * \brief called at start of request
  * Set min inspect size.
  */
-static int HTPCallbackRequestStart(htp_tx_t *tx)
+static int HTPCallbackRequestStart(const htp_connp_t *connp, htp_tx_t *tx)
 {
-    HtpState *hstate = htp_connp_get_user_data(tx->connp);
+    HtpState *hstate = htp_connp_user_data(connp);
     if (hstate == NULL) {
         SCReturnInt(HTP_STATUS_ERROR);
     }
 
-    uint64_t consumed = hstate->slice->offset + htp_connp_req_data_consumed(hstate->connp);
+    uint64_t consumed = hstate->slice->offset + htp_connp_request_data_consumed(hstate->connp);
     SCLogDebug("HTTP request start: data offset %" PRIu64 ", in_data_counter %" PRIu64, consumed,
             (uint64_t)htp_conn_request_data_counter(hstate->conn));
-
     /* app-layer-frame-documentation tag start: frame registration http request */
     Frame *frame = AppLayerFrameNewByAbsoluteOffset(
             hstate->f, hstate->slice, consumed, -1, 0, HTTP_FRAME_REQUEST);
@@ -1731,7 +1617,7 @@ static int HTPCallbackRequestStart(htp_tx_t *tx)
         StreamTcpReassemblySetMinInspectDepth(hstate->f->protoctx, STREAM_TOSERVER,
                 hstate->cfg->request.inspect_min_size);
 
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
+    HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
     if (tx_ud == NULL) {
         tx_ud = HTPCalloc(1, sizeof(HtpTxUserData));
         if (unlikely(tx_ud == NULL)) {
@@ -1749,14 +1635,14 @@ static int HTPCallbackRequestStart(htp_tx_t *tx)
  * \brief called at start of response
  * Set min inspect size.
  */
-static int HTPCallbackResponseStart(htp_tx_t *tx)
+static int HTPCallbackResponseStart(const htp_connp_t *connp, htp_tx_t *tx)
 {
-    HtpState *hstate = htp_connp_get_user_data(tx->connp);
+    HtpState *hstate = htp_connp_user_data(connp);
     if (hstate == NULL) {
         SCReturnInt(HTP_STATUS_ERROR);
     }
 
-    uint64_t consumed = hstate->slice->offset + htp_connp_res_data_consumed(hstate->connp);
+    uint64_t consumed = hstate->slice->offset + htp_connp_response_data_consumed(hstate->connp);
     SCLogDebug("HTTP response start: data offset %" PRIu64 ", out_data_counter %" PRIu64, consumed,
             (uint64_t)htp_conn_response_data_counter(hstate->conn));
 
@@ -1772,7 +1658,7 @@ static int HTPCallbackResponseStart(htp_tx_t *tx)
         StreamTcpReassemblySetMinInspectDepth(hstate->f->protoctx, STREAM_TOCLIENT,
                 hstate->cfg->response.inspect_min_size);
 
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
+    HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
     if (tx_ud == NULL) {
         tx_ud = HTPCalloc(1, sizeof(HtpTxUserData));
         if (unlikely(tx_ud == NULL)) {
@@ -1789,11 +1675,11 @@ static int HTPCallbackResponseStart(htp_tx_t *tx)
 
 /**
  *  \brief  callback for request to store the recent incoming request
-            into the recent_in_tx for the given htp state
+            into the recent_request_tx for the given htp state
  *  \param  connp   pointer to the current connection parser which has the htp
  *                  state in it as user data
  */
-static int HTPCallbackRequestComplete(htp_tx_t *tx)
+static int HTPCallbackRequestComplete(const htp_connp_t *connp, htp_tx_t *tx)
 {
     SCEnter();
 
@@ -1801,13 +1687,13 @@ static int HTPCallbackRequestComplete(htp_tx_t *tx)
         SCReturnInt(HTP_STATUS_ERROR);
     }
 
-    HtpState *hstate = htp_connp_get_user_data(tx->connp);
+    HtpState *hstate = htp_connp_user_data(connp);
     if (hstate == NULL) {
         SCReturnInt(HTP_STATUS_ERROR);
     }
 
     const uint64_t abs_right_edge =
-            hstate->slice->offset + htp_connp_req_data_consumed(hstate->connp);
+            hstate->slice->offset + htp_connp_request_data_consumed(hstate->connp);
 
     /* app-layer-frame-documentation tag start: updating frame->len */
     if (hstate->request_frame_id > 0) {
@@ -1855,15 +1741,15 @@ static int HTPCallbackRequestComplete(htp_tx_t *tx)
 
 /**
  *  \brief  callback for response to remove the recent received requests
-            from the recent_in_tx for the given htp state
+            from the recent_request_tx for the given htp state
  *  \param  connp   pointer to the current connection parser which has the htp
  *                  state in it as user data
  */
-static int HTPCallbackResponseComplete(htp_tx_t *tx)
+static int HTPCallbackResponseComplete(const htp_connp_t *connp, htp_tx_t *tx)
 {
     SCEnter();
 
-    HtpState *hstate = htp_connp_get_user_data(tx->connp);
+    HtpState *hstate = htp_connp_user_data(connp);
     if (hstate == NULL) {
         SCReturnInt(HTP_STATUS_ERROR);
     }
@@ -1872,7 +1758,7 @@ static int HTPCallbackResponseComplete(htp_tx_t *tx)
     hstate->transaction_cnt++;
 
     const uint64_t abs_right_edge =
-            hstate->slice->offset + htp_connp_res_data_consumed(hstate->connp);
+            hstate->slice->offset + htp_connp_response_data_consumed(hstate->connp);
 
     if (hstate->response_frame_id > 0) {
         Frame *frame = AppLayerFrameGetById(hstate->f, 1, hstate->response_frame_id);
@@ -1888,7 +1774,7 @@ static int HTPCallbackResponseComplete(htp_tx_t *tx)
         hstate->response_frame_id = 0;
     }
 
-    HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx);
+    HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx);
     if (htud != NULL) {
         htud->tx_data.updated_tc = true;
         if (htud->tcflags & HTP_FILENAME_SET) {
@@ -1917,8 +1803,6 @@ static int HTPCallbackResponseComplete(htp_tx_t *tx)
                 HTPSetEvent(
                         hstate, htud, STREAM_TOCLIENT, HTTP_DECODER_EVENT_FAILED_PROTOCOL_CHANGE);
             }
-            htp_tx_request_progress(tx) = HTP_REQUEST_PROGRESS_COMPLETE;
-            htp_tx_response_progress(tx) = HTP_RESPONSE_PROGRESS_COMPLETE;
         }
     }
 
@@ -1926,25 +1810,15 @@ static int HTPCallbackResponseComplete(htp_tx_t *tx)
     SCReturnInt(HTP_STATUS_OK);
 }
 
-static int HTPCallbackRequestLine(htp_tx_t *tx)
+static int HTPCallbackRequestLine(const htp_connp_t *connp, htp_tx_t *tx)
 {
     HtpTxUserData *tx_ud;
-    bstr *request_uri_normalized;
-    HtpState *hstate = htp_connp_get_user_data(tx->connp);
-    const HTPCfgRec *cfg = hstate->cfg;
-
-    request_uri_normalized = SCHTPGenerateNormalizedUri(tx, tx->parsed_uri, cfg->uri_include_all);
-    if (request_uri_normalized == NULL)
-        return HTP_STATUS_OK;
+    HtpState *hstate = htp_connp_user_data(connp);
 
     tx_ud = htp_tx_get_user_data(tx);
     if (unlikely(tx_ud == NULL)) {
-        bstr_free(request_uri_normalized);
         return HTP_STATUS_OK;
     }
-    if (unlikely(tx_ud->request_uri_normalized != NULL))
-        bstr_free(tx_ud->request_uri_normalized);
-    tx_ud->request_uri_normalized = request_uri_normalized;
 
     if (htp_tx_flags(tx)) {
         HTPErrorCheckTxRequestFlags(hstate, tx);
@@ -1952,51 +1826,14 @@ static int HTPCallbackRequestLine(htp_tx_t *tx)
     return HTP_STATUS_OK;
 }
 
-static int HTPCallbackDoubleDecodeUriPart(htp_tx_t *tx, bstr *part)
-{
-    if (part == NULL)
-        return HTP_STATUS_OK;
-
-    uint64_t flags = 0;
-    size_t prevlen = bstr_len(part);
-    htp_status_t res = htp_urldecode_inplace(tx->cfg, HTP_DECODER_URLENCODED, part, &flags);
-    // shorter string means that uri was encoded
-    if (res == HTP_STATUS_OK && prevlen > bstr_len(part)) {
-        HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-        if (htud == NULL)
-            return HTP_STATUS_OK;
-        HtpState *s = htp_connp_get_user_data(tx->connp);
-        if (s == NULL)
-            return HTP_STATUS_OK;
-        HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_DOUBLE_ENCODED_URI);
-    }
-
-    return HTP_STATUS_OK;
-}
-
-static int HTPCallbackDoubleDecodeQuery(htp_tx_t *tx)
-{
-    if (tx->parsed_uri == NULL)
-        return HTP_STATUS_OK;
-
-    return HTPCallbackDoubleDecodeUriPart(tx, tx->parsed_uri->query);
-}
-
-static int HTPCallbackDoubleDecodePath(htp_tx_t *tx)
-{
-    if (tx->parsed_uri == NULL)
-        return HTP_STATUS_OK;
-
-    return HTPCallbackDoubleDecodeUriPart(tx, tx->parsed_uri->path);
-}
-
-static int HTPCallbackRequestHeaderData(htp_tx_data_t *tx_data)
+static int HTPCallbackRequestHeaderData(const htp_connp_t *connp, htp_tx_data_t *tx_data)
 {
     void *ptmp;
-    if (htp_tx_data_len(tx_data) == 0 || htp_tx_data_tx(tx_data) == NULL)
+    const htp_tx_t *tx = htp_tx_data_tx(tx_data);
+    if (htp_tx_data_is_empty(tx_data) || tx == NULL)
         return HTP_STATUS_OK;
 
-    HtpTxUserData *tx_ud = htp_tx_get_user_data(htp_tx_data_tx(tx_data));
+    HtpTxUserData *tx_ud = htp_tx_get_user_data(tx);
     if (tx_ud == NULL) {
         return HTP_STATUS_OK;
     }
@@ -2012,20 +1849,21 @@ static int HTPCallbackRequestHeaderData(htp_tx_data_t *tx_data)
             htp_tx_data_len(tx_data));
     tx_ud->request_headers_raw_len += htp_tx_data_len(tx_data);
 
-    if (htp_tx_data_tx(tx_data) && htp_tx_data_tx(tx_data)->flags) {
-        HtpState *hstate = htp_connp_get_user_data(htp_tx_data_tx(tx_data)->connp);
-        HTPErrorCheckTxRequestFlags(hstate, htp_tx_data_tx(tx_data));
+    if (tx && htp_tx_flags(tx)) {
+        HtpState *hstate = htp_connp_user_data(connp);
+        HTPErrorCheckTxRequestFlags(hstate, tx);
     }
     return HTP_STATUS_OK;
 }
 
-static int HTPCallbackResponseHeaderData(htp_tx_data_t *tx_data)
+static int HTPCallbackResponseHeaderData(const htp_connp_t *connp, htp_tx_data_t *tx_data)
 {
     void *ptmp;
-    if (htp_tx_data_len(tx_data) == 0 || htp_tx_data_tx(tx_data) == NULL)
+    const htp_tx_t *tx = htp_tx_data_tx(tx_data);
+    if (htp_tx_data_is_empty(tx_data) || tx == NULL)
         return HTP_STATUS_OK;
 
-    HtpTxUserData *tx_ud = htp_tx_get_user_data(htp_tx_data_tx(tx_data));
+    HtpTxUserData *tx_ud = htp_tx_get_user_data(tx);
     if (tx_ud == NULL) {
         return HTP_STATUS_OK;
     }
@@ -2049,7 +1887,7 @@ static int HTPCallbackResponseHeaderData(htp_tx_data_t *tx_data)
  */
 static void HTPConfigSetDefaultsPhase1(HTPCfgRec *cfg_prec)
 {
-    cfg_prec->uri_include_all = false;
+    htp_config_set_normalized_uri_include_all(cfg_prec->cfg, false);
     cfg_prec->request.body_limit = HTP_CONFIG_DEFAULT_REQUEST_BODY_LIMIT;
     cfg_prec->response.body_limit = HTP_CONFIG_DEFAULT_RESPONSE_BODY_LIMIT;
     cfg_prec->request.inspect_min_size = HTP_CONFIG_DEFAULT_REQUEST_INSPECT_MIN_SIZE;
@@ -2082,45 +1920,21 @@ static void HTPConfigSetDefaultsPhase1(HTPCfgRec *cfg_prec)
     htp_config_register_response_complete(cfg_prec->cfg, HTPCallbackResponseComplete);
 
     htp_config_set_parse_request_cookies(cfg_prec->cfg, 0);
-#ifdef HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI
     htp_config_set_allow_space_uri(cfg_prec->cfg, 1);
-#endif
 
     /* don't convert + to space by default */
-    htp_config_set_plusspace_decode(cfg_prec->cfg, HTP_DECODER_URLENCODED, 0);
+    htp_config_set_plusspace_decode(cfg_prec->cfg, 0);
     // enables request decompression
     htp_config_set_request_decompression(cfg_prec->cfg, 1);
-#ifdef HAVE_HTP_CONFIG_SET_LZMA_LAYERS
-    // disable by default
     htp_config_set_lzma_layers(cfg_prec->cfg, HTP_CONFIG_DEFAULT_LZMA_LAYERS);
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT
-    htp_config_set_lzma_memlimit(cfg_prec->cfg,
-            HTP_CONFIG_DEFAULT_LZMA_MEMLIMIT);
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT
-    htp_config_set_compression_bomb_limit(cfg_prec->cfg,
-                                          HTP_CONFIG_DEFAULT_COMPRESSION_BOMB_LIMIT);
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT
+    htp_config_set_lzma_memlimit(cfg_prec->cfg, HTP_CONFIG_DEFAULT_LZMA_MEMLIMIT);
+    htp_config_set_compression_bomb_limit(cfg_prec->cfg, HTP_CONFIG_DEFAULT_COMPRESSION_BOMB_LIMIT);
     htp_config_set_compression_time_limit(cfg_prec->cfg, HTP_CONFIG_DEFAULT_COMPRESSION_TIME_LIMIT);
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_MAX_TX
 #define HTP_CONFIG_DEFAULT_MAX_TX_LIMIT 512
     htp_config_set_max_tx(cfg_prec->cfg, HTP_CONFIG_DEFAULT_MAX_TX_LIMIT);
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_HEADERS_LIMIT
 #define HTP_CONFIG_DEFAULT_HEADERS_LIMIT 1024
     htp_config_set_number_headers_limit(cfg_prec->cfg, HTP_CONFIG_DEFAULT_HEADERS_LIMIT);
-#endif
-    /* libhtp <= 0.5.9 doesn't use soft limit, but it's impossible to set
-     * only the hard limit. So we set both here to the (current) htp defaults.
-     * The reason we do this is that if the user sets the hard limit in the
-     * config, we have to set the soft limit as well. If libhtp starts using
-     * the soft limit in the future, we at least make sure we control what
-     * it's value is. */
-    htp_config_set_field_limits(cfg_prec->cfg, (size_t)HTP_CONFIG_DEFAULT_FIELD_LIMIT_SOFT,
-            (size_t)HTP_CONFIG_DEFAULT_FIELD_LIMIT_HARD);
+    htp_config_set_field_limit(cfg_prec->cfg, (size_t)HTP_CONFIG_DEFAULT_FIELD_LIMIT);
 }
 
 /* hack: htp random range code expects random values in range of 0-RAND_MAX,
@@ -2232,7 +2046,7 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
                 /* The IDS personality by default converts the path (and due to
                  * our query string callback also the query string) to lowercase.
                  * Signatures do not expect this, so override it. */
-                htp_config_set_convert_lowercase(cfg_prec->cfg, HTP_DECODER_URL_PATH, 0);
+                htp_config_set_convert_lowercase(cfg_prec->cfg, 0);
             } else {
                 SCLogWarning("LIBHTP Unknown personality "
                              "\"%s\", ignoring",
@@ -2274,17 +2088,9 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
             }
 
         } else if (strcasecmp("double-decode-query", p->name) == 0) {
-            if (SCConfValIsTrue(p->val)) {
-                htp_config_register_request_line(cfg_prec->cfg,
-                                                 HTPCallbackDoubleDecodeQuery);
-            }
-
+            htp_config_set_double_decode_normalized_query(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("double-decode-path", p->name) == 0) {
-            if (SCConfValIsTrue(p->val)) {
-                htp_config_register_request_line(cfg_prec->cfg,
-                                                 HTPCallbackDoubleDecodePath);
-            }
-
+            htp_config_set_double_decode_normalized_path(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("response-body-minimal-inspect-size", p->name) == 0) {
             if (ParseSizeStringU32(p->val, &cfg_prec->response.inspect_min_size) < 0) {
                 SCLogError("Error parsing response-body-minimal-inspect-size "
@@ -2309,69 +2115,49 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
                         p->val);
                 exit(EXIT_FAILURE);
             }
-#ifdef HAVE_HTP_CONFIG_SET_RESPONSE_DECOMPRESSION_LAYER_LIMIT
-            htp_config_set_response_decompression_layer_limit(cfg_prec->cfg, value);
-#else
-            SCLogWarning("can't set response-body-decompress-layer-limit "
-                         "to %u, libhtp version too old",
-                    value);
-#endif
+            htp_config_set_decompression_layer_limit(cfg_prec->cfg, value);
         } else if (strcasecmp("path-convert-backslash-separators", p->name) == 0) {
-            htp_config_set_backslash_convert_slashes(
-                    cfg_prec->cfg, HTP_DECODER_URL_PATH, SCConfValIsTrue(p->val));
+            htp_config_set_backslash_convert_slashes(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("path-bestfit-replacement-char", p->name) == 0) {
             if (strlen(p->val) == 1) {
-                htp_config_set_bestfit_replacement_byte(cfg_prec->cfg,
-                                                        HTP_DECODER_URL_PATH,
-                                                        p->val[0]);
+                htp_config_set_bestfit_replacement_byte(cfg_prec->cfg, p->val[0]);
             } else {
                 SCLogError("Invalid entry "
                            "for libhtp param path-bestfit-replacement-char");
             }
         } else if (strcasecmp("path-convert-lowercase", p->name) == 0) {
-            htp_config_set_convert_lowercase(
-                    cfg_prec->cfg, HTP_DECODER_URL_PATH, SCConfValIsTrue(p->val));
+            htp_config_set_convert_lowercase(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("path-nul-encoded-terminates", p->name) == 0) {
-            htp_config_set_nul_encoded_terminates(
-                    cfg_prec->cfg, HTP_DECODER_URL_PATH, SCConfValIsTrue(p->val));
+            htp_config_set_nul_encoded_terminates(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("path-nul-raw-terminates", p->name) == 0) {
-            htp_config_set_nul_raw_terminates(
-                    cfg_prec->cfg, HTP_DECODER_URL_PATH, SCConfValIsTrue(p->val));
+            htp_config_set_nul_raw_terminates(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("path-separators-compress", p->name) == 0) {
-            htp_config_set_path_separators_compress(
-                    cfg_prec->cfg, HTP_DECODER_URL_PATH, SCConfValIsTrue(p->val));
+            htp_config_set_path_separators_compress(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("path-separators-decode", p->name) == 0) {
-            htp_config_set_path_separators_decode(
-                    cfg_prec->cfg, HTP_DECODER_URL_PATH, SCConfValIsTrue(p->val));
+            htp_config_set_path_separators_decode(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("path-u-encoding-decode", p->name) == 0) {
-            htp_config_set_u_encoding_decode(
-                    cfg_prec->cfg, HTP_DECODER_URL_PATH, SCConfValIsTrue(p->val));
+            htp_config_set_u_encoding_decode(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("path-url-encoding-invalid-handling", p->name) == 0) {
             enum htp_url_encoding_handling_t handling;
             if (strcasecmp(p->val, "preserve_percent") == 0) {
-                handling = HTP_URL_DECODE_PRESERVE_PERCENT;
+                handling = HTP_URL_ENCODING_HANDLING_PRESERVE_PERCENT;
             } else if (strcasecmp(p->val, "remove_percent") == 0) {
-                handling = HTP_URL_DECODE_REMOVE_PERCENT;
+                handling = HTP_URL_ENCODING_HANDLING_REMOVE_PERCENT;
             } else if (strcasecmp(p->val, "decode_invalid") == 0) {
-                handling = HTP_URL_DECODE_PROCESS_INVALID;
+                handling = HTP_URL_ENCODING_HANDLING_PROCESS_INVALID;
             } else {
                 SCLogError("Invalid entry "
                            "for libhtp param path-url-encoding-invalid-handling");
                 return;
             }
-            htp_config_set_url_encoding_invalid_handling(cfg_prec->cfg,
-                                                         HTP_DECODER_URL_PATH,
-                                                         handling);
+            htp_config_set_url_encoding_invalid_handling(cfg_prec->cfg, handling);
         } else if (strcasecmp("path-utf8-convert-bestfit", p->name) == 0) {
-            htp_config_set_utf8_convert_bestfit(
-                    cfg_prec->cfg, HTP_DECODER_URL_PATH, SCConfValIsTrue(p->val));
+            htp_config_set_utf8_convert_bestfit(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("uri-include-all", p->name) == 0) {
-            cfg_prec->uri_include_all = (1 == SCConfValIsTrue(p->val));
-            SCLogDebug("uri-include-all %s",
-                    cfg_prec->uri_include_all ? "enabled" : "disabled");
+            htp_config_set_normalized_uri_include_all(cfg_prec->cfg, SCConfValIsTrue(p->val));
+            SCLogDebug("uri-include-all %s", SCConfValIsTrue(p->val) ? "enabled" : "disabled");
         } else if (strcasecmp("query-plusspace-decode", p->name) == 0) {
-            htp_config_set_plusspace_decode(
-                    cfg_prec->cfg, HTP_DECODER_URLENCODED, SCConfValIsTrue(p->val));
+            htp_config_set_plusspace_decode(cfg_prec->cfg, SCConfValIsTrue(p->val));
         } else if (strcasecmp("meta-field-limit", p->name) == 0) {
             uint32_t limit = 0;
             if (ParseSizeStringU32(p->val, &limit) < 0) {
@@ -2385,10 +2171,7 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
                            "from conf file cannot be 0.  Killing engine");
             }
             /* set default soft-limit with our new hard limit */
-            htp_config_set_field_limits(cfg_prec->cfg,
-                    (size_t)HTP_CONFIG_DEFAULT_FIELD_LIMIT_SOFT,
-                    (size_t)limit);
-#ifdef HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT
+            htp_config_set_field_limit(cfg_prec->cfg, (size_t)limit);
         } else if (strcasecmp("lzma-memlimit", p->name) == 0) {
             uint32_t limit = 0;
             if (ParseSizeStringU32(p->val, &limit) < 0) {
@@ -2403,8 +2186,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
             /* set default soft-limit with our new hard limit */
             SCLogConfig("Setting HTTP LZMA memory limit to %"PRIu32" bytes", limit);
             htp_config_set_lzma_memlimit(cfg_prec->cfg, (size_t)limit);
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_LZMA_LAYERS
         } else if (strcasecmp("lzma-enabled", p->name) == 0) {
             if (SCConfValIsTrue(p->val)) {
                 htp_config_set_lzma_layers(cfg_prec->cfg, 1);
@@ -2418,8 +2199,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
                 SCLogConfig("Setting HTTP LZMA decompression layers to %" PRIu32 "", (int)limit);
                 htp_config_set_lzma_layers(cfg_prec->cfg, limit);
             }
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT
         } else if (strcasecmp("compression-bomb-limit", p->name) == 0) {
             uint32_t limit = 0;
             if (ParseSizeStringU32(p->val, &limit) < 0) {
@@ -2434,8 +2213,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
             /* set default soft-limit with our new hard limit */
             SCLogConfig("Setting HTTP compression bomb limit to %"PRIu32" bytes", limit);
             htp_config_set_compression_bomb_limit(cfg_prec->cfg, (size_t)limit);
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT
         } else if (strcasecmp("decompression-time-limit", p->name) == 0) {
             uint32_t limit = 0;
             // between 1 usec and 1 second
@@ -2445,9 +2222,7 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
                         p->val);
             }
             SCLogConfig("Setting HTTP decompression time limit to %" PRIu32 " usec", limit);
-            htp_config_set_compression_time_limit(cfg_prec->cfg, (size_t)limit);
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_MAX_TX
+            htp_config_set_compression_time_limit(cfg_prec->cfg, limit);
         } else if (strcasecmp("max-tx", p->name) == 0) {
             uint32_t limit = 0;
             if (ParseSizeStringU32(p->val, &limit) < 0) {
@@ -2458,8 +2233,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
             /* set default soft-limit with our new hard limit */
             SCLogConfig("Setting HTTP max-tx limit to %" PRIu32 " bytes", limit);
             htp_config_set_max_tx(cfg_prec->cfg, limit);
-#endif
-#ifdef HAVE_HTP_CONFIG_SET_HEADERS_LIMIT
         } else if (strcasecmp("headers-limit", p->name) == 0) {
             uint32_t limit = 0;
             if (ParseSizeStringU32(p->val, &limit) < 0) {
@@ -2469,7 +2242,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, SCConfNode *s, struct
             }
             SCLogConfig("Setting HTTP headers limit to %" PRIu32, limit);
             htp_config_set_number_headers_limit(cfg_prec->cfg, limit);
-#endif
         } else if (strcasecmp("randomize-inspection-sizes", p->name) == 0) {
             if (!g_disable_randomness) {
                 cfg_prec->randomize = SCConfValIsTrue(p->val);
@@ -2659,21 +2431,21 @@ static AppLayerGetFileState HTPGetTxFiles(void *txv, uint8_t direction)
 static int HTPStateGetAlstateProgress(void *tx, uint8_t direction)
 {
     if (direction & STREAM_TOSERVER)
-        return ((htp_tx_t *)tx)->request_progress;
+        return htp_tx_request_progress((htp_tx_t *)tx);
     else
-        return ((htp_tx_t *)tx)->response_progress;
+        return htp_tx_response_progress((htp_tx_t *)tx);
 }
 
 static uint64_t HTPStateGetTxCnt(void *alstate)
 {
     HtpState *http_state = (HtpState *)alstate;
 
-    if (http_state != NULL && http_state->conn != NULL) {
-        const int64_t size = (int64_t)htp_list_size(http_state->conn->transactions);
+    if (http_state != NULL && http_state->connp != NULL) {
+        const int64_t size = htp_connp_tx_size(http_state->connp);
         if (size < 0)
             return 0ULL;
         SCLogDebug("size %"PRIu64, size);
-        return (uint64_t)size + http_state->tx_freed;
+        return (uint64_t)size;
     } else {
         return 0ULL;
     }
@@ -2683,8 +2455,8 @@ static void *HTPStateGetTx(void *alstate, uint64_t tx_id)
 {
     HtpState *http_state = (HtpState *)alstate;
 
-    if (http_state != NULL && http_state->conn != NULL && tx_id >= http_state->tx_freed)
-        return htp_list_get(http_state->conn->transactions, tx_id - http_state->tx_freed);
+    if (http_state != NULL && http_state->connp != NULL)
+        return (void *)htp_connp_tx(http_state->connp, tx_id);
     else
         return NULL;
 }
@@ -2693,10 +2465,10 @@ void *HtpGetTxForH2(void *alstate)
 {
     // gets last transaction
     HtpState *http_state = (HtpState *)alstate;
-    if (http_state != NULL && http_state->conn != NULL) {
-        size_t txid = HTPStateGetTxCnt(http_state);
-        if (txid > http_state->tx_freed) {
-            return htp_list_get(http_state->conn->transactions, txid - http_state->tx_freed - 1);
+    if (http_state != NULL && http_state->connp != NULL) {
+        size_t txid = htp_connp_tx_size(http_state->connp);
+        if (txid > 0) {
+            return (void *)htp_connp_tx(http_state->connp, txid - 1);
         }
     }
     return NULL;
@@ -2921,7 +2693,7 @@ static int HTPParserTest01(void)
     const htp_header_t *h = htp_tx_request_header_index(tx, 0);
     FAIL_IF_NULL(h);
 
-    FAIL_IF(strcmp(bstr_util_strdup_to_c(htp_header_value(h)), "Victor/1.0"));
+    FAIL_IF(bstr_cmp_c(htp_header_value(h), "Victor/1.0"));
     FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_POST);
     FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_0);
 
@@ -3211,7 +2983,6 @@ static int HTPParserTest04(void)
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
     const htp_header_t *h = htp_tx_request_header_index(tx, 0);
-
     FAIL_IF_NOT_NULL(h);
     FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_UNKNOWN);
     FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V0_9);
@@ -3428,14 +3199,11 @@ static int HTPParserTest07(void)
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
-
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     StreamTcpFreeConfig(true);
@@ -3492,11 +3260,9 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    PrintRawDataFp(stdout, bstr_ptr(tx_ud->request_uri_normalized),
-            bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    PrintRawDataFp(stdout, bstr_ptr(request_uri_normalized), bstr_len(request_uri_normalized));
 
     AppLayerParserThreadCtxFree(alp_tctx);
     StreamTcpFreeConfig(true);
@@ -3556,12 +3322,9 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    PrintRawDataFp(stdout, bstr_ptr(tx_ud->request_uri_normalized),
-            bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    PrintRawDataFp(stdout, bstr_ptr(request_uri_normalized), bstr_len(request_uri_normalized));
 
     AppLayerParserThreadCtxFree(alp_tctx);
     StreamTcpFreeConfig(true);
@@ -3674,17 +3437,14 @@ static int HTPParserTest11(void)
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
 
-    HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-
-    FAIL_IF(bstr_len(tx_ud->request_uri_normalized) != 4);
-
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[0] != '/');
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[1] != '%');
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[2] != '0');
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[3] != '0');
+    FAIL_IF(bstr_len(request_uri_normalized) != 4);
+    FAIL_IF(bstr_ptr(request_uri_normalized)[0] != '/');
+    FAIL_IF(bstr_ptr(request_uri_normalized)[1] != '%');
+    FAIL_IF(bstr_ptr(request_uri_normalized)[2] != '0');
+    FAIL_IF(bstr_ptr(request_uri_normalized)[3] != '0');
 
     AppLayerParserThreadCtxFree(alp_tctx);
     StreamTcpFreeConfig(true);
@@ -3733,19 +3493,17 @@ static int HTPParserTest12(void)
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-
-    FAIL_IF(bstr_len(tx_ud->request_uri_normalized) != 7);
-
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[0] != '/');
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[1] != '?');
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[2] != 'a');
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[3] != '=');
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[4] != '%');
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[5] != '0');
-    FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[6] != '0');
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+
+    FAIL_IF(bstr_len(request_uri_normalized) != 7);
+    FAIL_IF(bstr_ptr(request_uri_normalized)[0] != '/');
+    FAIL_IF(bstr_ptr(request_uri_normalized)[1] != '?');
+    FAIL_IF(bstr_ptr(request_uri_normalized)[2] != 'a');
+    FAIL_IF(bstr_ptr(request_uri_normalized)[3] != '=');
+    FAIL_IF(bstr_ptr(request_uri_normalized)[4] != '%');
+    FAIL_IF(bstr_ptr(request_uri_normalized)[5] != '0');
+    FAIL_IF(bstr_ptr(request_uri_normalized)[6] != '0');
 
     AppLayerParserThreadCtxFree(alp_tctx);
     StreamTcpFreeConfig(true);
@@ -4076,11 +3834,8 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    FAIL_IF(tx->cfg != htp);
-
     tx = HTPStateGetTx(htp_state, 1);
     FAIL_IF_NULL(tx);
-    FAIL_IF(tx->cfg != htp);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4155,36 +3910,36 @@ libhtp:\n\
     FAIL_IF_NULL(tx);
 
     HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
     FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     uint8_t ref2[] = "/abc/def?ghi/jkl";
     reflen = sizeof(ref2) - 1;
 
     tx = HTPStateGetTx(htp_state, 1);
     FAIL_IF_NULL(tx);
+
     tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
+    request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
     FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
-
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref2,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref2, bstr_len(request_uri_normalized)) != 0);
 
     uint8_t ref3[] = "/abc/def?ghi%2fjkl";
     reflen = sizeof(ref3) - 1;
     tx = HTPStateGetTx(htp_state, 2);
     FAIL_IF_NULL(tx);
-    tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref3,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
+    request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(tx_ud);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref3, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4246,11 +4001,11 @@ libhtp:\n\
     FAIL_IF_NULL(tx);
 
     HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
     FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     uint8_t ref2[] = "/abc/def?ghi/jkl";
     reflen = sizeof(ref2) - 1;
@@ -4258,24 +4013,24 @@ libhtp:\n\
     tx = HTPStateGetTx(htp_state, 1);
     FAIL_IF_NULL(tx);
     tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
+    request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
     FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref2,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref2, bstr_len(request_uri_normalized)) != 0);
 
     uint8_t ref3[] = "/abc/def?ghi%2fjkl";
     reflen = sizeof(ref3) - 1;
     tx = HTPStateGetTx(htp_state, 2);
     FAIL_IF_NULL(tx);
     tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
+    request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
     FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref3,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref3, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4353,37 +4108,31 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     uint8_t ref2[] = "/abc/def?ghi/jkl";
     reflen = sizeof(ref2) - 1;
 
     tx = HTPStateGetTx(htp_state, 1);
     FAIL_IF_NULL(tx);
-    tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref2,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref2, bstr_len(request_uri_normalized)) != 0);
 
     uint8_t ref3[] = "/abc/def?ghi%2fjkl";
     reflen = sizeof(ref3) - 1;
     tx = HTPStateGetTx(htp_state, 2);
     FAIL_IF_NULL(tx);
-    tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref3,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref3, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4459,26 +4208,22 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     uint8_t ref2[] = "/abc/def?ghi/jkl";
     reflen = sizeof(ref2) - 1;
 
     tx = HTPStateGetTx(htp_state, 1);
     FAIL_IF_NULL(tx);
-    tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref2,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref2, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4550,13 +4295,11 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4628,13 +4371,11 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4706,13 +4447,11 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4785,13 +4524,11 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4861,13 +4598,11 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4938,13 +4673,11 @@ libhtp:\n\
 
     htp_tx_t *tx = HTPStateGetTx(htp_state, 0);
     FAIL_IF_NULL(tx);
-    HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx);
-    FAIL_IF_NULL(tx_ud);
-    FAIL_IF_NULL(tx_ud->request_uri_normalized);
-    FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized));
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+    FAIL_IF_NULL(request_uri_normalized);
+    FAIL_IF(reflen != bstr_len(request_uri_normalized));
 
-    FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1,
-                    bstr_len(tx_ud->request_uri_normalized)) != 0);
+    FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0);
 
     AppLayerParserThreadCtxFree(alp_tctx);
     HTPFreeConfig();
@@ -4967,8 +4700,12 @@ static int HTPBodyReassemblyTest01(void)
     Flow flow;
     memset(&flow, 0x00, sizeof(flow));
     AppLayerParserState *parser = AppLayerParserStateAlloc();
-    htp_tx_t tx;
-    memset(&tx, 0, sizeof(tx));
+    htp_cfg_t *cfg = htp_config_create();
+    BUG_ON(cfg == NULL);
+    htp_connp_t *connp = htp_connp_create(cfg);
+    BUG_ON(connp == NULL);
+    const htp_tx_t *tx = htp_connp_get_request_tx(connp);
+    BUG_ON(tx == NULL);
 
     hstate.f = &flow;
     flow.alparser = parser;
@@ -5312,7 +5049,6 @@ static int HTPParserTest16(void)
     FAIL_IF_NULL(decoder_events);
     FAIL_IF(decoder_events->events[0] != HTP_LOG_CODE_METHOD_DELIM_NON_COMPLIANT);
     FAIL_IF(decoder_events->events[1] != HTP_LOG_CODE_URI_DELIM_NON_COMPLIANT);
-
 #endif
 
     AppLayerParserThreadCtxFree(alp_tctx);
index a7e3dee6e879c6e33d1ba60b6cfd7427d3c2f5b8..145c014b8c73c6e333ef7539f272809804edca8a 100644 (file)
 #include "rust.h"
 #include "app-layer-frames.h"
 
-#include <htp/htp.h>
-
-enum {
-    /* libhtp errors/warnings */
-    HTTP_DECODER_EVENT_UNKNOWN_ERROR,
-    HTTP_DECODER_EVENT_GZIP_DECOMPRESSION_FAILED,
-    HTTP_DECODER_EVENT_REQUEST_FIELD_MISSING_COLON,
-    HTTP_DECODER_EVENT_RESPONSE_FIELD_MISSING_COLON,
-    HTTP_DECODER_EVENT_INVALID_REQUEST_CHUNK_LEN,
-    HTTP_DECODER_EVENT_INVALID_RESPONSE_CHUNK_LEN,
-    HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST,
-    HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE,
-    HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST,
-    HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE,
-    HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST,
-    HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE,
-    HTTP_DECODER_EVENT_100_CONTINUE_ALREADY_SEEN,
-    HTTP_DECODER_EVENT_UNABLE_TO_MATCH_RESPONSE_TO_REQUEST,
-    HTTP_DECODER_EVENT_INVALID_SERVER_PORT_IN_REQUEST,
-    HTTP_DECODER_EVENT_INVALID_AUTHORITY_PORT,
-    HTTP_DECODER_EVENT_REQUEST_HEADER_INVALID,
-    HTTP_DECODER_EVENT_RESPONSE_HEADER_INVALID,
-    HTTP_DECODER_EVENT_MISSING_HOST_HEADER,
-    HTTP_DECODER_EVENT_HOST_HEADER_AMBIGUOUS,
-    HTTP_DECODER_EVENT_INVALID_REQUEST_FIELD_FOLDING,
-    HTTP_DECODER_EVENT_INVALID_RESPONSE_FIELD_FOLDING,
-    HTTP_DECODER_EVENT_REQUEST_FIELD_TOO_LONG,
-    HTTP_DECODER_EVENT_RESPONSE_FIELD_TOO_LONG,
-    HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG,
-    HTTP_DECODER_EVENT_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH,
-    HTTP_DECODER_EVENT_URI_HOST_INVALID,
-    HTTP_DECODER_EVENT_HEADER_HOST_INVALID,
-    HTTP_DECODER_EVENT_METHOD_DELIM_NON_COMPLIANT,
-    HTTP_DECODER_EVENT_URI_DELIM_NON_COMPLIANT,
-    HTTP_DECODER_EVENT_REQUEST_LINE_LEADING_WHITESPACE,
-    HTTP_DECODER_EVENT_TOO_MANY_ENCODING_LAYERS,
-    HTTP_DECODER_EVENT_ABNORMAL_CE_HEADER,
-    HTTP_DECODER_EVENT_AUTH_UNRECOGNIZED,
-    HTTP_DECODER_EVENT_REQUEST_HEADER_REPETITION,
-    HTTP_DECODER_EVENT_RESPONSE_HEADER_REPETITION,
-    HTTP_DECODER_EVENT_RESPONSE_MULTIPART_BYTERANGES,
-    HTTP_DECODER_EVENT_RESPONSE_ABNORMAL_TRANSFER_ENCODING,
-    HTTP_DECODER_EVENT_RESPONSE_CHUNKED_OLD_PROTO,
-    HTTP_DECODER_EVENT_RESPONSE_INVALID_PROTOCOL,
-    HTTP_DECODER_EVENT_RESPONSE_INVALID_STATUS,
-    HTTP_DECODER_EVENT_REQUEST_LINE_INCOMPLETE,
-    HTTP_DECODER_EVENT_DOUBLE_ENCODED_URI,
-    HTTP_DECODER_EVENT_REQUEST_LINE_INVALID,
-    HTTP_DECODER_EVENT_REQUEST_BODY_UNEXPECTED,
-
-    HTTP_DECODER_EVENT_LZMA_MEMLIMIT_REACHED,
-    HTTP_DECODER_EVENT_COMPRESSION_BOMB,
-
-    HTTP_DECODER_EVENT_RANGE_INVALID,
-    HTTP_DECODER_EVENT_REQUEST_CHUNK_EXTENSION,
-    HTTP_DECODER_EVENT_REQUEST_LINE_MISSING_PROTOCOL,
-    HTTP_DECODER_EVENT_REQUEST_TOO_MANY_HEADERS,
-    HTTP_DECODER_EVENT_RESPONSE_TOO_MANY_HEADERS,
-
-    /* suricata errors/warnings */
-    HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR,
-    HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA,
-    HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER,
-
-    HTTP_DECODER_EVENT_TOO_MANY_WARNINGS,
-
-    HTTP_DECODER_EVENT_FAILED_PROTOCOL_CHANGE,
-};
-
-// Temporary include directly app-layer-htp-libhtp.h
-// This helps libhtp.rs transition by making small steps
-// app-layer-htp-libhtp.h will be removed with libhtp.rs final merge
-#include "app-layer-htp-libhtp.h"
+#include "htp/htp_rs.h"
 
 /* default request body limit */
 #define HTP_CONFIG_DEFAULT_REQUEST_BODY_LIMIT        4096U
@@ -117,8 +45,7 @@ enum {
 #define HTP_CONFIG_DEFAULT_REQUEST_INSPECT_WINDOW    4096U
 #define HTP_CONFIG_DEFAULT_RESPONSE_INSPECT_MIN_SIZE 32768U
 #define HTP_CONFIG_DEFAULT_RESPONSE_INSPECT_WINDOW   4096U
-#define HTP_CONFIG_DEFAULT_FIELD_LIMIT_SOFT          9000U
-#define HTP_CONFIG_DEFAULT_FIELD_LIMIT_HARD          18000U
+#define HTP_CONFIG_DEFAULT_FIELD_LIMIT               18000U
 
 #define HTP_CONFIG_DEFAULT_LZMA_LAYERS 0U
 /* default libhtp lzma limit, taken from libhtp. */
@@ -145,6 +72,18 @@ enum {
     HTP_BODY_REQUEST_PUT,
 };
 
+enum {
+    /* suricata errors/warnings */
+    HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR = 200,
+    HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA = 201,
+    HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER = 202,
+
+    HTTP_DECODER_EVENT_TOO_MANY_WARNINGS = 203,
+    HTTP_DECODER_EVENT_RANGE_INVALID = 204,
+    HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG = 205,
+    HTTP_DECODER_EVENT_FAILED_PROTOCOL_CHANGE = 206,
+};
+
 typedef enum HtpSwfCompressType_ {
     HTTP_SWF_COMPRESSION_NONE = 0,
     HTTP_SWF_COMPRESSION_ZLIB,
@@ -225,8 +164,6 @@ typedef struct HtpTxUserData_ {
     HtpBody request_body;
     HtpBody response_body;
 
-    bstr *request_uri_normalized;
-
     uint8_t *request_headers_raw;
     uint8_t *response_headers_raw;
     uint32_t request_headers_raw_len;
@@ -248,18 +185,11 @@ typedef struct HtpState_ {
     htp_conn_t *conn;
     Flow *f;                /**< Needed to retrieve the original flow when using HTPLib callbacks */
     uint64_t transaction_cnt;
-    // tx_freed is the number of already freed transactions
-    // This is needed as libhtp only keeps the live transactions :
-    // To get the total number of transactions, we need to add
-    // the number of transactions tracked by libhtp to this number.
-    // It is also needed as an offset to translate between suricata
-    // transaction id to libhtp offset in its list/array
-    uint64_t tx_freed;
     const struct HTPCfgRec_ *cfg;
     uint16_t flags;
     uint16_t events;
-    uint16_t htp_messages_offset; /**< offset into conn->messages list */
-    uint32_t file_track_id;       /**< used to assign file track ids to files */
+    uint16_t htp_messages_count; /**< Number of already logged messages */
+    uint32_t file_track_id;      /**< used to assign file track ids to files */
     uint64_t last_request_data_stamp;
     uint64_t last_response_data_stamp;
     StreamSlice *slice;
index 8a0809bd6266d182de2c4aef8eb7a498d2b251da..529dca5ad5ae3d78b5daacc396beb528444c7d74 100644 (file)
@@ -29,8 +29,7 @@
 #include "suricata-common.h"
 #include "flow.h"
 
-#include <htp/htp.h>
-#include "app-layer-htp-libhtp.h"
+#include "htp/htp_rs.h"
 
 #include "detect.h"
 #include "detect-parse.h"
index 1bc02281489310df857aec06f6b3e0c3eb579c69..89a7708931ac853707a0403a3feb712a9ee07ed5 100644 (file)
@@ -348,7 +348,7 @@ static InspectionBuffer *GetRawData(DetectEngineThreadCtx *det_ctx,
         const uint8_t *data = NULL;
         uint32_t data_len = 0;
 
-        if (tx->parsed_uri == NULL || tx->parsed_uri->hostname == NULL) {
+        if (htp_uri_hostname(htp_tx_parsed_uri(tx)) == NULL) {
             if (htp_tx_request_headers(tx) == NULL)
                 return NULL;
 
@@ -356,11 +356,11 @@ static InspectionBuffer *GetRawData(DetectEngineThreadCtx *det_ctx,
             if (h == NULL || htp_header_value(h) == NULL)
                 return NULL;
 
-            data = (const uint8_t *)htp_header_value_ptr(h);
+            data = htp_header_value_ptr(h);
             data_len = htp_header_value_len(h);
         } else {
-            data = (const uint8_t *)bstr_ptr(tx->parsed_uri->hostname);
-            data_len = bstr_len(tx->parsed_uri->hostname);
+            data = (const uint8_t *)bstr_ptr(htp_uri_hostname(htp_tx_parsed_uri(tx)));
+            data_len = bstr_len(htp_uri_hostname(htp_tx_parsed_uri(tx)));
         }
 
         InspectionBufferSetupAndApplyTransforms(
index 1a04baa36f3371c50c1b8e610c27eb1d42a7b274..d7c28bfb29647efd4d85906eb7bd340ed71f8406 100644 (file)
@@ -130,7 +130,6 @@ static InspectionBuffer *GetData2(DetectEngineThreadCtx *det_ctx,
 static bool DetectHttpProtocolValidateCallback(
         const Signature *s, const char **sigerror, const DetectBufferType *dbt)
 {
-#ifdef HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI
     for (uint32_t x = 0; x < s->init_data->buffer_index; x++) {
         if (s->init_data->buffers[x].id != (uint32_t)dbt->id)
             continue;
@@ -148,7 +147,6 @@ static bool DetectHttpProtocolValidateCallback(
             }
         }
     }
-#endif
     return true;
 }
 
index 702d97bcabb73fdf00caeff203a45d23173cfa7d..0d9c03adf9eace57dd2e18032f3f3b0cd98165ad 100644 (file)
@@ -216,15 +216,12 @@ static InspectionBuffer *GetData(DetectEngineThreadCtx *det_ctx,
     InspectionBuffer *buffer = InspectionBufferGet(det_ctx, list_id);
     if (!buffer->initialized) {
         htp_tx_t *tx = (htp_tx_t *)txv;
-        HtpTxUserData *tx_ud = htp_tx_get_user_data(tx);
-
-        if (tx_ud == NULL || tx_ud->request_uri_normalized == NULL) {
-            SCLogDebug("no tx_id or uri");
+        bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx);
+        if (request_uri_normalized == NULL)
             return NULL;
-        }
 
-        const uint32_t data_len = bstr_len(tx_ud->request_uri_normalized);
-        const uint8_t *data = bstr_ptr(tx_ud->request_uri_normalized);
+        const uint32_t data_len = bstr_len(request_uri_normalized);
+        const uint8_t *data = bstr_ptr(request_uri_normalized);
 
         InspectionBufferSetupAndApplyTransforms(
                 det_ctx, list_id, buffer, data, data_len, transforms);
index c65a987b2d563fba7544849e4f02af6298a33b0d..adf5234daf094f589c0a6423be2f7721695de8ee 100644 (file)
@@ -332,7 +332,7 @@ static void EveHttpLogJSONHeaders(
                     if (((http_ctx->flags & LOG_HTTP_EXTENDED) == 0) ||
                             ((http_ctx->flags & LOG_HTTP_EXTENDED) !=
                                     (http_fields[f].flags & LOG_HTTP_EXTENDED))) {
-                        if (bstr_cmp_c_nocase(htp_header_name(h), http_fields[f].htp_field) == 0) {
+                        if (bstr_cmp_c_nocase(htp_header_name(h), http_fields[f].htp_field)) {
                             tolog = true;
                             break;
                         }
index 68aadaf406f3c893147d2b57deb9d0c514fe07a5..771456ee3d7d76f0c32d0300710a1f532fd87180 100644 (file)
@@ -739,9 +739,7 @@ static void PrintBuildInfo(void)
 #ifdef HAVE_LIBNET11
     strlcat(features, "LIBNET1.1 ", sizeof(features));
 #endif
-#ifdef HAVE_HTP_URI_NORMALIZE_HOOK
     strlcat(features, "HAVE_HTP_URI_NORMALIZE_HOOK ", sizeof(features));
-#endif
 #ifdef PCRE2_HAVE_JIT
     strlcat(features, "PCRE_JIT ", sizeof(features));
 #endif
@@ -886,8 +884,7 @@ static void PrintBuildInfo(void)
 #endif
     printf("thread local storage method: %s\n", tls);
 
-    printf("compiled with %s, linked against %s\n",
-           HTP_VERSION_STRING_FULL, htp_get_version());
+    printf("compiled with %s\n", htp_get_version());
     printf("\n");
 #include "build-info.h"
 }
index 9a269d2fb77ad01dc6ee172f6eb5bf3b2fd93777..f92a706ac487b2dd9e9ba569858540dd2100dc74 100644 (file)
@@ -89,18 +89,14 @@ static int LuaHttpGetRequestUriNormalized(lua_State *luastate)
         lua_pushnil(luastate);
         return 1;
     }
-    HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx->tx);
-    if (htud == NULL)
-        return LuaCallbackError(luastate, "no htud in tx");
+    bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx->tx);
 
-    if (htud->request_uri_normalized == NULL ||
-        bstr_ptr(htud->request_uri_normalized) == NULL ||
-        bstr_len(htud->request_uri_normalized) == 0)
+    if (request_uri_normalized == NULL || bstr_ptr(request_uri_normalized) == NULL ||
+            bstr_len(request_uri_normalized) == 0)
         return LuaCallbackError(luastate, "no normalized uri");
 
-    return LuaPushStringBuffer(luastate,
-            bstr_ptr(htud->request_uri_normalized),
-            bstr_len(htud->request_uri_normalized));
+    return LuaPushStringBuffer(
+            luastate, bstr_ptr(request_uri_normalized), bstr_len(request_uri_normalized));
 }
 
 static int LuaHttpGetRequestLine(lua_State *luastate)