From: Mike Stepanek (mstepane) Date: Wed, 16 Sep 2020 13:16:44 +0000 (+0000) Subject: Merge pull request #2468 in SNORT/snort3 from ~KATHARVE/snort3:http_cont_disp to... X-Git-Tag: 3.0.3-1~20 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e4ff799d7f3cad5502a636eea5d582df04fce86e;p=thirdparty%2Fsnort3.git Merge pull request #2468 in SNORT/snort3 from ~KATHARVE/snort3:http_cont_disp to master Squashed commit of the following: commit 0000fe4885165c1f1c1461635a78257bd9ee7046 Author: Katura Harvey Date: Wed Sep 9 16:55:08 2020 -0400 http_inspect: extract filename from content-disposition header for HTTP uploads --- diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index 844c8b41a..5178b09d5 100644 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -119,7 +119,7 @@ enum HeaderId { HEAD__NOT_COMPUTE=-14, HEAD__PROBLEMATIC=-12, HEAD__NOT_PRESENT= HEAD_CONTENT_LENGTH, HEAD_CONTENT_LOCATION, HEAD_CONTENT_MD5, HEAD_CONTENT_RANGE, HEAD_CONTENT_TYPE, HEAD_EXPIRES, HEAD_LAST_MODIFIED, HEAD_X_FORWARDED_FOR, HEAD_TRUE_CLIENT_IP, HEAD_X_WORKING_WITH, HEAD_CONTENT_TRANSFER_ENCODING, HEAD_MIME_VERSION, HEAD_PROXY_AGENT, - HEAD__MAX_VALUE }; + HEAD_CONTENT_DISPOSITION, HEAD__MAX_VALUE }; // All the infractions we might find while parsing and analyzing a message enum Infraction @@ -238,6 +238,7 @@ enum Infraction INF_200_CONNECT_RESP_WITH_TE, INF_100_CONNECT_RESP, INF_EARLY_CONNECT_RESPONSE, + INF_MALFORMED_CD_FILENAME, INF__MAX_VALUE }; @@ -365,6 +366,7 @@ enum EventSid EVENT_200_CONNECT_RESP_WITH_TE, EVENT_100_CONNECT_RESP, EVENT_EARLY_CONNECT_RESPONSE, // 258 + EVENT_MALFORMED_CD_FILENAME, EVENT__MAX_VALUE }; diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc index 3fdb2078d..f19395fe0 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.cc +++ b/src/service_inspectors/http_inspect/http_msg_body.cc @@ -29,6 +29,7 @@ #include "http_common.h" #include "http_enum.h" #include "http_js_norm.h" +#include "http_msg_header.h" #include "http_msg_request.h" using namespace snort; @@ -270,34 +271,41 @@ void HttpMsgBody::do_file_processing(const Field& file_data) return; const FileDirection dir = source_id == SRC_SERVER ? FILE_DOWNLOAD : FILE_UPLOAD; + Field cont_disp_filename; - size_t file_index = 0; - - // For downloads file_id for the file cache is the URL since that should be unique per file. - // Upload verdicts are not currently cached since we have no unique information. - // FIXIT-E For uploads use the filename for the file_id when available. - if ((request != nullptr) and (request->get_http_uri() != nullptr) - and (dir == FILE_DOWNLOAD)) - { - file_index = request->get_http_uri()->get_file_proc_hash(); - } + const uint64_t file_index = get_header(source_id)->get_file_cache_index(); if (file_flows->file_process(p, file_index, file_data.start(), fp_length, session_data->file_octets[source_id], dir, - transaction->get_file_processing_id(source_id), file_position)) + get_header(source_id)->get_multi_file_processing_id(), file_position)) { session_data->file_depth_remaining[source_id] -= fp_length; - // With the first piece of the file we must provide the "name" which means URI + // With the first piece of the file we must provide the "name". If an upload contains a + // filename in a Content-Disposition header, we use that. Otherwise the name is the URI. if (front) { if (request != nullptr) { - const Field& transaction_uri = request->get_uri(); - if (transaction_uri.length() > 0) + bool has_cd_filename = false; + if (dir == FILE_UPLOAD) + { + const Field& cd_filename = get_header(source_id)-> + get_content_disposition_filename(); + if (cd_filename.length() > 0) + { + file_flows->set_file_name(cd_filename.start(), cd_filename.length()); + has_cd_filename = true; + } + } + if (!has_cd_filename) { - file_flows->set_file_name(transaction_uri.start(), - transaction_uri.length()); + const Field& transaction_uri = request->get_uri(); + if (transaction_uri.length() > 0) + { + file_flows->set_file_name(transaction_uri.start(), + transaction_uri.length()); + } } } } diff --git a/src/service_inspectors/http_inspect/http_msg_head_shared.cc b/src/service_inspectors/http_inspect/http_msg_head_shared.cc index 4328a39c3..d5d625fd0 100644 --- a/src/service_inspectors/http_inspect/http_msg_head_shared.cc +++ b/src/service_inspectors/http_inspect/http_msg_head_shared.cc @@ -21,12 +21,18 @@ #include "config.h" #endif +#include "http_msg_head_shared.h" + +#include "hash/hash_key_operations.h" +#include "utils/util_cstring.h" + #include "http_common.h" #include "http_enum.h" -#include "http_msg_head_shared.h" +#include "http_msg_request.h" using namespace HttpCommon; using namespace HttpEnums; +using namespace snort; HttpMsgHeadShared::~HttpMsgHeadShared() { @@ -342,6 +348,220 @@ const Field& HttpMsgHeadShared::get_header_value_norm(HeaderId header_id) return node->norm; } +// For downloads we use the hash of the URL if it exists. For uploads we use a hash of the filename +// parameter in the Content-Disposition header, if one exists. Otherwise the file_index is 0, +// meaning the file verdict will not be cached. +uint64_t HttpMsgHeadShared::get_file_cache_index() +{ + if (file_cache_index_computed) + return file_cache_index; + + if (source_id == SRC_SERVER) + { + if ((request != nullptr) and (request->get_http_uri() != nullptr)) + { + const Field& abs_path = request->get_http_uri()->get_abs_path(); + if (abs_path.length() > 0) + { + file_cache_index = str_to_hash(abs_path.start(), abs_path.length()); + } + } + } + else + { + const Field& cd_filename = get_content_disposition_filename(); + if (cd_filename.length() > 0) + file_cache_index = str_to_hash(cd_filename.start(), cd_filename.length()); + } + file_cache_index_computed = true; + + return file_cache_index; +} + +const Field& HttpMsgHeadShared::get_content_disposition_filename() +{ + if (content_disposition_filename.length() == STAT_NOT_COMPUTE) + extract_filename_from_content_disposition(); + + return content_disposition_filename; +} + +// Extract the filename from the content-disposition header +void HttpMsgHeadShared::extract_filename_from_content_disposition() +{ + const Field& cont_disp = get_header_value_raw(HEAD_CONTENT_DISPOSITION); + + const uint8_t* cur = cont_disp.start(); + const uint8_t* const end = cont_disp.start() + cont_disp.length(); + const uint8_t* fname_begin = nullptr; + const uint8_t* fname_end = nullptr; + bool char_set = false; + enum { + CD_STATE_START, + CD_STATE_BEFORE_VAL, + CD_STATE_VAL, + CD_STATE_QUOTED_VAL, + CD_STATE_BEFORE_EXT_VAL, + CD_STATE_CHARSET, + CD_STATE_LANGUAGE, + CD_STATE_EXT_VAL, + CD_STATE_FINAL, + CD_STATE_PROBLEM + }; + const char *cd_file1 = "filename"; + const char *cd_file2 = "filename*"; + const char* filename_str_start = nullptr; + + if (cont_disp.length() <= 0) + { + content_disposition_filename.set(STAT_NOT_PRESENT); + return; + } + uint8_t state = CD_STATE_START; + + while (cur < end and state != CD_STATE_PROBLEM) + { + switch (state) + { + case CD_STATE_START: + if ((filename_str_start = SnortStrcasestr((const char*)cur, end - cur, cd_file2))) + { + state = CD_STATE_BEFORE_EXT_VAL; + cur = (const uint8_t*)filename_str_start + strlen(cd_file2) - 1; + } + else if ((filename_str_start = SnortStrcasestr((const char*)cur, end - cur, + cd_file1))) + { + state = CD_STATE_BEFORE_VAL; + cur = (const uint8_t*)filename_str_start + strlen(cd_file1) - 1; + } + else + { + content_disposition_filename.set(STAT_NOT_PRESENT); + return; + } + break; + case CD_STATE_BEFORE_VAL: + if (*cur == '=') + state = CD_STATE_VAL; + else if (*cur != ' ') + state = CD_STATE_START; + break; + case CD_STATE_VAL: + if (!fname_begin && *cur == '"') + state = CD_STATE_QUOTED_VAL; + else if (*cur == ';' || *cur == '\r' || *cur == '\n' || *cur == ' ' || *cur == '\t') + { + if (fname_begin) + { + fname_end = cur; + state = CD_STATE_FINAL; + } + } + else if (!fname_begin) + fname_begin = cur; + break; + case CD_STATE_QUOTED_VAL: + if (!fname_begin) + fname_begin = cur; + if (*cur == '"' ) + { + fname_end = cur; + state = CD_STATE_FINAL; + } + break; + case CD_STATE_BEFORE_EXT_VAL: + if (*cur == '=') + state = CD_STATE_CHARSET; + else if (*cur != ' ') + state = CD_STATE_START; + break; + case CD_STATE_CHARSET: + if (*cur == '\'') + { + if (!char_set) + { + state = CD_STATE_PROBLEM; + break; + } + else + state = CD_STATE_LANGUAGE; + } + else if (!char_set) + { + // Ignore space before the ext-value + while (cur < end && *cur == ' ') + cur++; + if (cur < end) + { + if (!strncasecmp((const char*) cur, "UTF-8", 5)) + cur += 5; + else if (!strncasecmp((const char*) cur, "ISO-8859-1", 10)) + cur += 10; + else if (!strncasecmp((const char*) cur, "mime-charset", 12)) + cur += 12; + else + { + state = CD_STATE_PROBLEM; + break; + } + char_set = true; + continue; + } + } + else + { + state = CD_STATE_PROBLEM; + break; + } + break; + case CD_STATE_LANGUAGE: + if (*cur == '\'') + state = CD_STATE_EXT_VAL; + break; + case CD_STATE_EXT_VAL: + if(*cur == ';' || *cur == '\r' || *cur == '\n' || *cur == ' ' || *cur == '\t') + { + fname_end = cur; + state = CD_STATE_FINAL; + } + else if (!fname_begin) + fname_begin = cur; + break; + case CD_STATE_FINAL: + if (fname_begin && fname_end) + { + content_disposition_filename.set(fname_end-fname_begin, fname_begin); + return; + } + // fallthrough + default: + state = CD_STATE_PROBLEM; + break; + } + cur++; + } + switch (state) + { + case CD_STATE_FINAL: + case CD_STATE_VAL: + case CD_STATE_EXT_VAL: + if (fname_begin) + { + if (!fname_end) + fname_end = end; + content_disposition_filename.set(fname_end-fname_begin, fname_begin); + break; + } + // fallthrough + default: + add_infraction(INF_MALFORMED_CD_FILENAME); + create_event(EVENT_MALFORMED_CD_FILENAME); + content_disposition_filename.set(STAT_PROBLEMATIC); + break; + } +} + #ifdef REG_TEST void HttpMsgHeadShared::print_headers(FILE* output) { diff --git a/src/service_inspectors/http_inspect/http_msg_head_shared.h b/src/service_inspectors/http_inspect/http_msg_head_shared.h index 6016b842c..f8aebc146 100644 --- a/src/service_inspectors/http_inspect/http_msg_head_shared.h +++ b/src/service_inspectors/http_inspect/http_msg_head_shared.h @@ -52,6 +52,11 @@ public: static const StrCode charset_code_list[]; static const StrCode charset_code_opt_list[]; + // The file_cache_index is used along with the source ip and destination ip to cache file + // verdicts. + uint64_t get_file_cache_index(); + const Field& get_content_disposition_filename(); + protected: HttpMsgHeadShared(const uint8_t* buffer, const uint16_t buf_size, HttpFlowData* session_data_, HttpCommon::SourceId source_id_, bool buf_owner, snort::Flow* flow_, @@ -120,6 +125,11 @@ private: int32_t num_headers = HttpCommon::STAT_NOT_COMPUTE; std::bitset headers_present = 0; + + void extract_filename_from_content_disposition(); + Field content_disposition_filename; + uint64_t file_cache_index = 0; + bool file_cache_index_computed = false; }; #endif diff --git a/src/service_inspectors/http_inspect/http_msg_header.cc b/src/service_inspectors/http_inspect/http_msg_header.cc index b8d5dac74..ecfda91a2 100644 --- a/src/service_inspectors/http_inspect/http_msg_header.cc +++ b/src/service_inspectors/http_inspect/http_msg_header.cc @@ -21,22 +21,24 @@ #include "config.h" #endif -#include - #include "http_msg_header.h" +#include + #include "decompress/file_decomp.h" #include "file_api/file_flows.h" #include "file_api/file_service.h" +#include "hash/hash_key_operations.h" +#include "pub_sub/http_events.h" +#include "service_inspectors/http2_inspect/http2_flow_data.h" +#include "sfip/sf_ip.h" + #include "http_api.h" #include "http_common.h" #include "http_enum.h" #include "http_inspect.h" #include "http_msg_request.h" #include "http_msg_body.h" -#include "pub_sub/http_events.h" -#include "service_inspectors/http2_inspect/http2_flow_data.h" -#include "sfip/sf_ip.h" using namespace snort; using namespace HttpCommon; @@ -417,9 +419,8 @@ void HttpMsgHeader::setup_file_processing() return; } - // Generate the unique file id for file processing - transaction->set_file_processing_id(source_id, get_transaction_id(), - get_h2_stream_id(source_id)); + // Generate the unique file id for multi file processing + set_multi_file_processing_id(get_transaction_id(), get_h2_stream_id(source_id)); // Do we meet all the conditions for MIME file processing? if (source_id == SRC_CLIENT) @@ -430,7 +431,7 @@ void HttpMsgHeader::setup_file_processing() if (boundary_present(content_type)) { session_data->mime_state[source_id] = new MimeSession(&FileService::decode_conf, - &mime_conf, transaction->get_file_processing_id(source_id), true); + &mime_conf, get_multi_file_processing_id(), true); // Show file processing the Content-Type header as if it were regular data. // This will enable it to find the boundary string. // FIXIT-L develop a proper interface for passing the boundary string. @@ -595,6 +596,22 @@ void HttpMsgHeader::setup_file_decompression() (void)File_Decomp_Init(session_data->fd_state); } +// Each file processed has a unique id per flow: hash(source_id, transaction_id, h2_stream_id) +// If this is an HTTP/1 flow, h2_stream_id is 0 +void HttpMsgHeader::set_multi_file_processing_id(const uint64_t transaction_id, + const uint32_t stream_id) +{ + const int data_len = sizeof(source_id) + sizeof(transaction_id) + sizeof(stream_id); + uint8_t data[data_len]; + memcpy(data, (void*)&source_id, sizeof(source_id)); + uint32_t offset = sizeof(source_id); + memcpy(data + offset, (void*)&transaction_id, sizeof(transaction_id)); + offset += sizeof(transaction_id); + memcpy(data + offset, (void*)&stream_id, sizeof(stream_id)); + + multi_file_processing_id = str_to_hash(data, data_len); +} + #ifdef REG_TEST void HttpMsgHeader::print_section(FILE* output) { diff --git a/src/service_inspectors/http_inspect/http_msg_header.h b/src/service_inspectors/http_inspect/http_msg_header.h index b09c2de6b..056f195a3 100644 --- a/src/service_inspectors/http_inspect/http_msg_header.h +++ b/src/service_inspectors/http_inspect/http_msg_header.h @@ -46,6 +46,11 @@ public: const Field& get_true_ip(); const Field& get_true_ip_addr(); + // The multi_file_processing_id is unique for each file transferred within a single connection + // and is used by file processing to store partially processed file contexts in the flow data. + void set_multi_file_processing_id(const uint64_t transaction_id, const uint32_t stream_id); + uint64_t get_multi_file_processing_id() { return multi_file_processing_id; } + private: void prepare_body(); void setup_file_processing(); @@ -59,6 +64,8 @@ private: Field true_ip; Field true_ip_addr; + uint64_t multi_file_processing_id = 0; + #ifdef REG_TEST void print_section(FILE* output) override; #endif diff --git a/src/service_inspectors/http_inspect/http_stream_splitter_finish.cc b/src/service_inspectors/http_inspect/http_stream_splitter_finish.cc index 073bf844c..c53993e6d 100644 --- a/src/service_inspectors/http_inspect/http_stream_splitter_finish.cc +++ b/src/service_inspectors/http_inspect/http_stream_splitter_finish.cc @@ -28,6 +28,7 @@ #include "http_enum.h" #include "http_inspect.h" #include "http_module.h" +#include "http_msg_header.h" #include "http_msg_request.h" #include "http_stream_splitter.h" #include "http_test_input.h" @@ -129,16 +130,12 @@ bool HttpStreamSplitter::finish(Flow* flow) const FileDirection dir = (source_id == SRC_SERVER) ? FILE_DOWNLOAD : FILE_UPLOAD; - size_t file_index = 0; - assert(session_data->transaction[source_id] != nullptr); - HttpMsgRequest* request = session_data->transaction[source_id]->get_request(); - if ((request != nullptr) and (request->get_http_uri() != nullptr)) - { - file_index = request->get_http_uri()->get_file_proc_hash(); - } - const uint64_t file_processing_id = - session_data->transaction[source_id]->get_file_processing_id(source_id); + HttpMsgHeader* header = session_data->transaction[source_id]->get_header(source_id); + assert(header); + + uint64_t file_index = header->get_file_cache_index(); + const uint64_t file_processing_id = header->get_multi_file_processing_id(); file_flows->file_process(packet, file_index, nullptr, 0, 0, dir, file_processing_id, SNORT_FILE_END); #ifdef REG_TEST diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index 6b69a5dae..6d17143a0 100644 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ b/src/service_inspectors/http_inspect/http_tables.cc @@ -138,6 +138,7 @@ const StrCode HttpMsgHeadShared::header_list[] = { HEAD_CONTENT_TRANSFER_ENCODING, "content-transfer-encoding" }, { HEAD_MIME_VERSION, "mime-version" }, { HEAD_PROXY_AGENT, "proxy-agent" }, + { HEAD_CONTENT_DISPOSITION, "content-disposition" }, { 0, nullptr } }; @@ -273,6 +274,7 @@ const HeaderNormalizer* const HttpMsgHeadShared::header_norms[HEAD__MAX_VALUE] = &NORMALIZER_TOKEN_LIST, // HEAD_CONTENT_TRANSFER_ENCODING &NORMALIZER_BASIC, // HEAD_MIME_VERSION &NORMALIZER_BASIC, // HEAD_PROXY_AGENT + &NORMALIZER_BASIC, // HEAD_CONTENT_DISPOSITION }; const RuleMap HttpModule::http_events[] = @@ -392,6 +394,7 @@ const RuleMap HttpModule::http_events[] = { EVENT_200_CONNECT_RESP_WITH_TE, "HTTP CONNECT 2XX response with Transfer-Encoding header" }, { EVENT_100_CONNECT_RESP, "HTTP CONNECT response with 1XX status code" }, { EVENT_EARLY_CONNECT_RESPONSE, "HTTP CONNECT response before request message completed" }, + { EVENT_MALFORMED_CD_FILENAME, "malformed HTTP Content-Disposition filename parameter" }, { 0, nullptr } }; diff --git a/src/service_inspectors/http_inspect/http_transaction.cc b/src/service_inspectors/http_inspect/http_transaction.cc index 180ec8e89..e31e6f4a7 100644 --- a/src/service_inspectors/http_inspect/http_transaction.cc +++ b/src/service_inspectors/http_inspect/http_transaction.cc @@ -32,8 +32,6 @@ #include "http_msg_status.h" #include "http_msg_trailer.h" -#include "hash/hash_key_operations.h" - using namespace HttpCommon; using namespace HttpEnums; using namespace snort; @@ -254,17 +252,3 @@ void HttpTransaction::set_one_hundred_response() one_hundred_response = true; second_response_expected = true; } - -void HttpTransaction::set_file_processing_id(const SourceId source_id, - const uint64_t transaction_id, const uint32_t stream_id) -{ - const int data_len = sizeof(source_id) + sizeof(transaction_id) + sizeof(stream_id); - uint8_t data[data_len]; - memcpy(data, (void*)&source_id, sizeof(source_id)); - uint32_t offset = sizeof(source_id); - memcpy(data + offset, (void*)&transaction_id, sizeof(transaction_id)); - offset += sizeof(transaction_id); - memcpy(data + offset, (void*)&stream_id, sizeof(stream_id)); - - file_processing_id[source_id] = str_to_hash(data, data_len); -} diff --git a/src/service_inspectors/http_inspect/http_transaction.h b/src/service_inspectors/http_inspect/http_transaction.h index fd14372ce..aa9fc19aa 100644 --- a/src/service_inspectors/http_inspect/http_transaction.h +++ b/src/service_inspectors/http_inspect/http_transaction.h @@ -68,13 +68,6 @@ public: HttpTransaction* next = nullptr; - // Each file processed has a unique id per flow: hash(source_id, transaction_id, h2_stream_id) - // If this is an HTTP/1 flow, h2_stream_id is 0 - void set_file_processing_id(const HttpCommon::SourceId source_id, - const uint64_t transaction_id, const uint32_t stream_id); - uint64_t get_file_processing_id(HttpCommon::SourceId source_id) - { return file_processing_id[source_id]; } - private: HttpTransaction(HttpFlowData* session_data_) : session_data(session_data_) { @@ -95,8 +88,6 @@ private: HttpMsgSection* discard_list = nullptr; HttpInfractions* infractions[2]; - uint64_t file_processing_id[2] = { 0, 0 }; - bool response_seen = false; bool one_hundred_response = false; bool second_response_expected = false; diff --git a/src/service_inspectors/http_inspect/http_uri.cc b/src/service_inspectors/http_inspect/http_uri.cc index f92cb465e..77ee45b1e 100644 --- a/src/service_inspectors/http_inspect/http_uri.cc +++ b/src/service_inspectors/http_inspect/http_uri.cc @@ -342,19 +342,6 @@ void HttpUri::normalize() } } -size_t HttpUri::get_file_proc_hash() -{ - if (abs_path_hash) - return abs_path_hash; - - if (abs_path.length() > 0 ) - { - abs_path_hash = str_to_hash(abs_path.start(), abs_path.length()); - } - - return abs_path_hash; -} - const Field& HttpUri::get_norm_host() { if (host_norm.length() != STAT_NOT_COMPUTE) diff --git a/src/service_inspectors/http_inspect/http_uri.h b/src/service_inspectors/http_inspect/http_uri.h index c8e5f847f..866538865 100644 --- a/src/service_inspectors/http_inspect/http_uri.h +++ b/src/service_inspectors/http_inspect/http_uri.h @@ -55,7 +55,6 @@ public: const Field& get_norm_query() { return query_norm; } const Field& get_norm_fragment() { return fragment_norm; } const Field& get_norm_classic() { return classic_norm; } - size_t get_file_proc_hash(); private: const Field uri; @@ -76,7 +75,6 @@ private: Field classic_norm; HttpInfractions* const infractions; HttpEventGen* const events; - size_t abs_path_hash = 0; HttpEnums::UriType uri_type = HttpEnums::URI__NOT_COMPUTE; const HttpEnums::MethodId method_id; const HttpParaList::UriParam& uri_param;