unified2 is currently the best logger for serializing various data like
events and packets and is the only Logger supporting extra data fields.
-Currently only the SMTP and HTTP inspectors produce exta data.
+Currently only the SMTP and HTTP inspectors produce extra data.
There is a separate utility called u2spewfoo provided under tools/ that can
dump the binary u2 log in text format.
// BIT(i) yields a mask with only bit number i set. Numbering is 1-based: BIT(1) == 0x1.
// The argument is parenthesized so that expression arguments such as BIT(a | b) expand
// with the intended precedence.
#define BIT(i) (0x1 << ((i)-1))
+// Marks packet p as a candidate for the extra data identified by xid by setting the
+// matching bit (see BIT) in the packet's extra data mask.
+inline void SetExtraData(Packet* p, const uint32_t xid)
+{
+    p->xtradata_mask |= BIT(xid);
+}
+
// Reads the two bytes at p as a 16-bit value in network (big-endian) order and returns it
// in host order. The bytes are combined individually, so p does not need to be 2-byte
// aligned and no uint16_t is read through a uint8_t pointer.
inline uint16_t extract_16bits(const uint8_t* const p)
{ return static_cast<uint16_t>((p[0] << 8) | p[1]); }
(splitter) accepts TCP payload data from Stream and subdivides it into message sections.
HttpInspect (inspector) processes individual message sections.
-Unlike other inspectors HI has an empty eval() member. All processing is done by the inspector
-process() member which is called directly from splitter reassemble(). Thus the data flow for
-processing a message section is one or more calls to splitter scan(), followed by one or more calls
-to splitter reassemble(), the last of which calls process(). The reassembled buffer returned to the
-framework is already ready for detection and the subsequent eval() call does nothing.
-
Splitter finish() is called by the framework when the TCP connection closes (including pruning).
It serves several specialized purposes in cases where the HTTP message is truncated (ends
unexpectedly).
extern const bool is_sp_tab_cr_lf_vt_ff[256];
extern const bool is_sp_tab_quote_dquote[256];
extern const bool is_print_char[256]; // printable includes SP, tab, CR, LF
+extern const bool is_sp_comma[256];
} // end namespace HttpEnums
#endif
#include "http_msg_status.h"
#include "http_msg_trailer.h"
#include "http_test_manager.h"
+#include "log/unified2.h"
#include "protocols/packet.h"
+#include "stream/stream.h"
using namespace HttpEnums;
+uint32_t HttpInspect::xtra_trueip_id; // extra data id used with get_xtra_trueip
+uint32_t HttpInspect::xtra_uri_id; // extra data id used with get_xtra_uri
+uint32_t HttpInspect::xtra_host_id; // extra data id used with get_xtra_host
+uint32_t HttpInspect::xtra_jsnorm_id; // extra data id used with get_xtra_jsnorm
+
HttpInspect::HttpInspect(const HttpParaList* params_) : params(params_)
{
#ifdef REG_TEST
{
if (params->js_norm_param.normalize_javascript)
params->js_norm_param.js_norm->configure();
+
+ xtra_trueip_id = Stream::reg_xtra_data_cb(get_xtra_trueip);
+ xtra_uri_id = Stream::reg_xtra_data_cb(get_xtra_uri);
+ xtra_host_id = Stream::reg_xtra_data_cb(get_xtra_host);
+ xtra_jsnorm_id = Stream::reg_xtra_data_cb(get_xtra_jsnorm);
+
return true;
}
return get_buf(ibt, p, b);
}
+// Extra data callback for the client's "true" IP address. Starting from the latest message
+// section stored on the flow, it locates the client-side headers of that section's
+// transaction and exposes the address they yield. Returns 1 with *buf, *len, and *type
+// filled in when an address is available; otherwise returns 0 and leaves them untouched.
+int HttpInspect::get_xtra_trueip(Flow* flow, uint8_t** buf, uint32_t* len, uint32_t* type)
+{
+    const HttpFlowData* const flow_data =
+        (HttpFlowData*)flow->get_flow_data(HttpFlowData::inspector_id);
+    if (!flow_data || !flow_data->latest_section)
+        return 0;
+
+    const HttpTransaction* const trans = flow_data->latest_section->get_transaction();
+    HttpMsgHeader* const client_headers = trans->get_header(SRC_CLIENT);
+    if (!client_headers)
+        return 0;
+
+    const Field& address = client_headers->get_true_ip();
+    if (address.length() <= 0)
+        return 0;
+
+    *buf = const_cast<uint8_t*>(address.start());
+    *len = address.length();
+    // A 4-byte value is reported as IPv4, any other length as IPv6
+    if (*len == 4)
+        *type = EVENT_INFO_XFF_IPV4;
+    else
+        *type = EVENT_INFO_XFF_IPV6;
+    return 1;
+}
+
+// Extra data callback for the request URI. Uses the latest message section stored on the
+// flow to reach its transaction's request line. Returns 1 with *buf, *len, and *type filled
+// in when a non-empty URI exists; otherwise returns 0 and leaves them untouched.
+int HttpInspect::get_xtra_uri(Flow* flow, uint8_t** buf, uint32_t* len, uint32_t* type)
+{
+    const HttpFlowData* const flow_data =
+        (HttpFlowData*)flow->get_flow_data(HttpFlowData::inspector_id);
+    if (!flow_data || !flow_data->latest_section)
+        return 0;
+
+    const HttpTransaction* const trans = flow_data->latest_section->get_transaction();
+    HttpMsgRequest* const request_line = trans->get_request();
+    if (!request_line)
+        return 0;
+
+    const Field& raw_uri = request_line->get_uri();
+    if (raw_uri.length() <= 0)
+        return 0;
+
+    *type = EVENT_INFO_HTTP_URI;
+    *len = raw_uri.length();
+    *buf = const_cast<uint8_t*>(raw_uri.start());
+    return 1;
+}
+
+// Extra data callback for the normalized Host header value. Uses the latest message section
+// stored on the flow to reach the client-side headers of its transaction. Returns 1 with
+// *buf, *len, and *type filled in when a non-empty Host value exists; otherwise returns 0
+// and leaves them untouched.
+int HttpInspect::get_xtra_host(Flow* flow, uint8_t** buf, uint32_t* len, uint32_t* type)
+{
+    const HttpFlowData* const flow_data =
+        (HttpFlowData*)flow->get_flow_data(HttpFlowData::inspector_id);
+    if (!flow_data || !flow_data->latest_section)
+        return 0;
+
+    const HttpTransaction* const trans = flow_data->latest_section->get_transaction();
+    HttpMsgHeader* const client_headers = trans->get_header(SRC_CLIENT);
+    if (!client_headers)
+        return 0;
+
+    const Field& host_value = client_headers->get_header_value_norm(HEAD_HOST);
+    if (host_value.length() <= 0)
+        return 0;
+
+    *type = EVENT_INFO_HTTP_HOSTNAME;
+    *len = host_value.length();
+    *buf = const_cast<uint8_t*>(host_value.start());
+    return 1;
+}
+
+// Extra data callback for normalized response body data. The "jsnorm" name is a legacy
+// label: what is returned is the normalized data of a response message body, which may
+// reflect other kinds of normalization besides JavaScript normalization. Nothing is
+// returned, however, unless JavaScript normalization is turned on. Returns 1 with *buf,
+// *len, and *type filled in when data is available; otherwise returns 0 and leaves them
+// untouched.
+int HttpInspect::get_xtra_jsnorm(Flow* flow, uint8_t** buf, uint32_t* len, uint32_t* type)
+{
+    const HttpFlowData* const flow_data =
+        (HttpFlowData*)flow->get_flow_data(HttpFlowData::inspector_id);
+    if (!flow_data || !flow_data->latest_section)
+        return 0;
+
+    // Only sections that came from the server qualify
+    if (flow_data->latest_section->get_source_id() != SRC_SERVER)
+        return 0;
+
+    // JavaScript normalization must be enabled in the section's parameters
+    if (!flow_data->latest_section->get_params()->js_norm_param.normalize_javascript)
+        return 0;
+
+    const HttpTransaction* const trans = flow_data->latest_section->get_transaction();
+    HttpMsgBody* const msg_body = trans->get_body();
+    if (!msg_body)
+        return 0;
+    assert((void*)msg_body == (void*)flow_data->latest_section);
+
+    const Field& normalized = msg_body->get_detect_data();
+    if (normalized.length() <= 0)
+        return 0;
+
+    *type = EVENT_INFO_JSNORM_DATA;
+    *len = normalized.length();
+    *buf = const_cast<uint8_t*>(normalized.start());
+    return 1;
+}
+
void HttpInspect::eval(Packet* p)
{
const SourceId source_id = p->is_from_client() ? SRC_CLIENT : SRC_SERVER;
}
}
#endif
+
+ // Whenever we process a packet we set these flags. If someone asks for an extra data
+ // buffer the JIT code will figure out if we actually have it.
+ SetExtraData(p, xtra_trueip_id);
+ SetExtraData(p, xtra_uri_id);
+ SetExtraData(p, xtra_host_id);
+ SetExtraData(p, xtra_jsnorm_id);
}
bool HttpInspect::process(const uint8_t* data, const uint16_t dsize, Flow* const flow,
// HttpInspect class
//-------------------------------------------------------------------------
-#include "log/messages.h"
-
#include "http_enum.h"
#include "http_field.h"
#include "http_module.h"
#include "http_msg_section.h"
#include "http_stream_splitter.h"
+#include "log/messages.h"
class HttpApi;
}
static HttpEnums::InspectSection get_latest_is(const Packet* p);
+    // Callbacks that provide "extra data". Each one returns 1 after pointing *buf at the
+    // data and setting *len and *type, or returns 0 when it has nothing to offer.
+    static int get_xtra_trueip(Flow* flow, uint8_t** buf, uint32_t* len, uint32_t* type);
+    static int get_xtra_uri(Flow* flow, uint8_t** buf, uint32_t* len, uint32_t* type);
+    static int get_xtra_host(Flow* flow, uint8_t** buf, uint32_t* len, uint32_t* type);
+    static int get_xtra_jsnorm(Flow* flow, uint8_t** buf, uint32_t* len, uint32_t* type);
+
private:
friend HttpApi;
friend HttpStreamSplitter;
static HttpEnums::SourceId get_latest_src(const Packet* p);
const HttpParaList* const params;
+
+ // Registrations for "extra data"
+ static uint32_t xtra_trueip_id;
+ static uint32_t xtra_uri_id;
+ static uint32_t xtra_host_id;
+ static uint32_t xtra_jsnorm_id;
};
#endif
{
if (request != nullptr)
{
- const Field& tranaction_uri = request->get_uri_norm_classic();
- if (tranaction_uri.length() > 0)
+ const Field& transaction_uri = request->get_uri_norm_classic();
+ if (transaction_uri.length() > 0)
{
- file_flows->set_file_name(tranaction_uri.start(), tranaction_uri.length());
+ file_flows->set_file_name(transaction_uri.start(),
+ transaction_uri.length());
}
}
}
{ return detection_section ? HttpEnums::IS_DETECTION : HttpEnums::IS_BODY; }
bool detection_required() const override;
const Field& get_classic_client_body();
+    // Read-only access to the detect_data member. Declared const so it can also be called
+    // through a pointer or reference to a const object.
+    const Field& get_detect_data() const { return detect_data; }
static void fd_event_callback(void* context, int event);
protected:
#include "http_msg_header.h"
+#include "decompress/file_decomp.h"
#include "file_api/file_flows.h"
#include "file_api/file_service.h"
-#include "pub_sub/http_events.h"
-#include "decompress/file_decomp.h"
-
#include "http_api.h"
#include "http_msg_request.h"
#include "http_msg_body.h"
+#include "pub_sub/http_events.h"
+#include "sfip/sf_ip.h"
using namespace HttpEnums;
}
}
+// Returns the client address claimed by the X-Forwarded-For header or, when that header is
+// absent or empty, by the True-Client-IP header. The result is the address in binary form
+// (one 32-bit word for IPv4, four for IPv6) and is stored in true_ip, so the headers are only
+// parsed the first time this is called. The returned Field has length STAT_NOT_PRESENT when
+// neither header supplies a value and STAT_PROBLEMATIC when the value does not parse as an
+// IP address.
+const Field& HttpMsgHeader::get_true_ip()
+{
+    // Anything other than STAT_NOT_COMPUTE means an earlier call already settled the answer
+    if (true_ip.length() != STAT_NOT_COMPUTE)
+        return true_ip;
+
+    const Field* header_to_use;
+    const Field& xff = get_header_value_norm(HEAD_X_FORWARDED_FOR);
+    if (xff.length() > 0)
+        header_to_use = &xff;
+    else
+    {
+        const Field& tcip = get_header_value_norm(HEAD_TRUE_CLIENT_IP);
+        if (tcip.length() > 0)
+            header_to_use = &tcip;
+        else
+        {
+            true_ip.set(STAT_NOT_PRESENT);
+            return true_ip;
+        }
+    }
+
+    // This is potentially a comma-separated list of IP addresses. Just take the first one in
+    // the list. Since this is a normalized header field any whitespace will be an actual space.
+    // The copy exists to give the address parser a null-terminated string.
+    uint8_t* addr_str = new uint8_t[header_to_use->length()+1];
+    int32_t length;
+    for (length = 0; length < header_to_use->length(); length++)
+    {
+        if (is_sp_comma[header_to_use->start()[length]])
+            break;
+        addr_str[length] = header_to_use->start()[length];
+    }
+    addr_str[length] = '\0';
+
+    SfIp tmp_sfip;
+    const SfIpRet status = tmp_sfip.set((char*)addr_str);
+    delete[] addr_str;
+    if (status != SFIP_SUCCESS)
+    {
+        true_ip.set(STAT_PROBLEMATIC);
+    }
+    else
+    {
+        // The buffer is allocated as uint8_t[] because that is the pointer type handed to
+        // Field. NOTE(review): the final argument presumably transfers ownership to Field;
+        // if so, its delete[] now matches the type used in new[] - confirm against Field.
+        const size_t addr_bytes = (tmp_sfip.is_ip6() ? 4 : 1) * sizeof(uint32_t);
+        uint8_t* const addr_buf = new uint8_t[addr_bytes];
+        memcpy(addr_buf, tmp_sfip.get_ptr(), addr_bytes);
+        true_ip.set(addr_bytes, addr_buf, true);
+    }
+    return true_ip;
+}
+
void HttpMsgHeader::gen_events()
{
if ((get_header_count(HEAD_CONTENT_LENGTH) > 0) &&
#include "file_api/file_api.h"
#include "http_enum.h"
+#include "http_field.h"
#include "http_msg_head_shared.h"
//-------------------------------------------------------------------------
void update_flow() override;
void gen_events() override;
void publish() override;
+ const Field& get_true_ip();
private:
// Dummy configurations to support MIME processing
bool detection_section = true;
+ Field true_ip;
+
#ifdef REG_TEST
void print_section(FILE* output) override;
#endif
{ return HttpEnums::IS_NONE; }
virtual bool detection_required() const;
HttpEnums::SourceId get_source_id() const { return source_id; }
+ HttpTransaction* get_transaction() const { return transaction; } // accessor for the transaction member
+ const HttpParaList* get_params() const { return params; } // accessor for the params member
// Minimum necessary processing for every message
virtual void analyze() = 0;
false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false
};
+const bool HttpEnums::is_sp_comma[256] = // indexed by byte value; true only for space (0x20) and comma (0x2C)
+{
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+
+ true, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, // 0x20-0x2F: ' ' and ','
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false
+};
+
const bool HttpEnums::is_print_char[256] =
{
false, false, false, false, false, false, false, false, false, true, true, false, false, true, false, false,