From: Oleksii Shumeiko -X (oshumeik - SOFTSERVE INC at Cisco) Date: Tue, 29 Nov 2022 13:56:17 +0000 (+0000) Subject: Pull request #3681: js_norm: implement Enhanced JS Normalization for PDF X-Git-Tag: 3.1.48.0~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=89ed79998b4411cbeb9d4762858bdfa8a260e64e;p=thirdparty%2Fsnort3.git Pull request #3681: js_norm: implement Enhanced JS Normalization for PDF Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_pdf to master Squashed commit of the following: commit 343d3c517880d059532dfc803feae254ef491cd7 Author: Oleksii Shumeiko Date: Tue Nov 8 17:53:51 2022 +0200 js_norm: implement Enhanced JS Normalization for PDF * js_norm: implement JS extractor from PDF * js_norm: add unit tests for JS extractor from PDF * js_norm: update dev_notes * http_inspect: implement JS from PDF normalizer * http_inspect: update dev_notes --- diff --git a/src/js_norm/CMakeLists.txt b/src/js_norm/CMakeLists.txt index 7128c458b..15aa9c20c 100644 --- a/src/js_norm/CMakeLists.txt +++ b/src/js_norm/CMakeLists.txt @@ -1,9 +1,15 @@ +FLEX ( pdf_tokenizer + ${CMAKE_CURRENT_SOURCE_DIR}/pdf_tokenizer.l + ${CMAKE_CURRENT_BINARY_DIR}/pdf_tokenizer.cc +) + FLEX ( js_tokenizer ${CMAKE_CURRENT_SOURCE_DIR}/js_tokenizer.l ${CMAKE_CURRENT_BINARY_DIR}/js_tokenizer.cc ) set ( JS_SOURCES + ${pdf_tokenizer_OUTPUTS} ${js_tokenizer_OUTPUTS} js_config.h js_enum.h @@ -16,6 +22,7 @@ set ( JS_SOURCES js_normalizer.cc js_normalizer.h js_tokenizer.h + pdf_tokenizer.h ) add_library(js_norm OBJECT ${JS_SOURCES}) diff --git a/src/js_norm/dev_notes.txt b/src/js_norm/dev_notes.txt index 6196aabe6..0e0a6b94c 100644 --- a/src/js_norm/dev_notes.txt +++ b/src/js_norm/dev_notes.txt @@ -12,7 +12,7 @@ So, the number of unique identifiers available is 65536 names per transaction. If Normalizer overruns the configured limit, built-in alert is generated. A config option to set the limit manually: - * js_norm.identifier_depth. + * js_norm.identifier_depth Identifiers from the ident_ignore list will be placed as is, without substitution. Starting with the listed identifier, any chain of dot accessors, brackets and function calls will be kept @@ -129,3 +129,12 @@ Verbosity levels: 4. Temporary buffer (debug build only) 5. Matched token (debug build only) 6. Identifier substitution (debug build only) + +PDF parser follows "PDF 32000-1:2008 First Edition 2008-7-1 Document +management Portable document format Part 1: PDF 1.7". +Known limitations: +* Nested dictionaries are not fully supported. Properties of the last object +are tracked. Once the nested object ends, it clears all info about the object +type. +* Nested dictionaries are not allowed in JavaScript-type dictionary. +* Stream objects are ignored. diff --git a/src/js_norm/js_enum.h b/src/js_norm/js_enum.h index 8f25b66ff..8df38525c 100644 --- a/src/js_norm/js_enum.h +++ b/src/js_norm/js_enum.h @@ -30,7 +30,9 @@ static constexpr unsigned js_gid = 154; enum { TRACE_PROC = 0, - TRACE_DUMP + TRACE_DUMP, + TRACE_PDF_PROC, + TRACE_PDF_DUMP }; // This enum must be synchronized with JSNormModule::peg_names[] in js_norm_module.cc diff --git a/src/js_norm/js_norm.cc b/src/js_norm/js_norm.cc index e21ba8bb4..a7e023002 100644 --- a/src/js_norm/js_norm.cc +++ b/src/js_norm/js_norm.cc @@ -23,9 +23,11 @@ #include "js_norm.h" +#include "log/messages.h" +#include "trace/trace_api.h" + #include "js_identifier_ctx.h" #include "js_normalizer.h" - #include "js_norm_module.h" using namespace jsn; @@ -110,14 +112,21 @@ void JSNorm::normalize(const void* in_data, size_t in_len, const void*& data, si } pdu_cnt = 0; + const Packet* packet = DetectionEngine::get_current_packet(); src_ptr = (const uint8_t*)in_data; src_end = src_ptr + in_len; while (alive and pre_proc()) { + trace_logf(3, js_trace, TRACE_DUMP, packet, + "original[%zu]: %.*s\n", src_end - src_ptr, (int)(src_end - src_ptr), src_ptr); + auto ret = jsn_ctx->normalize((const char*)src_ptr, src_end - src_ptr, ext_script_type); const uint8_t* next = (const uint8_t*)jsn_ctx->get_src_next(); + trace_logf(3, js_trace, TRACE_PROC, packet, + "normalizer returned with %d '%s'\n", ret, jsn::ret2str(ret)); + JSNormModule::increment_peg_counts(PEG_BYTES, next - src_ptr); src_ptr = next; diff --git a/src/js_norm/js_norm_module.cc b/src/js_norm/js_norm_module.cc index af573ba37..01d241e6b 100644 --- a/src/js_norm/js_norm_module.cc +++ b/src/js_norm/js_norm_module.cc @@ -79,8 +79,12 @@ const Parameter JSNormModule::params[] = static const TraceOption trace_options[] = { - { "proc", TRACE_PROC, "enable processing logging" }, - { "dump", TRACE_DUMP, "enable data logging" }, + { "proc", TRACE_PROC, "enable processing logging" }, + { "dump", TRACE_DUMP, "enable data logging" }, +#ifdef DEBUG_MSGS + { "pdf_proc", TRACE_PDF_PROC, "enable processing logging for PDF extractor" }, + { "pdf_dump", TRACE_PDF_DUMP, "enable data logging for PDF extractor" }, +#endif { nullptr, 0, nullptr } }; diff --git a/src/js_norm/js_normalizer.cc b/src/js_norm/js_normalizer.cc index 5407ee2ba..72df7eb10 100644 --- a/src/js_norm/js_normalizer.cc +++ b/src/js_norm/js_normalizer.cc @@ -24,6 +24,15 @@ #include "js_normalizer.h" #include "js_norm/js_enum.h" +#include "log/messages.h" +#include "trace/trace_api.h" + +namespace snort +{ +class Trace; +} + +extern THREAD_LOCAL const snort::Trace* js_trace; #define BUFF_EXP_FACTOR 1.3 diff --git a/src/js_norm/js_tokenizer.h b/src/js_norm/js_tokenizer.h index f5c97de0f..8eba08683 100644 --- a/src/js_norm/js_tokenizer.h +++ b/src/js_norm/js_tokenizer.h @@ -25,11 +25,6 @@ #include #include -#include "log/messages.h" -#include "trace/trace_api.h" - -extern THREAD_LOCAL const snort::Trace* js_trace; - // The longest pattern has 9 characters " < / s c r i p t > ", // 8 of them can reside in 1st chunk // Each character in the identifier forms its own group (pattern matching case), diff --git a/src/js_norm/js_tokenizer.l b/src/js_norm/js_tokenizer.l index d368dabc6..77214d6ca 100644 --- a/src/js_norm/js_tokenizer.l +++ b/src/js_norm/js_tokenizer.l @@ -20,6 +20,7 @@ %option c++ %option yyclass="JSTokenizer" +%option prefix="js" %option align full 8bit batch never-interactive %option noinput nounput noyywrap %option noyy_push_state noyy_pop_state noyy_top_state @@ -37,8 +38,12 @@ #include "js_norm/js_enum.h" #include "js_norm/js_identifier_ctx.h" #include "js_norm/js_tokenizer.h" +#include "log/messages.h" +#include "trace/trace_api.h" #include "utils/util_cstring.h" +extern THREAD_LOCAL const snort::Trace* js_trace; + using namespace jsn; #define YY_NO_UNPUT diff --git a/src/js_norm/pdf_tokenizer.h b/src/js_norm/pdf_tokenizer.h new file mode 100644 index 000000000..1b80be7da --- /dev/null +++ b/src/js_norm/pdf_tokenizer.h @@ -0,0 +1,127 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// pdf_tokenizer.h author Cisco + +#ifndef PDF_TOKENIZER_H +#define PDF_TOKENIZER_H + +#include +#include +#include +#include +#include + +#define PDFTOKENIZER_NAME_MAX_SIZE 16 + +namespace jsn +{ + +class PDFTokenizer : public yyFlexLexer +{ +public: + enum PDFRet + { + EOS = 0, + NOT_NAME_IN_DICTIONARY_KEY, + INCOMPLETE_ARRAY_IN_DICTIONARY, + MAX + }; + + PDFTokenizer() = delete; + explicit PDFTokenizer(std::istream& in, std::ostream& out); + ~PDFTokenizer() override; + + PDFRet process(); + +private: + int yylex() override; + + PDFRet h_dict_open(); + PDFRet h_dict_close(); + PDFRet h_dict_name(); + PDFRet h_dict_other(); + inline bool h_lit_str(); + inline bool h_hex_str(); + inline bool h_lit_open(); + inline bool h_lit_close(); + PDFRet h_lit_unescape(); + PDFRet h_lit_oct2chr(); + PDFRet h_hex_hex2chr(); + + struct ObjectString + { + void clear() + { parenthesis_level = 0; } + + int parenthesis_level = 0; + }; + + struct ObjectArray + { + void clear() + { nesting_level = 0; } + + int nesting_level = 0; + }; + + struct ObjectDictionary + { + void clear() + { key_value = true; array_level = 0; } + + bool key_value = true; + int array_level = 0; + }; + + struct DictionaryEntry + { + void clear() + { key[0] = '\0'; } + + char key[PDFTOKENIZER_NAME_MAX_SIZE] = {0}; + }; + + ObjectString obj_string; + ObjectArray obj_array; + ObjectDictionary obj_dictionary; + DictionaryEntry obj_entry; +}; + +bool PDFTokenizer::h_lit_str() +{ + return obj_dictionary.array_level == obj_array.nesting_level and !strcmp(obj_entry.key, "/JS"); +} + +bool PDFTokenizer::h_hex_str() +{ + return obj_dictionary.array_level == obj_array.nesting_level and !strcmp(obj_entry.key, "/JS"); +} + +bool PDFTokenizer::h_lit_open() +{ + return ++obj_string.parenthesis_level == 1; +} + +bool PDFTokenizer::h_lit_close() +{ + return --obj_string.parenthesis_level == 0; +} + +} + +#endif diff --git a/src/js_norm/pdf_tokenizer.l b/src/js_norm/pdf_tokenizer.l new file mode 100644 index 000000000..181c21959 --- /dev/null +++ b/src/js_norm/pdf_tokenizer.l @@ -0,0 +1,337 @@ +/*-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// pdf_tokenizer.l author Cisco +*/ + +%option c++ +%option yyclass="PDFTokenizer" +%option prefix="pdf" +%option align full 8bit batch never-interactive stack +%option noinput nounput noyywrap noyy_top_state + +%{ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include +#include +#include + +#include "js_norm/js_enum.h" +#include "js_norm/pdf_tokenizer.h" +#include "log/messages.h" +#include "trace/trace_api.h" + +extern THREAD_LOCAL const snort::Trace* js_trace; + +using namespace jsn; + +#define YY_NO_UNPUT + +#define YY_FATAL_ERROR(msg) { snort::FatalError("%s", msg); } + +#define PUSH(x) yy_push_state(x) +#define POP() yy_pop_state() + +#define YY_USER_ACTION \ + { \ + debug_logf(5, js_trace, TRACE_PDF_PROC, nullptr, \ + "PDF pattern #%d, sc %d\n", yy_act, YY_START); \ + \ + debug_logf(5, js_trace, TRACE_PDF_DUMP, nullptr, \ + "PDF text '%s'\n", YYText()); \ + } + +#define EXEC(f) \ + { \ + auto r = (f); \ + if (r) \ + return r; \ + } + +%} + +/* PDF 32000-1:2008 definitions follow */ + +/* 7.2.2 Character Set */ +CHARS_WHITESPACE \x00\x09\x0a\x0c\x0d\x20 +CHARS_DELIMITER \(\)\<\>\[\]\{\}\/\% +GRP_WHITESPACE [\x00\x09\x0a\x0c\x0d\x20] +GRP_DELIMITER [\(\)\<\>\[\]\{\}\/\%] +GRP_REGULAR [^\x00\x09\x0a\x0c\x0d\x20\(\)\<\>\[\]\{\}\/\%] + +/* 7.2.3 Comments */ +COMMENT %.* + +/* 7.3.2 Boolean Objects */ +OBJ_BOOLEAN true|false + +/* 7.3.3 Numeric Objects */ +OBJ_INT_NUM [+-]?[0-9]{1,64} +OBJ_REL_NUM [+-]?("."?[0-9]{1,64}|[0-9]{1,64}"."?|[0-9]{1,64}"."?[0-9]{1,64}) + +/* 7.3.4 String Objects */ +OBJ_LIT_STR_OPEN "(" +OBJ_LIT_STR_CLOSE ")" +OBJ_HEX_STR_OPEN "<" +OBJ_HEX_STR_CLOSE ">" + +/* 7.3.4.2 Literal Strings */ +LIT_STR_ESC \\[^0-7] +LIT_STR_ESC_OCT \\[0-7]{1}|\\[0-7]{2}|\\[0-7]{3} +LIT_STR_ESC_EOL \\[\x0d\x0a]|\\\x0d\x0a +LIT_STR_EOL [\x0d\x0a]|\x0d\x0a +LIT_STR_BODY [^\\\(\)]{1,64} + +/* 7.3.4.3 Hexadecimal Strings */ +HEX_STR_BODY [0-9A-Fa-f]{1,64} +HEX_STR_SKIP [^0-9A-Fa-f>]{1,64} + +/* 7.3.5 Name Objects */ +OBJ_NAME \/{GRP_REGULAR}{1,256} + +/* 7.3.6 Array Objects */ +OBJ_ARRAY_OPEN "[" +OBJ_ARRAY_CLOSE "]" + +/* 7.3.7 Dictionary Objects */ +OBJ_DICT_OPEN "<<" +OBJ_DICT_CLOSE ">>" + +/* FIXIT: improve bytes consuming */ +OBJ_DICT_SKIP . + +/* 7.3.8 Stream Objects */ +OBJ_STREAM_OPEN stream$ +OBJ_STREAM_CLOSE ^endstream + +/* 7.3.9 Null Object */ +OBJ_NULL null + +/* 7.3.10 Indirect Objects */ +INDIRECT_OBJ {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}+obj +RECORD_OBJ {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}+R + +/* Not dictionary, not strings */ +SKIP [^<\(%]{1,64} +WHITESPACE {GRP_WHITESPACE}{1,64} + +/* Start conditions: INITIAL or inside dictionary, literal string, hexadecimal string */ +%x dictnr +%x litstr +%x hexstr +%x jslstr +%x jshstr + +%% + +{SKIP} { } +{COMMENT} { } + +{OBJ_DICT_OPEN} { PUSH(dictnr); EXEC(h_dict_open()) } +{OBJ_DICT_CLOSE} { POP(); EXEC(h_dict_close()) } +{COMMENT} { } +{WHITESPACE} { } +{RECORD_OBJ} { EXEC(h_dict_other()) } +{OBJ_BOOLEAN} { EXEC(h_dict_other()) } +{OBJ_INT_NUM} { EXEC(h_dict_other()) } +{OBJ_REL_NUM} { EXEC(h_dict_other()) } +{OBJ_NULL} { EXEC(h_dict_other()) } +{OBJ_NAME} { EXEC(h_dict_name()) } +{OBJ_ARRAY_OPEN} { ++obj_array.nesting_level; EXEC(h_dict_other()) } +{OBJ_ARRAY_CLOSE} { --obj_array.nesting_level; EXEC(h_dict_other()) } +{OBJ_LIT_STR_OPEN} { EXEC(h_dict_other()) if (h_lit_str()) PUSH(jslstr); else PUSH(litstr); yyless(0); } +{OBJ_HEX_STR_OPEN} { EXEC(h_dict_other()) if (h_hex_str()) PUSH(jshstr); else PUSH(hexstr); yyless(0); } +{OBJ_DICT_SKIP} { } + +{OBJ_LIT_STR_OPEN} { if (h_lit_open()) PUSH(litstr); } +{OBJ_LIT_STR_OPEN} { h_lit_open(); } +{OBJ_LIT_STR_CLOSE} { if (h_lit_close()) POP(); } +{LIT_STR_ESC} { } +{LIT_STR_ESC_OCT} { } +{LIT_STR_ESC_EOL} { } +{LIT_STR_EOL} { } +{LIT_STR_BODY} { } + +{OBJ_HEX_STR_OPEN} { PUSH(hexstr); } +{OBJ_HEX_STR_CLOSE} { POP(); } +{HEX_STR_BODY} { } +{HEX_STR_SKIP} { } + +{OBJ_LIT_STR_OPEN} { if (!h_lit_open()) ECHO; } +{OBJ_LIT_STR_CLOSE} { if (h_lit_close()) POP(); else ECHO; } +{LIT_STR_ESC} { EXEC(h_lit_unescape()) } +{LIT_STR_ESC_OCT} { EXEC(h_lit_oct2chr()) } +{LIT_STR_ESC_EOL}{WHITESPACE} { } +{LIT_STR_EOL} { ECHO; } +{LIT_STR_BODY} { ECHO; } + +{OBJ_HEX_STR_OPEN} { } +{OBJ_HEX_STR_CLOSE} { POP(); } +{HEX_STR_BODY} { EXEC(h_hex_hex2chr()) } +{HEX_STR_SKIP} { } + +<> { return PDFRet::EOS; } + +%% + +PDFTokenizer::PDFRet PDFTokenizer::h_dict_open() +{ + obj_dictionary.clear(); + obj_dictionary.array_level = obj_array.nesting_level; + + debug_logf(6, js_trace, TRACE_PDF_PROC, nullptr, + "dictionary open, at array level %d\n", obj_array.nesting_level); + + return PDFRet::EOS; +} + +PDFTokenizer::PDFRet PDFTokenizer::h_dict_close() +{ + obj_dictionary.clear(); + + debug_logf(6, js_trace, TRACE_PDF_PROC, nullptr, + "dictionary close, at array level %d\n", obj_array.nesting_level); + + if (obj_dictionary.array_level != obj_array.nesting_level) + return PDFRet::INCOMPLETE_ARRAY_IN_DICTIONARY; + + return PDFRet::EOS; +} + +PDFTokenizer::PDFRet PDFTokenizer::h_dict_other() +{ + if (obj_dictionary.array_level != obj_array.nesting_level) + return PDFRet::EOS; + + if (obj_dictionary.key_value) + return PDFRet::NOT_NAME_IN_DICTIONARY_KEY; + + debug_logf(6, js_trace, TRACE_PDF_PROC, nullptr, + "dictionary token: other\n"); + + obj_dictionary.key_value = !obj_dictionary.key_value; + + return PDFRet::EOS; +} + +PDFTokenizer::PDFRet PDFTokenizer::h_dict_name() +{ + if (obj_dictionary.array_level != obj_array.nesting_level) + return PDFRet::EOS; + + if (obj_dictionary.key_value) + strncpy(obj_entry.key, yytext, sizeof(obj_entry.key) - 1); + + obj_dictionary.key_value = !obj_dictionary.key_value; + + debug_logf(6, js_trace, TRACE_PDF_PROC, nullptr, + "dictionary token: name as %s\n", obj_dictionary.key_value ? "value" : "key"); + + debug_logf(6, js_trace, TRACE_PDF_DUMP, nullptr, + "dictionary entry: %s, %s\n", obj_entry.key, obj_dictionary.key_value ? yytext : "..."); + + return PDFRet::EOS; +} + +PDFTokenizer::PDFRet PDFTokenizer::h_lit_unescape() +{ + assert(yyleng == 2); + assert(yytext[0] == '\\'); + + char c; + + // 7.3.4.2 Literal Strings, Table 3 Escape sequences in literal strings + switch (yytext[1]) + { + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case '(': c = '('; break; + case ')': c = ')'; break; + case '\\': c = '\\'; break; + default: c = yytext[1]; + } + + yyout << c; + + return PDFRet::EOS; +} + +PDFTokenizer::PDFRet PDFTokenizer::h_lit_oct2chr() +{ + assert(0 < yyleng and yyleng < 5); + assert(yytext[0] == '\\'); + + unsigned v; + sscanf(yytext + 1, "%o", &v); + yyout << (char)v; + + debug_logf(6, js_trace, TRACE_PDF_DUMP, nullptr, + "literal string, %s to %c \n", yytext, v); + + return PDFRet::EOS; +} + +PDFTokenizer::PDFRet PDFTokenizer::h_hex_hex2chr() +{ + int len = yyleng & ~1; + const char* ptr = yytext; + const char* end = yytext + len; + + while (ptr < end) + { + unsigned v; + sscanf(ptr, "%02x", &v); + yyout << (char)v; + ptr += 2; + } + + if (len != yyleng) + { + unsigned v; + sscanf(ptr, "%01x", &v); + yyout << (char)(v << 4); + } + + debug_logf(6, js_trace, TRACE_PDF_DUMP, nullptr, + "literal string, in hex: %s\n", yytext); + + return PDFRet::EOS; +} + +PDFTokenizer::PDFTokenizer(std::istream& in, std::ostream& out) + : yyFlexLexer(in, out) +{ +} + +PDFTokenizer::~PDFTokenizer() +{ +} + +PDFTokenizer::PDFRet PDFTokenizer::process() +{ + auto r = yylex(); + + return (PDFRet)r; +} diff --git a/src/js_norm/test/CMakeLists.txt b/src/js_norm/test/CMakeLists.txt index 4dce8615b..260c15a9c 100644 --- a/src/js_norm/test/CMakeLists.txt +++ b/src/js_norm/test/CMakeLists.txt @@ -3,6 +3,11 @@ FLEX ( js_tokenizer ${CMAKE_CURRENT_BINARY_DIR}/../js_tokenizer.cc ) +FLEX ( pdf_tokenizer + ${CMAKE_CURRENT_SOURCE_DIR}/../pdf_tokenizer.l + ${CMAKE_CURRENT_BINARY_DIR}/../pdf_tokenizer.cc +) + add_catch_test( js_normalizer_test SOURCES ${js_tokenizer_OUTPUTS} @@ -68,3 +73,9 @@ add_catch_test( jsn_test ${CMAKE_SOURCE_DIR}/src/utils/streambuf.cc js_test_stubs.cc ) + +add_catch_test( pdf_tokenizer_test + SOURCES + ${pdf_tokenizer_OUTPUTS} + js_test_stubs.cc +) diff --git a/src/js_norm/test/js_test_options.cc b/src/js_norm/test/js_test_options.cc index cd5c9dffa..5910ffeed 100644 --- a/src/js_norm/test/js_test_options.cc +++ b/src/js_norm/test/js_test_options.cc @@ -23,6 +23,8 @@ #include "js_test_options.h" +#include + Config::Config(const Config& other) : type(other.type) { switch (other.type) diff --git a/src/js_norm/test/js_test_stubs.cc b/src/js_norm/test/js_test_stubs.cc index 645598b11..58fb88a47 100644 --- a/src/js_norm/test/js_test_stubs.cc +++ b/src/js_norm/test/js_test_stubs.cc @@ -37,4 +37,5 @@ uint8_t TraceApi::get_constraints_generation() { return 0; } void TraceApi::filter(const Packet&) { } int DetectionEngine::queue_event(unsigned int, unsigned int) { return 0; } +Packet* DetectionEngine::get_current_packet() { return nullptr; } } diff --git a/src/js_norm/test/pdf_tokenizer_test.cc b/src/js_norm/test/pdf_tokenizer_test.cc new file mode 100644 index 000000000..b2986538b --- /dev/null +++ b/src/js_norm/test/pdf_tokenizer_test.cc @@ -0,0 +1,399 @@ +//-------------------------------------------------------------------------- +// Copyright (C) 2022-2022 Cisco and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License Version 2 as published +// by the Free Software Foundation. You may not use, modify or distribute +// this program under any other version of the GNU General Public License. +// +// This program is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +//-------------------------------------------------------------------------- +// pdf_tokenizer_test.cc author Cisco + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include + +#include "catch/catch.hpp" +#include "js_norm/pdf_tokenizer.h" + +using namespace jsn; +using namespace std; + +typedef pair Chunk; + +static void test_pdf_proc(const string& source, const string& expected, + PDFTokenizer::PDFRet ret = PDFTokenizer::PDFRet::EOS) +{ + istringstream in(source); + ostringstream out; + PDFTokenizer extractor(in, out); + + auto r = extractor.process(); + + CHECK(ret == r); + CHECK(expected == out.str()); +} + +static void test_pdf_proc(const vector& chunks) +{ + istringstream in; + ostringstream out; + PDFTokenizer extractor(in, out); + + for (auto& chunk : chunks) + { + auto src = chunk.first; + auto exp = chunk.second; + + in.rdbuf()->pubsetbuf((char*)src.c_str(), src.length()); + out.str(""); + + auto r = extractor.process(); + + CHECK(PDFTokenizer::PDFRet::EOS == r); + CHECK(exp == out.str()); + } +} + +TEST_CASE("basic", "[PDFTokenizer]") +{ + SECTION("no input") + { + test_pdf_proc( + "", + "" + ); + } + SECTION("minimal PDF") + { + test_pdf_proc( + "20 0 obj" + "<<" + "/Creator (Acrobat Pro DC 22.1.20169)" + "/ModDate (D:20220714154535+03'00')" + "/CreationDate (D:20220714153909+03'00')" + "/Producer (Acrobat Pro DC 22.1.20169)" + ">>" + "endobj", + "" + ); + } + SECTION("direct object") + { + test_pdf_proc( + "<<" + "/S /JavaScript" + ">>", + "" + ); + } + SECTION("indirect object") + { + test_pdf_proc( + "19 0 obj" + "<<" + "/S /JavaScript" + ">>" + "endobj", + "" + ); + } + SECTION("records") + { + test_pdf_proc( + "1 0 R" + "<<" + "/T 2 0 R" + ">>", + "" + ); + } + SECTION("sub array") + { + test_pdf_proc( + "<<" + "/K [ /name1 /name2 /name3 ]" + ">>", + "" + ); + } + SECTION("sub dictionary") + { + test_pdf_proc( + "<<" + "/K << /k1 /v1 /k2 /v2 >> " + ">>", + "" + ); + } + SECTION("more items") + { + test_pdf_proc( + "<00>" + "<< >>" + "<<" + "/K << /k1 /v1 /k2 [ /i1 /i2 /i3 /i4 ] /k3 /v3 /k4 <000102> /k5 (abc) >>" + ">>" + "[" + "<> <>> <>>>>" + "]", + "" + ); + } + SECTION("comments") + { + test_pdf_proc( + "% comment 1\n" + "<>" + "<>\n" + "(% not a comment)\n" + "% comment 2\n" + "<>", + "a % b; script 2" + ); + } + SECTION("escapes in string") + { + test_pdf_proc( + "(() \\n\\r\\t\\b\\f\\(\\)\\\\ \\123 \\A\\B\\C \\x\\y\\z)", + "" + ); + } + SECTION("hex string") + { + test_pdf_proc( + "<000102030405>", + "" + ); + } + SECTION("key after literal string") + { + test_pdf_proc( + "<<" + "/Lang (EN-GB)" + "/K [12 0 R]" + ">>", + "" + ); + } + SECTION("key after hex string") + { + test_pdf_proc( + "<<" + "/Lang <62617a>" + "/K [12 0 R]" + ">>", + "" + ); + } + SECTION("number values") + { + test_pdf_proc( + "<<" + "/N 10" + "/N 1.0" + "/N 1." + "/N .1" + "/N 1" + ">>", + "" + ); + } + SECTION("not name for key") + { + test_pdf_proc( + "<<" + "/K1 /V1" + "[/K2] /V2" + "/K3 /V3" + ">>", + "", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY + ); + } + SECTION("literal string as a key") + { + test_pdf_proc( + "<<" + "/K1 /V1" + "(foo) /V2" + "/K3 /V3" + ">>", + "", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY + ); + } + SECTION("hex string as a key") + { + test_pdf_proc( + "<<" + "/K1 /V1" + "<62617a> /V2" + "/K3 /V3" + ">>", + "", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY + ); + } + SECTION("incomplete array") + { + test_pdf_proc( + "<<" + "/K1 [ /V1 /V2 /V3 " + ">>", + "", PDFTokenizer::PDFRet::INCOMPLETE_ARRAY_IN_DICTIONARY + ); + } +} + +TEST_CASE("JS location", "[PDFTokenizer]") +{ + SECTION("wrong type") + { + test_pdf_proc( + "<>", + "" + ); + } + SECTION("no sub-type") + { + test_pdf_proc( + "<< /JS (script) >>", + "script" + ); + } + SECTION("no sub-type checks") + { + test_pdf_proc( + "<< /JS (script) /S /JavaScript >>", + "script" + ); + } + SECTION("no spaces") + { + test_pdf_proc( + "<>", + "script" + ); + } + SECTION("as hex string") + { + test_pdf_proc( + "<< /JS <62617a> >>", + "baz" + ); + test_pdf_proc( + "<< /JS <70> >>", + "p" + ); + test_pdf_proc( + "<< /JS <7> >>", + "p" + ); + } + SECTION("prepended with records") + { + test_pdf_proc( + "<>", + "script" + ); + } +} + +TEST_CASE("JS processing", "[PDFTokenizer]") +{ + SECTION("simple text") + { + test_pdf_proc( + "<>", + "var _abc1 = 'Hello World!';" + ); + } + SECTION("balanced parenthesis") + { + test_pdf_proc( + "<>", + "function foo() { console.log(\"Hello world!\") }" + ); + } + SECTION("with escapes") + { + test_pdf_proc( + "<>", + "function bar(var x)\r{\r console.log(\"baz\")\r}" + ); + } + SECTION("all escapes") + { + test_pdf_proc( + "<>", + "() \n\r\t\b\f()\\ \123 ABC xyz" + ); + } + SECTION("escaped new line") + { + test_pdf_proc( + "<>", + "var str = 'Hello, world!';" + ); + } +} + +TEST_CASE("split", "[PDFTokenizer]") +{ + SECTION("no input") + { + test_pdf_proc({ + {"", ""}, + {"", ""}, + {"", ""} + }); + } + SECTION("minimal PDF") + { + test_pdf_proc({ + {"20 0 obj", ""}, + {"<<", ""}, + {"/Creator (Acrobat Pro DC 22.1.20169)", ""}, + {"/ModDate (D:20220714154535+03'00')", ""}, + {"/CreationDate (D:20220714153909+03'00')", ""}, + {"/Producer (Acrobat Pro DC 22.1.20169)", ""}, + {">>", ""}, + {"endobj", ""} + }); + } + SECTION("script") + { + test_pdf_proc({ + {"% comment", ""}, + {"\n", ""}, + {"<>\n", "a % b"}, + {"(% not a", ""}, + {"comment)\n", ""}, + {"<>", ""}, + {"<>", "script 3"} + }); + } +} diff --git a/src/service_inspectors/http_inspect/dev_notes_js_norm.txt b/src/service_inspectors/http_inspect/dev_notes_js_norm.txt index 42a88673d..5eb613c17 100644 --- a/src/service_inspectors/http_inspect/dev_notes_js_norm.txt +++ b/src/service_inspectors/http_inspect/dev_notes_js_norm.txt @@ -1,9 +1,10 @@ HttpJsNorm class serves as a script Normalizer, and currently has two implementations: the Legacy Normalizer and the Enhanced Normalizer. -In NHI, there are two JSNorm extensions: +In NHI, there are three JSNorm extensions: * HttpInlineJSNorm, processes content of HTML script tags. * HttpExternalJSNorm, processes payload with JavaScript MIME type. + * HttpPDFJSNorm, processes payload with PDF MIME type. Normalization context is per transaction. It is created once js_data calls for normalized JS data, and is deleted once transaction ends. Partial inspections feed data incrementally to JS Normalizer, @@ -16,6 +17,9 @@ During message body analysis the Enhanced Normalizer does one of the following: 2. If it is an HTML-page, Normalizer searches for an opening tag and processes subsequent bytes in a stream mode, until it finds a closing tag. It proceeds and scans the entire message body for inline scripts. +3. If it is PDF file transferred as MIME attachment or as a message body then + Normalizer extracts strings marked with '/JS' keyword and normalizes their + content as JS text. Also, js_data IPS option's buffer is a part of NHI processing in order to start the normalization. diff --git a/src/service_inspectors/http_inspect/http_enum.h b/src/service_inspectors/http_inspect/http_enum.h index 9b2032dc5..8615bb5c8 100755 --- a/src/service_inspectors/http_inspect/http_enum.h +++ b/src/service_inspectors/http_inspect/http_enum.h @@ -63,7 +63,7 @@ enum PEG_COUNT { PEG_FLOW = 0, PEG_SCAN, PEG_REASSEMBLE, PEG_INSPECT, PEG_REQUES PEG_CONCURRENT_SESSIONS, PEG_MAX_CONCURRENT_SESSIONS, PEG_SCRIPT_DETECTION, PEG_PARTIAL_INSPECT, PEG_EXCESS_PARAMS, PEG_PARAMS, PEG_CUTOVERS, PEG_SSL_SEARCH_ABND_EARLY, PEG_PIPELINED_FLOWS, PEG_PIPELINED_REQUESTS, PEG_TOTAL_BYTES, PEG_JS_INLINE, PEG_JS_EXTERNAL, - PEG_SKIP_MIME_ATTACH, PEG_COUNT_MAX }; + PEG_JS_PDF, PEG_SKIP_MIME_ATTACH, PEG_COUNT_MAX }; // Result of scanning by splitter enum ScanResult { SCAN_NOT_FOUND, SCAN_NOT_FOUND_ACCELERATE, SCAN_FOUND, SCAN_FOUND_PIECE, @@ -121,11 +121,12 @@ enum Contentcoding { CONTENTCODE__OTHER=1, CONTENTCODE_GZIP, CONTENTCODE_DEFLATE CONTENTCODE_XPRESS, CONTENTCODE_XZ }; // Content media-types (MIME types) -enum ContentType { CT__OTHER=1, CT_APPLICATION_JAVASCRIPT, CT_APPLICATION_ECMASCRIPT, - CT_APPLICATION_X_JAVASCRIPT, CT_APPLICATION_X_ECMASCRIPT, CT_APPLICATION_XHTML_XML, - CT_TEXT_JAVASCRIPT, CT_TEXT_JAVASCRIPT_1_0, CT_TEXT_JAVASCRIPT_1_1, CT_TEXT_JAVASCRIPT_1_2, - CT_TEXT_JAVASCRIPT_1_3, CT_TEXT_JAVASCRIPT_1_4, CT_TEXT_JAVASCRIPT_1_5, CT_TEXT_ECMASCRIPT, - CT_TEXT_X_JAVASCRIPT, CT_TEXT_X_ECMASCRIPT, CT_TEXT_JSCRIPT, CT_TEXT_LIVESCRIPT, CT_TEXT_HTML }; +enum ContentType { CT__OTHER=1, CT_APPLICATION_PDF, CT_APPLICATION_OCTET_STREAM, + CT_APPLICATION_JAVASCRIPT, CT_APPLICATION_ECMASCRIPT, CT_APPLICATION_X_JAVASCRIPT, + CT_APPLICATION_X_ECMASCRIPT, CT_APPLICATION_XHTML_XML, CT_TEXT_JAVASCRIPT, + CT_TEXT_JAVASCRIPT_1_0, CT_TEXT_JAVASCRIPT_1_1, CT_TEXT_JAVASCRIPT_1_2, CT_TEXT_JAVASCRIPT_1_3, + CT_TEXT_JAVASCRIPT_1_4, CT_TEXT_JAVASCRIPT_1_5, CT_TEXT_ECMASCRIPT, CT_TEXT_X_JAVASCRIPT, + CT_TEXT_X_ECMASCRIPT, CT_TEXT_JSCRIPT, CT_TEXT_LIVESCRIPT, CT_TEXT_HTML }; // Transfer-Encoding header values enum TransferEncoding { TE__OTHER=1, TE_CHUNKED, TE_IDENTITY }; diff --git a/src/service_inspectors/http_inspect/http_flow_data.cc b/src/service_inspectors/http_inspect/http_flow_data.cc index c8c7a210d..1ca2c94d3 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.cc +++ b/src/service_inspectors/http_inspect/http_flow_data.cc @@ -116,6 +116,7 @@ HttpFlowData::~HttpFlowData() if (fd_state[k] != nullptr) File_Decomp_StopFree(fd_state[k]); delete js_ctx[k]; + delete js_ctx_mime[k]; } delete_pipeline(); diff --git a/src/service_inspectors/http_inspect/http_flow_data.h b/src/service_inspectors/http_inspect/http_flow_data.h index 2ae92e715..69dcb6498 100644 --- a/src/service_inspectors/http_inspect/http_flow_data.h +++ b/src/service_inspectors/http_inspect/http_flow_data.h @@ -208,6 +208,7 @@ private: void delete_pipeline(); HttpJSNorm* js_ctx[2] = { nullptr, nullptr }; + HttpJSNorm* js_ctx_mime[2] = { nullptr, nullptr }; bool cutover_on_clear = false; bool ssl_search_abandoned = false; diff --git a/src/service_inspectors/http_inspect/http_js_norm.cc b/src/service_inspectors/http_inspect/http_js_norm.cc index 58b662148..6744a6905 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.cc +++ b/src/service_inspectors/http_inspect/http_js_norm.cc @@ -381,17 +381,11 @@ bool HttpInlineJSNorm::pre_proc() ext_script_type = false; output_size = jsn_ctx->script_size(); - trace_logf(3, js_trace, TRACE_DUMP, packet, - "original[%zu]: %.*s\n", src_end - src_ptr, (int)(src_end - src_ptr), src_ptr); - return true; } bool HttpInlineJSNorm::post_proc(int ret) { - trace_logf(3, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(), - "normalizer returned with %d '%s'\n", ret, jsn::ret2str(ret)); - assert(http_events); assert(infractions); @@ -431,16 +425,63 @@ bool HttpExternalJSNorm::pre_proc() "script continues\n"); } - trace_logf(3, js_trace, TRACE_DUMP, packet, - "original[%zu]: %.*s\n", src_end - src_ptr, (int)(src_end - src_ptr), src_ptr); - return true; } bool HttpExternalJSNorm::post_proc(int ret) { - trace_logf(3, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(), - "normalizer returned with %d '%s'\n", ret, jsn::ret2str(ret)); + script_continue = ret == (int)jsn::JSTokenizer::SCRIPT_CONTINUE; + + return JSNorm::post_proc(ret); +} + +bool HttpPDFJSNorm::pre_proc() +{ + if (src_ptr >= src_end) + return false; + + const Packet* packet = DetectionEngine::get_current_packet(); + + if (!ext_script_type) + { + HttpModule::increment_peg_counts(PEG_JS_PDF); + trace_logf(1, js_trace, TRACE_PROC, packet, + "PDF starts\n"); + ext_script_type = true; + } + else + { + trace_logf(2, js_trace, TRACE_PROC, packet, + "PDF continues\n"); + } + + // an input stream should not write to its buffer + pdf_in.rdbuf()->pubsetbuf(const_cast((const char*)src_ptr), src_end - src_ptr); + pdf_out.clear(); + delete[] buf_pdf_out.take_data(); + + auto r = extractor.process(); + + if (r != PDFTokenizer::PDFRet::EOS) + { + trace_logf(2, js_trace, TRACE_PROC, DetectionEngine::get_current_packet(), + "pdf processing failed: %d\n", (int)r); + return false; + } + + src_ptr = (const uint8_t*)buf_pdf_out.data(); + src_end = src_ptr + buf_pdf_out.data_len(); + + // script object not found + if (!src_ptr) + return false; + + return true; +} + +bool HttpPDFJSNorm::post_proc(int ret) +{ + src_ptr = src_end; // one time per PDU, even if JS Normalizer has not finished script_continue = ret == (int)jsn::JSTokenizer::SCRIPT_CONTINUE; diff --git a/src/service_inspectors/http_inspect/http_js_norm.h b/src/service_inspectors/http_inspect/http_js_norm.h index ee61c14e0..1b750f0e1 100644 --- a/src/service_inspectors/http_inspect/http_js_norm.h +++ b/src/service_inspectors/http_inspect/http_js_norm.h @@ -22,9 +22,12 @@ #define HTTP_JS_NORM_H #include +#include #include "js_norm/js_norm.h" +#include "js_norm/pdf_tokenizer.h" #include "search_engines/search_tool.h" +#include "utils/streambuf.h" #include "http_field.h" #include "http_flow_data.h" @@ -88,5 +91,30 @@ protected: bool post_proc(int) override; }; +class HttpPDFJSNorm : public HttpJSNorm +{ +public: + static bool is_pdf(const void* data, size_t len) + { + constexpr char magic[] = "%PDF-1."; + constexpr int magic_len = sizeof(magic) - 1; + return magic_len < len and !strncmp((const char*)data, magic, magic_len); + } + + HttpPDFJSNorm(JSNormConfig* jsn_config, uint64_t tid) : + HttpJSNorm(jsn_config), pdf_out(&buf_pdf_out), extractor(pdf_in, pdf_out) + { trans_num = tid; } + +protected: + bool pre_proc() override; + bool post_proc(int) override; + +private: + snort::ostreambuf_infl buf_pdf_out; + std::istringstream pdf_in; + std::ostream pdf_out; + jsn::PDFTokenizer extractor; +}; + #endif diff --git a/src/service_inspectors/http_inspect/http_msg_body.cc b/src/service_inspectors/http_inspect/http_msg_body.cc index 6ab2b765b..1102dcc6a 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.cc +++ b/src/service_inspectors/http_inspect/http_msg_body.cc @@ -504,13 +504,48 @@ HttpJSNorm* HttpMsgBody::acquire_js_ctx() js_ctx = new HttpInlineJSNorm(jsn_config, trans_num, params->js_norm_param.mpse_otag, params->js_norm_param.mpse_attr); break; + + case CT_APPLICATION_PDF: + js_ctx = new HttpPDFJSNorm(jsn_config, trans_num); + break; + + case CT_APPLICATION_OCTET_STREAM: + js_ctx = first_body and HttpPDFJSNorm::is_pdf(decompressed_file_body.start(), decompressed_file_body.length()) ? + new HttpPDFJSNorm(jsn_config, trans_num) : nullptr; + break; } session_data->js_ctx[source_id] = js_ctx; + return js_ctx; +} + +HttpJSNorm* HttpMsgBody::acquire_js_ctx_mime() +{ + HttpJSNorm* js_ctx = session_data->js_ctx_mime[source_id]; + + if (js_ctx) + { + if (js_ctx->get_trans_num() == trans_num) + return js_ctx; + + delete js_ctx; + js_ctx = nullptr; + } + JSNormConfig* jsn_config = get_inspection_policy()->jsn_config; + js_ctx = HttpPDFJSNorm::is_pdf(decompressed_file_body.start(), decompressed_file_body.length()) ? + new HttpPDFJSNorm(jsn_config, trans_num) : nullptr; + + session_data->js_ctx_mime[source_id] = js_ctx; return js_ctx; } +void HttpMsgBody::clear_js_ctx_mime() +{ + delete session_data->js_ctx_mime[source_id]; + session_data->js_ctx_mime[source_id] = nullptr; +} + void HttpMsgBody::do_file_processing(const Field& file_data) { // Using the trick that cutter is deleted when regular or chunked body is complete @@ -582,19 +617,37 @@ bool HttpMsgBody::run_detection(snort::Packet* p) return false; if ((mime_bufs != nullptr) && !mime_bufs->empty()) { + HttpJSNorm* js_ctx_tmp = nullptr; auto mb = mime_bufs->cbegin(); + uint32_t mime_bufs_size = mime_bufs->size(); + for (uint32_t count = 0; (count < params->max_mime_attach) && (mb != mime_bufs->cend()); ++count, ++mb) { + bool is_last_attachment = ((count + 1 == mime_bufs_size) || + (count + 1 == params->max_mime_attach)); const uint64_t idx = get_header(source_id)->get_multi_file_processing_id(); set_file_data(mb->file.start(), mb->file.length(), idx, count or mb->file.is_accumulated(), std::next(mb) != mime_bufs->end() or last_attachment_complete); if (mb->vba.length() > 0) ole_data.set(mb->vba.length(), mb->vba.start()); + decompressed_file_body.reset(); + decompressed_file_body.set(mb->file.length(), mb->file.start()); + + js_ctx_tmp = session_data->js_ctx[source_id]; + session_data->js_ctx[source_id] = acquire_js_ctx_mime(); + DetectionEngine::detect(p); + + if (!is_last_attachment || last_attachment_complete) + clear_js_ctx_mime(); + + session_data->js_ctx[source_id] = js_ctx_tmp; + ole_data.reset(); decompressed_vba_data.reset(); + decompressed_file_body.reset(); } if (mb != mime_bufs->cend()) { diff --git a/src/service_inspectors/http_inspect/http_msg_body.h b/src/service_inspectors/http_inspect/http_msg_body.h index 10ceb6b17..48ef219cb 100644 --- a/src/service_inspectors/http_inspect/http_msg_body.h +++ b/src/service_inspectors/http_inspect/http_msg_body.h @@ -73,6 +73,8 @@ private: void do_file_decompression(const Field& input, Field& output); void do_legacy_js_normalization(const Field& input, Field& output); HttpJSNorm* acquire_js_ctx(); + HttpJSNorm* acquire_js_ctx_mime(); + void clear_js_ctx_mime(); void clean_partial(uint32_t& partial_inspected_octets, uint32_t& partial_detect_length, uint8_t*& partial_detect_buffer, uint32_t& partial_js_detect_length); diff --git a/src/service_inspectors/http_inspect/http_tables.cc b/src/service_inspectors/http_inspect/http_tables.cc index 1db07a9e3..ab2bb07ef 100755 --- a/src/service_inspectors/http_inspect/http_tables.cc +++ b/src/service_inspectors/http_inspect/http_tables.cc @@ -168,6 +168,8 @@ const StrCode HttpMsgHeadShared::content_code_list[] = const StrCode HttpMsgHeadShared::content_type_list[] = { + { CT_APPLICATION_PDF, "application/pdf" }, + { CT_APPLICATION_OCTET_STREAM, "application/octet-stream" }, { CT_APPLICATION_JAVASCRIPT, "application/javascript" }, { CT_APPLICATION_ECMASCRIPT, "application/ecmascript" }, { CT_APPLICATION_X_JAVASCRIPT, "application/x-javascript" }, @@ -383,6 +385,7 @@ const PegInfo HttpModule::peg_names[PEG_COUNT_MAX+1] = { CountType::SUM, "total_bytes", "total HTTP data bytes inspected" }, { CountType::SUM, "js_inline_scripts", "total number of inline JavaScripts processed" }, { CountType::SUM, "js_external_scripts", "total number of external JavaScripts processed" }, + { CountType::SUM, "js_pdf_scripts", "total number of PDF JavaScripts processed" }, { CountType::SUM, "skip_mime_attach", "total number of HTTP requests with too many MIME attachments to inspect" }, { CountType::END, nullptr, nullptr } };