git.ipfire.org Git - thirdparty/snort3.git/commitdiff
Pull request #3428: JS Norm: Check Content-Type
author: Mike Stepanek (mstepane) <mstepane@cisco.com>
Tue, 24 May 2022 14:30:17 +0000 (14:30 +0000)
committer: Mike Stepanek (mstepane) <mstepane@cisco.com>
Tue, 24 May 2022 14:30:17 +0000 (14:30 +0000)
Merge in SNORT/snort3 from ~OSERHIIE/snort3:js_content_type to master

Squashed commit of the following:

commit 457cf486d8846108cb3cda7ea9bf99aaae4c5985
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Tue May 17 22:59:38 2022 +0300

    http_inspect: implement general approach of checking Content-Type header

        Add a general approach to checking Content-Type header values.
        The comparison uses the normalized header value and returns the
        appropriate code value if matched. The header comparison is strict
        and precise. Additional header parameters, like charset, are ignored.
        Comparison happens against the MIME type/subtype only.

commit 79fae25f1bf59d6bcf34f2f6b92a2b8666ee830d
Author: Oleksandr Serhiienko <oserhiie@cisco.com>
Date:   Tue May 17 13:29:09 2022 +0300

    http_inspect: add Content-Type header validation for Enhanced JS Normalizer

        Avoid lookup for Inline JavaScript if media-type is not of HTML type.

        Accepted media-types are as follows:
            * application/xhtml+xml
            * text/html

        If the Content-Type header is not specified, the default media-type
        will be application/octet-stream, which is not allowed, so the
        normalization will be skipped.

src/service_inspectors/http_inspect/http_enum.h
src/service_inspectors/http_inspect/http_msg_body.cc
src/service_inspectors/http_inspect/http_msg_head_shared.cc
src/service_inspectors/http_inspect/http_msg_head_shared.h
src/service_inspectors/http_inspect/http_tables.cc

index bbbcc320cf09d218b28984785f41069a1f39d3d6..cc82f33dd5a69a0d1e930d5f67bc1e244998a324 100755 (executable)
@@ -128,6 +128,13 @@ enum Contentcoding { CONTENTCODE__OTHER=1, CONTENTCODE_GZIP, CONTENTCODE_DEFLATE
     CONTENTCODE_BZIP2, CONTENTCODE_LZMA, CONTENTCODE_PEERDIST, CONTENTCODE_SDCH,
     CONTENTCODE_XPRESS, CONTENTCODE_XZ };
 
+// Content media-types (MIME types)
+enum ContentType { CT__OTHER=1, CT_APPLICATION_JAVASCRIPT, CT_APPLICATION_ECMASCRIPT,
+    CT_APPLICATION_X_JAVASCRIPT, CT_APPLICATION_X_ECMASCRIPT, CT_APPLICATION_XHTML_XML,
+    CT_TEXT_JAVASCRIPT, CT_TEXT_JAVASCRIPT_1_0, CT_TEXT_JAVASCRIPT_1_1, CT_TEXT_JAVASCRIPT_1_2,
+    CT_TEXT_JAVASCRIPT_1_3, CT_TEXT_JAVASCRIPT_1_4, CT_TEXT_JAVASCRIPT_1_5, CT_TEXT_ECMASCRIPT,
+    CT_TEXT_X_JAVASCRIPT, CT_TEXT_X_ECMASCRIPT, CT_TEXT_JSCRIPT, CT_TEXT_LIVESCRIPT, CT_TEXT_HTML };
+
 // Transfer-Encoding header values
 enum TransferEncoding { TE__OTHER=1, TE_CHUNKED, TE_IDENTITY };
 
index b3fb14de148f0f513be54dcc768eb582b85fd20a..5e122acb227a27cc96b018bb8e6a152b8ca4f32f 100644 (file)
@@ -416,10 +416,35 @@ void HttpMsgBody::do_enhanced_js_normalization(const Field& input, Field& output
         return;
     }
 
-    if (http_header and http_header->is_external_js())
+    if (!http_header)
+        return;
+
+    switch(http_header->get_content_type())
+    {
+    case CT_APPLICATION_JAVASCRIPT:
+    case CT_APPLICATION_ECMASCRIPT:
+    case CT_APPLICATION_X_JAVASCRIPT:
+    case CT_APPLICATION_X_ECMASCRIPT:
+    case CT_TEXT_JAVASCRIPT:
+    case CT_TEXT_JAVASCRIPT_1_0:
+    case CT_TEXT_JAVASCRIPT_1_1:
+    case CT_TEXT_JAVASCRIPT_1_2:
+    case CT_TEXT_JAVASCRIPT_1_3:
+    case CT_TEXT_JAVASCRIPT_1_4:
+    case CT_TEXT_JAVASCRIPT_1_5:
+    case CT_TEXT_ECMASCRIPT:
+    case CT_TEXT_X_JAVASCRIPT:
+    case CT_TEXT_X_ECMASCRIPT:
+    case CT_TEXT_JSCRIPT:
+    case CT_TEXT_LIVESCRIPT:
         normalizer->do_external(input, output, infractions, session_data, back);
-    else
+        break;
+
+    case CT_APPLICATION_XHTML_XML:
+    case CT_TEXT_HTML:
         normalizer->do_inline(input, output, infractions, session_data, back);
+        break;
+    }
 }
 
 void HttpMsgBody::do_legacy_js_normalization(const Field& input, Field& output)
index dacb2733405dffe4a8193784eadd30dada9163fc..15ad80d13cbb773107fc2b07d1cc3c77ebb9fb3a 100755 (executable)
@@ -55,56 +55,26 @@ HttpMsgHeadShared::~HttpMsgHeadShared()
     }
 }
 
-bool HttpMsgHeadShared::is_external_js()
+int32_t HttpMsgHeadShared::get_content_type()
 {
-    if (js_external != STAT_NOT_COMPUTE)
-        return js_external;
+    if (content_type != STAT_NOT_COMPUTE)
+        return content_type;
 
-    const Field& content_type = get_header_value_raw(HEAD_CONTENT_TYPE);
-    const char* cur = (const char*)content_type.start();
-    int len = content_type.length();
-    if (SnortStrcasestr(cur, len, "application/"))
-    {
-        if (SnortStrcasestr(cur, len, "javascript"))
-        {
-            js_external = 1;
-            return true;
-        }
+    const Field& content_type_hdr = get_header_value_norm(HEAD_CONTENT_TYPE);
+    const uint8_t* start = content_type_hdr.start();
+    int32_t len = content_type_hdr.length();
 
-        if (SnortStrcasestr(cur, len, "ecmascript"))
-        {
-            js_external = 1;
-            return true;
-        }
-    }
-    else if (SnortStrcasestr(cur, len, "text/"))
+    if (len <= 0)
     {
-        if (SnortStrcasestr(cur, len, "javascript"))
-        {
-            js_external = 1;
-            return true;
-        }
-
-        if (SnortStrcasestr(cur, len, "ecmascript"))
-        {
-            js_external = 1;
-            return true;
-        }
+        content_type = CT__OTHER;
+        return content_type;
+    }
 
-        if (SnortStrcasestr(cur, len, "jscript"))
-        {
-            js_external = 1;
-            return true;
-        }
+    if (const uint8_t* semicolon = (const uint8_t*)memchr(start, ';', len))
+        len = semicolon - start;
 
-        if (SnortStrcasestr(cur, len, "livescript"))
-        {
-            js_external = 1;
-            return true;
-        }
-    }
-    js_external = 0;
-    return js_external;
+    content_type = str_to_code(start, len, content_type_list);
+    return content_type;
 }
 
 // All the header processing that is done for every message (i.e. not just-in-time) is done here.
index 5acdd00b4794fd91324535379fb0210163d14e34..3fbb64dcaf78f5a24e747b73cb9650ac8158bd5f 100755 (executable)
@@ -49,6 +49,7 @@ public:
     // Tables of header field names and header value names
     static const StrCode header_list[];
     static const StrCode content_code_list[];
+    static const StrCode content_type_list[];
     static const StrCode charset_code_list[];
     static const StrCode charset_code_opt_list[];
     static const StrCode transfer_encoding_list[];
@@ -58,8 +59,8 @@ public:
     // verdicts.
     uint64_t get_file_cache_index();
     const Field& get_content_disposition_filename();
-    bool is_external_js();
     int32_t get_num_headers() const { return num_headers; }
+    int32_t get_content_type();
 
     static const int MAX_HEADERS = 200;  // I'm an arbitrary number. FIXIT-RC
 protected:
@@ -109,7 +110,7 @@ private:
     bool file_cache_index_computed = false;
 
     bool own_msg_buffer;
-    int js_external = HttpCommon::STAT_NOT_COMPUTE;
+    int32_t content_type = HttpCommon::STAT_NOT_COMPUTE;
 };
 
 #endif
index f6e9018f34746b76c5b2cbf341f2c6abcedb4e67..492c97242926ae1f569b331d7a49ed84dfe80dec 100755 (executable)
@@ -166,6 +166,29 @@ const StrCode HttpMsgHeadShared::content_code_list[] =
     { 0,                         nullptr }
 };
 
+const StrCode HttpMsgHeadShared::content_type_list[] =
+{
+    { CT_APPLICATION_JAVASCRIPT,   "application/javascript" },
+    { CT_APPLICATION_ECMASCRIPT,   "application/ecmascript" },
+    { CT_APPLICATION_X_JAVASCRIPT, "application/x-javascript" },
+    { CT_APPLICATION_X_ECMASCRIPT, "application/x-ecmascript" },
+    { CT_APPLICATION_XHTML_XML,    "application/xhtml+xml" },
+    { CT_TEXT_JAVASCRIPT,          "text/javascript" },
+    { CT_TEXT_JAVASCRIPT_1_0,      "text/javascript1.0" },
+    { CT_TEXT_JAVASCRIPT_1_1,      "text/javascript1.1" },
+    { CT_TEXT_JAVASCRIPT_1_2,      "text/javascript1.2" },
+    { CT_TEXT_JAVASCRIPT_1_3,      "text/javascript1.3" },
+    { CT_TEXT_JAVASCRIPT_1_4,      "text/javascript1.4" },
+    { CT_TEXT_JAVASCRIPT_1_5,      "text/javascript1.5" },
+    { CT_TEXT_ECMASCRIPT,          "text/ecmascript" },
+    { CT_TEXT_X_JAVASCRIPT,        "text/x-javascript" },
+    { CT_TEXT_X_ECMASCRIPT,        "text/x-ecmascript" },
+    { CT_TEXT_JSCRIPT,             "text/jscript" },
+    { CT_TEXT_LIVESCRIPT,          "text/livescript" },
+    { CT_TEXT_HTML,                "text/html" },
+    { 0,                           nullptr }
+};
+
 const StrCode HttpMsgHeadShared::charset_code_list[] =
 {
     { CHARSET_DEFAULT,       "charset=utf-8" },