From: Anna Norokh -X (anorokh - SOFTSERVE INC at Cisco) <anorokh@cisco.com>
Date: Mon, 6 Jan 2025 21:33:15 +0000 (+0000)
Subject: Pull request #4555: js_norm: fix int code overflow
X-Git-Tag: 3.6.2.0~16
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5952523edc7ee8c20ee4ddd092df5a55ac5285e0;p=thirdparty%2Fsnort3.git

Pull request #4555: js_norm: fix int code overflow

Merge in SNORT/snort3 from ~ANOROKH/snort3:js_norm_int_code_overflow to master

Squashed commit of the following:

commit fa6d223a340d7c5dae2f35327ec87d6fead07aa3
Author: dkyrylov <dkyrylov@cisco.com>
Date:   Wed Nov 27 18:50:18 2024 +0200

    js_norm: add stoi out of range exception handling
---

diff --git a/src/js_norm/js_tokenizer.l b/src/js_norm/js_tokenizer.l
index 2885996f2..d4a111010 100644
--- a/src/js_norm/js_tokenizer.l
+++ b/src/js_norm/js_tokenizer.l
@@ -34,6 +34,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <cstdlib>
 
 #include "js_norm/js_enum.h"
 #include "js_norm/js_identifier_ctx.h"
@@ -1258,6 +1259,11 @@ ALL_UNICODE    [\0-\x7F]|[\xC2-\xDF][\x80-\xBF]|(\xE0[\xA0-\xBF]|[\xE1-\xEF][\x8
 
 // static helper functions
 
+static unsigned int str2num(const std::string& s, int base) // parse s as an unsigned integer written in the given base
+{
+    return std::min(strtoul(s.c_str(), nullptr, base), 0xffffffffUL); // cap the parsed value at 0xffffffff before narrowing to unsigned int
+}
+
 static std::string unicode_to_utf8(const unsigned int code)
 {
     std::string res;
@@ -1331,7 +1337,7 @@ static std::string unescape_unicode(const char* lexeme)
             unicode_str += ch;
             if (!(--digits_left))
             {
-                const unsigned int unicode = std::stoi(unicode_str, nullptr, 16);
+                const unsigned int unicode = str2num(unicode_str, 16);
                 res += unicode_to_utf8(unicode);
 
                 unicode_str = "";
@@ -1345,7 +1351,7 @@ static std::string unescape_unicode(const char* lexeme)
         {
             if (ch == '}')
             {
-                const unsigned int code_point = std::stoi(unicode_str, nullptr, 16);
+                const unsigned int code_point = str2num(unicode_str, 16);
                 res += unicode_to_utf8(code_point);
 
                 unicode_str = "";
@@ -2982,14 +2988,14 @@ void JSTokenizer::escaped_unicode_latin_1()
 {
     // truncate escape symbol, get hex number only
     std::string code(YYText() + 2);
-    yyout << (char)std::stoi(code, nullptr, 16);
+    yyout << (char)str2num(code, 16);
 }
 
 void JSTokenizer::escaped_unicode_utf_8()
 {
     // truncate escape symbol, get hex number only
     std::string code(YYText() + 2);
-    yyout << unicode_to_utf8(std::stoi(code, nullptr, 16));
+    yyout << unicode_to_utf8(str2num(code, 16)); // hex text -> number (capped at 0xffffffff by str2num) -> UTF-8 bytes
 }
 
 void JSTokenizer::escaped_code_point()
@@ -2997,21 +3003,21 @@ void JSTokenizer::escaped_code_point()
     // truncate escape symbols, get hex number only
     std::string code(YYText() + 3);
     code.resize(code.size() - 1);
-    yyout << unicode_to_utf8(std::stoi(code, nullptr, 16));
+    yyout << unicode_to_utf8(str2num(code, 16));
 }
 
 void JSTokenizer::escaped_url_sequence_latin_1()
 {
     // truncate escape symbol, get hex number only
     std::string code(YYText() + 1);
-    yyout << (char)std::stoi(code, nullptr, 16);
+    yyout << (char)str2num(code, 16); // emitted as a single char; NOTE(review): presumably the lexer rule limits code to two hex digits - confirm
 }
 
 void JSTokenizer::lit_int_code_point(int base)
 {
     std::string code(base != 10 && !isdigit(YYText()[1]) ? YYText() + 2 : YYText());
     code.erase(std::remove(code.begin(), code.end(), '_'), code.end());
-    yyout << unicode_to_utf8(std::stoi(code, nullptr, base));
+    yyout << unicode_to_utf8(str2num(code, base)); // literal digits (prefix and '_' already removed) -> number capped at 0xffffffff -> UTF-8 bytes
 }
 
 void JSTokenizer::char_code_no_match()
diff --git a/src/js_norm/test/js_unescape_test.cc b/src/js_norm/test/js_unescape_test.cc
index 0c0a19382..e89eb66b7 100644
--- a/src/js_norm/test/js_unescape_test.cc
+++ b/src/js_norm/test/js_unescape_test.cc
@@ -1398,6 +1398,16 @@ TEST_CASE("Internal limits", "[JSNormalizer]")
             "$/i).test(var_0001)}"
         );
     }
+
+    SECTION("Out of range value")
+    {
+        test_normalization(
+            "String.fromCodePoint(0xFFFFFFFFFFFF);"
+            "String.fromCharCode(0xFFFFFFFFFFFF);",
+            "'\xf7\xbf\xbf\xbf';"
+            "'\xf7\xbf\xbf\xbf';"
+        );
+    }
 }
 
 TEST_CASE("Function type detection", "[JSNormalizer]")