* Also including new-line and carriage-return line-break characters
All JavaScript identifier names will be substituted to unified names with the
-following format: a0 -> z9999. So, the number of unique identifiers available
-is 260000 names per HTTP transaction. If Normalizer overruns the configured
-limit, built-in alert generated. Additionaly, there is a config option to
+following format: var_0000 -> var_ffff. So, the number of unique identifiers available
+is 65536 names per HTTP transaction. If the Normalizer overruns the configured
+limit, a built-in alert is generated. Additionally, there is a config option to
specify the limit manually:
* http_inspect.js_norm_identifier_depth.
"number of input JavaScript bytes to normalize (-1 unlimited) "
"(experimental)" },
- // range of accepted identifier names is (a0:z9999), so the max is 26 * 10000 = 260000
- { "js_norm_identifier_depth", Parameter::PT_INT, "0:260000", "260000",
+ // range of accepted identifier names is (var_0000:var_ffff), so the max is 2^16
+ { "js_norm_identifier_depth", Parameter::PT_INT, "0:65536", "65536",
"max number of unique JavaScript identifiers to normalize" },
{ "js_norm_max_tmpl_nest", Parameter::PT_INT, "0:255", "32",
From performance perspective, ostreambuf_infl can reserve an amount of memory
before actual operations. Also, memory extending is done by predefined
-portions of 2^8^, 2^9^, 2^10^, 2^13^, 2^16^, 2^16^, 2^16^...
+portions of 2^11^, 2^12^, 2^13^, 2^14^, 2^15^, 2^15^, 2^15^...
This tries to minimize the number of memory reallocation.
};
#endif // CATCH_TEST_BUILD
-#define FIRST_NAME_SIZE 26
-#define LAST_NAME_SIZE 9999
+#define MAX_LAST_NAME 65535
+#define HEX_DIGIT_MASK 15
-static const char s_ident_first_names[FIRST_NAME_SIZE] =
+static const char hex_digits[] = // lowercase hex character for each 4-bit value, indexed 0..15
{
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
+ '0', '1','2','3', '4', '5', '6', '7', '8','9', 'a', 'b', 'c', 'd', 'e', 'f'
};
+// Builds the substitute name for index num: the prefix "var_" followed by
+// four lowercase hex digits taken from the low 16 bits of num, most
+// significant nibble first (0 -> "var_0000", 0xffff -> "var_ffff").
+static inline std::string format_name(int32_t num)
+{
+    std::string name("var_");
+    name.reserve(8);
+
+    // emit the nibbles at bit offsets 12, 8, 4 and 0, in that order
+    for (int shift = 12; shift >= 0; shift -= 4)
+        name.push_back(hex_digits[(num >> shift) & HEX_DIGIT_MASK]);
+
+    return name;
+}
+
const char* JSIdentifierCtx::substitute(const char* identifier)
{
const auto it = ident_names.find(identifier);
if (it != ident_names.end())
return it->second.c_str();
- if (++ident_last_name > LAST_NAME_SIZE)
- {
- if (++ident_first_name > FIRST_NAME_SIZE - 1)
- return nullptr;
-
- ident_last_name = 0;
- }
-
- if (++unique_ident_cnt > depth)
+ if (ident_last_name >= depth || ident_last_name > MAX_LAST_NAME) // no new name once depth names were issued or the index would pass var_ffff
return nullptr;
- ident_names[identifier] = s_ident_first_names[ident_first_name]
- + std::to_string(ident_last_name);
-
+ ident_names[identifier] = format_name(ident_last_name++); // the current index names this identifier; the counter then advances for the next new one
HttpModule::increment_peg_counts(HttpEnums::PEG_JS_IDENTIFIER);
return ident_names[identifier].c_str();
}
void JSIdentifierCtx::reset()
{
- ident_first_name = 0;
- ident_last_name = -1;
- unique_ident_cnt = 0;
+ ident_last_name = 0; // the next new identifier gets index 0 again
ident_names.clear();
}
{ return (sizeof(JSIdentifierCtx) + (sizeof(std::string) * 2 * 500)); }
private:
- int ident_first_name = 0;
- int ident_last_name = -1;
- int32_t unique_ident_cnt = 0;
+ int32_t ident_last_name = 0;
int32_t depth;
std::unordered_map<std::string, std::string> ident_names;
#include "js_normalizer.h"
+#define BUFF_EXP_FACTOR 1.3
+
using namespace snort;
using namespace std;
in_buf.pubsetbuf(nullptr, 0)
->pubsetbuf(tmp_buf, tmp_buf_size)
->pubsetbuf(const_cast<char*>(src), len);
- out_buf.reserve(src_len);
+ out_buf.reserve(src_len * BUFF_EXP_FACTOR);
JSTokenizer::JSRet ret = static_cast<JSTokenizer::JSRet>(tokenizer.yylex());
in.clear();
const ostreambuf_infl::State ostreambuf_infl::states[] =
{
- {states + 1, 1 << 8},
- {states + 2, 1 << 9},
- {states + 3, 1 << 10},
- {states + 4, 1 << 13},
- {states + 4, 1 << 16}
+ {states + 1, 1 << 11}, // 2048; presumably the size of one extension portion - confirm against the State definition
+ {states + 2, 1 << 12}, // 4096
+ {states + 3, 1 << 13}, // 8192
+ {states + 4, 1 << 14}, // 16384
+ {states + 4, 1 << 15} // 32768; the first member is states + 4, i.e. this same (last) entry
};
ostreambuf_infl::ostreambuf_infl() :
auto c_avail = epptr() - pptr();
if (n > c_avail)
- enlarge(n - c_avail);
+ gen.n > (n - c_avail) ? enlarge() : enlarge(n - c_avail);
auto n_avail = epptr() - pptr();
n = min(n, n_avail);
#include <cstring>
#include <vector>
+#include <iomanip>
#include "utils/js_identifier_ctx.h"
-#define DEPTH 260000
-
-#define FIRST_NAME_SIZE 26
-#define LAST_NAME_SIZE 9999
-
-static const char s_ident_first_names[FIRST_NAME_SIZE] =
-{
- 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
- 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
-};
+#define DEPTH 65536
TEST_CASE("JSIdentifierCtx::substitute()", "[JSIdentifierCtx]")
{
{
JSIdentifierCtx ident_ctx(DEPTH);
- CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
- CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+ CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
+ CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
}
SECTION("different names")
{
JSIdentifierCtx ident_ctx(DEPTH);
- CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
- CHECK(!strcmp(ident_ctx.substitute("b"), "a1"));
- CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+ CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
+ CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001"));
+ CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
}
SECTION("depth reached")
{
JSIdentifierCtx ident_ctx(2);
- CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
- CHECK(!strcmp(ident_ctx.substitute("b"), "a1"));
+ CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
+ CHECK(!strcmp(ident_ctx.substitute("b"), "var_0001"));
CHECK(ident_ctx.substitute("c") == nullptr);
CHECK(ident_ctx.substitute("d") == nullptr);
- CHECK(!strcmp(ident_ctx.substitute("a"), "a0"));
+ CHECK(!strcmp(ident_ctx.substitute("a"), "var_0000"));
}
SECTION("max names")
{
for (int it = 0; it < DEPTH + 2; ++it)
n.push_back("n" + std::to_string(it));
- for (int it_first = 0; it_first < FIRST_NAME_SIZE; ++it_first)
+ for (int it_name = 0; it_name < DEPTH; ++it_name)
{
- for (int it_last = 0; it_last <= LAST_NAME_SIZE; ++it_last)
- e.push_back(s_ident_first_names[it_first] + std::to_string(it_last));
+ std::stringstream stream;
+ stream << std::setfill ('0') << std::setw(4)
+ << std::hex << it_name;
+ e.push_back("var_" + stream.str());
}
for (int it = 0; it < DEPTH; ++it)
CHECK(c == 'A');
CHECK(off_b == 0);
CHECK(off_c == 1);
- CHECK(off_e == 256);
+ CHECK(off_e == 2048);
- EXP_RES(b, exp, 1, 256);
+ EXP_RES(b, exp, 1, 2048);
}
SECTION("put two chars")
CHECK(off_b == 0);
CHECK(off_1 == 1);
CHECK(off_2 == 2);
- CHECK(off_e == 256);
+ CHECK(off_e == 2048);
- EXP_RES(b, exp, 2, 256);
+ EXP_RES(b, exp, 2, 2048);
}
SECTION("extend buffer")
CHECK(c2 == 'Z');
CHECK(off_b == 0);
CHECK(off_1 == 1);
- CHECK(off_2 == 257);
- CHECK(off_e == 256);
- CHECK(off_z == 768);
+ CHECK(off_2 == 2049);
+ CHECK(off_e == 2048);
+ CHECK(off_z == 6144);
}
SECTION("put sequence of chars")
CHECK(c2 == 'Z');
CHECK(off_b == 0);
CHECK(off_c == len + 2);
- CHECK(off_e == 4096 + 512);
+ CHECK(off_e == 4096 + 2048);
}
}