}
buf_pdf_in.pubsetbuf(nullptr, 0)
+ ->pubsetbuf(state_buf, state_len)
->pubsetbuf(const_cast<char*>((const char*)src_ptr), src_end - src_ptr);
pdf_out.clear();
delete[] buf_pdf_out.take_data();
PDFJSNorm(JSNormConfig* cfg, uint32_t gen_id) :
JSNorm(cfg, false, gen_id),
pdf_in(&buf_pdf_in), pdf_out(&buf_pdf_out),
- extractor(pdf_in, pdf_out, cfg ? cfg->pdf_max_dictionary_depth : 0)
+ extractor(pdf_in, pdf_out, state_buf, state_len, cfg ? cfg->pdf_max_dictionary_depth : 0)
{ }
+ virtual ~PDFJSNorm() override // releases the character array that state_buf points to, if any
+ { delete[] state_buf; } // state_buf starts as nullptr and is handed to the extractor by reference, which may replace it
+
protected:
bool pre_proc() override;
bool post_proc(int) override;
private:
+ char* state_buf = nullptr;
+ int state_len = 0;
snort::istreambuf_glue buf_pdf_in;
snort::ostreambuf_infl buf_pdf_out;
std::istream pdf_in;
};
PDFTokenizer() = delete;
- explicit PDFTokenizer(std::istream& in, std::ostream& out, int dictionaries_max_size);
+ explicit PDFTokenizer(std::istream& in, std::ostream& out, char*& state_buf, int& state_len, int dictionaries_max_size);
~PDFTokenizer() override;
PDFRet process();
private:
int yylex() override;
+ void state_add(int len);
+ void state_store();
+ void state_clear();
+ void state_act();
+
PDFRet h_dict_open();
PDFRet h_dict_close();
PDFRet h_dict_name();
+ PDFRet h_dict_number();
PDFRet h_dict_other();
inline bool h_lit_str();
inline bool h_hex_str();
PDFRet h_hex_hex2chr_u16();
PDFRet h_lit_u16();
PDFRet h_lit_u16_unescape();
- PDFRet h_stream_open();
- PDFRet h_stream();
PDFRet h_array_nesting();
+ PDFRet h_stream_open();
+ void h_stream();
+ void h_stream_part_close();
+ PDFRet h_stream_dump_remainder();
+ PDFRet h_stream_dump_remainder_u16();
bool h_stream_close();
void h_stream_length();
void h_ref();
struct ObjectDictionary // parsing state of one dictionary; instances live in the 'dictionaries' stack
{
void clear() // puts every field back to its initial value
- { key_value = true; array_level = 0; }
+ { key_value = true; consecutive_number = false; array_level = 0; }
bool key_value = true; // true while the next dictionary token is expected to be a key
+ bool consecutive_number = false; // set by h_dict_number(); the next handler that sees it clears it and toggles key_value
int array_level = 0; // compared with obj_array.nesting_level by the dictionary handlers
};
struct Stream // state of the stream object currently being read
{
int rem_length = -1; // reduced by the length of every consumed stream token; presumably -1 means "length not known yet" - TODO confirm
+ int endstream_part = 0; // length of the last partial "endstream" match that has not been accounted for yet
bool is_js = false; // when true, the stream-open rule enters the u16 start condition instead of stream
bool is_ref_len = false; // NOTE(review): presumably set when /Length is an indirect reference - confirm against h_ref()/h_stream_length()
};
+ char*& state_buf;
+ int& state_len;
+ bool state_added = false;
+
ObjectString obj_string;
ObjectArray obj_array;
std::stack<ObjectDictionary> dictionaries;
#define YY_USER_ACTION \
{ \
+ state_act(); \
debug_logf(5, js_trace, TRACE_PDF_PROC, nullptr, \
"PDF pattern #%d, sc %d\n", yy_act, YY_START); \
debug_logf(5, js_trace, TRACE_PDF_DUMP, nullptr, \
/* 7.3.2 Boolean Objects */
OBJ_BOOLEAN true|false
+OBJ_PARTIAL_BOOL t|tr|tru|f|fa|fal|fals
/* 7.3.3 Numeric Objects */
OBJ_INT_NUM [+-]?[0-9]{1,16}
OBJ_DICT_SKIP .|{GRP_NEWLINE}
/* 7.3.8 Stream Objects */
-OBJ_STREAM_OPEN stream\r?\n
-OBJ_STREAM_CLOSE {EOL_MARKER}endstream
-OBJ_STREAM_SKIP {GRP_NOT_NEWLINE}{1,16}|{GRP_NEWLINE}
+OBJ_STREAM_OPEN stream\r?\n
+OBJ_STREAM_PARTIAL_OPEN s|st|str|stre|strea|stream|stream\r
+OBJ_STREAM_CLOSE endstream
+OBJ_STREAM_PARTIAL_CLOSE e|en|end|ends|endst|endstr|endstre|endstrea
+OBJ_STREAM_SKIP [^e]{1,16}
/* 7.3.9 Null Object */
OBJ_NULL null
+OBJ_PARTIAL_NULL n|nu|nul
/* 7.3.10 Indirect Objects */
-INDIRECT_OBJ_OPEN {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}+obj
+INDIRECT_OBJ_OPEN {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}+obj
+INDIRECT_OBJ_PARTIAL_OPEN {OBJ_INT_NUM}{GRP_WHITESPACE}*{OBJ_INT_NUM}?{GRP_WHITESPACE}*(o|ob)
INDIRECT_OBJ_CLOSE endobj
+INDIRECT_OBJ_PARTIAL_CLOSE e|en|end|endo|endob
OBJ_REFERENCE {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}+R
<comment>{COMMENT_END} { POP(); }
<INITIAL>{INDIRECT_OBJ_OPEN} { PUSH(indobj); h_ind_obj_open(); }
+<INITIAL>{INDIRECT_OBJ_PARTIAL_OPEN} { state_add(yyleng); }
<indobj>{WHITESPACE} { }
<indobj>{INDIRECT_OBJ_CLOSE} { POP(); h_ind_obj_close(); EXEC(h_array_nesting()); }
+<indobj>{INDIRECT_OBJ_PARTIAL_CLOSE} { state_add(yyleng); }
<indobj>{OBJ_ARRAY_OPEN} { PUSH(array); ++obj_array.nesting_level; }
<indobj>{OBJ_REFERENCE} { indirect_obj.ref_met = true; }
<indobj>{OBJ_BOOLEAN} { }
<array>{OBJ_NULL} { }
<array>{OBJ_NAME} { }
<array>{OBJ_LIT_STR_OPEN} { if (h_lit_open()) PUSH(litstr); }
-<array>{OBJ_HEX_STR_OPEN} { PUSH(hexstr); }
+<array>{OBJ_HEX_STR_OPEN} { PUSH(hexstr); state_add(yyleng); }
<array>{OBJ_ARRAY_SKIP} { }
<array>{INDIRECT_OBJ_CLOSE} { return PDFRet::UNEXPECTED_SYMBOL; }
<indobj>{OBJ_STREAM_OPEN} { EXEC(h_stream_open()); PUSH(obj_stream.is_js ? u16 : stream); }
-<stream>{OBJ_STREAM_SKIP} { EXEC(h_stream()); }
-<jsstream>{OBJ_STREAM_SKIP} { EXEC(h_stream()); ECHO; }
-<jsstreamu16>{OBJ_STREAM_SKIP} { EXEC(h_stream()); EXEC(h_lit_u16()); }
+<indobj>{OBJ_STREAM_PARTIAL_OPEN} { state_add(yyleng); }
<stream>{OBJ_STREAM_CLOSE} { if (h_stream_close()) POP(); }
+<stream>{OBJ_STREAM_PARTIAL_CLOSE} { EXEC(h_stream_dump_remainder()); h_stream_part_close(); state_add(yyleng); }
<jsstream>{OBJ_STREAM_CLOSE} { if (h_stream_close()) POP(); }
+<jsstream>{OBJ_STREAM_PARTIAL_CLOSE} { EXEC(h_stream_dump_remainder()); h_stream_part_close(); state_add(yyleng); }
<jsstreamu16>{OBJ_STREAM_CLOSE} { if (h_stream_close()) POP(); }
+<jsstreamu16>{OBJ_STREAM_PARTIAL_CLOSE} { EXEC(h_stream_dump_remainder_u16()); h_stream_part_close(); state_add(yyleng); }
+<stream>{OBJ_STREAM_SKIP} { EXEC(h_stream_dump_remainder()); h_stream(); }
+<jsstream>{OBJ_STREAM_SKIP} { EXEC(h_stream_dump_remainder()); h_stream(); ECHO; }
+<jsstreamu16>{OBJ_STREAM_SKIP} { EXEC(h_stream_dump_remainder_u16()); h_stream(); EXEC(h_lit_u16()); }
<dictnr>{OBJ_DICT_OPEN} { PUSH(dictnr); EXEC(h_dict_open()); }
<indobj>{OBJ_DICT_OPEN} { PUSH(dictnr); EXEC(h_dict_open()); }
<array>{OBJ_DICT_OPEN} { PUSH(dictnr); EXEC(h_dict_open()); }
<array>{OBJ_DICT_CLOSE} { return PDFRet::INCORRECT_BRACKETS_NESTING; }
<dictnr>{OBJ_DICT_CLOSE} { POP(); EXEC(h_dict_close()); }
-<dictnr>{WHITESPACE} { }
-<dictnr>{OBJ_REFERENCE} { EXEC(h_dict_other()); h_ref(); }
+<dictnr>{WHITESPACE} { state_add(yyleng); }
+<dictnr>{OBJ_REFERENCE} { dictionaries.top().consecutive_number = false; EXEC(h_dict_other()); h_ref(); }
<dictnr>{OBJ_BOOLEAN} { EXEC(h_dict_other()); }
-<dictnr>{OBJ_INT_NUM} { EXEC(h_dict_other()); h_stream_length(); }
-<dictnr>{OBJ_REL_NUM} { EXEC(h_dict_other()); }
+<dictnr>{OBJ_PARTIAL_BOOL} { state_add(yyleng); }
+<dictnr>{OBJ_INT_NUM} { EXEC(h_dict_number()); h_stream_length(); state_add(yyleng); }
+<dictnr>{OBJ_REL_NUM} { EXEC(h_dict_number()); state_add(yyleng); }
<dictnr>{OBJ_NULL} { EXEC(h_dict_other()); }
-<dictnr>{OBJ_NAME} { EXEC(h_dict_name()); }
+<dictnr>{OBJ_PARTIAL_NULL} { state_add(yyleng); }
+<dictnr>{OBJ_NAME} { EXEC(h_dict_name()); state_add(yyleng); }
<dictnr>{OBJ_ARRAY_OPEN} { PUSH(array); ++obj_array.nesting_level; EXEC(h_dict_other()); }
<dictnr>{OBJ_ARRAY_CLOSE} { return PDFRet::INCORRECT_BRACKETS_NESTING; }
<dictnr>{OBJ_LIT_STR_OPEN} { EXEC(h_dict_other()); if (h_lit_str()) PUSH(jslstr); else PUSH(litstr); yyless(0); }
-<dictnr>{OBJ_HEX_STR_OPEN} { EXEC(h_dict_other()); if (h_hex_str()) PUSH(jshstr); else PUSH(hexstr); yyless(0); }
-<dictnr>{OBJ_DICT_SKIP} { }
+<dictnr>{OBJ_HEX_STR_OPEN} { EXEC(h_dict_other()); if (h_hex_str()) PUSH(jshstr); else PUSH(hexstr); yyless(0); state_add(yyleng); }
+<dictnr>{OBJ_DICT_SKIP} { state_add(yyleng); }
<dictnr>{INDIRECT_OBJ_CLOSE} { return PDFRet::UNEXPECTED_SYMBOL; }
<indobj>{OBJ_LIT_STR_OPEN} { if (h_lit_open()) PUSH(litstr); }
<litstr>{LIT_STR_EOL} { }
<litstr>{LIT_STR_BODY} { }
-<indobj>{OBJ_HEX_STR_OPEN} { PUSH(hexstr); }
+<indobj>{OBJ_HEX_STR_OPEN} { state_add(yyleng); PUSH(hexstr); }
<hexstr>{OBJ_HEX_STR_CLOSE} { POP(); }
<hexstr>{HEX_STR_BODY} { }
<hexstr>{HEX_STR_SKIP} { }
<jshstru16>{HEX_STR_BODY} { EXEC(h_hex_hex2chr_u16()); }
<jshstr,jshstru16>{HEX_STR_SKIP} { }
-<*><<EOF>> { return PDFRet::EOS; }
+<*><<EOF>> { state_store(); return PDFRet::EOS; }
-{SKIP} { }
+{SKIP} { state_add(yyleng); }
<*>.|\n { return PDFRet::UNEXPECTED_SYMBOL; }
%%
if (dictionaries.top().array_level != obj_array.nesting_level)
return PDFRet::EOS;
+ if (dictionaries.top().consecutive_number)
+ {
+ dictionaries.top().consecutive_number = false;
+ dictionaries.top().key_value = !dictionaries.top().key_value;
+ }
+
if (dictionaries.top().key_value)
return PDFRet::NOT_NAME_IN_DICTIONARY_KEY;
return PDFRet::EOS;
}
+PDFTokenizer::PDFRet PDFTokenizer::h_dict_number() // Handles a numeric token inside a dictionary (the <dictnr> OBJ_INT_NUM and OBJ_REL_NUM rules).
+{
+ if(!dictionaries.top().consecutive_number) // first number of a run: restart the saved-state byte count
+ state_clear();
+
+ if (dictionaries.top().key_value) // a number where a key is expected is an error
+ return PDFRet::NOT_NAME_IN_DICTIONARY_KEY;
+
+ debug_logf(6, js_trace, TRACE_PDF_PROC, nullptr,
+ "dictionary token: number\n");
+
+ debug_logf(6, js_trace, TRACE_PDF_DUMP, nullptr,
+ "dictionary entry: %s, %s\n", obj_entry.key, yytext);
+
+ dictionaries.top().consecutive_number = true; // key_value is not toggled here; the next non-number handler does it when it sees this flag
+
+ return PDFRet::EOS; // EOS is the non-error result here
+}
+
PDFTokenizer::PDFRet PDFTokenizer::h_dict_name()
{
if (dictionaries.top().array_level != obj_array.nesting_level)
return PDFRet::EOS;
+ if (dictionaries.top().consecutive_number)
+ {
+ dictionaries.top().consecutive_number = false;
+ dictionaries.top().key_value = !dictionaries.top().key_value;
+ }
+
if (dictionaries.top().key_value)
strncpy(obj_entry.key, yytext, sizeof(obj_entry.key) - 1);
return PDFRet::EOS;
}
-PDFTokenizer::PDFRet PDFTokenizer::h_stream()
+void PDFTokenizer::h_stream() // Accounts for one chunk of stream content matched by OBJ_STREAM_SKIP.
{
obj_stream.rem_length -= yyleng; // yyleng is the length of the token just matched
+}
+
+void PDFTokenizer::h_stream_part_close() // Records a partial "endstream" match (OBJ_STREAM_PARTIAL_CLOSE rules).
+{
+ obj_stream.endstream_part = yyleng; // kept until the next h_stream_dump_remainder*() call or state_store()
+}
+
+static const char endstream_tag[] = "endstream"; // source of the bytes replayed for a pending partial match
+
+PDFTokenizer::PDFRet PDFTokenizer::h_stream_dump_remainder() // Flushes a pending partial "endstream" match as ordinary stream content.
+{
+ int part = obj_stream.endstream_part; // number of leading "endstream" bytes recorded by h_stream_part_close()
+ obj_stream.endstream_part = 0; // consumed: nothing is pending any more
+ obj_stream.rem_length -= part; // the pending bytes count toward the stream length
+ if (YY_START == jsstream) // only the jsstream start condition writes them out; otherwise they are just counted
+ for(const char* c = endstream_tag; c < endstream_tag + part; c++)
+ yyout << *c;
+ return PDFRet::EOS; // always EOS
+}
+
+PDFTokenizer::PDFRet PDFTokenizer::h_stream_dump_remainder_u16() // Same as h_stream_dump_remainder(), but passes the pending bytes to u16_eval().
+{
+ int part = obj_stream.endstream_part; // number of leading "endstream" bytes recorded by h_stream_part_close()
+ obj_stream.endstream_part = 0; // consumed: nothing is pending any more
+ obj_stream.rem_length -= part; // the pending bytes count toward the stream length
+ for(const char* c = endstream_tag; c < endstream_tag + part; c++)
+ EXEC(u16_eval(*c)); // NOTE(review): EXEC presumably returns early on a non-EOS result - confirm against the macro
return PDFRet::EOS;
}
obj_stream.rem_length -= yyleng;
if (obj_stream.rem_length <= 0)
- {
- if (YY_START == jsstream)
- yyout << '\n';
return true;
- }
if (YY_START == jsstream)
ECHO;
default:
assert(false);
}
+ u16_state.cur_byte = 0;
}
void PDFTokenizer::h_u16_break()
assert(YY_START == jshstr);
POP();
PUSH(jshstru16);
+ u16_state.cur_byte = 0;
}
void PDFTokenizer::h_u16_hex_break()
yyout << out;
}
-PDFTokenizer::PDFTokenizer(std::istream& in, std::ostream& out, int dictionaries_max_size)
- : yyFlexLexer(in, out), dictionaries_max_size(dictionaries_max_size)
+PDFTokenizer::PDFTokenizer(std::istream& in, std::ostream& out, char*& state_buf, int& state_len, int dictionaries_max_size) // state_buf and state_len are bound by reference: the caller owns them, state_store() updates them
+ : yyFlexLexer(in, out), state_buf(state_buf), state_len(state_len), dictionaries_max_size(dictionaries_max_size)
{
dictionaries.push(ObjectDictionary()); // start with one dictionary on the stack; the handlers call dictionaries.top() without checking for empty
}
return r;
}
+
+void PDFTokenizer::state_add(int len) // Extends the count of trailing input bytes that would be saved at end of input.
+{
+ state_len += len; // len may be 0 (e.g. after yyless(0)); the call then only sets the flag below
+ state_added = true; // tells the next state_act() to keep the count instead of clearing it
+}
+
+void PDFTokenizer::state_store() // Called from the <<EOF>> rule: copies the last state_len input bytes into a new state_buf.
+{
+ state_act(); // same keep-or-clear step that YY_USER_ACTION runs before a rule action
+
+ if (state_len == 0) // nothing to carry over; state_buf is left as it was
+ return;
+
+ if (YY_START == hexstr) // leave the hexstr start condition; presumably the saved '<' is lexed again and re-enters it - TODO confirm
+ POP();
+
+ if (!dictionaries.top().key_value and !dictionaries.top().consecutive_number) // a value was expected and the last token was not a number
+ dictionaries.top().key_value = true; // expect a key again; presumably because the saved bytes include that key - TODO confirm
+
+ obj_stream.endstream_part = 0; // drop any pending partial "endstream" match
+
+ char* buf = new char[state_len]; // ownership moves to state_buf at the end of this function
+
+ yyin.seekg(-state_len, std::ios_base::end); // position state_len bytes before the end of the input
+ yyin.clear(); // NOTE(review): clear() runs after seekg(); confirm the seek takes effect when a failure flag is already set
+ yyin.read(buf, state_len); // NOTE(review): the result is not checked; a short read would leave part of buf unset
+
+ debug_logf(6, js_trace, TRACE_PDF_DUMP, nullptr,
+ "storing %d bytes for reassembly: \"%.*s\"\n",state_len,state_len,buf);
+
+ delete[] state_buf; // release the previous saved bytes (may be nullptr)
+ state_buf = buf; // visible to the caller through the reference
+}
+
+void PDFTokenizer::state_clear() // Resets the count of bytes to save; the state_buf array itself is not touched.
+{
+ state_len = 0;
+}
+
+void PDFTokenizer::state_act() // Runs before every rule action (via YY_USER_ACTION) and at the start of state_store().
+{
+ if (state_added) // the previous action called state_add(): keep the count and re-arm the flag
+ state_added = false;
+ else
+ state_clear(); // the previous action did not: the run of saved bytes is broken, so start over
+}
add_catch_test( pdf_tokenizer_test
SOURCES
${pdf_tokenizer_OUTPUTS}
+ ${CMAKE_SOURCE_DIR}/src/helpers/streambuf.cc
js_test_stubs.cc
)
using namespace snort;
using namespace std;
+static constexpr int nesting_level = 10;
+
static const string make_input(const char* begin, const char* mid, const char* end, size_t len)
{
string str(begin);
ostreambuf_infl buf_out;
istream in(&buf_in);
ostream out(&buf_out);
- PDFTokenizer parser(in, out);
+ char* buf = nullptr;
+ int len = 0; // zero-initialized: the tokenizer binds this by reference as its saved-state length
+ PDFTokenizer parser(in, out, buf, len, nesting_level);
BENCHMARK("memcpy()")
{
rewind();
return parser.process();
};
+
+ delete[] buf;
}
TEST_CASE("PDF Parser, literals by 64 K", "[PDFTokenizer]")
ostreambuf_infl buf_out;
istream in(&buf_in);
ostream out(&buf_out);
- PDFTokenizer parser(in, out);
+ char* buf = nullptr;
+ int len = 0; // zero-initialized: the tokenizer binds this by reference as its saved-state length
+ PDFTokenizer parser(in, out, buf, len, nesting_level);
BENCHMARK("memcpy()")
{
rewind();
return parser.process();
};
+
+ delete[] buf;
}
TEST_CASE("PDF Tokenizer, indirect objects", "[PDFTokenizer]")
ostreambuf_infl buf_out;
istream in(&buf_in);
ostream out(&buf_out);
- PDFTokenizer parser(in, out);
+ char* buf = nullptr;
+ int len = 0; // zero-initialized: the tokenizer binds this by reference as its saved-state length
+ PDFTokenizer parser(in, out, buf, len, nesting_level);
set_input(data);
BENCHMARK("same object repeated")
rewind();
return parser.process();
};
+
+ delete[] buf;
}
#endif
#include <FlexLexer.h>
#include "catch/catch.hpp"
+#include "helpers/streambuf.h"
#include "js_norm/pdf_tokenizer.h"
using namespace jsn;
{
istringstream in(source);
ostringstream out;
- PDFTokenizer extractor(in, out, nesting_level);
+ char* buf = nullptr;
+ int len = 0;
+ PDFTokenizer extractor(in, out, buf, len, nesting_level);
auto r = extractor.process();
+ delete[] buf;
+
CHECK(ret == r);
CHECK(expected == out.str());
}
static void test_pdf_proc(const vector<Chunk>& chunks) // Feeds the chunks to one tokenizer in order and checks the output of each step.
{
- istringstream in;
+ snort::istreambuf_glue in_buf;
+ istream in(&in_buf);
ostringstream out;
- PDFTokenizer extractor(in, out, nesting_level);
+ char* state_buf = nullptr; // bytes carried over from the previous chunk; replaced by the tokenizer
+ int state_len = 0; // number of valid bytes in state_buf
+ PDFTokenizer extractor(in, out, state_buf, state_len, nesting_level);
for (const auto& chunk : chunks)
{
auto src = chunk.first; // input for this step
auto exp = chunk.second; // output expected from this step alone
- in.str(src);
+ in_buf.pubsetbuf(nullptr,0) // presumably drops the buffers set for the previous chunk - TODO confirm against istreambuf_glue
+ ->pubsetbuf(state_buf, state_len) // saved tail of the previous chunk comes first
+ ->pubsetbuf(const_cast<char*>(src.c_str()), src.length()); // then the new chunk
out.str(""); // each step is compared on its own
auto r = extractor.process();
CHECK(PDFTokenizer::PDFRet::EOS == r);
CHECK(exp == out.str());
}
+
+ delete[] state_buf; // release whatever the last process() call left behind
}
TEST_CASE("basic", "[PDFTokenizer]")
{
- SECTION("no input")
- {
- test_pdf_proc(
- "",
- ""
- );
- }
SECTION("minimal PDF")
{
test_pdf_proc(
"", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY
);
}
+ SECTION("number as a key")
+ {
+ test_pdf_proc(
+ "1 0 obj"
+ "<<"
+ "/K1 /V1"
+ "1234 /V2"
+ "/JS (foo)"
+ ">>"
+ "endobj",
+ "", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY
+ );
+ }
+ SECTION("number as a key after a value")
+ {
+ test_pdf_proc(
+ "1 0 obj"
+ "<<"
+ "/K1 null "
+ "1234 (bar)"
+ "/JS (foo)"
+ ">>"
+ "endobj",
+ "", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY
+ );
+ }
+ SECTION("value as a key after a number")
+ {
+ test_pdf_proc(
+ "1 0 obj"
+ "<<"
+ "/K1 1234 "
+ "null (bar) "
+ "/JS (foo)"
+ ">>"
+ "endobj",
+ "", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY
+ );
+ }
SECTION("token too long")
{
test_pdf_proc(
TEST_CASE("split", "[PDFTokenizer]")
{
- SECTION("no input")
- {
- test_pdf_proc({
- {"", ""},
- {"", ""},
- {"", ""}
- });
- }
+
SECTION("minimal PDF")
{
test_pdf_proc({
{"<</JS(script 3)>>\nendobj", "script 3"}
});
}
+
+ SECTION("split in indirect object index: first number")
+ {
+ test_pdf_proc({
+ {"\n1", ""},
+ {"23 0 obj\n", ""},
+ {"<< /JS (a % b) >>\nendobj\n", "a % b"},
+ });
+ }
+ SECTION("split in indirect object index: between numbers")
+ {
+ test_pdf_proc({
+ {"\n1", ""},
+ {" 0 obj\n", ""},
+ {"<< /JS (a % b) >>\nendobj\n", "a % b"},
+ });
+ }
+ SECTION("split in indirect object index: second number")
+ {
+ test_pdf_proc({
+ {"\n1 12", ""},
+ {"3 obj\n", ""},
+ {"<< /JS (a % b) >>\nendobj\n", "a % b"},
+ });
+ }
+ SECTION("split in indirect object index: after numbers")
+ {
+ test_pdf_proc({
+ {"\n1 0", ""},
+ {" obj\n", ""},
+ {"<< /JS (a % b) >>\nendobj\n", "a % b"},
+ });
+ }
+ SECTION("split in indirect object index: in keyword")
+ {
+ test_pdf_proc({
+ {"\n1 0 ob", ""},
+ {"j\n", ""},
+ {"<< /JS (a % b) >>\nendobj\n", "a % b"},
+ });
+ }
+ SECTION("split in indirect object index: multi-PDU reassembly")
+ {
+ test_pdf_proc({
+ {"\n1 ", ""},
+ {"0 ob", ""},
+ {"j\n<< /JS (a % b) >>\nendobj\n", "a % b"},
+ });
+ }
+ SECTION("split in indirect object close keyword")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<< /K /V /JS (foo % bar) >>\nend", "foo % bar"},
+ {"obj\n", ""},
+ {"2 0 obj\n<</JS (c % d)>>\nendobj\n","c % d"}
+ });
+ }
+ SECTION("split in dictionary opening brackets")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<", ""},
+ {"< /JS (a % b) >>\nendobj\n", "a % b"},
+ });
+ }
+ SECTION("split in dictionary closing brackets")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<< /K /V /JS (foo % bar) >", "foo % bar"},
+ {">\nendobj\n", ""},
+ {"2 0 obj\n<</JS (c % d)>>\nendobj\n","c % d"}
+ });
+ }
+ SECTION("split in name as dict-key")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K/V /J", ""},
+ {"S (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in name as dict-value")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K/V /foo /foo", ""},
+ {"bar /JS (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split after name as dict-value")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<< /K /V ", ""},
+ {"/JS (a % b) >>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in object reference value: first number")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K 12", ""},
+ {"3 0 R /JS (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in object reference value: second number")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K 123 1", ""},
+ {"0 R /JS (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in integer number value")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K 12", ""},
+ {"345 /JS (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in integer number value after sign")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K +", ""},
+ {"1 /JS (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in real number value before the dot")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K 123 0", ""},
+ {".5 /JS (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in real number value after the dot")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K 123 0.", ""},
+ {"5 /JS (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in boolean value")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K tr", ""},
+ {"ue /JS (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in null value")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K nu", ""},
+ {"ll /JS (a % b)>>\nendobj\n", "a % b"}
+ });
+ }
+ SECTION("split in object reference to stream")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<</K /V /JS 2 ", ""},
+ {" 0 R >>\nendobj\n", ""},
+ {
+ "2 0 obj\n"
+ "<< /Length 20 >>\n"
+ "stream\n"
+ "JavaScript in stream\n"
+ "endstream\n"
+ "endobj\n",
+ "JavaScript in stream\n"
+ }
+ });
+ }
+ SECTION("split in hex string")
+ {
+ test_pdf_proc({
+ {"\n1 0 obj\n", ""},
+ {"<< /JS <", ""},
+ {"66 6F 6F 20 62 61 72> >>\nendobj\n", "foo bar"},
+ });
+ }
+ SECTION("split in stream: length key")
+ {
+ test_pdf_proc({
+ {
+ "\n1 0 obj\n"
+ "<</K /V /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<< /Leng",
+ ""
+ },
+ {
+ "th 20 >>\n"
+ "stream\n"
+ "JavaScript in stream\n"
+ "endstream\n"
+ "endobj\n",
+ "JavaScript in stream\n"
+ }
+ });
+ }
+ SECTION("split in stream: length value")
+ {
+ test_pdf_proc({
+ {
+ "\n1 0 obj\n"
+ "<</K /V /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<< /Length 2",
+ ""
+ },
+ {
+ "0 >>\n"
+ "stream\n"
+ "JavaScript in stream\n"
+ "endstream\n"
+ "endobj\n",
+ "JavaScript in stream\n"
+ }
+ });
+ }
+ SECTION("split in stream: stream keyword")
+ {
+ test_pdf_proc({
+ {
+ "\n1 0 obj\n"
+ "<</K /V /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<< /Length 20 >>\n st",
+ ""
+ },
+ {
+ "ream\n"
+ "JavaScript in stream\n"
+ "endstream\n"
+ "endobj\n",
+ "JavaScript in stream\n"
+ }
+ });
+ }
+ SECTION("split in stream: content")
+ {
+ test_pdf_proc({
+ {
+ "\n1 0 obj\n"
+ "<</K /V /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<< /Length 20 >>\n stream\n"
+ "JavaScript",
+ "JavaScript"
+ },
+ {
+ " in stream\n"
+ "endstream\n"
+ "endobj\n",
+ " in stream\n"
+ }
+ });
+ }
+ SECTION("split in stream: content that looks like endstream")
+ {
+ test_pdf_proc({
+ {
+ "\n1 0 obj\n"
+ "<</K /V /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<< /Length 23 >>\n stream\n"
+ "end",
+ ""
+ },
+ {
+ "stream in JavaScript\n"
+ "endstream\n"
+ "endobj\n",
+ "endstream in JavaScript\n"
+ }
+ });
+ }
+ SECTION("split in stream: endstream keyword")
+ {
+ test_pdf_proc({
+ {
+ "\n1 0 obj\n"
+ "<</K /V /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<< /Length 20 >>\n stream\n"
+ "JavaScript in stream\n"
+ "end",
+ "JavaScript in stream\n"
+ },
+ {
+ "stream\n"
+ "endobj\n"
+ "\n3 0 obj\n"
+ "<</K /V /JS (foo)>>\n"
+ "endobj\n",
+ "foo"
+ }
+ });
+ }
}
TEST_CASE("stream object", "[PDFTokenizer]")
"bar\r\n"
"endstream\n"
"endobj\n",
- "bar\n"
+ "bar\r\n"
);
}
SECTION("reference as length")
"\nendstream\n \r\n"
"endstream\n"
"endobj\n",
- "\nendstream\n \n"
+ "\nendstream\n \r\n"
);
}
SECTION("referenced JavaScript")
"foo"
"endstream\n"
"endobj\n",
- "fooendstream\n"
- "endobj\n", PDFTokenizer::PDFRet::EOS
+ "foo", PDFTokenizer::PDFRet::EOS
);
}
}
"foo"s
);
}
+ SECTION("stream with 'endstream' content")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<<"
+ "/S /JavaScript"
+ "/JS 2 0 R"
+ ">>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<</Length 8>>\n"
+ "stream\n"
+ "\xfe\xff\0e\0n\0d\0s\0t\0r\0e\0a\0m\n"
+ "endstream\n"
+ "endobj"s,
+ "endstream"s
+ );
+ }
SECTION("hexadecimal string")
{
test_pdf_proc(
PDFTokenizer::PDFRet::UNEXPECTED_SYMBOL
);
}
+ SECTION("unfinished trailing symbol")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<<"
+ "/S /JavaScript"
+ "/JS (\xfe\xff\0f\0o\0o\xF0)"
+ "/JS (\xfe\xff\0b\0a\0r\xBA)"s,
+ "foobar"s
+ );
+ }
}
TEST_CASE("UTF-16, cross-PDU", "[PDFTokenizer]")
}
});
}
+ SECTION("split in stream that looks like endstream")
+ {
+ test_pdf_proc({
+ {
+ "1 0 obj\n"
+ "<</S/JavaScript/JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<</Length 14>>\n"
+ "stream\n"
+ "\xfe\xff\0f\0o\0o\0e"s,
+ "foo"s
+ },
+ {
+ "\0n\0d\n"
+ "endstream\n"
+ "endobj"s,
+ "end"s
+ }
+ });
+ }
+ SECTION("split in endstream tag")
+ {
+ test_pdf_proc({
+ {
+ "1 0 obj\n"
+ "<</S/JavaScript/JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<</Length 14>>\n"
+ "stream\n"
+ "\xfe\xff\0f\0o\0o\nend"s,
+ "foo"s
+ },
+ {
+ "stream\n"
+ "endobj\n"
+ "3 0 obj\n"
+ "<</S/JavaScript/JS(foo)>>\n"
+ "endobj\n"s,
+ "foo"s
+ }
+ });
+ }
}