#endif
-#include <algorithm>
#include <cassert>
-#include <stdio.h>
#include "js_norm/js_enum.h"
#include "js_norm/pdf_tokenizer.h"
#define PUSH(x) yy_push_state(x)
#define POP() yy_pop_state()
-#define YY_USER_ACTION \
- { \
- debug_logf(5, js_trace, TRACE_PDF_PROC, nullptr, \
- "PDF pattern #%d, sc %d\n", yy_act, YY_START); \
- \
- debug_logf(5, js_trace, TRACE_PDF_DUMP, nullptr, \
- "PDF text '%s'\n", YYText()); \
+#define YY_USER_ACTION \
+ { \
+ debug_logf(5, js_trace, TRACE_PDF_PROC, nullptr, \
+ "PDF pattern #%d, sc %d\n", yy_act, YY_START); \
+ debug_logf(5, js_trace, TRACE_PDF_DUMP, nullptr, \
+ "PDF text '%s'\n", YYText()); \
}
-#define EXEC(f) \
- { \
- auto r = (f); \
- if (r) \
- return r; \
+#define EXEC(f) \
+ { \
+ auto r = (f); \
+ if (r) \
+ return r; \
}
%}
/* PDF 32000-1:2008 definitions follow */
/* 7.2.2 Character Set */
-CHARS_WHITESPACE \x00\x09\x0a\x0c\x0d\x20
-CHARS_DELIMITER \(\)\<\>\[\]\{\}\/\%
-GRP_WHITESPACE [\x00\x09\x0a\x0c\x0d\x20]
-GRP_DELIMITER [\(\)\<\>\[\]\{\}\/\%]
-GRP_REGULAR [^\x00\x09\x0a\x0c\x0d\x20\(\)\<\>\[\]\{\}\/\%]
+CHARS_WHITESPACE \x00\x09\x0a\x0c\x0d\x20
+CHARS_DELIMITER \(\)\<\>\[\]\{\}\/\%
+GRP_WHITESPACE [\x00\x09\x0a\x0c\x0d\x20]
+EOL_MARKER \r|\n|\r\n
+GRP_NEWLINE [\x0d\x0a]
+GRP_NOT_NEWLINE [^\x0d\x0a]
+GRP_DELIMITER [\(\)\<\>\[\]\{\}\/\%]
+GRP_REGULAR [^\x00\x09\x0a\x0c\x0d\x20\(\)\<\>\[\]\{\}\/\%]
/* 7.2.3 Comments */
-COMMENT %.*
+COMMENT %{GRP_NOT_NEWLINE}*{EOL_MARKER}
/* 7.3.2 Boolean Objects */
-OBJ_BOOLEAN true|false
+OBJ_BOOLEAN true|false
/* 7.3.3 Numeric Objects */
-OBJ_INT_NUM [+-]?[0-9]{1,64}
-OBJ_REL_NUM [+-]?("."?[0-9]{1,64}|[0-9]{1,64}"."?|[0-9]{1,64}"."?[0-9]{1,64})
+OBJ_INT_NUM [+-]?[0-9]{1,64}
+OBJ_REL_NUM [+-]?("."?[0-9]{1,64}|[0-9]{1,64}"."?|[0-9]{1,64}"."?[0-9]{1,64})
/* 7.3.4 String Objects */
-OBJ_LIT_STR_OPEN "("
-OBJ_LIT_STR_CLOSE ")"
-OBJ_HEX_STR_OPEN "<"
-OBJ_HEX_STR_CLOSE ">"
+OBJ_LIT_STR_OPEN "("
+OBJ_LIT_STR_CLOSE ")"
+OBJ_HEX_STR_OPEN "<"
+OBJ_HEX_STR_CLOSE ">"
/* 7.3.4.2 Literal Strings */
-LIT_STR_ESC \\[^0-7]
-LIT_STR_ESC_OCT \\[0-7]{1}|\\[0-7]{2}|\\[0-7]{3}
-LIT_STR_ESC_EOL \\[\x0d\x0a]|\\\x0d\x0a
-LIT_STR_EOL [\x0d\x0a]|\x0d\x0a
-LIT_STR_BODY [^\\\(\)]{1,64}
+LIT_STR_ESC \\[^0-7]
+LIT_STR_ESC_OCT \\[0-7]{1}|\\[0-7]{2}|\\[0-7]{3}
+LIT_STR_ESC_EOL \\[\x0d\x0a]|\\\x0d\x0a
+LIT_STR_EOL [\x0d\x0a]|\x0d\x0a
+LIT_STR_BODY [^\\\(\)]{1,64}
/* 7.3.4.3 Hexadecimal Strings */
-HEX_STR_BODY [0-9A-Fa-f]{1,64}
-HEX_STR_SKIP [^0-9A-Fa-f>]{1,64}
+HEX_STR_BODY [0-9A-Fa-f]{1,64}
+HEX_STR_SKIP [^0-9A-Fa-f>]{1,64}
/* 7.3.5 Name Objects */
-OBJ_NAME \/{GRP_REGULAR}{1,256}
+OBJ_NAME \/{GRP_REGULAR}{1,256}
/* 7.3.6 Array Objects */
-OBJ_ARRAY_OPEN "["
-OBJ_ARRAY_CLOSE "]"
+OBJ_ARRAY_OPEN "["
+OBJ_ARRAY_CLOSE "]"
/* 7.3.7 Dictionary Objects */
-OBJ_DICT_OPEN "<<"
-OBJ_DICT_CLOSE ">>"
+OBJ_DICT_OPEN "<<"
+OBJ_DICT_CLOSE ">>"
-/* FIXIT: improve bytes consuming */
-OBJ_DICT_SKIP .
+OBJ_DICT_SKIP .|{GRP_NEWLINE}
/* 7.3.8 Stream Objects */
-OBJ_STREAM_OPEN stream$
-OBJ_STREAM_CLOSE ^endstream
+OBJ_STREAM_OPEN stream\r?\n
+OBJ_STREAM_CLOSE {EOL_MARKER}endstream
+OBJ_STREAM_SKIP {GRP_NOT_NEWLINE}{1,64}|{GRP_NEWLINE}
/* 7.3.9 Null Object */
-OBJ_NULL null
+OBJ_NULL null
/* 7.3.10 Indirect Objects */
-INDIRECT_OBJ {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}+obj
-RECORD_OBJ {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}+R
+INDIRECT_OBJ_OPEN {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}+obj
-/* Not dictionary, not strings */
-SKIP [^<\(%]{1,64}
+INDIRECT_OBJ_CLOSE endobj
+
+OBJ_REFERENCE {OBJ_INT_NUM}{GRP_WHITESPACE}+{OBJ_INT_NUM}{GRP_WHITESPACE}+R
+
+
+/* Not object start, not comments */
+SKIP [^[:digit:]%]{1,64}|.
WHITESPACE {GRP_WHITESPACE}{1,64}
/* Start conditions: INITIAL or inside dictionary, literal string, hexadecimal string */
+%x indobj
+%x stream
%x dictnr
%x litstr
%x hexstr
%x jslstr
%x jshstr
+%x jsstream
%%
{SKIP} { }
{COMMENT} { }
-<INITIAL,dictnr>{OBJ_DICT_OPEN} { PUSH(dictnr); EXEC(h_dict_open()) }
+<INITIAL>{INDIRECT_OBJ_OPEN} { PUSH(indobj); h_ind_obj_open(); }
+<indobj>{COMMENT} { }
+<indobj>{WHITESPACE} { }
+<indobj>{INDIRECT_OBJ_CLOSE} { POP(); h_ind_obj_close(); }
+
+<indobj>{OBJ_STREAM_OPEN} { EXEC(h_stream_open()) PUSH(obj_stream.is_js ? jsstream : stream); }
+<stream>{OBJ_STREAM_SKIP} { EXEC(h_stream()) }
+<jsstream>{OBJ_STREAM_SKIP} { EXEC(h_stream()) ECHO; }
+<stream>{OBJ_STREAM_CLOSE} { if (h_stream_close()) POP(); }
+<jsstream>{OBJ_STREAM_CLOSE} { if (h_stream_close()) POP(); }
+
+<dictnr>{OBJ_DICT_OPEN} { PUSH(dictnr); EXEC(h_dict_open()) }
+<indobj>{OBJ_DICT_OPEN} { PUSH(dictnr); EXEC(h_dict_open()) }
<dictnr>{OBJ_DICT_CLOSE} { POP(); EXEC(h_dict_close()) }
<dictnr>{COMMENT} { }
<dictnr>{WHITESPACE} { }
-<dictnr>{RECORD_OBJ} { EXEC(h_dict_other()) }
+<dictnr>{OBJ_REFERENCE} { EXEC(h_dict_other()) h_ref(); }
<dictnr>{OBJ_BOOLEAN} { EXEC(h_dict_other()) }
-<dictnr>{OBJ_INT_NUM} { EXEC(h_dict_other()) }
+<dictnr>{OBJ_INT_NUM} { EXEC(h_dict_other()) h_stream_length(); }
<dictnr>{OBJ_REL_NUM} { EXEC(h_dict_other()) }
<dictnr>{OBJ_NULL} { EXEC(h_dict_other()) }
<dictnr>{OBJ_NAME} { EXEC(h_dict_name()) }
<dictnr>{OBJ_HEX_STR_OPEN} { EXEC(h_dict_other()) if (h_hex_str()) PUSH(jshstr); else PUSH(hexstr); yyless(0); }
<dictnr>{OBJ_DICT_SKIP} { }
-<INITIAL>{OBJ_LIT_STR_OPEN} { if (h_lit_open()) PUSH(litstr); }
+<indobj>{OBJ_LIT_STR_OPEN} { if (h_lit_open()) PUSH(litstr); }
<litstr>{OBJ_LIT_STR_OPEN} { h_lit_open(); }
<litstr>{OBJ_LIT_STR_CLOSE} { if (h_lit_close()) POP(); }
<litstr>{LIT_STR_ESC} { }
<litstr>{LIT_STR_EOL} { }
<litstr>{LIT_STR_BODY} { }
-<INITIAL>{OBJ_HEX_STR_OPEN} { PUSH(hexstr); }
+<indobj>{OBJ_HEX_STR_OPEN} { PUSH(hexstr); }
<hexstr>{OBJ_HEX_STR_CLOSE} { POP(); }
<hexstr>{HEX_STR_BODY} { }
<hexstr>{HEX_STR_SKIP} { }
<jshstr>{HEX_STR_BODY} { EXEC(h_hex_hex2chr()) }
<jshstr>{HEX_STR_SKIP} { }
-<INITIAL,dictnr,litstr,hexstr,jslstr,jshstr><<EOF>> { return PDFRet::EOS; }
+<*><<EOF>> { return PDFRet::EOS; }
+
+<*>.|\n { return PDFRet::UNEXPECTED_SYMBOL; }
%%
debug_logf(6, js_trace, TRACE_PDF_PROC, nullptr,
"dictionary token: other\n");
+ debug_logf(6, js_trace, TRACE_PDF_DUMP, nullptr,
+ "dictionary entry: %s, %s\n", obj_entry.key, yytext);
+
obj_dictionary.key_value = !obj_dictionary.key_value;
return PDFRet::EOS;
return PDFRet::EOS;
}
+PDFTokenizer::PDFRet PDFTokenizer::h_stream_open()  // invoked by the <indobj>{OBJ_STREAM_OPEN} rule before a stream state is pushed
+{
+ if (obj_stream.rem_length < 0)  // a negative remaining length is rejected; presumably it means no /Length was recorded -- TODO confirm the initial value
+ return PDFRet::STREAM_NO_LENGTH;
+
+ debug_logf(6, js_trace, TRACE_PDF_PROC, nullptr,
+ "Starting %s stream, length %d\n", obj_stream.is_js ? "JavaScript" : "skipping", obj_stream.rem_length);
+
+ return PDFRet::EOS;  // the calling rule wraps this in EXEC(), which returns from the scanner only when the result is non-zero
+}
+
+PDFTokenizer::PDFRet PDFTokenizer::h_stream()  // accounts for one chunk of stream data matched by the <stream>/<jsstream> {OBJ_STREAM_SKIP} rules
+{
+ obj_stream.rem_length -= yyleng;  // yyleng is the length of the text just matched; no lower bound is enforced here
+ return PDFRet::EOS;
+}
+
+bool PDFTokenizer::h_stream_close()  // called when {OBJ_STREAM_CLOSE} (EOL marker + "endstream") matches inside a stream state; true tells the rule to POP()
+{
+ obj_stream.rem_length -= yyleng;  // the matched EOL + "endstream" text is counted against the declared length as well
+ if (obj_stream.rem_length <= 0)  // declared length is used up: treat this match as the real end of the stream
+ {
+ if (YY_START == jsstream)
+ yyout << '\n';  // JavaScript output gets a single '\n' in place of the matched marker text
+ return true;
+ }
+
+ if (YY_START == jsstream)  // length not yet exhausted: the matched text is treated as stream data
+ ECHO;
+ return false;  // caller stays in the current stream state
+}
+
+void PDFTokenizer::h_stream_length()  // called from the <dictnr>{OBJ_INT_NUM} rule with the integer token in yytext
+{
+ if (!strcmp(obj_entry.key, "/Length"))  // only an integer whose recorded dictionary key is exactly "/Length" is used
+ obj_stream.rem_length = atoi(yytext);  // NOTE(review): OBJ_INT_NUM admits up to 64 digits; atoi has no defined result for out-of-range values -- confirm this is acceptable
+}
+
+void PDFTokenizer::h_ref()  // called from the <dictnr>{OBJ_REFERENCE} rule; yytext holds the whole "<int> <int> R" match
+{
+ if (!strcmp(obj_entry.key, "/JS"))  // only references stored under the "/JS" key are remembered
+ js_stream_refs.insert(atoi(yytext));  // atoi stops at the first whitespace, so only the leading integer of the reference is stored; NOTE(review): up to 64 digits may reach atoi
+}
+
+void PDFTokenizer::h_ind_obj_open()  // called from the <INITIAL>{INDIRECT_OBJ_OPEN} rule; yytext holds the whole "<int> <int> obj" match
+{
+ int value = atoi(yytext);  // parses only the leading integer of the match (conversion stops at the first whitespace)
+ if (js_stream_refs.count(value) > 0)  // this number was previously recorded by h_ref() from a "/JS" reference
+ obj_stream.is_js = true;  // the flag is only ever set here; presumably it is reset when the object closes -- TODO confirm in h_ind_obj_close()
+}
+
PDFTokenizer::PDFTokenizer(std::istream& in, std::ostream& out)
: yyFlexLexer(in, out)
{
PDFTokenizer::PDFRet PDFTokenizer::process()
{
auto r = yylex();
-
- return (PDFRet)r;
+ return static_cast<PDFTokenizer::PDFRet>(r);
}
SECTION("comments")
{
test_pdf_proc(
+ "1 0 obj\n"
"% comment 1\n"
"<</K/V % comment /JS (script 1)\n>>"
- "<</K/V /JS (a % b)>>\n"
- "(% not a comment)\n"
+ "<</K/V % comment\r /JS (script 2; )\n>>"
+ "<</K/V /JS (a % b; )>>\n"
"% comment 2\n"
- "<</JS (; script 2) % comment 3\n>>",
- "a % b; script 2"
+ "<</JS (script 3) % comment 3\n>>"
+ "(% not a comment)\n"
+ "endobj\n",
+ "script 2; a % b; script 3"
);
}
SECTION("escapes in string")
SECTION("not name for key")
{
test_pdf_proc(
+ "1 0 obj"
"<<"
"/K1 /V1"
"[/K2] /V2"
"/K3 /V3"
- ">>",
+ ">>"
+ "endobj",
"", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY
);
}
SECTION("literal string as a key")
{
test_pdf_proc(
+ "1 0 obj"
"<<"
"/K1 /V1"
"(foo) /V2"
"/K3 /V3"
- ">>",
+ ">>"
+ "endobj",
"", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY
);
}
SECTION("hex string as a key")
{
test_pdf_proc(
+ "1 0 obj"
"<<"
"/K1 /V1"
"<62617a> /V2"
"/K3 /V3"
- ">>",
+ ">>"
+ "endobj",
"", PDFTokenizer::PDFRet::NOT_NAME_IN_DICTIONARY_KEY
);
}
SECTION("incomplete array")
{
test_pdf_proc(
+ "1 0 obj"
"<<"
"/K1 [ /V1 /V2 /V3 "
- ">>",
+ ">>"
+ "endobj",
"", PDFTokenizer::PDFRet::INCOMPLETE_ARRAY_IN_DICTIONARY
);
}
SECTION("no sub-type")
{
test_pdf_proc(
- "<< /JS (script) >>",
+ "1 0 obj\n<< /JS (script) >>",
"script"
);
}
SECTION("no sub-type checks")
{
test_pdf_proc(
- "<< /JS (script) /S /JavaScript >>",
+ "1 0 obj\n<< /JS (script) /S /JavaScript >>",
"script"
);
}
SECTION("no spaces")
{
test_pdf_proc(
- "<</S/JavaScript/JS(script)>>",
+ "1 0 obj\n<</S/JavaScript/JS(script)>>",
"script"
);
}
SECTION("as hex string")
{
test_pdf_proc(
- "<< /JS <62617a> >>",
+ "1 0 obj\n<< /JS <62617a> >>",
"baz"
);
test_pdf_proc(
- "<< /JS <70> >>",
+ "1 0 obj\n<< /JS <70> >>",
"p"
);
test_pdf_proc(
- "<< /JS <7> >>",
+ "1 0 obj\n<< /JS <7> >>",
"p"
);
}
SECTION("prepended with records")
{
test_pdf_proc(
- "<</A 10 0 R /B 11 1 R/S/JavaScript/JS(script)>>",
+ "1 0 obj\n<</A 10 0 R /B 11 1 R/S/JavaScript/JS(script)>>",
"script"
);
}
SECTION("simple text")
{
test_pdf_proc(
- "<</JS"
+ "1 0 obj\n<</JS"
"(var _abc1 = 'Hello World!';)"
">>",
"var _abc1 = 'Hello World!';"
SECTION("balanced parenthesis")
{
test_pdf_proc(
- "<</JS"
+ "1 0 obj\n<</JS"
"(function foo() { console.log(\"Hello world!\") })"
">>",
"function foo() { console.log(\"Hello world!\") }"
SECTION("with escapes")
{
test_pdf_proc(
- "<</JS"
+ "1 0 obj\n<</JS"
"(function bar\\(var x\\)\\r{\\r console.log\\(\"baz\"\\)\\r})"
">>",
"function bar(var x)\r{\r console.log(\"baz\")\r}"
SECTION("all escapes")
{
test_pdf_proc(
- "<</JS"
+ "1 0 obj\n<</JS"
"(() \\n\\r\\t\\b\\f\\(\\)\\\\ \\123 \\A\\B\\C \\x\\y\\z)"
">>",
"() \n\r\t\b\f()\\ \123 ABC xyz"
SECTION("escaped new line")
{
test_pdf_proc(
- "<</JS"
+ "1 0 obj\n<</JS"
"(var str = 'Hello\\\n , \\\r world\\\r\n\t!';)"
">>",
"var str = 'Hello, world!';"
{
test_pdf_proc({
{"% comment", ""},
- {"\n", ""},
+ {"\n1 0 obj\n", ""},
{"<</K/V /JS (a % b)>>\n", "a % b"},
- {"(% not a", ""},
- {"comment)\n", ""},
+ {"endobj\n2 0 obj\n(% not a", ""},
+ {"comment)\nendobj\n3 0 obj\n", ""},
{"<</JS (;", ";"},
{"script 2)", "script 2"},
- {">>", ""},
- {"<</JS(script 3)>>", "script 3"}
+ {">>\nendobj\n4 0 obj\n", ""},
+ {"<</JS(script 3)>>\nendobj", "script 3"}
+ });
+ }
+}
+
+TEST_CASE("stream object", "[PDFTokenizer]")  // well-formed streams: object 1 references a stream via /JS, and the referenced stream body is the expected output
+{
+ SECTION("zero length")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 0"
+ ">>"
+ "stream\n"
+ "\n"
+ "endstream\n"
+ "endobj\n",
+ "\n"
+ );
+ }
+ SECTION("exact length")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 6\n"
+ ">>\n"
+ "stream\n"
+ "foobar\n"
+ "endstream\n"
+ "endobj\n",
+ "foobar\n"
+ );
+ }
+ SECTION("carriage return and line feed as EOL")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 3\n"
+ ">>"
+ "stream\r\n"
+ "bar\r\n"
+ "endstream\n"
+ "endobj\n",
+ "bar\n"
+ );
+ }
+ SECTION("special symbols in a stream")  // an "endstream" line that falls inside the declared /Length is expected to be emitted as data
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 13\n"
+ ">>"
+ "stream\n"
+ "\nendstream\n \r\n"
+ "endstream\n"
+ "endobj\n",
+ "\nendstream\n \n"
+ );
+ }
+ SECTION("referenced JavaScript")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 9\n"
+ ">>"
+ "stream\n"
+ "var a = 0\n"
+ "endstream\n"
+ "endobj\n",
+ "var a = 0\n"
+ );
+ }
+ SECTION("referenced JavaScript after another stream")  // the unreferenced stream of object 3 must not appear in the expected output
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "3 0 obj\n"
+ "<</Length 1>>\n"
+ "stream\n"
+ " \n"
+ "endstream\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 9\n"
+ ">>"
+ "stream\n"
+ "var a = 0\n"
+ "endstream\n"
+ "endobj\n",
+ "var a = 0\n"
+ );
+ }
+ SECTION("multiple revisions")  // only "2 1 R" is referenced, yet the expected output holds the streams of both "2 0 obj" and "2 1 obj"
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 1 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<</Length 13>>\n"
+ "stream\n"
+ "//revision 1\n\n"
+ "endstream\n"
+ "endobj\n"
+ "2 1 obj\n"
+ "<</Length 13>>\n"
+ "stream\n"
+ "//revision 2\n\n"
+ "endstream\n"
+ "endobj\n",
+ "//revision 1\n\n"
+ "//revision 2\n\n"
+ );
+ }
+}
+
+TEST_CASE("stream object malformed", "[PDFTokenizer]")  // malformed or inconsistent stream objects: each section pins the expected output and return code
+{
+ SECTION("no dictionary")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "stream\n"
+ "foo\n"
+ "endstream\n"
+ "endobj\n",
+ "", PDFTokenizer::PDFRet::STREAM_NO_LENGTH
+ );
+ }
+ SECTION("a direct stream")  // a stream outside any indirect object is expected to be ignored without error
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "<<"
+ "/Length 3"
+ ">>\n"
+ "stream\n"
+ "foo\n"
+ "endstream\n",
+ "", PDFTokenizer::PDFRet::EOS
+ );
+ }
+ SECTION("an indirect dictionary")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 3 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 3"
+ ">>\n"
+ "endobj\n"
+ "3 0 obj\n"
+ "2 0 R\n"
+ "stream\n"
+ "foo\n"
+ "endstream\n"
+ "endobj\n",
+ "", PDFTokenizer::PDFRet::UNEXPECTED_SYMBOL
+ );
+ }
+ SECTION("no length")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Creator (Acrobat Pro DC 22.1.20169)"
+ ">>\n"
+ "stream\n"
+ "foo\n"
+ "endstream\n"
+ "endobj\n",
+ "", PDFTokenizer::PDFRet::STREAM_NO_LENGTH
+ );
+ }
+ SECTION("length less")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 2"
+ ">>\n"
+ "stream\n"
+ "foo\n"
+ "endstream\n"
+ "endobj\n",
+ "foo\n", PDFTokenizer::PDFRet::EOS
+ );
+ }
+ SECTION("length greater within a few bytes")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 4"
+ ">>\n"
+ "stream\n"
+ "foo\n"
+ "endstream\n"
+ "endobj\n",
+ "foo\n", PDFTokenizer::PDFRet::EOS
+ // Note: the '\n' in the expected output is not copied from the source data.
+ // The tokenizer does not extract exactly "/Length" bytes; as long as the
+ // declared length exceeds the real one by no more than a few bytes, the
+ // stream is still read correctly up to the endstream marker.
+ );
+ }
+ SECTION("length greater")  // declared length far beyond the data: the endstream/endobj lines are expected in the output
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 100"
+ ">>\n"
+ "stream\n"
+ "foo\n"
+ "endstream\n"
+ "endobj\n",
+ "foo\n"
+ "endstream\n"
+ "endobj\n", PDFTokenizer::PDFRet::EOS
+ );
+ }
+ SECTION("carriage return following the keyword stream")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 3"
+ ">>\n"
+ "stream\r"
+ "foo\r"
+ "endstream\n"
+ "endobj\n",
+ "", PDFTokenizer::PDFRet::UNEXPECTED_SYMBOL
+ );
+ }
+ SECTION("no end-of-line marker specified")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 3"
+ ">>\n"
+ "stream"
+ "foo"
+ "endstream\n"
+ "endobj\n",
+ "", PDFTokenizer::PDFRet::UNEXPECTED_SYMBOL
+ );
+ }
+ SECTION("no end-of-line marker in stream data")
+ {
+ test_pdf_proc(
+ "1 0 obj\n"
+ "<</S /JavaScript /JS 2 0 R>>\n"
+ "endobj\n"
+ "2 0 obj\n"
+ "<<"
+ "/Length 3"
+ ">>\n"
+ "stream\n"
+ "foo"
+ "endstream\n"
+ "endobj\n",
+ "fooendstream\n"
+ "endobj\n", PDFTokenizer::PDFRet::EOS
+ );
+ }
+}
+
+TEST_CASE("stream object over PDU", "[PDFTokenizer]")  // input arrives in several chunks; each brace pair appears to be {input chunk, expected output} -- test_pdf_proc is defined outside this view
+{
+ SECTION("split inside non-JS stream")  // no /JS reference exists, so both chunks expect empty output
+ {
+ test_pdf_proc({
+ {
+ "10 0 obj\n"
+ "<</Length 6>>\n"
+ "stream\n"
+ "foo",
+ ""
+ },
+ {
+ "bar\n"
+ "endstream\n"
+ "endobj\n",
+ ""
+ }
+ });
+ }
+ SECTION("split inside JavaScript stream")  // stream data is expected per chunk: "foo" from the first, "bar\n" from the second
+ {
+ test_pdf_proc({
+ {
+ "1 0 obj\n"
+ "<</JS 10 0 R>>\n"
+ "endobj\n"
+ "10 0 obj\n"
+ "<</Length 6>>\n"
+ "stream\n"
+ "foo",
+ "foo"
+ },
+ {
+ "bar\n"
+ "endstream\n"
+ "endobj\n",
+ "bar\n"
+ }
+ });
+ }
+ SECTION("split between reference and stream obj")  // the /JS reference seen in the first chunk must still apply to the object in the second
+ {
+ test_pdf_proc({
+ {
+ "1 0 obj\n"
+ "<</JS 10 0 R>>\n"
+ "endobj\n",
+ ""
+ },
+ {
+ "10 0 obj\n"
+ "<</Length 6>>\n"
+ "stream\n"
+ "foobar\n"
+ "endstream\n"
+ "endobj\n",
+ "foobar\n"
+ }
+ });
+ }
+ SECTION("split between dictionary and stream")  // the /Length parsed in the first chunk must still apply to the stream in the second
+ {
+ test_pdf_proc({
+ {
+ "1 0 obj\n"
+ "<</JS 10 0 R>>\n"
+ "endobj\n"
+ "10 0 obj\n"
+ "<</Length 6>>\n",
+ ""
+ },
+ {
+ "stream\n"
+ "foobar\n"
+ "endstream\n"
+ "endobj\n",
+ "foobar\n"
+ }
});
}
}