]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | //======================================================================== |
2 | // | |
3 | // Parser.cc | |
4 | // | |
5 | // Copyright 1996-2003 Glyph & Cog, LLC | |
6 | // | |
7 | //======================================================================== | |
8 | ||
9 | #include <config.h> | |
10 | ||
11 | #ifdef USE_GCC_PRAGMAS | |
12 | #pragma implementation | |
13 | #endif | |
14 | ||
15 | #include <stddef.h> | |
16 | #include "Object.h" | |
17 | #include "Array.h" | |
18 | #include "Dict.h" | |
19 | #include "Parser.h" | |
20 | #include "XRef.h" | |
21 | #include "Error.h" | |
22 | #include "Decrypt.h" | |
23 | ||
24 | Parser::Parser(XRef *xrefA, Lexer *lexerA) { | |
25 | xref = xrefA; | |
26 | lexer = lexerA; | |
27 | inlineImg = 0; | |
28 | lexer->getObj(&buf1); | |
29 | lexer->getObj(&buf2); | |
30 | } | |
31 | ||
32 | Parser::~Parser() { | |
33 | buf1.free(); | |
34 | buf2.free(); | |
35 | delete lexer; | |
36 | } | |
37 | ||
38 | Object *Parser::getObj(Object *obj, | |
39 | Guchar *fileKey, int keyLength, | |
40 | int objNum, int objGen) { | |
41 | char *key; | |
42 | Stream *str; | |
43 | Object obj2; | |
44 | int num; | |
45 | Decrypt *decrypt; | |
46 | GString *s; | |
47 | char *p; | |
48 | int i; | |
49 | ||
50 | // refill buffer after inline image data | |
51 | if (inlineImg == 2) { | |
52 | buf1.free(); | |
53 | buf2.free(); | |
54 | lexer->getObj(&buf1); | |
55 | lexer->getObj(&buf2); | |
56 | inlineImg = 0; | |
57 | } | |
58 | ||
59 | // array | |
60 | if (buf1.isCmd("[")) { | |
61 | shift(); | |
62 | obj->initArray(xref); | |
63 | while (!buf1.isCmd("]") && !buf1.isEOF()) | |
64 | obj->arrayAdd(getObj(&obj2, fileKey, keyLength, objNum, objGen)); | |
65 | if (buf1.isEOF()) | |
66 | error(getPos(), "End of file inside array"); | |
67 | shift(); | |
68 | ||
69 | // dictionary or stream | |
70 | } else if (buf1.isCmd("<<")) { | |
71 | shift(); | |
72 | obj->initDict(xref); | |
73 | while (!buf1.isCmd(">>") && !buf1.isEOF()) { | |
74 | if (!buf1.isName()) { | |
75 | error(getPos(), "Dictionary key must be a name object"); | |
76 | shift(); | |
77 | } else { | |
78 | key = copyString(buf1.getName()); | |
79 | shift(); | |
80 | if (buf1.isEOF() || buf1.isError()) { | |
81 | gfree(key); | |
82 | break; | |
83 | } | |
84 | obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen)); | |
85 | } | |
86 | } | |
87 | if (buf1.isEOF()) | |
88 | error(getPos(), "End of file inside dictionary"); | |
89 | if (buf2.isCmd("stream")) { | |
90 | if ((str = makeStream(obj))) { | |
91 | obj->initStream(str); | |
92 | if (fileKey) { | |
93 | str->getBaseStream()->doDecryption(fileKey, keyLength, | |
94 | objNum, objGen); | |
95 | } | |
96 | } else { | |
97 | obj->free(); | |
98 | obj->initError(); | |
99 | } | |
100 | } else { | |
101 | shift(); | |
102 | } | |
103 | ||
104 | // indirect reference or integer | |
105 | } else if (buf1.isInt()) { | |
106 | num = buf1.getInt(); | |
107 | shift(); | |
108 | if (buf1.isInt() && buf2.isCmd("R")) { | |
109 | obj->initRef(num, buf1.getInt()); | |
110 | shift(); | |
111 | shift(); | |
112 | } else { | |
113 | obj->initInt(num); | |
114 | } | |
115 | ||
116 | // string | |
117 | } else if (buf1.isString() && fileKey) { | |
118 | buf1.copy(obj); | |
119 | s = obj->getString(); | |
120 | decrypt = new Decrypt(fileKey, keyLength, objNum, objGen); | |
121 | for (i = 0, p = obj->getString()->getCString(); | |
122 | i < s->getLength(); | |
123 | ++i, ++p) { | |
124 | *p = decrypt->decryptByte(*p); | |
125 | } | |
126 | delete decrypt; | |
127 | shift(); | |
128 | ||
129 | // simple object | |
130 | } else { | |
131 | buf1.copy(obj); | |
132 | shift(); | |
133 | } | |
134 | ||
135 | return obj; | |
136 | } | |
137 | ||
138 | Stream *Parser::makeStream(Object *dict) { | |
139 | Object obj; | |
140 | BaseStream *baseStr; | |
141 | Stream *str; | |
142 | Guint pos, endPos, length; | |
143 | ||
144 | // get stream start position | |
145 | lexer->skipToNextLine(); | |
146 | pos = lexer->getPos(); | |
147 | ||
148 | // get length | |
149 | dict->dictLookup("Length", &obj); | |
150 | if (obj.isInt()) { | |
151 | length = (Guint)obj.getInt(); | |
152 | obj.free(); | |
153 | } else { | |
154 | error(getPos(), "Bad 'Length' attribute in stream"); | |
155 | obj.free(); | |
156 | return NULL; | |
157 | } | |
158 | ||
159 | // check for length in damaged file | |
160 | if (xref && xref->getStreamEnd(pos, &endPos)) { | |
161 | length = endPos - pos; | |
162 | } | |
163 | ||
164 | // in badly damaged PDF files, we can run off the end of the input | |
165 | // stream immediately after the "stream" token | |
166 | if (!lexer->getStream()) { | |
167 | return NULL; | |
168 | } | |
169 | baseStr = lexer->getStream()->getBaseStream(); | |
170 | ||
171 | // skip over stream data | |
172 | lexer->setPos(pos + length); | |
173 | ||
174 | // refill token buffers and check for 'endstream' | |
175 | shift(); // kill '>>' | |
176 | shift(); // kill 'stream' | |
177 | if (buf1.isCmd("endstream")) { | |
178 | shift(); | |
179 | } else { | |
180 | error(getPos(), "Missing 'endstream'"); | |
181 | // kludge for broken PDF files: just add 5k to the length, and | |
182 | // hope its enough | |
183 | length += 5000; | |
184 | } | |
185 | ||
186 | // make base stream | |
187 | str = baseStr->makeSubStream(pos, gTrue, length, dict); | |
188 | ||
189 | // get filters | |
190 | str = str->addFilters(dict); | |
191 | ||
192 | return str; | |
193 | } | |
194 | ||
195 | void Parser::shift() { | |
196 | if (inlineImg > 0) { | |
197 | if (inlineImg < 2) { | |
198 | ++inlineImg; | |
199 | } else { | |
200 | // in a damaged content stream, if 'ID' shows up in the middle | |
201 | // of a dictionary, we need to reset | |
202 | inlineImg = 0; | |
203 | } | |
204 | } else if (buf2.isCmd("ID")) { | |
205 | lexer->skipChar(); // skip char after 'ID' command | |
206 | inlineImg = 1; | |
207 | } | |
208 | buf1.free(); | |
209 | buf1 = buf2; | |
210 | if (inlineImg > 0) // don't buffer inline image data | |
211 | buf2.initNull(); | |
212 | else | |
213 | lexer->getObj(&buf2); | |
214 | } |