]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | //======================================================================== |
2 | // | |
3 | // Catalog.cc | |
4 | // | |
5 | // Copyright 1996-2003 Glyph & Cog, LLC | |
6 | // | |
7 | //======================================================================== | |
8 | ||
9 | #include <config.h> | |
10 | ||
11 | #ifdef USE_GCC_PRAGMAS | |
12 | #pragma implementation | |
13 | #endif | |
14 | ||
15 | #include <stddef.h> | |
16 | #include "gmem.h" | |
17 | #include "Object.h" | |
18 | #include "XRef.h" | |
19 | #include "Array.h" | |
20 | #include "Dict.h" | |
21 | #include "Page.h" | |
22 | #include "Error.h" | |
23 | #include "Link.h" | |
24 | #include "Catalog.h" | |
25 | ||
// Upper bound on how deeply readPageTree may recurse.  Page tree nodes can
// reference their own parents, which would otherwise send the reader into
// an infinite loop.  Most sane PDF documents do not nest deeper than about
// 10 levels, so this limit leaves a very wide margin.
#define MAX_CALL_DEPTH 1000
31 | ||
ef416fc2 | 32 | //------------------------------------------------------------------------ |
33 | // Catalog | |
34 | //------------------------------------------------------------------------ | |
35 | ||
36 | Catalog::Catalog(XRef *xrefA) { | |
37 | Object catDict, pagesDict; | |
38 | Object obj, obj2; | |
39 | int numPages0; | |
40 | int i; | |
41 | ||
42 | ok = gTrue; | |
43 | xref = xrefA; | |
44 | pages = NULL; | |
45 | pageRefs = NULL; | |
46 | numPages = pagesSize = 0; | |
47 | baseURI = NULL; | |
48 | ||
49 | xref->getCatalog(&catDict); | |
50 | if (!catDict.isDict()) { | |
51 | error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName()); | |
52 | goto err1; | |
53 | } | |
54 | ||
55 | // read page tree | |
56 | catDict.dictLookup("Pages", &pagesDict); | |
57 | // This should really be isDict("Pages"), but I've seen at least one | |
58 | // PDF file where the /Type entry is missing. | |
59 | if (!pagesDict.isDict()) { | |
60 | error(-1, "Top-level pages object is wrong type (%s)", | |
61 | pagesDict.getTypeName()); | |
62 | goto err2; | |
63 | } | |
64 | pagesDict.dictLookup("Count", &obj); | |
65 | // some PDF files actually use real numbers here ("/Count 9.0") | |
66 | if (!obj.isNum()) { | |
67 | error(-1, "Page count in top-level pages object is wrong type (%s)", | |
68 | obj.getTypeName()); | |
69 | goto err3; | |
70 | } | |
71 | pagesSize = numPages0 = (int)obj.getNum(); | |
72 | obj.free(); | |
73 | pages = (Page **)gmallocn(pagesSize, sizeof(Page *)); | |
74 | pageRefs = (Ref *)gmallocn(pagesSize, sizeof(Ref)); | |
75 | for (i = 0; i < pagesSize; ++i) { | |
76 | pages[i] = NULL; | |
77 | pageRefs[i].num = -1; | |
78 | pageRefs[i].gen = -1; | |
79 | } | |
7594b224 | 80 | numPages = readPageTree(pagesDict.getDict(), NULL, 0, 0); |
ef416fc2 | 81 | if (numPages != numPages0) { |
82 | error(-1, "Page count in top-level pages object is incorrect"); | |
83 | } | |
84 | pagesDict.free(); | |
85 | ||
86 | // read named destination dictionary | |
87 | catDict.dictLookup("Dests", &dests); | |
88 | ||
89 | // read root of named destination tree | |
90 | if (catDict.dictLookup("Names", &obj)->isDict()) | |
91 | obj.dictLookup("Dests", &nameTree); | |
92 | else | |
93 | nameTree.initNull(); | |
94 | obj.free(); | |
95 | ||
96 | // read base URI | |
97 | if (catDict.dictLookup("URI", &obj)->isDict()) { | |
98 | if (obj.dictLookup("Base", &obj2)->isString()) { | |
99 | baseURI = obj2.getString()->copy(); | |
100 | } | |
101 | obj2.free(); | |
102 | } | |
103 | obj.free(); | |
104 | ||
105 | // get the metadata stream | |
106 | catDict.dictLookup("Metadata", &metadata); | |
107 | ||
108 | // get the structure tree root | |
109 | catDict.dictLookup("StructTreeRoot", &structTreeRoot); | |
110 | ||
111 | // get the outline dictionary | |
112 | catDict.dictLookup("Outlines", &outline); | |
113 | ||
114 | // get the AcroForm dictionary | |
115 | catDict.dictLookup("AcroForm", &acroForm); | |
116 | ||
117 | catDict.free(); | |
118 | return; | |
119 | ||
120 | err3: | |
121 | obj.free(); | |
122 | err2: | |
123 | pagesDict.free(); | |
124 | err1: | |
125 | catDict.free(); | |
126 | dests.initNull(); | |
127 | nameTree.initNull(); | |
128 | ok = gFalse; | |
129 | } | |
130 | ||
131 | Catalog::~Catalog() { | |
132 | int i; | |
133 | ||
134 | if (pages) { | |
135 | for (i = 0; i < pagesSize; ++i) { | |
136 | if (pages[i]) { | |
137 | delete pages[i]; | |
138 | } | |
139 | } | |
140 | gfree(pages); | |
141 | gfree(pageRefs); | |
142 | } | |
143 | dests.free(); | |
144 | nameTree.free(); | |
145 | if (baseURI) { | |
146 | delete baseURI; | |
147 | } | |
148 | metadata.free(); | |
149 | structTreeRoot.free(); | |
150 | outline.free(); | |
151 | acroForm.free(); | |
152 | } | |
153 | ||
154 | GString *Catalog::readMetadata() { | |
155 | GString *s; | |
156 | Dict *dict; | |
157 | Object obj; | |
158 | int c; | |
159 | ||
160 | if (!metadata.isStream()) { | |
161 | return NULL; | |
162 | } | |
163 | dict = metadata.streamGetDict(); | |
164 | if (!dict->lookup("Subtype", &obj)->isName("XML")) { | |
165 | error(-1, "Unknown Metadata type: '%s'", | |
166 | obj.isName() ? obj.getName() : "???"); | |
167 | } | |
168 | obj.free(); | |
169 | s = new GString(); | |
170 | metadata.streamReset(); | |
171 | while ((c = metadata.streamGetChar()) != EOF) { | |
172 | s->append(c); | |
173 | } | |
174 | metadata.streamClose(); | |
175 | return s; | |
176 | } | |
177 | ||
7594b224 | 178 | int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start, int callDepth) { |
ef416fc2 | 179 | Object kids; |
180 | Object kid; | |
181 | Object kidRef; | |
182 | PageAttrs *attrs1, *attrs2; | |
183 | Page *page; | |
184 | int i, j; | |
185 | ||
186 | attrs1 = new PageAttrs(attrs, pagesDict); | |
187 | pagesDict->lookup("Kids", &kids); | |
188 | if (!kids.isArray()) { | |
189 | error(-1, "Kids object (page %d) is wrong type (%s)", | |
190 | start+1, kids.getTypeName()); | |
191 | goto err1; | |
192 | } | |
193 | for (i = 0; i < kids.arrayGetLength(); ++i) { | |
194 | kids.arrayGet(i, &kid); | |
195 | if (kid.isDict("Page")) { | |
196 | attrs2 = new PageAttrs(attrs1, kid.getDict()); | |
197 | page = new Page(xref, start+1, kid.getDict(), attrs2); | |
198 | if (!page->isOk()) { | |
199 | ++start; | |
200 | goto err3; | |
201 | } | |
202 | if (start >= pagesSize) { | |
203 | pagesSize += 32; | |
204 | pages = (Page **)greallocn(pages, pagesSize, sizeof(Page *)); | |
205 | pageRefs = (Ref *)greallocn(pageRefs, pagesSize, sizeof(Ref)); | |
206 | for (j = pagesSize - 32; j < pagesSize; ++j) { | |
207 | pages[j] = NULL; | |
208 | pageRefs[j].num = -1; | |
209 | pageRefs[j].gen = -1; | |
210 | } | |
211 | } | |
212 | pages[start] = page; | |
213 | kids.arrayGetNF(i, &kidRef); | |
214 | if (kidRef.isRef()) { | |
215 | pageRefs[start].num = kidRef.getRefNum(); | |
216 | pageRefs[start].gen = kidRef.getRefGen(); | |
217 | } | |
218 | kidRef.free(); | |
219 | ++start; | |
220 | // This should really be isDict("Pages"), but I've seen at least one | |
221 | // PDF file where the /Type entry is missing. | |
222 | } else if (kid.isDict()) { | |
7594b224 | 223 | if (callDepth > MAX_CALL_DEPTH) { |
224 | error(-1, "Limit of %d recursive calls reached while reading the page tree. If your document is correct and not a test to try to force a crash, please report a bug.", MAX_CALL_DEPTH); | |
225 | } else { | |
226 | if ((start = readPageTree(kid.getDict(), attrs1, start, callDepth + 1)) | |
227 | < 0) | |
228 | goto err2; | |
229 | } | |
ef416fc2 | 230 | } else { |
231 | error(-1, "Kid object (page %d) is wrong type (%s)", | |
232 | start+1, kid.getTypeName()); | |
233 | } | |
234 | kid.free(); | |
235 | } | |
236 | delete attrs1; | |
237 | kids.free(); | |
238 | return start; | |
239 | ||
240 | err3: | |
241 | delete page; | |
242 | err2: | |
243 | kid.free(); | |
244 | err1: | |
245 | kids.free(); | |
246 | delete attrs1; | |
247 | ok = gFalse; | |
248 | return -1; | |
249 | } | |
250 | ||
251 | int Catalog::findPage(int num, int gen) { | |
252 | int i; | |
253 | ||
254 | for (i = 0; i < numPages; ++i) { | |
255 | if (pageRefs[i].num == num && pageRefs[i].gen == gen) | |
256 | return i + 1; | |
257 | } | |
258 | return 0; | |
259 | } | |
260 | ||
261 | LinkDest *Catalog::findDest(GString *name) { | |
262 | LinkDest *dest; | |
263 | Object obj1, obj2; | |
264 | GBool found; | |
265 | ||
266 | // try named destination dictionary then name tree | |
267 | found = gFalse; | |
268 | if (dests.isDict()) { | |
269 | if (!dests.dictLookup(name->getCString(), &obj1)->isNull()) | |
270 | found = gTrue; | |
271 | else | |
272 | obj1.free(); | |
273 | } | |
274 | if (!found && nameTree.isDict()) { | |
275 | if (!findDestInTree(&nameTree, name, &obj1)->isNull()) | |
276 | found = gTrue; | |
277 | else | |
278 | obj1.free(); | |
279 | } | |
280 | if (!found) | |
281 | return NULL; | |
282 | ||
283 | // construct LinkDest | |
284 | dest = NULL; | |
285 | if (obj1.isArray()) { | |
286 | dest = new LinkDest(obj1.getArray()); | |
287 | } else if (obj1.isDict()) { | |
288 | if (obj1.dictLookup("D", &obj2)->isArray()) | |
289 | dest = new LinkDest(obj2.getArray()); | |
290 | else | |
291 | error(-1, "Bad named destination value"); | |
292 | obj2.free(); | |
293 | } else { | |
294 | error(-1, "Bad named destination value"); | |
295 | } | |
296 | obj1.free(); | |
297 | if (dest && !dest->isOk()) { | |
298 | delete dest; | |
299 | dest = NULL; | |
300 | } | |
301 | ||
302 | return dest; | |
303 | } | |
304 | ||
305 | Object *Catalog::findDestInTree(Object *tree, GString *name, Object *obj) { | |
306 | Object names, name1; | |
307 | Object kids, kid, limits, low, high; | |
308 | GBool done, found; | |
309 | int cmp, i; | |
310 | ||
311 | // leaf node | |
312 | if (tree->dictLookup("Names", &names)->isArray()) { | |
313 | done = found = gFalse; | |
314 | for (i = 0; !done && i < names.arrayGetLength(); i += 2) { | |
315 | if (names.arrayGet(i, &name1)->isString()) { | |
316 | cmp = name->cmp(name1.getString()); | |
317 | if (cmp == 0) { | |
318 | names.arrayGet(i+1, obj); | |
319 | found = gTrue; | |
320 | done = gTrue; | |
321 | } else if (cmp < 0) { | |
322 | done = gTrue; | |
323 | } | |
324 | } | |
325 | name1.free(); | |
326 | } | |
327 | names.free(); | |
328 | if (!found) | |
329 | obj->initNull(); | |
330 | return obj; | |
331 | } | |
332 | names.free(); | |
333 | ||
334 | // root or intermediate node | |
335 | done = gFalse; | |
336 | if (tree->dictLookup("Kids", &kids)->isArray()) { | |
337 | for (i = 0; !done && i < kids.arrayGetLength(); ++i) { | |
338 | if (kids.arrayGet(i, &kid)->isDict()) { | |
339 | if (kid.dictLookup("Limits", &limits)->isArray()) { | |
340 | if (limits.arrayGet(0, &low)->isString() && | |
341 | name->cmp(low.getString()) >= 0) { | |
342 | if (limits.arrayGet(1, &high)->isString() && | |
343 | name->cmp(high.getString()) <= 0) { | |
344 | findDestInTree(&kid, name, obj); | |
345 | done = gTrue; | |
346 | } | |
347 | high.free(); | |
348 | } | |
349 | low.free(); | |
350 | } | |
351 | limits.free(); | |
352 | } | |
353 | kid.free(); | |
354 | } | |
355 | } | |
356 | kids.free(); | |
357 | ||
358 | // name was outside of ranges of all kids | |
359 | if (!done) | |
360 | obj->initNull(); | |
361 | ||
362 | return obj; | |
363 | } |