]>
Commit | Line | Data |
---|---|---|
9c72faab | 1 | //======================================================================== |
2 | // | |
3 | // XRef.cc | |
4 | // | |
5 | // Copyright 1996 Derek B. Noonburg | |
6 | // | |
7 | //======================================================================== | |
8 | ||
9 | #ifdef __GNUC__ | |
10 | #pragma implementation | |
11 | #endif | |
12 | ||
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include "gmem.h"
#include "Object.h"
#include "Stream.h"
#include "Lexer.h"
#include "Parser.h"
#include "Dict.h"
#ifndef NO_DECRYPTION
#include "Decrypt.h"
#endif
#include "Error.h"
#include "XRef.h"
28 | ||
29 | //------------------------------------------------------------------------ | |
30 | ||
// Number of bytes read from the end of the file when searching for
// the 'startxref' keyword (see XRef::readTrailer).
#define xrefSearchSize 1024 // read this many bytes at end of file
// to look for 'startxref'

#ifndef NO_DECRYPTION
//------------------------------------------------------------------------
// Permission bits
//------------------------------------------------------------------------

// Bit masks tested against permFlags by okToPrint(), okToChange(),
// okToCopy() and okToAddNotes() respectively.
#define permPrint (1<<2)
#define permChange (1<<3)
#define permCopy (1<<4)
#define permNotes (1<<5)
// Value given to permFlags by checkEncrypted() before it looks at
// the Encrypt dictionary; it has all four permission bits set.
#define defPermFlags 0xfffc
#endif

//------------------------------------------------------------------------
// The global xref table
//------------------------------------------------------------------------

// Points at the current document's XRef.  The XRef constructor sets
// it to NULL while it parses, then to the new object on success or
// back to the previous value on failure.
XRef *xref = NULL;
51 | ||
52 | //------------------------------------------------------------------------ | |
53 | // XRef | |
54 | //------------------------------------------------------------------------ | |
55 | ||
52118ca3 | 56 | XRef::XRef(BaseStream *str, GString *userPassword) { |
9c72faab | 57 | XRef *oldXref; |
58 | int pos; | |
59 | int i; | |
60 | ||
61 | ok = gTrue; | |
62 | size = 0; | |
63 | entries = NULL; | |
b5cb0608 | 64 | streamEnds = NULL; |
65 | streamEndsLen = 0; | |
9c72faab | 66 | |
67 | // get rid of old xref (otherwise it will try to fetch the Root object | |
68 | // in the new document, using the old xref) | |
69 | oldXref = xref; | |
70 | xref = NULL; | |
71 | ||
72 | // read the trailer | |
52118ca3 | 73 | this->str = str; |
9c72faab | 74 | start = str->getStart(); |
52118ca3 | 75 | pos = readTrailer(); |
9c72faab | 76 | |
77 | // if there was a problem with the trailer, | |
78 | // try to reconstruct the xref table | |
79 | if (pos == 0) { | |
52118ca3 | 80 | if (!(ok = constructXRef())) { |
9c72faab | 81 | xref = oldXref; |
82 | return; | |
83 | } | |
84 | ||
85 | // trailer is ok - read the xref table | |
86 | } else { | |
87 | entries = (XRefEntry *)gmalloc(size * sizeof(XRefEntry)); | |
88 | for (i = 0; i < size; ++i) { | |
89 | entries[i].offset = -1; | |
90 | entries[i].used = gFalse; | |
91 | } | |
52118ca3 | 92 | while (readXRef(&pos)) ; |
9c72faab | 93 | |
94 | // if there was a problem with the xref table, | |
95 | // try to reconstruct it | |
96 | if (!ok) { | |
97 | gfree(entries); | |
98 | size = 0; | |
99 | entries = NULL; | |
52118ca3 | 100 | if (!(ok = constructXRef())) { |
9c72faab | 101 | xref = oldXref; |
102 | return; | |
103 | } | |
104 | } | |
105 | } | |
106 | ||
107 | // set up new xref table | |
108 | xref = this; | |
109 | ||
110 | // check for encryption | |
52118ca3 | 111 | #ifndef NO_DECRYPTION |
112 | encrypted = gFalse; | |
113 | #endif | |
114 | if (checkEncrypted(userPassword)) { | |
9c72faab | 115 | ok = gFalse; |
116 | xref = oldXref; | |
117 | return; | |
118 | } | |
119 | } | |
120 | ||
121 | XRef::~XRef() { | |
122 | gfree(entries); | |
123 | trailerDict.free(); | |
b5cb0608 | 124 | if (streamEnds) { |
125 | gfree(streamEnds); | |
126 | } | |
9c72faab | 127 | } |
128 | ||
129 | // Read startxref position, xref table size, and root. Returns | |
130 | // first xref position. | |
52118ca3 | 131 | int XRef::readTrailer() { |
9c72faab | 132 | Parser *parser; |
133 | Object obj; | |
134 | char buf[xrefSearchSize+1]; | |
135 | int n, pos, pos1; | |
136 | char *p; | |
137 | int c; | |
138 | int i; | |
139 | ||
140 | // read last xrefSearchSize bytes | |
141 | str->setPos(-xrefSearchSize); | |
142 | for (n = 0; n < xrefSearchSize; ++n) { | |
143 | if ((c = str->getChar()) == EOF) | |
144 | break; | |
145 | buf[n] = c; | |
146 | } | |
147 | buf[n] = '\0'; | |
148 | ||
149 | // find startxref | |
150 | for (i = n - 9; i >= 0; --i) { | |
151 | if (!strncmp(&buf[i], "startxref", 9)) | |
152 | break; | |
153 | } | |
154 | if (i < 0) | |
155 | return 0; | |
156 | for (p = &buf[i+9]; isspace(*p); ++p) ; | |
52118ca3 | 157 | pos = lastXRefPos = atoi(p); |
9c72faab | 158 | |
159 | // find trailer dict by looking after first xref table | |
160 | // (NB: we can't just use the trailer dict at the end of the file -- | |
161 | // this won't work for linearized files.) | |
162 | str->setPos(start + pos); | |
163 | for (i = 0; i < 4; ++i) | |
164 | buf[i] = str->getChar(); | |
165 | if (strncmp(buf, "xref", 4)) | |
166 | return 0; | |
167 | pos1 = pos + 4; | |
168 | while (1) { | |
169 | str->setPos(start + pos1); | |
170 | for (i = 0; i < 35; ++i) { | |
171 | if ((c = str->getChar()) == EOF) | |
172 | return 0; | |
173 | buf[i] = c; | |
174 | } | |
175 | if (!strncmp(buf, "trailer", 7)) | |
176 | break; | |
177 | p = buf; | |
178 | while (isspace(*p)) ++p; | |
179 | while ('0' <= *p && *p <= '9') ++p; | |
180 | while (isspace(*p)) ++p; | |
181 | n = atoi(p); | |
182 | while ('0' <= *p && *p <= '9') ++p; | |
183 | while (isspace(*p)) ++p; | |
184 | if (p == buf) | |
185 | return 0; | |
186 | pos1 += (p - buf) + n * 20; | |
187 | } | |
188 | pos1 += 7; | |
189 | ||
190 | // read trailer dict | |
191 | obj.initNull(); | |
52118ca3 | 192 | parser = new Parser(new Lexer(str->makeSubStream(start + pos1, -1, &obj))); |
9c72faab | 193 | parser->getObj(&trailerDict); |
194 | if (trailerDict.isDict()) { | |
195 | trailerDict.dictLookupNF("Size", &obj); | |
196 | if (obj.isInt()) | |
197 | size = obj.getInt(); | |
198 | else | |
199 | pos = 0; | |
200 | obj.free(); | |
201 | trailerDict.dictLookupNF("Root", &obj); | |
202 | if (obj.isRef()) { | |
203 | rootNum = obj.getRefNum(); | |
204 | rootGen = obj.getRefGen(); | |
205 | } else { | |
206 | pos = 0; | |
207 | } | |
208 | obj.free(); | |
209 | } else { | |
210 | pos = 0; | |
211 | } | |
212 | delete parser; | |
213 | ||
214 | // return first xref position | |
215 | return pos; | |
216 | } | |
217 | ||
218 | // Read an xref table and the prev pointer from the trailer. | |
52118ca3 | 219 | GBool XRef::readXRef(int *pos) { |
9c72faab | 220 | Parser *parser; |
221 | Object obj, obj2; | |
222 | char s[20]; | |
223 | GBool more; | |
224 | int first, n, i, j; | |
225 | int c; | |
226 | ||
227 | // seek to xref in stream | |
228 | str->setPos(start + *pos); | |
229 | ||
230 | // make sure it's an xref table | |
231 | while ((c = str->getChar()) != EOF && isspace(c)) ; | |
232 | s[0] = (char)c; | |
233 | s[1] = (char)str->getChar(); | |
234 | s[2] = (char)str->getChar(); | |
235 | s[3] = (char)str->getChar(); | |
236 | if (!(s[0] == 'x' && s[1] == 'r' && s[2] == 'e' && s[3] == 'f')) | |
237 | goto err2; | |
238 | ||
239 | // read xref | |
240 | while (1) { | |
241 | while ((c = str->lookChar()) != EOF && isspace(c)) | |
242 | str->getChar(); | |
243 | if (c == 't') | |
244 | break; | |
245 | for (i = 0; (c = str->getChar()) != EOF && isdigit(c) && i < 20; ++i) | |
246 | s[i] = (char)c; | |
247 | if (i == 0) | |
248 | goto err2; | |
249 | s[i] = '\0'; | |
250 | first = atoi(s); | |
251 | while ((c = str->lookChar()) != EOF && isspace(c)) | |
252 | str->getChar(); | |
253 | for (i = 0; (c = str->getChar()) != EOF && isdigit(c) && i < 20; ++i) | |
254 | s[i] = (char)c; | |
255 | if (i == 0) | |
256 | goto err2; | |
257 | s[i] = '\0'; | |
258 | n = atoi(s); | |
259 | while ((c = str->lookChar()) != EOF && isspace(c)) | |
260 | str->getChar(); | |
261 | for (i = first; i < first + n; ++i) { | |
262 | for (j = 0; j < 20; ++j) { | |
263 | if ((c = str->getChar()) == EOF) | |
264 | goto err2; | |
265 | s[j] = (char)c; | |
266 | } | |
267 | if (entries[i].offset < 0) { | |
268 | s[10] = '\0'; | |
269 | entries[i].offset = atoi(s); | |
270 | s[16] = '\0'; | |
271 | entries[i].gen = atoi(&s[11]); | |
272 | if (s[17] == 'n') | |
273 | entries[i].used = gTrue; | |
274 | else if (s[17] == 'f') | |
275 | entries[i].used = gFalse; | |
276 | else | |
277 | goto err2; | |
52118ca3 | 278 | #if 1 //~ |
279 | //~ PDF files of patents from the IBM Intellectual Property | |
280 | //~ Network have a bug: the xref table claims to start at 1 | |
281 | //~ instead of 0. | |
282 | if (i == 1 && first == 1 && | |
283 | entries[1].offset == 0 && entries[1].gen == 65535 && | |
284 | !entries[1].used) { | |
285 | i = first = 0; | |
286 | entries[0] = entries[1]; | |
287 | entries[1].offset = -1; | |
288 | } | |
289 | #endif | |
9c72faab | 290 | } |
291 | } | |
292 | } | |
293 | ||
294 | // read prev pointer from trailer dictionary | |
295 | obj.initNull(); | |
52118ca3 | 296 | parser = new Parser(new Lexer(str->makeSubStream(str->getPos(), -1, &obj))); |
9c72faab | 297 | parser->getObj(&obj); |
298 | if (!obj.isCmd("trailer")) | |
299 | goto err1; | |
300 | obj.free(); | |
301 | parser->getObj(&obj); | |
302 | if (!obj.isDict()) | |
303 | goto err1; | |
304 | obj.getDict()->lookupNF("Prev", &obj2); | |
305 | if (obj2.isInt()) { | |
306 | *pos = obj2.getInt(); | |
307 | more = gTrue; | |
308 | } else { | |
309 | more = gFalse; | |
310 | } | |
311 | obj.free(); | |
312 | obj2.free(); | |
313 | ||
314 | delete parser; | |
315 | return more; | |
316 | ||
317 | err1: | |
318 | obj.free(); | |
319 | err2: | |
320 | ok = gFalse; | |
321 | return gFalse; | |
322 | } | |
323 | ||
324 | // Attempt to construct an xref table for a damaged file. | |
52118ca3 | 325 | GBool XRef::constructXRef() { |
9c72faab | 326 | Parser *parser; |
327 | Object obj; | |
328 | char buf[256]; | |
329 | int pos; | |
330 | int num, gen; | |
331 | int newSize; | |
b5cb0608 | 332 | int streamEndsSize; |
9c72faab | 333 | char *p; |
334 | int i; | |
335 | GBool gotRoot; | |
336 | ||
337 | error(0, "PDF file is damaged - attempting to reconstruct xref table..."); | |
338 | gotRoot = gFalse; | |
b5cb0608 | 339 | streamEndsLen = streamEndsSize = 0; |
9c72faab | 340 | |
341 | str->reset(); | |
342 | while (1) { | |
343 | pos = str->getPos(); | |
b5cb0608 | 344 | if (!str->getLine(buf, 256)) { |
9c72faab | 345 | break; |
b5cb0608 | 346 | } |
9c72faab | 347 | p = buf; |
348 | ||
349 | // got trailer dictionary | |
350 | if (!strncmp(p, "trailer", 7)) { | |
351 | obj.initNull(); | |
352 | parser = new Parser(new Lexer( | |
b5cb0608 | 353 | str->makeSubStream(start + pos + 7, -1, &obj))); |
9c72faab | 354 | if (!trailerDict.isNone()) |
355 | trailerDict.free(); | |
356 | parser->getObj(&trailerDict); | |
357 | if (trailerDict.isDict()) { | |
358 | trailerDict.dictLookupNF("Root", &obj); | |
359 | if (obj.isRef()) { | |
360 | rootNum = obj.getRefNum(); | |
361 | rootGen = obj.getRefGen(); | |
362 | gotRoot = gTrue; | |
363 | } | |
364 | obj.free(); | |
365 | } else { | |
366 | pos = 0; | |
367 | } | |
368 | delete parser; | |
369 | ||
370 | // look for object | |
371 | } else if (isdigit(*p)) { | |
372 | num = atoi(p); | |
373 | do { | |
374 | ++p; | |
375 | } while (*p && isdigit(*p)); | |
376 | if (isspace(*p)) { | |
377 | do { | |
378 | ++p; | |
379 | } while (*p && isspace(*p)); | |
380 | if (isdigit(*p)) { | |
381 | gen = atoi(p); | |
382 | do { | |
383 | ++p; | |
384 | } while (*p && isdigit(*p)); | |
385 | if (isspace(*p)) { | |
386 | do { | |
387 | ++p; | |
388 | } while (*p && isspace(*p)); | |
389 | if (!strncmp(p, "obj", 3)) { | |
390 | if (num >= size) { | |
391 | newSize = (num + 1 + 255) & ~255; | |
392 | entries = (XRefEntry *) | |
393 | grealloc(entries, newSize * sizeof(XRefEntry)); | |
394 | for (i = size; i < newSize; ++i) { | |
395 | entries[i].offset = -1; | |
396 | entries[i].used = gFalse; | |
397 | } | |
398 | size = newSize; | |
399 | } | |
400 | if (!entries[num].used || gen >= entries[num].gen) { | |
401 | entries[num].offset = pos - start; | |
402 | entries[num].gen = gen; | |
403 | entries[num].used = gTrue; | |
404 | } | |
405 | } | |
406 | } | |
407 | } | |
408 | } | |
b5cb0608 | 409 | |
410 | } else if (!strncmp(p, "endstream", 9)) { | |
411 | if (streamEndsLen == streamEndsSize) { | |
412 | streamEndsSize += 64; | |
413 | streamEnds = (int *)grealloc(streamEnds, streamEndsSize * sizeof(int)); | |
414 | } | |
415 | streamEnds[streamEndsLen++] = pos; | |
9c72faab | 416 | } |
417 | } | |
418 | ||
419 | if (gotRoot) | |
420 | return gTrue; | |
421 | ||
422 | error(-1, "Couldn't find trailer dictionary"); | |
423 | return gFalse; | |
424 | } | |
425 | ||
52118ca3 | 426 | #ifndef NO_DECRYPTION |
427 | GBool XRef::checkEncrypted(GString *userPassword) { | |
428 | Object encrypt, ownerKey, userKey, permissions, fileID, fileID1; | |
429 | GBool encrypted1; | |
430 | GBool ret; | |
431 | ||
432 | ret = gFalse; | |
433 | ||
434 | permFlags = defPermFlags; | |
435 | trailerDict.dictLookup("Encrypt", &encrypt); | |
436 | if ((encrypted1 = encrypt.isDict())) { | |
437 | ret = gTrue; | |
438 | encrypt.dictLookup("O", &ownerKey); | |
439 | encrypt.dictLookup("U", &userKey); | |
440 | encrypt.dictLookup("P", &permissions); | |
441 | trailerDict.dictLookup("ID", &fileID); | |
442 | if (ownerKey.isString() && ownerKey.getString()->getLength() == 32 && | |
443 | userKey.isString() && userKey.getString()->getLength() == 32 && | |
444 | permissions.isInt() && | |
445 | fileID.isArray()) { | |
446 | permFlags = permissions.getInt(); | |
447 | fileID.arrayGet(0, &fileID1); | |
448 | if (fileID1.isString()) { | |
449 | if (Decrypt::makeFileKey(ownerKey.getString(), userKey.getString(), | |
450 | permFlags, fileID1.getString(), | |
451 | userPassword, fileKey)) { | |
452 | ret = gFalse; | |
453 | } else { | |
454 | error(-1, "Incorrect user password"); | |
455 | } | |
456 | } else { | |
457 | error(-1, "Weird encryption info"); | |
458 | } | |
459 | fileID1.free(); | |
460 | } else { | |
461 | error(-1, "Weird encryption info"); | |
462 | } | |
463 | ownerKey.free(); | |
464 | userKey.free(); | |
465 | permissions.free(); | |
466 | fileID.free(); | |
467 | } | |
468 | encrypt.free(); | |
469 | ||
470 | // this flag has to be set *after* we read the O/U/P strings | |
471 | encrypted = encrypted1; | |
472 | ||
473 | return ret; | |
474 | } | |
475 | #else | |
476 | GBool XRef::checkEncrypted(GString *userPassword) { | |
9c72faab | 477 | Object obj; |
478 | GBool encrypted; | |
479 | ||
480 | trailerDict.dictLookup("Encrypt", &obj); | |
481 | if ((encrypted = !obj.isNull())) { | |
482 | error(-1, "PDF file is encrypted and cannot be displayed"); | |
483 | error(-1, "* Decryption support is currently not included in xpdf"); | |
484 | error(-1, "* due to legal restrictions: the U.S.A. still has bogus"); | |
485 | error(-1, "* export controls on cryptography software."); | |
486 | } | |
487 | obj.free(); | |
488 | return encrypted; | |
489 | } | |
52118ca3 | 490 | #endif |
9c72faab | 491 | |
492 | GBool XRef::okToPrint() { | |
52118ca3 | 493 | #ifndef NO_DECRYPTION |
494 | if (!(permFlags & permPrint)) { | |
495 | return gFalse; | |
496 | } | |
497 | #endif | |
498 | return gTrue; | |
499 | } | |
500 | ||
501 | GBool XRef::okToChange() { | |
502 | #ifndef NO_DECRYPTION | |
503 | if (!(permFlags & permChange)) { | |
504 | return gFalse; | |
505 | } | |
506 | #endif | |
9c72faab | 507 | return gTrue; |
508 | } | |
509 | ||
510 | GBool XRef::okToCopy() { | |
52118ca3 | 511 | #ifndef NO_DECRYPTION |
512 | if (!(permFlags & permCopy)) { | |
513 | return gFalse; | |
514 | } | |
515 | #endif | |
516 | return gTrue; | |
517 | } | |
518 | ||
519 | GBool XRef::okToAddNotes() { | |
520 | #ifndef NO_DECRYPTION | |
521 | if (!(permFlags & permNotes)) { | |
522 | return gFalse; | |
523 | } | |
524 | #endif | |
9c72faab | 525 | return gTrue; |
526 | } | |
527 | ||
528 | Object *XRef::fetch(int num, int gen, Object *obj) { | |
529 | XRefEntry *e; | |
530 | Parser *parser; | |
531 | Object obj1, obj2, obj3; | |
532 | ||
533 | // check for bogus ref - this can happen in corrupted PDF files | |
534 | if (num < 0 || num >= size) { | |
535 | obj->initNull(); | |
536 | return obj; | |
537 | } | |
538 | ||
539 | e = &entries[num]; | |
540 | if (e->gen == gen && e->offset >= 0) { | |
541 | obj1.initNull(); | |
542 | parser = new Parser(new Lexer( | |
52118ca3 | 543 | str->makeSubStream(start + e->offset, -1, &obj1))); |
9c72faab | 544 | parser->getObj(&obj1); |
545 | parser->getObj(&obj2); | |
546 | parser->getObj(&obj3); | |
547 | if (obj1.isInt() && obj1.getInt() == num && | |
548 | obj2.isInt() && obj2.getInt() == gen && | |
549 | obj3.isCmd("obj")) { | |
52118ca3 | 550 | #ifndef NO_DECRYPTION |
551 | parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL, num, gen); | |
552 | #else | |
9c72faab | 553 | parser->getObj(obj); |
52118ca3 | 554 | #endif |
9c72faab | 555 | } else { |
556 | obj->initNull(); | |
557 | } | |
558 | obj1.free(); | |
559 | obj2.free(); | |
560 | obj3.free(); | |
561 | delete parser; | |
562 | } else { | |
563 | obj->initNull(); | |
564 | } | |
565 | return obj; | |
566 | } | |
567 | ||
568 | Object *XRef::getDocInfo(Object *obj) { | |
569 | return trailerDict.dictLookup("Info", obj); | |
570 | } | |
b5cb0608 | 571 | |
572 | int XRef::getStreamEnd(int start) { | |
573 | int a, b, m; | |
574 | ||
575 | if (streamEndsLen == 0 || | |
576 | start > streamEnds[streamEndsLen - 1]) { | |
577 | return -1; | |
578 | } | |
579 | ||
580 | a = -1; | |
581 | b = streamEndsLen - 1; | |
582 | // invariant: streamEnds[a] < start <= streamEnds[b] | |
583 | while (b - a > 1) { | |
584 | m = (a + b) / 2; | |
585 | if (start <= streamEnds[m]) { | |
586 | b = m; | |
587 | } else { | |
588 | a = m; | |
589 | } | |
590 | } | |
591 | return streamEnds[b]; | |
592 | } |