]> git.ipfire.org Git - thirdparty/cups.git/blob - pdftops/Lexer.cxx
Import cups.org releases
[thirdparty/cups.git] / pdftops / Lexer.cxx
1 //========================================================================
2 //
3 // Lexer.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 #include <config.h>
10
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
13 #endif
14
#include <stdlib.h>
#include <stddef.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>
#include "Lexer.h"
#include "Error.h"
21
22 //------------------------------------------------------------------------
23
// Character class table, indexed by byte value (0-255):
//   1 = white space (NUL, HT, LF, FF, CR, SP)
//   2 = delimiter: % ( ) / < > [ ] { }
//   0 = regular character
// Any non-zero entry ends a name or command token.  The table is
// only ever read, so it is const.
static const char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
};
44
45 //------------------------------------------------------------------------
46 // Lexer
47 //------------------------------------------------------------------------
48
49 Lexer::Lexer(XRef *xref, Stream *str) {
50 Object obj;
51
52 curStr.initStream(str);
53 streams = new Array(xref);
54 streams->add(curStr.copy(&obj));
55 strPtr = 0;
56 freeArray = gTrue;
57 curStr.streamReset();
58 }
59
60 Lexer::Lexer(XRef *xref, Object *obj) {
61 Object obj2;
62
63 if (obj->isStream()) {
64 streams = new Array(xref);
65 freeArray = gTrue;
66 streams->add(obj->copy(&obj2));
67 } else {
68 streams = obj->getArray();
69 freeArray = gFalse;
70 }
71 strPtr = 0;
72 if (streams->getLength() > 0) {
73 streams->get(strPtr, &curStr);
74 curStr.streamReset();
75 }
76 }
77
78 Lexer::~Lexer() {
79 if (!curStr.isNone()) {
80 curStr.streamClose();
81 curStr.free();
82 }
83 if (freeArray) {
84 delete streams;
85 }
86 }
87
88 int Lexer::getChar() {
89 int c;
90
91 c = EOF;
92 while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
93 curStr.streamClose();
94 curStr.free();
95 ++strPtr;
96 if (strPtr < streams->getLength()) {
97 streams->get(strPtr, &curStr);
98 curStr.streamReset();
99 }
100 }
101 return c;
102 }
103
104 int Lexer::lookChar() {
105 if (curStr.isNone()) {
106 return EOF;
107 }
108 return curStr.streamLookChar();
109 }
110
111 Object *Lexer::getObj(Object *obj) {
112 char *p;
113 int c, c2;
114 GBool comment, neg, done;
115 int numParen;
116 int xi;
117 double xf, scale;
118 GString *s;
119 int n, m;
120
121 // skip whitespace and comments
122 comment = gFalse;
123 while (1) {
124 if ((c = getChar()) == EOF) {
125 return obj->initEOF();
126 }
127 if (comment) {
128 if (c == '\r' || c == '\n')
129 comment = gFalse;
130 } else if (c == '%') {
131 comment = gTrue;
132 } else if (specialChars[c] != 1) {
133 break;
134 }
135 }
136
137 // start reading token
138 switch (c) {
139
140 // number
141 case '0': case '1': case '2': case '3': case '4':
142 case '5': case '6': case '7': case '8': case '9':
143 case '-': case '.':
144 neg = gFalse;
145 xi = 0;
146 if (c == '-') {
147 neg = gTrue;
148 } else if (c == '.') {
149 goto doReal;
150 } else {
151 xi = c - '0';
152 }
153 while (1) {
154 c = lookChar();
155 if (isdigit(c)) {
156 getChar();
157 xi = xi * 10 + (c - '0');
158 } else if (c == '.') {
159 getChar();
160 goto doReal;
161 } else {
162 break;
163 }
164 }
165 if (neg)
166 xi = -xi;
167 obj->initInt(xi);
168 break;
169 doReal:
170 xf = xi;
171 scale = 0.1;
172 while (1) {
173 c = lookChar();
174 if (!isdigit(c)) {
175 break;
176 }
177 getChar();
178 xf = xf + scale * (c - '0');
179 scale *= 0.1;
180 }
181 if (neg)
182 xf = -xf;
183 obj->initReal(xf);
184 break;
185
186 // string
187 case '(':
188 p = tokBuf;
189 n = 0;
190 numParen = 1;
191 done = gFalse;
192 s = NULL;
193 do {
194 c2 = EOF;
195 switch (c = getChar()) {
196
197 case EOF:
198 #if 0
199 // This breaks some PDF files, e.g., ones from Photoshop.
200 case '\r':
201 case '\n':
202 #endif
203 error(getPos(), "Unterminated string");
204 done = gTrue;
205 break;
206
207 case '(':
208 ++numParen;
209 c2 = c;
210 break;
211
212 case ')':
213 if (--numParen == 0) {
214 done = gTrue;
215 } else {
216 c2 = c;
217 }
218 break;
219
220 case '\\':
221 switch (c = getChar()) {
222 case 'n':
223 c2 = '\n';
224 break;
225 case 'r':
226 c2 = '\r';
227 break;
228 case 't':
229 c2 = '\t';
230 break;
231 case 'b':
232 c2 = '\b';
233 break;
234 case 'f':
235 c2 = '\f';
236 break;
237 case '\\':
238 case '(':
239 case ')':
240 c2 = c;
241 break;
242 case '0': case '1': case '2': case '3':
243 case '4': case '5': case '6': case '7':
244 c2 = c - '0';
245 c = lookChar();
246 if (c >= '0' && c <= '7') {
247 getChar();
248 c2 = (c2 << 3) + (c - '0');
249 c = lookChar();
250 if (c >= '0' && c <= '7') {
251 getChar();
252 c2 = (c2 << 3) + (c - '0');
253 }
254 }
255 break;
256 case '\r':
257 c = lookChar();
258 if (c == '\n') {
259 getChar();
260 }
261 break;
262 case '\n':
263 break;
264 case EOF:
265 error(getPos(), "Unterminated string");
266 done = gTrue;
267 break;
268 default:
269 c2 = c;
270 break;
271 }
272 break;
273
274 default:
275 c2 = c;
276 break;
277 }
278
279 if (c2 != EOF) {
280 if (n == tokBufSize) {
281 if (!s)
282 s = new GString(tokBuf, tokBufSize);
283 else
284 s->append(tokBuf, tokBufSize);
285 p = tokBuf;
286 n = 0;
287 }
288 *p++ = (char)c2;
289 ++n;
290 }
291 } while (!done);
292 if (!s)
293 s = new GString(tokBuf, n);
294 else
295 s->append(tokBuf, n);
296 obj->initString(s);
297 break;
298
299 // name
300 case '/':
301 p = tokBuf;
302 n = 0;
303 while ((c = lookChar()) != EOF && !specialChars[c]) {
304 getChar();
305 if (c == '#') {
306 c2 = lookChar();
307 if (c2 >= '0' && c2 <= '9') {
308 c = c2 - '0';
309 } else if (c2 >= 'A' && c2 <= 'F') {
310 c = c2 - 'A' + 10;
311 } else if (c2 >= 'a' && c2 <= 'f') {
312 c = c2 - 'a' + 10;
313 } else {
314 goto notEscChar;
315 }
316 getChar();
317 c <<= 4;
318 c2 = getChar();
319 if (c2 >= '0' && c2 <= '9') {
320 c += c2 - '0';
321 } else if (c2 >= 'A' && c2 <= 'F') {
322 c += c2 - 'A' + 10;
323 } else if (c2 >= 'a' && c2 <= 'f') {
324 c += c2 - 'a' + 10;
325 } else {
326 error(getPos(), "Illegal digit in hex char in name");
327 }
328 }
329 notEscChar:
330 if (++n == tokBufSize) {
331 error(getPos(), "Name token too long");
332 break;
333 }
334 *p++ = c;
335 }
336 *p = '\0';
337 obj->initName(tokBuf);
338 break;
339
340 // array punctuation
341 case '[':
342 case ']':
343 tokBuf[0] = c;
344 tokBuf[1] = '\0';
345 obj->initCmd(tokBuf);
346 break;
347
348 // hex string or dict punctuation
349 case '<':
350 c = lookChar();
351
352 // dict punctuation
353 if (c == '<') {
354 getChar();
355 tokBuf[0] = tokBuf[1] = '<';
356 tokBuf[2] = '\0';
357 obj->initCmd(tokBuf);
358
359 // hex string
360 } else {
361 p = tokBuf;
362 m = n = 0;
363 c2 = 0;
364 s = NULL;
365 while (1) {
366 c = getChar();
367 if (c == '>') {
368 break;
369 } else if (c == EOF) {
370 error(getPos(), "Unterminated hex string");
371 break;
372 } else if (specialChars[c] != 1) {
373 c2 = c2 << 4;
374 if (c >= '0' && c <= '9')
375 c2 += c - '0';
376 else if (c >= 'A' && c <= 'F')
377 c2 += c - 'A' + 10;
378 else if (c >= 'a' && c <= 'f')
379 c2 += c - 'a' + 10;
380 else
381 error(getPos(), "Illegal character <%02x> in hex string", c);
382 if (++m == 2) {
383 if (n == tokBufSize) {
384 if (!s)
385 s = new GString(tokBuf, tokBufSize);
386 else
387 s->append(tokBuf, tokBufSize);
388 p = tokBuf;
389 n = 0;
390 }
391 *p++ = (char)c2;
392 ++n;
393 c2 = 0;
394 m = 0;
395 }
396 }
397 }
398 if (!s)
399 s = new GString(tokBuf, n);
400 else
401 s->append(tokBuf, n);
402 if (m == 1)
403 s->append((char)(c2 << 4));
404 obj->initString(s);
405 }
406 break;
407
408 // dict punctuation
409 case '>':
410 c = lookChar();
411 if (c == '>') {
412 getChar();
413 tokBuf[0] = tokBuf[1] = '>';
414 tokBuf[2] = '\0';
415 obj->initCmd(tokBuf);
416 } else {
417 error(getPos(), "Illegal character '>'");
418 obj->initError();
419 }
420 break;
421
422 // error
423 case ')':
424 case '{':
425 case '}':
426 error(getPos(), "Illegal character '%c'", c);
427 obj->initError();
428 break;
429
430 // command
431 default:
432 p = tokBuf;
433 *p++ = c;
434 n = 1;
435 while ((c = lookChar()) != EOF && !specialChars[c]) {
436 getChar();
437 if (++n == tokBufSize) {
438 error(getPos(), "Command token too long");
439 break;
440 }
441 *p++ = c;
442 }
443 *p = '\0';
444 if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
445 obj->initBool(gTrue);
446 } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
447 obj->initBool(gFalse);
448 } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
449 obj->initNull();
450 } else {
451 obj->initCmd(tokBuf);
452 }
453 break;
454 }
455
456 return obj;
457 }
458
459 void Lexer::skipToNextLine() {
460 int c;
461
462 while (1) {
463 c = getChar();
464 if (c == EOF || c == '\n') {
465 return;
466 }
467 if (c == '\r') {
468 if ((c = lookChar()) == '\n') {
469 getChar();
470 }
471 return;
472 }
473 }
474 }