]> git.ipfire.org Git - thirdparty/cups.git/blob - pdftops/Lexer.cxx
4ca8cfe9f51e323d3fa41fd16bf6a7fddbcb0f8f
[thirdparty/cups.git] / pdftops / Lexer.cxx
1 //========================================================================
2 //
3 // Lexer.cc
4 //
5 // Copyright 1996 Derek B. Noonburg
6 //
7 //========================================================================
8
9 #ifdef __GNUC__
10 #pragma implementation
11 #endif
12
13 #include <stdlib.h>
14 #include <stddef.h>
15 #include <string.h>
16 #include <ctype.h>
17 #include "Lexer.h"
18 #include "Error.h"
19
20 //------------------------------------------------------------------------
21
// Character classification table, indexed by byte value (0..255).
//   1 = white space
//   2 = delimiter
//   0 = ordinary character
// Any non-zero entry terminates a name or command token.
static char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x: NUL TAB LF FF CR
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x: space % ( ) /
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x: < >
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x: [ ]
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x: { }
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
};
42
43 //------------------------------------------------------------------------
44 // Lexer
45 //------------------------------------------------------------------------
46
47 Lexer::Lexer(Stream *str) {
48 Object obj;
49
50 curStr.initStream(str);
51 streams = new Array();
52 streams->add(curStr.copy(&obj));
53 strPtr = 0;
54 freeArray = gTrue;
55 curStr.streamReset();
56 }
57
58 Lexer::Lexer(Object *obj) {
59 Object obj2;
60
61 if (obj->isStream()) {
62 streams = new Array();
63 freeArray = gTrue;
64 streams->add(obj->copy(&obj2));
65 } else {
66 streams = obj->getArray();
67 freeArray = gFalse;
68 }
69 strPtr = 0;
70 if (streams->getLength() > 0) {
71 streams->get(strPtr, &curStr);
72 curStr.streamReset();
73 }
74 }
75
76 Lexer::~Lexer() {
77 if (!curStr.isNone()) {
78 curStr.streamClose();
79 curStr.free();
80 }
81 if (freeArray) {
82 delete streams;
83 }
84 }
85
86 int Lexer::getChar() {
87 int c;
88
89 c = EOF;
90 while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
91 curStr.streamClose();
92 curStr.free();
93 ++strPtr;
94 if (strPtr < streams->getLength()) {
95 streams->get(strPtr, &curStr);
96 curStr.streamReset();
97 }
98 }
99 return c;
100 }
101
102 int Lexer::lookChar() {
103 if (curStr.isNone()) {
104 return EOF;
105 }
106 return curStr.streamLookChar();
107 }
108
109 Object *Lexer::getObj(Object *obj) {
110 char *p;
111 int c, c2;
112 GBool comment, neg, done;
113 int numParen;
114 int xi;
115 double xf, scale;
116 GString *s;
117 int n, m;
118
119 // skip whitespace and comments
120 comment = gFalse;
121 while (1) {
122 if ((c = getChar()) == EOF) {
123 return obj->initEOF();
124 }
125 if (comment) {
126 if (c == '\r' || c == '\n')
127 comment = gFalse;
128 } else if (c == '%') {
129 comment = gTrue;
130 } else if (specialChars[c] != 1) {
131 break;
132 }
133 }
134
135 // start reading token
136 switch (c) {
137
138 // number
139 case '0': case '1': case '2': case '3': case '4':
140 case '5': case '6': case '7': case '8': case '9':
141 case '-': case '.':
142 neg = gFalse;
143 xi = 0;
144 if (c == '-') {
145 neg = gTrue;
146 } else if (c == '.') {
147 goto doReal;
148 } else {
149 xi = c - '0';
150 }
151 while (1) {
152 c = lookChar();
153 if (isdigit(c)) {
154 getChar();
155 xi = xi * 10 + (c - '0');
156 } else if (c == '.') {
157 getChar();
158 goto doReal;
159 } else {
160 break;
161 }
162 }
163 if (neg)
164 xi = -xi;
165 obj->initInt(xi);
166 break;
167 doReal:
168 xf = xi;
169 scale = 0.1;
170 while (1) {
171 c = lookChar();
172 if (!isdigit(c)) {
173 break;
174 }
175 getChar();
176 xf = xf + scale * (c - '0');
177 scale *= 0.1;
178 }
179 if (neg)
180 xf = -xf;
181 obj->initReal(xf);
182 break;
183
184 // string
185 case '(':
186 p = tokBuf;
187 n = 0;
188 numParen = 1;
189 done = gFalse;
190 s = NULL;
191 do {
192 c2 = EOF;
193 switch (c = getChar()) {
194
195 case EOF:
196 #if 0
197 // This breaks some PDF files, e.g., ones from Photoshop.
198 case '\r':
199 case '\n':
200 #endif
201 error(getPos(), "Unterminated string");
202 done = gTrue;
203 break;
204
205 case '(':
206 ++numParen;
207 break;
208
209 case ')':
210 if (--numParen == 0)
211 done = gTrue;
212 break;
213
214 case '\\':
215 switch (c = getChar()) {
216 case 'n':
217 c2 = '\n';
218 break;
219 case 'r':
220 c2 = '\r';
221 break;
222 case 't':
223 c2 = '\t';
224 break;
225 case 'b':
226 c2 = '\b';
227 break;
228 case 'f':
229 c2 = '\f';
230 break;
231 case '\\':
232 case '(':
233 case ')':
234 c2 = c;
235 break;
236 case '0': case '1': case '2': case '3':
237 case '4': case '5': case '6': case '7':
238 c2 = c - '0';
239 c = lookChar();
240 if (c >= '0' && c <= '7') {
241 getChar();
242 c2 = (c2 << 3) + (c - '0');
243 c = lookChar();
244 if (c >= '0' && c <= '7') {
245 getChar();
246 c2 = (c2 << 3) + (c - '0');
247 }
248 }
249 break;
250 case '\r':
251 c = lookChar();
252 if (c == '\n') {
253 getChar();
254 }
255 break;
256 case '\n':
257 break;
258 case EOF:
259 error(getPos(), "Unterminated string");
260 done = gTrue;
261 break;
262 default:
263 c2 = c;
264 break;
265 }
266 break;
267
268 default:
269 c2 = c;
270 break;
271 }
272
273 if (c2 != EOF) {
274 if (n == tokBufSize) {
275 if (!s)
276 s = new GString(tokBuf, tokBufSize);
277 else
278 s->append(tokBuf, tokBufSize);
279 p = tokBuf;
280 n = 0;
281 }
282 *p++ = (char)c2;
283 ++n;
284 }
285 } while (!done);
286 if (!s)
287 s = new GString(tokBuf, n);
288 else
289 s->append(tokBuf, n);
290 obj->initString(s);
291 break;
292
293 // name
294 case '/':
295 p = tokBuf;
296 n = 0;
297 while ((c = lookChar()) != EOF && !specialChars[c]) {
298 getChar();
299 if (c == '#') {
300 c2 = lookChar();
301 if (c2 >= '0' && c2 <= '9') {
302 c = c2 - '0';
303 } else if (c2 >= 'A' && c2 <= 'F') {
304 c = c2 - 'A' + 10;
305 } else if (c2 >= 'a' && c2 <= 'f') {
306 c = c2 - 'a' + 10;
307 } else {
308 goto notEscChar;
309 }
310 getChar();
311 c <<= 4;
312 c2 = getChar();
313 if (c2 >= '0' && c2 <= '9') {
314 c += c2 - '0';
315 } else if (c2 >= 'A' && c2 <= 'F') {
316 c += c2 - 'A' + 10;
317 } else if (c2 >= 'a' && c2 <= 'f') {
318 c += c2 - 'a' + 10;
319 } else {
320 error(getPos(), "Illegal digit in hex char in name");
321 }
322 }
323 notEscChar:
324 if (++n == tokBufSize) {
325 error(getPos(), "Name token too long");
326 break;
327 }
328 *p++ = c;
329 }
330 *p = '\0';
331 obj->initName(tokBuf);
332 break;
333
334 // array punctuation
335 case '[':
336 case ']':
337 tokBuf[0] = c;
338 tokBuf[1] = '\0';
339 obj->initCmd(tokBuf);
340 break;
341
342 // hex string or dict punctuation
343 case '<':
344 c = lookChar();
345
346 // dict punctuation
347 if (c == '<') {
348 getChar();
349 tokBuf[0] = tokBuf[1] = '<';
350 tokBuf[2] = '\0';
351 obj->initCmd(tokBuf);
352
353 // hex string
354 } else {
355 p = tokBuf;
356 m = n = 0;
357 c2 = 0;
358 s = NULL;
359 while (1) {
360 c = getChar();
361 if (c == '>') {
362 break;
363 } else if (c == EOF) {
364 error(getPos(), "Unterminated hex string");
365 break;
366 } else if (specialChars[c] != 1) {
367 c2 = c2 << 4;
368 if (c >= '0' && c <= '9')
369 c2 += c - '0';
370 else if (c >= 'A' && c <= 'F')
371 c2 += c - 'A' + 10;
372 else if (c >= 'a' && c <= 'f')
373 c2 += c - 'a' + 10;
374 else
375 error(getPos(), "Illegal character <%02x> in hex string", c);
376 if (++m == 2) {
377 if (n == tokBufSize) {
378 if (!s)
379 s = new GString(tokBuf, tokBufSize);
380 else
381 s->append(tokBuf, tokBufSize);
382 p = tokBuf;
383 n = 0;
384 }
385 *p++ = (char)c2;
386 ++n;
387 c2 = 0;
388 m = 0;
389 }
390 }
391 }
392 if (!s)
393 s = new GString(tokBuf, n);
394 else
395 s->append(tokBuf, n);
396 if (m == 1)
397 s->append((char)(c2 << 4));
398 obj->initString(s);
399 }
400 break;
401
402 // dict punctuation
403 case '>':
404 c = lookChar();
405 if (c == '>') {
406 getChar();
407 tokBuf[0] = tokBuf[1] = '>';
408 tokBuf[2] = '\0';
409 obj->initCmd(tokBuf);
410 } else {
411 error(getPos(), "Illegal character '>'");
412 obj->initError();
413 }
414 break;
415
416 // error
417 case ')':
418 case '{':
419 case '}':
420 error(getPos(), "Illegal character '%c'", c);
421 obj->initError();
422 break;
423
424 // command
425 default:
426 p = tokBuf;
427 *p++ = c;
428 n = 1;
429 while ((c = lookChar()) != EOF && !specialChars[c]) {
430 getChar();
431 if (++n == tokBufSize) {
432 error(getPos(), "Command token too long");
433 break;
434 }
435 *p++ = c;
436 }
437 *p = '\0';
438 if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
439 obj->initBool(gTrue);
440 } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
441 obj->initBool(gFalse);
442 } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
443 obj->initNull();
444 } else {
445 obj->initCmd(tokBuf);
446 }
447 break;
448 }
449
450 return obj;
451 }
452
453 void Lexer::skipToNextLine() {
454 int c;
455
456 while (1) {
457 c = getChar();
458 if (c == EOF || c == '\n') {
459 return;
460 }
461 if (c == '\r') {
462 if ((c = lookChar()) == '\n') {
463 getChar();
464 }
465 return;
466 }
467 }
468 }