]> git.ipfire.org Git - thirdparty/cups.git/blame - pdftops/Lexer.cxx
Load cups into easysw/current.
[thirdparty/cups.git] / pdftops / Lexer.cxx
CommitLineData
ef416fc2 1//========================================================================
2//
3// Lexer.cc
4//
5// Copyright 1996-2003 Glyph & Cog, LLC
6//
7//========================================================================
8
9#include <config.h>
10
11#ifdef USE_GCC_PRAGMAS
12#pragma implementation
13#endif
14
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "Lexer.h"
#include "Error.h"
21
22//------------------------------------------------------------------------
23
// Character classification table, indexed by byte value (0x00..0xff):
//   0 - regular character
//   1 - white space (NUL, TAB, LF, FF, CR, space); also ends a name or
//       command
//   2 - delimiter ( % ( ) / < > [ ] { } ); ends a name or command
// The table is only ever read, so it is declared const.
static const char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
};
44
45//------------------------------------------------------------------------
46// Lexer
47//------------------------------------------------------------------------
48
49Lexer::Lexer(XRef *xref, Stream *str) {
50 Object obj;
51
52 curStr.initStream(str);
53 streams = new Array(xref);
54 streams->add(curStr.copy(&obj));
55 strPtr = 0;
56 freeArray = gTrue;
57 curStr.streamReset();
58}
59
60Lexer::Lexer(XRef *xref, Object *obj) {
61 Object obj2;
62
63 if (obj->isStream()) {
64 streams = new Array(xref);
65 freeArray = gTrue;
66 streams->add(obj->copy(&obj2));
67 } else {
68 streams = obj->getArray();
69 freeArray = gFalse;
70 }
71 strPtr = 0;
72 if (streams->getLength() > 0) {
73 streams->get(strPtr, &curStr);
74 curStr.streamReset();
75 }
76}
77
78Lexer::~Lexer() {
79 if (!curStr.isNone()) {
80 curStr.streamClose();
81 curStr.free();
82 }
83 if (freeArray) {
84 delete streams;
85 }
86}
87
88int Lexer::getChar() {
89 int c;
90
91 c = EOF;
92 while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
93 curStr.streamClose();
94 curStr.free();
95 ++strPtr;
96 if (strPtr < streams->getLength()) {
97 streams->get(strPtr, &curStr);
98 curStr.streamReset();
99 }
100 }
101 return c;
102}
103
104int Lexer::lookChar() {
105 if (curStr.isNone()) {
106 return EOF;
107 }
108 return curStr.streamLookChar();
109}
110
111Object *Lexer::getObj(Object *obj) {
112 char *p;
113 int c, c2;
114 GBool comment, neg, done;
115 int numParen;
116 int xi;
117 double xf, scale;
118 GString *s;
119 int n, m;
120
121 // skip whitespace and comments
122 comment = gFalse;
123 while (1) {
124 if ((c = getChar()) == EOF) {
125 return obj->initEOF();
126 }
127 if (comment) {
128 if (c == '\r' || c == '\n')
129 comment = gFalse;
130 } else if (c == '%') {
131 comment = gTrue;
132 } else if (specialChars[c] != 1) {
133 break;
134 }
135 }
136
137 // start reading token
138 switch (c) {
139
140 // number
141 case '0': case '1': case '2': case '3': case '4':
142 case '5': case '6': case '7': case '8': case '9':
143 case '-': case '.':
144 neg = gFalse;
145 xi = 0;
146 if (c == '-') {
147 neg = gTrue;
148 } else if (c == '.') {
149 goto doReal;
150 } else {
151 xi = c - '0';
152 }
153 while (1) {
154 c = lookChar();
155 if (isdigit(c)) {
156 getChar();
157 xi = xi * 10 + (c - '0');
158 } else if (c == '.') {
159 getChar();
160 goto doReal;
161 } else {
162 break;
163 }
164 }
165 if (neg)
166 xi = -xi;
167 obj->initInt(xi);
168 break;
169 doReal:
170 xf = xi;
171 scale = 0.1;
172 while (1) {
173 c = lookChar();
174 if (c == '-') {
175 // ignore minus signs in the middle of numbers to match
176 // Adobe's behavior
177 error(getPos(), "Badly formatted number");
178 getChar();
179 continue;
180 }
181 if (!isdigit(c)) {
182 break;
183 }
184 getChar();
185 xf = xf + scale * (c - '0');
186 scale *= 0.1;
187 }
188 if (neg)
189 xf = -xf;
190 obj->initReal(xf);
191 break;
192
193 // string
194 case '(':
195 p = tokBuf;
196 n = 0;
197 numParen = 1;
198 done = gFalse;
199 s = NULL;
200 do {
201 c2 = EOF;
202 switch (c = getChar()) {
203
204 case EOF:
205#if 0
206 // This breaks some PDF files, e.g., ones from Photoshop.
207 case '\r':
208 case '\n':
209#endif
210 error(getPos(), "Unterminated string");
211 done = gTrue;
212 break;
213
214 case '(':
215 ++numParen;
216 c2 = c;
217 break;
218
219 case ')':
220 if (--numParen == 0) {
221 done = gTrue;
222 } else {
223 c2 = c;
224 }
225 break;
226
227 case '\\':
228 switch (c = getChar()) {
229 case 'n':
230 c2 = '\n';
231 break;
232 case 'r':
233 c2 = '\r';
234 break;
235 case 't':
236 c2 = '\t';
237 break;
238 case 'b':
239 c2 = '\b';
240 break;
241 case 'f':
242 c2 = '\f';
243 break;
244 case '\\':
245 case '(':
246 case ')':
247 c2 = c;
248 break;
249 case '0': case '1': case '2': case '3':
250 case '4': case '5': case '6': case '7':
251 c2 = c - '0';
252 c = lookChar();
253 if (c >= '0' && c <= '7') {
254 getChar();
255 c2 = (c2 << 3) + (c - '0');
256 c = lookChar();
257 if (c >= '0' && c <= '7') {
258 getChar();
259 c2 = (c2 << 3) + (c - '0');
260 }
261 }
262 break;
263 case '\r':
264 c = lookChar();
265 if (c == '\n') {
266 getChar();
267 }
268 break;
269 case '\n':
270 break;
271 case EOF:
272 error(getPos(), "Unterminated string");
273 done = gTrue;
274 break;
275 default:
276 c2 = c;
277 break;
278 }
279 break;
280
281 default:
282 c2 = c;
283 break;
284 }
285
286 if (c2 != EOF) {
287 if (n == tokBufSize) {
288 if (!s)
289 s = new GString(tokBuf, tokBufSize);
290 else
291 s->append(tokBuf, tokBufSize);
292 p = tokBuf;
293 n = 0;
294 }
295 *p++ = (char)c2;
296 ++n;
297 }
298 } while (!done);
299 if (!s)
300 s = new GString(tokBuf, n);
301 else
302 s->append(tokBuf, n);
303 obj->initString(s);
304 break;
305
306 // name
307 case '/':
308 p = tokBuf;
309 n = 0;
310 while ((c = lookChar()) != EOF && !specialChars[c]) {
311 getChar();
312 if (c == '#') {
313 c2 = lookChar();
314 if (c2 >= '0' && c2 <= '9') {
315 c = c2 - '0';
316 } else if (c2 >= 'A' && c2 <= 'F') {
317 c = c2 - 'A' + 10;
318 } else if (c2 >= 'a' && c2 <= 'f') {
319 c = c2 - 'a' + 10;
320 } else {
321 goto notEscChar;
322 }
323 getChar();
324 c <<= 4;
325 c2 = getChar();
326 if (c2 >= '0' && c2 <= '9') {
327 c += c2 - '0';
328 } else if (c2 >= 'A' && c2 <= 'F') {
329 c += c2 - 'A' + 10;
330 } else if (c2 >= 'a' && c2 <= 'f') {
331 c += c2 - 'a' + 10;
332 } else {
333 error(getPos(), "Illegal digit in hex char in name");
334 }
335 }
336 notEscChar:
337 if (++n == tokBufSize) {
338 error(getPos(), "Name token too long");
339 break;
340 }
341 *p++ = c;
342 }
343 *p = '\0';
344 obj->initName(tokBuf);
345 break;
346
347 // array punctuation
348 case '[':
349 case ']':
350 tokBuf[0] = c;
351 tokBuf[1] = '\0';
352 obj->initCmd(tokBuf);
353 break;
354
355 // hex string or dict punctuation
356 case '<':
357 c = lookChar();
358
359 // dict punctuation
360 if (c == '<') {
361 getChar();
362 tokBuf[0] = tokBuf[1] = '<';
363 tokBuf[2] = '\0';
364 obj->initCmd(tokBuf);
365
366 // hex string
367 } else {
368 p = tokBuf;
369 m = n = 0;
370 c2 = 0;
371 s = NULL;
372 while (1) {
373 c = getChar();
374 if (c == '>') {
375 break;
376 } else if (c == EOF) {
377 error(getPos(), "Unterminated hex string");
378 break;
379 } else if (specialChars[c] != 1) {
380 c2 = c2 << 4;
381 if (c >= '0' && c <= '9')
382 c2 += c - '0';
383 else if (c >= 'A' && c <= 'F')
384 c2 += c - 'A' + 10;
385 else if (c >= 'a' && c <= 'f')
386 c2 += c - 'a' + 10;
387 else
388 error(getPos(), "Illegal character <%02x> in hex string", c);
389 if (++m == 2) {
390 if (n == tokBufSize) {
391 if (!s)
392 s = new GString(tokBuf, tokBufSize);
393 else
394 s->append(tokBuf, tokBufSize);
395 p = tokBuf;
396 n = 0;
397 }
398 *p++ = (char)c2;
399 ++n;
400 c2 = 0;
401 m = 0;
402 }
403 }
404 }
405 if (!s)
406 s = new GString(tokBuf, n);
407 else
408 s->append(tokBuf, n);
409 if (m == 1)
410 s->append((char)(c2 << 4));
411 obj->initString(s);
412 }
413 break;
414
415 // dict punctuation
416 case '>':
417 c = lookChar();
418 if (c == '>') {
419 getChar();
420 tokBuf[0] = tokBuf[1] = '>';
421 tokBuf[2] = '\0';
422 obj->initCmd(tokBuf);
423 } else {
424 error(getPos(), "Illegal character '>'");
425 obj->initError();
426 }
427 break;
428
429 // error
430 case ')':
431 case '{':
432 case '}':
433 error(getPos(), "Illegal character '%c'", c);
434 obj->initError();
435 break;
436
437 // command
438 default:
439 p = tokBuf;
440 *p++ = c;
441 n = 1;
442 while ((c = lookChar()) != EOF && !specialChars[c]) {
443 getChar();
444 if (++n == tokBufSize) {
445 error(getPos(), "Command token too long");
446 break;
447 }
448 *p++ = c;
449 }
450 *p = '\0';
451 if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
452 obj->initBool(gTrue);
453 } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
454 obj->initBool(gFalse);
455 } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
456 obj->initNull();
457 } else {
458 obj->initCmd(tokBuf);
459 }
460 break;
461 }
462
463 return obj;
464}
465
466void Lexer::skipToNextLine() {
467 int c;
468
469 while (1) {
470 c = getChar();
471 if (c == EOF || c == '\n') {
472 return;
473 }
474 if (c == '\r') {
475 if ((c = lookChar()) == '\n') {
476 getChar();
477 }
478 return;
479 }
480 }
481}
482
483GBool Lexer::isSpace(int c) {
484 return c >= 0 && c <= 0xff && specialChars[c] == 1;
485}