git.ipfire.org Git - thirdparty/cups.git/blame - pdftops/CharCodeToUnicode.cxx
Load cups into easysw/current.
[thirdparty/cups.git] / pdftops / CharCodeToUnicode.cxx
CommitLineData
ef416fc2 1//========================================================================
2//
3// CharCodeToUnicode.cc
4//
5// Copyright 2001-2003 Glyph & Cog, LLC
6//
7//========================================================================
8
9#include <config.h>
10
11#ifdef USE_GCC_PRAGMAS
12#pragma implementation
13#endif
14
15#include <stdio.h>
16#include <string.h>
17#include "gmem.h"
18#include "gfile.h"
19#include "GString.h"
20#include "Error.h"
21#include "GlobalParams.h"
22#include "PSTokenizer.h"
23#include "CharCodeToUnicode.h"
24
25//------------------------------------------------------------------------
26
// Capacity of the Unicode array held by one CharCodeToUnicodeString.
#define maxUnicodeString 8

// One entry of the multi-character mapping table: a character code
// together with the sequence of Unicode values it maps to.
struct CharCodeToUnicodeString {
  CharCode c;                   // character code this entry describes
  Unicode u[maxUnicodeString];  // Unicode values mapped from c
  int len;                      // number of values recorded for c
};
34
35//------------------------------------------------------------------------
36
// Character-source callback that reads from an in-memory, NUL-terminated
// string.  data points to a char* cursor; each call returns the byte
// under the cursor and advances the cursor by one.  Once the terminating
// NUL is reached, EOF is returned and the cursor is left where it is.
//
// The byte is returned as an unsigned char value (0..255), the same
// convention fgetc() uses.  Reading it through a plain char would yield
// negative values for bytes >= 0x80 on platforms where char is signed,
// and 0xff would be indistinguishable from EOF.
static int getCharFromString(void *data) {
  char **cursor = (char **)data;
  char *p = *cursor;

  if (!*p) {
    return EOF;
  }
  *cursor = p + 1;
  return (int)(unsigned char)*p;
}
50
// Character-source callback that reads from a stdio stream.  data is
// the FILE* to read from; the result is whatever fgetc() returns for it.
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;

  return fgetc(stream);
}
54
55//------------------------------------------------------------------------
56
57CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
58 GString *collection) {
59 FILE *f;
60 Unicode *mapA;
61 CharCode size, mapLenA;
62 char buf[64];
63 Unicode u;
64 CharCodeToUnicode *ctu;
65
66 if (!(f = fopen(fileName->getCString(), "r"))) {
67 error(-1, "Couldn't open cidToUnicode file '%s'",
68 fileName->getCString());
69 return NULL;
70 }
71
72 size = 32768;
73 mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
74 mapLenA = 0;
75
76 while (getLine(buf, sizeof(buf), f)) {
77 if (mapLenA == size) {
78 size *= 2;
79 mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
80 }
81 if (sscanf(buf, "%x", &u) == 1) {
82 mapA[mapLenA] = u;
83 } else {
84 error(-1, "Bad line (%d) in cidToUnicode file '%s'",
85 (int)(mapLenA + 1), fileName->getCString());
86 mapA[mapLenA] = 0;
87 }
88 ++mapLenA;
89 }
90 fclose(f);
91
92 ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue,
93 NULL, 0, 0);
94 gfree(mapA);
95 return ctu;
96}
97
98CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
99 GString *fileName) {
100 FILE *f;
101 Unicode *mapA;
102 CharCodeToUnicodeString *sMapA;
103 CharCode size, oldSize, len, sMapSizeA, sMapLenA;
104 char buf[256];
105 char *tok;
106 Unicode u0;
107 Unicode uBuf[maxUnicodeString];
108 CharCodeToUnicode *ctu;
109 int line, n, i;
110
111 if (!(f = fopen(fileName->getCString(), "r"))) {
112 error(-1, "Couldn't open unicodeToUnicode file '%s'",
113 fileName->getCString());
114 return NULL;
115 }
116
117 size = 4096;
118 mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
119 memset(mapA, 0, size * sizeof(Unicode));
120 len = 0;
121 sMapA = NULL;
122 sMapSizeA = sMapLenA = 0;
123
124 line = 0;
125 while (getLine(buf, sizeof(buf), f)) {
126 ++line;
127 if (!(tok = strtok(buf, " \t\r\n")) ||
128 sscanf(tok, "%x", &u0) != 1) {
129 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
130 line, fileName->getCString());
131 continue;
132 }
133 n = 0;
134 while (n < maxUnicodeString) {
135 if (!(tok = strtok(NULL, " \t\r\n"))) {
136 break;
137 }
138 if (sscanf(tok, "%x", &uBuf[n]) != 1) {
139 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
140 line, fileName->getCString());
141 break;
142 }
143 ++n;
144 }
145 if (n < 1) {
146 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
147 line, fileName->getCString());
148 continue;
149 }
150 if (u0 >= size) {
151 oldSize = size;
152 while (u0 >= size) {
153 size *= 2;
154 }
155 mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
156 memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
157 }
158 if (n == 1) {
159 mapA[u0] = uBuf[0];
160 } else {
161 mapA[u0] = 0;
162 if (sMapLenA == sMapSizeA) {
163 sMapSizeA += 16;
164 sMapA = (CharCodeToUnicodeString *)
165 greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
166 }
167 sMapA[sMapLenA].c = u0;
168 for (i = 0; i < n; ++i) {
169 sMapA[sMapLenA].u[i] = uBuf[i];
170 }
171 sMapA[sMapLenA].len = n;
172 ++sMapLenA;
173 }
174 if (u0 >= len) {
175 len = u0 + 1;
176 }
177 }
178 fclose(f);
179
180 ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
181 sMapA, sMapLenA, sMapSizeA);
182 gfree(mapA);
183 return ctu;
184}
185
186CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
187 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
188}
189
190CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
191 CharCodeToUnicode *ctu;
192 char *p;
193
194 ctu = new CharCodeToUnicode(NULL);
195 p = buf->getCString();
196 ctu->parseCMap1(&getCharFromString, &p, nBits);
197 return ctu;
198}
199
200void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
201 char *p;
202
203 p = buf->getCString();
204 parseCMap1(&getCharFromString, &p, nBits);
205}
206
207void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
208 int nBits) {
209 PSTokenizer *pst;
210 char tok1[256], tok2[256], tok3[256];
211 int nDigits, n1, n2, n3;
212 CharCode i;
213 CharCode code1, code2;
214 GString *name;
215 FILE *f;
216
217 nDigits = nBits / 4;
218 pst = new PSTokenizer(getCharFunc, data);
219 pst->getToken(tok1, sizeof(tok1), &n1);
220 while (pst->getToken(tok2, sizeof(tok2), &n2)) {
221 if (!strcmp(tok2, "usecmap")) {
222 if (tok1[0] == '/') {
223 name = new GString(tok1 + 1);
224 if ((f = globalParams->findToUnicodeFile(name))) {
225 parseCMap1(&getCharFromFile, f, nBits);
226 fclose(f);
227 } else {
228 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
229 name->getCString());
230 }
231 delete name;
232 }
233 pst->getToken(tok1, sizeof(tok1), &n1);
234 } else if (!strcmp(tok2, "beginbfchar")) {
235 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
236 if (!strcmp(tok1, "endbfchar")) {
237 break;
238 }
239 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
240 !strcmp(tok2, "endbfchar")) {
241 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
242 break;
243 }
244 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
245 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
246 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
247 continue;
248 }
249 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
250 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
251 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
252 continue;
253 }
254 addMapping(code1, tok2 + 1, n2 - 2, 0);
255 }
256 pst->getToken(tok1, sizeof(tok1), &n1);
257 } else if (!strcmp(tok2, "beginbfrange")) {
258 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
259 if (!strcmp(tok1, "endbfrange")) {
260 break;
261 }
262 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
263 !strcmp(tok2, "endbfrange") ||
264 !pst->getToken(tok3, sizeof(tok3), &n3) ||
265 !strcmp(tok3, "endbfrange")) {
266 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
267 break;
268 }
269 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
270 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
271 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
272 continue;
273 }
274 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
275 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
276 sscanf(tok2 + 1, "%x", &code2) != 1) {
277 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
278 continue;
279 }
280 if (!strcmp(tok3, "[")) {
281 i = 0;
282 while (pst->getToken(tok1, sizeof(tok1), &n1) &&
283 code1 + i <= code2) {
284 if (!strcmp(tok1, "]")) {
285 break;
286 }
287 if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
288 tok1[n1 - 1] = '\0';
289 addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
290 } else {
291 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
292 }
293 ++i;
294 }
295 } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
296 tok3[n3 - 1] = '\0';
297 for (i = 0; code1 <= code2; ++code1, ++i) {
298 addMapping(code1, tok3 + 1, n3 - 2, i);
299 }
300
301 } else {
302 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
303 }
304 }
305 pst->getToken(tok1, sizeof(tok1), &n1);
306 } else {
307 strcpy(tok1, tok2);
308 }
309 }
310 delete pst;
311}
312
313void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
314 int offset) {
315 CharCode oldLen, i;
316 Unicode u;
317 char uHex[5];
318 int j;
319
320 if (code >= mapLen) {
321 oldLen = mapLen;
322 mapLen = (code + 256) & ~255;
323 map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
324 for (i = oldLen; i < mapLen; ++i) {
325 map[i] = 0;
326 }
327 }
328 if (n <= 4) {
329 if (sscanf(uStr, "%x", &u) != 1) {
330 error(-1, "Illegal entry in ToUnicode CMap");
331 return;
332 }
333 map[code] = u + offset;
334 } else {
335 if (sMapLen >= sMapSize) {
336 sMapSize = sMapSize + 16;
337 sMap = (CharCodeToUnicodeString *)
338 greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
339 }
340 map[code] = 0;
341 sMap[sMapLen].c = code;
342 sMap[sMapLen].len = n / 4;
343 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
344 strncpy(uHex, uStr + j*4, 4);
345 uHex[4] = '\0';
346 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
347 error(-1, "Illegal entry in ToUnicode CMap");
348 }
349 }
350 sMap[sMapLen].u[sMap[sMapLen].len - 1] += offset;
351 ++sMapLen;
352 }
353}
354
355CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
356 CharCode i;
357
358 tag = tagA;
359 mapLen = 256;
360 map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
361 for (i = 0; i < mapLen; ++i) {
362 map[i] = 0;
363 }
364 sMap = NULL;
365 sMapLen = sMapSize = 0;
366 refCnt = 1;
367#if MULTITHREADED
368 gInitMutex(&mutex);
369#endif
370}
371
372CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
373 CharCode mapLenA, GBool copyMap,
374 CharCodeToUnicodeString *sMapA,
375 int sMapLenA, int sMapSizeA) {
376 tag = tagA;
377 mapLen = mapLenA;
378 if (copyMap) {
379 map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
380 memcpy(map, mapA, mapLen * sizeof(Unicode));
381 } else {
382 map = mapA;
383 }
384 sMap = sMapA;
385 sMapLen = sMapLenA;
386 sMapSize = sMapSizeA;
387 refCnt = 1;
388#if MULTITHREADED
389 gInitMutex(&mutex);
390#endif
391}
392
393CharCodeToUnicode::~CharCodeToUnicode() {
394 if (tag) {
395 delete tag;
396 }
397 gfree(map);
398 if (sMap) {
399 gfree(sMap);
400 }
401#if MULTITHREADED
402 gDestroyMutex(&mutex);
403#endif
404}
405
406void CharCodeToUnicode::incRefCnt() {
407#if MULTITHREADED
408 gLockMutex(&mutex);
409#endif
410 ++refCnt;
411#if MULTITHREADED
412 gUnlockMutex(&mutex);
413#endif
414}
415
416void CharCodeToUnicode::decRefCnt() {
417 GBool done;
418
419#if MULTITHREADED
420 gLockMutex(&mutex);
421#endif
422 done = --refCnt == 0;
423#if MULTITHREADED
424 gUnlockMutex(&mutex);
425#endif
426 if (done) {
427 delete this;
428 }
429}
430
431GBool CharCodeToUnicode::match(GString *tagA) {
432 return tag && !tag->cmp(tagA);
433}
434
435void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
436 int i, j;
437
438 if (len == 1) {
439 map[c] = u[0];
440 } else {
441 for (i = 0; i < sMapLen; ++i) {
442 if (sMap[i].c == c) {
443 break;
444 }
445 }
446 if (i == sMapLen) {
447 if (sMapLen == sMapSize) {
448 sMapSize += 8;
449 sMap = (CharCodeToUnicodeString *)
450 greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
451 }
452 ++sMapLen;
453 }
454 map[c] = 0;
455 sMap[i].c = c;
456 sMap[i].len = len;
457 for (j = 0; j < len && j < maxUnicodeString; ++j) {
458 sMap[i].u[j] = u[j];
459 }
460 }
461}
462
463int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
464 int i, j;
465
466 if (c >= mapLen) {
467 return 0;
468 }
469 if (map[c]) {
470 u[0] = map[c];
471 return 1;
472 }
473 for (i = 0; i < sMapLen; ++i) {
474 if (sMap[i].c == c) {
475 for (j = 0; j < sMap[i].len && j < size; ++j) {
476 u[j] = sMap[i].u[j];
477 }
478 return j;
479 }
480 }
481 return 0;
482}
483
484//------------------------------------------------------------------------
485
486CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
487 int i;
488
489 size = sizeA;
490 cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *));
491 for (i = 0; i < size; ++i) {
492 cache[i] = NULL;
493 }
494}
495
496CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
497 int i;
498
499 for (i = 0; i < size; ++i) {
500 if (cache[i]) {
501 cache[i]->decRefCnt();
502 }
503 }
504 gfree(cache);
505}
506
507CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
508 CharCodeToUnicode *ctu;
509 int i, j;
510
511 if (cache[0] && cache[0]->match(tag)) {
512 cache[0]->incRefCnt();
513 return cache[0];
514 }
515 for (i = 1; i < size; ++i) {
516 if (cache[i] && cache[i]->match(tag)) {
517 ctu = cache[i];
518 for (j = i; j >= 1; --j) {
519 cache[j] = cache[j - 1];
520 }
521 cache[0] = ctu;
522 ctu->incRefCnt();
523 return ctu;
524 }
525 }
526 return NULL;
527}
528
529void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
530 int i;
531
532 if (cache[size - 1]) {
533 cache[size - 1]->decRefCnt();
534 }
535 for (i = size - 1; i >= 1; --i) {
536 cache[i] = cache[i - 1];
537 }
538 cache[0] = ctu;
539 ctu->incRefCnt();
540}