]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | //======================================================================== |
2 | // | |
3 | // UnicodeMap.cc | |
4 | // | |
5 | // Copyright 2001-2003 Glyph & Cog, LLC | |
6 | // | |
7 | //======================================================================== | |
8 | ||
9 | #include <config.h> | |
10 | ||
11 | #ifdef USE_GCC_PRAGMAS | |
12 | #pragma implementation | |
13 | #endif | |
14 | ||
15 | #include <stdio.h> | |
16 | #include <string.h> | |
17 | #include "gmem.h" | |
18 | #include "gfile.h" | |
19 | #include "GString.h" | |
20 | #include "GList.h" | |
21 | #include "Error.h" | |
22 | #include "GlobalParams.h" | |
23 | #include "UnicodeMap.h" | |
24 | ||
25 | //------------------------------------------------------------------------ | |
26 | ||
27 | #define maxExtCode 16 | |
28 | ||
29 | struct UnicodeMapExt { | |
30 | Unicode u; // Unicode char | |
31 | char code[maxExtCode]; | |
32 | Guint nBytes; | |
33 | }; | |
34 | ||
35 | //------------------------------------------------------------------------ | |
36 | ||
37 | UnicodeMap *UnicodeMap::parse(GString *encodingNameA) { | |
38 | FILE *f; | |
39 | UnicodeMap *map; | |
40 | UnicodeMapRange *range; | |
41 | UnicodeMapExt *eMap; | |
42 | int size, eMapsSize; | |
43 | char buf[256]; | |
44 | int line, nBytes, i, x; | |
45 | char *tok1, *tok2, *tok3; | |
46 | ||
47 | if (!(f = globalParams->getUnicodeMapFile(encodingNameA))) { | |
48 | error(-1, "Couldn't find unicodeMap file for the '%s' encoding", | |
49 | encodingNameA->getCString()); | |
50 | return NULL; | |
51 | } | |
52 | ||
53 | map = new UnicodeMap(encodingNameA->copy()); | |
54 | ||
55 | size = 8; | |
56 | map->ranges = (UnicodeMapRange *)gmallocn(size, sizeof(UnicodeMapRange)); | |
57 | eMapsSize = 0; | |
58 | ||
59 | line = 1; | |
60 | while (getLine(buf, sizeof(buf), f)) { | |
61 | if ((tok1 = strtok(buf, " \t\r\n")) && | |
62 | (tok2 = strtok(NULL, " \t\r\n"))) { | |
63 | if (!(tok3 = strtok(NULL, " \t\r\n"))) { | |
64 | tok3 = tok2; | |
65 | tok2 = tok1; | |
66 | } | |
67 | nBytes = strlen(tok3) / 2; | |
68 | if (nBytes <= 4) { | |
69 | if (map->len == size) { | |
70 | size *= 2; | |
71 | map->ranges = (UnicodeMapRange *) | |
72 | greallocn(map->ranges, size, sizeof(UnicodeMapRange)); | |
73 | } | |
74 | range = &map->ranges[map->len]; | |
75 | sscanf(tok1, "%x", &range->start); | |
76 | sscanf(tok2, "%x", &range->end); | |
77 | sscanf(tok3, "%x", &range->code); | |
78 | range->nBytes = nBytes; | |
79 | ++map->len; | |
80 | } else if (tok2 == tok1) { | |
81 | if (map->eMapsLen == eMapsSize) { | |
82 | eMapsSize += 16; | |
83 | map->eMaps = (UnicodeMapExt *) | |
84 | greallocn(map->eMaps, eMapsSize, sizeof(UnicodeMapExt)); | |
85 | } | |
86 | eMap = &map->eMaps[map->eMapsLen]; | |
87 | sscanf(tok1, "%x", &eMap->u); | |
88 | for (i = 0; i < nBytes; ++i) { | |
89 | sscanf(tok3 + i*2, "%2x", &x); | |
90 | eMap->code[i] = (char)x; | |
91 | } | |
92 | eMap->nBytes = nBytes; | |
93 | ++map->eMapsLen; | |
94 | } else { | |
95 | error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding", | |
96 | line, encodingNameA->getCString()); | |
97 | } | |
98 | } else { | |
99 | error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding", | |
100 | line, encodingNameA->getCString()); | |
101 | } | |
102 | ++line; | |
103 | } | |
104 | ||
105 | fclose(f); | |
106 | ||
107 | return map; | |
108 | } | |
109 | ||
110 | UnicodeMap::UnicodeMap(GString *encodingNameA) { | |
111 | encodingName = encodingNameA; | |
112 | unicodeOut = gFalse; | |
113 | kind = unicodeMapUser; | |
114 | ranges = NULL; | |
115 | len = 0; | |
116 | eMaps = NULL; | |
117 | eMapsLen = 0; | |
118 | refCnt = 1; | |
119 | #if MULTITHREADED | |
120 | gInitMutex(&mutex); | |
121 | #endif | |
122 | } | |
123 | ||
124 | UnicodeMap::UnicodeMap(char *encodingNameA, GBool unicodeOutA, | |
125 | UnicodeMapRange *rangesA, int lenA) { | |
126 | encodingName = new GString(encodingNameA); | |
127 | unicodeOut = unicodeOutA; | |
128 | kind = unicodeMapResident; | |
129 | ranges = rangesA; | |
130 | len = lenA; | |
131 | eMaps = NULL; | |
132 | eMapsLen = 0; | |
133 | refCnt = 1; | |
134 | #if MULTITHREADED | |
135 | gInitMutex(&mutex); | |
136 | #endif | |
137 | } | |
138 | ||
139 | UnicodeMap::UnicodeMap(char *encodingNameA, GBool unicodeOutA, | |
140 | UnicodeMapFunc funcA) { | |
141 | encodingName = new GString(encodingNameA); | |
142 | unicodeOut = unicodeOutA; | |
143 | kind = unicodeMapFunc; | |
144 | func = funcA; | |
145 | eMaps = NULL; | |
146 | eMapsLen = 0; | |
147 | refCnt = 1; | |
148 | #if MULTITHREADED | |
149 | gInitMutex(&mutex); | |
150 | #endif | |
151 | } | |
152 | ||
153 | UnicodeMap::~UnicodeMap() { | |
154 | delete encodingName; | |
155 | if (kind == unicodeMapUser && ranges) { | |
156 | gfree(ranges); | |
157 | } | |
158 | if (eMaps) { | |
159 | gfree(eMaps); | |
160 | } | |
161 | #if MULTITHREADED | |
162 | gDestroyMutex(&mutex); | |
163 | #endif | |
164 | } | |
165 | ||
166 | void UnicodeMap::incRefCnt() { | |
167 | #if MULTITHREADED | |
168 | gLockMutex(&mutex); | |
169 | #endif | |
170 | ++refCnt; | |
171 | #if MULTITHREADED | |
172 | gUnlockMutex(&mutex); | |
173 | #endif | |
174 | } | |
175 | ||
176 | void UnicodeMap::decRefCnt() { | |
177 | GBool done; | |
178 | ||
179 | #if MULTITHREADED | |
180 | gLockMutex(&mutex); | |
181 | #endif | |
182 | done = --refCnt == 0; | |
183 | #if MULTITHREADED | |
184 | gUnlockMutex(&mutex); | |
185 | #endif | |
186 | if (done) { | |
187 | delete this; | |
188 | } | |
189 | } | |
190 | ||
191 | GBool UnicodeMap::match(GString *encodingNameA) { | |
192 | return !encodingName->cmp(encodingNameA); | |
193 | } | |
194 | ||
195 | int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) { | |
196 | int a, b, m, n, i, j; | |
197 | Guint code; | |
198 | ||
199 | if (kind == unicodeMapFunc) { | |
200 | return (*func)(u, buf, bufSize); | |
201 | } | |
202 | ||
203 | a = 0; | |
204 | b = len; | |
205 | if (u >= ranges[a].start) { | |
206 | // invariant: ranges[a].start <= u < ranges[b].start | |
207 | while (b - a > 1) { | |
208 | m = (a + b) / 2; | |
209 | if (u >= ranges[m].start) { | |
210 | a = m; | |
211 | } else if (u < ranges[m].start) { | |
212 | b = m; | |
213 | } | |
214 | } | |
215 | if (u <= ranges[a].end) { | |
216 | n = ranges[a].nBytes; | |
217 | if (n > bufSize) { | |
218 | return 0; | |
219 | } | |
220 | code = ranges[a].code + (u - ranges[a].start); | |
221 | for (i = n - 1; i >= 0; --i) { | |
222 | buf[i] = (char)(code & 0xff); | |
223 | code >>= 8; | |
224 | } | |
225 | return n; | |
226 | } | |
227 | } | |
228 | ||
229 | for (i = 0; i < eMapsLen; ++i) { | |
230 | if (eMaps[i].u == u) { | |
231 | n = eMaps[i].nBytes; | |
232 | for (j = 0; j < n; ++j) { | |
233 | buf[j] = eMaps[i].code[j]; | |
234 | } | |
235 | return n; | |
236 | } | |
237 | } | |
238 | ||
239 | return 0; | |
240 | } | |
241 | ||
242 | //------------------------------------------------------------------------ | |
243 | ||
244 | UnicodeMapCache::UnicodeMapCache() { | |
245 | int i; | |
246 | ||
247 | for (i = 0; i < unicodeMapCacheSize; ++i) { | |
248 | cache[i] = NULL; | |
249 | } | |
250 | } | |
251 | ||
252 | UnicodeMapCache::~UnicodeMapCache() { | |
253 | int i; | |
254 | ||
255 | for (i = 0; i < unicodeMapCacheSize; ++i) { | |
256 | if (cache[i]) { | |
257 | cache[i]->decRefCnt(); | |
258 | } | |
259 | } | |
260 | } | |
261 | ||
262 | UnicodeMap *UnicodeMapCache::getUnicodeMap(GString *encodingName) { | |
263 | UnicodeMap *map; | |
264 | int i, j; | |
265 | ||
266 | if (cache[0] && cache[0]->match(encodingName)) { | |
267 | cache[0]->incRefCnt(); | |
268 | return cache[0]; | |
269 | } | |
270 | for (i = 1; i < unicodeMapCacheSize; ++i) { | |
271 | if (cache[i] && cache[i]->match(encodingName)) { | |
272 | map = cache[i]; | |
273 | for (j = i; j >= 1; --j) { | |
274 | cache[j] = cache[j - 1]; | |
275 | } | |
276 | cache[0] = map; | |
277 | map->incRefCnt(); | |
278 | return map; | |
279 | } | |
280 | } | |
281 | if ((map = UnicodeMap::parse(encodingName))) { | |
282 | if (cache[unicodeMapCacheSize - 1]) { | |
283 | cache[unicodeMapCacheSize - 1]->decRefCnt(); | |
284 | } | |
285 | for (j = unicodeMapCacheSize - 1; j >= 1; --j) { | |
286 | cache[j] = cache[j - 1]; | |
287 | } | |
288 | cache[0] = map; | |
289 | map->incRefCnt(); | |
290 | return map; | |
291 | } | |
292 | return NULL; | |
293 | } |