]>
Commit | Line | Data |
---|---|---|
ef416fc2 | 1 | /* |
fa73b229 | 2 | * "$Id: normalize.c 4967 2006-01-24 03:42:15Z mike $" |
ef416fc2 | 3 | * |
4 | * Unicode normalization for the Common UNIX Printing System (CUPS). | |
5 | * | |
6 | * Copyright 1997-2006 by Easy Software Products. | |
7 | * | |
8 | * These coded instructions, statements, and computer programs are | |
9 | * the property of Easy Software Products and are protected by Federal | |
10 | * copyright law. Distribution and use rights are outlined in the | |
11 | * file "LICENSE.txt" which should have been included with this file. | |
12 | * If this file is missing or damaged please contact Easy Software | |
13 | * Products at: | |
14 | * | |
15 | * Attn: CUPS Licensing Information | |
16 | * Easy Software Products | |
17 | * 44141 Airport View Drive, Suite 204 | |
18 | * Hollywood, Maryland 20636 USA | |
19 | * | |
20 | * Voice: (301) 373-9600 | |
21 | * EMail: cups-info@cups.org | |
22 | * WWW: http://www.cups.org | |
23 | * | |
24 | * Contents: | |
25 | * | |
26 | * cupsNormalizeMapsGet() - Get all norm maps to cache. | |
27 | * cupsNormalizeMapsFree() - Free all norm maps in cache. | |
28 | * cupsNormalizeMapsFlush() - Flush all norm maps in cache. | |
fa73b229 | 29 | * _cupsNormalizeMapsFlush() - Flush all normalization maps in cache. |
ef416fc2 | 30 | * cupsUTF8Normalize() - Normalize UTF-8 string. |
31 | * cupsUTF32Normalize() - Normalize UTF-32 string. | |
32 | * cupsUTF8CaseFold() - Case fold UTF-8 string. | |
33 | * cupsUTF32CaseFold() - Case fold UTF-32 string. | |
34 | * cupsUTF8CompareCaseless() - Compare case folded UTF-8 strings. | |
35 | * cupsUTF32CompareCaseless() - Compare case folded UTF-32 strings. | |
36 | * cupsUTF8CompareIdentifier() - Compare folded NFKC UTF-8 strings. | |
37 | * cupsUTF32CompareIdentifier() - Compare folded NFKC UTF-32 strings. | |
38 | * cupsUTF32CharacterProperty() - Get UTF-32 character property. | |
39 | * get_general_category() - Get UTF-32 Char General Category. | |
40 | * get_bidi_category() - Get UTF-32 Char Bidi Category. | |
41 | * get_combining_class() - Get UTF-32 Char Combining Class. | |
42 | * get_break_class() - Get UTF-32 Char Line Break Class. | |
43 | * get_map_count() - Count lines in a map file. | |
44 | * get_normmap() - Get Unicode norm map to cache. | |
45 | * get_foldmap() - Get Unicode casefold map to cache. | |
46 | * get_propmap() - Get Unicode property map to cache. | |
47 | * get_combmap() - Get Unicode combining map to cache. | |
48 | * get_breakmap() - Get Unicode break map to cache. | |
49 | * compare_compose() - Compare key for compose match. | |
50 | * compare_decompose() - Compare key for decompose match. | |
51 | * compare_foldchar() - Compare key for case fold match. | |
52 | * compare_combchar() - Compare key for combining match. | |
53 | * compare_breakchar() - Compare key for line break match. | |
54 | * compare_propchar() - Compare key for property char match. | |
55 | */ | |
56 | ||
57 | /* | |
58 | * Include necessary headers... | |
59 | */ | |
60 | ||
61 | #include "globals.h" | |
62 | #include "debug.h" | |
63 | #include <stdlib.h> | |
64 | #include <errno.h> | |
65 | #include <time.h> | |
66 | ||
67 | ||
68 | typedef struct /**** General Category Index Struct****/ | |
69 | { | |
70 | cups_gencat_t gencat; /* General Category Value */ | |
71 | const char *str; /* General Category String */ | |
72 | } gencat_t; | |
73 | ||
74 | static const gencat_t gencat_index[] = /* General Category Index */ | |
75 | { | |
76 | { CUPS_GENCAT_LU, "Lu" }, /* Letter, Uppercase */ | |
77 | { CUPS_GENCAT_LL, "Ll" }, /* Letter, Lowercase */ | |
78 | { CUPS_GENCAT_LT, "Lt" }, /* Letter, Titlecase */ | |
79 | { CUPS_GENCAT_LM, "Lm" }, /* Letter, Modifier */ | |
80 | { CUPS_GENCAT_LO, "Lo" }, /* Letter, Other */ | |
81 | { CUPS_GENCAT_MN, "Mn" }, /* Mark, Non-Spacing */ | |
82 | { CUPS_GENCAT_MC, "Mc" }, /* Mark, Spacing Combining */ | |
83 | { CUPS_GENCAT_ME, "Me" }, /* Mark, Enclosing */ | |
84 | { CUPS_GENCAT_ND, "Nd" }, /* Number, Decimal Digit */ | |
85 | { CUPS_GENCAT_NL, "Nl" }, /* Number, Letter */ | |
86 | { CUPS_GENCAT_NO, "No" }, /* Number, Other */ | |
87 | { CUPS_GENCAT_PC, "Pc" }, /* Punctuation, Connector */ | |
88 | { CUPS_GENCAT_PD, "Pd" }, /* Punctuation, Dash */ | |
89 | { CUPS_GENCAT_PS, "Ps" }, /* Punctuation, Open (start) */ | |
90 | { CUPS_GENCAT_PE, "Pe" }, /* Punctuation, Close (end) */ | |
91 | { CUPS_GENCAT_PI, "Pi" }, /* Punctuation, Initial Quote */ | |
92 | { CUPS_GENCAT_PF, "Pf" }, /* Punctuation, Final Quote */ | |
93 | { CUPS_GENCAT_PO, "Po" }, /* Punctuation, Other */ | |
94 | { CUPS_GENCAT_SM, "Sm" }, /* Symbol, Math */ | |
95 | { CUPS_GENCAT_SC, "Sc" }, /* Symbol, Currency */ | |
96 | { CUPS_GENCAT_SK, "Sk" }, /* Symbol, Modifier */ | |
97 | { CUPS_GENCAT_SO, "So" }, /* Symbol, Other */ | |
98 | { CUPS_GENCAT_ZS, "Zs" }, /* Separator, Space */ | |
99 | { CUPS_GENCAT_ZL, "Zl" }, /* Separator, Line */ | |
100 | { CUPS_GENCAT_ZP, "Zp" }, /* Separator, Paragraph */ | |
101 | { CUPS_GENCAT_CC, "Cc" }, /* Other, Control */ | |
102 | { CUPS_GENCAT_CF, "Cf" }, /* Other, Format */ | |
103 | { CUPS_GENCAT_CS, "Cs" }, /* Other, Surrogate */ | |
104 | { CUPS_GENCAT_CO, "Co" }, /* Other, Private Use */ | |
105 | { CUPS_GENCAT_CN, "Cn" }, /* Other, Not Assigned */ | |
106 | { 0, NULL } | |
107 | }; | |
108 | ||
/*
 * Bidi Category index: abbreviation strings for the bidirectional
 * categories, terminated by a NULL entry.
 */
static const char * const bidicat_index[] =
{
  "L",                                  /* Left-to-Right (Alpha, Syllabic, Ideographic) */
  "LRE",                                /* Left-to-Right Embedding (explicit) */
  "LRO",                                /* Left-to-Right Override (explicit) */
  "R",                                  /* Right-to-Left (Hebrew alphabet and most punct) */
  "AL",                                 /* Right-to-Left Arabic (Arabic, Thaana, Syriac) */
  "RLE",                                /* Right-to-Left Embedding (explicit) */
  "RLO",                                /* Right-to-Left Override (explicit) */
  "PDF",                                /* Pop Directional Format */
  "EN",                                 /* Euro Number (Euro and East Arabic-Indic digits) */
  "ES",                                 /* Euro Number Separator (Slash) */
  "ET",                                 /* Euro Number Terminator (Plus, Minus, Degree, etc) */
  "AN",                                 /* Arabic Number (Arabic-Indic digits, separators) */
  "CS",                                 /* Common Number Separator (Colon, Comma, Dot, etc) */
  "NSM",                                /* Non-Spacing Mark (category Mn / Me in UCD) */
  "BN",                                 /* Boundary Neutral (Formatting / Control chars) */
  "B",                                  /* Paragraph Separator */
  "S",                                  /* Segment Separator (Tab) */
  "WS",                                 /* Whitespace Space (Space, Line Separator, etc) */
  "ON",                                 /* Other Neutrals */
  NULL                                  /* End-of-table sentinel */
};
133 | ||
134 | typedef struct /**** Line Break Class Index Struct****/ | |
135 | { | |
136 | cups_break_class_t breakclass; /* Line Break Class Value */ | |
137 | const char *str; /* Line Break Class String */ | |
138 | } _cups_break_t; | |
139 | ||
140 | static const _cups_break_t break_index[] = /* Line Break Class Index */ | |
141 | { | |
142 | { CUPS_BREAK_AI, "AI" }, /* Ambiguous (Alphabetic or Ideograph) */ | |
143 | { CUPS_BREAK_AL, "AL" }, /* Ordinary Alpha/Symbol Chars (XP) */ | |
144 | { CUPS_BREAK_BA, "BA" }, /* Break Opportunity After Chars (A) */ | |
145 | { CUPS_BREAK_BB, "BB" }, /* Break Opportunities Before Chars (B) */ | |
146 | { CUPS_BREAK_B2, "B2" }, /* Break Opportunity Either (B/A/XP) */ | |
147 | { CUPS_BREAK_BK, "BK" }, /* Mandatory Break (A) (norm) */ | |
148 | { CUPS_BREAK_CB, "CB" }, /* Contingent Break (B/A) (norm) */ | |
149 | { CUPS_BREAK_CL, "CL" }, /* Closing Punctuation (XB) */ | |
150 | { CUPS_BREAK_CM, "CM" }, /* Attached/Combining (XB) (norm) */ | |
151 | { CUPS_BREAK_CR, "CR" }, /* Carriage Return (A) (norm) */ | |
152 | { CUPS_BREAK_EX, "EX" }, /* Exclamation / Interrogation (XB) */ | |
153 | { CUPS_BREAK_GL, "GL" }, /* Non-breaking ("Glue") (XB/XA) (norm) */ | |
154 | { CUPS_BREAK_HY, "HY" }, /* Hyphen (XA) */ | |
155 | { CUPS_BREAK_ID, "ID" }, /* Ideographic (B/A) */ | |
156 | { CUPS_BREAK_IN, "IN" }, /* Inseparable chars (XP) */ | |
157 | { CUPS_BREAK_IS, "IS" }, /* Numeric Separator (Infix) (XB) */ | |
158 | { CUPS_BREAK_LF, "LF" }, /* Line Feed (A) (norm) */ | |
159 | { CUPS_BREAK_NS, "NS" }, /* Non-starters (XB) */ | |
160 | { CUPS_BREAK_NU, "NU" }, /* Numeric (XP) */ | |
161 | { CUPS_BREAK_OP, "OP" }, /* Opening Punctuation (XA) */ | |
162 | { CUPS_BREAK_PO, "PO" }, /* Postfix (Numeric) (XB) */ | |
163 | { CUPS_BREAK_PR, "PR" }, /* Prefix (Numeric) (XA) */ | |
164 | { CUPS_BREAK_QU, "QU" }, /* Ambiguous Quotation (XB/XA) */ | |
165 | { CUPS_BREAK_SA, "SA" }, /* Context Dependent (SE Asian) (P) */ | |
166 | { CUPS_BREAK_SG, "SG" }, /* Surrogates (XP) (norm) */ | |
167 | { CUPS_BREAK_SP, "SP" }, /* Space (A) (norm) */ | |
168 | { CUPS_BREAK_SY, "SY" }, /* Symbols Allowing Break After (A) */ | |
169 | { CUPS_BREAK_XX, "XX" }, /* Unknown (XP) */ | |
170 | { CUPS_BREAK_ZW, "ZW" }, /* Zero Width Space (A) (norm) */ | |
171 | { 0, NULL } | |
172 | }; | |
173 | ||
174 | /* | |
175 | * Prototypes... | |
176 | */ | |
177 | ||
178 | static int compare_breakchar(const void *k1, const void *k2); | |
179 | static int compare_combchar(const void *k1, const void *k2); | |
180 | static int compare_compose(const void *k1, const void *k2); | |
181 | static int compare_decompose(const void *k1, const void *k2); | |
182 | static int compare_foldchar(const void *k1, const void *k2); | |
183 | static int compare_propchar(const void *k1, const void *k2); | |
184 | static int get_bidi_category(const cups_utf32_t ch); | |
185 | static int get_break_class(const cups_utf32_t ch); | |
186 | static int get_breakmap(void); | |
187 | static int get_combining_class(const cups_utf32_t ch); | |
188 | static int get_combmap(void); | |
189 | static int get_foldmap(const cups_folding_t fold); | |
190 | static int get_general_category(const cups_utf32_t ch); | |
191 | static int get_map_count(const char *filename); | |
192 | static int get_normmap(const cups_normalize_t normalize); | |
193 | static int get_propmap(void); | |
194 | ||
195 | ||
196 | /* | |
197 | * 'cupsNormalizeMapsGet()' - Get all normalization maps to cache. | |
198 | */ | |
199 | ||
200 | int /* O - Zero or -1 on error */ | |
201 | cupsNormalizeMapsGet(void) | |
202 | { | |
203 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ | |
204 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ | |
205 | _cups_globals_t *cg = _cupsGlobals(); | |
206 | /* Pointer to library globals */ | |
207 | ||
208 | ||
209 | /* | |
210 | * See if we already have normalization maps loaded... | |
211 | */ | |
212 | ||
213 | if (cg->normmap_cache) | |
214 | { | |
215 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) | |
216 | nmap->used ++; | |
217 | ||
218 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next) | |
219 | fmap->used ++; | |
220 | ||
221 | if (cg->combmap_cache) | |
222 | cg->combmap_cache->used ++; | |
223 | ||
224 | if (cg->propmap_cache) | |
225 | cg->propmap_cache->used ++; | |
226 | ||
227 | if (cg->breakmap_cache) | |
228 | cg->breakmap_cache->used ++; | |
229 | ||
230 | return (0); | |
231 | } | |
232 | ||
233 | /* | |
234 | * Get normalization maps... | |
235 | */ | |
236 | ||
237 | if (get_normmap(CUPS_NORM_NFD) < 0) | |
238 | return (-1); | |
239 | ||
240 | if (get_normmap(CUPS_NORM_NFKD) < 0) | |
241 | return (-1); | |
242 | ||
243 | if (get_normmap(CUPS_NORM_NFC) < 0) | |
244 | return (-1); | |
245 | ||
246 | /* | |
247 | * Get case folding, combining class, character property maps... | |
248 | */ | |
249 | ||
250 | if (get_foldmap(CUPS_FOLD_SIMPLE) < 0) | |
251 | return (-1); | |
252 | ||
253 | if (get_foldmap(CUPS_FOLD_FULL) < 0) | |
254 | return (-1); | |
255 | ||
256 | if (get_propmap() < 0) | |
257 | return (-1); | |
258 | ||
259 | if (get_combmap() < 0) | |
260 | return (-1); | |
261 | ||
262 | if (get_breakmap() < 0) | |
263 | return (-1); | |
264 | ||
265 | return (0); | |
266 | } | |
267 | ||
268 | ||
269 | /* | |
270 | * 'cupsNormalizeMapsFree()' - Free all normalization maps in cache. | |
271 | * | |
272 | * This does not actually free; use 'cupsNormalizeMapsFlush()' for that. | |
273 | */ | |
274 | ||
275 | int /* O - Zero or -1 on error */ | |
276 | cupsNormalizeMapsFree(void) | |
277 | { | |
278 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ | |
279 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ | |
280 | _cups_globals_t *cg = _cupsGlobals(); | |
281 | /* Pointer to library globals */ | |
282 | ||
283 | ||
284 | /* | |
285 | * See if we already have normalization maps loaded... | |
286 | */ | |
287 | ||
288 | if (cg->normmap_cache == NULL) | |
289 | return (-1); | |
290 | ||
291 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) | |
292 | if (nmap->used > 0) | |
293 | nmap->used --; | |
294 | ||
295 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next) | |
296 | if (fmap->used > 0) | |
297 | fmap->used --; | |
298 | ||
299 | if (cg->propmap_cache && (cg->propmap_cache->used > 0)) | |
300 | cg->propmap_cache->used --; | |
301 | ||
302 | if (cg->combmap_cache && (cg->combmap_cache->used > 0)) | |
303 | cg->combmap_cache->used --; | |
304 | ||
305 | if (cg->breakmap_cache && (cg->breakmap_cache->used > 0)) | |
306 | cg->breakmap_cache->used --; | |
307 | ||
308 | return (0); | |
309 | } | |
310 | ||
311 | ||
312 | /* | |
313 | * 'cupsNormalizeMapsFlush()' - Flush all normalization maps in cache. | |
314 | */ | |
315 | ||
316 | void | |
317 | cupsNormalizeMapsFlush(void) | |
fa73b229 | 318 | { |
319 | _cupsNormalizeMapsFlush(_cupsGlobals()); | |
320 | } | |
321 | ||
322 | ||
323 | /* | |
324 | * '_cupsNormalizeMapsFlush()' - Flush all normalization maps in cache. | |
325 | */ | |
326 | ||
327 | void | |
328 | _cupsNormalizeMapsFlush( | |
329 | _cups_globals_t *cg) /* I - Global data */ | |
ef416fc2 | 330 | { |
331 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ | |
332 | _cups_norm_map_t *nextnorm; /* Next Unicode Normalization Map */ | |
333 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ | |
334 | _cups_fold_map_t *nextfold; /* Next Unicode Case Folding Map */ | |
ef416fc2 | 335 | |
336 | ||
337 | /* | |
338 | * Flush all normalization maps... | |
339 | */ | |
340 | ||
341 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nextnorm) | |
342 | { | |
343 | free(nmap->uni2norm); | |
344 | nextnorm = nmap->next; | |
345 | free(nmap); | |
346 | } | |
347 | ||
348 | cg->normmap_cache = NULL; | |
349 | ||
350 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = nextfold) | |
351 | { | |
352 | free(fmap->uni2fold); | |
353 | nextfold = fmap->next; | |
354 | free(fmap); | |
355 | } | |
356 | ||
357 | cg->foldmap_cache = NULL; | |
358 | ||
359 | if (cg->propmap_cache) | |
360 | { | |
361 | free(cg->propmap_cache->uni2prop); | |
362 | free(cg->propmap_cache); | |
363 | cg->propmap_cache = NULL; | |
364 | } | |
365 | ||
366 | if (cg->combmap_cache) | |
367 | { | |
368 | free(cg->combmap_cache->uni2comb); | |
369 | free(cg->combmap_cache); | |
370 | cg->combmap_cache = NULL; | |
371 | } | |
372 | ||
373 | if (cg->breakmap_cache) | |
374 | { | |
375 | free(cg->breakmap_cache->uni2break); | |
376 | free(cg->breakmap_cache); | |
377 | cg->breakmap_cache = NULL; | |
378 | } | |
379 | } | |
380 | ||
381 | ||
382 | /* | |
383 | * 'cupsUTF8Normalize()' - Normalize UTF-8 string. | |
384 | * | |
385 | * Normalize UTF-8 string to Unicode UAX-15 Normalization Form | |
386 | * Note - Compatibility Normalization Forms (NFKD/NFKC) are | |
387 | * unsafe for subsequent transcoding to legacy charsets | |
388 | */ | |
389 | ||
390 | int /* O - Count or -1 on error */ | |
391 | cupsUTF8Normalize( | |
392 | cups_utf8_t *dest, /* O - Target string */ | |
393 | const cups_utf8_t *src, /* I - Source string */ | |
394 | const int maxout, /* I - Max output */ | |
395 | const cups_normalize_t normalize) /* I - Normalization */ | |
396 | { | |
397 | int len; /* String length */ | |
398 | cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */ | |
399 | cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */ | |
400 | ||
401 | ||
402 | /* | |
403 | * Check for valid arguments and clear output... | |
404 | */ | |
405 | ||
406 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
407 | return (-1); | |
408 | ||
409 | *dest = 0; | |
410 | ||
411 | /* | |
412 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
413 | */ | |
414 | ||
415 | len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING); | |
416 | ||
417 | if (len < 0) | |
418 | return (-1); | |
419 | ||
420 | /* | |
421 | * Normalize internal UCS-4 to second internal UCS-4... | |
422 | */ | |
423 | ||
424 | len = cupsUTF32Normalize(work2, work1, CUPS_MAX_USTRING, normalize); | |
425 | ||
426 | if (len < 0) | |
427 | return (-1); | |
428 | ||
429 | /* | |
430 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
431 | */ | |
432 | ||
433 | len = cupsUTF32ToUTF8(dest, work2, maxout); | |
434 | ||
435 | return (len); | |
436 | } | |
437 | ||
438 | ||
439 | /* | |
440 | * 'cupsUTF32Normalize()' - Normalize UTF-32 string. | |
441 | * | |
442 | * Normalize UTF-32 string to Unicode UAX-15 Normalization Form | |
443 | * Note - Compatibility Normalization Forms (NFKD/NFKC) are | |
444 | * unsafe for subsequent transcoding to legacy charsets | |
445 | */ | |
446 | ||
447 | int /* O - Count or -1 on error */ | |
448 | cupsUTF32Normalize( | |
449 | cups_utf32_t *dest, /* O - Target string */ | |
450 | const cups_utf32_t *src, /* I - Source string */ | |
451 | const int maxout, /* I - Max output */ | |
452 | const cups_normalize_t normalize) /* I - Normalization */ | |
453 | { | |
454 | int i; /* Looping variable */ | |
455 | int result; /* Result Value */ | |
456 | cups_ucs2_t *mp; /* Map char pointer */ | |
457 | int pass; /* Pass count for each transform */ | |
458 | int hit; /* Hit count from binary search */ | |
459 | cups_utf32_t unichar1; /* Unicode character value */ | |
460 | cups_utf32_t unichar2; /* Unicode character value */ | |
461 | _cups_comb_class_t class1; /* First Combining Class */ | |
462 | _cups_comb_class_t class2; /* Second Combining Class */ | |
463 | int len; /* String length */ | |
464 | cups_utf32_t work1[CUPS_MAX_USTRING]; | |
465 | /* First internal UCS-4 string */ | |
466 | cups_utf32_t work2[CUPS_MAX_USTRING]; | |
467 | /* Second internal UCS-4 string */ | |
468 | cups_utf32_t *p1; /* First UCS-4 string pointer */ | |
469 | cups_utf32_t *p2; /* Second UCS-4 string pointer */ | |
470 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ | |
471 | cups_normalize_t decompose; /* Decomposition Type */ | |
472 | _cups_globals_t *cg = _cupsGlobals(); | |
473 | /* Pointer to library globals */ | |
474 | ||
475 | ||
476 | /* | |
477 | * Check for valid arguments and clear output... | |
478 | */ | |
479 | ||
480 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
481 | return (-1); | |
482 | ||
483 | *dest = 0; | |
484 | ||
485 | result = cupsNormalizeMapsGet(); | |
486 | ||
487 | if (result < 0) | |
488 | return (-1); | |
489 | ||
490 | /* | |
491 | * Find decomposition map... | |
492 | */ | |
493 | ||
494 | switch (normalize) | |
495 | { | |
496 | case CUPS_NORM_NFD: | |
497 | case CUPS_NORM_NFC: | |
498 | decompose = CUPS_NORM_NFD; | |
499 | break; | |
500 | ||
501 | case CUPS_NORM_NFKD: | |
502 | case CUPS_NORM_NFKC: | |
503 | decompose = CUPS_NORM_NFKD; | |
504 | break; | |
505 | ||
506 | default: | |
507 | return (-1); | |
508 | } | |
509 | ||
510 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) | |
511 | if (nmap->normalize == decompose) | |
512 | break; | |
513 | ||
514 | if (nmap == NULL) | |
515 | return (-1); | |
516 | ||
517 | /* | |
518 | * Copy input to internal buffer... | |
519 | */ | |
520 | ||
521 | p1 = &work1[0]; | |
522 | ||
523 | for (i = 0; i < CUPS_MAX_USTRING; i ++) | |
524 | { | |
525 | if (*src == 0) | |
526 | break; | |
527 | ||
528 | *p1 ++ = *src ++; | |
529 | } | |
530 | ||
531 | *p1 = 0; | |
532 | len = i; | |
533 | ||
534 | /* | |
535 | * Decompose until no further decomposition... | |
536 | */ | |
537 | ||
538 | for (pass = 0; pass < 20; pass ++) | |
539 | { | |
540 | p1 = &work1[0]; | |
541 | p2 = &work2[0]; | |
542 | ||
543 | for (hit = 0; *p1 != 0; p1 ++) | |
544 | { | |
545 | /* | |
546 | * Check for decomposition defined... | |
547 | */ | |
548 | ||
549 | mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount, | |
550 | (sizeof(cups_ucs2_t) * 3), compare_decompose); | |
551 | if (mp == NULL) | |
552 | { | |
553 | *p2 ++ = *p1; | |
554 | continue; | |
555 | } | |
556 | ||
557 | /* | |
558 | * Decompose input character to one or two output characters... | |
559 | */ | |
560 | ||
561 | hit ++; | |
562 | mp ++; | |
563 | *p2 ++ = (cups_utf32_t) *mp ++; | |
564 | ||
565 | if (*mp != 0) | |
566 | *p2 ++ = (cups_utf32_t) *mp; | |
567 | } | |
568 | ||
569 | *p2 = 0; | |
570 | len = (int)(p2 - &work2[0]); | |
571 | ||
572 | /* | |
573 | * Check for decomposition finished... | |
574 | */ | |
575 | if (hit == 0) | |
576 | break; | |
577 | memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1)); | |
578 | } | |
579 | ||
580 | /* | |
581 | * Canonical reorder until no further reordering... | |
582 | */ | |
583 | ||
584 | for (pass = 0; pass < 20; pass ++) | |
585 | { | |
586 | p1 = &work1[0]; | |
587 | ||
588 | for (hit = 0; *p1 != 0; p1 ++) | |
589 | { | |
590 | /* | |
591 | * Check for combining characters to reorder... | |
592 | */ | |
593 | ||
594 | unichar1 = *p1; | |
595 | unichar2 = *(p1 + 1); | |
596 | ||
597 | if (unichar2 == 0) | |
598 | break; | |
599 | ||
600 | class1 = get_combining_class(unichar1); | |
601 | class2 = get_combining_class(unichar2); | |
602 | ||
603 | if ((class1 < 0) || (class2 < 0)) | |
604 | return (-1); | |
605 | ||
606 | if ((class1 == 0) || (class2 == 0)) | |
607 | continue; | |
608 | ||
609 | if (class1 <= class2) | |
610 | continue; | |
611 | ||
612 | /* | |
613 | * Swap two combining characters... | |
614 | */ | |
615 | ||
616 | *p1 = unichar2; | |
617 | p1 ++; | |
618 | *p1 = unichar1; | |
619 | hit ++; | |
620 | } | |
621 | ||
622 | if (hit == 0) | |
623 | break; | |
624 | } | |
625 | ||
626 | /* | |
627 | * Check for decomposition only... | |
628 | */ | |
629 | ||
630 | if (normalize == CUPS_NORM_NFD || normalize == CUPS_NORM_NFKD) | |
631 | { | |
632 | memcpy(dest, work1, sizeof(cups_utf32_t) * (len + 1)); | |
633 | return (len); | |
634 | } | |
635 | ||
636 | /* | |
637 | * Find composition map... | |
638 | */ | |
639 | ||
640 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) | |
641 | if (nmap->normalize == CUPS_NORM_NFC) | |
642 | break; | |
643 | ||
644 | if (nmap == NULL) | |
645 | return (-1); | |
646 | ||
647 | /* | |
648 | * Compose until no further composition... | |
649 | */ | |
650 | ||
651 | for (pass = 0; pass < 20; pass ++) | |
652 | { | |
653 | p1 = &work1[0]; | |
654 | p2 = &work2[0]; | |
655 | ||
656 | for (hit = 0; *p1 != 0; p1 ++) | |
657 | { | |
658 | /* | |
659 | * Check for composition defined... | |
660 | */ | |
661 | ||
662 | unichar1 = *p1; | |
663 | unichar2 = *(p1 + 1); | |
664 | ||
665 | if (unichar2 == 0) | |
666 | { | |
667 | *p2 ++ = unichar1; | |
668 | break; | |
669 | } | |
670 | ||
671 | mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount, | |
672 | (sizeof(cups_ucs2_t) * 3), compare_compose); | |
673 | if (mp == NULL) | |
674 | { | |
675 | *p2 ++ = *p1; | |
676 | continue; | |
677 | } | |
678 | ||
679 | /* | |
680 | * Compose two input characters to one output character... | |
681 | */ | |
682 | ||
683 | hit ++; | |
684 | mp += 2; | |
685 | *p2 ++ = (cups_utf32_t) *mp; | |
686 | p1 ++; | |
687 | } | |
688 | ||
689 | *p2 = 0; | |
690 | len = (int) (p2 - &work2[0]); | |
691 | ||
692 | /* | |
693 | * Check for composition finished... | |
694 | */ | |
695 | ||
696 | if (hit == 0) | |
697 | break; | |
698 | ||
699 | memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1)); | |
700 | } | |
701 | ||
702 | memcpy (dest, work1, sizeof(cups_utf32_t) * (len + 1)); | |
703 | ||
704 | cupsNormalizeMapsFree(); | |
705 | ||
706 | return (len); | |
707 | } | |
708 | ||
709 | ||
710 | /* | |
711 | * 'cupsUTF8CaseFold()' - Case fold UTF-8 string. | |
712 | * | |
713 | * Case Fold UTF-8 string per Unicode UAX-21 Section 2.3 | |
714 | * Note - Case folding output is | |
715 | * unsafe for subsequent transcoding to legacy charsets | |
716 | */ | |
717 | ||
718 | int /* O - Count or -1 on error */ | |
719 | cupsUTF8CaseFold( | |
720 | cups_utf8_t *dest, /* O - Target string */ | |
721 | const cups_utf8_t *src, /* I - Source string */ | |
722 | const int maxout, /* I - Max output */ | |
723 | const cups_folding_t fold) /* I - Fold Mode */ | |
724 | { | |
725 | int len; /* String length */ | |
726 | cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */ | |
727 | cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */ | |
728 | ||
729 | ||
730 | /* | |
731 | * Check for valid arguments and clear output... | |
732 | */ | |
733 | ||
734 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
735 | return (-1); | |
736 | ||
737 | *dest = 0; | |
738 | ||
739 | if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL) | |
740 | return (-1); | |
741 | ||
742 | /* | |
743 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
744 | */ | |
745 | ||
746 | len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING); | |
747 | ||
748 | if (len < 0) | |
749 | return (-1); | |
750 | ||
751 | /* | |
752 | * Case Fold internal UCS-4 to second internal UCS-4... | |
753 | */ | |
754 | ||
755 | len = cupsUTF32CaseFold(work2, work1, CUPS_MAX_USTRING, fold); | |
756 | ||
757 | if (len < 0) | |
758 | return (-1); | |
759 | ||
760 | /* | |
761 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
762 | */ | |
763 | ||
764 | len = cupsUTF32ToUTF8(dest, work2, maxout); | |
765 | ||
766 | return (len); | |
767 | } | |
768 | ||
769 | ||
770 | /* | |
771 | * 'cupsUTF32CaseFold()' - Case fold UTF-32 string. | |
772 | * | |
773 | * Case Fold UTF-32 string per Unicode UAX-21 Section 2.3 | |
774 | * Note - Case folding output is | |
775 | * unsafe for subsequent transcoding to legacy charsets | |
776 | */ | |
777 | ||
778 | int /* O - Count or -1 on error */ | |
779 | cupsUTF32CaseFold( | |
780 | cups_utf32_t *dest, /* O - Target string */ | |
781 | const cups_utf32_t *src, /* I - Source string */ | |
782 | const int maxout, /* I - Max output */ | |
783 | const cups_folding_t fold) /* I - Fold Mode */ | |
784 | { | |
785 | cups_utf32_t *start = dest; /* Start of destination string */ | |
786 | int i; /* Looping variable */ | |
787 | int result; /* Result Value */ | |
788 | cups_ucs2_t *mp; /* Map char pointer */ | |
789 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ | |
790 | _cups_globals_t *cg = _cupsGlobals(); | |
791 | /* Pointer to library globals */ | |
792 | ||
793 | ||
794 | /* | |
795 | * Check for valid arguments and clear output... | |
796 | */ | |
797 | ||
798 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
799 | return (-1); | |
800 | ||
801 | *dest = 0; | |
802 | ||
803 | if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL) | |
804 | return (-1); | |
805 | ||
806 | /* | |
807 | * Find case folding map... | |
808 | */ | |
809 | ||
810 | result = cupsNormalizeMapsGet(); | |
811 | ||
812 | if (result < 0) | |
813 | return (-1); | |
814 | ||
815 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next) | |
816 | if (fmap->fold == fold) | |
817 | break; | |
818 | ||
819 | if (fmap == NULL) | |
820 | return (-1); | |
821 | ||
822 | /* | |
823 | * Case fold input string to output string... | |
824 | */ | |
825 | ||
826 | for (i = 0; i < (maxout - 1); i ++, src ++) | |
827 | { | |
828 | /* | |
829 | * Check for case folding defined... | |
830 | */ | |
831 | ||
832 | mp = (cups_ucs2_t *)bsearch(src, fmap->uni2fold, fmap->foldcount, | |
833 | (sizeof(cups_ucs2_t) * 4), compare_foldchar); | |
834 | if (mp == NULL) | |
835 | { | |
836 | *dest ++ = *src; | |
837 | continue; | |
838 | } | |
839 | ||
840 | /* | |
841 | * Case fold input character to one or two output characters... | |
842 | */ | |
843 | ||
844 | mp ++; | |
845 | *dest ++ = (cups_utf32_t) *mp ++; | |
846 | ||
847 | if (*mp != 0 && fold == CUPS_FOLD_FULL) | |
848 | { | |
849 | i ++; | |
850 | if (i >= (maxout - 1)) | |
851 | break; | |
852 | ||
853 | *dest ++ = (cups_utf32_t) *mp; | |
854 | } | |
855 | } | |
856 | ||
857 | *dest = 0; | |
858 | ||
859 | cupsNormalizeMapsFree(); | |
860 | ||
861 | return ((int)(dest - start)); | |
862 | } | |
863 | ||
864 | ||
865 | /* | |
866 | * 'cupsUTF8CompareCaseless()' - Compare case folded UTF-8 strings. | |
867 | */ | |
868 | ||
869 | int /* O - Difference of strings */ | |
870 | cupsUTF8CompareCaseless( | |
871 | const cups_utf8_t *s1, /* I - String1 */ | |
872 | const cups_utf8_t *s2) /* I - String2 */ | |
873 | { | |
874 | int difference; /* Difference of two strings */ | |
875 | int len; /* String length */ | |
876 | cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */ | |
877 | cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */ | |
878 | ||
879 | ||
880 | /* | |
881 | * Check for valid arguments... | |
882 | */ | |
883 | ||
884 | if (!s1 || !s2) | |
885 | return (-1); | |
886 | ||
887 | /* | |
888 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
889 | */ | |
890 | ||
891 | len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING); | |
892 | ||
893 | if (len < 0) | |
894 | return (-1); | |
895 | ||
896 | len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING); | |
897 | ||
898 | if (len < 0) | |
899 | return (-1); | |
900 | ||
901 | /* | |
902 | * Compare first internal UCS-4 to second internal UCS-4... | |
903 | */ | |
904 | ||
905 | difference = cupsUTF32CompareCaseless(work1, work2); | |
906 | ||
907 | return (difference); | |
908 | } | |
909 | ||
910 | ||
911 | /* | |
912 | * 'cupsUTF32CompareCaseless()' - Compare case folded UTF-32 strings. | |
913 | */ | |
914 | ||
915 | int /* O - Difference of strings */ | |
916 | cupsUTF32CompareCaseless( | |
917 | const cups_utf32_t *s1, /* I - String1 */ | |
918 | const cups_utf32_t *s2) /* I - String2 */ | |
919 | { | |
920 | int difference; /* Difference of two strings */ | |
921 | int len; /* String length */ | |
922 | cups_folding_t fold = CUPS_FOLD_FULL; | |
923 | /* Case folding mode */ | |
924 | cups_utf32_t fold1[CUPS_MAX_USTRING]; | |
925 | /* First UCS-4 folded string */ | |
926 | cups_utf32_t fold2[CUPS_MAX_USTRING]; | |
927 | /* Second UCS-4 folded string */ | |
928 | cups_utf32_t *p1; /* First UCS-4 string pointer */ | |
929 | cups_utf32_t *p2; /* Second UCS-4 string pointer */ | |
930 | ||
931 | ||
932 | /* | |
933 | * Check for valid arguments... | |
934 | */ | |
935 | ||
936 | if (!s1 || !s2) | |
937 | return (-1); | |
938 | ||
939 | /* | |
940 | * Case Fold input UTF-32 strings to internal UCS-4 strings... | |
941 | */ | |
942 | ||
943 | len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold); | |
944 | ||
945 | if (len < 0) | |
946 | return (-1); | |
947 | ||
948 | len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold); | |
949 | ||
950 | if (len < 0) | |
951 | return (-1); | |
952 | ||
953 | /* | |
954 | * Compare first internal UCS-4 to second internal UCS-4... | |
955 | */ | |
956 | ||
957 | p1 = &fold1[0]; | |
958 | p2 = &fold2[0]; | |
959 | ||
960 | for (;; p1 ++, p2 ++) | |
961 | { | |
962 | difference = (int) (*p1 - *p2); | |
963 | ||
964 | if (difference != 0) | |
965 | break; | |
966 | ||
967 | if ((*p1 == 0) && (*p2 == 0)) | |
968 | break; | |
969 | } | |
970 | ||
971 | return (difference); | |
972 | } | |
973 | ||
974 | ||
975 | /* | |
976 | * 'cupsUTF8CompareIdentifier()' - Compare folded NFKC UTF-8 strings. | |
977 | */ | |
978 | ||
979 | int /* O - Result of comparison */ | |
980 | cupsUTF8CompareIdentifier( | |
981 | const cups_utf8_t *s1, /* I - String1 */ | |
982 | const cups_utf8_t *s2) /* I - String2 */ | |
983 | { | |
984 | int difference; /* Difference of two strings */ | |
985 | int len; /* String length */ | |
986 | cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */ | |
987 | cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */ | |
988 | ||
989 | ||
990 | /* | |
991 | * Check for valid arguments... | |
992 | */ | |
993 | ||
994 | if (!s1 || !s2) | |
995 | return (-1); | |
996 | ||
997 | /* | |
998 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
999 | */ | |
1000 | ||
1001 | len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING); | |
1002 | ||
1003 | if (len < 0) | |
1004 | return (-1); | |
1005 | ||
1006 | len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING); | |
1007 | ||
1008 | if (len < 0) | |
1009 | return (-1); | |
1010 | ||
1011 | /* | |
1012 | * Compare first internal UCS-4 to second internal UCS-4... | |
1013 | */ | |
1014 | ||
1015 | difference = cupsUTF32CompareIdentifier(work1, work2); | |
1016 | ||
1017 | return (difference); | |
1018 | } | |
1019 | ||
1020 | ||
1021 | /* | |
1022 | * 'cupsUTF32CompareIdentifier()' - Compare folded NFKC UTF-32 strings. | |
1023 | */ | |
1024 | ||
1025 | int /* O - Result of comparison */ | |
1026 | cupsUTF32CompareIdentifier( | |
1027 | const cups_utf32_t *s1, /* I - String1 */ | |
1028 | const cups_utf32_t *s2) /* I - String2 */ | |
1029 | { | |
1030 | int difference; /* Difference of two strings */ | |
1031 | int len; /* String length */ | |
1032 | cups_folding_t fold = CUPS_FOLD_FULL; | |
1033 | /* Case folding mode */ | |
1034 | cups_utf32_t fold1[CUPS_MAX_USTRING]; | |
1035 | /* First UCS-4 folded string */ | |
1036 | cups_utf32_t fold2[CUPS_MAX_USTRING]; | |
1037 | /* Second UCS-4 folded string */ | |
1038 | cups_normalize_t normalize = CUPS_NORM_NFKC; | |
1039 | /* Normalization form */ | |
1040 | cups_utf32_t norm1[CUPS_MAX_USTRING]; | |
1041 | /* First UCS-4 normalized string */ | |
1042 | cups_utf32_t norm2[CUPS_MAX_USTRING]; | |
1043 | /* Second UCS-4 normalized string */ | |
1044 | cups_utf32_t *p1; /* First UCS-4 string pointer */ | |
1045 | cups_utf32_t *p2; /* Second UCS-4 string pointer */ | |
1046 | ||
1047 | ||
1048 | /* | |
1049 | * Check for valid arguments... | |
1050 | */ | |
1051 | ||
1052 | if (!s1 || !s2) | |
1053 | return (-1); | |
1054 | ||
1055 | /* | |
1056 | * Case Fold input UTF-32 strings to internal UCS-4 strings... | |
1057 | */ | |
1058 | ||
1059 | len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold); | |
1060 | ||
1061 | if (len < 0) | |
1062 | return (-1); | |
1063 | ||
1064 | len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold); | |
1065 | ||
1066 | if (len < 0) | |
1067 | return (-1); | |
1068 | ||
1069 | /* | |
1070 | * Normalize internal UCS-4 strings to NFKC... | |
1071 | */ | |
1072 | ||
1073 | len = cupsUTF32Normalize(norm1, fold1, CUPS_MAX_USTRING, normalize); | |
1074 | ||
1075 | if (len < 0) | |
1076 | return (-1); | |
1077 | ||
1078 | len = cupsUTF32Normalize(norm2, fold2, CUPS_MAX_USTRING, normalize); | |
1079 | ||
1080 | if (len < 0) | |
1081 | return (-1); | |
1082 | ||
1083 | /* | |
1084 | * Compare first internal UCS-4 to second internal UCS-4... | |
1085 | */ | |
1086 | ||
1087 | p1 = &norm1[0]; | |
1088 | p2 = &norm2[0]; | |
1089 | ||
1090 | for (;; p1 ++, p2 ++) | |
1091 | { | |
1092 | difference = (int) (*p1 - *p2); | |
1093 | ||
1094 | if (difference != 0) | |
1095 | break; | |
1096 | ||
1097 | if ((*p1 == 0) && (*p2 == 0)) | |
1098 | break; | |
1099 | } | |
1100 | ||
1101 | return (difference); | |
1102 | } | |
1103 | ||
1104 | ||
1105 | /* | |
1106 | * 'cupsUTF32CharacterProperty()' - Get UTF-32 character property. | |
1107 | */ | |
1108 | ||
1109 | int /* O - Result of comparison */ | |
1110 | cupsUTF32CharacterProperty( | |
1111 | const cups_utf32_t ch, /* I - Source char */ | |
1112 | const cups_property_t prop) /* I - Char Property */ | |
1113 | { | |
1114 | int result; /* Result Value */ | |
1115 | ||
1116 | ||
1117 | /* | |
1118 | * Check for valid arguments... | |
1119 | */ | |
1120 | ||
1121 | if (ch == 0) | |
1122 | return (-1); | |
1123 | ||
1124 | /* | |
1125 | * Find character property... | |
1126 | */ | |
1127 | ||
1128 | switch (prop) | |
1129 | { | |
1130 | case CUPS_PROP_GENERAL_CATEGORY: | |
1131 | result = (get_general_category(ch)); | |
1132 | break; | |
1133 | ||
1134 | case CUPS_PROP_BIDI_CATEGORY: | |
1135 | result = (get_bidi_category(ch)); | |
1136 | break; | |
1137 | ||
1138 | case CUPS_PROP_COMBINING_CLASS: | |
1139 | result = (get_combining_class(ch)); | |
1140 | break; | |
1141 | case CUPS_PROP_BREAK_CLASS: | |
1142 | result = (get_break_class(ch)); | |
1143 | break; | |
1144 | ||
1145 | default: | |
1146 | return (-1); | |
1147 | } | |
1148 | ||
1149 | return (result); | |
1150 | } | |
1151 | ||
1152 | ||
1153 | /* | |
1154 | * 'get_general_category()' - Get UTF-32 Character General Category. | |
1155 | */ | |
1156 | ||
1157 | static int /* O - Class or -1 on error */ | |
1158 | get_general_category( | |
1159 | const cups_utf32_t ch) /* I - Source char */ | |
1160 | { | |
1161 | int result; /* Result Value */ | |
1162 | cups_gencat_t gencat; /* General Category Value */ | |
1163 | _cups_prop_map_t *pmap; /* Unicode Property Map */ | |
1164 | _cups_prop_t *uni2prop; /* Unicode Char -> Properties */ | |
1165 | _cups_globals_t *cg = _cupsGlobals(); | |
1166 | /* Pointer to library globals */ | |
1167 | ||
1168 | ||
1169 | /* | |
1170 | * Check for valid argument... | |
1171 | */ | |
1172 | ||
1173 | if (ch == 0) | |
1174 | return (-1); | |
1175 | ||
1176 | /* | |
1177 | * Find property map... | |
1178 | */ | |
1179 | ||
1180 | result = cupsNormalizeMapsGet(); | |
1181 | ||
1182 | if (result < 0) | |
1183 | return (-1); | |
1184 | ||
1185 | pmap = cg->propmap_cache; | |
1186 | ||
1187 | if (pmap == NULL) | |
1188 | return (-1); | |
1189 | ||
1190 | /* | |
1191 | * Find character in map... | |
1192 | */ | |
1193 | ||
1194 | uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount, | |
1195 | (sizeof(_cups_prop_t)), compare_propchar); | |
1196 | ||
1197 | cupsNormalizeMapsFree(); | |
1198 | ||
1199 | if (uni2prop == NULL) | |
1200 | gencat = CUPS_GENCAT_CN; /* Other, Not Assigned */ | |
1201 | else | |
1202 | gencat = (cups_gencat_t)uni2prop->gencat; | |
1203 | ||
1204 | result = (int)gencat; | |
1205 | ||
1206 | return (result); | |
1207 | } | |
1208 | ||
1209 | ||
1210 | /* | |
1211 | * 'get_bidi_category()' - Get UTF-32 Character Bidi Category. | |
1212 | */ | |
1213 | ||
1214 | static int /* O - Class or -1 on error */ | |
1215 | get_bidi_category(const cups_utf32_t ch)/* I - Source char */ | |
1216 | { | |
1217 | int result; /* Result Value */ | |
1218 | cups_bidi_t bidicat; /* Bidi Category Value */ | |
1219 | _cups_prop_map_t *pmap; /* Unicode Property Map */ | |
1220 | _cups_prop_t *uni2prop; /* Unicode Char -> Properties */ | |
1221 | _cups_globals_t *cg = _cupsGlobals(); | |
1222 | /* Pointer to library globals */ | |
1223 | ||
1224 | ||
1225 | /* | |
1226 | * Check for valid argument... | |
1227 | */ | |
1228 | ||
1229 | if (ch == 0) | |
1230 | return (-1); | |
1231 | ||
1232 | /* | |
1233 | * Find property map... | |
1234 | */ | |
1235 | ||
1236 | result = cupsNormalizeMapsGet(); | |
1237 | ||
1238 | if (result < 0) | |
1239 | return (-1); | |
1240 | ||
1241 | pmap = cg->propmap_cache; | |
1242 | ||
1243 | if (pmap == NULL) | |
1244 | return (-1); | |
1245 | ||
1246 | /* | |
1247 | * Find character in map... | |
1248 | */ | |
1249 | ||
1250 | uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount, | |
1251 | (sizeof(_cups_prop_t)), compare_propchar); | |
1252 | ||
1253 | cupsNormalizeMapsFree(); | |
1254 | ||
1255 | if (uni2prop == NULL) | |
1256 | bidicat = CUPS_BIDI_ON; /* Other Neutral */ | |
1257 | else | |
1258 | bidicat = (cups_bidi_t)uni2prop->bidicat; | |
1259 | ||
1260 | result = (int)bidicat; | |
1261 | ||
1262 | return (result); | |
1263 | } | |
1264 | ||
1265 | /* | |
1266 | * 'get_combining_class()' - Get UTF-32 Character Combining Class. | |
1267 | * | |
1268 | * Note - Zero is non-combining (base character) | |
1269 | */ | |
1270 | ||
1271 | static int /* O - Class or -1 on error */ | |
1272 | get_combining_class( | |
1273 | const cups_utf32_t ch) /* I - Source char */ | |
1274 | { | |
1275 | int result; /* Result Value */ | |
1276 | _cups_comb_map_t *cmap; /* Unicode Combining Class Map */ | |
1277 | _cups_comb_class_t combclass; /* Unicode Combining Class */ | |
1278 | _cups_comb_t *uni2comb; /* Unicode Char -> Combining Class */ | |
1279 | _cups_globals_t *cg = _cupsGlobals(); | |
1280 | /* Pointer to library globals */ | |
1281 | ||
1282 | ||
1283 | /* | |
1284 | * Check for valid argument... | |
1285 | */ | |
1286 | ||
1287 | if (ch == 0) | |
1288 | return (-1); | |
1289 | ||
1290 | /* | |
1291 | * Find combining class map... | |
1292 | */ | |
1293 | ||
1294 | result = cupsNormalizeMapsGet(); | |
1295 | ||
1296 | if (result < 0) | |
1297 | return (-1); | |
1298 | ||
1299 | cmap = cg->combmap_cache; | |
1300 | ||
1301 | if (cmap == NULL) | |
1302 | return (-1); | |
1303 | ||
1304 | /* | |
1305 | * Find combining character in map... | |
1306 | */ | |
1307 | ||
1308 | uni2comb = (_cups_comb_t *)bsearch(&ch, cmap->uni2comb, cmap->combcount, | |
1309 | (sizeof(_cups_comb_t)), compare_combchar); | |
1310 | ||
1311 | cupsNormalizeMapsFree(); | |
1312 | ||
1313 | if (uni2comb == NULL) | |
1314 | combclass = 0; | |
1315 | else | |
1316 | combclass = (_cups_comb_class_t)uni2comb->combclass; | |
1317 | ||
1318 | result = (int)combclass; | |
1319 | ||
1320 | return (result); | |
1321 | } | |
1322 | ||
1323 | ||
1324 | /* | |
1325 | * 'get_break_class()' - Get UTF-32 Character Line Break Class. | |
1326 | */ | |
1327 | ||
1328 | static int /* O - Class or -1 on error */ | |
1329 | get_break_class(const cups_utf32_t ch) /* I - Source char */ | |
1330 | { | |
1331 | int result; /* Result Value */ | |
1332 | _cups_break_map_t *bmap; /* Unicode Line Break Class Map */ | |
1333 | cups_break_class_t breakclass; /* Unicode Line Break Class */ | |
1334 | cups_ucs2_t *uni2break; /* Unicode -> Line Break Class */ | |
1335 | _cups_globals_t *cg = _cupsGlobals(); | |
1336 | /* Pointer to library globals */ | |
1337 | ||
1338 | ||
1339 | /* | |
1340 | * Check for valid argument... | |
1341 | */ | |
1342 | ||
1343 | if (ch == 0) | |
1344 | return (-1); | |
1345 | ||
1346 | /* | |
1347 | * Find line break class map... | |
1348 | */ | |
1349 | ||
1350 | result = cupsNormalizeMapsGet(); | |
1351 | ||
1352 | if (result < 0) | |
1353 | return (-1); | |
1354 | ||
1355 | bmap = cg->breakmap_cache; | |
1356 | ||
1357 | if (bmap == NULL) | |
1358 | return (-1); | |
1359 | ||
1360 | /* | |
1361 | * Find line break character in map... | |
1362 | */ | |
1363 | ||
1364 | uni2break = (cups_ucs2_t *)bsearch(&ch, bmap->uni2break, bmap->breakcount, | |
1365 | (sizeof(cups_ucs2_t) * 3), | |
1366 | compare_breakchar); | |
1367 | ||
1368 | cupsNormalizeMapsFree(); | |
1369 | ||
1370 | if (uni2break == NULL) | |
1371 | breakclass = CUPS_BREAK_AI; | |
1372 | else | |
1373 | breakclass = (cups_break_class_t)*(uni2break + 2); | |
1374 | ||
1375 | result = (int)breakclass; | |
1376 | ||
1377 | return (result); | |
1378 | } | |
1379 | ||
1380 | ||
1381 | /* | |
1382 | * 'get_map_count()' - Count lines in a map file. | |
1383 | */ | |
1384 | ||
1385 | static int /* O - Count or -1 on error */ | |
1386 | get_map_count(const char *filename) /* I - Map Filename */ | |
1387 | { | |
1388 | int i; /* Looping variable */ | |
1389 | cups_file_t *fp; /* Map input file pointer */ | |
1390 | char *s; /* Line parsing pointer */ | |
1391 | char line[256]; /* Line from input map file */ | |
1392 | cups_utf32_t unichar; /* Unicode character value */ | |
1393 | ||
1394 | ||
1395 | /* | |
1396 | * Open map input file... | |
1397 | */ | |
1398 | ||
1399 | if (!filename || !*filename) | |
1400 | return (-1); | |
1401 | ||
1402 | fp = cupsFileOpen(filename, "r"); | |
1403 | if (fp == NULL) | |
1404 | return (-1); | |
1405 | ||
1406 | /* | |
1407 | * Count lines in map input file... | |
1408 | */ | |
1409 | ||
1410 | for (i = 0; i < 50000;) | |
1411 | { | |
1412 | s = cupsFileGets(fp, line, sizeof(line)); | |
1413 | if (s == NULL) | |
1414 | break; | |
1415 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1416 | continue; | |
1417 | if (strncmp (s, "0x", 2) == 0) | |
1418 | s += 2; | |
1419 | if (sscanf(s, "%lx", &unichar) != 1) | |
1420 | break; | |
1421 | if (unichar > 0xffff) | |
1422 | break; | |
1423 | i ++; | |
1424 | } | |
1425 | if (i == 0) | |
1426 | i = -1; | |
1427 | ||
1428 | /* | |
1429 | * Close file and return map count (non-comment line count)... | |
1430 | */ | |
1431 | ||
1432 | cupsFileClose(fp); | |
1433 | ||
1434 | return (i); | |
1435 | } | |
1436 | ||
1437 | ||
1438 | /* | |
1439 | * 'get_normmap()' - Get Unicode normalization map to cache. | |
1440 | */ | |
1441 | ||
1442 | static int /* O - Zero or -1 on error */ | |
1443 | get_normmap( | |
1444 | const cups_normalize_t normalize) /* I - Normalization Form */ | |
1445 | { | |
1446 | int i; /* Looping variable */ | |
1447 | cups_utf32_t unichar1; /* Unicode character value */ | |
1448 | cups_utf32_t unichar2; /* Unicode character value */ | |
1449 | cups_utf32_t unichar3; /* Unicode character value */ | |
1450 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ | |
1451 | int normcount; /* Count of Unicode Source Chars */ | |
1452 | cups_ucs2_t *uni2norm; /* Unicode Char -> Normalization */ | |
1453 | char *mapname; /* Normalization map name */ | |
1454 | char filename[1024]; /* Filename for charset map file */ | |
1455 | cups_file_t *fp; /* Normalization map file pointer */ | |
1456 | char *s; /* Line parsing pointer */ | |
1457 | char line[256]; /* Line from input map file */ | |
1458 | _cups_globals_t *cg = _cupsGlobals(); | |
1459 | /* Pointer to library globals */ | |
1460 | ||
1461 | ||
1462 | /* | |
1463 | * See if we already have this normalization map loaded... | |
1464 | */ | |
1465 | ||
1466 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) | |
1467 | if (nmap->normalize == normalize) | |
1468 | return (0); | |
1469 | ||
1470 | /* | |
1471 | * Get the mapping name... | |
1472 | */ | |
1473 | ||
1474 | switch (normalize) | |
1475 | { | |
1476 | case CUPS_NORM_NFD: /* Canonical Decomposition */ | |
1477 | mapname = "uni-nfd.txt"; | |
1478 | break; | |
1479 | ||
1480 | case CUPS_NORM_NFKD: /* Compatibility Decomposition */ | |
1481 | mapname = "uni-nfkd.txt"; | |
1482 | break; | |
1483 | ||
1484 | case CUPS_NORM_NFC: /* Canonical Composition */ | |
1485 | mapname = "uni-nfc.txt"; | |
1486 | break; | |
1487 | ||
1488 | case CUPS_NORM_NFKC: /* no such map file... */ | |
1489 | default: | |
1490 | return (-1); | |
1491 | } | |
1492 | ||
1493 | /* | |
1494 | * Open normalization map input file... | |
1495 | */ | |
1496 | ||
1497 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", | |
1498 | cg->cups_datadir, mapname); | |
1499 | if ((normcount = get_map_count(filename)) <= 0) | |
1500 | return (-1); | |
1501 | ||
1502 | fp = cupsFileOpen(filename, "r"); | |
1503 | if (fp == NULL) | |
1504 | return (-1); | |
1505 | ||
1506 | /* | |
1507 | * Allocate memory for normalization map and add to cache... | |
1508 | */ | |
1509 | ||
1510 | nmap = (_cups_norm_map_t *)calloc(1, sizeof(_cups_norm_map_t)); | |
1511 | if (nmap == NULL) | |
1512 | { | |
1513 | cupsFileClose(fp); | |
1514 | return (-1); | |
1515 | } | |
1516 | ||
1517 | uni2norm = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * normcount); | |
1518 | if (uni2norm == NULL) | |
1519 | { | |
1520 | free(nmap); | |
1521 | cupsFileClose(fp); | |
1522 | return (-1); | |
1523 | } | |
1524 | nmap->next = cg->normmap_cache; | |
1525 | cg->normmap_cache = nmap; | |
1526 | nmap->used ++; | |
1527 | nmap->normalize = normalize; | |
1528 | nmap->normcount = normcount; | |
1529 | nmap->uni2norm = uni2norm; | |
1530 | ||
1531 | /* | |
1532 | * Save normalization map into memory for later use... | |
1533 | */ | |
1534 | for (i = 0; i < normcount; ) | |
1535 | { | |
1536 | s = cupsFileGets(fp, line, sizeof(line)); | |
1537 | if (s == NULL) | |
1538 | break; | |
1539 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1540 | continue; | |
1541 | if (sscanf(s, "%lx %lx %lx", &unichar1, &unichar2, &unichar3) != 3) | |
1542 | break; | |
1543 | if ((unichar1 > 0xffff) | |
1544 | || (unichar2 > 0xffff) | |
1545 | || (unichar3 > 0xffff)) | |
1546 | break; | |
1547 | *uni2norm ++ = (cups_ucs2_t) unichar1; | |
1548 | *uni2norm ++ = (cups_ucs2_t) unichar2; | |
1549 | *uni2norm ++ = (cups_ucs2_t) unichar3; | |
1550 | i ++; | |
1551 | } | |
1552 | if (i < normcount) | |
1553 | nmap->normcount = i; | |
1554 | cupsFileClose(fp); | |
1555 | return (0); | |
1556 | } | |
1557 | ||
1558 | ||
1559 | /* | |
1560 | * 'get_foldmap()' - Get Unicode case folding map to cache. | |
1561 | */ | |
1562 | ||
1563 | static int /* O - Zero or -1 on error */ | |
1564 | get_foldmap(const cups_folding_t fold) /* I - Case folding type */ | |
1565 | { | |
1566 | int i; /* Looping variable */ | |
1567 | cups_utf32_t unichar1; /* Unicode character value */ | |
1568 | cups_utf32_t unichar2; /* Unicode character value */ | |
1569 | cups_utf32_t unichar3; /* Unicode character value */ | |
1570 | cups_utf32_t unichar4; /* Unicode character value */ | |
1571 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ | |
1572 | int foldcount; /* Count of Unicode Source Chars */ | |
1573 | cups_ucs2_t *uni2fold; /* Unicode -> Folded Char(s) */ | |
1574 | char *mapname; /* Case Folding map name */ | |
1575 | char filename[1024]; /* Filename for charset map file */ | |
1576 | cups_file_t *fp; /* Case Folding map file pointer */ | |
1577 | char *s; /* Line parsing pointer */ | |
1578 | char line[256]; /* Line from input map file */ | |
1579 | _cups_globals_t *cg = _cupsGlobals(); | |
1580 | /* Pointer to library globals */ | |
1581 | ||
1582 | ||
1583 | /* | |
1584 | * See if we already have this case folding map loaded... | |
1585 | */ | |
1586 | ||
1587 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next) | |
1588 | if (fmap->fold == fold) | |
1589 | return (0); | |
1590 | ||
1591 | /* | |
1592 | * Get the mapping name... | |
1593 | */ | |
1594 | ||
1595 | switch (fold) | |
1596 | { | |
1597 | case CUPS_FOLD_SIMPLE: /* Simple case folding */ | |
1598 | mapname = "uni-fold.txt"; | |
1599 | break; | |
1600 | case CUPS_FOLD_FULL: /* Full case folding */ | |
1601 | mapname = "uni-full.txt"; | |
1602 | break; | |
1603 | default: | |
1604 | return (-1); | |
1605 | } | |
1606 | ||
1607 | /* | |
1608 | * Open case folding map input file... | |
1609 | */ | |
1610 | ||
1611 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", | |
1612 | cg->cups_datadir, mapname); | |
1613 | if ((foldcount = get_map_count(filename)) <= 0) | |
1614 | return (-1); | |
1615 | fp = cupsFileOpen(filename, "r"); | |
1616 | if (fp == NULL) | |
1617 | return (-1); | |
1618 | ||
1619 | /* | |
1620 | * Allocate memory for case folding map and add to cache... | |
1621 | */ | |
1622 | fmap = (_cups_fold_map_t *)calloc(1, sizeof(_cups_fold_map_t)); | |
1623 | if (fmap == NULL) | |
1624 | { | |
1625 | cupsFileClose(fp); | |
1626 | return (-1); | |
1627 | } | |
1628 | uni2fold = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 4 * foldcount); | |
1629 | if (uni2fold == NULL) | |
1630 | { | |
1631 | free(fmap); | |
1632 | cupsFileClose(fp); | |
1633 | return (-1); | |
1634 | } | |
1635 | fmap->next = cg->foldmap_cache; | |
1636 | cg->foldmap_cache = fmap; | |
1637 | fmap->used ++; | |
1638 | fmap->fold = fold; | |
1639 | fmap->foldcount = foldcount; | |
1640 | fmap->uni2fold = uni2fold; | |
1641 | ||
1642 | /* | |
1643 | * Save case folding map into memory for later use... | |
1644 | */ | |
1645 | ||
1646 | for (i = 0; i < foldcount; ) | |
1647 | { | |
1648 | s = cupsFileGets(fp, line, sizeof(line)); | |
1649 | if (s == NULL) | |
1650 | break; | |
1651 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1652 | continue; | |
1653 | unichar1 = unichar2 = unichar3 = unichar4 = 0; | |
1654 | if ((fold == CUPS_FOLD_SIMPLE) | |
1655 | && (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2)) | |
1656 | break; | |
1657 | if ((fold == CUPS_FOLD_FULL) | |
1658 | && (sscanf(s, "%lx %lx %lx %lx", | |
1659 | &unichar1, &unichar2, &unichar3, &unichar4) != 4)) | |
1660 | break; | |
1661 | if ((unichar1 > 0xffff) | |
1662 | || (unichar2 > 0xffff) | |
1663 | || (unichar3 > 0xffff) | |
1664 | || (unichar4 > 0xffff)) | |
1665 | break; | |
1666 | *uni2fold ++ = (cups_ucs2_t) unichar1; | |
1667 | *uni2fold ++ = (cups_ucs2_t) unichar2; | |
1668 | *uni2fold ++ = (cups_ucs2_t) unichar3; | |
1669 | *uni2fold ++ = (cups_ucs2_t) unichar4; | |
1670 | i ++; | |
1671 | } | |
1672 | if (i < foldcount) | |
1673 | fmap->foldcount = i; | |
1674 | cupsFileClose(fp); | |
1675 | return (0); | |
1676 | } | |
1677 | ||
1678 | /* | |
1679 | * 'get_propmap()' - Get Unicode character property map to cache. | |
1680 | */ | |
1681 | ||
1682 | static int /* O - Zero or -1 on error */ | |
1683 | get_propmap(void) | |
1684 | { | |
1685 | int i, j; /* Looping variables */ | |
1686 | size_t len; /* String length */ | |
1687 | cups_utf32_t unichar; /* Unicode character value */ | |
1688 | cups_gencat_t gencat; /* General Category Value */ | |
1689 | cups_bidi_t bidicat; /* Bidi Category Value */ | |
1690 | _cups_prop_map_t *pmap; /* Unicode Char Property Map */ | |
1691 | int propcount; /* Count of Unicode Source Chars */ | |
1692 | _cups_prop_t *uni2prop; /* Unicode Char -> Properties */ | |
1693 | char *mapname; /* Char Property map name */ | |
1694 | char filename[1024]; /* Filename for charset map file */ | |
1695 | cups_file_t *fp; /* Char Property map file pointer */ | |
1696 | char *s; /* Line parsing pointer */ | |
1697 | char line[256]; /* Line from input map file */ | |
1698 | _cups_globals_t *cg = _cupsGlobals(); | |
1699 | /* Pointer to library globals */ | |
1700 | ||
1701 | ||
1702 | /* | |
1703 | * See if we already have this char properties map loaded... | |
1704 | */ | |
1705 | ||
1706 | if ((pmap = cg->propmap_cache) != NULL) | |
1707 | return (0); | |
1708 | ||
1709 | /* | |
1710 | * Get the mapping name... | |
1711 | */ | |
1712 | ||
1713 | mapname = "uni-prop.txt"; | |
1714 | ||
1715 | /* | |
1716 | * Open char properties map input file... | |
1717 | */ | |
1718 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", | |
1719 | cg->cups_datadir, mapname); | |
1720 | if ((propcount = get_map_count(filename)) <= 0) | |
1721 | return (-1); | |
1722 | fp = cupsFileOpen(filename, "r"); | |
1723 | if (fp == NULL) | |
1724 | return (-1); | |
1725 | ||
1726 | /* | |
1727 | * Allocate memory for char properties map and add to cache... | |
1728 | */ | |
1729 | pmap = (_cups_prop_map_t *)calloc(1, sizeof(_cups_prop_map_t)); | |
1730 | if (pmap == NULL) | |
1731 | { | |
1732 | cupsFileClose(fp); | |
1733 | return (-1); | |
1734 | } | |
1735 | uni2prop = (_cups_prop_t *)calloc(1, sizeof(_cups_prop_t) * propcount); | |
1736 | if (uni2prop == NULL) | |
1737 | { | |
1738 | free(pmap); | |
1739 | cupsFileClose(fp); | |
1740 | return (-1); | |
1741 | } | |
1742 | cg->propmap_cache = pmap; | |
1743 | pmap->used ++; | |
1744 | pmap->propcount = propcount; | |
1745 | pmap->uni2prop = uni2prop; | |
1746 | ||
1747 | /* | |
1748 | * Save char properties map into memory for later use... | |
1749 | */ | |
1750 | for (i = 0; i < propcount; ) | |
1751 | { | |
1752 | s = cupsFileGets(fp, line, sizeof(line)); | |
1753 | if (s == NULL) | |
1754 | break; | |
1755 | if (strlen(s) > 0) | |
1756 | *(s + strlen(s) - 1) = '\0'; | |
1757 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1758 | continue; | |
1759 | if (sscanf(s, "%lx", &unichar) != 1) | |
1760 | break; | |
1761 | if (unichar > 0xffff) | |
1762 | break; | |
1763 | while ((*s != '\0') && (*s != ';')) | |
1764 | s ++; | |
1765 | if (*s != ';') | |
1766 | break; | |
1767 | s ++; | |
1768 | for (j = 0; gencat_index[j].str != NULL; j ++) | |
1769 | { | |
1770 | len = strlen(gencat_index[j].str); | |
1771 | if (strncmp (s, gencat_index[j].str, len) == 0) | |
1772 | break; | |
1773 | } | |
1774 | if (gencat_index[j].str == NULL) | |
1775 | return (-1); | |
1776 | gencat = gencat_index[j].gencat; | |
1777 | while ((*s != '\0') && (*s != ';')) | |
1778 | s ++; | |
1779 | if (*s != ';') | |
1780 | break; | |
1781 | s ++; | |
1782 | for (j = 0; bidicat_index[j] != NULL; j ++) | |
1783 | { | |
1784 | len = strlen(bidicat_index[j]); | |
1785 | if (strncmp (s, bidicat_index[j], len) == 0) | |
1786 | break; | |
1787 | } | |
1788 | if (bidicat_index[j] == NULL) | |
1789 | return (-1); | |
1790 | bidicat = (cups_bidi_t) j; | |
1791 | uni2prop->ch = (cups_ucs2_t) unichar; | |
1792 | uni2prop->gencat = (unsigned char) gencat; | |
1793 | uni2prop->bidicat = (unsigned char) bidicat; | |
1794 | uni2prop ++; | |
1795 | i ++; | |
1796 | } | |
1797 | if (i < propcount) | |
1798 | pmap->propcount = i; | |
1799 | cupsFileClose(fp); | |
1800 | return (0); | |
1801 | } | |
1802 | ||
1803 | ||
1804 | /* | |
1805 | * 'get_combmap()' - Get Unicode combining class map to cache. | |
1806 | */ | |
1807 | ||
1808 | static int /* O - Zero or -1 on error */ | |
1809 | get_combmap(void) | |
1810 | { | |
1811 | int i; /* Looping variable */ | |
1812 | cups_utf32_t unichar; /* Unicode character value */ | |
1813 | int combclass; /* Unicode char combining class */ | |
1814 | _cups_comb_map_t *cmap; /* Unicode Comb Class Map */ | |
1815 | int combcount; /* Count of Unicode Source Chars */ | |
1816 | _cups_comb_t *uni2comb; /* Unicode Char -> Combining Class */ | |
1817 | char *mapname; /* Comb Class map name */ | |
1818 | char filename[1024]; /* Filename for charset map file */ | |
1819 | cups_file_t *fp; /* Comb Class map file pointer */ | |
1820 | char *s; /* Line parsing pointer */ | |
1821 | char line[256]; /* Line from input map file */ | |
1822 | _cups_globals_t *cg = _cupsGlobals(); | |
1823 | /* Pointer to library globals */ | |
1824 | ||
1825 | ||
1826 | /* | |
1827 | * See if we already have this combining class map loaded... | |
1828 | */ | |
1829 | ||
1830 | if ((cmap = cg->combmap_cache) != NULL) | |
1831 | return (0); | |
1832 | ||
1833 | /* | |
1834 | * Get the mapping name... | |
1835 | */ | |
1836 | ||
1837 | mapname = "uni-comb.txt"; | |
1838 | ||
1839 | /* | |
1840 | * Open combining class map input file... | |
1841 | */ | |
1842 | ||
1843 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", | |
1844 | cg->cups_datadir, mapname); | |
1845 | if ((combcount = get_map_count(filename)) <= 0) | |
1846 | return (-1); | |
1847 | fp = cupsFileOpen(filename, "r"); | |
1848 | if (fp == NULL) | |
1849 | return (-1); | |
1850 | ||
1851 | /* | |
1852 | * Allocate memory for combining class map and add to cache... | |
1853 | */ | |
1854 | ||
1855 | cmap = (_cups_comb_map_t *)calloc(1, sizeof(_cups_comb_map_t)); | |
1856 | if (cmap == NULL) | |
1857 | { | |
1858 | cupsFileClose(fp); | |
1859 | return (-1); | |
1860 | } | |
1861 | ||
1862 | uni2comb = (_cups_comb_t *)calloc(1, sizeof(_cups_comb_t) * combcount); | |
1863 | if (uni2comb == NULL) | |
1864 | { | |
1865 | free(cmap); | |
1866 | cupsFileClose(fp); | |
1867 | return (-1); | |
1868 | } | |
1869 | cg->combmap_cache = cmap; | |
1870 | cmap->used ++; | |
1871 | cmap->combcount = combcount; | |
1872 | cmap->uni2comb = uni2comb; | |
1873 | ||
1874 | /* | |
1875 | * Save combining class map into memory for later use... | |
1876 | */ | |
1877 | for (i = 0; i < combcount; ) | |
1878 | { | |
1879 | s = cupsFileGets(fp, line, sizeof(line)); | |
1880 | if (s == NULL) | |
1881 | break; | |
1882 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1883 | continue; | |
1884 | if (sscanf(s, "%lx", &unichar) != 1) | |
1885 | break; | |
1886 | if (unichar > 0xffff) | |
1887 | break; | |
1888 | while ((*s != '\0') && (*s != ';')) | |
1889 | s ++; | |
1890 | if (*s != ';') | |
1891 | break; | |
1892 | s ++; | |
1893 | if (sscanf(s, "%d", &combclass) != 1) | |
1894 | break; | |
1895 | uni2comb->ch = (cups_ucs2_t) unichar; | |
1896 | uni2comb->combclass = (unsigned char) combclass; | |
1897 | uni2comb ++; | |
1898 | i ++; | |
1899 | } | |
1900 | if (i < combcount) | |
1901 | cmap->combcount = i; | |
1902 | cupsFileClose(fp); | |
1903 | return (0); | |
1904 | } | |
1905 | ||
1906 | ||
1907 | /* | |
1908 | * 'get_breakmap()' - Get Unicode line break class map to cache. | |
1909 | */ | |
1910 | ||
1911 | static int /* O - Zero or -1 on error */ | |
1912 | get_breakmap(void) | |
1913 | { | |
1914 | int i, j; /* Looping variables */ | |
1915 | int len; /* String length */ | |
1916 | cups_utf32_t unichar1; /* Unicode character value */ | |
1917 | cups_utf32_t unichar2; /* Unicode character value */ | |
1918 | cups_break_class_t breakclass; /* Unicode char line break class */ | |
1919 | _cups_break_map_t *bmap; /* Unicode Line Break Class Map */ | |
1920 | int breakcount; /* Count of Unicode Source Chars */ | |
1921 | cups_ucs2_t *uni2break; /* Unicode -> Line Break Class */ | |
1922 | char *mapname; /* Comb Class map name */ | |
1923 | char filename[1024]; /* Filename for charset map file */ | |
1924 | cups_file_t *fp; /* Comb Class map file pointer */ | |
1925 | char *s; /* Line parsing pointer */ | |
1926 | char line[256]; /* Line from input map file */ | |
1927 | _cups_globals_t *cg = _cupsGlobals(); | |
1928 | /* Pointer to library globals */ | |
1929 | ||
1930 | ||
1931 | /* | |
1932 | * See if we already have this line break class map loaded... | |
1933 | */ | |
1934 | ||
1935 | if ((bmap = cg->breakmap_cache) != NULL) | |
1936 | return (0); | |
1937 | ||
1938 | /* | |
1939 | * Get the mapping name... | |
1940 | */ | |
1941 | ||
1942 | mapname = "uni-line.txt"; | |
1943 | ||
1944 | /* | |
1945 | * Open line break class map input file... | |
1946 | */ | |
1947 | ||
1948 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", | |
1949 | cg->cups_datadir, mapname); | |
1950 | if ((breakcount = get_map_count(filename)) <= 0) | |
1951 | return (-1); | |
1952 | fp = cupsFileOpen(filename, "r"); | |
1953 | if (fp == NULL) | |
1954 | return (-1); | |
1955 | ||
1956 | /* | |
1957 | * Allocate memory for line break class map and add to cache... | |
1958 | */ | |
1959 | ||
1960 | bmap = (_cups_break_map_t *)calloc(1, sizeof(_cups_break_map_t)); | |
1961 | if (bmap == NULL) | |
1962 | { | |
1963 | cupsFileClose(fp); | |
1964 | return (-1); | |
1965 | } | |
1966 | ||
1967 | uni2break = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * breakcount); | |
1968 | if (uni2break == NULL) | |
1969 | { | |
1970 | free(bmap); | |
1971 | cupsFileClose(fp); | |
1972 | return (-1); | |
1973 | } | |
1974 | cg->breakmap_cache = bmap; | |
1975 | bmap->used ++; | |
1976 | bmap->breakcount = breakcount; | |
1977 | bmap->uni2break = uni2break; | |
1978 | ||
1979 | /* | |
1980 | * Save line break class map into memory for later use... | |
1981 | */ | |
1982 | for (i = 0; i < breakcount; ) | |
1983 | { | |
1984 | s = cupsFileGets(fp, line, sizeof(line)); | |
1985 | if (s == NULL) | |
1986 | break; | |
1987 | if (strlen(s) > 0) | |
1988 | *(s + strlen(s) - 1) = '\0'; | |
1989 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1990 | continue; | |
1991 | if (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2) | |
1992 | break; | |
1993 | if ((unichar1 > 0xffff) | |
1994 | || (unichar2 > 0xffff)) | |
1995 | break; | |
1996 | while ((*s != '\0') && (*s != ';')) | |
1997 | s ++; | |
1998 | if (*s != ';') | |
1999 | break; | |
2000 | s ++; | |
2001 | for (j = 0; break_index[j].str != NULL; j ++) | |
2002 | { | |
2003 | len = strlen (break_index[j].str); | |
2004 | if (strncmp (s, break_index[j].str, len) == 0) | |
2005 | break; | |
2006 | } | |
2007 | if (break_index[j].str == NULL) | |
2008 | return (-1); | |
2009 | breakclass = break_index[j].breakclass; | |
2010 | *uni2break ++ = (cups_ucs2_t) unichar1; | |
2011 | *uni2break ++ = (cups_ucs2_t) unichar2; | |
2012 | *uni2break ++ = (cups_ucs2_t) breakclass; | |
2013 | i ++; | |
2014 | } | |
2015 | if (i < breakcount) | |
2016 | bmap->breakcount = i; | |
2017 | cupsFileClose(fp); | |
2018 | return (0); | |
2019 | } | |
2020 | ||
2021 | ||
/*
 * 'compare_compose()' - Compare key for compose match.
 *
 * Packs the two characters found at 'k1' and the two characters found at
 * 'k2' into one value each (first character shifted left 16 bits, second
 * character in the low bits) and orders the two values as unsigned
 * numbers.  The result is -1, 0, or 1; only its sign is meaningful.
 *
 * The signature matches a bsearch()/qsort() comparison callback.
 *
 * Note - This function cannot be easily modified for 32-bit Unicode.
 */

static int                              /* O - Result of comparison */
compare_compose(const void *k1,         /* I - Key char */
                const void *k2)         /* I - Map char */
{
  const cups_utf32_t *kp = (const cups_utf32_t *)k1;
                                        /* Key char pointer */
  const cups_ucs2_t  *mp = (const cups_ucs2_t *)k2;
                                        /* Map char pointer */
  unsigned long      key;               /* Pair of key characters */
  unsigned long      map;               /* Pair of map characters */


 /*
  * Widen to unsigned long *before* shifting.  A UCS-2 value promotes to
  * (signed) int, so shifting a character >= 0x8000 left by 16 would
  * overflow the int and then sign-extend into the high bits of a 64-bit
  * unsigned long, corrupting the packed map value.
  */

  key = ((unsigned long)kp[0] << 16) | (unsigned long)kp[1];
  map = ((unsigned long)mp[0] << 16) | (unsigned long)mp[1];

 /*
  * Compare explicitly instead of returning the difference: the packed
  * values span 32 bits, so their difference does not fit in an int and
  * a truncated difference can report the wrong sign.
  */

  if (key < map)
    return (-1);

  if (key > map)
    return (1);

  return (0);
}
2052 | ||
2053 | ||
/*
 * 'compare_decompose()' - Compare key for decompose match.
 *
 * The key character is narrowed to cups_ucs2_t and compared against the
 * first element at 'k2'.  The result is the signed difference between the
 * narrowed key and that element (zero on a match).
 */

static int                              /* O - Result of comparison */
compare_decompose(const void *k1,       /* I - Key char */
                  const void *k2)       /* I - Map char */
{
  const cups_utf32_t *keyptr = (const cups_utf32_t *)k1;
                                        /* Key char pointer */
  const cups_ucs2_t  *entry  = (const cups_ucs2_t *)k2;
                                        /* Map char pointer */
  int                keych;             /* Key char narrowed to UCS-2 */
  int                mapch;             /* Map char */


  keych = (int)(cups_ucs2_t)*keyptr;
  mapch = (int)*entry;

  return (keych - mapch);
}
2078 | ||
2079 | ||
/*
 * 'compare_foldchar()' - Compare key for case fold match.
 *
 * Narrows the key character to cups_ucs2_t and returns its signed
 * distance from the first element at 'k2': negative when the key is
 * smaller, positive when it is larger, zero when they are equal.
 */

static int                              /* O - Result of comparison */
compare_foldchar(const void *k1,        /* I - Key char */
                 const void *k2)        /* I - Map char */
{
  const cups_utf32_t *src = (const cups_utf32_t *)k1;
                                        /* Key char pointer */
  const cups_ucs2_t  *row = (const cups_ucs2_t *)k2;
                                        /* Map char pointer */
  cups_ucs2_t        keych = (cups_ucs2_t)*src;
                                        /* Key char as UCS-2 */
  cups_ucs2_t        mapch = *row;      /* Map char */


  return ((keych >= mapch) ? (int)(keych - mapch)
                           : -((int)(mapch - keych)));
}
2104 | ||
2105 | ||
2106 | /* | |
2107 | * 'compare_combchar()' - Compare key for combining char match. | |
2108 | */ | |
2109 | ||
2110 | static int /* O - Result of comparison */ | |
2111 | compare_combchar(const void *k1, /* I - Key char */ | |
2112 | const void *k2) /* I - Map char */ | |
2113 | { | |
2114 | cups_utf32_t *kp = (cups_utf32_t *)k1; | |
2115 | /* Key char pointer */ | |
2116 | _cups_comb_t *cp = (_cups_comb_t *)k2;/* Combining map row pointer */ | |
2117 | cups_ucs2_t ch; /* Key char as UCS-2 */ | |
2118 | int result; /* Result Value */ | |
2119 | ||
2120 | ||
2121 | ch = (cups_ucs2_t) *kp; | |
2122 | ||
2123 | if (ch >= cp->ch) | |
2124 | result = (int) (ch - cp->ch); | |
2125 | else | |
2126 | result = -1 * ((int) (cp->ch - ch)); | |
2127 | ||
2128 | return (result); | |
2129 | } | |
2130 | ||
2131 | ||
/*
 * 'compare_breakchar()' - Compare key for line break char match.
 *
 * The map entry at 'k2' is read as an inclusive range: element 0 is the
 * first character and element 1 the last.  The key (narrowed to
 * cups_ucs2_t) yields a negative value when it lies below the range, a
 * positive value when it lies above it, and zero when it falls inside.
 */

static int                              /* O - Result of comparison */
compare_breakchar(const void *k1,       /* I - Key char */
                  const void *k2)       /* I - Map char */
{
  const cups_utf32_t *keyptr = (const cups_utf32_t *)k1;
                                        /* Key char pointer */
  const cups_ucs2_t  *range  = (const cups_ucs2_t *)k2;
                                        /* Map char pointer */
  cups_ucs2_t        keych   = (cups_ucs2_t)*keyptr;
                                        /* Key char as UCS-2 */


  if (keych < range[0])
    return (-1 * (int)(range[0] - keych));

  if (keych > range[1])
    return ((int)(keych - range[1]));

  return (0);
}
2158 | ||
2159 | ||
2160 | /* | |
2161 | * 'compare_propchar()' - Compare key for property char match. | |
2162 | */ | |
2163 | ||
2164 | static int /* O - Result of comparison */ | |
2165 | compare_propchar(const void *k1, /* I - Key char */ | |
2166 | const void *k2) /* I - Map char */ | |
2167 | { | |
2168 | cups_utf32_t *kp = (cups_utf32_t *)k1; | |
2169 | /* Key char pointer */ | |
2170 | _cups_prop_t *pp = (_cups_prop_t *)k2;/* Property map row pointer */ | |
2171 | cups_ucs2_t ch; /* Key char as UCS-2 */ | |
2172 | int result; /* Result Value */ | |
2173 | ||
2174 | ||
2175 | ch = (cups_ucs2_t) *kp; | |
2176 | ||
2177 | if (ch >= pp->ch) | |
2178 | result = (int) (ch - pp->ch); | |
2179 | else | |
2180 | result = -1 * ((int) (pp->ch - ch)); | |
2181 | ||
2182 | return (result); | |
2183 | } | |
2184 | ||
2185 | ||
2186 | /* | |
fa73b229 | 2187 | * End of "$Id: normalize.c 4967 2006-01-24 03:42:15Z mike $" |
ef416fc2 | 2188 | */ |