]>
Commit | Line | Data |
---|---|---|
c5061b15 | 1 | /* |
c9d3f842 | 2 | * "$Id$" |
c5061b15 | 3 | * |
47c9dfee | 4 | * Unicode normalization for the Common UNIX Printing System (CUPS). |
c5061b15 | 5 | * |
24c1b5ce | 6 | * Copyright 1997-2006 by Easy Software Products. |
c5061b15 | 7 | * |
8 | * These coded instructions, statements, and computer programs are | |
9 | * the property of Easy Software Products and are protected by Federal | |
10 | * copyright law. Distribution and use rights are outlined in the | |
11 | * file "LICENSE.txt" which should have been included with this file. | |
12 | * If this file is missing or damaged please contact Easy Software | |
13 | * Products at: | |
14 | * | |
47c9dfee | 15 | * Attn: CUPS Licensing Information |
16 | * Easy Software Products | |
17 | * 44141 Airport View Drive, Suite 204 | |
c9d3f842 | 18 | * Hollywood, Maryland 20636 USA |
c5061b15 | 19 | * |
9639c4de | 20 | * Voice: (301) 373-9600 |
47c9dfee | 21 | * EMail: cups-info@cups.org |
22 | * WWW: http://www.cups.org | |
c5061b15 | 23 | * |
24 | * Contents: | |
25 | * | |
47c9dfee | 26 | * cupsNormalizeMapsGet() - Get all norm maps to cache. |
27 | * cupsNormalizeMapsFree() - Free all norm maps in cache. | |
28 | * cupsNormalizeMapsFlush() - Flush all norm maps in cache. | |
158e5e17 | 29 | * _cupsNormalizeMapsFlush() - Flush all normalization maps in cache. |
47c9dfee | 30 | * cupsUTF8Normalize() - Normalize UTF-8 string. |
31 | * cupsUTF32Normalize() - Normalize UTF-32 string. | |
32 | * cupsUTF8CaseFold() - Case fold UTF-8 string. | |
33 | * cupsUTF32CaseFold() - Case fold UTF-32 string. | |
34 | * cupsUTF8CompareCaseless() - Compare case folded UTF-8 strings. | |
35 | * cupsUTF32CompareCaseless() - Compare case folded UTF-32 strings. | |
4a95bc63 | 36 | * cupsUTF8CompareIdentifier() - Compare folded NFKC UTF-8 strings. |
37 | * cupsUTF32CompareIdentifier() - Compare folded NFKC UTF-32 strings. | |
47c9dfee | 38 | * cupsUTF32CharacterProperty() - Get UTF-32 character property. |
39 | * get_general_category() - Get UTF-32 Char General Category. | |
40 | * get_bidi_category() - Get UTF-32 Char Bidi Category. | |
41 | * get_combining_class() - Get UTF-32 Char Combining Class. | |
42 | * get_break_class() - Get UTF-32 Char Line Break Class. | |
43 | * get_map_count() - Count lines in a map file. | |
44 | * get_normmap() - Get Unicode norm map to cache. | |
45 | * get_foldmap() - Get Unicode casefold map to cache. | |
46 | * get_propmap() - Get Unicode property map to cache. | |
47 | * get_combmap() - Get Unicode combining map to cache. | |
48 | * get_breakmap() - Get Unicode break map to cache. | |
49 | * compare_compose() - Compare key for compose match. | |
50 | * compare_decompose() - Compare key for decompose match. | |
51 | * compare_foldchar() - Compare key for case fold match. | |
52 | * compare_combchar() - Compare key for combining match. | |
53 | * compare_breakchar() - Compare key for line break match. | |
54 | * compare_propchar() - Compare key for property char match. | |
c5061b15 | 55 | */ |
56 | ||
57 | /* | |
58 | * Include necessary headers... | |
59 | */ | |
60 | ||
03f61bf3 | 61 | #include "globals.h" |
62 | #include "debug.h" | |
c5061b15 | 63 | #include <stdlib.h> |
c5061b15 | 64 | #include <errno.h> |
c5061b15 | 65 | #include <time.h> |
66 | ||
c5061b15 | 67 | |
03f61bf3 | 68 | typedef struct /**** General Category Index Struct: one (value, name) pair ****/
c5061b15 | 69 | {
03f61bf3 | 70 | cups_gencat_t gencat; /* General Category Value */
71 | const char *str; /* General Category String (abbreviation text, e.g. "Lu") */ | |
c5061b15 | 72 | } gencat_t;
73 | ||
03f61bf3 | 74 | static const gencat_t gencat_index[] = /* General Category Index: category value paired with its two-letter name */
c5061b15 | 75 | {
03f61bf3 | 76 | { CUPS_GENCAT_LU, "Lu" }, /* Letter, Uppercase */
77 | { CUPS_GENCAT_LL, "Ll" }, /* Letter, Lowercase */ | |
78 | { CUPS_GENCAT_LT, "Lt" }, /* Letter, Titlecase */ | |
79 | { CUPS_GENCAT_LM, "Lm" }, /* Letter, Modifier */ | |
80 | { CUPS_GENCAT_LO, "Lo" }, /* Letter, Other */ | |
81 | { CUPS_GENCAT_MN, "Mn" }, /* Mark, Non-Spacing */ | |
82 | { CUPS_GENCAT_MC, "Mc" }, /* Mark, Spacing Combining */ | |
83 | { CUPS_GENCAT_ME, "Me" }, /* Mark, Enclosing */ | |
84 | { CUPS_GENCAT_ND, "Nd" }, /* Number, Decimal Digit */ | |
85 | { CUPS_GENCAT_NL, "Nl" }, /* Number, Letter */ | |
86 | { CUPS_GENCAT_NO, "No" }, /* Number, Other */ | |
87 | { CUPS_GENCAT_PC, "Pc" }, /* Punctuation, Connector */ | |
88 | { CUPS_GENCAT_PD, "Pd" }, /* Punctuation, Dash */ | |
89 | { CUPS_GENCAT_PS, "Ps" }, /* Punctuation, Open (start) */ | |
90 | { CUPS_GENCAT_PE, "Pe" }, /* Punctuation, Close (end) */ | |
91 | { CUPS_GENCAT_PI, "Pi" }, /* Punctuation, Initial Quote */ | |
92 | { CUPS_GENCAT_PF, "Pf" }, /* Punctuation, Final Quote */ | |
93 | { CUPS_GENCAT_PO, "Po" }, /* Punctuation, Other */ | |
94 | { CUPS_GENCAT_SM, "Sm" }, /* Symbol, Math */ | |
95 | { CUPS_GENCAT_SC, "Sc" }, /* Symbol, Currency */ | |
96 | { CUPS_GENCAT_SK, "Sk" }, /* Symbol, Modifier */ | |
97 | { CUPS_GENCAT_SO, "So" }, /* Symbol, Other */ | |
98 | { CUPS_GENCAT_ZS, "Zs" }, /* Separator, Space */ | |
99 | { CUPS_GENCAT_ZL, "Zl" }, /* Separator, Line */ | |
100 | { CUPS_GENCAT_ZP, "Zp" }, /* Separator, Paragraph */ | |
101 | { CUPS_GENCAT_CC, "Cc" }, /* Other, Control */ | |
102 | { CUPS_GENCAT_CF, "Cf" }, /* Other, Format */ | |
103 | { CUPS_GENCAT_CS, "Cs" }, /* Other, Surrogate */ | |
104 | { CUPS_GENCAT_CO, "Co" }, /* Other, Private Use */ | |
105 | { CUPS_GENCAT_CN, "Cn" }, /* Other, Not Assigned */ | |
c5061b15 | 106 | { 0, NULL } /* Last entry: NULL string (presumably the end-of-table marker) */
107 | }; | |
108 | ||
03f61bf3 | 109 | static const char * const bidicat_index[] =
110 | /* Bidi Category Index: category name strings, ending with a NULL entry */ | |
c5061b15 | 111 | {
03f61bf3 | 112 | "L", /* Left-to-Right (Alpha, Syllabic, Ideographic) */
113 | "LRE", /* Left-to-Right Embedding (explicit) */ | |
114 | "LRO", /* Left-to-Right Override (explicit) */ | |
115 | "R", /* Right-to-Left (Hebrew alphabet and most punct) */ | |
116 | "AL", /* Right-to-Left Arabic (Arabic, Thaana, Syriac) */ | |
117 | "RLE", /* Right-to-Left Embedding (explicit) */ | |
118 | "RLO", /* Right-to-Left Override (explicit) */ | |
119 | "PDF", /* Pop Directional Format */ | |
120 | "EN", /* Euro Number (Euro and East Arabic-Indic digits) */ | |
121 | "ES", /* Euro Number Separator (Slash) */ | |
122 | "ET", /* Euro Number Terminator (Plus, Minus, Degree, etc) */ | |
123 | "AN", /* Arabic Number (Arabic-Indic digits, separators) */ | |
124 | "CS", /* Common Number Separator (Colon, Comma, Dot, etc) */ | |
125 | "NSM", /* Non-Spacing Mark (category Mn / Me in UCD) */ | |
126 | "BN", /* Boundary Neutral (Formatting / Control chars) */ | |
127 | "B", /* Paragraph Separator */ | |
128 | "S", /* Segment Separator (Tab) */ | |
129 | "WS", /* Whitespace Space (Space, Line Separator, etc) */ | |
130 | "ON", /* Other Neutrals */ | |
c5061b15 | 131 | NULL
132 | }; | |
133 | ||
03f61bf3 | 134 | typedef struct /**** Line Break Class Index Struct: one (value, name) pair ****/
c5061b15 | 135 | {
67871650 | 136 | cups_break_class_t breakclass; /* Line Break Class Value */
03f61bf3 | 137 | const char *str; /* Line Break Class String (abbreviation text, e.g. "AL") */
2625096f | 138 | } _cups_break_t;
c5061b15 | 139 | |
2625096f | 140 | static const _cups_break_t break_index[] = /* Line Break Class Index: class value paired with its two-letter name */
c5061b15 | 141 | {
03f61bf3 | 142 | { CUPS_BREAK_AI, "AI" }, /* Ambiguous (Alphabetic or Ideograph) */
143 | { CUPS_BREAK_AL, "AL" }, /* Ordinary Alpha/Symbol Chars (XP) */ | |
144 | { CUPS_BREAK_BA, "BA" }, /* Break Opportunity After Chars (A) */ | |
145 | { CUPS_BREAK_BB, "BB" }, /* Break Opportunities Before Chars (B) */ | |
146 | { CUPS_BREAK_B2, "B2" }, /* Break Opportunity Either (B/A/XP) */ | |
147 | { CUPS_BREAK_BK, "BK" }, /* Mandatory Break (A) (norm) */ | |
148 | { CUPS_BREAK_CB, "CB" }, /* Contingent Break (B/A) (norm) */ | |
149 | { CUPS_BREAK_CL, "CL" }, /* Closing Punctuation (XB) */ | |
150 | { CUPS_BREAK_CM, "CM" }, /* Attached/Combining (XB) (norm) */ | |
151 | { CUPS_BREAK_CR, "CR" }, /* Carriage Return (A) (norm) */ | |
152 | { CUPS_BREAK_EX, "EX" }, /* Exclamation / Interrogation (XB) */ | |
153 | { CUPS_BREAK_GL, "GL" }, /* Non-breaking ("Glue") (XB/XA) (norm) */ | |
154 | { CUPS_BREAK_HY, "HY" }, /* Hyphen (XA) */ | |
155 | { CUPS_BREAK_ID, "ID" }, /* Ideographic (B/A) */ | |
156 | { CUPS_BREAK_IN, "IN" }, /* Inseparable chars (XP) */ | |
157 | { CUPS_BREAK_IS, "IS" }, /* Numeric Separator (Infix) (XB) */ | |
158 | { CUPS_BREAK_LF, "LF" }, /* Line Feed (A) (norm) */ | |
159 | { CUPS_BREAK_NS, "NS" }, /* Non-starters (XB) */ | |
160 | { CUPS_BREAK_NU, "NU" }, /* Numeric (XP) */ | |
161 | { CUPS_BREAK_OP, "OP" }, /* Opening Punctuation (XA) */ | |
162 | { CUPS_BREAK_PO, "PO" }, /* Postfix (Numeric) (XB) */ | |
163 | { CUPS_BREAK_PR, "PR" }, /* Prefix (Numeric) (XA) */ | |
164 | { CUPS_BREAK_QU, "QU" }, /* Ambiguous Quotation (XB/XA) */ | |
165 | { CUPS_BREAK_SA, "SA" }, /* Context Dependent (SE Asian) (P) */ | |
166 | { CUPS_BREAK_SG, "SG" }, /* Surrogates (XP) (norm) */ | |
167 | { CUPS_BREAK_SP, "SP" }, /* Space (A) (norm) */ | |
168 | { CUPS_BREAK_SY, "SY" }, /* Symbols Allowing Break After (A) */ | |
169 | { CUPS_BREAK_XX, "XX" }, /* Unknown (XP) */ | |
170 | { CUPS_BREAK_ZW, "ZW" }, /* Zero Width Space (A) (norm) */ | |
c5061b15 | 171 | { 0, NULL } /* Last entry: NULL string (presumably the end-of-table marker) */
172 | }; | |
173 | ||
47c9dfee | 174 | /* |
175 | * Prototypes... | |
176 | */ | |
177 | ||
03f61bf3 | 178 | static int compare_breakchar(const void *k1, const void *k2); /* Compare key for line break match */
179 | static int compare_combchar(const void *k1, const void *k2); /* Compare key for combining match */ | |
180 | static int compare_compose(const void *k1, const void *k2); /* Compare key for compose match */ | |
181 | static int compare_decompose(const void *k1, const void *k2); /* Compare key for decompose match */ | |
182 | static int compare_foldchar(const void *k1, const void *k2); /* Compare key for case fold match */ | |
183 | static int compare_propchar(const void *k1, const void *k2); /* Compare key for property char match */ | |
4a95bc63 | 184 | static int get_bidi_category(const cups_utf32_t ch); /* Get UTF-32 char bidi category */
4a95bc63 | 185 | static int get_break_class(const cups_utf32_t ch); /* Get UTF-32 char line break class */
03f61bf3 | 186 | static int get_breakmap(void); /* Get Unicode break map to cache */
187 | static int get_combining_class(const cups_utf32_t ch); /* Get UTF-32 char combining class */ | |
188 | static int get_combmap(void); /* Get Unicode combining map to cache */ | |
189 | static int get_foldmap(const cups_folding_t fold); /* Get Unicode casefold map to cache */ | |
4a95bc63 | 190 | static int get_general_category(const cups_utf32_t ch); /* Get UTF-32 char general category */
47c9dfee | 191 | static int get_map_count(const char *filename); /* Count lines in a map file */
192 | static int get_normmap(const cups_normalize_t normalize); /* Get Unicode norm map to cache */ | |
c5061b15 | 193 | static int get_propmap(void); /* Get Unicode property map to cache */
03f61bf3 | 194 | |
c5061b15 | 195 | |
196 | /* | |
47c9dfee | 197 | * 'cupsNormalizeMapsGet()' - Get all normalization maps to cache. |
c5061b15 | 198 | */ |
03f61bf3 | 199 | |
200 | int /* O - Zero or -1 on error */ | |
c5061b15 | 201 | cupsNormalizeMapsGet(void) |
202 | { | |
2625096f | 203 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ |
204 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ | |
205 | _cups_globals_t *cg = _cupsGlobals(); | |
03f61bf3 | 206 | /* Pointer to library globals */ |
207 | ||
c5061b15 | 208 | |
209 | /* | |
210 | * See if we already have normalization maps loaded... | |
211 | */ | |
03f61bf3 | 212 | |
213 | if (cg->normmap_cache) | |
c5061b15 | 214 | { |
03f61bf3 | 215 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) |
c5061b15 | 216 | nmap->used ++; |
03f61bf3 | 217 | |
218 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next) | |
c5061b15 | 219 | fmap->used ++; |
03f61bf3 | 220 | |
221 | if (cg->combmap_cache) | |
222 | cg->combmap_cache->used ++; | |
223 | ||
224 | if (cg->propmap_cache) | |
225 | cg->propmap_cache->used ++; | |
226 | ||
227 | if (cg->breakmap_cache) | |
228 | cg->breakmap_cache->used ++; | |
229 | ||
c5061b15 | 230 | return (0); |
231 | } | |
232 | ||
233 | /* | |
234 | * Get normalization maps... | |
235 | */ | |
03f61bf3 | 236 | |
c5061b15 | 237 | if (get_normmap(CUPS_NORM_NFD) < 0) |
238 | return (-1); | |
03f61bf3 | 239 | |
c5061b15 | 240 | if (get_normmap(CUPS_NORM_NFKD) < 0) |
241 | return (-1); | |
03f61bf3 | 242 | |
c5061b15 | 243 | if (get_normmap(CUPS_NORM_NFC) < 0) |
244 | return (-1); | |
245 | ||
246 | /* | |
247 | * Get case folding, combining class, character property maps... | |
248 | */ | |
03f61bf3 | 249 | |
c5061b15 | 250 | if (get_foldmap(CUPS_FOLD_SIMPLE) < 0) |
251 | return (-1); | |
03f61bf3 | 252 | |
c5061b15 | 253 | if (get_foldmap(CUPS_FOLD_FULL) < 0) |
254 | return (-1); | |
03f61bf3 | 255 | |
c5061b15 | 256 | if (get_propmap() < 0) |
257 | return (-1); | |
03f61bf3 | 258 | |
c5061b15 | 259 | if (get_combmap() < 0) |
260 | return (-1); | |
03f61bf3 | 261 | |
c5061b15 | 262 | if (get_breakmap() < 0) |
263 | return (-1); | |
03f61bf3 | 264 | |
c5061b15 | 265 | return (0); |
266 | } | |
267 | ||
03f61bf3 | 268 | |
c5061b15 | 269 | /* |
270 | * 'cupsNormalizeMapsFree()' - Free all normalization maps in cache. | |
271 | * | |
272 | * This does not actually free; use 'cupsNormalizeMapsFlush()' for that. | |
273 | */ | |
03f61bf3 | 274 | |
275 | int /* O - Zero or -1 on error */ | |
c5061b15 | 276 | cupsNormalizeMapsFree(void) |
277 | { | |
2625096f | 278 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ |
279 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ | |
280 | _cups_globals_t *cg = _cupsGlobals(); | |
03f61bf3 | 281 | /* Pointer to library globals */ |
282 | ||
c5061b15 | 283 | |
284 | /* | |
285 | * See if we already have normalization maps loaded... | |
286 | */ | |
03f61bf3 | 287 | |
288 | if (cg->normmap_cache == NULL) | |
c5061b15 | 289 | return (-1); |
03f61bf3 | 290 | |
291 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) | |
c5061b15 | 292 | if (nmap->used > 0) |
293 | nmap->used --; | |
03f61bf3 | 294 | |
295 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next) | |
c5061b15 | 296 | if (fmap->used > 0) |
297 | fmap->used --; | |
03f61bf3 | 298 | |
299 | if (cg->propmap_cache && (cg->propmap_cache->used > 0)) | |
300 | cg->propmap_cache->used --; | |
301 | ||
302 | if (cg->combmap_cache && (cg->combmap_cache->used > 0)) | |
303 | cg->combmap_cache->used --; | |
304 | ||
305 | if (cg->breakmap_cache && (cg->breakmap_cache->used > 0)) | |
306 | cg->breakmap_cache->used --; | |
307 | ||
c5061b15 | 308 | return (0); |
309 | } | |
310 | ||
03f61bf3 | 311 | |
c5061b15 | 312 | /* |
313 | * 'cupsNormalizeMapsFlush()' - Flush all normalization maps in cache. | |
314 | */ | |
03f61bf3 | 315 | |
c5061b15 | 316 | void |
317 | cupsNormalizeMapsFlush(void) | |
158e5e17 | 318 | { |
319 | _cupsNormalizeMapsFlush(_cupsGlobals()); | |
320 | } | |
321 | ||
322 | ||
323 | /* | |
324 | * '_cupsNormalizeMapsFlush()' - Flush all normalization maps in cache. | |
325 | */ | |
326 | ||
327 | void | |
328 | _cupsNormalizeMapsFlush( | |
329 | _cups_globals_t *cg) /* I - Global data */ | |
c5061b15 | 330 | { |
2625096f | 331 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ |
332 | _cups_norm_map_t *nextnorm; /* Next Unicode Normalization Map */ | |
333 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ | |
334 | _cups_fold_map_t *nextfold; /* Next Unicode Case Folding Map */ | |
03f61bf3 | 335 | |
c5061b15 | 336 | |
337 | /* | |
338 | * Flush all normalization maps... | |
339 | */ | |
03f61bf3 | 340 | |
341 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nextnorm) | |
c5061b15 | 342 | { |
343 | free(nmap->uni2norm); | |
344 | nextnorm = nmap->next; | |
345 | free(nmap); | |
346 | } | |
03f61bf3 | 347 | |
348 | cg->normmap_cache = NULL; | |
349 | ||
350 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = nextfold) | |
c5061b15 | 351 | { |
352 | free(fmap->uni2fold); | |
353 | nextfold = fmap->next; | |
354 | free(fmap); | |
355 | } | |
03f61bf3 | 356 | |
357 | cg->foldmap_cache = NULL; | |
358 | ||
359 | if (cg->propmap_cache) | |
c5061b15 | 360 | { |
03f61bf3 | 361 | free(cg->propmap_cache->uni2prop); |
362 | free(cg->propmap_cache); | |
363 | cg->propmap_cache = NULL; | |
c5061b15 | 364 | } |
03f61bf3 | 365 | |
366 | if (cg->combmap_cache) | |
c5061b15 | 367 | { |
03f61bf3 | 368 | free(cg->combmap_cache->uni2comb); |
369 | free(cg->combmap_cache); | |
370 | cg->combmap_cache = NULL; | |
c5061b15 | 371 | } |
03f61bf3 | 372 | |
373 | if (cg->breakmap_cache) | |
c5061b15 | 374 | { |
03f61bf3 | 375 | free(cg->breakmap_cache->uni2break); |
376 | free(cg->breakmap_cache); | |
377 | cg->breakmap_cache = NULL; | |
c5061b15 | 378 | } |
c5061b15 | 379 | } |
380 | ||
03f61bf3 | 381 | |
c5061b15 | 382 | /* |
4a95bc63 | 383 | * 'cupsUTF8Normalize()' - Normalize UTF-8 string. |
c5061b15 | 384 | * |
385 | * Normalize UTF-8 string to Unicode UAX-15 Normalization Form | |
386 | * Note - Compatibility Normalization Forms (NFKD/NFKC) are | |
387 | * unsafe for subsequent transcoding to legacy charsets | |
388 | */ | |
03f61bf3 | 389 | |
390 | int /* O - Count or -1 on error */ | |
391 | cupsUTF8Normalize( | |
392 | cups_utf8_t *dest, /* O - Target string */ | |
393 | const cups_utf8_t *src, /* I - Source string */ | |
394 | const int maxout, /* I - Max output */ | |
395 | const cups_normalize_t normalize) /* I - Normalization */ | |
c5061b15 | 396 | { |
03f61bf3 | 397 | int len; /* String length */ |
398 | cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */ | |
399 | cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */ | |
400 | ||
c5061b15 | 401 | |
402 | /* | |
403 | * Check for valid arguments and clear output... | |
404 | */ | |
03f61bf3 | 405 | |
406 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
c5061b15 | 407 | return (-1); |
03f61bf3 | 408 | |
c5061b15 | 409 | *dest = 0; |
410 | ||
411 | /* | |
412 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
413 | */ | |
03f61bf3 | 414 | |
4a95bc63 | 415 | len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING); |
03f61bf3 | 416 | |
c5061b15 | 417 | if (len < 0) |
418 | return (-1); | |
419 | ||
420 | /* | |
421 | * Normalize internal UCS-4 to second internal UCS-4... | |
422 | */ | |
03f61bf3 | 423 | |
4a95bc63 | 424 | len = cupsUTF32Normalize(work2, work1, CUPS_MAX_USTRING, normalize); |
03f61bf3 | 425 | |
c5061b15 | 426 | if (len < 0) |
427 | return (-1); | |
428 | ||
429 | /* | |
430 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
431 | */ | |
03f61bf3 | 432 | |
4a95bc63 | 433 | len = cupsUTF32ToUTF8(dest, work2, maxout); |
03f61bf3 | 434 | |
c5061b15 | 435 | return (len); |
436 | } | |
437 | ||
03f61bf3 | 438 | |
c5061b15 | 439 | /* |
4a95bc63 | 440 | * 'cupsUTF32Normalize()' - Normalize UTF-32 string. |
c5061b15 | 441 | * |
442 | * Normalize UTF-32 string to Unicode UAX-15 Normalization Form | |
443 | * Note - Compatibility Normalization Forms (NFKD/NFKC) are | |
444 | * unsafe for subsequent transcoding to legacy charsets | |
445 | */ | |
03f61bf3 | 446 | |
447 | int /* O - Count or -1 on error */ | |
448 | cupsUTF32Normalize( | |
449 | cups_utf32_t *dest, /* O - Target string */ | |
450 | const cups_utf32_t *src, /* I - Source string */ | |
451 | const int maxout, /* I - Max output */ | |
452 | const cups_normalize_t normalize) /* I - Normalization */ | |
c5061b15 | 453 | { |
03f61bf3 | 454 | int i; /* Looping variable */ |
455 | int result; /* Result Value */ | |
456 | cups_ucs2_t *mp; /* Map char pointer */ | |
457 | int pass; /* Pass count for each transform */ | |
458 | int hit; /* Hit count from binary search */ | |
459 | cups_utf32_t unichar1; /* Unicode character value */ | |
460 | cups_utf32_t unichar2; /* Unicode character value */ | |
2625096f | 461 | _cups_comb_class_t class1; /* First Combining Class */ |
462 | _cups_comb_class_t class2; /* Second Combining Class */ | |
03f61bf3 | 463 | int len; /* String length */ |
464 | cups_utf32_t work1[CUPS_MAX_USTRING]; | |
465 | /* First internal UCS-4 string */ | |
466 | cups_utf32_t work2[CUPS_MAX_USTRING]; | |
467 | /* Second internal UCS-4 string */ | |
468 | cups_utf32_t *p1; /* First UCS-4 string pointer */ | |
469 | cups_utf32_t *p2; /* Second UCS-4 string pointer */ | |
2625096f | 470 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ |
03f61bf3 | 471 | cups_normalize_t decompose; /* Decomposition Type */ |
2625096f | 472 | _cups_globals_t *cg = _cupsGlobals(); |
03f61bf3 | 473 | /* Pointer to library globals */ |
474 | ||
c5061b15 | 475 | |
476 | /* | |
477 | * Check for valid arguments and clear output... | |
478 | */ | |
03f61bf3 | 479 | |
480 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
c5061b15 | 481 | return (-1); |
03f61bf3 | 482 | |
c5061b15 | 483 | *dest = 0; |
03f61bf3 | 484 | |
c5061b15 | 485 | result = cupsNormalizeMapsGet(); |
03f61bf3 | 486 | |
c5061b15 | 487 | if (result < 0) |
488 | return (-1); | |
489 | ||
490 | /* | |
491 | * Find decomposition map... | |
492 | */ | |
03f61bf3 | 493 | |
c5061b15 | 494 | switch (normalize) |
495 | { | |
496 | case CUPS_NORM_NFD: | |
497 | case CUPS_NORM_NFC: | |
03f61bf3 | 498 | decompose = CUPS_NORM_NFD; |
499 | break; | |
500 | ||
c5061b15 | 501 | case CUPS_NORM_NFKD: |
502 | case CUPS_NORM_NFKC: | |
03f61bf3 | 503 | decompose = CUPS_NORM_NFKD; |
504 | break; | |
505 | ||
c5061b15 | 506 | default: |
03f61bf3 | 507 | return (-1); |
c5061b15 | 508 | } |
03f61bf3 | 509 | |
510 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) | |
c5061b15 | 511 | if (nmap->normalize == decompose) |
512 | break; | |
03f61bf3 | 513 | |
c5061b15 | 514 | if (nmap == NULL) |
515 | return (-1); | |
516 | ||
517 | /* | |
518 | * Copy input to internal buffer... | |
519 | */ | |
03f61bf3 | 520 | |
c5061b15 | 521 | p1 = &work1[0]; |
03f61bf3 | 522 | |
c5061b15 | 523 | for (i = 0; i < CUPS_MAX_USTRING; i ++) |
524 | { | |
525 | if (*src == 0) | |
526 | break; | |
03f61bf3 | 527 | |
c5061b15 | 528 | *p1 ++ = *src ++; |
529 | } | |
03f61bf3 | 530 | |
c5061b15 | 531 | *p1 = 0; |
532 | len = i; | |
533 | ||
534 | /* | |
535 | * Decompose until no further decomposition... | |
536 | */ | |
03f61bf3 | 537 | |
47c9dfee | 538 | for (pass = 0; pass < 20; pass ++) |
c5061b15 | 539 | { |
540 | p1 = &work1[0]; | |
541 | p2 = &work2[0]; | |
03f61bf3 | 542 | |
c5061b15 | 543 | for (hit = 0; *p1 != 0; p1 ++) |
544 | { | |
545 | /* | |
546 | * Check for decomposition defined... | |
547 | */ | |
03f61bf3 | 548 | |
549 | mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount, | |
550 | (sizeof(cups_ucs2_t) * 3), compare_decompose); | |
c5061b15 | 551 | if (mp == NULL) |
552 | { | |
47c9dfee | 553 | *p2 ++ = *p1; |
554 | continue; | |
c5061b15 | 555 | } |
556 | ||
557 | /* | |
558 | * Decompose input character to one or two output characters... | |
559 | */ | |
03f61bf3 | 560 | |
c5061b15 | 561 | hit ++; |
562 | mp ++; | |
4a95bc63 | 563 | *p2 ++ = (cups_utf32_t) *mp ++; |
03f61bf3 | 564 | |
c5061b15 | 565 | if (*mp != 0) |
47c9dfee | 566 | *p2 ++ = (cups_utf32_t) *mp; |
c5061b15 | 567 | } |
03f61bf3 | 568 | |
c5061b15 | 569 | *p2 = 0; |
03f61bf3 | 570 | len = (int)(p2 - &work2[0]); |
c5061b15 | 571 | |
572 | /* | |
573 | * Check for decomposition finished... | |
574 | */ | |
575 | if (hit == 0) | |
576 | break; | |
4a95bc63 | 577 | memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1)); |
c5061b15 | 578 | } |
579 | ||
580 | /* | |
581 | * Canonical reorder until no further reordering... | |
582 | */ | |
03f61bf3 | 583 | |
47c9dfee | 584 | for (pass = 0; pass < 20; pass ++) |
c5061b15 | 585 | { |
586 | p1 = &work1[0]; | |
03f61bf3 | 587 | |
c5061b15 | 588 | for (hit = 0; *p1 != 0; p1 ++) |
589 | { | |
590 | /* | |
591 | * Check for combining characters to reorder... | |
592 | */ | |
03f61bf3 | 593 | |
c5061b15 | 594 | unichar1 = *p1; |
595 | unichar2 = *(p1 + 1); | |
03f61bf3 | 596 | |
c5061b15 | 597 | if (unichar2 == 0) |
47c9dfee | 598 | break; |
03f61bf3 | 599 | |
c5061b15 | 600 | class1 = get_combining_class(unichar1); |
601 | class2 = get_combining_class(unichar2); | |
03f61bf3 | 602 | |
c5061b15 | 603 | if ((class1 < 0) || (class2 < 0)) |
47c9dfee | 604 | return (-1); |
03f61bf3 | 605 | |
c5061b15 | 606 | if ((class1 == 0) || (class2 == 0)) |
47c9dfee | 607 | continue; |
03f61bf3 | 608 | |
47c9dfee | 609 | if (class1 <= class2) |
610 | continue; | |
c5061b15 | 611 | |
612 | /* | |
613 | * Swap two combining characters... | |
614 | */ | |
03f61bf3 | 615 | |
616 | *p1 = unichar2; | |
617 | p1 ++; | |
618 | *p1 = unichar1; | |
619 | hit ++; | |
c5061b15 | 620 | } |
03f61bf3 | 621 | |
c5061b15 | 622 | if (hit == 0) |
623 | break; | |
624 | } | |
625 | ||
626 | /* | |
627 | * Check for decomposition only... | |
628 | */ | |
03f61bf3 | 629 | |
630 | if (normalize == CUPS_NORM_NFD || normalize == CUPS_NORM_NFKD) | |
c5061b15 | 631 | { |
03f61bf3 | 632 | memcpy(dest, work1, sizeof(cups_utf32_t) * (len + 1)); |
c5061b15 | 633 | return (len); |
634 | } | |
635 | ||
636 | /* | |
637 | * Find composition map... | |
638 | */ | |
03f61bf3 | 639 | |
640 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) | |
c5061b15 | 641 | if (nmap->normalize == CUPS_NORM_NFC) |
642 | break; | |
03f61bf3 | 643 | |
c5061b15 | 644 | if (nmap == NULL) |
645 | return (-1); | |
646 | ||
647 | /* | |
648 | * Compose until no further composition... | |
649 | */ | |
03f61bf3 | 650 | |
47c9dfee | 651 | for (pass = 0; pass < 20; pass ++) |
c5061b15 | 652 | { |
653 | p1 = &work1[0]; | |
654 | p2 = &work2[0]; | |
03f61bf3 | 655 | |
c5061b15 | 656 | for (hit = 0; *p1 != 0; p1 ++) |
657 | { | |
658 | /* | |
659 | * Check for composition defined... | |
660 | */ | |
03f61bf3 | 661 | |
c5061b15 | 662 | unichar1 = *p1; |
663 | unichar2 = *(p1 + 1); | |
03f61bf3 | 664 | |
c5061b15 | 665 | if (unichar2 == 0) |
666 | { | |
47c9dfee | 667 | *p2 ++ = unichar1; |
668 | break; | |
c5061b15 | 669 | } |
03f61bf3 | 670 | |
671 | mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount, | |
672 | (sizeof(cups_ucs2_t) * 3), compare_compose); | |
c5061b15 | 673 | if (mp == NULL) |
674 | { | |
47c9dfee | 675 | *p2 ++ = *p1; |
676 | continue; | |
c5061b15 | 677 | } |
678 | ||
679 | /* | |
680 | * Compose two input characters to one output character... | |
681 | */ | |
03f61bf3 | 682 | |
c5061b15 | 683 | hit ++; |
684 | mp += 2; | |
4a95bc63 | 685 | *p2 ++ = (cups_utf32_t) *mp; |
c5061b15 | 686 | p1 ++; |
687 | } | |
03f61bf3 | 688 | |
c5061b15 | 689 | *p2 = 0; |
690 | len = (int) (p2 - &work2[0]); | |
691 | ||
692 | /* | |
693 | * Check for composition finished... | |
694 | */ | |
03f61bf3 | 695 | |
c5061b15 | 696 | if (hit == 0) |
697 | break; | |
03f61bf3 | 698 | |
4a95bc63 | 699 | memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1)); |
c5061b15 | 700 | } |
03f61bf3 | 701 | |
4a95bc63 | 702 | memcpy (dest, work1, sizeof(cups_utf32_t) * (len + 1)); |
03f61bf3 | 703 | |
c5061b15 | 704 | cupsNormalizeMapsFree(); |
03f61bf3 | 705 | |
c5061b15 | 706 | return (len); |
707 | } | |
708 | ||
03f61bf3 | 709 | |
c5061b15 | 710 | /* |
4a95bc63 | 711 | * 'cupsUTF8CaseFold()' - Case fold UTF-8 string. |
c5061b15 | 712 | * |
713 | * Case Fold UTF-8 string per Unicode UAX-21 Section 2.3 | |
714 | * Note - Case folding output is | |
715 | * unsafe for subsequent transcoding to legacy charsets | |
716 | */ | |
03f61bf3 | 717 | |
718 | int /* O - Count or -1 on error */ | |
719 | cupsUTF8CaseFold( | |
720 | cups_utf8_t *dest, /* O - Target string */ | |
721 | const cups_utf8_t *src, /* I - Source string */ | |
722 | const int maxout, /* I - Max output */ | |
723 | const cups_folding_t fold) /* I - Fold Mode */ | |
c5061b15 | 724 | { |
03f61bf3 | 725 | int len; /* String length */ |
726 | cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */ | |
727 | cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */ | |
728 | ||
c5061b15 | 729 | |
730 | /* | |
731 | * Check for valid arguments and clear output... | |
732 | */ | |
03f61bf3 | 733 | |
734 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
c5061b15 | 735 | return (-1); |
03f61bf3 | 736 | |
c5061b15 | 737 | *dest = 0; |
03f61bf3 | 738 | |
739 | if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL) | |
c5061b15 | 740 | return (-1); |
741 | ||
742 | /* | |
743 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
744 | */ | |
03f61bf3 | 745 | |
4a95bc63 | 746 | len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING); |
03f61bf3 | 747 | |
c5061b15 | 748 | if (len < 0) |
749 | return (-1); | |
750 | ||
751 | /* | |
752 | * Case Fold internal UCS-4 to second internal UCS-4... | |
753 | */ | |
03f61bf3 | 754 | |
4a95bc63 | 755 | len = cupsUTF32CaseFold(work2, work1, CUPS_MAX_USTRING, fold); |
03f61bf3 | 756 | |
c5061b15 | 757 | if (len < 0) |
758 | return (-1); | |
759 | ||
760 | /* | |
761 | * Convert internal UCS-4 to output UTF-8 (and delete BOM)... | |
762 | */ | |
03f61bf3 | 763 | |
4a95bc63 | 764 | len = cupsUTF32ToUTF8(dest, work2, maxout); |
03f61bf3 | 765 | |
c5061b15 | 766 | return (len); |
767 | } | |
768 | ||
03f61bf3 | 769 | |
c5061b15 | 770 | /* |
4a95bc63 | 771 | * 'cupsUTF32CaseFold()' - Case fold UTF-32 string. |
c5061b15 | 772 | * |
773 | * Case Fold UTF-32 string per Unicode UAX-21 Section 2.3 | |
774 | * Note - Case folding output is | |
775 | * unsafe for subsequent transcoding to legacy charsets | |
776 | */ | |
03f61bf3 | 777 | |
778 | int /* O - Count or -1 on error */ | |
779 | cupsUTF32CaseFold( | |
780 | cups_utf32_t *dest, /* O - Target string */ | |
781 | const cups_utf32_t *src, /* I - Source string */ | |
782 | const int maxout, /* I - Max output */ | |
783 | const cups_folding_t fold) /* I - Fold Mode */ | |
c5061b15 | 784 | { |
03f61bf3 | 785 | cups_utf32_t *start = dest; /* Start of destination string */ |
786 | int i; /* Looping variable */ | |
787 | int result; /* Result Value */ | |
788 | cups_ucs2_t *mp; /* Map char pointer */ | |
2625096f | 789 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ |
790 | _cups_globals_t *cg = _cupsGlobals(); | |
03f61bf3 | 791 | /* Pointer to library globals */ |
792 | ||
c5061b15 | 793 | |
794 | /* | |
795 | * Check for valid arguments and clear output... | |
796 | */ | |
03f61bf3 | 797 | |
798 | if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING) | |
c5061b15 | 799 | return (-1); |
03f61bf3 | 800 | |
c5061b15 | 801 | *dest = 0; |
03f61bf3 | 802 | |
803 | if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL) | |
c5061b15 | 804 | return (-1); |
805 | ||
806 | /* | |
807 | * Find case folding map... | |
808 | */ | |
03f61bf3 | 809 | |
c5061b15 | 810 | result = cupsNormalizeMapsGet(); |
03f61bf3 | 811 | |
c5061b15 | 812 | if (result < 0) |
813 | return (-1); | |
03f61bf3 | 814 | |
815 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next) | |
c5061b15 | 816 | if (fmap->fold == fold) |
817 | break; | |
03f61bf3 | 818 | |
c5061b15 | 819 | if (fmap == NULL) |
820 | return (-1); | |
821 | ||
822 | /* | |
823 | * Case fold input string to output string... | |
824 | */ | |
03f61bf3 | 825 | |
c5061b15 | 826 | for (i = 0; i < (maxout - 1); i ++, src ++) |
827 | { | |
828 | /* | |
829 | * Check for case folding defined... | |
830 | */ | |
03f61bf3 | 831 | |
832 | mp = (cups_ucs2_t *)bsearch(src, fmap->uni2fold, fmap->foldcount, | |
833 | (sizeof(cups_ucs2_t) * 4), compare_foldchar); | |
c5061b15 | 834 | if (mp == NULL) |
835 | { | |
836 | *dest ++ = *src; | |
837 | continue; | |
838 | } | |
839 | ||
840 | /* | |
841 | * Case fold input character to one or two output characters... | |
842 | */ | |
03f61bf3 | 843 | |
c5061b15 | 844 | mp ++; |
4a95bc63 | 845 | *dest ++ = (cups_utf32_t) *mp ++; |
03f61bf3 | 846 | |
847 | if (*mp != 0 && fold == CUPS_FOLD_FULL) | |
c5061b15 | 848 | { |
849 | i ++; | |
03f61bf3 | 850 | if (i >= (maxout - 1)) |
47c9dfee | 851 | break; |
03f61bf3 | 852 | |
4a95bc63 | 853 | *dest ++ = (cups_utf32_t) *mp; |
c5061b15 | 854 | } |
855 | } | |
03f61bf3 | 856 | |
c5061b15 | 857 | *dest = 0; |
03f61bf3 | 858 | |
c5061b15 | 859 | cupsNormalizeMapsFree(); |
03f61bf3 | 860 | |
b296d7d9 | 861 | return ((int)(dest - start)); |
c5061b15 | 862 | } |
863 | ||
03f61bf3 | 864 | |
c5061b15 | 865 | /* |
4a95bc63 | 866 | * 'cupsUTF8CompareCaseless()' - Compare case folded UTF-8 strings. |
c5061b15 | 867 | */ |
03f61bf3 | 868 | |
869 | int /* O - Difference of strings */ | |
870 | cupsUTF8CompareCaseless( | |
871 | const cups_utf8_t *s1, /* I - String1 */ | |
872 | const cups_utf8_t *s2) /* I - String2 */ | |
c5061b15 | 873 | { |
03f61bf3 | 874 | int difference; /* Difference of two strings */ |
875 | int len; /* String length */ | |
876 | cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */ | |
877 | cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */ | |
878 | ||
c5061b15 | 879 | |
880 | /* | |
881 | * Check for valid arguments... | |
882 | */ | |
03f61bf3 | 883 | |
884 | if (!s1 || !s2) | |
c5061b15 | 885 | return (-1); |
886 | ||
887 | /* | |
888 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
889 | */ | |
03f61bf3 | 890 | |
4a95bc63 | 891 | len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING); |
03f61bf3 | 892 | |
c5061b15 | 893 | if (len < 0) |
894 | return (-1); | |
03f61bf3 | 895 | |
4a95bc63 | 896 | len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING); |
03f61bf3 | 897 | |
c5061b15 | 898 | if (len < 0) |
899 | return (-1); | |
900 | ||
901 | /* | |
902 | * Compare first internal UCS-4 to second internal UCS-4... | |
903 | */ | |
03f61bf3 | 904 | |
4a95bc63 | 905 | difference = cupsUTF32CompareCaseless(work1, work2); |
03f61bf3 | 906 | |
c5061b15 | 907 | return (difference); |
908 | } | |
909 | ||
03f61bf3 | 910 | |
c5061b15 | 911 | /* |
4a95bc63 | 912 | * 'cupsUTF32CompareCaseless()' - Compare case folded UTF-32 strings. |
c5061b15 | 913 | */ |
03f61bf3 | 914 | |
915 | int /* O - Difference of strings */ | |
916 | cupsUTF32CompareCaseless( | |
917 | const cups_utf32_t *s1, /* I - String1 */ | |
918 | const cups_utf32_t *s2) /* I - String2 */ | |
c5061b15 | 919 | { |
03f61bf3 | 920 | int difference; /* Difference of two strings */ |
921 | int len; /* String length */ | |
922 | cups_folding_t fold = CUPS_FOLD_FULL; | |
923 | /* Case folding mode */ | |
924 | cups_utf32_t fold1[CUPS_MAX_USTRING]; | |
925 | /* First UCS-4 folded string */ | |
926 | cups_utf32_t fold2[CUPS_MAX_USTRING]; | |
927 | /* Second UCS-4 folded string */ | |
928 | cups_utf32_t *p1; /* First UCS-4 string pointer */ | |
929 | cups_utf32_t *p2; /* Second UCS-4 string pointer */ | |
930 | ||
c5061b15 | 931 | |
932 | /* | |
933 | * Check for valid arguments... | |
934 | */ | |
03f61bf3 | 935 | |
936 | if (!s1 || !s2) | |
c5061b15 | 937 | return (-1); |
938 | ||
939 | /* | |
940 | * Case Fold input UTF-32 strings to internal UCS-4 strings... | |
941 | */ | |
03f61bf3 | 942 | |
4a95bc63 | 943 | len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold); |
03f61bf3 | 944 | |
c5061b15 | 945 | if (len < 0) |
946 | return (-1); | |
03f61bf3 | 947 | |
4a95bc63 | 948 | len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold); |
03f61bf3 | 949 | |
c5061b15 | 950 | if (len < 0) |
951 | return (-1); | |
952 | ||
953 | /* | |
954 | * Compare first internal UCS-4 to second internal UCS-4... | |
955 | */ | |
03f61bf3 | 956 | |
c5061b15 | 957 | p1 = &fold1[0]; |
958 | p2 = &fold2[0]; | |
03f61bf3 | 959 | |
c5061b15 | 960 | for (;; p1 ++, p2 ++) |
961 | { | |
962 | difference = (int) (*p1 - *p2); | |
03f61bf3 | 963 | |
c5061b15 | 964 | if (difference != 0) |
965 | break; | |
03f61bf3 | 966 | |
c5061b15 | 967 | if ((*p1 == 0) && (*p2 == 0)) |
968 | break; | |
969 | } | |
03f61bf3 | 970 | |
c5061b15 | 971 | return (difference); |
972 | } | |
973 | ||
03f61bf3 | 974 | |
c5061b15 | 975 | /* |
4a95bc63 | 976 | * 'cupsUTF8CompareIdentifier()' - Compare folded NFKC UTF-8 strings. |
c5061b15 | 977 | */ |
03f61bf3 | 978 | |
979 | int /* O - Result of comparison */ | |
980 | cupsUTF8CompareIdentifier( | |
981 | const cups_utf8_t *s1, /* I - String1 */ | |
982 | const cups_utf8_t *s2) /* I - String2 */ | |
c5061b15 | 983 | { |
03f61bf3 | 984 | int difference; /* Difference of two strings */ |
985 | int len; /* String length */ | |
986 | cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */ | |
987 | cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */ | |
988 | ||
c5061b15 | 989 | |
990 | /* | |
991 | * Check for valid arguments... | |
992 | */ | |
03f61bf3 | 993 | |
994 | if (!s1 || !s2) | |
c5061b15 | 995 | return (-1); |
996 | ||
997 | /* | |
998 | * Convert input UTF-8 to internal UCS-4 (and insert BOM)... | |
999 | */ | |
03f61bf3 | 1000 | |
4a95bc63 | 1001 | len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING); |
03f61bf3 | 1002 | |
c5061b15 | 1003 | if (len < 0) |
1004 | return (-1); | |
03f61bf3 | 1005 | |
4a95bc63 | 1006 | len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING); |
03f61bf3 | 1007 | |
c5061b15 | 1008 | if (len < 0) |
1009 | return (-1); | |
1010 | ||
1011 | /* | |
1012 | * Compare first internal UCS-4 to second internal UCS-4... | |
1013 | */ | |
03f61bf3 | 1014 | |
4a95bc63 | 1015 | difference = cupsUTF32CompareIdentifier(work1, work2); |
03f61bf3 | 1016 | |
c5061b15 | 1017 | return (difference); |
1018 | } | |
1019 | ||
03f61bf3 | 1020 | |
c5061b15 | 1021 | /* |
4a95bc63 | 1022 | * 'cupsUTF32CompareIdentifier()' - Compare folded NFKC UTF-32 strings. |
c5061b15 | 1023 | */ |
03f61bf3 | 1024 | |
1025 | int /* O - Result of comparison */ | |
1026 | cupsUTF32CompareIdentifier( | |
1027 | const cups_utf32_t *s1, /* I - String1 */ | |
1028 | const cups_utf32_t *s2) /* I - String2 */ | |
c5061b15 | 1029 | { |
03f61bf3 | 1030 | int difference; /* Difference of two strings */ |
1031 | int len; /* String length */ | |
1032 | cups_folding_t fold = CUPS_FOLD_FULL; | |
1033 | /* Case folding mode */ | |
1034 | cups_utf32_t fold1[CUPS_MAX_USTRING]; | |
1035 | /* First UCS-4 folded string */ | |
1036 | cups_utf32_t fold2[CUPS_MAX_USTRING]; | |
1037 | /* Second UCS-4 folded string */ | |
1038 | cups_normalize_t normalize = CUPS_NORM_NFKC; | |
1039 | /* Normalization form */ | |
1040 | cups_utf32_t norm1[CUPS_MAX_USTRING]; | |
1041 | /* First UCS-4 normalized string */ | |
1042 | cups_utf32_t norm2[CUPS_MAX_USTRING]; | |
1043 | /* Second UCS-4 normalized string */ | |
1044 | cups_utf32_t *p1; /* First UCS-4 string pointer */ | |
1045 | cups_utf32_t *p2; /* Second UCS-4 string pointer */ | |
1046 | ||
c5061b15 | 1047 | |
1048 | /* | |
1049 | * Check for valid arguments... | |
1050 | */ | |
03f61bf3 | 1051 | |
1052 | if (!s1 || !s2) | |
c5061b15 | 1053 | return (-1); |
1054 | ||
1055 | /* | |
1056 | * Case Fold input UTF-32 strings to internal UCS-4 strings... | |
1057 | */ | |
03f61bf3 | 1058 | |
4a95bc63 | 1059 | len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold); |
03f61bf3 | 1060 | |
c5061b15 | 1061 | if (len < 0) |
1062 | return (-1); | |
03f61bf3 | 1063 | |
4a95bc63 | 1064 | len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold); |
03f61bf3 | 1065 | |
c5061b15 | 1066 | if (len < 0) |
1067 | return (-1); | |
1068 | ||
1069 | /* | |
1070 | * Normalize internal UCS-4 strings to NFKC... | |
1071 | */ | |
03f61bf3 | 1072 | |
4a95bc63 | 1073 | len = cupsUTF32Normalize(norm1, fold1, CUPS_MAX_USTRING, normalize); |
03f61bf3 | 1074 | |
c5061b15 | 1075 | if (len < 0) |
1076 | return (-1); | |
03f61bf3 | 1077 | |
4a95bc63 | 1078 | len = cupsUTF32Normalize(norm2, fold2, CUPS_MAX_USTRING, normalize); |
03f61bf3 | 1079 | |
c5061b15 | 1080 | if (len < 0) |
1081 | return (-1); | |
1082 | ||
1083 | /* | |
1084 | * Compare first internal UCS-4 to second internal UCS-4... | |
1085 | */ | |
03f61bf3 | 1086 | |
c5061b15 | 1087 | p1 = &norm1[0]; |
1088 | p2 = &norm2[0]; | |
03f61bf3 | 1089 | |
c5061b15 | 1090 | for (;; p1 ++, p2 ++) |
1091 | { | |
1092 | difference = (int) (*p1 - *p2); | |
03f61bf3 | 1093 | |
c5061b15 | 1094 | if (difference != 0) |
1095 | break; | |
03f61bf3 | 1096 | |
c5061b15 | 1097 | if ((*p1 == 0) && (*p2 == 0)) |
1098 | break; | |
1099 | } | |
03f61bf3 | 1100 | |
c5061b15 | 1101 | return (difference); |
1102 | } | |
1103 | ||
03f61bf3 | 1104 | |
c5061b15 | 1105 | /* |
47c9dfee | 1106 | * 'cupsUTF32CharacterProperty()' - Get UTF-32 character property. |
c5061b15 | 1107 | */ |
03f61bf3 | 1108 | |
1109 | int /* O - Result of comparison */ | |
1110 | cupsUTF32CharacterProperty( | |
1111 | const cups_utf32_t ch, /* I - Source char */ | |
1112 | const cups_property_t prop) /* I - Char Property */ | |
c5061b15 | 1113 | { |
03f61bf3 | 1114 | int result; /* Result Value */ |
1115 | ||
c5061b15 | 1116 | |
1117 | /* | |
1118 | * Check for valid arguments... | |
1119 | */ | |
03f61bf3 | 1120 | |
c5061b15 | 1121 | if (ch == 0) |
1122 | return (-1); | |
1123 | ||
1124 | /* | |
1125 | * Find character property... | |
1126 | */ | |
03f61bf3 | 1127 | |
47c9dfee | 1128 | switch (prop) |
c5061b15 | 1129 | { |
1130 | case CUPS_PROP_GENERAL_CATEGORY: | |
03f61bf3 | 1131 | result = (get_general_category(ch)); |
1132 | break; | |
1133 | ||
c5061b15 | 1134 | case CUPS_PROP_BIDI_CATEGORY: |
03f61bf3 | 1135 | result = (get_bidi_category(ch)); |
1136 | break; | |
1137 | ||
c5061b15 | 1138 | case CUPS_PROP_COMBINING_CLASS: |
03f61bf3 | 1139 | result = (get_combining_class(ch)); |
1140 | break; | |
c5061b15 | 1141 | case CUPS_PROP_BREAK_CLASS: |
03f61bf3 | 1142 | result = (get_break_class(ch)); |
1143 | break; | |
1144 | ||
c5061b15 | 1145 | default: |
03f61bf3 | 1146 | return (-1); |
c5061b15 | 1147 | } |
03f61bf3 | 1148 | |
c5061b15 | 1149 | return (result); |
1150 | } | |
1151 | ||
03f61bf3 | 1152 | |
c5061b15 | 1153 | /* |
1154 | * 'get_general_category()' - Get UTF-32 Character General Category. | |
1155 | */ | |
03f61bf3 | 1156 | |
1157 | static int /* O - Class or -1 on error */ | |
1158 | get_general_category( | |
1159 | const cups_utf32_t ch) /* I - Source char */ | |
c5061b15 | 1160 | { |
03f61bf3 | 1161 | int result; /* Result Value */ |
1162 | cups_gencat_t gencat; /* General Category Value */ | |
2625096f | 1163 | _cups_prop_map_t *pmap; /* Unicode Property Map */ |
1164 | _cups_prop_t *uni2prop; /* Unicode Char -> Properties */ | |
1165 | _cups_globals_t *cg = _cupsGlobals(); | |
03f61bf3 | 1166 | /* Pointer to library globals */ |
1167 | ||
c5061b15 | 1168 | |
1169 | /* | |
1170 | * Check for valid argument... | |
1171 | */ | |
03f61bf3 | 1172 | |
c5061b15 | 1173 | if (ch == 0) |
1174 | return (-1); | |
1175 | ||
1176 | /* | |
1177 | * Find property map... | |
1178 | */ | |
03f61bf3 | 1179 | |
c5061b15 | 1180 | result = cupsNormalizeMapsGet(); |
03f61bf3 | 1181 | |
c5061b15 | 1182 | if (result < 0) |
1183 | return (-1); | |
03f61bf3 | 1184 | |
1185 | pmap = cg->propmap_cache; | |
1186 | ||
c5061b15 | 1187 | if (pmap == NULL) |
1188 | return (-1); | |
1189 | ||
1190 | /* | |
1191 | * Find character in map... | |
1192 | */ | |
03f61bf3 | 1193 | |
2625096f | 1194 | uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount, |
1195 | (sizeof(_cups_prop_t)), compare_propchar); | |
03f61bf3 | 1196 | |
c5061b15 | 1197 | cupsNormalizeMapsFree(); |
03f61bf3 | 1198 | |
c5061b15 | 1199 | if (uni2prop == NULL) |
47c9dfee | 1200 | gencat = CUPS_GENCAT_CN; /* Other, Not Assigned */ |
c5061b15 | 1201 | else |
03f61bf3 | 1202 | gencat = (cups_gencat_t)uni2prop->gencat; |
1203 | ||
1204 | result = (int)gencat; | |
1205 | ||
c5061b15 | 1206 | return (result); |
1207 | } | |
1208 | ||
03f61bf3 | 1209 | |
c5061b15 | 1210 | /* |
1211 | * 'get_bidi_category()' - Get UTF-32 Character Bidi Category. | |
1212 | */ | |
03f61bf3 | 1213 | |
1214 | static int /* O - Class or -1 on error */ | |
1215 | get_bidi_category(const cups_utf32_t ch)/* I - Source char */ | |
c5061b15 | 1216 | { |
03f61bf3 | 1217 | int result; /* Result Value */ |
2625096f | 1218 | cups_bidi_t bidicat; /* Bidi Category Value */ |
1219 | _cups_prop_map_t *pmap; /* Unicode Property Map */ | |
1220 | _cups_prop_t *uni2prop; /* Unicode Char -> Properties */ | |
1221 | _cups_globals_t *cg = _cupsGlobals(); | |
03f61bf3 | 1222 | /* Pointer to library globals */ |
1223 | ||
c5061b15 | 1224 | |
1225 | /* | |
1226 | * Check for valid argument... | |
1227 | */ | |
03f61bf3 | 1228 | |
c5061b15 | 1229 | if (ch == 0) |
1230 | return (-1); | |
1231 | ||
1232 | /* | |
1233 | * Find property map... | |
1234 | */ | |
03f61bf3 | 1235 | |
c5061b15 | 1236 | result = cupsNormalizeMapsGet(); |
03f61bf3 | 1237 | |
c5061b15 | 1238 | if (result < 0) |
1239 | return (-1); | |
03f61bf3 | 1240 | |
1241 | pmap = cg->propmap_cache; | |
1242 | ||
c5061b15 | 1243 | if (pmap == NULL) |
1244 | return (-1); | |
1245 | ||
1246 | /* | |
1247 | * Find character in map... | |
1248 | */ | |
03f61bf3 | 1249 | |
2625096f | 1250 | uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount, |
1251 | (sizeof(_cups_prop_t)), compare_propchar); | |
03f61bf3 | 1252 | |
c5061b15 | 1253 | cupsNormalizeMapsFree(); |
03f61bf3 | 1254 | |
c5061b15 | 1255 | if (uni2prop == NULL) |
47c9dfee | 1256 | bidicat = CUPS_BIDI_ON; /* Other Neutral */ |
c5061b15 | 1257 | else |
2625096f | 1258 | bidicat = (cups_bidi_t)uni2prop->bidicat; |
03f61bf3 | 1259 | |
1260 | result = (int)bidicat; | |
1261 | ||
c5061b15 | 1262 | return (result); |
1263 | } | |
1264 | ||
1265 | /* | |
1266 | * 'get_combining_class()' - Get UTF-32 Character Combining Class. | |
1267 | * | |
1268 | * Note - Zero is non-combining (base character) | |
1269 | */ | |
03f61bf3 | 1270 | |
1271 | static int /* O - Class or -1 on error */ | |
1272 | get_combining_class( | |
1273 | const cups_utf32_t ch) /* I - Source char */ | |
c5061b15 | 1274 | { |
03f61bf3 | 1275 | int result; /* Result Value */ |
2625096f | 1276 | _cups_comb_map_t *cmap; /* Unicode Combining Class Map */ |
1277 | _cups_comb_class_t combclass; /* Unicode Combining Class */ | |
1278 | _cups_comb_t *uni2comb; /* Unicode Char -> Combining Class */ | |
1279 | _cups_globals_t *cg = _cupsGlobals(); | |
03f61bf3 | 1280 | /* Pointer to library globals */ |
1281 | ||
c5061b15 | 1282 | |
1283 | /* | |
1284 | * Check for valid argument... | |
1285 | */ | |
03f61bf3 | 1286 | |
c5061b15 | 1287 | if (ch == 0) |
1288 | return (-1); | |
1289 | ||
1290 | /* | |
1291 | * Find combining class map... | |
1292 | */ | |
03f61bf3 | 1293 | |
c5061b15 | 1294 | result = cupsNormalizeMapsGet(); |
03f61bf3 | 1295 | |
c5061b15 | 1296 | if (result < 0) |
1297 | return (-1); | |
03f61bf3 | 1298 | |
1299 | cmap = cg->combmap_cache; | |
1300 | ||
c5061b15 | 1301 | if (cmap == NULL) |
1302 | return (-1); | |
1303 | ||
1304 | /* | |
1305 | * Find combining character in map... | |
1306 | */ | |
03f61bf3 | 1307 | |
2625096f | 1308 | uni2comb = (_cups_comb_t *)bsearch(&ch, cmap->uni2comb, cmap->combcount, |
1309 | (sizeof(_cups_comb_t)), compare_combchar); | |
03f61bf3 | 1310 | |
c5061b15 | 1311 | cupsNormalizeMapsFree(); |
03f61bf3 | 1312 | |
c5061b15 | 1313 | if (uni2comb == NULL) |
1314 | combclass = 0; | |
1315 | else | |
2625096f | 1316 | combclass = (_cups_comb_class_t)uni2comb->combclass; |
03f61bf3 | 1317 | |
1318 | result = (int)combclass; | |
1319 | ||
c5061b15 | 1320 | return (result); |
1321 | } | |
1322 | ||
03f61bf3 | 1323 | |
c5061b15 | 1324 | /* |
1325 | * 'get_break_class()' - Get UTF-32 Character Line Break Class. | |
1326 | */ | |
03f61bf3 | 1327 | |
1328 | static int /* O - Class or -1 on error */ | |
1329 | get_break_class(const cups_utf32_t ch) /* I - Source char */ | |
c5061b15 | 1330 | { |
03f61bf3 | 1331 | int result; /* Result Value */ |
2625096f | 1332 | _cups_break_map_t *bmap; /* Unicode Line Break Class Map */ |
67871650 | 1333 | cups_break_class_t breakclass; /* Unicode Line Break Class */ |
03f61bf3 | 1334 | cups_ucs2_t *uni2break; /* Unicode -> Line Break Class */ |
2625096f | 1335 | _cups_globals_t *cg = _cupsGlobals(); |
03f61bf3 | 1336 | /* Pointer to library globals */ |
1337 | ||
c5061b15 | 1338 | |
1339 | /* | |
1340 | * Check for valid argument... | |
1341 | */ | |
03f61bf3 | 1342 | |
c5061b15 | 1343 | if (ch == 0) |
1344 | return (-1); | |
1345 | ||
1346 | /* | |
1347 | * Find line break class map... | |
1348 | */ | |
03f61bf3 | 1349 | |
c5061b15 | 1350 | result = cupsNormalizeMapsGet(); |
03f61bf3 | 1351 | |
c5061b15 | 1352 | if (result < 0) |
1353 | return (-1); | |
03f61bf3 | 1354 | |
1355 | bmap = cg->breakmap_cache; | |
1356 | ||
c5061b15 | 1357 | if (bmap == NULL) |
1358 | return (-1); | |
1359 | ||
1360 | /* | |
1361 | * Find line break character in map... | |
1362 | */ | |
03f61bf3 | 1363 | |
1364 | uni2break = (cups_ucs2_t *)bsearch(&ch, bmap->uni2break, bmap->breakcount, | |
1365 | (sizeof(cups_ucs2_t) * 3), | |
1366 | compare_breakchar); | |
1367 | ||
c5061b15 | 1368 | cupsNormalizeMapsFree(); |
03f61bf3 | 1369 | |
c5061b15 | 1370 | if (uni2break == NULL) |
1371 | breakclass = CUPS_BREAK_AI; | |
1372 | else | |
67871650 | 1373 | breakclass = (cups_break_class_t)*(uni2break + 2); |
03f61bf3 | 1374 | |
1375 | result = (int)breakclass; | |
1376 | ||
c5061b15 | 1377 | return (result); |
1378 | } | |
1379 | ||
03f61bf3 | 1380 | |
c5061b15 | 1381 | /* |
1382 | * 'get_map_count()' - Count lines in a map file. | |
1383 | */ | |
03f61bf3 | 1384 | |
1385 | static int /* O - Count or -1 on error */ | |
1386 | get_map_count(const char *filename) /* I - Map Filename */ | |
c5061b15 | 1387 | { |
03f61bf3 | 1388 | int i; /* Looping variable */ |
1389 | cups_file_t *fp; /* Map input file pointer */ | |
1390 | char *s; /* Line parsing pointer */ | |
1391 | char line[256]; /* Line from input map file */ | |
1392 | cups_utf32_t unichar; /* Unicode character value */ | |
1393 | ||
c5061b15 | 1394 | |
1395 | /* | |
1396 | * Open map input file... | |
1397 | */ | |
03f61bf3 | 1398 | |
1399 | if (!filename || !*filename) | |
c5061b15 | 1400 | return (-1); |
03f61bf3 | 1401 | |
1402 | fp = cupsFileOpen(filename, "r"); | |
c5061b15 | 1403 | if (fp == NULL) |
1404 | return (-1); | |
1405 | ||
1406 | /* | |
1407 | * Count lines in map input file... | |
1408 | */ | |
03f61bf3 | 1409 | |
c5061b15 | 1410 | for (i = 0; i < 50000;) |
1411 | { | |
03f61bf3 | 1412 | s = cupsFileGets(fp, line, sizeof(line)); |
c5061b15 | 1413 | if (s == NULL) |
1414 | break; | |
1415 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1416 | continue; | |
1417 | if (strncmp (s, "0x", 2) == 0) | |
1418 | s += 2; | |
1419 | if (sscanf(s, "%lx", &unichar) != 1) | |
1420 | break; | |
1421 | if (unichar > 0xffff) | |
1422 | break; | |
1423 | i ++; | |
1424 | } | |
1425 | if (i == 0) | |
1426 | i = -1; | |
1427 | ||
1428 | /* | |
1429 | * Close file and return map count (non-comment line count)... | |
1430 | */ | |
03f61bf3 | 1431 | |
1432 | cupsFileClose(fp); | |
1433 | ||
c5061b15 | 1434 | return (i); |
1435 | } | |
1436 | ||
03f61bf3 | 1437 | |
c5061b15 | 1438 | /* |
47c9dfee | 1439 | * 'get_normmap()' - Get Unicode normalization map to cache. |
c5061b15 | 1440 | */ |
03f61bf3 | 1441 | |
1442 | static int /* O - Zero or -1 on error */ | |
1443 | get_normmap( | |
1444 | const cups_normalize_t normalize) /* I - Normalization Form */ | |
c5061b15 | 1445 | { |
03f61bf3 | 1446 | int i; /* Looping variable */ |
1447 | cups_utf32_t unichar1; /* Unicode character value */ | |
1448 | cups_utf32_t unichar2; /* Unicode character value */ | |
1449 | cups_utf32_t unichar3; /* Unicode character value */ | |
2625096f | 1450 | _cups_norm_map_t *nmap; /* Unicode Normalization Map */ |
03f61bf3 | 1451 | int normcount; /* Count of Unicode Source Chars */ |
1452 | cups_ucs2_t *uni2norm; /* Unicode Char -> Normalization */ | |
03f61bf3 | 1453 | char *mapname; /* Normalization map name */ |
1454 | char filename[1024]; /* Filename for charset map file */ | |
1455 | cups_file_t *fp; /* Normalization map file pointer */ | |
1456 | char *s; /* Line parsing pointer */ | |
1457 | char line[256]; /* Line from input map file */ | |
2625096f | 1458 | _cups_globals_t *cg = _cupsGlobals(); |
03f61bf3 | 1459 | /* Pointer to library globals */ |
1460 | ||
c5061b15 | 1461 | |
1462 | /* | |
1463 | * See if we already have this normalization map loaded... | |
1464 | */ | |
03f61bf3 | 1465 | |
1466 | for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next) | |
c5061b15 | 1467 | if (nmap->normalize == normalize) |
1468 | return (0); | |
1469 | ||
1470 | /* | |
a501ad17 | 1471 | * Get the mapping name... |
c5061b15 | 1472 | */ |
03f61bf3 | 1473 | |
c5061b15 | 1474 | switch (normalize) |
1475 | { | |
47c9dfee | 1476 | case CUPS_NORM_NFD: /* Canonical Decomposition */ |
03f61bf3 | 1477 | mapname = "uni-nfd.txt"; |
1478 | break; | |
1479 | ||
47c9dfee | 1480 | case CUPS_NORM_NFKD: /* Compatibility Decomposition */ |
03f61bf3 | 1481 | mapname = "uni-nfkd.txt"; |
1482 | break; | |
1483 | ||
47c9dfee | 1484 | case CUPS_NORM_NFC: /* Canonical Composition */ |
03f61bf3 | 1485 | mapname = "uni-nfc.txt"; |
1486 | break; | |
1487 | ||
47c9dfee | 1488 | case CUPS_NORM_NFKC: /* no such map file... */ |
c5061b15 | 1489 | default: |
03f61bf3 | 1490 | return (-1); |
c5061b15 | 1491 | } |
1492 | ||
1493 | /* | |
1494 | * Open normalization map input file... | |
1495 | */ | |
03f61bf3 | 1496 | |
47c9dfee | 1497 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", |
a501ad17 | 1498 | cg->cups_datadir, mapname); |
c5061b15 | 1499 | if ((normcount = get_map_count(filename)) <= 0) |
1500 | return (-1); | |
03f61bf3 | 1501 | |
1502 | fp = cupsFileOpen(filename, "r"); | |
c5061b15 | 1503 | if (fp == NULL) |
1504 | return (-1); | |
1505 | ||
1506 | /* | |
1507 | * Allocate memory for normalization map and add to cache... | |
1508 | */ | |
03f61bf3 | 1509 | |
2625096f | 1510 | nmap = (_cups_norm_map_t *)calloc(1, sizeof(_cups_norm_map_t)); |
c5061b15 | 1511 | if (nmap == NULL) |
1512 | { | |
03f61bf3 | 1513 | cupsFileClose(fp); |
c5061b15 | 1514 | return (-1); |
1515 | } | |
03f61bf3 | 1516 | |
1517 | uni2norm = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * normcount); | |
c5061b15 | 1518 | if (uni2norm == NULL) |
1519 | { | |
03f61bf3 | 1520 | free(nmap); |
1521 | cupsFileClose(fp); | |
c5061b15 | 1522 | return (-1); |
1523 | } | |
03f61bf3 | 1524 | nmap->next = cg->normmap_cache; |
1525 | cg->normmap_cache = nmap; | |
c5061b15 | 1526 | nmap->used ++; |
1527 | nmap->normalize = normalize; | |
1528 | nmap->normcount = normcount; | |
1529 | nmap->uni2norm = uni2norm; | |
1530 | ||
1531 | /* | |
1532 | * Save normalization map into memory for later use... | |
1533 | */ | |
1534 | for (i = 0; i < normcount; ) | |
1535 | { | |
03f61bf3 | 1536 | s = cupsFileGets(fp, line, sizeof(line)); |
c5061b15 | 1537 | if (s == NULL) |
1538 | break; | |
1539 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1540 | continue; | |
1541 | if (sscanf(s, "%lx %lx %lx", &unichar1, &unichar2, &unichar3) != 3) | |
1542 | break; | |
1543 | if ((unichar1 > 0xffff) | |
1544 | || (unichar2 > 0xffff) | |
1545 | || (unichar3 > 0xffff)) | |
1546 | break; | |
4a95bc63 | 1547 | *uni2norm ++ = (cups_ucs2_t) unichar1; |
1548 | *uni2norm ++ = (cups_ucs2_t) unichar2; | |
1549 | *uni2norm ++ = (cups_ucs2_t) unichar3; | |
c5061b15 | 1550 | i ++; |
1551 | } | |
1552 | if (i < normcount) | |
1553 | nmap->normcount = i; | |
03f61bf3 | 1554 | cupsFileClose(fp); |
c5061b15 | 1555 | return (0); |
1556 | } | |
1557 | ||
03f61bf3 | 1558 | |
c5061b15 | 1559 | /* |
47c9dfee | 1560 | * 'get_foldmap()' - Get Unicode case folding map to cache. |
c5061b15 | 1561 | */ |
03f61bf3 | 1562 | |
1563 | static int /* O - Zero or -1 on error */ | |
1564 | get_foldmap(const cups_folding_t fold) /* I - Case folding type */ | |
c5061b15 | 1565 | { |
03f61bf3 | 1566 | int i; /* Looping variable */ |
1567 | cups_utf32_t unichar1; /* Unicode character value */ | |
1568 | cups_utf32_t unichar2; /* Unicode character value */ | |
1569 | cups_utf32_t unichar3; /* Unicode character value */ | |
1570 | cups_utf32_t unichar4; /* Unicode character value */ | |
2625096f | 1571 | _cups_fold_map_t *fmap; /* Unicode Case Folding Map */ |
03f61bf3 | 1572 | int foldcount; /* Count of Unicode Source Chars */ |
1573 | cups_ucs2_t *uni2fold; /* Unicode -> Folded Char(s) */ | |
03f61bf3 | 1574 | char *mapname; /* Case Folding map name */ |
1575 | char filename[1024]; /* Filename for charset map file */ | |
1576 | cups_file_t *fp; /* Case Folding map file pointer */ | |
1577 | char *s; /* Line parsing pointer */ | |
1578 | char line[256]; /* Line from input map file */ | |
2625096f | 1579 | _cups_globals_t *cg = _cupsGlobals(); |
03f61bf3 | 1580 | /* Pointer to library globals */ |
1581 | ||
c5061b15 | 1582 | |
1583 | /* | |
1584 | * See if we already have this case folding map loaded... | |
1585 | */ | |
03f61bf3 | 1586 | |
1587 | for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next) | |
c5061b15 | 1588 | if (fmap->fold == fold) |
1589 | return (0); | |
1590 | ||
1591 | /* | |
a501ad17 | 1592 | * Get the mapping name... |
c5061b15 | 1593 | */ |
03f61bf3 | 1594 | |
c5061b15 | 1595 | switch (fold) |
1596 | { | |
47c9dfee | 1597 | case CUPS_FOLD_SIMPLE: /* Simple case folding */ |
03f61bf3 | 1598 | mapname = "uni-fold.txt"; |
1599 | break; | |
47c9dfee | 1600 | case CUPS_FOLD_FULL: /* Full case folding */ |
03f61bf3 | 1601 | mapname = "uni-full.txt"; |
1602 | break; | |
c5061b15 | 1603 | default: |
03f61bf3 | 1604 | return (-1); |
c5061b15 | 1605 | } |
1606 | ||
1607 | /* | |
1608 | * Open case folding map input file... | |
1609 | */ | |
03f61bf3 | 1610 | |
47c9dfee | 1611 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", |
a501ad17 | 1612 | cg->cups_datadir, mapname); |
c5061b15 | 1613 | if ((foldcount = get_map_count(filename)) <= 0) |
1614 | return (-1); | |
03f61bf3 | 1615 | fp = cupsFileOpen(filename, "r"); |
c5061b15 | 1616 | if (fp == NULL) |
1617 | return (-1); | |
1618 | ||
1619 | /* | |
1620 | * Allocate memory for case folding map and add to cache... | |
1621 | */ | |
2625096f | 1622 | fmap = (_cups_fold_map_t *)calloc(1, sizeof(_cups_fold_map_t)); |
c5061b15 | 1623 | if (fmap == NULL) |
1624 | { | |
03f61bf3 | 1625 | cupsFileClose(fp); |
c5061b15 | 1626 | return (-1); |
1627 | } | |
03f61bf3 | 1628 | uni2fold = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 4 * foldcount); |
c5061b15 | 1629 | if (uni2fold == NULL) |
1630 | { | |
03f61bf3 | 1631 | free(fmap); |
1632 | cupsFileClose(fp); | |
c5061b15 | 1633 | return (-1); |
1634 | } | |
03f61bf3 | 1635 | fmap->next = cg->foldmap_cache; |
1636 | cg->foldmap_cache = fmap; | |
c5061b15 | 1637 | fmap->used ++; |
1638 | fmap->fold = fold; | |
1639 | fmap->foldcount = foldcount; | |
1640 | fmap->uni2fold = uni2fold; | |
1641 | ||
1642 | /* | |
1643 | * Save case folding map into memory for later use... | |
1644 | */ | |
03f61bf3 | 1645 | |
c5061b15 | 1646 | for (i = 0; i < foldcount; ) |
1647 | { | |
03f61bf3 | 1648 | s = cupsFileGets(fp, line, sizeof(line)); |
c5061b15 | 1649 | if (s == NULL) |
1650 | break; | |
1651 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1652 | continue; | |
1653 | unichar1 = unichar2 = unichar3 = unichar4 = 0; | |
1654 | if ((fold == CUPS_FOLD_SIMPLE) | |
1655 | && (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2)) | |
1656 | break; | |
1657 | if ((fold == CUPS_FOLD_FULL) | |
1658 | && (sscanf(s, "%lx %lx %lx %lx", | |
47c9dfee | 1659 | &unichar1, &unichar2, &unichar3, &unichar4) != 4)) |
c5061b15 | 1660 | break; |
1661 | if ((unichar1 > 0xffff) | |
1662 | || (unichar2 > 0xffff) | |
1663 | || (unichar3 > 0xffff) | |
1664 | || (unichar4 > 0xffff)) | |
1665 | break; | |
4a95bc63 | 1666 | *uni2fold ++ = (cups_ucs2_t) unichar1; |
1667 | *uni2fold ++ = (cups_ucs2_t) unichar2; | |
1668 | *uni2fold ++ = (cups_ucs2_t) unichar3; | |
1669 | *uni2fold ++ = (cups_ucs2_t) unichar4; | |
c5061b15 | 1670 | i ++; |
1671 | } | |
1672 | if (i < foldcount) | |
1673 | fmap->foldcount = i; | |
03f61bf3 | 1674 | cupsFileClose(fp); |
c5061b15 | 1675 | return (0); |
1676 | } | |
1677 | ||
1678 | /* | |
47c9dfee | 1679 | * 'get_propmap()' - Get Unicode character property map to cache. |
c5061b15 | 1680 | */ |
03f61bf3 | 1681 | |
1682 | static int /* O - Zero or -1 on error */ | |
c5061b15 | 1683 | get_propmap(void) |
1684 | { | |
03f61bf3 | 1685 | int i, j; /* Looping variables */ |
b296d7d9 | 1686 | size_t len; /* String length */ |
03f61bf3 | 1687 | cups_utf32_t unichar; /* Unicode character value */ |
1688 | cups_gencat_t gencat; /* General Category Value */ | |
2625096f | 1689 | cups_bidi_t bidicat; /* Bidi Category Value */ |
1690 | _cups_prop_map_t *pmap; /* Unicode Char Property Map */ | |
03f61bf3 | 1691 | int propcount; /* Count of Unicode Source Chars */ |
2625096f | 1692 | _cups_prop_t *uni2prop; /* Unicode Char -> Properties */ |
03f61bf3 | 1693 | char *mapname; /* Char Property map name */ |
1694 | char filename[1024]; /* Filename for charset map file */ | |
1695 | cups_file_t *fp; /* Char Property map file pointer */ | |
1696 | char *s; /* Line parsing pointer */ | |
1697 | char line[256]; /* Line from input map file */ | |
2625096f | 1698 | _cups_globals_t *cg = _cupsGlobals(); |
03f61bf3 | 1699 | /* Pointer to library globals */ |
1700 | ||
c5061b15 | 1701 | |
1702 | /* | |
1703 | * See if we already have this char properties map loaded... | |
1704 | */ | |
03f61bf3 | 1705 | |
1706 | if ((pmap = cg->propmap_cache) != NULL) | |
c5061b15 | 1707 | return (0); |
1708 | ||
1709 | /* | |
a501ad17 | 1710 | * Get the mapping name... |
c5061b15 | 1711 | */ |
03f61bf3 | 1712 | |
c5061b15 | 1713 | mapname = "uni-prop.txt"; |
1714 | ||
1715 | /* | |
1716 | * Open char properties map input file... | |
1717 | */ | |
47c9dfee | 1718 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", |
a501ad17 | 1719 | cg->cups_datadir, mapname); |
c5061b15 | 1720 | if ((propcount = get_map_count(filename)) <= 0) |
1721 | return (-1); | |
03f61bf3 | 1722 | fp = cupsFileOpen(filename, "r"); |
c5061b15 | 1723 | if (fp == NULL) |
1724 | return (-1); | |
1725 | ||
1726 | /* | |
1727 | * Allocate memory for char properties map and add to cache... | |
1728 | */ | |
2625096f | 1729 | pmap = (_cups_prop_map_t *)calloc(1, sizeof(_cups_prop_map_t)); |
c5061b15 | 1730 | if (pmap == NULL) |
1731 | { | |
03f61bf3 | 1732 | cupsFileClose(fp); |
c5061b15 | 1733 | return (-1); |
1734 | } | |
2625096f | 1735 | uni2prop = (_cups_prop_t *)calloc(1, sizeof(_cups_prop_t) * propcount); |
c5061b15 | 1736 | if (uni2prop == NULL) |
1737 | { | |
03f61bf3 | 1738 | free(pmap); |
1739 | cupsFileClose(fp); | |
c5061b15 | 1740 | return (-1); |
1741 | } | |
03f61bf3 | 1742 | cg->propmap_cache = pmap; |
c5061b15 | 1743 | pmap->used ++; |
1744 | pmap->propcount = propcount; | |
1745 | pmap->uni2prop = uni2prop; | |
1746 | ||
1747 | /* | |
1748 | * Save char properties map into memory for later use... | |
1749 | */ | |
1750 | for (i = 0; i < propcount; ) | |
1751 | { | |
03f61bf3 | 1752 | s = cupsFileGets(fp, line, sizeof(line)); |
c5061b15 | 1753 | if (s == NULL) |
1754 | break; | |
1755 | if (strlen(s) > 0) | |
1756 | *(s + strlen(s) - 1) = '\0'; | |
47c9dfee | 1757 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) |
c5061b15 | 1758 | continue; |
1759 | if (sscanf(s, "%lx", &unichar) != 1) | |
1760 | break; | |
1761 | if (unichar > 0xffff) | |
1762 | break; | |
1763 | while ((*s != '\0') && (*s != ';')) | |
1764 | s ++; | |
1765 | if (*s != ';') | |
1766 | break; | |
1767 | s ++; | |
1768 | for (j = 0; gencat_index[j].str != NULL; j ++) | |
1769 | { | |
1770 | len = strlen(gencat_index[j].str); | |
1771 | if (strncmp (s, gencat_index[j].str, len) == 0) | |
47c9dfee | 1772 | break; |
c5061b15 | 1773 | } |
1774 | if (gencat_index[j].str == NULL) | |
1775 | return (-1); | |
1776 | gencat = gencat_index[j].gencat; | |
1777 | while ((*s != '\0') && (*s != ';')) | |
1778 | s ++; | |
1779 | if (*s != ';') | |
1780 | break; | |
1781 | s ++; | |
1782 | for (j = 0; bidicat_index[j] != NULL; j ++) | |
1783 | { | |
1784 | len = strlen(bidicat_index[j]); | |
1785 | if (strncmp (s, bidicat_index[j], len) == 0) | |
47c9dfee | 1786 | break; |
c5061b15 | 1787 | } |
1788 | if (bidicat_index[j] == NULL) | |
1789 | return (-1); | |
2625096f | 1790 | bidicat = (cups_bidi_t) j; |
4a95bc63 | 1791 | uni2prop->ch = (cups_ucs2_t) unichar; |
c5061b15 | 1792 | uni2prop->gencat = (unsigned char) gencat; |
1793 | uni2prop->bidicat = (unsigned char) bidicat; | |
1794 | uni2prop ++; | |
1795 | i ++; | |
1796 | } | |
1797 | if (i < propcount) | |
1798 | pmap->propcount = i; | |
03f61bf3 | 1799 | cupsFileClose(fp); |
c5061b15 | 1800 | return (0); |
1801 | } | |
1802 | ||
03f61bf3 | 1803 | |
c5061b15 | 1804 | /* |
47c9dfee | 1805 | * 'get_combmap()' - Get Unicode combining class map to cache. |
c5061b15 | 1806 | */ |
03f61bf3 | 1807 | |
1808 | static int /* O - Zero or -1 on error */ | |
c5061b15 | 1809 | get_combmap(void) |
1810 | { | |
03f61bf3 | 1811 | int i; /* Looping variable */ |
1812 | cups_utf32_t unichar; /* Unicode character value */ | |
1813 | int combclass; /* Unicode char combining class */ | |
2625096f | 1814 | _cups_comb_map_t *cmap; /* Unicode Comb Class Map */ |
03f61bf3 | 1815 | int combcount; /* Count of Unicode Source Chars */ |
2625096f | 1816 | _cups_comb_t *uni2comb; /* Unicode Char -> Combining Class */ |
03f61bf3 | 1817 | char *mapname; /* Comb Class map name */ |
1818 | char filename[1024]; /* Filename for charset map file */ | |
1819 | cups_file_t *fp; /* Comb Class map file pointer */ | |
1820 | char *s; /* Line parsing pointer */ | |
1821 | char line[256]; /* Line from input map file */ | |
2625096f | 1822 | _cups_globals_t *cg = _cupsGlobals(); |
03f61bf3 | 1823 | /* Pointer to library globals */ |
1824 | ||
c5061b15 | 1825 | |
1826 | /* | |
1827 | * See if we already have this combining class map loaded... | |
1828 | */ | |
03f61bf3 | 1829 | |
1830 | if ((cmap = cg->combmap_cache) != NULL) | |
c5061b15 | 1831 | return (0); |
1832 | ||
1833 | /* | |
a501ad17 | 1834 | * Get the mapping name... |
c5061b15 | 1835 | */ |
03f61bf3 | 1836 | |
c5061b15 | 1837 | mapname = "uni-comb.txt"; |
1838 | ||
1839 | /* | |
1840 | * Open combining class map input file... | |
1841 | */ | |
03f61bf3 | 1842 | |
47c9dfee | 1843 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", |
a501ad17 | 1844 | cg->cups_datadir, mapname); |
c5061b15 | 1845 | if ((combcount = get_map_count(filename)) <= 0) |
1846 | return (-1); | |
03f61bf3 | 1847 | fp = cupsFileOpen(filename, "r"); |
c5061b15 | 1848 | if (fp == NULL) |
1849 | return (-1); | |
1850 | ||
1851 | /* | |
1852 | * Allocate memory for combining class map and add to cache... | |
1853 | */ | |
03f61bf3 | 1854 | |
2625096f | 1855 | cmap = (_cups_comb_map_t *)calloc(1, sizeof(_cups_comb_map_t)); |
c5061b15 | 1856 | if (cmap == NULL) |
1857 | { | |
03f61bf3 | 1858 | cupsFileClose(fp); |
c5061b15 | 1859 | return (-1); |
1860 | } | |
03f61bf3 | 1861 | |
2625096f | 1862 | uni2comb = (_cups_comb_t *)calloc(1, sizeof(_cups_comb_t) * combcount); |
c5061b15 | 1863 | if (uni2comb == NULL) |
1864 | { | |
03f61bf3 | 1865 | free(cmap); |
1866 | cupsFileClose(fp); | |
c5061b15 | 1867 | return (-1); |
1868 | } | |
03f61bf3 | 1869 | cg->combmap_cache = cmap; |
c5061b15 | 1870 | cmap->used ++; |
1871 | cmap->combcount = combcount; | |
1872 | cmap->uni2comb = uni2comb; | |
1873 | ||
1874 | /* | |
1875 | * Save combining class map into memory for later use... | |
1876 | */ | |
1877 | for (i = 0; i < combcount; ) | |
1878 | { | |
03f61bf3 | 1879 | s = cupsFileGets(fp, line, sizeof(line)); |
c5061b15 | 1880 | if (s == NULL) |
1881 | break; | |
1882 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) | |
1883 | continue; | |
1884 | if (sscanf(s, "%lx", &unichar) != 1) | |
1885 | break; | |
1886 | if (unichar > 0xffff) | |
1887 | break; | |
1888 | while ((*s != '\0') && (*s != ';')) | |
1889 | s ++; | |
1890 | if (*s != ';') | |
1891 | break; | |
1892 | s ++; | |
1893 | if (sscanf(s, "%d", &combclass) != 1) | |
1894 | break; | |
4a95bc63 | 1895 | uni2comb->ch = (cups_ucs2_t) unichar; |
c5061b15 | 1896 | uni2comb->combclass = (unsigned char) combclass; |
1897 | uni2comb ++; | |
1898 | i ++; | |
1899 | } | |
1900 | if (i < combcount) | |
1901 | cmap->combcount = i; | |
03f61bf3 | 1902 | cupsFileClose(fp); |
c5061b15 | 1903 | return (0); |
1904 | } | |
1905 | ||
03f61bf3 | 1906 | |
c5061b15 | 1907 | /* |
47c9dfee | 1908 | * 'get_breakmap()' - Get Unicode line break class map to cache. |
c5061b15 | 1909 | */ |
03f61bf3 | 1910 | |
1911 | static int /* O - Zero or -1 on error */ | |
c5061b15 | 1912 | get_breakmap(void) |
1913 | { | |
03f61bf3 | 1914 | int i, j; /* Looping variables */ |
1915 | int len; /* String length */ | |
1916 | cups_utf32_t unichar1; /* Unicode character value */ | |
1917 | cups_utf32_t unichar2; /* Unicode character value */ | |
67871650 | 1918 | cups_break_class_t breakclass; /* Unicode char line break class */ |
2625096f | 1919 | _cups_break_map_t *bmap; /* Unicode Line Break Class Map */ |
03f61bf3 | 1920 | int breakcount; /* Count of Unicode Source Chars */ |
1921 | cups_ucs2_t *uni2break; /* Unicode -> Line Break Class */ | |
03f61bf3 | 1922 | char *mapname; /* Comb Class map name */ |
1923 | char filename[1024]; /* Filename for charset map file */ | |
1924 | cups_file_t *fp; /* Comb Class map file pointer */ | |
1925 | char *s; /* Line parsing pointer */ | |
1926 | char line[256]; /* Line from input map file */ | |
2625096f | 1927 | _cups_globals_t *cg = _cupsGlobals(); |
03f61bf3 | 1928 | /* Pointer to library globals */ |
1929 | ||
c5061b15 | 1930 | |
1931 | /* | |
1932 | * See if we already have this line break class map loaded... | |
1933 | */ | |
03f61bf3 | 1934 | |
1935 | if ((bmap = cg->breakmap_cache) != NULL) | |
c5061b15 | 1936 | return (0); |
1937 | ||
1938 | /* | |
a501ad17 | 1939 | * Get the mapping name... |
c5061b15 | 1940 | */ |
03f61bf3 | 1941 | |
c5061b15 | 1942 | mapname = "uni-line.txt"; |
1943 | ||
1944 | /* | |
1945 | * Open line break class map input file... | |
1946 | */ | |
03f61bf3 | 1947 | |
47c9dfee | 1948 | snprintf(filename, sizeof(filename), "%s/charmaps/%s", |
a501ad17 | 1949 | cg->cups_datadir, mapname); |
c5061b15 | 1950 | if ((breakcount = get_map_count(filename)) <= 0) |
1951 | return (-1); | |
03f61bf3 | 1952 | fp = cupsFileOpen(filename, "r"); |
c5061b15 | 1953 | if (fp == NULL) |
1954 | return (-1); | |
1955 | ||
1956 | /* | |
1957 | * Allocate memory for line break class map and add to cache... | |
1958 | */ | |
03f61bf3 | 1959 | |
2625096f | 1960 | bmap = (_cups_break_map_t *)calloc(1, sizeof(_cups_break_map_t)); |
c5061b15 | 1961 | if (bmap == NULL) |
1962 | { | |
03f61bf3 | 1963 | cupsFileClose(fp); |
c5061b15 | 1964 | return (-1); |
1965 | } | |
03f61bf3 | 1966 | |
1967 | uni2break = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * breakcount); | |
c5061b15 | 1968 | if (uni2break == NULL) |
1969 | { | |
03f61bf3 | 1970 | free(bmap); |
1971 | cupsFileClose(fp); | |
c5061b15 | 1972 | return (-1); |
1973 | } | |
03f61bf3 | 1974 | cg->breakmap_cache = bmap; |
c5061b15 | 1975 | bmap->used ++; |
1976 | bmap->breakcount = breakcount; | |
1977 | bmap->uni2break = uni2break; | |
1978 | ||
1979 | /* | |
1980 | * Save line break class map into memory for later use... | |
1981 | */ | |
1982 | for (i = 0; i < breakcount; ) | |
1983 | { | |
03f61bf3 | 1984 | s = cupsFileGets(fp, line, sizeof(line)); |
c5061b15 | 1985 | if (s == NULL) |
1986 | break; | |
1987 | if (strlen(s) > 0) | |
1988 | *(s + strlen(s) - 1) = '\0'; | |
47c9dfee | 1989 | if ((*s == '#') || (*s == '\n') || (*s == '\0')) |
c5061b15 | 1990 | continue; |
1991 | if (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2) | |
1992 | break; | |
1993 | if ((unichar1 > 0xffff) | |
1994 | || (unichar2 > 0xffff)) | |
1995 | break; | |
1996 | while ((*s != '\0') && (*s != ';')) | |
1997 | s ++; | |
1998 | if (*s != ';') | |
1999 | break; | |
2000 | s ++; | |
2001 | for (j = 0; break_index[j].str != NULL; j ++) | |
2002 | { | |
2003 | len = strlen (break_index[j].str); | |
2004 | if (strncmp (s, break_index[j].str, len) == 0) | |
47c9dfee | 2005 | break; |
c5061b15 | 2006 | } |
2007 | if (break_index[j].str == NULL) | |
2008 | return (-1); | |
2009 | breakclass = break_index[j].breakclass; | |
4a95bc63 | 2010 | *uni2break ++ = (cups_ucs2_t) unichar1; |
2011 | *uni2break ++ = (cups_ucs2_t) unichar2; | |
2012 | *uni2break ++ = (cups_ucs2_t) breakclass; | |
c5061b15 | 2013 | i ++; |
2014 | } | |
2015 | if (i < breakcount) | |
2016 | bmap->breakcount = i; | |
03f61bf3 | 2017 | cupsFileClose(fp); |
c5061b15 | 2018 | return (0); |
2019 | } | |
2020 | ||
03f61bf3 | 2021 | |
c5061b15 | 2022 | /* |
2023 | * 'compare_compose()' - Compare key for compose match. | |
2024 | * | |
2025 | * Note - This function cannot be easily modified for 32-bit Unicode. | |
2026 | */ | |
03f61bf3 | 2027 | |
2028 | static int /* O - Result of comparison */ | |
2029 | compare_compose(const void *k1, /* I - Key char */ | |
2030 | const void *k2) /* I - Map char */ | |
c5061b15 | 2031 | { |
03f61bf3 | 2032 | cups_utf32_t *kp = (cups_utf32_t *)k1; |
2033 | /* Key char pointer */ | |
2034 | cups_ucs2_t *mp = (cups_ucs2_t *)k2;/* Map char pointer */ | |
2035 | unsigned long key; /* Pair of key characters */ | |
2036 | unsigned long map; /* Pair of map characters */ | |
2037 | int result; /* Result Value */ | |
2038 | ||
c5061b15 | 2039 | |
2040 | key = (*kp << 16); | |
2041 | key |= *(kp + 1); | |
2042 | map = (unsigned long) (*mp << 16); | |
2043 | map |= (unsigned long) *(mp + 1); | |
03f61bf3 | 2044 | |
c5061b15 | 2045 | if (key >= map) |
2046 | result = (int) (key - map); | |
2047 | else | |
2048 | result = -1 * ((int) (map - key)); | |
03f61bf3 | 2049 | |
c5061b15 | 2050 | return (result); |
2051 | } | |
2052 | ||
03f61bf3 | 2053 | |
c5061b15 | 2054 | /* |
2055 | * 'compare_decompose()' - Compare key for decompose match. | |
2056 | */ | |
03f61bf3 | 2057 | |
2058 | static int /* O - Result of comparison */ | |
2059 | compare_decompose(const void *k1, /* I - Key char */ | |
2060 | const void *k2) /* I - Map char */ | |
c5061b15 | 2061 | { |
03f61bf3 | 2062 | cups_utf32_t *kp = (cups_utf32_t *)k1; |
2063 | /* Key char pointer */ | |
2064 | cups_ucs2_t *mp = (cups_ucs2_t *)k2;/* Map char pointer */ | |
2065 | cups_ucs2_t ch; /* Key char as UCS-2 */ | |
2066 | int result; /* Result Value */ | |
2067 | ||
c5061b15 | 2068 | |
4a95bc63 | 2069 | ch = (cups_ucs2_t) *kp; |
03f61bf3 | 2070 | |
c5061b15 | 2071 | if (ch >= *mp) |
2072 | result = (int) (ch - *mp); | |
2073 | else | |
2074 | result = -1 * ((int) (*mp - ch)); | |
03f61bf3 | 2075 | |
c5061b15 | 2076 | return (result); |
2077 | } | |
2078 | ||
03f61bf3 | 2079 | |
c5061b15 | 2080 | /* |
2081 | * 'compare_foldchar()' - Compare key for case fold match. | |
2082 | */ | |
03f61bf3 | 2083 | |
2084 | static int /* O - Result of comparison */ | |
2085 | compare_foldchar(const void *k1, /* I - Key char */ | |
2086 | const void *k2) /* I - Map char */ | |
c5061b15 | 2087 | { |
03f61bf3 | 2088 | cups_utf32_t *kp = (cups_utf32_t *)k1; |
2089 | /* Key char pointer */ | |
2090 | cups_ucs2_t *mp = (cups_ucs2_t *)k2;/* Map char pointer */ | |
2091 | cups_ucs2_t ch; /* Key char as UCS-2 */ | |
2092 | int result; /* Result Value */ | |
2093 | ||
c5061b15 | 2094 | |
4a95bc63 | 2095 | ch = (cups_ucs2_t) *kp; |
03f61bf3 | 2096 | |
c5061b15 | 2097 | if (ch >= *mp) |
2098 | result = (int) (ch - *mp); | |
2099 | else | |
2100 | result = -1 * ((int) (*mp - ch)); | |
03f61bf3 | 2101 | |
c5061b15 | 2102 | return (result); |
2103 | } | |
2104 | ||
03f61bf3 | 2105 | |
c5061b15 | 2106 | /* |
2107 | * 'compare_combchar()' - Compare key for combining char match. | |
2108 | */ | |
03f61bf3 | 2109 | |
2110 | static int /* O - Result of comparison */ | |
2111 | compare_combchar(const void *k1, /* I - Key char */ | |
2112 | const void *k2) /* I - Map char */ | |
c5061b15 | 2113 | { |
03f61bf3 | 2114 | cups_utf32_t *kp = (cups_utf32_t *)k1; |
2115 | /* Key char pointer */ | |
2625096f | 2116 | _cups_comb_t *cp = (_cups_comb_t *)k2;/* Combining map row pointer */ |
03f61bf3 | 2117 | cups_ucs2_t ch; /* Key char as UCS-2 */ |
2118 | int result; /* Result Value */ | |
2119 | ||
c5061b15 | 2120 | |
4a95bc63 | 2121 | ch = (cups_ucs2_t) *kp; |
03f61bf3 | 2122 | |
c5061b15 | 2123 | if (ch >= cp->ch) |
2124 | result = (int) (ch - cp->ch); | |
2125 | else | |
2126 | result = -1 * ((int) (cp->ch - ch)); | |
03f61bf3 | 2127 | |
c5061b15 | 2128 | return (result); |
2129 | } | |
2130 | ||
03f61bf3 | 2131 | |
c5061b15 | 2132 | /* |
2133 | * 'compare_breakchar()' - Compare key for line break char match. | |
2134 | */ | |
03f61bf3 | 2135 | |
2136 | static int /* O - Result of comparison */ | |
2137 | compare_breakchar(const void *k1, /* I - Key char */ | |
2138 | const void *k2) /* I - Map char */ | |
c5061b15 | 2139 | { |
03f61bf3 | 2140 | cups_utf32_t *kp = (cups_utf32_t *)k1; |
2141 | /* Key char pointer */ | |
2142 | cups_ucs2_t *mp = (cups_ucs2_t *)k2;/* Map char pointer */ | |
2143 | cups_ucs2_t ch; /* Key char as UCS-2 */ | |
2144 | int result; /* Result Value */ | |
2145 | ||
c5061b15 | 2146 | |
4a95bc63 | 2147 | ch = (cups_ucs2_t) *kp; |
03f61bf3 | 2148 | |
c5061b15 | 2149 | if (ch < *mp) |
2150 | result = -1 * (int) (*mp - ch); | |
2151 | else if (ch > *(mp + 1)) | |
2152 | result = (int) (ch - *(mp + 1)); | |
2153 | else | |
2154 | result = 0; | |
03f61bf3 | 2155 | |
c5061b15 | 2156 | return (result); |
2157 | } | |
2158 | ||
03f61bf3 | 2159 | |
c5061b15 | 2160 | /* |
2161 | * 'compare_propchar()' - Compare key for property char match. | |
2162 | */ | |
03f61bf3 | 2163 | |
2164 | static int /* O - Result of comparison */ | |
2165 | compare_propchar(const void *k1, /* I - Key char */ | |
2166 | const void *k2) /* I - Map char */ | |
c5061b15 | 2167 | { |
03f61bf3 | 2168 | cups_utf32_t *kp = (cups_utf32_t *)k1; |
2169 | /* Key char pointer */ | |
2625096f | 2170 | _cups_prop_t *pp = (_cups_prop_t *)k2;/* Property map row pointer */ |
03f61bf3 | 2171 | cups_ucs2_t ch; /* Key char as UCS-2 */ |
2172 | int result; /* Result Value */ | |
2173 | ||
c5061b15 | 2174 | |
4a95bc63 | 2175 | ch = (cups_ucs2_t) *kp; |
03f61bf3 | 2176 | |
c5061b15 | 2177 | if (ch >= pp->ch) |
2178 | result = (int) (ch - pp->ch); | |
2179 | else | |
2180 | result = -1 * ((int) (pp->ch - ch)); | |
03f61bf3 | 2181 | |
c5061b15 | 2182 | return (result); |
2183 | } | |
2184 | ||
03f61bf3 | 2185 | |
c5061b15 | 2186 | /* |
c9d3f842 | 2187 | * End of "$Id$" |
c5061b15 | 2188 | */ |