]> git.ipfire.org Git - thirdparty/cups.git/blame - cups/normalize.c
File cleanup.
[thirdparty/cups.git] / cups / normalize.c
CommitLineData
c5061b15 1/*
c9d3f842 2 * "$Id$"
c5061b15 3 *
47c9dfee 4 * Unicode normalization for the Common UNIX Printing System (CUPS).
c5061b15 5 *
24c1b5ce 6 * Copyright 1997-2006 by Easy Software Products.
c5061b15 7 *
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
13 * Products at:
14 *
47c9dfee 15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
c9d3f842 18 * Hollywood, Maryland 20636 USA
c5061b15 19 *
9639c4de 20 * Voice: (301) 373-9600
47c9dfee 21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
c5061b15 23 *
24 * Contents:
25 *
47c9dfee 26 * cupsNormalizeMapsGet() - Get all norm maps to cache.
27 * cupsNormalizeMapsFree() - Free all norm maps in cache.
28 * cupsNormalizeMapsFlush() - Flush all norm maps in cache.
158e5e17 29 * _cupsNormalizeMapsFlush() - Flush all normalization maps in cache.
47c9dfee 30 * cupsUTF8Normalize() - Normalize UTF-8 string.
31 * cupsUTF32Normalize() - Normalize UTF-32 string.
32 * cupsUTF8CaseFold() - Case fold UTF-8 string.
33 * cupsUTF32CaseFold() - Case fold UTF-32 string.
34 * cupsUTF8CompareCaseless() - Compare case folded UTF-8 strings.
35 * cupsUTF32CompareCaseless() - Compare case folded UTF-32 strings.
4a95bc63 36 * cupsUTF8CompareIdentifier() - Compare folded NFKC UTF-8 strings.
37 * cupsUTF32CompareIdentifier() - Compare folded NFKC UTF-32 strings.
47c9dfee 38 * cupsUTF32CharacterProperty() - Get UTF-32 character property.
39 * get_general_category() - Get UTF-32 Char General Category.
40 * get_bidi_category() - Get UTF-32 Char Bidi Category.
41 * get_combining_class() - Get UTF-32 Char Combining Class.
42 * get_break_class() - Get UTF-32 Char Line Break Class.
43 * get_map_count() - Count lines in a map file.
44 * get_normmap() - Get Unicode norm map to cache.
45 * get_foldmap() - Get Unicode casefold map to cache.
46 * get_propmap() - Get Unicode property map to cache.
47 * get_combmap() - Get Unicode combining map to cache.
48 * get_breakmap() - Get Unicode break map to cache.
49 * compare_compose() - Compare key for compose match.
50 * compare_decompose() - Compare key for decompose match.
51 * compare_foldchar() - Compare key for case fold match.
52 * compare_combchar() - Compare key for combining match.
53 * compare_breakchar() - Compare key for line break match.
54 * compare_propchar() - Compare key for property char match.
c5061b15 55 */
56
57/*
58 * Include necessary headers...
59 */
60
03f61bf3 61#include "globals.h"
62#include "debug.h"
c5061b15 63#include <stdlib.h>
c5061b15 64#include <errno.h>
c5061b15 65#include <time.h>
66
c5061b15 67
03f61bf3 68typedef struct /**** General Category Index Struct****/
c5061b15 69{
03f61bf3 70 cups_gencat_t gencat; /* General Category Value */
71 const char *str; /* General Category String */
c5061b15 72} gencat_t;
73
03f61bf3 74static const gencat_t gencat_index[] = /* General Category Index */
c5061b15 75{
03f61bf3 76 { CUPS_GENCAT_LU, "Lu" }, /* Letter, Uppercase */
77 { CUPS_GENCAT_LL, "Ll" }, /* Letter, Lowercase */
78 { CUPS_GENCAT_LT, "Lt" }, /* Letter, Titlecase */
79 { CUPS_GENCAT_LM, "Lm" }, /* Letter, Modifier */
80 { CUPS_GENCAT_LO, "Lo" }, /* Letter, Other */
81 { CUPS_GENCAT_MN, "Mn" }, /* Mark, Non-Spacing */
82 { CUPS_GENCAT_MC, "Mc" }, /* Mark, Spacing Combining */
83 { CUPS_GENCAT_ME, "Me" }, /* Mark, Enclosing */
84 { CUPS_GENCAT_ND, "Nd" }, /* Number, Decimal Digit */
85 { CUPS_GENCAT_NL, "Nl" }, /* Number, Letter */
86 { CUPS_GENCAT_NO, "No" }, /* Number, Other */
87 { CUPS_GENCAT_PC, "Pc" }, /* Punctuation, Connector */
88 { CUPS_GENCAT_PD, "Pd" }, /* Punctuation, Dash */
89 { CUPS_GENCAT_PS, "Ps" }, /* Punctuation, Open (start) */
90 { CUPS_GENCAT_PE, "Pe" }, /* Punctuation, Close (end) */
91 { CUPS_GENCAT_PI, "Pi" }, /* Punctuation, Initial Quote */
92 { CUPS_GENCAT_PF, "Pf" }, /* Punctuation, Final Quote */
93 { CUPS_GENCAT_PO, "Po" }, /* Punctuation, Other */
94 { CUPS_GENCAT_SM, "Sm" }, /* Symbol, Math */
95 { CUPS_GENCAT_SC, "Sc" }, /* Symbol, Currency */
96 { CUPS_GENCAT_SK, "Sk" }, /* Symbol, Modifier */
97 { CUPS_GENCAT_SO, "So" }, /* Symbol, Other */
98 { CUPS_GENCAT_ZS, "Zs" }, /* Separator, Space */
99 { CUPS_GENCAT_ZL, "Zl" }, /* Separator, Line */
100 { CUPS_GENCAT_ZP, "Zp" }, /* Separator, Paragraph */
101 { CUPS_GENCAT_CC, "Cc" }, /* Other, Control */
102 { CUPS_GENCAT_CF, "Cf" }, /* Other, Format */
103 { CUPS_GENCAT_CS, "Cs" }, /* Other, Surrogate */
104 { CUPS_GENCAT_CO, "Co" }, /* Other, Private Use */
105 { CUPS_GENCAT_CN, "Cn" }, /* Other, Not Assigned */
c5061b15 106 { 0, NULL }
107};
108
/*
 * Table of Bidi Category abbreviations; the list is NULL-terminated.
 */

static const char * const bidicat_index[] =
{
  "L",                                  /* Left-to-Right (Alpha, Syllabic, Ideographic) */
  "LRE",                                /* Left-to-Right Embedding (explicit) */
  "LRO",                                /* Left-to-Right Override (explicit) */
  "R",                                  /* Right-to-Left (Hebrew alphabet and most punct) */
  "AL",                                 /* Right-to-Left Arabic (Arabic, Thaana, Syriac) */
  "RLE",                                /* Right-to-Left Embedding (explicit) */
  "RLO",                                /* Right-to-Left Override (explicit) */
  "PDF",                                /* Pop Directional Format */
  "EN",                                 /* Euro Number (Euro and East Arabic-Indic digits) */
  "ES",                                 /* Euro Number Separator (Slash) */
  "ET",                                 /* Euro Number Terminator (Plus, Minus, Degree, etc) */
  "AN",                                 /* Arabic Number (Arabic-Indic digits, separators) */
  "CS",                                 /* Common Number Separator (Colon, Comma, Dot, etc) */
  "NSM",                                /* Non-Spacing Mark (category Mn / Me in UCD) */
  "BN",                                 /* Boundary Neutral (Formatting / Control chars) */
  "B",                                  /* Paragraph Separator */
  "S",                                  /* Segment Separator (Tab) */
  "WS",                                 /* Whitespace Space (Space, Line Separator, etc) */
  "ON",                                 /* Other Neutrals */
  NULL                                  /* End of table */
};
133
03f61bf3 134typedef struct /**** Line Break Class Index Struct****/
c5061b15 135{
67871650 136 cups_break_class_t breakclass; /* Line Break Class Value */
03f61bf3 137 const char *str; /* Line Break Class String */
2625096f 138} _cups_break_t;
c5061b15 139
2625096f 140static const _cups_break_t break_index[] = /* Line Break Class Index */
c5061b15 141{
03f61bf3 142 { CUPS_BREAK_AI, "AI" }, /* Ambiguous (Alphabetic or Ideograph) */
143 { CUPS_BREAK_AL, "AL" }, /* Ordinary Alpha/Symbol Chars (XP) */
144 { CUPS_BREAK_BA, "BA" }, /* Break Opportunity After Chars (A) */
145 { CUPS_BREAK_BB, "BB" }, /* Break Opportunities Before Chars (B) */
146 { CUPS_BREAK_B2, "B2" }, /* Break Opportunity Either (B/A/XP) */
147 { CUPS_BREAK_BK, "BK" }, /* Mandatory Break (A) (norm) */
148 { CUPS_BREAK_CB, "CB" }, /* Contingent Break (B/A) (norm) */
149 { CUPS_BREAK_CL, "CL" }, /* Closing Punctuation (XB) */
150 { CUPS_BREAK_CM, "CM" }, /* Attached/Combining (XB) (norm) */
151 { CUPS_BREAK_CR, "CR" }, /* Carriage Return (A) (norm) */
152 { CUPS_BREAK_EX, "EX" }, /* Exclamation / Interrogation (XB) */
153 { CUPS_BREAK_GL, "GL" }, /* Non-breaking ("Glue") (XB/XA) (norm) */
154 { CUPS_BREAK_HY, "HY" }, /* Hyphen (XA) */
155 { CUPS_BREAK_ID, "ID" }, /* Ideographic (B/A) */
156 { CUPS_BREAK_IN, "IN" }, /* Inseparable chars (XP) */
157 { CUPS_BREAK_IS, "IS" }, /* Numeric Separator (Infix) (XB) */
158 { CUPS_BREAK_LF, "LF" }, /* Line Feed (A) (norm) */
159 { CUPS_BREAK_NS, "NS" }, /* Non-starters (XB) */
160 { CUPS_BREAK_NU, "NU" }, /* Numeric (XP) */
161 { CUPS_BREAK_OP, "OP" }, /* Opening Punctuation (XA) */
162 { CUPS_BREAK_PO, "PO" }, /* Postfix (Numeric) (XB) */
163 { CUPS_BREAK_PR, "PR" }, /* Prefix (Numeric) (XA) */
164 { CUPS_BREAK_QU, "QU" }, /* Ambiguous Quotation (XB/XA) */
165 { CUPS_BREAK_SA, "SA" }, /* Context Dependent (SE Asian) (P) */
166 { CUPS_BREAK_SG, "SG" }, /* Surrogates (XP) (norm) */
167 { CUPS_BREAK_SP, "SP" }, /* Space (A) (norm) */
168 { CUPS_BREAK_SY, "SY" }, /* Symbols Allowing Break After (A) */
169 { CUPS_BREAK_XX, "XX" }, /* Unknown (XP) */
170 { CUPS_BREAK_ZW, "ZW" }, /* Zero Width Space (A) (norm) */
c5061b15 171 { 0, NULL }
172};
173
47c9dfee 174/*
175 * Prototypes...
176 */
177
03f61bf3 178static int compare_breakchar(const void *k1, const void *k2);
179static int compare_combchar(const void *k1, const void *k2);
180static int compare_compose(const void *k1, const void *k2);
181static int compare_decompose(const void *k1, const void *k2);
182static int compare_foldchar(const void *k1, const void *k2);
183static int compare_propchar(const void *k1, const void *k2);
4a95bc63 184static int get_bidi_category(const cups_utf32_t ch);
4a95bc63 185static int get_break_class(const cups_utf32_t ch);
03f61bf3 186static int get_breakmap(void);
187static int get_combining_class(const cups_utf32_t ch);
188static int get_combmap(void);
189static int get_foldmap(const cups_folding_t fold);
4a95bc63 190static int get_general_category(const cups_utf32_t ch);
47c9dfee 191static int get_map_count(const char *filename);
192static int get_normmap(const cups_normalize_t normalize);
c5061b15 193static int get_propmap(void);
03f61bf3 194
c5061b15 195
196/*
47c9dfee 197 * 'cupsNormalizeMapsGet()' - Get all normalization maps to cache.
c5061b15 198 */
03f61bf3 199
200int /* O - Zero or -1 on error */
c5061b15 201cupsNormalizeMapsGet(void)
202{
2625096f 203 _cups_norm_map_t *nmap; /* Unicode Normalization Map */
204 _cups_fold_map_t *fmap; /* Unicode Case Folding Map */
205 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 206 /* Pointer to library globals */
207
c5061b15 208
209 /*
210 * See if we already have normalization maps loaded...
211 */
03f61bf3 212
213 if (cg->normmap_cache)
c5061b15 214 {
03f61bf3 215 for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
c5061b15 216 nmap->used ++;
03f61bf3 217
218 for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
c5061b15 219 fmap->used ++;
03f61bf3 220
221 if (cg->combmap_cache)
222 cg->combmap_cache->used ++;
223
224 if (cg->propmap_cache)
225 cg->propmap_cache->used ++;
226
227 if (cg->breakmap_cache)
228 cg->breakmap_cache->used ++;
229
c5061b15 230 return (0);
231 }
232
233 /*
234 * Get normalization maps...
235 */
03f61bf3 236
c5061b15 237 if (get_normmap(CUPS_NORM_NFD) < 0)
238 return (-1);
03f61bf3 239
c5061b15 240 if (get_normmap(CUPS_NORM_NFKD) < 0)
241 return (-1);
03f61bf3 242
c5061b15 243 if (get_normmap(CUPS_NORM_NFC) < 0)
244 return (-1);
245
246 /*
247 * Get case folding, combining class, character property maps...
248 */
03f61bf3 249
c5061b15 250 if (get_foldmap(CUPS_FOLD_SIMPLE) < 0)
251 return (-1);
03f61bf3 252
c5061b15 253 if (get_foldmap(CUPS_FOLD_FULL) < 0)
254 return (-1);
03f61bf3 255
c5061b15 256 if (get_propmap() < 0)
257 return (-1);
03f61bf3 258
c5061b15 259 if (get_combmap() < 0)
260 return (-1);
03f61bf3 261
c5061b15 262 if (get_breakmap() < 0)
263 return (-1);
03f61bf3 264
c5061b15 265 return (0);
266}
267
03f61bf3 268
c5061b15 269/*
270 * 'cupsNormalizeMapsFree()' - Free all normalization maps in cache.
271 *
272 * This does not actually free; use 'cupsNormalizeMapsFlush()' for that.
273 */
03f61bf3 274
275int /* O - Zero or -1 on error */
c5061b15 276cupsNormalizeMapsFree(void)
277{
2625096f 278 _cups_norm_map_t *nmap; /* Unicode Normalization Map */
279 _cups_fold_map_t *fmap; /* Unicode Case Folding Map */
280 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 281 /* Pointer to library globals */
282
c5061b15 283
284 /*
285 * See if we already have normalization maps loaded...
286 */
03f61bf3 287
288 if (cg->normmap_cache == NULL)
c5061b15 289 return (-1);
03f61bf3 290
291 for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
c5061b15 292 if (nmap->used > 0)
293 nmap->used --;
03f61bf3 294
295 for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
c5061b15 296 if (fmap->used > 0)
297 fmap->used --;
03f61bf3 298
299 if (cg->propmap_cache && (cg->propmap_cache->used > 0))
300 cg->propmap_cache->used --;
301
302 if (cg->combmap_cache && (cg->combmap_cache->used > 0))
303 cg->combmap_cache->used --;
304
305 if (cg->breakmap_cache && (cg->breakmap_cache->used > 0))
306 cg->breakmap_cache->used --;
307
c5061b15 308 return (0);
309}
310
03f61bf3 311
c5061b15 312/*
313 * 'cupsNormalizeMapsFlush()' - Flush all normalization maps in cache.
314 */
03f61bf3 315
c5061b15 316void
317cupsNormalizeMapsFlush(void)
158e5e17 318{
319 _cupsNormalizeMapsFlush(_cupsGlobals());
320}
321
322
323/*
324 * '_cupsNormalizeMapsFlush()' - Flush all normalization maps in cache.
325 */
326
327void
328_cupsNormalizeMapsFlush(
329 _cups_globals_t *cg) /* I - Global data */
c5061b15 330{
2625096f 331 _cups_norm_map_t *nmap; /* Unicode Normalization Map */
332 _cups_norm_map_t *nextnorm; /* Next Unicode Normalization Map */
333 _cups_fold_map_t *fmap; /* Unicode Case Folding Map */
334 _cups_fold_map_t *nextfold; /* Next Unicode Case Folding Map */
03f61bf3 335
c5061b15 336
337 /*
338 * Flush all normalization maps...
339 */
03f61bf3 340
341 for (nmap = cg->normmap_cache; nmap != NULL; nmap = nextnorm)
c5061b15 342 {
343 free(nmap->uni2norm);
344 nextnorm = nmap->next;
345 free(nmap);
346 }
03f61bf3 347
348 cg->normmap_cache = NULL;
349
350 for (fmap = cg->foldmap_cache; fmap != NULL; fmap = nextfold)
c5061b15 351 {
352 free(fmap->uni2fold);
353 nextfold = fmap->next;
354 free(fmap);
355 }
03f61bf3 356
357 cg->foldmap_cache = NULL;
358
359 if (cg->propmap_cache)
c5061b15 360 {
03f61bf3 361 free(cg->propmap_cache->uni2prop);
362 free(cg->propmap_cache);
363 cg->propmap_cache = NULL;
c5061b15 364 }
03f61bf3 365
366 if (cg->combmap_cache)
c5061b15 367 {
03f61bf3 368 free(cg->combmap_cache->uni2comb);
369 free(cg->combmap_cache);
370 cg->combmap_cache = NULL;
c5061b15 371 }
03f61bf3 372
373 if (cg->breakmap_cache)
c5061b15 374 {
03f61bf3 375 free(cg->breakmap_cache->uni2break);
376 free(cg->breakmap_cache);
377 cg->breakmap_cache = NULL;
c5061b15 378 }
c5061b15 379}
380
03f61bf3 381
c5061b15 382/*
4a95bc63 383 * 'cupsUTF8Normalize()' - Normalize UTF-8 string.
c5061b15 384 *
385 * Normalize UTF-8 string to Unicode UAX-15 Normalization Form
386 * Note - Compatibility Normalization Forms (NFKD/NFKC) are
387 * unsafe for subsequent transcoding to legacy charsets
388 */
03f61bf3 389
390int /* O - Count or -1 on error */
391cupsUTF8Normalize(
392 cups_utf8_t *dest, /* O - Target string */
393 const cups_utf8_t *src, /* I - Source string */
394 const int maxout, /* I - Max output */
395 const cups_normalize_t normalize) /* I - Normalization */
c5061b15 396{
03f61bf3 397 int len; /* String length */
398 cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
399 cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
400
c5061b15 401
402 /*
403 * Check for valid arguments and clear output...
404 */
03f61bf3 405
406 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
c5061b15 407 return (-1);
03f61bf3 408
c5061b15 409 *dest = 0;
410
411 /*
412 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
413 */
03f61bf3 414
4a95bc63 415 len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING);
03f61bf3 416
c5061b15 417 if (len < 0)
418 return (-1);
419
420 /*
421 * Normalize internal UCS-4 to second internal UCS-4...
422 */
03f61bf3 423
4a95bc63 424 len = cupsUTF32Normalize(work2, work1, CUPS_MAX_USTRING, normalize);
03f61bf3 425
c5061b15 426 if (len < 0)
427 return (-1);
428
429 /*
430 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
431 */
03f61bf3 432
4a95bc63 433 len = cupsUTF32ToUTF8(dest, work2, maxout);
03f61bf3 434
c5061b15 435 return (len);
436}
437
03f61bf3 438
c5061b15 439/*
4a95bc63 440 * 'cupsUTF32Normalize()' - Normalize UTF-32 string.
c5061b15 441 *
442 * Normalize UTF-32 string to Unicode UAX-15 Normalization Form
443 * Note - Compatibility Normalization Forms (NFKD/NFKC) are
444 * unsafe for subsequent transcoding to legacy charsets
445 */
03f61bf3 446
447int /* O - Count or -1 on error */
448cupsUTF32Normalize(
449 cups_utf32_t *dest, /* O - Target string */
450 const cups_utf32_t *src, /* I - Source string */
451 const int maxout, /* I - Max output */
452 const cups_normalize_t normalize) /* I - Normalization */
c5061b15 453{
03f61bf3 454 int i; /* Looping variable */
455 int result; /* Result Value */
456 cups_ucs2_t *mp; /* Map char pointer */
457 int pass; /* Pass count for each transform */
458 int hit; /* Hit count from binary search */
459 cups_utf32_t unichar1; /* Unicode character value */
460 cups_utf32_t unichar2; /* Unicode character value */
2625096f 461 _cups_comb_class_t class1; /* First Combining Class */
462 _cups_comb_class_t class2; /* Second Combining Class */
03f61bf3 463 int len; /* String length */
464 cups_utf32_t work1[CUPS_MAX_USTRING];
465 /* First internal UCS-4 string */
466 cups_utf32_t work2[CUPS_MAX_USTRING];
467 /* Second internal UCS-4 string */
468 cups_utf32_t *p1; /* First UCS-4 string pointer */
469 cups_utf32_t *p2; /* Second UCS-4 string pointer */
2625096f 470 _cups_norm_map_t *nmap; /* Unicode Normalization Map */
03f61bf3 471 cups_normalize_t decompose; /* Decomposition Type */
2625096f 472 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 473 /* Pointer to library globals */
474
c5061b15 475
476 /*
477 * Check for valid arguments and clear output...
478 */
03f61bf3 479
480 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
c5061b15 481 return (-1);
03f61bf3 482
c5061b15 483 *dest = 0;
03f61bf3 484
c5061b15 485 result = cupsNormalizeMapsGet();
03f61bf3 486
c5061b15 487 if (result < 0)
488 return (-1);
489
490 /*
491 * Find decomposition map...
492 */
03f61bf3 493
c5061b15 494 switch (normalize)
495 {
496 case CUPS_NORM_NFD:
497 case CUPS_NORM_NFC:
03f61bf3 498 decompose = CUPS_NORM_NFD;
499 break;
500
c5061b15 501 case CUPS_NORM_NFKD:
502 case CUPS_NORM_NFKC:
03f61bf3 503 decompose = CUPS_NORM_NFKD;
504 break;
505
c5061b15 506 default:
03f61bf3 507 return (-1);
c5061b15 508 }
03f61bf3 509
510 for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
c5061b15 511 if (nmap->normalize == decompose)
512 break;
03f61bf3 513
c5061b15 514 if (nmap == NULL)
515 return (-1);
516
517 /*
518 * Copy input to internal buffer...
519 */
03f61bf3 520
c5061b15 521 p1 = &work1[0];
03f61bf3 522
c5061b15 523 for (i = 0; i < CUPS_MAX_USTRING; i ++)
524 {
525 if (*src == 0)
526 break;
03f61bf3 527
c5061b15 528 *p1 ++ = *src ++;
529 }
03f61bf3 530
c5061b15 531 *p1 = 0;
532 len = i;
533
534 /*
535 * Decompose until no further decomposition...
536 */
03f61bf3 537
47c9dfee 538 for (pass = 0; pass < 20; pass ++)
c5061b15 539 {
540 p1 = &work1[0];
541 p2 = &work2[0];
03f61bf3 542
c5061b15 543 for (hit = 0; *p1 != 0; p1 ++)
544 {
545 /*
546 * Check for decomposition defined...
547 */
03f61bf3 548
549 mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount,
550 (sizeof(cups_ucs2_t) * 3), compare_decompose);
c5061b15 551 if (mp == NULL)
552 {
47c9dfee 553 *p2 ++ = *p1;
554 continue;
c5061b15 555 }
556
557 /*
558 * Decompose input character to one or two output characters...
559 */
03f61bf3 560
c5061b15 561 hit ++;
562 mp ++;
4a95bc63 563 *p2 ++ = (cups_utf32_t) *mp ++;
03f61bf3 564
c5061b15 565 if (*mp != 0)
47c9dfee 566 *p2 ++ = (cups_utf32_t) *mp;
c5061b15 567 }
03f61bf3 568
c5061b15 569 *p2 = 0;
03f61bf3 570 len = (int)(p2 - &work2[0]);
c5061b15 571
572 /*
573 * Check for decomposition finished...
574 */
575 if (hit == 0)
576 break;
4a95bc63 577 memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1));
c5061b15 578 }
579
580 /*
581 * Canonical reorder until no further reordering...
582 */
03f61bf3 583
47c9dfee 584 for (pass = 0; pass < 20; pass ++)
c5061b15 585 {
586 p1 = &work1[0];
03f61bf3 587
c5061b15 588 for (hit = 0; *p1 != 0; p1 ++)
589 {
590 /*
591 * Check for combining characters to reorder...
592 */
03f61bf3 593
c5061b15 594 unichar1 = *p1;
595 unichar2 = *(p1 + 1);
03f61bf3 596
c5061b15 597 if (unichar2 == 0)
47c9dfee 598 break;
03f61bf3 599
c5061b15 600 class1 = get_combining_class(unichar1);
601 class2 = get_combining_class(unichar2);
03f61bf3 602
c5061b15 603 if ((class1 < 0) || (class2 < 0))
47c9dfee 604 return (-1);
03f61bf3 605
c5061b15 606 if ((class1 == 0) || (class2 == 0))
47c9dfee 607 continue;
03f61bf3 608
47c9dfee 609 if (class1 <= class2)
610 continue;
c5061b15 611
612 /*
613 * Swap two combining characters...
614 */
03f61bf3 615
616 *p1 = unichar2;
617 p1 ++;
618 *p1 = unichar1;
619 hit ++;
c5061b15 620 }
03f61bf3 621
c5061b15 622 if (hit == 0)
623 break;
624 }
625
626 /*
627 * Check for decomposition only...
628 */
03f61bf3 629
630 if (normalize == CUPS_NORM_NFD || normalize == CUPS_NORM_NFKD)
c5061b15 631 {
03f61bf3 632 memcpy(dest, work1, sizeof(cups_utf32_t) * (len + 1));
c5061b15 633 return (len);
634 }
635
636 /*
637 * Find composition map...
638 */
03f61bf3 639
640 for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
c5061b15 641 if (nmap->normalize == CUPS_NORM_NFC)
642 break;
03f61bf3 643
c5061b15 644 if (nmap == NULL)
645 return (-1);
646
647 /*
648 * Compose until no further composition...
649 */
03f61bf3 650
47c9dfee 651 for (pass = 0; pass < 20; pass ++)
c5061b15 652 {
653 p1 = &work1[0];
654 p2 = &work2[0];
03f61bf3 655
c5061b15 656 for (hit = 0; *p1 != 0; p1 ++)
657 {
658 /*
659 * Check for composition defined...
660 */
03f61bf3 661
c5061b15 662 unichar1 = *p1;
663 unichar2 = *(p1 + 1);
03f61bf3 664
c5061b15 665 if (unichar2 == 0)
666 {
47c9dfee 667 *p2 ++ = unichar1;
668 break;
c5061b15 669 }
03f61bf3 670
671 mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount,
672 (sizeof(cups_ucs2_t) * 3), compare_compose);
c5061b15 673 if (mp == NULL)
674 {
47c9dfee 675 *p2 ++ = *p1;
676 continue;
c5061b15 677 }
678
679 /*
680 * Compose two input characters to one output character...
681 */
03f61bf3 682
c5061b15 683 hit ++;
684 mp += 2;
4a95bc63 685 *p2 ++ = (cups_utf32_t) *mp;
c5061b15 686 p1 ++;
687 }
03f61bf3 688
c5061b15 689 *p2 = 0;
690 len = (int) (p2 - &work2[0]);
691
692 /*
693 * Check for composition finished...
694 */
03f61bf3 695
c5061b15 696 if (hit == 0)
697 break;
03f61bf3 698
4a95bc63 699 memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1));
c5061b15 700 }
03f61bf3 701
4a95bc63 702 memcpy (dest, work1, sizeof(cups_utf32_t) * (len + 1));
03f61bf3 703
c5061b15 704 cupsNormalizeMapsFree();
03f61bf3 705
c5061b15 706 return (len);
707}
708
03f61bf3 709
c5061b15 710/*
4a95bc63 711 * 'cupsUTF8CaseFold()' - Case fold UTF-8 string.
c5061b15 712 *
713 * Case Fold UTF-8 string per Unicode UAX-21 Section 2.3
714 * Note - Case folding output is
715 * unsafe for subsequent transcoding to legacy charsets
716 */
03f61bf3 717
718int /* O - Count or -1 on error */
719cupsUTF8CaseFold(
720 cups_utf8_t *dest, /* O - Target string */
721 const cups_utf8_t *src, /* I - Source string */
722 const int maxout, /* I - Max output */
723 const cups_folding_t fold) /* I - Fold Mode */
c5061b15 724{
03f61bf3 725 int len; /* String length */
726 cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
727 cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
728
c5061b15 729
730 /*
731 * Check for valid arguments and clear output...
732 */
03f61bf3 733
734 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
c5061b15 735 return (-1);
03f61bf3 736
c5061b15 737 *dest = 0;
03f61bf3 738
739 if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL)
c5061b15 740 return (-1);
741
742 /*
743 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
744 */
03f61bf3 745
4a95bc63 746 len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING);
03f61bf3 747
c5061b15 748 if (len < 0)
749 return (-1);
750
751 /*
752 * Case Fold internal UCS-4 to second internal UCS-4...
753 */
03f61bf3 754
4a95bc63 755 len = cupsUTF32CaseFold(work2, work1, CUPS_MAX_USTRING, fold);
03f61bf3 756
c5061b15 757 if (len < 0)
758 return (-1);
759
760 /*
761 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
762 */
03f61bf3 763
4a95bc63 764 len = cupsUTF32ToUTF8(dest, work2, maxout);
03f61bf3 765
c5061b15 766 return (len);
767}
768
03f61bf3 769
c5061b15 770/*
4a95bc63 771 * 'cupsUTF32CaseFold()' - Case fold UTF-32 string.
c5061b15 772 *
773 * Case Fold UTF-32 string per Unicode UAX-21 Section 2.3
774 * Note - Case folding output is
775 * unsafe for subsequent transcoding to legacy charsets
776 */
03f61bf3 777
778int /* O - Count or -1 on error */
779cupsUTF32CaseFold(
780 cups_utf32_t *dest, /* O - Target string */
781 const cups_utf32_t *src, /* I - Source string */
782 const int maxout, /* I - Max output */
783 const cups_folding_t fold) /* I - Fold Mode */
c5061b15 784{
03f61bf3 785 cups_utf32_t *start = dest; /* Start of destination string */
786 int i; /* Looping variable */
787 int result; /* Result Value */
788 cups_ucs2_t *mp; /* Map char pointer */
2625096f 789 _cups_fold_map_t *fmap; /* Unicode Case Folding Map */
790 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 791 /* Pointer to library globals */
792
c5061b15 793
794 /*
795 * Check for valid arguments and clear output...
796 */
03f61bf3 797
798 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
c5061b15 799 return (-1);
03f61bf3 800
c5061b15 801 *dest = 0;
03f61bf3 802
803 if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL)
c5061b15 804 return (-1);
805
806 /*
807 * Find case folding map...
808 */
03f61bf3 809
c5061b15 810 result = cupsNormalizeMapsGet();
03f61bf3 811
c5061b15 812 if (result < 0)
813 return (-1);
03f61bf3 814
815 for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
c5061b15 816 if (fmap->fold == fold)
817 break;
03f61bf3 818
c5061b15 819 if (fmap == NULL)
820 return (-1);
821
822 /*
823 * Case fold input string to output string...
824 */
03f61bf3 825
c5061b15 826 for (i = 0; i < (maxout - 1); i ++, src ++)
827 {
828 /*
829 * Check for case folding defined...
830 */
03f61bf3 831
832 mp = (cups_ucs2_t *)bsearch(src, fmap->uni2fold, fmap->foldcount,
833 (sizeof(cups_ucs2_t) * 4), compare_foldchar);
c5061b15 834 if (mp == NULL)
835 {
836 *dest ++ = *src;
837 continue;
838 }
839
840 /*
841 * Case fold input character to one or two output characters...
842 */
03f61bf3 843
c5061b15 844 mp ++;
4a95bc63 845 *dest ++ = (cups_utf32_t) *mp ++;
03f61bf3 846
847 if (*mp != 0 && fold == CUPS_FOLD_FULL)
c5061b15 848 {
849 i ++;
03f61bf3 850 if (i >= (maxout - 1))
47c9dfee 851 break;
03f61bf3 852
4a95bc63 853 *dest ++ = (cups_utf32_t) *mp;
c5061b15 854 }
855 }
03f61bf3 856
c5061b15 857 *dest = 0;
03f61bf3 858
c5061b15 859 cupsNormalizeMapsFree();
03f61bf3 860
b296d7d9 861 return ((int)(dest - start));
c5061b15 862}
863
03f61bf3 864
c5061b15 865/*
4a95bc63 866 * 'cupsUTF8CompareCaseless()' - Compare case folded UTF-8 strings.
c5061b15 867 */
03f61bf3 868
869int /* O - Difference of strings */
870cupsUTF8CompareCaseless(
871 const cups_utf8_t *s1, /* I - String1 */
872 const cups_utf8_t *s2) /* I - String2 */
c5061b15 873{
03f61bf3 874 int difference; /* Difference of two strings */
875 int len; /* String length */
876 cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
877 cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
878
c5061b15 879
880 /*
881 * Check for valid arguments...
882 */
03f61bf3 883
884 if (!s1 || !s2)
c5061b15 885 return (-1);
886
887 /*
888 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
889 */
03f61bf3 890
4a95bc63 891 len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING);
03f61bf3 892
c5061b15 893 if (len < 0)
894 return (-1);
03f61bf3 895
4a95bc63 896 len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING);
03f61bf3 897
c5061b15 898 if (len < 0)
899 return (-1);
900
901 /*
902 * Compare first internal UCS-4 to second internal UCS-4...
903 */
03f61bf3 904
4a95bc63 905 difference = cupsUTF32CompareCaseless(work1, work2);
03f61bf3 906
c5061b15 907 return (difference);
908}
909
03f61bf3 910
c5061b15 911/*
4a95bc63 912 * 'cupsUTF32CompareCaseless()' - Compare case folded UTF-32 strings.
c5061b15 913 */
03f61bf3 914
915int /* O - Difference of strings */
916cupsUTF32CompareCaseless(
917 const cups_utf32_t *s1, /* I - String1 */
918 const cups_utf32_t *s2) /* I - String2 */
c5061b15 919{
03f61bf3 920 int difference; /* Difference of two strings */
921 int len; /* String length */
922 cups_folding_t fold = CUPS_FOLD_FULL;
923 /* Case folding mode */
924 cups_utf32_t fold1[CUPS_MAX_USTRING];
925 /* First UCS-4 folded string */
926 cups_utf32_t fold2[CUPS_MAX_USTRING];
927 /* Second UCS-4 folded string */
928 cups_utf32_t *p1; /* First UCS-4 string pointer */
929 cups_utf32_t *p2; /* Second UCS-4 string pointer */
930
c5061b15 931
932 /*
933 * Check for valid arguments...
934 */
03f61bf3 935
936 if (!s1 || !s2)
c5061b15 937 return (-1);
938
939 /*
940 * Case Fold input UTF-32 strings to internal UCS-4 strings...
941 */
03f61bf3 942
4a95bc63 943 len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold);
03f61bf3 944
c5061b15 945 if (len < 0)
946 return (-1);
03f61bf3 947
4a95bc63 948 len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold);
03f61bf3 949
c5061b15 950 if (len < 0)
951 return (-1);
952
953 /*
954 * Compare first internal UCS-4 to second internal UCS-4...
955 */
03f61bf3 956
c5061b15 957 p1 = &fold1[0];
958 p2 = &fold2[0];
03f61bf3 959
c5061b15 960 for (;; p1 ++, p2 ++)
961 {
962 difference = (int) (*p1 - *p2);
03f61bf3 963
c5061b15 964 if (difference != 0)
965 break;
03f61bf3 966
c5061b15 967 if ((*p1 == 0) && (*p2 == 0))
968 break;
969 }
03f61bf3 970
c5061b15 971 return (difference);
972}
973
03f61bf3 974
c5061b15 975/*
4a95bc63 976 * 'cupsUTF8CompareIdentifier()' - Compare folded NFKC UTF-8 strings.
c5061b15 977 */
03f61bf3 978
979int /* O - Result of comparison */
980cupsUTF8CompareIdentifier(
981 const cups_utf8_t *s1, /* I - String1 */
982 const cups_utf8_t *s2) /* I - String2 */
c5061b15 983{
03f61bf3 984 int difference; /* Difference of two strings */
985 int len; /* String length */
986 cups_utf32_t work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
987 cups_utf32_t work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
988
c5061b15 989
990 /*
991 * Check for valid arguments...
992 */
03f61bf3 993
994 if (!s1 || !s2)
c5061b15 995 return (-1);
996
997 /*
998 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
999 */
03f61bf3 1000
4a95bc63 1001 len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING);
03f61bf3 1002
c5061b15 1003 if (len < 0)
1004 return (-1);
03f61bf3 1005
4a95bc63 1006 len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING);
03f61bf3 1007
c5061b15 1008 if (len < 0)
1009 return (-1);
1010
1011 /*
1012 * Compare first internal UCS-4 to second internal UCS-4...
1013 */
03f61bf3 1014
4a95bc63 1015 difference = cupsUTF32CompareIdentifier(work1, work2);
03f61bf3 1016
c5061b15 1017 return (difference);
1018}
1019
03f61bf3 1020
c5061b15 1021/*
4a95bc63 1022 * 'cupsUTF32CompareIdentifier()' - Compare folded NFKC UTF-32 strings.
c5061b15 1023 */
03f61bf3 1024
1025int /* O - Result of comparison */
1026cupsUTF32CompareIdentifier(
1027 const cups_utf32_t *s1, /* I - String1 */
1028 const cups_utf32_t *s2) /* I - String2 */
c5061b15 1029{
03f61bf3 1030 int difference; /* Difference of two strings */
1031 int len; /* String length */
1032 cups_folding_t fold = CUPS_FOLD_FULL;
1033 /* Case folding mode */
1034 cups_utf32_t fold1[CUPS_MAX_USTRING];
1035 /* First UCS-4 folded string */
1036 cups_utf32_t fold2[CUPS_MAX_USTRING];
1037 /* Second UCS-4 folded string */
1038 cups_normalize_t normalize = CUPS_NORM_NFKC;
1039 /* Normalization form */
1040 cups_utf32_t norm1[CUPS_MAX_USTRING];
1041 /* First UCS-4 normalized string */
1042 cups_utf32_t norm2[CUPS_MAX_USTRING];
1043 /* Second UCS-4 normalized string */
1044 cups_utf32_t *p1; /* First UCS-4 string pointer */
1045 cups_utf32_t *p2; /* Second UCS-4 string pointer */
1046
c5061b15 1047
1048 /*
1049 * Check for valid arguments...
1050 */
03f61bf3 1051
1052 if (!s1 || !s2)
c5061b15 1053 return (-1);
1054
1055 /*
1056 * Case Fold input UTF-32 strings to internal UCS-4 strings...
1057 */
03f61bf3 1058
4a95bc63 1059 len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold);
03f61bf3 1060
c5061b15 1061 if (len < 0)
1062 return (-1);
03f61bf3 1063
4a95bc63 1064 len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold);
03f61bf3 1065
c5061b15 1066 if (len < 0)
1067 return (-1);
1068
1069 /*
1070 * Normalize internal UCS-4 strings to NFKC...
1071 */
03f61bf3 1072
4a95bc63 1073 len = cupsUTF32Normalize(norm1, fold1, CUPS_MAX_USTRING, normalize);
03f61bf3 1074
c5061b15 1075 if (len < 0)
1076 return (-1);
03f61bf3 1077
4a95bc63 1078 len = cupsUTF32Normalize(norm2, fold2, CUPS_MAX_USTRING, normalize);
03f61bf3 1079
c5061b15 1080 if (len < 0)
1081 return (-1);
1082
1083 /*
1084 * Compare first internal UCS-4 to second internal UCS-4...
1085 */
03f61bf3 1086
c5061b15 1087 p1 = &norm1[0];
1088 p2 = &norm2[0];
03f61bf3 1089
c5061b15 1090 for (;; p1 ++, p2 ++)
1091 {
1092 difference = (int) (*p1 - *p2);
03f61bf3 1093
c5061b15 1094 if (difference != 0)
1095 break;
03f61bf3 1096
c5061b15 1097 if ((*p1 == 0) && (*p2 == 0))
1098 break;
1099 }
03f61bf3 1100
c5061b15 1101 return (difference);
1102}
1103
03f61bf3 1104
c5061b15 1105/*
47c9dfee 1106 * 'cupsUTF32CharacterProperty()' - Get UTF-32 character property.
c5061b15 1107 */
03f61bf3 1108
1109int /* O - Result of comparison */
1110cupsUTF32CharacterProperty(
1111 const cups_utf32_t ch, /* I - Source char */
1112 const cups_property_t prop) /* I - Char Property */
c5061b15 1113{
03f61bf3 1114 int result; /* Result Value */
1115
c5061b15 1116
1117 /*
1118 * Check for valid arguments...
1119 */
03f61bf3 1120
c5061b15 1121 if (ch == 0)
1122 return (-1);
1123
1124 /*
1125 * Find character property...
1126 */
03f61bf3 1127
47c9dfee 1128 switch (prop)
c5061b15 1129 {
1130 case CUPS_PROP_GENERAL_CATEGORY:
03f61bf3 1131 result = (get_general_category(ch));
1132 break;
1133
c5061b15 1134 case CUPS_PROP_BIDI_CATEGORY:
03f61bf3 1135 result = (get_bidi_category(ch));
1136 break;
1137
c5061b15 1138 case CUPS_PROP_COMBINING_CLASS:
03f61bf3 1139 result = (get_combining_class(ch));
1140 break;
c5061b15 1141 case CUPS_PROP_BREAK_CLASS:
03f61bf3 1142 result = (get_break_class(ch));
1143 break;
1144
c5061b15 1145 default:
03f61bf3 1146 return (-1);
c5061b15 1147 }
03f61bf3 1148
c5061b15 1149 return (result);
1150}
1151
03f61bf3 1152
c5061b15 1153/*
1154 * 'get_general_category()' - Get UTF-32 Character General Category.
1155 */
03f61bf3 1156
1157static int /* O - Class or -1 on error */
1158get_general_category(
1159 const cups_utf32_t ch) /* I - Source char */
c5061b15 1160{
03f61bf3 1161 int result; /* Result Value */
1162 cups_gencat_t gencat; /* General Category Value */
2625096f 1163 _cups_prop_map_t *pmap; /* Unicode Property Map */
1164 _cups_prop_t *uni2prop; /* Unicode Char -> Properties */
1165 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 1166 /* Pointer to library globals */
1167
c5061b15 1168
1169 /*
1170 * Check for valid argument...
1171 */
03f61bf3 1172
c5061b15 1173 if (ch == 0)
1174 return (-1);
1175
1176 /*
1177 * Find property map...
1178 */
03f61bf3 1179
c5061b15 1180 result = cupsNormalizeMapsGet();
03f61bf3 1181
c5061b15 1182 if (result < 0)
1183 return (-1);
03f61bf3 1184
1185 pmap = cg->propmap_cache;
1186
c5061b15 1187 if (pmap == NULL)
1188 return (-1);
1189
1190 /*
1191 * Find character in map...
1192 */
03f61bf3 1193
2625096f 1194 uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount,
1195 (sizeof(_cups_prop_t)), compare_propchar);
03f61bf3 1196
c5061b15 1197 cupsNormalizeMapsFree();
03f61bf3 1198
c5061b15 1199 if (uni2prop == NULL)
47c9dfee 1200 gencat = CUPS_GENCAT_CN; /* Other, Not Assigned */
c5061b15 1201 else
03f61bf3 1202 gencat = (cups_gencat_t)uni2prop->gencat;
1203
1204 result = (int)gencat;
1205
c5061b15 1206 return (result);
1207}
1208
03f61bf3 1209
c5061b15 1210/*
1211 * 'get_bidi_category()' - Get UTF-32 Character Bidi Category.
1212 */
03f61bf3 1213
1214static int /* O - Class or -1 on error */
1215get_bidi_category(const cups_utf32_t ch)/* I - Source char */
c5061b15 1216{
03f61bf3 1217 int result; /* Result Value */
2625096f 1218 cups_bidi_t bidicat; /* Bidi Category Value */
1219 _cups_prop_map_t *pmap; /* Unicode Property Map */
1220 _cups_prop_t *uni2prop; /* Unicode Char -> Properties */
1221 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 1222 /* Pointer to library globals */
1223
c5061b15 1224
1225 /*
1226 * Check for valid argument...
1227 */
03f61bf3 1228
c5061b15 1229 if (ch == 0)
1230 return (-1);
1231
1232 /*
1233 * Find property map...
1234 */
03f61bf3 1235
c5061b15 1236 result = cupsNormalizeMapsGet();
03f61bf3 1237
c5061b15 1238 if (result < 0)
1239 return (-1);
03f61bf3 1240
1241 pmap = cg->propmap_cache;
1242
c5061b15 1243 if (pmap == NULL)
1244 return (-1);
1245
1246 /*
1247 * Find character in map...
1248 */
03f61bf3 1249
2625096f 1250 uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount,
1251 (sizeof(_cups_prop_t)), compare_propchar);
03f61bf3 1252
c5061b15 1253 cupsNormalizeMapsFree();
03f61bf3 1254
c5061b15 1255 if (uni2prop == NULL)
47c9dfee 1256 bidicat = CUPS_BIDI_ON; /* Other Neutral */
c5061b15 1257 else
2625096f 1258 bidicat = (cups_bidi_t)uni2prop->bidicat;
03f61bf3 1259
1260 result = (int)bidicat;
1261
c5061b15 1262 return (result);
1263}
1264
1265/*
1266 * 'get_combining_class()' - Get UTF-32 Character Combining Class.
1267 *
1268 * Note - Zero is non-combining (base character)
1269 */
03f61bf3 1270
1271static int /* O - Class or -1 on error */
1272get_combining_class(
1273 const cups_utf32_t ch) /* I - Source char */
c5061b15 1274{
03f61bf3 1275 int result; /* Result Value */
2625096f 1276 _cups_comb_map_t *cmap; /* Unicode Combining Class Map */
1277 _cups_comb_class_t combclass; /* Unicode Combining Class */
1278 _cups_comb_t *uni2comb; /* Unicode Char -> Combining Class */
1279 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 1280 /* Pointer to library globals */
1281
c5061b15 1282
1283 /*
1284 * Check for valid argument...
1285 */
03f61bf3 1286
c5061b15 1287 if (ch == 0)
1288 return (-1);
1289
1290 /*
1291 * Find combining class map...
1292 */
03f61bf3 1293
c5061b15 1294 result = cupsNormalizeMapsGet();
03f61bf3 1295
c5061b15 1296 if (result < 0)
1297 return (-1);
03f61bf3 1298
1299 cmap = cg->combmap_cache;
1300
c5061b15 1301 if (cmap == NULL)
1302 return (-1);
1303
1304 /*
1305 * Find combining character in map...
1306 */
03f61bf3 1307
2625096f 1308 uni2comb = (_cups_comb_t *)bsearch(&ch, cmap->uni2comb, cmap->combcount,
1309 (sizeof(_cups_comb_t)), compare_combchar);
03f61bf3 1310
c5061b15 1311 cupsNormalizeMapsFree();
03f61bf3 1312
c5061b15 1313 if (uni2comb == NULL)
1314 combclass = 0;
1315 else
2625096f 1316 combclass = (_cups_comb_class_t)uni2comb->combclass;
03f61bf3 1317
1318 result = (int)combclass;
1319
c5061b15 1320 return (result);
1321}
1322
03f61bf3 1323
c5061b15 1324/*
1325 * 'get_break_class()' - Get UTF-32 Character Line Break Class.
1326 */
03f61bf3 1327
1328static int /* O - Class or -1 on error */
1329get_break_class(const cups_utf32_t ch) /* I - Source char */
c5061b15 1330{
03f61bf3 1331 int result; /* Result Value */
2625096f 1332 _cups_break_map_t *bmap; /* Unicode Line Break Class Map */
67871650 1333 cups_break_class_t breakclass; /* Unicode Line Break Class */
03f61bf3 1334 cups_ucs2_t *uni2break; /* Unicode -> Line Break Class */
2625096f 1335 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 1336 /* Pointer to library globals */
1337
c5061b15 1338
1339 /*
1340 * Check for valid argument...
1341 */
03f61bf3 1342
c5061b15 1343 if (ch == 0)
1344 return (-1);
1345
1346 /*
1347 * Find line break class map...
1348 */
03f61bf3 1349
c5061b15 1350 result = cupsNormalizeMapsGet();
03f61bf3 1351
c5061b15 1352 if (result < 0)
1353 return (-1);
03f61bf3 1354
1355 bmap = cg->breakmap_cache;
1356
c5061b15 1357 if (bmap == NULL)
1358 return (-1);
1359
1360 /*
1361 * Find line break character in map...
1362 */
03f61bf3 1363
1364 uni2break = (cups_ucs2_t *)bsearch(&ch, bmap->uni2break, bmap->breakcount,
1365 (sizeof(cups_ucs2_t) * 3),
1366 compare_breakchar);
1367
c5061b15 1368 cupsNormalizeMapsFree();
03f61bf3 1369
c5061b15 1370 if (uni2break == NULL)
1371 breakclass = CUPS_BREAK_AI;
1372 else
67871650 1373 breakclass = (cups_break_class_t)*(uni2break + 2);
03f61bf3 1374
1375 result = (int)breakclass;
1376
c5061b15 1377 return (result);
1378}
1379
03f61bf3 1380
c5061b15 1381/*
1382 * 'get_map_count()' - Count lines in a map file.
1383 */
03f61bf3 1384
1385static int /* O - Count or -1 on error */
1386get_map_count(const char *filename) /* I - Map Filename */
c5061b15 1387{
03f61bf3 1388 int i; /* Looping variable */
1389 cups_file_t *fp; /* Map input file pointer */
1390 char *s; /* Line parsing pointer */
1391 char line[256]; /* Line from input map file */
1392 cups_utf32_t unichar; /* Unicode character value */
1393
c5061b15 1394
1395 /*
1396 * Open map input file...
1397 */
03f61bf3 1398
1399 if (!filename || !*filename)
c5061b15 1400 return (-1);
03f61bf3 1401
1402 fp = cupsFileOpen(filename, "r");
c5061b15 1403 if (fp == NULL)
1404 return (-1);
1405
1406 /*
1407 * Count lines in map input file...
1408 */
03f61bf3 1409
c5061b15 1410 for (i = 0; i < 50000;)
1411 {
03f61bf3 1412 s = cupsFileGets(fp, line, sizeof(line));
c5061b15 1413 if (s == NULL)
1414 break;
1415 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1416 continue;
1417 if (strncmp (s, "0x", 2) == 0)
1418 s += 2;
1419 if (sscanf(s, "%lx", &unichar) != 1)
1420 break;
1421 if (unichar > 0xffff)
1422 break;
1423 i ++;
1424 }
1425 if (i == 0)
1426 i = -1;
1427
1428 /*
1429 * Close file and return map count (non-comment line count)...
1430 */
03f61bf3 1431
1432 cupsFileClose(fp);
1433
c5061b15 1434 return (i);
1435}
1436
03f61bf3 1437
c5061b15 1438/*
47c9dfee 1439 * 'get_normmap()' - Get Unicode normalization map to cache.
c5061b15 1440 */
03f61bf3 1441
1442static int /* O - Zero or -1 on error */
1443get_normmap(
1444 const cups_normalize_t normalize) /* I - Normalization Form */
c5061b15 1445{
03f61bf3 1446 int i; /* Looping variable */
1447 cups_utf32_t unichar1; /* Unicode character value */
1448 cups_utf32_t unichar2; /* Unicode character value */
1449 cups_utf32_t unichar3; /* Unicode character value */
2625096f 1450 _cups_norm_map_t *nmap; /* Unicode Normalization Map */
03f61bf3 1451 int normcount; /* Count of Unicode Source Chars */
1452 cups_ucs2_t *uni2norm; /* Unicode Char -> Normalization */
03f61bf3 1453 char *mapname; /* Normalization map name */
1454 char filename[1024]; /* Filename for charset map file */
1455 cups_file_t *fp; /* Normalization map file pointer */
1456 char *s; /* Line parsing pointer */
1457 char line[256]; /* Line from input map file */
2625096f 1458 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 1459 /* Pointer to library globals */
1460
c5061b15 1461
1462 /*
1463 * See if we already have this normalization map loaded...
1464 */
03f61bf3 1465
1466 for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
c5061b15 1467 if (nmap->normalize == normalize)
1468 return (0);
1469
1470 /*
a501ad17 1471 * Get the mapping name...
c5061b15 1472 */
03f61bf3 1473
c5061b15 1474 switch (normalize)
1475 {
47c9dfee 1476 case CUPS_NORM_NFD: /* Canonical Decomposition */
03f61bf3 1477 mapname = "uni-nfd.txt";
1478 break;
1479
47c9dfee 1480 case CUPS_NORM_NFKD: /* Compatibility Decomposition */
03f61bf3 1481 mapname = "uni-nfkd.txt";
1482 break;
1483
47c9dfee 1484 case CUPS_NORM_NFC: /* Canonical Composition */
03f61bf3 1485 mapname = "uni-nfc.txt";
1486 break;
1487
47c9dfee 1488 case CUPS_NORM_NFKC: /* no such map file... */
c5061b15 1489 default:
03f61bf3 1490 return (-1);
c5061b15 1491 }
1492
1493 /*
1494 * Open normalization map input file...
1495 */
03f61bf3 1496
47c9dfee 1497 snprintf(filename, sizeof(filename), "%s/charmaps/%s",
a501ad17 1498 cg->cups_datadir, mapname);
c5061b15 1499 if ((normcount = get_map_count(filename)) <= 0)
1500 return (-1);
03f61bf3 1501
1502 fp = cupsFileOpen(filename, "r");
c5061b15 1503 if (fp == NULL)
1504 return (-1);
1505
1506 /*
1507 * Allocate memory for normalization map and add to cache...
1508 */
03f61bf3 1509
2625096f 1510 nmap = (_cups_norm_map_t *)calloc(1, sizeof(_cups_norm_map_t));
c5061b15 1511 if (nmap == NULL)
1512 {
03f61bf3 1513 cupsFileClose(fp);
c5061b15 1514 return (-1);
1515 }
03f61bf3 1516
1517 uni2norm = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * normcount);
c5061b15 1518 if (uni2norm == NULL)
1519 {
03f61bf3 1520 free(nmap);
1521 cupsFileClose(fp);
c5061b15 1522 return (-1);
1523 }
03f61bf3 1524 nmap->next = cg->normmap_cache;
1525 cg->normmap_cache = nmap;
c5061b15 1526 nmap->used ++;
1527 nmap->normalize = normalize;
1528 nmap->normcount = normcount;
1529 nmap->uni2norm = uni2norm;
1530
1531 /*
1532 * Save normalization map into memory for later use...
1533 */
1534 for (i = 0; i < normcount; )
1535 {
03f61bf3 1536 s = cupsFileGets(fp, line, sizeof(line));
c5061b15 1537 if (s == NULL)
1538 break;
1539 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1540 continue;
1541 if (sscanf(s, "%lx %lx %lx", &unichar1, &unichar2, &unichar3) != 3)
1542 break;
1543 if ((unichar1 > 0xffff)
1544 || (unichar2 > 0xffff)
1545 || (unichar3 > 0xffff))
1546 break;
4a95bc63 1547 *uni2norm ++ = (cups_ucs2_t) unichar1;
1548 *uni2norm ++ = (cups_ucs2_t) unichar2;
1549 *uni2norm ++ = (cups_ucs2_t) unichar3;
c5061b15 1550 i ++;
1551 }
1552 if (i < normcount)
1553 nmap->normcount = i;
03f61bf3 1554 cupsFileClose(fp);
c5061b15 1555 return (0);
1556}
1557
03f61bf3 1558
c5061b15 1559/*
47c9dfee 1560 * 'get_foldmap()' - Get Unicode case folding map to cache.
c5061b15 1561 */
03f61bf3 1562
1563static int /* O - Zero or -1 on error */
1564get_foldmap(const cups_folding_t fold) /* I - Case folding type */
c5061b15 1565{
03f61bf3 1566 int i; /* Looping variable */
1567 cups_utf32_t unichar1; /* Unicode character value */
1568 cups_utf32_t unichar2; /* Unicode character value */
1569 cups_utf32_t unichar3; /* Unicode character value */
1570 cups_utf32_t unichar4; /* Unicode character value */
2625096f 1571 _cups_fold_map_t *fmap; /* Unicode Case Folding Map */
03f61bf3 1572 int foldcount; /* Count of Unicode Source Chars */
1573 cups_ucs2_t *uni2fold; /* Unicode -> Folded Char(s) */
03f61bf3 1574 char *mapname; /* Case Folding map name */
1575 char filename[1024]; /* Filename for charset map file */
1576 cups_file_t *fp; /* Case Folding map file pointer */
1577 char *s; /* Line parsing pointer */
1578 char line[256]; /* Line from input map file */
2625096f 1579 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 1580 /* Pointer to library globals */
1581
c5061b15 1582
1583 /*
1584 * See if we already have this case folding map loaded...
1585 */
03f61bf3 1586
1587 for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
c5061b15 1588 if (fmap->fold == fold)
1589 return (0);
1590
1591 /*
a501ad17 1592 * Get the mapping name...
c5061b15 1593 */
03f61bf3 1594
c5061b15 1595 switch (fold)
1596 {
47c9dfee 1597 case CUPS_FOLD_SIMPLE: /* Simple case folding */
03f61bf3 1598 mapname = "uni-fold.txt";
1599 break;
47c9dfee 1600 case CUPS_FOLD_FULL: /* Full case folding */
03f61bf3 1601 mapname = "uni-full.txt";
1602 break;
c5061b15 1603 default:
03f61bf3 1604 return (-1);
c5061b15 1605 }
1606
1607 /*
1608 * Open case folding map input file...
1609 */
03f61bf3 1610
47c9dfee 1611 snprintf(filename, sizeof(filename), "%s/charmaps/%s",
a501ad17 1612 cg->cups_datadir, mapname);
c5061b15 1613 if ((foldcount = get_map_count(filename)) <= 0)
1614 return (-1);
03f61bf3 1615 fp = cupsFileOpen(filename, "r");
c5061b15 1616 if (fp == NULL)
1617 return (-1);
1618
1619 /*
1620 * Allocate memory for case folding map and add to cache...
1621 */
2625096f 1622 fmap = (_cups_fold_map_t *)calloc(1, sizeof(_cups_fold_map_t));
c5061b15 1623 if (fmap == NULL)
1624 {
03f61bf3 1625 cupsFileClose(fp);
c5061b15 1626 return (-1);
1627 }
03f61bf3 1628 uni2fold = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 4 * foldcount);
c5061b15 1629 if (uni2fold == NULL)
1630 {
03f61bf3 1631 free(fmap);
1632 cupsFileClose(fp);
c5061b15 1633 return (-1);
1634 }
03f61bf3 1635 fmap->next = cg->foldmap_cache;
1636 cg->foldmap_cache = fmap;
c5061b15 1637 fmap->used ++;
1638 fmap->fold = fold;
1639 fmap->foldcount = foldcount;
1640 fmap->uni2fold = uni2fold;
1641
1642 /*
1643 * Save case folding map into memory for later use...
1644 */
03f61bf3 1645
c5061b15 1646 for (i = 0; i < foldcount; )
1647 {
03f61bf3 1648 s = cupsFileGets(fp, line, sizeof(line));
c5061b15 1649 if (s == NULL)
1650 break;
1651 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1652 continue;
1653 unichar1 = unichar2 = unichar3 = unichar4 = 0;
1654 if ((fold == CUPS_FOLD_SIMPLE)
1655 && (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2))
1656 break;
1657 if ((fold == CUPS_FOLD_FULL)
1658 && (sscanf(s, "%lx %lx %lx %lx",
47c9dfee 1659 &unichar1, &unichar2, &unichar3, &unichar4) != 4))
c5061b15 1660 break;
1661 if ((unichar1 > 0xffff)
1662 || (unichar2 > 0xffff)
1663 || (unichar3 > 0xffff)
1664 || (unichar4 > 0xffff))
1665 break;
4a95bc63 1666 *uni2fold ++ = (cups_ucs2_t) unichar1;
1667 *uni2fold ++ = (cups_ucs2_t) unichar2;
1668 *uni2fold ++ = (cups_ucs2_t) unichar3;
1669 *uni2fold ++ = (cups_ucs2_t) unichar4;
c5061b15 1670 i ++;
1671 }
1672 if (i < foldcount)
1673 fmap->foldcount = i;
03f61bf3 1674 cupsFileClose(fp);
c5061b15 1675 return (0);
1676}
1677
1678/*
47c9dfee 1679 * 'get_propmap()' - Get Unicode character property map to cache.
c5061b15 1680 */
03f61bf3 1681
1682static int /* O - Zero or -1 on error */
c5061b15 1683get_propmap(void)
1684{
03f61bf3 1685 int i, j; /* Looping variables */
b296d7d9 1686 size_t len; /* String length */
03f61bf3 1687 cups_utf32_t unichar; /* Unicode character value */
1688 cups_gencat_t gencat; /* General Category Value */
2625096f 1689 cups_bidi_t bidicat; /* Bidi Category Value */
1690 _cups_prop_map_t *pmap; /* Unicode Char Property Map */
03f61bf3 1691 int propcount; /* Count of Unicode Source Chars */
2625096f 1692 _cups_prop_t *uni2prop; /* Unicode Char -> Properties */
03f61bf3 1693 char *mapname; /* Char Property map name */
1694 char filename[1024]; /* Filename for charset map file */
1695 cups_file_t *fp; /* Char Property map file pointer */
1696 char *s; /* Line parsing pointer */
1697 char line[256]; /* Line from input map file */
2625096f 1698 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 1699 /* Pointer to library globals */
1700
c5061b15 1701
1702 /*
1703 * See if we already have this char properties map loaded...
1704 */
03f61bf3 1705
1706 if ((pmap = cg->propmap_cache) != NULL)
c5061b15 1707 return (0);
1708
1709 /*
a501ad17 1710 * Get the mapping name...
c5061b15 1711 */
03f61bf3 1712
c5061b15 1713 mapname = "uni-prop.txt";
1714
1715 /*
1716 * Open char properties map input file...
1717 */
47c9dfee 1718 snprintf(filename, sizeof(filename), "%s/charmaps/%s",
a501ad17 1719 cg->cups_datadir, mapname);
c5061b15 1720 if ((propcount = get_map_count(filename)) <= 0)
1721 return (-1);
03f61bf3 1722 fp = cupsFileOpen(filename, "r");
c5061b15 1723 if (fp == NULL)
1724 return (-1);
1725
1726 /*
1727 * Allocate memory for char properties map and add to cache...
1728 */
2625096f 1729 pmap = (_cups_prop_map_t *)calloc(1, sizeof(_cups_prop_map_t));
c5061b15 1730 if (pmap == NULL)
1731 {
03f61bf3 1732 cupsFileClose(fp);
c5061b15 1733 return (-1);
1734 }
2625096f 1735 uni2prop = (_cups_prop_t *)calloc(1, sizeof(_cups_prop_t) * propcount);
c5061b15 1736 if (uni2prop == NULL)
1737 {
03f61bf3 1738 free(pmap);
1739 cupsFileClose(fp);
c5061b15 1740 return (-1);
1741 }
03f61bf3 1742 cg->propmap_cache = pmap;
c5061b15 1743 pmap->used ++;
1744 pmap->propcount = propcount;
1745 pmap->uni2prop = uni2prop;
1746
1747 /*
1748 * Save char properties map into memory for later use...
1749 */
1750 for (i = 0; i < propcount; )
1751 {
03f61bf3 1752 s = cupsFileGets(fp, line, sizeof(line));
c5061b15 1753 if (s == NULL)
1754 break;
1755 if (strlen(s) > 0)
1756 *(s + strlen(s) - 1) = '\0';
47c9dfee 1757 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
c5061b15 1758 continue;
1759 if (sscanf(s, "%lx", &unichar) != 1)
1760 break;
1761 if (unichar > 0xffff)
1762 break;
1763 while ((*s != '\0') && (*s != ';'))
1764 s ++;
1765 if (*s != ';')
1766 break;
1767 s ++;
1768 for (j = 0; gencat_index[j].str != NULL; j ++)
1769 {
1770 len = strlen(gencat_index[j].str);
1771 if (strncmp (s, gencat_index[j].str, len) == 0)
47c9dfee 1772 break;
c5061b15 1773 }
1774 if (gencat_index[j].str == NULL)
1775 return (-1);
1776 gencat = gencat_index[j].gencat;
1777 while ((*s != '\0') && (*s != ';'))
1778 s ++;
1779 if (*s != ';')
1780 break;
1781 s ++;
1782 for (j = 0; bidicat_index[j] != NULL; j ++)
1783 {
1784 len = strlen(bidicat_index[j]);
1785 if (strncmp (s, bidicat_index[j], len) == 0)
47c9dfee 1786 break;
c5061b15 1787 }
1788 if (bidicat_index[j] == NULL)
1789 return (-1);
2625096f 1790 bidicat = (cups_bidi_t) j;
4a95bc63 1791 uni2prop->ch = (cups_ucs2_t) unichar;
c5061b15 1792 uni2prop->gencat = (unsigned char) gencat;
1793 uni2prop->bidicat = (unsigned char) bidicat;
1794 uni2prop ++;
1795 i ++;
1796 }
1797 if (i < propcount)
1798 pmap->propcount = i;
03f61bf3 1799 cupsFileClose(fp);
c5061b15 1800 return (0);
1801}
1802
03f61bf3 1803
c5061b15 1804/*
47c9dfee 1805 * 'get_combmap()' - Get Unicode combining class map to cache.
c5061b15 1806 */
03f61bf3 1807
1808static int /* O - Zero or -1 on error */
c5061b15 1809get_combmap(void)
1810{
03f61bf3 1811 int i; /* Looping variable */
1812 cups_utf32_t unichar; /* Unicode character value */
1813 int combclass; /* Unicode char combining class */
2625096f 1814 _cups_comb_map_t *cmap; /* Unicode Comb Class Map */
03f61bf3 1815 int combcount; /* Count of Unicode Source Chars */
2625096f 1816 _cups_comb_t *uni2comb; /* Unicode Char -> Combining Class */
03f61bf3 1817 char *mapname; /* Comb Class map name */
1818 char filename[1024]; /* Filename for charset map file */
1819 cups_file_t *fp; /* Comb Class map file pointer */
1820 char *s; /* Line parsing pointer */
1821 char line[256]; /* Line from input map file */
2625096f 1822 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 1823 /* Pointer to library globals */
1824
c5061b15 1825
1826 /*
1827 * See if we already have this combining class map loaded...
1828 */
03f61bf3 1829
1830 if ((cmap = cg->combmap_cache) != NULL)
c5061b15 1831 return (0);
1832
1833 /*
a501ad17 1834 * Get the mapping name...
c5061b15 1835 */
03f61bf3 1836
c5061b15 1837 mapname = "uni-comb.txt";
1838
1839 /*
1840 * Open combining class map input file...
1841 */
03f61bf3 1842
47c9dfee 1843 snprintf(filename, sizeof(filename), "%s/charmaps/%s",
a501ad17 1844 cg->cups_datadir, mapname);
c5061b15 1845 if ((combcount = get_map_count(filename)) <= 0)
1846 return (-1);
03f61bf3 1847 fp = cupsFileOpen(filename, "r");
c5061b15 1848 if (fp == NULL)
1849 return (-1);
1850
1851 /*
1852 * Allocate memory for combining class map and add to cache...
1853 */
03f61bf3 1854
2625096f 1855 cmap = (_cups_comb_map_t *)calloc(1, sizeof(_cups_comb_map_t));
c5061b15 1856 if (cmap == NULL)
1857 {
03f61bf3 1858 cupsFileClose(fp);
c5061b15 1859 return (-1);
1860 }
03f61bf3 1861
2625096f 1862 uni2comb = (_cups_comb_t *)calloc(1, sizeof(_cups_comb_t) * combcount);
c5061b15 1863 if (uni2comb == NULL)
1864 {
03f61bf3 1865 free(cmap);
1866 cupsFileClose(fp);
c5061b15 1867 return (-1);
1868 }
03f61bf3 1869 cg->combmap_cache = cmap;
c5061b15 1870 cmap->used ++;
1871 cmap->combcount = combcount;
1872 cmap->uni2comb = uni2comb;
1873
1874 /*
1875 * Save combining class map into memory for later use...
1876 */
1877 for (i = 0; i < combcount; )
1878 {
03f61bf3 1879 s = cupsFileGets(fp, line, sizeof(line));
c5061b15 1880 if (s == NULL)
1881 break;
1882 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1883 continue;
1884 if (sscanf(s, "%lx", &unichar) != 1)
1885 break;
1886 if (unichar > 0xffff)
1887 break;
1888 while ((*s != '\0') && (*s != ';'))
1889 s ++;
1890 if (*s != ';')
1891 break;
1892 s ++;
1893 if (sscanf(s, "%d", &combclass) != 1)
1894 break;
4a95bc63 1895 uni2comb->ch = (cups_ucs2_t) unichar;
c5061b15 1896 uni2comb->combclass = (unsigned char) combclass;
1897 uni2comb ++;
1898 i ++;
1899 }
1900 if (i < combcount)
1901 cmap->combcount = i;
03f61bf3 1902 cupsFileClose(fp);
c5061b15 1903 return (0);
1904}
1905
03f61bf3 1906
c5061b15 1907/*
47c9dfee 1908 * 'get_breakmap()' - Get Unicode line break class map to cache.
c5061b15 1909 */
03f61bf3 1910
1911static int /* O - Zero or -1 on error */
c5061b15 1912get_breakmap(void)
1913{
03f61bf3 1914 int i, j; /* Looping variables */
1915 int len; /* String length */
1916 cups_utf32_t unichar1; /* Unicode character value */
1917 cups_utf32_t unichar2; /* Unicode character value */
67871650 1918 cups_break_class_t breakclass; /* Unicode char line break class */
2625096f 1919 _cups_break_map_t *bmap; /* Unicode Line Break Class Map */
03f61bf3 1920 int breakcount; /* Count of Unicode Source Chars */
1921 cups_ucs2_t *uni2break; /* Unicode -> Line Break Class */
03f61bf3 1922 char *mapname; /* Comb Class map name */
1923 char filename[1024]; /* Filename for charset map file */
1924 cups_file_t *fp; /* Comb Class map file pointer */
1925 char *s; /* Line parsing pointer */
1926 char line[256]; /* Line from input map file */
2625096f 1927 _cups_globals_t *cg = _cupsGlobals();
03f61bf3 1928 /* Pointer to library globals */
1929
c5061b15 1930
1931 /*
1932 * See if we already have this line break class map loaded...
1933 */
03f61bf3 1934
1935 if ((bmap = cg->breakmap_cache) != NULL)
c5061b15 1936 return (0);
1937
1938 /*
a501ad17 1939 * Get the mapping name...
c5061b15 1940 */
03f61bf3 1941
c5061b15 1942 mapname = "uni-line.txt";
1943
1944 /*
1945 * Open line break class map input file...
1946 */
03f61bf3 1947
47c9dfee 1948 snprintf(filename, sizeof(filename), "%s/charmaps/%s",
a501ad17 1949 cg->cups_datadir, mapname);
c5061b15 1950 if ((breakcount = get_map_count(filename)) <= 0)
1951 return (-1);
03f61bf3 1952 fp = cupsFileOpen(filename, "r");
c5061b15 1953 if (fp == NULL)
1954 return (-1);
1955
1956 /*
1957 * Allocate memory for line break class map and add to cache...
1958 */
03f61bf3 1959
2625096f 1960 bmap = (_cups_break_map_t *)calloc(1, sizeof(_cups_break_map_t));
c5061b15 1961 if (bmap == NULL)
1962 {
03f61bf3 1963 cupsFileClose(fp);
c5061b15 1964 return (-1);
1965 }
03f61bf3 1966
1967 uni2break = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * breakcount);
c5061b15 1968 if (uni2break == NULL)
1969 {
03f61bf3 1970 free(bmap);
1971 cupsFileClose(fp);
c5061b15 1972 return (-1);
1973 }
03f61bf3 1974 cg->breakmap_cache = bmap;
c5061b15 1975 bmap->used ++;
1976 bmap->breakcount = breakcount;
1977 bmap->uni2break = uni2break;
1978
1979 /*
1980 * Save line break class map into memory for later use...
1981 */
1982 for (i = 0; i < breakcount; )
1983 {
03f61bf3 1984 s = cupsFileGets(fp, line, sizeof(line));
c5061b15 1985 if (s == NULL)
1986 break;
1987 if (strlen(s) > 0)
1988 *(s + strlen(s) - 1) = '\0';
47c9dfee 1989 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
c5061b15 1990 continue;
1991 if (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2)
1992 break;
1993 if ((unichar1 > 0xffff)
1994 || (unichar2 > 0xffff))
1995 break;
1996 while ((*s != '\0') && (*s != ';'))
1997 s ++;
1998 if (*s != ';')
1999 break;
2000 s ++;
2001 for (j = 0; break_index[j].str != NULL; j ++)
2002 {
2003 len = strlen (break_index[j].str);
2004 if (strncmp (s, break_index[j].str, len) == 0)
47c9dfee 2005 break;
c5061b15 2006 }
2007 if (break_index[j].str == NULL)
2008 return (-1);
2009 breakclass = break_index[j].breakclass;
4a95bc63 2010 *uni2break ++ = (cups_ucs2_t) unichar1;
2011 *uni2break ++ = (cups_ucs2_t) unichar2;
2012 *uni2break ++ = (cups_ucs2_t) breakclass;
c5061b15 2013 i ++;
2014 }
2015 if (i < breakcount)
2016 bmap->breakcount = i;
03f61bf3 2017 cupsFileClose(fp);
c5061b15 2018 return (0);
2019}
2020
03f61bf3 2021
c5061b15 2022/*
2023 * 'compare_compose()' - Compare key for compose match.
2024 *
2025 * Note - This function cannot be easily modified for 32-bit Unicode.
2026 */
03f61bf3 2027
2028static int /* O - Result of comparison */
2029compare_compose(const void *k1, /* I - Key char */
2030 const void *k2) /* I - Map char */
c5061b15 2031{
03f61bf3 2032 cups_utf32_t *kp = (cups_utf32_t *)k1;
2033 /* Key char pointer */
2034 cups_ucs2_t *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2035 unsigned long key; /* Pair of key characters */
2036 unsigned long map; /* Pair of map characters */
2037 int result; /* Result Value */
2038
c5061b15 2039
2040 key = (*kp << 16);
2041 key |= *(kp + 1);
2042 map = (unsigned long) (*mp << 16);
2043 map |= (unsigned long) *(mp + 1);
03f61bf3 2044
c5061b15 2045 if (key >= map)
2046 result = (int) (key - map);
2047 else
2048 result = -1 * ((int) (map - key));
03f61bf3 2049
c5061b15 2050 return (result);
2051}
2052
03f61bf3 2053
c5061b15 2054/*
2055 * 'compare_decompose()' - Compare key for decompose match.
2056 */
03f61bf3 2057
2058static int /* O - Result of comparison */
2059compare_decompose(const void *k1, /* I - Key char */
2060 const void *k2) /* I - Map char */
c5061b15 2061{
03f61bf3 2062 cups_utf32_t *kp = (cups_utf32_t *)k1;
2063 /* Key char pointer */
2064 cups_ucs2_t *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2065 cups_ucs2_t ch; /* Key char as UCS-2 */
2066 int result; /* Result Value */
2067
c5061b15 2068
4a95bc63 2069 ch = (cups_ucs2_t) *kp;
03f61bf3 2070
c5061b15 2071 if (ch >= *mp)
2072 result = (int) (ch - *mp);
2073 else
2074 result = -1 * ((int) (*mp - ch));
03f61bf3 2075
c5061b15 2076 return (result);
2077}
2078
03f61bf3 2079
c5061b15 2080/*
2081 * 'compare_foldchar()' - Compare key for case fold match.
2082 */
03f61bf3 2083
2084static int /* O - Result of comparison */
2085compare_foldchar(const void *k1, /* I - Key char */
2086 const void *k2) /* I - Map char */
c5061b15 2087{
03f61bf3 2088 cups_utf32_t *kp = (cups_utf32_t *)k1;
2089 /* Key char pointer */
2090 cups_ucs2_t *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2091 cups_ucs2_t ch; /* Key char as UCS-2 */
2092 int result; /* Result Value */
2093
c5061b15 2094
4a95bc63 2095 ch = (cups_ucs2_t) *kp;
03f61bf3 2096
c5061b15 2097 if (ch >= *mp)
2098 result = (int) (ch - *mp);
2099 else
2100 result = -1 * ((int) (*mp - ch));
03f61bf3 2101
c5061b15 2102 return (result);
2103}
2104
03f61bf3 2105
c5061b15 2106/*
2107 * 'compare_combchar()' - Compare key for combining char match.
2108 */
03f61bf3 2109
2110static int /* O - Result of comparison */
2111compare_combchar(const void *k1, /* I - Key char */
2112 const void *k2) /* I - Map char */
c5061b15 2113{
03f61bf3 2114 cups_utf32_t *kp = (cups_utf32_t *)k1;
2115 /* Key char pointer */
2625096f 2116 _cups_comb_t *cp = (_cups_comb_t *)k2;/* Combining map row pointer */
03f61bf3 2117 cups_ucs2_t ch; /* Key char as UCS-2 */
2118 int result; /* Result Value */
2119
c5061b15 2120
4a95bc63 2121 ch = (cups_ucs2_t) *kp;
03f61bf3 2122
c5061b15 2123 if (ch >= cp->ch)
2124 result = (int) (ch - cp->ch);
2125 else
2126 result = -1 * ((int) (cp->ch - ch));
03f61bf3 2127
c5061b15 2128 return (result);
2129}
2130
03f61bf3 2131
c5061b15 2132/*
2133 * 'compare_breakchar()' - Compare key for line break char match.
2134 */
03f61bf3 2135
2136static int /* O - Result of comparison */
2137compare_breakchar(const void *k1, /* I - Key char */
2138 const void *k2) /* I - Map char */
c5061b15 2139{
03f61bf3 2140 cups_utf32_t *kp = (cups_utf32_t *)k1;
2141 /* Key char pointer */
2142 cups_ucs2_t *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2143 cups_ucs2_t ch; /* Key char as UCS-2 */
2144 int result; /* Result Value */
2145
c5061b15 2146
4a95bc63 2147 ch = (cups_ucs2_t) *kp;
03f61bf3 2148
c5061b15 2149 if (ch < *mp)
2150 result = -1 * (int) (*mp - ch);
2151 else if (ch > *(mp + 1))
2152 result = (int) (ch - *(mp + 1));
2153 else
2154 result = 0;
03f61bf3 2155
c5061b15 2156 return (result);
2157}
2158
03f61bf3 2159
c5061b15 2160/*
2161 * 'compare_propchar()' - Compare key for property char match.
2162 */
03f61bf3 2163
2164static int /* O - Result of comparison */
2165compare_propchar(const void *k1, /* I - Key char */
2166 const void *k2) /* I - Map char */
c5061b15 2167{
03f61bf3 2168 cups_utf32_t *kp = (cups_utf32_t *)k1;
2169 /* Key char pointer */
2625096f 2170 _cups_prop_t *pp = (_cups_prop_t *)k2;/* Property map row pointer */
03f61bf3 2171 cups_ucs2_t ch; /* Key char as UCS-2 */
2172 int result; /* Result Value */
2173
c5061b15 2174
4a95bc63 2175 ch = (cups_ucs2_t) *kp;
03f61bf3 2176
c5061b15 2177 if (ch >= pp->ch)
2178 result = (int) (ch - pp->ch);
2179 else
2180 result = -1 * ((int) (pp->ch - ch));
03f61bf3 2181
c5061b15 2182 return (result);
2183}
2184
03f61bf3 2185
c5061b15 2186/*
c9d3f842 2187 * End of "$Id$"
c5061b15 2188 */