cups/normalize.c

   1 /*
   2  * "$Id: normalize.c 4903 2006-01-10 20:02:46Z mike $"
   3  *
   4  *   Unicode normalization for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 1997-2006 by Easy Software Products.
   7  *
   8  *   These coded instructions, statements, and computer programs are
   9  *   the property of Easy Software Products and are protected by Federal
  10  *   copyright law.  Distribution and use rights are outlined in the
  11  *   file "LICENSE.txt" which should have been included with this file.
  12  *   If this file is missing or damaged please contact Easy Software
  13  *   Products at:
  14  *
  15  *       Attn: CUPS Licensing Information
  16  *       Easy Software Products
  17  *       44141 Airport View Drive, Suite 204
  18  *       Hollywood, Maryland 20636 USA
  19  *
  20  *       Voice: (301) 373-9600
  21  *       EMail: cups-info@cups.org
  22  *         WWW: http://www.cups.org
  23  *
  24  * Contents:
  25  *
  26  *   cupsNormalizeMapsGet()       - Get all norm maps to cache.
  27  *   cupsNormalizeMapsFree()      - Free all norm maps in cache.
  28  *   cupsNormalizeMapsFlush()     - Flush all norm maps in cache.
  29  *   cupsUTF8Normalize()          - Normalize UTF-8 string.
  30  *   cupsUTF32Normalize()         - Normalize UTF-32 string.
  31  *   cupsUTF8CaseFold()           - Case fold UTF-8 string.
  32  *   cupsUTF32CaseFold()          - Case fold UTF-32 string.
  33  *   cupsUTF8CompareCaseless()    - Compare case folded UTF-8 strings.
  34  *   cupsUTF32CompareCaseless()   - Compare case folded UTF-32 strings.
  35  *   cupsUTF8CompareIdentifier()  - Compare folded NFKC UTF-8 strings.
  36  *   cupsUTF32CompareIdentifier() - Compare folded NFKC UTF-32 strings.
  37  *   cupsUTF32CharacterProperty() - Get UTF-32 character property.
  38  *   get_general_category()       - Get UTF-32 Char General Category.
  39  *   get_bidi_category()          - Get UTF-32 Char Bidi Category.
  40  *   get_combining_class()        - Get UTF-32 Char Combining Class.
  41  *   get_break_class()            - Get UTF-32 Char Line Break Class.
  42  *   get_map_count()              - Count lines in a map file.
  43  *   get_normmap()                - Get Unicode norm map to cache.
  44  *   get_foldmap()                - Get Unicode casefold map to cache.
  45  *   get_propmap()                - Get Unicode property map to cache.
  46  *   get_combmap()                - Get Unicode combining map to cache.
  47  *   get_breakmap()               - Get Unicode break map to cache.
  48  *   compare_compose()            - Compare key for compose match.
  49  *   compare_decompose()          - Compare key for decompose match.
  50  *   compare_foldchar()           - Compare key for case fold match.
  51  *   compare_combchar()           - Compare key for combining match.
  52  *   compare_breakchar()          - Compare key for line break match.
  53  *   compare_propchar()           - Compare key for property char match.
  54  */
  55
  56 /*
  57  * Include necessary headers...
  58  */
  59
  60 #include "globals.h"
  61 #include "debug.h"
  62 #include <stdlib.h>
  63 #include <errno.h>
  64 #include <time.h>
  65
  66
  67 typedef struct                          /**** General Category Index Struct****/
  68 {
  69   cups_gencat_t gencat;                 /* General Category Value */
  70   const char    *str;                   /* General Category String */
  71 } gencat_t;
  72
  73 static const gencat_t gencat_index[] =  /* General Category Index */
  74 {
  75   { CUPS_GENCAT_LU, "Lu" },             /* Letter, Uppercase */
  76   { CUPS_GENCAT_LL, "Ll" },             /* Letter, Lowercase */
  77   { CUPS_GENCAT_LT, "Lt" },             /* Letter, Titlecase */
  78   { CUPS_GENCAT_LM, "Lm" },             /* Letter, Modifier */
  79   { CUPS_GENCAT_LO, "Lo" },             /* Letter, Other */
  80   { CUPS_GENCAT_MN, "Mn" },             /* Mark, Non-Spacing */
  81   { CUPS_GENCAT_MC, "Mc" },             /* Mark, Spacing Combining */
  82   { CUPS_GENCAT_ME, "Me" },             /* Mark, Enclosing */
  83   { CUPS_GENCAT_ND, "Nd" },             /* Number, Decimal Digit */
  84   { CUPS_GENCAT_NL, "Nl" },             /* Number, Letter */
  85   { CUPS_GENCAT_NO, "No" },             /* Number, Other */
  86   { CUPS_GENCAT_PC, "Pc" },             /* Punctuation, Connector */
  87   { CUPS_GENCAT_PD, "Pd" },             /* Punctuation, Dash */
  88   { CUPS_GENCAT_PS, "Ps" },             /* Punctuation, Open (start) */
  89   { CUPS_GENCAT_PE, "Pe" },             /* Punctuation, Close (end) */
  90   { CUPS_GENCAT_PI, "Pi" },             /* Punctuation, Initial Quote */
  91   { CUPS_GENCAT_PF, "Pf" },             /* Punctuation, Final Quote */
  92   { CUPS_GENCAT_PO, "Po" },             /* Punctuation, Other */
  93   { CUPS_GENCAT_SM, "Sm" },             /* Symbol, Math */
  94   { CUPS_GENCAT_SC, "Sc" },             /* Symbol, Currency */
  95   { CUPS_GENCAT_SK, "Sk" },             /* Symbol, Modifier */
  96   { CUPS_GENCAT_SO, "So" },             /* Symbol, Other */
  97   { CUPS_GENCAT_ZS, "Zs" },             /* Separator, Space */
  98   { CUPS_GENCAT_ZL, "Zl" },             /* Separator, Line */
  99   { CUPS_GENCAT_ZP, "Zp" },             /* Separator, Paragraph */
 100   { CUPS_GENCAT_CC, "Cc" },             /* Other, Control */
 101   { CUPS_GENCAT_CF, "Cf" },             /* Other, Format */
 102   { CUPS_GENCAT_CS, "Cs" },             /* Other, Surrogate */
 103   { CUPS_GENCAT_CO, "Co" },             /* Other, Private Use */
 104   { CUPS_GENCAT_CN, "Cn" },             /* Other, Not Assigned */
 105   { 0, NULL }
 106 };
 107
 108 static const char * const bidicat_index[] =
 109                                         /* Bidi Category Index */
 110 {
 111   "L",                                  /* Left-to-Right (Alpha, Syllabic, Ideographic) */
 112   "LRE",                                /* Left-to-Right Embedding (explicit) */
 113   "LRO",                                /* Left-to-Right Override (explicit) */
 114   "R",                                  /* Right-to-Left (Hebrew alphabet and most punct) */
 115   "AL",                                 /* Right-to-Left Arabic (Arabic, Thaana, Syriac) */
 116   "RLE",                                /* Right-to-Left Embedding (explicit) */
 117   "RLO",                                /* Right-to-Left Override (explicit) */
 118   "PDF",                                /* Pop Directional Format */
 119   "EN",                                 /* Euro Number (Euro and East Arabic-Indic digits) */
 120   "ES",                                 /* Euro Number Separator (Slash) */
 121   "ET",                                 /* Euro Number Termintor (Plus, Minus, Degree, etc) */
 122   "AN",                                 /* Arabic Number (Arabic-Indic digits, separators) */
 123   "CS",                                 /* Common Number Separator (Colon, Comma, Dot, etc) */
 124   "NSM",                                /* Non-Spacing Mark (category Mn / Me in UCD) */
 125   "BN",                                 /* Boundary Neutral (Formatting / Control chars) */
 126   "B",                                  /* Paragraph Separator */
 127   "S",                                  /* Segment Separator (Tab) */
 128   "WS",                                 /* Whitespace Space (Space, Line Separator, etc) */
 129   "ON",                                 /* Other Neutrals */
 130   NULL
 131 };
 132
 133 typedef struct                          /**** Line Break Class Index Struct****/
 134 {
 135   cups_break_class_t    breakclass;     /* Line Break Class Value */
 136   const char            *str;           /* Line Break Class String */
 137 } _cups_break_t;
 138
 139 static const _cups_break_t break_index[] =      /* Line Break Class Index */
 140 {
 141   { CUPS_BREAK_AI, "AI" },              /* Ambiguous (Alphabetic or Ideograph) */
 142   { CUPS_BREAK_AL, "AL" },              /* Ordinary Alpha/Symbol Chars (XP) */
 143   { CUPS_BREAK_BA, "BA" },              /* Break Opportunity After Chars (A) */
 144   { CUPS_BREAK_BB, "BB" },              /* Break Opportunities Before Chars (B) */
 145   { CUPS_BREAK_B2, "B2" },              /* Break Opportunity Either (B/A/XP) */
 146   { CUPS_BREAK_BK, "BK" },              /* Mandatory Break (A) (norm) */
 147   { CUPS_BREAK_CB, "CB" },              /* Contingent Break (B/A) (norm) */
 148   { CUPS_BREAK_CL, "CL" },              /* Closing Punctuation (XB) */
 149   { CUPS_BREAK_CM, "CM" },              /* Attached/Combining (XB) (norm) */
 150   { CUPS_BREAK_CR, "CR" },              /* Carriage Return (A) (norm) */
 151   { CUPS_BREAK_EX, "EX" },              /* Exclamation / Interrogation (XB) */
 152   { CUPS_BREAK_GL, "GL" },              /* Non-breaking ("Glue") (XB/XA) (norm) */
 153   { CUPS_BREAK_HY, "HY" },              /* Hyphen (XA) */
 154   { CUPS_BREAK_ID, "ID" },              /* Ideographic (B/A) */
 155   { CUPS_BREAK_IN, "IN" },              /* Inseparable chars (XP) */
 156   { CUPS_BREAK_IS, "IS" },              /* Numeric Separator (Infix) (XB) */
 157   { CUPS_BREAK_LF, "LF" },              /* Line Feed (A) (norm) */
 158   { CUPS_BREAK_NS, "NS" },              /* Non-starters (XB) */
 159   { CUPS_BREAK_NU, "NU" },              /* Numeric (XP) */
 160   { CUPS_BREAK_OP, "OP" },              /* Opening Punctuation (XA) */
 161   { CUPS_BREAK_PO, "PO" },              /* Postfix (Numeric) (XB) */
 162   { CUPS_BREAK_PR, "PR" },              /* Prefix (Numeric) (XA) */
 163   { CUPS_BREAK_QU, "QU" },              /* Ambiguous Quotation (XB/XA) */
 164   { CUPS_BREAK_SA, "SA" },              /* Context Dependent (SE Asian) (P) */
 165   { CUPS_BREAK_SG, "SG" },              /* Surrogates (XP) (norm) */
 166   { CUPS_BREAK_SP, "SP" },              /* Space (A) (norm) */
 167   { CUPS_BREAK_SY, "SY" },              /* Symbols Allowing Break After (A) */
 168   { CUPS_BREAK_XX, "XX" },              /* Unknown (XP) */
 169   { CUPS_BREAK_ZW, "ZW" },              /* Zero Width Space (A) (norm) */
 170   { 0, NULL }
 171 };
 172
 173 /*
 174  * Prototypes...
 175  */
 176
 177 static int compare_breakchar(const void *k1, const void *k2);
 178 static int compare_combchar(const void *k1, const void *k2);
 179 static int compare_compose(const void *k1, const void *k2);
 180 static int compare_decompose(const void *k1, const void *k2);
 181 static int compare_foldchar(const void *k1, const void *k2);
 182 static int compare_propchar(const void *k1, const void *k2);
 183 static int get_bidi_category(const cups_utf32_t ch);
 184 static int get_break_class(const cups_utf32_t ch);
 185 static int get_breakmap(void);
 186 static int get_combining_class(const cups_utf32_t ch);
 187 static int get_combmap(void);
 188 static int get_foldmap(const cups_folding_t fold);
 189 static int get_general_category(const cups_utf32_t ch);
 190 static int get_map_count(const char *filename);
 191 static int get_normmap(const cups_normalize_t normalize);
 192 static int get_propmap(void);
 193
 194
 195 /*
 196  * 'cupsNormalizeMapsGet()' - Get all normalization maps to cache.
 197  */
 198
 199 int                                     /* O - Zero or -1 on error */
 200 cupsNormalizeMapsGet(void)
 201 {
 202   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
 203   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
 204   _cups_globals_t       *cg = _cupsGlobals();
 205                                         /* Pointer to library globals */
 206
 207
 208  /*
 209   * See if we already have normalization maps loaded...
 210   */
 211
 212   if (cg->normmap_cache)
 213   {
 214     for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
 215       nmap->used ++;
 216
 217     for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
 218       fmap->used ++;
 219
 220     if (cg->combmap_cache)
 221       cg->combmap_cache->used ++;
 222
 223     if (cg->propmap_cache)
 224       cg->propmap_cache->used ++;
 225
 226     if (cg->breakmap_cache)
 227       cg->breakmap_cache->used ++;
 228
 229     return (0);
 230   }
 231
 232  /*
 233   * Get normalization maps...
 234   */
 235
 236   if (get_normmap(CUPS_NORM_NFD) < 0)
 237     return (-1);
 238
 239   if (get_normmap(CUPS_NORM_NFKD) < 0)
 240     return (-1);
 241
 242   if (get_normmap(CUPS_NORM_NFC) < 0)
 243     return (-1);
 244
 245  /*
 246   * Get case folding, combining class, character property maps...
 247   */
 248
 249   if (get_foldmap(CUPS_FOLD_SIMPLE) < 0)
 250     return (-1);
 251
 252   if (get_foldmap(CUPS_FOLD_FULL) < 0)
 253     return (-1);
 254
 255   if (get_propmap() < 0)
 256     return (-1);
 257
 258   if (get_combmap() < 0)
 259     return (-1);
 260
 261   if (get_breakmap() < 0)
 262     return (-1);
 263
 264   return (0);
 265 }
 266
 267
 268 /*
 269  * 'cupsNormalizeMapsFree()' - Free all normalization maps in cache.
 270  *
 271  * This does not actually free; use 'cupsNormalizeMapsFlush()' for that.
 272  */
 273
 274 int                                     /* O - Zero or -1 on error */
 275 cupsNormalizeMapsFree(void)
 276 {
 277   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
 278   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
 279   _cups_globals_t       *cg = _cupsGlobals();
 280                                         /* Pointer to library globals */
 281
 282
 283  /*
 284   * See if we already have normalization maps loaded...
 285   */
 286
 287   if (cg->normmap_cache == NULL)
 288     return (-1);
 289
 290   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
 291     if (nmap->used > 0)
 292       nmap->used --;
 293
 294   for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
 295     if (fmap->used > 0)
 296       fmap->used --;
 297
 298   if (cg->propmap_cache && (cg->propmap_cache->used > 0))
 299     cg->propmap_cache->used --;
 300
 301   if (cg->combmap_cache && (cg->combmap_cache->used > 0))
 302     cg->combmap_cache->used --;
 303
 304   if (cg->breakmap_cache && (cg->breakmap_cache->used > 0))
 305     cg->breakmap_cache->used --;
 306
 307   return (0);
 308 }
 309
 310
 311 /*
 312  * 'cupsNormalizeMapsFlush()' - Flush all normalization maps in cache.
 313  */
 314
 315 void
 316 cupsNormalizeMapsFlush(void)
 317 {
 318   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
 319   _cups_norm_map_t      *nextnorm;      /* Next Unicode Normalization Map */
 320   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
 321   _cups_fold_map_t      *nextfold;      /* Next Unicode Case Folding Map */
 322   _cups_globals_t       *cg = _cupsGlobals();
 323                                         /* Pointer to library globals */
 324
 325
 326  /*
 327   * Flush all normalization maps...
 328   */
 329
 330   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nextnorm)
 331   {
 332     free(nmap->uni2norm);
 333     nextnorm = nmap->next;
 334     free(nmap);
 335   }
 336
 337   cg->normmap_cache = NULL;
 338
 339   for (fmap = cg->foldmap_cache; fmap != NULL; fmap = nextfold)
 340   {
 341     free(fmap->uni2fold);
 342     nextfold = fmap->next;
 343     free(fmap);
 344   }
 345
 346   cg->foldmap_cache = NULL;
 347
 348   if (cg->propmap_cache)
 349   {
 350     free(cg->propmap_cache->uni2prop);
 351     free(cg->propmap_cache);
 352     cg->propmap_cache = NULL;
 353   }
 354
 355   if (cg->combmap_cache)
 356   {
 357     free(cg->combmap_cache->uni2comb);
 358     free(cg->combmap_cache);
 359     cg->combmap_cache = NULL;
 360   }
 361
 362   if (cg->breakmap_cache)
 363   {
 364     free(cg->breakmap_cache->uni2break);
 365     free(cg->breakmap_cache);
 366     cg->breakmap_cache = NULL;
 367   }
 368 }
 369
 370
 371 /*
 372  * 'cupsUTF8Normalize()' - Normalize UTF-8 string.
 373  *
 374  * Normalize UTF-8 string to Unicode UAX-15 Normalization Form
 375  * Note - Compatibility Normalization Forms (NFKD/NFKC) are
 376  * unsafe for subsequent transcoding to legacy charsets
 377  */
 378
 379 int                                     /* O - Count or -1 on error */
 380 cupsUTF8Normalize(
 381     cups_utf8_t            *dest,       /* O - Target string */
 382     const cups_utf8_t      *src,        /* I - Source string */
 383     const int              maxout,      /* I - Max output */
 384     const cups_normalize_t normalize)   /* I - Normalization */
 385 {
 386   int           len;                    /* String length */
 387   cups_utf32_t  work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
 388   cups_utf32_t  work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
 389
 390
 391  /*
 392   * Check for valid arguments and clear output...
 393   */
 394
 395   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 396     return (-1);
 397
 398   *dest = 0;
 399
 400  /*
 401   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 402   */
 403
 404   len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING);
 405
 406   if (len < 0)
 407     return (-1);
 408
 409  /*
 410   * Normalize internal UCS-4 to second internal UCS-4...
 411   */
 412
 413   len = cupsUTF32Normalize(work2, work1, CUPS_MAX_USTRING, normalize);
 414
 415   if (len < 0)
 416     return (-1);
 417
 418  /*
 419   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 420   */
 421
 422   len = cupsUTF32ToUTF8(dest, work2, maxout);
 423
 424   return (len);
 425 }
 426
 427
 428 /*
 429  * 'cupsUTF32Normalize()' - Normalize UTF-32 string.
 430  *
 431  * Normalize UTF-32 string to Unicode UAX-15 Normalization Form
 432  * Note - Compatibility Normalization Forms (NFKD/NFKC) are
 433  * unsafe for subsequent transcoding to legacy charsets
 434  */
 435
 436 int                                     /* O - Count or -1 on error */
 437 cupsUTF32Normalize(
 438     cups_utf32_t           *dest,       /* O - Target string */
 439     const cups_utf32_t     *src,        /* I - Source string */
 440     const int              maxout,      /* I - Max output */
 441     const cups_normalize_t normalize)   /* I - Normalization */
 442 {
 443   int                   i;              /* Looping variable */
 444   int                   result;         /* Result Value */
 445   cups_ucs2_t           *mp;            /* Map char pointer */
 446   int                   pass;           /* Pass count for each transform */
 447   int                   hit;            /* Hit count from binary search */
 448   cups_utf32_t          unichar1;       /* Unicode character value */
 449   cups_utf32_t          unichar2;       /* Unicode character value */
 450   _cups_comb_class_t    class1;         /* First Combining Class */
 451   _cups_comb_class_t    class2;         /* Second Combining Class */
 452   int                   len;            /* String length */
 453   cups_utf32_t          work1[CUPS_MAX_USTRING];
 454                                         /* First internal UCS-4 string */
 455   cups_utf32_t          work2[CUPS_MAX_USTRING];
 456                                         /* Second internal UCS-4 string */
 457   cups_utf32_t          *p1;            /* First UCS-4 string pointer */
 458   cups_utf32_t          *p2;            /* Second UCS-4 string pointer */
 459   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
 460   cups_normalize_t      decompose;      /* Decomposition Type */
 461   _cups_globals_t       *cg = _cupsGlobals();
 462                                         /* Pointer to library globals */
 463
 464
 465  /*
 466   * Check for valid arguments and clear output...
 467   */
 468
 469   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 470     return (-1);
 471
 472   *dest = 0;
 473
 474   result = cupsNormalizeMapsGet();
 475
 476   if (result < 0)
 477     return (-1);
 478
 479  /*
 480   * Find decomposition map...
 481   */
 482
 483   switch (normalize)
 484   {
 485     case CUPS_NORM_NFD:
 486     case CUPS_NORM_NFC:
 487         decompose = CUPS_NORM_NFD;
 488         break;
 489
 490     case CUPS_NORM_NFKD:
 491     case CUPS_NORM_NFKC:
 492         decompose = CUPS_NORM_NFKD;
 493         break;
 494
 495     default:
 496         return (-1);
 497   }
 498
 499   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
 500     if (nmap->normalize == decompose)
 501       break;
 502
 503   if (nmap == NULL)
 504     return (-1);
 505
 506  /*
 507   * Copy input to internal buffer...
 508   */
 509
 510   p1 = &work1[0];
 511
 512   for (i = 0; i < CUPS_MAX_USTRING; i ++)
 513   {
 514     if (*src == 0)
 515       break;
 516
 517     *p1 ++ = *src ++;
 518   }
 519
 520   *p1 = 0;
 521   len = i;
 522
 523  /*
 524   * Decompose until no further decomposition...
 525   */
 526
 527   for (pass = 0; pass < 20; pass ++)
 528   {
 529     p1 = &work1[0];
 530     p2 = &work2[0];
 531
 532     for (hit = 0; *p1 != 0; p1 ++)
 533     {
 534      /*
 535       * Check for decomposition defined...
 536       */
 537
 538       mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount,
 539                                   (sizeof(cups_ucs2_t) * 3), compare_decompose);
 540       if (mp == NULL)
 541       {
 542         *p2 ++ = *p1;
 543         continue;
 544       }
 545
 546      /*
 547       * Decompose input character to one or two output characters...
 548       */
 549
 550       hit ++;
 551       mp ++;
 552       *p2 ++ = (cups_utf32_t) *mp ++;
 553
 554       if (*mp != 0)
 555         *p2 ++ = (cups_utf32_t) *mp;
 556     }
 557
 558     *p2 = 0;
 559     len = (int)(p2 - &work2[0]);
 560
 561    /*
 562     * Check for decomposition finished...
 563     */
 564     if (hit == 0)
 565       break;
 566     memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1));
 567   }
 568
 569  /*
 570   * Canonical reorder until no further reordering...
 571   */
 572
 573   for (pass = 0; pass < 20; pass ++)
 574   {
 575     p1 = &work1[0];
 576
 577     for (hit = 0; *p1 != 0; p1 ++)
 578     {
 579      /*
 580       * Check for combining characters to reorder...
 581       */
 582
 583       unichar1 = *p1;
 584       unichar2 = *(p1 + 1);
 585
 586       if (unichar2 == 0)
 587         break;
 588
 589       class1 = get_combining_class(unichar1);
 590       class2 = get_combining_class(unichar2);
 591
 592       if ((class1 < 0) || (class2 < 0))
 593         return (-1);
 594
 595       if ((class1 == 0) || (class2 == 0))
 596         continue;
 597
 598       if (class1 <= class2)
 599         continue;
 600
 601      /*
 602       * Swap two combining characters...
 603       */
 604
 605       *p1 = unichar2;
 606       p1 ++;
 607       *p1 = unichar1;
 608       hit ++;
 609     }
 610
 611     if (hit == 0)
 612       break;
 613   }
 614
 615  /*
 616   * Check for decomposition only...
 617   */
 618
 619   if (normalize == CUPS_NORM_NFD || normalize == CUPS_NORM_NFKD)
 620   {
 621     memcpy(dest, work1, sizeof(cups_utf32_t) * (len + 1));
 622     return (len);
 623   }
 624
 625  /*
 626   * Find composition map...
 627   */
 628
 629   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
 630     if (nmap->normalize == CUPS_NORM_NFC)
 631       break;
 632
 633   if (nmap == NULL)
 634     return (-1);
 635
 636  /*
 637   * Compose until no further composition...
 638   */
 639
 640   for (pass = 0; pass < 20; pass ++)
 641   {
 642     p1 = &work1[0];
 643     p2 = &work2[0];
 644
 645     for (hit = 0; *p1 != 0; p1 ++)
 646     {
 647      /*
 648       * Check for composition defined...
 649       */
 650
 651       unichar1 = *p1;
 652       unichar2 = *(p1 + 1);
 653
 654       if (unichar2 == 0)
 655       {
 656         *p2 ++ = unichar1;
 657         break;
 658       }
 659
 660       mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount,
 661                                   (sizeof(cups_ucs2_t) * 3), compare_compose);
 662       if (mp == NULL)
 663       {
 664         *p2 ++ = *p1;
 665         continue;
 666       }
 667
 668      /*
 669       * Compose two input characters to one output character...
 670       */
 671
 672       hit ++;
 673       mp += 2;
 674       *p2 ++ = (cups_utf32_t) *mp;
 675       p1 ++;
 676     }
 677
 678     *p2 = 0;
 679     len = (int) (p2 - &work2[0]);
 680
 681    /*
 682     * Check for composition finished...
 683     */
 684
 685     if (hit == 0)
 686       break;
 687
 688     memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1));
 689   }
 690
 691   memcpy (dest, work1, sizeof(cups_utf32_t) * (len + 1));
 692
 693   cupsNormalizeMapsFree();
 694
 695   return (len);
 696 }
 697
 698
 699 /*
 700  * 'cupsUTF8CaseFold()' - Case fold UTF-8 string.
 701  *
 702  * Case Fold UTF-8 string per Unicode UAX-21 Section 2.3
 703  * Note - Case folding output is
 704  * unsafe for subsequent transcoding to legacy charsets
 705  */
 706
 707 int                                     /* O - Count or -1 on error */
 708 cupsUTF8CaseFold(
 709     cups_utf8_t          *dest,         /* O - Target string */
 710     const cups_utf8_t    *src,          /* I - Source string */
 711     const int            maxout,        /* I - Max output */
 712     const cups_folding_t fold)          /* I - Fold Mode */
 713 {
 714   int           len;                    /* String length */
 715   cups_utf32_t  work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
 716   cups_utf32_t  work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
 717
 718
 719  /*
 720   * Check for valid arguments and clear output...
 721   */
 722
 723   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 724     return (-1);
 725
 726   *dest = 0;
 727
 728   if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL)
 729     return (-1);
 730
 731  /*
 732   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 733   */
 734
 735   len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING);
 736
 737   if (len < 0)
 738     return (-1);
 739
 740  /*
 741   * Case Fold internal UCS-4 to second internal UCS-4...
 742   */
 743
 744   len = cupsUTF32CaseFold(work2, work1, CUPS_MAX_USTRING, fold);
 745
 746   if (len < 0)
 747     return (-1);
 748
 749  /*
 750   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 751   */
 752
 753   len = cupsUTF32ToUTF8(dest, work2, maxout);
 754
 755   return (len);
 756 }
 757
 758
 759 /*
 760  * 'cupsUTF32CaseFold()' - Case fold UTF-32 string.
 761  *
 762  * Case Fold UTF-32 string per Unicode UAX-21 Section 2.3
 763  * Note - Case folding output is
 764  * unsafe for subsequent transcoding to legacy charsets
 765  */
 766
 767 int                                     /* O - Count or -1 on error */
 768 cupsUTF32CaseFold(
 769     cups_utf32_t         *dest,         /* O - Target string */
 770     const cups_utf32_t   *src,          /* I - Source string */
 771     const int            maxout,        /* I - Max output */
 772     const cups_folding_t fold)          /* I - Fold Mode */
 773 {
 774   cups_utf32_t          *start = dest;  /* Start of destination string */
 775   int                   i;              /* Looping variable */
 776   int                   result;         /* Result Value */
 777   cups_ucs2_t           *mp;            /* Map char pointer */
 778   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
 779   _cups_globals_t       *cg = _cupsGlobals();
 780                                         /* Pointer to library globals */
 781
 782
 783  /*
 784   * Check for valid arguments and clear output...
 785   */
 786
 787   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 788     return (-1);
 789
 790   *dest = 0;
 791
 792   if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL)
 793     return (-1);
 794
 795  /*
 796   * Find case folding map...
 797   */
 798
 799   result = cupsNormalizeMapsGet();
 800
 801   if (result < 0)
 802     return (-1);
 803
 804   for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
 805     if (fmap->fold == fold)
 806       break;
 807
 808   if (fmap == NULL)
 809     return (-1);
 810
 811  /*
 812   * Case fold input string to output string...
 813   */
 814
 815   for (i = 0; i < (maxout - 1); i ++, src ++)
 816   {
 817    /*
 818     * Check for case folding defined...
 819     */
 820
 821     mp = (cups_ucs2_t *)bsearch(src, fmap->uni2fold, fmap->foldcount,
 822                                 (sizeof(cups_ucs2_t) * 4), compare_foldchar);
 823     if (mp == NULL)
 824     {
 825       *dest ++ = *src;
 826       continue;
 827     }
 828
 829    /*
 830     * Case fold input character to one or two output characters...
 831     */
 832
 833     mp ++;
 834     *dest ++ = (cups_utf32_t) *mp ++;
 835
 836     if (*mp != 0 && fold == CUPS_FOLD_FULL)
 837     {
 838       i ++;
 839       if (i >= (maxout - 1))
 840         break;
 841
 842       *dest ++ = (cups_utf32_t) *mp;
 843     }
 844   }
 845
 846   *dest = 0;
 847
 848   cupsNormalizeMapsFree();
 849
 850   return ((int)(dest - start));
 851 }
 852
 853
 854 /*
 855  * 'cupsUTF8CompareCaseless()' - Compare case folded UTF-8 strings.
 856  */
 857
 858 int                                     /* O - Difference of strings */
 859 cupsUTF8CompareCaseless(
 860     const cups_utf8_t *s1,              /* I - String1 */
 861     const cups_utf8_t *s2)              /* I - String2 */
 862 {
 863   int           difference;             /* Difference of two strings */
 864   int           len;                    /* String length */
 865   cups_utf32_t  work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
 866   cups_utf32_t  work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
 867
 868
 869  /*
 870   * Check for valid arguments...
 871   */
 872
 873   if (!s1 || !s2)
 874     return (-1);
 875
 876  /*
 877   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 878   */
 879
 880   len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING);
 881
 882   if (len < 0)
 883     return (-1);
 884
 885   len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING);
 886
 887   if (len < 0)
 888     return (-1);
 889
 890  /*
 891   * Compare first internal UCS-4 to second internal UCS-4...
 892   */
 893
 894   difference = cupsUTF32CompareCaseless(work1, work2);
 895
 896   return (difference);
 897 }
 898
 899
 900 /*
 901  * 'cupsUTF32CompareCaseless()' - Compare case folded UTF-32 strings.
 902  */
 903
 904 int                                     /* O - Difference of strings */
 905 cupsUTF32CompareCaseless(
 906     const cups_utf32_t *s1,             /* I - String1 */
 907     const cups_utf32_t *s2)             /* I - String2 */
 908 {
 909   int                   difference;     /* Difference of two strings */
 910   int                   len;            /* String length */
 911   cups_folding_t        fold = CUPS_FOLD_FULL;
 912                                         /* Case folding mode */
 913   cups_utf32_t          fold1[CUPS_MAX_USTRING];
 914                                         /* First UCS-4 folded string */
 915   cups_utf32_t          fold2[CUPS_MAX_USTRING];
 916                                         /* Second UCS-4 folded string */
 917   cups_utf32_t          *p1;            /* First UCS-4 string pointer */
 918   cups_utf32_t          *p2;            /* Second UCS-4 string pointer */
 919
 920
 921  /*
 922   * Check for valid arguments...
 923   */
 924
 925   if (!s1 || !s2)
 926     return (-1);
 927
 928  /*
 929   * Case Fold input UTF-32 strings to internal UCS-4 strings...
 930   */
 931
 932   len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold);
 933
 934   if (len < 0)
 935     return (-1);
 936
 937   len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold);
 938
 939   if (len < 0)
 940     return (-1);
 941
 942  /*
 943   * Compare first internal UCS-4 to second internal UCS-4...
 944   */
 945
 946   p1 = &fold1[0];
 947   p2 = &fold2[0];
 948
 949   for (;; p1 ++, p2 ++)
 950   {
 951     difference = (int) (*p1 - *p2);
 952
 953     if (difference != 0)
 954       break;
 955
 956     if ((*p1 == 0) && (*p2 == 0))
 957       break;
 958   }
 959
 960   return (difference);
 961 }
 962
 963
 964 /*
 965  * 'cupsUTF8CompareIdentifier()' - Compare folded NFKC UTF-8 strings.
 966  */
 967
 968 int                                     /* O - Result of comparison */
 969 cupsUTF8CompareIdentifier(
 970     const cups_utf8_t *s1,              /* I - String1 */
 971     const cups_utf8_t *s2)              /* I - String2 */
 972 {
 973   int           difference;             /* Difference of two strings */
 974   int           len;                    /* String length */
 975   cups_utf32_t  work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
 976   cups_utf32_t  work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
 977
 978
 979  /*
 980   * Check for valid arguments...
 981   */
 982
 983   if (!s1 || !s2)
 984     return (-1);
 985
 986  /*
 987   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 988   */
 989
 990   len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING);
 991
 992   if (len < 0)
 993     return (-1);
 994
 995   len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING);
 996
 997   if (len < 0)
 998     return (-1);
 999
1000  /*
1001   * Compare first internal UCS-4 to second internal UCS-4...
1002   */
1003
1004   difference = cupsUTF32CompareIdentifier(work1, work2);
1005
1006   return (difference);
1007 }
1008
1009
1010 /*
1011  * 'cupsUTF32CompareIdentifier()' - Compare folded NFKC UTF-32 strings.
1012  */
1013
1014 int                                     /* O - Result of comparison */
1015 cupsUTF32CompareIdentifier(
1016     const cups_utf32_t *s1,             /* I - String1 */
1017     const cups_utf32_t *s2)             /* I - String2 */
1018 {
1019   int                   difference;     /* Difference of two strings */
1020   int                   len;            /* String length */
1021   cups_folding_t        fold = CUPS_FOLD_FULL;
1022                                         /* Case folding mode */
1023   cups_utf32_t          fold1[CUPS_MAX_USTRING];
1024                                         /* First UCS-4 folded string */
1025   cups_utf32_t          fold2[CUPS_MAX_USTRING];
1026                                         /* Second UCS-4 folded string */
1027   cups_normalize_t      normalize = CUPS_NORM_NFKC;
1028                                         /* Normalization form */
1029   cups_utf32_t          norm1[CUPS_MAX_USTRING];
1030                                         /* First UCS-4 normalized string */
1031   cups_utf32_t          norm2[CUPS_MAX_USTRING];
1032                                         /* Second UCS-4 normalized string */
1033   cups_utf32_t          *p1;            /* First UCS-4 string pointer */
1034   cups_utf32_t          *p2;            /* Second UCS-4 string pointer */
1035
1036
1037  /*
1038   * Check for valid arguments...
1039   */
1040
1041   if (!s1 || !s2)
1042     return (-1);
1043
1044  /*
1045   * Case Fold input UTF-32 strings to internal UCS-4 strings...
1046   */
1047
1048   len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold);
1049
1050   if (len < 0)
1051     return (-1);
1052
1053   len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold);
1054
1055   if (len < 0)
1056     return (-1);
1057
1058  /*
1059   * Normalize internal UCS-4 strings to NFKC...
1060   */
1061
1062   len = cupsUTF32Normalize(norm1, fold1, CUPS_MAX_USTRING, normalize);
1063
1064   if (len < 0)
1065     return (-1);
1066
1067   len = cupsUTF32Normalize(norm2, fold2, CUPS_MAX_USTRING, normalize);
1068
1069   if (len < 0)
1070     return (-1);
1071
1072  /*
1073   * Compare first internal UCS-4 to second internal UCS-4...
1074   */
1075
1076   p1 = &norm1[0];
1077   p2 = &norm2[0];
1078
1079   for (;; p1 ++, p2 ++)
1080   {
1081     difference = (int) (*p1 - *p2);
1082
1083     if (difference != 0)
1084       break;
1085
1086     if ((*p1 == 0) && (*p2 == 0))
1087       break;
1088   }
1089
1090   return (difference);
1091 }
1092
1093
1094 /*
1095  * 'cupsUTF32CharacterProperty()' - Get UTF-32 character property.
1096  */
1097
1098 int                                     /* O - Result of comparison */
1099 cupsUTF32CharacterProperty(
1100     const cups_utf32_t    ch,           /* I - Source char */
1101     const cups_property_t prop)         /* I - Char Property */
1102 {
1103   int   result;                         /* Result Value */
1104
1105
1106  /*
1107   * Check for valid arguments...
1108   */
1109
1110   if (ch == 0)
1111     return (-1);
1112
1113  /*
1114   * Find character property...
1115   */
1116
1117   switch (prop)
1118   {
1119     case CUPS_PROP_GENERAL_CATEGORY:
1120         result = (get_general_category(ch));
1121         break;
1122
1123     case CUPS_PROP_BIDI_CATEGORY:
1124         result = (get_bidi_category(ch));
1125         break;
1126
1127     case CUPS_PROP_COMBINING_CLASS:
1128         result = (get_combining_class(ch));
1129         break;
1130     case CUPS_PROP_BREAK_CLASS:
1131         result = (get_break_class(ch));
1132         break;
1133
1134     default:
1135         return (-1);
1136   }
1137
1138   return (result);
1139 }
1140
1141
1142 /*
1143  * 'get_general_category()' - Get UTF-32 Character General Category.
1144  */
1145
1146 static int                              /* O - Class or -1 on error */
1147 get_general_category(
1148     const cups_utf32_t ch)              /* I - Source char */
1149 {
1150   int                   result;         /* Result Value */
1151   cups_gencat_t         gencat;         /* General Category Value */
1152   _cups_prop_map_t      *pmap;          /* Unicode Property Map */
1153   _cups_prop_t          *uni2prop;      /* Unicode Char -> Properties */
1154   _cups_globals_t       *cg = _cupsGlobals();
1155                                         /* Pointer to library globals */
1156
1157
1158  /*
1159   * Check for valid argument...
1160   */
1161
1162   if (ch == 0)
1163     return (-1);
1164
1165  /*
1166   * Find property map...
1167   */
1168
1169   result = cupsNormalizeMapsGet();
1170
1171   if (result < 0)
1172     return (-1);
1173
1174   pmap = cg->propmap_cache;
1175
1176   if (pmap == NULL)
1177     return (-1);
1178
1179  /*
1180   * Find character in map...
1181   */
1182
1183   uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount,
1184                                     (sizeof(_cups_prop_t)), compare_propchar);
1185
1186   cupsNormalizeMapsFree();
1187
1188   if (uni2prop == NULL)
1189     gencat = CUPS_GENCAT_CN;            /* Other, Not Assigned */
1190   else
1191     gencat = (cups_gencat_t)uni2prop->gencat;
1192
1193   result = (int)gencat;
1194
1195   return (result);
1196 }
1197
1198
1199 /*
1200  * 'get_bidi_category()' - Get UTF-32 Character Bidi Category.
1201  */
1202
1203 static int                              /* O - Class or -1 on error */
1204 get_bidi_category(const cups_utf32_t ch)/* I - Source char */
1205 {
1206   int                   result;         /* Result Value */
1207   cups_bidi_t   bidicat;        /* Bidi Category Value */
1208   _cups_prop_map_t      *pmap;          /* Unicode Property Map */
1209   _cups_prop_t          *uni2prop;      /* Unicode Char -> Properties */
1210   _cups_globals_t       *cg = _cupsGlobals();
1211                                         /* Pointer to library globals */
1212
1213
1214  /*
1215   * Check for valid argument...
1216   */
1217
1218   if (ch == 0)
1219     return (-1);
1220
1221  /*
1222   * Find property map...
1223   */
1224
1225   result = cupsNormalizeMapsGet();
1226
1227   if (result < 0)
1228     return (-1);
1229
1230   pmap = cg->propmap_cache;
1231
1232   if (pmap == NULL)
1233     return (-1);
1234
1235  /*
1236   * Find character in map...
1237   */
1238
1239   uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount,
1240                                     (sizeof(_cups_prop_t)), compare_propchar);
1241
1242   cupsNormalizeMapsFree();
1243
1244   if (uni2prop == NULL)
1245     bidicat = CUPS_BIDI_ON;             /* Other Neutral */
1246   else
1247     bidicat = (cups_bidi_t)uni2prop->bidicat;
1248
1249   result = (int)bidicat;
1250
1251   return (result);
1252 }
1253
1254 /*
1255  * 'get_combining_class()' - Get UTF-32 Character Combining Class.
1256  *
1257  * Note - Zero is non-combining (base character)
1258  */
1259
1260 static int                              /* O - Class or -1 on error */
1261 get_combining_class(
1262     const cups_utf32_t ch)              /* I - Source char */
1263 {
1264   int                   result;         /* Result Value */
1265   _cups_comb_map_t      *cmap;          /* Unicode Combining Class Map */
1266   _cups_comb_class_t    combclass;      /* Unicode Combining Class */
1267   _cups_comb_t          *uni2comb;      /* Unicode Char -> Combining Class */
1268   _cups_globals_t       *cg = _cupsGlobals();
1269                                         /* Pointer to library globals */
1270
1271
1272  /*
1273   * Check for valid argument...
1274   */
1275
1276   if (ch == 0)
1277     return (-1);
1278
1279  /*
1280   * Find combining class map...
1281   */
1282
1283   result = cupsNormalizeMapsGet();
1284
1285   if (result < 0)
1286     return (-1);
1287
1288   cmap = cg->combmap_cache;
1289
1290   if (cmap == NULL)
1291     return (-1);
1292
1293  /*
1294   * Find combining character in map...
1295   */
1296
1297   uni2comb = (_cups_comb_t *)bsearch(&ch, cmap->uni2comb, cmap->combcount,
1298                                     (sizeof(_cups_comb_t)), compare_combchar);
1299
1300   cupsNormalizeMapsFree();
1301
1302   if (uni2comb == NULL)
1303     combclass = 0;
1304   else
1305     combclass = (_cups_comb_class_t)uni2comb->combclass;
1306
1307   result = (int)combclass;
1308
1309   return (result);
1310 }
1311
1312
1313 /*
1314  * 'get_break_class()' - Get UTF-32 Character Line Break Class.
1315  */
1316
1317 static int                              /* O - Class or -1 on error */
1318 get_break_class(const cups_utf32_t ch)  /* I - Source char */
1319 {
1320   int                   result;         /* Result Value */
1321   _cups_break_map_t     *bmap;          /* Unicode Line Break Class Map */
1322   cups_break_class_t    breakclass;     /* Unicode Line Break Class */
1323   cups_ucs2_t           *uni2break;     /* Unicode -> Line Break Class */
1324   _cups_globals_t       *cg = _cupsGlobals();
1325                                         /* Pointer to library globals */
1326
1327
1328  /*
1329   * Check for valid argument...
1330   */
1331
1332   if (ch == 0)
1333     return (-1);
1334
1335  /*
1336   * Find line break class map...
1337   */
1338
1339   result = cupsNormalizeMapsGet();
1340
1341   if (result < 0)
1342     return (-1);
1343
1344   bmap = cg->breakmap_cache;
1345
1346   if (bmap == NULL)
1347     return (-1);
1348
1349  /*
1350   * Find line break character in map...
1351   */
1352
1353   uni2break = (cups_ucs2_t *)bsearch(&ch, bmap->uni2break, bmap->breakcount,
1354                                      (sizeof(cups_ucs2_t) * 3),
1355                                      compare_breakchar);
1356
1357   cupsNormalizeMapsFree();
1358
1359   if (uni2break == NULL)
1360     breakclass = CUPS_BREAK_AI;
1361   else
1362     breakclass = (cups_break_class_t)*(uni2break + 2);
1363
1364   result = (int)breakclass;
1365
1366   return (result);
1367 }
1368
1369
1370 /*
1371  * 'get_map_count()' - Count lines in a map file.
1372  */
1373
1374 static int                              /* O - Count or -1 on error */
1375 get_map_count(const char *filename)     /* I - Map Filename */
1376 {
1377   int           i;                      /* Looping variable */
1378   cups_file_t   *fp;                    /* Map input file pointer */
1379   char          *s;                     /* Line parsing pointer */
1380   char          line[256];              /* Line from input map file */
1381   cups_utf32_t  unichar;                /* Unicode character value */
1382
1383
1384  /*
1385   * Open map input file...
1386   */
1387
1388   if (!filename || !*filename)
1389     return (-1);
1390
1391   fp = cupsFileOpen(filename, "r");
1392   if (fp == NULL)
1393     return (-1);
1394
1395  /*
1396   * Count lines in map input file...
1397   */
1398
1399   for (i = 0; i < 50000;)
1400   {
1401     s = cupsFileGets(fp, line, sizeof(line));
1402     if (s == NULL)
1403       break;
1404     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1405       continue;
1406     if (strncmp (s, "0x", 2) == 0)
1407       s += 2;
1408     if (sscanf(s, "%lx", &unichar) != 1)
1409       break;
1410     if (unichar > 0xffff)
1411       break;
1412     i ++;
1413   }
1414   if (i == 0)
1415     i = -1;
1416
1417  /*
1418   * Close file and return map count (non-comment line count)...
1419   */
1420
1421   cupsFileClose(fp);
1422
1423   return (i);
1424 }
1425
1426
1427 /*
1428  * 'get_normmap()' - Get Unicode normalization map to cache.
1429  */
1430
1431 static int                              /* O - Zero or -1 on error */
1432 get_normmap(
1433     const cups_normalize_t normalize)   /* I - Normalization Form */
1434 {
1435   int                   i;              /* Looping variable */
1436   cups_utf32_t          unichar1;       /* Unicode character value */
1437   cups_utf32_t          unichar2;       /* Unicode character value */
1438   cups_utf32_t          unichar3;       /* Unicode character value */
1439   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
1440   int                   normcount;      /* Count of Unicode Source Chars */
1441   cups_ucs2_t           *uni2norm;      /* Unicode Char -> Normalization */
1442   char                  *mapname;       /* Normalization map name */
1443   char                  filename[1024]; /* Filename for charset map file */
1444   cups_file_t           *fp;            /* Normalization map file pointer */
1445   char                  *s;             /* Line parsing pointer */
1446   char                  line[256];      /* Line from input map file */
1447   _cups_globals_t       *cg = _cupsGlobals();
1448                                         /* Pointer to library globals */
1449
1450
1451  /*
1452   * See if we already have this normalization map loaded...
1453   */
1454
1455   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
1456     if (nmap->normalize == normalize)
1457       return (0);
1458
1459  /*
1460   * Get the mapping name...
1461   */
1462
1463   switch (normalize)
1464   {
1465     case CUPS_NORM_NFD:         /* Canonical Decomposition */
1466         mapname = "uni-nfd.txt";
1467         break;
1468
1469     case CUPS_NORM_NFKD:        /* Compatibility Decomposition */
1470         mapname = "uni-nfkd.txt";
1471         break;
1472
1473     case CUPS_NORM_NFC:         /* Canonical Composition */
1474         mapname = "uni-nfc.txt";
1475         break;
1476
1477     case CUPS_NORM_NFKC:        /* no such map file... */
1478     default:
1479         return (-1);
1480   }
1481
1482  /*
1483   * Open normalization map input file...
1484   */
1485
1486   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1487            cg->cups_datadir, mapname);
1488   if ((normcount = get_map_count(filename)) <= 0)
1489     return (-1);
1490
1491   fp = cupsFileOpen(filename, "r");
1492   if (fp == NULL)
1493     return (-1);
1494
1495  /*
1496   * Allocate memory for normalization map and add to cache...
1497   */
1498
1499   nmap = (_cups_norm_map_t *)calloc(1, sizeof(_cups_norm_map_t));
1500   if (nmap == NULL)
1501   {
1502     cupsFileClose(fp);
1503     return (-1);
1504   }
1505
1506   uni2norm = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * normcount);
1507   if (uni2norm == NULL)
1508   {
1509     free(nmap);
1510     cupsFileClose(fp);
1511     return (-1);
1512   }
1513   nmap->next = cg->normmap_cache;
1514   cg->normmap_cache = nmap;
1515   nmap->used ++;
1516   nmap->normalize = normalize;
1517   nmap->normcount = normcount;
1518   nmap->uni2norm = uni2norm;
1519
1520  /*
1521   * Save normalization map into memory for later use...
1522   */
1523   for (i = 0; i < normcount; )
1524   {
1525     s = cupsFileGets(fp, line, sizeof(line));
1526     if (s == NULL)
1527       break;
1528     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1529       continue;
1530     if (sscanf(s, "%lx %lx %lx", &unichar1, &unichar2, &unichar3) != 3)
1531        break;
1532     if ((unichar1 > 0xffff)
1533     || (unichar2 > 0xffff)
1534     || (unichar3 > 0xffff))
1535       break;
1536     *uni2norm ++ = (cups_ucs2_t) unichar1;
1537     *uni2norm ++ = (cups_ucs2_t) unichar2;
1538     *uni2norm ++ = (cups_ucs2_t) unichar3;
1539     i ++;
1540   }
1541   if (i < normcount)
1542     nmap->normcount = i;
1543   cupsFileClose(fp);
1544   return (0);
1545 }
1546
1547
1548 /*
1549  * 'get_foldmap()' - Get Unicode case folding map to cache.
1550  */
1551
1552 static int                              /* O - Zero or -1 on error */
1553 get_foldmap(const cups_folding_t fold)  /* I - Case folding type */
1554 {
1555   int                   i;              /* Looping variable */
1556   cups_utf32_t          unichar1;       /* Unicode character value */
1557   cups_utf32_t          unichar2;       /* Unicode character value */
1558   cups_utf32_t          unichar3;       /* Unicode character value */
1559   cups_utf32_t          unichar4;       /* Unicode character value */
1560   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
1561   int                   foldcount;      /* Count of Unicode Source Chars */
1562   cups_ucs2_t           *uni2fold;      /* Unicode -> Folded Char(s) */
1563   char                  *mapname;       /* Case Folding map name */
1564   char                  filename[1024]; /* Filename for charset map file */
1565   cups_file_t           *fp;            /* Case Folding map file pointer */
1566   char                  *s;             /* Line parsing pointer */
1567   char                  line[256];      /* Line from input map file */
1568   _cups_globals_t       *cg = _cupsGlobals();
1569                                         /* Pointer to library globals */
1570
1571
1572  /*
1573   * See if we already have this case folding map loaded...
1574   */
1575
1576   for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
1577     if (fmap->fold == fold)
1578       return (0);
1579
1580  /*
1581   * Get the mapping name...
1582   */
1583
1584   switch (fold)
1585   {
1586     case CUPS_FOLD_SIMPLE:      /* Simple case folding */
1587         mapname = "uni-fold.txt";
1588         break;
1589     case CUPS_FOLD_FULL:        /* Full case folding */
1590         mapname = "uni-full.txt";
1591         break;
1592     default:
1593         return (-1);
1594   }
1595
1596  /*
1597   * Open case folding map input file...
1598   */
1599
1600   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1601            cg->cups_datadir, mapname);
1602   if ((foldcount = get_map_count(filename)) <= 0)
1603     return (-1);
1604   fp = cupsFileOpen(filename, "r");
1605   if (fp == NULL)
1606     return (-1);
1607
1608  /*
1609   * Allocate memory for case folding map and add to cache...
1610   */
1611   fmap = (_cups_fold_map_t *)calloc(1, sizeof(_cups_fold_map_t));
1612   if (fmap == NULL)
1613   {
1614     cupsFileClose(fp);
1615     return (-1);
1616   }
1617   uni2fold = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 4 * foldcount);
1618   if (uni2fold == NULL)
1619   {
1620     free(fmap);
1621     cupsFileClose(fp);
1622     return (-1);
1623   }
1624   fmap->next = cg->foldmap_cache;
1625   cg->foldmap_cache = fmap;
1626   fmap->used ++;
1627   fmap->fold = fold;
1628   fmap->foldcount = foldcount;
1629   fmap->uni2fold = uni2fold;
1630
1631  /*
1632   * Save case folding map into memory for later use...
1633   */
1634
1635   for (i = 0; i < foldcount; )
1636   {
1637     s = cupsFileGets(fp, line, sizeof(line));
1638     if (s == NULL)
1639       break;
1640     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1641       continue;
1642     unichar1 = unichar2 = unichar3 = unichar4 = 0;
1643     if ((fold == CUPS_FOLD_SIMPLE)
1644     && (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2))
1645       break;
1646     if ((fold == CUPS_FOLD_FULL)
1647     && (sscanf(s, "%lx %lx %lx %lx",
1648                &unichar1, &unichar2, &unichar3, &unichar4) != 4))
1649       break;
1650     if ((unichar1 > 0xffff)
1651     || (unichar2 > 0xffff)
1652     || (unichar3 > 0xffff)
1653     || (unichar4 > 0xffff))
1654       break;
1655     *uni2fold ++ = (cups_ucs2_t) unichar1;
1656     *uni2fold ++ = (cups_ucs2_t) unichar2;
1657     *uni2fold ++ = (cups_ucs2_t) unichar3;
1658     *uni2fold ++ = (cups_ucs2_t) unichar4;
1659     i ++;
1660   }
1661   if (i < foldcount)
1662     fmap->foldcount = i;
1663   cupsFileClose(fp);
1664   return (0);
1665 }
1666
1667 /*
1668  * 'get_propmap()' - Get Unicode character property map to cache.
1669  */
1670
1671 static int                              /* O - Zero or -1 on error */
1672 get_propmap(void)
1673 {
1674   int                   i, j;           /* Looping variables */
1675   size_t                len;            /* String length */
1676   cups_utf32_t          unichar;        /* Unicode character value */
1677   cups_gencat_t         gencat;         /* General Category Value */
1678   cups_bidi_t   bidicat;        /* Bidi Category Value */
1679   _cups_prop_map_t      *pmap;          /* Unicode Char Property Map */
1680   int                   propcount;      /* Count of Unicode Source Chars */
1681   _cups_prop_t          *uni2prop;      /* Unicode Char -> Properties */
1682   char                  *mapname;       /* Char Property map name */
1683   char                  filename[1024]; /* Filename for charset map file */
1684   cups_file_t           *fp;            /* Char Property map file pointer */
1685   char                  *s;             /* Line parsing pointer */
1686   char                  line[256];      /* Line from input map file */
1687   _cups_globals_t       *cg = _cupsGlobals();
1688                                         /* Pointer to library globals */
1689
1690
1691  /*
1692   * See if we already have this char properties map loaded...
1693   */
1694
1695   if ((pmap = cg->propmap_cache) != NULL)
1696     return (0);
1697
1698  /*
1699   * Get the mapping name...
1700   */
1701
1702   mapname = "uni-prop.txt";
1703
1704  /*
1705   * Open char properties map input file...
1706   */
1707   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1708            cg->cups_datadir, mapname);
1709   if ((propcount = get_map_count(filename)) <= 0)
1710     return (-1);
1711   fp = cupsFileOpen(filename, "r");
1712   if (fp == NULL)
1713     return (-1);
1714
1715  /*
1716   * Allocate memory for char properties map and add to cache...
1717   */
1718   pmap = (_cups_prop_map_t *)calloc(1, sizeof(_cups_prop_map_t));
1719   if (pmap == NULL)
1720   {
1721     cupsFileClose(fp);
1722     return (-1);
1723   }
1724   uni2prop = (_cups_prop_t *)calloc(1, sizeof(_cups_prop_t) * propcount);
1725   if (uni2prop == NULL)
1726   {
1727     free(pmap);
1728     cupsFileClose(fp);
1729     return (-1);
1730   }
1731   cg->propmap_cache = pmap;
1732   pmap->used ++;
1733   pmap->propcount = propcount;
1734   pmap->uni2prop = uni2prop;
1735
1736  /*
1737   * Save char properties map into memory for later use...
1738   */
1739   for (i = 0; i < propcount; )
1740   {
1741     s = cupsFileGets(fp, line, sizeof(line));
1742     if (s == NULL)
1743       break;
1744     if (strlen(s) > 0)
1745       *(s + strlen(s) - 1) = '\0';
1746     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1747       continue;
1748     if (sscanf(s, "%lx", &unichar) != 1)
1749        break;
1750     if (unichar > 0xffff)
1751       break;
1752     while ((*s != '\0') && (*s != ';'))
1753       s ++;
1754     if (*s != ';')
1755       break;
1756     s ++;
1757     for (j = 0; gencat_index[j].str != NULL; j ++)
1758     {
1759       len = strlen(gencat_index[j].str);
1760       if (strncmp (s, gencat_index[j].str, len) == 0)
1761         break;
1762     }
1763     if (gencat_index[j].str == NULL)
1764       return (-1);
1765     gencat = gencat_index[j].gencat;
1766     while ((*s != '\0') && (*s != ';'))
1767       s ++;
1768     if (*s != ';')
1769       break;
1770     s ++;
1771     for (j = 0; bidicat_index[j] != NULL; j ++)
1772     {
1773       len = strlen(bidicat_index[j]);
1774       if (strncmp (s, bidicat_index[j], len) == 0)
1775         break;
1776     }
1777     if (bidicat_index[j] == NULL)
1778       return (-1);
1779     bidicat = (cups_bidi_t) j;
1780     uni2prop->ch = (cups_ucs2_t) unichar;
1781     uni2prop->gencat = (unsigned char) gencat;
1782     uni2prop->bidicat = (unsigned char) bidicat;
1783     uni2prop ++;
1784     i ++;
1785   }
1786   if (i < propcount)
1787     pmap->propcount = i;
1788   cupsFileClose(fp);
1789   return (0);
1790 }
1791
1792
1793 /*
1794  * 'get_combmap()' - Get Unicode combining class map to cache.
1795  */
1796
1797 static int                              /* O - Zero or -1 on error */
1798 get_combmap(void)
1799 {
1800   int                   i;              /* Looping variable */
1801   cups_utf32_t          unichar;        /* Unicode character value */
1802   int                   combclass;      /* Unicode char combining class */
1803   _cups_comb_map_t      *cmap;          /* Unicode Comb Class Map */
1804   int                   combcount;      /* Count of Unicode Source Chars */
1805   _cups_comb_t          *uni2comb;      /* Unicode Char -> Combining Class */
1806   char                  *mapname;       /* Comb Class map name */
1807   char                  filename[1024]; /* Filename for charset map file */
1808   cups_file_t           *fp;            /* Comb Class map file pointer */
1809   char                  *s;             /* Line parsing pointer */
1810   char                  line[256];      /* Line from input map file */
1811   _cups_globals_t       *cg = _cupsGlobals();
1812                                         /* Pointer to library globals */
1813
1814
1815  /*
1816   * See if we already have this combining class map loaded...
1817   */
1818
1819   if ((cmap = cg->combmap_cache) != NULL)
1820     return (0);
1821
1822  /*
1823   * Get the mapping name...
1824   */
1825
1826   mapname = "uni-comb.txt";
1827
1828  /*
1829   * Open combining class map input file...
1830   */
1831
1832   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1833            cg->cups_datadir, mapname);
1834   if ((combcount = get_map_count(filename)) <= 0)
1835     return (-1);
1836   fp = cupsFileOpen(filename, "r");
1837   if (fp == NULL)
1838     return (-1);
1839
1840  /*
1841   * Allocate memory for combining class map and add to cache...
1842   */
1843
1844   cmap = (_cups_comb_map_t *)calloc(1, sizeof(_cups_comb_map_t));
1845   if (cmap == NULL)
1846   {
1847     cupsFileClose(fp);
1848     return (-1);
1849   }
1850
1851   uni2comb = (_cups_comb_t *)calloc(1, sizeof(_cups_comb_t) * combcount);
1852   if (uni2comb == NULL)
1853   {
1854     free(cmap);
1855     cupsFileClose(fp);
1856     return (-1);
1857   }
1858   cg->combmap_cache = cmap;
1859   cmap->used ++;
1860   cmap->combcount = combcount;
1861   cmap->uni2comb = uni2comb;
1862
1863  /*
1864   * Save combining class map into memory for later use...
1865   */
1866   for (i = 0; i < combcount; )
1867   {
1868     s = cupsFileGets(fp, line, sizeof(line));
1869     if (s == NULL)
1870       break;
1871     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1872       continue;
1873     if (sscanf(s, "%lx", &unichar) != 1)
1874        break;
1875     if (unichar > 0xffff)
1876       break;
1877     while ((*s != '\0') && (*s != ';'))
1878       s ++;
1879     if (*s != ';')
1880       break;
1881     s ++;
1882     if (sscanf(s, "%d", &combclass) != 1)
1883        break;
1884     uni2comb->ch = (cups_ucs2_t) unichar;
1885     uni2comb->combclass = (unsigned char) combclass;
1886     uni2comb ++;
1887     i ++;
1888   }
1889   if (i < combcount)
1890     cmap->combcount = i;
1891   cupsFileClose(fp);
1892   return (0);
1893 }
1894
1895
1896 /*
1897  * 'get_breakmap()' - Get Unicode line break class map to cache.
1898  */
1899
1900 static int                              /* O - Zero or -1 on error */
1901 get_breakmap(void)
1902 {
1903   int                   i, j;           /* Looping variables */
1904   int                   len;            /* String length */
1905   cups_utf32_t          unichar1;       /* Unicode character value */
1906   cups_utf32_t          unichar2;       /* Unicode character value */
1907   cups_break_class_t    breakclass;     /* Unicode char line break class */
1908   _cups_break_map_t     *bmap;          /* Unicode Line Break Class Map */
1909   int                   breakcount;     /* Count of Unicode Source Chars */
1910   cups_ucs2_t           *uni2break;     /* Unicode -> Line Break Class */
1911   char                  *mapname;       /* Comb Class map name */
1912   char                  filename[1024]; /* Filename for charset map file */
1913   cups_file_t           *fp;            /* Comb Class map file pointer */
1914   char                  *s;             /* Line parsing pointer */
1915   char                  line[256];      /* Line from input map file */
1916   _cups_globals_t       *cg = _cupsGlobals();
1917                                         /* Pointer to library globals */
1918
1919
1920  /*
1921   * See if we already have this line break class map loaded...
1922   */
1923
1924   if ((bmap = cg->breakmap_cache) != NULL)
1925     return (0);
1926
1927  /*
1928   * Get the mapping name...
1929   */
1930
1931   mapname = "uni-line.txt";
1932
1933  /*
1934   * Open line break class map input file...
1935   */
1936
1937   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1938            cg->cups_datadir, mapname);
1939   if ((breakcount = get_map_count(filename)) <= 0)
1940     return (-1);
1941   fp = cupsFileOpen(filename, "r");
1942   if (fp == NULL)
1943     return (-1);
1944
1945  /*
1946   * Allocate memory for line break class map and add to cache...
1947   */
1948
1949   bmap = (_cups_break_map_t *)calloc(1, sizeof(_cups_break_map_t));
1950   if (bmap == NULL)
1951   {
1952     cupsFileClose(fp);
1953     return (-1);
1954   }
1955
1956   uni2break = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * breakcount);
1957   if (uni2break == NULL)
1958   {
1959     free(bmap);
1960     cupsFileClose(fp);
1961     return (-1);
1962   }
1963   cg->breakmap_cache = bmap;
1964   bmap->used ++;
1965   bmap->breakcount = breakcount;
1966   bmap->uni2break = uni2break;
1967
1968  /*
1969   * Save line break class map into memory for later use...
1970   */
1971   for (i = 0; i < breakcount; )
1972   {
1973     s = cupsFileGets(fp, line, sizeof(line));
1974     if (s == NULL)
1975       break;
1976     if (strlen(s) > 0)
1977       *(s + strlen(s) - 1) = '\0';
1978     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1979       continue;
1980     if (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2)
1981        break;
1982     if ((unichar1 > 0xffff)
1983     || (unichar2 > 0xffff))
1984       break;
1985     while ((*s != '\0') && (*s != ';'))
1986       s ++;
1987     if (*s != ';')
1988       break;
1989     s ++;
1990     for (j = 0; break_index[j].str != NULL; j ++)
1991     {
1992       len = strlen (break_index[j].str);
1993       if (strncmp (s, break_index[j].str, len) == 0)
1994         break;
1995     }
1996     if (break_index[j].str == NULL)
1997       return (-1);
1998     breakclass = break_index[j].breakclass;
1999     *uni2break ++ = (cups_ucs2_t) unichar1;
2000     *uni2break ++ = (cups_ucs2_t) unichar2;
2001     *uni2break ++ = (cups_ucs2_t) breakclass;
2002     i ++;
2003   }
2004   if (i < breakcount)
2005     bmap->breakcount = i;
2006   cupsFileClose(fp);
2007   return (0);
2008 }
2009
2010
2011 /*
2012  * 'compare_compose()' - Compare key for compose match.
2013  *
2014  * Note - This function cannot be easily modified for 32-bit Unicode.
2015  */
2016
2017 static int                              /* O - Result of comparison */
2018 compare_compose(const void *k1,         /* I - Key char */
2019                 const void *k2)         /* I - Map char */
2020 {
2021   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2022                                         /* Key char pointer */
2023   cups_ucs2_t   *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2024   unsigned long key;                    /* Pair of key characters */
2025   unsigned long map;                    /* Pair of map characters */
2026   int           result;                 /* Result Value */
2027
2028
2029   key = (*kp << 16);
2030   key |= *(kp + 1);
2031   map = (unsigned long) (*mp << 16);
2032   map |= (unsigned long) *(mp + 1);
2033
2034   if (key >= map)
2035     result = (int) (key - map);
2036   else
2037     result = -1 * ((int) (map - key));
2038
2039   return (result);
2040 }
2041
2042
2043 /*
2044  * 'compare_decompose()' - Compare key for decompose match.
2045  */
2046
2047 static int                              /* O - Result of comparison */
2048 compare_decompose(const void *k1,       /* I - Key char */
2049                   const void *k2)       /* I - Map char */
2050 {
2051   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2052                                         /* Key char pointer */
2053   cups_ucs2_t   *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2054   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2055   int           result;                 /* Result Value */
2056
2057
2058   ch = (cups_ucs2_t) *kp;
2059
2060   if (ch >= *mp)
2061     result = (int) (ch - *mp);
2062   else
2063     result = -1 * ((int) (*mp - ch));
2064
2065   return (result);
2066 }
2067
2068
2069 /*
2070  * 'compare_foldchar()' - Compare key for case fold match.
2071  */
2072
2073 static int                              /* O - Result of comparison */
2074 compare_foldchar(const void *k1,        /* I - Key char */
2075                  const void *k2)        /* I - Map char */
2076 {
2077   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2078                                         /* Key char pointer */
2079   cups_ucs2_t   *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2080   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2081   int           result;                 /* Result Value */
2082
2083
2084   ch = (cups_ucs2_t) *kp;
2085
2086   if (ch >= *mp)
2087     result = (int) (ch - *mp);
2088   else
2089     result = -1 * ((int) (*mp - ch));
2090
2091   return (result);
2092 }
2093
2094
2095 /*
2096  * 'compare_combchar()' - Compare key for combining char match.
2097  */
2098
2099 static int                              /* O - Result of comparison */
2100 compare_combchar(const void *k1,        /* I - Key char */
2101                  const void *k2)        /* I - Map char */
2102 {
2103   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2104                                         /* Key char pointer */
2105   _cups_comb_t  *cp = (_cups_comb_t *)k2;/* Combining map row pointer */
2106   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2107   int           result;                 /* Result Value */
2108
2109
2110   ch = (cups_ucs2_t) *kp;
2111
2112   if (ch >= cp->ch)
2113     result = (int) (ch - cp->ch);
2114   else
2115     result = -1 * ((int) (cp->ch - ch));
2116
2117   return (result);
2118 }
2119
2120
2121 /*
2122  * 'compare_breakchar()' - Compare key for line break char match.
2123  */
2124
2125 static int                              /* O - Result of comparison */
2126 compare_breakchar(const void *k1,       /* I - Key char */
2127                   const void *k2)       /* I - Map char */
2128 {
2129   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2130                                         /* Key char pointer */
2131   cups_ucs2_t   *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2132   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2133   int           result;                 /* Result Value */
2134
2135
2136   ch = (cups_ucs2_t) *kp;
2137
2138   if (ch < *mp)
2139     result = -1 * (int) (*mp - ch);
2140   else if (ch > *(mp + 1))
2141     result = (int) (ch - *(mp + 1));
2142   else
2143     result = 0;
2144
2145   return (result);
2146 }
2147
2148
2149 /*
2150  * 'compare_propchar()' - Compare key for property char match.
2151  */
2152
2153 static int                              /* O - Result of comparison */
2154 compare_propchar(const void *k1,        /* I - Key char */
2155                  const void *k2)        /* I - Map char */
2156 {
2157   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2158                                         /* Key char pointer */
2159   _cups_prop_t  *pp = (_cups_prop_t *)k2;/* Property map row pointer */
2160   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2161   int           result;                 /* Result Value */
2162
2163
2164   ch = (cups_ucs2_t) *kp;
2165
2166   if (ch >= pp->ch)
2167     result = (int) (ch - pp->ch);
2168   else
2169     result = -1 * ((int) (pp->ch - ch));
2170
2171   return (result);
2172 }
2173
2174
2175 /*
2176  * End of "$Id: normalize.c 4903 2006-01-10 20:02:46Z mike $"
2177  */