cups/normalize.c

   1 /*
   2  * "$Id: normalize.c 4967 2006-01-24 03:42:15Z mike $"
   3  *
   4  *   Unicode normalization for the Common UNIX Printing System (CUPS).
   5  *
   6  *   Copyright 1997-2006 by Easy Software Products.
   7  *
   8  *   These coded instructions, statements, and computer programs are
   9  *   the property of Easy Software Products and are protected by Federal
  10  *   copyright law.  Distribution and use rights are outlined in the
  11  *   file "LICENSE.txt" which should have been included with this file.
  12  *   If this file is missing or damaged please contact Easy Software
  13  *   Products at:
  14  *
  15  *       Attn: CUPS Licensing Information
  16  *       Easy Software Products
  17  *       44141 Airport View Drive, Suite 204
  18  *       Hollywood, Maryland 20636 USA
  19  *
  20  *       Voice: (301) 373-9600
  21  *       EMail: cups-info@cups.org
  22  *         WWW: http://www.cups.org
  23  *
  24  * Contents:
  25  *
  26  *   cupsNormalizeMapsGet()       - Get all norm maps to cache.
  27  *   cupsNormalizeMapsFree()      - Free all norm maps in cache.
  28  *   cupsNormalizeMapsFlush()     - Flush all norm maps in cache.
  29  *   _cupsNormalizeMapsFlush()    - Flush all normalization maps in cache.
  30  *   cupsUTF8Normalize()          - Normalize UTF-8 string.
  31  *   cupsUTF32Normalize()         - Normalize UTF-32 string.
  32  *   cupsUTF8CaseFold()           - Case fold UTF-8 string.
  33  *   cupsUTF32CaseFold()          - Case fold UTF-32 string.
  34  *   cupsUTF8CompareCaseless()    - Compare case folded UTF-8 strings.
  35  *   cupsUTF32CompareCaseless()   - Compare case folded UTF-32 strings.
  36  *   cupsUTF8CompareIdentifier()  - Compare folded NFKC UTF-8 strings.
  37  *   cupsUTF32CompareIdentifier() - Compare folded NFKC UTF-32 strings.
  38  *   cupsUTF32CharacterProperty() - Get UTF-32 character property.
  39  *   get_general_category()       - Get UTF-32 Char General Category.
  40  *   get_bidi_category()          - Get UTF-32 Char Bidi Category.
  41  *   get_combining_class()        - Get UTF-32 Char Combining Class.
  42  *   get_break_class()            - Get UTF-32 Char Line Break Class.
  43  *   get_map_count()              - Count lines in a map file.
  44  *   get_normmap()                - Get Unicode norm map to cache.
  45  *   get_foldmap()                - Get Unicode casefold map to cache.
  46  *   get_propmap()                - Get Unicode property map to cache.
  47  *   get_combmap()                - Get Unicode combining map to cache.
  48  *   get_breakmap()               - Get Unicode break map to cache.
  49  *   compare_compose()            - Compare key for compose match.
  50  *   compare_decompose()          - Compare key for decompose match.
  51  *   compare_foldchar()           - Compare key for case fold match.
  52  *   compare_combchar()           - Compare key for combining match.
  53  *   compare_breakchar()          - Compare key for line break match.
  54  *   compare_propchar()           - Compare key for property char match.
  55  */
  56
  57 /*
  58  * Include necessary headers...
  59  */
  60
  61 #include "globals.h"
  62 #include "debug.h"
  63 #include <stdlib.h>
  64 #include <errno.h>
  65 #include <time.h>
  66
  67
  68 typedef struct                          /**** General Category Index Struct****/
  69 {
  70   cups_gencat_t gencat;                 /* General Category Value */
  71   const char    *str;                   /* General Category String */
  72 } gencat_t;
  73
  74 static const gencat_t gencat_index[] =  /* General Category Index */
  75 {
  76   { CUPS_GENCAT_LU, "Lu" },             /* Letter, Uppercase */
  77   { CUPS_GENCAT_LL, "Ll" },             /* Letter, Lowercase */
  78   { CUPS_GENCAT_LT, "Lt" },             /* Letter, Titlecase */
  79   { CUPS_GENCAT_LM, "Lm" },             /* Letter, Modifier */
  80   { CUPS_GENCAT_LO, "Lo" },             /* Letter, Other */
  81   { CUPS_GENCAT_MN, "Mn" },             /* Mark, Non-Spacing */
  82   { CUPS_GENCAT_MC, "Mc" },             /* Mark, Spacing Combining */
  83   { CUPS_GENCAT_ME, "Me" },             /* Mark, Enclosing */
  84   { CUPS_GENCAT_ND, "Nd" },             /* Number, Decimal Digit */
  85   { CUPS_GENCAT_NL, "Nl" },             /* Number, Letter */
  86   { CUPS_GENCAT_NO, "No" },             /* Number, Other */
  87   { CUPS_GENCAT_PC, "Pc" },             /* Punctuation, Connector */
  88   { CUPS_GENCAT_PD, "Pd" },             /* Punctuation, Dash */
  89   { CUPS_GENCAT_PS, "Ps" },             /* Punctuation, Open (start) */
  90   { CUPS_GENCAT_PE, "Pe" },             /* Punctuation, Close (end) */
  91   { CUPS_GENCAT_PI, "Pi" },             /* Punctuation, Initial Quote */
  92   { CUPS_GENCAT_PF, "Pf" },             /* Punctuation, Final Quote */
  93   { CUPS_GENCAT_PO, "Po" },             /* Punctuation, Other */
  94   { CUPS_GENCAT_SM, "Sm" },             /* Symbol, Math */
  95   { CUPS_GENCAT_SC, "Sc" },             /* Symbol, Currency */
  96   { CUPS_GENCAT_SK, "Sk" },             /* Symbol, Modifier */
  97   { CUPS_GENCAT_SO, "So" },             /* Symbol, Other */
  98   { CUPS_GENCAT_ZS, "Zs" },             /* Separator, Space */
  99   { CUPS_GENCAT_ZL, "Zl" },             /* Separator, Line */
 100   { CUPS_GENCAT_ZP, "Zp" },             /* Separator, Paragraph */
 101   { CUPS_GENCAT_CC, "Cc" },             /* Other, Control */
 102   { CUPS_GENCAT_CF, "Cf" },             /* Other, Format */
 103   { CUPS_GENCAT_CS, "Cs" },             /* Other, Surrogate */
 104   { CUPS_GENCAT_CO, "Co" },             /* Other, Private Use */
 105   { CUPS_GENCAT_CN, "Cn" },             /* Other, Not Assigned */
 106   { 0, NULL }
 107 };
 108
 109 static const char * const bidicat_index[] =
 110                                         /* Bidi Category Index */
 111 {
 112   "L",                                  /* Left-to-Right (Alpha, Syllabic, Ideographic) */
 113   "LRE",                                /* Left-to-Right Embedding (explicit) */
 114   "LRO",                                /* Left-to-Right Override (explicit) */
 115   "R",                                  /* Right-to-Left (Hebrew alphabet and most punct) */
 116   "AL",                                 /* Right-to-Left Arabic (Arabic, Thaana, Syriac) */
 117   "RLE",                                /* Right-to-Left Embedding (explicit) */
 118   "RLO",                                /* Right-to-Left Override (explicit) */
 119   "PDF",                                /* Pop Directional Format */
 120   "EN",                                 /* Euro Number (Euro and East Arabic-Indic digits) */
 121   "ES",                                 /* Euro Number Separator (Slash) */
 122   "ET",                                 /* Euro Number Termintor (Plus, Minus, Degree, etc) */
 123   "AN",                                 /* Arabic Number (Arabic-Indic digits, separators) */
 124   "CS",                                 /* Common Number Separator (Colon, Comma, Dot, etc) */
 125   "NSM",                                /* Non-Spacing Mark (category Mn / Me in UCD) */
 126   "BN",                                 /* Boundary Neutral (Formatting / Control chars) */
 127   "B",                                  /* Paragraph Separator */
 128   "S",                                  /* Segment Separator (Tab) */
 129   "WS",                                 /* Whitespace Space (Space, Line Separator, etc) */
 130   "ON",                                 /* Other Neutrals */
 131   NULL
 132 };
 133
 134 typedef struct                          /**** Line Break Class Index Struct****/
 135 {
 136   cups_break_class_t    breakclass;     /* Line Break Class Value */
 137   const char            *str;           /* Line Break Class String */
 138 } _cups_break_t;
 139
 140 static const _cups_break_t break_index[] =      /* Line Break Class Index */
 141 {
 142   { CUPS_BREAK_AI, "AI" },              /* Ambiguous (Alphabetic or Ideograph) */
 143   { CUPS_BREAK_AL, "AL" },              /* Ordinary Alpha/Symbol Chars (XP) */
 144   { CUPS_BREAK_BA, "BA" },              /* Break Opportunity After Chars (A) */
 145   { CUPS_BREAK_BB, "BB" },              /* Break Opportunities Before Chars (B) */
 146   { CUPS_BREAK_B2, "B2" },              /* Break Opportunity Either (B/A/XP) */
 147   { CUPS_BREAK_BK, "BK" },              /* Mandatory Break (A) (norm) */
 148   { CUPS_BREAK_CB, "CB" },              /* Contingent Break (B/A) (norm) */
 149   { CUPS_BREAK_CL, "CL" },              /* Closing Punctuation (XB) */
 150   { CUPS_BREAK_CM, "CM" },              /* Attached/Combining (XB) (norm) */
 151   { CUPS_BREAK_CR, "CR" },              /* Carriage Return (A) (norm) */
 152   { CUPS_BREAK_EX, "EX" },              /* Exclamation / Interrogation (XB) */
 153   { CUPS_BREAK_GL, "GL" },              /* Non-breaking ("Glue") (XB/XA) (norm) */
 154   { CUPS_BREAK_HY, "HY" },              /* Hyphen (XA) */
 155   { CUPS_BREAK_ID, "ID" },              /* Ideographic (B/A) */
 156   { CUPS_BREAK_IN, "IN" },              /* Inseparable chars (XP) */
 157   { CUPS_BREAK_IS, "IS" },              /* Numeric Separator (Infix) (XB) */
 158   { CUPS_BREAK_LF, "LF" },              /* Line Feed (A) (norm) */
 159   { CUPS_BREAK_NS, "NS" },              /* Non-starters (XB) */
 160   { CUPS_BREAK_NU, "NU" },              /* Numeric (XP) */
 161   { CUPS_BREAK_OP, "OP" },              /* Opening Punctuation (XA) */
 162   { CUPS_BREAK_PO, "PO" },              /* Postfix (Numeric) (XB) */
 163   { CUPS_BREAK_PR, "PR" },              /* Prefix (Numeric) (XA) */
 164   { CUPS_BREAK_QU, "QU" },              /* Ambiguous Quotation (XB/XA) */
 165   { CUPS_BREAK_SA, "SA" },              /* Context Dependent (SE Asian) (P) */
 166   { CUPS_BREAK_SG, "SG" },              /* Surrogates (XP) (norm) */
 167   { CUPS_BREAK_SP, "SP" },              /* Space (A) (norm) */
 168   { CUPS_BREAK_SY, "SY" },              /* Symbols Allowing Break After (A) */
 169   { CUPS_BREAK_XX, "XX" },              /* Unknown (XP) */
 170   { CUPS_BREAK_ZW, "ZW" },              /* Zero Width Space (A) (norm) */
 171   { 0, NULL }
 172 };
 173
 174 /*
 175  * Prototypes...
 176  */
 177
 178 static int compare_breakchar(const void *k1, const void *k2);
 179 static int compare_combchar(const void *k1, const void *k2);
 180 static int compare_compose(const void *k1, const void *k2);
 181 static int compare_decompose(const void *k1, const void *k2);
 182 static int compare_foldchar(const void *k1, const void *k2);
 183 static int compare_propchar(const void *k1, const void *k2);
 184 static int get_bidi_category(const cups_utf32_t ch);
 185 static int get_break_class(const cups_utf32_t ch);
 186 static int get_breakmap(void);
 187 static int get_combining_class(const cups_utf32_t ch);
 188 static int get_combmap(void);
 189 static int get_foldmap(const cups_folding_t fold);
 190 static int get_general_category(const cups_utf32_t ch);
 191 static int get_map_count(const char *filename);
 192 static int get_normmap(const cups_normalize_t normalize);
 193 static int get_propmap(void);
 194
 195
 196 /*
 197  * 'cupsNormalizeMapsGet()' - Get all normalization maps to cache.
 198  */
 199
 200 int                                     /* O - Zero or -1 on error */
 201 cupsNormalizeMapsGet(void)
 202 {
 203   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
 204   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
 205   _cups_globals_t       *cg = _cupsGlobals();
 206                                         /* Pointer to library globals */
 207
 208
 209  /*
 210   * See if we already have normalization maps loaded...
 211   */
 212
 213   if (cg->normmap_cache)
 214   {
 215     for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
 216       nmap->used ++;
 217
 218     for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
 219       fmap->used ++;
 220
 221     if (cg->combmap_cache)
 222       cg->combmap_cache->used ++;
 223
 224     if (cg->propmap_cache)
 225       cg->propmap_cache->used ++;
 226
 227     if (cg->breakmap_cache)
 228       cg->breakmap_cache->used ++;
 229
 230     return (0);
 231   }
 232
 233  /*
 234   * Get normalization maps...
 235   */
 236
 237   if (get_normmap(CUPS_NORM_NFD) < 0)
 238     return (-1);
 239
 240   if (get_normmap(CUPS_NORM_NFKD) < 0)
 241     return (-1);
 242
 243   if (get_normmap(CUPS_NORM_NFC) < 0)
 244     return (-1);
 245
 246  /*
 247   * Get case folding, combining class, character property maps...
 248   */
 249
 250   if (get_foldmap(CUPS_FOLD_SIMPLE) < 0)
 251     return (-1);
 252
 253   if (get_foldmap(CUPS_FOLD_FULL) < 0)
 254     return (-1);
 255
 256   if (get_propmap() < 0)
 257     return (-1);
 258
 259   if (get_combmap() < 0)
 260     return (-1);
 261
 262   if (get_breakmap() < 0)
 263     return (-1);
 264
 265   return (0);
 266 }
 267
 268
 269 /*
 270  * 'cupsNormalizeMapsFree()' - Free all normalization maps in cache.
 271  *
 272  * This does not actually free; use 'cupsNormalizeMapsFlush()' for that.
 273  */
 274
 275 int                                     /* O - Zero or -1 on error */
 276 cupsNormalizeMapsFree(void)
 277 {
 278   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
 279   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
 280   _cups_globals_t       *cg = _cupsGlobals();
 281                                         /* Pointer to library globals */
 282
 283
 284  /*
 285   * See if we already have normalization maps loaded...
 286   */
 287
 288   if (cg->normmap_cache == NULL)
 289     return (-1);
 290
 291   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
 292     if (nmap->used > 0)
 293       nmap->used --;
 294
 295   for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
 296     if (fmap->used > 0)
 297       fmap->used --;
 298
 299   if (cg->propmap_cache && (cg->propmap_cache->used > 0))
 300     cg->propmap_cache->used --;
 301
 302   if (cg->combmap_cache && (cg->combmap_cache->used > 0))
 303     cg->combmap_cache->used --;
 304
 305   if (cg->breakmap_cache && (cg->breakmap_cache->used > 0))
 306     cg->breakmap_cache->used --;
 307
 308   return (0);
 309 }
 310
 311
 312 /*
 313  * 'cupsNormalizeMapsFlush()' - Flush all normalization maps in cache.
 314  */
 315
 316 void
 317 cupsNormalizeMapsFlush(void)
 318 {
 319   _cupsNormalizeMapsFlush(_cupsGlobals());
 320 }
 321
 322
 323 /*
 324  * '_cupsNormalizeMapsFlush()' - Flush all normalization maps in cache.
 325  */
 326
 327 void
 328 _cupsNormalizeMapsFlush(
 329     _cups_globals_t *cg)                /* I - Global data */
 330 {
 331   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
 332   _cups_norm_map_t      *nextnorm;      /* Next Unicode Normalization Map */
 333   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
 334   _cups_fold_map_t      *nextfold;      /* Next Unicode Case Folding Map */
 335
 336
 337  /*
 338   * Flush all normalization maps...
 339   */
 340
 341   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nextnorm)
 342   {
 343     free(nmap->uni2norm);
 344     nextnorm = nmap->next;
 345     free(nmap);
 346   }
 347
 348   cg->normmap_cache = NULL;
 349
 350   for (fmap = cg->foldmap_cache; fmap != NULL; fmap = nextfold)
 351   {
 352     free(fmap->uni2fold);
 353     nextfold = fmap->next;
 354     free(fmap);
 355   }
 356
 357   cg->foldmap_cache = NULL;
 358
 359   if (cg->propmap_cache)
 360   {
 361     free(cg->propmap_cache->uni2prop);
 362     free(cg->propmap_cache);
 363     cg->propmap_cache = NULL;
 364   }
 365
 366   if (cg->combmap_cache)
 367   {
 368     free(cg->combmap_cache->uni2comb);
 369     free(cg->combmap_cache);
 370     cg->combmap_cache = NULL;
 371   }
 372
 373   if (cg->breakmap_cache)
 374   {
 375     free(cg->breakmap_cache->uni2break);
 376     free(cg->breakmap_cache);
 377     cg->breakmap_cache = NULL;
 378   }
 379 }
 380
 381
 382 /*
 383  * 'cupsUTF8Normalize()' - Normalize UTF-8 string.
 384  *
 385  * Normalize UTF-8 string to Unicode UAX-15 Normalization Form
 386  * Note - Compatibility Normalization Forms (NFKD/NFKC) are
 387  * unsafe for subsequent transcoding to legacy charsets
 388  */
 389
 390 int                                     /* O - Count or -1 on error */
 391 cupsUTF8Normalize(
 392     cups_utf8_t            *dest,       /* O - Target string */
 393     const cups_utf8_t      *src,        /* I - Source string */
 394     const int              maxout,      /* I - Max output */
 395     const cups_normalize_t normalize)   /* I - Normalization */
 396 {
 397   int           len;                    /* String length */
 398   cups_utf32_t  work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
 399   cups_utf32_t  work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
 400
 401
 402  /*
 403   * Check for valid arguments and clear output...
 404   */
 405
 406   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 407     return (-1);
 408
 409   *dest = 0;
 410
 411  /*
 412   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 413   */
 414
 415   len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING);
 416
 417   if (len < 0)
 418     return (-1);
 419
 420  /*
 421   * Normalize internal UCS-4 to second internal UCS-4...
 422   */
 423
 424   len = cupsUTF32Normalize(work2, work1, CUPS_MAX_USTRING, normalize);
 425
 426   if (len < 0)
 427     return (-1);
 428
 429  /*
 430   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 431   */
 432
 433   len = cupsUTF32ToUTF8(dest, work2, maxout);
 434
 435   return (len);
 436 }
 437
 438
 439 /*
 440  * 'cupsUTF32Normalize()' - Normalize UTF-32 string.
 441  *
 442  * Normalize UTF-32 string to Unicode UAX-15 Normalization Form
 443  * Note - Compatibility Normalization Forms (NFKD/NFKC) are
 444  * unsafe for subsequent transcoding to legacy charsets
 445  */
 446
 447 int                                     /* O - Count or -1 on error */
 448 cupsUTF32Normalize(
 449     cups_utf32_t           *dest,       /* O - Target string */
 450     const cups_utf32_t     *src,        /* I - Source string */
 451     const int              maxout,      /* I - Max output */
 452     const cups_normalize_t normalize)   /* I - Normalization */
 453 {
 454   int                   i;              /* Looping variable */
 455   int                   result;         /* Result Value */
 456   cups_ucs2_t           *mp;            /* Map char pointer */
 457   int                   pass;           /* Pass count for each transform */
 458   int                   hit;            /* Hit count from binary search */
 459   cups_utf32_t          unichar1;       /* Unicode character value */
 460   cups_utf32_t          unichar2;       /* Unicode character value */
 461   _cups_comb_class_t    class1;         /* First Combining Class */
 462   _cups_comb_class_t    class2;         /* Second Combining Class */
 463   int                   len;            /* String length */
 464   cups_utf32_t          work1[CUPS_MAX_USTRING];
 465                                         /* First internal UCS-4 string */
 466   cups_utf32_t          work2[CUPS_MAX_USTRING];
 467                                         /* Second internal UCS-4 string */
 468   cups_utf32_t          *p1;            /* First UCS-4 string pointer */
 469   cups_utf32_t          *p2;            /* Second UCS-4 string pointer */
 470   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
 471   cups_normalize_t      decompose;      /* Decomposition Type */
 472   _cups_globals_t       *cg = _cupsGlobals();
 473                                         /* Pointer to library globals */
 474
 475
 476  /*
 477   * Check for valid arguments and clear output...
 478   */
 479
 480   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 481     return (-1);
 482
 483   *dest = 0;
 484
 485   result = cupsNormalizeMapsGet();
 486
 487   if (result < 0)
 488     return (-1);
 489
 490  /*
 491   * Find decomposition map...
 492   */
 493
 494   switch (normalize)
 495   {
 496     case CUPS_NORM_NFD:
 497     case CUPS_NORM_NFC:
 498         decompose = CUPS_NORM_NFD;
 499         break;
 500
 501     case CUPS_NORM_NFKD:
 502     case CUPS_NORM_NFKC:
 503         decompose = CUPS_NORM_NFKD;
 504         break;
 505
 506     default:
 507         return (-1);
 508   }
 509
 510   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
 511     if (nmap->normalize == decompose)
 512       break;
 513
 514   if (nmap == NULL)
 515     return (-1);
 516
 517  /*
 518   * Copy input to internal buffer...
 519   */
 520
 521   p1 = &work1[0];
 522
 523   for (i = 0; i < CUPS_MAX_USTRING; i ++)
 524   {
 525     if (*src == 0)
 526       break;
 527
 528     *p1 ++ = *src ++;
 529   }
 530
 531   *p1 = 0;
 532   len = i;
 533
 534  /*
 535   * Decompose until no further decomposition...
 536   */
 537
 538   for (pass = 0; pass < 20; pass ++)
 539   {
 540     p1 = &work1[0];
 541     p2 = &work2[0];
 542
 543     for (hit = 0; *p1 != 0; p1 ++)
 544     {
 545      /*
 546       * Check for decomposition defined...
 547       */
 548
 549       mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount,
 550                                   (sizeof(cups_ucs2_t) * 3), compare_decompose);
 551       if (mp == NULL)
 552       {
 553         *p2 ++ = *p1;
 554         continue;
 555       }
 556
 557      /*
 558       * Decompose input character to one or two output characters...
 559       */
 560
 561       hit ++;
 562       mp ++;
 563       *p2 ++ = (cups_utf32_t) *mp ++;
 564
 565       if (*mp != 0)
 566         *p2 ++ = (cups_utf32_t) *mp;
 567     }
 568
 569     *p2 = 0;
 570     len = (int)(p2 - &work2[0]);
 571
 572    /*
 573     * Check for decomposition finished...
 574     */
 575     if (hit == 0)
 576       break;
 577     memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1));
 578   }
 579
 580  /*
 581   * Canonical reorder until no further reordering...
 582   */
 583
 584   for (pass = 0; pass < 20; pass ++)
 585   {
 586     p1 = &work1[0];
 587
 588     for (hit = 0; *p1 != 0; p1 ++)
 589     {
 590      /*
 591       * Check for combining characters to reorder...
 592       */
 593
 594       unichar1 = *p1;
 595       unichar2 = *(p1 + 1);
 596
 597       if (unichar2 == 0)
 598         break;
 599
 600       class1 = get_combining_class(unichar1);
 601       class2 = get_combining_class(unichar2);
 602
 603       if ((class1 < 0) || (class2 < 0))
 604         return (-1);
 605
 606       if ((class1 == 0) || (class2 == 0))
 607         continue;
 608
 609       if (class1 <= class2)
 610         continue;
 611
 612      /*
 613       * Swap two combining characters...
 614       */
 615
 616       *p1 = unichar2;
 617       p1 ++;
 618       *p1 = unichar1;
 619       hit ++;
 620     }
 621
 622     if (hit == 0)
 623       break;
 624   }
 625
 626  /*
 627   * Check for decomposition only...
 628   */
 629
 630   if (normalize == CUPS_NORM_NFD || normalize == CUPS_NORM_NFKD)
 631   {
 632     memcpy(dest, work1, sizeof(cups_utf32_t) * (len + 1));
 633     return (len);
 634   }
 635
 636  /*
 637   * Find composition map...
 638   */
 639
 640   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
 641     if (nmap->normalize == CUPS_NORM_NFC)
 642       break;
 643
 644   if (nmap == NULL)
 645     return (-1);
 646
 647  /*
 648   * Compose until no further composition...
 649   */
 650
 651   for (pass = 0; pass < 20; pass ++)
 652   {
 653     p1 = &work1[0];
 654     p2 = &work2[0];
 655
 656     for (hit = 0; *p1 != 0; p1 ++)
 657     {
 658      /*
 659       * Check for composition defined...
 660       */
 661
 662       unichar1 = *p1;
 663       unichar2 = *(p1 + 1);
 664
 665       if (unichar2 == 0)
 666       {
 667         *p2 ++ = unichar1;
 668         break;
 669       }
 670
 671       mp = (cups_ucs2_t *)bsearch(p1, nmap->uni2norm, nmap->normcount,
 672                                   (sizeof(cups_ucs2_t) * 3), compare_compose);
 673       if (mp == NULL)
 674       {
 675         *p2 ++ = *p1;
 676         continue;
 677       }
 678
 679      /*
 680       * Compose two input characters to one output character...
 681       */
 682
 683       hit ++;
 684       mp += 2;
 685       *p2 ++ = (cups_utf32_t) *mp;
 686       p1 ++;
 687     }
 688
 689     *p2 = 0;
 690     len = (int) (p2 - &work2[0]);
 691
 692    /*
 693     * Check for composition finished...
 694     */
 695
 696     if (hit == 0)
 697       break;
 698
 699     memcpy (work1, work2, sizeof(cups_utf32_t) * (len + 1));
 700   }
 701
 702   memcpy (dest, work1, sizeof(cups_utf32_t) * (len + 1));
 703
 704   cupsNormalizeMapsFree();
 705
 706   return (len);
 707 }
 708
 709
 710 /*
 711  * 'cupsUTF8CaseFold()' - Case fold UTF-8 string.
 712  *
 713  * Case Fold UTF-8 string per Unicode UAX-21 Section 2.3
 714  * Note - Case folding output is
 715  * unsafe for subsequent transcoding to legacy charsets
 716  */
 717
 718 int                                     /* O - Count or -1 on error */
 719 cupsUTF8CaseFold(
 720     cups_utf8_t          *dest,         /* O - Target string */
 721     const cups_utf8_t    *src,          /* I - Source string */
 722     const int            maxout,        /* I - Max output */
 723     const cups_folding_t fold)          /* I - Fold Mode */
 724 {
 725   int           len;                    /* String length */
 726   cups_utf32_t  work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
 727   cups_utf32_t  work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
 728
 729
 730  /*
 731   * Check for valid arguments and clear output...
 732   */
 733
 734   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 735     return (-1);
 736
 737   *dest = 0;
 738
 739   if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL)
 740     return (-1);
 741
 742  /*
 743   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 744   */
 745
 746   len = cupsUTF8ToUTF32(work1, src, CUPS_MAX_USTRING);
 747
 748   if (len < 0)
 749     return (-1);
 750
 751  /*
 752   * Case Fold internal UCS-4 to second internal UCS-4...
 753   */
 754
 755   len = cupsUTF32CaseFold(work2, work1, CUPS_MAX_USTRING, fold);
 756
 757   if (len < 0)
 758     return (-1);
 759
 760  /*
 761   * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
 762   */
 763
 764   len = cupsUTF32ToUTF8(dest, work2, maxout);
 765
 766   return (len);
 767 }
 768
 769
 770 /*
 771  * 'cupsUTF32CaseFold()' - Case fold UTF-32 string.
 772  *
 773  * Case Fold UTF-32 string per Unicode UAX-21 Section 2.3
 774  * Note - Case folding output is
 775  * unsafe for subsequent transcoding to legacy charsets
 776  */
 777
 778 int                                     /* O - Count or -1 on error */
 779 cupsUTF32CaseFold(
 780     cups_utf32_t         *dest,         /* O - Target string */
 781     const cups_utf32_t   *src,          /* I - Source string */
 782     const int            maxout,        /* I - Max output */
 783     const cups_folding_t fold)          /* I - Fold Mode */
 784 {
 785   cups_utf32_t          *start = dest;  /* Start of destination string */
 786   int                   i;              /* Looping variable */
 787   int                   result;         /* Result Value */
 788   cups_ucs2_t           *mp;            /* Map char pointer */
 789   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
 790   _cups_globals_t       *cg = _cupsGlobals();
 791                                         /* Pointer to library globals */
 792
 793
 794  /*
 795   * Check for valid arguments and clear output...
 796   */
 797
 798   if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
 799     return (-1);
 800
 801   *dest = 0;
 802
 803   if (fold != CUPS_FOLD_SIMPLE && fold != CUPS_FOLD_FULL)
 804     return (-1);
 805
 806  /*
 807   * Find case folding map...
 808   */
 809
 810   result = cupsNormalizeMapsGet();
 811
 812   if (result < 0)
 813     return (-1);
 814
 815   for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
 816     if (fmap->fold == fold)
 817       break;
 818
 819   if (fmap == NULL)
 820     return (-1);
 821
 822  /*
 823   * Case fold input string to output string...
 824   */
 825
 826   for (i = 0; i < (maxout - 1); i ++, src ++)
 827   {
 828    /*
 829     * Check for case folding defined...
 830     */
 831
 832     mp = (cups_ucs2_t *)bsearch(src, fmap->uni2fold, fmap->foldcount,
 833                                 (sizeof(cups_ucs2_t) * 4), compare_foldchar);
 834     if (mp == NULL)
 835     {
 836       *dest ++ = *src;
 837       continue;
 838     }
 839
 840    /*
 841     * Case fold input character to one or two output characters...
 842     */
 843
 844     mp ++;
 845     *dest ++ = (cups_utf32_t) *mp ++;
 846
 847     if (*mp != 0 && fold == CUPS_FOLD_FULL)
 848     {
 849       i ++;
 850       if (i >= (maxout - 1))
 851         break;
 852
 853       *dest ++ = (cups_utf32_t) *mp;
 854     }
 855   }
 856
 857   *dest = 0;
 858
 859   cupsNormalizeMapsFree();
 860
 861   return ((int)(dest - start));
 862 }
 863
 864
 865 /*
 866  * 'cupsUTF8CompareCaseless()' - Compare case folded UTF-8 strings.
 867  */
 868
 869 int                                     /* O - Difference of strings */
 870 cupsUTF8CompareCaseless(
 871     const cups_utf8_t *s1,              /* I - String1 */
 872     const cups_utf8_t *s2)              /* I - String2 */
 873 {
 874   int           difference;             /* Difference of two strings */
 875   int           len;                    /* String length */
 876   cups_utf32_t  work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
 877   cups_utf32_t  work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
 878
 879
 880  /*
 881   * Check for valid arguments...
 882   */
 883
 884   if (!s1 || !s2)
 885     return (-1);
 886
 887  /*
 888   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 889   */
 890
 891   len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING);
 892
 893   if (len < 0)
 894     return (-1);
 895
 896   len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING);
 897
 898   if (len < 0)
 899     return (-1);
 900
 901  /*
 902   * Compare first internal UCS-4 to second internal UCS-4...
 903   */
 904
 905   difference = cupsUTF32CompareCaseless(work1, work2);
 906
 907   return (difference);
 908 }
 909
 910
 911 /*
 912  * 'cupsUTF32CompareCaseless()' - Compare case folded UTF-32 strings.
 913  */
 914
 915 int                                     /* O - Difference of strings */
 916 cupsUTF32CompareCaseless(
 917     const cups_utf32_t *s1,             /* I - String1 */
 918     const cups_utf32_t *s2)             /* I - String2 */
 919 {
 920   int                   difference;     /* Difference of two strings */
 921   int                   len;            /* String length */
 922   cups_folding_t        fold = CUPS_FOLD_FULL;
 923                                         /* Case folding mode */
 924   cups_utf32_t          fold1[CUPS_MAX_USTRING];
 925                                         /* First UCS-4 folded string */
 926   cups_utf32_t          fold2[CUPS_MAX_USTRING];
 927                                         /* Second UCS-4 folded string */
 928   cups_utf32_t          *p1;            /* First UCS-4 string pointer */
 929   cups_utf32_t          *p2;            /* Second UCS-4 string pointer */
 930
 931
 932  /*
 933   * Check for valid arguments...
 934   */
 935
 936   if (!s1 || !s2)
 937     return (-1);
 938
 939  /*
 940   * Case Fold input UTF-32 strings to internal UCS-4 strings...
 941   */
 942
 943   len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold);
 944
 945   if (len < 0)
 946     return (-1);
 947
 948   len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold);
 949
 950   if (len < 0)
 951     return (-1);
 952
 953  /*
 954   * Compare first internal UCS-4 to second internal UCS-4...
 955   */
 956
 957   p1 = &fold1[0];
 958   p2 = &fold2[0];
 959
 960   for (;; p1 ++, p2 ++)
 961   {
 962     difference = (int) (*p1 - *p2);
 963
 964     if (difference != 0)
 965       break;
 966
 967     if ((*p1 == 0) && (*p2 == 0))
 968       break;
 969   }
 970
 971   return (difference);
 972 }
 973
 974
 975 /*
 976  * 'cupsUTF8CompareIdentifier()' - Compare folded NFKC UTF-8 strings.
 977  */
 978
 979 int                                     /* O - Result of comparison */
 980 cupsUTF8CompareIdentifier(
 981     const cups_utf8_t *s1,              /* I - String1 */
 982     const cups_utf8_t *s2)              /* I - String2 */
 983 {
 984   int           difference;             /* Difference of two strings */
 985   int           len;                    /* String length */
 986   cups_utf32_t  work1[CUPS_MAX_USTRING];/* First internal UCS-4 string */
 987   cups_utf32_t  work2[CUPS_MAX_USTRING];/* Second internal UCS-4 string */
 988
 989
 990  /*
 991   * Check for valid arguments...
 992   */
 993
 994   if (!s1 || !s2)
 995     return (-1);
 996
 997  /*
 998   * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
 999   */
1000
1001   len = cupsUTF8ToUTF32(work1, s1, CUPS_MAX_USTRING);
1002
1003   if (len < 0)
1004     return (-1);
1005
1006   len = cupsUTF8ToUTF32(work2, s2, CUPS_MAX_USTRING);
1007
1008   if (len < 0)
1009     return (-1);
1010
1011  /*
1012   * Compare first internal UCS-4 to second internal UCS-4...
1013   */
1014
1015   difference = cupsUTF32CompareIdentifier(work1, work2);
1016
1017   return (difference);
1018 }
1019
1020
1021 /*
1022  * 'cupsUTF32CompareIdentifier()' - Compare folded NFKC UTF-32 strings.
1023  */
1024
1025 int                                     /* O - Result of comparison */
1026 cupsUTF32CompareIdentifier(
1027     const cups_utf32_t *s1,             /* I - String1 */
1028     const cups_utf32_t *s2)             /* I - String2 */
1029 {
1030   int                   difference;     /* Difference of two strings */
1031   int                   len;            /* String length */
1032   cups_folding_t        fold = CUPS_FOLD_FULL;
1033                                         /* Case folding mode */
1034   cups_utf32_t          fold1[CUPS_MAX_USTRING];
1035                                         /* First UCS-4 folded string */
1036   cups_utf32_t          fold2[CUPS_MAX_USTRING];
1037                                         /* Second UCS-4 folded string */
1038   cups_normalize_t      normalize = CUPS_NORM_NFKC;
1039                                         /* Normalization form */
1040   cups_utf32_t          norm1[CUPS_MAX_USTRING];
1041                                         /* First UCS-4 normalized string */
1042   cups_utf32_t          norm2[CUPS_MAX_USTRING];
1043                                         /* Second UCS-4 normalized string */
1044   cups_utf32_t          *p1;            /* First UCS-4 string pointer */
1045   cups_utf32_t          *p2;            /* Second UCS-4 string pointer */
1046
1047
1048  /*
1049   * Check for valid arguments...
1050   */
1051
1052   if (!s1 || !s2)
1053     return (-1);
1054
1055  /*
1056   * Case Fold input UTF-32 strings to internal UCS-4 strings...
1057   */
1058
1059   len = cupsUTF32CaseFold(fold1, s1, CUPS_MAX_USTRING, fold);
1060
1061   if (len < 0)
1062     return (-1);
1063
1064   len = cupsUTF32CaseFold(fold2, s2, CUPS_MAX_USTRING, fold);
1065
1066   if (len < 0)
1067     return (-1);
1068
1069  /*
1070   * Normalize internal UCS-4 strings to NFKC...
1071   */
1072
1073   len = cupsUTF32Normalize(norm1, fold1, CUPS_MAX_USTRING, normalize);
1074
1075   if (len < 0)
1076     return (-1);
1077
1078   len = cupsUTF32Normalize(norm2, fold2, CUPS_MAX_USTRING, normalize);
1079
1080   if (len < 0)
1081     return (-1);
1082
1083  /*
1084   * Compare first internal UCS-4 to second internal UCS-4...
1085   */
1086
1087   p1 = &norm1[0];
1088   p2 = &norm2[0];
1089
1090   for (;; p1 ++, p2 ++)
1091   {
1092     difference = (int) (*p1 - *p2);
1093
1094     if (difference != 0)
1095       break;
1096
1097     if ((*p1 == 0) && (*p2 == 0))
1098       break;
1099   }
1100
1101   return (difference);
1102 }
1103
1104
1105 /*
1106  * 'cupsUTF32CharacterProperty()' - Get UTF-32 character property.
1107  */
1108
1109 int                                     /* O - Result of comparison */
1110 cupsUTF32CharacterProperty(
1111     const cups_utf32_t    ch,           /* I - Source char */
1112     const cups_property_t prop)         /* I - Char Property */
1113 {
1114   int   result;                         /* Result Value */
1115
1116
1117  /*
1118   * Check for valid arguments...
1119   */
1120
1121   if (ch == 0)
1122     return (-1);
1123
1124  /*
1125   * Find character property...
1126   */
1127
1128   switch (prop)
1129   {
1130     case CUPS_PROP_GENERAL_CATEGORY:
1131         result = (get_general_category(ch));
1132         break;
1133
1134     case CUPS_PROP_BIDI_CATEGORY:
1135         result = (get_bidi_category(ch));
1136         break;
1137
1138     case CUPS_PROP_COMBINING_CLASS:
1139         result = (get_combining_class(ch));
1140         break;
1141     case CUPS_PROP_BREAK_CLASS:
1142         result = (get_break_class(ch));
1143         break;
1144
1145     default:
1146         return (-1);
1147   }
1148
1149   return (result);
1150 }
1151
1152
1153 /*
1154  * 'get_general_category()' - Get UTF-32 Character General Category.
1155  */
1156
1157 static int                              /* O - Class or -1 on error */
1158 get_general_category(
1159     const cups_utf32_t ch)              /* I - Source char */
1160 {
1161   int                   result;         /* Result Value */
1162   cups_gencat_t         gencat;         /* General Category Value */
1163   _cups_prop_map_t      *pmap;          /* Unicode Property Map */
1164   _cups_prop_t          *uni2prop;      /* Unicode Char -> Properties */
1165   _cups_globals_t       *cg = _cupsGlobals();
1166                                         /* Pointer to library globals */
1167
1168
1169  /*
1170   * Check for valid argument...
1171   */
1172
1173   if (ch == 0)
1174     return (-1);
1175
1176  /*
1177   * Find property map...
1178   */
1179
1180   result = cupsNormalizeMapsGet();
1181
1182   if (result < 0)
1183     return (-1);
1184
1185   pmap = cg->propmap_cache;
1186
1187   if (pmap == NULL)
1188     return (-1);
1189
1190  /*
1191   * Find character in map...
1192   */
1193
1194   uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount,
1195                                     (sizeof(_cups_prop_t)), compare_propchar);
1196
1197   cupsNormalizeMapsFree();
1198
1199   if (uni2prop == NULL)
1200     gencat = CUPS_GENCAT_CN;            /* Other, Not Assigned */
1201   else
1202     gencat = (cups_gencat_t)uni2prop->gencat;
1203
1204   result = (int)gencat;
1205
1206   return (result);
1207 }
1208
1209
1210 /*
1211  * 'get_bidi_category()' - Get UTF-32 Character Bidi Category.
1212  */
1213
1214 static int                              /* O - Class or -1 on error */
1215 get_bidi_category(const cups_utf32_t ch)/* I - Source char */
1216 {
1217   int                   result;         /* Result Value */
1218   cups_bidi_t   bidicat;        /* Bidi Category Value */
1219   _cups_prop_map_t      *pmap;          /* Unicode Property Map */
1220   _cups_prop_t          *uni2prop;      /* Unicode Char -> Properties */
1221   _cups_globals_t       *cg = _cupsGlobals();
1222                                         /* Pointer to library globals */
1223
1224
1225  /*
1226   * Check for valid argument...
1227   */
1228
1229   if (ch == 0)
1230     return (-1);
1231
1232  /*
1233   * Find property map...
1234   */
1235
1236   result = cupsNormalizeMapsGet();
1237
1238   if (result < 0)
1239     return (-1);
1240
1241   pmap = cg->propmap_cache;
1242
1243   if (pmap == NULL)
1244     return (-1);
1245
1246  /*
1247   * Find character in map...
1248   */
1249
1250   uni2prop = (_cups_prop_t *)bsearch(&ch, pmap->uni2prop, pmap->propcount,
1251                                     (sizeof(_cups_prop_t)), compare_propchar);
1252
1253   cupsNormalizeMapsFree();
1254
1255   if (uni2prop == NULL)
1256     bidicat = CUPS_BIDI_ON;             /* Other Neutral */
1257   else
1258     bidicat = (cups_bidi_t)uni2prop->bidicat;
1259
1260   result = (int)bidicat;
1261
1262   return (result);
1263 }
1264
1265 /*
1266  * 'get_combining_class()' - Get UTF-32 Character Combining Class.
1267  *
1268  * Note - Zero is non-combining (base character)
1269  */
1270
1271 static int                              /* O - Class or -1 on error */
1272 get_combining_class(
1273     const cups_utf32_t ch)              /* I - Source char */
1274 {
1275   int                   result;         /* Result Value */
1276   _cups_comb_map_t      *cmap;          /* Unicode Combining Class Map */
1277   _cups_comb_class_t    combclass;      /* Unicode Combining Class */
1278   _cups_comb_t          *uni2comb;      /* Unicode Char -> Combining Class */
1279   _cups_globals_t       *cg = _cupsGlobals();
1280                                         /* Pointer to library globals */
1281
1282
1283  /*
1284   * Check for valid argument...
1285   */
1286
1287   if (ch == 0)
1288     return (-1);
1289
1290  /*
1291   * Find combining class map...
1292   */
1293
1294   result = cupsNormalizeMapsGet();
1295
1296   if (result < 0)
1297     return (-1);
1298
1299   cmap = cg->combmap_cache;
1300
1301   if (cmap == NULL)
1302     return (-1);
1303
1304  /*
1305   * Find combining character in map...
1306   */
1307
1308   uni2comb = (_cups_comb_t *)bsearch(&ch, cmap->uni2comb, cmap->combcount,
1309                                     (sizeof(_cups_comb_t)), compare_combchar);
1310
1311   cupsNormalizeMapsFree();
1312
1313   if (uni2comb == NULL)
1314     combclass = 0;
1315   else
1316     combclass = (_cups_comb_class_t)uni2comb->combclass;
1317
1318   result = (int)combclass;
1319
1320   return (result);
1321 }
1322
1323
1324 /*
1325  * 'get_break_class()' - Get UTF-32 Character Line Break Class.
1326  */
1327
1328 static int                              /* O - Class or -1 on error */
1329 get_break_class(const cups_utf32_t ch)  /* I - Source char */
1330 {
1331   int                   result;         /* Result Value */
1332   _cups_break_map_t     *bmap;          /* Unicode Line Break Class Map */
1333   cups_break_class_t    breakclass;     /* Unicode Line Break Class */
1334   cups_ucs2_t           *uni2break;     /* Unicode -> Line Break Class */
1335   _cups_globals_t       *cg = _cupsGlobals();
1336                                         /* Pointer to library globals */
1337
1338
1339  /*
1340   * Check for valid argument...
1341   */
1342
1343   if (ch == 0)
1344     return (-1);
1345
1346  /*
1347   * Find line break class map...
1348   */
1349
1350   result = cupsNormalizeMapsGet();
1351
1352   if (result < 0)
1353     return (-1);
1354
1355   bmap = cg->breakmap_cache;
1356
1357   if (bmap == NULL)
1358     return (-1);
1359
1360  /*
1361   * Find line break character in map...
1362   */
1363
1364   uni2break = (cups_ucs2_t *)bsearch(&ch, bmap->uni2break, bmap->breakcount,
1365                                      (sizeof(cups_ucs2_t) * 3),
1366                                      compare_breakchar);
1367
1368   cupsNormalizeMapsFree();
1369
1370   if (uni2break == NULL)
1371     breakclass = CUPS_BREAK_AI;
1372   else
1373     breakclass = (cups_break_class_t)*(uni2break + 2);
1374
1375   result = (int)breakclass;
1376
1377   return (result);
1378 }
1379
1380
1381 /*
1382  * 'get_map_count()' - Count lines in a map file.
1383  */
1384
1385 static int                              /* O - Count or -1 on error */
1386 get_map_count(const char *filename)     /* I - Map Filename */
1387 {
1388   int           i;                      /* Looping variable */
1389   cups_file_t   *fp;                    /* Map input file pointer */
1390   char          *s;                     /* Line parsing pointer */
1391   char          line[256];              /* Line from input map file */
1392   cups_utf32_t  unichar;                /* Unicode character value */
1393
1394
1395  /*
1396   * Open map input file...
1397   */
1398
1399   if (!filename || !*filename)
1400     return (-1);
1401
1402   fp = cupsFileOpen(filename, "r");
1403   if (fp == NULL)
1404     return (-1);
1405
1406  /*
1407   * Count lines in map input file...
1408   */
1409
1410   for (i = 0; i < 50000;)
1411   {
1412     s = cupsFileGets(fp, line, sizeof(line));
1413     if (s == NULL)
1414       break;
1415     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1416       continue;
1417     if (strncmp (s, "0x", 2) == 0)
1418       s += 2;
1419     if (sscanf(s, "%lx", &unichar) != 1)
1420       break;
1421     if (unichar > 0xffff)
1422       break;
1423     i ++;
1424   }
1425   if (i == 0)
1426     i = -1;
1427
1428  /*
1429   * Close file and return map count (non-comment line count)...
1430   */
1431
1432   cupsFileClose(fp);
1433
1434   return (i);
1435 }
1436
1437
1438 /*
1439  * 'get_normmap()' - Get Unicode normalization map to cache.
1440  */
1441
1442 static int                              /* O - Zero or -1 on error */
1443 get_normmap(
1444     const cups_normalize_t normalize)   /* I - Normalization Form */
1445 {
1446   int                   i;              /* Looping variable */
1447   cups_utf32_t          unichar1;       /* Unicode character value */
1448   cups_utf32_t          unichar2;       /* Unicode character value */
1449   cups_utf32_t          unichar3;       /* Unicode character value */
1450   _cups_norm_map_t      *nmap;          /* Unicode Normalization Map */
1451   int                   normcount;      /* Count of Unicode Source Chars */
1452   cups_ucs2_t           *uni2norm;      /* Unicode Char -> Normalization */
1453   char                  *mapname;       /* Normalization map name */
1454   char                  filename[1024]; /* Filename for charset map file */
1455   cups_file_t           *fp;            /* Normalization map file pointer */
1456   char                  *s;             /* Line parsing pointer */
1457   char                  line[256];      /* Line from input map file */
1458   _cups_globals_t       *cg = _cupsGlobals();
1459                                         /* Pointer to library globals */
1460
1461
1462  /*
1463   * See if we already have this normalization map loaded...
1464   */
1465
1466   for (nmap = cg->normmap_cache; nmap != NULL; nmap = nmap->next)
1467     if (nmap->normalize == normalize)
1468       return (0);
1469
1470  /*
1471   * Get the mapping name...
1472   */
1473
1474   switch (normalize)
1475   {
1476     case CUPS_NORM_NFD:         /* Canonical Decomposition */
1477         mapname = "uni-nfd.txt";
1478         break;
1479
1480     case CUPS_NORM_NFKD:        /* Compatibility Decomposition */
1481         mapname = "uni-nfkd.txt";
1482         break;
1483
1484     case CUPS_NORM_NFC:         /* Canonical Composition */
1485         mapname = "uni-nfc.txt";
1486         break;
1487
1488     case CUPS_NORM_NFKC:        /* no such map file... */
1489     default:
1490         return (-1);
1491   }
1492
1493  /*
1494   * Open normalization map input file...
1495   */
1496
1497   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1498            cg->cups_datadir, mapname);
1499   if ((normcount = get_map_count(filename)) <= 0)
1500     return (-1);
1501
1502   fp = cupsFileOpen(filename, "r");
1503   if (fp == NULL)
1504     return (-1);
1505
1506  /*
1507   * Allocate memory for normalization map and add to cache...
1508   */
1509
1510   nmap = (_cups_norm_map_t *)calloc(1, sizeof(_cups_norm_map_t));
1511   if (nmap == NULL)
1512   {
1513     cupsFileClose(fp);
1514     return (-1);
1515   }
1516
1517   uni2norm = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * normcount);
1518   if (uni2norm == NULL)
1519   {
1520     free(nmap);
1521     cupsFileClose(fp);
1522     return (-1);
1523   }
1524   nmap->next = cg->normmap_cache;
1525   cg->normmap_cache = nmap;
1526   nmap->used ++;
1527   nmap->normalize = normalize;
1528   nmap->normcount = normcount;
1529   nmap->uni2norm = uni2norm;
1530
1531  /*
1532   * Save normalization map into memory for later use...
1533   */
1534   for (i = 0; i < normcount; )
1535   {
1536     s = cupsFileGets(fp, line, sizeof(line));
1537     if (s == NULL)
1538       break;
1539     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1540       continue;
1541     if (sscanf(s, "%lx %lx %lx", &unichar1, &unichar2, &unichar3) != 3)
1542        break;
1543     if ((unichar1 > 0xffff)
1544     || (unichar2 > 0xffff)
1545     || (unichar3 > 0xffff))
1546       break;
1547     *uni2norm ++ = (cups_ucs2_t) unichar1;
1548     *uni2norm ++ = (cups_ucs2_t) unichar2;
1549     *uni2norm ++ = (cups_ucs2_t) unichar3;
1550     i ++;
1551   }
1552   if (i < normcount)
1553     nmap->normcount = i;
1554   cupsFileClose(fp);
1555   return (0);
1556 }
1557
1558
1559 /*
1560  * 'get_foldmap()' - Get Unicode case folding map to cache.
1561  */
1562
1563 static int                              /* O - Zero or -1 on error */
1564 get_foldmap(const cups_folding_t fold)  /* I - Case folding type */
1565 {
1566   int                   i;              /* Looping variable */
1567   cups_utf32_t          unichar1;       /* Unicode character value */
1568   cups_utf32_t          unichar2;       /* Unicode character value */
1569   cups_utf32_t          unichar3;       /* Unicode character value */
1570   cups_utf32_t          unichar4;       /* Unicode character value */
1571   _cups_fold_map_t      *fmap;          /* Unicode Case Folding Map */
1572   int                   foldcount;      /* Count of Unicode Source Chars */
1573   cups_ucs2_t           *uni2fold;      /* Unicode -> Folded Char(s) */
1574   char                  *mapname;       /* Case Folding map name */
1575   char                  filename[1024]; /* Filename for charset map file */
1576   cups_file_t           *fp;            /* Case Folding map file pointer */
1577   char                  *s;             /* Line parsing pointer */
1578   char                  line[256];      /* Line from input map file */
1579   _cups_globals_t       *cg = _cupsGlobals();
1580                                         /* Pointer to library globals */
1581
1582
1583  /*
1584   * See if we already have this case folding map loaded...
1585   */
1586
1587   for (fmap = cg->foldmap_cache; fmap != NULL; fmap = fmap->next)
1588     if (fmap->fold == fold)
1589       return (0);
1590
1591  /*
1592   * Get the mapping name...
1593   */
1594
1595   switch (fold)
1596   {
1597     case CUPS_FOLD_SIMPLE:      /* Simple case folding */
1598         mapname = "uni-fold.txt";
1599         break;
1600     case CUPS_FOLD_FULL:        /* Full case folding */
1601         mapname = "uni-full.txt";
1602         break;
1603     default:
1604         return (-1);
1605   }
1606
1607  /*
1608   * Open case folding map input file...
1609   */
1610
1611   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1612            cg->cups_datadir, mapname);
1613   if ((foldcount = get_map_count(filename)) <= 0)
1614     return (-1);
1615   fp = cupsFileOpen(filename, "r");
1616   if (fp == NULL)
1617     return (-1);
1618
1619  /*
1620   * Allocate memory for case folding map and add to cache...
1621   */
1622   fmap = (_cups_fold_map_t *)calloc(1, sizeof(_cups_fold_map_t));
1623   if (fmap == NULL)
1624   {
1625     cupsFileClose(fp);
1626     return (-1);
1627   }
1628   uni2fold = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 4 * foldcount);
1629   if (uni2fold == NULL)
1630   {
1631     free(fmap);
1632     cupsFileClose(fp);
1633     return (-1);
1634   }
1635   fmap->next = cg->foldmap_cache;
1636   cg->foldmap_cache = fmap;
1637   fmap->used ++;
1638   fmap->fold = fold;
1639   fmap->foldcount = foldcount;
1640   fmap->uni2fold = uni2fold;
1641
1642  /*
1643   * Save case folding map into memory for later use...
1644   */
1645
1646   for (i = 0; i < foldcount; )
1647   {
1648     s = cupsFileGets(fp, line, sizeof(line));
1649     if (s == NULL)
1650       break;
1651     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1652       continue;
1653     unichar1 = unichar2 = unichar3 = unichar4 = 0;
1654     if ((fold == CUPS_FOLD_SIMPLE)
1655     && (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2))
1656       break;
1657     if ((fold == CUPS_FOLD_FULL)
1658     && (sscanf(s, "%lx %lx %lx %lx",
1659                &unichar1, &unichar2, &unichar3, &unichar4) != 4))
1660       break;
1661     if ((unichar1 > 0xffff)
1662     || (unichar2 > 0xffff)
1663     || (unichar3 > 0xffff)
1664     || (unichar4 > 0xffff))
1665       break;
1666     *uni2fold ++ = (cups_ucs2_t) unichar1;
1667     *uni2fold ++ = (cups_ucs2_t) unichar2;
1668     *uni2fold ++ = (cups_ucs2_t) unichar3;
1669     *uni2fold ++ = (cups_ucs2_t) unichar4;
1670     i ++;
1671   }
1672   if (i < foldcount)
1673     fmap->foldcount = i;
1674   cupsFileClose(fp);
1675   return (0);
1676 }
1677
1678 /*
1679  * 'get_propmap()' - Get Unicode character property map to cache.
1680  */
1681
1682 static int                              /* O - Zero or -1 on error */
1683 get_propmap(void)
1684 {
1685   int                   i, j;           /* Looping variables */
1686   size_t                len;            /* String length */
1687   cups_utf32_t          unichar;        /* Unicode character value */
1688   cups_gencat_t         gencat;         /* General Category Value */
1689   cups_bidi_t   bidicat;        /* Bidi Category Value */
1690   _cups_prop_map_t      *pmap;          /* Unicode Char Property Map */
1691   int                   propcount;      /* Count of Unicode Source Chars */
1692   _cups_prop_t          *uni2prop;      /* Unicode Char -> Properties */
1693   char                  *mapname;       /* Char Property map name */
1694   char                  filename[1024]; /* Filename for charset map file */
1695   cups_file_t           *fp;            /* Char Property map file pointer */
1696   char                  *s;             /* Line parsing pointer */
1697   char                  line[256];      /* Line from input map file */
1698   _cups_globals_t       *cg = _cupsGlobals();
1699                                         /* Pointer to library globals */
1700
1701
1702  /*
1703   * See if we already have this char properties map loaded...
1704   */
1705
1706   if ((pmap = cg->propmap_cache) != NULL)
1707     return (0);
1708
1709  /*
1710   * Get the mapping name...
1711   */
1712
1713   mapname = "uni-prop.txt";
1714
1715  /*
1716   * Open char properties map input file...
1717   */
1718   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1719            cg->cups_datadir, mapname);
1720   if ((propcount = get_map_count(filename)) <= 0)
1721     return (-1);
1722   fp = cupsFileOpen(filename, "r");
1723   if (fp == NULL)
1724     return (-1);
1725
1726  /*
1727   * Allocate memory for char properties map and add to cache...
1728   */
1729   pmap = (_cups_prop_map_t *)calloc(1, sizeof(_cups_prop_map_t));
1730   if (pmap == NULL)
1731   {
1732     cupsFileClose(fp);
1733     return (-1);
1734   }
1735   uni2prop = (_cups_prop_t *)calloc(1, sizeof(_cups_prop_t) * propcount);
1736   if (uni2prop == NULL)
1737   {
1738     free(pmap);
1739     cupsFileClose(fp);
1740     return (-1);
1741   }
1742   cg->propmap_cache = pmap;
1743   pmap->used ++;
1744   pmap->propcount = propcount;
1745   pmap->uni2prop = uni2prop;
1746
1747  /*
1748   * Save char properties map into memory for later use...
1749   */
1750   for (i = 0; i < propcount; )
1751   {
1752     s = cupsFileGets(fp, line, sizeof(line));
1753     if (s == NULL)
1754       break;
1755     if (strlen(s) > 0)
1756       *(s + strlen(s) - 1) = '\0';
1757     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1758       continue;
1759     if (sscanf(s, "%lx", &unichar) != 1)
1760        break;
1761     if (unichar > 0xffff)
1762       break;
1763     while ((*s != '\0') && (*s != ';'))
1764       s ++;
1765     if (*s != ';')
1766       break;
1767     s ++;
1768     for (j = 0; gencat_index[j].str != NULL; j ++)
1769     {
1770       len = strlen(gencat_index[j].str);
1771       if (strncmp (s, gencat_index[j].str, len) == 0)
1772         break;
1773     }
1774     if (gencat_index[j].str == NULL)
1775       return (-1);
1776     gencat = gencat_index[j].gencat;
1777     while ((*s != '\0') && (*s != ';'))
1778       s ++;
1779     if (*s != ';')
1780       break;
1781     s ++;
1782     for (j = 0; bidicat_index[j] != NULL; j ++)
1783     {
1784       len = strlen(bidicat_index[j]);
1785       if (strncmp (s, bidicat_index[j], len) == 0)
1786         break;
1787     }
1788     if (bidicat_index[j] == NULL)
1789       return (-1);
1790     bidicat = (cups_bidi_t) j;
1791     uni2prop->ch = (cups_ucs2_t) unichar;
1792     uni2prop->gencat = (unsigned char) gencat;
1793     uni2prop->bidicat = (unsigned char) bidicat;
1794     uni2prop ++;
1795     i ++;
1796   }
1797   if (i < propcount)
1798     pmap->propcount = i;
1799   cupsFileClose(fp);
1800   return (0);
1801 }
1802
1803
1804 /*
1805  * 'get_combmap()' - Get Unicode combining class map to cache.
1806  */
1807
1808 static int                              /* O - Zero or -1 on error */
1809 get_combmap(void)
1810 {
1811   int                   i;              /* Looping variable */
1812   cups_utf32_t          unichar;        /* Unicode character value */
1813   int                   combclass;      /* Unicode char combining class */
1814   _cups_comb_map_t      *cmap;          /* Unicode Comb Class Map */
1815   int                   combcount;      /* Count of Unicode Source Chars */
1816   _cups_comb_t          *uni2comb;      /* Unicode Char -> Combining Class */
1817   char                  *mapname;       /* Comb Class map name */
1818   char                  filename[1024]; /* Filename for charset map file */
1819   cups_file_t           *fp;            /* Comb Class map file pointer */
1820   char                  *s;             /* Line parsing pointer */
1821   char                  line[256];      /* Line from input map file */
1822   _cups_globals_t       *cg = _cupsGlobals();
1823                                         /* Pointer to library globals */
1824
1825
1826  /*
1827   * See if we already have this combining class map loaded...
1828   */
1829
1830   if ((cmap = cg->combmap_cache) != NULL)
1831     return (0);
1832
1833  /*
1834   * Get the mapping name...
1835   */
1836
1837   mapname = "uni-comb.txt";
1838
1839  /*
1840   * Open combining class map input file...
1841   */
1842
1843   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1844            cg->cups_datadir, mapname);
1845   if ((combcount = get_map_count(filename)) <= 0)
1846     return (-1);
1847   fp = cupsFileOpen(filename, "r");
1848   if (fp == NULL)
1849     return (-1);
1850
1851  /*
1852   * Allocate memory for combining class map and add to cache...
1853   */
1854
1855   cmap = (_cups_comb_map_t *)calloc(1, sizeof(_cups_comb_map_t));
1856   if (cmap == NULL)
1857   {
1858     cupsFileClose(fp);
1859     return (-1);
1860   }
1861
1862   uni2comb = (_cups_comb_t *)calloc(1, sizeof(_cups_comb_t) * combcount);
1863   if (uni2comb == NULL)
1864   {
1865     free(cmap);
1866     cupsFileClose(fp);
1867     return (-1);
1868   }
1869   cg->combmap_cache = cmap;
1870   cmap->used ++;
1871   cmap->combcount = combcount;
1872   cmap->uni2comb = uni2comb;
1873
1874  /*
1875   * Save combining class map into memory for later use...
1876   */
1877   for (i = 0; i < combcount; )
1878   {
1879     s = cupsFileGets(fp, line, sizeof(line));
1880     if (s == NULL)
1881       break;
1882     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1883       continue;
1884     if (sscanf(s, "%lx", &unichar) != 1)
1885        break;
1886     if (unichar > 0xffff)
1887       break;
1888     while ((*s != '\0') && (*s != ';'))
1889       s ++;
1890     if (*s != ';')
1891       break;
1892     s ++;
1893     if (sscanf(s, "%d", &combclass) != 1)
1894        break;
1895     uni2comb->ch = (cups_ucs2_t) unichar;
1896     uni2comb->combclass = (unsigned char) combclass;
1897     uni2comb ++;
1898     i ++;
1899   }
1900   if (i < combcount)
1901     cmap->combcount = i;
1902   cupsFileClose(fp);
1903   return (0);
1904 }
1905
1906
1907 /*
1908  * 'get_breakmap()' - Get Unicode line break class map to cache.
1909  */
1910
1911 static int                              /* O - Zero or -1 on error */
1912 get_breakmap(void)
1913 {
1914   int                   i, j;           /* Looping variables */
1915   int                   len;            /* String length */
1916   cups_utf32_t          unichar1;       /* Unicode character value */
1917   cups_utf32_t          unichar2;       /* Unicode character value */
1918   cups_break_class_t    breakclass;     /* Unicode char line break class */
1919   _cups_break_map_t     *bmap;          /* Unicode Line Break Class Map */
1920   int                   breakcount;     /* Count of Unicode Source Chars */
1921   cups_ucs2_t           *uni2break;     /* Unicode -> Line Break Class */
1922   char                  *mapname;       /* Comb Class map name */
1923   char                  filename[1024]; /* Filename for charset map file */
1924   cups_file_t           *fp;            /* Comb Class map file pointer */
1925   char                  *s;             /* Line parsing pointer */
1926   char                  line[256];      /* Line from input map file */
1927   _cups_globals_t       *cg = _cupsGlobals();
1928                                         /* Pointer to library globals */
1929
1930
1931  /*
1932   * See if we already have this line break class map loaded...
1933   */
1934
1935   if ((bmap = cg->breakmap_cache) != NULL)
1936     return (0);
1937
1938  /*
1939   * Get the mapping name...
1940   */
1941
1942   mapname = "uni-line.txt";
1943
1944  /*
1945   * Open line break class map input file...
1946   */
1947
1948   snprintf(filename, sizeof(filename), "%s/charmaps/%s",
1949            cg->cups_datadir, mapname);
1950   if ((breakcount = get_map_count(filename)) <= 0)
1951     return (-1);
1952   fp = cupsFileOpen(filename, "r");
1953   if (fp == NULL)
1954     return (-1);
1955
1956  /*
1957   * Allocate memory for line break class map and add to cache...
1958   */
1959
1960   bmap = (_cups_break_map_t *)calloc(1, sizeof(_cups_break_map_t));
1961   if (bmap == NULL)
1962   {
1963     cupsFileClose(fp);
1964     return (-1);
1965   }
1966
1967   uni2break = (cups_ucs2_t *)calloc(1, sizeof(cups_ucs2_t) * 3 * breakcount);
1968   if (uni2break == NULL)
1969   {
1970     free(bmap);
1971     cupsFileClose(fp);
1972     return (-1);
1973   }
1974   cg->breakmap_cache = bmap;
1975   bmap->used ++;
1976   bmap->breakcount = breakcount;
1977   bmap->uni2break = uni2break;
1978
1979  /*
1980   * Save line break class map into memory for later use...
1981   */
1982   for (i = 0; i < breakcount; )
1983   {
1984     s = cupsFileGets(fp, line, sizeof(line));
1985     if (s == NULL)
1986       break;
1987     if (strlen(s) > 0)
1988       *(s + strlen(s) - 1) = '\0';
1989     if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1990       continue;
1991     if (sscanf(s, "%lx %lx", &unichar1, &unichar2) != 2)
1992        break;
1993     if ((unichar1 > 0xffff)
1994     || (unichar2 > 0xffff))
1995       break;
1996     while ((*s != '\0') && (*s != ';'))
1997       s ++;
1998     if (*s != ';')
1999       break;
2000     s ++;
2001     for (j = 0; break_index[j].str != NULL; j ++)
2002     {
2003       len = strlen (break_index[j].str);
2004       if (strncmp (s, break_index[j].str, len) == 0)
2005         break;
2006     }
2007     if (break_index[j].str == NULL)
2008       return (-1);
2009     breakclass = break_index[j].breakclass;
2010     *uni2break ++ = (cups_ucs2_t) unichar1;
2011     *uni2break ++ = (cups_ucs2_t) unichar2;
2012     *uni2break ++ = (cups_ucs2_t) breakclass;
2013     i ++;
2014   }
2015   if (i < breakcount)
2016     bmap->breakcount = i;
2017   cupsFileClose(fp);
2018   return (0);
2019 }
2020
2021
2022 /*
2023  * 'compare_compose()' - Compare key for compose match.
2024  *
2025  * Note - This function cannot be easily modified for 32-bit Unicode.
2026  */
2027
2028 static int                              /* O - Result of comparison */
2029 compare_compose(const void *k1,         /* I - Key char */
2030                 const void *k2)         /* I - Map char */
2031 {
2032   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2033                                         /* Key char pointer */
2034   cups_ucs2_t   *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2035   unsigned long key;                    /* Pair of key characters */
2036   unsigned long map;                    /* Pair of map characters */
2037   int           result;                 /* Result Value */
2038
2039
2040   key = (*kp << 16);
2041   key |= *(kp + 1);
2042   map = (unsigned long) (*mp << 16);
2043   map |= (unsigned long) *(mp + 1);
2044
2045   if (key >= map)
2046     result = (int) (key - map);
2047   else
2048     result = -1 * ((int) (map - key));
2049
2050   return (result);
2051 }
2052
2053
2054 /*
2055  * 'compare_decompose()' - Compare key for decompose match.
2056  */
2057
2058 static int                              /* O - Result of comparison */
2059 compare_decompose(const void *k1,       /* I - Key char */
2060                   const void *k2)       /* I - Map char */
2061 {
2062   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2063                                         /* Key char pointer */
2064   cups_ucs2_t   *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2065   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2066   int           result;                 /* Result Value */
2067
2068
2069   ch = (cups_ucs2_t) *kp;
2070
2071   if (ch >= *mp)
2072     result = (int) (ch - *mp);
2073   else
2074     result = -1 * ((int) (*mp - ch));
2075
2076   return (result);
2077 }
2078
2079
2080 /*
2081  * 'compare_foldchar()' - Compare key for case fold match.
2082  */
2083
2084 static int                              /* O - Result of comparison */
2085 compare_foldchar(const void *k1,        /* I - Key char */
2086                  const void *k2)        /* I - Map char */
2087 {
2088   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2089                                         /* Key char pointer */
2090   cups_ucs2_t   *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2091   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2092   int           result;                 /* Result Value */
2093
2094
2095   ch = (cups_ucs2_t) *kp;
2096
2097   if (ch >= *mp)
2098     result = (int) (ch - *mp);
2099   else
2100     result = -1 * ((int) (*mp - ch));
2101
2102   return (result);
2103 }
2104
2105
2106 /*
2107  * 'compare_combchar()' - Compare key for combining char match.
2108  */
2109
2110 static int                              /* O - Result of comparison */
2111 compare_combchar(const void *k1,        /* I - Key char */
2112                  const void *k2)        /* I - Map char */
2113 {
2114   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2115                                         /* Key char pointer */
2116   _cups_comb_t  *cp = (_cups_comb_t *)k2;/* Combining map row pointer */
2117   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2118   int           result;                 /* Result Value */
2119
2120
2121   ch = (cups_ucs2_t) *kp;
2122
2123   if (ch >= cp->ch)
2124     result = (int) (ch - cp->ch);
2125   else
2126     result = -1 * ((int) (cp->ch - ch));
2127
2128   return (result);
2129 }
2130
2131
2132 /*
2133  * 'compare_breakchar()' - Compare key for line break char match.
2134  */
2135
2136 static int                              /* O - Result of comparison */
2137 compare_breakchar(const void *k1,       /* I - Key char */
2138                   const void *k2)       /* I - Map char */
2139 {
2140   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2141                                         /* Key char pointer */
2142   cups_ucs2_t   *mp = (cups_ucs2_t *)k2;/* Map char pointer */
2143   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2144   int           result;                 /* Result Value */
2145
2146
2147   ch = (cups_ucs2_t) *kp;
2148
2149   if (ch < *mp)
2150     result = -1 * (int) (*mp - ch);
2151   else if (ch > *(mp + 1))
2152     result = (int) (ch - *(mp + 1));
2153   else
2154     result = 0;
2155
2156   return (result);
2157 }
2158
2159
2160 /*
2161  * 'compare_propchar()' - Compare key for property char match.
2162  */
2163
2164 static int                              /* O - Result of comparison */
2165 compare_propchar(const void *k1,        /* I - Key char */
2166                  const void *k2)        /* I - Map char */
2167 {
2168   cups_utf32_t  *kp = (cups_utf32_t *)k1;
2169                                         /* Key char pointer */
2170   _cups_prop_t  *pp = (_cups_prop_t *)k2;/* Property map row pointer */
2171   cups_ucs2_t   ch;                     /* Key char as UCS-2 */
2172   int           result;                 /* Result Value */
2173
2174
2175   ch = (cups_ucs2_t) *kp;
2176
2177   if (ch >= pp->ch)
2178     result = (int) (ch - pp->ch);
2179   else
2180     result = -1 * ((int) (pp->ch - ch));
2181
2182   return (result);
2183 }
2184
2185
2186 /*
2187  * End of "$Id: normalize.c 4967 2006-01-24 03:42:15Z mike $"
2188  */