2 * "$Id: normalize.c 4903 2006-01-10 20:02:46Z mike $"
4 * Unicode normalization for the Common UNIX Printing System (CUPS).
6 * Copyright 1997-2006 by Easy Software Products.
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
26 * cupsNormalizeMapsGet() - Get all norm maps to cache.
27 * cupsNormalizeMapsFree() - Free all norm maps in cache.
28 * cupsNormalizeMapsFlush() - Flush all norm maps in cache.
29 * cupsUTF8Normalize() - Normalize UTF-8 string.
30 * cupsUTF32Normalize() - Normalize UTF-32 string.
31 * cupsUTF8CaseFold() - Case fold UTF-8 string.
32 * cupsUTF32CaseFold() - Case fold UTF-32 string.
33 * cupsUTF8CompareCaseless() - Compare case folded UTF-8 strings.
34 * cupsUTF32CompareCaseless() - Compare case folded UTF-32 strings.
35 * cupsUTF8CompareIdentifier() - Compare folded NFKC UTF-8 strings.
36 * cupsUTF32CompareIdentifier() - Compare folded NFKC UTF-32 strings.
37 * cupsUTF32CharacterProperty() - Get UTF-32 character property.
38 * get_general_category() - Get UTF-32 Char General Category.
39 * get_bidi_category() - Get UTF-32 Char Bidi Category.
40 * get_combining_class() - Get UTF-32 Char Combining Class.
41 * get_break_class() - Get UTF-32 Char Line Break Class.
42 * get_map_count() - Count lines in a map file.
43 * get_normmap() - Get Unicode norm map to cache.
44 * get_foldmap() - Get Unicode casefold map to cache.
45 * get_propmap() - Get Unicode property map to cache.
46 * get_combmap() - Get Unicode combining map to cache.
47 * get_breakmap() - Get Unicode break map to cache.
48 * compare_compose() - Compare key for compose match.
49 * compare_decompose() - Compare key for decompose match.
50 * compare_foldchar() - Compare key for case fold match.
51 * compare_combchar() - Compare key for combining match.
52 * compare_breakchar() - Compare key for line break match.
53 * compare_propchar() - Compare key for property char match.
57 * Include necessary headers...
67 typedef struct /**** General Category Index Struct****/
69 cups_gencat_t gencat
; /* General Category Value */
70 const char *str
; /* General Category String */
73 static const gencat_t gencat_index
[] = /* General Category Index */
75 { CUPS_GENCAT_LU
, "Lu" }, /* Letter, Uppercase */
76 { CUPS_GENCAT_LL
, "Ll" }, /* Letter, Lowercase */
77 { CUPS_GENCAT_LT
, "Lt" }, /* Letter, Titlecase */
78 { CUPS_GENCAT_LM
, "Lm" }, /* Letter, Modifier */
79 { CUPS_GENCAT_LO
, "Lo" }, /* Letter, Other */
80 { CUPS_GENCAT_MN
, "Mn" }, /* Mark, Non-Spacing */
81 { CUPS_GENCAT_MC
, "Mc" }, /* Mark, Spacing Combining */
82 { CUPS_GENCAT_ME
, "Me" }, /* Mark, Enclosing */
83 { CUPS_GENCAT_ND
, "Nd" }, /* Number, Decimal Digit */
84 { CUPS_GENCAT_NL
, "Nl" }, /* Number, Letter */
85 { CUPS_GENCAT_NO
, "No" }, /* Number, Other */
86 { CUPS_GENCAT_PC
, "Pc" }, /* Punctuation, Connector */
87 { CUPS_GENCAT_PD
, "Pd" }, /* Punctuation, Dash */
88 { CUPS_GENCAT_PS
, "Ps" }, /* Punctuation, Open (start) */
89 { CUPS_GENCAT_PE
, "Pe" }, /* Punctuation, Close (end) */
90 { CUPS_GENCAT_PI
, "Pi" }, /* Punctuation, Initial Quote */
91 { CUPS_GENCAT_PF
, "Pf" }, /* Punctuation, Final Quote */
92 { CUPS_GENCAT_PO
, "Po" }, /* Punctuation, Other */
93 { CUPS_GENCAT_SM
, "Sm" }, /* Symbol, Math */
94 { CUPS_GENCAT_SC
, "Sc" }, /* Symbol, Currency */
95 { CUPS_GENCAT_SK
, "Sk" }, /* Symbol, Modifier */
96 { CUPS_GENCAT_SO
, "So" }, /* Symbol, Other */
97 { CUPS_GENCAT_ZS
, "Zs" }, /* Separator, Space */
98 { CUPS_GENCAT_ZL
, "Zl" }, /* Separator, Line */
99 { CUPS_GENCAT_ZP
, "Zp" }, /* Separator, Paragraph */
100 { CUPS_GENCAT_CC
, "Cc" }, /* Other, Control */
101 { CUPS_GENCAT_CF
, "Cf" }, /* Other, Format */
102 { CUPS_GENCAT_CS
, "Cs" }, /* Other, Surrogate */
103 { CUPS_GENCAT_CO
, "Co" }, /* Other, Private Use */
104 { CUPS_GENCAT_CN
, "Cn" }, /* Other, Not Assigned */
/*
 * Names of the bidirectional categories.
 *
 * NOTE(review): the entry order presumably mirrors the cups_bidi_t
 * enumeration so a category value can index this table - confirm against
 * the header before relying on it.
 */

static const char * const bidicat_index[] =
					/* Bidi Category Index */
{
  "L",		/* Left-to-Right (Alpha, Syllabic, Ideographic) */
  "LRE",	/* Left-to-Right Embedding (explicit) */
  "LRO",	/* Left-to-Right Override (explicit) */
  "R",		/* Right-to-Left (Hebrew alphabet and most punct) */
  "AL",		/* Right-to-Left Arabic (Arabic, Thaana, Syriac) */
  "RLE",	/* Right-to-Left Embedding (explicit) */
  "RLO",	/* Right-to-Left Override (explicit) */
  "PDF",	/* Pop Directional Format */
  "EN",		/* Euro Number (Euro and East Arabic-Indic digits) */
  "ES",		/* Euro Number Separator (Slash) */
  "ET",		/* Euro Number Terminator (Plus, Minus, Degree, etc) */
  "AN",		/* Arabic Number (Arabic-Indic digits, separators) */
  "CS",		/* Common Number Separator (Colon, Comma, Dot, etc) */
  "NSM",	/* Non-Spacing Mark (category Mn / Me in UCD) */
  "BN",		/* Boundary Neutral (Formatting / Control chars) */
  "B",		/* Paragraph Separator */
  "S",		/* Segment Separator (Tab) */
  "WS",		/* Whitespace Space (Space, Line Separator, etc) */
  "ON",		/* Other Neutrals */
};
133 typedef struct /**** Line Break Class Index Struct****/
135 cups_break_class_t breakclass
; /* Line Break Class Value */
136 const char *str
; /* Line Break Class String */
139 static const _cups_break_t break_index
[] = /* Line Break Class Index */
141 { CUPS_BREAK_AI
, "AI" }, /* Ambiguous (Alphabetic or Ideograph) */
142 { CUPS_BREAK_AL
, "AL" }, /* Ordinary Alpha/Symbol Chars (XP) */
143 { CUPS_BREAK_BA
, "BA" }, /* Break Opportunity After Chars (A) */
144 { CUPS_BREAK_BB
, "BB" }, /* Break Opportunities Before Chars (B) */
145 { CUPS_BREAK_B2
, "B2" }, /* Break Opportunity Either (B/A/XP) */
146 { CUPS_BREAK_BK
, "BK" }, /* Mandatory Break (A) (norm) */
147 { CUPS_BREAK_CB
, "CB" }, /* Contingent Break (B/A) (norm) */
148 { CUPS_BREAK_CL
, "CL" }, /* Closing Punctuation (XB) */
149 { CUPS_BREAK_CM
, "CM" }, /* Attached/Combining (XB) (norm) */
150 { CUPS_BREAK_CR
, "CR" }, /* Carriage Return (A) (norm) */
151 { CUPS_BREAK_EX
, "EX" }, /* Exclamation / Interrogation (XB) */
152 { CUPS_BREAK_GL
, "GL" }, /* Non-breaking ("Glue") (XB/XA) (norm) */
153 { CUPS_BREAK_HY
, "HY" }, /* Hyphen (XA) */
154 { CUPS_BREAK_ID
, "ID" }, /* Ideographic (B/A) */
155 { CUPS_BREAK_IN
, "IN" }, /* Inseparable chars (XP) */
156 { CUPS_BREAK_IS
, "IS" }, /* Numeric Separator (Infix) (XB) */
157 { CUPS_BREAK_LF
, "LF" }, /* Line Feed (A) (norm) */
158 { CUPS_BREAK_NS
, "NS" }, /* Non-starters (XB) */
159 { CUPS_BREAK_NU
, "NU" }, /* Numeric (XP) */
160 { CUPS_BREAK_OP
, "OP" }, /* Opening Punctuation (XA) */
161 { CUPS_BREAK_PO
, "PO" }, /* Postfix (Numeric) (XB) */
162 { CUPS_BREAK_PR
, "PR" }, /* Prefix (Numeric) (XA) */
163 { CUPS_BREAK_QU
, "QU" }, /* Ambiguous Quotation (XB/XA) */
164 { CUPS_BREAK_SA
, "SA" }, /* Context Dependent (SE Asian) (P) */
165 { CUPS_BREAK_SG
, "SG" }, /* Surrogates (XP) (norm) */
166 { CUPS_BREAK_SP
, "SP" }, /* Space (A) (norm) */
167 { CUPS_BREAK_SY
, "SY" }, /* Symbols Allowing Break After (A) */
168 { CUPS_BREAK_XX
, "XX" }, /* Unknown (XP) */
169 { CUPS_BREAK_ZW
, "ZW" }, /* Zero Width Space (A) (norm) */
177 static int compare_breakchar(const void *k1
, const void *k2
);
178 static int compare_combchar(const void *k1
, const void *k2
);
179 static int compare_compose(const void *k1
, const void *k2
);
180 static int compare_decompose(const void *k1
, const void *k2
);
181 static int compare_foldchar(const void *k1
, const void *k2
);
182 static int compare_propchar(const void *k1
, const void *k2
);
183 static int get_bidi_category(const cups_utf32_t ch
);
184 static int get_break_class(const cups_utf32_t ch
);
185 static int get_breakmap(void);
186 static int get_combining_class(const cups_utf32_t ch
);
187 static int get_combmap(void);
188 static int get_foldmap(const cups_folding_t fold
);
189 static int get_general_category(const cups_utf32_t ch
);
190 static int get_map_count(const char *filename
);
191 static int get_normmap(const cups_normalize_t normalize
);
192 static int get_propmap(void);
196 * 'cupsNormalizeMapsGet()' - Get all normalization maps to cache.
199 int /* O - Zero or -1 on error */
200 cupsNormalizeMapsGet(void)
202 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
203 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
204 _cups_globals_t
*cg
= _cupsGlobals();
205 /* Pointer to library globals */
209 * See if we already have normalization maps loaded...
212 if (cg
->normmap_cache
)
214 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
217 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= fmap
->next
)
220 if (cg
->combmap_cache
)
221 cg
->combmap_cache
->used
++;
223 if (cg
->propmap_cache
)
224 cg
->propmap_cache
->used
++;
226 if (cg
->breakmap_cache
)
227 cg
->breakmap_cache
->used
++;
233 * Get normalization maps...
236 if (get_normmap(CUPS_NORM_NFD
) < 0)
239 if (get_normmap(CUPS_NORM_NFKD
) < 0)
242 if (get_normmap(CUPS_NORM_NFC
) < 0)
246 * Get case folding, combining class, character property maps...
249 if (get_foldmap(CUPS_FOLD_SIMPLE
) < 0)
252 if (get_foldmap(CUPS_FOLD_FULL
) < 0)
255 if (get_propmap() < 0)
258 if (get_combmap() < 0)
261 if (get_breakmap() < 0)
269 * 'cupsNormalizeMapsFree()' - Free all normalization maps in cache.
271 * This does not actually free; use 'cupsNormalizeMapsFlush()' for that.
274 int /* O - Zero or -1 on error */
275 cupsNormalizeMapsFree(void)
277 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
278 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
279 _cups_globals_t
*cg
= _cupsGlobals();
280 /* Pointer to library globals */
284 * See if we already have normalization maps loaded...
287 if (cg
->normmap_cache
== NULL
)
290 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
294 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= fmap
->next
)
298 if (cg
->propmap_cache
&& (cg
->propmap_cache
->used
> 0))
299 cg
->propmap_cache
->used
--;
301 if (cg
->combmap_cache
&& (cg
->combmap_cache
->used
> 0))
302 cg
->combmap_cache
->used
--;
304 if (cg
->breakmap_cache
&& (cg
->breakmap_cache
->used
> 0))
305 cg
->breakmap_cache
->used
--;
312 * 'cupsNormalizeMapsFlush()' - Flush all normalization maps in cache.
316 cupsNormalizeMapsFlush(void)
318 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
319 _cups_norm_map_t
*nextnorm
; /* Next Unicode Normalization Map */
320 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
321 _cups_fold_map_t
*nextfold
; /* Next Unicode Case Folding Map */
322 _cups_globals_t
*cg
= _cupsGlobals();
323 /* Pointer to library globals */
327 * Flush all normalization maps...
330 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nextnorm
)
332 free(nmap
->uni2norm
);
333 nextnorm
= nmap
->next
;
337 cg
->normmap_cache
= NULL
;
339 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= nextfold
)
341 free(fmap
->uni2fold
);
342 nextfold
= fmap
->next
;
346 cg
->foldmap_cache
= NULL
;
348 if (cg
->propmap_cache
)
350 free(cg
->propmap_cache
->uni2prop
);
351 free(cg
->propmap_cache
);
352 cg
->propmap_cache
= NULL
;
355 if (cg
->combmap_cache
)
357 free(cg
->combmap_cache
->uni2comb
);
358 free(cg
->combmap_cache
);
359 cg
->combmap_cache
= NULL
;
362 if (cg
->breakmap_cache
)
364 free(cg
->breakmap_cache
->uni2break
);
365 free(cg
->breakmap_cache
);
366 cg
->breakmap_cache
= NULL
;
372 * 'cupsUTF8Normalize()' - Normalize UTF-8 string.
374 * Normalize UTF-8 string to Unicode UAX-15 Normalization Form
375 * Note - Compatibility Normalization Forms (NFKD/NFKC) are
376 * unsafe for subsequent transcoding to legacy charsets
379 int /* O - Count or -1 on error */
381 cups_utf8_t
*dest
, /* O - Target string */
382 const cups_utf8_t
*src
, /* I - Source string */
383 const int maxout
, /* I - Max output */
384 const cups_normalize_t normalize
) /* I - Normalization */
386 int len
; /* String length */
387 cups_utf32_t work1
[CUPS_MAX_USTRING
];/* First internal UCS-4 string */
388 cups_utf32_t work2
[CUPS_MAX_USTRING
];/* Second internal UCS-4 string */
392 * Check for valid arguments and clear output...
395 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
401 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
404 len
= cupsUTF8ToUTF32(work1
, src
, CUPS_MAX_USTRING
);
410 * Normalize internal UCS-4 to second internal UCS-4...
413 len
= cupsUTF32Normalize(work2
, work1
, CUPS_MAX_USTRING
, normalize
);
419 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
422 len
= cupsUTF32ToUTF8(dest
, work2
, maxout
);
429 * 'cupsUTF32Normalize()' - Normalize UTF-32 string.
431 * Normalize UTF-32 string to Unicode UAX-15 Normalization Form
432 * Note - Compatibility Normalization Forms (NFKD/NFKC) are
433 * unsafe for subsequent transcoding to legacy charsets
436 int /* O - Count or -1 on error */
438 cups_utf32_t
*dest
, /* O - Target string */
439 const cups_utf32_t
*src
, /* I - Source string */
440 const int maxout
, /* I - Max output */
441 const cups_normalize_t normalize
) /* I - Normalization */
443 int i
; /* Looping variable */
444 int result
; /* Result Value */
445 cups_ucs2_t
*mp
; /* Map char pointer */
446 int pass
; /* Pass count for each transform */
447 int hit
; /* Hit count from binary search */
448 cups_utf32_t unichar1
; /* Unicode character value */
449 cups_utf32_t unichar2
; /* Unicode character value */
450 _cups_comb_class_t class1
; /* First Combining Class */
451 _cups_comb_class_t class2
; /* Second Combining Class */
452 int len
; /* String length */
453 cups_utf32_t work1
[CUPS_MAX_USTRING
];
454 /* First internal UCS-4 string */
455 cups_utf32_t work2
[CUPS_MAX_USTRING
];
456 /* Second internal UCS-4 string */
457 cups_utf32_t
*p1
; /* First UCS-4 string pointer */
458 cups_utf32_t
*p2
; /* Second UCS-4 string pointer */
459 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
460 cups_normalize_t decompose
; /* Decomposition Type */
461 _cups_globals_t
*cg
= _cupsGlobals();
462 /* Pointer to library globals */
466 * Check for valid arguments and clear output...
469 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
474 result
= cupsNormalizeMapsGet();
480 * Find decomposition map...
487 decompose
= CUPS_NORM_NFD
;
492 decompose
= CUPS_NORM_NFKD
;
499 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
500 if (nmap
->normalize
== decompose
)
507 * Copy input to internal buffer...
512 for (i
= 0; i
< CUPS_MAX_USTRING
; i
++)
524 * Decompose until no further decomposition...
527 for (pass
= 0; pass
< 20; pass
++)
532 for (hit
= 0; *p1
!= 0; p1
++)
535 * Check for decomposition defined...
538 mp
= (cups_ucs2_t
*)bsearch(p1
, nmap
->uni2norm
, nmap
->normcount
,
539 (sizeof(cups_ucs2_t
) * 3), compare_decompose
);
547 * Decompose input character to one or two output characters...
552 *p2
++ = (cups_utf32_t
) *mp
++;
555 *p2
++ = (cups_utf32_t
) *mp
;
559 len
= (int)(p2
- &work2
[0]);
562 * Check for decomposition finished...
566 memcpy (work1
, work2
, sizeof(cups_utf32_t
) * (len
+ 1));
570 * Canonical reorder until no further reordering...
573 for (pass
= 0; pass
< 20; pass
++)
577 for (hit
= 0; *p1
!= 0; p1
++)
580 * Check for combining characters to reorder...
584 unichar2
= *(p1
+ 1);
589 class1
= get_combining_class(unichar1
);
590 class2
= get_combining_class(unichar2
);
592 if ((class1
< 0) || (class2
< 0))
595 if ((class1
== 0) || (class2
== 0))
598 if (class1
<= class2
)
602 * Swap two combining characters...
616 * Check for decomposition only...
619 if (normalize
== CUPS_NORM_NFD
|| normalize
== CUPS_NORM_NFKD
)
621 memcpy(dest
, work1
, sizeof(cups_utf32_t
) * (len
+ 1));
626 * Find composition map...
629 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
630 if (nmap
->normalize
== CUPS_NORM_NFC
)
637 * Compose until no further composition...
640 for (pass
= 0; pass
< 20; pass
++)
645 for (hit
= 0; *p1
!= 0; p1
++)
648 * Check for composition defined...
652 unichar2
= *(p1
+ 1);
660 mp
= (cups_ucs2_t
*)bsearch(p1
, nmap
->uni2norm
, nmap
->normcount
,
661 (sizeof(cups_ucs2_t
) * 3), compare_compose
);
669 * Compose two input characters to one output character...
674 *p2
++ = (cups_utf32_t
) *mp
;
679 len
= (int) (p2
- &work2
[0]);
682 * Check for composition finished...
688 memcpy (work1
, work2
, sizeof(cups_utf32_t
) * (len
+ 1));
691 memcpy (dest
, work1
, sizeof(cups_utf32_t
) * (len
+ 1));
693 cupsNormalizeMapsFree();
700 * 'cupsUTF8CaseFold()' - Case fold UTF-8 string.
702 * Case Fold UTF-8 string per Unicode UAX-21 Section 2.3
703 * Note - Case folding output is
704 * unsafe for subsequent transcoding to legacy charsets
707 int /* O - Count or -1 on error */
709 cups_utf8_t
*dest
, /* O - Target string */
710 const cups_utf8_t
*src
, /* I - Source string */
711 const int maxout
, /* I - Max output */
712 const cups_folding_t fold
) /* I - Fold Mode */
714 int len
; /* String length */
715 cups_utf32_t work1
[CUPS_MAX_USTRING
];/* First internal UCS-4 string */
716 cups_utf32_t work2
[CUPS_MAX_USTRING
];/* Second internal UCS-4 string */
720 * Check for valid arguments and clear output...
723 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
728 if (fold
!= CUPS_FOLD_SIMPLE
&& fold
!= CUPS_FOLD_FULL
)
732 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
735 len
= cupsUTF8ToUTF32(work1
, src
, CUPS_MAX_USTRING
);
741 * Case Fold internal UCS-4 to second internal UCS-4...
744 len
= cupsUTF32CaseFold(work2
, work1
, CUPS_MAX_USTRING
, fold
);
750 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
753 len
= cupsUTF32ToUTF8(dest
, work2
, maxout
);
760 * 'cupsUTF32CaseFold()' - Case fold UTF-32 string.
762 * Case Fold UTF-32 string per Unicode UAX-21 Section 2.3
763 * Note - Case folding output is
764 * unsafe for subsequent transcoding to legacy charsets
767 int /* O - Count or -1 on error */
769 cups_utf32_t
*dest
, /* O - Target string */
770 const cups_utf32_t
*src
, /* I - Source string */
771 const int maxout
, /* I - Max output */
772 const cups_folding_t fold
) /* I - Fold Mode */
774 cups_utf32_t
*start
= dest
; /* Start of destination string */
775 int i
; /* Looping variable */
776 int result
; /* Result Value */
777 cups_ucs2_t
*mp
; /* Map char pointer */
778 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
779 _cups_globals_t
*cg
= _cupsGlobals();
780 /* Pointer to library globals */
784 * Check for valid arguments and clear output...
787 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
792 if (fold
!= CUPS_FOLD_SIMPLE
&& fold
!= CUPS_FOLD_FULL
)
796 * Find case folding map...
799 result
= cupsNormalizeMapsGet();
804 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= fmap
->next
)
805 if (fmap
->fold
== fold
)
812 * Case fold input string to output string...
815 for (i
= 0; i
< (maxout
- 1); i
++, src
++)
818 * Check for case folding defined...
821 mp
= (cups_ucs2_t
*)bsearch(src
, fmap
->uni2fold
, fmap
->foldcount
,
822 (sizeof(cups_ucs2_t
) * 4), compare_foldchar
);
830 * Case fold input character to one or two output characters...
834 *dest
++ = (cups_utf32_t
) *mp
++;
836 if (*mp
!= 0 && fold
== CUPS_FOLD_FULL
)
839 if (i
>= (maxout
- 1))
842 *dest
++ = (cups_utf32_t
) *mp
;
848 cupsNormalizeMapsFree();
850 return ((int)(dest
- start
));
855 * 'cupsUTF8CompareCaseless()' - Compare case folded UTF-8 strings.
858 int /* O - Difference of strings */
859 cupsUTF8CompareCaseless(
860 const cups_utf8_t
*s1
, /* I - String1 */
861 const cups_utf8_t
*s2
) /* I - String2 */
863 int difference
; /* Difference of two strings */
864 int len
; /* String length */
865 cups_utf32_t work1
[CUPS_MAX_USTRING
];/* First internal UCS-4 string */
866 cups_utf32_t work2
[CUPS_MAX_USTRING
];/* Second internal UCS-4 string */
870 * Check for valid arguments...
877 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
880 len
= cupsUTF8ToUTF32(work1
, s1
, CUPS_MAX_USTRING
);
885 len
= cupsUTF8ToUTF32(work2
, s2
, CUPS_MAX_USTRING
);
891 * Compare first internal UCS-4 to second internal UCS-4...
894 difference
= cupsUTF32CompareCaseless(work1
, work2
);
901 * 'cupsUTF32CompareCaseless()' - Compare case folded UTF-32 strings.
904 int /* O - Difference of strings */
905 cupsUTF32CompareCaseless(
906 const cups_utf32_t
*s1
, /* I - String1 */
907 const cups_utf32_t
*s2
) /* I - String2 */
909 int difference
; /* Difference of two strings */
910 int len
; /* String length */
911 cups_folding_t fold
= CUPS_FOLD_FULL
;
912 /* Case folding mode */
913 cups_utf32_t fold1
[CUPS_MAX_USTRING
];
914 /* First UCS-4 folded string */
915 cups_utf32_t fold2
[CUPS_MAX_USTRING
];
916 /* Second UCS-4 folded string */
917 cups_utf32_t
*p1
; /* First UCS-4 string pointer */
918 cups_utf32_t
*p2
; /* Second UCS-4 string pointer */
922 * Check for valid arguments...
929 * Case Fold input UTF-32 strings to internal UCS-4 strings...
932 len
= cupsUTF32CaseFold(fold1
, s1
, CUPS_MAX_USTRING
, fold
);
937 len
= cupsUTF32CaseFold(fold2
, s2
, CUPS_MAX_USTRING
, fold
);
943 * Compare first internal UCS-4 to second internal UCS-4...
949 for (;; p1
++, p2
++)
951 difference
= (int) (*p1
- *p2
);
956 if ((*p1
== 0) && (*p2
== 0))
965 * 'cupsUTF8CompareIdentifier()' - Compare folded NFKC UTF-8 strings.
968 int /* O - Result of comparison */
969 cupsUTF8CompareIdentifier(
970 const cups_utf8_t
*s1
, /* I - String1 */
971 const cups_utf8_t
*s2
) /* I - String2 */
973 int difference
; /* Difference of two strings */
974 int len
; /* String length */
975 cups_utf32_t work1
[CUPS_MAX_USTRING
];/* First internal UCS-4 string */
976 cups_utf32_t work2
[CUPS_MAX_USTRING
];/* Second internal UCS-4 string */
980 * Check for valid arguments...
987 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
990 len
= cupsUTF8ToUTF32(work1
, s1
, CUPS_MAX_USTRING
);
995 len
= cupsUTF8ToUTF32(work2
, s2
, CUPS_MAX_USTRING
);
1001 * Compare first internal UCS-4 to second internal UCS-4...
1004 difference
= cupsUTF32CompareIdentifier(work1
, work2
);
1006 return (difference
);
1011 * 'cupsUTF32CompareIdentifier()' - Compare folded NFKC UTF-32 strings.
1014 int /* O - Result of comparison */
1015 cupsUTF32CompareIdentifier(
1016 const cups_utf32_t
*s1
, /* I - String1 */
1017 const cups_utf32_t
*s2
) /* I - String2 */
1019 int difference
; /* Difference of two strings */
1020 int len
; /* String length */
1021 cups_folding_t fold
= CUPS_FOLD_FULL
;
1022 /* Case folding mode */
1023 cups_utf32_t fold1
[CUPS_MAX_USTRING
];
1024 /* First UCS-4 folded string */
1025 cups_utf32_t fold2
[CUPS_MAX_USTRING
];
1026 /* Second UCS-4 folded string */
1027 cups_normalize_t normalize
= CUPS_NORM_NFKC
;
1028 /* Normalization form */
1029 cups_utf32_t norm1
[CUPS_MAX_USTRING
];
1030 /* First UCS-4 normalized string */
1031 cups_utf32_t norm2
[CUPS_MAX_USTRING
];
1032 /* Second UCS-4 normalized string */
1033 cups_utf32_t
*p1
; /* First UCS-4 string pointer */
1034 cups_utf32_t
*p2
; /* Second UCS-4 string pointer */
1038 * Check for valid arguments...
1045 * Case Fold input UTF-32 strings to internal UCS-4 strings...
1048 len
= cupsUTF32CaseFold(fold1
, s1
, CUPS_MAX_USTRING
, fold
);
1053 len
= cupsUTF32CaseFold(fold2
, s2
, CUPS_MAX_USTRING
, fold
);
1059 * Normalize internal UCS-4 strings to NFKC...
1062 len
= cupsUTF32Normalize(norm1
, fold1
, CUPS_MAX_USTRING
, normalize
);
1067 len
= cupsUTF32Normalize(norm2
, fold2
, CUPS_MAX_USTRING
, normalize
);
1073 * Compare first internal UCS-4 to second internal UCS-4...
1079 for (;; p1
++, p2
++)
1081 difference
= (int) (*p1
- *p2
);
1083 if (difference
!= 0)
1086 if ((*p1
== 0) && (*p2
== 0))
1090 return (difference
);
1095 * 'cupsUTF32CharacterProperty()' - Get UTF-32 character property.
1098 int /* O - Result of comparison */
1099 cupsUTF32CharacterProperty(
1100 const cups_utf32_t ch
, /* I - Source char */
1101 const cups_property_t prop
) /* I - Char Property */
1103 int result
; /* Result Value */
1107 * Check for valid arguments...
1114 * Find character property...
1119 case CUPS_PROP_GENERAL_CATEGORY
:
1120 result
= (get_general_category(ch
));
1123 case CUPS_PROP_BIDI_CATEGORY
:
1124 result
= (get_bidi_category(ch
));
1127 case CUPS_PROP_COMBINING_CLASS
:
1128 result
= (get_combining_class(ch
));
1130 case CUPS_PROP_BREAK_CLASS
:
1131 result
= (get_break_class(ch
));
1143 * 'get_general_category()' - Get UTF-32 Character General Category.
1146 static int /* O - Class or -1 on error */
1147 get_general_category(
1148 const cups_utf32_t ch
) /* I - Source char */
1150 int result
; /* Result Value */
1151 cups_gencat_t gencat
; /* General Category Value */
1152 _cups_prop_map_t
*pmap
; /* Unicode Property Map */
1153 _cups_prop_t
*uni2prop
; /* Unicode Char -> Properties */
1154 _cups_globals_t
*cg
= _cupsGlobals();
1155 /* Pointer to library globals */
1159 * Check for valid argument...
1166 * Find property map...
1169 result
= cupsNormalizeMapsGet();
1174 pmap
= cg
->propmap_cache
;
1180 * Find character in map...
1183 uni2prop
= (_cups_prop_t
*)bsearch(&ch
, pmap
->uni2prop
, pmap
->propcount
,
1184 (sizeof(_cups_prop_t
)), compare_propchar
);
1186 cupsNormalizeMapsFree();
1188 if (uni2prop
== NULL
)
1189 gencat
= CUPS_GENCAT_CN
; /* Other, Not Assigned */
1191 gencat
= (cups_gencat_t
)uni2prop
->gencat
;
1193 result
= (int)gencat
;
1200 * 'get_bidi_category()' - Get UTF-32 Character Bidi Category.
1203 static int /* O - Class or -1 on error */
1204 get_bidi_category(const cups_utf32_t ch
)/* I - Source char */
1206 int result
; /* Result Value */
1207 cups_bidi_t bidicat
; /* Bidi Category Value */
1208 _cups_prop_map_t
*pmap
; /* Unicode Property Map */
1209 _cups_prop_t
*uni2prop
; /* Unicode Char -> Properties */
1210 _cups_globals_t
*cg
= _cupsGlobals();
1211 /* Pointer to library globals */
1215 * Check for valid argument...
1222 * Find property map...
1225 result
= cupsNormalizeMapsGet();
1230 pmap
= cg
->propmap_cache
;
1236 * Find character in map...
1239 uni2prop
= (_cups_prop_t
*)bsearch(&ch
, pmap
->uni2prop
, pmap
->propcount
,
1240 (sizeof(_cups_prop_t
)), compare_propchar
);
1242 cupsNormalizeMapsFree();
1244 if (uni2prop
== NULL
)
1245 bidicat
= CUPS_BIDI_ON
; /* Other Neutral */
1247 bidicat
= (cups_bidi_t
)uni2prop
->bidicat
;
1249 result
= (int)bidicat
;
1255 * 'get_combining_class()' - Get UTF-32 Character Combining Class.
1257 * Note - Zero is non-combining (base character)
1260 static int /* O - Class or -1 on error */
1261 get_combining_class(
1262 const cups_utf32_t ch
) /* I - Source char */
1264 int result
; /* Result Value */
1265 _cups_comb_map_t
*cmap
; /* Unicode Combining Class Map */
1266 _cups_comb_class_t combclass
; /* Unicode Combining Class */
1267 _cups_comb_t
*uni2comb
; /* Unicode Char -> Combining Class */
1268 _cups_globals_t
*cg
= _cupsGlobals();
1269 /* Pointer to library globals */
1273 * Check for valid argument...
1280 * Find combining class map...
1283 result
= cupsNormalizeMapsGet();
1288 cmap
= cg
->combmap_cache
;
1294 * Find combining character in map...
1297 uni2comb
= (_cups_comb_t
*)bsearch(&ch
, cmap
->uni2comb
, cmap
->combcount
,
1298 (sizeof(_cups_comb_t
)), compare_combchar
);
1300 cupsNormalizeMapsFree();
1302 if (uni2comb
== NULL
)
1305 combclass
= (_cups_comb_class_t
)uni2comb
->combclass
;
1307 result
= (int)combclass
;
1314 * 'get_break_class()' - Get UTF-32 Character Line Break Class.
1317 static int /* O - Class or -1 on error */
1318 get_break_class(const cups_utf32_t ch
) /* I - Source char */
1320 int result
; /* Result Value */
1321 _cups_break_map_t
*bmap
; /* Unicode Line Break Class Map */
1322 cups_break_class_t breakclass
; /* Unicode Line Break Class */
1323 cups_ucs2_t
*uni2break
; /* Unicode -> Line Break Class */
1324 _cups_globals_t
*cg
= _cupsGlobals();
1325 /* Pointer to library globals */
1329 * Check for valid argument...
1336 * Find line break class map...
1339 result
= cupsNormalizeMapsGet();
1344 bmap
= cg
->breakmap_cache
;
1350 * Find line break character in map...
1353 uni2break
= (cups_ucs2_t
*)bsearch(&ch
, bmap
->uni2break
, bmap
->breakcount
,
1354 (sizeof(cups_ucs2_t
) * 3),
1357 cupsNormalizeMapsFree();
1359 if (uni2break
== NULL
)
1360 breakclass
= CUPS_BREAK_AI
;
1362 breakclass
= (cups_break_class_t
)*(uni2break
+ 2);
1364 result
= (int)breakclass
;
1371 * 'get_map_count()' - Count lines in a map file.
1374 static int /* O - Count or -1 on error */
1375 get_map_count(const char *filename
) /* I - Map Filename */
1377 int i
; /* Looping variable */
1378 cups_file_t
*fp
; /* Map input file pointer */
1379 char *s
; /* Line parsing pointer */
1380 char line
[256]; /* Line from input map file */
1381 cups_utf32_t unichar
; /* Unicode character value */
1385 * Open map input file...
1388 if (!filename
|| !*filename
)
1391 fp
= cupsFileOpen(filename
, "r");
1396 * Count lines in map input file...
1399 for (i
= 0; i
< 50000;)
1401 s
= cupsFileGets(fp
, line
, sizeof(line
));
1404 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1406 if (strncmp (s
, "0x", 2) == 0)
1408 if (sscanf(s
, "%lx", &unichar
) != 1)
1410 if (unichar
> 0xffff)
1418 * Close file and return map count (non-comment line count)...
1428 * 'get_normmap()' - Get Unicode normalization map to cache.
1431 static int /* O - Zero or -1 on error */
1433 const cups_normalize_t normalize
) /* I - Normalization Form */
1435 int i
; /* Looping variable */
1436 cups_utf32_t unichar1
; /* Unicode character value */
1437 cups_utf32_t unichar2
; /* Unicode character value */
1438 cups_utf32_t unichar3
; /* Unicode character value */
1439 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
1440 int normcount
; /* Count of Unicode Source Chars */
1441 cups_ucs2_t
*uni2norm
; /* Unicode Char -> Normalization */
1442 char *mapname
; /* Normalization map name */
1443 char filename
[1024]; /* Filename for charset map file */
1444 cups_file_t
*fp
; /* Normalization map file pointer */
1445 char *s
; /* Line parsing pointer */
1446 char line
[256]; /* Line from input map file */
1447 _cups_globals_t
*cg
= _cupsGlobals();
1448 /* Pointer to library globals */
1452 * See if we already have this normalization map loaded...
1455 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
1456 if (nmap
->normalize
== normalize
)
1460 * Get the mapping name...
1465 case CUPS_NORM_NFD
: /* Canonical Decomposition */
1466 mapname
= "uni-nfd.txt";
1469 case CUPS_NORM_NFKD
: /* Compatibility Decomposition */
1470 mapname
= "uni-nfkd.txt";
1473 case CUPS_NORM_NFC
: /* Canonical Composition */
1474 mapname
= "uni-nfc.txt";
1477 case CUPS_NORM_NFKC
: /* no such map file... */
1483 * Open normalization map input file...
1486 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1487 cg
->cups_datadir
, mapname
);
1488 if ((normcount
= get_map_count(filename
)) <= 0)
1491 fp
= cupsFileOpen(filename
, "r");
1496 * Allocate memory for normalization map and add to cache...
1499 nmap
= (_cups_norm_map_t
*)calloc(1, sizeof(_cups_norm_map_t
));
1506 uni2norm
= (cups_ucs2_t
*)calloc(1, sizeof(cups_ucs2_t
) * 3 * normcount
);
1507 if (uni2norm
== NULL
)
1513 nmap
->next
= cg
->normmap_cache
;
1514 cg
->normmap_cache
= nmap
;
1516 nmap
->normalize
= normalize
;
1517 nmap
->normcount
= normcount
;
1518 nmap
->uni2norm
= uni2norm
;
1521 * Save normalization map into memory for later use...
1523 for (i
= 0; i
< normcount
; )
1525 s
= cupsFileGets(fp
, line
, sizeof(line
));
1528 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1530 if (sscanf(s
, "%lx %lx %lx", &unichar1
, &unichar2
, &unichar3
) != 3)
1532 if ((unichar1
> 0xffff)
1533 || (unichar2
> 0xffff)
1534 || (unichar3
> 0xffff))
1536 *uni2norm
++ = (cups_ucs2_t
) unichar1
;
1537 *uni2norm
++ = (cups_ucs2_t
) unichar2
;
1538 *uni2norm
++ = (cups_ucs2_t
) unichar3
;
1542 nmap
->normcount
= i
;
/*
 * Reviewer summary of get_foldmap(), limited to the statements visible in
 * this listing (the listing has gaps, so error exits, loop bodies and the
 * final return are not shown and are not described here):
 *
 *  - walks cg->foldmap_cache looking for an entry whose fold field equals
 *    the fold argument;
 *  - selects uni-fold.txt for CUPS_FOLD_SIMPLE and uni-full.txt for
 *    CUPS_FOLD_FULL;
 *  - formats the path as cups_datadir/charmaps/mapname, sizes the table
 *    with get_map_count() and opens the file with cupsFileOpen();
 *  - allocates one _cups_fold_map_t plus four cups_ucs2_t slots per counted
 *    entry and links the new map at the head of cg->foldmap_cache;
 *  - parses each data line into hex values and appends them as one row of
 *    four cups_ucs2_t values.
 */
1549 * 'get_foldmap()' - Get Unicode case folding map to cache.
1552 static int /* O - Zero or -1 on error */
1553 get_foldmap(const cups_folding_t fold
) /* I - Case folding type */
1555 int i
; /* Looping variable */
1556 cups_utf32_t unichar1
; /* Unicode character value */
1557 cups_utf32_t unichar2
; /* Unicode character value */
1558 cups_utf32_t unichar3
; /* Unicode character value */
1559 cups_utf32_t unichar4
; /* Unicode character value */
1560 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
1561 int foldcount
; /* Count of Unicode Source Chars */
1562 cups_ucs2_t
*uni2fold
; /* Unicode -> Folded Char(s) */
1563 char *mapname
; /* Case Folding map name */
1564 char filename
[1024]; /* Filename for charset map file */
1565 cups_file_t
*fp
; /* Case Folding map file pointer */
1566 char *s
; /* Line parsing pointer */
1567 char line
[256]; /* Line from input map file */
1568 _cups_globals_t
*cg
= _cupsGlobals();
1569 /* Pointer to library globals */
1573 * See if we already have this case folding map loaded...
1576 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= fmap
->next
)
1577 if (fmap
->fold
== fold
)
1581 * Get the mapping name...
1586 case CUPS_FOLD_SIMPLE
: /* Simple case folding */
1587 mapname
= "uni-fold.txt";
1589 case CUPS_FOLD_FULL
: /* Full case folding */
1590 mapname
= "uni-full.txt";
1597 * Open case folding map input file...
/* snprintf() bounds the path to the 1024-byte filename buffer. */
1600 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1601 cg
->cups_datadir
, mapname
);
/* A count of zero or less is tested before the file is opened. */
1602 if ((foldcount
= get_map_count(filename
)) <= 0)
1604 fp
= cupsFileOpen(filename
, "r");
1609 * Allocate memory for case folding map and add to cache...
1611 fmap
= (_cups_fold_map_t
*)calloc(1, sizeof(_cups_fold_map_t
));
/* Four cups_ucs2_t slots per entry, zero-filled by calloc(). */
/* NOTE(review): the size is an unchecked product with an int count; */
/* presumably get_map_count() keeps foldcount small - confirm. */
1617 uni2fold
= (cups_ucs2_t
*)calloc(1, sizeof(cups_ucs2_t
) * 4 * foldcount
);
1618 if (uni2fold
== NULL
)
/* The map is linked into the cache before any row has been parsed. */
1624 fmap
->next
= cg
->foldmap_cache
;
1625 cg
->foldmap_cache
= fmap
;
1628 fmap
->foldcount
= foldcount
;
1629 fmap
->uni2fold
= uni2fold
;
1632 * Save case folding map into memory for later use...
/* The for header has no increment clause; the update of i is not shown. */
1635 for (i
= 0; i
< foldcount
; )
1637 s
= cupsFileGets(fp
, line
, sizeof(line
));
/* Lines are classified by their first character (hash, newline, NUL). */
/* NOTE(review): s is dereferenced here; confirm a NULL result from */
/* cupsFileGets() is handled by the lines missing from this listing. */
1640 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
/* All four values start at zero so unused columns are stored as 0. */
1642 unichar1
= unichar2
= unichar3
= unichar4
= 0;
/* Simple maps must supply two hex fields per line, full maps four. */
/* The %lx conversions write through unsigned long pointers; this */
/* assumes cups_utf32_t is unsigned long - TODO confirm. */
1643 if ((fold
== CUPS_FOLD_SIMPLE
)
1644 && (sscanf(s
, "%lx %lx", &unichar1
, &unichar2
) != 2))
1646 if ((fold
== CUPS_FOLD_FULL
)
1647 && (sscanf(s
, "%lx %lx %lx %lx",
1648 &unichar1
, &unichar2
, &unichar3
, &unichar4
) != 4))
/* Range test ahead of the narrowing casts to cups_ucs2_t below. */
1650 if ((unichar1
> 0xffff)
1651 || (unichar2
> 0xffff)
1652 || (unichar3
> 0xffff)
1653 || (unichar4
> 0xffff))
/* One row is four consecutive cups_ucs2_t values. */
1655 *uni2fold
++ = (cups_ucs2_t
) unichar1
;
1656 *uni2fold
++ = (cups_ucs2_t
) unichar2
;
1657 *uni2fold
++ = (cups_ucs2_t
) unichar3
;
1658 *uni2fold
++ = (cups_ucs2_t
) unichar4
;
/* The stored count is replaced by the loop counter i; the guarding */
/* condition, if any, is not shown in this listing. */
1662 fmap
->foldcount
= i
;
/*
 * Reviewer summary of get_propmap(), limited to the statements visible in
 * this listing (error exits, loop bodies and the final return are not shown
 * and are not described here):
 *
 *  - tests cg->propmap_cache, which holds a single map pointer rather than
 *    a list;
 *  - formats the path as cups_datadir/charmaps/uni-prop.txt, sizes the
 *    table with get_map_count() and opens the file with cupsFileOpen();
 *  - allocates one _cups_prop_map_t plus propcount _cups_prop_t rows and
 *    stores the map in cg->propmap_cache;
 *  - for each data line reads a hex character value, then matches two
 *    further fields against gencat_index[] and bidicat_index[] and stores
 *    the character with both category values in the current row.
 */
1668 * 'get_propmap()' - Get Unicode character property map to cache.
1671 static int /* O - Zero or -1 on error */
1674 int i
, j
; /* Looping variables */
1675 size_t len
; /* String length */
1676 cups_utf32_t unichar
; /* Unicode character value */
1677 cups_gencat_t gencat
; /* General Category Value */
1678 cups_bidi_t bidicat
; /* Bidi Category Value */
1679 _cups_prop_map_t
*pmap
; /* Unicode Char Property Map */
1680 int propcount
; /* Count of Unicode Source Chars */
1681 _cups_prop_t
*uni2prop
; /* Unicode Char -> Properties */
1682 char *mapname
; /* Char Property map name */
1683 char filename
[1024]; /* Filename for charset map file */
1684 cups_file_t
*fp
; /* Char Property map file pointer */
1685 char *s
; /* Line parsing pointer */
1686 char line
[256]; /* Line from input map file */
1687 _cups_globals_t
*cg
= _cupsGlobals();
1688 /* Pointer to library globals */
1692 * See if we already have this char properties map loaded...
1695 if ((pmap
= cg
->propmap_cache
) != NULL
)
1699 * Get the mapping name...
1702 mapname
= "uni-prop.txt";
1705 * Open char properties map input file...
/* snprintf() bounds the path to the 1024-byte filename buffer. */
1707 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1708 cg
->cups_datadir
, mapname
);
1709 if ((propcount
= get_map_count(filename
)) <= 0)
1711 fp
= cupsFileOpen(filename
, "r");
1716 * Allocate memory for char properties map and add to cache...
1718 pmap
= (_cups_prop_map_t
*)calloc(1, sizeof(_cups_prop_map_t
));
/* One zero-filled _cups_prop_t row per counted entry. */
1724 uni2prop
= (_cups_prop_t
*)calloc(1, sizeof(_cups_prop_t
) * propcount
);
1725 if (uni2prop
== NULL
)
/* The map is published in the cache before any row has been parsed. */
1731 cg
->propmap_cache
= pmap
;
1733 pmap
->propcount
= propcount
;
1734 pmap
->uni2prop
= uni2prop
;
1737 * Save char properties map into memory for later use...
/* The for header has no increment clause; the update of i is not shown. */
1739 for (i
= 0; i
< propcount
; )
1741 s
= cupsFileGets(fp
, line
, sizeof(line
));
/* Overwrites the last character of the line with NUL. */
/* NOTE(review): for an empty string strlen(s) - 1 wraps and this writes */
/* outside the buffer unless a guard exists in the lines missing from */
/* this listing; also confirm whether cupsFileGets() has already removed */
/* the newline, in which case a data character is dropped here. */
1745 *(s
+ strlen(s
) - 1) = '\0';
1746 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
/* %lx writes through an unsigned long pointer; assumes cups_utf32_t */
/* is unsigned long - TODO confirm. */
1748 if (sscanf(s
, "%lx", &unichar
) != 1)
/* Range test ahead of the narrowing cast to cups_ucs2_t below. */
1750 if (unichar
> 0xffff)
/* Scan for the next semicolon or the end of the string; the loop body */
/* that advances s is not shown. */
1752 while ((*s
!= '\0') && (*s
!= ';'))
/* gencat_index[] is walked until an entry with a NULL str is reached. */
1757 for (j
= 0; gencat_index
[j
].str
!= NULL
; j
++)
1759 len
= strlen(gencat_index
[j
].str
);
/* Prefix match: succeeds when the text at s begins with the table string. */
1760 if (strncmp (s
, gencat_index
[j
].str
, len
) == 0)
/* Reaching the NULL entry means no table string matched. */
1763 if (gencat_index
[j
].str
== NULL
)
1765 gencat
= gencat_index
[j
].gencat
;
1766 while ((*s
!= '\0') && (*s
!= ';'))
/* bidicat_index[] is an array of strings ended by a NULL pointer. */
1771 for (j
= 0; bidicat_index
[j
] != NULL
; j
++)
1773 len
= strlen(bidicat_index
[j
]);
1774 if (strncmp (s
, bidicat_index
[j
], len
) == 0)
1777 if (bidicat_index
[j
] == NULL
)
/* The table index itself is used as the cups_bidi_t value. */
1779 bidicat
= (cups_bidi_t
) j
;
/* Both categories are narrowed to unsigned char when stored. */
1780 uni2prop
->ch
= (cups_ucs2_t
) unichar
;
1781 uni2prop
->gencat
= (unsigned char) gencat
;
1782 uni2prop
->bidicat
= (unsigned char) bidicat
;
/* The stored count is replaced by the loop counter i; the guarding */
/* condition, if any, is not shown in this listing. */
1787 pmap
->propcount
= i
;
/*
 * Reviewer summary of get_combmap(), limited to the statements visible in
 * this listing (error exits, loop bodies and the final return are not shown
 * and are not described here):
 *
 *  - tests cg->combmap_cache, which holds a single map pointer;
 *  - formats the path as cups_datadir/charmaps/uni-comb.txt, sizes the
 *    table with get_map_count() and opens the file with cupsFileOpen();
 *  - allocates one _cups_comb_map_t plus combcount _cups_comb_t rows and
 *    stores the map in cg->combmap_cache;
 *  - for each data line reads a hex character value and a decimal
 *    combining class and stores both in the current row.
 */
1794 * 'get_combmap()' - Get Unicode combining class map to cache.
1797 static int /* O - Zero or -1 on error */
1800 int i
; /* Looping variable */
1801 cups_utf32_t unichar
; /* Unicode character value */
1802 int combclass
; /* Unicode char combining class */
1803 _cups_comb_map_t
*cmap
; /* Unicode Comb Class Map */
1804 int combcount
; /* Count of Unicode Source Chars */
1805 _cups_comb_t
*uni2comb
; /* Unicode Char -> Combining Class */
1806 char *mapname
; /* Comb Class map name */
1807 char filename
[1024]; /* Filename for charset map file */
1808 cups_file_t
*fp
; /* Comb Class map file pointer */
1809 char *s
; /* Line parsing pointer */
1810 char line
[256]; /* Line from input map file */
1811 _cups_globals_t
*cg
= _cupsGlobals();
1812 /* Pointer to library globals */
1816 * See if we already have this combining class map loaded...
1819 if ((cmap
= cg
->combmap_cache
) != NULL
)
1823 * Get the mapping name...
1826 mapname
= "uni-comb.txt";
1829 * Open combining class map input file...
/* snprintf() bounds the path to the 1024-byte filename buffer. */
1832 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1833 cg
->cups_datadir
, mapname
);
1834 if ((combcount
= get_map_count(filename
)) <= 0)
1836 fp
= cupsFileOpen(filename
, "r");
1841 * Allocate memory for combining class map and add to cache...
1844 cmap
= (_cups_comb_map_t
*)calloc(1, sizeof(_cups_comb_map_t
));
/* One zero-filled _cups_comb_t row per counted entry. */
1851 uni2comb
= (_cups_comb_t
*)calloc(1, sizeof(_cups_comb_t
) * combcount
);
1852 if (uni2comb
== NULL
)
/* The map is published in the cache before any row has been parsed. */
1858 cg
->combmap_cache
= cmap
;
1860 cmap
->combcount
= combcount
;
1861 cmap
->uni2comb
= uni2comb
;
1864 * Save combining class map into memory for later use...
/* The for header has no increment clause; the update of i is not shown. */
1866 for (i
= 0; i
< combcount
; )
1868 s
= cupsFileGets(fp
, line
, sizeof(line
));
/* NOTE(review): s is dereferenced here; confirm a NULL result from */
/* cupsFileGets() is handled by the lines missing from this listing. */
1871 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
/* %lx writes through an unsigned long pointer; assumes cups_utf32_t */
/* is unsigned long - TODO confirm. */
1873 if (sscanf(s
, "%lx", &unichar
) != 1)
/* Range test ahead of the narrowing cast to cups_ucs2_t below. */
1875 if (unichar
> 0xffff)
/* Scan for the next semicolon or the end of the string; the loop body */
/* that advances s is not shown. */
1877 while ((*s
!= '\0') && (*s
!= ';'))
1882 if (sscanf(s
, "%d", &combclass
) != 1)
1884 uni2comb
->ch
= (cups_ucs2_t
) unichar
;
/* NOTE(review): the int parsed with %d is narrowed to unsigned char with */
/* no visible range check; values outside 0..255 would be altered. */
1885 uni2comb
->combclass
= (unsigned char) combclass
;
/* The stored count is replaced by the loop counter i; the guarding */
/* condition, if any, is not shown in this listing. */
1890 cmap
->combcount
= i
;
/*
 * Reviewer summary of get_breakmap(), limited to the statements visible in
 * this listing (error exits, loop bodies and the final return are not shown
 * and are not described here):
 *
 *  - tests cg->breakmap_cache, which holds a single map pointer;
 *  - formats the path as cups_datadir/charmaps/uni-line.txt, sizes the
 *    table with get_map_count() and opens the file with cupsFileOpen();
 *  - allocates one _cups_break_map_t plus three cups_ucs2_t slots per
 *    counted entry and stores the map in cg->breakmap_cache;
 *  - for each data line reads two hex character values, matches a further
 *    field against break_index[] and appends a row of three values:
 *    unichar1, unichar2 and the break class.
 */
1897 * 'get_breakmap()' - Get Unicode line break class map to cache.
1900 static int /* O - Zero or -1 on error */
1903 int i
, j
; /* Looping variables */
1904 int len
; /* String length */
1905 cups_utf32_t unichar1
; /* Unicode character value */
1906 cups_utf32_t unichar2
; /* Unicode character value */
1907 cups_break_class_t breakclass
; /* Unicode char line break class */
1908 _cups_break_map_t
*bmap
; /* Unicode Line Break Class Map */
1909 int breakcount
; /* Count of Unicode Source Chars */
1910 cups_ucs2_t
*uni2break
; /* Unicode -> Line Break Class */
1911 char *mapname
; /* Comb Class map name */
1912 char filename
[1024]; /* Filename for charset map file */
1913 cups_file_t
*fp
; /* Comb Class map file pointer */
1914 char *s
; /* Line parsing pointer */
1915 char line
[256]; /* Line from input map file */
1916 _cups_globals_t
*cg
= _cupsGlobals();
1917 /* Pointer to library globals */
1921 * See if we already have this line break class map loaded...
1924 if ((bmap
= cg
->breakmap_cache
) != NULL
)
1928 * Get the mapping name...
1931 mapname
= "uni-line.txt";
1934 * Open line break class map input file...
/* snprintf() bounds the path to the 1024-byte filename buffer. */
1937 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1938 cg
->cups_datadir
, mapname
);
1939 if ((breakcount
= get_map_count(filename
)) <= 0)
1941 fp
= cupsFileOpen(filename
, "r");
1946 * Allocate memory for line break class map and add to cache...
1949 bmap
= (_cups_break_map_t
*)calloc(1, sizeof(_cups_break_map_t
));
/* Three cups_ucs2_t slots per entry, zero-filled by calloc(). */
1956 uni2break
= (cups_ucs2_t
*)calloc(1, sizeof(cups_ucs2_t
) * 3 * breakcount
);
1957 if (uni2break
== NULL
)
/* The map is published in the cache before any row has been parsed. */
1963 cg
->breakmap_cache
= bmap
;
1965 bmap
->breakcount
= breakcount
;
1966 bmap
->uni2break
= uni2break
;
1969 * Save line break class map into memory for later use...
/* The for header has no increment clause; the update of i is not shown. */
1971 for (i
= 0; i
< breakcount
; )
1973 s
= cupsFileGets(fp
, line
, sizeof(line
));
/* Overwrites the last character of the line with NUL. */
/* NOTE(review): for an empty string strlen(s) - 1 wraps and this writes */
/* outside the buffer unless a guard exists in the lines missing from */
/* this listing; also confirm whether cupsFileGets() has already removed */
/* the newline, in which case a data character is dropped here. */
1977 *(s
+ strlen(s
) - 1) = '\0';
1978 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
/* %lx writes through unsigned long pointers; assumes cups_utf32_t */
/* is unsigned long - TODO confirm. */
1980 if (sscanf(s
, "%lx %lx", &unichar1
, &unichar2
) != 2)
/* Range test ahead of the narrowing casts to cups_ucs2_t below. */
1982 if ((unichar1
> 0xffff)
1983 || (unichar2
> 0xffff))
/* Scan for the next semicolon or the end of the string; the loop body */
/* that advances s is not shown. */
1985 while ((*s
!= '\0') && (*s
!= ';'))
/* break_index[] is walked until an entry with a NULL str is reached. */
1990 for (j
= 0; break_index
[j
].str
!= NULL
; j
++)
/* The size_t result of strlen() is stored in an int here, unlike the */
/* size_t len declared in get_propmap(). */
1992 len
= strlen (break_index
[j
].str
);
/* Prefix match: succeeds when the text at s begins with the table string. */
1993 if (strncmp (s
, break_index
[j
].str
, len
) == 0)
/* Reaching the NULL entry means no table string matched. */
1996 if (break_index
[j
].str
== NULL
)
1998 breakclass
= break_index
[j
].breakclass
;
/* One row is three consecutive cups_ucs2_t values; the break class is */
/* stored in the third slot. */
1999 *uni2break
++ = (cups_ucs2_t
) unichar1
;
2000 *uni2break
++ = (cups_ucs2_t
) unichar2
;
2001 *uni2break
++ = (cups_ucs2_t
) breakclass
;
/* The stored count is replaced by the loop counter i; the guarding */
/* condition, if any, is not shown in this listing. */
2005 bmap
->breakcount
= i
;
/*
 * Reviewer summary of compare_compose(), limited to the statements visible
 * in this listing: k1 is read as a cups_utf32_t pointer and k2 as a
 * cups_ucs2_t pointer. The first two map values are packed into one
 * unsigned long (first value in the upper half, second in the lower 16
 * bits) and compared with key. The lines that build key, the condition
 * selecting between the two result assignments, and the return statement
 * are not shown.
 */
2012 * 'compare_compose()' - Compare key for compose match.
2014 * Note - This function cannot be easily modified for 32-bit Unicode.
2017 static int /* O - Result of comparison */
2018 compare_compose(const void *k1
, /* I - Key char */
2019 const void *k2
) /* I - Map char */
2021 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2022 /* Key char pointer */
2023 cups_ucs2_t
*mp
= (cups_ucs2_t
*)k2
;/* Map char pointer */
2024 unsigned long key
; /* Pair of key characters */
2025 unsigned long map
; /* Pair of map characters */
2026 int result
; /* Result Value */
/* NOTE(review): the shift is evaluated before the cast. If cups_ucs2_t is */
/* a 16-bit unsigned type it is promoted to int, so a value of 0x8000 or */
/* more overflows a 32-bit int on the shift - confirm the type and */
/* consider casting before shifting. */
2031 map
= (unsigned long) (*mp
<< 16);
2032 map
|= (unsigned long) *(mp
+ 1);
/* NOTE(review): both results cast an unsigned long difference to int; a */
/* difference above INT_MAX does not fit and may change the sign seen by */
/* the caller. */
2035 result
= (int) (key
- map
);
2037 result
= -1 * ((int) (map
- key
));
2044 * 'compare_decompose()' - Compare key for decompose match.
2047 static int /* O - Result of comparison */
2048 compare_decompose(const void *k1
, /* I - Key char */
2049 const void *k2
) /* I - Map char */
2051 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2052 /* Key char pointer */
2053 cups_ucs2_t
*mp
= (cups_ucs2_t
*)k2
;/* Map char pointer */
2054 cups_ucs2_t ch
; /* Key char as UCS-2 */
2055 int result
; /* Result Value */
2058 ch
= (cups_ucs2_t
) *kp
;
2061 result
= (int) (ch
- *mp
);
2063 result
= -1 * ((int) (*mp
- ch
));
2070 * 'compare_foldchar()' - Compare key for case fold match.
2073 static int /* O - Result of comparison */
2074 compare_foldchar(const void *k1
, /* I - Key char */
2075 const void *k2
) /* I - Map char */
2077 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2078 /* Key char pointer */
2079 cups_ucs2_t
*mp
= (cups_ucs2_t
*)k2
;/* Map char pointer */
2080 cups_ucs2_t ch
; /* Key char as UCS-2 */
2081 int result
; /* Result Value */
2084 ch
= (cups_ucs2_t
) *kp
;
2087 result
= (int) (ch
- *mp
);
2089 result
= -1 * ((int) (*mp
- ch
));
2096 * 'compare_combchar()' - Compare key for combining char match.
2099 static int /* O - Result of comparison */
2100 compare_combchar(const void *k1
, /* I - Key char */
2101 const void *k2
) /* I - Map char */
2103 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2104 /* Key char pointer */
2105 _cups_comb_t
*cp
= (_cups_comb_t
*)k2
;/* Combining map row pointer */
2106 cups_ucs2_t ch
; /* Key char as UCS-2 */
2107 int result
; /* Result Value */
2110 ch
= (cups_ucs2_t
) *kp
;
2113 result
= (int) (ch
- cp
->ch
);
2115 result
= -1 * ((int) (cp
->ch
- ch
));
/*
 * Reviewer summary of compare_breakchar(), limited to the statements
 * visible in this listing: the key is narrowed to cups_ucs2_t and compared
 * with the first two values of the map row. One branch yields the negated
 * distance below the first value, another the distance above the second
 * value. The condition guarding the first branch, any remaining branch and
 * the return statement are not shown.
 */
2122 * 'compare_breakchar()' - Compare key for line break char match.
2125 static int /* O - Result of comparison */
2126 compare_breakchar(const void *k1
, /* I - Key char */
2127 const void *k2
) /* I - Map char */
2129 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2130 /* Key char pointer */
2131 cups_ucs2_t
*mp
= (cups_ucs2_t
*)k2
;/* Map char pointer */
2132 cups_ucs2_t ch
; /* Key char as UCS-2 */
2133 int result
; /* Result Value */
/* NOTE(review): the cast drops any bits of the key that do not fit in */
/* cups_ucs2_t, so a key above 0xffff is compared as a different, smaller */
/* character - confirm callers never pass such keys. */
2136 ch
= (cups_ucs2_t
) *kp
;
/* Negated distance from the key up to the first value of the row. */
2139 result
= -1 * (int) (*mp
- ch
);
/* The second value of the row acts as an upper bound for the key. */
2140 else if (ch
> *(mp
+ 1))
2141 result
= (int) (ch
- *(mp
+ 1));
2150 * 'compare_propchar()' - Compare key for property char match.
2153 static int /* O - Result of comparison */
2154 compare_propchar(const void *k1
, /* I - Key char */
2155 const void *k2
) /* I - Map char */
2157 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2158 /* Key char pointer */
2159 _cups_prop_t
*pp
= (_cups_prop_t
*)k2
;/* Property map row pointer */
2160 cups_ucs2_t ch
; /* Key char as UCS-2 */
2161 int result
; /* Result Value */
2164 ch
= (cups_ucs2_t
) *kp
;
2167 result
= (int) (ch
- pp
->ch
);
2169 result
= -1 * ((int) (pp
->ch
- ch
));
2176 * End of "$Id: normalize.c 4903 2006-01-10 20:02:46Z mike $"