2 * "$Id: normalize.c 4967 2006-01-24 03:42:15Z mike $"
4 * Unicode normalization for the Common UNIX Printing System (CUPS).
6 * Copyright 1997-2006 by Easy Software Products.
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
26 * cupsNormalizeMapsGet() - Get all norm maps to cache.
27 * cupsNormalizeMapsFree() - Free all norm maps in cache.
28 * cupsNormalizeMapsFlush() - Flush all norm maps in cache.
29 * _cupsNormalizeMapsFlush() - Flush all normalization maps in cache.
30 * cupsUTF8Normalize() - Normalize UTF-8 string.
31 * cupsUTF32Normalize() - Normalize UTF-32 string.
32 * cupsUTF8CaseFold() - Case fold UTF-8 string.
33 * cupsUTF32CaseFold() - Case fold UTF-32 string.
34 * cupsUTF8CompareCaseless() - Compare case folded UTF-8 strings.
35 * cupsUTF32CompareCaseless() - Compare case folded UTF-32 strings.
36 * cupsUTF8CompareIdentifier() - Compare folded NFKC UTF-8 strings.
37 * cupsUTF32CompareIdentifier() - Compare folded NFKC UTF-32 strings.
38 * cupsUTF32CharacterProperty() - Get UTF-32 character property.
39 * get_general_category() - Get UTF-32 Char General Category.
40 * get_bidi_category() - Get UTF-32 Char Bidi Category.
41 * get_combining_class() - Get UTF-32 Char Combining Class.
42 * get_break_class() - Get UTF-32 Char Line Break Class.
43 * get_map_count() - Count lines in a map file.
44 * get_normmap() - Get Unicode norm map to cache.
45 * get_foldmap() - Get Unicode casefold map to cache.
46 * get_propmap() - Get Unicode property map to cache.
47 * get_combmap() - Get Unicode combining map to cache.
48 * get_breakmap() - Get Unicode break map to cache.
49 * compare_compose() - Compare key for compose match.
50 * compare_decompose() - Compare key for decompose match.
51 * compare_foldchar() - Compare key for case fold match.
52 * compare_combchar() - Compare key for combining match.
53 * compare_breakchar() - Compare key for line break match.
54 * compare_propchar() - Compare key for property char match.
58 * Include necessary headers...
/*
 * General category index: a two-member record (category value plus its
 * abbreviation string) followed by a constant table holding one row per
 * CUPS_GENCAT_* value listed below, from Lu through Cn.
 */
68 typedef struct /**** General Category Index Struct****/
70 cups_gencat_t gencat
; /* General Category Value */
71 const char *str
; /* General Category String */
/* Table rows: { category value, two-letter abbreviation }. */
74 static const gencat_t gencat_index
[] = /* General Category Index */
76 { CUPS_GENCAT_LU
, "Lu" }, /* Letter, Uppercase */
77 { CUPS_GENCAT_LL
, "Ll" }, /* Letter, Lowercase */
78 { CUPS_GENCAT_LT
, "Lt" }, /* Letter, Titlecase */
79 { CUPS_GENCAT_LM
, "Lm" }, /* Letter, Modifier */
80 { CUPS_GENCAT_LO
, "Lo" }, /* Letter, Other */
81 { CUPS_GENCAT_MN
, "Mn" }, /* Mark, Non-Spacing */
82 { CUPS_GENCAT_MC
, "Mc" }, /* Mark, Spacing Combining */
83 { CUPS_GENCAT_ME
, "Me" }, /* Mark, Enclosing */
84 { CUPS_GENCAT_ND
, "Nd" }, /* Number, Decimal Digit */
85 { CUPS_GENCAT_NL
, "Nl" }, /* Number, Letter */
86 { CUPS_GENCAT_NO
, "No" }, /* Number, Other */
87 { CUPS_GENCAT_PC
, "Pc" }, /* Punctuation, Connector */
88 { CUPS_GENCAT_PD
, "Pd" }, /* Punctuation, Dash */
89 { CUPS_GENCAT_PS
, "Ps" }, /* Punctuation, Open (start) */
90 { CUPS_GENCAT_PE
, "Pe" }, /* Punctuation, Close (end) */
91 { CUPS_GENCAT_PI
, "Pi" }, /* Punctuation, Initial Quote */
92 { CUPS_GENCAT_PF
, "Pf" }, /* Punctuation, Final Quote */
93 { CUPS_GENCAT_PO
, "Po" }, /* Punctuation, Other */
94 { CUPS_GENCAT_SM
, "Sm" }, /* Symbol, Math */
95 { CUPS_GENCAT_SC
, "Sc" }, /* Symbol, Currency */
96 { CUPS_GENCAT_SK
, "Sk" }, /* Symbol, Modifier */
97 { CUPS_GENCAT_SO
, "So" }, /* Symbol, Other */
98 { CUPS_GENCAT_ZS
, "Zs" }, /* Separator, Space */
99 { CUPS_GENCAT_ZL
, "Zl" }, /* Separator, Line */
100 { CUPS_GENCAT_ZP
, "Zp" }, /* Separator, Paragraph */
101 { CUPS_GENCAT_CC
, "Cc" }, /* Other, Control */
102 { CUPS_GENCAT_CF
, "Cf" }, /* Other, Format */
103 { CUPS_GENCAT_CS
, "Cs" }, /* Other, Surrogate */
104 { CUPS_GENCAT_CO
, "Co" }, /* Other, Private Use */
105 { CUPS_GENCAT_CN
, "Cn" }, /* Other, Not Assigned */
/*
 * Bidi category index: constant array of bidi category abbreviation
 * strings, L through ON.
 * NOTE(review): presumably indexed by the cups_bidi_t value, so the row
 * order would have to match that enumeration - confirm against its
 * declaration.
 */
109 static const char * const bidicat_index
[] =
110 /* Bidi Category Index */
112 "L", /* Left-to-Right (Alpha, Syllabic, Ideographic) */
113 "LRE", /* Left-to-Right Embedding (explicit) */
114 "LRO", /* Left-to-Right Override (explicit) */
115 "R", /* Right-to-Left (Hebrew alphabet and most punct) */
116 "AL", /* Right-to-Left Arabic (Arabic, Thaana, Syriac) */
117 "RLE", /* Right-to-Left Embedding (explicit) */
118 "RLO", /* Right-to-Left Override (explicit) */
119 "PDF", /* Pop Directional Format */
120 "EN", /* European Number (Euro and East Arabic-Indic digits) */
121 "ES", /* European Number Separator (Slash) */
122 "ET", /* European Number Terminator (Plus, Minus, Degree, etc) */
123 "AN", /* Arabic Number (Arabic-Indic digits, separators) */
124 "CS", /* Common Number Separator (Colon, Comma, Dot, etc) */
125 "NSM", /* Non-Spacing Mark (category Mn / Me in UCD) */
126 "BN", /* Boundary Neutral (Formatting / Control chars) */
127 "B", /* Paragraph Separator */
128 "S", /* Segment Separator (Tab) */
129 "WS", /* Whitespace Space (Space, Line Separator, etc) */
130 "ON", /* Other Neutrals */
/*
 * Line break class index: a two-member record (break class value plus its
 * abbreviation string) followed by a constant table holding one row per
 * CUPS_BREAK_* value listed below, from AI through ZW.
 */
134 typedef struct /**** Line Break Class Index Struct****/
136 cups_break_class_t breakclass
; /* Line Break Class Value */
137 const char *str
; /* Line Break Class String */
/* Table rows: { break class value, two-letter abbreviation }. */
140 static const _cups_break_t break_index
[] = /* Line Break Class Index */
142 { CUPS_BREAK_AI
, "AI" }, /* Ambiguous (Alphabetic or Ideograph) */
143 { CUPS_BREAK_AL
, "AL" }, /* Ordinary Alpha/Symbol Chars (XP) */
144 { CUPS_BREAK_BA
, "BA" }, /* Break Opportunity After Chars (A) */
145 { CUPS_BREAK_BB
, "BB" }, /* Break Opportunities Before Chars (B) */
146 { CUPS_BREAK_B2
, "B2" }, /* Break Opportunity Either (B/A/XP) */
147 { CUPS_BREAK_BK
, "BK" }, /* Mandatory Break (A) (norm) */
148 { CUPS_BREAK_CB
, "CB" }, /* Contingent Break (B/A) (norm) */
149 { CUPS_BREAK_CL
, "CL" }, /* Closing Punctuation (XB) */
150 { CUPS_BREAK_CM
, "CM" }, /* Attached/Combining (XB) (norm) */
151 { CUPS_BREAK_CR
, "CR" }, /* Carriage Return (A) (norm) */
152 { CUPS_BREAK_EX
, "EX" }, /* Exclamation / Interrogation (XB) */
153 { CUPS_BREAK_GL
, "GL" }, /* Non-breaking ("Glue") (XB/XA) (norm) */
154 { CUPS_BREAK_HY
, "HY" }, /* Hyphen (XA) */
155 { CUPS_BREAK_ID
, "ID" }, /* Ideographic (B/A) */
156 { CUPS_BREAK_IN
, "IN" }, /* Inseparable chars (XP) */
157 { CUPS_BREAK_IS
, "IS" }, /* Numeric Separator (Infix) (XB) */
158 { CUPS_BREAK_LF
, "LF" }, /* Line Feed (A) (norm) */
159 { CUPS_BREAK_NS
, "NS" }, /* Non-starters (XB) */
160 { CUPS_BREAK_NU
, "NU" }, /* Numeric (XP) */
161 { CUPS_BREAK_OP
, "OP" }, /* Opening Punctuation (XA) */
162 { CUPS_BREAK_PO
, "PO" }, /* Postfix (Numeric) (XB) */
163 { CUPS_BREAK_PR
, "PR" }, /* Prefix (Numeric) (XA) */
164 { CUPS_BREAK_QU
, "QU" }, /* Ambiguous Quotation (XB/XA) */
165 { CUPS_BREAK_SA
, "SA" }, /* Context Dependent (SE Asian) (P) */
166 { CUPS_BREAK_SG
, "SG" }, /* Surrogates (XP) (norm) */
167 { CUPS_BREAK_SP
, "SP" }, /* Space (A) (norm) */
168 { CUPS_BREAK_SY
, "SY" }, /* Symbols Allowing Break After (A) */
169 { CUPS_BREAK_XX
, "XX" }, /* Unknown (XP) */
170 { CUPS_BREAK_ZW
, "ZW" }, /* Zero Width Space (A) (norm) */
/*
 * Forward declarations for the file-local helpers: six comparison
 * callbacks taking two const void pointers (the comparator shape that
 * bsearch() expects), four per-character property getters, the map-file
 * line counter, and the map loaders.
 */
178 static int compare_breakchar(const void *k1
, const void *k2
);
179 static int compare_combchar(const void *k1
, const void *k2
);
180 static int compare_compose(const void *k1
, const void *k2
);
181 static int compare_decompose(const void *k1
, const void *k2
);
182 static int compare_foldchar(const void *k1
, const void *k2
);
183 static int compare_propchar(const void *k1
, const void *k2
);
184 static int get_bidi_category(const cups_utf32_t ch
);
185 static int get_break_class(const cups_utf32_t ch
);
186 static int get_breakmap(void);
187 static int get_combining_class(const cups_utf32_t ch
);
188 static int get_combmap(void);
189 static int get_foldmap(const cups_folding_t fold
);
190 static int get_general_category(const cups_utf32_t ch
);
191 static int get_map_count(const char *filename
);
192 static int get_normmap(const cups_normalize_t normalize
);
193 static int get_propmap(void);
197 * 'cupsNormalizeMapsGet()' - Get all normalization maps to cache.
/*
 * Takes no arguments and works on the globals returned by _cupsGlobals().
 * Visible behavior: when a normalization map cache already exists, the
 * cached normalization and folding lists are walked and the used counters
 * of the combining, property and break maps are incremented; the loader
 * helpers are called for NFD, NFKD and NFC, for simple and full folding,
 * and for the property, combining and break maps.
 * NOTE(review): the loop bodies and the statements that follow each
 * failed check are not shown in this listing.
 */
200 int /* O - Zero or -1 on error */
201 cupsNormalizeMapsGet(void)
203 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
204 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
205 _cups_globals_t
*cg
= _cupsGlobals();
206 /* Pointer to library globals */
210 * See if we already have normalization maps loaded...
213 if (cg
->normmap_cache
)
/* Visit every cached normalization map, then every cached folding map. */
215 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
218 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= fmap
->next
)
/* The three single-instance maps each get their use count bumped if present. */
221 if (cg
->combmap_cache
)
222 cg
->combmap_cache
->used
++;
224 if (cg
->propmap_cache
)
225 cg
->propmap_cache
->used
++;
227 if (cg
->breakmap_cache
)
228 cg
->breakmap_cache
->used
++;
234 * Get normalization maps...
/* Only NFD, NFKD and NFC are requested here; no NFKC map is loaded. */
237 if (get_normmap(CUPS_NORM_NFD
) < 0)
240 if (get_normmap(CUPS_NORM_NFKD
) < 0)
243 if (get_normmap(CUPS_NORM_NFC
) < 0)
247 * Get case folding, combining class, character property maps...
/* Each loader signals failure with a negative result. */
250 if (get_foldmap(CUPS_FOLD_SIMPLE
) < 0)
253 if (get_foldmap(CUPS_FOLD_FULL
) < 0)
256 if (get_propmap() < 0)
259 if (get_combmap() < 0)
262 if (get_breakmap() < 0)
270 * 'cupsNormalizeMapsFree()' - Free all normalization maps in cache.
272 * This does not actually free; use 'cupsNormalizeMapsFlush()' for that.
/*
 * Counterpart of cupsNormalizeMapsGet(): the visible code walks the cached
 * normalization and folding lists and decrements the used counter of the
 * property, combining and break maps, never letting a counter drop below
 * zero. No memory is released in the visible statements.
 * NOTE(review): the loop bodies and the action taken when the
 * normalization cache is NULL are not shown in this listing.
 */
275 int /* O - Zero or -1 on error */
276 cupsNormalizeMapsFree(void)
278 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
279 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
280 _cups_globals_t
*cg
= _cupsGlobals();
281 /* Pointer to library globals */
285 * See if we already have normalization maps loaded...
288 if (cg
->normmap_cache
== NULL
)
/* Visit every cached normalization map, then every cached folding map. */
291 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
295 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= fmap
->next
)
/* Decrement only while the count is still positive. */
299 if (cg
->propmap_cache
&& (cg
->propmap_cache
->used
> 0))
300 cg
->propmap_cache
->used
--;
302 if (cg
->combmap_cache
&& (cg
->combmap_cache
->used
> 0))
303 cg
->combmap_cache
->used
--;
305 if (cg
->breakmap_cache
&& (cg
->breakmap_cache
->used
> 0))
306 cg
->breakmap_cache
->used
--;
313 * 'cupsNormalizeMapsFlush()' - Flush all normalization maps in cache.
/*
 * Public entry point with no arguments: forwards the globals returned by
 * _cupsGlobals() to _cupsNormalizeMapsFlush(), which does the flushing.
 */
317 cupsNormalizeMapsFlush(void)
319 _cupsNormalizeMapsFlush(_cupsGlobals());
324 * '_cupsNormalizeMapsFlush()' - Flush all normalization maps in cache.
/*
 * Flushes the map caches held in the globals structure passed as cg.
 * Visible behavior: for every normalization and folding map the lookup
 * array is freed and the list head is reset to NULL; for the property,
 * combining and break maps both the lookup array and the map record are
 * freed and the cache pointer is reset to NULL.
 * NOTE(review): the statement releasing each list node itself is not
 * shown in this listing.
 */
328 _cupsNormalizeMapsFlush(
329 _cups_globals_t
*cg
) /* I - Global data */
331 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
332 _cups_norm_map_t
*nextnorm
; /* Next Unicode Normalization Map */
333 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
334 _cups_fold_map_t
*nextfold
; /* Next Unicode Case Folding Map */
338 * Flush all normalization maps...
/* The loop advances through nextnorm, which is read from the node inside the body. */
341 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nextnorm
)
343 free(nmap
->uni2norm
);
344 nextnorm
= nmap
->next
;
348 cg
->normmap_cache
= NULL
;
/* Same pattern for the folding maps, advancing through nextfold. */
350 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= nextfold
)
352 free(fmap
->uni2fold
);
353 nextfold
= fmap
->next
;
357 cg
->foldmap_cache
= NULL
;
/* Single-instance maps: free the lookup array, then the record, then clear the pointer. */
359 if (cg
->propmap_cache
)
361 free(cg
->propmap_cache
->uni2prop
);
362 free(cg
->propmap_cache
);
363 cg
->propmap_cache
= NULL
;
366 if (cg
->combmap_cache
)
368 free(cg
->combmap_cache
->uni2comb
);
369 free(cg
->combmap_cache
);
370 cg
->combmap_cache
= NULL
;
373 if (cg
->breakmap_cache
)
375 free(cg
->breakmap_cache
->uni2break
);
376 free(cg
->breakmap_cache
);
377 cg
->breakmap_cache
= NULL
;
383 * 'cupsUTF8Normalize()' - Normalize UTF-8 string.
385 * Normalize UTF-8 string to Unicode UAX-15 Normalization Form
386 * Note - Compatibility Normalization Forms (NFKD/NFKC) are
387 * unsafe for subsequent transcoding to legacy charsets
/*
 * UTF-8 front end for normalization. The visible code converts src to
 * UTF-32 in work1, normalizes work1 into work2 with cupsUTF32Normalize(),
 * and converts work2 back to UTF-8 in dest, limited to maxout.
 * Both work buffers hold CUPS_MAX_USTRING elements, and maxout is tested
 * against the range 1..CUPS_MAX_USTRING.
 * NOTE(review): the function-name line and the handling of each len
 * result are not shown in this listing.
 */
390 int /* O - Count or -1 on error */
392 cups_utf8_t
*dest
, /* O - Target string */
393 const cups_utf8_t
*src
, /* I - Source string */
394 const int maxout
, /* I - Max output */
395 const cups_normalize_t normalize
) /* I - Normalization */
397 int len
; /* String length */
398 cups_utf32_t work1
[CUPS_MAX_USTRING
];/* First internal UCS-4 string */
399 cups_utf32_t work2
[CUPS_MAX_USTRING
];/* Second internal UCS-4 string */
403 * Check for valid arguments and clear output...
406 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
412 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
415 len
= cupsUTF8ToUTF32(work1
, src
, CUPS_MAX_USTRING
);
421 * Normalize internal UCS-4 to second internal UCS-4...
/* The intermediate buffers use the full CUPS_MAX_USTRING size, not maxout. */
424 len
= cupsUTF32Normalize(work2
, work1
, CUPS_MAX_USTRING
, normalize
);
430 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
/* Only this final conversion is bounded by the caller-supplied maxout. */
433 len
= cupsUTF32ToUTF8(dest
, work2
, maxout
);
440 * 'cupsUTF32Normalize()' - Normalize UTF-32 string.
442 * Normalize UTF-32 string to Unicode UAX-15 Normalization Form
443 * Note - Compatibility Normalization Forms (NFKD/NFKC) are
444 * unsafe for subsequent transcoding to legacy charsets
/*
 * UTF-32 normalization. The visible code proceeds in stages:
 *   1. argument check and cupsNormalizeMapsGet();
 *   2. selection of a decomposition form (NFD or NFKD) and lookup of the
 *      matching cached map;
 *   3. repeated decomposition passes (at most 20) from work1 into work2;
 *   4. repeated reordering passes (at most 20) driven by the combining
 *      classes of adjacent characters;
 *   5. for NFD/NFKD, copy of work1 to dest; otherwise lookup of the NFC
 *      map and repeated composition passes (at most 20);
 *   6. copy of work1 to dest and cupsNormalizeMapsFree().
 * NOTE(review): the function-name line, loop exits, branch bodies and
 * return statements are not shown in this listing.
 */
447 int /* O - Count or -1 on error */
449 cups_utf32_t
*dest
, /* O - Target string */
450 const cups_utf32_t
*src
, /* I - Source string */
451 const int maxout
, /* I - Max output */
452 const cups_normalize_t normalize
) /* I - Normalization */
454 int i
; /* Looping variable */
455 int result
; /* Result Value */
456 cups_ucs2_t
*mp
; /* Map char pointer */
457 int pass
; /* Pass count for each transform */
458 int hit
; /* Hit count from binary search */
459 cups_utf32_t unichar1
; /* Unicode character value */
460 cups_utf32_t unichar2
; /* Unicode character value */
461 _cups_comb_class_t class1
; /* First Combining Class */
462 _cups_comb_class_t class2
; /* Second Combining Class */
463 int len
; /* String length */
464 cups_utf32_t work1
[CUPS_MAX_USTRING
];
465 /* First internal UCS-4 string */
466 cups_utf32_t work2
[CUPS_MAX_USTRING
];
467 /* Second internal UCS-4 string */
468 cups_utf32_t
*p1
; /* First UCS-4 string pointer */
469 cups_utf32_t
*p2
; /* Second UCS-4 string pointer */
470 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
471 cups_normalize_t decompose
; /* Decomposition Type */
472 _cups_globals_t
*cg
= _cupsGlobals();
473 /* Pointer to library globals */
477 * Check for valid arguments and clear output...
480 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
/* Make sure the maps are cached before they are searched below. */
485 result
= cupsNormalizeMapsGet();
491 * Find decomposition map...
/* Two alternatives are visible: canonical (NFD) or compatibility (NFKD) decomposition. */
498 decompose
= CUPS_NORM_NFD
;
503 decompose
= CUPS_NORM_NFKD
;
/* Scan the cached list for the map whose form equals decompose. */
510 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
511 if (nmap
->normalize
== decompose
)
518 * Copy input to internal buffer...
523 for (i
= 0; i
< CUPS_MAX_USTRING
; i
++)
535 * Decompose until no further decomposition...
/* The pass count is capped at 20. */
538 for (pass
= 0; pass
< 20; pass
++)
543 for (hit
= 0; *p1
!= 0; p1
++)
546 * Check for decomposition defined...
/* Map entries are three cups_ucs2_t values wide; compare_decompose matches the entry for *p1. */
549 mp
= (cups_ucs2_t
*)bsearch(p1
, nmap
->uni2norm
, nmap
->normcount
,
550 (sizeof(cups_ucs2_t
) * 3), compare_decompose
);
558 * Decompose input character to one or two output characters...
563 *p2
++ = (cups_utf32_t
) *mp
++;
566 *p2
++ = (cups_utf32_t
) *mp
;
/* Output length is the distance p2 has advanced from the start of work2. */
570 len
= (int)(p2
- &work2
[0]);
573 * Check for decomposition finished...
/* Feed this pass's output (len + 1 elements) back in as the next input. */
577 memcpy (work1
, work2
, sizeof(cups_utf32_t
) * (len
+ 1));
581 * Canonical reorder until no further reordering...
/* The pass count is capped at 20 here as well. */
584 for (pass
= 0; pass
< 20; pass
++)
588 for (hit
= 0; *p1
!= 0; p1
++)
591 * Check for combining characters to reorder...
/* unichar2 is the character immediately after the current one. */
595 unichar2
= *(p1
+ 1);
/* Look up the combining class of each character of the adjacent pair. */
600 class1
= get_combining_class(unichar1
);
601 class2
= get_combining_class(unichar2
);
/* Three tests follow: either class negative, either class zero, first class not above second. */
603 if ((class1
< 0) || (class2
< 0))
606 if ((class1
== 0) || (class2
== 0))
609 if (class1
<= class2
)
613 * Swap two combining characters...
627 * Check for decomposition only...
630 if (normalize
== CUPS_NORM_NFD
|| normalize
== CUPS_NORM_NFKD
)
/* Decomposition-only forms: work1 (len + 1 elements) is the result. */
632 memcpy(dest
, work1
, sizeof(cups_utf32_t
) * (len
+ 1));
637 * Find composition map...
/* Only the CUPS_NORM_NFC map is searched for in the composition stage. */
640 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
641 if (nmap
->normalize
== CUPS_NORM_NFC
)
648 * Compose until no further composition...
651 for (pass
= 0; pass
< 20; pass
++)
656 for (hit
= 0; *p1
!= 0; p1
++)
659 * Check for composition defined...
663 unichar2
= *(p1
+ 1);
/* Composition entries are also three cups_ucs2_t values wide; compare_compose does the matching. */
671 mp
= (cups_ucs2_t
*)bsearch(p1
, nmap
->uni2norm
, nmap
->normcount
,
672 (sizeof(cups_ucs2_t
) * 3), compare_compose
);
680 * Compose two input characters to one output character...
685 *p2
++ = (cups_utf32_t
) *mp
;
690 len
= (int) (p2
- &work2
[0]);
693 * Check for composition finished...
699 memcpy (work1
, work2
, sizeof(cups_utf32_t
) * (len
+ 1));
/* Composed result: copy work1 (len + 1 elements) to the caller buffer. */
702 memcpy (dest
, work1
, sizeof(cups_utf32_t
) * (len
+ 1));
/* Balance the cupsNormalizeMapsGet() call made near the top. */
704 cupsNormalizeMapsFree();
711 * 'cupsUTF8CaseFold()' - Case fold UTF-8 string.
713 * Case Fold UTF-8 string per Unicode UAX-21 Section 2.3
714 * Note - Case folding output is
715 * unsafe for subsequent transcoding to legacy charsets
/*
 * UTF-8 front end for case folding. The visible code checks the arguments
 * and the fold mode, converts src to UTF-32 in work1, folds work1 into
 * work2 with cupsUTF32CaseFold(), and converts work2 back to UTF-8 in
 * dest, limited to maxout.
 * NOTE(review): the function-name line and the handling of each len
 * result are not shown in this listing.
 */
718 int /* O - Count or -1 on error */
720 cups_utf8_t
*dest
, /* O - Target string */
721 const cups_utf8_t
*src
, /* I - Source string */
722 const int maxout
, /* I - Max output */
723 const cups_folding_t fold
) /* I - Fold Mode */
725 int len
; /* String length */
726 cups_utf32_t work1
[CUPS_MAX_USTRING
];/* First internal UCS-4 string */
727 cups_utf32_t work2
[CUPS_MAX_USTRING
];/* Second internal UCS-4 string */
731 * Check for valid arguments and clear output...
734 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
/* Only the two known fold modes pass this test. */
739 if (fold
!= CUPS_FOLD_SIMPLE
&& fold
!= CUPS_FOLD_FULL
)
743 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
746 len
= cupsUTF8ToUTF32(work1
, src
, CUPS_MAX_USTRING
);
752 * Case Fold internal UCS-4 to second internal UCS-4...
/* The intermediate buffers use the full CUPS_MAX_USTRING size, not maxout. */
755 len
= cupsUTF32CaseFold(work2
, work1
, CUPS_MAX_USTRING
, fold
);
761 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
/* Only this final conversion is bounded by the caller-supplied maxout. */
764 len
= cupsUTF32ToUTF8(dest
, work2
, maxout
);
771 * 'cupsUTF32CaseFold()' - Case fold UTF-32 string.
773 * Case Fold UTF-32 string per Unicode UAX-21 Section 2.3
774 * Note - Case folding output is
775 * unsafe for subsequent transcoding to legacy charsets
/*
 * UTF-32 case folding. The visible code checks the arguments and fold
 * mode, calls cupsNormalizeMapsGet(), looks up the cached folding map
 * whose mode equals fold, and loops over at most maxout - 1 source
 * characters, searching the map for each one and writing mapped values to
 * dest. It finishes with cupsNormalizeMapsFree() and returns the number of
 * elements dest advanced from its starting position.
 * NOTE(review): the function-name line, the handling of unmapped
 * characters and the error exits are not shown in this listing.
 */
778 int /* O - Count or -1 on error */
780 cups_utf32_t
*dest
, /* O - Target string */
781 const cups_utf32_t
*src
, /* I - Source string */
782 const int maxout
, /* I - Max output */
783 const cups_folding_t fold
) /* I - Fold Mode */
785 cups_utf32_t
*start
= dest
; /* Start of destination string */
786 int i
; /* Looping variable */
787 int result
; /* Result Value */
788 cups_ucs2_t
*mp
; /* Map char pointer */
789 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
790 _cups_globals_t
*cg
= _cupsGlobals();
791 /* Pointer to library globals */
795 * Check for valid arguments and clear output...
798 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
803 if (fold
!= CUPS_FOLD_SIMPLE
&& fold
!= CUPS_FOLD_FULL
)
807 * Find case folding map...
810 result
= cupsNormalizeMapsGet();
/* Scan the cached list for the map whose mode equals fold. */
815 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= fmap
->next
)
816 if (fmap
->fold
== fold
)
823 * Case fold input string to output string...
/* The bound maxout - 1 leaves one element of room in dest. */
826 for (i
= 0; i
< (maxout
- 1); i
++, src
++)
829 * Check for case folding defined...
/* Fold map entries are four cups_ucs2_t values wide; compare_foldchar matches the entry for *src. */
832 mp
= (cups_ucs2_t
*)bsearch(src
, fmap
->uni2fold
, fmap
->foldcount
,
833 (sizeof(cups_ucs2_t
) * 4), compare_foldchar
);
841 * Case fold input character to one or two output characters...
845 *dest
++ = (cups_utf32_t
) *mp
++;
/* A further non-zero map value is considered only in full folding mode. */
847 if (*mp
!= 0 && fold
== CUPS_FOLD_FULL
)
/* The output bound is tested again before that extra value is stored. */
850 if (i
>= (maxout
- 1))
853 *dest
++ = (cups_utf32_t
) *mp
;
/* Balance the cupsNormalizeMapsGet() call made above. */
859 cupsNormalizeMapsFree();
/* Result is the number of elements written, measured from start. */
861 return ((int)(dest
- start
));
866 * 'cupsUTF8CompareCaseless()' - Compare case folded UTF-8 strings.
/*
 * UTF-8 front end for caseless comparison: the visible code converts s1
 * and s2 to UTF-32 in work1 and work2 and passes both buffers to
 * cupsUTF32CompareCaseless(), storing its result in difference.
 * NOTE(review): the argument checks, the handling of each len result and
 * the return statement are not shown in this listing.
 */
869 int /* O - Difference of strings */
870 cupsUTF8CompareCaseless(
871 const cups_utf8_t
*s1
, /* I - String1 */
872 const cups_utf8_t
*s2
) /* I - String2 */
874 int difference
; /* Difference of two strings */
875 int len
; /* String length */
876 cups_utf32_t work1
[CUPS_MAX_USTRING
];/* First internal UCS-4 string */
877 cups_utf32_t work2
[CUPS_MAX_USTRING
];/* Second internal UCS-4 string */
881 * Check for valid arguments...
888 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
/* Each input is converted into its own CUPS_MAX_USTRING-sized buffer. */
891 len
= cupsUTF8ToUTF32(work1
, s1
, CUPS_MAX_USTRING
);
896 len
= cupsUTF8ToUTF32(work2
, s2
, CUPS_MAX_USTRING
);
902 * Compare first internal UCS-4 to second internal UCS-4...
905 difference
= cupsUTF32CompareCaseless(work1
, work2
);
912 * 'cupsUTF32CompareCaseless()' - Compare case folded UTF-32 strings.
/*
 * Caseless comparison of two UTF-32 strings. The visible code folds s1
 * and s2 with cupsUTF32CaseFold() in CUPS_FOLD_FULL mode into fold1 and
 * fold2, then steps p1 and p2 together, computing the element difference
 * at each position and testing for both strings ending together.
 * NOTE(review): the argument checks, the initialization of p1 and p2, the
 * loop exits and the return statement are not shown in this listing.
 */
915 int /* O - Difference of strings */
916 cupsUTF32CompareCaseless(
917 const cups_utf32_t
*s1
, /* I - String1 */
918 const cups_utf32_t
*s2
) /* I - String2 */
920 int difference
; /* Difference of two strings */
921 int len
; /* String length */
922 cups_folding_t fold
= CUPS_FOLD_FULL
;
923 /* Case folding mode */
924 cups_utf32_t fold1
[CUPS_MAX_USTRING
];
925 /* First UCS-4 folded string */
926 cups_utf32_t fold2
[CUPS_MAX_USTRING
];
927 /* Second UCS-4 folded string */
928 cups_utf32_t
*p1
; /* First UCS-4 string pointer */
929 cups_utf32_t
*p2
; /* Second UCS-4 string pointer */
933 * Check for valid arguments...
940 * Case Fold input UTF-32 strings to internal UCS-4 strings...
/* Both inputs are folded with the same (full) mode before comparing. */
943 len
= cupsUTF32CaseFold(fold1
, s1
, CUPS_MAX_USTRING
, fold
);
948 len
= cupsUTF32CaseFold(fold2
, s2
, CUPS_MAX_USTRING
, fold
);
954 * Compare first internal UCS-4 to second internal UCS-4...
/* The for statement has no condition; termination comes from inside the body. */
960 for (;; p1
++, p2
++)
962 difference
= (int) (*p1
- *p2
);
/* Both strings reaching a zero element at the same position. */
967 if ((*p1
== 0) && (*p2
== 0))
976 * 'cupsUTF8CompareIdentifier()' - Compare folded NFKC UTF-8 strings.
/*
 * UTF-8 front end for identifier comparison: the visible code converts s1
 * and s2 to UTF-32 in work1 and work2, passes both buffers to
 * cupsUTF32CompareIdentifier(), and returns the value it produced.
 * NOTE(review): the argument checks and the handling of each len result
 * are not shown in this listing.
 */
979 int /* O - Result of comparison */
980 cupsUTF8CompareIdentifier(
981 const cups_utf8_t
*s1
, /* I - String1 */
982 const cups_utf8_t
*s2
) /* I - String2 */
984 int difference
; /* Difference of two strings */
985 int len
; /* String length */
986 cups_utf32_t work1
[CUPS_MAX_USTRING
];/* First internal UCS-4 string */
987 cups_utf32_t work2
[CUPS_MAX_USTRING
];/* Second internal UCS-4 string */
991 * Check for valid arguments...
998 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
/* Each input is converted into its own CUPS_MAX_USTRING-sized buffer. */
1001 len
= cupsUTF8ToUTF32(work1
, s1
, CUPS_MAX_USTRING
);
1006 len
= cupsUTF8ToUTF32(work2
, s2
, CUPS_MAX_USTRING
);
1012 * Compare first internal UCS-4 to second internal UCS-4...
1015 difference
= cupsUTF32CompareIdentifier(work1
, work2
);
1017 return (difference
);
1022 * 'cupsUTF32CompareIdentifier()' - Compare folded NFKC UTF-32 strings.
/*
 * Identifier comparison of two UTF-32 strings. The visible code folds s1
 * and s2 in CUPS_FOLD_FULL mode, normalizes the folded strings with
 * cupsUTF32Normalize() using CUPS_NORM_NFKC, then steps p1 and p2
 * together computing the element difference at each position, and
 * returns difference.
 * NOTE(review): the argument checks, the initialization of p1 and p2 and
 * the loop exits are not shown in this listing.
 */
1025 int /* O - Result of comparison */
1026 cupsUTF32CompareIdentifier(
1027 const cups_utf32_t
*s1
, /* I - String1 */
1028 const cups_utf32_t
*s2
) /* I - String2 */
1030 int difference
; /* Difference of two strings */
1031 int len
; /* String length */
1032 cups_folding_t fold
= CUPS_FOLD_FULL
;
1033 /* Case folding mode */
1034 cups_utf32_t fold1
[CUPS_MAX_USTRING
];
1035 /* First UCS-4 folded string */
1036 cups_utf32_t fold2
[CUPS_MAX_USTRING
];
1037 /* Second UCS-4 folded string */
1038 cups_normalize_t normalize
= CUPS_NORM_NFKC
;
1039 /* Normalization form */
1040 cups_utf32_t norm1
[CUPS_MAX_USTRING
];
1041 /* First UCS-4 normalized string */
1042 cups_utf32_t norm2
[CUPS_MAX_USTRING
];
1043 /* Second UCS-4 normalized string */
1044 cups_utf32_t
*p1
; /* First UCS-4 string pointer */
1045 cups_utf32_t
*p2
; /* Second UCS-4 string pointer */
1049 * Check for valid arguments...
1056 * Case Fold input UTF-32 strings to internal UCS-4 strings...
/* Step 1: fold both inputs with the same (full) mode. */
1059 len
= cupsUTF32CaseFold(fold1
, s1
, CUPS_MAX_USTRING
, fold
);
1064 len
= cupsUTF32CaseFold(fold2
, s2
, CUPS_MAX_USTRING
, fold
);
1070 * Normalize internal UCS-4 strings to NFKC...
/* Step 2: normalize the folded strings, not the raw inputs. */
1073 len
= cupsUTF32Normalize(norm1
, fold1
, CUPS_MAX_USTRING
, normalize
);
1078 len
= cupsUTF32Normalize(norm2
, fold2
, CUPS_MAX_USTRING
, normalize
);
1084 * Compare first internal UCS-4 to second internal UCS-4...
/* The for statement has no condition; termination comes from inside the body. */
1090 for (;; p1
++, p2
++)
1092 difference
= (int) (*p1
- *p2
);
/* A non-zero difference and both strings ending together are the two tests made per position. */
1094 if (difference
!= 0)
1097 if ((*p1
== 0) && (*p2
== 0))
1101 return (difference
);
1106 * 'cupsUTF32CharacterProperty()' - Get UTF-32 character property.
/*
 * Dispatches on prop: each visible case assigns result from the matching
 * file-local getter for ch (general category, bidi category, combining
 * class or line break class).
 * NOTE(review): the return-type comment below reads as a comparison
 * result, yet the visible cases store a per-character property value in
 * result - it looks copied from the compare functions; confirm and
 * reword. The switch header, the argument check and the return statement
 * are not shown in this listing.
 */
1109 int /* O - Result of comparison */
1110 cupsUTF32CharacterProperty(
1111 const cups_utf32_t ch
, /* I - Source char */
1112 const cups_property_t prop
) /* I - Char Property */
1114 int result
; /* Result Value */
1118 * Check for valid arguments...
1125 * Find character property...
1130 case CUPS_PROP_GENERAL_CATEGORY
:
1131 result
= (get_general_category(ch
));
1134 case CUPS_PROP_BIDI_CATEGORY
:
1135 result
= (get_bidi_category(ch
));
1138 case CUPS_PROP_COMBINING_CLASS
:
1139 result
= (get_combining_class(ch
));
1141 case CUPS_PROP_BREAK_CLASS
:
1142 result
= (get_break_class(ch
));
1154 * 'get_general_category()' - Get UTF-32 Character General Category.
/*
 * Looks up the general category of ch. The visible code calls
 * cupsNormalizeMapsGet(), takes the property map from the globals,
 * searches its uni2prop array for ch with compare_propchar, calls
 * cupsNormalizeMapsFree(), and stores the category as an int in result.
 * A character that is not found gets CUPS_GENCAT_CN.
 * NOTE(review): the argument check, the error exits and the return
 * statement are not shown in this listing.
 */
1157 static int /* O - Class or -1 on error */
1158 get_general_category(
1159 const cups_utf32_t ch
) /* I - Source char */
1161 int result
; /* Result Value */
1162 cups_gencat_t gencat
; /* General Category Value */
1163 _cups_prop_map_t
*pmap
; /* Unicode Property Map */
1164 _cups_prop_t
*uni2prop
; /* Unicode Char -> Properties */
1165 _cups_globals_t
*cg
= _cupsGlobals();
1166 /* Pointer to library globals */
1170 * Check for valid argument...
1177 * Find property map...
1180 result
= cupsNormalizeMapsGet();
1185 pmap
= cg
->propmap_cache
;
1191 * Find character in map...
/* One array element is one _cups_prop_t record. */
1194 uni2prop
= (_cups_prop_t
*)bsearch(&ch
, pmap
->uni2prop
, pmap
->propcount
,
1195 (sizeof(_cups_prop_t
)), compare_propchar
);
/* The maps are released before the search result is examined. */
1197 cupsNormalizeMapsFree();
/* Not found: fall back to the not-assigned category. */
1199 if (uni2prop
== NULL
)
1200 gencat
= CUPS_GENCAT_CN
; /* Other, Not Assigned */
1202 gencat
= (cups_gencat_t
)uni2prop
->gencat
;
1204 result
= (int)gencat
;
1211 * 'get_bidi_category()' - Get UTF-32 Character Bidi Category.
/*
 * Looks up the bidi category of ch. The visible code calls
 * cupsNormalizeMapsGet(), takes the property map from the globals,
 * searches its uni2prop array for ch with compare_propchar, calls
 * cupsNormalizeMapsFree(), and stores the category as an int in result.
 * A character that is not found gets CUPS_BIDI_ON.
 * NOTE(review): the argument check, the error exits and the return
 * statement are not shown in this listing.
 */
1214 static int /* O - Class or -1 on error */
1215 get_bidi_category(const cups_utf32_t ch
)/* I - Source char */
1217 int result
; /* Result Value */
1218 cups_bidi_t bidicat
; /* Bidi Category Value */
1219 _cups_prop_map_t
*pmap
; /* Unicode Property Map */
1220 _cups_prop_t
*uni2prop
; /* Unicode Char -> Properties */
1221 _cups_globals_t
*cg
= _cupsGlobals();
1222 /* Pointer to library globals */
1226 * Check for valid argument...
1233 * Find property map...
1236 result
= cupsNormalizeMapsGet();
1241 pmap
= cg
->propmap_cache
;
1247 * Find character in map...
/* Same property map and comparator as the general category lookup. */
1250 uni2prop
= (_cups_prop_t
*)bsearch(&ch
, pmap
->uni2prop
, pmap
->propcount
,
1251 (sizeof(_cups_prop_t
)), compare_propchar
);
/* The maps are released before the search result is examined. */
1253 cupsNormalizeMapsFree();
/* Not found: fall back to the other-neutral category. */
1255 if (uni2prop
== NULL
)
1256 bidicat
= CUPS_BIDI_ON
; /* Other Neutral */
1258 bidicat
= (cups_bidi_t
)uni2prop
->bidicat
;
1260 result
= (int)bidicat
;
1266 * 'get_combining_class()' - Get UTF-32 Character Combining Class.
1268 * Note - Zero is non-combining (base character)
/*
 * Looks up the combining class of ch. The visible code calls
 * cupsNormalizeMapsGet(), takes the combining map from the globals,
 * searches its uni2comb array for ch with compare_combchar, calls
 * cupsNormalizeMapsFree(), and stores the class as an int in result.
 * NOTE(review): the argument check, the value used when the character is
 * not found, the error exits and the return statement are not shown in
 * this listing.
 */
1271 static int /* O - Class or -1 on error */
1272 get_combining_class(
1273 const cups_utf32_t ch
) /* I - Source char */
1275 int result
; /* Result Value */
1276 _cups_comb_map_t
*cmap
; /* Unicode Combining Class Map */
1277 _cups_comb_class_t combclass
; /* Unicode Combining Class */
1278 _cups_comb_t
*uni2comb
; /* Unicode Char -> Combining Class */
1279 _cups_globals_t
*cg
= _cupsGlobals();
1280 /* Pointer to library globals */
1284 * Check for valid argument...
1291 * Find combining class map...
1294 result
= cupsNormalizeMapsGet();
1299 cmap
= cg
->combmap_cache
;
1305 * Find combining character in map...
/* One array element is one _cups_comb_t record. */
1308 uni2comb
= (_cups_comb_t
*)bsearch(&ch
, cmap
->uni2comb
, cmap
->combcount
,
1309 (sizeof(_cups_comb_t
)), compare_combchar
);
/* The maps are released before the search result is examined. */
1311 cupsNormalizeMapsFree();
1313 if (uni2comb
== NULL
)
1316 combclass
= (_cups_comb_class_t
)uni2comb
->combclass
;
1318 result
= (int)combclass
;
1325 * 'get_break_class()' - Get UTF-32 Character Line Break Class.
/*
 * Looks up the line break class of ch. The visible code calls
 * cupsNormalizeMapsGet(), takes the break map from the globals, searches
 * its uni2break array for ch, calls cupsNormalizeMapsFree(), and stores
 * the class as an int in result. A character that is not found gets
 * CUPS_BREAK_AI; otherwise the class is the third value of the matched
 * entry.
 * NOTE(review): the comparator argument of the search, the argument
 * check, the error exits and the return statement are not shown in this
 * listing.
 */
1328 static int /* O - Class or -1 on error */
1329 get_break_class(const cups_utf32_t ch
) /* I - Source char */
1331 int result
; /* Result Value */
1332 _cups_break_map_t
*bmap
; /* Unicode Line Break Class Map */
1333 cups_break_class_t breakclass
; /* Unicode Line Break Class */
1334 cups_ucs2_t
*uni2break
; /* Unicode -> Line Break Class */
1335 _cups_globals_t
*cg
= _cupsGlobals();
1336 /* Pointer to library globals */
1340 * Check for valid argument...
1347 * Find line break class map...
1350 result
= cupsNormalizeMapsGet();
1355 bmap
= cg
->breakmap_cache
;
1361 * Find line break character in map...
/* Break map entries are three cups_ucs2_t values wide. */
1364 uni2break
= (cups_ucs2_t
*)bsearch(&ch
, bmap
->uni2break
, bmap
->breakcount
,
1365 (sizeof(cups_ucs2_t
) * 3),
/* The maps are released before the search result is examined. */
1368 cupsNormalizeMapsFree();
/* Not found: fall back to the ambiguous class. */
1370 if (uni2break
== NULL
)
1371 breakclass
= CUPS_BREAK_AI
;
/* Found: the class is stored at offset 2 of the matched entry. */
1373 breakclass
= (cups_break_class_t
)*(uni2break
+ 2);
1375 result
= (int)breakclass
;
1382 * 'get_map_count()' - Count lines in a map file.
/*
 * Counts entries in the map file named by filename. The visible code
 * tests for a NULL or empty filename, opens the file for reading, and
 * loops while i is below 50000, reading one line at a time into a
 * 256-byte buffer and testing it for a comment, newline or empty start,
 * for a leading 0x, for a parsable hex value, and for a value above
 * 0xffff.
 * NOTE(review): what each test leads to, where i is incremented, and the
 * close and return statements are not shown in this listing.
 */
1385 static int /* O - Count or -1 on error */
1386 get_map_count(const char *filename
) /* I - Map Filename */
1388 int i
; /* Looping variable */
1389 cups_file_t
*fp
; /* Map input file pointer */
1390 char *s
; /* Line parsing pointer */
1391 char line
[256]; /* Line from input map file */
1392 cups_utf32_t unichar
; /* Unicode character value */
1396 * Open map input file...
1399 if (!filename
|| !*filename
)
1402 fp
= cupsFileOpen(filename
, "r");
1407 * Count lines in map input file...
/* The for statement has no increment clause; 50000 is the upper bound on i. */
1410 for (i
= 0; i
< 50000;)
1412 s
= cupsFileGets(fp
, line
, sizeof(line
));
/* Lines starting with a hash, a newline or a terminator are singled out. */
1415 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1417 if (strncmp (s
, "0x", 2) == 0)
/* NOTE(review): the lx conversion stores through an unsigned long pointer; confirm cups_utf32_t has that type. */
1419 if (sscanf(s
, "%lx", &unichar
) != 1)
/* Values that do not fit in 16 bits are singled out. */
1421 if (unichar
> 0xffff)
1429 * Close file and return map count (non-comment line count)...
1439 * 'get_normmap()' - Get Unicode normalization map to cache.
/*
 * Loads the normalization map for the requested form into the cache held
 * in the globals. The visible code:
 *   - scans the cached list for a map already loaded for this form;
 *   - picks a map file name for NFD, NFKD or NFC (the NFKC case is marked
 *     as having no map file);
 *   - builds the file path from cg->cups_datadir, a charmaps directory
 *     and the map name, counts its entries with get_map_count() and
 *     opens it;
 *   - allocates a zeroed map record and a zeroed array of three
 *     cups_ucs2_t values per counted entry;
 *   - links the record at the head of the cached list and fills in its
 *     form, count and array;
 *   - reads the file line by line, parsing three hex values per line and
 *     storing them as consecutive cups_ucs2_t values;
 *   - finally sets the record count to i.
 * NOTE(review): the function-name line, the switch header, the error
 * exits, the increment of i, the file close and the return statements
 * are not shown in this listing.
 */
1442 static int /* O - Zero or -1 on error */
1444 const cups_normalize_t normalize
) /* I - Normalization Form */
1446 int i
; /* Looping variable */
1447 cups_utf32_t unichar1
; /* Unicode character value */
1448 cups_utf32_t unichar2
; /* Unicode character value */
1449 cups_utf32_t unichar3
; /* Unicode character value */
1450 _cups_norm_map_t
*nmap
; /* Unicode Normalization Map */
1451 int normcount
; /* Count of Unicode Source Chars */
1452 cups_ucs2_t
*uni2norm
; /* Unicode Char -> Normalization */
1453 char *mapname
; /* Normalization map name */
1454 char filename
[1024]; /* Filename for charset map file */
1455 cups_file_t
*fp
; /* Normalization map file pointer */
1456 char *s
; /* Line parsing pointer */
1457 char line
[256]; /* Line from input map file */
1458 _cups_globals_t
*cg
= _cupsGlobals();
1459 /* Pointer to library globals */
1463 * See if we already have this normalization map loaded...
1466 for (nmap
= cg
->normmap_cache
; nmap
!= NULL
; nmap
= nmap
->next
)
1467 if (nmap
->normalize
== normalize
)
1471 * Get the mapping name...
1476 case CUPS_NORM_NFD
: /* Canonical Decomposition */
1477 mapname
= "uni-nfd.txt";
1480 case CUPS_NORM_NFKD
: /* Compatibility Decomposition */
1481 mapname
= "uni-nfkd.txt";
1484 case CUPS_NORM_NFC
: /* Canonical Composition */
1485 mapname
= "uni-nfc.txt";
1488 case CUPS_NORM_NFKC
: /* no such map file... */
1494 * Open normalization map input file...
/* snprintf bounds the path to the size of the filename buffer. */
1497 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1498 cg
->cups_datadir
, mapname
);
/* A count of zero or less is singled out before the file is opened. */
1499 if ((normcount
= get_map_count(filename
)) <= 0)
1502 fp
= cupsFileOpen(filename
, "r");
1507 * Allocate memory for normalization map and add to cache...
1510 nmap
= (_cups_norm_map_t
*)calloc(1, sizeof(_cups_norm_map_t
));
/* Room for three cups_ucs2_t values per counted entry, zero-filled. */
1517 uni2norm
= (cups_ucs2_t
*)calloc(1, sizeof(cups_ucs2_t
) * 3 * normcount
);
1518 if (uni2norm
== NULL
)
/* The new record becomes the head of the cached list. */
1524 nmap
->next
= cg
->normmap_cache
;
1525 cg
->normmap_cache
= nmap
;
1527 nmap
->normalize
= normalize
;
1528 nmap
->normcount
= normcount
;
1529 nmap
->uni2norm
= uni2norm
;
1532 * Save normalization map into memory for later use...
/* The for statement has no increment clause; normcount bounds i. */
1534 for (i
= 0; i
< normcount
; )
1536 s
= cupsFileGets(fp
, line
, sizeof(line
));
1539 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
/* NOTE(review): the lx conversions store through unsigned long pointers; confirm cups_utf32_t has that type. */
1541 if (sscanf(s
, "%lx %lx %lx", &unichar1
, &unichar2
, &unichar3
) != 3)
/* Any of the three values not fitting in 16 bits is singled out. */
1543 if ((unichar1
> 0xffff)
1544 || (unichar2
> 0xffff)
1545 || (unichar3
> 0xffff))
/* The three parsed values are narrowed and stored consecutively. */
1547 *uni2norm
++ = (cups_ucs2_t
) unichar1
;
1548 *uni2norm
++ = (cups_ucs2_t
) unichar2
;
1549 *uni2norm
++ = (cups_ucs2_t
) unichar3
;
/* The recorded count is replaced by the final value of i. */
1553 nmap
->normcount
= i
;
1560 * 'get_foldmap()' - Get Unicode case folding map to cache.
1563 static int /* O - Zero or -1 on error */
1564 get_foldmap(const cups_folding_t fold
) /* I - Case folding type */
1566 int i
; /* Looping variable */
1567 cups_utf32_t unichar1
; /* Unicode character value */
1568 cups_utf32_t unichar2
; /* Unicode character value */
1569 cups_utf32_t unichar3
; /* Unicode character value */
1570 cups_utf32_t unichar4
; /* Unicode character value */
1571 _cups_fold_map_t
*fmap
; /* Unicode Case Folding Map */
1572 int foldcount
; /* Count of Unicode Source Chars */
1573 cups_ucs2_t
*uni2fold
; /* Unicode -> Folded Char(s) */
1574 char *mapname
; /* Case Folding map name */
1575 char filename
[1024]; /* Filename for charset map file */
1576 cups_file_t
*fp
; /* Case Folding map file pointer */
1577 char *s
; /* Line parsing pointer */
1578 char line
[256]; /* Line from input map file */
1579 _cups_globals_t
*cg
= _cupsGlobals();
1580 /* Pointer to library globals */
1584 * See if we already have this case folding map loaded...
1587 for (fmap
= cg
->foldmap_cache
; fmap
!= NULL
; fmap
= fmap
->next
)
1588 if (fmap
->fold
== fold
)
1592 * Get the mapping name...
1597 case CUPS_FOLD_SIMPLE
: /* Simple case folding */
1598 mapname
= "uni-fold.txt";
1600 case CUPS_FOLD_FULL
: /* Full case folding */
1601 mapname
= "uni-full.txt";
1608 * Open case folding map input file...
1611 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1612 cg
->cups_datadir
, mapname
);
1613 if ((foldcount
= get_map_count(filename
)) <= 0)
1615 fp
= cupsFileOpen(filename
, "r");
1620 * Allocate memory for case folding map and add to cache...
1622 fmap
= (_cups_fold_map_t
*)calloc(1, sizeof(_cups_fold_map_t
));
1628 uni2fold
= (cups_ucs2_t
*)calloc(1, sizeof(cups_ucs2_t
) * 4 * foldcount
);
1629 if (uni2fold
== NULL
)
1635 fmap
->next
= cg
->foldmap_cache
;
1636 cg
->foldmap_cache
= fmap
;
1639 fmap
->foldcount
= foldcount
;
1640 fmap
->uni2fold
= uni2fold
;
1643 * Save case folding map into memory for later use...
1646 for (i
= 0; i
< foldcount
; )
1648 s
= cupsFileGets(fp
, line
, sizeof(line
));
1651 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1653 unichar1
= unichar2
= unichar3
= unichar4
= 0;
1654 if ((fold
== CUPS_FOLD_SIMPLE
)
1655 && (sscanf(s
, "%lx %lx", &unichar1
, &unichar2
) != 2))
1657 if ((fold
== CUPS_FOLD_FULL
)
1658 && (sscanf(s
, "%lx %lx %lx %lx",
1659 &unichar1
, &unichar2
, &unichar3
, &unichar4
) != 4))
1661 if ((unichar1
> 0xffff)
1662 || (unichar2
> 0xffff)
1663 || (unichar3
> 0xffff)
1664 || (unichar4
> 0xffff))
1666 *uni2fold
++ = (cups_ucs2_t
) unichar1
;
1667 *uni2fold
++ = (cups_ucs2_t
) unichar2
;
1668 *uni2fold
++ = (cups_ucs2_t
) unichar3
;
1669 *uni2fold
++ = (cups_ucs2_t
) unichar4
;
1673 fmap
->foldcount
= i
;
1679 * 'get_propmap()' - Get Unicode character property map to cache.
1682 static int /* O - Zero or -1 on error */
1685 int i
, j
; /* Looping variables */
1686 size_t len
; /* String length */
1687 cups_utf32_t unichar
; /* Unicode character value */
1688 cups_gencat_t gencat
; /* General Category Value */
1689 cups_bidi_t bidicat
; /* Bidi Category Value */
1690 _cups_prop_map_t
*pmap
; /* Unicode Char Property Map */
1691 int propcount
; /* Count of Unicode Source Chars */
1692 _cups_prop_t
*uni2prop
; /* Unicode Char -> Properties */
1693 char *mapname
; /* Char Property map name */
1694 char filename
[1024]; /* Filename for charset map file */
1695 cups_file_t
*fp
; /* Char Property map file pointer */
1696 char *s
; /* Line parsing pointer */
1697 char line
[256]; /* Line from input map file */
1698 _cups_globals_t
*cg
= _cupsGlobals();
1699 /* Pointer to library globals */
1703 * See if we already have this char properties map loaded...
1706 if ((pmap
= cg
->propmap_cache
) != NULL
)
1710 * Get the mapping name...
1713 mapname
= "uni-prop.txt";
1716 * Open char properties map input file...
1718 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1719 cg
->cups_datadir
, mapname
);
1720 if ((propcount
= get_map_count(filename
)) <= 0)
1722 fp
= cupsFileOpen(filename
, "r");
1727 * Allocate memory for char properties map and add to cache...
1729 pmap
= (_cups_prop_map_t
*)calloc(1, sizeof(_cups_prop_map_t
));
1735 uni2prop
= (_cups_prop_t
*)calloc(1, sizeof(_cups_prop_t
) * propcount
);
1736 if (uni2prop
== NULL
)
1742 cg
->propmap_cache
= pmap
;
1744 pmap
->propcount
= propcount
;
1745 pmap
->uni2prop
= uni2prop
;
1748 * Save char properties map into memory for later use...
1750 for (i
= 0; i
< propcount
; )
1752 s
= cupsFileGets(fp
, line
, sizeof(line
));
1756 *(s
+ strlen(s
) - 1) = '\0';
1757 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1759 if (sscanf(s
, "%lx", &unichar
) != 1)
1761 if (unichar
> 0xffff)
1763 while ((*s
!= '\0') && (*s
!= ';'))
1768 for (j
= 0; gencat_index
[j
].str
!= NULL
; j
++)
1770 len
= strlen(gencat_index
[j
].str
);
1771 if (strncmp (s
, gencat_index
[j
].str
, len
) == 0)
1774 if (gencat_index
[j
].str
== NULL
)
1776 gencat
= gencat_index
[j
].gencat
;
1777 while ((*s
!= '\0') && (*s
!= ';'))
1782 for (j
= 0; bidicat_index
[j
] != NULL
; j
++)
1784 len
= strlen(bidicat_index
[j
]);
1785 if (strncmp (s
, bidicat_index
[j
], len
) == 0)
1788 if (bidicat_index
[j
] == NULL
)
1790 bidicat
= (cups_bidi_t
) j
;
1791 uni2prop
->ch
= (cups_ucs2_t
) unichar
;
1792 uni2prop
->gencat
= (unsigned char) gencat
;
1793 uni2prop
->bidicat
= (unsigned char) bidicat
;
1798 pmap
->propcount
= i
;
1805 * 'get_combmap()' - Get Unicode combining class map to cache.
1808 static int /* O - Zero or -1 on error */
1811 int i
; /* Looping variable */
1812 cups_utf32_t unichar
; /* Unicode character value */
1813 int combclass
; /* Unicode char combining class */
1814 _cups_comb_map_t
*cmap
; /* Unicode Comb Class Map */
1815 int combcount
; /* Count of Unicode Source Chars */
1816 _cups_comb_t
*uni2comb
; /* Unicode Char -> Combining Class */
1817 char *mapname
; /* Comb Class map name */
1818 char filename
[1024]; /* Filename for charset map file */
1819 cups_file_t
*fp
; /* Comb Class map file pointer */
1820 char *s
; /* Line parsing pointer */
1821 char line
[256]; /* Line from input map file */
1822 _cups_globals_t
*cg
= _cupsGlobals();
1823 /* Pointer to library globals */
1827 * See if we already have this combining class map loaded...
1830 if ((cmap
= cg
->combmap_cache
) != NULL
)
1834 * Get the mapping name...
1837 mapname
= "uni-comb.txt";
1840 * Open combining class map input file...
1843 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1844 cg
->cups_datadir
, mapname
);
1845 if ((combcount
= get_map_count(filename
)) <= 0)
1847 fp
= cupsFileOpen(filename
, "r");
1852 * Allocate memory for combining class map and add to cache...
1855 cmap
= (_cups_comb_map_t
*)calloc(1, sizeof(_cups_comb_map_t
));
1862 uni2comb
= (_cups_comb_t
*)calloc(1, sizeof(_cups_comb_t
) * combcount
);
1863 if (uni2comb
== NULL
)
1869 cg
->combmap_cache
= cmap
;
1871 cmap
->combcount
= combcount
;
1872 cmap
->uni2comb
= uni2comb
;
1875 * Save combining class map into memory for later use...
1877 for (i
= 0; i
< combcount
; )
1879 s
= cupsFileGets(fp
, line
, sizeof(line
));
1882 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1884 if (sscanf(s
, "%lx", &unichar
) != 1)
1886 if (unichar
> 0xffff)
1888 while ((*s
!= '\0') && (*s
!= ';'))
1893 if (sscanf(s
, "%d", &combclass
) != 1)
1895 uni2comb
->ch
= (cups_ucs2_t
) unichar
;
1896 uni2comb
->combclass
= (unsigned char) combclass
;
1901 cmap
->combcount
= i
;
1908 * 'get_breakmap()' - Get Unicode line break class map to cache.
1911 static int /* O - Zero or -1 on error */
1914 int i
, j
; /* Looping variables */
1915 int len
; /* String length */
1916 cups_utf32_t unichar1
; /* Unicode character value */
1917 cups_utf32_t unichar2
; /* Unicode character value */
1918 cups_break_class_t breakclass
; /* Unicode char line break class */
1919 _cups_break_map_t
*bmap
; /* Unicode Line Break Class Map */
1920 int breakcount
; /* Count of Unicode Source Chars */
1921 cups_ucs2_t
*uni2break
; /* Unicode -> Line Break Class */
1922 char *mapname
; /* Comb Class map name */
1923 char filename
[1024]; /* Filename for charset map file */
1924 cups_file_t
*fp
; /* Comb Class map file pointer */
1925 char *s
; /* Line parsing pointer */
1926 char line
[256]; /* Line from input map file */
1927 _cups_globals_t
*cg
= _cupsGlobals();
1928 /* Pointer to library globals */
1932 * See if we already have this line break class map loaded...
1935 if ((bmap
= cg
->breakmap_cache
) != NULL
)
1939 * Get the mapping name...
1942 mapname
= "uni-line.txt";
1945 * Open line break class map input file...
1948 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
1949 cg
->cups_datadir
, mapname
);
1950 if ((breakcount
= get_map_count(filename
)) <= 0)
1952 fp
= cupsFileOpen(filename
, "r");
1957 * Allocate memory for line break class map and add to cache...
1960 bmap
= (_cups_break_map_t
*)calloc(1, sizeof(_cups_break_map_t
));
1967 uni2break
= (cups_ucs2_t
*)calloc(1, sizeof(cups_ucs2_t
) * 3 * breakcount
);
1968 if (uni2break
== NULL
)
1974 cg
->breakmap_cache
= bmap
;
1976 bmap
->breakcount
= breakcount
;
1977 bmap
->uni2break
= uni2break
;
1980 * Save line break class map into memory for later use...
1982 for (i
= 0; i
< breakcount
; )
1984 s
= cupsFileGets(fp
, line
, sizeof(line
));
1988 *(s
+ strlen(s
) - 1) = '\0';
1989 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1991 if (sscanf(s
, "%lx %lx", &unichar1
, &unichar2
) != 2)
1993 if ((unichar1
> 0xffff)
1994 || (unichar2
> 0xffff))
1996 while ((*s
!= '\0') && (*s
!= ';'))
2001 for (j
= 0; break_index
[j
].str
!= NULL
; j
++)
2003 len
= strlen (break_index
[j
].str
);
2004 if (strncmp (s
, break_index
[j
].str
, len
) == 0)
2007 if (break_index
[j
].str
== NULL
)
2009 breakclass
= break_index
[j
].breakclass
;
2010 *uni2break
++ = (cups_ucs2_t
) unichar1
;
2011 *uni2break
++ = (cups_ucs2_t
) unichar2
;
2012 *uni2break
++ = (cups_ucs2_t
) breakclass
;
2016 bmap
->breakcount
= i
;
2023 * 'compare_compose()' - Compare key for compose match.
2025 * Note - This function cannot be easily modified for 32-bit Unicode.
2028 static int /* O - Result of comparison */
2029 compare_compose(const void *k1
, /* I - Key char */
2030 const void *k2
) /* I - Map char */
2032 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2033 /* Key char pointer */
2034 cups_ucs2_t
*mp
= (cups_ucs2_t
*)k2
;/* Map char pointer */
2035 unsigned long key
; /* Pair of key characters */
2036 unsigned long map
; /* Pair of map characters */
2037 int result
; /* Result Value */
2042 map
= (unsigned long) (*mp
<< 16);
2043 map
|= (unsigned long) *(mp
+ 1);
2046 result
= (int) (key
- map
);
2048 result
= -1 * ((int) (map
- key
));
2055 * 'compare_decompose()' - Compare key for decompose match.
2058 static int /* O - Result of comparison */
2059 compare_decompose(const void *k1
, /* I - Key char */
2060 const void *k2
) /* I - Map char */
2062 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2063 /* Key char pointer */
2064 cups_ucs2_t
*mp
= (cups_ucs2_t
*)k2
;/* Map char pointer */
2065 cups_ucs2_t ch
; /* Key char as UCS-2 */
2066 int result
; /* Result Value */
2069 ch
= (cups_ucs2_t
) *kp
;
2072 result
= (int) (ch
- *mp
);
2074 result
= -1 * ((int) (*mp
- ch
));
2081 * 'compare_foldchar()' - Compare key for case fold match.
2084 static int /* O - Result of comparison */
2085 compare_foldchar(const void *k1
, /* I - Key char */
2086 const void *k2
) /* I - Map char */
2088 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2089 /* Key char pointer */
2090 cups_ucs2_t
*mp
= (cups_ucs2_t
*)k2
;/* Map char pointer */
2091 cups_ucs2_t ch
; /* Key char as UCS-2 */
2092 int result
; /* Result Value */
2095 ch
= (cups_ucs2_t
) *kp
;
2098 result
= (int) (ch
- *mp
);
2100 result
= -1 * ((int) (*mp
- ch
));
2107 * 'compare_combchar()' - Compare key for combining char match.
2110 static int /* O - Result of comparison */
2111 compare_combchar(const void *k1
, /* I - Key char */
2112 const void *k2
) /* I - Map char */
2114 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2115 /* Key char pointer */
2116 _cups_comb_t
*cp
= (_cups_comb_t
*)k2
;/* Combining map row pointer */
2117 cups_ucs2_t ch
; /* Key char as UCS-2 */
2118 int result
; /* Result Value */
2121 ch
= (cups_ucs2_t
) *kp
;
2124 result
= (int) (ch
- cp
->ch
);
2126 result
= -1 * ((int) (cp
->ch
- ch
));
2133 * 'compare_breakchar()' - Compare key for line break char match.
2136 static int /* O - Result of comparison */
2137 compare_breakchar(const void *k1
, /* I - Key char */
2138 const void *k2
) /* I - Map char */
2140 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2141 /* Key char pointer */
2142 cups_ucs2_t
*mp
= (cups_ucs2_t
*)k2
;/* Map char pointer */
2143 cups_ucs2_t ch
; /* Key char as UCS-2 */
2144 int result
; /* Result Value */
2147 ch
= (cups_ucs2_t
) *kp
;
2150 result
= -1 * (int) (*mp
- ch
);
2151 else if (ch
> *(mp
+ 1))
2152 result
= (int) (ch
- *(mp
+ 1));
2161 * 'compare_propchar()' - Compare key for property char match.
2164 static int /* O - Result of comparison */
2165 compare_propchar(const void *k1
, /* I - Key char */
2166 const void *k2
) /* I - Map char */
2168 cups_utf32_t
*kp
= (cups_utf32_t
*)k1
;
2169 /* Key char pointer */
2170 _cups_prop_t
*pp
= (_cups_prop_t
*)k2
;/* Property map row pointer */
2171 cups_ucs2_t ch
; /* Key char as UCS-2 */
2172 int result
; /* Result Value */
2175 ch
= (cups_ucs2_t
) *kp
;
2178 result
= (int) (ch
- pp
->ch
);
2180 result
= -1 * ((int) (pp
->ch
- ch
));
2187 * End of "$Id: normalize.c 4967 2006-01-24 03:42:15Z mike $"