]>
git.ipfire.org Git - thirdparty/glibc.git/blob - string/strcoll.c
1 /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
26 #include "../locale/localeinfo.h"
28 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
29 # define STRCOLL __strcoll_l
31 # define STRCOLL strcoll
34 #ifndef USE_IN_EXTENDED_LOCALE_MODEL
47 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
48 struct locale_data
*current
= l
->__locales
[LC_COLLATE
];
49 uint_fast32_t nrules
= *((uint32_t *) current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].string
);
51 uint32_t nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
53 /* We don't assign the following values right away since it might be
54 unnecessary in case there are no rules. */
55 const unsigned char *rulesets
;
57 const unsigned char *weights
;
58 const unsigned char *extra
;
59 const int32_t *indirect
;
62 const unsigned char *us1
;
63 const unsigned char *us2
;
68 unsigned char *rule1arr
;
69 unsigned char *rule2arr
;
83 #include "../locale/weight.h"
86 return strcmp (s1
, s2
);
88 #ifdef USE_IN_EXTENDED_LOCALE_MODEL
89 rulesets
= (const unsigned char *)
90 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS
)].string
;
91 table
= (const int32_t *)
92 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_TABLEMB
)].string
;
93 weights
= (const unsigned char *)
94 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB
)].string
;
95 extra
= (const unsigned char *)
96 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB
)].string
;
97 indirect
= (const int32_t *)
98 current
->values
[_NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB
)].string
;
100 rulesets
= (const unsigned char *)
101 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_RULESETS
);
102 table
= (const int32_t *)
103 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEMB
);
104 weights
= (const unsigned char *)
105 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_WEIGHTMB
);
106 extra
= (const unsigned char *)
107 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_EXTRAMB
);
108 indirect
= (const int32_t *)
109 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_INDIRECTMB
);
112 /* We need this a few times. */
116 /* We need the elements of the strings as unsigned values since they
117 are used as indeces. */
118 us1
= (const unsigned char *) s1
;
119 us2
= (const unsigned char *) s2
;
121 /* Perform the first pass over the string and while doing this find
122 and store the weights for each character. Since we want this to
123 be as fast as possible we are using `alloca' to store the temporary
124 values. But since there is no limit on the length of the string
125 we have to use `malloc' if the string is too long. We should be
126 very conservative here.
128 Please note that the localedef programs makes sure that `position'
129 is not used at the first level. */
130 if (s1len
+ s2len
>= 16384)
132 idx1arr
= (int32_t *) malloc ((s1len
+ s2len
) * (sizeof (int32_t) + 1));
133 idx2arr
= &idx1arr
[s2len
];
134 rule1arr
= (unsigned char *) &idx2arr
[s2len
];
135 rule2arr
= &rule1arr
[s1len
];
138 /* No memory. Well, go with the stack then.
140 XXX Once this implementation is stable we will handle this
141 differently. Instead of precomputing the indeces we will
142 do this in time. This means, though, that this happens for
150 idx1arr
= (int32_t *) alloca (s1len
* sizeof (int32_t));
151 idx2arr
= (int32_t *) alloca (s2len
* sizeof (int32_t));
152 rule1arr
= (unsigned char *) alloca (s2len
);
153 rule2arr
= (unsigned char *) alloca (s2len
);
164 position
= rulesets
[rule1arr
[0] * nrules
] & sort_position
;
176 /* Get the next non-IGNOREd element for string `s1'. */
181 if (backw1_stop
!= ~0ul)
183 /* The is something pushed. */
184 if (backw1
== backw1_stop
)
186 /* The last pushed character was handled. Continue
187 with forward characters. */
188 if (idx1cnt
< idx1max
)
192 /* Nothing anymore. The backward sequence ended with
193 the last sequence in the string. */
203 backw1_stop
= idx1max
;
207 int32_t tmp
= findidx (&us1
);
208 rule1arr
[idx1max
] = tmp
>> 24;
209 idx1arr
[idx1max
] = tmp
& 0xffffff;
212 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
]
213 & sort_backward
) == 0)
214 /* No more backward characters to push. */
219 if (backw1_stop
>= idx1cnt
)
221 /* No sequence at all or just one. */
223 if (idx1cnt
== idx1max
|| backw1_stop
> idx1cnt
)
231 /* We pushed backward sequences. */
232 idx1now
= backw1
= idx1cnt
- 1;
235 while (weights
[idx1arr
[idx1now
]] == '\0');
237 /* And the same for string `s2'. */
242 if (backw2_stop
!= ~0ul)
244 /* The is something pushed. */
245 if (backw2
== backw2_stop
)
247 /* The last pushed character was handled. Continue
248 with forward characters. */
249 if (idx2cnt
< idx2max
)
253 /* Nothing anymore. The backward sequence ended with
254 the last sequence in the string. */
264 backw2_stop
= idx2max
;
268 int32_t tmp
= findidx (&us2
);
269 rule2arr
[idx2max
] = tmp
>> 24;
270 idx2arr
[idx2max
] = tmp
& 0xffffff;
273 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
]
274 & sort_backward
) == 0)
275 /* No more backward characters to push. */
280 if (backw2_stop
>= idx2cnt
)
282 /* No sequence at all or just one. */
284 if (idx2cnt
== idx2max
|| backw2_stop
> idx2cnt
)
292 /* We pushed backward sequences. */
293 idx2now
= backw2
= idx2cnt
- 1;
296 while (weights
[idx2arr
[idx2now
]] == '\0');
298 /* See whether any or both strings are empty. */
299 if (idx1now
== ~0ul || idx2now
== ~0ul)
301 if (idx1now
== idx2now
)
302 /* Both ended. So far so good, both strings are equal at the
306 /* This means one string is shorter than the other. Find out
307 which one and return an appropriate value. */
308 result
= idx1now
== ~0ul ? -1 : 1;
309 goto free_and_return
;
312 /* Test for position if necessary. */
313 if (position
&& val1
!= val2
)
315 result
= val1
- val2
;
316 goto free_and_return
;
319 /* Compare the two sequences. */
320 seq1len
= weights
[idx1arr
[idx1now
]++];
321 seq2len
= weights
[idx2arr
[idx2now
]++];
322 for (i
= 0; i
< seq1len
&& i
< seq2len
; ++i
)
323 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
325 /* The sequences differ. */
326 result
= weights
[idx1arr
[idx1now
]] - weights
[idx2arr
[idx2now
]];
327 goto free_and_return
;
331 /* Increment the offsets. */
336 result
= seq1len
- seq2len
;
338 goto free_and_return
;
341 /* Now the remaining passes over the weights. We now use the
342 indeces we found before. */
343 for (pass
= 1; pass
< nrules
; ++pass
)
345 /* We assume that if a rule has defined `position' in one section
346 this is true for all of them. */
353 position
= rulesets
[rule1arr
[0] * nrules
+ pass
] & sort_position
;
366 /* Get the next non-IGNOREd element for string `s1'. */
371 if (backw1_stop
!= ~0ul)
373 /* The is something pushed. */
374 if (backw1
== backw1_stop
)
376 /* The last pushed character was handled. Continue
377 with forward characters. */
378 if (idx1cnt
< idx1max
)
382 /* Nothing anymore. The backward sequence ended with
383 the last sequence in the string. */
393 backw1_stop
= idx1cnt
;
395 while (idx1cnt
< idx1max
)
397 if ((rulesets
[rule1arr
[idx1cnt
] * nrules
+ pass
]
398 & sort_backward
) == 0)
399 /* No more backward characters to push. */
404 if (backw1_stop
== idx1cnt
)
406 /* No sequence at all or just one. */
408 if (idx1cnt
== idx1max
)
416 /* We pushed backward sequences. */
417 idx1now
= backw1
= idx1cnt
- 1;
420 while (weights
[idx1arr
[idx1now
]] == '\0');
422 /* And the same for string `s2'. */
427 if (backw2_stop
!= ~0ul)
429 /* The is something pushed. */
430 if (backw2
== backw2_stop
)
432 /* The last pushed character was handled. Continue
433 with forward characters. */
434 if (idx2cnt
< idx2max
)
438 /* Nothing anymore. The backward sequence ended with
439 the last sequence in the string. */
449 backw2_stop
= idx2cnt
;
451 while (idx2cnt
< idx2max
)
453 if ((rulesets
[rule2arr
[idx2cnt
] * nrules
+ pass
]
454 & sort_backward
) == 0)
455 /* No more backward characters to push. */
460 if (backw2_stop
== idx2cnt
)
462 /* No sequence at all or just one. */
464 if (idx2cnt
== idx2max
)
472 /* We pushed backward sequences. */
473 idx2now
= backw2
= idx2cnt
- 1;
476 while (weights
[idx2arr
[idx2now
]] == '\0');
478 /* See whether any or both strings are empty. */
479 if (idx1now
== ~0ul || idx2now
== ~0ul)
481 if (idx1now
== idx2now
)
482 /* Both ended. So far so good, both strings are equal at the
486 /* This means one string is shorter than the other. Find out
487 which one and return an appropriate value. */
488 result
= idx1now
== ~0ul ? -1 : 1;
489 goto free_and_return
;
492 /* Test for position if necessary. */
493 if (position
&& val1
!= val2
)
495 result
= val1
- val2
;
496 goto free_and_return
;
499 /* Compare the two sequences. */
500 seq1len
= weights
[idx1arr
[idx1now
]++];
501 seq2len
= weights
[idx2arr
[idx2now
]++];
502 for (i
= 0; i
< seq1len
&& i
< seq2len
; ++i
)
503 if (weights
[idx1arr
[idx1now
]] != weights
[idx2arr
[idx2now
]])
505 /* The sequences differ. */
506 result
= weights
[idx1arr
[idx1now
]] - weights
[idx2arr
[idx2now
]];
507 goto free_and_return
;
511 /* Increment the offsets. */
516 result
= seq1len
- seq2len
;
518 goto free_and_return
;
522 /* Free the memory if needed. */