1 From c1b97d6d896b1f22fdf5d28471ef7859ec840a57 Mon Sep 17 00:00:00 2001
2 From: Andreas Schwab <schwab@redhat.com>
3 Date: Wed, 1 Sep 2010 17:26:15 +0200
4 Subject: [PATCH] Fix handling of collating symbols in regexps
7 * posix/regcomp.c (parse_bracket_exp): When looking up collating
8 elements, compare against their byte sequences, not their names.
12 posix/regcomp.c | 72 ++++++++++++++++++++----------------------------------
13 2 files changed, 31 insertions(+), 45 deletions(-)
17 @@ -2772,40 +2772,29 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
19 /* Local function for parse_bracket_exp used in _LIBC environement.
20 Seek the collating symbol entry correspondings to NAME.
21 - Return the index of the symbol in the SYMB_TABLE. */
22 + Return the index of the symbol in the SYMB_TABLE,
23 + or -1 if not found. */
26 __attribute ((always_inline))
27 - seek_collating_symbol_entry (name, name_len)
28 - const unsigned char *name;
30 + seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
32 - int32_t hash = elem_hash ((const char *) name, name_len);
33 - int32_t elem = hash % table_size;
34 - if (symb_table[2 * elem] != 0)
36 - int32_t second = hash % (table_size - 2) + 1;
40 - /* First compare the hashing value. */
41 - if (symb_table[2 * elem] == hash
42 - /* Compare the length of the name. */
43 - && name_len == extra[symb_table[2 * elem + 1]]
44 - /* Compare the name. */
45 - && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
48 - /* Yep, this is the entry. */
56 - while (symb_table[2 * elem] != 0);
59 + for (elem = 0; elem < table_size; elem++)
60 + if (symb_table[2 * elem] != 0)
62 + int32_t idx = symb_table[2 * elem + 1];
63 + /* Skip the name of the collating element. */
64 + idx += 1 + extra[idx];
65 + if (/* Compare the length of the name. */
66 + name_len == extra[idx]
67 + /* Compare the name. */
68 + && memcmp (name, &extra[idx + 1], name_len) == 0)
69 + /* Yep, this is the entry. */
75 /* Local function for parse_bracket_exp used in _LIBC environment.
76 @@ -2814,8 +2803,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
78 auto inline unsigned int
79 __attribute ((always_inline))
80 - lookup_collation_sequence_value (br_elem)
81 - bracket_elem_t *br_elem;
82 + lookup_collation_sequence_value (bracket_elem_t *br_elem)
84 if (br_elem->type == SB_CHAR)
86 @@ -2843,7 +2831,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
88 elem = seek_collating_symbol_entry (br_elem->opr.name,
90 - if (symb_table[2 * elem] != 0)
93 /* We found the entry. */
94 idx = symb_table[2 * elem + 1];
95 @@ -2861,7 +2849,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
96 /* Return the collation sequence value. */
97 return *(unsigned int *) (extra + idx);
99 - else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
100 + else if (sym_name_len == 1)
102 /* No valid character. Match it as a single byte
104 @@ -2883,11 +2871,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
106 auto inline reg_errcode_t
107 __attribute ((always_inline))
108 - build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
109 - re_charset_t *mbcset;
112 - bracket_elem_t *start_elem, *end_elem;
113 + build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
114 + bracket_elem_t *start_elem, bracket_elem_t *end_elem)
117 uint32_t start_collseq;
118 @@ -2966,25 +2951,22 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
120 auto inline reg_errcode_t
121 __attribute ((always_inline))
122 - build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
123 - re_charset_t *mbcset;
124 - int *coll_sym_alloc;
126 - const unsigned char *name;
127 + build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
128 + int *coll_sym_alloc, const unsigned char *name)
131 size_t name_len = strlen ((const char *) name);
134 elem = seek_collating_symbol_entry (name, name_len);
135 - if (symb_table[2 * elem] != 0)
138 /* We found the entry. */
139 idx = symb_table[2 * elem + 1];
140 /* Skip the name of collating element name. */
141 idx += 1 + extra[idx];
143 - else if (symb_table[2 * elem] == 0 && name_len == 1)
144 + else if (name_len == 1)
146 /* No valid character, treat it as a normal