]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regex_internal.c
Add some spaces before '('.
[thirdparty/glibc.git] / posix / regex_internal.c
CommitLineData
3b0bdc72 1/* Extended regular expression matching and search library.
04277e02 2 Copyright (C) 2002-2019 Free Software Foundation, Inc.
3b0bdc72
UD
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6 17 License along with the GNU C Library; if not, see
eb04c213 18 <https://www.gnu.org/licenses/>. */
3b0bdc72 19
eb04c213 20static void re_string_construct_common (const char *str, Idx len,
1b2c2628 21 re_string_t *pstr,
eb04c213 22 RE_TRANSLATE_TYPE trans, bool icase,
b41bd5bc 23 const re_dfa_t *dfa);
01ed6ceb 24static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
1b2c2628 25 const re_node_set *nodes,
eb04c213 26 re_hashval_t hash);
01ed6ceb 27static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
1b2c2628
UD
28 const re_node_set *nodes,
29 unsigned int context,
eb04c213
AZ
30 re_hashval_t hash);
31static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
32 Idx new_buf_len);
33#ifdef RE_ENABLE_I18N
34static void build_wcs_buffer (re_string_t *pstr);
35static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
36#endif /* RE_ENABLE_I18N */
37static void build_upper_buffer (re_string_t *pstr);
38static void re_string_translate_buffer (re_string_t *pstr);
39static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
40 int eflags) __attribute__ ((pure));
3b0bdc72
UD
41\f
42/* Functions for string operation. */
43
612546c6
UD
44/* This function allocate the buffers. It is necessary to call
45 re_string_reconstruct before using the object. */
46
3b0bdc72 47static reg_errcode_t
b41bd5bc 48__attribute_warn_unused_result__
eb04c213
AZ
49re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
50 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
3b0bdc72
UD
51{
52 reg_errcode_t ret;
eb04c213 53 Idx init_buf_len;
97fd3a30
UD
54
55 /* Ensure at least one character fits into the buffers. */
56 if (init_len < dfa->mb_cur_max)
57 init_len = dfa->mb_cur_max;
58 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
f0c7c524 59 re_string_construct_common (str, len, pstr, trans, icase, dfa);
612546c6
UD
60
61 ret = re_string_realloc_buffers (pstr, init_buf_len);
f4efbdfb 62 if (__glibc_unlikely (ret != REG_NOERROR))
612546c6
UD
63 return ret;
64
56b168be
UD
65 pstr->word_char = dfa->word_char;
66 pstr->word_ops_used = dfa->word_ops_used;
37369d1c
UD
67 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
68 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
69 pstr->valid_raw_len = pstr->valid_len;
612546c6
UD
70 return REG_NOERROR;
71}
72
73/* This function allocate the buffers, and initialize them. */
74
75static reg_errcode_t
b41bd5bc 76__attribute_warn_unused_result__
eb04c213
AZ
77re_string_construct (re_string_t *pstr, const char *str, Idx len,
78 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
612546c6
UD
79{
80 reg_errcode_t ret;
56b168be 81 memset (pstr, '\0', sizeof (re_string_t));
f0c7c524 82 re_string_construct_common (str, len, pstr, trans, icase, dfa);
612546c6
UD
83
84 if (len > 0)
3b0bdc72 85 {
612546c6 86 ret = re_string_realloc_buffers (pstr, len + 1);
f4efbdfb 87 if (__glibc_unlikely (ret != REG_NOERROR))
1b2c2628 88 return ret;
3b0bdc72 89 }
37369d1c 90 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
612546c6
UD
91
92 if (icase)
93 {
94#ifdef RE_ENABLE_I18N
f0c7c524 95 if (dfa->mb_cur_max > 1)
37369d1c
UD
96 {
97 while (1)
98 {
99 ret = build_wcs_upper_buffer (pstr);
f4efbdfb 100 if (__glibc_unlikely (ret != REG_NOERROR))
37369d1c
UD
101 return ret;
102 if (pstr->valid_raw_len >= len)
103 break;
104 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
105 break;
106 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
f4efbdfb 107 if (__glibc_unlikely (ret != REG_NOERROR))
37369d1c
UD
108 return ret;
109 }
110 }
612546c6 111 else
3b0bdc72 112#endif /* RE_ENABLE_I18N */
1b2c2628 113 build_upper_buffer (pstr);
612546c6
UD
114 }
115 else
3b0bdc72 116 {
612546c6 117#ifdef RE_ENABLE_I18N
f0c7c524 118 if (dfa->mb_cur_max > 1)
1b2c2628 119 build_wcs_buffer (pstr);
612546c6
UD
120 else
121#endif /* RE_ENABLE_I18N */
1b2c2628
UD
122 {
123 if (trans != NULL)
124 re_string_translate_buffer (pstr);
125 else
37369d1c
UD
126 {
127 pstr->valid_len = pstr->bufs_len;
128 pstr->valid_raw_len = pstr->bufs_len;
129 }
1b2c2628 130 }
3b0bdc72 131 }
612546c6 132
3b0bdc72
UD
133 return REG_NOERROR;
134}
135
612546c6 136/* Helper functions for re_string_allocate, and re_string_construct. */
3b0bdc72
UD
137
138static reg_errcode_t
b41bd5bc 139__attribute_warn_unused_result__
eb04c213 140re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
3b0bdc72 141{
3b0bdc72 142#ifdef RE_ENABLE_I18N
3c0fb574 143 if (pstr->mb_cur_max > 1)
3b0bdc72 144 {
54dd0ab3
UD
145 wint_t *new_wcs;
146
147 /* Avoid overflow in realloc. */
eb04c213 148 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
f4efbdfb
PE
149 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
150 < new_buf_len))
54dd0ab3
UD
151 return REG_ESPACE;
152
153 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
f4efbdfb 154 if (__glibc_unlikely (new_wcs == NULL))
1b2c2628 155 return REG_ESPACE;
2d87db5b 156 pstr->wcs = new_wcs;
37369d1c
UD
157 if (pstr->offsets != NULL)
158 {
eb04c213 159 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
f4efbdfb 160 if (__glibc_unlikely (new_offsets == NULL))
37369d1c 161 return REG_ESPACE;
2d87db5b 162 pstr->offsets = new_offsets;
37369d1c 163 }
3b0bdc72 164 }
3b0bdc72 165#endif /* RE_ENABLE_I18N */
37369d1c 166 if (pstr->mbs_allocated)
3b0bdc72 167 {
2d87db5b
UD
168 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
169 new_buf_len);
f4efbdfb 170 if (__glibc_unlikely (new_mbs == NULL))
1b2c2628 171 return REG_ESPACE;
2d87db5b 172 pstr->mbs = new_mbs;
3b0bdc72 173 }
612546c6 174 pstr->bufs_len = new_buf_len;
3b0bdc72
UD
175 return REG_NOERROR;
176}
177
612546c6 178
3b0bdc72 179static void
eb04c213
AZ
180re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
181 RE_TRANSLATE_TYPE trans, bool icase,
e2f55264 182 const re_dfa_t *dfa)
3b0bdc72 183{
c202c2c5 184 pstr->raw_mbs = (const unsigned char *) str;
3b0bdc72 185 pstr->len = len;
37369d1c 186 pstr->raw_len = len;
997470b3 187 pstr->trans = trans;
eb04c213 188 pstr->icase = icase;
37369d1c 189 pstr->mbs_allocated = (trans != NULL || icase);
f0c7c524
UD
190 pstr->mb_cur_max = dfa->mb_cur_max;
191 pstr->is_utf8 = dfa->is_utf8;
192 pstr->map_notascii = dfa->map_notascii;
37369d1c
UD
193 pstr->stop = pstr->len;
194 pstr->raw_stop = pstr->stop;
3b0bdc72
UD
195}
196
197#ifdef RE_ENABLE_I18N
198
612546c6 199/* Build wide character buffer PSTR->WCS.
3b0bdc72
UD
200 If the byte sequence of the string are:
201 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
202 Then wide character buffer will be:
203 <wc1> , WEOF , <wc2> , WEOF , <wc3>
204 We use WEOF for padding, they indicate that the position isn't
612546c6 205 a first byte of a multibyte character.
3b0bdc72 206
612546c6
UD
207 Note that this function assumes PSTR->VALID_LEN elements are already
208 built and starts from PSTR->VALID_LEN. */
209
210static void
e2f55264 211build_wcs_buffer (re_string_t *pstr)
3b0bdc72 212{
37369d1c 213#ifdef _LIBC
ec73fd87
UD
214 unsigned char buf[MB_LEN_MAX];
215 assert (MB_LEN_MAX >= pstr->mb_cur_max);
37369d1c
UD
216#else
217 unsigned char buf[64];
218#endif
612546c6 219 mbstate_t prev_st;
eb04c213 220 Idx byte_idx, end_idx, remain_len;
88764ae2 221 size_t mbclen;
37369d1c 222
612546c6
UD
223 /* Build the buffers from pstr->valid_len to either pstr->len or
224 pstr->bufs_len. */
37369d1c 225 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
612546c6
UD
226 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
227 {
228 wchar_t wc;
37369d1c
UD
229 const char *p;
230
612546c6
UD
231 remain_len = end_idx - byte_idx;
232 prev_st = pstr->cur_state;
37369d1c 233 /* Apply the translation if we need. */
f4efbdfb 234 if (__glibc_unlikely (pstr->trans != NULL))
37369d1c
UD
235 {
236 int i, ch;
237
238 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
239 {
240 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
d40eb37a 241 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
37369d1c
UD
242 }
243 p = (const char *) buf;
244 }
245 else
246 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
b3918c7d 247 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
f4efbdfb
PE
248 if (__glibc_unlikely (mbclen == (size_t) -1 || mbclen == 0
249 || (mbclen == (size_t) -2
250 && pstr->bufs_len >= pstr->len)))
1b2c2628
UD
251 {
252 /* We treat these cases as a singlebyte character. */
253 mbclen = 1;
254 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
f4efbdfb 255 if (__glibc_unlikely (pstr->trans != NULL))
37369d1c 256 wc = pstr->trans[wc];
1b2c2628
UD
257 pstr->cur_state = prev_st;
258 }
f4efbdfb 259 else if (__glibc_unlikely (mbclen == (size_t) -2))
8887a920
UD
260 {
261 /* The buffer doesn't have enough space, finish to build. */
262 pstr->cur_state = prev_st;
263 break;
264 }
612546c6 265
3b0bdc72 266 /* Write wide character and padding. */
612546c6
UD
267 pstr->wcs[byte_idx++] = wc;
268 /* Write paddings. */
269 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
1b2c2628 270 pstr->wcs[byte_idx++] = WEOF;
3b0bdc72 271 }
612546c6 272 pstr->valid_len = byte_idx;
37369d1c 273 pstr->valid_raw_len = byte_idx;
3b0bdc72
UD
274}
275
612546c6
UD
276/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
277 but for REG_ICASE. */
278
4f7e7f8e 279static reg_errcode_t
b41bd5bc 280__attribute_warn_unused_result__
e2f55264 281build_wcs_upper_buffer (re_string_t *pstr)
3b0bdc72 282{
612546c6 283 mbstate_t prev_st;
eb04c213 284 Idx src_idx, byte_idx, end_idx, remain_len;
88764ae2 285 size_t mbclen;
37369d1c 286#ifdef _LIBC
ec73fd87
UD
287 char buf[MB_LEN_MAX];
288 assert (MB_LEN_MAX >= pstr->mb_cur_max);
37369d1c 289#else
1c99f950 290 char buf[64];
37369d1c
UD
291#endif
292
293 byte_idx = pstr->valid_len;
294 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
f0c7c524 295
e40a38b3
UD
296 /* The following optimization assumes that ASCII characters can be
297 mapped to wide characters with a simple cast. */
37369d1c
UD
298 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
299 {
300 while (byte_idx < end_idx)
1b2c2628 301 {
f0c7c524 302 wchar_t wc;
37369d1c
UD
303
304 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
305 && mbsinit (&pstr->cur_state))
306 {
307 /* In case of a singlebyte character. */
308 pstr->mbs[byte_idx]
309 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
310 /* The next step uses the assumption that wchar_t is encoded
e40a38b3 311 ASCII-safe: all ASCII values can be converted like this. */
37369d1c
UD
312 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
313 ++byte_idx;
314 continue;
315 }
316
f0c7c524
UD
317 remain_len = end_idx - byte_idx;
318 prev_st = pstr->cur_state;
b3918c7d
UD
319 mbclen = __mbrtowc (&wc,
320 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
321 + byte_idx), remain_len, &pstr->cur_state);
f4efbdfb 322 if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2))
1b2c2628 323 {
eb04c213
AZ
324 wchar_t wcu = __towupper (wc);
325 if (wcu != wc)
37369d1c 326 {
88764ae2 327 size_t mbcdlen;
37369d1c 328
a5f0adb3 329 mbcdlen = __wcrtomb (buf, wcu, &prev_st);
f4efbdfb 330 if (__glibc_likely (mbclen == mbcdlen))
37369d1c
UD
331 memcpy (pstr->mbs + byte_idx, buf, mbclen);
332 else
333 {
334 src_idx = byte_idx;
335 goto offsets_needed;
336 }
337 }
f0c7c524
UD
338 else
339 memcpy (pstr->mbs + byte_idx,
340 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
37369d1c 341 pstr->wcs[byte_idx++] = wcu;
f0c7c524
UD
342 /* Write paddings. */
343 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
344 pstr->wcs[byte_idx++] = WEOF;
345 }
8887a920
UD
346 else if (mbclen == (size_t) -1 || mbclen == 0
347 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
f0c7c524 348 {
8887a920
UD
349 /* It is an invalid character, an incomplete character
350 at the end of the string, or '\0'. Just use the byte. */
f0c7c524 351 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
02b50340
UD
352 pstr->mbs[byte_idx] = ch;
353 /* And also cast it to wide char. */
354 pstr->wcs[byte_idx++] = (wchar_t) ch;
f4efbdfb 355 if (__glibc_unlikely (mbclen == (size_t) -1))
f0c7c524 356 pstr->cur_state = prev_st;
1b2c2628 357 }
1b2c2628 358 else
f0c7c524
UD
359 {
360 /* The buffer doesn't have enough space, finish to build. */
361 pstr->cur_state = prev_st;
362 break;
363 }
1b2c2628 364 }
37369d1c
UD
365 pstr->valid_len = byte_idx;
366 pstr->valid_raw_len = byte_idx;
367 return REG_NOERROR;
368 }
f0c7c524 369 else
37369d1c 370 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
f0c7c524
UD
371 {
372 wchar_t wc;
37369d1c 373 const char *p;
e40a38b3 374 offsets_needed:
f0c7c524
UD
375 remain_len = end_idx - byte_idx;
376 prev_st = pstr->cur_state;
f4efbdfb 377 if (__glibc_unlikely (pstr->trans != NULL))
f0c7c524 378 {
37369d1c
UD
379 int i, ch;
380
381 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
f0c7c524 382 {
37369d1c
UD
383 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
384 buf[i] = pstr->trans[ch];
f0c7c524 385 }
37369d1c 386 p = (const char *) buf;
f0c7c524 387 }
37369d1c
UD
388 else
389 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
b3918c7d 390 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
f4efbdfb 391 if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2))
f0c7c524 392 {
eb04c213
AZ
393 wchar_t wcu = __towupper (wc);
394 if (wcu != wc)
37369d1c 395 {
88764ae2 396 size_t mbcdlen;
37369d1c 397
a5f0adb3 398 mbcdlen = __wcrtomb ((char *) buf, wcu, &prev_st);
f4efbdfb 399 if (__glibc_likely (mbclen == mbcdlen))
37369d1c 400 memcpy (pstr->mbs + byte_idx, buf, mbclen);
88764ae2 401 else if (mbcdlen != (size_t) -1)
37369d1c 402 {
88764ae2 403 size_t i;
37369d1c
UD
404
405 if (byte_idx + mbcdlen > pstr->bufs_len)
406 {
407 pstr->cur_state = prev_st;
408 break;
409 }
410
411 if (pstr->offsets == NULL)
412 {
eb04c213 413 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
37369d1c
UD
414
415 if (pstr->offsets == NULL)
416 return REG_ESPACE;
417 }
418 if (!pstr->offsets_needed)
419 {
88764ae2 420 for (i = 0; i < (size_t) byte_idx; ++i)
37369d1c
UD
421 pstr->offsets[i] = i;
422 pstr->offsets_needed = 1;
423 }
424
425 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
426 pstr->wcs[byte_idx] = wcu;
427 pstr->offsets[byte_idx] = src_idx;
428 for (i = 1; i < mbcdlen; ++i)
429 {
430 pstr->offsets[byte_idx + i]
431 = src_idx + (i < mbclen ? i : mbclen - 1);
432 pstr->wcs[byte_idx + i] = WEOF;
433 }
434 pstr->len += mbcdlen - mbclen;
435 if (pstr->raw_stop > src_idx)
436 pstr->stop += mbcdlen - mbclen;
437 end_idx = (pstr->bufs_len > pstr->len)
438 ? pstr->len : pstr->bufs_len;
439 byte_idx += mbcdlen;
440 src_idx += mbclen;
441 continue;
442 }
2da42bc0
UD
443 else
444 memcpy (pstr->mbs + byte_idx, p, mbclen);
37369d1c 445 }
f0c7c524 446 else
37369d1c
UD
447 memcpy (pstr->mbs + byte_idx, p, mbclen);
448
f4efbdfb 449 if (__glibc_unlikely (pstr->offsets_needed != 0))
37369d1c 450 {
88764ae2 451 size_t i;
37369d1c
UD
452 for (i = 0; i < mbclen; ++i)
453 pstr->offsets[byte_idx + i] = src_idx + i;
454 }
455 src_idx += mbclen;
456
457 pstr->wcs[byte_idx++] = wcu;
f0c7c524
UD
458 /* Write paddings. */
459 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
460 pstr->wcs[byte_idx++] = WEOF;
461 }
8887a920
UD
462 else if (mbclen == (size_t) -1 || mbclen == 0
463 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
02b50340 464 {
37369d1c
UD
465 /* It is an invalid character or '\0'. Just use the byte. */
466 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
467
f4efbdfb 468 if (__glibc_unlikely (pstr->trans != NULL))
37369d1c 469 ch = pstr->trans [ch];
02b50340 470 pstr->mbs[byte_idx] = ch;
37369d1c 471
f4efbdfb 472 if (__glibc_unlikely (pstr->offsets_needed != 0))
37369d1c
UD
473 pstr->offsets[byte_idx] = src_idx;
474 ++src_idx;
475
02b50340
UD
476 /* And also cast it to wide char. */
477 pstr->wcs[byte_idx++] = (wchar_t) ch;
f4efbdfb 478 if (__glibc_unlikely (mbclen == (size_t) -1))
02b50340
UD
479 pstr->cur_state = prev_st;
480 }
f0c7c524
UD
481 else
482 {
483 /* The buffer doesn't have enough space, finish to build. */
484 pstr->cur_state = prev_st;
485 break;
486 }
487 }
612546c6 488 pstr->valid_len = byte_idx;
37369d1c
UD
489 pstr->valid_raw_len = src_idx;
490 return REG_NOERROR;
612546c6
UD
491}
492
493/* Skip characters until the index becomes greater than NEW_RAW_IDX.
494 Return the index. */
495
eb04c213
AZ
496static Idx
497re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
612546c6
UD
498{
499 mbstate_t prev_st;
eb04c213 500 Idx rawbuf_idx;
88764ae2 501 size_t mbclen;
4f08104c 502 wint_t wc = WEOF;
612546c6
UD
503
504 /* Skip the characters which are not necessary to check. */
37369d1c 505 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
612546c6
UD
506 rawbuf_idx < new_raw_idx;)
507 {
4f08104c 508 wchar_t wc2;
eb04c213 509 Idx remain_len = pstr->raw_len - rawbuf_idx;
612546c6 510 prev_st = pstr->cur_state;
4f08104c 511 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
b3918c7d 512 remain_len, &pstr->cur_state);
f4efbdfb
PE
513 if (__glibc_unlikely (mbclen == (size_t) -2 || mbclen == (size_t) -1
514 || mbclen == 0))
1b2c2628 515 {
33e63e79
UD
516 /* We treat these cases as a single byte character. */
517 if (mbclen == 0 || remain_len == 0)
518 wc = L'\0';
519 else
520 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
1b2c2628
UD
521 mbclen = 1;
522 pstr->cur_state = prev_st;
523 }
4f08104c 524 else
eb04c213 525 wc = wc2;
612546c6
UD
526 /* Then proceed the next character. */
527 rawbuf_idx += mbclen;
528 }
4f08104c 529 *last_wc = wc;
612546c6 530 return rawbuf_idx;
3b0bdc72
UD
531}
532#endif /* RE_ENABLE_I18N */
533
1b2c2628 534/* Build the buffer PSTR->MBS, and apply the translation if we need.
612546c6
UD
535 This function is used in case of REG_ICASE. */
536
537static void
e2f55264 538build_upper_buffer (re_string_t *pstr)
3b0bdc72 539{
eb04c213 540 Idx char_idx, end_idx;
612546c6 541 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
3b0bdc72 542
612546c6 543 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
3b0bdc72 544 {
612546c6 545 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
f4efbdfb 546 if (__glibc_unlikely (pstr->trans != NULL))
37369d1c 547 ch = pstr->trans[ch];
eb04c213 548 pstr->mbs[char_idx] = toupper (ch);
3b0bdc72 549 }
612546c6 550 pstr->valid_len = char_idx;
37369d1c 551 pstr->valid_raw_len = char_idx;
3b0bdc72
UD
552}
553
612546c6 554/* Apply TRANS to the buffer in PSTR. */
3b0bdc72 555
612546c6 556static void
e2f55264 557re_string_translate_buffer (re_string_t *pstr)
3b0bdc72 558{
eb04c213 559 Idx buf_idx, end_idx;
612546c6
UD
560 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
561
562 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
3b0bdc72 563 {
612546c6 564 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
37369d1c 565 pstr->mbs[buf_idx] = pstr->trans[ch];
3b0bdc72 566 }
612546c6
UD
567
568 pstr->valid_len = buf_idx;
37369d1c 569 pstr->valid_raw_len = buf_idx;
612546c6
UD
570}
571
572/* This function re-construct the buffers.
3c0fb574 573 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
612546c6
UD
574 convert to upper case in case of REG_ICASE, apply translation. */
575
576static reg_errcode_t
b41bd5bc 577__attribute_warn_unused_result__
eb04c213 578re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
612546c6 579{
eb04c213
AZ
580 Idx offset;
581
f4efbdfb 582 if (__glibc_unlikely (pstr->raw_mbs_idx <= idx))
eb04c213
AZ
583 offset = idx - pstr->raw_mbs_idx;
584 else
612546c6
UD
585 {
586 /* Reset buffer. */
434d3784 587#ifdef RE_ENABLE_I18N
3c0fb574 588 if (pstr->mb_cur_max > 1)
1b2c2628 589 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
434d3784 590#endif /* RE_ENABLE_I18N */
37369d1c
UD
591 pstr->len = pstr->raw_len;
592 pstr->stop = pstr->raw_stop;
593 pstr->valid_len = 0;
594 pstr->raw_mbs_idx = 0;
595 pstr->valid_raw_len = 0;
596 pstr->offsets_needed = 0;
612546c6 597 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
1b2c2628 598 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
37369d1c 599 if (!pstr->mbs_allocated)
1b2c2628 600 pstr->mbs = (unsigned char *) pstr->raw_mbs;
612546c6
UD
601 offset = idx;
602 }
603
f4efbdfb 604 if (__glibc_likely (offset != 0))
612546c6 605 {
ba40cc15 606 /* Should the already checked characters be kept? */
f4efbdfb 607 if (__glibc_likely (offset < pstr->valid_raw_len))
a334319f
UD
608 {
609 /* Yes, move them to the front of the buffer. */
0ecb606c 610#ifdef RE_ENABLE_I18N
f4efbdfb 611 if (__glibc_unlikely (pstr->offsets_needed))
ba40cc15 612 {
eb04c213 613 Idx low = 0, high = pstr->valid_len, mid;
ba40cc15
UD
614 do
615 {
616 mid = (high + low) / 2;
617 if (pstr->offsets[mid] > offset)
618 high = mid;
619 else if (pstr->offsets[mid] < offset)
620 low = mid + 1;
621 else
622 break;
623 }
624 while (low < high);
625 if (pstr->offsets[mid] < offset)
626 ++mid;
627 pstr->tip_context = re_string_context_at (pstr, mid - 1,
628 eflags);
629 /* This can be quite complicated, so handle specially
630 only the common and easy case where the character with
631 different length representation of lower and upper
632 case is present at or after offset. */
633 if (pstr->valid_len > offset
634 && mid == offset && pstr->offsets[mid] == offset)
635 {
636 memmove (pstr->wcs, pstr->wcs + offset,
637 (pstr->valid_len - offset) * sizeof (wint_t));
638 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
639 pstr->valid_len -= offset;
640 pstr->valid_raw_len -= offset;
641 for (low = 0; low < pstr->valid_len; low++)
642 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
643 }
644 else
645 {
646 /* Otherwise, just find out how long the partial multibyte
647 character at offset is and fill it with WEOF/255. */
648 pstr->len = pstr->raw_len - idx + offset;
649 pstr->stop = pstr->raw_stop - idx + offset;
650 pstr->offsets_needed = 0;
651 while (mid > 0 && pstr->offsets[mid - 1] == offset)
652 --mid;
653 while (mid < pstr->valid_len)
654 if (pstr->wcs[mid] != WEOF)
655 break;
656 else
657 ++mid;
658 if (mid == pstr->valid_len)
659 pstr->valid_len = 0;
660 else
661 {
662 pstr->valid_len = pstr->offsets[mid] - offset;
663 if (pstr->valid_len)
664 {
665 for (low = 0; low < pstr->valid_len; ++low)
666 pstr->wcs[low] = WEOF;
667 memset (pstr->mbs, 255, pstr->valid_len);
668 }
669 }
670 pstr->valid_raw_len = pstr->valid_len;
671 }
672 }
673 else
674#endif
675 {
676 pstr->tip_context = re_string_context_at (pstr, offset - 1,
677 eflags);
678#ifdef RE_ENABLE_I18N
679 if (pstr->mb_cur_max > 1)
680 memmove (pstr->wcs, pstr->wcs + offset,
681 (pstr->valid_len - offset) * sizeof (wint_t));
612546c6 682#endif /* RE_ENABLE_I18N */
f4efbdfb 683 if (__glibc_unlikely (pstr->mbs_allocated))
ba40cc15
UD
684 memmove (pstr->mbs, pstr->mbs + offset,
685 pstr->valid_len - offset);
686 pstr->valid_len -= offset;
687 pstr->valid_raw_len -= offset;
78dd658a 688#if defined DEBUG && DEBUG
ba40cc15 689 assert (pstr->valid_len > 0);
3b0bdc72 690#endif
ba40cc15 691 }
1b2c2628 692 }
612546c6 693 else
1b2c2628 694 {
ed8ae46b 695#ifdef RE_ENABLE_I18N
1b2c2628 696 /* No, skip all characters until IDX. */
eb04c213 697 Idx prev_valid_len = pstr->valid_len;
ba40cc15 698
f4efbdfb 699 if (__glibc_unlikely (pstr->offsets_needed))
37369d1c
UD
700 {
701 pstr->len = pstr->raw_len - idx + offset;
702 pstr->stop = pstr->raw_stop - idx + offset;
703 pstr->offsets_needed = 0;
704 }
705#endif
1b2c2628 706 pstr->valid_len = 0;
3b0bdc72 707#ifdef RE_ENABLE_I18N
3c0fb574 708 if (pstr->mb_cur_max > 1)
1b2c2628 709 {
eb04c213 710 Idx wcs_idx;
3c0fb574
UD
711 wint_t wc = WEOF;
712
713 if (pstr->is_utf8)
714 {
0dae5d4e 715 const unsigned char *raw, *p, *end;
3c0fb574
UD
716
717 /* Special case UTF-8. Multi-byte chars start with any
718 byte other than 0x80 - 0xbf. */
719 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
97fd3a30 720 end = raw + (offset - pstr->mb_cur_max);
ba40cc15
UD
721 if (end < pstr->raw_mbs)
722 end = pstr->raw_mbs;
01ed6ceb
UD
723 p = raw + offset - 1;
724#ifdef _LIBC
725 /* We know the wchar_t encoding is UCS4, so for the simple
726 case, ASCII characters, skip the conversion step. */
f4efbdfb 727 if (isascii (*p) && __glibc_likely (pstr->trans == NULL))
01ed6ceb
UD
728 {
729 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
ba40cc15 730 /* pstr->valid_len = 0; */
01ed6ceb
UD
731 wc = (wchar_t) *p;
732 }
733 else
734#endif
735 for (; p >= end; --p)
736 if ((*p & 0xc0) != 0x80)
737 {
738 mbstate_t cur_state;
739 wchar_t wc2;
eb04c213 740 Idx mlen = raw + pstr->len - p;
01ed6ceb
UD
741 unsigned char buf[6];
742 size_t mbclen;
743
5e2b63c6 744 const unsigned char *pp = p;
f4efbdfb 745 if (__glibc_unlikely (pstr->trans != NULL))
01ed6ceb
UD
746 {
747 int i = mlen < 6 ? mlen : 6;
748 while (--i >= 0)
749 buf[i] = pstr->trans[p[i]];
5e2b63c6 750 pp = buf;
01ed6ceb
UD
751 }
752 /* XXX Don't use mbrtowc, we know which conversion
753 to use (UTF-8 -> UCS4). */
754 memset (&cur_state, 0, sizeof (cur_state));
5e2b63c6 755 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
b3918c7d 756 &cur_state);
01ed6ceb
UD
757 if (raw + offset - p <= mbclen
758 && mbclen < (size_t) -2)
759 {
760 memset (&pstr->cur_state, '\0',
761 sizeof (mbstate_t));
762 pstr->valid_len = mbclen - (raw + offset - p);
763 wc = wc2;
764 }
765 break;
766 }
3c0fb574 767 }
e40a38b3 768
3c0fb574 769 if (wc == WEOF)
97fd3a30 770 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
33e63e79
UD
771 if (wc == WEOF)
772 pstr->tip_context
ba40cc15 773 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
33e63e79 774 else
f4efbdfb 775 pstr->tip_context = ((__glibc_unlikely (pstr->word_ops_used != 0)
33e63e79
UD
776 && IS_WIDE_WORD_CHAR (wc))
777 ? CONTEXT_WORD
778 : ((IS_WIDE_NEWLINE (wc)
779 && pstr->newline_anchor)
780 ? CONTEXT_NEWLINE : 0));
f4efbdfb 781 if (__glibc_unlikely (pstr->valid_len))
37369d1c
UD
782 {
783 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
784 pstr->wcs[wcs_idx] = WEOF;
785 if (pstr->mbs_allocated)
786 memset (pstr->mbs, 255, pstr->valid_len);
787 }
788 pstr->valid_raw_len = pstr->valid_len;
1b2c2628 789 }
1843975c 790 else
612546c6 791#endif /* RE_ENABLE_I18N */
1843975c
RM
792 {
793 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
33e63e79 794 pstr->valid_raw_len = 0;
1843975c
RM
795 if (pstr->trans)
796 c = pstr->trans[c];
56b168be
UD
797 pstr->tip_context = (bitset_contain (pstr->word_char, c)
798 ? CONTEXT_WORD
799 : ((IS_NEWLINE (c) && pstr->newline_anchor)
1843975c
RM
800 ? CONTEXT_NEWLINE : 0));
801 }
1b2c2628 802 }
f4efbdfb 803 if (!__glibc_unlikely (pstr->mbs_allocated))
37369d1c 804 pstr->mbs += offset;
3b0bdc72 805 }
612546c6
UD
806 pstr->raw_mbs_idx = idx;
807 pstr->len -= offset;
ac3d553b 808 pstr->stop -= offset;
612546c6
UD
809
810 /* Then build the buffers. */
811#ifdef RE_ENABLE_I18N
3c0fb574 812 if (pstr->mb_cur_max > 1)
3b0bdc72 813 {
612546c6 814 if (pstr->icase)
37369d1c 815 {
4f7e7f8e 816 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
f4efbdfb 817 if (__glibc_unlikely (ret != REG_NOERROR))
37369d1c
UD
818 return ret;
819 }
612546c6 820 else
1b2c2628 821 build_wcs_buffer (pstr);
3b0bdc72
UD
822 }
823 else
612546c6 824#endif /* RE_ENABLE_I18N */
f4efbdfb 825 if (__glibc_unlikely (pstr->mbs_allocated))
01ed6ceb
UD
826 {
827 if (pstr->icase)
828 build_upper_buffer (pstr);
829 else if (pstr->trans != NULL)
830 re_string_translate_buffer (pstr);
831 }
832 else
833 pstr->valid_len = pstr->len;
612546c6 834
bb677c95 835 pstr->cur_idx = 0;
3b0bdc72
UD
836 return REG_NOERROR;
837}
838
37369d1c 839static unsigned char
eb04c213
AZ
840__attribute__ ((pure))
841re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
37369d1c 842{
eb04c213
AZ
843 int ch;
844 Idx off;
37369d1c
UD
845
846 /* Handle the common (easiest) cases first. */
f4efbdfb 847 if (__glibc_likely (!pstr->mbs_allocated))
37369d1c
UD
848 return re_string_peek_byte (pstr, idx);
849
850#ifdef RE_ENABLE_I18N
851 if (pstr->mb_cur_max > 1
852 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
853 return re_string_peek_byte (pstr, idx);
854#endif
855
856 off = pstr->cur_idx + idx;
857#ifdef RE_ENABLE_I18N
858 if (pstr->offsets_needed)
859 off = pstr->offsets[off];
860#endif
861
862 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
37369d1c
UD
863
864#ifdef RE_ENABLE_I18N
865 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
866 this function returns CAPITAL LETTER I instead of first byte of
867 DOTLESS SMALL LETTER I. The latter would confuse the parser,
868 since peek_byte_case doesn't advance cur_idx in any way. */
869 if (pstr->offsets_needed && !isascii (ch))
870 return re_string_peek_byte (pstr, idx);
871#endif
872
873 return ch;
874}
875
876static unsigned char
e2f55264 877re_string_fetch_byte_case (re_string_t *pstr)
37369d1c 878{
f4efbdfb 879 if (__glibc_likely (!pstr->mbs_allocated))
37369d1c
UD
880 return re_string_fetch_byte (pstr);
881
882#ifdef RE_ENABLE_I18N
883 if (pstr->offsets_needed)
884 {
eb04c213
AZ
885 Idx off;
886 int ch;
c0d5034e 887
37369d1c
UD
888 /* For tr_TR.UTF-8 [[:islower:]] there is
889 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
890 in that case the whole multi-byte character and return
891 the original letter. On the other side, with
892 [[: DOTLESS SMALL LETTER I return [[:I, as doing
893 anything else would complicate things too much. */
894
895 if (!re_string_first_byte (pstr, pstr->cur_idx))
896 return re_string_fetch_byte (pstr);
897
898 off = pstr->offsets[pstr->cur_idx];
899 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
37369d1c
UD
900
901 if (! isascii (ch))
902 return re_string_fetch_byte (pstr);
903
904 re_string_skip_bytes (pstr,
905 re_string_char_size_at (pstr, pstr->cur_idx));
906 return ch;
907 }
908#endif
909
f39eef7b 910 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
37369d1c
UD
911}
912
3b0bdc72 913static void
e2f55264 914re_string_destruct (re_string_t *pstr)
3b0bdc72
UD
915{
916#ifdef RE_ENABLE_I18N
917 re_free (pstr->wcs);
37369d1c 918 re_free (pstr->offsets);
3b0bdc72 919#endif /* RE_ENABLE_I18N */
37369d1c 920 if (pstr->mbs_allocated)
612546c6 921 re_free (pstr->mbs);
3b0bdc72
UD
922}
923
924/* Return the context at IDX in INPUT. */
612546c6 925
3b0bdc72 926static unsigned int
eb04c213 927re_string_context_at (const re_string_t *input, Idx idx, int eflags)
3b0bdc72
UD
928{
929 int c;
f4efbdfb 930 if (__glibc_unlikely (idx < 0))
bb677c95
UD
931 /* In this case, we use the value stored in input->tip_context,
932 since we can't know the character in input->mbs[-1] here. */
933 return input->tip_context;
f4efbdfb 934 if (__glibc_unlikely (idx == input->len))
bb677c95
UD
935 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
936 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
a0756231 937#ifdef RE_ENABLE_I18N
3c0fb574 938 if (input->mb_cur_max > 1)
1843975c
RM
939 {
940 wint_t wc;
eb04c213 941 Idx wc_idx = idx;
1843975c
RM
942 while(input->wcs[wc_idx] == WEOF)
943 {
78dd658a 944#if defined DEBUG && DEBUG
1843975c
RM
945 /* It must not happen. */
946 assert (wc_idx >= 0);
947#endif
948 --wc_idx;
949 if (wc_idx < 0)
950 return input->tip_context;
951 }
952 wc = input->wcs[wc_idx];
f4efbdfb
PE
953 if (__glibc_unlikely (input->word_ops_used != 0)
954 && IS_WIDE_WORD_CHAR (wc))
1843975c 955 return CONTEXT_WORD;
56b168be
UD
956 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
957 ? CONTEXT_NEWLINE : 0);
1843975c 958 }
a0756231
RM
959 else
960#endif
961 {
962 c = re_string_byte_at (input, idx);
56b168be 963 if (bitset_contain (input->word_char, c))
a0756231 964 return CONTEXT_WORD;
56b168be 965 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
a0756231 966 }
3b0bdc72
UD
967}
968\f
969/* Functions for set operation. */
970
971static reg_errcode_t
b41bd5bc 972__attribute_warn_unused_result__
eb04c213 973re_node_set_alloc (re_node_set *set, Idx size)
3b0bdc72
UD
974{
975 set->alloc = size;
976 set->nelem = 0;
eb04c213 977 set->elems = re_malloc (Idx, size);
f4efbdfb
PE
978 if (__glibc_unlikely (set->elems == NULL)
979 && (MALLOC_0_IS_NONNULL || size != 0))
3b0bdc72
UD
980 return REG_ESPACE;
981 return REG_NOERROR;
982}
983
984static reg_errcode_t
b41bd5bc 985__attribute_warn_unused_result__
eb04c213 986re_node_set_init_1 (re_node_set *set, Idx elem)
3b0bdc72
UD
987{
988 set->alloc = 1;
989 set->nelem = 1;
eb04c213 990 set->elems = re_malloc (Idx, 1);
f4efbdfb 991 if (__glibc_unlikely (set->elems == NULL))
6291ee3c
UD
992 {
993 set->alloc = set->nelem = 0;
994 return REG_ESPACE;
995 }
3b0bdc72
UD
996 set->elems[0] = elem;
997 return REG_NOERROR;
998}
999
1000static reg_errcode_t
b41bd5bc 1001__attribute_warn_unused_result__
eb04c213 1002re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
3b0bdc72
UD
1003{
1004 set->alloc = 2;
eb04c213 1005 set->elems = re_malloc (Idx, 2);
f4efbdfb 1006 if (__glibc_unlikely (set->elems == NULL))
3b0bdc72
UD
1007 return REG_ESPACE;
1008 if (elem1 == elem2)
1009 {
1010 set->nelem = 1;
1011 set->elems[0] = elem1;
1012 }
1013 else
1014 {
1015 set->nelem = 2;
1016 if (elem1 < elem2)
1b2c2628
UD
1017 {
1018 set->elems[0] = elem1;
1019 set->elems[1] = elem2;
1020 }
3b0bdc72 1021 else
1b2c2628
UD
1022 {
1023 set->elems[0] = elem2;
1024 set->elems[1] = elem1;
1025 }
3b0bdc72
UD
1026 }
1027 return REG_NOERROR;
1028}
1029
1030static reg_errcode_t
b41bd5bc 1031__attribute_warn_unused_result__
e2f55264 1032re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
3b0bdc72
UD
1033{
1034 dest->nelem = src->nelem;
1035 if (src->nelem > 0)
1036 {
1037 dest->alloc = dest->nelem;
eb04c213 1038 dest->elems = re_malloc (Idx, dest->alloc);
f4efbdfb 1039 if (__glibc_unlikely (dest->elems == NULL))
6291ee3c
UD
1040 {
1041 dest->alloc = dest->nelem = 0;
1042 return REG_ESPACE;
1043 }
eb04c213 1044 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
3b0bdc72
UD
1045 }
1046 else
1047 re_node_set_init_empty (dest);
1048 return REG_NOERROR;
1049}
1050
a9388965
UD
1051/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1052 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1053 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1054
3b0bdc72 1055static reg_errcode_t
b41bd5bc 1056__attribute_warn_unused_result__
e2f55264
UD
1057re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1058 const re_node_set *src2)
3b0bdc72 1059{
eb04c213 1060 Idx i1, i2, is, id, delta, sbase;
59e7ebcc
UD
1061 if (src1->nelem == 0 || src2->nelem == 0)
1062 return REG_NOERROR;
1063
1064 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1065 conservative estimate. */
1066 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
3b0bdc72 1067 {
eb04c213
AZ
1068 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1069 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
f4efbdfb 1070 if (__glibc_unlikely (new_elems == NULL))
2da42bc0 1071 return REG_ESPACE;
59e7ebcc
UD
1072 dest->elems = new_elems;
1073 dest->alloc = new_alloc;
3b0bdc72 1074 }
3b0bdc72 1075
59e7ebcc
UD
1076 /* Find the items in the intersection of SRC1 and SRC2, and copy
1077 into the top of DEST those that are not already in DEST itself. */
1078 sbase = dest->nelem + src1->nelem + src2->nelem;
1079 i1 = src1->nelem - 1;
1080 i2 = src2->nelem - 1;
1081 id = dest->nelem - 1;
1082 for (;;)
3b0bdc72 1083 {
59e7ebcc 1084 if (src1->elems[i1] == src2->elems[i2])
1b2c2628 1085 {
59e7ebcc
UD
1086 /* Try to find the item in DEST. Maybe we could binary search? */
1087 while (id >= 0 && dest->elems[id] > src1->elems[i1])
1088 --id;
1089
2da42bc0 1090 if (id < 0 || dest->elems[id] != src1->elems[i1])
eb04c213 1091 dest->elems[--sbase] = src1->elems[i1];
59e7ebcc
UD
1092
1093 if (--i1 < 0 || --i2 < 0)
1094 break;
1b2c2628 1095 }
59e7ebcc
UD
1096
1097 /* Lower the highest of the two items. */
1098 else if (src1->elems[i1] < src2->elems[i2])
1b2c2628 1099 {
59e7ebcc
UD
1100 if (--i2 < 0)
1101 break;
1102 }
1103 else
1104 {
1105 if (--i1 < 0)
1106 break;
1b2c2628 1107 }
3b0bdc72 1108 }
59e7ebcc
UD
1109
1110 id = dest->nelem - 1;
1111 is = dest->nelem + src1->nelem + src2->nelem - 1;
1112 delta = is - sbase + 1;
1113
1114 /* Now copy. When DELTA becomes zero, the remaining
1115 DEST elements are already in place; this is more or
1116 less the same loop that is in re_node_set_merge. */
1117 dest->nelem += delta;
1118 if (delta > 0 && id >= 0)
1119 for (;;)
1120 {
2da42bc0
UD
1121 if (dest->elems[is] > dest->elems[id])
1122 {
1123 /* Copy from the top. */
1124 dest->elems[id + delta--] = dest->elems[is--];
1125 if (delta == 0)
1126 break;
1127 }
1128 else
1129 {
1130 /* Slide from the bottom. */
1131 dest->elems[id + delta] = dest->elems[id];
1132 if (--id < 0)
1133 break;
1134 }
59e7ebcc
UD
1135 }
1136
1137 /* Copy remaining SRC elements. */
eb04c213 1138 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
59e7ebcc 1139
3b0bdc72
UD
1140 return REG_NOERROR;
1141}
1142
a9388965
UD
1143/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1144 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1145
3b0bdc72 1146static reg_errcode_t
b41bd5bc 1147__attribute_warn_unused_result__
e2f55264
UD
1148re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1149 const re_node_set *src2)
3b0bdc72 1150{
eb04c213 1151 Idx i1, i2, id;
3b0bdc72
UD
1152 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1153 {
1154 dest->alloc = src1->nelem + src2->nelem;
eb04c213 1155 dest->elems = re_malloc (Idx, dest->alloc);
f4efbdfb 1156 if (__glibc_unlikely (dest->elems == NULL))
1b2c2628 1157 return REG_ESPACE;
3b0bdc72
UD
1158 }
1159 else
1160 {
1161 if (src1 != NULL && src1->nelem > 0)
1b2c2628 1162 return re_node_set_init_copy (dest, src1);
3b0bdc72 1163 else if (src2 != NULL && src2->nelem > 0)
1b2c2628 1164 return re_node_set_init_copy (dest, src2);
3b0bdc72 1165 else
1b2c2628 1166 re_node_set_init_empty (dest);
3b0bdc72
UD
1167 return REG_NOERROR;
1168 }
1169 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1170 {
1171 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
1172 {
1173 dest->elems[id++] = src2->elems[i2++];
1174 continue;
1175 }
3b0bdc72 1176 if (src1->elems[i1] == src2->elems[i2])
1b2c2628 1177 ++i2;
3b0bdc72
UD
1178 dest->elems[id++] = src1->elems[i1++];
1179 }
1180 if (i1 < src1->nelem)
1181 {
1182 memcpy (dest->elems + id, src1->elems + i1,
eb04c213 1183 (src1->nelem - i1) * sizeof (Idx));
3b0bdc72
UD
1184 id += src1->nelem - i1;
1185 }
1186 else if (i2 < src2->nelem)
1187 {
1188 memcpy (dest->elems + id, src2->elems + i2,
eb04c213 1189 (src2->nelem - i2) * sizeof (Idx));
3b0bdc72
UD
1190 id += src2->nelem - i2;
1191 }
1192 dest->nelem = id;
1193 return REG_NOERROR;
1194}
1195
a9388965
UD
1196/* Calculate the union set of the sets DEST and SRC. And store it to
1197 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1198
3b0bdc72 1199static reg_errcode_t
b41bd5bc 1200__attribute_warn_unused_result__
e2f55264 1201re_node_set_merge (re_node_set *dest, const re_node_set *src)
3b0bdc72 1202{
eb04c213 1203 Idx is, id, sbase, delta;
3b0bdc72
UD
1204 if (src == NULL || src->nelem == 0)
1205 return REG_NOERROR;
59e7ebcc 1206 if (dest->alloc < 2 * src->nelem + dest->nelem)
3b0bdc72 1207 {
eb04c213
AZ
1208 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1209 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
f4efbdfb 1210 if (__glibc_unlikely (new_buffer == NULL))
1b2c2628
UD
1211 return REG_ESPACE;
1212 dest->elems = new_buffer;
951d6408 1213 dest->alloc = new_alloc;
3b0bdc72
UD
1214 }
1215
f4efbdfb 1216 if (__glibc_unlikely (dest->nelem == 0))
3b0bdc72 1217 {
59e7ebcc 1218 dest->nelem = src->nelem;
eb04c213 1219 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
59e7ebcc
UD
1220 return REG_NOERROR;
1221 }
3b0bdc72 1222
59e7ebcc
UD
1223 /* Copy into the top of DEST the items of SRC that are not
1224 found in DEST. Maybe we could binary search in DEST? */
1225 for (sbase = dest->nelem + 2 * src->nelem,
1226 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1227 {
1228 if (dest->elems[id] == src->elems[is])
2da42bc0 1229 is--, id--;
59e7ebcc 1230 else if (dest->elems[id] < src->elems[is])
2da42bc0 1231 dest->elems[--sbase] = src->elems[is--];
59e7ebcc 1232 else /* if (dest->elems[id] > src->elems[is]) */
2da42bc0 1233 --id;
59e7ebcc 1234 }
3b0bdc72 1235
59e7ebcc
UD
1236 if (is >= 0)
1237 {
1238 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1239 sbase -= is + 1;
eb04c213 1240 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
3b0bdc72
UD
1241 }
1242
59e7ebcc
UD
1243 id = dest->nelem - 1;
1244 is = dest->nelem + 2 * src->nelem - 1;
1245 delta = is - sbase + 1;
1246 if (delta == 0)
1247 return REG_NOERROR;
1248
1249 /* Now copy. When DELTA becomes zero, the remaining
1250 DEST elements are already in place. */
1251 dest->nelem += delta;
1252 for (;;)
3b0bdc72 1253 {
59e7ebcc 1254 if (dest->elems[is] > dest->elems[id])
2da42bc0 1255 {
59e7ebcc 1256 /* Copy from the top. */
2da42bc0 1257 dest->elems[id + delta--] = dest->elems[is--];
59e7ebcc
UD
1258 if (delta == 0)
1259 break;
1260 }
1261 else
2da42bc0
UD
1262 {
1263 /* Slide from the bottom. */
1264 dest->elems[id + delta] = dest->elems[id];
59e7ebcc
UD
1265 if (--id < 0)
1266 {
1267 /* Copy remaining SRC elements. */
1268 memcpy (dest->elems, dest->elems + sbase,
eb04c213 1269 delta * sizeof (Idx));
59e7ebcc
UD
1270 break;
1271 }
1272 }
3b0bdc72 1273 }
59e7ebcc 1274
3b0bdc72
UD
1275 return REG_NOERROR;
1276}
1277
1278/* Insert the new element ELEM to the re_node_set* SET.
8503c987 1279 SET should not already have ELEM.
eb04c213 1280 Return true if successful. */
3b0bdc72 1281
eb04c213 1282static bool
b41bd5bc 1283__attribute_warn_unused_result__
eb04c213 1284re_node_set_insert (re_node_set *set, Idx elem)
3b0bdc72 1285{
eb04c213 1286 Idx idx;
8503c987
UD
1287 /* In case the set is empty. */
1288 if (set->alloc == 0)
f4efbdfb 1289 return __glibc_likely (re_node_set_init_1 (set, elem) == REG_NOERROR);
3b0bdc72 1290
f4efbdfb 1291 if (__glibc_unlikely (set->nelem) == 0)
3b0bdc72 1292 {
8503c987
UD
1293 /* We already guaranteed above that set->alloc != 0. */
1294 set->elems[0] = elem;
1295 ++set->nelem;
eb04c213 1296 return true;
3b0bdc72
UD
1297 }
1298
1299 /* Realloc if we need. */
8503c987 1300 if (set->alloc == set->nelem)
3b0bdc72 1301 {
eb04c213 1302 Idx *new_elems;
3b0bdc72 1303 set->alloc = set->alloc * 2;
eb04c213 1304 new_elems = re_realloc (set->elems, Idx, set->alloc);
f4efbdfb 1305 if (__glibc_unlikely (new_elems == NULL))
eb04c213 1306 return false;
2d87db5b 1307 set->elems = new_elems;
3b0bdc72 1308 }
8503c987
UD
1309
1310 /* Move the elements which follows the new element. Test the
1311 first element separately to skip a check in the inner loop. */
1312 if (elem < set->elems[0])
1313 {
1314 idx = 0;
59e7ebcc 1315 for (idx = set->nelem; idx > 0; idx--)
2da42bc0 1316 set->elems[idx] = set->elems[idx - 1];
8503c987 1317 }
3b0bdc72
UD
1318 else
1319 {
8503c987 1320 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
2da42bc0 1321 set->elems[idx] = set->elems[idx - 1];
3b0bdc72 1322 }
8503c987 1323
3b0bdc72
UD
1324 /* Insert the new element. */
1325 set->elems[idx] = elem;
1326 ++set->nelem;
eb04c213 1327 return true;
3b0bdc72
UD
1328}
1329
20dc2f79
UD
1330/* Insert the new element ELEM to the re_node_set* SET.
1331 SET should not already have any element greater than or equal to ELEM.
eb04c213 1332 Return true if successful. */
20dc2f79 1333
eb04c213 1334static bool
b41bd5bc 1335__attribute_warn_unused_result__
eb04c213 1336re_node_set_insert_last (re_node_set *set, Idx elem)
20dc2f79
UD
1337{
1338 /* Realloc if we need. */
1339 if (set->alloc == set->nelem)
1340 {
eb04c213 1341 Idx *new_elems;
20dc2f79 1342 set->alloc = (set->alloc + 1) * 2;
eb04c213 1343 new_elems = re_realloc (set->elems, Idx, set->alloc);
f4efbdfb 1344 if (__glibc_unlikely (new_elems == NULL))
eb04c213 1345 return false;
2d87db5b 1346 set->elems = new_elems;
20dc2f79
UD
1347 }
1348
1349 /* Insert the new element. */
1350 set->elems[set->nelem++] = elem;
eb04c213 1351 return true;
20dc2f79
UD
1352}
1353
3b0bdc72 1354/* Compare two node sets SET1 and SET2.
eb04c213 1355 Return true if SET1 and SET2 are equivalent. */
3b0bdc72 1356
eb04c213
AZ
1357static bool
1358__attribute__ ((pure))
e2f55264 1359re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
3b0bdc72 1360{
eb04c213 1361 Idx i;
3b0bdc72 1362 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
eb04c213 1363 return false;
59e7ebcc 1364 for (i = set1->nelem ; --i >= 0 ; )
3b0bdc72 1365 if (set1->elems[i] != set2->elems[i])
eb04c213
AZ
1366 return false;
1367 return true;
3b0bdc72
UD
1368}
1369
0742e48e 1370/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
3b0bdc72 1371
eb04c213
AZ
1372static Idx
1373__attribute__ ((pure))
1374re_node_set_contains (const re_node_set *set, Idx elem)
3b0bdc72 1375{
eb04c213 1376 __re_size_t idx, right, mid;
3b0bdc72
UD
1377 if (set->nelem <= 0)
1378 return 0;
1379
1380 /* Binary search the element. */
1381 idx = 0;
1382 right = set->nelem - 1;
1383 while (idx < right)
1384 {
1385 mid = (idx + right) / 2;
1386 if (set->elems[mid] < elem)
1b2c2628 1387 idx = mid + 1;
3b0bdc72 1388 else
1b2c2628 1389 right = mid;
3b0bdc72 1390 }
0742e48e 1391 return set->elems[idx] == elem ? idx + 1 : 0;
3b0bdc72
UD
1392}
1393
1394static void
eb04c213 1395re_node_set_remove_at (re_node_set *set, Idx idx)
3b0bdc72
UD
1396{
1397 if (idx < 0 || idx >= set->nelem)
1398 return;
3b0bdc72 1399 --set->nelem;
59e7ebcc
UD
1400 for (; idx < set->nelem; idx++)
1401 set->elems[idx] = set->elems[idx + 1];
3b0bdc72
UD
1402}
1403\f
1404
1405/* Add the token TOKEN to dfa->nodes, and return the index of the token.
eb04c213 1406 Or return -1 if an error occurred. */
3b0bdc72 1407
eb04c213 1408static Idx
e2f55264 1409re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
3b0bdc72 1410{
f4efbdfb 1411 if (__glibc_unlikely (dfa->nodes_len >= dfa->nodes_alloc))
3b0bdc72 1412 {
01ed6ceb 1413 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
eb04c213 1414 Idx *new_nexts, *new_indices;
963d8d78 1415 re_node_set *new_edests, *new_eclosures;
01ed6ceb
UD
1416 re_token_t *new_nodes;
1417
22364644
UD
1418 /* Avoid overflows in realloc. */
1419 const size_t max_object_size = MAX (sizeof (re_token_t),
1420 MAX (sizeof (re_node_set),
eb04c213 1421 sizeof (Idx)));
f4efbdfb
PE
1422 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
1423 < new_nodes_alloc))
01ed6ceb 1424 return -1;
02f3550c 1425
01ed6ceb 1426 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
f4efbdfb 1427 if (__glibc_unlikely (new_nodes == NULL))
0ecb606c 1428 return -1;
2d87db5b 1429 dfa->nodes = new_nodes;
eb04c213
AZ
1430 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1431 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
02f3550c
UD
1432 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1433 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
f4efbdfb
PE
1434 if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL
1435 || new_edests == NULL || new_eclosures == NULL))
eb04c213
AZ
1436 {
1437 re_free (new_nexts);
1438 re_free (new_indices);
1439 re_free (new_edests);
1440 re_free (new_eclosures);
1441 return -1;
1442 }
02f3550c
UD
1443 dfa->nexts = new_nexts;
1444 dfa->org_indices = new_indices;
1445 dfa->edests = new_edests;
1446 dfa->eclosures = new_eclosures;
951d6408 1447 dfa->nodes_alloc = new_nodes_alloc;
3b0bdc72
UD
1448 }
1449 dfa->nodes[dfa->nodes_len] = token;
485d775d 1450 dfa->nodes[dfa->nodes_len].constraint = 0;
02f3550c
UD
1451#ifdef RE_ENABLE_I18N
1452 dfa->nodes[dfa->nodes_len].accept_mb =
eb04c213
AZ
1453 ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
1454 || token.type == COMPLEX_BRACKET);
02f3550c
UD
1455#endif
1456 dfa->nexts[dfa->nodes_len] = -1;
1457 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1458 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
3b0bdc72
UD
1459 return dfa->nodes_len++;
1460}
1461
eb04c213 1462static re_hashval_t
e2f55264 1463calc_state_hash (const re_node_set *nodes, unsigned int context)
3b0bdc72 1464{
eb04c213
AZ
1465 re_hashval_t hash = nodes->nelem + context;
1466 Idx i;
3b0bdc72
UD
1467 for (i = 0 ; i < nodes->nelem ; i++)
1468 hash += nodes->elems[i];
1469 return hash;
1470}
1471
1472/* Search for the state whose node_set is equivalent to NODES.
1473 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1474 Otherwise create the new one and return it. In case of an error
1475 return NULL and set the error code in ERR.
1476 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1477 return value is NULL and ERR is REG_NOERROR.
1478 - We never return non-NULL value in case of any errors, it is for
1479 optimization. */
a9388965 1480
e2f55264 1481static re_dfastate_t *
b41bd5bc 1482__attribute_warn_unused_result__
e2f55264
UD
1483re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1484 const re_node_set *nodes)
3b0bdc72 1485{
eb04c213 1486 re_hashval_t hash;
a9388965 1487 re_dfastate_t *new_state;
3b0bdc72 1488 struct re_state_table_entry *spot;
eb04c213
AZ
1489 Idx i;
1490#if defined GCC_LINT || defined lint
1491 /* Suppress bogus uninitialized-variable warnings. */
1492 *err = REG_NOERROR;
1493#endif
f4efbdfb 1494 if (__glibc_unlikely (nodes->nelem == 0))
a9388965
UD
1495 {
1496 *err = REG_NOERROR;
1497 return NULL;
1498 }
3b0bdc72
UD
1499 hash = calc_state_hash (nodes, 0);
1500 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1501
bc15410e 1502 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1503 {
bc15410e
UD
1504 re_dfastate_t *state = spot->array[i];
1505 if (hash != state->hash)
1b2c2628 1506 continue;
bc15410e 1507 if (re_node_set_compare (&state->nodes, nodes))
1b2c2628 1508 return state;
3b0bdc72 1509 }
3b0bdc72
UD
1510
1511 /* There are no appropriate state in the dfa, create the new one. */
a9388965 1512 new_state = create_ci_newstate (dfa, nodes, hash);
f4efbdfb 1513 if (__glibc_unlikely (new_state == NULL))
2d87db5b
UD
1514 *err = REG_ESPACE;
1515
1516 return new_state;
3b0bdc72
UD
1517}
1518
1519/* Search for the state whose node_set is equivalent to NODES and
1520 whose context is equivalent to CONTEXT.
1521 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1522 Otherwise create the new one and return it. In case of an error
1523 return NULL and set the error code in ERR.
1524 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1525 return value is NULL and ERR is REG_NOERROR.
1526 - We never return non-NULL value in case of any errors, it is for
1527 optimization. */
a9388965 1528
db26cb75 1529static re_dfastate_t *
b41bd5bc 1530__attribute_warn_unused_result__
e2f55264
UD
1531re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1532 const re_node_set *nodes, unsigned int context)
3b0bdc72 1533{
eb04c213 1534 re_hashval_t hash;
a9388965 1535 re_dfastate_t *new_state;
3b0bdc72 1536 struct re_state_table_entry *spot;
eb04c213
AZ
1537 Idx i;
1538#if defined GCC_LINT || defined lint
1539 /* Suppress bogus uninitialized-variable warnings. */
1540 *err = REG_NOERROR;
1541#endif
3b0bdc72 1542 if (nodes->nelem == 0)
a9388965
UD
1543 {
1544 *err = REG_NOERROR;
1545 return NULL;
1546 }
3b0bdc72
UD
1547 hash = calc_state_hash (nodes, context);
1548 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1549
bc15410e 1550 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1551 {
bc15410e 1552 re_dfastate_t *state = spot->array[i];
a0a8461c
UD
1553 if (state->hash == hash
1554 && state->context == context
1555 && re_node_set_compare (state->entrance_nodes, nodes))
1b2c2628 1556 return state;
3b0bdc72 1557 }
eb04c213 1558 /* There are no appropriate state in 'dfa', create the new one. */
a9388965 1559 new_state = create_cd_newstate (dfa, nodes, context, hash);
f4efbdfb 1560 if (__glibc_unlikely (new_state == NULL))
2d87db5b
UD
1561 *err = REG_ESPACE;
1562
1563 return new_state;
3b0bdc72
UD
1564}
1565
5cf1ec52
UD
1566/* Finish initialization of the new state NEWSTATE, and using its hash value
1567 HASH put in the appropriate bucket of DFA's state table. Return value
1568 indicates the error code if failed. */
a9388965 1569
5cf1ec52 1570static reg_errcode_t
2da42bc0 1571__attribute_warn_unused_result__
e2f55264 1572register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
eb04c213 1573 re_hashval_t hash)
3b0bdc72 1574{
5cf1ec52 1575 struct re_state_table_entry *spot;
1b2c2628 1576 reg_errcode_t err;
eb04c213 1577 Idx i;
5cf1ec52
UD
1578
1579 newstate->hash = hash;
1580 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
f4efbdfb 1581 if (__glibc_unlikely (err != REG_NOERROR))
5cf1ec52
UD
1582 return REG_ESPACE;
1583 for (i = 0; i < newstate->nodes.nelem; i++)
1b2c2628 1584 {
eb04c213 1585 Idx elem = newstate->nodes.elems[i];
5cf1ec52 1586 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
eb04c213 1587 if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
2da42bc0 1588 return REG_ESPACE;
1b2c2628 1589 }
3b0bdc72 1590
3b0bdc72 1591 spot = dfa->state_table + (hash & dfa->state_hash_mask);
f4efbdfb 1592 if (__glibc_unlikely (spot->alloc <= spot->num))
3b0bdc72 1593 {
eb04c213 1594 Idx new_alloc = 2 * spot->num + 2;
951d6408
UD
1595 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1596 new_alloc);
f4efbdfb 1597 if (__glibc_unlikely (new_array == NULL))
1b2c2628
UD
1598 return REG_ESPACE;
1599 spot->array = new_array;
951d6408 1600 spot->alloc = new_alloc;
3b0bdc72 1601 }
bc15410e 1602 spot->array[spot->num++] = newstate;
a9388965 1603 return REG_NOERROR;
3b0bdc72
UD
1604}
1605
db26cb75
UD
1606static void
1607free_state (re_dfastate_t *state)
1608{
1609 re_node_set_free (&state->non_eps_nodes);
1610 re_node_set_free (&state->inveclosure);
1611 if (state->entrance_nodes != &state->nodes)
1612 {
1613 re_node_set_free (state->entrance_nodes);
1614 re_free (state->entrance_nodes);
1615 }
1616 re_node_set_free (&state->nodes);
1617 re_free (state->word_trtable);
1618 re_free (state->trtable);
1619 re_free (state);
1620}
1621
5069ff32 1622/* Create the new state which is independent of contexts.
a9388965
UD
1623 Return the new state if succeeded, otherwise return NULL. */
1624
3b0bdc72 1625static re_dfastate_t *
b41bd5bc 1626__attribute_warn_unused_result__
e2f55264 1627create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
eb04c213 1628 re_hashval_t hash)
3b0bdc72 1629{
eb04c213 1630 Idx i;
a9388965 1631 reg_errcode_t err;
3b0bdc72 1632 re_dfastate_t *newstate;
5cf1ec52
UD
1633
1634 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
f4efbdfb 1635 if (__glibc_unlikely (newstate == NULL))
a9388965 1636 return NULL;
5cf1ec52 1637 err = re_node_set_init_copy (&newstate->nodes, nodes);
f4efbdfb 1638 if (__glibc_unlikely (err != REG_NOERROR))
5cf1ec52
UD
1639 {
1640 re_free (newstate);
1641 return NULL;
1642 }
3b0bdc72 1643
5cf1ec52 1644 newstate->entrance_nodes = &newstate->nodes;
3b0bdc72
UD
1645 for (i = 0 ; i < nodes->nelem ; i++)
1646 {
1647 re_token_t *node = dfa->nodes + nodes->elems[i];
1648 re_token_type_t type = node->type;
485d775d 1649 if (type == CHARACTER && !node->constraint)
1b2c2628 1650 continue;
02f3550c
UD
1651#ifdef RE_ENABLE_I18N
1652 newstate->accept_mb |= node->accept_mb;
1653#endif /* RE_ENABLE_I18N */
3b0bdc72
UD
1654
1655 /* If the state has the halt node, the state is a halt state. */
02f3550c 1656 if (type == END_OF_RE)
1b2c2628 1657 newstate->halt = 1;
3b0bdc72 1658 else if (type == OP_BACK_REF)
1b2c2628 1659 newstate->has_backref = 1;
485d775d 1660 else if (type == ANCHOR || node->constraint)
1b2c2628 1661 newstate->has_constraint = 1;
3b0bdc72 1662 }
a9388965 1663 err = register_state (dfa, newstate, hash);
f4efbdfb 1664 if (__glibc_unlikely (err != REG_NOERROR))
1b2c2628
UD
1665 {
1666 free_state (newstate);
1667 newstate = NULL;
1668 }
1669 return newstate;
3b0bdc72
UD
1670}
1671
a9388965
UD
1672/* Create the new state which is depend on the context CONTEXT.
1673 Return the new state if succeeded, otherwise return NULL. */
1674
3b0bdc72 1675static re_dfastate_t *
b41bd5bc 1676__attribute_warn_unused_result__
e2f55264 1677create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
eb04c213 1678 unsigned int context, re_hashval_t hash)
3b0bdc72 1679{
eb04c213 1680 Idx i, nctx_nodes = 0;
a9388965 1681 reg_errcode_t err;
3b0bdc72
UD
1682 re_dfastate_t *newstate;
1683
5cf1ec52 1684 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
f4efbdfb 1685 if (__glibc_unlikely (newstate == NULL))
a9388965 1686 return NULL;
5cf1ec52 1687 err = re_node_set_init_copy (&newstate->nodes, nodes);
f4efbdfb 1688 if (__glibc_unlikely (err != REG_NOERROR))
5cf1ec52
UD
1689 {
1690 re_free (newstate);
1691 return NULL;
1692 }
1693
3b0bdc72
UD
1694 newstate->context = context;
1695 newstate->entrance_nodes = &newstate->nodes;
1696
1697 for (i = 0 ; i < nodes->nelem ; i++)
1698 {
3b0bdc72
UD
1699 re_token_t *node = dfa->nodes + nodes->elems[i];
1700 re_token_type_t type = node->type;
0caca71a 1701 unsigned int constraint = node->constraint;
3b0bdc72 1702
485d775d 1703 if (type == CHARACTER && !constraint)
1b2c2628 1704 continue;
a334319f 1705#ifdef RE_ENABLE_I18N
02f3550c 1706 newstate->accept_mb |= node->accept_mb;
a334319f 1707#endif /* RE_ENABLE_I18N */
02f3550c
UD
1708
1709 /* If the state has the halt node, the state is a halt state. */
1710 if (type == END_OF_RE)
1711 newstate->halt = 1;
3b0bdc72 1712 else if (type == OP_BACK_REF)
1b2c2628 1713 newstate->has_backref = 1;
3b0bdc72
UD
1714
1715 if (constraint)
1b2c2628
UD
1716 {
1717 if (newstate->entrance_nodes == &newstate->nodes)
1718 {
1719 newstate->entrance_nodes = re_malloc (re_node_set, 1);
f4efbdfb 1720 if (__glibc_unlikely (newstate->entrance_nodes == NULL))
1b2c2628
UD
1721 {
1722 free_state (newstate);
1723 return NULL;
1724 }
2da42bc0
UD
1725 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1726 != REG_NOERROR)
1727 return NULL;
1b2c2628
UD
1728 nctx_nodes = 0;
1729 newstate->has_constraint = 1;
1730 }
1731
1732 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1733 {
1734 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1735 ++nctx_nodes;
1736 }
1737 }
3b0bdc72 1738 }
a9388965 1739 err = register_state (dfa, newstate, hash);
f4efbdfb 1740 if (__glibc_unlikely (err != REG_NOERROR))
1b2c2628
UD
1741 {
1742 free_state (newstate);
1743 newstate = NULL;
1744 }
1745 return newstate;
1746}