]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regex_internal.c
Update copyright notices with scripts/update-copyrights
[thirdparty/glibc.git] / posix / regex_internal.c
CommitLineData
3b0bdc72 1/* Extended regular expression matching and search library.
d4697bc9 2 Copyright (C) 2002-2014 Free Software Foundation, Inc.
3b0bdc72
UD
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
3b0bdc72 19
92b27c74 20static void re_string_construct_common (const char *str, int len,
1b2c2628 21 re_string_t *pstr,
3c0fb574 22 RE_TRANSLATE_TYPE trans, int icase,
8cae99db 23 const re_dfa_t *dfa) internal_function;
01ed6ceb 24static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
1b2c2628 25 const re_node_set *nodes,
8cae99db 26 unsigned int hash) internal_function;
01ed6ceb 27static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
1b2c2628
UD
28 const re_node_set *nodes,
29 unsigned int context,
8cae99db 30 unsigned int hash) internal_function;
3b0bdc72
UD
31\f
32/* Functions for string operation. */
33
612546c6
UD
34/* This function allocate the buffers. It is necessary to call
35 re_string_reconstruct before using the object. */
36
3b0bdc72 37static reg_errcode_t
2da42bc0 38internal_function __attribute_warn_unused_result__
e2f55264
UD
39re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
40 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
3b0bdc72
UD
41{
42 reg_errcode_t ret;
97fd3a30
UD
43 int init_buf_len;
44
45 /* Ensure at least one character fits into the buffers. */
46 if (init_len < dfa->mb_cur_max)
47 init_len = dfa->mb_cur_max;
48 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
f0c7c524 49 re_string_construct_common (str, len, pstr, trans, icase, dfa);
612546c6
UD
50
51 ret = re_string_realloc_buffers (pstr, init_buf_len);
52 if (BE (ret != REG_NOERROR, 0))
53 return ret;
54
56b168be
UD
55 pstr->word_char = dfa->word_char;
56 pstr->word_ops_used = dfa->word_ops_used;
37369d1c
UD
57 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
58 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
59 pstr->valid_raw_len = pstr->valid_len;
612546c6
UD
60 return REG_NOERROR;
61}
62
63/* This function allocate the buffers, and initialize them. */
64
65static reg_errcode_t
2da42bc0 66internal_function __attribute_warn_unused_result__
e2f55264
UD
67re_string_construct (re_string_t *pstr, const char *str, int len,
68 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
612546c6
UD
69{
70 reg_errcode_t ret;
56b168be 71 memset (pstr, '\0', sizeof (re_string_t));
f0c7c524 72 re_string_construct_common (str, len, pstr, trans, icase, dfa);
612546c6
UD
73
74 if (len > 0)
3b0bdc72 75 {
612546c6 76 ret = re_string_realloc_buffers (pstr, len + 1);
bc15410e 77 if (BE (ret != REG_NOERROR, 0))
1b2c2628 78 return ret;
3b0bdc72 79 }
37369d1c 80 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
612546c6
UD
81
82 if (icase)
83 {
84#ifdef RE_ENABLE_I18N
f0c7c524 85 if (dfa->mb_cur_max > 1)
37369d1c
UD
86 {
87 while (1)
88 {
89 ret = build_wcs_upper_buffer (pstr);
90 if (BE (ret != REG_NOERROR, 0))
91 return ret;
92 if (pstr->valid_raw_len >= len)
93 break;
94 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
95 break;
96 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
97 if (BE (ret != REG_NOERROR, 0))
98 return ret;
99 }
100 }
612546c6 101 else
3b0bdc72 102#endif /* RE_ENABLE_I18N */
1b2c2628 103 build_upper_buffer (pstr);
612546c6
UD
104 }
105 else
3b0bdc72 106 {
612546c6 107#ifdef RE_ENABLE_I18N
f0c7c524 108 if (dfa->mb_cur_max > 1)
1b2c2628 109 build_wcs_buffer (pstr);
612546c6
UD
110 else
111#endif /* RE_ENABLE_I18N */
1b2c2628
UD
112 {
113 if (trans != NULL)
114 re_string_translate_buffer (pstr);
115 else
37369d1c
UD
116 {
117 pstr->valid_len = pstr->bufs_len;
118 pstr->valid_raw_len = pstr->bufs_len;
119 }
1b2c2628 120 }
3b0bdc72 121 }
612546c6 122
3b0bdc72
UD
123 return REG_NOERROR;
124}
125
612546c6 126/* Helper functions for re_string_allocate, and re_string_construct. */
3b0bdc72
UD
127
128static reg_errcode_t
2da42bc0 129internal_function __attribute_warn_unused_result__
e2f55264 130re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
3b0bdc72 131{
3b0bdc72 132#ifdef RE_ENABLE_I18N
3c0fb574 133 if (pstr->mb_cur_max > 1)
3b0bdc72 134 {
54dd0ab3
UD
135 wint_t *new_wcs;
136
137 /* Avoid overflow in realloc. */
138 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
139 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
140 return REG_ESPACE;
141
142 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
2d87db5b 143 if (BE (new_wcs == NULL, 0))
1b2c2628 144 return REG_ESPACE;
2d87db5b 145 pstr->wcs = new_wcs;
37369d1c
UD
146 if (pstr->offsets != NULL)
147 {
2d87db5b
UD
148 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
149 if (BE (new_offsets == NULL, 0))
37369d1c 150 return REG_ESPACE;
2d87db5b 151 pstr->offsets = new_offsets;
37369d1c 152 }
3b0bdc72 153 }
3b0bdc72 154#endif /* RE_ENABLE_I18N */
37369d1c 155 if (pstr->mbs_allocated)
3b0bdc72 156 {
2d87db5b
UD
157 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
158 new_buf_len);
159 if (BE (new_mbs == NULL, 0))
1b2c2628 160 return REG_ESPACE;
2d87db5b 161 pstr->mbs = new_mbs;
3b0bdc72 162 }
612546c6 163 pstr->bufs_len = new_buf_len;
3b0bdc72
UD
164 return REG_NOERROR;
165}
166
612546c6 167
3b0bdc72 168static void
1ba81cea 169internal_function
e2f55264
UD
170re_string_construct_common (const char *str, int len, re_string_t *pstr,
171 RE_TRANSLATE_TYPE trans, int icase,
172 const re_dfa_t *dfa)
3b0bdc72 173{
c202c2c5 174 pstr->raw_mbs = (const unsigned char *) str;
3b0bdc72 175 pstr->len = len;
37369d1c 176 pstr->raw_len = len;
997470b3 177 pstr->trans = trans;
612546c6 178 pstr->icase = icase ? 1 : 0;
37369d1c 179 pstr->mbs_allocated = (trans != NULL || icase);
f0c7c524
UD
180 pstr->mb_cur_max = dfa->mb_cur_max;
181 pstr->is_utf8 = dfa->is_utf8;
182 pstr->map_notascii = dfa->map_notascii;
37369d1c
UD
183 pstr->stop = pstr->len;
184 pstr->raw_stop = pstr->stop;
3b0bdc72
UD
185}
186
187#ifdef RE_ENABLE_I18N
188
612546c6 189/* Build wide character buffer PSTR->WCS.
3b0bdc72
UD
190 If the byte sequence of the string are:
191 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
192 Then wide character buffer will be:
193 <wc1> , WEOF , <wc2> , WEOF , <wc3>
194 We use WEOF for padding, they indicate that the position isn't
612546c6 195 a first byte of a multibyte character.
3b0bdc72 196
612546c6
UD
197 Note that this function assumes PSTR->VALID_LEN elements are already
198 built and starts from PSTR->VALID_LEN. */
199
200static void
1ba81cea 201internal_function
e2f55264 202build_wcs_buffer (re_string_t *pstr)
3b0bdc72 203{
37369d1c 204#ifdef _LIBC
ec73fd87
UD
205 unsigned char buf[MB_LEN_MAX];
206 assert (MB_LEN_MAX >= pstr->mb_cur_max);
37369d1c
UD
207#else
208 unsigned char buf[64];
209#endif
612546c6 210 mbstate_t prev_st;
88764ae2
UD
211 int byte_idx, end_idx, remain_len;
212 size_t mbclen;
37369d1c 213
612546c6
UD
214 /* Build the buffers from pstr->valid_len to either pstr->len or
215 pstr->bufs_len. */
37369d1c 216 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
612546c6
UD
217 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
218 {
219 wchar_t wc;
37369d1c
UD
220 const char *p;
221
612546c6
UD
222 remain_len = end_idx - byte_idx;
223 prev_st = pstr->cur_state;
37369d1c
UD
224 /* Apply the translation if we need. */
225 if (BE (pstr->trans != NULL, 0))
226 {
227 int i, ch;
228
229 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
230 {
231 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
d40eb37a 232 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
37369d1c
UD
233 }
234 p = (const char *) buf;
235 }
236 else
237 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
b3918c7d 238 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
8887a920
UD
239 if (BE (mbclen == (size_t) -1 || mbclen == 0
240 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
1b2c2628
UD
241 {
242 /* We treat these cases as a singlebyte character. */
243 mbclen = 1;
244 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
37369d1c
UD
245 if (BE (pstr->trans != NULL, 0))
246 wc = pstr->trans[wc];
1b2c2628
UD
247 pstr->cur_state = prev_st;
248 }
8887a920
UD
249 else if (BE (mbclen == (size_t) -2, 0))
250 {
251 /* The buffer doesn't have enough space, finish to build. */
252 pstr->cur_state = prev_st;
253 break;
254 }
612546c6 255
3b0bdc72 256 /* Write wide character and padding. */
612546c6
UD
257 pstr->wcs[byte_idx++] = wc;
258 /* Write paddings. */
259 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
1b2c2628 260 pstr->wcs[byte_idx++] = WEOF;
3b0bdc72 261 }
612546c6 262 pstr->valid_len = byte_idx;
37369d1c 263 pstr->valid_raw_len = byte_idx;
3b0bdc72
UD
264}
265
612546c6
UD
266/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
267 but for REG_ICASE. */
268
4f7e7f8e 269static reg_errcode_t
2da42bc0 270internal_function __attribute_warn_unused_result__
e2f55264 271build_wcs_upper_buffer (re_string_t *pstr)
3b0bdc72 272{
612546c6 273 mbstate_t prev_st;
88764ae2
UD
274 int src_idx, byte_idx, end_idx, remain_len;
275 size_t mbclen;
37369d1c 276#ifdef _LIBC
ec73fd87
UD
277 char buf[MB_LEN_MAX];
278 assert (MB_LEN_MAX >= pstr->mb_cur_max);
37369d1c 279#else
1c99f950 280 char buf[64];
37369d1c
UD
281#endif
282
283 byte_idx = pstr->valid_len;
284 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
f0c7c524 285
e40a38b3
UD
286 /* The following optimization assumes that ASCII characters can be
287 mapped to wide characters with a simple cast. */
37369d1c
UD
288 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
289 {
290 while (byte_idx < end_idx)
1b2c2628 291 {
f0c7c524 292 wchar_t wc;
37369d1c
UD
293
294 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
295 && mbsinit (&pstr->cur_state))
296 {
297 /* In case of a singlebyte character. */
298 pstr->mbs[byte_idx]
299 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
300 /* The next step uses the assumption that wchar_t is encoded
e40a38b3 301 ASCII-safe: all ASCII values can be converted like this. */
37369d1c
UD
302 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
303 ++byte_idx;
304 continue;
305 }
306
f0c7c524
UD
307 remain_len = end_idx - byte_idx;
308 prev_st = pstr->cur_state;
b3918c7d
UD
309 mbclen = __mbrtowc (&wc,
310 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
311 + byte_idx), remain_len, &pstr->cur_state);
88764ae2 312 if (BE (mbclen + 2 > 2, 1))
1b2c2628 313 {
37369d1c 314 wchar_t wcu = wc;
f0c7c524 315 if (iswlower (wc))
37369d1c 316 {
88764ae2 317 size_t mbcdlen;
37369d1c
UD
318
319 wcu = towupper (wc);
320 mbcdlen = wcrtomb (buf, wcu, &prev_st);
321 if (BE (mbclen == mbcdlen, 1))
322 memcpy (pstr->mbs + byte_idx, buf, mbclen);
323 else
324 {
325 src_idx = byte_idx;
326 goto offsets_needed;
327 }
328 }
f0c7c524
UD
329 else
330 memcpy (pstr->mbs + byte_idx,
331 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
37369d1c 332 pstr->wcs[byte_idx++] = wcu;
f0c7c524
UD
333 /* Write paddings. */
334 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
335 pstr->wcs[byte_idx++] = WEOF;
336 }
8887a920
UD
337 else if (mbclen == (size_t) -1 || mbclen == 0
338 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
f0c7c524 339 {
8887a920
UD
340 /* It is an invalid character, an incomplete character
341 at the end of the string, or '\0'. Just use the byte. */
f0c7c524 342 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
02b50340
UD
343 pstr->mbs[byte_idx] = ch;
344 /* And also cast it to wide char. */
345 pstr->wcs[byte_idx++] = (wchar_t) ch;
f0c7c524
UD
346 if (BE (mbclen == (size_t) -1, 0))
347 pstr->cur_state = prev_st;
1b2c2628 348 }
1b2c2628 349 else
f0c7c524
UD
350 {
351 /* The buffer doesn't have enough space, finish to build. */
352 pstr->cur_state = prev_st;
353 break;
354 }
1b2c2628 355 }
37369d1c
UD
356 pstr->valid_len = byte_idx;
357 pstr->valid_raw_len = byte_idx;
358 return REG_NOERROR;
359 }
f0c7c524 360 else
37369d1c 361 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
f0c7c524
UD
362 {
363 wchar_t wc;
37369d1c 364 const char *p;
e40a38b3 365 offsets_needed:
f0c7c524
UD
366 remain_len = end_idx - byte_idx;
367 prev_st = pstr->cur_state;
37369d1c 368 if (BE (pstr->trans != NULL, 0))
f0c7c524 369 {
37369d1c
UD
370 int i, ch;
371
372 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
f0c7c524 373 {
37369d1c
UD
374 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
375 buf[i] = pstr->trans[ch];
f0c7c524 376 }
37369d1c 377 p = (const char *) buf;
f0c7c524 378 }
37369d1c
UD
379 else
380 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
b3918c7d 381 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
88764ae2 382 if (BE (mbclen + 2 > 2, 1))
f0c7c524 383 {
37369d1c 384 wchar_t wcu = wc;
f0c7c524 385 if (iswlower (wc))
37369d1c 386 {
88764ae2 387 size_t mbcdlen;
37369d1c
UD
388
389 wcu = towupper (wc);
17568537 390 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
37369d1c
UD
391 if (BE (mbclen == mbcdlen, 1))
392 memcpy (pstr->mbs + byte_idx, buf, mbclen);
88764ae2 393 else if (mbcdlen != (size_t) -1)
37369d1c 394 {
88764ae2 395 size_t i;
37369d1c
UD
396
397 if (byte_idx + mbcdlen > pstr->bufs_len)
398 {
399 pstr->cur_state = prev_st;
400 break;
401 }
402
403 if (pstr->offsets == NULL)
404 {
405 pstr->offsets = re_malloc (int, pstr->bufs_len);
406
407 if (pstr->offsets == NULL)
408 return REG_ESPACE;
409 }
410 if (!pstr->offsets_needed)
411 {
88764ae2 412 for (i = 0; i < (size_t) byte_idx; ++i)
37369d1c
UD
413 pstr->offsets[i] = i;
414 pstr->offsets_needed = 1;
415 }
416
417 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
418 pstr->wcs[byte_idx] = wcu;
419 pstr->offsets[byte_idx] = src_idx;
420 for (i = 1; i < mbcdlen; ++i)
421 {
422 pstr->offsets[byte_idx + i]
423 = src_idx + (i < mbclen ? i : mbclen - 1);
424 pstr->wcs[byte_idx + i] = WEOF;
425 }
426 pstr->len += mbcdlen - mbclen;
427 if (pstr->raw_stop > src_idx)
428 pstr->stop += mbcdlen - mbclen;
429 end_idx = (pstr->bufs_len > pstr->len)
430 ? pstr->len : pstr->bufs_len;
431 byte_idx += mbcdlen;
432 src_idx += mbclen;
433 continue;
434 }
2da42bc0
UD
435 else
436 memcpy (pstr->mbs + byte_idx, p, mbclen);
37369d1c 437 }
f0c7c524 438 else
37369d1c
UD
439 memcpy (pstr->mbs + byte_idx, p, mbclen);
440
441 if (BE (pstr->offsets_needed != 0, 0))
442 {
88764ae2 443 size_t i;
37369d1c
UD
444 for (i = 0; i < mbclen; ++i)
445 pstr->offsets[byte_idx + i] = src_idx + i;
446 }
447 src_idx += mbclen;
448
449 pstr->wcs[byte_idx++] = wcu;
f0c7c524
UD
450 /* Write paddings. */
451 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
452 pstr->wcs[byte_idx++] = WEOF;
453 }
8887a920
UD
454 else if (mbclen == (size_t) -1 || mbclen == 0
455 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
02b50340 456 {
37369d1c
UD
457 /* It is an invalid character or '\0'. Just use the byte. */
458 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
459
460 if (BE (pstr->trans != NULL, 0))
461 ch = pstr->trans [ch];
02b50340 462 pstr->mbs[byte_idx] = ch;
37369d1c
UD
463
464 if (BE (pstr->offsets_needed != 0, 0))
465 pstr->offsets[byte_idx] = src_idx;
466 ++src_idx;
467
02b50340
UD
468 /* And also cast it to wide char. */
469 pstr->wcs[byte_idx++] = (wchar_t) ch;
470 if (BE (mbclen == (size_t) -1, 0))
471 pstr->cur_state = prev_st;
472 }
f0c7c524
UD
473 else
474 {
475 /* The buffer doesn't have enough space, finish to build. */
476 pstr->cur_state = prev_st;
477 break;
478 }
479 }
612546c6 480 pstr->valid_len = byte_idx;
37369d1c
UD
481 pstr->valid_raw_len = src_idx;
482 return REG_NOERROR;
612546c6
UD
483}
484
485/* Skip characters until the index becomes greater than NEW_RAW_IDX.
486 Return the index. */
487
488static int
e2f55264
UD
489internal_function
490re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
612546c6
UD
491{
492 mbstate_t prev_st;
88764ae2
UD
493 int rawbuf_idx;
494 size_t mbclen;
4f08104c 495 wint_t wc = WEOF;
612546c6
UD
496
497 /* Skip the characters which are not necessary to check. */
37369d1c 498 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
612546c6
UD
499 rawbuf_idx < new_raw_idx;)
500 {
4f08104c 501 wchar_t wc2;
71b5d1c5 502 int remain_len = pstr->raw_len - rawbuf_idx;
612546c6 503 prev_st = pstr->cur_state;
4f08104c 504 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
b3918c7d 505 remain_len, &pstr->cur_state);
5ddf954c 506 if (BE ((ssize_t) mbclen <= 0, 0))
1b2c2628 507 {
33e63e79
UD
508 /* We treat these cases as a single byte character. */
509 if (mbclen == 0 || remain_len == 0)
510 wc = L'\0';
511 else
512 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
1b2c2628
UD
513 mbclen = 1;
514 pstr->cur_state = prev_st;
515 }
4f08104c
UD
516 else
517 wc = (wint_t) wc2;
612546c6
UD
518 /* Then proceed the next character. */
519 rawbuf_idx += mbclen;
520 }
4f08104c 521 *last_wc = wc;
612546c6 522 return rawbuf_idx;
3b0bdc72
UD
523}
524#endif /* RE_ENABLE_I18N */
525
1b2c2628 526/* Build the buffer PSTR->MBS, and apply the translation if we need.
612546c6
UD
527 This function is used in case of REG_ICASE. */
528
529static void
1ba81cea 530internal_function
e2f55264 531build_upper_buffer (re_string_t *pstr)
3b0bdc72 532{
612546c6
UD
533 int char_idx, end_idx;
534 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
3b0bdc72 535
612546c6 536 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
3b0bdc72 537 {
612546c6 538 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
37369d1c
UD
539 if (BE (pstr->trans != NULL, 0))
540 ch = pstr->trans[ch];
612546c6 541 if (islower (ch))
1b2c2628 542 pstr->mbs[char_idx] = toupper (ch);
3b0bdc72 543 else
1b2c2628 544 pstr->mbs[char_idx] = ch;
3b0bdc72 545 }
612546c6 546 pstr->valid_len = char_idx;
37369d1c 547 pstr->valid_raw_len = char_idx;
3b0bdc72
UD
548}
549
612546c6 550/* Apply TRANS to the buffer in PSTR. */
3b0bdc72 551
612546c6 552static void
1ba81cea 553internal_function
e2f55264 554re_string_translate_buffer (re_string_t *pstr)
3b0bdc72 555{
612546c6
UD
556 int buf_idx, end_idx;
557 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
558
559 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
3b0bdc72 560 {
612546c6 561 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
37369d1c 562 pstr->mbs[buf_idx] = pstr->trans[ch];
3b0bdc72 563 }
612546c6
UD
564
565 pstr->valid_len = buf_idx;
37369d1c 566 pstr->valid_raw_len = buf_idx;
612546c6
UD
567}
568
569/* This function re-construct the buffers.
3c0fb574 570 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
612546c6
UD
571 convert to upper case in case of REG_ICASE, apply translation. */
572
573static reg_errcode_t
2da42bc0 574internal_function __attribute_warn_unused_result__
e2f55264 575re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
612546c6
UD
576{
577 int offset = idx - pstr->raw_mbs_idx;
bb677c95 578 if (BE (offset < 0, 0))
612546c6
UD
579 {
580 /* Reset buffer. */
434d3784 581#ifdef RE_ENABLE_I18N
3c0fb574 582 if (pstr->mb_cur_max > 1)
1b2c2628 583 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
434d3784 584#endif /* RE_ENABLE_I18N */
37369d1c
UD
585 pstr->len = pstr->raw_len;
586 pstr->stop = pstr->raw_stop;
587 pstr->valid_len = 0;
588 pstr->raw_mbs_idx = 0;
589 pstr->valid_raw_len = 0;
590 pstr->offsets_needed = 0;
612546c6 591 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
1b2c2628 592 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
37369d1c 593 if (!pstr->mbs_allocated)
1b2c2628 594 pstr->mbs = (unsigned char *) pstr->raw_mbs;
612546c6
UD
595 offset = idx;
596 }
597
bb677c95 598 if (BE (offset != 0, 1))
612546c6 599 {
ba40cc15
UD
600 /* Should the already checked characters be kept? */
601 if (BE (offset < pstr->valid_raw_len, 1))
a334319f
UD
602 {
603 /* Yes, move them to the front of the buffer. */
0ecb606c 604#ifdef RE_ENABLE_I18N
ba40cc15
UD
605 if (BE (pstr->offsets_needed, 0))
606 {
607 int low = 0, high = pstr->valid_len, mid;
608 do
609 {
610 mid = (high + low) / 2;
611 if (pstr->offsets[mid] > offset)
612 high = mid;
613 else if (pstr->offsets[mid] < offset)
614 low = mid + 1;
615 else
616 break;
617 }
618 while (low < high);
619 if (pstr->offsets[mid] < offset)
620 ++mid;
621 pstr->tip_context = re_string_context_at (pstr, mid - 1,
622 eflags);
623 /* This can be quite complicated, so handle specially
624 only the common and easy case where the character with
625 different length representation of lower and upper
626 case is present at or after offset. */
627 if (pstr->valid_len > offset
628 && mid == offset && pstr->offsets[mid] == offset)
629 {
630 memmove (pstr->wcs, pstr->wcs + offset,
631 (pstr->valid_len - offset) * sizeof (wint_t));
632 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
633 pstr->valid_len -= offset;
634 pstr->valid_raw_len -= offset;
635 for (low = 0; low < pstr->valid_len; low++)
636 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
637 }
638 else
639 {
640 /* Otherwise, just find out how long the partial multibyte
641 character at offset is and fill it with WEOF/255. */
642 pstr->len = pstr->raw_len - idx + offset;
643 pstr->stop = pstr->raw_stop - idx + offset;
644 pstr->offsets_needed = 0;
645 while (mid > 0 && pstr->offsets[mid - 1] == offset)
646 --mid;
647 while (mid < pstr->valid_len)
648 if (pstr->wcs[mid] != WEOF)
649 break;
650 else
651 ++mid;
652 if (mid == pstr->valid_len)
653 pstr->valid_len = 0;
654 else
655 {
656 pstr->valid_len = pstr->offsets[mid] - offset;
657 if (pstr->valid_len)
658 {
659 for (low = 0; low < pstr->valid_len; ++low)
660 pstr->wcs[low] = WEOF;
661 memset (pstr->mbs, 255, pstr->valid_len);
662 }
663 }
664 pstr->valid_raw_len = pstr->valid_len;
665 }
666 }
667 else
668#endif
669 {
670 pstr->tip_context = re_string_context_at (pstr, offset - 1,
671 eflags);
672#ifdef RE_ENABLE_I18N
673 if (pstr->mb_cur_max > 1)
674 memmove (pstr->wcs, pstr->wcs + offset,
675 (pstr->valid_len - offset) * sizeof (wint_t));
612546c6 676#endif /* RE_ENABLE_I18N */
ba40cc15
UD
677 if (BE (pstr->mbs_allocated, 0))
678 memmove (pstr->mbs, pstr->mbs + offset,
679 pstr->valid_len - offset);
680 pstr->valid_len -= offset;
681 pstr->valid_raw_len -= offset;
612546c6 682#if DEBUG
ba40cc15 683 assert (pstr->valid_len > 0);
3b0bdc72 684#endif
ba40cc15 685 }
1b2c2628 686 }
612546c6 687 else
1b2c2628
UD
688 {
689 /* No, skip all characters until IDX. */
ba40cc15
UD
690 int prev_valid_len = pstr->valid_len;
691
37369d1c
UD
692#ifdef RE_ENABLE_I18N
693 if (BE (pstr->offsets_needed, 0))
694 {
695 pstr->len = pstr->raw_len - idx + offset;
696 pstr->stop = pstr->raw_stop - idx + offset;
697 pstr->offsets_needed = 0;
698 }
699#endif
1b2c2628 700 pstr->valid_len = 0;
3b0bdc72 701#ifdef RE_ENABLE_I18N
3c0fb574 702 if (pstr->mb_cur_max > 1)
1b2c2628
UD
703 {
704 int wcs_idx;
3c0fb574
UD
705 wint_t wc = WEOF;
706
707 if (pstr->is_utf8)
708 {
0dae5d4e 709 const unsigned char *raw, *p, *end;
3c0fb574
UD
710
711 /* Special case UTF-8. Multi-byte chars start with any
712 byte other than 0x80 - 0xbf. */
713 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
97fd3a30 714 end = raw + (offset - pstr->mb_cur_max);
ba40cc15
UD
715 if (end < pstr->raw_mbs)
716 end = pstr->raw_mbs;
01ed6ceb
UD
717 p = raw + offset - 1;
718#ifdef _LIBC
719 /* We know the wchar_t encoding is UCS4, so for the simple
720 case, ASCII characters, skip the conversion step. */
721 if (isascii (*p) && BE (pstr->trans == NULL, 1))
722 {
723 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
ba40cc15 724 /* pstr->valid_len = 0; */
01ed6ceb
UD
725 wc = (wchar_t) *p;
726 }
727 else
728#endif
729 for (; p >= end; --p)
730 if ((*p & 0xc0) != 0x80)
731 {
732 mbstate_t cur_state;
733 wchar_t wc2;
734 int mlen = raw + pstr->len - p;
735 unsigned char buf[6];
736 size_t mbclen;
737
5e2b63c6 738 const unsigned char *pp = p;
01ed6ceb
UD
739 if (BE (pstr->trans != NULL, 0))
740 {
741 int i = mlen < 6 ? mlen : 6;
742 while (--i >= 0)
743 buf[i] = pstr->trans[p[i]];
5e2b63c6 744 pp = buf;
01ed6ceb
UD
745 }
746 /* XXX Don't use mbrtowc, we know which conversion
747 to use (UTF-8 -> UCS4). */
748 memset (&cur_state, 0, sizeof (cur_state));
5e2b63c6 749 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
b3918c7d 750 &cur_state);
01ed6ceb
UD
751 if (raw + offset - p <= mbclen
752 && mbclen < (size_t) -2)
753 {
754 memset (&pstr->cur_state, '\0',
755 sizeof (mbstate_t));
756 pstr->valid_len = mbclen - (raw + offset - p);
757 wc = wc2;
758 }
759 break;
760 }
3c0fb574 761 }
e40a38b3 762
3c0fb574 763 if (wc == WEOF)
97fd3a30 764 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
33e63e79
UD
765 if (wc == WEOF)
766 pstr->tip_context
ba40cc15 767 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
33e63e79
UD
768 else
769 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
770 && IS_WIDE_WORD_CHAR (wc))
771 ? CONTEXT_WORD
772 : ((IS_WIDE_NEWLINE (wc)
773 && pstr->newline_anchor)
774 ? CONTEXT_NEWLINE : 0));
37369d1c
UD
775 if (BE (pstr->valid_len, 0))
776 {
777 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
778 pstr->wcs[wcs_idx] = WEOF;
779 if (pstr->mbs_allocated)
780 memset (pstr->mbs, 255, pstr->valid_len);
781 }
782 pstr->valid_raw_len = pstr->valid_len;
1b2c2628 783 }
1843975c 784 else
612546c6 785#endif /* RE_ENABLE_I18N */
1843975c
RM
786 {
787 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
33e63e79 788 pstr->valid_raw_len = 0;
1843975c
RM
789 if (pstr->trans)
790 c = pstr->trans[c];
56b168be
UD
791 pstr->tip_context = (bitset_contain (pstr->word_char, c)
792 ? CONTEXT_WORD
793 : ((IS_NEWLINE (c) && pstr->newline_anchor)
1843975c
RM
794 ? CONTEXT_NEWLINE : 0));
795 }
1b2c2628 796 }
bb677c95 797 if (!BE (pstr->mbs_allocated, 0))
37369d1c 798 pstr->mbs += offset;
3b0bdc72 799 }
612546c6
UD
800 pstr->raw_mbs_idx = idx;
801 pstr->len -= offset;
ac3d553b 802 pstr->stop -= offset;
612546c6
UD
803
804 /* Then build the buffers. */
805#ifdef RE_ENABLE_I18N
3c0fb574 806 if (pstr->mb_cur_max > 1)
3b0bdc72 807 {
612546c6 808 if (pstr->icase)
37369d1c 809 {
4f7e7f8e 810 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
37369d1c
UD
811 if (BE (ret != REG_NOERROR, 0))
812 return ret;
813 }
612546c6 814 else
1b2c2628 815 build_wcs_buffer (pstr);
3b0bdc72
UD
816 }
817 else
612546c6 818#endif /* RE_ENABLE_I18N */
01ed6ceb
UD
819 if (BE (pstr->mbs_allocated, 0))
820 {
821 if (pstr->icase)
822 build_upper_buffer (pstr);
823 else if (pstr->trans != NULL)
824 re_string_translate_buffer (pstr);
825 }
826 else
827 pstr->valid_len = pstr->len;
612546c6 828
bb677c95 829 pstr->cur_idx = 0;
3b0bdc72
UD
830 return REG_NOERROR;
831}
832
37369d1c 833static unsigned char
db26cb75 834internal_function __attribute ((pure))
e2f55264 835re_string_peek_byte_case (const re_string_t *pstr, int idx)
37369d1c
UD
836{
837 int ch, off;
838
839 /* Handle the common (easiest) cases first. */
f39eef7b 840 if (BE (!pstr->mbs_allocated, 1))
37369d1c
UD
841 return re_string_peek_byte (pstr, idx);
842
843#ifdef RE_ENABLE_I18N
844 if (pstr->mb_cur_max > 1
845 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
846 return re_string_peek_byte (pstr, idx);
847#endif
848
849 off = pstr->cur_idx + idx;
850#ifdef RE_ENABLE_I18N
851 if (pstr->offsets_needed)
852 off = pstr->offsets[off];
853#endif
854
855 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
37369d1c
UD
856
857#ifdef RE_ENABLE_I18N
858 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
859 this function returns CAPITAL LETTER I instead of first byte of
860 DOTLESS SMALL LETTER I. The latter would confuse the parser,
861 since peek_byte_case doesn't advance cur_idx in any way. */
862 if (pstr->offsets_needed && !isascii (ch))
863 return re_string_peek_byte (pstr, idx);
864#endif
865
866 return ch;
867}
868
869static unsigned char
2ba92745 870internal_function
e2f55264 871re_string_fetch_byte_case (re_string_t *pstr)
37369d1c 872{
f39eef7b 873 if (BE (!pstr->mbs_allocated, 1))
37369d1c
UD
874 return re_string_fetch_byte (pstr);
875
876#ifdef RE_ENABLE_I18N
877 if (pstr->offsets_needed)
878 {
f39eef7b 879 int off, ch;
c0d5034e 880
37369d1c
UD
881 /* For tr_TR.UTF-8 [[:islower:]] there is
882 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
883 in that case the whole multi-byte character and return
884 the original letter. On the other side, with
885 [[: DOTLESS SMALL LETTER I return [[:I, as doing
886 anything else would complicate things too much. */
887
888 if (!re_string_first_byte (pstr, pstr->cur_idx))
889 return re_string_fetch_byte (pstr);
890
891 off = pstr->offsets[pstr->cur_idx];
892 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
37369d1c
UD
893
894 if (! isascii (ch))
895 return re_string_fetch_byte (pstr);
896
897 re_string_skip_bytes (pstr,
898 re_string_char_size_at (pstr, pstr->cur_idx));
899 return ch;
900 }
901#endif
902
f39eef7b 903 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
37369d1c
UD
904}
905
3b0bdc72 906static void
db26cb75 907internal_function
e2f55264 908re_string_destruct (re_string_t *pstr)
3b0bdc72
UD
909{
910#ifdef RE_ENABLE_I18N
911 re_free (pstr->wcs);
37369d1c 912 re_free (pstr->offsets);
3b0bdc72 913#endif /* RE_ENABLE_I18N */
37369d1c 914 if (pstr->mbs_allocated)
612546c6 915 re_free (pstr->mbs);
3b0bdc72
UD
916}
917
918/* Return the context at IDX in INPUT. */
612546c6 919
3b0bdc72 920static unsigned int
1ba81cea 921internal_function
e2f55264 922re_string_context_at (const re_string_t *input, int idx, int eflags)
3b0bdc72
UD
923{
924 int c;
bb677c95
UD
925 if (BE (idx < 0, 0))
926 /* In this case, we use the value stored in input->tip_context,
927 since we can't know the character in input->mbs[-1] here. */
928 return input->tip_context;
929 if (BE (idx == input->len, 0))
930 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
931 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
a0756231 932#ifdef RE_ENABLE_I18N
3c0fb574 933 if (input->mb_cur_max > 1)
1843975c
RM
934 {
935 wint_t wc;
936 int wc_idx = idx;
937 while(input->wcs[wc_idx] == WEOF)
938 {
939#ifdef DEBUG
940 /* It must not happen. */
941 assert (wc_idx >= 0);
942#endif
943 --wc_idx;
944 if (wc_idx < 0)
945 return input->tip_context;
946 }
947 wc = input->wcs[wc_idx];
56b168be 948 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
1843975c 949 return CONTEXT_WORD;
56b168be
UD
950 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
951 ? CONTEXT_NEWLINE : 0);
1843975c 952 }
a0756231
RM
953 else
954#endif
955 {
956 c = re_string_byte_at (input, idx);
56b168be 957 if (bitset_contain (input->word_char, c))
a0756231 958 return CONTEXT_WORD;
56b168be 959 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
a0756231 960 }
3b0bdc72
UD
961}
962\f
963/* Functions for set operation. */
964
965static reg_errcode_t
2da42bc0 966internal_function __attribute_warn_unused_result__
e2f55264 967re_node_set_alloc (re_node_set *set, int size)
3b0bdc72
UD
968{
969 set->alloc = size;
970 set->nelem = 0;
971 set->elems = re_malloc (int, size);
bc15410e 972 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
973 return REG_ESPACE;
974 return REG_NOERROR;
975}
976
977static reg_errcode_t
2da42bc0 978internal_function __attribute_warn_unused_result__
e2f55264 979re_node_set_init_1 (re_node_set *set, int elem)
3b0bdc72
UD
980{
981 set->alloc = 1;
982 set->nelem = 1;
983 set->elems = re_malloc (int, 1);
bc15410e 984 if (BE (set->elems == NULL, 0))
6291ee3c
UD
985 {
986 set->alloc = set->nelem = 0;
987 return REG_ESPACE;
988 }
3b0bdc72
UD
989 set->elems[0] = elem;
990 return REG_NOERROR;
991}
992
993static reg_errcode_t
2da42bc0 994internal_function __attribute_warn_unused_result__
e2f55264 995re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
3b0bdc72
UD
996{
997 set->alloc = 2;
998 set->elems = re_malloc (int, 2);
bc15410e 999 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
1000 return REG_ESPACE;
1001 if (elem1 == elem2)
1002 {
1003 set->nelem = 1;
1004 set->elems[0] = elem1;
1005 }
1006 else
1007 {
1008 set->nelem = 2;
1009 if (elem1 < elem2)
1b2c2628
UD
1010 {
1011 set->elems[0] = elem1;
1012 set->elems[1] = elem2;
1013 }
3b0bdc72 1014 else
1b2c2628
UD
1015 {
1016 set->elems[0] = elem2;
1017 set->elems[1] = elem1;
1018 }
3b0bdc72
UD
1019 }
1020 return REG_NOERROR;
1021}
1022
1023static reg_errcode_t
2da42bc0 1024internal_function __attribute_warn_unused_result__
e2f55264 1025re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
3b0bdc72
UD
1026{
1027 dest->nelem = src->nelem;
1028 if (src->nelem > 0)
1029 {
1030 dest->alloc = dest->nelem;
1031 dest->elems = re_malloc (int, dest->alloc);
bc15410e 1032 if (BE (dest->elems == NULL, 0))
6291ee3c
UD
1033 {
1034 dest->alloc = dest->nelem = 0;
1035 return REG_ESPACE;
1036 }
3b0bdc72
UD
1037 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1038 }
1039 else
1040 re_node_set_init_empty (dest);
1041 return REG_NOERROR;
1042}
1043
a9388965
UD
1044/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1045 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1046 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1047
3b0bdc72 1048static reg_errcode_t
2da42bc0 1049internal_function __attribute_warn_unused_result__
e2f55264
UD
1050re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1051 const re_node_set *src2)
3b0bdc72 1052{
59e7ebcc
UD
1053 int i1, i2, is, id, delta, sbase;
1054 if (src1->nelem == 0 || src2->nelem == 0)
1055 return REG_NOERROR;
1056
1057 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1058 conservative estimate. */
1059 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
3b0bdc72 1060 {
59e7ebcc
UD
1061 int new_alloc = src1->nelem + src2->nelem + dest->alloc;
1062 int *new_elems = re_realloc (dest->elems, int, new_alloc);
1063 if (BE (new_elems == NULL, 0))
2da42bc0 1064 return REG_ESPACE;
59e7ebcc
UD
1065 dest->elems = new_elems;
1066 dest->alloc = new_alloc;
3b0bdc72 1067 }
3b0bdc72 1068
59e7ebcc
UD
1069 /* Find the items in the intersection of SRC1 and SRC2, and copy
1070 into the top of DEST those that are not already in DEST itself. */
1071 sbase = dest->nelem + src1->nelem + src2->nelem;
1072 i1 = src1->nelem - 1;
1073 i2 = src2->nelem - 1;
1074 id = dest->nelem - 1;
1075 for (;;)
3b0bdc72 1076 {
59e7ebcc 1077 if (src1->elems[i1] == src2->elems[i2])
1b2c2628 1078 {
59e7ebcc
UD
1079 /* Try to find the item in DEST. Maybe we could binary search? */
1080 while (id >= 0 && dest->elems[id] > src1->elems[i1])
1081 --id;
1082
2da42bc0
UD
1083 if (id < 0 || dest->elems[id] != src1->elems[i1])
1084 dest->elems[--sbase] = src1->elems[i1];
59e7ebcc
UD
1085
1086 if (--i1 < 0 || --i2 < 0)
1087 break;
1b2c2628 1088 }
59e7ebcc
UD
1089
1090 /* Lower the highest of the two items. */
1091 else if (src1->elems[i1] < src2->elems[i2])
1b2c2628 1092 {
59e7ebcc
UD
1093 if (--i2 < 0)
1094 break;
1095 }
1096 else
1097 {
1098 if (--i1 < 0)
1099 break;
1b2c2628 1100 }
3b0bdc72 1101 }
59e7ebcc
UD
1102
1103 id = dest->nelem - 1;
1104 is = dest->nelem + src1->nelem + src2->nelem - 1;
1105 delta = is - sbase + 1;
1106
1107 /* Now copy. When DELTA becomes zero, the remaining
1108 DEST elements are already in place; this is more or
1109 less the same loop that is in re_node_set_merge. */
1110 dest->nelem += delta;
1111 if (delta > 0 && id >= 0)
1112 for (;;)
1113 {
2da42bc0
UD
1114 if (dest->elems[is] > dest->elems[id])
1115 {
1116 /* Copy from the top. */
1117 dest->elems[id + delta--] = dest->elems[is--];
1118 if (delta == 0)
1119 break;
1120 }
1121 else
1122 {
1123 /* Slide from the bottom. */
1124 dest->elems[id + delta] = dest->elems[id];
1125 if (--id < 0)
1126 break;
1127 }
59e7ebcc
UD
1128 }
1129
1130 /* Copy remaining SRC elements. */
1131 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1132
3b0bdc72
UD
1133 return REG_NOERROR;
1134}
1135
a9388965
UD
1136/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1137 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1138
3b0bdc72 1139static reg_errcode_t
2da42bc0 1140internal_function __attribute_warn_unused_result__
e2f55264
UD
1141re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1142 const re_node_set *src2)
3b0bdc72
UD
1143{
1144 int i1, i2, id;
1145 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1146 {
1147 dest->alloc = src1->nelem + src2->nelem;
1148 dest->elems = re_malloc (int, dest->alloc);
bc15410e 1149 if (BE (dest->elems == NULL, 0))
1b2c2628 1150 return REG_ESPACE;
3b0bdc72
UD
1151 }
1152 else
1153 {
1154 if (src1 != NULL && src1->nelem > 0)
1b2c2628 1155 return re_node_set_init_copy (dest, src1);
3b0bdc72 1156 else if (src2 != NULL && src2->nelem > 0)
1b2c2628 1157 return re_node_set_init_copy (dest, src2);
3b0bdc72 1158 else
1b2c2628 1159 re_node_set_init_empty (dest);
3b0bdc72
UD
1160 return REG_NOERROR;
1161 }
1162 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1163 {
1164 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
1165 {
1166 dest->elems[id++] = src2->elems[i2++];
1167 continue;
1168 }
3b0bdc72 1169 if (src1->elems[i1] == src2->elems[i2])
1b2c2628 1170 ++i2;
3b0bdc72
UD
1171 dest->elems[id++] = src1->elems[i1++];
1172 }
1173 if (i1 < src1->nelem)
1174 {
1175 memcpy (dest->elems + id, src1->elems + i1,
1b2c2628 1176 (src1->nelem - i1) * sizeof (int));
3b0bdc72
UD
1177 id += src1->nelem - i1;
1178 }
1179 else if (i2 < src2->nelem)
1180 {
1181 memcpy (dest->elems + id, src2->elems + i2,
1b2c2628 1182 (src2->nelem - i2) * sizeof (int));
3b0bdc72
UD
1183 id += src2->nelem - i2;
1184 }
1185 dest->nelem = id;
1186 return REG_NOERROR;
1187}
1188
a9388965
UD
1189/* Calculate the union set of the sets DEST and SRC. And store it to
1190 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1191
3b0bdc72 1192static reg_errcode_t
2da42bc0 1193internal_function __attribute_warn_unused_result__
e2f55264 1194re_node_set_merge (re_node_set *dest, const re_node_set *src)
3b0bdc72 1195{
59e7ebcc 1196 int is, id, sbase, delta;
3b0bdc72
UD
1197 if (src == NULL || src->nelem == 0)
1198 return REG_NOERROR;
59e7ebcc 1199 if (dest->alloc < 2 * src->nelem + dest->nelem)
3b0bdc72 1200 {
951d6408
UD
1201 int new_alloc = 2 * (src->nelem + dest->alloc);
1202 int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1b2c2628
UD
1203 if (BE (new_buffer == NULL, 0))
1204 return REG_ESPACE;
1205 dest->elems = new_buffer;
951d6408 1206 dest->alloc = new_alloc;
3b0bdc72
UD
1207 }
1208
59e7ebcc 1209 if (BE (dest->nelem == 0, 0))
3b0bdc72 1210 {
59e7ebcc
UD
1211 dest->nelem = src->nelem;
1212 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1213 return REG_NOERROR;
1214 }
3b0bdc72 1215
59e7ebcc
UD
1216 /* Copy into the top of DEST the items of SRC that are not
1217 found in DEST. Maybe we could binary search in DEST? */
1218 for (sbase = dest->nelem + 2 * src->nelem,
1219 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1220 {
1221 if (dest->elems[id] == src->elems[is])
2da42bc0 1222 is--, id--;
59e7ebcc 1223 else if (dest->elems[id] < src->elems[is])
2da42bc0 1224 dest->elems[--sbase] = src->elems[is--];
59e7ebcc 1225 else /* if (dest->elems[id] > src->elems[is]) */
2da42bc0 1226 --id;
59e7ebcc 1227 }
3b0bdc72 1228
59e7ebcc
UD
1229 if (is >= 0)
1230 {
1231 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1232 sbase -= is + 1;
1233 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
3b0bdc72
UD
1234 }
1235
59e7ebcc
UD
1236 id = dest->nelem - 1;
1237 is = dest->nelem + 2 * src->nelem - 1;
1238 delta = is - sbase + 1;
1239 if (delta == 0)
1240 return REG_NOERROR;
1241
1242 /* Now copy. When DELTA becomes zero, the remaining
1243 DEST elements are already in place. */
1244 dest->nelem += delta;
1245 for (;;)
3b0bdc72 1246 {
59e7ebcc 1247 if (dest->elems[is] > dest->elems[id])
2da42bc0 1248 {
59e7ebcc 1249 /* Copy from the top. */
2da42bc0 1250 dest->elems[id + delta--] = dest->elems[is--];
59e7ebcc
UD
1251 if (delta == 0)
1252 break;
1253 }
1254 else
2da42bc0
UD
1255 {
1256 /* Slide from the bottom. */
1257 dest->elems[id + delta] = dest->elems[id];
59e7ebcc
UD
1258 if (--id < 0)
1259 {
1260 /* Copy remaining SRC elements. */
1261 memcpy (dest->elems, dest->elems + sbase,
2da42bc0 1262 delta * sizeof (int));
59e7ebcc
UD
1263 break;
1264 }
1265 }
3b0bdc72 1266 }
59e7ebcc 1267
3b0bdc72
UD
1268 return REG_NOERROR;
1269}
1270
1271/* Insert the new element ELEM to the re_node_set* SET.
8503c987 1272 SET should not already have ELEM.
3b0bdc72
UD
1273 return -1 if an error is occured, return 1 otherwise. */
1274
1275static int
2da42bc0 1276internal_function __attribute_warn_unused_result__
e2f55264 1277re_node_set_insert (re_node_set *set, int elem)
3b0bdc72 1278{
56b168be 1279 int idx;
8503c987
UD
1280 /* In case the set is empty. */
1281 if (set->alloc == 0)
3b0bdc72 1282 {
bc15410e 1283 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1b2c2628 1284 return 1;
3b0bdc72 1285 else
1b2c2628 1286 return -1;
3b0bdc72
UD
1287 }
1288
8503c987 1289 if (BE (set->nelem, 0) == 0)
3b0bdc72 1290 {
8503c987
UD
1291 /* We already guaranteed above that set->alloc != 0. */
1292 set->elems[0] = elem;
1293 ++set->nelem;
1294 return 1;
3b0bdc72
UD
1295 }
1296
1297 /* Realloc if we need. */
8503c987 1298 if (set->alloc == set->nelem)
3b0bdc72 1299 {
2d87db5b 1300 int *new_elems;
3b0bdc72 1301 set->alloc = set->alloc * 2;
2d87db5b
UD
1302 new_elems = re_realloc (set->elems, int, set->alloc);
1303 if (BE (new_elems == NULL, 0))
1b2c2628 1304 return -1;
2d87db5b 1305 set->elems = new_elems;
3b0bdc72 1306 }
8503c987
UD
1307
1308 /* Move the elements which follows the new element. Test the
1309 first element separately to skip a check in the inner loop. */
1310 if (elem < set->elems[0])
1311 {
1312 idx = 0;
59e7ebcc 1313 for (idx = set->nelem; idx > 0; idx--)
2da42bc0 1314 set->elems[idx] = set->elems[idx - 1];
8503c987 1315 }
3b0bdc72
UD
1316 else
1317 {
8503c987 1318 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
2da42bc0 1319 set->elems[idx] = set->elems[idx - 1];
3b0bdc72 1320 }
8503c987 1321
3b0bdc72
UD
1322 /* Insert the new element. */
1323 set->elems[idx] = elem;
1324 ++set->nelem;
1325 return 1;
1326}
1327
20dc2f79
UD
1328/* Insert the new element ELEM to the re_node_set* SET.
1329 SET should not already have any element greater than or equal to ELEM.
1330 Return -1 if an error is occured, return 1 otherwise. */
1331
1332static int
2da42bc0 1333internal_function __attribute_warn_unused_result__
e2f55264 1334re_node_set_insert_last (re_node_set *set, int elem)
20dc2f79
UD
1335{
1336 /* Realloc if we need. */
1337 if (set->alloc == set->nelem)
1338 {
2d87db5b 1339 int *new_elems;
20dc2f79 1340 set->alloc = (set->alloc + 1) * 2;
2d87db5b
UD
1341 new_elems = re_realloc (set->elems, int, set->alloc);
1342 if (BE (new_elems == NULL, 0))
20dc2f79 1343 return -1;
2d87db5b 1344 set->elems = new_elems;
20dc2f79
UD
1345 }
1346
1347 /* Insert the new element. */
1348 set->elems[set->nelem++] = elem;
1349 return 1;
1350}
1351
3b0bdc72 1352/* Compare two node sets SET1 and SET2.
56b168be 1353 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
3b0bdc72
UD
1354
1355static int
db26cb75 1356internal_function __attribute ((pure))
e2f55264 1357re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
3b0bdc72
UD
1358{
1359 int i;
1360 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1361 return 0;
59e7ebcc 1362 for (i = set1->nelem ; --i >= 0 ; )
3b0bdc72
UD
1363 if (set1->elems[i] != set2->elems[i])
1364 return 0;
1365 return 1;
1366}
1367
0742e48e 1368/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
3b0bdc72
UD
1369
1370static int
db26cb75 1371internal_function __attribute ((pure))
e2f55264 1372re_node_set_contains (const re_node_set *set, int elem)
3b0bdc72 1373{
5cf1ec52 1374 unsigned int idx, right, mid;
3b0bdc72
UD
1375 if (set->nelem <= 0)
1376 return 0;
1377
1378 /* Binary search the element. */
1379 idx = 0;
1380 right = set->nelem - 1;
1381 while (idx < right)
1382 {
1383 mid = (idx + right) / 2;
1384 if (set->elems[mid] < elem)
1b2c2628 1385 idx = mid + 1;
3b0bdc72 1386 else
1b2c2628 1387 right = mid;
3b0bdc72 1388 }
0742e48e 1389 return set->elems[idx] == elem ? idx + 1 : 0;
3b0bdc72
UD
1390}
1391
1392static void
db26cb75 1393internal_function
e2f55264 1394re_node_set_remove_at (re_node_set *set, int idx)
3b0bdc72
UD
1395{
1396 if (idx < 0 || idx >= set->nelem)
1397 return;
3b0bdc72 1398 --set->nelem;
59e7ebcc
UD
1399 for (; idx < set->nelem; idx++)
1400 set->elems[idx] = set->elems[idx + 1];
3b0bdc72
UD
1401}
1402\f
1403
1404/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1405 Or return -1, if an error will be occured. */
1406
1407static int
db26cb75 1408internal_function
e2f55264 1409re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
3b0bdc72 1410{
02f3550c 1411 int type = token.type;
18e3dc56 1412 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
3b0bdc72 1413 {
01ed6ceb 1414 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
02f3550c 1415 int *new_nexts, *new_indices;
963d8d78 1416 re_node_set *new_edests, *new_eclosures;
01ed6ceb
UD
1417 re_token_t *new_nodes;
1418
22364644
UD
1419 /* Avoid overflows in realloc. */
1420 const size_t max_object_size = MAX (sizeof (re_token_t),
1421 MAX (sizeof (re_node_set),
1422 sizeof (int)));
1423 if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0))
01ed6ceb 1424 return -1;
02f3550c 1425
01ed6ceb 1426 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
2d87db5b 1427 if (BE (new_nodes == NULL, 0))
0ecb606c 1428 return -1;
2d87db5b 1429 dfa->nodes = new_nodes;
02f3550c
UD
1430 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1431 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1432 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1433 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
02f3550c 1434 if (BE (new_nexts == NULL || new_indices == NULL
963d8d78 1435 || new_edests == NULL || new_eclosures == NULL, 0))
02f3550c
UD
1436 return -1;
1437 dfa->nexts = new_nexts;
1438 dfa->org_indices = new_indices;
1439 dfa->edests = new_edests;
1440 dfa->eclosures = new_eclosures;
951d6408 1441 dfa->nodes_alloc = new_nodes_alloc;
3b0bdc72
UD
1442 }
1443 dfa->nodes[dfa->nodes_len] = token;
485d775d 1444 dfa->nodes[dfa->nodes_len].constraint = 0;
02f3550c
UD
1445#ifdef RE_ENABLE_I18N
1446 dfa->nodes[dfa->nodes_len].accept_mb =
1447 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1448#endif
1449 dfa->nexts[dfa->nodes_len] = -1;
1450 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1451 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
3b0bdc72
UD
1452 return dfa->nodes_len++;
1453}
1454
2d87db5b 1455static inline unsigned int
e2f55264
UD
1456internal_function
1457calc_state_hash (const re_node_set *nodes, unsigned int context)
3b0bdc72
UD
1458{
1459 unsigned int hash = nodes->nelem + context;
1460 int i;
1461 for (i = 0 ; i < nodes->nelem ; i++)
1462 hash += nodes->elems[i];
1463 return hash;
1464}
1465
1466/* Search for the state whose node_set is equivalent to NODES.
1467 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1468 Otherwise create the new one and return it. In case of an error
1469 return NULL and set the error code in ERR.
1470 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1471 return value is NULL and ERR is REG_NOERROR.
1472 - We never return non-NULL value in case of any errors, it is for
1473 optimization. */
a9388965 1474
e2f55264 1475static re_dfastate_t *
2da42bc0 1476internal_function __attribute_warn_unused_result__
e2f55264
UD
1477re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1478 const re_node_set *nodes)
3b0bdc72
UD
1479{
1480 unsigned int hash;
a9388965 1481 re_dfastate_t *new_state;
3b0bdc72
UD
1482 struct re_state_table_entry *spot;
1483 int i;
bc15410e 1484 if (BE (nodes->nelem == 0, 0))
a9388965
UD
1485 {
1486 *err = REG_NOERROR;
1487 return NULL;
1488 }
3b0bdc72
UD
1489 hash = calc_state_hash (nodes, 0);
1490 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1491
bc15410e 1492 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1493 {
bc15410e
UD
1494 re_dfastate_t *state = spot->array[i];
1495 if (hash != state->hash)
1b2c2628 1496 continue;
bc15410e 1497 if (re_node_set_compare (&state->nodes, nodes))
1b2c2628 1498 return state;
3b0bdc72 1499 }
3b0bdc72
UD
1500
1501 /* There are no appropriate state in the dfa, create the new one. */
a9388965 1502 new_state = create_ci_newstate (dfa, nodes, hash);
2d87db5b
UD
1503 if (BE (new_state == NULL, 0))
1504 *err = REG_ESPACE;
1505
1506 return new_state;
3b0bdc72
UD
1507}
1508
1509/* Search for the state whose node_set is equivalent to NODES and
1510 whose context is equivalent to CONTEXT.
1511 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1512 Otherwise create the new one and return it. In case of an error
1513 return NULL and set the error code in ERR.
1514 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1515 return value is NULL and ERR is REG_NOERROR.
1516 - We never return non-NULL value in case of any errors, it is for
1517 optimization. */
a9388965 1518
db26cb75 1519static re_dfastate_t *
2da42bc0 1520internal_function __attribute_warn_unused_result__
e2f55264
UD
1521re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1522 const re_node_set *nodes, unsigned int context)
3b0bdc72
UD
1523{
1524 unsigned int hash;
a9388965 1525 re_dfastate_t *new_state;
3b0bdc72
UD
1526 struct re_state_table_entry *spot;
1527 int i;
1528 if (nodes->nelem == 0)
a9388965
UD
1529 {
1530 *err = REG_NOERROR;
1531 return NULL;
1532 }
3b0bdc72
UD
1533 hash = calc_state_hash (nodes, context);
1534 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1535
bc15410e 1536 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1537 {
bc15410e 1538 re_dfastate_t *state = spot->array[i];
a0a8461c
UD
1539 if (state->hash == hash
1540 && state->context == context
1541 && re_node_set_compare (state->entrance_nodes, nodes))
1b2c2628 1542 return state;
3b0bdc72 1543 }
3b0bdc72 1544 /* There are no appropriate state in `dfa', create the new one. */
a9388965 1545 new_state = create_cd_newstate (dfa, nodes, context, hash);
2d87db5b
UD
1546 if (BE (new_state == NULL, 0))
1547 *err = REG_ESPACE;
1548
1549 return new_state;
3b0bdc72
UD
1550}
1551
5cf1ec52
UD
1552/* Finish initialization of the new state NEWSTATE, and using its hash value
1553 HASH put in the appropriate bucket of DFA's state table. Return value
1554 indicates the error code if failed. */
a9388965 1555
5cf1ec52 1556static reg_errcode_t
2da42bc0 1557__attribute_warn_unused_result__
e2f55264
UD
1558register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1559 unsigned int hash)
3b0bdc72 1560{
5cf1ec52 1561 struct re_state_table_entry *spot;
1b2c2628 1562 reg_errcode_t err;
5cf1ec52
UD
1563 int i;
1564
1565 newstate->hash = hash;
1566 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1b2c2628 1567 if (BE (err != REG_NOERROR, 0))
5cf1ec52
UD
1568 return REG_ESPACE;
1569 for (i = 0; i < newstate->nodes.nelem; i++)
1b2c2628 1570 {
5cf1ec52
UD
1571 int elem = newstate->nodes.elems[i];
1572 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
2da42bc0
UD
1573 if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0)
1574 return REG_ESPACE;
1b2c2628 1575 }
3b0bdc72 1576
3b0bdc72 1577 spot = dfa->state_table + (hash & dfa->state_hash_mask);
951d6408 1578 if (BE (spot->alloc <= spot->num, 0))
3b0bdc72 1579 {
951d6408
UD
1580 int new_alloc = 2 * spot->num + 2;
1581 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1582 new_alloc);
1b2c2628
UD
1583 if (BE (new_array == NULL, 0))
1584 return REG_ESPACE;
1585 spot->array = new_array;
951d6408 1586 spot->alloc = new_alloc;
3b0bdc72 1587 }
bc15410e 1588 spot->array[spot->num++] = newstate;
a9388965 1589 return REG_NOERROR;
3b0bdc72
UD
1590}
1591
db26cb75
UD
1592static void
1593free_state (re_dfastate_t *state)
1594{
1595 re_node_set_free (&state->non_eps_nodes);
1596 re_node_set_free (&state->inveclosure);
1597 if (state->entrance_nodes != &state->nodes)
1598 {
1599 re_node_set_free (state->entrance_nodes);
1600 re_free (state->entrance_nodes);
1601 }
1602 re_node_set_free (&state->nodes);
1603 re_free (state->word_trtable);
1604 re_free (state->trtable);
1605 re_free (state);
1606}
1607
a9388965
UD
1608/* Create the new state which is independ of contexts.
1609 Return the new state if succeeded, otherwise return NULL. */
1610
3b0bdc72 1611static re_dfastate_t *
2da42bc0 1612internal_function __attribute_warn_unused_result__
e2f55264
UD
1613create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1614 unsigned int hash)
3b0bdc72
UD
1615{
1616 int i;
a9388965 1617 reg_errcode_t err;
3b0bdc72 1618 re_dfastate_t *newstate;
5cf1ec52
UD
1619
1620 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
bc15410e 1621 if (BE (newstate == NULL, 0))
a9388965 1622 return NULL;
5cf1ec52
UD
1623 err = re_node_set_init_copy (&newstate->nodes, nodes);
1624 if (BE (err != REG_NOERROR, 0))
1625 {
1626 re_free (newstate);
1627 return NULL;
1628 }
3b0bdc72 1629
5cf1ec52 1630 newstate->entrance_nodes = &newstate->nodes;
3b0bdc72
UD
1631 for (i = 0 ; i < nodes->nelem ; i++)
1632 {
1633 re_token_t *node = dfa->nodes + nodes->elems[i];
1634 re_token_type_t type = node->type;
485d775d 1635 if (type == CHARACTER && !node->constraint)
1b2c2628 1636 continue;
02f3550c
UD
1637#ifdef RE_ENABLE_I18N
1638 newstate->accept_mb |= node->accept_mb;
1639#endif /* RE_ENABLE_I18N */
3b0bdc72
UD
1640
1641 /* If the state has the halt node, the state is a halt state. */
02f3550c 1642 if (type == END_OF_RE)
1b2c2628 1643 newstate->halt = 1;
3b0bdc72 1644 else if (type == OP_BACK_REF)
1b2c2628 1645 newstate->has_backref = 1;
485d775d 1646 else if (type == ANCHOR || node->constraint)
1b2c2628 1647 newstate->has_constraint = 1;
3b0bdc72 1648 }
a9388965 1649 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1650 if (BE (err != REG_NOERROR, 0))
1651 {
1652 free_state (newstate);
1653 newstate = NULL;
1654 }
1655 return newstate;
3b0bdc72
UD
1656}
1657
a9388965
UD
1658/* Create the new state which is depend on the context CONTEXT.
1659 Return the new state if succeeded, otherwise return NULL. */
1660
3b0bdc72 1661static re_dfastate_t *
2da42bc0 1662internal_function __attribute_warn_unused_result__
e2f55264
UD
1663create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1664 unsigned int context, unsigned int hash)
3b0bdc72
UD
1665{
1666 int i, nctx_nodes = 0;
a9388965 1667 reg_errcode_t err;
3b0bdc72
UD
1668 re_dfastate_t *newstate;
1669
5cf1ec52 1670 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
bc15410e 1671 if (BE (newstate == NULL, 0))
a9388965 1672 return NULL;
5cf1ec52
UD
1673 err = re_node_set_init_copy (&newstate->nodes, nodes);
1674 if (BE (err != REG_NOERROR, 0))
1675 {
1676 re_free (newstate);
1677 return NULL;
1678 }
1679
3b0bdc72
UD
1680 newstate->context = context;
1681 newstate->entrance_nodes = &newstate->nodes;
1682
1683 for (i = 0 ; i < nodes->nelem ; i++)
1684 {
3b0bdc72
UD
1685 re_token_t *node = dfa->nodes + nodes->elems[i];
1686 re_token_type_t type = node->type;
0caca71a 1687 unsigned int constraint = node->constraint;
3b0bdc72 1688
485d775d 1689 if (type == CHARACTER && !constraint)
1b2c2628 1690 continue;
a334319f 1691#ifdef RE_ENABLE_I18N
02f3550c 1692 newstate->accept_mb |= node->accept_mb;
a334319f 1693#endif /* RE_ENABLE_I18N */
02f3550c
UD
1694
1695 /* If the state has the halt node, the state is a halt state. */
1696 if (type == END_OF_RE)
1697 newstate->halt = 1;
3b0bdc72 1698 else if (type == OP_BACK_REF)
1b2c2628 1699 newstate->has_backref = 1;
3b0bdc72
UD
1700
1701 if (constraint)
1b2c2628
UD
1702 {
1703 if (newstate->entrance_nodes == &newstate->nodes)
1704 {
1705 newstate->entrance_nodes = re_malloc (re_node_set, 1);
bc15410e 1706 if (BE (newstate->entrance_nodes == NULL, 0))
1b2c2628
UD
1707 {
1708 free_state (newstate);
1709 return NULL;
1710 }
2da42bc0
UD
1711 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1712 != REG_NOERROR)
1713 return NULL;
1b2c2628
UD
1714 nctx_nodes = 0;
1715 newstate->has_constraint = 1;
1716 }
1717
1718 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1719 {
1720 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1721 ++nctx_nodes;
1722 }
1723 }
3b0bdc72 1724 }
a9388965 1725 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1726 if (BE (err != REG_NOERROR, 0))
1727 {
1728 free_state (newstate);
1729 newstate = NULL;
1730 }
1731 return newstate;
1732}