]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regex_internal.c
Update.
[thirdparty/glibc.git] / posix / regex_internal.c
CommitLineData
3b0bdc72 1/* Extended regular expression matching and search library.
54e1cabc 2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3b0bdc72
UD
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
92b27c74 21static void re_string_construct_common (const char *str, int len,
1b2c2628 22 re_string_t *pstr,
3c0fb574
UD
23 RE_TRANSLATE_TYPE trans, int icase,
24 int mb_cur_max, int is_utf8);
434d3784 25#ifdef RE_ENABLE_I18N
1843975c
RM
26static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
27 wint_t *last_wc);
434d3784 28#endif /* RE_ENABLE_I18N */
3b0bdc72 29static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
1b2c2628
UD
30 const re_node_set *nodes,
31 unsigned int hash);
a9388965 32static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
1b2c2628 33 unsigned int hash);
3b0bdc72 34static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
1b2c2628
UD
35 const re_node_set *nodes,
36 unsigned int hash);
3b0bdc72 37static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
1b2c2628
UD
38 const re_node_set *nodes,
39 unsigned int context,
40 unsigned int hash);
3b0bdc72 41static unsigned int inline calc_state_hash (const re_node_set *nodes,
1b2c2628 42 unsigned int context);
3b0bdc72
UD
43\f
44/* Functions for string operation. */
45
612546c6
UD
46/* This function allocate the buffers. It is necessary to call
47 re_string_reconstruct before using the object. */
48
3b0bdc72 49static reg_errcode_t
3c0fb574
UD
50re_string_allocate (pstr, str, len, init_len, trans, icase,
51 mb_cur_max, is_utf8)
3b0bdc72 52 re_string_t *pstr;
92b27c74 53 const char *str;
3c0fb574 54 int len, init_len, icase, mb_cur_max, is_utf8;
3b0bdc72
UD
55 RE_TRANSLATE_TYPE trans;
56{
57 reg_errcode_t ret;
612546c6 58 int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
3c0fb574
UD
59 re_string_construct_common (str, len, pstr, trans, icase,
60 mb_cur_max, is_utf8);
ac3d553b 61 pstr->stop = pstr->len;
612546c6
UD
62
63 ret = re_string_realloc_buffers (pstr, init_buf_len);
64 if (BE (ret != REG_NOERROR, 0))
65 return ret;
66
c202c2c5 67 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
1b2c2628 68 : (unsigned char *) str);
612546c6
UD
69 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
70 pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
3c0fb574 71 || mb_cur_max > 1) ? pstr->valid_len : len;
612546c6
UD
72 return REG_NOERROR;
73}
74
75/* This function allocate the buffers, and initialize them. */
76
77static reg_errcode_t
3c0fb574 78re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8)
612546c6 79 re_string_t *pstr;
92b27c74 80 const char *str;
3c0fb574 81 int len, icase, mb_cur_max, is_utf8;
612546c6
UD
82 RE_TRANSLATE_TYPE trans;
83{
84 reg_errcode_t ret;
3c0fb574
UD
85 re_string_construct_common (str, len, pstr, trans, icase,
86 mb_cur_max, is_utf8);
ac3d553b 87 pstr->stop = pstr->len;
612546c6
UD
88 /* Set 0 so that this function can initialize whole buffers. */
89 pstr->valid_len = 0;
90
91 if (len > 0)
3b0bdc72 92 {
612546c6 93 ret = re_string_realloc_buffers (pstr, len + 1);
bc15410e 94 if (BE (ret != REG_NOERROR, 0))
1b2c2628 95 return ret;
3b0bdc72 96 }
c202c2c5 97 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
1b2c2628 98 : (unsigned char *) str);
612546c6
UD
99 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
100
101 if (icase)
102 {
103#ifdef RE_ENABLE_I18N
3c0fb574 104 if (mb_cur_max > 1)
1b2c2628 105 build_wcs_upper_buffer (pstr);
612546c6 106 else
3b0bdc72 107#endif /* RE_ENABLE_I18N */
1b2c2628 108 build_upper_buffer (pstr);
612546c6
UD
109 }
110 else
3b0bdc72 111 {
612546c6 112#ifdef RE_ENABLE_I18N
3c0fb574 113 if (mb_cur_max > 1)
1b2c2628 114 build_wcs_buffer (pstr);
612546c6
UD
115 else
116#endif /* RE_ENABLE_I18N */
1b2c2628
UD
117 {
118 if (trans != NULL)
119 re_string_translate_buffer (pstr);
120 else
121 pstr->valid_len = len;
122 }
3b0bdc72 123 }
612546c6
UD
124
125 /* Initialized whole buffers, then valid_len == bufs_len. */
126 pstr->valid_len = pstr->bufs_len;
3b0bdc72
UD
127 return REG_NOERROR;
128}
129
612546c6 130/* Helper functions for re_string_allocate, and re_string_construct. */
3b0bdc72
UD
131
132static reg_errcode_t
612546c6 133re_string_realloc_buffers (pstr, new_buf_len)
3b0bdc72 134 re_string_t *pstr;
612546c6 135 int new_buf_len;
3b0bdc72 136{
3b0bdc72 137#ifdef RE_ENABLE_I18N
3c0fb574 138 if (pstr->mb_cur_max > 1)
3b0bdc72 139 {
1b2c2628
UD
140 wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
141 if (BE (new_array == NULL, 0))
142 return REG_ESPACE;
143 pstr->wcs = new_array;
3b0bdc72 144 }
3b0bdc72 145#endif /* RE_ENABLE_I18N */
612546c6 146 if (MBS_ALLOCATED (pstr))
3b0bdc72 147 {
1b2c2628
UD
148 unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
149 new_buf_len);
150 if (BE (new_array == NULL, 0))
151 return REG_ESPACE;
152 pstr->mbs = new_array;
3b0bdc72 153 }
612546c6 154 if (MBS_CASE_ALLOCATED (pstr))
3b0bdc72 155 {
1b2c2628
UD
156 unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
157 new_buf_len);
158 if (BE (new_array == NULL, 0))
159 return REG_ESPACE;
160 pstr->mbs_case = new_array;
612546c6 161 if (!MBS_ALLOCATED (pstr))
1b2c2628 162 pstr->mbs = pstr->mbs_case;
3b0bdc72 163 }
612546c6 164 pstr->bufs_len = new_buf_len;
3b0bdc72
UD
165 return REG_NOERROR;
166}
167
612546c6 168
3b0bdc72 169static void
3c0fb574 170re_string_construct_common (str, len, pstr, trans, icase, mb_cur_max, is_utf8)
92b27c74 171 const char *str;
3b0bdc72
UD
172 int len;
173 re_string_t *pstr;
612546c6 174 RE_TRANSLATE_TYPE trans;
3c0fb574 175 int icase, mb_cur_max, is_utf8;
3b0bdc72 176{
612546c6 177 memset (pstr, '\0', sizeof (re_string_t));
c202c2c5 178 pstr->raw_mbs = (const unsigned char *) str;
3b0bdc72 179 pstr->len = len;
612546c6
UD
180 pstr->trans = trans;
181 pstr->icase = icase ? 1 : 0;
3c0fb574
UD
182 pstr->mb_cur_max = mb_cur_max;
183 pstr->is_utf8 = is_utf8;
3b0bdc72
UD
184}
185
186#ifdef RE_ENABLE_I18N
187
612546c6 188/* Build wide character buffer PSTR->WCS.
3b0bdc72
UD
189 If the byte sequence of the string are:
190 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
191 Then wide character buffer will be:
192 <wc1> , WEOF , <wc2> , WEOF , <wc3>
193 We use WEOF for padding, they indicate that the position isn't
612546c6 194 a first byte of a multibyte character.
3b0bdc72 195
612546c6
UD
196 Note that this function assumes PSTR->VALID_LEN elements are already
197 built and starts from PSTR->VALID_LEN. */
198
199static void
3b0bdc72
UD
200build_wcs_buffer (pstr)
201 re_string_t *pstr;
202{
612546c6
UD
203 mbstate_t prev_st;
204 int byte_idx, end_idx, mbclen, remain_len;
205 /* Build the buffers from pstr->valid_len to either pstr->len or
206 pstr->bufs_len. */
207 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
208 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
209 {
210 wchar_t wc;
211 remain_len = end_idx - byte_idx;
212 prev_st = pstr->cur_state;
c202c2c5 213 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
1b2c2628 214 + byte_idx), remain_len, &pstr->cur_state);
612546c6 215 if (BE (mbclen == (size_t) -2, 0))
1b2c2628
UD
216 {
217 /* The buffer doesn't have enough space, finish to build. */
218 pstr->cur_state = prev_st;
219 break;
220 }
612546c6 221 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
1b2c2628
UD
222 {
223 /* We treat these cases as a singlebyte character. */
224 mbclen = 1;
225 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
226 pstr->cur_state = prev_st;
227 }
612546c6 228
74e12fbc 229 /* Apply the translation if we need. */
612546c6 230 if (pstr->trans != NULL && mbclen == 1)
1b2c2628
UD
231 {
232 int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
233 pstr->mbs_case[byte_idx] = ch;
234 }
3b0bdc72 235 /* Write wide character and padding. */
612546c6
UD
236 pstr->wcs[byte_idx++] = wc;
237 /* Write paddings. */
238 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
1b2c2628 239 pstr->wcs[byte_idx++] = WEOF;
3b0bdc72 240 }
612546c6 241 pstr->valid_len = byte_idx;
3b0bdc72
UD
242}
243
612546c6
UD
244/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
245 but for REG_ICASE. */
246
247static void
3b0bdc72
UD
248build_wcs_upper_buffer (pstr)
249 re_string_t *pstr;
250{
612546c6
UD
251 mbstate_t prev_st;
252 int byte_idx, end_idx, mbclen, remain_len;
253 /* Build the buffers from pstr->valid_len to either pstr->len or
254 pstr->bufs_len. */
255 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
256 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
257 {
258 wchar_t wc;
259 remain_len = end_idx - byte_idx;
260 prev_st = pstr->cur_state;
c202c2c5 261 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
1b2c2628 262 + byte_idx), remain_len, &pstr->cur_state);
612546c6 263 if (BE (mbclen == (size_t) -2, 0))
1b2c2628
UD
264 {
265 /* The buffer doesn't have enough space, finish to build. */
266 pstr->cur_state = prev_st;
267 break;
268 }
612546c6 269 else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
1b2c2628
UD
270 {
271 /* In case of a singlebyte character. */
272 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
74e12fbc 273 /* Apply the translation if we need. */
1b2c2628
UD
274 if (pstr->trans != NULL && mbclen == 1)
275 {
276 ch = pstr->trans[ch];
277 pstr->mbs_case[byte_idx] = ch;
278 }
74e12fbc 279 pstr->wcs[byte_idx] = iswlower (wc) ? towupper (wc) : wc;
1b2c2628
UD
280 pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
281 if (BE (mbclen == (size_t) -1, 0))
282 pstr->cur_state = prev_st;
283 }
3b0bdc72 284 else /* mbclen > 1 */
1b2c2628
UD
285 {
286 if (iswlower (wc))
287 wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
288 else
289 memcpy (pstr->mbs + byte_idx,
290 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
74e12fbc 291 pstr->wcs[byte_idx++] = iswlower (wc) ? towupper (wc) : wc;
1b2c2628
UD
292 /* Write paddings. */
293 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
294 pstr->wcs[byte_idx++] = WEOF;
295 }
3b0bdc72 296 }
612546c6
UD
297 pstr->valid_len = byte_idx;
298}
299
300/* Skip characters until the index becomes greater than NEW_RAW_IDX.
301 Return the index. */
302
303static int
1843975c 304re_string_skip_chars (pstr, new_raw_idx, last_wc)
612546c6
UD
305 re_string_t *pstr;
306 int new_raw_idx;
1843975c 307 wint_t *last_wc;
612546c6
UD
308{
309 mbstate_t prev_st;
310 int rawbuf_idx, mbclen;
1843975c 311 wchar_t wc = 0;
612546c6
UD
312
313 /* Skip the characters which are not necessary to check. */
314 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
315 rawbuf_idx < new_raw_idx;)
316 {
1843975c
RM
317 int remain_len;
318 remain_len = pstr->len - rawbuf_idx;
612546c6 319 prev_st = pstr->cur_state;
1843975c
RM
320 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
321 remain_len, &pstr->cur_state);
612546c6 322 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
1b2c2628
UD
323 {
324 /* We treat these cases as a singlebyte character. */
325 mbclen = 1;
326 pstr->cur_state = prev_st;
327 }
612546c6
UD
328 /* Then proceed the next character. */
329 rawbuf_idx += mbclen;
330 }
1843975c 331 *last_wc = (wint_t) wc;
612546c6 332 return rawbuf_idx;
3b0bdc72
UD
333}
334#endif /* RE_ENABLE_I18N */
335
1b2c2628 336/* Build the buffer PSTR->MBS, and apply the translation if we need.
612546c6
UD
337 This function is used in case of REG_ICASE. */
338
339static void
3b0bdc72
UD
340build_upper_buffer (pstr)
341 re_string_t *pstr;
342{
612546c6
UD
343 int char_idx, end_idx;
344 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
3b0bdc72 345
612546c6 346 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
3b0bdc72 347 {
612546c6
UD
348 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
349 if (pstr->trans != NULL)
1b2c2628 350 {
74e12fbc 351 ch = pstr->trans[ch];
1b2c2628
UD
352 pstr->mbs_case[char_idx] = ch;
353 }
612546c6 354 if (islower (ch))
1b2c2628 355 pstr->mbs[char_idx] = toupper (ch);
3b0bdc72 356 else
1b2c2628 357 pstr->mbs[char_idx] = ch;
3b0bdc72 358 }
612546c6 359 pstr->valid_len = char_idx;
3b0bdc72
UD
360}
361
612546c6 362/* Apply TRANS to the buffer in PSTR. */
3b0bdc72 363
612546c6
UD
364static void
365re_string_translate_buffer (pstr)
3b0bdc72 366 re_string_t *pstr;
3b0bdc72 367{
612546c6
UD
368 int buf_idx, end_idx;
369 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
370
371 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
3b0bdc72 372 {
612546c6
UD
373 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
374 pstr->mbs_case[buf_idx] = pstr->trans[ch];
3b0bdc72 375 }
612546c6
UD
376
377 pstr->valid_len = buf_idx;
378}
379
380/* This function re-construct the buffers.
3c0fb574 381 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
612546c6
UD
382 convert to upper case in case of REG_ICASE, apply translation. */
383
384static reg_errcode_t
385re_string_reconstruct (pstr, idx, eflags, newline)
386 re_string_t *pstr;
387 int idx, eflags, newline;
388{
389 int offset = idx - pstr->raw_mbs_idx;
390 if (offset < 0)
391 {
392 /* Reset buffer. */
434d3784 393#ifdef RE_ENABLE_I18N
3c0fb574 394 if (pstr->mb_cur_max > 1)
1b2c2628 395 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
434d3784 396#endif /* RE_ENABLE_I18N */
dd385d7c
UD
397 pstr->len += pstr->raw_mbs_idx;
398 pstr->stop += pstr->raw_mbs_idx;
612546c6
UD
399 pstr->valid_len = pstr->raw_mbs_idx = 0;
400 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
1b2c2628 401 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
612546c6 402 if (!MBS_CASE_ALLOCATED (pstr))
1b2c2628 403 pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
612546c6 404 if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
1b2c2628 405 pstr->mbs = (unsigned char *) pstr->raw_mbs;
612546c6
UD
406 offset = idx;
407 }
408
409 if (offset != 0)
410 {
612546c6
UD
411 /* Are the characters which are already checked remain? */
412 if (offset < pstr->valid_len)
1b2c2628
UD
413 {
414 /* Yes, move them to the front of the buffer. */
1843975c
RM
415 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
416 newline);
3b0bdc72 417#ifdef RE_ENABLE_I18N
3c0fb574 418 if (pstr->mb_cur_max > 1)
1b2c2628
UD
419 memmove (pstr->wcs, pstr->wcs + offset,
420 (pstr->valid_len - offset) * sizeof (wint_t));
612546c6 421#endif /* RE_ENABLE_I18N */
1b2c2628
UD
422 if (MBS_ALLOCATED (pstr))
423 memmove (pstr->mbs, pstr->mbs + offset,
424 pstr->valid_len - offset);
425 if (MBS_CASE_ALLOCATED (pstr))
426 memmove (pstr->mbs_case, pstr->mbs_case + offset,
427 pstr->valid_len - offset);
428 pstr->valid_len -= offset;
612546c6 429#if DEBUG
1b2c2628 430 assert (pstr->valid_len > 0);
3b0bdc72 431#endif
1b2c2628 432 }
612546c6 433 else
1b2c2628
UD
434 {
435 /* No, skip all characters until IDX. */
436 pstr->valid_len = 0;
3b0bdc72 437#ifdef RE_ENABLE_I18N
3c0fb574 438 if (pstr->mb_cur_max > 1)
1b2c2628
UD
439 {
440 int wcs_idx;
3c0fb574
UD
441 wint_t wc = WEOF;
442
443 if (pstr->is_utf8)
444 {
445 const unsigned char *raw, *p, *end;
446
447 /* Special case UTF-8. Multi-byte chars start with any
448 byte other than 0x80 - 0xbf. */
449 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
450 end = raw + (pstr->valid_len > offset - pstr->mb_cur_max
451 ? pstr->valid_len : offset - pstr->mb_cur_max);
452 for (p = raw + offset - 1; p >= end; --p)
453 if ((*p & 0xc0) != 0x80)
454 {
455 mbstate_t cur_state;
456 wchar_t wc2;
457
458 memset (&cur_state, 0, sizeof (cur_state));
459 if (mbrtowc (&wc2, p, raw + offset - p, &cur_state)
460 == raw + offset - p)
461 {
462 memset (&pstr->cur_state, '\0',
463 sizeof (mbstate_t));
464 wc = wc2;
465 }
466 break;
467 }
468 }
469 if (wc == WEOF)
470 {
471 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
472 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
473 pstr->wcs[wcs_idx] = WEOF;
474 }
1843975c
RM
475 if (pstr->trans && wc <= 0xff)
476 wc = pstr->trans[wc];
477 pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
478 : ((newline && IS_WIDE_NEWLINE (wc))
479 ? CONTEXT_NEWLINE : 0));
1b2c2628 480 }
1843975c 481 else
612546c6 482#endif /* RE_ENABLE_I18N */
1843975c
RM
483 {
484 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
485 if (pstr->trans)
486 c = pstr->trans[c];
487 pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
488 : ((newline && IS_NEWLINE (c))
489 ? CONTEXT_NEWLINE : 0));
490 }
1b2c2628 491 }
612546c6 492 if (!MBS_CASE_ALLOCATED (pstr))
1b2c2628
UD
493 {
494 pstr->mbs_case += offset;
495 /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
496 if (!MBS_ALLOCATED (pstr))
497 pstr->mbs += offset;
498 }
3b0bdc72 499 }
612546c6
UD
500 pstr->raw_mbs_idx = idx;
501 pstr->len -= offset;
ac3d553b 502 pstr->stop -= offset;
612546c6
UD
503
504 /* Then build the buffers. */
505#ifdef RE_ENABLE_I18N
3c0fb574 506 if (pstr->mb_cur_max > 1)
3b0bdc72 507 {
612546c6 508 if (pstr->icase)
1b2c2628 509 build_wcs_upper_buffer (pstr);
612546c6 510 else
1b2c2628 511 build_wcs_buffer (pstr);
3b0bdc72
UD
512 }
513 else
612546c6 514#endif /* RE_ENABLE_I18N */
3b0bdc72 515 {
612546c6 516 if (pstr->icase)
1b2c2628 517 build_upper_buffer (pstr);
612546c6 518 else if (pstr->trans != NULL)
1b2c2628 519 re_string_translate_buffer (pstr);
3b0bdc72 520 }
612546c6
UD
521 pstr->cur_idx = 0;
522
3b0bdc72
UD
523 return REG_NOERROR;
524}
525
526static void
527re_string_destruct (pstr)
528 re_string_t *pstr;
529{
530#ifdef RE_ENABLE_I18N
531 re_free (pstr->wcs);
532#endif /* RE_ENABLE_I18N */
612546c6
UD
533 if (MBS_ALLOCATED (pstr))
534 re_free (pstr->mbs);
535 if (MBS_CASE_ALLOCATED (pstr))
536 re_free (pstr->mbs_case);
3b0bdc72
UD
537}
538
539/* Return the context at IDX in INPUT. */
612546c6 540
3b0bdc72
UD
541static unsigned int
542re_string_context_at (input, idx, eflags, newline_anchor)
543 const re_string_t *input;
544 int idx, eflags, newline_anchor;
545{
546 int c;
547 if (idx < 0 || idx == input->len)
548 {
3b0bdc72 549 if (idx < 0)
1b2c2628
UD
550 /* In this case, we use the value stored in input->tip_context,
551 since we can't know the character in input->mbs[-1] here. */
552 return input->tip_context;
bc15410e 553 else /* (idx == input->len) */
1b2c2628
UD
554 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
555 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
3b0bdc72 556 }
a0756231 557#ifdef RE_ENABLE_I18N
3c0fb574 558 if (input->mb_cur_max > 1)
1843975c
RM
559 {
560 wint_t wc;
561 int wc_idx = idx;
562 while(input->wcs[wc_idx] == WEOF)
563 {
564#ifdef DEBUG
565 /* It must not happen. */
566 assert (wc_idx >= 0);
567#endif
568 --wc_idx;
569 if (wc_idx < 0)
570 return input->tip_context;
571 }
572 wc = input->wcs[wc_idx];
573 if (IS_WIDE_WORD_CHAR (wc))
574 return CONTEXT_WORD;
575 return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0;
576 }
a0756231
RM
577 else
578#endif
579 {
580 c = re_string_byte_at (input, idx);
581 if (IS_WORD_CHAR (c))
582 return CONTEXT_WORD;
583 return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
584 }
3b0bdc72
UD
585}
586\f
587/* Functions for set operation. */
588
589static reg_errcode_t
590re_node_set_alloc (set, size)
591 re_node_set *set;
592 int size;
593{
594 set->alloc = size;
595 set->nelem = 0;
596 set->elems = re_malloc (int, size);
bc15410e 597 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
598 return REG_ESPACE;
599 return REG_NOERROR;
600}
601
602static reg_errcode_t
603re_node_set_init_1 (set, elem)
604 re_node_set *set;
605 int elem;
606{
607 set->alloc = 1;
608 set->nelem = 1;
609 set->elems = re_malloc (int, 1);
bc15410e 610 if (BE (set->elems == NULL, 0))
6291ee3c
UD
611 {
612 set->alloc = set->nelem = 0;
613 return REG_ESPACE;
614 }
3b0bdc72
UD
615 set->elems[0] = elem;
616 return REG_NOERROR;
617}
618
619static reg_errcode_t
620re_node_set_init_2 (set, elem1, elem2)
621 re_node_set *set;
622 int elem1, elem2;
623{
624 set->alloc = 2;
625 set->elems = re_malloc (int, 2);
bc15410e 626 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
627 return REG_ESPACE;
628 if (elem1 == elem2)
629 {
630 set->nelem = 1;
631 set->elems[0] = elem1;
632 }
633 else
634 {
635 set->nelem = 2;
636 if (elem1 < elem2)
1b2c2628
UD
637 {
638 set->elems[0] = elem1;
639 set->elems[1] = elem2;
640 }
3b0bdc72 641 else
1b2c2628
UD
642 {
643 set->elems[0] = elem2;
644 set->elems[1] = elem1;
645 }
3b0bdc72
UD
646 }
647 return REG_NOERROR;
648}
649
650static reg_errcode_t
651re_node_set_init_copy (dest, src)
652 re_node_set *dest;
653 const re_node_set *src;
654{
655 dest->nelem = src->nelem;
656 if (src->nelem > 0)
657 {
658 dest->alloc = dest->nelem;
659 dest->elems = re_malloc (int, dest->alloc);
bc15410e 660 if (BE (dest->elems == NULL, 0))
6291ee3c
UD
661 {
662 dest->alloc = dest->nelem = 0;
663 return REG_ESPACE;
664 }
3b0bdc72
UD
665 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
666 }
667 else
668 re_node_set_init_empty (dest);
669 return REG_NOERROR;
670}
671
a9388965
UD
672/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
673 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
674 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
675
3b0bdc72
UD
676static reg_errcode_t
677re_node_set_add_intersect (dest, src1, src2)
678 re_node_set *dest;
679 const re_node_set *src1, *src2;
680{
681 int i1, i2, id;
682 if (src1->nelem > 0 && src2->nelem > 0)
683 {
684 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1b2c2628
UD
685 {
686 dest->alloc = src1->nelem + src2->nelem + dest->nelem;
687 dest->elems = re_realloc (dest->elems, int, dest->alloc);
688 if (BE (dest->elems == NULL, 0))
689 return REG_ESPACE;
690 }
3b0bdc72
UD
691 }
692 else
693 return REG_NOERROR;
694
695 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
696 {
697 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
698 {
699 ++i2;
700 continue;
701 }
3b0bdc72 702 if (src1->elems[i1] == src2->elems[i2])
1b2c2628
UD
703 {
704 while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
705 ++id;
706 if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
707 ++id;
708 else
709 {
710 memmove (dest->elems + id + 1, dest->elems + id,
711 sizeof (int) * (dest->nelem - id));
712 dest->elems[id++] = src2->elems[i2++];
713 ++dest->nelem;
714 }
715 }
3b0bdc72
UD
716 ++i1;
717 }
718 return REG_NOERROR;
719}
720
a9388965
UD
721/* Calculate the union set of the sets SRC1 and SRC2. And store it to
722 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
723
3b0bdc72
UD
724static reg_errcode_t
725re_node_set_init_union (dest, src1, src2)
726 re_node_set *dest;
727 const re_node_set *src1, *src2;
728{
729 int i1, i2, id;
730 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
731 {
732 dest->alloc = src1->nelem + src2->nelem;
733 dest->elems = re_malloc (int, dest->alloc);
bc15410e 734 if (BE (dest->elems == NULL, 0))
1b2c2628 735 return REG_ESPACE;
3b0bdc72
UD
736 }
737 else
738 {
739 if (src1 != NULL && src1->nelem > 0)
1b2c2628 740 return re_node_set_init_copy (dest, src1);
3b0bdc72 741 else if (src2 != NULL && src2->nelem > 0)
1b2c2628 742 return re_node_set_init_copy (dest, src2);
3b0bdc72 743 else
1b2c2628 744 re_node_set_init_empty (dest);
3b0bdc72
UD
745 return REG_NOERROR;
746 }
747 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
748 {
749 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
750 {
751 dest->elems[id++] = src2->elems[i2++];
752 continue;
753 }
3b0bdc72 754 if (src1->elems[i1] == src2->elems[i2])
1b2c2628 755 ++i2;
3b0bdc72
UD
756 dest->elems[id++] = src1->elems[i1++];
757 }
758 if (i1 < src1->nelem)
759 {
760 memcpy (dest->elems + id, src1->elems + i1,
1b2c2628 761 (src1->nelem - i1) * sizeof (int));
3b0bdc72
UD
762 id += src1->nelem - i1;
763 }
764 else if (i2 < src2->nelem)
765 {
766 memcpy (dest->elems + id, src2->elems + i2,
1b2c2628 767 (src2->nelem - i2) * sizeof (int));
3b0bdc72
UD
768 id += src2->nelem - i2;
769 }
770 dest->nelem = id;
771 return REG_NOERROR;
772}
773
a9388965
UD
774/* Calculate the union set of the sets DEST and SRC. And store it to
775 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
776
3b0bdc72
UD
777static reg_errcode_t
778re_node_set_merge (dest, src)
779 re_node_set *dest;
780 const re_node_set *src;
781{
782 int si, di;
783 if (src == NULL || src->nelem == 0)
784 return REG_NOERROR;
3b0bdc72
UD
785 if (dest->alloc < src->nelem + dest->nelem)
786 {
1b2c2628 787 int *new_buffer;
3b0bdc72 788 dest->alloc = 2 * (src->nelem + dest->alloc);
1b2c2628
UD
789 new_buffer = re_realloc (dest->elems, int, dest->alloc);
790 if (BE (new_buffer == NULL, 0))
791 return REG_ESPACE;
792 dest->elems = new_buffer;
3b0bdc72
UD
793 }
794
795 for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
796 {
797 int cp_from, ncp, mid, right, src_elem = src->elems[si];
798 /* Binary search the spot we will add the new element. */
799 right = dest->nelem;
800 while (di < right)
1b2c2628
UD
801 {
802 mid = (di + right) / 2;
803 if (dest->elems[mid] < src_elem)
804 di = mid + 1;
805 else
806 right = mid;
807 }
3b0bdc72 808 if (di >= dest->nelem)
1b2c2628 809 break;
3b0bdc72
UD
810
811 if (dest->elems[di] == src_elem)
1b2c2628
UD
812 {
813 /* Skip since, DEST already has the element. */
814 ++di;
815 ++si;
816 continue;
817 }
3b0bdc72
UD
818
819 /* Skip the src elements which are less than dest->elems[di]. */
820 cp_from = si;
821 while (si < src->nelem && src->elems[si] < dest->elems[di])
1b2c2628 822 ++si;
3b0bdc72
UD
823 /* Copy these src elements. */
824 ncp = si - cp_from;
825 memmove (dest->elems + di + ncp, dest->elems + di,
1b2c2628 826 sizeof (int) * (dest->nelem - di));
3b0bdc72 827 memcpy (dest->elems + di, src->elems + cp_from,
1b2c2628 828 sizeof (int) * ncp);
3b0bdc72
UD
829 /* Update counters. */
830 di += ncp;
831 dest->nelem += ncp;
832 }
833
834 /* Copy remaining src elements. */
835 if (si < src->nelem)
836 {
837 memcpy (dest->elems + di, src->elems + si,
1b2c2628 838 sizeof (int) * (src->nelem - si));
3b0bdc72
UD
839 dest->nelem += src->nelem - si;
840 }
841 return REG_NOERROR;
842}
843
844/* Insert the new element ELEM to the re_node_set* SET.
845 return 0 if SET already has ELEM,
846 return -1 if an error is occured, return 1 otherwise. */
847
848static int
849re_node_set_insert (set, elem)
850 re_node_set *set;
851 int elem;
852{
853 int idx, right, mid;
854 /* In case of the set is empty. */
855 if (set->elems == NULL || set->alloc == 0)
856 {
bc15410e 857 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1b2c2628 858 return 1;
3b0bdc72 859 else
1b2c2628 860 return -1;
3b0bdc72
UD
861 }
862
863 /* Binary search the spot we will add the new element. */
864 idx = 0;
865 right = set->nelem;
866 while (idx < right)
867 {
868 mid = (idx + right) / 2;
869 if (set->elems[mid] < elem)
1b2c2628 870 idx = mid + 1;
3b0bdc72 871 else
1b2c2628 872 right = mid;
3b0bdc72
UD
873 }
874
875 /* Realloc if we need. */
876 if (set->alloc < set->nelem + 1)
877 {
878 int *new_array;
879 set->alloc = set->alloc * 2;
880 new_array = re_malloc (int, set->alloc);
bc15410e 881 if (BE (new_array == NULL, 0))
1b2c2628 882 return -1;
3b0bdc72
UD
883 /* Copy the elements they are followed by the new element. */
884 if (idx > 0)
1b2c2628 885 memcpy (new_array, set->elems, sizeof (int) * (idx));
3b0bdc72
UD
886 /* Copy the elements which follows the new element. */
887 if (set->nelem - idx > 0)
1b2c2628 888 memcpy (new_array + idx + 1, set->elems + idx,
3b0bdc72 889 sizeof (int) * (set->nelem - idx));
612546c6 890 re_free (set->elems);
3b0bdc72
UD
891 set->elems = new_array;
892 }
893 else
894 {
895 /* Move the elements which follows the new element. */
896 if (set->nelem - idx > 0)
1b2c2628
UD
897 memmove (set->elems + idx + 1, set->elems + idx,
898 sizeof (int) * (set->nelem - idx));
3b0bdc72
UD
899 }
900 /* Insert the new element. */
901 set->elems[idx] = elem;
902 ++set->nelem;
903 return 1;
904}
905
906/* Compare two node sets SET1 and SET2.
907 return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
908
909static int
910re_node_set_compare (set1, set2)
911 const re_node_set *set1, *set2;
912{
913 int i;
914 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
915 return 0;
916 for (i = 0 ; i < set1->nelem ; i++)
917 if (set1->elems[i] != set2->elems[i])
918 return 0;
919 return 1;
920}
921
0742e48e 922/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
3b0bdc72
UD
923
924static int
925re_node_set_contains (set, elem)
926 const re_node_set *set;
927 int elem;
928{
929 int idx, right, mid;
930 if (set->nelem <= 0)
931 return 0;
932
933 /* Binary search the element. */
934 idx = 0;
935 right = set->nelem - 1;
936 while (idx < right)
937 {
938 mid = (idx + right) / 2;
939 if (set->elems[mid] < elem)
1b2c2628 940 idx = mid + 1;
3b0bdc72 941 else
1b2c2628 942 right = mid;
3b0bdc72 943 }
0742e48e 944 return set->elems[idx] == elem ? idx + 1 : 0;
3b0bdc72
UD
945}
946
947static void
948re_node_set_remove_at (set, idx)
949 re_node_set *set;
950 int idx;
951{
952 if (idx < 0 || idx >= set->nelem)
953 return;
954 if (idx < set->nelem - 1)
955 memmove (set->elems + idx, set->elems + idx + 1,
1b2c2628 956 sizeof (int) * (set->nelem - idx - 1));
3b0bdc72
UD
957 --set->nelem;
958}
959\f
960
961/* Add the token TOKEN to dfa->nodes, and return the index of the token.
962 Or return -1, if an error will be occured. */
963
964static int
965re_dfa_add_node (dfa, token, mode)
966 re_dfa_t *dfa;
967 re_token_t token;
968 int mode;
969{
970 if (dfa->nodes_len >= dfa->nodes_alloc)
971 {
972 re_token_t *new_array;
973 dfa->nodes_alloc *= 2;
974 new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
bc15410e 975 if (BE (new_array == NULL, 0))
1b2c2628 976 return -1;
3b0bdc72 977 else
1b2c2628 978 dfa->nodes = new_array;
3b0bdc72 979 if (mode)
1b2c2628 980 {
a7d5c291 981 int *new_nexts, *new_indices;
1b2c2628
UD
982 re_node_set *new_edests, *new_eclosures, *new_inveclosures;
983
984 new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
a7d5c291 985 new_indices = re_realloc (dfa->org_indices, int, dfa->nodes_alloc);
1b2c2628
UD
986 new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
987 new_eclosures = re_realloc (dfa->eclosures, re_node_set,
988 dfa->nodes_alloc);
989 new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
990 dfa->nodes_alloc);
a7d5c291
UD
991 if (BE (new_nexts == NULL || new_indices == NULL
992 || new_edests == NULL || new_eclosures == NULL
993 || new_inveclosures == NULL, 0))
1b2c2628
UD
994 return -1;
995 dfa->nexts = new_nexts;
a7d5c291 996 dfa->org_indices = new_indices;
1b2c2628
UD
997 dfa->edests = new_edests;
998 dfa->eclosures = new_eclosures;
999 dfa->inveclosures = new_inveclosures;
1000 }
3b0bdc72
UD
1001 }
1002 dfa->nodes[dfa->nodes_len] = token;
1003 dfa->nodes[dfa->nodes_len].duplicated = 0;
485d775d 1004 dfa->nodes[dfa->nodes_len].constraint = 0;
3b0bdc72
UD
1005 return dfa->nodes_len++;
1006}
1007
1008static unsigned int inline
1009calc_state_hash (nodes, context)
1010 const re_node_set *nodes;
1011 unsigned int context;
1012{
1013 unsigned int hash = nodes->nelem + context;
1014 int i;
1015 for (i = 0 ; i < nodes->nelem ; i++)
1016 hash += nodes->elems[i];
1017 return hash;
1018}
1019
1020/* Search for the state whose node_set is equivalent to NODES.
1021 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1022 Otherwise create the new one and return it. In case of an error
1023 return NULL and set the error code in ERR.
1024 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1025 return value is NULL and ERR is REG_NOERROR.
1026 - We never return non-NULL value in case of any errors, it is for
1027 optimization. */
a9388965
UD
1028
1029static re_dfastate_t*
1030re_acquire_state (err, dfa, nodes)
1031 reg_errcode_t *err;
3b0bdc72
UD
1032 re_dfa_t *dfa;
1033 const re_node_set *nodes;
1034{
1035 unsigned int hash;
a9388965 1036 re_dfastate_t *new_state;
3b0bdc72
UD
1037 struct re_state_table_entry *spot;
1038 int i;
bc15410e 1039 if (BE (nodes->nelem == 0, 0))
a9388965
UD
1040 {
1041 *err = REG_NOERROR;
1042 return NULL;
1043 }
3b0bdc72
UD
1044 hash = calc_state_hash (nodes, 0);
1045 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1046
bc15410e 1047 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1048 {
bc15410e
UD
1049 re_dfastate_t *state = spot->array[i];
1050 if (hash != state->hash)
1b2c2628 1051 continue;
bc15410e 1052 if (re_node_set_compare (&state->nodes, nodes))
1b2c2628 1053 return state;
3b0bdc72 1054 }
3b0bdc72
UD
1055
1056 /* There are no appropriate state in the dfa, create the new one. */
a9388965 1057 new_state = create_ci_newstate (dfa, nodes, hash);
bc15410e 1058 if (BE (new_state != NULL, 1))
a9388965
UD
1059 return new_state;
1060 else
1061 {
1062 *err = REG_ESPACE;
1063 return NULL;
1064 }
3b0bdc72
UD
1065}
1066
1067/* Search for the state whose node_set is equivalent to NODES and
1068 whose context is equivalent to CONTEXT.
1069 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1070 Otherwise create the new one and return it. In case of an error
1071 return NULL and set the error code in ERR.
1072 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1073 return value is NULL and ERR is REG_NOERROR.
1074 - We never return non-NULL value in case of any errors, it is for
1075 optimization. */
a9388965
UD
1076
1077static re_dfastate_t*
1078re_acquire_state_context (err, dfa, nodes, context)
1079 reg_errcode_t *err;
3b0bdc72
UD
1080 re_dfa_t *dfa;
1081 const re_node_set *nodes;
1082 unsigned int context;
1083{
1084 unsigned int hash;
a9388965 1085 re_dfastate_t *new_state;
3b0bdc72
UD
1086 struct re_state_table_entry *spot;
1087 int i;
1088 if (nodes->nelem == 0)
a9388965
UD
1089 {
1090 *err = REG_NOERROR;
1091 return NULL;
1092 }
3b0bdc72
UD
1093 hash = calc_state_hash (nodes, context);
1094 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1095
bc15410e 1096 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1097 {
bc15410e
UD
1098 re_dfastate_t *state = spot->array[i];
1099 if (hash != state->hash)
1b2c2628 1100 continue;
bc15410e 1101 if (re_node_set_compare (state->entrance_nodes, nodes)
1b2c2628
UD
1102 && state->context == context)
1103 return state;
3b0bdc72 1104 }
3b0bdc72 1105 /* There are no appropriate state in `dfa', create the new one. */
a9388965 1106 new_state = create_cd_newstate (dfa, nodes, context, hash);
bc15410e 1107 if (BE (new_state != NULL, 1))
a9388965
UD
1108 return new_state;
1109 else
1110 {
1111 *err = REG_ESPACE;
1112 return NULL;
1113 }
3b0bdc72
UD
1114}
1115
a9388965
UD
1116/* Allocate memory for DFA state and initialize common properties.
1117 Return the new state if succeeded, otherwise return NULL. */
1118
3b0bdc72
UD
1119static re_dfastate_t *
1120create_newstate_common (dfa, nodes, hash)
1121 re_dfa_t *dfa;
1122 const re_node_set *nodes;
1123 unsigned int hash;
1124{
1125 re_dfastate_t *newstate;
1b2c2628 1126 reg_errcode_t err;
3b0bdc72 1127 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
bc15410e 1128 if (BE (newstate == NULL, 0))
a9388965 1129 return NULL;
1b2c2628
UD
1130 err = re_node_set_init_copy (&newstate->nodes, nodes);
1131 if (BE (err != REG_NOERROR, 0))
1132 {
1133 re_free (newstate);
1134 return NULL;
1135 }
3b0bdc72
UD
1136 newstate->trtable = NULL;
1137 newstate->trtable_search = NULL;
1138 newstate->hash = hash;
1139 return newstate;
1140}
1141
a9388965
UD
1142/* Store the new state NEWSTATE whose hash value is HASH in appropriate
1143 position. Return value indicate the error code if failed. */
1144
1145static reg_errcode_t
3b0bdc72
UD
1146register_state (dfa, newstate, hash)
1147 re_dfa_t *dfa;
1148 re_dfastate_t *newstate;
1149 unsigned int hash;
1150{
1151 struct re_state_table_entry *spot;
1152 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1153
1154 if (spot->alloc <= spot->num)
1155 {
1b2c2628 1156 re_dfastate_t **new_array;
bc15410e 1157 spot->alloc = 2 * spot->num + 2;
1b2c2628
UD
1158 new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
1159 if (BE (new_array == NULL, 0))
1160 return REG_ESPACE;
1161 spot->array = new_array;
3b0bdc72 1162 }
bc15410e 1163 spot->array[spot->num++] = newstate;
a9388965 1164 return REG_NOERROR;
3b0bdc72
UD
1165}
1166
a9388965
UD
1167/* Create the new state which is independ of contexts.
1168 Return the new state if succeeded, otherwise return NULL. */
1169
3b0bdc72
UD
1170static re_dfastate_t *
1171create_ci_newstate (dfa, nodes, hash)
1172 re_dfa_t *dfa;
1173 const re_node_set *nodes;
1174 unsigned int hash;
1175{
1176 int i;
a9388965 1177 reg_errcode_t err;
3b0bdc72
UD
1178 re_dfastate_t *newstate;
1179 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1180 if (BE (newstate == NULL, 0))
a9388965 1181 return NULL;
3b0bdc72
UD
1182 newstate->entrance_nodes = &newstate->nodes;
1183
1184 for (i = 0 ; i < nodes->nelem ; i++)
1185 {
1186 re_token_t *node = dfa->nodes + nodes->elems[i];
1187 re_token_type_t type = node->type;
485d775d 1188 if (type == CHARACTER && !node->constraint)
1b2c2628 1189 continue;
3b0bdc72
UD
1190
1191 /* If the state has the halt node, the state is a halt state. */
1192 else if (type == END_OF_RE)
1b2c2628 1193 newstate->halt = 1;
c0a0f9a3 1194#ifdef RE_ENABLE_I18N
3b0bdc72 1195 else if (type == COMPLEX_BRACKET
3c0fb574 1196 || (type == OP_PERIOD && dfa->mb_cur_max > 1))
1b2c2628 1197 newstate->accept_mb = 1;
c0a0f9a3 1198#endif /* RE_ENABLE_I18N */
3b0bdc72 1199 else if (type == OP_BACK_REF)
1b2c2628 1200 newstate->has_backref = 1;
485d775d 1201 else if (type == ANCHOR || node->constraint)
1b2c2628 1202 newstate->has_constraint = 1;
3b0bdc72 1203 }
a9388965 1204 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1205 if (BE (err != REG_NOERROR, 0))
1206 {
1207 free_state (newstate);
1208 newstate = NULL;
1209 }
1210 return newstate;
3b0bdc72
UD
1211}
1212
a9388965
UD
1213/* Create the new state which is depend on the context CONTEXT.
1214 Return the new state if succeeded, otherwise return NULL. */
1215
3b0bdc72
UD
1216static re_dfastate_t *
1217create_cd_newstate (dfa, nodes, context, hash)
1218 re_dfa_t *dfa;
1219 const re_node_set *nodes;
1220 unsigned int context, hash;
1221{
1222 int i, nctx_nodes = 0;
a9388965 1223 reg_errcode_t err;
3b0bdc72
UD
1224 re_dfastate_t *newstate;
1225
1226 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1227 if (BE (newstate == NULL, 0))
a9388965 1228 return NULL;
3b0bdc72
UD
1229 newstate->context = context;
1230 newstate->entrance_nodes = &newstate->nodes;
1231
1232 for (i = 0 ; i < nodes->nelem ; i++)
1233 {
1234 unsigned int constraint = 0;
1235 re_token_t *node = dfa->nodes + nodes->elems[i];
1236 re_token_type_t type = node->type;
485d775d 1237 if (node->constraint)
1b2c2628 1238 constraint = node->constraint;
3b0bdc72 1239
485d775d 1240 if (type == CHARACTER && !constraint)
1b2c2628 1241 continue;
3b0bdc72
UD
1242 /* If the state has the halt node, the state is a halt state. */
1243 else if (type == END_OF_RE)
1b2c2628 1244 newstate->halt = 1;
c0a0f9a3 1245#ifdef RE_ENABLE_I18N
3b0bdc72 1246 else if (type == COMPLEX_BRACKET
3c0fb574 1247 || (type == OP_PERIOD && dfa->mb_cur_max > 1))
1b2c2628 1248 newstate->accept_mb = 1;
c0a0f9a3 1249#endif /* RE_ENABLE_I18N */
3b0bdc72 1250 else if (type == OP_BACK_REF)
1b2c2628 1251 newstate->has_backref = 1;
3b0bdc72 1252 else if (type == ANCHOR)
1b2c2628 1253 constraint = node->opr.ctx_type;
3b0bdc72
UD
1254
1255 if (constraint)
1b2c2628
UD
1256 {
1257 if (newstate->entrance_nodes == &newstate->nodes)
1258 {
1259 newstate->entrance_nodes = re_malloc (re_node_set, 1);
bc15410e 1260 if (BE (newstate->entrance_nodes == NULL, 0))
1b2c2628
UD
1261 {
1262 free_state (newstate);
1263 return NULL;
1264 }
1265 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1266 nctx_nodes = 0;
1267 newstate->has_constraint = 1;
1268 }
1269
1270 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1271 {
1272 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1273 ++nctx_nodes;
1274 }
1275 }
3b0bdc72 1276 }
a9388965 1277 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1278 if (BE (err != REG_NOERROR, 0))
1279 {
1280 free_state (newstate);
1281 newstate = NULL;
1282 }
1283 return newstate;
1284}
1285
1286static void
1287free_state (state)
1288 re_dfastate_t *state;
1289{
1290 if (state->entrance_nodes != &state->nodes)
1291 {
1292 re_node_set_free (state->entrance_nodes);
1293 re_free (state->entrance_nodes);
1294 }
1295 re_node_set_free (&state->nodes);
1296 re_free (state->trtable);
1297 re_free (state->trtable_search);
1298 re_free (state);
3b0bdc72 1299}