]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regex_internal.c
* libio/libioP.h [_LIBC && !SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)]
[thirdparty/glibc.git] / posix / regex_internal.c
CommitLineData
3b0bdc72
UD
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21#include <assert.h>
22#include <ctype.h>
23#include <limits.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
c202c2c5
UD
27
28#if defined HAVE_WCHAR_H || defined _LIBC
29# include <wchar.h>
30#endif /* HAVE_WCHAR_H || _LIBC */
31#if defined HAVE_WCTYPE_H || defined _LIBC
32# include <wctype.h>
33#endif /* HAVE_WCTYPE_H || _LIBC */
3b0bdc72
UD
34
35#ifdef _LIBC
36# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
37# define _RE_DEFINE_LOCALE_FUNCTIONS 1
38# include <locale/localeinfo.h>
39# include <locale/elem-hash.h>
40# include <locale/coll-lookup.h>
41# endif
42#endif
43
44/* This is for other GNU distributions with internationalized messages. */
45#if HAVE_LIBINTL_H || defined _LIBC
46# include <libintl.h>
47# ifdef _LIBC
48# undef gettext
71319b9c
UD
49# define gettext(msgid) \
50 INTUSE(__dcgettext) (_libc_intl_domainname_internal, msgid, LC_MESSAGES)
3b0bdc72
UD
51# endif
52#else
53# define gettext(msgid) (msgid)
54#endif
55
56#ifndef gettext_noop
57/* This define is so xgettext can find the internationalizable
58 strings. */
59# define gettext_noop(String) String
60#endif
61
62#include "regex.h"
63#include "regex_internal.h"
64
92b27c74 65static void re_string_construct_common (const char *str, int len,
1b2c2628
UD
66 re_string_t *pstr,
67 RE_TRANSLATE_TYPE trans, int icase);
434d3784 68#ifdef RE_ENABLE_I18N
612546c6 69static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx);
434d3784 70#endif /* RE_ENABLE_I18N */
3b0bdc72 71static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
1b2c2628
UD
72 const re_node_set *nodes,
73 unsigned int hash);
a9388965 74static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
1b2c2628 75 unsigned int hash);
3b0bdc72 76static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
1b2c2628
UD
77 const re_node_set *nodes,
78 unsigned int hash);
3b0bdc72 79static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
1b2c2628
UD
80 const re_node_set *nodes,
81 unsigned int context,
82 unsigned int hash);
3b0bdc72 83static unsigned int inline calc_state_hash (const re_node_set *nodes,
1b2c2628 84 unsigned int context);
3b0bdc72
UD
85\f
86/* Functions for string operation. */
87
612546c6
UD
88/* This function allocate the buffers. It is necessary to call
89 re_string_reconstruct before using the object. */
90
3b0bdc72 91static reg_errcode_t
612546c6 92re_string_allocate (pstr, str, len, init_len, trans, icase)
3b0bdc72 93 re_string_t *pstr;
92b27c74 94 const char *str;
612546c6 95 int len, init_len, icase;
3b0bdc72
UD
96 RE_TRANSLATE_TYPE trans;
97{
98 reg_errcode_t ret;
612546c6
UD
99 int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
100 re_string_construct_common (str, len, pstr, trans, icase);
ac3d553b 101 pstr->stop = pstr->len;
612546c6
UD
102
103 ret = re_string_realloc_buffers (pstr, init_buf_len);
104 if (BE (ret != REG_NOERROR, 0))
105 return ret;
106
c202c2c5 107 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
1b2c2628 108 : (unsigned char *) str);
612546c6
UD
109 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
110 pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
1b2c2628 111 || MB_CUR_MAX > 1) ? pstr->valid_len : len;
612546c6
UD
112 return REG_NOERROR;
113}
114
115/* This function allocate the buffers, and initialize them. */
116
117static reg_errcode_t
118re_string_construct (pstr, str, len, trans, icase)
119 re_string_t *pstr;
92b27c74 120 const char *str;
612546c6
UD
121 int len, icase;
122 RE_TRANSLATE_TYPE trans;
123{
124 reg_errcode_t ret;
125 re_string_construct_common (str, len, pstr, trans, icase);
ac3d553b 126 pstr->stop = pstr->len;
612546c6
UD
127 /* Set 0 so that this function can initialize whole buffers. */
128 pstr->valid_len = 0;
129
130 if (len > 0)
3b0bdc72 131 {
612546c6 132 ret = re_string_realloc_buffers (pstr, len + 1);
bc15410e 133 if (BE (ret != REG_NOERROR, 0))
1b2c2628 134 return ret;
3b0bdc72 135 }
c202c2c5 136 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
1b2c2628 137 : (unsigned char *) str);
612546c6
UD
138 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
139
140 if (icase)
141 {
142#ifdef RE_ENABLE_I18N
143 if (MB_CUR_MAX > 1)
1b2c2628 144 build_wcs_upper_buffer (pstr);
612546c6 145 else
3b0bdc72 146#endif /* RE_ENABLE_I18N */
1b2c2628 147 build_upper_buffer (pstr);
612546c6
UD
148 }
149 else
3b0bdc72 150 {
612546c6
UD
151#ifdef RE_ENABLE_I18N
152 if (MB_CUR_MAX > 1)
1b2c2628 153 build_wcs_buffer (pstr);
612546c6
UD
154 else
155#endif /* RE_ENABLE_I18N */
1b2c2628
UD
156 {
157 if (trans != NULL)
158 re_string_translate_buffer (pstr);
159 else
160 pstr->valid_len = len;
161 }
3b0bdc72 162 }
612546c6
UD
163
164 /* Initialized whole buffers, then valid_len == bufs_len. */
165 pstr->valid_len = pstr->bufs_len;
3b0bdc72
UD
166 return REG_NOERROR;
167}
168
612546c6 169/* Helper functions for re_string_allocate, and re_string_construct. */
3b0bdc72
UD
170
171static reg_errcode_t
612546c6 172re_string_realloc_buffers (pstr, new_buf_len)
3b0bdc72 173 re_string_t *pstr;
612546c6 174 int new_buf_len;
3b0bdc72 175{
3b0bdc72 176#ifdef RE_ENABLE_I18N
612546c6 177 if (MB_CUR_MAX > 1)
3b0bdc72 178 {
1b2c2628
UD
179 wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
180 if (BE (new_array == NULL, 0))
181 return REG_ESPACE;
182 pstr->wcs = new_array;
3b0bdc72 183 }
3b0bdc72 184#endif /* RE_ENABLE_I18N */
612546c6 185 if (MBS_ALLOCATED (pstr))
3b0bdc72 186 {
1b2c2628
UD
187 unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
188 new_buf_len);
189 if (BE (new_array == NULL, 0))
190 return REG_ESPACE;
191 pstr->mbs = new_array;
3b0bdc72 192 }
612546c6 193 if (MBS_CASE_ALLOCATED (pstr))
3b0bdc72 194 {
1b2c2628
UD
195 unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
196 new_buf_len);
197 if (BE (new_array == NULL, 0))
198 return REG_ESPACE;
199 pstr->mbs_case = new_array;
612546c6 200 if (!MBS_ALLOCATED (pstr))
1b2c2628 201 pstr->mbs = pstr->mbs_case;
3b0bdc72 202 }
612546c6 203 pstr->bufs_len = new_buf_len;
3b0bdc72
UD
204 return REG_NOERROR;
205}
206
612546c6 207
3b0bdc72 208static void
612546c6 209re_string_construct_common (str, len, pstr, trans, icase)
92b27c74 210 const char *str;
3b0bdc72
UD
211 int len;
212 re_string_t *pstr;
612546c6
UD
213 RE_TRANSLATE_TYPE trans;
214 int icase;
3b0bdc72 215{
612546c6 216 memset (pstr, '\0', sizeof (re_string_t));
c202c2c5 217 pstr->raw_mbs = (const unsigned char *) str;
3b0bdc72 218 pstr->len = len;
612546c6
UD
219 pstr->trans = trans;
220 pstr->icase = icase ? 1 : 0;
3b0bdc72
UD
221}
222
223#ifdef RE_ENABLE_I18N
224
612546c6 225/* Build wide character buffer PSTR->WCS.
3b0bdc72
UD
226 If the byte sequence of the string are:
227 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
228 Then wide character buffer will be:
229 <wc1> , WEOF , <wc2> , WEOF , <wc3>
230 We use WEOF for padding, they indicate that the position isn't
612546c6 231 a first byte of a multibyte character.
3b0bdc72 232
612546c6
UD
233 Note that this function assumes PSTR->VALID_LEN elements are already
234 built and starts from PSTR->VALID_LEN. */
235
236static void
3b0bdc72
UD
237build_wcs_buffer (pstr)
238 re_string_t *pstr;
239{
612546c6
UD
240 mbstate_t prev_st;
241 int byte_idx, end_idx, mbclen, remain_len;
242 /* Build the buffers from pstr->valid_len to either pstr->len or
243 pstr->bufs_len. */
244 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
245 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
246 {
247 wchar_t wc;
248 remain_len = end_idx - byte_idx;
249 prev_st = pstr->cur_state;
c202c2c5 250 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
1b2c2628 251 + byte_idx), remain_len, &pstr->cur_state);
612546c6 252 if (BE (mbclen == (size_t) -2, 0))
1b2c2628
UD
253 {
254 /* The buffer doesn't have enough space, finish to build. */
255 pstr->cur_state = prev_st;
256 break;
257 }
612546c6 258 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
1b2c2628
UD
259 {
260 /* We treat these cases as a singlebyte character. */
261 mbclen = 1;
262 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
263 pstr->cur_state = prev_st;
264 }
612546c6
UD
265
266 /* Apply the translateion if we need. */
267 if (pstr->trans != NULL && mbclen == 1)
1b2c2628
UD
268 {
269 int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
270 pstr->mbs_case[byte_idx] = ch;
271 }
3b0bdc72 272 /* Write wide character and padding. */
612546c6
UD
273 pstr->wcs[byte_idx++] = wc;
274 /* Write paddings. */
275 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
1b2c2628 276 pstr->wcs[byte_idx++] = WEOF;
3b0bdc72 277 }
612546c6 278 pstr->valid_len = byte_idx;
3b0bdc72
UD
279}
280
612546c6
UD
281/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
282 but for REG_ICASE. */
283
284static void
3b0bdc72
UD
285build_wcs_upper_buffer (pstr)
286 re_string_t *pstr;
287{
612546c6
UD
288 mbstate_t prev_st;
289 int byte_idx, end_idx, mbclen, remain_len;
290 /* Build the buffers from pstr->valid_len to either pstr->len or
291 pstr->bufs_len. */
292 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
293 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
294 {
295 wchar_t wc;
296 remain_len = end_idx - byte_idx;
297 prev_st = pstr->cur_state;
c202c2c5 298 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
1b2c2628 299 + byte_idx), remain_len, &pstr->cur_state);
612546c6 300 if (BE (mbclen == (size_t) -2, 0))
1b2c2628
UD
301 {
302 /* The buffer doesn't have enough space, finish to build. */
303 pstr->cur_state = prev_st;
304 break;
305 }
612546c6 306 else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
1b2c2628
UD
307 {
308 /* In case of a singlebyte character. */
309 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
310 /* Apply the translateion if we need. */
311 if (pstr->trans != NULL && mbclen == 1)
312 {
313 ch = pstr->trans[ch];
314 pstr->mbs_case[byte_idx] = ch;
315 }
316 pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
317 pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
318 if (BE (mbclen == (size_t) -1, 0))
319 pstr->cur_state = prev_st;
320 }
3b0bdc72 321 else /* mbclen > 1 */
1b2c2628
UD
322 {
323 if (iswlower (wc))
324 wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
325 else
326 memcpy (pstr->mbs + byte_idx,
327 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
328 pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
329 /* Write paddings. */
330 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
331 pstr->wcs[byte_idx++] = WEOF;
332 }
3b0bdc72 333 }
612546c6
UD
334 pstr->valid_len = byte_idx;
335}
336
337/* Skip characters until the index becomes greater than NEW_RAW_IDX.
338 Return the index. */
339
340static int
341re_string_skip_chars (pstr, new_raw_idx)
342 re_string_t *pstr;
343 int new_raw_idx;
344{
345 mbstate_t prev_st;
346 int rawbuf_idx, mbclen;
347
348 /* Skip the characters which are not necessary to check. */
349 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
350 rawbuf_idx < new_raw_idx;)
351 {
352 int remain_len = pstr->len - rawbuf_idx;
353 prev_st = pstr->cur_state;
c202c2c5 354 mbclen = mbrlen ((const char *) pstr->raw_mbs + rawbuf_idx, remain_len,
1b2c2628 355 &pstr->cur_state);
612546c6 356 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
1b2c2628
UD
357 {
358 /* We treat these cases as a singlebyte character. */
359 mbclen = 1;
360 pstr->cur_state = prev_st;
361 }
612546c6
UD
362 /* Then proceed the next character. */
363 rawbuf_idx += mbclen;
364 }
365 return rawbuf_idx;
3b0bdc72
UD
366}
367#endif /* RE_ENABLE_I18N */
368
1b2c2628 369/* Build the buffer PSTR->MBS, and apply the translation if we need.
612546c6
UD
370 This function is used in case of REG_ICASE. */
371
372static void
3b0bdc72
UD
373build_upper_buffer (pstr)
374 re_string_t *pstr;
375{
612546c6
UD
376 int char_idx, end_idx;
377 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
3b0bdc72 378
612546c6 379 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
3b0bdc72 380 {
612546c6
UD
381 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
382 if (pstr->trans != NULL)
1b2c2628
UD
383 {
384 ch = pstr->trans[ch];
385 pstr->mbs_case[char_idx] = ch;
386 }
612546c6 387 if (islower (ch))
1b2c2628 388 pstr->mbs[char_idx] = toupper (ch);
3b0bdc72 389 else
1b2c2628 390 pstr->mbs[char_idx] = ch;
3b0bdc72 391 }
612546c6 392 pstr->valid_len = char_idx;
3b0bdc72
UD
393}
394
612546c6 395/* Apply TRANS to the buffer in PSTR. */
3b0bdc72 396
612546c6
UD
397static void
398re_string_translate_buffer (pstr)
3b0bdc72 399 re_string_t *pstr;
3b0bdc72 400{
612546c6
UD
401 int buf_idx, end_idx;
402 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
403
404 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
3b0bdc72 405 {
612546c6
UD
406 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
407 pstr->mbs_case[buf_idx] = pstr->trans[ch];
3b0bdc72 408 }
612546c6
UD
409
410 pstr->valid_len = buf_idx;
411}
412
413/* This function re-construct the buffers.
414 Concretely, convert to wide character in case of MB_CUR_MAX > 1,
415 convert to upper case in case of REG_ICASE, apply translation. */
416
417static reg_errcode_t
418re_string_reconstruct (pstr, idx, eflags, newline)
419 re_string_t *pstr;
420 int idx, eflags, newline;
421{
422 int offset = idx - pstr->raw_mbs_idx;
423 if (offset < 0)
424 {
425 /* Reset buffer. */
434d3784
UD
426#ifdef RE_ENABLE_I18N
427 if (MB_CUR_MAX > 1)
1b2c2628 428 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
434d3784 429#endif /* RE_ENABLE_I18N */
dd385d7c
UD
430 pstr->len += pstr->raw_mbs_idx;
431 pstr->stop += pstr->raw_mbs_idx;
612546c6
UD
432 pstr->valid_len = pstr->raw_mbs_idx = 0;
433 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
1b2c2628 434 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
612546c6 435 if (!MBS_CASE_ALLOCATED (pstr))
1b2c2628 436 pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
612546c6 437 if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
1b2c2628 438 pstr->mbs = (unsigned char *) pstr->raw_mbs;
612546c6
UD
439 offset = idx;
440 }
441
442 if (offset != 0)
443 {
444 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
1b2c2628 445 newline);
612546c6
UD
446 /* Are the characters which are already checked remain? */
447 if (offset < pstr->valid_len)
1b2c2628
UD
448 {
449 /* Yes, move them to the front of the buffer. */
3b0bdc72 450#ifdef RE_ENABLE_I18N
1b2c2628
UD
451 if (MB_CUR_MAX > 1)
452 memmove (pstr->wcs, pstr->wcs + offset,
453 (pstr->valid_len - offset) * sizeof (wint_t));
612546c6 454#endif /* RE_ENABLE_I18N */
1b2c2628
UD
455 if (MBS_ALLOCATED (pstr))
456 memmove (pstr->mbs, pstr->mbs + offset,
457 pstr->valid_len - offset);
458 if (MBS_CASE_ALLOCATED (pstr))
459 memmove (pstr->mbs_case, pstr->mbs_case + offset,
460 pstr->valid_len - offset);
461 pstr->valid_len -= offset;
612546c6 462#if DEBUG
1b2c2628 463 assert (pstr->valid_len > 0);
3b0bdc72 464#endif
1b2c2628 465 }
612546c6 466 else
1b2c2628
UD
467 {
468 /* No, skip all characters until IDX. */
469 pstr->valid_len = 0;
3b0bdc72 470#ifdef RE_ENABLE_I18N
1b2c2628
UD
471 if (MB_CUR_MAX > 1)
472 {
473 int wcs_idx;
474 pstr->valid_len = re_string_skip_chars (pstr, idx) - idx;
475 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
476 pstr->wcs[wcs_idx] = WEOF;
477 }
612546c6 478#endif /* RE_ENABLE_I18N */
1b2c2628 479 }
612546c6 480 if (!MBS_CASE_ALLOCATED (pstr))
1b2c2628
UD
481 {
482 pstr->mbs_case += offset;
483 /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
484 if (!MBS_ALLOCATED (pstr))
485 pstr->mbs += offset;
486 }
3b0bdc72 487 }
612546c6
UD
488 pstr->raw_mbs_idx = idx;
489 pstr->len -= offset;
ac3d553b 490 pstr->stop -= offset;
612546c6
UD
491
492 /* Then build the buffers. */
493#ifdef RE_ENABLE_I18N
494 if (MB_CUR_MAX > 1)
3b0bdc72 495 {
612546c6 496 if (pstr->icase)
1b2c2628 497 build_wcs_upper_buffer (pstr);
612546c6 498 else
1b2c2628 499 build_wcs_buffer (pstr);
3b0bdc72
UD
500 }
501 else
612546c6 502#endif /* RE_ENABLE_I18N */
3b0bdc72 503 {
612546c6 504 if (pstr->icase)
1b2c2628 505 build_upper_buffer (pstr);
612546c6 506 else if (pstr->trans != NULL)
1b2c2628 507 re_string_translate_buffer (pstr);
3b0bdc72 508 }
612546c6
UD
509 pstr->cur_idx = 0;
510
3b0bdc72
UD
511 return REG_NOERROR;
512}
513
514static void
515re_string_destruct (pstr)
516 re_string_t *pstr;
517{
518#ifdef RE_ENABLE_I18N
519 re_free (pstr->wcs);
520#endif /* RE_ENABLE_I18N */
612546c6
UD
521 if (MBS_ALLOCATED (pstr))
522 re_free (pstr->mbs);
523 if (MBS_CASE_ALLOCATED (pstr))
524 re_free (pstr->mbs_case);
3b0bdc72
UD
525}
526
527/* Return the context at IDX in INPUT. */
612546c6 528
3b0bdc72
UD
529static unsigned int
530re_string_context_at (input, idx, eflags, newline_anchor)
531 const re_string_t *input;
532 int idx, eflags, newline_anchor;
533{
534 int c;
535 if (idx < 0 || idx == input->len)
536 {
3b0bdc72 537 if (idx < 0)
1b2c2628
UD
538 /* In this case, we use the value stored in input->tip_context,
539 since we can't know the character in input->mbs[-1] here. */
540 return input->tip_context;
bc15410e 541 else /* (idx == input->len) */
1b2c2628
UD
542 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
543 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
3b0bdc72
UD
544 }
545 c = re_string_byte_at (input, idx);
546 if (IS_WORD_CHAR (c))
547 return CONTEXT_WORD;
548 return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
549}
550\f
551/* Functions for set operation. */
552
553static reg_errcode_t
554re_node_set_alloc (set, size)
555 re_node_set *set;
556 int size;
557{
558 set->alloc = size;
559 set->nelem = 0;
560 set->elems = re_malloc (int, size);
bc15410e 561 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
562 return REG_ESPACE;
563 return REG_NOERROR;
564}
565
566static reg_errcode_t
567re_node_set_init_1 (set, elem)
568 re_node_set *set;
569 int elem;
570{
571 set->alloc = 1;
572 set->nelem = 1;
573 set->elems = re_malloc (int, 1);
bc15410e 574 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
575 return REG_ESPACE;
576 set->elems[0] = elem;
577 return REG_NOERROR;
578}
579
580static reg_errcode_t
581re_node_set_init_2 (set, elem1, elem2)
582 re_node_set *set;
583 int elem1, elem2;
584{
585 set->alloc = 2;
586 set->elems = re_malloc (int, 2);
bc15410e 587 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
588 return REG_ESPACE;
589 if (elem1 == elem2)
590 {
591 set->nelem = 1;
592 set->elems[0] = elem1;
593 }
594 else
595 {
596 set->nelem = 2;
597 if (elem1 < elem2)
1b2c2628
UD
598 {
599 set->elems[0] = elem1;
600 set->elems[1] = elem2;
601 }
3b0bdc72 602 else
1b2c2628
UD
603 {
604 set->elems[0] = elem2;
605 set->elems[1] = elem1;
606 }
3b0bdc72
UD
607 }
608 return REG_NOERROR;
609}
610
611static reg_errcode_t
612re_node_set_init_copy (dest, src)
613 re_node_set *dest;
614 const re_node_set *src;
615{
616 dest->nelem = src->nelem;
617 if (src->nelem > 0)
618 {
619 dest->alloc = dest->nelem;
620 dest->elems = re_malloc (int, dest->alloc);
bc15410e 621 if (BE (dest->elems == NULL, 0))
1b2c2628 622 return REG_ESPACE;
3b0bdc72
UD
623 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
624 }
625 else
626 re_node_set_init_empty (dest);
627 return REG_NOERROR;
628}
629
a9388965
UD
630/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
631 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
632 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
633
3b0bdc72
UD
634static reg_errcode_t
635re_node_set_add_intersect (dest, src1, src2)
636 re_node_set *dest;
637 const re_node_set *src1, *src2;
638{
639 int i1, i2, id;
640 if (src1->nelem > 0 && src2->nelem > 0)
641 {
642 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1b2c2628
UD
643 {
644 dest->alloc = src1->nelem + src2->nelem + dest->nelem;
645 dest->elems = re_realloc (dest->elems, int, dest->alloc);
646 if (BE (dest->elems == NULL, 0))
647 return REG_ESPACE;
648 }
3b0bdc72
UD
649 }
650 else
651 return REG_NOERROR;
652
653 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
654 {
655 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
656 {
657 ++i2;
658 continue;
659 }
3b0bdc72 660 if (src1->elems[i1] == src2->elems[i2])
1b2c2628
UD
661 {
662 while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
663 ++id;
664 if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
665 ++id;
666 else
667 {
668 memmove (dest->elems + id + 1, dest->elems + id,
669 sizeof (int) * (dest->nelem - id));
670 dest->elems[id++] = src2->elems[i2++];
671 ++dest->nelem;
672 }
673 }
3b0bdc72
UD
674 ++i1;
675 }
676 return REG_NOERROR;
677}
678
a9388965
UD
679/* Calculate the union set of the sets SRC1 and SRC2. And store it to
680 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
681
3b0bdc72
UD
682static reg_errcode_t
683re_node_set_init_union (dest, src1, src2)
684 re_node_set *dest;
685 const re_node_set *src1, *src2;
686{
687 int i1, i2, id;
688 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
689 {
690 dest->alloc = src1->nelem + src2->nelem;
691 dest->elems = re_malloc (int, dest->alloc);
bc15410e 692 if (BE (dest->elems == NULL, 0))
1b2c2628 693 return REG_ESPACE;
3b0bdc72
UD
694 }
695 else
696 {
697 if (src1 != NULL && src1->nelem > 0)
1b2c2628 698 return re_node_set_init_copy (dest, src1);
3b0bdc72 699 else if (src2 != NULL && src2->nelem > 0)
1b2c2628 700 return re_node_set_init_copy (dest, src2);
3b0bdc72 701 else
1b2c2628 702 re_node_set_init_empty (dest);
3b0bdc72
UD
703 return REG_NOERROR;
704 }
705 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
706 {
707 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
708 {
709 dest->elems[id++] = src2->elems[i2++];
710 continue;
711 }
3b0bdc72 712 if (src1->elems[i1] == src2->elems[i2])
1b2c2628 713 ++i2;
3b0bdc72
UD
714 dest->elems[id++] = src1->elems[i1++];
715 }
716 if (i1 < src1->nelem)
717 {
718 memcpy (dest->elems + id, src1->elems + i1,
1b2c2628 719 (src1->nelem - i1) * sizeof (int));
3b0bdc72
UD
720 id += src1->nelem - i1;
721 }
722 else if (i2 < src2->nelem)
723 {
724 memcpy (dest->elems + id, src2->elems + i2,
1b2c2628 725 (src2->nelem - i2) * sizeof (int));
3b0bdc72
UD
726 id += src2->nelem - i2;
727 }
728 dest->nelem = id;
729 return REG_NOERROR;
730}
731
a9388965
UD
732/* Calculate the union set of the sets DEST and SRC. And store it to
733 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
734
3b0bdc72
UD
735static reg_errcode_t
736re_node_set_merge (dest, src)
737 re_node_set *dest;
738 const re_node_set *src;
739{
740 int si, di;
741 if (src == NULL || src->nelem == 0)
742 return REG_NOERROR;
3b0bdc72
UD
743 if (dest->alloc < src->nelem + dest->nelem)
744 {
1b2c2628 745 int *new_buffer;
3b0bdc72 746 dest->alloc = 2 * (src->nelem + dest->alloc);
1b2c2628
UD
747 new_buffer = re_realloc (dest->elems, int, dest->alloc);
748 if (BE (new_buffer == NULL, 0))
749 return REG_ESPACE;
750 dest->elems = new_buffer;
3b0bdc72
UD
751 }
752
753 for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
754 {
755 int cp_from, ncp, mid, right, src_elem = src->elems[si];
756 /* Binary search the spot we will add the new element. */
757 right = dest->nelem;
758 while (di < right)
1b2c2628
UD
759 {
760 mid = (di + right) / 2;
761 if (dest->elems[mid] < src_elem)
762 di = mid + 1;
763 else
764 right = mid;
765 }
3b0bdc72 766 if (di >= dest->nelem)
1b2c2628 767 break;
3b0bdc72
UD
768
769 if (dest->elems[di] == src_elem)
1b2c2628
UD
770 {
771 /* Skip since, DEST already has the element. */
772 ++di;
773 ++si;
774 continue;
775 }
3b0bdc72
UD
776
777 /* Skip the src elements which are less than dest->elems[di]. */
778 cp_from = si;
779 while (si < src->nelem && src->elems[si] < dest->elems[di])
1b2c2628 780 ++si;
3b0bdc72
UD
781 /* Copy these src elements. */
782 ncp = si - cp_from;
783 memmove (dest->elems + di + ncp, dest->elems + di,
1b2c2628 784 sizeof (int) * (dest->nelem - di));
3b0bdc72 785 memcpy (dest->elems + di, src->elems + cp_from,
1b2c2628 786 sizeof (int) * ncp);
3b0bdc72
UD
787 /* Update counters. */
788 di += ncp;
789 dest->nelem += ncp;
790 }
791
792 /* Copy remaining src elements. */
793 if (si < src->nelem)
794 {
795 memcpy (dest->elems + di, src->elems + si,
1b2c2628 796 sizeof (int) * (src->nelem - si));
3b0bdc72
UD
797 dest->nelem += src->nelem - si;
798 }
799 return REG_NOERROR;
800}
801
802/* Insert the new element ELEM to the re_node_set* SET.
803 return 0 if SET already has ELEM,
804 return -1 if an error is occured, return 1 otherwise. */
805
806static int
807re_node_set_insert (set, elem)
808 re_node_set *set;
809 int elem;
810{
811 int idx, right, mid;
812 /* In case of the set is empty. */
813 if (set->elems == NULL || set->alloc == 0)
814 {
bc15410e 815 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1b2c2628 816 return 1;
3b0bdc72 817 else
1b2c2628 818 return -1;
3b0bdc72
UD
819 }
820
821 /* Binary search the spot we will add the new element. */
822 idx = 0;
823 right = set->nelem;
824 while (idx < right)
825 {
826 mid = (idx + right) / 2;
827 if (set->elems[mid] < elem)
1b2c2628 828 idx = mid + 1;
3b0bdc72 829 else
1b2c2628 830 right = mid;
3b0bdc72
UD
831 }
832
833 /* Realloc if we need. */
834 if (set->alloc < set->nelem + 1)
835 {
836 int *new_array;
837 set->alloc = set->alloc * 2;
838 new_array = re_malloc (int, set->alloc);
bc15410e 839 if (BE (new_array == NULL, 0))
1b2c2628 840 return -1;
3b0bdc72
UD
841 /* Copy the elements they are followed by the new element. */
842 if (idx > 0)
1b2c2628 843 memcpy (new_array, set->elems, sizeof (int) * (idx));
3b0bdc72
UD
844 /* Copy the elements which follows the new element. */
845 if (set->nelem - idx > 0)
1b2c2628 846 memcpy (new_array + idx + 1, set->elems + idx,
3b0bdc72 847 sizeof (int) * (set->nelem - idx));
612546c6 848 re_free (set->elems);
3b0bdc72
UD
849 set->elems = new_array;
850 }
851 else
852 {
853 /* Move the elements which follows the new element. */
854 if (set->nelem - idx > 0)
1b2c2628
UD
855 memmove (set->elems + idx + 1, set->elems + idx,
856 sizeof (int) * (set->nelem - idx));
3b0bdc72
UD
857 }
858 /* Insert the new element. */
859 set->elems[idx] = elem;
860 ++set->nelem;
861 return 1;
862}
863
864/* Compare two node sets SET1 and SET2.
865 return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
866
867static int
868re_node_set_compare (set1, set2)
869 const re_node_set *set1, *set2;
870{
871 int i;
872 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
873 return 0;
874 for (i = 0 ; i < set1->nelem ; i++)
875 if (set1->elems[i] != set2->elems[i])
876 return 0;
877 return 1;
878}
879
0742e48e 880/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
3b0bdc72
UD
881
882static int
883re_node_set_contains (set, elem)
884 const re_node_set *set;
885 int elem;
886{
887 int idx, right, mid;
888 if (set->nelem <= 0)
889 return 0;
890
891 /* Binary search the element. */
892 idx = 0;
893 right = set->nelem - 1;
894 while (idx < right)
895 {
896 mid = (idx + right) / 2;
897 if (set->elems[mid] < elem)
1b2c2628 898 idx = mid + 1;
3b0bdc72 899 else
1b2c2628 900 right = mid;
3b0bdc72 901 }
0742e48e 902 return set->elems[idx] == elem ? idx + 1 : 0;
3b0bdc72
UD
903}
904
905static void
906re_node_set_remove_at (set, idx)
907 re_node_set *set;
908 int idx;
909{
910 if (idx < 0 || idx >= set->nelem)
911 return;
912 if (idx < set->nelem - 1)
913 memmove (set->elems + idx, set->elems + idx + 1,
1b2c2628 914 sizeof (int) * (set->nelem - idx - 1));
3b0bdc72
UD
915 --set->nelem;
916}
917\f
918
919/* Add the token TOKEN to dfa->nodes, and return the index of the token.
920 Or return -1, if an error will be occured. */
921
922static int
923re_dfa_add_node (dfa, token, mode)
924 re_dfa_t *dfa;
925 re_token_t token;
926 int mode;
927{
928 if (dfa->nodes_len >= dfa->nodes_alloc)
929 {
930 re_token_t *new_array;
931 dfa->nodes_alloc *= 2;
932 new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
bc15410e 933 if (BE (new_array == NULL, 0))
1b2c2628 934 return -1;
3b0bdc72 935 else
1b2c2628 936 dfa->nodes = new_array;
3b0bdc72 937 if (mode)
1b2c2628
UD
938 {
939 int *new_nexts;
940 re_node_set *new_edests, *new_eclosures, *new_inveclosures;
941
942 new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
943 new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
944 new_eclosures = re_realloc (dfa->eclosures, re_node_set,
945 dfa->nodes_alloc);
946 new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
947 dfa->nodes_alloc);
948 if (BE (new_nexts == NULL || new_edests == NULL
949 || new_eclosures == NULL || new_inveclosures == NULL, 0))
950 return -1;
951 dfa->nexts = new_nexts;
952 dfa->edests = new_edests;
953 dfa->eclosures = new_eclosures;
954 dfa->inveclosures = new_inveclosures;
955 }
3b0bdc72
UD
956 }
957 dfa->nodes[dfa->nodes_len] = token;
958 dfa->nodes[dfa->nodes_len].duplicated = 0;
485d775d 959 dfa->nodes[dfa->nodes_len].constraint = 0;
3b0bdc72
UD
960 return dfa->nodes_len++;
961}
962
963static unsigned int inline
964calc_state_hash (nodes, context)
965 const re_node_set *nodes;
966 unsigned int context;
967{
968 unsigned int hash = nodes->nelem + context;
969 int i;
970 for (i = 0 ; i < nodes->nelem ; i++)
971 hash += nodes->elems[i];
972 return hash;
973}
974
975/* Search for the state whose node_set is equivalent to NODES.
976 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
977 Otherwise create the new one and return it. In case of an error
978 return NULL and set the error code in ERR.
979 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
980 return value is NULL and ERR is REG_NOERROR.
981 - We never return non-NULL value in case of any errors, it is for
982 optimization. */
a9388965
UD
983
984static re_dfastate_t*
985re_acquire_state (err, dfa, nodes)
986 reg_errcode_t *err;
3b0bdc72
UD
987 re_dfa_t *dfa;
988 const re_node_set *nodes;
989{
990 unsigned int hash;
a9388965 991 re_dfastate_t *new_state;
3b0bdc72
UD
992 struct re_state_table_entry *spot;
993 int i;
bc15410e 994 if (BE (nodes->nelem == 0, 0))
a9388965
UD
995 {
996 *err = REG_NOERROR;
997 return NULL;
998 }
3b0bdc72
UD
999 hash = calc_state_hash (nodes, 0);
1000 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1001
bc15410e 1002 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1003 {
bc15410e
UD
1004 re_dfastate_t *state = spot->array[i];
1005 if (hash != state->hash)
1b2c2628 1006 continue;
bc15410e 1007 if (re_node_set_compare (&state->nodes, nodes))
1b2c2628 1008 return state;
3b0bdc72 1009 }
3b0bdc72
UD
1010
1011 /* There are no appropriate state in the dfa, create the new one. */
a9388965 1012 new_state = create_ci_newstate (dfa, nodes, hash);
bc15410e 1013 if (BE (new_state != NULL, 1))
a9388965
UD
1014 return new_state;
1015 else
1016 {
1017 *err = REG_ESPACE;
1018 return NULL;
1019 }
3b0bdc72
UD
1020}
1021
1022/* Search for the state whose node_set is equivalent to NODES and
1023 whose context is equivalent to CONTEXT.
1024 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1025 Otherwise create the new one and return it. In case of an error
1026 return NULL and set the error code in ERR.
1027 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1028 return value is NULL and ERR is REG_NOERROR.
1029 - We never return non-NULL value in case of any errors, it is for
1030 optimization. */
a9388965
UD
1031
1032static re_dfastate_t*
1033re_acquire_state_context (err, dfa, nodes, context)
1034 reg_errcode_t *err;
3b0bdc72
UD
1035 re_dfa_t *dfa;
1036 const re_node_set *nodes;
1037 unsigned int context;
1038{
1039 unsigned int hash;
a9388965 1040 re_dfastate_t *new_state;
3b0bdc72
UD
1041 struct re_state_table_entry *spot;
1042 int i;
1043 if (nodes->nelem == 0)
a9388965
UD
1044 {
1045 *err = REG_NOERROR;
1046 return NULL;
1047 }
3b0bdc72
UD
1048 hash = calc_state_hash (nodes, context);
1049 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1050
bc15410e 1051 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1052 {
bc15410e
UD
1053 re_dfastate_t *state = spot->array[i];
1054 if (hash != state->hash)
1b2c2628 1055 continue;
bc15410e 1056 if (re_node_set_compare (state->entrance_nodes, nodes)
1b2c2628
UD
1057 && state->context == context)
1058 return state;
3b0bdc72 1059 }
3b0bdc72 1060 /* There are no appropriate state in `dfa', create the new one. */
a9388965 1061 new_state = create_cd_newstate (dfa, nodes, context, hash);
bc15410e 1062 if (BE (new_state != NULL, 1))
a9388965
UD
1063 return new_state;
1064 else
1065 {
1066 *err = REG_ESPACE;
1067 return NULL;
1068 }
3b0bdc72
UD
1069}
1070
a9388965
UD
1071/* Allocate memory for DFA state and initialize common properties.
1072 Return the new state if succeeded, otherwise return NULL. */
1073
3b0bdc72
UD
1074static re_dfastate_t *
1075create_newstate_common (dfa, nodes, hash)
1076 re_dfa_t *dfa;
1077 const re_node_set *nodes;
1078 unsigned int hash;
1079{
1080 re_dfastate_t *newstate;
1b2c2628 1081 reg_errcode_t err;
3b0bdc72 1082 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
bc15410e 1083 if (BE (newstate == NULL, 0))
a9388965 1084 return NULL;
1b2c2628
UD
1085 err = re_node_set_init_copy (&newstate->nodes, nodes);
1086 if (BE (err != REG_NOERROR, 0))
1087 {
1088 re_free (newstate);
1089 return NULL;
1090 }
3b0bdc72
UD
1091 newstate->trtable = NULL;
1092 newstate->trtable_search = NULL;
1093 newstate->hash = hash;
1094 return newstate;
1095}
1096
a9388965
UD
1097/* Store the new state NEWSTATE whose hash value is HASH in appropriate
1098 position. Return value indicate the error code if failed. */
1099
1100static reg_errcode_t
3b0bdc72
UD
1101register_state (dfa, newstate, hash)
1102 re_dfa_t *dfa;
1103 re_dfastate_t *newstate;
1104 unsigned int hash;
1105{
1106 struct re_state_table_entry *spot;
1107 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1108
1109 if (spot->alloc <= spot->num)
1110 {
1b2c2628 1111 re_dfastate_t **new_array;
bc15410e 1112 spot->alloc = 2 * spot->num + 2;
1b2c2628
UD
1113 new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
1114 if (BE (new_array == NULL, 0))
1115 return REG_ESPACE;
1116 spot->array = new_array;
3b0bdc72 1117 }
bc15410e 1118 spot->array[spot->num++] = newstate;
a9388965 1119 return REG_NOERROR;
3b0bdc72
UD
1120}
1121
a9388965
UD
1122/* Create the new state which is independ of contexts.
1123 Return the new state if succeeded, otherwise return NULL. */
1124
3b0bdc72
UD
1125static re_dfastate_t *
1126create_ci_newstate (dfa, nodes, hash)
1127 re_dfa_t *dfa;
1128 const re_node_set *nodes;
1129 unsigned int hash;
1130{
1131 int i;
a9388965 1132 reg_errcode_t err;
3b0bdc72
UD
1133 re_dfastate_t *newstate;
1134 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1135 if (BE (newstate == NULL, 0))
a9388965 1136 return NULL;
3b0bdc72
UD
1137 newstate->entrance_nodes = &newstate->nodes;
1138
1139 for (i = 0 ; i < nodes->nelem ; i++)
1140 {
1141 re_token_t *node = dfa->nodes + nodes->elems[i];
1142 re_token_type_t type = node->type;
485d775d 1143 if (type == CHARACTER && !node->constraint)
1b2c2628 1144 continue;
3b0bdc72
UD
1145
1146 /* If the state has the halt node, the state is a halt state. */
1147 else if (type == END_OF_RE)
1b2c2628 1148 newstate->halt = 1;
c0a0f9a3 1149#ifdef RE_ENABLE_I18N
3b0bdc72 1150 else if (type == COMPLEX_BRACKET
1b2c2628
UD
1151 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1152 newstate->accept_mb = 1;
c0a0f9a3 1153#endif /* RE_ENABLE_I18N */
3b0bdc72 1154 else if (type == OP_BACK_REF)
1b2c2628 1155 newstate->has_backref = 1;
485d775d 1156 else if (type == ANCHOR || node->constraint)
1b2c2628 1157 newstate->has_constraint = 1;
3b0bdc72 1158 }
a9388965 1159 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1160 if (BE (err != REG_NOERROR, 0))
1161 {
1162 free_state (newstate);
1163 newstate = NULL;
1164 }
1165 return newstate;
3b0bdc72
UD
1166}
1167
a9388965
UD
1168/* Create the new state which is depend on the context CONTEXT.
1169 Return the new state if succeeded, otherwise return NULL. */
1170
3b0bdc72
UD
1171static re_dfastate_t *
1172create_cd_newstate (dfa, nodes, context, hash)
1173 re_dfa_t *dfa;
1174 const re_node_set *nodes;
1175 unsigned int context, hash;
1176{
1177 int i, nctx_nodes = 0;
a9388965 1178 reg_errcode_t err;
3b0bdc72
UD
1179 re_dfastate_t *newstate;
1180
1181 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1182 if (BE (newstate == NULL, 0))
a9388965 1183 return NULL;
3b0bdc72
UD
1184 newstate->context = context;
1185 newstate->entrance_nodes = &newstate->nodes;
1186
1187 for (i = 0 ; i < nodes->nelem ; i++)
1188 {
1189 unsigned int constraint = 0;
1190 re_token_t *node = dfa->nodes + nodes->elems[i];
1191 re_token_type_t type = node->type;
485d775d 1192 if (node->constraint)
1b2c2628 1193 constraint = node->constraint;
3b0bdc72 1194
485d775d 1195 if (type == CHARACTER && !constraint)
1b2c2628 1196 continue;
3b0bdc72
UD
1197 /* If the state has the halt node, the state is a halt state. */
1198 else if (type == END_OF_RE)
1b2c2628 1199 newstate->halt = 1;
c0a0f9a3 1200#ifdef RE_ENABLE_I18N
3b0bdc72 1201 else if (type == COMPLEX_BRACKET
1b2c2628
UD
1202 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1203 newstate->accept_mb = 1;
c0a0f9a3 1204#endif /* RE_ENABLE_I18N */
3b0bdc72 1205 else if (type == OP_BACK_REF)
1b2c2628 1206 newstate->has_backref = 1;
3b0bdc72 1207 else if (type == ANCHOR)
1b2c2628 1208 constraint = node->opr.ctx_type;
3b0bdc72
UD
1209
1210 if (constraint)
1b2c2628
UD
1211 {
1212 if (newstate->entrance_nodes == &newstate->nodes)
1213 {
1214 newstate->entrance_nodes = re_malloc (re_node_set, 1);
bc15410e 1215 if (BE (newstate->entrance_nodes == NULL, 0))
1b2c2628
UD
1216 {
1217 free_state (newstate);
1218 return NULL;
1219 }
1220 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1221 nctx_nodes = 0;
1222 newstate->has_constraint = 1;
1223 }
1224
1225 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1226 {
1227 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1228 ++nctx_nodes;
1229 }
1230 }
3b0bdc72 1231 }
a9388965 1232 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1233 if (BE (err != REG_NOERROR, 0))
1234 {
1235 free_state (newstate);
1236 newstate = NULL;
1237 }
1238 return newstate;
1239}
1240
1241static void
1242free_state (state)
1243 re_dfastate_t *state;
1244{
1245 if (state->entrance_nodes != &state->nodes)
1246 {
1247 re_node_set_free (state->entrance_nodes);
1248 re_free (state->entrance_nodes);
1249 }
1250 re_node_set_free (&state->nodes);
1251 re_free (state->trtable);
1252 re_free (state->trtable_search);
1253 re_free (state);
3b0bdc72 1254}