]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regex_internal.c
Update.
[thirdparty/glibc.git] / posix / regex_internal.c
CommitLineData
3b0bdc72
UD
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21#include <assert.h>
22#include <ctype.h>
23#include <limits.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
27#include <wchar.h>
28#include <wctype.h>
29
30#ifdef _LIBC
31# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
32# define _RE_DEFINE_LOCALE_FUNCTIONS 1
33# include <locale/localeinfo.h>
34# include <locale/elem-hash.h>
35# include <locale/coll-lookup.h>
36# endif
37#endif
38
39/* This is for other GNU distributions with internationalized messages. */
40#if HAVE_LIBINTL_H || defined _LIBC
41# include <libintl.h>
42# ifdef _LIBC
43# undef gettext
71319b9c
UD
44# define gettext(msgid) \
45 INTUSE(__dcgettext) (_libc_intl_domainname_internal, msgid, LC_MESSAGES)
3b0bdc72
UD
46# endif
47#else
48# define gettext(msgid) (msgid)
49#endif
50
51#ifndef gettext_noop
52/* This define is so xgettext can find the internationalizable
53 strings. */
54# define gettext_noop(String) String
55#endif
56
57#include "regex.h"
58#include "regex_internal.h"
59
60static void re_string_construct_common (const unsigned char *str,
612546c6
UD
61 int len, re_string_t *pstr,
62 RE_TRANSLATE_TYPE trans, int icase);
434d3784 63#ifdef RE_ENABLE_I18N
612546c6 64static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx);
434d3784 65#endif /* RE_ENABLE_I18N */
3b0bdc72
UD
66static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
67 const re_node_set *nodes,
68 unsigned int hash);
a9388965
UD
69static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
70 unsigned int hash);
3b0bdc72
UD
71static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
72 const re_node_set *nodes,
73 unsigned int hash);
74static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
75 const re_node_set *nodes,
76 unsigned int context,
77 unsigned int hash);
78static unsigned int inline calc_state_hash (const re_node_set *nodes,
79 unsigned int context);
80\f
81/* Functions for string operation. */
82
612546c6
UD
83/* This function allocate the buffers. It is necessary to call
84 re_string_reconstruct before using the object. */
85
3b0bdc72 86static reg_errcode_t
612546c6 87re_string_allocate (pstr, str, len, init_len, trans, icase)
3b0bdc72
UD
88 re_string_t *pstr;
89 const unsigned char *str;
612546c6 90 int len, init_len, icase;
3b0bdc72
UD
91 RE_TRANSLATE_TYPE trans;
92{
93 reg_errcode_t ret;
612546c6
UD
94 int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
95 re_string_construct_common (str, len, pstr, trans, icase);
96
97 ret = re_string_realloc_buffers (pstr, init_buf_len);
98 if (BE (ret != REG_NOERROR, 0))
99 return ret;
100
101 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
102 : (unsigned char *)str);
103 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
104 pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
105 || MB_CUR_MAX > 1) ? pstr->valid_len : len;
106 return REG_NOERROR;
107}
108
109/* This function allocate the buffers, and initialize them. */
110
111static reg_errcode_t
112re_string_construct (pstr, str, len, trans, icase)
113 re_string_t *pstr;
114 const unsigned char *str;
115 int len, icase;
116 RE_TRANSLATE_TYPE trans;
117{
118 reg_errcode_t ret;
119 re_string_construct_common (str, len, pstr, trans, icase);
120 /* Set 0 so that this function can initialize whole buffers. */
121 pstr->valid_len = 0;
122
123 if (len > 0)
3b0bdc72 124 {
612546c6 125 ret = re_string_realloc_buffers (pstr, len + 1);
bc15410e 126 if (BE (ret != REG_NOERROR, 0))
3b0bdc72
UD
127 return ret;
128 }
612546c6
UD
129 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
130 : (unsigned char *)str);
131 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
132
133 if (icase)
134 {
135#ifdef RE_ENABLE_I18N
136 if (MB_CUR_MAX > 1)
137 build_wcs_upper_buffer (pstr);
138 else
3b0bdc72 139#endif /* RE_ENABLE_I18N */
434d3784 140 build_upper_buffer (pstr);
612546c6
UD
141 }
142 else
3b0bdc72 143 {
612546c6
UD
144#ifdef RE_ENABLE_I18N
145 if (MB_CUR_MAX > 1)
146 build_wcs_buffer (pstr);
147 else
148#endif /* RE_ENABLE_I18N */
149 {
150 if (trans != NULL)
151 re_string_translate_buffer (pstr);
152 else
153 pstr->valid_len = len;
154 }
3b0bdc72 155 }
612546c6
UD
156
157 /* Initialized whole buffers, then valid_len == bufs_len. */
158 pstr->valid_len = pstr->bufs_len;
3b0bdc72
UD
159 return REG_NOERROR;
160}
161
612546c6 162/* Helper functions for re_string_allocate, and re_string_construct. */
3b0bdc72
UD
163
164static reg_errcode_t
612546c6 165re_string_realloc_buffers (pstr, new_buf_len)
3b0bdc72 166 re_string_t *pstr;
612546c6 167 int new_buf_len;
3b0bdc72 168{
3b0bdc72 169#ifdef RE_ENABLE_I18N
612546c6 170 if (MB_CUR_MAX > 1)
3b0bdc72 171 {
c0a0f9a3 172 pstr->wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
612546c6
UD
173 if (BE (pstr->wcs == NULL, 0))
174 return REG_ESPACE;
3b0bdc72 175 }
3b0bdc72 176#endif /* RE_ENABLE_I18N */
612546c6 177 if (MBS_ALLOCATED (pstr))
3b0bdc72 178 {
612546c6
UD
179 pstr->mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len);
180 if (BE (pstr->mbs == NULL, 0))
181 return REG_ESPACE;
3b0bdc72 182 }
612546c6 183 if (MBS_CASE_ALLOCATED (pstr))
3b0bdc72 184 {
612546c6
UD
185 pstr->mbs_case = re_realloc (pstr->mbs_case, unsigned char, new_buf_len);
186 if (BE (pstr->mbs_case == NULL, 0))
187 return REG_ESPACE;
188 if (!MBS_ALLOCATED (pstr))
189 pstr->mbs = pstr->mbs_case;
3b0bdc72 190 }
612546c6 191 pstr->bufs_len = new_buf_len;
3b0bdc72
UD
192 return REG_NOERROR;
193}
194
612546c6 195
3b0bdc72 196static void
612546c6 197re_string_construct_common (str, len, pstr, trans, icase)
3b0bdc72
UD
198 const unsigned char *str;
199 int len;
200 re_string_t *pstr;
612546c6
UD
201 RE_TRANSLATE_TYPE trans;
202 int icase;
3b0bdc72 203{
612546c6
UD
204 memset (pstr, '\0', sizeof (re_string_t));
205 pstr->raw_mbs = str;
3b0bdc72 206 pstr->len = len;
612546c6
UD
207 pstr->trans = trans;
208 pstr->icase = icase ? 1 : 0;
3b0bdc72
UD
209}
210
211#ifdef RE_ENABLE_I18N
212
612546c6 213/* Build wide character buffer PSTR->WCS.
3b0bdc72
UD
214 If the byte sequence of the string are:
215 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
216 Then wide character buffer will be:
217 <wc1> , WEOF , <wc2> , WEOF , <wc3>
218 We use WEOF for padding, they indicate that the position isn't
612546c6 219 a first byte of a multibyte character.
3b0bdc72 220
612546c6
UD
221 Note that this function assumes PSTR->VALID_LEN elements are already
222 built and starts from PSTR->VALID_LEN. */
223
224static void
3b0bdc72
UD
225build_wcs_buffer (pstr)
226 re_string_t *pstr;
227{
612546c6
UD
228 mbstate_t prev_st;
229 int byte_idx, end_idx, mbclen, remain_len;
230 /* Build the buffers from pstr->valid_len to either pstr->len or
231 pstr->bufs_len. */
232 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
233 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
234 {
235 wchar_t wc;
236 remain_len = end_idx - byte_idx;
237 prev_st = pstr->cur_state;
238 mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
239 remain_len, &pstr->cur_state);
240 if (BE (mbclen == (size_t) -2, 0))
241 {
242 /* The buffer doesn't have enough space, finish to build. */
243 pstr->cur_state = prev_st;
244 break;
245 }
246 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
3b0bdc72 247 {
612546c6 248 /* We treat these cases as a singlebyte character. */
3b0bdc72 249 mbclen = 1;
612546c6
UD
250 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
251 pstr->cur_state = prev_st;
252 }
253
254 /* Apply the translateion if we need. */
255 if (pstr->trans != NULL && mbclen == 1)
256 {
257 int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
258 pstr->mbs_case[byte_idx] = ch;
3b0bdc72
UD
259 }
260 /* Write wide character and padding. */
612546c6
UD
261 pstr->wcs[byte_idx++] = wc;
262 /* Write paddings. */
263 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
264 pstr->wcs[byte_idx++] = WEOF;
3b0bdc72 265 }
612546c6 266 pstr->valid_len = byte_idx;
3b0bdc72
UD
267}
268
612546c6
UD
269/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
270 but for REG_ICASE. */
271
272static void
3b0bdc72
UD
273build_wcs_upper_buffer (pstr)
274 re_string_t *pstr;
275{
612546c6
UD
276 mbstate_t prev_st;
277 int byte_idx, end_idx, mbclen, remain_len;
278 /* Build the buffers from pstr->valid_len to either pstr->len or
279 pstr->bufs_len. */
280 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
281 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
282 {
283 wchar_t wc;
284 remain_len = end_idx - byte_idx;
285 prev_st = pstr->cur_state;
286 mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
287 remain_len, &pstr->cur_state);
288 if (BE (mbclen == (size_t) -2, 0))
3b0bdc72 289 {
612546c6
UD
290 /* The buffer doesn't have enough space, finish to build. */
291 pstr->cur_state = prev_st;
292 break;
3b0bdc72 293 }
612546c6 294 else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
3b0bdc72 295 {
612546c6
UD
296 /* In case of a singlebyte character. */
297 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
298 /* Apply the translateion if we need. */
299 if (pstr->trans != NULL && mbclen == 1)
300 {
301 ch = pstr->trans[ch];
302 pstr->mbs_case[byte_idx] = ch;
303 }
304 pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
305 pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
306 if (BE (mbclen == (size_t) -1, 0))
307 pstr->cur_state = prev_st;
3b0bdc72
UD
308 }
309 else /* mbclen > 1 */
310 {
3b0bdc72 311 if (iswlower (wc))
612546c6 312 wcrtomb (pstr->mbs + byte_idx, towupper (wc), &prev_st);
3b0bdc72 313 else
612546c6
UD
314 memcpy (pstr->mbs + byte_idx,
315 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
316 pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
317 /* Write paddings. */
318 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
319 pstr->wcs[byte_idx++] = WEOF;
3b0bdc72
UD
320 }
321 }
612546c6
UD
322 pstr->valid_len = byte_idx;
323}
324
325/* Skip characters until the index becomes greater than NEW_RAW_IDX.
326 Return the index. */
327
328static int
329re_string_skip_chars (pstr, new_raw_idx)
330 re_string_t *pstr;
331 int new_raw_idx;
332{
333 mbstate_t prev_st;
334 int rawbuf_idx, mbclen;
335
336 /* Skip the characters which are not necessary to check. */
337 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
338 rawbuf_idx < new_raw_idx;)
339 {
340 int remain_len = pstr->len - rawbuf_idx;
341 prev_st = pstr->cur_state;
342 mbclen = mbrlen (pstr->raw_mbs + rawbuf_idx, remain_len,
343 &pstr->cur_state);
344 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
345 {
346 /* We treat these cases as a singlebyte character. */
347 mbclen = 1;
348 pstr->cur_state = prev_st;
349 }
350 /* Then proceed the next character. */
351 rawbuf_idx += mbclen;
352 }
353 return rawbuf_idx;
3b0bdc72
UD
354}
355#endif /* RE_ENABLE_I18N */
356
612546c6
UD
357/* Build the buffer PSTR->MBS, and apply the translation if we need.
358 This function is used in case of REG_ICASE. */
359
360static void
3b0bdc72
UD
361build_upper_buffer (pstr)
362 re_string_t *pstr;
363{
612546c6
UD
364 int char_idx, end_idx;
365 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
3b0bdc72 366
612546c6 367 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
3b0bdc72 368 {
612546c6
UD
369 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
370 if (pstr->trans != NULL)
371 {
372 ch = pstr->trans[ch];
373 pstr->mbs_case[char_idx] = ch;
374 }
375 if (islower (ch))
376 pstr->mbs[char_idx] = toupper (ch);
3b0bdc72 377 else
612546c6 378 pstr->mbs[char_idx] = ch;
3b0bdc72 379 }
612546c6 380 pstr->valid_len = char_idx;
3b0bdc72
UD
381}
382
612546c6 383/* Apply TRANS to the buffer in PSTR. */
3b0bdc72 384
612546c6
UD
385static void
386re_string_translate_buffer (pstr)
3b0bdc72 387 re_string_t *pstr;
3b0bdc72 388{
612546c6
UD
389 int buf_idx, end_idx;
390 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
391
392 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
3b0bdc72 393 {
612546c6
UD
394 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
395 pstr->mbs_case[buf_idx] = pstr->trans[ch];
3b0bdc72 396 }
612546c6
UD
397
398 pstr->valid_len = buf_idx;
399}
400
401/* This function re-construct the buffers.
402 Concretely, convert to wide character in case of MB_CUR_MAX > 1,
403 convert to upper case in case of REG_ICASE, apply translation. */
404
405static reg_errcode_t
406re_string_reconstruct (pstr, idx, eflags, newline)
407 re_string_t *pstr;
408 int idx, eflags, newline;
409{
410 int offset = idx - pstr->raw_mbs_idx;
411 if (offset < 0)
412 {
413 /* Reset buffer. */
434d3784
UD
414#ifdef RE_ENABLE_I18N
415 if (MB_CUR_MAX > 1)
416 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
417#endif /* RE_ENABLE_I18N */
612546c6
UD
418 pstr->valid_len = pstr->raw_mbs_idx = 0;
419 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
420 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
421 if (!MBS_CASE_ALLOCATED (pstr))
422 pstr->mbs_case = (unsigned char *)pstr->raw_mbs;
423 if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
424 pstr->mbs = (unsigned char *)pstr->raw_mbs;
425 offset = idx;
426 }
427
428 if (offset != 0)
429 {
430 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
431 newline);
432 /* Are the characters which are already checked remain? */
433 if (offset < pstr->valid_len)
434 {
435 /* Yes, move them to the front of the buffer. */
3b0bdc72 436#ifdef RE_ENABLE_I18N
612546c6
UD
437 if (MB_CUR_MAX > 1)
438 memmove (pstr->wcs, pstr->wcs + offset,
c0a0f9a3 439 (pstr->valid_len - offset) * sizeof (wint_t));
612546c6
UD
440#endif /* RE_ENABLE_I18N */
441 if (MBS_ALLOCATED (pstr))
442 memmove (pstr->mbs, pstr->mbs + offset,
443 pstr->valid_len - offset);
444 if (MBS_CASE_ALLOCATED (pstr))
445 memmove (pstr->mbs_case, pstr->mbs_case + offset,
446 pstr->valid_len - offset);
447 pstr->valid_len -= offset;
448#if DEBUG
449 assert (pstr->valid_len > 0);
3b0bdc72 450#endif
612546c6
UD
451 }
452 else
3b0bdc72 453 {
612546c6
UD
454 /* No, skip all characters until IDX. */
455 pstr->valid_len = 0;
3b0bdc72 456#ifdef RE_ENABLE_I18N
612546c6
UD
457 if (MB_CUR_MAX > 1)
458 {
459 int wcs_idx;
460 pstr->valid_len = re_string_skip_chars (pstr, idx) - idx;
461 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
462 pstr->wcs[wcs_idx] = WEOF;
463 }
464#endif /* RE_ENABLE_I18N */
465 }
466 if (!MBS_CASE_ALLOCATED (pstr))
467 {
468 pstr->mbs_case += offset;
469 /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
470 if (!MBS_ALLOCATED (pstr))
471 pstr->mbs += offset;
3b0bdc72
UD
472 }
473 }
612546c6
UD
474 pstr->raw_mbs_idx = idx;
475 pstr->len -= offset;
476
477 /* Then build the buffers. */
478#ifdef RE_ENABLE_I18N
479 if (MB_CUR_MAX > 1)
3b0bdc72 480 {
612546c6
UD
481 if (pstr->icase)
482 build_wcs_upper_buffer (pstr);
483 else
484 build_wcs_buffer (pstr);
3b0bdc72
UD
485 }
486 else
612546c6 487#endif /* RE_ENABLE_I18N */
3b0bdc72 488 {
612546c6
UD
489 if (pstr->icase)
490 build_upper_buffer (pstr);
491 else if (pstr->trans != NULL)
492 re_string_translate_buffer (pstr);
3b0bdc72 493 }
612546c6
UD
494 pstr->cur_idx = 0;
495
3b0bdc72
UD
496 return REG_NOERROR;
497}
498
499static void
500re_string_destruct (pstr)
501 re_string_t *pstr;
502{
503#ifdef RE_ENABLE_I18N
504 re_free (pstr->wcs);
505#endif /* RE_ENABLE_I18N */
612546c6
UD
506 if (MBS_ALLOCATED (pstr))
507 re_free (pstr->mbs);
508 if (MBS_CASE_ALLOCATED (pstr))
509 re_free (pstr->mbs_case);
3b0bdc72
UD
510}
511
512/* Return the context at IDX in INPUT. */
612546c6 513
3b0bdc72
UD
514static unsigned int
515re_string_context_at (input, idx, eflags, newline_anchor)
516 const re_string_t *input;
517 int idx, eflags, newline_anchor;
518{
519 int c;
520 if (idx < 0 || idx == input->len)
521 {
3b0bdc72 522 if (idx < 0)
612546c6
UD
523 /* In this case, we use the value stored in input->tip_context,
524 since we can't know the character in input->mbs[-1] here. */
525 return input->tip_context;
bc15410e 526 else /* (idx == input->len) */
612546c6
UD
527 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
528 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
3b0bdc72
UD
529 }
530 c = re_string_byte_at (input, idx);
531 if (IS_WORD_CHAR (c))
532 return CONTEXT_WORD;
533 return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
534}
535\f
536/* Functions for set operation. */
537
538static reg_errcode_t
539re_node_set_alloc (set, size)
540 re_node_set *set;
541 int size;
542{
543 set->alloc = size;
544 set->nelem = 0;
545 set->elems = re_malloc (int, size);
bc15410e 546 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
547 return REG_ESPACE;
548 return REG_NOERROR;
549}
550
551static reg_errcode_t
552re_node_set_init_1 (set, elem)
553 re_node_set *set;
554 int elem;
555{
556 set->alloc = 1;
557 set->nelem = 1;
558 set->elems = re_malloc (int, 1);
bc15410e 559 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
560 return REG_ESPACE;
561 set->elems[0] = elem;
562 return REG_NOERROR;
563}
564
565static reg_errcode_t
566re_node_set_init_2 (set, elem1, elem2)
567 re_node_set *set;
568 int elem1, elem2;
569{
570 set->alloc = 2;
571 set->elems = re_malloc (int, 2);
bc15410e 572 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
573 return REG_ESPACE;
574 if (elem1 == elem2)
575 {
576 set->nelem = 1;
577 set->elems[0] = elem1;
578 }
579 else
580 {
581 set->nelem = 2;
582 if (elem1 < elem2)
583 {
584 set->elems[0] = elem1;
585 set->elems[1] = elem2;
586 }
587 else
588 {
589 set->elems[0] = elem2;
590 set->elems[1] = elem1;
591 }
592 }
593 return REG_NOERROR;
594}
595
596static reg_errcode_t
597re_node_set_init_copy (dest, src)
598 re_node_set *dest;
599 const re_node_set *src;
600{
601 dest->nelem = src->nelem;
602 if (src->nelem > 0)
603 {
604 dest->alloc = dest->nelem;
605 dest->elems = re_malloc (int, dest->alloc);
bc15410e 606 if (BE (dest->elems == NULL, 0))
3b0bdc72
UD
607 return REG_ESPACE;
608 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
609 }
610 else
611 re_node_set_init_empty (dest);
612 return REG_NOERROR;
613}
614
a9388965
UD
615/* Calculate the intersection of the sets SRC1 and SRC2. And store it in
616 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
617 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
618
3b0bdc72
UD
619static reg_errcode_t
620re_node_set_intersect (dest, src1, src2)
621 re_node_set *dest;
622 const re_node_set *src1, *src2;
623{
624 int i1, i2, id;
625 if (src1->nelem > 0 && src2->nelem > 0)
626 {
627 if (src1->nelem + src2->nelem > dest->alloc)
628 {
3b0bdc72 629 dest->alloc = src1->nelem + src2->nelem;
a9388965 630 dest->elems = re_realloc (dest->elems, int, dest->alloc);
bc15410e 631 if (BE (dest->elems == NULL, 0))
3b0bdc72 632 return REG_ESPACE;
3b0bdc72
UD
633 }
634 }
635 else
636 {
a9388965 637 /* The intersection of empty sets is also empty set. */
3b0bdc72
UD
638 dest->nelem = 0;
639 return REG_NOERROR;
640 }
641
a9388965 642 for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem; )
3b0bdc72
UD
643 {
644 if (src1->elems[i1] > src2->elems[i2])
645 {
646 ++i2;
647 continue;
648 }
a9388965
UD
649 /* The intersection must have the elements which are in both of
650 SRC1 and SRC2. */
3b0bdc72
UD
651 if (src1->elems[i1] == src2->elems[i2])
652 dest->elems[id++] = src2->elems[i2++];
653 ++i1;
654 }
655 dest->nelem = id;
656 return REG_NOERROR;
657}
658
a9388965
UD
659/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
660 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
661 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
662
3b0bdc72
UD
663static reg_errcode_t
664re_node_set_add_intersect (dest, src1, src2)
665 re_node_set *dest;
666 const re_node_set *src1, *src2;
667{
668 int i1, i2, id;
669 if (src1->nelem > 0 && src2->nelem > 0)
670 {
671 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
672 {
3b0bdc72 673 dest->alloc = src1->nelem + src2->nelem + dest->nelem;
a9388965 674 dest->elems = re_realloc (dest->elems, int, dest->alloc);
bc15410e 675 if (BE (dest->elems == NULL, 0))
3b0bdc72 676 return REG_ESPACE;
3b0bdc72
UD
677 }
678 }
679 else
680 return REG_NOERROR;
681
682 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
683 {
684 if (src1->elems[i1] > src2->elems[i2])
685 {
686 ++i2;
687 continue;
688 }
689 if (src1->elems[i1] == src2->elems[i2])
690 {
691 while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
692 ++id;
693 if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
694 ++id;
695 else
696 {
697 memmove (dest->elems + id + 1, dest->elems + id,
698 sizeof (int) * (dest->nelem - id));
699 dest->elems[id++] = src2->elems[i2++];
700 ++dest->nelem;
701 }
702 }
703 ++i1;
704 }
705 return REG_NOERROR;
706}
707
a9388965
UD
708/* Calculate the union set of the sets SRC1 and SRC2. And store it to
709 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
710
3b0bdc72
UD
711static reg_errcode_t
712re_node_set_init_union (dest, src1, src2)
713 re_node_set *dest;
714 const re_node_set *src1, *src2;
715{
716 int i1, i2, id;
717 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
718 {
719 dest->alloc = src1->nelem + src2->nelem;
720 dest->elems = re_malloc (int, dest->alloc);
bc15410e 721 if (BE (dest->elems == NULL, 0))
3b0bdc72
UD
722 return REG_ESPACE;
723 }
724 else
725 {
726 if (src1 != NULL && src1->nelem > 0)
727 return re_node_set_init_copy (dest, src1);
728 else if (src2 != NULL && src2->nelem > 0)
729 return re_node_set_init_copy (dest, src2);
730 else
731 re_node_set_init_empty (dest);
732 return REG_NOERROR;
733 }
734 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
735 {
736 if (src1->elems[i1] > src2->elems[i2])
737 {
738 dest->elems[id++] = src2->elems[i2++];
739 continue;
740 }
741 if (src1->elems[i1] == src2->elems[i2])
742 ++i2;
743 dest->elems[id++] = src1->elems[i1++];
744 }
745 if (i1 < src1->nelem)
746 {
747 memcpy (dest->elems + id, src1->elems + i1,
748 (src1->nelem - i1) * sizeof (int));
749 id += src1->nelem - i1;
750 }
751 else if (i2 < src2->nelem)
752 {
753 memcpy (dest->elems + id, src2->elems + i2,
754 (src2->nelem - i2) * sizeof (int));
755 id += src2->nelem - i2;
756 }
757 dest->nelem = id;
758 return REG_NOERROR;
759}
760
a9388965
UD
761/* Calculate the union set of the sets DEST and SRC. And store it to
762 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
763
3b0bdc72
UD
764static reg_errcode_t
765re_node_set_merge (dest, src)
766 re_node_set *dest;
767 const re_node_set *src;
768{
769 int si, di;
770 if (src == NULL || src->nelem == 0)
771 return REG_NOERROR;
3b0bdc72
UD
772 if (dest->alloc < src->nelem + dest->nelem)
773 {
774 dest->alloc = 2 * (src->nelem + dest->alloc);
775 dest->elems = re_realloc (dest->elems, int, dest->alloc);
bc15410e 776 if (BE (dest->elems == NULL, 0))
a9388965 777 return REG_ESPACE;
3b0bdc72
UD
778 }
779
780 for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
781 {
782 int cp_from, ncp, mid, right, src_elem = src->elems[si];
783 /* Binary search the spot we will add the new element. */
784 right = dest->nelem;
785 while (di < right)
786 {
787 mid = (di + right) / 2;
788 if (dest->elems[mid] < src_elem)
789 di = mid + 1;
790 else
791 right = mid;
792 }
793 if (di >= dest->nelem)
794 break;
795
796 if (dest->elems[di] == src_elem)
797 {
798 /* Skip since, DEST already has the element. */
799 ++di;
800 ++si;
801 continue;
802 }
803
804 /* Skip the src elements which are less than dest->elems[di]. */
805 cp_from = si;
806 while (si < src->nelem && src->elems[si] < dest->elems[di])
807 ++si;
808 /* Copy these src elements. */
809 ncp = si - cp_from;
810 memmove (dest->elems + di + ncp, dest->elems + di,
811 sizeof (int) * (dest->nelem - di));
812 memcpy (dest->elems + di, src->elems + cp_from,
813 sizeof (int) * ncp);
814 /* Update counters. */
815 di += ncp;
816 dest->nelem += ncp;
817 }
818
819 /* Copy remaining src elements. */
820 if (si < src->nelem)
821 {
822 memcpy (dest->elems + di, src->elems + si,
823 sizeof (int) * (src->nelem - si));
824 dest->nelem += src->nelem - si;
825 }
826 return REG_NOERROR;
827}
828
829/* Insert the new element ELEM to the re_node_set* SET.
830 return 0 if SET already has ELEM,
831 return -1 if an error is occured, return 1 otherwise. */
832
833static int
834re_node_set_insert (set, elem)
835 re_node_set *set;
836 int elem;
837{
838 int idx, right, mid;
839 /* In case of the set is empty. */
840 if (set->elems == NULL || set->alloc == 0)
841 {
bc15410e 842 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
3b0bdc72
UD
843 return 1;
844 else
845 return -1;
846 }
847
848 /* Binary search the spot we will add the new element. */
849 idx = 0;
850 right = set->nelem;
851 while (idx < right)
852 {
853 mid = (idx + right) / 2;
854 if (set->elems[mid] < elem)
855 idx = mid + 1;
856 else
857 right = mid;
858 }
859
860 /* Realloc if we need. */
861 if (set->alloc < set->nelem + 1)
862 {
863 int *new_array;
864 set->alloc = set->alloc * 2;
865 new_array = re_malloc (int, set->alloc);
bc15410e 866 if (BE (new_array == NULL, 0))
3b0bdc72
UD
867 return -1;
868 /* Copy the elements they are followed by the new element. */
869 if (idx > 0)
870 memcpy (new_array, set->elems, sizeof (int) * (idx));
871 /* Copy the elements which follows the new element. */
872 if (set->nelem - idx > 0)
873 memcpy (new_array + idx + 1, set->elems + idx,
874 sizeof (int) * (set->nelem - idx));
612546c6 875 re_free (set->elems);
3b0bdc72
UD
876 set->elems = new_array;
877 }
878 else
879 {
880 /* Move the elements which follows the new element. */
881 if (set->nelem - idx > 0)
882 memmove (set->elems + idx + 1, set->elems + idx,
883 sizeof (int) * (set->nelem - idx));
884 }
885 /* Insert the new element. */
886 set->elems[idx] = elem;
887 ++set->nelem;
888 return 1;
889}
890
891/* Compare two node sets SET1 and SET2.
892 return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
893
894static int
895re_node_set_compare (set1, set2)
896 const re_node_set *set1, *set2;
897{
898 int i;
899 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
900 return 0;
901 for (i = 0 ; i < set1->nelem ; i++)
902 if (set1->elems[i] != set2->elems[i])
903 return 0;
904 return 1;
905}
906
907/* Return 1 if SET contains the element ELEM, return 0 otherwise. */
908
909static int
910re_node_set_contains (set, elem)
911 const re_node_set *set;
912 int elem;
913{
914 int idx, right, mid;
915 if (set->nelem <= 0)
916 return 0;
917
918 /* Binary search the element. */
919 idx = 0;
920 right = set->nelem - 1;
921 while (idx < right)
922 {
923 mid = (idx + right) / 2;
924 if (set->elems[mid] < elem)
925 idx = mid + 1;
926 else
927 right = mid;
928 }
929 return set->elems[idx] == elem;
930}
931
932static void
933re_node_set_remove_at (set, idx)
934 re_node_set *set;
935 int idx;
936{
937 if (idx < 0 || idx >= set->nelem)
938 return;
939 if (idx < set->nelem - 1)
940 memmove (set->elems + idx, set->elems + idx + 1,
941 sizeof (int) * (set->nelem - idx - 1));
942 --set->nelem;
943}
944\f
945
946/* Add the token TOKEN to dfa->nodes, and return the index of the token.
947 Or return -1, if an error will be occured. */
948
949static int
950re_dfa_add_node (dfa, token, mode)
951 re_dfa_t *dfa;
952 re_token_t token;
953 int mode;
954{
955 if (dfa->nodes_len >= dfa->nodes_alloc)
956 {
957 re_token_t *new_array;
958 dfa->nodes_alloc *= 2;
959 new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
bc15410e 960 if (BE (new_array == NULL, 0))
3b0bdc72
UD
961 return -1;
962 else
963 dfa->nodes = new_array;
964 if (mode)
965 {
966 int *new_firsts, *new_nexts;
967 re_node_set *new_edests, *new_eclosures, *new_inveclosures;
968
969 new_firsts = re_realloc (dfa->firsts, int, dfa->nodes_alloc);
970 new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
971 new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
972 new_eclosures = re_realloc (dfa->eclosures, re_node_set,
973 dfa->nodes_alloc);
974 new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
975 dfa->nodes_alloc);
bc15410e
UD
976 if (BE (new_firsts == NULL || new_nexts == NULL || new_edests == NULL
977 || new_eclosures == NULL || new_inveclosures == NULL, 0))
3b0bdc72
UD
978 return -1;
979 dfa->firsts = new_firsts;
980 dfa->nexts = new_nexts;
981 dfa->edests = new_edests;
982 dfa->eclosures = new_eclosures;
983 dfa->inveclosures = new_inveclosures;
984 }
985 }
986 dfa->nodes[dfa->nodes_len] = token;
987 dfa->nodes[dfa->nodes_len].duplicated = 0;
988 return dfa->nodes_len++;
989}
990
991static unsigned int inline
992calc_state_hash (nodes, context)
993 const re_node_set *nodes;
994 unsigned int context;
995{
996 unsigned int hash = nodes->nelem + context;
997 int i;
998 for (i = 0 ; i < nodes->nelem ; i++)
999 hash += nodes->elems[i];
1000 return hash;
1001}
1002
1003/* Search for the state whose node_set is equivalent to NODES.
1004 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1005 Otherwise create the new one and return it. In case of an error
1006 return NULL and set the error code in ERR.
1007 Note: - We assume NULL as the invalid state, then it is possible that
1008 return value is NULL and ERR is REG_NOERROR.
1009 - We never return non-NULL value in case of any errors, it is for
1010 optimization. */
1011
1012static re_dfastate_t*
1013re_acquire_state (err, dfa, nodes)
1014 reg_errcode_t *err;
3b0bdc72
UD
1015 re_dfa_t *dfa;
1016 const re_node_set *nodes;
1017{
1018 unsigned int hash;
a9388965 1019 re_dfastate_t *new_state;
3b0bdc72
UD
1020 struct re_state_table_entry *spot;
1021 int i;
bc15410e 1022 if (BE (nodes->nelem == 0, 0))
a9388965
UD
1023 {
1024 *err = REG_NOERROR;
1025 return NULL;
1026 }
3b0bdc72
UD
1027 hash = calc_state_hash (nodes, 0);
1028 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1029
bc15410e 1030 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1031 {
bc15410e
UD
1032 re_dfastate_t *state = spot->array[i];
1033 if (hash != state->hash)
1034 continue;
1035 if (re_node_set_compare (&state->nodes, nodes))
1036 return state;
3b0bdc72 1037 }
3b0bdc72
UD
1038
1039 /* There are no appropriate state in the dfa, create the new one. */
a9388965 1040 new_state = create_ci_newstate (dfa, nodes, hash);
bc15410e 1041 if (BE (new_state != NULL, 1))
a9388965
UD
1042 return new_state;
1043 else
1044 {
1045 *err = REG_ESPACE;
1046 return NULL;
1047 }
3b0bdc72
UD
1048}
1049
1050/* Search for the state whose node_set is equivalent to NODES and
1051 whose context is equivalent to CONTEXT.
1052 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1053 Otherwise create the new one and return it. In case of an error
1054 return NULL and set the error code in ERR.
1055 Note: - We assume NULL as the invalid state, then it is possible that
1056 return value is NULL and ERR is REG_NOERROR.
1057 - We never return non-NULL value in case of any errors, it is for
1058 optimization. */
1059
1060static re_dfastate_t*
1061re_acquire_state_context (err, dfa, nodes, context)
1062 reg_errcode_t *err;
3b0bdc72
UD
1063 re_dfa_t *dfa;
1064 const re_node_set *nodes;
1065 unsigned int context;
1066{
1067 unsigned int hash;
a9388965 1068 re_dfastate_t *new_state;
3b0bdc72
UD
1069 struct re_state_table_entry *spot;
1070 int i;
1071 if (nodes->nelem == 0)
a9388965
UD
1072 {
1073 *err = REG_NOERROR;
1074 return NULL;
1075 }
3b0bdc72
UD
1076 hash = calc_state_hash (nodes, context);
1077 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1078
bc15410e 1079 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1080 {
bc15410e
UD
1081 re_dfastate_t *state = spot->array[i];
1082 if (hash != state->hash)
1083 continue;
1084 if (re_node_set_compare (state->entrance_nodes, nodes)
1085 && state->context == context)
1086 return state;
3b0bdc72 1087 }
3b0bdc72 1088 /* There are no appropriate state in `dfa', create the new one. */
a9388965 1089 new_state = create_cd_newstate (dfa, nodes, context, hash);
bc15410e 1090 if (BE (new_state != NULL, 1))
a9388965
UD
1091 return new_state;
1092 else
1093 {
1094 *err = REG_ESPACE;
1095 return NULL;
1096 }
3b0bdc72
UD
1097}
1098
a9388965
UD
1099/* Allocate memory for DFA state and initialize common properties.
1100 Return the new state if succeeded, otherwise return NULL. */
1101
3b0bdc72
UD
1102static re_dfastate_t *
1103create_newstate_common (dfa, nodes, hash)
1104 re_dfa_t *dfa;
1105 const re_node_set *nodes;
1106 unsigned int hash;
1107{
1108 re_dfastate_t *newstate;
1109 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
bc15410e 1110 if (BE (newstate == NULL, 0))
a9388965 1111 return NULL;
3b0bdc72
UD
1112 re_node_set_init_copy (&newstate->nodes, nodes);
1113 newstate->trtable = NULL;
1114 newstate->trtable_search = NULL;
1115 newstate->hash = hash;
1116 return newstate;
1117}
1118
a9388965
UD
1119/* Store the new state NEWSTATE whose hash value is HASH in appropriate
1120 position. Return value indicate the error code if failed. */
1121
1122static reg_errcode_t
3b0bdc72
UD
1123register_state (dfa, newstate, hash)
1124 re_dfa_t *dfa;
1125 re_dfastate_t *newstate;
1126 unsigned int hash;
1127{
1128 struct re_state_table_entry *spot;
1129 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1130
1131 if (spot->alloc <= spot->num)
1132 {
bc15410e
UD
1133 spot->alloc = 2 * spot->num + 2;
1134 spot->array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
1135 if (BE (spot->array == NULL, 0))
1136 return REG_ESPACE;
3b0bdc72 1137 }
bc15410e 1138 spot->array[spot->num++] = newstate;
a9388965 1139 return REG_NOERROR;
3b0bdc72
UD
1140}
1141
a9388965
UD
1142/* Create the new state which is independ of contexts.
1143 Return the new state if succeeded, otherwise return NULL. */
1144
3b0bdc72
UD
1145static re_dfastate_t *
1146create_ci_newstate (dfa, nodes, hash)
1147 re_dfa_t *dfa;
1148 const re_node_set *nodes;
1149 unsigned int hash;
1150{
1151 int i;
a9388965 1152 reg_errcode_t err;
3b0bdc72
UD
1153 re_dfastate_t *newstate;
1154 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1155 if (BE (newstate == NULL, 0))
a9388965 1156 return NULL;
3b0bdc72
UD
1157 newstate->entrance_nodes = &newstate->nodes;
1158
1159 for (i = 0 ; i < nodes->nelem ; i++)
1160 {
1161 re_token_t *node = dfa->nodes + nodes->elems[i];
1162 re_token_type_t type = node->type;
1163 if (type == CHARACTER)
1164 continue;
1165
1166 /* If the state has the halt node, the state is a halt state. */
1167 else if (type == END_OF_RE)
1168 newstate->halt = 1;
c0a0f9a3 1169#ifdef RE_ENABLE_I18N
3b0bdc72
UD
1170 else if (type == COMPLEX_BRACKET
1171 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1172 newstate->accept_mb = 1;
c0a0f9a3 1173#endif /* RE_ENABLE_I18N */
3b0bdc72
UD
1174 else if (type == OP_BACK_REF)
1175 newstate->has_backref = 1;
1176 else if (type == ANCHOR || OP_CONTEXT_NODE)
1177 {
1178 newstate->has_constraint = 1;
1179 if (type == OP_CONTEXT_NODE
1180 && dfa->nodes[node->opr.ctx_info->entity].type == END_OF_RE)
1181 newstate->halt = 1;
1182 }
1183 }
a9388965
UD
1184 err = register_state (dfa, newstate, hash);
1185 return (err != REG_NOERROR) ? NULL : newstate;
3b0bdc72
UD
1186}
1187
a9388965
UD
1188/* Create the new state which is depend on the context CONTEXT.
1189 Return the new state if succeeded, otherwise return NULL. */
1190
3b0bdc72
UD
1191static re_dfastate_t *
1192create_cd_newstate (dfa, nodes, context, hash)
1193 re_dfa_t *dfa;
1194 const re_node_set *nodes;
1195 unsigned int context, hash;
1196{
1197 int i, nctx_nodes = 0;
a9388965 1198 reg_errcode_t err;
3b0bdc72
UD
1199 re_dfastate_t *newstate;
1200
1201 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1202 if (BE (newstate == NULL, 0))
a9388965 1203 return NULL;
3b0bdc72
UD
1204 newstate->context = context;
1205 newstate->entrance_nodes = &newstate->nodes;
1206
1207 for (i = 0 ; i < nodes->nelem ; i++)
1208 {
1209 unsigned int constraint = 0;
1210 re_token_t *node = dfa->nodes + nodes->elems[i];
1211 re_token_type_t type = node->type;
1212 if (type == CHARACTER)
1213 continue;
1214
1215 /* If the state has the halt node, the state is a halt state. */
1216 else if (type == END_OF_RE)
1217 newstate->halt = 1;
c0a0f9a3 1218#ifdef RE_ENABLE_I18N
3b0bdc72
UD
1219 else if (type == COMPLEX_BRACKET
1220 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1221 newstate->accept_mb = 1;
c0a0f9a3 1222#endif /* RE_ENABLE_I18N */
3b0bdc72
UD
1223 else if (type == OP_BACK_REF)
1224 newstate->has_backref = 1;
1225 else if (type == ANCHOR)
1226 constraint = node->opr.ctx_type;
1227 else if (type == OP_CONTEXT_NODE)
1228 {
1229 re_token_type_t ctype = dfa->nodes[node->opr.ctx_info->entity].type;
1230 constraint = node->constraint;
1231 if (ctype == END_OF_RE)
1232 newstate->halt = 1;
1233 else if (ctype == OP_BACK_REF)
1234 newstate->has_backref = 1;
c0a0f9a3 1235#ifdef RE_ENABLE_I18N
3b0bdc72
UD
1236 else if (ctype == COMPLEX_BRACKET
1237 || (type == OP_PERIOD && MB_CUR_MAX > 1))
1238 newstate->accept_mb = 1;
c0a0f9a3 1239#endif /* RE_ENABLE_I18N */
3b0bdc72
UD
1240 }
1241
1242 if (constraint)
1243 {
1244 if (newstate->entrance_nodes == &newstate->nodes)
1245 {
1246 newstate->entrance_nodes = re_malloc (re_node_set, 1);
bc15410e 1247 if (BE (newstate->entrance_nodes == NULL, 0))
3b0bdc72
UD
1248 return NULL;
1249 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1250 nctx_nodes = 0;
1251 newstate->has_constraint = 1;
1252 }
1253
1254 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1255 {
1256 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1257 ++nctx_nodes;
1258 }
1259 }
1260 }
a9388965
UD
1261 err = register_state (dfa, newstate, hash);
1262 return (err != REG_NOERROR) ? NULL : newstate;
3b0bdc72 1263}