]>
git.ipfire.org Git - thirdparty/glibc.git/blob - posix/regex_internal.c
1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 static void re_string_construct_common (const char *str
, int len
,
23 RE_TRANSLATE_TYPE trans
, int icase
,
24 const re_dfa_t
*dfa
) internal_function
;
25 static re_dfastate_t
*create_ci_newstate (const re_dfa_t
*dfa
,
26 const re_node_set
*nodes
,
27 unsigned int hash
) internal_function
;
28 static re_dfastate_t
*create_cd_newstate (const re_dfa_t
*dfa
,
29 const re_node_set
*nodes
,
31 unsigned int hash
) internal_function
;
33 /* Functions for string operation. */
35 /* This function allocate the buffers. It is necessary to call
36 re_string_reconstruct before using the object. */
40 re_string_allocate (re_string_t
*pstr
, const char *str
, int len
, int init_len
,
41 RE_TRANSLATE_TYPE trans
, int icase
, const re_dfa_t
*dfa
)
46 /* Ensure at least one character fits into the buffers. */
47 if (init_len
< dfa
->mb_cur_max
)
48 init_len
= dfa
->mb_cur_max
;
49 init_buf_len
= (len
+ 1 < init_len
) ? len
+ 1: init_len
;
50 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
52 ret
= re_string_realloc_buffers (pstr
, init_buf_len
);
53 if (BE (ret
!= REG_NOERROR
, 0))
56 pstr
->word_char
= dfa
->word_char
;
57 pstr
->word_ops_used
= dfa
->word_ops_used
;
58 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
59 pstr
->valid_len
= (pstr
->mbs_allocated
|| dfa
->mb_cur_max
> 1) ? 0 : len
;
60 pstr
->valid_raw_len
= pstr
->valid_len
;
64 /* This function allocate the buffers, and initialize them. */
68 re_string_construct (re_string_t
*pstr
, const char *str
, int len
,
69 RE_TRANSLATE_TYPE trans
, int icase
, const re_dfa_t
*dfa
)
72 memset (pstr
, '\0', sizeof (re_string_t
));
73 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
77 ret
= re_string_realloc_buffers (pstr
, len
+ 1);
78 if (BE (ret
!= REG_NOERROR
, 0))
81 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
86 if (dfa
->mb_cur_max
> 1)
90 ret
= build_wcs_upper_buffer (pstr
);
91 if (BE (ret
!= REG_NOERROR
, 0))
93 if (pstr
->valid_raw_len
>= len
)
95 if (pstr
->bufs_len
> pstr
->valid_len
+ dfa
->mb_cur_max
)
97 ret
= re_string_realloc_buffers (pstr
, pstr
->bufs_len
* 2);
98 if (BE (ret
!= REG_NOERROR
, 0))
103 #endif /* RE_ENABLE_I18N */
104 build_upper_buffer (pstr
);
108 #ifdef RE_ENABLE_I18N
109 if (dfa
->mb_cur_max
> 1)
110 build_wcs_buffer (pstr
);
112 #endif /* RE_ENABLE_I18N */
115 re_string_translate_buffer (pstr
);
118 pstr
->valid_len
= pstr
->bufs_len
;
119 pstr
->valid_raw_len
= pstr
->bufs_len
;
127 /* Helper functions for re_string_allocate, and re_string_construct. */
130 re_string_realloc_buffers (re_string_t
*pstr
, int new_buf_len
)
132 #ifdef RE_ENABLE_I18N
133 if (pstr
->mb_cur_max
> 1)
135 wint_t *new_wcs
= re_realloc (pstr
->wcs
, wint_t, new_buf_len
);
136 if (BE (new_wcs
== NULL
, 0))
139 if (pstr
->offsets
!= NULL
)
141 int *new_offsets
= re_realloc (pstr
->offsets
, int, new_buf_len
);
142 if (BE (new_offsets
== NULL
, 0))
144 pstr
->offsets
= new_offsets
;
147 #endif /* RE_ENABLE_I18N */
148 if (pstr
->mbs_allocated
)
150 unsigned char *new_mbs
= re_realloc (pstr
->mbs
, unsigned char,
152 if (BE (new_mbs
== NULL
, 0))
156 pstr
->bufs_len
= new_buf_len
;
162 re_string_construct_common (const char *str
, int len
, re_string_t
*pstr
,
163 RE_TRANSLATE_TYPE trans
, int icase
,
166 pstr
->raw_mbs
= (const unsigned char *) str
;
170 pstr
->icase
= icase
? 1 : 0;
171 pstr
->mbs_allocated
= (trans
!= NULL
|| icase
);
172 pstr
->mb_cur_max
= dfa
->mb_cur_max
;
173 pstr
->is_utf8
= dfa
->is_utf8
;
174 pstr
->map_notascii
= dfa
->map_notascii
;
175 pstr
->stop
= pstr
->len
;
176 pstr
->raw_stop
= pstr
->stop
;
179 #ifdef RE_ENABLE_I18N
181 /* Build wide character buffer PSTR->WCS.
182 If the byte sequence of the string are:
183 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
184 Then wide character buffer will be:
185 <wc1> , WEOF , <wc2> , WEOF , <wc3>
186 We use WEOF for padding, they indicate that the position isn't
187 a first byte of a multibyte character.
189 Note that this function assumes PSTR->VALID_LEN elements are already
190 built and starts from PSTR->VALID_LEN. */
193 build_wcs_buffer (re_string_t
*pstr
)
196 unsigned char buf
[MB_LEN_MAX
];
197 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
199 unsigned char buf
[64];
202 int byte_idx
, end_idx
, remain_len
;
205 /* Build the buffers from pstr->valid_len to either pstr->len or
207 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
208 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
213 remain_len
= end_idx
- byte_idx
;
214 prev_st
= pstr
->cur_state
;
215 /* Apply the translation if we need. */
216 if (BE (pstr
->trans
!= NULL
, 0))
220 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
222 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
+ i
];
223 buf
[i
] = pstr
->mbs
[byte_idx
+ i
] = pstr
->trans
[ch
];
225 p
= (const char *) buf
;
228 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
;
229 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
230 if (BE (mbclen
== (size_t) -2, 0))
232 /* The buffer doesn't have enough space, finish to build. */
233 pstr
->cur_state
= prev_st
;
236 else if (BE (mbclen
== (size_t) -1 || mbclen
== 0, 0))
238 /* We treat these cases as a singlebyte character. */
240 wc
= (wchar_t) pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
241 if (BE (pstr
->trans
!= NULL
, 0))
242 wc
= pstr
->trans
[wc
];
243 pstr
->cur_state
= prev_st
;
246 /* Write wide character and padding. */
247 pstr
->wcs
[byte_idx
++] = wc
;
248 /* Write paddings. */
249 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
250 pstr
->wcs
[byte_idx
++] = WEOF
;
252 pstr
->valid_len
= byte_idx
;
253 pstr
->valid_raw_len
= byte_idx
;
256 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
257 but for REG_ICASE. */
260 build_wcs_upper_buffer (re_string_t
*pstr
)
263 int src_idx
, byte_idx
, end_idx
, remain_len
;
266 char buf
[MB_LEN_MAX
];
267 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
272 byte_idx
= pstr
->valid_len
;
273 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
275 /* The following optimization assumes that ASCII characters can be
276 mapped to wide characters with a simple cast. */
277 if (! pstr
->map_notascii
&& pstr
->trans
== NULL
&& !pstr
->offsets_needed
)
279 while (byte_idx
< end_idx
)
283 if (isascii (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
])
284 && mbsinit (&pstr
->cur_state
))
286 /* In case of a singlebyte character. */
288 = toupper (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
]);
289 /* The next step uses the assumption that wchar_t is encoded
290 ASCII-safe: all ASCII values can be converted like this. */
291 pstr
->wcs
[byte_idx
] = (wchar_t) pstr
->mbs
[byte_idx
];
296 remain_len
= end_idx
- byte_idx
;
297 prev_st
= pstr
->cur_state
;
298 mbclen
= mbrtowc (&wc
,
299 ((const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
300 + byte_idx
), remain_len
, &pstr
->cur_state
);
301 if (BE (mbclen
+ 2 > 2, 1))
309 mbcdlen
= wcrtomb (buf
, wcu
, &prev_st
);
310 if (BE (mbclen
== mbcdlen
, 1))
311 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
319 memcpy (pstr
->mbs
+ byte_idx
,
320 pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
, mbclen
);
321 pstr
->wcs
[byte_idx
++] = wcu
;
322 /* Write paddings. */
323 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
324 pstr
->wcs
[byte_idx
++] = WEOF
;
326 else if (mbclen
== (size_t) -1 || mbclen
== 0)
328 /* It is an invalid character or '\0'. Just use the byte. */
329 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
330 pstr
->mbs
[byte_idx
] = ch
;
331 /* And also cast it to wide char. */
332 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
333 if (BE (mbclen
== (size_t) -1, 0))
334 pstr
->cur_state
= prev_st
;
338 /* The buffer doesn't have enough space, finish to build. */
339 pstr
->cur_state
= prev_st
;
343 pstr
->valid_len
= byte_idx
;
344 pstr
->valid_raw_len
= byte_idx
;
348 for (src_idx
= pstr
->valid_raw_len
; byte_idx
< end_idx
;)
353 remain_len
= end_idx
- byte_idx
;
354 prev_st
= pstr
->cur_state
;
355 if (BE (pstr
->trans
!= NULL
, 0))
359 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
361 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
+ i
];
362 buf
[i
] = pstr
->trans
[ch
];
364 p
= (const char *) buf
;
367 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ src_idx
;
368 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
369 if (BE (mbclen
+ 2 > 2, 1))
377 mbcdlen
= wcrtomb ((char *) buf
, wcu
, &prev_st
);
378 if (BE (mbclen
== mbcdlen
, 1))
379 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
380 else if (mbcdlen
!= (size_t) -1)
384 if (byte_idx
+ mbcdlen
> pstr
->bufs_len
)
386 pstr
->cur_state
= prev_st
;
390 if (pstr
->offsets
== NULL
)
392 pstr
->offsets
= re_malloc (int, pstr
->bufs_len
);
394 if (pstr
->offsets
== NULL
)
397 if (!pstr
->offsets_needed
)
399 for (i
= 0; i
< (size_t) byte_idx
; ++i
)
400 pstr
->offsets
[i
] = i
;
401 pstr
->offsets_needed
= 1;
404 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbcdlen
);
405 pstr
->wcs
[byte_idx
] = wcu
;
406 pstr
->offsets
[byte_idx
] = src_idx
;
407 for (i
= 1; i
< mbcdlen
; ++i
)
409 pstr
->offsets
[byte_idx
+ i
]
410 = src_idx
+ (i
< mbclen
? i
: mbclen
- 1);
411 pstr
->wcs
[byte_idx
+ i
] = WEOF
;
413 pstr
->len
+= mbcdlen
- mbclen
;
414 if (pstr
->raw_stop
> src_idx
)
415 pstr
->stop
+= mbcdlen
- mbclen
;
416 end_idx
= (pstr
->bufs_len
> pstr
->len
)
417 ? pstr
->len
: pstr
->bufs_len
;
423 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
426 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
428 if (BE (pstr
->offsets_needed
!= 0, 0))
431 for (i
= 0; i
< mbclen
; ++i
)
432 pstr
->offsets
[byte_idx
+ i
] = src_idx
+ i
;
436 pstr
->wcs
[byte_idx
++] = wcu
;
437 /* Write paddings. */
438 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
439 pstr
->wcs
[byte_idx
++] = WEOF
;
441 else if (mbclen
== (size_t) -1 || mbclen
== 0)
443 /* It is an invalid character or '\0'. Just use the byte. */
444 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
];
446 if (BE (pstr
->trans
!= NULL
, 0))
447 ch
= pstr
->trans
[ch
];
448 pstr
->mbs
[byte_idx
] = ch
;
450 if (BE (pstr
->offsets_needed
!= 0, 0))
451 pstr
->offsets
[byte_idx
] = src_idx
;
454 /* And also cast it to wide char. */
455 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
456 if (BE (mbclen
== (size_t) -1, 0))
457 pstr
->cur_state
= prev_st
;
461 /* The buffer doesn't have enough space, finish to build. */
462 pstr
->cur_state
= prev_st
;
466 pstr
->valid_len
= byte_idx
;
467 pstr
->valid_raw_len
= src_idx
;
471 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
476 re_string_skip_chars (re_string_t
*pstr
, int new_raw_idx
, wint_t *last_wc
)
483 /* Skip the characters which are not necessary to check. */
484 for (rawbuf_idx
= pstr
->raw_mbs_idx
+ pstr
->valid_raw_len
;
485 rawbuf_idx
< new_raw_idx
;)
488 remain_len
= pstr
->len
- rawbuf_idx
;
489 prev_st
= pstr
->cur_state
;
490 mbclen
= mbrtowc (&wc
, (const char *) pstr
->raw_mbs
+ rawbuf_idx
,
491 remain_len
, &pstr
->cur_state
);
492 if (BE (mbclen
== (size_t) -2 || mbclen
== (size_t) -1 || mbclen
== 0, 0))
494 /* We treat these cases as a singlebyte character. */
496 pstr
->cur_state
= prev_st
;
498 /* Then proceed the next character. */
499 rawbuf_idx
+= mbclen
;
501 *last_wc
= (wint_t) wc
;
504 #endif /* RE_ENABLE_I18N */
506 /* Build the buffer PSTR->MBS, and apply the translation if we need.
507 This function is used in case of REG_ICASE. */
510 build_upper_buffer (re_string_t
*pstr
)
512 int char_idx
, end_idx
;
513 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
515 for (char_idx
= pstr
->valid_len
; char_idx
< end_idx
; ++char_idx
)
517 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ char_idx
];
518 if (BE (pstr
->trans
!= NULL
, 0))
519 ch
= pstr
->trans
[ch
];
521 pstr
->mbs
[char_idx
] = toupper (ch
);
523 pstr
->mbs
[char_idx
] = ch
;
525 pstr
->valid_len
= char_idx
;
526 pstr
->valid_raw_len
= char_idx
;
529 /* Apply TRANS to the buffer in PSTR. */
532 re_string_translate_buffer (re_string_t
*pstr
)
534 int buf_idx
, end_idx
;
535 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
537 for (buf_idx
= pstr
->valid_len
; buf_idx
< end_idx
; ++buf_idx
)
539 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ buf_idx
];
540 pstr
->mbs
[buf_idx
] = pstr
->trans
[ch
];
543 pstr
->valid_len
= buf_idx
;
544 pstr
->valid_raw_len
= buf_idx
;
547 /* This function re-construct the buffers.
548 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
549 convert to upper case in case of REG_ICASE, apply translation. */
553 re_string_reconstruct (re_string_t
*pstr
, int idx
, int eflags
)
555 int offset
= idx
- pstr
->raw_mbs_idx
;
556 if (BE (offset
< 0, 0))
559 #ifdef RE_ENABLE_I18N
560 if (pstr
->mb_cur_max
> 1)
561 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
562 #endif /* RE_ENABLE_I18N */
563 pstr
->len
= pstr
->raw_len
;
564 pstr
->stop
= pstr
->raw_stop
;
566 pstr
->raw_mbs_idx
= 0;
567 pstr
->valid_raw_len
= 0;
568 pstr
->offsets_needed
= 0;
569 pstr
->tip_context
= ((eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
570 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
);
571 if (!pstr
->mbs_allocated
)
572 pstr
->mbs
= (unsigned char *) pstr
->raw_mbs
;
576 if (BE (offset
!= 0, 1))
578 /* Are the characters which are already checked remain? */
579 if (BE (offset
< pstr
->valid_raw_len
, 1)
580 #ifdef RE_ENABLE_I18N
581 /* Handling this would enlarge the code too much.
582 Accept a slowdown in that case. */
583 && pstr
->offsets_needed
== 0
587 /* Yes, move them to the front of the buffer. */
588 pstr
->tip_context
= re_string_context_at (pstr
, offset
- 1, eflags
);
589 #ifdef RE_ENABLE_I18N
590 if (pstr
->mb_cur_max
> 1)
591 memmove (pstr
->wcs
, pstr
->wcs
+ offset
,
592 (pstr
->valid_len
- offset
) * sizeof (wint_t));
593 #endif /* RE_ENABLE_I18N */
594 if (BE (pstr
->mbs_allocated
, 0))
595 memmove (pstr
->mbs
, pstr
->mbs
+ offset
,
596 pstr
->valid_len
- offset
);
597 pstr
->valid_len
-= offset
;
598 pstr
->valid_raw_len
-= offset
;
600 assert (pstr
->valid_len
> 0);
605 /* No, skip all characters until IDX. */
606 #ifdef RE_ENABLE_I18N
607 if (BE (pstr
->offsets_needed
, 0))
609 pstr
->len
= pstr
->raw_len
- idx
+ offset
;
610 pstr
->stop
= pstr
->raw_stop
- idx
+ offset
;
611 pstr
->offsets_needed
= 0;
615 pstr
->valid_raw_len
= 0;
616 #ifdef RE_ENABLE_I18N
617 if (pstr
->mb_cur_max
> 1)
624 const unsigned char *raw
, *p
, *q
, *end
;
626 /* Special case UTF-8. Multi-byte chars start with any
627 byte other than 0x80 - 0xbf. */
628 raw
= pstr
->raw_mbs
+ pstr
->raw_mbs_idx
;
629 end
= raw
+ (offset
- pstr
->mb_cur_max
);
630 p
= raw
+ offset
- 1;
632 /* We know the wchar_t encoding is UCS4, so for the simple
633 case, ASCII characters, skip the conversion step. */
634 if (isascii (*p
) && BE (pstr
->trans
== NULL
, 1))
636 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
642 for (; p
>= end
; --p
)
643 if ((*p
& 0xc0) != 0x80)
647 int mlen
= raw
+ pstr
->len
- p
;
648 unsigned char buf
[6];
652 if (BE (pstr
->trans
!= NULL
, 0))
654 int i
= mlen
< 6 ? mlen
: 6;
656 buf
[i
] = pstr
->trans
[p
[i
]];
659 /* XXX Don't use mbrtowc, we know which conversion
660 to use (UTF-8 -> UCS4). */
661 memset (&cur_state
, 0, sizeof (cur_state
));
662 mbclen
= mbrtowc (&wc2
, (const char *) p
, mlen
,
664 if (raw
+ offset
- p
<= mbclen
665 && mbclen
< (size_t) -2)
667 memset (&pstr
->cur_state
, '\0',
669 pstr
->valid_len
= mbclen
- (raw
+ offset
- p
);
677 pstr
->valid_len
= re_string_skip_chars (pstr
, idx
, &wc
) - idx
;
678 if (BE (pstr
->valid_len
, 0))
680 for (wcs_idx
= 0; wcs_idx
< pstr
->valid_len
; ++wcs_idx
)
681 pstr
->wcs
[wcs_idx
] = WEOF
;
682 if (pstr
->mbs_allocated
)
683 memset (pstr
->mbs
, 255, pstr
->valid_len
);
685 pstr
->valid_raw_len
= pstr
->valid_len
;
686 pstr
->tip_context
= ((BE (pstr
->word_ops_used
!= 0, 0)
687 && IS_WIDE_WORD_CHAR (wc
))
689 : ((IS_WIDE_NEWLINE (wc
)
690 && pstr
->newline_anchor
)
691 ? CONTEXT_NEWLINE
: 0));
694 #endif /* RE_ENABLE_I18N */
696 int c
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ offset
- 1];
699 pstr
->tip_context
= (bitset_contain (pstr
->word_char
, c
)
701 : ((IS_NEWLINE (c
) && pstr
->newline_anchor
)
702 ? CONTEXT_NEWLINE
: 0));
705 if (!BE (pstr
->mbs_allocated
, 0))
708 pstr
->raw_mbs_idx
= idx
;
710 pstr
->stop
-= offset
;
712 /* Then build the buffers. */
713 #ifdef RE_ENABLE_I18N
714 if (pstr
->mb_cur_max
> 1)
718 int ret
= build_wcs_upper_buffer (pstr
);
719 if (BE (ret
!= REG_NOERROR
, 0))
723 build_wcs_buffer (pstr
);
726 #endif /* RE_ENABLE_I18N */
727 if (BE (pstr
->mbs_allocated
, 0))
730 build_upper_buffer (pstr
);
731 else if (pstr
->trans
!= NULL
)
732 re_string_translate_buffer (pstr
);
735 pstr
->valid_len
= pstr
->len
;
742 internal_function
__attribute ((pure
))
743 re_string_peek_byte_case (const re_string_t
*pstr
, int idx
)
747 /* Handle the common (easiest) cases first. */
748 if (BE (!pstr
->mbs_allocated
, 1))
749 return re_string_peek_byte (pstr
, idx
);
751 #ifdef RE_ENABLE_I18N
752 if (pstr
->mb_cur_max
> 1
753 && ! re_string_is_single_byte_char (pstr
, pstr
->cur_idx
+ idx
))
754 return re_string_peek_byte (pstr
, idx
);
757 off
= pstr
->cur_idx
+ idx
;
758 #ifdef RE_ENABLE_I18N
759 if (pstr
->offsets_needed
)
760 off
= pstr
->offsets
[off
];
763 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
765 #ifdef RE_ENABLE_I18N
766 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
767 this function returns CAPITAL LETTER I instead of first byte of
768 DOTLESS SMALL LETTER I. The latter would confuse the parser,
769 since peek_byte_case doesn't advance cur_idx in any way. */
770 if (pstr
->offsets_needed
&& !isascii (ch
))
771 return re_string_peek_byte (pstr
, idx
);
778 internal_function
__attribute ((pure
))
779 re_string_fetch_byte_case (re_string_t
*pstr
)
781 if (BE (!pstr
->mbs_allocated
, 1))
782 return re_string_fetch_byte (pstr
);
784 #ifdef RE_ENABLE_I18N
785 if (pstr
->offsets_needed
)
789 /* For tr_TR.UTF-8 [[:islower:]] there is
790 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
791 in that case the whole multi-byte character and return
792 the original letter. On the other side, with
793 [[: DOTLESS SMALL LETTER I return [[:I, as doing
794 anything else would complicate things too much. */
796 if (!re_string_first_byte (pstr
, pstr
->cur_idx
))
797 return re_string_fetch_byte (pstr
);
799 off
= pstr
->offsets
[pstr
->cur_idx
];
800 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
803 return re_string_fetch_byte (pstr
);
805 re_string_skip_bytes (pstr
,
806 re_string_char_size_at (pstr
, pstr
->cur_idx
));
811 return pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ pstr
->cur_idx
++];
816 re_string_destruct (re_string_t
*pstr
)
818 #ifdef RE_ENABLE_I18N
820 re_free (pstr
->offsets
);
821 #endif /* RE_ENABLE_I18N */
822 if (pstr
->mbs_allocated
)
826 /* Return the context at IDX in INPUT. */
829 re_string_context_at (const re_string_t
*input
, int idx
, int eflags
)
833 /* In this case, we use the value stored in input->tip_context,
834 since we can't know the character in input->mbs[-1] here. */
835 return input
->tip_context
;
836 if (BE (idx
== input
->len
, 0))
837 return ((eflags
& REG_NOTEOL
) ? CONTEXT_ENDBUF
838 : CONTEXT_NEWLINE
| CONTEXT_ENDBUF
);
839 #ifdef RE_ENABLE_I18N
840 if (input
->mb_cur_max
> 1)
844 while(input
->wcs
[wc_idx
] == WEOF
)
847 /* It must not happen. */
848 assert (wc_idx
>= 0);
852 return input
->tip_context
;
854 wc
= input
->wcs
[wc_idx
];
855 if (BE (input
->word_ops_used
!= 0, 0) && IS_WIDE_WORD_CHAR (wc
))
857 return (IS_WIDE_NEWLINE (wc
) && input
->newline_anchor
858 ? CONTEXT_NEWLINE
: 0);
863 c
= re_string_byte_at (input
, idx
);
864 if (bitset_contain (input
->word_char
, c
))
866 return IS_NEWLINE (c
) && input
->newline_anchor
? CONTEXT_NEWLINE
: 0;
870 /* Functions for set operation. */
874 re_node_set_alloc (re_node_set
*set
, int size
)
878 set
->elems
= re_malloc (int, size
);
879 if (BE (set
->elems
== NULL
, 0))
886 re_node_set_init_1 (re_node_set
*set
, int elem
)
890 set
->elems
= re_malloc (int, 1);
891 if (BE (set
->elems
== NULL
, 0))
893 set
->alloc
= set
->nelem
= 0;
896 set
->elems
[0] = elem
;
902 re_node_set_init_2 (re_node_set
*set
, int elem1
, int elem2
)
905 set
->elems
= re_malloc (int, 2);
906 if (BE (set
->elems
== NULL
, 0))
911 set
->elems
[0] = elem1
;
918 set
->elems
[0] = elem1
;
919 set
->elems
[1] = elem2
;
923 set
->elems
[0] = elem2
;
924 set
->elems
[1] = elem1
;
932 re_node_set_init_copy (re_node_set
*dest
, const re_node_set
*src
)
934 dest
->nelem
= src
->nelem
;
937 dest
->alloc
= dest
->nelem
;
938 dest
->elems
= re_malloc (int, dest
->alloc
);
939 if (BE (dest
->elems
== NULL
, 0))
941 dest
->alloc
= dest
->nelem
= 0;
944 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
947 re_node_set_init_empty (dest
);
951 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
952 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
953 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
957 re_node_set_add_intersect (re_node_set
*dest
, const re_node_set
*src1
,
958 const re_node_set
*src2
)
960 int i1
, i2
, is
, id
, delta
, sbase
;
961 if (src1
->nelem
== 0 || src2
->nelem
== 0)
964 /* We need dest->nelem + 2 * elems_in_intersection; this is a
965 conservative estimate. */
966 if (src1
->nelem
+ src2
->nelem
+ dest
->nelem
> dest
->alloc
)
968 int new_alloc
= src1
->nelem
+ src2
->nelem
+ dest
->alloc
;
969 int *new_elems
= re_realloc (dest
->elems
, int, new_alloc
);
970 if (BE (new_elems
== NULL
, 0))
972 dest
->elems
= new_elems
;
973 dest
->alloc
= new_alloc
;
976 /* Find the items in the intersection of SRC1 and SRC2, and copy
977 into the top of DEST those that are not already in DEST itself. */
978 sbase
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
;
979 i1
= src1
->nelem
- 1;
980 i2
= src2
->nelem
- 1;
981 id
= dest
->nelem
- 1;
984 if (src1
->elems
[i1
] == src2
->elems
[i2
])
986 /* Try to find the item in DEST. Maybe we could binary search? */
987 while (id
>= 0 && dest
->elems
[id
] > src1
->elems
[i1
])
990 if (id
< 0 || dest
->elems
[id
] != src1
->elems
[i1
])
991 dest
->elems
[--sbase
] = src1
->elems
[i1
];
993 if (--i1
< 0 || --i2
< 0)
997 /* Lower the highest of the two items. */
998 else if (src1
->elems
[i1
] < src2
->elems
[i2
])
1010 id
= dest
->nelem
- 1;
1011 is
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
- 1;
1012 delta
= is
- sbase
+ 1;
1014 /* Now copy. When DELTA becomes zero, the remaining
1015 DEST elements are already in place; this is more or
1016 less the same loop that is in re_node_set_merge. */
1017 dest
->nelem
+= delta
;
1018 if (delta
> 0 && id
>= 0)
1021 if (dest
->elems
[is
] > dest
->elems
[id
])
1023 /* Copy from the top. */
1024 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1030 /* Slide from the bottom. */
1031 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1037 /* Copy remaining SRC elements. */
1038 memcpy (dest
->elems
, dest
->elems
+ sbase
, delta
* sizeof (int));
1043 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1044 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1046 static reg_errcode_t
1048 re_node_set_init_union (re_node_set
*dest
, const re_node_set
*src1
,
1049 const re_node_set
*src2
)
1052 if (src1
!= NULL
&& src1
->nelem
> 0 && src2
!= NULL
&& src2
->nelem
> 0)
1054 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
1055 dest
->elems
= re_malloc (int, dest
->alloc
);
1056 if (BE (dest
->elems
== NULL
, 0))
1061 if (src1
!= NULL
&& src1
->nelem
> 0)
1062 return re_node_set_init_copy (dest
, src1
);
1063 else if (src2
!= NULL
&& src2
->nelem
> 0)
1064 return re_node_set_init_copy (dest
, src2
);
1066 re_node_set_init_empty (dest
);
1069 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
1071 if (src1
->elems
[i1
] > src2
->elems
[i2
])
1073 dest
->elems
[id
++] = src2
->elems
[i2
++];
1076 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1078 dest
->elems
[id
++] = src1
->elems
[i1
++];
1080 if (i1
< src1
->nelem
)
1082 memcpy (dest
->elems
+ id
, src1
->elems
+ i1
,
1083 (src1
->nelem
- i1
) * sizeof (int));
1084 id
+= src1
->nelem
- i1
;
1086 else if (i2
< src2
->nelem
)
1088 memcpy (dest
->elems
+ id
, src2
->elems
+ i2
,
1089 (src2
->nelem
- i2
) * sizeof (int));
1090 id
+= src2
->nelem
- i2
;
1096 /* Calculate the union set of the sets DEST and SRC. And store it to
1097 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1099 static reg_errcode_t
1101 re_node_set_merge (re_node_set
*dest
, const re_node_set
*src
)
1103 int is
, id
, sbase
, delta
;
1104 if (src
== NULL
|| src
->nelem
== 0)
1106 if (dest
->alloc
< 2 * src
->nelem
+ dest
->nelem
)
1108 int new_alloc
= 2 * (src
->nelem
+ dest
->alloc
);
1109 int *new_buffer
= re_realloc (dest
->elems
, int, new_alloc
);
1110 if (BE (new_buffer
== NULL
, 0))
1112 dest
->elems
= new_buffer
;
1113 dest
->alloc
= new_alloc
;
1116 if (BE (dest
->nelem
== 0, 0))
1118 dest
->nelem
= src
->nelem
;
1119 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
1123 /* Copy into the top of DEST the items of SRC that are not
1124 found in DEST. Maybe we could binary search in DEST? */
1125 for (sbase
= dest
->nelem
+ 2 * src
->nelem
,
1126 is
= src
->nelem
- 1, id
= dest
->nelem
- 1; is
>= 0 && id
>= 0; )
1128 if (dest
->elems
[id
] == src
->elems
[is
])
1130 else if (dest
->elems
[id
] < src
->elems
[is
])
1131 dest
->elems
[--sbase
] = src
->elems
[is
--];
1132 else /* if (dest->elems[id] > src->elems[is]) */
1138 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1140 memcpy (dest
->elems
+ sbase
, src
->elems
, (is
+ 1) * sizeof (int));
1143 id
= dest
->nelem
- 1;
1144 is
= dest
->nelem
+ 2 * src
->nelem
- 1;
1145 delta
= is
- sbase
+ 1;
1149 /* Now copy. When DELTA becomes zero, the remaining
1150 DEST elements are already in place. */
1151 dest
->nelem
+= delta
;
1154 if (dest
->elems
[is
] > dest
->elems
[id
])
1156 /* Copy from the top. */
1157 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1163 /* Slide from the bottom. */
1164 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1167 /* Copy remaining SRC elements. */
1168 memcpy (dest
->elems
, dest
->elems
+ sbase
,
1169 delta
* sizeof (int));
1178 /* Insert the new element ELEM to the re_node_set* SET.
1179 SET should not already have ELEM.
1180 return -1 if an error is occured, return 1 otherwise. */
1184 re_node_set_insert (re_node_set
*set
, int elem
)
1187 /* In case the set is empty. */
1188 if (set
->alloc
== 0)
1190 if (BE (re_node_set_init_1 (set
, elem
) == REG_NOERROR
, 1))
1196 if (BE (set
->nelem
, 0) == 0)
1198 /* We already guaranteed above that set->alloc != 0. */
1199 set
->elems
[0] = elem
;
1204 /* Realloc if we need. */
1205 if (set
->alloc
== set
->nelem
)
1208 set
->alloc
= set
->alloc
* 2;
1209 new_elems
= re_realloc (set
->elems
, int, set
->alloc
);
1210 if (BE (new_elems
== NULL
, 0))
1212 set
->elems
= new_elems
;
1215 /* Move the elements which follows the new element. Test the
1216 first element separately to skip a check in the inner loop. */
1217 if (elem
< set
->elems
[0])
1220 for (idx
= set
->nelem
; idx
> 0; idx
--)
1221 set
->elems
[idx
] = set
->elems
[idx
- 1];
1225 for (idx
= set
->nelem
; set
->elems
[idx
- 1] > elem
; idx
--)
1226 set
->elems
[idx
] = set
->elems
[idx
- 1];
1229 /* Insert the new element. */
1230 set
->elems
[idx
] = elem
;
1235 /* Insert the new element ELEM to the re_node_set* SET.
1236 SET should not already have any element greater than or equal to ELEM.
1237 Return -1 if an error is occured, return 1 otherwise. */
1241 re_node_set_insert_last (re_node_set
*set
, int elem
)
1243 /* Realloc if we need. */
1244 if (set
->alloc
== set
->nelem
)
1247 set
->alloc
= (set
->alloc
+ 1) * 2;
1248 new_elems
= re_realloc (set
->elems
, int, set
->alloc
);
1249 if (BE (new_elems
== NULL
, 0))
1251 set
->elems
= new_elems
;
1254 /* Insert the new element. */
1255 set
->elems
[set
->nelem
++] = elem
;
1259 /* Compare two node sets SET1 and SET2.
1260 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1263 internal_function
__attribute ((pure
))
1264 re_node_set_compare (const re_node_set
*set1
, const re_node_set
*set2
)
1267 if (set1
== NULL
|| set2
== NULL
|| set1
->nelem
!= set2
->nelem
)
1269 for (i
= set1
->nelem
; --i
>= 0 ; )
1270 if (set1
->elems
[i
] != set2
->elems
[i
])
1275 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1278 internal_function
__attribute ((pure
))
1279 re_node_set_contains (const re_node_set
*set
, int elem
)
1281 unsigned int idx
, right
, mid
;
1282 if (set
->nelem
<= 0)
1285 /* Binary search the element. */
1287 right
= set
->nelem
- 1;
1290 mid
= (idx
+ right
) / 2;
1291 if (set
->elems
[mid
] < elem
)
1296 return set
->elems
[idx
] == elem
? idx
+ 1 : 0;
1301 re_node_set_remove_at (re_node_set
*set
, int idx
)
1303 if (idx
< 0 || idx
>= set
->nelem
)
1306 for (; idx
< set
->nelem
; idx
++)
1307 set
->elems
[idx
] = set
->elems
[idx
+ 1];
1311 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1312 Or return -1, if an error will be occured. */
1316 re_dfa_add_node (re_dfa_t
*dfa
, re_token_t token
)
1318 int type
= token
.type
;
1319 if (BE (dfa
->nodes_len
>= dfa
->nodes_alloc
, 0))
1321 size_t new_nodes_alloc
= dfa
->nodes_alloc
* 2;
1322 int *new_nexts
, *new_indices
;
1323 re_node_set
*new_edests
, *new_eclosures
;
1324 re_token_t
*new_nodes
;
1326 /* Avoid overflows. */
1327 if (BE (new_nodes_alloc
< dfa
->nodes_alloc
, 0))
1330 new_nodes
= re_realloc (dfa
->nodes
, re_token_t
, new_nodes_alloc
);
1331 if (BE (new_nodes
== NULL
, 0))
1333 dfa
->nodes
= new_nodes
;
1334 new_nexts
= re_realloc (dfa
->nexts
, int, new_nodes_alloc
);
1335 new_indices
= re_realloc (dfa
->org_indices
, int, new_nodes_alloc
);
1336 new_edests
= re_realloc (dfa
->edests
, re_node_set
, new_nodes_alloc
);
1337 new_eclosures
= re_realloc (dfa
->eclosures
, re_node_set
, new_nodes_alloc
);
1338 if (BE (new_nexts
== NULL
|| new_indices
== NULL
1339 || new_edests
== NULL
|| new_eclosures
== NULL
, 0))
1341 dfa
->nexts
= new_nexts
;
1342 dfa
->org_indices
= new_indices
;
1343 dfa
->edests
= new_edests
;
1344 dfa
->eclosures
= new_eclosures
;
1345 dfa
->nodes_alloc
= new_nodes_alloc
;
1347 dfa
->nodes
[dfa
->nodes_len
] = token
;
1348 dfa
->nodes
[dfa
->nodes_len
].constraint
= 0;
1349 #ifdef RE_ENABLE_I18N
1350 dfa
->nodes
[dfa
->nodes_len
].accept_mb
=
1351 (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1) || type
== COMPLEX_BRACKET
;
1353 dfa
->nexts
[dfa
->nodes_len
] = -1;
1354 re_node_set_init_empty (dfa
->edests
+ dfa
->nodes_len
);
1355 re_node_set_init_empty (dfa
->eclosures
+ dfa
->nodes_len
);
1356 return dfa
->nodes_len
++;
1359 static inline unsigned int
1361 calc_state_hash (const re_node_set
*nodes
, unsigned int context
)
1363 unsigned int hash
= nodes
->nelem
+ context
;
1365 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1366 hash
+= nodes
->elems
[i
];
1370 /* Search for the state whose node_set is equivalent to NODES.
1371 Return the pointer to the state, if we found it in the DFA.
1372 Otherwise create the new one and return it. In case of an error
1373 return NULL and set the error code in ERR.
1374 Note: - We assume NULL as the invalid state, then it is possible that
1375 return value is NULL and ERR is REG_NOERROR.
1376 - We never return non-NULL value in case of any errors, it is for
1379 static re_dfastate_t
*
1381 re_acquire_state (reg_errcode_t
*err
, const re_dfa_t
*dfa
,
1382 const re_node_set
*nodes
)
1385 re_dfastate_t
*new_state
;
1386 struct re_state_table_entry
*spot
;
1388 if (BE (nodes
->nelem
== 0, 0))
1393 hash
= calc_state_hash (nodes
, 0);
1394 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1396 for (i
= 0 ; i
< spot
->num
; i
++)
1398 re_dfastate_t
*state
= spot
->array
[i
];
1399 if (hash
!= state
->hash
)
1401 if (re_node_set_compare (&state
->nodes
, nodes
))
1405 /* There are no appropriate state in the dfa, create the new one. */
1406 new_state
= create_ci_newstate (dfa
, nodes
, hash
);
1407 if (BE (new_state
== NULL
, 0))
1413 /* Search for the state whose node_set is equivalent to NODES and
1414 whose context is equivalent to CONTEXT.
1415 Return the pointer to the state, if we found it in the DFA.
1416 Otherwise create the new one and return it. In case of an error
1417 return NULL and set the error code in ERR.
1418 Note: - We assume NULL as the invalid state, then it is possible that
1419 return value is NULL and ERR is REG_NOERROR.
1420 - We never return non-NULL value in case of any errors, it is for
1423 static re_dfastate_t
*
1425 re_acquire_state_context (reg_errcode_t
*err
, const re_dfa_t
*dfa
,
1426 const re_node_set
*nodes
, unsigned int context
)
1429 re_dfastate_t
*new_state
;
1430 struct re_state_table_entry
*spot
;
1432 if (nodes
->nelem
== 0)
1437 hash
= calc_state_hash (nodes
, context
);
1438 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1440 for (i
= 0 ; i
< spot
->num
; i
++)
1442 re_dfastate_t
*state
= spot
->array
[i
];
1443 if (state
->hash
== hash
1444 && state
->context
== context
1445 && re_node_set_compare (state
->entrance_nodes
, nodes
))
1448 /* There are no appropriate state in `dfa', create the new one. */
1449 new_state
= create_cd_newstate (dfa
, nodes
, context
, hash
);
1450 if (BE (new_state
== NULL
, 0))
1456 /* Finish initialization of the new state NEWSTATE, and using its hash value
1457 HASH put in the appropriate bucket of DFA's state table. Return value
1458 indicates the error code if failed. */
1460 static reg_errcode_t
1461 register_state (const re_dfa_t
*dfa
, re_dfastate_t
*newstate
,
1464 struct re_state_table_entry
*spot
;
1468 newstate
->hash
= hash
;
1469 err
= re_node_set_alloc (&newstate
->non_eps_nodes
, newstate
->nodes
.nelem
);
1470 if (BE (err
!= REG_NOERROR
, 0))
1472 for (i
= 0; i
< newstate
->nodes
.nelem
; i
++)
1474 int elem
= newstate
->nodes
.elems
[i
];
1475 if (!IS_EPSILON_NODE (dfa
->nodes
[elem
].type
))
1476 re_node_set_insert_last (&newstate
->non_eps_nodes
, elem
);
1479 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1480 if (BE (spot
->alloc
<= spot
->num
, 0))
1482 int new_alloc
= 2 * spot
->num
+ 2;
1483 re_dfastate_t
**new_array
= re_realloc (spot
->array
, re_dfastate_t
*,
1485 if (BE (new_array
== NULL
, 0))
1487 spot
->array
= new_array
;
1488 spot
->alloc
= new_alloc
;
1490 spot
->array
[spot
->num
++] = newstate
;
1495 free_state (re_dfastate_t
*state
)
1497 re_node_set_free (&state
->non_eps_nodes
);
1498 re_node_set_free (&state
->inveclosure
);
1499 if (state
->entrance_nodes
!= &state
->nodes
)
1501 re_node_set_free (state
->entrance_nodes
);
1502 re_free (state
->entrance_nodes
);
1504 re_node_set_free (&state
->nodes
);
1505 re_free (state
->word_trtable
);
1506 re_free (state
->trtable
);
1510 /* Create the new state which is independ of contexts.
1511 Return the new state if succeeded, otherwise return NULL. */
1513 static re_dfastate_t
*
1514 create_ci_newstate (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
1519 re_dfastate_t
*newstate
;
1521 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1522 if (BE (newstate
== NULL
, 0))
1524 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1525 if (BE (err
!= REG_NOERROR
, 0))
1531 newstate
->entrance_nodes
= &newstate
->nodes
;
1532 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1534 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1535 re_token_type_t type
= node
->type
;
1536 if (type
== CHARACTER
&& !node
->constraint
)
1538 #ifdef RE_ENABLE_I18N
1539 newstate
->accept_mb
|= node
->accept_mb
;
1540 #endif /* RE_ENABLE_I18N */
1542 /* If the state has the halt node, the state is a halt state. */
1543 if (type
== END_OF_RE
)
1545 else if (type
== OP_BACK_REF
)
1546 newstate
->has_backref
= 1;
1547 else if (type
== ANCHOR
|| node
->constraint
)
1548 newstate
->has_constraint
= 1;
1550 err
= register_state (dfa
, newstate
, hash
);
1551 if (BE (err
!= REG_NOERROR
, 0))
1553 free_state (newstate
);
1559 /* Create the new state which is depend on the context CONTEXT.
1560 Return the new state if succeeded, otherwise return NULL. */
1562 static re_dfastate_t
*
1563 create_cd_newstate (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
1564 unsigned int context
, unsigned int hash
)
1566 int i
, nctx_nodes
= 0;
1568 re_dfastate_t
*newstate
;
1570 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1571 if (BE (newstate
== NULL
, 0))
1573 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1574 if (BE (err
!= REG_NOERROR
, 0))
1580 newstate
->context
= context
;
1581 newstate
->entrance_nodes
= &newstate
->nodes
;
1583 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1585 unsigned int constraint
= 0;
1586 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1587 re_token_type_t type
= node
->type
;
1588 if (node
->constraint
)
1589 constraint
= node
->constraint
;
1591 if (type
== CHARACTER
&& !constraint
)
1593 #ifdef RE_ENABLE_I18N
1594 newstate
->accept_mb
|= node
->accept_mb
;
1595 #endif /* RE_ENABLE_I18N */
1597 /* If the state has the halt node, the state is a halt state. */
1598 if (type
== END_OF_RE
)
1600 else if (type
== OP_BACK_REF
)
1601 newstate
->has_backref
= 1;
1602 else if (type
== ANCHOR
)
1603 constraint
= node
->opr
.ctx_type
;
1607 if (newstate
->entrance_nodes
== &newstate
->nodes
)
1609 newstate
->entrance_nodes
= re_malloc (re_node_set
, 1);
1610 if (BE (newstate
->entrance_nodes
== NULL
, 0))
1612 free_state (newstate
);
1615 re_node_set_init_copy (newstate
->entrance_nodes
, nodes
);
1617 newstate
->has_constraint
= 1;
1620 if (NOT_SATISFY_PREV_CONSTRAINT (constraint
,context
))
1622 re_node_set_remove_at (&newstate
->nodes
, i
- nctx_nodes
);
1627 err
= register_state (dfa
, newstate
, hash
);
1628 if (BE (err
!= REG_NOERROR
, 0))
1630 free_state (newstate
);