]>
Commit | Line | Data |
---|---|---|
3b0bdc72 UD |
1 | /* Extended regular expression matching and search library. |
2 | Copyright (C) 2002 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
17 | License along with the GNU C Library; if not, write to the Free | |
18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA. */ | |
20 | ||
21 | #include <assert.h> | |
22 | #include <ctype.h> | |
23 | #include <limits.h> | |
24 | #include <stdio.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
c202c2c5 UD |
27 | |
28 | #if defined HAVE_WCHAR_H || defined _LIBC | |
29 | # include <wchar.h> | |
30 | #endif /* HAVE_WCHAR_H || _LIBC */ | |
31 | #if defined HAVE_WCTYPE_H || defined _LIBC | |
32 | # include <wctype.h> | |
33 | #endif /* HAVE_WCTYPE_H || _LIBC */ | |
3b0bdc72 UD |
34 | |
35 | #ifdef _LIBC | |
36 | # ifndef _RE_DEFINE_LOCALE_FUNCTIONS | |
37 | # define _RE_DEFINE_LOCALE_FUNCTIONS 1 | |
38 | # include <locale/localeinfo.h> | |
39 | # include <locale/elem-hash.h> | |
40 | # include <locale/coll-lookup.h> | |
41 | # endif | |
42 | #endif | |
43 | ||
44 | /* This is for other GNU distributions with internationalized messages. */ | |
45 | #if HAVE_LIBINTL_H || defined _LIBC | |
46 | # include <libintl.h> | |
47 | # ifdef _LIBC | |
48 | # undef gettext | |
71319b9c UD |
49 | # define gettext(msgid) \ |
50 | INTUSE(__dcgettext) (_libc_intl_domainname_internal, msgid, LC_MESSAGES) | |
3b0bdc72 UD |
51 | # endif |
52 | #else | |
53 | # define gettext(msgid) (msgid) | |
54 | #endif | |
55 | ||
56 | #ifndef gettext_noop | |
57 | /* This define is so xgettext can find the internationalizable | |
58 | strings. */ | |
59 | # define gettext_noop(String) String | |
60 | #endif | |
61 | ||
62 | #include "regex.h" | |
63 | #include "regex_internal.h" | |
64 | ||
92b27c74 | 65 | static void re_string_construct_common (const char *str, int len, |
1b2c2628 UD |
66 | re_string_t *pstr, |
67 | RE_TRANSLATE_TYPE trans, int icase); | |
434d3784 | 68 | #ifdef RE_ENABLE_I18N |
612546c6 | 69 | static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx); |
434d3784 | 70 | #endif /* RE_ENABLE_I18N */ |
3b0bdc72 | 71 | static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, |
1b2c2628 UD |
72 | const re_node_set *nodes, |
73 | unsigned int hash); | |
a9388965 | 74 | static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, |
1b2c2628 | 75 | unsigned int hash); |
3b0bdc72 | 76 | static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, |
1b2c2628 UD |
77 | const re_node_set *nodes, |
78 | unsigned int hash); | |
3b0bdc72 | 79 | static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, |
1b2c2628 UD |
80 | const re_node_set *nodes, |
81 | unsigned int context, | |
82 | unsigned int hash); | |
3b0bdc72 | 83 | static unsigned int inline calc_state_hash (const re_node_set *nodes, |
1b2c2628 | 84 | unsigned int context); |
3b0bdc72 UD |
85 | \f |
86 | /* Functions for string operation. */ | |
87 | ||
612546c6 UD |
88 | /* This function allocate the buffers. It is necessary to call |
89 | re_string_reconstruct before using the object. */ | |
90 | ||
3b0bdc72 | 91 | static reg_errcode_t |
612546c6 | 92 | re_string_allocate (pstr, str, len, init_len, trans, icase) |
3b0bdc72 | 93 | re_string_t *pstr; |
92b27c74 | 94 | const char *str; |
612546c6 | 95 | int len, init_len, icase; |
3b0bdc72 UD |
96 | RE_TRANSLATE_TYPE trans; |
97 | { | |
98 | reg_errcode_t ret; | |
612546c6 UD |
99 | int init_buf_len = (len + 1 < init_len) ? len + 1: init_len; |
100 | re_string_construct_common (str, len, pstr, trans, icase); | |
ac3d553b | 101 | pstr->stop = pstr->len; |
612546c6 UD |
102 | |
103 | ret = re_string_realloc_buffers (pstr, init_buf_len); | |
104 | if (BE (ret != REG_NOERROR, 0)) | |
105 | return ret; | |
106 | ||
c202c2c5 | 107 | pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case |
1b2c2628 | 108 | : (unsigned char *) str); |
612546c6 UD |
109 | pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; |
110 | pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr) | |
1b2c2628 | 111 | || MB_CUR_MAX > 1) ? pstr->valid_len : len; |
612546c6 UD |
112 | return REG_NOERROR; |
113 | } | |
114 | ||
115 | /* This function allocate the buffers, and initialize them. */ | |
116 | ||
117 | static reg_errcode_t | |
118 | re_string_construct (pstr, str, len, trans, icase) | |
119 | re_string_t *pstr; | |
92b27c74 | 120 | const char *str; |
612546c6 UD |
121 | int len, icase; |
122 | RE_TRANSLATE_TYPE trans; | |
123 | { | |
124 | reg_errcode_t ret; | |
125 | re_string_construct_common (str, len, pstr, trans, icase); | |
ac3d553b | 126 | pstr->stop = pstr->len; |
612546c6 UD |
127 | /* Set 0 so that this function can initialize whole buffers. */ |
128 | pstr->valid_len = 0; | |
129 | ||
130 | if (len > 0) | |
3b0bdc72 | 131 | { |
612546c6 | 132 | ret = re_string_realloc_buffers (pstr, len + 1); |
bc15410e | 133 | if (BE (ret != REG_NOERROR, 0)) |
1b2c2628 | 134 | return ret; |
3b0bdc72 | 135 | } |
c202c2c5 | 136 | pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case |
1b2c2628 | 137 | : (unsigned char *) str); |
612546c6 UD |
138 | pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; |
139 | ||
140 | if (icase) | |
141 | { | |
142 | #ifdef RE_ENABLE_I18N | |
143 | if (MB_CUR_MAX > 1) | |
1b2c2628 | 144 | build_wcs_upper_buffer (pstr); |
612546c6 | 145 | else |
3b0bdc72 | 146 | #endif /* RE_ENABLE_I18N */ |
1b2c2628 | 147 | build_upper_buffer (pstr); |
612546c6 UD |
148 | } |
149 | else | |
3b0bdc72 | 150 | { |
612546c6 UD |
151 | #ifdef RE_ENABLE_I18N |
152 | if (MB_CUR_MAX > 1) | |
1b2c2628 | 153 | build_wcs_buffer (pstr); |
612546c6 UD |
154 | else |
155 | #endif /* RE_ENABLE_I18N */ | |
1b2c2628 UD |
156 | { |
157 | if (trans != NULL) | |
158 | re_string_translate_buffer (pstr); | |
159 | else | |
160 | pstr->valid_len = len; | |
161 | } | |
3b0bdc72 | 162 | } |
612546c6 UD |
163 | |
164 | /* Initialized whole buffers, then valid_len == bufs_len. */ | |
165 | pstr->valid_len = pstr->bufs_len; | |
3b0bdc72 UD |
166 | return REG_NOERROR; |
167 | } | |
168 | ||
612546c6 | 169 | /* Helper functions for re_string_allocate, and re_string_construct. */ |
3b0bdc72 UD |
170 | |
171 | static reg_errcode_t | |
612546c6 | 172 | re_string_realloc_buffers (pstr, new_buf_len) |
3b0bdc72 | 173 | re_string_t *pstr; |
612546c6 | 174 | int new_buf_len; |
3b0bdc72 | 175 | { |
3b0bdc72 | 176 | #ifdef RE_ENABLE_I18N |
612546c6 | 177 | if (MB_CUR_MAX > 1) |
3b0bdc72 | 178 | { |
1b2c2628 UD |
179 | wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len); |
180 | if (BE (new_array == NULL, 0)) | |
181 | return REG_ESPACE; | |
182 | pstr->wcs = new_array; | |
3b0bdc72 | 183 | } |
3b0bdc72 | 184 | #endif /* RE_ENABLE_I18N */ |
612546c6 | 185 | if (MBS_ALLOCATED (pstr)) |
3b0bdc72 | 186 | { |
1b2c2628 UD |
187 | unsigned char *new_array = re_realloc (pstr->mbs, unsigned char, |
188 | new_buf_len); | |
189 | if (BE (new_array == NULL, 0)) | |
190 | return REG_ESPACE; | |
191 | pstr->mbs = new_array; | |
3b0bdc72 | 192 | } |
612546c6 | 193 | if (MBS_CASE_ALLOCATED (pstr)) |
3b0bdc72 | 194 | { |
1b2c2628 UD |
195 | unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char, |
196 | new_buf_len); | |
197 | if (BE (new_array == NULL, 0)) | |
198 | return REG_ESPACE; | |
199 | pstr->mbs_case = new_array; | |
612546c6 | 200 | if (!MBS_ALLOCATED (pstr)) |
1b2c2628 | 201 | pstr->mbs = pstr->mbs_case; |
3b0bdc72 | 202 | } |
612546c6 | 203 | pstr->bufs_len = new_buf_len; |
3b0bdc72 UD |
204 | return REG_NOERROR; |
205 | } | |
206 | ||
612546c6 | 207 | |
3b0bdc72 | 208 | static void |
612546c6 | 209 | re_string_construct_common (str, len, pstr, trans, icase) |
92b27c74 | 210 | const char *str; |
3b0bdc72 UD |
211 | int len; |
212 | re_string_t *pstr; | |
612546c6 UD |
213 | RE_TRANSLATE_TYPE trans; |
214 | int icase; | |
3b0bdc72 | 215 | { |
612546c6 | 216 | memset (pstr, '\0', sizeof (re_string_t)); |
c202c2c5 | 217 | pstr->raw_mbs = (const unsigned char *) str; |
3b0bdc72 | 218 | pstr->len = len; |
612546c6 UD |
219 | pstr->trans = trans; |
220 | pstr->icase = icase ? 1 : 0; | |
3b0bdc72 UD |
221 | } |
222 | ||
223 | #ifdef RE_ENABLE_I18N | |
224 | ||
612546c6 | 225 | /* Build wide character buffer PSTR->WCS. |
3b0bdc72 UD |
226 | If the byte sequence of the string are: |
227 | <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3> | |
228 | Then wide character buffer will be: | |
229 | <wc1> , WEOF , <wc2> , WEOF , <wc3> | |
230 | We use WEOF for padding, they indicate that the position isn't | |
612546c6 | 231 | a first byte of a multibyte character. |
3b0bdc72 | 232 | |
612546c6 UD |
233 | Note that this function assumes PSTR->VALID_LEN elements are already |
234 | built and starts from PSTR->VALID_LEN. */ | |
235 | ||
236 | static void | |
3b0bdc72 UD |
237 | build_wcs_buffer (pstr) |
238 | re_string_t *pstr; | |
239 | { | |
612546c6 UD |
240 | mbstate_t prev_st; |
241 | int byte_idx, end_idx, mbclen, remain_len; | |
242 | /* Build the buffers from pstr->valid_len to either pstr->len or | |
243 | pstr->bufs_len. */ | |
244 | end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len; | |
245 | for (byte_idx = pstr->valid_len; byte_idx < end_idx;) | |
246 | { | |
247 | wchar_t wc; | |
248 | remain_len = end_idx - byte_idx; | |
249 | prev_st = pstr->cur_state; | |
c202c2c5 | 250 | mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx |
1b2c2628 | 251 | + byte_idx), remain_len, &pstr->cur_state); |
612546c6 | 252 | if (BE (mbclen == (size_t) -2, 0)) |
1b2c2628 UD |
253 | { |
254 | /* The buffer doesn't have enough space, finish to build. */ | |
255 | pstr->cur_state = prev_st; | |
256 | break; | |
257 | } | |
612546c6 | 258 | else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) |
1b2c2628 UD |
259 | { |
260 | /* We treat these cases as a singlebyte character. */ | |
261 | mbclen = 1; | |
262 | wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; | |
263 | pstr->cur_state = prev_st; | |
264 | } | |
612546c6 UD |
265 | |
266 | /* Apply the translateion if we need. */ | |
267 | if (pstr->trans != NULL && mbclen == 1) | |
1b2c2628 UD |
268 | { |
269 | int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]]; | |
270 | pstr->mbs_case[byte_idx] = ch; | |
271 | } | |
3b0bdc72 | 272 | /* Write wide character and padding. */ |
612546c6 UD |
273 | pstr->wcs[byte_idx++] = wc; |
274 | /* Write paddings. */ | |
275 | for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) | |
1b2c2628 | 276 | pstr->wcs[byte_idx++] = WEOF; |
3b0bdc72 | 277 | } |
612546c6 | 278 | pstr->valid_len = byte_idx; |
3b0bdc72 UD |
279 | } |
280 | ||
612546c6 UD |
281 | /* Build wide character buffer PSTR->WCS like build_wcs_buffer, |
282 | but for REG_ICASE. */ | |
283 | ||
284 | static void | |
3b0bdc72 UD |
285 | build_wcs_upper_buffer (pstr) |
286 | re_string_t *pstr; | |
287 | { | |
612546c6 UD |
288 | mbstate_t prev_st; |
289 | int byte_idx, end_idx, mbclen, remain_len; | |
290 | /* Build the buffers from pstr->valid_len to either pstr->len or | |
291 | pstr->bufs_len. */ | |
292 | end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len; | |
293 | for (byte_idx = pstr->valid_len; byte_idx < end_idx;) | |
294 | { | |
295 | wchar_t wc; | |
296 | remain_len = end_idx - byte_idx; | |
297 | prev_st = pstr->cur_state; | |
c202c2c5 | 298 | mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx |
1b2c2628 | 299 | + byte_idx), remain_len, &pstr->cur_state); |
612546c6 | 300 | if (BE (mbclen == (size_t) -2, 0)) |
1b2c2628 UD |
301 | { |
302 | /* The buffer doesn't have enough space, finish to build. */ | |
303 | pstr->cur_state = prev_st; | |
304 | break; | |
305 | } | |
612546c6 | 306 | else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0) |
1b2c2628 UD |
307 | { |
308 | /* In case of a singlebyte character. */ | |
309 | int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; | |
310 | /* Apply the translateion if we need. */ | |
311 | if (pstr->trans != NULL && mbclen == 1) | |
312 | { | |
313 | ch = pstr->trans[ch]; | |
314 | pstr->mbs_case[byte_idx] = ch; | |
315 | } | |
316 | pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc; | |
317 | pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch; | |
318 | if (BE (mbclen == (size_t) -1, 0)) | |
319 | pstr->cur_state = prev_st; | |
320 | } | |
3b0bdc72 | 321 | else /* mbclen > 1 */ |
1b2c2628 UD |
322 | { |
323 | if (iswlower (wc)) | |
324 | wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st); | |
325 | else | |
326 | memcpy (pstr->mbs + byte_idx, | |
327 | pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); | |
328 | pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc; | |
329 | /* Write paddings. */ | |
330 | for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) | |
331 | pstr->wcs[byte_idx++] = WEOF; | |
332 | } | |
3b0bdc72 | 333 | } |
612546c6 UD |
334 | pstr->valid_len = byte_idx; |
335 | } | |
336 | ||
337 | /* Skip characters until the index becomes greater than NEW_RAW_IDX. | |
338 | Return the index. */ | |
339 | ||
340 | static int | |
341 | re_string_skip_chars (pstr, new_raw_idx) | |
342 | re_string_t *pstr; | |
343 | int new_raw_idx; | |
344 | { | |
345 | mbstate_t prev_st; | |
346 | int rawbuf_idx, mbclen; | |
347 | ||
348 | /* Skip the characters which are not necessary to check. */ | |
349 | for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len; | |
350 | rawbuf_idx < new_raw_idx;) | |
351 | { | |
352 | int remain_len = pstr->len - rawbuf_idx; | |
353 | prev_st = pstr->cur_state; | |
c202c2c5 | 354 | mbclen = mbrlen ((const char *) pstr->raw_mbs + rawbuf_idx, remain_len, |
1b2c2628 | 355 | &pstr->cur_state); |
612546c6 | 356 | if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) |
1b2c2628 UD |
357 | { |
358 | /* We treat these cases as a singlebyte character. */ | |
359 | mbclen = 1; | |
360 | pstr->cur_state = prev_st; | |
361 | } | |
612546c6 UD |
362 | /* Then proceed the next character. */ |
363 | rawbuf_idx += mbclen; | |
364 | } | |
365 | return rawbuf_idx; | |
3b0bdc72 UD |
366 | } |
367 | #endif /* RE_ENABLE_I18N */ | |
368 | ||
1b2c2628 | 369 | /* Build the buffer PSTR->MBS, and apply the translation if we need. |
612546c6 UD |
370 | This function is used in case of REG_ICASE. */ |
371 | ||
372 | static void | |
3b0bdc72 UD |
373 | build_upper_buffer (pstr) |
374 | re_string_t *pstr; | |
375 | { | |
612546c6 UD |
376 | int char_idx, end_idx; |
377 | end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; | |
3b0bdc72 | 378 | |
612546c6 | 379 | for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) |
3b0bdc72 | 380 | { |
612546c6 UD |
381 | int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; |
382 | if (pstr->trans != NULL) | |
1b2c2628 UD |
383 | { |
384 | ch = pstr->trans[ch]; | |
385 | pstr->mbs_case[char_idx] = ch; | |
386 | } | |
612546c6 | 387 | if (islower (ch)) |
1b2c2628 | 388 | pstr->mbs[char_idx] = toupper (ch); |
3b0bdc72 | 389 | else |
1b2c2628 | 390 | pstr->mbs[char_idx] = ch; |
3b0bdc72 | 391 | } |
612546c6 | 392 | pstr->valid_len = char_idx; |
3b0bdc72 UD |
393 | } |
394 | ||
612546c6 | 395 | /* Apply TRANS to the buffer in PSTR. */ |
3b0bdc72 | 396 | |
612546c6 UD |
397 | static void |
398 | re_string_translate_buffer (pstr) | |
3b0bdc72 | 399 | re_string_t *pstr; |
3b0bdc72 | 400 | { |
612546c6 UD |
401 | int buf_idx, end_idx; |
402 | end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; | |
403 | ||
404 | for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) | |
3b0bdc72 | 405 | { |
612546c6 UD |
406 | int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; |
407 | pstr->mbs_case[buf_idx] = pstr->trans[ch]; | |
3b0bdc72 | 408 | } |
612546c6 UD |
409 | |
410 | pstr->valid_len = buf_idx; | |
411 | } | |
412 | ||
413 | /* This function re-construct the buffers. | |
414 | Concretely, convert to wide character in case of MB_CUR_MAX > 1, | |
415 | convert to upper case in case of REG_ICASE, apply translation. */ | |
416 | ||
417 | static reg_errcode_t | |
418 | re_string_reconstruct (pstr, idx, eflags, newline) | |
419 | re_string_t *pstr; | |
420 | int idx, eflags, newline; | |
421 | { | |
422 | int offset = idx - pstr->raw_mbs_idx; | |
423 | if (offset < 0) | |
424 | { | |
425 | /* Reset buffer. */ | |
434d3784 UD |
426 | #ifdef RE_ENABLE_I18N |
427 | if (MB_CUR_MAX > 1) | |
1b2c2628 | 428 | memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); |
434d3784 | 429 | #endif /* RE_ENABLE_I18N */ |
dd385d7c UD |
430 | pstr->len += pstr->raw_mbs_idx; |
431 | pstr->stop += pstr->raw_mbs_idx; | |
612546c6 UD |
432 | pstr->valid_len = pstr->raw_mbs_idx = 0; |
433 | pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF | |
1b2c2628 | 434 | : CONTEXT_NEWLINE | CONTEXT_BEGBUF); |
612546c6 | 435 | if (!MBS_CASE_ALLOCATED (pstr)) |
1b2c2628 | 436 | pstr->mbs_case = (unsigned char *) pstr->raw_mbs; |
612546c6 | 437 | if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr)) |
1b2c2628 | 438 | pstr->mbs = (unsigned char *) pstr->raw_mbs; |
612546c6 UD |
439 | offset = idx; |
440 | } | |
441 | ||
442 | if (offset != 0) | |
443 | { | |
444 | pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags, | |
1b2c2628 | 445 | newline); |
612546c6 UD |
446 | /* Are the characters which are already checked remain? */ |
447 | if (offset < pstr->valid_len) | |
1b2c2628 UD |
448 | { |
449 | /* Yes, move them to the front of the buffer. */ | |
3b0bdc72 | 450 | #ifdef RE_ENABLE_I18N |
1b2c2628 UD |
451 | if (MB_CUR_MAX > 1) |
452 | memmove (pstr->wcs, pstr->wcs + offset, | |
453 | (pstr->valid_len - offset) * sizeof (wint_t)); | |
612546c6 | 454 | #endif /* RE_ENABLE_I18N */ |
1b2c2628 UD |
455 | if (MBS_ALLOCATED (pstr)) |
456 | memmove (pstr->mbs, pstr->mbs + offset, | |
457 | pstr->valid_len - offset); | |
458 | if (MBS_CASE_ALLOCATED (pstr)) | |
459 | memmove (pstr->mbs_case, pstr->mbs_case + offset, | |
460 | pstr->valid_len - offset); | |
461 | pstr->valid_len -= offset; | |
612546c6 | 462 | #if DEBUG |
1b2c2628 | 463 | assert (pstr->valid_len > 0); |
3b0bdc72 | 464 | #endif |
1b2c2628 | 465 | } |
612546c6 | 466 | else |
1b2c2628 UD |
467 | { |
468 | /* No, skip all characters until IDX. */ | |
469 | pstr->valid_len = 0; | |
3b0bdc72 | 470 | #ifdef RE_ENABLE_I18N |
1b2c2628 UD |
471 | if (MB_CUR_MAX > 1) |
472 | { | |
473 | int wcs_idx; | |
474 | pstr->valid_len = re_string_skip_chars (pstr, idx) - idx; | |
475 | for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) | |
476 | pstr->wcs[wcs_idx] = WEOF; | |
477 | } | |
612546c6 | 478 | #endif /* RE_ENABLE_I18N */ |
1b2c2628 | 479 | } |
612546c6 | 480 | if (!MBS_CASE_ALLOCATED (pstr)) |
1b2c2628 UD |
481 | { |
482 | pstr->mbs_case += offset; | |
483 | /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */ | |
484 | if (!MBS_ALLOCATED (pstr)) | |
485 | pstr->mbs += offset; | |
486 | } | |
3b0bdc72 | 487 | } |
612546c6 UD |
488 | pstr->raw_mbs_idx = idx; |
489 | pstr->len -= offset; | |
ac3d553b | 490 | pstr->stop -= offset; |
612546c6 UD |
491 | |
492 | /* Then build the buffers. */ | |
493 | #ifdef RE_ENABLE_I18N | |
494 | if (MB_CUR_MAX > 1) | |
3b0bdc72 | 495 | { |
612546c6 | 496 | if (pstr->icase) |
1b2c2628 | 497 | build_wcs_upper_buffer (pstr); |
612546c6 | 498 | else |
1b2c2628 | 499 | build_wcs_buffer (pstr); |
3b0bdc72 UD |
500 | } |
501 | else | |
612546c6 | 502 | #endif /* RE_ENABLE_I18N */ |
3b0bdc72 | 503 | { |
612546c6 | 504 | if (pstr->icase) |
1b2c2628 | 505 | build_upper_buffer (pstr); |
612546c6 | 506 | else if (pstr->trans != NULL) |
1b2c2628 | 507 | re_string_translate_buffer (pstr); |
3b0bdc72 | 508 | } |
612546c6 UD |
509 | pstr->cur_idx = 0; |
510 | ||
3b0bdc72 UD |
511 | return REG_NOERROR; |
512 | } | |
513 | ||
514 | static void | |
515 | re_string_destruct (pstr) | |
516 | re_string_t *pstr; | |
517 | { | |
518 | #ifdef RE_ENABLE_I18N | |
519 | re_free (pstr->wcs); | |
520 | #endif /* RE_ENABLE_I18N */ | |
612546c6 UD |
521 | if (MBS_ALLOCATED (pstr)) |
522 | re_free (pstr->mbs); | |
523 | if (MBS_CASE_ALLOCATED (pstr)) | |
524 | re_free (pstr->mbs_case); | |
3b0bdc72 UD |
525 | } |
526 | ||
527 | /* Return the context at IDX in INPUT. */ | |
612546c6 | 528 | |
3b0bdc72 UD |
529 | static unsigned int |
530 | re_string_context_at (input, idx, eflags, newline_anchor) | |
531 | const re_string_t *input; | |
532 | int idx, eflags, newline_anchor; | |
533 | { | |
534 | int c; | |
535 | if (idx < 0 || idx == input->len) | |
536 | { | |
3b0bdc72 | 537 | if (idx < 0) |
1b2c2628 UD |
538 | /* In this case, we use the value stored in input->tip_context, |
539 | since we can't know the character in input->mbs[-1] here. */ | |
540 | return input->tip_context; | |
bc15410e | 541 | else /* (idx == input->len) */ |
1b2c2628 UD |
542 | return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF |
543 | : CONTEXT_NEWLINE | CONTEXT_ENDBUF); | |
3b0bdc72 UD |
544 | } |
545 | c = re_string_byte_at (input, idx); | |
546 | if (IS_WORD_CHAR (c)) | |
547 | return CONTEXT_WORD; | |
548 | return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0; | |
549 | } | |
550 | \f | |
551 | /* Functions for set operation. */ | |
552 | ||
553 | static reg_errcode_t | |
554 | re_node_set_alloc (set, size) | |
555 | re_node_set *set; | |
556 | int size; | |
557 | { | |
558 | set->alloc = size; | |
559 | set->nelem = 0; | |
560 | set->elems = re_malloc (int, size); | |
bc15410e | 561 | if (BE (set->elems == NULL, 0)) |
3b0bdc72 UD |
562 | return REG_ESPACE; |
563 | return REG_NOERROR; | |
564 | } | |
565 | ||
566 | static reg_errcode_t | |
567 | re_node_set_init_1 (set, elem) | |
568 | re_node_set *set; | |
569 | int elem; | |
570 | { | |
571 | set->alloc = 1; | |
572 | set->nelem = 1; | |
573 | set->elems = re_malloc (int, 1); | |
bc15410e | 574 | if (BE (set->elems == NULL, 0)) |
3b0bdc72 UD |
575 | return REG_ESPACE; |
576 | set->elems[0] = elem; | |
577 | return REG_NOERROR; | |
578 | } | |
579 | ||
580 | static reg_errcode_t | |
581 | re_node_set_init_2 (set, elem1, elem2) | |
582 | re_node_set *set; | |
583 | int elem1, elem2; | |
584 | { | |
585 | set->alloc = 2; | |
586 | set->elems = re_malloc (int, 2); | |
bc15410e | 587 | if (BE (set->elems == NULL, 0)) |
3b0bdc72 UD |
588 | return REG_ESPACE; |
589 | if (elem1 == elem2) | |
590 | { | |
591 | set->nelem = 1; | |
592 | set->elems[0] = elem1; | |
593 | } | |
594 | else | |
595 | { | |
596 | set->nelem = 2; | |
597 | if (elem1 < elem2) | |
1b2c2628 UD |
598 | { |
599 | set->elems[0] = elem1; | |
600 | set->elems[1] = elem2; | |
601 | } | |
3b0bdc72 | 602 | else |
1b2c2628 UD |
603 | { |
604 | set->elems[0] = elem2; | |
605 | set->elems[1] = elem1; | |
606 | } | |
3b0bdc72 UD |
607 | } |
608 | return REG_NOERROR; | |
609 | } | |
610 | ||
611 | static reg_errcode_t | |
612 | re_node_set_init_copy (dest, src) | |
613 | re_node_set *dest; | |
614 | const re_node_set *src; | |
615 | { | |
616 | dest->nelem = src->nelem; | |
617 | if (src->nelem > 0) | |
618 | { | |
619 | dest->alloc = dest->nelem; | |
620 | dest->elems = re_malloc (int, dest->alloc); | |
bc15410e | 621 | if (BE (dest->elems == NULL, 0)) |
1b2c2628 | 622 | return REG_ESPACE; |
3b0bdc72 UD |
623 | memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); |
624 | } | |
625 | else | |
626 | re_node_set_init_empty (dest); | |
627 | return REG_NOERROR; | |
628 | } | |
629 | ||
a9388965 UD |
630 | /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to |
631 | DEST. Return value indicate the error code or REG_NOERROR if succeeded. | |
632 | Note: We assume dest->elems is NULL, when dest->alloc is 0. */ | |
633 | ||
3b0bdc72 UD |
634 | static reg_errcode_t |
635 | re_node_set_add_intersect (dest, src1, src2) | |
636 | re_node_set *dest; | |
637 | const re_node_set *src1, *src2; | |
638 | { | |
639 | int i1, i2, id; | |
640 | if (src1->nelem > 0 && src2->nelem > 0) | |
641 | { | |
642 | if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) | |
1b2c2628 UD |
643 | { |
644 | dest->alloc = src1->nelem + src2->nelem + dest->nelem; | |
645 | dest->elems = re_realloc (dest->elems, int, dest->alloc); | |
646 | if (BE (dest->elems == NULL, 0)) | |
647 | return REG_ESPACE; | |
648 | } | |
3b0bdc72 UD |
649 | } |
650 | else | |
651 | return REG_NOERROR; | |
652 | ||
653 | for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) | |
654 | { | |
655 | if (src1->elems[i1] > src2->elems[i2]) | |
1b2c2628 UD |
656 | { |
657 | ++i2; | |
658 | continue; | |
659 | } | |
3b0bdc72 | 660 | if (src1->elems[i1] == src2->elems[i2]) |
1b2c2628 UD |
661 | { |
662 | while (id < dest->nelem && dest->elems[id] < src2->elems[i2]) | |
663 | ++id; | |
664 | if (id < dest->nelem && dest->elems[id] == src2->elems[i2]) | |
665 | ++id; | |
666 | else | |
667 | { | |
668 | memmove (dest->elems + id + 1, dest->elems + id, | |
669 | sizeof (int) * (dest->nelem - id)); | |
670 | dest->elems[id++] = src2->elems[i2++]; | |
671 | ++dest->nelem; | |
672 | } | |
673 | } | |
3b0bdc72 UD |
674 | ++i1; |
675 | } | |
676 | return REG_NOERROR; | |
677 | } | |
678 | ||
a9388965 UD |
679 | /* Calculate the union set of the sets SRC1 and SRC2. And store it to |
680 | DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ | |
681 | ||
3b0bdc72 UD |
682 | static reg_errcode_t |
683 | re_node_set_init_union (dest, src1, src2) | |
684 | re_node_set *dest; | |
685 | const re_node_set *src1, *src2; | |
686 | { | |
687 | int i1, i2, id; | |
688 | if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) | |
689 | { | |
690 | dest->alloc = src1->nelem + src2->nelem; | |
691 | dest->elems = re_malloc (int, dest->alloc); | |
bc15410e | 692 | if (BE (dest->elems == NULL, 0)) |
1b2c2628 | 693 | return REG_ESPACE; |
3b0bdc72 UD |
694 | } |
695 | else | |
696 | { | |
697 | if (src1 != NULL && src1->nelem > 0) | |
1b2c2628 | 698 | return re_node_set_init_copy (dest, src1); |
3b0bdc72 | 699 | else if (src2 != NULL && src2->nelem > 0) |
1b2c2628 | 700 | return re_node_set_init_copy (dest, src2); |
3b0bdc72 | 701 | else |
1b2c2628 | 702 | re_node_set_init_empty (dest); |
3b0bdc72 UD |
703 | return REG_NOERROR; |
704 | } | |
705 | for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) | |
706 | { | |
707 | if (src1->elems[i1] > src2->elems[i2]) | |
1b2c2628 UD |
708 | { |
709 | dest->elems[id++] = src2->elems[i2++]; | |
710 | continue; | |
711 | } | |
3b0bdc72 | 712 | if (src1->elems[i1] == src2->elems[i2]) |
1b2c2628 | 713 | ++i2; |
3b0bdc72 UD |
714 | dest->elems[id++] = src1->elems[i1++]; |
715 | } | |
716 | if (i1 < src1->nelem) | |
717 | { | |
718 | memcpy (dest->elems + id, src1->elems + i1, | |
1b2c2628 | 719 | (src1->nelem - i1) * sizeof (int)); |
3b0bdc72 UD |
720 | id += src1->nelem - i1; |
721 | } | |
722 | else if (i2 < src2->nelem) | |
723 | { | |
724 | memcpy (dest->elems + id, src2->elems + i2, | |
1b2c2628 | 725 | (src2->nelem - i2) * sizeof (int)); |
3b0bdc72 UD |
726 | id += src2->nelem - i2; |
727 | } | |
728 | dest->nelem = id; | |
729 | return REG_NOERROR; | |
730 | } | |
731 | ||
a9388965 UD |
732 | /* Calculate the union set of the sets DEST and SRC. And store it to |
733 | DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ | |
734 | ||
3b0bdc72 UD |
735 | static reg_errcode_t |
736 | re_node_set_merge (dest, src) | |
737 | re_node_set *dest; | |
738 | const re_node_set *src; | |
739 | { | |
740 | int si, di; | |
741 | if (src == NULL || src->nelem == 0) | |
742 | return REG_NOERROR; | |
3b0bdc72 UD |
743 | if (dest->alloc < src->nelem + dest->nelem) |
744 | { | |
1b2c2628 | 745 | int *new_buffer; |
3b0bdc72 | 746 | dest->alloc = 2 * (src->nelem + dest->alloc); |
1b2c2628 UD |
747 | new_buffer = re_realloc (dest->elems, int, dest->alloc); |
748 | if (BE (new_buffer == NULL, 0)) | |
749 | return REG_ESPACE; | |
750 | dest->elems = new_buffer; | |
3b0bdc72 UD |
751 | } |
752 | ||
753 | for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;) | |
754 | { | |
755 | int cp_from, ncp, mid, right, src_elem = src->elems[si]; | |
756 | /* Binary search the spot we will add the new element. */ | |
757 | right = dest->nelem; | |
758 | while (di < right) | |
1b2c2628 UD |
759 | { |
760 | mid = (di + right) / 2; | |
761 | if (dest->elems[mid] < src_elem) | |
762 | di = mid + 1; | |
763 | else | |
764 | right = mid; | |
765 | } | |
3b0bdc72 | 766 | if (di >= dest->nelem) |
1b2c2628 | 767 | break; |
3b0bdc72 UD |
768 | |
769 | if (dest->elems[di] == src_elem) | |
1b2c2628 UD |
770 | { |
771 | /* Skip since, DEST already has the element. */ | |
772 | ++di; | |
773 | ++si; | |
774 | continue; | |
775 | } | |
3b0bdc72 UD |
776 | |
777 | /* Skip the src elements which are less than dest->elems[di]. */ | |
778 | cp_from = si; | |
779 | while (si < src->nelem && src->elems[si] < dest->elems[di]) | |
1b2c2628 | 780 | ++si; |
3b0bdc72 UD |
781 | /* Copy these src elements. */ |
782 | ncp = si - cp_from; | |
783 | memmove (dest->elems + di + ncp, dest->elems + di, | |
1b2c2628 | 784 | sizeof (int) * (dest->nelem - di)); |
3b0bdc72 | 785 | memcpy (dest->elems + di, src->elems + cp_from, |
1b2c2628 | 786 | sizeof (int) * ncp); |
3b0bdc72 UD |
787 | /* Update counters. */ |
788 | di += ncp; | |
789 | dest->nelem += ncp; | |
790 | } | |
791 | ||
792 | /* Copy remaining src elements. */ | |
793 | if (si < src->nelem) | |
794 | { | |
795 | memcpy (dest->elems + di, src->elems + si, | |
1b2c2628 | 796 | sizeof (int) * (src->nelem - si)); |
3b0bdc72 UD |
797 | dest->nelem += src->nelem - si; |
798 | } | |
799 | return REG_NOERROR; | |
800 | } | |
801 | ||
802 | /* Insert the new element ELEM to the re_node_set* SET. | |
803 | return 0 if SET already has ELEM, | |
804 | return -1 if an error is occured, return 1 otherwise. */ | |
805 | ||
806 | static int | |
807 | re_node_set_insert (set, elem) | |
808 | re_node_set *set; | |
809 | int elem; | |
810 | { | |
811 | int idx, right, mid; | |
812 | /* In case of the set is empty. */ | |
813 | if (set->elems == NULL || set->alloc == 0) | |
814 | { | |
bc15410e | 815 | if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) |
1b2c2628 | 816 | return 1; |
3b0bdc72 | 817 | else |
1b2c2628 | 818 | return -1; |
3b0bdc72 UD |
819 | } |
820 | ||
821 | /* Binary search the spot we will add the new element. */ | |
822 | idx = 0; | |
823 | right = set->nelem; | |
824 | while (idx < right) | |
825 | { | |
826 | mid = (idx + right) / 2; | |
827 | if (set->elems[mid] < elem) | |
1b2c2628 | 828 | idx = mid + 1; |
3b0bdc72 | 829 | else |
1b2c2628 | 830 | right = mid; |
3b0bdc72 UD |
831 | } |
832 | ||
833 | /* Realloc if we need. */ | |
834 | if (set->alloc < set->nelem + 1) | |
835 | { | |
836 | int *new_array; | |
837 | set->alloc = set->alloc * 2; | |
838 | new_array = re_malloc (int, set->alloc); | |
bc15410e | 839 | if (BE (new_array == NULL, 0)) |
1b2c2628 | 840 | return -1; |
3b0bdc72 UD |
841 | /* Copy the elements they are followed by the new element. */ |
842 | if (idx > 0) | |
1b2c2628 | 843 | memcpy (new_array, set->elems, sizeof (int) * (idx)); |
3b0bdc72 UD |
844 | /* Copy the elements which follows the new element. */ |
845 | if (set->nelem - idx > 0) | |
1b2c2628 | 846 | memcpy (new_array + idx + 1, set->elems + idx, |
3b0bdc72 | 847 | sizeof (int) * (set->nelem - idx)); |
612546c6 | 848 | re_free (set->elems); |
3b0bdc72 UD |
849 | set->elems = new_array; |
850 | } | |
851 | else | |
852 | { | |
853 | /* Move the elements which follows the new element. */ | |
854 | if (set->nelem - idx > 0) | |
1b2c2628 UD |
855 | memmove (set->elems + idx + 1, set->elems + idx, |
856 | sizeof (int) * (set->nelem - idx)); | |
3b0bdc72 UD |
857 | } |
858 | /* Insert the new element. */ | |
859 | set->elems[idx] = elem; | |
860 | ++set->nelem; | |
861 | return 1; | |
862 | } | |
863 | ||
864 | /* Compare two node sets SET1 and SET2. | |
865 | return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */ | |
866 | ||
867 | static int | |
868 | re_node_set_compare (set1, set2) | |
869 | const re_node_set *set1, *set2; | |
870 | { | |
871 | int i; | |
872 | if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) | |
873 | return 0; | |
874 | for (i = 0 ; i < set1->nelem ; i++) | |
875 | if (set1->elems[i] != set2->elems[i]) | |
876 | return 0; | |
877 | return 1; | |
878 | } | |
879 | ||
0742e48e | 880 | /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ |
3b0bdc72 UD |
881 | |
882 | static int | |
883 | re_node_set_contains (set, elem) | |
884 | const re_node_set *set; | |
885 | int elem; | |
886 | { | |
887 | int idx, right, mid; | |
888 | if (set->nelem <= 0) | |
889 | return 0; | |
890 | ||
891 | /* Binary search the element. */ | |
892 | idx = 0; | |
893 | right = set->nelem - 1; | |
894 | while (idx < right) | |
895 | { | |
896 | mid = (idx + right) / 2; | |
897 | if (set->elems[mid] < elem) | |
1b2c2628 | 898 | idx = mid + 1; |
3b0bdc72 | 899 | else |
1b2c2628 | 900 | right = mid; |
3b0bdc72 | 901 | } |
0742e48e | 902 | return set->elems[idx] == elem ? idx + 1 : 0; |
3b0bdc72 UD |
903 | } |
904 | ||
905 | static void | |
906 | re_node_set_remove_at (set, idx) | |
907 | re_node_set *set; | |
908 | int idx; | |
909 | { | |
910 | if (idx < 0 || idx >= set->nelem) | |
911 | return; | |
912 | if (idx < set->nelem - 1) | |
913 | memmove (set->elems + idx, set->elems + idx + 1, | |
1b2c2628 | 914 | sizeof (int) * (set->nelem - idx - 1)); |
3b0bdc72 UD |
915 | --set->nelem; |
916 | } | |
917 | \f | |
918 | ||
919 | /* Add the token TOKEN to dfa->nodes, and return the index of the token. | |
920 | Or return -1, if an error will be occured. */ | |
921 | ||
922 | static int | |
923 | re_dfa_add_node (dfa, token, mode) | |
924 | re_dfa_t *dfa; | |
925 | re_token_t token; | |
926 | int mode; | |
927 | { | |
928 | if (dfa->nodes_len >= dfa->nodes_alloc) | |
929 | { | |
930 | re_token_t *new_array; | |
931 | dfa->nodes_alloc *= 2; | |
932 | new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc); | |
bc15410e | 933 | if (BE (new_array == NULL, 0)) |
1b2c2628 | 934 | return -1; |
3b0bdc72 | 935 | else |
1b2c2628 | 936 | dfa->nodes = new_array; |
3b0bdc72 | 937 | if (mode) |
1b2c2628 UD |
938 | { |
939 | int *new_nexts; | |
940 | re_node_set *new_edests, *new_eclosures, *new_inveclosures; | |
941 | ||
942 | new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc); | |
943 | new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc); | |
944 | new_eclosures = re_realloc (dfa->eclosures, re_node_set, | |
945 | dfa->nodes_alloc); | |
946 | new_inveclosures = re_realloc (dfa->inveclosures, re_node_set, | |
947 | dfa->nodes_alloc); | |
948 | if (BE (new_nexts == NULL || new_edests == NULL | |
949 | || new_eclosures == NULL || new_inveclosures == NULL, 0)) | |
950 | return -1; | |
951 | dfa->nexts = new_nexts; | |
952 | dfa->edests = new_edests; | |
953 | dfa->eclosures = new_eclosures; | |
954 | dfa->inveclosures = new_inveclosures; | |
955 | } | |
3b0bdc72 UD |
956 | } |
957 | dfa->nodes[dfa->nodes_len] = token; | |
958 | dfa->nodes[dfa->nodes_len].duplicated = 0; | |
485d775d | 959 | dfa->nodes[dfa->nodes_len].constraint = 0; |
3b0bdc72 UD |
960 | return dfa->nodes_len++; |
961 | } | |
962 | ||
963 | static unsigned int inline | |
964 | calc_state_hash (nodes, context) | |
965 | const re_node_set *nodes; | |
966 | unsigned int context; | |
967 | { | |
968 | unsigned int hash = nodes->nelem + context; | |
969 | int i; | |
970 | for (i = 0 ; i < nodes->nelem ; i++) | |
971 | hash += nodes->elems[i]; | |
972 | return hash; | |
973 | } | |
974 | ||
975 | /* Search for the state whose node_set is equivalent to NODES. | |
976 | Return the pointer to the state, if we found it in the DFA. | |
a9388965 UD |
977 | Otherwise create the new one and return it. In case of an error |
978 | return NULL and set the error code in ERR. | |
979 | Note: - We assume NULL as the invalid state, then it is possible that | |
1b2c2628 UD |
980 | return value is NULL and ERR is REG_NOERROR. |
981 | - We never return non-NULL value in case of any errors, it is for | |
982 | optimization. */ | |
a9388965 UD |
983 | |
984 | static re_dfastate_t* | |
985 | re_acquire_state (err, dfa, nodes) | |
986 | reg_errcode_t *err; | |
3b0bdc72 UD |
987 | re_dfa_t *dfa; |
988 | const re_node_set *nodes; | |
989 | { | |
990 | unsigned int hash; | |
a9388965 | 991 | re_dfastate_t *new_state; |
3b0bdc72 UD |
992 | struct re_state_table_entry *spot; |
993 | int i; | |
bc15410e | 994 | if (BE (nodes->nelem == 0, 0)) |
a9388965 UD |
995 | { |
996 | *err = REG_NOERROR; | |
997 | return NULL; | |
998 | } | |
3b0bdc72 UD |
999 | hash = calc_state_hash (nodes, 0); |
1000 | spot = dfa->state_table + (hash & dfa->state_hash_mask); | |
1001 | ||
bc15410e | 1002 | for (i = 0 ; i < spot->num ; i++) |
3b0bdc72 | 1003 | { |
bc15410e UD |
1004 | re_dfastate_t *state = spot->array[i]; |
1005 | if (hash != state->hash) | |
1b2c2628 | 1006 | continue; |
bc15410e | 1007 | if (re_node_set_compare (&state->nodes, nodes)) |
1b2c2628 | 1008 | return state; |
3b0bdc72 | 1009 | } |
3b0bdc72 UD |
1010 | |
1011 | /* There are no appropriate state in the dfa, create the new one. */ | |
a9388965 | 1012 | new_state = create_ci_newstate (dfa, nodes, hash); |
bc15410e | 1013 | if (BE (new_state != NULL, 1)) |
a9388965 UD |
1014 | return new_state; |
1015 | else | |
1016 | { | |
1017 | *err = REG_ESPACE; | |
1018 | return NULL; | |
1019 | } | |
3b0bdc72 UD |
1020 | } |
1021 | ||
1022 | /* Search for the state whose node_set is equivalent to NODES and | |
1023 | whose context is equivalent to CONTEXT. | |
1024 | Return the pointer to the state, if we found it in the DFA. | |
a9388965 UD |
1025 | Otherwise create the new one and return it. In case of an error |
1026 | return NULL and set the error code in ERR. | |
1027 | Note: - We assume NULL as the invalid state, then it is possible that | |
1b2c2628 UD |
1028 | return value is NULL and ERR is REG_NOERROR. |
1029 | - We never return non-NULL value in case of any errors, it is for | |
1030 | optimization. */ | |
a9388965 UD |
1031 | |
1032 | static re_dfastate_t* | |
1033 | re_acquire_state_context (err, dfa, nodes, context) | |
1034 | reg_errcode_t *err; | |
3b0bdc72 UD |
1035 | re_dfa_t *dfa; |
1036 | const re_node_set *nodes; | |
1037 | unsigned int context; | |
1038 | { | |
1039 | unsigned int hash; | |
a9388965 | 1040 | re_dfastate_t *new_state; |
3b0bdc72 UD |
1041 | struct re_state_table_entry *spot; |
1042 | int i; | |
1043 | if (nodes->nelem == 0) | |
a9388965 UD |
1044 | { |
1045 | *err = REG_NOERROR; | |
1046 | return NULL; | |
1047 | } | |
3b0bdc72 UD |
1048 | hash = calc_state_hash (nodes, context); |
1049 | spot = dfa->state_table + (hash & dfa->state_hash_mask); | |
1050 | ||
bc15410e | 1051 | for (i = 0 ; i < spot->num ; i++) |
3b0bdc72 | 1052 | { |
bc15410e UD |
1053 | re_dfastate_t *state = spot->array[i]; |
1054 | if (hash != state->hash) | |
1b2c2628 | 1055 | continue; |
bc15410e | 1056 | if (re_node_set_compare (state->entrance_nodes, nodes) |
1b2c2628 UD |
1057 | && state->context == context) |
1058 | return state; | |
3b0bdc72 | 1059 | } |
3b0bdc72 | 1060 | /* There are no appropriate state in `dfa', create the new one. */ |
a9388965 | 1061 | new_state = create_cd_newstate (dfa, nodes, context, hash); |
bc15410e | 1062 | if (BE (new_state != NULL, 1)) |
a9388965 UD |
1063 | return new_state; |
1064 | else | |
1065 | { | |
1066 | *err = REG_ESPACE; | |
1067 | return NULL; | |
1068 | } | |
3b0bdc72 UD |
1069 | } |
1070 | ||
a9388965 UD |
1071 | /* Allocate memory for DFA state and initialize common properties. |
1072 | Return the new state if succeeded, otherwise return NULL. */ | |
1073 | ||
3b0bdc72 UD |
1074 | static re_dfastate_t * |
1075 | create_newstate_common (dfa, nodes, hash) | |
1076 | re_dfa_t *dfa; | |
1077 | const re_node_set *nodes; | |
1078 | unsigned int hash; | |
1079 | { | |
1080 | re_dfastate_t *newstate; | |
1b2c2628 | 1081 | reg_errcode_t err; |
3b0bdc72 | 1082 | newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); |
bc15410e | 1083 | if (BE (newstate == NULL, 0)) |
a9388965 | 1084 | return NULL; |
1b2c2628 UD |
1085 | err = re_node_set_init_copy (&newstate->nodes, nodes); |
1086 | if (BE (err != REG_NOERROR, 0)) | |
1087 | { | |
1088 | re_free (newstate); | |
1089 | return NULL; | |
1090 | } | |
3b0bdc72 UD |
1091 | newstate->trtable = NULL; |
1092 | newstate->trtable_search = NULL; | |
1093 | newstate->hash = hash; | |
1094 | return newstate; | |
1095 | } | |
1096 | ||
a9388965 UD |
1097 | /* Store the new state NEWSTATE whose hash value is HASH in appropriate |
1098 | position. Return value indicate the error code if failed. */ | |
1099 | ||
1100 | static reg_errcode_t | |
3b0bdc72 UD |
1101 | register_state (dfa, newstate, hash) |
1102 | re_dfa_t *dfa; | |
1103 | re_dfastate_t *newstate; | |
1104 | unsigned int hash; | |
1105 | { | |
1106 | struct re_state_table_entry *spot; | |
1107 | spot = dfa->state_table + (hash & dfa->state_hash_mask); | |
1108 | ||
1109 | if (spot->alloc <= spot->num) | |
1110 | { | |
1b2c2628 | 1111 | re_dfastate_t **new_array; |
bc15410e | 1112 | spot->alloc = 2 * spot->num + 2; |
1b2c2628 UD |
1113 | new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc); |
1114 | if (BE (new_array == NULL, 0)) | |
1115 | return REG_ESPACE; | |
1116 | spot->array = new_array; | |
3b0bdc72 | 1117 | } |
bc15410e | 1118 | spot->array[spot->num++] = newstate; |
a9388965 | 1119 | return REG_NOERROR; |
3b0bdc72 UD |
1120 | } |
1121 | ||
a9388965 UD |
1122 | /* Create the new state which is independ of contexts. |
1123 | Return the new state if succeeded, otherwise return NULL. */ | |
1124 | ||
3b0bdc72 UD |
1125 | static re_dfastate_t * |
1126 | create_ci_newstate (dfa, nodes, hash) | |
1127 | re_dfa_t *dfa; | |
1128 | const re_node_set *nodes; | |
1129 | unsigned int hash; | |
1130 | { | |
1131 | int i; | |
a9388965 | 1132 | reg_errcode_t err; |
3b0bdc72 UD |
1133 | re_dfastate_t *newstate; |
1134 | newstate = create_newstate_common (dfa, nodes, hash); | |
bc15410e | 1135 | if (BE (newstate == NULL, 0)) |
a9388965 | 1136 | return NULL; |
3b0bdc72 UD |
1137 | newstate->entrance_nodes = &newstate->nodes; |
1138 | ||
1139 | for (i = 0 ; i < nodes->nelem ; i++) | |
1140 | { | |
1141 | re_token_t *node = dfa->nodes + nodes->elems[i]; | |
1142 | re_token_type_t type = node->type; | |
485d775d | 1143 | if (type == CHARACTER && !node->constraint) |
1b2c2628 | 1144 | continue; |
3b0bdc72 UD |
1145 | |
1146 | /* If the state has the halt node, the state is a halt state. */ | |
1147 | else if (type == END_OF_RE) | |
1b2c2628 | 1148 | newstate->halt = 1; |
c0a0f9a3 | 1149 | #ifdef RE_ENABLE_I18N |
3b0bdc72 | 1150 | else if (type == COMPLEX_BRACKET |
1b2c2628 UD |
1151 | || (type == OP_PERIOD && MB_CUR_MAX > 1)) |
1152 | newstate->accept_mb = 1; | |
c0a0f9a3 | 1153 | #endif /* RE_ENABLE_I18N */ |
3b0bdc72 | 1154 | else if (type == OP_BACK_REF) |
1b2c2628 | 1155 | newstate->has_backref = 1; |
485d775d | 1156 | else if (type == ANCHOR || node->constraint) |
1b2c2628 | 1157 | newstate->has_constraint = 1; |
3b0bdc72 | 1158 | } |
a9388965 | 1159 | err = register_state (dfa, newstate, hash); |
1b2c2628 UD |
1160 | if (BE (err != REG_NOERROR, 0)) |
1161 | { | |
1162 | free_state (newstate); | |
1163 | newstate = NULL; | |
1164 | } | |
1165 | return newstate; | |
3b0bdc72 UD |
1166 | } |
1167 | ||
a9388965 UD |
1168 | /* Create the new state which is depend on the context CONTEXT. |
1169 | Return the new state if succeeded, otherwise return NULL. */ | |
1170 | ||
3b0bdc72 UD |
1171 | static re_dfastate_t * |
1172 | create_cd_newstate (dfa, nodes, context, hash) | |
1173 | re_dfa_t *dfa; | |
1174 | const re_node_set *nodes; | |
1175 | unsigned int context, hash; | |
1176 | { | |
1177 | int i, nctx_nodes = 0; | |
a9388965 | 1178 | reg_errcode_t err; |
3b0bdc72 UD |
1179 | re_dfastate_t *newstate; |
1180 | ||
1181 | newstate = create_newstate_common (dfa, nodes, hash); | |
bc15410e | 1182 | if (BE (newstate == NULL, 0)) |
a9388965 | 1183 | return NULL; |
3b0bdc72 UD |
1184 | newstate->context = context; |
1185 | newstate->entrance_nodes = &newstate->nodes; | |
1186 | ||
1187 | for (i = 0 ; i < nodes->nelem ; i++) | |
1188 | { | |
1189 | unsigned int constraint = 0; | |
1190 | re_token_t *node = dfa->nodes + nodes->elems[i]; | |
1191 | re_token_type_t type = node->type; | |
485d775d | 1192 | if (node->constraint) |
1b2c2628 | 1193 | constraint = node->constraint; |
3b0bdc72 | 1194 | |
485d775d | 1195 | if (type == CHARACTER && !constraint) |
1b2c2628 | 1196 | continue; |
3b0bdc72 UD |
1197 | /* If the state has the halt node, the state is a halt state. */ |
1198 | else if (type == END_OF_RE) | |
1b2c2628 | 1199 | newstate->halt = 1; |
c0a0f9a3 | 1200 | #ifdef RE_ENABLE_I18N |
3b0bdc72 | 1201 | else if (type == COMPLEX_BRACKET |
1b2c2628 UD |
1202 | || (type == OP_PERIOD && MB_CUR_MAX > 1)) |
1203 | newstate->accept_mb = 1; | |
c0a0f9a3 | 1204 | #endif /* RE_ENABLE_I18N */ |
3b0bdc72 | 1205 | else if (type == OP_BACK_REF) |
1b2c2628 | 1206 | newstate->has_backref = 1; |
3b0bdc72 | 1207 | else if (type == ANCHOR) |
1b2c2628 | 1208 | constraint = node->opr.ctx_type; |
3b0bdc72 UD |
1209 | |
1210 | if (constraint) | |
1b2c2628 UD |
1211 | { |
1212 | if (newstate->entrance_nodes == &newstate->nodes) | |
1213 | { | |
1214 | newstate->entrance_nodes = re_malloc (re_node_set, 1); | |
bc15410e | 1215 | if (BE (newstate->entrance_nodes == NULL, 0)) |
1b2c2628 UD |
1216 | { |
1217 | free_state (newstate); | |
1218 | return NULL; | |
1219 | } | |
1220 | re_node_set_init_copy (newstate->entrance_nodes, nodes); | |
1221 | nctx_nodes = 0; | |
1222 | newstate->has_constraint = 1; | |
1223 | } | |
1224 | ||
1225 | if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) | |
1226 | { | |
1227 | re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); | |
1228 | ++nctx_nodes; | |
1229 | } | |
1230 | } | |
3b0bdc72 | 1231 | } |
a9388965 | 1232 | err = register_state (dfa, newstate, hash); |
1b2c2628 UD |
1233 | if (BE (err != REG_NOERROR, 0)) |
1234 | { | |
1235 | free_state (newstate); | |
1236 | newstate = NULL; | |
1237 | } | |
1238 | return newstate; | |
1239 | } | |
1240 | ||
1241 | static void | |
1242 | free_state (state) | |
1243 | re_dfastate_t *state; | |
1244 | { | |
1245 | if (state->entrance_nodes != &state->nodes) | |
1246 | { | |
1247 | re_node_set_free (state->entrance_nodes); | |
1248 | re_free (state->entrance_nodes); | |
1249 | } | |
1250 | re_node_set_free (&state->nodes); | |
1251 | re_free (state->trtable); | |
1252 | re_free (state->trtable_search); | |
1253 | re_free (state); | |
3b0bdc72 | 1254 | } |