]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regex_internal.c
Update.
[thirdparty/glibc.git] / posix / regex_internal.c
CommitLineData
3b0bdc72 1/* Extended regular expression matching and search library.
54e1cabc 2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3b0bdc72
UD
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
92b27c74 21static void re_string_construct_common (const char *str, int len,
1b2c2628 22 re_string_t *pstr,
3c0fb574 23 RE_TRANSLATE_TYPE trans, int icase,
f0c7c524 24 const re_dfa_t *dfa);
434d3784 25#ifdef RE_ENABLE_I18N
1843975c
RM
26static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
27 wint_t *last_wc);
434d3784 28#endif /* RE_ENABLE_I18N */
3b0bdc72 29static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
1b2c2628
UD
30 const re_node_set *nodes,
31 unsigned int hash);
a9388965 32static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
1b2c2628 33 unsigned int hash);
3b0bdc72 34static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
1b2c2628
UD
35 const re_node_set *nodes,
36 unsigned int hash);
3b0bdc72 37static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
1b2c2628
UD
38 const re_node_set *nodes,
39 unsigned int context,
40 unsigned int hash);
3b0bdc72 41static unsigned int inline calc_state_hash (const re_node_set *nodes,
1b2c2628 42 unsigned int context);
3b0bdc72
UD
43\f
44/* Functions for string operation. */
45
612546c6
UD
46/* This function allocate the buffers. It is necessary to call
47 re_string_reconstruct before using the object. */
48
3b0bdc72 49static reg_errcode_t
f0c7c524 50re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
3b0bdc72 51 re_string_t *pstr;
92b27c74 52 const char *str;
f0c7c524 53 int len, init_len, icase;
3b0bdc72 54 RE_TRANSLATE_TYPE trans;
f0c7c524 55 const re_dfa_t *dfa;
3b0bdc72
UD
56{
57 reg_errcode_t ret;
612546c6 58 int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
f0c7c524 59 re_string_construct_common (str, len, pstr, trans, icase, dfa);
ac3d553b 60 pstr->stop = pstr->len;
612546c6
UD
61
62 ret = re_string_realloc_buffers (pstr, init_buf_len);
63 if (BE (ret != REG_NOERROR, 0))
64 return ret;
65
c202c2c5 66 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
1b2c2628 67 : (unsigned char *) str);
612546c6
UD
68 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
69 pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
f0c7c524 70 || dfa->mb_cur_max > 1) ? pstr->valid_len : len;
612546c6
UD
71 return REG_NOERROR;
72}
73
74/* This function allocate the buffers, and initialize them. */
75
76static reg_errcode_t
f0c7c524 77re_string_construct (pstr, str, len, trans, icase, dfa)
612546c6 78 re_string_t *pstr;
92b27c74 79 const char *str;
f0c7c524 80 int len, icase;
612546c6 81 RE_TRANSLATE_TYPE trans;
f0c7c524 82 const re_dfa_t *dfa;
612546c6
UD
83{
84 reg_errcode_t ret;
f0c7c524 85 re_string_construct_common (str, len, pstr, trans, icase, dfa);
ac3d553b 86 pstr->stop = pstr->len;
612546c6
UD
87 /* Set 0 so that this function can initialize whole buffers. */
88 pstr->valid_len = 0;
89
90 if (len > 0)
3b0bdc72 91 {
612546c6 92 ret = re_string_realloc_buffers (pstr, len + 1);
bc15410e 93 if (BE (ret != REG_NOERROR, 0))
1b2c2628 94 return ret;
3b0bdc72 95 }
c202c2c5 96 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
1b2c2628 97 : (unsigned char *) str);
612546c6
UD
98 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
99
100 if (icase)
101 {
102#ifdef RE_ENABLE_I18N
f0c7c524 103 if (dfa->mb_cur_max > 1)
1b2c2628 104 build_wcs_upper_buffer (pstr);
612546c6 105 else
3b0bdc72 106#endif /* RE_ENABLE_I18N */
1b2c2628 107 build_upper_buffer (pstr);
612546c6
UD
108 }
109 else
3b0bdc72 110 {
612546c6 111#ifdef RE_ENABLE_I18N
f0c7c524 112 if (dfa->mb_cur_max > 1)
1b2c2628 113 build_wcs_buffer (pstr);
612546c6
UD
114 else
115#endif /* RE_ENABLE_I18N */
1b2c2628
UD
116 {
117 if (trans != NULL)
118 re_string_translate_buffer (pstr);
119 else
120 pstr->valid_len = len;
121 }
3b0bdc72 122 }
612546c6
UD
123
124 /* Initialized whole buffers, then valid_len == bufs_len. */
125 pstr->valid_len = pstr->bufs_len;
3b0bdc72
UD
126 return REG_NOERROR;
127}
128
612546c6 129/* Helper functions for re_string_allocate, and re_string_construct. */
3b0bdc72
UD
130
131static reg_errcode_t
612546c6 132re_string_realloc_buffers (pstr, new_buf_len)
3b0bdc72 133 re_string_t *pstr;
612546c6 134 int new_buf_len;
3b0bdc72 135{
3b0bdc72 136#ifdef RE_ENABLE_I18N
3c0fb574 137 if (pstr->mb_cur_max > 1)
3b0bdc72 138 {
1b2c2628
UD
139 wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
140 if (BE (new_array == NULL, 0))
141 return REG_ESPACE;
142 pstr->wcs = new_array;
3b0bdc72 143 }
3b0bdc72 144#endif /* RE_ENABLE_I18N */
612546c6 145 if (MBS_ALLOCATED (pstr))
3b0bdc72 146 {
1b2c2628
UD
147 unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
148 new_buf_len);
149 if (BE (new_array == NULL, 0))
150 return REG_ESPACE;
151 pstr->mbs = new_array;
3b0bdc72 152 }
612546c6 153 if (MBS_CASE_ALLOCATED (pstr))
3b0bdc72 154 {
1b2c2628
UD
155 unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
156 new_buf_len);
157 if (BE (new_array == NULL, 0))
158 return REG_ESPACE;
159 pstr->mbs_case = new_array;
612546c6 160 if (!MBS_ALLOCATED (pstr))
1b2c2628 161 pstr->mbs = pstr->mbs_case;
3b0bdc72 162 }
612546c6 163 pstr->bufs_len = new_buf_len;
3b0bdc72
UD
164 return REG_NOERROR;
165}
166
612546c6 167
3b0bdc72 168static void
f0c7c524 169re_string_construct_common (str, len, pstr, trans, icase, dfa)
92b27c74 170 const char *str;
3b0bdc72
UD
171 int len;
172 re_string_t *pstr;
612546c6 173 RE_TRANSLATE_TYPE trans;
f0c7c524
UD
174 int icase;
175 const re_dfa_t *dfa;
3b0bdc72 176{
612546c6 177 memset (pstr, '\0', sizeof (re_string_t));
c202c2c5 178 pstr->raw_mbs = (const unsigned char *) str;
3b0bdc72 179 pstr->len = len;
612546c6
UD
180 pstr->trans = trans;
181 pstr->icase = icase ? 1 : 0;
f0c7c524
UD
182 pstr->mb_cur_max = dfa->mb_cur_max;
183 pstr->is_utf8 = dfa->is_utf8;
184 pstr->map_notascii = dfa->map_notascii;
3b0bdc72
UD
185}
186
187#ifdef RE_ENABLE_I18N
188
612546c6 189/* Build wide character buffer PSTR->WCS.
3b0bdc72
UD
190 If the byte sequence of the string are:
191 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
192 Then wide character buffer will be:
193 <wc1> , WEOF , <wc2> , WEOF , <wc3>
194 We use WEOF for padding, they indicate that the position isn't
612546c6 195 a first byte of a multibyte character.
3b0bdc72 196
612546c6
UD
197 Note that this function assumes PSTR->VALID_LEN elements are already
198 built and starts from PSTR->VALID_LEN. */
199
200static void
3b0bdc72
UD
201build_wcs_buffer (pstr)
202 re_string_t *pstr;
203{
612546c6
UD
204 mbstate_t prev_st;
205 int byte_idx, end_idx, mbclen, remain_len;
206 /* Build the buffers from pstr->valid_len to either pstr->len or
207 pstr->bufs_len. */
208 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
209 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
210 {
211 wchar_t wc;
212 remain_len = end_idx - byte_idx;
213 prev_st = pstr->cur_state;
c202c2c5 214 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
1b2c2628 215 + byte_idx), remain_len, &pstr->cur_state);
612546c6 216 if (BE (mbclen == (size_t) -2, 0))
1b2c2628
UD
217 {
218 /* The buffer doesn't have enough space, finish to build. */
219 pstr->cur_state = prev_st;
220 break;
221 }
612546c6 222 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
1b2c2628
UD
223 {
224 /* We treat these cases as a singlebyte character. */
225 mbclen = 1;
226 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
227 pstr->cur_state = prev_st;
228 }
612546c6 229
74e12fbc 230 /* Apply the translation if we need. */
612546c6 231 if (pstr->trans != NULL && mbclen == 1)
1b2c2628
UD
232 {
233 int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
234 pstr->mbs_case[byte_idx] = ch;
235 }
3b0bdc72 236 /* Write wide character and padding. */
612546c6
UD
237 pstr->wcs[byte_idx++] = wc;
238 /* Write paddings. */
239 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
1b2c2628 240 pstr->wcs[byte_idx++] = WEOF;
3b0bdc72 241 }
612546c6 242 pstr->valid_len = byte_idx;
3b0bdc72
UD
243}
244
612546c6
UD
245/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
246 but for REG_ICASE. */
247
248static void
3b0bdc72
UD
249build_wcs_upper_buffer (pstr)
250 re_string_t *pstr;
251{
612546c6
UD
252 mbstate_t prev_st;
253 int byte_idx, end_idx, mbclen, remain_len;
254 /* Build the buffers from pstr->valid_len to either pstr->len or
255 pstr->bufs_len. */
256 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
f0c7c524
UD
257
258#ifdef _LIBC
259 /* The following optimization assumes that the wchar_t encoding is
260 always ISO 10646. */
261 if (! pstr->map_notascii && pstr->trans == NULL)
262 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
263 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
264 && mbsinit (&pstr->cur_state))
1b2c2628 265 {
f0c7c524
UD
266 /* In case of a singlebyte character. */
267 pstr->mbs[byte_idx]
268 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
269 /* The next step uses the assumption that wchar_t is encoded
270 with ISO 10646: all ASCII values can be converted like this. */
271 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
272 ++byte_idx;
1b2c2628 273 }
f0c7c524 274 else
1b2c2628 275 {
f0c7c524
UD
276 wchar_t wc;
277 remain_len = end_idx - byte_idx;
278 prev_st = pstr->cur_state;
279 mbclen = mbrtowc (&wc,
280 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
281 + byte_idx), remain_len, &pstr->cur_state);
282 if (BE (mbclen > 1, 1))
1b2c2628 283 {
f0c7c524
UD
284 if (iswlower (wc))
285 wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc),
286 &prev_st);
287 else
288 memcpy (pstr->mbs + byte_idx,
289 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
290 pstr->wcs[byte_idx++] = towupper (wc);
291 /* Write paddings. */
292 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
293 pstr->wcs[byte_idx++] = WEOF;
294 }
295 else if (mbclen == (size_t) -1 || mbclen == 0)
296 {
02b50340 297 /* It is an invalid character. Just use the byte. */
f0c7c524 298 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
02b50340
UD
299 pstr->mbs[byte_idx] = ch;
300 /* And also cast it to wide char. */
301 pstr->wcs[byte_idx++] = (wchar_t) ch;
f0c7c524
UD
302 if (BE (mbclen == (size_t) -1, 0))
303 pstr->cur_state = prev_st;
1b2c2628 304 }
1b2c2628 305 else
f0c7c524
UD
306 {
307 /* The buffer doesn't have enough space, finish to build. */
308 pstr->cur_state = prev_st;
309 break;
310 }
1b2c2628 311 }
f0c7c524
UD
312 else
313#endif
314 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
315 {
316 wchar_t wc;
317 remain_len = end_idx - byte_idx;
318 prev_st = pstr->cur_state;
319 mbclen = mbrtowc (&wc,
320 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
321 + byte_idx), remain_len, &pstr->cur_state);
02b50340 322 if (mbclen == 1)
f0c7c524
UD
323 {
324 /* In case of a singlebyte character. */
325 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
326 /* Apply the translation if we need. */
327 if (BE (pstr->trans != NULL, 0) && mbclen == 1)
328 {
329 ch = pstr->trans[ch];
330 pstr->mbs_case[byte_idx] = ch;
331 }
332 pstr->wcs[byte_idx] = towupper (wc);
333 pstr->mbs[byte_idx++] = toupper (ch);
334 if (BE (mbclen == (size_t) -1, 0))
335 pstr->cur_state = prev_st;
336 }
337 else if (BE (mbclen != (size_t) -2, 1))
338 {
339 if (iswlower (wc))
340 wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
341 else
342 memcpy (pstr->mbs + byte_idx,
343 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
344 pstr->wcs[byte_idx++] = towupper (wc);
345 /* Write paddings. */
346 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
347 pstr->wcs[byte_idx++] = WEOF;
348 }
02b50340
UD
349 else if (mbclen == (size_t) -1 || mbclen == 0)
350 {
351 /* It is an invalid character. Just use the byte. */
352 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
353 pstr->mbs[byte_idx] = ch;
354 /* And also cast it to wide char. */
355 pstr->wcs[byte_idx++] = (wchar_t) ch;
356 if (BE (mbclen == (size_t) -1, 0))
357 pstr->cur_state = prev_st;
358 }
f0c7c524
UD
359 else
360 {
361 /* The buffer doesn't have enough space, finish to build. */
362 pstr->cur_state = prev_st;
363 break;
364 }
365 }
612546c6
UD
366 pstr->valid_len = byte_idx;
367}
368
369/* Skip characters until the index becomes greater than NEW_RAW_IDX.
370 Return the index. */
371
372static int
1843975c 373re_string_skip_chars (pstr, new_raw_idx, last_wc)
612546c6
UD
374 re_string_t *pstr;
375 int new_raw_idx;
1843975c 376 wint_t *last_wc;
612546c6
UD
377{
378 mbstate_t prev_st;
379 int rawbuf_idx, mbclen;
1843975c 380 wchar_t wc = 0;
612546c6
UD
381
382 /* Skip the characters which are not necessary to check. */
383 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
384 rawbuf_idx < new_raw_idx;)
385 {
1843975c
RM
386 int remain_len;
387 remain_len = pstr->len - rawbuf_idx;
612546c6 388 prev_st = pstr->cur_state;
1843975c
RM
389 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
390 remain_len, &pstr->cur_state);
612546c6 391 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
1b2c2628
UD
392 {
393 /* We treat these cases as a singlebyte character. */
394 mbclen = 1;
395 pstr->cur_state = prev_st;
396 }
612546c6
UD
397 /* Then proceed the next character. */
398 rawbuf_idx += mbclen;
399 }
1843975c 400 *last_wc = (wint_t) wc;
612546c6 401 return rawbuf_idx;
3b0bdc72
UD
402}
403#endif /* RE_ENABLE_I18N */
404
1b2c2628 405/* Build the buffer PSTR->MBS, and apply the translation if we need.
612546c6
UD
406 This function is used in case of REG_ICASE. */
407
408static void
3b0bdc72
UD
409build_upper_buffer (pstr)
410 re_string_t *pstr;
411{
612546c6
UD
412 int char_idx, end_idx;
413 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
3b0bdc72 414
612546c6 415 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
3b0bdc72 416 {
612546c6
UD
417 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
418 if (pstr->trans != NULL)
1b2c2628 419 {
74e12fbc 420 ch = pstr->trans[ch];
1b2c2628
UD
421 pstr->mbs_case[char_idx] = ch;
422 }
612546c6 423 if (islower (ch))
1b2c2628 424 pstr->mbs[char_idx] = toupper (ch);
3b0bdc72 425 else
1b2c2628 426 pstr->mbs[char_idx] = ch;
3b0bdc72 427 }
612546c6 428 pstr->valid_len = char_idx;
3b0bdc72
UD
429}
430
612546c6 431/* Apply TRANS to the buffer in PSTR. */
3b0bdc72 432
612546c6
UD
433static void
434re_string_translate_buffer (pstr)
3b0bdc72 435 re_string_t *pstr;
3b0bdc72 436{
612546c6
UD
437 int buf_idx, end_idx;
438 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
439
440 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
3b0bdc72 441 {
612546c6
UD
442 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
443 pstr->mbs_case[buf_idx] = pstr->trans[ch];
3b0bdc72 444 }
612546c6
UD
445
446 pstr->valid_len = buf_idx;
447}
448
449/* This function re-construct the buffers.
3c0fb574 450 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
612546c6
UD
451 convert to upper case in case of REG_ICASE, apply translation. */
452
453static reg_errcode_t
454re_string_reconstruct (pstr, idx, eflags, newline)
455 re_string_t *pstr;
456 int idx, eflags, newline;
457{
458 int offset = idx - pstr->raw_mbs_idx;
459 if (offset < 0)
460 {
461 /* Reset buffer. */
434d3784 462#ifdef RE_ENABLE_I18N
3c0fb574 463 if (pstr->mb_cur_max > 1)
1b2c2628 464 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
434d3784 465#endif /* RE_ENABLE_I18N */
dd385d7c
UD
466 pstr->len += pstr->raw_mbs_idx;
467 pstr->stop += pstr->raw_mbs_idx;
612546c6
UD
468 pstr->valid_len = pstr->raw_mbs_idx = 0;
469 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
1b2c2628 470 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
612546c6 471 if (!MBS_CASE_ALLOCATED (pstr))
1b2c2628 472 pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
612546c6 473 if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
1b2c2628 474 pstr->mbs = (unsigned char *) pstr->raw_mbs;
612546c6
UD
475 offset = idx;
476 }
477
478 if (offset != 0)
479 {
612546c6
UD
480 /* Are the characters which are already checked remain? */
481 if (offset < pstr->valid_len)
1b2c2628
UD
482 {
483 /* Yes, move them to the front of the buffer. */
1843975c
RM
484 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
485 newline);
3b0bdc72 486#ifdef RE_ENABLE_I18N
3c0fb574 487 if (pstr->mb_cur_max > 1)
1b2c2628
UD
488 memmove (pstr->wcs, pstr->wcs + offset,
489 (pstr->valid_len - offset) * sizeof (wint_t));
612546c6 490#endif /* RE_ENABLE_I18N */
1b2c2628
UD
491 if (MBS_ALLOCATED (pstr))
492 memmove (pstr->mbs, pstr->mbs + offset,
493 pstr->valid_len - offset);
494 if (MBS_CASE_ALLOCATED (pstr))
495 memmove (pstr->mbs_case, pstr->mbs_case + offset,
496 pstr->valid_len - offset);
497 pstr->valid_len -= offset;
612546c6 498#if DEBUG
1b2c2628 499 assert (pstr->valid_len > 0);
3b0bdc72 500#endif
1b2c2628 501 }
612546c6 502 else
1b2c2628
UD
503 {
504 /* No, skip all characters until IDX. */
505 pstr->valid_len = 0;
3b0bdc72 506#ifdef RE_ENABLE_I18N
3c0fb574 507 if (pstr->mb_cur_max > 1)
1b2c2628
UD
508 {
509 int wcs_idx;
3c0fb574
UD
510 wint_t wc = WEOF;
511
512 if (pstr->is_utf8)
513 {
514 const unsigned char *raw, *p, *end;
515
516 /* Special case UTF-8. Multi-byte chars start with any
517 byte other than 0x80 - 0xbf. */
518 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
519 end = raw + (pstr->valid_len > offset - pstr->mb_cur_max
520 ? pstr->valid_len : offset - pstr->mb_cur_max);
521 for (p = raw + offset - 1; p >= end; --p)
522 if ((*p & 0xc0) != 0x80)
523 {
524 mbstate_t cur_state;
525 wchar_t wc2;
526
3ee363e2
UD
527 /* XXX Don't use mbrtowc, we know which conversion
528 to use (UTF-8 -> UCS4). */
3c0fb574
UD
529 memset (&cur_state, 0, sizeof (cur_state));
530 if (mbrtowc (&wc2, p, raw + offset - p, &cur_state)
531 == raw + offset - p)
532 {
533 memset (&pstr->cur_state, '\0',
534 sizeof (mbstate_t));
535 wc = wc2;
536 }
537 break;
538 }
539 }
540 if (wc == WEOF)
541 {
542 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
543 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
544 pstr->wcs[wcs_idx] = WEOF;
545 }
1843975c
RM
546 if (pstr->trans && wc <= 0xff)
547 wc = pstr->trans[wc];
548 pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
549 : ((newline && IS_WIDE_NEWLINE (wc))
550 ? CONTEXT_NEWLINE : 0));
1b2c2628 551 }
1843975c 552 else
612546c6 553#endif /* RE_ENABLE_I18N */
1843975c
RM
554 {
555 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
556 if (pstr->trans)
557 c = pstr->trans[c];
558 pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
559 : ((newline && IS_NEWLINE (c))
560 ? CONTEXT_NEWLINE : 0));
561 }
1b2c2628 562 }
612546c6 563 if (!MBS_CASE_ALLOCATED (pstr))
1b2c2628
UD
564 {
565 pstr->mbs_case += offset;
566 /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
567 if (!MBS_ALLOCATED (pstr))
568 pstr->mbs += offset;
569 }
3b0bdc72 570 }
612546c6
UD
571 pstr->raw_mbs_idx = idx;
572 pstr->len -= offset;
ac3d553b 573 pstr->stop -= offset;
612546c6
UD
574
575 /* Then build the buffers. */
576#ifdef RE_ENABLE_I18N
3c0fb574 577 if (pstr->mb_cur_max > 1)
3b0bdc72 578 {
612546c6 579 if (pstr->icase)
1b2c2628 580 build_wcs_upper_buffer (pstr);
612546c6 581 else
1b2c2628 582 build_wcs_buffer (pstr);
3b0bdc72
UD
583 }
584 else
612546c6 585#endif /* RE_ENABLE_I18N */
3b0bdc72 586 {
612546c6 587 if (pstr->icase)
1b2c2628 588 build_upper_buffer (pstr);
612546c6 589 else if (pstr->trans != NULL)
1b2c2628 590 re_string_translate_buffer (pstr);
ad7f28c2
UD
591 else
592 pstr->valid_len = pstr->len;
3b0bdc72 593 }
612546c6
UD
594 pstr->cur_idx = 0;
595
3b0bdc72
UD
596 return REG_NOERROR;
597}
598
599static void
600re_string_destruct (pstr)
601 re_string_t *pstr;
602{
603#ifdef RE_ENABLE_I18N
604 re_free (pstr->wcs);
605#endif /* RE_ENABLE_I18N */
612546c6
UD
606 if (MBS_ALLOCATED (pstr))
607 re_free (pstr->mbs);
608 if (MBS_CASE_ALLOCATED (pstr))
609 re_free (pstr->mbs_case);
3b0bdc72
UD
610}
611
612/* Return the context at IDX in INPUT. */
612546c6 613
3b0bdc72
UD
614static unsigned int
615re_string_context_at (input, idx, eflags, newline_anchor)
616 const re_string_t *input;
617 int idx, eflags, newline_anchor;
618{
619 int c;
620 if (idx < 0 || idx == input->len)
621 {
3b0bdc72 622 if (idx < 0)
1b2c2628
UD
623 /* In this case, we use the value stored in input->tip_context,
624 since we can't know the character in input->mbs[-1] here. */
625 return input->tip_context;
bc15410e 626 else /* (idx == input->len) */
1b2c2628
UD
627 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
628 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
3b0bdc72 629 }
a0756231 630#ifdef RE_ENABLE_I18N
3c0fb574 631 if (input->mb_cur_max > 1)
1843975c
RM
632 {
633 wint_t wc;
634 int wc_idx = idx;
635 while(input->wcs[wc_idx] == WEOF)
636 {
637#ifdef DEBUG
638 /* It must not happen. */
639 assert (wc_idx >= 0);
640#endif
641 --wc_idx;
642 if (wc_idx < 0)
643 return input->tip_context;
644 }
645 wc = input->wcs[wc_idx];
646 if (IS_WIDE_WORD_CHAR (wc))
647 return CONTEXT_WORD;
648 return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0;
649 }
a0756231
RM
650 else
651#endif
652 {
653 c = re_string_byte_at (input, idx);
654 if (IS_WORD_CHAR (c))
655 return CONTEXT_WORD;
656 return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
657 }
3b0bdc72
UD
658}
659\f
660/* Functions for set operation. */
661
662static reg_errcode_t
663re_node_set_alloc (set, size)
664 re_node_set *set;
665 int size;
666{
667 set->alloc = size;
668 set->nelem = 0;
669 set->elems = re_malloc (int, size);
bc15410e 670 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
671 return REG_ESPACE;
672 return REG_NOERROR;
673}
674
675static reg_errcode_t
676re_node_set_init_1 (set, elem)
677 re_node_set *set;
678 int elem;
679{
680 set->alloc = 1;
681 set->nelem = 1;
682 set->elems = re_malloc (int, 1);
bc15410e 683 if (BE (set->elems == NULL, 0))
6291ee3c
UD
684 {
685 set->alloc = set->nelem = 0;
686 return REG_ESPACE;
687 }
3b0bdc72
UD
688 set->elems[0] = elem;
689 return REG_NOERROR;
690}
691
692static reg_errcode_t
693re_node_set_init_2 (set, elem1, elem2)
694 re_node_set *set;
695 int elem1, elem2;
696{
697 set->alloc = 2;
698 set->elems = re_malloc (int, 2);
bc15410e 699 if (BE (set->elems == NULL, 0))
3b0bdc72
UD
700 return REG_ESPACE;
701 if (elem1 == elem2)
702 {
703 set->nelem = 1;
704 set->elems[0] = elem1;
705 }
706 else
707 {
708 set->nelem = 2;
709 if (elem1 < elem2)
1b2c2628
UD
710 {
711 set->elems[0] = elem1;
712 set->elems[1] = elem2;
713 }
3b0bdc72 714 else
1b2c2628
UD
715 {
716 set->elems[0] = elem2;
717 set->elems[1] = elem1;
718 }
3b0bdc72
UD
719 }
720 return REG_NOERROR;
721}
722
723static reg_errcode_t
724re_node_set_init_copy (dest, src)
725 re_node_set *dest;
726 const re_node_set *src;
727{
728 dest->nelem = src->nelem;
729 if (src->nelem > 0)
730 {
731 dest->alloc = dest->nelem;
732 dest->elems = re_malloc (int, dest->alloc);
bc15410e 733 if (BE (dest->elems == NULL, 0))
6291ee3c
UD
734 {
735 dest->alloc = dest->nelem = 0;
736 return REG_ESPACE;
737 }
3b0bdc72
UD
738 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
739 }
740 else
741 re_node_set_init_empty (dest);
742 return REG_NOERROR;
743}
744
a9388965
UD
745/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
746 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
747 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
748
3b0bdc72
UD
749static reg_errcode_t
750re_node_set_add_intersect (dest, src1, src2)
751 re_node_set *dest;
752 const re_node_set *src1, *src2;
753{
754 int i1, i2, id;
755 if (src1->nelem > 0 && src2->nelem > 0)
756 {
757 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1b2c2628
UD
758 {
759 dest->alloc = src1->nelem + src2->nelem + dest->nelem;
760 dest->elems = re_realloc (dest->elems, int, dest->alloc);
761 if (BE (dest->elems == NULL, 0))
762 return REG_ESPACE;
763 }
3b0bdc72
UD
764 }
765 else
766 return REG_NOERROR;
767
768 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
769 {
770 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
771 {
772 ++i2;
773 continue;
774 }
3b0bdc72 775 if (src1->elems[i1] == src2->elems[i2])
1b2c2628
UD
776 {
777 while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
778 ++id;
779 if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
780 ++id;
781 else
782 {
783 memmove (dest->elems + id + 1, dest->elems + id,
784 sizeof (int) * (dest->nelem - id));
785 dest->elems[id++] = src2->elems[i2++];
786 ++dest->nelem;
787 }
788 }
3b0bdc72
UD
789 ++i1;
790 }
791 return REG_NOERROR;
792}
793
a9388965
UD
794/* Calculate the union set of the sets SRC1 and SRC2. And store it to
795 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
796
3b0bdc72
UD
797static reg_errcode_t
798re_node_set_init_union (dest, src1, src2)
799 re_node_set *dest;
800 const re_node_set *src1, *src2;
801{
802 int i1, i2, id;
803 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
804 {
805 dest->alloc = src1->nelem + src2->nelem;
806 dest->elems = re_malloc (int, dest->alloc);
bc15410e 807 if (BE (dest->elems == NULL, 0))
1b2c2628 808 return REG_ESPACE;
3b0bdc72
UD
809 }
810 else
811 {
812 if (src1 != NULL && src1->nelem > 0)
1b2c2628 813 return re_node_set_init_copy (dest, src1);
3b0bdc72 814 else if (src2 != NULL && src2->nelem > 0)
1b2c2628 815 return re_node_set_init_copy (dest, src2);
3b0bdc72 816 else
1b2c2628 817 re_node_set_init_empty (dest);
3b0bdc72
UD
818 return REG_NOERROR;
819 }
820 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
821 {
822 if (src1->elems[i1] > src2->elems[i2])
1b2c2628
UD
823 {
824 dest->elems[id++] = src2->elems[i2++];
825 continue;
826 }
3b0bdc72 827 if (src1->elems[i1] == src2->elems[i2])
1b2c2628 828 ++i2;
3b0bdc72
UD
829 dest->elems[id++] = src1->elems[i1++];
830 }
831 if (i1 < src1->nelem)
832 {
833 memcpy (dest->elems + id, src1->elems + i1,
1b2c2628 834 (src1->nelem - i1) * sizeof (int));
3b0bdc72
UD
835 id += src1->nelem - i1;
836 }
837 else if (i2 < src2->nelem)
838 {
839 memcpy (dest->elems + id, src2->elems + i2,
1b2c2628 840 (src2->nelem - i2) * sizeof (int));
3b0bdc72
UD
841 id += src2->nelem - i2;
842 }
843 dest->nelem = id;
844 return REG_NOERROR;
845}
846
a9388965
UD
847/* Calculate the union set of the sets DEST and SRC. And store it to
848 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
849
3b0bdc72
UD
850static reg_errcode_t
851re_node_set_merge (dest, src)
852 re_node_set *dest;
853 const re_node_set *src;
854{
855 int si, di;
856 if (src == NULL || src->nelem == 0)
857 return REG_NOERROR;
3b0bdc72
UD
858 if (dest->alloc < src->nelem + dest->nelem)
859 {
1b2c2628 860 int *new_buffer;
3b0bdc72 861 dest->alloc = 2 * (src->nelem + dest->alloc);
1b2c2628
UD
862 new_buffer = re_realloc (dest->elems, int, dest->alloc);
863 if (BE (new_buffer == NULL, 0))
864 return REG_ESPACE;
865 dest->elems = new_buffer;
3b0bdc72
UD
866 }
867
868 for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
869 {
870 int cp_from, ncp, mid, right, src_elem = src->elems[si];
871 /* Binary search the spot we will add the new element. */
872 right = dest->nelem;
873 while (di < right)
1b2c2628
UD
874 {
875 mid = (di + right) / 2;
876 if (dest->elems[mid] < src_elem)
877 di = mid + 1;
878 else
879 right = mid;
880 }
3b0bdc72 881 if (di >= dest->nelem)
1b2c2628 882 break;
3b0bdc72
UD
883
884 if (dest->elems[di] == src_elem)
1b2c2628
UD
885 {
886 /* Skip since, DEST already has the element. */
887 ++di;
888 ++si;
889 continue;
890 }
3b0bdc72
UD
891
892 /* Skip the src elements which are less than dest->elems[di]. */
893 cp_from = si;
894 while (si < src->nelem && src->elems[si] < dest->elems[di])
1b2c2628 895 ++si;
3b0bdc72
UD
896 /* Copy these src elements. */
897 ncp = si - cp_from;
898 memmove (dest->elems + di + ncp, dest->elems + di,
1b2c2628 899 sizeof (int) * (dest->nelem - di));
3b0bdc72 900 memcpy (dest->elems + di, src->elems + cp_from,
1b2c2628 901 sizeof (int) * ncp);
3b0bdc72
UD
902 /* Update counters. */
903 di += ncp;
904 dest->nelem += ncp;
905 }
906
907 /* Copy remaining src elements. */
908 if (si < src->nelem)
909 {
910 memcpy (dest->elems + di, src->elems + si,
1b2c2628 911 sizeof (int) * (src->nelem - si));
3b0bdc72
UD
912 dest->nelem += src->nelem - si;
913 }
914 return REG_NOERROR;
915}
916
917/* Insert the new element ELEM to the re_node_set* SET.
918 return 0 if SET already has ELEM,
919 return -1 if an error is occured, return 1 otherwise. */
920
921static int
922re_node_set_insert (set, elem)
923 re_node_set *set;
924 int elem;
925{
926 int idx, right, mid;
927 /* In case of the set is empty. */
928 if (set->elems == NULL || set->alloc == 0)
929 {
bc15410e 930 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1b2c2628 931 return 1;
3b0bdc72 932 else
1b2c2628 933 return -1;
3b0bdc72
UD
934 }
935
936 /* Binary search the spot we will add the new element. */
937 idx = 0;
938 right = set->nelem;
939 while (idx < right)
940 {
941 mid = (idx + right) / 2;
942 if (set->elems[mid] < elem)
1b2c2628 943 idx = mid + 1;
3b0bdc72 944 else
1b2c2628 945 right = mid;
3b0bdc72
UD
946 }
947
948 /* Realloc if we need. */
949 if (set->alloc < set->nelem + 1)
950 {
951 int *new_array;
952 set->alloc = set->alloc * 2;
953 new_array = re_malloc (int, set->alloc);
bc15410e 954 if (BE (new_array == NULL, 0))
1b2c2628 955 return -1;
3b0bdc72
UD
956 /* Copy the elements they are followed by the new element. */
957 if (idx > 0)
1b2c2628 958 memcpy (new_array, set->elems, sizeof (int) * (idx));
3b0bdc72
UD
959 /* Copy the elements which follows the new element. */
960 if (set->nelem - idx > 0)
1b2c2628 961 memcpy (new_array + idx + 1, set->elems + idx,
3b0bdc72 962 sizeof (int) * (set->nelem - idx));
612546c6 963 re_free (set->elems);
3b0bdc72
UD
964 set->elems = new_array;
965 }
966 else
967 {
968 /* Move the elements which follows the new element. */
969 if (set->nelem - idx > 0)
1b2c2628
UD
970 memmove (set->elems + idx + 1, set->elems + idx,
971 sizeof (int) * (set->nelem - idx));
3b0bdc72
UD
972 }
973 /* Insert the new element. */
974 set->elems[idx] = elem;
975 ++set->nelem;
976 return 1;
977}
978
979/* Compare two node sets SET1 and SET2.
980 return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
981
982static int
983re_node_set_compare (set1, set2)
984 const re_node_set *set1, *set2;
985{
986 int i;
987 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
988 return 0;
989 for (i = 0 ; i < set1->nelem ; i++)
990 if (set1->elems[i] != set2->elems[i])
991 return 0;
992 return 1;
993}
994
0742e48e 995/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
3b0bdc72
UD
996
997static int
998re_node_set_contains (set, elem)
999 const re_node_set *set;
1000 int elem;
1001{
1002 int idx, right, mid;
1003 if (set->nelem <= 0)
1004 return 0;
1005
1006 /* Binary search the element. */
1007 idx = 0;
1008 right = set->nelem - 1;
1009 while (idx < right)
1010 {
1011 mid = (idx + right) / 2;
1012 if (set->elems[mid] < elem)
1b2c2628 1013 idx = mid + 1;
3b0bdc72 1014 else
1b2c2628 1015 right = mid;
3b0bdc72 1016 }
0742e48e 1017 return set->elems[idx] == elem ? idx + 1 : 0;
3b0bdc72
UD
1018}
1019
1020static void
1021re_node_set_remove_at (set, idx)
1022 re_node_set *set;
1023 int idx;
1024{
1025 if (idx < 0 || idx >= set->nelem)
1026 return;
1027 if (idx < set->nelem - 1)
1028 memmove (set->elems + idx, set->elems + idx + 1,
1b2c2628 1029 sizeof (int) * (set->nelem - idx - 1));
3b0bdc72
UD
1030 --set->nelem;
1031}
1032\f
1033
1034/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1035 Or return -1, if an error will be occured. */
1036
1037static int
1038re_dfa_add_node (dfa, token, mode)
1039 re_dfa_t *dfa;
1040 re_token_t token;
1041 int mode;
1042{
1043 if (dfa->nodes_len >= dfa->nodes_alloc)
1044 {
1045 re_token_t *new_array;
1046 dfa->nodes_alloc *= 2;
1047 new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
bc15410e 1048 if (BE (new_array == NULL, 0))
1b2c2628 1049 return -1;
3b0bdc72 1050 else
1b2c2628 1051 dfa->nodes = new_array;
3b0bdc72 1052 if (mode)
1b2c2628 1053 {
a7d5c291 1054 int *new_nexts, *new_indices;
1b2c2628
UD
1055 re_node_set *new_edests, *new_eclosures, *new_inveclosures;
1056
1057 new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
a7d5c291 1058 new_indices = re_realloc (dfa->org_indices, int, dfa->nodes_alloc);
1b2c2628
UD
1059 new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
1060 new_eclosures = re_realloc (dfa->eclosures, re_node_set,
1061 dfa->nodes_alloc);
1062 new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
1063 dfa->nodes_alloc);
a7d5c291
UD
1064 if (BE (new_nexts == NULL || new_indices == NULL
1065 || new_edests == NULL || new_eclosures == NULL
1066 || new_inveclosures == NULL, 0))
1b2c2628
UD
1067 return -1;
1068 dfa->nexts = new_nexts;
a7d5c291 1069 dfa->org_indices = new_indices;
1b2c2628
UD
1070 dfa->edests = new_edests;
1071 dfa->eclosures = new_eclosures;
1072 dfa->inveclosures = new_inveclosures;
1073 }
3b0bdc72
UD
1074 }
1075 dfa->nodes[dfa->nodes_len] = token;
1076 dfa->nodes[dfa->nodes_len].duplicated = 0;
485d775d 1077 dfa->nodes[dfa->nodes_len].constraint = 0;
3b0bdc72
UD
1078 return dfa->nodes_len++;
1079}
1080
1081static unsigned int inline
1082calc_state_hash (nodes, context)
1083 const re_node_set *nodes;
1084 unsigned int context;
1085{
1086 unsigned int hash = nodes->nelem + context;
1087 int i;
1088 for (i = 0 ; i < nodes->nelem ; i++)
1089 hash += nodes->elems[i];
1090 return hash;
1091}
1092
1093/* Search for the state whose node_set is equivalent to NODES.
1094 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1095 Otherwise create the new one and return it. In case of an error
1096 return NULL and set the error code in ERR.
1097 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1098 return value is NULL and ERR is REG_NOERROR.
1099 - We never return non-NULL value in case of any errors, it is for
1100 optimization. */
a9388965
UD
1101
1102static re_dfastate_t*
1103re_acquire_state (err, dfa, nodes)
1104 reg_errcode_t *err;
3b0bdc72
UD
1105 re_dfa_t *dfa;
1106 const re_node_set *nodes;
1107{
1108 unsigned int hash;
a9388965 1109 re_dfastate_t *new_state;
3b0bdc72
UD
1110 struct re_state_table_entry *spot;
1111 int i;
bc15410e 1112 if (BE (nodes->nelem == 0, 0))
a9388965
UD
1113 {
1114 *err = REG_NOERROR;
1115 return NULL;
1116 }
3b0bdc72
UD
1117 hash = calc_state_hash (nodes, 0);
1118 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1119
bc15410e 1120 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1121 {
bc15410e
UD
1122 re_dfastate_t *state = spot->array[i];
1123 if (hash != state->hash)
1b2c2628 1124 continue;
bc15410e 1125 if (re_node_set_compare (&state->nodes, nodes))
1b2c2628 1126 return state;
3b0bdc72 1127 }
3b0bdc72
UD
1128
1129 /* There are no appropriate state in the dfa, create the new one. */
a9388965 1130 new_state = create_ci_newstate (dfa, nodes, hash);
bc15410e 1131 if (BE (new_state != NULL, 1))
a9388965
UD
1132 return new_state;
1133 else
1134 {
1135 *err = REG_ESPACE;
1136 return NULL;
1137 }
3b0bdc72
UD
1138}
1139
1140/* Search for the state whose node_set is equivalent to NODES and
1141 whose context is equivalent to CONTEXT.
1142 Return the pointer to the state, if we found it in the DFA.
a9388965
UD
1143 Otherwise create the new one and return it. In case of an error
1144 return NULL and set the error code in ERR.
1145 Note: - We assume NULL as the invalid state, then it is possible that
1b2c2628
UD
1146 return value is NULL and ERR is REG_NOERROR.
1147 - We never return non-NULL value in case of any errors, it is for
1148 optimization. */
a9388965
UD
1149
1150static re_dfastate_t*
1151re_acquire_state_context (err, dfa, nodes, context)
1152 reg_errcode_t *err;
3b0bdc72
UD
1153 re_dfa_t *dfa;
1154 const re_node_set *nodes;
1155 unsigned int context;
1156{
1157 unsigned int hash;
a9388965 1158 re_dfastate_t *new_state;
3b0bdc72
UD
1159 struct re_state_table_entry *spot;
1160 int i;
1161 if (nodes->nelem == 0)
a9388965
UD
1162 {
1163 *err = REG_NOERROR;
1164 return NULL;
1165 }
3b0bdc72
UD
1166 hash = calc_state_hash (nodes, context);
1167 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1168
bc15410e 1169 for (i = 0 ; i < spot->num ; i++)
3b0bdc72 1170 {
bc15410e
UD
1171 re_dfastate_t *state = spot->array[i];
1172 if (hash != state->hash)
1b2c2628 1173 continue;
bc15410e 1174 if (re_node_set_compare (state->entrance_nodes, nodes)
1b2c2628
UD
1175 && state->context == context)
1176 return state;
3b0bdc72 1177 }
3b0bdc72 1178 /* There are no appropriate state in `dfa', create the new one. */
a9388965 1179 new_state = create_cd_newstate (dfa, nodes, context, hash);
bc15410e 1180 if (BE (new_state != NULL, 1))
a9388965
UD
1181 return new_state;
1182 else
1183 {
1184 *err = REG_ESPACE;
1185 return NULL;
1186 }
3b0bdc72
UD
1187}
1188
a9388965
UD
1189/* Allocate memory for DFA state and initialize common properties.
1190 Return the new state if succeeded, otherwise return NULL. */
1191
3b0bdc72
UD
1192static re_dfastate_t *
1193create_newstate_common (dfa, nodes, hash)
1194 re_dfa_t *dfa;
1195 const re_node_set *nodes;
1196 unsigned int hash;
1197{
1198 re_dfastate_t *newstate;
1b2c2628 1199 reg_errcode_t err;
3b0bdc72 1200 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
bc15410e 1201 if (BE (newstate == NULL, 0))
a9388965 1202 return NULL;
1b2c2628
UD
1203 err = re_node_set_init_copy (&newstate->nodes, nodes);
1204 if (BE (err != REG_NOERROR, 0))
1205 {
1206 re_free (newstate);
1207 return NULL;
1208 }
3b0bdc72 1209 newstate->trtable = NULL;
3b0bdc72
UD
1210 newstate->hash = hash;
1211 return newstate;
1212}
1213
a9388965
UD
1214/* Store the new state NEWSTATE whose hash value is HASH in appropriate
1215 position. Return value indicate the error code if failed. */
1216
1217static reg_errcode_t
3b0bdc72
UD
1218register_state (dfa, newstate, hash)
1219 re_dfa_t *dfa;
1220 re_dfastate_t *newstate;
1221 unsigned int hash;
1222{
1223 struct re_state_table_entry *spot;
1224 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1225
1226 if (spot->alloc <= spot->num)
1227 {
1b2c2628 1228 re_dfastate_t **new_array;
bc15410e 1229 spot->alloc = 2 * spot->num + 2;
1b2c2628
UD
1230 new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
1231 if (BE (new_array == NULL, 0))
1232 return REG_ESPACE;
1233 spot->array = new_array;
3b0bdc72 1234 }
bc15410e 1235 spot->array[spot->num++] = newstate;
a9388965 1236 return REG_NOERROR;
3b0bdc72
UD
1237}
1238
a9388965
UD
1239/* Create the new state which is independ of contexts.
1240 Return the new state if succeeded, otherwise return NULL. */
1241
3b0bdc72
UD
1242static re_dfastate_t *
1243create_ci_newstate (dfa, nodes, hash)
1244 re_dfa_t *dfa;
1245 const re_node_set *nodes;
1246 unsigned int hash;
1247{
1248 int i;
a9388965 1249 reg_errcode_t err;
3b0bdc72
UD
1250 re_dfastate_t *newstate;
1251 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1252 if (BE (newstate == NULL, 0))
a9388965 1253 return NULL;
3b0bdc72
UD
1254 newstate->entrance_nodes = &newstate->nodes;
1255
1256 for (i = 0 ; i < nodes->nelem ; i++)
1257 {
1258 re_token_t *node = dfa->nodes + nodes->elems[i];
1259 re_token_type_t type = node->type;
485d775d 1260 if (type == CHARACTER && !node->constraint)
1b2c2628 1261 continue;
3b0bdc72
UD
1262
1263 /* If the state has the halt node, the state is a halt state. */
1264 else if (type == END_OF_RE)
1b2c2628 1265 newstate->halt = 1;
c0a0f9a3 1266#ifdef RE_ENABLE_I18N
3b0bdc72 1267 else if (type == COMPLEX_BRACKET
ad7f28c2 1268 || type == OP_UTF8_PERIOD
3c0fb574 1269 || (type == OP_PERIOD && dfa->mb_cur_max > 1))
1b2c2628 1270 newstate->accept_mb = 1;
c0a0f9a3 1271#endif /* RE_ENABLE_I18N */
3b0bdc72 1272 else if (type == OP_BACK_REF)
1b2c2628 1273 newstate->has_backref = 1;
485d775d 1274 else if (type == ANCHOR || node->constraint)
1b2c2628 1275 newstate->has_constraint = 1;
3b0bdc72 1276 }
a9388965 1277 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1278 if (BE (err != REG_NOERROR, 0))
1279 {
1280 free_state (newstate);
1281 newstate = NULL;
1282 }
1283 return newstate;
3b0bdc72
UD
1284}
1285
a9388965
UD
1286/* Create the new state which is depend on the context CONTEXT.
1287 Return the new state if succeeded, otherwise return NULL. */
1288
3b0bdc72
UD
1289static re_dfastate_t *
1290create_cd_newstate (dfa, nodes, context, hash)
1291 re_dfa_t *dfa;
1292 const re_node_set *nodes;
1293 unsigned int context, hash;
1294{
1295 int i, nctx_nodes = 0;
a9388965 1296 reg_errcode_t err;
3b0bdc72
UD
1297 re_dfastate_t *newstate;
1298
1299 newstate = create_newstate_common (dfa, nodes, hash);
bc15410e 1300 if (BE (newstate == NULL, 0))
a9388965 1301 return NULL;
3b0bdc72
UD
1302 newstate->context = context;
1303 newstate->entrance_nodes = &newstate->nodes;
1304
1305 for (i = 0 ; i < nodes->nelem ; i++)
1306 {
1307 unsigned int constraint = 0;
1308 re_token_t *node = dfa->nodes + nodes->elems[i];
1309 re_token_type_t type = node->type;
485d775d 1310 if (node->constraint)
1b2c2628 1311 constraint = node->constraint;
3b0bdc72 1312
485d775d 1313 if (type == CHARACTER && !constraint)
1b2c2628 1314 continue;
3b0bdc72
UD
1315 /* If the state has the halt node, the state is a halt state. */
1316 else if (type == END_OF_RE)
1b2c2628 1317 newstate->halt = 1;
c0a0f9a3 1318#ifdef RE_ENABLE_I18N
3b0bdc72 1319 else if (type == COMPLEX_BRACKET
ad7f28c2 1320 || type == OP_UTF8_PERIOD
3c0fb574 1321 || (type == OP_PERIOD && dfa->mb_cur_max > 1))
1b2c2628 1322 newstate->accept_mb = 1;
c0a0f9a3 1323#endif /* RE_ENABLE_I18N */
3b0bdc72 1324 else if (type == OP_BACK_REF)
1b2c2628 1325 newstate->has_backref = 1;
3b0bdc72 1326 else if (type == ANCHOR)
1b2c2628 1327 constraint = node->opr.ctx_type;
3b0bdc72
UD
1328
1329 if (constraint)
1b2c2628
UD
1330 {
1331 if (newstate->entrance_nodes == &newstate->nodes)
1332 {
1333 newstate->entrance_nodes = re_malloc (re_node_set, 1);
bc15410e 1334 if (BE (newstate->entrance_nodes == NULL, 0))
1b2c2628
UD
1335 {
1336 free_state (newstate);
1337 return NULL;
1338 }
1339 re_node_set_init_copy (newstate->entrance_nodes, nodes);
1340 nctx_nodes = 0;
1341 newstate->has_constraint = 1;
1342 }
1343
1344 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1345 {
1346 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1347 ++nctx_nodes;
1348 }
1349 }
3b0bdc72 1350 }
a9388965 1351 err = register_state (dfa, newstate, hash);
1b2c2628
UD
1352 if (BE (err != REG_NOERROR, 0))
1353 {
1354 free_state (newstate);
1355 newstate = NULL;
1356 }
1357 return newstate;
1358}
1359
1360static void
1361free_state (state)
1362 re_dfastate_t *state;
1363{
1364 if (state->entrance_nodes != &state->nodes)
1365 {
1366 re_node_set_free (state->entrance_nodes);
1367 re_free (state->entrance_nodes);
1368 }
1369 re_node_set_free (&state->nodes);
1370 re_free (state->trtable);
1b2c2628 1371 re_free (state);
3b0bdc72 1372}