]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regexec.c
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / posix / regexec.c
CommitLineData
3b0bdc72 1/* Extended regular expression matching and search library.
b168057a 2 Copyright (C) 2002-2015 Free Software Foundation, Inc.
3b0bdc72
UD
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
3b0bdc72 19
e054f494
RA
20#include <stdint.h>
21
a9388965 22static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
56b168be 23 int n) internal_function;
3ce12656
UD
24static void match_ctx_clean (re_match_context_t *mctx) internal_function;
25static void match_ctx_free (re_match_context_t *cache) internal_function;
a9388965 26static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
e3a87852
UD
27 int str_idx, int from, int to)
28 internal_function;
01ed6ceb 29static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
e3a87852 30 internal_function;
6291ee3c 31static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
3ce12656 32 int str_idx) internal_function;
6291ee3c 33static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
e3a87852
UD
34 int node, int str_idx)
35 internal_function;
0742e48e 36static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
15a7d175 37 re_dfastate_t **limited_sts, int last_node,
d2c38eb3 38 int last_str_idx)
e3a87852 39 internal_function;
a9388965 40static reg_errcode_t re_search_internal (const regex_t *preg,
15a7d175
UD
41 const char *string, int length,
42 int start, int range, int stop,
43 size_t nmatch, regmatch_t pmatch[],
3ce12656 44 int eflags) internal_function;
ac3d553b 45static int re_search_2_stub (struct re_pattern_buffer *bufp,
15a7d175
UD
46 const char *string1, int length1,
47 const char *string2, int length2,
48 int start, int range, struct re_registers *regs,
3ce12656 49 int stop, int ret_len) internal_function;
ac3d553b 50static int re_search_stub (struct re_pattern_buffer *bufp,
15a7d175
UD
51 const char *string, int length, int start,
52 int range, int stop, struct re_registers *regs,
3ce12656 53 int ret_len) internal_function;
ac3d553b 54static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
3ce12656 55 int nregs, int regs_allocated) internal_function;
e3a87852
UD
56static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
57 internal_function;
15bad1a5 58static int check_matching (re_match_context_t *mctx, int fl_longest_match,
76b864c8 59 int *p_match_first) internal_function;
e3a87852
UD
60static int check_halt_state_context (const re_match_context_t *mctx,
61 const re_dfastate_t *state, int idx)
62 internal_function;
76b864c8 63static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
6b6557e8 64 regmatch_t *prev_idx_match, int cur_node,
3ce12656 65 int cur_idx, int nmatch) internal_function;
1b2c2628 66static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
5cf1ec52 67 int str_idx, int dest_node, int nregs,
15a7d175 68 regmatch_t *regs,
76b864c8
UD
69 re_node_set *eps_via_nodes)
70 internal_function;
612546c6 71static reg_errcode_t set_regs (const regex_t *preg,
15a7d175
UD
72 const re_match_context_t *mctx,
73 size_t nmatch, regmatch_t *pmatch,
3ce12656 74 int fl_backtrack) internal_function;
76b864c8
UD
75static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
76 internal_function;
485d775d 77
434d3784 78#ifdef RE_ENABLE_I18N
e3a87852 79static int sift_states_iter_mb (const re_match_context_t *mctx,
15a7d175 80 re_sift_context_t *sctx,
76b864c8
UD
81 int node_idx, int str_idx, int max_str_idx)
82 internal_function;
434d3784 83#endif /* RE_ENABLE_I18N */
76b864c8
UD
84static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
85 re_sift_context_t *sctx)
86 internal_function;
87static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
e40a38b3 88 re_sift_context_t *sctx, int str_idx,
76b864c8
UD
89 re_node_set *cur_dest)
90 internal_function;
91static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
15a7d175
UD
92 re_sift_context_t *sctx,
93 int str_idx,
76b864c8
UD
94 re_node_set *dest_nodes)
95 internal_function;
96static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
15a7d175 97 re_node_set *dest_nodes,
76b864c8
UD
98 const re_node_set *candidates)
99 internal_function;
01ed6ceb
UD
100static int check_dst_limits (const re_match_context_t *mctx,
101 re_node_set *limits,
e3a87852
UD
102 int dst_node, int dst_idx, int src_node,
103 int src_idx) internal_function;
01ed6ceb 104static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
e40a38b3 105 int boundaries, int subexp_idx,
76b864c8
UD
106 int from_node, int bkref_idx)
107 internal_function;
01ed6ceb 108static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
e40a38b3
UD
109 int limit, int subexp_idx,
110 int node, int str_idx,
111 int bkref_idx) internal_function;
76b864c8 112static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
15a7d175
UD
113 re_node_set *dest_nodes,
114 const re_node_set *candidates,
115 re_node_set *limits,
116 struct re_backref_cache_entry *bkref_ents,
3ce12656 117 int str_idx) internal_function;
76b864c8 118static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
15a7d175 119 re_sift_context_t *sctx,
76b864c8
UD
120 int str_idx, const re_node_set *candidates)
121 internal_function;
76b864c8
UD
122static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
123 re_dfastate_t **dst,
124 re_dfastate_t **src, int num)
125 internal_function;
4c595adb
UD
126static re_dfastate_t *find_recover_state (reg_errcode_t *err,
127 re_match_context_t *mctx) internal_function;
e3a87852 128static re_dfastate_t *transit_state (reg_errcode_t *err,
15a7d175 129 re_match_context_t *mctx,
3ce12656 130 re_dfastate_t *state) internal_function;
4c595adb
UD
131static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
132 re_match_context_t *mctx,
76b864c8
UD
133 re_dfastate_t *next_state)
134 internal_function;
e3a87852 135static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
6291ee3c 136 re_node_set *cur_nodes,
3ce12656 137 int str_idx) internal_function;
c13c99fa 138#if 0
e3a87852
UD
139static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
140 re_match_context_t *mctx,
76b864c8
UD
141 re_dfastate_t *pstate)
142 internal_function;
c13c99fa 143#endif
434d3784 144#ifdef RE_ENABLE_I18N
e3a87852 145static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
76b864c8
UD
146 re_dfastate_t *pstate)
147 internal_function;
434d3784 148#endif /* RE_ENABLE_I18N */
e3a87852 149static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
76b864c8
UD
150 const re_node_set *nodes)
151 internal_function;
e3a87852 152static reg_errcode_t get_subexp (re_match_context_t *mctx,
76b864c8
UD
153 int bkref_node, int bkref_str_idx)
154 internal_function;
e3a87852 155static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
fe9434bb 156 const re_sub_match_top_t *sub_top,
6291ee3c 157 re_sub_match_last_t *sub_last,
76b864c8
UD
158 int bkref_node, int bkref_str)
159 internal_function;
fe9434bb 160static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
3ce12656 161 int subexp_idx, int type) internal_function;
e3a87852 162static reg_errcode_t check_arrival (re_match_context_t *mctx,
6291ee3c
UD
163 state_array_t *path, int top_node,
164 int top_str, int last_node, int last_str,
3ce12656 165 int type) internal_function;
e3a87852 166static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
6291ee3c
UD
167 int str_idx,
168 re_node_set *cur_nodes,
76b864c8
UD
169 re_node_set *next_nodes)
170 internal_function;
6efbd82c 171static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
6291ee3c 172 re_node_set *cur_nodes,
76b864c8
UD
173 int ex_subexp, int type)
174 internal_function;
6efbd82c 175static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
6291ee3c
UD
176 re_node_set *dst_nodes,
177 int target, int ex_subexp,
3ce12656 178 int type) internal_function;
e3a87852 179static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
6291ee3c 180 re_node_set *cur_nodes, int cur_str,
76b864c8
UD
181 int subexp_num, int type)
182 internal_function;
183static int build_trtable (const re_dfa_t *dfa,
ab4b89fe 184 re_dfastate_t *state) internal_function;
434d3784 185#ifdef RE_ENABLE_I18N
c42b4152 186static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
76b864c8
UD
187 const re_string_t *input, int idx)
188 internal_function;
434d3784 189# ifdef _LIBC
c202c2c5 190static unsigned int find_collation_sequence_value (const unsigned char *mbs,
76b864c8
UD
191 size_t name_len)
192 internal_function;
434d3784
UD
193# endif /* _LIBC */
194#endif /* RE_ENABLE_I18N */
01ed6ceb 195static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
15a7d175
UD
196 const re_dfastate_t *state,
197 re_node_set *states_node,
2c05d33f 198 bitset_t *states_ch) internal_function;
e3a87852 199static int check_node_accept (const re_match_context_t *mctx,
76b864c8
UD
200 const re_token_t *node, int idx)
201 internal_function;
a445af0b 202static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
76b864c8 203 internal_function;
3b0bdc72
UD
204\f
205/* Entry point for POSIX code. */
206
207/* regexec searches for a given pattern, specified by PREG, in the
208 string STRING.
209
210 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
211 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
212 least NMATCH elements, and we set them to the offsets of the
213 corresponding matched substrings.
214
215 EFLAGS specifies `execution flags' which affect matching: if
216 REG_NOTBOL is set, then ^ does not match at the beginning of the
217 string; if REG_NOTEOL is set, then $ does not match at the end.
218
219 We return 0 if we find a match and REG_NOMATCH if not. */
220
221int
222regexec (preg, string, nmatch, pmatch, eflags)
92b27c74
UD
223 const regex_t *__restrict preg;
224 const char *__restrict string;
3b0bdc72
UD
225 size_t nmatch;
226 regmatch_t pmatch[];
227 int eflags;
228{
a9388965 229 reg_errcode_t err;
6fefb4e0 230 int start, length;
76b864c8 231 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3f2fb223
UD
232
233 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
234 return REG_BADPAT;
235
6fefb4e0
UD
236 if (eflags & REG_STARTEND)
237 {
238 start = pmatch[0].rm_so;
239 length = pmatch[0].rm_eo;
240 }
241 else
242 {
243 start = 0;
244 length = strlen (string);
245 }
7b918993
UD
246
247 __libc_lock_lock (dfa->lock);
3b0bdc72 248 if (preg->no_sub)
6fefb4e0
UD
249 err = re_search_internal (preg, string, length, start, length - start,
250 length, 0, NULL, eflags);
3b0bdc72 251 else
6fefb4e0
UD
252 err = re_search_internal (preg, string, length, start, length - start,
253 length, nmatch, pmatch, eflags);
7b918993 254 __libc_lock_unlock (dfa->lock);
a9388965 255 return err != REG_NOERROR;
3b0bdc72 256}
3f2fb223 257
3b0bdc72 258#ifdef _LIBC
3f2fb223
UD
259# include <shlib-compat.h>
260versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
261
262# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
263__typeof__ (__regexec) __compat_regexec;
264
265int
78678039 266attribute_compat_text_section
3f2fb223
UD
267__compat_regexec (const regex_t *__restrict preg,
268 const char *__restrict string, size_t nmatch,
269 regmatch_t pmatch[], int eflags)
270{
271 return regexec (preg, string, nmatch, pmatch,
272 eflags & (REG_NOTBOL | REG_NOTEOL));
273}
274compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
275# endif
3b0bdc72
UD
276#endif
277
278/* Entry points for GNU code. */
279
ac3d553b
UD
280/* re_match, re_search, re_match_2, re_search_2
281
282 The former two functions operate on STRING with length LENGTH,
283 while the later two operate on concatenation of STRING1 and STRING2
284 with lengths LENGTH1 and LENGTH2, respectively.
285
286 re_match() matches the compiled pattern in BUFP against the string,
287 starting at index START.
288
289 re_search() first tries matching at index START, then it tries to match
290 starting from index START + 1, and so on. The last start position tried
291 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
292 way as re_match().)
293
294 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
295 the first STOP characters of the concatenation of the strings should be
296 concerned.
297
298 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
299 and all groups is stroed in REGS. (For the "_2" variants, the offsets are
300 computed relative to the concatenation, not relative to the individual
301 strings.)
302
303 On success, re_match* functions return the length of the match, re_search*
304 return the position of the start of the match. Return value -1 means no
305 match was found and -2 indicates an internal error. */
3b0bdc72
UD
306
307int
ac3d553b
UD
308re_match (bufp, string, length, start, regs)
309 struct re_pattern_buffer *bufp;
3b0bdc72
UD
310 const char *string;
311 int length, start;
312 struct re_registers *regs;
313{
ac3d553b 314 return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
3b0bdc72
UD
315}
316#ifdef _LIBC
317weak_alias (__re_match, re_match)
318#endif
319
ac3d553b
UD
320int
321re_search (bufp, string, length, start, range, regs)
322 struct re_pattern_buffer *bufp;
323 const char *string;
324 int length, start, range;
325 struct re_registers *regs;
326{
327 return re_search_stub (bufp, string, length, start, range, length, regs, 0);
328}
329#ifdef _LIBC
330weak_alias (__re_search, re_search)
331#endif
3b0bdc72
UD
332
333int
ac3d553b
UD
334re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
335 struct re_pattern_buffer *bufp;
336 const char *string1, *string2;
337 int length1, length2, start, stop;
338 struct re_registers *regs;
3b0bdc72 339{
ac3d553b 340 return re_search_2_stub (bufp, string1, length1, string2, length2,
15a7d175 341 start, 0, regs, stop, 1);
3b0bdc72
UD
342}
343#ifdef _LIBC
344weak_alias (__re_match_2, re_match_2)
345#endif
346
3b0bdc72 347int
ac3d553b
UD
348re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
349 struct re_pattern_buffer *bufp;
350 const char *string1, *string2;
351 int length1, length2, start, range, stop;
352 struct re_registers *regs;
353{
354 return re_search_2_stub (bufp, string1, length1, string2, length2,
15a7d175 355 start, range, regs, stop, 0);
ac3d553b
UD
356}
357#ifdef _LIBC
358weak_alias (__re_search_2, re_search_2)
359#endif
360
361static int
362re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
15a7d175 363 stop, ret_len)
ac3d553b
UD
364 struct re_pattern_buffer *bufp;
365 const char *string1, *string2;
366 int length1, length2, start, range, stop, ret_len;
367 struct re_registers *regs;
368{
369 const char *str;
370 int rval;
371 int len = length1 + length2;
d044d844 372 char *s = NULL;
ac3d553b 373
42a2c9b5 374 if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
ac3d553b
UD
375 return -2;
376
377 /* Concatenate the strings. */
378 if (length2 > 0)
379 if (length1 > 0)
380 {
d044d844 381 s = re_malloc (char, len);
15a7d175
UD
382
383 if (BE (s == NULL, 0))
384 return -2;
c42b4152
UD
385#ifdef _LIBC
386 memcpy (__mempcpy (s, string1, length1), string2, length2);
387#else
15a7d175
UD
388 memcpy (s, string1, length1);
389 memcpy (s + length1, string2, length2);
c42b4152 390#endif
15a7d175 391 str = s;
ac3d553b
UD
392 }
393 else
394 str = string2;
395 else
396 str = string1;
397
d044d844
PE
398 rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
399 re_free (s);
ac3d553b
UD
400 return rval;
401}
402
403/* The parameters have the same meaning as those of re_search.
404 Additional parameters:
405 If RET_LEN is nonzero the length of the match is returned (re_match style);
406 otherwise the position of the match is returned. */
407
408static int
409re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
410 struct re_pattern_buffer *bufp;
411 const char *string;
412 int length, start, range, stop, ret_len;
413 struct re_registers *regs;
3b0bdc72 414{
a9388965 415 reg_errcode_t result;
3b0bdc72 416 regmatch_t *pmatch;
ac3d553b
UD
417 int nregs, rval;
418 int eflags = 0;
76b864c8 419 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
ac3d553b
UD
420
421 /* Check for out-of-range. */
a877402c 422 if (BE (start < 0 || start > length, 0))
ac3d553b
UD
423 return -1;
424 if (BE (start + range > length, 0))
425 range = length - start;
a877402c
UD
426 else if (BE (start + range < 0, 0))
427 range = -start;
3b0bdc72 428
7b918993
UD
429 __libc_lock_lock (dfa->lock);
430
3b0bdc72
UD
431 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
432 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
433
ac3d553b
UD
434 /* Compile fastmap if we haven't yet. */
435 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
436 re_compile_fastmap (bufp);
437
438 if (BE (bufp->no_sub, 0))
439 regs = NULL;
3b0bdc72
UD
440
441 /* We need at least 1 register. */
ac3d553b
UD
442 if (regs == NULL)
443 nregs = 1;
444 else if (BE (bufp->regs_allocated == REGS_FIXED &&
15a7d175 445 regs->num_regs < bufp->re_nsub + 1, 0))
ac3d553b
UD
446 {
447 nregs = regs->num_regs;
448 if (BE (nregs < 1, 0))
15a7d175
UD
449 {
450 /* Nothing can be copied to regs. */
451 regs = NULL;
452 nregs = 1;
453 }
ac3d553b
UD
454 }
455 else
456 nregs = bufp->re_nsub + 1;
3b0bdc72 457 pmatch = re_malloc (regmatch_t, nregs);
bc15410e 458 if (BE (pmatch == NULL, 0))
7b918993
UD
459 {
460 rval = -2;
461 goto out;
462 }
3b0bdc72 463
ac3d553b 464 result = re_search_internal (bufp, string, length, start, range, stop,
15a7d175 465 nregs, pmatch, eflags);
3b0bdc72 466
ac3d553b
UD
467 rval = 0;
468
469 /* I hope we needn't fill ther regs with -1's when no match was found. */
470 if (result != REG_NOERROR)
471 rval = -1;
472 else if (regs != NULL)
3b0bdc72 473 {
ac3d553b
UD
474 /* If caller wants register contents data back, copy them. */
475 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
15a7d175 476 bufp->regs_allocated);
ac3d553b 477 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
15a7d175 478 rval = -2;
3b0bdc72
UD
479 }
480
ac3d553b 481 if (BE (rval == 0, 1))
3b0bdc72 482 {
ac3d553b 483 if (ret_len)
15a7d175
UD
484 {
485 assert (pmatch[0].rm_so == start);
486 rval = pmatch[0].rm_eo - start;
487 }
ac3d553b 488 else
15a7d175 489 rval = pmatch[0].rm_so;
3b0bdc72 490 }
3b0bdc72 491 re_free (pmatch);
7b918993
UD
492 out:
493 __libc_lock_unlock (dfa->lock);
3b0bdc72
UD
494 return rval;
495}
3b0bdc72 496
ac3d553b
UD
497static unsigned
498re_copy_regs (regs, pmatch, nregs, regs_allocated)
3b0bdc72 499 struct re_registers *regs;
ac3d553b
UD
500 regmatch_t *pmatch;
501 int nregs, regs_allocated;
3b0bdc72 502{
ac3d553b
UD
503 int rval = REGS_REALLOCATE;
504 int i;
505 int need_regs = nregs + 1;
506 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
507 uses. */
508
509 /* Have the register data arrays been allocated? */
510 if (regs_allocated == REGS_UNALLOCATED)
44777771 511 { /* No. So allocate them with malloc. */
a96c63ed 512 regs->start = re_malloc (regoff_t, need_regs);
74bc9f14 513 if (BE (regs->start == NULL, 0))
15a7d175 514 return REGS_UNALLOCATED;
74bc9f14
PE
515 regs->end = re_malloc (regoff_t, need_regs);
516 if (BE (regs->end == NULL, 0))
517 {
518 re_free (regs->start);
519 return REGS_UNALLOCATED;
520 }
ac3d553b
UD
521 regs->num_regs = need_regs;
522 }
523 else if (regs_allocated == REGS_REALLOCATE)
524 { /* Yes. If we need more elements than were already
15a7d175
UD
525 allocated, reallocate them. If we need fewer, just
526 leave it alone. */
951d6408 527 if (BE (need_regs > regs->num_regs, 0))
15a7d175 528 {
44777771 529 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
74bc9f14
PE
530 regoff_t *new_end;
531 if (BE (new_start == NULL, 0))
a96c63ed 532 return REGS_UNALLOCATED;
74bc9f14
PE
533 new_end = re_realloc (regs->end, regoff_t, need_regs);
534 if (BE (new_end == NULL, 0))
535 {
536 re_free (new_start);
537 return REGS_UNALLOCATED;
538 }
44777771
UD
539 regs->start = new_start;
540 regs->end = new_end;
15a7d175
UD
541 regs->num_regs = need_regs;
542 }
ac3d553b
UD
543 }
544 else
545 {
546 assert (regs_allocated == REGS_FIXED);
547 /* This function may not be called with REGS_FIXED and nregs too big. */
548 assert (regs->num_regs >= nregs);
549 rval = REGS_FIXED;
550 }
551
552 /* Copy the regs. */
553 for (i = 0; i < nregs; ++i)
554 {
555 regs->start[i] = pmatch[i].rm_so;
556 regs->end[i] = pmatch[i].rm_eo;
557 }
558 for ( ; i < regs->num_regs; ++i)
559 regs->start[i] = regs->end[i] = -1;
560
561 return rval;
3b0bdc72 562}
3b0bdc72
UD
563
564/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
565 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
566 this memory for recording register information. STARTS and ENDS
567 must be allocated using the malloc library routine, and must each
568 be at least NUM_REGS * sizeof (regoff_t) bytes long.
569
570 If NUM_REGS == 0, then subsequent matches should allocate their own
571 register data.
572
573 Unless this function is called, the first search or match using
574 PATTERN_BUFFER will allocate its own register data, without
575 freeing the old data. */
576
577void
578re_set_registers (bufp, regs, num_regs, starts, ends)
579 struct re_pattern_buffer *bufp;
580 struct re_registers *regs;
581 unsigned num_regs;
582 regoff_t *starts, *ends;
583{
584 if (num_regs)
585 {
586 bufp->regs_allocated = REGS_REALLOCATE;
587 regs->num_regs = num_regs;
588 regs->start = starts;
589 regs->end = ends;
590 }
591 else
592 {
593 bufp->regs_allocated = REGS_UNALLOCATED;
594 regs->num_regs = 0;
595 regs->start = regs->end = (regoff_t *) 0;
596 }
597}
598#ifdef _LIBC
599weak_alias (__re_set_registers, re_set_registers)
600#endif
601\f
602/* Entry points compatible with 4.2 BSD regex library. We don't define
603 them unless specifically requested. */
604
605#if defined _REGEX_RE_COMP || defined _LIBC
606int
607# ifdef _LIBC
608weak_function
609# endif
610re_exec (s)
611 const char *s;
612{
613 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
614}
615#endif /* _REGEX_RE_COMP */
616\f
3b0bdc72
UD
617/* Internal entry point. */
618
619/* Searches for a compiled pattern PREG in the string STRING, whose
620 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
621 mingings with regexec. START, and RANGE have the same meanings
622 with re_search.
a9388965
UD
623 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
624 otherwise return the error code.
3b0bdc72
UD
625 Note: We assume front end functions already check ranges.
626 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
627
a9388965 628static reg_errcode_t
2da42bc0 629__attribute_warn_unused_result__
ac3d553b 630re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
15a7d175 631 eflags)
3b0bdc72
UD
632 const regex_t *preg;
633 const char *string;
ac3d553b 634 int length, start, range, stop, eflags;
3b0bdc72
UD
635 size_t nmatch;
636 regmatch_t pmatch[];
637{
a9388965 638 reg_errcode_t err;
76b864c8 639 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
612546c6 640 int left_lim, right_lim, incr;
bb677c95 641 int fl_longest_match, match_first, match_kind, match_last = -1;
963d8d78 642 int extra_nmatch;
bb677c95 643 int sb, ch;
e3a87852
UD
644#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
645 re_match_context_t mctx = { .dfa = dfa };
646#else
3b0bdc72 647 re_match_context_t mctx;
e3a87852 648#endif
bb677c95
UD
649 char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
650 && range && !preg->can_be_null) ? preg->fastmap : NULL;
997470b3 651 RE_TRANSLATE_TYPE t = preg->translate;
3b0bdc72 652
e3a87852
UD
653#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
654 memset (&mctx, '\0', sizeof (re_match_context_t));
655 mctx.dfa = dfa;
656#endif
657
963d8d78
UD
658 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
659 nmatch -= extra_nmatch;
660
3b0bdc72 661 /* Check if the DFA haven't been compiled. */
bc15410e 662 if (BE (preg->used == 0 || dfa->init_state == NULL
15a7d175
UD
663 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
664 || dfa->init_state_begbuf == NULL, 0))
a9388965 665 return REG_NOMATCH;
3b0bdc72 666
c70f81dd
UD
667#ifdef DEBUG
668 /* We assume front-end functions already check them. */
669 assert (start + range >= 0 && start + range <= length);
670#endif
671
672 /* If initial states with non-begbuf contexts have no elements,
673 the regex must be anchored. If preg->newline_anchor is set,
674 we'll never use init_state_nl, so do not check it. */
675 if (dfa->init_state->nodes.nelem == 0
676 && dfa->init_state_word->nodes.nelem == 0
677 && (dfa->init_state_nl->nodes.nelem == 0
678 || !preg->newline_anchor))
679 {
680 if (start != 0 && start + range != 0)
2da42bc0 681 return REG_NOMATCH;
c70f81dd
UD
682 start = range = 0;
683 }
684
3b0bdc72 685 /* We must check the longest matching, if nmatch > 0. */
6291ee3c 686 fl_longest_match = (nmatch != 0 || dfa->nbackref);
3b0bdc72 687
56b168be 688 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
f0c7c524 689 preg->translate, preg->syntax & RE_ICASE, dfa);
612546c6 690 if (BE (err != REG_NOERROR, 0))
1b2c2628 691 goto free_return;
56b168be
UD
692 mctx.input.stop = stop;
693 mctx.input.raw_stop = stop;
694 mctx.input.newline_anchor = preg->newline_anchor;
612546c6 695
56b168be 696 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
612546c6 697 if (BE (err != REG_NOERROR, 0))
1b2c2628 698 goto free_return;
612546c6 699
3b0bdc72
UD
700 /* We will log all the DFA states through which the dfa pass,
701 if nmatch > 1, or this dfa has "multibyte node", which is a
702 back-reference or a node which can accept multibyte character or
703 multi character collating element. */
704 if (nmatch > 1 || dfa->has_mb_node)
a9388965 705 {
eadc09f2
PE
706 /* Avoid overflow. */
707 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
708 {
709 err = REG_ESPACE;
710 goto free_return;
711 }
712
56b168be 713 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
612546c6 714 if (BE (mctx.state_log == NULL, 0))
15a7d175
UD
715 {
716 err = REG_ESPACE;
717 goto free_return;
718 }
a9388965 719 }
3b0bdc72 720 else
612546c6 721 mctx.state_log = NULL;
3b0bdc72 722
612546c6 723 match_first = start;
56b168be
UD
724 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
725 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
612546c6 726
3b0bdc72 727 /* Check incrementally whether of not the input string match. */
612546c6
UD
728 incr = (range < 0) ? -1 : 1;
729 left_lim = (range < 0) ? start + range : start;
730 right_lim = (range < 0) ? start : start + range;
3c0fb574 731 sb = dfa->mb_cur_max == 1;
bb677c95
UD
732 match_kind =
733 (fastmap
734 ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
735 | (range >= 0 ? 2 : 0)
736 | (t != NULL ? 1 : 0))
737 : 8);
738
739 for (;; match_first += incr)
3b0bdc72 740 {
bb677c95
UD
741 err = REG_NOMATCH;
742 if (match_first < left_lim || right_lim < match_first)
743 goto free_return;
744
745 /* Advance as rapidly as possible through the string, until we
746 find a plausible place to start matching. This may be done
747 with varying efficiency, so there are various possibilities:
748 only the most common of them are specialized, in order to
749 save on code size. We use a switch statement for speed. */
750 switch (match_kind)
1b2c2628 751 {
bb677c95
UD
752 case 8:
753 /* No fastmap. */
754 break;
755
756 case 7:
757 /* Fastmap with single-byte translation, match forward. */
758 while (BE (match_first < right_lim, 1)
759 && !fastmap[t[(unsigned char) string[match_first]]])
760 ++match_first;
761 goto forward_match_found_start_or_reached_end;
762
763 case 6:
764 /* Fastmap without translation, match forward. */
765 while (BE (match_first < right_lim, 1)
766 && !fastmap[(unsigned char) string[match_first]])
767 ++match_first;
768
769 forward_match_found_start_or_reached_end:
770 if (BE (match_first == right_lim, 0))
1b2c2628 771 {
bb677c95
UD
772 ch = match_first >= length
773 ? 0 : (unsigned char) string[match_first];
774 if (!fastmap[t ? t[ch] : ch])
775 goto free_return;
1b2c2628 776 }
bb677c95
UD
777 break;
778
779 case 4:
780 case 5:
781 /* Fastmap without multi-byte translation, match backwards. */
782 while (match_first >= left_lim)
1b2c2628 783 {
bb677c95
UD
784 ch = match_first >= length
785 ? 0 : (unsigned char) string[match_first];
786 if (fastmap[t ? t[ch] : ch])
787 break;
788 --match_first;
789 }
790 if (match_first < left_lim)
791 goto free_return;
792 break;
1b2c2628 793
bb677c95
UD
794 default:
795 /* In this case, we can't determine easily the current byte,
796 since it might be a component byte of a multibyte
797 character. Then we use the constructed buffer instead. */
798 for (;;)
799 {
800 /* If MATCH_FIRST is out of the valid range, reconstruct the
801 buffers. */
802 unsigned int offset = match_first - mctx.input.raw_mbs_idx;
803 if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
1b2c2628 804 {
bb677c95
UD
805 err = re_string_reconstruct (&mctx.input, match_first,
806 eflags);
807 if (BE (err != REG_NOERROR, 0))
808 goto free_return;
809
810 offset = match_first - mctx.input.raw_mbs_idx;
1b2c2628 811 }
bb677c95
UD
812 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
813 Note that MATCH_FIRST must not be smaller than 0. */
814 ch = (match_first >= length
815 ? 0 : re_string_byte_at (&mctx.input, offset));
816 if (fastmap[ch])
1b2c2628 817 break;
bb677c95
UD
818 match_first += incr;
819 if (match_first < left_lim || match_first > right_lim)
2da42bc0
UD
820 {
821 err = REG_NOMATCH;
822 goto free_return;
823 }
1b2c2628 824 }
bb677c95 825 break;
1b2c2628 826 }
612546c6 827
1b2c2628 828 /* Reconstruct the buffers so that the matcher can assume that
fe9434bb 829 the matching starts from the beginning of the buffer. */
56b168be 830 err = re_string_reconstruct (&mctx.input, match_first, eflags);
1b2c2628
UD
831 if (BE (err != REG_NOERROR, 0))
832 goto free_return;
bb677c95 833
3b0bdc72 834#ifdef RE_ENABLE_I18N
bb677c95
UD
835 /* Don't consider this char as a possible match start if it part,
836 yet isn't the head, of a multibyte character. */
837 if (!sb && !re_string_first_byte (&mctx.input, 0))
838 continue;
3b0bdc72 839#endif
bb677c95
UD
840
841 /* It seems to be appropriate one, then use the matcher. */
842 /* We assume that the matching starts from 0. */
843 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
844 match_last = check_matching (&mctx, fl_longest_match,
845 range >= 0 ? &match_first : NULL);
846 if (match_last != -1)
1b2c2628 847 {
bb677c95 848 if (BE (match_last == -2, 0))
1b2c2628 849 {
bb677c95
UD
850 err = REG_ESPACE;
851 goto free_return;
852 }
853 else
854 {
855 mctx.match_last = match_last;
856 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
1b2c2628 857 {
bb677c95
UD
858 re_dfastate_t *pstate = mctx.state_log[match_last];
859 mctx.last_node = check_halt_state_context (&mctx, pstate,
860 match_last);
1b2c2628 861 }
bb677c95
UD
862 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
863 || dfa->nbackref)
6291ee3c 864 {
bb677c95
UD
865 err = prune_impossible_nodes (&mctx);
866 if (err == REG_NOERROR)
867 break;
868 if (BE (err != REG_NOMATCH, 0))
869 goto free_return;
870 match_last = -1;
6291ee3c 871 }
bb677c95
UD
872 else
873 break; /* We found a match. */
1b2c2628
UD
874 }
875 }
bb677c95
UD
876
877 match_ctx_clean (&mctx);
3b0bdc72
UD
878 }
879
bb677c95
UD
880#ifdef DEBUG
881 assert (match_last != -1);
882 assert (err == REG_NOERROR);
883#endif
884
3b0bdc72 885 /* Set pmatch[] if we need. */
bb677c95 886 if (nmatch > 0)
3b0bdc72
UD
887 {
888 int reg_idx;
889
890 /* Initialize registers. */
97fd3a30 891 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
15a7d175 892 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
3b0bdc72
UD
893
894 /* Set the points where matching start/end. */
612546c6 895 pmatch[0].rm_so = 0;
6291ee3c 896 pmatch[0].rm_eo = mctx.match_last;
3b0bdc72
UD
897
898 if (!preg->no_sub && nmatch > 1)
15a7d175 899 {
15a7d175
UD
900 err = set_regs (preg, &mctx, nmatch, pmatch,
901 dfa->has_plural_match && dfa->nbackref > 0);
902 if (BE (err != REG_NOERROR, 0))
903 goto free_return;
904 }
612546c6
UD
905
906 /* At last, add the offset to the each registers, since we slided
97fd3a30
UD
907 the buffers so that we could assume that the matching starts
908 from 0. */
612546c6 909 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
15a7d175
UD
910 if (pmatch[reg_idx].rm_so != -1)
911 {
c0d5034e 912#ifdef RE_ENABLE_I18N
56b168be 913 if (BE (mctx.input.offsets_needed != 0, 0))
bb3f4825 914 {
01ed6ceb
UD
915 pmatch[reg_idx].rm_so =
916 (pmatch[reg_idx].rm_so == mctx.input.valid_len
917 ? mctx.input.valid_raw_len
918 : mctx.input.offsets[pmatch[reg_idx].rm_so]);
919 pmatch[reg_idx].rm_eo =
920 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
921 ? mctx.input.valid_raw_len
922 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
bb3f4825 923 }
c0d5034e 924#else
56b168be 925 assert (mctx.input.offsets_needed == 0);
c0d5034e 926#endif
15a7d175
UD
927 pmatch[reg_idx].rm_so += match_first;
928 pmatch[reg_idx].rm_eo += match_first;
929 }
963d8d78
UD
930 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
931 {
932 pmatch[nmatch + reg_idx].rm_so = -1;
933 pmatch[nmatch + reg_idx].rm_eo = -1;
934 }
c06a6956
UD
935
936 if (dfa->subexp_map)
2da42bc0
UD
937 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
938 if (dfa->subexp_map[reg_idx] != reg_idx)
939 {
940 pmatch[reg_idx + 1].rm_so
941 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
942 pmatch[reg_idx + 1].rm_eo
943 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
944 }
3b0bdc72 945 }
bb677c95 946
1b2c2628 947 free_return:
612546c6 948 re_free (mctx.state_log);
3b0bdc72
UD
949 if (dfa->nbackref)
950 match_ctx_free (&mctx);
56b168be 951 re_string_destruct (&mctx.input);
1b2c2628 952 return err;
3b0bdc72
UD
953}
954
6291ee3c 955static reg_errcode_t
2da42bc0 956__attribute_warn_unused_result__
e3a87852 957prune_impossible_nodes (mctx)
6291ee3c
UD
958 re_match_context_t *mctx;
959{
76b864c8 960 const re_dfa_t *const dfa = mctx->dfa;
6291ee3c
UD
961 int halt_node, match_last;
962 reg_errcode_t ret;
6291ee3c
UD
963 re_dfastate_t **sifted_states;
964 re_dfastate_t **lim_states = NULL;
965 re_sift_context_t sctx;
966#ifdef DEBUG
967 assert (mctx->state_log != NULL);
968#endif
969 match_last = mctx->match_last;
970 halt_node = mctx->last_node;
4cd02867
PE
971
972 /* Avoid overflow. */
973 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
974 return REG_ESPACE;
975
6291ee3c
UD
976 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
977 if (BE (sifted_states == NULL, 0))
978 {
979 ret = REG_ESPACE;
980 goto free_return;
981 }
982 if (dfa->nbackref)
983 {
984 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
985 if (BE (lim_states == NULL, 0))
986 {
987 ret = REG_ESPACE;
988 goto free_return;
989 }
990 while (1)
991 {
992 memset (lim_states, '\0',
993 sizeof (re_dfastate_t *) * (match_last + 1));
6291ee3c 994 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
d2c38eb3 995 match_last);
e3a87852 996 ret = sift_states_backward (mctx, &sctx);
6291ee3c
UD
997 re_node_set_free (&sctx.limits);
998 if (BE (ret != REG_NOERROR, 0))
999 goto free_return;
1000 if (sifted_states[0] != NULL || lim_states[0] != NULL)
1001 break;
1002 do
1003 {
1004 --match_last;
1005 if (match_last < 0)
1006 {
1007 ret = REG_NOMATCH;
1008 goto free_return;
1009 }
97fd3a30
UD
1010 } while (mctx->state_log[match_last] == NULL
1011 || !mctx->state_log[match_last]->halt);
e3a87852 1012 halt_node = check_halt_state_context (mctx,
6291ee3c 1013 mctx->state_log[match_last],
e3a87852 1014 match_last);
6291ee3c
UD
1015 }
1016 ret = merge_state_array (dfa, sifted_states, lim_states,
1017 match_last + 1);
1018 re_free (lim_states);
1019 lim_states = NULL;
1020 if (BE (ret != REG_NOERROR, 0))
1021 goto free_return;
1022 }
1023 else
1024 {
d2c38eb3 1025 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
e3a87852 1026 ret = sift_states_backward (mctx, &sctx);
6291ee3c
UD
1027 re_node_set_free (&sctx.limits);
1028 if (BE (ret != REG_NOERROR, 0))
1029 goto free_return;
76c7f2cd
UD
1030 if (sifted_states[0] == NULL)
1031 {
1032 ret = REG_NOMATCH;
1033 goto free_return;
1034 }
6291ee3c
UD
1035 }
1036 re_free (mctx->state_log);
1037 mctx->state_log = sifted_states;
1038 sifted_states = NULL;
1039 mctx->last_node = halt_node;
1040 mctx->match_last = match_last;
1041 ret = REG_NOERROR;
1042 free_return:
1043 re_free (sifted_states);
1044 re_free (lim_states);
1045 return ret;
1046}
1047
a9388965 1048/* Acquire an initial state and return it.
3b0bdc72
UD
1049 We must select appropriate initial state depending on the context,
1050 since initial states may have constraints like "\<", "^", etc.. */
1051
bb3f4825 1052static inline re_dfastate_t *
e2f55264
UD
1053__attribute ((always_inline)) internal_function
1054acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1055 int idx)
3b0bdc72 1056{
76b864c8 1057 const re_dfa_t *const dfa = mctx->dfa;
3b0bdc72
UD
1058 if (dfa->init_state->has_constraint)
1059 {
1060 unsigned int context;
56b168be 1061 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
3b0bdc72 1062 if (IS_WORD_CONTEXT (context))
15a7d175 1063 return dfa->init_state_word;
3b0bdc72 1064 else if (IS_ORDINARY_CONTEXT (context))
15a7d175 1065 return dfa->init_state;
3b0bdc72 1066 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
15a7d175 1067 return dfa->init_state_begbuf;
3b0bdc72 1068 else if (IS_NEWLINE_CONTEXT (context))
15a7d175 1069 return dfa->init_state_nl;
3b0bdc72 1070 else if (IS_BEGBUF_CONTEXT (context))
15a7d175
UD
1071 {
1072 /* It is relatively rare case, then calculate on demand. */
4c595adb
UD
1073 return re_acquire_state_context (err, dfa,
1074 dfa->init_state->entrance_nodes,
1075 context);
15a7d175 1076 }
3b0bdc72 1077 else
15a7d175
UD
1078 /* Must not happen? */
1079 return dfa->init_state;
3b0bdc72
UD
1080 }
1081 else
1082 return dfa->init_state;
1083}
1084
1085/* Check whether the regular expression match input string INPUT or not,
a9388965
UD
1086 and return the index where the matching end, return -1 if not match,
1087 or return -2 in case of an error.
612546c6 1088 FL_LONGEST_MATCH means we want the POSIX longest matching.
15bad1a5
UD
1089 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1090 next place where we may want to try matching.
612546c6
UD
1091 Note that the matcher assume that the maching starts from the current
1092 index of the buffer. */
3b0bdc72
UD
1093
1094static int
2da42bc0 1095internal_function __attribute_warn_unused_result__
e2f55264
UD
1096check_matching (re_match_context_t *mctx, int fl_longest_match,
1097 int *p_match_first)
3b0bdc72 1098{
76b864c8 1099 const re_dfa_t *const dfa = mctx->dfa;
a9388965 1100 reg_errcode_t err;
612546c6
UD
1101 int match = 0;
1102 int match_last = -1;
56b168be 1103 int cur_str_idx = re_string_cur_idx (&mctx->input);
3b0bdc72 1104 re_dfastate_t *cur_state;
4c595adb
UD
1105 int at_init_state = p_match_first != NULL;
1106 int next_start_idx = cur_str_idx;
3b0bdc72 1107
4c595adb 1108 err = REG_NOERROR;
e3a87852 1109 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
4c595adb 1110 /* An initial state must not be NULL (invalid). */
bc15410e 1111 if (BE (cur_state == NULL, 0))
4c595adb
UD
1112 {
1113 assert (err == REG_ESPACE);
1114 return -2;
1115 }
0742e48e 1116
4c595adb 1117 if (mctx->state_log != NULL)
6291ee3c 1118 {
4c595adb 1119 mctx->state_log[cur_str_idx] = cur_state;
6291ee3c 1120
4c595adb
UD
1121 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1122 later. E.g. Processing back references. */
1123 if (BE (dfa->nbackref, 0))
bb3f4825 1124 {
4c595adb
UD
1125 at_init_state = 0;
1126 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
bb3f4825
UD
1127 if (BE (err != REG_NOERROR, 0))
1128 return err;
4c595adb
UD
1129
1130 if (cur_state->has_backref)
1131 {
1132 err = transit_state_bkref (mctx, &cur_state->nodes);
1133 if (BE (err != REG_NOERROR, 0))
2da42bc0 1134 return err;
4c595adb 1135 }
bb3f4825 1136 }
0742e48e
UD
1137 }
1138
3b0bdc72 1139 /* If the RE accepts NULL string. */
bb3f4825 1140 if (BE (cur_state->halt, 0))
3b0bdc72
UD
1141 {
1142 if (!cur_state->has_constraint
e3a87852 1143 || check_halt_state_context (mctx, cur_state, cur_str_idx))
15a7d175
UD
1144 {
1145 if (!fl_longest_match)
1146 return cur_str_idx;
1147 else
1148 {
1149 match_last = cur_str_idx;
1150 match = 1;
1151 }
1152 }
3b0bdc72
UD
1153 }
1154
56b168be 1155 while (!re_string_eoi (&mctx->input))
3b0bdc72 1156 {
15bad1a5 1157 re_dfastate_t *old_state = cur_state;
bb677c95
UD
1158 int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1159
8887a920
UD
1160 if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1161 && mctx->input.bufs_len < mctx->input.len)
2da42bc0
UD
1162 || (BE (next_char_idx >= mctx->input.valid_len, 0)
1163 && mctx->input.valid_len < mctx->input.len))
1164 {
a445af0b 1165 err = extend_buffers (mctx, next_char_idx + 1);
2da42bc0 1166 if (BE (err != REG_NOERROR, 0))
bb677c95
UD
1167 {
1168 assert (err == REG_ESPACE);
1169 return -2;
1170 }
2da42bc0 1171 }
bb677c95 1172
e3a87852 1173 cur_state = transit_state (&err, mctx, cur_state);
4c595adb
UD
1174 if (mctx->state_log != NULL)
1175 cur_state = merge_state_with_log (&err, mctx, cur_state);
15bad1a5 1176
4c595adb 1177 if (cur_state == NULL)
15a7d175 1178 {
4c595adb
UD
1179 /* Reached the invalid state or an error. Try to recover a valid
1180 state using the state log, if available and if we have not
1181 already found a valid (even if not the longest) match. */
15a7d175
UD
1182 if (BE (err != REG_NOERROR, 0))
1183 return -2;
4c595adb
UD
1184
1185 if (mctx->state_log == NULL
1186 || (match && !fl_longest_match)
1187 || (cur_state = find_recover_state (&err, mctx)) == NULL)
15a7d175 1188 break;
4c595adb
UD
1189 }
1190
bb677c95 1191 if (BE (at_init_state, 0))
4c595adb
UD
1192 {
1193 if (old_state == cur_state)
bb677c95 1194 next_start_idx = next_char_idx;
c13c99fa 1195 else
4c595adb 1196 at_init_state = 0;
15a7d175 1197 }
3b0bdc72 1198
4c595adb 1199 if (cur_state->halt)
15a7d175 1200 {
4c595adb 1201 /* Reached a halt state.
15a7d175
UD
1202 Check the halt state can satisfy the current context. */
1203 if (!cur_state->has_constraint
e3a87852 1204 || check_halt_state_context (mctx, cur_state,
56b168be 1205 re_string_cur_idx (&mctx->input)))
15a7d175
UD
1206 {
1207 /* We found an appropriate halt state. */
56b168be 1208 match_last = re_string_cur_idx (&mctx->input);
15a7d175 1209 match = 1;
bb677c95
UD
1210
1211 /* We found a match, do not modify match_first below. */
1212 p_match_first = NULL;
15a7d175
UD
1213 if (!fl_longest_match)
1214 break;
1215 }
1216 }
bb677c95 1217 }
15bad1a5 1218
bb677c95 1219 if (p_match_first)
4c595adb 1220 *p_match_first += next_start_idx;
15bad1a5 1221
3b0bdc72
UD
1222 return match_last;
1223}
1224
1225/* Check NODE match the current context. */
1226
e2f55264
UD
1227static int
1228internal_function
1229check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
3b0bdc72 1230{
3b0bdc72 1231 re_token_type_t type = dfa->nodes[node].type;
485d775d
UD
1232 unsigned int constraint = dfa->nodes[node].constraint;
1233 if (type != END_OF_RE)
3b0bdc72 1234 return 0;
485d775d
UD
1235 if (!constraint)
1236 return 1;
1237 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
3b0bdc72
UD
1238 return 0;
1239 return 1;
1240}
1241
1242/* Check the halt state STATE match the current context.
1243 Return 0 if not match, if the node, STATE has, is a halt node and
1244 match the context, return the node. */
1245
1246static int
1ba81cea 1247internal_function
e2f55264
UD
1248check_halt_state_context (const re_match_context_t *mctx,
1249 const re_dfastate_t *state, int idx)
3b0bdc72 1250{
3b0bdc72
UD
1251 int i;
1252 unsigned int context;
1253#ifdef DEBUG
1254 assert (state->halt);
1255#endif
56b168be 1256 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
3b0bdc72 1257 for (i = 0; i < state->nodes.nelem; ++i)
e3a87852 1258 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
3b0bdc72
UD
1259 return state->nodes.elems[i];
1260 return 0;
1261}
1262
a9388965
UD
1263/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1264 corresponding to the DFA).
1265 Return the destination node, and update EPS_VIA_NODES, return -1 in case
1266 of errors. */
3b0bdc72
UD
1267
1268static int
e2f55264
UD
1269internal_function
1270proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
1271 int *pidx, int node, re_node_set *eps_via_nodes,
1272 struct re_fail_stack_t *fs)
3b0bdc72 1273{
76b864c8 1274 const re_dfa_t *const dfa = mctx->dfa;
2d87db5b 1275 int i, err;
3b0bdc72
UD
1276 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1277 {
485d775d 1278 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
5cf1ec52
UD
1279 re_node_set *edests = &dfa->edests[node];
1280 int dest_node;
a9388965 1281 err = re_node_set_insert (eps_via_nodes, node);
bc15410e 1282 if (BE (err < 0, 0))
44777771 1283 return -2;
5cf1ec52
UD
1284 /* Pick up a valid destination, or return -1 if none is found. */
1285 for (dest_node = -1, i = 0; i < edests->nelem; ++i)
15a7d175 1286 {
5cf1ec52 1287 int candidate = edests->elems[i];
15a7d175
UD
1288 if (!re_node_set_contains (cur_nodes, candidate))
1289 continue;
2da42bc0 1290 if (dest_node == -1)
5cf1ec52
UD
1291 dest_node = candidate;
1292
2da42bc0 1293 else
5cf1ec52
UD
1294 {
1295 /* In order to avoid infinite loop like "(a*)*", return the second
2da42bc0 1296 epsilon-transition if the first was already considered. */
5cf1ec52 1297 if (re_node_set_contains (eps_via_nodes, dest_node))
2da42bc0 1298 return candidate;
5cf1ec52
UD
1299
1300 /* Otherwise, push the second epsilon-transition on the fail stack. */
1301 else if (fs != NULL
1302 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
2da42bc0 1303 eps_via_nodes))
5cf1ec52
UD
1304 return -2;
1305
1306 /* We know we are going to exit. */
1307 break;
1308 }
1309 }
1310 return dest_node;
3b0bdc72
UD
1311 }
1312 else
1313 {
485d775d 1314 int naccepted = 0;
3b0bdc72 1315 re_token_type_t type = dfa->nodes[node].type;
3b0bdc72 1316
434d3784 1317#ifdef RE_ENABLE_I18N
02f3550c 1318 if (dfa->nodes[node].accept_mb)
56b168be 1319 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
434d3784
UD
1320 else
1321#endif /* RE_ENABLE_I18N */
1322 if (type == OP_BACK_REF)
15a7d175 1323 {
d8f73de8 1324 int subexp_idx = dfa->nodes[node].opr.idx + 1;
15a7d175
UD
1325 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1326 if (fs != NULL)
1327 {
1328 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1329 return -1;
1330 else if (naccepted)
1331 {
56b168be 1332 char *buf = (char *) re_string_get_buffer (&mctx->input);
6291ee3c
UD
1333 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1334 naccepted) != 0)
15a7d175
UD
1335 return -1;
1336 }
1337 }
1338
1339 if (naccepted == 0)
1340 {
2d87db5b 1341 int dest_node;
15a7d175
UD
1342 err = re_node_set_insert (eps_via_nodes, node);
1343 if (BE (err < 0, 0))
1344 return -2;
1345 dest_node = dfa->edests[node].elems[0];
1346 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1347 dest_node))
1348 return dest_node;
1349 }
1350 }
3b0bdc72
UD
1351
1352 if (naccepted != 0
e3a87852 1353 || check_node_accept (mctx, dfa->nodes + node, *pidx))
15a7d175 1354 {
2d87db5b 1355 int dest_node = dfa->nexts[node];
15a7d175
UD
1356 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1357 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1358 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1359 dest_node)))
1360 return -1;
1361 re_node_set_empty (eps_via_nodes);
1362 return dest_node;
1363 }
3b0bdc72 1364 }
a3022b82
UD
1365 return -1;
1366}
1367
1368static reg_errcode_t
2da42bc0 1369internal_function __attribute_warn_unused_result__
e2f55264
UD
1370push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1371 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
a3022b82
UD
1372{
1373 reg_errcode_t err;
1374 int num = fs->num++;
1375 if (fs->num == fs->alloc)
1376 {
1b2c2628 1377 struct re_fail_stack_ent_t *new_array;
1b2c2628 1378 new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
44777771 1379 * fs->alloc * 2));
1b2c2628 1380 if (new_array == NULL)
15a7d175 1381 return REG_ESPACE;
44777771 1382 fs->alloc *= 2;
1b2c2628 1383 fs->stack = new_array;
a3022b82
UD
1384 }
1385 fs->stack[num].idx = str_idx;
5cf1ec52 1386 fs->stack[num].node = dest_node;
a3022b82 1387 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
5a299c96
UD
1388 if (fs->stack[num].regs == NULL)
1389 return REG_ESPACE;
a3022b82
UD
1390 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1391 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1392 return err;
1393}
1b2c2628 1394
a3022b82 1395static int
e2f55264
UD
1396internal_function
1397pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1398 regmatch_t *regs, re_node_set *eps_via_nodes)
a3022b82
UD
1399{
1400 int num = --fs->num;
1401 assert (num >= 0);
44777771 1402 *pidx = fs->stack[num].idx;
a3022b82
UD
1403 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1404 re_node_set_free (eps_via_nodes);
485d775d 1405 re_free (fs->stack[num].regs);
a3022b82
UD
1406 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1407 return fs->stack[num].node;
3b0bdc72
UD
1408}
1409
1410/* Set the positions where the subexpressions are starts/ends to registers
1411 PMATCH.
1412 Note: We assume that pmatch[0] is already set, and
97fd3a30 1413 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
3b0bdc72 1414
a9388965 1415static reg_errcode_t
2da42bc0 1416internal_function __attribute_warn_unused_result__
e2f55264
UD
1417set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1418 regmatch_t *pmatch, int fl_backtrack)
3b0bdc72 1419{
76b864c8 1420 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
963d8d78 1421 int idx, cur_node;
3b0bdc72 1422 re_node_set eps_via_nodes;
a3022b82 1423 struct re_fail_stack_t *fs;
6b6557e8
UD
1424 struct re_fail_stack_t fs_body = { 0, 2, NULL };
1425 regmatch_t *prev_idx_match;
2d87db5b 1426 int prev_idx_match_malloced = 0;
6b6557e8 1427
3b0bdc72
UD
1428#ifdef DEBUG
1429 assert (nmatch > 1);
612546c6 1430 assert (mctx->state_log != NULL);
3b0bdc72 1431#endif
a3022b82
UD
1432 if (fl_backtrack)
1433 {
1434 fs = &fs_body;
1435 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
6b6557e8
UD
1436 if (fs->stack == NULL)
1437 return REG_ESPACE;
a3022b82
UD
1438 }
1439 else
1440 fs = NULL;
6b6557e8 1441
3b0bdc72 1442 cur_node = dfa->init_node;
3b0bdc72 1443 re_node_set_init_empty (&eps_via_nodes);
6b6557e8 1444
2d87db5b
UD
1445 if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1446 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1447 else
1448 {
1449 prev_idx_match = re_malloc (regmatch_t, nmatch);
1450 if (prev_idx_match == NULL)
1451 {
1452 free_fail_stack_return (fs);
1453 return REG_ESPACE;
1454 }
1455 prev_idx_match_malloced = 1;
1456 }
963d8d78 1457 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
6b6557e8 1458
3b0bdc72
UD
1459 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1460 {
963d8d78 1461 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
6b6557e8 1462
a3022b82 1463 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
15a7d175
UD
1464 {
1465 int reg_idx;
1466 if (fs)
1467 {
1468 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1469 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1470 break;
1471 if (reg_idx == nmatch)
1472 {
1473 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1474 if (prev_idx_match_malloced)
1475 re_free (prev_idx_match);
15a7d175
UD
1476 return free_fail_stack_return (fs);
1477 }
1478 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1479 &eps_via_nodes);
1480 }
1481 else
1482 {
1483 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1484 if (prev_idx_match_malloced)
1485 re_free (prev_idx_match);
15a7d175
UD
1486 return REG_NOERROR;
1487 }
1488 }
3b0bdc72
UD
1489
1490 /* Proceed to next node. */
e3a87852 1491 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
15a7d175 1492 &eps_via_nodes, fs);
a3022b82 1493
bc15410e 1494 if (BE (cur_node < 0, 0))
15a7d175 1495 {
44777771
UD
1496 if (BE (cur_node == -2, 0))
1497 {
1498 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1499 if (prev_idx_match_malloced)
1500 re_free (prev_idx_match);
44777771
UD
1501 free_fail_stack_return (fs);
1502 return REG_ESPACE;
1503 }
15a7d175
UD
1504 if (fs)
1505 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1506 &eps_via_nodes);
1507 else
1508 {
1509 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1510 if (prev_idx_match_malloced)
1511 re_free (prev_idx_match);
15a7d175
UD
1512 return REG_NOMATCH;
1513 }
1514 }
3b0bdc72
UD
1515 }
1516 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1517 if (prev_idx_match_malloced)
1518 re_free (prev_idx_match);
485d775d
UD
1519 return free_fail_stack_return (fs);
1520}
1521
1522static reg_errcode_t
1ba81cea 1523internal_function
e2f55264 1524free_fail_stack_return (struct re_fail_stack_t *fs)
485d775d
UD
1525{
1526 if (fs)
1527 {
1528 int fs_idx;
1529 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
15a7d175
UD
1530 {
1531 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1532 re_free (fs->stack[fs_idx].regs);
1533 }
485d775d
UD
1534 re_free (fs->stack);
1535 }
a9388965 1536 return REG_NOERROR;
3b0bdc72
UD
1537}
1538
81c64d40 1539static void
1ba81cea 1540internal_function
e2f55264
UD
1541update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1542 regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
81c64d40
UD
1543{
1544 int type = dfa->nodes[cur_node].type;
81c64d40
UD
1545 if (type == OP_OPEN_SUBEXP)
1546 {
6b6557e8
UD
1547 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1548
81c64d40 1549 /* We are at the first node of this sub expression. */
6b6557e8
UD
1550 if (reg_num < nmatch)
1551 {
1552 pmatch[reg_num].rm_so = cur_idx;
1553 pmatch[reg_num].rm_eo = -1;
1554 }
81c64d40
UD
1555 }
1556 else if (type == OP_CLOSE_SUBEXP)
6b6557e8
UD
1557 {
1558 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1559 if (reg_num < nmatch)
1560 {
1561 /* We are at the last node of this sub expression. */
1562 if (pmatch[reg_num].rm_so < cur_idx)
1563 {
1564 pmatch[reg_num].rm_eo = cur_idx;
1565 /* This is a non-empty match or we are not inside an optional
1566 subexpression. Accept this right away. */
1567 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1568 }
1569 else
1570 {
1571 if (dfa->nodes[cur_node].opt_subexp
1572 && prev_idx_match[reg_num].rm_so != -1)
1573 /* We transited through an empty match for an optional
1574 subexpression, like (a?)*, and this is not the subexp's
1575 first match. Copy back the old content of the registers
1576 so that matches of an inner subexpression are undone as
1577 well, like in ((a?))*. */
1578 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1579 else
1580 /* We completed a subexpression, but it may be part of
1581 an optional one, so do not update PREV_IDX_MATCH. */
1582 pmatch[reg_num].rm_eo = cur_idx;
1583 }
1584 }
1585 }
0742e48e 1586}
81c64d40 1587
0742e48e 1588/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
612546c6
UD
1589 and sift the nodes in each states according to the following rules.
1590 Updated state_log will be wrote to STATE_LOG.
3b0bdc72
UD
1591
1592 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1593 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
15a7d175
UD
1594 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1595 the LAST_NODE, we throw away the node `a'.
612546c6 1596 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
15a7d175
UD
1597 string `s' and transit to `b':
1598 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1599 away the node `a'.
1600 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
bb3f4825 1601 thrown away, we throw away the node `a'.
d0b96fc4 1602 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
15a7d175
UD
1603 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1604 node `a'.
bb3f4825 1605 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
15a7d175 1606 we throw away the node `a'. */
3b0bdc72
UD
1607
1608#define STATE_NODE_CONTAINS(state,node) \
1609 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1610
a9388965 1611static reg_errcode_t
1ba81cea 1612internal_function
e2f55264 1613sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
3b0bdc72 1614{
a9388965 1615 reg_errcode_t err;
0742e48e
UD
1616 int null_cnt = 0;
1617 int str_idx = sctx->last_str_idx;
1618 re_node_set cur_dest;
3b0bdc72
UD
1619
1620#ifdef DEBUG
612546c6 1621 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
3b0bdc72 1622#endif
3b0bdc72
UD
1623
1624 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1625 transit to the last_node and the last_node itself. */
0742e48e 1626 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
bc15410e 1627 if (BE (err != REG_NOERROR, 0))
a9388965 1628 return err;
e3a87852 1629 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
0742e48e 1630 if (BE (err != REG_NOERROR, 0))
1b2c2628 1631 goto free_return;
3b0bdc72
UD
1632
1633 /* Then check each states in the state_log. */
612546c6 1634 while (str_idx > 0)
3b0bdc72 1635 {
3b0bdc72 1636 /* Update counters. */
0742e48e
UD
1637 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1638 if (null_cnt > mctx->max_mb_elem_len)
15a7d175
UD
1639 {
1640 memset (sctx->sifted_states, '\0',
1641 sizeof (re_dfastate_t *) * str_idx);
1642 re_node_set_free (&cur_dest);
1643 return REG_NOERROR;
1644 }
0742e48e 1645 re_node_set_empty (&cur_dest);
3b0bdc72 1646 --str_idx;
15a7d175 1647
e40a38b3
UD
1648 if (mctx->state_log[str_idx])
1649 {
1650 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
2da42bc0 1651 if (BE (err != REG_NOERROR, 0))
e40a38b3 1652 goto free_return;
15a7d175 1653 }
3b0bdc72 1654
0742e48e 1655 /* Add all the nodes which satisfy the following conditions:
15a7d175
UD
1656 - It can epsilon transit to a node in CUR_DEST.
1657 - It is in CUR_SRC.
1658 And update state_log. */
e3a87852 1659 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
0742e48e 1660 if (BE (err != REG_NOERROR, 0))
15a7d175 1661 goto free_return;
3b0bdc72 1662 }
1b2c2628
UD
1663 err = REG_NOERROR;
1664 free_return:
0742e48e 1665 re_node_set_free (&cur_dest);
1b2c2628 1666 return err;
3b0bdc72
UD
1667}
1668
e40a38b3 1669static reg_errcode_t
2da42bc0 1670internal_function __attribute_warn_unused_result__
e2f55264
UD
1671build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1672 int str_idx, re_node_set *cur_dest)
e40a38b3 1673{
76b864c8
UD
1674 const re_dfa_t *const dfa = mctx->dfa;
1675 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
e40a38b3
UD
1676 int i;
1677
1678 /* Then build the next sifted state.
1679 We build the next sifted state on `cur_dest', and update
1680 `sifted_states[str_idx]' with `cur_dest'.
1681 Note:
1682 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
5cf1ec52
UD
1683 `cur_src' points the node_set of the old `state_log[str_idx]'
1684 (with the epsilon nodes pre-filtered out). */
e40a38b3
UD
1685 for (i = 0; i < cur_src->nelem; i++)
1686 {
1687 int prev_node = cur_src->elems[i];
1688 int naccepted = 0;
e40a38b3
UD
1689 int ret;
1690
a334319f 1691#ifdef DEBUG
02f3550c 1692 re_token_type_t type = dfa->nodes[prev_node].type;
5cf1ec52
UD
1693 assert (!IS_EPSILON_NODE (type));
1694#endif
e40a38b3
UD
1695#ifdef RE_ENABLE_I18N
1696 /* If the node may accept `multi byte'. */
02f3550c 1697 if (dfa->nodes[prev_node].accept_mb)
e40a38b3
UD
1698 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1699 str_idx, sctx->last_str_idx);
1700#endif /* RE_ENABLE_I18N */
1701
1702 /* We don't check backreferences here.
1703 See update_cur_sifted_state(). */
1704 if (!naccepted
1705 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1706 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1707 dfa->nexts[prev_node]))
1708 naccepted = 1;
1709
1710 if (naccepted == 0)
1711 continue;
1712
1713 if (sctx->limits.nelem)
1714 {
1715 int to_idx = str_idx + naccepted;
1716 if (check_dst_limits (mctx, &sctx->limits,
1717 dfa->nexts[prev_node], to_idx,
1718 prev_node, str_idx))
1719 continue;
1720 }
1721 ret = re_node_set_insert (cur_dest, prev_node);
1722 if (BE (ret == -1, 0))
1723 return REG_ESPACE;
1724 }
1725
1726 return REG_NOERROR;
1727}
1728
3b0bdc72
UD
1729/* Helper functions. */
1730
25337753 1731static reg_errcode_t
e2f55264
UD
1732internal_function
1733clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
3b0bdc72
UD
1734{
1735 int top = mctx->state_log_top;
612546c6 1736
8887a920
UD
1737 if ((next_state_log_idx >= mctx->input.bufs_len
1738 && mctx->input.bufs_len < mctx->input.len)
56b168be
UD
1739 || (next_state_log_idx >= mctx->input.valid_len
1740 && mctx->input.valid_len < mctx->input.len))
612546c6
UD
1741 {
1742 reg_errcode_t err;
a445af0b 1743 err = extend_buffers (mctx, next_state_log_idx + 1);
612546c6 1744 if (BE (err != REG_NOERROR, 0))
15a7d175 1745 return err;
612546c6
UD
1746 }
1747
3b0bdc72
UD
1748 if (top < next_state_log_idx)
1749 {
612546c6 1750 memset (mctx->state_log + top + 1, '\0',
15a7d175 1751 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
3b0bdc72
UD
1752 mctx->state_log_top = next_state_log_idx;
1753 }
612546c6 1754 return REG_NOERROR;
3b0bdc72
UD
1755}
1756
1b2c2628 1757static reg_errcode_t
1ba81cea 1758internal_function
e2f55264
UD
1759merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1760 re_dfastate_t **src, int num)
3b0bdc72 1761{
0742e48e
UD
1762 int st_idx;
1763 reg_errcode_t err;
1764 for (st_idx = 0; st_idx < num; ++st_idx)
1765 {
1766 if (dst[st_idx] == NULL)
15a7d175 1767 dst[st_idx] = src[st_idx];
0742e48e 1768 else if (src[st_idx] != NULL)
15a7d175
UD
1769 {
1770 re_node_set merged_set;
1771 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1772 &src[st_idx]->nodes);
1773 if (BE (err != REG_NOERROR, 0))
1774 return err;
1775 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1776 re_node_set_free (&merged_set);
1777 if (BE (err != REG_NOERROR, 0))
1778 return err;
1779 }
0742e48e
UD
1780 }
1781 return REG_NOERROR;
3b0bdc72
UD
1782}
1783
0742e48e 1784static reg_errcode_t
1ba81cea 1785internal_function
e2f55264
UD
1786update_cur_sifted_state (const re_match_context_t *mctx,
1787 re_sift_context_t *sctx, int str_idx,
1788 re_node_set *dest_nodes)
3b0bdc72 1789{
76b864c8 1790 const re_dfa_t *const dfa = mctx->dfa;
c0c9615c 1791 reg_errcode_t err = REG_NOERROR;
0742e48e 1792 const re_node_set *candidates;
e40a38b3 1793 candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
15a7d175 1794 : &mctx->state_log[str_idx]->nodes);
0742e48e 1795
e40a38b3
UD
1796 if (dest_nodes->nelem == 0)
1797 sctx->sifted_states[str_idx] = NULL;
1798 else
485d775d 1799 {
e40a38b3
UD
1800 if (candidates)
1801 {
1802 /* At first, add the nodes which can epsilon transit to a node in
1803 DEST_NODE. */
1804 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1805 if (BE (err != REG_NOERROR, 0))
1806 return err;
0742e48e 1807
e40a38b3
UD
1808 /* Then, check the limitations in the current sift_context. */
1809 if (sctx->limits.nelem)
1810 {
1811 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1812 mctx->bkref_ents, str_idx);
1813 if (BE (err != REG_NOERROR, 0))
1814 return err;
1815 }
1816 }
1817
1818 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
0742e48e 1819 if (BE (err != REG_NOERROR, 0))
15a7d175 1820 return err;
0742e48e
UD
1821 }
1822
e40a38b3 1823 if (candidates && mctx->state_log[str_idx]->has_backref)
0742e48e 1824 {
d2c38eb3 1825 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
0742e48e 1826 if (BE (err != REG_NOERROR, 0))
15a7d175 1827 return err;
0742e48e
UD
1828 }
1829 return REG_NOERROR;
3b0bdc72
UD
1830}
1831
a9388965 1832static reg_errcode_t
2da42bc0 1833internal_function __attribute_warn_unused_result__
e2f55264
UD
1834add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1835 const re_node_set *candidates)
3b0bdc72 1836{
5cf1ec52
UD
1837 reg_errcode_t err = REG_NOERROR;
1838 int i;
0742e48e 1839
5cf1ec52 1840 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
0742e48e
UD
1841 if (BE (err != REG_NOERROR, 0))
1842 return err;
5cf1ec52
UD
1843
1844 if (!state->inveclosure.alloc)
3b0bdc72 1845 {
5cf1ec52 1846 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
0742e48e 1847 if (BE (err != REG_NOERROR, 0))
2da42bc0 1848 return REG_ESPACE;
5cf1ec52 1849 for (i = 0; i < dest_nodes->nelem; i++)
2da42bc0
UD
1850 {
1851 err = re_node_set_merge (&state->inveclosure,
1852 dfa->inveclosures + dest_nodes->elems[i]);
1853 if (BE (err != REG_NOERROR, 0))
1854 return REG_ESPACE;
1855 }
0742e48e 1856 }
5cf1ec52
UD
1857 return re_node_set_add_intersect (dest_nodes, candidates,
1858 &state->inveclosure);
0742e48e 1859}
3b0bdc72 1860
0742e48e 1861static reg_errcode_t
e2f55264
UD
1862internal_function
1863sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1864 const re_node_set *candidates)
0742e48e
UD
1865{
1866 int ecl_idx;
1867 reg_errcode_t err;
1868 re_node_set *inv_eclosure = dfa->inveclosures + node;
1869 re_node_set except_nodes;
1870 re_node_set_init_empty (&except_nodes);
1871 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1872 {
15a7d175
UD
1873 int cur_node = inv_eclosure->elems[ecl_idx];
1874 if (cur_node == node)
1875 continue;
1876 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1877 {
1878 int edst1 = dfa->edests[cur_node].elems[0];
1879 int edst2 = ((dfa->edests[cur_node].nelem > 1)
1880 ? dfa->edests[cur_node].elems[1] : -1);
1881 if ((!re_node_set_contains (inv_eclosure, edst1)
1882 && re_node_set_contains (dest_nodes, edst1))
1883 || (edst2 > 0
1884 && !re_node_set_contains (inv_eclosure, edst2)
1885 && re_node_set_contains (dest_nodes, edst2)))
1886 {
1887 err = re_node_set_add_intersect (&except_nodes, candidates,
1888 dfa->inveclosures + cur_node);
1889 if (BE (err != REG_NOERROR, 0))
1890 {
1891 re_node_set_free (&except_nodes);
1892 return err;
1893 }
1894 }
1895 }
0742e48e
UD
1896 }
1897 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1898 {
15a7d175
UD
1899 int cur_node = inv_eclosure->elems[ecl_idx];
1900 if (!re_node_set_contains (&except_nodes, cur_node))
1901 {
1902 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1903 re_node_set_remove_at (dest_nodes, idx);
1904 }
0742e48e
UD
1905 }
1906 re_node_set_free (&except_nodes);
1907 return REG_NOERROR;
1908}
1909
a3022b82 1910static int
1ba81cea 1911internal_function
e2f55264
UD
1912check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1913 int dst_node, int dst_idx, int src_node, int src_idx)
a3022b82 1914{
76b864c8 1915 const re_dfa_t *const dfa = mctx->dfa;
a3022b82
UD
1916 int lim_idx, src_pos, dst_pos;
1917
e40a38b3
UD
1918 int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1919 int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
a3022b82
UD
1920 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1921 {
485d775d 1922 int subexp_idx;
a3022b82
UD
1923 struct re_backref_cache_entry *ent;
1924 ent = mctx->bkref_ents + limits->elems[lim_idx];
d8f73de8 1925 subexp_idx = dfa->nodes[ent->node].opr.idx;
a3022b82 1926
e3a87852 1927 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
e40a38b3
UD
1928 subexp_idx, dst_node, dst_idx,
1929 dst_bkref_idx);
e3a87852 1930 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
e40a38b3
UD
1931 subexp_idx, src_node, src_idx,
1932 src_bkref_idx);
a3022b82
UD
1933
1934 /* In case of:
15a7d175
UD
1935 <src> <dst> ( <subexp> )
1936 ( <subexp> ) <src> <dst>
1937 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
a3022b82 1938 if (src_pos == dst_pos)
15a7d175 1939 continue; /* This is unrelated limitation. */
a3022b82 1940 else
15a7d175 1941 return 1;
a3022b82
UD
1942 }
1943 return 0;
1944}
1945
1946static int
1ba81cea 1947internal_function
e2f55264
UD
1948check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1949 int subexp_idx, int from_node, int bkref_idx)
a3022b82 1950{
76b864c8
UD
1951 const re_dfa_t *const dfa = mctx->dfa;
1952 const re_node_set *eclosures = dfa->eclosures + from_node;
1cef7b3c 1953 int node_idx;
d0b96fc4 1954
d0b96fc4
UD
1955 /* Else, we are on the boundary: examine the nodes on the epsilon
1956 closure. */
1cef7b3c
UD
1957 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1958 {
1959 int node = eclosures->elems[node_idx];
d0b96fc4
UD
1960 switch (dfa->nodes[node].type)
1961 {
1962 case OP_BACK_REF:
d8f73de8
UD
1963 if (bkref_idx != -1)
1964 {
1965 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1966 do
2da42bc0 1967 {
d8f73de8 1968 int dst, cpos;
d0b96fc4 1969
d8f73de8
UD
1970 if (ent->node != node)
1971 continue;
d0b96fc4 1972
2c05d33f
UD
1973 if (subexp_idx < BITSET_WORD_BITS
1974 && !(ent->eps_reachable_subexps_map
1975 & ((bitset_word_t) 1 << subexp_idx)))
d8f73de8 1976 continue;
d0b96fc4 1977
d8f73de8
UD
1978 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1979 OP_CLOSE_SUBEXP cases below. But, if the
1980 destination node is the same node as the source
1981 node, don't recurse because it would cause an
1982 infinite loop: a regex that exhibits this behavior
1983 is ()\1*\1* */
1984 dst = dfa->edests[node].elems[0];
1985 if (dst == from_node)
1986 {
1987 if (boundaries & 1)
2da42bc0 1988 return -1;
d8f73de8 1989 else /* if (boundaries & 2) */
2da42bc0 1990 return 0;
d8f73de8 1991 }
7db61208 1992
d8f73de8
UD
1993 cpos =
1994 check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1995 dst, bkref_idx);
1996 if (cpos == -1 /* && (boundaries & 1) */)
1997 return -1;
1998 if (cpos == 0 && (boundaries & 2))
1999 return 0;
2000
2c05d33f
UD
2001 if (subexp_idx < BITSET_WORD_BITS)
2002 ent->eps_reachable_subexps_map
2003 &= ~((bitset_word_t) 1 << subexp_idx);
2da42bc0 2004 }
d8f73de8
UD
2005 while (ent++->more);
2006 }
2007 break;
f0c7c524 2008
d0b96fc4 2009 case OP_OPEN_SUBEXP:
e40a38b3 2010 if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
d0b96fc4
UD
2011 return -1;
2012 break;
f0c7c524 2013
d0b96fc4 2014 case OP_CLOSE_SUBEXP:
e40a38b3 2015 if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
d0b96fc4
UD
2016 return 0;
2017 break;
2018
2019 default:
15a7d175
UD
2020 break;
2021 }
a3022b82 2022 }
d0b96fc4 2023
e40a38b3
UD
2024 return (boundaries & 2) ? 1 : 0;
2025}
2026
2027static int
1ba81cea 2028internal_function
e2f55264
UD
2029check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
2030 int subexp_idx, int from_node, int str_idx,
2031 int bkref_idx)
e40a38b3
UD
2032{
2033 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2034 int boundaries;
2035
2036 /* If we are outside the range of the subexpression, return -1 or 1. */
2037 if (str_idx < lim->subexp_from)
2038 return -1;
2039
2040 if (lim->subexp_to < str_idx)
d0b96fc4 2041 return 1;
e40a38b3
UD
2042
2043 /* If we are within the subexpression, return 0. */
2044 boundaries = (str_idx == lim->subexp_from);
2045 boundaries |= (str_idx == lim->subexp_to) << 1;
2046 if (boundaries == 0)
d0b96fc4 2047 return 0;
e40a38b3
UD
2048
2049 /* Else, examine epsilon closure. */
2050 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2051 from_node, bkref_idx);
a3022b82
UD
2052}
2053
0742e48e
UD
2054/* Check the limitations of sub expressions LIMITS, and remove the nodes
2055 which are against limitations from DEST_NODES. */
2056
2057static reg_errcode_t
1ba81cea 2058internal_function
e2f55264
UD
2059check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2060 const re_node_set *candidates, re_node_set *limits,
2061 struct re_backref_cache_entry *bkref_ents, int str_idx)
0742e48e
UD
2062{
2063 reg_errcode_t err;
2064 int node_idx, lim_idx;
2065
2066 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2067 {
485d775d 2068 int subexp_idx;
0742e48e
UD
2069 struct re_backref_cache_entry *ent;
2070 ent = bkref_ents + limits->elems[lim_idx];
2071
2072 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
15a7d175 2073 continue; /* This is unrelated limitation. */
0742e48e 2074
d8f73de8 2075 subexp_idx = dfa->nodes[ent->node].opr.idx;
0742e48e 2076 if (ent->subexp_to == str_idx)
15a7d175
UD
2077 {
2078 int ops_node = -1;
2079 int cls_node = -1;
2080 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2081 {
2082 int node = dest_nodes->elems[node_idx];
46bf9de7 2083 re_token_type_t type = dfa->nodes[node].type;
15a7d175
UD
2084 if (type == OP_OPEN_SUBEXP
2085 && subexp_idx == dfa->nodes[node].opr.idx)
2086 ops_node = node;
2087 else if (type == OP_CLOSE_SUBEXP
2088 && subexp_idx == dfa->nodes[node].opr.idx)
2089 cls_node = node;
2090 }
2091
2092 /* Check the limitation of the open subexpression. */
2093 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2094 if (ops_node >= 0)
2095 {
46bf9de7
UD
2096 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2097 candidates);
15a7d175
UD
2098 if (BE (err != REG_NOERROR, 0))
2099 return err;
2100 }
46bf9de7 2101
15a7d175 2102 /* Check the limitation of the close subexpression. */
46bf9de7
UD
2103 if (cls_node >= 0)
2104 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2105 {
2106 int node = dest_nodes->elems[node_idx];
2107 if (!re_node_set_contains (dfa->inveclosures + node,
2108 cls_node)
2109 && !re_node_set_contains (dfa->eclosures + node,
2110 cls_node))
2111 {
2112 /* It is against this limitation.
2113 Remove it form the current sifted state. */
2114 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2115 candidates);
2116 if (BE (err != REG_NOERROR, 0))
2117 return err;
2118 --node_idx;
2119 }
2120 }
15a7d175 2121 }
0742e48e 2122 else /* (ent->subexp_to != str_idx) */
15a7d175
UD
2123 {
2124 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2125 {
2126 int node = dest_nodes->elems[node_idx];
46bf9de7 2127 re_token_type_t type = dfa->nodes[node].type;
15a7d175
UD
2128 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2129 {
2130 if (subexp_idx != dfa->nodes[node].opr.idx)
2131 continue;
d8f73de8
UD
2132 /* It is against this limitation.
2133 Remove it form the current sifted state. */
2134 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2135 candidates);
2136 if (BE (err != REG_NOERROR, 0))
2137 return err;
15a7d175
UD
2138 }
2139 }
2140 }
0742e48e
UD
2141 }
2142 return REG_NOERROR;
2143}
2144
0742e48e 2145static reg_errcode_t
2da42bc0 2146internal_function __attribute_warn_unused_result__
e2f55264
UD
2147sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2148 int str_idx, const re_node_set *candidates)
0742e48e 2149{
76b864c8 2150 const re_dfa_t *const dfa = mctx->dfa;
0742e48e 2151 reg_errcode_t err;
0742e48e
UD
2152 int node_idx, node;
2153 re_sift_context_t local_sctx;
e40a38b3
UD
2154 int first_idx = search_cur_bkref_entry (mctx, str_idx);
2155
2156 if (first_idx == -1)
2157 return REG_NOERROR;
2158
0742e48e
UD
2159 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2160
2161 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2162 {
e40a38b3 2163 int enabled_idx;
0742e48e 2164 re_token_type_t type;
e40a38b3 2165 struct re_backref_cache_entry *entry;
0742e48e
UD
2166 node = candidates->elems[node_idx];
2167 type = dfa->nodes[node].type;
0742e48e
UD
2168 /* Avoid infinite loop for the REs like "()\1+". */
2169 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
15a7d175 2170 continue;
e40a38b3
UD
2171 if (type != OP_BACK_REF)
2172 continue;
2173
2174 entry = mctx->bkref_ents + first_idx;
2175 enabled_idx = first_idx;
2176 do
15a7d175 2177 {
2d87db5b
UD
2178 int subexp_len;
2179 int to_idx;
2180 int dst_node;
2181 int ret;
e40a38b3
UD
2182 re_dfastate_t *cur_state;
2183
2184 if (entry->node != node)
2185 continue;
2186 subexp_len = entry->subexp_to - entry->subexp_from;
2187 to_idx = str_idx + subexp_len;
2188 dst_node = (subexp_len ? dfa->nexts[node]
2189 : dfa->edests[node].elems[0]);
2190
2191 if (to_idx > sctx->last_str_idx
2192 || sctx->sifted_states[to_idx] == NULL
2193 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2194 || check_dst_limits (mctx, &sctx->limits, node,
2195 str_idx, dst_node, to_idx))
2196 continue;
2197
2198 if (local_sctx.sifted_states == NULL)
15a7d175 2199 {
e40a38b3
UD
2200 local_sctx = *sctx;
2201 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2202 if (BE (err != REG_NOERROR, 0))
2203 goto free_return;
2204 }
2205 local_sctx.last_node = node;
2206 local_sctx.last_str_idx = str_idx;
2d87db5b
UD
2207 ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2208 if (BE (ret < 0, 0))
e40a38b3
UD
2209 {
2210 err = REG_ESPACE;
2211 goto free_return;
15a7d175 2212 }
e40a38b3
UD
2213 cur_state = local_sctx.sifted_states[str_idx];
2214 err = sift_states_backward (mctx, &local_sctx);
2215 if (BE (err != REG_NOERROR, 0))
2216 goto free_return;
2217 if (sctx->limited_states != NULL)
2218 {
2219 err = merge_state_array (dfa, sctx->limited_states,
2220 local_sctx.sifted_states,
2221 str_idx + 1);
2222 if (BE (err != REG_NOERROR, 0))
2223 goto free_return;
2224 }
2225 local_sctx.sifted_states[str_idx] = cur_state;
2226 re_node_set_remove (&local_sctx.limits, enabled_idx);
2227
2228 /* mctx->bkref_ents may have changed, reload the pointer. */
2da42bc0 2229 entry = mctx->bkref_ents + enabled_idx;
15a7d175 2230 }
e40a38b3 2231 while (enabled_idx++, entry++->more);
0742e48e 2232 }
1b2c2628
UD
2233 err = REG_NOERROR;
2234 free_return:
0742e48e
UD
2235 if (local_sctx.sifted_states != NULL)
2236 {
0742e48e
UD
2237 re_node_set_free (&local_sctx.limits);
2238 }
2239
1b2c2628 2240 return err;
0742e48e
UD
2241}
2242
2243
2244#ifdef RE_ENABLE_I18N
2245static int
1ba81cea 2246internal_function
e2f55264
UD
2247sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2248 int node_idx, int str_idx, int max_str_idx)
0742e48e 2249{
c42b4152 2250 const re_dfa_t *const dfa = mctx->dfa;
0742e48e
UD
2251 int naccepted;
2252 /* Check the node can accept `multi byte'. */
56b168be 2253 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
0742e48e
UD
2254 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2255 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
15a7d175 2256 dfa->nexts[node_idx]))
0742e48e 2257 /* The node can't accept the `multi byte', or the
bb3f4825 2258 destination was already thrown away, then the node
0742e48e
UD
2259 could't accept the current input `multi byte'. */
2260 naccepted = 0;
2261 /* Otherwise, it is sure that the node could accept
2262 `naccepted' bytes input. */
2263 return naccepted;
2264}
2265#endif /* RE_ENABLE_I18N */
2266
3b0bdc72
UD
2267\f
2268/* Functions for state transition. */
2269
2270/* Return the next state to which the current state STATE will transit by
2271 accepting the current input byte, and update STATE_LOG if necessary.
2272 If STATE can accept a multibyte char/collating element/back reference
2273 update the destination of STATE_LOG. */
2274
2275static re_dfastate_t *
2da42bc0 2276internal_function __attribute_warn_unused_result__
e2f55264
UD
2277transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2278 re_dfastate_t *state)
3b0bdc72 2279{
58845a70 2280 re_dfastate_t **trtable;
3b0bdc72
UD
2281 unsigned char ch;
2282
bb677c95
UD
2283#ifdef RE_ENABLE_I18N
2284 /* If the current state can accept multibyte. */
2285 if (BE (state->accept_mb, 0))
612546c6 2286 {
bb677c95 2287 *err = transit_state_mb (mctx, state);
612546c6 2288 if (BE (*err != REG_NOERROR, 0))
15a7d175 2289 return NULL;
612546c6 2290 }
434d3784 2291#endif /* RE_ENABLE_I18N */
3b0bdc72 2292
4c595adb 2293 /* Then decide the next state with the single byte. */
ab4b89fe
UD
2294#if 0
2295 if (0)
2296 /* don't use transition table */
2297 return transit_state_sb (err, mctx, state);
2298#endif
2299
2300 /* Use transition table */
2301 ch = re_string_fetch_byte (&mctx->input);
2302 for (;;)
4c595adb 2303 {
4c595adb 2304 trtable = state->trtable;
ab4b89fe
UD
2305 if (BE (trtable != NULL, 1))
2306 return trtable[ch];
2307
2308 trtable = state->word_trtable;
2309 if (BE (trtable != NULL, 1))
2da42bc0 2310 {
4c595adb
UD
2311 unsigned int context;
2312 context
2313 = re_string_context_at (&mctx->input,
2314 re_string_cur_idx (&mctx->input) - 1,
2315 mctx->eflags);
2316 if (IS_WORD_CONTEXT (context))
2317 return trtable[ch + SBC_MAX];
c13c99fa 2318 else
4c595adb 2319 return trtable[ch];
15a7d175 2320 }
ab4b89fe
UD
2321
2322 if (!build_trtable (mctx->dfa, state))
2323 {
2324 *err = REG_ESPACE;
2325 return NULL;
2326 }
2327
2328 /* Retry, we now have a transition table. */
3b0bdc72 2329 }
4c595adb 2330}
3b0bdc72 2331
4c595adb 2332/* Update the state_log if we need */
58845a70 2333re_dfastate_t *
1ba81cea 2334internal_function
e2f55264
UD
2335merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2336 re_dfastate_t *next_state)
58845a70 2337{
c42b4152 2338 const re_dfa_t *const dfa = mctx->dfa;
4c595adb
UD
2339 int cur_idx = re_string_cur_idx (&mctx->input);
2340
2341 if (cur_idx > mctx->state_log_top)
3b0bdc72 2342 {
4c595adb
UD
2343 mctx->state_log[cur_idx] = next_state;
2344 mctx->state_log_top = cur_idx;
2345 }
2346 else if (mctx->state_log[cur_idx] == 0)
2347 {
2348 mctx->state_log[cur_idx] = next_state;
2349 }
2350 else
2351 {
2352 re_dfastate_t *pstate;
2353 unsigned int context;
2354 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2355 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2da42bc0
UD
2356 the destination of a multibyte char/collating element/
2357 back reference. Then the next state is the union set of
2358 these destinations and the results of the transition table. */
4c595adb
UD
2359 pstate = mctx->state_log[cur_idx];
2360 log_nodes = pstate->entrance_nodes;
2361 if (next_state != NULL)
2da42bc0
UD
2362 {
2363 table_nodes = next_state->entrance_nodes;
2364 *err = re_node_set_init_union (&next_nodes, table_nodes,
15a7d175 2365 log_nodes);
2da42bc0 2366 if (BE (*err != REG_NOERROR, 0))
4c595adb 2367 return NULL;
2da42bc0 2368 }
4c595adb 2369 else
2da42bc0 2370 next_nodes = *log_nodes;
4c595adb
UD
2371 /* Note: We already add the nodes of the initial state,
2372 then we don't need to add them here. */
2373
2374 context = re_string_context_at (&mctx->input,
2375 re_string_cur_idx (&mctx->input) - 1,
2376 mctx->eflags);
2377 next_state = mctx->state_log[cur_idx]
2da42bc0 2378 = re_acquire_state_context (err, dfa, &next_nodes, context);
4c595adb 2379 /* We don't need to check errors here, since the return value of
2da42bc0 2380 this function is next_state and ERR is already set. */
4c595adb
UD
2381
2382 if (table_nodes != NULL)
2da42bc0 2383 re_node_set_free (&next_nodes);
6291ee3c
UD
2384 }
2385
bb3f4825 2386 if (BE (dfa->nbackref, 0) && next_state != NULL)
6291ee3c 2387 {
bb3f4825
UD
2388 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2389 later. We must check them here, since the back references in the
2390 next state might use them. */
e3a87852 2391 *err = check_subexp_matching_top (mctx, &next_state->nodes,
6291ee3c
UD
2392 cur_idx);
2393 if (BE (*err != REG_NOERROR, 0))
2394 return NULL;
6291ee3c 2395
bb3f4825
UD
2396 /* If the next state has back references. */
2397 if (next_state->has_backref)
2398 {
e3a87852 2399 *err = transit_state_bkref (mctx, &next_state->nodes);
bb3f4825
UD
2400 if (BE (*err != REG_NOERROR, 0))
2401 return NULL;
2402 next_state = mctx->state_log[cur_idx];
2403 }
3b0bdc72 2404 }
4c595adb 2405
3b0bdc72
UD
2406 return next_state;
2407}
2408
4c595adb
UD
2409/* Skip bytes in the input that correspond to part of a
2410 multi-byte match, then look in the log for a state
2411 from which to restart matching. */
2412re_dfastate_t *
1ba81cea 2413internal_function
e2f55264 2414find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
4c595adb 2415{
1878e9af 2416 re_dfastate_t *cur_state;
4c595adb
UD
2417 do
2418 {
2419 int max = mctx->state_log_top;
2420 int cur_str_idx = re_string_cur_idx (&mctx->input);
2421
2422 do
2423 {
2da42bc0
UD
2424 if (++cur_str_idx > max)
2425 return NULL;
2426 re_string_skip_bytes (&mctx->input, 1);
4c595adb
UD
2427 }
2428 while (mctx->state_log[cur_str_idx] == NULL);
2429
2430 cur_state = merge_state_with_log (err, mctx, NULL);
2431 }
2d87db5b 2432 while (*err == REG_NOERROR && cur_state == NULL);
4c595adb
UD
2433 return cur_state;
2434}
2435
3b0bdc72
UD
2436/* Helper functions for transit_state. */
2437
6291ee3c
UD
2438/* From the node set CUR_NODES, pick up the nodes whose types are
2439 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2440 expression. And register them to use them later for evaluating the
2441 correspoding back references. */
2442
2443static reg_errcode_t
1ba81cea 2444internal_function
e2f55264
UD
2445check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2446 int str_idx)
6291ee3c 2447{
76b864c8 2448 const re_dfa_t *const dfa = mctx->dfa;
6291ee3c
UD
2449 int node_idx;
2450 reg_errcode_t err;
2451
2452 /* TODO: This isn't efficient.
2453 Because there might be more than one nodes whose types are
2454 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2455 nodes.
2456 E.g. RE: (a){2} */
2457 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2458 {
2459 int node = cur_nodes->elems[node_idx];
2460 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2c05d33f
UD
2461 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2462 && (dfa->used_bkref_map
2463 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
6291ee3c
UD
2464 {
2465 err = match_ctx_add_subtop (mctx, node, str_idx);
2466 if (BE (err != REG_NOERROR, 0))
2467 return err;
2468 }
2469 }
2470 return REG_NOERROR;
2471}
2472
c13c99fa 2473#if 0
3b0bdc72
UD
2474/* Return the next state to which the current state STATE will transit by
2475 accepting the current input byte. */
2476
2477static re_dfastate_t *
e2f55264
UD
2478transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2479 re_dfastate_t *state)
3b0bdc72 2480{
c42b4152 2481 const re_dfa_t *const dfa = mctx->dfa;
3b0bdc72
UD
2482 re_node_set next_nodes;
2483 re_dfastate_t *next_state;
56b168be 2484 int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
3b0bdc72
UD
2485 unsigned int context;
2486
a9388965 2487 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
bc15410e 2488 if (BE (*err != REG_NOERROR, 0))
a9388965 2489 return NULL;
3b0bdc72
UD
2490 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2491 {
2492 int cur_node = state->nodes.elems[node_cnt];
e3a87852 2493 if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
15a7d175
UD
2494 {
2495 *err = re_node_set_merge (&next_nodes,
2496 dfa->eclosures + dfa->nexts[cur_node]);
2497 if (BE (*err != REG_NOERROR, 0))
2498 {
2499 re_node_set_free (&next_nodes);
2500 return NULL;
2501 }
2502 }
3b0bdc72 2503 }
56b168be 2504 context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
a9388965
UD
2505 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2506 /* We don't need to check errors here, since the return value of
2507 this function is next_state and ERR is already set. */
2508
3b0bdc72 2509 re_node_set_free (&next_nodes);
56b168be 2510 re_string_skip_bytes (&mctx->input, 1);
3b0bdc72
UD
2511 return next_state;
2512}
c13c99fa 2513#endif
3b0bdc72 2514
434d3784 2515#ifdef RE_ENABLE_I18N
a9388965 2516static reg_errcode_t
1ba81cea 2517internal_function
e2f55264 2518transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
3b0bdc72 2519{
c42b4152 2520 const re_dfa_t *const dfa = mctx->dfa;
a9388965 2521 reg_errcode_t err;
3b0bdc72
UD
2522 int i;
2523
2524 for (i = 0; i < pstate->nodes.nelem; ++i)
2525 {
2526 re_node_set dest_nodes, *new_nodes;
2527 int cur_node_idx = pstate->nodes.elems[i];
963d8d78 2528 int naccepted, dest_idx;
3b0bdc72
UD
2529 unsigned int context;
2530 re_dfastate_t *dest_state;
2531
963d8d78 2532 if (!dfa->nodes[cur_node_idx].accept_mb)
2da42bc0 2533 continue;
963d8d78 2534
485d775d 2535 if (dfa->nodes[cur_node_idx].constraint)
15a7d175 2536 {
56b168be
UD
2537 context = re_string_context_at (&mctx->input,
2538 re_string_cur_idx (&mctx->input),
2539 mctx->eflags);
15a7d175
UD
2540 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2541 context))
2542 continue;
2543 }
3b0bdc72 2544
ad7f28c2 2545 /* How many bytes the node can accept? */
963d8d78
UD
2546 naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2547 re_string_cur_idx (&mctx->input));
3b0bdc72 2548 if (naccepted == 0)
15a7d175 2549 continue;
3b0bdc72
UD
2550
2551 /* The node can accepts `naccepted' bytes. */
56b168be 2552 dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
0742e48e 2553 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
15a7d175 2554 : mctx->max_mb_elem_len);
7c1be3ec 2555 err = clean_state_log_if_needed (mctx, dest_idx);
612546c6 2556 if (BE (err != REG_NOERROR, 0))
15a7d175 2557 return err;
3b0bdc72
UD
2558#ifdef DEBUG
2559 assert (dfa->nexts[cur_node_idx] != -1);
2560#endif
963d8d78 2561 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
3b0bdc72 2562
612546c6 2563 dest_state = mctx->state_log[dest_idx];
3b0bdc72 2564 if (dest_state == NULL)
15a7d175 2565 dest_nodes = *new_nodes;
3b0bdc72 2566 else
15a7d175
UD
2567 {
2568 err = re_node_set_init_union (&dest_nodes,
2569 dest_state->entrance_nodes, new_nodes);
2570 if (BE (err != REG_NOERROR, 0))
2571 return err;
2572 }
01ed6ceb
UD
2573 context = re_string_context_at (&mctx->input, dest_idx - 1,
2574 mctx->eflags);
612546c6 2575 mctx->state_log[dest_idx]
15a7d175 2576 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
3b0bdc72 2577 if (dest_state != NULL)
15a7d175 2578 re_node_set_free (&dest_nodes);
1b2c2628 2579 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
15a7d175 2580 return err;
3b0bdc72 2581 }
a9388965 2582 return REG_NOERROR;
3b0bdc72 2583}
434d3784 2584#endif /* RE_ENABLE_I18N */
3b0bdc72 2585
a9388965 2586static reg_errcode_t
1ba81cea 2587internal_function
e2f55264 2588transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
3b0bdc72 2589{
6efbd82c 2590 const re_dfa_t *const dfa = mctx->dfa;
a9388965 2591 reg_errcode_t err;
0742e48e 2592 int i;
56b168be 2593 int cur_str_idx = re_string_cur_idx (&mctx->input);
3b0bdc72
UD
2594
2595 for (i = 0; i < nodes->nelem; ++i)
2596 {
6291ee3c 2597 int dest_str_idx, prev_nelem, bkc_idx;
3b0bdc72
UD
2598 int node_idx = nodes->elems[i];
2599 unsigned int context;
fe9434bb 2600 const re_token_t *node = dfa->nodes + node_idx;
3b0bdc72
UD
2601 re_node_set *new_dest_nodes;
2602
2603 /* Check whether `node' is a backreference or not. */
6291ee3c 2604 if (node->type != OP_BACK_REF)
15a7d175 2605 continue;
485d775d
UD
2606
2607 if (node->constraint)
15a7d175 2608 {
56b168be
UD
2609 context = re_string_context_at (&mctx->input, cur_str_idx,
2610 mctx->eflags);
15a7d175
UD
2611 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2612 continue;
2613 }
3b0bdc72
UD
2614
2615 /* `node' is a backreference.
15a7d175 2616 Check the substring which the substring matched. */
6291ee3c 2617 bkc_idx = mctx->nbkref_ents;
e3a87852 2618 err = get_subexp (mctx, node_idx, cur_str_idx);
612546c6 2619 if (BE (err != REG_NOERROR, 0))
15a7d175 2620 goto free_return;
3b0bdc72
UD
2621
2622 /* And add the epsilon closures (which is `new_dest_nodes') of
15a7d175 2623 the backreference to appropriate state_log. */
3b0bdc72
UD
2624#ifdef DEBUG
2625 assert (dfa->nexts[node_idx] != -1);
2626#endif
6291ee3c 2627 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
15a7d175
UD
2628 {
2629 int subexp_len;
2630 re_dfastate_t *dest_state;
2631 struct re_backref_cache_entry *bkref_ent;
2632 bkref_ent = mctx->bkref_ents + bkc_idx;
2633 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2634 continue;
2635 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2636 new_dest_nodes = (subexp_len == 0
2637 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2638 : dfa->eclosures + dfa->nexts[node_idx]);
2639 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2640 - bkref_ent->subexp_from);
56b168be
UD
2641 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2642 mctx->eflags);
15a7d175
UD
2643 dest_state = mctx->state_log[dest_str_idx];
2644 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2645 : mctx->state_log[cur_str_idx]->nodes.nelem);
2646 /* Add `new_dest_node' to state_log. */
2647 if (dest_state == NULL)
2648 {
2649 mctx->state_log[dest_str_idx]
2650 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2651 context);
2652 if (BE (mctx->state_log[dest_str_idx] == NULL
2653 && err != REG_NOERROR, 0))
2654 goto free_return;
2655 }
2656 else
2657 {
2658 re_node_set dest_nodes;
2659 err = re_node_set_init_union (&dest_nodes,
2660 dest_state->entrance_nodes,
2661 new_dest_nodes);
2662 if (BE (err != REG_NOERROR, 0))
2663 {
2664 re_node_set_free (&dest_nodes);
2665 goto free_return;
2666 }
2667 mctx->state_log[dest_str_idx]
2668 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2669 re_node_set_free (&dest_nodes);
2670 if (BE (mctx->state_log[dest_str_idx] == NULL
2671 && err != REG_NOERROR, 0))
2672 goto free_return;
2673 }
2674 /* We need to check recursively if the backreference can epsilon
2675 transit. */
2676 if (subexp_len == 0
2677 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2678 {
e3a87852 2679 err = check_subexp_matching_top (mctx, new_dest_nodes,
6291ee3c
UD
2680 cur_str_idx);
2681 if (BE (err != REG_NOERROR, 0))
2682 goto free_return;
e3a87852 2683 err = transit_state_bkref (mctx, new_dest_nodes);
15a7d175
UD
2684 if (BE (err != REG_NOERROR, 0))
2685 goto free_return;
2686 }
2687 }
3b0bdc72 2688 }
1b2c2628
UD
2689 err = REG_NOERROR;
2690 free_return:
1b2c2628 2691 return err;
3b0bdc72
UD
2692}
2693
6291ee3c
UD
2694/* Enumerate all the candidates which the backreference BKREF_NODE can match
2695 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2696 Note that we might collect inappropriate candidates here.
2697 However, the cost of checking them strictly here is too high, then we
2698 delay these checking for prune_impossible_nodes(). */
3b0bdc72 2699
6291ee3c 2700static reg_errcode_t
2da42bc0 2701internal_function __attribute_warn_unused_result__
e2f55264 2702get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
3b0bdc72 2703{
6efbd82c 2704 const re_dfa_t *const dfa = mctx->dfa;
6291ee3c 2705 int subexp_num, sub_top_idx;
56b168be 2706 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
6291ee3c
UD
2707 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2708 int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
e40a38b3 2709 if (cache_idx != -1)
6291ee3c 2710 {
01ed6ceb
UD
2711 const struct re_backref_cache_entry *entry
2712 = mctx->bkref_ents + cache_idx;
e40a38b3 2713 do
2da42bc0 2714 if (entry->node == bkref_node)
e40a38b3
UD
2715 return REG_NOERROR; /* We already checked it. */
2716 while (entry++->more);
6291ee3c 2717 }
e40a38b3 2718
d8f73de8 2719 subexp_num = dfa->nodes[bkref_node].opr.idx;
6291ee3c
UD
2720
2721 /* For each sub expression */
2722 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2723 {
2724 reg_errcode_t err;
2725 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2726 re_sub_match_last_t *sub_last;
7c1be3ec 2727 int sub_last_idx, sl_str, bkref_str_off;
6291ee3c
UD
2728
2729 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2730 continue; /* It isn't related. */
2731
2732 sl_str = sub_top->str_idx;
7c1be3ec 2733 bkref_str_off = bkref_str_idx;
6291ee3c
UD
2734 /* At first, check the last node of sub expressions we already
2735 evaluated. */
2736 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2737 {
2738 int sl_str_diff;
2739 sub_last = sub_top->lasts[sub_last_idx];
2740 sl_str_diff = sub_last->str_idx - sl_str;
2741 /* The matched string by the sub expression match with the substring
2742 at the back reference? */
c1baba0f
UD
2743 if (sl_str_diff > 0)
2744 {
2745 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2746 {
2747 /* Not enough chars for a successful match. */
2748 if (bkref_str_off + sl_str_diff > mctx->input.len)
2749 break;
2750
2751 err = clean_state_log_if_needed (mctx,
2752 bkref_str_off
2753 + sl_str_diff);
2754 if (BE (err != REG_NOERROR, 0))
2755 return err;
2756 buf = (const char *) re_string_get_buffer (&mctx->input);
2757 }
2758 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
01ed6ceb
UD
2759 /* We don't need to search this sub expression any more. */
2760 break;
c1baba0f 2761 }
7c1be3ec 2762 bkref_str_off += sl_str_diff;
6291ee3c 2763 sl_str += sl_str_diff;
e3a87852 2764 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
6291ee3c 2765 bkref_str_idx);
3ee363e2 2766
7c1be3ec
UD
2767 /* Reload buf, since the preceding call might have reallocated
2768 the buffer. */
56b168be 2769 buf = (const char *) re_string_get_buffer (&mctx->input);
3ee363e2 2770
6291ee3c
UD
2771 if (err == REG_NOMATCH)
2772 continue;
2773 if (BE (err != REG_NOERROR, 0))
2774 return err;
2775 }
7c1be3ec 2776
6291ee3c
UD
2777 if (sub_last_idx < sub_top->nlasts)
2778 continue;
2779 if (sub_last_idx > 0)
2780 ++sl_str;
2781 /* Then, search for the other last nodes of the sub expression. */
2782 for (; sl_str <= bkref_str_idx; ++sl_str)
2783 {
2784 int cls_node, sl_str_off;
fe9434bb 2785 const re_node_set *nodes;
6291ee3c
UD
2786 sl_str_off = sl_str - sub_top->str_idx;
2787 /* The matched string by the sub expression match with the substring
2788 at the back reference? */
c1baba0f
UD
2789 if (sl_str_off > 0)
2790 {
2791 if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2792 {
2793 /* If we are at the end of the input, we cannot match. */
2794 if (bkref_str_off >= mctx->input.len)
2795 break;
2796
a445af0b 2797 err = extend_buffers (mctx, bkref_str_off + 1);
c1baba0f
UD
2798 if (BE (err != REG_NOERROR, 0))
2799 return err;
2800
2801 buf = (const char *) re_string_get_buffer (&mctx->input);
2802 }
2803 if (buf [bkref_str_off++] != buf[sl_str - 1])
2804 break; /* We don't need to search this sub expression
2805 any more. */
2806 }
6291ee3c
UD
2807 if (mctx->state_log[sl_str] == NULL)
2808 continue;
2809 /* Does this state have a ')' of the sub expression? */
2810 nodes = &mctx->state_log[sl_str]->nodes;
01ed6ceb
UD
2811 cls_node = find_subexp_node (dfa, nodes, subexp_num,
2812 OP_CLOSE_SUBEXP);
6291ee3c
UD
2813 if (cls_node == -1)
2814 continue; /* No. */
2815 if (sub_top->path == NULL)
2816 {
2817 sub_top->path = calloc (sizeof (state_array_t),
2818 sl_str - sub_top->str_idx + 1);
2819 if (sub_top->path == NULL)
2820 return REG_ESPACE;
2821 }
2822 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2823 in the current context? */
e3a87852 2824 err = check_arrival (mctx, sub_top->path, sub_top->node,
01ed6ceb
UD
2825 sub_top->str_idx, cls_node, sl_str,
2826 OP_CLOSE_SUBEXP);
6291ee3c
UD
2827 if (err == REG_NOMATCH)
2828 continue;
2829 if (BE (err != REG_NOERROR, 0))
2830 return err;
2831 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2832 if (BE (sub_last == NULL, 0))
2833 return REG_ESPACE;
e3a87852 2834 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
6291ee3c
UD
2835 bkref_str_idx);
2836 if (err == REG_NOMATCH)
2837 continue;
2838 }
2839 }
2840 return REG_NOERROR;
2841}
2842
2843/* Helper functions for get_subexp(). */
2844
2845/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2846 If it can arrive, register the sub expression expressed with SUB_TOP
2847 and SUB_LAST. */
2848
2849static reg_errcode_t
1ba81cea 2850internal_function
e2f55264
UD
2851get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2852 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
6291ee3c
UD
2853{
2854 reg_errcode_t err;
2855 int to_idx;
2856 /* Can the subexpression arrive the back reference? */
e3a87852 2857 err = check_arrival (mctx, &sub_last->path, sub_last->node,
01ed6ceb
UD
2858 sub_last->str_idx, bkref_node, bkref_str,
2859 OP_OPEN_SUBEXP);
6291ee3c
UD
2860 if (err != REG_NOERROR)
2861 return err;
2862 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2863 sub_last->str_idx);
2864 if (BE (err != REG_NOERROR, 0))
2865 return err;
2866 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
c1baba0f 2867 return clean_state_log_if_needed (mctx, to_idx);
6291ee3c
UD
2868}
2869
2870/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2871 Search '(' if FL_OPEN, or search ')' otherwise.
2872 TODO: This function isn't efficient...
2873 Because there might be more than one nodes whose types are
2874 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2875 nodes.
2876 E.g. RE: (a){2} */
2877
2878static int
1ba81cea 2879internal_function
e2f55264
UD
2880find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2881 int subexp_idx, int type)
6291ee3c
UD
2882{
2883 int cls_idx;
2884 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2885 {
2886 int cls_node = nodes->elems[cls_idx];
fe9434bb 2887 const re_token_t *node = dfa->nodes + cls_node;
0ce7f49c 2888 if (node->type == type
6291ee3c
UD
2889 && node->opr.idx == subexp_idx)
2890 return cls_node;
2891 }
2892 return -1;
2893}
2894
2895/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2896 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2897 heavily reused.
2898 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2899
2900static reg_errcode_t
2da42bc0 2901internal_function __attribute_warn_unused_result__
e2f55264
UD
2902check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2903 int top_str, int last_node, int last_str, int type)
6291ee3c 2904{
6efbd82c 2905 const re_dfa_t *const dfa = mctx->dfa;
c0c9615c 2906 reg_errcode_t err = REG_NOERROR;
6291ee3c
UD
2907 int subexp_num, backup_cur_idx, str_idx, null_cnt;
2908 re_dfastate_t *cur_state = NULL;
2909 re_node_set *cur_nodes, next_nodes;
2910 re_dfastate_t **backup_state_log;
2911 unsigned int context;
2912
2913 subexp_num = dfa->nodes[top_node].opr.idx;
2914 /* Extend the buffer if we need. */
951d6408 2915 if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
6291ee3c
UD
2916 {
2917 re_dfastate_t **new_array;
2918 int old_alloc = path->alloc;
2919 path->alloc += last_str + mctx->max_mb_elem_len + 1;
2920 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
6efbd82c 2921 if (BE (new_array == NULL, 0))
951d6408
UD
2922 {
2923 path->alloc = old_alloc;
2924 return REG_ESPACE;
2925 }
6291ee3c
UD
2926 path->array = new_array;
2927 memset (new_array + old_alloc, '\0',
2928 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2929 }
2930
513bbb25 2931 str_idx = path->next_idx ?: top_str;
6291ee3c
UD
2932
2933 /* Temporary modify MCTX. */
2934 backup_state_log = mctx->state_log;
56b168be 2935 backup_cur_idx = mctx->input.cur_idx;
6291ee3c 2936 mctx->state_log = path->array;
56b168be 2937 mctx->input.cur_idx = str_idx;
6291ee3c
UD
2938
2939 /* Setup initial node set. */
56b168be 2940 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
6291ee3c
UD
2941 if (str_idx == top_str)
2942 {
2943 err = re_node_set_init_1 (&next_nodes, top_node);
2944 if (BE (err != REG_NOERROR, 0))
2945 return err;
0ce7f49c 2946 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
6291ee3c
UD
2947 if (BE (err != REG_NOERROR, 0))
2948 {
2949 re_node_set_free (&next_nodes);
2950 return err;
2951 }
2952 }
2953 else
2954 {
2955 cur_state = mctx->state_log[str_idx];
2956 if (cur_state && cur_state->has_backref)
2957 {
2958 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
6efbd82c 2959 if (BE (err != REG_NOERROR, 0))
6291ee3c
UD
2960 return err;
2961 }
2962 else
2963 re_node_set_init_empty (&next_nodes);
2964 }
2965 if (str_idx == top_str || (cur_state && cur_state->has_backref))
2966 {
2967 if (next_nodes.nelem)
2968 {
d2c38eb3 2969 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
0ce7f49c 2970 subexp_num, type);
6efbd82c 2971 if (BE (err != REG_NOERROR, 0))
6291ee3c
UD
2972 {
2973 re_node_set_free (&next_nodes);
2974 return err;
2975 }
2976 }
2977 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2978 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2979 {
2980 re_node_set_free (&next_nodes);
2981 return err;
2982 }
2983 mctx->state_log[str_idx] = cur_state;
2984 }
2985
2986 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2987 {
2988 re_node_set_empty (&next_nodes);
2989 if (mctx->state_log[str_idx + 1])
2990 {
2991 err = re_node_set_merge (&next_nodes,
2992 &mctx->state_log[str_idx + 1]->nodes);
2993 if (BE (err != REG_NOERROR, 0))
2994 {
2995 re_node_set_free (&next_nodes);
2996 return err;
2997 }
2998 }
2999 if (cur_state)
3000 {
e3a87852 3001 err = check_arrival_add_next_nodes (mctx, str_idx,
6efbd82c
UD
3002 &cur_state->non_eps_nodes,
3003 &next_nodes);
6291ee3c
UD
3004 if (BE (err != REG_NOERROR, 0))
3005 {
3006 re_node_set_free (&next_nodes);
3007 return err;
3008 }
3009 }
3010 ++str_idx;
3011 if (next_nodes.nelem)
3012 {
0ce7f49c 3013 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
6291ee3c
UD
3014 if (BE (err != REG_NOERROR, 0))
3015 {
3016 re_node_set_free (&next_nodes);
3017 return err;
3018 }
d2c38eb3 3019 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
0ce7f49c 3020 subexp_num, type);
6efbd82c 3021 if (BE (err != REG_NOERROR, 0))
6291ee3c
UD
3022 {
3023 re_node_set_free (&next_nodes);
3024 return err;
3025 }
3026 }
56b168be 3027 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
6291ee3c
UD
3028 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3029 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3030 {
3031 re_node_set_free (&next_nodes);
3032 return err;
3033 }
3034 mctx->state_log[str_idx] = cur_state;
3035 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3036 }
3037 re_node_set_free (&next_nodes);
3038 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3039 : &mctx->state_log[last_str]->nodes);
3040 path->next_idx = str_idx;
3041
3042 /* Fix MCTX. */
3043 mctx->state_log = backup_state_log;
56b168be 3044 mctx->input.cur_idx = backup_cur_idx;
6291ee3c 3045
6291ee3c 3046 /* Then check the current node set has the node LAST_NODE. */
c0d5034e
UD
3047 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3048 return REG_NOERROR;
3049
3050 return REG_NOMATCH;
6291ee3c
UD
3051}
3052
3053/* Helper functions for check_arrival. */
3054
3055/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3056 to NEXT_NODES.
3057 TODO: This function is similar to the functions transit_state*(),
3058 however this function has many additional works.
3059 Can't we unify them? */
3060
3061static reg_errcode_t
2da42bc0 3062internal_function __attribute_warn_unused_result__
e2f55264
UD
3063check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
3064 re_node_set *cur_nodes, re_node_set *next_nodes)
6291ee3c 3065{
c42b4152 3066 const re_dfa_t *const dfa = mctx->dfa;
d2c38eb3 3067 int result;
6291ee3c 3068 int cur_idx;
c0c9615c 3069 reg_errcode_t err = REG_NOERROR;
6291ee3c
UD
3070 re_node_set union_set;
3071 re_node_set_init_empty (&union_set);
3072 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3073 {
3074 int naccepted = 0;
3075 int cur_node = cur_nodes->elems[cur_idx];
a334319f 3076#ifdef DEBUG
02f3550c 3077 re_token_type_t type = dfa->nodes[cur_node].type;
5cf1ec52
UD
3078 assert (!IS_EPSILON_NODE (type));
3079#endif
6291ee3c
UD
3080#ifdef RE_ENABLE_I18N
3081 /* If the node may accept `multi byte'. */
02f3550c 3082 if (dfa->nodes[cur_node].accept_mb)
6291ee3c 3083 {
56b168be 3084 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
6291ee3c
UD
3085 str_idx);
3086 if (naccepted > 1)
3087 {
3088 re_dfastate_t *dest_state;
3089 int next_node = dfa->nexts[cur_node];
3090 int next_idx = str_idx + naccepted;
3091 dest_state = mctx->state_log[next_idx];
3092 re_node_set_empty (&union_set);
3093 if (dest_state)
3094 {
3095 err = re_node_set_merge (&union_set, &dest_state->nodes);
3096 if (BE (err != REG_NOERROR, 0))
3097 {
3098 re_node_set_free (&union_set);
3099 return err;
3100 }
6291ee3c 3101 }
d2c38eb3
UD
3102 result = re_node_set_insert (&union_set, next_node);
3103 if (BE (result < 0, 0))
6291ee3c 3104 {
44777771
UD
3105 re_node_set_free (&union_set);
3106 return REG_ESPACE;
6291ee3c
UD
3107 }
3108 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3109 &union_set);
3110 if (BE (mctx->state_log[next_idx] == NULL
3111 && err != REG_NOERROR, 0))
3112 {
3113 re_node_set_free (&union_set);
3114 return err;
3115 }
3116 }
3117 }
3118#endif /* RE_ENABLE_I18N */
3119 if (naccepted
e3a87852 3120 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
6291ee3c 3121 {
d2c38eb3
UD
3122 result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3123 if (BE (result < 0, 0))
6291ee3c
UD
3124 {
3125 re_node_set_free (&union_set);
3126 return REG_ESPACE;
3127 }
3128 }
3129 }
3130 re_node_set_free (&union_set);
3131 return REG_NOERROR;
3132}
3133
3134/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3135 CUR_NODES, however exclude the nodes which are:
3136 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3137 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3138*/
3139
3140static reg_errcode_t
1ba81cea 3141internal_function
e2f55264
UD
3142check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3143 int ex_subexp, int type)
6291ee3c
UD
3144{
3145 reg_errcode_t err;
3146 int idx, outside_node;
3147 re_node_set new_nodes;
3148#ifdef DEBUG
3149 assert (cur_nodes->nelem);
3150#endif
3151 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3152 if (BE (err != REG_NOERROR, 0))
3153 return err;
3154 /* Create a new node set NEW_NODES with the nodes which are epsilon
3155 closures of the node in CUR_NODES. */
3156
3157 for (idx = 0; idx < cur_nodes->nelem; ++idx)
3158 {
3159 int cur_node = cur_nodes->elems[idx];
6efbd82c 3160 const re_node_set *eclosure = dfa->eclosures + cur_node;
0ce7f49c 3161 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
6291ee3c
UD
3162 if (outside_node == -1)
3163 {
3164 /* There are no problematic nodes, just merge them. */
3165 err = re_node_set_merge (&new_nodes, eclosure);
3166 if (BE (err != REG_NOERROR, 0))
3167 {
3168 re_node_set_free (&new_nodes);
3169 return err;
3170 }
3171 }
3172 else
3173 {
3174 /* There are problematic nodes, re-calculate incrementally. */
3175 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
0ce7f49c 3176 ex_subexp, type);
6291ee3c
UD
3177 if (BE (err != REG_NOERROR, 0))
3178 {
3179 re_node_set_free (&new_nodes);
3180 return err;
3181 }
3182 }
3183 }
3184 re_node_set_free (cur_nodes);
3185 *cur_nodes = new_nodes;
3186 return REG_NOERROR;
3187}
3188
3189/* Helper function for check_arrival_expand_ecl.
3190 Check incrementally the epsilon closure of TARGET, and if it isn't
3191 problematic append it to DST_NODES. */
3192
3193static reg_errcode_t
2da42bc0 3194internal_function __attribute_warn_unused_result__
e2f55264
UD
3195check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3196 int target, int ex_subexp, int type)
6291ee3c 3197{
0ce7f49c 3198 int cur_node;
6291ee3c
UD
3199 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3200 {
3201 int err;
6291ee3c 3202
0ce7f49c 3203 if (dfa->nodes[cur_node].type == type
6291ee3c
UD
3204 && dfa->nodes[cur_node].opr.idx == ex_subexp)
3205 {
0ce7f49c 3206 if (type == OP_CLOSE_SUBEXP)
6291ee3c
UD
3207 {
3208 err = re_node_set_insert (dst_nodes, cur_node);
3209 if (BE (err == -1, 0))
3210 return REG_ESPACE;
3211 }
3212 break;
3213 }
3214 err = re_node_set_insert (dst_nodes, cur_node);
3215 if (BE (err == -1, 0))
3216 return REG_ESPACE;
3217 if (dfa->edests[cur_node].nelem == 0)
3218 break;
3219 if (dfa->edests[cur_node].nelem == 2)
3220 {
3221 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3222 dfa->edests[cur_node].elems[1],
0ce7f49c 3223 ex_subexp, type);
6291ee3c
UD
3224 if (BE (err != REG_NOERROR, 0))
3225 return err;
3226 }
3227 cur_node = dfa->edests[cur_node].elems[0];
3228 }
3229 return REG_NOERROR;
3230}
3231
3232
3233/* For all the back references in the current state, calculate the
3234 destination of the back references by the appropriate entry
3235 in MCTX->BKREF_ENTS. */
3236
3237static reg_errcode_t
2da42bc0 3238internal_function __attribute_warn_unused_result__
e2f55264
UD
3239expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3240 int cur_str, int subexp_num, int type)
6291ee3c 3241{
76b864c8 3242 const re_dfa_t *const dfa = mctx->dfa;
6291ee3c 3243 reg_errcode_t err;
e40a38b3
UD
3244 int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3245 struct re_backref_cache_entry *ent;
6291ee3c 3246
e40a38b3
UD
3247 if (cache_idx_start == -1)
3248 return REG_NOERROR;
3249
3250 restart:
3251 ent = mctx->bkref_ents + cache_idx_start;
3252 do
6291ee3c
UD
3253 {
3254 int to_idx, next_node;
e40a38b3 3255
6291ee3c
UD
3256 /* Is this entry ENT is appropriate? */
3257 if (!re_node_set_contains (cur_nodes, ent->node))
3258 continue; /* No. */
3259
3260 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3261 /* Calculate the destination of the back reference, and append it
3262 to MCTX->STATE_LOG. */
3263 if (to_idx == cur_str)
3264 {
3265 /* The backreference did epsilon transit, we must re-check all the
3266 node in the current state. */
3267 re_node_set new_dests;
3268 reg_errcode_t err2, err3;
3269 next_node = dfa->edests[ent->node].elems[0];
3270 if (re_node_set_contains (cur_nodes, next_node))
3271 continue;
3272 err = re_node_set_init_1 (&new_dests, next_node);
0ce7f49c 3273 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
6291ee3c
UD
3274 err3 = re_node_set_merge (cur_nodes, &new_dests);
3275 re_node_set_free (&new_dests);
3276 if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3277 || err3 != REG_NOERROR, 0))
3278 {
3279 err = (err != REG_NOERROR ? err
3280 : (err2 != REG_NOERROR ? err2 : err3));
3281 return err;
3282 }
3283 /* TODO: It is still inefficient... */
e40a38b3 3284 goto restart;
6291ee3c
UD
3285 }
3286 else
3287 {
3288 re_node_set union_set;
3289 next_node = dfa->nexts[ent->node];
3290 if (mctx->state_log[to_idx])
3291 {
3292 int ret;
3293 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3294 next_node))
3295 continue;
3296 err = re_node_set_init_copy (&union_set,
3297 &mctx->state_log[to_idx]->nodes);
3298 ret = re_node_set_insert (&union_set, next_node);
3299 if (BE (err != REG_NOERROR || ret < 0, 0))
3300 {
3301 re_node_set_free (&union_set);
3302 err = err != REG_NOERROR ? err : REG_ESPACE;
3303 return err;
3304 }
3305 }
3306 else
3307 {
3308 err = re_node_set_init_1 (&union_set, next_node);
3309 if (BE (err != REG_NOERROR, 0))
3310 return err;
3311 }
3312 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3313 re_node_set_free (&union_set);
3314 if (BE (mctx->state_log[to_idx] == NULL
3315 && err != REG_NOERROR, 0))
3316 return err;
3317 }
3318 }
e40a38b3 3319 while (ent++->more);
6291ee3c
UD
3320 return REG_NOERROR;
3321}
3322
3323/* Build transition table for the state.
ab4b89fe 3324 Return 1 if succeeded, otherwise return NULL. */
6291ee3c 3325
ab4b89fe 3326static int
1ba81cea 3327internal_function
e2f55264 3328build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
6291ee3c
UD
3329{
3330 reg_errcode_t err;
ab4b89fe 3331 int i, j, ch, need_word_trtable = 0;
2c05d33f
UD
3332 bitset_word_t elem, mask;
3333 bool dests_node_malloced = false;
3334 bool dest_states_malloced = false;
6291ee3c
UD
3335 int ndests; /* Number of the destination states from `state'. */
3336 re_dfastate_t **trtable;
3337 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3338 re_node_set follows, *dests_node;
2c05d33f
UD
3339 bitset_t *dests_ch;
3340 bitset_t acceptable;
3341
3342 struct dests_alloc
3343 {
3344 re_node_set dests_node[SBC_MAX];
3345 bitset_t dests_ch[SBC_MAX];
3346 } *dests_alloc;
3b0bdc72
UD
3347
3348 /* We build DFA states which corresponds to the destination nodes
3349 from `state'. `dests_node[i]' represents the nodes which i-th
3350 destination state contains, and `dests_ch[i]' represents the
3351 characters which i-th destination state accepts. */
2c05d33f
UD
3352 if (__libc_use_alloca (sizeof (struct dests_alloc)))
3353 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
05dab910 3354 else
05dab910 3355 {
2c05d33f
UD
3356 dests_alloc = re_malloc (struct dests_alloc, 1);
3357 if (BE (dests_alloc == NULL, 0))
ab4b89fe 3358 return 0;
2c05d33f 3359 dests_node_malloced = true;
05dab910 3360 }
2c05d33f
UD
3361 dests_node = dests_alloc->dests_node;
3362 dests_ch = dests_alloc->dests_ch;
3b0bdc72
UD
3363
3364 /* Initialize transiton table. */
ab4b89fe 3365 state->word_trtable = state->trtable = NULL;
3b0bdc72
UD
3366
3367 /* At first, group all nodes belonging to `state' into several
3368 destinations. */
56b168be 3369 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
bc15410e 3370 if (BE (ndests <= 0, 0))
3b0bdc72 3371 {
05dab910 3372 if (dests_node_malloced)
2c05d33f 3373 free (dests_alloc);
ab4b89fe 3374 /* Return 0 in case of an error, 1 otherwise. */
05dab910 3375 if (ndests == 0)
c13c99fa 3376 {
3ce12656 3377 state->trtable = (re_dfastate_t **)
ab4b89fe 3378 calloc (sizeof (re_dfastate_t *), SBC_MAX);
2543fef2
JM
3379 if (BE (state->trtable == NULL, 0))
3380 return 0;
ab4b89fe 3381 return 1;
c13c99fa 3382 }
ab4b89fe 3383 return 0;
3b0bdc72
UD
3384 }
3385
a9388965 3386 err = re_node_set_alloc (&follows, ndests + 1);
05dab910
RM
3387 if (BE (err != REG_NOERROR, 0))
3388 goto out_free;
daa84549
PE
3389
3390 /* Avoid arithmetic overflow in size calculation. */
3391 if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3392 / (3 * sizeof (re_dfastate_t *)))
3393 < ndests),
3394 0))
3395 goto out_free;
05dab910 3396
2c05d33f 3397 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
05dab910
RM
3398 + ndests * 3 * sizeof (re_dfastate_t *)))
3399 dest_states = (re_dfastate_t **)
ab4b89fe 3400 alloca (ndests * 3 * sizeof (re_dfastate_t *));
05dab910 3401 else
05dab910
RM
3402 {
3403 dest_states = (re_dfastate_t **)
ab4b89fe 3404 malloc (ndests * 3 * sizeof (re_dfastate_t *));
05dab910
RM
3405 if (BE (dest_states == NULL, 0))
3406 {
3407out_free:
3408 if (dest_states_malloced)
3409 free (dest_states);
3410 re_node_set_free (&follows);
3411 for (i = 0; i < ndests; ++i)
3412 re_node_set_free (dests_node + i);
05dab910 3413 if (dests_node_malloced)
2c05d33f 3414 free (dests_alloc);
ab4b89fe 3415 return 0;
05dab910 3416 }
2c05d33f 3417 dest_states_malloced = true;
05dab910
RM
3418 }
3419 dest_states_word = dest_states + ndests;
3420 dest_states_nl = dest_states_word + ndests;
3421 bitset_empty (acceptable);
a9388965 3422
3b0bdc72
UD
3423 /* Then build the states for all destinations. */
3424 for (i = 0; i < ndests; ++i)
3425 {
3426 int next_node;
3427 re_node_set_empty (&follows);
3428 /* Merge the follows of this destination states. */
3429 for (j = 0; j < dests_node[i].nelem; ++j)
15a7d175
UD
3430 {
3431 next_node = dfa->nexts[dests_node[i].elems[j]];
3432 if (next_node != -1)
3433 {
3434 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3435 if (BE (err != REG_NOERROR, 0))
05dab910 3436 goto out_free;
15a7d175
UD
3437 }
3438 }
a9388965 3439 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
bc15410e 3440 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
15a7d175 3441 goto out_free;
3b0bdc72 3442 /* If the new state has context constraint,
15a7d175 3443 build appropriate states for these contexts. */
3b0bdc72 3444 if (dest_states[i]->has_constraint)
15a7d175
UD
3445 {
3446 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3447 CONTEXT_WORD);
3448 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3449 goto out_free;
3ce12656 3450
ab4b89fe
UD
3451 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3452 need_word_trtable = 1;
3ce12656 3453
15a7d175
UD
3454 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3455 CONTEXT_NEWLINE);
3456 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3457 goto out_free;
3ce12656 3458 }
3b0bdc72 3459 else
15a7d175
UD
3460 {
3461 dest_states_word[i] = dest_states[i];
3462 dest_states_nl[i] = dest_states[i];
3463 }
3b0bdc72
UD
3464 bitset_merge (acceptable, dests_ch[i]);
3465 }
3466
ab4b89fe 3467 if (!BE (need_word_trtable, 0))
3ce12656
UD
3468 {
3469 /* We don't care about whether the following character is a word
3470 character, or we are in a single-byte character set so we can
3471 discern by looking at the character code: allocate a
3472 256-entry transition table. */
ab4b89fe
UD
3473 trtable = state->trtable =
3474 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3ce12656
UD
3475 if (BE (trtable == NULL, 0))
3476 goto out_free;
3477
3478 /* For all characters ch...: */
2c05d33f
UD
3479 for (i = 0; i < BITSET_WORDS; ++i)
3480 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3ce12656
UD
3481 elem;
3482 mask <<= 1, elem >>= 1, ++ch)
3483 if (BE (elem & 1, 0))
3484 {
3485 /* There must be exactly one destination which accepts
3486 character ch. See group_nodes_into_DFAstates. */
3487 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3488 ;
6b6557e8 3489
3ce12656 3490 /* j-th destination accepts the word character ch. */
56b168be 3491 if (dfa->word_char[i] & mask)
3ce12656
UD
3492 trtable[ch] = dest_states_word[j];
3493 else
3494 trtable[ch] = dest_states[j];
3495 }
3496 }
3497 else
3498 {
3499 /* We care about whether the following character is a word
3500 character, and we are in a multi-byte character set: discern
3501 by looking at the character code: build two 256-entry
3502 transition tables, one starting at trtable[0] and one
3503 starting at trtable[SBC_MAX]. */
ab4b89fe
UD
3504 trtable = state->word_trtable =
3505 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3ce12656
UD
3506 if (BE (trtable == NULL, 0))
3507 goto out_free;
6b6557e8 3508
3ce12656 3509 /* For all characters ch...: */
2c05d33f
UD
3510 for (i = 0; i < BITSET_WORDS; ++i)
3511 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3ce12656
UD
3512 elem;
3513 mask <<= 1, elem >>= 1, ++ch)
3514 if (BE (elem & 1, 0))
3515 {
3516 /* There must be exactly one destination which accepts
3517 character ch. See group_nodes_into_DFAstates. */
3518 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3519 ;
6b6557e8 3520
3ce12656
UD
3521 /* j-th destination accepts the word character ch. */
3522 trtable[ch] = dest_states[j];
3523 trtable[ch + SBC_MAX] = dest_states_word[j];
3524 }
3525 }
6b6557e8 3526
3b0bdc72 3527 /* new line */
c202c2c5
UD
3528 if (bitset_contain (acceptable, NEWLINE_CHAR))
3529 {
3530 /* The current state accepts newline character. */
3ce12656
UD
3531 for (j = 0; j < ndests; ++j)
3532 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
15a7d175
UD
3533 {
3534 /* k-th destination accepts newline character. */
3ce12656 3535 trtable[NEWLINE_CHAR] = dest_states_nl[j];
ab4b89fe 3536 if (need_word_trtable)
3ce12656 3537 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
15a7d175
UD
3538 /* There must be only one destination which accepts
3539 newline. See group_nodes_into_DFAstates. */
3540 break;
3541 }
c202c2c5 3542 }
3b0bdc72 3543
05dab910
RM
3544 if (dest_states_malloced)
3545 free (dest_states);
3b0bdc72
UD
3546
3547 re_node_set_free (&follows);
3548 for (i = 0; i < ndests; ++i)
3549 re_node_set_free (dests_node + i);
3550
05dab910 3551 if (dests_node_malloced)
2c05d33f 3552 free (dests_alloc);
3b0bdc72 3553
ab4b89fe 3554 return 1;
3b0bdc72
UD
3555}
3556
3557/* Group all nodes belonging to STATE into several destinations.
3558 Then for all destinations, set the nodes belonging to the destination
3559 to DESTS_NODE[i] and set the characters accepted by the destination
3560 to DEST_CH[i]. This function return the number of destinations. */
3561
3562static int
1ba81cea 3563internal_function
e2f55264
UD
3564group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3565 re_node_set *dests_node, bitset_t *dests_ch)
3b0bdc72 3566{
a9388965 3567 reg_errcode_t err;
d2c38eb3 3568 int result;
3b0bdc72
UD
3569 int i, j, k;
3570 int ndests; /* Number of the destinations from `state'. */
2c05d33f 3571 bitset_t accepts; /* Characters a node can accept. */
3b0bdc72
UD
3572 const re_node_set *cur_nodes = &state->nodes;
3573 bitset_empty (accepts);
3574 ndests = 0;
3575
3576 /* For all the nodes belonging to `state', */
3577 for (i = 0; i < cur_nodes->nelem; ++i)
3578 {
3b0bdc72
UD
3579 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3580 re_token_type_t type = node->type;
485d775d 3581 unsigned int constraint = node->constraint;
3b0bdc72
UD
3582
3583 /* Enumerate all single byte character this node can accept. */
3584 if (type == CHARACTER)
15a7d175 3585 bitset_set (accepts, node->opr.c);
3b0bdc72 3586 else if (type == SIMPLE_BRACKET)
15a7d175
UD
3587 {
3588 bitset_merge (accepts, node->opr.sbcset);
3589 }
3b0bdc72 3590 else if (type == OP_PERIOD)
15a7d175 3591 {
65e6becf
UD
3592#ifdef RE_ENABLE_I18N
3593 if (dfa->mb_cur_max > 1)
3594 bitset_merge (accepts, dfa->sb_char);
3595 else
0ce7f49c 3596#endif
65e6becf 3597 bitset_set_all (accepts);
56b168be 3598 if (!(dfa->syntax & RE_DOT_NEWLINE))
15a7d175 3599 bitset_clear (accepts, '\n');
56b168be 3600 if (dfa->syntax & RE_DOT_NOT_NULL)
15a7d175
UD
3601 bitset_clear (accepts, '\0');
3602 }
c0d5034e 3603#ifdef RE_ENABLE_I18N
ad7f28c2 3604 else if (type == OP_UTF8_PERIOD)
2da42bc0 3605 {
2c05d33f 3606 memset (accepts, '\xff', sizeof (bitset_t) / 2);
56b168be 3607 if (!(dfa->syntax & RE_DOT_NEWLINE))
ad7f28c2 3608 bitset_clear (accepts, '\n');
56b168be 3609 if (dfa->syntax & RE_DOT_NOT_NULL)
ad7f28c2 3610 bitset_clear (accepts, '\0');
2da42bc0 3611 }
c0d5034e 3612#endif
3b0bdc72 3613 else
15a7d175 3614 continue;
3b0bdc72
UD
3615
3616 /* Check the `accepts' and sift the characters which are not
15a7d175 3617 match it the context. */
3b0bdc72 3618 if (constraint)
15a7d175 3619 {
15a7d175
UD
3620 if (constraint & NEXT_NEWLINE_CONSTRAINT)
3621 {
2c05d33f 3622 bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
15a7d175
UD
3623 bitset_empty (accepts);
3624 if (accepts_newline)
3625 bitset_set (accepts, NEWLINE_CHAR);
3626 else
3627 continue;
3628 }
66b110e8
UD
3629 if (constraint & NEXT_ENDBUF_CONSTRAINT)
3630 {
3631 bitset_empty (accepts);
3632 continue;
3633 }
c13c99fa 3634
66b110e8 3635 if (constraint & NEXT_WORD_CONSTRAINT)
65e6becf 3636 {
2c05d33f 3637 bitset_word_t any_set = 0;
1cef7b3c
UD
3638 if (type == CHARACTER && !node->word_char)
3639 {
3640 bitset_empty (accepts);
3641 continue;
3642 }
65e6becf
UD
3643#ifdef RE_ENABLE_I18N
3644 if (dfa->mb_cur_max > 1)
2c05d33f 3645 for (j = 0; j < BITSET_WORDS; ++j)
457beec8 3646 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
65e6becf
UD
3647 else
3648#endif
2c05d33f 3649 for (j = 0; j < BITSET_WORDS; ++j)
457beec8
UD
3650 any_set |= (accepts[j] &= dfa->word_char[j]);
3651 if (!any_set)
3652 continue;
65e6becf 3653 }
66b110e8 3654 if (constraint & NEXT_NOTWORD_CONSTRAINT)
65e6becf 3655 {
2c05d33f 3656 bitset_word_t any_set = 0;
1cef7b3c
UD
3657 if (type == CHARACTER && node->word_char)
3658 {
3659 bitset_empty (accepts);
3660 continue;
3661 }
65e6becf
UD
3662#ifdef RE_ENABLE_I18N
3663 if (dfa->mb_cur_max > 1)
2c05d33f 3664 for (j = 0; j < BITSET_WORDS; ++j)
457beec8 3665 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
65e6becf
UD
3666 else
3667#endif
2c05d33f 3668 for (j = 0; j < BITSET_WORDS; ++j)
457beec8
UD
3669 any_set |= (accepts[j] &= ~dfa->word_char[j]);
3670 if (!any_set)
3671 continue;
65e6becf 3672 }
15a7d175 3673 }
3b0bdc72
UD
3674
3675 /* Then divide `accepts' into DFA states, or create a new
457beec8 3676 state. Above, we make sure that accepts is not empty. */
3b0bdc72 3677 for (j = 0; j < ndests; ++j)
15a7d175 3678 {
2c05d33f
UD
3679 bitset_t intersec; /* Intersection sets, see below. */
3680 bitset_t remains;
15a7d175 3681 /* Flags, see below. */
2c05d33f 3682 bitset_word_t has_intersec, not_subset, not_consumed;
15a7d175
UD
3683
3684 /* Optimization, skip if this state doesn't accept the character. */
3685 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3686 continue;
3687
3688 /* Enumerate the intersection set of this state and `accepts'. */
3689 has_intersec = 0;
2c05d33f 3690 for (k = 0; k < BITSET_WORDS; ++k)
15a7d175
UD
3691 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3692 /* And skip if the intersection set is empty. */
3693 if (!has_intersec)
3694 continue;
3695
3696 /* Then check if this state is a subset of `accepts'. */
3697 not_subset = not_consumed = 0;
2c05d33f 3698 for (k = 0; k < BITSET_WORDS; ++k)
15a7d175
UD
3699 {
3700 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3701 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3702 }
3703
3704 /* If this state isn't a subset of `accepts', create a
3705 new group state, which has the `remains'. */
3706 if (not_subset)
3707 {
3708 bitset_copy (dests_ch[ndests], remains);
3709 bitset_copy (dests_ch[j], intersec);
3710 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3711 if (BE (err != REG_NOERROR, 0))
3712 goto error_return;
3713 ++ndests;
3714 }
3715
3716 /* Put the position in the current group. */
d2c38eb3
UD
3717 result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3718 if (BE (result < 0, 0))
15a7d175
UD
3719 goto error_return;
3720
3721 /* If all characters are consumed, go to next node. */
3722 if (!not_consumed)
3723 break;
3724 }
3b0bdc72
UD
3725 /* Some characters remain, create a new group. */
3726 if (j == ndests)
15a7d175
UD
3727 {
3728 bitset_copy (dests_ch[ndests], accepts);
3729 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3730 if (BE (err != REG_NOERROR, 0))
3731 goto error_return;
3732 ++ndests;
3733 bitset_empty (accepts);
3734 }
3b0bdc72
UD
3735 }
3736 return ndests;
1b2c2628
UD
3737 error_return:
3738 for (j = 0; j < ndests; ++j)
3739 re_node_set_free (dests_node + j);
3740 return -1;
3b0bdc72
UD
3741}
3742
434d3784
UD
3743#ifdef RE_ENABLE_I18N
3744/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3745 Return the number of the bytes the node accepts.
3746 STR_IDX is the current index of the input string.
3747
3748 This function handles the nodes which can accept one character, or
3749 one collating element like '.', '[a-z]', opposite to the other nodes
3750 can only accept one byte. */
3b0bdc72 3751
8c0ab919
RM
3752# ifdef _LIBC
3753# include <locale/weight.h>
3754# endif
3755
3b0bdc72 3756static int
1ba81cea 3757internal_function
e2f55264
UD
3758check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3759 const re_string_t *input, int str_idx)
3b0bdc72 3760{
3b0bdc72 3761 const re_token_t *node = dfa->nodes + node_idx;
ad7f28c2 3762 int char_len, elem_len;
434d3784 3763 int i;
ad7f28c2
UD
3764
3765 if (BE (node->type == OP_UTF8_PERIOD, 0))
3766 {
3767 unsigned char c = re_string_byte_at (input, str_idx), d;
3768 if (BE (c < 0xc2, 1))
3769 return 0;
3770
3771 if (str_idx + 2 > input->len)
3772 return 0;
3773
3774 d = re_string_byte_at (input, str_idx + 1);
3775 if (c < 0xe0)
3776 return (d < 0x80 || d > 0xbf) ? 0 : 2;
3777 else if (c < 0xf0)
3778 {
3779 char_len = 3;
3780 if (c == 0xe0 && d < 0xa0)
3781 return 0;
3782 }
3783 else if (c < 0xf8)
3784 {
3785 char_len = 4;
3786 if (c == 0xf0 && d < 0x90)
3787 return 0;
3788 }
3789 else if (c < 0xfc)
3790 {
3791 char_len = 5;
3792 if (c == 0xf8 && d < 0x88)
3793 return 0;
3794 }
3795 else if (c < 0xfe)
3796 {
3797 char_len = 6;
3798 if (c == 0xfc && d < 0x84)
3799 return 0;
3800 }
3801 else
3802 return 0;
3803
3804 if (str_idx + char_len > input->len)
3805 return 0;
3806
3807 for (i = 1; i < char_len; ++i)
3808 {
3809 d = re_string_byte_at (input, str_idx + i);
3810 if (d < 0x80 || d > 0xbf)
3811 return 0;
3812 }
3813 return char_len;
3814 }
3815
3816 char_len = re_string_char_size_at (input, str_idx);
3b0bdc72
UD
3817 if (node->type == OP_PERIOD)
3818 {
ad7f28c2 3819 if (char_len <= 1)
2da42bc0 3820 return 0;
ad7f28c2
UD
3821 /* FIXME: I don't think this if is needed, as both '\n'
3822 and '\0' are char_len == 1. */
434d3784 3823 /* '.' accepts any one character except the following two cases. */
56b168be 3824 if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
15a7d175 3825 re_string_byte_at (input, str_idx) == '\n') ||
56b168be 3826 ((dfa->syntax & RE_DOT_NOT_NULL) &&
15a7d175
UD
3827 re_string_byte_at (input, str_idx) == '\0'))
3828 return 0;
3b0bdc72
UD
3829 return char_len;
3830 }
ad7f28c2
UD
3831
3832 elem_len = re_string_elem_size_at (input, str_idx);
6c2a04a7 3833 if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
ad7f28c2
UD
3834 return 0;
3835
3836 if (node->type == COMPLEX_BRACKET)
3b0bdc72
UD
3837 {
3838 const re_charset_t *cset = node->opr.mbcset;
434d3784 3839# ifdef _LIBC
1c99f950
UD
3840 const unsigned char *pin
3841 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
5f93cd52
UD
3842 int j;
3843 uint32_t nrules;
434d3784
UD
3844# endif /* _LIBC */
3845 int match_len = 0;
3846 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
15a7d175 3847 ? re_string_wchar_at (input, str_idx) : 0);
434d3784
UD
3848
3849 /* match with multibyte character? */
3850 for (i = 0; i < cset->nmbchars; ++i)
15a7d175
UD
3851 if (wc == cset->mbchars[i])
3852 {
3853 match_len = char_len;
3854 goto check_node_accept_bytes_match;
3855 }
434d3784
UD
3856 /* match with character_class? */
3857 for (i = 0; i < cset->nchar_classes; ++i)
15a7d175
UD
3858 {
3859 wctype_t wt = cset->char_classes[i];
3860 if (__iswctype (wc, wt))
3861 {
3862 match_len = char_len;
3863 goto check_node_accept_bytes_match;
3864 }
3865 }
434d3784
UD
3866
3867# ifdef _LIBC
5f93cd52 3868 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3b0bdc72 3869 if (nrules != 0)
15a7d175
UD
3870 {
3871 unsigned int in_collseq = 0;
3872 const int32_t *table, *indirect;
3873 const unsigned char *weights, *extra;
3874 const char *collseqwc;
3b0bdc72 3875
15a7d175
UD
3876 /* match with collating_symbol? */
3877 if (cset->ncoll_syms)
3878 extra = (const unsigned char *)
3879 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3880 for (i = 0; i < cset->ncoll_syms; ++i)
3881 {
3882 const unsigned char *coll_sym = extra + cset->coll_syms[i];
3883 /* Compare the length of input collating element and
3884 the length of current collating element. */
3885 if (*coll_sym != elem_len)
3886 continue;
3887 /* Compare each bytes. */
3888 for (j = 0; j < *coll_sym; j++)
3889 if (pin[j] != coll_sym[1 + j])
3890 break;
3891 if (j == *coll_sym)
3892 {
3893 /* Match if every bytes is equal. */
3894 match_len = j;
3895 goto check_node_accept_bytes_match;
3896 }
3897 }
3898
3899 if (cset->nranges)
3900 {
3901 if (elem_len <= char_len)
3902 {
3903 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
25337753 3904 in_collseq = __collseq_table_lookup (collseqwc, wc);
15a7d175
UD
3905 }
3906 else
3907 in_collseq = find_collation_sequence_value (pin, elem_len);
3908 }
3909 /* match with range expression? */
3910 for (i = 0; i < cset->nranges; ++i)
3911 if (cset->range_starts[i] <= in_collseq
3912 && in_collseq <= cset->range_ends[i])
3913 {
3914 match_len = elem_len;
3915 goto check_node_accept_bytes_match;
3916 }
3917
3918 /* match with equivalence_class? */
3919 if (cset->nequiv_classes)
3920 {
3921 const unsigned char *cp = pin;
3922 table = (const int32_t *)
3923 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3924 weights = (const unsigned char *)
3925 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3926 extra = (const unsigned char *)
3927 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3928 indirect = (const int32_t *)
3929 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
8c0ab919 3930 int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
15a7d175
UD
3931 if (idx > 0)
3932 for (i = 0; i < cset->nequiv_classes; ++i)
3933 {
3934 int32_t equiv_class_idx = cset->equiv_classes[i];
b7d1c5fa
UD
3935 size_t weight_len = weights[idx & 0xffffff];
3936 if (weight_len == weights[equiv_class_idx & 0xffffff]
3937 && (idx >> 24) == (equiv_class_idx >> 24))
15a7d175
UD
3938 {
3939 int cnt = 0;
b7d1c5fa
UD
3940
3941 idx &= 0xffffff;
3942 equiv_class_idx &= 0xffffff;
3943
15a7d175
UD
3944 while (cnt <= weight_len
3945 && (weights[equiv_class_idx + 1 + cnt]
3946 == weights[idx + 1 + cnt]))
3947 ++cnt;
3948 if (cnt > weight_len)
3949 {
3950 match_len = elem_len;
3951 goto check_node_accept_bytes_match;
3952 }
3953 }
3954 }
3955 }
3956 }
434d3784
UD
3957 else
3958# endif /* _LIBC */
15a7d175
UD
3959 {
3960 /* match with range expression? */
92b27c74 3961#if __GNUC__ >= 2
15a7d175 3962 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
92b27c74 3963#else
15a7d175
UD
3964 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3965 cmp_buf[2] = wc;
92b27c74 3966#endif
15a7d175
UD
3967 for (i = 0; i < cset->nranges; ++i)
3968 {
3969 cmp_buf[0] = cset->range_starts[i];
3970 cmp_buf[4] = cset->range_ends[i];
3971 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3972 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3973 {
3974 match_len = char_len;
3975 goto check_node_accept_bytes_match;
3976 }
3977 }
3978 }
434d3784
UD
3979 check_node_accept_bytes_match:
3980 if (!cset->non_match)
15a7d175 3981 return match_len;
434d3784 3982 else
15a7d175
UD
3983 {
3984 if (match_len > 0)
3985 return 0;
3986 else
3987 return (elem_len > char_len) ? elem_len : char_len;
3988 }
3b0bdc72
UD
3989 }
3990 return 0;
3991}
3992
434d3784 3993# ifdef _LIBC
3b0bdc72 3994static unsigned int
1ba81cea 3995internal_function
e2f55264 3996find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3b0bdc72
UD
3997{
3998 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3999 if (nrules == 0)
4000 {
4001 if (mbs_len == 1)
15a7d175
UD
4002 {
4003 /* No valid character. Match it as a single byte character. */
4004 const unsigned char *collseq = (const unsigned char *)
4005 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4006 return collseq[mbs[0]];
4007 }
3b0bdc72
UD
4008 return UINT_MAX;
4009 }
4010 else
4011 {
4012 int32_t idx;
4013 const unsigned char *extra = (const unsigned char *)
15a7d175 4014 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6c2a04a7
UD
4015 int32_t extrasize = (const unsigned char *)
4016 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3b0bdc72 4017
6c2a04a7 4018 for (idx = 0; idx < extrasize;)
15a7d175
UD
4019 {
4020 int mbs_cnt, found = 0;
4021 int32_t elem_mbs_len;
4022 /* Skip the name of collating element name. */
4023 idx = idx + extra[idx] + 1;
4024 elem_mbs_len = extra[idx++];
4025 if (mbs_len == elem_mbs_len)
4026 {
4027 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4028 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4029 break;
4030 if (mbs_cnt == elem_mbs_len)
4031 /* Found the entry. */
4032 found = 1;
4033 }
4034 /* Skip the byte sequence of the collating element. */
4035 idx += elem_mbs_len;
4036 /* Adjust for the alignment. */
4037 idx = (idx + 3) & ~3;
4038 /* Skip the collation sequence value. */
4039 idx += sizeof (uint32_t);
4040 /* Skip the wide char sequence of the collating element. */
d84acf38 4041 idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
15a7d175
UD
4042 /* If we found the entry, return the sequence value. */
4043 if (found)
4044 return *(uint32_t *) (extra + idx);
4045 /* Skip the collation sequence value. */
4046 idx += sizeof (uint32_t);
4047 }
6c2a04a7 4048 return UINT_MAX;
3b0bdc72
UD
4049 }
4050}
434d3784
UD
4051# endif /* _LIBC */
4052#endif /* RE_ENABLE_I18N */
3b0bdc72
UD
4053
4054/* Check whether the node accepts the byte which is IDX-th
4055 byte of the INPUT. */
4056
4057static int
1ba81cea 4058internal_function
e2f55264
UD
4059check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4060 int idx)
3b0bdc72 4061{
3b0bdc72 4062 unsigned char ch;
56b168be 4063 ch = re_string_byte_at (&mctx->input, idx);
ad7f28c2
UD
4064 switch (node->type)
4065 {
4066 case CHARACTER:
5cf1ec52 4067 if (node->opr.c != ch)
2da42bc0 4068 return 0;
5cf1ec52
UD
4069 break;
4070
ad7f28c2 4071 case SIMPLE_BRACKET:
5cf1ec52 4072 if (!bitset_contain (node->opr.sbcset, ch))
2da42bc0 4073 return 0;
5cf1ec52
UD
4074 break;
4075
c0d5034e 4076#ifdef RE_ENABLE_I18N
ad7f28c2
UD
4077 case OP_UTF8_PERIOD:
4078 if (ch >= 0x80)
2da42bc0 4079 return 0;
ad7f28c2 4080 /* FALLTHROUGH */
c0d5034e 4081#endif
ad7f28c2 4082 case OP_PERIOD:
5cf1ec52
UD
4083 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4084 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4085 return 0;
4086 break;
4087
ad7f28c2
UD
4088 default:
4089 return 0;
4090 }
5cf1ec52
UD
4091
4092 if (node->constraint)
4093 {
4094 /* The node has constraints. Check whether the current context
4095 satisfies the constraints. */
4096 unsigned int context = re_string_context_at (&mctx->input, idx,
4097 mctx->eflags);
4098 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4099 return 0;
4100 }
4101
4102 return 1;
3b0bdc72 4103}
612546c6
UD
4104
4105/* Extend the buffers, if the buffers have run out. */
4106
4107static reg_errcode_t
2da42bc0 4108internal_function __attribute_warn_unused_result__
a445af0b 4109extend_buffers (re_match_context_t *mctx, int min_len)
612546c6
UD
4110{
4111 reg_errcode_t ret;
56b168be 4112 re_string_t *pstr = &mctx->input;
612546c6 4113
aef699dc
PE
4114 /* Avoid overflow. */
4115 if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4116 return REG_ESPACE;
4117
a445af0b
AS
4118 /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */
4119 ret = re_string_realloc_buffers (pstr,
4120 MAX (min_len,
4121 MIN (pstr->len, pstr->bufs_len * 2)));
612546c6
UD
4122 if (BE (ret != REG_NOERROR, 0))
4123 return ret;
4124
4125 if (mctx->state_log != NULL)
4126 {
4127 /* And double the length of state_log. */
951d6408
UD
4128 /* XXX We have no indication of the size of this buffer. If this
4129 allocation fail we have no indication that the state_log array
4130 does not have the right size. */
4131 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4132 pstr->bufs_len + 1);
1b2c2628 4133 if (BE (new_array == NULL, 0))
15a7d175 4134 return REG_ESPACE;
1b2c2628 4135 mctx->state_log = new_array;
612546c6
UD
4136 }
4137
4138 /* Then reconstruct the buffers. */
4139 if (pstr->icase)
4140 {
4141#ifdef RE_ENABLE_I18N
3c0fb574 4142 if (pstr->mb_cur_max > 1)
bb3f4825
UD
4143 {
4144 ret = build_wcs_upper_buffer (pstr);
4145 if (BE (ret != REG_NOERROR, 0))
4146 return ret;
4147 }
612546c6
UD
4148 else
4149#endif /* RE_ENABLE_I18N */
15a7d175 4150 build_upper_buffer (pstr);
612546c6
UD
4151 }
4152 else
4153 {
4154#ifdef RE_ENABLE_I18N
3c0fb574 4155 if (pstr->mb_cur_max > 1)
15a7d175 4156 build_wcs_buffer (pstr);
612546c6
UD
4157 else
4158#endif /* RE_ENABLE_I18N */
15a7d175
UD
4159 {
4160 if (pstr->trans != NULL)
4161 re_string_translate_buffer (pstr);
15a7d175 4162 }
612546c6
UD
4163 }
4164 return REG_NOERROR;
4165}
4166
3b0bdc72
UD
4167\f
4168/* Functions for matching context. */
4169
6291ee3c
UD
4170/* Initialize MCTX. */
4171
a9388965 4172static reg_errcode_t
2da42bc0 4173internal_function __attribute_warn_unused_result__
e2f55264 4174match_ctx_init (re_match_context_t *mctx, int eflags, int n)
3b0bdc72
UD
4175{
4176 mctx->eflags = eflags;
612546c6 4177 mctx->match_last = -1;
3b0bdc72 4178 if (n > 0)
a9388965
UD
4179 {
4180 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
6291ee3c
UD
4181 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4182 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
15a7d175 4183 return REG_ESPACE;
a9388965 4184 }
56b168be
UD
4185 /* Already zero-ed by the caller.
4186 else
4187 mctx->bkref_ents = NULL;
4188 mctx->nbkref_ents = 0;
4189 mctx->nsub_tops = 0; */
3b0bdc72 4190 mctx->abkref_ents = n;
6291ee3c 4191 mctx->max_mb_elem_len = 1;
6291ee3c 4192 mctx->asub_tops = n;
a9388965 4193 return REG_NOERROR;
3b0bdc72
UD
4194}
4195
6291ee3c
UD
4196/* Clean the entries which depend on the current input in MCTX.
4197 This function must be invoked when the matcher changes the start index
4198 of the input, or changes the input string. */
4199
4200static void
1ba81cea 4201internal_function
e2f55264 4202match_ctx_clean (re_match_context_t *mctx)
6291ee3c
UD
4203{
4204 int st_idx;
4205 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4206 {
4207 int sl_idx;
4208 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4209 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4210 {
4211 re_sub_match_last_t *last = top->lasts[sl_idx];
4212 re_free (last->path.array);
4213 re_free (last);
4214 }
4215 re_free (top->lasts);
4216 if (top->path)
4217 {
4218 re_free (top->path->array);
4219 re_free (top->path);
4220 }
4221 free (top);
4222 }
cb265fec
UD
4223
4224 mctx->nsub_tops = 0;
4225 mctx->nbkref_ents = 0;
4226}
4227
4228/* Free all the memory associated with MCTX. */
4229
4230static void
1ba81cea 4231internal_function
e2f55264 4232match_ctx_free (re_match_context_t *mctx)
cb265fec
UD
4233{
4234 /* First, free all the memory associated with MCTX->SUB_TOPS. */
4235 match_ctx_clean (mctx);
4236 re_free (mctx->sub_tops);
4237 re_free (mctx->bkref_ents);
6291ee3c
UD
4238}
4239
4240/* Add a new backreference entry to MCTX.
4241 Note that we assume that caller never call this function with duplicate
4242 entry, and call with STR_IDX which isn't smaller than any existing entry.
4243*/
3b0bdc72 4244
a9388965 4245static reg_errcode_t
2da42bc0 4246internal_function __attribute_warn_unused_result__
e2f55264
UD
4247match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4248 int to)
3b0bdc72
UD
4249{
4250 if (mctx->nbkref_ents >= mctx->abkref_ents)
4251 {
1b2c2628
UD
4252 struct re_backref_cache_entry* new_entry;
4253 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
15a7d175 4254 mctx->abkref_ents * 2);
1b2c2628 4255 if (BE (new_entry == NULL, 0))
15a7d175
UD
4256 {
4257 re_free (mctx->bkref_ents);
4258 return REG_ESPACE;
4259 }
1b2c2628 4260 mctx->bkref_ents = new_entry;
3b0bdc72 4261 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
15a7d175 4262 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
3b0bdc72
UD
4263 mctx->abkref_ents *= 2;
4264 }
e40a38b3
UD
4265 if (mctx->nbkref_ents > 0
4266 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4267 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4268
3b0bdc72 4269 mctx->bkref_ents[mctx->nbkref_ents].node = node;
0742e48e
UD
4270 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4271 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
e40a38b3 4272 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
7db61208
UD
4273
4274 /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4275 If bit N is clear, means that this entry won't epsilon-transition to
4276 an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4277 it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4278 such node.
4279
4280 A backreference does not epsilon-transition unless it is empty, so set
4281 to all zeros if FROM != TO. */
4282 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4283 = (from == to ? ~0 : 0);
4284
e40a38b3 4285 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
0742e48e
UD
4286 if (mctx->max_mb_elem_len < to - from)
4287 mctx->max_mb_elem_len = to - from;
a9388965 4288 return REG_NOERROR;
3b0bdc72 4289}
0742e48e 4290
e40a38b3
UD
4291/* Search for the first entry which has the same str_idx, or -1 if none is
4292 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
6291ee3c
UD
4293
4294static int
1ba81cea 4295internal_function
e2f55264 4296search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
6291ee3c 4297{
e40a38b3
UD
4298 int left, right, mid, last;
4299 last = right = mctx->nbkref_ents;
6291ee3c
UD
4300 for (left = 0; left < right;)
4301 {
4302 mid = (left + right) / 2;
4303 if (mctx->bkref_ents[mid].str_idx < str_idx)
4304 left = mid + 1;
4305 else
4306 right = mid;
4307 }
e40a38b3
UD
4308 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4309 return left;
4310 else
4311 return -1;
6291ee3c
UD
4312}
4313
6291ee3c
UD
4314/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4315 at STR_IDX. */
4316
4317static reg_errcode_t
2da42bc0 4318internal_function __attribute_warn_unused_result__
e2f55264 4319match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
6291ee3c
UD
4320{
4321#ifdef DEBUG
4322 assert (mctx->sub_tops != NULL);
4323 assert (mctx->asub_tops > 0);
4324#endif
951d6408 4325 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
6291ee3c 4326 {
951d6408
UD
4327 int new_asub_tops = mctx->asub_tops * 2;
4328 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4329 re_sub_match_top_t *,
4330 new_asub_tops);
6291ee3c
UD
4331 if (BE (new_array == NULL, 0))
4332 return REG_ESPACE;
4333 mctx->sub_tops = new_array;
951d6408 4334 mctx->asub_tops = new_asub_tops;
6291ee3c
UD
4335 }
4336 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
951d6408 4337 if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
6291ee3c
UD
4338 return REG_ESPACE;
4339 mctx->sub_tops[mctx->nsub_tops]->node = node;
4340 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4341 return REG_NOERROR;
4342}
4343
4344/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4345 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4346
4347static re_sub_match_last_t *
1ba81cea 4348internal_function
e2f55264 4349match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
6291ee3c
UD
4350{
4351 re_sub_match_last_t *new_entry;
951d6408 4352 if (BE (subtop->nlasts == subtop->alasts, 0))
6291ee3c 4353 {
951d6408
UD
4354 int new_alasts = 2 * subtop->alasts + 1;
4355 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4356 re_sub_match_last_t *,
4357 new_alasts);
6291ee3c
UD
4358 if (BE (new_array == NULL, 0))
4359 return NULL;
4360 subtop->lasts = new_array;
951d6408 4361 subtop->alasts = new_alasts;
6291ee3c
UD
4362 }
4363 new_entry = calloc (1, sizeof (re_sub_match_last_t));
951d6408
UD
4364 if (BE (new_entry != NULL, 1))
4365 {
4366 subtop->lasts[subtop->nlasts] = new_entry;
4367 new_entry->node = node;
4368 new_entry->str_idx = str_idx;
4369 ++subtop->nlasts;
4370 }
6291ee3c
UD
4371 return new_entry;
4372}
4373
0742e48e 4374static void
1ba81cea 4375internal_function
e2f55264
UD
4376sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4377 re_dfastate_t **limited_sts, int last_node, int last_str_idx)
0742e48e
UD
4378{
4379 sctx->sifted_states = sifted_sts;
4380 sctx->limited_states = limited_sts;
4381 sctx->last_node = last_node;
4382 sctx->last_str_idx = last_str_idx;
0742e48e
UD
4383 re_node_set_init_empty (&sctx->limits);
4384}