]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regexec.c
Convert 113 more function definitions to prototype style (files with assertions).
[thirdparty/glibc.git] / posix / regexec.c
CommitLineData
3b0bdc72 1/* Extended regular expression matching and search library.
b168057a 2 Copyright (C) 2002-2015 Free Software Foundation, Inc.
3b0bdc72
UD
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
3b0bdc72 19
e054f494
RA
20#include <stdint.h>
21
a9388965 22static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
56b168be 23 int n) internal_function;
3ce12656
UD
24static void match_ctx_clean (re_match_context_t *mctx) internal_function;
25static void match_ctx_free (re_match_context_t *cache) internal_function;
a9388965 26static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
e3a87852
UD
27 int str_idx, int from, int to)
28 internal_function;
01ed6ceb 29static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
e3a87852 30 internal_function;
6291ee3c 31static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
3ce12656 32 int str_idx) internal_function;
6291ee3c 33static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
e3a87852
UD
34 int node, int str_idx)
35 internal_function;
0742e48e 36static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
15a7d175 37 re_dfastate_t **limited_sts, int last_node,
d2c38eb3 38 int last_str_idx)
e3a87852 39 internal_function;
a9388965 40static reg_errcode_t re_search_internal (const regex_t *preg,
15a7d175
UD
41 const char *string, int length,
42 int start, int range, int stop,
43 size_t nmatch, regmatch_t pmatch[],
3ce12656 44 int eflags) internal_function;
ac3d553b 45static int re_search_2_stub (struct re_pattern_buffer *bufp,
15a7d175
UD
46 const char *string1, int length1,
47 const char *string2, int length2,
48 int start, int range, struct re_registers *regs,
3ce12656 49 int stop, int ret_len) internal_function;
ac3d553b 50static int re_search_stub (struct re_pattern_buffer *bufp,
15a7d175
UD
51 const char *string, int length, int start,
52 int range, int stop, struct re_registers *regs,
3ce12656 53 int ret_len) internal_function;
ac3d553b 54static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
3ce12656 55 int nregs, int regs_allocated) internal_function;
e3a87852
UD
56static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
57 internal_function;
15bad1a5 58static int check_matching (re_match_context_t *mctx, int fl_longest_match,
76b864c8 59 int *p_match_first) internal_function;
e3a87852
UD
60static int check_halt_state_context (const re_match_context_t *mctx,
61 const re_dfastate_t *state, int idx)
62 internal_function;
76b864c8 63static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
6b6557e8 64 regmatch_t *prev_idx_match, int cur_node,
3ce12656 65 int cur_idx, int nmatch) internal_function;
1b2c2628 66static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
5cf1ec52 67 int str_idx, int dest_node, int nregs,
15a7d175 68 regmatch_t *regs,
76b864c8
UD
69 re_node_set *eps_via_nodes)
70 internal_function;
612546c6 71static reg_errcode_t set_regs (const regex_t *preg,
15a7d175
UD
72 const re_match_context_t *mctx,
73 size_t nmatch, regmatch_t *pmatch,
3ce12656 74 int fl_backtrack) internal_function;
76b864c8
UD
75static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
76 internal_function;
485d775d 77
434d3784 78#ifdef RE_ENABLE_I18N
e3a87852 79static int sift_states_iter_mb (const re_match_context_t *mctx,
15a7d175 80 re_sift_context_t *sctx,
76b864c8
UD
81 int node_idx, int str_idx, int max_str_idx)
82 internal_function;
434d3784 83#endif /* RE_ENABLE_I18N */
76b864c8
UD
84static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
85 re_sift_context_t *sctx)
86 internal_function;
87static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
e40a38b3 88 re_sift_context_t *sctx, int str_idx,
76b864c8
UD
89 re_node_set *cur_dest)
90 internal_function;
91static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
15a7d175
UD
92 re_sift_context_t *sctx,
93 int str_idx,
76b864c8
UD
94 re_node_set *dest_nodes)
95 internal_function;
96static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
15a7d175 97 re_node_set *dest_nodes,
76b864c8
UD
98 const re_node_set *candidates)
99 internal_function;
01ed6ceb
UD
100static int check_dst_limits (const re_match_context_t *mctx,
101 re_node_set *limits,
e3a87852
UD
102 int dst_node, int dst_idx, int src_node,
103 int src_idx) internal_function;
01ed6ceb 104static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
e40a38b3 105 int boundaries, int subexp_idx,
76b864c8
UD
106 int from_node, int bkref_idx)
107 internal_function;
01ed6ceb 108static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
e40a38b3
UD
109 int limit, int subexp_idx,
110 int node, int str_idx,
111 int bkref_idx) internal_function;
76b864c8 112static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
15a7d175
UD
113 re_node_set *dest_nodes,
114 const re_node_set *candidates,
115 re_node_set *limits,
116 struct re_backref_cache_entry *bkref_ents,
3ce12656 117 int str_idx) internal_function;
76b864c8 118static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
15a7d175 119 re_sift_context_t *sctx,
76b864c8
UD
120 int str_idx, const re_node_set *candidates)
121 internal_function;
76b864c8
UD
122static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
123 re_dfastate_t **dst,
124 re_dfastate_t **src, int num)
125 internal_function;
4c595adb
UD
126static re_dfastate_t *find_recover_state (reg_errcode_t *err,
127 re_match_context_t *mctx) internal_function;
e3a87852 128static re_dfastate_t *transit_state (reg_errcode_t *err,
15a7d175 129 re_match_context_t *mctx,
3ce12656 130 re_dfastate_t *state) internal_function;
4c595adb
UD
131static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
132 re_match_context_t *mctx,
76b864c8
UD
133 re_dfastate_t *next_state)
134 internal_function;
e3a87852 135static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
6291ee3c 136 re_node_set *cur_nodes,
3ce12656 137 int str_idx) internal_function;
c13c99fa 138#if 0
e3a87852
UD
139static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
140 re_match_context_t *mctx,
76b864c8
UD
141 re_dfastate_t *pstate)
142 internal_function;
c13c99fa 143#endif
434d3784 144#ifdef RE_ENABLE_I18N
e3a87852 145static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
76b864c8
UD
146 re_dfastate_t *pstate)
147 internal_function;
434d3784 148#endif /* RE_ENABLE_I18N */
e3a87852 149static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
76b864c8
UD
150 const re_node_set *nodes)
151 internal_function;
e3a87852 152static reg_errcode_t get_subexp (re_match_context_t *mctx,
76b864c8
UD
153 int bkref_node, int bkref_str_idx)
154 internal_function;
e3a87852 155static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
fe9434bb 156 const re_sub_match_top_t *sub_top,
6291ee3c 157 re_sub_match_last_t *sub_last,
76b864c8
UD
158 int bkref_node, int bkref_str)
159 internal_function;
fe9434bb 160static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
3ce12656 161 int subexp_idx, int type) internal_function;
e3a87852 162static reg_errcode_t check_arrival (re_match_context_t *mctx,
6291ee3c
UD
163 state_array_t *path, int top_node,
164 int top_str, int last_node, int last_str,
3ce12656 165 int type) internal_function;
e3a87852 166static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
6291ee3c
UD
167 int str_idx,
168 re_node_set *cur_nodes,
76b864c8
UD
169 re_node_set *next_nodes)
170 internal_function;
6efbd82c 171static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
6291ee3c 172 re_node_set *cur_nodes,
76b864c8
UD
173 int ex_subexp, int type)
174 internal_function;
6efbd82c 175static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
6291ee3c
UD
176 re_node_set *dst_nodes,
177 int target, int ex_subexp,
3ce12656 178 int type) internal_function;
e3a87852 179static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
6291ee3c 180 re_node_set *cur_nodes, int cur_str,
76b864c8
UD
181 int subexp_num, int type)
182 internal_function;
183static int build_trtable (const re_dfa_t *dfa,
ab4b89fe 184 re_dfastate_t *state) internal_function;
434d3784 185#ifdef RE_ENABLE_I18N
c42b4152 186static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
76b864c8
UD
187 const re_string_t *input, int idx)
188 internal_function;
434d3784 189# ifdef _LIBC
c202c2c5 190static unsigned int find_collation_sequence_value (const unsigned char *mbs,
76b864c8
UD
191 size_t name_len)
192 internal_function;
434d3784
UD
193# endif /* _LIBC */
194#endif /* RE_ENABLE_I18N */
01ed6ceb 195static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
15a7d175
UD
196 const re_dfastate_t *state,
197 re_node_set *states_node,
2c05d33f 198 bitset_t *states_ch) internal_function;
e3a87852 199static int check_node_accept (const re_match_context_t *mctx,
76b864c8
UD
200 const re_token_t *node, int idx)
201 internal_function;
a445af0b 202static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
76b864c8 203 internal_function;
3b0bdc72
UD
204\f
205/* Entry point for POSIX code. */
206
207/* regexec searches for a given pattern, specified by PREG, in the
208 string STRING.
209
210 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
211 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
212 least NMATCH elements, and we set them to the offsets of the
213 corresponding matched substrings.
214
215 EFLAGS specifies `execution flags' which affect matching: if
216 REG_NOTBOL is set, then ^ does not match at the beginning of the
217 string; if REG_NOTEOL is set, then $ does not match at the end.
218
219 We return 0 if we find a match and REG_NOMATCH if not. */
220
221int
9dd346ff
JM
222regexec (const regex_t *__restrict preg, const char *__restrict string,
223 size_t nmatch, regmatch_t pmatch[], int eflags)
3b0bdc72 224{
a9388965 225 reg_errcode_t err;
6fefb4e0 226 int start, length;
76b864c8 227 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3f2fb223
UD
228
229 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
230 return REG_BADPAT;
231
6fefb4e0
UD
232 if (eflags & REG_STARTEND)
233 {
234 start = pmatch[0].rm_so;
235 length = pmatch[0].rm_eo;
236 }
237 else
238 {
239 start = 0;
240 length = strlen (string);
241 }
7b918993
UD
242
243 __libc_lock_lock (dfa->lock);
3b0bdc72 244 if (preg->no_sub)
6fefb4e0
UD
245 err = re_search_internal (preg, string, length, start, length - start,
246 length, 0, NULL, eflags);
3b0bdc72 247 else
6fefb4e0
UD
248 err = re_search_internal (preg, string, length, start, length - start,
249 length, nmatch, pmatch, eflags);
7b918993 250 __libc_lock_unlock (dfa->lock);
a9388965 251 return err != REG_NOERROR;
3b0bdc72 252}
3f2fb223 253
3b0bdc72 254#ifdef _LIBC
3f2fb223
UD
255# include <shlib-compat.h>
256versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
257
258# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
259__typeof__ (__regexec) __compat_regexec;
260
261int
78678039 262attribute_compat_text_section
3f2fb223
UD
263__compat_regexec (const regex_t *__restrict preg,
264 const char *__restrict string, size_t nmatch,
265 regmatch_t pmatch[], int eflags)
266{
267 return regexec (preg, string, nmatch, pmatch,
268 eflags & (REG_NOTBOL | REG_NOTEOL));
269}
270compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
271# endif
3b0bdc72
UD
272#endif
273
274/* Entry points for GNU code. */
275
ac3d553b
UD
276/* re_match, re_search, re_match_2, re_search_2
277
278 The former two functions operate on STRING with length LENGTH,
279 while the later two operate on concatenation of STRING1 and STRING2
280 with lengths LENGTH1 and LENGTH2, respectively.
281
282 re_match() matches the compiled pattern in BUFP against the string,
283 starting at index START.
284
285 re_search() first tries matching at index START, then it tries to match
286 starting from index START + 1, and so on. The last start position tried
287 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
288 way as re_match().)
289
290 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
291 the first STOP characters of the concatenation of the strings should be
292 concerned.
293
294 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
295 and all groups is stroed in REGS. (For the "_2" variants, the offsets are
296 computed relative to the concatenation, not relative to the individual
297 strings.)
298
299 On success, re_match* functions return the length of the match, re_search*
300 return the position of the start of the match. Return value -1 means no
301 match was found and -2 indicates an internal error. */
3b0bdc72
UD
302
303int
9dd346ff
JM
304re_match (struct re_pattern_buffer *bufp, const char *string, int length,
305 int start, struct re_registers *regs)
3b0bdc72 306{
ac3d553b 307 return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
3b0bdc72
UD
308}
309#ifdef _LIBC
310weak_alias (__re_match, re_match)
311#endif
312
ac3d553b 313int
9dd346ff
JM
314re_search (struct re_pattern_buffer *bufp, const char *string, int length,
315 int start, int range, struct re_registers *regs)
ac3d553b
UD
316{
317 return re_search_stub (bufp, string, length, start, range, length, regs, 0);
318}
319#ifdef _LIBC
320weak_alias (__re_search, re_search)
321#endif
3b0bdc72
UD
322
323int
9dd346ff
JM
324re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
325 const char *string2, int length2, int start,
326 struct re_registers *regs, int stop)
3b0bdc72 327{
ac3d553b 328 return re_search_2_stub (bufp, string1, length1, string2, length2,
15a7d175 329 start, 0, regs, stop, 1);
3b0bdc72
UD
330}
331#ifdef _LIBC
332weak_alias (__re_match_2, re_match_2)
333#endif
334
3b0bdc72 335int
9dd346ff
JM
336re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
337 const char *string2, int length2, int start, int range,
338 struct re_registers *regs, int stop)
ac3d553b
UD
339{
340 return re_search_2_stub (bufp, string1, length1, string2, length2,
15a7d175 341 start, range, regs, stop, 0);
ac3d553b
UD
342}
343#ifdef _LIBC
344weak_alias (__re_search_2, re_search_2)
345#endif
346
347static int
348re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
15a7d175 349 stop, ret_len)
ac3d553b
UD
350 struct re_pattern_buffer *bufp;
351 const char *string1, *string2;
352 int length1, length2, start, range, stop, ret_len;
353 struct re_registers *regs;
354{
355 const char *str;
356 int rval;
357 int len = length1 + length2;
d044d844 358 char *s = NULL;
ac3d553b 359
42a2c9b5 360 if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
ac3d553b
UD
361 return -2;
362
363 /* Concatenate the strings. */
364 if (length2 > 0)
365 if (length1 > 0)
366 {
d044d844 367 s = re_malloc (char, len);
15a7d175
UD
368
369 if (BE (s == NULL, 0))
370 return -2;
c42b4152
UD
371#ifdef _LIBC
372 memcpy (__mempcpy (s, string1, length1), string2, length2);
373#else
15a7d175
UD
374 memcpy (s, string1, length1);
375 memcpy (s + length1, string2, length2);
c42b4152 376#endif
15a7d175 377 str = s;
ac3d553b
UD
378 }
379 else
380 str = string2;
381 else
382 str = string1;
383
d044d844
PE
384 rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
385 re_free (s);
ac3d553b
UD
386 return rval;
387}
388
389/* The parameters have the same meaning as those of re_search.
390 Additional parameters:
391 If RET_LEN is nonzero the length of the match is returned (re_match style);
392 otherwise the position of the match is returned. */
393
394static int
9dd346ff
JM
395internal_function
396re_search_stub (struct re_pattern_buffer *bufp, const char *string, int length,
397 int start, int range, int stop, struct re_registers *regs,
398 int ret_len)
3b0bdc72 399{
a9388965 400 reg_errcode_t result;
3b0bdc72 401 regmatch_t *pmatch;
ac3d553b
UD
402 int nregs, rval;
403 int eflags = 0;
76b864c8 404 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
ac3d553b
UD
405
406 /* Check for out-of-range. */
a877402c 407 if (BE (start < 0 || start > length, 0))
ac3d553b
UD
408 return -1;
409 if (BE (start + range > length, 0))
410 range = length - start;
a877402c
UD
411 else if (BE (start + range < 0, 0))
412 range = -start;
3b0bdc72 413
7b918993
UD
414 __libc_lock_lock (dfa->lock);
415
3b0bdc72
UD
416 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
417 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
418
ac3d553b
UD
419 /* Compile fastmap if we haven't yet. */
420 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
421 re_compile_fastmap (bufp);
422
423 if (BE (bufp->no_sub, 0))
424 regs = NULL;
3b0bdc72
UD
425
426 /* We need at least 1 register. */
ac3d553b
UD
427 if (regs == NULL)
428 nregs = 1;
429 else if (BE (bufp->regs_allocated == REGS_FIXED &&
15a7d175 430 regs->num_regs < bufp->re_nsub + 1, 0))
ac3d553b
UD
431 {
432 nregs = regs->num_regs;
433 if (BE (nregs < 1, 0))
15a7d175
UD
434 {
435 /* Nothing can be copied to regs. */
436 regs = NULL;
437 nregs = 1;
438 }
ac3d553b
UD
439 }
440 else
441 nregs = bufp->re_nsub + 1;
3b0bdc72 442 pmatch = re_malloc (regmatch_t, nregs);
bc15410e 443 if (BE (pmatch == NULL, 0))
7b918993
UD
444 {
445 rval = -2;
446 goto out;
447 }
3b0bdc72 448
ac3d553b 449 result = re_search_internal (bufp, string, length, start, range, stop,
15a7d175 450 nregs, pmatch, eflags);
3b0bdc72 451
ac3d553b
UD
452 rval = 0;
453
454 /* I hope we needn't fill ther regs with -1's when no match was found. */
455 if (result != REG_NOERROR)
456 rval = -1;
457 else if (regs != NULL)
3b0bdc72 458 {
ac3d553b
UD
459 /* If caller wants register contents data back, copy them. */
460 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
15a7d175 461 bufp->regs_allocated);
ac3d553b 462 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
15a7d175 463 rval = -2;
3b0bdc72
UD
464 }
465
ac3d553b 466 if (BE (rval == 0, 1))
3b0bdc72 467 {
ac3d553b 468 if (ret_len)
15a7d175
UD
469 {
470 assert (pmatch[0].rm_so == start);
471 rval = pmatch[0].rm_eo - start;
472 }
ac3d553b 473 else
15a7d175 474 rval = pmatch[0].rm_so;
3b0bdc72 475 }
3b0bdc72 476 re_free (pmatch);
7b918993
UD
477 out:
478 __libc_lock_unlock (dfa->lock);
3b0bdc72
UD
479 return rval;
480}
3b0bdc72 481
ac3d553b 482static unsigned
9dd346ff
JM
483internal_function
484re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
485 int regs_allocated)
3b0bdc72 486{
ac3d553b
UD
487 int rval = REGS_REALLOCATE;
488 int i;
489 int need_regs = nregs + 1;
490 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
491 uses. */
492
493 /* Have the register data arrays been allocated? */
494 if (regs_allocated == REGS_UNALLOCATED)
44777771 495 { /* No. So allocate them with malloc. */
a96c63ed 496 regs->start = re_malloc (regoff_t, need_regs);
74bc9f14 497 if (BE (regs->start == NULL, 0))
15a7d175 498 return REGS_UNALLOCATED;
74bc9f14
PE
499 regs->end = re_malloc (regoff_t, need_regs);
500 if (BE (regs->end == NULL, 0))
501 {
502 re_free (regs->start);
503 return REGS_UNALLOCATED;
504 }
ac3d553b
UD
505 regs->num_regs = need_regs;
506 }
507 else if (regs_allocated == REGS_REALLOCATE)
508 { /* Yes. If we need more elements than were already
15a7d175
UD
509 allocated, reallocate them. If we need fewer, just
510 leave it alone. */
951d6408 511 if (BE (need_regs > regs->num_regs, 0))
15a7d175 512 {
44777771 513 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
74bc9f14
PE
514 regoff_t *new_end;
515 if (BE (new_start == NULL, 0))
a96c63ed 516 return REGS_UNALLOCATED;
74bc9f14
PE
517 new_end = re_realloc (regs->end, regoff_t, need_regs);
518 if (BE (new_end == NULL, 0))
519 {
520 re_free (new_start);
521 return REGS_UNALLOCATED;
522 }
44777771
UD
523 regs->start = new_start;
524 regs->end = new_end;
15a7d175
UD
525 regs->num_regs = need_regs;
526 }
ac3d553b
UD
527 }
528 else
529 {
530 assert (regs_allocated == REGS_FIXED);
531 /* This function may not be called with REGS_FIXED and nregs too big. */
532 assert (regs->num_regs >= nregs);
533 rval = REGS_FIXED;
534 }
535
536 /* Copy the regs. */
537 for (i = 0; i < nregs; ++i)
538 {
539 regs->start[i] = pmatch[i].rm_so;
540 regs->end[i] = pmatch[i].rm_eo;
541 }
542 for ( ; i < regs->num_regs; ++i)
543 regs->start[i] = regs->end[i] = -1;
544
545 return rval;
3b0bdc72 546}
3b0bdc72
UD
547
548/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
549 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
550 this memory for recording register information. STARTS and ENDS
551 must be allocated using the malloc library routine, and must each
552 be at least NUM_REGS * sizeof (regoff_t) bytes long.
553
554 If NUM_REGS == 0, then subsequent matches should allocate their own
555 register data.
556
557 Unless this function is called, the first search or match using
558 PATTERN_BUFFER will allocate its own register data, without
559 freeing the old data. */
560
561void
9dd346ff
JM
562re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
563 unsigned num_regs, regoff_t *starts, regoff_t *ends)
3b0bdc72
UD
564{
565 if (num_regs)
566 {
567 bufp->regs_allocated = REGS_REALLOCATE;
568 regs->num_regs = num_regs;
569 regs->start = starts;
570 regs->end = ends;
571 }
572 else
573 {
574 bufp->regs_allocated = REGS_UNALLOCATED;
575 regs->num_regs = 0;
576 regs->start = regs->end = (regoff_t *) 0;
577 }
578}
579#ifdef _LIBC
580weak_alias (__re_set_registers, re_set_registers)
581#endif
582\f
583/* Entry points compatible with 4.2 BSD regex library. We don't define
584 them unless specifically requested. */
585
586#if defined _REGEX_RE_COMP || defined _LIBC
587int
588# ifdef _LIBC
589weak_function
590# endif
591re_exec (s)
592 const char *s;
593{
594 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
595}
596#endif /* _REGEX_RE_COMP */
597\f
3b0bdc72
UD
598/* Internal entry point. */
599
600/* Searches for a compiled pattern PREG in the string STRING, whose
601 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
602 mingings with regexec. START, and RANGE have the same meanings
603 with re_search.
a9388965
UD
604 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
605 otherwise return the error code.
3b0bdc72
UD
606 Note: We assume front end functions already check ranges.
607 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
608
a9388965 609static reg_errcode_t
2da42bc0 610__attribute_warn_unused_result__
ac3d553b 611re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
15a7d175 612 eflags)
3b0bdc72
UD
613 const regex_t *preg;
614 const char *string;
ac3d553b 615 int length, start, range, stop, eflags;
3b0bdc72
UD
616 size_t nmatch;
617 regmatch_t pmatch[];
618{
a9388965 619 reg_errcode_t err;
76b864c8 620 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
612546c6 621 int left_lim, right_lim, incr;
bb677c95 622 int fl_longest_match, match_first, match_kind, match_last = -1;
963d8d78 623 int extra_nmatch;
bb677c95 624 int sb, ch;
e3a87852
UD
625#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
626 re_match_context_t mctx = { .dfa = dfa };
627#else
3b0bdc72 628 re_match_context_t mctx;
e3a87852 629#endif
bb677c95
UD
630 char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
631 && range && !preg->can_be_null) ? preg->fastmap : NULL;
997470b3 632 RE_TRANSLATE_TYPE t = preg->translate;
3b0bdc72 633
e3a87852
UD
634#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
635 memset (&mctx, '\0', sizeof (re_match_context_t));
636 mctx.dfa = dfa;
637#endif
638
963d8d78
UD
639 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
640 nmatch -= extra_nmatch;
641
3b0bdc72 642 /* Check if the DFA haven't been compiled. */
bc15410e 643 if (BE (preg->used == 0 || dfa->init_state == NULL
15a7d175
UD
644 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
645 || dfa->init_state_begbuf == NULL, 0))
a9388965 646 return REG_NOMATCH;
3b0bdc72 647
c70f81dd
UD
648#ifdef DEBUG
649 /* We assume front-end functions already check them. */
650 assert (start + range >= 0 && start + range <= length);
651#endif
652
653 /* If initial states with non-begbuf contexts have no elements,
654 the regex must be anchored. If preg->newline_anchor is set,
655 we'll never use init_state_nl, so do not check it. */
656 if (dfa->init_state->nodes.nelem == 0
657 && dfa->init_state_word->nodes.nelem == 0
658 && (dfa->init_state_nl->nodes.nelem == 0
659 || !preg->newline_anchor))
660 {
661 if (start != 0 && start + range != 0)
2da42bc0 662 return REG_NOMATCH;
c70f81dd
UD
663 start = range = 0;
664 }
665
3b0bdc72 666 /* We must check the longest matching, if nmatch > 0. */
6291ee3c 667 fl_longest_match = (nmatch != 0 || dfa->nbackref);
3b0bdc72 668
56b168be 669 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
f0c7c524 670 preg->translate, preg->syntax & RE_ICASE, dfa);
612546c6 671 if (BE (err != REG_NOERROR, 0))
1b2c2628 672 goto free_return;
56b168be
UD
673 mctx.input.stop = stop;
674 mctx.input.raw_stop = stop;
675 mctx.input.newline_anchor = preg->newline_anchor;
612546c6 676
56b168be 677 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
612546c6 678 if (BE (err != REG_NOERROR, 0))
1b2c2628 679 goto free_return;
612546c6 680
3b0bdc72
UD
681 /* We will log all the DFA states through which the dfa pass,
682 if nmatch > 1, or this dfa has "multibyte node", which is a
683 back-reference or a node which can accept multibyte character or
684 multi character collating element. */
685 if (nmatch > 1 || dfa->has_mb_node)
a9388965 686 {
eadc09f2
PE
687 /* Avoid overflow. */
688 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
689 {
690 err = REG_ESPACE;
691 goto free_return;
692 }
693
56b168be 694 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
612546c6 695 if (BE (mctx.state_log == NULL, 0))
15a7d175
UD
696 {
697 err = REG_ESPACE;
698 goto free_return;
699 }
a9388965 700 }
3b0bdc72 701 else
612546c6 702 mctx.state_log = NULL;
3b0bdc72 703
612546c6 704 match_first = start;
56b168be
UD
705 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
706 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
612546c6 707
3b0bdc72 708 /* Check incrementally whether of not the input string match. */
612546c6
UD
709 incr = (range < 0) ? -1 : 1;
710 left_lim = (range < 0) ? start + range : start;
711 right_lim = (range < 0) ? start : start + range;
3c0fb574 712 sb = dfa->mb_cur_max == 1;
bb677c95
UD
713 match_kind =
714 (fastmap
715 ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
716 | (range >= 0 ? 2 : 0)
717 | (t != NULL ? 1 : 0))
718 : 8);
719
720 for (;; match_first += incr)
3b0bdc72 721 {
bb677c95
UD
722 err = REG_NOMATCH;
723 if (match_first < left_lim || right_lim < match_first)
724 goto free_return;
725
726 /* Advance as rapidly as possible through the string, until we
727 find a plausible place to start matching. This may be done
728 with varying efficiency, so there are various possibilities:
729 only the most common of them are specialized, in order to
730 save on code size. We use a switch statement for speed. */
731 switch (match_kind)
1b2c2628 732 {
bb677c95
UD
733 case 8:
734 /* No fastmap. */
735 break;
736
737 case 7:
738 /* Fastmap with single-byte translation, match forward. */
739 while (BE (match_first < right_lim, 1)
740 && !fastmap[t[(unsigned char) string[match_first]]])
741 ++match_first;
742 goto forward_match_found_start_or_reached_end;
743
744 case 6:
745 /* Fastmap without translation, match forward. */
746 while (BE (match_first < right_lim, 1)
747 && !fastmap[(unsigned char) string[match_first]])
748 ++match_first;
749
750 forward_match_found_start_or_reached_end:
751 if (BE (match_first == right_lim, 0))
1b2c2628 752 {
bb677c95
UD
753 ch = match_first >= length
754 ? 0 : (unsigned char) string[match_first];
755 if (!fastmap[t ? t[ch] : ch])
756 goto free_return;
1b2c2628 757 }
bb677c95
UD
758 break;
759
760 case 4:
761 case 5:
762 /* Fastmap without multi-byte translation, match backwards. */
763 while (match_first >= left_lim)
1b2c2628 764 {
bb677c95
UD
765 ch = match_first >= length
766 ? 0 : (unsigned char) string[match_first];
767 if (fastmap[t ? t[ch] : ch])
768 break;
769 --match_first;
770 }
771 if (match_first < left_lim)
772 goto free_return;
773 break;
1b2c2628 774
bb677c95
UD
775 default:
776 /* In this case, we can't determine easily the current byte,
777 since it might be a component byte of a multibyte
778 character. Then we use the constructed buffer instead. */
779 for (;;)
780 {
781 /* If MATCH_FIRST is out of the valid range, reconstruct the
782 buffers. */
783 unsigned int offset = match_first - mctx.input.raw_mbs_idx;
784 if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
1b2c2628 785 {
bb677c95
UD
786 err = re_string_reconstruct (&mctx.input, match_first,
787 eflags);
788 if (BE (err != REG_NOERROR, 0))
789 goto free_return;
790
791 offset = match_first - mctx.input.raw_mbs_idx;
1b2c2628 792 }
bb677c95
UD
793 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
794 Note that MATCH_FIRST must not be smaller than 0. */
795 ch = (match_first >= length
796 ? 0 : re_string_byte_at (&mctx.input, offset));
797 if (fastmap[ch])
1b2c2628 798 break;
bb677c95
UD
799 match_first += incr;
800 if (match_first < left_lim || match_first > right_lim)
2da42bc0
UD
801 {
802 err = REG_NOMATCH;
803 goto free_return;
804 }
1b2c2628 805 }
bb677c95 806 break;
1b2c2628 807 }
612546c6 808
1b2c2628 809 /* Reconstruct the buffers so that the matcher can assume that
fe9434bb 810 the matching starts from the beginning of the buffer. */
56b168be 811 err = re_string_reconstruct (&mctx.input, match_first, eflags);
1b2c2628
UD
812 if (BE (err != REG_NOERROR, 0))
813 goto free_return;
bb677c95 814
3b0bdc72 815#ifdef RE_ENABLE_I18N
bb677c95
UD
816 /* Don't consider this char as a possible match start if it part,
817 yet isn't the head, of a multibyte character. */
818 if (!sb && !re_string_first_byte (&mctx.input, 0))
819 continue;
3b0bdc72 820#endif
bb677c95
UD
821
822 /* It seems to be appropriate one, then use the matcher. */
823 /* We assume that the matching starts from 0. */
824 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
825 match_last = check_matching (&mctx, fl_longest_match,
826 range >= 0 ? &match_first : NULL);
827 if (match_last != -1)
1b2c2628 828 {
bb677c95 829 if (BE (match_last == -2, 0))
1b2c2628 830 {
bb677c95
UD
831 err = REG_ESPACE;
832 goto free_return;
833 }
834 else
835 {
836 mctx.match_last = match_last;
837 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
1b2c2628 838 {
bb677c95
UD
839 re_dfastate_t *pstate = mctx.state_log[match_last];
840 mctx.last_node = check_halt_state_context (&mctx, pstate,
841 match_last);
1b2c2628 842 }
bb677c95
UD
843 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
844 || dfa->nbackref)
6291ee3c 845 {
bb677c95
UD
846 err = prune_impossible_nodes (&mctx);
847 if (err == REG_NOERROR)
848 break;
849 if (BE (err != REG_NOMATCH, 0))
850 goto free_return;
851 match_last = -1;
6291ee3c 852 }
bb677c95
UD
853 else
854 break; /* We found a match. */
1b2c2628
UD
855 }
856 }
bb677c95
UD
857
858 match_ctx_clean (&mctx);
3b0bdc72
UD
859 }
860
bb677c95
UD
861#ifdef DEBUG
862 assert (match_last != -1);
863 assert (err == REG_NOERROR);
864#endif
865
3b0bdc72 866 /* Set pmatch[] if we need. */
bb677c95 867 if (nmatch > 0)
3b0bdc72
UD
868 {
869 int reg_idx;
870
871 /* Initialize registers. */
97fd3a30 872 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
15a7d175 873 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
3b0bdc72
UD
874
875 /* Set the points where matching start/end. */
612546c6 876 pmatch[0].rm_so = 0;
6291ee3c 877 pmatch[0].rm_eo = mctx.match_last;
3b0bdc72
UD
878
879 if (!preg->no_sub && nmatch > 1)
15a7d175 880 {
15a7d175
UD
881 err = set_regs (preg, &mctx, nmatch, pmatch,
882 dfa->has_plural_match && dfa->nbackref > 0);
883 if (BE (err != REG_NOERROR, 0))
884 goto free_return;
885 }
612546c6
UD
886
887 /* At last, add the offset to the each registers, since we slided
97fd3a30
UD
888 the buffers so that we could assume that the matching starts
889 from 0. */
612546c6 890 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
15a7d175
UD
891 if (pmatch[reg_idx].rm_so != -1)
892 {
c0d5034e 893#ifdef RE_ENABLE_I18N
56b168be 894 if (BE (mctx.input.offsets_needed != 0, 0))
bb3f4825 895 {
01ed6ceb
UD
896 pmatch[reg_idx].rm_so =
897 (pmatch[reg_idx].rm_so == mctx.input.valid_len
898 ? mctx.input.valid_raw_len
899 : mctx.input.offsets[pmatch[reg_idx].rm_so]);
900 pmatch[reg_idx].rm_eo =
901 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
902 ? mctx.input.valid_raw_len
903 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
bb3f4825 904 }
c0d5034e 905#else
56b168be 906 assert (mctx.input.offsets_needed == 0);
c0d5034e 907#endif
15a7d175
UD
908 pmatch[reg_idx].rm_so += match_first;
909 pmatch[reg_idx].rm_eo += match_first;
910 }
963d8d78
UD
911 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
912 {
913 pmatch[nmatch + reg_idx].rm_so = -1;
914 pmatch[nmatch + reg_idx].rm_eo = -1;
915 }
c06a6956
UD
916
917 if (dfa->subexp_map)
2da42bc0
UD
918 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
919 if (dfa->subexp_map[reg_idx] != reg_idx)
920 {
921 pmatch[reg_idx + 1].rm_so
922 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
923 pmatch[reg_idx + 1].rm_eo
924 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
925 }
3b0bdc72 926 }
bb677c95 927
1b2c2628 928 free_return:
612546c6 929 re_free (mctx.state_log);
3b0bdc72
UD
930 if (dfa->nbackref)
931 match_ctx_free (&mctx);
56b168be 932 re_string_destruct (&mctx.input);
1b2c2628 933 return err;
3b0bdc72
UD
934}
935
6291ee3c 936static reg_errcode_t
9dd346ff
JM
937internal_function __attribute_warn_unused_result__
938prune_impossible_nodes (re_match_context_t *mctx)
6291ee3c 939{
76b864c8 940 const re_dfa_t *const dfa = mctx->dfa;
6291ee3c
UD
941 int halt_node, match_last;
942 reg_errcode_t ret;
6291ee3c
UD
943 re_dfastate_t **sifted_states;
944 re_dfastate_t **lim_states = NULL;
945 re_sift_context_t sctx;
946#ifdef DEBUG
947 assert (mctx->state_log != NULL);
948#endif
949 match_last = mctx->match_last;
950 halt_node = mctx->last_node;
4cd02867
PE
951
952 /* Avoid overflow. */
953 if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
954 return REG_ESPACE;
955
6291ee3c
UD
956 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
957 if (BE (sifted_states == NULL, 0))
958 {
959 ret = REG_ESPACE;
960 goto free_return;
961 }
962 if (dfa->nbackref)
963 {
964 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
965 if (BE (lim_states == NULL, 0))
966 {
967 ret = REG_ESPACE;
968 goto free_return;
969 }
970 while (1)
971 {
972 memset (lim_states, '\0',
973 sizeof (re_dfastate_t *) * (match_last + 1));
6291ee3c 974 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
d2c38eb3 975 match_last);
e3a87852 976 ret = sift_states_backward (mctx, &sctx);
6291ee3c
UD
977 re_node_set_free (&sctx.limits);
978 if (BE (ret != REG_NOERROR, 0))
979 goto free_return;
980 if (sifted_states[0] != NULL || lim_states[0] != NULL)
981 break;
982 do
983 {
984 --match_last;
985 if (match_last < 0)
986 {
987 ret = REG_NOMATCH;
988 goto free_return;
989 }
97fd3a30
UD
990 } while (mctx->state_log[match_last] == NULL
991 || !mctx->state_log[match_last]->halt);
e3a87852 992 halt_node = check_halt_state_context (mctx,
6291ee3c 993 mctx->state_log[match_last],
e3a87852 994 match_last);
6291ee3c
UD
995 }
996 ret = merge_state_array (dfa, sifted_states, lim_states,
997 match_last + 1);
998 re_free (lim_states);
999 lim_states = NULL;
1000 if (BE (ret != REG_NOERROR, 0))
1001 goto free_return;
1002 }
1003 else
1004 {
d2c38eb3 1005 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
e3a87852 1006 ret = sift_states_backward (mctx, &sctx);
6291ee3c
UD
1007 re_node_set_free (&sctx.limits);
1008 if (BE (ret != REG_NOERROR, 0))
1009 goto free_return;
76c7f2cd
UD
1010 if (sifted_states[0] == NULL)
1011 {
1012 ret = REG_NOMATCH;
1013 goto free_return;
1014 }
6291ee3c
UD
1015 }
1016 re_free (mctx->state_log);
1017 mctx->state_log = sifted_states;
1018 sifted_states = NULL;
1019 mctx->last_node = halt_node;
1020 mctx->match_last = match_last;
1021 ret = REG_NOERROR;
1022 free_return:
1023 re_free (sifted_states);
1024 re_free (lim_states);
1025 return ret;
1026}
1027
a9388965 1028/* Acquire an initial state and return it.
3b0bdc72
UD
1029 We must select appropriate initial state depending on the context,
1030 since initial states may have constraints like "\<", "^", etc.. */
1031
bb3f4825 1032static inline re_dfastate_t *
e2f55264
UD
1033__attribute ((always_inline)) internal_function
1034acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1035 int idx)
3b0bdc72 1036{
76b864c8 1037 const re_dfa_t *const dfa = mctx->dfa;
3b0bdc72
UD
1038 if (dfa->init_state->has_constraint)
1039 {
1040 unsigned int context;
56b168be 1041 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
3b0bdc72 1042 if (IS_WORD_CONTEXT (context))
15a7d175 1043 return dfa->init_state_word;
3b0bdc72 1044 else if (IS_ORDINARY_CONTEXT (context))
15a7d175 1045 return dfa->init_state;
3b0bdc72 1046 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
15a7d175 1047 return dfa->init_state_begbuf;
3b0bdc72 1048 else if (IS_NEWLINE_CONTEXT (context))
15a7d175 1049 return dfa->init_state_nl;
3b0bdc72 1050 else if (IS_BEGBUF_CONTEXT (context))
15a7d175
UD
1051 {
1052 /* It is relatively rare case, then calculate on demand. */
4c595adb
UD
1053 return re_acquire_state_context (err, dfa,
1054 dfa->init_state->entrance_nodes,
1055 context);
15a7d175 1056 }
3b0bdc72 1057 else
15a7d175
UD
1058 /* Must not happen? */
1059 return dfa->init_state;
3b0bdc72
UD
1060 }
1061 else
1062 return dfa->init_state;
1063}
1064
1065/* Check whether the regular expression match input string INPUT or not,
a9388965
UD
1066 and return the index where the matching end, return -1 if not match,
1067 or return -2 in case of an error.
612546c6 1068 FL_LONGEST_MATCH means we want the POSIX longest matching.
15bad1a5
UD
1069 If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1070 next place where we may want to try matching.
612546c6
UD
1071 Note that the matcher assume that the maching starts from the current
1072 index of the buffer. */
3b0bdc72
UD
1073
1074static int
2da42bc0 1075internal_function __attribute_warn_unused_result__
e2f55264
UD
1076check_matching (re_match_context_t *mctx, int fl_longest_match,
1077 int *p_match_first)
3b0bdc72 1078{
76b864c8 1079 const re_dfa_t *const dfa = mctx->dfa;
a9388965 1080 reg_errcode_t err;
612546c6
UD
1081 int match = 0;
1082 int match_last = -1;
56b168be 1083 int cur_str_idx = re_string_cur_idx (&mctx->input);
3b0bdc72 1084 re_dfastate_t *cur_state;
4c595adb
UD
1085 int at_init_state = p_match_first != NULL;
1086 int next_start_idx = cur_str_idx;
3b0bdc72 1087
4c595adb 1088 err = REG_NOERROR;
e3a87852 1089 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
4c595adb 1090 /* An initial state must not be NULL (invalid). */
bc15410e 1091 if (BE (cur_state == NULL, 0))
4c595adb
UD
1092 {
1093 assert (err == REG_ESPACE);
1094 return -2;
1095 }
0742e48e 1096
4c595adb 1097 if (mctx->state_log != NULL)
6291ee3c 1098 {
4c595adb 1099 mctx->state_log[cur_str_idx] = cur_state;
6291ee3c 1100
4c595adb
UD
1101 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1102 later. E.g. Processing back references. */
1103 if (BE (dfa->nbackref, 0))
bb3f4825 1104 {
4c595adb
UD
1105 at_init_state = 0;
1106 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
bb3f4825
UD
1107 if (BE (err != REG_NOERROR, 0))
1108 return err;
4c595adb
UD
1109
1110 if (cur_state->has_backref)
1111 {
1112 err = transit_state_bkref (mctx, &cur_state->nodes);
1113 if (BE (err != REG_NOERROR, 0))
2da42bc0 1114 return err;
4c595adb 1115 }
bb3f4825 1116 }
0742e48e
UD
1117 }
1118
3b0bdc72 1119 /* If the RE accepts NULL string. */
bb3f4825 1120 if (BE (cur_state->halt, 0))
3b0bdc72
UD
1121 {
1122 if (!cur_state->has_constraint
e3a87852 1123 || check_halt_state_context (mctx, cur_state, cur_str_idx))
15a7d175
UD
1124 {
1125 if (!fl_longest_match)
1126 return cur_str_idx;
1127 else
1128 {
1129 match_last = cur_str_idx;
1130 match = 1;
1131 }
1132 }
3b0bdc72
UD
1133 }
1134
56b168be 1135 while (!re_string_eoi (&mctx->input))
3b0bdc72 1136 {
15bad1a5 1137 re_dfastate_t *old_state = cur_state;
bb677c95
UD
1138 int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1139
8887a920
UD
1140 if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1141 && mctx->input.bufs_len < mctx->input.len)
2da42bc0
UD
1142 || (BE (next_char_idx >= mctx->input.valid_len, 0)
1143 && mctx->input.valid_len < mctx->input.len))
1144 {
a445af0b 1145 err = extend_buffers (mctx, next_char_idx + 1);
2da42bc0 1146 if (BE (err != REG_NOERROR, 0))
bb677c95
UD
1147 {
1148 assert (err == REG_ESPACE);
1149 return -2;
1150 }
2da42bc0 1151 }
bb677c95 1152
e3a87852 1153 cur_state = transit_state (&err, mctx, cur_state);
4c595adb
UD
1154 if (mctx->state_log != NULL)
1155 cur_state = merge_state_with_log (&err, mctx, cur_state);
15bad1a5 1156
4c595adb 1157 if (cur_state == NULL)
15a7d175 1158 {
4c595adb
UD
1159 /* Reached the invalid state or an error. Try to recover a valid
1160 state using the state log, if available and if we have not
1161 already found a valid (even if not the longest) match. */
15a7d175
UD
1162 if (BE (err != REG_NOERROR, 0))
1163 return -2;
4c595adb
UD
1164
1165 if (mctx->state_log == NULL
1166 || (match && !fl_longest_match)
1167 || (cur_state = find_recover_state (&err, mctx)) == NULL)
15a7d175 1168 break;
4c595adb
UD
1169 }
1170
bb677c95 1171 if (BE (at_init_state, 0))
4c595adb
UD
1172 {
1173 if (old_state == cur_state)
bb677c95 1174 next_start_idx = next_char_idx;
c13c99fa 1175 else
4c595adb 1176 at_init_state = 0;
15a7d175 1177 }
3b0bdc72 1178
4c595adb 1179 if (cur_state->halt)
15a7d175 1180 {
4c595adb 1181 /* Reached a halt state.
15a7d175
UD
1182 Check the halt state can satisfy the current context. */
1183 if (!cur_state->has_constraint
e3a87852 1184 || check_halt_state_context (mctx, cur_state,
56b168be 1185 re_string_cur_idx (&mctx->input)))
15a7d175
UD
1186 {
1187 /* We found an appropriate halt state. */
56b168be 1188 match_last = re_string_cur_idx (&mctx->input);
15a7d175 1189 match = 1;
bb677c95
UD
1190
1191 /* We found a match, do not modify match_first below. */
1192 p_match_first = NULL;
15a7d175
UD
1193 if (!fl_longest_match)
1194 break;
1195 }
1196 }
bb677c95 1197 }
15bad1a5 1198
bb677c95 1199 if (p_match_first)
4c595adb 1200 *p_match_first += next_start_idx;
15bad1a5 1201
3b0bdc72
UD
1202 return match_last;
1203}
1204
1205/* Check NODE match the current context. */
1206
e2f55264
UD
1207static int
1208internal_function
1209check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
3b0bdc72 1210{
3b0bdc72 1211 re_token_type_t type = dfa->nodes[node].type;
485d775d
UD
1212 unsigned int constraint = dfa->nodes[node].constraint;
1213 if (type != END_OF_RE)
3b0bdc72 1214 return 0;
485d775d
UD
1215 if (!constraint)
1216 return 1;
1217 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
3b0bdc72
UD
1218 return 0;
1219 return 1;
1220}
1221
1222/* Check the halt state STATE match the current context.
1223 Return 0 if not match, if the node, STATE has, is a halt node and
1224 match the context, return the node. */
1225
1226static int
1ba81cea 1227internal_function
e2f55264
UD
1228check_halt_state_context (const re_match_context_t *mctx,
1229 const re_dfastate_t *state, int idx)
3b0bdc72 1230{
3b0bdc72
UD
1231 int i;
1232 unsigned int context;
1233#ifdef DEBUG
1234 assert (state->halt);
1235#endif
56b168be 1236 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
3b0bdc72 1237 for (i = 0; i < state->nodes.nelem; ++i)
e3a87852 1238 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
3b0bdc72
UD
1239 return state->nodes.elems[i];
1240 return 0;
1241}
1242
a9388965
UD
1243/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1244 corresponding to the DFA).
1245 Return the destination node, and update EPS_VIA_NODES, return -1 in case
1246 of errors. */
3b0bdc72
UD
1247
1248static int
e2f55264
UD
1249internal_function
1250proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
1251 int *pidx, int node, re_node_set *eps_via_nodes,
1252 struct re_fail_stack_t *fs)
3b0bdc72 1253{
76b864c8 1254 const re_dfa_t *const dfa = mctx->dfa;
2d87db5b 1255 int i, err;
3b0bdc72
UD
1256 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1257 {
485d775d 1258 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
5cf1ec52
UD
1259 re_node_set *edests = &dfa->edests[node];
1260 int dest_node;
a9388965 1261 err = re_node_set_insert (eps_via_nodes, node);
bc15410e 1262 if (BE (err < 0, 0))
44777771 1263 return -2;
5cf1ec52
UD
1264 /* Pick up a valid destination, or return -1 if none is found. */
1265 for (dest_node = -1, i = 0; i < edests->nelem; ++i)
15a7d175 1266 {
5cf1ec52 1267 int candidate = edests->elems[i];
15a7d175
UD
1268 if (!re_node_set_contains (cur_nodes, candidate))
1269 continue;
2da42bc0 1270 if (dest_node == -1)
5cf1ec52
UD
1271 dest_node = candidate;
1272
2da42bc0 1273 else
5cf1ec52
UD
1274 {
1275 /* In order to avoid infinite loop like "(a*)*", return the second
2da42bc0 1276 epsilon-transition if the first was already considered. */
5cf1ec52 1277 if (re_node_set_contains (eps_via_nodes, dest_node))
2da42bc0 1278 return candidate;
5cf1ec52
UD
1279
1280 /* Otherwise, push the second epsilon-transition on the fail stack. */
1281 else if (fs != NULL
1282 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
2da42bc0 1283 eps_via_nodes))
5cf1ec52
UD
1284 return -2;
1285
1286 /* We know we are going to exit. */
1287 break;
1288 }
1289 }
1290 return dest_node;
3b0bdc72
UD
1291 }
1292 else
1293 {
485d775d 1294 int naccepted = 0;
3b0bdc72 1295 re_token_type_t type = dfa->nodes[node].type;
3b0bdc72 1296
434d3784 1297#ifdef RE_ENABLE_I18N
02f3550c 1298 if (dfa->nodes[node].accept_mb)
56b168be 1299 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
434d3784
UD
1300 else
1301#endif /* RE_ENABLE_I18N */
1302 if (type == OP_BACK_REF)
15a7d175 1303 {
d8f73de8 1304 int subexp_idx = dfa->nodes[node].opr.idx + 1;
15a7d175
UD
1305 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1306 if (fs != NULL)
1307 {
1308 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1309 return -1;
1310 else if (naccepted)
1311 {
56b168be 1312 char *buf = (char *) re_string_get_buffer (&mctx->input);
6291ee3c
UD
1313 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1314 naccepted) != 0)
15a7d175
UD
1315 return -1;
1316 }
1317 }
1318
1319 if (naccepted == 0)
1320 {
2d87db5b 1321 int dest_node;
15a7d175
UD
1322 err = re_node_set_insert (eps_via_nodes, node);
1323 if (BE (err < 0, 0))
1324 return -2;
1325 dest_node = dfa->edests[node].elems[0];
1326 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1327 dest_node))
1328 return dest_node;
1329 }
1330 }
3b0bdc72
UD
1331
1332 if (naccepted != 0
e3a87852 1333 || check_node_accept (mctx, dfa->nodes + node, *pidx))
15a7d175 1334 {
2d87db5b 1335 int dest_node = dfa->nexts[node];
15a7d175
UD
1336 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1337 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1338 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1339 dest_node)))
1340 return -1;
1341 re_node_set_empty (eps_via_nodes);
1342 return dest_node;
1343 }
3b0bdc72 1344 }
a3022b82
UD
1345 return -1;
1346}
1347
1348static reg_errcode_t
2da42bc0 1349internal_function __attribute_warn_unused_result__
e2f55264
UD
1350push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1351 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
a3022b82
UD
1352{
1353 reg_errcode_t err;
1354 int num = fs->num++;
1355 if (fs->num == fs->alloc)
1356 {
1b2c2628 1357 struct re_fail_stack_ent_t *new_array;
1b2c2628 1358 new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
44777771 1359 * fs->alloc * 2));
1b2c2628 1360 if (new_array == NULL)
15a7d175 1361 return REG_ESPACE;
44777771 1362 fs->alloc *= 2;
1b2c2628 1363 fs->stack = new_array;
a3022b82
UD
1364 }
1365 fs->stack[num].idx = str_idx;
5cf1ec52 1366 fs->stack[num].node = dest_node;
a3022b82 1367 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
5a299c96
UD
1368 if (fs->stack[num].regs == NULL)
1369 return REG_ESPACE;
a3022b82
UD
1370 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1371 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1372 return err;
1373}
1b2c2628 1374
a3022b82 1375static int
e2f55264
UD
1376internal_function
1377pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1378 regmatch_t *regs, re_node_set *eps_via_nodes)
a3022b82
UD
1379{
1380 int num = --fs->num;
1381 assert (num >= 0);
44777771 1382 *pidx = fs->stack[num].idx;
a3022b82
UD
1383 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1384 re_node_set_free (eps_via_nodes);
485d775d 1385 re_free (fs->stack[num].regs);
a3022b82
UD
1386 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1387 return fs->stack[num].node;
3b0bdc72
UD
1388}
1389
1390/* Set the positions where the subexpressions are starts/ends to registers
1391 PMATCH.
1392 Note: We assume that pmatch[0] is already set, and
97fd3a30 1393 pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
3b0bdc72 1394
a9388965 1395static reg_errcode_t
2da42bc0 1396internal_function __attribute_warn_unused_result__
e2f55264
UD
1397set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1398 regmatch_t *pmatch, int fl_backtrack)
3b0bdc72 1399{
76b864c8 1400 const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
963d8d78 1401 int idx, cur_node;
3b0bdc72 1402 re_node_set eps_via_nodes;
a3022b82 1403 struct re_fail_stack_t *fs;
6b6557e8
UD
1404 struct re_fail_stack_t fs_body = { 0, 2, NULL };
1405 regmatch_t *prev_idx_match;
2d87db5b 1406 int prev_idx_match_malloced = 0;
6b6557e8 1407
3b0bdc72
UD
1408#ifdef DEBUG
1409 assert (nmatch > 1);
612546c6 1410 assert (mctx->state_log != NULL);
3b0bdc72 1411#endif
a3022b82
UD
1412 if (fl_backtrack)
1413 {
1414 fs = &fs_body;
1415 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
6b6557e8
UD
1416 if (fs->stack == NULL)
1417 return REG_ESPACE;
a3022b82
UD
1418 }
1419 else
1420 fs = NULL;
6b6557e8 1421
3b0bdc72 1422 cur_node = dfa->init_node;
3b0bdc72 1423 re_node_set_init_empty (&eps_via_nodes);
6b6557e8 1424
2d87db5b
UD
1425 if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1426 prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1427 else
1428 {
1429 prev_idx_match = re_malloc (regmatch_t, nmatch);
1430 if (prev_idx_match == NULL)
1431 {
1432 free_fail_stack_return (fs);
1433 return REG_ESPACE;
1434 }
1435 prev_idx_match_malloced = 1;
1436 }
963d8d78 1437 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
6b6557e8 1438
3b0bdc72
UD
1439 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1440 {
963d8d78 1441 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
6b6557e8 1442
a3022b82 1443 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
15a7d175
UD
1444 {
1445 int reg_idx;
1446 if (fs)
1447 {
1448 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1449 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1450 break;
1451 if (reg_idx == nmatch)
1452 {
1453 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1454 if (prev_idx_match_malloced)
1455 re_free (prev_idx_match);
15a7d175
UD
1456 return free_fail_stack_return (fs);
1457 }
1458 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1459 &eps_via_nodes);
1460 }
1461 else
1462 {
1463 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1464 if (prev_idx_match_malloced)
1465 re_free (prev_idx_match);
15a7d175
UD
1466 return REG_NOERROR;
1467 }
1468 }
3b0bdc72
UD
1469
1470 /* Proceed to next node. */
e3a87852 1471 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
15a7d175 1472 &eps_via_nodes, fs);
a3022b82 1473
bc15410e 1474 if (BE (cur_node < 0, 0))
15a7d175 1475 {
44777771
UD
1476 if (BE (cur_node == -2, 0))
1477 {
1478 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1479 if (prev_idx_match_malloced)
1480 re_free (prev_idx_match);
44777771
UD
1481 free_fail_stack_return (fs);
1482 return REG_ESPACE;
1483 }
15a7d175
UD
1484 if (fs)
1485 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1486 &eps_via_nodes);
1487 else
1488 {
1489 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1490 if (prev_idx_match_malloced)
1491 re_free (prev_idx_match);
15a7d175
UD
1492 return REG_NOMATCH;
1493 }
1494 }
3b0bdc72
UD
1495 }
1496 re_node_set_free (&eps_via_nodes);
2d87db5b
UD
1497 if (prev_idx_match_malloced)
1498 re_free (prev_idx_match);
485d775d
UD
1499 return free_fail_stack_return (fs);
1500}
1501
1502static reg_errcode_t
1ba81cea 1503internal_function
e2f55264 1504free_fail_stack_return (struct re_fail_stack_t *fs)
485d775d
UD
1505{
1506 if (fs)
1507 {
1508 int fs_idx;
1509 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
15a7d175
UD
1510 {
1511 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1512 re_free (fs->stack[fs_idx].regs);
1513 }
485d775d
UD
1514 re_free (fs->stack);
1515 }
a9388965 1516 return REG_NOERROR;
3b0bdc72
UD
1517}
1518
81c64d40 1519static void
1ba81cea 1520internal_function
e2f55264
UD
1521update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1522 regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
81c64d40
UD
1523{
1524 int type = dfa->nodes[cur_node].type;
81c64d40
UD
1525 if (type == OP_OPEN_SUBEXP)
1526 {
6b6557e8
UD
1527 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1528
81c64d40 1529 /* We are at the first node of this sub expression. */
6b6557e8
UD
1530 if (reg_num < nmatch)
1531 {
1532 pmatch[reg_num].rm_so = cur_idx;
1533 pmatch[reg_num].rm_eo = -1;
1534 }
81c64d40
UD
1535 }
1536 else if (type == OP_CLOSE_SUBEXP)
6b6557e8
UD
1537 {
1538 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1539 if (reg_num < nmatch)
1540 {
1541 /* We are at the last node of this sub expression. */
1542 if (pmatch[reg_num].rm_so < cur_idx)
1543 {
1544 pmatch[reg_num].rm_eo = cur_idx;
1545 /* This is a non-empty match or we are not inside an optional
1546 subexpression. Accept this right away. */
1547 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1548 }
1549 else
1550 {
1551 if (dfa->nodes[cur_node].opt_subexp
1552 && prev_idx_match[reg_num].rm_so != -1)
1553 /* We transited through an empty match for an optional
1554 subexpression, like (a?)*, and this is not the subexp's
1555 first match. Copy back the old content of the registers
1556 so that matches of an inner subexpression are undone as
1557 well, like in ((a?))*. */
1558 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1559 else
1560 /* We completed a subexpression, but it may be part of
1561 an optional one, so do not update PREV_IDX_MATCH. */
1562 pmatch[reg_num].rm_eo = cur_idx;
1563 }
1564 }
1565 }
0742e48e 1566}
81c64d40 1567
0742e48e 1568/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
612546c6
UD
1569 and sift the nodes in each states according to the following rules.
1570 Updated state_log will be wrote to STATE_LOG.
3b0bdc72
UD
1571
1572 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1573 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
15a7d175
UD
1574 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1575 the LAST_NODE, we throw away the node `a'.
612546c6 1576 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
15a7d175
UD
1577 string `s' and transit to `b':
1578 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1579 away the node `a'.
1580 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
bb3f4825 1581 thrown away, we throw away the node `a'.
d0b96fc4 1582 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
15a7d175
UD
1583 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1584 node `a'.
bb3f4825 1585 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
15a7d175 1586 we throw away the node `a'. */
3b0bdc72
UD
1587
1588#define STATE_NODE_CONTAINS(state,node) \
1589 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1590
a9388965 1591static reg_errcode_t
1ba81cea 1592internal_function
e2f55264 1593sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
3b0bdc72 1594{
a9388965 1595 reg_errcode_t err;
0742e48e
UD
1596 int null_cnt = 0;
1597 int str_idx = sctx->last_str_idx;
1598 re_node_set cur_dest;
3b0bdc72
UD
1599
1600#ifdef DEBUG
612546c6 1601 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
3b0bdc72 1602#endif
3b0bdc72
UD
1603
1604 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1605 transit to the last_node and the last_node itself. */
0742e48e 1606 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
bc15410e 1607 if (BE (err != REG_NOERROR, 0))
a9388965 1608 return err;
e3a87852 1609 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
0742e48e 1610 if (BE (err != REG_NOERROR, 0))
1b2c2628 1611 goto free_return;
3b0bdc72
UD
1612
1613 /* Then check each states in the state_log. */
612546c6 1614 while (str_idx > 0)
3b0bdc72 1615 {
3b0bdc72 1616 /* Update counters. */
0742e48e
UD
1617 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1618 if (null_cnt > mctx->max_mb_elem_len)
15a7d175
UD
1619 {
1620 memset (sctx->sifted_states, '\0',
1621 sizeof (re_dfastate_t *) * str_idx);
1622 re_node_set_free (&cur_dest);
1623 return REG_NOERROR;
1624 }
0742e48e 1625 re_node_set_empty (&cur_dest);
3b0bdc72 1626 --str_idx;
15a7d175 1627
e40a38b3
UD
1628 if (mctx->state_log[str_idx])
1629 {
1630 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
2da42bc0 1631 if (BE (err != REG_NOERROR, 0))
e40a38b3 1632 goto free_return;
15a7d175 1633 }
3b0bdc72 1634
0742e48e 1635 /* Add all the nodes which satisfy the following conditions:
15a7d175
UD
1636 - It can epsilon transit to a node in CUR_DEST.
1637 - It is in CUR_SRC.
1638 And update state_log. */
e3a87852 1639 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
0742e48e 1640 if (BE (err != REG_NOERROR, 0))
15a7d175 1641 goto free_return;
3b0bdc72 1642 }
1b2c2628
UD
1643 err = REG_NOERROR;
1644 free_return:
0742e48e 1645 re_node_set_free (&cur_dest);
1b2c2628 1646 return err;
3b0bdc72
UD
1647}
1648
e40a38b3 1649static reg_errcode_t
2da42bc0 1650internal_function __attribute_warn_unused_result__
e2f55264
UD
1651build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1652 int str_idx, re_node_set *cur_dest)
e40a38b3 1653{
76b864c8
UD
1654 const re_dfa_t *const dfa = mctx->dfa;
1655 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
e40a38b3
UD
1656 int i;
1657
1658 /* Then build the next sifted state.
1659 We build the next sifted state on `cur_dest', and update
1660 `sifted_states[str_idx]' with `cur_dest'.
1661 Note:
1662 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
5cf1ec52
UD
1663 `cur_src' points the node_set of the old `state_log[str_idx]'
1664 (with the epsilon nodes pre-filtered out). */
e40a38b3
UD
1665 for (i = 0; i < cur_src->nelem; i++)
1666 {
1667 int prev_node = cur_src->elems[i];
1668 int naccepted = 0;
e40a38b3
UD
1669 int ret;
1670
a334319f 1671#ifdef DEBUG
02f3550c 1672 re_token_type_t type = dfa->nodes[prev_node].type;
5cf1ec52
UD
1673 assert (!IS_EPSILON_NODE (type));
1674#endif
e40a38b3
UD
1675#ifdef RE_ENABLE_I18N
1676 /* If the node may accept `multi byte'. */
02f3550c 1677 if (dfa->nodes[prev_node].accept_mb)
e40a38b3
UD
1678 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1679 str_idx, sctx->last_str_idx);
1680#endif /* RE_ENABLE_I18N */
1681
1682 /* We don't check backreferences here.
1683 See update_cur_sifted_state(). */
1684 if (!naccepted
1685 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1686 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1687 dfa->nexts[prev_node]))
1688 naccepted = 1;
1689
1690 if (naccepted == 0)
1691 continue;
1692
1693 if (sctx->limits.nelem)
1694 {
1695 int to_idx = str_idx + naccepted;
1696 if (check_dst_limits (mctx, &sctx->limits,
1697 dfa->nexts[prev_node], to_idx,
1698 prev_node, str_idx))
1699 continue;
1700 }
1701 ret = re_node_set_insert (cur_dest, prev_node);
1702 if (BE (ret == -1, 0))
1703 return REG_ESPACE;
1704 }
1705
1706 return REG_NOERROR;
1707}
1708
3b0bdc72
UD
1709/* Helper functions. */
1710
25337753 1711static reg_errcode_t
e2f55264
UD
1712internal_function
1713clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
3b0bdc72
UD
1714{
1715 int top = mctx->state_log_top;
612546c6 1716
8887a920
UD
1717 if ((next_state_log_idx >= mctx->input.bufs_len
1718 && mctx->input.bufs_len < mctx->input.len)
56b168be
UD
1719 || (next_state_log_idx >= mctx->input.valid_len
1720 && mctx->input.valid_len < mctx->input.len))
612546c6
UD
1721 {
1722 reg_errcode_t err;
a445af0b 1723 err = extend_buffers (mctx, next_state_log_idx + 1);
612546c6 1724 if (BE (err != REG_NOERROR, 0))
15a7d175 1725 return err;
612546c6
UD
1726 }
1727
3b0bdc72
UD
1728 if (top < next_state_log_idx)
1729 {
612546c6 1730 memset (mctx->state_log + top + 1, '\0',
15a7d175 1731 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
3b0bdc72
UD
1732 mctx->state_log_top = next_state_log_idx;
1733 }
612546c6 1734 return REG_NOERROR;
3b0bdc72
UD
1735}
1736
1b2c2628 1737static reg_errcode_t
1ba81cea 1738internal_function
e2f55264
UD
1739merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1740 re_dfastate_t **src, int num)
3b0bdc72 1741{
0742e48e
UD
1742 int st_idx;
1743 reg_errcode_t err;
1744 for (st_idx = 0; st_idx < num; ++st_idx)
1745 {
1746 if (dst[st_idx] == NULL)
15a7d175 1747 dst[st_idx] = src[st_idx];
0742e48e 1748 else if (src[st_idx] != NULL)
15a7d175
UD
1749 {
1750 re_node_set merged_set;
1751 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1752 &src[st_idx]->nodes);
1753 if (BE (err != REG_NOERROR, 0))
1754 return err;
1755 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1756 re_node_set_free (&merged_set);
1757 if (BE (err != REG_NOERROR, 0))
1758 return err;
1759 }
0742e48e
UD
1760 }
1761 return REG_NOERROR;
3b0bdc72
UD
1762}
1763
0742e48e 1764static reg_errcode_t
1ba81cea 1765internal_function
e2f55264
UD
1766update_cur_sifted_state (const re_match_context_t *mctx,
1767 re_sift_context_t *sctx, int str_idx,
1768 re_node_set *dest_nodes)
3b0bdc72 1769{
76b864c8 1770 const re_dfa_t *const dfa = mctx->dfa;
c0c9615c 1771 reg_errcode_t err = REG_NOERROR;
0742e48e 1772 const re_node_set *candidates;
e40a38b3 1773 candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
15a7d175 1774 : &mctx->state_log[str_idx]->nodes);
0742e48e 1775
e40a38b3
UD
1776 if (dest_nodes->nelem == 0)
1777 sctx->sifted_states[str_idx] = NULL;
1778 else
485d775d 1779 {
e40a38b3
UD
1780 if (candidates)
1781 {
1782 /* At first, add the nodes which can epsilon transit to a node in
1783 DEST_NODE. */
1784 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1785 if (BE (err != REG_NOERROR, 0))
1786 return err;
0742e48e 1787
e40a38b3
UD
1788 /* Then, check the limitations in the current sift_context. */
1789 if (sctx->limits.nelem)
1790 {
1791 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1792 mctx->bkref_ents, str_idx);
1793 if (BE (err != REG_NOERROR, 0))
1794 return err;
1795 }
1796 }
1797
1798 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
0742e48e 1799 if (BE (err != REG_NOERROR, 0))
15a7d175 1800 return err;
0742e48e
UD
1801 }
1802
e40a38b3 1803 if (candidates && mctx->state_log[str_idx]->has_backref)
0742e48e 1804 {
d2c38eb3 1805 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
0742e48e 1806 if (BE (err != REG_NOERROR, 0))
15a7d175 1807 return err;
0742e48e
UD
1808 }
1809 return REG_NOERROR;
3b0bdc72
UD
1810}
1811
a9388965 1812static reg_errcode_t
2da42bc0 1813internal_function __attribute_warn_unused_result__
e2f55264
UD
1814add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1815 const re_node_set *candidates)
3b0bdc72 1816{
5cf1ec52
UD
1817 reg_errcode_t err = REG_NOERROR;
1818 int i;
0742e48e 1819
5cf1ec52 1820 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
0742e48e
UD
1821 if (BE (err != REG_NOERROR, 0))
1822 return err;
5cf1ec52
UD
1823
1824 if (!state->inveclosure.alloc)
3b0bdc72 1825 {
5cf1ec52 1826 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
0742e48e 1827 if (BE (err != REG_NOERROR, 0))
2da42bc0 1828 return REG_ESPACE;
5cf1ec52 1829 for (i = 0; i < dest_nodes->nelem; i++)
2da42bc0
UD
1830 {
1831 err = re_node_set_merge (&state->inveclosure,
1832 dfa->inveclosures + dest_nodes->elems[i]);
1833 if (BE (err != REG_NOERROR, 0))
1834 return REG_ESPACE;
1835 }
0742e48e 1836 }
5cf1ec52
UD
1837 return re_node_set_add_intersect (dest_nodes, candidates,
1838 &state->inveclosure);
0742e48e 1839}
3b0bdc72 1840
0742e48e 1841static reg_errcode_t
e2f55264
UD
1842internal_function
1843sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1844 const re_node_set *candidates)
0742e48e
UD
1845{
1846 int ecl_idx;
1847 reg_errcode_t err;
1848 re_node_set *inv_eclosure = dfa->inveclosures + node;
1849 re_node_set except_nodes;
1850 re_node_set_init_empty (&except_nodes);
1851 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1852 {
15a7d175
UD
1853 int cur_node = inv_eclosure->elems[ecl_idx];
1854 if (cur_node == node)
1855 continue;
1856 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1857 {
1858 int edst1 = dfa->edests[cur_node].elems[0];
1859 int edst2 = ((dfa->edests[cur_node].nelem > 1)
1860 ? dfa->edests[cur_node].elems[1] : -1);
1861 if ((!re_node_set_contains (inv_eclosure, edst1)
1862 && re_node_set_contains (dest_nodes, edst1))
1863 || (edst2 > 0
1864 && !re_node_set_contains (inv_eclosure, edst2)
1865 && re_node_set_contains (dest_nodes, edst2)))
1866 {
1867 err = re_node_set_add_intersect (&except_nodes, candidates,
1868 dfa->inveclosures + cur_node);
1869 if (BE (err != REG_NOERROR, 0))
1870 {
1871 re_node_set_free (&except_nodes);
1872 return err;
1873 }
1874 }
1875 }
0742e48e
UD
1876 }
1877 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1878 {
15a7d175
UD
1879 int cur_node = inv_eclosure->elems[ecl_idx];
1880 if (!re_node_set_contains (&except_nodes, cur_node))
1881 {
1882 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1883 re_node_set_remove_at (dest_nodes, idx);
1884 }
0742e48e
UD
1885 }
1886 re_node_set_free (&except_nodes);
1887 return REG_NOERROR;
1888}
1889
a3022b82 1890static int
1ba81cea 1891internal_function
e2f55264
UD
1892check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1893 int dst_node, int dst_idx, int src_node, int src_idx)
a3022b82 1894{
76b864c8 1895 const re_dfa_t *const dfa = mctx->dfa;
a3022b82
UD
1896 int lim_idx, src_pos, dst_pos;
1897
e40a38b3
UD
1898 int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1899 int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
a3022b82
UD
1900 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1901 {
485d775d 1902 int subexp_idx;
a3022b82
UD
1903 struct re_backref_cache_entry *ent;
1904 ent = mctx->bkref_ents + limits->elems[lim_idx];
d8f73de8 1905 subexp_idx = dfa->nodes[ent->node].opr.idx;
a3022b82 1906
e3a87852 1907 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
e40a38b3
UD
1908 subexp_idx, dst_node, dst_idx,
1909 dst_bkref_idx);
e3a87852 1910 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
e40a38b3
UD
1911 subexp_idx, src_node, src_idx,
1912 src_bkref_idx);
a3022b82
UD
1913
1914 /* In case of:
15a7d175
UD
1915 <src> <dst> ( <subexp> )
1916 ( <subexp> ) <src> <dst>
1917 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
a3022b82 1918 if (src_pos == dst_pos)
15a7d175 1919 continue; /* This is unrelated limitation. */
a3022b82 1920 else
15a7d175 1921 return 1;
a3022b82
UD
1922 }
1923 return 0;
1924}
1925
1926static int
1ba81cea 1927internal_function
e2f55264
UD
1928check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1929 int subexp_idx, int from_node, int bkref_idx)
a3022b82 1930{
76b864c8
UD
1931 const re_dfa_t *const dfa = mctx->dfa;
1932 const re_node_set *eclosures = dfa->eclosures + from_node;
1cef7b3c 1933 int node_idx;
d0b96fc4 1934
d0b96fc4
UD
1935 /* Else, we are on the boundary: examine the nodes on the epsilon
1936 closure. */
1cef7b3c
UD
1937 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1938 {
1939 int node = eclosures->elems[node_idx];
d0b96fc4
UD
1940 switch (dfa->nodes[node].type)
1941 {
1942 case OP_BACK_REF:
d8f73de8
UD
1943 if (bkref_idx != -1)
1944 {
1945 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1946 do
2da42bc0 1947 {
d8f73de8 1948 int dst, cpos;
d0b96fc4 1949
d8f73de8
UD
1950 if (ent->node != node)
1951 continue;
d0b96fc4 1952
2c05d33f
UD
1953 if (subexp_idx < BITSET_WORD_BITS
1954 && !(ent->eps_reachable_subexps_map
1955 & ((bitset_word_t) 1 << subexp_idx)))
d8f73de8 1956 continue;
d0b96fc4 1957
d8f73de8
UD
1958 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1959 OP_CLOSE_SUBEXP cases below. But, if the
1960 destination node is the same node as the source
1961 node, don't recurse because it would cause an
1962 infinite loop: a regex that exhibits this behavior
1963 is ()\1*\1* */
1964 dst = dfa->edests[node].elems[0];
1965 if (dst == from_node)
1966 {
1967 if (boundaries & 1)
2da42bc0 1968 return -1;
d8f73de8 1969 else /* if (boundaries & 2) */
2da42bc0 1970 return 0;
d8f73de8 1971 }
7db61208 1972
d8f73de8
UD
1973 cpos =
1974 check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1975 dst, bkref_idx);
1976 if (cpos == -1 /* && (boundaries & 1) */)
1977 return -1;
1978 if (cpos == 0 && (boundaries & 2))
1979 return 0;
1980
2c05d33f
UD
1981 if (subexp_idx < BITSET_WORD_BITS)
1982 ent->eps_reachable_subexps_map
1983 &= ~((bitset_word_t) 1 << subexp_idx);
2da42bc0 1984 }
d8f73de8
UD
1985 while (ent++->more);
1986 }
1987 break;
f0c7c524 1988
d0b96fc4 1989 case OP_OPEN_SUBEXP:
e40a38b3 1990 if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
d0b96fc4
UD
1991 return -1;
1992 break;
f0c7c524 1993
d0b96fc4 1994 case OP_CLOSE_SUBEXP:
e40a38b3 1995 if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
d0b96fc4
UD
1996 return 0;
1997 break;
1998
1999 default:
15a7d175
UD
2000 break;
2001 }
a3022b82 2002 }
d0b96fc4 2003
e40a38b3
UD
2004 return (boundaries & 2) ? 1 : 0;
2005}
2006
2007static int
1ba81cea 2008internal_function
e2f55264
UD
2009check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
2010 int subexp_idx, int from_node, int str_idx,
2011 int bkref_idx)
e40a38b3
UD
2012{
2013 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2014 int boundaries;
2015
2016 /* If we are outside the range of the subexpression, return -1 or 1. */
2017 if (str_idx < lim->subexp_from)
2018 return -1;
2019
2020 if (lim->subexp_to < str_idx)
d0b96fc4 2021 return 1;
e40a38b3
UD
2022
2023 /* If we are within the subexpression, return 0. */
2024 boundaries = (str_idx == lim->subexp_from);
2025 boundaries |= (str_idx == lim->subexp_to) << 1;
2026 if (boundaries == 0)
d0b96fc4 2027 return 0;
e40a38b3
UD
2028
2029 /* Else, examine epsilon closure. */
2030 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2031 from_node, bkref_idx);
a3022b82
UD
2032}
2033
0742e48e
UD
2034/* Check the limitations of sub expressions LIMITS, and remove the nodes
2035 which are against limitations from DEST_NODES. */
2036
2037static reg_errcode_t
1ba81cea 2038internal_function
e2f55264
UD
2039check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2040 const re_node_set *candidates, re_node_set *limits,
2041 struct re_backref_cache_entry *bkref_ents, int str_idx)
0742e48e
UD
2042{
2043 reg_errcode_t err;
2044 int node_idx, lim_idx;
2045
2046 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2047 {
485d775d 2048 int subexp_idx;
0742e48e
UD
2049 struct re_backref_cache_entry *ent;
2050 ent = bkref_ents + limits->elems[lim_idx];
2051
2052 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
15a7d175 2053 continue; /* This is unrelated limitation. */
0742e48e 2054
d8f73de8 2055 subexp_idx = dfa->nodes[ent->node].opr.idx;
0742e48e 2056 if (ent->subexp_to == str_idx)
15a7d175
UD
2057 {
2058 int ops_node = -1;
2059 int cls_node = -1;
2060 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2061 {
2062 int node = dest_nodes->elems[node_idx];
46bf9de7 2063 re_token_type_t type = dfa->nodes[node].type;
15a7d175
UD
2064 if (type == OP_OPEN_SUBEXP
2065 && subexp_idx == dfa->nodes[node].opr.idx)
2066 ops_node = node;
2067 else if (type == OP_CLOSE_SUBEXP
2068 && subexp_idx == dfa->nodes[node].opr.idx)
2069 cls_node = node;
2070 }
2071
2072 /* Check the limitation of the open subexpression. */
2073 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2074 if (ops_node >= 0)
2075 {
46bf9de7
UD
2076 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2077 candidates);
15a7d175
UD
2078 if (BE (err != REG_NOERROR, 0))
2079 return err;
2080 }
46bf9de7 2081
15a7d175 2082 /* Check the limitation of the close subexpression. */
46bf9de7
UD
2083 if (cls_node >= 0)
2084 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2085 {
2086 int node = dest_nodes->elems[node_idx];
2087 if (!re_node_set_contains (dfa->inveclosures + node,
2088 cls_node)
2089 && !re_node_set_contains (dfa->eclosures + node,
2090 cls_node))
2091 {
2092 /* It is against this limitation.
2093 Remove it form the current sifted state. */
2094 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2095 candidates);
2096 if (BE (err != REG_NOERROR, 0))
2097 return err;
2098 --node_idx;
2099 }
2100 }
15a7d175 2101 }
0742e48e 2102 else /* (ent->subexp_to != str_idx) */
15a7d175
UD
2103 {
2104 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2105 {
2106 int node = dest_nodes->elems[node_idx];
46bf9de7 2107 re_token_type_t type = dfa->nodes[node].type;
15a7d175
UD
2108 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2109 {
2110 if (subexp_idx != dfa->nodes[node].opr.idx)
2111 continue;
d8f73de8
UD
2112 /* It is against this limitation.
2113 Remove it form the current sifted state. */
2114 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2115 candidates);
2116 if (BE (err != REG_NOERROR, 0))
2117 return err;
15a7d175
UD
2118 }
2119 }
2120 }
0742e48e
UD
2121 }
2122 return REG_NOERROR;
2123}
2124
0742e48e 2125static reg_errcode_t
2da42bc0 2126internal_function __attribute_warn_unused_result__
e2f55264
UD
2127sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2128 int str_idx, const re_node_set *candidates)
0742e48e 2129{
76b864c8 2130 const re_dfa_t *const dfa = mctx->dfa;
0742e48e 2131 reg_errcode_t err;
0742e48e
UD
2132 int node_idx, node;
2133 re_sift_context_t local_sctx;
e40a38b3
UD
2134 int first_idx = search_cur_bkref_entry (mctx, str_idx);
2135
2136 if (first_idx == -1)
2137 return REG_NOERROR;
2138
0742e48e
UD
2139 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2140
2141 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2142 {
e40a38b3 2143 int enabled_idx;
0742e48e 2144 re_token_type_t type;
e40a38b3 2145 struct re_backref_cache_entry *entry;
0742e48e
UD
2146 node = candidates->elems[node_idx];
2147 type = dfa->nodes[node].type;
0742e48e
UD
2148 /* Avoid infinite loop for the REs like "()\1+". */
2149 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
15a7d175 2150 continue;
e40a38b3
UD
2151 if (type != OP_BACK_REF)
2152 continue;
2153
2154 entry = mctx->bkref_ents + first_idx;
2155 enabled_idx = first_idx;
2156 do
15a7d175 2157 {
2d87db5b
UD
2158 int subexp_len;
2159 int to_idx;
2160 int dst_node;
2161 int ret;
e40a38b3
UD
2162 re_dfastate_t *cur_state;
2163
2164 if (entry->node != node)
2165 continue;
2166 subexp_len = entry->subexp_to - entry->subexp_from;
2167 to_idx = str_idx + subexp_len;
2168 dst_node = (subexp_len ? dfa->nexts[node]
2169 : dfa->edests[node].elems[0]);
2170
2171 if (to_idx > sctx->last_str_idx
2172 || sctx->sifted_states[to_idx] == NULL
2173 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2174 || check_dst_limits (mctx, &sctx->limits, node,
2175 str_idx, dst_node, to_idx))
2176 continue;
2177
2178 if (local_sctx.sifted_states == NULL)
15a7d175 2179 {
e40a38b3
UD
2180 local_sctx = *sctx;
2181 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2182 if (BE (err != REG_NOERROR, 0))
2183 goto free_return;
2184 }
2185 local_sctx.last_node = node;
2186 local_sctx.last_str_idx = str_idx;
2d87db5b
UD
2187 ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2188 if (BE (ret < 0, 0))
e40a38b3
UD
2189 {
2190 err = REG_ESPACE;
2191 goto free_return;
15a7d175 2192 }
e40a38b3
UD
2193 cur_state = local_sctx.sifted_states[str_idx];
2194 err = sift_states_backward (mctx, &local_sctx);
2195 if (BE (err != REG_NOERROR, 0))
2196 goto free_return;
2197 if (sctx->limited_states != NULL)
2198 {
2199 err = merge_state_array (dfa, sctx->limited_states,
2200 local_sctx.sifted_states,
2201 str_idx + 1);
2202 if (BE (err != REG_NOERROR, 0))
2203 goto free_return;
2204 }
2205 local_sctx.sifted_states[str_idx] = cur_state;
2206 re_node_set_remove (&local_sctx.limits, enabled_idx);
2207
2208 /* mctx->bkref_ents may have changed, reload the pointer. */
2da42bc0 2209 entry = mctx->bkref_ents + enabled_idx;
15a7d175 2210 }
e40a38b3 2211 while (enabled_idx++, entry++->more);
0742e48e 2212 }
1b2c2628
UD
2213 err = REG_NOERROR;
2214 free_return:
0742e48e
UD
2215 if (local_sctx.sifted_states != NULL)
2216 {
0742e48e
UD
2217 re_node_set_free (&local_sctx.limits);
2218 }
2219
1b2c2628 2220 return err;
0742e48e
UD
2221}
2222
2223
2224#ifdef RE_ENABLE_I18N
2225static int
1ba81cea 2226internal_function
e2f55264
UD
2227sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2228 int node_idx, int str_idx, int max_str_idx)
0742e48e 2229{
c42b4152 2230 const re_dfa_t *const dfa = mctx->dfa;
0742e48e
UD
2231 int naccepted;
2232 /* Check the node can accept `multi byte'. */
56b168be 2233 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
0742e48e
UD
2234 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2235 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
15a7d175 2236 dfa->nexts[node_idx]))
0742e48e 2237 /* The node can't accept the `multi byte', or the
bb3f4825 2238 destination was already thrown away, then the node
0742e48e
UD
2239 could't accept the current input `multi byte'. */
2240 naccepted = 0;
2241 /* Otherwise, it is sure that the node could accept
2242 `naccepted' bytes input. */
2243 return naccepted;
2244}
2245#endif /* RE_ENABLE_I18N */
2246
3b0bdc72
UD
2247\f
2248/* Functions for state transition. */
2249
2250/* Return the next state to which the current state STATE will transit by
2251 accepting the current input byte, and update STATE_LOG if necessary.
2252 If STATE can accept a multibyte char/collating element/back reference
2253 update the destination of STATE_LOG. */
2254
2255static re_dfastate_t *
2da42bc0 2256internal_function __attribute_warn_unused_result__
e2f55264
UD
2257transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2258 re_dfastate_t *state)
3b0bdc72 2259{
58845a70 2260 re_dfastate_t **trtable;
3b0bdc72
UD
2261 unsigned char ch;
2262
bb677c95
UD
2263#ifdef RE_ENABLE_I18N
2264 /* If the current state can accept multibyte. */
2265 if (BE (state->accept_mb, 0))
612546c6 2266 {
bb677c95 2267 *err = transit_state_mb (mctx, state);
612546c6 2268 if (BE (*err != REG_NOERROR, 0))
15a7d175 2269 return NULL;
612546c6 2270 }
434d3784 2271#endif /* RE_ENABLE_I18N */
3b0bdc72 2272
4c595adb 2273 /* Then decide the next state with the single byte. */
ab4b89fe
UD
2274#if 0
2275 if (0)
2276 /* don't use transition table */
2277 return transit_state_sb (err, mctx, state);
2278#endif
2279
2280 /* Use transition table */
2281 ch = re_string_fetch_byte (&mctx->input);
2282 for (;;)
4c595adb 2283 {
4c595adb 2284 trtable = state->trtable;
ab4b89fe
UD
2285 if (BE (trtable != NULL, 1))
2286 return trtable[ch];
2287
2288 trtable = state->word_trtable;
2289 if (BE (trtable != NULL, 1))
2da42bc0 2290 {
4c595adb
UD
2291 unsigned int context;
2292 context
2293 = re_string_context_at (&mctx->input,
2294 re_string_cur_idx (&mctx->input) - 1,
2295 mctx->eflags);
2296 if (IS_WORD_CONTEXT (context))
2297 return trtable[ch + SBC_MAX];
c13c99fa 2298 else
4c595adb 2299 return trtable[ch];
15a7d175 2300 }
ab4b89fe
UD
2301
2302 if (!build_trtable (mctx->dfa, state))
2303 {
2304 *err = REG_ESPACE;
2305 return NULL;
2306 }
2307
2308 /* Retry, we now have a transition table. */
3b0bdc72 2309 }
4c595adb 2310}
3b0bdc72 2311
4c595adb 2312/* Update the state_log if we need */
58845a70 2313re_dfastate_t *
1ba81cea 2314internal_function
e2f55264
UD
2315merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2316 re_dfastate_t *next_state)
58845a70 2317{
c42b4152 2318 const re_dfa_t *const dfa = mctx->dfa;
4c595adb
UD
2319 int cur_idx = re_string_cur_idx (&mctx->input);
2320
2321 if (cur_idx > mctx->state_log_top)
3b0bdc72 2322 {
4c595adb
UD
2323 mctx->state_log[cur_idx] = next_state;
2324 mctx->state_log_top = cur_idx;
2325 }
2326 else if (mctx->state_log[cur_idx] == 0)
2327 {
2328 mctx->state_log[cur_idx] = next_state;
2329 }
2330 else
2331 {
2332 re_dfastate_t *pstate;
2333 unsigned int context;
2334 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2335 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2da42bc0
UD
2336 the destination of a multibyte char/collating element/
2337 back reference. Then the next state is the union set of
2338 these destinations and the results of the transition table. */
4c595adb
UD
2339 pstate = mctx->state_log[cur_idx];
2340 log_nodes = pstate->entrance_nodes;
2341 if (next_state != NULL)
2da42bc0
UD
2342 {
2343 table_nodes = next_state->entrance_nodes;
2344 *err = re_node_set_init_union (&next_nodes, table_nodes,
15a7d175 2345 log_nodes);
2da42bc0 2346 if (BE (*err != REG_NOERROR, 0))
4c595adb 2347 return NULL;
2da42bc0 2348 }
4c595adb 2349 else
2da42bc0 2350 next_nodes = *log_nodes;
4c595adb
UD
2351 /* Note: We already add the nodes of the initial state,
2352 then we don't need to add them here. */
2353
2354 context = re_string_context_at (&mctx->input,
2355 re_string_cur_idx (&mctx->input) - 1,
2356 mctx->eflags);
2357 next_state = mctx->state_log[cur_idx]
2da42bc0 2358 = re_acquire_state_context (err, dfa, &next_nodes, context);
4c595adb 2359 /* We don't need to check errors here, since the return value of
2da42bc0 2360 this function is next_state and ERR is already set. */
4c595adb
UD
2361
2362 if (table_nodes != NULL)
2da42bc0 2363 re_node_set_free (&next_nodes);
6291ee3c
UD
2364 }
2365
bb3f4825 2366 if (BE (dfa->nbackref, 0) && next_state != NULL)
6291ee3c 2367 {
bb3f4825
UD
2368 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2369 later. We must check them here, since the back references in the
2370 next state might use them. */
e3a87852 2371 *err = check_subexp_matching_top (mctx, &next_state->nodes,
6291ee3c
UD
2372 cur_idx);
2373 if (BE (*err != REG_NOERROR, 0))
2374 return NULL;
6291ee3c 2375
bb3f4825
UD
2376 /* If the next state has back references. */
2377 if (next_state->has_backref)
2378 {
e3a87852 2379 *err = transit_state_bkref (mctx, &next_state->nodes);
bb3f4825
UD
2380 if (BE (*err != REG_NOERROR, 0))
2381 return NULL;
2382 next_state = mctx->state_log[cur_idx];
2383 }
3b0bdc72 2384 }
4c595adb 2385
3b0bdc72
UD
2386 return next_state;
2387}
2388
4c595adb
UD
2389/* Skip bytes in the input that correspond to part of a
2390 multi-byte match, then look in the log for a state
2391 from which to restart matching. */
2392re_dfastate_t *
1ba81cea 2393internal_function
e2f55264 2394find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
4c595adb 2395{
1878e9af 2396 re_dfastate_t *cur_state;
4c595adb
UD
2397 do
2398 {
2399 int max = mctx->state_log_top;
2400 int cur_str_idx = re_string_cur_idx (&mctx->input);
2401
2402 do
2403 {
2da42bc0
UD
2404 if (++cur_str_idx > max)
2405 return NULL;
2406 re_string_skip_bytes (&mctx->input, 1);
4c595adb
UD
2407 }
2408 while (mctx->state_log[cur_str_idx] == NULL);
2409
2410 cur_state = merge_state_with_log (err, mctx, NULL);
2411 }
2d87db5b 2412 while (*err == REG_NOERROR && cur_state == NULL);
4c595adb
UD
2413 return cur_state;
2414}
2415
3b0bdc72
UD
2416/* Helper functions for transit_state. */
2417
6291ee3c
UD
2418/* From the node set CUR_NODES, pick up the nodes whose types are
2419 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2420 expression. And register them to use them later for evaluating the
2421 correspoding back references. */
2422
2423static reg_errcode_t
1ba81cea 2424internal_function
e2f55264
UD
2425check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2426 int str_idx)
6291ee3c 2427{
76b864c8 2428 const re_dfa_t *const dfa = mctx->dfa;
6291ee3c
UD
2429 int node_idx;
2430 reg_errcode_t err;
2431
2432 /* TODO: This isn't efficient.
2433 Because there might be more than one nodes whose types are
2434 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2435 nodes.
2436 E.g. RE: (a){2} */
2437 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2438 {
2439 int node = cur_nodes->elems[node_idx];
2440 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2c05d33f
UD
2441 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2442 && (dfa->used_bkref_map
2443 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
6291ee3c
UD
2444 {
2445 err = match_ctx_add_subtop (mctx, node, str_idx);
2446 if (BE (err != REG_NOERROR, 0))
2447 return err;
2448 }
2449 }
2450 return REG_NOERROR;
2451}
2452
c13c99fa 2453#if 0
3b0bdc72
UD
2454/* Return the next state to which the current state STATE will transit by
2455 accepting the current input byte. */
2456
2457static re_dfastate_t *
e2f55264
UD
2458transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2459 re_dfastate_t *state)
3b0bdc72 2460{
c42b4152 2461 const re_dfa_t *const dfa = mctx->dfa;
3b0bdc72
UD
2462 re_node_set next_nodes;
2463 re_dfastate_t *next_state;
56b168be 2464 int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
3b0bdc72
UD
2465 unsigned int context;
2466
a9388965 2467 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
bc15410e 2468 if (BE (*err != REG_NOERROR, 0))
a9388965 2469 return NULL;
3b0bdc72
UD
2470 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2471 {
2472 int cur_node = state->nodes.elems[node_cnt];
e3a87852 2473 if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
15a7d175
UD
2474 {
2475 *err = re_node_set_merge (&next_nodes,
2476 dfa->eclosures + dfa->nexts[cur_node]);
2477 if (BE (*err != REG_NOERROR, 0))
2478 {
2479 re_node_set_free (&next_nodes);
2480 return NULL;
2481 }
2482 }
3b0bdc72 2483 }
56b168be 2484 context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
a9388965
UD
2485 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2486 /* We don't need to check errors here, since the return value of
2487 this function is next_state and ERR is already set. */
2488
3b0bdc72 2489 re_node_set_free (&next_nodes);
56b168be 2490 re_string_skip_bytes (&mctx->input, 1);
3b0bdc72
UD
2491 return next_state;
2492}
c13c99fa 2493#endif
3b0bdc72 2494
434d3784 2495#ifdef RE_ENABLE_I18N
a9388965 2496static reg_errcode_t
1ba81cea 2497internal_function
e2f55264 2498transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
3b0bdc72 2499{
c42b4152 2500 const re_dfa_t *const dfa = mctx->dfa;
a9388965 2501 reg_errcode_t err;
3b0bdc72
UD
2502 int i;
2503
2504 for (i = 0; i < pstate->nodes.nelem; ++i)
2505 {
2506 re_node_set dest_nodes, *new_nodes;
2507 int cur_node_idx = pstate->nodes.elems[i];
963d8d78 2508 int naccepted, dest_idx;
3b0bdc72
UD
2509 unsigned int context;
2510 re_dfastate_t *dest_state;
2511
963d8d78 2512 if (!dfa->nodes[cur_node_idx].accept_mb)
2da42bc0 2513 continue;
963d8d78 2514
485d775d 2515 if (dfa->nodes[cur_node_idx].constraint)
15a7d175 2516 {
56b168be
UD
2517 context = re_string_context_at (&mctx->input,
2518 re_string_cur_idx (&mctx->input),
2519 mctx->eflags);
15a7d175
UD
2520 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2521 context))
2522 continue;
2523 }
3b0bdc72 2524
ad7f28c2 2525 /* How many bytes the node can accept? */
963d8d78
UD
2526 naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2527 re_string_cur_idx (&mctx->input));
3b0bdc72 2528 if (naccepted == 0)
15a7d175 2529 continue;
3b0bdc72
UD
2530
2531 /* The node can accepts `naccepted' bytes. */
56b168be 2532 dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
0742e48e 2533 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
15a7d175 2534 : mctx->max_mb_elem_len);
7c1be3ec 2535 err = clean_state_log_if_needed (mctx, dest_idx);
612546c6 2536 if (BE (err != REG_NOERROR, 0))
15a7d175 2537 return err;
3b0bdc72
UD
2538#ifdef DEBUG
2539 assert (dfa->nexts[cur_node_idx] != -1);
2540#endif
963d8d78 2541 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
3b0bdc72 2542
612546c6 2543 dest_state = mctx->state_log[dest_idx];
3b0bdc72 2544 if (dest_state == NULL)
15a7d175 2545 dest_nodes = *new_nodes;
3b0bdc72 2546 else
15a7d175
UD
2547 {
2548 err = re_node_set_init_union (&dest_nodes,
2549 dest_state->entrance_nodes, new_nodes);
2550 if (BE (err != REG_NOERROR, 0))
2551 return err;
2552 }
01ed6ceb
UD
2553 context = re_string_context_at (&mctx->input, dest_idx - 1,
2554 mctx->eflags);
612546c6 2555 mctx->state_log[dest_idx]
15a7d175 2556 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
3b0bdc72 2557 if (dest_state != NULL)
15a7d175 2558 re_node_set_free (&dest_nodes);
1b2c2628 2559 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
15a7d175 2560 return err;
3b0bdc72 2561 }
a9388965 2562 return REG_NOERROR;
3b0bdc72 2563}
434d3784 2564#endif /* RE_ENABLE_I18N */
3b0bdc72 2565
a9388965 2566static reg_errcode_t
1ba81cea 2567internal_function
e2f55264 2568transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
3b0bdc72 2569{
6efbd82c 2570 const re_dfa_t *const dfa = mctx->dfa;
a9388965 2571 reg_errcode_t err;
0742e48e 2572 int i;
56b168be 2573 int cur_str_idx = re_string_cur_idx (&mctx->input);
3b0bdc72
UD
2574
2575 for (i = 0; i < nodes->nelem; ++i)
2576 {
6291ee3c 2577 int dest_str_idx, prev_nelem, bkc_idx;
3b0bdc72
UD
2578 int node_idx = nodes->elems[i];
2579 unsigned int context;
fe9434bb 2580 const re_token_t *node = dfa->nodes + node_idx;
3b0bdc72
UD
2581 re_node_set *new_dest_nodes;
2582
2583 /* Check whether `node' is a backreference or not. */
6291ee3c 2584 if (node->type != OP_BACK_REF)
15a7d175 2585 continue;
485d775d
UD
2586
2587 if (node->constraint)
15a7d175 2588 {
56b168be
UD
2589 context = re_string_context_at (&mctx->input, cur_str_idx,
2590 mctx->eflags);
15a7d175
UD
2591 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2592 continue;
2593 }
3b0bdc72
UD
2594
2595 /* `node' is a backreference.
15a7d175 2596 Check the substring which the substring matched. */
6291ee3c 2597 bkc_idx = mctx->nbkref_ents;
e3a87852 2598 err = get_subexp (mctx, node_idx, cur_str_idx);
612546c6 2599 if (BE (err != REG_NOERROR, 0))
15a7d175 2600 goto free_return;
3b0bdc72
UD
2601
2602 /* And add the epsilon closures (which is `new_dest_nodes') of
15a7d175 2603 the backreference to appropriate state_log. */
3b0bdc72
UD
2604#ifdef DEBUG
2605 assert (dfa->nexts[node_idx] != -1);
2606#endif
6291ee3c 2607 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
15a7d175
UD
2608 {
2609 int subexp_len;
2610 re_dfastate_t *dest_state;
2611 struct re_backref_cache_entry *bkref_ent;
2612 bkref_ent = mctx->bkref_ents + bkc_idx;
2613 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2614 continue;
2615 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2616 new_dest_nodes = (subexp_len == 0
2617 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2618 : dfa->eclosures + dfa->nexts[node_idx]);
2619 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2620 - bkref_ent->subexp_from);
56b168be
UD
2621 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2622 mctx->eflags);
15a7d175
UD
2623 dest_state = mctx->state_log[dest_str_idx];
2624 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2625 : mctx->state_log[cur_str_idx]->nodes.nelem);
2626 /* Add `new_dest_node' to state_log. */
2627 if (dest_state == NULL)
2628 {
2629 mctx->state_log[dest_str_idx]
2630 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2631 context);
2632 if (BE (mctx->state_log[dest_str_idx] == NULL
2633 && err != REG_NOERROR, 0))
2634 goto free_return;
2635 }
2636 else
2637 {
2638 re_node_set dest_nodes;
2639 err = re_node_set_init_union (&dest_nodes,
2640 dest_state->entrance_nodes,
2641 new_dest_nodes);
2642 if (BE (err != REG_NOERROR, 0))
2643 {
2644 re_node_set_free (&dest_nodes);
2645 goto free_return;
2646 }
2647 mctx->state_log[dest_str_idx]
2648 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2649 re_node_set_free (&dest_nodes);
2650 if (BE (mctx->state_log[dest_str_idx] == NULL
2651 && err != REG_NOERROR, 0))
2652 goto free_return;
2653 }
2654 /* We need to check recursively if the backreference can epsilon
2655 transit. */
2656 if (subexp_len == 0
2657 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2658 {
e3a87852 2659 err = check_subexp_matching_top (mctx, new_dest_nodes,
6291ee3c
UD
2660 cur_str_idx);
2661 if (BE (err != REG_NOERROR, 0))
2662 goto free_return;
e3a87852 2663 err = transit_state_bkref (mctx, new_dest_nodes);
15a7d175
UD
2664 if (BE (err != REG_NOERROR, 0))
2665 goto free_return;
2666 }
2667 }
3b0bdc72 2668 }
1b2c2628
UD
2669 err = REG_NOERROR;
2670 free_return:
1b2c2628 2671 return err;
3b0bdc72
UD
2672}
2673
6291ee3c
UD
2674/* Enumerate all the candidates which the backreference BKREF_NODE can match
2675 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2676 Note that we might collect inappropriate candidates here.
2677 However, the cost of checking them strictly here is too high, then we
2678 delay these checking for prune_impossible_nodes(). */
3b0bdc72 2679
6291ee3c 2680static reg_errcode_t
2da42bc0 2681internal_function __attribute_warn_unused_result__
e2f55264 2682get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
3b0bdc72 2683{
6efbd82c 2684 const re_dfa_t *const dfa = mctx->dfa;
6291ee3c 2685 int subexp_num, sub_top_idx;
56b168be 2686 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
6291ee3c
UD
2687 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2688 int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
e40a38b3 2689 if (cache_idx != -1)
6291ee3c 2690 {
01ed6ceb
UD
2691 const struct re_backref_cache_entry *entry
2692 = mctx->bkref_ents + cache_idx;
e40a38b3 2693 do
2da42bc0 2694 if (entry->node == bkref_node)
e40a38b3
UD
2695 return REG_NOERROR; /* We already checked it. */
2696 while (entry++->more);
6291ee3c 2697 }
e40a38b3 2698
d8f73de8 2699 subexp_num = dfa->nodes[bkref_node].opr.idx;
6291ee3c
UD
2700
2701 /* For each sub expression */
2702 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2703 {
2704 reg_errcode_t err;
2705 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2706 re_sub_match_last_t *sub_last;
7c1be3ec 2707 int sub_last_idx, sl_str, bkref_str_off;
6291ee3c
UD
2708
2709 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2710 continue; /* It isn't related. */
2711
2712 sl_str = sub_top->str_idx;
7c1be3ec 2713 bkref_str_off = bkref_str_idx;
6291ee3c
UD
2714 /* At first, check the last node of sub expressions we already
2715 evaluated. */
2716 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2717 {
2718 int sl_str_diff;
2719 sub_last = sub_top->lasts[sub_last_idx];
2720 sl_str_diff = sub_last->str_idx - sl_str;
2721 /* The matched string by the sub expression match with the substring
2722 at the back reference? */
c1baba0f
UD
2723 if (sl_str_diff > 0)
2724 {
2725 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2726 {
2727 /* Not enough chars for a successful match. */
2728 if (bkref_str_off + sl_str_diff > mctx->input.len)
2729 break;
2730
2731 err = clean_state_log_if_needed (mctx,
2732 bkref_str_off
2733 + sl_str_diff);
2734 if (BE (err != REG_NOERROR, 0))
2735 return err;
2736 buf = (const char *) re_string_get_buffer (&mctx->input);
2737 }
2738 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
01ed6ceb
UD
2739 /* We don't need to search this sub expression any more. */
2740 break;
c1baba0f 2741 }
7c1be3ec 2742 bkref_str_off += sl_str_diff;
6291ee3c 2743 sl_str += sl_str_diff;
e3a87852 2744 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
6291ee3c 2745 bkref_str_idx);
3ee363e2 2746
7c1be3ec
UD
2747 /* Reload buf, since the preceding call might have reallocated
2748 the buffer. */
56b168be 2749 buf = (const char *) re_string_get_buffer (&mctx->input);
3ee363e2 2750
6291ee3c
UD
2751 if (err == REG_NOMATCH)
2752 continue;
2753 if (BE (err != REG_NOERROR, 0))
2754 return err;
2755 }
7c1be3ec 2756
6291ee3c
UD
2757 if (sub_last_idx < sub_top->nlasts)
2758 continue;
2759 if (sub_last_idx > 0)
2760 ++sl_str;
2761 /* Then, search for the other last nodes of the sub expression. */
2762 for (; sl_str <= bkref_str_idx; ++sl_str)
2763 {
2764 int cls_node, sl_str_off;
fe9434bb 2765 const re_node_set *nodes;
6291ee3c
UD
2766 sl_str_off = sl_str - sub_top->str_idx;
2767 /* The matched string by the sub expression match with the substring
2768 at the back reference? */
c1baba0f
UD
2769 if (sl_str_off > 0)
2770 {
2771 if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2772 {
2773 /* If we are at the end of the input, we cannot match. */
2774 if (bkref_str_off >= mctx->input.len)
2775 break;
2776
a445af0b 2777 err = extend_buffers (mctx, bkref_str_off + 1);
c1baba0f
UD
2778 if (BE (err != REG_NOERROR, 0))
2779 return err;
2780
2781 buf = (const char *) re_string_get_buffer (&mctx->input);
2782 }
2783 if (buf [bkref_str_off++] != buf[sl_str - 1])
2784 break; /* We don't need to search this sub expression
2785 any more. */
2786 }
6291ee3c
UD
2787 if (mctx->state_log[sl_str] == NULL)
2788 continue;
2789 /* Does this state have a ')' of the sub expression? */
2790 nodes = &mctx->state_log[sl_str]->nodes;
01ed6ceb
UD
2791 cls_node = find_subexp_node (dfa, nodes, subexp_num,
2792 OP_CLOSE_SUBEXP);
6291ee3c
UD
2793 if (cls_node == -1)
2794 continue; /* No. */
2795 if (sub_top->path == NULL)
2796 {
2797 sub_top->path = calloc (sizeof (state_array_t),
2798 sl_str - sub_top->str_idx + 1);
2799 if (sub_top->path == NULL)
2800 return REG_ESPACE;
2801 }
2802 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2803 in the current context? */
e3a87852 2804 err = check_arrival (mctx, sub_top->path, sub_top->node,
01ed6ceb
UD
2805 sub_top->str_idx, cls_node, sl_str,
2806 OP_CLOSE_SUBEXP);
6291ee3c
UD
2807 if (err == REG_NOMATCH)
2808 continue;
2809 if (BE (err != REG_NOERROR, 0))
2810 return err;
2811 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2812 if (BE (sub_last == NULL, 0))
2813 return REG_ESPACE;
e3a87852 2814 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
6291ee3c
UD
2815 bkref_str_idx);
2816 if (err == REG_NOMATCH)
2817 continue;
2818 }
2819 }
2820 return REG_NOERROR;
2821}
2822
2823/* Helper functions for get_subexp(). */
2824
2825/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2826 If it can arrive, register the sub expression expressed with SUB_TOP
2827 and SUB_LAST. */
2828
2829static reg_errcode_t
1ba81cea 2830internal_function
e2f55264
UD
2831get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2832 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
6291ee3c
UD
2833{
2834 reg_errcode_t err;
2835 int to_idx;
2836 /* Can the subexpression arrive the back reference? */
e3a87852 2837 err = check_arrival (mctx, &sub_last->path, sub_last->node,
01ed6ceb
UD
2838 sub_last->str_idx, bkref_node, bkref_str,
2839 OP_OPEN_SUBEXP);
6291ee3c
UD
2840 if (err != REG_NOERROR)
2841 return err;
2842 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2843 sub_last->str_idx);
2844 if (BE (err != REG_NOERROR, 0))
2845 return err;
2846 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
c1baba0f 2847 return clean_state_log_if_needed (mctx, to_idx);
6291ee3c
UD
2848}
2849
2850/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2851 Search '(' if FL_OPEN, or search ')' otherwise.
2852 TODO: This function isn't efficient...
2853 Because there might be more than one nodes whose types are
2854 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2855 nodes.
2856 E.g. RE: (a){2} */
2857
2858static int
1ba81cea 2859internal_function
e2f55264
UD
2860find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2861 int subexp_idx, int type)
6291ee3c
UD
2862{
2863 int cls_idx;
2864 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2865 {
2866 int cls_node = nodes->elems[cls_idx];
fe9434bb 2867 const re_token_t *node = dfa->nodes + cls_node;
0ce7f49c 2868 if (node->type == type
6291ee3c
UD
2869 && node->opr.idx == subexp_idx)
2870 return cls_node;
2871 }
2872 return -1;
2873}
2874
2875/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2876 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2877 heavily reused.
2878 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2879
2880static reg_errcode_t
2da42bc0 2881internal_function __attribute_warn_unused_result__
e2f55264
UD
2882check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2883 int top_str, int last_node, int last_str, int type)
6291ee3c 2884{
6efbd82c 2885 const re_dfa_t *const dfa = mctx->dfa;
c0c9615c 2886 reg_errcode_t err = REG_NOERROR;
6291ee3c
UD
2887 int subexp_num, backup_cur_idx, str_idx, null_cnt;
2888 re_dfastate_t *cur_state = NULL;
2889 re_node_set *cur_nodes, next_nodes;
2890 re_dfastate_t **backup_state_log;
2891 unsigned int context;
2892
2893 subexp_num = dfa->nodes[top_node].opr.idx;
2894 /* Extend the buffer if we need. */
951d6408 2895 if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
6291ee3c
UD
2896 {
2897 re_dfastate_t **new_array;
2898 int old_alloc = path->alloc;
2899 path->alloc += last_str + mctx->max_mb_elem_len + 1;
2900 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
6efbd82c 2901 if (BE (new_array == NULL, 0))
951d6408
UD
2902 {
2903 path->alloc = old_alloc;
2904 return REG_ESPACE;
2905 }
6291ee3c
UD
2906 path->array = new_array;
2907 memset (new_array + old_alloc, '\0',
2908 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2909 }
2910
513bbb25 2911 str_idx = path->next_idx ?: top_str;
6291ee3c
UD
2912
2913 /* Temporary modify MCTX. */
2914 backup_state_log = mctx->state_log;
56b168be 2915 backup_cur_idx = mctx->input.cur_idx;
6291ee3c 2916 mctx->state_log = path->array;
56b168be 2917 mctx->input.cur_idx = str_idx;
6291ee3c
UD
2918
2919 /* Setup initial node set. */
56b168be 2920 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
6291ee3c
UD
2921 if (str_idx == top_str)
2922 {
2923 err = re_node_set_init_1 (&next_nodes, top_node);
2924 if (BE (err != REG_NOERROR, 0))
2925 return err;
0ce7f49c 2926 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
6291ee3c
UD
2927 if (BE (err != REG_NOERROR, 0))
2928 {
2929 re_node_set_free (&next_nodes);
2930 return err;
2931 }
2932 }
2933 else
2934 {
2935 cur_state = mctx->state_log[str_idx];
2936 if (cur_state && cur_state->has_backref)
2937 {
2938 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
6efbd82c 2939 if (BE (err != REG_NOERROR, 0))
6291ee3c
UD
2940 return err;
2941 }
2942 else
2943 re_node_set_init_empty (&next_nodes);
2944 }
2945 if (str_idx == top_str || (cur_state && cur_state->has_backref))
2946 {
2947 if (next_nodes.nelem)
2948 {
d2c38eb3 2949 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
0ce7f49c 2950 subexp_num, type);
6efbd82c 2951 if (BE (err != REG_NOERROR, 0))
6291ee3c
UD
2952 {
2953 re_node_set_free (&next_nodes);
2954 return err;
2955 }
2956 }
2957 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2958 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2959 {
2960 re_node_set_free (&next_nodes);
2961 return err;
2962 }
2963 mctx->state_log[str_idx] = cur_state;
2964 }
2965
2966 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2967 {
2968 re_node_set_empty (&next_nodes);
2969 if (mctx->state_log[str_idx + 1])
2970 {
2971 err = re_node_set_merge (&next_nodes,
2972 &mctx->state_log[str_idx + 1]->nodes);
2973 if (BE (err != REG_NOERROR, 0))
2974 {
2975 re_node_set_free (&next_nodes);
2976 return err;
2977 }
2978 }
2979 if (cur_state)
2980 {
e3a87852 2981 err = check_arrival_add_next_nodes (mctx, str_idx,
6efbd82c
UD
2982 &cur_state->non_eps_nodes,
2983 &next_nodes);
6291ee3c
UD
2984 if (BE (err != REG_NOERROR, 0))
2985 {
2986 re_node_set_free (&next_nodes);
2987 return err;
2988 }
2989 }
2990 ++str_idx;
2991 if (next_nodes.nelem)
2992 {
0ce7f49c 2993 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
6291ee3c
UD
2994 if (BE (err != REG_NOERROR, 0))
2995 {
2996 re_node_set_free (&next_nodes);
2997 return err;
2998 }
d2c38eb3 2999 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
0ce7f49c 3000 subexp_num, type);
6efbd82c 3001 if (BE (err != REG_NOERROR, 0))
6291ee3c
UD
3002 {
3003 re_node_set_free (&next_nodes);
3004 return err;
3005 }
3006 }
56b168be 3007 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
6291ee3c
UD
3008 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3009 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3010 {
3011 re_node_set_free (&next_nodes);
3012 return err;
3013 }
3014 mctx->state_log[str_idx] = cur_state;
3015 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3016 }
3017 re_node_set_free (&next_nodes);
3018 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3019 : &mctx->state_log[last_str]->nodes);
3020 path->next_idx = str_idx;
3021
3022 /* Fix MCTX. */
3023 mctx->state_log = backup_state_log;
56b168be 3024 mctx->input.cur_idx = backup_cur_idx;
6291ee3c 3025
6291ee3c 3026 /* Then check the current node set has the node LAST_NODE. */
c0d5034e
UD
3027 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3028 return REG_NOERROR;
3029
3030 return REG_NOMATCH;
6291ee3c
UD
3031}
3032
3033/* Helper functions for check_arrival. */
3034
3035/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3036 to NEXT_NODES.
3037 TODO: This function is similar to the functions transit_state*(),
3038 however this function has many additional works.
3039 Can't we unify them? */
3040
3041static reg_errcode_t
2da42bc0 3042internal_function __attribute_warn_unused_result__
e2f55264
UD
3043check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
3044 re_node_set *cur_nodes, re_node_set *next_nodes)
6291ee3c 3045{
c42b4152 3046 const re_dfa_t *const dfa = mctx->dfa;
d2c38eb3 3047 int result;
6291ee3c 3048 int cur_idx;
c0c9615c 3049 reg_errcode_t err = REG_NOERROR;
6291ee3c
UD
3050 re_node_set union_set;
3051 re_node_set_init_empty (&union_set);
3052 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3053 {
3054 int naccepted = 0;
3055 int cur_node = cur_nodes->elems[cur_idx];
a334319f 3056#ifdef DEBUG
02f3550c 3057 re_token_type_t type = dfa->nodes[cur_node].type;
5cf1ec52
UD
3058 assert (!IS_EPSILON_NODE (type));
3059#endif
6291ee3c
UD
3060#ifdef RE_ENABLE_I18N
3061 /* If the node may accept `multi byte'. */
02f3550c 3062 if (dfa->nodes[cur_node].accept_mb)
6291ee3c 3063 {
56b168be 3064 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
6291ee3c
UD
3065 str_idx);
3066 if (naccepted > 1)
3067 {
3068 re_dfastate_t *dest_state;
3069 int next_node = dfa->nexts[cur_node];
3070 int next_idx = str_idx + naccepted;
3071 dest_state = mctx->state_log[next_idx];
3072 re_node_set_empty (&union_set);
3073 if (dest_state)
3074 {
3075 err = re_node_set_merge (&union_set, &dest_state->nodes);
3076 if (BE (err != REG_NOERROR, 0))
3077 {
3078 re_node_set_free (&union_set);
3079 return err;
3080 }
6291ee3c 3081 }
d2c38eb3
UD
3082 result = re_node_set_insert (&union_set, next_node);
3083 if (BE (result < 0, 0))
6291ee3c 3084 {
44777771
UD
3085 re_node_set_free (&union_set);
3086 return REG_ESPACE;
6291ee3c
UD
3087 }
3088 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3089 &union_set);
3090 if (BE (mctx->state_log[next_idx] == NULL
3091 && err != REG_NOERROR, 0))
3092 {
3093 re_node_set_free (&union_set);
3094 return err;
3095 }
3096 }
3097 }
3098#endif /* RE_ENABLE_I18N */
3099 if (naccepted
e3a87852 3100 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
6291ee3c 3101 {
d2c38eb3
UD
3102 result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3103 if (BE (result < 0, 0))
6291ee3c
UD
3104 {
3105 re_node_set_free (&union_set);
3106 return REG_ESPACE;
3107 }
3108 }
3109 }
3110 re_node_set_free (&union_set);
3111 return REG_NOERROR;
3112}
3113
3114/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3115 CUR_NODES, however exclude the nodes which are:
3116 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3117 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3118*/
3119
3120static reg_errcode_t
1ba81cea 3121internal_function
e2f55264
UD
3122check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3123 int ex_subexp, int type)
6291ee3c
UD
3124{
3125 reg_errcode_t err;
3126 int idx, outside_node;
3127 re_node_set new_nodes;
3128#ifdef DEBUG
3129 assert (cur_nodes->nelem);
3130#endif
3131 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3132 if (BE (err != REG_NOERROR, 0))
3133 return err;
3134 /* Create a new node set NEW_NODES with the nodes which are epsilon
3135 closures of the node in CUR_NODES. */
3136
3137 for (idx = 0; idx < cur_nodes->nelem; ++idx)
3138 {
3139 int cur_node = cur_nodes->elems[idx];
6efbd82c 3140 const re_node_set *eclosure = dfa->eclosures + cur_node;
0ce7f49c 3141 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
6291ee3c
UD
3142 if (outside_node == -1)
3143 {
3144 /* There are no problematic nodes, just merge them. */
3145 err = re_node_set_merge (&new_nodes, eclosure);
3146 if (BE (err != REG_NOERROR, 0))
3147 {
3148 re_node_set_free (&new_nodes);
3149 return err;
3150 }
3151 }
3152 else
3153 {
3154 /* There are problematic nodes, re-calculate incrementally. */
3155 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
0ce7f49c 3156 ex_subexp, type);
6291ee3c
UD
3157 if (BE (err != REG_NOERROR, 0))
3158 {
3159 re_node_set_free (&new_nodes);
3160 return err;
3161 }
3162 }
3163 }
3164 re_node_set_free (cur_nodes);
3165 *cur_nodes = new_nodes;
3166 return REG_NOERROR;
3167}
3168
3169/* Helper function for check_arrival_expand_ecl.
3170 Check incrementally the epsilon closure of TARGET, and if it isn't
3171 problematic append it to DST_NODES. */
3172
3173static reg_errcode_t
2da42bc0 3174internal_function __attribute_warn_unused_result__
e2f55264
UD
3175check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3176 int target, int ex_subexp, int type)
6291ee3c 3177{
0ce7f49c 3178 int cur_node;
6291ee3c
UD
3179 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3180 {
3181 int err;
6291ee3c 3182
0ce7f49c 3183 if (dfa->nodes[cur_node].type == type
6291ee3c
UD
3184 && dfa->nodes[cur_node].opr.idx == ex_subexp)
3185 {
0ce7f49c 3186 if (type == OP_CLOSE_SUBEXP)
6291ee3c
UD
3187 {
3188 err = re_node_set_insert (dst_nodes, cur_node);
3189 if (BE (err == -1, 0))
3190 return REG_ESPACE;
3191 }
3192 break;
3193 }
3194 err = re_node_set_insert (dst_nodes, cur_node);
3195 if (BE (err == -1, 0))
3196 return REG_ESPACE;
3197 if (dfa->edests[cur_node].nelem == 0)
3198 break;
3199 if (dfa->edests[cur_node].nelem == 2)
3200 {
3201 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3202 dfa->edests[cur_node].elems[1],
0ce7f49c 3203 ex_subexp, type);
6291ee3c
UD
3204 if (BE (err != REG_NOERROR, 0))
3205 return err;
3206 }
3207 cur_node = dfa->edests[cur_node].elems[0];
3208 }
3209 return REG_NOERROR;
3210}
3211
3212
3213/* For all the back references in the current state, calculate the
3214 destination of the back references by the appropriate entry
3215 in MCTX->BKREF_ENTS. */
3216
3217static reg_errcode_t
2da42bc0 3218internal_function __attribute_warn_unused_result__
e2f55264
UD
3219expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3220 int cur_str, int subexp_num, int type)
6291ee3c 3221{
76b864c8 3222 const re_dfa_t *const dfa = mctx->dfa;
6291ee3c 3223 reg_errcode_t err;
e40a38b3
UD
3224 int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3225 struct re_backref_cache_entry *ent;
6291ee3c 3226
e40a38b3
UD
3227 if (cache_idx_start == -1)
3228 return REG_NOERROR;
3229
3230 restart:
3231 ent = mctx->bkref_ents + cache_idx_start;
3232 do
6291ee3c
UD
3233 {
3234 int to_idx, next_node;
e40a38b3 3235
6291ee3c
UD
3236 /* Is this entry ENT is appropriate? */
3237 if (!re_node_set_contains (cur_nodes, ent->node))
3238 continue; /* No. */
3239
3240 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3241 /* Calculate the destination of the back reference, and append it
3242 to MCTX->STATE_LOG. */
3243 if (to_idx == cur_str)
3244 {
3245 /* The backreference did epsilon transit, we must re-check all the
3246 node in the current state. */
3247 re_node_set new_dests;
3248 reg_errcode_t err2, err3;
3249 next_node = dfa->edests[ent->node].elems[0];
3250 if (re_node_set_contains (cur_nodes, next_node))
3251 continue;
3252 err = re_node_set_init_1 (&new_dests, next_node);
0ce7f49c 3253 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
6291ee3c
UD
3254 err3 = re_node_set_merge (cur_nodes, &new_dests);
3255 re_node_set_free (&new_dests);
3256 if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3257 || err3 != REG_NOERROR, 0))
3258 {
3259 err = (err != REG_NOERROR ? err
3260 : (err2 != REG_NOERROR ? err2 : err3));
3261 return err;
3262 }
3263 /* TODO: It is still inefficient... */
e40a38b3 3264 goto restart;
6291ee3c
UD
3265 }
3266 else
3267 {
3268 re_node_set union_set;
3269 next_node = dfa->nexts[ent->node];
3270 if (mctx->state_log[to_idx])
3271 {
3272 int ret;
3273 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3274 next_node))
3275 continue;
3276 err = re_node_set_init_copy (&union_set,
3277 &mctx->state_log[to_idx]->nodes);
3278 ret = re_node_set_insert (&union_set, next_node);
3279 if (BE (err != REG_NOERROR || ret < 0, 0))
3280 {
3281 re_node_set_free (&union_set);
3282 err = err != REG_NOERROR ? err : REG_ESPACE;
3283 return err;
3284 }
3285 }
3286 else
3287 {
3288 err = re_node_set_init_1 (&union_set, next_node);
3289 if (BE (err != REG_NOERROR, 0))
3290 return err;
3291 }
3292 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3293 re_node_set_free (&union_set);
3294 if (BE (mctx->state_log[to_idx] == NULL
3295 && err != REG_NOERROR, 0))
3296 return err;
3297 }
3298 }
e40a38b3 3299 while (ent++->more);
6291ee3c
UD
3300 return REG_NOERROR;
3301}
3302
3303/* Build transition table for the state.
ab4b89fe 3304 Return 1 if succeeded, otherwise return NULL. */
6291ee3c 3305
ab4b89fe 3306static int
1ba81cea 3307internal_function
e2f55264 3308build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
6291ee3c
UD
3309{
3310 reg_errcode_t err;
ab4b89fe 3311 int i, j, ch, need_word_trtable = 0;
2c05d33f
UD
3312 bitset_word_t elem, mask;
3313 bool dests_node_malloced = false;
3314 bool dest_states_malloced = false;
6291ee3c
UD
3315 int ndests; /* Number of the destination states from `state'. */
3316 re_dfastate_t **trtable;
3317 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3318 re_node_set follows, *dests_node;
2c05d33f
UD
3319 bitset_t *dests_ch;
3320 bitset_t acceptable;
3321
3322 struct dests_alloc
3323 {
3324 re_node_set dests_node[SBC_MAX];
3325 bitset_t dests_ch[SBC_MAX];
3326 } *dests_alloc;
3b0bdc72
UD
3327
3328 /* We build DFA states which corresponds to the destination nodes
3329 from `state'. `dests_node[i]' represents the nodes which i-th
3330 destination state contains, and `dests_ch[i]' represents the
3331 characters which i-th destination state accepts. */
2c05d33f
UD
3332 if (__libc_use_alloca (sizeof (struct dests_alloc)))
3333 dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
05dab910 3334 else
05dab910 3335 {
2c05d33f
UD
3336 dests_alloc = re_malloc (struct dests_alloc, 1);
3337 if (BE (dests_alloc == NULL, 0))
ab4b89fe 3338 return 0;
2c05d33f 3339 dests_node_malloced = true;
05dab910 3340 }
2c05d33f
UD
3341 dests_node = dests_alloc->dests_node;
3342 dests_ch = dests_alloc->dests_ch;
3b0bdc72
UD
3343
3344 /* Initialize transiton table. */
ab4b89fe 3345 state->word_trtable = state->trtable = NULL;
3b0bdc72
UD
3346
3347 /* At first, group all nodes belonging to `state' into several
3348 destinations. */
56b168be 3349 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
bc15410e 3350 if (BE (ndests <= 0, 0))
3b0bdc72 3351 {
05dab910 3352 if (dests_node_malloced)
2c05d33f 3353 free (dests_alloc);
ab4b89fe 3354 /* Return 0 in case of an error, 1 otherwise. */
05dab910 3355 if (ndests == 0)
c13c99fa 3356 {
3ce12656 3357 state->trtable = (re_dfastate_t **)
ab4b89fe 3358 calloc (sizeof (re_dfastate_t *), SBC_MAX);
2543fef2
JM
3359 if (BE (state->trtable == NULL, 0))
3360 return 0;
ab4b89fe 3361 return 1;
c13c99fa 3362 }
ab4b89fe 3363 return 0;
3b0bdc72
UD
3364 }
3365
a9388965 3366 err = re_node_set_alloc (&follows, ndests + 1);
05dab910
RM
3367 if (BE (err != REG_NOERROR, 0))
3368 goto out_free;
daa84549
PE
3369
3370 /* Avoid arithmetic overflow in size calculation. */
3371 if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3372 / (3 * sizeof (re_dfastate_t *)))
3373 < ndests),
3374 0))
3375 goto out_free;
05dab910 3376
2c05d33f 3377 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
05dab910
RM
3378 + ndests * 3 * sizeof (re_dfastate_t *)))
3379 dest_states = (re_dfastate_t **)
ab4b89fe 3380 alloca (ndests * 3 * sizeof (re_dfastate_t *));
05dab910 3381 else
05dab910
RM
3382 {
3383 dest_states = (re_dfastate_t **)
ab4b89fe 3384 malloc (ndests * 3 * sizeof (re_dfastate_t *));
05dab910
RM
3385 if (BE (dest_states == NULL, 0))
3386 {
3387out_free:
3388 if (dest_states_malloced)
3389 free (dest_states);
3390 re_node_set_free (&follows);
3391 for (i = 0; i < ndests; ++i)
3392 re_node_set_free (dests_node + i);
05dab910 3393 if (dests_node_malloced)
2c05d33f 3394 free (dests_alloc);
ab4b89fe 3395 return 0;
05dab910 3396 }
2c05d33f 3397 dest_states_malloced = true;
05dab910
RM
3398 }
3399 dest_states_word = dest_states + ndests;
3400 dest_states_nl = dest_states_word + ndests;
3401 bitset_empty (acceptable);
a9388965 3402
3b0bdc72
UD
3403 /* Then build the states for all destinations. */
3404 for (i = 0; i < ndests; ++i)
3405 {
3406 int next_node;
3407 re_node_set_empty (&follows);
3408 /* Merge the follows of this destination states. */
3409 for (j = 0; j < dests_node[i].nelem; ++j)
15a7d175
UD
3410 {
3411 next_node = dfa->nexts[dests_node[i].elems[j]];
3412 if (next_node != -1)
3413 {
3414 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3415 if (BE (err != REG_NOERROR, 0))
05dab910 3416 goto out_free;
15a7d175
UD
3417 }
3418 }
a9388965 3419 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
bc15410e 3420 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
15a7d175 3421 goto out_free;
3b0bdc72 3422 /* If the new state has context constraint,
15a7d175 3423 build appropriate states for these contexts. */
3b0bdc72 3424 if (dest_states[i]->has_constraint)
15a7d175
UD
3425 {
3426 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3427 CONTEXT_WORD);
3428 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3429 goto out_free;
3ce12656 3430
ab4b89fe
UD
3431 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3432 need_word_trtable = 1;
3ce12656 3433
15a7d175
UD
3434 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3435 CONTEXT_NEWLINE);
3436 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3437 goto out_free;
3ce12656 3438 }
3b0bdc72 3439 else
15a7d175
UD
3440 {
3441 dest_states_word[i] = dest_states[i];
3442 dest_states_nl[i] = dest_states[i];
3443 }
3b0bdc72
UD
3444 bitset_merge (acceptable, dests_ch[i]);
3445 }
3446
ab4b89fe 3447 if (!BE (need_word_trtable, 0))
3ce12656
UD
3448 {
3449 /* We don't care about whether the following character is a word
3450 character, or we are in a single-byte character set so we can
3451 discern by looking at the character code: allocate a
3452 256-entry transition table. */
ab4b89fe
UD
3453 trtable = state->trtable =
3454 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3ce12656
UD
3455 if (BE (trtable == NULL, 0))
3456 goto out_free;
3457
3458 /* For all characters ch...: */
2c05d33f
UD
3459 for (i = 0; i < BITSET_WORDS; ++i)
3460 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3ce12656
UD
3461 elem;
3462 mask <<= 1, elem >>= 1, ++ch)
3463 if (BE (elem & 1, 0))
3464 {
3465 /* There must be exactly one destination which accepts
3466 character ch. See group_nodes_into_DFAstates. */
3467 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3468 ;
6b6557e8 3469
3ce12656 3470 /* j-th destination accepts the word character ch. */
56b168be 3471 if (dfa->word_char[i] & mask)
3ce12656
UD
3472 trtable[ch] = dest_states_word[j];
3473 else
3474 trtable[ch] = dest_states[j];
3475 }
3476 }
3477 else
3478 {
3479 /* We care about whether the following character is a word
3480 character, and we are in a multi-byte character set: discern
3481 by looking at the character code: build two 256-entry
3482 transition tables, one starting at trtable[0] and one
3483 starting at trtable[SBC_MAX]. */
ab4b89fe
UD
3484 trtable = state->word_trtable =
3485 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3ce12656
UD
3486 if (BE (trtable == NULL, 0))
3487 goto out_free;
6b6557e8 3488
3ce12656 3489 /* For all characters ch...: */
2c05d33f
UD
3490 for (i = 0; i < BITSET_WORDS; ++i)
3491 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3ce12656
UD
3492 elem;
3493 mask <<= 1, elem >>= 1, ++ch)
3494 if (BE (elem & 1, 0))
3495 {
3496 /* There must be exactly one destination which accepts
3497 character ch. See group_nodes_into_DFAstates. */
3498 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3499 ;
6b6557e8 3500
3ce12656
UD
3501 /* j-th destination accepts the word character ch. */
3502 trtable[ch] = dest_states[j];
3503 trtable[ch + SBC_MAX] = dest_states_word[j];
3504 }
3505 }
6b6557e8 3506
3b0bdc72 3507 /* new line */
c202c2c5
UD
3508 if (bitset_contain (acceptable, NEWLINE_CHAR))
3509 {
3510 /* The current state accepts newline character. */
3ce12656
UD
3511 for (j = 0; j < ndests; ++j)
3512 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
15a7d175
UD
3513 {
3514 /* k-th destination accepts newline character. */
3ce12656 3515 trtable[NEWLINE_CHAR] = dest_states_nl[j];
ab4b89fe 3516 if (need_word_trtable)
3ce12656 3517 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
15a7d175
UD
3518 /* There must be only one destination which accepts
3519 newline. See group_nodes_into_DFAstates. */
3520 break;
3521 }
c202c2c5 3522 }
3b0bdc72 3523
05dab910
RM
3524 if (dest_states_malloced)
3525 free (dest_states);
3b0bdc72
UD
3526
3527 re_node_set_free (&follows);
3528 for (i = 0; i < ndests; ++i)
3529 re_node_set_free (dests_node + i);
3530
05dab910 3531 if (dests_node_malloced)
2c05d33f 3532 free (dests_alloc);
3b0bdc72 3533
ab4b89fe 3534 return 1;
3b0bdc72
UD
3535}
3536
3537/* Group all nodes belonging to STATE into several destinations.
3538 Then for all destinations, set the nodes belonging to the destination
3539 to DESTS_NODE[i] and set the characters accepted by the destination
3540 to DEST_CH[i]. This function return the number of destinations. */
3541
3542static int
1ba81cea 3543internal_function
e2f55264
UD
3544group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3545 re_node_set *dests_node, bitset_t *dests_ch)
3b0bdc72 3546{
a9388965 3547 reg_errcode_t err;
d2c38eb3 3548 int result;
3b0bdc72
UD
3549 int i, j, k;
3550 int ndests; /* Number of the destinations from `state'. */
2c05d33f 3551 bitset_t accepts; /* Characters a node can accept. */
3b0bdc72
UD
3552 const re_node_set *cur_nodes = &state->nodes;
3553 bitset_empty (accepts);
3554 ndests = 0;
3555
3556 /* For all the nodes belonging to `state', */
3557 for (i = 0; i < cur_nodes->nelem; ++i)
3558 {
3b0bdc72
UD
3559 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3560 re_token_type_t type = node->type;
485d775d 3561 unsigned int constraint = node->constraint;
3b0bdc72
UD
3562
3563 /* Enumerate all single byte character this node can accept. */
3564 if (type == CHARACTER)
15a7d175 3565 bitset_set (accepts, node->opr.c);
3b0bdc72 3566 else if (type == SIMPLE_BRACKET)
15a7d175
UD
3567 {
3568 bitset_merge (accepts, node->opr.sbcset);
3569 }
3b0bdc72 3570 else if (type == OP_PERIOD)
15a7d175 3571 {
65e6becf
UD
3572#ifdef RE_ENABLE_I18N
3573 if (dfa->mb_cur_max > 1)
3574 bitset_merge (accepts, dfa->sb_char);
3575 else
0ce7f49c 3576#endif
65e6becf 3577 bitset_set_all (accepts);
56b168be 3578 if (!(dfa->syntax & RE_DOT_NEWLINE))
15a7d175 3579 bitset_clear (accepts, '\n');
56b168be 3580 if (dfa->syntax & RE_DOT_NOT_NULL)
15a7d175
UD
3581 bitset_clear (accepts, '\0');
3582 }
c0d5034e 3583#ifdef RE_ENABLE_I18N
ad7f28c2 3584 else if (type == OP_UTF8_PERIOD)
2da42bc0 3585 {
2c05d33f 3586 memset (accepts, '\xff', sizeof (bitset_t) / 2);
56b168be 3587 if (!(dfa->syntax & RE_DOT_NEWLINE))
ad7f28c2 3588 bitset_clear (accepts, '\n');
56b168be 3589 if (dfa->syntax & RE_DOT_NOT_NULL)
ad7f28c2 3590 bitset_clear (accepts, '\0');
2da42bc0 3591 }
c0d5034e 3592#endif
3b0bdc72 3593 else
15a7d175 3594 continue;
3b0bdc72
UD
3595
3596 /* Check the `accepts' and sift the characters which are not
15a7d175 3597 match it the context. */
3b0bdc72 3598 if (constraint)
15a7d175 3599 {
15a7d175
UD
3600 if (constraint & NEXT_NEWLINE_CONSTRAINT)
3601 {
2c05d33f 3602 bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
15a7d175
UD
3603 bitset_empty (accepts);
3604 if (accepts_newline)
3605 bitset_set (accepts, NEWLINE_CHAR);
3606 else
3607 continue;
3608 }
66b110e8
UD
3609 if (constraint & NEXT_ENDBUF_CONSTRAINT)
3610 {
3611 bitset_empty (accepts);
3612 continue;
3613 }
c13c99fa 3614
66b110e8 3615 if (constraint & NEXT_WORD_CONSTRAINT)
65e6becf 3616 {
2c05d33f 3617 bitset_word_t any_set = 0;
1cef7b3c
UD
3618 if (type == CHARACTER && !node->word_char)
3619 {
3620 bitset_empty (accepts);
3621 continue;
3622 }
65e6becf
UD
3623#ifdef RE_ENABLE_I18N
3624 if (dfa->mb_cur_max > 1)
2c05d33f 3625 for (j = 0; j < BITSET_WORDS; ++j)
457beec8 3626 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
65e6becf
UD
3627 else
3628#endif
2c05d33f 3629 for (j = 0; j < BITSET_WORDS; ++j)
457beec8
UD
3630 any_set |= (accepts[j] &= dfa->word_char[j]);
3631 if (!any_set)
3632 continue;
65e6becf 3633 }
66b110e8 3634 if (constraint & NEXT_NOTWORD_CONSTRAINT)
65e6becf 3635 {
2c05d33f 3636 bitset_word_t any_set = 0;
1cef7b3c
UD
3637 if (type == CHARACTER && node->word_char)
3638 {
3639 bitset_empty (accepts);
3640 continue;
3641 }
65e6becf
UD
3642#ifdef RE_ENABLE_I18N
3643 if (dfa->mb_cur_max > 1)
2c05d33f 3644 for (j = 0; j < BITSET_WORDS; ++j)
457beec8 3645 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
65e6becf
UD
3646 else
3647#endif
2c05d33f 3648 for (j = 0; j < BITSET_WORDS; ++j)
457beec8
UD
3649 any_set |= (accepts[j] &= ~dfa->word_char[j]);
3650 if (!any_set)
3651 continue;
65e6becf 3652 }
15a7d175 3653 }
3b0bdc72
UD
3654
3655 /* Then divide `accepts' into DFA states, or create a new
457beec8 3656 state. Above, we make sure that accepts is not empty. */
3b0bdc72 3657 for (j = 0; j < ndests; ++j)
15a7d175 3658 {
2c05d33f
UD
3659 bitset_t intersec; /* Intersection sets, see below. */
3660 bitset_t remains;
15a7d175 3661 /* Flags, see below. */
2c05d33f 3662 bitset_word_t has_intersec, not_subset, not_consumed;
15a7d175
UD
3663
3664 /* Optimization, skip if this state doesn't accept the character. */
3665 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3666 continue;
3667
3668 /* Enumerate the intersection set of this state and `accepts'. */
3669 has_intersec = 0;
2c05d33f 3670 for (k = 0; k < BITSET_WORDS; ++k)
15a7d175
UD
3671 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3672 /* And skip if the intersection set is empty. */
3673 if (!has_intersec)
3674 continue;
3675
3676 /* Then check if this state is a subset of `accepts'. */
3677 not_subset = not_consumed = 0;
2c05d33f 3678 for (k = 0; k < BITSET_WORDS; ++k)
15a7d175
UD
3679 {
3680 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3681 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3682 }
3683
3684 /* If this state isn't a subset of `accepts', create a
3685 new group state, which has the `remains'. */
3686 if (not_subset)
3687 {
3688 bitset_copy (dests_ch[ndests], remains);
3689 bitset_copy (dests_ch[j], intersec);
3690 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3691 if (BE (err != REG_NOERROR, 0))
3692 goto error_return;
3693 ++ndests;
3694 }
3695
3696 /* Put the position in the current group. */
d2c38eb3
UD
3697 result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3698 if (BE (result < 0, 0))
15a7d175
UD
3699 goto error_return;
3700
3701 /* If all characters are consumed, go to next node. */
3702 if (!not_consumed)
3703 break;
3704 }
3b0bdc72
UD
3705 /* Some characters remain, create a new group. */
3706 if (j == ndests)
15a7d175
UD
3707 {
3708 bitset_copy (dests_ch[ndests], accepts);
3709 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3710 if (BE (err != REG_NOERROR, 0))
3711 goto error_return;
3712 ++ndests;
3713 bitset_empty (accepts);
3714 }
3b0bdc72
UD
3715 }
3716 return ndests;
1b2c2628
UD
3717 error_return:
3718 for (j = 0; j < ndests; ++j)
3719 re_node_set_free (dests_node + j);
3720 return -1;
3b0bdc72
UD
3721}
3722
434d3784
UD
3723#ifdef RE_ENABLE_I18N
3724/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3725 Return the number of the bytes the node accepts.
3726 STR_IDX is the current index of the input string.
3727
3728 This function handles the nodes which can accept one character, or
3729 one collating element like '.', '[a-z]', opposite to the other nodes
3730 can only accept one byte. */
3b0bdc72 3731
8c0ab919
RM
3732# ifdef _LIBC
3733# include <locale/weight.h>
3734# endif
3735
3b0bdc72 3736static int
1ba81cea 3737internal_function
e2f55264
UD
3738check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3739 const re_string_t *input, int str_idx)
3b0bdc72 3740{
3b0bdc72 3741 const re_token_t *node = dfa->nodes + node_idx;
ad7f28c2 3742 int char_len, elem_len;
434d3784 3743 int i;
ad7f28c2
UD
3744
3745 if (BE (node->type == OP_UTF8_PERIOD, 0))
3746 {
3747 unsigned char c = re_string_byte_at (input, str_idx), d;
3748 if (BE (c < 0xc2, 1))
3749 return 0;
3750
3751 if (str_idx + 2 > input->len)
3752 return 0;
3753
3754 d = re_string_byte_at (input, str_idx + 1);
3755 if (c < 0xe0)
3756 return (d < 0x80 || d > 0xbf) ? 0 : 2;
3757 else if (c < 0xf0)
3758 {
3759 char_len = 3;
3760 if (c == 0xe0 && d < 0xa0)
3761 return 0;
3762 }
3763 else if (c < 0xf8)
3764 {
3765 char_len = 4;
3766 if (c == 0xf0 && d < 0x90)
3767 return 0;
3768 }
3769 else if (c < 0xfc)
3770 {
3771 char_len = 5;
3772 if (c == 0xf8 && d < 0x88)
3773 return 0;
3774 }
3775 else if (c < 0xfe)
3776 {
3777 char_len = 6;
3778 if (c == 0xfc && d < 0x84)
3779 return 0;
3780 }
3781 else
3782 return 0;
3783
3784 if (str_idx + char_len > input->len)
3785 return 0;
3786
3787 for (i = 1; i < char_len; ++i)
3788 {
3789 d = re_string_byte_at (input, str_idx + i);
3790 if (d < 0x80 || d > 0xbf)
3791 return 0;
3792 }
3793 return char_len;
3794 }
3795
3796 char_len = re_string_char_size_at (input, str_idx);
3b0bdc72
UD
3797 if (node->type == OP_PERIOD)
3798 {
ad7f28c2 3799 if (char_len <= 1)
2da42bc0 3800 return 0;
ad7f28c2
UD
3801 /* FIXME: I don't think this if is needed, as both '\n'
3802 and '\0' are char_len == 1. */
434d3784 3803 /* '.' accepts any one character except the following two cases. */
56b168be 3804 if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
15a7d175 3805 re_string_byte_at (input, str_idx) == '\n') ||
56b168be 3806 ((dfa->syntax & RE_DOT_NOT_NULL) &&
15a7d175
UD
3807 re_string_byte_at (input, str_idx) == '\0'))
3808 return 0;
3b0bdc72
UD
3809 return char_len;
3810 }
ad7f28c2
UD
3811
3812 elem_len = re_string_elem_size_at (input, str_idx);
6c2a04a7 3813 if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
ad7f28c2
UD
3814 return 0;
3815
3816 if (node->type == COMPLEX_BRACKET)
3b0bdc72
UD
3817 {
3818 const re_charset_t *cset = node->opr.mbcset;
434d3784 3819# ifdef _LIBC
1c99f950
UD
3820 const unsigned char *pin
3821 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
5f93cd52
UD
3822 int j;
3823 uint32_t nrules;
434d3784
UD
3824# endif /* _LIBC */
3825 int match_len = 0;
3826 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
15a7d175 3827 ? re_string_wchar_at (input, str_idx) : 0);
434d3784
UD
3828
3829 /* match with multibyte character? */
3830 for (i = 0; i < cset->nmbchars; ++i)
15a7d175
UD
3831 if (wc == cset->mbchars[i])
3832 {
3833 match_len = char_len;
3834 goto check_node_accept_bytes_match;
3835 }
434d3784
UD
3836 /* match with character_class? */
3837 for (i = 0; i < cset->nchar_classes; ++i)
15a7d175
UD
3838 {
3839 wctype_t wt = cset->char_classes[i];
3840 if (__iswctype (wc, wt))
3841 {
3842 match_len = char_len;
3843 goto check_node_accept_bytes_match;
3844 }
3845 }
434d3784
UD
3846
3847# ifdef _LIBC
5f93cd52 3848 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3b0bdc72 3849 if (nrules != 0)
15a7d175
UD
3850 {
3851 unsigned int in_collseq = 0;
3852 const int32_t *table, *indirect;
3853 const unsigned char *weights, *extra;
3854 const char *collseqwc;
3b0bdc72 3855
15a7d175
UD
3856 /* match with collating_symbol? */
3857 if (cset->ncoll_syms)
3858 extra = (const unsigned char *)
3859 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3860 for (i = 0; i < cset->ncoll_syms; ++i)
3861 {
3862 const unsigned char *coll_sym = extra + cset->coll_syms[i];
3863 /* Compare the length of input collating element and
3864 the length of current collating element. */
3865 if (*coll_sym != elem_len)
3866 continue;
3867 /* Compare each bytes. */
3868 for (j = 0; j < *coll_sym; j++)
3869 if (pin[j] != coll_sym[1 + j])
3870 break;
3871 if (j == *coll_sym)
3872 {
3873 /* Match if every bytes is equal. */
3874 match_len = j;
3875 goto check_node_accept_bytes_match;
3876 }
3877 }
3878
3879 if (cset->nranges)
3880 {
3881 if (elem_len <= char_len)
3882 {
3883 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
25337753 3884 in_collseq = __collseq_table_lookup (collseqwc, wc);
15a7d175
UD
3885 }
3886 else
3887 in_collseq = find_collation_sequence_value (pin, elem_len);
3888 }
3889 /* match with range expression? */
3890 for (i = 0; i < cset->nranges; ++i)
3891 if (cset->range_starts[i] <= in_collseq
3892 && in_collseq <= cset->range_ends[i])
3893 {
3894 match_len = elem_len;
3895 goto check_node_accept_bytes_match;
3896 }
3897
3898 /* match with equivalence_class? */
3899 if (cset->nequiv_classes)
3900 {
3901 const unsigned char *cp = pin;
3902 table = (const int32_t *)
3903 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3904 weights = (const unsigned char *)
3905 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3906 extra = (const unsigned char *)
3907 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3908 indirect = (const int32_t *)
3909 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
8c0ab919 3910 int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
15a7d175
UD
3911 if (idx > 0)
3912 for (i = 0; i < cset->nequiv_classes; ++i)
3913 {
3914 int32_t equiv_class_idx = cset->equiv_classes[i];
b7d1c5fa
UD
3915 size_t weight_len = weights[idx & 0xffffff];
3916 if (weight_len == weights[equiv_class_idx & 0xffffff]
3917 && (idx >> 24) == (equiv_class_idx >> 24))
15a7d175
UD
3918 {
3919 int cnt = 0;
b7d1c5fa
UD
3920
3921 idx &= 0xffffff;
3922 equiv_class_idx &= 0xffffff;
3923
15a7d175
UD
3924 while (cnt <= weight_len
3925 && (weights[equiv_class_idx + 1 + cnt]
3926 == weights[idx + 1 + cnt]))
3927 ++cnt;
3928 if (cnt > weight_len)
3929 {
3930 match_len = elem_len;
3931 goto check_node_accept_bytes_match;
3932 }
3933 }
3934 }
3935 }
3936 }
434d3784
UD
3937 else
3938# endif /* _LIBC */
15a7d175
UD
3939 {
3940 /* match with range expression? */
92b27c74 3941#if __GNUC__ >= 2
15a7d175 3942 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
92b27c74 3943#else
15a7d175
UD
3944 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3945 cmp_buf[2] = wc;
92b27c74 3946#endif
15a7d175
UD
3947 for (i = 0; i < cset->nranges; ++i)
3948 {
3949 cmp_buf[0] = cset->range_starts[i];
3950 cmp_buf[4] = cset->range_ends[i];
2f44ee08
JM
3951 if (__wcscoll (cmp_buf, cmp_buf + 2) <= 0
3952 && __wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
15a7d175
UD
3953 {
3954 match_len = char_len;
3955 goto check_node_accept_bytes_match;
3956 }
3957 }
3958 }
434d3784
UD
3959 check_node_accept_bytes_match:
3960 if (!cset->non_match)
15a7d175 3961 return match_len;
434d3784 3962 else
15a7d175
UD
3963 {
3964 if (match_len > 0)
3965 return 0;
3966 else
3967 return (elem_len > char_len) ? elem_len : char_len;
3968 }
3b0bdc72
UD
3969 }
3970 return 0;
3971}
3972
434d3784 3973# ifdef _LIBC
3b0bdc72 3974static unsigned int
1ba81cea 3975internal_function
e2f55264 3976find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3b0bdc72
UD
3977{
3978 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3979 if (nrules == 0)
3980 {
3981 if (mbs_len == 1)
15a7d175
UD
3982 {
3983 /* No valid character. Match it as a single byte character. */
3984 const unsigned char *collseq = (const unsigned char *)
3985 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3986 return collseq[mbs[0]];
3987 }
3b0bdc72
UD
3988 return UINT_MAX;
3989 }
3990 else
3991 {
3992 int32_t idx;
3993 const unsigned char *extra = (const unsigned char *)
15a7d175 3994 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6c2a04a7
UD
3995 int32_t extrasize = (const unsigned char *)
3996 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3b0bdc72 3997
6c2a04a7 3998 for (idx = 0; idx < extrasize;)
15a7d175
UD
3999 {
4000 int mbs_cnt, found = 0;
4001 int32_t elem_mbs_len;
4002 /* Skip the name of collating element name. */
4003 idx = idx + extra[idx] + 1;
4004 elem_mbs_len = extra[idx++];
4005 if (mbs_len == elem_mbs_len)
4006 {
4007 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4008 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4009 break;
4010 if (mbs_cnt == elem_mbs_len)
4011 /* Found the entry. */
4012 found = 1;
4013 }
4014 /* Skip the byte sequence of the collating element. */
4015 idx += elem_mbs_len;
4016 /* Adjust for the alignment. */
4017 idx = (idx + 3) & ~3;
4018 /* Skip the collation sequence value. */
4019 idx += sizeof (uint32_t);
4020 /* Skip the wide char sequence of the collating element. */
d84acf38 4021 idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
15a7d175
UD
4022 /* If we found the entry, return the sequence value. */
4023 if (found)
4024 return *(uint32_t *) (extra + idx);
4025 /* Skip the collation sequence value. */
4026 idx += sizeof (uint32_t);
4027 }
6c2a04a7 4028 return UINT_MAX;
3b0bdc72
UD
4029 }
4030}
434d3784
UD
4031# endif /* _LIBC */
4032#endif /* RE_ENABLE_I18N */
3b0bdc72
UD
4033
4034/* Check whether the node accepts the byte which is IDX-th
4035 byte of the INPUT. */
4036
4037static int
1ba81cea 4038internal_function
e2f55264
UD
4039check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4040 int idx)
3b0bdc72 4041{
3b0bdc72 4042 unsigned char ch;
56b168be 4043 ch = re_string_byte_at (&mctx->input, idx);
ad7f28c2
UD
4044 switch (node->type)
4045 {
4046 case CHARACTER:
5cf1ec52 4047 if (node->opr.c != ch)
2da42bc0 4048 return 0;
5cf1ec52
UD
4049 break;
4050
ad7f28c2 4051 case SIMPLE_BRACKET:
5cf1ec52 4052 if (!bitset_contain (node->opr.sbcset, ch))
2da42bc0 4053 return 0;
5cf1ec52
UD
4054 break;
4055
c0d5034e 4056#ifdef RE_ENABLE_I18N
ad7f28c2
UD
4057 case OP_UTF8_PERIOD:
4058 if (ch >= 0x80)
2da42bc0 4059 return 0;
ad7f28c2 4060 /* FALLTHROUGH */
c0d5034e 4061#endif
ad7f28c2 4062 case OP_PERIOD:
5cf1ec52
UD
4063 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4064 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4065 return 0;
4066 break;
4067
ad7f28c2
UD
4068 default:
4069 return 0;
4070 }
5cf1ec52
UD
4071
4072 if (node->constraint)
4073 {
4074 /* The node has constraints. Check whether the current context
4075 satisfies the constraints. */
4076 unsigned int context = re_string_context_at (&mctx->input, idx,
4077 mctx->eflags);
4078 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4079 return 0;
4080 }
4081
4082 return 1;
3b0bdc72 4083}
612546c6
UD
4084
4085/* Extend the buffers, if the buffers have run out. */
4086
4087static reg_errcode_t
2da42bc0 4088internal_function __attribute_warn_unused_result__
a445af0b 4089extend_buffers (re_match_context_t *mctx, int min_len)
612546c6
UD
4090{
4091 reg_errcode_t ret;
56b168be 4092 re_string_t *pstr = &mctx->input;
612546c6 4093
aef699dc
PE
4094 /* Avoid overflow. */
4095 if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4096 return REG_ESPACE;
4097
a445af0b
AS
4098 /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */
4099 ret = re_string_realloc_buffers (pstr,
4100 MAX (min_len,
4101 MIN (pstr->len, pstr->bufs_len * 2)));
612546c6
UD
4102 if (BE (ret != REG_NOERROR, 0))
4103 return ret;
4104
4105 if (mctx->state_log != NULL)
4106 {
4107 /* And double the length of state_log. */
951d6408
UD
4108 /* XXX We have no indication of the size of this buffer. If this
4109 allocation fail we have no indication that the state_log array
4110 does not have the right size. */
4111 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4112 pstr->bufs_len + 1);
1b2c2628 4113 if (BE (new_array == NULL, 0))
15a7d175 4114 return REG_ESPACE;
1b2c2628 4115 mctx->state_log = new_array;
612546c6
UD
4116 }
4117
4118 /* Then reconstruct the buffers. */
4119 if (pstr->icase)
4120 {
4121#ifdef RE_ENABLE_I18N
3c0fb574 4122 if (pstr->mb_cur_max > 1)
bb3f4825
UD
4123 {
4124 ret = build_wcs_upper_buffer (pstr);
4125 if (BE (ret != REG_NOERROR, 0))
4126 return ret;
4127 }
612546c6
UD
4128 else
4129#endif /* RE_ENABLE_I18N */
15a7d175 4130 build_upper_buffer (pstr);
612546c6
UD
4131 }
4132 else
4133 {
4134#ifdef RE_ENABLE_I18N
3c0fb574 4135 if (pstr->mb_cur_max > 1)
15a7d175 4136 build_wcs_buffer (pstr);
612546c6
UD
4137 else
4138#endif /* RE_ENABLE_I18N */
15a7d175
UD
4139 {
4140 if (pstr->trans != NULL)
4141 re_string_translate_buffer (pstr);
15a7d175 4142 }
612546c6
UD
4143 }
4144 return REG_NOERROR;
4145}
4146
3b0bdc72
UD
4147\f
4148/* Functions for matching context. */
4149
6291ee3c
UD
4150/* Initialize MCTX. */
4151
a9388965 4152static reg_errcode_t
2da42bc0 4153internal_function __attribute_warn_unused_result__
e2f55264 4154match_ctx_init (re_match_context_t *mctx, int eflags, int n)
3b0bdc72
UD
4155{
4156 mctx->eflags = eflags;
612546c6 4157 mctx->match_last = -1;
3b0bdc72 4158 if (n > 0)
a9388965
UD
4159 {
4160 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
6291ee3c
UD
4161 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4162 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
15a7d175 4163 return REG_ESPACE;
a9388965 4164 }
56b168be
UD
4165 /* Already zero-ed by the caller.
4166 else
4167 mctx->bkref_ents = NULL;
4168 mctx->nbkref_ents = 0;
4169 mctx->nsub_tops = 0; */
3b0bdc72 4170 mctx->abkref_ents = n;
6291ee3c 4171 mctx->max_mb_elem_len = 1;
6291ee3c 4172 mctx->asub_tops = n;
a9388965 4173 return REG_NOERROR;
3b0bdc72
UD
4174}
4175
6291ee3c
UD
4176/* Clean the entries which depend on the current input in MCTX.
4177 This function must be invoked when the matcher changes the start index
4178 of the input, or changes the input string. */
4179
4180static void
1ba81cea 4181internal_function
e2f55264 4182match_ctx_clean (re_match_context_t *mctx)
6291ee3c
UD
4183{
4184 int st_idx;
4185 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4186 {
4187 int sl_idx;
4188 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4189 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4190 {
4191 re_sub_match_last_t *last = top->lasts[sl_idx];
4192 re_free (last->path.array);
4193 re_free (last);
4194 }
4195 re_free (top->lasts);
4196 if (top->path)
4197 {
4198 re_free (top->path->array);
4199 re_free (top->path);
4200 }
4201 free (top);
4202 }
cb265fec
UD
4203
4204 mctx->nsub_tops = 0;
4205 mctx->nbkref_ents = 0;
4206}
4207
4208/* Free all the memory associated with MCTX. */
4209
4210static void
1ba81cea 4211internal_function
e2f55264 4212match_ctx_free (re_match_context_t *mctx)
cb265fec
UD
4213{
4214 /* First, free all the memory associated with MCTX->SUB_TOPS. */
4215 match_ctx_clean (mctx);
4216 re_free (mctx->sub_tops);
4217 re_free (mctx->bkref_ents);
6291ee3c
UD
4218}
4219
4220/* Add a new backreference entry to MCTX.
4221 Note that we assume that caller never call this function with duplicate
4222 entry, and call with STR_IDX which isn't smaller than any existing entry.
4223*/
3b0bdc72 4224
a9388965 4225static reg_errcode_t
2da42bc0 4226internal_function __attribute_warn_unused_result__
e2f55264
UD
4227match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4228 int to)
3b0bdc72
UD
4229{
4230 if (mctx->nbkref_ents >= mctx->abkref_ents)
4231 {
1b2c2628
UD
4232 struct re_backref_cache_entry* new_entry;
4233 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
15a7d175 4234 mctx->abkref_ents * 2);
1b2c2628 4235 if (BE (new_entry == NULL, 0))
15a7d175
UD
4236 {
4237 re_free (mctx->bkref_ents);
4238 return REG_ESPACE;
4239 }
1b2c2628 4240 mctx->bkref_ents = new_entry;
3b0bdc72 4241 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
15a7d175 4242 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
3b0bdc72
UD
4243 mctx->abkref_ents *= 2;
4244 }
e40a38b3
UD
4245 if (mctx->nbkref_ents > 0
4246 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4247 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4248
3b0bdc72 4249 mctx->bkref_ents[mctx->nbkref_ents].node = node;
0742e48e
UD
4250 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4251 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
e40a38b3 4252 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
7db61208
UD
4253
4254 /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4255 If bit N is clear, means that this entry won't epsilon-transition to
4256 an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4257 it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4258 such node.
4259
4260 A backreference does not epsilon-transition unless it is empty, so set
4261 to all zeros if FROM != TO. */
4262 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4263 = (from == to ? ~0 : 0);
4264
e40a38b3 4265 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
0742e48e
UD
4266 if (mctx->max_mb_elem_len < to - from)
4267 mctx->max_mb_elem_len = to - from;
a9388965 4268 return REG_NOERROR;
3b0bdc72 4269}
0742e48e 4270
e40a38b3
UD
4271/* Search for the first entry which has the same str_idx, or -1 if none is
4272 found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
6291ee3c
UD
4273
4274static int
1ba81cea 4275internal_function
e2f55264 4276search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
6291ee3c 4277{
e40a38b3
UD
4278 int left, right, mid, last;
4279 last = right = mctx->nbkref_ents;
6291ee3c
UD
4280 for (left = 0; left < right;)
4281 {
4282 mid = (left + right) / 2;
4283 if (mctx->bkref_ents[mid].str_idx < str_idx)
4284 left = mid + 1;
4285 else
4286 right = mid;
4287 }
e40a38b3
UD
4288 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4289 return left;
4290 else
4291 return -1;
6291ee3c
UD
4292}
4293
6291ee3c
UD
4294/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4295 at STR_IDX. */
4296
4297static reg_errcode_t
2da42bc0 4298internal_function __attribute_warn_unused_result__
e2f55264 4299match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
6291ee3c
UD
4300{
4301#ifdef DEBUG
4302 assert (mctx->sub_tops != NULL);
4303 assert (mctx->asub_tops > 0);
4304#endif
951d6408 4305 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
6291ee3c 4306 {
951d6408
UD
4307 int new_asub_tops = mctx->asub_tops * 2;
4308 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4309 re_sub_match_top_t *,
4310 new_asub_tops);
6291ee3c
UD
4311 if (BE (new_array == NULL, 0))
4312 return REG_ESPACE;
4313 mctx->sub_tops = new_array;
951d6408 4314 mctx->asub_tops = new_asub_tops;
6291ee3c
UD
4315 }
4316 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
951d6408 4317 if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
6291ee3c
UD
4318 return REG_ESPACE;
4319 mctx->sub_tops[mctx->nsub_tops]->node = node;
4320 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4321 return REG_NOERROR;
4322}
4323
4324/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4325 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4326
4327static re_sub_match_last_t *
1ba81cea 4328internal_function
e2f55264 4329match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
6291ee3c
UD
4330{
4331 re_sub_match_last_t *new_entry;
951d6408 4332 if (BE (subtop->nlasts == subtop->alasts, 0))
6291ee3c 4333 {
951d6408
UD
4334 int new_alasts = 2 * subtop->alasts + 1;
4335 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4336 re_sub_match_last_t *,
4337 new_alasts);
6291ee3c
UD
4338 if (BE (new_array == NULL, 0))
4339 return NULL;
4340 subtop->lasts = new_array;
951d6408 4341 subtop->alasts = new_alasts;
6291ee3c
UD
4342 }
4343 new_entry = calloc (1, sizeof (re_sub_match_last_t));
951d6408
UD
4344 if (BE (new_entry != NULL, 1))
4345 {
4346 subtop->lasts[subtop->nlasts] = new_entry;
4347 new_entry->node = node;
4348 new_entry->str_idx = str_idx;
4349 ++subtop->nlasts;
4350 }
6291ee3c
UD
4351 return new_entry;
4352}
4353
0742e48e 4354static void
1ba81cea 4355internal_function
e2f55264
UD
4356sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4357 re_dfastate_t **limited_sts, int last_node, int last_str_idx)
0742e48e
UD
4358{
4359 sctx->sifted_states = sifted_sts;
4360 sctx->limited_states = limited_sts;
4361 sctx->last_node = last_node;
4362 sctx->last_str_idx = last_str_idx;
0742e48e
UD
4363 re_node_set_init_empty (&sctx->limits);
4364}