]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regexec.c
Update.
[thirdparty/glibc.git] / posix / regexec.c
CommitLineData
3b0bdc72
UD
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21#include <assert.h>
22#include <ctype.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <wchar.h>
27#include <wctype.h>
28
29#ifdef _LIBC
30# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
31# define _RE_DEFINE_LOCALE_FUNCTIONS 1
32# include <locale/localeinfo.h>
33# include <locale/elem-hash.h>
34# include <locale/coll-lookup.h>
35# endif
36#endif
37
38#include "regex.h"
39#include "regex_internal.h"
40
a9388965 41static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
612546c6 42 re_string_t *input, int n);
3b0bdc72 43static void match_ctx_free (re_match_context_t *cache);
a9388965
UD
44static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
45 int from, int to);
46static reg_errcode_t re_search_internal (const regex_t *preg,
47 const char *string, int length,
48 int start, int range, size_t nmatch,
49 regmatch_t pmatch[], int eflags);
50static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
51 const regex_t *preg,
612546c6
UD
52 const re_match_context_t *mctx,
53 int idx);
54static int check_matching (const regex_t *preg, re_match_context_t *mctx,
55 int fl_search, int fl_longest_match);
3b0bdc72
UD
56static int check_halt_node_context (const re_dfa_t *dfa, int node,
57 unsigned int context);
58static int check_halt_state_context (const regex_t *preg,
59 const re_dfastate_t *state,
612546c6 60 const re_match_context_t *mctx, int idx);
3b0bdc72 61static int proceed_next_node (const regex_t *preg,
3b0bdc72 62 const re_match_context_t *mctx,
3b0bdc72 63 int *pidx, int node, re_node_set *eps_via_nodes);
612546c6 64static reg_errcode_t set_regs (const regex_t *preg,
a9388965 65 const re_match_context_t *mctx,
612546c6
UD
66 size_t nmatch, regmatch_t *pmatch, int last);
67static int sift_states_iter_mb (const regex_t *preg,
3b0bdc72 68 const re_match_context_t *mctx,
612546c6 69 int node_idx, int str_idx, int max_str_idx);
3b0bdc72
UD
70static int sift_states_iter_bkref (const re_dfa_t *dfa,
71 re_dfastate_t **state_log,
72 struct re_backref_cache_entry *mctx_entry,
612546c6 73 int node_idx, int idx, int match_last);
a9388965 74static reg_errcode_t sift_states_backward (const regex_t *preg,
a9388965 75 const re_match_context_t *mctx,
a9388965 76 int last_node);
612546c6
UD
77static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
78 int next_state_log_idx);
a9388965
UD
79static reg_errcode_t add_epsilon_backreference (const re_dfa_t *dfa,
80 const re_match_context_t *mctx,
81 const re_node_set *plog,
82 int idx,
83 re_node_set *state_buf);
84static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
612546c6
UD
85 re_match_context_t *mctx,
86 re_dfastate_t *state, int fl_search);
a9388965 87static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
3b0bdc72 88 re_dfastate_t *pstate,
612546c6 89 int fl_search,
3b0bdc72 90 re_match_context_t *mctx);
a9388965
UD
91static reg_errcode_t transit_state_mb (const regex_t *preg,
92 re_dfastate_t *pstate,
a9388965
UD
93 re_match_context_t *mctx);
94static reg_errcode_t transit_state_bkref (const regex_t *preg,
95 re_dfastate_t *pstate,
a9388965
UD
96 re_match_context_t *mctx);
97static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
a9388965
UD
98 re_node_set *nodes,
99 re_dfastate_t **work_state_log,
a9388965 100 re_match_context_t *mctx);
3b0bdc72
UD
101static re_dfastate_t **build_trtable (const regex_t *dfa,
102 const re_dfastate_t *state,
103 int fl_search);
104static int check_node_accept_bytes (const regex_t *preg, int node_idx,
105 const re_string_t *input, int idx);
106static unsigned int find_collation_sequence_value (const unsigned char *mbs,
107 size_t name_len);
108static int group_nodes_into_DFAstates (const regex_t *dfa,
109 const re_dfastate_t *state,
110 re_node_set *states_node,
111 bitset *states_ch);
112static int check_node_accept (const regex_t *preg, const re_token_t *node,
612546c6
UD
113 const re_match_context_t *mctx, int idx);
114static reg_errcode_t extend_buffers (re_match_context_t *mctx);
3b0bdc72
UD
115\f
116/* Entry point for POSIX code. */
117
118/* regexec searches for a given pattern, specified by PREG, in the
119 string STRING.
120
121 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
122 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
123 least NMATCH elements, and we set them to the offsets of the
124 corresponding matched substrings.
125
126 EFLAGS specifies `execution flags' which affect matching: if
127 REG_NOTBOL is set, then ^ does not match at the beginning of the
128 string; if REG_NOTEOL is set, then $ does not match at the end.
129
130 We return 0 if we find a match and REG_NOMATCH if not. */
131
132int
133regexec (preg, string, nmatch, pmatch, eflags)
134 const regex_t *preg;
135 const char *string;
136 size_t nmatch;
137 regmatch_t pmatch[];
138 int eflags;
139{
a9388965 140 reg_errcode_t err;
3b0bdc72
UD
141 int length = strlen (string);
142 if (preg->no_sub)
a9388965
UD
143 err = re_search_internal (preg, string, length, 0, length, 0,
144 NULL, eflags);
3b0bdc72 145 else
a9388965
UD
146 err = re_search_internal (preg, string, length, 0, length, nmatch,
147 pmatch, eflags);
148 return err != REG_NOERROR;
3b0bdc72
UD
149}
150#ifdef _LIBC
151weak_alias (__regexec, regexec)
152#endif
153
154/* Entry points for GNU code. */
155
156/* re_match is like re_match_2 except it takes only a single string. */
157
158int
159re_match (buffer, string, length, start, regs)
160 struct re_pattern_buffer *buffer;
161 const char *string;
162 int length, start;
163 struct re_registers *regs;
164{
a9388965 165 reg_errcode_t result;
bc15410e 166 int i, tmp_nregs, nregs, rval, eflags = 0;
3b0bdc72
UD
167 regmatch_t *pmatch;
168
169 eflags |= (buffer->not_bol) ? REG_NOTBOL : 0;
170 eflags |= (buffer->not_eol) ? REG_NOTEOL : 0;
171
172 /* We need at least 1 register. */
bc15410e
UD
173 tmp_nregs = ((buffer->no_sub || regs == NULL || regs->num_regs < 1) ? 1
174 : regs->num_regs);
175 nregs = ((tmp_nregs < buffer->re_nsub + 1
176 && buffer->regs_allocated == REGS_FIXED) ? tmp_nregs
177 : buffer->re_nsub + 1);
3b0bdc72 178 pmatch = re_malloc (regmatch_t, nregs);
bc15410e 179 if (BE (pmatch == NULL, 0))
3b0bdc72
UD
180 return -2;
181 result = re_search_internal (buffer, string, length, start, 0,
182 nregs, pmatch, eflags);
183
184 /* If caller wants register contents data back, do it. */
185 if (regs && !buffer->no_sub)
186 {
187 /* Have the register data arrays been allocated? */
188 if (buffer->regs_allocated == REGS_UNALLOCATED)
189 { /* No. So allocate them with malloc. We need one
190 extra element beyond `num_regs' for the `-1' marker
191 GNU code uses. */
bc15410e 192 regs->num_regs = buffer->re_nsub + 1;
3b0bdc72
UD
193 regs->start = re_malloc (regoff_t, regs->num_regs);
194 regs->end = re_malloc (regoff_t, regs->num_regs);
bc15410e 195 if (BE (regs->start == NULL || regs->end == NULL, 0))
3b0bdc72
UD
196 {
197 re_free (pmatch);
198 return -2;
199 }
200 buffer->regs_allocated = REGS_REALLOCATE;
201 }
202 else if (buffer->regs_allocated == REGS_REALLOCATE)
203 { /* Yes. If we need more elements than were already
204 allocated, reallocate them. If we need fewer, just
205 leave it alone. */
206 if (regs->num_regs < buffer->re_nsub + 1)
207 {
208 regs->num_regs = buffer->re_nsub + 1;
209 regs->start = re_realloc (regs->start, regoff_t, regs->num_regs);
210 regs->end = re_realloc (regs->end, regoff_t, regs->num_regs);
bc15410e 211 if (BE (regs->start == NULL || regs->end == NULL, 0))
3b0bdc72
UD
212 {
213 re_free (pmatch);
214 return -2;
215 }
216 }
217 }
218 else
219 {
220 /* These braces fend off a "empty body in an else-statement"
221 warning under GCC when assert expands to nothing. */
222 assert (buffer->regs_allocated == REGS_FIXED);
223 }
224 }
225
226 /* Restore registers. */
227 if (regs != NULL)
228 {
bc15410e
UD
229 int max_regs = ((regs->num_regs < buffer->re_nsub + 1) ? regs->num_regs
230 : buffer->re_nsub + 1);
231 for (i = 0; i < max_regs; ++i)
3b0bdc72
UD
232 {
233 regs->start[i] = pmatch[i].rm_so;
234 regs->end[i] = pmatch[i].rm_eo;
235 }
236 for ( ; i < regs->num_regs; ++i)
237 {
238 regs->start[i] = -1;
239 regs->end[i] = -1;
240 }
241 }
242 /* Return value is -1 if not match, the length of mathing otherwise. */
a9388965 243 rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so;
3b0bdc72
UD
244 re_free (pmatch);
245 return rval;
246}
247#ifdef _LIBC
248weak_alias (__re_match, re_match)
249#endif
250
251/* re_match_2 matches the compiled pattern in BUFP against the
252 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
253 and SIZE2, respectively). We start matching at POS, and stop
254 matching at STOP.
255
256 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
257 store offsets for the substring each group matched in REGS. See the
258 documentation for exactly how many groups we fill.
259
260 We return -1 if no match, -2 if an internal error.
261 Otherwise, we return the length of the matched substring. */
262
263int
264re_match_2 (buffer, string1, length1, string2, length2, start, regs, stop)
265 struct re_pattern_buffer *buffer;
266 const char *string1, *string2;
267 int length1, length2, start, stop;
268 struct re_registers *regs;
269{
270 int len, ret;
271 char *str = re_malloc (char, length1 + length2);
bc15410e 272 if (BE (str == NULL, 0))
3b0bdc72
UD
273 return -2;
274 memcpy (str, string1, length1);
275 memcpy (str + length1, string2, length2);
276 len = (length1 + length2 < stop) ? length1 + length2 : stop;
277 ret = re_match (buffer, str, len, start, regs);
278 re_free (str);
279 return ret;
280}
281#ifdef _LIBC
282weak_alias (__re_match_2, re_match_2)
283#endif
284
285/* Like re_search_2, below, but only one string is specified, and
286 doesn't let you say where to stop matching. */
287
288int
289re_search (bufp, string, size, startpos, range, regs)
290 struct re_pattern_buffer *bufp;
291 const char *string;
292 int size, startpos, range;
293 struct re_registers *regs;
294{
a9388965 295 reg_errcode_t result;
bc15410e 296 int i, tmp_nregs, nregs, real_range, rval, eflags = 0;
3b0bdc72
UD
297 regmatch_t *pmatch;
298
299 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
300 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
301
302 /* Check for out-of-range. */
bc15410e 303 if (BE (startpos < 0 || startpos > size, 0))
3b0bdc72
UD
304 return -1;
305
306 /* We need at least 1 register. */
bc15410e
UD
307 tmp_nregs = ((bufp->no_sub || regs == NULL || regs->num_regs < 1) ? 1
308 : regs->num_regs);
309 nregs = ((tmp_nregs < bufp->re_nsub + 1
310 && bufp->regs_allocated == REGS_FIXED) ? tmp_nregs
311 : bufp->re_nsub + 1);
3b0bdc72 312 pmatch = re_malloc (regmatch_t, nregs);
bc15410e
UD
313 if (BE (pmatch == NULL, 0))
314 return -2;
3b0bdc72
UD
315
316 /* Correct range if we need. */
317 real_range = ((startpos + range > size) ? size - startpos
318 : ((startpos + range < 0) ? -startpos : range));
319
320 /* Compile fastmap if we haven't yet. */
321 if (bufp->fastmap != NULL && !bufp->fastmap_accurate)
322 re_compile_fastmap (bufp);
323
324 result = re_search_internal (bufp, string, size, startpos, real_range,
325 nregs, pmatch, eflags);
326
327 /* If caller wants register contents data back, do it. */
328 if (regs && !bufp->no_sub)
329 {
330 /* Have the register data arrays been allocated? */
331 if (bufp->regs_allocated == REGS_UNALLOCATED)
332 { /* No. So allocate them with malloc. We need one
333 extra element beyond `num_regs' for the `-1' marker
334 GNU code uses. */
bc15410e 335 regs->num_regs = bufp->re_nsub + 1;
3b0bdc72
UD
336 regs->start = re_malloc (regoff_t, regs->num_regs);
337 regs->end = re_malloc (regoff_t, regs->num_regs);
bc15410e 338 if (BE (regs->start == NULL || regs->end == NULL, 0))
3b0bdc72
UD
339 {
340 re_free (pmatch);
341 return -2;
342 }
343 bufp->regs_allocated = REGS_REALLOCATE;
344 }
345 else if (bufp->regs_allocated == REGS_REALLOCATE)
346 { /* Yes. If we need more elements than were already
347 allocated, reallocate them. If we need fewer, just
348 leave it alone. */
349 if (regs->num_regs < bufp->re_nsub + 1)
350 {
351 regs->num_regs = bufp->re_nsub + 1;
352 regs->start = re_realloc (regs->start, regoff_t, regs->num_regs);
353 regs->end = re_realloc (regs->end, regoff_t, regs->num_regs);
bc15410e 354 if (BE (regs->start == NULL || regs->end == NULL, 0))
3b0bdc72
UD
355 {
356 re_free (pmatch);
357 return -2;
358 }
359 }
360 }
361 else
362 {
363 /* These braces fend off a "empty body in an else-statement"
364 warning under GCC when assert expands to nothing. */
365 assert (bufp->regs_allocated == REGS_FIXED);
366 }
367 }
368
369 /* Restore registers. */
370 if (regs != NULL)
371 {
bc15410e
UD
372 int max_regs = ((regs->num_regs < bufp->re_nsub + 1) ? regs->num_regs
373 : bufp->re_nsub + 1);
374 for (i = 0; i < max_regs; ++i)
3b0bdc72
UD
375 {
376 regs->start[i] = pmatch[i].rm_so;
377 regs->end[i] = pmatch[i].rm_eo;
378 }
379 for ( ; i < regs->num_regs; ++i)
380 {
381 regs->start[i] = -1;
382 regs->end[i] = -1;
383 }
384 }
385 /* Return value is -1 if not match, the position where the mathing starts
386 otherwise. */
a9388965 387 rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_so;
3b0bdc72
UD
388 re_free (pmatch);
389 return rval;
390}
391#ifdef _LIBC
392weak_alias (__re_search, re_search)
393#endif
394
395/* Using the compiled pattern in BUFP, first tries to match the virtual
396 concatenation of STRING1 and STRING2, starting first at index
397 STARTPOS, then at STARTPOS + 1, and so on.
398
399 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
400
401 RANGE is how far to scan while trying to match. RANGE = 0 means try
402 only at STARTPOS; in general, the last start tried is STARTPOS +
403 RANGE.
404
405 In REGS, return the indices of the virtual concatenation of STRING1
406 and STRING2 that matched the entire BUFP->buffer and its contained
407 subexpressions.
408
409 Do not consider matching one past the index STOP in the virtual
410 concatenation of STRING1 and STRING2.
411
412 We return either the position in the strings at which the match was
413 found, -1 if no match, or -2 if error. */
414
415int
416re_search_2 (bufp, string1, length1, string2, length2, start, range, regs,
417 stop)
418 struct re_pattern_buffer *bufp;
419 const char *string1, *string2;
420 int length1, length2, start, range, stop;
421 struct re_registers *regs;
422{
423 int len, ret;
424 char *str = re_malloc (char, length1 + length2);
425 memcpy (str, string1, length1);
426 memcpy (str + length1, string2, length2);
427 len = (length1 + length2 < stop) ? length1 + length2 : stop;
428 ret = re_search (bufp, str, len, start, range, regs);
429 re_free (str);
430 return ret;
431}
432#ifdef _LIBC
433weak_alias (__re_search_2, re_search_2)
434#endif
435
436/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
437 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
438 this memory for recording register information. STARTS and ENDS
439 must be allocated using the malloc library routine, and must each
440 be at least NUM_REGS * sizeof (regoff_t) bytes long.
441
442 If NUM_REGS == 0, then subsequent matches should allocate their own
443 register data.
444
445 Unless this function is called, the first search or match using
446 PATTERN_BUFFER will allocate its own register data, without
447 freeing the old data. */
448
449void
450re_set_registers (bufp, regs, num_regs, starts, ends)
451 struct re_pattern_buffer *bufp;
452 struct re_registers *regs;
453 unsigned num_regs;
454 regoff_t *starts, *ends;
455{
456 if (num_regs)
457 {
458 bufp->regs_allocated = REGS_REALLOCATE;
459 regs->num_regs = num_regs;
460 regs->start = starts;
461 regs->end = ends;
462 }
463 else
464 {
465 bufp->regs_allocated = REGS_UNALLOCATED;
466 regs->num_regs = 0;
467 regs->start = regs->end = (regoff_t *) 0;
468 }
469}
470#ifdef _LIBC
471weak_alias (__re_set_registers, re_set_registers)
472#endif
473\f
474/* Entry points compatible with 4.2 BSD regex library. We don't define
475 them unless specifically requested. */
476
477#if defined _REGEX_RE_COMP || defined _LIBC
478int
479# ifdef _LIBC
480weak_function
481# endif
482re_exec (s)
483 const char *s;
484{
485 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
486}
487#endif /* _REGEX_RE_COMP */
488\f
489static re_node_set empty_set;
490
491/* Internal entry point. */
492
493/* Searches for a compiled pattern PREG in the string STRING, whose
494 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
495 mingings with regexec. START, and RANGE have the same meanings
496 with re_search.
a9388965
UD
497 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
498 otherwise return the error code.
3b0bdc72
UD
499 Note: We assume front end functions already check ranges.
500 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
501
a9388965 502static reg_errcode_t
3b0bdc72
UD
503re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
504 const regex_t *preg;
505 const char *string;
506 int length, start, range, eflags;
507 size_t nmatch;
508 regmatch_t pmatch[];
509{
a9388965 510 reg_errcode_t err;
3b0bdc72
UD
511 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
512 re_string_t input;
612546c6 513 int left_lim, right_lim, incr;
3b0bdc72
UD
514 int fl_longest_match, match_first, match_last = -1;
515 re_match_context_t mctx;
516 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate)
517 ? preg->fastmap : NULL);
518
519 /* Check if the DFA haven't been compiled. */
bc15410e
UD
520 if (BE (preg->used == 0 || dfa->init_state == NULL
521 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
522 || dfa->init_state_begbuf == NULL, 0))
a9388965 523 return REG_NOMATCH;
3b0bdc72
UD
524
525 re_node_set_init_empty (&empty_set);
526
527 /* We must check the longest matching, if nmatch > 0. */
528 fl_longest_match = (nmatch != 0);
529
612546c6
UD
530 err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
531 preg->translate, preg->syntax & RE_ICASE);
532 if (BE (err != REG_NOERROR, 0))
533 return err;
534
535 err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
536 if (BE (err != REG_NOERROR, 0))
537 return err;
538
3b0bdc72
UD
539 /* We will log all the DFA states through which the dfa pass,
540 if nmatch > 1, or this dfa has "multibyte node", which is a
541 back-reference or a node which can accept multibyte character or
542 multi character collating element. */
543 if (nmatch > 1 || dfa->has_mb_node)
a9388965 544 {
612546c6
UD
545 mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
546 if (BE (mctx.state_log == NULL, 0))
a9388965
UD
547 return REG_ESPACE;
548 }
3b0bdc72 549 else
612546c6 550 mctx.state_log = NULL;
3b0bdc72
UD
551
552#ifdef DEBUG
553 /* We assume front-end functions already check them. */
554 assert (start + range >= 0 && start + range <= length);
555#endif
556
612546c6
UD
557 match_first = start;
558 input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
559 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
560
3b0bdc72 561 /* Check incrementally whether of not the input string match. */
612546c6
UD
562 incr = (range < 0) ? -1 : 1;
563 left_lim = (range < 0) ? start + range : start;
564 right_lim = (range < 0) ? start : start + range;
565
566 for (;;)
3b0bdc72 567 {
612546c6
UD
568 /* At first get the current byte from input string. */
569 int ch;
570 if (MB_CUR_MAX > 1 && (preg->syntax & RE_ICASE || preg->translate))
571 {
572 /* In this case, we can't determin easily the current byte,
573 since it might be a component byte of a multibyte character.
574 Then we use the constructed buffer instead. */
575 /* If MATCH_FIRST is out of the valid range, reconstruct the
576 buffers. */
577 if (input.raw_mbs_idx + input.valid_len <= match_first)
578 re_string_reconstruct (&input, match_first, eflags,
579 preg->newline_anchor);
580 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
581 Note that MATCH_FIRST must not be smaller than 0. */
582 ch = ((match_first >= length) ? 0
583 : re_string_byte_at (&input, match_first - input.raw_mbs_idx));
584 }
585 else
586 {
587 /* We apply translate/conversion manually, since it is trivial
588 in this case. */
589 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
590 Note that MATCH_FIRST must not be smaller than 0. */
591 ch = (match_first < length) ? (unsigned char)string[match_first] : 0;
592 /* Apply translation if we need. */
593 ch = preg->translate ? preg->translate[ch] : ch;
594 /* In case of case insensitive mode, convert to upper case. */
595 ch = ((preg->syntax & RE_ICASE) && islower (ch)) ? toupper (ch) : ch;
596 }
597
598 /* Eliminate inappropriate one by fastmap. */
599 if (preg->can_be_null || fastmap == NULL || fastmap[ch])
3b0bdc72 600 {
612546c6
UD
601 /* Reconstruct the buffers so that the matcher can assume that
602 the matching starts from the begining of the buffer. */
603 re_string_reconstruct (&input, match_first, eflags,
604 preg->newline_anchor);
3b0bdc72 605#ifdef RE_ENABLE_I18N
612546c6
UD
606 /* Eliminate it when it is a component of a multibyte character
607 and isn't the head of a multibyte character. */
608 if (MB_CUR_MAX == 1 || re_string_first_byte (&input, 0))
3b0bdc72
UD
609#endif
610 {
612546c6
UD
611 /* It seems to be appropriate one, then use the matcher. */
612 /* We assume that the matching starts from 0. */
613 mctx.state_log_top = mctx.nbkref_ents = mctx.max_bkref_len = 0;
614 match_last = check_matching (preg, &mctx, 0, fl_longest_match);
3b0bdc72 615 if (match_last != -1)
a9388965 616 {
bc15410e 617 if (BE (match_last == -2, 0))
a9388965
UD
618 return REG_ESPACE;
619 else
620 break; /* We found a matching. */
621 }
3b0bdc72
UD
622 }
623 }
624 /* Update counter. */
612546c6
UD
625 match_first += incr;
626 if (match_first < left_lim || right_lim < match_first)
627 break;
3b0bdc72
UD
628 }
629
630 /* Set pmatch[] if we need. */
631 if (match_last != -1 && nmatch > 0)
632 {
633 int reg_idx;
634
635 /* Initialize registers. */
636 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
637 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
638
639 /* Set the points where matching start/end. */
612546c6 640 pmatch[0].rm_so = 0;
3b0bdc72
UD
641 mctx.match_last = pmatch[0].rm_eo = match_last;
642
643 if (!preg->no_sub && nmatch > 1)
644 {
645 /* We need the ranges of all the subexpressions. */
646 int halt_node;
612546c6 647 re_dfastate_t *pstate = mctx.state_log[match_last];
3b0bdc72 648#ifdef DEBUG
612546c6 649 assert (mctx.state_log != NULL);
3b0bdc72 650#endif
612546c6
UD
651 halt_node = check_halt_state_context (preg, pstate, &mctx,
652 match_last);
653 err = sift_states_backward (preg, &mctx, halt_node);
bc15410e 654 if (BE (err != REG_NOERROR, 0))
a9388965 655 return err;
612546c6 656 err = set_regs (preg, &mctx, nmatch, pmatch, halt_node);
bc15410e 657 if (BE (err != REG_NOERROR, 0))
a9388965 658 return err;
3b0bdc72 659 }
612546c6
UD
660
661 /* At last, add the offset to the each registers, since we slided
662 the buffers so that We can assume that the matching starts from 0. */
663 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
664 if (pmatch[reg_idx].rm_so != -1)
665 {
666 pmatch[reg_idx].rm_so += match_first;
667 pmatch[reg_idx].rm_eo += match_first;
668 }
3b0bdc72
UD
669 }
670
612546c6 671 re_free (mctx.state_log);
3b0bdc72
UD
672 if (dfa->nbackref)
673 match_ctx_free (&mctx);
674 re_string_destruct (&input);
a9388965 675 return (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
3b0bdc72
UD
676}
677
a9388965 678/* Acquire an initial state and return it.
3b0bdc72
UD
679 We must select appropriate initial state depending on the context,
680 since initial states may have constraints like "\<", "^", etc.. */
681
682static inline re_dfastate_t *
612546c6 683acquire_init_state_context (err, preg, mctx, idx)
a9388965
UD
684 reg_errcode_t *err;
685 const regex_t *preg;
612546c6
UD
686 const re_match_context_t *mctx;
687 int idx;
3b0bdc72
UD
688{
689 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
690
a9388965 691 *err = REG_NOERROR;
3b0bdc72
UD
692 if (dfa->init_state->has_constraint)
693 {
694 unsigned int context;
612546c6 695 context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
3b0bdc72
UD
696 preg->newline_anchor);
697 if (IS_WORD_CONTEXT (context))
698 return dfa->init_state_word;
699 else if (IS_ORDINARY_CONTEXT (context))
700 return dfa->init_state;
701 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
702 return dfa->init_state_begbuf;
703 else if (IS_NEWLINE_CONTEXT (context))
704 return dfa->init_state_nl;
705 else if (IS_BEGBUF_CONTEXT (context))
a9388965
UD
706 {
707 /* It is relatively rare case, then calculate on demand. */
708 return re_acquire_state_context (err, dfa,
709 dfa->init_state->entrance_nodes,
710 context);
711 }
3b0bdc72
UD
712 else
713 /* Must not happen? */
714 return dfa->init_state;
715 }
716 else
717 return dfa->init_state;
718}
719
720/* Check whether the regular expression match input string INPUT or not,
a9388965
UD
721 and return the index where the matching end, return -1 if not match,
722 or return -2 in case of an error.
3b0bdc72 723 FL_SEARCH means we must search where the matching starts,
612546c6
UD
724 FL_LONGEST_MATCH means we want the POSIX longest matching.
725 Note that the matcher assume that the maching starts from the current
726 index of the buffer. */
3b0bdc72
UD
727
728static int
612546c6 729check_matching (preg, mctx, fl_search, fl_longest_match)
3b0bdc72 730 const regex_t *preg;
3b0bdc72 731 re_match_context_t *mctx;
612546c6 732 int fl_search, fl_longest_match;
3b0bdc72 733{
a9388965 734 reg_errcode_t err;
612546c6
UD
735 int match = 0;
736 int match_last = -1;
737 int cur_str_idx = re_string_cur_idx (mctx->input);
3b0bdc72
UD
738 re_dfastate_t *cur_state;
739
612546c6 740 cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
a9388965 741 /* An initial state must not be NULL(invalid state). */
bc15410e 742 if (BE (cur_state == NULL, 0))
a9388965 743 return -2;
612546c6
UD
744 if (mctx->state_log != NULL)
745 mctx->state_log[cur_str_idx] = cur_state;
3b0bdc72
UD
746 /* If the RE accepts NULL string. */
747 if (cur_state->halt)
748 {
749 if (!cur_state->has_constraint
612546c6 750 || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
3b0bdc72
UD
751 {
752 if (!fl_longest_match)
612546c6 753 return cur_str_idx;
3b0bdc72
UD
754 else
755 {
612546c6 756 match_last = cur_str_idx;
3b0bdc72
UD
757 match = 1;
758 }
759 }
760 }
761
612546c6 762 while (!re_string_eoi (mctx->input))
3b0bdc72 763 {
612546c6
UD
764 cur_state = transit_state (&err, preg, mctx, cur_state,
765 fl_search && !match);
a9388965 766 if (cur_state == NULL) /* Reached at the invalid state or an error. */
3b0bdc72 767 {
612546c6 768 cur_str_idx = re_string_cur_idx (mctx->input);
bc15410e 769 if (BE (err != REG_NOERROR, 0))
a9388965 770 return -2;
3b0bdc72
UD
771 if (fl_search && !match)
772 {
773 /* Restart from initial state, since we are searching
774 the point from where matching start. */
775#ifdef RE_ENABLE_I18N
612546c6
UD
776 if (MB_CUR_MAX == 1
777 || re_string_first_byte (mctx->input, cur_str_idx))
3b0bdc72 778#endif /* RE_ENABLE_I18N */
612546c6
UD
779 cur_state = acquire_init_state_context (&err, preg, mctx,
780 cur_str_idx);
bc15410e 781 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
a9388965 782 return -2;
612546c6
UD
783 if (mctx->state_log != NULL)
784 mctx->state_log[cur_str_idx] = cur_state;
3b0bdc72
UD
785 }
786 else if (!fl_longest_match && match)
787 break;
788 else /* (fl_longest_match && match) || (!fl_search && !match) */
789 {
612546c6 790 if (mctx->state_log == NULL)
3b0bdc72
UD
791 break;
792 else
793 {
794 int max = mctx->state_log_top;
795 for (; cur_str_idx <= max; ++cur_str_idx)
612546c6 796 if (mctx->state_log[cur_str_idx] != NULL)
3b0bdc72
UD
797 break;
798 if (cur_str_idx > max)
799 break;
800 }
801 }
802 }
803
804 if (cur_state != NULL && cur_state->halt)
805 {
806 /* Reached at a halt state.
807 Check the halt state can satisfy the current context. */
808 if (!cur_state->has_constraint
612546c6
UD
809 || check_halt_state_context (preg, cur_state, mctx,
810 re_string_cur_idx (mctx->input)))
3b0bdc72
UD
811 {
812 /* We found an appropriate halt state. */
612546c6 813 match_last = re_string_cur_idx (mctx->input);
3b0bdc72
UD
814 match = 1;
815 if (!fl_longest_match)
816 break;
817 }
818 }
819 }
820 return match_last;
821}
822
823/* Check NODE match the current context. */
824
825static int check_halt_node_context (dfa, node, context)
826 const re_dfa_t *dfa;
827 int node;
828 unsigned int context;
829{
830 int entity;
831 re_token_type_t type = dfa->nodes[node].type;
832 if (type == END_OF_RE)
833 return 1;
834 if (type != OP_CONTEXT_NODE)
835 return 0;
836 entity = dfa->nodes[node].opr.ctx_info->entity;
837 if (dfa->nodes[entity].type != END_OF_RE
838 || NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint, context))
839 return 0;
840 return 1;
841}
842
843/* Check the halt state STATE match the current context.
844 Return 0 if not match, if the node, STATE has, is a halt node and
845 match the context, return the node. */
846
847static int
612546c6 848check_halt_state_context (preg, state, mctx, idx)
3b0bdc72
UD
849 const regex_t *preg;
850 const re_dfastate_t *state;
612546c6
UD
851 const re_match_context_t *mctx;
852 int idx;
3b0bdc72
UD
853{
854 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
855 int i;
856 unsigned int context;
857#ifdef DEBUG
858 assert (state->halt);
859#endif
612546c6
UD
860 context = re_string_context_at (mctx->input, idx, mctx->eflags,
861 preg->newline_anchor);
3b0bdc72
UD
862 for (i = 0; i < state->nodes.nelem; ++i)
863 if (check_halt_node_context (dfa, state->nodes.elems[i], context))
864 return state->nodes.elems[i];
865 return 0;
866}
867
a9388965
UD
868/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
869 corresponding to the DFA).
870 Return the destination node, and update EPS_VIA_NODES, return -1 in case
871 of errors. */
3b0bdc72
UD
872
873static int
612546c6 874proceed_next_node (preg, mctx, pidx, node, eps_via_nodes)
3b0bdc72 875 const regex_t *preg;
3b0bdc72 876 const re_match_context_t *mctx;
3b0bdc72
UD
877 int *pidx, node;
878 re_node_set *eps_via_nodes;
879{
880 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
a9388965 881 int i, dest_node = -1, err;
3b0bdc72
UD
882 if (IS_EPSILON_NODE (dfa->nodes[node].type))
883 {
a9388965 884 err = re_node_set_insert (eps_via_nodes, node);
bc15410e 885 if (BE (err < 0, 0))
a9388965 886 return -1;
612546c6 887 for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
3b0bdc72 888 {
612546c6 889 int candidate = mctx->state_log[*pidx]->nodes.elems[i];
3b0bdc72
UD
890 if (!re_node_set_contains (dfa->edests + node, candidate)
891 && !(dfa->nodes[candidate].type == OP_CONTEXT_NODE
892 && re_node_set_contains (dfa->edests + node,
893 dfa->nodes[candidate].opr.ctx_info->entity)))
894 continue;
895 dest_node = candidate;
896 /* In order to avoid infinite loop like "(a*)*". */
897 if (!re_node_set_contains (eps_via_nodes, dest_node))
898 break;
899 }
900#ifdef DEBUG
901 assert (dest_node != -1);
902#endif
903 return dest_node;
904 }
905 else
906 {
907 int naccepted = 0, entity = node;
908 re_token_type_t type = dfa->nodes[node].type;
909 if (type == OP_CONTEXT_NODE)
910 {
911 entity = dfa->nodes[node].opr.ctx_info->entity;
912 type = dfa->nodes[entity].type;
913 }
914
915 if (ACCEPT_MB_NODE (type))
612546c6 916 naccepted = check_node_accept_bytes (preg, entity, mctx->input, *pidx);
3b0bdc72
UD
917 else if (type == OP_BACK_REF)
918 {
919 for (i = 0; i < mctx->nbkref_ents; ++i)
920 {
921 if (mctx->bkref_ents[i].node == node
922 && mctx->bkref_ents[i].from == *pidx)
923 naccepted = mctx->bkref_ents[i].to - *pidx;
924 }
925 if (naccepted == 0)
926 {
a9388965 927 err = re_node_set_insert (eps_via_nodes, node);
bc15410e 928 if (BE (err < 0, 0))
a9388965 929 return -1;
3b0bdc72 930 dest_node = dfa->nexts[node];
612546c6
UD
931 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
932 dest_node))
3b0bdc72 933 return dest_node;
612546c6 934 for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
3b0bdc72 935 {
612546c6 936 dest_node = mctx->state_log[*pidx]->nodes.elems[i];
3b0bdc72
UD
937 if ((dfa->nodes[dest_node].type == OP_CONTEXT_NODE
938 && (dfa->nexts[node]
939 == dfa->nodes[dest_node].opr.ctx_info->entity)))
940 return dest_node;
941 }
942 }
943 }
944
945 if (naccepted != 0
612546c6 946 || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
3b0bdc72
UD
947 {
948 dest_node = dfa->nexts[node];
949 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
950#ifdef DEBUG
612546c6 951 assert (mctx->state_log[*pidx] != NULL);
3b0bdc72
UD
952#endif
953 re_node_set_empty (eps_via_nodes);
954 return dest_node;
955 }
956 }
957 /* Must not reach here. */
958#ifdef DEBUG
959 assert (0);
960#endif
961 return 0;
962}
963
964/* Set the positions where the subexpressions are starts/ends to registers
965 PMATCH.
966 Note: We assume that pmatch[0] is already set, and
967 pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
968
a9388965 969static reg_errcode_t
612546c6 970set_regs (preg, mctx, nmatch, pmatch, last_node)
3b0bdc72 971 const regex_t *preg;
3b0bdc72 972 const re_match_context_t *mctx;
3b0bdc72
UD
973 size_t nmatch;
974 regmatch_t *pmatch;
975 int last_node;
976{
977 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
978 int idx, cur_node, node_entity, real_nmatch;
979 re_node_set eps_via_nodes;
980 int i;
981#ifdef DEBUG
982 assert (nmatch > 1);
612546c6 983 assert (mctx->state_log != NULL);
3b0bdc72
UD
984#endif
985 cur_node = dfa->init_node;
986 real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
987 re_node_set_init_empty (&eps_via_nodes);
988 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
989 {
990 node_entity = ((dfa->nodes[cur_node].type == OP_CONTEXT_NODE)
991 ? dfa->nodes[cur_node].opr.ctx_info->entity : cur_node);
992 for (i = 1; i < real_nmatch; ++i)
993 {
994 if (dfa->subexps[i - 1].start == dfa->subexps[i - 1].end)
995 {
996 /* In case of the null subexpression like '()'. */
997 if (dfa->subexps[i - 1].start == node_entity)
998 {
999 pmatch[i].rm_so = idx;
1000 pmatch[i].rm_eo = idx;
1001 }
1002 }
1003 else if (dfa->subexps[i - 1].start <= node_entity
1004 && node_entity < dfa->subexps[i - 1].end)
1005 {
1006 if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo != -1)
1007 /* We are at the first node of this sub expression. */
1008 {
1009 pmatch[i].rm_so = idx;
1010 pmatch[i].rm_eo = -1;
1011 }
1012 }
1013 else
1014 {
1015 if (pmatch[i].rm_so != -1 && pmatch[i].rm_eo == -1)
1016 /* We are at the last node of this sub expression. */
1017 pmatch[i].rm_eo = idx;
1018 }
1019 }
1020 if (idx == pmatch[0].rm_eo && cur_node == last_node)
1021 break;
1022
1023 /* Proceed to next node. */
612546c6 1024 cur_node = proceed_next_node (preg, mctx, &idx, cur_node, &eps_via_nodes);
bc15410e 1025 if (BE (cur_node < 0, 0))
a9388965 1026 return REG_ESPACE;
3b0bdc72
UD
1027 }
1028 re_node_set_free (&eps_via_nodes);
a9388965 1029 return REG_NOERROR;
3b0bdc72
UD
1030}
1031
1032#define NUMBER_OF_STATE 1
1033
612546c6
UD
1034/* This function checks the STATE_LOG from the MCTX->match_last to 0
1035 and sift the nodes in each states according to the following rules.
1036 Updated state_log will be wrote to STATE_LOG.
3b0bdc72
UD
1037
1038 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1039 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1040 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1041 the LAST_NODE, we throw away the node `a'.
612546c6 1042 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
3b0bdc72
UD
1043 string `s' and transit to `b':
1044 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1045 away the node `a'.
1046 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1047 throwed away, we throw away the node `a'.
1048 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
1049 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1050 node `a'.
1051 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
1052 we throw away the node `a'. */
1053
1054#define STATE_NODE_CONTAINS(state,node) \
1055 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1056
a9388965 1057static reg_errcode_t
612546c6 1058sift_states_backward (preg, mctx, last_node)
3b0bdc72 1059 const regex_t *preg;
3b0bdc72 1060 const re_match_context_t *mctx;
3b0bdc72
UD
1061 int last_node;
1062{
a9388965 1063 reg_errcode_t err;
3b0bdc72
UD
1064 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1065 re_node_set state_buf;
1066 int str_idx = mctx->match_last;
1067 re_node_set *plog; /* Points the state_log[str_idx]->nodes */
1068
1069#ifdef DEBUG
612546c6 1070 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
3b0bdc72 1071#endif
a9388965 1072 err = re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
bc15410e 1073 if (BE (err != REG_NOERROR, 0))
a9388965 1074 return err;
612546c6 1075 plog = &mctx->state_log[str_idx]->nodes;
3b0bdc72
UD
1076
1077 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1078 transit to the last_node and the last_node itself. */
a9388965 1079 err = re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node);
bc15410e 1080 if (BE (err != REG_NOERROR, 0))
a9388965 1081 return err;
3b0bdc72 1082
612546c6
UD
1083 if (mctx->state_log[str_idx] != NULL
1084 && mctx->state_log[str_idx]->has_backref)
a9388965
UD
1085 {
1086 err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
bc15410e 1087 if (BE (err != REG_NOERROR, 0))
a9388965
UD
1088 return err;
1089 }
3b0bdc72
UD
1090
1091 /* Update state log. */
612546c6
UD
1092 mctx->state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
1093 if (BE (mctx->state_log[str_idx] == NULL && err != REG_NOERROR, 0))
a9388965 1094 return err;
3b0bdc72
UD
1095
1096 /* Then check each states in the state_log. */
612546c6 1097 while (str_idx > 0)
3b0bdc72
UD
1098 {
1099 int i, j;
1100 /* Update counters. */
1101 re_node_set_empty (&state_buf);
1102 --str_idx;
612546c6
UD
1103 plog = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1104 : &mctx->state_log[str_idx]->nodes);
3b0bdc72
UD
1105
1106 /* Then build the next sifted state.
1107 We build the next sifted state on `state_buf', and update
1108 `state_log[str_idx]' with `state_buf'.
1109 Note:
1110 `state_buf' is the sifted state from `state_log[str_idx + 1]'.
1111 `plog' points the node_set of the old `state_log[str_idx]'. */
1112 for (i = 0; i < plog->nelem; i++)
1113 {
1114 int prev_node = plog->elems[i];
1115 int entity = prev_node;
1116 int naccepted = 0;
1117 re_token_type_t type = dfa->nodes[prev_node].type;
1118 if (type == OP_CONTEXT_NODE)
1119 {
1120 entity = dfa->nodes[prev_node].opr.ctx_info->entity;
1121 type = dfa->nodes[entity].type;
1122 }
1123
1124 /* If the node may accept `multi byte'. */
1125 if (ACCEPT_MB_NODE (type))
612546c6 1126 naccepted = sift_states_iter_mb (preg, mctx, entity, str_idx,
3b0bdc72
UD
1127 mctx->match_last);
1128
1129 /* If the node is a back reference. */
1130 else if (type == OP_BACK_REF)
1131 for (j = 0; j < mctx->nbkref_ents; ++j)
1132 {
612546c6 1133 naccepted = sift_states_iter_bkref (dfa, mctx->state_log,
3b0bdc72
UD
1134 mctx->bkref_ents + j,
1135 prev_node, str_idx,
3b0bdc72
UD
1136 mctx->match_last);
1137 if (naccepted)
1138 break;
1139 }
1140
1141 if (!naccepted
612546c6
UD
1142 && check_node_accept (preg, dfa->nodes + prev_node, mctx,
1143 str_idx)
1144 && STATE_NODE_CONTAINS (mctx->state_log[str_idx + 1],
3b0bdc72
UD
1145 dfa->nexts[prev_node]))
1146 naccepted = 1;
1147
1148 if (naccepted == 0)
1149 continue;
1150
1151 /* `prev_node' may point the entity of the OP_CONTEXT_NODE,
1152 then we use plog->elems[i] instead. */
a9388965
UD
1153 err = re_node_set_add_intersect (&state_buf, plog,
1154 dfa->inveclosures + prev_node);
bc15410e 1155 if (BE (err != REG_NOERROR, 0))
a9388965 1156 return err;
3b0bdc72 1157 }
612546c6
UD
1158 if (mctx->state_log[str_idx] != NULL
1159 && mctx->state_log[str_idx]->has_backref)
a9388965
UD
1160 {
1161 err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
bc15410e 1162 if (BE (err != REG_NOERROR, 0))
a9388965
UD
1163 return err;
1164 }
3b0bdc72
UD
1165
1166 /* Update state_log. */
612546c6
UD
1167 mctx->state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
1168 if (BE (mctx->state_log[str_idx] == NULL && err != REG_NOERROR, 0))
a9388965 1169 return err;
3b0bdc72
UD
1170 }
1171
1172 re_node_set_free (&state_buf);
a9388965 1173 return REG_NOERROR;
3b0bdc72
UD
1174}
1175
1176/* Helper functions. */
1177
612546c6
UD
1178static inline reg_errcode_t
1179clean_state_log_if_need (mctx, next_state_log_idx)
3b0bdc72
UD
1180 re_match_context_t *mctx;
1181 int next_state_log_idx;
1182{
1183 int top = mctx->state_log_top;
612546c6
UD
1184
1185 if (next_state_log_idx >= mctx->input->bufs_len
1186 || (next_state_log_idx >= mctx->input->valid_len
1187 && mctx->input->valid_len < mctx->input->len))
1188 {
1189 reg_errcode_t err;
1190 err = extend_buffers (mctx);
1191 if (BE (err != REG_NOERROR, 0))
1192 return err;
1193 }
1194
3b0bdc72
UD
1195 if (top < next_state_log_idx)
1196 {
612546c6 1197 memset (mctx->state_log + top + 1, '\0',
3b0bdc72
UD
1198 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1199 mctx->state_log_top = next_state_log_idx;
1200 }
612546c6 1201 return REG_NOERROR;
3b0bdc72
UD
1202}
1203
1204static int
612546c6 1205sift_states_iter_mb (preg, mctx, node_idx, str_idx, max_str_idx)
3b0bdc72 1206 const regex_t *preg;
3b0bdc72 1207 const re_match_context_t *mctx;
3b0bdc72
UD
1208 int node_idx, str_idx, max_str_idx;
1209{
1210 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1211 int naccepted;
1212 /* Check the node can accept `multi byte'. */
612546c6 1213 naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
3b0bdc72 1214 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
612546c6 1215 !STATE_NODE_CONTAINS (mctx->state_log[str_idx + naccepted],
3b0bdc72
UD
1216 dfa->nexts[node_idx]))
1217 /* The node can't accept the `multi byte', or the
1218 destination was already throwed away, then the node
1219 could't accept the current input `multi byte'. */
1220 naccepted = 0;
1221 /* Otherwise, it is sure that the node could accept
1222 `naccepted' bytes input. */
1223 return naccepted;
1224}
1225
1226static int
612546c6 1227sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_last)
3b0bdc72
UD
1228 const re_dfa_t *dfa;
1229 re_dfastate_t **state_log;
1230 struct re_backref_cache_entry *mctx_entry;
612546c6 1231 int node_idx, idx, match_last;
3b0bdc72
UD
1232{
1233 int naccepted = 0;
1234 int from_idx, to_idx;
1235 from_idx = mctx_entry->from;
1236 to_idx = mctx_entry->to;
1237 if (mctx_entry->node == node_idx
1238 && from_idx == idx && to_idx <= match_last
1239 && STATE_NODE_CONTAINS (state_log[to_idx], dfa->nexts[node_idx]))
1240 naccepted = to_idx - from_idx;
1241 return naccepted;
1242}
1243
a9388965 1244static reg_errcode_t
3b0bdc72
UD
1245add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
1246 const re_dfa_t *dfa;
1247 const re_match_context_t *mctx;
1248 const re_node_set *plog;
1249 int idx;
1250 re_node_set *state_buf;
1251{
1252 int i, j;
1253 for (i = 0; i < plog->nelem; ++i)
1254 {
1255 int node_idx = plog->elems[i];
1256 re_token_type_t type = dfa->nodes[node_idx].type;
1257 if (type == OP_CONTEXT_NODE)
1258 type = dfa->nodes[dfa->nodes[node_idx].opr.ctx_info->entity].type;
1259
1260 if (type == OP_BACK_REF &&
1261 !re_node_set_contains (state_buf, node_idx))
1262 {
1263 for (j = 0; j < mctx->nbkref_ents; ++j)
1264 {
1265 struct re_backref_cache_entry *entry;
1266 entry = mctx->bkref_ents + j;
1267 if (entry->from == entry->to && entry->from == idx)
1268 break;
1269 }
612546c6 1270 if (j < mctx->nbkref_ents || idx == 0)
3b0bdc72 1271 {
a9388965
UD
1272 reg_errcode_t err;
1273 err = re_node_set_add_intersect (state_buf, plog,
1274 dfa->inveclosures + node_idx);
bc15410e 1275 if (BE (err != REG_NOERROR, 0))
a9388965 1276 return err;
3b0bdc72
UD
1277 i = 0;
1278 }
1279 }
1280 }
a9388965 1281 return REG_NOERROR;
3b0bdc72
UD
1282}
1283\f
1284/* Functions for state transition. */
1285
1286/* Return the next state to which the current state STATE will transit by
1287 accepting the current input byte, and update STATE_LOG if necessary.
1288 If STATE can accept a multibyte char/collating element/back reference
1289 update the destination of STATE_LOG. */
1290
1291static re_dfastate_t *
612546c6 1292transit_state (err, preg, mctx, state, fl_search)
a9388965
UD
1293 reg_errcode_t *err;
1294 const regex_t *preg;
a9388965 1295 re_match_context_t *mctx;
612546c6
UD
1296 re_dfastate_t *state;
1297 int fl_search;
3b0bdc72
UD
1298{
1299 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1300 re_dfastate_t **trtable, *next_state;
1301 unsigned char ch;
1302
612546c6
UD
1303 if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
1304 || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
1305 && mctx->input->valid_len < mctx->input->len))
1306 {
1307 *err = extend_buffers (mctx);
1308 if (BE (*err != REG_NOERROR, 0))
1309 return NULL;
1310 }
1311
a9388965 1312 *err = REG_NOERROR;
3b0bdc72
UD
1313 if (state == NULL)
1314 {
1315 next_state = state;
612546c6 1316 re_string_skip_bytes (mctx->input, 1);
3b0bdc72
UD
1317 }
1318 else
1319 {
1320 /* If the current state can accept multibyte. */
1321 if (state->accept_mb)
a9388965 1322 {
612546c6 1323 *err = transit_state_mb (preg, state, mctx);
bc15410e 1324 if (BE (*err != REG_NOERROR, 0))
a9388965
UD
1325 return NULL;
1326 }
3b0bdc72
UD
1327
1328 /* Then decide the next state with the single byte. */
1329 if (1)
1330 {
1331 /* Use transition table */
612546c6 1332 ch = re_string_fetch_byte (mctx->input);
3b0bdc72
UD
1333 trtable = fl_search ? state->trtable_search : state->trtable;
1334 if (trtable == NULL)
1335 {
1336 trtable = build_trtable (preg, state, fl_search);
1337 if (fl_search)
1338 state->trtable_search = trtable;
1339 else
1340 state->trtable = trtable;
1341 }
1342 next_state = trtable[ch];
1343 }
1344 else
1345 {
1346 /* don't use transition table */
612546c6 1347 next_state = transit_state_sb (err, preg, state, fl_search, mctx);
bc15410e 1348 if (BE (next_state == NULL && err != REG_NOERROR, 0))
a9388965 1349 return NULL;
3b0bdc72
UD
1350 }
1351 }
1352
1353 /* Update the state_log if we need. */
612546c6 1354 if (mctx->state_log != NULL)
3b0bdc72 1355 {
612546c6 1356 int cur_idx = re_string_cur_idx (mctx->input);
3b0bdc72
UD
1357 if (cur_idx > mctx->state_log_top)
1358 {
612546c6 1359 mctx->state_log[cur_idx] = next_state;
3b0bdc72
UD
1360 mctx->state_log_top = cur_idx;
1361 }
612546c6 1362 else if (mctx->state_log[cur_idx] == 0)
3b0bdc72 1363 {
612546c6 1364 mctx->state_log[cur_idx] = next_state;
3b0bdc72
UD
1365 }
1366 else
1367 {
1368 re_dfastate_t *pstate;
1369 unsigned int context;
1370 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
1371 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
1372 the destination of a multibyte char/collating element/
1373 back reference. Then the next state is the union set of
1374 these destinations and the results of the transition table. */
612546c6 1375 pstate = mctx->state_log[cur_idx];
3b0bdc72
UD
1376 log_nodes = pstate->entrance_nodes;
1377 if (next_state != NULL)
1378 {
1379 table_nodes = next_state->entrance_nodes;
a9388965
UD
1380 *err = re_node_set_init_union (&next_nodes, table_nodes,
1381 log_nodes);
bc15410e 1382 if (BE (*err != REG_NOERROR, 0))
a9388965 1383 return NULL;
3b0bdc72
UD
1384 }
1385 else
1386 next_nodes = *log_nodes;
1387 /* Note: We already add the nodes of the initial state,
1388 then we don't need to add them here. */
1389
612546c6
UD
1390 context = re_string_context_at (mctx->input,
1391 re_string_cur_idx (mctx->input) - 1,
3b0bdc72 1392 mctx->eflags, preg->newline_anchor);
612546c6 1393 next_state = mctx->state_log[cur_idx]
a9388965
UD
1394 = re_acquire_state_context (err, dfa, &next_nodes, context);
1395 /* We don't need to check errors here, since the return value of
1396 this function is next_state and ERR is already set. */
1397
3b0bdc72
UD
1398 if (table_nodes != NULL)
1399 re_node_set_free (&next_nodes);
1400 }
1401 /* If the next state has back references. */
1402 if (next_state != NULL && next_state->has_backref)
1403 {
612546c6 1404 *err = transit_state_bkref (preg, next_state, mctx);
bc15410e 1405 if (BE (*err != REG_NOERROR, 0))
a9388965 1406 return NULL;
612546c6 1407 next_state = mctx->state_log[cur_idx];
3b0bdc72
UD
1408 }
1409 }
1410 return next_state;
1411}
1412
1413/* Helper functions for transit_state. */
1414
1415/* Return the next state to which the current state STATE will transit by
1416 accepting the current input byte. */
1417
1418static re_dfastate_t *
612546c6 1419transit_state_sb (err, preg, state, fl_search, mctx)
a9388965
UD
1420 reg_errcode_t *err;
1421 const regex_t *preg;
1422 re_dfastate_t *state;
a9388965
UD
1423 int fl_search;
1424 re_match_context_t *mctx;
3b0bdc72
UD
1425{
1426 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1427 re_node_set next_nodes;
1428 re_dfastate_t *next_state;
612546c6 1429 int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
3b0bdc72
UD
1430 unsigned int context;
1431
a9388965 1432 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
bc15410e 1433 if (BE (*err != REG_NOERROR, 0))
a9388965 1434 return NULL;
3b0bdc72
UD
1435 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
1436 {
1437 int cur_node = state->nodes.elems[node_cnt];
612546c6 1438 if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
a9388965
UD
1439 {
1440 *err = re_node_set_merge (&next_nodes,
1441 dfa->eclosures + dfa->nexts[cur_node]);
bc15410e 1442 if (BE (*err != REG_NOERROR, 0))
a9388965
UD
1443 return NULL;
1444 }
3b0bdc72
UD
1445 }
1446 if (fl_search)
1447 {
1448#ifdef RE_ENABLE_I18N
1449 int not_initial = 0;
1450 if (MB_CUR_MAX > 1)
1451 for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
1452 if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
1453 {
1454 not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
1455 break;
1456 }
1457 if (!not_initial)
1458#endif
a9388965
UD
1459 {
1460 *err = re_node_set_merge (&next_nodes,
1461 dfa->init_state->entrance_nodes);
bc15410e 1462 if (BE (*err != REG_NOERROR, 0))
a9388965
UD
1463 return NULL;
1464 }
3b0bdc72 1465 }
612546c6 1466 context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
3b0bdc72 1467 preg->newline_anchor);
a9388965
UD
1468 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
1469 /* We don't need to check errors here, since the return value of
1470 this function is next_state and ERR is already set. */
1471
3b0bdc72 1472 re_node_set_free (&next_nodes);
612546c6 1473 re_string_skip_bytes (mctx->input, 1);
3b0bdc72
UD
1474 return next_state;
1475}
1476
a9388965 1477static reg_errcode_t
612546c6 1478transit_state_mb (preg, pstate, mctx)
3b0bdc72 1479 const regex_t *preg;
612546c6 1480 re_dfastate_t *pstate;
3b0bdc72
UD
1481 re_match_context_t *mctx;
1482{
a9388965 1483 reg_errcode_t err;
3b0bdc72
UD
1484 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1485 int i;
1486
1487 for (i = 0; i < pstate->nodes.nelem; ++i)
1488 {
1489 re_node_set dest_nodes, *new_nodes;
1490 int cur_node_idx = pstate->nodes.elems[i];
1491 int naccepted = 0, dest_idx;
1492 unsigned int context;
1493 re_dfastate_t *dest_state;
1494
1495 if (dfa->nodes[cur_node_idx].type == OP_CONTEXT_NODE)
1496 {
612546c6
UD
1497 context = re_string_context_at (mctx->input,
1498 re_string_cur_idx (mctx->input),
3b0bdc72
UD
1499 mctx->eflags, preg->newline_anchor);
1500 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
1501 context))
1502 continue;
1503 cur_node_idx = dfa->nodes[cur_node_idx].opr.ctx_info->entity;
1504 }
1505
1506 /* How many bytes the node can accepts? */
1507 if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
612546c6
UD
1508 naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
1509 re_string_cur_idx (mctx->input));
3b0bdc72
UD
1510 if (naccepted == 0)
1511 continue;
1512
1513 /* The node can accepts `naccepted' bytes. */
612546c6
UD
1514 dest_idx = re_string_cur_idx (mctx->input) + naccepted;
1515 err = clean_state_log_if_need (mctx, dest_idx);
1516 if (BE (err != REG_NOERROR, 0))
1517 return err;
3b0bdc72
UD
1518#ifdef DEBUG
1519 assert (dfa->nexts[cur_node_idx] != -1);
1520#endif
1521 /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
1522 then we use pstate->nodes.elems[i] instead. */
1523 new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
1524
612546c6 1525 dest_state = mctx->state_log[dest_idx];
3b0bdc72
UD
1526 if (dest_state == NULL)
1527 dest_nodes = *new_nodes;
1528 else
a9388965
UD
1529 {
1530 err = re_node_set_init_union (&dest_nodes,
1531 dest_state->entrance_nodes, new_nodes);
bc15410e 1532 if (BE (err != REG_NOERROR, 0))
a9388965
UD
1533 return err;
1534 }
612546c6 1535 context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
3b0bdc72 1536 preg->newline_anchor);
612546c6
UD
1537 mctx->state_log[dest_idx]
1538 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
1539 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
a9388965 1540 return err;
3b0bdc72
UD
1541 if (dest_state != NULL)
1542 re_node_set_free (&dest_nodes);
1543 }
a9388965 1544 return REG_NOERROR;
3b0bdc72
UD
1545}
1546
a9388965 1547static reg_errcode_t
612546c6 1548transit_state_bkref (preg, pstate, mctx)
3b0bdc72 1549 const regex_t *preg;
612546c6 1550 re_dfastate_t *pstate;
3b0bdc72
UD
1551 re_match_context_t *mctx;
1552{
a9388965 1553 reg_errcode_t err;
3b0bdc72
UD
1554 re_dfastate_t **work_state_log;
1555
612546c6
UD
1556 work_state_log = re_malloc (re_dfastate_t *,
1557 re_string_cur_idx (mctx->input) + 1);
bc15410e 1558 if (BE (work_state_log == NULL, 0))
a9388965 1559 return REG_ESPACE;
3b0bdc72 1560
612546c6 1561 err = transit_state_bkref_loop (preg, &pstate->nodes, work_state_log, mctx);
3b0bdc72 1562 re_free (work_state_log);
a9388965 1563 return err;
3b0bdc72
UD
1564}
1565
1566/* Caller must allocate `work_state_log'. */
1567
a9388965 1568static reg_errcode_t
612546c6 1569transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
3b0bdc72 1570 const regex_t *preg;
3b0bdc72 1571 re_node_set *nodes;
612546c6 1572 re_dfastate_t **work_state_log;
3b0bdc72
UD
1573 re_match_context_t *mctx;
1574{
a9388965 1575 reg_errcode_t err;
3b0bdc72
UD
1576 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1577 int i, j;
612546c6 1578 re_dfastate_t **state_log_bak;
3b0bdc72 1579 regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1);
612546c6 1580 int cur_str_idx = re_string_cur_idx (mctx->input);
bc15410e 1581 if (BE (cur_regs == NULL, 0))
a9388965 1582 return REG_ESPACE;
3b0bdc72
UD
1583
1584 for (i = 0; i < nodes->nelem; ++i)
1585 {
612546c6 1586 unsigned char *buf;
3b0bdc72
UD
1587 int dest_str_idx, subexp_idx, prev_nelem, subexp_len;
1588 int node_idx = nodes->elems[i];
1589 unsigned int context;
1590 re_token_t *node = dfa->nodes + node_idx;
1591 re_dfastate_t *dest_state;
1592 re_node_set *new_dest_nodes;
1593
1594 /* Check whether `node' is a backreference or not. */
1595 if (node->type == OP_BACK_REF)
1596 subexp_idx = node->opr.idx;
1597 else if (node->type == OP_CONTEXT_NODE &&
1598 dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF)
1599 {
612546c6
UD
1600 context = re_string_context_at (mctx->input, cur_str_idx,
1601 mctx->eflags, preg->newline_anchor);
3b0bdc72
UD
1602 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
1603 continue;
1604 subexp_idx = dfa->nodes[node->opr.ctx_info->entity].opr.idx;
1605 }
1606 else
1607 continue;
1608
1609 /* `node' is a backreference.
1610 At first, set registers to check the backreference. */
612546c6 1611 cur_regs[0].rm_so = 0;
3b0bdc72 1612 cur_regs[0].rm_eo = cur_str_idx;
612546c6
UD
1613 memcpy (work_state_log, mctx->state_log,
1614 sizeof (re_dfastate_t *) * (cur_str_idx + 1));
3b0bdc72 1615 mctx->match_last = cur_str_idx;
612546c6
UD
1616 state_log_bak = mctx->state_log;
1617 mctx->state_log = work_state_log;
1618 sift_states_backward (preg, mctx, node_idx);
1619 if (!STATE_NODE_CONTAINS (work_state_log[0], dfa->init_node))
3b0bdc72
UD
1620 continue;
1621 for (j = 1; j <= preg->re_nsub; ++j)
1622 cur_regs[j].rm_so = cur_regs[j].rm_eo = -1;
612546c6
UD
1623 set_regs (preg, mctx, subexp_idx + 1, cur_regs, node_idx);
1624 mctx->state_log = state_log_bak;
3b0bdc72
UD
1625
1626 /* Then check that the backreference can match the input string. */
1627 subexp_len = cur_regs[subexp_idx].rm_eo - cur_regs[subexp_idx].rm_so;
612546c6
UD
1628 if (subexp_len < 0 || cur_str_idx + subexp_len > mctx->input->len)
1629 continue;
1630
1631 if (cur_str_idx + subexp_len > mctx->input->valid_len
1632 && mctx->input->valid_len < mctx->input->len)
1633 {
1634 reg_errcode_t err;
1635 err = extend_buffers (mctx);
1636 if (BE (err != REG_NOERROR, 0))
1637 return err;
1638 }
1639 buf = re_string_get_buffer (mctx->input);
1640 if (strncmp (buf + cur_regs[subexp_idx].rm_so, buf + cur_str_idx,
1641 subexp_len) != 0)
3b0bdc72
UD
1642 continue;
1643
1644 /* Successfully matched, add a new cache entry. */
1645 dest_str_idx = cur_str_idx + subexp_len;
a9388965 1646 err = match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx);
bc15410e 1647 if (BE (err != REG_NOERROR, 0))
a9388965 1648 return err;
612546c6
UD
1649 err = clean_state_log_if_need (mctx, dest_str_idx);
1650 if (BE (err != REG_NOERROR, 0))
1651 return err;
3b0bdc72
UD
1652
1653 /* And add the epsilon closures (which is `new_dest_nodes') of
1654 the backreference to appropriate state_log. */
1655#ifdef DEBUG
1656 assert (dfa->nexts[node_idx] != -1);
1657#endif
1658 if (node->type == OP_CONTEXT_NODE && subexp_len == 0)
1659 new_dest_nodes = dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure;
1660 else
1661 new_dest_nodes = dfa->eclosures + dfa->nexts[node_idx];
612546c6
UD
1662 context = (IS_WORD_CHAR (re_string_byte_at (mctx->input,
1663 dest_str_idx - 1))
3b0bdc72 1664 ? CONTEXT_WORD : 0);
612546c6 1665 dest_state = mctx->state_log[dest_str_idx];
3b0bdc72 1666
612546c6
UD
1667 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
1668 : mctx->state_log[cur_str_idx]->nodes.nelem);
3b0bdc72
UD
1669 /* Add `new_dest_node' to state_log. */
1670 if (dest_state == NULL)
a9388965 1671 {
612546c6
UD
1672 mctx->state_log[dest_str_idx]
1673 = re_acquire_state_context (&err, dfa, new_dest_nodes, context);
1674 if (BE (mctx->state_log[dest_str_idx] == NULL
1675 && err != REG_NOERROR, 0))
a9388965
UD
1676 return err;
1677 }
3b0bdc72
UD
1678 else
1679 {
1680 re_node_set dest_nodes;
a9388965
UD
1681 err = re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
1682 new_dest_nodes);
bc15410e 1683 if (BE (err != REG_NOERROR, 0))
a9388965 1684 return err;
612546c6
UD
1685 mctx->state_log[dest_str_idx]
1686 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
1687 if (BE (mctx->state_log[dest_str_idx] == NULL
1688 && err != REG_NOERROR, 0))
a9388965 1689 return err;
3b0bdc72
UD
1690 re_node_set_free (&dest_nodes);
1691 }
1692
1693 /* We need to check recursively if the backreference can epsilon
1694 transit. */
612546c6
UD
1695 if (subexp_len == 0
1696 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
a9388965 1697 {
612546c6
UD
1698 err = transit_state_bkref_loop (preg, new_dest_nodes, work_state_log,
1699 mctx);
bc15410e 1700 if (BE (err != REG_NOERROR, 0))
a9388965
UD
1701 return err;
1702 }
3b0bdc72
UD
1703 }
1704 re_free (cur_regs);
a9388965 1705 return REG_NOERROR;
3b0bdc72
UD
1706}
1707
a9388965
UD
1708/* Build transition table for the state.
1709 Return the new table if succeeded, otherwise return NULL. */
3b0bdc72
UD
1710
1711static re_dfastate_t **
1712build_trtable (preg, state, fl_search)
1713 const regex_t *preg;
1714 const re_dfastate_t *state;
1715 int fl_search;
1716{
a9388965 1717 reg_errcode_t err;
3b0bdc72
UD
1718 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1719 int i, j, k, ch;
1720 int ndests; /* Number of the destination states from `state'. */
1721 re_dfastate_t **trtable, **dest_states, **dest_states_word, **dest_states_nl;
1722 re_node_set follows, *dests_node;
1723 bitset *dests_ch;
1724 bitset acceptable;
1725
1726 /* We build DFA states which corresponds to the destination nodes
1727 from `state'. `dests_node[i]' represents the nodes which i-th
1728 destination state contains, and `dests_ch[i]' represents the
1729 characters which i-th destination state accepts. */
1730 dests_node = re_malloc (re_node_set, SBC_MAX);
1731 dests_ch = re_malloc (bitset, SBC_MAX);
1732
1733 /* Initialize transiton table. */
1734 trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
bc15410e 1735 if (BE (dests_node == NULL || dests_ch == NULL || trtable == NULL, 0))
a9388965 1736 return NULL;
3b0bdc72
UD
1737
1738 /* At first, group all nodes belonging to `state' into several
1739 destinations. */
1740 ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
bc15410e 1741 if (BE (ndests <= 0, 0))
3b0bdc72
UD
1742 {
1743 re_free (dests_node);
1744 re_free (dests_ch);
a9388965
UD
1745 /* Return NULL in case of an error, trtable otherwise. */
1746 return (ndests < 0) ? NULL : trtable;
3b0bdc72
UD
1747 }
1748
1749 dest_states = re_malloc (re_dfastate_t *, ndests);
1750 dest_states_word = re_malloc (re_dfastate_t *, ndests);
1751 dest_states_nl = re_malloc (re_dfastate_t *, ndests);
1752 bitset_empty (acceptable);
1753
a9388965 1754 err = re_node_set_alloc (&follows, ndests + 1);
bc15410e
UD
1755 if (BE (dest_states == NULL || dest_states_word == NULL
1756 || dest_states_nl == NULL || err != REG_NOERROR, 0))
a9388965
UD
1757 return NULL;
1758
3b0bdc72
UD
1759 /* Then build the states for all destinations. */
1760 for (i = 0; i < ndests; ++i)
1761 {
1762 int next_node;
1763 re_node_set_empty (&follows);
1764 /* Merge the follows of this destination states. */
1765 for (j = 0; j < dests_node[i].nelem; ++j)
1766 {
1767 next_node = dfa->nexts[dests_node[i].elems[j]];
1768 if (next_node != -1)
1769 {
a9388965 1770 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
bc15410e 1771 if (BE (err != REG_NOERROR, 0))
a9388965 1772 return NULL;
3b0bdc72
UD
1773 }
1774 }
1775 /* If search flag is set, merge the initial state. */
1776 if (fl_search)
1777 {
1778#ifdef RE_ENABLE_I18N
1779 int not_initial = 0;
1780 for (j = 0; j < follows.nelem; ++j)
1781 if (dfa->nodes[follows.elems[j]].type == CHARACTER)
1782 {
1783 not_initial = dfa->nodes[follows.elems[j]].mb_partial;
1784 break;
1785 }
1786 if (!not_initial)
1787#endif
a9388965
UD
1788 {
1789 err = re_node_set_merge (&follows,
1790 dfa->init_state->entrance_nodes);
bc15410e 1791 if (BE (err != REG_NOERROR, 0))
a9388965
UD
1792 return NULL;
1793 }
3b0bdc72 1794 }
a9388965 1795 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
bc15410e 1796 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
a9388965 1797 return NULL;
3b0bdc72
UD
1798 /* If the new state has context constraint,
1799 build appropriate states for these contexts. */
1800 if (dest_states[i]->has_constraint)
1801 {
a9388965 1802 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3b0bdc72 1803 CONTEXT_WORD);
bc15410e 1804 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
a9388965
UD
1805 return NULL;
1806 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3b0bdc72 1807 CONTEXT_NEWLINE);
bc15410e 1808 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
a9388965 1809 return NULL;
3b0bdc72
UD
1810 }
1811 else
1812 {
1813 dest_states_word[i] = dest_states[i];
1814 dest_states_nl[i] = dest_states[i];
1815 }
1816 bitset_merge (acceptable, dests_ch[i]);
1817 }
1818
1819 /* Update the transition table. */
1820 for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
1821 for (j = 0; j < UINT_BITS; ++j, ++ch)
1822 if ((acceptable[i] >> j) & 1)
1823 {
1824 if (IS_WORD_CHAR (ch))
1825 {
1826 for (k = 0; k < ndests; ++k)
1827 if ((dests_ch[k][i] >> j) & 1)
1828 trtable[ch] = dest_states_word[k];
1829 }
1830 else /* not WORD_CHAR */
1831 {
1832 for (k = 0; k < ndests; ++k)
1833 if ((dests_ch[k][i] >> j) & 1)
1834 trtable[ch] = dest_states[k];
1835 }
1836 }
1837 /* new line */
1838 for (k = 0; k < ndests; ++k)
1839 if (bitset_contain (acceptable, NEWLINE_CHAR))
1840 trtable[NEWLINE_CHAR] = dest_states_nl[k];
1841
1842 re_free (dest_states_nl);
1843 re_free (dest_states_word);
1844 re_free (dest_states);
1845
1846 re_node_set_free (&follows);
1847 for (i = 0; i < ndests; ++i)
1848 re_node_set_free (dests_node + i);
1849
1850 re_free (dests_ch);
1851 re_free (dests_node);
1852
1853 return trtable;
1854}
1855
1856/* Group all nodes belonging to STATE into several destinations.
1857 Then for all destinations, set the nodes belonging to the destination
1858 to DESTS_NODE[i] and set the characters accepted by the destination
1859 to DEST_CH[i]. This function return the number of destinations. */
1860
1861static int
1862group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
1863 const regex_t *preg;
1864 const re_dfastate_t *state;
1865 re_node_set *dests_node;
1866 bitset *dests_ch;
1867{
a9388965 1868 reg_errcode_t err;
3b0bdc72
UD
1869 const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1870 int i, j, k;
1871 int ndests; /* Number of the destinations from `state'. */
1872 bitset accepts; /* Characters a node can accept. */
1873 const re_node_set *cur_nodes = &state->nodes;
1874 bitset_empty (accepts);
1875 ndests = 0;
1876
1877 /* For all the nodes belonging to `state', */
1878 for (i = 0; i < cur_nodes->nelem; ++i)
1879 {
1880 unsigned int constraint = 0;
1881 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
1882 re_token_type_t type = node->type;
1883
1884 if (type == OP_CONTEXT_NODE)
1885 {
1886 constraint = node->constraint;
1887 node = dfa->nodes + node->opr.ctx_info->entity;
1888 type = node->type;
1889 }
1890
1891 /* Enumerate all single byte character this node can accept. */
1892 if (type == CHARACTER)
1893 bitset_set (accepts, node->opr.c);
1894 else if (type == SIMPLE_BRACKET)
1895 {
1896 bitset_merge (accepts, node->opr.sbcset);
1897 }
1898 else if (type == OP_PERIOD)
1899 {
1900 bitset_set_all (accepts);
1901 if (!(preg->syntax & RE_DOT_NEWLINE))
1902 bitset_clear (accepts, '\n');
1903 if (preg->syntax & RE_DOT_NOT_NULL)
1904 bitset_clear (accepts, '\0');
1905 }
1906 else
1907 continue;
1908
1909 /* Check the `accepts' and sift the characters which are not
1910 match it the context. */
1911 if (constraint)
1912 {
1913 if (constraint & NEXT_WORD_CONSTRAINT)
1914 for (j = 0; j < BITSET_UINTS; ++j)
1915 accepts[j] &= dfa->word_char[j];
1916 else if (constraint & NEXT_NOTWORD_CONSTRAINT)
1917 for (j = 0; j < BITSET_UINTS; ++j)
1918 accepts[j] &= ~dfa->word_char[j];
1919 else if (constraint & NEXT_NEWLINE_CONSTRAINT)
1920 {
1921 int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
1922 bitset_empty (accepts);
1923 if (accepts_newline)
1924 bitset_set (accepts, NEWLINE_CHAR);
1925 else
1926 continue;
1927 }
1928 }
1929
1930 /* Then divide `accepts' into DFA states, or create a new
1931 state. */
1932 for (j = 0; j < ndests; ++j)
1933 {
1934 bitset intersec; /* Intersection sets, see below. */
1935 bitset remains;
1936 /* Flags, see below. */
1937 int has_intersec, not_subset, not_consumed;
1938
1939 /* Optimization, skip if this state doesn't accept the character. */
1940 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
1941 continue;
1942
1943 /* Enumerate the intersection set of this state and `accepts'. */
1944 has_intersec = 0;
1945 for (k = 0; k < BITSET_UINTS; ++k)
1946 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
1947 /* And skip if the intersection set is empty. */
1948 if (!has_intersec)
1949 continue;
1950
1951 /* Then check if this state is a subset of `accepts'. */
1952 not_subset = not_consumed = 0;
1953 for (k = 0; k < BITSET_UINTS; ++k)
1954 {
1955 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
1956 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
1957 }
1958
1959 /* If this state isn't a subset of `accepts', create a
1960 new group state, which has the `remains'. */
1961 if (not_subset)
1962 {
1963 bitset_copy (dests_ch[ndests], remains);
1964 bitset_copy (dests_ch[j], intersec);
a9388965 1965 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
bc15410e 1966 if (BE (err != REG_NOERROR, 0))
a9388965 1967 return -1;
3b0bdc72
UD
1968 ++ndests;
1969 }
1970
1971 /* Put the position in the current group. */
a9388965 1972 err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
bc15410e 1973 if (BE (err < 0, 0))
a9388965 1974 return -1;
3b0bdc72
UD
1975
1976 /* If all characters are consumed, go to next node. */
1977 if (!not_consumed)
1978 break;
1979 }
1980 /* Some characters remain, create a new group. */
1981 if (j == ndests)
1982 {
1983 bitset_copy (dests_ch[ndests], accepts);
a9388965 1984 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
bc15410e 1985 if (BE (err != REG_NOERROR, 0))
a9388965 1986 return -1;
3b0bdc72
UD
1987 ++ndests;
1988 bitset_empty (accepts);
1989 }
1990 }
1991 return ndests;
1992}
1993
1994/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. */
1995
1996static int
1997check_node_accept_bytes (preg, node_idx, input, str_idx)
1998 const regex_t *preg;
1999 int node_idx, str_idx;
2000 const re_string_t *input;
2001{
2002 const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2003 const re_token_t *node = dfa->nodes + node_idx;
2004 int elem_len = re_string_elem_size_at (input, str_idx);
2005 int char_len = re_string_char_size_at (input, str_idx);
2006 int i, j;
2007#ifdef _LIBC
2008 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2009#endif /* _LIBC */
2010 if (elem_len <= 1 && char_len <= 1)
2011 return 0;
2012 if (node->type == OP_PERIOD)
2013 {
2014 if ((!(preg->syntax & RE_DOT_NEWLINE) &&
2015 re_string_byte_at (input, str_idx) == '\n') ||
2016 ((preg->syntax & RE_DOT_NOT_NULL) &&
2017 re_string_byte_at (input, str_idx) == '\0'))
2018 return 0;
2019 return char_len;
2020 }
2021 else if (node->type == COMPLEX_BRACKET)
2022 {
2023 const re_charset_t *cset = node->opr.mbcset;
2024 const unsigned char *pin = re_string_get_buffer (input) + str_idx;
2025#ifdef _LIBC
2026 if (nrules != 0)
2027 {
2028 int match_len = 0;
2029 unsigned int in_collseq = 0;
2030 const int32_t *table, *indirect;
2031 const unsigned char *weights, *extra, *collseqwc;
2032 int32_t idx;
2033 wchar_t wc = 0;
2034 /* This #include defines a local function! */
2035# include <locale/weight.h>
2036
2037 /* match with collating_symbol? */
2038 if (cset->ncoll_syms)
2039 extra = (const unsigned char *)
2040 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
2041 for (i = 0; i < cset->ncoll_syms; ++i)
2042 {
2043 const unsigned char *coll_sym = extra + cset->coll_syms[i];
2044 /* Compare the length of input collating element and
2045 the length of current collating element. */
2046 if (*coll_sym != elem_len)
2047 continue;
2048 /* Compare each bytes. */
2049 for (j = 0; j < *coll_sym; j++)
2050 if (pin[j] != coll_sym[1 + j])
2051 break;
2052 if (j == *coll_sym)
2053 {
2054 /* Match if every bytes is equal. */
2055 match_len = j;
2056 goto check_node_accept_bytes_match;
2057 }
2058 }
2059
2060 if (cset->nranges || cset->nchar_classes || cset->nmbchars)
2061 wc = re_string_wchar_at (input, str_idx);
2062
2063 if (cset->nranges)
2064 {
2065 if (elem_len <= char_len)
2066 {
2067 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
2068 in_collseq = collseq_table_lookup (collseqwc, wc);
2069 }
2070 else
2071 in_collseq = find_collation_sequence_value (pin, elem_len);
2072 }
2073 /* match with range expression? */
2074 for (i = 0; i < cset->nranges; ++i)
2075 if (cset->range_starts[i] <= in_collseq
2076 && in_collseq <= cset->range_ends[i])
2077 {
2078 match_len = elem_len;
2079 goto check_node_accept_bytes_match;
2080 }
2081
2082 /* match with equivalence_class? */
2083 if (cset->nequiv_classes)
2084 {
2085 const unsigned char *cp = pin;
2086 table = (const int32_t *)
2087 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
2088 weights = (const unsigned char *)
2089 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
2090 extra = (const unsigned char *)
2091 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
2092 indirect = (const int32_t *)
2093 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
2094 idx = findidx (&cp);
2095 if (idx > 0)
2096 for (i = 0; i < cset->nequiv_classes; ++i)
2097 {
2098 int32_t equiv_class_idx = cset->equiv_classes[i];
2099 size_t weight_len = weights[idx];
2100 if (weight_len == weights[equiv_class_idx])
2101 {
2102 int cnt = 0;
2103 while (cnt <= weight_len
2104 && (weights[equiv_class_idx + 1 + cnt]
2105 == weights[idx + 1 + cnt]))
2106 ++cnt;
2107 if (cnt > weight_len)
2108 {
2109 match_len = elem_len;
2110 goto check_node_accept_bytes_match;
2111 }
2112 }
2113 }
2114 }
2115
2116 /* match with multibyte character? */
2117 for (i = 0; i < cset->nmbchars; ++i)
2118 if (wc == cset->mbchars[i])
2119 {
2120 match_len = char_len;
2121 goto check_node_accept_bytes_match;
2122 }
2123
2124 /* match with character_class? */
2125 for (i = 0; i < cset->nchar_classes; ++i)
2126 {
2127 wctype_t wt = cset->char_classes[i];
2128 if (__iswctype (wc, wt))
2129 {
2130 match_len = char_len;
2131 goto check_node_accept_bytes_match;
2132 }
2133 }
2134
2135 check_node_accept_bytes_match:
2136 if (!cset->non_match)
2137 return match_len;
2138 else
2139 {
2140 if (match_len > 0)
2141 return 0;
2142 else
2143 return re_string_elem_size_at (input, str_idx);
2144 }
2145 }
2146#endif
2147 }
2148 return 0;
2149}
2150
2151#ifdef _LIBC
2152static unsigned int
2153find_collation_sequence_value (mbs, mbs_len)
2154 const unsigned char *mbs;
2155 size_t mbs_len;
2156{
2157 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2158 if (nrules == 0)
2159 {
2160 if (mbs_len == 1)
2161 {
2162 /* No valid character. Match it as a single byte character. */
2163 const unsigned char *collseq = (const unsigned char *)
2164 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
2165 return collseq[mbs[0]];
2166 }
2167 return UINT_MAX;
2168 }
2169 else
2170 {
2171 int32_t idx;
2172 const unsigned char *extra = (const unsigned char *)
2173 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
2174
2175 for (idx = 0; ;)
2176 {
2177 int mbs_cnt, found = 0;
2178 int32_t elem_mbs_len;
2179 /* Skip the name of collating element name. */
2180 idx = idx + extra[idx] + 1;
2181 elem_mbs_len = extra[idx++];
2182 if (mbs_len == elem_mbs_len)
2183 {
2184 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
2185 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
2186 break;
2187 if (mbs_cnt == elem_mbs_len)
2188 /* Found the entry. */
2189 found = 1;
2190 }
2191 /* Skip the byte sequence of the collating element. */
2192 idx += elem_mbs_len;
2193 /* Adjust for the alignment. */
2194 idx = (idx + 3) & ~3;
2195 /* Skip the collation sequence value. */
2196 idx += sizeof (uint32_t);
2197 /* Skip the wide char sequence of the collating element. */
2198 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
2199 /* If we found the entry, return the sequence value. */
2200 if (found)
2201 return *(uint32_t *) (extra + idx);
2202 /* Skip the collation sequence value. */
2203 idx += sizeof (uint32_t);
2204 }
2205 }
2206}
2207#endif
2208
2209/* Check whether the node accepts the byte which is IDX-th
2210 byte of the INPUT. */
2211
2212static int
612546c6 2213check_node_accept (preg, node, mctx, idx)
3b0bdc72
UD
2214 const regex_t *preg;
2215 const re_token_t *node;
612546c6
UD
2216 const re_match_context_t *mctx;
2217 int idx;
3b0bdc72
UD
2218{
2219 const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2220 const re_token_t *cur_node;
2221 unsigned char ch;
2222 if (node->type == OP_CONTEXT_NODE)
2223 {
2224 /* The node has constraints. Check whether the current context
2225 satisfies the constraints. */
612546c6
UD
2226 unsigned int context = re_string_context_at (mctx->input, idx,
2227 mctx->eflags,
3b0bdc72
UD
2228 preg->newline_anchor);
2229 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2230 return 0;
2231 cur_node = dfa->nodes + node->opr.ctx_info->entity;
2232 }
2233 else
2234 cur_node = node;
2235
612546c6 2236 ch = re_string_byte_at (mctx->input, idx);
3b0bdc72
UD
2237 if (cur_node->type == CHARACTER)
2238 return cur_node->opr.c == ch;
2239 else if (cur_node->type == SIMPLE_BRACKET)
2240 return bitset_contain (cur_node->opr.sbcset, ch);
2241 else if (cur_node->type == OP_PERIOD)
2242 return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
2243 || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
2244 else
2245 return 0;
2246}
612546c6
UD
2247
2248/* Extend the buffers, if the buffers have run out. */
2249
2250static reg_errcode_t
2251extend_buffers (mctx)
2252 re_match_context_t *mctx;
2253{
2254 reg_errcode_t ret;
2255 re_string_t *pstr = mctx->input;
2256
2257 /* Double the lengthes of the buffers. */
2258 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
2259 if (BE (ret != REG_NOERROR, 0))
2260 return ret;
2261
2262 if (mctx->state_log != NULL)
2263 {
2264 /* And double the length of state_log. */
2265 mctx->state_log = re_realloc (mctx->state_log, re_dfastate_t *,
2266 pstr->bufs_len * 2);
2267 if (BE (mctx->state_log == NULL, 0))
2268 return REG_ESPACE;
2269 }
2270
2271 /* Then reconstruct the buffers. */
2272 if (pstr->icase)
2273 {
2274#ifdef RE_ENABLE_I18N
2275 if (MB_CUR_MAX > 1)
2276 build_wcs_upper_buffer (pstr);
2277 else
2278#endif /* RE_ENABLE_I18N */
2279 build_upper_buffer (pstr);
2280 }
2281 else
2282 {
2283#ifdef RE_ENABLE_I18N
2284 if (MB_CUR_MAX > 1)
2285 build_wcs_buffer (pstr);
2286 else
2287#endif /* RE_ENABLE_I18N */
2288 {
2289 if (pstr->trans != NULL)
2290 re_string_translate_buffer (pstr);
2291 else
2292 pstr->valid_len = pstr->bufs_len;
2293 }
2294 }
2295 return REG_NOERROR;
2296}
2297
3b0bdc72
UD
2298\f
2299/* Functions for matching context. */
2300
a9388965 2301static reg_errcode_t
612546c6 2302match_ctx_init (mctx, eflags, input, n)
3b0bdc72 2303 re_match_context_t *mctx;
612546c6
UD
2304 int eflags, n;
2305 re_string_t *input;
3b0bdc72
UD
2306{
2307 mctx->eflags = eflags;
612546c6
UD
2308 mctx->input = input;
2309 mctx->match_last = -1;
3b0bdc72 2310 if (n > 0)
a9388965
UD
2311 {
2312 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
bc15410e 2313 if (BE (mctx->bkref_ents == NULL, 0))
a9388965
UD
2314 return REG_ESPACE;
2315 }
3b0bdc72
UD
2316 else
2317 mctx->bkref_ents = NULL;
2318 mctx->nbkref_ents = 0;
2319 mctx->abkref_ents = n;
2320 mctx->max_bkref_len = 0;
a9388965 2321 return REG_NOERROR;
3b0bdc72
UD
2322}
2323
2324static void
2325match_ctx_free (mctx)
2326 re_match_context_t *mctx;
2327{
2328 re_free (mctx->bkref_ents);
2329}
2330
2331/* Add a new backreference entry to the cache. */
2332
a9388965 2333static reg_errcode_t
3b0bdc72
UD
2334match_ctx_add_entry (mctx, node, from, to)
2335 re_match_context_t *mctx;
2336 int node, from, to;
2337{
2338 if (mctx->nbkref_ents >= mctx->abkref_ents)
2339 {
2340 mctx->bkref_ents = re_realloc (mctx->bkref_ents,
2341 struct re_backref_cache_entry,
2342 mctx->abkref_ents * 2);
bc15410e 2343 if (BE (mctx->bkref_ents == NULL, 0))
a9388965 2344 return REG_ESPACE;
3b0bdc72
UD
2345 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
2346 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
2347 mctx->abkref_ents *= 2;
2348 }
2349 mctx->bkref_ents[mctx->nbkref_ents].node = node;
2350 mctx->bkref_ents[mctx->nbkref_ents].from = from;
2351 mctx->bkref_ents[mctx->nbkref_ents++].to = to;
2352 if (mctx->max_bkref_len < to - from)
2353 mctx->max_bkref_len = to - from;
a9388965 2354 return REG_NOERROR;
3b0bdc72 2355}