/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
- int n) internal_function;
-static void match_ctx_clean (re_match_context_t *mctx) internal_function;
-static void match_ctx_free (re_match_context_t *cache) internal_function;
+ int n);
+static void match_ctx_clean (re_match_context_t *mctx);
+static void match_ctx_free (re_match_context_t *cache);
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
- int str_idx, int from, int to)
- internal_function;
-static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
- internal_function;
+ int str_idx, int from, int to);
+static int search_cur_bkref_entry (const re_match_context_t *mctx,
+ int str_idx);
static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
- int str_idx) internal_function;
+ int str_idx);
static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
- int node, int str_idx)
- internal_function;
+ int node, int str_idx);
static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
re_dfastate_t **limited_sts, int last_node,
- int last_str_idx)
- internal_function;
+ int last_str_idx);
static reg_errcode_t re_search_internal (const regex_t *preg,
const char *string, int length,
int start, int range, int stop,
size_t nmatch, regmatch_t pmatch[],
- int eflags) internal_function;
+ int eflags);
static int re_search_2_stub (struct re_pattern_buffer *bufp,
const char *string1, int length1,
const char *string2, int length2,
int start, int range, struct re_registers *regs,
- int stop, int ret_len) internal_function;
+ int stop, int ret_len);
static int re_search_stub (struct re_pattern_buffer *bufp,
const char *string, int length, int start,
int range, int stop, struct re_registers *regs,
- int ret_len) internal_function;
+ int ret_len);
static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
- int nregs, int regs_allocated) internal_function;
-static inline re_dfastate_t *acquire_init_state_context
- (reg_errcode_t *err, const re_match_context_t *mctx, int idx)
- __attribute ((always_inline)) internal_function;
-static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
- internal_function;
+ int nregs, int regs_allocated);
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
static int check_matching (re_match_context_t *mctx, int fl_longest_match,
- int *p_match_first)
- internal_function;
-static int check_halt_node_context (const re_dfa_t *dfa, int node,
- unsigned int context) internal_function;
+ int *p_match_first);
static int check_halt_state_context (const re_match_context_t *mctx,
- const re_dfastate_t *state, int idx)
- internal_function;
-static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
+ const re_dfastate_t *state, int idx);
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
regmatch_t *prev_idx_match, int cur_node,
- int cur_idx, int nmatch) internal_function;
-static int proceed_next_node (const re_match_context_t *mctx,
- int nregs, regmatch_t *regs,
- int *pidx, int node, re_node_set *eps_via_nodes,
- struct re_fail_stack_t *fs) internal_function;
+ int cur_idx, int nmatch);
static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
int str_idx, int dest_node, int nregs,
regmatch_t *regs,
- re_node_set *eps_via_nodes) internal_function;
-static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
- regmatch_t *regs, re_node_set *eps_via_nodes) internal_function;
+ re_node_set *eps_via_nodes);
static reg_errcode_t set_regs (const regex_t *preg,
const re_match_context_t *mctx,
size_t nmatch, regmatch_t *pmatch,
- int fl_backtrack) internal_function;
-static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
+ int fl_backtrack);
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
#ifdef RE_ENABLE_I18N
static int sift_states_iter_mb (const re_match_context_t *mctx,
re_sift_context_t *sctx,
- int node_idx, int str_idx, int max_str_idx) internal_function;
+ int node_idx, int str_idx, int max_str_idx);
#endif /* RE_ENABLE_I18N */
-static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
- re_sift_context_t *sctx) internal_function;
-static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
+static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
+ re_sift_context_t *sctx);
+static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
re_sift_context_t *sctx, int str_idx,
- re_node_set *cur_dest) internal_function;
-static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
+ re_node_set *cur_dest);
+static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
re_sift_context_t *sctx,
int str_idx,
- re_node_set *dest_nodes) internal_function;
-static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
- re_node_set *dest_nodes,
- const re_node_set *candidates) internal_function;
-static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
+ re_node_set *dest_nodes);
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
re_node_set *dest_nodes,
- const re_node_set *and_nodes) internal_function;
+ const re_node_set *candidates);
static int check_dst_limits (const re_match_context_t *mctx,
re_node_set *limits,
int dst_node, int dst_idx, int src_node,
- int src_idx) internal_function;
+ int src_idx);
static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
int boundaries, int subexp_idx,
- int from_node, int bkref_idx) internal_function;
+ int from_node, int bkref_idx);
static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
int limit, int subexp_idx,
int node, int str_idx,
- int bkref_idx) internal_function;
-static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
+ int bkref_idx);
+static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
re_node_set *dest_nodes,
const re_node_set *candidates,
re_node_set *limits,
struct re_backref_cache_entry *bkref_ents,
- int str_idx) internal_function;
-static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
+ int str_idx);
+static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
re_sift_context_t *sctx,
- int str_idx, const re_node_set *candidates) internal_function;
-static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
- int next_state_log_idx) internal_function;
-static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
- re_dfastate_t **src, int num) internal_function;
+ int str_idx,
+ const re_node_set *candidates);
+static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
+ re_dfastate_t **dst,
+ re_dfastate_t **src, int num);
static re_dfastate_t *find_recover_state (reg_errcode_t *err,
- re_match_context_t *mctx) internal_function;
+ re_match_context_t *mctx);
static re_dfastate_t *transit_state (reg_errcode_t *err,
re_match_context_t *mctx,
- re_dfastate_t *state) internal_function;
+ re_dfastate_t *state);
static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
re_match_context_t *mctx,
- re_dfastate_t *next_state) internal_function;
+ re_dfastate_t *next_state);
static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
re_node_set *cur_nodes,
- int str_idx) internal_function;
+ int str_idx);
#if 0
static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
re_match_context_t *mctx,
- re_dfastate_t *pstate) internal_function;
+ re_dfastate_t *pstate);
#endif
#ifdef RE_ENABLE_I18N
static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
- re_dfastate_t *pstate) internal_function;
+ re_dfastate_t *pstate);
#endif /* RE_ENABLE_I18N */
static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
- const re_node_set *nodes) internal_function;
+ const re_node_set *nodes);
static reg_errcode_t get_subexp (re_match_context_t *mctx,
- int bkref_node, int bkref_str_idx) internal_function;
+ int bkref_node, int bkref_str_idx);
static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
const re_sub_match_top_t *sub_top,
re_sub_match_last_t *sub_last,
- int bkref_node, int bkref_str) internal_function;
+ int bkref_node, int bkref_str);
static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
- int subexp_idx, int type) internal_function;
+ int subexp_idx, int type);
static reg_errcode_t check_arrival (re_match_context_t *mctx,
state_array_t *path, int top_node,
int top_str, int last_node, int last_str,
- int type) internal_function;
+ int type);
static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
int str_idx,
re_node_set *cur_nodes,
- re_node_set *next_nodes) internal_function;
+ re_node_set *next_nodes);
static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
re_node_set *cur_nodes,
- int ex_subexp, int type) internal_function;
+ int ex_subexp, int type);
static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
re_node_set *dst_nodes,
int target, int ex_subexp,
- int type) internal_function;
+ int type);
static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
re_node_set *cur_nodes, int cur_str,
- int subexp_num, int type) internal_function;
-static int build_trtable (re_dfa_t *dfa,
- re_dfastate_t *state) internal_function;
+ int subexp_num, int type);
+static int build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
#ifdef RE_ENABLE_I18N
static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
- const re_string_t *input, int idx) internal_function;
+ const re_string_t *input, int idx);
# ifdef _LIBC
static unsigned int find_collation_sequence_value (const unsigned char *mbs,
- size_t name_len) internal_function;
+ size_t name_len);
# endif /* _LIBC */
#endif /* RE_ENABLE_I18N */
static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
const re_dfastate_t *state,
re_node_set *states_node,
- bitset *states_ch) internal_function;
+ bitset_t *states_ch);
static int check_node_accept (const re_match_context_t *mctx,
- const re_token_t *node, int idx) internal_function;
-static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
+ const re_token_t *node, int idx);
+static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
\f
/* Entry point for POSIX code. */
We return 0 if we find a match and REG_NOMATCH if not. */
int
-regexec (preg, string, nmatch, pmatch, eflags)
- const regex_t *__restrict preg;
- const char *__restrict string;
- size_t nmatch;
- regmatch_t pmatch[];
- int eflags;
+regexec (const regex_t *__restrict preg, const char *__restrict string,
+ size_t nmatch, regmatch_t pmatch[], int eflags)
{
reg_errcode_t err;
int start, length;
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
return REG_BADPAT;
}
#ifdef _LIBC
+libc_hidden_def (__regexec)
+
# include <shlib-compat.h>
versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
concerned.
If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
- and all groups is stroed in REGS. (For the "_2" variants, the offsets are
+ and all groups is stored in REGS. (For the "_2" variants, the offsets are
computed relative to the concatenation, not relative to the individual
strings.)
match was found and -2 indicates an internal error. */
int
-re_match (bufp, string, length, start, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int length, start;
- struct re_registers *regs;
+re_match (struct re_pattern_buffer *bufp, const char *string, int length,
+ int start, struct re_registers *regs)
{
return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
}
#endif
int
-re_search (bufp, string, length, start, range, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int length, start, range;
- struct re_registers *regs;
+re_search (struct re_pattern_buffer *bufp, const char *string, int length,
+ int start, int range, struct re_registers *regs)
{
return re_search_stub (bufp, string, length, start, range, length, regs, 0);
}
#endif
int
-re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int length1, length2, start, stop;
- struct re_registers *regs;
+re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
+ const char *string2, int length2, int start,
+ struct re_registers *regs, int stop)
{
return re_search_2_stub (bufp, string1, length1, string2, length2,
start, 0, regs, stop, 1);
#endif
int
-re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int length1, length2, start, range, stop;
- struct re_registers *regs;
+re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
+ const char *string2, int length2, int start, int range,
+ struct re_registers *regs, int stop)
{
return re_search_2_stub (bufp, string1, length1, string2, length2,
start, range, regs, stop, 0);
#endif
static int
-re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
- stop, ret_len)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int length1, length2, start, range, stop, ret_len;
- struct re_registers *regs;
+re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
+ int length1, const char *string2, int length2, int start,
+ int range, struct re_registers *regs,
+ int stop, int ret_len)
{
const char *str;
int rval;
int len = length1 + length2;
- int free_str = 0;
+ char *s = NULL;
- if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+ if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
return -2;
/* Concatenate the strings. */
if (length2 > 0)
if (length1 > 0)
{
- char *s = re_malloc (char, len);
+ s = re_malloc (char, len);
if (BE (s == NULL, 0))
return -2;
memcpy (s + length1, string2, length2);
#endif
str = s;
- free_str = 1;
}
else
str = string2;
else
str = string1;
- rval = re_search_stub (bufp, str, len, start, range, stop, regs,
- ret_len);
- if (free_str)
- re_free ((char *) str);
+ rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
+ re_free (s);
return rval;
}
otherwise the position of the match is returned. */
static int
-re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
- struct re_pattern_buffer *bufp;
- const char *string;
- int length, start, range, stop, ret_len;
- struct re_registers *regs;
+re_search_stub (struct re_pattern_buffer *bufp, const char *string, int length,
+ int start, int range, int stop, struct re_registers *regs,
+ int ret_len)
{
reg_errcode_t result;
regmatch_t *pmatch;
int nregs, rval;
int eflags = 0;
- re_dfa_t *dfa = (re_dfa_t *)bufp->buffer;
+ re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
/* Check for out-of-range. */
if (BE (start < 0 || start > length, 0))
}
static unsigned
-re_copy_regs (regs, pmatch, nregs, regs_allocated)
- struct re_registers *regs;
- regmatch_t *pmatch;
- int nregs, regs_allocated;
+re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
+ int regs_allocated)
{
int rval = REGS_REALLOCATE;
int i;
if (regs_allocated == REGS_UNALLOCATED)
{ /* No. So allocate them with malloc. */
regs->start = re_malloc (regoff_t, need_regs);
- regs->end = re_malloc (regoff_t, need_regs);
- if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
+ if (BE (regs->start == NULL, 0))
return REGS_UNALLOCATED;
+ regs->end = re_malloc (regoff_t, need_regs);
+ if (BE (regs->end == NULL, 0))
+ {
+ re_free (regs->start);
+ return REGS_UNALLOCATED;
+ }
regs->num_regs = need_regs;
}
else if (regs_allocated == REGS_REALLOCATE)
if (BE (need_regs > regs->num_regs, 0))
{
regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
- regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
- if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
+ regoff_t *new_end;
+ if (BE (new_start == NULL, 0))
return REGS_UNALLOCATED;
+ new_end = re_realloc (regs->end, regoff_t, need_regs);
+ if (BE (new_end == NULL, 0))
+ {
+ re_free (new_start);
+ return REGS_UNALLOCATED;
+ }
regs->start = new_start;
regs->end = new_end;
regs->num_regs = need_regs;
freeing the old data. */
void
-re_set_registers (bufp, regs, num_regs, starts, ends)
- struct re_pattern_buffer *bufp;
- struct re_registers *regs;
- unsigned num_regs;
- regoff_t *starts, *ends;
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends)
{
if (num_regs)
{
# ifdef _LIBC
weak_function
# endif
-re_exec (s)
- const char *s;
+re_exec (const char *s)
{
return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
}
/* Searches for a compiled pattern PREG in the string STRING, whose
length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
- mingings with regexec. START, and RANGE have the same meanings
+ meaning as with regexec. START, and RANGE have the same meanings
with re_search.
Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
otherwise return the error code.
(START + RANGE >= 0 && START + RANGE <= LENGTH) */
static reg_errcode_t
-re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
- eflags)
- const regex_t *preg;
- const char *string;
- int length, start, range, stop, eflags;
- size_t nmatch;
- regmatch_t pmatch[];
+__attribute_warn_unused_result__
+re_search_internal (const regex_t *preg, const char *string, int length,
+ int start, int range, int stop, size_t nmatch,
+ regmatch_t pmatch[], int eflags)
{
reg_errcode_t err;
- re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
int left_lim, right_lim, incr;
int fl_longest_match, match_first, match_kind, match_last = -1;
int extra_nmatch;
#endif
char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
&& range && !preg->can_be_null) ? preg->fastmap : NULL;
- unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
+ RE_TRANSLATE_TYPE t = preg->translate;
#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
memset (&mctx, '\0', sizeof (re_match_context_t));
|| !preg->newline_anchor))
{
if (start != 0 && start + range != 0)
- return REG_NOMATCH;
+ return REG_NOMATCH;
start = range = 0;
}
multi character collating element. */
if (nmatch > 1 || dfa->has_mb_node)
{
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+
mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
if (BE (mctx.state_log == NULL, 0))
{
break;
match_first += incr;
if (match_first < left_lim || match_first > right_lim)
- {
- err = REG_NOMATCH;
- goto free_return;
- }
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
}
break;
}
goto free_return;
}
- /* At last, add the offset to the each registers, since we slided
+ /* At last, add the offset to each register, since we slid
the buffers so that we could assume that the matching starts
from 0. */
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
}
if (dfa->subexp_map)
- for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
- if (dfa->subexp_map[reg_idx] != reg_idx)
- {
- pmatch[reg_idx + 1].rm_so
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
- pmatch[reg_idx + 1].rm_eo
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
- }
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ if (dfa->subexp_map[reg_idx] != reg_idx)
+ {
+ pmatch[reg_idx + 1].rm_so
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+ pmatch[reg_idx + 1].rm_eo
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+ }
}
free_return:
}
static reg_errcode_t
-prune_impossible_nodes (mctx)
- re_match_context_t *mctx;
+__attribute_warn_unused_result__
+prune_impossible_nodes (re_match_context_t *mctx)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int halt_node, match_last;
reg_errcode_t ret;
re_dfastate_t **sifted_states;
#endif
match_last = mctx->match_last;
halt_node = mctx->last_node;
+
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
+ return REG_ESPACE;
+
sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
if (BE (sifted_states == NULL, 0))
{
re_node_set_free (&sctx.limits);
if (BE (ret != REG_NOERROR, 0))
goto free_return;
+ if (sifted_states[0] == NULL)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
}
re_free (mctx->state_log);
mctx->state_log = sifted_states;
since initial states may have constraints like "\<", "^", etc.. */
static inline re_dfastate_t *
-acquire_init_state_context (err, mctx, idx)
- reg_errcode_t *err;
- const re_match_context_t *mctx;
- int idx;
+__attribute ((always_inline))
+acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
+ int idx)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
if (dfa->init_state->has_constraint)
{
unsigned int context;
index of the buffer. */
static int
-check_matching (mctx, fl_longest_match, p_match_first)
- re_match_context_t *mctx;
- int fl_longest_match;
- int *p_match_first;
+__attribute_warn_unused_result__
+check_matching (re_match_context_t *mctx, int fl_longest_match,
+ int *p_match_first)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int match = 0;
int match_last = -1;
{
err = transit_state_bkref (mctx, &cur_state->nodes);
if (BE (err != REG_NOERROR, 0))
- return err;
+ return err;
}
}
}
re_dfastate_t *old_state = cur_state;
int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
- if (BE (next_char_idx >= mctx->input.bufs_len, 0)
- || (BE (next_char_idx >= mctx->input.valid_len, 0)
- && mctx->input.valid_len < mctx->input.len))
- {
- err = extend_buffers (mctx);
- if (BE (err != REG_NOERROR, 0))
+ if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
+ && mctx->input.bufs_len < mctx->input.len)
+ || (BE (next_char_idx >= mctx->input.valid_len, 0)
+ && mctx->input.valid_len < mctx->input.len))
+ {
+ err = extend_buffers (mctx, next_char_idx + 1);
+ if (BE (err != REG_NOERROR, 0))
{
assert (err == REG_ESPACE);
return -2;
}
- }
+ }
cur_state = transit_state (&err, mctx, cur_state);
if (mctx->state_log != NULL)
/* Check NODE match the current context. */
-static int check_halt_node_context (dfa, node, context)
- const re_dfa_t *dfa;
- int node;
- unsigned int context;
+static int
+check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
{
re_token_type_t type = dfa->nodes[node].type;
unsigned int constraint = dfa->nodes[node].constraint;
match the context, return the node. */
static int
-check_halt_state_context (mctx, state, idx)
- const re_match_context_t *mctx;
- const re_dfastate_t *state;
- int idx;
+check_halt_state_context (const re_match_context_t *mctx,
+ const re_dfastate_t *state, int idx)
{
int i;
unsigned int context;
of errors. */
static int
-proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
- const re_match_context_t *mctx;
- regmatch_t *regs;
- int nregs, *pidx, node;
- re_node_set *eps_via_nodes;
- struct re_fail_stack_t *fs;
+proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
+ int *pidx, int node, re_node_set *eps_via_nodes,
+ struct re_fail_stack_t *fs)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int i, err;
if (IS_EPSILON_NODE (dfa->nodes[node].type))
{
int candidate = edests->elems[i];
if (!re_node_set_contains (cur_nodes, candidate))
continue;
- if (dest_node == -1)
+ if (dest_node == -1)
dest_node = candidate;
- else
+ else
{
/* In order to avoid infinite loop like "(a*)*", return the second
- epsilon-transition if the first was already considered. */
+ epsilon-transition if the first was already considered. */
if (re_node_set_contains (eps_via_nodes, dest_node))
- return candidate;
+ return candidate;
/* Otherwise, push the second epsilon-transition on the fail stack. */
else if (fs != NULL
&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
- eps_via_nodes))
+ eps_via_nodes))
return -2;
/* We know we are going to exit. */
}
static reg_errcode_t
-push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes)
- struct re_fail_stack_t *fs;
- int str_idx, dest_node, nregs;
- regmatch_t *regs;
- re_node_set *eps_via_nodes;
+__attribute_warn_unused_result__
+push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
+ int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
{
reg_errcode_t err;
int num = fs->num++;
}
static int
-pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
- struct re_fail_stack_t *fs;
- int *pidx, nregs;
- regmatch_t *regs;
- re_node_set *eps_via_nodes;
+pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+ regmatch_t *regs, re_node_set *eps_via_nodes)
{
int num = --fs->num;
assert (num >= 0);
pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
static reg_errcode_t
-set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
- const regex_t *preg;
- const re_match_context_t *mctx;
- size_t nmatch;
- regmatch_t *pmatch;
- int fl_backtrack;
+__attribute_warn_unused_result__
+set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
+ regmatch_t *pmatch, int fl_backtrack)
{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
int idx, cur_node;
re_node_set eps_via_nodes;
struct re_fail_stack_t *fs;
}
static reg_errcode_t
-free_fail_stack_return (fs)
- struct re_fail_stack_t *fs;
+free_fail_stack_return (struct re_fail_stack_t *fs)
{
if (fs)
{
}
static void
-update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
- re_dfa_t *dfa;
- regmatch_t *pmatch, *prev_idx_match;
- int cur_node, cur_idx, nmatch;
+update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+ regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
{
int type = dfa->nodes[cur_node].type;
if (type == OP_OPEN_SUBEXP)
((state) != NULL && re_node_set_contains (&(state)->nodes, node))
static reg_errcode_t
-sift_states_backward (mctx, sctx)
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
+sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
{
reg_errcode_t err;
int null_cnt = 0;
if (mctx->state_log[str_idx])
{
err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
- if (BE (err != REG_NOERROR, 0))
+ if (BE (err != REG_NOERROR, 0))
goto free_return;
}
}
static reg_errcode_t
-build_sifted_states (mctx, sctx, str_idx, cur_dest)
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int str_idx;
- re_node_set *cur_dest;
+__attribute_warn_unused_result__
+build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ int str_idx, re_node_set *cur_dest)
{
- re_dfa_t *const dfa = mctx->dfa;
- re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
+ const re_dfa_t *const dfa = mctx->dfa;
+ const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
int i;
/* Then build the next sifted state.
/* Helper functions. */
static reg_errcode_t
-clean_state_log_if_needed (mctx, next_state_log_idx)
- re_match_context_t *mctx;
- int next_state_log_idx;
+clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
{
int top = mctx->state_log_top;
- if (next_state_log_idx >= mctx->input.bufs_len
+ if ((next_state_log_idx >= mctx->input.bufs_len
+ && mctx->input.bufs_len < mctx->input.len)
|| (next_state_log_idx >= mctx->input.valid_len
&& mctx->input.valid_len < mctx->input.len))
{
reg_errcode_t err;
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, next_state_log_idx + 1);
if (BE (err != REG_NOERROR, 0))
return err;
}
}
static reg_errcode_t
-merge_state_array (dfa, dst, src, num)
- re_dfa_t *dfa;
- re_dfastate_t **dst;
- re_dfastate_t **src;
- int num;
+merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
+ re_dfastate_t **src, int num)
{
int st_idx;
reg_errcode_t err;
}
static reg_errcode_t
-update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int str_idx;
- re_node_set *dest_nodes;
+update_cur_sifted_state (const re_match_context_t *mctx,
+ re_sift_context_t *sctx, int str_idx,
+ re_node_set *dest_nodes)
{
- re_dfa_t *const dfa = mctx->dfa;
- reg_errcode_t err;
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err = REG_NOERROR;
const re_node_set *candidates;
candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
: &mctx->state_log[str_idx]->nodes);
}
static reg_errcode_t
-add_epsilon_src_nodes (dfa, dest_nodes, candidates)
- re_dfa_t *dfa;
- re_node_set *dest_nodes;
- const re_node_set *candidates;
+__attribute_warn_unused_result__
+add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
+ const re_node_set *candidates)
{
reg_errcode_t err = REG_NOERROR;
int i;
{
err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
if (BE (err != REG_NOERROR, 0))
- return REG_ESPACE;
+ return REG_ESPACE;
for (i = 0; i < dest_nodes->nelem; i++)
- re_node_set_merge (&state->inveclosure,
- dfa->inveclosures + dest_nodes->elems[i]);
+ {
+ err = re_node_set_merge (&state->inveclosure,
+ dfa->inveclosures + dest_nodes->elems[i]);
+ if (BE (err != REG_NOERROR, 0))
+ return REG_ESPACE;
+ }
}
return re_node_set_add_intersect (dest_nodes, candidates,
&state->inveclosure);
}
static reg_errcode_t
-sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
- re_dfa_t *dfa;
- int node;
- re_node_set *dest_nodes;
- const re_node_set *candidates;
+sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
+ const re_node_set *candidates)
{
int ecl_idx;
reg_errcode_t err;
}
static int
-check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
- const re_match_context_t *mctx;
- re_node_set *limits;
- int dst_node, dst_idx, src_node, src_idx;
+check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
+ int dst_node, int dst_idx, int src_node, int src_idx)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int lim_idx, src_pos, dst_pos;
int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
}
static int
-check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
- const re_match_context_t *mctx;
- int boundaries, subexp_idx, from_node, bkref_idx;
+check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
+ int subexp_idx, int from_node, int bkref_idx)
{
- re_dfa_t *const dfa = mctx->dfa;
- re_node_set *eclosures = dfa->eclosures + from_node;
+ const re_dfa_t *const dfa = mctx->dfa;
+ const re_node_set *eclosures = dfa->eclosures + from_node;
int node_idx;
/* Else, we are on the boundary: examine the nodes on the epsilon
{
struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
do
- {
+ {
int dst, cpos;
if (ent->node != node)
continue;
- if (subexp_idx
- < CHAR_BIT * sizeof ent->eps_reachable_subexps_map
- && !(ent->eps_reachable_subexps_map & (1u << subexp_idx)))
+ if (subexp_idx < BITSET_WORD_BITS
+ && !(ent->eps_reachable_subexps_map
+ & ((bitset_word_t) 1 << subexp_idx)))
continue;
/* Recurse trying to reach the OP_OPEN_SUBEXP and
if (dst == from_node)
{
if (boundaries & 1)
- return -1;
+ return -1;
else /* if (boundaries & 2) */
- return 0;
+ return 0;
}
cpos =
if (cpos == 0 && (boundaries & 2))
return 0;
- if (subexp_idx
- < CHAR_BIT * sizeof ent->eps_reachable_subexps_map)
- ent->eps_reachable_subexps_map &= ~(1u << subexp_idx);
- }
+ if (subexp_idx < BITSET_WORD_BITS)
+ ent->eps_reachable_subexps_map
+ &= ~((bitset_word_t) 1 << subexp_idx);
+ }
while (ent++->more);
}
break;
}
static int
-check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx)
- const re_match_context_t *mctx;
- int limit, subexp_idx, from_node, str_idx, bkref_idx;
+check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
+ int subexp_idx, int from_node, int str_idx,
+ int bkref_idx)
{
struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
int boundaries;
which are against limitations from DEST_NODES. */
static reg_errcode_t
-check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
- re_dfa_t *dfa;
- re_node_set *dest_nodes;
- const re_node_set *candidates;
- re_node_set *limits;
- struct re_backref_cache_entry *bkref_ents;
- int str_idx;
+check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
+ const re_node_set *candidates, re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents, int str_idx)
{
reg_errcode_t err;
int node_idx, lim_idx;
}
static reg_errcode_t
-sift_states_bkref (mctx, sctx, str_idx, candidates)
- re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int str_idx;
- const re_node_set *candidates;
+__attribute_warn_unused_result__
+sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ int str_idx, const re_node_set *candidates)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int node_idx, node;
re_sift_context_t local_sctx;
re_node_set_remove (&local_sctx.limits, enabled_idx);
/* mctx->bkref_ents may have changed, reload the pointer. */
- entry = mctx->bkref_ents + enabled_idx;
+ entry = mctx->bkref_ents + enabled_idx;
}
while (enabled_idx++, entry++->more);
}
#ifdef RE_ENABLE_I18N
static int
-sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
- const re_match_context_t *mctx;
- re_sift_context_t *sctx;
- int node_idx, str_idx, max_str_idx;
+sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ int node_idx, int str_idx, int max_str_idx)
{
const re_dfa_t *const dfa = mctx->dfa;
int naccepted;
update the destination of STATE_LOG. */
static re_dfastate_t *
-transit_state (err, mctx, state)
- reg_errcode_t *err;
- re_match_context_t *mctx;
- re_dfastate_t *state;
+__attribute_warn_unused_result__
+transit_state (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *state)
{
re_dfastate_t **trtable;
unsigned char ch;
trtable = state->word_trtable;
if (BE (trtable != NULL, 1))
- {
+ {
unsigned int context;
context
= re_string_context_at (&mctx->input,
/* Update the state_log if we need */
re_dfastate_t *
-merge_state_with_log (err, mctx, next_state)
- reg_errcode_t *err;
- re_match_context_t *mctx;
- re_dfastate_t *next_state;
+merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *next_state)
{
const re_dfa_t *const dfa = mctx->dfa;
int cur_idx = re_string_cur_idx (&mctx->input);
unsigned int context;
re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
/* If (state_log[cur_idx] != 0), it implies that cur_idx is
- the destination of a multibyte char/collating element/
- back reference. Then the next state is the union set of
- these destinations and the results of the transition table. */
+ the destination of a multibyte char/collating element/
+ back reference. Then the next state is the union set of
+ these destinations and the results of the transition table. */
pstate = mctx->state_log[cur_idx];
log_nodes = pstate->entrance_nodes;
if (next_state != NULL)
- {
- table_nodes = next_state->entrance_nodes;
- *err = re_node_set_init_union (&next_nodes, table_nodes,
+ {
+ table_nodes = next_state->entrance_nodes;
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
log_nodes);
- if (BE (*err != REG_NOERROR, 0))
+ if (BE (*err != REG_NOERROR, 0))
return NULL;
- }
+ }
else
- next_nodes = *log_nodes;
+ next_nodes = *log_nodes;
/* Note: We already add the nodes of the initial state,
then we don't need to add them here. */
re_string_cur_idx (&mctx->input) - 1,
mctx->eflags);
next_state = mctx->state_log[cur_idx]
- = re_acquire_state_context (err, dfa, &next_nodes, context);
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
- this function is next_state and ERR is already set. */
+ this function is next_state and ERR is already set. */
if (table_nodes != NULL)
- re_node_set_free (&next_nodes);
+ re_node_set_free (&next_nodes);
}
if (BE (dfa->nbackref, 0) && next_state != NULL)
multi-byte match, then look in the log for a state
from which to restart matching. */
re_dfastate_t *
-find_recover_state (err, mctx)
- reg_errcode_t *err;
- re_match_context_t *mctx;
+find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
{
re_dfastate_t *cur_state;
do
do
{
- if (++cur_str_idx > max)
- return NULL;
- re_string_skip_bytes (&mctx->input, 1);
+ if (++cur_str_idx > max)
+ return NULL;
+ re_string_skip_bytes (&mctx->input, 1);
}
while (mctx->state_log[cur_str_idx] == NULL);
/* From the node set CUR_NODES, pick up the nodes whose types are
OP_OPEN_SUBEXP and which have corresponding back references in the regular
expression. And register them to use them later for evaluating the
- correspoding back references. */
+ corresponding back references. */
static reg_errcode_t
-check_subexp_matching_top (mctx, cur_nodes, str_idx)
- re_match_context_t *mctx;
- re_node_set *cur_nodes;
- int str_idx;
+check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+ int str_idx)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
int node_idx;
reg_errcode_t err;
{
int node = cur_nodes->elems[node_idx];
if (dfa->nodes[node].type == OP_OPEN_SUBEXP
- && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map
- && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
+ && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
+ && (dfa->used_bkref_map
+ & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
{
err = match_ctx_add_subtop (mctx, node, str_idx);
if (BE (err != REG_NOERROR, 0))
accepting the current input byte. */
static re_dfastate_t *
-transit_state_sb (err, mctx, state)
- reg_errcode_t *err;
- re_match_context_t *mctx;
- re_dfastate_t *state;
+transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *state)
{
const re_dfa_t *const dfa = mctx->dfa;
re_node_set next_nodes;
#ifdef RE_ENABLE_I18N
static reg_errcode_t
-transit_state_mb (mctx, pstate)
- re_match_context_t *mctx;
- re_dfastate_t *pstate;
+transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
re_dfastate_t *dest_state;
if (!dfa->nodes[cur_node_idx].accept_mb)
- continue;
+ continue;
if (dfa->nodes[cur_node_idx].constraint)
{
#endif /* RE_ENABLE_I18N */
static reg_errcode_t
-transit_state_bkref (mctx, nodes)
- re_match_context_t *mctx;
- const re_node_set *nodes;
+transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
{
const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
delay these checking for prune_impossible_nodes(). */
static reg_errcode_t
-get_subexp (mctx, bkref_node, bkref_str_idx)
- re_match_context_t *mctx;
- int bkref_node, bkref_str_idx;
+__attribute_warn_unused_result__
+get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
{
const re_dfa_t *const dfa = mctx->dfa;
int subexp_num, sub_top_idx;
const struct re_backref_cache_entry *entry
= mctx->bkref_ents + cache_idx;
do
- if (entry->node == bkref_node)
+ if (entry->node == bkref_node)
return REG_NOERROR; /* We already checked it. */
while (entry++->more);
}
if (bkref_str_off >= mctx->input.len)
break;
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, bkref_str_off + 1);
if (BE (err != REG_NOERROR, 0))
return err;
and SUB_LAST. */
static reg_errcode_t
-get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
- re_match_context_t *mctx;
- const re_sub_match_top_t *sub_top;
- re_sub_match_last_t *sub_last;
- int bkref_node, bkref_str;
+get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+ re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
{
reg_errcode_t err;
int to_idx;
E.g. RE: (a){2} */
static int
-find_subexp_node (dfa, nodes, subexp_idx, type)
- const re_dfa_t *dfa;
- const re_node_set *nodes;
- int subexp_idx, type;
+find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+ int subexp_idx, int type)
{
int cls_idx;
for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
static reg_errcode_t
-check_arrival (mctx, path, top_node, top_str, last_node, last_str,
- type)
- re_match_context_t *mctx;
- state_array_t *path;
- int top_node, top_str, last_node, last_str, type;
+__attribute_warn_unused_result__
+check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
+ int top_str, int last_node, int last_str, int type)
{
const re_dfa_t *const dfa = mctx->dfa;
- reg_errcode_t err;
+ reg_errcode_t err = REG_NOERROR;
int subexp_num, backup_cur_idx, str_idx, null_cnt;
re_dfastate_t *cur_state = NULL;
re_node_set *cur_nodes, next_nodes;
sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
}
- str_idx = path->next_idx == 0 ? top_str : path->next_idx;
+ str_idx = path->next_idx ?: top_str;
/* Temporary modify MCTX. */
backup_state_log = mctx->state_log;
Can't we unify them? */
static reg_errcode_t
-check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
- re_match_context_t *mctx;
- int str_idx;
- re_node_set *cur_nodes, *next_nodes;
+__attribute_warn_unused_result__
+check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
+ re_node_set *cur_nodes, re_node_set *next_nodes)
{
const re_dfa_t *const dfa = mctx->dfa;
int result;
int cur_idx;
- reg_errcode_t err;
+#ifdef RE_ENABLE_I18N
+ reg_errcode_t err = REG_NOERROR;
+#endif
re_node_set union_set;
re_node_set_init_empty (&union_set);
for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
*/
static reg_errcode_t
-check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
- const re_dfa_t *dfa;
- re_node_set *cur_nodes;
- int ex_subexp, type;
+check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+ int ex_subexp, int type)
{
reg_errcode_t err;
int idx, outside_node;
problematic append it to DST_NODES. */
static reg_errcode_t
-check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
- const re_dfa_t *dfa;
- int target, ex_subexp, type;
- re_node_set *dst_nodes;
+__attribute_warn_unused_result__
+check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+ int target, int ex_subexp, int type)
{
int cur_node;
for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
in MCTX->BKREF_ENTS. */
static reg_errcode_t
-expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
- type)
- re_match_context_t *mctx;
- int cur_str, subexp_num, type;
- re_node_set *cur_nodes;
+__attribute_warn_unused_result__
+expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+ int cur_str, int subexp_num, int type)
{
- re_dfa_t *const dfa = mctx->dfa;
+ const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
struct re_backref_cache_entry *ent;
Return 1 if succeeded, otherwise return NULL. */
static int
-build_trtable (dfa, state)
- re_dfa_t *dfa;
- re_dfastate_t *state;
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
{
reg_errcode_t err;
int i, j, ch, need_word_trtable = 0;
- unsigned int elem, mask;
- int dests_node_malloced = 0, dest_states_malloced = 0;
+ bitset_word_t elem, mask;
+ bool dests_node_malloced = false;
+ bool dest_states_malloced = false;
int ndests; /* Number of the destination states from `state'. */
re_dfastate_t **trtable;
re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
re_node_set follows, *dests_node;
- bitset *dests_ch;
- bitset acceptable;
+ bitset_t *dests_ch;
+ bitset_t acceptable;
+
+ struct dests_alloc
+ {
+ re_node_set dests_node[SBC_MAX];
+ bitset_t dests_ch[SBC_MAX];
+ } *dests_alloc;
/* We build DFA states which corresponds to the destination nodes
from `state'. `dests_node[i]' represents the nodes which i-th
destination state contains, and `dests_ch[i]' represents the
characters which i-th destination state accepts. */
- if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
- dests_node = (re_node_set *)
- alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+ if (__libc_use_alloca (sizeof (struct dests_alloc)))
+ dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
else
{
- dests_node = (re_node_set *)
- malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
- if (BE (dests_node == NULL, 0))
+ dests_alloc = re_malloc (struct dests_alloc, 1);
+ if (BE (dests_alloc == NULL, 0))
return 0;
- dests_node_malloced = 1;
+ dests_node_malloced = true;
}
- dests_ch = (bitset *) (dests_node + SBC_MAX);
+ dests_node = dests_alloc->dests_node;
+ dests_ch = dests_alloc->dests_ch;
/* Initialize transiton table. */
state->word_trtable = state->trtable = NULL;
if (BE (ndests <= 0, 0))
{
if (dests_node_malloced)
- free (dests_node);
+ free (dests_alloc);
/* Return 0 in case of an error, 1 otherwise. */
if (ndests == 0)
{
state->trtable = (re_dfastate_t **)
calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ if (BE (state->trtable == NULL, 0))
+ return 0;
return 1;
}
return 0;
if (BE (err != REG_NOERROR, 0))
goto out_free;
- if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
+ /* Avoid arithmetic overflow in size calculation. */
+ if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
+ / (3 * sizeof (re_dfastate_t *)))
+ < ndests),
+ 0))
+ goto out_free;
+
+ if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+ ndests * 3 * sizeof (re_dfastate_t *)))
dest_states = (re_dfastate_t **)
alloca (ndests * 3 * sizeof (re_dfastate_t *));
for (i = 0; i < ndests; ++i)
re_node_set_free (dests_node + i);
if (dests_node_malloced)
- free (dests_node);
+ free (dests_alloc);
return 0;
}
- dest_states_malloced = 1;
+ dest_states_malloced = true;
}
dest_states_word = dest_states + ndests;
dest_states_nl = dest_states_word + ndests;
goto out_free;
/* For all characters ch...: */
- for (i = 0; i < BITSET_UINTS; ++i)
- for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
elem;
mask <<= 1, elem >>= 1, ++ch)
if (BE (elem & 1, 0))
goto out_free;
/* For all characters ch...: */
- for (i = 0; i < BITSET_UINTS; ++i)
- for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
elem;
mask <<= 1, elem >>= 1, ++ch)
if (BE (elem & 1, 0))
re_node_set_free (dests_node + i);
if (dests_node_malloced)
- free (dests_node);
+ free (dests_alloc);
return 1;
}
to DEST_CH[i]. This function return the number of destinations. */
static int
-group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
- const re_dfa_t *dfa;
- const re_dfastate_t *state;
- re_node_set *dests_node;
- bitset *dests_ch;
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+ re_node_set *dests_node, bitset_t *dests_ch)
{
reg_errcode_t err;
int result;
int i, j, k;
int ndests; /* Number of the destinations from `state'. */
- bitset accepts; /* Characters a node can accept. */
+ bitset_t accepts; /* Characters a node can accept. */
const re_node_set *cur_nodes = &state->nodes;
bitset_empty (accepts);
ndests = 0;
}
#ifdef RE_ENABLE_I18N
else if (type == OP_UTF8_PERIOD)
- {
- memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
+ {
+ memset (accepts, '\xff', sizeof (bitset_t) / 2);
if (!(dfa->syntax & RE_DOT_NEWLINE))
bitset_clear (accepts, '\n');
if (dfa->syntax & RE_DOT_NOT_NULL)
bitset_clear (accepts, '\0');
- }
+ }
#endif
else
continue;
{
if (constraint & NEXT_NEWLINE_CONSTRAINT)
{
- int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+ bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
bitset_empty (accepts);
if (accepts_newline)
bitset_set (accepts, NEWLINE_CHAR);
if (constraint & NEXT_WORD_CONSTRAINT)
{
- unsigned int any_set = 0;
+ bitset_word_t any_set = 0;
if (type == CHARACTER && !node->word_char)
{
bitset_empty (accepts);
}
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
else
#endif
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= dfa->word_char[j]);
if (!any_set)
continue;
}
if (constraint & NEXT_NOTWORD_CONSTRAINT)
{
- unsigned int any_set = 0;
+ bitset_word_t any_set = 0;
if (type == CHARACTER && node->word_char)
{
bitset_empty (accepts);
}
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
else
#endif
- for (j = 0; j < BITSET_UINTS; ++j)
+ for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= ~dfa->word_char[j]);
if (!any_set)
continue;
state. Above, we make sure that accepts is not empty. */
for (j = 0; j < ndests; ++j)
{
- bitset intersec; /* Intersection sets, see below. */
- bitset remains;
+ bitset_t intersec; /* Intersection sets, see below. */
+ bitset_t remains;
/* Flags, see below. */
- int has_intersec, not_subset, not_consumed;
+ bitset_word_t has_intersec, not_subset, not_consumed;
/* Optimization, skip if this state doesn't accept the character. */
if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
/* Enumerate the intersection set of this state and `accepts'. */
has_intersec = 0;
- for (k = 0; k < BITSET_UINTS; ++k)
+ for (k = 0; k < BITSET_WORDS; ++k)
has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
/* And skip if the intersection set is empty. */
if (!has_intersec)
/* Then check if this state is a subset of `accepts'. */
not_subset = not_consumed = 0;
- for (k = 0; k < BITSET_UINTS; ++k)
+ for (k = 0; k < BITSET_WORDS; ++k)
{
not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
one collating element like '.', '[a-z]', opposite to the other nodes
can only accept one byte. */
+# ifdef _LIBC
+# include <locale/weight.h>
+# endif
+
static int
-check_node_accept_bytes (dfa, node_idx, input, str_idx)
- const re_dfa_t *dfa;
- int node_idx, str_idx;
- const re_string_t *input;
+check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+ const re_string_t *input, int str_idx)
{
const re_token_t *node = dfa->nodes + node_idx;
int char_len, elem_len;
if (node->type == OP_PERIOD)
{
if (char_len <= 1)
- return 0;
+ return 0;
/* FIXME: I don't think this if is needed, as both '\n'
and '\0' are char_len == 1. */
/* '.' accepts any one character except the following two cases. */
const int32_t *table, *indirect;
const unsigned char *weights, *extra;
const char *collseqwc;
- int32_t idx;
- /* This #include defines a local function! */
-# include <locale/weight.h>
/* match with collating_symbol? */
if (cset->ncoll_syms)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
- idx = findidx (&cp);
+ int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
if (idx > 0)
for (i = 0; i < cset->nequiv_classes; ++i)
{
int32_t equiv_class_idx = cset->equiv_classes[i];
- size_t weight_len = weights[idx];
- if (weight_len == weights[equiv_class_idx])
+ size_t weight_len = weights[idx & 0xffffff];
+ if (weight_len == weights[equiv_class_idx & 0xffffff]
+ && (idx >> 24) == (equiv_class_idx >> 24))
{
int cnt = 0;
+
+ idx &= 0xffffff;
+ equiv_class_idx &= 0xffffff;
+
while (cnt <= weight_len
&& (weights[equiv_class_idx + 1 + cnt]
== weights[idx + 1 + cnt]))
{
cmp_buf[0] = cset->range_starts[i];
cmp_buf[4] = cset->range_ends[i];
- if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
- && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+ if (__wcscoll (cmp_buf, cmp_buf + 2) <= 0
+ && __wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
{
match_len = char_len;
goto check_node_accept_bytes_match;
# ifdef _LIBC
static unsigned int
-find_collation_sequence_value (mbs, mbs_len)
- const unsigned char *mbs;
- size_t mbs_len;
+find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
{
uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules == 0)
/* Skip the collation sequence value. */
idx += sizeof (uint32_t);
/* Skip the wide char sequence of the collating element. */
- idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+ idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
/* If we found the entry, return the sequence value. */
if (found)
return *(uint32_t *) (extra + idx);
byte of the INPUT. */
static int
-check_node_accept (mctx, node, idx)
- const re_match_context_t *mctx;
- const re_token_t *node;
- int idx;
+check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
+ int idx)
{
unsigned char ch;
ch = re_string_byte_at (&mctx->input, idx);
{
case CHARACTER:
if (node->opr.c != ch)
- return 0;
+ return 0;
break;
case SIMPLE_BRACKET:
if (!bitset_contain (node->opr.sbcset, ch))
- return 0;
+ return 0;
break;
#ifdef RE_ENABLE_I18N
case OP_UTF8_PERIOD:
if (ch >= 0x80)
- return 0;
+ return 0;
/* FALLTHROUGH */
#endif
case OP_PERIOD:
/* Extend the buffers, if the buffers have run out. */
static reg_errcode_t
-extend_buffers (mctx)
- re_match_context_t *mctx;
+__attribute_warn_unused_result__
+extend_buffers (re_match_context_t *mctx, int min_len)
{
reg_errcode_t ret;
re_string_t *pstr = &mctx->input;
- /* Double the lengthes of the buffers. */
- ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+ /* Avoid overflow. */
+ if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
+ return REG_ESPACE;
+
+ /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */
+ ret = re_string_realloc_buffers (pstr,
+ MAX (min_len,
+ MIN (pstr->len, pstr->bufs_len * 2)));
if (BE (ret != REG_NOERROR, 0))
return ret;
/* Initialize MCTX. */
static reg_errcode_t
-match_ctx_init (mctx, eflags, n)
- re_match_context_t *mctx;
- int eflags, n;
+__attribute_warn_unused_result__
+match_ctx_init (re_match_context_t *mctx, int eflags, int n)
{
mctx->eflags = eflags;
mctx->match_last = -1;
of the input, or changes the input string. */
static void
-match_ctx_clean (mctx)
- re_match_context_t *mctx;
+match_ctx_clean (re_match_context_t *mctx)
{
int st_idx;
for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
/* Free all the memory associated with MCTX. */
static void
-match_ctx_free (mctx)
- re_match_context_t *mctx;
+match_ctx_free (re_match_context_t *mctx)
{
/* First, free all the memory associated with MCTX->SUB_TOPS. */
match_ctx_clean (mctx);
*/
static reg_errcode_t
-match_ctx_add_entry (mctx, node, str_idx, from, to)
- re_match_context_t *mctx;
- int node, str_idx, from, to;
+__attribute_warn_unused_result__
+match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
+ int to)
{
if (mctx->nbkref_ents >= mctx->abkref_ents)
{
found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
static int
-search_cur_bkref_entry (mctx, str_idx)
- const re_match_context_t *mctx;
- int str_idx;
+search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
{
int left, right, mid, last;
last = right = mctx->nbkref_ents;
at STR_IDX. */
static reg_errcode_t
-match_ctx_add_subtop (mctx, node, str_idx)
- re_match_context_t *mctx;
- int node, str_idx;
+__attribute_warn_unused_result__
+match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
{
#ifdef DEBUG
assert (mctx->sub_tops != NULL);
at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
static re_sub_match_last_t *
-match_ctx_add_sublast (subtop, node, str_idx)
- re_sub_match_top_t *subtop;
- int node, str_idx;
+match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
{
re_sub_match_last_t *new_entry;
if (BE (subtop->nlasts == subtop->alasts, 0))
}
static void
-sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx)
- re_sift_context_t *sctx;
- re_dfastate_t **sifted_sts, **limited_sts;
- int last_node, last_str_idx;
+sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, int last_node, int last_str_idx)
{
sctx->sifted_states = sifted_sts;
sctx->limited_states = limited_sts;