/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
- int n) internal_function;
-static void match_ctx_clean (re_match_context_t *mctx) internal_function;
-static void match_ctx_free (re_match_context_t *cache) internal_function;
+ int n);
+static void match_ctx_clean (re_match_context_t *mctx);
+static void match_ctx_free (re_match_context_t *cache);
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
- int str_idx, int from, int to)
- internal_function;
-static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
- internal_function;
+ int str_idx, int from, int to);
+static int search_cur_bkref_entry (const re_match_context_t *mctx,
+ int str_idx);
static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
- int str_idx) internal_function;
+ int str_idx);
static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
- int node, int str_idx)
- internal_function;
+ int node, int str_idx);
static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
re_dfastate_t **limited_sts, int last_node,
- int last_str_idx)
- internal_function;
+ int last_str_idx);
static reg_errcode_t re_search_internal (const regex_t *preg,
const char *string, int length,
int start, int range, int stop,
size_t nmatch, regmatch_t pmatch[],
- int eflags) internal_function;
+ int eflags);
static int re_search_2_stub (struct re_pattern_buffer *bufp,
const char *string1, int length1,
const char *string2, int length2,
int start, int range, struct re_registers *regs,
- int stop, int ret_len) internal_function;
+ int stop, int ret_len);
static int re_search_stub (struct re_pattern_buffer *bufp,
const char *string, int length, int start,
int range, int stop, struct re_registers *regs,
- int ret_len) internal_function;
+ int ret_len);
static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
- int nregs, int regs_allocated) internal_function;
-static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
- internal_function;
+ int nregs, int regs_allocated);
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
static int check_matching (re_match_context_t *mctx, int fl_longest_match,
- int *p_match_first) internal_function;
+ int *p_match_first);
static int check_halt_state_context (const re_match_context_t *mctx,
- const re_dfastate_t *state, int idx)
- internal_function;
+ const re_dfastate_t *state, int idx);
static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
regmatch_t *prev_idx_match, int cur_node,
- int cur_idx, int nmatch) internal_function;
+ int cur_idx, int nmatch);
static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
int str_idx, int dest_node, int nregs,
regmatch_t *regs,
- re_node_set *eps_via_nodes)
- internal_function;
+ re_node_set *eps_via_nodes);
static reg_errcode_t set_regs (const regex_t *preg,
const re_match_context_t *mctx,
size_t nmatch, regmatch_t *pmatch,
- int fl_backtrack) internal_function;
-static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
- internal_function;
+ int fl_backtrack);
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
#ifdef RE_ENABLE_I18N
static int sift_states_iter_mb (const re_match_context_t *mctx,
re_sift_context_t *sctx,
- int node_idx, int str_idx, int max_str_idx)
- internal_function;
+ int node_idx, int str_idx, int max_str_idx);
#endif /* RE_ENABLE_I18N */
static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
- re_sift_context_t *sctx)
- internal_function;
+ re_sift_context_t *sctx);
static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
re_sift_context_t *sctx, int str_idx,
- re_node_set *cur_dest)
- internal_function;
+ re_node_set *cur_dest);
static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
re_sift_context_t *sctx,
int str_idx,
- re_node_set *dest_nodes)
- internal_function;
+ re_node_set *dest_nodes);
static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
re_node_set *dest_nodes,
- const re_node_set *candidates)
- internal_function;
+ const re_node_set *candidates);
static int check_dst_limits (const re_match_context_t *mctx,
re_node_set *limits,
int dst_node, int dst_idx, int src_node,
- int src_idx) internal_function;
+ int src_idx);
static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
int boundaries, int subexp_idx,
- int from_node, int bkref_idx)
- internal_function;
+ int from_node, int bkref_idx);
static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
int limit, int subexp_idx,
int node, int str_idx,
- int bkref_idx) internal_function;
+ int bkref_idx);
static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
re_node_set *dest_nodes,
const re_node_set *candidates,
re_node_set *limits,
struct re_backref_cache_entry *bkref_ents,
- int str_idx) internal_function;
+ int str_idx);
static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
re_sift_context_t *sctx,
- int str_idx, const re_node_set *candidates)
- internal_function;
+ int str_idx,
+ const re_node_set *candidates);
static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
re_dfastate_t **dst,
- re_dfastate_t **src, int num)
- internal_function;
+ re_dfastate_t **src, int num);
static re_dfastate_t *find_recover_state (reg_errcode_t *err,
- re_match_context_t *mctx) internal_function;
+ re_match_context_t *mctx);
static re_dfastate_t *transit_state (reg_errcode_t *err,
re_match_context_t *mctx,
- re_dfastate_t *state) internal_function;
+ re_dfastate_t *state);
static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
re_match_context_t *mctx,
- re_dfastate_t *next_state)
- internal_function;
+ re_dfastate_t *next_state);
static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
re_node_set *cur_nodes,
- int str_idx) internal_function;
+ int str_idx);
#if 0
static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
re_match_context_t *mctx,
- re_dfastate_t *pstate)
- internal_function;
+ re_dfastate_t *pstate);
#endif
#ifdef RE_ENABLE_I18N
static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
- re_dfastate_t *pstate)
- internal_function;
+ re_dfastate_t *pstate);
#endif /* RE_ENABLE_I18N */
static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
- const re_node_set *nodes)
- internal_function;
+ const re_node_set *nodes);
static reg_errcode_t get_subexp (re_match_context_t *mctx,
- int bkref_node, int bkref_str_idx)
- internal_function;
+ int bkref_node, int bkref_str_idx);
static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
const re_sub_match_top_t *sub_top,
re_sub_match_last_t *sub_last,
- int bkref_node, int bkref_str)
- internal_function;
+ int bkref_node, int bkref_str);
static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
- int subexp_idx, int type) internal_function;
+ int subexp_idx, int type);
static reg_errcode_t check_arrival (re_match_context_t *mctx,
state_array_t *path, int top_node,
int top_str, int last_node, int last_str,
- int type) internal_function;
+ int type);
static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
int str_idx,
re_node_set *cur_nodes,
- re_node_set *next_nodes)
- internal_function;
+ re_node_set *next_nodes);
static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
re_node_set *cur_nodes,
- int ex_subexp, int type)
- internal_function;
+ int ex_subexp, int type);
static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
re_node_set *dst_nodes,
int target, int ex_subexp,
- int type) internal_function;
+ int type);
static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
re_node_set *cur_nodes, int cur_str,
- int subexp_num, int type)
- internal_function;
-static int build_trtable (const re_dfa_t *dfa,
- re_dfastate_t *state) internal_function;
+ int subexp_num, int type);
+static int build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
#ifdef RE_ENABLE_I18N
static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
- const re_string_t *input, int idx)
- internal_function;
+ const re_string_t *input, int idx);
# ifdef _LIBC
static unsigned int find_collation_sequence_value (const unsigned char *mbs,
- size_t name_len)
- internal_function;
+ size_t name_len);
# endif /* _LIBC */
#endif /* RE_ENABLE_I18N */
static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
const re_dfastate_t *state,
re_node_set *states_node,
- bitset_t *states_ch) internal_function;
+ bitset_t *states_ch);
static int check_node_accept (const re_match_context_t *mctx,
- const re_token_t *node, int idx)
- internal_function;
-static reg_errcode_t extend_buffers (re_match_context_t *mctx)
- internal_function;
+ const re_token_t *node, int idx);
+static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
\f
/* Entry point for POSIX code. */
We return 0 if we find a match and REG_NOMATCH if not. */
int
-regexec (preg, string, nmatch, pmatch, eflags)
- const regex_t *__restrict preg;
- const char *__restrict string;
- size_t nmatch;
- regmatch_t pmatch[];
- int eflags;
+regexec (const regex_t *__restrict preg, const char *__restrict string,
+ size_t nmatch, regmatch_t pmatch[], int eflags)
{
reg_errcode_t err;
int start, length;
}
#ifdef _LIBC
+libc_hidden_def (__regexec)
+
# include <shlib-compat.h>
versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
concerned.
If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
- and all groups is stroed in REGS. (For the "_2" variants, the offsets are
+ and all groups is stored in REGS. (For the "_2" variants, the offsets are
computed relative to the concatenation, not relative to the individual
strings.)
match was found and -2 indicates an internal error. */
int
-re_match (bufp, string, length, start, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int length, start;
- struct re_registers *regs;
+re_match (struct re_pattern_buffer *bufp, const char *string, int length,
+ int start, struct re_registers *regs)
{
return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
}
#endif
int
-re_search (bufp, string, length, start, range, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int length, start, range;
- struct re_registers *regs;
+re_search (struct re_pattern_buffer *bufp, const char *string, int length,
+ int start, int range, struct re_registers *regs)
{
return re_search_stub (bufp, string, length, start, range, length, regs, 0);
}
#endif
int
-re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int length1, length2, start, stop;
- struct re_registers *regs;
+re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
+ const char *string2, int length2, int start,
+ struct re_registers *regs, int stop)
{
return re_search_2_stub (bufp, string1, length1, string2, length2,
start, 0, regs, stop, 1);
#endif
int
-re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int length1, length2, start, range, stop;
- struct re_registers *regs;
+re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int length1,
+ const char *string2, int length2, int start, int range,
+ struct re_registers *regs, int stop)
{
return re_search_2_stub (bufp, string1, length1, string2, length2,
start, range, regs, stop, 0);
#endif
static int
-re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
- stop, ret_len)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int length1, length2, start, range, stop, ret_len;
- struct re_registers *regs;
+re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
+ int length1, const char *string2, int length2, int start,
+ int range, struct re_registers *regs,
+ int stop, int ret_len)
{
const char *str;
int rval;
int len = length1 + length2;
- int free_str = 0;
+ char *s = NULL;
- if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+ if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
return -2;
/* Concatenate the strings. */
if (length2 > 0)
if (length1 > 0)
{
- char *s = re_malloc (char, len);
+ s = re_malloc (char, len);
if (BE (s == NULL, 0))
return -2;
memcpy (s + length1, string2, length2);
#endif
str = s;
- free_str = 1;
}
else
str = string2;
else
str = string1;
- rval = re_search_stub (bufp, str, len, start, range, stop, regs,
- ret_len);
- if (free_str)
- re_free ((char *) str);
+ rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
+ re_free (s);
return rval;
}
otherwise the position of the match is returned. */
static int
-re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
- struct re_pattern_buffer *bufp;
- const char *string;
- int length, start, range, stop, ret_len;
- struct re_registers *regs;
+re_search_stub (struct re_pattern_buffer *bufp, const char *string, int length,
+ int start, int range, int stop, struct re_registers *regs,
+ int ret_len)
{
reg_errcode_t result;
regmatch_t *pmatch;
}
static unsigned
-re_copy_regs (regs, pmatch, nregs, regs_allocated)
- struct re_registers *regs;
- regmatch_t *pmatch;
- int nregs, regs_allocated;
+re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs,
+ int regs_allocated)
{
int rval = REGS_REALLOCATE;
int i;
if (regs_allocated == REGS_UNALLOCATED)
{ /* No. So allocate them with malloc. */
regs->start = re_malloc (regoff_t, need_regs);
- regs->end = re_malloc (regoff_t, need_regs);
- if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
+ if (BE (regs->start == NULL, 0))
return REGS_UNALLOCATED;
+ regs->end = re_malloc (regoff_t, need_regs);
+ if (BE (regs->end == NULL, 0))
+ {
+ re_free (regs->start);
+ return REGS_UNALLOCATED;
+ }
regs->num_regs = need_regs;
}
else if (regs_allocated == REGS_REALLOCATE)
if (BE (need_regs > regs->num_regs, 0))
{
regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
- regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
- if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
+ regoff_t *new_end;
+ if (BE (new_start == NULL, 0))
return REGS_UNALLOCATED;
+ new_end = re_realloc (regs->end, regoff_t, need_regs);
+ if (BE (new_end == NULL, 0))
+ {
+ re_free (new_start);
+ return REGS_UNALLOCATED;
+ }
regs->start = new_start;
regs->end = new_end;
regs->num_regs = need_regs;
freeing the old data. */
void
-re_set_registers (bufp, regs, num_regs, starts, ends)
- struct re_pattern_buffer *bufp;
- struct re_registers *regs;
- unsigned num_regs;
- regoff_t *starts, *ends;
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends)
{
if (num_regs)
{
# ifdef _LIBC
weak_function
# endif
-re_exec (s)
- const char *s;
+re_exec (const char *s)
{
return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
}
/* Searches for a compiled pattern PREG in the string STRING, whose
length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
- mingings with regexec. START, and RANGE have the same meanings
+ meaning as with regexec. START, and RANGE have the same meanings
with re_search.
Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
otherwise return the error code.
(START + RANGE >= 0 && START + RANGE <= LENGTH) */
static reg_errcode_t
-re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
- eflags)
- const regex_t *preg;
- const char *string;
- int length, start, range, stop, eflags;
- size_t nmatch;
- regmatch_t pmatch[];
+__attribute_warn_unused_result__
+re_search_internal (const regex_t *preg, const char *string, int length,
+ int start, int range, int stop, size_t nmatch,
+ regmatch_t pmatch[], int eflags)
{
reg_errcode_t err;
const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
|| !preg->newline_anchor))
{
if (start != 0 && start + range != 0)
- return REG_NOMATCH;
+ return REG_NOMATCH;
start = range = 0;
}
multi character collating element. */
if (nmatch > 1 || dfa->has_mb_node)
{
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+
mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
if (BE (mctx.state_log == NULL, 0))
{
break;
match_first += incr;
if (match_first < left_lim || match_first > right_lim)
- {
- err = REG_NOMATCH;
- goto free_return;
- }
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
}
break;
}
goto free_return;
}
- /* At last, add the offset to the each registers, since we slided
+ /* At last, add the offset to each register, since we slid
the buffers so that we could assume that the matching starts
from 0. */
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
}
if (dfa->subexp_map)
- for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
- if (dfa->subexp_map[reg_idx] != reg_idx)
- {
- pmatch[reg_idx + 1].rm_so
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
- pmatch[reg_idx + 1].rm_eo
- = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
- }
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ if (dfa->subexp_map[reg_idx] != reg_idx)
+ {
+ pmatch[reg_idx + 1].rm_so
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+ pmatch[reg_idx + 1].rm_eo
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+ }
}
free_return:
}
static reg_errcode_t
-prune_impossible_nodes (mctx)
- re_match_context_t *mctx;
+__attribute_warn_unused_result__
+prune_impossible_nodes (re_match_context_t *mctx)
{
const re_dfa_t *const dfa = mctx->dfa;
int halt_node, match_last;
#endif
match_last = mctx->match_last;
halt_node = mctx->last_node;
+
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
+ return REG_ESPACE;
+
sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
if (BE (sifted_states == NULL, 0))
{
re_node_set_free (&sctx.limits);
if (BE (ret != REG_NOERROR, 0))
goto free_return;
+ if (sifted_states[0] == NULL)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
}
re_free (mctx->state_log);
mctx->state_log = sifted_states;
since initial states may have constraints like "\<", "^", etc.. */
static inline re_dfastate_t *
-__attribute ((always_inline)) internal_function
+__attribute ((always_inline))
acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
int idx)
{
index of the buffer. */
static int
+__attribute_warn_unused_result__
check_matching (re_match_context_t *mctx, int fl_longest_match,
int *p_match_first)
{
{
err = transit_state_bkref (mctx, &cur_state->nodes);
if (BE (err != REG_NOERROR, 0))
- return err;
+ return err;
}
}
}
re_dfastate_t *old_state = cur_state;
int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
- if (BE (next_char_idx >= mctx->input.bufs_len, 0)
- || (BE (next_char_idx >= mctx->input.valid_len, 0)
- && mctx->input.valid_len < mctx->input.len))
- {
- err = extend_buffers (mctx);
- if (BE (err != REG_NOERROR, 0))
+ if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
+ && mctx->input.bufs_len < mctx->input.len)
+ || (BE (next_char_idx >= mctx->input.valid_len, 0)
+ && mctx->input.valid_len < mctx->input.len))
+ {
+ err = extend_buffers (mctx, next_char_idx + 1);
+ if (BE (err != REG_NOERROR, 0))
{
assert (err == REG_ESPACE);
return -2;
}
- }
+ }
cur_state = transit_state (&err, mctx, cur_state);
if (mctx->state_log != NULL)
/* Check NODE match the current context. */
static int
-internal_function
check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
{
re_token_type_t type = dfa->nodes[node].type;
of errors. */
static int
-internal_function
proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
int *pidx, int node, re_node_set *eps_via_nodes,
struct re_fail_stack_t *fs)
int candidate = edests->elems[i];
if (!re_node_set_contains (cur_nodes, candidate))
continue;
- if (dest_node == -1)
+ if (dest_node == -1)
dest_node = candidate;
- else
+ else
{
/* In order to avoid infinite loop like "(a*)*", return the second
- epsilon-transition if the first was already considered. */
+ epsilon-transition if the first was already considered. */
if (re_node_set_contains (eps_via_nodes, dest_node))
- return candidate;
+ return candidate;
/* Otherwise, push the second epsilon-transition on the fail stack. */
else if (fs != NULL
&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
- eps_via_nodes))
+ eps_via_nodes))
return -2;
/* We know we are going to exit. */
}
static reg_errcode_t
+__attribute_warn_unused_result__
push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
{
}
static int
-internal_function
pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
regmatch_t *regs, re_node_set *eps_via_nodes)
{
pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
static reg_errcode_t
+__attribute_warn_unused_result__
set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
regmatch_t *pmatch, int fl_backtrack)
{
if (mctx->state_log[str_idx])
{
err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
- if (BE (err != REG_NOERROR, 0))
+ if (BE (err != REG_NOERROR, 0))
goto free_return;
}
}
static reg_errcode_t
+__attribute_warn_unused_result__
build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
int str_idx, re_node_set *cur_dest)
{
/* Helper functions. */
static reg_errcode_t
-internal_function
clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
{
int top = mctx->state_log_top;
- if (next_state_log_idx >= mctx->input.bufs_len
+ if ((next_state_log_idx >= mctx->input.bufs_len
+ && mctx->input.bufs_len < mctx->input.len)
|| (next_state_log_idx >= mctx->input.valid_len
&& mctx->input.valid_len < mctx->input.len))
{
reg_errcode_t err;
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, next_state_log_idx + 1);
if (BE (err != REG_NOERROR, 0))
return err;
}
}
static reg_errcode_t
+__attribute_warn_unused_result__
add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
const re_node_set *candidates)
{
{
err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
if (BE (err != REG_NOERROR, 0))
- return REG_ESPACE;
+ return REG_ESPACE;
for (i = 0; i < dest_nodes->nelem; i++)
- re_node_set_merge (&state->inveclosure,
- dfa->inveclosures + dest_nodes->elems[i]);
+ {
+ err = re_node_set_merge (&state->inveclosure,
+ dfa->inveclosures + dest_nodes->elems[i]);
+ if (BE (err != REG_NOERROR, 0))
+ return REG_ESPACE;
+ }
}
return re_node_set_add_intersect (dest_nodes, candidates,
&state->inveclosure);
}
static reg_errcode_t
-internal_function
sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
const re_node_set *candidates)
{
{
struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
do
- {
+ {
int dst, cpos;
if (ent->node != node)
if (dst == from_node)
{
if (boundaries & 1)
- return -1;
+ return -1;
else /* if (boundaries & 2) */
- return 0;
+ return 0;
}
cpos =
if (subexp_idx < BITSET_WORD_BITS)
ent->eps_reachable_subexps_map
&= ~((bitset_word_t) 1 << subexp_idx);
- }
+ }
while (ent++->more);
}
break;
}
static reg_errcode_t
+__attribute_warn_unused_result__
sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
int str_idx, const re_node_set *candidates)
{
re_node_set_remove (&local_sctx.limits, enabled_idx);
/* mctx->bkref_ents may have changed, reload the pointer. */
- entry = mctx->bkref_ents + enabled_idx;
+ entry = mctx->bkref_ents + enabled_idx;
}
while (enabled_idx++, entry++->more);
}
update the destination of STATE_LOG. */
static re_dfastate_t *
+__attribute_warn_unused_result__
transit_state (reg_errcode_t *err, re_match_context_t *mctx,
re_dfastate_t *state)
{
trtable = state->word_trtable;
if (BE (trtable != NULL, 1))
- {
+ {
unsigned int context;
context
= re_string_context_at (&mctx->input,
unsigned int context;
re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
/* If (state_log[cur_idx] != 0), it implies that cur_idx is
- the destination of a multibyte char/collating element/
- back reference. Then the next state is the union set of
- these destinations and the results of the transition table. */
+ the destination of a multibyte char/collating element/
+ back reference. Then the next state is the union set of
+ these destinations and the results of the transition table. */
pstate = mctx->state_log[cur_idx];
log_nodes = pstate->entrance_nodes;
if (next_state != NULL)
- {
- table_nodes = next_state->entrance_nodes;
- *err = re_node_set_init_union (&next_nodes, table_nodes,
+ {
+ table_nodes = next_state->entrance_nodes;
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
log_nodes);
- if (BE (*err != REG_NOERROR, 0))
+ if (BE (*err != REG_NOERROR, 0))
return NULL;
- }
+ }
else
- next_nodes = *log_nodes;
+ next_nodes = *log_nodes;
/* Note: We already add the nodes of the initial state,
then we don't need to add them here. */
re_string_cur_idx (&mctx->input) - 1,
mctx->eflags);
next_state = mctx->state_log[cur_idx]
- = re_acquire_state_context (err, dfa, &next_nodes, context);
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
- this function is next_state and ERR is already set. */
+ this function is next_state and ERR is already set. */
if (table_nodes != NULL)
- re_node_set_free (&next_nodes);
+ re_node_set_free (&next_nodes);
}
if (BE (dfa->nbackref, 0) && next_state != NULL)
do
{
- if (++cur_str_idx > max)
- return NULL;
- re_string_skip_bytes (&mctx->input, 1);
+ if (++cur_str_idx > max)
+ return NULL;
+ re_string_skip_bytes (&mctx->input, 1);
}
while (mctx->state_log[cur_str_idx] == NULL);
/* From the node set CUR_NODES, pick up the nodes whose types are
OP_OPEN_SUBEXP and which have corresponding back references in the regular
expression. And register them to use them later for evaluating the
- correspoding back references. */
+ corresponding back references. */
static reg_errcode_t
check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
re_dfastate_t *dest_state;
if (!dfa->nodes[cur_node_idx].accept_mb)
- continue;
+ continue;
if (dfa->nodes[cur_node_idx].constraint)
{
delay these checking for prune_impossible_nodes(). */
static reg_errcode_t
+__attribute_warn_unused_result__
get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
{
const re_dfa_t *const dfa = mctx->dfa;
const struct re_backref_cache_entry *entry
= mctx->bkref_ents + cache_idx;
do
- if (entry->node == bkref_node)
+ if (entry->node == bkref_node)
return REG_NOERROR; /* We already checked it. */
while (entry++->more);
}
if (bkref_str_off >= mctx->input.len)
break;
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, bkref_str_off + 1);
if (BE (err != REG_NOERROR, 0))
return err;
Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
static reg_errcode_t
+__attribute_warn_unused_result__
check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
int top_str, int last_node, int last_str, int type)
{
Can't we unify them? */
static reg_errcode_t
+__attribute_warn_unused_result__
check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
re_node_set *cur_nodes, re_node_set *next_nodes)
{
const re_dfa_t *const dfa = mctx->dfa;
int result;
int cur_idx;
+#ifdef RE_ENABLE_I18N
reg_errcode_t err = REG_NOERROR;
+#endif
re_node_set union_set;
re_node_set_init_empty (&union_set);
for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
problematic append it to DST_NODES. */
static reg_errcode_t
+__attribute_warn_unused_result__
check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
int target, int ex_subexp, int type)
{
in MCTX->BKREF_ENTS. */
static reg_errcode_t
+__attribute_warn_unused_result__
expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
int cur_str, int subexp_num, int type)
{
{
state->trtable = (re_dfastate_t **)
calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ if (BE (state->trtable == NULL, 0))
+ return 0;
return 1;
}
return 0;
if (BE (err != REG_NOERROR, 0))
goto out_free;
+ /* Avoid arithmetic overflow in size calculation. */
+ if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
+ / (3 * sizeof (re_dfastate_t *)))
+ < ndests),
+ 0))
+ goto out_free;
+
if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+ ndests * 3 * sizeof (re_dfastate_t *)))
dest_states = (re_dfastate_t **)
}
#ifdef RE_ENABLE_I18N
else if (type == OP_UTF8_PERIOD)
- {
+ {
memset (accepts, '\xff', sizeof (bitset_t) / 2);
if (!(dfa->syntax & RE_DOT_NEWLINE))
bitset_clear (accepts, '\n');
if (dfa->syntax & RE_DOT_NOT_NULL)
bitset_clear (accepts, '\0');
- }
+ }
#endif
else
continue;
one collating element like '.', '[a-z]', opposite to the other nodes
can only accept one byte. */
+# ifdef _LIBC
+# include <locale/weight.h>
+# endif
+
static int
check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
const re_string_t *input, int str_idx)
if (node->type == OP_PERIOD)
{
if (char_len <= 1)
- return 0;
+ return 0;
/* FIXME: I don't think this if is needed, as both '\n'
and '\0' are char_len == 1. */
/* '.' accepts any one character except the following two cases. */
const int32_t *table, *indirect;
const unsigned char *weights, *extra;
const char *collseqwc;
- int32_t idx;
- /* This #include defines a local function! */
-# include <locale/weight.h>
/* match with collating_symbol? */
if (cset->ncoll_syms)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
- idx = findidx (&cp);
+ int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
if (idx > 0)
for (i = 0; i < cset->nequiv_classes; ++i)
{
int32_t equiv_class_idx = cset->equiv_classes[i];
- size_t weight_len = weights[idx];
- if (weight_len == weights[equiv_class_idx])
+ size_t weight_len = weights[idx & 0xffffff];
+ if (weight_len == weights[equiv_class_idx & 0xffffff]
+ && (idx >> 24) == (equiv_class_idx >> 24))
{
int cnt = 0;
+
+ idx &= 0xffffff;
+ equiv_class_idx &= 0xffffff;
+
while (cnt <= weight_len
&& (weights[equiv_class_idx + 1 + cnt]
== weights[idx + 1 + cnt]))
{
cmp_buf[0] = cset->range_starts[i];
cmp_buf[4] = cset->range_ends[i];
- if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
- && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+ if (__wcscoll (cmp_buf, cmp_buf + 2) <= 0
+ && __wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
{
match_len = char_len;
goto check_node_accept_bytes_match;
/* Skip the collation sequence value. */
idx += sizeof (uint32_t);
/* Skip the wide char sequence of the collating element. */
- idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+ idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
/* If we found the entry, return the sequence value. */
if (found)
return *(uint32_t *) (extra + idx);
{
case CHARACTER:
if (node->opr.c != ch)
- return 0;
+ return 0;
break;
case SIMPLE_BRACKET:
if (!bitset_contain (node->opr.sbcset, ch))
- return 0;
+ return 0;
break;
#ifdef RE_ENABLE_I18N
case OP_UTF8_PERIOD:
if (ch >= 0x80)
- return 0;
+ return 0;
/* FALLTHROUGH */
#endif
case OP_PERIOD:
/* Extend the buffers, if the buffers have run out. */
static reg_errcode_t
-extend_buffers (re_match_context_t *mctx)
+__attribute_warn_unused_result__
+extend_buffers (re_match_context_t *mctx, int min_len)
{
reg_errcode_t ret;
re_string_t *pstr = &mctx->input;
- /* Double the lengthes of the buffers. */
- ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+ /* Avoid overflow. */
+ if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
+ return REG_ESPACE;
+
+ /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */
+ ret = re_string_realloc_buffers (pstr,
+ MAX (min_len,
+ MIN (pstr->len, pstr->bufs_len * 2)));
if (BE (ret != REG_NOERROR, 0))
return ret;
/* Initialize MCTX. */
static reg_errcode_t
+__attribute_warn_unused_result__
match_ctx_init (re_match_context_t *mctx, int eflags, int n)
{
mctx->eflags = eflags;
*/
static reg_errcode_t
+__attribute_warn_unused_result__
match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
int to)
{
at STR_IDX. */
static reg_errcode_t
+__attribute_warn_unused_result__
match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
{
#ifdef DEBUG