posix/regexec.c

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002-2019 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, see
  18    <https://www.gnu.org/licenses/>.  */
  19
  20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  21                                      Idx n);
  22 static void match_ctx_clean (re_match_context_t *mctx);
  23 static void match_ctx_free (re_match_context_t *cache);
  24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
  25                                           Idx str_idx, Idx from, Idx to);
  26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
  27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
  28                                            Idx str_idx);
  29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  30                                                     Idx node, Idx str_idx);
  31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  32                            re_dfastate_t **limited_sts, Idx last_node,
  33                            Idx last_str_idx);
  34 static reg_errcode_t re_search_internal (const regex_t *preg,
  35                                          const char *string, Idx length,
  36                                          Idx start, Idx last_start, Idx stop,
  37                                          size_t nmatch, regmatch_t pmatch[],
  38                                          int eflags);
  39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
  40                                   const char *string1, Idx length1,
  41                                   const char *string2, Idx length2,
  42                                   Idx start, regoff_t range,
  43                                   struct re_registers *regs,
  44                                   Idx stop, bool ret_len);
  45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
  46                                 const char *string, Idx length, Idx start,
  47                                 regoff_t range, Idx stop,
  48                                 struct re_registers *regs,
  49                                 bool ret_len);
  50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  51                               Idx nregs, int regs_allocated);
  52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
  53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
  54                            Idx *p_match_first);
  55 static Idx check_halt_state_context (const re_match_context_t *mctx,
  56                                      const re_dfastate_t *state, Idx idx);
  57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
  58                          regmatch_t *prev_idx_match, Idx cur_node,
  59                          Idx cur_idx, Idx nmatch);
  60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  61                                       Idx str_idx, Idx dest_node, Idx nregs,
  62                                       regmatch_t *regs,
  63                                       re_node_set *eps_via_nodes);
  64 static reg_errcode_t set_regs (const regex_t *preg,
  65                                const re_match_context_t *mctx,
  66                                size_t nmatch, regmatch_t *pmatch,
  67                                bool fl_backtrack);
  68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
  69
  70 #ifdef RE_ENABLE_I18N
  71 static int sift_states_iter_mb (const re_match_context_t *mctx,
  72                                 re_sift_context_t *sctx,
  73                                 Idx node_idx, Idx str_idx, Idx max_str_idx);
  74 #endif /* RE_ENABLE_I18N */
  75 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
  76                                            re_sift_context_t *sctx);
  77 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
  78                                           re_sift_context_t *sctx, Idx str_idx,
  79                                           re_node_set *cur_dest);
  80 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
  81                                               re_sift_context_t *sctx,
  82                                               Idx str_idx,
  83                                               re_node_set *dest_nodes);
  84 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
  85                                             re_node_set *dest_nodes,
  86                                             const re_node_set *candidates);
  87 static bool check_dst_limits (const re_match_context_t *mctx,
  88                               const re_node_set *limits,
  89                               Idx dst_node, Idx dst_idx, Idx src_node,
  90                               Idx src_idx);
  91 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
  92                                         int boundaries, Idx subexp_idx,
  93                                         Idx from_node, Idx bkref_idx);
  94 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
  95                                       Idx limit, Idx subexp_idx,
  96                                       Idx node, Idx str_idx,
  97                                       Idx bkref_idx);
  98 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
  99                                           re_node_set *dest_nodes,
 100                                           const re_node_set *candidates,
 101                                           re_node_set *limits,
 102                                           struct re_backref_cache_entry *bkref_ents,
 103                                           Idx str_idx);
 104 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
 105                                         re_sift_context_t *sctx,
 106                                         Idx str_idx, const re_node_set *candidates);
 107 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
 108                                         re_dfastate_t **dst,
 109                                         re_dfastate_t **src, Idx num);
 110 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
 111                                          re_match_context_t *mctx);
 112 static re_dfastate_t *transit_state (reg_errcode_t *err,
 113                                      re_match_context_t *mctx,
 114                                      re_dfastate_t *state);
 115 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 116                                             re_match_context_t *mctx,
 117                                             re_dfastate_t *next_state);
 118 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 119                                                 re_node_set *cur_nodes,
 120                                                 Idx str_idx);
 121 #if 0
 122 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 123                                         re_match_context_t *mctx,
 124                                         re_dfastate_t *pstate);
 125 #endif
 126 #ifdef RE_ENABLE_I18N
 127 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 128                                        re_dfastate_t *pstate);
 129 #endif /* RE_ENABLE_I18N */
 130 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 131                                           const re_node_set *nodes);
 132 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 133                                  Idx bkref_node, Idx bkref_str_idx);
 134 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 135                                      const re_sub_match_top_t *sub_top,
 136                                      re_sub_match_last_t *sub_last,
 137                                      Idx bkref_node, Idx bkref_str);
 138 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 139                              Idx subexp_idx, int type);
 140 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 141                                     state_array_t *path, Idx top_node,
 142                                     Idx top_str, Idx last_node, Idx last_str,
 143                                     int type);
 144 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 145                                                    Idx str_idx,
 146                                                    re_node_set *cur_nodes,
 147                                                    re_node_set *next_nodes);
 148 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
 149                                                re_node_set *cur_nodes,
 150                                                Idx ex_subexp, int type);
 151 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
 152                                                    re_node_set *dst_nodes,
 153                                                    Idx target, Idx ex_subexp,
 154                                                    int type);
 155 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 156                                          re_node_set *cur_nodes, Idx cur_str,
 157                                          Idx subexp_num, int type);
 158 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
 159 #ifdef RE_ENABLE_I18N
 160 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 161                                     const re_string_t *input, Idx idx);
 162 # ifdef _LIBC
 163 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 164                                                    size_t name_len);
 165 # endif /* _LIBC */
 166 #endif /* RE_ENABLE_I18N */
 167 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
 168                                        const re_dfastate_t *state,
 169                                        re_node_set *states_node,
 170                                        bitset_t *states_ch);
 171 static bool check_node_accept (const re_match_context_t *mctx,
 172                                const re_token_t *node, Idx idx);
 173 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
 174 \f
 175 /* Entry point for POSIX code.  */
 176
 177 /* regexec searches for a given pattern, specified by PREG, in the
 178    string STRING.
 179
 180    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 181    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 182    least NMATCH elements, and we set them to the offsets of the
 183    corresponding matched substrings.
 184
 185    EFLAGS specifies "execution flags" which affect matching: if
 186    REG_NOTBOL is set, then ^ does not match at the beginning of the
 187    string; if REG_NOTEOL is set, then $ does not match at the end.
 188
 189    We return 0 if we find a match and REG_NOMATCH if not.  */
 190
 191 int
 192 regexec (const regex_t *__restrict preg, const char *__restrict string,
 193          size_t nmatch, regmatch_t pmatch[], int eflags)
 194 {
 195   reg_errcode_t err;
 196   Idx start, length;
 197   re_dfa_t *dfa = preg->buffer;
 198
 199   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
 200     return REG_BADPAT;
 201
 202   if (eflags & REG_STARTEND)
 203     {
 204       start = pmatch[0].rm_so;
 205       length = pmatch[0].rm_eo;
 206     }
 207   else
 208     {
 209       start = 0;
 210       length = strlen (string);
 211     }
 212
 213   lock_lock (dfa->lock);
 214   if (preg->no_sub)
 215     err = re_search_internal (preg, string, length, start, length,
 216                               length, 0, NULL, eflags);
 217   else
 218     err = re_search_internal (preg, string, length, start, length,
 219                               length, nmatch, pmatch, eflags);
 220   lock_unlock (dfa->lock);
 221   return err != REG_NOERROR;
 222 }
 223
 224 #ifdef _LIBC
 225 libc_hidden_def (__regexec)
 226
 227 # include <shlib-compat.h>
 228 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
 229
 230 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
 231 __typeof__ (__regexec) __compat_regexec;
 232
 233 int
 234 attribute_compat_text_section
 235 __compat_regexec (const regex_t *__restrict preg,
 236                   const char *__restrict string, size_t nmatch,
 237                   regmatch_t pmatch[], int eflags)
 238 {
 239   return regexec (preg, string, nmatch, pmatch,
 240                   eflags & (REG_NOTBOL | REG_NOTEOL));
 241 }
 242 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
 243 # endif
 244 #endif
 245
 246 /* Entry points for GNU code.  */
 247
 248 /* re_match, re_search, re_match_2, re_search_2
 249
  250    The former two functions operate on STRING with length LENGTH,
  251    while the latter two operate on the concatenation of STRING1 and
  252    STRING2 with lengths LENGTH1 and LENGTH2, respectively.
 253
 254    re_match() matches the compiled pattern in BUFP against the string,
 255    starting at index START.
 256
 257    re_search() first tries matching at index START, then it tries to match
 258    starting from index START + 1, and so on.  The last start position tried
 259    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 260    way as re_match().)
 261
  262    The parameter STOP of re_{match,search}_2 specifies that no match
  263    extending beyond the first STOP characters of the concatenation of the
  264    strings is considered.
 265
  266    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
  267    and all groups are stored in REGS.  (For the "_2" variants, the offsets are
  268    computed relative to the concatenation, not relative to the individual
  269    strings.)
 270
 271    On success, re_match* functions return the length of the match, re_search*
 272    return the position of the start of the match.  Return value -1 means no
 273    match was found and -2 indicates an internal error.  */
 274
 275 regoff_t
 276 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
 277           Idx start, struct re_registers *regs)
 278 {
 279   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
 280 }
 281 #ifdef _LIBC
 282 weak_alias (__re_match, re_match)
 283 #endif
 284
 285 regoff_t
 286 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
 287            Idx start, regoff_t range, struct re_registers *regs)
 288 {
 289   return re_search_stub (bufp, string, length, start, range, length, regs,
 290                          false);
 291 }
 292 #ifdef _LIBC
 293 weak_alias (__re_search, re_search)
 294 #endif
 295
 296 regoff_t
 297 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
 298             const char *string2, Idx length2, Idx start,
 299             struct re_registers *regs, Idx stop)
 300 {
 301   return re_search_2_stub (bufp, string1, length1, string2, length2,
 302                            start, 0, regs, stop, true);
 303 }
 304 #ifdef _LIBC
 305 weak_alias (__re_match_2, re_match_2)
 306 #endif
 307
 308 regoff_t
 309 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
 310              const char *string2, Idx length2, Idx start, regoff_t range,
 311              struct re_registers *regs, Idx stop)
 312 {
 313   return re_search_2_stub (bufp, string1, length1, string2, length2,
 314                            start, range, regs, stop, false);
 315 }
 316 #ifdef _LIBC
 317 weak_alias (__re_search_2, re_search_2)
 318 #endif
 319
 320 static regoff_t
 321 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
 322                   Idx length1, const char *string2, Idx length2, Idx start,
 323                   regoff_t range, struct re_registers *regs,
 324                   Idx stop, bool ret_len)
 325 {
 326   const char *str;
 327   regoff_t rval;
 328   Idx len;
 329   char *s = NULL;
 330
 331   if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
 332                          || INT_ADD_WRAPV (length1, length2, &len))))
 333     return -2;
 334
 335   /* Concatenate the strings.  */
 336   if (length2 > 0)
 337     if (length1 > 0)
 338       {
 339         s = re_malloc (char, len);
 340
 341         if (__glibc_unlikely (s == NULL))
 342           return -2;
 343 #ifdef _LIBC
 344         memcpy (__mempcpy (s, string1, length1), string2, length2);
 345 #else
 346         memcpy (s, string1, length1);
 347         memcpy (s + length1, string2, length2);
 348 #endif
 349         str = s;
 350       }
 351     else
 352       str = string2;
 353   else
 354     str = string1;
 355
 356   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 357                          ret_len);
 358   re_free (s);
 359   return rval;
 360 }
 361
 362 /* The parameters have the same meaning as those of re_search.
 363    Additional parameters:
 364    If RET_LEN is true the length of the match is returned (re_match style);
 365    otherwise the position of the match is returned.  */
 366
 367 static regoff_t
 368 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
 369                 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
 370                 bool ret_len)
 371 {
 372   reg_errcode_t result;
 373   regmatch_t *pmatch;
 374   Idx nregs;
 375   regoff_t rval;
 376   int eflags = 0;
 377   re_dfa_t *dfa = bufp->buffer;
 378   Idx last_start = start + range;
 379
 380   /* Check for out-of-range.  */
 381   if (__glibc_unlikely (start < 0 || start > length))
 382     return -1;
 383   if (__glibc_unlikely (length < last_start
 384                         || (0 <= range && last_start < start)))
 385     last_start = length;
 386   else if (__glibc_unlikely (last_start < 0
 387                              || (range < 0 && start <= last_start)))
 388     last_start = 0;
 389
 390   lock_lock (dfa->lock);
 391
 392   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 393   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 394
 395   /* Compile fastmap if we haven't yet.  */
 396   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 397     re_compile_fastmap (bufp);
 398
 399   if (__glibc_unlikely (bufp->no_sub))
 400     regs = NULL;
 401
 402   /* We need at least 1 register.  */
 403   if (regs == NULL)
 404     nregs = 1;
 405   else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
 406                              && regs->num_regs <= bufp->re_nsub))
 407     {
 408       nregs = regs->num_regs;
 409       if (__glibc_unlikely (nregs < 1))
 410         {
 411           /* Nothing can be copied to regs.  */
 412           regs = NULL;
 413           nregs = 1;
 414         }
 415     }
 416   else
 417     nregs = bufp->re_nsub + 1;
 418   pmatch = re_malloc (regmatch_t, nregs);
 419   if (__glibc_unlikely (pmatch == NULL))
 420     {
 421       rval = -2;
 422       goto out;
 423     }
 424
 425   result = re_search_internal (bufp, string, length, start, last_start, stop,
 426                                nregs, pmatch, eflags);
 427
 428   rval = 0;
 429
 430   /* I hope we needn't fill their regs with -1's when no match was found.  */
 431   if (result != REG_NOERROR)
 432     rval = result == REG_NOMATCH ? -1 : -2;
 433   else if (regs != NULL)
 434     {
 435       /* If caller wants register contents data back, copy them.  */
 436       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 437                                            bufp->regs_allocated);
 438       if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
 439         rval = -2;
 440     }
 441
 442   if (__glibc_likely (rval == 0))
 443     {
 444       if (ret_len)
 445         {
 446           assert (pmatch[0].rm_so == start);
 447           rval = pmatch[0].rm_eo - start;
 448         }
 449       else
 450         rval = pmatch[0].rm_so;
 451     }
 452   re_free (pmatch);
 453  out:
 454   lock_unlock (dfa->lock);
 455   return rval;
 456 }
 457
 458 static unsigned
 459 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
 460               int regs_allocated)
 461 {
 462   int rval = REGS_REALLOCATE;
 463   Idx i;
 464   Idx need_regs = nregs + 1;
 465   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
 466      uses.  */
 467
 468   /* Have the register data arrays been allocated?  */
 469   if (regs_allocated == REGS_UNALLOCATED)
 470     { /* No.  So allocate them with malloc.  */
 471       regs->start = re_malloc (regoff_t, need_regs);
 472       if (__glibc_unlikely (regs->start == NULL))
 473         return REGS_UNALLOCATED;
 474       regs->end = re_malloc (regoff_t, need_regs);
 475       if (__glibc_unlikely (regs->end == NULL))
 476         {
 477           re_free (regs->start);
 478           return REGS_UNALLOCATED;
 479         }
 480       regs->num_regs = need_regs;
 481     }
 482   else if (regs_allocated == REGS_REALLOCATE)
 483     { /* Yes.  If we need more elements than were already
 484          allocated, reallocate them.  If we need fewer, just
 485          leave it alone.  */
 486       if (__glibc_unlikely (need_regs > regs->num_regs))
 487         {
 488           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
 489           regoff_t *new_end;
 490           if (__glibc_unlikely (new_start == NULL))
 491             return REGS_UNALLOCATED;
 492           new_end = re_realloc (regs->end, regoff_t, need_regs);
 493           if (__glibc_unlikely (new_end == NULL))
 494             {
 495               re_free (new_start);
 496               return REGS_UNALLOCATED;
 497             }
 498           regs->start = new_start;
 499           regs->end = new_end;
 500           regs->num_regs = need_regs;
 501         }
 502     }
 503   else
 504     {
 505       assert (regs_allocated == REGS_FIXED);
 506       /* This function may not be called with REGS_FIXED and nregs too big.  */
 507       assert (regs->num_regs >= nregs);
 508       rval = REGS_FIXED;
 509     }
 510
 511   /* Copy the regs.  */
 512   for (i = 0; i < nregs; ++i)
 513     {
 514       regs->start[i] = pmatch[i].rm_so;
 515       regs->end[i] = pmatch[i].rm_eo;
 516     }
 517   for ( ; i < regs->num_regs; ++i)
 518     regs->start[i] = regs->end[i] = -1;
 519
 520   return rval;
 521 }
 522
 523 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 524    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 525    this memory for recording register information.  STARTS and ENDS
 526    must be allocated using the malloc library routine, and must each
 527    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 528
 529    If NUM_REGS == 0, then subsequent matches should allocate their own
 530    register data.
 531
 532    Unless this function is called, the first search or match using
 533    PATTERN_BUFFER will allocate its own register data, without
 534    freeing the old data.  */
 535
 536 void
 537 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
 538                   __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
 539 {
 540   if (num_regs)
 541     {
 542       bufp->regs_allocated = REGS_REALLOCATE;
 543       regs->num_regs = num_regs;
 544       regs->start = starts;
 545       regs->end = ends;
 546     }
 547   else
 548     {
 549       bufp->regs_allocated = REGS_UNALLOCATED;
 550       regs->num_regs = 0;
 551       regs->start = regs->end = NULL;
 552     }
 553 }
 554 #ifdef _LIBC
 555 weak_alias (__re_set_registers, re_set_registers)
 556 #endif
 557 \f
 558 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 559    them unless specifically requested.  */
 560
 561 #if defined _REGEX_RE_COMP || defined _LIBC
 562 int
 563 # ifdef _LIBC
 564 weak_function
 565 # endif
 566 re_exec (const char *s)
 567 {
 568   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 569 }
 570 #endif /* _REGEX_RE_COMP */
 571 \f
 572 /* Internal entry point.  */
 573
 574 /* Searches for a compiled pattern PREG in the string STRING, whose
 575    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 576    meaning as with regexec.  LAST_START is START + RANGE, where
 577    START and RANGE have the same meaning as with re_search.
 578    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 579    otherwise return the error code.
 580    Note: We assume front end functions already check ranges.
 581    (0 <= LAST_START && LAST_START <= LENGTH)  */
 582
 583 static reg_errcode_t
 584 __attribute_warn_unused_result__
 585 re_search_internal (const regex_t *preg, const char *string, Idx length,
 586                     Idx start, Idx last_start, Idx stop, size_t nmatch,
 587                     regmatch_t pmatch[], int eflags)
 588 {
 589   reg_errcode_t err;
 590   const re_dfa_t *dfa = preg->buffer;
 591   Idx left_lim, right_lim;
 592   int incr;
 593   bool fl_longest_match;
 594   int match_kind;
 595   Idx match_first;
 596   Idx match_last = -1;
 597   Idx extra_nmatch;
 598   bool sb;
 599   int ch;
 600 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
 601   re_match_context_t mctx = { .dfa = dfa };
 602 #else
 603   re_match_context_t mctx;
 604 #endif
 605   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
 606                     && start != last_start && !preg->can_be_null)
 607                    ? preg->fastmap : NULL);
 608   RE_TRANSLATE_TYPE t = preg->translate;
 609
 610 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
 611   memset (&mctx, '\0', sizeof (re_match_context_t));
 612   mctx.dfa = dfa;
 613 #endif
 614
 615   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
 616   nmatch -= extra_nmatch;
 617
  618   /* Check if the DFA hasn't been compiled.  */
 619   if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
 620                         || dfa->init_state_word == NULL
 621                         || dfa->init_state_nl == NULL
 622                         || dfa->init_state_begbuf == NULL))
 623     return REG_NOMATCH;
 624
 625 #ifdef DEBUG
 626   /* We assume front-end functions already check them.  */
 627   assert (0 <= last_start && last_start <= length);
 628 #endif
 629
 630   /* If initial states with non-begbuf contexts have no elements,
 631      the regex must be anchored.  If preg->newline_anchor is set,
 632      we'll never use init_state_nl, so do not check it.  */
 633   if (dfa->init_state->nodes.nelem == 0
 634       && dfa->init_state_word->nodes.nelem == 0
 635       && (dfa->init_state_nl->nodes.nelem == 0
 636           || !preg->newline_anchor))
 637     {
 638       if (start != 0 && last_start != 0)
 639         return REG_NOMATCH;
 640       start = last_start = 0;
 641     }
 642
 643   /* We must check the longest matching, if nmatch > 0.  */
 644   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 645
 646   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 647                             preg->translate, (preg->syntax & RE_ICASE) != 0,
 648                             dfa);
 649   if (__glibc_unlikely (err != REG_NOERROR))
 650     goto free_return;
 651   mctx.input.stop = stop;
 652   mctx.input.raw_stop = stop;
 653   mctx.input.newline_anchor = preg->newline_anchor;
 654
 655   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 656   if (__glibc_unlikely (err != REG_NOERROR))
 657     goto free_return;
 658
  659   /* We will log all the DFA states through which the dfa passes,
 660      if nmatch > 1, or this dfa has "multibyte node", which is a
 661      back-reference or a node which can accept multibyte character or
 662      multi character collating element.  */
 663   if (nmatch > 1 || dfa->has_mb_node)
 664     {
 665       /* Avoid overflow.  */
 666       if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
 667                              <= mctx.input.bufs_len)))
 668         {
 669           err = REG_ESPACE;
 670           goto free_return;
 671         }
 672
 673       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 674       if (__glibc_unlikely (mctx.state_log == NULL))
 675         {
 676           err = REG_ESPACE;
 677           goto free_return;
 678         }
 679     }
 680   else
 681     mctx.state_log = NULL;
 682
 683   match_first = start;
 684   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 685                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 686
 687   /* Check incrementally whether the input string matches.  */
 688   incr = (last_start < start) ? -1 : 1;
 689   left_lim = (last_start < start) ? last_start : start;
 690   right_lim = (last_start < start) ? start : last_start;
 691   sb = dfa->mb_cur_max == 1;
 692   match_kind =
 693     (fastmap
 694      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
 695         | (start <= last_start ? 2 : 0)
 696         | (t != NULL ? 1 : 0))
 697      : 8);
 698
 699   for (;; match_first += incr)
 700     {
 701       err = REG_NOMATCH;
 702       if (match_first < left_lim || right_lim < match_first)
 703         goto free_return;
 704
 705       /* Advance as rapidly as possible through the string, until we
 706          find a plausible place to start matching.  This may be done
 707          with varying efficiency, so there are various possibilities:
 708          only the most common of them are specialized, in order to
 709          save on code size.  We use a switch statement for speed.  */
 710       switch (match_kind)
 711         {
 712         case 8:
 713           /* No fastmap.  */
 714           break;
 715
 716         case 7:
 717           /* Fastmap with single-byte translation, match forward.  */
 718           while (__glibc_likely (match_first < right_lim)
 719                  && !fastmap[t[(unsigned char) string[match_first]]])
 720             ++match_first;
 721           goto forward_match_found_start_or_reached_end;
 722
 723         case 6:
 724           /* Fastmap without translation, match forward.  */
 725           while (__glibc_likely (match_first < right_lim)
 726                  && !fastmap[(unsigned char) string[match_first]])
 727             ++match_first;
 728
 729         forward_match_found_start_or_reached_end:
 730           if (__glibc_unlikely (match_first == right_lim))
 731             {
 732               ch = match_first >= length
 733                        ? 0 : (unsigned char) string[match_first];
 734               if (!fastmap[t ? t[ch] : ch])
 735                 goto free_return;
 736             }
 737           break;
 738
 739         case 4:
 740         case 5:
 741           /* Fastmap without multi-byte translation, match backwards.  */
 742           while (match_first >= left_lim)
 743             {
 744               ch = match_first >= length
 745                        ? 0 : (unsigned char) string[match_first];
 746               if (fastmap[t ? t[ch] : ch])
 747                 break;
 748               --match_first;
 749             }
 750           if (match_first < left_lim)
 751             goto free_return;
 752           break;
 753
 754         default:
 755           /* In this case, we can't determine easily the current byte,
 756              since it might be a component byte of a multibyte
 757              character.  Then we use the constructed buffer instead.  */
 758           for (;;)
 759             {
 760               /* If MATCH_FIRST is out of the valid range, reconstruct the
 761                  buffers.  */
 762               __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
 763               if (__glibc_unlikely (offset
 764                                     >= (__re_size_t) mctx.input.valid_raw_len))
 765                 {
 766                   err = re_string_reconstruct (&mctx.input, match_first,
 767                                                eflags);
 768                   if (__glibc_unlikely (err != REG_NOERROR))
 769                     goto free_return;
 770
 771                   offset = match_first - mctx.input.raw_mbs_idx;
 772                 }
 773               /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 774                  Note that MATCH_FIRST must not be smaller than 0.  */
 775               ch = (match_first >= length
 776                     ? 0 : re_string_byte_at (&mctx.input, offset));
 777               if (fastmap[ch])
 778                 break;
 779               match_first += incr;
 780               if (match_first < left_lim || match_first > right_lim)
 781                 {
 782                   err = REG_NOMATCH;
 783                   goto free_return;
 784                 }
 785             }
 786           break;
 787         }
 788
 789       /* Reconstruct the buffers so that the matcher can assume that
 790          the matching starts from the beginning of the buffer.  */
 791       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 792       if (__glibc_unlikely (err != REG_NOERROR))
 793         goto free_return;
 794
 795 #ifdef RE_ENABLE_I18N
 796      /* Don't consider this char as a possible match start if it part,
 797         yet isn't the head, of a multibyte character.  */
 798       if (!sb && !re_string_first_byte (&mctx.input, 0))
 799         continue;
 800 #endif
 801
 802       /* It seems to be appropriate one, then use the matcher.  */
 803       /* We assume that the matching starts from 0.  */
 804       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 805       match_last = check_matching (&mctx, fl_longest_match,
 806                                    start <= last_start ? &match_first : NULL);
 807       if (match_last != -1)
 808         {
 809           if (__glibc_unlikely (match_last == -2))
 810             {
 811               err = REG_ESPACE;
 812               goto free_return;
 813             }
 814           else
 815             {
 816               mctx.match_last = match_last;
 817               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 818                 {
 819                   re_dfastate_t *pstate = mctx.state_log[match_last];
 820                   mctx.last_node = check_halt_state_context (&mctx, pstate,
 821                                                              match_last);
 822                 }
 823               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 824                   || dfa->nbackref)
 825                 {
 826                   err = prune_impossible_nodes (&mctx);
 827                   if (err == REG_NOERROR)
 828                     break;
 829                   if (__glibc_unlikely (err != REG_NOMATCH))
 830                     goto free_return;
 831                   match_last = -1;
 832                 }
 833               else
 834                 break; /* We found a match.  */
 835             }
 836         }
 837
 838       match_ctx_clean (&mctx);
 839     }
 840
 841 #ifdef DEBUG
 842   assert (match_last != -1);
 843   assert (err == REG_NOERROR);
 844 #endif
 845
 846   /* Set pmatch[] if we need.  */
 847   if (nmatch > 0)
 848     {
 849       Idx reg_idx;
 850
 851       /* Initialize registers.  */
 852       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 853         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 854
 855       /* Set the points where matching start/end.  */
 856       pmatch[0].rm_so = 0;
 857       pmatch[0].rm_eo = mctx.match_last;
 858       /* FIXME: This function should fail if mctx.match_last exceeds
 859          the maximum possible regoff_t value.  We need a new error
 860          code REG_OVERFLOW.  */
 861
 862       if (!preg->no_sub && nmatch > 1)
 863         {
 864           err = set_regs (preg, &mctx, nmatch, pmatch,
 865                           dfa->has_plural_match && dfa->nbackref > 0);
 866           if (__glibc_unlikely (err != REG_NOERROR))
 867             goto free_return;
 868         }
 869
 870       /* At last, add the offset to each register, since we slid
 871          the buffers so that we could assume that the matching starts
 872          from 0.  */
 873       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 874         if (pmatch[reg_idx].rm_so != -1)
 875           {
 876 #ifdef RE_ENABLE_I18N
 877             if (__glibc_unlikely (mctx.input.offsets_needed != 0))
 878               {
 879                 pmatch[reg_idx].rm_so =
 880                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
 881                    ? mctx.input.valid_raw_len
 882                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
 883                 pmatch[reg_idx].rm_eo =
 884                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
 885                    ? mctx.input.valid_raw_len
 886                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
 887               }
 888 #else
 889             assert (mctx.input.offsets_needed == 0);
 890 #endif
 891             pmatch[reg_idx].rm_so += match_first;
 892             pmatch[reg_idx].rm_eo += match_first;
 893           }
 894       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
 895         {
 896           pmatch[nmatch + reg_idx].rm_so = -1;
 897           pmatch[nmatch + reg_idx].rm_eo = -1;
 898         }
 899
 900       if (dfa->subexp_map)
 901         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
 902           if (dfa->subexp_map[reg_idx] != reg_idx)
 903             {
 904               pmatch[reg_idx + 1].rm_so
 905                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
 906               pmatch[reg_idx + 1].rm_eo
 907                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
 908             }
 909     }
 910
 911  free_return:
 912   re_free (mctx.state_log);
 913   if (dfa->nbackref)
 914     match_ctx_free (&mctx);
 915   re_string_destruct (&mctx.input);
 916   return err;
 917 }
 918
 919 static reg_errcode_t
 920 __attribute_warn_unused_result__
 921 prune_impossible_nodes (re_match_context_t *mctx)
 922 {
 923   const re_dfa_t *const dfa = mctx->dfa;
 924   Idx halt_node, match_last;
 925   reg_errcode_t ret;
 926   re_dfastate_t **sifted_states;
 927   re_dfastate_t **lim_states = NULL;
 928   re_sift_context_t sctx;
 929 #ifdef DEBUG
 930   assert (mctx->state_log != NULL);
 931 #endif
 932   match_last = mctx->match_last;
 933   halt_node = mctx->last_node;
 934
 935   /* Avoid overflow.  */
 936   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
 937                         <= match_last))
 938     return REG_ESPACE;
 939
 940   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 941   if (__glibc_unlikely (sifted_states == NULL))
 942     {
 943       ret = REG_ESPACE;
 944       goto free_return;
 945     }
 946   if (dfa->nbackref)
 947     {
 948       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
 949       if (__glibc_unlikely (lim_states == NULL))
 950         {
 951           ret = REG_ESPACE;
 952           goto free_return;
 953         }
 954       while (1)
 955         {
 956           memset (lim_states, '\0',
 957                   sizeof (re_dfastate_t *) * (match_last + 1));
 958           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 959                          match_last);
 960           ret = sift_states_backward (mctx, &sctx);
 961           re_node_set_free (&sctx.limits);
 962           if (__glibc_unlikely (ret != REG_NOERROR))
 963               goto free_return;
 964           if (sifted_states[0] != NULL || lim_states[0] != NULL)
 965             break;
 966           do
 967             {
 968               --match_last;
 969               if (match_last < 0)
 970                 {
 971                   ret = REG_NOMATCH;
 972                   goto free_return;
 973                 }
 974             } while (mctx->state_log[match_last] == NULL
 975                      || !mctx->state_log[match_last]->halt);
 976           halt_node = check_halt_state_context (mctx,
 977                                                 mctx->state_log[match_last],
 978                                                 match_last);
 979         }
 980       ret = merge_state_array (dfa, sifted_states, lim_states,
 981                                match_last + 1);
 982       re_free (lim_states);
 983       lim_states = NULL;
 984       if (__glibc_unlikely (ret != REG_NOERROR))
 985         goto free_return;
 986     }
 987   else
 988     {
 989       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
 990       ret = sift_states_backward (mctx, &sctx);
 991       re_node_set_free (&sctx.limits);
 992       if (__glibc_unlikely (ret != REG_NOERROR))
 993         goto free_return;
 994       if (sifted_states[0] == NULL)
 995         {
 996           ret = REG_NOMATCH;
 997           goto free_return;
 998         }
 999     }
1000   re_free (mctx->state_log);
1001   mctx->state_log = sifted_states;
1002   sifted_states = NULL;
1003   mctx->last_node = halt_node;
1004   mctx->match_last = match_last;
1005   ret = REG_NOERROR;
1006  free_return:
1007   re_free (sifted_states);
1008   re_free (lim_states);
1009   return ret;
1010 }
1011
1012 /* Acquire an initial state and return it.
1013    We must select appropriate initial state depending on the context,
1014    since initial states may have constraints like "\<", "^", etc..  */
1015
1016 static inline re_dfastate_t *
1017 __attribute__ ((always_inline))
1018 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1019                             Idx idx)
1020 {
1021   const re_dfa_t *const dfa = mctx->dfa;
1022   if (dfa->init_state->has_constraint)
1023     {
1024       unsigned int context;
1025       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1026       if (IS_WORD_CONTEXT (context))
1027         return dfa->init_state_word;
1028       else if (IS_ORDINARY_CONTEXT (context))
1029         return dfa->init_state;
1030       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1031         return dfa->init_state_begbuf;
1032       else if (IS_NEWLINE_CONTEXT (context))
1033         return dfa->init_state_nl;
1034       else if (IS_BEGBUF_CONTEXT (context))
1035         {
1036           /* It is relatively rare case, then calculate on demand.  */
1037           return re_acquire_state_context (err, dfa,
1038                                            dfa->init_state->entrance_nodes,
1039                                            context);
1040         }
1041       else
1042         /* Must not happen?  */
1043         return dfa->init_state;
1044     }
1045   else
1046     return dfa->init_state;
1047 }
1048
1049 /* Check whether the regular expression match input string INPUT or not,
1050    and return the index where the matching end.  Return -1 if
1051    there is no match, and return -2 in case of an error.
1052    FL_LONGEST_MATCH means we want the POSIX longest matching.
1053    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1054    next place where we may want to try matching.
1055    Note that the matcher assumes that the matching starts from the current
1056    index of the buffer.  */
1057
1058 static Idx
1059 __attribute_warn_unused_result__
1060 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1061                 Idx *p_match_first)
1062 {
1063   const re_dfa_t *const dfa = mctx->dfa;
1064   reg_errcode_t err;
1065   Idx match = 0;
1066   Idx match_last = -1;
1067   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1068   re_dfastate_t *cur_state;
1069   bool at_init_state = p_match_first != NULL;
1070   Idx next_start_idx = cur_str_idx;
1071
1072   err = REG_NOERROR;
1073   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1074   /* An initial state must not be NULL (invalid).  */
1075   if (__glibc_unlikely (cur_state == NULL))
1076     {
1077       assert (err == REG_ESPACE);
1078       return -2;
1079     }
1080
1081   if (mctx->state_log != NULL)
1082     {
1083       mctx->state_log[cur_str_idx] = cur_state;
1084
1085       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1086          later.  E.g. Processing back references.  */
1087       if (__glibc_unlikely (dfa->nbackref))
1088         {
1089           at_init_state = false;
1090           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1091           if (__glibc_unlikely (err != REG_NOERROR))
1092             return err;
1093
1094           if (cur_state->has_backref)
1095             {
1096               err = transit_state_bkref (mctx, &cur_state->nodes);
1097               if (__glibc_unlikely (err != REG_NOERROR))
1098                 return err;
1099             }
1100         }
1101     }
1102
1103   /* If the RE accepts NULL string.  */
1104   if (__glibc_unlikely (cur_state->halt))
1105     {
1106       if (!cur_state->has_constraint
1107           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1108         {
1109           if (!fl_longest_match)
1110             return cur_str_idx;
1111           else
1112             {
1113               match_last = cur_str_idx;
1114               match = 1;
1115             }
1116         }
1117     }
1118
1119   while (!re_string_eoi (&mctx->input))
1120     {
1121       re_dfastate_t *old_state = cur_state;
1122       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1123
1124       if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1125            && mctx->input.bufs_len < mctx->input.len)
1126           || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1127               && mctx->input.valid_len < mctx->input.len))
1128         {
1129           err = extend_buffers (mctx, next_char_idx + 1);
1130           if (__glibc_unlikely (err != REG_NOERROR))
1131             {
1132               assert (err == REG_ESPACE);
1133               return -2;
1134             }
1135         }
1136
1137       cur_state = transit_state (&err, mctx, cur_state);
1138       if (mctx->state_log != NULL)
1139         cur_state = merge_state_with_log (&err, mctx, cur_state);
1140
1141       if (cur_state == NULL)
1142         {
1143           /* Reached the invalid state or an error.  Try to recover a valid
1144              state using the state log, if available and if we have not
1145              already found a valid (even if not the longest) match.  */
1146           if (__glibc_unlikely (err != REG_NOERROR))
1147             return -2;
1148
1149           if (mctx->state_log == NULL
1150               || (match && !fl_longest_match)
1151               || (cur_state = find_recover_state (&err, mctx)) == NULL)
1152             break;
1153         }
1154
1155       if (__glibc_unlikely (at_init_state))
1156         {
1157           if (old_state == cur_state)
1158             next_start_idx = next_char_idx;
1159           else
1160             at_init_state = false;
1161         }
1162
1163       if (cur_state->halt)
1164         {
1165           /* Reached a halt state.
1166              Check the halt state can satisfy the current context.  */
1167           if (!cur_state->has_constraint
1168               || check_halt_state_context (mctx, cur_state,
1169                                            re_string_cur_idx (&mctx->input)))
1170             {
1171               /* We found an appropriate halt state.  */
1172               match_last = re_string_cur_idx (&mctx->input);
1173               match = 1;
1174
1175               /* We found a match, do not modify match_first below.  */
1176               p_match_first = NULL;
1177               if (!fl_longest_match)
1178                 break;
1179             }
1180         }
1181     }
1182
1183   if (p_match_first)
1184     *p_match_first += next_start_idx;
1185
1186   return match_last;
1187 }
1188
1189 /* Check NODE match the current context.  */
1190
1191 static bool
1192 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1193 {
1194   re_token_type_t type = dfa->nodes[node].type;
1195   unsigned int constraint = dfa->nodes[node].constraint;
1196   if (type != END_OF_RE)
1197     return false;
1198   if (!constraint)
1199     return true;
1200   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1201     return false;
1202   return true;
1203 }
1204
1205 /* Check the halt state STATE match the current context.
1206    Return 0 if not match, if the node, STATE has, is a halt node and
1207    match the context, return the node.  */
1208
1209 static Idx
1210 check_halt_state_context (const re_match_context_t *mctx,
1211                           const re_dfastate_t *state, Idx idx)
1212 {
1213   Idx i;
1214   unsigned int context;
1215 #ifdef DEBUG
1216   assert (state->halt);
1217 #endif
1218   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1219   for (i = 0; i < state->nodes.nelem; ++i)
1220     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1221       return state->nodes.elems[i];
1222   return 0;
1223 }
1224
1225 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1226    corresponding to the DFA).
1227    Return the destination node, and update EPS_VIA_NODES;
1228    return -1 in case of errors.  */
1229
1230 static Idx
1231 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1232                    Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1233                    struct re_fail_stack_t *fs)
1234 {
1235   const re_dfa_t *const dfa = mctx->dfa;
1236   Idx i;
1237   bool ok;
1238   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1239     {
1240       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1241       re_node_set *edests = &dfa->edests[node];
1242       Idx dest_node;
1243       ok = re_node_set_insert (eps_via_nodes, node);
1244       if (__glibc_unlikely (! ok))
1245         return -2;
1246       /* Pick up a valid destination, or return -1 if none
1247          is found.  */
1248       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1249         {
1250           Idx candidate = edests->elems[i];
1251           if (!re_node_set_contains (cur_nodes, candidate))
1252             continue;
1253           if (dest_node == -1)
1254             dest_node = candidate;
1255
1256           else
1257             {
1258               /* In order to avoid infinite loop like "(a*)*", return the second
1259                  epsilon-transition if the first was already considered.  */
1260               if (re_node_set_contains (eps_via_nodes, dest_node))
1261                 return candidate;
1262
1263               /* Otherwise, push the second epsilon-transition on the fail stack.  */
1264               else if (fs != NULL
1265                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1266                                            eps_via_nodes))
1267                 return -2;
1268
1269               /* We know we are going to exit.  */
1270               break;
1271             }
1272         }
1273       return dest_node;
1274     }
1275   else
1276     {
1277       Idx naccepted = 0;
1278       re_token_type_t type = dfa->nodes[node].type;
1279
1280 #ifdef RE_ENABLE_I18N
1281       if (dfa->nodes[node].accept_mb)
1282         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1283       else
1284 #endif /* RE_ENABLE_I18N */
1285       if (type == OP_BACK_REF)
1286         {
1287           Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1288           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1289           if (fs != NULL)
1290             {
1291               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1292                 return -1;
1293               else if (naccepted)
1294                 {
1295                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1296                   if (mctx->input.valid_len - *pidx < naccepted
1297                       || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1298                                   naccepted)
1299                           != 0))
1300                     return -1;
1301                 }
1302             }
1303
1304           if (naccepted == 0)
1305             {
1306               Idx dest_node;
1307               ok = re_node_set_insert (eps_via_nodes, node);
1308               if (__glibc_unlikely (! ok))
1309                 return -2;
1310               dest_node = dfa->edests[node].elems[0];
1311               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1312                                         dest_node))
1313                 return dest_node;
1314             }
1315         }
1316
1317       if (naccepted != 0
1318           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1319         {
1320           Idx dest_node = dfa->nexts[node];
1321           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1322           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1323                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1324                                                dest_node)))
1325             return -1;
1326           re_node_set_empty (eps_via_nodes);
1327           return dest_node;
1328         }
1329     }
1330   return -1;
1331 }
1332
1333 static reg_errcode_t
1334 __attribute_warn_unused_result__
1335 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1336                  Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1337 {
1338   reg_errcode_t err;
1339   Idx num = fs->num++;
1340   if (fs->num == fs->alloc)
1341     {
1342       struct re_fail_stack_ent_t *new_array;
1343       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1344                               fs->alloc * 2);
1345       if (new_array == NULL)
1346         return REG_ESPACE;
1347       fs->alloc *= 2;
1348       fs->stack = new_array;
1349     }
1350   fs->stack[num].idx = str_idx;
1351   fs->stack[num].node = dest_node;
1352   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1353   if (fs->stack[num].regs == NULL)
1354     return REG_ESPACE;
1355   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1356   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1357   return err;
1358 }
1359
1360 static Idx
1361 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1362                 regmatch_t *regs, re_node_set *eps_via_nodes)
1363 {
1364   Idx num = --fs->num;
1365   assert (num >= 0);
1366   *pidx = fs->stack[num].idx;
1367   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1368   re_node_set_free (eps_via_nodes);
1369   re_free (fs->stack[num].regs);
1370   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1371   return fs->stack[num].node;
1372 }
1373
1374 /* Set the positions where the subexpressions are starts/ends to registers
1375    PMATCH.
1376    Note: We assume that pmatch[0] is already set, and
1377    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1378
1379 static reg_errcode_t
1380 __attribute_warn_unused_result__
1381 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1382           regmatch_t *pmatch, bool fl_backtrack)
1383 {
1384   const re_dfa_t *dfa = preg->buffer;
1385   Idx idx, cur_node;
1386   re_node_set eps_via_nodes;
1387   struct re_fail_stack_t *fs;
1388   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1389   regmatch_t *prev_idx_match;
1390   bool prev_idx_match_malloced = false;
1391
1392 #ifdef DEBUG
1393   assert (nmatch > 1);
1394   assert (mctx->state_log != NULL);
1395 #endif
1396   if (fl_backtrack)
1397     {
1398       fs = &fs_body;
1399       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1400       if (fs->stack == NULL)
1401         return REG_ESPACE;
1402     }
1403   else
1404     fs = NULL;
1405
1406   cur_node = dfa->init_node;
1407   re_node_set_init_empty (&eps_via_nodes);
1408
1409   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1410     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1411   else
1412     {
1413       prev_idx_match = re_malloc (regmatch_t, nmatch);
1414       if (prev_idx_match == NULL)
1415         {
1416           free_fail_stack_return (fs);
1417           return REG_ESPACE;
1418         }
1419       prev_idx_match_malloced = true;
1420     }
1421   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1422
1423   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1424     {
1425       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1426
1427       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1428         {
1429           Idx reg_idx;
1430           if (fs)
1431             {
1432               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1433                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1434                   break;
1435               if (reg_idx == nmatch)
1436                 {
1437                   re_node_set_free (&eps_via_nodes);
1438                   if (prev_idx_match_malloced)
1439                     re_free (prev_idx_match);
1440                   return free_fail_stack_return (fs);
1441                 }
1442               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1443                                          &eps_via_nodes);
1444             }
1445           else
1446             {
1447               re_node_set_free (&eps_via_nodes);
1448               if (prev_idx_match_malloced)
1449                 re_free (prev_idx_match);
1450               return REG_NOERROR;
1451             }
1452         }
1453
1454       /* Proceed to next node.  */
1455       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1456                                     &eps_via_nodes, fs);
1457
1458       if (__glibc_unlikely (cur_node < 0))
1459         {
1460           if (__glibc_unlikely (cur_node == -2))
1461             {
1462               re_node_set_free (&eps_via_nodes);
1463               if (prev_idx_match_malloced)
1464                 re_free (prev_idx_match);
1465               free_fail_stack_return (fs);
1466               return REG_ESPACE;
1467             }
1468           if (fs)
1469             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1470                                        &eps_via_nodes);
1471           else
1472             {
1473               re_node_set_free (&eps_via_nodes);
1474               if (prev_idx_match_malloced)
1475                 re_free (prev_idx_match);
1476               return REG_NOMATCH;
1477             }
1478         }
1479     }
1480   re_node_set_free (&eps_via_nodes);
1481   if (prev_idx_match_malloced)
1482     re_free (prev_idx_match);
1483   return free_fail_stack_return (fs);
1484 }
1485
1486 static reg_errcode_t
1487 free_fail_stack_return (struct re_fail_stack_t *fs)
1488 {
1489   if (fs)
1490     {
1491       Idx fs_idx;
1492       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1493         {
1494           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1495           re_free (fs->stack[fs_idx].regs);
1496         }
1497       re_free (fs->stack);
1498     }
1499   return REG_NOERROR;
1500 }
1501
1502 static void
1503 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1504              regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1505 {
1506   int type = dfa->nodes[cur_node].type;
1507   if (type == OP_OPEN_SUBEXP)
1508     {
1509       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1510
1511       /* We are at the first node of this sub expression.  */
1512       if (reg_num < nmatch)
1513         {
1514           pmatch[reg_num].rm_so = cur_idx;
1515           pmatch[reg_num].rm_eo = -1;
1516         }
1517     }
1518   else if (type == OP_CLOSE_SUBEXP)
1519     {
1520       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1521       if (reg_num < nmatch)
1522         {
1523           /* We are at the last node of this sub expression.  */
1524           if (pmatch[reg_num].rm_so < cur_idx)
1525             {
1526               pmatch[reg_num].rm_eo = cur_idx;
1527               /* This is a non-empty match or we are not inside an optional
1528                  subexpression.  Accept this right away.  */
1529               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1530             }
1531           else
1532             {
1533               if (dfa->nodes[cur_node].opt_subexp
1534                   && prev_idx_match[reg_num].rm_so != -1)
1535                 /* We transited through an empty match for an optional
1536                    subexpression, like (a?)*, and this is not the subexp's
1537                    first match.  Copy back the old content of the registers
1538                    so that matches of an inner subexpression are undone as
1539                    well, like in ((a?))*.  */
1540                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1541               else
1542                 /* We completed a subexpression, but it may be part of
1543                    an optional one, so do not update PREV_IDX_MATCH.  */
1544                 pmatch[reg_num].rm_eo = cur_idx;
1545             }
1546         }
1547     }
1548 }
1549
1550 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1551    and sift the nodes in each states according to the following rules.
 1552    The updated state log is written to STATE_LOG.
1553
1554    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1555      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1556         If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1557         the LAST_NODE, we throw away the node 'a'.
1558      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1559         string 's' and transit to 'b':
1560         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1561            away the node 'a'.
1562         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1563             thrown away, we throw away the node 'a'.
1564      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1565         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1566            node 'a'.
1567         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1568             we throw away the node 'a'.  */
1569
1570 #define STATE_NODE_CONTAINS(state,node) \
1571   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1572
1573 static reg_errcode_t
1574 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1575 {
1576   reg_errcode_t err;
1577   int null_cnt = 0;
1578   Idx str_idx = sctx->last_str_idx;
1579   re_node_set cur_dest;
1580
1581 #ifdef DEBUG
1582   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1583 #endif
1584
1585   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1586      transit to the last_node and the last_node itself.  */
1587   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1588   if (__glibc_unlikely (err != REG_NOERROR))
1589     return err;
1590   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1591   if (__glibc_unlikely (err != REG_NOERROR))
1592     goto free_return;
1593
1594   /* Then check each states in the state_log.  */
1595   while (str_idx > 0)
1596     {
1597       /* Update counters.  */
1598       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1599       if (null_cnt > mctx->max_mb_elem_len)
1600         {
1601           memset (sctx->sifted_states, '\0',
1602                   sizeof (re_dfastate_t *) * str_idx);
1603           re_node_set_free (&cur_dest);
1604           return REG_NOERROR;
1605         }
1606       re_node_set_empty (&cur_dest);
1607       --str_idx;
1608
1609       if (mctx->state_log[str_idx])
1610         {
1611           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1612           if (__glibc_unlikely (err != REG_NOERROR))
1613             goto free_return;
1614         }
1615
1616       /* Add all the nodes which satisfy the following conditions:
1617          - It can epsilon transit to a node in CUR_DEST.
1618          - It is in CUR_SRC.
1619          And update state_log.  */
1620       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1621       if (__glibc_unlikely (err != REG_NOERROR))
1622         goto free_return;
1623     }
1624   err = REG_NOERROR;
1625  free_return:
1626   re_node_set_free (&cur_dest);
1627   return err;
1628 }
1629
1630 static reg_errcode_t
1631 __attribute_warn_unused_result__
1632 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1633                      Idx str_idx, re_node_set *cur_dest)
1634 {
1635   const re_dfa_t *const dfa = mctx->dfa;
1636   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1637   Idx i;
1638
1639   /* Then build the next sifted state.
1640      We build the next sifted state on 'cur_dest', and update
1641      'sifted_states[str_idx]' with 'cur_dest'.
1642      Note:
1643      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1644      'cur_src' points the node_set of the old 'state_log[str_idx]'
1645      (with the epsilon nodes pre-filtered out).  */
1646   for (i = 0; i < cur_src->nelem; i++)
1647     {
1648       Idx prev_node = cur_src->elems[i];
1649       int naccepted = 0;
1650       bool ok;
1651
1652 #ifdef DEBUG
1653       re_token_type_t type = dfa->nodes[prev_node].type;
1654       assert (!IS_EPSILON_NODE (type));
1655 #endif
1656 #ifdef RE_ENABLE_I18N
1657       /* If the node may accept "multi byte".  */
1658       if (dfa->nodes[prev_node].accept_mb)
1659         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1660                                          str_idx, sctx->last_str_idx);
1661 #endif /* RE_ENABLE_I18N */
1662
1663       /* We don't check backreferences here.
1664          See update_cur_sifted_state().  */
1665       if (!naccepted
1666           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1667           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1668                                   dfa->nexts[prev_node]))
1669         naccepted = 1;
1670
1671       if (naccepted == 0)
1672         continue;
1673
1674       if (sctx->limits.nelem)
1675         {
1676           Idx to_idx = str_idx + naccepted;
1677           if (check_dst_limits (mctx, &sctx->limits,
1678                                 dfa->nexts[prev_node], to_idx,
1679                                 prev_node, str_idx))
1680             continue;
1681         }
1682       ok = re_node_set_insert (cur_dest, prev_node);
1683       if (__glibc_unlikely (! ok))
1684         return REG_ESPACE;
1685     }
1686
1687   return REG_NOERROR;
1688 }
1689
1690 /* Helper functions.  */
1691
1692 static reg_errcode_t
1693 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1694 {
1695   Idx top = mctx->state_log_top;
1696
1697   if ((next_state_log_idx >= mctx->input.bufs_len
1698        && mctx->input.bufs_len < mctx->input.len)
1699       || (next_state_log_idx >= mctx->input.valid_len
1700           && mctx->input.valid_len < mctx->input.len))
1701     {
1702       reg_errcode_t err;
1703       err = extend_buffers (mctx, next_state_log_idx + 1);
1704       if (__glibc_unlikely (err != REG_NOERROR))
1705         return err;
1706     }
1707
1708   if (top < next_state_log_idx)
1709     {
1710       memset (mctx->state_log + top + 1, '\0',
1711               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1712       mctx->state_log_top = next_state_log_idx;
1713     }
1714   return REG_NOERROR;
1715 }
1716
1717 static reg_errcode_t
1718 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1719                    re_dfastate_t **src, Idx num)
1720 {
1721   Idx st_idx;
1722   reg_errcode_t err;
1723   for (st_idx = 0; st_idx < num; ++st_idx)
1724     {
1725       if (dst[st_idx] == NULL)
1726         dst[st_idx] = src[st_idx];
1727       else if (src[st_idx] != NULL)
1728         {
1729           re_node_set merged_set;
1730           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1731                                         &src[st_idx]->nodes);
1732           if (__glibc_unlikely (err != REG_NOERROR))
1733             return err;
1734           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1735           re_node_set_free (&merged_set);
1736           if (__glibc_unlikely (err != REG_NOERROR))
1737             return err;
1738         }
1739     }
1740   return REG_NOERROR;
1741 }
1742
1743 static reg_errcode_t
1744 update_cur_sifted_state (const re_match_context_t *mctx,
1745                          re_sift_context_t *sctx, Idx str_idx,
1746                          re_node_set *dest_nodes)
1747 {
1748   const re_dfa_t *const dfa = mctx->dfa;
1749   reg_errcode_t err = REG_NOERROR;
1750   const re_node_set *candidates;
1751   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1752                 : &mctx->state_log[str_idx]->nodes);
1753
1754   if (dest_nodes->nelem == 0)
1755     sctx->sifted_states[str_idx] = NULL;
1756   else
1757     {
1758       if (candidates)
1759         {
1760           /* At first, add the nodes which can epsilon transit to a node in
1761              DEST_NODE.  */
1762           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1763           if (__glibc_unlikely (err != REG_NOERROR))
1764             return err;
1765
1766           /* Then, check the limitations in the current sift_context.  */
1767           if (sctx->limits.nelem)
1768             {
1769               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1770                                          mctx->bkref_ents, str_idx);
1771               if (__glibc_unlikely (err != REG_NOERROR))
1772                 return err;
1773             }
1774         }
1775
1776       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1777       if (__glibc_unlikely (err != REG_NOERROR))
1778         return err;
1779     }
1780
1781   if (candidates && mctx->state_log[str_idx]->has_backref)
1782     {
1783       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1784       if (__glibc_unlikely (err != REG_NOERROR))
1785         return err;
1786     }
1787   return REG_NOERROR;
1788 }
1789
1790 static reg_errcode_t
1791 __attribute_warn_unused_result__
1792 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1793                        const re_node_set *candidates)
1794 {
1795   reg_errcode_t err = REG_NOERROR;
1796   Idx i;
1797
1798   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1799   if (__glibc_unlikely (err != REG_NOERROR))
1800     return err;
1801
1802   if (!state->inveclosure.alloc)
1803     {
1804       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1805       if (__glibc_unlikely (err != REG_NOERROR))
1806         return REG_ESPACE;
1807       for (i = 0; i < dest_nodes->nelem; i++)
1808         {
1809           err = re_node_set_merge (&state->inveclosure,
1810                                    dfa->inveclosures + dest_nodes->elems[i]);
1811           if (__glibc_unlikely (err != REG_NOERROR))
1812             return REG_ESPACE;
1813         }
1814     }
1815   return re_node_set_add_intersect (dest_nodes, candidates,
1816                                     &state->inveclosure);
1817 }
1818
1819 static reg_errcode_t
1820 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1821                        const re_node_set *candidates)
1822 {
1823     Idx ecl_idx;
1824     reg_errcode_t err;
1825     re_node_set *inv_eclosure = dfa->inveclosures + node;
1826     re_node_set except_nodes;
1827     re_node_set_init_empty (&except_nodes);
1828     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1829       {
1830         Idx cur_node = inv_eclosure->elems[ecl_idx];
1831         if (cur_node == node)
1832           continue;
1833         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1834           {
1835             Idx edst1 = dfa->edests[cur_node].elems[0];
1836             Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1837                          ? dfa->edests[cur_node].elems[1] : -1);
1838             if ((!re_node_set_contains (inv_eclosure, edst1)
1839                  && re_node_set_contains (dest_nodes, edst1))
1840                 || (edst2 > 0
1841                     && !re_node_set_contains (inv_eclosure, edst2)
1842                     && re_node_set_contains (dest_nodes, edst2)))
1843               {
1844                 err = re_node_set_add_intersect (&except_nodes, candidates,
1845                                                  dfa->inveclosures + cur_node);
1846                 if (__glibc_unlikely (err != REG_NOERROR))
1847                   {
1848                     re_node_set_free (&except_nodes);
1849                     return err;
1850                   }
1851               }
1852           }
1853       }
1854     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1855       {
1856         Idx cur_node = inv_eclosure->elems[ecl_idx];
1857         if (!re_node_set_contains (&except_nodes, cur_node))
1858           {
1859             Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1860             re_node_set_remove_at (dest_nodes, idx);
1861           }
1862       }
1863     re_node_set_free (&except_nodes);
1864     return REG_NOERROR;
1865 }
1866
1867 static bool
1868 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1869                   Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1870 {
1871   const re_dfa_t *const dfa = mctx->dfa;
1872   Idx lim_idx, src_pos, dst_pos;
1873
1874   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1875   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1876   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1877     {
1878       Idx subexp_idx;
1879       struct re_backref_cache_entry *ent;
1880       ent = mctx->bkref_ents + limits->elems[lim_idx];
1881       subexp_idx = dfa->nodes[ent->node].opr.idx;
1882
1883       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1884                                            subexp_idx, dst_node, dst_idx,
1885                                            dst_bkref_idx);
1886       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1887                                            subexp_idx, src_node, src_idx,
1888                                            src_bkref_idx);
1889
1890       /* In case of:
1891          <src> <dst> ( <subexp> )
1892          ( <subexp> ) <src> <dst>
1893          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1894       if (src_pos == dst_pos)
1895         continue; /* This is unrelated limitation.  */
1896       else
1897         return true;
1898     }
1899   return false;
1900 }
1901
1902 static int
1903 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1904                              Idx subexp_idx, Idx from_node, Idx bkref_idx)
1905 {
1906   const re_dfa_t *const dfa = mctx->dfa;
1907   const re_node_set *eclosures = dfa->eclosures + from_node;
1908   Idx node_idx;
1909
1910   /* Else, we are on the boundary: examine the nodes on the epsilon
1911      closure.  */
1912   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1913     {
1914       Idx node = eclosures->elems[node_idx];
1915       switch (dfa->nodes[node].type)
1916         {
1917         case OP_BACK_REF:
1918           if (bkref_idx != -1)
1919             {
1920               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1921               do
1922                 {
1923                   Idx dst;
1924                   int cpos;
1925
1926                   if (ent->node != node)
1927                     continue;
1928
1929                   if (subexp_idx < BITSET_WORD_BITS
1930                       && !(ent->eps_reachable_subexps_map
1931                            & ((bitset_word_t) 1 << subexp_idx)))
1932                     continue;
1933
1934                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
1935                      OP_CLOSE_SUBEXP cases below.  But, if the
1936                      destination node is the same node as the source
1937                      node, don't recurse because it would cause an
1938                      infinite loop: a regex that exhibits this behavior
1939                      is ()\1*\1*  */
1940                   dst = dfa->edests[node].elems[0];
1941                   if (dst == from_node)
1942                     {
1943                       if (boundaries & 1)
1944                         return -1;
1945                       else /* if (boundaries & 2) */
1946                         return 0;
1947                     }
1948
1949                   cpos =
1950                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1951                                                  dst, bkref_idx);
1952                   if (cpos == -1 /* && (boundaries & 1) */)
1953                     return -1;
1954                   if (cpos == 0 && (boundaries & 2))
1955                     return 0;
1956
1957                   if (subexp_idx < BITSET_WORD_BITS)
1958                     ent->eps_reachable_subexps_map
1959                       &= ~((bitset_word_t) 1 << subexp_idx);
1960                 }
1961               while (ent++->more);
1962             }
1963           break;
1964
1965         case OP_OPEN_SUBEXP:
1966           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1967             return -1;
1968           break;
1969
1970         case OP_CLOSE_SUBEXP:
1971           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1972             return 0;
1973           break;
1974
1975         default:
1976             break;
1977         }
1978     }
1979
1980   return (boundaries & 2) ? 1 : 0;
1981 }
1982
1983 static int
1984 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1985                            Idx subexp_idx, Idx from_node, Idx str_idx,
1986                            Idx bkref_idx)
1987 {
1988   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1989   int boundaries;
1990
1991   /* If we are outside the range of the subexpression, return -1 or 1.  */
1992   if (str_idx < lim->subexp_from)
1993     return -1;
1994
1995   if (lim->subexp_to < str_idx)
1996     return 1;
1997
1998   /* If we are within the subexpression, return 0.  */
1999   boundaries = (str_idx == lim->subexp_from);
2000   boundaries |= (str_idx == lim->subexp_to) << 1;
2001   if (boundaries == 0)
2002     return 0;
2003
2004   /* Else, examine epsilon closure.  */
2005   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2006                                       from_node, bkref_idx);
2007 }
2008
2009 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2010    which are against limitations from DEST_NODES. */
2011
2012 static reg_errcode_t
2013 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2014                      const re_node_set *candidates, re_node_set *limits,
2015                      struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2016 {
2017   reg_errcode_t err;
2018   Idx node_idx, lim_idx;
2019
2020   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2021     {
2022       Idx subexp_idx;
2023       struct re_backref_cache_entry *ent;
2024       ent = bkref_ents + limits->elems[lim_idx];
2025
2026       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2027         continue; /* This is unrelated limitation.  */
2028
2029       subexp_idx = dfa->nodes[ent->node].opr.idx;
2030       if (ent->subexp_to == str_idx)
2031         {
2032           Idx ops_node = -1;
2033           Idx cls_node = -1;
2034           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2035             {
2036               Idx node = dest_nodes->elems[node_idx];
2037               re_token_type_t type = dfa->nodes[node].type;
2038               if (type == OP_OPEN_SUBEXP
2039                   && subexp_idx == dfa->nodes[node].opr.idx)
2040                 ops_node = node;
2041               else if (type == OP_CLOSE_SUBEXP
2042                        && subexp_idx == dfa->nodes[node].opr.idx)
2043                 cls_node = node;
2044             }
2045
2046           /* Check the limitation of the open subexpression.  */
2047           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2048           if (ops_node >= 0)
2049             {
2050               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2051                                            candidates);
2052               if (__glibc_unlikely (err != REG_NOERROR))
2053                 return err;
2054             }
2055
2056           /* Check the limitation of the close subexpression.  */
2057           if (cls_node >= 0)
2058             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2059               {
2060                 Idx node = dest_nodes->elems[node_idx];
2061                 if (!re_node_set_contains (dfa->inveclosures + node,
2062                                            cls_node)
2063                     && !re_node_set_contains (dfa->eclosures + node,
2064                                               cls_node))
2065                   {
2066                     /* It is against this limitation.
2067                        Remove it form the current sifted state.  */
2068                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2069                                                  candidates);
2070                     if (__glibc_unlikely (err != REG_NOERROR))
2071                       return err;
2072                     --node_idx;
2073                   }
2074               }
2075         }
2076       else /* (ent->subexp_to != str_idx)  */
2077         {
2078           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2079             {
2080               Idx node = dest_nodes->elems[node_idx];
2081               re_token_type_t type = dfa->nodes[node].type;
2082               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2083                 {
2084                   if (subexp_idx != dfa->nodes[node].opr.idx)
2085                     continue;
2086                   /* It is against this limitation.
2087                      Remove it form the current sifted state.  */
2088                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2089                                                candidates);
2090                   if (__glibc_unlikely (err != REG_NOERROR))
2091                     return err;
2092                 }
2093             }
2094         }
2095     }
2096   return REG_NOERROR;
2097 }
2098
2099 static reg_errcode_t
2100 __attribute_warn_unused_result__
2101 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2102                    Idx str_idx, const re_node_set *candidates)
2103 {
2104   const re_dfa_t *const dfa = mctx->dfa;
2105   reg_errcode_t err;
2106   Idx node_idx, node;
2107   re_sift_context_t local_sctx;
2108   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2109
2110   if (first_idx == -1)
2111     return REG_NOERROR;
2112
2113   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2114
2115   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2116     {
2117       Idx enabled_idx;
2118       re_token_type_t type;
2119       struct re_backref_cache_entry *entry;
2120       node = candidates->elems[node_idx];
2121       type = dfa->nodes[node].type;
2122       /* Avoid infinite loop for the REs like "()\1+".  */
2123       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2124         continue;
2125       if (type != OP_BACK_REF)
2126         continue;
2127
2128       entry = mctx->bkref_ents + first_idx;
2129       enabled_idx = first_idx;
2130       do
2131         {
2132           Idx subexp_len;
2133           Idx to_idx;
2134           Idx dst_node;
2135           bool ok;
2136           re_dfastate_t *cur_state;
2137
2138           if (entry->node != node)
2139             continue;
2140           subexp_len = entry->subexp_to - entry->subexp_from;
2141           to_idx = str_idx + subexp_len;
2142           dst_node = (subexp_len ? dfa->nexts[node]
2143                       : dfa->edests[node].elems[0]);
2144
2145           if (to_idx > sctx->last_str_idx
2146               || sctx->sifted_states[to_idx] == NULL
2147               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2148               || check_dst_limits (mctx, &sctx->limits, node,
2149                                    str_idx, dst_node, to_idx))
2150             continue;
2151
2152           if (local_sctx.sifted_states == NULL)
2153             {
2154               local_sctx = *sctx;
2155               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2156               if (__glibc_unlikely (err != REG_NOERROR))
2157                 goto free_return;
2158             }
2159           local_sctx.last_node = node;
2160           local_sctx.last_str_idx = str_idx;
2161           ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2162           if (__glibc_unlikely (! ok))
2163             {
2164               err = REG_ESPACE;
2165               goto free_return;
2166             }
2167           cur_state = local_sctx.sifted_states[str_idx];
2168           err = sift_states_backward (mctx, &local_sctx);
2169           if (__glibc_unlikely (err != REG_NOERROR))
2170             goto free_return;
2171           if (sctx->limited_states != NULL)
2172             {
2173               err = merge_state_array (dfa, sctx->limited_states,
2174                                        local_sctx.sifted_states,
2175                                        str_idx + 1);
2176               if (__glibc_unlikely (err != REG_NOERROR))
2177                 goto free_return;
2178             }
2179           local_sctx.sifted_states[str_idx] = cur_state;
2180           re_node_set_remove (&local_sctx.limits, enabled_idx);
2181
2182           /* mctx->bkref_ents may have changed, reload the pointer.  */
2183           entry = mctx->bkref_ents + enabled_idx;
2184         }
2185       while (enabled_idx++, entry++->more);
2186     }
2187   err = REG_NOERROR;
2188  free_return:
2189   if (local_sctx.sifted_states != NULL)
2190     {
2191       re_node_set_free (&local_sctx.limits);
2192     }
2193
2194   return err;
2195 }
2196
2197
#ifdef RE_ENABLE_I18N
/* Return the number of bytes reported by check_node_accept_bytes for
   node NODE_IDX at STR_IDX, except that 0 is returned when that count
   is positive, the accepted span ends at or before MAX_STR_IDX, and the
   node's successor is not in the sifted state at the end of the span.  */

static int
sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
                     Idx node_idx, Idx str_idx, Idx max_str_idx)
{
  const re_dfa_t *const dfa = mctx->dfa;
  int nbytes = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);

  /* Nothing further to verify for these cases.  */
  if (nbytes <= 0 || str_idx + nbytes > max_str_idx)
    return nbytes;

  /* The destination is still alive: the node accepts NBYTES bytes.  */
  if (STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + nbytes],
                           dfa->nexts[node_idx]))
    return nbytes;

  /* The destination was already thrown away.  */
  return 0;
}
#endif /* RE_ENABLE_I18N */
2219
2220 \f
2221 /* Functions for state transition.  */
2222
2223 /* Return the next state to which the current state STATE will transit by
2224    accepting the current input byte, and update STATE_LOG if necessary.
2225    If STATE can accept a multibyte char/collating element/back reference
2226    update the destination of STATE_LOG.  */
2227
2228 static re_dfastate_t *
2229 __attribute_warn_unused_result__
2230 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2231                re_dfastate_t *state)
2232 {
2233   re_dfastate_t **trtable;
2234   unsigned char ch;
2235
2236 #ifdef RE_ENABLE_I18N
2237   /* If the current state can accept multibyte.  */
2238   if (__glibc_unlikely (state->accept_mb))
2239     {
2240       *err = transit_state_mb (mctx, state);
2241       if (__glibc_unlikely (*err != REG_NOERROR))
2242         return NULL;
2243     }
2244 #endif /* RE_ENABLE_I18N */
2245
2246   /* Then decide the next state with the single byte.  */
2247 #if 0
2248   if (0)
2249     /* don't use transition table  */
2250     return transit_state_sb (err, mctx, state);
2251 #endif
2252
2253   /* Use transition table  */
2254   ch = re_string_fetch_byte (&mctx->input);
2255   for (;;)
2256     {
2257       trtable = state->trtable;
2258       if (__glibc_likely (trtable != NULL))
2259         return trtable[ch];
2260
2261       trtable = state->word_trtable;
2262       if (__glibc_likely (trtable != NULL))
2263         {
2264           unsigned int context;
2265           context
2266             = re_string_context_at (&mctx->input,
2267                                     re_string_cur_idx (&mctx->input) - 1,
2268                                     mctx->eflags);
2269           if (IS_WORD_CONTEXT (context))
2270             return trtable[ch + SBC_MAX];
2271           else
2272             return trtable[ch];
2273         }
2274
2275       if (!build_trtable (mctx->dfa, state))
2276         {
2277           *err = REG_ESPACE;
2278           return NULL;
2279         }
2280
2281       /* Retry, we now have a transition table.  */
2282     }
2283 }
2284
2285 /* Update the state_log if we need */
2286 static re_dfastate_t *
2287 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2288                       re_dfastate_t *next_state)
2289 {
2290   const re_dfa_t *const dfa = mctx->dfa;
2291   Idx cur_idx = re_string_cur_idx (&mctx->input);
2292
2293   if (cur_idx > mctx->state_log_top)
2294     {
2295       mctx->state_log[cur_idx] = next_state;
2296       mctx->state_log_top = cur_idx;
2297     }
2298   else if (mctx->state_log[cur_idx] == 0)
2299     {
2300       mctx->state_log[cur_idx] = next_state;
2301     }
2302   else
2303     {
2304       re_dfastate_t *pstate;
2305       unsigned int context;
2306       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2307       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2308          the destination of a multibyte char/collating element/
2309          back reference.  Then the next state is the union set of
2310          these destinations and the results of the transition table.  */
2311       pstate = mctx->state_log[cur_idx];
2312       log_nodes = pstate->entrance_nodes;
2313       if (next_state != NULL)
2314         {
2315           table_nodes = next_state->entrance_nodes;
2316           *err = re_node_set_init_union (&next_nodes, table_nodes,
2317                                              log_nodes);
2318           if (__glibc_unlikely (*err != REG_NOERROR))
2319             return NULL;
2320         }
2321       else
2322         next_nodes = *log_nodes;
2323       /* Note: We already add the nodes of the initial state,
2324          then we don't need to add them here.  */
2325
2326       context = re_string_context_at (&mctx->input,
2327                                       re_string_cur_idx (&mctx->input) - 1,
2328                                       mctx->eflags);
2329       next_state = mctx->state_log[cur_idx]
2330         = re_acquire_state_context (err, dfa, &next_nodes, context);
2331       /* We don't need to check errors here, since the return value of
2332          this function is next_state and ERR is already set.  */
2333
2334       if (table_nodes != NULL)
2335         re_node_set_free (&next_nodes);
2336     }
2337
2338   if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2339     {
2340       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2341          later.  We must check them here, since the back references in the
2342          next state might use them.  */
2343       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2344                                         cur_idx);
2345       if (__glibc_unlikely (*err != REG_NOERROR))
2346         return NULL;
2347
2348       /* If the next state has back references.  */
2349       if (next_state->has_backref)
2350         {
2351           *err = transit_state_bkref (mctx, &next_state->nodes);
2352           if (__glibc_unlikely (*err != REG_NOERROR))
2353             return NULL;
2354           next_state = mctx->state_log[cur_idx];
2355         }
2356     }
2357
2358   return next_state;
2359 }
2360
2361 /* Skip bytes in the input that correspond to part of a
2362    multi-byte match, then look in the log for a state
2363    from which to restart matching.  */
2364 static re_dfastate_t *
2365 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2366 {
2367   re_dfastate_t *cur_state;
2368   do
2369     {
2370       Idx max = mctx->state_log_top;
2371       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2372
2373       do
2374         {
2375           if (++cur_str_idx > max)
2376             return NULL;
2377           re_string_skip_bytes (&mctx->input, 1);
2378         }
2379       while (mctx->state_log[cur_str_idx] == NULL);
2380
2381       cur_state = merge_state_with_log (err, mctx, NULL);
2382     }
2383   while (*err == REG_NOERROR && cur_state == NULL);
2384   return cur_state;
2385 }
2386
2387 /* Helper functions for transit_state.  */
2388
2389 /* From the node set CUR_NODES, pick up the nodes whose types are
2390    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2391    expression. And register them to use them later for evaluating the
2392    corresponding back references.  */
2393
2394 static reg_errcode_t
2395 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2396                            Idx str_idx)
2397 {
2398   const re_dfa_t *const dfa = mctx->dfa;
2399   Idx node_idx;
2400   reg_errcode_t err;
2401
2402   /* TODO: This isn't efficient.
2403            Because there might be more than one nodes whose types are
2404            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2405            nodes.
2406            E.g. RE: (a){2}  */
2407   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2408     {
2409       Idx node = cur_nodes->elems[node_idx];
2410       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2411           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2412           && (dfa->used_bkref_map
2413               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2414         {
2415           err = match_ctx_add_subtop (mctx, node, str_idx);
2416           if (__glibc_unlikely (err != REG_NOERROR))
2417             return err;
2418         }
2419     }
2420   return REG_NOERROR;
2421 }
2422
#if 0
/* Disabled code.  Compute the state that STATE transits to by accepting the
   byte at the current input position, then advance the input by one byte.
   On failure NULL is returned and *ERR holds the error code.  */

static re_dfastate_t *
transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
                  re_dfastate_t *state)
{
  const re_dfa_t *const dfa = mctx->dfa;
  const Idx str_pos = re_string_cur_idx (&mctx->input);
  re_node_set successors;
  re_dfastate_t *result;
  unsigned int context;
  Idx i;

  *err = re_node_set_alloc (&successors, state->nodes.nelem + 1);
  if (__glibc_unlikely (*err != REG_NOERROR))
    return NULL;

  /* Collect the epsilon closures of the successors of every node that
     accepts the current byte.  */
  for (i = 0; i < state->nodes.nelem; ++i)
    {
      const Idx node = state->nodes.elems[i];

      if (!check_node_accept (mctx, dfa->nodes + node, str_pos))
        continue;

      *err = re_node_set_merge (&successors,
                                dfa->eclosures + dfa->nexts[node]);
      if (__glibc_unlikely (*err != REG_NOERROR))
        {
          re_node_set_free (&successors);
          return NULL;
        }
    }

  context = re_string_context_at (&mctx->input, str_pos, mctx->eflags);
  /* No error check is needed here: the result is handed straight back to
     the caller and ERR has already been filled in.  */
  result = re_acquire_state_context (err, dfa, &successors, context);

  re_node_set_free (&successors);
  re_string_skip_bytes (&mctx->input, 1);
  return result;
}
#endif
2464
#ifdef RE_ENABLE_I18N
/* For every node of PSTATE flagged accept_mb, ask check_node_accept_bytes
   how many bytes the node accepts at the current input position.  When the
   answer is non-zero, merge the epsilon closure of the node's successor
   into the state log entry located that many bytes ahead.
   MCTX->max_mb_elem_len is raised to the largest byte count seen.

   Return REG_NOERROR on success, otherwise the first error encountered.  */
static reg_errcode_t
transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
  const re_dfa_t *const dfa = mctx->dfa;
  reg_errcode_t err;
  Idx i;

  for (i = 0; i < pstate->nodes.nelem; ++i)
    {
      const Idx node = pstate->nodes.elems[i];
      re_node_set merged, *closure;
      re_dfastate_t *logged;
      unsigned int context;
      int nbytes;
      Idx dest_idx;

      if (!dfa->nodes[node].accept_mb)
        continue;

      /* Skip nodes whose constraint is not met in the current context.  */
      if (dfa->nodes[node].constraint)
        {
          context = re_string_context_at (&mctx->input,
                                          re_string_cur_idx (&mctx->input),
                                          mctx->eflags);
          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint,
                                           context))
            continue;
        }

      /* Number of bytes this node accepts here; zero means none.  */
      nbytes = check_node_accept_bytes (dfa, node, &mctx->input,
                                        re_string_cur_idx (&mctx->input));
      if (nbytes == 0)
        continue;

      dest_idx = re_string_cur_idx (&mctx->input) + nbytes;
      if (mctx->max_mb_elem_len < nbytes)
        mctx->max_mb_elem_len = nbytes;

      err = clean_state_log_if_needed (mctx, dest_idx);
      if (__glibc_unlikely (err != REG_NOERROR))
        return err;
#ifdef DEBUG
      assert (dfa->nexts[node] != -1);
#endif
      closure = dfa->eclosures + dfa->nexts[node];

      /* MERGED owns storage only when it was built as a union with an
         already logged state; otherwise it is a shallow copy of CLOSURE
         and must not be freed.  */
      logged = mctx->state_log[dest_idx];
      if (logged != NULL)
        {
          err = re_node_set_init_union (&merged, logged->entrance_nodes,
                                        closure);
          if (__glibc_unlikely (err != REG_NOERROR))
            return err;
        }
      else
        merged = *closure;

      context = re_string_context_at (&mctx->input, dest_idx - 1,
                                      mctx->eflags);
      mctx->state_log[dest_idx]
        = re_acquire_state_context (&err, dfa, &merged, context);
      if (logged != NULL)
        re_node_set_free (&merged);
      if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
                            && err != REG_NOERROR))
        return err;
    }
  return REG_NOERROR;
}
#endif /* RE_ENABLE_I18N */
2536
2537 static reg_errcode_t
2538 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2539 {
2540   const re_dfa_t *const dfa = mctx->dfa;
2541   reg_errcode_t err;
2542   Idx i;
2543   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2544
2545   for (i = 0; i < nodes->nelem; ++i)
2546     {
2547       Idx dest_str_idx, prev_nelem, bkc_idx;
2548       Idx node_idx = nodes->elems[i];
2549       unsigned int context;
2550       const re_token_t *node = dfa->nodes + node_idx;
2551       re_node_set *new_dest_nodes;
2552
2553       /* Check whether 'node' is a backreference or not.  */
2554       if (node->type != OP_BACK_REF)
2555         continue;
2556
2557       if (node->constraint)
2558         {
2559           context = re_string_context_at (&mctx->input, cur_str_idx,
2560                                           mctx->eflags);
2561           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2562             continue;
2563         }
2564
2565       /* 'node' is a backreference.
2566          Check the substring which the substring matched.  */
2567       bkc_idx = mctx->nbkref_ents;
2568       err = get_subexp (mctx, node_idx, cur_str_idx);
2569       if (__glibc_unlikely (err != REG_NOERROR))
2570         goto free_return;
2571
2572       /* And add the epsilon closures (which is 'new_dest_nodes') of
2573          the backreference to appropriate state_log.  */
2574 #ifdef DEBUG
2575       assert (dfa->nexts[node_idx] != -1);
2576 #endif
2577       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2578         {
2579           Idx subexp_len;
2580           re_dfastate_t *dest_state;
2581           struct re_backref_cache_entry *bkref_ent;
2582           bkref_ent = mctx->bkref_ents + bkc_idx;
2583           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2584             continue;
2585           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2586           new_dest_nodes = (subexp_len == 0
2587                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2588                             : dfa->eclosures + dfa->nexts[node_idx]);
2589           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2590                           - bkref_ent->subexp_from);
2591           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2592                                           mctx->eflags);
2593           dest_state = mctx->state_log[dest_str_idx];
2594           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2595                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2596           /* Add 'new_dest_node' to state_log.  */
2597           if (dest_state == NULL)
2598             {
2599               mctx->state_log[dest_str_idx]
2600                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2601                                             context);
2602               if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2603                                     && err != REG_NOERROR))
2604                 goto free_return;
2605             }
2606           else
2607             {
2608               re_node_set dest_nodes;
2609               err = re_node_set_init_union (&dest_nodes,
2610                                             dest_state->entrance_nodes,
2611                                             new_dest_nodes);
2612               if (__glibc_unlikely (err != REG_NOERROR))
2613                 {
2614                   re_node_set_free (&dest_nodes);
2615                   goto free_return;
2616                 }
2617               mctx->state_log[dest_str_idx]
2618                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2619               re_node_set_free (&dest_nodes);
2620               if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2621                                     && err != REG_NOERROR))
2622                 goto free_return;
2623             }
2624           /* We need to check recursively if the backreference can epsilon
2625              transit.  */
2626           if (subexp_len == 0
2627               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2628             {
2629               err = check_subexp_matching_top (mctx, new_dest_nodes,
2630                                                cur_str_idx);
2631               if (__glibc_unlikely (err != REG_NOERROR))
2632                 goto free_return;
2633               err = transit_state_bkref (mctx, new_dest_nodes);
2634               if (__glibc_unlikely (err != REG_NOERROR))
2635                 goto free_return;
2636             }
2637         }
2638     }
2639   err = REG_NOERROR;
2640  free_return:
2641   return err;
2642 }
2643
2644 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2645    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2646    Note that we might collect inappropriate candidates here.
2647    However, the cost of checking them strictly here is too high, then we
2648    delay these checking for prune_impossible_nodes().  */
2649
2650 static reg_errcode_t
2651 __attribute_warn_unused_result__
2652 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2653 {
2654   const re_dfa_t *const dfa = mctx->dfa;
2655   Idx subexp_num, sub_top_idx;
2656   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2657   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2658   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2659   if (cache_idx != -1)
2660     {
2661       const struct re_backref_cache_entry *entry
2662         = mctx->bkref_ents + cache_idx;
2663       do
2664         if (entry->node == bkref_node)
2665           return REG_NOERROR; /* We already checked it.  */
2666       while (entry++->more);
2667     }
2668
2669   subexp_num = dfa->nodes[bkref_node].opr.idx;
2670
2671   /* For each sub expression  */
2672   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2673     {
2674       reg_errcode_t err;
2675       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2676       re_sub_match_last_t *sub_last;
2677       Idx sub_last_idx, sl_str, bkref_str_off;
2678
2679       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2680         continue; /* It isn't related.  */
2681
2682       sl_str = sub_top->str_idx;
2683       bkref_str_off = bkref_str_idx;
2684       /* At first, check the last node of sub expressions we already
2685          evaluated.  */
2686       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2687         {
2688           regoff_t sl_str_diff;
2689           sub_last = sub_top->lasts[sub_last_idx];
2690           sl_str_diff = sub_last->str_idx - sl_str;
2691           /* The matched string by the sub expression match with the substring
2692              at the back reference?  */
2693           if (sl_str_diff > 0)
2694             {
2695               if (__glibc_unlikely (bkref_str_off + sl_str_diff
2696                                     > mctx->input.valid_len))
2697                 {
2698                   /* Not enough chars for a successful match.  */
2699                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2700                     break;
2701
2702                   err = clean_state_log_if_needed (mctx,
2703                                                    bkref_str_off
2704                                                    + sl_str_diff);
2705                   if (__glibc_unlikely (err != REG_NOERROR))
2706                     return err;
2707                   buf = (const char *) re_string_get_buffer (&mctx->input);
2708                 }
2709               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2710                 /* We don't need to search this sub expression any more.  */
2711                 break;
2712             }
2713           bkref_str_off += sl_str_diff;
2714           sl_str += sl_str_diff;
2715           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2716                                 bkref_str_idx);
2717
2718           /* Reload buf, since the preceding call might have reallocated
2719              the buffer.  */
2720           buf = (const char *) re_string_get_buffer (&mctx->input);
2721
2722           if (err == REG_NOMATCH)
2723             continue;
2724           if (__glibc_unlikely (err != REG_NOERROR))
2725             return err;
2726         }
2727
2728       if (sub_last_idx < sub_top->nlasts)
2729         continue;
2730       if (sub_last_idx > 0)
2731         ++sl_str;
2732       /* Then, search for the other last nodes of the sub expression.  */
2733       for (; sl_str <= bkref_str_idx; ++sl_str)
2734         {
2735           Idx cls_node;
2736           regoff_t sl_str_off;
2737           const re_node_set *nodes;
2738           sl_str_off = sl_str - sub_top->str_idx;
2739           /* The matched string by the sub expression match with the substring
2740              at the back reference?  */
2741           if (sl_str_off > 0)
2742             {
2743               if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2744                 {
2745                   /* If we are at the end of the input, we cannot match.  */
2746                   if (bkref_str_off >= mctx->input.len)
2747                     break;
2748
2749                   err = extend_buffers (mctx, bkref_str_off + 1);
2750                   if (__glibc_unlikely (err != REG_NOERROR))
2751                     return err;
2752
2753                   buf = (const char *) re_string_get_buffer (&mctx->input);
2754                 }
2755               if (buf [bkref_str_off++] != buf[sl_str - 1])
2756                 break; /* We don't need to search this sub expression
2757                           any more.  */
2758             }
2759           if (mctx->state_log[sl_str] == NULL)
2760             continue;
2761           /* Does this state have a ')' of the sub expression?  */
2762           nodes = &mctx->state_log[sl_str]->nodes;
2763           cls_node = find_subexp_node (dfa, nodes, subexp_num,
2764                                        OP_CLOSE_SUBEXP);
2765           if (cls_node == -1)
2766             continue; /* No.  */
2767           if (sub_top->path == NULL)
2768             {
2769               sub_top->path = calloc (sizeof (state_array_t),
2770                                       sl_str - sub_top->str_idx + 1);
2771               if (sub_top->path == NULL)
2772                 return REG_ESPACE;
2773             }
2774           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2775              in the current context?  */
2776           err = check_arrival (mctx, sub_top->path, sub_top->node,
2777                                sub_top->str_idx, cls_node, sl_str,
2778                                OP_CLOSE_SUBEXP);
2779           if (err == REG_NOMATCH)
2780               continue;
2781           if (__glibc_unlikely (err != REG_NOERROR))
2782               return err;
2783           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2784           if (__glibc_unlikely (sub_last == NULL))
2785             return REG_ESPACE;
2786           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2787                                 bkref_str_idx);
2788           buf = (const char *) re_string_get_buffer (&mctx->input);
2789           if (err == REG_NOMATCH)
2790             continue;
2791           if (__glibc_unlikely (err != REG_NOERROR))
2792             return err;
2793         }
2794     }
2795   return REG_NOERROR;
2796 }
2797
2798 /* Helper functions for get_subexp().  */
2799
2800 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2801    If it can arrive, register the sub expression expressed with SUB_TOP
2802    and SUB_LAST.  */
2803
2804 static reg_errcode_t
2805 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2806                 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2807 {
2808   reg_errcode_t err;
2809   Idx to_idx;
2810   /* Can the subexpression arrive the back reference?  */
2811   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2812                        sub_last->str_idx, bkref_node, bkref_str,
2813                        OP_OPEN_SUBEXP);
2814   if (err != REG_NOERROR)
2815     return err;
2816   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2817                              sub_last->str_idx);
2818   if (__glibc_unlikely (err != REG_NOERROR))
2819     return err;
2820   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2821   return clean_state_log_if_needed (mctx, to_idx);
2822 }
2823
2824 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2825    Search '(' if FL_OPEN, or search ')' otherwise.
2826    TODO: This function isn't efficient...
2827          Because there might be more than one nodes whose types are
2828          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2829          nodes.
2830          E.g. RE: (a){2}  */
2831
2832 static Idx
2833 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2834                   Idx subexp_idx, int type)
2835 {
2836   Idx cls_idx;
2837   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2838     {
2839       Idx cls_node = nodes->elems[cls_idx];
2840       const re_token_t *node = dfa->nodes + cls_node;
2841       if (node->type == type
2842           && node->opr.idx == subexp_idx)
2843         return cls_node;
2844     }
2845   return -1;
2846 }
2847
2848 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2849    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2850    heavily reused.
2851    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2852
2853 static reg_errcode_t
2854 __attribute_warn_unused_result__
2855 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2856                Idx top_str, Idx last_node, Idx last_str, int type)
2857 {
2858   const re_dfa_t *const dfa = mctx->dfa;
2859   reg_errcode_t err = REG_NOERROR;
2860   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2861   re_dfastate_t *cur_state = NULL;
2862   re_node_set *cur_nodes, next_nodes;
2863   re_dfastate_t **backup_state_log;
2864   unsigned int context;
2865
2866   subexp_num = dfa->nodes[top_node].opr.idx;
2867   /* Extend the buffer if we need.  */
2868   if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
2869     {
2870       re_dfastate_t **new_array;
2871       Idx old_alloc = path->alloc;
2872       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2873       Idx new_alloc;
2874       if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2875         return REG_ESPACE;
2876       new_alloc = old_alloc + incr_alloc;
2877       if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2878         return REG_ESPACE;
2879       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2880       if (__glibc_unlikely (new_array == NULL))
2881         return REG_ESPACE;
2882       path->array = new_array;
2883       path->alloc = new_alloc;
2884       memset (new_array + old_alloc, '\0',
2885               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2886     }
2887
2888   str_idx = path->next_idx ? path->next_idx : top_str;
2889
2890   /* Temporary modify MCTX.  */
2891   backup_state_log = mctx->state_log;
2892   backup_cur_idx = mctx->input.cur_idx;
2893   mctx->state_log = path->array;
2894   mctx->input.cur_idx = str_idx;
2895
2896   /* Setup initial node set.  */
2897   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2898   if (str_idx == top_str)
2899     {
2900       err = re_node_set_init_1 (&next_nodes, top_node);
2901       if (__glibc_unlikely (err != REG_NOERROR))
2902         return err;
2903       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2904       if (__glibc_unlikely (err != REG_NOERROR))
2905         {
2906           re_node_set_free (&next_nodes);
2907           return err;
2908         }
2909     }
2910   else
2911     {
2912       cur_state = mctx->state_log[str_idx];
2913       if (cur_state && cur_state->has_backref)
2914         {
2915           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2916           if (__glibc_unlikely (err != REG_NOERROR))
2917             return err;
2918         }
2919       else
2920         re_node_set_init_empty (&next_nodes);
2921     }
2922   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2923     {
2924       if (next_nodes.nelem)
2925         {
2926           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2927                                     subexp_num, type);
2928           if (__glibc_unlikely (err != REG_NOERROR))
2929             {
2930               re_node_set_free (&next_nodes);
2931               return err;
2932             }
2933         }
2934       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2935       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2936         {
2937           re_node_set_free (&next_nodes);
2938           return err;
2939         }
2940       mctx->state_log[str_idx] = cur_state;
2941     }
2942
2943   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2944     {
2945       re_node_set_empty (&next_nodes);
2946       if (mctx->state_log[str_idx + 1])
2947         {
2948           err = re_node_set_merge (&next_nodes,
2949                                    &mctx->state_log[str_idx + 1]->nodes);
2950           if (__glibc_unlikely (err != REG_NOERROR))
2951             {
2952               re_node_set_free (&next_nodes);
2953               return err;
2954             }
2955         }
2956       if (cur_state)
2957         {
2958           err = check_arrival_add_next_nodes (mctx, str_idx,
2959                                               &cur_state->non_eps_nodes,
2960                                               &next_nodes);
2961           if (__glibc_unlikely (err != REG_NOERROR))
2962             {
2963               re_node_set_free (&next_nodes);
2964               return err;
2965             }
2966         }
2967       ++str_idx;
2968       if (next_nodes.nelem)
2969         {
2970           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2971           if (__glibc_unlikely (err != REG_NOERROR))
2972             {
2973               re_node_set_free (&next_nodes);
2974               return err;
2975             }
2976           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2977                                     subexp_num, type);
2978           if (__glibc_unlikely (err != REG_NOERROR))
2979             {
2980               re_node_set_free (&next_nodes);
2981               return err;
2982             }
2983         }
2984       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2985       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2986       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2987         {
2988           re_node_set_free (&next_nodes);
2989           return err;
2990         }
2991       mctx->state_log[str_idx] = cur_state;
2992       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2993     }
2994   re_node_set_free (&next_nodes);
2995   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2996                : &mctx->state_log[last_str]->nodes);
2997   path->next_idx = str_idx;
2998
2999   /* Fix MCTX.  */
3000   mctx->state_log = backup_state_log;
3001   mctx->input.cur_idx = backup_cur_idx;
3002
3003   /* Then check the current node set has the node LAST_NODE.  */
3004   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3005     return REG_NOERROR;
3006
3007   return REG_NOMATCH;
3008 }
3009
3010 /* Helper functions for check_arrival.  */
3011
3012 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3013    to NEXT_NODES.
3014    TODO: This function is similar to the functions transit_state*(),
3015          however this function has many additional works.
3016          Can't we unify them?  */
3017
3018 static reg_errcode_t
3019 __attribute_warn_unused_result__
3020 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3021                               re_node_set *cur_nodes, re_node_set *next_nodes)
3022 {
3023   const re_dfa_t *const dfa = mctx->dfa;
3024   bool ok;
3025   Idx cur_idx;
3026 #ifdef RE_ENABLE_I18N
3027   reg_errcode_t err = REG_NOERROR;
3028 #endif
3029   re_node_set union_set;
3030   re_node_set_init_empty (&union_set);
3031   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3032     {
3033       int naccepted = 0;
3034       Idx cur_node = cur_nodes->elems[cur_idx];
3035 #ifdef DEBUG
3036       re_token_type_t type = dfa->nodes[cur_node].type;
3037       assert (!IS_EPSILON_NODE (type));
3038 #endif
3039 #ifdef RE_ENABLE_I18N
3040       /* If the node may accept "multi byte".  */
3041       if (dfa->nodes[cur_node].accept_mb)
3042         {
3043           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3044                                                str_idx);
3045           if (naccepted > 1)
3046             {
3047               re_dfastate_t *dest_state;
3048               Idx next_node = dfa->nexts[cur_node];
3049               Idx next_idx = str_idx + naccepted;
3050               dest_state = mctx->state_log[next_idx];
3051               re_node_set_empty (&union_set);
3052               if (dest_state)
3053                 {
3054                   err = re_node_set_merge (&union_set, &dest_state->nodes);
3055                   if (__glibc_unlikely (err != REG_NOERROR))
3056                     {
3057                       re_node_set_free (&union_set);
3058                       return err;
3059                     }
3060                 }
3061               ok = re_node_set_insert (&union_set, next_node);
3062               if (__glibc_unlikely (! ok))
3063                 {
3064                   re_node_set_free (&union_set);
3065                   return REG_ESPACE;
3066                 }
3067               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3068                                                             &union_set);
3069               if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3070                                     && err != REG_NOERROR))
3071                 {
3072                   re_node_set_free (&union_set);
3073                   return err;
3074                 }
3075             }
3076         }
3077 #endif /* RE_ENABLE_I18N */
3078       if (naccepted
3079           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3080         {
3081           ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3082           if (__glibc_unlikely (! ok))
3083             {
3084               re_node_set_free (&union_set);
3085               return REG_ESPACE;
3086             }
3087         }
3088     }
3089   re_node_set_free (&union_set);
3090   return REG_NOERROR;
3091 }
3092
3093 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3094    CUR_NODES, however exclude the nodes which are:
3095     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3096     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3097 */
3098
3099 static reg_errcode_t
3100 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3101                           Idx ex_subexp, int type)
3102 {
3103   reg_errcode_t err;
3104   Idx idx, outside_node;
3105   re_node_set new_nodes;
3106 #ifdef DEBUG
3107   assert (cur_nodes->nelem);
3108 #endif
3109   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3110   if (__glibc_unlikely (err != REG_NOERROR))
3111     return err;
3112   /* Create a new node set NEW_NODES with the nodes which are epsilon
3113      closures of the node in CUR_NODES.  */
3114
3115   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3116     {
3117       Idx cur_node = cur_nodes->elems[idx];
3118       const re_node_set *eclosure = dfa->eclosures + cur_node;
3119       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3120       if (outside_node == -1)
3121         {
3122           /* There are no problematic nodes, just merge them.  */
3123           err = re_node_set_merge (&new_nodes, eclosure);
3124           if (__glibc_unlikely (err != REG_NOERROR))
3125             {
3126               re_node_set_free (&new_nodes);
3127               return err;
3128             }
3129         }
3130       else
3131         {
3132           /* There are problematic nodes, re-calculate incrementally.  */
3133           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3134                                               ex_subexp, type);
3135           if (__glibc_unlikely (err != REG_NOERROR))
3136             {
3137               re_node_set_free (&new_nodes);
3138               return err;
3139             }
3140         }
3141     }
3142   re_node_set_free (cur_nodes);
3143   *cur_nodes = new_nodes;
3144   return REG_NOERROR;
3145 }
3146
3147 /* Helper function for check_arrival_expand_ecl.
3148    Check incrementally the epsilon closure of TARGET, and if it isn't
3149    problematic append it to DST_NODES.  */
3150
3151 static reg_errcode_t
3152 __attribute_warn_unused_result__
3153 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3154                               Idx target, Idx ex_subexp, int type)
3155 {
3156   Idx cur_node;
3157   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3158     {
3159       bool ok;
3160
3161       if (dfa->nodes[cur_node].type == type
3162           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3163         {
3164           if (type == OP_CLOSE_SUBEXP)
3165             {
3166               ok = re_node_set_insert (dst_nodes, cur_node);
3167               if (__glibc_unlikely (! ok))
3168                 return REG_ESPACE;
3169             }
3170           break;
3171         }
3172       ok = re_node_set_insert (dst_nodes, cur_node);
3173       if (__glibc_unlikely (! ok))
3174         return REG_ESPACE;
3175       if (dfa->edests[cur_node].nelem == 0)
3176         break;
3177       if (dfa->edests[cur_node].nelem == 2)
3178         {
3179           reg_errcode_t err;
3180           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3181                                               dfa->edests[cur_node].elems[1],
3182                                               ex_subexp, type);
3183           if (__glibc_unlikely (err != REG_NOERROR))
3184             return err;
3185         }
3186       cur_node = dfa->edests[cur_node].elems[0];
3187     }
3188   return REG_NOERROR;
3189 }
3190
3191
3192 /* For all the back references in the current state, calculate the
3193    destination of the back references by the appropriate entry
3194    in MCTX->BKREF_ENTS.  */
3195
3196 static reg_errcode_t
3197 __attribute_warn_unused_result__
3198 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3199                     Idx cur_str, Idx subexp_num, int type)
3200 {
3201   const re_dfa_t *const dfa = mctx->dfa;
3202   reg_errcode_t err;
3203   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3204   struct re_backref_cache_entry *ent;
3205
3206   if (cache_idx_start == -1)
3207     return REG_NOERROR;
3208
3209  restart:
3210   ent = mctx->bkref_ents + cache_idx_start;
3211   do
3212     {
3213       Idx to_idx, next_node;
3214
3215       /* Is this entry ENT is appropriate?  */
3216       if (!re_node_set_contains (cur_nodes, ent->node))
3217         continue; /* No.  */
3218
3219       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3220       /* Calculate the destination of the back reference, and append it
3221          to MCTX->STATE_LOG.  */
3222       if (to_idx == cur_str)
3223         {
3224           /* The backreference did epsilon transit, we must re-check all the
3225              node in the current state.  */
3226           re_node_set new_dests;
3227           reg_errcode_t err2, err3;
3228           next_node = dfa->edests[ent->node].elems[0];
3229           if (re_node_set_contains (cur_nodes, next_node))
3230             continue;
3231           err = re_node_set_init_1 (&new_dests, next_node);
3232           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3233           err3 = re_node_set_merge (cur_nodes, &new_dests);
3234           re_node_set_free (&new_dests);
3235           if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3236                                 || err3 != REG_NOERROR))
3237             {
3238               err = (err != REG_NOERROR ? err
3239                      : (err2 != REG_NOERROR ? err2 : err3));
3240               return err;
3241             }
3242           /* TODO: It is still inefficient...  */
3243           goto restart;
3244         }
3245       else
3246         {
3247           re_node_set union_set;
3248           next_node = dfa->nexts[ent->node];
3249           if (mctx->state_log[to_idx])
3250             {
3251               bool ok;
3252               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3253                                         next_node))
3254                 continue;
3255               err = re_node_set_init_copy (&union_set,
3256                                            &mctx->state_log[to_idx]->nodes);
3257               ok = re_node_set_insert (&union_set, next_node);
3258               if (__glibc_unlikely (err != REG_NOERROR || ! ok))
3259                 {
3260                   re_node_set_free (&union_set);
3261                   err = err != REG_NOERROR ? err : REG_ESPACE;
3262                   return err;
3263                 }
3264             }
3265           else
3266             {
3267               err = re_node_set_init_1 (&union_set, next_node);
3268               if (__glibc_unlikely (err != REG_NOERROR))
3269                 return err;
3270             }
3271           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3272           re_node_set_free (&union_set);
3273           if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3274                                 && err != REG_NOERROR))
3275             return err;
3276         }
3277     }
3278   while (ent++->more);
3279   return REG_NOERROR;
3280 }
3281
3282 /* Build transition table for the state.
3283    Return true if successful.  */
3284
3285 static bool
3286 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3287 {
3288   reg_errcode_t err;
3289   Idx i, j;
3290   int ch;
3291   bool need_word_trtable = false;
3292   bitset_word_t elem, mask;
3293   bool dests_node_malloced = false;
3294   bool dest_states_malloced = false;
3295   Idx ndests; /* Number of the destination states from 'state'.  */
3296   re_dfastate_t **trtable;
3297   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3298   re_node_set follows, *dests_node;
3299   bitset_t *dests_ch;
3300   bitset_t acceptable;
3301
3302   struct dests_alloc
3303   {
3304     re_node_set dests_node[SBC_MAX];
3305     bitset_t dests_ch[SBC_MAX];
3306   } *dests_alloc;
3307
3308   /* We build DFA states which corresponds to the destination nodes
3309      from 'state'.  'dests_node[i]' represents the nodes which i-th
3310      destination state contains, and 'dests_ch[i]' represents the
3311      characters which i-th destination state accepts.  */
3312   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3313     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3314   else
3315     {
3316       dests_alloc = re_malloc (struct dests_alloc, 1);
3317       if (__glibc_unlikely (dests_alloc == NULL))
3318         return false;
3319       dests_node_malloced = true;
3320     }
3321   dests_node = dests_alloc->dests_node;
3322   dests_ch = dests_alloc->dests_ch;
3323
3324   /* Initialize transition table.  */
3325   state->word_trtable = state->trtable = NULL;
3326
3327   /* At first, group all nodes belonging to 'state' into several
3328      destinations.  */
3329   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3330   if (__glibc_unlikely (ndests <= 0))
3331     {
3332       if (dests_node_malloced)
3333         re_free (dests_alloc);
3334       /* Return false in case of an error, true otherwise.  */
3335       if (ndests == 0)
3336         {
3337           state->trtable = (re_dfastate_t **)
3338             calloc (sizeof (re_dfastate_t *), SBC_MAX);
3339           if (__glibc_unlikely (state->trtable == NULL))
3340             return false;
3341           return true;
3342         }
3343       return false;
3344     }
3345
3346   err = re_node_set_alloc (&follows, ndests + 1);
3347   if (__glibc_unlikely (err != REG_NOERROR))
3348     goto out_free;
3349
3350   /* Avoid arithmetic overflow in size calculation.  */
3351   size_t ndests_max
3352     = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3353        / (3 * sizeof (re_dfastate_t *)));
3354   if (__glibc_unlikely (ndests_max < ndests))
3355     goto out_free;
3356
3357   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3358                          + ndests * 3 * sizeof (re_dfastate_t *)))
3359     dest_states = (re_dfastate_t **)
3360       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3361   else
3362     {
3363       dest_states = re_malloc (re_dfastate_t *, ndests * 3);
3364       if (__glibc_unlikely (dest_states == NULL))
3365         {
3366 out_free:
3367           if (dest_states_malloced)
3368             re_free (dest_states);
3369           re_node_set_free (&follows);
3370           for (i = 0; i < ndests; ++i)
3371             re_node_set_free (dests_node + i);
3372           if (dests_node_malloced)
3373             re_free (dests_alloc);
3374           return false;
3375         }
3376       dest_states_malloced = true;
3377     }
3378   dest_states_word = dest_states + ndests;
3379   dest_states_nl = dest_states_word + ndests;
3380   bitset_empty (acceptable);
3381
3382   /* Then build the states for all destinations.  */
3383   for (i = 0; i < ndests; ++i)
3384     {
3385       Idx next_node;
3386       re_node_set_empty (&follows);
3387       /* Merge the follows of this destination states.  */
3388       for (j = 0; j < dests_node[i].nelem; ++j)
3389         {
3390           next_node = dfa->nexts[dests_node[i].elems[j]];
3391           if (next_node != -1)
3392             {
3393               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3394               if (__glibc_unlikely (err != REG_NOERROR))
3395                 goto out_free;
3396             }
3397         }
3398       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3399       if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3400         goto out_free;
3401       /* If the new state has context constraint,
3402          build appropriate states for these contexts.  */
3403       if (dest_states[i]->has_constraint)
3404         {
3405           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3406                                                           CONTEXT_WORD);
3407           if (__glibc_unlikely (dest_states_word[i] == NULL
3408                                 && err != REG_NOERROR))
3409             goto out_free;
3410
3411           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3412             need_word_trtable = true;
3413
3414           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3415                                                         CONTEXT_NEWLINE);
3416           if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3417             goto out_free;
3418         }
3419       else
3420         {
3421           dest_states_word[i] = dest_states[i];
3422           dest_states_nl[i] = dest_states[i];
3423         }
3424       bitset_merge (acceptable, dests_ch[i]);
3425     }
3426
3427   if (!__glibc_unlikely (need_word_trtable))
3428     {
3429       /* We don't care about whether the following character is a word
3430          character, or we are in a single-byte character set so we can
3431          discern by looking at the character code: allocate a
3432          256-entry transition table.  */
3433       trtable = state->trtable =
3434         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3435       if (__glibc_unlikely (trtable == NULL))
3436         goto out_free;
3437
3438       /* For all characters ch...:  */
3439       for (i = 0; i < BITSET_WORDS; ++i)
3440         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3441              elem;
3442              mask <<= 1, elem >>= 1, ++ch)
3443           if (__glibc_unlikely (elem & 1))
3444             {
3445               /* There must be exactly one destination which accepts
3446                  character ch.  See group_nodes_into_DFAstates.  */
3447               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3448                 ;
3449
3450               /* j-th destination accepts the word character ch.  */
3451               if (dfa->word_char[i] & mask)
3452                 trtable[ch] = dest_states_word[j];
3453               else
3454                 trtable[ch] = dest_states[j];
3455             }
3456     }
3457   else
3458     {
3459       /* We care about whether the following character is a word
3460          character, and we are in a multi-byte character set: discern
3461          by looking at the character code: build two 256-entry
3462          transition tables, one starting at trtable[0] and one
3463          starting at trtable[SBC_MAX].  */
3464       trtable = state->word_trtable =
3465         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3466       if (__glibc_unlikely (trtable == NULL))
3467         goto out_free;
3468
3469       /* For all characters ch...:  */
3470       for (i = 0; i < BITSET_WORDS; ++i)
3471         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3472              elem;
3473              mask <<= 1, elem >>= 1, ++ch)
3474           if (__glibc_unlikely (elem & 1))
3475             {
3476               /* There must be exactly one destination which accepts
3477                  character ch.  See group_nodes_into_DFAstates.  */
3478               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3479                 ;
3480
3481               /* j-th destination accepts the word character ch.  */
3482               trtable[ch] = dest_states[j];
3483               trtable[ch + SBC_MAX] = dest_states_word[j];
3484             }
3485     }
3486
3487   /* new line */
3488   if (bitset_contain (acceptable, NEWLINE_CHAR))
3489     {
3490       /* The current state accepts newline character.  */
3491       for (j = 0; j < ndests; ++j)
3492         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3493           {
3494             /* k-th destination accepts newline character.  */
3495             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3496             if (need_word_trtable)
3497               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3498             /* There must be only one destination which accepts
3499                newline.  See group_nodes_into_DFAstates.  */
3500             break;
3501           }
3502     }
3503
3504   if (dest_states_malloced)
3505     re_free (dest_states);
3506
3507   re_node_set_free (&follows);
3508   for (i = 0; i < ndests; ++i)
3509     re_node_set_free (dests_node + i);
3510
3511   if (dests_node_malloced)
3512     re_free (dests_alloc);
3513
3514   return true;
3515 }
3516
3517 /* Group all nodes belonging to STATE into several destinations.
3518    Then for all destinations, set the nodes belonging to the destination
3519    to DESTS_NODE[i] and set the characters accepted by the destination
3520    to DEST_CH[i].  This function return the number of destinations.  */
3521
3522 static Idx
3523 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3524                             re_node_set *dests_node, bitset_t *dests_ch)
3525 {
3526   reg_errcode_t err;
3527   bool ok;
3528   Idx i, j, k;
3529   Idx ndests; /* Number of the destinations from 'state'.  */
3530   bitset_t accepts; /* Characters a node can accept.  */
3531   const re_node_set *cur_nodes = &state->nodes;
3532   bitset_empty (accepts);
3533   ndests = 0;
3534
3535   /* For all the nodes belonging to 'state',  */
3536   for (i = 0; i < cur_nodes->nelem; ++i)
3537     {
3538       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3539       re_token_type_t type = node->type;
3540       unsigned int constraint = node->constraint;
3541
3542       /* Enumerate all single byte character this node can accept.  */
3543       if (type == CHARACTER)
3544         bitset_set (accepts, node->opr.c);
3545       else if (type == SIMPLE_BRACKET)
3546         {
3547           bitset_merge (accepts, node->opr.sbcset);
3548         }
3549       else if (type == OP_PERIOD)
3550         {
3551 #ifdef RE_ENABLE_I18N
3552           if (dfa->mb_cur_max > 1)
3553             bitset_merge (accepts, dfa->sb_char);
3554           else
3555 #endif
3556             bitset_set_all (accepts);
3557           if (!(dfa->syntax & RE_DOT_NEWLINE))
3558             bitset_clear (accepts, '\n');
3559           if (dfa->syntax & RE_DOT_NOT_NULL)
3560             bitset_clear (accepts, '\0');
3561         }
3562 #ifdef RE_ENABLE_I18N
3563       else if (type == OP_UTF8_PERIOD)
3564         {
3565           if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3566             memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3567           else
3568             bitset_merge (accepts, utf8_sb_map);
3569           if (!(dfa->syntax & RE_DOT_NEWLINE))
3570             bitset_clear (accepts, '\n');
3571           if (dfa->syntax & RE_DOT_NOT_NULL)
3572             bitset_clear (accepts, '\0');
3573         }
3574 #endif
3575       else
3576         continue;
3577
3578       /* Check the 'accepts' and sift the characters which are not
3579          match it the context.  */
3580       if (constraint)
3581         {
3582           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3583             {
3584               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3585               bitset_empty (accepts);
3586               if (accepts_newline)
3587                 bitset_set (accepts, NEWLINE_CHAR);
3588               else
3589                 continue;
3590             }
3591           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3592             {
3593               bitset_empty (accepts);
3594               continue;
3595             }
3596
3597           if (constraint & NEXT_WORD_CONSTRAINT)
3598             {
3599               bitset_word_t any_set = 0;
3600               if (type == CHARACTER && !node->word_char)
3601                 {
3602                   bitset_empty (accepts);
3603                   continue;
3604                 }
3605 #ifdef RE_ENABLE_I18N
3606               if (dfa->mb_cur_max > 1)
3607                 for (j = 0; j < BITSET_WORDS; ++j)
3608                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3609               else
3610 #endif
3611                 for (j = 0; j < BITSET_WORDS; ++j)
3612                   any_set |= (accepts[j] &= dfa->word_char[j]);
3613               if (!any_set)
3614                 continue;
3615             }
3616           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3617             {
3618               bitset_word_t any_set = 0;
3619               if (type == CHARACTER && node->word_char)
3620                 {
3621                   bitset_empty (accepts);
3622                   continue;
3623                 }
3624 #ifdef RE_ENABLE_I18N
3625               if (dfa->mb_cur_max > 1)
3626                 for (j = 0; j < BITSET_WORDS; ++j)
3627                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3628               else
3629 #endif
3630                 for (j = 0; j < BITSET_WORDS; ++j)
3631                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3632               if (!any_set)
3633                 continue;
3634             }
3635         }
3636
3637       /* Then divide 'accepts' into DFA states, or create a new
3638          state.  Above, we make sure that accepts is not empty.  */
3639       for (j = 0; j < ndests; ++j)
3640         {
3641           bitset_t intersec; /* Intersection sets, see below.  */
3642           bitset_t remains;
3643           /* Flags, see below.  */
3644           bitset_word_t has_intersec, not_subset, not_consumed;
3645
3646           /* Optimization, skip if this state doesn't accept the character.  */
3647           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3648             continue;
3649
3650           /* Enumerate the intersection set of this state and 'accepts'.  */
3651           has_intersec = 0;
3652           for (k = 0; k < BITSET_WORDS; ++k)
3653             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3654           /* And skip if the intersection set is empty.  */
3655           if (!has_intersec)
3656             continue;
3657
3658           /* Then check if this state is a subset of 'accepts'.  */
3659           not_subset = not_consumed = 0;
3660           for (k = 0; k < BITSET_WORDS; ++k)
3661             {
3662               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3663               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3664             }
3665
3666           /* If this state isn't a subset of 'accepts', create a
3667              new group state, which has the 'remains'. */
3668           if (not_subset)
3669             {
3670               bitset_copy (dests_ch[ndests], remains);
3671               bitset_copy (dests_ch[j], intersec);
3672               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3673               if (__glibc_unlikely (err != REG_NOERROR))
3674                 goto error_return;
3675               ++ndests;
3676             }
3677
3678           /* Put the position in the current group. */
3679           ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3680           if (__glibc_unlikely (! ok))
3681             goto error_return;
3682
3683           /* If all characters are consumed, go to next node. */
3684           if (!not_consumed)
3685             break;
3686         }
3687       /* Some characters remain, create a new group. */
3688       if (j == ndests)
3689         {
3690           bitset_copy (dests_ch[ndests], accepts);
3691           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3692           if (__glibc_unlikely (err != REG_NOERROR))
3693             goto error_return;
3694           ++ndests;
3695           bitset_empty (accepts);
3696         }
3697     }
3698   return ndests;
3699  error_return:
3700   for (j = 0; j < ndests; ++j)
3701     re_node_set_free (dests_node + j);
3702   return -1;
3703 }
3704
3705 #ifdef RE_ENABLE_I18N
3706 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3707    Return the number of the bytes the node accepts.
3708    STR_IDX is the current index of the input string.
3709
3710    This function handles the nodes which can accept one character, or
3711    one collating element like '.', '[a-z]', opposite to the other nodes
3712    can only accept one byte.  */
3713
3714 # ifdef _LIBC
3715 #  include <locale/weight.h>
3716 # endif
3717
3718 static int
3719 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3720                          const re_string_t *input, Idx str_idx)
3721 {
3722   const re_token_t *node = dfa->nodes + node_idx;
3723   int char_len, elem_len;
3724   Idx i;
3725
3726   if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3727     {
3728       unsigned char c = re_string_byte_at (input, str_idx), d;
3729       if (__glibc_likely (c < 0xc2))
3730         return 0;
3731
3732       if (str_idx + 2 > input->len)
3733         return 0;
3734
3735       d = re_string_byte_at (input, str_idx + 1);
3736       if (c < 0xe0)
3737         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3738       else if (c < 0xf0)
3739         {
3740           char_len = 3;
3741           if (c == 0xe0 && d < 0xa0)
3742             return 0;
3743         }
3744       else if (c < 0xf8)
3745         {
3746           char_len = 4;
3747           if (c == 0xf0 && d < 0x90)
3748             return 0;
3749         }
3750       else if (c < 0xfc)
3751         {
3752           char_len = 5;
3753           if (c == 0xf8 && d < 0x88)
3754             return 0;
3755         }
3756       else if (c < 0xfe)
3757         {
3758           char_len = 6;
3759           if (c == 0xfc && d < 0x84)
3760             return 0;
3761         }
3762       else
3763         return 0;
3764
3765       if (str_idx + char_len > input->len)
3766         return 0;
3767
3768       for (i = 1; i < char_len; ++i)
3769         {
3770           d = re_string_byte_at (input, str_idx + i);
3771           if (d < 0x80 || d > 0xbf)
3772             return 0;
3773         }
3774       return char_len;
3775     }
3776
3777   char_len = re_string_char_size_at (input, str_idx);
3778   if (node->type == OP_PERIOD)
3779     {
3780       if (char_len <= 1)
3781         return 0;
3782       /* FIXME: I don't think this if is needed, as both '\n'
3783          and '\0' are char_len == 1.  */
3784       /* '.' accepts any one character except the following two cases.  */
3785       if ((!(dfa->syntax & RE_DOT_NEWLINE)
3786            && re_string_byte_at (input, str_idx) == '\n')
3787           || ((dfa->syntax & RE_DOT_NOT_NULL)
3788               && re_string_byte_at (input, str_idx) == '\0'))
3789         return 0;
3790       return char_len;
3791     }
3792
3793   elem_len = re_string_elem_size_at (input, str_idx);
3794   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3795     return 0;
3796
3797   if (node->type == COMPLEX_BRACKET)
3798     {
3799       const re_charset_t *cset = node->opr.mbcset;
3800 # ifdef _LIBC
3801       const unsigned char *pin
3802         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3803       Idx j;
3804       uint32_t nrules;
3805 # endif /* _LIBC */
3806       int match_len = 0;
3807       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3808                     ? re_string_wchar_at (input, str_idx) : 0);
3809
3810       /* match with multibyte character?  */
3811       for (i = 0; i < cset->nmbchars; ++i)
3812         if (wc == cset->mbchars[i])
3813           {
3814             match_len = char_len;
3815             goto check_node_accept_bytes_match;
3816           }
3817       /* match with character_class?  */
3818       for (i = 0; i < cset->nchar_classes; ++i)
3819         {
3820           wctype_t wt = cset->char_classes[i];
3821           if (__iswctype (wc, wt))
3822             {
3823               match_len = char_len;
3824               goto check_node_accept_bytes_match;
3825             }
3826         }
3827
3828 # ifdef _LIBC
3829       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3830       if (nrules != 0)
3831         {
3832           unsigned int in_collseq = 0;
3833           const int32_t *table, *indirect;
3834           const unsigned char *weights, *extra;
3835           const char *collseqwc;
3836
3837           /* match with collating_symbol?  */
3838           if (cset->ncoll_syms)
3839             extra = (const unsigned char *)
3840               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3841           for (i = 0; i < cset->ncoll_syms; ++i)
3842             {
3843               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3844               /* Compare the length of input collating element and
3845                  the length of current collating element.  */
3846               if (*coll_sym != elem_len)
3847                 continue;
3848               /* Compare each bytes.  */
3849               for (j = 0; j < *coll_sym; j++)
3850                 if (pin[j] != coll_sym[1 + j])
3851                   break;
3852               if (j == *coll_sym)
3853                 {
3854                   /* Match if every bytes is equal.  */
3855                   match_len = j;
3856                   goto check_node_accept_bytes_match;
3857                 }
3858             }
3859
3860           if (cset->nranges)
3861             {
3862               if (elem_len <= char_len)
3863                 {
3864                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3865                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3866                 }
3867               else
3868                 in_collseq = find_collation_sequence_value (pin, elem_len);
3869             }
3870           /* match with range expression?  */
3871           /* FIXME: Implement rational ranges here, too.  */
3872           for (i = 0; i < cset->nranges; ++i)
3873             if (cset->range_starts[i] <= in_collseq
3874                 && in_collseq <= cset->range_ends[i])
3875               {
3876                 match_len = elem_len;
3877                 goto check_node_accept_bytes_match;
3878               }
3879
3880           /* match with equivalence_class?  */
3881           if (cset->nequiv_classes)
3882             {
3883               const unsigned char *cp = pin;
3884               table = (const int32_t *)
3885                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3886               weights = (const unsigned char *)
3887                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3888               extra = (const unsigned char *)
3889                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3890               indirect = (const int32_t *)
3891                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3892               int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3893               int32_t rule = idx >> 24;
3894               idx &= 0xffffff;
3895               if (idx > 0)
3896                 {
3897                   size_t weight_len = weights[idx];
3898                   for (i = 0; i < cset->nequiv_classes; ++i)
3899                     {
3900                       int32_t equiv_class_idx = cset->equiv_classes[i];
3901                       int32_t equiv_class_rule = equiv_class_idx >> 24;
3902                       equiv_class_idx &= 0xffffff;
3903                       if (weights[equiv_class_idx] == weight_len
3904                           && equiv_class_rule == rule
3905                           && memcmp (weights + idx + 1,
3906                                      weights + equiv_class_idx + 1,
3907                                      weight_len) == 0)
3908                         {
3909                           match_len = elem_len;
3910                           goto check_node_accept_bytes_match;
3911                         }
3912                     }
3913                 }
3914             }
3915         }
3916       else
3917 # endif /* _LIBC */
3918         {
3919           /* match with range expression?  */
3920           for (i = 0; i < cset->nranges; ++i)
3921             {
3922               if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3923                 {
3924                   match_len = char_len;
3925                   goto check_node_accept_bytes_match;
3926                 }
3927             }
3928         }
3929     check_node_accept_bytes_match:
3930       if (!cset->non_match)
3931         return match_len;
3932       else
3933         {
3934           if (match_len > 0)
3935             return 0;
3936           else
3937             return (elem_len > char_len) ? elem_len : char_len;
3938         }
3939     }
3940   return 0;
3941 }
3942
3943 # ifdef _LIBC
3944 static unsigned int
3945 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3946 {
3947   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3948   if (nrules == 0)
3949     {
3950       if (mbs_len == 1)
3951         {
3952           /* No valid character.  Match it as a single byte character.  */
3953           const unsigned char *collseq = (const unsigned char *)
3954             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3955           return collseq[mbs[0]];
3956         }
3957       return UINT_MAX;
3958     }
3959   else
3960     {
3961       int32_t idx;
3962       const unsigned char *extra = (const unsigned char *)
3963         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3964       int32_t extrasize = (const unsigned char *)
3965         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3966
3967       for (idx = 0; idx < extrasize;)
3968         {
3969           int mbs_cnt;
3970           bool found = false;
3971           int32_t elem_mbs_len;
3972           /* Skip the name of collating element name.  */
3973           idx = idx + extra[idx] + 1;
3974           elem_mbs_len = extra[idx++];
3975           if (mbs_len == elem_mbs_len)
3976             {
3977               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3978                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3979                   break;
3980               if (mbs_cnt == elem_mbs_len)
3981                 /* Found the entry.  */
3982                 found = true;
3983             }
3984           /* Skip the byte sequence of the collating element.  */
3985           idx += elem_mbs_len;
3986           /* Adjust for the alignment.  */
3987           idx = (idx + 3) & ~3;
3988           /* Skip the collation sequence value.  */
3989           idx += sizeof (uint32_t);
3990           /* Skip the wide char sequence of the collating element.  */
3991           idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
3992           /* If we found the entry, return the sequence value.  */
3993           if (found)
3994             return *(uint32_t *) (extra + idx);
3995           /* Skip the collation sequence value.  */
3996           idx += sizeof (uint32_t);
3997         }
3998       return UINT_MAX;
3999     }
4000 }
4001 # endif /* _LIBC */
4002 #endif /* RE_ENABLE_I18N */
4003
4004 /* Check whether the node accepts the byte which is IDX-th
4005    byte of the INPUT.  */
4006
4007 static bool
4008 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4009                    Idx idx)
4010 {
4011   unsigned char ch;
4012   ch = re_string_byte_at (&mctx->input, idx);
4013   switch (node->type)
4014     {
4015     case CHARACTER:
4016       if (node->opr.c != ch)
4017         return false;
4018       break;
4019
4020     case SIMPLE_BRACKET:
4021       if (!bitset_contain (node->opr.sbcset, ch))
4022         return false;
4023       break;
4024
4025 #ifdef RE_ENABLE_I18N
4026     case OP_UTF8_PERIOD:
4027       if (ch >= ASCII_CHARS)
4028         return false;
4029       FALLTHROUGH;
4030 #endif
4031     case OP_PERIOD:
4032       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4033           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4034         return false;
4035       break;
4036
4037     default:
4038       return false;
4039     }
4040
4041   if (node->constraint)
4042     {
4043       /* The node has constraints.  Check whether the current context
4044          satisfies the constraints.  */
4045       unsigned int context = re_string_context_at (&mctx->input, idx,
4046                                                    mctx->eflags);
4047       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4048         return false;
4049     }
4050
4051   return true;
4052 }
4053
4054 /* Extend the buffers, if the buffers have run out.  */
4055
4056 static reg_errcode_t
4057 __attribute_warn_unused_result__
4058 extend_buffers (re_match_context_t *mctx, int min_len)
4059 {
4060   reg_errcode_t ret;
4061   re_string_t *pstr = &mctx->input;
4062
4063   /* Avoid overflow.  */
4064   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4065                         <= pstr->bufs_len))
4066     return REG_ESPACE;
4067
4068   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
4069   ret = re_string_realloc_buffers (pstr,
4070                                    MAX (min_len,
4071                                         MIN (pstr->len, pstr->bufs_len * 2)));
4072   if (__glibc_unlikely (ret != REG_NOERROR))
4073     return ret;
4074
4075   if (mctx->state_log != NULL)
4076     {
4077       /* And double the length of state_log.  */
4078       /* XXX We have no indication of the size of this buffer.  If this
4079          allocation fail we have no indication that the state_log array
4080          does not have the right size.  */
4081       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4082                                               pstr->bufs_len + 1);
4083       if (__glibc_unlikely (new_array == NULL))
4084         return REG_ESPACE;
4085       mctx->state_log = new_array;
4086     }
4087
4088   /* Then reconstruct the buffers.  */
4089   if (pstr->icase)
4090     {
4091 #ifdef RE_ENABLE_I18N
4092       if (pstr->mb_cur_max > 1)
4093         {
4094           ret = build_wcs_upper_buffer (pstr);
4095           if (__glibc_unlikely (ret != REG_NOERROR))
4096             return ret;
4097         }
4098       else
4099 #endif /* RE_ENABLE_I18N  */
4100         build_upper_buffer (pstr);
4101     }
4102   else
4103     {
4104 #ifdef RE_ENABLE_I18N
4105       if (pstr->mb_cur_max > 1)
4106         build_wcs_buffer (pstr);
4107       else
4108 #endif /* RE_ENABLE_I18N  */
4109         {
4110           if (pstr->trans != NULL)
4111             re_string_translate_buffer (pstr);
4112         }
4113     }
4114   return REG_NOERROR;
4115 }
4116
4117 \f
4118 /* Functions for matching context.  */
4119
4120 /* Initialize MCTX.  */
4121
4122 static reg_errcode_t
4123 __attribute_warn_unused_result__
4124 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4125 {
4126   mctx->eflags = eflags;
4127   mctx->match_last = -1;
4128   if (n > 0)
4129     {
4130       /* Avoid overflow.  */
4131       size_t max_object_size =
4132         MAX (sizeof (struct re_backref_cache_entry),
4133              sizeof (re_sub_match_top_t *));
4134       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4135         return REG_ESPACE;
4136
4137       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4138       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4139       if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
4140         return REG_ESPACE;
4141     }
4142   /* Already zero-ed by the caller.
4143      else
4144        mctx->bkref_ents = NULL;
4145      mctx->nbkref_ents = 0;
4146      mctx->nsub_tops = 0;  */
4147   mctx->abkref_ents = n;
4148   mctx->max_mb_elem_len = 1;
4149   mctx->asub_tops = n;
4150   return REG_NOERROR;
4151 }
4152
4153 /* Clean the entries which depend on the current input in MCTX.
4154    This function must be invoked when the matcher changes the start index
4155    of the input, or changes the input string.  */
4156
4157 static void
4158 match_ctx_clean (re_match_context_t *mctx)
4159 {
4160   Idx st_idx;
4161   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4162     {
4163       Idx sl_idx;
4164       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4165       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4166         {
4167           re_sub_match_last_t *last = top->lasts[sl_idx];
4168           re_free (last->path.array);
4169           re_free (last);
4170         }
4171       re_free (top->lasts);
4172       if (top->path)
4173         {
4174           re_free (top->path->array);
4175           re_free (top->path);
4176         }
4177       re_free (top);
4178     }
4179
4180   mctx->nsub_tops = 0;
4181   mctx->nbkref_ents = 0;
4182 }
4183
4184 /* Free all the memory associated with MCTX.  */
4185
4186 static void
4187 match_ctx_free (re_match_context_t *mctx)
4188 {
4189   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4190   match_ctx_clean (mctx);
4191   re_free (mctx->sub_tops);
4192   re_free (mctx->bkref_ents);
4193 }
4194
4195 /* Add a new backreference entry to MCTX.
4196    Note that we assume that caller never call this function with duplicate
4197    entry, and call with STR_IDX which isn't smaller than any existing entry.
4198 */
4199
4200 static reg_errcode_t
4201 __attribute_warn_unused_result__
4202 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4203                      Idx to)
4204 {
4205   if (mctx->nbkref_ents >= mctx->abkref_ents)
4206     {
4207       struct re_backref_cache_entry* new_entry;
4208       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4209                               mctx->abkref_ents * 2);
4210       if (__glibc_unlikely (new_entry == NULL))
4211         {
4212           re_free (mctx->bkref_ents);
4213           return REG_ESPACE;
4214         }
4215       mctx->bkref_ents = new_entry;
4216       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4217               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4218       mctx->abkref_ents *= 2;
4219     }
4220   if (mctx->nbkref_ents > 0
4221       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4222     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4223
4224   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4225   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4226   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4227   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4228
4229   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4230      If bit N is clear, means that this entry won't epsilon-transition to
4231      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4232      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4233      such node.
4234
4235      A backreference does not epsilon-transition unless it is empty, so set
4236      to all zeros if FROM != TO.  */
4237   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4238     = (from == to ? -1 : 0);
4239
4240   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4241   if (mctx->max_mb_elem_len < to - from)
4242     mctx->max_mb_elem_len = to - from;
4243   return REG_NOERROR;
4244 }
4245
4246 /* Return the first entry with the same str_idx, or -1 if none is
4247    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4248
4249 static Idx
4250 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4251 {
4252   Idx left, right, mid, last;
4253   last = right = mctx->nbkref_ents;
4254   for (left = 0; left < right;)
4255     {
4256       mid = (left + right) / 2;
4257       if (mctx->bkref_ents[mid].str_idx < str_idx)
4258         left = mid + 1;
4259       else
4260         right = mid;
4261     }
4262   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4263     return left;
4264   else
4265     return -1;
4266 }
4267
4268 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4269    at STR_IDX.  */
4270
4271 static reg_errcode_t
4272 __attribute_warn_unused_result__
4273 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4274 {
4275 #ifdef DEBUG
4276   assert (mctx->sub_tops != NULL);
4277   assert (mctx->asub_tops > 0);
4278 #endif
4279   if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4280     {
4281       Idx new_asub_tops = mctx->asub_tops * 2;
4282       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4283                                                    re_sub_match_top_t *,
4284                                                    new_asub_tops);
4285       if (__glibc_unlikely (new_array == NULL))
4286         return REG_ESPACE;
4287       mctx->sub_tops = new_array;
4288       mctx->asub_tops = new_asub_tops;
4289     }
4290   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4291   if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4292     return REG_ESPACE;
4293   mctx->sub_tops[mctx->nsub_tops]->node = node;
4294   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4295   return REG_NOERROR;
4296 }
4297
4298 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4299    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4300
4301 static re_sub_match_last_t *
4302 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4303 {
4304   re_sub_match_last_t *new_entry;
4305   if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4306     {
4307       Idx new_alasts = 2 * subtop->alasts + 1;
4308       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4309                                                     re_sub_match_last_t *,
4310                                                     new_alasts);
4311       if (__glibc_unlikely (new_array == NULL))
4312         return NULL;
4313       subtop->lasts = new_array;
4314       subtop->alasts = new_alasts;
4315     }
4316   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4317   if (__glibc_likely (new_entry != NULL))
4318     {
4319       subtop->lasts[subtop->nlasts] = new_entry;
4320       new_entry->node = node;
4321       new_entry->str_idx = str_idx;
4322       ++subtop->nlasts;
4323     }
4324   return new_entry;
4325 }
4326
4327 static void
4328 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4329                re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4330 {
4331   sctx->sifted_states = sifted_sts;
4332   sctx->limited_states = limited_sts;
4333   sctx->last_node = last_node;
4334   sctx->last_str_idx = last_str_idx;
4335   re_node_set_init_empty (&sctx->limits);
4336 }