posix/regexec.c

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  22                                      re_string_t *input, int n);
  23 static void match_ctx_clean (re_match_context_t *mctx);
  24 static void match_ctx_free (re_match_context_t *cache);
  25 static void match_ctx_free_subtops (re_match_context_t *mctx);
  26 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
  27                                           int str_idx, int from, int to);
  28 static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
  29 static void match_ctx_clear_flag (re_match_context_t *mctx);
  30 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
  31                                            int str_idx);
  32 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  33                                                    int node, int str_idx);
  34 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  35                            re_dfastate_t **limited_sts, int last_node,
  36                            int last_str_idx, int check_subexp);
  37 static reg_errcode_t re_search_internal (const regex_t *preg,
  38                                          const char *string, int length,
  39                                          int start, int range, int stop,
  40                                          size_t nmatch, regmatch_t pmatch[],
  41                                          int eflags);
  42 static int re_search_2_stub (struct re_pattern_buffer *bufp,
  43                              const char *string1, int length1,
  44                              const char *string2, int length2,
  45                              int start, int range, struct re_registers *regs,
  46                              int stop, int ret_len);
  47 static int re_search_stub (struct re_pattern_buffer *bufp,
  48                            const char *string, int length, int start,
  49                            int range, int stop, struct re_registers *regs,
  50                            int ret_len);
  51 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  52                               int nregs, int regs_allocated);
  53 static re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
  54                                                   const regex_t *preg,
  55                                                   const re_match_context_t *mctx,
  56                                                   int idx);
  57 static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
  58                                              re_match_context_t *mctx);
  59 static int check_matching (const regex_t *preg, re_match_context_t *mctx,
  60                            int fl_search, int fl_longest_match);
  61 static int check_halt_node_context (const re_dfa_t *dfa, int node,
  62                                     unsigned int context);
  63 static int check_halt_state_context (const regex_t *preg,
  64                                      const re_dfastate_t *state,
  65                                      const re_match_context_t *mctx, int idx);
  66 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
  67                          int cur_idx, int nmatch);
  68 static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
  69                               const re_match_context_t *mctx,
  70                               int *pidx, int node, re_node_set *eps_via_nodes,
  71                               struct re_fail_stack_t *fs);
  72 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  73                                       int str_idx, int *dests, int nregs,
  74                                       regmatch_t *regs,
  75                                       re_node_set *eps_via_nodes);
  76 static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
  77                            regmatch_t *regs, re_node_set *eps_via_nodes);
  78 static reg_errcode_t set_regs (const regex_t *preg,
  79                                const re_match_context_t *mctx,
  80                                size_t nmatch, regmatch_t *pmatch,
  81                                int fl_backtrack);
  82 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
  83
  84 #ifdef RE_ENABLE_I18N
  85 static int sift_states_iter_mb (const regex_t *preg,
  86                                 const re_match_context_t *mctx,
  87                                 re_sift_context_t *sctx,
  88                                 int node_idx, int str_idx, int max_str_idx);
  89 #endif /* RE_ENABLE_I18N */
  90 static reg_errcode_t sift_states_backward (const regex_t *preg,
  91                                            re_match_context_t *mctx,
  92                                            re_sift_context_t *sctx);
  93 static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
  94                                               re_match_context_t *mctx,
  95                                               re_sift_context_t *sctx,
  96                                               int str_idx,
  97                                               re_node_set *dest_nodes);
  98 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
  99                                             re_node_set *dest_nodes,
 100                                             const re_node_set *candidates);
 101 static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
 102                                             re_node_set *dest_nodes,
 103                                             const re_node_set *and_nodes);
 104 static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
 105                              re_match_context_t *mctx, int dst_node,
 106                              int dst_idx, int src_node, int src_idx);
 107 static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
 108                                       int limit, re_node_set *eclosures,
 109                                       int subexp_idx, int node, int str_idx);
 110 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
 111                                           re_node_set *dest_nodes,
 112                                           const re_node_set *candidates,
 113                                           re_node_set *limits,
 114                                           struct re_backref_cache_entry *bkref_ents,
 115                                           int str_idx);
 116 static reg_errcode_t sift_states_bkref (const regex_t *preg,
 117                                         re_match_context_t *mctx,
 118                                         re_sift_context_t *sctx,
 119                                         int str_idx, re_node_set *dest_nodes);
 120 static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
 121                                               int next_state_log_idx);
 122 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
 123                                         re_dfastate_t **src, int num);
 124 static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
 125                                      re_match_context_t *mctx,
 126                                      re_dfastate_t *state, int fl_search);
 127 static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
 128                                                 re_match_context_t *mctx,
 129                                                 re_node_set *cur_nodes,
 130                                                 int str_idx);
 131 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
 132                                         re_dfastate_t *pstate,
 133                                         int fl_search,
 134                                         re_match_context_t *mctx);
 135 #ifdef RE_ENABLE_I18N
 136 static reg_errcode_t transit_state_mb (const regex_t *preg,
 137                                        re_dfastate_t *pstate,
 138                                        re_match_context_t *mctx);
 139 #endif /* RE_ENABLE_I18N */
 140 static reg_errcode_t transit_state_bkref (const regex_t *preg,
 141                                           re_node_set *nodes,
 142                                           re_match_context_t *mctx);
 143 static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
 144                                  int bkref_node, int bkref_str_idx);
 145 static reg_errcode_t get_subexp_sub (const regex_t *preg,
 146                                      re_match_context_t *mctx,
 147                                      re_sub_match_top_t *sub_top,
 148                                      re_sub_match_last_t *sub_last,
 149                                      int bkref_node, int bkref_str);
 150 static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes,
 151                              int subexp_idx, int fl_open);
 152 static reg_errcode_t check_arrival (const regex_t *preg,
 153                                     re_match_context_t *mctx,
 154                                     state_array_t *path, int top_node,
 155                                     int top_str, int last_node, int last_str,
 156                                     int fl_open);
 157 static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
 158                                                    re_dfa_t *dfa,
 159                                                    re_match_context_t *mctx,
 160                                                    int str_idx,
 161                                                    re_node_set *cur_nodes,
 162                                                    re_node_set *next_nodes);
 163 static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
 164                                                re_node_set *cur_nodes,
 165                                                int ex_subexp, int fl_open);
 166 static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
 167                                                    re_node_set *dst_nodes,
 168                                                    int target, int ex_subexp,
 169                                                    int fl_open);
 170 static reg_errcode_t expand_bkref_cache (const regex_t *preg,
 171                                          re_match_context_t *mctx,
 172                                          re_node_set *cur_nodes, int cur_str,
 173                                          int last_str, int subexp_num,
 174                                          int fl_open);
 175 static re_dfastate_t **build_trtable (const regex_t *dfa,
 176                                       const re_dfastate_t *state,
 177                                       int fl_search);
 178 #ifdef RE_ENABLE_I18N
 179 static int check_node_accept_bytes (const regex_t *preg, int node_idx,
 180                                     const re_string_t *input, int idx);
 181 # ifdef _LIBC
 182 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 183                                                    size_t name_len);
 184 # endif /* _LIBC */
 185 #endif /* RE_ENABLE_I18N */
 186 static int group_nodes_into_DFAstates (const regex_t *dfa,
 187                                        const re_dfastate_t *state,
 188                                        re_node_set *states_node,
 189                                        bitset *states_ch);
 190 static int check_node_accept (const regex_t *preg, const re_token_t *node,
 191                               const re_match_context_t *mctx, int idx);
 192 static reg_errcode_t extend_buffers (re_match_context_t *mctx);
 193 \f
 194 /* Entry point for POSIX code.  */
 195
 196 /* regexec searches for a given pattern, specified by PREG, in the
 197    string STRING.
 198
 199    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 200    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 201    least NMATCH elements, and we set them to the offsets of the
 202    corresponding matched substrings.
 203
 204    EFLAGS specifies `execution flags' which affect matching: if
 205    REG_NOTBOL is set, then ^ does not match at the beginning of the
 206    string; if REG_NOTEOL is set, then $ does not match at the end.
 207
 208    We return 0 if we find a match and REG_NOMATCH if not.  */
 209
 210 int
 211 regexec (preg, string, nmatch, pmatch, eflags)
 212     const regex_t *__restrict preg;
 213     const char *__restrict string;
 214     size_t nmatch;
 215     regmatch_t pmatch[];
 216     int eflags;
 217 {
 218   reg_errcode_t err;
 219   int length = strlen (string);
 220   if (preg->no_sub)
 221     err = re_search_internal (preg, string, length, 0, length, length, 0,
 222                               NULL, eflags);
 223   else
 224     err = re_search_internal (preg, string, length, 0, length, length, nmatch,
 225                               pmatch, eflags);
 226   return err != REG_NOERROR;
 227 }
 228 #ifdef _LIBC
 229 weak_alias (__regexec, regexec)
 230 #endif
 231
 232 /* Entry points for GNU code.  */
 233
 234 /* re_match, re_search, re_match_2, re_search_2
 235
 236    The former two functions operate on STRING with length LENGTH,
 237    while the later two operate on concatenation of STRING1 and STRING2
 238    with lengths LENGTH1 and LENGTH2, respectively.
 239
 240    re_match() matches the compiled pattern in BUFP against the string,
 241    starting at index START.
 242
 243    re_search() first tries matching at index START, then it tries to match
 244    starting from index START + 1, and so on.  The last start position tried
 245    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 246    way as re_match().)
 247
 248    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 249    the first STOP characters of the concatenation of the strings should be
 250    concerned.
 251
 252    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 253    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
 254    computed relative to the concatenation, not relative to the individual
 255    strings.)
 256
 257    On success, re_match* functions return the length of the match, re_search*
 258    return the position of the start of the match.  Return value -1 means no
 259    match was found and -2 indicates an internal error.  */
 260
 261 int
 262 re_match (bufp, string, length, start, regs)
 263     struct re_pattern_buffer *bufp;
 264     const char *string;
 265     int length, start;
 266     struct re_registers *regs;
 267 {
 268   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
 269 }
 270 #ifdef _LIBC
 271 weak_alias (__re_match, re_match)
 272 #endif
 273
 274 int
 275 re_search (bufp, string, length, start, range, regs)
 276     struct re_pattern_buffer *bufp;
 277     const char *string;
 278     int length, start, range;
 279     struct re_registers *regs;
 280 {
 281   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
 282 }
 283 #ifdef _LIBC
 284 weak_alias (__re_search, re_search)
 285 #endif
 286
 287 int
 288 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
 289     struct re_pattern_buffer *bufp;
 290     const char *string1, *string2;
 291     int length1, length2, start, stop;
 292     struct re_registers *regs;
 293 {
 294   return re_search_2_stub (bufp, string1, length1, string2, length2,
 295                            start, 0, regs, stop, 1);
 296 }
 297 #ifdef _LIBC
 298 weak_alias (__re_match_2, re_match_2)
 299 #endif
 300
 301 int
 302 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
 303     struct re_pattern_buffer *bufp;
 304     const char *string1, *string2;
 305     int length1, length2, start, range, stop;
 306     struct re_registers *regs;
 307 {
 308   return re_search_2_stub (bufp, string1, length1, string2, length2,
 309                            start, range, regs, stop, 0);
 310 }
 311 #ifdef _LIBC
 312 weak_alias (__re_search_2, re_search_2)
 313 #endif
 314
 315 static int
 316 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
 317                   stop, ret_len)
 318     struct re_pattern_buffer *bufp;
 319     const char *string1, *string2;
 320     int length1, length2, start, range, stop, ret_len;
 321     struct re_registers *regs;
 322 {
 323   const char *str;
 324   int rval;
 325   int len = length1 + length2;
 326   int free_str = 0;
 327
 328   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
 329     return -2;
 330
 331   /* Concatenate the strings.  */
 332   if (length2 > 0)
 333     if (length1 > 0)
 334       {
 335         char *s = re_malloc (char, len);
 336
 337         if (BE (s == NULL, 0))
 338           return -2;
 339         memcpy (s, string1, length1);
 340         memcpy (s + length1, string2, length2);
 341         str = s;
 342         free_str = 1;
 343       }
 344     else
 345       str = string2;
 346   else
 347     str = string1;
 348
 349   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 350                          ret_len);
 351   if (free_str)
 352     re_free ((char *) str);
 353   return rval;
 354 }
 355
 356 /* The parameters have the same meaning as those of re_search.
 357    Additional parameters:
 358    If RET_LEN is nonzero the length of the match is returned (re_match style);
 359    otherwise the position of the match is returned.  */
 360
 361 static int
 362 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
 363     struct re_pattern_buffer *bufp;
 364     const char *string;
 365     int length, start, range, stop, ret_len;
 366     struct re_registers *regs;
 367 {
 368   reg_errcode_t result;
 369   regmatch_t *pmatch;
 370   int nregs, rval;
 371   int eflags = 0;
 372
 373   /* Check for out-of-range.  */
 374   if (BE (start < 0 || start > length, 0))
 375     return -1;
 376   if (BE (start + range > length, 0))
 377     range = length - start;
 378   else if (BE (start + range < 0, 0))
 379     range = -start;
 380
 381   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 382   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 383
 384   /* Compile fastmap if we haven't yet.  */
 385   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 386     re_compile_fastmap (bufp);
 387
 388   if (BE (bufp->no_sub, 0))
 389     regs = NULL;
 390
 391   /* We need at least 1 register.  */
 392   if (regs == NULL)
 393     nregs = 1;
 394   else if (BE (bufp->regs_allocated == REGS_FIXED &&
 395                regs->num_regs < bufp->re_nsub + 1, 0))
 396     {
 397       nregs = regs->num_regs;
 398       if (BE (nregs < 1, 0))
 399         {
 400           /* Nothing can be copied to regs.  */
 401           regs = NULL;
 402           nregs = 1;
 403         }
 404     }
 405   else
 406     nregs = bufp->re_nsub + 1;
 407   pmatch = re_malloc (regmatch_t, nregs);
 408   if (BE (pmatch == NULL, 0))
 409     return -2;
 410
 411   result = re_search_internal (bufp, string, length, start, range, stop,
 412                                nregs, pmatch, eflags);
 413
 414   rval = 0;
 415
 416   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 417   if (result != REG_NOERROR)
 418     rval = -1;
 419   else if (regs != NULL)
 420     {
 421       /* If caller wants register contents data back, copy them.  */
 422       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 423                                            bufp->regs_allocated);
 424       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 425         rval = -2;
 426     }
 427
 428   if (BE (rval == 0, 1))
 429     {
 430       if (ret_len)
 431         {
 432           assert (pmatch[0].rm_so == start);
 433           rval = pmatch[0].rm_eo - start;
 434         }
 435       else
 436         rval = pmatch[0].rm_so;
 437     }
 438   re_free (pmatch);
 439   return rval;
 440 }
 441
 442 static unsigned
 443 re_copy_regs (regs, pmatch, nregs, regs_allocated)
 444     struct re_registers *regs;
 445     regmatch_t *pmatch;
 446     int nregs, regs_allocated;
 447 {
 448   int rval = REGS_REALLOCATE;
 449   int i;
 450   int need_regs = nregs + 1;
 451   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
 452      uses.  */
 453
 454   /* Have the register data arrays been allocated?  */
 455   if (regs_allocated == REGS_UNALLOCATED)
 456     { /* No.  So allocate them with malloc.  */
 457       regs->start = re_malloc (regoff_t, need_regs);
 458       if (BE (regs->start == NULL, 0))
 459         return REGS_UNALLOCATED;
 460       regs->end = re_malloc (regoff_t, need_regs);
 461       if (BE (regs->end == NULL, 0))
 462         {
 463           re_free (regs->start);
 464           return REGS_UNALLOCATED;
 465         }
 466       regs->num_regs = need_regs;
 467     }
 468   else if (regs_allocated == REGS_REALLOCATE)
 469     { /* Yes.  If we need more elements than were already
 470          allocated, reallocate them.  If we need fewer, just
 471          leave it alone.  */
 472       if (need_regs > regs->num_regs)
 473         {
 474           regs->start = re_realloc (regs->start, regoff_t, need_regs);
 475           if (BE (regs->start == NULL, 0))
 476             {
 477               if (regs->end != NULL)
 478                 re_free (regs->end);
 479               return REGS_UNALLOCATED;
 480             }
 481           regs->end = re_realloc (regs->end, regoff_t, need_regs);
 482           if (BE (regs->end == NULL, 0))
 483             {
 484               re_free (regs->start);
 485               return REGS_UNALLOCATED;
 486             }
 487           regs->num_regs = need_regs;
 488         }
 489     }
 490   else
 491     {
 492       assert (regs_allocated == REGS_FIXED);
 493       /* This function may not be called with REGS_FIXED and nregs too big.  */
 494       assert (regs->num_regs >= nregs);
 495       rval = REGS_FIXED;
 496     }
 497
 498   /* Copy the regs.  */
 499   for (i = 0; i < nregs; ++i)
 500     {
 501       regs->start[i] = pmatch[i].rm_so;
 502       regs->end[i] = pmatch[i].rm_eo;
 503     }
 504   for ( ; i < regs->num_regs; ++i)
 505     regs->start[i] = regs->end[i] = -1;
 506
 507   return rval;
 508 }
 509
 510 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 511    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 512    this memory for recording register information.  STARTS and ENDS
 513    must be allocated using the malloc library routine, and must each
 514    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 515
 516    If NUM_REGS == 0, then subsequent matches should allocate their own
 517    register data.
 518
 519    Unless this function is called, the first search or match using
 520    PATTERN_BUFFER will allocate its own register data, without
 521    freeing the old data.  */
 522
 523 void
 524 re_set_registers (bufp, regs, num_regs, starts, ends)
 525     struct re_pattern_buffer *bufp;
 526     struct re_registers *regs;
 527     unsigned num_regs;
 528     regoff_t *starts, *ends;
 529 {
 530   if (num_regs)
 531     {
 532       bufp->regs_allocated = REGS_REALLOCATE;
 533       regs->num_regs = num_regs;
 534       regs->start = starts;
 535       regs->end = ends;
 536     }
 537   else
 538     {
 539       bufp->regs_allocated = REGS_UNALLOCATED;
 540       regs->num_regs = 0;
 541       regs->start = regs->end = (regoff_t *) 0;
 542     }
 543 }
 544 #ifdef _LIBC
 545 weak_alias (__re_set_registers, re_set_registers)
 546 #endif
 547 \f
 548 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 549    them unless specifically requested.  */
 550
 551 #if defined _REGEX_RE_COMP || defined _LIBC
 552 int
 553 # ifdef _LIBC
 554 weak_function
 555 # endif
 556 re_exec (s)
 557      const char *s;
 558 {
 559   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 560 }
 561 #endif /* _REGEX_RE_COMP */
 562 \f
 563 static re_node_set empty_set;
 564
 565 /* Internal entry point.  */
 566
 567 /* Searches for a compiled pattern PREG in the string STRING, whose
 568    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 569    mingings with regexec.  START, and RANGE have the same meanings
 570    with re_search.
 571    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 572    otherwise return the error code.
 573    Note: We assume front end functions already check ranges.
 574    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
 575
 576 static reg_errcode_t
 577 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
 578                     eflags)
 579     const regex_t *preg;
 580     const char *string;
 581     int length, start, range, stop, eflags;
 582     size_t nmatch;
 583     regmatch_t pmatch[];
 584 {
 585   reg_errcode_t err;
 586   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 587   re_string_t input;
 588   int left_lim, right_lim, incr;
 589   int fl_longest_match, match_first, match_last = -1;
 590   int fast_translate, sb;
 591   re_match_context_t mctx;
 592   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
 593                     && range && !preg->can_be_null) ? preg->fastmap : NULL);
 594
 595   /* Check if the DFA haven't been compiled.  */
 596   if (BE (preg->used == 0 || dfa->init_state == NULL
 597           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 598           || dfa->init_state_begbuf == NULL, 0))
 599     return REG_NOMATCH;
 600
 601   re_node_set_init_empty (&empty_set);
 602   memset (&mctx, '\0', sizeof (re_match_context_t));
 603
 604   /* We must check the longest matching, if nmatch > 0.  */
 605   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 606
 607   err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
 608                             preg->translate, preg->syntax & RE_ICASE);
 609   if (BE (err != REG_NOERROR, 0))
 610     goto free_return;
 611   input.stop = stop;
 612
 613   err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
 614   if (BE (err != REG_NOERROR, 0))
 615     goto free_return;
 616
 617   /* We will log all the DFA states through which the dfa pass,
 618      if nmatch > 1, or this dfa has "multibyte node", which is a
 619      back-reference or a node which can accept multibyte character or
 620      multi character collating element.  */
 621   if (nmatch > 1 || dfa->has_mb_node)
 622     {
 623       mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
 624       if (BE (mctx.state_log == NULL, 0))
 625         {
 626           err = REG_ESPACE;
 627           goto free_return;
 628         }
 629     }
 630   else
 631     mctx.state_log = NULL;
 632
 633 #ifdef DEBUG
 634   /* We assume front-end functions already check them.  */
 635   assert (start + range >= 0 && start + range <= length);
 636 #endif
 637
 638   match_first = start;
 639   input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 640                        : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
 641
 642   /* Check incrementally whether of not the input string match.  */
 643   incr = (range < 0) ? -1 : 1;
 644   left_lim = (range < 0) ? start + range : start;
 645   right_lim = (range < 0) ? start : start + range;
 646   sb = MB_CUR_MAX == 1;
 647   fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
 648
 649   for (;;)
 650     {
 651       /* At first get the current byte from input string.  */
 652       if (fastmap)
 653         {
 654           if (BE (fast_translate, 1))
 655             {
 656               unsigned RE_TRANSLATE_TYPE t
 657                 = (unsigned RE_TRANSLATE_TYPE) preg->translate;
 658               if (BE (range >= 0, 1))
 659                 {
 660                   if (BE (t != NULL, 0))
 661                     {
 662                       while (BE (match_first < right_lim, 1)
 663                              && !fastmap[t[(unsigned char) string[match_first]]])
 664                         ++match_first;
 665                     }
 666                   else
 667                     {
 668                       while (BE (match_first < right_lim, 1)
 669                              && !fastmap[(unsigned char) string[match_first]])
 670                         ++match_first;
 671                     }
 672                   if (BE (match_first == right_lim, 0))
 673                     {
 674                       int ch = match_first >= length
 675                                ? 0 : (unsigned char) string[match_first];
 676                       if (!fastmap[t ? t[ch] : ch])
 677                         break;
 678                     }
 679                 }
 680               else
 681                 {
 682                   while (match_first >= left_lim)
 683                     {
 684                       int ch = match_first >= length
 685                                ? 0 : (unsigned char) string[match_first];
 686                       if (fastmap[t ? t[ch] : ch])
 687                         break;
 688                       --match_first;
 689                     }
 690                   if (match_first < left_lim)
 691                     break;
 692                 }
 693             }
 694           else
 695             {
 696               int ch;
 697
 698               do
 699                 {
 700                   /* In this case, we can't determine easily the current byte,
 701                      since it might be a component byte of a multibyte
 702                      character.  Then we use the constructed buffer
 703                      instead.  */
 704                   /* If MATCH_FIRST is out of the valid range, reconstruct the
 705                      buffers.  */
 706                   if (input.raw_mbs_idx + input.valid_len <= match_first
 707                       || match_first < input.raw_mbs_idx)
 708                     {
 709                       err = re_string_reconstruct (&input, match_first, eflags,
 710                                                    preg->newline_anchor);
 711                       if (BE (err != REG_NOERROR, 0))
 712                         goto free_return;
 713                     }
 714                   /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 715                      Note that MATCH_FIRST must not be smaller than 0.  */
 716                   ch = ((match_first >= length) ? 0
 717                        : re_string_byte_at (&input,
 718                                             match_first - input.raw_mbs_idx));
 719                   if (fastmap[ch])
 720                     break;
 721                   match_first += incr;
 722                 }
 723               while (match_first >= left_lim && match_first <= right_lim);
 724               if (! fastmap[ch])
 725                 break;
 726             }
 727         }
 728
 729       /* Reconstruct the buffers so that the matcher can assume that
 730          the matching starts from the begining of the buffer.  */
 731       err = re_string_reconstruct (&input, match_first, eflags,
 732                                    preg->newline_anchor);
 733       if (BE (err != REG_NOERROR, 0))
 734         goto free_return;
 735 #ifdef RE_ENABLE_I18N
 736      /* Eliminate it when it is a component of a multibyte character
 737          and isn't the head of a multibyte character.  */
 738       if (sb || re_string_first_byte (&input, 0))
 739 #endif
 740         {
 741           /* It seems to be appropriate one, then use the matcher.  */
 742           /* We assume that the matching starts from 0.  */
 743           mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 744           match_last = check_matching (preg, &mctx, 0, fl_longest_match);
 745           if (match_last != -1)
 746             {
 747               if (BE (match_last == -2, 0))
 748                 {
 749                   err = REG_ESPACE;
 750                   goto free_return;
 751                 }
 752               else
 753                 {
 754                   mctx.match_last = match_last;
 755                   if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 756                     {
 757                       re_dfastate_t *pstate = mctx.state_log[match_last];
 758                       mctx.last_node = check_halt_state_context (preg, pstate,
 759                                                                  &mctx, match_last);
 760                     }
 761                   if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 762                       || dfa->nbackref)
 763                     {
 764                       err = prune_impossible_nodes (preg, &mctx);
 765                       if (err == REG_NOERROR)
 766                         break;
 767                       if (BE (err != REG_NOMATCH, 0))
 768                         goto free_return;
 769                     }
 770                   else
 771                     break; /* We found a matching.  */
 772                 }
 773             }
 774           match_ctx_clean (&mctx);
 775         }
 776       /* Update counter.  */
 777       match_first += incr;
 778       if (match_first < left_lim || right_lim < match_first)
 779         break;
 780     }
 781
 782   /* Set pmatch[] if we need.  */
 783   if (match_last != -1 && nmatch > 0)
 784     {
 785       int reg_idx;
 786
 787       /* Initialize registers.  */
 788       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 789         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 790
 791       /* Set the points where matching start/end.  */
 792       pmatch[0].rm_so = 0;
 793       pmatch[0].rm_eo = mctx.match_last;
 794
 795       if (!preg->no_sub && nmatch > 1)
 796         {
 797           err = set_regs (preg, &mctx, nmatch, pmatch,
 798                           dfa->has_plural_match && dfa->nbackref > 0);
 799           if (BE (err != REG_NOERROR, 0))
 800             goto free_return;
 801         }
 802
 803       /* At last, add the offset to the each registers, since we slided
 804          the buffers so that We can assume that the matching starts from 0.  */
 805       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 806         if (pmatch[reg_idx].rm_so != -1)
 807           {
 808             pmatch[reg_idx].rm_so += match_first;
 809             pmatch[reg_idx].rm_eo += match_first;
 810           }
 811     }
 812   err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
 813  free_return:
 814   re_free (mctx.state_log);
 815   if (dfa->nbackref)
 816     match_ctx_free (&mctx);
 817   re_string_destruct (&input);
 818   return err;
 819 }
 820
 821 static reg_errcode_t
 822 prune_impossible_nodes (preg, mctx)
 823      const regex_t *preg;
 824      re_match_context_t *mctx;
 825 {
 826   int halt_node, match_last;
 827   reg_errcode_t ret;
 828   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 829   re_dfastate_t **sifted_states;
 830   re_dfastate_t **lim_states = NULL;
 831   re_sift_context_t sctx;
 832 #ifdef DEBUG
 833   assert (mctx->state_log != NULL);
 834 #endif
 835   match_last = mctx->match_last;
 836   halt_node = mctx->last_node;
 837   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 838   if (BE (sifted_states == NULL, 0))
 839     {
 840       ret = REG_ESPACE;
 841       goto free_return;
 842     }
 843   if (dfa->nbackref)
 844     {
 845       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
 846       if (BE (lim_states == NULL, 0))
 847         {
 848           ret = REG_ESPACE;
 849           goto free_return;
 850         }
 851       while (1)
 852         {
 853           memset (lim_states, '\0',
 854                   sizeof (re_dfastate_t *) * (match_last + 1));
 855           match_ctx_clear_flag (mctx);
 856           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 857                          match_last, 0);
 858           ret = sift_states_backward (preg, mctx, &sctx);
 859           re_node_set_free (&sctx.limits);
 860           if (BE (ret != REG_NOERROR, 0))
 861               goto free_return;
 862           if (sifted_states[0] != NULL || lim_states[0] != NULL)
 863             break;
 864           do
 865             {
 866               --match_last;
 867               if (match_last < 0)
 868                 {
 869                   ret = REG_NOMATCH;
 870                   goto free_return;
 871                 }
 872             } while (!mctx->state_log[match_last]->halt);
 873           halt_node = check_halt_state_context (preg,
 874                                                 mctx->state_log[match_last],
 875                                                 mctx, match_last);
 876         }
 877       ret = merge_state_array (dfa, sifted_states, lim_states,
 878                                match_last + 1);
 879       re_free (lim_states);
 880       lim_states = NULL;
 881       if (BE (ret != REG_NOERROR, 0))
 882         goto free_return;
 883     }
 884   else
 885     {
 886       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 887                      match_last, 0);
 888       ret = sift_states_backward (preg, mctx, &sctx);
 889       re_node_set_free (&sctx.limits);
 890       if (BE (ret != REG_NOERROR, 0))
 891         goto free_return;
 892     }
 893   re_free (mctx->state_log);
 894   mctx->state_log = sifted_states;
 895   sifted_states = NULL;
 896   mctx->last_node = halt_node;
 897   mctx->match_last = match_last;
 898   ret = REG_NOERROR;
 899  free_return:
 900   re_free (sifted_states);
 901   re_free (lim_states);
 902   return ret;
 903 }
 904
 905 /* Acquire an initial state and return it.
 906    We must select appropriate initial state depending on the context,
 907    since initial states may have constraints like "\<", "^", etc..  */
 908
 909 static re_dfastate_t *
 910 acquire_init_state_context (err, preg, mctx, idx)
 911      reg_errcode_t *err;
 912      const regex_t *preg;
 913      const re_match_context_t *mctx;
 914      int idx;
 915 {
 916   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 917
 918   *err = REG_NOERROR;
 919   if (dfa->init_state->has_constraint)
 920     {
 921       unsigned int context;
 922       context =  re_string_context_at (mctx->input, idx - 1, mctx->eflags,
 923                                        preg->newline_anchor);
 924       if (IS_WORD_CONTEXT (context))
 925         return dfa->init_state_word;
 926       else if (IS_ORDINARY_CONTEXT (context))
 927         return dfa->init_state;
 928       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
 929         return dfa->init_state_begbuf;
 930       else if (IS_NEWLINE_CONTEXT (context))
 931         return dfa->init_state_nl;
 932       else if (IS_BEGBUF_CONTEXT (context))
 933         {
 934           /* It is relatively rare case, then calculate on demand.  */
 935           return  re_acquire_state_context (err, dfa,
 936                                             dfa->init_state->entrance_nodes,
 937                                             context);
 938         }
 939       else
 940         /* Must not happen?  */
 941         return dfa->init_state;
 942     }
 943   else
 944     return dfa->init_state;
 945 }
 946
 947 /* Check whether the regular expression match input string INPUT or not,
 948    and return the index where the matching end, return -1 if not match,
 949    or return -2 in case of an error.
 950    FL_SEARCH means we must search where the matching starts,
 951    FL_LONGEST_MATCH means we want the POSIX longest matching.
 952    Note that the matcher assume that the maching starts from the current
 953    index of the buffer.  */
 954
 955 static int
 956 check_matching (preg, mctx, fl_search, fl_longest_match)
 957     const regex_t *preg;
 958     re_match_context_t *mctx;
 959     int fl_search, fl_longest_match;
 960 {
 961   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 962   reg_errcode_t err;
 963   int match = 0;
 964   int match_last = -1;
 965   int cur_str_idx = re_string_cur_idx (mctx->input);
 966   re_dfastate_t *cur_state;
 967
 968   cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
 969   /* An initial state must not be NULL(invalid state).  */
 970   if (BE (cur_state == NULL, 0))
 971     return -2;
 972   if (mctx->state_log != NULL)
 973     mctx->state_log[cur_str_idx] = cur_state;
 974
 975   /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
 976      later.  E.g. Processing back references.  */
 977   if (dfa->nbackref)
 978     {
 979       err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
 980       if (BE (err != REG_NOERROR, 0))
 981         return err;
 982     }
 983
 984   if (cur_state->has_backref)
 985     {
 986       err = transit_state_bkref (preg, &cur_state->nodes, mctx);
 987       if (BE (err != REG_NOERROR, 0))
 988         return err;
 989     }
 990
 991   /* If the RE accepts NULL string.  */
 992   if (cur_state->halt)
 993     {
 994       if (!cur_state->has_constraint
 995           || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
 996         {
 997           if (!fl_longest_match)
 998             return cur_str_idx;
 999           else
1000             {
1001               match_last = cur_str_idx;
1002               match = 1;
1003             }
1004         }
1005     }
1006
1007   while (!re_string_eoi (mctx->input))
1008     {
1009       cur_state = transit_state (&err, preg, mctx, cur_state,
1010                                  fl_search && !match);
1011       if (cur_state == NULL) /* Reached at the invalid state or an error.  */
1012         {
1013           cur_str_idx = re_string_cur_idx (mctx->input);
1014           if (BE (err != REG_NOERROR, 0))
1015             return -2;
1016           if (fl_search && !match)
1017             {
1018               /* Restart from initial state, since we are searching
1019                  the point from where matching start.  */
1020 #ifdef RE_ENABLE_I18N
1021               if (MB_CUR_MAX == 1
1022                   || re_string_first_byte (mctx->input, cur_str_idx))
1023 #endif /* RE_ENABLE_I18N */
1024                 cur_state = acquire_init_state_context (&err, preg, mctx,
1025                                                         cur_str_idx);
1026               if (BE (cur_state == NULL && err != REG_NOERROR, 0))
1027                 return -2;
1028               if (mctx->state_log != NULL)
1029                 mctx->state_log[cur_str_idx] = cur_state;
1030             }
1031           else if (!fl_longest_match && match)
1032             break;
1033           else /* (fl_longest_match && match) || (!fl_search && !match)  */
1034             {
1035               if (mctx->state_log == NULL)
1036                 break;
1037               else
1038                 {
1039                   int max = mctx->state_log_top;
1040                   for (; cur_str_idx <= max; ++cur_str_idx)
1041                     if (mctx->state_log[cur_str_idx] != NULL)
1042                       break;
1043                   if (cur_str_idx > max)
1044                     break;
1045                 }
1046             }
1047         }
1048
1049       if (cur_state != NULL && cur_state->halt)
1050         {
1051           /* Reached at a halt state.
1052              Check the halt state can satisfy the current context.  */
1053           if (!cur_state->has_constraint
1054               || check_halt_state_context (preg, cur_state, mctx,
1055                                            re_string_cur_idx (mctx->input)))
1056             {
1057               /* We found an appropriate halt state.  */
1058               match_last = re_string_cur_idx (mctx->input);
1059               match = 1;
1060               if (!fl_longest_match)
1061                 break;
1062             }
1063         }
1064    }
1065   return match_last;
1066 }
1067
1068 /* Check NODE match the current context.  */
1069
1070 static int check_halt_node_context (dfa, node, context)
1071     const re_dfa_t *dfa;
1072     int node;
1073     unsigned int context;
1074 {
1075   re_token_type_t type = dfa->nodes[node].type;
1076   unsigned int constraint = dfa->nodes[node].constraint;
1077   if (type != END_OF_RE)
1078     return 0;
1079   if (!constraint)
1080     return 1;
1081   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1082     return 0;
1083   return 1;
1084 }
1085
1086 /* Check the halt state STATE match the current context.
1087    Return 0 if not match, if the node, STATE has, is a halt node and
1088    match the context, return the node.  */
1089
1090 static int
1091 check_halt_state_context (preg, state, mctx, idx)
1092     const regex_t *preg;
1093     const re_dfastate_t *state;
1094     const re_match_context_t *mctx;
1095     int idx;
1096 {
1097   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1098   int i;
1099   unsigned int context;
1100 #ifdef DEBUG
1101   assert (state->halt);
1102 #endif
1103   context = re_string_context_at (mctx->input, idx, mctx->eflags,
1104                                   preg->newline_anchor);
1105   for (i = 0; i < state->nodes.nelem; ++i)
1106     if (check_halt_node_context (dfa, state->nodes.elems[i], context))
1107       return state->nodes.elems[i];
1108   return 0;
1109 }
1110
1111 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1112    corresponding to the DFA).
1113    Return the destination node, and update EPS_VIA_NODES, return -1 in case
1114    of errors.  */
1115
1116 static int
1117 proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
1118     const regex_t *preg;
1119     regmatch_t *regs;
1120     const re_match_context_t *mctx;
1121     int nregs, *pidx, node;
1122     re_node_set *eps_via_nodes;
1123     struct re_fail_stack_t *fs;
1124 {
1125   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1126   int i, err, dest_node;
1127   dest_node = -1;
1128   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1129     {
1130       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1131       int ndest, dest_nodes[2];
1132       err = re_node_set_insert (eps_via_nodes, node);
1133       if (BE (err < 0, 0))
1134         return -1;
1135       /* Pick up valid destinations.  */
1136       for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
1137         {
1138           int candidate = dfa->edests[node].elems[i];
1139           if (!re_node_set_contains (cur_nodes, candidate))
1140             continue;
1141           dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
1142           dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
1143           ++ndest;
1144         }
1145       if (ndest <= 1)
1146         return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
1147       /* In order to avoid infinite loop like "(a*)*".  */
1148       if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
1149         return dest_nodes[1];
1150       if (fs != NULL)
1151         push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
1152       return dest_nodes[0];
1153     }
1154   else
1155     {
1156       int naccepted = 0;
1157       re_token_type_t type = dfa->nodes[node].type;
1158
1159 #ifdef RE_ENABLE_I18N
1160       if (ACCEPT_MB_NODE (type))
1161         naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
1162       else
1163 #endif /* RE_ENABLE_I18N */
1164       if (type == OP_BACK_REF)
1165         {
1166           int subexp_idx = dfa->nodes[node].opr.idx;
1167           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1168           if (fs != NULL)
1169             {
1170               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1171                 return -1;
1172               else if (naccepted)
1173                 {
1174                   char *buf = (char *) re_string_get_buffer (mctx->input);
1175                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1176                               naccepted) != 0)
1177                     return -1;
1178                 }
1179             }
1180
1181           if (naccepted == 0)
1182             {
1183               err = re_node_set_insert (eps_via_nodes, node);
1184               if (BE (err < 0, 0))
1185                 return -2;
1186               dest_node = dfa->edests[node].elems[0];
1187               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1188                                         dest_node))
1189                 return dest_node;
1190             }
1191         }
1192
1193       if (naccepted != 0
1194           || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
1195         {
1196           dest_node = dfa->nexts[node];
1197           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1198           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1199                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1200                                                dest_node)))
1201             return -1;
1202           re_node_set_empty (eps_via_nodes);
1203           return dest_node;
1204         }
1205     }
1206   return -1;
1207 }
1208
1209 static reg_errcode_t
1210 push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
1211      struct re_fail_stack_t *fs;
1212      int str_idx, *dests, nregs;
1213      regmatch_t *regs;
1214      re_node_set *eps_via_nodes;
1215 {
1216   reg_errcode_t err;
1217   int num = fs->num++;
1218   if (fs->num == fs->alloc)
1219     {
1220       struct re_fail_stack_ent_t *new_array;
1221       fs->alloc *= 2;
1222       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1223                                        * fs->alloc));
1224       if (new_array == NULL)
1225         return REG_ESPACE;
1226       fs->stack = new_array;
1227     }
1228   fs->stack[num].idx = str_idx;
1229   fs->stack[num].node = dests[1];
1230   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1231   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1232   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1233   return err;
1234 }
1235
1236 static int
1237 pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1238      struct re_fail_stack_t *fs;
1239      int *pidx, nregs;
1240      regmatch_t *regs;
1241      re_node_set *eps_via_nodes;
1242 {
1243   int num = --fs->num;
1244   assert (num >= 0);
1245  *pidx = fs->stack[num].idx;
1246   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1247   re_node_set_free (eps_via_nodes);
1248   re_free (fs->stack[num].regs);
1249   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1250   return fs->stack[num].node;
1251 }
1252
1253 /* Set the positions where the subexpressions are starts/ends to registers
1254    PMATCH.
1255    Note: We assume that pmatch[0] is already set, and
1256    pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1).  */
1257
1258 static reg_errcode_t
1259 set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1260      const regex_t *preg;
1261      const re_match_context_t *mctx;
1262      size_t nmatch;
1263      regmatch_t *pmatch;
1264      int fl_backtrack;
1265 {
1266   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1267   int idx, cur_node, real_nmatch;
1268   re_node_set eps_via_nodes;
1269   struct re_fail_stack_t *fs;
1270   struct re_fail_stack_t fs_body = {0, 2, NULL};
1271 #ifdef DEBUG
1272   assert (nmatch > 1);
1273   assert (mctx->state_log != NULL);
1274 #endif
1275   if (fl_backtrack)
1276     {
1277       fs = &fs_body;
1278       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1279     }
1280   else
1281     fs = NULL;
1282   cur_node = dfa->init_node;
1283   real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
1284   re_node_set_init_empty (&eps_via_nodes);
1285   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1286     {
1287       update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
1288       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1289         {
1290           int reg_idx;
1291           if (fs)
1292             {
1293               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1294                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1295                   break;
1296               if (reg_idx == nmatch)
1297                 {
1298                   re_node_set_free (&eps_via_nodes);
1299                   return free_fail_stack_return (fs);
1300                 }
1301               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1302                                          &eps_via_nodes);
1303             }
1304           else
1305             {
1306               re_node_set_free (&eps_via_nodes);
1307               return REG_NOERROR;
1308             }
1309         }
1310
1311       /* Proceed to next node.  */
1312       cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
1313                                     &eps_via_nodes, fs);
1314
1315       if (BE (cur_node < 0, 0))
1316         {
1317           if (cur_node == -2)
1318             return REG_ESPACE;
1319           if (fs)
1320             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1321                                        &eps_via_nodes);
1322           else
1323             {
1324               re_node_set_free (&eps_via_nodes);
1325               return REG_NOMATCH;
1326             }
1327         }
1328     }
1329   re_node_set_free (&eps_via_nodes);
1330   return free_fail_stack_return (fs);
1331 }
1332
1333 static reg_errcode_t
1334 free_fail_stack_return (fs)
1335      struct re_fail_stack_t *fs;
1336 {
1337   if (fs)
1338     {
1339       int fs_idx;
1340       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1341         {
1342           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1343           re_free (fs->stack[fs_idx].regs);
1344         }
1345       re_free (fs->stack);
1346     }
1347   return REG_NOERROR;
1348 }
1349
1350 static void
1351 update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
1352      re_dfa_t *dfa;
1353      regmatch_t *pmatch;
1354      int cur_node, cur_idx, nmatch;
1355 {
1356   int type = dfa->nodes[cur_node].type;
1357   int reg_num;
1358   if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
1359     return;
1360   reg_num = dfa->nodes[cur_node].opr.idx + 1;
1361   if (reg_num >= nmatch)
1362     return;
1363   if (type == OP_OPEN_SUBEXP)
1364     {
1365       /* We are at the first node of this sub expression.  */
1366       pmatch[reg_num].rm_so = cur_idx;
1367       pmatch[reg_num].rm_eo = -1;
1368     }
1369   else if (type == OP_CLOSE_SUBEXP)
1370     /* We are at the first node of this sub expression.  */
1371     pmatch[reg_num].rm_eo = cur_idx;
1372 }
1373
1374 #define NUMBER_OF_STATE 1
1375
1376 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1377    and sift the nodes in each states according to the following rules.
1378    Updated state_log will be wrote to STATE_LOG.
1379
1380    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1381      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1382         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1383         the LAST_NODE, we throw away the node `a'.
1384      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1385         string `s' and transit to `b':
1386         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1387            away the node `a'.
1388         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1389             throwed away, we throw away the node `a'.
1390      3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
1391         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1392            node `a'.
1393         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
1394             we throw away the node `a'.  */
1395
1396 #define STATE_NODE_CONTAINS(state,node) \
1397   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1398
1399 static reg_errcode_t
1400 sift_states_backward (preg, mctx, sctx)
1401      const regex_t *preg;
1402      re_match_context_t *mctx;
1403      re_sift_context_t *sctx;
1404 {
1405   reg_errcode_t err;
1406   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1407   int null_cnt = 0;
1408   int str_idx = sctx->last_str_idx;
1409   re_node_set cur_dest;
1410   re_node_set *cur_src; /* Points the state_log[str_idx]->nodes  */
1411
1412 #ifdef DEBUG
1413   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1414 #endif
1415   cur_src = &mctx->state_log[str_idx]->nodes;
1416
1417   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1418      transit to the last_node and the last_node itself.  */
1419   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1420   if (BE (err != REG_NOERROR, 0))
1421     return err;
1422   err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1423   if (BE (err != REG_NOERROR, 0))
1424     goto free_return;
1425
1426   /* Then check each states in the state_log.  */
1427   while (str_idx > 0)
1428     {
1429       int i, ret;
1430       /* Update counters.  */
1431       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1432       if (null_cnt > mctx->max_mb_elem_len)
1433         {
1434           memset (sctx->sifted_states, '\0',
1435                   sizeof (re_dfastate_t *) * str_idx);
1436           re_node_set_free (&cur_dest);
1437           return REG_NOERROR;
1438         }
1439       re_node_set_empty (&cur_dest);
1440       --str_idx;
1441       cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1442                  : &mctx->state_log[str_idx]->nodes);
1443
1444       /* Then build the next sifted state.
1445          We build the next sifted state on `cur_dest', and update
1446          `sifted_states[str_idx]' with `cur_dest'.
1447          Note:
1448          `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1449          `cur_src' points the node_set of the old `state_log[str_idx]'.  */
1450       for (i = 0; i < cur_src->nelem; i++)
1451         {
1452           int prev_node = cur_src->elems[i];
1453           int naccepted = 0;
1454           re_token_type_t type = dfa->nodes[prev_node].type;
1455
1456           if (IS_EPSILON_NODE(type))
1457             continue;
1458 #ifdef RE_ENABLE_I18N
1459           /* If the node may accept `multi byte'.  */
1460           if (ACCEPT_MB_NODE (type))
1461             naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
1462                                              str_idx, sctx->last_str_idx);
1463
1464 #endif /* RE_ENABLE_I18N */
1465           /* We don't check backreferences here.
1466              See update_cur_sifted_state().  */
1467
1468           if (!naccepted
1469               && check_node_accept (preg, dfa->nodes + prev_node, mctx,
1470                                     str_idx)
1471               && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1472                                       dfa->nexts[prev_node]))
1473             naccepted = 1;
1474
1475           if (naccepted == 0)
1476             continue;
1477
1478           if (sctx->limits.nelem)
1479             {
1480               int to_idx = str_idx + naccepted;
1481               if (check_dst_limits (dfa, &sctx->limits, mctx,
1482                                     dfa->nexts[prev_node], to_idx,
1483                                     prev_node, str_idx))
1484                 continue;
1485             }
1486           ret = re_node_set_insert (&cur_dest, prev_node);
1487           if (BE (ret == -1, 0))
1488             {
1489               err = REG_ESPACE;
1490               goto free_return;
1491             }
1492         }
1493
1494       /* Add all the nodes which satisfy the following conditions:
1495          - It can epsilon transit to a node in CUR_DEST.
1496          - It is in CUR_SRC.
1497          And update state_log.  */
1498       err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1499       if (BE (err != REG_NOERROR, 0))
1500         goto free_return;
1501     }
1502   err = REG_NOERROR;
1503  free_return:
1504   re_node_set_free (&cur_dest);
1505   return err;
1506 }
1507
1508 /* Helper functions.  */
1509
1510 static reg_errcode_t
1511 clean_state_log_if_need (mctx, next_state_log_idx)
1512     re_match_context_t *mctx;
1513     int next_state_log_idx;
1514 {
1515   int top = mctx->state_log_top;
1516
1517   if (next_state_log_idx >= mctx->input->bufs_len
1518       || (next_state_log_idx >= mctx->input->valid_len
1519           && mctx->input->valid_len < mctx->input->len))
1520     {
1521       reg_errcode_t err;
1522       err = extend_buffers (mctx);
1523       if (BE (err != REG_NOERROR, 0))
1524         return err;
1525     }
1526
1527   if (top < next_state_log_idx)
1528     {
1529       memset (mctx->state_log + top + 1, '\0',
1530               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1531       mctx->state_log_top = next_state_log_idx;
1532     }
1533   return REG_NOERROR;
1534 }
1535
1536 static reg_errcode_t
1537 merge_state_array (dfa, dst, src, num)
1538      re_dfa_t *dfa;
1539      re_dfastate_t **dst;
1540      re_dfastate_t **src;
1541      int num;
1542 {
1543   int st_idx;
1544   reg_errcode_t err;
1545   for (st_idx = 0; st_idx < num; ++st_idx)
1546     {
1547       if (dst[st_idx] == NULL)
1548         dst[st_idx] = src[st_idx];
1549       else if (src[st_idx] != NULL)
1550         {
1551           re_node_set merged_set;
1552           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1553                                         &src[st_idx]->nodes);
1554           if (BE (err != REG_NOERROR, 0))
1555             return err;
1556           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1557           re_node_set_free (&merged_set);
1558           if (BE (err != REG_NOERROR, 0))
1559             return err;
1560         }
1561     }
1562   return REG_NOERROR;
1563 }
1564
1565 static reg_errcode_t
1566 update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
1567      const regex_t *preg;
1568      re_match_context_t *mctx;
1569      re_sift_context_t *sctx;
1570      int str_idx;
1571      re_node_set *dest_nodes;
1572 {
1573   reg_errcode_t err;
1574   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1575   const re_node_set *candidates;
1576   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1577                 : &mctx->state_log[str_idx]->nodes);
1578
1579   /* At first, add the nodes which can epsilon transit to a node in
1580      DEST_NODE.  */
1581   if (dest_nodes->nelem)
1582     {
1583       err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1584       if (BE (err != REG_NOERROR, 0))
1585         return err;
1586     }
1587
1588   /* Then, check the limitations in the current sift_context.  */
1589   if (dest_nodes->nelem && sctx->limits.nelem)
1590     {
1591       err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1592                                  mctx->bkref_ents, str_idx);
1593       if (BE (err != REG_NOERROR, 0))
1594         return err;
1595     }
1596
1597   /* Update state_log.  */
1598   sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1599   if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
1600     return err;
1601
1602   if ((mctx->state_log[str_idx] != NULL
1603        && mctx->state_log[str_idx]->has_backref))
1604     {
1605       err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
1606       if (BE (err != REG_NOERROR, 0))
1607         return err;
1608     }
1609   return REG_NOERROR;
1610 }
1611
1612 static reg_errcode_t
1613 add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1614      re_dfa_t *dfa;
1615      re_node_set *dest_nodes;
1616      const re_node_set *candidates;
1617 {
1618   reg_errcode_t err;
1619   int src_idx;
1620   re_node_set src_copy;
1621
1622   err = re_node_set_init_copy (&src_copy, dest_nodes);
1623   if (BE (err != REG_NOERROR, 0))
1624     return err;
1625   for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
1626     {
1627       err = re_node_set_add_intersect (dest_nodes, candidates,
1628                                        dfa->inveclosures
1629                                        + src_copy.elems[src_idx]);
1630       if (BE (err != REG_NOERROR, 0))
1631         {
1632           re_node_set_free (&src_copy);
1633           return err;
1634         }
1635     }
1636   re_node_set_free (&src_copy);
1637   return REG_NOERROR;
1638 }
1639
1640 static reg_errcode_t
1641 sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1642      re_dfa_t *dfa;
1643      int node;
1644      re_node_set *dest_nodes;
1645      const re_node_set *candidates;
1646 {
1647     int ecl_idx;
1648     reg_errcode_t err;
1649     re_node_set *inv_eclosure = dfa->inveclosures + node;
1650     re_node_set except_nodes;
1651     re_node_set_init_empty (&except_nodes);
1652     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1653       {
1654         int cur_node = inv_eclosure->elems[ecl_idx];
1655         if (cur_node == node)
1656           continue;
1657         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1658           {
1659             int edst1 = dfa->edests[cur_node].elems[0];
1660             int edst2 = ((dfa->edests[cur_node].nelem > 1)
1661                          ? dfa->edests[cur_node].elems[1] : -1);
1662             if ((!re_node_set_contains (inv_eclosure, edst1)
1663                  && re_node_set_contains (dest_nodes, edst1))
1664                 || (edst2 > 0
1665                     && !re_node_set_contains (inv_eclosure, edst2)
1666                     && re_node_set_contains (dest_nodes, edst2)))
1667               {
1668                 err = re_node_set_add_intersect (&except_nodes, candidates,
1669                                                  dfa->inveclosures + cur_node);
1670                 if (BE (err != REG_NOERROR, 0))
1671                   {
1672                     re_node_set_free (&except_nodes);
1673                     return err;
1674                   }
1675               }
1676           }
1677       }
1678     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1679       {
1680         int cur_node = inv_eclosure->elems[ecl_idx];
1681         if (!re_node_set_contains (&except_nodes, cur_node))
1682           {
1683             int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1684             re_node_set_remove_at (dest_nodes, idx);
1685           }
1686       }
1687     re_node_set_free (&except_nodes);
1688     return REG_NOERROR;
1689 }
1690
1691 static int
1692 check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
1693      re_dfa_t *dfa;
1694      re_node_set *limits;
1695      re_match_context_t *mctx;
1696      int dst_node, dst_idx, src_node, src_idx;
1697 {
1698   int lim_idx, src_pos, dst_pos;
1699
1700   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1701     {
1702       int subexp_idx;
1703       struct re_backref_cache_entry *ent;
1704       ent = mctx->bkref_ents + limits->elems[lim_idx];
1705       subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1706
1707       dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1708                                            dfa->eclosures + dst_node,
1709                                            subexp_idx, dst_node, dst_idx);
1710       src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1711                                            dfa->eclosures + src_node,
1712                                            subexp_idx, src_node, src_idx);
1713
1714       /* In case of:
1715          <src> <dst> ( <subexp> )
1716          ( <subexp> ) <src> <dst>
1717          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1718       if (src_pos == dst_pos)
1719         continue; /* This is unrelated limitation.  */
1720       else
1721         return 1;
1722     }
1723   return 0;
1724 }
1725
1726 static int
1727 check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
1728                            str_idx)
1729      re_dfa_t *dfa;
1730      re_match_context_t *mctx;
1731      re_node_set *eclosures;
1732      int limit, subexp_idx, node, str_idx;
1733 {
1734   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1735   int pos = (str_idx < lim->subexp_from ? -1
1736              : (lim->subexp_to < str_idx ? 1 : 0));
1737   if (pos == 0
1738       && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
1739     {
1740       int node_idx;
1741       for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1742         {
1743           int node = eclosures->elems[node_idx];
1744           re_token_type_t type= dfa->nodes[node].type;
1745           if (type == OP_BACK_REF)
1746             {
1747               int bi = search_cur_bkref_entry (mctx, str_idx);
1748               for (; bi < mctx->nbkref_ents; ++bi)
1749                 {
1750                   struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
1751                   if (ent->str_idx > str_idx)
1752                     break;
1753                   if (ent->node == node && ent->subexp_from == ent->subexp_to)
1754                     {
1755                       int cpos, dst;
1756                       dst = dfa->edests[node].elems[0];
1757                       cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
1758                                                         dfa->eclosures + dst,
1759                                                         subexp_idx, dst,
1760                                                         str_idx);
1761                       if ((str_idx == lim->subexp_from && cpos == -1)
1762                           || (str_idx == lim->subexp_to && cpos == 0))
1763                         return cpos;
1764                     }
1765                 }
1766             }
1767           if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1768               && str_idx == lim->subexp_from)
1769             {
1770               pos = -1;
1771               break;
1772             }
1773           if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1774               && str_idx == lim->subexp_to)
1775             break;
1776         }
1777       if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
1778         pos = 1;
1779     }
1780   return pos;
1781 }
1782
1783 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1784    which are against limitations from DEST_NODES. */
1785
1786 static reg_errcode_t
1787 check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
1788      re_dfa_t *dfa;
1789      re_node_set *dest_nodes;
1790      const re_node_set *candidates;
1791      re_node_set *limits;
1792      struct re_backref_cache_entry *bkref_ents;
1793      int str_idx;
1794 {
1795   reg_errcode_t err;
1796   int node_idx, lim_idx;
1797
1798   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1799     {
1800       int subexp_idx;
1801       struct re_backref_cache_entry *ent;
1802       ent = bkref_ents + limits->elems[lim_idx];
1803
1804       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1805         continue; /* This is unrelated limitation.  */
1806
1807       subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1808       if (ent->subexp_to == str_idx)
1809         {
1810           int ops_node = -1;
1811           int cls_node = -1;
1812           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1813             {
1814               int node = dest_nodes->elems[node_idx];
1815               re_token_type_t type= dfa->nodes[node].type;
1816               if (type == OP_OPEN_SUBEXP
1817                   && subexp_idx == dfa->nodes[node].opr.idx)
1818                 ops_node = node;
1819               else if (type == OP_CLOSE_SUBEXP
1820                        && subexp_idx == dfa->nodes[node].opr.idx)
1821                 cls_node = node;
1822             }
1823
1824           /* Check the limitation of the open subexpression.  */
1825           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
1826           if (ops_node >= 0)
1827             {
1828               err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
1829                                           candidates);
1830               if (BE (err != REG_NOERROR, 0))
1831                 return err;
1832             }
1833           /* Check the limitation of the close subexpression.  */
1834           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1835             {
1836               int node = dest_nodes->elems[node_idx];
1837               if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
1838                   && !re_node_set_contains (dfa->eclosures + node, cls_node))
1839                 {
1840                   /* It is against this limitation.
1841                      Remove it form the current sifted state.  */
1842                   err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1843                                               candidates);
1844                   if (BE (err != REG_NOERROR, 0))
1845                     return err;
1846                   --node_idx;
1847                 }
1848             }
1849         }
1850       else /* (ent->subexp_to != str_idx)  */
1851         {
1852           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1853             {
1854               int node = dest_nodes->elems[node_idx];
1855               re_token_type_t type= dfa->nodes[node].type;
1856               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
1857                 {
1858                   if (subexp_idx != dfa->nodes[node].opr.idx)
1859                     continue;
1860                   if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
1861                       || (type == OP_OPEN_SUBEXP))
1862                     {
1863                       /* It is against this limitation.
1864                          Remove it form the current sifted state.  */
1865                       err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1866                                                   candidates);
1867                       if (BE (err != REG_NOERROR, 0))
1868                         return err;
1869                     }
1870                 }
1871             }
1872         }
1873     }
1874   return REG_NOERROR;
1875 }
1876
1877 static reg_errcode_t
1878 sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
1879      const regex_t *preg;
1880      re_match_context_t *mctx;
1881      re_sift_context_t *sctx;
1882      int str_idx;
1883      re_node_set *dest_nodes;
1884 {
1885   reg_errcode_t err;
1886   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1887   int node_idx, node;
1888   re_sift_context_t local_sctx;
1889   const re_node_set *candidates;
1890   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1891                 : &mctx->state_log[str_idx]->nodes);
1892   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
1893
1894   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
1895     {
1896       int cur_bkref_idx = re_string_cur_idx (mctx->input);
1897       re_token_type_t type;
1898       node = candidates->elems[node_idx];
1899       type = dfa->nodes[node].type;
1900       if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
1901         continue;
1902       /* Avoid infinite loop for the REs like "()\1+".  */
1903       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
1904         continue;
1905       if (type == OP_BACK_REF)
1906         {
1907           int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
1908           for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1909             {
1910               int disabled_idx, subexp_len, to_idx, dst_node;
1911               struct re_backref_cache_entry *entry;
1912               entry = mctx->bkref_ents + enabled_idx;
1913               if (entry->str_idx > str_idx)
1914                 break;
1915               if (entry->node != node)
1916                   continue;
1917               subexp_len = entry->subexp_to - entry->subexp_from;
1918               to_idx = str_idx + subexp_len;
1919               dst_node = (subexp_len ? dfa->nexts[node]
1920                           : dfa->edests[node].elems[0]);
1921
1922               if (to_idx > sctx->last_str_idx
1923                   || sctx->sifted_states[to_idx] == NULL
1924                   || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
1925                                            dst_node)
1926                   || check_dst_limits (dfa, &sctx->limits, mctx, node,
1927                                        str_idx, dst_node, to_idx))
1928                 continue;
1929                 {
1930                   re_dfastate_t *cur_state;
1931                   entry->flag = 0;
1932                   for (disabled_idx = enabled_idx + 1;
1933                        disabled_idx < mctx->nbkref_ents; ++disabled_idx)
1934                     {
1935                       struct re_backref_cache_entry *entry2;
1936                       entry2 = mctx->bkref_ents + disabled_idx;
1937                       if (entry2->str_idx > str_idx)
1938                         break;
1939                       entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
1940                     }
1941
1942                   if (local_sctx.sifted_states == NULL)
1943                     {
1944                       local_sctx = *sctx;
1945                       err = re_node_set_init_copy (&local_sctx.limits,
1946                                                    &sctx->limits);
1947                       if (BE (err != REG_NOERROR, 0))
1948                         goto free_return;
1949                     }
1950                   local_sctx.last_node = node;
1951                   local_sctx.last_str_idx = str_idx;
1952                   err = re_node_set_insert (&local_sctx.limits, enabled_idx);
1953                   if (BE (err < 0, 0))
1954                     {
1955                       err = REG_ESPACE;
1956                       goto free_return;
1957                     }
1958                   cur_state = local_sctx.sifted_states[str_idx];
1959                   err = sift_states_backward (preg, mctx, &local_sctx);
1960                   if (BE (err != REG_NOERROR, 0))
1961                     goto free_return;
1962                   if (sctx->limited_states != NULL)
1963                     {
1964                       err = merge_state_array (dfa, sctx->limited_states,
1965                                                local_sctx.sifted_states,
1966                                                str_idx + 1);
1967                       if (BE (err != REG_NOERROR, 0))
1968                         goto free_return;
1969                     }
1970                   local_sctx.sifted_states[str_idx] = cur_state;
1971                   re_node_set_remove (&local_sctx.limits, enabled_idx);
1972                   /* We must not use the variable entry here, since
1973                      mctx->bkref_ents might be realloced.  */
1974                   mctx->bkref_ents[enabled_idx].flag = 1;
1975                 }
1976             }
1977           enabled_idx = search_cur_bkref_entry (mctx, str_idx);
1978           for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1979             {
1980               struct re_backref_cache_entry *entry;
1981               entry = mctx->bkref_ents + enabled_idx;
1982               if (entry->str_idx > str_idx)
1983                 break;
1984               if (entry->node == node)
1985                 entry->flag = 0;
1986             }
1987         }
1988     }
1989   err = REG_NOERROR;
1990  free_return:
1991   if (local_sctx.sifted_states != NULL)
1992     {
1993       re_node_set_free (&local_sctx.limits);
1994     }
1995
1996   return err;
1997 }
1998
1999
2000 #ifdef RE_ENABLE_I18N
2001 static int
2002 sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
2003     const regex_t *preg;
2004     const re_match_context_t *mctx;
2005     re_sift_context_t *sctx;
2006     int node_idx, str_idx, max_str_idx;
2007 {
2008   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2009   int naccepted;
2010   /* Check the node can accept `multi byte'.  */
2011   naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
2012   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2013       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2014                             dfa->nexts[node_idx]))
2015     /* The node can't accept the `multi byte', or the
2016        destination was already throwed away, then the node
2017        could't accept the current input `multi byte'.   */
2018     naccepted = 0;
2019   /* Otherwise, it is sure that the node could accept
2020      `naccepted' bytes input.  */
2021   return naccepted;
2022 }
2023 #endif /* RE_ENABLE_I18N */
2024
2025 \f
2026 /* Functions for state transition.  */
2027
2028 /* Return the next state to which the current state STATE will transit by
2029    accepting the current input byte, and update STATE_LOG if necessary.
2030    If STATE can accept a multibyte char/collating element/back reference
2031    update the destination of STATE_LOG.  */
2032
2033 static re_dfastate_t *
2034 transit_state (err, preg, mctx, state, fl_search)
2035      reg_errcode_t *err;
2036      const regex_t *preg;
2037      re_match_context_t *mctx;
2038      re_dfastate_t *state;
2039      int fl_search;
2040 {
2041   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2042   re_dfastate_t **trtable, *next_state;
2043   unsigned char ch;
2044   int cur_idx;
2045
2046   if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
2047       || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
2048           && mctx->input->valid_len < mctx->input->len))
2049     {
2050       *err = extend_buffers (mctx);
2051       if (BE (*err != REG_NOERROR, 0))
2052         return NULL;
2053     }
2054
2055   *err = REG_NOERROR;
2056   if (state == NULL)
2057     {
2058       next_state = state;
2059       re_string_skip_bytes (mctx->input, 1);
2060     }
2061   else
2062     {
2063 #ifdef RE_ENABLE_I18N
2064       /* If the current state can accept multibyte.  */
2065       if (state->accept_mb)
2066         {
2067           *err = transit_state_mb (preg, state, mctx);
2068           if (BE (*err != REG_NOERROR, 0))
2069             return NULL;
2070         }
2071 #endif /* RE_ENABLE_I18N */
2072
2073       /* Then decide the next state with the single byte.  */
2074       if (1)
2075         {
2076           /* Use transition table  */
2077           ch = re_string_fetch_byte (mctx->input);
2078           trtable = fl_search ? state->trtable_search : state->trtable;
2079           if (trtable == NULL)
2080             {
2081               trtable = build_trtable (preg, state, fl_search);
2082               if (fl_search)
2083                 state->trtable_search = trtable;
2084               else
2085                 state->trtable = trtable;
2086             }
2087           next_state = trtable[ch];
2088         }
2089       else
2090         {
2091           /* don't use transition table  */
2092           next_state = transit_state_sb (err, preg, state, fl_search, mctx);
2093           if (BE (next_state == NULL && err != REG_NOERROR, 0))
2094             return NULL;
2095         }
2096     }
2097
2098   cur_idx = re_string_cur_idx (mctx->input);
2099   /* Update the state_log if we need.  */
2100   if (mctx->state_log != NULL)
2101     {
2102       if (cur_idx > mctx->state_log_top)
2103         {
2104           mctx->state_log[cur_idx] = next_state;
2105           mctx->state_log_top = cur_idx;
2106         }
2107       else if (mctx->state_log[cur_idx] == 0)
2108         {
2109           mctx->state_log[cur_idx] = next_state;
2110         }
2111       else
2112         {
2113           re_dfastate_t *pstate;
2114           unsigned int context;
2115           re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2116           /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2117              the destination of a multibyte char/collating element/
2118              back reference.  Then the next state is the union set of
2119              these destinations and the results of the transition table.  */
2120           pstate = mctx->state_log[cur_idx];
2121           log_nodes = pstate->entrance_nodes;
2122           if (next_state != NULL)
2123             {
2124               table_nodes = next_state->entrance_nodes;
2125               *err = re_node_set_init_union (&next_nodes, table_nodes,
2126                                              log_nodes);
2127               if (BE (*err != REG_NOERROR, 0))
2128                 return NULL;
2129             }
2130           else
2131             next_nodes = *log_nodes;
2132           /* Note: We already add the nodes of the initial state,
2133                    then we don't need to add them here.  */
2134
2135           context = re_string_context_at (mctx->input,
2136                                           re_string_cur_idx (mctx->input) - 1,
2137                                           mctx->eflags, preg->newline_anchor);
2138           next_state = mctx->state_log[cur_idx]
2139             = re_acquire_state_context (err, dfa, &next_nodes, context);
2140           /* We don't need to check errors here, since the return value of
2141              this function is next_state and ERR is already set.  */
2142
2143           if (table_nodes != NULL)
2144             re_node_set_free (&next_nodes);
2145         }
2146     }
2147
2148   /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2149      later.  We must check them here, since the back references in the
2150      next state might use them.  */
2151   if (dfa->nbackref && next_state/* && fl_process_bkref */)
2152     {
2153       *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
2154                                         cur_idx);
2155       if (BE (*err != REG_NOERROR, 0))
2156         return NULL;
2157     }
2158
2159   /* If the next state has back references.  */
2160   if (next_state != NULL && next_state->has_backref)
2161     {
2162       *err = transit_state_bkref (preg, &next_state->nodes, mctx);
2163       if (BE (*err != REG_NOERROR, 0))
2164         return NULL;
2165       next_state = mctx->state_log[cur_idx];
2166     }
2167   return next_state;
2168 }
2169
2170 /* Helper functions for transit_state.  */
2171
2172 /* From the node set CUR_NODES, pick up the nodes whose types are
2173    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2174    expression. And register them to use them later for evaluating the
2175    correspoding back references.  */
2176
2177 static reg_errcode_t
2178 check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
2179      re_dfa_t *dfa;
2180      re_match_context_t *mctx;
2181      re_node_set *cur_nodes;
2182      int str_idx;
2183 {
2184   int node_idx;
2185   reg_errcode_t err;
2186
2187   /* TODO: This isn't efficient.
2188            Because there might be more than one nodes whose types are
2189            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2190            nodes.
2191            E.g. RE: (a){2}  */
2192   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2193     {
2194       int node = cur_nodes->elems[node_idx];
2195       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2196           && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
2197           && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2198         {
2199           err = match_ctx_add_subtop (mctx, node, str_idx);
2200           if (BE (err != REG_NOERROR, 0))
2201             return err;
2202         }
2203     }
2204   return REG_NOERROR;
2205 }
2206
2207 /* Return the next state to which the current state STATE will transit by
2208    accepting the current input byte.  */
2209
2210 static re_dfastate_t *
2211 transit_state_sb (err, preg, state, fl_search, mctx)
2212      reg_errcode_t *err;
2213      const regex_t *preg;
2214      re_dfastate_t *state;
2215      int fl_search;
2216      re_match_context_t *mctx;
2217 {
2218   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2219   re_node_set next_nodes;
2220   re_dfastate_t *next_state;
2221   int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
2222   unsigned int context;
2223
2224   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2225   if (BE (*err != REG_NOERROR, 0))
2226     return NULL;
2227   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2228     {
2229       int cur_node = state->nodes.elems[node_cnt];
2230       if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
2231         {
2232           *err = re_node_set_merge (&next_nodes,
2233                                     dfa->eclosures + dfa->nexts[cur_node]);
2234           if (BE (*err != REG_NOERROR, 0))
2235             {
2236               re_node_set_free (&next_nodes);
2237               return NULL;
2238             }
2239         }
2240     }
2241   if (fl_search)
2242     {
2243 #ifdef RE_ENABLE_I18N
2244       int not_initial = 0;
2245       if (MB_CUR_MAX > 1)
2246         for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
2247           if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
2248             {
2249               not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
2250               break;
2251             }
2252       if (!not_initial)
2253 #endif
2254         {
2255           *err = re_node_set_merge (&next_nodes,
2256                                     dfa->init_state->entrance_nodes);
2257           if (BE (*err != REG_NOERROR, 0))
2258             {
2259               re_node_set_free (&next_nodes);
2260               return NULL;
2261             }
2262         }
2263     }
2264   context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
2265                                   preg->newline_anchor);
2266   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2267   /* We don't need to check errors here, since the return value of
2268      this function is next_state and ERR is already set.  */
2269
2270   re_node_set_free (&next_nodes);
2271   re_string_skip_bytes (mctx->input, 1);
2272   return next_state;
2273 }
2274
2275 #ifdef RE_ENABLE_I18N
2276 static reg_errcode_t
2277 transit_state_mb (preg, pstate, mctx)
2278     const regex_t *preg;
2279     re_dfastate_t *pstate;
2280     re_match_context_t *mctx;
2281 {
2282   reg_errcode_t err;
2283   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2284   int i;
2285
2286   for (i = 0; i < pstate->nodes.nelem; ++i)
2287     {
2288       re_node_set dest_nodes, *new_nodes;
2289       int cur_node_idx = pstate->nodes.elems[i];
2290       int naccepted = 0, dest_idx;
2291       unsigned int context;
2292       re_dfastate_t *dest_state;
2293
2294       if (dfa->nodes[cur_node_idx].constraint)
2295         {
2296           context = re_string_context_at (mctx->input,
2297                                           re_string_cur_idx (mctx->input),
2298                                           mctx->eflags, preg->newline_anchor);
2299           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2300                                            context))
2301             continue;
2302         }
2303
2304       /* How many bytes the node can accepts?  */
2305       if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
2306         naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
2307                                              re_string_cur_idx (mctx->input));
2308       if (naccepted == 0)
2309         continue;
2310
2311       /* The node can accepts `naccepted' bytes.  */
2312       dest_idx = re_string_cur_idx (mctx->input) + naccepted;
2313       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2314                                : mctx->max_mb_elem_len);
2315       err = clean_state_log_if_need (mctx, dest_idx);
2316       if (BE (err != REG_NOERROR, 0))
2317         return err;
2318 #ifdef DEBUG
2319       assert (dfa->nexts[cur_node_idx] != -1);
2320 #endif
2321       /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
2322          then we use pstate->nodes.elems[i] instead.  */
2323       new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
2324
2325       dest_state = mctx->state_log[dest_idx];
2326       if (dest_state == NULL)
2327         dest_nodes = *new_nodes;
2328       else
2329         {
2330           err = re_node_set_init_union (&dest_nodes,
2331                                         dest_state->entrance_nodes, new_nodes);
2332           if (BE (err != REG_NOERROR, 0))
2333             return err;
2334         }
2335       context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
2336                                       preg->newline_anchor);
2337       mctx->state_log[dest_idx]
2338         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2339       if (dest_state != NULL)
2340         re_node_set_free (&dest_nodes);
2341       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2342         return err;
2343     }
2344   return REG_NOERROR;
2345 }
2346 #endif /* RE_ENABLE_I18N */
2347
2348 static reg_errcode_t
2349 transit_state_bkref (preg, nodes, mctx)
2350     const regex_t *preg;
2351     re_node_set *nodes;
2352     re_match_context_t *mctx;
2353 {
2354   reg_errcode_t err;
2355   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2356   int i;
2357   int cur_str_idx = re_string_cur_idx (mctx->input);
2358
2359   for (i = 0; i < nodes->nelem; ++i)
2360     {
2361       int dest_str_idx, prev_nelem, bkc_idx;
2362       int node_idx = nodes->elems[i];
2363       unsigned int context;
2364       re_token_t *node = dfa->nodes + node_idx;
2365       re_node_set *new_dest_nodes;
2366
2367       /* Check whether `node' is a backreference or not.  */
2368       if (node->type != OP_BACK_REF)
2369         continue;
2370
2371       if (node->constraint)
2372         {
2373           context = re_string_context_at (mctx->input, cur_str_idx,
2374                                           mctx->eflags, preg->newline_anchor);
2375           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2376             continue;
2377         }
2378
2379       /* `node' is a backreference.
2380          Check the substring which the substring matched.  */
2381       bkc_idx = mctx->nbkref_ents;
2382       err = get_subexp (preg, mctx, node_idx, cur_str_idx);
2383       if (BE (err != REG_NOERROR, 0))
2384         goto free_return;
2385
2386       /* And add the epsilon closures (which is `new_dest_nodes') of
2387          the backreference to appropriate state_log.  */
2388 #ifdef DEBUG
2389       assert (dfa->nexts[node_idx] != -1);
2390 #endif
2391       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2392         {
2393           int subexp_len;
2394           re_dfastate_t *dest_state;
2395           struct re_backref_cache_entry *bkref_ent;
2396           bkref_ent = mctx->bkref_ents + bkc_idx;
2397           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2398             continue;
2399           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2400           new_dest_nodes = (subexp_len == 0
2401                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2402                             : dfa->eclosures + dfa->nexts[node_idx]);
2403           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2404                           - bkref_ent->subexp_from);
2405           context = re_string_context_at (mctx->input, dest_str_idx - 1,
2406                                           mctx->eflags, preg->newline_anchor);
2407           dest_state = mctx->state_log[dest_str_idx];
2408           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2409                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2410           /* Add `new_dest_node' to state_log.  */
2411           if (dest_state == NULL)
2412             {
2413               mctx->state_log[dest_str_idx]
2414                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2415                                             context);
2416               if (BE (mctx->state_log[dest_str_idx] == NULL
2417                       && err != REG_NOERROR, 0))
2418                 goto free_return;
2419             }
2420           else
2421             {
2422               re_node_set dest_nodes;
2423               err = re_node_set_init_union (&dest_nodes,
2424                                             dest_state->entrance_nodes,
2425                                             new_dest_nodes);
2426               if (BE (err != REG_NOERROR, 0))
2427                 {
2428                   re_node_set_free (&dest_nodes);
2429                   goto free_return;
2430                 }
2431               mctx->state_log[dest_str_idx]
2432                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2433               re_node_set_free (&dest_nodes);
2434               if (BE (mctx->state_log[dest_str_idx] == NULL
2435                       && err != REG_NOERROR, 0))
2436                 goto free_return;
2437             }
2438           /* We need to check recursively if the backreference can epsilon
2439              transit.  */
2440           if (subexp_len == 0
2441               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2442             {
2443               err = check_subexp_matching_top (dfa, mctx, new_dest_nodes,
2444                                                cur_str_idx);
2445               if (BE (err != REG_NOERROR, 0))
2446                 goto free_return;
2447               err = transit_state_bkref (preg, new_dest_nodes, mctx);
2448               if (BE (err != REG_NOERROR, 0))
2449                 goto free_return;
2450             }
2451         }
2452     }
2453   err = REG_NOERROR;
2454  free_return:
2455   return err;
2456 }
2457
2458 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2459    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2460    Note that we might collect inappropriate candidates here.
2461    However, the cost of checking them strictly here is too high, then we
2462    delay these checking for prune_impossible_nodes().  */
2463
2464 static reg_errcode_t
2465 get_subexp (preg, mctx, bkref_node, bkref_str_idx)
2466      const regex_t *preg;
2467      re_match_context_t *mctx;
2468      int bkref_node, bkref_str_idx;
2469 {
2470   int subexp_num, sub_top_idx;
2471   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2472   char *buf = (char *) re_string_get_buffer (mctx->input);
2473   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2474   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2475   for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
2476     {
2477       struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2478       if (entry->str_idx > bkref_str_idx)
2479         break;
2480       if (entry->node == bkref_node)
2481         return REG_NOERROR; /* We already checked it.  */
2482     }
2483   subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
2484
2485   /* For each sub expression  */
2486   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2487     {
2488       reg_errcode_t err;
2489       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2490       re_sub_match_last_t *sub_last;
2491       int sub_last_idx, sl_str;
2492       char *bkref_str;
2493
2494       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2495         continue; /* It isn't related.  */
2496
2497       sl_str = sub_top->str_idx;
2498       bkref_str = buf + bkref_str_idx;
2499       /* At first, check the last node of sub expressions we already
2500          evaluated.  */
2501       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2502         {
2503           int sl_str_diff;
2504           sub_last = sub_top->lasts[sub_last_idx];
2505           sl_str_diff = sub_last->str_idx - sl_str;
2506           /* The matched string by the sub expression match with the substring
2507              at the back reference?  */
2508           if (sl_str_diff > 0
2509               && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0)
2510             break; /* We don't need to search this sub expression any more.  */
2511           bkref_str += sl_str_diff;
2512           sl_str += sl_str_diff;
2513           err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
2514                                 bkref_str_idx);
2515           if (err == REG_NOMATCH)
2516             continue;
2517           if (BE (err != REG_NOERROR, 0))
2518             return err;
2519         }
2520       if (sub_last_idx < sub_top->nlasts)
2521         continue;
2522       if (sub_last_idx > 0)
2523         ++sl_str;
2524       /* Then, search for the other last nodes of the sub expression.  */
2525       for (; sl_str <= bkref_str_idx; ++sl_str)
2526         {
2527           int cls_node, sl_str_off;
2528           re_node_set *nodes;
2529           sl_str_off = sl_str - sub_top->str_idx;
2530           /* The matched string by the sub expression match with the substring
2531              at the back reference?  */
2532           if (sl_str_off > 0
2533               && memcmp (bkref_str++, buf + sl_str - 1, 1) != 0)
2534             break; /* We don't need to search this sub expression any more.  */
2535           if (mctx->state_log[sl_str] == NULL)
2536             continue;
2537           /* Does this state have a ')' of the sub expression?  */
2538           nodes = &mctx->state_log[sl_str]->nodes;
2539           cls_node = find_subexp_node (dfa, nodes, subexp_num, 0);
2540           if (cls_node == -1)
2541             continue; /* No.  */
2542           if (sub_top->path == NULL)
2543             {
2544               sub_top->path = calloc (sizeof (state_array_t),
2545                                       sl_str - sub_top->str_idx + 1);
2546               if (sub_top->path == NULL)
2547                 return REG_ESPACE;
2548             }
2549           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2550              in the current context?  */
2551           err = check_arrival (preg, mctx, sub_top->path, sub_top->node,
2552                                sub_top->str_idx, cls_node, sl_str, 0);
2553           if (err == REG_NOMATCH)
2554               continue;
2555           if (BE (err != REG_NOERROR, 0))
2556               return err;
2557           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2558           if (BE (sub_last == NULL, 0))
2559             return REG_ESPACE;
2560           err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
2561                                 bkref_str_idx);
2562           if (err == REG_NOMATCH)
2563             continue;
2564         }
2565     }
2566   return REG_NOERROR;
2567 }
2568
2569 /* Helper functions for get_subexp().  */
2570
2571 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2572    If it can arrive, register the sub expression expressed with SUB_TOP
2573    and SUB_LAST.  */
2574
2575 static reg_errcode_t
2576 get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str)
2577      const regex_t *preg;
2578      re_match_context_t *mctx;
2579      re_sub_match_top_t *sub_top;
2580      re_sub_match_last_t *sub_last;
2581      int bkref_node, bkref_str;
2582 {
2583   reg_errcode_t err;
2584   int to_idx;
2585   /* Can the subexpression arrive the back reference?  */
2586   err = check_arrival (preg, mctx, &sub_last->path, sub_last->node,
2587                        sub_last->str_idx, bkref_node, bkref_str, 1);
2588   if (err != REG_NOERROR)
2589     return err;
2590   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2591                              sub_last->str_idx);
2592   if (BE (err != REG_NOERROR, 0))
2593     return err;
2594   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2595   clean_state_log_if_need (mctx, to_idx);
2596   return REG_NOERROR;
2597 }
2598
2599 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2600    Search '(' if FL_OPEN, or search ')' otherwise.
2601    TODO: This function isn't efficient...
2602          Because there might be more than one nodes whose types are
2603          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2604          nodes.
2605          E.g. RE: (a){2}  */
2606
2607 static int
2608 find_subexp_node (dfa, nodes, subexp_idx, fl_open)
2609      re_dfa_t *dfa;
2610      re_node_set *nodes;
2611      int subexp_idx, fl_open;
2612 {
2613   int cls_idx;
2614   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2615     {
2616       int cls_node = nodes->elems[cls_idx];
2617       re_token_t *node = dfa->nodes + cls_node;
2618       if (((fl_open && node->type == OP_OPEN_SUBEXP)
2619           || (!fl_open && node->type == OP_CLOSE_SUBEXP))
2620           && node->opr.idx == subexp_idx)
2621         return cls_node;
2622     }
2623   return -1;
2624 }
2625
2626 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2627    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2628    heavily reused.
2629    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2630
2631 static reg_errcode_t
2632 check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str,
2633                fl_open)
2634      const regex_t *preg;
2635      re_match_context_t *mctx;
2636      state_array_t *path;
2637      int top_node, top_str, last_node, last_str, fl_open;
2638 {
2639   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2640   reg_errcode_t err;
2641   int subexp_num, backup_cur_idx, str_idx, null_cnt;
2642   re_dfastate_t *cur_state = NULL;
2643   re_node_set *cur_nodes, next_nodes;
2644   re_dfastate_t **backup_state_log;
2645   unsigned int context;
2646
2647   subexp_num = dfa->nodes[top_node].opr.idx;
2648   /* Extend the buffer if we need.  */
2649   if (path->alloc < last_str + mctx->max_mb_elem_len + 1)
2650     {
2651       re_dfastate_t **new_array;
2652       int old_alloc = path->alloc;
2653       path->alloc += last_str + mctx->max_mb_elem_len + 1;
2654       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2655       if (new_array == NULL)
2656         return REG_ESPACE;
2657       path->array = new_array;
2658       memset (new_array + old_alloc, '\0',
2659               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2660     }
2661
2662   str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2663
2664   /* Temporary modify MCTX.  */
2665   backup_state_log = mctx->state_log;
2666   backup_cur_idx = mctx->input->cur_idx;
2667   mctx->state_log = path->array;
2668   mctx->input->cur_idx = str_idx;
2669
2670   /* Setup initial node set.  */
2671   context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
2672                                   preg->newline_anchor);
2673   if (str_idx == top_str)
2674     {
2675       err = re_node_set_init_1 (&next_nodes, top_node);
2676       if (BE (err != REG_NOERROR, 0))
2677         return err;
2678       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, fl_open);
2679       if (BE (err != REG_NOERROR, 0))
2680         {
2681           re_node_set_free (&next_nodes);
2682           return err;
2683         }
2684     }
2685   else
2686     {
2687       cur_state = mctx->state_log[str_idx];
2688       if (cur_state && cur_state->has_backref)
2689         {
2690           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2691           if (BE ( err != REG_NOERROR, 0))
2692             return err;
2693         }
2694       else
2695         re_node_set_init_empty (&next_nodes);
2696     }
2697   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2698     {
2699       if (next_nodes.nelem)
2700         {
2701           err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
2702                                     subexp_num, fl_open);
2703           if (BE ( err != REG_NOERROR, 0))
2704             {
2705               re_node_set_free (&next_nodes);
2706               return err;
2707             }
2708         }
2709       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2710       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2711         {
2712           re_node_set_free (&next_nodes);
2713           return err;
2714         }
2715       mctx->state_log[str_idx] = cur_state;
2716     }
2717
2718   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2719     {
2720       re_node_set_empty (&next_nodes);
2721       if (mctx->state_log[str_idx + 1])
2722         {
2723           err = re_node_set_merge (&next_nodes,
2724                                    &mctx->state_log[str_idx + 1]->nodes);
2725           if (BE (err != REG_NOERROR, 0))
2726             {
2727               re_node_set_free (&next_nodes);
2728               return err;
2729             }
2730         }
2731       if (cur_state)
2732         {
2733           err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx,
2734                                              &cur_state->nodes, &next_nodes);
2735           if (BE (err != REG_NOERROR, 0))
2736             {
2737               re_node_set_free (&next_nodes);
2738               return err;
2739             }
2740         }
2741       ++str_idx;
2742       if (next_nodes.nelem)
2743         {
2744           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num,
2745                                           fl_open);
2746           if (BE (err != REG_NOERROR, 0))
2747             {
2748               re_node_set_free (&next_nodes);
2749               return err;
2750             }
2751           err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
2752                                     subexp_num, fl_open);
2753           if (BE ( err != REG_NOERROR, 0))
2754             {
2755               re_node_set_free (&next_nodes);
2756               return err;
2757             }
2758         }
2759       context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
2760                                       preg->newline_anchor);
2761       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2762       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2763         {
2764           re_node_set_free (&next_nodes);
2765           return err;
2766         }
2767       mctx->state_log[str_idx] = cur_state;
2768       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2769     }
2770   re_node_set_free (&next_nodes);
2771   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2772                : &mctx->state_log[last_str]->nodes);
2773   path->next_idx = str_idx;
2774
2775   /* Fix MCTX.  */
2776   mctx->state_log = backup_state_log;
2777   mctx->input->cur_idx = backup_cur_idx;
2778
2779   if (cur_nodes == NULL)
2780     return REG_NOMATCH;
2781   /* Then check the current node set has the node LAST_NODE.  */
2782   return (re_node_set_contains (cur_nodes, last_node)
2783           || re_node_set_contains (cur_nodes, last_node) ? REG_NOERROR
2784           : REG_NOMATCH);
2785 }
2786
2787 /* Helper functions for check_arrival.  */
2788
2789 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2790    to NEXT_NODES.
2791    TODO: This function is similar to the functions transit_state*(),
2792          however this function has many additional works.
2793          Can't we unify them?  */
2794
2795 static reg_errcode_t
2796 check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes)
2797      const regex_t *preg;
2798      re_dfa_t *dfa;
2799      re_match_context_t *mctx;
2800      int str_idx;
2801      re_node_set *cur_nodes, *next_nodes;
2802 {
2803   int cur_idx;
2804   reg_errcode_t err;
2805   re_node_set union_set;
2806   re_node_set_init_empty (&union_set);
2807   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2808     {
2809       int naccepted = 0;
2810       int cur_node = cur_nodes->elems[cur_idx];
2811       re_token_type_t type = dfa->nodes[cur_node].type;
2812       if (IS_EPSILON_NODE(type))
2813         continue;
2814 #ifdef RE_ENABLE_I18N
2815       /* If the node may accept `multi byte'.  */
2816       if (ACCEPT_MB_NODE (type))
2817         {
2818           naccepted = check_node_accept_bytes (preg, cur_node, mctx->input,
2819                                                str_idx);
2820           if (naccepted > 1)
2821             {
2822               re_dfastate_t *dest_state;
2823               int next_node = dfa->nexts[cur_node];
2824               int next_idx = str_idx + naccepted;
2825               dest_state = mctx->state_log[next_idx];
2826               re_node_set_empty (&union_set);
2827               if (dest_state)
2828                 {
2829                   err = re_node_set_merge (&union_set, &dest_state->nodes);
2830                   if (BE (err != REG_NOERROR, 0))
2831                     {
2832                       re_node_set_free (&union_set);
2833                       return err;
2834                     }
2835                   err = re_node_set_insert (&union_set, next_node);
2836                   if (BE (err < 0, 0))
2837                     {
2838                       re_node_set_free (&union_set);
2839                       return REG_ESPACE;
2840                     }
2841                 }
2842               else
2843                 {
2844                   err = re_node_set_insert (&union_set, next_node);
2845                   if (BE (err < 0, 0))
2846                     {
2847                       re_node_set_free (&union_set);
2848                       return REG_ESPACE;
2849                     }
2850                 }
2851               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
2852                                                             &union_set);
2853               if (BE (mctx->state_log[next_idx] == NULL
2854                       && err != REG_NOERROR, 0))
2855                 {
2856                   re_node_set_free (&union_set);
2857                   return err;
2858                 }
2859             }
2860         }
2861 #endif /* RE_ENABLE_I18N */
2862       if (naccepted
2863           || check_node_accept (preg, dfa->nodes + cur_node, mctx,
2864                                 str_idx))
2865         {
2866           err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
2867           if (BE (err < 0, 0))
2868             {
2869               re_node_set_free (&union_set);
2870               return REG_ESPACE;
2871             }
2872         }
2873     }
2874   re_node_set_free (&union_set);
2875   return REG_NOERROR;
2876 }
2877
2878 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
2879    CUR_NODES, however exclude the nodes which are:
2880     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
2881     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
2882 */
2883
2884 static reg_errcode_t
2885 check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, fl_open)
2886      re_dfa_t *dfa;
2887      re_node_set *cur_nodes;
2888      int ex_subexp, fl_open;
2889 {
2890   reg_errcode_t err;
2891   int idx, outside_node;
2892   re_node_set new_nodes;
2893 #ifdef DEBUG
2894   assert (cur_nodes->nelem);
2895 #endif
2896   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
2897   if (BE (err != REG_NOERROR, 0))
2898     return err;
2899   /* Create a new node set NEW_NODES with the nodes which are epsilon
2900      closures of the node in CUR_NODES.  */
2901
2902   for (idx = 0; idx < cur_nodes->nelem; ++idx)
2903     {
2904       int cur_node = cur_nodes->elems[idx];
2905       re_node_set *eclosure = dfa->eclosures + cur_node;
2906       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, fl_open);
2907       if (outside_node == -1)
2908         {
2909           /* There are no problematic nodes, just merge them.  */
2910           err = re_node_set_merge (&new_nodes, eclosure);
2911           if (BE (err != REG_NOERROR, 0))
2912             {
2913               re_node_set_free (&new_nodes);
2914               return err;
2915             }
2916         }
2917       else
2918         {
2919           /* There are problematic nodes, re-calculate incrementally.  */
2920           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
2921                                               ex_subexp, fl_open);
2922           if (BE (err != REG_NOERROR, 0))
2923             {
2924               re_node_set_free (&new_nodes);
2925               return err;
2926             }
2927         }
2928     }
2929   re_node_set_free (cur_nodes);
2930   *cur_nodes = new_nodes;
2931   return REG_NOERROR;
2932 }
2933
2934 /* Helper function for check_arrival_expand_ecl.
2935    Check incrementally the epsilon closure of TARGET, and if it isn't
2936    problematic append it to DST_NODES.  */
2937
2938 static reg_errcode_t
2939 check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, fl_open)
2940      re_dfa_t *dfa;
2941      int target, ex_subexp, fl_open;
2942      re_node_set *dst_nodes;
2943 {
2944   int cur_node, type;
2945   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
2946     {
2947       int err;
2948       type = dfa->nodes[cur_node].type;
2949
2950       if (((type == OP_OPEN_SUBEXP && fl_open)
2951            || (type == OP_CLOSE_SUBEXP && !fl_open))
2952           && dfa->nodes[cur_node].opr.idx == ex_subexp)
2953         {
2954           if (!fl_open)
2955             {
2956               err = re_node_set_insert (dst_nodes, cur_node);
2957               if (BE (err == -1, 0))
2958                 return REG_ESPACE;
2959             }
2960           break;
2961         }
2962       err = re_node_set_insert (dst_nodes, cur_node);
2963       if (BE (err == -1, 0))
2964         return REG_ESPACE;
2965       if (dfa->edests[cur_node].nelem == 0)
2966         break;
2967       if (dfa->edests[cur_node].nelem == 2)
2968         {
2969           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
2970                                               dfa->edests[cur_node].elems[1],
2971                                               ex_subexp, fl_open);
2972           if (BE (err != REG_NOERROR, 0))
2973             return err;
2974         }
2975       cur_node = dfa->edests[cur_node].elems[0];
2976     }
2977   return REG_NOERROR;
2978 }
2979
2980
2981 /* For all the back references in the current state, calculate the
2982    destination of the back references by the appropriate entry
2983    in MCTX->BKREF_ENTS.  */
2984
2985 static reg_errcode_t
2986 expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
2987                     fl_open)
2988      const regex_t *preg;
2989      re_match_context_t *mctx;
2990      int cur_str, last_str, subexp_num, fl_open;
2991      re_node_set *cur_nodes;
2992 {
2993   reg_errcode_t err;
2994   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2995   int cache_idx, cache_idx_start;
2996   /* The current state.  */
2997
2998   cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
2999   for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
3000     {
3001       int to_idx, next_node;
3002       struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
3003       if (ent->str_idx > cur_str)
3004         break;
3005       /* Is this entry ENT is appropriate?  */
3006       if (!re_node_set_contains (cur_nodes, ent->node))
3007         continue; /* No.  */
3008
3009       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3010       /* Calculate the destination of the back reference, and append it
3011          to MCTX->STATE_LOG.  */
3012       if (to_idx == cur_str)
3013         {
3014           /* The backreference did epsilon transit, we must re-check all the
3015              node in the current state.  */
3016           re_node_set new_dests;
3017           reg_errcode_t err2, err3;
3018           next_node = dfa->edests[ent->node].elems[0];
3019           if (re_node_set_contains (cur_nodes, next_node))
3020             continue;
3021           err = re_node_set_init_1 (&new_dests, next_node);
3022           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num,
3023                                            fl_open);
3024           err3 = re_node_set_merge (cur_nodes, &new_dests);
3025           re_node_set_free (&new_dests);
3026           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3027                   || err3 != REG_NOERROR, 0))
3028             {
3029               err = (err != REG_NOERROR ? err
3030                      : (err2 != REG_NOERROR ? err2 : err3));
3031               return err;
3032             }
3033           /* TODO: It is still inefficient...  */
3034           cache_idx = cache_idx_start - 1;
3035           continue;
3036         }
3037       else
3038         {
3039           re_node_set union_set;
3040           next_node = dfa->nexts[ent->node];
3041           if (mctx->state_log[to_idx])
3042             {
3043               int ret;
3044               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3045                                         next_node))
3046                 continue;
3047               err = re_node_set_init_copy (&union_set,
3048                                            &mctx->state_log[to_idx]->nodes);
3049               ret = re_node_set_insert (&union_set, next_node);
3050               if (BE (err != REG_NOERROR || ret < 0, 0))
3051                 {
3052                   re_node_set_free (&union_set);
3053                   err = err != REG_NOERROR ? err : REG_ESPACE;
3054                   return err;
3055                 }
3056             }
3057           else
3058             {
3059               err = re_node_set_init_1 (&union_set, next_node);
3060               if (BE (err != REG_NOERROR, 0))
3061                 return err;
3062             }
3063           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3064           re_node_set_free (&union_set);
3065           if (BE (mctx->state_log[to_idx] == NULL
3066                   && err != REG_NOERROR, 0))
3067             return err;
3068         }
3069     }
3070   return REG_NOERROR;
3071 }
3072
3073 /* Build transition table for the state.
3074    Return the new table if succeeded, otherwise return NULL.  */
3075
3076 static re_dfastate_t **
3077 build_trtable (preg, state, fl_search)
3078     const regex_t *preg;
3079     const re_dfastate_t *state;
3080     int fl_search;
3081 {
3082   reg_errcode_t err;
3083   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3084   int i, j, k, ch;
3085   int dests_node_malloced = 0, dest_states_malloced = 0;
3086   int ndests; /* Number of the destination states from `state'.  */
3087   re_dfastate_t **trtable;
3088   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3089   re_node_set follows, *dests_node;
3090   bitset *dests_ch;
3091   bitset acceptable;
3092
3093   /* We build DFA states which corresponds to the destination nodes
3094      from `state'.  `dests_node[i]' represents the nodes which i-th
3095      destination state contains, and `dests_ch[i]' represents the
3096      characters which i-th destination state accepts.  */
3097 #ifdef _LIBC
3098   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3099     dests_node = (re_node_set *)
3100                  alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3101   else
3102 #endif
3103     {
3104       dests_node = (re_node_set *)
3105                    malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3106       if (BE (dests_node == NULL, 0))
3107         return NULL;
3108       dests_node_malloced = 1;
3109     }
3110   dests_ch = (bitset *) (dests_node + SBC_MAX);
3111
3112   /* Initialize transiton table.  */
3113   trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3114   if (BE (trtable == NULL, 0))
3115     {
3116       if (dests_node_malloced)
3117         free (dests_node);
3118       return NULL;
3119     }
3120
3121   /* At first, group all nodes belonging to `state' into several
3122      destinations.  */
3123   ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
3124   if (BE (ndests <= 0, 0))
3125     {
3126       if (dests_node_malloced)
3127         free (dests_node);
3128       /* Return NULL in case of an error, trtable otherwise.  */
3129       if (ndests == 0)
3130         return trtable;
3131       free (trtable);
3132       return NULL;
3133     }
3134
3135   err = re_node_set_alloc (&follows, ndests + 1);
3136   if (BE (err != REG_NOERROR, 0))
3137     goto out_free;
3138
3139 #ifdef _LIBC
3140   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3141                          + ndests * 3 * sizeof (re_dfastate_t *)))
3142     dest_states = (re_dfastate_t **)
3143                   alloca (ndests * 3 * sizeof (re_dfastate_t *));
3144   else
3145 #endif
3146     {
3147       dest_states = (re_dfastate_t **)
3148                     malloc (ndests * 3 * sizeof (re_dfastate_t *));
3149       if (BE (dest_states == NULL, 0))
3150         {
3151 out_free:
3152           if (dest_states_malloced)
3153             free (dest_states);
3154           re_node_set_free (&follows);
3155           for (i = 0; i < ndests; ++i)
3156             re_node_set_free (dests_node + i);
3157           free (trtable);
3158           if (dests_node_malloced)
3159             free (dests_node);
3160           return NULL;
3161         }
3162       dest_states_malloced = 1;
3163     }
3164   dest_states_word = dest_states + ndests;
3165   dest_states_nl = dest_states_word + ndests;
3166   bitset_empty (acceptable);
3167
3168   /* Then build the states for all destinations.  */
3169   for (i = 0; i < ndests; ++i)
3170     {
3171       int next_node;
3172       re_node_set_empty (&follows);
3173       /* Merge the follows of this destination states.  */
3174       for (j = 0; j < dests_node[i].nelem; ++j)
3175         {
3176           next_node = dfa->nexts[dests_node[i].elems[j]];
3177           if (next_node != -1)
3178             {
3179               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3180               if (BE (err != REG_NOERROR, 0))
3181                 goto out_free;
3182             }
3183         }
3184       /* If search flag is set, merge the initial state.  */
3185       if (fl_search)
3186         {
3187 #ifdef RE_ENABLE_I18N
3188           int not_initial = 0;
3189           for (j = 0; j < follows.nelem; ++j)
3190             if (dfa->nodes[follows.elems[j]].type == CHARACTER)
3191               {
3192                 not_initial = dfa->nodes[follows.elems[j]].mb_partial;
3193                 break;
3194               }
3195           if (!not_initial)
3196 #endif
3197             {
3198               err = re_node_set_merge (&follows,
3199                                        dfa->init_state->entrance_nodes);
3200               if (BE (err != REG_NOERROR, 0))
3201                 goto out_free;
3202             }
3203         }
3204       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3205       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3206         goto out_free;
3207       /* If the new state has context constraint,
3208          build appropriate states for these contexts.  */
3209       if (dest_states[i]->has_constraint)
3210         {
3211           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3212                                                           CONTEXT_WORD);
3213           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3214             goto out_free;
3215           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3216                                                         CONTEXT_NEWLINE);
3217           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3218             goto out_free;
3219         }
3220       else
3221         {
3222           dest_states_word[i] = dest_states[i];
3223           dest_states_nl[i] = dest_states[i];
3224         }
3225       bitset_merge (acceptable, dests_ch[i]);
3226     }
3227
3228   /* Update the transition table.  */
3229   /* For all characters ch...:  */
3230   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
3231     for (j = 0; j < UINT_BITS; ++j, ++ch)
3232       if ((acceptable[i] >> j) & 1)
3233         {
3234           /* The current state accepts the character ch.  */
3235           if (IS_WORD_CHAR (ch))
3236             {
3237               for (k = 0; k < ndests; ++k)
3238                 if ((dests_ch[k][i] >> j) & 1)
3239                   {
3240                     /* k-th destination accepts the word character ch.  */
3241                     trtable[ch] = dest_states_word[k];
3242                     /* There must be only one destination which accepts
3243                        character ch.  See group_nodes_into_DFAstates.  */
3244                     break;
3245                   }
3246             }
3247           else /* not WORD_CHAR */
3248             {
3249               for (k = 0; k < ndests; ++k)
3250                 if ((dests_ch[k][i] >> j) & 1)
3251                   {
3252                     /* k-th destination accepts the non-word character ch.  */
3253                     trtable[ch] = dest_states[k];
3254                     /* There must be only one destination which accepts
3255                        character ch.  See group_nodes_into_DFAstates.  */
3256                     break;
3257                   }
3258             }
3259         }
3260   /* new line */
3261   if (bitset_contain (acceptable, NEWLINE_CHAR))
3262     {
3263       /* The current state accepts newline character.  */
3264       for (k = 0; k < ndests; ++k)
3265         if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
3266           {
3267             /* k-th destination accepts newline character.  */
3268             trtable[NEWLINE_CHAR] = dest_states_nl[k];
3269             /* There must be only one destination which accepts
3270                newline.  See group_nodes_into_DFAstates.  */
3271             break;
3272           }
3273     }
3274
3275   if (dest_states_malloced)
3276     free (dest_states);
3277
3278   re_node_set_free (&follows);
3279   for (i = 0; i < ndests; ++i)
3280     re_node_set_free (dests_node + i);
3281
3282   if (dests_node_malloced)
3283     free (dests_node);
3284
3285   return trtable;
3286 }
3287
3288 /* Group all nodes belonging to STATE into several destinations.
3289    Then for all destinations, set the nodes belonging to the destination
3290    to DESTS_NODE[i] and set the characters accepted by the destination
3291    to DEST_CH[i].  This function return the number of destinations.  */
3292
3293 static int
3294 group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
3295     const regex_t *preg;
3296     const re_dfastate_t *state;
3297     re_node_set *dests_node;
3298     bitset *dests_ch;
3299 {
3300   reg_errcode_t err;
3301   const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3302   int i, j, k;
3303   int ndests; /* Number of the destinations from `state'.  */
3304   bitset accepts; /* Characters a node can accept.  */
3305   const re_node_set *cur_nodes = &state->nodes;
3306   bitset_empty (accepts);
3307   ndests = 0;
3308
3309   /* For all the nodes belonging to `state',  */
3310   for (i = 0; i < cur_nodes->nelem; ++i)
3311     {
3312       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3313       re_token_type_t type = node->type;
3314       unsigned int constraint = node->constraint;
3315
3316       /* Enumerate all single byte character this node can accept.  */
3317       if (type == CHARACTER)
3318         bitset_set (accepts, node->opr.c);
3319       else if (type == SIMPLE_BRACKET)
3320         {
3321           bitset_merge (accepts, node->opr.sbcset);
3322         }
3323       else if (type == OP_PERIOD)
3324         {
3325           bitset_set_all (accepts);
3326           if (!(preg->syntax & RE_DOT_NEWLINE))
3327             bitset_clear (accepts, '\n');
3328           if (preg->syntax & RE_DOT_NOT_NULL)
3329             bitset_clear (accepts, '\0');
3330         }
3331       else
3332         continue;
3333
3334       /* Check the `accepts' and sift the characters which are not
3335          match it the context.  */
3336       if (constraint)
3337         {
3338           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3339             {
3340               int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3341               bitset_empty (accepts);
3342               if (accepts_newline)
3343                 bitset_set (accepts, NEWLINE_CHAR);
3344               else
3345                 continue;
3346             }
3347           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3348             {
3349               bitset_empty (accepts);
3350               continue;
3351             }
3352           if (constraint & NEXT_WORD_CONSTRAINT)
3353             for (j = 0; j < BITSET_UINTS; ++j)
3354               accepts[j] &= dfa->word_char[j];
3355           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3356             for (j = 0; j < BITSET_UINTS; ++j)
3357               accepts[j] &= ~dfa->word_char[j];
3358         }
3359
3360       /* Then divide `accepts' into DFA states, or create a new
3361          state.  */
3362       for (j = 0; j < ndests; ++j)
3363         {
3364           bitset intersec; /* Intersection sets, see below.  */
3365           bitset remains;
3366           /* Flags, see below.  */
3367           int has_intersec, not_subset, not_consumed;
3368
3369           /* Optimization, skip if this state doesn't accept the character.  */
3370           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3371             continue;
3372
3373           /* Enumerate the intersection set of this state and `accepts'.  */
3374           has_intersec = 0;
3375           for (k = 0; k < BITSET_UINTS; ++k)
3376             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3377           /* And skip if the intersection set is empty.  */
3378           if (!has_intersec)
3379             continue;
3380
3381           /* Then check if this state is a subset of `accepts'.  */
3382           not_subset = not_consumed = 0;
3383           for (k = 0; k < BITSET_UINTS; ++k)
3384             {
3385               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3386               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3387             }
3388
3389           /* If this state isn't a subset of `accepts', create a
3390              new group state, which has the `remains'. */
3391           if (not_subset)
3392             {
3393               bitset_copy (dests_ch[ndests], remains);
3394               bitset_copy (dests_ch[j], intersec);
3395               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3396               if (BE (err != REG_NOERROR, 0))
3397                 goto error_return;
3398               ++ndests;
3399             }
3400
3401           /* Put the position in the current group. */
3402           err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3403           if (BE (err < 0, 0))
3404             goto error_return;
3405
3406           /* If all characters are consumed, go to next node. */
3407           if (!not_consumed)
3408             break;
3409         }
3410       /* Some characters remain, create a new group. */
3411       if (j == ndests)
3412         {
3413           bitset_copy (dests_ch[ndests], accepts);
3414           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3415           if (BE (err != REG_NOERROR, 0))
3416             goto error_return;
3417           ++ndests;
3418           bitset_empty (accepts);
3419         }
3420     }
3421   return ndests;
3422  error_return:
3423   for (j = 0; j < ndests; ++j)
3424     re_node_set_free (dests_node + j);
3425   return -1;
3426 }
3427
3428 #ifdef RE_ENABLE_I18N
3429 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3430    Return the number of the bytes the node accepts.
3431    STR_IDX is the current index of the input string.
3432
3433    This function handles the nodes which can accept one character, or
3434    one collating element like '.', '[a-z]', opposite to the other nodes
3435    can only accept one byte.  */
3436
3437 static int
3438 check_node_accept_bytes (preg, node_idx, input, str_idx)
3439     const regex_t *preg;
3440     int node_idx, str_idx;
3441     const re_string_t *input;
3442 {
3443   const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3444   const re_token_t *node = dfa->nodes + node_idx;
3445   int elem_len = re_string_elem_size_at (input, str_idx);
3446   int char_len = re_string_char_size_at (input, str_idx);
3447   int i;
3448 # ifdef _LIBC
3449   int j;
3450   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3451 # endif /* _LIBC */
3452   if (elem_len <= 1 && char_len <= 1)
3453     return 0;
3454   if (node->type == OP_PERIOD)
3455     {
3456       /* '.' accepts any one character except the following two cases.  */
3457       if ((!(preg->syntax & RE_DOT_NEWLINE) &&
3458            re_string_byte_at (input, str_idx) == '\n') ||
3459           ((preg->syntax & RE_DOT_NOT_NULL) &&
3460            re_string_byte_at (input, str_idx) == '\0'))
3461         return 0;
3462       return char_len;
3463     }
3464   else if (node->type == COMPLEX_BRACKET)
3465     {
3466       const re_charset_t *cset = node->opr.mbcset;
3467 # ifdef _LIBC
3468       const unsigned char *pin = ((char *) re_string_get_buffer (input)
3469                                   + str_idx);
3470 # endif /* _LIBC */
3471       int match_len = 0;
3472       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3473                     ? re_string_wchar_at (input, str_idx) : 0);
3474
3475       /* match with multibyte character?  */
3476       for (i = 0; i < cset->nmbchars; ++i)
3477         if (wc == cset->mbchars[i])
3478           {
3479             match_len = char_len;
3480             goto check_node_accept_bytes_match;
3481           }
3482       /* match with character_class?  */
3483       for (i = 0; i < cset->nchar_classes; ++i)
3484         {
3485           wctype_t wt = cset->char_classes[i];
3486           if (__iswctype (wc, wt))
3487             {
3488               match_len = char_len;
3489               goto check_node_accept_bytes_match;
3490             }
3491         }
3492
3493 # ifdef _LIBC
3494       if (nrules != 0)
3495         {
3496           unsigned int in_collseq = 0;
3497           const int32_t *table, *indirect;
3498           const unsigned char *weights, *extra;
3499           const char *collseqwc;
3500           int32_t idx;
3501           /* This #include defines a local function!  */
3502 #  include <locale/weight.h>
3503
3504           /* match with collating_symbol?  */
3505           if (cset->ncoll_syms)
3506             extra = (const unsigned char *)
3507               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3508           for (i = 0; i < cset->ncoll_syms; ++i)
3509             {
3510               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3511               /* Compare the length of input collating element and
3512                  the length of current collating element.  */
3513               if (*coll_sym != elem_len)
3514                 continue;
3515               /* Compare each bytes.  */
3516               for (j = 0; j < *coll_sym; j++)
3517                 if (pin[j] != coll_sym[1 + j])
3518                   break;
3519               if (j == *coll_sym)
3520                 {
3521                   /* Match if every bytes is equal.  */
3522                   match_len = j;
3523                   goto check_node_accept_bytes_match;
3524                 }
3525             }
3526
3527           if (cset->nranges)
3528             {
3529               if (elem_len <= char_len)
3530                 {
3531                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3532                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3533                 }
3534               else
3535                 in_collseq = find_collation_sequence_value (pin, elem_len);
3536             }
3537           /* match with range expression?  */
3538           for (i = 0; i < cset->nranges; ++i)
3539             if (cset->range_starts[i] <= in_collseq
3540                 && in_collseq <= cset->range_ends[i])
3541               {
3542                 match_len = elem_len;
3543                 goto check_node_accept_bytes_match;
3544               }
3545
3546           /* match with equivalence_class?  */
3547           if (cset->nequiv_classes)
3548             {
3549               const unsigned char *cp = pin;
3550               table = (const int32_t *)
3551                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3552               weights = (const unsigned char *)
3553                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3554               extra = (const unsigned char *)
3555                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3556               indirect = (const int32_t *)
3557                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3558               idx = findidx (&cp);
3559               if (idx > 0)
3560                 for (i = 0; i < cset->nequiv_classes; ++i)
3561                   {
3562                     int32_t equiv_class_idx = cset->equiv_classes[i];
3563                     size_t weight_len = weights[idx];
3564                     if (weight_len == weights[equiv_class_idx])
3565                       {
3566                         int cnt = 0;
3567                         while (cnt <= weight_len
3568                                && (weights[equiv_class_idx + 1 + cnt]
3569                                    == weights[idx + 1 + cnt]))
3570                           ++cnt;
3571                         if (cnt > weight_len)
3572                           {
3573                             match_len = elem_len;
3574                             goto check_node_accept_bytes_match;
3575                           }
3576                       }
3577                   }
3578             }
3579         }
3580       else
3581 # endif /* _LIBC */
3582         {
3583           /* match with range expression?  */
3584 #if __GNUC__ >= 2
3585           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3586 #else
3587           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3588           cmp_buf[2] = wc;
3589 #endif
3590           for (i = 0; i < cset->nranges; ++i)
3591             {
3592               cmp_buf[0] = cset->range_starts[i];
3593               cmp_buf[4] = cset->range_ends[i];
3594               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3595                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3596                 {
3597                   match_len = char_len;
3598                   goto check_node_accept_bytes_match;
3599                 }
3600             }
3601         }
3602     check_node_accept_bytes_match:
3603       if (!cset->non_match)
3604         return match_len;
3605       else
3606         {
3607           if (match_len > 0)
3608             return 0;
3609           else
3610             return (elem_len > char_len) ? elem_len : char_len;
3611         }
3612     }
3613   return 0;
3614 }
3615
3616 # ifdef _LIBC
3617 static unsigned int
3618 find_collation_sequence_value (mbs, mbs_len)
3619     const unsigned char *mbs;
3620     size_t mbs_len;
3621 {
3622   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3623   if (nrules == 0)
3624     {
3625       if (mbs_len == 1)
3626         {
3627           /* No valid character.  Match it as a single byte character.  */
3628           const unsigned char *collseq = (const unsigned char *)
3629             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3630           return collseq[mbs[0]];
3631         }
3632       return UINT_MAX;
3633     }
3634   else
3635     {
3636       int32_t idx;
3637       const unsigned char *extra = (const unsigned char *)
3638         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3639
3640       for (idx = 0; ;)
3641         {
3642           int mbs_cnt, found = 0;
3643           int32_t elem_mbs_len;
3644           /* Skip the name of collating element name.  */
3645           idx = idx + extra[idx] + 1;
3646           elem_mbs_len = extra[idx++];
3647           if (mbs_len == elem_mbs_len)
3648             {
3649               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3650                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3651                   break;
3652               if (mbs_cnt == elem_mbs_len)
3653                 /* Found the entry.  */
3654                 found = 1;
3655             }
3656           /* Skip the byte sequence of the collating element.  */
3657           idx += elem_mbs_len;
3658           /* Adjust for the alignment.  */
3659           idx = (idx + 3) & ~3;
3660           /* Skip the collation sequence value.  */
3661           idx += sizeof (uint32_t);
3662           /* Skip the wide char sequence of the collating element.  */
3663           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3664           /* If we found the entry, return the sequence value.  */
3665           if (found)
3666             return *(uint32_t *) (extra + idx);
3667           /* Skip the collation sequence value.  */
3668           idx += sizeof (uint32_t);
3669         }
3670     }
3671 }
3672 # endif /* _LIBC */
3673 #endif /* RE_ENABLE_I18N */
3674
3675 /* Check whether the node accepts the byte which is IDX-th
3676    byte of the INPUT.  */
3677
3678 static int
3679 check_node_accept (preg, node, mctx, idx)
3680     const regex_t *preg;
3681     const re_token_t *node;
3682     const re_match_context_t *mctx;
3683     int idx;
3684 {
3685   unsigned char ch;
3686   if (node->constraint)
3687     {
3688       /* The node has constraints.  Check whether the current context
3689          satisfies the constraints.  */
3690       unsigned int context = re_string_context_at (mctx->input, idx,
3691                                                    mctx->eflags,
3692                                                    preg->newline_anchor);
3693       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3694         return 0;
3695     }
3696   ch = re_string_byte_at (mctx->input, idx);
3697   if (node->type == CHARACTER)
3698     return node->opr.c == ch;
3699   else if (node->type == SIMPLE_BRACKET)
3700     return bitset_contain (node->opr.sbcset, ch);
3701   else if (node->type == OP_PERIOD)
3702     return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
3703              || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
3704   else
3705     return 0;
3706 }
3707
3708 /* Extend the buffers, if the buffers have run out.  */
3709
3710 static reg_errcode_t
3711 extend_buffers (mctx)
3712      re_match_context_t *mctx;
3713 {
3714   reg_errcode_t ret;
3715   re_string_t *pstr = mctx->input;
3716
3717   /* Double the lengthes of the buffers.  */
3718   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
3719   if (BE (ret != REG_NOERROR, 0))
3720     return ret;
3721
3722   if (mctx->state_log != NULL)
3723     {
3724       /* And double the length of state_log.  */
3725       re_dfastate_t **new_array;
3726       new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3727                               pstr->bufs_len * 2);
3728       if (BE (new_array == NULL, 0))
3729         return REG_ESPACE;
3730       mctx->state_log = new_array;
3731     }
3732
3733   /* Then reconstruct the buffers.  */
3734   if (pstr->icase)
3735     {
3736 #ifdef RE_ENABLE_I18N
3737       if (MB_CUR_MAX > 1)
3738         build_wcs_upper_buffer (pstr);
3739       else
3740 #endif /* RE_ENABLE_I18N  */
3741         build_upper_buffer (pstr);
3742     }
3743   else
3744     {
3745 #ifdef RE_ENABLE_I18N
3746       if (MB_CUR_MAX > 1)
3747         build_wcs_buffer (pstr);
3748       else
3749 #endif /* RE_ENABLE_I18N  */
3750         {
3751           if (pstr->trans != NULL)
3752             re_string_translate_buffer (pstr);
3753           else
3754             pstr->valid_len = pstr->bufs_len;
3755         }
3756     }
3757   return REG_NOERROR;
3758 }
3759
3760 \f
3761 /* Functions for matching context.  */
3762
3763 /* Initialize MCTX.  */
3764
3765 static reg_errcode_t
3766 match_ctx_init (mctx, eflags, input, n)
3767     re_match_context_t *mctx;
3768     int eflags, n;
3769     re_string_t *input;
3770 {
3771   mctx->eflags = eflags;
3772   mctx->input = input;
3773   mctx->match_last = -1;
3774   if (n > 0)
3775     {
3776       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
3777       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
3778       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
3779         return REG_ESPACE;
3780     }
3781   else
3782     mctx->bkref_ents = NULL;
3783   mctx->nbkref_ents = 0;
3784   mctx->abkref_ents = n;
3785   mctx->max_mb_elem_len = 1;
3786   mctx->nsub_tops = 0;
3787   mctx->asub_tops = n;
3788   return REG_NOERROR;
3789 }
3790
3791 /* Clean the entries which depend on the current input in MCTX.
3792    This function must be invoked when the matcher changes the start index
3793    of the input, or changes the input string.  */
3794
3795 static void
3796 match_ctx_clean (mctx)
3797     re_match_context_t *mctx;
3798 {
3799   match_ctx_free_subtops (mctx);
3800   mctx->nsub_tops = 0;
3801   mctx->nbkref_ents = 0;
3802 }
3803
3804 /* Free all the memory associated with MCTX.  */
3805
3806 static void
3807 match_ctx_free (mctx)
3808     re_match_context_t *mctx;
3809 {
3810   match_ctx_free_subtops (mctx);
3811   re_free (mctx->sub_tops);
3812   re_free (mctx->bkref_ents);
3813 }
3814
3815 /* Free all the memory associated with MCTX->SUB_TOPS.  */
3816
3817 static void
3818 match_ctx_free_subtops (mctx)
3819      re_match_context_t *mctx;
3820 {
3821   int st_idx;
3822   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
3823     {
3824       int sl_idx;
3825       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
3826       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
3827         {
3828           re_sub_match_last_t *last = top->lasts[sl_idx];
3829           re_free (last->path.array);
3830           re_free (last);
3831         }
3832       re_free (top->lasts);
3833       if (top->path)
3834         {
3835           re_free (top->path->array);
3836           re_free (top->path);
3837         }
3838       free (top);
3839     }
3840 }
3841
3842 /* Add a new backreference entry to MCTX.
3843    Note that we assume that caller never call this function with duplicate
3844    entry, and call with STR_IDX which isn't smaller than any existing entry.
3845 */
3846
3847 static reg_errcode_t
3848 match_ctx_add_entry (mctx, node, str_idx, from, to)
3849      re_match_context_t *mctx;
3850      int node, str_idx, from, to;
3851 {
3852   if (mctx->nbkref_ents >= mctx->abkref_ents)
3853     {
3854       struct re_backref_cache_entry* new_entry;
3855       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
3856                               mctx->abkref_ents * 2);
3857       if (BE (new_entry == NULL, 0))
3858         {
3859           re_free (mctx->bkref_ents);
3860           return REG_ESPACE;
3861         }
3862       mctx->bkref_ents = new_entry;
3863       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
3864               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
3865       mctx->abkref_ents *= 2;
3866     }
3867   mctx->bkref_ents[mctx->nbkref_ents].node = node;
3868   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
3869   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
3870   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
3871   mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
3872   if (mctx->max_mb_elem_len < to - from)
3873     mctx->max_mb_elem_len = to - from;
3874   return REG_NOERROR;
3875 }
3876
3877 /* Search for the first entry which has the same str_idx.
3878    Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
3879
3880 static int
3881 search_cur_bkref_entry (mctx, str_idx)
3882      re_match_context_t *mctx;
3883      int str_idx;
3884 {
3885   int left, right, mid;
3886   right = mctx->nbkref_ents;
3887   for (left = 0; left < right;)
3888     {
3889       mid = (left + right) / 2;
3890       if (mctx->bkref_ents[mid].str_idx < str_idx)
3891         left = mid + 1;
3892       else
3893         right = mid;
3894     }
3895   return left;
3896 }
3897
3898 static void
3899 match_ctx_clear_flag (mctx)
3900      re_match_context_t *mctx;
3901 {
3902   int i;
3903   for (i = 0; i < mctx->nbkref_ents; ++i)
3904     {
3905       mctx->bkref_ents[i].flag = 0;
3906     }
3907 }
3908
3909 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
3910    at STR_IDX.  */
3911
3912 static reg_errcode_t
3913 match_ctx_add_subtop (mctx, node, str_idx)
3914      re_match_context_t *mctx;
3915      int node, str_idx;
3916 {
3917 #ifdef DEBUG
3918   assert (mctx->sub_tops != NULL);
3919   assert (mctx->asub_tops > 0);
3920 #endif
3921   if (mctx->nsub_tops == mctx->asub_tops)
3922     {
3923       re_sub_match_top_t **new_array;
3924       mctx->asub_tops *= 2;
3925       new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *,
3926                               mctx->asub_tops);
3927       if (BE (new_array == NULL, 0))
3928         return REG_ESPACE;
3929       mctx->sub_tops = new_array;
3930     }
3931   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
3932   if (mctx->sub_tops[mctx->nsub_tops] == NULL)
3933     return REG_ESPACE;
3934   mctx->sub_tops[mctx->nsub_tops]->node = node;
3935   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
3936   return REG_NOERROR;
3937 }
3938
3939 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
3940    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
3941
3942 static re_sub_match_last_t *
3943 match_ctx_add_sublast (subtop, node, str_idx)
3944      re_sub_match_top_t *subtop;
3945      int node, str_idx;
3946 {
3947   re_sub_match_last_t *new_entry;
3948   if (subtop->nlasts == subtop->alasts)
3949     {
3950       re_sub_match_last_t **new_array;
3951       subtop->alasts = 2 * subtop->alasts + 1;
3952       new_array = re_realloc (subtop->lasts, re_sub_match_last_t *,
3953                               subtop->alasts);
3954       if (BE (new_array == NULL, 0))
3955         return NULL;
3956       subtop->lasts = new_array;
3957     }
3958   new_entry = calloc (1, sizeof (re_sub_match_last_t));
3959   if (BE (new_entry == NULL, 0))
3960     return NULL;
3961   subtop->lasts[subtop->nlasts] = new_entry;
3962   new_entry->node = node;
3963   new_entry->str_idx = str_idx;
3964   ++subtop->nlasts;
3965   return new_entry;
3966 }
3967
3968 static void
3969 sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
3970                check_subexp)
3971     re_sift_context_t *sctx;
3972     re_dfastate_t **sifted_sts, **limited_sts;
3973     int last_node, last_str_idx, check_subexp;
3974 {
3975   sctx->sifted_states = sifted_sts;
3976   sctx->limited_states = limited_sts;
3977   sctx->last_node = last_node;
3978   sctx->last_str_idx = last_str_idx;
3979   sctx->check_subexp = check_subexp;
3980   sctx->cur_bkref = -1;
3981   sctx->cls_subexp_idx = -1;
3982   re_node_set_init_empty (&sctx->limits);
3983 }