posix/regexec.c

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  22                                      re_string_t *input, int n);
  23 static void match_ctx_clean (re_match_context_t *mctx);
  24 static void match_ctx_free (re_match_context_t *cache);
  25 static void match_ctx_free_subtops (re_match_context_t *mctx);
  26 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
  27                                           int str_idx, int from, int to);
  28 static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
  29 static void match_ctx_clear_flag (re_match_context_t *mctx);
  30 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
  31                                            int str_idx);
  32 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  33                                                    int node, int str_idx);
  34 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  35                            re_dfastate_t **limited_sts, int last_node,
  36                            int last_str_idx, int check_subexp);
  37 static reg_errcode_t re_search_internal (const regex_t *preg,
  38                                          const char *string, int length,
  39                                          int start, int range, int stop,
  40                                          size_t nmatch, regmatch_t pmatch[],
  41                                          int eflags);
  42 static int re_search_2_stub (struct re_pattern_buffer *bufp,
  43                              const char *string1, int length1,
  44                              const char *string2, int length2,
  45                              int start, int range, struct re_registers *regs,
  46                              int stop, int ret_len);
  47 static int re_search_stub (struct re_pattern_buffer *bufp,
  48                            const char *string, int length, int start,
  49                            int range, int stop, struct re_registers *regs,
  50                            int ret_len);
  51 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  52                               int nregs, int regs_allocated);
  53 static inline re_dfastate_t *acquire_init_state_context
  54   (reg_errcode_t *err, const regex_t *preg, const re_match_context_t *mctx,
  55    int idx) __attribute ((always_inline));
  56 static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
  57                                              re_match_context_t *mctx);
  58 static int check_matching (const regex_t *preg, re_match_context_t *mctx,
  59                            int fl_longest_match);
  60 static int check_halt_node_context (const re_dfa_t *dfa, int node,
  61                                     unsigned int context);
  62 static int check_halt_state_context (const regex_t *preg,
  63                                      const re_dfastate_t *state,
  64                                      const re_match_context_t *mctx, int idx);
  65 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
  66                          int cur_idx, int nmatch);
  67 static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
  68                               const re_match_context_t *mctx,
  69                               int *pidx, int node, re_node_set *eps_via_nodes,
  70                               struct re_fail_stack_t *fs);
  71 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  72                                       int str_idx, int *dests, int nregs,
  73                                       regmatch_t *regs,
  74                                       re_node_set *eps_via_nodes);
  75 static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
  76                            regmatch_t *regs, re_node_set *eps_via_nodes);
  77 static reg_errcode_t set_regs (const regex_t *preg,
  78                                const re_match_context_t *mctx,
  79                                size_t nmatch, regmatch_t *pmatch,
  80                                int fl_backtrack);
  81 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
  82
  83 #ifdef RE_ENABLE_I18N
  84 static int sift_states_iter_mb (const regex_t *preg,
  85                                 const re_match_context_t *mctx,
  86                                 re_sift_context_t *sctx,
  87                                 int node_idx, int str_idx, int max_str_idx);
  88 #endif /* RE_ENABLE_I18N */
  89 static reg_errcode_t sift_states_backward (const regex_t *preg,
  90                                            re_match_context_t *mctx,
  91                                            re_sift_context_t *sctx);
  92 static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
  93                                               re_match_context_t *mctx,
  94                                               re_sift_context_t *sctx,
  95                                               int str_idx,
  96                                               re_node_set *dest_nodes);
  97 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
  98                                             re_node_set *dest_nodes,
  99                                             const re_node_set *candidates);
 100 static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
 101                                             re_node_set *dest_nodes,
 102                                             const re_node_set *and_nodes);
 103 static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
 104                              re_match_context_t *mctx, int dst_node,
 105                              int dst_idx, int src_node, int src_idx);
 106 static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
 107                                       int limit, re_node_set *eclosures,
 108                                       int subexp_idx, int node, int str_idx);
 109 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
 110                                           re_node_set *dest_nodes,
 111                                           const re_node_set *candidates,
 112                                           re_node_set *limits,
 113                                           struct re_backref_cache_entry *bkref_ents,
 114                                           int str_idx);
 115 static reg_errcode_t sift_states_bkref (const regex_t *preg,
 116                                         re_match_context_t *mctx,
 117                                         re_sift_context_t *sctx,
 118                                         int str_idx, re_node_set *dest_nodes);
 119 static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
 120                                               int next_state_log_idx);
 121 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
 122                                         re_dfastate_t **src, int num);
 123 static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
 124                                      re_match_context_t *mctx,
 125                                      re_dfastate_t *state);
 126 static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
 127                                                 re_match_context_t *mctx,
 128                                                 re_node_set *cur_nodes,
 129                                                 int str_idx);
 130 #if 0
 131 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
 132                                         re_dfastate_t *pstate,
 133                                         re_match_context_t *mctx);
 134 #endif
 135 #ifdef RE_ENABLE_I18N
 136 static reg_errcode_t transit_state_mb (const regex_t *preg,
 137                                        re_dfastate_t *pstate,
 138                                        re_match_context_t *mctx);
 139 #endif /* RE_ENABLE_I18N */
 140 static reg_errcode_t transit_state_bkref (const regex_t *preg,
 141                                           const re_node_set *nodes,
 142                                           re_match_context_t *mctx);
 143 static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
 144                                  int bkref_node, int bkref_str_idx);
 145 static reg_errcode_t get_subexp_sub (const regex_t *preg,
 146                                      re_match_context_t *mctx,
 147                                      const re_sub_match_top_t *sub_top,
 148                                      re_sub_match_last_t *sub_last,
 149                                      int bkref_node, int bkref_str);
 150 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 151                              int subexp_idx, int type);
 152 static reg_errcode_t check_arrival (const regex_t *preg,
 153                                     re_match_context_t *mctx,
 154                                     state_array_t *path, int top_node,
 155                                     int top_str, int last_node, int last_str,
 156                                     int type);
 157 static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
 158                                                    re_dfa_t *dfa,
 159                                                    re_match_context_t *mctx,
 160                                                    int str_idx,
 161                                                    re_node_set *cur_nodes,
 162                                                    re_node_set *next_nodes);
 163 static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
 164                                                re_node_set *cur_nodes,
 165                                                int ex_subexp, int type);
 166 static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
 167                                                    re_node_set *dst_nodes,
 168                                                    int target, int ex_subexp,
 169                                                    int type);
 170 static reg_errcode_t expand_bkref_cache (const regex_t *preg,
 171                                          re_match_context_t *mctx,
 172                                          re_node_set *cur_nodes, int cur_str,
 173                                          int last_str, int subexp_num,
 174                                          int type);
 175 static re_dfastate_t **build_trtable (const regex_t *dfa,
 176                                       re_dfastate_t *state);
 177 #ifdef RE_ENABLE_I18N
 178 static int check_node_accept_bytes (const regex_t *preg, int node_idx,
 179                                     const re_string_t *input, int idx);
 180 # ifdef _LIBC
 181 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 182                                                    size_t name_len);
 183 # endif /* _LIBC */
 184 #endif /* RE_ENABLE_I18N */
 185 static int group_nodes_into_DFAstates (const regex_t *dfa,
 186                                        const re_dfastate_t *state,
 187                                        re_node_set *states_node,
 188                                        bitset *states_ch);
 189 static int check_node_accept (const regex_t *preg, const re_token_t *node,
 190                               const re_match_context_t *mctx, int idx);
 191 static reg_errcode_t extend_buffers (re_match_context_t *mctx);
 192 \f
 193 /* Entry point for POSIX code.  */
 194
 195 /* regexec searches for a given pattern, specified by PREG, in the
 196    string STRING.
 197
 198    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 199    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 200    least NMATCH elements, and we set them to the offsets of the
 201    corresponding matched substrings.
 202
 203    EFLAGS specifies `execution flags' which affect matching: if
 204    REG_NOTBOL is set, then ^ does not match at the beginning of the
 205    string; if REG_NOTEOL is set, then $ does not match at the end.
 206
 207    We return 0 if we find a match and REG_NOMATCH if not.  */
 208
 209 int
 210 regexec (preg, string, nmatch, pmatch, eflags)
 211     const regex_t *__restrict preg;
 212     const char *__restrict string;
 213     size_t nmatch;
 214     regmatch_t pmatch[];
 215     int eflags;
 216 {
 217   reg_errcode_t err;
 218   int length = strlen (string);
 219   if (preg->no_sub)
 220     err = re_search_internal (preg, string, length, 0, length, length, 0,
 221                               NULL, eflags);
 222   else
 223     err = re_search_internal (preg, string, length, 0, length, length, nmatch,
 224                               pmatch, eflags);
 225   return err != REG_NOERROR;
 226 }
 227 #ifdef _LIBC
 228 weak_alias (__regexec, regexec)
 229 #endif
 230
 231 /* Entry points for GNU code.  */
 232
 233 /* re_match, re_search, re_match_2, re_search_2
 234
 235    The former two functions operate on STRING with length LENGTH,
 236    while the later two operate on concatenation of STRING1 and STRING2
 237    with lengths LENGTH1 and LENGTH2, respectively.
 238
 239    re_match() matches the compiled pattern in BUFP against the string,
 240    starting at index START.
 241
 242    re_search() first tries matching at index START, then it tries to match
 243    starting from index START + 1, and so on.  The last start position tried
 244    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 245    way as re_match().)
 246
 247    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 248    the first STOP characters of the concatenation of the strings should be
 249    concerned.
 250
 251    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 252    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
 253    computed relative to the concatenation, not relative to the individual
 254    strings.)
 255
 256    On success, re_match* functions return the length of the match, re_search*
 257    return the position of the start of the match.  Return value -1 means no
 258    match was found and -2 indicates an internal error.  */
 259
 260 int
 261 re_match (bufp, string, length, start, regs)
 262     struct re_pattern_buffer *bufp;
 263     const char *string;
 264     int length, start;
 265     struct re_registers *regs;
 266 {
 267   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
 268 }
 269 #ifdef _LIBC
 270 weak_alias (__re_match, re_match)
 271 #endif
 272
 273 int
 274 re_search (bufp, string, length, start, range, regs)
 275     struct re_pattern_buffer *bufp;
 276     const char *string;
 277     int length, start, range;
 278     struct re_registers *regs;
 279 {
 280   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
 281 }
 282 #ifdef _LIBC
 283 weak_alias (__re_search, re_search)
 284 #endif
 285
 286 int
 287 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
 288     struct re_pattern_buffer *bufp;
 289     const char *string1, *string2;
 290     int length1, length2, start, stop;
 291     struct re_registers *regs;
 292 {
 293   return re_search_2_stub (bufp, string1, length1, string2, length2,
 294                            start, 0, regs, stop, 1);
 295 }
 296 #ifdef _LIBC
 297 weak_alias (__re_match_2, re_match_2)
 298 #endif
 299
 300 int
 301 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
 302     struct re_pattern_buffer *bufp;
 303     const char *string1, *string2;
 304     int length1, length2, start, range, stop;
 305     struct re_registers *regs;
 306 {
 307   return re_search_2_stub (bufp, string1, length1, string2, length2,
 308                            start, range, regs, stop, 0);
 309 }
 310 #ifdef _LIBC
 311 weak_alias (__re_search_2, re_search_2)
 312 #endif
 313
 314 static int
 315 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
 316                   stop, ret_len)
 317     struct re_pattern_buffer *bufp;
 318     const char *string1, *string2;
 319     int length1, length2, start, range, stop, ret_len;
 320     struct re_registers *regs;
 321 {
 322   const char *str;
 323   int rval;
 324   int len = length1 + length2;
 325   int free_str = 0;
 326
 327   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
 328     return -2;
 329
 330   /* Concatenate the strings.  */
 331   if (length2 > 0)
 332     if (length1 > 0)
 333       {
 334         char *s = re_malloc (char, len);
 335
 336         if (BE (s == NULL, 0))
 337           return -2;
 338         memcpy (s, string1, length1);
 339         memcpy (s + length1, string2, length2);
 340         str = s;
 341         free_str = 1;
 342       }
 343     else
 344       str = string2;
 345   else
 346     str = string1;
 347
 348   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 349                          ret_len);
 350   if (free_str)
 351     re_free ((char *) str);
 352   return rval;
 353 }
 354
 355 /* The parameters have the same meaning as those of re_search.
 356    Additional parameters:
 357    If RET_LEN is nonzero the length of the match is returned (re_match style);
 358    otherwise the position of the match is returned.  */
 359
 360 static int
 361 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
 362     struct re_pattern_buffer *bufp;
 363     const char *string;
 364     int length, start, range, stop, ret_len;
 365     struct re_registers *regs;
 366 {
 367   reg_errcode_t result;
 368   regmatch_t *pmatch;
 369   int nregs, rval;
 370   int eflags = 0;
 371
 372   /* Check for out-of-range.  */
 373   if (BE (start < 0 || start > length, 0))
 374     return -1;
 375   if (BE (start + range > length, 0))
 376     range = length - start;
 377   else if (BE (start + range < 0, 0))
 378     range = -start;
 379
 380   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 381   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 382
 383   /* Compile fastmap if we haven't yet.  */
 384   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 385     re_compile_fastmap (bufp);
 386
 387   if (BE (bufp->no_sub, 0))
 388     regs = NULL;
 389
 390   /* We need at least 1 register.  */
 391   if (regs == NULL)
 392     nregs = 1;
 393   else if (BE (bufp->regs_allocated == REGS_FIXED &&
 394                regs->num_regs < bufp->re_nsub + 1, 0))
 395     {
 396       nregs = regs->num_regs;
 397       if (BE (nregs < 1, 0))
 398         {
 399           /* Nothing can be copied to regs.  */
 400           regs = NULL;
 401           nregs = 1;
 402         }
 403     }
 404   else
 405     nregs = bufp->re_nsub + 1;
 406   pmatch = re_malloc (regmatch_t, nregs);
 407   if (BE (pmatch == NULL, 0))
 408     return -2;
 409
 410   result = re_search_internal (bufp, string, length, start, range, stop,
 411                                nregs, pmatch, eflags);
 412
 413   rval = 0;
 414
 415   /* I hope we needn't fill ther regs with -1's when no match was found.  */
 416   if (result != REG_NOERROR)
 417     rval = -1;
 418   else if (regs != NULL)
 419     {
 420       /* If caller wants register contents data back, copy them.  */
 421       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 422                                            bufp->regs_allocated);
 423       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 424         rval = -2;
 425     }
 426
 427   if (BE (rval == 0, 1))
 428     {
 429       if (ret_len)
 430         {
 431           assert (pmatch[0].rm_so == start);
 432           rval = pmatch[0].rm_eo - start;
 433         }
 434       else
 435         rval = pmatch[0].rm_so;
 436     }
 437   re_free (pmatch);
 438   return rval;
 439 }
 440
 441 static unsigned
 442 re_copy_regs (regs, pmatch, nregs, regs_allocated)
 443     struct re_registers *regs;
 444     regmatch_t *pmatch;
 445     int nregs, regs_allocated;
 446 {
 447   int rval = REGS_REALLOCATE;
 448   int i;
 449   int need_regs = nregs + 1;
 450   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
 451      uses.  */
 452
 453   /* Have the register data arrays been allocated?  */
 454   if (regs_allocated == REGS_UNALLOCATED)
 455     { /* No.  So allocate them with malloc.  */
 456       regs->start = re_malloc (regoff_t, need_regs);
 457       if (BE (regs->start == NULL, 0))
 458         return REGS_UNALLOCATED;
 459       regs->end = re_malloc (regoff_t, need_regs);
 460       if (BE (regs->end == NULL, 0))
 461         {
 462           re_free (regs->start);
 463           return REGS_UNALLOCATED;
 464         }
 465       regs->num_regs = need_regs;
 466     }
 467   else if (regs_allocated == REGS_REALLOCATE)
 468     { /* Yes.  If we need more elements than were already
 469          allocated, reallocate them.  If we need fewer, just
 470          leave it alone.  */
 471       if (BE (need_regs > regs->num_regs, 0))
 472         {
 473           regs->start = re_realloc (regs->start, regoff_t, need_regs);
 474           if (BE (regs->start == NULL, 0))
 475             {
 476               if (regs->end != NULL)
 477                 re_free (regs->end);
 478               return REGS_UNALLOCATED;
 479             }
 480           regs->end = re_realloc (regs->end, regoff_t, need_regs);
 481           if (BE (regs->end == NULL, 0))
 482             {
 483               re_free (regs->start);
 484               return REGS_UNALLOCATED;
 485             }
 486           regs->num_regs = need_regs;
 487         }
 488     }
 489   else
 490     {
 491       assert (regs_allocated == REGS_FIXED);
 492       /* This function may not be called with REGS_FIXED and nregs too big.  */
 493       assert (regs->num_regs >= nregs);
 494       rval = REGS_FIXED;
 495     }
 496
 497   /* Copy the regs.  */
 498   for (i = 0; i < nregs; ++i)
 499     {
 500       regs->start[i] = pmatch[i].rm_so;
 501       regs->end[i] = pmatch[i].rm_eo;
 502     }
 503   for ( ; i < regs->num_regs; ++i)
 504     regs->start[i] = regs->end[i] = -1;
 505
 506   return rval;
 507 }
 508
 509 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 510    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 511    this memory for recording register information.  STARTS and ENDS
 512    must be allocated using the malloc library routine, and must each
 513    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 514
 515    If NUM_REGS == 0, then subsequent matches should allocate their own
 516    register data.
 517
 518    Unless this function is called, the first search or match using
 519    PATTERN_BUFFER will allocate its own register data, without
 520    freeing the old data.  */
 521
 522 void
 523 re_set_registers (bufp, regs, num_regs, starts, ends)
 524     struct re_pattern_buffer *bufp;
 525     struct re_registers *regs;
 526     unsigned num_regs;
 527     regoff_t *starts, *ends;
 528 {
 529   if (num_regs)
 530     {
 531       bufp->regs_allocated = REGS_REALLOCATE;
 532       regs->num_regs = num_regs;
 533       regs->start = starts;
 534       regs->end = ends;
 535     }
 536   else
 537     {
 538       bufp->regs_allocated = REGS_UNALLOCATED;
 539       regs->num_regs = 0;
 540       regs->start = regs->end = (regoff_t *) 0;
 541     }
 542 }
 543 #ifdef _LIBC
 544 weak_alias (__re_set_registers, re_set_registers)
 545 #endif
 546 \f
 547 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 548    them unless specifically requested.  */
 549
 550 #if defined _REGEX_RE_COMP || defined _LIBC
 551 int
 552 # ifdef _LIBC
 553 weak_function
 554 # endif
 555 re_exec (s)
 556      const char *s;
 557 {
 558   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 559 }
 560 #endif /* _REGEX_RE_COMP */
 561 \f
 562 static re_node_set empty_set;
 563
 564 /* Internal entry point.  */
 565
 566 /* Searches for a compiled pattern PREG in the string STRING, whose
 567    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 568    mingings with regexec.  START, and RANGE have the same meanings
 569    with re_search.
 570    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 571    otherwise return the error code.
 572    Note: We assume front end functions already check ranges.
 573    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
 574
 575 static reg_errcode_t
 576 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
 577                     eflags)
 578     const regex_t *preg;
 579     const char *string;
 580     int length, start, range, stop, eflags;
 581     size_t nmatch;
 582     regmatch_t pmatch[];
 583 {
 584   reg_errcode_t err;
 585   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 586   re_string_t input;
 587   int left_lim, right_lim, incr;
 588   int fl_longest_match, match_first, match_last = -1;
 589   int fast_translate, sb;
 590   re_match_context_t mctx;
 591   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
 592                     && range && !preg->can_be_null) ? preg->fastmap : NULL);
 593
 594   /* Check if the DFA haven't been compiled.  */
 595   if (BE (preg->used == 0 || dfa->init_state == NULL
 596           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 597           || dfa->init_state_begbuf == NULL, 0))
 598     return REG_NOMATCH;
 599
 600 #ifdef DEBUG
 601   /* We assume front-end functions already check them.  */
 602   assert (start + range >= 0 && start + range <= length);
 603 #endif
 604
 605   /* If initial states with non-begbuf contexts have no elements,
 606      the regex must be anchored.  If preg->newline_anchor is set,
 607      we'll never use init_state_nl, so do not check it.  */
 608   if (dfa->init_state->nodes.nelem == 0
 609       && dfa->init_state_word->nodes.nelem == 0
 610       && (dfa->init_state_nl->nodes.nelem == 0
 611           || !preg->newline_anchor))
 612     {
 613       if (start != 0 && start + range != 0)
 614         return REG_NOMATCH;
 615       start = range = 0;
 616     }
 617
 618   re_node_set_init_empty (&empty_set);
 619   memset (&mctx, '\0', sizeof (re_match_context_t));
 620
 621   /* We must check the longest matching, if nmatch > 0.  */
 622   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 623
 624   err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
 625                             preg->translate, preg->syntax & RE_ICASE, dfa);
 626   if (BE (err != REG_NOERROR, 0))
 627     goto free_return;
 628   input.stop = stop;
 629   input.raw_stop = stop;
 630
 631   err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
 632   if (BE (err != REG_NOERROR, 0))
 633     goto free_return;
 634
 635   /* We will log all the DFA states through which the dfa pass,
 636      if nmatch > 1, or this dfa has "multibyte node", which is a
 637      back-reference or a node which can accept multibyte character or
 638      multi character collating element.  */
 639   if (nmatch > 1 || dfa->has_mb_node)
 640     {
 641       mctx.state_log = re_malloc (re_dfastate_t *, input.bufs_len + 1);
 642       if (BE (mctx.state_log == NULL, 0))
 643         {
 644           err = REG_ESPACE;
 645           goto free_return;
 646         }
 647     }
 648   else
 649     mctx.state_log = NULL;
 650
 651   match_first = start;
 652   input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 653                        : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
 654
 655   /* Check incrementally whether of not the input string match.  */
 656   incr = (range < 0) ? -1 : 1;
 657   left_lim = (range < 0) ? start + range : start;
 658   right_lim = (range < 0) ? start : start + range;
 659   sb = dfa->mb_cur_max == 1;
 660   fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
 661
 662   for (;;)
 663     {
 664       /* At first get the current byte from input string.  */
 665       if (fastmap)
 666         {
 667           if (BE (fast_translate, 1))
 668             {
 669               unsigned RE_TRANSLATE_TYPE t
 670                 = (unsigned RE_TRANSLATE_TYPE) preg->translate;
 671               if (BE (range >= 0, 1))
 672                 {
 673                   if (BE (t != NULL, 0))
 674                     {
 675                       while (BE (match_first < right_lim, 1)
 676                              && !fastmap[t[(unsigned char) string[match_first]]])
 677                         ++match_first;
 678                     }
 679                   else
 680                     {
 681                       while (BE (match_first < right_lim, 1)
 682                              && !fastmap[(unsigned char) string[match_first]])
 683                         ++match_first;
 684                     }
 685                   if (BE (match_first == right_lim, 0))
 686                     {
 687                       int ch = match_first >= length
 688                                ? 0 : (unsigned char) string[match_first];
 689                       if (!fastmap[t ? t[ch] : ch])
 690                         break;
 691                     }
 692                 }
 693               else
 694                 {
 695                   while (match_first >= left_lim)
 696                     {
 697                       int ch = match_first >= length
 698                                ? 0 : (unsigned char) string[match_first];
 699                       if (fastmap[t ? t[ch] : ch])
 700                         break;
 701                       --match_first;
 702                     }
 703                   if (match_first < left_lim)
 704                     break;
 705                 }
 706             }
 707           else
 708             {
 709               int ch;
 710
 711               do
 712                 {
 713                   /* In this case, we can't determine easily the current byte,
 714                      since it might be a component byte of a multibyte
 715                      character.  Then we use the constructed buffer
 716                      instead.  */
 717                   /* If MATCH_FIRST is out of the valid range, reconstruct the
 718                      buffers.  */
 719                   if (input.raw_mbs_idx + input.valid_raw_len <= match_first
 720                       || match_first < input.raw_mbs_idx)
 721                     {
 722                       err = re_string_reconstruct (&input, match_first, eflags,
 723                                                    preg->newline_anchor);
 724                       if (BE (err != REG_NOERROR, 0))
 725                         goto free_return;
 726                     }
 727                   /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 728                      Note that MATCH_FIRST must not be smaller than 0.  */
 729                   ch = ((match_first >= length) ? 0
 730                        : re_string_byte_at (&input,
 731                                             match_first - input.raw_mbs_idx));
 732                   if (fastmap[ch])
 733                     break;
 734                   match_first += incr;
 735                 }
 736               while (match_first >= left_lim && match_first <= right_lim);
 737               if (! fastmap[ch])
 738                 break;
 739             }
 740         }
 741
 742       /* Reconstruct the buffers so that the matcher can assume that
 743          the matching starts from the beginning of the buffer.  */
 744       err = re_string_reconstruct (&input, match_first, eflags,
 745                                    preg->newline_anchor);
 746       if (BE (err != REG_NOERROR, 0))
 747         goto free_return;
 748 #ifdef RE_ENABLE_I18N
 749      /* Eliminate it when it is a component of a multibyte character
 750          and isn't the head of a multibyte character.  */
 751       if (sb || re_string_first_byte (&input, 0))
 752 #endif
 753         {
 754           /* It seems to be appropriate one, then use the matcher.  */
 755           /* We assume that the matching starts from 0.  */
 756           mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 757           match_last = check_matching (preg, &mctx, fl_longest_match);
 758           if (match_last != -1)
 759             {
 760               if (BE (match_last == -2, 0))
 761                 {
 762                   err = REG_ESPACE;
 763                   goto free_return;
 764                 }
 765               else
 766                 {
 767                   mctx.match_last = match_last;
 768                   if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 769                     {
 770                       re_dfastate_t *pstate = mctx.state_log[match_last];
 771                       mctx.last_node = check_halt_state_context (preg, pstate,
 772                                                                  &mctx, match_last);
 773                     }
 774                   if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 775                       || dfa->nbackref)
 776                     {
 777                       err = prune_impossible_nodes (preg, &mctx);
 778                       if (err == REG_NOERROR)
 779                         break;
 780                       if (BE (err != REG_NOMATCH, 0))
 781                         goto free_return;
 782                       match_last = -1;
 783                     }
 784                   else
 785                     break; /* We found a match.  */
 786                 }
 787             }
 788           match_ctx_clean (&mctx);
 789         }
 790       /* Update counter.  */
 791       match_first += incr;
 792       if (match_first < left_lim || right_lim < match_first)
 793         break;
 794     }
 795
 796   /* Set pmatch[] if we need.  */
 797   if (match_last != -1 && nmatch > 0)
 798     {
 799       int reg_idx;
 800
 801       /* Initialize registers.  */
 802       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 803         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 804
 805       /* Set the points where matching start/end.  */
 806       pmatch[0].rm_so = 0;
 807       pmatch[0].rm_eo = mctx.match_last;
 808
 809       if (!preg->no_sub && nmatch > 1)
 810         {
 811           err = set_regs (preg, &mctx, nmatch, pmatch,
 812                           dfa->has_plural_match && dfa->nbackref > 0);
 813           if (BE (err != REG_NOERROR, 0))
 814             goto free_return;
 815         }
 816
 817       /* At last, add the offset to the each registers, since we slided
 818          the buffers so that we could assume that the matching starts
 819          from 0.  */
 820       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 821         if (pmatch[reg_idx].rm_so != -1)
 822           {
 823 #ifdef RE_ENABLE_I18N
 824             if (BE (input.offsets_needed != 0, 0))
 825               {
 826                 if (pmatch[reg_idx].rm_so == input.valid_len)
 827                   pmatch[reg_idx].rm_so += input.valid_raw_len - input.valid_len;
 828                 else
 829                   pmatch[reg_idx].rm_so = input.offsets[pmatch[reg_idx].rm_so];
 830                 if (pmatch[reg_idx].rm_eo == input.valid_len)
 831                   pmatch[reg_idx].rm_eo += input.valid_raw_len - input.valid_len;
 832                 else
 833                   pmatch[reg_idx].rm_eo = input.offsets[pmatch[reg_idx].rm_eo];
 834               }
 835 #else
 836             assert (input.offsets_needed == 0);
 837 #endif
 838             pmatch[reg_idx].rm_so += match_first;
 839             pmatch[reg_idx].rm_eo += match_first;
 840           }
 841     }
 842   err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
 843  free_return:
 844   re_free (mctx.state_log);
 845   if (dfa->nbackref)
 846     match_ctx_free (&mctx);
 847   re_string_destruct (&input);
 848   return err;
 849 }
 850
 851 static reg_errcode_t
 852 prune_impossible_nodes (preg, mctx)
 853      const regex_t *preg;
 854      re_match_context_t *mctx;
 855 {
 856   int halt_node, match_last;
 857   reg_errcode_t ret;
 858   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 859   re_dfastate_t **sifted_states;
 860   re_dfastate_t **lim_states = NULL;
 861   re_sift_context_t sctx;
 862 #ifdef DEBUG
 863   assert (mctx->state_log != NULL);
 864 #endif
 865   match_last = mctx->match_last;
 866   halt_node = mctx->last_node;
 867   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 868   if (BE (sifted_states == NULL, 0))
 869     {
 870       ret = REG_ESPACE;
 871       goto free_return;
 872     }
 873   if (dfa->nbackref)
 874     {
 875       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
 876       if (BE (lim_states == NULL, 0))
 877         {
 878           ret = REG_ESPACE;
 879           goto free_return;
 880         }
 881       while (1)
 882         {
 883           memset (lim_states, '\0',
 884                   sizeof (re_dfastate_t *) * (match_last + 1));
 885           match_ctx_clear_flag (mctx);
 886           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 887                          match_last, 0);
 888           ret = sift_states_backward (preg, mctx, &sctx);
 889           re_node_set_free (&sctx.limits);
 890           if (BE (ret != REG_NOERROR, 0))
 891               goto free_return;
 892           if (sifted_states[0] != NULL || lim_states[0] != NULL)
 893             break;
 894           do
 895             {
 896               --match_last;
 897               if (match_last < 0)
 898                 {
 899                   ret = REG_NOMATCH;
 900                   goto free_return;
 901                 }
 902             } while (mctx->state_log[match_last] == NULL
 903                      || !mctx->state_log[match_last]->halt);
 904           halt_node = check_halt_state_context (preg,
 905                                                 mctx->state_log[match_last],
 906                                                 mctx, match_last);
 907         }
 908       ret = merge_state_array (dfa, sifted_states, lim_states,
 909                                match_last + 1);
 910       re_free (lim_states);
 911       lim_states = NULL;
 912       if (BE (ret != REG_NOERROR, 0))
 913         goto free_return;
 914     }
 915   else
 916     {
 917       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 918                      match_last, 0);
 919       ret = sift_states_backward (preg, mctx, &sctx);
 920       re_node_set_free (&sctx.limits);
 921       if (BE (ret != REG_NOERROR, 0))
 922         goto free_return;
 923     }
 924   re_free (mctx->state_log);
 925   mctx->state_log = sifted_states;
 926   sifted_states = NULL;
 927   mctx->last_node = halt_node;
 928   mctx->match_last = match_last;
 929   ret = REG_NOERROR;
 930  free_return:
 931   re_free (sifted_states);
 932   re_free (lim_states);
 933   return ret;
 934 }
 935
 936 /* Acquire an initial state and return it.
 937    We must select appropriate initial state depending on the context,
 938    since initial states may have constraints like "\<", "^", etc..  */
 939
 940 static inline re_dfastate_t *
 941 acquire_init_state_context (err, preg, mctx, idx)
 942      reg_errcode_t *err;
 943      const regex_t *preg;
 944      const re_match_context_t *mctx;
 945      int idx;
 946 {
 947   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 948
 949   *err = REG_NOERROR;
 950   if (dfa->init_state->has_constraint)
 951     {
 952       unsigned int context;
 953       context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
 954                                       preg->newline_anchor);
 955       if (IS_WORD_CONTEXT (context))
 956         return dfa->init_state_word;
 957       else if (IS_ORDINARY_CONTEXT (context))
 958         return dfa->init_state;
 959       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
 960         return dfa->init_state_begbuf;
 961       else if (IS_NEWLINE_CONTEXT (context))
 962         return dfa->init_state_nl;
 963       else if (IS_BEGBUF_CONTEXT (context))
 964         {
 965           /* It is relatively rare case, then calculate on demand.  */
 966           return  re_acquire_state_context (err, dfa,
 967                                             dfa->init_state->entrance_nodes,
 968                                             context);
 969         }
 970       else
 971         /* Must not happen?  */
 972         return dfa->init_state;
 973     }
 974   else
 975     return dfa->init_state;
 976 }
 977
 978 /* Check whether the regular expression match input string INPUT or not,
 979    and return the index where the matching end, return -1 if not match,
 980    or return -2 in case of an error.
 981    FL_LONGEST_MATCH means we want the POSIX longest matching.
 982    Note that the matcher assume that the maching starts from the current
 983    index of the buffer.  */
 984
 985 static int
 986 check_matching (preg, mctx, fl_longest_match)
 987     const regex_t *preg;
 988     re_match_context_t *mctx;
 989     int fl_longest_match;
 990 {
 991   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 992   reg_errcode_t err;
 993   int match = 0;
 994   int match_last = -1;
 995   int cur_str_idx = re_string_cur_idx (mctx->input);
 996   re_dfastate_t *cur_state;
 997
 998   cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
 999   /* An initial state must not be NULL(invalid state).  */
1000   if (BE (cur_state == NULL, 0))
1001     return -2;
1002   if (mctx->state_log != NULL)
1003     mctx->state_log[cur_str_idx] = cur_state;
1004
1005   /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1006      later.  E.g. Processing back references.  */
1007   if (BE (dfa->nbackref, 0))
1008     {
1009       err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
1010       if (BE (err != REG_NOERROR, 0))
1011         return err;
1012
1013       if (cur_state->has_backref)
1014         {
1015           err = transit_state_bkref (preg, &cur_state->nodes, mctx);
1016           if (BE (err != REG_NOERROR, 0))
1017             return err;
1018         }
1019     }
1020
1021   /* If the RE accepts NULL string.  */
1022   if (BE (cur_state->halt, 0))
1023     {
1024       if (!cur_state->has_constraint
1025           || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
1026         {
1027           if (!fl_longest_match)
1028             return cur_str_idx;
1029           else
1030             {
1031               match_last = cur_str_idx;
1032               match = 1;
1033             }
1034         }
1035     }
1036
1037   while (!re_string_eoi (mctx->input))
1038     {
1039       cur_state = transit_state (&err, preg, mctx, cur_state);
1040       if (cur_state == NULL) /* Reached at the invalid state or an error.  */
1041         {
1042           cur_str_idx = re_string_cur_idx (mctx->input);
1043           if (BE (err != REG_NOERROR, 0))
1044             return -2;
1045           if (!fl_longest_match && match)
1046             break;
1047           else
1048             {
1049               if (mctx->state_log == NULL)
1050                 break;
1051               else
1052                 {
1053                   int max = mctx->state_log_top;
1054                   for (; cur_str_idx <= max; ++cur_str_idx)
1055                     if (mctx->state_log[cur_str_idx] != NULL)
1056                       break;
1057                   if (cur_str_idx > max)
1058                     break;
1059                 }
1060             }
1061         }
1062
1063       if (cur_state != NULL && cur_state->halt)
1064         {
1065           /* Reached at a halt state.
1066              Check the halt state can satisfy the current context.  */
1067           if (!cur_state->has_constraint
1068               || check_halt_state_context (preg, cur_state, mctx,
1069                                            re_string_cur_idx (mctx->input)))
1070             {
1071               /* We found an appropriate halt state.  */
1072               match_last = re_string_cur_idx (mctx->input);
1073               match = 1;
1074               if (!fl_longest_match)
1075                 break;
1076             }
1077         }
1078    }
1079   return match_last;
1080 }
1081
1082 /* Check NODE match the current context.  */
1083
1084 static int check_halt_node_context (dfa, node, context)
1085     const re_dfa_t *dfa;
1086     int node;
1087     unsigned int context;
1088 {
1089   re_token_type_t type = dfa->nodes[node].type;
1090   unsigned int constraint = dfa->nodes[node].constraint;
1091   if (type != END_OF_RE)
1092     return 0;
1093   if (!constraint)
1094     return 1;
1095   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1096     return 0;
1097   return 1;
1098 }
1099
1100 /* Check the halt state STATE match the current context.
1101    Return 0 if not match, if the node, STATE has, is a halt node and
1102    match the context, return the node.  */
1103
1104 static int
1105 check_halt_state_context (preg, state, mctx, idx)
1106     const regex_t *preg;
1107     const re_dfastate_t *state;
1108     const re_match_context_t *mctx;
1109     int idx;
1110 {
1111   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1112   int i;
1113   unsigned int context;
1114 #ifdef DEBUG
1115   assert (state->halt);
1116 #endif
1117   context = re_string_context_at (mctx->input, idx, mctx->eflags,
1118                                   preg->newline_anchor);
1119   for (i = 0; i < state->nodes.nelem; ++i)
1120     if (check_halt_node_context (dfa, state->nodes.elems[i], context))
1121       return state->nodes.elems[i];
1122   return 0;
1123 }
1124
1125 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1126    corresponding to the DFA).
1127    Return the destination node, and update EPS_VIA_NODES, return -1 in case
1128    of errors.  */
1129
1130 static int
1131 proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
1132     const regex_t *preg;
1133     regmatch_t *regs;
1134     const re_match_context_t *mctx;
1135     int nregs, *pidx, node;
1136     re_node_set *eps_via_nodes;
1137     struct re_fail_stack_t *fs;
1138 {
1139   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1140   int i, err, dest_node;
1141   dest_node = -1;
1142   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1143     {
1144       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1145       int ndest, dest_nodes[2];
1146       err = re_node_set_insert (eps_via_nodes, node);
1147       if (BE (err < 0, 0))
1148         return -1;
1149       /* Pick up valid destinations.  */
1150       for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
1151         {
1152           int candidate = dfa->edests[node].elems[i];
1153           if (!re_node_set_contains (cur_nodes, candidate))
1154             continue;
1155           dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
1156           dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
1157           ++ndest;
1158         }
1159       if (ndest <= 1)
1160         return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
1161       /* In order to avoid infinite loop like "(a*)*".  */
1162       if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
1163         return dest_nodes[1];
1164       if (fs != NULL)
1165         push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
1166       return dest_nodes[0];
1167     }
1168   else
1169     {
1170       int naccepted = 0;
1171       re_token_type_t type = dfa->nodes[node].type;
1172
1173 #ifdef RE_ENABLE_I18N
1174       if (ACCEPT_MB_NODE (type))
1175         naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
1176       else
1177 #endif /* RE_ENABLE_I18N */
1178       if (type == OP_BACK_REF)
1179         {
1180           int subexp_idx = dfa->nodes[node].opr.idx;
1181           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1182           if (fs != NULL)
1183             {
1184               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1185                 return -1;
1186               else if (naccepted)
1187                 {
1188                   char *buf = (char *) re_string_get_buffer (mctx->input);
1189                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1190                               naccepted) != 0)
1191                     return -1;
1192                 }
1193             }
1194
1195           if (naccepted == 0)
1196             {
1197               err = re_node_set_insert (eps_via_nodes, node);
1198               if (BE (err < 0, 0))
1199                 return -2;
1200               dest_node = dfa->edests[node].elems[0];
1201               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1202                                         dest_node))
1203                 return dest_node;
1204             }
1205         }
1206
1207       if (naccepted != 0
1208           || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
1209         {
1210           dest_node = dfa->nexts[node];
1211           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1212           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1213                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1214                                                dest_node)))
1215             return -1;
1216           re_node_set_empty (eps_via_nodes);
1217           return dest_node;
1218         }
1219     }
1220   return -1;
1221 }
1222
1223 static reg_errcode_t
1224 push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
1225      struct re_fail_stack_t *fs;
1226      int str_idx, *dests, nregs;
1227      regmatch_t *regs;
1228      re_node_set *eps_via_nodes;
1229 {
1230   reg_errcode_t err;
1231   int num = fs->num++;
1232   if (fs->num == fs->alloc)
1233     {
1234       struct re_fail_stack_ent_t *new_array;
1235       fs->alloc *= 2;
1236       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1237                                        * fs->alloc));
1238       if (new_array == NULL)
1239         return REG_ESPACE;
1240       fs->stack = new_array;
1241     }
1242   fs->stack[num].idx = str_idx;
1243   fs->stack[num].node = dests[1];
1244   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1245   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1246   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1247   return err;
1248 }
1249
1250 static int
1251 pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1252      struct re_fail_stack_t *fs;
1253      int *pidx, nregs;
1254      regmatch_t *regs;
1255      re_node_set *eps_via_nodes;
1256 {
1257   int num = --fs->num;
1258   assert (num >= 0);
1259  *pidx = fs->stack[num].idx;
1260   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1261   re_node_set_free (eps_via_nodes);
1262   re_free (fs->stack[num].regs);
1263   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1264   return fs->stack[num].node;
1265 }
1266
1267 /* Set the positions where the subexpressions are starts/ends to registers
1268    PMATCH.
1269    Note: We assume that pmatch[0] is already set, and
1270    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1271
1272 static reg_errcode_t
1273 set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1274      const regex_t *preg;
1275      const re_match_context_t *mctx;
1276      size_t nmatch;
1277      regmatch_t *pmatch;
1278      int fl_backtrack;
1279 {
1280   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1281   int idx, cur_node, real_nmatch;
1282   re_node_set eps_via_nodes;
1283   struct re_fail_stack_t *fs;
1284   struct re_fail_stack_t fs_body = {0, 2, NULL};
1285 #ifdef DEBUG
1286   assert (nmatch > 1);
1287   assert (mctx->state_log != NULL);
1288 #endif
1289   if (fl_backtrack)
1290     {
1291       fs = &fs_body;
1292       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1293     }
1294   else
1295     fs = NULL;
1296   cur_node = dfa->init_node;
1297   real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
1298   re_node_set_init_empty (&eps_via_nodes);
1299   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1300     {
1301       update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
1302       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1303         {
1304           int reg_idx;
1305           if (fs)
1306             {
1307               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1308                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1309                   break;
1310               if (reg_idx == nmatch)
1311                 {
1312                   re_node_set_free (&eps_via_nodes);
1313                   return free_fail_stack_return (fs);
1314                 }
1315               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1316                                          &eps_via_nodes);
1317             }
1318           else
1319             {
1320               re_node_set_free (&eps_via_nodes);
1321               return REG_NOERROR;
1322             }
1323         }
1324
1325       /* Proceed to next node.  */
1326       cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
1327                                     &eps_via_nodes, fs);
1328
1329       if (BE (cur_node < 0, 0))
1330         {
1331           if (cur_node == -2)
1332             return REG_ESPACE;
1333           if (fs)
1334             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1335                                        &eps_via_nodes);
1336           else
1337             {
1338               re_node_set_free (&eps_via_nodes);
1339               return REG_NOMATCH;
1340             }
1341         }
1342     }
1343   re_node_set_free (&eps_via_nodes);
1344   return free_fail_stack_return (fs);
1345 }
1346
1347 static reg_errcode_t
1348 free_fail_stack_return (fs)
1349      struct re_fail_stack_t *fs;
1350 {
1351   if (fs)
1352     {
1353       int fs_idx;
1354       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1355         {
1356           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1357           re_free (fs->stack[fs_idx].regs);
1358         }
1359       re_free (fs->stack);
1360     }
1361   return REG_NOERROR;
1362 }
1363
1364 static void
1365 update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
1366      re_dfa_t *dfa;
1367      regmatch_t *pmatch;
1368      int cur_node, cur_idx, nmatch;
1369 {
1370   int type = dfa->nodes[cur_node].type;
1371   int reg_num;
1372   if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
1373     return;
1374   reg_num = dfa->nodes[cur_node].opr.idx + 1;
1375   if (reg_num >= nmatch)
1376     return;
1377   if (type == OP_OPEN_SUBEXP)
1378     {
1379       /* We are at the first node of this sub expression.  */
1380       pmatch[reg_num].rm_so = cur_idx;
1381       pmatch[reg_num].rm_eo = -1;
1382     }
1383   else if (type == OP_CLOSE_SUBEXP)
1384     /* We are at the first node of this sub expression.  */
1385     pmatch[reg_num].rm_eo = cur_idx;
1386 }
1387
1388 #define NUMBER_OF_STATE 1
1389
1390 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1391    and sift the nodes in each states according to the following rules.
1392    Updated state_log will be wrote to STATE_LOG.
1393
1394    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1395      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1396         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1397         the LAST_NODE, we throw away the node `a'.
1398      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1399         string `s' and transit to `b':
1400         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1401            away the node `a'.
1402         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1403             thrown away, we throw away the node `a'.
1404      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1405         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1406            node `a'.
1407         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1408             we throw away the node `a'.  */
1409
1410 #define STATE_NODE_CONTAINS(state,node) \
1411   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1412
1413 static reg_errcode_t
1414 sift_states_backward (preg, mctx, sctx)
1415      const regex_t *preg;
1416      re_match_context_t *mctx;
1417      re_sift_context_t *sctx;
1418 {
1419   reg_errcode_t err;
1420   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1421   int null_cnt = 0;
1422   int str_idx = sctx->last_str_idx;
1423   re_node_set cur_dest;
1424   re_node_set *cur_src; /* Points the state_log[str_idx]->nodes  */
1425
1426 #ifdef DEBUG
1427   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1428 #endif
1429   cur_src = &mctx->state_log[str_idx]->nodes;
1430
1431   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1432      transit to the last_node and the last_node itself.  */
1433   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1434   if (BE (err != REG_NOERROR, 0))
1435     return err;
1436   err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1437   if (BE (err != REG_NOERROR, 0))
1438     goto free_return;
1439
1440   /* Then check each states in the state_log.  */
1441   while (str_idx > 0)
1442     {
1443       int i, ret;
1444       /* Update counters.  */
1445       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1446       if (null_cnt > mctx->max_mb_elem_len)
1447         {
1448           memset (sctx->sifted_states, '\0',
1449                   sizeof (re_dfastate_t *) * str_idx);
1450           re_node_set_free (&cur_dest);
1451           return REG_NOERROR;
1452         }
1453       re_node_set_empty (&cur_dest);
1454       --str_idx;
1455       cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1456                  : &mctx->state_log[str_idx]->nodes);
1457
1458       /* Then build the next sifted state.
1459          We build the next sifted state on `cur_dest', and update
1460          `sifted_states[str_idx]' with `cur_dest'.
1461          Note:
1462          `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1463          `cur_src' points the node_set of the old `state_log[str_idx]'.  */
1464       for (i = 0; i < cur_src->nelem; i++)
1465         {
1466           int prev_node = cur_src->elems[i];
1467           int naccepted = 0;
1468           re_token_type_t type = dfa->nodes[prev_node].type;
1469
1470           if (IS_EPSILON_NODE (type))
1471             continue;
1472 #ifdef RE_ENABLE_I18N
1473           /* If the node may accept `multi byte'.  */
1474           if (ACCEPT_MB_NODE (type))
1475             naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
1476                                              str_idx, sctx->last_str_idx);
1477
1478 #endif /* RE_ENABLE_I18N */
1479           /* We don't check backreferences here.
1480              See update_cur_sifted_state().  */
1481
1482           if (!naccepted
1483               && check_node_accept (preg, dfa->nodes + prev_node, mctx,
1484                                     str_idx)
1485               && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1486                                       dfa->nexts[prev_node]))
1487             naccepted = 1;
1488
1489           if (naccepted == 0)
1490             continue;
1491
1492           if (sctx->limits.nelem)
1493             {
1494               int to_idx = str_idx + naccepted;
1495               if (check_dst_limits (dfa, &sctx->limits, mctx,
1496                                     dfa->nexts[prev_node], to_idx,
1497                                     prev_node, str_idx))
1498                 continue;
1499             }
1500           ret = re_node_set_insert (&cur_dest, prev_node);
1501           if (BE (ret == -1, 0))
1502             {
1503               err = REG_ESPACE;
1504               goto free_return;
1505             }
1506         }
1507
1508       /* Add all the nodes which satisfy the following conditions:
1509          - It can epsilon transit to a node in CUR_DEST.
1510          - It is in CUR_SRC.
1511          And update state_log.  */
1512       err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1513       if (BE (err != REG_NOERROR, 0))
1514         goto free_return;
1515     }
1516   err = REG_NOERROR;
1517  free_return:
1518   re_node_set_free (&cur_dest);
1519   return err;
1520 }
1521
1522 /* Helper functions.  */
1523
1524 static reg_errcode_t
1525 clean_state_log_if_need (mctx, next_state_log_idx)
1526     re_match_context_t *mctx;
1527     int next_state_log_idx;
1528 {
1529   int top = mctx->state_log_top;
1530
1531   if (next_state_log_idx >= mctx->input->bufs_len
1532       || (next_state_log_idx >= mctx->input->valid_len
1533           && mctx->input->valid_len < mctx->input->len))
1534     {
1535       reg_errcode_t err;
1536       err = extend_buffers (mctx);
1537       if (BE (err != REG_NOERROR, 0))
1538         return err;
1539     }
1540
1541   if (top < next_state_log_idx)
1542     {
1543       memset (mctx->state_log + top + 1, '\0',
1544               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1545       mctx->state_log_top = next_state_log_idx;
1546     }
1547   return REG_NOERROR;
1548 }
1549
1550 static reg_errcode_t
1551 merge_state_array (dfa, dst, src, num)
1552      re_dfa_t *dfa;
1553      re_dfastate_t **dst;
1554      re_dfastate_t **src;
1555      int num;
1556 {
1557   int st_idx;
1558   reg_errcode_t err;
1559   for (st_idx = 0; st_idx < num; ++st_idx)
1560     {
1561       if (dst[st_idx] == NULL)
1562         dst[st_idx] = src[st_idx];
1563       else if (src[st_idx] != NULL)
1564         {
1565           re_node_set merged_set;
1566           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1567                                         &src[st_idx]->nodes);
1568           if (BE (err != REG_NOERROR, 0))
1569             return err;
1570           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1571           re_node_set_free (&merged_set);
1572           if (BE (err != REG_NOERROR, 0))
1573             return err;
1574         }
1575     }
1576   return REG_NOERROR;
1577 }
1578
1579 static reg_errcode_t
1580 update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
1581      const regex_t *preg;
1582      re_match_context_t *mctx;
1583      re_sift_context_t *sctx;
1584      int str_idx;
1585      re_node_set *dest_nodes;
1586 {
1587   reg_errcode_t err;
1588   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1589   const re_node_set *candidates;
1590   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1591                 : &mctx->state_log[str_idx]->nodes);
1592
1593   /* At first, add the nodes which can epsilon transit to a node in
1594      DEST_NODE.  */
1595   if (dest_nodes->nelem)
1596     {
1597       err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1598       if (BE (err != REG_NOERROR, 0))
1599         return err;
1600     }
1601
1602   /* Then, check the limitations in the current sift_context.  */
1603   if (dest_nodes->nelem && sctx->limits.nelem)
1604     {
1605       err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1606                                  mctx->bkref_ents, str_idx);
1607       if (BE (err != REG_NOERROR, 0))
1608         return err;
1609     }
1610
1611   /* Update state_log.  */
1612   sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1613   if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
1614     return err;
1615
1616   if ((mctx->state_log[str_idx] != NULL
1617        && mctx->state_log[str_idx]->has_backref))
1618     {
1619       err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
1620       if (BE (err != REG_NOERROR, 0))
1621         return err;
1622     }
1623   return REG_NOERROR;
1624 }
1625
1626 static reg_errcode_t
1627 add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1628      re_dfa_t *dfa;
1629      re_node_set *dest_nodes;
1630      const re_node_set *candidates;
1631 {
1632   reg_errcode_t err;
1633   int src_idx;
1634   re_node_set src_copy;
1635
1636   err = re_node_set_init_copy (&src_copy, dest_nodes);
1637   if (BE (err != REG_NOERROR, 0))
1638     return err;
1639   for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
1640     {
1641       err = re_node_set_add_intersect (dest_nodes, candidates,
1642                                        dfa->inveclosures
1643                                        + src_copy.elems[src_idx]);
1644       if (BE (err != REG_NOERROR, 0))
1645         {
1646           re_node_set_free (&src_copy);
1647           return err;
1648         }
1649     }
1650   re_node_set_free (&src_copy);
1651   return REG_NOERROR;
1652 }
1653
1654 static reg_errcode_t
1655 sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1656      re_dfa_t *dfa;
1657      int node;
1658      re_node_set *dest_nodes;
1659      const re_node_set *candidates;
1660 {
1661     int ecl_idx;
1662     reg_errcode_t err;
1663     re_node_set *inv_eclosure = dfa->inveclosures + node;
1664     re_node_set except_nodes;
1665     re_node_set_init_empty (&except_nodes);
1666     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1667       {
1668         int cur_node = inv_eclosure->elems[ecl_idx];
1669         if (cur_node == node)
1670           continue;
1671         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1672           {
1673             int edst1 = dfa->edests[cur_node].elems[0];
1674             int edst2 = ((dfa->edests[cur_node].nelem > 1)
1675                          ? dfa->edests[cur_node].elems[1] : -1);
1676             if ((!re_node_set_contains (inv_eclosure, edst1)
1677                  && re_node_set_contains (dest_nodes, edst1))
1678                 || (edst2 > 0
1679                     && !re_node_set_contains (inv_eclosure, edst2)
1680                     && re_node_set_contains (dest_nodes, edst2)))
1681               {
1682                 err = re_node_set_add_intersect (&except_nodes, candidates,
1683                                                  dfa->inveclosures + cur_node);
1684                 if (BE (err != REG_NOERROR, 0))
1685                   {
1686                     re_node_set_free (&except_nodes);
1687                     return err;
1688                   }
1689               }
1690           }
1691       }
1692     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1693       {
1694         int cur_node = inv_eclosure->elems[ecl_idx];
1695         if (!re_node_set_contains (&except_nodes, cur_node))
1696           {
1697             int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1698             re_node_set_remove_at (dest_nodes, idx);
1699           }
1700       }
1701     re_node_set_free (&except_nodes);
1702     return REG_NOERROR;
1703 }
1704
1705 static int
1706 check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
1707      re_dfa_t *dfa;
1708      re_node_set *limits;
1709      re_match_context_t *mctx;
1710      int dst_node, dst_idx, src_node, src_idx;
1711 {
1712   int lim_idx, src_pos, dst_pos;
1713
1714   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1715     {
1716       int subexp_idx;
1717       struct re_backref_cache_entry *ent;
1718       ent = mctx->bkref_ents + limits->elems[lim_idx];
1719       subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1720
1721       dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1722                                            dfa->eclosures + dst_node,
1723                                            subexp_idx, dst_node, dst_idx);
1724       src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1725                                            dfa->eclosures + src_node,
1726                                            subexp_idx, src_node, src_idx);
1727
1728       /* In case of:
1729          <src> <dst> ( <subexp> )
1730          ( <subexp> ) <src> <dst>
1731          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1732       if (src_pos == dst_pos)
1733         continue; /* This is unrelated limitation.  */
1734       else
1735         return 1;
1736     }
1737   return 0;
1738 }
1739
1740 static int
1741 check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node,
1742                            str_idx)
1743      re_dfa_t *dfa;
1744      re_match_context_t *mctx;
1745      re_node_set *eclosures;
1746      int limit, subexp_idx, from_node, str_idx;
1747 {
1748   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1749   int node_idx;
1750
1751   /* If we are outside the range of the subexpression, return -1 or 1.  */
1752   if (str_idx < lim->subexp_from)
1753     return -1;
1754
1755   if (lim->subexp_to < str_idx)
1756     return 1;
1757
1758   /* If we are within the subexpression, return 0.  */
1759   if (str_idx != lim->subexp_from && str_idx != lim->subexp_to)
1760     return 0;
1761
1762   /* Else, we are on the boundary: examine the nodes on the epsilon
1763      closure.  */
1764   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1765     {
1766       int node = eclosures->elems[node_idx];
1767       switch (dfa->nodes[node].type)
1768         {
1769         case OP_BACK_REF:
1770           {
1771             int bi = search_cur_bkref_entry (mctx, str_idx);
1772             for (; bi < mctx->nbkref_ents; ++bi)
1773               {
1774                 struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
1775                 int dst, cpos;
1776
1777                 /* If this backreference goes beyond the point we're
1778                    examining, don't go any further.  */
1779                 if (ent->str_idx > str_idx)
1780                   break;
1781
1782                 if (ent->node != node || ent->subexp_from != ent->subexp_to)
1783                   continue;
1784
1785                 /* Recurse trying to reach the OP_OPEN_SUBEXP and
1786                    OP_CLOSE_SUBEXP cases below.  But, if the
1787                    destination node is the same node as the source
1788                    node, don't recurse because it would cause an
1789                    infinite loop: a regex that exhibits this behavior
1790                    is ()\1*\1*  */
1791                 dst = dfa->edests[node].elems[0];
1792                 if (dst == from_node)
1793                   {
1794                     if (str_idx == lim->subexp_from)
1795                       return -1;
1796                     else /* if (str_idx == lim->subexp_to) */
1797                       return 0;
1798                   }
1799
1800                 cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
1801                                                   dfa->eclosures + dst,
1802                                                   subexp_idx, dst,
1803                                                   str_idx);
1804
1805                 if (cpos == -1 && str_idx == lim->subexp_from)
1806                   return -1;
1807
1808                 if (cpos == 0 /* && str_idx == lim->lim->subexp_to */)
1809                   return 0;
1810               }
1811               break;
1812             }
1813
1814         case OP_OPEN_SUBEXP:
1815           if (str_idx == lim->subexp_from && subexp_idx == dfa->nodes[node].opr.idx)
1816             return -1;
1817           break;
1818
1819         case OP_CLOSE_SUBEXP:
1820           if (str_idx == lim->subexp_to && subexp_idx == dfa->nodes[node].opr.idx)
1821             return 0;
1822           break;
1823
1824         default:
1825             break;
1826         }
1827     }
1828
1829   if (str_idx == lim->subexp_to)
1830     return 1;
1831   else
1832     return 0;
1833 }
1834
1835 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1836    which are against limitations from DEST_NODES. */
1837
1838 static reg_errcode_t
1839 check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
1840      re_dfa_t *dfa;
1841      re_node_set *dest_nodes;
1842      const re_node_set *candidates;
1843      re_node_set *limits;
1844      struct re_backref_cache_entry *bkref_ents;
1845      int str_idx;
1846 {
1847   reg_errcode_t err;
1848   int node_idx, lim_idx;
1849
1850   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1851     {
1852       int subexp_idx;
1853       struct re_backref_cache_entry *ent;
1854       ent = bkref_ents + limits->elems[lim_idx];
1855
1856       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1857         continue; /* This is unrelated limitation.  */
1858
1859       subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1860       if (ent->subexp_to == str_idx)
1861         {
1862           int ops_node = -1;
1863           int cls_node = -1;
1864           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1865             {
1866               int node = dest_nodes->elems[node_idx];
1867               re_token_type_t type = dfa->nodes[node].type;
1868               if (type == OP_OPEN_SUBEXP
1869                   && subexp_idx == dfa->nodes[node].opr.idx)
1870                 ops_node = node;
1871               else if (type == OP_CLOSE_SUBEXP
1872                        && subexp_idx == dfa->nodes[node].opr.idx)
1873                 cls_node = node;
1874             }
1875
1876           /* Check the limitation of the open subexpression.  */
1877           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
1878           if (ops_node >= 0)
1879             {
1880               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
1881                                            candidates);
1882               if (BE (err != REG_NOERROR, 0))
1883                 return err;
1884             }
1885
1886           /* Check the limitation of the close subexpression.  */
1887           if (cls_node >= 0)
1888             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1889               {
1890                 int node = dest_nodes->elems[node_idx];
1891                 if (!re_node_set_contains (dfa->inveclosures + node,
1892                                            cls_node)
1893                     && !re_node_set_contains (dfa->eclosures + node,
1894                                               cls_node))
1895                   {
1896                     /* It is against this limitation.
1897                        Remove it form the current sifted state.  */
1898                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
1899                                                  candidates);
1900                     if (BE (err != REG_NOERROR, 0))
1901                       return err;
1902                     --node_idx;
1903                   }
1904               }
1905         }
1906       else /* (ent->subexp_to != str_idx)  */
1907         {
1908           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1909             {
1910               int node = dest_nodes->elems[node_idx];
1911               re_token_type_t type = dfa->nodes[node].type;
1912               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
1913                 {
1914                   if (subexp_idx != dfa->nodes[node].opr.idx)
1915                     continue;
1916                   if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
1917                       || (type == OP_OPEN_SUBEXP))
1918                     {
1919                       /* It is against this limitation.
1920                          Remove it form the current sifted state.  */
1921                       err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
1922                                                    candidates);
1923                       if (BE (err != REG_NOERROR, 0))
1924                         return err;
1925                     }
1926                 }
1927             }
1928         }
1929     }
1930   return REG_NOERROR;
1931 }
1932
1933 static reg_errcode_t
1934 sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
1935      const regex_t *preg;
1936      re_match_context_t *mctx;
1937      re_sift_context_t *sctx;
1938      int str_idx;
1939      re_node_set *dest_nodes;
1940 {
1941   reg_errcode_t err;
1942   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1943   int node_idx, node;
1944   re_sift_context_t local_sctx;
1945   const re_node_set *candidates;
1946   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1947                 : &mctx->state_log[str_idx]->nodes);
1948   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
1949
1950   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
1951     {
1952       int cur_bkref_idx = re_string_cur_idx (mctx->input);
1953       re_token_type_t type;
1954       node = candidates->elems[node_idx];
1955       type = dfa->nodes[node].type;
1956       if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
1957         continue;
1958       /* Avoid infinite loop for the REs like "()\1+".  */
1959       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
1960         continue;
1961       if (type == OP_BACK_REF)
1962         {
1963           int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
1964           for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1965             {
1966               int disabled_idx, subexp_len, to_idx, dst_node;
1967               struct re_backref_cache_entry *entry;
1968               entry = mctx->bkref_ents + enabled_idx;
1969               if (entry->str_idx > str_idx)
1970                 break;
1971               if (entry->node != node)
1972                   continue;
1973               subexp_len = entry->subexp_to - entry->subexp_from;
1974               to_idx = str_idx + subexp_len;
1975               dst_node = (subexp_len ? dfa->nexts[node]
1976                           : dfa->edests[node].elems[0]);
1977
1978               if (to_idx > sctx->last_str_idx
1979                   || sctx->sifted_states[to_idx] == NULL
1980                   || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
1981                                            dst_node)
1982                   || check_dst_limits (dfa, &sctx->limits, mctx, node,
1983                                        str_idx, dst_node, to_idx))
1984                 continue;
1985                 {
1986                   re_dfastate_t *cur_state;
1987                   entry->flag = 0;
1988                   for (disabled_idx = enabled_idx + 1;
1989                        disabled_idx < mctx->nbkref_ents; ++disabled_idx)
1990                     {
1991                       struct re_backref_cache_entry *entry2;
1992                       entry2 = mctx->bkref_ents + disabled_idx;
1993                       if (entry2->str_idx > str_idx)
1994                         break;
1995                       entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
1996                     }
1997
1998                   if (local_sctx.sifted_states == NULL)
1999                     {
2000                       local_sctx = *sctx;
2001                       err = re_node_set_init_copy (&local_sctx.limits,
2002                                                    &sctx->limits);
2003                       if (BE (err != REG_NOERROR, 0))
2004                         goto free_return;
2005                     }
2006                   local_sctx.last_node = node;
2007                   local_sctx.last_str_idx = str_idx;
2008                   err = re_node_set_insert (&local_sctx.limits, enabled_idx);
2009                   if (BE (err < 0, 0))
2010                     {
2011                       err = REG_ESPACE;
2012                       goto free_return;
2013                     }
2014                   cur_state = local_sctx.sifted_states[str_idx];
2015                   err = sift_states_backward (preg, mctx, &local_sctx);
2016                   if (BE (err != REG_NOERROR, 0))
2017                     goto free_return;
2018                   if (sctx->limited_states != NULL)
2019                     {
2020                       err = merge_state_array (dfa, sctx->limited_states,
2021                                                local_sctx.sifted_states,
2022                                                str_idx + 1);
2023                       if (BE (err != REG_NOERROR, 0))
2024                         goto free_return;
2025                     }
2026                   local_sctx.sifted_states[str_idx] = cur_state;
2027                   re_node_set_remove (&local_sctx.limits, enabled_idx);
2028                   /* We must not use the variable entry here, since
2029                      mctx->bkref_ents might be realloced.  */
2030                   mctx->bkref_ents[enabled_idx].flag = 1;
2031                 }
2032             }
2033           enabled_idx = search_cur_bkref_entry (mctx, str_idx);
2034           for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
2035             {
2036               struct re_backref_cache_entry *entry;
2037               entry = mctx->bkref_ents + enabled_idx;
2038               if (entry->str_idx > str_idx)
2039                 break;
2040               if (entry->node == node)
2041                 entry->flag = 0;
2042             }
2043         }
2044     }
2045   err = REG_NOERROR;
2046  free_return:
2047   if (local_sctx.sifted_states != NULL)
2048     {
2049       re_node_set_free (&local_sctx.limits);
2050     }
2051
2052   return err;
2053 }
2054
2055
2056 #ifdef RE_ENABLE_I18N
2057 static int
2058 sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
2059     const regex_t *preg;
2060     const re_match_context_t *mctx;
2061     re_sift_context_t *sctx;
2062     int node_idx, str_idx, max_str_idx;
2063 {
2064   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2065   int naccepted;
2066   /* Check the node can accept `multi byte'.  */
2067   naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
2068   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2069       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2070                             dfa->nexts[node_idx]))
2071     /* The node can't accept the `multi byte', or the
2072        destination was already thrown away, then the node
2073        could't accept the current input `multi byte'.   */
2074     naccepted = 0;
2075   /* Otherwise, it is sure that the node could accept
2076      `naccepted' bytes input.  */
2077   return naccepted;
2078 }
2079 #endif /* RE_ENABLE_I18N */
2080
2081 \f
2082 /* Functions for state transition.  */
2083
2084 /* Return the next state to which the current state STATE will transit by
2085    accepting the current input byte, and update STATE_LOG if necessary.
2086    If STATE can accept a multibyte char/collating element/back reference
2087    update the destination of STATE_LOG.  */
2088
2089 static re_dfastate_t *
2090 transit_state (err, preg, mctx, state)
2091      reg_errcode_t *err;
2092      const regex_t *preg;
2093      re_match_context_t *mctx;
2094      re_dfastate_t *state;
2095 {
2096   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2097   re_dfastate_t **trtable, *next_state;
2098   unsigned char ch;
2099   int cur_idx;
2100
2101   if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
2102       || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
2103           && mctx->input->valid_len < mctx->input->len))
2104     {
2105       *err = extend_buffers (mctx);
2106       if (BE (*err != REG_NOERROR, 0))
2107         return NULL;
2108     }
2109
2110   *err = REG_NOERROR;
2111   if (state == NULL)
2112     {
2113       next_state = state;
2114       re_string_skip_bytes (mctx->input, 1);
2115     }
2116   else
2117     {
2118 #ifdef RE_ENABLE_I18N
2119       /* If the current state can accept multibyte.  */
2120       if (state->accept_mb)
2121         {
2122           *err = transit_state_mb (preg, state, mctx);
2123           if (BE (*err != REG_NOERROR, 0))
2124             return NULL;
2125         }
2126 #endif /* RE_ENABLE_I18N */
2127
2128       /* Then decide the next state with the single byte.  */
2129       if (1)
2130         {
2131           /* Use transition table  */
2132           ch = re_string_fetch_byte (mctx->input);
2133           trtable = state->trtable;
2134           if (trtable == NULL)
2135             {
2136               trtable = build_trtable (preg, state);
2137               if (trtable == NULL)
2138                 {
2139                   *err = REG_ESPACE;
2140                   return NULL;
2141                 }
2142             }
2143           if (BE (state->word_trtable, 0))
2144             {
2145               unsigned int context;
2146               context
2147                 = re_string_context_at (mctx->input,
2148                                         re_string_cur_idx (mctx->input) - 1,
2149                                         mctx->eflags, preg->newline_anchor);
2150               if (IS_WORD_CONTEXT (context))
2151                 next_state = trtable[ch + SBC_MAX];
2152               else
2153                 next_state = trtable[ch];
2154             }
2155           else
2156             next_state = trtable[ch];
2157         }
2158 #if 0
2159       else
2160         {
2161           /* don't use transition table  */
2162           next_state = transit_state_sb (err, preg, state, mctx);
2163           if (BE (next_state == NULL && err != REG_NOERROR, 0))
2164             return NULL;
2165         }
2166 #endif
2167     }
2168
2169   cur_idx = re_string_cur_idx (mctx->input);
2170   /* Update the state_log if we need.  */
2171   if (mctx->state_log != NULL)
2172     {
2173       if (cur_idx > mctx->state_log_top)
2174         {
2175           mctx->state_log[cur_idx] = next_state;
2176           mctx->state_log_top = cur_idx;
2177         }
2178       else if (mctx->state_log[cur_idx] == 0)
2179         {
2180           mctx->state_log[cur_idx] = next_state;
2181         }
2182       else
2183         {
2184           re_dfastate_t *pstate;
2185           unsigned int context;
2186           re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2187           /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2188              the destination of a multibyte char/collating element/
2189              back reference.  Then the next state is the union set of
2190              these destinations and the results of the transition table.  */
2191           pstate = mctx->state_log[cur_idx];
2192           log_nodes = pstate->entrance_nodes;
2193           if (next_state != NULL)
2194             {
2195               table_nodes = next_state->entrance_nodes;
2196               *err = re_node_set_init_union (&next_nodes, table_nodes,
2197                                              log_nodes);
2198               if (BE (*err != REG_NOERROR, 0))
2199                 return NULL;
2200             }
2201           else
2202             next_nodes = *log_nodes;
2203           /* Note: We already add the nodes of the initial state,
2204                    then we don't need to add them here.  */
2205
2206           context = re_string_context_at (mctx->input,
2207                                           re_string_cur_idx (mctx->input) - 1,
2208                                           mctx->eflags, preg->newline_anchor);
2209           next_state = mctx->state_log[cur_idx]
2210             = re_acquire_state_context (err, dfa, &next_nodes, context);
2211           /* We don't need to check errors here, since the return value of
2212              this function is next_state and ERR is already set.  */
2213
2214           if (table_nodes != NULL)
2215             re_node_set_free (&next_nodes);
2216         }
2217     }
2218
2219   if (BE (dfa->nbackref, 0) && next_state != NULL)
2220     {
2221       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2222          later.  We must check them here, since the back references in the
2223          next state might use them.  */
2224       *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
2225                                         cur_idx);
2226       if (BE (*err != REG_NOERROR, 0))
2227         return NULL;
2228
2229       /* If the next state has back references.  */
2230       if (next_state->has_backref)
2231         {
2232           *err = transit_state_bkref (preg, &next_state->nodes, mctx);
2233           if (BE (*err != REG_NOERROR, 0))
2234             return NULL;
2235           next_state = mctx->state_log[cur_idx];
2236         }
2237     }
2238   return next_state;
2239 }
2240
2241 /* Helper functions for transit_state.  */
2242
2243 /* From the node set CUR_NODES, pick up the nodes whose types are
2244    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2245    expression. And register them to use them later for evaluating the
2246    correspoding back references.  */
2247
2248 static reg_errcode_t
2249 check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
2250      re_dfa_t *dfa;
2251      re_match_context_t *mctx;
2252      re_node_set *cur_nodes;
2253      int str_idx;
2254 {
2255   int node_idx;
2256   reg_errcode_t err;
2257
2258   /* TODO: This isn't efficient.
2259            Because there might be more than one nodes whose types are
2260            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2261            nodes.
2262            E.g. RE: (a){2}  */
2263   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2264     {
2265       int node = cur_nodes->elems[node_idx];
2266       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2267           && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
2268           && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2269         {
2270           err = match_ctx_add_subtop (mctx, node, str_idx);
2271           if (BE (err != REG_NOERROR, 0))
2272             return err;
2273         }
2274     }
2275   return REG_NOERROR;
2276 }
2277
2278 #if 0
2279 /* Return the next state to which the current state STATE will transit by
2280    accepting the current input byte.  */
2281
2282 static re_dfastate_t *
2283 transit_state_sb (err, preg, state, mctx)
2284      reg_errcode_t *err;
2285      const regex_t *preg;
2286      re_dfastate_t *state;
2287      re_match_context_t *mctx;
2288 {
2289   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2290   re_node_set next_nodes;
2291   re_dfastate_t *next_state;
2292   int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
2293   unsigned int context;
2294
2295   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2296   if (BE (*err != REG_NOERROR, 0))
2297     return NULL;
2298   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2299     {
2300       int cur_node = state->nodes.elems[node_cnt];
2301       if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
2302         {
2303           *err = re_node_set_merge (&next_nodes,
2304                                     dfa->eclosures + dfa->nexts[cur_node]);
2305           if (BE (*err != REG_NOERROR, 0))
2306             {
2307               re_node_set_free (&next_nodes);
2308               return NULL;
2309             }
2310         }
2311     }
2312   context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
2313                                   preg->newline_anchor);
2314   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2315   /* We don't need to check errors here, since the return value of
2316      this function is next_state and ERR is already set.  */
2317
2318   re_node_set_free (&next_nodes);
2319   re_string_skip_bytes (mctx->input, 1);
2320   return next_state;
2321 }
2322 #endif
2323
2324 #ifdef RE_ENABLE_I18N
2325 static reg_errcode_t
2326 transit_state_mb (preg, pstate, mctx)
2327     const regex_t *preg;
2328     re_dfastate_t *pstate;
2329     re_match_context_t *mctx;
2330 {
2331   reg_errcode_t err;
2332   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2333   int i;
2334
2335   for (i = 0; i < pstate->nodes.nelem; ++i)
2336     {
2337       re_node_set dest_nodes, *new_nodes;
2338       int cur_node_idx = pstate->nodes.elems[i];
2339       int naccepted = 0, dest_idx;
2340       unsigned int context;
2341       re_dfastate_t *dest_state;
2342
2343       if (dfa->nodes[cur_node_idx].constraint)
2344         {
2345           context = re_string_context_at (mctx->input,
2346                                           re_string_cur_idx (mctx->input),
2347                                           mctx->eflags, preg->newline_anchor);
2348           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2349                                            context))
2350             continue;
2351         }
2352
2353       /* How many bytes the node can accept?  */
2354       if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
2355         naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
2356                                              re_string_cur_idx (mctx->input));
2357       if (naccepted == 0)
2358         continue;
2359
2360       /* The node can accepts `naccepted' bytes.  */
2361       dest_idx = re_string_cur_idx (mctx->input) + naccepted;
2362       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2363                                : mctx->max_mb_elem_len);
2364       err = clean_state_log_if_need (mctx, dest_idx);
2365       if (BE (err != REG_NOERROR, 0))
2366         return err;
2367 #ifdef DEBUG
2368       assert (dfa->nexts[cur_node_idx] != -1);
2369 #endif
2370       /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
2371          then we use pstate->nodes.elems[i] instead.  */
2372       new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
2373
2374       dest_state = mctx->state_log[dest_idx];
2375       if (dest_state == NULL)
2376         dest_nodes = *new_nodes;
2377       else
2378         {
2379           err = re_node_set_init_union (&dest_nodes,
2380                                         dest_state->entrance_nodes, new_nodes);
2381           if (BE (err != REG_NOERROR, 0))
2382             return err;
2383         }
2384       context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
2385                                       preg->newline_anchor);
2386       mctx->state_log[dest_idx]
2387         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2388       if (dest_state != NULL)
2389         re_node_set_free (&dest_nodes);
2390       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2391         return err;
2392     }
2393   return REG_NOERROR;
2394 }
2395 #endif /* RE_ENABLE_I18N */
2396
2397 static reg_errcode_t
2398 transit_state_bkref (preg, nodes, mctx)
2399     const regex_t *preg;
2400     const re_node_set *nodes;
2401     re_match_context_t *mctx;
2402 {
2403   reg_errcode_t err;
2404   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2405   int i;
2406   int cur_str_idx = re_string_cur_idx (mctx->input);
2407
2408   for (i = 0; i < nodes->nelem; ++i)
2409     {
2410       int dest_str_idx, prev_nelem, bkc_idx;
2411       int node_idx = nodes->elems[i];
2412       unsigned int context;
2413       const re_token_t *node = dfa->nodes + node_idx;
2414       re_node_set *new_dest_nodes;
2415
2416       /* Check whether `node' is a backreference or not.  */
2417       if (node->type != OP_BACK_REF)
2418         continue;
2419
2420       if (node->constraint)
2421         {
2422           context = re_string_context_at (mctx->input, cur_str_idx,
2423                                           mctx->eflags, preg->newline_anchor);
2424           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2425             continue;
2426         }
2427
2428       /* `node' is a backreference.
2429          Check the substring which the substring matched.  */
2430       bkc_idx = mctx->nbkref_ents;
2431       err = get_subexp (preg, mctx, node_idx, cur_str_idx);
2432       if (BE (err != REG_NOERROR, 0))
2433         goto free_return;
2434
2435       /* And add the epsilon closures (which is `new_dest_nodes') of
2436          the backreference to appropriate state_log.  */
2437 #ifdef DEBUG
2438       assert (dfa->nexts[node_idx] != -1);
2439 #endif
2440       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2441         {
2442           int subexp_len;
2443           re_dfastate_t *dest_state;
2444           struct re_backref_cache_entry *bkref_ent;
2445           bkref_ent = mctx->bkref_ents + bkc_idx;
2446           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2447             continue;
2448           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2449           new_dest_nodes = (subexp_len == 0
2450                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2451                             : dfa->eclosures + dfa->nexts[node_idx]);
2452           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2453                           - bkref_ent->subexp_from);
2454           context = re_string_context_at (mctx->input, dest_str_idx - 1,
2455                                           mctx->eflags, preg->newline_anchor);
2456           dest_state = mctx->state_log[dest_str_idx];
2457           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2458                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2459           /* Add `new_dest_node' to state_log.  */
2460           if (dest_state == NULL)
2461             {
2462               mctx->state_log[dest_str_idx]
2463                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2464                                             context);
2465               if (BE (mctx->state_log[dest_str_idx] == NULL
2466                       && err != REG_NOERROR, 0))
2467                 goto free_return;
2468             }
2469           else
2470             {
2471               re_node_set dest_nodes;
2472               err = re_node_set_init_union (&dest_nodes,
2473                                             dest_state->entrance_nodes,
2474                                             new_dest_nodes);
2475               if (BE (err != REG_NOERROR, 0))
2476                 {
2477                   re_node_set_free (&dest_nodes);
2478                   goto free_return;
2479                 }
2480               mctx->state_log[dest_str_idx]
2481                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2482               re_node_set_free (&dest_nodes);
2483               if (BE (mctx->state_log[dest_str_idx] == NULL
2484                       && err != REG_NOERROR, 0))
2485                 goto free_return;
2486             }
2487           /* We need to check recursively if the backreference can epsilon
2488              transit.  */
2489           if (subexp_len == 0
2490               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2491             {
2492               err = check_subexp_matching_top (dfa, mctx, new_dest_nodes,
2493                                                cur_str_idx);
2494               if (BE (err != REG_NOERROR, 0))
2495                 goto free_return;
2496               err = transit_state_bkref (preg, new_dest_nodes, mctx);
2497               if (BE (err != REG_NOERROR, 0))
2498                 goto free_return;
2499             }
2500         }
2501     }
2502   err = REG_NOERROR;
2503  free_return:
2504   return err;
2505 }
2506
2507 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2508    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2509    Note that we might collect inappropriate candidates here.
2510    However, the cost of checking them strictly here is too high, then we
2511    delay these checking for prune_impossible_nodes().  */
2512
2513 static reg_errcode_t
2514 get_subexp (preg, mctx, bkref_node, bkref_str_idx)
2515      const regex_t *preg;
2516      re_match_context_t *mctx;
2517      int bkref_node, bkref_str_idx;
2518 {
2519   int subexp_num, sub_top_idx;
2520   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2521   const char *buf = (const char *) re_string_get_buffer (mctx->input);
2522   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2523   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2524   for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
2525     {
2526       const struct re_backref_cache_entry *entry
2527         = &mctx->bkref_ents[cache_idx];
2528       if (entry->str_idx > bkref_str_idx)
2529         break;
2530       if (entry->node == bkref_node)
2531         return REG_NOERROR; /* We already checked it.  */
2532     }
2533   subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
2534
2535   /* For each sub expression  */
2536   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2537     {
2538       reg_errcode_t err;
2539       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2540       re_sub_match_last_t *sub_last;
2541       int sub_last_idx, sl_str;
2542       const char *bkref_str;
2543
2544       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2545         continue; /* It isn't related.  */
2546
2547       sl_str = sub_top->str_idx;
2548       bkref_str = buf + bkref_str_idx;
2549       /* At first, check the last node of sub expressions we already
2550          evaluated.  */
2551       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2552         {
2553           int sl_str_diff;
2554           sub_last = sub_top->lasts[sub_last_idx];
2555           sl_str_diff = sub_last->str_idx - sl_str;
2556           /* The matched string by the sub expression match with the substring
2557              at the back reference?  */
2558           if (sl_str_diff > 0
2559               && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0)
2560             break; /* We don't need to search this sub expression any more.  */
2561           bkref_str += sl_str_diff;
2562           sl_str += sl_str_diff;
2563           err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
2564                                 bkref_str_idx);
2565
2566           /* Reload buf and bkref_str, since the preceding call might
2567              have reallocated the buffer.  */
2568           buf = (const char *) re_string_get_buffer (mctx->input);
2569           bkref_str = buf + bkref_str_idx;
2570
2571           if (err == REG_NOMATCH)
2572             continue;
2573           if (BE (err != REG_NOERROR, 0))
2574             return err;
2575         }
2576       if (sub_last_idx < sub_top->nlasts)
2577         continue;
2578       if (sub_last_idx > 0)
2579         ++sl_str;
2580       /* Then, search for the other last nodes of the sub expression.  */
2581       for (; sl_str <= bkref_str_idx; ++sl_str)
2582         {
2583           int cls_node, sl_str_off;
2584           const re_node_set *nodes;
2585           sl_str_off = sl_str - sub_top->str_idx;
2586           /* The matched string by the sub expression match with the substring
2587              at the back reference?  */
2588           if (sl_str_off > 0 && *bkref_str++ != buf[sl_str - 1])
2589             break; /* We don't need to search this sub expression any more.  */
2590           if (mctx->state_log[sl_str] == NULL)
2591             continue;
2592           /* Does this state have a ')' of the sub expression?  */
2593           nodes = &mctx->state_log[sl_str]->nodes;
2594           cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2595           if (cls_node == -1)
2596             continue; /* No.  */
2597           if (sub_top->path == NULL)
2598             {
2599               sub_top->path = calloc (sizeof (state_array_t),
2600                                       sl_str - sub_top->str_idx + 1);
2601               if (sub_top->path == NULL)
2602                 return REG_ESPACE;
2603             }
2604           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2605              in the current context?  */
2606           err = check_arrival (preg, mctx, sub_top->path, sub_top->node,
2607                                sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2608           if (err == REG_NOMATCH)
2609               continue;
2610           if (BE (err != REG_NOERROR, 0))
2611               return err;
2612           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2613           if (BE (sub_last == NULL, 0))
2614             return REG_ESPACE;
2615           err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
2616                                 bkref_str_idx);
2617           if (err == REG_NOMATCH)
2618             continue;
2619         }
2620     }
2621   return REG_NOERROR;
2622 }
2623
2624 /* Helper functions for get_subexp().  */
2625
2626 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2627    If it can arrive, register the sub expression expressed with SUB_TOP
2628    and SUB_LAST.  */
2629
2630 static reg_errcode_t
2631 get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str)
2632      const regex_t *preg;
2633      re_match_context_t *mctx;
2634      const re_sub_match_top_t *sub_top;
2635      re_sub_match_last_t *sub_last;
2636      int bkref_node, bkref_str;
2637 {
2638   reg_errcode_t err;
2639   int to_idx;
2640   /* Can the subexpression arrive the back reference?  */
2641   err = check_arrival (preg, mctx, &sub_last->path, sub_last->node,
2642                        sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2643   if (err != REG_NOERROR)
2644     return err;
2645   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2646                              sub_last->str_idx);
2647   if (BE (err != REG_NOERROR, 0))
2648     return err;
2649   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2650   clean_state_log_if_need (mctx, to_idx);
2651   return REG_NOERROR;
2652 }
2653
2654 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2655    Search '(' if FL_OPEN, or search ')' otherwise.
2656    TODO: This function isn't efficient...
2657          Because there might be more than one nodes whose types are
2658          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2659          nodes.
2660          E.g. RE: (a){2}  */
2661
2662 static int
2663 find_subexp_node (dfa, nodes, subexp_idx, type)
2664      const re_dfa_t *dfa;
2665      const re_node_set *nodes;
2666      int subexp_idx, type;
2667 {
2668   int cls_idx;
2669   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2670     {
2671       int cls_node = nodes->elems[cls_idx];
2672       const re_token_t *node = dfa->nodes + cls_node;
2673       if (node->type == type
2674           && node->opr.idx == subexp_idx)
2675         return cls_node;
2676     }
2677   return -1;
2678 }
2679
2680 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2681    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2682    heavily reused.
2683    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2684
2685 static reg_errcode_t
2686 check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str,
2687                type)
2688      const regex_t *preg;
2689      re_match_context_t *mctx;
2690      state_array_t *path;
2691      int top_node, top_str, last_node, last_str, type;
2692 {
2693   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2694   reg_errcode_t err;
2695   int subexp_num, backup_cur_idx, str_idx, null_cnt;
2696   re_dfastate_t *cur_state = NULL;
2697   re_node_set *cur_nodes, next_nodes;
2698   re_dfastate_t **backup_state_log;
2699   unsigned int context;
2700
2701   subexp_num = dfa->nodes[top_node].opr.idx;
2702   /* Extend the buffer if we need.  */
2703   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2704     {
2705       re_dfastate_t **new_array;
2706       int old_alloc = path->alloc;
2707       path->alloc += last_str + mctx->max_mb_elem_len + 1;
2708       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2709       if (new_array == NULL)
2710         {
2711           path->alloc = old_alloc;
2712           return REG_ESPACE;
2713         }
2714       path->array = new_array;
2715       memset (new_array + old_alloc, '\0',
2716               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2717     }
2718
2719   str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2720
2721   /* Temporary modify MCTX.  */
2722   backup_state_log = mctx->state_log;
2723   backup_cur_idx = mctx->input->cur_idx;
2724   mctx->state_log = path->array;
2725   mctx->input->cur_idx = str_idx;
2726
2727   /* Setup initial node set.  */
2728   context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
2729                                   preg->newline_anchor);
2730   if (str_idx == top_str)
2731     {
2732       err = re_node_set_init_1 (&next_nodes, top_node);
2733       if (BE (err != REG_NOERROR, 0))
2734         return err;
2735       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2736       if (BE (err != REG_NOERROR, 0))
2737         {
2738           re_node_set_free (&next_nodes);
2739           return err;
2740         }
2741     }
2742   else
2743     {
2744       cur_state = mctx->state_log[str_idx];
2745       if (cur_state && cur_state->has_backref)
2746         {
2747           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2748           if (BE ( err != REG_NOERROR, 0))
2749             return err;
2750         }
2751       else
2752         re_node_set_init_empty (&next_nodes);
2753     }
2754   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2755     {
2756       if (next_nodes.nelem)
2757         {
2758           err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
2759                                     subexp_num, type);
2760           if (BE ( err != REG_NOERROR, 0))
2761             {
2762               re_node_set_free (&next_nodes);
2763               return err;
2764             }
2765         }
2766       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2767       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2768         {
2769           re_node_set_free (&next_nodes);
2770           return err;
2771         }
2772       mctx->state_log[str_idx] = cur_state;
2773     }
2774
2775   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2776     {
2777       re_node_set_empty (&next_nodes);
2778       if (mctx->state_log[str_idx + 1])
2779         {
2780           err = re_node_set_merge (&next_nodes,
2781                                    &mctx->state_log[str_idx + 1]->nodes);
2782           if (BE (err != REG_NOERROR, 0))
2783             {
2784               re_node_set_free (&next_nodes);
2785               return err;
2786             }
2787         }
2788       if (cur_state)
2789         {
2790           err = check_arrival_add_next_nodes (preg, dfa, mctx, str_idx,
2791                                               &cur_state->nodes, &next_nodes);
2792           if (BE (err != REG_NOERROR, 0))
2793             {
2794               re_node_set_free (&next_nodes);
2795               return err;
2796             }
2797         }
2798       ++str_idx;
2799       if (next_nodes.nelem)
2800         {
2801           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2802           if (BE (err != REG_NOERROR, 0))
2803             {
2804               re_node_set_free (&next_nodes);
2805               return err;
2806             }
2807           err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
2808                                     subexp_num, type);
2809           if (BE ( err != REG_NOERROR, 0))
2810             {
2811               re_node_set_free (&next_nodes);
2812               return err;
2813             }
2814         }
2815       context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
2816                                       preg->newline_anchor);
2817       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2818       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2819         {
2820           re_node_set_free (&next_nodes);
2821           return err;
2822         }
2823       mctx->state_log[str_idx] = cur_state;
2824       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2825     }
2826   re_node_set_free (&next_nodes);
2827   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2828                : &mctx->state_log[last_str]->nodes);
2829   path->next_idx = str_idx;
2830
2831   /* Fix MCTX.  */
2832   mctx->state_log = backup_state_log;
2833   mctx->input->cur_idx = backup_cur_idx;
2834
2835   /* Then check the current node set has the node LAST_NODE.  */
2836   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2837     return REG_NOERROR;
2838
2839   return REG_NOMATCH;
2840 }
2841
2842 /* Helper functions for check_arrival.  */
2843
2844 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2845    to NEXT_NODES.
2846    TODO: This function is similar to the functions transit_state*(),
2847          however this function has many additional works.
2848          Can't we unify them?  */
2849
2850 static reg_errcode_t
2851 check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes)
2852      const regex_t *preg;
2853      re_dfa_t *dfa;
2854      re_match_context_t *mctx;
2855      int str_idx;
2856      re_node_set *cur_nodes, *next_nodes;
2857 {
2858   int cur_idx;
2859   reg_errcode_t err;
2860   re_node_set union_set;
2861   re_node_set_init_empty (&union_set);
2862   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2863     {
2864       int naccepted = 0;
2865       int cur_node = cur_nodes->elems[cur_idx];
2866       re_token_type_t type = dfa->nodes[cur_node].type;
2867       if (IS_EPSILON_NODE (type))
2868         continue;
2869 #ifdef RE_ENABLE_I18N
2870       /* If the node may accept `multi byte'.  */
2871       if (ACCEPT_MB_NODE (type))
2872         {
2873           naccepted = check_node_accept_bytes (preg, cur_node, mctx->input,
2874                                                str_idx);
2875           if (naccepted > 1)
2876             {
2877               re_dfastate_t *dest_state;
2878               int next_node = dfa->nexts[cur_node];
2879               int next_idx = str_idx + naccepted;
2880               dest_state = mctx->state_log[next_idx];
2881               re_node_set_empty (&union_set);
2882               if (dest_state)
2883                 {
2884                   err = re_node_set_merge (&union_set, &dest_state->nodes);
2885                   if (BE (err != REG_NOERROR, 0))
2886                     {
2887                       re_node_set_free (&union_set);
2888                       return err;
2889                     }
2890                   err = re_node_set_insert (&union_set, next_node);
2891                   if (BE (err < 0, 0))
2892                     {
2893                       re_node_set_free (&union_set);
2894                       return REG_ESPACE;
2895                     }
2896                 }
2897               else
2898                 {
2899                   err = re_node_set_insert (&union_set, next_node);
2900                   if (BE (err < 0, 0))
2901                     {
2902                       re_node_set_free (&union_set);
2903                       return REG_ESPACE;
2904                     }
2905                 }
2906               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
2907                                                             &union_set);
2908               if (BE (mctx->state_log[next_idx] == NULL
2909                       && err != REG_NOERROR, 0))
2910                 {
2911                   re_node_set_free (&union_set);
2912                   return err;
2913                 }
2914             }
2915         }
2916 #endif /* RE_ENABLE_I18N */
2917       if (naccepted
2918           || check_node_accept (preg, dfa->nodes + cur_node, mctx,
2919                                 str_idx))
2920         {
2921           err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
2922           if (BE (err < 0, 0))
2923             {
2924               re_node_set_free (&union_set);
2925               return REG_ESPACE;
2926             }
2927         }
2928     }
2929   re_node_set_free (&union_set);
2930   return REG_NOERROR;
2931 }
2932
2933 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
2934    CUR_NODES, however exclude the nodes which are:
2935     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
2936     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
2937 */
2938
2939 static reg_errcode_t
2940 check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
2941      re_dfa_t *dfa;
2942      re_node_set *cur_nodes;
2943      int ex_subexp, type;
2944 {
2945   reg_errcode_t err;
2946   int idx, outside_node;
2947   re_node_set new_nodes;
2948 #ifdef DEBUG
2949   assert (cur_nodes->nelem);
2950 #endif
2951   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
2952   if (BE (err != REG_NOERROR, 0))
2953     return err;
2954   /* Create a new node set NEW_NODES with the nodes which are epsilon
2955      closures of the node in CUR_NODES.  */
2956
2957   for (idx = 0; idx < cur_nodes->nelem; ++idx)
2958     {
2959       int cur_node = cur_nodes->elems[idx];
2960       re_node_set *eclosure = dfa->eclosures + cur_node;
2961       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
2962       if (outside_node == -1)
2963         {
2964           /* There are no problematic nodes, just merge them.  */
2965           err = re_node_set_merge (&new_nodes, eclosure);
2966           if (BE (err != REG_NOERROR, 0))
2967             {
2968               re_node_set_free (&new_nodes);
2969               return err;
2970             }
2971         }
2972       else
2973         {
2974           /* There are problematic nodes, re-calculate incrementally.  */
2975           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
2976                                               ex_subexp, type);
2977           if (BE (err != REG_NOERROR, 0))
2978             {
2979               re_node_set_free (&new_nodes);
2980               return err;
2981             }
2982         }
2983     }
2984   re_node_set_free (cur_nodes);
2985   *cur_nodes = new_nodes;
2986   return REG_NOERROR;
2987 }
2988
2989 /* Helper function for check_arrival_expand_ecl.
2990    Check incrementally the epsilon closure of TARGET, and if it isn't
2991    problematic append it to DST_NODES.  */
2992
2993 static reg_errcode_t
2994 check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
2995      re_dfa_t *dfa;
2996      int target, ex_subexp, type;
2997      re_node_set *dst_nodes;
2998 {
2999   int cur_node;
3000   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3001     {
3002       int err;
3003
3004       if (dfa->nodes[cur_node].type == type
3005           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3006         {
3007           if (type == OP_CLOSE_SUBEXP)
3008             {
3009               err = re_node_set_insert (dst_nodes, cur_node);
3010               if (BE (err == -1, 0))
3011                 return REG_ESPACE;
3012             }
3013           break;
3014         }
3015       err = re_node_set_insert (dst_nodes, cur_node);
3016       if (BE (err == -1, 0))
3017         return REG_ESPACE;
3018       if (dfa->edests[cur_node].nelem == 0)
3019         break;
3020       if (dfa->edests[cur_node].nelem == 2)
3021         {
3022           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3023                                               dfa->edests[cur_node].elems[1],
3024                                               ex_subexp, type);
3025           if (BE (err != REG_NOERROR, 0))
3026             return err;
3027         }
3028       cur_node = dfa->edests[cur_node].elems[0];
3029     }
3030   return REG_NOERROR;
3031 }
3032
3033
3034 /* For all the back references in the current state, calculate the
3035    destination of the back references by the appropriate entry
3036    in MCTX->BKREF_ENTS.  */
3037
3038 static reg_errcode_t
3039 expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
3040                     type)
3041      const regex_t *preg;
3042      re_match_context_t *mctx;
3043      int cur_str, last_str, subexp_num, type;
3044      re_node_set *cur_nodes;
3045 {
3046   reg_errcode_t err;
3047   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3048   int cache_idx, cache_idx_start;
3049   /* The current state.  */
3050
3051   cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3052   for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
3053     {
3054       int to_idx, next_node;
3055       struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
3056       if (ent->str_idx > cur_str)
3057         break;
3058       /* Is this entry ENT is appropriate?  */
3059       if (!re_node_set_contains (cur_nodes, ent->node))
3060         continue; /* No.  */
3061
3062       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3063       /* Calculate the destination of the back reference, and append it
3064          to MCTX->STATE_LOG.  */
3065       if (to_idx == cur_str)
3066         {
3067           /* The backreference did epsilon transit, we must re-check all the
3068              node in the current state.  */
3069           re_node_set new_dests;
3070           reg_errcode_t err2, err3;
3071           next_node = dfa->edests[ent->node].elems[0];
3072           if (re_node_set_contains (cur_nodes, next_node))
3073             continue;
3074           err = re_node_set_init_1 (&new_dests, next_node);
3075           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3076           err3 = re_node_set_merge (cur_nodes, &new_dests);
3077           re_node_set_free (&new_dests);
3078           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3079                   || err3 != REG_NOERROR, 0))
3080             {
3081               err = (err != REG_NOERROR ? err
3082                      : (err2 != REG_NOERROR ? err2 : err3));
3083               return err;
3084             }
3085           /* TODO: It is still inefficient...  */
3086           cache_idx = cache_idx_start - 1;
3087           continue;
3088         }
3089       else
3090         {
3091           re_node_set union_set;
3092           next_node = dfa->nexts[ent->node];
3093           if (mctx->state_log[to_idx])
3094             {
3095               int ret;
3096               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3097                                         next_node))
3098                 continue;
3099               err = re_node_set_init_copy (&union_set,
3100                                            &mctx->state_log[to_idx]->nodes);
3101               ret = re_node_set_insert (&union_set, next_node);
3102               if (BE (err != REG_NOERROR || ret < 0, 0))
3103                 {
3104                   re_node_set_free (&union_set);
3105                   err = err != REG_NOERROR ? err : REG_ESPACE;
3106                   return err;
3107                 }
3108             }
3109           else
3110             {
3111               err = re_node_set_init_1 (&union_set, next_node);
3112               if (BE (err != REG_NOERROR, 0))
3113                 return err;
3114             }
3115           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3116           re_node_set_free (&union_set);
3117           if (BE (mctx->state_log[to_idx] == NULL
3118                   && err != REG_NOERROR, 0))
3119             return err;
3120         }
3121     }
3122   return REG_NOERROR;
3123 }
3124
3125 /* Build transition table for the state.
3126    Return the new table if succeeded, otherwise return NULL.  */
3127
3128 static re_dfastate_t **
3129 build_trtable (preg, state)
3130     const regex_t *preg;
3131     re_dfastate_t *state;
3132 {
3133   reg_errcode_t err;
3134   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3135   int i, j, k, ch;
3136   int dests_node_malloced = 0, dest_states_malloced = 0;
3137   int ndests; /* Number of the destination states from `state'.  */
3138   re_dfastate_t **trtable;
3139   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3140   re_node_set follows, *dests_node;
3141   bitset *dests_ch;
3142   bitset acceptable;
3143
3144   /* We build DFA states which corresponds to the destination nodes
3145      from `state'.  `dests_node[i]' represents the nodes which i-th
3146      destination state contains, and `dests_ch[i]' represents the
3147      characters which i-th destination state accepts.  */
3148 #ifdef _LIBC
3149   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3150     dests_node = (re_node_set *)
3151                  alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3152   else
3153 #endif
3154     {
3155       dests_node = (re_node_set *)
3156                    malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3157       if (BE (dests_node == NULL, 0))
3158         return NULL;
3159       dests_node_malloced = 1;
3160     }
3161   dests_ch = (bitset *) (dests_node + SBC_MAX);
3162
3163   /* Initialize transiton table.  */
3164   trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3165   state->word_trtable = 0;
3166   if (BE (trtable == NULL, 0))
3167     {
3168       if (dests_node_malloced)
3169         free (dests_node);
3170       return NULL;
3171     }
3172
3173   /* At first, group all nodes belonging to `state' into several
3174      destinations.  */
3175   ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
3176   if (BE (ndests <= 0, 0))
3177     {
3178       if (dests_node_malloced)
3179         free (dests_node);
3180       /* Return NULL in case of an error, trtable otherwise.  */
3181       if (ndests == 0)
3182         {
3183           state->trtable = trtable;
3184           return trtable;
3185         }
3186       free (trtable);
3187       return NULL;
3188     }
3189
3190   err = re_node_set_alloc (&follows, ndests + 1);
3191   if (BE (err != REG_NOERROR, 0))
3192     goto out_free;
3193
3194 #ifdef _LIBC
3195   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3196                          + ndests * 3 * sizeof (re_dfastate_t *)))
3197     dest_states = (re_dfastate_t **)
3198                   alloca (ndests * 3 * sizeof (re_dfastate_t *));
3199   else
3200 #endif
3201     {
3202       dest_states = (re_dfastate_t **)
3203                     malloc (ndests * 3 * sizeof (re_dfastate_t *));
3204       if (BE (dest_states == NULL, 0))
3205         {
3206 out_free:
3207           if (dest_states_malloced)
3208             free (dest_states);
3209           re_node_set_free (&follows);
3210           for (i = 0; i < ndests; ++i)
3211             re_node_set_free (dests_node + i);
3212           free (trtable);
3213           if (dests_node_malloced)
3214             free (dests_node);
3215           return NULL;
3216         }
3217       dest_states_malloced = 1;
3218     }
3219   dest_states_word = dest_states + ndests;
3220   dest_states_nl = dest_states_word + ndests;
3221   bitset_empty (acceptable);
3222
3223   /* Then build the states for all destinations.  */
3224   for (i = 0; i < ndests; ++i)
3225     {
3226       int next_node;
3227       re_node_set_empty (&follows);
3228       /* Merge the follows of this destination states.  */
3229       for (j = 0; j < dests_node[i].nelem; ++j)
3230         {
3231           next_node = dfa->nexts[dests_node[i].elems[j]];
3232           if (next_node != -1)
3233             {
3234               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3235               if (BE (err != REG_NOERROR, 0))
3236                 goto out_free;
3237             }
3238         }
3239       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3240       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3241         goto out_free;
3242       /* If the new state has context constraint,
3243          build appropriate states for these contexts.  */
3244       if (dest_states[i]->has_constraint)
3245         {
3246           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3247                                                           CONTEXT_WORD);
3248           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3249             goto out_free;
3250           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3251                                                         CONTEXT_NEWLINE);
3252           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3253             goto out_free;
3254         }
3255       else
3256         {
3257           dest_states_word[i] = dest_states[i];
3258           dest_states_nl[i] = dest_states[i];
3259         }
3260       bitset_merge (acceptable, dests_ch[i]);
3261     }
3262
3263   /* Update the transition table.  */
3264   /* For all characters ch...:  */
3265   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
3266     for (j = 0; j < UINT_BITS; ++j, ++ch)
3267       if ((acceptable[i] >> j) & 1)
3268         {
3269           for (k = 0; k < ndests; ++k)
3270             if ((dests_ch[k][i] >> j) & 1)
3271               {
3272                 /* k-th destination accepts the word character ch.  */
3273                 if (state->word_trtable)
3274                   {
3275                     trtable[ch] = dest_states[k];
3276                     trtable[ch + SBC_MAX] = dest_states_word[k];
3277                   }
3278                 else if (dfa->mb_cur_max > 1
3279                          && dest_states[k] != dest_states_word[k])
3280                   {
3281                     re_dfastate_t **new_trtable;
3282
3283                     new_trtable = (re_dfastate_t **)
3284                                   realloc (trtable,
3285                                            sizeof (re_dfastate_t *)
3286                                            * 2 * SBC_MAX);
3287                     if (BE (new_trtable == NULL, 0))
3288                       goto out_free;
3289                     memcpy (new_trtable + SBC_MAX, new_trtable,
3290                             sizeof (re_dfastate_t *) * SBC_MAX);
3291                     trtable = new_trtable;
3292                     state->word_trtable = 1;
3293                     trtable[ch] = dest_states[k];
3294                     trtable[ch + SBC_MAX] = dest_states_word[k];
3295                   }
3296                 else if (IS_WORD_CHAR (ch))
3297                   trtable[ch] = dest_states_word[k];
3298                 else
3299                   trtable[ch] = dest_states[k];
3300                 /* There must be only one destination which accepts
3301                    character ch.  See group_nodes_into_DFAstates.  */
3302                 break;
3303               }
3304         }
3305   /* new line */
3306   if (bitset_contain (acceptable, NEWLINE_CHAR))
3307     {
3308       /* The current state accepts newline character.  */
3309       for (k = 0; k < ndests; ++k)
3310         if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
3311           {
3312             /* k-th destination accepts newline character.  */
3313             trtable[NEWLINE_CHAR] = dest_states_nl[k];
3314             if (state->word_trtable)
3315               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[k];
3316             /* There must be only one destination which accepts
3317                newline.  See group_nodes_into_DFAstates.  */
3318             break;
3319           }
3320     }
3321
3322   if (dest_states_malloced)
3323     free (dest_states);
3324
3325   re_node_set_free (&follows);
3326   for (i = 0; i < ndests; ++i)
3327     re_node_set_free (dests_node + i);
3328
3329   if (dests_node_malloced)
3330     free (dests_node);
3331
3332   state->trtable = trtable;
3333   return trtable;
3334 }
3335
3336 /* Group all nodes belonging to STATE into several destinations.
3337    Then for all destinations, set the nodes belonging to the destination
3338    to DESTS_NODE[i] and set the characters accepted by the destination
3339    to DEST_CH[i].  This function return the number of destinations.  */
3340
3341 static int
3342 group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
3343     const regex_t *preg;
3344     const re_dfastate_t *state;
3345     re_node_set *dests_node;
3346     bitset *dests_ch;
3347 {
3348   reg_errcode_t err;
3349   const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3350   int i, j, k;
3351   int ndests; /* Number of the destinations from `state'.  */
3352   bitset accepts; /* Characters a node can accept.  */
3353   const re_node_set *cur_nodes = &state->nodes;
3354   bitset_empty (accepts);
3355   ndests = 0;
3356
3357   /* For all the nodes belonging to `state',  */
3358   for (i = 0; i < cur_nodes->nelem; ++i)
3359     {
3360       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3361       re_token_type_t type = node->type;
3362       unsigned int constraint = node->constraint;
3363
3364       /* Enumerate all single byte character this node can accept.  */
3365       if (type == CHARACTER)
3366         bitset_set (accepts, node->opr.c);
3367       else if (type == SIMPLE_BRACKET)
3368         {
3369           bitset_merge (accepts, node->opr.sbcset);
3370         }
3371       else if (type == OP_PERIOD)
3372         {
3373 #ifdef RE_ENABLE_I18N
3374           if (dfa->mb_cur_max > 1)
3375             bitset_merge (accepts, dfa->sb_char);
3376           else
3377 #endif
3378             bitset_set_all (accepts);
3379           if (!(preg->syntax & RE_DOT_NEWLINE))
3380             bitset_clear (accepts, '\n');
3381           if (preg->syntax & RE_DOT_NOT_NULL)
3382             bitset_clear (accepts, '\0');
3383         }
3384 #ifdef RE_ENABLE_I18N
3385       else if (type == OP_UTF8_PERIOD)
3386         {
3387           memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
3388           if (!(preg->syntax & RE_DOT_NEWLINE))
3389             bitset_clear (accepts, '\n');
3390           if (preg->syntax & RE_DOT_NOT_NULL)
3391             bitset_clear (accepts, '\0');
3392         }
3393 #endif
3394       else
3395         continue;
3396
3397       /* Check the `accepts' and sift the characters which are not
3398          match it the context.  */
3399       if (constraint)
3400         {
3401           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3402             {
3403               int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3404               bitset_empty (accepts);
3405               if (accepts_newline)
3406                 bitset_set (accepts, NEWLINE_CHAR);
3407               else
3408                 continue;
3409             }
3410           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3411             {
3412               bitset_empty (accepts);
3413               continue;
3414             }
3415
3416           if (constraint & NEXT_WORD_CONSTRAINT)
3417             {
3418               unsigned int any_set = 0;
3419               if (type == CHARACTER && !node->word_char)
3420                 {
3421                   bitset_empty (accepts);
3422                   continue;
3423                 }
3424 #ifdef RE_ENABLE_I18N
3425               if (dfa->mb_cur_max > 1)
3426                 for (j = 0; j < BITSET_UINTS; ++j)
3427                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3428               else
3429 #endif
3430                 for (j = 0; j < BITSET_UINTS; ++j)
3431                   any_set |= (accepts[j] &= dfa->word_char[j]);
3432               if (!any_set)
3433                 continue;
3434             }
3435           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3436             {
3437               unsigned int any_set = 0;
3438               if (type == CHARACTER && node->word_char)
3439                 {
3440                   bitset_empty (accepts);
3441                   continue;
3442                 }
3443 #ifdef RE_ENABLE_I18N
3444               if (dfa->mb_cur_max > 1)
3445                 for (j = 0; j < BITSET_UINTS; ++j)
3446                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3447               else
3448 #endif
3449                 for (j = 0; j < BITSET_UINTS; ++j)
3450                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3451               if (!any_set)
3452                 continue;
3453             }
3454         }
3455
3456       /* Then divide `accepts' into DFA states, or create a new
3457          state.  Above, we make sure that accepts is not empty.  */
3458       for (j = 0; j < ndests; ++j)
3459         {
3460           bitset intersec; /* Intersection sets, see below.  */
3461           bitset remains;
3462           /* Flags, see below.  */
3463           int has_intersec, not_subset, not_consumed;
3464
3465           /* Optimization, skip if this state doesn't accept the character.  */
3466           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3467             continue;
3468
3469           /* Enumerate the intersection set of this state and `accepts'.  */
3470           has_intersec = 0;
3471           for (k = 0; k < BITSET_UINTS; ++k)
3472             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3473           /* And skip if the intersection set is empty.  */
3474           if (!has_intersec)
3475             continue;
3476
3477           /* Then check if this state is a subset of `accepts'.  */
3478           not_subset = not_consumed = 0;
3479           for (k = 0; k < BITSET_UINTS; ++k)
3480             {
3481               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3482               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3483             }
3484
3485           /* If this state isn't a subset of `accepts', create a
3486              new group state, which has the `remains'. */
3487           if (not_subset)
3488             {
3489               bitset_copy (dests_ch[ndests], remains);
3490               bitset_copy (dests_ch[j], intersec);
3491               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3492               if (BE (err != REG_NOERROR, 0))
3493                 goto error_return;
3494               ++ndests;
3495             }
3496
3497           /* Put the position in the current group. */
3498           err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3499           if (BE (err < 0, 0))
3500             goto error_return;
3501
3502           /* If all characters are consumed, go to next node. */
3503           if (!not_consumed)
3504             break;
3505         }
3506       /* Some characters remain, create a new group. */
3507       if (j == ndests)
3508         {
3509           bitset_copy (dests_ch[ndests], accepts);
3510           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3511           if (BE (err != REG_NOERROR, 0))
3512             goto error_return;
3513           ++ndests;
3514           bitset_empty (accepts);
3515         }
3516     }
3517   return ndests;
3518  error_return:
3519   for (j = 0; j < ndests; ++j)
3520     re_node_set_free (dests_node + j);
3521   return -1;
3522 }
3523
3524 #ifdef RE_ENABLE_I18N
3525 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3526    Return the number of the bytes the node accepts.
3527    STR_IDX is the current index of the input string.
3528
3529    This function handles the nodes which can accept one character, or
3530    one collating element like '.', '[a-z]', opposite to the other nodes
3531    can only accept one byte.  */
3532
3533 static int
3534 check_node_accept_bytes (preg, node_idx, input, str_idx)
3535     const regex_t *preg;
3536     int node_idx, str_idx;
3537     const re_string_t *input;
3538 {
3539   const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3540   const re_token_t *node = dfa->nodes + node_idx;
3541   int char_len, elem_len;
3542   int i;
3543
3544   if (BE (node->type == OP_UTF8_PERIOD, 0))
3545     {
3546       unsigned char c = re_string_byte_at (input, str_idx), d;
3547       if (BE (c < 0xc2, 1))
3548         return 0;
3549
3550       if (str_idx + 2 > input->len)
3551         return 0;
3552
3553       d = re_string_byte_at (input, str_idx + 1);
3554       if (c < 0xe0)
3555         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3556       else if (c < 0xf0)
3557         {
3558           char_len = 3;
3559           if (c == 0xe0 && d < 0xa0)
3560             return 0;
3561         }
3562       else if (c < 0xf8)
3563         {
3564           char_len = 4;
3565           if (c == 0xf0 && d < 0x90)
3566             return 0;
3567         }
3568       else if (c < 0xfc)
3569         {
3570           char_len = 5;
3571           if (c == 0xf8 && d < 0x88)
3572             return 0;
3573         }
3574       else if (c < 0xfe)
3575         {
3576           char_len = 6;
3577           if (c == 0xfc && d < 0x84)
3578             return 0;
3579         }
3580       else
3581         return 0;
3582
3583       if (str_idx + char_len > input->len)
3584         return 0;
3585
3586       for (i = 1; i < char_len; ++i)
3587         {
3588           d = re_string_byte_at (input, str_idx + i);
3589           if (d < 0x80 || d > 0xbf)
3590             return 0;
3591         }
3592       return char_len;
3593     }
3594
3595   char_len = re_string_char_size_at (input, str_idx);
3596   if (node->type == OP_PERIOD)
3597     {
3598       if (char_len <= 1)
3599         return 0;
3600       /* FIXME: I don't think this if is needed, as both '\n'
3601          and '\0' are char_len == 1.  */
3602       /* '.' accepts any one character except the following two cases.  */
3603       if ((!(preg->syntax & RE_DOT_NEWLINE) &&
3604            re_string_byte_at (input, str_idx) == '\n') ||
3605           ((preg->syntax & RE_DOT_NOT_NULL) &&
3606            re_string_byte_at (input, str_idx) == '\0'))
3607         return 0;
3608       return char_len;
3609     }
3610
3611   elem_len = re_string_elem_size_at (input, str_idx);
3612   if (elem_len <= 1 && char_len <= 1)
3613     return 0;
3614
3615   if (node->type == COMPLEX_BRACKET)
3616     {
3617       const re_charset_t *cset = node->opr.mbcset;
3618 # ifdef _LIBC
3619       const unsigned char *pin = ((char *) re_string_get_buffer (input)
3620                                   + str_idx);
3621       int j;
3622       uint32_t nrules;
3623 # endif /* _LIBC */
3624       int match_len = 0;
3625       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3626                     ? re_string_wchar_at (input, str_idx) : 0);
3627
3628       /* match with multibyte character?  */
3629       for (i = 0; i < cset->nmbchars; ++i)
3630         if (wc == cset->mbchars[i])
3631           {
3632             match_len = char_len;
3633             goto check_node_accept_bytes_match;
3634           }
3635       /* match with character_class?  */
3636       for (i = 0; i < cset->nchar_classes; ++i)
3637         {
3638           wctype_t wt = cset->char_classes[i];
3639           if (__iswctype (wc, wt))
3640             {
3641               match_len = char_len;
3642               goto check_node_accept_bytes_match;
3643             }
3644         }
3645
3646 # ifdef _LIBC
3647       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3648       if (nrules != 0)
3649         {
3650           unsigned int in_collseq = 0;
3651           const int32_t *table, *indirect;
3652           const unsigned char *weights, *extra;
3653           const char *collseqwc;
3654           int32_t idx;
3655           /* This #include defines a local function!  */
3656 #  include <locale/weight.h>
3657
3658           /* match with collating_symbol?  */
3659           if (cset->ncoll_syms)
3660             extra = (const unsigned char *)
3661               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3662           for (i = 0; i < cset->ncoll_syms; ++i)
3663             {
3664               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3665               /* Compare the length of input collating element and
3666                  the length of current collating element.  */
3667               if (*coll_sym != elem_len)
3668                 continue;
3669               /* Compare each bytes.  */
3670               for (j = 0; j < *coll_sym; j++)
3671                 if (pin[j] != coll_sym[1 + j])
3672                   break;
3673               if (j == *coll_sym)
3674                 {
3675                   /* Match if every bytes is equal.  */
3676                   match_len = j;
3677                   goto check_node_accept_bytes_match;
3678                 }
3679             }
3680
3681           if (cset->nranges)
3682             {
3683               if (elem_len <= char_len)
3684                 {
3685                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3686                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3687                 }
3688               else
3689                 in_collseq = find_collation_sequence_value (pin, elem_len);
3690             }
3691           /* match with range expression?  */
3692           for (i = 0; i < cset->nranges; ++i)
3693             if (cset->range_starts[i] <= in_collseq
3694                 && in_collseq <= cset->range_ends[i])
3695               {
3696                 match_len = elem_len;
3697                 goto check_node_accept_bytes_match;
3698               }
3699
3700           /* match with equivalence_class?  */
3701           if (cset->nequiv_classes)
3702             {
3703               const unsigned char *cp = pin;
3704               table = (const int32_t *)
3705                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3706               weights = (const unsigned char *)
3707                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3708               extra = (const unsigned char *)
3709                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3710               indirect = (const int32_t *)
3711                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3712               idx = findidx (&cp);
3713               if (idx > 0)
3714                 for (i = 0; i < cset->nequiv_classes; ++i)
3715                   {
3716                     int32_t equiv_class_idx = cset->equiv_classes[i];
3717                     size_t weight_len = weights[idx];
3718                     if (weight_len == weights[equiv_class_idx])
3719                       {
3720                         int cnt = 0;
3721                         while (cnt <= weight_len
3722                                && (weights[equiv_class_idx + 1 + cnt]
3723                                    == weights[idx + 1 + cnt]))
3724                           ++cnt;
3725                         if (cnt > weight_len)
3726                           {
3727                             match_len = elem_len;
3728                             goto check_node_accept_bytes_match;
3729                           }
3730                       }
3731                   }
3732             }
3733         }
3734       else
3735 # endif /* _LIBC */
3736         {
3737           /* match with range expression?  */
3738 #if __GNUC__ >= 2
3739           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3740 #else
3741           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3742           cmp_buf[2] = wc;
3743 #endif
3744           for (i = 0; i < cset->nranges; ++i)
3745             {
3746               cmp_buf[0] = cset->range_starts[i];
3747               cmp_buf[4] = cset->range_ends[i];
3748               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3749                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3750                 {
3751                   match_len = char_len;
3752                   goto check_node_accept_bytes_match;
3753                 }
3754             }
3755         }
3756     check_node_accept_bytes_match:
3757       if (!cset->non_match)
3758         return match_len;
3759       else
3760         {
3761           if (match_len > 0)
3762             return 0;
3763           else
3764             return (elem_len > char_len) ? elem_len : char_len;
3765         }
3766     }
3767   return 0;
3768 }
3769
3770 # ifdef _LIBC
3771 static unsigned int
3772 find_collation_sequence_value (mbs, mbs_len)
3773     const unsigned char *mbs;
3774     size_t mbs_len;
3775 {
3776   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3777   if (nrules == 0)
3778     {
3779       if (mbs_len == 1)
3780         {
3781           /* No valid character.  Match it as a single byte character.  */
3782           const unsigned char *collseq = (const unsigned char *)
3783             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3784           return collseq[mbs[0]];
3785         }
3786       return UINT_MAX;
3787     }
3788   else
3789     {
3790       int32_t idx;
3791       const unsigned char *extra = (const unsigned char *)
3792         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3793
3794       for (idx = 0; ;)
3795         {
3796           int mbs_cnt, found = 0;
3797           int32_t elem_mbs_len;
3798           /* Skip the name of collating element name.  */
3799           idx = idx + extra[idx] + 1;
3800           elem_mbs_len = extra[idx++];
3801           if (mbs_len == elem_mbs_len)
3802             {
3803               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3804                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3805                   break;
3806               if (mbs_cnt == elem_mbs_len)
3807                 /* Found the entry.  */
3808                 found = 1;
3809             }
3810           /* Skip the byte sequence of the collating element.  */
3811           idx += elem_mbs_len;
3812           /* Adjust for the alignment.  */
3813           idx = (idx + 3) & ~3;
3814           /* Skip the collation sequence value.  */
3815           idx += sizeof (uint32_t);
3816           /* Skip the wide char sequence of the collating element.  */
3817           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3818           /* If we found the entry, return the sequence value.  */
3819           if (found)
3820             return *(uint32_t *) (extra + idx);
3821           /* Skip the collation sequence value.  */
3822           idx += sizeof (uint32_t);
3823         }
3824     }
3825 }
3826 # endif /* _LIBC */
3827 #endif /* RE_ENABLE_I18N */
3828
3829 /* Check whether the node accepts the byte which is IDX-th
3830    byte of the INPUT.  */
3831
3832 static int
3833 check_node_accept (preg, node, mctx, idx)
3834     const regex_t *preg;
3835     const re_token_t *node;
3836     const re_match_context_t *mctx;
3837     int idx;
3838 {
3839   unsigned char ch;
3840   if (node->constraint)
3841     {
3842       /* The node has constraints.  Check whether the current context
3843          satisfies the constraints.  */
3844       unsigned int context = re_string_context_at (mctx->input, idx,
3845                                                    mctx->eflags,
3846                                                    preg->newline_anchor);
3847       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3848         return 0;
3849     }
3850   ch = re_string_byte_at (mctx->input, idx);
3851   switch (node->type)
3852     {
3853     case CHARACTER:
3854       return node->opr.c == ch;
3855     case SIMPLE_BRACKET:
3856       return bitset_contain (node->opr.sbcset, ch);
3857 #ifdef RE_ENABLE_I18N
3858     case OP_UTF8_PERIOD:
3859       if (ch >= 0x80)
3860         return 0;
3861       /* FALLTHROUGH */
3862 #endif
3863     case OP_PERIOD:
3864       return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
3865                || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
3866     default:
3867       return 0;
3868     }
3869 }
3870
3871 /* Extend the buffers, if the buffers have run out.  */
3872
3873 static reg_errcode_t
3874 extend_buffers (mctx)
3875      re_match_context_t *mctx;
3876 {
3877   reg_errcode_t ret;
3878   re_string_t *pstr = mctx->input;
3879
3880   /* Double the lengthes of the buffers.  */
3881   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
3882   if (BE (ret != REG_NOERROR, 0))
3883     return ret;
3884
3885   if (mctx->state_log != NULL)
3886     {
3887       /* And double the length of state_log.  */
3888       /* XXX We have no indication of the size of this buffer.  If this
3889          allocation fail we have no indication that the state_log array
3890          does not have the right size.  */
3891       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3892                                               pstr->bufs_len + 1);
3893       if (BE (new_array == NULL, 0))
3894         return REG_ESPACE;
3895       mctx->state_log = new_array;
3896     }
3897
3898   /* Then reconstruct the buffers.  */
3899   if (pstr->icase)
3900     {
3901 #ifdef RE_ENABLE_I18N
3902       if (pstr->mb_cur_max > 1)
3903         {
3904           ret = build_wcs_upper_buffer (pstr);
3905           if (BE (ret != REG_NOERROR, 0))
3906             return ret;
3907         }
3908       else
3909 #endif /* RE_ENABLE_I18N  */
3910         build_upper_buffer (pstr);
3911     }
3912   else
3913     {
3914 #ifdef RE_ENABLE_I18N
3915       if (pstr->mb_cur_max > 1)
3916         build_wcs_buffer (pstr);
3917       else
3918 #endif /* RE_ENABLE_I18N  */
3919         {
3920           if (pstr->trans != NULL)
3921             re_string_translate_buffer (pstr);
3922         }
3923     }
3924   return REG_NOERROR;
3925 }
3926
3927 \f
3928 /* Functions for matching context.  */
3929
3930 /* Initialize MCTX.  */
3931
3932 static reg_errcode_t
3933 match_ctx_init (mctx, eflags, input, n)
3934     re_match_context_t *mctx;
3935     int eflags, n;
3936     re_string_t *input;
3937 {
3938   mctx->eflags = eflags;
3939   mctx->input = input;
3940   mctx->match_last = -1;
3941   if (n > 0)
3942     {
3943       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
3944       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
3945       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
3946         return REG_ESPACE;
3947     }
3948   else
3949     mctx->bkref_ents = NULL;
3950   mctx->nbkref_ents = 0;
3951   mctx->abkref_ents = n;
3952   mctx->max_mb_elem_len = 1;
3953   mctx->nsub_tops = 0;
3954   mctx->asub_tops = n;
3955   return REG_NOERROR;
3956 }
3957
3958 /* Clean the entries which depend on the current input in MCTX.
3959    This function must be invoked when the matcher changes the start index
3960    of the input, or changes the input string.  */
3961
3962 static void
3963 match_ctx_clean (mctx)
3964     re_match_context_t *mctx;
3965 {
3966   match_ctx_free_subtops (mctx);
3967   mctx->nsub_tops = 0;
3968   mctx->nbkref_ents = 0;
3969 }
3970
3971 /* Free all the memory associated with MCTX.  */
3972
3973 static void
3974 match_ctx_free (mctx)
3975     re_match_context_t *mctx;
3976 {
3977   match_ctx_free_subtops (mctx);
3978   re_free (mctx->sub_tops);
3979   re_free (mctx->bkref_ents);
3980 }
3981
3982 /* Free all the memory associated with MCTX->SUB_TOPS.  */
3983
3984 static void
3985 match_ctx_free_subtops (mctx)
3986      re_match_context_t *mctx;
3987 {
3988   int st_idx;
3989   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
3990     {
3991       int sl_idx;
3992       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
3993       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
3994         {
3995           re_sub_match_last_t *last = top->lasts[sl_idx];
3996           re_free (last->path.array);
3997           re_free (last);
3998         }
3999       re_free (top->lasts);
4000       if (top->path)
4001         {
4002           re_free (top->path->array);
4003           re_free (top->path);
4004         }
4005       free (top);
4006     }
4007 }
4008
4009 /* Add a new backreference entry to MCTX.
4010    Note that we assume that caller never call this function with duplicate
4011    entry, and call with STR_IDX which isn't smaller than any existing entry.
4012 */
4013
4014 static reg_errcode_t
4015 match_ctx_add_entry (mctx, node, str_idx, from, to)
4016      re_match_context_t *mctx;
4017      int node, str_idx, from, to;
4018 {
4019   if (mctx->nbkref_ents >= mctx->abkref_ents)
4020     {
4021       struct re_backref_cache_entry* new_entry;
4022       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4023                               mctx->abkref_ents * 2);
4024       if (BE (new_entry == NULL, 0))
4025         {
4026           re_free (mctx->bkref_ents);
4027           return REG_ESPACE;
4028         }
4029       mctx->bkref_ents = new_entry;
4030       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4031               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4032       mctx->abkref_ents *= 2;
4033     }
4034   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4035   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4036   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4037   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4038   mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
4039   if (mctx->max_mb_elem_len < to - from)
4040     mctx->max_mb_elem_len = to - from;
4041   return REG_NOERROR;
4042 }
4043
4044 /* Search for the first entry which has the same str_idx.
4045    Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4046
4047 static int
4048 search_cur_bkref_entry (mctx, str_idx)
4049      re_match_context_t *mctx;
4050      int str_idx;
4051 {
4052   int left, right, mid;
4053   right = mctx->nbkref_ents;
4054   for (left = 0; left < right;)
4055     {
4056       mid = (left + right) / 2;
4057       if (mctx->bkref_ents[mid].str_idx < str_idx)
4058         left = mid + 1;
4059       else
4060         right = mid;
4061     }
4062   return left;
4063 }
4064
4065 static void
4066 match_ctx_clear_flag (mctx)
4067      re_match_context_t *mctx;
4068 {
4069   int i;
4070   for (i = 0; i < mctx->nbkref_ents; ++i)
4071     mctx->bkref_ents[i].flag = 0;
4072 }
4073
4074 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4075    at STR_IDX.  */
4076
4077 static reg_errcode_t
4078 match_ctx_add_subtop (mctx, node, str_idx)
4079      re_match_context_t *mctx;
4080      int node, str_idx;
4081 {
4082 #ifdef DEBUG
4083   assert (mctx->sub_tops != NULL);
4084   assert (mctx->asub_tops > 0);
4085 #endif
4086   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4087     {
4088       int new_asub_tops = mctx->asub_tops * 2;
4089       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4090                                                    re_sub_match_top_t *,
4091                                                    new_asub_tops);
4092       if (BE (new_array == NULL, 0))
4093         return REG_ESPACE;
4094       mctx->sub_tops = new_array;
4095       mctx->asub_tops = new_asub_tops;
4096     }
4097   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4098   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4099     return REG_ESPACE;
4100   mctx->sub_tops[mctx->nsub_tops]->node = node;
4101   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4102   return REG_NOERROR;
4103 }
4104
4105 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4106    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4107
4108 static re_sub_match_last_t *
4109 match_ctx_add_sublast (subtop, node, str_idx)
4110      re_sub_match_top_t *subtop;
4111      int node, str_idx;
4112 {
4113   re_sub_match_last_t *new_entry;
4114   if (BE (subtop->nlasts == subtop->alasts, 0))
4115     {
4116       int new_alasts = 2 * subtop->alasts + 1;
4117       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4118                                                     re_sub_match_last_t *,
4119                                                     new_alasts);
4120       if (BE (new_array == NULL, 0))
4121         return NULL;
4122       subtop->lasts = new_array;
4123       subtop->alasts = new_alasts;
4124     }
4125   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4126   if (BE (new_entry != NULL, 1))
4127     {
4128       subtop->lasts[subtop->nlasts] = new_entry;
4129       new_entry->node = node;
4130       new_entry->str_idx = str_idx;
4131       ++subtop->nlasts;
4132     }
4133   return new_entry;
4134 }
4135
4136 static void
4137 sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
4138                check_subexp)
4139     re_sift_context_t *sctx;
4140     re_dfastate_t **sifted_sts, **limited_sts;
4141     int last_node, last_str_idx, check_subexp;
4142 {
4143   sctx->sifted_states = sifted_sts;
4144   sctx->limited_states = limited_sts;
4145   sctx->last_node = last_node;
4146   sctx->last_str_idx = last_str_idx;
4147   sctx->check_subexp = check_subexp;
4148   sctx->cur_bkref = -1;
4149   sctx->cls_subexp_idx = -1;
4150   re_node_set_init_empty (&sctx->limits);
4151 }