]> git.ipfire.org Git - thirdparty/vim.git/commitdiff
updated for version 7.3.1037 v7.3.1037
authorBram Moolenaar <Bram@vim.org>
Wed, 29 May 2013 16:45:11 +0000 (18:45 +0200)
committerBram Moolenaar <Bram@vim.org>
Wed, 29 May 2013 16:45:11 +0000 (18:45 +0200)
Problem:    Look-behind matching is very slow on long lines.
Solution:   Add a byte limit to how far back an attempt is made.

src/regexp.c
src/regexp_nfa.c
src/testdir/test64.in
src/testdir/test64.ok
src/version.c

index c7b25c2b2371f904301abfbd6daa614aaa03241f..f0b93256630c395b2bb0169bec4494c05808f31e 100644 (file)
@@ -701,6 +701,7 @@ static void regmbc __ARGS((int c));
 # define CASEMBC(x)
 #endif
 static void    reginsert __ARGS((int, char_u *));
+static void    reginsert_nr __ARGS((int op, long val, char_u *opnd));
 static void    reginsert_limits __ARGS((int, long, long, char_u *));
 static char_u  *re_put_long __ARGS((char_u *pr, long_u val));
 static int     read_limits __ARGS((long *, long *));
@@ -1781,7 +1782,9 @@ regpiece(flagp)
        case Magic('@'):
            {
                int     lop = END;
+               int     nr;
 
+               nr = getdecchrs();
                switch (no_Magic(getchr()))
                {
                    case '=': lop = MATCH; break;                 /* \@= */
@@ -1803,7 +1806,14 @@ regpiece(flagp)
                    *flagp |= HASLOOKBH;
                }
                regtail(ret, regnode(END)); /* operand ends */
-               reginsert(lop, ret);
+               if (lop == BEHIND || lop == NOBEHIND)
+               {
+                   if (nr < 0)
+                       nr = 0; /* no limit is same as zero limit */
+                   reginsert_nr(lop, nr, ret);
+               }
+               else
+                   reginsert(lop, ret);
                break;
            }
 
@@ -2778,6 +2788,38 @@ reginsert(op, opnd)
     *place = NUL;
 }
 
+/*
+ * Insert an operator in front of already-emitted operand.
+ * Add a number to the operator.
+ */
+    static void
+reginsert_nr(op, val, opnd)
+    int                op;
+    long       val;
+    char_u     *opnd;
+{
+    char_u     *src;
+    char_u     *dst;
+    char_u     *place;
+
+    if (regcode == JUST_CALC_SIZE)
+    {
+       regsize += 7;
+       return;
+    }
+    src = regcode;
+    regcode += 7;
+    dst = regcode;
+    while (src > opnd)
+       *--dst = *--src;
+
+    place = opnd;              /* Op node, where operand used to be. */
+    *place++ = op;
+    *place++ = NUL;
+    *place++ = NUL;
+    place = re_put_long(place, (long_u)val);
+}
+
 /*
  * Insert an operator in front of already-emitted operand.
  * The operator has the given limit values as operands.  Also set next pointer.
@@ -3182,7 +3224,7 @@ gethexchrs(maxinputlen)
 }
 
 /*
- * get and return the value of the decimal string immediately after the
+ * Get and return the value of the decimal string immediately after the
  * current position. Return -1 for invalid.  Consumes all digits.
  */
     static int
@@ -3200,6 +3242,7 @@ getdecchrs()
        nr *= 10;
        nr += c - '0';
        ++regparse;
+       curchr = -1; /* no longer valid */
     }
 
     if (i == 0)
@@ -5432,7 +5475,7 @@ regmatch(scan)
                /* save the position after the found match for next */
                reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
 
-               /* start looking for a match with operand at the current
+               /* Start looking for a match with operand at the current
                 * position.  Go back one character until we find the
                 * result, hitting the start of the line or the previous
                 * line (for multi-line matching).
@@ -5444,7 +5487,7 @@ regmatch(scan)
                rp->rs_state = RS_BEHIND2;
 
                reg_restore(&rp->rs_un.regsave, &backpos);
-               scan = OPERAND(rp->rs_scan);
+               scan = OPERAND(rp->rs_scan) + 4;
            }
            break;
 
@@ -5472,9 +5515,12 @@ regmatch(scan)
            }
            else
            {
+               long limit;
+
                /* No match or a match that doesn't end where we want it: Go
                 * back one character.  May go to previous line once. */
                no = OK;
+               limit = OPERAND_MIN(rp->rs_scan);
                if (REG_MULTI)
                {
                    if (rp->rs_un.regsave.rs_u.pos.col == 0)
@@ -5493,27 +5539,41 @@ regmatch(scan)
                        }
                    }
                    else
+                   {
 #ifdef FEAT_MBYTE
-                   if (has_mbyte)
-                       rp->rs_un.regsave.rs_u.pos.col -=
-                           (*mb_head_off)(regline, regline
+                       if (has_mbyte)
+                           rp->rs_un.regsave.rs_u.pos.col -=
+                               (*mb_head_off)(regline, regline
                                    + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
-                   else
+                       else
 #endif
-                       --rp->rs_un.regsave.rs_u.pos.col;
+                           --rp->rs_un.regsave.rs_u.pos.col;
+                       if (limit > 0
+                               && ((rp->rs_un.regsave.rs_u.pos.lnum
+                                                   < behind_pos.rs_u.pos.lnum
+                                       ? (colnr_T)STRLEN(regline)
+                                       : behind_pos.rs_u.pos.col)
+                                   - rp->rs_un.regsave.rs_u.pos.col > limit))
+                           no = FAIL;
+                   }
                }
                else
                {
                    if (rp->rs_un.regsave.rs_u.ptr == regline)
                        no = FAIL;
                    else
-                       --rp->rs_un.regsave.rs_u.ptr;
+                   {
+                       mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
+                       if (limit > 0 && (long)(behind_pos.rs_u.ptr
+                                    - rp->rs_un.regsave.rs_u.ptr) > limit)
+                           no = FAIL;
+                   }
                }
                if (no == OK)
                {
                    /* Advanced, prepare for finding match again. */
                    reg_restore(&rp->rs_un.regsave, &backpos);
-                   scan = OPERAND(rp->rs_scan);
+                   scan = OPERAND(rp->rs_scan) + 4;
                    if (status == RA_MATCH)
                    {
                        /* We did match, so subexpr may have been changed,
@@ -7773,7 +7833,7 @@ static int regexp_engine = 0;
 #ifdef DEBUG
 static char_u regname[][30] = {
                    "AUTOMATIC Regexp Engine",
-                   "BACKTACKING Regexp Engine",
+                   "BACKTRACKING Regexp Engine",
                    "NFA Regexp Engine"
                            };
 #endif
index 9fb863a4e6766c55d79d92b34d68a887e5240ea5..88c813d2e2f0bf82028ab15fbb44824b3417a19e 100644 (file)
@@ -1331,6 +1331,16 @@ nfa_regpiece()
                case '=':
                    EMIT(NFA_PREV_ATOM_NO_WIDTH);
                    break;
+               case '0':
+               case '1':
+               case '2':
+               case '3':
+               case '4':
+               case '5':
+               case '6':
+               case '7':
+               case '8':
+               case '9':
                case '!':
                case '<':
                case '>':
@@ -3817,7 +3827,9 @@ nfa_regmatch(start, submatch, m)
         * because recursive calls should only start in the first position.
         * Also don't start a match past the first line. */
        if (nfa_match == FALSE && start->c == NFA_MOPEN + 0
-                                                && reglnum == 0 && clen != 0)
+               && reglnum == 0 && clen != 0
+               && (ireg_maxcol == 0
+                             || (colnr_T)(reginput - regline) < ireg_maxcol))
        {
 #ifdef ENABLE_LOG
            fprintf(log_fd, "(---) STARTSTATE\n");
index a81cd0cfd2672bd23019f7bd960f8c90885a1717..667e194e16a284d7089c1974ea1ed6b46e34281d 100644 (file)
@@ -336,6 +336,14 @@ STARTTEST
 :"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
 :call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
 :"
+:"""" Look-behind with limit
+:call add(tl, [0, '<\@<=span.', 'xxspanxx<spanyyy', 'spany'])
+:call add(tl, [0, '<\@1<=span.', 'xxspanxx<spanyyy', 'spany'])
+:call add(tl, [0, '<\@2<=span.', 'xxspanxx<spanyyy', 'spany'])
+:call add(tl, [0, '\(<<\)\@<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
+:call add(tl, [0, '\(<<\)\@1<=span.', 'xxspanxxxx<spanxx<<spanyyy'])
+:call add(tl, [0, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
+:"
 :"""" Run the tests
 :"
 :"
@@ -406,6 +414,12 @@ Go\ep:"
 y$Go\ep:"
 :"
 :"
+:" Check a pattern with a look beind crossing a line boundary
+/^Behind:
+/\(<\_[xy]\+\)\@3<=start
+:.yank
+Go\ep:"
+:"
 :/\%#=1^Results/,$wq! test.out
 ENDTEST
 
@@ -423,4 +437,12 @@ ghi
 xjk
 lmn
 
+Behind:
+asdfasd<yyy
+xxstart1
+asdfasd<yy
+xxxxstart2
+asdfasd<yy
+xxxstart3
+
 Results of test64:
index abad49ff8dcdd6e8aa509c7605f6da30210f83b6..3b53d2451e005769e3544a83477f1a1018612e59 100644 (file)
@@ -719,6 +719,18 @@ OK 2 - \(\i\+\) \1
 OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
 OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
 OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
+OK 0 - <\@<=span.
+OK 1 - <\@<=span.
+OK 0 - <\@1<=span.
+OK 1 - <\@1<=span.
+OK 0 - <\@2<=span.
+OK 1 - <\@2<=span.
+OK 0 - \(<<\)\@<=span.
+OK 1 - \(<<\)\@<=span.
+OK 0 - \(<<\)\@1<=span.
+OK 1 - \(<<\)\@1<=span.
+OK 0 - \(<<\)\@2<=span.
+OK 1 - \(<<\)\@2<=span.
 192.168.0.1
 192.168.0.1
 192.168.0.1
@@ -726,3 +738,5 @@ OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
 <T="5">Ta 5</Title>
 <T="7">Ac 7</Title>
 ghi
+
+xxxstart3
index 342465e7fdc24ac8c65a76d8e1d75e1a43e3baf9..1f741682d85b490ed2e53ef25b91224ebbf25f5d 100644 (file)
@@ -728,6 +728,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    1037,
 /**/
     1036,
 /**/