]> git.ipfire.org Git - thirdparty/vim.git/commitdiff
updated for version 7.3.1029 v7.3.1029
authorBram Moolenaar <Bram@vim.org>
Sun, 26 May 2013 20:56:19 +0000 (22:56 +0200)
committerBram Moolenaar <Bram@vim.org>
Sun, 26 May 2013 20:56:19 +0000 (22:56 +0200)
Problem:    New regexp performance: Unused position state being copied.
Solution:   Keep track of which positions are actually valid.

src/regexp_nfa.c
src/version.c

index ca37b1073be6efd5178b69e3607b16d67d14b7e4..7eba2c69bae9d84a0f55dacd416a8d7e2a2d3a2e 100644 (file)
@@ -1649,22 +1649,6 @@ nfa_reg(paren)
     return OK;
 }
 
-typedef union
-{
-    struct multipos
-    {
-       lpos_T  start;
-       lpos_T  end;
-    } multilist[NSUBEXP];
-    struct linepos
-    {
-       char_u  *start;
-       char_u  *end;
-    } linelist[NSUBEXP];
-} regsub_T;
-
-static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m));
-
 #ifdef DEBUG
 static char_u code[50];
 
@@ -2489,6 +2473,26 @@ theend:
  * NFA execution code.
  ****************************************************************/
 
+typedef struct
+{
+    int            in_use; /* number of subexpr with useful info */
+
+    /* When REG_MULTI is TRUE multilist is used, otherwise linelist. */
+    union
+    {
+       struct multipos
+       {
+           lpos_T      start;
+           lpos_T      end;
+       } multilist[NSUBEXP];
+       struct linepos
+       {
+           char_u      *start;
+           char_u      *end;
+       } linelist[NSUBEXP];
+    };
+} regsub_T;
+
 /* nfa_thread_T contains execution information of a NFA state */
 typedef struct
 {
@@ -2507,7 +2511,6 @@ typedef struct
 static int nfa_match;
 
 static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid));
-
 static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *ip));
 
     static void
@@ -2521,7 +2524,9 @@ addstate(l, state, m, off, lid)
     int                        subidx;
     nfa_thread_T       *lastthread;
     lpos_T             save_lpos;
+    int                        save_in_use;
     char_u             *save_ptr;
+    int                        i;
 
     if (l == NULL || state == NULL)
        return;
@@ -2557,16 +2562,19 @@ addstate(l, state, m, off, lid)
                state->lastlist = lid;
                lastthread = &l->t[l->n++];
                lastthread->state = state;
-
-               /* Copy the match start and end positions. */
-               if (REG_MULTI)
-                   mch_memmove(&lastthread->sub.multilist[0],
-                               &m->multilist[0],
-                               sizeof(struct multipos) * nfa_nsubexpr);
-               else
-                   mch_memmove(&lastthread->sub.linelist[0],
-                               &m->linelist[0],
-                               sizeof(struct linepos) * nfa_nsubexpr);
+               lastthread->sub.in_use = m->in_use;
+               if (m->in_use > 0)
+               {
+                   /* Copy the match start and end positions. */
+                   if (REG_MULTI)
+                       mch_memmove(&lastthread->sub.multilist[0],
+                                   &m->multilist[0],
+                                   sizeof(struct multipos) * m->in_use);
+                   else
+                       mch_memmove(&lastthread->sub.linelist[0],
+                                   &m->linelist[0],
+                                   sizeof(struct linepos) * m->in_use);
+               }
            }
     }
 
@@ -2636,9 +2644,25 @@ addstate(l, state, m, off, lid)
            else
                subidx = state->c - NFA_MOPEN;
 
+           /* Set the position (with "off") in the subexpression.  Save and
+            * restore it when it was in use.  Otherwise fill any gap. */
            if (REG_MULTI)
            {
-               save_lpos = m->multilist[subidx].start;
+               if (subidx < m->in_use)
+               {
+                   save_lpos = m->multilist[subidx].start;
+                   save_in_use = -1;
+               }
+               else
+               {
+                   save_in_use = m->in_use;
+                   for (i = m->in_use; i < subidx; ++i)
+                   {
+                       m->multilist[i].start.lnum = -1;
+                       m->multilist[i].end.lnum = -1;
+                   }
+                   m->in_use = subidx + 1;
+               }
                if (off == -1)
                {
                    m->multilist[subidx].start.lnum = reglnum + 1;
@@ -2653,16 +2677,35 @@ addstate(l, state, m, off, lid)
            }
            else
            {
-               save_ptr = m->linelist[subidx].start;
+               if (subidx < m->in_use)
+               {
+                   save_ptr = m->linelist[subidx].start;
+                   save_in_use = -1;
+               }
+               else
+               {
+                   save_in_use = m->in_use;
+                   for (i = m->in_use; i < subidx; ++i)
+                   {
+                       m->linelist[i].start = NULL;
+                       m->linelist[i].end = NULL;
+                   }
+                   m->in_use = subidx + 1;
+               }
                m->linelist[subidx].start = reginput + off;
            }
 
            addstate(l, state->out, m, off, lid);
 
-           if (REG_MULTI)
-               m->multilist[subidx].start = save_lpos;
+           if (save_in_use == -1)
+           {
+               if (REG_MULTI)
+                   m->multilist[subidx].start = save_lpos;
+               else
+                   m->linelist[subidx].start = save_ptr;
+           }
            else
-               m->linelist[subidx].start = save_ptr;
+               m->in_use = save_in_use;
            break;
 
        case NFA_MCLOSE + 0:
@@ -2686,6 +2729,11 @@ addstate(l, state, m, off, lid)
            else
                subidx = state->c - NFA_MCLOSE;
 
+           /* We don't fill in gaps here, there must have been an MOPEN that
+            * has done that. */
+           save_in_use = m->in_use;
+           if (m->in_use <= subidx)
+               m->in_use = subidx + 1;
            if (REG_MULTI)
            {
                save_lpos = m->multilist[subidx].end;
@@ -2713,6 +2761,7 @@ addstate(l, state, m, off, lid)
                m->multilist[subidx].end = save_lpos;
            else
                m->linelist[subidx].end = save_ptr;
+           m->in_use = save_in_use;
            break;
     }
 }
@@ -2917,6 +2966,8 @@ nfa_restore_listids(start, list)
     }
 }
 
+static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m));
+
 /*
  * Main matching routine.
  *
@@ -2960,7 +3011,7 @@ nfa_regmatch(start, submatch, m)
 #endif
     nfa_match = FALSE;
 
-    /* Allocate memory for the lists of nodes */
+    /* Allocate memory for the lists of nodes. */
     size = (nstate + 1) * sizeof(nfa_thread_T);
     list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
     list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
@@ -3099,7 +3150,19 @@ nfa_regmatch(start, submatch, m)
            {
            case NFA_MATCH:
                nfa_match = TRUE;
-               *submatch = t->sub;
+               submatch->in_use = t->sub.in_use;
+               if (REG_MULTI)
+                   for (j = 0; j < submatch->in_use; j++)
+                   {
+                       submatch->multilist[j].start = t->sub.multilist[j].start;
+                       submatch->multilist[j].end = t->sub.multilist[j].end;
+                   }
+               else
+                   for (j = 0; j < submatch->in_use; j++)
+                   {
+                       submatch->linelist[j].start = t->sub.linelist[j].start;
+                       submatch->linelist[j].end = t->sub.linelist[j].end;
+                   }
 #ifdef ENABLE_LOG
                for (j = 0; j < 4; j++)
                    if (REG_MULTI)
@@ -3194,17 +3257,19 @@ nfa_regmatch(start, submatch, m)
                    reglnum = old_reglnum;
                    /* Copy submatch info from the recursive call */
                    if (REG_MULTI)
-                       for (j = 1; j < nfa_nsubexpr; j++)
+                       for (j = 1; j < m->in_use; j++)
                        {
                            t->sub.multilist[j].start = m->multilist[j].start;
                            t->sub.multilist[j].end = m->multilist[j].end;
                        }
                    else
-                       for (j = 1; j < nfa_nsubexpr; j++)
+                       for (j = 1; j < m->in_use; j++)
                        {
                            t->sub.linelist[j].start = m->linelist[j].start;
                            t->sub.linelist[j].end = m->linelist[j].end;
                        }
+                   t->sub.in_use = m->in_use;
+
                    /* t->state->out1 is the corresponding END_INVISIBLE node */
                    addstate_here(thislist, t->state->out1->out, &t->sub,
                                                            listid, &listidx);
@@ -3703,6 +3768,8 @@ nfa_regtry(start, col)
        vim_memset(sub.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr);
        vim_memset(m.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr);
     }
+    sub.in_use = 0;
+    m.in_use = 0;
 
     if (nfa_regmatch(start, &sub, &m) == FALSE)
        return 0;
@@ -3710,7 +3777,7 @@ nfa_regtry(start, col)
     cleanup_subexpr();
     if (REG_MULTI)
     {
-       for (i = 0; i < nfa_nsubexpr; i++)
+       for (i = 0; i < sub.in_use; i++)
        {
            reg_startpos[i] = sub.multilist[i].start;
            reg_endpos[i] = sub.multilist[i].end;
@@ -3732,7 +3799,7 @@ nfa_regtry(start, col)
     }
     else
     {
-       for (i = 0; i < nfa_nsubexpr; i++)
+       for (i = 0; i < sub.in_use; i++)
        {
            reg_startp[i] = sub.linelist[i].start;
            reg_endp[i] = sub.linelist[i].end;
index 2c0b4f4f56d620bad76777c925dab2434a639004..3eb2d05600f9a202f2a4799b6a96f5e2cc80684d 100644 (file)
@@ -728,6 +728,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    1029,
 /**/
     1028,
 /**/