]> git.ipfire.org Git - thirdparty/rrdtool-1.x.git/commitdiff
added PREDICTPERC function
authorMartin Sperl <kernel@martin.sperl.org>
Tue, 6 May 2014 20:12:44 +0000 (20:12 +0000)
committerMartin Sperl <kernel@martin.sperl.org>
Tue, 6 May 2014 20:12:44 +0000 (20:12 +0000)
there is one still open question: should we interpolate to get the final value?
Say: if we have 8 values, and we want the 95th percentile, then
we should actually take the 6.65th 0-based index value (=95/100*(8-1)).
with the current implementation we round, so we actually return the 100th percentile.
what we could also do is:
value=val[floor(idx)]+(idx-floor(idx))*(val[floor(idx)+1]-val[floor(idx)])
(besides some boundary checking for the explicit percentile 100)

this should get decided prior to final merging

doc/rrdgraph_rpn.pod
src/rrd_graph.c
src/rrd_rpncalc.c
src/rrd_rpncalc.h
src/rrd_update.c

index 733cfaa422e0b04dfcaf626bf21e610e778d77b7..80b427b98ed64c7d5b55fdea15a5141ca6f153bf 100644 (file)
@@ -202,18 +202,20 @@ source value is NAN the complete sliding window is affected. The TRENDNAN
 operation ignores all NAN-values in a sliding window and computes the 
 average of the remaining values.
 
-B<PREDICT, PREDICTSIGMA>
+B<PREDICT, PREDICTSIGMA, PREDICTPERC>
 
-Create a "sliding window" average/sigma of another data series, that also
-shifts the data series by given amounts of of time as well
+Create a "sliding window" average/sigma/percentile of another data series, 
+that also shifts the data series by given amounts of time as well
 
 Usage - explicit stating shifts:
 CDEF:predict=<shift n>,...,<shift 1>,n,<window>,x,PREDICT
 CDEF:sigma=<shift n>,...,<shift 1>,n,<window>,x,PREDICTSIGMA
+CDEF:perc=<shift n>,...,<shift 1>,n,<window>,<percentile>,x,PREDICTPERC
 
 Usage - shifts defined as a base shift and a number of time this is applied
 CDEF:predict=<shift multiplier>,-n,<window>,x,PREDICT
 CDEF:sigma=<shift multiplier>,-n,<window>,x,PREDICTSIGMA
+CDEF:perc=<shift multiplier>,-n,<window>,<percentile>,x,PREDICTPERC
 
 Example:
 CDEF:predict=172800,86400,2,1800,x,PREDICT
@@ -267,13 +269,18 @@ rrdtool graph image.png --imgformat=PNG \
  LINE1:upper#0000ff:upper\ certainty\ limit \
  LINE1:lower#0000ff:lower\ certainty\ limit \
  CDEF:exceeds=value,UN,0,value,lower,upper,LIMIT,UN,IF \
- TICK:exceeds#aa000080:1
+ TICK:exceeds#aa000080:1 \
+ CDEF:perc95=86400,-7,1800,95,value,PREDICTPERC \
+ LINE1:perc95#ffff00:95th_percentile
 
 Note: Experience has shown that a factor between 3 and 5 to scale sigma is a good 
 discriminator to detect abnormal behavior. This obviously depends also on the type 
 of data and how "noisy" the data series is.
 
-This prediction can only be used for short term extrapolations - say a few days into the future-
+Also Note the explicit use of start= in the CDEF - this is necessary to load all
+the necessary data (even if it is not displayed)
+
+This prediction can only be used for short term extrapolations - say a few days into the future.
 
 =item Special values
 
index ac2f3b8a8d7a4794731f90ff111b576da3224f63..b3be4d96ca7334c4619dc9f85d3ff45c1b358d9a 100644 (file)
@@ -997,6 +997,7 @@ long lcd(
     return num[i];
 }
 
+
 /* run the rpn calculator on all the VDEF and CDEF arguments */
 int data_calc(
     image_desc_t *im)
@@ -1008,6 +1009,7 @@ int data_calc(
     int       stepcnt;
     time_t    now;
     rpnstack_t rpnstack;
+    rpnp_t   *rpnp;
 
     rpnstack_init(&rpnstack);
 
@@ -1061,6 +1063,7 @@ int data_calc(
             steparray = NULL;
             stepcnt = 0;
             dataidx = -1;
+           rpnp = im->gdes[gdi].rpnp;
 
             /* Find the variables in the expression.
              * - VDEF variables are substituted by their values
@@ -1173,7 +1176,6 @@ int data_calc(
              */
             for (now = im->gdes[gdi].start + im->gdes[gdi].step;
                  now <= im->gdes[gdi].end; now += im->gdes[gdi].step) {
-                rpnp_t   *rpnp = im->gdes[gdi].rpnp;
 
                 /* 3rd arg of rpn_calc is for OP_VARIABLE lookups;
                  * in this case we are advancing by timesteps;
@@ -1183,9 +1185,12 @@ int data_calc(
                              im->gdes[gdi].data, ++dataidx) == -1) {
                     /* rpn_calc sets the error string */
                     rpnstack_free(&rpnstack);
+                   rpnp_freeextra(rpnp);
                     return -1;
                 }
             }           /* enumerate over time steps within a CDEF */
+           rpnp_freeextra(rpnp);
+           
             break;
         default:
             continue;
index aba6042fe10462c41687100331da99dbd7978881..80522717200d9d95177e3dad9d3f20b9bd9365aa 100644 (file)
@@ -78,6 +78,8 @@ rpnp_t   *rpn_expand(
     }
     for (i = 0; rpnc[i].op != OP_END; ++i) {
         rpnp[i].op = (enum op_en)rpnc[i].op;
+       rpnp[i].extra = NULL;
+       rpnp[i].free_extra = NULL;
         if (rpnp[i].op == OP_NUMBER) {
             rpnp[i].val = (double) rpnc[i].val;
         } else if (rpnp[i].op == OP_VARIABLE || rpnp[i].op == OP_PREV_OTHER) {
@@ -180,6 +182,7 @@ void rpn_compact2str(
             add_op(OP_TRENDNAN, TRENDNAN)
             add_op(OP_PREDICT, PREDICT)
             add_op(OP_PREDICTSIGMA, PREDICTSIGMA)
+            add_op(OP_PREDICTPERC, PREDICTPERC)
             add_op(OP_RAD2DEG, RAD2DEG)
             add_op(OP_DEG2RAD, DEG2RAD)
             add_op(OP_AVG, AVG)
@@ -241,9 +244,10 @@ void parseCDEF_DS(const char *def,
         if (rpnp[i].op == OP_TIME || rpnp[i].op == OP_LTIME ||
             rpnp[i].op == OP_PREV || rpnp[i].op == OP_COUNT ||
             rpnp[i].op == OP_TREND || rpnp[i].op == OP_TRENDNAN ||
-            rpnp[i].op == OP_PREDICT || rpnp[i].op ==  OP_PREDICTSIGMA ) {
+            rpnp[i].op == OP_PREDICT || rpnp[i].op ==  OP_PREDICTSIGMA ||
+            rpnp[i].op == OP_PREDICTPERC ) {
             rrd_set_error
-                ("operators TIME, LTIME, PREV COUNT TREND TRENDNAN PREDICT PREDICTSIGMA are not supported with DS COMPUTE");
+                ("operators TIME, LTIME, PREV COUNT TREND TRENDNAN PREDICT PREDICTSIGMA PREDICTPERC are not supported with DS COMPUTE");
             free(rpnp);
             return;
         }
@@ -385,6 +389,7 @@ rpnp_t   *rpn_parse(
             match_op(OP_TRENDNAN, TRENDNAN)
             match_op(OP_PREDICT, PREDICT)
             match_op(OP_PREDICTSIGMA, PREDICTSIGMA)
+            match_op(OP_PREDICTPERC, PREDICTPERC)
             match_op(OP_RAD2DEG, RAD2DEG)
             match_op(OP_DEG2RAD, DEG2RAD)
             match_op(OP_AVG, AVG)
@@ -409,6 +414,9 @@ rpnp_t   *rpn_parse(
             return NULL;
         }
 
+       rpnp[steps].extra = NULL;
+       rpnp[steps].free_extra = NULL;
+
         if (*expr == 0)
             break;
         if (*expr == ',')
@@ -435,11 +443,28 @@ void rpnstack_init(
 void rpnstack_free(
     rpnstack_t *rpnstack)
 {
-    if (rpnstack->s != NULL)
-        free(rpnstack->s);
+    free(rpnstack->s);
     rpnstack->dc_stacksize = 0;
 }
 
+void rpnp_freeextra(rpnp_t* rpnp)
+{
+    int rpi;
+    if (!rpnp) /* a NULL program has nothing to release */
+      return;
+    /* walk every op up to the OP_END terminator and release its "extra" buffer, if any */
+    for (rpi = 0; rpnp[rpi].op != OP_END; rpi++) {
+        if (rpnp[rpi].extra) {
+           if (rpnp[rpi].free_extra) { /* a custom release function is set for this op */
+               rpnp[rpi].free_extra(rpnp[rpi].extra);
+           } else { /* no custom release function: fall back to plain free() */
+               free(rpnp[rpi].extra);
+           }
+           rpnp[rpi].extra = NULL; /* cleared, so a second call skips this op instead of freeing again */
+       }
+    }
+}
+
 static int rpn_compare_double(
     const void *x,
     const void *y)
@@ -823,12 +848,27 @@ short rpn_calc(
             break;
         case OP_PREDICT:
         case OP_PREDICTSIGMA:
-            stackunderflow(2);
+        case OP_PREDICTPERC:
            {
-               /* the local averaging window (similar to trend, but better here, as we get better statistics thru numbers)*/
+               /* the percentile requested */
+               double  percentile = DNAN;
+               if (rpnp[rpi].op == OP_PREDICTPERC) {
+                   stackunderflow(1);
+                   percentile = rpnstack->s[--stptr];
+                   if ((percentile<0) || (percentile > 100)) {
+                       rrd_set_error("unsupported percentile: %f",percentile);
+                       return -1;
+                   }
+                   percentile/=100;
+               }
+               /* the local averaging window (similar to trend,
+                * but better here, as we get better statistics 
+                * thru numbers)*/
+               stackunderflow(2);
                int   locstepsize = rpnstack->s[--stptr];
                /* the number of shifts and range-checking*/
                int     shifts = rpnstack->s[--stptr];
+
                 stackunderflow(shifts);
                // handle negative shifts special
                if (shifts<0) {
@@ -839,9 +879,9 @@ short rpn_calc(
                /* the real calculation */
                double val=DNAN;
                /* the info on the datasource */
-               time_t  dsstep = (time_t) rpnp[rpi - 1].step;
-               int    dscount = rpnp[rpi - 1].ds_cnt;
-               int   locstep = (int)ceil((float)locstepsize/(float)dsstep);
+               time_t  dsstep  = (time_t) rpnp[rpi - 1].step;
+               int     dscount = rpnp[rpi - 1].ds_cnt;
+               int     locstep = (int)ceil((float)locstepsize/(float)dsstep);
 
                /* the sums */
                 double    sum = 0;
@@ -850,6 +890,16 @@ short rpn_calc(
                /* now loop for each position */
                int doshifts=shifts;
                if (shifts<0) { doshifts=-shifts; }
+               /* alloc memory */
+               double *extra = rpnp[rpi].extra;
+               if (rpnp[rpi].op == OP_PREDICTPERC) {
+                   if (! extra) {
+                     int size = (doshifts + 1) * (locstep + 2);
+                     rpnp[rpi].extra =
+                         extra =  malloc(sizeof(double) * size);
+                   }
+               }
+               /* loop the shifts */
                for(int loop=0;loop<doshifts;loop++) {
                    /* calculate shift step */
                    int shiftstep=1;
@@ -865,7 +915,8 @@ short rpn_calc(
                    shiftstep=(int)ceil((float)shiftstep/(float)dsstep);
                    /* loop all local shifts */
                    for(int i=0;i<=locstep;i++) {
-                       /* now calculate offset into data-array - relative to output_idx*/
+                       /* now calculate offset into data-array 
+                        * - relative to output_idx */
                        int offset=shiftstep+i;
                        /* and process if we have index 0 of above */
                        if ((offset>=0)&&(offset<output_idx)) {
@@ -875,6 +926,9 @@ short rpn_calc(
                            if (! isnan(val)) {
                                sum+=val;
                                sum2+=val*val;
+                               if (extra) {
+                                   extra[count]=val;
+                               }
                                count++;
                            }
                        }
@@ -882,11 +936,13 @@ short rpn_calc(
                }
                /* do the final calculations */
                val=DNAN;
-               if (rpnp[rpi].op == OP_PREDICT) {  /* the average */
+               switch (rpnp[rpi].op) {
+               case OP_PREDICT:
                    if (count>0) {
                        val = sum/(double)count;
                    } 
-               } else {
+                   break;
+               case OP_PREDICTSIGMA:
                    if (count>1) { /* the sigma case */
                        val=count*sum2-sum*sum;
                        if (val<0) {
@@ -895,6 +951,21 @@ short rpn_calc(
                            val=sqrt(val/((float)count*((float)count-1.0)));
                        }
                    }
+                   break;
+               case OP_PREDICTPERC:
+                   if ((count>0) && extra) {
+                       /* sort the numbers */
+                       qsort(extra,count,sizeof(double),rpn_compare_double);
+                       /* get the percentile selected */
+                       int idx=(int)round(percentile * ((float)count-1.0));
+                       /* maybe we should also do an interpolation between the 2
+                        * neighboring fields, similar to what we do with MEDIAN 
+                        */
+                       val = extra[idx];
+                   }
+                   break;
+               default: /* should not get here ... */
+                   break; 
                }
                rpnstack->s[stptr] = val;
            }
index 26b52cb4bfe1f5f3900186028562d038552b7a4f..2072bfc77134cec5c399b9b366fd00ff0e4140de 100644 (file)
@@ -21,7 +21,7 @@ enum op_en { OP_NUMBER = 0, OP_VARIABLE, OP_INF, OP_PREV, OP_NEGINF,
     OP_PREDICT,OP_PREDICTSIGMA,
     OP_AVG, OP_ABS, OP_ADDNAN,
     OP_MINNAN, OP_MAXNAN,
-    OP_MEDIAN
+    OP_MEDIAN, OP_PREDICTPERC
  };
 
 typedef struct rpnp_t {
@@ -31,8 +31,14 @@ typedef struct rpnp_t {
     double   *data;     /* pointer to the current value from OP_VAR DAS */
     long      ds_cnt;   /* data source count for data pointer */
     long      step;     /* time step for OP_VAR das */
+    void     *extra;    /* some extra data for longer setups */
+    void      (*free_extra)(void *); /* function pointer used to free extra 
+                                     * - NULL for "simple" free(extra); */
 } rpnp_t;
 
+void      rpnp_freeextra(
+    rpnp_t *rpnp);
+
 /* a compact representation of rpnp_t for computed data sources */
 typedef struct rpn_cdefds_t {
     char      op;       /* rpn operator type */
index efa0482450b377a252c78049a5a9b569cdd9a177..a26005e0e258d2eb17f615c20d928536b08816d0 100644 (file)
@@ -1713,13 +1713,18 @@ static int process_pdp_st(
                 rpnp[i].op = OP_NUMBER;
                 rpnp[i].val = pdp_temp[rpnp[i].ptr];
             }
+           /* just in case */
+           rpnp[i].extra = NULL;
+           rpnp[i].free_extra = NULL;
         }
         /* run the rpn calculator */
         if (rpn_calc(rpnp, &rpnstack, 0, pdp_temp, ds_idx) == -1) {
+           rpnp_freeextra(rpnp);
             free(rpnp);
             rpnstack_free(&rpnstack);
             return -1;
         }
+       rpnp_freeextra(rpnp);
         free(rpnp);
     }