added PREDICTPERC function

author Martin Sperl <kernel@martin.sperl.org>

Tue, 6 May 2014 20:12:44 +0000 (20:12 +0000)

committer Martin Sperl <kernel@martin.sperl.org>

Tue, 6 May 2014 20:12:44 +0000 (20:12 +0000)
author Martin Sperl <kernel@martin.sperl.org>
Tue, 6 May 2014 20:12:44 +0000 (20:12 +0000)
committer Martin Sperl <kernel@martin.sperl.org>
Tue, 6 May 2014 20:12:44 +0000 (20:12 +0000)
diff --git a/doc/rrdgraph_rpn.pod b/doc/rrdgraph_rpn.pod

index 733cfaa422e0b04dfcaf626bf21e610e778d77b7..80b427b98ed64c7d5b55fdea15a5141ca6f153bf 100644 (file)
--- a/doc/rrdgraph_rpn.pod
+++ b/doc/rrdgraph_rpn.pod
@@ -202,18 +202,20 @@ source value is NAN the complete sliding window is affected. The TRENDNAN
  operation ignores all NAN-values in a sliding window and computes the 
  average of the remaining values.
  
-B<PREDICT, PREDICTSIGMA>
+B<PREDICT, PREDICTSIGMA, PREDICTPERC>
  
-Create a "sliding window" average/sigma of another data series, that also
-shifts the data series by given amounts of of time as well
+Create a "sliding window" average/sigma/percentile of another data series,
+that also shifts the data series by given amounts of time as well
  
  Usage - explicit stating shifts:
  CDEF:predict=<shift n>,...,<shift 1>,n,<window>,x,PREDICT
  CDEF:sigma=<shift n>,...,<shift 1>,n,<window>,x,PREDICTSIGMA
+CDEF:perc=<shift n>,...,<shift 1>,n,<window>,<percentile>,x,PREDICTPERC
  
  Usage - shifts defined as a base shift and a number of time this is applied
  CDEF:predict=<shift multiplier>,-n,<window>,x,PREDICT
  CDEF:sigma=<shift multiplier>,-n,<window>,x,PREDICTSIGMA
+CDEF:perc=<shift multiplier>,-n,<window>,<percentile>,x,PREDICTPERC
  
  Example:
  CDEF:predict=172800,86400,2,1800,x,PREDICT
@@ -267,13 +269,18 @@ rrdtool graph image.png --imgformat=PNG \
   LINE1:upper#0000ff:upper\ certainty\ limit \
   LINE1:lower#0000ff:lower\ certainty\ limit \
   CDEF:exceeds=value,UN,0,value,lower,upper,LIMIT,UN,IF \
- TICK:exceeds#aa000080:1
+ TICK:exceeds#aa000080:1 \
+ CDEF:perc95=86400,-7,1800,95,value,PREDICTPERC \
+ LINE1:perc95#ffff00:95th_percentile
  
  Note: Experience has shown that a factor between 3 and 5 to scale sigma is a good 
  discriminator to detect abnormal behavior. This obviously depends also on the type 
  of data and how "noisy" the data series is.
  
-This prediction can only be used for short term extrapolations - say a few days into the future-
+Also note the explicit use of start= in the CDEF - this is necessary to load all
+the necessary data (even if it is not displayed).
+
+This prediction can only be used for short term extrapolations - say a few days into the future.
  
  =item Special values
  
diff --git a/src/rrd_graph.c b/src/rrd_graph.c

index ac2f3b8a8d7a4794731f90ff111b576da3224f63..b3be4d96ca7334c4619dc9f85d3ff45c1b358d9a 100644 (file)
--- a/src/rrd_graph.c
+++ b/src/rrd_graph.c
@@ -997,6 +997,7 @@ long lcd(
      return num[i];
  }
  
+
  /* run the rpn calculator on all the VDEF and CDEF arguments */
  int data_calc(
      image_desc_t *im)
@@ -1008,6 +1009,7 @@ int data_calc(
      int       stepcnt;
      time_t    now;
      rpnstack_t rpnstack;
+    rpnp_t   *rpnp;
  
      rpnstack_init(&rpnstack);
  
@@ -1061,6 +1063,7 @@ int data_calc(
              steparray = NULL;
              stepcnt = 0;
              dataidx = -1;
+           rpnp = im->gdes[gdi].rpnp;
  
              /* Find the variables in the expression.
               * - VDEF variables are substituted by their values
@@ -1173,7 +1176,6 @@ int data_calc(
               */
              for (now = im->gdes[gdi].start + im->gdes[gdi].step;
                   now <= im->gdes[gdi].end; now += im->gdes[gdi].step) {
-                rpnp_t   *rpnp = im->gdes[gdi].rpnp;
  
                  /* 3rd arg of rpn_calc is for OP_VARIABLE lookups;
                   * in this case we are advancing by timesteps;
@@ -1183,9 +1185,12 @@ int data_calc(
                               im->gdes[gdi].data, ++dataidx) == -1) {
                      /* rpn_calc sets the error string */
                      rpnstack_free(&rpnstack);
+                   rpnp_freeextra(rpnp);
                      return -1;
                  }
              }           /* enumerate over time steps within a CDEF */
+           rpnp_freeextra(rpnp);
+           
              break;
          default:
              continue;
diff --git a/src/rrd_rpncalc.c b/src/rrd_rpncalc.c

index aba6042fe10462c41687100331da99dbd7978881..80522717200d9d95177e3dad9d3f20b9bd9365aa 100644 (file)
--- a/src/rrd_rpncalc.c
+++ b/src/rrd_rpncalc.c
@@ -78,6 +78,8 @@ rpnp_t   *rpn_expand(
      }
      for (i = 0; rpnc[i].op != OP_END; ++i) {
          rpnp[i].op = (enum op_en)rpnc[i].op;
+       rpnp[i].extra = NULL;
+       rpnp[i].free_extra = NULL;
          if (rpnp[i].op == OP_NUMBER) {
              rpnp[i].val = (double) rpnc[i].val;
          } else if (rpnp[i].op == OP_VARIABLE || rpnp[i].op == OP_PREV_OTHER) {
@@ -180,6 +182,7 @@ void rpn_compact2str(
              add_op(OP_TRENDNAN, TRENDNAN)
              add_op(OP_PREDICT, PREDICT)
              add_op(OP_PREDICTSIGMA, PREDICTSIGMA)
+            add_op(OP_PREDICTPERC, PREDICTPERC)
              add_op(OP_RAD2DEG, RAD2DEG)
              add_op(OP_DEG2RAD, DEG2RAD)
              add_op(OP_AVG, AVG)
@@ -241,9 +244,10 @@ void parseCDEF_DS(const char *def,
          if (rpnp[i].op == OP_TIME || rpnp[i].op == OP_LTIME ||
              rpnp[i].op == OP_PREV || rpnp[i].op == OP_COUNT ||
              rpnp[i].op == OP_TREND || rpnp[i].op == OP_TRENDNAN ||
-            rpnp[i].op == OP_PREDICT || rpnp[i].op ==  OP_PREDICTSIGMA ) {
+            rpnp[i].op == OP_PREDICT || rpnp[i].op ==  OP_PREDICTSIGMA ||
+            rpnp[i].op == OP_PREDICTPERC ) {
              rrd_set_error
-                ("operators TIME, LTIME, PREV COUNT TREND TRENDNAN PREDICT PREDICTSIGMA are not supported with DS COMPUTE");
+                ("operators TIME, LTIME, PREV COUNT TREND TRENDNAN PREDICT PREDICTSIGMA PREDICTPERC are not supported with DS COMPUTE");
              free(rpnp);
              return;
          }
@@ -385,6 +389,7 @@ rpnp_t   *rpn_parse(
              match_op(OP_TRENDNAN, TRENDNAN)
              match_op(OP_PREDICT, PREDICT)
              match_op(OP_PREDICTSIGMA, PREDICTSIGMA)
+            match_op(OP_PREDICTPERC, PREDICTPERC)
              match_op(OP_RAD2DEG, RAD2DEG)
              match_op(OP_DEG2RAD, DEG2RAD)
              match_op(OP_AVG, AVG)
@@ -409,6 +414,9 @@ rpnp_t   *rpn_parse(
              return NULL;
          }
  
+       rpnp[steps].extra = NULL;
+       rpnp[steps].free_extra = NULL;
+
          if (*expr == 0)
              break;
          if (*expr == ',')
@@ -435,11 +443,28 @@ void rpnstack_init(
  void rpnstack_free(
      rpnstack_t *rpnstack)
  {
-    if (rpnstack->s != NULL)
-        free(rpnstack->s);
+    free(rpnstack->s);
      rpnstack->dc_stacksize = 0;
  }
  
+void rpnp_freeextra(rpnp_t* rpnp) /* release every op's "extra" buffer; the rpnp array itself is not freed here */
+{
+    int rpi;
+    if (!rpnp) /* nothing to do for a NULL program */
+      return;
+    /* walk the ops in order, stopping at the OP_END terminator */
+    for (rpi = 0; rpnp[rpi].op != OP_END; rpi++) {
+        if (rpnp[rpi].extra) {
+           if (rpnp[rpi].free_extra) { /* this op registered its own release function */
+               rpnp[rpi].free_extra(rpnp[rpi].extra);
+           } else { /* no release function set: fall back to plain free() */
+               free(rpnp[rpi].extra);
+           }
+           rpnp[rpi].extra = NULL; /* cleared so a later call will not release it again */
+       }
+    }
+}
+
  static int rpn_compare_double(
      const void *x,
      const void *y)
@@ -823,12 +848,27 @@ short rpn_calc(
              break;
          case OP_PREDICT:
          case OP_PREDICTSIGMA:
-            stackunderflow(2);
+        case OP_PREDICTPERC:
             {
-               /* the local averaging window (similar to trend, but better here, as we get better statistics thru numbers)*/
+               /* the percentile requested */
+               double  percentile = DNAN;
+               if (rpnp[rpi].op == OP_PREDICTPERC) {
+                   stackunderflow(1);
+                   percentile = rpnstack->s[--stptr];
+                   if ((percentile<0) || (percentile > 100)) {
+                       rrd_set_error("unsupported percentile: %f",percentile);
+                       return -1;
+                   }
+                   percentile/=100;
+               }
+               /* the local averaging window (similar to trend,
+                * but better here, as we get better statistics 
+                * thru numbers)*/
+               stackunderflow(2);
                 int   locstepsize = rpnstack->s[--stptr];
                 /* the number of shifts and range-checking*/
                 int     shifts = rpnstack->s[--stptr];
+
                  stackunderflow(shifts);
                 // handle negative shifts special
                 if (shifts<0) {
@@ -839,9 +879,9 @@ short rpn_calc(
                 /* the real calculation */
                 double val=DNAN;
                 /* the info on the datasource */
-               time_t  dsstep = (time_t) rpnp[rpi - 1].step;
-               int    dscount = rpnp[rpi - 1].ds_cnt;
-               int   locstep = (int)ceil((float)locstepsize/(float)dsstep);
+               time_t  dsstep  = (time_t) rpnp[rpi - 1].step;
+               int     dscount = rpnp[rpi - 1].ds_cnt;
+               int     locstep = (int)ceil((float)locstepsize/(float)dsstep);
  
                 /* the sums */
                  double    sum = 0;
@@ -850,6 +890,16 @@ short rpn_calc(
                 /* now loop for each position */
                 int doshifts=shifts;
                 if (shifts<0) { doshifts=-shifts; }
+               /* alloc memory */
+               double *extra = rpnp[rpi].extra;
+               if (rpnp[rpi].op == OP_PREDICTPERC) {
+                   if (! extra) {
+                     int size = (doshifts + 1) * (locstep + 2);
+                     rpnp[rpi].extra =
+                         extra =  malloc(sizeof(double) * size);
+                   }
+               }
+               /* loop the shifts */
                 for(int loop=0;loop<doshifts;loop++) {
                     /* calculate shift step */
                     int shiftstep=1;
@@ -865,7 +915,8 @@ short rpn_calc(
                     shiftstep=(int)ceil((float)shiftstep/(float)dsstep);
                     /* loop all local shifts */
                     for(int i=0;i<=locstep;i++) {
-                       /* now calculate offset into data-array - relative to output_idx*/
+                       /* now calculate offset into data-array 
+                        * - relative to output_idx */
                         int offset=shiftstep+i;
                         /* and process if we have index 0 of above */
                         if ((offset>=0)&&(offset<output_idx)) {
@@ -875,6 +926,9 @@ short rpn_calc(
                             if (! isnan(val)) {
                                 sum+=val;
                                 sum2+=val*val;
+                               if (extra) {
+                                   extra[count]=val;
+                               }
                                 count++;
                             }
                         }
@@ -882,11 +936,13 @@ short rpn_calc(
                 }
                 /* do the final calculations */
                 val=DNAN;
-               if (rpnp[rpi].op == OP_PREDICT) {  /* the average */
+               switch (rpnp[rpi].op) {
+               case OP_PREDICT:
                     if (count>0) {
                         val = sum/(double)count;
                     } 
-               } else {
+                   break;
+               case OP_PREDICTSIGMA:
                     if (count>1) { /* the sigma case */
                         val=count*sum2-sum*sum;
                         if (val<0) {
@@ -895,6 +951,21 @@ short rpn_calc(
                             val=sqrt(val/((float)count*((float)count-1.0)));
                         }
                     }
+                   break;
+               case OP_PREDICTPERC:
+                   if ((count>0) && extra) {
+                       /* sort the numbers */
+                       qsort(extra,count,sizeof(double),rpn_compare_double);
+                       /* get the percentile selected */
+                       int idx=(int)round(percentile * ((float)count-1.0));
+                       /* maybe we should also do an interpolation between the 2
+                        * neighboring fields, similar to what we do with MEDIAN 
+                        */
+                       val = extra[idx];
+                   }
+                   break;
+               default: /* should not get here ... */
+                   break; 
                 }
                 rpnstack->s[stptr] = val;
             }
diff --git a/src/rrd_rpncalc.h b/src/rrd_rpncalc.h

index 26b52cb4bfe1f5f3900186028562d038552b7a4f..2072bfc77134cec5c399b9b366fd00ff0e4140de 100644 (file)
--- a/src/rrd_rpncalc.h
+++ b/src/rrd_rpncalc.h
@@ -21,7 +21,7 @@ enum op_en { OP_NUMBER = 0, OP_VARIABLE, OP_INF, OP_PREV, OP_NEGINF,
      OP_PREDICT,OP_PREDICTSIGMA,
      OP_AVG, OP_ABS, OP_ADDNAN,
      OP_MINNAN, OP_MAXNAN,
-    OP_MEDIAN
+    OP_MEDIAN, OP_PREDICTPERC
   };
  
  typedef struct rpnp_t {
@@ -31,8 +31,14 @@ typedef struct rpnp_t {
      double   *data;     /* pointer to the current value from OP_VAR DAS */
      long      ds_cnt;   /* data source count for data pointer */
      long      step;     /* time step for OP_VAR das */
+    void     *extra;    /* some extra data for longer setups */
+    void      (*free_extra)(void *); /* function pointer used to free extra 
+                                     * - NULL for "simple" free(extra); */
  } rpnp_t;
  
+void      rpnp_freeextra(
+    rpnp_t *rpnp);
+
  /* a compact representation of rpnp_t for computed data sources */
  typedef struct rpn_cdefds_t {
      char      op;       /* rpn operator type */
diff --git a/src/rrd_update.c b/src/rrd_update.c

index efa0482450b377a252c78049a5a9b569cdd9a177..a26005e0e258d2eb17f615c20d928536b08816d0 100644 (file)
--- a/src/rrd_update.c
+++ b/src/rrd_update.c
@@ -1713,13 +1713,18 @@ static int process_pdp_st(
                  rpnp[i].op = OP_NUMBER;
                  rpnp[i].val = pdp_temp[rpnp[i].ptr];
              }
+           /* just in case */
+           rpnp[i].extra = NULL;
+           rpnp[i].free_extra = NULL;
          }
          /* run the rpn calculator */
          if (rpn_calc(rpnp, &rpnstack, 0, pdp_temp, ds_idx) == -1) {
+           rpnp_freeextra(rpnp);
              free(rpnp);
              rpnstack_free(&rpnstack);
              return -1;
          }
+       rpnp_freeextra(rpnp);
          free(rpnp);
      }
author	Martin Sperl <kernel@martin.sperl.org>
	Tue, 6 May 2014 20:12:44 +0000 (20:12 +0000)
committer	Martin Sperl <kernel@martin.sperl.org>
	Tue, 6 May 2014 20:12:44 +0000 (20:12 +0000)
doc/rrdgraph_rpn.pod		patch \| blob \| blame \| history
src/rrd_graph.c		patch \| blob \| blame \| history
src/rrd_rpncalc.c		patch \| blob \| blame \| history
src/rrd_rpncalc.h		patch \| blob \| blame \| history
src/rrd_update.c		patch \| blob \| blame \| history