From: Martin Sperl Date: Tue, 6 May 2014 20:12:44 +0000 (+0000) Subject: added PREDICTPERC function X-Git-Tag: v1.5.0-rc1~106^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b76b9c7bacf6b0017e8e8990559b4c62079edc9a;p=thirdparty%2Frrdtool-1.x.git added PREDICTPERC function there is still one open question: should we interpolate to get the final value? Say: if we have 8 values, and we want the 95th percentile, then we should actually take the 6.65th 0-based index value (=95/100*(8-1)). with the current implementation we round, so we actually return the 100th percentile. what we could also do is: value=val[floor(idx)]+(idx-floor(idx))*(val[floor(idx)+1]-val[floor(idx)]) (besides some boundary checking for the explicit percentile 100) this should get decided prior to final merging --- diff --git a/doc/rrdgraph_rpn.pod b/doc/rrdgraph_rpn.pod index 733cfaa4..80b427b9 100644 --- a/doc/rrdgraph_rpn.pod +++ b/doc/rrdgraph_rpn.pod @@ -202,18 +202,20 @@ source value is NAN the complete sliding window is affected. The TRENDNAN operation ignores all NAN-values in a sliding window and computes the average of the remaining values. 
-B +B -Create a "sliding window" average/sigma of another data series, that also -shifts the data series by given amounts of of time as well +Create a "sliding window" average/sigma/percentil of another data series, +that also shifts the data series by given amounts of of time as well Usage - explicit stating shifts: CDEF:predict=,...,,n,,x,PREDICT CDEF:sigma=,...,,n,,x,PREDICTSIGMA +CDEF:perc=,...,,n,,,x,PREDICTPERC Usage - shifts defined as a base shift and a number of time this is applied CDEF:predict=,-n,,x,PREDICT CDEF:sigma=,-n,,x,PREDICTSIGMA +CDEF:sigma=,-n,,,x,PREDICTPERC Example: CDEF:predict=172800,86400,2,1800,x,PREDICT @@ -267,13 +269,18 @@ rrdtool graph image.png --imgformat=PNG \ LINE1:upper#0000ff:upper\ certainty\ limit \ LINE1:lower#0000ff:lower\ certainty\ limit \ CDEF:exceeds=value,UN,0,value,lower,upper,LIMIT,UN,IF \ - TICK:exceeds#aa000080:1 + TICK:exceeds#aa000080:1 \ + CDEF:perc95=86400,-7,1800,95,value,PREDICTPERC \ + LINE1:perc95#ffff00:95th_percentile Note: Experience has shown that a factor between 3 and 5 to scale sigma is a good discriminator to detect abnormal behavior. This obviously depends also on the type of data and how "noisy" the data series is. -This prediction can only be used for short term extrapolations - say a few days into the future- +Also Note the explicit use of start= in the CDEF - this is necessary to load all +the necessary data (even if it is not displayed) + +This prediction can only be used for short term extrapolations - say a few days into the future. 
=item Special values diff --git a/src/rrd_graph.c b/src/rrd_graph.c index ac2f3b8a..b3be4d96 100644 --- a/src/rrd_graph.c +++ b/src/rrd_graph.c @@ -997,6 +997,7 @@ long lcd( return num[i]; } + /* run the rpn calculator on all the VDEF and CDEF arguments */ int data_calc( image_desc_t *im) @@ -1008,6 +1009,7 @@ int data_calc( int stepcnt; time_t now; rpnstack_t rpnstack; + rpnp_t *rpnp; rpnstack_init(&rpnstack); @@ -1061,6 +1063,7 @@ int data_calc( steparray = NULL; stepcnt = 0; dataidx = -1; + rpnp = im->gdes[gdi].rpnp; /* Find the variables in the expression. * - VDEF variables are substituted by their values @@ -1173,7 +1176,6 @@ int data_calc( */ for (now = im->gdes[gdi].start + im->gdes[gdi].step; now <= im->gdes[gdi].end; now += im->gdes[gdi].step) { - rpnp_t *rpnp = im->gdes[gdi].rpnp; /* 3rd arg of rpn_calc is for OP_VARIABLE lookups; * in this case we are advancing by timesteps; @@ -1183,9 +1185,12 @@ int data_calc( im->gdes[gdi].data, ++dataidx) == -1) { /* rpn_calc sets the error string */ rpnstack_free(&rpnstack); + rpnp_freeextra(rpnp); return -1; } } /* enumerate over time steps within a CDEF */ + rpnp_freeextra(rpnp); + break; default: continue; diff --git a/src/rrd_rpncalc.c b/src/rrd_rpncalc.c index aba6042f..80522717 100644 --- a/src/rrd_rpncalc.c +++ b/src/rrd_rpncalc.c @@ -78,6 +78,8 @@ rpnp_t *rpn_expand( } for (i = 0; rpnc[i].op != OP_END; ++i) { rpnp[i].op = (enum op_en)rpnc[i].op; + rpnp[i].extra = NULL; + rpnp[i].free_extra = NULL; if (rpnp[i].op == OP_NUMBER) { rpnp[i].val = (double) rpnc[i].val; } else if (rpnp[i].op == OP_VARIABLE || rpnp[i].op == OP_PREV_OTHER) { @@ -180,6 +182,7 @@ void rpn_compact2str( add_op(OP_TRENDNAN, TRENDNAN) add_op(OP_PREDICT, PREDICT) add_op(OP_PREDICTSIGMA, PREDICTSIGMA) + add_op(OP_PREDICTPERC, PREDICTPERC) add_op(OP_RAD2DEG, RAD2DEG) add_op(OP_DEG2RAD, DEG2RAD) add_op(OP_AVG, AVG) @@ -241,9 +244,10 @@ void parseCDEF_DS(const char *def, if (rpnp[i].op == OP_TIME || rpnp[i].op == OP_LTIME || rpnp[i].op == 
OP_PREV || rpnp[i].op == OP_COUNT || rpnp[i].op == OP_TREND || rpnp[i].op == OP_TRENDNAN || - rpnp[i].op == OP_PREDICT || rpnp[i].op == OP_PREDICTSIGMA ) { + rpnp[i].op == OP_PREDICT || rpnp[i].op == OP_PREDICTSIGMA || + rpnp[i].op == OP_PREDICTPERC ) { rrd_set_error - ("operators TIME, LTIME, PREV COUNT TREND TRENDNAN PREDICT PREDICTSIGMA are not supported with DS COMPUTE"); + ("operators TIME, LTIME, PREV COUNT TREND TRENDNAN PREDICT PREDICTSIGMA PREDICTPERC are not supported with DS COMPUTE"); free(rpnp); return; } @@ -385,6 +389,7 @@ rpnp_t *rpn_parse( match_op(OP_TRENDNAN, TRENDNAN) match_op(OP_PREDICT, PREDICT) match_op(OP_PREDICTSIGMA, PREDICTSIGMA) + match_op(OP_PREDICTPERC, PREDICTPERC) match_op(OP_RAD2DEG, RAD2DEG) match_op(OP_DEG2RAD, DEG2RAD) match_op(OP_AVG, AVG) @@ -409,6 +414,9 @@ rpnp_t *rpn_parse( return NULL; } + rpnp[steps].extra = NULL; + rpnp[steps].free_extra = NULL; + if (*expr == 0) break; if (*expr == ',') @@ -435,11 +443,28 @@ void rpnstack_init( void rpnstack_free( rpnstack_t *rpnstack) { - if (rpnstack->s != NULL) - free(rpnstack->s); + free(rpnstack->s); rpnstack->dc_stacksize = 0; } +void rpnp_freeextra(rpnp_t* rpnp) +{ + int rpi; + if (!rpnp) + return; + /* process each op from the rpn in turn */ + for (rpi = 0; rpnp[rpi].op != OP_END; rpi++) { + if (rpnp[rpi].extra) { + if (rpnp[rpi].free_extra) { + rpnp[rpi].free_extra(rpnp[rpi].extra); + } else { + free(rpnp[rpi].extra); + } + rpnp[rpi].extra = NULL; + } + } +} + static int rpn_compare_double( const void *x, const void *y) @@ -823,12 +848,27 @@ short rpn_calc( break; case OP_PREDICT: case OP_PREDICTSIGMA: - stackunderflow(2); + case OP_PREDICTPERC: { - /* the local averaging window (similar to trend, but better here, as we get better statistics thru numbers)*/ + /* the percentile requested */ + double percentile = DNAN; + if (rpnp[rpi].op == OP_PREDICTPERC) { + stackunderflow(1); + percentile = rpnstack->s[--stptr]; + if ((percentile<0) || (percentile > 100)) { + 
rrd_set_error("unsupported percentile: %f",percentile); + return -1; + } + percentile/=100; + } + /* the local averaging window (similar to trend, + * but better here, as we get better statistics + * thru numbers)*/ + stackunderflow(2); int locstepsize = rpnstack->s[--stptr]; /* the number of shifts and range-checking*/ int shifts = rpnstack->s[--stptr]; + stackunderflow(shifts); // handle negative shifts special if (shifts<0) { @@ -839,9 +879,9 @@ short rpn_calc( /* the real calculation */ double val=DNAN; /* the info on the datasource */ - time_t dsstep = (time_t) rpnp[rpi - 1].step; - int dscount = rpnp[rpi - 1].ds_cnt; - int locstep = (int)ceil((float)locstepsize/(float)dsstep); + time_t dsstep = (time_t) rpnp[rpi - 1].step; + int dscount = rpnp[rpi - 1].ds_cnt; + int locstep = (int)ceil((float)locstepsize/(float)dsstep); /* the sums */ double sum = 0; @@ -850,6 +890,16 @@ short rpn_calc( /* now loop for each position */ int doshifts=shifts; if (shifts<0) { doshifts=-shifts; } + /* alloc memory */ + double *extra = rpnp[rpi].extra; + if (rpnp[rpi].op == OP_PREDICTPERC) { + if (! extra) { + int size = (doshifts + 1) * (locstep + 2); + rpnp[rpi].extra = + extra = malloc(sizeof(double) * size); + } + } + /* loop the shifts */ for(int loop=0;loop=0)&&(offset0) { val = sum/(double)count; } - } else { + break; + case OP_PREDICTSIGMA: if (count>1) { /* the sigma case */ val=count*sum2-sum*sum; if (val<0) { @@ -895,6 +951,21 @@ short rpn_calc( val=sqrt(val/((float)count*((float)count-1.0))); } } + break; + case OP_PREDICTPERC: + if ((count>0) && extra) { + /* sort the numbers */ + qsort(extra,count,sizeof(double),rpn_compare_double); + /* get the percentile selected */ + int idx=(int)round(percentile * ((float)count-1.0)); + /* maybe we should also do an interpolation between the 2 + * neighboring fields, similar to what we do with MEDIAN + */ + val = extra[idx]; + } + break; + default: /* should not get here ... 
*/ + break; } rpnstack->s[stptr] = val; } diff --git a/src/rrd_rpncalc.h b/src/rrd_rpncalc.h index 26b52cb4..2072bfc7 100644 --- a/src/rrd_rpncalc.h +++ b/src/rrd_rpncalc.h @@ -21,7 +21,7 @@ enum op_en { OP_NUMBER = 0, OP_VARIABLE, OP_INF, OP_PREV, OP_NEGINF, OP_PREDICT,OP_PREDICTSIGMA, OP_AVG, OP_ABS, OP_ADDNAN, OP_MINNAN, OP_MAXNAN, - OP_MEDIAN + OP_MEDIAN, OP_PREDICTPERC }; typedef struct rpnp_t { @@ -31,8 +31,14 @@ typedef struct rpnp_t { double *data; /* pointer to the current value from OP_VAR DAS */ long ds_cnt; /* data source count for data pointer */ long step; /* time step for OP_VAR das */ + void *extra; /* some extra data for longer setups */ + void (*free_extra)(void *); /* function pointer used to free extra + * - NULL for "simple" free(extra); */ } rpnp_t; +void rpnp_freeextra( + rpnp_t *rpnp); + /* a compact representation of rpnp_t for computed data sources */ typedef struct rpn_cdefds_t { char op; /* rpn operator type */ diff --git a/src/rrd_update.c b/src/rrd_update.c index efa04824..a26005e0 100644 --- a/src/rrd_update.c +++ b/src/rrd_update.c @@ -1713,13 +1713,18 @@ static int process_pdp_st( rpnp[i].op = OP_NUMBER; rpnp[i].val = pdp_temp[rpnp[i].ptr]; } + /* just in case */ + rpnp[i].extra = NULL; + rpnp[i].free_extra = NULL; } /* run the rpn calculator */ if (rpn_calc(rpnp, &rpnstack, 0, pdp_temp, ds_idx) == -1) { + rpnp_freeextra(rpnp); free(rpnp); rpnstack_free(&rpnstack); return -1; } + rpnp_freeextra(rpnp); free(rpnp); }