/*
 * Calculate a % b, guaranteeing a non-negative result.
 *
 * C's % operator yields a remainder with the sign of the dividend, so a
 * negative a would otherwise produce a negative value.
 *
 * a .. the dividend (may be negative)
 * b .. the modulus; only its magnitude is used
 *
 * Returns the remainder in the range [0, |b|). When b is 0 there is no
 * meaningful remainder; 0 is returned instead of dividing by zero.
 */
static int positive_mod(int a, int b) {
    if (b == 0) {
        return 0;
    }

    /* Work in long long so that neither negating b (b == INT_MIN) nor the
       remainder operation itself (INT_MIN % -1) can overflow. */
    long long modulus = b < 0 ? -(long long) b : (long long) b;
    long long x = a % modulus;

    if (x < 0) {
        x += modulus;
    }

    /* 0 <= x < |b| <= 2^31, so x always fits into an int. */
    return (int) x;
}
+*/ + + +typedef struct { + rrd_t *rrd; + int rra_index; + rrd_value_t *values; +} candidate_t; + +static int sort_candidates(const void *va, const void *vb) { + const candidate_t *a = (candidate_t *) va; + const candidate_t *b = (candidate_t *) vb; + + if (a == b) return 0; + + if (a->rrd == b->rrd && a->rra_index == b->rra_index) return 0; + + rra_def_t *a_def = a->rrd->rra_def + a->rra_index; + rra_def_t *b_def = b->rrd->rra_def + b->rra_index; + + if (a_def->pdp_cnt == b_def->pdp_cnt) { + return b_def->row_cnt - a_def->row_cnt; // prefer the RRA with more rows + } + + // ultimately, prefer the RRA with fewer PDPs per CDP + return a_def->pdp_cnt - b_def->pdp_cnt; +} + +static time_t end_time_for_row(const rrd_t *rrd, + const rra_def_t *rra, + int cur_row, int row) { + // one entry in the candidate covers timeslot seconds + int timeslot = rra->pdp_cnt * rrd->stat_head->pdp_step; + + /* Just to re-iterate how data is stored in RRAs, in order to + understand the following code: the current slot was filled at + last_up time, but slots always correspond with time periods of + length timeslot, ending at exact multiples of timeslot + wrt. the unix epoch. 
So the current timeslot ends at: + + int(last_up / timeslot) * timeslot + + or (equivalently): + + last_up - last_up % timeslot + */ + + int past_cnt = positive_mod((cur_row - row), rra->row_cnt); + + time_t last_up = rrd->live_head->last_up; + time_t now = (last_up - last_up % timeslot) - past_cnt * timeslot; + + return now; +} + +static int row_for_time(const rrd_t *rrd, + const rra_def_t *rra, + int cur_row, time_t time) +{ + time_t last_up = rrd->live_head->last_up; + int timeslot = rra->pdp_cnt * rrd->stat_head->pdp_step; + + // align to slot boundary end times + time_t delta = time % timeslot; + if (delta > 0) time += timeslot - delta; + + delta = time % timeslot; + if (delta > 0) last_up += timeslot - delta; + + if (time > last_up) return -1; // out of range + if (time <= (int) last_up - (int) rra->row_cnt * timeslot) return -1; // out of range + + int past_cnt = (last_up - time) / timeslot; + if (past_cnt >= (int) rra->row_cnt) return -1; + + // NOTE: rra->row_cnt is unsigned!! + int row = positive_mod(cur_row - past_cnt, rra->row_cnt); + + return row < 0 ? (row + (int) rra->row_cnt) : row ; +} + +/* + rrd .. the RRD to use for the search of other RRAs to populate the new RRA + new_rra .. the RRA to populate. It is assumed, that this RRA will + become part of rrd. This means that all meta settings (step size, + last update time, etc.) not part of the RRA definition can be taken + from rrd. + populate_start .. the first row to populate in new_rra + populate_cnt .. 
the number of rows to populate in new_rra, starting at + populate_start + */ +static int populate_row(rrd_t *rrd, + rra_def_t *new_rra, + int cur_row, + rrd_value_t *values, + int populate_start, + int populate_cnt) { + int rc = -1; + + if (rrd->stat_head->rra_cnt <= 1) return 0; + + enum cf_en cf = cf_conv(new_rra->cf_nam); + switch (cf) { + case CF_AVERAGE: + case CF_MINIMUM: + case CF_MAXIMUM: + case CF_LAST: + break; + default: // unsupported CF for extension + return 0; + } + + int ds_cnt = rrd->stat_head->ds_cnt; + + candidate_t *candidates = NULL; + int candidates_cnt = 0; + + int i, ri; + int total_rows = 0; + + /* find other rows with the same CF or an RRA with CF_AVERAGE and + a stepping of 1 as possible candidates for filling */ + for (i = 0 ; i < (int) rrd->stat_head->rra_cnt ; i++) { + rra_def_t *other_rra = rrd->rra_def + i; + + // can't use our own data + if (other_rra == new_rra) { + continue; + } + + enum cf_en other_cf = cf_conv(other_rra->cf_nam); + if (other_cf == cf || + (other_cf == CF_AVERAGE && other_rra->pdp_cnt == 1)) { + candidate_t c = { + .rrd = rrd, + .rra_index = i, + .values = rrd->rrd_value + ds_cnt * total_rows + }; + candidates = copy_over_realloc(candidates, candidates_cnt, + &c, 0, sizeof(c)); + if (candidates == NULL) { + rrd_set_error("out of memory"); + goto done; + } + candidates_cnt++; + } + total_rows += other_rra->row_cnt; + } + + if (candidates_cnt == 0) { + rc = 0; + goto done; + } + + // now sort candidates by granularity + qsort(candidates, candidates_cnt, sizeof(candidate_t), sort_candidates); + + /* some of the code below is based on + https://github.com/ssinyagin/perl-rrd-tweak/blob/master/lib/RRD/Tweak.pm#L1455 + */ + + for (ri = 0 ; ri < populate_cnt ; ri++) { + int row = populate_start + ri; + + time_t new_timeslot = new_rra->pdp_cnt * rrd->stat_head->pdp_step; + + time_t row_end_time = end_time_for_row(rrd, new_rra, cur_row, row); + time_t row_start_time = row_end_time - new_timeslot + 1; + + /* now walk all 
candidates */ + + for (i = 0 ; i < candidates_cnt ; i++) { + candidate_t *c = candidates + i; + rra_def_t *r = c->rrd->rra_def + c->rra_index; + int cand_cur_row = c->rrd->rra_ptr[c->rra_index].cur_row; + + /* find a matching range of rows */ + int cand_row_start = row_for_time(rrd, r, cand_cur_row, row_start_time); + int cand_row_end = row_for_time(rrd, r, cand_cur_row, row_end_time); + + if (cand_row_start == -1 && cand_row_end != -1) { + cand_row_start = cand_cur_row; + } else if (cand_row_start != -1 && cand_row_end == -1) { + cand_row_end = (cand_cur_row - 1) % r->row_cnt; + } else if (cand_row_start == -1 && cand_row_end == -1) { + // neither start nor end in range. Can't use this candidate RRA... + continue; + } + + + /* note: cand_row_end is usually after cand_row_start, + unless we have a wrap over.... so we turn the + interation over the rows into one based on the number + of rows starting at cand_row_end. All this dance should + be in preparation for unusual cases where we have + candidates and new RRAs that have pdp counts that are + not directly divisible by each other (like populating a + 2-pdp RRA from a 3-pdp RRA) */ + + int cand_rows = (cand_row_end - cand_row_start + 1); + if (cand_rows < 0) cand_rows += r->row_cnt; + + int cand_timeslot = r->pdp_cnt * c->rrd->stat_head->pdp_step; + + for (int k = 0 ; k < ds_cnt ; k++) { + int cand_row, ci ; + rrd_value_t tmp = DNAN, final = DNAN; + int covered = 0; + + for (cand_row = cand_row_start, ci = 0 ; + ci < cand_rows ; + ci++, cand_row = (cand_row + 1) % r->row_cnt) + { + rrd_value_t v = c->values[cand_row * ds_cnt + k]; + + if (isnan(v)) continue; + + switch (cf) { + case CF_AVERAGE: + tmp = isnan(tmp) ? v * cand_timeslot : (tmp + v * cand_timeslot); + covered += cand_timeslot; + final = tmp / covered; + break; + case CF_MINIMUM: + final = tmp = isnan(tmp) ? v : (tmp < v ? tmp : v); + break; + case CF_MAXIMUM: + final = tmp = isnan(tmp) ? v : (tmp > v ? 
tmp : v); + break; + case CF_LAST: + final = tmp = v; + break; + default: // unsupported CF for extension + return 0; + } + } + + values[row * ds_cnt + k] = final; + } + } + } + + rc = 0; + + done: + if (candidates) { + free(candidates); + } + + return rc; +} + + /* copies the RRD named by infilename to a new RRD called outfilename. In that process, data sources may be removed or added.