5 * DEBUG: section 62 Generic Histogram
6 * AUTHOR: Duane Wessels
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
37 * Important restrictions on val_in and val_out functions:
39 * - val_in: ascending, defined on [0, oo), val_in(0) == 0;
40 * - val_out: x == val_out(val_in(x)) where val_in(x) is defined
42 * In practice, the requirements are less strict,
43 * but then it gets hard to define them without math notation.
44 * val_in is applied after offseting the value but before scaling
45 * See log and linear based histograms for examples
53 static StatHistBinDumper statHistBinDumper
;
62 /* low level init, higher level functions has less params */
64 StatHist::init(int capacity_
, hbase_f
* val_in_
, hbase_f
* val_out_
, double min_
, double max_
)
66 assert(capacity_
> 0);
67 assert(val_in_
&& val_out_
);
68 /* check before we divide to get scale */
69 assert(val_in_(max_
- min_
) > 0);
75 bins
= (int *)xcalloc(capacity
, sizeof(int));
76 scale
= capacity
/ val_in(max
- min
);
78 /* check that functions are valid */
79 /* a min value should go into bin[0] */
80 assert(findBin(min
) == 0);
81 /* a max value should go into the last bin */
82 assert(findBin(max
) == capacity
- 1);
83 /* it is hard to test val_out, here is a crude test */
84 assert(((int) floor(0.99 + val(0) - min
)) == 0);
94 /* assumes that somebody already called init for Dest */
95 /* TODO: remove assumption */
97 StatHist::operator =(const StatHist
& src
)
99 assert(src
.bins
!= NULL
);
100 assert(capacity
==src
.capacity
);
101 assert(min
==src
.min
);
102 assert(max
==src
.max
);
103 assert(fabs(scale
- src
.scale
) < 0.0000001);
104 assert(val_in
==src
.val_in
);
105 assert(val_out
==src
.val_out
);
106 memcpy(bins
,src
.bins
,capacity
*sizeof(*bins
));
111 StatHist::count(double val
)
113 const int bin
= findBin(val
);
114 assert(bins
); /* make sure it got initialized */
115 assert(0 <= bin
&& bin
< capacity
);
120 StatHist::findBin(double v
)
124 v
-= min
; /* offset */
126 if (v
<= 0.0) /* too small */
129 bin
= (int) floor(scale
* val_in(v
) + 0.5);
131 if (bin
< 0) /* should not happen */
134 if (bin
>= capacity
) /* too big */
141 StatHist::val(int bin
) const
143 return val_out((double) bin
/ scale
) + min
;
147 statHistDeltaMedian(const StatHist
& A
, const StatHist
& B
)
149 return statHistDeltaPctile(A
,B
, 0.5);
153 statHistDeltaPctile(const StatHist
& A
, const StatHist
& B
, double pctile
)
155 return A
.deltaPctile(B
,pctile
);
159 StatHist::deltaPctile(const StatHist
& B
, double pctile
) const
171 assert(capacity
== B
.capacity
);
173 int *D
= (int *)xcalloc(capacity
, sizeof(int));
175 for (i
= 0; i
< capacity
; ++i
) {
176 D
[i
] = B
.bins
[i
] - bins
[i
];
180 for (i
= 0; i
< capacity
; ++i
)
183 h
= int(s1
* pctile
);
185 for (i
= 0; i
< capacity
; ++i
) {
189 if (a
<= h
&& h
<= b
)
211 f
= (h
- a
) / (b
- a
);
213 K
= (int) floor(f
* (double) (J
- I
) + I
);
219 statHistBinDumper(StoreEntry
* sentry
, int idx
, double val
, double size
, int count
)
222 storeAppendPrintf(sentry
, "\t%3d/%f\t%d\t%f\n",
223 idx
, val
, count
, count
/ size
);
227 StatHist::dump(StoreEntry
* sentry
, StatHistBinDumper
* bd
) const
230 double left_border
= min
;
233 bd
= statHistBinDumper
;
235 for (i
= 0; i
< capacity
; ++i
) {
236 const double right_border
= val(i
+ 1);
237 assert(right_border
- left_border
> 0.0);
238 bd(sentry
, i
, left_border
, right_border
- left_border
, bins
[i
]);
239 left_border
= right_border
;
243 /* log based histogram */
247 assert((x
+ 1.0) >= 0.0);
258 statHistLogInit(StatHist
* H
, int capacity
, double min
, double max
)
260 H
->init(capacity
, Math::Log
, Math::Exp
, min
, max
);
263 /* linear histogram for enums */
264 /* we want to be have [-1,last_enum+1] range to track out of range enums */
272 statHistEnumInit(StatHist
* H
, int last_enum
)
274 H
->init(last_enum
+ 3, Math::Null
, Math::Null
, (double) -1, (double) (last_enum
+ 1 + 1));
278 statHistEnumDumper(StoreEntry
* sentry
, int idx
, double val
, double size
, int count
)
281 storeAppendPrintf(sentry
, "%2d\t %5d\t %5d\n",
282 idx
, (int) val
, count
);
286 statHistIntInit(StatHist
* H
, int n
)
288 H
->init(n
, Math::Null
, Math::Null
, (double) 0, (double) n
- 1);
292 statHistIntDumper(StoreEntry
* sentry
, int idx
, double val
, double size
, int count
)
295 storeAppendPrintf(sentry
, "%9d\t%9d\n", (int) val
, count
);