]>
git.ipfire.org Git - thirdparty/squid.git/blob - lib/profiler/Profiler.cc
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 /* DEBUG: section 81 CPU Profiling Routines */
12 * CPU Profiling implementation.
15 * This library implements the Probes needed to gather stats.
16 * See src/ProfStats.c which implements historical recording and
17 * presentation in CacheMgr.cgi.
20 * For timing we prefer on-CPU ops that retrieve cpu ticks counter.
21 * For Intel, this is "rdtsc", which is 64-bit counter that virtually
22 * never wraps. For alpha, this is "rpcc" which is 32-bit counter and
23 * wraps every few seconds. Currently, no handling of wrapping counters
24 * is implemented. Other CPU's are also not covered. Potentially all
25 * modern CPU's has similar counters.
28 * Insert macro PROF_state(probename) in strategic places in code.
29 * PROF_start(probename);
30 * ... section of code measured ...
31 * PROF_stop(probename);
33 * probename must be added to the xprof_type.h enum list
34 * with prepended "XPROF_" string.
36 * \section description Description.
38 * gathers stats per probename into structures. It indexes these
39 * structures by enum type index in an array.
42 * records best, best, average and worst values for delta time,
43 * also, if UNACCED is defined, it measures "empty" time during which
44 * no probes are in measuring state. This allows to see time "unaccounted"
45 * for. If OVERHEAD is defined, additional calculations are made at every
46 * probe to measure approximate overhead of the probe code itself.
49 * Probe data is stored in linked-list, so the more probes you define,
50 * the more overhead is added to find the deepest nested probe. To reduce
51 * average overhead, linked list is manipulated each time PR_start is
52 * called, so that probe just started is moved 1 position up in linkedlist.
53 * This way frequently used probes are moved closer to the head of list,
54 * reducing average overhead.
55 * Note that all overhead is on the scale of one hundred of CPU clock
56 * ticks, which on the scale of submicroseconds. Yet, to optimise really
57 * fast and frequent sections of code, we want to reduce this overhead
58 * to absolute minimum possible.
61 * For actual measurements, probe overhead cancels out mostly. Still,
62 * do not take the measured times as facts, they should be viewed in
63 * relative comparison to overall CPU time and on the same platform.
66 * Every 1 second, Event within squid is called that parses gathered
67 * statistics of every probe, and accumulates that into historical
68 * structures for last 1,5,30 secs, 1,5,30 mins, and 1,5 and 24 hours.
69 * Each second active probe stats are reset, and only historical data
70 * is presented in cachemgr output.
72 * \section reading Reading stats.
74 * "Worst case" may be misleading. Anything can happen at any section
75 * of code that could delay reaching to probe stop. For eg. system may
76 * need to service interrupt routine, task switch could occur, or page
77 * fault needs to be handled. In this sense, this is quite meaningless
78 * metric. "Best case" shows fastest completion of probe section, and
79 * is also somewhat useless, unless you know that amount of work is
80 * constant. Best metric to watch is "average time" and total cumulated
81 * time in given timeframe, which really show percentage of time spent
82 * in given section of code, and its average completion time. This data
83 * could be used to detect bottlenecks withing squid and optimise them.
86 * TOTALS are quite off reality. Its there just to summarise cumulative
87 * times and percent column. Percent values over 100% shows that there
88 * have been some probes nested into each other.
93 #include "profiler/Profiler.h"
99 #include <gnumalloc.h>
108 TimersArray
*xprof_Timers
= NULL
;
113 #define MAXSTACKDEPTH 512
115 struct _callstack_entry
{
116 int timer
; /* index into timers array */
118 hrtime_t start
, stop
, accum
;
121 struct _callstack_entry cstack
[MAXSTACKDEPTH
];
124 #if defined(_MSC_VER) /* Microsoft C Compiler ONLY */
129 xprof_update(xprof_stats_data
* head
)
131 if (head
->delta
< head
->best
)
132 head
->best
= head
->delta
;
133 if (head
->worst
< head
->delta
)
134 head
->worst
= head
->delta
;
135 head
->summ
+= head
->delta
;
139 static xprof_stats_data
*xp_UNACCOUNTED
;
140 static int xprof_inited
= 0;
148 xprof_Timers
= static_cast<TimersArray
*>(calloc(XPROF_LAST
+ 2, sizeof(xprof_stats_node
)));
150 xprof_Timers
[XPROF_PROF_UNACCOUNTED
]->name
= "PROF_UNACCOUNTED";
151 xprof_Timers
[XPROF_PROF_UNACCOUNTED
]->accu
.start
= get_tick();
152 xp_UNACCOUNTED
= &xprof_Timers
[XPROF_PROF_UNACCOUNTED
]->accu
;
158 xprof_start(xprof_type type
, const char *timer
)
160 hrtime_t tt
= get_tick();
164 /* If nested, stop current stack frame */
165 if (cstack_head
> 0) {
166 cstack
[cstack_head
- 1].accum
+= get_tick() - cstack
[cstack_head
- 1].start
;
167 cstack
[cstack_head
- 1].start
= -1;
170 /* Are we at the first level? If so; stop the unaccounted timer */
171 if (cstack_head
== 0) {
172 assert(xp_UNACCOUNTED
->start
!= -1);
173 xp_UNACCOUNTED
->delta
= tt
- xp_UNACCOUNTED
->start
;
174 xp_UNACCOUNTED
->start
= -1;
175 xprof_update(xp_UNACCOUNTED
);
178 /* Allocate new stack frame */
179 cstack
[cstack_head
].start
= tt
;
180 cstack
[cstack_head
].stop
= -1;
181 cstack
[cstack_head
].accum
= 0;
182 cstack
[cstack_head
].timer
= type
;
183 cstack
[cstack_head
].name
= timer
;
185 assert(cstack_head
< MAXSTACKDEPTH
);
190 xprof_stop(xprof_type type
, const char *timer
)
192 hrtime_t tt
= get_tick();
193 assert(cstack_head
> 0);
195 assert(cstack
[cstack_head
].timer
== type
);
197 /* Record timer details */
198 cstack
[cstack_head
].accum
+= tt
- cstack
[cstack_head
].start
;
199 xprof_Timers
[type
]->accu
.delta
= cstack
[cstack_head
].accum
;
200 xprof_Timers
[type
]->name
= timer
;
203 xprof_update(&xprof_Timers
[type
]->accu
);
205 /* Restart previous timer if we're not at the top level */
206 if (cstack_head
> 0) {
207 cstack
[cstack_head
- 1].start
= tt
;
208 cstack
[cstack_head
- 1].stop
= 0;
211 /* Get here? We're at the top level; unaccounted */
212 xp_UNACCOUNTED
->start
= tt
;
215 #endif /* USE_XPROF_STATS */