]> git.ipfire.org Git - thirdparty/squid.git/blame - lib/profiler/Profiler.cc
SourceFormat Enforcement
[thirdparty/squid.git] / lib / profiler / Profiler.cc
CommitLineData
88bfe092 1/*
4ac4a490 2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
88bfe092 3 *
f0ba1a9b
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
88bfe092 7 */
8
f0ba1a9b
AJ
9/* DEBUG: section 81 CPU Profiling Routines */
10
dc47f531 11/**
88bfe092 12 * CPU Profiling implementation.
26ac0430 13 *
dc47f531 14 * \par
88bfe092 15 * This library implements the Probes needed to gather stats.
16 * See src/ProfStats.c which implements historical recording and
17 * presentation in CacheMgr.cgi.
26ac0430 18 *
dc47f531 19 * \par
88bfe092 20 * For timing we prefer on-CPU ops that retrieve cpu ticks counter.
21 * For Intel, this is "rdtsc", which is 64-bit counter that virtually
22 * never wraps. For alpha, this is "rpcc" which is 32-bit counter and
23 * wraps every few seconds. Currently, no handling of wrapping counters
24 * is implemented. Other CPU's are also not covered. Potentially all
25 * modern CPU's has similar counters.
26ac0430 26 *
88bfe092 27 * Usage.
28 * Insert macro PROF_state(probename) in strategic places in code.
29 * PROF_start(probename);
30 * ... section of code measured ...
31 * PROF_stop(probename);
26ac0430 32 *
25f98340 33 * probename must be added to the xprof_type.h enum list
88bfe092 34 * with prepended "XPROF_" string.
35 *
dc47f531
AJ
36 * \section description Description.
37 * \par PROF
38 * gathers stats per probename into structures. It indexes these
88bfe092 39 * structures by enum type index in an array.
26ac0430 40 *
dc47f531
AJ
41 * \par PROF
42 * records best, best, average and worst values for delta time,
88bfe092 43 * also, if UNACCED is defined, it measures "empty" time during which
44 * no probes are in measuring state. This allows to see time "unaccounted"
45 * for. If OVERHEAD is defined, additional calculations are made at every
46 * probe to measure approximate overhead of the probe code itself.
26ac0430 47 *
dc47f531 48 * \par
88bfe092 49 * Probe data is stored in linked-list, so the more probes you define,
50 * the more overhead is added to find the deepest nested probe. To reduce
51 * average overhead, linked list is manipulated each time PR_start is
52 * called, so that probe just started is moved 1 position up in linkedlist.
53 * This way frequently used probes are moved closer to the head of list,
54 * reducing average overhead.
55 * Note that all overhead is on the scale of one hundred of CPU clock
56 * ticks, which on the scale of submicroseconds. Yet, to optimise really
57 * fast and frequent sections of code, we want to reduce this overhead
58 * to absolute minimum possible.
26ac0430 59 *
dc47f531 60 * \par
88bfe092 61 * For actual measurements, probe overhead cancels out mostly. Still,
62 * do not take the measured times as facts, they should be viewed in
63 * relative comparison to overall CPU time and on the same platform.
26ac0430 64 *
dc47f531 65 * \par
88bfe092 66 * Every 1 second, Event within squid is called that parses gathered
67 * statistics of every probe, and accumulates that into historical
68 * structures for last 1,5,30 secs, 1,5,30 mins, and 1,5 and 24 hours.
69 * Each second active probe stats are reset, and only historical data
70 * is presented in cachemgr output.
26ac0430 71 *
dc47f531
AJ
72 * \section reading Reading stats.
73 * \par
88bfe092 74 * "Worst case" may be misleading. Anything can happen at any section
75 * of code that could delay reaching to probe stop. For eg. system may
76 * need to service interrupt routine, task switch could occur, or page
77 * fault needs to be handled. In this sense, this is quite meaningless
78 * metric. "Best case" shows fastest completion of probe section, and
79 * is also somewhat useless, unless you know that amount of work is
80 * constant. Best metric to watch is "average time" and total cumulated
81 * time in given timeframe, which really show percentage of time spent
82 * in given section of code, and its average completion time. This data
83 * could be used to detect bottlenecks withing squid and optimise them.
26ac0430 84 *
dc47f531 85 * \par
88bfe092 86 * TOTALS are quite off reality. Its there just to summarise cumulative
87 * times and percent column. Percent values over 100% shows that there
88 * have been some probes nested into each other.
26ac0430 89 *
88bfe092 90 */
91
f7f3304a 92#include "squid.h"
25f98340 93#include "profiler/Profiler.h"
cf8d9cd4 94
32d002cb 95#if USE_XPROF_STATS
88bfe092 96
074d6a40 97#include <cassert>
88bfe092 98#if HAVE_GNUMALLLOC_H
99#include <gnumalloc.h>
482aa790 100#elif HAVE_MALLOC_H
88bfe092 101#include <malloc.h>
102#endif
103#if HAVE_UNISTD_H
104#include <unistd.h>
105#endif
106
107/* Exported Data */
108TimersArray *xprof_Timers = NULL;
88bfe092 109
110/* Private stuff */
111
4fd8d08f 112/* new stuff */
f53969cc 113#define MAXSTACKDEPTH 512
4fd8d08f 114
115struct _callstack_entry {
f53969cc 116 int timer; /* index into timers array */
26ac0430
AJ
117 const char *name;
118 hrtime_t start, stop, accum;
4fd8d08f 119};
120
121struct _callstack_entry cstack[MAXSTACKDEPTH];
122int cstack_head = 0;
123
0ef0f1de 124#if defined(_MSC_VER) /* Microsoft C Compiler ONLY */
125static __inline void
126#else
88bfe092 127static inline void
0ef0f1de 128#endif
88bfe092 129xprof_update(xprof_stats_data * head)
130{
88bfe092 131 if (head->delta < head->best)
26ac0430 132 head->best = head->delta;
88bfe092 133 if (head->worst < head->delta)
26ac0430 134 head->worst = head->delta;
88bfe092 135 head->summ += head->delta;
742a021b 136 ++head->count;
88bfe092 137}
138
139static xprof_stats_data *xp_UNACCOUNTED;
140static int xprof_inited = 0;
141
142static void
143xprof_InitLib(void)
144{
145 if (xprof_inited)
26ac0430 146 return;
88bfe092 147
25f98340 148 xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node)));
88bfe092 149
150 xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED";
151 xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick();
152 xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu;
4fd8d08f 153 cstack_head = 0;
88bfe092 154 xprof_inited = 1;
155}
156
157void
158xprof_start(xprof_type type, const char *timer)
159{
4fd8d08f 160 hrtime_t tt = get_tick();
88bfe092 161 if (!xprof_inited)
26ac0430 162 xprof_InitLib();
88bfe092 163
4fd8d08f 164 /* If nested, stop current stack frame */
165 if (cstack_head > 0) {
26ac0430
AJ
166 cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head - 1].start;
167 cstack[cstack_head - 1].start = -1;
4fd8d08f 168 }
169
170 /* Are we at the first level? If so; stop the unaccounted timer */
171 if (cstack_head == 0) {
26ac0430
AJ
172 assert(xp_UNACCOUNTED->start != -1);
173 xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start;
174 xp_UNACCOUNTED->start = -1;
175 xprof_update(xp_UNACCOUNTED);
4fd8d08f 176 }
177
178 /* Allocate new stack frame */
179 cstack[cstack_head].start = tt;
180 cstack[cstack_head].stop = -1;
181 cstack[cstack_head].accum = 0;
182 cstack[cstack_head].timer = type;
183 cstack[cstack_head].name = timer;
742a021b 184 ++cstack_head;
4fd8d08f 185 assert(cstack_head < MAXSTACKDEPTH);
88bfe092 186
88bfe092 187}
188
189void
190xprof_stop(xprof_type type, const char *timer)
191{
4fd8d08f 192 hrtime_t tt = get_tick();
193 assert(cstack_head > 0);
742a021b 194 --cstack_head;
4fd8d08f 195 assert(cstack[cstack_head].timer == type);
196
197 /* Record timer details */
198 cstack[cstack_head].accum += tt - cstack[cstack_head].start;
199 xprof_Timers[type]->accu.delta = cstack[cstack_head].accum;
200 xprof_Timers[type]->name = timer;
201
202 /* Update */
88bfe092 203 xprof_update(&xprof_Timers[type]->accu);
4fd8d08f 204
205 /* Restart previous timer if we're not at the top level */
206 if (cstack_head > 0) {
26ac0430
AJ
207 cstack[cstack_head - 1].start = tt;
208 cstack[cstack_head - 1].stop = 0;
209 return;
4fd8d08f 210 }
211 /* Get here? We're at the top level; unaccounted */
212 xp_UNACCOUNTED->start = tt;
88bfe092 213}
214
215#endif /* USE_XPROF_STATS */
f53969cc 216