]> git.ipfire.org Git - thirdparty/squid.git/blame - lib/profiler/Profiler.cc
Renamed squid.h to squid-old.h and config.h to squid.h
[thirdparty/squid.git] / lib / profiler / Profiler.cc
CommitLineData
88bfe092 1/*
262a0e14 2 * $Id$
88bfe092 3 *
507d0a78 4 * DEBUG: section 81 CPU Profiling Routines
88bfe092 5 * AUTHOR: Andres Kroonmaa, Sep.2000
6 *
7 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
8 * ----------------------------------------------------------
9 *
10 * Squid is the result of efforts by numerous individuals from the
11 * Internet community. Development is led by Duane Wessels of the
12 * National Laboratory for Applied Network Research and funded by the
13 * National Science Foundation. Squid is Copyrighted (C) 1998 by
14 * the Regents of the University of California. Please see the
15 * COPYRIGHT file for full details. Squid incorporates software
16 * developed and/or copyrighted by other sources. Please see the
17 * CREDITS file for full details.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
26ac0430 23 *
88bfe092 24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
26ac0430 28 *
88bfe092 29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
32 *
33 */
34
dc47f531 35/**
88bfe092 36 * CPU Profiling implementation.
26ac0430 37 *
dc47f531 38 * \par
88bfe092 39 * This library implements the Probes needed to gather stats.
40 * See src/ProfStats.c which implements historical recording and
41 * presentation in CacheMgr.cgi.
26ac0430 42 *
dc47f531 43 * \par
88bfe092 44 * For timing we prefer on-CPU ops that retrieve cpu ticks counter.
45 * For Intel, this is "rdtsc", which is 64-bit counter that virtually
46 * never wraps. For alpha, this is "rpcc" which is 32-bit counter and
47 * wraps every few seconds. Currently, no handling of wrapping counters
48 * is implemented. Other CPU's are also not covered. Potentially all
49 * modern CPU's has similar counters.
26ac0430 50 *
88bfe092 51 * Usage.
52 * Insert macro PROF_state(probename) in strategic places in code.
53 * PROF_start(probename);
54 * ... section of code measured ...
55 * PROF_stop(probename);
26ac0430 56 *
25f98340 57 * probename must be added to the xprof_type.h enum list
88bfe092 58 * with prepended "XPROF_" string.
59 *
dc47f531
AJ
60 * \section description Description.
61 * \par PROF
62 * gathers stats per probename into structures. It indexes these
88bfe092 63 * structures by enum type index in an array.
26ac0430 64 *
dc47f531
AJ
65 * \par PROF
66 * records best, best, average and worst values for delta time,
88bfe092 67 * also, if UNACCED is defined, it measures "empty" time during which
68 * no probes are in measuring state. This allows to see time "unaccounted"
69 * for. If OVERHEAD is defined, additional calculations are made at every
70 * probe to measure approximate overhead of the probe code itself.
26ac0430 71 *
dc47f531 72 * \par
88bfe092 73 * Probe data is stored in linked-list, so the more probes you define,
74 * the more overhead is added to find the deepest nested probe. To reduce
75 * average overhead, linked list is manipulated each time PR_start is
76 * called, so that probe just started is moved 1 position up in linkedlist.
77 * This way frequently used probes are moved closer to the head of list,
78 * reducing average overhead.
79 * Note that all overhead is on the scale of one hundred of CPU clock
80 * ticks, which on the scale of submicroseconds. Yet, to optimise really
81 * fast and frequent sections of code, we want to reduce this overhead
82 * to absolute minimum possible.
26ac0430 83 *
dc47f531 84 * \par
88bfe092 85 * For actual measurements, probe overhead cancels out mostly. Still,
86 * do not take the measured times as facts, they should be viewed in
87 * relative comparison to overall CPU time and on the same platform.
26ac0430 88 *
dc47f531 89 * \par
88bfe092 90 * Every 1 second, Event within squid is called that parses gathered
91 * statistics of every probe, and accumulates that into historical
92 * structures for last 1,5,30 secs, 1,5,30 mins, and 1,5 and 24 hours.
93 * Each second active probe stats are reset, and only historical data
94 * is presented in cachemgr output.
26ac0430 95 *
dc47f531
AJ
96 * \section reading Reading stats.
97 * \par
88bfe092 98 * "Worst case" may be misleading. Anything can happen at any section
99 * of code that could delay reaching to probe stop. For eg. system may
100 * need to service interrupt routine, task switch could occur, or page
101 * fault needs to be handled. In this sense, this is quite meaningless
102 * metric. "Best case" shows fastest completion of probe section, and
103 * is also somewhat useless, unless you know that amount of work is
104 * constant. Best metric to watch is "average time" and total cumulated
105 * time in given timeframe, which really show percentage of time spent
106 * in given section of code, and its average completion time. This data
107 * could be used to detect bottlenecks withing squid and optimise them.
26ac0430 108 *
dc47f531 109 * \par
88bfe092 110 * TOTALS are quite off reality. Its there just to summarise cumulative
111 * times and percent column. Percent values over 100% shows that there
112 * have been some probes nested into each other.
26ac0430 113 *
88bfe092 114 */
115
f7f3304a 116#include "squid.h"
25f98340 117#include "profiler/Profiler.h"
cf8d9cd4 118
32d002cb 119#if USE_XPROF_STATS
88bfe092 120
121
dc47f531
AJ
122#if HAVE_ASSERT_H
123#include <assert.h>
124#endif
88bfe092 125#if HAVE_GNUMALLLOC_H
126#include <gnumalloc.h>
482aa790 127#elif HAVE_MALLOC_H
88bfe092 128#include <malloc.h>
129#endif
130#if HAVE_UNISTD_H
131#include <unistd.h>
132#endif
133
134/* Exported Data */
135TimersArray *xprof_Timers = NULL;
88bfe092 136
137/* Private stuff */
138
4fd8d08f 139
140/* new stuff */
141#define MAXSTACKDEPTH 512
142
143struct _callstack_entry {
26ac0430
AJ
144 int timer; /* index into timers array */
145 const char *name;
146 hrtime_t start, stop, accum;
4fd8d08f 147};
148
149struct _callstack_entry cstack[MAXSTACKDEPTH];
150int cstack_head = 0;
151
0ef0f1de 152#if defined(_MSC_VER) /* Microsoft C Compiler ONLY */
153static __inline void
154#else
88bfe092 155static inline void
0ef0f1de 156#endif
88bfe092 157xprof_update(xprof_stats_data * head)
158{
88bfe092 159 if (head->delta < head->best)
26ac0430 160 head->best = head->delta;
88bfe092 161 if (head->worst < head->delta)
26ac0430 162 head->worst = head->delta;
88bfe092 163 head->summ += head->delta;
164 head->count++;
165}
166
167static xprof_stats_data *xp_UNACCOUNTED;
168static int xprof_inited = 0;
169
170static void
171xprof_InitLib(void)
172{
173 if (xprof_inited)
26ac0430 174 return;
88bfe092 175
25f98340 176 xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node)));
88bfe092 177
178 xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED";
179 xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick();
180 xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu;
4fd8d08f 181 cstack_head = 0;
88bfe092 182 xprof_inited = 1;
183}
184
185void
186xprof_start(xprof_type type, const char *timer)
187{
4fd8d08f 188 hrtime_t tt = get_tick();
88bfe092 189 if (!xprof_inited)
26ac0430 190 xprof_InitLib();
88bfe092 191
4fd8d08f 192 /* If nested, stop current stack frame */
193 if (cstack_head > 0) {
26ac0430
AJ
194 cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head - 1].start;
195 cstack[cstack_head - 1].start = -1;
4fd8d08f 196 }
197
198 /* Are we at the first level? If so; stop the unaccounted timer */
199 if (cstack_head == 0) {
26ac0430
AJ
200 assert(xp_UNACCOUNTED->start != -1);
201 xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start;
202 xp_UNACCOUNTED->start = -1;
203 xprof_update(xp_UNACCOUNTED);
4fd8d08f 204 }
205
206 /* Allocate new stack frame */
207 cstack[cstack_head].start = tt;
208 cstack[cstack_head].stop = -1;
209 cstack[cstack_head].accum = 0;
210 cstack[cstack_head].timer = type;
211 cstack[cstack_head].name = timer;
212 cstack_head++;
213 assert(cstack_head < MAXSTACKDEPTH);
88bfe092 214
88bfe092 215}
216
217void
218xprof_stop(xprof_type type, const char *timer)
219{
4fd8d08f 220 hrtime_t tt = get_tick();
221 assert(cstack_head > 0);
222 cstack_head--;
223 assert(cstack[cstack_head].timer == type);
224
225 /* Record timer details */
226 cstack[cstack_head].accum += tt - cstack[cstack_head].start;
227 xprof_Timers[type]->accu.delta = cstack[cstack_head].accum;
228 xprof_Timers[type]->name = timer;
229
230 /* Update */
88bfe092 231 xprof_update(&xprof_Timers[type]->accu);
4fd8d08f 232
233 /* Restart previous timer if we're not at the top level */
234 if (cstack_head > 0) {
26ac0430
AJ
235 cstack[cstack_head - 1].start = tt;
236 cstack[cstack_head - 1].stop = 0;
237 return;
4fd8d08f 238 }
239 /* Get here? We're at the top level; unaccounted */
240 xp_UNACCOUNTED->start = tt;
88bfe092 241}
242
243#endif /* USE_XPROF_STATS */