]> git.ipfire.org Git - thirdparty/squid.git/blame - lib/Profiler.c
Updates for running on squid-cache.org
[thirdparty/squid.git] / lib / Profiler.c
CommitLineData
88bfe092 1
2/*
507d0a78 3 * $Id: Profiler.c,v 1.7 2007/04/25 11:30:16 adrian Exp $
88bfe092 4 *
507d0a78 5 * DEBUG: section 81 CPU Profiling Routines
88bfe092 6 * AUTHOR: Andres Kroonmaa, Sep.2000
7 *
8 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from the
12 * Internet community. Development is led by Duane Wessels of the
13 * National Laboratory for Applied Network Research and funded by the
14 * National Science Foundation. Squid is Copyrighted (C) 1998 by
15 * the Regents of the University of California. Please see the
16 * COPYRIGHT file for full details. Squid incorporates software
17 * developed and/or copyrighted by other sources. Please see the
18 * CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36/*
37 * CPU Profiling implementation.
38 *
39 * This library implements the Probes needed to gather stats.
40 * See src/ProfStats.c which implements historical recording and
41 * presentation in CacheMgr.cgi.
42 *
43 * For timing we prefer on-CPU ops that retrieve cpu ticks counter.
44 * For Intel, this is "rdtsc", which is 64-bit counter that virtually
45 * never wraps. For alpha, this is "rpcc" which is 32-bit counter and
46 * wraps every few seconds. Currently, no handling of wrapping counters
47 * is implemented. Other CPU's are also not covered. Potentially all
48 * modern CPU's has similar counters.
49 *
50 * Usage.
51 * Insert macro PROF_state(probename) in strategic places in code.
52 * PROF_start(probename);
53 * ... section of code measured ...
54 * PROF_stop(probename);
55 *
56 * probename must be added to profiling.h into xprof_type enum list
57 * with prepended "XPROF_" string.
58 *
59 * Description.
60 * PROF gathers stats per probename into structures. It indexes these
61 * structures by enum type index in an array.
62 *
63 * PROF records best, best, average and worst values for delta time,
64 * also, if UNACCED is defined, it measures "empty" time during which
65 * no probes are in measuring state. This allows to see time "unaccounted"
66 * for. If OVERHEAD is defined, additional calculations are made at every
67 * probe to measure approximate overhead of the probe code itself.
68 *
69 * Probe data is stored in linked-list, so the more probes you define,
70 * the more overhead is added to find the deepest nested probe. To reduce
71 * average overhead, linked list is manipulated each time PR_start is
72 * called, so that probe just started is moved 1 position up in linkedlist.
73 * This way frequently used probes are moved closer to the head of list,
74 * reducing average overhead.
75 * Note that all overhead is on the scale of one hundred of CPU clock
76 * ticks, which on the scale of submicroseconds. Yet, to optimise really
77 * fast and frequent sections of code, we want to reduce this overhead
78 * to absolute minimum possible.
79 *
80 * For actual measurements, probe overhead cancels out mostly. Still,
81 * do not take the measured times as facts, they should be viewed in
82 * relative comparison to overall CPU time and on the same platform.
83 *
84 * Every 1 second, Event within squid is called that parses gathered
85 * statistics of every probe, and accumulates that into historical
86 * structures for last 1,5,30 secs, 1,5,30 mins, and 1,5 and 24 hours.
87 * Each second active probe stats are reset, and only historical data
88 * is presented in cachemgr output.
89 *
90 * Reading stats.
91 * "Worst case" may be misleading. Anything can happen at any section
92 * of code that could delay reaching to probe stop. For eg. system may
93 * need to service interrupt routine, task switch could occur, or page
94 * fault needs to be handled. In this sense, this is quite meaningless
95 * metric. "Best case" shows fastest completion of probe section, and
96 * is also somewhat useless, unless you know that amount of work is
97 * constant. Best metric to watch is "average time" and total cumulated
98 * time in given timeframe, which really show percentage of time spent
99 * in given section of code, and its average completion time. This data
100 * could be used to detect bottlenecks withing squid and optimise them.
101 *
102 * TOTALS are quite off reality. Its there just to summarise cumulative
103 * times and percent column. Percent values over 100% shows that there
104 * have been some probes nested into each other.
105 *
106 */
107
108#include "profiling.h"
109
4fd8d08f 110#include <assert.h>
111
88bfe092 112#ifdef USE_XPROF_STATS
113
114
115#if HAVE_GNUMALLLOC_H
116#include <gnumalloc.h>
482aa790 117#elif HAVE_MALLOC_H
88bfe092 118#include <malloc.h>
119#endif
120#if HAVE_UNISTD_H
121#include <unistd.h>
122#endif
123
124/* Exported Data */
125TimersArray *xprof_Timers = NULL;
88bfe092 126
127/* Private stuff */
128
4fd8d08f 129
130/* new stuff */
131#define MAXSTACKDEPTH 512
132
133struct _callstack_entry {
134 int timer; /* index into timers array */
135 const char *name;
136 hrtime_t start, stop, accum;
137};
138
139struct _callstack_entry cstack[MAXSTACKDEPTH];
140int cstack_head = 0;
141
0ef0f1de 142#if defined(_MSC_VER) /* Microsoft C Compiler ONLY */
143static __inline void
144#else
88bfe092 145static inline void
0ef0f1de 146#endif
88bfe092 147xprof_update(xprof_stats_data * head)
148{
88bfe092 149 if (head->delta < head->best)
150 head->best = head->delta;
151 if (head->worst < head->delta)
152 head->worst = head->delta;
153 head->summ += head->delta;
154 head->count++;
155}
156
157static xprof_stats_data *xp_UNACCOUNTED;
158static int xprof_inited = 0;
159
160static void
161xprof_InitLib(void)
162{
163 if (xprof_inited)
164 return;
165
166 xprof_Timers = calloc(XPROF_LAST + 2, sizeof(xprof_stats_node));
167
168 xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED";
169 xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick();
170 xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu;
4fd8d08f 171 cstack_head = 0;
88bfe092 172 xprof_inited = 1;
173}
174
175void
176xprof_start(xprof_type type, const char *timer)
177{
4fd8d08f 178 hrtime_t tt = get_tick();
88bfe092 179 if (!xprof_inited)
180 xprof_InitLib();
181
4fd8d08f 182 /* If nested, stop current stack frame */
183 if (cstack_head > 0) {
184 cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head - 1].start;
185 cstack[cstack_head - 1].start = -1;
186 }
187
188 /* Are we at the first level? If so; stop the unaccounted timer */
189 if (cstack_head == 0) {
190 assert(xp_UNACCOUNTED->start != -1);
191 xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start;
192 xp_UNACCOUNTED->start = -1;
193 xprof_update(xp_UNACCOUNTED);
194 }
195
196 /* Allocate new stack frame */
197 cstack[cstack_head].start = tt;
198 cstack[cstack_head].stop = -1;
199 cstack[cstack_head].accum = 0;
200 cstack[cstack_head].timer = type;
201 cstack[cstack_head].name = timer;
202 cstack_head++;
203 assert(cstack_head < MAXSTACKDEPTH);
88bfe092 204
88bfe092 205}
206
207void
208xprof_stop(xprof_type type, const char *timer)
209{
4fd8d08f 210 hrtime_t tt = get_tick();
211 assert(cstack_head > 0);
212 cstack_head--;
213 assert(cstack[cstack_head].timer == type);
214
215 /* Record timer details */
216 cstack[cstack_head].accum += tt - cstack[cstack_head].start;
217 xprof_Timers[type]->accu.delta = cstack[cstack_head].accum;
218 xprof_Timers[type]->name = timer;
219
220 /* Update */
88bfe092 221 xprof_update(&xprof_Timers[type]->accu);
4fd8d08f 222
223 /* Restart previous timer if we're not at the top level */
224 if (cstack_head > 0) {
225 cstack[cstack_head - 1].start = tt;
226 cstack[cstack_head - 1].stop = 0;
88bfe092 227 return;
4fd8d08f 228 }
229 /* Get here? We're at the top level; unaccounted */
230 xp_UNACCOUNTED->start = tt;
88bfe092 231}
232
233#endif /* USE_XPROF_STATS */