]> git.ipfire.org Git - thirdparty/squid.git/blob - lib/profiler/Profiler.cc
Cleanup: un-wrap C++ header includes
[thirdparty/squid.git] / lib / profiler / Profiler.cc
1 /*
2 * DEBUG: section 81 CPU Profiling Routines
3 * AUTHOR: Andres Kroonmaa, Sep.2000
4 *
5 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
6 * ----------------------------------------------------------
7 *
8 * Squid is the result of efforts by numerous individuals from the
9 * Internet community. Development is led by Duane Wessels of the
10 * National Laboratory for Applied Network Research and funded by the
11 * National Science Foundation. Squid is Copyrighted (C) 1998 by
12 * the Regents of the University of California. Please see the
13 * COPYRIGHT file for full details. Squid incorporates software
14 * developed and/or copyrighted by other sources. Please see the
15 * CREDITS file for full details.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
30 *
31 */
32
33 /**
34 * CPU Profiling implementation.
35 *
36 * \par
37 * This library implements the Probes needed to gather stats.
38 * See src/ProfStats.c which implements historical recording and
39 * presentation in CacheMgr.cgi.
40 *
41 * \par
42 * For timing we prefer on-CPU ops that retrieve cpu ticks counter.
43 * For Intel, this is "rdtsc", which is 64-bit counter that virtually
44 * never wraps. For alpha, this is "rpcc" which is 32-bit counter and
45 * wraps every few seconds. Currently, no handling of wrapping counters
46 * is implemented. Other CPU's are also not covered. Potentially all
47 * modern CPU's has similar counters.
48 *
49 * Usage.
50 * Insert macro PROF_state(probename) in strategic places in code.
51 * PROF_start(probename);
52 * ... section of code measured ...
53 * PROF_stop(probename);
54 *
55 * probename must be added to the xprof_type.h enum list
56 * with prepended "XPROF_" string.
57 *
58 * \section description Description.
59 * \par PROF
60 * gathers stats per probename into structures. It indexes these
61 * structures by enum type index in an array.
62 *
63 * \par PROF
64 * records best, best, average and worst values for delta time,
65 * also, if UNACCED is defined, it measures "empty" time during which
66 * no probes are in measuring state. This allows to see time "unaccounted"
67 * for. If OVERHEAD is defined, additional calculations are made at every
68 * probe to measure approximate overhead of the probe code itself.
69 *
70 * \par
71 * Probe data is stored in linked-list, so the more probes you define,
72 * the more overhead is added to find the deepest nested probe. To reduce
73 * average overhead, linked list is manipulated each time PR_start is
74 * called, so that probe just started is moved 1 position up in linkedlist.
75 * This way frequently used probes are moved closer to the head of list,
76 * reducing average overhead.
77 * Note that all overhead is on the scale of one hundred of CPU clock
78 * ticks, which on the scale of submicroseconds. Yet, to optimise really
79 * fast and frequent sections of code, we want to reduce this overhead
80 * to absolute minimum possible.
81 *
82 * \par
83 * For actual measurements, probe overhead cancels out mostly. Still,
84 * do not take the measured times as facts, they should be viewed in
85 * relative comparison to overall CPU time and on the same platform.
86 *
87 * \par
88 * Every 1 second, Event within squid is called that parses gathered
89 * statistics of every probe, and accumulates that into historical
90 * structures for last 1,5,30 secs, 1,5,30 mins, and 1,5 and 24 hours.
91 * Each second active probe stats are reset, and only historical data
92 * is presented in cachemgr output.
93 *
94 * \section reading Reading stats.
95 * \par
96 * "Worst case" may be misleading. Anything can happen at any section
97 * of code that could delay reaching to probe stop. For eg. system may
98 * need to service interrupt routine, task switch could occur, or page
99 * fault needs to be handled. In this sense, this is quite meaningless
100 * metric. "Best case" shows fastest completion of probe section, and
101 * is also somewhat useless, unless you know that amount of work is
102 * constant. Best metric to watch is "average time" and total cumulated
103 * time in given timeframe, which really show percentage of time spent
104 * in given section of code, and its average completion time. This data
105 * could be used to detect bottlenecks withing squid and optimise them.
106 *
107 * \par
108 * TOTALS are quite off reality. Its there just to summarise cumulative
109 * times and percent column. Percent values over 100% shows that there
110 * have been some probes nested into each other.
111 *
112 */
113
114 #include "squid.h"
115 #include "profiler/Profiler.h"
116
117 #if USE_XPROF_STATS
118
119 #include <cassert>
120 #if HAVE_GNUMALLLOC_H
121 #include <gnumalloc.h>
122 #elif HAVE_MALLOC_H
123 #include <malloc.h>
124 #endif
125 #if HAVE_UNISTD_H
126 #include <unistd.h>
127 #endif
128
129 /* Exported Data */
130 TimersArray *xprof_Timers = NULL;
131
132 /* Private stuff */
133
134 /* new stuff */
135 #define MAXSTACKDEPTH 512
136
137 struct _callstack_entry {
138 int timer; /* index into timers array */
139 const char *name;
140 hrtime_t start, stop, accum;
141 };
142
143 struct _callstack_entry cstack[MAXSTACKDEPTH];
144 int cstack_head = 0;
145
146 #if defined(_MSC_VER) /* Microsoft C Compiler ONLY */
147 static __inline void
148 #else
149 static inline void
150 #endif
151 xprof_update(xprof_stats_data * head)
152 {
153 if (head->delta < head->best)
154 head->best = head->delta;
155 if (head->worst < head->delta)
156 head->worst = head->delta;
157 head->summ += head->delta;
158 ++head->count;
159 }
160
161 static xprof_stats_data *xp_UNACCOUNTED;
162 static int xprof_inited = 0;
163
164 static void
165 xprof_InitLib(void)
166 {
167 if (xprof_inited)
168 return;
169
170 xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node)));
171
172 xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED";
173 xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick();
174 xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu;
175 cstack_head = 0;
176 xprof_inited = 1;
177 }
178
179 void
180 xprof_start(xprof_type type, const char *timer)
181 {
182 hrtime_t tt = get_tick();
183 if (!xprof_inited)
184 xprof_InitLib();
185
186 /* If nested, stop current stack frame */
187 if (cstack_head > 0) {
188 cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head - 1].start;
189 cstack[cstack_head - 1].start = -1;
190 }
191
192 /* Are we at the first level? If so; stop the unaccounted timer */
193 if (cstack_head == 0) {
194 assert(xp_UNACCOUNTED->start != -1);
195 xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start;
196 xp_UNACCOUNTED->start = -1;
197 xprof_update(xp_UNACCOUNTED);
198 }
199
200 /* Allocate new stack frame */
201 cstack[cstack_head].start = tt;
202 cstack[cstack_head].stop = -1;
203 cstack[cstack_head].accum = 0;
204 cstack[cstack_head].timer = type;
205 cstack[cstack_head].name = timer;
206 ++cstack_head;
207 assert(cstack_head < MAXSTACKDEPTH);
208
209 }
210
211 void
212 xprof_stop(xprof_type type, const char *timer)
213 {
214 hrtime_t tt = get_tick();
215 assert(cstack_head > 0);
216 --cstack_head;
217 assert(cstack[cstack_head].timer == type);
218
219 /* Record timer details */
220 cstack[cstack_head].accum += tt - cstack[cstack_head].start;
221 xprof_Timers[type]->accu.delta = cstack[cstack_head].accum;
222 xprof_Timers[type]->name = timer;
223
224 /* Update */
225 xprof_update(&xprof_Timers[type]->accu);
226
227 /* Restart previous timer if we're not at the top level */
228 if (cstack_head > 0) {
229 cstack[cstack_head - 1].start = tt;
230 cstack[cstack_head - 1].stop = 0;
231 return;
232 }
233 /* Get here? We're at the top level; unaccounted */
234 xp_UNACCOUNTED->start = tt;
235 }
236
237 #endif /* USE_XPROF_STATS */