]>
Commit | Line | Data |
---|---|---|
88bfe092 | 1 | /* |
4ac4a490 | 2 | * Copyright (C) 1996-2017 The Squid Software Foundation and contributors |
88bfe092 | 3 | * |
f0ba1a9b AJ |
4 | * Squid software is distributed under GPLv2+ license and includes |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
88bfe092 | 7 | */ |
8 | ||
f0ba1a9b AJ |
9 | /* DEBUG: section 81 CPU Profiling Routines */ |
10 | ||
dc47f531 | 11 | /** |
88bfe092 | 12 | * CPU Profiling implementation. |
26ac0430 | 13 | * |
dc47f531 | 14 | * \par |
88bfe092 | 15 | * This library implements the Probes needed to gather stats. |
16 | * See src/ProfStats.c which implements historical recording and | |
17 | * presentation in CacheMgr.cgi. | |
26ac0430 | 18 | * |
dc47f531 | 19 | * \par |
88bfe092 | 20 | * For timing we prefer on-CPU ops that retrieve cpu ticks counter. |
21 | * For Intel, this is "rdtsc", which is 64-bit counter that virtually | |
22 | * never wraps. For alpha, this is "rpcc" which is 32-bit counter and | |
23 | * wraps every few seconds. Currently, no handling of wrapping counters | |
24 | * is implemented. Other CPU's are also not covered. Potentially all | |
25 | * modern CPU's has similar counters. | |
26ac0430 | 26 | * |
88bfe092 | 27 | * Usage. |
28 | * Insert macro PROF_state(probename) in strategic places in code. | |
29 | * PROF_start(probename); | |
30 | * ... section of code measured ... | |
31 | * PROF_stop(probename); | |
26ac0430 | 32 | * |
25f98340 | 33 | * probename must be added to the xprof_type.h enum list |
88bfe092 | 34 | * with prepended "XPROF_" string. |
35 | * | |
dc47f531 AJ |
36 | * \section description Description. |
37 | * \par PROF | |
38 | * gathers stats per probename into structures. It indexes these | |
88bfe092 | 39 | * structures by enum type index in an array. |
26ac0430 | 40 | * |
dc47f531 AJ |
41 | * \par PROF |
42 | * records best, best, average and worst values for delta time, | |
88bfe092 | 43 | * also, if UNACCED is defined, it measures "empty" time during which |
44 | * no probes are in measuring state. This allows to see time "unaccounted" | |
45 | * for. If OVERHEAD is defined, additional calculations are made at every | |
46 | * probe to measure approximate overhead of the probe code itself. | |
26ac0430 | 47 | * |
dc47f531 | 48 | * \par |
88bfe092 | 49 | * Probe data is stored in linked-list, so the more probes you define, |
50 | * the more overhead is added to find the deepest nested probe. To reduce | |
51 | * average overhead, linked list is manipulated each time PR_start is | |
52 | * called, so that probe just started is moved 1 position up in linkedlist. | |
53 | * This way frequently used probes are moved closer to the head of list, | |
54 | * reducing average overhead. | |
55 | * Note that all overhead is on the scale of one hundred of CPU clock | |
56 | * ticks, which on the scale of submicroseconds. Yet, to optimise really | |
57 | * fast and frequent sections of code, we want to reduce this overhead | |
58 | * to absolute minimum possible. | |
26ac0430 | 59 | * |
dc47f531 | 60 | * \par |
88bfe092 | 61 | * For actual measurements, probe overhead cancels out mostly. Still, |
62 | * do not take the measured times as facts, they should be viewed in | |
63 | * relative comparison to overall CPU time and on the same platform. | |
26ac0430 | 64 | * |
dc47f531 | 65 | * \par |
88bfe092 | 66 | * Every 1 second, Event within squid is called that parses gathered |
67 | * statistics of every probe, and accumulates that into historical | |
68 | * structures for last 1,5,30 secs, 1,5,30 mins, and 1,5 and 24 hours. | |
69 | * Each second active probe stats are reset, and only historical data | |
70 | * is presented in cachemgr output. | |
26ac0430 | 71 | * |
dc47f531 AJ |
72 | * \section reading Reading stats. |
73 | * \par | |
88bfe092 | 74 | * "Worst case" may be misleading. Anything can happen at any section |
75 | * of code that could delay reaching to probe stop. For eg. system may | |
76 | * need to service interrupt routine, task switch could occur, or page | |
77 | * fault needs to be handled. In this sense, this is quite meaningless | |
78 | * metric. "Best case" shows fastest completion of probe section, and | |
79 | * is also somewhat useless, unless you know that amount of work is | |
80 | * constant. Best metric to watch is "average time" and total cumulated | |
81 | * time in given timeframe, which really show percentage of time spent | |
82 | * in given section of code, and its average completion time. This data | |
83 | * could be used to detect bottlenecks withing squid and optimise them. | |
26ac0430 | 84 | * |
dc47f531 | 85 | * \par |
88bfe092 | 86 | * TOTALS are quite off reality. Its there just to summarise cumulative |
87 | * times and percent column. Percent values over 100% shows that there | |
88 | * have been some probes nested into each other. | |
26ac0430 | 89 | * |
88bfe092 | 90 | */ |
91 | ||
f7f3304a | 92 | #include "squid.h" |
25f98340 | 93 | #include "profiler/Profiler.h" |
cf8d9cd4 | 94 | |
32d002cb | 95 | #if USE_XPROF_STATS |
88bfe092 | 96 | |
074d6a40 | 97 | #include <cassert> |
88bfe092 | 98 | #if HAVE_GNUMALLLOC_H |
99 | #include <gnumalloc.h> | |
482aa790 | 100 | #elif HAVE_MALLOC_H |
88bfe092 | 101 | #include <malloc.h> |
102 | #endif | |
103 | #if HAVE_UNISTD_H | |
104 | #include <unistd.h> | |
105 | #endif | |
106 | ||
107 | /* Exported Data */ | |
108 | TimersArray *xprof_Timers = NULL; | |
88bfe092 | 109 | |
110 | /* Private stuff */ | |
111 | ||
4fd8d08f | 112 | /* new stuff */ |
f53969cc | 113 | #define MAXSTACKDEPTH 512 |
4fd8d08f | 114 | |
115 | struct _callstack_entry { | |
f53969cc | 116 | int timer; /* index into timers array */ |
26ac0430 AJ |
117 | const char *name; |
118 | hrtime_t start, stop, accum; | |
4fd8d08f | 119 | }; |
120 | ||
121 | struct _callstack_entry cstack[MAXSTACKDEPTH]; | |
122 | int cstack_head = 0; | |
123 | ||
0ef0f1de | 124 | #if defined(_MSC_VER) /* Microsoft C Compiler ONLY */ |
125 | static __inline void | |
126 | #else | |
88bfe092 | 127 | static inline void |
0ef0f1de | 128 | #endif |
88bfe092 | 129 | xprof_update(xprof_stats_data * head) |
130 | { | |
88bfe092 | 131 | if (head->delta < head->best) |
26ac0430 | 132 | head->best = head->delta; |
88bfe092 | 133 | if (head->worst < head->delta) |
26ac0430 | 134 | head->worst = head->delta; |
88bfe092 | 135 | head->summ += head->delta; |
742a021b | 136 | ++head->count; |
88bfe092 | 137 | } |
138 | ||
139 | static xprof_stats_data *xp_UNACCOUNTED; | |
140 | static int xprof_inited = 0; | |
141 | ||
142 | static void | |
143 | xprof_InitLib(void) | |
144 | { | |
145 | if (xprof_inited) | |
26ac0430 | 146 | return; |
88bfe092 | 147 | |
25f98340 | 148 | xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node))); |
88bfe092 | 149 | |
150 | xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED"; | |
151 | xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick(); | |
152 | xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu; | |
4fd8d08f | 153 | cstack_head = 0; |
88bfe092 | 154 | xprof_inited = 1; |
155 | } | |
156 | ||
157 | void | |
158 | xprof_start(xprof_type type, const char *timer) | |
159 | { | |
4fd8d08f | 160 | hrtime_t tt = get_tick(); |
88bfe092 | 161 | if (!xprof_inited) |
26ac0430 | 162 | xprof_InitLib(); |
88bfe092 | 163 | |
4fd8d08f | 164 | /* If nested, stop current stack frame */ |
165 | if (cstack_head > 0) { | |
26ac0430 AJ |
166 | cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head - 1].start; |
167 | cstack[cstack_head - 1].start = -1; | |
4fd8d08f | 168 | } |
169 | ||
170 | /* Are we at the first level? If so; stop the unaccounted timer */ | |
171 | if (cstack_head == 0) { | |
26ac0430 AJ |
172 | assert(xp_UNACCOUNTED->start != -1); |
173 | xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start; | |
174 | xp_UNACCOUNTED->start = -1; | |
175 | xprof_update(xp_UNACCOUNTED); | |
4fd8d08f | 176 | } |
177 | ||
178 | /* Allocate new stack frame */ | |
179 | cstack[cstack_head].start = tt; | |
180 | cstack[cstack_head].stop = -1; | |
181 | cstack[cstack_head].accum = 0; | |
182 | cstack[cstack_head].timer = type; | |
183 | cstack[cstack_head].name = timer; | |
742a021b | 184 | ++cstack_head; |
4fd8d08f | 185 | assert(cstack_head < MAXSTACKDEPTH); |
88bfe092 | 186 | |
88bfe092 | 187 | } |
188 | ||
189 | void | |
190 | xprof_stop(xprof_type type, const char *timer) | |
191 | { | |
4fd8d08f | 192 | hrtime_t tt = get_tick(); |
193 | assert(cstack_head > 0); | |
742a021b | 194 | --cstack_head; |
4fd8d08f | 195 | assert(cstack[cstack_head].timer == type); |
196 | ||
197 | /* Record timer details */ | |
198 | cstack[cstack_head].accum += tt - cstack[cstack_head].start; | |
199 | xprof_Timers[type]->accu.delta = cstack[cstack_head].accum; | |
200 | xprof_Timers[type]->name = timer; | |
201 | ||
202 | /* Update */ | |
88bfe092 | 203 | xprof_update(&xprof_Timers[type]->accu); |
4fd8d08f | 204 | |
205 | /* Restart previous timer if we're not at the top level */ | |
206 | if (cstack_head > 0) { | |
26ac0430 AJ |
207 | cstack[cstack_head - 1].start = tt; |
208 | cstack[cstack_head - 1].stop = 0; | |
209 | return; | |
4fd8d08f | 210 | } |
211 | /* Get here? We're at the top level; unaccounted */ | |
212 | xp_UNACCOUNTED->start = tt; | |
88bfe092 | 213 | } |
214 | ||
215 | #endif /* USE_XPROF_STATS */ | |
f53969cc | 216 |