From d2d3348acbaca96a2e5254eed3daa2feb7f724c8 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 25 Apr 2019 17:09:07 +0200 Subject: [PATCH] MINOR: activity: enable automatic profiling turn on/off Instead of having to manually turn task profiling on/off in the configuration, by default it will work in "auto" mode, which automatically turns on on any thread experiencing sustained loop latencies over one millisecond averaged over the last 1024 samples. This may happen with configs using lots of regex (thing map_reg for example, which is the lazy way to convert Apache's rewrite rules but must not be abused), and such high latencies affect all the process and the problem is most often intermittent (e.g. hitting a map which is only used for certain host names). Thus now by default, with profiling set to "auto", it remains off all the time until something bad happens. This also helps better focus on the issues when looking at the logs as well as in "show sess" output. It automatically turns off when the average loop latency over the last 1024 calls goes below 990 microseconds (which typically takes a while when in idle). This patch could be backported to stable versions after a bit more exposure, as it definitely improves observability and the ability to quickly spot the culprit. In this case, previous patch ("MINOR: activity: make the profiling status per thread and not global") must also be taken. --- doc/configuration.txt | 21 +++++++++++++---- doc/management.txt | 2 +- include/proto/activity.h | 30 ++++++++++++++++++++---- src/activity.c | 49 +++++++++++++++++++++++++++------------- src/proto_http.c | 2 +- 5 files changed, 76 insertions(+), 28 deletions(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 832373785d..205f5de902 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -1462,11 +1462,22 @@ noreuseport Disables the use of SO_REUSEPORT - see socket(7). It is equivalent to the command line argument "-dR". -profiling.tasks { on | off } - Enables ('on') or disables ('off') per-task CPU profiling. CPU profiling per - task can be very convenient to report where the time is spent and which - requests have what effect on which other request. It is not enabled by - default as it may consume a little bit extra CPU. This requires a system +profiling.tasks { auto | on | off } + Enables ('on') or disables ('off') per-task CPU profiling. When set to 'auto' + the profiling automatically turns on a thread when it starts to suffer from + an average latency of 1000 microseconds or higher as reported in the + "avg_loop_us" activity field, and automatically turns off when the latency + reutnrs below 990 microseconds (this value is an average over the last 1024 + loops so it does not vary quickly and tends to significantly smooth short + spikes). It may also spontaneously trigger from time to time on overloaded + systems, containers, or virtual machines, or when the system swaps (which + must absolutely never happen on a load balancer). + + CPU profiling per task can be very convenient to report where the time is + spent and which requests have what effect on which other request. Enabling + it will typically affect the overall's performance by less than 1%, thus it + is recommended to leave it to the default 'auto' value so that it only + operates when a problem is identified. This feature requires a system supporting the clock_gettime(2) syscall with clock identifiers CLOCK_MONOTONIC and CLOCK_THREAD_CPUTIME_ID, otherwise the reported time will be zero. This option may be changed at run time using "set profiling" on the diff --git a/doc/management.txt b/doc/management.txt index 952a25f3df..0d3944a5e4 100644 --- a/doc/management.txt +++ b/doc/management.txt @@ -1667,7 +1667,7 @@ set maxconn global delayed until the threshold is reached. A value of zero restores the initial setting. -set profiling { tasks } { on | off } +set profiling { tasks } { auto | on | off } Enables or disables CPU profiling for the indicated subsystem. This is equivalent to setting or clearing the "profiling" settings in the "global" section of the configuration file. Please also see "show profiling". diff --git a/include/proto/activity.h b/include/proto/activity.h index 922f493924..dd783448fb 100644 --- a/include/proto/activity.h +++ b/include/proto/activity.h @@ -29,7 +29,10 @@ #include /* bit fields for "profiling" */ -#define HA_PROF_TASKS 0x00000001 /* enable per-task CPU profiling */ +#define HA_PROF_TASKS_OFF 0x00000000 /* per-task CPU profiling forced disabled */ +#define HA_PROF_TASKS_AUTO 0x00000001 /* per-task CPU profiling automatic */ +#define HA_PROF_TASKS_ON 0x00000002 /* per-task CPU profiling forced enabled */ +#define HA_PROF_TASKS_MASK 0x00000003 /* per-task CPU profiling mask */ extern unsigned int profiling; extern unsigned long task_profiling_mask; @@ -49,6 +52,13 @@ static inline void activity_count_runtime() uint64_t new_cpu_time; int64_t stolen; uint32_t run_time; + uint32_t up, down; + + /* 1 millisecond per loop on average over last 1024 iterations is + * enough to turn on profiling. + */ + up = 1000; + down = up * 99 / 100; new_cpu_time = now_cpu_time(); new_mono_time = now_mono_time(); @@ -69,12 +79,22 @@ static inline void activity_count_runtime() run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec); swrate_add(&activity[tid].avg_loop_us, TIME_STATS_SAMPLES, run_time); + /* reaching the "up" threshold on average switches profiling to "on" + * when automatic, and going back below the "down" threshold switches + * to off. + */ if (!(task_profiling_mask & tid_bit)) { - if (unlikely(profiling & HA_PROF_TASKS)) - _HA_ATOMIC_OR(&task_profiling_mask, tid_bit); + if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_ON || + ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AUTO && run_time >= up))) { + if (swrate_avg(activity[tid].avg_loop_us, TIME_STATS_SAMPLES) >= up) + _HA_ATOMIC_OR(&task_profiling_mask, tid_bit); + } } else { - if (unlikely(!(profiling & HA_PROF_TASKS))) - _HA_ATOMIC_AND(&task_profiling_mask, ~tid_bit); + if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_OFF || + ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AUTO && run_time <= down))) { + if (swrate_avg(activity[tid].avg_loop_us, TIME_STATS_SAMPLES) <= down) + _HA_ATOMIC_AND(&task_profiling_mask, ~tid_bit); + } } } diff --git a/src/activity.c b/src/activity.c index eac2c10c34..5ffa6623d5 100644 --- a/src/activity.c +++ b/src/activity.c @@ -23,7 +23,7 @@ /* bit field of profiling options. Beware, may be modified at runtime! */ -unsigned int profiling = 0; +unsigned int profiling = HA_PROF_TASKS_AUTO; unsigned long task_profiling_mask = 0; /* One struct per thread containing all collected measurements */ @@ -49,11 +49,13 @@ static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *cur return -1; if (strcmp(args[1], "on") == 0) - profiling |= HA_PROF_TASKS; + profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON; + else if (strcmp(args[1], "auto") == 0) + profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AUTO; else if (strcmp(args[1], "off") == 0) - profiling &= ~HA_PROF_TASKS; + profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF; else { - memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]); + memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]); return -1; } return 0; @@ -62,27 +64,34 @@ static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *cur /* parse a "set profiling" command. It always returns 1. */ static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private) { - unsigned int bit = 0; - if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) return 1; - if (strcmp(args[2], "tasks") == 0) - bit = HA_PROF_TASKS; - else { + if (strcmp(args[2], "tasks") != 0) { appctx->ctx.cli.severity = LOG_ERR; appctx->ctx.cli.msg = "Expects 'tasks'.\n"; appctx->st0 = CLI_ST_PRINT; return 1; } - if (strcmp(args[3], "on") == 0) - _HA_ATOMIC_OR(&profiling, bit); - else if (strcmp(args[3], "off") == 0) - _HA_ATOMIC_AND(&profiling, ~bit); + if (strcmp(args[3], "on") == 0) { + unsigned int old = profiling; + while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON)) + ; + } + else if (strcmp(args[3], "auto") == 0) { + unsigned int old = profiling; + while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AUTO)) + ; + } + else if (strcmp(args[3], "off") == 0) { + unsigned int old = profiling; + while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF)) + ; + } else { appctx->ctx.cli.severity = LOG_ERR; - appctx->ctx.cli.msg = "Expects either 'on' or 'off'.\n"; + appctx->ctx.cli.msg = "Expects 'on', 'auto', or 'off'.\n"; appctx->st0 = CLI_ST_PRINT; return 1; } @@ -95,14 +104,22 @@ static int cli_parse_set_profiling(char **args, char *payload, struct appctx *ap static int cli_io_handler_show_profiling(struct appctx *appctx) { struct stream_interface *si = appctx->owner; + const char *str; if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW))) return 1; chunk_reset(&trash); - chunk_printf(&trash, "Per-task CPU profiling : %s # set profiling tasks {on|off}\n", - (profiling & HA_PROF_TASKS) ? "on" : "off"); + switch (profiling & HA_PROF_TASKS_MASK) { + case HA_PROF_TASKS_AUTO: str="auto"; break; + case HA_PROF_TASKS_ON: str="on"; break; + default: str="off"; break; + } + + chunk_printf(&trash, + "Per-task CPU profiling : %s # set profiling tasks {on|auto|off}\n", + str); if (ci_putchk(si_ic(si), &trash) == -1) { /* failed, try again */ diff --git a/src/proto_http.c b/src/proto_http.c index 83293241dd..82292c11ba 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -3449,7 +3449,7 @@ void http_end_txn_clean_session(struct stream *s) s->task->calls = 0; s->task->cpu_time = 0; s->task->lat_time = 0; - s->task->call_date = (profiling & HA_PROF_TASKS) ? now_mono_time() : 0; + s->task->call_date = ((profiling & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AUTO) ? now_mono_time() : 0; s->logs.accept_date = date; /* user-visible date for logging */ s->logs.tv_accept = now; /* corrected date for internal use */ -- 2.39.5