From: Willy Tarreau Date: Sat, 20 Jan 2018 18:30:13 +0000 (+0100) Subject: MINOR: global: add some global activity counters to help debugging X-Git-Tag: v1.9-dev1~508 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d80cb4ee1386cb5853170371d11e41284739e9d4;p=thirdparty%2Fhaproxy.git MINOR: global: add some global activity counters to help debugging A number of counters have been added at special places helping better understanding certain bug reports. These counters are maintained per thread and are shown using "show activity" on the CLI. The "clear counters" commands also reset these counters. The output is sent as a single write(), which currently produces up to about 7 kB of data for 64 threads. If more counters are added, it may be necessary to write into multiple buffers, or to reset the counters. To backport to 1.8 to help collect more detailed bug reports. --- diff --git a/doc/management.txt b/doc/management.txt index 3240505b4b..af216521a2 100644 --- a/doc/management.txt +++ b/doc/management.txt @@ -1321,7 +1321,8 @@ add map clear counters Clear the max values of the statistics counters in each proxy (frontend & - backend) and in each server. The accumulated counters are not affected. This + backend) and in each server. The accumulated counters are not affected. The + internal activity counters reported by "show activity" are also reset. This can be used to get clean counters after an incident, without having to restart nor to clear traffic counters. This command is restricted and can only be issued on sockets configured for levels "operator" or "admin". @@ -1861,6 +1862,18 @@ show fd [] that the output format may evolve over time so this output must not be parsed by tools designed to be durable. +show activity + Reports some counters about internal events that will help developers and + more generally people who know haproxy well enough to narrow down the causes + of reports of abnormal behaviours. A typical example would be a properly + running process never sleeping and eating 100% of the CPU. The output fields + will be made of one line per metric, and per-thread counters on the same + line. These counters are 32-bit and will wrap during the process' life, which + is not a problem since calls to this command will typically be performed + twice. The fields are purposely not documented so that their exact meaning is + verified in the code where the counters are fed. These values are also reset + by the "clear counters" command. + show info [typed|json] Dump info about haproxy status on current process. If "typed" is passed as an optional argument, field numbers, names and types are emitted as well so that diff --git a/include/types/global.h b/include/types/global.h index 5c5cf732e0..bd7761cd6f 100644 --- a/include/types/global.h +++ b/include/types/global.h @@ -173,7 +173,34 @@ struct global { #endif }; +/* per-thread activity reports. It's important that it's aligned on cache lines + * because some elements will be updated very often. Most counters are OK on + * 32-bit since this will be used during debugging sessions for troubleshooting + * in iterative mode. + */ +struct activity { + unsigned int loops; // complete loops in run_poll_loop() + unsigned int wake_cache; // active fd_cache prevented poll() from sleeping + unsigned int wake_tasks; // active tasks prevented poll() from sleeping + unsigned int wake_applets; // active applets prevented poll() from sleeping + unsigned int wake_signal; // pending signal prevented poll() from sleeping + unsigned int poll_exp; // number of times poll() sees an expired timeout (includes wake_*) + unsigned int poll_drop; // poller dropped a dead FD from the update list + unsigned int poll_dead; // poller woke up with a dead FD + unsigned int poll_skip; // poller skipped another thread's FD + unsigned int fd_skip; // fd cache skipped another thread's FD + unsigned int fd_lock; // fd cache skipped a locked FD + unsigned int fd_del; // fd cache detected a deleted FD + unsigned int conn_dead; // conn_fd_handler woke up on an FD indicating a dead connection + unsigned int stream; // calls to process_stream() + unsigned int empty_rq; // calls to process_runnable_tasks() with nothing for the thread + unsigned int long_rq; // process_runnable_tasks() left with tasks in the run queue + char __pad[0]; // unused except to check remaining room + char __end[0] __attribute__((aligned(64))); // align size to 64. +}; + extern struct global global; +extern struct activity activity[MAX_THREADS]; extern int pid; /* current process id */ extern int relative_pid; /* process id starting at 1 */ extern unsigned long pid_bit; /* bit corresponding to the process id */ diff --git a/src/cli.c b/src/cli.c index 149ecd7c8b..3e62c311bd 100644 --- a/src/cli.c +++ b/src/cli.c @@ -855,6 +855,53 @@ static int cli_io_handler_show_fd(struct appctx *appctx) return 1; } +/* This function dumps some activity counters used by developers and support to + * rule out some hypothesis during bug reports. It returns 0 if the output + * buffer is full and it needs to be called again, otherwise non-zero. It dumps + * everything at once in the buffer and is not designed to do it in multiple + * passes. + */ +static int cli_io_handler_show_activity(struct appctx *appctx) +{ + struct stream_interface *si = appctx->owner; + int thr; + + if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW))) + return 1; + + chunk_reset(&trash); + + chunk_appendf(&trash, "thread_id: %u", tid); + chunk_appendf(&trash, "\ndate_now: %lu.%06lu", (long)now.tv_sec, (long)now.tv_usec); + chunk_appendf(&trash, "\nloops:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].loops); + chunk_appendf(&trash, "\nwake_cache:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].wake_cache); + chunk_appendf(&trash, "\nwake_tasks:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].wake_tasks); + chunk_appendf(&trash, "\nwake_applets:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].wake_applets); + chunk_appendf(&trash, "\nwake_signal:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].wake_signal); + chunk_appendf(&trash, "\npoll_exp:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].poll_exp); + chunk_appendf(&trash, "\npoll_drop:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].poll_drop); + chunk_appendf(&trash, "\npoll_dead:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].poll_dead); + chunk_appendf(&trash, "\npoll_skip:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].poll_skip); + chunk_appendf(&trash, "\nfd_skip:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].fd_skip); + chunk_appendf(&trash, "\nfd_lock:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].fd_lock); + chunk_appendf(&trash, "\nfd_del:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].fd_del); + chunk_appendf(&trash, "\nconn_dead:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].conn_dead); + chunk_appendf(&trash, "\nstream:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].stream); + chunk_appendf(&trash, "\nempty_rq:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].empty_rq); + chunk_appendf(&trash, "\nlong_rq:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].long_rq); + + chunk_appendf(&trash, "\n"); + + if (ci_putchk(si_ic(si), &trash) == -1) { + chunk_reset(&trash); + chunk_printf(&trash, "[output too large, cannot dump]\n"); + si_applet_cant_put(si); + } + + /* dump complete */ + return 1; +} + /* * CLI IO handler for `show cli sockets`. * Uses ctx.cli.p0 to store the restart pointer. @@ -1428,6 +1475,7 @@ static struct cli_kw_list cli_kws = {{ },{ { { "show", "env", NULL }, "show env [var] : dump environment variables known to the process", cli_parse_show_env, cli_io_handler_show_env, NULL }, { { "show", "cli", "sockets", NULL }, "show cli sockets : dump list of cli sockets", cli_parse_default, cli_io_handler_show_cli_sock, NULL }, { { "show", "fd", NULL }, "show fd [num] : dump list of file descriptors in use", cli_parse_show_fd, cli_io_handler_show_fd, NULL }, + { { "show", "activity", NULL }, "show activity : show per-thread activity stats (for support/developers)", cli_parse_default, cli_io_handler_show_activity, NULL }, { { "_getsocks", NULL }, NULL, _getsocks, NULL }, {{},} }}; diff --git a/src/connection.c b/src/connection.c index 0f8acb02db..48d7a64de3 100644 --- a/src/connection.c +++ b/src/connection.c @@ -63,8 +63,10 @@ void conn_fd_handler(int fd) struct connection *conn = fdtab[fd].owner; unsigned int flags; - if (unlikely(!conn)) + if (unlikely(!conn)) { + activity[tid].conn_dead++; return; + } conn_refresh_polling_flags(conn); conn->flags |= CO_FL_WILL_UPDATE; diff --git a/src/ev_epoll.c b/src/ev_epoll.c index 602a243d7e..679dfee4d3 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -68,8 +68,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) { fd = fd_updt[updt_idx]; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_drop++; continue; + } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); fdtab[fd].updated = 0; @@ -114,8 +116,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) /* compute the epoll_wait() timeout */ if (!exp) wait_time = MAX_DELAY_MS; - else if (tick_is_expired(exp, now_ms)) + else if (tick_is_expired(exp, now_ms)) { + activity[tid].poll_exp++; wait_time = 0; + } else { wait_time = TICKS_TO_MS(tick_remain(now_ms, exp)) + 1; if (wait_time > MAX_DELAY_MS) @@ -136,8 +140,15 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) unsigned int e = epoll_events[count].events; fd = epoll_events[count].data.fd; - if (!fdtab[fd].owner || !(fdtab[fd].thread_mask & tid_bit)) + if (!fdtab[fd].owner) { + activity[tid].poll_dead++; + continue; + } + + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].poll_skip++; continue; + } /* it looks complicated but gcc can optimize it away when constants * have same values... In fact it depends on gcc :-( diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c index b42ee3dd0e..69d51b6b05 100644 --- a/src/ev_kqueue.c +++ b/src/ev_kqueue.c @@ -47,8 +47,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) { fd = fd_updt[updt_idx]; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_drop++; continue; + } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); fdtab[fd].updated = 0; @@ -106,6 +108,8 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) timeout.tv_sec = (delta_ms / 1000); timeout.tv_nsec = (delta_ms % 1000) * 1000000; } + else + activity[tid].poll_exp++; fd = MIN(maxfd, global.tune.maxpollevents); gettimeofday(&before_poll, NULL); @@ -122,8 +126,15 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) unsigned int n = 0; fd = kev[count].ident; - if (!fdtab[fd].owner || !(fdtab[fd].thread_mask & tid_bit)) + if (!fdtab[fd].owner) { + activity[tid].poll_dead++; continue; + } + + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].poll_skip++; + continue; + } if (kev[count].filter == EVFILT_READ) { if (kev[count].data) diff --git a/src/ev_poll.c b/src/ev_poll.c index 610509bd6f..efd56ee191 100644 --- a/src/ev_poll.c +++ b/src/ev_poll.c @@ -73,8 +73,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) { fd = fd_updt[updt_idx]; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_drop++; continue; + } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); fdtab[fd].updated = 0; @@ -111,13 +113,21 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) continue; for (count = 0, fd = fds * 8*sizeof(**fd_evts); count < 8*sizeof(**fd_evts) && fd < maxfd; count++, fd++) { - - if (!fdtab[fd].owner || !(fdtab[fd].thread_mask & tid_bit)) - continue; - sr = (rn >> count) & 1; sw = (wn >> count) & 1; if ((sr|sw)) { + if (!fdtab[fd].owner) { + /* should normally not happen here except + * due to rare thread concurrency + */ + continue; + } + + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].poll_skip++; + continue; + } + poll_events[nbfd].fd = fd; poll_events[nbfd].events = (sr ? (POLLIN | POLLRDHUP) : 0) | (sw ? POLLOUT : 0); nbfd++; @@ -128,8 +138,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) /* now let's wait for events */ if (!exp) wait_time = MAX_DELAY_MS; - else if (tick_is_expired(exp, now_ms)) + else if (tick_is_expired(exp, now_ms)) { + activity[tid].poll_exp++; wait_time = 0; + } else { wait_time = TICKS_TO_MS(tick_remain(now_ms, exp)) + 1; if (wait_time > MAX_DELAY_MS) @@ -152,8 +164,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) /* ok, we found one active fd */ status--; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_dead++; continue; + } /* it looks complicated but gcc can optimize it away when constants * have same values... In fact it depends on gcc :-( diff --git a/src/ev_select.c b/src/ev_select.c index b2b4e50359..52c4454735 100644 --- a/src/ev_select.c +++ b/src/ev_select.c @@ -55,8 +55,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) { fd = fd_updt[updt_idx]; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_drop++; continue; + } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); fdtab[fd].updated = 0; @@ -117,6 +119,8 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) delta.tv_sec = (delta_ms / 1000); delta.tv_usec = (delta_ms % 1000) * 1000; } + else + activity[tid].poll_exp++; gettimeofday(&before_poll, NULL); status = select(maxfd, @@ -138,11 +142,15 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (count = BITS_PER_INT, fd = fds * BITS_PER_INT; count && fd < maxfd; count--, fd++) { unsigned int n = 0; - /* if we specify read first, the accepts and zero reads will be - * seen first. Moreover, system buffers will be flushed faster. - */ - if (!fdtab[fd].owner || !(fdtab[fd].thread_mask & tid_bit)) + if (!fdtab[fd].owner) { + activity[tid].poll_dead++; + continue; + } + + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].poll_skip++; continue; + } if (FD_ISSET(fd, tmp_evts[DIR_RD])) n |= FD_POLL_IN; diff --git a/src/fd.c b/src/fd.c index 9fb09ab715..148b4d27ed 100644 --- a/src/fd.c +++ b/src/fd.c @@ -243,10 +243,14 @@ void fd_process_cached_events() for (entry = 0; entry < fd_cache_num; ) { fd = fd_cache[entry]; - if (!(fdtab[fd].thread_mask & tid_bit)) + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].fd_skip++; goto next; - if (HA_SPIN_TRYLOCK(FD_LOCK, &fdtab[fd].lock)) + } + if (HA_SPIN_TRYLOCK(FD_LOCK, &fdtab[fd].lock)) { + activity[tid].fd_lock++; goto next; + } HA_RWLOCK_RDUNLOCK(FDCACHE_LOCK, &fdcache_lock); @@ -272,8 +276,10 @@ void fd_process_cached_events() /* If the fd was removed from the cache, it has been * replaced by the next one that we don't want to skip ! */ - if (entry < fd_cache_num && fd_cache[entry] != fd) + if (entry < fd_cache_num && fd_cache[entry] != fd) { + activity[tid].fd_del++; continue; + } next: entry++; } diff --git a/src/haproxy.c b/src/haproxy.c index a8d0fad87f..952733ee38 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -165,6 +165,8 @@ struct global global = { /* others NULL OK */ }; +struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { }; + /*********************************************************************/ int stopping; /* non zero means stopping in progress */ @@ -2371,7 +2373,7 @@ static void sync_poll_loop() /* Runs the polling loop */ static void run_poll_loop() { - int next; + int next, exp; tv_update_date(0,1); while (1) { @@ -2389,18 +2391,27 @@ static void run_poll_loop() break; /* expire immediately if events are pending */ - if (fd_cache_num || (active_tasks_mask & tid_bit) || signal_queue_len || (active_applets_mask & tid_bit)) - next = now_ms; + exp = now_ms; + if (fd_cache_num) + activity[tid].wake_cache++; + else if (active_tasks_mask & tid_bit) + activity[tid].wake_tasks++; + else if (active_applets_mask & tid_bit) + activity[tid].wake_applets++; + else if (signal_queue_len) + activity[tid].wake_signal++; + else + exp = next; /* The poller will ensure it returns around */ - cur_poller.poll(&cur_poller, next); + cur_poller.poll(&cur_poller, exp); fd_process_cached_events(); applet_run_active(); /* Synchronize all polling loops */ sync_poll_loop(); - + activity[tid].loops++; } } diff --git a/src/stats.c b/src/stats.c index 01259282d0..61e0549810 100644 --- a/src/stats.c +++ b/src/stats.c @@ -3578,6 +3578,8 @@ static int cli_parse_clear_counters(char **args, struct appctx *appctx, void *pr global.ssl_max = 0; global.ssl_fe_keys_max = 0; global.ssl_be_keys_max = 0; + + memset(activity, 0, sizeof(activity)); return 1; } diff --git a/src/stream.c b/src/stream.c index 60d3eff44c..ebe41be190 100644 --- a/src/stream.c +++ b/src/stream.c @@ -1627,6 +1627,8 @@ struct task *process_stream(struct task *t) struct channel *req, *res; struct stream_interface *si_f, *si_b; + activity[tid].stream++; + req = &s->req; res = &s->res; diff --git a/src/task.c b/src/task.c index 053376c394..fd9acf66d8 100644 --- a/src/task.c +++ b/src/task.c @@ -196,8 +196,10 @@ void process_runnable_tasks() max_processed = 200; if (unlikely(global.nbthread <= 1)) { /* when no lock is needed, this loop is much faster */ - if (!(active_tasks_mask & tid_bit)) + if (!(active_tasks_mask & tid_bit)) { + activity[tid].empty_rq++; return; + } active_tasks_mask &= ~tid_bit; rq_next = eb32sc_lookup_ge(&rqueue, rqueue_ticks - TIMER_LOOK_BACK, tid_bit); @@ -245,6 +247,7 @@ void process_runnable_tasks() max_processed--; if (max_processed <= 0) { active_tasks_mask |= tid_bit; + activity[tid].long_rq++; break; } } @@ -254,6 +257,7 @@ void process_runnable_tasks() HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock); if (!(active_tasks_mask & tid_bit)) { HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock); + activity[tid].empty_rq++; return; } @@ -335,6 +339,7 @@ void process_runnable_tasks() HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock); if (max_processed <= 0) { active_tasks_mask |= tid_bit; + activity[tid].long_rq++; break; } }