]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MINOR: tasks: do not keep cpu and latency times in struct task
authorWilly Tarreau <w@1wt.eu>
Wed, 7 Sep 2022 07:17:45 +0000 (09:17 +0200)
committerWilly Tarreau <w@1wt.eu>
Thu, 8 Sep 2022 12:19:15 +0000 (14:19 +0200)
It was a mistake to put these two fields in the struct task. This
was added in 1.9 via commit 9efd7456e ("MEDIUM: tasks: collect per-task
CPU time and latency"). These fields are used solely by streams in
order to report the measurements via the lat_ns* and cpu_ns* sample
fetch functions when task profiling is enabled. For the rest of the
tasks, this is pure CPU waste when profiling is enabled, and memory
waste 100% of the time, as the point where these latencies and usages
are measured is in the profiling array.

Let's move the fields to the stream instead, and have process_stream()
retrieve the relevant info from the thread's context.

The struct task is now back to 120 bytes, i.e. almost two cache lines,
with 32 bit still available.

include/haproxy/stream-t.h
include/haproxy/task-t.h
include/haproxy/task.h
src/sample.c
src/stream.c
src/task.c

index a09e541cb76f4d84c2c613b317a0500941d3bb5c..a2928e5de906ad466a9ee2f0801fdf7fe8a57924 100644 (file)
@@ -168,6 +168,8 @@ struct stream {
        struct list back_refs;          /* list of users tracking this stream */
        struct buffer_wait buffer_wait; /* position in the list of objects waiting for a buffer */
 
+       uint64_t lat_time;              /* total latency time experienced */
+       uint64_t cpu_time;              /* total CPU time consumed */
        struct freq_ctr call_rate;      /* stream task call rate without making progress */
 
        short store_count;
index 33a7f070ef9880491d97bc8da83a72c26c7d1b0a..373ac297c306b59900fe31af4994acedc5c706b0 100644 (file)
@@ -113,8 +113,6 @@ struct task {
        short nice;                     /* task prio from -1024 to +1024 */
        short tid;                      /* TID where it's allowed to run, <0 if anywhere */
        uint32_t wake_date;             /* date of the last task wakeup */
-       uint64_t lat_time;              /* total latency time experienced */
-       uint64_t cpu_time;              /* total CPU time consumed */
 };
 
 /* lightweight tasks, without priority, mainly used for I/Os */
index 33be63f0c279caea9cc8d072534ed576ab7ed382..a711c2740d756e636b2e12fa9d79558254b6799b 100644 (file)
@@ -498,8 +498,6 @@ static inline struct task *task_init(struct task *t, int tid)
        t->nice = 0;
        t->calls = 0;
        t->wake_date = 0;
-       t->cpu_time = 0;
-       t->lat_time = 0;
        t->expire = TICK_ETERNITY;
 #ifdef DEBUG_TASK
        t->debug.caller_idx = 0;
index 852cd0418f534c9a855cf4228b6c1e3e215f0e1e..cd9fccf85e3a786ba706ef47859923102f926af8 100644 (file)
@@ -4136,7 +4136,7 @@ smp_fetch_cpu_ns_avg(const struct arg *args, struct sample *smp, const char *kw,
                return 0;
 
        smp->data.type = SMP_T_SINT;
-       smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->cpu_time / smp->strm->task->calls : 0;
+       smp->data.u.sint = smp->strm->task->calls ? smp->strm->cpu_time / smp->strm->task->calls : 0;
        return 1;
 }
 
@@ -4148,7 +4148,7 @@ smp_fetch_cpu_ns_tot(const struct arg *args, struct sample *smp, const char *kw,
                return 0;
 
        smp->data.type = SMP_T_SINT;
-       smp->data.u.sint = smp->strm->task->cpu_time;
+       smp->data.u.sint = smp->strm->cpu_time;
        return 1;
 }
 
@@ -4160,7 +4160,7 @@ smp_fetch_lat_ns_avg(const struct arg *args, struct sample *smp, const char *kw,
                return 0;
 
        smp->data.type = SMP_T_SINT;
-       smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->lat_time / smp->strm->task->calls : 0;
+       smp->data.u.sint = smp->strm->task->calls ? smp->strm->lat_time / smp->strm->task->calls : 0;
        return 1;
 }
 
@@ -4172,7 +4172,7 @@ smp_fetch_lat_ns_tot(const struct arg *args, struct sample *smp, const char *kw,
                return 0;
 
        smp->data.type = SMP_T_SINT;
-       smp->data.u.sint = smp->strm->task->lat_time;
+       smp->data.u.sint = smp->strm->lat_time;
        return 1;
 }
 
index 8d62754054ab682c877c1652f43ccaa2c9e0d1d1..25a50e7a1d7ef3d13e35c0c91cbf3179a724cd2f 100644 (file)
@@ -407,6 +407,7 @@ struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer
        s->buffer_wait.target = s;
        s->buffer_wait.wakeup_cb = stream_buf_available;
 
+       s->lat_time = s->cpu_time = 0;
        s->call_rate.curr_tick = s->call_rate.curr_ctr = s->call_rate.prev_ctr = 0;
        s->pcli_next_pid = 0;
        s->pcli_flags = 0;
@@ -1556,6 +1557,17 @@ static void stream_update_both_sc(struct stream *s)
        }
 }
 
+/* if the current task's wake_date was set, it's being profiled, thus we may
+ * report latencies and CPU usages in logs, so it's desirable to update the
+ * latency when entering process_stream().
+ */
+static void stream_cond_update_cpu_latency(struct stream *s)
+{
+       uint32_t lat = th_ctx->sched_call_date - th_ctx->sched_wake_date;
+
+       s->lat_time += lat;
+}
+
 /* if the current task's wake_date was set, it's being profiled, thus we may
  * report latencies and CPU usages in logs, so it's desirable to do that before
  * logging in order to report accurate CPU usage. In this case we count that
@@ -1573,11 +1585,23 @@ static void stream_cond_update_cpu_usage(struct stream *s)
                return;
 
        cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
-       s->task->cpu_time += cpu;
+       s->cpu_time += cpu;
        HA_ATOMIC_ADD(&th_ctx->sched_profile_entry->cpu_time, cpu);
        th_ctx->sched_wake_date = 0;
 }
 
+/* this functions is called directly by the scheduler for tasks whose
+ * ->process points to process_stream(), and is used to keep latencies
+ * and CPU usage measurements accurate.
+ */
+void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu)
+{
+       struct stream *s = t->context;
+       s->lat_time += lat;
+       s->cpu_time += cpu;
+}
+
+
 /* This macro is very specific to the function below. See the comments in
  * process_stream() below to understand the logic and the tests.
  */
@@ -1649,6 +1673,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
        DBG_TRACE_ENTER(STRM_EV_STRM_PROC, s);
 
        activity[tid].stream_calls++;
+       stream_cond_update_cpu_latency(s);
 
        req = &s->req;
        res = &s->res;
@@ -3376,14 +3401,13 @@ static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm
                }
                else if ((tmpctx = sc_appctx(scf)) != NULL) {
                        chunk_appendf(&trash,
-                                     "      app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+                                     "      app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
                                      tmpctx,
                                      tmpctx->st0,
                                      tmpctx->st1,
                                      tmpctx->applet->name,
                                      tmpctx->t->tid,
-                                     tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
-                                     (unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+                                     tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
                }
 
                scb = strm->scb;
@@ -3419,14 +3443,13 @@ static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm
                }
                else if ((tmpctx = sc_appctx(scb)) != NULL) {
                        chunk_appendf(&trash,
-                                     "      app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+                                     "      app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
                                      tmpctx,
                                      tmpctx->st0,
                                      tmpctx->st1,
                                      tmpctx->applet->name,
                                      tmpctx->t->tid,
-                                     tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
-                                     (unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+                                     tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
                }
 
                chunk_appendf(&trash,
@@ -3682,7 +3705,7 @@ static int cli_io_handler_dump_sess(struct appctx *appctx)
                             curr_strm->task->state, curr_strm->stream_epoch,
                             human_time(now.tv_sec - curr_strm->logs.tv_accept.tv_sec, 1),
                             curr_strm->task->calls, read_freq_ctr(&curr_strm->call_rate),
-                            (unsigned long long)curr_strm->task->cpu_time, (unsigned long long)curr_strm->task->lat_time);
+                            (unsigned long long)curr_strm->cpu_time, (unsigned long long)curr_strm->lat_time);
 
                chunk_appendf(&trash,
                             " rq[f=%06xh,i=%u,an=%02xh,rx=%s",
index 3d78680774fb3051085b7dc1893c46b0c4fafa0e..bb25d508af35e46767aeb77a7bff585c09f0e385 100644 (file)
@@ -25,6 +25,7 @@
 #include <haproxy/tools.h>
 
 extern struct task *process_stream(struct task *t, void *context, unsigned int state);
+extern void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu);
 
 DECLARE_POOL(pool_head_task,    "task",    sizeof(struct task));
 DECLARE_POOL(pool_head_tasklet, "tasklet", sizeof(struct tasklet));
@@ -611,7 +612,6 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
                        uint32_t now_ns = now_mono_time();
                        uint32_t lat = now_ns - t->wake_date;
 
-                       t->lat_time += lat;
                        t->wake_date = 0;
                        th_ctx->sched_call_date = now_ns;
                        profile_entry = sched_activity_entry(sched_activity, t->process);
@@ -669,8 +669,6 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
                if (unlikely(th_ctx->sched_wake_date)) {
                        uint32_t cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
 
-                       if (t)
-                               t->cpu_time += cpu;
                        HA_ATOMIC_ADD(&profile_entry->cpu_time, cpu);
                }