From: Willy Tarreau
Date: Tue, 30 Apr 2024 13:22:46 +0000 (+0200)
Subject: CLEANUP: tinfo: better align fields in thread_ctx
X-Git-Tag: v3.0-dev11~20
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=53461e4d940c488a62fbe44687de503abf61d105;p=thirdparty%2Fhaproxy.git

CLEANUP: tinfo: better align fields in thread_ctx

The introduction of buffer_wq[] in thread_ctx pushed a few fields
around and the cache line alignment is less satisfying. And more
importantly, even before this, all the lists in the local part were
only 8-aligned, with the first one split across two cache lines.

We can do better:
  - sched_profile_entry is not atomic at all; the data it points to
    is atomic, so it doesn't need to be in the atomic-only region,
    and it can fill the 8-byte hole before the lists
  - the align(2*void) that was only before tasklets[] moves before
    all lists (and it's a nop for now)

This now makes the lists and buffer_wq[] start on a cache line
boundary, leaves 48 bytes after the lists before the atomic-only
cache line, and leaves a full cache line at the end for the
128-alignment. This way we still have plenty of room in both parts
with better aligned fields.
---

diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h
index 4ed78756af..5564a70b5c 100644
--- a/include/haproxy/tinfo-t.h
+++ b/include/haproxy/tinfo-t.h
@@ -137,17 +137,19 @@ struct thread_ctx {
 	uint8_t tl_class_mask;          /* bit mask of non-empty tasklets classes */
 	uint8_t bufq_map;               /* one bit per non-empty buffer_wq */
 
-	// 6 bytes hole here
+	// 2 bytes hole here
+	unsigned int nb_rhttp_conns;    /* count of current conns used for active reverse HTTP */
+	struct sched_activity *sched_profile_entry; /* profile entry in use by the current task/tasklet, only if sched_wake_date>0 */
+
+	ALWAYS_ALIGN(2*sizeof(void*));
+	struct list buffer_wq[DYNBUF_NBQ]; /* buffer waiters, 4 criticality-based queues */
 	struct list pool_lru_head;      /* oldest objects in thread-local pool caches */
 	struct list streams;            /* list of streams attached to this thread */
 	struct list quic_conns;         /* list of active quic-conns attached to this thread */
 	struct list quic_conns_clo;     /* list of closing quic-conns attached to this thread */
 	struct list queued_checks;      /* checks waiting for a connection slot */
-	struct list buffer_wq[DYNBUF_NBQ]; /* buffer waiters, 4 criticality-based queues */
-	unsigned int nb_rhttp_conns;    /* count of current conns used for active reverse HTTP */
-
-	ALWAYS_ALIGN(2*sizeof(void*));
 	struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */
+	// around 48 bytes here for thread-local variables
 
 	// third cache line here on 64 bits: accessed mostly using atomic ops
 	ALWAYS_ALIGN(64);
@@ -161,7 +163,6 @@ struct thread_ctx {
 	uint32_t sched_wake_date;       /* current task/tasklet's wake date or 0 */
 	uint32_t sched_call_date;       /* current task/tasklet's call date (valid if sched_wake_date > 0) */
 
-	struct sched_activity *sched_profile_entry; /* profile entry in use by the current task/tasklet, only if sched_wake_date>0 */
 	uint64_t prev_cpu_time;         /* previous per thread CPU time */
 	uint64_t prev_mono_time;        /* previous system wide monotonic time */
@@ -175,6 +176,7 @@ struct thread_ctx {
 	unsigned long long out_bytes;   /* total #of bytes emitted */
 	unsigned long long spliced_out_bytes; /* total #of bytes emitted though a kernel pipe */
 	struct buffer *thread_dump_buffer; /* NULL out of dump, valid during a dump, 0x01 once done */
+	// around 64 bytes here for shared variables
 
 	ALWAYS_ALIGN(128);
 };
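
For readers unfamiliar with the pattern applied above, here is a minimal
standalone sketch of it, not HAProxy code: a thread-local part and an
atomically-accessed part kept on separate cache lines, with compile-time
checks on the resulting offsets. It assumes GCC/Clang, and
CACHELINE_ALIGN is a hypothetical stand-in for HAProxy's ALWAYS_ALIGN()
macro (here it simply aligns the next field, which is enough to
illustrate the idea).

	#include <stddef.h>
	#include <stdint.h>

	/* hypothetical stand-in for ALWAYS_ALIGN(): align the next field */
	#define CACHELINE_ALIGN __attribute__((aligned(64)))

	struct example_ctx {
		/* thread-local part: only touched by the owning thread */
		struct { void *n, *p; } local_list; /* stand-in for struct list */
		unsigned int local_counter;

		/* shared part: starts on its own cache line so that atomic
		 * writes from other threads never dirty the line holding
		 * the thread-local fields above (false sharing)
		 */
		CACHELINE_ALIGN uint64_t shared_counter;
		uint64_t shared_flags;
	};

	/* compile-time layout checks in the spirit of the commit: the
	 * shared part must start on a cache line boundary, and the struct
	 * size must be a multiple of the alignment so that arrays of it
	 * stay aligned too
	 */
	_Static_assert(offsetof(struct example_ctx, shared_counter) % 64 == 0,
	               "shared part must start on a cache line boundary");
	_Static_assert(sizeof(struct example_ctx) % 64 == 0,
	               "size must be a multiple of the cache line size");

In practice, holes and offsets like the "48 bytes" and "2 bytes hole"
figures noted in the patch can be measured on a built binary with tools
such as pahole.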