From 4e9510f16218802b5fc0d593d8707d4e7ebf9774 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin"
Date: Wed, 24 Sep 2025 01:27:07 -0400
Subject: [PATCH] ptr_ring: drop duplicated tail zeroing code

We have some rather subtle code around zeroing tail entries, minimizing
cache bouncing. Let's put it all in one place.

Doing this also reduces the text size slightly, e.g. for
drivers/vhost/net.o
 Before: text: 15,114 bytes
 After: text: 15,082 bytes

Signed-off-by: Michael S. Tsirkin
Acked-by: Jason Wang
Link: https://patch.msgid.link/adb9d941de4a2b619ddb2be271a9939849e70687.1758690291.git.mst@redhat.com
Signed-off-by: Jakub Kicinski
---
 include/linux/ptr_ring.h | 42 +++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 551329220e4f3..a736b16859a6c 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -243,6 +243,24 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
 	return ret;
 }
 
+/* Zero entries from tail to specified head.
+ * NB: if consumer_head can be >= r->size need to fixup tail later.
+ */
+static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head)
+{
+	int head = consumer_head - 1;
+
+	/* Zero out entries in the reverse order: this way we touch the
+	 * cache line that producer might currently be reading the last;
+	 * producer won't make progress and touch other cache lines
+	 * besides the first one until we write out all entries.
+	 */
+	while (likely(head >= r->consumer_tail))
+		r->queue[head--] = NULL;
+
+	r->consumer_tail = consumer_head;
+}
+
 /* Must only be called after __ptr_ring_peek returned !NULL */
 static inline void __ptr_ring_discard_one(struct ptr_ring *r)
 {
@@ -261,8 +279,7 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r)
 	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
 	 * to work correctly.
 	 */
-	int consumer_head = r->consumer_head;
-	int head = consumer_head++;
+	int consumer_head = r->consumer_head + 1;
 
 	/* Once we have processed enough entries invalidate them in
 	 * the ring all at once so producer can reuse their space in the ring.
@@ -270,16 +287,9 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r)
 	 * but helps keep the implementation simple.
 	 */
 	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
-		     consumer_head >= r->size)) {
-		/* Zero out entries in the reverse order: this way we touch the
-		 * cache line that producer might currently be reading the last;
-		 * producer won't make progress and touch other cache lines
-		 * besides the first one until we write out all entries.
-		 */
-		while (likely(head >= r->consumer_tail))
-			r->queue[head--] = NULL;
-		r->consumer_tail = consumer_head;
-	}
+		     consumer_head >= r->size))
+		__ptr_ring_zero_tail(r, consumer_head);
+
 	if (unlikely(consumer_head >= r->size)) {
 		consumer_head = 0;
 		r->consumer_tail = 0;
@@ -513,7 +523,6 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
 				      void (*destroy)(void *))
 {
 	unsigned long flags;
-	int head;
 
 	spin_lock_irqsave(&r->consumer_lock, flags);
 	spin_lock(&r->producer_lock);
@@ -525,17 +534,14 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
 	 * Clean out buffered entries (for simplicity). This way following code
 	 * can test entries for NULL and if not assume they are valid.
 	 */
-	head = r->consumer_head - 1;
-	while (likely(head >= r->consumer_tail))
-		r->queue[head--] = NULL;
-	r->consumer_tail = r->consumer_head;
+	__ptr_ring_zero_tail(r, r->consumer_head);
 
 	/*
 	 * Go over entries in batch, start moving head back and copy entries.
 	 * Stop when we run into previously unconsumed entries.
 	 */
 	while (n) {
-		head = r->consumer_head - 1;
+		int head = r->consumer_head - 1;
 		if (head < 0)
 			head = r->size - 1;
 		if (r->queue[head]) {
-- 
2.47.3