From 19f1407ad90d39d3bab9293bf2849afc42dd2e9d Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 7 Jul 2025 13:21:24 +0100 Subject: [PATCH] [iobuf] Ensure I/O buffer data sits within unshared cachelines On platforms where DMA devices are not in the same coherency domain as the CPU cache, we must ensure that DMA I/O buffers do not share cachelines with other data. Align the start and end of I/O buffers to IOB_ZLEN, which is larger than any cacheline size we expect to encounter. Signed-off-by: Michael Brown --- src/core/iobuf.c | 38 +++++++++++++++++++------------------- src/include/ipxe/iobuf.h | 6 +++++- src/tests/iobuf_test.c | 8 +++++++- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/src/core/iobuf.c b/src/core/iobuf.c index 9f8a263d2..78fa23924 100644 --- a/src/core/iobuf.c +++ b/src/core/iobuf.c @@ -47,22 +47,26 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); */ struct io_buffer * alloc_iob_raw ( size_t len, size_t align, size_t offset ) { struct io_buffer *iobuf; + size_t headroom; + size_t tailroom; size_t padding; size_t threshold; unsigned int align_log2; void *data; - /* Calculate padding required below alignment boundary to - * ensure that a correctly aligned inline struct io_buffer - * could fit (regardless of the requested offset). + /* Round up requested alignment and calculate initial headroom + * and tailroom to ensure that no cachelines are shared + * between I/O buffer data and other data structures. */ - padding = ( sizeof ( *iobuf ) + __alignof__ ( *iobuf ) - 1 ); - - /* Round up requested alignment to at least the size of the - * padding, to simplify subsequent calculations. - */ - if ( align < padding ) - align = padding; + if ( align < IOB_ZLEN ) + align = IOB_ZLEN; + headroom = ( offset & ( IOB_ZLEN - 1 ) ); + tailroom = ( ( - len - offset ) & ( IOB_ZLEN - 1 ) ); + padding = ( headroom + tailroom ); + offset -= headroom; + len += padding; + if ( len < padding ) + return NULL; /* Round up alignment to the nearest power of two, avoiding * a potentially undefined shift operation. @@ -73,8 +77,8 @@ struct io_buffer * alloc_iob_raw ( size_t len, size_t align, size_t offset ) { align = ( 1UL << align_log2 ); /* Calculate length threshold */ - assert ( align >= padding ); - threshold = ( align - padding ); + assert ( align >= sizeof ( *iobuf ) ); + threshold = ( align - sizeof ( *iobuf ) ); /* Allocate buffer plus an inline descriptor as a single unit, * unless doing so would push the total size over the @@ -82,11 +86,6 @@ struct io_buffer * alloc_iob_raw ( size_t len, size_t align, size_t offset ) { */ if ( len <= threshold ) { - /* Round up buffer length to ensure that struct - * io_buffer is aligned. - */ - len += ( ( - len - offset ) & ( __alignof__ ( *iobuf ) - 1 ) ); - /* Allocate memory for buffer plus descriptor */ data = malloc_phys_offset ( len + sizeof ( *iobuf ), align, offset ); @@ -111,7 +110,8 @@ struct io_buffer * alloc_iob_raw ( size_t len, size_t align, size_t offset ) { /* Populate descriptor */ memset ( &iobuf->map, 0, sizeof ( iobuf->map ) ); - iobuf->head = iobuf->data = iobuf->tail = data; + iobuf->head = data; + iobuf->data = iobuf->tail = ( data + headroom ); iobuf->end = ( data + len ); return iobuf; @@ -266,7 +266,7 @@ struct io_buffer * iob_concatenate ( struct list_head *list ) { len += iob_len ( iobuf ); /* Allocate new I/O buffer */ - concatenated = alloc_iob_raw ( len, __alignof__ ( *iobuf ), 0 ); + concatenated = alloc_iob_raw ( len, IOB_ZLEN, 0 ); if ( ! concatenated ) return NULL; diff --git a/src/include/ipxe/iobuf.h b/src/include/ipxe/iobuf.h index 3e079c064..2bdca6b5d 100644 --- a/src/include/ipxe/iobuf.h +++ b/src/include/ipxe/iobuf.h @@ -15,11 +15,15 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); #include /** - * Minimum I/O buffer length + * Minimum I/O buffer length and alignment * * alloc_iob() will round up the allocated length to this size if * necessary. This is used on behalf of hardware that is not capable * of auto-padding. + * + * This length must be at least as large as the largest cacheline size + * that we expect to encounter, to allow for platforms where DMA + * devices are not in the same coherency domain as the CPU cache. */ #define IOB_ZLEN 128 diff --git a/src/tests/iobuf_test.c b/src/tests/iobuf_test.c index a417c2e8c..27fbccf0d 100644 --- a/src/tests/iobuf_test.c +++ b/src/tests/iobuf_test.c @@ -62,7 +62,7 @@ static inline void alloc_iob_okx ( size_t len, size_t align, size_t offset, "offset %#zx\n", iobuf, virt_to_phys ( iobuf->data ), iob_tailroom ( iobuf ), len, align, offset ); - /* Validate requested length and alignment */ + /* Validate requested length and data alignment */ okx ( ( ( ( intptr_t ) iobuf ) & ( __alignof__ ( *iobuf ) - 1 ) ) == 0, file, line ); okx ( iob_tailroom ( iobuf ) >= len, file, line ); @@ -70,6 +70,12 @@ static inline void alloc_iob_okx ( size_t len, size_t align, size_t offset, ( ( virt_to_phys ( iobuf->data ) & ( align - 1 ) ) == ( offset & ( align - 1 ) ) ) ), file, line ); + /* Validate overall buffer alignment */ + okx ( ( ( ( intptr_t ) iobuf->head ) & ( IOB_ZLEN - 1 ) ) == 0, + file, line ); + okx ( ( ( ( intptr_t ) iobuf->end ) & ( IOB_ZLEN - 1 ) ) == 0, + file, line ); + /* Overwrite entire content of I/O buffer (for Valgrind) */ memset ( iob_put ( iobuf, len ), 0x55, len ); -- 2.47.2