From: Greg Kroah-Hartman Date: Mon, 19 Aug 2024 09:45:55 +0000 (+0200) Subject: 4.19-stable patches X-Git-Tag: v6.1.107~138 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7486ef3d56f82497d10ed9e9dfee70a14ef237e9;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: bitmap-introduce-generic-optimized-bitmap_size.patch --- diff --git a/queue-4.19/bitmap-introduce-generic-optimized-bitmap_size.patch b/queue-4.19/bitmap-introduce-generic-optimized-bitmap_size.patch new file mode 100644 index 00000000000..94e21966b73 --- /dev/null +++ b/queue-4.19/bitmap-introduce-generic-optimized-bitmap_size.patch @@ -0,0 +1,145 @@ +From a37fbe666c016fd89e4460d0ebfcea05baba46dc Mon Sep 17 00:00:00 2001 +From: Alexander Lobakin +Date: Wed, 27 Mar 2024 16:23:49 +0100 +Subject: bitmap: introduce generic optimized bitmap_size() + +From: Alexander Lobakin + +commit a37fbe666c016fd89e4460d0ebfcea05baba46dc upstream. + +The number of times yet another open coded +`BITS_TO_LONGS(nbits) * sizeof(long)` can be spotted is huge. +Some generic helper is long overdue. + +Add one, bitmap_size(), but with one detail. +BITS_TO_LONGS() uses DIV_ROUND_UP(). The latter works well when both +dividend and divisor are compile-time constants or when the divisor +is not a pow-of-2. When it is however, the compilers sometimes tend +to generate suboptimal code (GCC 13): + +48 83 c0 3f add $0x3f,%rax +48 c1 e8 06 shr $0x6,%rax +48 8d 14 c5 00 00 00 00 lea 0x0(,%rax,8),%rdx + +%BITS_PER_LONG is always a pow-2 (either 32 or 64), but GCC still does +full division of `nbits + 63` by it and then multiplication by 8. +Instead of BITS_TO_LONGS(), use ALIGN() and then divide by 8. GCC: + +8d 50 3f lea 0x3f(%rax),%edx +c1 ea 03 shr $0x3,%edx +81 e2 f8 ff ff 1f and $0x1ffffff8,%edx + +Now it shifts `nbits + 63` by 3 positions (IOW performs fast division +by 8) and then masks bits[2:0]. 
bloat-o-meter: + +add/remove: 0/0 grow/shrink: 20/133 up/down: 156/-773 (-617) + +Clang does it better and generates the same code before/after starting +from -O1, except that with the ALIGN() approach it uses %edx and thus +still saves some bytes: + +add/remove: 0/0 grow/shrink: 9/133 up/down: 18/-538 (-520) + +Note that we can't expand DIV_ROUND_UP() by adding a check and using +this approach there, as it's used in array declarations where +expressions are not allowed. +Add this helper to tools/ as well. + +Reviewed-by: Przemek Kitszel +Acked-by: Yury Norov +Signed-off-by: Alexander Lobakin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/cio/idset.c | 2 +- + include/linux/bitmap.h | 8 +++++--- + include/linux/cpumask.h | 2 +- + tools/include/linux/bitmap.h | 7 ++++--- + 4 files changed, 11 insertions(+), 8 deletions(-) + +--- a/drivers/s390/cio/idset.c ++++ b/drivers/s390/cio/idset.c +@@ -18,7 +18,7 @@ struct idset { + + static inline unsigned long bitmap_size(int num_ssid, int num_id) + { +- return BITS_TO_LONGS(num_ssid * num_id) * sizeof(unsigned long); ++ return bitmap_size(size_mul(num_ssid, num_id)); + } + + static struct idset *idset_new(int num_ssid, int num_id) +--- a/include/linux/bitmap.h ++++ b/include/linux/bitmap.h +@@ -212,12 +212,14 @@ extern int bitmap_print_to_pagebuf(bool + #define small_const_nbits(nbits) \ + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) + ++#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) ++ + static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) + { + if (small_const_nbits(nbits)) + *dst = 0UL; + else { +- unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); ++ unsigned int len = bitmap_size(nbits); + memset(dst, 0, len); + } + } +@@ -227,7 +229,7 @@ static inline void bitmap_fill(unsigned + if (small_const_nbits(nbits)) + *dst = ~0UL; + else { +- unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned 
long); ++ unsigned int len = bitmap_size(nbits); + memset(dst, 0xff, len); + } + } +@@ -238,7 +240,7 @@ static inline void bitmap_copy(unsigned + if (small_const_nbits(nbits)) + *dst = *src; + else { +- unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); ++ unsigned int len = bitmap_size(nbits); + memcpy(dst, src, len); + } + } +--- a/include/linux/cpumask.h ++++ b/include/linux/cpumask.h +@@ -656,7 +656,7 @@ static inline int cpulist_parse(const ch + */ + static inline unsigned int cpumask_size(void) + { +- return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long); ++ return bitmap_size(nr_cpumask_bits); + } + + /* +--- a/tools/include/linux/bitmap.h ++++ b/tools/include/linux/bitmap.h +@@ -27,13 +27,14 @@ int __bitmap_and(unsigned long *dst, con + #define small_const_nbits(nbits) \ + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) + ++#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) ++ + static inline void bitmap_zero(unsigned long *dst, int nbits) + { + if (small_const_nbits(nbits)) + *dst = 0UL; + else { +- int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); +- memset(dst, 0, len); ++ memset(dst, 0, bitmap_size(nbits)); + } + } + +@@ -119,7 +120,7 @@ static inline int test_and_clear_bit(int + */ + static inline unsigned long *bitmap_alloc(int nbits) + { +- return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long)); ++ return calloc(1, bitmap_size(nbits)); + } + + /* diff --git a/queue-4.19/fix-bitmap-corruption-on-close_range-with-close_range_unshare.patch b/queue-4.19/fix-bitmap-corruption-on-close_range-with-close_range_unshare.patch index cd10ac83fc6..96b5864c5b2 100644 --- a/queue-4.19/fix-bitmap-corruption-on-close_range-with-close_range_unshare.patch +++ b/queue-4.19/fix-bitmap-corruption-on-close_range-with-close_range_unshare.patch @@ -120,7 +120,7 @@ Signed-off-by: Greg Kroah-Hartman new_fds = new_fdt->fd; --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h -@@ -254,6 +254,18 @@ static inline void 
bitmap_copy_clear_tai +@@ -256,6 +256,18 @@ static inline void bitmap_copy_clear_tai dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } diff --git a/queue-4.19/series b/queue-4.19/series index 6803eea792c..ded9034d4c1 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -4,4 +4,5 @@ xhci-fix-panther-point-null-pointer-deref-at-full-speed-re-enumeration.patch arm64-acpi-numa-initialize-all-values-of-acpi_early_node_map-to-numa_no_node.patch dm-resume-don-t-return-einval-when-signalled.patch dm-persistent-data-fix-memory-allocation-failure.patch +bitmap-introduce-generic-optimized-bitmap_size.patch fix-bitmap-corruption-on-close_range-with-close_range_unshare.patch