From: John Wolfe
Date: Sun, 21 Aug 2022 14:56:49 +0000 (-0700)
Subject: Changes to common header files not directly applicable to open-vm-tools.
X-Git-Tag: stable-12.1.0~12
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=581091addcbb6484047587f6d12038a04f2cc14a;p=thirdparty%2Fopen-vm-tools.git

Changes to common header files not directly applicable to open-vm-tools.

ARM: native atomics
---

diff --git a/open-vm-tools/lib/include/vm_atomic.h b/open-vm-tools/lib/include/vm_atomic.h
index de96f7ae4..4cdddf336 100644
--- a/open-vm-tools/lib/include/vm_atomic.h
+++ b/open-vm-tools/lib/include/vm_atomic.h
@@ -1,5 +1,5 @@
 /*********************************************************
- * Copyright (C) 1998-2021 VMware, Inc. All rights reserved.
+ * Copyright (C) 1998-2022 VMware, Inc. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published
@@ -229,40 +229,59 @@ Atomic_ReadIfEqualWrite128(Atomic_uint128 *ptr,  // IN/OUT
                            uint128 oldVal,       // IN
                            uint128 newVal)       // IN
 {
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
+#if defined VM_ARM_64
+   /*
+    * Don't use __sync_val_compare_and_swap, as this cannot magically
+    * use the right (LL/SC vs LSE) atomics without -moutline-atomics.
+    */
+#if __GNUC__ >= 9
+   if (Atomic_HaveLSE) {
+      SMP_RW_BARRIER_RW();
+      __asm__ __volatile__(
+         ".arch armv8.2-a \n\t"
+         "casp %0, %H0, %2, %H2, %1 \n\t"
+         : "+r" (oldVal),
+           "+Q" (ptr->value)
+         : "r" (newVal)
+      );
+      SMP_RW_BARRIER_RW();
+      return oldVal;
+   } else
+#endif /* __GNUC__ */
+   {
+      union {
+         uint128 raw;
+         struct {
+            uint64 lo;
+            uint64 hi;
+         };
+      } res, _old = { oldVal }, _new = { newVal };
+      uint32 failed;
+
+      SMP_RW_BARRIER_RW();
+      __asm__ __volatile__(
+         "1: ldxp %x0, %x1, %3 \n\t"
+         "   cmp %x0, %x4 \n\t"
+         "   ccmp %x1, %x5, #0, eq \n\t"
+         "   b.ne 2f \n\t"
+         "   stxp %w2, %x6, %x7, %3 \n\t"
+         "   cbnz %w2, 1b \n\t"
+         "2: \n\t"
+         : "=&r" (res.lo),
+           "=&r" (res.hi),
+           "=&r" (failed),
+           "+Q" (ptr->value)
+         : "r" (_old.lo),
+           "r" (_old.hi),
+           "r" (_new.lo),
+           "r" (_new.hi)
+         : "cc"
+      );
+      SMP_RW_BARRIER_RW();
+      return res.raw;
+   }
+#elif __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
    return __sync_val_compare_and_swap(&ptr->value, oldVal, newVal);
-#elif defined VM_ARM_64
-   union {
-      uint128 raw;
-      struct {
-         uint64 lo;
-         uint64 hi;
-      };
-   } res, _old = { oldVal }, _new = { newVal };
-   uint32 failed;
-
-   SMP_RW_BARRIER_RW();
-   __asm__ __volatile__(
-      "1: ldxp %x0, %x1, %3 \n\t"
-      "   cmp %x0, %x4 \n\t"
-      "   ccmp %x1, %x5, #0, eq \n\t"
-      "   b.ne 2f \n\t"
-      "   stxp %w2, %x6, %x7, %3 \n\t"
-      "   cbnz %w2, 1b \n\t"
-      "2: \n\t"
-      : "=&r" (res.lo),
-        "=&r" (res.hi),
-        "=&r" (failed),
-        "+Q" (ptr->value)
-      : "r" (_old.lo),
-        "r" (_old.hi),
-        "r" (_new.lo),
-        "r" (_new.hi)
-      : "cc"
-   );
-   SMP_RW_BARRIER_RW();
-
-   return res.raw;
 #endif
 }
 #endif
diff --git a/open-vm-tools/lib/include/vm_atomic_arm64_begin.h b/open-vm-tools/lib/include/vm_atomic_arm64_begin.h
index 6dab80e2b..831e7bde2 100644
--- a/open-vm-tools/lib/include/vm_atomic_arm64_begin.h
+++ b/open-vm-tools/lib/include/vm_atomic_arm64_begin.h
@@ -1,5 +1,5 @@
 /*********************************************************
- * Copyright (C) 2017-2018 VMware, Inc. All rights reserved.
+ * Copyright (C) 2017-2018,2022 VMware, Inc. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published
@@ -30,6 +30,40 @@
 
 #include "vm_basic_asm_arm64.h"
 
+/*
+ * Today these are defines, but long-term these will be patchable globals
+ * for ESXi kernel-mode code (and something similar for ESXi userspace code).
+ *
+ * Atomic_HaveLSE should be set to 1 for CPUs that have the LSE extension
+ * and where the atomic instructions are known to have a performance benefit.
+ * Seemingly, on some low-end chips (CA55) there may not be a benefit.
+ *
+ * Not every operation can be performed using a single-instruction atomic -
+ * LSE doesn't cover all kinds of logical/arithmetic operations. For example,
+ * there's an ldeor instruction, but not an ldorr. For cases where there is no
+ * combined instruction that atomically performs the load/store and the ALU
+ * operation, we fall back to CAS or to LL/SC. On some uarches - e.g. Neoverse
+ * N1 - CAS shows better behavior during heavy contention than LL/SC. LL/SC,
+ * though, remains the safest option. Atomic_PreferCasForOps controls this.
+ */
+
+#ifdef VMK_ARM_LSE
+#define Atomic_HaveLSE 1
+#else
+#define Atomic_HaveLSE 0
+#endif
+#define Atomic_PreferCasForOps 1
+
+#define _VMATOM_LSE_HAVE(x) _VMATOM_LSE_HAVE_##x
+#define _VMATOM_LSE_HAVE_add 1
+#define _VMATOM_LSE_HAVE_sub 0
+#define _VMATOM_LSE_HAVE_eor 1
+#define _VMATOM_LSE_HAVE_orr 0
+#define _VMATOM_LSE_HAVE_and 0
+
+#define Atomic_PreferLSE(op) (Atomic_HaveLSE && \
+                              (_VMATOM_LSE_HAVE(op) || Atomic_PreferCasForOps))
+
 /* bit size, instruction suffix, register prefix, extend suffix */
 #define _VMATOM_SIZE_8   8, b, w, b
 #define _VMATOM_SIZE_16 16, h, w, h
@@ -121,41 +155,99 @@
 /* Read (not returned), op with modval, write.
  */
 #define _VMATOM_SNIPPET_OP(bs, is, rp, es, fenced, atm, op, modval) ({ \
-   uint32 _failed; \
-   uint##bs _sample; \
+   uint##bs _newval; \
    \
    _VMATOM_FENCE(fenced); \
-   __asm__ __volatile__( \
-      "1: ldxr"#is" %"#rp"0, %2 \n\t"\
-      "   "#op" %"#rp"0, %"#rp"0, %"#rp"3 \n\t"\
-      "   stxr"#is" %w1 , %"#rp"0, %2 \n\t"\
-      "   cbnz %w1 , 1b \n\t"\
-      : "=&r" (_sample), \
-        "=&r" (_failed), \
-        "+Q" (*atm) \
-      : "r" (modval) \
-   ); \
+   if (Atomic_PreferLSE(op)) { \
+      if (_VMATOM_LSE_HAVE(op)) { \
+         __asm__ __volatile__( \
+            ".arch armv8.2-a \n\t"\
+            "st" #op #is" %"#rp"1, %0 \n\t"\
+            : "+Q" (*atm) \
+            : "r" (modval) \
+         ); \
+      } else { \
+         uint##bs _oldval; \
+         uint##bs _clobberedval; \
+         __asm__ __volatile__( \
+            ".arch armv8.2-a \n\t"\
+            "   ldr"#is" %"#rp"1, %3 \n\t"\
+            "1: mov %"#rp"0, %"#rp"1 \n\t"\
+            "   "#op" %"#rp"2, %"#rp"0, %"#rp"4 \n\t"\
+            "   cas"#is" %"#rp"1, %"#rp"2, %3 \n\t"\
+            "   cmp %"#rp"0, %"#rp"1, uxt"#es" \n\t"\
+            "   b.ne 1b \n\t"\
+            : "=&r" (_oldval), \
+              "=&r" (_clobberedval), \
+              "=&r" (_newval), \
+              "+Q" (*atm) \
+            : "r" (modval) \
+            : "cc" \
+         ); \
+      } \
+   } else { \
+      uint32 _failed; \
+      __asm__ __volatile__( \
+         "1: ldxr"#is" %"#rp"0, %2 \n\t"\
+         "   "#op" %"#rp"0, %"#rp"0, %"#rp"3 \n\t"\
+         "   stxr"#is" %w1 , %"#rp"0, %2 \n\t"\
+         "   cbnz %w1 , 1b \n\t"\
+         : "=&r" (_newval), \
+           "=&r" (_failed), \
+           "+Q" (*atm) \
+         : "r" (modval) \
+      ); \
+   } \
    _VMATOM_FENCE(fenced); \
 })
 /* Read (returned), op with modval, write.
  */
 #define _VMATOM_SNIPPET_ROP(bs, is, rp, es, fenced, atm, op, modval) ({ \
-   uint32 _failed; \
    uint##bs _newval; \
    uint##bs _oldval; \
    \
    _VMATOM_FENCE(fenced); \
-   __asm__ __volatile__( \
-      "1: ldxr"#is" %"#rp"0, %3 \n\t"\
-      "   "#op" %"#rp"1, %"#rp"0, %"#rp"4 \n\t"\
-      "   stxr"#is" %w2 , %"#rp"1, %3 \n\t"\
-      "   cbnz %w2 , 1b \n\t"\
-      : "=&r" (_oldval), \
-        "=&r" (_newval), \
-        "=&r" (_failed), \
-        "+Q" (*atm) \
-      : "r" (modval) \
-   ); \
+   if (Atomic_PreferLSE(op)) { \
+      if (_VMATOM_LSE_HAVE(op)) { \
+         __asm__ __volatile__( \
+            ".arch armv8.2-a \n\t"\
+            "ld" #op #is" %"#rp"2, %"#rp"0, %1 \n\t"\
+            : "=r" (_oldval), \
+              "+Q" (*atm) \
+            : "r" (modval) \
+         ); \
+      } else { \
+         uint##bs _clobberedval; \
+         __asm__ __volatile__( \
+            ".arch armv8.2-a \n\t"\
+            "   ldr"#is" %"#rp"1, %3 \n\t"\
+            "1: mov %"#rp"0, %"#rp"1 \n\t"\
+            "   "#op" %"#rp"2, %"#rp"0, %"#rp"4 \n\t"\
+            "   cas"#is" %"#rp"1, %"#rp"2, %3 \n\t"\
+            "   cmp %"#rp"0, %"#rp"1, uxt"#es" \n\t"\
+            "   b.ne 1b \n\t"\
+            : "=&r" (_oldval), \
+              "=&r" (_clobberedval), \
+              "=&r" (_newval), \
+              "+Q" (*atm) \
+            : "r" (modval) \
+            : "cc" \
+         ); \
+      } \
+   } else { \
+      uint32 _failed; \
+      __asm__ __volatile__( \
+         "1: ldxr"#is" %"#rp"0, %3 \n\t"\
+         "   "#op" %"#rp"1, %"#rp"0, %"#rp"4 \n\t"\
+         "   stxr"#is" %w2 , %"#rp"1, %3 \n\t"\
+         "   cbnz %w2 , 1b \n\t"\
+         : "=&r" (_oldval), \
+           "=&r" (_newval), \
+           "=&r" (_failed), \
+           "+Q" (*atm) \
+         : "r" (modval) \
+      ); \
+   } \
    _VMATOM_FENCE(fenced); \
    \
    _oldval; \
@@ -163,19 +255,29 @@
 /* Read (returned), write.
  */
 #define _VMATOM_SNIPPET_RW(bs, is, rp, es, fenced, atm, val) ({ \
-   uint32 _failed; \
    uint##bs _oldval; \
    \
    _VMATOM_FENCE(fenced); \
-   __asm__ __volatile__( \
-      "1: ldxr"#is" %"#rp"0, %2 \n\t"\
-      "   stxr"#is" %w1 , %"#rp"3, %2 \n\t"\
-      "   cbnz %w1 , 1b \n\t"\
-      : "=&r" (_oldval), \
-        "=&r" (_failed), \
-        "+Q" (*atm) \
-      : "r" (val) \
-   ); \
+   if (Atomic_HaveLSE) { \
+      __asm__ __volatile__( \
+         ".arch armv8.2-a \n\t"\
+         "swp"#is" %"#rp"2, %"#rp"0, %1 \n\t"\
+         : "=r" (_oldval), \
+           "+Q" (*atm) \
+         : "r" (val) \
+      ); \
+   } else { \
+      uint32 _failed; \
+      __asm__ __volatile__( \
+         "1: ldxr"#is" %"#rp"0, %2 \n\t"\
+         "   stxr"#is" %w1 , %"#rp"3, %2 \n\t"\
+         "   cbnz %w1 , 1b \n\t"\
+         : "=&r" (_oldval), \
+           "=&r" (_failed), \
+           "+Q" (*atm) \
+         : "r" (val) \
+      ); \
+   } \
    _VMATOM_FENCE(fenced); \
    \
    _oldval; \
@@ -183,24 +285,34 @@
 /* Read (returned), if equal to old then write new.
  */
 #define _VMATOM_SNIPPET_RIFEQW(bs, is, rp, es, fenced, atm, old, new) ({ \
-   uint32 _failed; \
    uint##bs _oldval; \
    \
    _VMATOM_FENCE(fenced); \
-   __asm__ __volatile__( \
-      "1: ldxr"#is" %"#rp"0, %2 \n\t"\
-      "   cmp %"#rp"0, %"#rp"3, uxt"#es" \n\t"\
-      "   b.ne 2f \n\t"\
-      "   stxr"#is" %w1 , %"#rp"4, %2 \n\t"\
-      "   cbnz %w1 , 1b \n\t"\
-      "2: \n\t"\
-      : "=&r" (_oldval), \
-        "=&r" (_failed), \
-        "+Q" (*atm) \
-      : "r" (old), \
-        "r" (new) \
-      : "cc" \
-   ); \
+   if (Atomic_HaveLSE) { \
+      __asm__ __volatile__( \
+         ".arch armv8.2-a \n\t"\
+         "cas"#is" %"#rp"0, %"#rp"2, %1 \n\t"\
+         : "=r" (_oldval), \
+           "+Q" (*atm) \
+         : "r" (new), "0" (old) \
+      ); \
+   } else { \
+      uint32 _failed; \
+      __asm__ __volatile__( \
+         "1: ldxr"#is" %"#rp"0, %2 \n\t"\
+         "   cmp %"#rp"0, %"#rp"3, uxt"#es" \n\t"\
+         "   b.ne 2f \n\t"\
+         "   stxr"#is" %w1 , %"#rp"4, %2 \n\t"\
+         "   cbnz %w1 , 1b \n\t"\
+         "2: \n\t"\
+         : "=&r" (_oldval), \
+           "=&r" (_failed), \
+           "+Q" (*atm) \
+         : "r" (old), \
+           "r" (new) \
+         : "cc" \
+      ); \
+   } \
    _VMATOM_FENCE(fenced); \
    \
    _oldval; \
diff --git a/open-vm-tools/lib/include/vm_atomic_arm64_end.h b/open-vm-tools/lib/include/vm_atomic_arm64_end.h
index 02c0e806f..91aebc643 100644
--- a/open-vm-tools/lib/include/vm_atomic_arm64_end.h
+++ b/open-vm-tools/lib/include/vm_atomic_arm64_end.h
@@ -1,5 +1,5 @@
 /*********************************************************
- * Copyright (C) 2017 VMware, Inc. All rights reserved.
+ * Copyright (C) 2017,2022 VMware, Inc. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published
@@ -27,6 +27,12 @@
 #endif
 
 /* Undefine all the private macros we previously defined. */
+#undef _VMATOM_LSE_HAVE
+#undef _VMATOM_LSE_HAVE_add
+#undef _VMATOM_LSE_HAVE_sub
+#undef _VMATOM_LSE_HAVE_eor
+#undef _VMATOM_LSE_HAVE_orr
+#undef _VMATOM_LSE_HAVE_and
 #undef _VMATOM_SIZE_8
 #undef _VMATOM_SIZE_16
 #undef _VMATOM_SIZE_32
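
Atomic_ReadIfEqualWrite128() in the first hunk is the compare-and-swap building block for 16-byte values such as {pointer, generation} pairs. A minimal caller-side sketch of the usual CAS retry loop, assuming the native 128-bit uint128 typedef the header builds on and using a (0, 0) exchange as a plain read (it can only store 0 when the value already is 0); the function name is hypothetical:

#include "vm_atomic.h"

static void
IncrementUint128(Atomic_uint128 *ptr)  // IN/OUT
{
   /* A (0, 0) exchange returns the current value without modifying it. */
   uint128 old = Atomic_ReadIfEqualWrite128(ptr, 0, 0);

   for (;;) {
      uint128 prev = Atomic_ReadIfEqualWrite128(ptr, old, old + 1);

      if (prev == old) {
         break;            /* CAS succeeded; the increment is published. */
      }
      old = prev;          /* Lost a race; retry with the value we saw. */
   }
}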
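
The begin.h hunk wires Atomic_HaveLSE to the compile-time VMK_ARM_LSE define, with patchable globals noted as the long-term plan. Outside ESXi, Linux userspace can probe the same capability at run time from the auxiliary vector. This is only a sketch of that alternative, not something this change does, and the helper name is made up:

#include <sys/auxv.h>

#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1UL << 8)  /* LSE atomics bit in AT_HWCAP on arm64 Linux. */
#endif

/* Returns non-zero if the kernel reports ARMv8.1 LSE atomics support. */
static int
CpuAdvertisesLSE(void)
{
   return (getauxval(AT_HWCAP) & HWCAP_ATOMICS) != 0;
}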
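
The reworked _VMATOM_SNIPPET_ROP picks one of three shapes: a single LSE ld<op> when the operation has one, a cas retry loop when it does not (or when Atomic_PreferCasForOps favors CAS), and the original ldxr/stxr loop when LSE is not preferred. Roughly what the first and last shapes expand to for an unfenced 32-bit add; helper names are illustrative, the code is AArch64-only, and the barriers that _VMATOM_FENCE adds for fenced variants are omitted:

#include <stdint.h>

/* LSE shape: one ld-op instruction performs the add and returns the old value. */
static inline uint32_t
FetchAndAdd32Lse(volatile uint32_t *atm,  // IN/OUT
                 uint32_t val)            // IN
{
   uint32_t old;

   __asm__ __volatile__(
      ".arch armv8.2-a    \n\t"   /* as in the header: let the assembler accept LSE */
      "ldadd %w2, %w0, %1 \n\t"
      : "=r" (old),
        "+Q" (*atm)
      : "r" (val)
   );
   return old;
}

/* LL/SC shape: exclusive load/store retry loop, works on any ARMv8.0 CPU. */
static inline uint32_t
FetchAndAdd32Llsc(volatile uint32_t *atm,  // IN/OUT
                  uint32_t val)            // IN
{
   uint32_t old, newval, failed;

   __asm__ __volatile__(
      "1: ldxr %w0, %3       \n\t"
      "   add  %w1, %w0, %w4 \n\t"
      "   stxr %w2, %w1, %3  \n\t"   /* %w2 != 0 means the exclusive store failed */
      "   cbnz %w2, 1b       \n\t"
      : "=&r" (old),
        "=&r" (newval),
        "=&r" (failed),
        "+Q" (*atm)
      : "r" (val)
   );
   return old;
}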
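
For operations with no single LSE instruction - there is an ldeor, but no ldorr - the cas shape computes the new value in a register and publishes it with cas, retrying until the compare value matches what was actually in memory. An illustrative unfenced 32-bit fetch-OR in the same shape (for the 32-bit case the uxt extend the header uses for sub-word sizes is unnecessary); the helper name is hypothetical:

#include <stdint.h>

static inline uint32_t
FetchAndOr32Cas(volatile uint32_t *atm,  // IN/OUT
                uint32_t val)            // IN
{
   uint32_t old, expected, desired;

   __asm__ __volatile__(
      ".arch armv8.2-a      \n\t"
      "   ldr  %w1, %3      \n\t"   /* initial snapshot of the current value */
      "1: mov  %w0, %w1     \n\t"
      "   orr  %w2, %w0, %w4\n\t"
      "   cas  %w1, %w2, %3 \n\t"   /* %w1 receives the value found in memory */
      "   cmp  %w0, %w1     \n\t"
      "   b.ne 1b           \n\t"   /* mismatch: someone else won; retry */
      : "=&r" (old),
        "=&r" (expected),
        "=&r" (desired),
        "+Q" (*atm)
      : "r" (val)
      : "cc"
   );
   return old;
}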