From: John Wolfe
Date: Sun, 21 Aug 2022 14:56:49 +0000 (-0700)
Subject: Changes to common header files not directly applicable to open-vm-tools.
X-Git-Tag: stable-12.1.0~12
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=581091addcbb6484047587f6d12038a04f2cc14a;p=thirdparty%2Fopen-vm-tools.git

Changes to common header files not directly applicable to open-vm-tools.

ARM: native atomics
---

diff --git a/open-vm-tools/lib/include/vm_atomic.h b/open-vm-tools/lib/include/vm_atomic.h
index de96f7ae4..4cdddf336 100644
--- a/open-vm-tools/lib/include/vm_atomic.h
+++ b/open-vm-tools/lib/include/vm_atomic.h
@@ -1,5 +1,5 @@
 /*********************************************************
- * Copyright (C) 1998-2021 VMware, Inc. All rights reserved.
+ * Copyright (C) 1998-2022 VMware, Inc. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published
@@ -229,40 +229,59 @@ Atomic_ReadIfEqualWrite128(Atomic_uint128 *ptr,  // IN/OUT
                            uint128 oldVal,       // IN
                            uint128 newVal)       // IN
 {
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
+#if defined VM_ARM_64
+   /*
+    * Don't use __sync_val_compare_and_swap, as this cannot magically
+    * use the right (LL/SC vs LSE) atomics without -moutline-atomics.
+    */
+#if __GNUC__ >= 9
+   if (Atomic_HaveLSE) {
+      SMP_RW_BARRIER_RW();
+      __asm__ __volatile__(
+         ".arch armv8.2-a \n\t"
+         "casp %0, %H0, %2, %H2, %1 \n\t"
+         : "+r" (oldVal),
+           "+Q" (ptr->value)
+         : "r" (newVal)
+      );
+      SMP_RW_BARRIER_RW();
+      return oldVal;
+   } else
+#endif /* __GNUC__ */
+   {
+      union {
+         uint128 raw;
+         struct {
+            uint64 lo;
+            uint64 hi;
+         };
+      } res, _old = { oldVal }, _new = { newVal };
+      uint32 failed;
+
+      SMP_RW_BARRIER_RW();
+      __asm__ __volatile__(
+         "1: ldxp %x0, %x1, %3 \n\t"
+         "   cmp %x0, %x4 \n\t"
+         "   ccmp %x1, %x5, #0, eq \n\t"
+         "   b.ne 2f \n\t"
+         "   stxp %w2, %x6, %x7, %3 \n\t"
+         "   cbnz %w2, 1b \n\t"
+         "2: \n\t"
+         : "=&r" (res.lo),
+           "=&r" (res.hi),
+           "=&r" (failed),
+           "+Q" (ptr->value)
+         : "r" (_old.lo),
+           "r" (_old.hi),
+           "r" (_new.lo),
+           "r" (_new.hi)
+         : "cc"
+      );
+      SMP_RW_BARRIER_RW();
+      return res.raw;
+   }
+#elif __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
    return __sync_val_compare_and_swap(&ptr->value, oldVal, newVal);
-#elif defined VM_ARM_64
-   union {
-      uint128 raw;
-      struct {
-         uint64 lo;
-         uint64 hi;
-      };
-   } res, _old = { oldVal }, _new = { newVal };
-   uint32 failed;
-
-   SMP_RW_BARRIER_RW();
-   __asm__ __volatile__(
-      "1: ldxp %x0, %x1, %3 \n\t"
-      "   cmp %x0, %x4 \n\t"
-      "   ccmp %x1, %x5, #0, eq \n\t"
-      "   b.ne 2f \n\t"
-      "   stxp %w2, %x6, %x7, %3 \n\t"
-      "   cbnz %w2, 1b \n\t"
-      "2: \n\t"
-      : "=&r" (res.lo),
-        "=&r" (res.hi),
-        "=&r" (failed),
-        "+Q" (ptr->value)
-      : "r" (_old.lo),
-        "r" (_old.hi),
-        "r" (_new.lo),
-        "r" (_new.hi)
-      : "cc"
-   );
-   SMP_RW_BARRIER_RW();
-
-   return res.raw;
 #endif
 }
 #endif
diff --git a/open-vm-tools/lib/include/vm_atomic_arm64_begin.h b/open-vm-tools/lib/include/vm_atomic_arm64_begin.h
index 6dab80e2b..831e7bde2 100644
--- a/open-vm-tools/lib/include/vm_atomic_arm64_begin.h
+++ b/open-vm-tools/lib/include/vm_atomic_arm64_begin.h
@@ -1,5 +1,5 @@
 /*********************************************************
- * Copyright (C) 2017-2018 VMware, Inc. All rights reserved.
+ * Copyright (C) 2017-2018,2022 VMware, Inc. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published
@@ -30,6 +30,40 @@
 
 #include "vm_basic_asm_arm64.h"
 
+/*
+ * Today these are defines, but long-term these will be patchable globals
+ * for ESXi kernel-mode code (and something similar for ESXi userspace code).
+ *
+ * Atomic_HaveLSE should be set to 1 for CPUs that have the LSE extension
+ * and where the atomic instructions are known to have a performance benefit.
+ * Seemingly, on some low-end chips (CA55) there may not be a benefit.
+ *
+ * Not every operation can be performed using a single-instruction atomic -
+ * LSE doesn't cover all kinds of logical/arithmetic operations. For example,
+ * there's an ldeor instruction, but not an ldorr. For cases where there is no
+ * combined instruction that atomically performs the load/store and the ALU
+ * operation, we fall back to CAS or to LL/SC. On some uarches - e.g. Neoverse
+ * N1 - CAS shows better behavior during heavy contention than LL/SC. LL/SC,
+ * though, remains the safest option. Atomic_PreferCasForOps controls this.
+ */
+
+#ifdef VMK_ARM_LSE
+#define Atomic_HaveLSE 1
+#else
+#define Atomic_HaveLSE 0
+#endif
+#define Atomic_PreferCasForOps 1
+
+#define _VMATOM_LSE_HAVE(x) _VMATOM_LSE_HAVE_##x
+#define _VMATOM_LSE_HAVE_add 1
+#define _VMATOM_LSE_HAVE_sub 0
+#define _VMATOM_LSE_HAVE_eor 1
+#define _VMATOM_LSE_HAVE_orr 0
+#define _VMATOM_LSE_HAVE_and 0
+
+#define Atomic_PreferLSE(op) (Atomic_HaveLSE && \
+                              (_VMATOM_LSE_HAVE(op) || Atomic_PreferCasForOps))
+
 /* bit size, instruction suffix, register prefix, extend suffix */
 #define _VMATOM_SIZE_8   8, b, w, b
 #define _VMATOM_SIZE_16 16, h, w, h
@@ -121,41 +155,99 @@
 /* Read (not returned), op with modval, write.
  */
 #define _VMATOM_SNIPPET_OP(bs, is, rp, es, fenced, atm, op, modval) ({ \
-   uint32 _failed; \
-   uint##bs _sample; \
+   uint##bs _newval; \
    \
    _VMATOM_FENCE(fenced); \
-   __asm__ __volatile__( \
-      "1: ldxr"#is" %"#rp"0, %2 \n\t"\
-      "   "#op" %"#rp"0, %"#rp"0, %"#rp"3 \n\t"\
-      "   stxr"#is" %w1 , %"#rp"0, %2 \n\t"\
-      "   cbnz %w1 , 1b \n\t"\
-      : "=&r" (_sample), \
-        "=&r" (_failed), \
-        "+Q" (*atm) \
-      : "r" (modval) \
-   ); \
+   if (Atomic_PreferLSE(op)) { \
+      if (_VMATOM_LSE_HAVE(op)) { \
+         __asm__ __volatile__( \
+            ".arch armv8.2-a \n\t"\
+            "st" #op #is" %"#rp"1, %0 \n\t"\
+            : "+Q" (*atm) \
+            : "r" (modval) \
+         ); \
+      } else { \
+         uint##bs _oldval; \
+         uint##bs _clobberedval; \
+         __asm__ __volatile__( \
+            ".arch armv8.2-a \n\t"\
+            "   ldr"#is" %"#rp"1, %3 \n\t"\
+            "1: mov %"#rp"0, %"#rp"1 \n\t"\
+            "   "#op" %"#rp"2, %"#rp"0, %"#rp"4 \n\t"\
+            "   cas"#is" %"#rp"1, %"#rp"2, %3 \n\t"\
+            "   cmp %"#rp"0, %"#rp"1, uxt"#es" \n\t"\
+            "   b.ne 1b \n\t"\
+            : "=&r" (_oldval), \
+              "=&r" (_clobberedval), \
+              "=&r" (_newval), \
+              "+Q" (*atm) \
+            : "r" (modval) \
+            : "cc" \
+         ); \
+      } \
+   } else { \
+      uint32 _failed; \
+      __asm__ __volatile__( \
+         "1: ldxr"#is" %"#rp"0, %2 \n\t"\
+         "   "#op" %"#rp"0, %"#rp"0, %"#rp"3 \n\t"\
+         "   stxr"#is" %w1 , %"#rp"0, %2 \n\t"\
+         "   cbnz %w1 , 1b \n\t"\
+         : "=&r" (_newval), \
+           "=&r" (_failed), \
+           "+Q" (*atm) \
+         : "r" (modval) \
+      ); \
+   } \
    _VMATOM_FENCE(fenced); \
 })
 /* Read (returned), op with modval, write.
  */
 #define _VMATOM_SNIPPET_ROP(bs, is, rp, es, fenced, atm, op, modval) ({ \
-   uint32 _failed; \
    uint##bs _newval; \
    uint##bs _oldval; \
    \
    _VMATOM_FENCE(fenced); \
-   __asm__ __volatile__( \
-      "1: ldxr"#is" %"#rp"0, %3 \n\t"\
-      "   "#op" %"#rp"1, %"#rp"0, %"#rp"4 \n\t"\
-      "   stxr"#is" %w2 , %"#rp"1, %3 \n\t"\
-      "   cbnz %w2 , 1b \n\t"\
-      : "=&r" (_oldval), \
-        "=&r" (_newval), \
-        "=&r" (_failed), \
-        "+Q" (*atm) \
-      : "r" (modval) \
-   ); \
+   if (Atomic_PreferLSE(op)) { \
+      if (_VMATOM_LSE_HAVE(op)) { \
+         __asm__ __volatile__( \
+            ".arch armv8.2-a \n\t"\
+            "ld" #op #is" %"#rp"2, %"#rp"0, %1 \n\t"\
+            : "=r" (_oldval), \
+              "+Q" (*atm) \
+            : "r" (modval) \
+         ); \
+      } else { \
+         uint##bs _clobberedval; \
+         __asm__ __volatile__( \
+            ".arch armv8.2-a \n\t"\
+            "   ldr"#is" %"#rp"1, %3 \n\t"\
+            "1: mov %"#rp"0, %"#rp"1 \n\t"\
+            "   "#op" %"#rp"2, %"#rp"0, %"#rp"4 \n\t"\
+            "   cas"#is" %"#rp"1, %"#rp"2, %3 \n\t"\
+            "   cmp %"#rp"0, %"#rp"1, uxt"#es" \n\t"\
+            "   b.ne 1b \n\t"\
+            : "=&r" (_oldval), \
+              "=&r" (_clobberedval), \
+              "=&r" (_newval), \
+              "+Q" (*atm) \
+            : "r" (modval) \
+            : "cc" \
+         ); \
+      } \
+   } else { \
+      uint32 _failed; \
+      __asm__ __volatile__( \
+         "1: ldxr"#is" %"#rp"0, %3 \n\t"\
+         "   "#op" %"#rp"1, %"#rp"0, %"#rp"4 \n\t"\
+         "   stxr"#is" %w2 , %"#rp"1, %3 \n\t"\
+         "   cbnz %w2 , 1b \n\t"\
+         : "=&r" (_oldval), \
+           "=&r" (_newval), \
+           "=&r" (_failed), \
+           "+Q" (*atm) \
+         : "r" (modval) \
+      ); \
+   } \
    _VMATOM_FENCE(fenced); \
    \
    _oldval; \
@@ -163,19 +255,29 @@
 /* Read (returned), write.
  */
 #define _VMATOM_SNIPPET_RW(bs, is, rp, es, fenced, atm, val) ({ \
-   uint32 _failed; \
    uint##bs _oldval; \
    \
    _VMATOM_FENCE(fenced); \
-   __asm__ __volatile__( \
-      "1: ldxr"#is" %"#rp"0, %2 \n\t"\
-      "   stxr"#is" %w1 , %"#rp"3, %2 \n\t"\
-      "   cbnz %w1 , 1b \n\t"\
-      : "=&r" (_oldval), \
-        "=&r" (_failed), \
-        "+Q" (*atm) \
-      : "r" (val) \
-   ); \
+   if (Atomic_HaveLSE) { \
+      __asm__ __volatile__( \
+         ".arch armv8.2-a \n\t"\
+         "swp"#is" %"#rp"2, %"#rp"0, %1 \n\t"\
+         : "=r" (_oldval), \
+           "+Q" (*atm) \
+         : "r" (val) \
+      ); \
+   } else { \
+      uint32 _failed; \
+      __asm__ __volatile__( \
+         "1: ldxr"#is" %"#rp"0, %2 \n\t"\
+         "   stxr"#is" %w1 , %"#rp"3, %2 \n\t"\
+         "   cbnz %w1 , 1b \n\t"\
+         : "=&r" (_oldval), \
+           "=&r" (_failed), \
+           "+Q" (*atm) \
+         : "r" (val) \
+      ); \
+   } \
    _VMATOM_FENCE(fenced); \
    \
    _oldval; \
@@ -183,24 +285,34 @@
 /* Read (returned), if equal to old then write new.
  */
 #define _VMATOM_SNIPPET_RIFEQW(bs, is, rp, es, fenced, atm, old, new) ({ \
-   uint32 _failed; \
    uint##bs _oldval; \
    \
    _VMATOM_FENCE(fenced); \
-   __asm__ __volatile__( \
-      "1: ldxr"#is" %"#rp"0, %2 \n\t"\
-      "   cmp %"#rp"0, %"#rp"3, uxt"#es" \n\t"\
-      "   b.ne 2f \n\t"\
-      "   stxr"#is" %w1 , %"#rp"4, %2 \n\t"\
-      "   cbnz %w1 , 1b \n\t"\
-      "2: \n\t"\
-      : "=&r" (_oldval), \
-        "=&r" (_failed), \
-        "+Q" (*atm) \
-      : "r" (old), \
-        "r" (new) \
-      : "cc" \
-   ); \
+   if (Atomic_HaveLSE) { \
+      __asm__ __volatile__( \
+         ".arch armv8.2-a \n\t"\
+         "cas"#is" %"#rp"0, %"#rp"2, %1 \n\t"\
+         : "=r" (_oldval), \
+           "+Q" (*atm) \
+         : "r" (new), "0" (old) \
+      ); \
+   } else { \
+      uint32 _failed; \
+      __asm__ __volatile__( \
+         "1: ldxr"#is" %"#rp"0, %2 \n\t"\
+         "   cmp %"#rp"0, %"#rp"3, uxt"#es" \n\t"\
+         "   b.ne 2f \n\t"\
+         "   stxr"#is" %w1 , %"#rp"4, %2 \n\t"\
+         "   cbnz %w1 , 1b \n\t"\
+         "2: \n\t"\
+         : "=&r" (_oldval), \
+           "=&r" (_failed), \
+           "+Q" (*atm) \
+         : "r" (old), \
+           "r" (new) \
+         : "cc" \
+      ); \
+   } \
    _VMATOM_FENCE(fenced); \
    \
    _oldval; \
diff --git a/open-vm-tools/lib/include/vm_atomic_arm64_end.h b/open-vm-tools/lib/include/vm_atomic_arm64_end.h
index 02c0e806f..91aebc643 100644
--- a/open-vm-tools/lib/include/vm_atomic_arm64_end.h
+++ b/open-vm-tools/lib/include/vm_atomic_arm64_end.h
@@ -1,5 +1,5 @@
 /*********************************************************
- * Copyright (C) 2017 VMware, Inc. All rights reserved.
+ * Copyright (C) 2017,2022 VMware, Inc. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as published
@@ -27,6 +27,12 @@
 #endif
 
 /* Undefine all the private macros we previously defined. */
+#undef _VMATOM_LSE_HAVE
+#undef _VMATOM_LSE_HAVE_add
+#undef _VMATOM_LSE_HAVE_sub
+#undef _VMATOM_LSE_HAVE_eor
+#undef _VMATOM_LSE_HAVE_orr
+#undef _VMATOM_LSE_HAVE_and
 #undef _VMATOM_SIZE_8
 #undef _VMATOM_SIZE_16
 #undef _VMATOM_SIZE_32
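
Atomic_ReadIfEqualWrite128() in the first hunk is the compare-and-swap building block for 16-byte values such as {pointer, generation} pairs. A minimal caller-side sketch of the usual CAS retry loop, assuming the native 128-bit uint128 typedef the header builds on and using a (0, 0) exchange as a plain read (it can only store 0 when the value already is 0); the function name is hypothetical:

#include "vm_atomic.h"

static void
IncrementUint128(Atomic_uint128 *ptr)  // IN/OUT
{
   /* A (0, 0) exchange returns the current value without modifying it. */
   uint128 old = Atomic_ReadIfEqualWrite128(ptr, 0, 0);

   for (;;) {
      uint128 prev = Atomic_ReadIfEqualWrite128(ptr, old, old + 1);

      if (prev == old) {
         break;            /* CAS succeeded; the increment is published. */
      }
      old = prev;          /* Lost a race; retry with the value we saw. */
   }
}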
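
The begin.h hunk wires Atomic_HaveLSE to the compile-time VMK_ARM_LSE define, with patchable globals noted as the long-term plan. Outside ESXi, Linux userspace can probe the same capability at run time from the auxiliary vector. This is only a sketch of that alternative, not something this change does, and the helper name is made up:

#include <sys/auxv.h>

#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1UL << 8)  /* LSE atomics bit in AT_HWCAP on arm64 Linux. */
#endif

/* Returns non-zero if the kernel reports ARMv8.1 LSE atomics support. */
static int
CpuAdvertisesLSE(void)
{
   return (getauxval(AT_HWCAP) & HWCAP_ATOMICS) != 0;
}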
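
The reworked _VMATOM_SNIPPET_ROP picks one of three shapes: a single LSE ld<op> when the operation has one, a cas retry loop when it does not (or when Atomic_PreferCasForOps favors CAS), and the original ldxr/stxr loop when LSE is not preferred. Roughly what the first and last shapes expand to for an unfenced 32-bit add; helper names are illustrative, the code is AArch64-only, and the barriers that _VMATOM_FENCE adds for fenced variants are omitted:

#include <stdint.h>

/* LSE shape: one ld-op instruction performs the add and returns the old value. */
static inline uint32_t
FetchAndAdd32Lse(volatile uint32_t *atm,  // IN/OUT
                 uint32_t val)            // IN
{
   uint32_t old;

   __asm__ __volatile__(
      ".arch armv8.2-a    \n\t"   /* as in the header: let the assembler accept LSE */
      "ldadd %w2, %w0, %1 \n\t"
      : "=r" (old),
        "+Q" (*atm)
      : "r" (val)
   );
   return old;
}

/* LL/SC shape: exclusive load/store retry loop, works on any ARMv8.0 CPU. */
static inline uint32_t
FetchAndAdd32Llsc(volatile uint32_t *atm,  // IN/OUT
                  uint32_t val)            // IN
{
   uint32_t old, newval, failed;

   __asm__ __volatile__(
      "1: ldxr %w0, %3       \n\t"
      "   add  %w1, %w0, %w4 \n\t"
      "   stxr %w2, %w1, %3  \n\t"   /* %w2 != 0 means the exclusive store failed */
      "   cbnz %w2, 1b       \n\t"
      : "=&r" (old),
        "=&r" (newval),
        "=&r" (failed),
        "+Q" (*atm)
      : "r" (val)
   );
   return old;
}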
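
For operations with no single LSE instruction - there is an ldeor, but no ldorr - the cas shape computes the new value in a register and publishes it with cas, retrying until the compare value matches what was actually in memory. An illustrative unfenced 32-bit fetch-OR in the same shape (for the 32-bit case the uxt extend the header uses for sub-word sizes is unnecessary); the helper name is hypothetical:

#include <stdint.h>

static inline uint32_t
FetchAndOr32Cas(volatile uint32_t *atm,  // IN/OUT
                uint32_t val)            // IN
{
   uint32_t old, expected, desired;

   __asm__ __volatile__(
      ".arch armv8.2-a      \n\t"
      "   ldr  %w1, %3      \n\t"   /* initial snapshot of the current value */
      "1: mov  %w0, %w1     \n\t"
      "   orr  %w2, %w0, %w4\n\t"
      "   cas  %w1, %w2, %3 \n\t"   /* %w1 receives the value found in memory */
      "   cmp  %w0, %w1     \n\t"
      "   b.ne 1b           \n\t"   /* mismatch: someone else won; retry */
      : "=&r" (old),
        "=&r" (expected),
        "=&r" (desired),
        "+Q" (*atm)
      : "r" (val)
      : "cc"
   );
   return old;
}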