git.ipfire.org Git - thirdparty/linux.git/commitdiff
LoongArch: Revert qspinlock to test-and-set simple lock on VM
authorBibo Mao <maobibo@loongson.cn>
Wed, 11 Sep 2024 15:26:32 +0000 (23:26 +0800)
committerHuacai Chen <chenhuacai@loongson.cn>
Wed, 11 Sep 2024 15:26:32 +0000 (23:26 +0800)
Similar to x86, when a VM is detected, revert to a simple test-and-set
lock to avoid the horrors of queue preemption.

Tested on 3C5000 Dual-way machine with 32 cores and 2 numa nodes,
test case is kcbench on kernel mainline 6.10, the detailed command is
"kcbench --src /root/src/linux"

Performance on host machine:
                      kernel compile time       performance impact
   Original           150.29 seconds
   With patch         150.19 seconds            almost no impact

Performance on virtual machine:
1. 1 VM with 32 vCPUs and 2 numa nodes, numa node pinned
                      kernel compile time       performance impact
   Original           170.87 seconds
   With patch         171.73 seconds            almost no impact

2. 2 VMs, each VM with 32 vCPUs and 2 numa nodes, numa node pinned
                      kernel compile time       performance impact
   Original           2362.04 seconds
   With patch         354.73 seconds            +565%

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
arch/loongarch/include/asm/Kbuild
arch/loongarch/include/asm/paravirt.h
arch/loongarch/include/asm/qspinlock.h [new file with mode: 0644]
arch/loongarch/kernel/paravirt.c
arch/loongarch/kernel/setup.c
arch/loongarch/kernel/smp.c

index 2bb3676429c05259f7cc61e0250f8dec04e66478..4635b755b2b43efd27841e07488e68cc5821cc34 100644 (file)
@@ -6,7 +6,6 @@ generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h
 generic-y += qrwlock.h
-generic-y += qspinlock.h
 generic-y += user.h
 generic-y += ioctl.h
 generic-y += statfs.h
index dddec49671ae4254dc49acf69f79d499945b711f..3f4323603e6aa971edbcf70955947889a42339c3 100644 (file)
@@ -19,6 +19,7 @@ static inline u64 paravirt_steal_clock(int cpu)
 
 int __init pv_ipi_init(void);
 int __init pv_time_init(void);
+int __init pv_spinlock_init(void);
 
 #else
 
@@ -31,5 +32,11 @@ static inline int pv_time_init(void)
 {
        return 0;
 }
+
+static inline int pv_spinlock_init(void)
+{
+       return 0;
+}
+
 #endif // CONFIG_PARAVIRT
 #endif
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
new file mode 100644 (file)
index 0000000..e76d3aa
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_QSPINLOCK_H
+#define _ASM_LOONGARCH_QSPINLOCK_H
+
+#include <linux/jump_label.h>
+
+#ifdef CONFIG_PARAVIRT
+
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+
+#define virt_spin_lock virt_spin_lock
+
+static inline bool virt_spin_lock(struct qspinlock *lock)
+{
+       int val;
+
+       if (!static_branch_unlikely(&virt_spin_lock_key))
+               return false;
+
+       /*
+        * On hypervisors without PARAVIRT_SPINLOCKS support we fall
+        * back to a Test-and-Set spinlock, because fair locks have
+        * horrible lock 'holder' preemption issues.
+        */
+
+__retry:
+       val = atomic_read(&lock->val);
+
+       if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
+               cpu_relax();
+               goto __retry;
+       }
+
+       return true;
+}
+
+#endif /* CONFIG_PARAVIRT */
+
+#include <asm-generic/qspinlock.h>
+
+#endif // _ASM_LOONGARCH_QSPINLOCK_H
index 9c9b75b76f62f298c89a14f520508e2d553f8a3f..32537e9b17080e14b8ba87d7083d1f6e714c6398 100644 (file)
@@ -13,6 +13,7 @@ static int has_steal_clock;
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
 static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key);
 
 static u64 native_steal_clock(int cpu)
 {
@@ -300,3 +301,13 @@ int __init pv_time_init(void)
 
        return 0;
 }
+
+int __init pv_spinlock_init(void)
+{
+       if (!cpu_has_hypervisor)
+               return 0;
+
+       static_branch_enable(&virt_spin_lock_key);
+
+       return 0;
+}
index 0f0740f0be274ac3ba944776e0f2889540e99308..00e307203ddb423316b36a041efa47ce39e96559 100644 (file)
@@ -603,6 +603,8 @@ void __init setup_arch(char **cmdline_p)
        arch_mem_init(cmdline_p);
 
        resource_init();
+       jump_label_init(); /* Initialise the static keys for paravirtualization */
+
 #ifdef CONFIG_SMP
        plat_smp_setup();
        prefill_possible_map();
index ca405ab86aaef69837de7f9c799759045dc52066..482b3c7e3042d844cf5078f473d7f4db3eb4dc14 100644 (file)
@@ -476,7 +476,7 @@ core_initcall(ipi_pm_init);
 #endif
 
 /* Preload SMP state for boot cpu */
-void smp_prepare_boot_cpu(void)
+void __init smp_prepare_boot_cpu(void)
 {
        unsigned int cpu, node, rr_node;
 
@@ -509,6 +509,8 @@ void smp_prepare_boot_cpu(void)
                        rr_node = next_node_in(rr_node, node_online_map);
                }
        }
+
+       pv_spinlock_init();
 }
 
 /* called from main before smp_init() */