git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
RISC-V: Detect unaligned vector accesses supported
Author: Jesse Taube <jesse@rivosinc.com>
Thu, 17 Oct 2024 19:00:21 +0000 (12:00 -0700)
Committer: Palmer Dabbelt <palmer@rivosinc.com>
Fri, 18 Oct 2024 19:38:33 +0000 (12:38 -0700)
Run an unaligned vector access to test if the system supports
vector unaligned access. Add the result to a new key in hwprobe.
This is useful for usermode to know if vector misaligned accesses are
supported and if they are faster or slower than equivalent byte accesses.

Signed-off-by: Jesse Taube <jesse@rivosinc.com>
Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
Link: https://lore.kernel.org/r/20241017-jesse_unaligned_vector-v10-4-5b33500160f8@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/Kconfig
arch/riscv/include/asm/cpufeature.h
arch/riscv/include/asm/entry-common.h
arch/riscv/include/asm/hwprobe.h
arch/riscv/include/asm/vector.h
arch/riscv/include/uapi/asm/hwprobe.h
arch/riscv/kernel/Makefile
arch/riscv/kernel/sys_hwprobe.c
arch/riscv/kernel/traps_misaligned.c
arch/riscv/kernel/unaligned_access_speed.c
arch/riscv/kernel/vector.c

index fab7c6bc172987ce6efd788761687df0181cda53..05f698a8897770ce50641e57a7687919048bff5b 100644 (file)
@@ -784,12 +784,26 @@ config THREAD_SIZE_ORDER
          Specify the Pages of thread stack size (from 4KB to 64KB), which also
          affects irq stack size, which is equal to thread stack size.
 
+config RISCV_MISALIGNED
+       bool
+       help
+         Embed support for detecting and emulating misaligned
+         scalar or vector loads and stores.
+
 config RISCV_SCALAR_MISALIGNED
        bool
+       select RISCV_MISALIGNED
        select SYSCTL_ARCH_UNALIGN_ALLOW
        help
          Embed support for emulating misaligned loads and stores.
 
+config RISCV_VECTOR_MISALIGNED
+       bool
+       select RISCV_MISALIGNED
+       depends on RISCV_ISA_V
+       help
+         Enable detecting support for vector misaligned loads and stores.
+
 choice
        prompt "Unaligned Accesses Support"
        default RISCV_PROBE_UNALIGNED_ACCESS
@@ -841,6 +855,28 @@ config RISCV_EFFICIENT_UNALIGNED_ACCESS
 
 endchoice
 
+choice
+       prompt "Vector unaligned Accesses Support"
+       depends on RISCV_ISA_V
+       default RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+       help
+         This determines the level of support for vector unaligned accesses. This
+         information is used by the kernel to perform optimizations. It is also
+         exposed to user space via the hwprobe syscall. The hardware will be
+         probed at boot by default.
+
+config RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+       bool "Probe speed of vector unaligned accesses"
+       select RISCV_VECTOR_MISALIGNED
+       depends on RISCV_ISA_V
+       help
+         During boot, the kernel will run a series of tests to determine the
+         speed of vector unaligned accesses if they are supported. This probing
+         will dynamically determine the speed of vector unaligned accesses on
+         the underlying system if they are supported.
+
+endchoice
+
 source "arch/riscv/Kconfig.vendor"
 
 endmenu # "Platform type"
index ccc6cf141c20eec1950bc2ac9fed52dd27856450..85bf1bce51e65c1c8732802ba6e0765aa40773a2 100644 (file)
@@ -59,8 +59,8 @@ void riscv_user_isa_enable(void);
 #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
        _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
 
-#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
 bool check_unaligned_access_emulated_all_cpus(void);
+#if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
 void check_unaligned_access_emulated(struct work_struct *work __always_unused);
 void unaligned_emulation_finish(void);
 bool unaligned_ctl_available(void);
@@ -72,6 +72,12 @@ static inline bool unaligned_ctl_available(void)
 }
 #endif
 
+bool check_vector_unaligned_access_emulated_all_cpus(void);
+#if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
+void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
+DECLARE_PER_CPU(long, vector_misaligned_access);
+#endif
+
 #if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
 DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
 
index 0a4e3544c877a00f511cd40666d4dac94d5799ce..7b32d2b08bb669790ac2567b4a977367747d9401 100644 (file)
@@ -25,18 +25,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 void handle_page_fault(struct pt_regs *regs);
 void handle_break(struct pt_regs *regs);
 
-#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
 int handle_misaligned_load(struct pt_regs *regs);
 int handle_misaligned_store(struct pt_regs *regs);
-#else
-static inline int handle_misaligned_load(struct pt_regs *regs)
-{
-       return -1;
-}
-static inline int handle_misaligned_store(struct pt_regs *regs)
-{
-       return -1;
-}
-#endif
 
 #endif /* _ASM_RISCV_ENTRY_COMMON_H */
index ffb9484531af7e4166ef970cb7c79f04176a2823..1ce1df6d0ff3c6f80a0221ec0c1755d30c425445 100644 (file)
@@ -8,7 +8,7 @@
 
 #include <uapi/asm/hwprobe.h>
 
-#define RISCV_HWPROBE_MAX_KEY 9
+#define RISCV_HWPROBE_MAX_KEY 10
 
 static inline bool riscv_hwprobe_key_is_valid(__s64 key)
 {
index be7d309cca8a78d3963ae42d4b55fda89b8ab9dc..c7c023afbacd7b51e0a95e0cc7a68258b75bf9fb 100644 (file)
@@ -21,6 +21,7 @@
 
 extern unsigned long riscv_v_vsize;
 int riscv_v_setup_vsize(void);
+bool insn_is_vector(u32 insn_buf);
 bool riscv_v_first_use_handler(struct pt_regs *regs);
 void kernel_vector_begin(void);
 void kernel_vector_end(void);
@@ -268,6 +269,7 @@ struct pt_regs;
 
 static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
 static __always_inline bool has_vector(void) { return false; }
+static __always_inline bool insn_is_vector(u32 insn_buf) { return false; }
 static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
index 1e153cda57db85b99d40f4fcc67aa69651a0f856..34c88c15322c5a30bd820009e2b6335373a384d0 100644 (file)
@@ -88,6 +88,11 @@ struct riscv_hwprobe {
 #define                RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW            2
 #define                RISCV_HWPROBE_MISALIGNED_SCALAR_FAST            3
 #define                RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED     4
+#define RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF       10
+#define                RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN         0
+#define                RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW            2
+#define                RISCV_HWPROBE_MISALIGNED_VECTOR_FAST            3
+#define                RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED     4
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 
 /* Flags */
index 45624c5ea86c6f2fa85a4e0f44de0d6e8a853478..7f88cc4931f5c44105e827d46832a94fe81483f4 100644 (file)
@@ -68,8 +68,8 @@ obj-y += probes/
 obj-y  += tests/
 obj-$(CONFIG_MMU) += vdso.o vdso/
 
-obj-$(CONFIG_RISCV_SCALAR_MISALIGNED)  += traps_misaligned.o
-obj-$(CONFIG_RISCV_SCALAR_MISALIGNED)  += unaligned_access_speed.o
+obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o
 obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)     += copy-unaligned.o
 
 obj-$(CONFIG_FPU)              += fpu.o
index cea0ca2bf2a25ecc671e31b141e84c6d1977da25..6441baada36bc29424383056efd9fc145464e1ed 100644 (file)
@@ -201,6 +201,37 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
 }
 #endif
 
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+       int cpu;
+       u64 perf = -1ULL;
+
+       /* Return if supported or not even if speed wasn't probed */
+       for_each_cpu(cpu, cpus) {
+               int this_perf = per_cpu(vector_misaligned_access, cpu);
+
+               if (perf == -1ULL)
+                       perf = this_perf;
+
+               if (perf != this_perf) {
+                       perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+                       break;
+               }
+       }
+
+       if (perf == -1ULL)
+               return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+       return perf;
+}
+#else
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+       return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+}
+#endif
+
 static void hwprobe_one_pair(struct riscv_hwprobe *pair,
                             const struct cpumask *cpus)
 {
@@ -229,6 +260,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
                pair->value = hwprobe_misaligned(cpus);
                break;
 
+       case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF:
+               pair->value = hwprobe_vec_misaligned(cpus);
+               break;
+
        case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
                pair->value = 0;
                if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
index d076dde5ad20434412a05ff18fe85194f6127bb4..ef59ecfc64cb58a3e72687c3d694fca351c0a47e 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/entry-common.h>
 #include <asm/hwprobe.h>
 #include <asm/cpufeature.h>
+#include <asm/vector.h>
 
 #define INSN_MATCH_LB                  0x3
 #define INSN_MASK_LB                   0x707f
@@ -322,12 +323,37 @@ union reg_data {
        u64 data_u64;
 };
 
-static bool unaligned_ctl __read_mostly;
-
 /* sysctl hooks */
 int unaligned_enabled __read_mostly = 1;       /* Enabled by default */
 
-int handle_misaligned_load(struct pt_regs *regs)
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+       unsigned long epc = regs->epc;
+       unsigned long insn;
+
+       if (get_insn(regs, epc, &insn))
+               return -1;
+
+       /* Only return 0 when in check_vector_unaligned_access_emulated */
+       if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
+               *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+               regs->epc = epc + INSN_LEN(insn);
+               return 0;
+       }
+
+       /* If vector instruction we don't emulate it yet */
+       regs->epc = epc;
+       return -1;
+}
+#else
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+       return -1;
+}
+#endif
+
+static int handle_scalar_misaligned_load(struct pt_regs *regs)
 {
        union reg_data val;
        unsigned long epc = regs->epc;
@@ -435,7 +461,7 @@ int handle_misaligned_load(struct pt_regs *regs)
        return 0;
 }
 
-int handle_misaligned_store(struct pt_regs *regs)
+static int handle_scalar_misaligned_store(struct pt_regs *regs)
 {
        union reg_data val;
        unsigned long epc = regs->epc;
@@ -526,6 +552,91 @@ int handle_misaligned_store(struct pt_regs *regs)
        return 0;
 }
 
+int handle_misaligned_load(struct pt_regs *regs)
+{
+       unsigned long epc = regs->epc;
+       unsigned long insn;
+
+       if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) {
+               if (get_insn(regs, epc, &insn))
+                       return -1;
+
+               if (insn_is_vector(insn))
+                       return handle_vector_misaligned_load(regs);
+       }
+
+       if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+               return handle_scalar_misaligned_load(regs);
+
+       return -1;
+}
+
+int handle_misaligned_store(struct pt_regs *regs)
+{
+       if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+               return handle_scalar_misaligned_store(regs);
+
+       return -1;
+}
+
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused)
+{
+       long *mas_ptr = this_cpu_ptr(&vector_misaligned_access);
+       unsigned long tmp_var;
+
+       *mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+       kernel_vector_begin();
+       /*
+        * In pre-13.0.0 versions of GCC, vector registers cannot appear in
+        * the clobber list. This inline asm clobbers v0, but since we do not
+        * currently build the kernel with V enabled, the v0 clobber arg is not
+        * needed (as the compiler will not emit vector code itself). If the kernel
+        * is changed to build with V enabled, the clobber arg will need to be
+        * added here.
+        */
+       __asm__ __volatile__ (
+               ".balign 4\n\t"
+               ".option push\n\t"
+               ".option arch, +zve32x\n\t"
+               "       vsetivli zero, 1, e16, m1, ta, ma\n\t"  // Vectors of 16b
+               "       vle16.v v0, (%[ptr])\n\t"               // Load bytes
+               ".option pop\n\t"
+               : : [ptr] "r" ((u8 *)&tmp_var + 1));
+       kernel_vector_end();
+}
+
+bool check_vector_unaligned_access_emulated_all_cpus(void)
+{
+       int cpu;
+
+       if (!has_vector()) {
+               for_each_online_cpu(cpu)
+                       per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+               return false;
+       }
+
+       schedule_on_each_cpu(check_vector_unaligned_access_emulated);
+
+       for_each_online_cpu(cpu)
+               if (per_cpu(vector_misaligned_access, cpu)
+                   == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
+                       return false;
+
+       return true;
+}
+#else
+bool check_vector_unaligned_access_emulated_all_cpus(void)
+{
+       return false;
+}
+#endif
+
+#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
+
+static bool unaligned_ctl __read_mostly;
+
 void check_unaligned_access_emulated(struct work_struct *work __always_unused)
 {
        int cpu = smp_processor_id();
@@ -574,3 +685,9 @@ bool unaligned_ctl_available(void)
 {
        return unaligned_ctl;
 }
+#else
+bool check_unaligned_access_emulated_all_cpus(void)
+{
+       return false;
+}
+#endif
index f3508cc54f91ae9da62342d28d24f869b894651d..0b8b5e17453a8eaef7e8834d7d8d317d0bbb8c77 100644 (file)
@@ -19,7 +19,8 @@
 #define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
 #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
 
-DEFINE_PER_CPU(long, misaligned_access_speed);
+DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
+DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
 
 #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
 static cpumask_t fast_misaligned_access;
@@ -260,23 +261,24 @@ out:
        kfree(bufs);
        return 0;
 }
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static int check_unaligned_access_speed_all_cpus(void)
+{
+       return 0;
+}
+#endif
 
 static int check_unaligned_access_all_cpus(void)
 {
-       bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+       bool all_cpus_emulated;
+
+       all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+       check_vector_unaligned_access_emulated_all_cpus();
 
        if (!all_cpus_emulated)
                return check_unaligned_access_speed_all_cpus();
 
        return 0;
 }
-#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
-static int check_unaligned_access_all_cpus(void)
-{
-       check_unaligned_access_emulated_all_cpus();
-
-       return 0;
-}
-#endif
 
 arch_initcall(check_unaligned_access_all_cpus);
index 682b3feee45114694f29f2479bb7c75ce54e7e56..821818886fab06aa054e2ba3266897e9b4d161c0 100644 (file)
@@ -66,7 +66,7 @@ void __init riscv_v_setup_ctx_cache(void)
 #endif
 }
 
-static bool insn_is_vector(u32 insn_buf)
+bool insn_is_vector(u32 insn_buf)
 {
        u32 opcode = insn_buf & __INSN_OPCODE_MASK;
        u32 width, csr;