From: Julian Seward
Date: Wed, 9 Dec 2020 11:54:45 +0000 (+0100)
Subject: Bug 414268 - Enable AArch64 feature detection and decoding for v8.x instructions...
X-Git-Tag: VALGRIND_3_17_0~92
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cb52fee5ddbc2c0e936fd1efe5107a1afcf375cf;p=thirdparty%2Fvalgrind.git

Bug 414268 - Enable AArch64 feature detection and decoding for v8.x
instructions (where x>0).

Patch from Assad Hashmi.
---

diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
index 12f521d8c3..43f053ea55 100644
--- a/VEX/priv/main_main.c
+++ b/VEX/priv/main_main.c
@@ -1765,11 +1765,36 @@ static const HChar* show_hwcaps_arm ( UInt hwcaps )
 
 static const HChar* show_hwcaps_arm64 ( UInt hwcaps )
 {
-   /* Since there are no variants, just insist that hwcaps is zero,
-      and declare it invalid otherwise. */
-   if (hwcaps == 0)
-      return "baseline";
-   return "Unsupported";
+   static const HChar prefix[] = "v8";
+   static const struct {
+      UInt  hwcaps_bit;
+      HChar name[16];
+   } hwcaps_list[] = {
+      { VEX_HWCAPS_ARM64_FHM,      "fhm" },
+      { VEX_HWCAPS_ARM64_DPBCVAP,  "dpbcvap" },
+      { VEX_HWCAPS_ARM64_DPBCVADP, "dpbcvadp" },
+      { VEX_HWCAPS_ARM64_SM3,      "sm3" },
+      { VEX_HWCAPS_ARM64_SM4,      "sm4" },
+      { VEX_HWCAPS_ARM64_SHA3,     "sha3" },
+      { VEX_HWCAPS_ARM64_RDM,      "rdm" },
+      { VEX_HWCAPS_ARM64_I8MM,     "i8mm" },
+      { VEX_HWCAPS_ARM64_ATOMICS,  "atomics" },
+      { VEX_HWCAPS_ARM64_BF16,     "bf16" },
+      { VEX_HWCAPS_ARM64_FP16,     "fp16" },
+      { VEX_HWCAPS_ARM64_VFP16,    "vfp16" },
+   };
+   #define NUM_HWCAPS (sizeof hwcaps_list / sizeof hwcaps_list[0])
+
+   static HChar buf[sizeof prefix +
+                    NUM_HWCAPS * (sizeof hwcaps_list[0].name + 1) +
+                    1];  // '\0'
+
+   HChar *p = buf + vex_sprintf(buf, "%s", prefix);
+   UInt i;
+   for (i = 0 ; i < NUM_HWCAPS; ++i) {
+      if (hwcaps & hwcaps_list[i].hwcaps_bit)
+         p = p + vex_sprintf(p, "-%s", hwcaps_list[i].name);
+   }
+
+   return buf;
 }
 
 static const HChar* show_hwcaps_s390x ( UInt hwcaps )
@@ -2130,11 +2155,20 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps )
          }
       }
 
-      case VexArchARM64:
-         if (hwcaps != 0)
+      case VexArchARM64: {
+         /* Mandatory dependencies. */
+         Bool have_fp16  = ((hwcaps & VEX_HWCAPS_ARM64_FP16) != 0);
+         Bool have_vfp16 = ((hwcaps & VEX_HWCAPS_ARM64_VFP16) != 0);
+         if (have_fp16 != have_vfp16)
             invalid_hwcaps(arch, hwcaps,
-                           "Unsupported hardware capabilities.\n");
+                           "Mismatch detected between scalar and vector FP16 features.\n");
+         Bool have_rdm     = ((hwcaps & VEX_HWCAPS_ARM64_RDM) != 0);
+         Bool have_atomics = ((hwcaps & VEX_HWCAPS_ARM64_ATOMICS) != 0);
+         if (have_rdm != have_atomics)
+            invalid_hwcaps(arch, hwcaps,
+                           "Mismatch detected between RDMA and atomics features.\n");
          return;
+      }
 
       case VexArchS390X:
          if (! s390_host_has_ldisp
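The rewritten show_hwcaps_arm64() builds the capability string by starting
from the "v8" prefix and appending one "-<name>" suffix per hwcaps bit that
is set, so a baseline host prints as plain "v8". A minimal standalone sketch
of the same pattern, using plain C types and sprintf in place of VEX's HChar
and vex_sprintf (the bit values and names here are illustrative, not the
libvex.h ones):

    #include <stdio.h>

    #define HWCAP_FHM (1u << 4)     /* illustrative values only */
    #define HWCAP_RDM (1u << 10)

    static const struct { unsigned bit; const char *name; } caps[] = {
       { HWCAP_FHM, "fhm" },
       { HWCAP_RDM, "rdm" },
    };

    static const char* show_hwcaps(unsigned hwcaps)
    {
       static char buf[64];
       char *p = buf + sprintf(buf, "v8");
       for (unsigned i = 0; i < sizeof caps / sizeof caps[0]; i++)
          if (hwcaps & caps[i].bit)
             p += sprintf(p, "-%s", caps[i].name);
       return buf;                   /* hwcaps == 0 yields just "v8" */
    }

    int main(void)
    {
       printf("%s\n", show_hwcaps(0));                      /* v8         */
       printf("%s\n", show_hwcaps(HWCAP_FHM | HWCAP_RDM));  /* v8-fhm-rdm */
       return 0;
    }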
diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h
index 2ffed0ad06..46a7a5553e 100644
--- a/VEX/pub/libvex.h
+++ b/VEX/pub/libvex.h
@@ -207,7 +207,18 @@ typedef
 #define VEX_ARM_ARCHLEVEL(x) ((x) & 0x3f)
 
 /* ARM64: baseline capability is AArch64 v8. */
-/* (no definitions since no variants so far) */
+#define VEX_HWCAPS_ARM64_FHM      (1 << 4)
+#define VEX_HWCAPS_ARM64_DPBCVAP  (1 << 5)
+#define VEX_HWCAPS_ARM64_DPBCVADP (1 << 6)
+#define VEX_HWCAPS_ARM64_SM3      (1 << 7)
+#define VEX_HWCAPS_ARM64_SM4      (1 << 8)
+#define VEX_HWCAPS_ARM64_SHA3     (1 << 9)
+#define VEX_HWCAPS_ARM64_RDM      (1 << 10)
+#define VEX_HWCAPS_ARM64_ATOMICS  (1 << 11)
+#define VEX_HWCAPS_ARM64_I8MM     (1 << 12)
+#define VEX_HWCAPS_ARM64_BF16     (1 << 13)
+#define VEX_HWCAPS_ARM64_FP16     (1 << 14)
+#define VEX_HWCAPS_ARM64_VFP16    (1 << 15)
 
 /* MIPS baseline capability */
 /* Assigned Company values for bits 23:16 of the PRId Register
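These masks are designed to be OR-ed into the single UInt hwcaps word that
check_hwcaps() later validates, including the two pairing rules added above:
RDM and the LSE atomics are both mandatory from v8.1, and the scalar and
vector FP16 fields must report the same value. A hedged sketch of composing
and checking such a word; validate_hwcaps() is a hypothetical stand-in for
the check_hwcaps() logic, not a VEX API:

    #include <stdbool.h>
    #include <stdio.h>

    /* Mirrors the libvex.h definitions above. */
    #define VEX_HWCAPS_ARM64_RDM     (1 << 10)
    #define VEX_HWCAPS_ARM64_ATOMICS (1 << 11)
    #define VEX_HWCAPS_ARM64_FP16    (1 << 14)
    #define VEX_HWCAPS_ARM64_VFP16   (1 << 15)

    /* Hypothetical stand-in for check_hwcaps(): scalar and vector FP16
       must match, as must RDM and atomics. */
    static bool validate_hwcaps(unsigned hwcaps)
    {
       bool fp16    = (hwcaps & VEX_HWCAPS_ARM64_FP16)    != 0;
       bool vfp16   = (hwcaps & VEX_HWCAPS_ARM64_VFP16)   != 0;
       bool rdm     = (hwcaps & VEX_HWCAPS_ARM64_RDM)     != 0;
       bool atomics = (hwcaps & VEX_HWCAPS_ARM64_ATOMICS) != 0;
       return fp16 == vfp16 && rdm == atomics;
    }

    int main(void)
    {
       unsigned ok  = VEX_HWCAPS_ARM64_FP16 | VEX_HWCAPS_ARM64_VFP16;
       unsigned bad = VEX_HWCAPS_ARM64_RDM;   /* RDM without atomics */
       printf("%d %d\n", validate_hwcaps(ok), validate_hwcaps(bad)); /* 1 0 */
       return 0;
    }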
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index 228ae2554e..1bf50846d2 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -478,7 +478,7 @@ Int VG_(machine_arm_archlevel) = 4;
    testing, so we need a VG_MINIMAL_JMP_BUF. */
 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
     || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
-    || defined(VGA_mips64)
+    || defined(VGA_mips64) || defined(VGA_arm64)
 #include "pub_core_libcsetjmp.h"
 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
 static void handler_unsup_insn ( Int x ) {
@@ -1719,10 +1719,84 @@ Bool VG_(machine_get_hwcaps)( void )
 
 #elif defined(VGA_arm64)
    {
+     /* Use the attribute and feature registers to determine host hardware
+      * capabilities. Only user-space features are read. Naming conventions
+      * follow the Arm Architecture Reference Manual.
+      *
+      * ID_AA64ISAR0_EL1 Instruction Set Attribute Register 0
+      * ----------------
+      * ...5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
+      * ...1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
+      *     FHM   DP  SM4  SM3 SHA3  RDM      ATOMICS
+      *
+      * ID_AA64ISAR1_EL1 Instruction Set Attribute Register 1
+      * ----------------
+      * ...5555 5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
+      * ...5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
+      *    I8MM      BF16                                                    DPB
+      *
+      * ID_AA64PFR0_EL1 Processor Feature Register 0
+      * ---------------
+      * 6666...2222 2222 1111 1111 11
+      * 3210...7654 3210 9876 5432 1098 7654 3210
+      *        ASIMD FP16
+      */
+
+     Bool is_base_v8 = False;
+
+     Bool have_fhm, have_dp, have_sm4, have_sm3, have_sha3, have_rdm;
+     Bool have_atomics, have_i8mm, have_bf16, have_dpbcvap, have_dpbcvadp;
+     Bool have_vfp16, have_fp16;
+
+     have_fhm = have_dp = have_sm4 = have_sm3 = have_sha3 = have_rdm
+        = have_atomics = have_i8mm = have_bf16 = have_dpbcvap
+        = have_dpbcvadp = have_vfp16 = have_fp16 = False;
+
+     /* Some baseline v8.0 kernels do not allow reads of these registers. Use
+      * the same SIGILL handling algorithm as other architectures for such
+      * kernels.
+      */
+     vki_sigset_t          saved_set, tmp_set;
+     vki_sigaction_fromK_t saved_sigill_act;
+     vki_sigaction_toK_t   tmp_sigill_act;
+
+     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
+
+     VG_(sigemptyset)(&tmp_set);
+     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
+
+     Int r;
+
+     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
+     vg_assert(r == 0);
+
+     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
+     vg_assert(r == 0);
+     tmp_sigill_act = saved_sigill_act;
+
+     /* NODEFER: signal handler does not return (from the kernel's point of
+        view), hence if it is to successfully catch a signal more than once,
+        we need the NODEFER flag. */
+     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
+     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
+     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
+     tmp_sigill_act.ksa_handler = handler_unsup_insn;
+     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
+
+     /* Does reading ID_AA64ISAR0_EL1 register throw SIGILL on base v8.0? */
+     if (VG_MINIMAL_SETJMP(env_unsup_insn))
+        is_base_v8 = True;
+     else
+        __asm__ __volatile__("mrs x0, ID_AA64ISAR0_EL1" : : : "x0");
+
+     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
+     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
+     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
+
      va = VexArchARM64;
      vai.endness = VexEndnessLE;
 
-     /* So far there are no variants. */
+     /* Baseline features are v8.0. */
      vai.hwcaps = 0;
 
      VG_(machine_get_cache_info)(&vai);
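On kernels with ID-register emulation the MRS above succeeds (the kernel
traps and emulates EL0 reads of the ID registers); on older baseline v8.0
kernels it raises SIGILL, which the handler turns into is_base_v8 = True.
A self-contained sketch of the same setjmp/SIGILL probing idiom using plain
libc calls in place of Valgrind's VG_(sigaction)/VG_MINIMAL_SETJMP wrappers
(AArch64 Linux only; elsewhere it simply reports the register as
unreadable):

    #include <setjmp.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>

    static sigjmp_buf env;

    static void on_sigill(int sig)
    {
       (void)sig;
       siglongjmp(env, 1);           /* unwind out of the faulting MRS */
    }

    int main(void)
    {
    #if defined(__aarch64__)
       struct sigaction sa, old;
       memset(&sa, 0, sizeof sa);
       sa.sa_handler = on_sigill;
       sa.sa_flags   = SA_NODEFER;   /* handler exits via longjmp, not return */
       sigaction(SIGILL, &sa, &old);

       if (sigsetjmp(env, 1)) {
          puts("ID_AA64ISAR0_EL1 unreadable: treat host as baseline v8.0");
       } else {
          unsigned long val;
          __asm__ __volatile__("mrs %0, ID_AA64ISAR0_EL1" : "=r"(val));
          printf("ID_AA64ISAR0_EL1 = 0x%016lx\n", val);
       }
       sigaction(SIGILL, &old, NULL);
    #else
       puts("not AArch64: nothing to probe");
    #endif
       return 0;
    }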
+ */ + get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, + ID_AA64ISAR0_SM3_SUPPORTED, have_sm3); + + /* SHA3 indicates support for EOR3, RAX1, XAR, and BCAX instructions. + * Optional for v8.2. + */ + get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, + ID_AA64ISAR0_SHA3_SUPPORTED, have_sha3); + + /* RDM indicates support for SQRDMLAH and SQRDMLSH instructions. + * Mandatory from v8.1 onwards. + */ + get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, + ID_AA64ISAR0_RDM_SUPPORTED, have_rdm); + + /* v8.1 ATOMICS indicates support for LDADD, LDCLR, LDEOR, LDSET, LDSMAX, + * LDSMIN, LDUMAX, LDUMIN, CAS, CASP, and SWP instructions. + * Mandatory from v8.1 onwards. + */ + get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, + ID_AA64ISAR0_ATOMICS_SUPPORTED, have_atomics); + + /* Read ID_AA64ISAR1_EL1 attributes */ + + /* I8MM indicates support for SMMLA, SUDOT, UMMLA, USMMLA, and USDOT + * instructions. + * Optional for v8.2. + */ + get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, + ID_AA64ISAR1_I8MM_SUPPORTED, have_i8mm); + + /* BF16 indicates support for BFDOT, BFMLAL, BFMLAL2, BFMMLA, BFCVT, and + * BFCVT2 instructions. + * Optional for v8.2. + */ + get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, + ID_AA64ISAR1_BF16_SUPPORTED, have_bf16); + + /* DPB indicates support for DC CVAP instruction. + * Mandatory for v8.2 onwards. + */ + get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, + ID_AA64ISAR1_DPBCVAP_SUPPORTED, have_dpbcvap); + + /* DPB indicates support for DC CVADP instruction. + * Optional for v8.2. + */ + get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, + ID_AA64ISAR1_DPBCVADP_SUPPORTED, have_dpbcvadp); + + /* Read ID_AA64PFR0_EL1 attributes */ + + /* VFP16 indicates support for half-precision vector arithmetic. + * Optional for v8.2. Must be the same value as FP16. + */ + get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_VFP16_SHIFT, + ID_AA64PFR0_VFP16_SUPPORTED, have_vfp16); + + /* FP16 indicates support for half-precision scalar arithmetic. + * Optional for v8.2. Must be the same value as VFP16. + */ + get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_FP16_SHIFT, + ID_AA64PFR0_FP16_SUPPORTED, have_fp16); + + if (have_fhm) vai.hwcaps |= VEX_HWCAPS_ARM64_FHM; + if (have_dpbcvap) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVAP; + if (have_dpbcvadp) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVADP; + if (have_sm3) vai.hwcaps |= VEX_HWCAPS_ARM64_SM3; + if (have_sm4) vai.hwcaps |= VEX_HWCAPS_ARM64_SM4; + if (have_sha3) vai.hwcaps |= VEX_HWCAPS_ARM64_SHA3; + if (have_rdm) vai.hwcaps |= VEX_HWCAPS_ARM64_RDM; + if (have_i8mm) vai.hwcaps |= VEX_HWCAPS_ARM64_I8MM; + if (have_atomics) vai.hwcaps |= VEX_HWCAPS_ARM64_ATOMICS; + if (have_bf16) vai.hwcaps |= VEX_HWCAPS_ARM64_BF16; + if (have_fp16) vai.hwcaps |= VEX_HWCAPS_ARM64_FP16; + if (have_vfp16) vai.hwcaps |= VEX_HWCAPS_ARM64_VFP16; + + #undef get_cpu_ftr + #undef get_ftr + return True; }