static const HChar* show_hwcaps_arm64 ( UInt hwcaps )
{
- /* Since there are no variants, just insist that hwcaps is zero,
- and declare it invalid otherwise. */
- if (hwcaps == 0)
- return "baseline";
- return "Unsupported";
+ static const HChar prefix[] = "v8";
+ static const struct {
+ UInt hwcaps_bit;
+ HChar name[16];
+ } hwcaps_list[] = {
+ { VEX_HWCAPS_ARM64_FHM, "fhm" },
+ { VEX_HWCAPS_ARM64_DPBCVAP, "dpbcvap" },
+ { VEX_HWCAPS_ARM64_DPBCVADP, "dpbcvadp" },
+ { VEX_HWCAPS_ARM64_SM3, "sm3" },
+ { VEX_HWCAPS_ARM64_SM4, "sm4" },
+ { VEX_HWCAPS_ARM64_SHA3, "sha3" },
+ { VEX_HWCAPS_ARM64_RDM, "rdm" },
+ { VEX_HWCAPS_ARM64_I8MM, "i8mm" },
+ { VEX_HWCAPS_ARM64_ATOMICS, "atomics" },
+ { VEX_HWCAPS_ARM64_BF16, "bf16" },
+ { VEX_HWCAPS_ARM64_FP16, "fp16" },
+ { VEX_HWCAPS_ARM64_VFP16, "vfp16" },
+ };
+ /* Element count of hwcaps_list; needed for the buffer size and the loop
+ * below (assumed not defined elsewhere in this hunk). */
+ #define NUM_HWCAPS (sizeof hwcaps_list / sizeof hwcaps_list[0])
+
+ /* Large enough for the "v8" prefix plus "-<name>" for every listed
+ * capability and a terminating '\0'. */
+ static HChar buf[sizeof prefix +
+ NUM_HWCAPS * (sizeof hwcaps_list[0].name + 1) + 1];
+
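+ /* Build the name: start with the "v8" prefix and append "-<name>" for each
+ * capability bit set in hwcaps. */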
+ HChar *p = buf + vex_sprintf(buf, "%s", prefix);
+ UInt i;
+ for (i = 0 ; i < NUM_HWCAPS; ++i) {
+ if (hwcaps & hwcaps_list[i].hwcaps_bit)
+ p = p + vex_sprintf(p, "-%s", hwcaps_list[i].name);
+ }
+
+ return buf;
}
static const HChar* show_hwcaps_s390x ( UInt hwcaps )
}
}
- case VexArchARM64:
- if (hwcaps != 0)
+ case VexArchARM64: {
+ /* Mandatory dependencies. */
+ Bool have_fp16 = ((hwcaps & VEX_HWCAPS_ARM64_FP16) != 0);
+ Bool have_vfp16 = ((hwcaps & VEX_HWCAPS_ARM64_VFP16) != 0);
+ if (have_fp16 != have_vfp16)
invalid_hwcaps(arch, hwcaps,
- "Unsupported hardware capabilities.\n");
+ "Mismatch detected between scalar and vector FP16 features.\n");
+ Bool have_rdm = ((hwcaps & VEX_HWCAPS_ARM64_RDM) != 0);
+ Bool have_atomics = ((hwcaps & VEX_HWCAPS_ARM64_ATOMICS) != 0);
+ if (have_rdm != have_atomics)
+ invalid_hwcaps(arch, hwcaps,
+ "Mismatch detected between RDMA and atomics features.\n");
return;
+ }
case VexArchS390X:
if (! s390_host_has_ldisp)
testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
|| defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
- || defined(VGA_mips64)
+ || defined(VGA_mips64) || defined(VGA_arm64)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
#elif defined(VGA_arm64)
{
+ /* Use the attribute and feature registers to determine host hardware
+ * capabilities. Only user-space features are read. Naming conventions
+ * follow the Arm Architecture Reference Manual.
+ *
+ * ID_AA64ISAR0_EL1 Instruction Set Attribute Register 0
+ * ----------------
+ * ...5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
+ * ...1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
+ *     FHM   DP  SM4  SM3 SHA3  RDM   ATOMICS
+ *
+ * ID_AA64ISAR1_EL1 Instruction Set Attribute Register 1
+ * ----------------
+ * ...5555 5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
+ * ...5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
+ * ...I8MM      BF16                                                    DPB
+ *
+ * ID_AA64PFR0_EL1 Processor Feature Register 0
+ * ---------------
+ * 6666...2222 2222 1111 1111 11
+ * 3210...7654 3210 9876 5432 1098 7654 3210
+ * ASIMD FP16
+ */
+
+ Bool is_base_v8 = False;
+
+ Bool have_fhm, have_dp, have_sm4, have_sm3, have_sha3, have_rdm;
+ Bool have_atomics, have_i8mm, have_bf16, have_dpbcvap, have_dpbcvadp;
+ Bool have_vfp16, have_fp16;
+
+ have_fhm = have_dp = have_sm4 = have_sm3 = have_sha3 = have_rdm
+ = have_atomics = have_i8mm = have_bf16 = have_dpbcvap
+ = have_dpbcvadp = have_vfp16 = have_fp16 = False;
+
+ /* Some baseline v8.0 kernels do not allow reads of these registers. Use
+ * the same SIGILL handling algorithm as other architectures for such
+ * kernels.
+ */
+ vki_sigset_t saved_set, tmp_set;
+ vki_sigaction_fromK_t saved_sigill_act;
+ vki_sigaction_toK_t tmp_sigill_act;
+
+ vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
+
+ VG_(sigemptyset)(&tmp_set);
+ VG_(sigaddset)(&tmp_set, VKI_SIGILL);
+
+ Int r;
+
+ r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
+ vg_assert(r == 0);
+
+ r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
+ vg_assert(r == 0);
+ tmp_sigill_act = saved_sigill_act;
+
+ /* NODEFER: signal handler does not return (from the kernel's point of
+ view), hence if it is to successfully catch a signal more than once,
+ we need the NODEFER flag. */
+ tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
+ tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
+ tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
+ tmp_sigill_act.ksa_handler = handler_unsup_insn;
+ VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
+
+ /* Does reading the ID_AA64ISAR0_EL1 register raise SIGILL on baseline v8.0? */
+ if (VG_MINIMAL_SETJMP(env_unsup_insn))
+ is_base_v8 = True;
+ else
+ __asm__ __volatile__("mrs x0, ID_AA64ISAR0_EL1" : : : "x0");
+
+ VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
+ VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
+ VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
+
va = VexArchARM64;
vai.endness = VexEndnessLE;
- /* So far there are no variants. */
+ /* Baseline features are v8.0. */
vai.hwcaps = 0;
VG_(machine_get_cache_info)(&vai);
VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
vai.arm64_requires_fallback_LLSC ? "yes" : "no");
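+ /* If the probe above raised SIGILL, report baseline v8.0 with no optional
+ * features. */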
+ if (is_base_v8)
+ return True;
+
+ /* ID_AA64ISAR0_EL1 Instruction set attribute register 0 fields */
+ #define ID_AA64ISAR0_FHM_SHIFT 48
+ #define ID_AA64ISAR0_DP_SHIFT 44
+ #define ID_AA64ISAR0_SM4_SHIFT 40
+ #define ID_AA64ISAR0_SM3_SHIFT 36
+ #define ID_AA64ISAR0_SHA3_SHIFT 32
+ #define ID_AA64ISAR0_RDM_SHIFT 28
+ #define ID_AA64ISAR0_ATOMICS_SHIFT 20
+ /* Field values */
+ #define ID_AA64ISAR0_FHM_SUPPORTED 0x1
+ #define ID_AA64ISAR0_DP_SUPPORTED 0x1
+ #define ID_AA64ISAR0_SM4_SUPPORTED 0x1
+ #define ID_AA64ISAR0_SM3_SUPPORTED 0x1
+ #define ID_AA64ISAR0_SHA3_SUPPORTED 0x1
+ #define ID_AA64ISAR0_RDM_SUPPORTED 0x1
+ #define ID_AA64ISAR0_ATOMICS_SUPPORTED 0x2
+
+ /* ID_AA64ISAR1_EL1 Instruction set attribute register 1 fields */
+ #define ID_AA64ISAR1_I8MM_SHIFT 52
+ #define ID_AA64ISAR1_BF16_SHIFT 44
+ #define ID_AA64ISAR1_DPB_SHIFT 0
+ /* Field values */
+ #define ID_AA64ISAR1_I8MM_SUPPORTED 0x1
+ #define ID_AA64ISAR1_BF16_SUPPORTED 0x1
+ #define ID_AA64ISAR1_DPBCVAP_SUPPORTED 0x1
+ #define ID_AA64ISAR1_DPBCVADP_SUPPORTED 0x2
+
+ /* ID_AA64PFR0_EL1 Processor feature register 0 fields */
+ #define ID_AA64PFR0_VFP16_SHIFT 20
+ #define ID_AA64PFR0_FP16_SHIFT 16
+ /* Field values */
+ #define ID_AA64PFR0_VFP16_SUPPORTED 0x1
+ #define ID_AA64PFR0_FP16_SUPPORTED 0x1
+
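+ /* Log the raw value of an ID register; used below as a debugging aid and
+ * does not set any have_* flag. */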
+ #define get_cpu_ftr(id) ({ \
+ unsigned long val; \
+ asm("mrs %0, "#id : "=r" (val)); \
+ VG_(debugLog)(1, "machine", "ARM64: %-20s: 0x%016lx\n", #id, val); \
+ })
+ get_cpu_ftr(ID_AA64ISAR0_EL1);
+ get_cpu_ftr(ID_AA64ISAR1_EL1);
+ get_cpu_ftr(ID_AA64PFR0_EL1);
+
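+ /* Read ID register 'id', extract the 4-bit field at bit offset 'ftr', and
+ * set 'have_ftr' to True when the field, masked with 'fval', is at least
+ * 'fval'. */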
+ #define get_ftr(id, ftr, fval, have_ftr) ({ \
+ unsigned long rval; \
+ asm("mrs %0, "#id : "=r" (rval)); \
+ have_ftr = (fval & ((rval >> ftr) & 0xf)) >= fval ? True : False; \
+ })
+
+ /* Read ID_AA64ISAR0_EL1 attributes */
+
+ /* FHM indicates support for FMLAL and FMLSL instructions.
+ * Optional for v8.2.
+ */
+ get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT,
+ ID_AA64ISAR0_FHM_SUPPORTED, have_fhm);
+
+ /* DP indicates support for UDOT and SDOT instructions.
+ * Optional for v8.2.
+ */
+ get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT,
+ ID_AA64ISAR0_DP_SUPPORTED, have_dp);
+
+ /* SM4 indicates support for SM4E and SM4EKEY instructions.
+ * Optional for v8.2.
+ */
+ get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT,
+ ID_AA64ISAR0_SM4_SUPPORTED, have_sm4);
+
+ /* SM3 indicates support for SM3SS1, SM3TT1A, SM3TT1B, SM3TT2A, SM3TT2B,
+ * SM3PARTW1, and SM3PARTW2 instructions.
+ * Optional for v8.2.
+ */
+ get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT,
+ ID_AA64ISAR0_SM3_SUPPORTED, have_sm3);
+
+ /* SHA3 indicates support for EOR3, RAX1, XAR, and BCAX instructions.
+ * Optional for v8.2.
+ */
+ get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT,
+ ID_AA64ISAR0_SHA3_SUPPORTED, have_sha3);
+
+ /* RDM indicates support for SQRDMLAH and SQRDMLSH instructions.
+ * Mandatory from v8.1 onwards.
+ */
+ get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT,
+ ID_AA64ISAR0_RDM_SUPPORTED, have_rdm);
+
+ /* v8.1 ATOMICS indicates support for LDADD, LDCLR, LDEOR, LDSET, LDSMAX,
+ * LDSMIN, LDUMAX, LDUMIN, CAS, CASP, and SWP instructions.
+ * Mandatory from v8.1 onwards.
+ */
+ get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT,
+ ID_AA64ISAR0_ATOMICS_SUPPORTED, have_atomics);
+
+ /* Read ID_AA64ISAR1_EL1 attributes */
+
+ /* I8MM indicates support for SMMLA, SUDOT, UMMLA, USMMLA, and USDOT
+ * instructions.
+ * Optional for v8.2.
+ */
+ get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT,
+ ID_AA64ISAR1_I8MM_SUPPORTED, have_i8mm);
+
+ /* BF16 indicates support for BFDOT, BFMLAL, BFMLAL2, BFMMLA, BFCVT, and
+ * BFCVT2 instructions.
+ * Optional for v8.2.
+ */
+ get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT,
+ ID_AA64ISAR1_BF16_SUPPORTED, have_bf16);
+
+ /* DPB indicates support for DC CVAP instruction.
+ * Mandatory from v8.2 onwards.
+ */
+ get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
+ ID_AA64ISAR1_DPBCVAP_SUPPORTED, have_dpbcvap);
+
+ /* DPB indicates support for DC CVADP instruction.
+ * Optional for v8.2.
+ */
+ get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
+ ID_AA64ISAR1_DPBCVADP_SUPPORTED, have_dpbcvadp);
+
+ /* Read ID_AA64PFR0_EL1 attributes */
+
+ /* VFP16 indicates support for half-precision vector arithmetic.
+ * Optional for v8.2. Must be the same value as FP16.
+ */
+ get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_VFP16_SHIFT,
+ ID_AA64PFR0_VFP16_SUPPORTED, have_vfp16);
+
+ /* FP16 indicates support for half-precision scalar arithmetic.
+ * Optional for v8.2. Must be the same value as VFP16.
+ */
+ get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_FP16_SHIFT,
+ ID_AA64PFR0_FP16_SUPPORTED, have_fp16);
+
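+ /* Fold the detected features into the VEX hwcaps word. */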
+ if (have_fhm) vai.hwcaps |= VEX_HWCAPS_ARM64_FHM;
+ if (have_dpbcvap) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVAP;
+ if (have_dpbcvadp) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVADP;
+ if (have_sm3) vai.hwcaps |= VEX_HWCAPS_ARM64_SM3;
+ if (have_sm4) vai.hwcaps |= VEX_HWCAPS_ARM64_SM4;
+ if (have_sha3) vai.hwcaps |= VEX_HWCAPS_ARM64_SHA3;
+ if (have_rdm) vai.hwcaps |= VEX_HWCAPS_ARM64_RDM;
+ if (have_i8mm) vai.hwcaps |= VEX_HWCAPS_ARM64_I8MM;
+ if (have_atomics) vai.hwcaps |= VEX_HWCAPS_ARM64_ATOMICS;
+ if (have_bf16) vai.hwcaps |= VEX_HWCAPS_ARM64_BF16;
+ if (have_fp16) vai.hwcaps |= VEX_HWCAPS_ARM64_FP16;
+ if (have_vfp16) vai.hwcaps |= VEX_HWCAPS_ARM64_VFP16;
+
+ #undef get_cpu_ftr
+ #undef get_ftr
+
return True;
}