git.ipfire.org Git - thirdparty/linux.git/commitdiff
Merge branch kvm-arm64/nv-sve into kvmarm/next
author Oliver Upton <oliver.upton@linux.dev>
Sun, 14 Jul 2024 00:27:01 +0000 (00:27 +0000)
committer Oliver Upton <oliver.upton@linux.dev>
Sun, 14 Jul 2024 00:27:06 +0000 (00:27 +0000)
* kvm-arm64/nv-sve:
  : CPTR_EL2, FPSIMD/SVE support for nested
  :
  : This series brings support for honoring the guest hypervisor's CPTR_EL2
  : trap configuration when running a nested guest, along with support for
  : FPSIMD/SVE usage at L1 and L2.
  KVM: arm64: Allow the use of SVE+NV
  KVM: arm64: nv: Add additional trap setup for CPTR_EL2
  KVM: arm64: nv: Add trap description for CPTR_EL2
  KVM: arm64: nv: Add TCPAC/TTA to CPTR->CPACR conversion helper
  KVM: arm64: nv: Honor guest hypervisor's FP/SVE traps in CPTR_EL2
  KVM: arm64: nv: Load guest FP state for ZCR_EL2 trap
  KVM: arm64: nv: Handle CPACR_EL1 traps
  KVM: arm64: Spin off helper for programming CPTR traps
  KVM: arm64: nv: Ensure correct VL is loaded before saving SVE state
  KVM: arm64: nv: Use guest hypervisor's max VL when running nested guest
  KVM: arm64: nv: Save guest's ZCR_EL2 when in hyp context
  KVM: arm64: nv: Load guest hyp's ZCR into EL1 state
  KVM: arm64: nv: Handle ZCR_EL2 traps
  KVM: arm64: nv: Forward SVE traps to guest hypervisor
  KVM: arm64: nv: Forward FP/ASIMD traps to guest hypervisor

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
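
For reference, the decision at the heart of this series (forwarding FP/ASIMD and SVE traps to the guest hypervisor according to its virtual CPTR_EL2) can be modelled by the stand-alone sketch below. It is illustrative only and is not the helpers added by the series: the function name l1_cptr_traps and the locally defined constants are invented for this example, with bit positions taken from the Arm ARM (CPTR_EL2.TFP and CPTR_EL2.TZ in the nVHE layout, CPACR_EL1-style FPEN and ZEN fields when HCR_EL2.E2H is set).

/*
 * Illustrative sketch only: would the guest hypervisor's CPTR_EL2 value
 * trap a given FP/ASIMD or SVE access? If it would, KVM forwards the
 * exception to the guest hypervisor instead of handling it itself.
 */
#include <stdbool.h>
#include <stdint.h>

#define CPTR_EL2_TFP    (UINT64_C(1) << 10)     /* nVHE layout: trap FP/ASIMD */
#define CPTR_EL2_TZ     (UINT64_C(1) << 8)      /* nVHE layout: trap SVE */
#define CPACR_FPEN      (UINT64_C(3) << 20)     /* VHE layout: 0b11 = FP not trapped */
#define CPACR_ZEN       (UINT64_C(3) << 16)     /* VHE layout: 0b11 = SVE not trapped */

bool l1_cptr_traps(uint64_t cptr_el2, bool e2h, bool sve_access)
{
        if (e2h) {
                /* With E2H set, CPTR_EL2 mirrors the CPACR_EL1 layout */
                uint64_t mask = sve_access ? CPACR_ZEN : CPACR_FPEN;

                /* Simplification: treat anything other than 0b11 as trapping */
                return (cptr_el2 & mask) != mask;
        }

        /* nVHE layout: a set trap bit means the access traps to EL2 */
        return cptr_el2 & (sve_access ? CPTR_EL2_TZ : CPTR_EL2_TFP);
}

The in-tree code additionally distinguishes the EL0-only trap encodings (FPEN/ZEN == 0b01) and converts between the two CPTR_EL2 formats (see "Add TCPAC/TTA to CPTR->CPACR conversion helper" in the list above); when the check fires for a nested guest, the trap is reflected to the guest hypervisor, otherwise KVM loads the FP/SVE state and resumes.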
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_nested.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/vhe/switch.c
arch/arm64/kvm/nested.c
arch/arm64/kvm/sys_regs.c

Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 7a72b33049456a6839d248d476d85325806b57ba,f680182971145a26989efbef8a8f6777d2a984b5..77010b76c150f32725f75d5f57329c0ca7a1d12a
@@@ -266,52 -314,60 +314,104 @@@ static void kvm_hyp_save_fpsimd_host(st
        __fpsimd_save_state(*host_data_ptr(fpsimd_state));
  }
  
 +static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
 +{
 +      int ret = -EINVAL;
 +      u32 instr;
 +      u64 val;
 +
 +      /*
 +       * Ideally, we would never trap on EL2 S1 TLB invalidations using
 +       * the EL1 instructions when the guest's HCR_EL2.{E2H,TGE}=={1,1}.
 +       * But "thanks" to FEAT_NV2, we don't trap writes to HCR_EL2,
 +       * meaning that we can't track changes to the virtual TGE bit. So we
 +       * have to leave HCR_EL2.TTLB set on the host. Oopsie...
 +       *
 +       * Try and handle these invalidations as quickly as possible, without
 +       * fully exiting. Note that we don't need to consider any forwarding
 +       * here, as having E2H+TGE set is the very definition of being
 +       * InHost.
 +       *
 +       * For the lesser hypervisors out there that have failed to get on
 +       * with the VHE program, we can also handle the nVHE style of EL2
 +       * invalidation.
 +       */
 +      if (!(is_hyp_ctxt(vcpu)))
 +              return false;
 +
 +      instr = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
 +      val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
 +
 +      if ((kvm_supported_tlbi_s1e1_op(vcpu, instr) &&
 +           vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) ||
 +          kvm_supported_tlbi_s1e2_op (vcpu, instr))
 +              ret = __kvm_tlbi_s1e2(NULL, val, instr);
 +
 +      if (ret)
 +              return false;
 +
 +      __kvm_skip_instr(vcpu);
 +
 +      return true;
 +}
 +
+ static bool kvm_hyp_handle_cpacr_el1(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+       u64 esr = kvm_vcpu_get_esr(vcpu);
+       int rt;
+
+       if (!is_hyp_ctxt(vcpu) || esr_sys64_to_sysreg(esr) != SYS_CPACR_EL1)
+               return false;
+
+       rt = kvm_vcpu_sys_get_rt(vcpu);
+
+       if ((esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ) {
+               vcpu_set_reg(vcpu, rt, __vcpu_sys_reg(vcpu, CPTR_EL2));
+       } else {
+               vcpu_write_sys_reg(vcpu, vcpu_get_reg(vcpu, rt), CPTR_EL2);
+               __activate_cptr_traps(vcpu);
+       }
+
+       __kvm_skip_instr(vcpu);
+
+       return true;
+ }
+
+ static bool kvm_hyp_handle_zcr_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+       u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+
+       if (!vcpu_has_nv(vcpu))
+               return false;
+
+       if (sysreg != SYS_ZCR_EL2)
+               return false;
+
+       if (guest_owns_fp_regs())
+               return false;
+
+       /*
+        * ZCR_EL2 traps are handled in the slow path, with the expectation
+        * that the guest's FP context has already been loaded onto the CPU.
+        *
+        * Load the guest's FP context and unconditionally forward to the
+        * slow path for handling (i.e. return false).
+        */
+       kvm_hyp_handle_fpsimd(vcpu, exit_code);
+       return false;
+ }
+
  static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
  {
 +      if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code))
 +              return true;
 +
+       if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code))
+               return true;
+
+       if (kvm_hyp_handle_zcr_el2(vcpu, exit_code))
+               return true;
+
        return kvm_hyp_handle_sysreg(vcpu, exit_code);
  }
  
index 9ae4be49e22339eb60d149cec2fa5ee5984d47d8,2b9fcbb0fe6c50a3a0bad5fbcefdb76a50f63ec5..de789e0f1ae9cb6e1bc7ba0bd7bfe691df0143af
  #define NV_FTR(r, f)          ID_AA64##r##_EL1_##f
  
  /*
 - * Our emulated CPU doesn't support all the possible features. For the
 - * sake of simplicity (and probably mental sanity), wipe out a number
 - * of feature bits we don't intend to support for the time being.
 - * This list should get updated as new features get added to the NV
 - * support, and new extension to the architecture.
 + * Ratio of live shadow S2 MMU per vcpu. This is a trade-off between
 + * memory usage and potential number of different sets of S2 PTs in
 + * the guests. Running out of S2 MMUs only affects performance (we
 + * will invalidate them more often).
   */
 -static u64 limit_nv_id_reg(u32 id, u64 val)
 -{
 -      u64 tmp;
 -
 -      switch (id) {
 -      case SYS_ID_AA64ISAR0_EL1:
 -              /* Support everything but TME, O.S. and Range TLBIs */
 -              val &= ~(NV_FTR(ISAR0, TLB)             |
 -                       NV_FTR(ISAR0, TME));
 -              break;
 -
 -      case SYS_ID_AA64ISAR1_EL1:
 -              /* Support everything but Spec Invalidation */
 -              val &= ~(GENMASK_ULL(63, 56)    |
 -                       NV_FTR(ISAR1, SPECRES));
 -              break;
 -
 -      case SYS_ID_AA64PFR0_EL1:
 -              /* No AMU, MPAM, S-EL2, or RAS */
 -              val &= ~(GENMASK_ULL(55, 52)    |
 -                       NV_FTR(PFR0, AMU)      |
 -                       NV_FTR(PFR0, MPAM)     |
 -                       NV_FTR(PFR0, SEL2)     |
 -                       NV_FTR(PFR0, RAS)      |
 -                       NV_FTR(PFR0, EL3)      |
 -                       NV_FTR(PFR0, EL2)      |
 -                       NV_FTR(PFR0, EL1));
 -              /* 64bit EL1/EL2/EL3 only */
 -              val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
 -              val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
 -              val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
 -              break;
 -
 -      case SYS_ID_AA64PFR1_EL1:
 -              /* Only support BTI, SSBS, CSV2_frac */
 -              val &= (NV_FTR(PFR1, BT)        |
 -                      NV_FTR(PFR1, SSBS)      |
 -                      NV_FTR(PFR1, CSV2_frac));
 -              break;
 -
 -      case SYS_ID_AA64MMFR0_EL1:
 -              /* Hide ECV, ExS, Secure Memory */
 -              val &= ~(NV_FTR(MMFR0, ECV)             |
 -                       NV_FTR(MMFR0, EXS)             |
 -                       NV_FTR(MMFR0, TGRAN4_2)        |
 -                       NV_FTR(MMFR0, TGRAN16_2)       |
 -                       NV_FTR(MMFR0, TGRAN64_2)       |
 -                       NV_FTR(MMFR0, SNSMEM));
 -
 -              /* Disallow unsupported S2 page sizes */
 -              switch (PAGE_SIZE) {
 -              case SZ_64K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
 -                      fallthrough;
 -              case SZ_16K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
 -                      fallthrough;
 -              case SZ_4K:
 -                      /* Support everything */
 -                      break;
 -              }
 +#define S2_MMU_PER_VCPU               2
 +
 +void kvm_init_nested(struct kvm *kvm)
 +{
 +      kvm->arch.nested_mmus = NULL;
 +      kvm->arch.nested_mmus_size = 0;
 +}
 +
 +static int init_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 +{
 +      /*
 +       * We only initialise the IPA range on the canonical MMU, which
 +       * defines the contract between KVM and userspace on where the
 +       * "hardware" is in the IPA space. This affects the validity of MMIO
 +       * exits forwarded to userspace, for example.
 +       *
 +       * For nested S2s, we use the PARange as exposed to the guest, as it
 +       * is allowed to use it at will to expose whatever memory map it
 +       * wants to its own guests as it would be on real HW.
 +       */
 +      return kvm_init_stage2_mmu(kvm, mmu, kvm_get_pa_bits(kvm));
 +}
 +
 +int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm *kvm = vcpu->kvm;
 +      struct kvm_s2_mmu *tmp;
 +      int num_mmus, ret = 0;
 +
 +      /*
 +       * Let's treat memory allocation failures as benign: If we fail to
 +       * allocate anything, return an error and keep the allocated array
 +       * alive. Userspace may try to recover by initializing the vcpu
 +       * again, and there is no reason to affect the whole VM for this.
 +       */
 +      num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU;
 +      tmp = kvrealloc(kvm->arch.nested_mmus,
 +                      size_mul(sizeof(*kvm->arch.nested_mmus), kvm->arch.nested_mmus_size),
 +                      size_mul(sizeof(*kvm->arch.nested_mmus), num_mmus),
 +                      GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 +      if (!tmp)
 +              return -ENOMEM;
 +
 +      /*
 +       * If we went through a reallocation, adjust the MMU back-pointers in
 +       * the previously initialised kvm_pgtable structures.
 +       */
 +      if (kvm->arch.nested_mmus != tmp)
 +              for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
 +                      tmp[i].pgt->mmu = &tmp[i];
 +
 +      for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++)
 +              ret = init_nested_s2_mmu(kvm, &tmp[i]);
 +
 +      if (ret) {
 +              for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++)
 +                      kvm_free_stage2_pgd(&tmp[i]);
 +
 +              return ret;
 +      }
 +
 +      kvm->arch.nested_mmus_size = num_mmus;
 +      kvm->arch.nested_mmus = tmp;
 +
 +      return 0;
 +}
 +
 +struct s2_walk_info {
 +      int          (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
 +      void         *data;
 +      u64          baddr;
 +      unsigned int max_oa_bits;
 +      unsigned int pgshift;
 +      unsigned int sl;
 +      unsigned int t0sz;
 +      bool         be;
 +};
 +
 +static unsigned int ps_to_output_size(unsigned int ps)
 +{
 +      switch (ps) {
 +      case 0: return 32;
 +      case 1: return 36;
 +      case 2: return 40;
 +      case 3: return 42;
 +      case 4: return 44;
 +      case 5:
 +      default:
 +              return 48;
 +      }
 +}
 +
 +static u32 compute_fsc(int level, u32 fsc)
 +{
 +      return fsc | (level & 0x3);
 +}
 +
 +static int esr_s2_fault(struct kvm_vcpu *vcpu, int level, u32 fsc)
 +{
 +      u32 esr;
 +
 +      esr = kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC;
 +      esr |= compute_fsc(level, fsc);
 +      return esr;
 +}
 +
 +static int get_ia_size(struct s2_walk_info *wi)
 +{
 +      return 64 - wi->t0sz;
 +}
 +
 +static int check_base_s2_limits(struct s2_walk_info *wi,
 +                              int level, int input_size, int stride)
 +{
 +      int start_size, ia_size;
 +
 +      ia_size = get_ia_size(wi);
 +
 +      /* Check translation limits */
 +      switch (BIT(wi->pgshift)) {
 +      case SZ_64K:
 +              if (level == 0 || (level == 1 && ia_size <= 42))
 +                      return -EFAULT;
 +              break;
 +      case SZ_16K:
 +              if (level == 0 || (level == 1 && ia_size <= 40))
 +                      return -EFAULT;
 +              break;
 +      case SZ_4K:
 +              if (level < 0 || (level == 0 && ia_size <= 42))
 +                      return -EFAULT;
 +              break;
 +      }
 +
 +      /* Check input size limits */
 +      if (input_size > ia_size)
 +              return -EFAULT;
 +
 +      /* Check number of entries in starting level table */
 +      start_size = input_size - ((3 - level) * stride + wi->pgshift);
 +      if (start_size < 1 || start_size > stride + 4)
 +              return -EFAULT;
 +
 +      return 0;
 +}
 +
 +/* Check if output is within boundaries */
 +static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
 +{
 +      unsigned int output_size = wi->max_oa_bits;
 +
 +      if (output_size != 48 && (output & GENMASK_ULL(47, output_size)))
 +              return -1;
 +
 +      return 0;
 +}
 +
 +/*
 + * This is essentially a C-version of the pseudo code from the ARM ARM
 + * AArch64.TranslationTableWalk  function.  I strongly recommend looking at
 + * that pseudocode in trying to understand this.
 + *
 + * Must be called with the kvm->srcu read lock held
 + */
 +static int walk_nested_s2_pgd(phys_addr_t ipa,
 +                            struct s2_walk_info *wi, struct kvm_s2_trans *out)
 +{
 +      int first_block_level, level, stride, input_size, base_lower_bound;
 +      phys_addr_t base_addr;
 +      unsigned int addr_top, addr_bottom;
 +      u64 desc;  /* page table entry */
 +      int ret;
 +      phys_addr_t paddr;
 +
 +      switch (BIT(wi->pgshift)) {
 +      default:
 +      case SZ_64K:
 +      case SZ_16K:
 +              level = 3 - wi->sl;
 +              first_block_level = 2;
 +              break;
 +      case SZ_4K:
 +              level = 2 - wi->sl;
 +              first_block_level = 1;
 +              break;
 +      }
 +
 +      stride = wi->pgshift - 3;
 +      input_size = get_ia_size(wi);
 +      if (input_size > 48 || input_size < 25)
 +              return -EFAULT;
 +
 +      ret = check_base_s2_limits(wi, level, input_size, stride);
 +      if (WARN_ON(ret))
 +              return ret;
 +
 +      base_lower_bound = 3 + input_size - ((3 - level) * stride +
 +                         wi->pgshift);
 +      base_addr = wi->baddr & GENMASK_ULL(47, base_lower_bound);
 +
 +      if (check_output_size(wi, base_addr)) {
 +              out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
 +              return 1;
 +      }
 +
 +      addr_top = input_size - 1;
 +
 +      while (1) {
 +              phys_addr_t index;
 +
 +              addr_bottom = (3 - level) * stride + wi->pgshift;
 +              index = (ipa & GENMASK_ULL(addr_top, addr_bottom))
 +                      >> (addr_bottom - 3);
 +
 +              paddr = base_addr | index;
 +              ret = wi->read_desc(paddr, &desc, wi->data);
 +              if (ret < 0)
 +                      return ret;
 +
                /*
 -               * Since we can't support a guest S2 page size smaller than
 -               * the host's own page size (due to KVM only populating its
 -               * own S2 using the kernel's page size), advertise the
 -               * limitation using FEAT_GTG.
 +               * Handle reverse descriptors if endianness differs between the
 +               * host and the guest hypervisor.
                 */
 -              switch (PAGE_SIZE) {
 -              case SZ_4K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
 -                      fallthrough;
 -              case SZ_16K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
 -                      fallthrough;
 -              case SZ_64K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);
 +              if (wi->be)
 +                      desc = be64_to_cpu((__force __be64)desc);
 +              else
 +                      desc = le64_to_cpu((__force __le64)desc);
 +
 +              /* Check for valid descriptor at this point */
 +              if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
 +                      out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
 +                      out->upper_attr = desc;
 +                      return 1;
 +              }
 +
 +              /* We're at the final level or block translation level */
 +              if ((desc & 3) == 1 || level == 3)
 +                      break;
 +
 +              if (check_output_size(wi, desc)) {
 +                      out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
 +                      out->upper_attr = desc;
 +                      return 1;
 +              }
 +
 +              base_addr = desc & GENMASK_ULL(47, wi->pgshift);
 +
 +              level += 1;
 +              addr_top = addr_bottom - 1;
 +      }
 +
 +      if (level < first_block_level) {
 +              out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
 +              out->upper_attr = desc;
 +              return 1;
 +      }
 +
 +      /*
 +       * We don't use the contiguous bit in the stage-2 ptes, so skip check
 +       * for misprogramming of the contiguous bit.
 +       */
 +
 +      if (check_output_size(wi, desc)) {
 +              out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
 +              out->upper_attr = desc;
 +              return 1;
 +      }
 +
 +      if (!(desc & BIT(10))) {
 +              out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
 +              out->upper_attr = desc;
 +              return 1;
 +      }
 +
 +      /* Calculate and return the result */
 +      paddr = (desc & GENMASK_ULL(47, addr_bottom)) |
 +              (ipa & GENMASK_ULL(addr_bottom - 1, 0));
 +      out->output = paddr;
 +      out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
 +      out->readable = desc & (0b01 << 6);
 +      out->writable = desc & (0b10 << 6);
 +      out->level = level;
 +      out->upper_attr = desc & GENMASK_ULL(63, 52);
 +      return 0;
 +}
 +
 +static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
 +{
 +      struct kvm_vcpu *vcpu = data;
 +
 +      return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
 +}
 +
 +static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
 +{
 +      wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
 +
 +      switch (vtcr & VTCR_EL2_TG0_MASK) {
 +      case VTCR_EL2_TG0_4K:
 +              wi->pgshift = 12;        break;
 +      case VTCR_EL2_TG0_16K:
 +              wi->pgshift = 14;        break;
 +      case VTCR_EL2_TG0_64K:
 +      default:            /* IMPDEF: treat any other value as 64k */
 +              wi->pgshift = 16;        break;
 +      }
 +
 +      wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
 +      /* Global limit for now, should eventually be per-VM */
 +      wi->max_oa_bits = min(get_kvm_ipa_limit(),
 +                            ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr)));
 +}
 +
 +int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 +                     struct kvm_s2_trans *result)
 +{
 +      u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
 +      struct s2_walk_info wi;
 +      int ret;
 +
 +      result->esr = 0;
 +
 +      if (!vcpu_has_nv(vcpu))
 +              return 0;
 +
 +      wi.read_desc = read_guest_s2_desc;
 +      wi.data = vcpu;
 +      wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
 +
 +      vtcr_to_walk_info(vtcr, &wi);
 +
 +      wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
 +
 +      ret = walk_nested_s2_pgd(gipa, &wi, result);
 +      if (ret)
 +              result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC);
 +
 +      return ret;
 +}
 +
 +static unsigned int ttl_to_size(u8 ttl)
 +{
 +      int level = ttl & 3;
 +      int gran = (ttl >> 2) & 3;
 +      unsigned int max_size = 0;
 +
 +      switch (gran) {
 +      case TLBI_TTL_TG_4K:
 +              switch (level) {
 +              case 0:
 +                      break;
 +              case 1:
 +                      max_size = SZ_1G;
 +                      break;
 +              case 2:
 +                      max_size = SZ_2M;
 +                      break;
 +              case 3:
 +                      max_size = SZ_4K;
                        break;
                }
 -              /* Cap PARange to 48bits */
 -              tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
 -              if (tmp > 0b0101) {
 -                      val &= ~NV_FTR(MMFR0, PARANGE);
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
 +              break;
 +      case TLBI_TTL_TG_16K:
 +              switch (level) {
 +              case 0:
 +              case 1:
 +                      break;
 +              case 2:
 +                      max_size = SZ_32M;
 +                      break;
 +              case 3:
 +                      max_size = SZ_16K;
 +                      break;
                }
                break;
 -
 -      case SYS_ID_AA64MMFR1_EL1:
 -              val &= (NV_FTR(MMFR1, HCX)      |
 -                      NV_FTR(MMFR1, PAN)      |
 -                      NV_FTR(MMFR1, LO)       |
 -                      NV_FTR(MMFR1, HPDS)     |
 -                      NV_FTR(MMFR1, VH)       |
 -                      NV_FTR(MMFR1, VMIDBits));
 +      case TLBI_TTL_TG_64K:
 +              switch (level) {
 +              case 0:
 +              case 1:
 +                      /* No 52bit IPA support */
 +                      break;
 +              case 2:
 +                      max_size = SZ_512M;
 +                      break;
 +              case 3:
 +                      max_size = SZ_64K;
 +                      break;
 +              }
 +              break;
 +      default:                        /* No size information */
                break;
 +      }
 +
 +      return max_size;
 +}
 +
 +/*
 + * Compute the equivalent of the TTL field by parsing the shadow PT.  The
 + * granule size is extracted from the cached VTCR_EL2.TG0 while the level is
 + * retrieved from first entry carrying the level as a tag.
 + */
 +static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr)
 +{
 +      u64 tmp, sz = 0, vtcr = mmu->tlb_vtcr;
 +      kvm_pte_t pte;
 +      u8 ttl, level;
  
 -      case SYS_ID_AA64MMFR2_EL1:
 -              val &= ~(NV_FTR(MMFR2, BBM)     |
 -                       NV_FTR(MMFR2, TTL)     |
 -                       GENMASK_ULL(47, 44)    |
 -                       NV_FTR(MMFR2, ST)      |
 -                       NV_FTR(MMFR2, CCIDX)   |
 -                       NV_FTR(MMFR2, VARange));
 +      lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(mmu)->mmu_lock);
  
 -              /* Force TTL support */
 -              val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
 +      switch (vtcr & VTCR_EL2_TG0_MASK) {
 +      case VTCR_EL2_TG0_4K:
 +              ttl = (TLBI_TTL_TG_4K << 2);
 +              break;
 +      case VTCR_EL2_TG0_16K:
 +              ttl = (TLBI_TTL_TG_16K << 2);
                break;
 +      case VTCR_EL2_TG0_64K:
 +      default:            /* IMPDEF: treat any other value as 64k */
 +              ttl = (TLBI_TTL_TG_64K << 2);
 +              break;
 +      }
  
 -      case SYS_ID_AA64MMFR4_EL1:
 -              val = 0;
 -              if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
 -                      val |= FIELD_PREP(NV_FTR(MMFR4, E2H0),
 -                                        ID_AA64MMFR4_EL1_E2H0_NI_NV1);
 +      tmp = addr;
 +
 +again:
 +      /* Iteratively compute the block sizes for a particular granule size */
 +      switch (vtcr & VTCR_EL2_TG0_MASK) {
 +      case VTCR_EL2_TG0_4K:
 +              if      (sz < SZ_4K)    sz = SZ_4K;
 +              else if (sz < SZ_2M)    sz = SZ_2M;
 +              else if (sz < SZ_1G)    sz = SZ_1G;
 +              else                    sz = 0;
 +              break;
 +      case VTCR_EL2_TG0_16K:
 +              if      (sz < SZ_16K)   sz = SZ_16K;
 +              else if (sz < SZ_32M)   sz = SZ_32M;
 +              else                    sz = 0;
                break;
 +      case VTCR_EL2_TG0_64K:
 +      default:            /* IMPDEF: treat any other value as 64k */
 +              if      (sz < SZ_64K)   sz = SZ_64K;
 +              else if (sz < SZ_512M)  sz = SZ_512M;
 +              else                    sz = 0;
 +              break;
 +      }
 +
 +      if (sz == 0)
 +              return 0;
 +
 +      tmp &= ~(sz - 1);
 +      if (kvm_pgtable_get_leaf(mmu->pgt, tmp, &pte, NULL))
 +              goto again;
 +      if (!(pte & PTE_VALID))
 +              goto again;
 +      level = FIELD_GET(KVM_NV_GUEST_MAP_SZ, pte);
 +      if (!level)
 +              goto again;
 +
 +      ttl |= level;
 +
 +      /*
 +       * We now have found some level information in the shadow S2. Check
 +       * that the resulting range actually includes the original IPA.
 +       */
 +      sz = ttl_to_size(ttl);
 +      if (addr < (tmp + sz))
 +              return ttl;
 +
 +      return 0;
 +}
 +
 +unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val)
 +{
 +      struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
 +      unsigned long max_size;
 +      u8 ttl;
 +
 +      ttl = FIELD_GET(TLBI_TTL_MASK, val);
  
 -      case SYS_ID_AA64DFR0_EL1:
 -              /* Only limited support for PMU, Debug, BPs and WPs */
 -              val &= (NV_FTR(DFR0, PMUVer)    |
 -                      NV_FTR(DFR0, WRPs)      |
 -                      NV_FTR(DFR0, BRPs)      |
 -                      NV_FTR(DFR0, DebugVer));
 +      if (!ttl || !kvm_has_feat(kvm, ID_AA64MMFR2_EL1, TTL, IMP)) {
 +              /* No TTL, check the shadow S2 for a hint */
 +              u64 addr = (val & GENMASK_ULL(35, 0)) << 12;
 +              ttl = get_guest_mapping_ttl(mmu, addr);
 +      }
 +
 +      max_size = ttl_to_size(ttl);
  
 -              /* Cap Debug to ARMv8.1 */
 -              tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
 -              if (tmp > 0b0111) {
 -                      val &= ~NV_FTR(DFR0, DebugVer);
 -                      val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
 +      if (!max_size) {
 +              /* Compute the maximum extent of the invalidation */
 +              switch (mmu->tlb_vtcr & VTCR_EL2_TG0_MASK) {
 +              case VTCR_EL2_TG0_4K:
 +                      max_size = SZ_1G;
 +                      break;
 +              case VTCR_EL2_TG0_16K:
 +                      max_size = SZ_32M;
 +                      break;
 +              case VTCR_EL2_TG0_64K:
 +              default:    /* IMPDEF: treat any other value as 64k */
 +                      /*
 +                       * No, we do not support 52bit IPA in nested yet. Once
 +                       * we do, this should be 4TB.
 +                       */
 +                      max_size = SZ_512M;
 +                      break;
                }
 -              break;
 +      }
  
 -      default:
 -              /* Unknown register, just wipe it clean */
 -              val = 0;
 -              break;
 +      WARN_ON(!max_size);
 +      return max_size;
 +}
 +
 +/*
 + * We can have multiple *different* MMU contexts with the same VMID:
 + *
 + * - S2 being enabled or not, hence differing by the HCR_EL2.VM bit
 + *
 + * - Multiple vcpus using private S2s (huh huh...), hence differing by the
 + *   VTTBR_EL2.BADDR address
 + *
 + * - A combination of the above...
 + *
 + * We can always identify which MMU context to pick at run-time.  However,
 + * TLB invalidation involving a VMID must take action on all the TLBs using
 + * this particular VMID. This translates into applying the same invalidation
 + * operation to all the contexts that are using this VMID. Moar phun!
 + */
 +void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
 +                              const union tlbi_info *info,
 +                              void (*tlbi_callback)(struct kvm_s2_mmu *,
 +                                                    const union tlbi_info *))
 +{
 +      write_lock(&kvm->mmu_lock);
 +
 +      for (int i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (!kvm_s2_mmu_valid(mmu))
 +                      continue;
 +
 +              if (vmid == get_vmid(mmu->tlb_vttbr))
 +                      tlbi_callback(mmu, info);
 +      }
 +
 +      write_unlock(&kvm->mmu_lock);
 +}
 +
 +struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm *kvm = vcpu->kvm;
 +      bool nested_stage2_enabled;
 +      u64 vttbr, vtcr, hcr;
 +
 +      lockdep_assert_held_write(&kvm->mmu_lock);
 +
 +      vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
 +      vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
 +      hcr = vcpu_read_sys_reg(vcpu, HCR_EL2);
 +
 +      nested_stage2_enabled = hcr & HCR_VM;
 +
 +      /* Don't consider the CnP bit for the vttbr match */
 +      vttbr &= ~VTTBR_CNP_BIT;
 +
 +      /*
 +       * Two possibilities when looking up a S2 MMU context:
 +       *
 +       * - either S2 is enabled in the guest, and we need a context that is
 +       *   S2-enabled and matches the full VTTBR (VMID+BADDR) and VTCR,
 +       *   which makes it safe from a TLB conflict perspective (a broken
 +       *   guest won't be able to generate them),
 +       *
 +       * - or S2 is disabled, and we need a context that is S2-disabled
 +       *   and matches the VMID only, as all TLBs are tagged by VMID even
 +       *   if S2 translation is disabled.
 +       */
 +      for (int i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (!kvm_s2_mmu_valid(mmu))
 +                      continue;
 +
 +              if (nested_stage2_enabled &&
 +                  mmu->nested_stage2_enabled &&
 +                  vttbr == mmu->tlb_vttbr &&
 +                  vtcr == mmu->tlb_vtcr)
 +                      return mmu;
 +
 +              if (!nested_stage2_enabled &&
 +                  !mmu->nested_stage2_enabled &&
 +                  get_vmid(vttbr) == get_vmid(mmu->tlb_vttbr))
 +                      return mmu;
 +      }
 +      return NULL;
 +}
 +
 +static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm *kvm = vcpu->kvm;
 +      struct kvm_s2_mmu *s2_mmu;
 +      int i;
 +
 +      lockdep_assert_held_write(&vcpu->kvm->mmu_lock);
 +
 +      s2_mmu = lookup_s2_mmu(vcpu);
 +      if (s2_mmu)
 +              goto out;
 +
 +      /*
 +       * Make sure we don't always search from the same point, or we
 +       * will always reuse a potentially active context, leaving
 +       * free contexts unused.
 +       */
 +      for (i = kvm->arch.nested_mmus_next;
 +           i < (kvm->arch.nested_mmus_size + kvm->arch.nested_mmus_next);
 +           i++) {
 +              s2_mmu = &kvm->arch.nested_mmus[i % kvm->arch.nested_mmus_size];
 +
 +              if (atomic_read(&s2_mmu->refcnt) == 0)
 +                      break;
        }
 +      BUG_ON(atomic_read(&s2_mmu->refcnt)); /* We have struct MMUs to spare */
 +
 +      /* Set the scene for the next search */
 +      kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size;
 +
 +      /* Clear the old state */
 +      if (kvm_s2_mmu_valid(s2_mmu))
 +              kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu));
 +
 +      /*
 +       * The virtual VMID (modulo CnP) will be used as a key when matching
 +       * an existing kvm_s2_mmu.
 +       *
 +       * We cache VTCR at allocation time, once and for all. It'd be great
 +       * if the guest didn't screw that one up, as this is not very
 +       * forgiving...
 +       */
 +      s2_mmu->tlb_vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2) & ~VTTBR_CNP_BIT;
 +      s2_mmu->tlb_vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
 +      s2_mmu->nested_stage2_enabled = vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_VM;
 +
 +out:
 +      atomic_inc(&s2_mmu->refcnt);
 +      return s2_mmu;
 +}
  
 -      return val;
 +void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
 +{
 +      /* CnP being set denotes an invalid entry */
 +      mmu->tlb_vttbr = VTTBR_CNP_BIT;
 +      mmu->nested_stage2_enabled = false;
 +      atomic_set(&mmu->refcnt, 0);
 +}
 +
 +void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
 +{
 +      if (is_hyp_ctxt(vcpu)) {
 +              vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
 +      } else {
 +              write_lock(&vcpu->kvm->mmu_lock);
 +              vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
 +              write_unlock(&vcpu->kvm->mmu_lock);
 +      }
 +}
 +
 +void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
 +{
 +      if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) {
 +              atomic_dec(&vcpu->arch.hw_mmu->refcnt);
 +              vcpu->arch.hw_mmu = NULL;
 +      }
 +}
 +
 +/*
 + * Returns non-zero if permission fault is handled by injecting it to the next
 + * level hypervisor.
 + */
 +int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
 +{
 +      bool forward_fault = false;
 +
 +      trans->esr = 0;
 +
 +      if (!kvm_vcpu_trap_is_permission_fault(vcpu))
 +              return 0;
 +
 +      if (kvm_vcpu_trap_is_iabt(vcpu)) {
 +              forward_fault = !kvm_s2_trans_executable(trans);
 +      } else {
 +              bool write_fault = kvm_is_write_fault(vcpu);
 +
 +              forward_fault = ((write_fault && !trans->writable) ||
 +                               (!write_fault && !trans->readable));
 +      }
 +
 +      if (forward_fault)
 +              trans->esr = esr_s2_fault(vcpu, trans->level, ESR_ELx_FSC_PERM);
 +
 +      return forward_fault;
 +}
 +
 +int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
 +{
 +      vcpu_write_sys_reg(vcpu, vcpu->arch.fault.far_el2, FAR_EL2);
 +      vcpu_write_sys_reg(vcpu, vcpu->arch.fault.hpfar_el2, HPFAR_EL2);
 +
 +      return kvm_inject_nested_sync(vcpu, esr_el2);
 +}
 +
 +void kvm_nested_s2_wp(struct kvm *kvm)
 +{
 +      int i;
 +
 +      lockdep_assert_held_write(&kvm->mmu_lock);
 +
 +      for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (kvm_s2_mmu_valid(mmu))
 +                      kvm_stage2_wp_range(mmu, 0, kvm_phys_size(mmu));
 +      }
 +}
 +
 +void kvm_nested_s2_unmap(struct kvm *kvm)
 +{
 +      int i;
 +
 +      lockdep_assert_held_write(&kvm->mmu_lock);
 +
 +      for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (kvm_s2_mmu_valid(mmu))
 +                      kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu));
 +      }
 +}
 +
 +void kvm_nested_s2_flush(struct kvm *kvm)
 +{
 +      int i;
 +
 +      lockdep_assert_held_write(&kvm->mmu_lock);
 +
 +      for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (kvm_s2_mmu_valid(mmu))
 +                      kvm_stage2_flush_range(mmu, 0, kvm_phys_size(mmu));
 +      }
 +}
 +
 +void kvm_arch_flush_shadow_all(struct kvm *kvm)
 +{
 +      int i;
 +
 +      for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (!WARN_ON(atomic_read(&mmu->refcnt)))
 +                      kvm_free_stage2_pgd(mmu);
 +      }
 +      kfree(kvm->arch.nested_mmus);
 +      kvm->arch.nested_mmus = NULL;
 +      kvm->arch.nested_mmus_size = 0;
 +      kvm_uninit_stage2_mmu(kvm);
 +}
 +
 +/*
 + * Our emulated CPU doesn't support all the possible features. For the
 + * sake of simplicity (and probably mental sanity), wipe out a number
 + * of feature bits we don't intend to support for the time being.
 + * This list should get updated as new features get added to the NV
 + * support, and new extension to the architecture.
 + */
 +static void limit_nv_id_regs(struct kvm *kvm)
 +{
 +      u64 val, tmp;
 +
 +      /* Support everything but TME */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1);
 +      val &= ~NV_FTR(ISAR0, TME);
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1, val);
 +
 +      /* Support everything but Spec Invalidation and LS64 */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1);
 +      val &= ~(NV_FTR(ISAR1, LS64)    |
 +               NV_FTR(ISAR1, SPECRES));
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1, val);
 +
-       /* No AMU, MPAM, S-EL2, RAS or SVE */
++      /* No AMU, MPAM, S-EL2, or RAS */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1);
 +      val &= ~(GENMASK_ULL(55, 52)    |
 +               NV_FTR(PFR0, AMU)      |
 +               NV_FTR(PFR0, MPAM)     |
 +               NV_FTR(PFR0, SEL2)     |
 +               NV_FTR(PFR0, RAS)      |
-                NV_FTR(PFR0, SVE)      |
 +               NV_FTR(PFR0, EL3)      |
 +               NV_FTR(PFR0, EL2)      |
 +               NV_FTR(PFR0, EL1));
 +      /* 64bit EL1/EL2/EL3 only */
 +      val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
 +      val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
 +      val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
 +
 +      /* Only support BTI, SSBS, CSV2_frac */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1);
 +      val &= (NV_FTR(PFR1, BT)        |
 +              NV_FTR(PFR1, SSBS)      |
 +              NV_FTR(PFR1, CSV2_frac));
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1, val);
 +
 +      /* Hide ECV, ExS, Secure Memory */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
 +      val &= ~(NV_FTR(MMFR0, ECV)             |
 +               NV_FTR(MMFR0, EXS)             |
 +               NV_FTR(MMFR0, TGRAN4_2)        |
 +               NV_FTR(MMFR0, TGRAN16_2)       |
 +               NV_FTR(MMFR0, TGRAN64_2)       |
 +               NV_FTR(MMFR0, SNSMEM));
 +
 +      /* Disallow unsupported S2 page sizes */
 +      switch (PAGE_SIZE) {
 +      case SZ_64K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
 +              fallthrough;
 +      case SZ_16K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
 +              fallthrough;
 +      case SZ_4K:
 +              /* Support everything */
 +              break;
 +      }
 +      /*
 +       * Since we can't support a guest S2 page size smaller than
 +       * the host's own page size (due to KVM only populating its
 +       * own S2 using the kernel's page size), advertise the
 +       * limitation using FEAT_GTG.
 +       */
 +      switch (PAGE_SIZE) {
 +      case SZ_4K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
 +              fallthrough;
 +      case SZ_16K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
 +              fallthrough;
 +      case SZ_64K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);
 +              break;
 +      }
 +      /* Cap PARange to 48bits */
 +      tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
 +      if (tmp > 0b0101) {
 +              val &= ~NV_FTR(MMFR0, PARANGE);
 +              val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
 +      }
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1, val);
 +
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1);
 +      val &= (NV_FTR(MMFR1, HCX)      |
 +              NV_FTR(MMFR1, PAN)      |
 +              NV_FTR(MMFR1, LO)       |
 +              NV_FTR(MMFR1, HPDS)     |
 +              NV_FTR(MMFR1, VH)       |
 +              NV_FTR(MMFR1, VMIDBits));
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1, val);
 +
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1);
 +      val &= ~(NV_FTR(MMFR2, BBM)     |
 +               NV_FTR(MMFR2, TTL)     |
 +               GENMASK_ULL(47, 44)    |
 +               NV_FTR(MMFR2, ST)      |
 +               NV_FTR(MMFR2, CCIDX)   |
 +               NV_FTR(MMFR2, VARange));
 +
 +      /* Force TTL support */
 +      val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1, val);
 +
 +      val = 0;
 +      if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
 +              val |= FIELD_PREP(NV_FTR(MMFR4, E2H0),
 +                                ID_AA64MMFR4_EL1_E2H0_NI_NV1);
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR4_EL1, val);
 +
 +      /* Only limited support for PMU, Debug, BPs and WPs */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1);
 +      val &= (NV_FTR(DFR0, PMUVer)    |
 +              NV_FTR(DFR0, WRPs)      |
 +              NV_FTR(DFR0, BRPs)      |
 +              NV_FTR(DFR0, DebugVer));
 +
 +      /* Cap Debug to ARMv8.1 */
 +      tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
 +      if (tmp > 0b0111) {
 +              val &= ~NV_FTR(DFR0, DebugVer);
 +              val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
 +      }
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1, val);
  }
  
  u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr)
Simple merge