git.ipfire.org Git - thirdparty/linux.git/commitdiff
Merge branch kvm-arm64/nv-sve into kvmarm/next
author Oliver Upton <oliver.upton@linux.dev>
Sun, 14 Jul 2024 00:27:01 +0000 (00:27 +0000)
committer Oliver Upton <oliver.upton@linux.dev>
Sun, 14 Jul 2024 00:27:06 +0000 (00:27 +0000)
* kvm-arm64/nv-sve:
  : CPTR_EL2, FPSIMD/SVE support for nested
  :
  : This series brings support for honoring the guest hypervisor's CPTR_EL2
  : trap configuration when running a nested guest, along with support for
  : FPSIMD/SVE usage at L1 and L2.
  KVM: arm64: Allow the use of SVE+NV
  KVM: arm64: nv: Add additional trap setup for CPTR_EL2
  KVM: arm64: nv: Add trap description for CPTR_EL2
  KVM: arm64: nv: Add TCPAC/TTA to CPTR->CPACR conversion helper
  KVM: arm64: nv: Honor guest hypervisor's FP/SVE traps in CPTR_EL2
  KVM: arm64: nv: Load guest FP state for ZCR_EL2 trap
  KVM: arm64: nv: Handle CPACR_EL1 traps
  KVM: arm64: Spin off helper for programming CPTR traps
  KVM: arm64: nv: Ensure correct VL is loaded before saving SVE state
  KVM: arm64: nv: Use guest hypervisor's max VL when running nested guest
  KVM: arm64: nv: Save guest's ZCR_EL2 when in hyp context
  KVM: arm64: nv: Load guest hyp's ZCR into EL1 state
  KVM: arm64: nv: Handle ZCR_EL2 traps
  KVM: arm64: nv: Forward SVE traps to guest hypervisor
  KVM: arm64: nv: Forward FP/ASIMD traps to guest hypervisor

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
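
For reference, the decision at the heart of this series (forwarding FP/ASIMD and SVE traps to the guest hypervisor according to its virtual CPTR_EL2) can be modelled by the stand-alone sketch below. It is illustrative only and is not the helpers added by the series: the function name l1_cptr_traps and the locally defined constants are invented for this example, with bit positions taken from the Arm ARM (CPTR_EL2.TFP and CPTR_EL2.TZ in the nVHE layout, CPACR_EL1-style FPEN and ZEN fields when HCR_EL2.E2H is set).

/*
 * Illustrative sketch only: would the guest hypervisor's CPTR_EL2 value
 * trap a given FP/ASIMD or SVE access? If it would, KVM forwards the
 * exception to the guest hypervisor instead of handling it itself.
 */
#include <stdbool.h>
#include <stdint.h>

#define CPTR_EL2_TFP    (UINT64_C(1) << 10)     /* nVHE layout: trap FP/ASIMD */
#define CPTR_EL2_TZ     (UINT64_C(1) << 8)      /* nVHE layout: trap SVE */
#define CPACR_FPEN      (UINT64_C(3) << 20)     /* VHE layout: 0b11 = FP not trapped */
#define CPACR_ZEN       (UINT64_C(3) << 16)     /* VHE layout: 0b11 = SVE not trapped */

bool l1_cptr_traps(uint64_t cptr_el2, bool e2h, bool sve_access)
{
        if (e2h) {
                /* With E2H set, CPTR_EL2 mirrors the CPACR_EL1 layout */
                uint64_t mask = sve_access ? CPACR_ZEN : CPACR_FPEN;

                /* Simplification: treat anything other than 0b11 as trapping */
                return (cptr_el2 & mask) != mask;
        }

        /* nVHE layout: a set trap bit means the access traps to EL2 */
        return cptr_el2 & (sve_access ? CPTR_EL2_TZ : CPTR_EL2_TFP);
}

The in-tree code additionally distinguishes the EL0-only trap encodings (FPEN/ZEN == 0b01) and converts between the two CPTR_EL2 formats (see "Add TCPAC/TTA to CPTR->CPACR conversion helper" in the list above); when the check fires for a nested guest, the trap is reflected to the guest hypervisor, otherwise KVM loads the FP/SVE state and resumes.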
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_nested.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/vhe/switch.c
arch/arm64/kvm/nested.c
arch/arm64/kvm/sys_regs.c

Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 7a72b33049456a6839d248d476d85325806b57ba,f680182971145a26989efbef8a8f6777d2a984b5..77010b76c150f32725f75d5f57329c0ca7a1d12a
@@@ -266,52 -314,60 +314,104 @@@ static void kvm_hyp_save_fpsimd_host(st
        __fpsimd_save_state(*host_data_ptr(fpsimd_state));
  }
  
 +static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
 +{
 +      int ret = -EINVAL;
 +      u32 instr;
 +      u64 val;
 +
 +      /*
 +       * Ideally, we would never trap on EL2 S1 TLB invalidations using
 +       * the EL1 instructions when the guest's HCR_EL2.{E2H,TGE}=={1,1}.
 +       * But "thanks" to FEAT_NV2, we don't trap writes to HCR_EL2,
 +       * meaning that we can't track changes to the virtual TGE bit. So we
 +       * have to leave HCR_EL2.TTLB set on the host. Oopsie...
 +       *
 +       * Try and handle these invalidations as quickly as possible, without
 +       * fully exiting. Note that we don't need to consider any forwarding
 +       * here, as having E2H+TGE set is the very definition of being
 +       * InHost.
 +       *
 +       * For the lesser hypervisors out there that have failed to get on
 +       * with the VHE program, we can also handle the nVHE style of EL2
 +       * invalidation.
 +       */
 +      if (!(is_hyp_ctxt(vcpu)))
 +              return false;
 +
 +      instr = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
 +      val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
 +
 +      if ((kvm_supported_tlbi_s1e1_op(vcpu, instr) &&
 +           vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) ||
 +          kvm_supported_tlbi_s1e2_op (vcpu, instr))
 +              ret = __kvm_tlbi_s1e2(NULL, val, instr);
 +
 +      if (ret)
 +              return false;
 +
 +      __kvm_skip_instr(vcpu);
 +
 +      return true;
 +}
 +
+ static bool kvm_hyp_handle_cpacr_el1(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+       u64 esr = kvm_vcpu_get_esr(vcpu);
+       int rt;
+
+       if (!is_hyp_ctxt(vcpu) || esr_sys64_to_sysreg(esr) != SYS_CPACR_EL1)
+               return false;
+
+       rt = kvm_vcpu_sys_get_rt(vcpu);
+
+       if ((esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ) {
+               vcpu_set_reg(vcpu, rt, __vcpu_sys_reg(vcpu, CPTR_EL2));
+       } else {
+               vcpu_write_sys_reg(vcpu, vcpu_get_reg(vcpu, rt), CPTR_EL2);
+               __activate_cptr_traps(vcpu);
+       }
+
+       __kvm_skip_instr(vcpu);
+
+       return true;
+ }
+
+ static bool kvm_hyp_handle_zcr_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+       u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+
+       if (!vcpu_has_nv(vcpu))
+               return false;
+
+       if (sysreg != SYS_ZCR_EL2)
+               return false;
+
+       if (guest_owns_fp_regs())
+               return false;
+
+       /*
+        * ZCR_EL2 traps are handled in the slow path, with the expectation
+        * that the guest's FP context has already been loaded onto the CPU.
+        *
+        * Load the guest's FP context and unconditionally forward to the
+        * slow path for handling (i.e. return false).
+        */
+       kvm_hyp_handle_fpsimd(vcpu, exit_code);
+       return false;
+ }
+
  static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
  {
 +      if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code))
 +              return true;
 +
+       if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code))
+               return true;
+
+       if (kvm_hyp_handle_zcr_el2(vcpu, exit_code))
+               return true;
+
        return kvm_hyp_handle_sysreg(vcpu, exit_code);
  }
  
index 9ae4be49e22339eb60d149cec2fa5ee5984d47d8,2b9fcbb0fe6c50a3a0bad5fbcefdb76a50f63ec5..de789e0f1ae9cb6e1bc7ba0bd7bfe691df0143af
  #define NV_FTR(r, f)          ID_AA64##r##_EL1_##f
  
  /*
 - * Our emulated CPU doesn't support all the possible features. For the
 - * sake of simplicity (and probably mental sanity), wipe out a number
 - * of feature bits we don't intend to support for the time being.
 - * This list should get updated as new features get added to the NV
 - * support, and new extension to the architecture.
 + * Ratio of live shadow S2 MMU per vcpu. This is a trade-off between
 + * memory usage and potential number of different sets of S2 PTs in
 + * the guests. Running out of S2 MMUs only affects performance (we
 + * will invalidate them more often).
   */
 -static u64 limit_nv_id_reg(u32 id, u64 val)
 -{
 -      u64 tmp;
 -
 -      switch (id) {
 -      case SYS_ID_AA64ISAR0_EL1:
 -              /* Support everything but TME, O.S. and Range TLBIs */
 -              val &= ~(NV_FTR(ISAR0, TLB)             |
 -                       NV_FTR(ISAR0, TME));
 -              break;
 -
 -      case SYS_ID_AA64ISAR1_EL1:
 -              /* Support everything but Spec Invalidation */
 -              val &= ~(GENMASK_ULL(63, 56)    |
 -                       NV_FTR(ISAR1, SPECRES));
 -              break;
 -
 -      case SYS_ID_AA64PFR0_EL1:
 -              /* No AMU, MPAM, S-EL2, or RAS */
 -              val &= ~(GENMASK_ULL(55, 52)    |
 -                       NV_FTR(PFR0, AMU)      |
 -                       NV_FTR(PFR0, MPAM)     |
 -                       NV_FTR(PFR0, SEL2)     |
 -                       NV_FTR(PFR0, RAS)      |
 -                       NV_FTR(PFR0, EL3)      |
 -                       NV_FTR(PFR0, EL2)      |
 -                       NV_FTR(PFR0, EL1));
 -              /* 64bit EL1/EL2/EL3 only */
 -              val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
 -              val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
 -              val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
 -              break;
 -
 -      case SYS_ID_AA64PFR1_EL1:
 -              /* Only support BTI, SSBS, CSV2_frac */
 -              val &= (NV_FTR(PFR1, BT)        |
 -                      NV_FTR(PFR1, SSBS)      |
 -                      NV_FTR(PFR1, CSV2_frac));
 -              break;
 -
 -      case SYS_ID_AA64MMFR0_EL1:
 -              /* Hide ECV, ExS, Secure Memory */
 -              val &= ~(NV_FTR(MMFR0, ECV)             |
 -                       NV_FTR(MMFR0, EXS)             |
 -                       NV_FTR(MMFR0, TGRAN4_2)        |
 -                       NV_FTR(MMFR0, TGRAN16_2)       |
 -                       NV_FTR(MMFR0, TGRAN64_2)       |
 -                       NV_FTR(MMFR0, SNSMEM));
 -
 -              /* Disallow unsupported S2 page sizes */
 -              switch (PAGE_SIZE) {
 -              case SZ_64K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
 -                      fallthrough;
 -              case SZ_16K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
 -                      fallthrough;
 -              case SZ_4K:
 -                      /* Support everything */
 -                      break;
 -              }
 +#define S2_MMU_PER_VCPU               2
 +
 +void kvm_init_nested(struct kvm *kvm)
 +{
 +      kvm->arch.nested_mmus = NULL;
 +      kvm->arch.nested_mmus_size = 0;
 +}
 +
 +static int init_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 +{
 +      /*
 +       * We only initialise the IPA range on the canonical MMU, which
 +       * defines the contract between KVM and userspace on where the
 +       * "hardware" is in the IPA space. This affects the validity of MMIO
 +       * exits forwarded to userspace, for example.
 +       *
 +       * For nested S2s, we use the PARange as exposed to the guest, as it
 +       * is allowed to use it at will to expose whatever memory map it
 +       * wants to its own guests as it would be on real HW.
 +       */
 +      return kvm_init_stage2_mmu(kvm, mmu, kvm_get_pa_bits(kvm));
 +}
 +
 +int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm *kvm = vcpu->kvm;
 +      struct kvm_s2_mmu *tmp;
 +      int num_mmus, ret = 0;
 +
 +      /*
 +       * Let's treat memory allocation failures as benign: If we fail to
 +       * allocate anything, return an error and keep the allocated array
 +       * alive. Userspace may try to recover by initializing the vcpu
 +       * again, and there is no reason to affect the whole VM for this.
 +       */
 +      num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU;
 +      tmp = kvrealloc(kvm->arch.nested_mmus,
 +                      size_mul(sizeof(*kvm->arch.nested_mmus), kvm->arch.nested_mmus_size),
 +                      size_mul(sizeof(*kvm->arch.nested_mmus), num_mmus),
 +                      GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 +      if (!tmp)
 +              return -ENOMEM;
 +
 +      /*
 +       * If we went through a reallocation, adjust the MMU back-pointers in
 +       * the previously initialised kvm_pgtable structures.
 +       */
 +      if (kvm->arch.nested_mmus != tmp)
 +              for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
 +                      tmp[i].pgt->mmu = &tmp[i];
 +
 +      for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++)
 +              ret = init_nested_s2_mmu(kvm, &tmp[i]);
 +
 +      if (ret) {
 +              for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++)
 +                      kvm_free_stage2_pgd(&tmp[i]);
 +
 +              return ret;
 +      }
 +
 +      kvm->arch.nested_mmus_size = num_mmus;
 +      kvm->arch.nested_mmus = tmp;
 +
 +      return 0;
 +}
 +
 +struct s2_walk_info {
 +      int          (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
 +      void         *data;
 +      u64          baddr;
 +      unsigned int max_oa_bits;
 +      unsigned int pgshift;
 +      unsigned int sl;
 +      unsigned int t0sz;
 +      bool         be;
 +};
 +
 +static unsigned int ps_to_output_size(unsigned int ps)
 +{
 +      switch (ps) {
 +      case 0: return 32;
 +      case 1: return 36;
 +      case 2: return 40;
 +      case 3: return 42;
 +      case 4: return 44;
 +      case 5:
 +      default:
 +              return 48;
 +      }
 +}
 +
 +static u32 compute_fsc(int level, u32 fsc)
 +{
 +      return fsc | (level & 0x3);
 +}
 +
 +static int esr_s2_fault(struct kvm_vcpu *vcpu, int level, u32 fsc)
 +{
 +      u32 esr;
 +
 +      esr = kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC;
 +      esr |= compute_fsc(level, fsc);
 +      return esr;
 +}
 +
 +static int get_ia_size(struct s2_walk_info *wi)
 +{
 +      return 64 - wi->t0sz;
 +}
 +
 +static int check_base_s2_limits(struct s2_walk_info *wi,
 +                              int level, int input_size, int stride)
 +{
 +      int start_size, ia_size;
 +
 +      ia_size = get_ia_size(wi);
 +
 +      /* Check translation limits */
 +      switch (BIT(wi->pgshift)) {
 +      case SZ_64K:
 +              if (level == 0 || (level == 1 && ia_size <= 42))
 +                      return -EFAULT;
 +              break;
 +      case SZ_16K:
 +              if (level == 0 || (level == 1 && ia_size <= 40))
 +                      return -EFAULT;
 +              break;
 +      case SZ_4K:
 +              if (level < 0 || (level == 0 && ia_size <= 42))
 +                      return -EFAULT;
 +              break;
 +      }
 +
 +      /* Check input size limits */
 +      if (input_size > ia_size)
 +              return -EFAULT;
 +
 +      /* Check number of entries in starting level table */
 +      start_size = input_size - ((3 - level) * stride + wi->pgshift);
 +      if (start_size < 1 || start_size > stride + 4)
 +              return -EFAULT;
 +
 +      return 0;
 +}
 +
 +/* Check if output is within boundaries */
 +static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
 +{
 +      unsigned int output_size = wi->max_oa_bits;
 +
 +      if (output_size != 48 && (output & GENMASK_ULL(47, output_size)))
 +              return -1;
 +
 +      return 0;
 +}
 +
 +/*
 + * This is essentially a C-version of the pseudo code from the ARM ARM
 + * AArch64.TranslationTableWalk  function.  I strongly recommend looking at
 + * that pseudocode in trying to understand this.
 + *
 + * Must be called with the kvm->srcu read lock held
 + */
 +static int walk_nested_s2_pgd(phys_addr_t ipa,
 +                            struct s2_walk_info *wi, struct kvm_s2_trans *out)
 +{
 +      int first_block_level, level, stride, input_size, base_lower_bound;
 +      phys_addr_t base_addr;
 +      unsigned int addr_top, addr_bottom;
 +      u64 desc;  /* page table entry */
 +      int ret;
 +      phys_addr_t paddr;
 +
 +      switch (BIT(wi->pgshift)) {
 +      default:
 +      case SZ_64K:
 +      case SZ_16K:
 +              level = 3 - wi->sl;
 +              first_block_level = 2;
 +              break;
 +      case SZ_4K:
 +              level = 2 - wi->sl;
 +              first_block_level = 1;
 +              break;
 +      }
 +
 +      stride = wi->pgshift - 3;
 +      input_size = get_ia_size(wi);
 +      if (input_size > 48 || input_size < 25)
 +              return -EFAULT;
 +
 +      ret = check_base_s2_limits(wi, level, input_size, stride);
 +      if (WARN_ON(ret))
 +              return ret;
 +
 +      base_lower_bound = 3 + input_size - ((3 - level) * stride +
 +                         wi->pgshift);
 +      base_addr = wi->baddr & GENMASK_ULL(47, base_lower_bound);
 +
 +      if (check_output_size(wi, base_addr)) {
 +              out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
 +              return 1;
 +      }
 +
 +      addr_top = input_size - 1;
 +
 +      while (1) {
 +              phys_addr_t index;
 +
 +              addr_bottom = (3 - level) * stride + wi->pgshift;
 +              index = (ipa & GENMASK_ULL(addr_top, addr_bottom))
 +                      >> (addr_bottom - 3);
 +
 +              paddr = base_addr | index;
 +              ret = wi->read_desc(paddr, &desc, wi->data);
 +              if (ret < 0)
 +                      return ret;
 +
                /*
 -               * Since we can't support a guest S2 page size smaller than
 -               * the host's own page size (due to KVM only populating its
 -               * own S2 using the kernel's page size), advertise the
 -               * limitation using FEAT_GTG.
 +               * Handle reverse descriptors if endianness differs between the
 +               * host and the guest hypervisor.
                 */
 -              switch (PAGE_SIZE) {
 -              case SZ_4K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
 -                      fallthrough;
 -              case SZ_16K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
 -                      fallthrough;
 -              case SZ_64K:
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);
 +              if (wi->be)
 +                      desc = be64_to_cpu((__force __be64)desc);
 +              else
 +                      desc = le64_to_cpu((__force __le64)desc);
 +
 +              /* Check for valid descriptor at this point */
 +              if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
 +                      out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
 +                      out->upper_attr = desc;
 +                      return 1;
 +              }
 +
 +              /* We're at the final level or block translation level */
 +              if ((desc & 3) == 1 || level == 3)
 +                      break;
 +
 +              if (check_output_size(wi, desc)) {
 +                      out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
 +                      out->upper_attr = desc;
 +                      return 1;
 +              }
 +
 +              base_addr = desc & GENMASK_ULL(47, wi->pgshift);
 +
 +              level += 1;
 +              addr_top = addr_bottom - 1;
 +      }
 +
 +      if (level < first_block_level) {
 +              out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
 +              out->upper_attr = desc;
 +              return 1;
 +      }
 +
 +      /*
 +       * We don't use the contiguous bit in the stage-2 ptes, so skip check
 +       * for misprogramming of the contiguous bit.
 +       */
 +
 +      if (check_output_size(wi, desc)) {
 +              out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
 +              out->upper_attr = desc;
 +              return 1;
 +      }
 +
 +      if (!(desc & BIT(10))) {
 +              out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
 +              out->upper_attr = desc;
 +              return 1;
 +      }
 +
 +      /* Calculate and return the result */
 +      paddr = (desc & GENMASK_ULL(47, addr_bottom)) |
 +              (ipa & GENMASK_ULL(addr_bottom - 1, 0));
 +      out->output = paddr;
 +      out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
 +      out->readable = desc & (0b01 << 6);
 +      out->writable = desc & (0b10 << 6);
 +      out->level = level;
 +      out->upper_attr = desc & GENMASK_ULL(63, 52);
 +      return 0;
 +}
 +
 +static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
 +{
 +      struct kvm_vcpu *vcpu = data;
 +
 +      return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
 +}
 +
 +static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
 +{
 +      wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
 +
 +      switch (vtcr & VTCR_EL2_TG0_MASK) {
 +      case VTCR_EL2_TG0_4K:
 +              wi->pgshift = 12;        break;
 +      case VTCR_EL2_TG0_16K:
 +              wi->pgshift = 14;        break;
 +      case VTCR_EL2_TG0_64K:
 +      default:            /* IMPDEF: treat any other value as 64k */
 +              wi->pgshift = 16;        break;
 +      }
 +
 +      wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
 +      /* Global limit for now, should eventually be per-VM */
 +      wi->max_oa_bits = min(get_kvm_ipa_limit(),
 +                            ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr)));
 +}
 +
 +int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
 +                     struct kvm_s2_trans *result)
 +{
 +      u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
 +      struct s2_walk_info wi;
 +      int ret;
 +
 +      result->esr = 0;
 +
 +      if (!vcpu_has_nv(vcpu))
 +              return 0;
 +
 +      wi.read_desc = read_guest_s2_desc;
 +      wi.data = vcpu;
 +      wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
 +
 +      vtcr_to_walk_info(vtcr, &wi);
 +
 +      wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
 +
 +      ret = walk_nested_s2_pgd(gipa, &wi, result);
 +      if (ret)
 +              result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC);
 +
 +      return ret;
 +}
 +
 +static unsigned int ttl_to_size(u8 ttl)
 +{
 +      int level = ttl & 3;
 +      int gran = (ttl >> 2) & 3;
 +      unsigned int max_size = 0;
 +
 +      switch (gran) {
 +      case TLBI_TTL_TG_4K:
 +              switch (level) {
 +              case 0:
 +                      break;
 +              case 1:
 +                      max_size = SZ_1G;
 +                      break;
 +              case 2:
 +                      max_size = SZ_2M;
 +                      break;
 +              case 3:
 +                      max_size = SZ_4K;
                        break;
                }
 -              /* Cap PARange to 48bits */
 -              tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
 -              if (tmp > 0b0101) {
 -                      val &= ~NV_FTR(MMFR0, PARANGE);
 -                      val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
 +              break;
 +      case TLBI_TTL_TG_16K:
 +              switch (level) {
 +              case 0:
 +              case 1:
 +                      break;
 +              case 2:
 +                      max_size = SZ_32M;
 +                      break;
 +              case 3:
 +                      max_size = SZ_16K;
 +                      break;
                }
                break;
 -
 -      case SYS_ID_AA64MMFR1_EL1:
 -              val &= (NV_FTR(MMFR1, HCX)      |
 -                      NV_FTR(MMFR1, PAN)      |
 -                      NV_FTR(MMFR1, LO)       |
 -                      NV_FTR(MMFR1, HPDS)     |
 -                      NV_FTR(MMFR1, VH)       |
 -                      NV_FTR(MMFR1, VMIDBits));
 +      case TLBI_TTL_TG_64K:
 +              switch (level) {
 +              case 0:
 +              case 1:
 +                      /* No 52bit IPA support */
 +                      break;
 +              case 2:
 +                      max_size = SZ_512M;
 +                      break;
 +              case 3:
 +                      max_size = SZ_64K;
 +                      break;
 +              }
 +              break;
 +      default:                        /* No size information */
                break;
 +      }
 +
 +      return max_size;
 +}
 +
 +/*
 + * Compute the equivalent of the TTL field by parsing the shadow PT.  The
 + * granule size is extracted from the cached VTCR_EL2.TG0 while the level is
 + * retrieved from first entry carrying the level as a tag.
 + */
 +static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr)
 +{
 +      u64 tmp, sz = 0, vtcr = mmu->tlb_vtcr;
 +      kvm_pte_t pte;
 +      u8 ttl, level;
  
 -      case SYS_ID_AA64MMFR2_EL1:
 -              val &= ~(NV_FTR(MMFR2, BBM)     |
 -                       NV_FTR(MMFR2, TTL)     |
 -                       GENMASK_ULL(47, 44)    |
 -                       NV_FTR(MMFR2, ST)      |
 -                       NV_FTR(MMFR2, CCIDX)   |
 -                       NV_FTR(MMFR2, VARange));
 +      lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(mmu)->mmu_lock);
  
 -              /* Force TTL support */
 -              val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
 +      switch (vtcr & VTCR_EL2_TG0_MASK) {
 +      case VTCR_EL2_TG0_4K:
 +              ttl = (TLBI_TTL_TG_4K << 2);
 +              break;
 +      case VTCR_EL2_TG0_16K:
 +              ttl = (TLBI_TTL_TG_16K << 2);
                break;
 +      case VTCR_EL2_TG0_64K:
 +      default:            /* IMPDEF: treat any other value as 64k */
 +              ttl = (TLBI_TTL_TG_64K << 2);
 +              break;
 +      }
  
 -      case SYS_ID_AA64MMFR4_EL1:
 -              val = 0;
 -              if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
 -                      val |= FIELD_PREP(NV_FTR(MMFR4, E2H0),
 -                                        ID_AA64MMFR4_EL1_E2H0_NI_NV1);
 +      tmp = addr;
 +
 +again:
 +      /* Iteratively compute the block sizes for a particular granule size */
 +      switch (vtcr & VTCR_EL2_TG0_MASK) {
 +      case VTCR_EL2_TG0_4K:
 +              if      (sz < SZ_4K)    sz = SZ_4K;
 +              else if (sz < SZ_2M)    sz = SZ_2M;
 +              else if (sz < SZ_1G)    sz = SZ_1G;
 +              else                    sz = 0;
 +              break;
 +      case VTCR_EL2_TG0_16K:
 +              if      (sz < SZ_16K)   sz = SZ_16K;
 +              else if (sz < SZ_32M)   sz = SZ_32M;
 +              else                    sz = 0;
                break;
 +      case VTCR_EL2_TG0_64K:
 +      default:            /* IMPDEF: treat any other value as 64k */
 +              if      (sz < SZ_64K)   sz = SZ_64K;
 +              else if (sz < SZ_512M)  sz = SZ_512M;
 +              else                    sz = 0;
 +              break;
 +      }
 +
 +      if (sz == 0)
 +              return 0;
 +
 +      tmp &= ~(sz - 1);
 +      if (kvm_pgtable_get_leaf(mmu->pgt, tmp, &pte, NULL))
 +              goto again;
 +      if (!(pte & PTE_VALID))
 +              goto again;
 +      level = FIELD_GET(KVM_NV_GUEST_MAP_SZ, pte);
 +      if (!level)
 +              goto again;
 +
 +      ttl |= level;
 +
 +      /*
 +       * We now have found some level information in the shadow S2. Check
 +       * that the resulting range actually includes the original IPA.
 +       */
 +      sz = ttl_to_size(ttl);
 +      if (addr < (tmp + sz))
 +              return ttl;
 +
 +      return 0;
 +}
 +
 +unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val)
 +{
 +      struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
 +      unsigned long max_size;
 +      u8 ttl;
 +
 +      ttl = FIELD_GET(TLBI_TTL_MASK, val);
  
 -      case SYS_ID_AA64DFR0_EL1:
 -              /* Only limited support for PMU, Debug, BPs and WPs */
 -              val &= (NV_FTR(DFR0, PMUVer)    |
 -                      NV_FTR(DFR0, WRPs)      |
 -                      NV_FTR(DFR0, BRPs)      |
 -                      NV_FTR(DFR0, DebugVer));
 +      if (!ttl || !kvm_has_feat(kvm, ID_AA64MMFR2_EL1, TTL, IMP)) {
 +              /* No TTL, check the shadow S2 for a hint */
 +              u64 addr = (val & GENMASK_ULL(35, 0)) << 12;
 +              ttl = get_guest_mapping_ttl(mmu, addr);
 +      }
 +
 +      max_size = ttl_to_size(ttl);
  
 -              /* Cap Debug to ARMv8.1 */
 -              tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
 -              if (tmp > 0b0111) {
 -                      val &= ~NV_FTR(DFR0, DebugVer);
 -                      val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
 +      if (!max_size) {
 +              /* Compute the maximum extent of the invalidation */
 +              switch (mmu->tlb_vtcr & VTCR_EL2_TG0_MASK) {
 +              case VTCR_EL2_TG0_4K:
 +                      max_size = SZ_1G;
 +                      break;
 +              case VTCR_EL2_TG0_16K:
 +                      max_size = SZ_32M;
 +                      break;
 +              case VTCR_EL2_TG0_64K:
 +              default:    /* IMPDEF: treat any other value as 64k */
 +                      /*
 +                       * No, we do not support 52bit IPA in nested yet. Once
 +                       * we do, this should be 4TB.
 +                       */
 +                      max_size = SZ_512M;
 +                      break;
                }
 -              break;
 +      }
  
 -      default:
 -              /* Unknown register, just wipe it clean */
 -              val = 0;
 -              break;
 +      WARN_ON(!max_size);
 +      return max_size;
 +}
 +
 +/*
 + * We can have multiple *different* MMU contexts with the same VMID:
 + *
 + * - S2 being enabled or not, hence differing by the HCR_EL2.VM bit
 + *
 + * - Multiple vcpus using private S2s (huh huh...), hence differing by the
 + *   VTTBR_EL2.BADDR address
 + *
 + * - A combination of the above...
 + *
 + * We can always identify which MMU context to pick at run-time.  However,
 + * TLB invalidation involving a VMID must take action on all the TLBs using
 + * this particular VMID. This translates into applying the same invalidation
 + * operation to all the contexts that are using this VMID. Moar phun!
 + */
 +void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
 +                              const union tlbi_info *info,
 +                              void (*tlbi_callback)(struct kvm_s2_mmu *,
 +                                                    const union tlbi_info *))
 +{
 +      write_lock(&kvm->mmu_lock);
 +
 +      for (int i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (!kvm_s2_mmu_valid(mmu))
 +                      continue;
 +
 +              if (vmid == get_vmid(mmu->tlb_vttbr))
 +                      tlbi_callback(mmu, info);
 +      }
 +
 +      write_unlock(&kvm->mmu_lock);
 +}
 +
 +struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm *kvm = vcpu->kvm;
 +      bool nested_stage2_enabled;
 +      u64 vttbr, vtcr, hcr;
 +
 +      lockdep_assert_held_write(&kvm->mmu_lock);
 +
 +      vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
 +      vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
 +      hcr = vcpu_read_sys_reg(vcpu, HCR_EL2);
 +
 +      nested_stage2_enabled = hcr & HCR_VM;
 +
 +      /* Don't consider the CnP bit for the vttbr match */
 +      vttbr &= ~VTTBR_CNP_BIT;
 +
 +      /*
 +       * Two possibilities when looking up a S2 MMU context:
 +       *
 +       * - either S2 is enabled in the guest, and we need a context that is
 +       *   S2-enabled and matches the full VTTBR (VMID+BADDR) and VTCR,
 +       *   which makes it safe from a TLB conflict perspective (a broken
 +       *   guest won't be able to generate them),
 +       *
 +       * - or S2 is disabled, and we need a context that is S2-disabled
 +       *   and matches the VMID only, as all TLBs are tagged by VMID even
 +       *   if S2 translation is disabled.
 +       */
 +      for (int i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (!kvm_s2_mmu_valid(mmu))
 +                      continue;
 +
 +              if (nested_stage2_enabled &&
 +                  mmu->nested_stage2_enabled &&
 +                  vttbr == mmu->tlb_vttbr &&
 +                  vtcr == mmu->tlb_vtcr)
 +                      return mmu;
 +
 +              if (!nested_stage2_enabled &&
 +                  !mmu->nested_stage2_enabled &&
 +                  get_vmid(vttbr) == get_vmid(mmu->tlb_vttbr))
 +                      return mmu;
 +      }
 +      return NULL;
 +}
 +
 +static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm *kvm = vcpu->kvm;
 +      struct kvm_s2_mmu *s2_mmu;
 +      int i;
 +
 +      lockdep_assert_held_write(&vcpu->kvm->mmu_lock);
 +
 +      s2_mmu = lookup_s2_mmu(vcpu);
 +      if (s2_mmu)
 +              goto out;
 +
 +      /*
 +       * Make sure we don't always search from the same point, or we
 +       * will always reuse a potentially active context, leaving
 +       * free contexts unused.
 +       */
 +      for (i = kvm->arch.nested_mmus_next;
 +           i < (kvm->arch.nested_mmus_size + kvm->arch.nested_mmus_next);
 +           i++) {
 +              s2_mmu = &kvm->arch.nested_mmus[i % kvm->arch.nested_mmus_size];
 +
 +              if (atomic_read(&s2_mmu->refcnt) == 0)
 +                      break;
        }
 +      BUG_ON(atomic_read(&s2_mmu->refcnt)); /* We have struct MMUs to spare */
 +
 +      /* Set the scene for the next search */
 +      kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size;
 +
 +      /* Clear the old state */
 +      if (kvm_s2_mmu_valid(s2_mmu))
 +              kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu));
 +
 +      /*
 +       * The virtual VMID (modulo CnP) will be used as a key when matching
 +       * an existing kvm_s2_mmu.
 +       *
 +       * We cache VTCR at allocation time, once and for all. It'd be great
 +       * if the guest didn't screw that one up, as this is not very
 +       * forgiving...
 +       */
 +      s2_mmu->tlb_vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2) & ~VTTBR_CNP_BIT;
 +      s2_mmu->tlb_vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
 +      s2_mmu->nested_stage2_enabled = vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_VM;
 +
 +out:
 +      atomic_inc(&s2_mmu->refcnt);
 +      return s2_mmu;
 +}
  
 -      return val;
 +void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
 +{
 +      /* CnP being set denotes an invalid entry */
 +      mmu->tlb_vttbr = VTTBR_CNP_BIT;
 +      mmu->nested_stage2_enabled = false;
 +      atomic_set(&mmu->refcnt, 0);
 +}
 +
 +void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
 +{
 +      if (is_hyp_ctxt(vcpu)) {
 +              vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
 +      } else {
 +              write_lock(&vcpu->kvm->mmu_lock);
 +              vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
 +              write_unlock(&vcpu->kvm->mmu_lock);
 +      }
 +}
 +
 +void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
 +{
 +      if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) {
 +              atomic_dec(&vcpu->arch.hw_mmu->refcnt);
 +              vcpu->arch.hw_mmu = NULL;
 +      }
 +}
 +
 +/*
 + * Returns non-zero if permission fault is handled by injecting it to the next
 + * level hypervisor.
 + */
 +int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
 +{
 +      bool forward_fault = false;
 +
 +      trans->esr = 0;
 +
 +      if (!kvm_vcpu_trap_is_permission_fault(vcpu))
 +              return 0;
 +
 +      if (kvm_vcpu_trap_is_iabt(vcpu)) {
 +              forward_fault = !kvm_s2_trans_executable(trans);
 +      } else {
 +              bool write_fault = kvm_is_write_fault(vcpu);
 +
 +              forward_fault = ((write_fault && !trans->writable) ||
 +                               (!write_fault && !trans->readable));
 +      }
 +
 +      if (forward_fault)
 +              trans->esr = esr_s2_fault(vcpu, trans->level, ESR_ELx_FSC_PERM);
 +
 +      return forward_fault;
 +}
 +
 +int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2)
 +{
 +      vcpu_write_sys_reg(vcpu, vcpu->arch.fault.far_el2, FAR_EL2);
 +      vcpu_write_sys_reg(vcpu, vcpu->arch.fault.hpfar_el2, HPFAR_EL2);
 +
 +      return kvm_inject_nested_sync(vcpu, esr_el2);
 +}
 +
 +void kvm_nested_s2_wp(struct kvm *kvm)
 +{
 +      int i;
 +
 +      lockdep_assert_held_write(&kvm->mmu_lock);
 +
 +      for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (kvm_s2_mmu_valid(mmu))
 +                      kvm_stage2_wp_range(mmu, 0, kvm_phys_size(mmu));
 +      }
 +}
 +
 +void kvm_nested_s2_unmap(struct kvm *kvm)
 +{
 +      int i;
 +
 +      lockdep_assert_held_write(&kvm->mmu_lock);
 +
 +      for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (kvm_s2_mmu_valid(mmu))
 +                      kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu));
 +      }
 +}
 +
 +void kvm_nested_s2_flush(struct kvm *kvm)
 +{
 +      int i;
 +
 +      lockdep_assert_held_write(&kvm->mmu_lock);
 +
 +      for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (kvm_s2_mmu_valid(mmu))
 +                      kvm_stage2_flush_range(mmu, 0, kvm_phys_size(mmu));
 +      }
 +}
 +
 +void kvm_arch_flush_shadow_all(struct kvm *kvm)
 +{
 +      int i;
 +
 +      for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
 +              struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
 +
 +              if (!WARN_ON(atomic_read(&mmu->refcnt)))
 +                      kvm_free_stage2_pgd(mmu);
 +      }
 +      kfree(kvm->arch.nested_mmus);
 +      kvm->arch.nested_mmus = NULL;
 +      kvm->arch.nested_mmus_size = 0;
 +      kvm_uninit_stage2_mmu(kvm);
 +}
 +
 +/*
 + * Our emulated CPU doesn't support all the possible features. For the
 + * sake of simplicity (and probably mental sanity), wipe out a number
 + * of feature bits we don't intend to support for the time being.
 + * This list should get updated as new features get added to the NV
 + * support, and new extension to the architecture.
 + */
 +static void limit_nv_id_regs(struct kvm *kvm)
 +{
 +      u64 val, tmp;
 +
 +      /* Support everything but TME */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1);
 +      val &= ~NV_FTR(ISAR0, TME);
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1, val);
 +
 +      /* Support everything but Spec Invalidation and LS64 */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1);
 +      val &= ~(NV_FTR(ISAR1, LS64)    |
 +               NV_FTR(ISAR1, SPECRES));
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1, val);
 +
-       /* No AMU, MPAM, S-EL2, RAS or SVE */
++      /* No AMU, MPAM, S-EL2, or RAS */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1);
 +      val &= ~(GENMASK_ULL(55, 52)    |
 +               NV_FTR(PFR0, AMU)      |
 +               NV_FTR(PFR0, MPAM)     |
 +               NV_FTR(PFR0, SEL2)     |
 +               NV_FTR(PFR0, RAS)      |
-                NV_FTR(PFR0, SVE)      |
 +               NV_FTR(PFR0, EL3)      |
 +               NV_FTR(PFR0, EL2)      |
 +               NV_FTR(PFR0, EL1));
 +      /* 64bit EL1/EL2/EL3 only */
 +      val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
 +      val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
 +      val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
 +
 +      /* Only support BTI, SSBS, CSV2_frac */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1);
 +      val &= (NV_FTR(PFR1, BT)        |
 +              NV_FTR(PFR1, SSBS)      |
 +              NV_FTR(PFR1, CSV2_frac));
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1, val);
 +
 +      /* Hide ECV, ExS, Secure Memory */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
 +      val &= ~(NV_FTR(MMFR0, ECV)             |
 +               NV_FTR(MMFR0, EXS)             |
 +               NV_FTR(MMFR0, TGRAN4_2)        |
 +               NV_FTR(MMFR0, TGRAN16_2)       |
 +               NV_FTR(MMFR0, TGRAN64_2)       |
 +               NV_FTR(MMFR0, SNSMEM));
 +
 +      /* Disallow unsupported S2 page sizes */
 +      switch (PAGE_SIZE) {
 +      case SZ_64K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
 +              fallthrough;
 +      case SZ_16K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
 +              fallthrough;
 +      case SZ_4K:
 +              /* Support everything */
 +              break;
 +      }
 +      /*
 +       * Since we can't support a guest S2 page size smaller than
 +       * the host's own page size (due to KVM only populating its
 +       * own S2 using the kernel's page size), advertise the
 +       * limitation using FEAT_GTG.
 +       */
 +      switch (PAGE_SIZE) {
 +      case SZ_4K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
 +              fallthrough;
 +      case SZ_16K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
 +              fallthrough;
 +      case SZ_64K:
 +              val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);
 +              break;
 +      }
 +      /* Cap PARange to 48bits */
 +      tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
 +      if (tmp > 0b0101) {
 +              val &= ~NV_FTR(MMFR0, PARANGE);
 +              val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
 +      }
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1, val);
 +
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1);
 +      val &= (NV_FTR(MMFR1, HCX)      |
 +              NV_FTR(MMFR1, PAN)      |
 +              NV_FTR(MMFR1, LO)       |
 +              NV_FTR(MMFR1, HPDS)     |
 +              NV_FTR(MMFR1, VH)       |
 +              NV_FTR(MMFR1, VMIDBits));
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1, val);
 +
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1);
 +      val &= ~(NV_FTR(MMFR2, BBM)     |
 +               NV_FTR(MMFR2, TTL)     |
 +               GENMASK_ULL(47, 44)    |
 +               NV_FTR(MMFR2, ST)      |
 +               NV_FTR(MMFR2, CCIDX)   |
 +               NV_FTR(MMFR2, VARange));
 +
 +      /* Force TTL support */
 +      val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1, val);
 +
 +      val = 0;
 +      if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
 +              val |= FIELD_PREP(NV_FTR(MMFR4, E2H0),
 +                                ID_AA64MMFR4_EL1_E2H0_NI_NV1);
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR4_EL1, val);
 +
 +      /* Only limited support for PMU, Debug, BPs and WPs */
 +      val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1);
 +      val &= (NV_FTR(DFR0, PMUVer)    |
 +              NV_FTR(DFR0, WRPs)      |
 +              NV_FTR(DFR0, BRPs)      |
 +              NV_FTR(DFR0, DebugVer));
 +
 +      /* Cap Debug to ARMv8.1 */
 +      tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
 +      if (tmp > 0b0111) {
 +              val &= ~NV_FTR(DFR0, DebugVer);
 +              val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
 +      }
 +      kvm_set_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1, val);
  }
  
  u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr)
Simple merge