static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val);
+static u64 sanitise_id_aa64pfr2_el1(const struct kvm_vcpu *vcpu, u64 val);
static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
/* Read a sanitised cpufeature ID register by sys_reg_desc */
val = sanitise_id_aa64pfr1_el1(vcpu, val);
break;
case SYS_ID_AA64PFR2_EL1:
- val &= ID_AA64PFR2_EL1_FPMR |
- (kvm_has_mte(vcpu->kvm) ?
- ID_AA64PFR2_EL1_MTEFAR | ID_AA64PFR2_EL1_MTESTOREONLY :
- 0);
+ val = sanitise_id_aa64pfr2_el1(vcpu, val);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
return val;
}
+static u64 sanitise_id_aa64pfr2_el1(const struct kvm_vcpu *vcpu, u64 val)
+{
+ val &= ID_AA64PFR2_EL1_FPMR |
+ ID_AA64PFR2_EL1_MTEFAR |
+ ID_AA64PFR2_EL1_MTESTOREONLY;
+
+ if (!kvm_has_mte(vcpu->kvm)) {
+ val &= ~ID_AA64PFR2_EL1_MTEFAR;
+ val &= ~ID_AA64PFR2_EL1_MTESTOREONLY;
+ }
+
+ if (vgic_host_has_gicv5())
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR2_EL1, GCIE, IMP);
+
+ return val;
+}
+
static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
{
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8);
return set_id_reg(vcpu, rd, user_val);
}
+static int set_id_aa64pfr2_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, u64 user_val)
+{
+ return set_id_reg(vcpu, rd, user_val);
+}
+
/*
* Allow userspace to de-feature a stage-2 translation granule but prevent it
* from claiming the impossible.
ID_AA64PFR1_EL1_RES0 |
ID_AA64PFR1_EL1_MPAM_frac |
ID_AA64PFR1_EL1_MTE)),
- ID_WRITABLE(ID_AA64PFR2_EL1,
- ID_AA64PFR2_EL1_FPMR |
- ID_AA64PFR2_EL1_MTEFAR |
- ID_AA64PFR2_EL1_MTESTOREONLY),
+ ID_FILTERED(ID_AA64PFR2_EL1, id_aa64pfr2_el1,
+ ~(ID_AA64PFR2_EL1_FPMR |
+ ID_AA64PFR2_EL1_MTEFAR |
+ ID_AA64PFR2_EL1_MTESTOREONLY |
+ ID_AA64PFR2_EL1_GCIE)),
ID_UNALLOCATED(4,3),
ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
ID_HIDDEN(ID_AA64SMFR0_EL1),
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
+ val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1) & ~ID_AA64PFR2_EL1_GCIE;
+ kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1, val);
val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val);
+ } else {
+ /*
+ * Certain userspace software - QEMU - samples the system
+ * register state without creating an irqchip, then blindly
+ * restores the state prior to running the final guest. This
+ * means that it restores the virtualization & emulation
+ * capabilities of the host system, rather than something that
+ * reflects the final guest state. Moreover, it checks that the
+ * state was "correctly" restored (i.e., verbatim), bailing if
+ * it isn't, so masking off invalid state isn't an option.
+ *
+ * On GICv5 hardware that supports FEAT_GCIE_LEGACY we can run
+ * both GICv3- and GICv5-based guests. Therefore, we initially
+ * present both ID_AA64PFR0.GIC and ID_AA64PFR2.GCIE as IMP to
+ * reflect that userspace can create EITHER a vGICv3 or a
+ * vGICv5. This is an architecturally invalid combination, of
+ * course. Once an in-kernel GIC is created, the sysreg state is
+ * updated to reflect the actual, valid configuration.
+ *
+ * Setting both the GIC and GCIE features to IMP unsurprisingly
+ * results in guests falling over, and hence we need to fix up
+ * this mess in KVM. Before running for the first time we yet
+ * again ensure that the GIC and GCIE fields accurately reflect
+ * the actual hardware the guest should see.
+ *
+ * This hack allows legacy QEMU-based GICv3 guests to run
+ * unmodified on compatible GICv5 hosts, and avoids the inverse
+ * problem for GICv5-based guests in the future.
+ */
+ kvm_vgic_finalize_idregs(kvm);
}
if (vcpu_has_nv(vcpu)) {
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
struct kvm_vcpu *vcpu;
- u64 aa64pfr0, pfr1;
unsigned long i;
int ret;
kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
- aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
- pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
-
- if (type == KVM_DEV_TYPE_ARM_VGIC_V2) {
- kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
- } else {
- INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
- aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
- pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3);
- }
-
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
- kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1);
+ /*
+ * We've now created the GIC. Update the system register state
+ * to accurately reflect what we've created.
+ */
+ kvm_vgic_finalize_idregs(kvm);
kvm_for_each_vcpu(i, vcpu, kvm) {
ret = vgic_allocate_private_irqs_locked(vcpu, type);
return ret;
}
+void kvm_vgic_finalize_idregs(struct kvm *kvm)
+{
+ u32 type = kvm->arch.vgic.vgic_model;
+ u64 aa64pfr0, aa64pfr2, pfr1;
+
+ aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
+ aa64pfr2 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1) & ~ID_AA64PFR2_EL1_GCIE;
+ pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
+
+ switch (type) {
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
+ INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
+ aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
+ pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V5:
+ aa64pfr2 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR2_EL1, GCIE, IMP);
+ break;
+ default:
+ WARN_ONCE(1, "Unknown VGIC type!!!\n");
+ }
+
+ kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
+ kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1, aa64pfr2);
+ kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1);
+}
+
/* GENERIC PROBE */
void kvm_vgic_cpu_up(void)