#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
+#include <nvhe/trap_handler.h>
#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP)
return ret;
}
+/*
+ * Inject a memory abort back into the host to report a stage-2 fault
+ * (e.g. host access to hypervisor-protected memory).
+ *
+ * The fault may have been taken from host EL1 or EL0; SPSR_EL2 tells us
+ * which. The injected exception will be delivered to EL1, so a fault
+ * taken from EL0 keeps its lower-EL (LOW) exception class, while a fault
+ * taken from EL1 must have its EC repainted to the same-level (CUR)
+ * variant so the ESR the host sees is architecturally consistent.
+ *
+ * NOTE(review): host_ctxt is currently unused here; the fault context is
+ * read directly from the EL2 sysregs instead.
+ */
+static void host_inject_mem_abort(struct kvm_cpu_context *host_ctxt)
+{
+ u64 ec, esr, spsr;
+
+ esr = read_sysreg_el2(SYS_ESR);
+ spsr = read_sysreg_el2(SYS_SPSR);
+
+ /* Repaint the ESR to report a same-level fault if taken from EL1 */
+ if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) {
+ ec = ESR_ELx_EC(esr);
+ if (ec == ESR_ELx_EC_DABT_LOW)
+ ec = ESR_ELx_EC_DABT_CUR;
+ else if (ec == ESR_ELx_EC_IABT_LOW)
+ ec = ESR_ELx_EC_IABT_CUR;
+ else
+ /* Only data/instruction aborts are expected on this path. */
+ WARN_ON(1);
+ esr &= ~ESR_ELx_EC_MASK;
+ esr |= ec << ESR_ELx_EC_SHIFT;
+ }
+
+ /*
+ * Since S1PTW should only ever be set for stage-2 faults, we're pretty
+ * much guaranteed that it won't be set in ESR_EL1 by the hardware. So,
+ * let's use that bit to allow the host abort handler to differentiate
+ * this abort from normal userspace faults.
+ *
+ * Note: although S1PTW is RES0 at EL1, it is guaranteed by the
+ * architecture to be backed by flops, so it should be safe to use.
+ */
+ esr |= ESR_ELx_S1PTW;
+ inject_host_exception(esr);
+}
+
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
struct kvm_vcpu_fault_info fault;
addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
switch (host_stage2_idmap(addr)) {
+ case -EPERM:
+ host_inject_mem_abort(host_ctxt);
+ fallthrough;
case -EEXIST:
case 0:
break;
#include <asm/system_misc.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>
+#include <asm/virt.h>
struct fault_info {
int (*fn)(unsigned long far, unsigned long esr,
return false;
}
+static bool is_pkvm_stage2_abort(unsigned int esr)
+{
+ /*
+ * The pkvm hypervisor tags the aborts it injects by setting
+ * ESR_ELx_S1PTW -- see host_inject_mem_abort(). The bit is never
+ * set in ESR_EL1 by hardware, so it uniquely identifies a pkvm
+ * stage-2 abort, but only once pkvm has been initialized.
+ */
+ if (!is_pkvm_initialized())
+ return false;
+
+ return !!(esr & ESR_ELx_S1PTW);
+}
+
static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
unsigned long esr,
struct pt_regs *regs)
if (!is_el1_data_abort(esr) || !esr_fsc_is_translation_fault(esr))
return false;
+ if (is_pkvm_stage2_abort(esr))
+ return false;
+
local_irq_save(flags);
asm volatile("at s1e1r, %0" :: "r" (addr));
isb();
msg = "read from unreadable memory";
} else if (addr < PAGE_SIZE) {
msg = "NULL pointer dereference";
+ } else if (is_pkvm_stage2_abort(esr)) {
+ msg = "access to hypervisor-protected memory";
} else {
if (esr_fsc_is_translation_fault(esr) &&
kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
addr, esr, regs);
}
+ if (is_pkvm_stage2_abort(esr)) {
+ if (!user_mode(regs))
+ goto no_context;
+ arm64_force_sig_fault(SIGSEGV, SEGV_ACCERR, far, "stage-2 fault");
+ return 0;
+ }
+
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (!(mm_flags & FAULT_FLAG_USER))