*/
#include "qemu/osdep.h"
+#include "qemu/lockable.h"
#include "qemu/error-report.h"
+#include "qemu/rcu.h"
#include "linux/mshv.h"
#include "system/address-spaces.h"
#include "system/mshv.h"
#include <sys/ioctl.h>
#include "trace.h"
+/*
+ * Payload for deferred (RCU) reclamation of a retired slot list: holds the
+ * old list head and, for removals, the removed slot, until no RCU reader
+ * can still observe them.
+ */
+typedef struct SlotsRCUReclaim {
+    struct rcu_head rcu;
+    GList *old_head;
+    MshvMemorySlot *removed_slot;
+} SlotsRCUReclaim;
+
+/* RCU callback: free the retired list head and (if any) the removed slot */
+static void rcu_reclaim_slotlist(struct rcu_head *rcu)
+{
+    SlotsRCUReclaim *r = container_of(rcu, SlotsRCUReclaim, rcu);
+    g_list_free(r->old_head);
+    g_free(r->removed_slot); /* g_free(NULL) is a no-op for append case */
+    g_free(r);
+}
+
+/*
+ * Atomically publish @new_head as the current slot list and schedule RCU
+ * reclamation of @old_head (and @removed_slot, which may be NULL).
+ *
+ * Needs to be called with mshv_state->msm.mutex held.
+ */
+static void publish_slots(GList *new_head, GList *old_head,
+                          MshvMemorySlot *removed_slot)
+{
+    MshvMemorySlotManager *manager = &mshv_state->msm;
+
+    assert(manager);
+    /* pairs with qatomic_load_acquire() in the readers of manager->slots */
+    qatomic_store_release(&manager->slots, new_head);
+
+    SlotsRCUReclaim *r = g_new(SlotsRCUReclaim, 1);
+    r->old_head = old_head;
+    r->removed_slot = removed_slot;
+
+    call_rcu1(&r->rcu, rcu_reclaim_slotlist);
+}
+
+/*
+ * Untrack @slot: publish a copy of the slot list without it. The slot and
+ * the old list are freed after an RCU grace period. Returns 0 on success,
+ * -1 if the slot is not in the list.
+ *
+ * Needs to be called with mshv_state->msm.mutex held
+ */
+static int remove_slot(MshvMemorySlot *slot)
+{
+    GList *old_head, *new_head;
+    MshvMemorySlotManager *manager = &mshv_state->msm;
+
+    assert(manager);
+    old_head = qatomic_load_acquire(&manager->slots);
+
+    if (!g_list_find(old_head, slot)) {
+        error_report("slot requested for removal not found");
+        return -1;
+    }
+
+    /* copy-on-write: concurrent RCU readers keep traversing old_head */
+    new_head = g_list_copy(old_head);
+    new_head = g_list_remove(new_head, slot);
+    manager->n_slots--;
+
+    publish_slots(new_head, old_head, slot);
+
+    return 0;
+}
+
+/*
+ * Allocate a new slot, append it to a copy of the slot list and publish
+ * the new list. Returns the new slot, or NULL when MSHV_MAX_MEM_SLOTS is
+ * already reached. The slot starts out with mapped == false.
+ *
+ * Needs to be called with mshv_state->msm.mutex held
+ */
+static MshvMemorySlot *append_slot(uint64_t gpa, uint64_t userspace_addr,
+                                   uint64_t size, bool readonly)
+{
+    GList *old_head, *new_head;
+    MshvMemorySlot *slot;
+    MshvMemorySlotManager *manager = &mshv_state->msm;
+
+    assert(manager);
+
+    old_head = qatomic_load_acquire(&manager->slots);
+
+    if (manager->n_slots >= MSHV_MAX_MEM_SLOTS) {
+        error_report("no free memory slots available");
+        return NULL;
+    }
+
+    /* g_new0 leaves slot->mapped false until the caller maps it */
+    slot = g_new0(MshvMemorySlot, 1);
+    slot->guest_phys_addr = gpa;
+    slot->userspace_addr = userspace_addr;
+    slot->memory_size = size;
+    slot->readonly = readonly;
+
+    new_head = g_list_copy(old_head);
+    new_head = g_list_append(new_head, slot);
+    manager->n_slots++;
+
+    publish_slots(new_head, old_head, NULL);
+
+    return slot;
+}
+
+/*
+ * GCompareFunc: returns 0 iff the two slots' userspace ranges overlap.
+ * A slot is never considered to overlap itself.
+ */
+static int slot_overlaps(const MshvMemorySlot *slot1,
+                         const MshvMemorySlot *slot2)
+{
+    uint64_t start_1 = slot1->userspace_addr,
+             start_2 = slot2->userspace_addr;
+    size_t len_1 = slot1->memory_size,
+           len_2 = slot2->memory_size;
+
+    if (slot1 == slot2) {
+        return -1;
+    }
+
+    return ranges_overlap(start_1, len_1, start_2, len_2) ? 0 : -1;
+}
+
+/* Whether @slot is currently mapped in the guest; safe under RCU read lock */
+static bool is_mapped(MshvMemorySlot *slot)
+{
+    /* Subsequent reads of mapped field see a fully-initialized slot */
+    return qatomic_load_acquire(&slot->mapped);
+}
+
+/*
+ * Find slot that is:
+ * - overlapping in userspace
+ * - currently mapped in the guest
+ *
+ * Returns NULL when no such slot exists.
+ *
+ * Needs to be called with mshv_state->msm.mutex or RCU read lock held.
+ */
+static MshvMemorySlot *find_overlap_mem_slot(GList *head, MshvMemorySlot *slot)
+{
+    GList *found;
+    MshvMemorySlot *overlap_slot;
+
+    /* slot_overlaps() skips @slot itself, so self-match is impossible */
+    found = g_list_find_custom(head, slot, (GCompareFunc) slot_overlaps);
+
+    if (!found) {
+        return NULL;
+    }
+
+    overlap_slot = found->data;
+    if (!overlap_slot || !is_mapped(overlap_slot)) {
+        return NULL;
+    }
+
+    return overlap_slot;
+}
+
static int set_guest_memory(int vm_fd,
const struct mshv_user_mem_region *region)
{
ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region);
if (ret < 0) {
- error_report("failed to set guest memory");
- return -errno;
+ error_report("failed to set guest memory: %s", strerror(errno));
+ return -1;
}
return 0;
}
-static int map_or_unmap(int vm_fd, const MshvMemoryRegion *mr, bool map)
+/*
+ * Map or unmap a slot's userspace memory in the guest physical address
+ * space via MSHV_SET_GUEST_MEMORY. Mapped regions are always executable;
+ * writability follows slot->readonly. Returns 0 on success.
+ */
+static int map_or_unmap(int vm_fd, const MshvMemorySlot *slot, bool map)
 {
     struct mshv_user_mem_region region = {0};
-    region.guest_pfn = mr->guest_phys_addr >> MSHV_PAGE_SHIFT;
-    region.size = mr->memory_size;
-    region.userspace_addr = mr->userspace_addr;
+    region.guest_pfn = slot->guest_phys_addr >> MSHV_PAGE_SHIFT;
+    region.size = slot->memory_size;
+    region.userspace_addr = slot->userspace_addr;
     if (!map) {
         region.flags |= (1 << MSHV_SET_MEM_BIT_UNMAP);
-        trace_mshv_unmap_memory(mr->userspace_addr, mr->guest_phys_addr,
-                                mr->memory_size);
+        trace_mshv_unmap_memory(slot->userspace_addr, slot->guest_phys_addr,
+                                slot->memory_size);
        return set_guest_memory(vm_fd, &region);
     }
     region.flags = BIT(MSHV_SET_MEM_BIT_EXECUTABLE);
-    if (!mr->readonly) {
+    if (!slot->readonly) {
         region.flags |= BIT(MSHV_SET_MEM_BIT_WRITABLE);
     }
-    trace_mshv_map_memory(mr->userspace_addr, mr->guest_phys_addr,
-                          mr->memory_size);
+    trace_mshv_map_memory(slot->userspace_addr, slot->guest_phys_addr,
+                          slot->memory_size);
    return set_guest_memory(vm_fd, &region);
 }
+/* GCompareFunc: 0 when both slots describe the identical region */
+static int slot_matches_region(const MshvMemorySlot *slot1,
+                               const MshvMemorySlot *slot2)
+{
+    if (slot1->guest_phys_addr != slot2->guest_phys_addr ||
+        slot1->userspace_addr != slot2->userspace_addr ||
+        slot1->memory_size != slot2->memory_size) {
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Look up the tracked slot that exactly matches (gpa, size, userspace_addr).
+ * Returns NULL when no such slot exists.
+ *
+ * Needs to be called with mshv_state->msm.mutex held
+ */
+static MshvMemorySlot *find_mem_slot_by_region(uint64_t gpa, uint64_t size,
+                                               uint64_t userspace_addr)
+{
+    /* stack-local reference slot used only as the comparison key */
+    MshvMemorySlot ref_slot = {
+        .guest_phys_addr = gpa,
+        .userspace_addr = userspace_addr,
+        .memory_size = size,
+    };
+    GList *found;
+    MshvMemorySlotManager *manager = &mshv_state->msm;
+
+    assert(manager);
+    found = g_list_find_custom(manager->slots, &ref_slot,
+                               (GCompareFunc) slot_matches_region);
+
+    return found ? found->data : NULL;
+}
+
+/* GCompareFunc: 0 iff *gpa_p lies inside @slot's guest-physical range */
+static int slot_covers_gpa(const MshvMemorySlot *slot, uint64_t *gpa_p)
+{
+    uint64_t gpa_offset, gpa = *gpa_p;
+
+    /* unsigned wrap when gpa < guest_phys_addr is benign: <= check rejects */
+    gpa_offset = gpa - slot->guest_phys_addr;
+    return (slot->guest_phys_addr <= gpa && gpa_offset < slot->memory_size)
+        ? 0 : -1;
+}
+
+/*
+ * Find the tracked slot whose guest-physical range covers @gpa, or NULL.
+ *
+ * Needs to be called with mshv_state->msm.mutex or RCU read lock held
+ */
+static MshvMemorySlot *find_mem_slot_by_gpa(GList *head, uint64_t gpa)
+{
+    GList *found;
+    MshvMemorySlot *slot;
+
+    trace_mshv_find_slot_by_gpa(gpa);
+
+    found = g_list_find_custom(head, &gpa, (GCompareFunc) slot_covers_gpa);
+    if (found) {
+        slot = found->data;
+        trace_mshv_found_slot(slot->userspace_addr, slot->guest_phys_addr,
+                              slot->memory_size);
+        return slot;
+    }
+
+    return NULL;
+}
+
+/*
+ * Record whether @slot is mapped in the guest.
+ *
+ * Needs to be called with mshv_state->msm.mutex held
+ */
+static void set_mapped(MshvMemorySlot *slot, bool mapped)
+{
+    /* prior writes to mapped field becomes visible before readers see slot */
+    qatomic_store_release(&slot->mapped, mapped);
+}
+
+/*
+ * Handle an unmapped-GPA fault for @gpa by swapping mappings: unmap the
+ * currently-mapped slot whose userspace range overlaps the slot covering
+ * @gpa, then map the @gpa slot in its place.
+ *
+ * Fast path takes only the RCU read lock to bail out cheaply when there is
+ * no covering slot or no overlap; otherwise the mutex is taken and both
+ * lookups are re-validated before any state is modified.
+ */
+MshvRemapResult mshv_remap_overlap_region(int vm_fd, uint64_t gpa)
+{
+    MshvMemorySlot *gpa_slot, *overlap_slot;
+    GList *head;
+    int ret;
+    MshvMemorySlotManager *manager = &mshv_state->msm;
+
+    /* fast path, called often by unmapped_gpa vm exit */
+    WITH_RCU_READ_LOCK_GUARD() {
+        assert(manager);
+        head = qatomic_load_acquire(&manager->slots);
+        /* return early if no slot is found */
+        gpa_slot = find_mem_slot_by_gpa(head, gpa);
+        if (gpa_slot == NULL) {
+            return MshvRemapNoMapping;
+        }
+
+        /* return early if no overlapping slot is found */
+        overlap_slot = find_overlap_mem_slot(head, gpa_slot);
+        if (overlap_slot == NULL) {
+            return MshvRemapNoOverlap;
+        }
+    }
+
+    /*
+     * We'll modify the mapping list, so we need to upgrade to mutex and
+     * recheck.
+     */
+    assert(manager);
+    QEMU_LOCK_GUARD(&manager->mutex);
+
+    /* return early if no slot is found */
+    gpa_slot = find_mem_slot_by_gpa(manager->slots, gpa);
+    if (gpa_slot == NULL) {
+        return MshvRemapNoMapping;
+    }
+
+    /* return early if no overlapping slot is found */
+    overlap_slot = find_overlap_mem_slot(manager->slots, gpa_slot);
+    if (overlap_slot == NULL) {
+        return MshvRemapNoOverlap;
+    }
+
+    /* unmap overlapping slot; failure leaves mapping state unknown, abort */
+    ret = map_or_unmap(vm_fd, overlap_slot, false);
+    if (ret < 0) {
+        error_report("failed to unmap overlap region");
+        abort();
+    }
+    set_mapped(overlap_slot, false);
+    /* NOTE(review): warn_report on every swap may be noisy — confirm intent */
+    warn_report("mapped out userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
+                overlap_slot->userspace_addr,
+                overlap_slot->guest_phys_addr,
+                overlap_slot->memory_size);
+
+    /* map region for gpa */
+    ret = map_or_unmap(vm_fd, gpa_slot, true);
+    if (ret < 0) {
+        error_report("failed to map new region");
+        abort();
+    }
+    set_mapped(gpa_slot, true);
+    warn_report("mapped in userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
+                gpa_slot->userspace_addr, gpa_slot->guest_phys_addr,
+                gpa_slot->memory_size);
+
+    return MshvRemapOk;
+}
+
static int handle_unmapped_mmio_region_read(uint64_t gpa, uint64_t size,
                                            uint8_t *data)
{
+    /* not implemented: reads of unmapped MMIO regions always fail */
    return -1;
}
-static int set_memory(const MshvMemoryRegion *mshv_mr, bool add)
+/*
+ * Unmap and untrack the slot exactly matching (gpa, size, userspace_addr).
+ * Slots never mapped in the guest are only removed from the slot list.
+ * Returns 0 on success, including when no matching slot exists.
+ */
+static int tracked_unmap(int vm_fd, uint64_t gpa, uint64_t size,
+                         uint64_t userspace_addr)
 {
-    int ret = 0;
+    int ret;
+    MshvMemorySlot *slot;
+    MshvMemorySlotManager *manager = &mshv_state->msm;
+
+    assert(manager);
+
+    QEMU_LOCK_GUARD(&manager->mutex);
+
+    slot = find_mem_slot_by_region(gpa, size, userspace_addr);
+    if (!slot) {
+        trace_mshv_skip_unset_mem(userspace_addr, gpa, size);
+        /* no work to do */
+        return 0;
+    }
+
+    if (!is_mapped(slot)) {
+        /* remove slot, no need to unmap */
+        return remove_slot(slot);
+    }
-    if (!mshv_mr) {
-        error_report("Invalid mshv_mr");
+    ret = map_or_unmap(vm_fd, slot, false);
+    if (ret < 0) {
+        error_report("failed to unmap memory region");
+        return ret;
+    }
+    return remove_slot(slot);
+}
+
+/*
+ * Track a new slot for (gpa, size, userspace_addr) and map it in the guest,
+ * unless its userspace range overlaps a slot that is already mapped; then
+ * the slot is recorded unmapped for a later swap-in by
+ * mshv_remap_overlap_region(). Returns 0 on success, -1 on error.
+ */
+static int tracked_map(int vm_fd, uint64_t gpa, uint64_t size, bool readonly,
+                       uint64_t userspace_addr)
+{
+    MshvMemorySlot *slot, *overlap_slot;
+    int ret;
+    MshvMemorySlotManager *manager = &mshv_state->msm;
+
+    assert(manager);
+
+    QEMU_LOCK_GUARD(&manager->mutex);
+
+    slot = find_mem_slot_by_region(gpa, size, userspace_addr);
+    if (slot) {
+        error_report("memory region already mapped at gpa=0x%lx, "
+                     "userspace_addr=0x%lx, size=0x%lx",
+                     slot->guest_phys_addr, slot->userspace_addr,
+                     slot->memory_size);
        return -1;
    }
-    trace_mshv_set_memory(add, mshv_mr->guest_phys_addr,
-                          mshv_mr->memory_size,
-                          mshv_mr->userspace_addr, mshv_mr->readonly,
-                          ret);
-    return map_or_unmap(mshv_state->vm, mshv_mr, add);
+    slot = append_slot(gpa, userspace_addr, size, readonly);
+    if (!slot) {
+        /* append_slot() already reported the error (slot table full) */
+        return -1;
+    }
+
+    overlap_slot = find_overlap_mem_slot(manager->slots, slot);
+    if (overlap_slot) {
+        trace_mshv_remap_attempt(slot->userspace_addr,
+                                 slot->guest_phys_addr,
+                                 slot->memory_size);
+        warn_report("attempt to map region [0x%lx-0x%lx], while "
+                    "[0x%lx-0x%lx] is already mapped in the guest",
+                    userspace_addr, userspace_addr + size - 1,
+                    overlap_slot->userspace_addr,
+                    overlap_slot->userspace_addr +
+                    overlap_slot->memory_size - 1);
+
+        /* do not register mem slot in hv, but record for later swap-in */
+        set_mapped(slot, false);
+
+        return 0;
+    }
+
+    ret = map_or_unmap(vm_fd, slot, true);
+    if (ret < 0) {
+        error_report("failed to map memory region");
+        return -1;
+    }
+    set_mapped(slot, true);
+
+    return 0;
+}
+
+/* Dispatch a region add/remove to the tracked map/unmap helpers */
+static int set_memory(uint64_t gpa, uint64_t size, bool readonly,
+                      uint64_t userspace_addr, bool add)
+{
+    int vm_fd = mshv_state->vm;
+
+    return add ? tracked_map(vm_fd, gpa, size, readonly, userspace_addr)
+               : tracked_unmap(vm_fd, gpa, size, userspace_addr);
+}
/*
bool writable = !area->readonly && !area->rom_device;
hwaddr start_addr, mr_offset, size;
void *ram;
- MshvMemoryRegion mshv_mr = {0};
+
+ size = align_section(section, &start_addr);
+ trace_mshv_set_phys_mem(add, section->mr->name, start_addr);
size = align_section(section, &start_addr);
trace_mshv_set_phys_mem(add, section->mr->name, start_addr);
ram = memory_region_get_ram_ptr(area) + mr_offset;
- mshv_mr.guest_phys_addr = start_addr;
- mshv_mr.memory_size = size;
- mshv_mr.readonly = !writable;
- mshv_mr.userspace_addr = (uint64_t)ram;
-
- ret = set_memory(&mshv_mr, add);
+ ret = set_memory(start_addr, size, !writable, (uint64_t)ram, add);
if (ret < 0) {
- error_report("Failed to set memory region");
+ error_report("failed to set memory region");
abort();
}
}
+
+/* Initialize the per-VM slot manager: empty slot list and a fresh mutex */
+void mshv_init_memory_slot_manager(MshvState *mshv_state)
+{
+    MshvMemorySlotManager *m;
+
+    assert(mshv_state);
+
+    m = &mshv_state->msm;
+    m->slots = NULL;
+    m->n_slots = 0;
+    qemu_mutex_init(&m->mutex);
+}