From: Damien Bergamini Date: Fri, 10 Oct 2025 04:55:55 +0000 (+0900) Subject: pcie_sriov: Fix broken MMIO accesses from SR-IOV VFs X-Git-Tag: v10.0.6~11 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=47f00fd978c671e232d77404c2c4bc7c5a514c60;p=thirdparty%2Fqemu.git pcie_sriov: Fix broken MMIO accesses from SR-IOV VFs Starting with commit cab1398a60eb, SR-IOV VFs are realized as soon as pcie_sriov_pf_init() is called. Because pcie_sriov_pf_init() must be called before pcie_sriov_pf_init_vf_bar(), the VF BARs types won't be known when the VF realize function calls pcie_sriov_vf_register_bar(). This breaks the memory regions of the VFs (for instance with igbvf): $ lspci ... Region 0: Memory at 281a00000 (64-bit, prefetchable) [virtual] [size=16K] Region 3: Memory at 281a20000 (64-bit, prefetchable) [virtual] [size=16K] $ info mtree ... address-space: pci_bridge_pci_mem 0000000000000000-ffffffffffffffff (prio 0, i/o): pci_bridge_pci 0000000081a00000-0000000081a03fff (prio 1, i/o): igbvf-mmio 0000000081a20000-0000000081a23fff (prio 1, i/o): igbvf-msix and causes MMIO accesses to fail: Invalid write at addr 0x281A01520, size 4, region '(null)', reason: rejected Invalid read at addr 0x281A00C40, size 4, region '(null)', reason: rejected To fix this, VF BARs are now registered with pci_register_bar() which has a type parameter and pcie_sriov_vf_register_bar() is removed. Fixes: cab1398a60eb ("pcie_sriov: Reuse SR-IOV VF device instances") Signed-off-by: Damien Bergamini Signed-off-by: Clement Mathieu--Drif Reviewed-by: Akihiko Odaki Reviewed-by: Michael S. Tsirkin Message-ID: <20250901151314.1038020-1-clement.mathieu--drif@eviden.com> Signed-off-by: Michael S. Tsirkin (cherry picked from commit 2e54e5fda779a7ba45578884276dca62462f7a06) (This is a back-port of commit 2e54e5fda779 to 10.0.x) Signed-off-by: Akihiko Odaki Signed-off-by: Michael Tokarev --- diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt index ab2142807f..00d7bd93fd 100644 --- a/docs/pcie_sriov.txt +++ b/docs/pcie_sriov.txt @@ -72,8 +72,7 @@ setting up a BAR for a VF. 2) Similarly in the implementation of the virtual function, you need to make it a PCI Express device and add a similar set of capabilities except for the SR/IOV capability. Then you need to set up the VF BARs as - subregions of the PFs SR/IOV VF BARs by calling - pcie_sriov_vf_register_bar() instead of the normal pci_register_bar() call: + subregions of the PFs SR/IOV VF BARs by calling pci_register_bar(): pci_your_vf_dev_realize( ... ) { @@ -83,7 +82,7 @@ setting up a BAR for a VF. pcie_ari_init(d, 0x100); ... memory_region_init(mr, ... ) - pcie_sriov_vf_register_bar(d, bar_nr, mr); + pci_register_bar(d, bar_nr, bar_type, mr); ... } diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index 21a97d4d61..575410d6cc 100644 --- a/hw/net/igbvf.c +++ b/hw/net/igbvf.c @@ -251,10 +251,12 @@ static void igbvf_pci_realize(PCIDevice *dev, Error **errp) memory_region_init_io(&s->mmio, OBJECT(dev), &mmio_ops, s, "igbvf-mmio", IGBVF_MMIO_SIZE); - pcie_sriov_vf_register_bar(dev, IGBVF_MMIO_BAR_IDX, &s->mmio); + pci_register_bar(dev, IGBVF_MMIO_BAR_IDX, PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &s->mmio); memory_region_init(&s->msix, OBJECT(dev), "igbvf-msix", IGBVF_MSIX_SIZE); - pcie_sriov_vf_register_bar(dev, IGBVF_MSIX_BAR_IDX, &s->msix); + pci_register_bar(dev, IGBVF_MSIX_BAR_IDX, PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &s->msix); ret = msix_init(dev, IGBVF_MSIX_VEC_NUM, &s->msix, IGBVF_MSIX_BAR_IDX, 0, &s->msix, IGBVF_MSIX_BAR_IDX, 0x2000, 0x70, errp); diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 67cb95c809..c93039ba23 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -8708,12 +8708,8 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) msix_table_offset); memory_region_add_subregion(&n->bar0, 0, &n->iomem); - if (pci_is_vf(pci_dev)) { - pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0); - } else { - pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); - } + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); ret = msix_init(pci_dev, nr_vectors, &n->bar0, 0, msix_table_offset, diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 503a897528..57c608c368 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1470,7 +1470,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size = memory_region_size(memory); uint8_t hdr_type; - assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */ assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); @@ -1482,7 +1481,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, r = &pci_dev->io_regions[region_num]; assert(!r->size); - r->addr = PCI_BAR_UNMAPPED; r->size = size; r->type = type; r->memory = memory; @@ -1490,22 +1488,32 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, ? pci_get_bus(pci_dev)->address_space_io : pci_get_bus(pci_dev)->address_space_mem; - wmask = ~(size - 1); - if (region_num == PCI_ROM_SLOT) { - /* ROM enable bit is writable */ - wmask |= PCI_ROM_ADDRESS_ENABLE; - } + if (pci_is_vf(pci_dev)) { + r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size); + if (r->addr != PCI_BAR_UNMAPPED) { + memory_region_add_subregion_overlap(r->address_space, + r->addr, r->memory, 1); + } + } else { + r->addr = PCI_BAR_UNMAPPED; - addr = pci_bar(pci_dev, region_num); - pci_set_long(pci_dev->config + addr, type); + wmask = ~(size - 1); + if (region_num == PCI_ROM_SLOT) { + /* ROM enable bit is writable */ + wmask |= PCI_ROM_ADDRESS_ENABLE; + } - if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && - r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { - pci_set_quad(pci_dev->wmask + addr, wmask); - pci_set_quad(pci_dev->cmask + addr, ~0ULL); - } else { - pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); - pci_set_long(pci_dev->cmask + addr, 0xffffffff); + addr = pci_bar(pci_dev, region_num); + pci_set_long(pci_dev->config + addr, type); + + if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && + r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(pci_dev->wmask + addr, wmask); + pci_set_quad(pci_dev->cmask + addr, ~0ULL); + } else { + pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); + pci_set_long(pci_dev->cmask + addr, 0xffffffff); + } } } diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 6281bd61d9..17bbfc568f 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -146,42 +146,6 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, dev->exp.sriov_pf.vf_bar_type[region_num] = type; } -void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, - MemoryRegion *memory) -{ - PCIIORegion *r; - PCIBus *bus = pci_get_bus(dev); - uint8_t type; - pcibus_t size = memory_region_size(memory); - - assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ - assert(region_num >= 0); - assert(region_num < PCI_NUM_REGIONS); - type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; - - if (!is_power_of_2(size)) { - error_report("%s: PCI region size must be a power" - " of two - type=0x%x, size=0x%"FMT_PCIBUS, - __func__, type, size); - exit(1); - } - - r = &dev->io_regions[region_num]; - r->memory = memory; - r->address_space = - type & PCI_BASE_ADDRESS_SPACE_IO - ? bus->address_space_io - : bus->address_space_mem; - r->size = size; - r->type = type; - - r->addr = pci_bar_address(dev, region_num, r->type, r->size); - if (r->addr != PCI_BAR_UNMAPPED) { - memory_region_add_subregion_overlap(r->address_space, - r->addr, r->memory, 1); - } -} - static void register_vfs(PCIDevice *dev) { uint16_t num_vfs; diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index c5d2d318d3..e9cb0a3248 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -36,10 +36,6 @@ void pcie_sriov_pf_exit(PCIDevice *dev); void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, uint8_t type, dma_addr_t size); -/* Instantiate a bar for a VF */ -void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, - MemoryRegion *memory); - /* * Default (minimal) page size support values * as required by the SR/IOV standard: