From 159b7eede32ea403f9dfdd66f2f7bc8403377e3c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 22 Apr 2018 09:30:31 +0200 Subject: [PATCH] 4.4-stable patches added patches: ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch vfio-pci-virtualize-maximum-payload-size.patch vfio-pci-virtualize-maximum-read-request-size.patch vfio-pci-virtualize-pcie-af-flr.patch --- ...tadata-blocks-overlap-the-superblock.patch | 57 +++++++ queue-4.4/series | 4 + ...-pci-virtualize-maximum-payload-size.patch | 46 ++++++ ...virtualize-maximum-read-request-size.patch | 80 ++++++++++ .../vfio-pci-virtualize-pcie-af-flr.patch | 146 ++++++++++++++++++ 5 files changed, 333 insertions(+) create mode 100644 queue-4.4/ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch create mode 100644 queue-4.4/vfio-pci-virtualize-maximum-payload-size.patch create mode 100644 queue-4.4/vfio-pci-virtualize-maximum-read-request-size.patch create mode 100644 queue-4.4/vfio-pci-virtualize-pcie-af-flr.patch diff --git a/queue-4.4/ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch b/queue-4.4/ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch new file mode 100644 index 00000000000..7870dd76887 --- /dev/null +++ b/queue-4.4/ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch @@ -0,0 +1,57 @@ +From 18db4b4e6fc31eda838dd1c1296d67dbcb3dc957 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Thu, 29 Mar 2018 22:10:35 -0400 +Subject: ext4: don't allow r/w mounts if metadata blocks overlap the superblock + +From: Theodore Ts'o + +commit 18db4b4e6fc31eda838dd1c1296d67dbcb3dc957 upstream. + +If some metadata block, such as an allocation bitmap, overlaps the +superblock, it's very likely that if the file system is mounted +read/write, the results will not be pretty. So disallow r/w mounts +for file systems corrupted in this particular way. 
+ +Backport notes: +3.18.y is missing bc98a42c1f7d ("VFS: Convert sb->s_flags & MS_RDONLY to sb_rdonly(sb)") +and e462ec50cb5f ("VFS: Differentiate mount flags (MS_*) from internal superblock flags") +so we simply use the sb MS_RDONLY check from pre bc98a42c1f7d in place of the sb_rdonly +function used in the upstream variant of the patch. + +Signed-off-by: Theodore Ts'o +Cc: stable@vger.kernel.org +Signed-off-by: Harsh Shandilya +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2131,6 +2131,8 @@ static int ext4_check_descriptors(struct + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Block bitmap for group %u overlaps " + "superblock", i); ++ if (!(sb->s_flags & MS_RDONLY)) ++ return 0; + } + if (block_bitmap < first_block || block_bitmap > last_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " +@@ -2143,6 +2145,8 @@ static int ext4_check_descriptors(struct + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode bitmap for group %u overlaps " + "superblock", i); ++ if (!(sb->s_flags & MS_RDONLY)) ++ return 0; + } + if (inode_bitmap < first_block || inode_bitmap > last_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " +@@ -2155,6 +2159,8 @@ static int ext4_check_descriptors(struct + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode table for group %u overlaps " + "superblock", i); ++ if (!(sb->s_flags & MS_RDONLY)) ++ return 0; + } + if (inode_table < first_block || + inode_table + sbi->s_itb_per_group - 1 > last_block) { diff --git a/queue-4.4/series b/queue-4.4/series index 19ad99de2d4..3cf2ecd7b0e 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -68,3 +68,7 @@ alsa-pcm-avoid-potential-races-between-oss-ioctls-and-read-write.patch alsa-pcm-return-ebusy-for-oss-ioctls-changing-busy-streams.patch alsa-pcm-fix-mutex-unbalance-in-oss-emulation-ioctls.patch 
alsa-pcm-fix-endless-loop-for-xrun-recovery-in-oss-emulation.patch +vfio-pci-virtualize-pcie-af-flr.patch +vfio-pci-virtualize-maximum-payload-size.patch +vfio-pci-virtualize-maximum-read-request-size.patch +ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch diff --git a/queue-4.4/vfio-pci-virtualize-maximum-payload-size.patch b/queue-4.4/vfio-pci-virtualize-maximum-payload-size.patch new file mode 100644 index 00000000000..f01c94b7863 --- /dev/null +++ b/queue-4.4/vfio-pci-virtualize-maximum-payload-size.patch @@ -0,0 +1,46 @@ +From 523184972b282cd9ca17a76f6ca4742394856818 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Mon, 2 Oct 2017 12:39:09 -0600 +Subject: vfio/pci: Virtualize Maximum Payload Size + +From: Alex Williamson + +commit 523184972b282cd9ca17a76f6ca4742394856818 upstream. + +With virtual PCI-Express chipsets, we now see userspace/guest drivers +trying to match the physical MPS setting to a virtual downstream port. +Of course a lone physical device surrounded by virtual interconnects +cannot make a correct decision for a proper MPS setting. Instead, +let's virtualize the MPS control register so that writes through to +hardware are disallowed. Userspace drivers like QEMU assume they can +write anything to the device and we'll filter out anything dangerous. +Since mismatched MPS can lead to AER and other faults, let's add it +to the kernel side rather than relying on userspace virtualization to +handle it. 
+ +Signed-off-by: Alex Williamson +Reviewed-by: Eric Auger +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/vfio/pci/vfio_pci_config.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/vfio/pci/vfio_pci_config.c ++++ b/drivers/vfio/pci/vfio_pci_config.c +@@ -799,11 +799,13 @@ static int __init init_pci_cap_exp_perm( + + /* + * Allow writes to device control fields, except devctl_phantom, +- * which could confuse IOMMU, and the ARI bit in devctl2, which ++ * which could confuse IOMMU, MPS, which can break communication ++ * with other physical devices, and the ARI bit in devctl2, which + * is set at probe time. FLR gets virtualized via our writefn. + */ + p_setw(perm, PCI_EXP_DEVCTL, +- PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM); ++ PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD, ++ ~PCI_EXP_DEVCTL_PHANTOM); + p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); + return 0; + } diff --git a/queue-4.4/vfio-pci-virtualize-maximum-read-request-size.patch b/queue-4.4/vfio-pci-virtualize-maximum-read-request-size.patch new file mode 100644 index 00000000000..d5f969dae16 --- /dev/null +++ b/queue-4.4/vfio-pci-virtualize-maximum-read-request-size.patch @@ -0,0 +1,80 @@ +From cf0d53ba4947aad6e471491d5b20a567cbe92e56 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Mon, 2 Oct 2017 12:39:10 -0600 +Subject: vfio/pci: Virtualize Maximum Read Request Size + +From: Alex Williamson + +commit cf0d53ba4947aad6e471491d5b20a567cbe92e56 upstream. + +MRRS defines the maximum read request size a device is allowed to +make. Drivers will often increase this to allow more data transfer +with a single request. Completions to this request are bound by the +MPS setting for the bus. Aside from device quirks (none known), it +doesn't seem to make sense to set an MRRS value less than MPS, yet +this is a likely scenario given that user drivers do not have a +system-wide view of the PCI topology. 
Virtualize MRRS such that the +user can set MRRS >= MPS, but use MPS as the floor value that we'll +write to hardware. + +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/vfio/pci/vfio_pci_config.c | 29 ++++++++++++++++++++++++++--- + 1 file changed, 26 insertions(+), 3 deletions(-) + +--- a/drivers/vfio/pci/vfio_pci_config.c ++++ b/drivers/vfio/pci/vfio_pci_config.c +@@ -758,6 +758,7 @@ static int vfio_exp_config_write(struct + { + __le16 *ctrl = (__le16 *)(vdev->vconfig + pos - + offset + PCI_EXP_DEVCTL); ++ int readrq = le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ; + + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) +@@ -783,6 +784,27 @@ static int vfio_exp_config_write(struct + pci_try_reset_function(vdev->pdev); + } + ++ /* ++ * MPS is virtualized to the user, writes do not change the physical ++ * register since determining a proper MPS value requires a system wide ++ * device view. The MRRS is largely independent of MPS, but since the ++ * user does not have that system-wide view, they might set a safe, but ++ * inefficiently low value. Here we allow writes through to hardware, ++ * but we set the floor to the physical device MPS setting, so that ++ * we can at least use full TLPs, as defined by the MPS value. ++ * ++ * NB, if any devices actually depend on an artificially low MRRS ++ * setting, this will need to be revisited, perhaps with a quirk ++ * though pcie_set_readrq(). 
++ */ ++ if (readrq != (le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ)) { ++ readrq = 128 << ++ ((le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ) >> 12); ++ readrq = max(readrq, pcie_get_mps(vdev->pdev)); ++ ++ pcie_set_readrq(vdev->pdev, readrq); ++ } ++ + return count; + } + +@@ -801,11 +823,12 @@ static int __init init_pci_cap_exp_perm( + * Allow writes to device control fields, except devctl_phantom, + * which could confuse IOMMU, MPS, which can break communication + * with other physical devices, and the ARI bit in devctl2, which +- * is set at probe time. FLR gets virtualized via our writefn. ++ * is set at probe time. FLR and MRRS get virtualized via our ++ * writefn. + */ + p_setw(perm, PCI_EXP_DEVCTL, +- PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD, +- ~PCI_EXP_DEVCTL_PHANTOM); ++ PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD | ++ PCI_EXP_DEVCTL_READRQ, ~PCI_EXP_DEVCTL_PHANTOM); + p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); + return 0; + } diff --git a/queue-4.4/vfio-pci-virtualize-pcie-af-flr.patch b/queue-4.4/vfio-pci-virtualize-pcie-af-flr.patch new file mode 100644 index 00000000000..f5007f4cad0 --- /dev/null +++ b/queue-4.4/vfio-pci-virtualize-pcie-af-flr.patch @@ -0,0 +1,146 @@ +From ddf9dc0eb5314d6dac8b19b1cc37c739c6896e7e Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Mon, 26 Sep 2016 13:52:16 -0600 +Subject: vfio-pci: Virtualize PCIe & AF FLR + +From: Alex Williamson + +commit ddf9dc0eb5314d6dac8b19b1cc37c739c6896e7e upstream. + +We use a BAR restore trick to try to detect when a user has performed +a device reset, possibly through FLR or other backdoors, to put things +back into a working state. This is important for backdoor resets, but +we can actually just virtualize the "front door" resets provided via +PCIe and AF FLR. Set these bits as virtualized + writable, allowing +the default write to set them in vconfig, then we can simply check the +bit, perform an FLR of our own, and clear the bit. 
We don't actually +have the granularity in PCI to specify the type of reset we want to +do, but generally devices don't implement both PCIe and AF FLR and +we'll favor these over other types of reset, so we should generally +line up. We do test whether the device provides the requested FLR type +to stay consistent with hardware capabilities though. + +This seems to fix several instances of devices getting into bad states +with userspace drivers, like dpdk, running inside a VM. + +Signed-off-by: Alex Williamson +Reviewed-by: Greg Rose +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/vfio/pci/vfio_pci_config.c | 82 ++++++++++++++++++++++++++++++++++--- + 1 file changed, 77 insertions(+), 5 deletions(-) + +--- a/drivers/vfio/pci/vfio_pci_config.c ++++ b/drivers/vfio/pci/vfio_pci_config.c +@@ -752,6 +752,40 @@ static int __init init_pci_cap_pcix_perm + return 0; + } + ++static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos, ++ int count, struct perm_bits *perm, ++ int offset, __le32 val) ++{ ++ __le16 *ctrl = (__le16 *)(vdev->vconfig + pos - ++ offset + PCI_EXP_DEVCTL); ++ ++ count = vfio_default_config_write(vdev, pos, count, perm, offset, val); ++ if (count < 0) ++ return count; ++ ++ /* ++ * The FLR bit is virtualized, if set and the device supports PCIe ++ * FLR, issue a reset_function. Regardless, clear the bit, the spec ++ * requires it to be always read as zero. NB, reset_function might ++ * not use a PCIe FLR, we don't have that level of granularity. 
++ */ ++ if (*ctrl & cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR)) { ++ u32 cap; ++ int ret; ++ ++ *ctrl &= ~cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR); ++ ++ ret = pci_user_read_config_dword(vdev->pdev, ++ pos - offset + PCI_EXP_DEVCAP, ++ &cap); ++ ++ if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) ++ pci_try_reset_function(vdev->pdev); ++ } ++ ++ return count; ++} ++ + /* Permissions for PCI Express capability */ + static int __init init_pci_cap_exp_perm(struct perm_bits *perm) + { +@@ -759,26 +793,64 @@ static int __init init_pci_cap_exp_perm( + if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2)) + return -ENOMEM; + ++ perm->writefn = vfio_exp_config_write; ++ + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); + + /* +- * Allow writes to device control fields (includes FLR!) +- * but not to devctl_phantom which could confuse IOMMU +- * or to the ARI bit in devctl2 which is set at probe time ++ * Allow writes to device control fields, except devctl_phantom, ++ * which could confuse IOMMU, and the ARI bit in devctl2, which ++ * is set at probe time. FLR gets virtualized via our writefn. + */ +- p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM); ++ p_setw(perm, PCI_EXP_DEVCTL, ++ PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM); + p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); + return 0; + } + ++static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos, ++ int count, struct perm_bits *perm, ++ int offset, __le32 val) ++{ ++ u8 *ctrl = vdev->vconfig + pos - offset + PCI_AF_CTRL; ++ ++ count = vfio_default_config_write(vdev, pos, count, perm, offset, val); ++ if (count < 0) ++ return count; ++ ++ /* ++ * The FLR bit is virtualized, if set and the device supports AF ++ * FLR, issue a reset_function. Regardless, clear the bit, the spec ++ * requires it to be always read as zero. NB, reset_function might ++ * not use an AF FLR, we don't have that level of granularity. 
++ */ ++ if (*ctrl & PCI_AF_CTRL_FLR) { ++ u8 cap; ++ int ret; ++ ++ *ctrl &= ~PCI_AF_CTRL_FLR; ++ ++ ret = pci_user_read_config_byte(vdev->pdev, ++ pos - offset + PCI_AF_CAP, ++ &cap); ++ ++ if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) ++ pci_try_reset_function(vdev->pdev); ++ } ++ ++ return count; ++} ++ + /* Permissions for Advanced Function capability */ + static int __init init_pci_cap_af_perm(struct perm_bits *perm) + { + if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF])) + return -ENOMEM; + ++ perm->writefn = vfio_af_config_write; ++ + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); +- p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR); ++ p_setb(perm, PCI_AF_CTRL, PCI_AF_CTRL_FLR, PCI_AF_CTRL_FLR); + return 0; + } + -- 2.47.3