git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 22 Apr 2018 07:30:31 +0000 (09:30 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 22 Apr 2018 07:30:31 +0000 (09:30 +0200)
added patches:
ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch
vfio-pci-virtualize-maximum-payload-size.patch
vfio-pci-virtualize-maximum-read-request-size.patch
vfio-pci-virtualize-pcie-af-flr.patch

queue-4.4/ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/vfio-pci-virtualize-maximum-payload-size.patch [new file with mode: 0644]
queue-4.4/vfio-pci-virtualize-maximum-read-request-size.patch [new file with mode: 0644]
queue-4.4/vfio-pci-virtualize-pcie-af-flr.patch [new file with mode: 0644]

diff --git a/queue-4.4/ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch b/queue-4.4/ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch
new file mode 100644 (file)
index 0000000..7870dd7
--- /dev/null
@@ -0,0 +1,57 @@
+From 18db4b4e6fc31eda838dd1c1296d67dbcb3dc957 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Thu, 29 Mar 2018 22:10:35 -0400
+Subject: ext4: don't allow r/w mounts if metadata blocks overlap the superblock
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 18db4b4e6fc31eda838dd1c1296d67dbcb3dc957 upstream.
+
+If some metadata block, such as an allocation bitmap, overlaps the
+superblock, it's very likely that if the file system is mounted
+read/write, the results will not be pretty.  So disallow r/w mounts
+for file systems corrupted in this particular way.
+
+Backport notes:
+3.18.y is missing bc98a42c1f7d ("VFS: Convert sb->s_flags & MS_RDONLY to sb_rdonly(sb)")
+and e462ec50cb5f ("VFS: Differentiate mount flags (MS_*) from internal superblock flags")
+so we simply use the sb MS_RDONLY check from pre bc98a42c1f7d in place of the sb_rdonly
+function used in the upstream variant of the patch.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@vger.kernel.org
+Signed-off-by: Harsh Shandilya <harsh@prjkt.io>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/super.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2131,6 +2131,8 @@ static int ext4_check_descriptors(struct
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Block bitmap for group %u overlaps "
+                                "superblock", i);
++                      if (!(sb->s_flags & MS_RDONLY))
++                              return 0;
+               }
+               if (block_bitmap < first_block || block_bitmap > last_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+@@ -2143,6 +2145,8 @@ static int ext4_check_descriptors(struct
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Inode bitmap for group %u overlaps "
+                                "superblock", i);
++                      if (!(sb->s_flags & MS_RDONLY))
++                              return 0;
+               }
+               if (inode_bitmap < first_block || inode_bitmap > last_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+@@ -2155,6 +2159,8 @@ static int ext4_check_descriptors(struct
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Inode table for group %u overlaps "
+                                "superblock", i);
++                      if (!(sb->s_flags & MS_RDONLY))
++                              return 0;
+               }
+               if (inode_table < first_block ||
+                   inode_table + sbi->s_itb_per_group - 1 > last_block) {
index 19ad99de2d449be158bdc88c5c0389448d87b84a..3cf2ecd7b0e5881c7b7414e6484b0825116d4940 100644 (file)
@@ -68,3 +68,7 @@ alsa-pcm-avoid-potential-races-between-oss-ioctls-and-read-write.patch
 alsa-pcm-return-ebusy-for-oss-ioctls-changing-busy-streams.patch
 alsa-pcm-fix-mutex-unbalance-in-oss-emulation-ioctls.patch
 alsa-pcm-fix-endless-loop-for-xrun-recovery-in-oss-emulation.patch
+vfio-pci-virtualize-pcie-af-flr.patch
+vfio-pci-virtualize-maximum-payload-size.patch
+vfio-pci-virtualize-maximum-read-request-size.patch
+ext4-don-t-allow-r-w-mounts-if-metadata-blocks-overlap-the-superblock.patch
diff --git a/queue-4.4/vfio-pci-virtualize-maximum-payload-size.patch b/queue-4.4/vfio-pci-virtualize-maximum-payload-size.patch
new file mode 100644 (file)
index 0000000..f01c94b
--- /dev/null
@@ -0,0 +1,46 @@
+From 523184972b282cd9ca17a76f6ca4742394856818 Mon Sep 17 00:00:00 2001
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Mon, 2 Oct 2017 12:39:09 -0600
+Subject: vfio/pci: Virtualize Maximum Payload Size
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+commit 523184972b282cd9ca17a76f6ca4742394856818 upstream.
+
+With virtual PCI-Express chipsets, we now see userspace/guest drivers
+trying to match the physical MPS setting to a virtual downstream port.
+Of course a lone physical device surrounded by virtual interconnects
+cannot make a correct decision for a proper MPS setting.  Instead,
+let's virtualize the MPS control register so that writes through to
+hardware are disallowed.  Userspace drivers like QEMU assume they can
+write anything to the device and we'll filter out anything dangerous.
+Since mismatched MPS can lead to AER and other faults, let's add it
+to the kernel side rather than relying on userspace virtualization to
+handle it.
+
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Reviewed-by: Eric Auger <eric.auger@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/vfio/pci/vfio_pci_config.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/vfio/pci/vfio_pci_config.c
++++ b/drivers/vfio/pci/vfio_pci_config.c
+@@ -799,11 +799,13 @@ static int __init init_pci_cap_exp_perm(
+       /*
+        * Allow writes to device control fields, except devctl_phantom,
+-       * which could confuse IOMMU, and the ARI bit in devctl2, which
++       * which could confuse IOMMU, MPS, which can break communication
++       * with other physical devices, and the ARI bit in devctl2, which
+        * is set at probe time.  FLR gets virtualized via our writefn.
+        */
+       p_setw(perm, PCI_EXP_DEVCTL,
+-             PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
++             PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD,
++             ~PCI_EXP_DEVCTL_PHANTOM);
+       p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
+       return 0;
+ }
diff --git a/queue-4.4/vfio-pci-virtualize-maximum-read-request-size.patch b/queue-4.4/vfio-pci-virtualize-maximum-read-request-size.patch
new file mode 100644 (file)
index 0000000..d5f969d
--- /dev/null
@@ -0,0 +1,80 @@
+From cf0d53ba4947aad6e471491d5b20a567cbe92e56 Mon Sep 17 00:00:00 2001
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Mon, 2 Oct 2017 12:39:10 -0600
+Subject: vfio/pci: Virtualize Maximum Read Request Size
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+commit cf0d53ba4947aad6e471491d5b20a567cbe92e56 upstream.
+
+MRRS defines the maximum read request size a device is allowed to
+make.  Drivers will often increase this to allow more data transfer
+with a single request.  Completions to this request are bound by the
+MPS setting for the bus.  Aside from device quirks (none known), it
+doesn't seem to make sense to set an MRRS value less than MPS, yet
+this is a likely scenario given that user drivers do not have a
+system-wide view of the PCI topology.  Virtualize MRRS such that the
+user can set MRRS >= MPS, but use MPS as the floor value that we'll
+write to hardware.
+
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/vfio/pci/vfio_pci_config.c |   29 ++++++++++++++++++++++++++---
+ 1 file changed, 26 insertions(+), 3 deletions(-)
+
+--- a/drivers/vfio/pci/vfio_pci_config.c
++++ b/drivers/vfio/pci/vfio_pci_config.c
+@@ -758,6 +758,7 @@ static int vfio_exp_config_write(struct
+ {
+       __le16 *ctrl = (__le16 *)(vdev->vconfig + pos -
+                                 offset + PCI_EXP_DEVCTL);
++      int readrq = le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ;
+       count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
+       if (count < 0)
+@@ -783,6 +784,27 @@ static int vfio_exp_config_write(struct
+                       pci_try_reset_function(vdev->pdev);
+       }
++      /*
++       * MPS is virtualized to the user, writes do not change the physical
++       * register since determining a proper MPS value requires a system wide
++       * device view.  The MRRS is largely independent of MPS, but since the
++       * user does not have that system-wide view, they might set a safe, but
++       * inefficiently low value.  Here we allow writes through to hardware,
++       * but we set the floor to the physical device MPS setting, so that
++       * we can at least use full TLPs, as defined by the MPS value.
++       *
++       * NB, if any devices actually depend on an artificially low MRRS
++       * setting, this will need to be revisited, perhaps with a quirk
++       * through pcie_set_readrq().
++       */
++      if (readrq != (le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ)) {
++              readrq = 128 <<
++                      ((le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ) >> 12);
++              readrq = max(readrq, pcie_get_mps(vdev->pdev));
++
++              pcie_set_readrq(vdev->pdev, readrq);
++      }
++
+       return count;
+ }
+@@ -801,11 +823,12 @@ static int __init init_pci_cap_exp_perm(
+        * Allow writes to device control fields, except devctl_phantom,
+        * which could confuse IOMMU, MPS, which can break communication
+        * with other physical devices, and the ARI bit in devctl2, which
+-       * is set at probe time.  FLR gets virtualized via our writefn.
++       * is set at probe time.  FLR and MRRS get virtualized via our
++       * writefn.
+        */
+       p_setw(perm, PCI_EXP_DEVCTL,
+-             PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD,
+-             ~PCI_EXP_DEVCTL_PHANTOM);
++             PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD |
++             PCI_EXP_DEVCTL_READRQ, ~PCI_EXP_DEVCTL_PHANTOM);
+       p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
+       return 0;
+ }
diff --git a/queue-4.4/vfio-pci-virtualize-pcie-af-flr.patch b/queue-4.4/vfio-pci-virtualize-pcie-af-flr.patch
new file mode 100644 (file)
index 0000000..f5007f4
--- /dev/null
@@ -0,0 +1,146 @@
+From ddf9dc0eb5314d6dac8b19b1cc37c739c6896e7e Mon Sep 17 00:00:00 2001
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Mon, 26 Sep 2016 13:52:16 -0600
+Subject: vfio-pci: Virtualize PCIe & AF FLR
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+commit ddf9dc0eb5314d6dac8b19b1cc37c739c6896e7e upstream.
+
+We use a BAR restore trick to try to detect when a user has performed
+a device reset, possibly through FLR or other backdoors, to put things
+back into a working state.  This is important for backdoor resets, but
+we can actually just virtualize the "front door" resets provided via
+PCIe and AF FLR.  Set these bits as virtualized + writable, allowing
+the default write to set them in vconfig, then we can simply check the
+bit, perform an FLR of our own, and clear the bit.  We don't actually
+have the granularity in PCI to specify the type of reset we want to
+do, but generally devices don't implement both PCIe and AF FLR and
+we'll favor these over other types of reset, so we should generally
+line up.  We do test whether the device provides the requested FLR type
+to stay consistent with hardware capabilities though.
+
+This seems to fix several instances of devices getting into bad states
+with userspace drivers, like dpdk, running inside a VM.
+
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Reviewed-by: Greg Rose <grose@lightfleet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/vfio/pci/vfio_pci_config.c |   82 ++++++++++++++++++++++++++++++++++---
+ 1 file changed, 77 insertions(+), 5 deletions(-)
+
+--- a/drivers/vfio/pci/vfio_pci_config.c
++++ b/drivers/vfio/pci/vfio_pci_config.c
+@@ -752,6 +752,40 @@ static int __init init_pci_cap_pcix_perm
+       return 0;
+ }
++static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
++                               int count, struct perm_bits *perm,
++                               int offset, __le32 val)
++{
++      __le16 *ctrl = (__le16 *)(vdev->vconfig + pos -
++                                offset + PCI_EXP_DEVCTL);
++
++      count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
++      if (count < 0)
++              return count;
++
++      /*
++       * The FLR bit is virtualized, if set and the device supports PCIe
++       * FLR, issue a reset_function.  Regardless, clear the bit, the spec
++       * requires it to be always read as zero.  NB, reset_function might
++       * not use a PCIe FLR, we don't have that level of granularity.
++       */
++      if (*ctrl & cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR)) {
++              u32 cap;
++              int ret;
++
++              *ctrl &= ~cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR);
++
++              ret = pci_user_read_config_dword(vdev->pdev,
++                                               pos - offset + PCI_EXP_DEVCAP,
++                                               &cap);
++
++              if (!ret && (cap & PCI_EXP_DEVCAP_FLR))
++                      pci_try_reset_function(vdev->pdev);
++      }
++
++      return count;
++}
++
+ /* Permissions for PCI Express capability */
+ static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
+ {
+@@ -759,26 +793,64 @@ static int __init init_pci_cap_exp_perm(
+       if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2))
+               return -ENOMEM;
++      perm->writefn = vfio_exp_config_write;
++
+       p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
+       /*
+-       * Allow writes to device control fields (includes FLR!)
+-       * but not to devctl_phantom which could confuse IOMMU
+-       * or to the ARI bit in devctl2 which is set at probe time
++       * Allow writes to device control fields, except devctl_phantom,
++       * which could confuse IOMMU, and the ARI bit in devctl2, which
++       * is set at probe time.  FLR gets virtualized via our writefn.
+        */
+-      p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM);
++      p_setw(perm, PCI_EXP_DEVCTL,
++             PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
+       p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
+       return 0;
+ }
++static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
++                              int count, struct perm_bits *perm,
++                              int offset, __le32 val)
++{
++      u8 *ctrl = vdev->vconfig + pos - offset + PCI_AF_CTRL;
++
++      count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
++      if (count < 0)
++              return count;
++
++      /*
++       * The FLR bit is virtualized, if set and the device supports AF
++       * FLR, issue a reset_function.  Regardless, clear the bit, the spec
++       * requires it to be always read as zero.  NB, reset_function might
++       * not use an AF FLR, we don't have that level of granularity.
++       */
++      if (*ctrl & PCI_AF_CTRL_FLR) {
++              u8 cap;
++              int ret;
++
++              *ctrl &= ~PCI_AF_CTRL_FLR;
++
++              ret = pci_user_read_config_byte(vdev->pdev,
++                                              pos - offset + PCI_AF_CAP,
++                                              &cap);
++
++              if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP))
++                      pci_try_reset_function(vdev->pdev);
++      }
++
++      return count;
++}
++
+ /* Permissions for Advanced Function capability */
+ static int __init init_pci_cap_af_perm(struct perm_bits *perm)
+ {
+       if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF]))
+               return -ENOMEM;
++      perm->writefn = vfio_af_config_write;
++
+       p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
+-      p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR);
++      p_setb(perm, PCI_AF_CTRL, PCI_AF_CTRL_FLR, PCI_AF_CTRL_FLR);
+       return 0;
+ }