]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 12 Mar 2017 16:31:07 +0000 (17:31 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 12 Mar 2017 16:31:07 +0000 (17:31 +0100)
added patches:
bluetooth-add-another-ar3012-04ca-3018-device.patch
brcmfmac-fix-incorrect-event-channel-deduction.patch
ceph-remove-req-from-unsafe-list-when-unregistering-it.patch
cxl-fix-nested-locking-hang-during-eeh-hotplug.patch
cxl-prevent-read-write-to-afu-config-space-while-afu-not-configured.patch
drivers-pci-hotplug-fix-initial-state-for-empty-slot.patch
drivers-pci-hotplug-handle-presence-detection-change-properly.patch
fs-better-permission-checking-for-submounts.patch
ib-ipoib-add-destination-address-when-re-queue-packet.patch
ib-ipoib-fix-deadlock-between-rmmod-and-set_mode.patch
ib-mlx5-fix-out-of-bound-access.patch
ib-srp-avoid-that-duplicate-responses-trigger-a-kernel-bug.patch
ib-srp-avoid-using-ib_mr_type_sg_gaps.patch
ib-srp-fix-race-conditions-related-to-task-management.patch
ktest-fix-child-exit-code-processing.patch
kvm-s390-disable-dirty-log-retrieval-for-ucontrol-guests.patch
kvm-vmx-use-correct-vmcs_read-write-for-guest-segment-selector-base.patch
mac80211-don-t-handle-filtered-frames-within-a-ba-session.patch
mac80211-don-t-reorder-frames-with-sn-smaller-than-ssn.patch
mac80211-flush-delayed-work-when-entering-suspend.patch
mac80211-use-driver-indicated-transmitter-sta-only-for-data-frames.patch
memory-atmel-ebi-fix-ns-cycles-conversions.patch
mnt-tuck-mounts-under-others-instead-of-creating-shadow-side-mounts.patch
net-mvpp2-fix-dma-address-calculation-in-mvpp2_txq_inc_put.patch
nfit-libnvdimm-fix-interleave-set-cookie-calculation.patch
nlm-ensure-callback-code-also-checks-that-the-files-match.patch
orangefs-use-rcu-for-destroy_inode.patch
pci-hotplug-pnv-php-disable-surprise-hotplug-capability-on-conflicts.patch
pci-hotplug-pnv-php-remove-warn_on-in-pnv_php_put_slot.patch
pwm-pca9685-fix-period-change-with-same-duty-cycle.patch
s390-chsc-add-exception-handler-for-chsc-instruction.patch
s390-dcssblk-fix-device-size-calculation-in-dcssblk_direct_access.patch
s390-kdump-use-linux-elf-note-name-instead-of-core.patch
s390-make-setup_randomness-work.patch
s390-qdio-clear-dsci-prior-to-scanning-multiple-input-queues.patch
s390-task_size-for-kernel-threads.patch
s390-use-correct-input-data-address-for-setup_randomness.patch
serial-8250_pci-add-mks-tenta-scom-0800-and-scom-0801-cards.patch
target-fix-null-dereference-during-lun-lookup-active-i-o-shutdown.patch
xtensa-move-parse_tag_fdt-out-of-ifdef-config_blk_dev_initrd.patch

41 files changed:
queue-4.9/bluetooth-add-another-ar3012-04ca-3018-device.patch [new file with mode: 0644]
queue-4.9/brcmfmac-fix-incorrect-event-channel-deduction.patch [new file with mode: 0644]
queue-4.9/ceph-remove-req-from-unsafe-list-when-unregistering-it.patch [new file with mode: 0644]
queue-4.9/cxl-fix-nested-locking-hang-during-eeh-hotplug.patch [new file with mode: 0644]
queue-4.9/cxl-prevent-read-write-to-afu-config-space-while-afu-not-configured.patch [new file with mode: 0644]
queue-4.9/drivers-pci-hotplug-fix-initial-state-for-empty-slot.patch [new file with mode: 0644]
queue-4.9/drivers-pci-hotplug-handle-presence-detection-change-properly.patch [new file with mode: 0644]
queue-4.9/fs-better-permission-checking-for-submounts.patch [new file with mode: 0644]
queue-4.9/ib-ipoib-add-destination-address-when-re-queue-packet.patch [new file with mode: 0644]
queue-4.9/ib-ipoib-fix-deadlock-between-rmmod-and-set_mode.patch [new file with mode: 0644]
queue-4.9/ib-mlx5-fix-out-of-bound-access.patch [new file with mode: 0644]
queue-4.9/ib-srp-avoid-that-duplicate-responses-trigger-a-kernel-bug.patch [new file with mode: 0644]
queue-4.9/ib-srp-avoid-using-ib_mr_type_sg_gaps.patch [new file with mode: 0644]
queue-4.9/ib-srp-fix-race-conditions-related-to-task-management.patch [new file with mode: 0644]
queue-4.9/ktest-fix-child-exit-code-processing.patch [new file with mode: 0644]
queue-4.9/kvm-s390-disable-dirty-log-retrieval-for-ucontrol-guests.patch [new file with mode: 0644]
queue-4.9/kvm-vmx-use-correct-vmcs_read-write-for-guest-segment-selector-base.patch [new file with mode: 0644]
queue-4.9/mac80211-don-t-handle-filtered-frames-within-a-ba-session.patch [new file with mode: 0644]
queue-4.9/mac80211-don-t-reorder-frames-with-sn-smaller-than-ssn.patch [new file with mode: 0644]
queue-4.9/mac80211-flush-delayed-work-when-entering-suspend.patch [new file with mode: 0644]
queue-4.9/mac80211-use-driver-indicated-transmitter-sta-only-for-data-frames.patch [new file with mode: 0644]
queue-4.9/memory-atmel-ebi-fix-ns-cycles-conversions.patch [new file with mode: 0644]
queue-4.9/mnt-tuck-mounts-under-others-instead-of-creating-shadow-side-mounts.patch [new file with mode: 0644]
queue-4.9/net-mvpp2-fix-dma-address-calculation-in-mvpp2_txq_inc_put.patch [new file with mode: 0644]
queue-4.9/nfit-libnvdimm-fix-interleave-set-cookie-calculation.patch [new file with mode: 0644]
queue-4.9/nlm-ensure-callback-code-also-checks-that-the-files-match.patch [new file with mode: 0644]
queue-4.9/orangefs-use-rcu-for-destroy_inode.patch [new file with mode: 0644]
queue-4.9/pci-hotplug-pnv-php-disable-surprise-hotplug-capability-on-conflicts.patch [new file with mode: 0644]
queue-4.9/pci-hotplug-pnv-php-remove-warn_on-in-pnv_php_put_slot.patch [new file with mode: 0644]
queue-4.9/pwm-pca9685-fix-period-change-with-same-duty-cycle.patch [new file with mode: 0644]
queue-4.9/s390-chsc-add-exception-handler-for-chsc-instruction.patch [new file with mode: 0644]
queue-4.9/s390-dcssblk-fix-device-size-calculation-in-dcssblk_direct_access.patch [new file with mode: 0644]
queue-4.9/s390-kdump-use-linux-elf-note-name-instead-of-core.patch [new file with mode: 0644]
queue-4.9/s390-make-setup_randomness-work.patch [new file with mode: 0644]
queue-4.9/s390-qdio-clear-dsci-prior-to-scanning-multiple-input-queues.patch [new file with mode: 0644]
queue-4.9/s390-task_size-for-kernel-threads.patch [new file with mode: 0644]
queue-4.9/s390-use-correct-input-data-address-for-setup_randomness.patch [new file with mode: 0644]
queue-4.9/serial-8250_pci-add-mks-tenta-scom-0800-and-scom-0801-cards.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/target-fix-null-dereference-during-lun-lookup-active-i-o-shutdown.patch [new file with mode: 0644]
queue-4.9/xtensa-move-parse_tag_fdt-out-of-ifdef-config_blk_dev_initrd.patch [new file with mode: 0644]

diff --git a/queue-4.9/bluetooth-add-another-ar3012-04ca-3018-device.patch b/queue-4.9/bluetooth-add-another-ar3012-04ca-3018-device.patch
new file mode 100644 (file)
index 0000000..7a7467a
--- /dev/null
@@ -0,0 +1,53 @@
+From 441ad62d6c3f131f1dbd7dcdd9cbe3f74dbd8501 Mon Sep 17 00:00:00 2001
+From: Dmitry Tunin <hanipouspilot@gmail.com>
+Date: Thu, 5 Jan 2017 13:19:53 +0300
+Subject: Bluetooth: Add another AR3012 04ca:3018 device
+
+From: Dmitry Tunin <hanipouspilot@gmail.com>
+
+commit 441ad62d6c3f131f1dbd7dcdd9cbe3f74dbd8501 upstream.
+
+T:  Bus=01 Lev=01 Prnt=01 Port=07 Cnt=04 Dev#=  5 Spd=12  MxCh= 0
+D:  Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
+P:  Vendor=04ca ProdID=3018 Rev=00.01
+C:  #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
+I:  If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
+I:  If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
+
+Signed-off-by: Dmitry Tunin <hanipouspilot@gmail.com>
+Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/bluetooth/ath3k.c |    2 ++
+ drivers/bluetooth/btusb.c |    1 +
+ 2 files changed, 3 insertions(+)
+
+--- a/drivers/bluetooth/ath3k.c
++++ b/drivers/bluetooth/ath3k.c
+@@ -94,6 +94,7 @@ static const struct usb_device_id ath3k_
+       { USB_DEVICE(0x04CA, 0x300f) },
+       { USB_DEVICE(0x04CA, 0x3010) },
+       { USB_DEVICE(0x04CA, 0x3014) },
++      { USB_DEVICE(0x04CA, 0x3018) },
+       { USB_DEVICE(0x0930, 0x0219) },
+       { USB_DEVICE(0x0930, 0x021c) },
+       { USB_DEVICE(0x0930, 0x0220) },
+@@ -162,6 +163,7 @@ static const struct usb_device_id ath3k_
+       { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 },
++      { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
+--- a/drivers/bluetooth/btusb.c
++++ b/drivers/bluetooth/btusb.c
+@@ -209,6 +209,7 @@ static const struct usb_device_id blackl
+       { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 },
++      { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
diff --git a/queue-4.9/brcmfmac-fix-incorrect-event-channel-deduction.patch b/queue-4.9/brcmfmac-fix-incorrect-event-channel-deduction.patch
new file mode 100644 (file)
index 0000000..abbe9b0
--- /dev/null
@@ -0,0 +1,39 @@
+From 8e290cecdd0178f3d4cf7d463c51dc7e462843b4 Mon Sep 17 00:00:00 2001
+From: Gavin Li <git@thegavinli.com>
+Date: Tue, 17 Jan 2017 15:24:05 -0800
+Subject: brcmfmac: fix incorrect event channel deduction
+
+From: Gavin Li <git@thegavinli.com>
+
+commit 8e290cecdd0178f3d4cf7d463c51dc7e462843b4 upstream.
+
+brcmf_sdio_fromevntchan() was being called on the data frame
+rather than the software header, causing some frames to be
+mischaracterized as on the event channel rather than the data channel.
+
+This fixes a major performance regression (due to dropped packets). With
+this patch the download speed jumped from 1Mbit/s back up to 40MBit/s due
+to the sheer amount of packets being incorrectly processed.
+
+Fixes: c56caa9db8ab ("brcmfmac: screening firmware event packet")
+Signed-off-by: Gavin Li <git@thegavinli.com>
+Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
+[kvalo@codeaurora.org: improve commit logs based on email discussion]
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+@@ -1660,7 +1660,7 @@ static u8 brcmf_sdio_rxglom(struct brcmf
+                                          pfirst->len, pfirst->next,
+                                          pfirst->prev);
+                       skb_unlink(pfirst, &bus->glom);
+-                      if (brcmf_sdio_fromevntchan(pfirst->data))
++                      if (brcmf_sdio_fromevntchan(&dptr[SDPCM_HWHDR_LEN]))
+                               brcmf_rx_event(bus->sdiodev->dev, pfirst);
+                       else
+                               brcmf_rx_frame(bus->sdiodev->dev, pfirst,
diff --git a/queue-4.9/ceph-remove-req-from-unsafe-list-when-unregistering-it.patch b/queue-4.9/ceph-remove-req-from-unsafe-list-when-unregistering-it.patch
new file mode 100644 (file)
index 0000000..4deda38
--- /dev/null
@@ -0,0 +1,50 @@
+From df963ea8a082d31521a120e8e31a29ad8a1dc215 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@redhat.com>
+Date: Tue, 14 Feb 2017 10:09:40 -0500
+Subject: ceph: remove req from unsafe list when unregistering it
+
+From: Jeff Layton <jlayton@redhat.com>
+
+commit df963ea8a082d31521a120e8e31a29ad8a1dc215 upstream.
+
+There's no reason a request should ever be on a s_unsafe list but not
+in the request tree.
+
+Link: http://tracker.ceph.com/issues/18474
+Signed-off-by: Jeff Layton <jlayton@redhat.com>
+Reviewed-by: Yan, Zheng <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/mds_client.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -628,6 +628,9 @@ static void __unregister_request(struct
+ {
+       dout("__unregister_request %p tid %lld\n", req, req->r_tid);
++      /* Never leave an unregistered request on an unsafe list! */
++      list_del_init(&req->r_unsafe_item);
++
+       if (req->r_tid == mdsc->oldest_tid) {
+               struct rb_node *p = rb_next(&req->r_node);
+               mdsc->oldest_tid = 0;
+@@ -1036,7 +1039,6 @@ static void cleanup_session_requests(str
+       while (!list_empty(&session->s_unsafe)) {
+               req = list_first_entry(&session->s_unsafe,
+                                      struct ceph_mds_request, r_unsafe_item);
+-              list_del_init(&req->r_unsafe_item);
+               pr_warn_ratelimited(" dropping unsafe request %llu\n",
+                                   req->r_tid);
+               __unregister_request(mdsc, req);
+@@ -2423,7 +2425,6 @@ static void handle_reply(struct ceph_mds
+                        * useful we could do with a revised return value.
+                        */
+                       dout("got safe reply %llu, mds%d\n", tid, mds);
+-                      list_del_init(&req->r_unsafe_item);
+                       /* last unsafe request during umount? */
+                       if (mdsc->stopping && !__get_oldest_req(mdsc))
diff --git a/queue-4.9/cxl-fix-nested-locking-hang-during-eeh-hotplug.patch b/queue-4.9/cxl-fix-nested-locking-hang-during-eeh-hotplug.patch
new file mode 100644 (file)
index 0000000..9e30df9
--- /dev/null
@@ -0,0 +1,148 @@
+From 171ed0fcd8966d82c45376f1434678e7b9d4d9b1 Mon Sep 17 00:00:00 2001
+From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Date: Mon, 6 Feb 2017 12:07:17 +1100
+Subject: cxl: fix nested locking hang during EEH hotplug
+
+From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+
+commit 171ed0fcd8966d82c45376f1434678e7b9d4d9b1 upstream.
+
+Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU
+not configured") introduced a rwsem to fix an invalid memory access that
+occurred when someone attempts to access the config space of an AFU on a
+vPHB whilst the AFU is deconfigured, such as during EEH recovery.
+
+It turns out that it's possible to run into a nested locking issue when EEH
+recovery fails and a full device hotplug is required.
+cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on
+configured_rwsem. When EEH recovery fails, the EEH code calls
+pci_hp_remove_devices() to remove the device, which in turn calls
+cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries
+to grab the writer lock that's already held.
+
+Standard rwsem semantics don't express what we really want to do here and
+don't allow for nested locking. Fix this by replacing the rwsem with an
+atomic_t which we can control more finely. Allow the AFU to be locked
+multiple times so long as there are no readers.
+
+Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured")
+Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/cxl/cxl.h  |    5 +++--
+ drivers/misc/cxl/main.c |    3 +--
+ drivers/misc/cxl/pci.c  |   11 +++++++++--
+ drivers/misc/cxl/vphb.c |   18 ++++++++++++++----
+ 4 files changed, 27 insertions(+), 10 deletions(-)
+
+--- a/drivers/misc/cxl/cxl.h
++++ b/drivers/misc/cxl/cxl.h
+@@ -418,8 +418,9 @@ struct cxl_afu {
+       struct dentry *debugfs;
+       struct mutex contexts_lock;
+       spinlock_t afu_cntl_lock;
+-      /* Used to block access to AFU config space while deconfigured */
+-      struct rw_semaphore configured_rwsem;
++
++      /* -1: AFU deconfigured/locked, >= 0: number of readers */
++      atomic_t configured_state;
+       /* AFU error buffer fields and bin attribute for sysfs */
+       u64 eb_len, eb_offset;
+--- a/drivers/misc/cxl/main.c
++++ b/drivers/misc/cxl/main.c
+@@ -268,8 +268,7 @@ struct cxl_afu *cxl_alloc_afu(struct cxl
+       idr_init(&afu->contexts_idr);
+       mutex_init(&afu->contexts_lock);
+       spin_lock_init(&afu->afu_cntl_lock);
+-      init_rwsem(&afu->configured_rwsem);
+-      down_write(&afu->configured_rwsem);
++      atomic_set(&afu->configured_state, -1);
+       afu->prefault_mode = CXL_PREFAULT_NONE;
+       afu->irqs_max = afu->adapter->user_irqs;
+--- a/drivers/misc/cxl/pci.c
++++ b/drivers/misc/cxl/pci.c
+@@ -1129,7 +1129,7 @@ static int pci_configure_afu(struct cxl_
+       if ((rc = cxl_native_register_psl_irq(afu)))
+               goto err2;
+-      up_write(&afu->configured_rwsem);
++      atomic_set(&afu->configured_state, 0);
+       return 0;
+ err2:
+@@ -1142,7 +1142,14 @@ err1:
+ static void pci_deconfigure_afu(struct cxl_afu *afu)
+ {
+-      down_write(&afu->configured_rwsem);
++      /*
++       * It's okay to deconfigure when AFU is already locked, otherwise wait
++       * until there are no readers
++       */
++      if (atomic_read(&afu->configured_state) != -1) {
++              while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1)
++                      schedule();
++      }
+       cxl_native_release_psl_irq(afu);
+       if (afu->adapter->native->sl_ops->release_serr_irq)
+               afu->adapter->native->sl_ops->release_serr_irq(afu);
+--- a/drivers/misc/cxl/vphb.c
++++ b/drivers/misc/cxl/vphb.c
+@@ -83,6 +83,16 @@ static inline struct cxl_afu *pci_bus_to
+       return phb ? phb->private_data : NULL;
+ }
++static void cxl_afu_configured_put(struct cxl_afu *afu)
++{
++      atomic_dec_if_positive(&afu->configured_state);
++}
++
++static bool cxl_afu_configured_get(struct cxl_afu *afu)
++{
++      return atomic_inc_unless_negative(&afu->configured_state);
++}
++
+ static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
+                                      struct cxl_afu *afu, int *_record)
+ {
+@@ -107,7 +117,7 @@ static int cxl_pcie_read_config(struct p
+       afu = pci_bus_to_afu(bus);
+       /* Grab a reader lock on afu. */
+-      if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
++      if (afu == NULL || !cxl_afu_configured_get(afu))
+               return PCIBIOS_DEVICE_NOT_FOUND;
+       rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+@@ -132,7 +142,7 @@ static int cxl_pcie_read_config(struct p
+       }
+ out:
+-      up_read(&afu->configured_rwsem);
++      cxl_afu_configured_put(afu);
+       return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+ }
+@@ -144,7 +154,7 @@ static int cxl_pcie_write_config(struct
+       afu = pci_bus_to_afu(bus);
+       /* Grab a reader lock on afu. */
+-      if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
++      if (afu == NULL || !cxl_afu_configured_get(afu))
+               return PCIBIOS_DEVICE_NOT_FOUND;
+       rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+@@ -166,7 +176,7 @@ static int cxl_pcie_write_config(struct
+       }
+ out:
+-      up_read(&afu->configured_rwsem);
++      cxl_afu_configured_put(afu);
+       return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
+ }
diff --git a/queue-4.9/cxl-prevent-read-write-to-afu-config-space-while-afu-not-configured.patch b/queue-4.9/cxl-prevent-read-write-to-afu-config-space-while-afu-not-configured.patch
new file mode 100644 (file)
index 0000000..cf6558c
--- /dev/null
@@ -0,0 +1,173 @@
+From 14a3ae34bfd0bcb1cc12d55b06a8584c11fac6fc Mon Sep 17 00:00:00 2001
+From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Date: Fri, 9 Dec 2016 17:18:50 +1100
+Subject: cxl: Prevent read/write to AFU config space while AFU not configured
+
+From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+
+commit 14a3ae34bfd0bcb1cc12d55b06a8584c11fac6fc upstream.
+
+During EEH recovery, we deconfigure all AFUs whilst leaving the
+corresponding vPHB and virtual PCI device in place.
+
+If something attempts to interact with the AFU's PCI config space (e.g.
+running lspci) after the AFU has been deconfigured and before it's
+reconfigured, cxl_pcie_{read,write}_config() will read invalid values from
+the deconfigured struct cxl_afu and proceed to Oops when they try to
+dereference pointers that have been set to NULL during deconfiguration.
+
+Add a rwsem to struct cxl_afu so we can prevent interaction with config
+space while the AFU is deconfigured.
+
+Reported-by: Pradipta Ghosh <pradghos@in.ibm.com>
+Suggested-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
+Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/cxl/cxl.h  |    2 +
+ drivers/misc/cxl/main.c |    3 +-
+ drivers/misc/cxl/pci.c  |    2 +
+ drivers/misc/cxl/vphb.c |   51 +++++++++++++++++++++++++++---------------------
+ 4 files changed, 35 insertions(+), 23 deletions(-)
+
+--- a/drivers/misc/cxl/cxl.h
++++ b/drivers/misc/cxl/cxl.h
+@@ -418,6 +418,8 @@ struct cxl_afu {
+       struct dentry *debugfs;
+       struct mutex contexts_lock;
+       spinlock_t afu_cntl_lock;
++      /* Used to block access to AFU config space while deconfigured */
++      struct rw_semaphore configured_rwsem;
+       /* AFU error buffer fields and bin attribute for sysfs */
+       u64 eb_len, eb_offset;
+--- a/drivers/misc/cxl/main.c
++++ b/drivers/misc/cxl/main.c
+@@ -268,7 +268,8 @@ struct cxl_afu *cxl_alloc_afu(struct cxl
+       idr_init(&afu->contexts_idr);
+       mutex_init(&afu->contexts_lock);
+       spin_lock_init(&afu->afu_cntl_lock);
+-
++      init_rwsem(&afu->configured_rwsem);
++      down_write(&afu->configured_rwsem);
+       afu->prefault_mode = CXL_PREFAULT_NONE;
+       afu->irqs_max = afu->adapter->user_irqs;
+--- a/drivers/misc/cxl/pci.c
++++ b/drivers/misc/cxl/pci.c
+@@ -1129,6 +1129,7 @@ static int pci_configure_afu(struct cxl_
+       if ((rc = cxl_native_register_psl_irq(afu)))
+               goto err2;
++      up_write(&afu->configured_rwsem);
+       return 0;
+ err2:
+@@ -1141,6 +1142,7 @@ err1:
+ static void pci_deconfigure_afu(struct cxl_afu *afu)
+ {
++      down_write(&afu->configured_rwsem);
+       cxl_native_release_psl_irq(afu);
+       if (afu->adapter->native->sl_ops->release_serr_irq)
+               afu->adapter->native->sl_ops->release_serr_irq(afu);
+--- a/drivers/misc/cxl/vphb.c
++++ b/drivers/misc/cxl/vphb.c
+@@ -76,23 +76,22 @@ static int cxl_pcie_cfg_record(u8 bus, u
+       return (bus << 8) + devfn;
+ }
+-static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
+-                              struct cxl_afu **_afu, int *_record)
++static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus)
+ {
+-      struct pci_controller *phb;
+-      struct cxl_afu *afu;
+-      int record;
++      struct pci_controller *phb = bus ? pci_bus_to_host(bus) : NULL;
+-      phb = pci_bus_to_host(bus);
+-      if (phb == NULL)
+-              return PCIBIOS_DEVICE_NOT_FOUND;
++      return phb ? phb->private_data : NULL;
++}
++
++static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
++                                     struct cxl_afu *afu, int *_record)
++{
++      int record;
+-      afu = (struct cxl_afu *)phb->private_data;
+       record = cxl_pcie_cfg_record(bus->number, devfn);
+       if (record > afu->crs_num)
+               return PCIBIOS_DEVICE_NOT_FOUND;
+-      *_afu = afu;
+       *_record = record;
+       return 0;
+ }
+@@ -106,9 +105,14 @@ static int cxl_pcie_read_config(struct p
+       u16 val16;
+       u32 val32;
+-      rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
++      afu = pci_bus_to_afu(bus);
++      /* Grab a reader lock on afu. */
++      if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
++              return PCIBIOS_DEVICE_NOT_FOUND;
++
++      rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+       if (rc)
+-              return rc;
++              goto out;
+       switch (len) {
+       case 1:
+@@ -127,10 +131,9 @@ static int cxl_pcie_read_config(struct p
+               WARN_ON(1);
+       }
+-      if (rc)
+-              return PCIBIOS_DEVICE_NOT_FOUND;
+-
+-      return PCIBIOS_SUCCESSFUL;
++out:
++      up_read(&afu->configured_rwsem);
++      return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+ }
+ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+@@ -139,9 +142,14 @@ static int cxl_pcie_write_config(struct
+       int rc, record;
+       struct cxl_afu *afu;
+-      rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
++      afu = pci_bus_to_afu(bus);
++      /* Grab a reader lock on afu. */
++      if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
++              return PCIBIOS_DEVICE_NOT_FOUND;
++
++      rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+       if (rc)
+-              return rc;
++              goto out;
+       switch (len) {
+       case 1:
+@@ -157,10 +165,9 @@ static int cxl_pcie_write_config(struct
+               WARN_ON(1);
+       }
+-      if (rc)
+-              return PCIBIOS_SET_FAILED;
+-
+-      return PCIBIOS_SUCCESSFUL;
++out:
++      up_read(&afu->configured_rwsem);
++      return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
+ }
+ static struct pci_ops cxl_pcie_pci_ops =
diff --git a/queue-4.9/drivers-pci-hotplug-fix-initial-state-for-empty-slot.patch b/queue-4.9/drivers-pci-hotplug-fix-initial-state-for-empty-slot.patch
new file mode 100644 (file)
index 0000000..aedb80f
--- /dev/null
@@ -0,0 +1,58 @@
+From d0c424971f70501ec0a0364117b9934db039c9cc Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Wed, 11 Jan 2017 11:50:07 +1100
+Subject: drivers/pci/hotplug: Fix initial state for empty slot
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit d0c424971f70501ec0a0364117b9934db039c9cc upstream.
+
+In PowerNV PCI hotplug driver, the initial PCI slot's state is set
+to PNV_PHP_STATE_POPULATED if no PCI devices are connected to the
+slot. The PCI devices that are hot added to the slot won't be probed
+and populated because of the check in pnv_php_enable():
+
+        /* Check if the slot has been configured */
+        if (php_slot->state != PNV_PHP_STATE_REGISTERED)
+                return 0;
+
+This fixes the issue by leaving the slot in PNV_PHP_STATE_REGISTERED
+state initially if nothing is connected to the slot.
+
+Fixes: 360aebd85a4 ("drivers/pci/hotplug: Support surprise hotplug in powernv driver")
+Reported-by: Hank Chang <hankmax0000@gmail.com>
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Tested-by: Willie Liauw <williel@supermicro.com.tw>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pnv_php.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/drivers/pci/hotplug/pnv_php.c
++++ b/drivers/pci/hotplug/pnv_php.c
+@@ -436,9 +436,21 @@ static int pnv_php_enable(struct pnv_php
+       if (ret)
+               return ret;
+-      /* Proceed if there have nothing behind the slot */
+-      if (presence == OPAL_PCI_SLOT_EMPTY)
++      /*
++       * Proceed if there have nothing behind the slot. However,
++       * we should leave the slot in registered state at the
++       * beginning. Otherwise, the PCI devices inserted afterwards
++       * won't be probed and populated.
++       */
++      if (presence == OPAL_PCI_SLOT_EMPTY) {
++              if (!php_slot->power_state_check) {
++                      php_slot->power_state_check = true;
++
++                      return 0;
++              }
++
+               goto scan;
++      }
+       /*
+        * If the power supply to the slot is off, we can't detect
diff --git a/queue-4.9/drivers-pci-hotplug-handle-presence-detection-change-properly.patch b/queue-4.9/drivers-pci-hotplug-handle-presence-detection-change-properly.patch
new file mode 100644 (file)
index 0000000..da6058f
--- /dev/null
@@ -0,0 +1,44 @@
+From d7d55536c6cd1f80295b6d7483ad0587b148bde4 Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Wed, 11 Jan 2017 11:50:06 +1100
+Subject: drivers/pci/hotplug: Handle presence detection change properly
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit d7d55536c6cd1f80295b6d7483ad0587b148bde4 upstream.
+
+The surprise hotplug is driven by interrupt in PowerNV PCI hotplug
+driver. In the interrupt handler, pnv_php_interrupt(), we bail when
+pnv_pci_get_presence_state() returns zero, which is wrong. This causes the
+presence change event to always be ignored incorrectly.
+
+This fixes the issue by bailing on error (non-zero value) returned
+from pnv_pci_get_presence_state().
+
+Fixes: 360aebd85a4 ("drivers/pci/hotplug: Support surprise hotplug in powernv driver")
+Reported-by: Hank Chang <hankmax0000@gmail.com>
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Tested-by: Willie Liauw <williel@supermicro.com.tw>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pnv_php.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/hotplug/pnv_php.c
++++ b/drivers/pci/hotplug/pnv_php.c
+@@ -713,8 +713,12 @@ static irqreturn_t pnv_php_interrupt(int
+               added = !!(lsts & PCI_EXP_LNKSTA_DLLLA);
+       } else if (sts & PCI_EXP_SLTSTA_PDC) {
+               ret = pnv_pci_get_presence_state(php_slot->id, &presence);
+-              if (!ret)
++              if (ret) {
++                      dev_warn(&pdev->dev, "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n",
++                               php_slot->name, ret, sts);
+                       return IRQ_HANDLED;
++              }
++
+               added = !!(presence == OPAL_PCI_SLOT_PRESENT);
+       } else {
+               return IRQ_NONE;
diff --git a/queue-4.9/fs-better-permission-checking-for-submounts.patch b/queue-4.9/fs-better-permission-checking-for-submounts.patch
new file mode 100644 (file)
index 0000000..c8201d1
--- /dev/null
@@ -0,0 +1,320 @@
+From 93faccbbfa958a9668d3ab4e30f38dd205cee8d8 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 1 Feb 2017 06:06:16 +1300
+Subject: fs: Better permission checking for submounts
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 93faccbbfa958a9668d3ab4e30f38dd205cee8d8 upstream.
+
+To support unprivileged users mounting filesystems two permission
+checks have to be performed: a test to see if the user is allowed to
+create a mount in the mount namespace, and a test to see if
+the user is allowed to access the specified filesystem.
+
+The automount case is special in that mounting the original filesystem
+grants permission to mount the sub-filesystems, to any user who
+happens to stumble across their mountpoint and satisfies the
+ordinary filesystem permission checks.
+
+Attempting to handle the automount case by using override_creds
+almost works.  It preserves the idea that permission to mount
+the original filesystem is permission to mount the sub-filesystem.
+Unfortunately using override_creds messes up the filesystem's
+ordinary permission checks.
+
+Solve this by being explicit that a mount is a submount by introducing
+vfs_submount, and using it where appropriate.
+
+vfs_submount uses a new internal mount flag, MS_SUBMOUNT, to let
+sget and friends know that a mount is a submount so they can take appropriate
+action.
+
+sget and sget_userns are modified to not perform any permission checks
+on submounts.
+
+follow_automount is modified to stop using override_creds as that
+has proven problematic.
+
+do_mount is modified to always remove the new MS_SUBMOUNT flag so
+that we know userspace will never be able to specify it.
+
+autofs4 is modified to stop using current_real_cred that was put in
+there to handle the previous version of submount permission checking.
+
+cifs is modified to pass the mountpoint all of the way down to vfs_submount.
+
+debugfs is modified to pass the mountpoint all of the way down to
+trace_automount by adding a new parameter.  To make this change easier
+a new typedef debugfs_automount_t is introduced to capture the type of
+the debugfs automount function.
+
+Fixes: 069d5ac9ae0d ("autofs:  Fix automounts by using current_real_cred()->uid")
+Fixes: aeaa4a79ff6a ("fs: Call d_automount with the filesystems creds")
+Reviewed-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Reviewed-by: Seth Forshee <seth.forshee@canonical.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/afs/mntpt.c          |    2 +-
+ fs/autofs4/waitq.c      |    4 ++--
+ fs/cifs/cifs_dfs_ref.c  |    7 ++++---
+ fs/debugfs/inode.c      |    8 ++++----
+ fs/namei.c              |    3 ---
+ fs/namespace.c          |   17 ++++++++++++++++-
+ fs/nfs/namespace.c      |    2 +-
+ fs/nfs/nfs4namespace.c  |    2 +-
+ fs/super.c              |   13 ++++++++++---
+ include/linux/debugfs.h |    3 ++-
+ include/linux/mount.h   |    3 +++
+ include/uapi/linux/fs.h |    1 +
+ kernel/trace/trace.c    |    4 ++--
+ 13 files changed, 47 insertions(+), 22 deletions(-)
+
+--- a/fs/afs/mntpt.c
++++ b/fs/afs/mntpt.c
+@@ -202,7 +202,7 @@ static struct vfsmount *afs_mntpt_do_aut
+       /* try and do the mount */
+       _debug("--- attempting mount %s -o %s ---", devname, options);
+-      mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
++      mnt = vfs_submount(mntpt, &afs_fs_type, devname, options);
+       _debug("--- mount result %p ---", mnt);
+       free_page((unsigned long) devname);
+--- a/fs/autofs4/waitq.c
++++ b/fs/autofs4/waitq.c
+@@ -431,8 +431,8 @@ int autofs4_wait(struct autofs_sb_info *
+               memcpy(&wq->name, &qstr, sizeof(struct qstr));
+               wq->dev = autofs4_get_dev(sbi);
+               wq->ino = autofs4_get_ino(sbi);
+-              wq->uid = current_real_cred()->uid;
+-              wq->gid = current_real_cred()->gid;
++              wq->uid = current_cred()->uid;
++              wq->gid = current_cred()->gid;
+               wq->pid = pid;
+               wq->tgid = tgid;
+               wq->status = -EINTR; /* Status return if interrupted */
+--- a/fs/cifs/cifs_dfs_ref.c
++++ b/fs/cifs/cifs_dfs_ref.c
+@@ -245,7 +245,8 @@ compose_mount_options_err:
+  * @fullpath:         full path in UNC format
+  * @ref:              server's referral
+  */
+-static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
++static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt,
++              struct cifs_sb_info *cifs_sb,
+               const char *fullpath, const struct dfs_info3_param *ref)
+ {
+       struct vfsmount *mnt;
+@@ -259,7 +260,7 @@ static struct vfsmount *cifs_dfs_do_refm
+       if (IS_ERR(mountdata))
+               return (struct vfsmount *)mountdata;
+-      mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata);
++      mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata);
+       kfree(mountdata);
+       kfree(devname);
+       return mnt;
+@@ -334,7 +335,7 @@ static struct vfsmount *cifs_dfs_do_auto
+                       mnt = ERR_PTR(-EINVAL);
+                       break;
+               }
+-              mnt = cifs_dfs_do_refmount(cifs_sb,
++              mnt = cifs_dfs_do_refmount(mntpt, cifs_sb,
+                               full_path, referrals + i);
+               cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n",
+                        __func__, referrals[i].node_name, mnt);
+--- a/fs/debugfs/inode.c
++++ b/fs/debugfs/inode.c
+@@ -187,9 +187,9 @@ static const struct super_operations deb
+ static struct vfsmount *debugfs_automount(struct path *path)
+ {
+-      struct vfsmount *(*f)(void *);
+-      f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
+-      return f(d_inode(path->dentry)->i_private);
++      debugfs_automount_t f;
++      f = (debugfs_automount_t)path->dentry->d_fsdata;
++      return f(path->dentry, d_inode(path->dentry)->i_private);
+ }
+ static const struct dentry_operations debugfs_dops = {
+@@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
+  */
+ struct dentry *debugfs_create_automount(const char *name,
+                                       struct dentry *parent,
+-                                      struct vfsmount *(*f)(void *),
++                                      debugfs_automount_t f,
+                                       void *data)
+ {
+       struct dentry *dentry = start_creating(name, parent);
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -1100,7 +1100,6 @@ static int follow_automount(struct path
+                           bool *need_mntput)
+ {
+       struct vfsmount *mnt;
+-      const struct cred *old_cred;
+       int err;
+       if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
+@@ -1129,9 +1128,7 @@ static int follow_automount(struct path
+       if (nd->total_link_count >= 40)
+               return -ELOOP;
+-      old_cred = override_creds(&init_cred);
+       mnt = path->dentry->d_op->d_automount(path);
+-      revert_creds(old_cred);
+       if (IS_ERR(mnt)) {
+               /*
+                * The filesystem is allowed to return -EISDIR here to indicate
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -995,6 +995,21 @@ vfs_kern_mount(struct file_system_type *
+ }
+ EXPORT_SYMBOL_GPL(vfs_kern_mount);
++struct vfsmount *
++vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
++           const char *name, void *data)
++{
++      /* Until it is worked out how to pass the user namespace
++       * through from the parent mount to the submount don't support
++       * unprivileged mounts with submounts.
++       */
++      if (mountpoint->d_sb->s_user_ns != &init_user_ns)
++              return ERR_PTR(-EPERM);
++
++      return vfs_kern_mount(type, MS_SUBMOUNT, name, data);
++}
++EXPORT_SYMBOL_GPL(vfs_submount);
++
+ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
+                                       int flag)
+ {
+@@ -2779,7 +2794,7 @@ long do_mount(const char *dev_name, cons
+       flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
+                  MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
+-                 MS_STRICTATIME | MS_NOREMOTELOCK);
++                 MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT);
+       if (flags & MS_REMOUNT)
+               retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
+--- a/fs/nfs/namespace.c
++++ b/fs/nfs/namespace.c
+@@ -226,7 +226,7 @@ static struct vfsmount *nfs_do_clone_mou
+                                          const char *devname,
+                                          struct nfs_clone_mount *mountdata)
+ {
+-      return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
++      return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
+ }
+ /**
+--- a/fs/nfs/nfs4namespace.c
++++ b/fs/nfs/nfs4namespace.c
+@@ -279,7 +279,7 @@ static struct vfsmount *try_location(str
+                               mountdata->hostname,
+                               mountdata->mnt_path);
+-              mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
++              mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
+               if (!IS_ERR(mnt))
+                       break;
+       }
+--- a/fs/super.c
++++ b/fs/super.c
+@@ -470,7 +470,7 @@ struct super_block *sget_userns(struct f
+       struct super_block *old;
+       int err;
+-      if (!(flags & MS_KERNMOUNT) &&
++      if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
+           !(type->fs_flags & FS_USERNS_MOUNT) &&
+           !capable(CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
+@@ -500,7 +500,7 @@ retry:
+       }
+       if (!s) {
+               spin_unlock(&sb_lock);
+-              s = alloc_super(type, flags, user_ns);
++              s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
+               if (!s)
+                       return ERR_PTR(-ENOMEM);
+               goto retry;
+@@ -541,8 +541,15 @@ struct super_block *sget(struct file_sys
+ {
+       struct user_namespace *user_ns = current_user_ns();
++      /* We don't yet pass the user namespace of the parent
++       * mount through to here so always use &init_user_ns
++       * until that changes.
++       */
++      if (flags & MS_SUBMOUNT)
++              user_ns = &init_user_ns;
++
+       /* Ensure the requestor has permissions over the target filesystem */
+-      if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
++      if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
+       return sget_userns(type, test, set, flags, user_ns, data);
+--- a/include/linux/debugfs.h
++++ b/include/linux/debugfs.h
+@@ -96,9 +96,10 @@ struct dentry *debugfs_create_dir(const
+ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
+                                     const char *dest);
++typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
+ struct dentry *debugfs_create_automount(const char *name,
+                                       struct dentry *parent,
+-                                      struct vfsmount *(*f)(void *),
++                                      debugfs_automount_t f,
+                                       void *data);
+ void debugfs_remove(struct dentry *dentry);
+--- a/include/linux/mount.h
++++ b/include/linux/mount.h
+@@ -90,6 +90,9 @@ struct file_system_type;
+ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
+                                     int flags, const char *name,
+                                     void *data);
++extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
++                                   struct file_system_type *type,
++                                   const char *name, void *data);
+ extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
+ extern void mark_mounts_for_expiry(struct list_head *mounts);
+--- a/include/uapi/linux/fs.h
++++ b/include/uapi/linux/fs.h
+@@ -132,6 +132,7 @@ struct inodes_stat_t {
+ #define MS_LAZYTIME   (1<<25) /* Update the on-disk [acm]times lazily */
+ /* These sb flags are internal to the kernel */
++#define MS_SUBMOUNT     (1<<26)
+ #define MS_NOREMOTELOCK       (1<<27)
+ #define MS_NOSEC      (1<<28)
+ #define MS_BORN               (1<<29)
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -7241,7 +7241,7 @@ init_tracer_tracefs(struct trace_array *
+       ftrace_init_tracefs(tr, d_tracer);
+ }
+-static struct vfsmount *trace_automount(void *ingore)
++static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
+ {
+       struct vfsmount *mnt;
+       struct file_system_type *type;
+@@ -7254,7 +7254,7 @@ static struct vfsmount *trace_automount(
+       type = get_fs_type("tracefs");
+       if (!type)
+               return NULL;
+-      mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
++      mnt = vfs_submount(mntpt, type, "tracefs", NULL);
+       put_filesystem(type);
+       if (IS_ERR(mnt))
+               return NULL;
diff --git a/queue-4.9/ib-ipoib-add-destination-address-when-re-queue-packet.patch b/queue-4.9/ib-ipoib-add-destination-address-when-re-queue-packet.patch
new file mode 100644 (file)
index 0000000..06b488a
--- /dev/null
@@ -0,0 +1,129 @@
+From 2b0841766a898aba84630fb723989a77a9d3b4e6 Mon Sep 17 00:00:00 2001
+From: Erez Shitrit <erezsh@mellanox.com>
+Date: Wed, 1 Feb 2017 19:10:05 +0200
+Subject: IB/IPoIB: Add destination address when re-queue packet
+
+From: Erez Shitrit <erezsh@mellanox.com>
+
+commit 2b0841766a898aba84630fb723989a77a9d3b4e6 upstream.
+
+When sending packet to destination that was not resolved yet
+via path query, the driver keeps the skb and tries to re-send it
+again when the path is resolved.
+
+But when re-sending via dev_queue_xmit the kernel doesn't call
+to dev_hard_header, so IPoIB needs to keep 20 bytes in the skb
+and to put the destination address inside them.
+
+In that way the dev_start_xmit will have the correct destination,
+and the driver won't take the destination from the skb->data, while
+nothing exists there, which causes the packet to be dropped.
+
+The test flow is:
+1. Run the SM on remote node,
+2. Restart the driver.
+3. Ping some destination,
+4. Observe that the first ICMP request will be dropped.
+
+Fixes: fc791b633515 ("IB/ipoib: move back IB LL address into the hard header")
+Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
+Signed-off-by: Noa Osherovich <noaos@mellanox.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Tested-by: Yuval Shaia <yuval.shaia@oracle.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/ipoib/ipoib_main.c |   30 +++++++++++++++++-------------
+ 1 file changed, 17 insertions(+), 13 deletions(-)
+
+--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+@@ -701,6 +701,14 @@ int ipoib_check_sm_sendonly_fullmember_s
+       return ret;
+ }
++static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
++{
++      struct ipoib_pseudo_header *phdr;
++
++      phdr = (struct ipoib_pseudo_header *)skb_push(skb, sizeof(*phdr));
++      memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
++}
++
+ void ipoib_flush_paths(struct net_device *dev)
+ {
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+@@ -925,8 +933,7 @@ static void neigh_add_path(struct sk_buf
+                       }
+                       if (skb_queue_len(&neigh->queue) <
+                           IPOIB_MAX_PATH_REC_QUEUE) {
+-                              /* put pseudoheader back on for next time */
+-                              skb_push(skb, IPOIB_PSEUDO_LEN);
++                              push_pseudo_header(skb, neigh->daddr);
+                               __skb_queue_tail(&neigh->queue, skb);
+                       } else {
+                               ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
+@@ -944,10 +951,12 @@ static void neigh_add_path(struct sk_buf
+               if (!path->query && path_rec_start(dev, path))
+                       goto err_path;
+-              if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
++              if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
++                      push_pseudo_header(skb, neigh->daddr);
+                       __skb_queue_tail(&neigh->queue, skb);
+-              else
++              } else {
+                       goto err_drop;
++              }
+       }
+       spin_unlock_irqrestore(&priv->lock, flags);
+@@ -983,8 +992,7 @@ static void unicast_arp_send(struct sk_b
+               }
+               if (path) {
+                       if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+-                              /* put pseudoheader back on for next time */
+-                              skb_push(skb, IPOIB_PSEUDO_LEN);
++                              push_pseudo_header(skb, phdr->hwaddr);
+                               __skb_queue_tail(&path->queue, skb);
+                       } else {
+                               ++dev->stats.tx_dropped;
+@@ -1016,8 +1024,7 @@ static void unicast_arp_send(struct sk_b
+               return;
+       } else if ((path->query || !path_rec_start(dev, path)) &&
+                  skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+-              /* put pseudoheader back on for next time */
+-              skb_push(skb, IPOIB_PSEUDO_LEN);
++              push_pseudo_header(skb, phdr->hwaddr);
+               __skb_queue_tail(&path->queue, skb);
+       } else {
+               ++dev->stats.tx_dropped;
+@@ -1098,8 +1105,7 @@ send_using_neigh:
+       }
+       if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+-              /* put pseudoheader back on for next time */
+-              skb_push(skb, sizeof(*phdr));
++              push_pseudo_header(skb, phdr->hwaddr);
+               spin_lock_irqsave(&priv->lock, flags);
+               __skb_queue_tail(&neigh->queue, skb);
+               spin_unlock_irqrestore(&priv->lock, flags);
+@@ -1131,7 +1137,6 @@ static int ipoib_hard_header(struct sk_b
+                            unsigned short type,
+                            const void *daddr, const void *saddr, unsigned len)
+ {
+-      struct ipoib_pseudo_header *phdr;
+       struct ipoib_header *header;
+       header = (struct ipoib_header *) skb_push(skb, sizeof *header);
+@@ -1144,8 +1149,7 @@ static int ipoib_hard_header(struct sk_b
+        * destination address into skb hard header so we can figure out where
+        * to send the packet later.
+        */
+-      phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
+-      memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
++      push_pseudo_header(skb, daddr);
+       return IPOIB_HARD_LEN;
+ }
diff --git a/queue-4.9/ib-ipoib-fix-deadlock-between-rmmod-and-set_mode.patch b/queue-4.9/ib-ipoib-fix-deadlock-between-rmmod-and-set_mode.patch
new file mode 100644 (file)
index 0000000..afb9df0
--- /dev/null
@@ -0,0 +1,112 @@
+From 0a0007f28304cb9fc87809c86abb80ec71317f20 Mon Sep 17 00:00:00 2001
+From: Feras Daoud <ferasda@mellanox.com>
+Date: Wed, 28 Dec 2016 14:47:23 +0200
+Subject: IB/ipoib: Fix deadlock between rmmod and set_mode
+
+From: Feras Daoud <ferasda@mellanox.com>
+
+commit 0a0007f28304cb9fc87809c86abb80ec71317f20 upstream.
+
+When calling set_mode from sys/fs, the call flow locks the sys/fs lock
+first and then tries to lock rtnl_lock (when calling ipoib_set_mode).
+On the other hand, the rmmod call flow takes the rtnl_lock first
+(when calling unregister_netdev) and then tries to take the sys/fs
+lock. Deadlock a->b, b->a.
+
+The problem starts when ipoib_set_mode releases its rtnl_lock and tries
+to take it again after that.
+
+    set_mod:
+    [<ffffffff8104f2bd>] ? check_preempt_curr+0x6d/0x90
+    [<ffffffff814fee8e>] __mutex_lock_slowpath+0x13e/0x180
+    [<ffffffff81448655>] ? __rtnl_unlock+0x15/0x20
+    [<ffffffff814fed2b>] mutex_lock+0x2b/0x50
+    [<ffffffff81448675>] rtnl_lock+0x15/0x20
+    [<ffffffffa02ad807>] ipoib_set_mode+0x97/0x160 [ib_ipoib]
+    [<ffffffffa02b5f5b>] set_mode+0x3b/0x80 [ib_ipoib]
+    [<ffffffff8134b840>] dev_attr_store+0x20/0x30
+    [<ffffffff811f0fe5>] sysfs_write_file+0xe5/0x170
+    [<ffffffff8117b068>] vfs_write+0xb8/0x1a0
+    [<ffffffff8117ba81>] sys_write+0x51/0x90
+    [<ffffffff8100b0f2>] system_call_fastpath+0x16/0x1b
+
+    rmmod:
+    [<ffffffff81279ffc>] ? put_dec+0x10c/0x110
+    [<ffffffff8127a2ee>] ? number+0x2ee/0x320
+    [<ffffffff814fe6a5>] schedule_timeout+0x215/0x2e0
+    [<ffffffff8127cc04>] ? vsnprintf+0x484/0x5f0
+    [<ffffffff8127b550>] ? string+0x40/0x100
+    [<ffffffff814fe323>] wait_for_common+0x123/0x180
+    [<ffffffff81060250>] ? default_wake_function+0x0/0x20
+    [<ffffffff8119661e>] ? ifind_fast+0x5e/0xb0
+    [<ffffffff814fe43d>] wait_for_completion+0x1d/0x20
+    [<ffffffff811f2e68>] sysfs_addrm_finish+0x228/0x270
+    [<ffffffff811f2fb3>] sysfs_remove_dir+0xa3/0xf0
+    [<ffffffff81273f66>] kobject_del+0x16/0x40
+    [<ffffffff8134cd14>] device_del+0x184/0x1e0
+    [<ffffffff8144e59b>] netdev_unregister_kobject+0xab/0xc0
+    [<ffffffff8143c05e>] rollback_registered+0xae/0x130
+    [<ffffffff8143c102>] unregister_netdevice+0x22/0x70
+    [<ffffffff8143c16e>] unregister_netdev+0x1e/0x30
+    [<ffffffffa02a91b0>] ipoib_remove_one+0xe0/0x120 [ib_ipoib]
+    [<ffffffffa01ed95f>] ib_unregister_device+0x4f/0x100 [ib_core]
+    [<ffffffffa021f5e1>] mlx4_ib_remove+0x41/0x180 [mlx4_ib]
+    [<ffffffffa01ab771>] mlx4_remove_device+0x71/0x90 [mlx4_core]
+
+Fixes: 862096a8bbf8 ("IB/ipoib: Add more rtnl_link_ops callbacks")
+Cc: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Feras Daoud <ferasda@mellanox.com>
+Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/ipoib/ipoib_cm.c   |   12 +++++++-----
+ drivers/infiniband/ulp/ipoib/ipoib_main.c |    6 ++----
+ 2 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+@@ -1511,12 +1511,14 @@ static ssize_t set_mode(struct device *d
+       ret = ipoib_set_mode(dev, buf);
+-      rtnl_unlock();
++      /* The assumption is that the function ipoib_set_mode returned
++       * with the rtnl held by it, if not the value -EBUSY returned,
++       * then no need to rtnl_unlock
++       */
++      if (ret != -EBUSY)
++              rtnl_unlock();
+-      if (!ret)
+-              return count;
+-
+-      return ret;
++      return (!ret || ret == -EBUSY) ? count : ret;
+ }
+ static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
+--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+@@ -468,8 +468,7 @@ int ipoib_set_mode(struct net_device *de
+               priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
+               ipoib_flush_paths(dev);
+-              rtnl_lock();
+-              return 0;
++              return (!rtnl_trylock()) ? -EBUSY : 0;
+       }
+       if (!strcmp(buf, "datagram\n")) {
+@@ -478,8 +477,7 @@ int ipoib_set_mode(struct net_device *de
+               dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
+               rtnl_unlock();
+               ipoib_flush_paths(dev);
+-              rtnl_lock();
+-              return 0;
++              return (!rtnl_trylock()) ? -EBUSY : 0;
+       }
+       return -EINVAL;
diff --git a/queue-4.9/ib-mlx5-fix-out-of-bound-access.patch b/queue-4.9/ib-mlx5-fix-out-of-bound-access.patch
new file mode 100644 (file)
index 0000000..a073327
--- /dev/null
@@ -0,0 +1,65 @@
+From 0fd27a88c2e4f548937fd7d93fc6e65c4ad7c278 Mon Sep 17 00:00:00 2001
+From: Leon Romanovsky <leonro@mellanox.com>
+Date: Wed, 18 Jan 2017 14:10:30 +0200
+Subject: IB/mlx5: Fix out-of-bound access
+
+From: Leon Romanovsky <leonro@mellanox.com>
+
+commit 0fd27a88c2e4f548937fd7d93fc6e65c4ad7c278 upstream.
+
+When we initialize buffer to create SRQ in kernel,
+the number of pages was less than actually used in
+following mlx5_fill_page_array().
+
+Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Reviewed-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx5/srq.c |   11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5
+       int err;
+       int i;
+       struct mlx5_wqe_srq_next_seg *next;
+-      int page_shift;
+-      int npages;
+       err = mlx5_db_alloc(dev->mdev, &srq->db);
+       if (err) {
+@@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5
+               err = -ENOMEM;
+               goto err_db;
+       }
+-      page_shift = srq->buf.page_shift;
+       srq->head    = 0;
+       srq->tail    = srq->msrq.max - 1;
+@@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5
+                       cpu_to_be16((i + 1) & (srq->msrq.max - 1));
+       }
+-      npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
+-      mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
+-                  buf_size, page_shift, srq->buf.npages, npages);
+-      in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
++      mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
++      in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
+       if (!in->pas) {
+               err = -ENOMEM;
+               goto err_buf;
+@@ -210,7 +205,7 @@ static int create_srq_kernel(struct mlx5
+       }
+       srq->wq_sig = !!srq_signature;
+-      in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
++      in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+       if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
+           in->type == IB_SRQT_XRC)
+               in->user_index = MLX5_IB_DEFAULT_UIDX;
diff --git a/queue-4.9/ib-srp-avoid-that-duplicate-responses-trigger-a-kernel-bug.patch b/queue-4.9/ib-srp-avoid-that-duplicate-responses-trigger-a-kernel-bug.patch
new file mode 100644 (file)
index 0000000..19f16da
--- /dev/null
@@ -0,0 +1,58 @@
+From 6cb72bc1b40bb2c1750ee7a5ebade93bed49a5fb Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+Date: Tue, 14 Feb 2017 10:56:30 -0800
+Subject: IB/srp: Avoid that duplicate responses trigger a kernel bug
+
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+
+commit 6cb72bc1b40bb2c1750ee7a5ebade93bed49a5fb upstream.
+
+After srp_process_rsp() returns there is a short time during which
+the scsi_host_find_tag() call will return a pointer to the SCSI
+command that is being completed. If during that time a duplicate
+response is received, avoid that the following call stack appears:
+
+BUG: unable to handle kernel NULL pointer dereference at           (null)
+IP: srp_recv_done+0x450/0x6b0 [ib_srp]
+Oops: 0000 [#1] SMP
+CPU: 10 PID: 0 Comm: swapper/10 Not tainted 4.10.0-rc7-dbg+ #1
+Call Trace:
+ <IRQ>
+ __ib_process_cq+0x4b/0xd0 [ib_core]
+ ib_poll_handler+0x1d/0x70 [ib_core]
+ irq_poll_softirq+0xba/0x120
+ __do_softirq+0xba/0x4c0
+ irq_exit+0xbe/0xd0
+ smp_apic_timer_interrupt+0x38/0x50
+ apic_timer_interrupt+0x90/0xa0
+ </IRQ>
+RIP: srp_recv_done+0x450/0x6b0 [ib_srp] RSP: ffff88046f483e20
+
+Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
+Cc: Israel Rukshin <israelr@mellanox.com>
+Cc: Max Gurtovoy <maxg@mellanox.com>
+Cc: Laurence Oberman <loberman@redhat.com>
+Cc: Steve Feeley <Steve.Feeley@sandisk.com>
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/srp/ib_srp.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -1880,9 +1880,11 @@ static void srp_process_rsp(struct srp_r
+               complete(&ch->tsk_mgmt_done);
+       } else {
+               scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
+-              if (scmnd) {
++              if (scmnd && scmnd->host_scribble) {
+                       req = (void *)scmnd->host_scribble;
+                       scmnd = srp_claim_req(ch, req, NULL, scmnd);
++              } else {
++                      scmnd = NULL;
+               }
+               if (!scmnd) {
+                       shost_printk(KERN_ERR, target->scsi_host,
diff --git a/queue-4.9/ib-srp-avoid-using-ib_mr_type_sg_gaps.patch b/queue-4.9/ib-srp-avoid-using-ib_mr_type_sg_gaps.patch
new file mode 100644 (file)
index 0000000..a035fa3
--- /dev/null
@@ -0,0 +1,83 @@
+From d6c58dc40fec35ff6cdb350b53bce0fcf9143709 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+Date: Tue, 14 Feb 2017 10:56:29 -0800
+Subject: IB/SRP: Avoid using IB_MR_TYPE_SG_GAPS
+
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+
+commit d6c58dc40fec35ff6cdb350b53bce0fcf9143709 upstream.
+
+Tests have shown that the following error message is reported when
+using SG-GAPS registration with an mlx5 adapter:
+
+scsi host1: ib_srp: failed RECV status WR flushed (5) for CQE ffff880bd4270eb0
+00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+00000000 0f007806 2500002a ad9fafd1
+scsi host1: ib_srp: reconnect succeeded
+mlx5_0:dump_cqe:262:(pid 7369): dump error cqe
+00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+00000000 0f007806 25000032 00105dd0
+scsi host1: ib_srp: failed FAST REG status memory management operation error (6) for CQE ffff880b92860138
+
+Hence avoid using SG-GAPS memory registrations. Additionally,
+always configure the blk_queue_virt_boundary() to avoid triggering
+a mapping failure when using adapters that support SG-GAPS (e.g.
+mlx5).
+
+Fixes: commit ad8e66b4a801 ("IB/srp: fix mr allocation when the device supports sg gaps")
+Fixes: commit 509c5f33f4f6 ("IB/srp: Prevent mapping failures")
+Reported-by: Laurence Oberman <loberman@redhat.com>
+Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
+Cc: Israel Rukshin <israelr@mellanox.com>
+Cc: Max Gurtovoy <maxg@mellanox.com>
+Cc: Leon Romanovsky <leonro@mellanox.com>
+Cc: Mark Bloch <markb@mellanox.com>
+Cc: Yuval Shaia <yuval.shaia@oracle.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/srp/ib_srp.c |   12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -366,7 +366,6 @@ static struct srp_fr_pool *srp_create_fr
+       struct srp_fr_desc *d;
+       struct ib_mr *mr;
+       int i, ret = -EINVAL;
+-      enum ib_mr_type mr_type;
+       if (pool_size <= 0)
+               goto err;
+@@ -380,13 +379,9 @@ static struct srp_fr_pool *srp_create_fr
+       spin_lock_init(&pool->lock);
+       INIT_LIST_HEAD(&pool->free_list);
+-      if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+-              mr_type = IB_MR_TYPE_SG_GAPS;
+-      else
+-              mr_type = IB_MR_TYPE_MEM_REG;
+-
+       for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
+-              mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
++              mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
++                               max_page_list_len);
+               if (IS_ERR(mr)) {
+                       ret = PTR_ERR(mr);
+                       goto destroy_pool;
+@@ -2652,9 +2647,8 @@ static int srp_slave_alloc(struct scsi_d
+       struct Scsi_Host *shost = sdev->host;
+       struct srp_target_port *target = host_to_target(shost);
+       struct srp_device *srp_dev = target->srp_host->srp_dev;
+-      struct ib_device *ibdev = srp_dev->dev;
+-      if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
++      if (true)
+               blk_queue_virt_boundary(sdev->request_queue,
+                                       ~srp_dev->mr_page_mask);
diff --git a/queue-4.9/ib-srp-fix-race-conditions-related-to-task-management.patch b/queue-4.9/ib-srp-fix-race-conditions-related-to-task-management.patch
new file mode 100644 (file)
index 0000000..26ee552
--- /dev/null
@@ -0,0 +1,171 @@
+From 0a6fdbdeb1c25e31763c1fb333fa2723a7d2aba6 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+Date: Tue, 14 Feb 2017 10:56:31 -0800
+Subject: IB/srp: Fix race conditions related to task management
+
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+
+commit 0a6fdbdeb1c25e31763c1fb333fa2723a7d2aba6 upstream.
+
+Avoid that srp_process_rsp() overwrites the status information
+in ch if the SRP target response timed out and processing of
+another task management function has already started. Avoid that
+issuing multiple task management functions concurrently triggers
+list corruption. This patch prevents that the following stack
+trace appears in the system log:
+
+WARNING: CPU: 8 PID: 9269 at lib/list_debug.c:52 __list_del_entry_valid+0xbc/0xc0
+list_del corruption. prev->next should be ffffc90004bb7b00, but was ffff8804052ecc68
+CPU: 8 PID: 9269 Comm: sg_reset Tainted: G        W       4.10.0-rc7-dbg+ #3
+Call Trace:
+ dump_stack+0x68/0x93
+ __warn+0xc6/0xe0
+ warn_slowpath_fmt+0x4a/0x50
+ __list_del_entry_valid+0xbc/0xc0
+ wait_for_completion_timeout+0x12e/0x170
+ srp_send_tsk_mgmt+0x1ef/0x2d0 [ib_srp]
+ srp_reset_device+0x5b/0x110 [ib_srp]
+ scsi_ioctl_reset+0x1c7/0x290
+ scsi_ioctl+0x12a/0x420
+ sd_ioctl+0x9d/0x100
+ blkdev_ioctl+0x51e/0x9f0
+ block_ioctl+0x38/0x40
+ do_vfs_ioctl+0x8f/0x700
+ SyS_ioctl+0x3c/0x70
+ entry_SYSCALL_64_fastpath+0x18/0xad
+
+Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
+Cc: Israel Rukshin <israelr@mellanox.com>
+Cc: Max Gurtovoy <maxg@mellanox.com>
+Cc: Laurence Oberman <loberman@redhat.com>
+Cc: Steve Feeley <Steve.Feeley@sandisk.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/srp/ib_srp.c |   45 +++++++++++++++++++++++-------------
+ drivers/infiniband/ulp/srp/ib_srp.h |    1 
+ 2 files changed, 30 insertions(+), 16 deletions(-)
+
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -1872,12 +1872,17 @@ static void srp_process_rsp(struct srp_r
+       if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
+               spin_lock_irqsave(&ch->lock, flags);
+               ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
++              if (rsp->tag == ch->tsk_mgmt_tag) {
++                      ch->tsk_mgmt_status = -1;
++                      if (be32_to_cpu(rsp->resp_data_len) >= 4)
++                              ch->tsk_mgmt_status = rsp->data[3];
++                      complete(&ch->tsk_mgmt_done);
++              } else {
++                      shost_printk(KERN_ERR, target->scsi_host,
++                                   "Received tsk mgmt response too late for tag %#llx\n",
++                                   rsp->tag);
++              }
+               spin_unlock_irqrestore(&ch->lock, flags);
+-
+-              ch->tsk_mgmt_status = -1;
+-              if (be32_to_cpu(rsp->resp_data_len) >= 4)
+-                      ch->tsk_mgmt_status = rsp->data[3];
+-              complete(&ch->tsk_mgmt_done);
+       } else {
+               scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
+               if (scmnd && scmnd->host_scribble) {
+@@ -2516,19 +2521,18 @@ srp_change_queue_depth(struct scsi_devic
+ }
+ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
+-                           u8 func)
++                           u8 func, u8 *status)
+ {
+       struct srp_target_port *target = ch->target;
+       struct srp_rport *rport = target->rport;
+       struct ib_device *dev = target->srp_host->srp_dev->dev;
+       struct srp_iu *iu;
+       struct srp_tsk_mgmt *tsk_mgmt;
++      int res;
+       if (!ch->connected || target->qp_in_error)
+               return -1;
+-      init_completion(&ch->tsk_mgmt_done);
+-
+       /*
+        * Lock the rport mutex to avoid that srp_create_ch_ib() is
+        * invoked while a task management function is being sent.
+@@ -2551,10 +2555,16 @@ static int srp_send_tsk_mgmt(struct srp_
+       tsk_mgmt->opcode        = SRP_TSK_MGMT;
+       int_to_scsilun(lun, &tsk_mgmt->lun);
+-      tsk_mgmt->tag           = req_tag | SRP_TAG_TSK_MGMT;
+       tsk_mgmt->tsk_mgmt_func = func;
+       tsk_mgmt->task_tag      = req_tag;
++      spin_lock_irq(&ch->lock);
++      ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
++      tsk_mgmt->tag = ch->tsk_mgmt_tag;
++      spin_unlock_irq(&ch->lock);
++
++      init_completion(&ch->tsk_mgmt_done);
++
+       ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
+                                     DMA_TO_DEVICE);
+       if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
+@@ -2563,13 +2573,15 @@ static int srp_send_tsk_mgmt(struct srp_
+               return -1;
+       }
++      res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
++                                      msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
++      if (res > 0 && status)
++              *status = ch->tsk_mgmt_status;
+       mutex_unlock(&rport->mutex);
+-      if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
+-                                       msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
+-              return -1;
++      WARN_ON_ONCE(res < 0);
+-      return 0;
++      return res > 0 ? 0 : -1;
+ }
+ static int srp_abort(struct scsi_cmnd *scmnd)
+@@ -2595,7 +2607,7 @@ static int srp_abort(struct scsi_cmnd *s
+       shost_printk(KERN_ERR, target->scsi_host,
+                    "Sending SRP abort for tag %#x\n", tag);
+       if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
+-                            SRP_TSK_ABORT_TASK) == 0)
++                            SRP_TSK_ABORT_TASK, NULL) == 0)
+               ret = SUCCESS;
+       else if (target->rport->state == SRP_RPORT_LOST)
+               ret = FAST_IO_FAIL;
+@@ -2613,14 +2625,15 @@ static int srp_reset_device(struct scsi_
+       struct srp_target_port *target = host_to_target(scmnd->device->host);
+       struct srp_rdma_ch *ch;
+       int i;
++      u8 status;
+       shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
+       ch = &target->ch[0];
+       if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
+-                            SRP_TSK_LUN_RESET))
++                            SRP_TSK_LUN_RESET, &status))
+               return FAILED;
+-      if (ch->tsk_mgmt_status)
++      if (status)
+               return FAILED;
+       for (i = 0; i < target->ch_count; i++) {
+--- a/drivers/infiniband/ulp/srp/ib_srp.h
++++ b/drivers/infiniband/ulp/srp/ib_srp.h
+@@ -163,6 +163,7 @@ struct srp_rdma_ch {
+       int                     max_ti_iu_len;
+       int                     comp_vector;
++      u64                     tsk_mgmt_tag;
+       struct completion       tsk_mgmt_done;
+       u8                      tsk_mgmt_status;
+       bool                    connected;
diff --git a/queue-4.9/ktest-fix-child-exit-code-processing.patch b/queue-4.9/ktest-fix-child-exit-code-processing.patch
new file mode 100644 (file)
index 0000000..862db85
--- /dev/null
@@ -0,0 +1,31 @@
+From 32677207dcc5e594254b7fb4fb2352b1755b1d5b Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Tue, 7 Feb 2017 12:05:25 -0500
+Subject: ktest: Fix child exit code processing
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 32677207dcc5e594254b7fb4fb2352b1755b1d5b upstream.
+
+The child_exit errno needs to be shifted by 8 bits to compare against the
+return values for the bisect variables.
+
+Fixes: c5dacb88f0a64 ("ktest: Allow overriding bisect test results")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/ktest/ktest.pl |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/ktest/ktest.pl
++++ b/tools/testing/ktest/ktest.pl
+@@ -2629,7 +2629,7 @@ sub do_run_test {
+     }
+     waitpid $child_pid, 0;
+-    $child_exit = $?;
++    $child_exit = $? >> 8;
+     my $end_time = time;
+     $test_time = $end_time - $start_time;
diff --git a/queue-4.9/kvm-s390-disable-dirty-log-retrieval-for-ucontrol-guests.patch b/queue-4.9/kvm-s390-disable-dirty-log-retrieval-for-ucontrol-guests.patch
new file mode 100644 (file)
index 0000000..ff19773
--- /dev/null
@@ -0,0 +1,42 @@
+From e1e8a9624f7ba8ead4f056ff558ed070e86fa747 Mon Sep 17 00:00:00 2001
+From: Janosch Frank <frankja@linux.vnet.ibm.com>
+Date: Thu, 2 Feb 2017 16:39:31 +0100
+Subject: KVM: s390: Disable dirty log retrieval for UCONTROL guests
+
+From: Janosch Frank <frankja@linux.vnet.ibm.com>
+
+commit e1e8a9624f7ba8ead4f056ff558ed070e86fa747 upstream.
+
+User controlled KVM guests do not support the dirty log, as they have
+no single gmap that we can check for changes.
+
+As they have no single gmap, kvm->arch.gmap is NULL and all further
+referencing to it for dirty checking will result in a NULL
+dereference.
+
+Let's return -EINVAL if a caller tries to sync dirty logs for a
+UCONTROL guest.
+
+Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.")
+Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
+Reported-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/kvm-s390.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -442,6 +442,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+       struct kvm_memory_slot *memslot;
+       int is_dirty = 0;
++      if (kvm_is_ucontrol(kvm))
++              return -EINVAL;
++
+       mutex_lock(&kvm->slots_lock);
+       r = -EINVAL;
diff --git a/queue-4.9/kvm-vmx-use-correct-vmcs_read-write-for-guest-segment-selector-base.patch b/queue-4.9/kvm-vmx-use-correct-vmcs_read-write-for-guest-segment-selector-base.patch
new file mode 100644 (file)
index 0000000..bbb0144
--- /dev/null
@@ -0,0 +1,42 @@
+From 96794e4ed4d758272c486e1529e431efb7045265 Mon Sep 17 00:00:00 2001
+From: Chao Peng <chao.p.peng@linux.intel.com>
+Date: Tue, 21 Feb 2017 03:50:01 -0500
+Subject: KVM: VMX: use correct vmcs_read/write for guest segment selector/base
+
+From: Chao Peng <chao.p.peng@linux.intel.com>
+
+commit 96794e4ed4d758272c486e1529e431efb7045265 upstream.
+
+Guest segment selector is 16 bit field and guest segment base is natural
+width field. Fix two incorrect invocations accordingly.
+
+Without this patch, build fails when aggressive inlining is used with ICC.
+
+Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3693,7 +3693,7 @@ static void fix_rmode_seg(int seg, struc
+       }
+       vmcs_write16(sf->selector, var.selector);
+-      vmcs_write32(sf->base, var.base);
++      vmcs_writel(sf->base, var.base);
+       vmcs_write32(sf->limit, var.limit);
+       vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
+ }
+@@ -8202,7 +8202,7 @@ static void kvm_flush_pml_buffers(struct
+ static void vmx_dump_sel(char *name, uint32_t sel)
+ {
+       pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
+-             name, vmcs_read32(sel),
++             name, vmcs_read16(sel),
+              vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
+              vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
+              vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
diff --git a/queue-4.9/mac80211-don-t-handle-filtered-frames-within-a-ba-session.patch b/queue-4.9/mac80211-don-t-handle-filtered-frames-within-a-ba-session.patch
new file mode 100644 (file)
index 0000000..b0e431f
--- /dev/null
@@ -0,0 +1,37 @@
+From 890030d3c425f49abaa4acf60e20f288b599f980 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 22 Feb 2017 16:16:07 +0100
+Subject: mac80211: don't handle filtered frames within a BA session
+
+From: Felix Fietkau <nbd@nbd.name>
+
+commit 890030d3c425f49abaa4acf60e20f288b599f980 upstream.
+
+When running a BA session, the driver (or the hardware) already takes
+care of retransmitting failed frames, since it has to keep the receiver
+reorder window in sync.
+
+Adding another layer of retransmit around that does not improve
+anything. In fact, it can only lead to some strong reordering with huge
+latency.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/status.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/mac80211/status.c
++++ b/net/mac80211/status.c
+@@ -51,7 +51,8 @@ static void ieee80211_handle_filtered_fr
+       struct ieee80211_hdr *hdr = (void *)skb->data;
+       int ac;
+-      if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
++      if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER |
++                         IEEE80211_TX_CTL_AMPDU)) {
+               ieee80211_free_txskb(&local->hw, skb);
+               return;
+       }
diff --git a/queue-4.9/mac80211-don-t-reorder-frames-with-sn-smaller-than-ssn.patch b/queue-4.9/mac80211-don-t-reorder-frames-with-sn-smaller-than-ssn.patch
new file mode 100644 (file)
index 0000000..f12f89f
--- /dev/null
@@ -0,0 +1,87 @@
+From b7540d8f25c8034de7e4163fc23ac457bf057731 Mon Sep 17 00:00:00 2001
+From: Sara Sharon <sara.sharon@intel.com>
+Date: Mon, 6 Feb 2017 15:28:42 +0200
+Subject: mac80211: don't reorder frames with SN smaller than SSN
+
+From: Sara Sharon <sara.sharon@intel.com>
+
+commit b7540d8f25c8034de7e4163fc23ac457bf057731 upstream.
+
+When RX aggregation starts, the transmitter may continue to send frames
+with SN smaller than SSN until the AddBA response is received.
+However, the reorder buffer is already initialized at this point,
+which will cause the drop of such frames as duplicates since the
+head SN of the reorder buffer is set to the SSN, which is bigger.
+
+Signed-off-by: Sara Sharon <sara.sharon@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/agg-rx.c   |    1 +
+ net/mac80211/rx.c       |   14 +++++++++++++-
+ net/mac80211/sta_info.h |    6 ++++--
+ 3 files changed, 18 insertions(+), 3 deletions(-)
+
+--- a/net/mac80211/agg-rx.c
++++ b/net/mac80211/agg-rx.c
+@@ -398,6 +398,7 @@ void __ieee80211_start_rx_ba_session(str
+       tid_agg_rx->timeout = timeout;
+       tid_agg_rx->stored_mpdu_num = 0;
+       tid_agg_rx->auto_seq = auto_seq;
++      tid_agg_rx->started = false;
+       tid_agg_rx->reorder_buf_filtered = 0;
+       status = WLAN_STATUS_SUCCESS;
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -4,7 +4,7 @@
+  * Copyright 2006-2007        Jiri Benc <jbenc@suse.cz>
+  * Copyright 2007-2010        Johannes Berg <johannes@sipsolutions.net>
+  * Copyright 2013-2014  Intel Mobile Communications GmbH
+- * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
++ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License version 2 as
+@@ -1034,6 +1034,18 @@ static bool ieee80211_sta_manage_reorder
+       buf_size = tid_agg_rx->buf_size;
+       head_seq_num = tid_agg_rx->head_seq_num;
++      /*
++       * If the current MPDU's SN is smaller than the SSN, it shouldn't
++       * be reordered.
++       */
++      if (unlikely(!tid_agg_rx->started)) {
++              if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
++                      ret = false;
++                      goto out;
++              }
++              tid_agg_rx->started = true;
++      }
++
+       /* frame with out of date sequence number */
+       if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
+               dev_kfree_skb(skb);
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -189,6 +189,7 @@ struct tid_ampdu_tx {
+  * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
+  *    and ssn.
+  * @removed: this session is removed (but might have been found due to RCU)
++ * @started: this session has started (head ssn or higher was received)
+  *
+  * This structure's lifetime is managed by RCU, assignments to
+  * the array holding it must hold the aggregation mutex.
+@@ -212,8 +213,9 @@ struct tid_ampdu_rx {
+       u16 ssn;
+       u16 buf_size;
+       u16 timeout;
+-      bool auto_seq;
+-      bool removed;
++      u8 auto_seq:1,
++         removed:1,
++         started:1;
+ };
+ /**
diff --git a/queue-4.9/mac80211-flush-delayed-work-when-entering-suspend.patch b/queue-4.9/mac80211-flush-delayed-work-when-entering-suspend.patch
new file mode 100644 (file)
index 0000000..36b72d7
--- /dev/null
@@ -0,0 +1,38 @@
+From a9e9200d8661c1a0be8c39f93deb383dc940de35 Mon Sep 17 00:00:00 2001
+From: Matt Chen <matt.chen@intel.com>
+Date: Sun, 22 Jan 2017 02:16:58 +0800
+Subject: mac80211: flush delayed work when entering suspend
+
+From: Matt Chen <matt.chen@intel.com>
+
+commit a9e9200d8661c1a0be8c39f93deb383dc940de35 upstream.
+
+The issue was found when entering suspend and resume.
+It triggers a warning in:
+mac80211/key.c: ieee80211_enable_keys()
+...
+WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
+             sdata->crypto_tx_tailroom_pending_dec);
+...
+
+It points out sdata->crypto_tx_tailroom_pending_dec isn't cleaned up successfully
+in a delayed_work during suspend. Add a flush_delayed_work to fix it.
+
+Signed-off-by: Matt Chen <matt.chen@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/pm.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/mac80211/pm.c
++++ b/net/mac80211/pm.c
+@@ -168,6 +168,7 @@ int __ieee80211_suspend(struct ieee80211
+                       break;
+               }
++              flush_delayed_work(&sdata->dec_tailroom_needed_wk);
+               drv_remove_interface(local, sdata);
+       }
diff --git a/queue-4.9/mac80211-use-driver-indicated-transmitter-sta-only-for-data-frames.patch b/queue-4.9/mac80211-use-driver-indicated-transmitter-sta-only-for-data-frames.patch
new file mode 100644 (file)
index 0000000..4d9fd34
--- /dev/null
@@ -0,0 +1,70 @@
+From 19d19e960598161be92a7e4828eb7706c6410ce6 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Mon, 27 Feb 2017 09:38:11 +0100
+Subject: mac80211: use driver-indicated transmitter STA only for data frames
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 19d19e960598161be92a7e4828eb7706c6410ce6 upstream.
+
+When I originally introduced using the driver-indicated station as an
+optimisation to avoid the hashtable lookup/iteration, of course it
+wasn't intended to really functionally change anything.
+
+I neglected, however, to take into account VLAN interfaces, which have
+the property that management and data frames are handled differently:
+data frames go directly to the station and the VLAN while management
+frames continue to be processed over the underlying/associated AP-type
+interface. As a consequence, when a driver used this optimisation for
+management frames and the user enabled VLANs, my change broke things
+since any management frames, particularly disassoc/deauth, were missed
+by hostapd.
+
+Fix this by restoring the original code path for non-data frames, they
+aren't critical for performance to begin with.
+
+This fixes https://bugzilla.kernel.org/show_bug.cgi?id=194713.
+
+Big thanks goes to Jarek who bisected the issue and provided a very
+detailed bug report, including the crucial information that he was
+using VLANs in his configuration.
+
+Fixes: 771e846bea9e ("mac80211: allow passing transmitter station on RX")
+Reported-and-tested-by: Jarek Kamiński <jarek@freeside.be>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/rx.c |   16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -4092,15 +4092,17 @@ static void __ieee80211_rx_handle_packet
+                    ieee80211_is_beacon(hdr->frame_control)))
+               ieee80211_scan_rx(local, skb);
+-      if (pubsta) {
+-              rx.sta = container_of(pubsta, struct sta_info, sta);
+-              rx.sdata = rx.sta->sdata;
+-              if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
+-                      return;
+-              goto out;
+-      } else if (ieee80211_is_data(fc)) {
++      if (ieee80211_is_data(fc)) {
+               struct sta_info *sta, *prev_sta;
++              if (pubsta) {
++                      rx.sta = container_of(pubsta, struct sta_info, sta);
++                      rx.sdata = rx.sta->sdata;
++                      if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
++                              return;
++                      goto out;
++              }
++
+               prev_sta = NULL;
+               for_each_sta_info(local, hdr->addr2, sta, tmp) {
diff --git a/queue-4.9/memory-atmel-ebi-fix-ns-cycles-conversions.patch b/queue-4.9/memory-atmel-ebi-fix-ns-cycles-conversions.patch
new file mode 100644 (file)
index 0000000..39d5d71
--- /dev/null
@@ -0,0 +1,109 @@
+From ee194289502a6901cc77dc9a893bf2afd351ac5e Mon Sep 17 00:00:00 2001
+From: Boris Brezillon <boris.brezillon@free-electrons.com>
+Date: Mon, 28 Nov 2016 16:17:56 +0100
+Subject: memory/atmel-ebi: Fix ns <-> cycles conversions
+
+From: Boris Brezillon <boris.brezillon@free-electrons.com>
+
+commit ee194289502a6901cc77dc9a893bf2afd351ac5e upstream.
+
+at91sam9_ebi_get_config() is incorrectly converting timings in clock
+cycles into timings in nanoseconds by multiplying the cycle values by
+the clk rate instead of the clk period.
+
+at91sam9_ebi_xslate_config() has the same problem for the
+tdf_ns -> tdf_cycles conversion.
+
+Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
+Reported-by: Chris Leahy <leahycm@gmail.com>
+Fixes: 6a4ec4cd0888 ("memory: add Atmel EBI (External Bus Interface) driver")
+Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/memory/atmel-ebi.c |   27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+--- a/drivers/memory/atmel-ebi.c
++++ b/drivers/memory/atmel-ebi.c
+@@ -93,7 +93,7 @@ static void at91sam9_ebi_get_config(stru
+                                   struct at91_ebi_dev_config *conf)
+ {
+       struct at91sam9_smc_generic_fields *fields = &ebid->ebi->sam9;
+-      unsigned int clk_rate = clk_get_rate(ebid->ebi->clk);
++      unsigned int clk_period = NSEC_PER_SEC / clk_get_rate(ebid->ebi->clk);
+       struct at91sam9_ebi_dev_config *config = &conf->sam9;
+       struct at91sam9_smc_timings *timings = &config->timings;
+       unsigned int val;
+@@ -102,43 +102,43 @@ static void at91sam9_ebi_get_config(stru
+       config->mode = val & ~AT91_SMC_TDF;
+       val = (val & AT91_SMC_TDF) >> 16;
+-      timings->tdf_ns = clk_rate * val;
++      timings->tdf_ns = clk_period * val;
+       regmap_fields_read(fields->setup, conf->cs, &val);
+       timings->ncs_rd_setup_ns = (val >> 24) & 0x1f;
+       timings->ncs_rd_setup_ns += ((val >> 29) & 0x1) * 128;
+-      timings->ncs_rd_setup_ns *= clk_rate;
++      timings->ncs_rd_setup_ns *= clk_period;
+       timings->nrd_setup_ns = (val >> 16) & 0x1f;
+       timings->nrd_setup_ns += ((val >> 21) & 0x1) * 128;
+-      timings->nrd_setup_ns *= clk_rate;
++      timings->nrd_setup_ns *= clk_period;
+       timings->ncs_wr_setup_ns = (val >> 8) & 0x1f;
+       timings->ncs_wr_setup_ns += ((val >> 13) & 0x1) * 128;
+-      timings->ncs_wr_setup_ns *= clk_rate;
++      timings->ncs_wr_setup_ns *= clk_period;
+       timings->nwe_setup_ns = val & 0x1f;
+       timings->nwe_setup_ns += ((val >> 5) & 0x1) * 128;
+-      timings->nwe_setup_ns *= clk_rate;
++      timings->nwe_setup_ns *= clk_period;
+       regmap_fields_read(fields->pulse, conf->cs, &val);
+       timings->ncs_rd_pulse_ns = (val >> 24) & 0x3f;
+       timings->ncs_rd_pulse_ns += ((val >> 30) & 0x1) * 256;
+-      timings->ncs_rd_pulse_ns *= clk_rate;
++      timings->ncs_rd_pulse_ns *= clk_period;
+       timings->nrd_pulse_ns = (val >> 16) & 0x3f;
+       timings->nrd_pulse_ns += ((val >> 22) & 0x1) * 256;
+-      timings->nrd_pulse_ns *= clk_rate;
++      timings->nrd_pulse_ns *= clk_period;
+       timings->ncs_wr_pulse_ns = (val >> 8) & 0x3f;
+       timings->ncs_wr_pulse_ns += ((val >> 14) & 0x1) * 256;
+-      timings->ncs_wr_pulse_ns *= clk_rate;
++      timings->ncs_wr_pulse_ns *= clk_period;
+       timings->nwe_pulse_ns = val & 0x3f;
+       timings->nwe_pulse_ns += ((val >> 6) & 0x1) * 256;
+-      timings->nwe_pulse_ns *= clk_rate;
++      timings->nwe_pulse_ns *= clk_period;
+       regmap_fields_read(fields->cycle, conf->cs, &val);
+       timings->nrd_cycle_ns = (val >> 16) & 0x7f;
+       timings->nrd_cycle_ns += ((val >> 23) & 0x3) * 256;
+-      timings->nrd_cycle_ns *= clk_rate;
++      timings->nrd_cycle_ns *= clk_period;
+       timings->nwe_cycle_ns = val & 0x7f;
+       timings->nwe_cycle_ns += ((val >> 7) & 0x3) * 256;
+-      timings->nwe_cycle_ns *= clk_rate;
++      timings->nwe_cycle_ns *= clk_period;
+ }
+ static int at91_xlate_timing(struct device_node *np, const char *prop,
+@@ -334,6 +334,7 @@ static int at91sam9_ebi_apply_config(str
+                                    struct at91_ebi_dev_config *conf)
+ {
+       unsigned int clk_rate = clk_get_rate(ebid->ebi->clk);
++      unsigned int clk_period = NSEC_PER_SEC / clk_rate;
+       struct at91sam9_ebi_dev_config *config = &conf->sam9;
+       struct at91sam9_smc_timings *timings = &config->timings;
+       struct at91sam9_smc_generic_fields *fields = &ebid->ebi->sam9;
+@@ -376,7 +377,7 @@ static int at91sam9_ebi_apply_config(str
+       val |= AT91SAM9_SMC_NWECYCLE(coded_val);
+       regmap_fields_write(fields->cycle, conf->cs, val);
+-      val = DIV_ROUND_UP(timings->tdf_ns, clk_rate);
++      val = DIV_ROUND_UP(timings->tdf_ns, clk_period);
+       if (val > AT91_SMC_TDF_MAX)
+               val = AT91_SMC_TDF_MAX;
+       regmap_fields_write(fields->mode, conf->cs,
diff --git a/queue-4.9/mnt-tuck-mounts-under-others-instead-of-creating-shadow-side-mounts.patch b/queue-4.9/mnt-tuck-mounts-under-others-instead-of-creating-shadow-side-mounts.patch
new file mode 100644 (file)
index 0000000..a3ae3d4
--- /dev/null
@@ -0,0 +1,422 @@
+From 1064f874abc0d05eeed8993815f584d847b72486 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 20 Jan 2017 18:28:35 +1300
+Subject: mnt: Tuck mounts under others instead of creating shadow/side mounts.
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 1064f874abc0d05eeed8993815f584d847b72486 upstream.
+
+Ever since mount propagation was introduced, in cases where a mount is
+propagated to a parent mount mountpoint pair that is already in use the
+code has placed the new mount behind the old mount in the mount hash
+table.
+
+This implementation detail is problematic as it allows creating
+arbitrary length mount hash chains.
+
+Furthermore it invalidates the constraint maintained elsewhere in the
+mount code that a parent mount and a mountpoint pair will have exactly
+one mount upon them.  Making it hard to deal with and to talk about
+this special case in the mount code.
+
+Modify mount propagation to notice when there is already a mount at
+the parent mount and mountpoint where a new mount is propagating to
+and place that preexisting mount on top of the new mount.
+
+Modify unmount propagation to notice when a mount that is being
+unmounted has another mount on top of it (and no other children), and
+to replace the unmounted mount with the mount on top of it.
+
+Move the MNT_UMOUNT test from __lookup_mnt_last into
+__propagate_umount as that is the only call of __lookup_mnt_last where
+MNT_UMOUNT may be set on any mount visible in the mount hash table.
+
+These modifications allow:
+ - __lookup_mnt_last to be removed.
+ - attach_shadowed to be renamed __attach_mnt and its shadow
+   handling to be removed.
+ - commit_tree to be simplified
+ - copy_tree to be simplified
+
+The result is an easier to understand tree of mounts that does not
+allow creation of arbitrary length hash chains in the mount hash table.
+
+The result is also a very slight userspace visible difference in semantics.
+The following two cases now behave identically, where before order
+mattered:
+
+case 1: (explicit user action)
+       B is a slave of A
+       mount something on A/a , it will propagate to B/a
+       and then mount something on B/a
+
+case 2: (tucked mount)
+       B is a slave of A
+       mount something on B/a
+       and then mount something on A/a
+
+Historically umount A/a would fail in case 1 and succeed in case 2.
+Now umount A/a succeeds in both configurations.
+
+This very small change in semantics appears if anything to be a bug
+fix to me and my survey of userspace leads me to believe that no programs
+will notice or care about this subtle semantic change.
+
+v2: Updated to mnt_change_mountpoint to not call dput or mntput
+and instead to decrement the counts directly.  It is guaranteed
+that there will be other references when mnt_change_mountpoint is
+called so this is safe.
+
+v3: Moved put_mountpoint under mount_lock in attach_recursive_mnt
+    As the locking in fs/namespace.c changed between v2 and v3.
+
+v4: Reworked the logic in propagate_mount_busy and __propagate_umount
+    that detects when a mount completely covers another mount.
+
+v5: Removed unnecessary tests whose result is always true in
+    find_topper and attach_recursive_mnt.
+
+v6: Document the user space visible semantic difference.
+
+Fixes: b90fa9ae8f51 ("[PATCH] shared mount handling: bind and rbind")
+Tested-by: Andrei Vagin <avagin@virtuozzo.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/mount.h     |    1 
+ fs/namespace.c |  110 +++++++++++++++++++++++++++++++--------------------------
+ fs/pnode.c     |   61 +++++++++++++++++++++++++------
+ fs/pnode.h     |    2 +
+ 4 files changed, 111 insertions(+), 63 deletions(-)
+
+--- a/fs/mount.h
++++ b/fs/mount.h
+@@ -89,7 +89,6 @@ static inline int is_mounted(struct vfsm
+ }
+ extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
+-extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
+ extern int __legitimize_mnt(struct vfsmount *, unsigned);
+ extern bool legitimize_mnt(struct vfsmount *, unsigned);
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -641,28 +641,6 @@ struct mount *__lookup_mnt(struct vfsmou
+ }
+ /*
+- * find the last mount at @dentry on vfsmount @mnt.
+- * mount_lock must be held.
+- */
+-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
+-{
+-      struct mount *p, *res = NULL;
+-      p = __lookup_mnt(mnt, dentry);
+-      if (!p)
+-              goto out;
+-      if (!(p->mnt.mnt_flags & MNT_UMOUNT))
+-              res = p;
+-      hlist_for_each_entry_continue(p, mnt_hash) {
+-              if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
+-                      break;
+-              if (!(p->mnt.mnt_flags & MNT_UMOUNT))
+-                      res = p;
+-      }
+-out:
+-      return res;
+-}
+-
+-/*
+  * lookup_mnt - Return the first child mount mounted at path
+  *
+  * "First" means first mounted chronologically.  If you create the
+@@ -882,6 +860,13 @@ void mnt_set_mountpoint(struct mount *mn
+       hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
+ }
++static void __attach_mnt(struct mount *mnt, struct mount *parent)
++{
++      hlist_add_head_rcu(&mnt->mnt_hash,
++                         m_hash(&parent->mnt, mnt->mnt_mountpoint));
++      list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
++}
++
+ /*
+  * vfsmount lock must be held for write
+  */
+@@ -890,28 +875,45 @@ static void attach_mnt(struct mount *mnt
+                       struct mountpoint *mp)
+ {
+       mnt_set_mountpoint(parent, mp, mnt);
+-      hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
+-      list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
++      __attach_mnt(mnt, parent);
+ }
+-static void attach_shadowed(struct mount *mnt,
+-                      struct mount *parent,
+-                      struct mount *shadows)
++void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
+ {
+-      if (shadows) {
+-              hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
+-              list_add(&mnt->mnt_child, &shadows->mnt_child);
+-      } else {
+-              hlist_add_head_rcu(&mnt->mnt_hash,
+-                              m_hash(&parent->mnt, mnt->mnt_mountpoint));
+-              list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+-      }
++      struct mountpoint *old_mp = mnt->mnt_mp;
++      struct dentry *old_mountpoint = mnt->mnt_mountpoint;
++      struct mount *old_parent = mnt->mnt_parent;
++
++      list_del_init(&mnt->mnt_child);
++      hlist_del_init(&mnt->mnt_mp_list);
++      hlist_del_init_rcu(&mnt->mnt_hash);
++
++      attach_mnt(mnt, parent, mp);
++
++      put_mountpoint(old_mp);
++
++      /*
++       * Safely avoid even the suggestion this code might sleep or
++       * lock the mount hash by taking advantage of the knowledge that
++       * mnt_change_mountpoint will not release the final reference
++       * to a mountpoint.
++       *
++       * During mounting, the mount passed in as the parent mount will
++       * continue to use the old mountpoint and during unmounting, the
++       * old mountpoint will continue to exist until namespace_unlock,
++       * which happens well after mnt_change_mountpoint.
++       */
++      spin_lock(&old_mountpoint->d_lock);
++      old_mountpoint->d_lockref.count--;
++      spin_unlock(&old_mountpoint->d_lock);
++
++      mnt_add_count(old_parent, -1);
+ }
+ /*
+  * vfsmount lock must be held for write
+  */
+-static void commit_tree(struct mount *mnt, struct mount *shadows)
++static void commit_tree(struct mount *mnt)
+ {
+       struct mount *parent = mnt->mnt_parent;
+       struct mount *m;
+@@ -929,7 +931,7 @@ static void commit_tree(struct mount *mn
+       n->mounts += n->pending_mounts;
+       n->pending_mounts = 0;
+-      attach_shadowed(mnt, parent, shadows);
++      __attach_mnt(mnt, parent);
+       touch_mnt_namespace(n);
+ }
+@@ -1737,7 +1739,6 @@ struct mount *copy_tree(struct mount *mn
+                       continue;
+               for (s = r; s; s = next_mnt(s, r)) {
+-                      struct mount *t = NULL;
+                       if (!(flag & CL_COPY_UNBINDABLE) &&
+                           IS_MNT_UNBINDABLE(s)) {
+                               s = skip_mnt_tree(s);
+@@ -1759,14 +1760,7 @@ struct mount *copy_tree(struct mount *mn
+                               goto out;
+                       lock_mount_hash();
+                       list_add_tail(&q->mnt_list, &res->mnt_list);
+-                      mnt_set_mountpoint(parent, p->mnt_mp, q);
+-                      if (!list_empty(&parent->mnt_mounts)) {
+-                              t = list_last_entry(&parent->mnt_mounts,
+-                                      struct mount, mnt_child);
+-                              if (t->mnt_mp != p->mnt_mp)
+-                                      t = NULL;
+-                      }
+-                      attach_shadowed(q, parent, t);
++                      attach_mnt(q, parent, p->mnt_mp);
+                       unlock_mount_hash();
+               }
+       }
+@@ -1967,10 +1961,18 @@ static int attach_recursive_mnt(struct m
+ {
+       HLIST_HEAD(tree_list);
+       struct mnt_namespace *ns = dest_mnt->mnt_ns;
++      struct mountpoint *smp;
+       struct mount *child, *p;
+       struct hlist_node *n;
+       int err;
++      /* Preallocate a mountpoint in case the new mounts need
++       * to be tucked under other mounts.
++       */
++      smp = get_mountpoint(source_mnt->mnt.mnt_root);
++      if (IS_ERR(smp))
++              return PTR_ERR(smp);
++
+       /* Is there space to add these mounts to the mount namespace? */
+       if (!parent_path) {
+               err = count_mounts(ns, source_mnt);
+@@ -1997,16 +1999,19 @@ static int attach_recursive_mnt(struct m
+               touch_mnt_namespace(source_mnt->mnt_ns);
+       } else {
+               mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
+-              commit_tree(source_mnt, NULL);
++              commit_tree(source_mnt);
+       }
+       hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
+               struct mount *q;
+               hlist_del_init(&child->mnt_hash);
+-              q = __lookup_mnt_last(&child->mnt_parent->mnt,
+-                                    child->mnt_mountpoint);
+-              commit_tree(child, q);
++              q = __lookup_mnt(&child->mnt_parent->mnt,
++                               child->mnt_mountpoint);
++              if (q)
++                      mnt_change_mountpoint(child, smp, q);
++              commit_tree(child);
+       }
++      put_mountpoint(smp);
+       unlock_mount_hash();
+       return 0;
+@@ -2021,6 +2026,11 @@ static int attach_recursive_mnt(struct m
+       cleanup_group_ids(source_mnt, NULL);
+  out:
+       ns->pending_mounts = 0;
++
++      read_seqlock_excl(&mount_lock);
++      put_mountpoint(smp);
++      read_sequnlock_excl(&mount_lock);
++
+       return err;
+ }
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -324,6 +324,21 @@ out:
+       return ret;
+ }
++static struct mount *find_topper(struct mount *mnt)
++{
++      /* If there is exactly one mount covering mnt completely return it. */
++      struct mount *child;
++
++      if (!list_is_singular(&mnt->mnt_mounts))
++              return NULL;
++
++      child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
++      if (child->mnt_mountpoint != mnt->mnt.mnt_root)
++              return NULL;
++
++      return child;
++}
++
+ /*
+  * return true if the refcount is greater than count
+  */
+@@ -344,9 +359,8 @@ static inline int do_refcount_check(stru
+  */
+ int propagate_mount_busy(struct mount *mnt, int refcnt)
+ {
+-      struct mount *m, *child;
++      struct mount *m, *child, *topper;
+       struct mount *parent = mnt->mnt_parent;
+-      int ret = 0;
+       if (mnt == parent)
+               return do_refcount_check(mnt, refcnt);
+@@ -361,12 +375,24 @@ int propagate_mount_busy(struct mount *m
+       for (m = propagation_next(parent, parent); m;
+                       m = propagation_next(m, parent)) {
+-              child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
+-              if (child && list_empty(&child->mnt_mounts) &&
+-                  (ret = do_refcount_check(child, 1)))
+-                      break;
++              int count = 1;
++              child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
++              if (!child)
++                      continue;
++
++              /* Is there exactly one mount on the child that covers
++               * it completely whose reference should be ignored?
++               */
++              topper = find_topper(child);
++              if (topper)
++                      count += 1;
++              else if (!list_empty(&child->mnt_mounts))
++                      continue;
++
++              if (do_refcount_check(child, count))
++                      return 1;
+       }
+-      return ret;
++      return 0;
+ }
+ /*
+@@ -383,7 +409,7 @@ void propagate_mount_unlock(struct mount
+       for (m = propagation_next(parent, parent); m;
+                       m = propagation_next(m, parent)) {
+-              child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
++              child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
+               if (child)
+                       child->mnt.mnt_flags &= ~MNT_LOCKED;
+       }
+@@ -401,9 +427,11 @@ static void mark_umount_candidates(struc
+       for (m = propagation_next(parent, parent); m;
+                       m = propagation_next(m, parent)) {
+-              struct mount *child = __lookup_mnt_last(&m->mnt,
++              struct mount *child = __lookup_mnt(&m->mnt,
+                                               mnt->mnt_mountpoint);
+-              if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
++              if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
++                      continue;
++              if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
+                       SET_MNT_MARK(child);
+               }
+       }
+@@ -422,8 +450,8 @@ static void __propagate_umount(struct mo
+       for (m = propagation_next(parent, parent); m;
+                       m = propagation_next(m, parent)) {
+-
+-              struct mount *child = __lookup_mnt_last(&m->mnt,
++              struct mount *topper;
++              struct mount *child = __lookup_mnt(&m->mnt,
+                                               mnt->mnt_mountpoint);
+               /*
+                * umount the child only if the child has no children
+@@ -432,6 +460,15 @@ static void __propagate_umount(struct mo
+               if (!child || !IS_MNT_MARKED(child))
+                       continue;
+               CLEAR_MNT_MARK(child);
++
++              /* If there is exactly one mount covering all of child
++               * replace child with that mount.
++               */
++              topper = find_topper(child);
++              if (topper)
++                      mnt_change_mountpoint(child->mnt_parent, child->mnt_mp,
++                                            topper);
++
+               if (list_empty(&child->mnt_mounts)) {
+                       list_del_init(&child->mnt_child);
+                       child->mnt.mnt_flags |= MNT_UMOUNT;
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -49,6 +49,8 @@ int get_dominating_id(struct mount *mnt,
+ unsigned int mnt_get_count(struct mount *mnt);
+ void mnt_set_mountpoint(struct mount *, struct mountpoint *,
+                       struct mount *);
++void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
++                         struct mount *mnt);
+ struct mount *copy_tree(struct mount *, struct dentry *, int);
+ bool is_path_reachable(struct mount *, struct dentry *,
+                        const struct path *root);
diff --git a/queue-4.9/net-mvpp2-fix-dma-address-calculation-in-mvpp2_txq_inc_put.patch b/queue-4.9/net-mvpp2-fix-dma-address-calculation-in-mvpp2_txq_inc_put.patch
new file mode 100644 (file)
index 0000000..4e8a693
--- /dev/null
@@ -0,0 +1,42 @@
+From 239a3b663647869330955ec59caac0100ef9b60a Mon Sep 17 00:00:00 2001
+From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Date: Tue, 21 Feb 2017 11:28:01 +0100
+Subject: net: mvpp2: fix DMA address calculation in mvpp2_txq_inc_put()
+
+From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+
+commit 239a3b663647869330955ec59caac0100ef9b60a upstream.
+
+When TX descriptors are filled in, the buffer DMA address is split
+between the tx_desc->buf_phys_addr field (high-order bits) and
+tx_desc->packet_offset field (5 low-order bits).
+
+However, when we re-calculate the DMA address from the TX descriptor in
+mvpp2_txq_inc_put(), we do not take tx_desc->packet_offset into
+account. This means that when the DMA address is not aligned on a 32
+bytes boundary, we end up calling dma_unmap_single() with a DMA address
+that was not the one returned by dma_map_single().
+
+This inconsistency is detected by the kernel when DMA_API_DEBUG is
+enabled. We fix this problem by properly calculating the DMA address in
+mvpp2_txq_inc_put().
+
+Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvpp2.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2.c
++++ b/drivers/net/ethernet/marvell/mvpp2.c
+@@ -991,7 +991,7 @@ static void mvpp2_txq_inc_put(struct mvp
+               txq_pcpu->buffs + txq_pcpu->txq_put_index;
+       tx_buf->skb = skb;
+       tx_buf->size = tx_desc->data_size;
+-      tx_buf->phys = tx_desc->buf_phys_addr;
++      tx_buf->phys = tx_desc->buf_phys_addr + tx_desc->packet_offset;
+       txq_pcpu->txq_put_index++;
+       if (txq_pcpu->txq_put_index == txq_pcpu->size)
+               txq_pcpu->txq_put_index = 0;
diff --git a/queue-4.9/nfit-libnvdimm-fix-interleave-set-cookie-calculation.patch b/queue-4.9/nfit-libnvdimm-fix-interleave-set-cookie-calculation.patch
new file mode 100644 (file)
index 0000000..bde5e9d
--- /dev/null
@@ -0,0 +1,166 @@
+From 86ef58a4e35e8fa66afb5898cf6dec6a3bb29f67 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 28 Feb 2017 18:32:48 -0800
+Subject: nfit, libnvdimm: fix interleave set cookie calculation
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 86ef58a4e35e8fa66afb5898cf6dec6a3bb29f67 upstream.
+
+The interleave-set cookie is a sum that sanity checks the composition of
+an interleave set has not changed from when the namespace was initially
+created.  The checksum is calculated by sorting the DIMMs by their
+location in the interleave-set. The comparison for the sort must be
+64-bit wide, not byte-by-byte as performed by memcmp() in the broken
+case.
+
+Fix the implementation to accept correct cookie values in addition to
+the Linux "memcmp" order cookies, but only allow correct cookies to be
+generated going forward. It does mean that namespaces created by
+third-party-tooling, or created by newer kernels with this fix, will not
+validate on older kernels. However, there are a couple mitigating
+conditions:
+
+    1/ platforms with namespace-label capable NVDIMMs are not widely
+       available.
+
+    2/ interleave-sets with a single-dimm are by definition not affected
+       (nothing to sort). This covers the QEMU-KVM NVDIMM emulation case.
+
+The cookie stored in the namespace label will be fixed by any write to
+the namespace label; the most straightforward way to achieve this is to
+write to the "alt_name" attribute of a namespace in sysfs.
+
+Fixes: eaf961536e16 ("libnvdimm, nfit: add interleave-set state-tracking infrastructure")
+Reported-by: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
+Tested-by: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/nfit/core.c        |   16 +++++++++++++++-
+ drivers/nvdimm/namespace_devs.c |   18 ++++++++++++++----
+ drivers/nvdimm/nd.h             |    1 +
+ drivers/nvdimm/region_devs.c    |    9 +++++++++
+ include/linux/libnvdimm.h       |    2 ++
+ 5 files changed, 41 insertions(+), 5 deletions(-)
+
+--- a/drivers/acpi/nfit/core.c
++++ b/drivers/acpi/nfit/core.c
+@@ -1603,7 +1603,7 @@ static size_t sizeof_nfit_set_info(int n
+               + num_mappings * sizeof(struct nfit_set_info_map);
+ }
+-static int cmp_map(const void *m0, const void *m1)
++static int cmp_map_compat(const void *m0, const void *m1)
+ {
+       const struct nfit_set_info_map *map0 = m0;
+       const struct nfit_set_info_map *map1 = m1;
+@@ -1612,6 +1612,14 @@ static int cmp_map(const void *m0, const
+                       sizeof(u64));
+ }
++static int cmp_map(const void *m0, const void *m1)
++{
++      const struct nfit_set_info_map *map0 = m0;
++      const struct nfit_set_info_map *map1 = m1;
++
++      return map0->region_offset - map1->region_offset;
++}
++
+ /* Retrieve the nth entry referencing this spa */
+ static struct acpi_nfit_memory_map *memdev_from_spa(
+               struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
+@@ -1667,6 +1675,12 @@ static int acpi_nfit_init_interleave_set
+       sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
+                       cmp_map, NULL);
+       nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
++
++      /* support namespaces created with the wrong sort order */
++      sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
++                      cmp_map_compat, NULL);
++      nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
++
+       ndr_desc->nd_set = nd_set;
+       devm_kfree(dev, info);
+--- a/drivers/nvdimm/namespace_devs.c
++++ b/drivers/nvdimm/namespace_devs.c
+@@ -1700,6 +1700,7 @@ static int select_pmem_id(struct nd_regi
+ struct device *create_namespace_pmem(struct nd_region *nd_region,
+               struct nd_namespace_label *nd_label)
+ {
++      u64 altcookie = nd_region_interleave_set_altcookie(nd_region);
+       u64 cookie = nd_region_interleave_set_cookie(nd_region);
+       struct nd_label_ent *label_ent;
+       struct nd_namespace_pmem *nspm;
+@@ -1718,7 +1719,11 @@ struct device *create_namespace_pmem(str
+       if (__le64_to_cpu(nd_label->isetcookie) != cookie) {
+               dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n",
+                               nd_label->uuid);
+-              return ERR_PTR(-EAGAIN);
++              if (__le64_to_cpu(nd_label->isetcookie) != altcookie)
++                      return ERR_PTR(-EAGAIN);
++
++              dev_dbg(&nd_region->dev, "valid altcookie in label: %pUb\n",
++                              nd_label->uuid);
+       }
+       nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+@@ -1733,9 +1738,14 @@ struct device *create_namespace_pmem(str
+       res->name = dev_name(&nd_region->dev);
+       res->flags = IORESOURCE_MEM;
+-      for (i = 0; i < nd_region->ndr_mappings; i++)
+-              if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
+-                      break;
++      for (i = 0; i < nd_region->ndr_mappings; i++) {
++              if (has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
++                      continue;
++              if (has_uuid_at_pos(nd_region, nd_label->uuid, altcookie, i))
++                      continue;
++              break;
++      }
++
+       if (i < nd_region->ndr_mappings) {
+               struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+--- a/drivers/nvdimm/nd.h
++++ b/drivers/nvdimm/nd.h
+@@ -327,6 +327,7 @@ struct nd_region *to_nd_region(struct de
+ int nd_region_to_nstype(struct nd_region *nd_region);
+ int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
+ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region);
++u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region);
+ void nvdimm_bus_lock(struct device *dev);
+ void nvdimm_bus_unlock(struct device *dev);
+ bool is_nvdimm_bus_locked(struct device *dev);
+--- a/drivers/nvdimm/region_devs.c
++++ b/drivers/nvdimm/region_devs.c
+@@ -505,6 +505,15 @@ u64 nd_region_interleave_set_cookie(stru
+       return 0;
+ }
++u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
++{
++      struct nd_interleave_set *nd_set = nd_region->nd_set;
++
++      if (nd_set)
++              return nd_set->altcookie;
++      return 0;
++}
++
+ void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
+ {
+       struct nd_label_ent *label_ent, *e;
+--- a/include/linux/libnvdimm.h
++++ b/include/linux/libnvdimm.h
+@@ -70,6 +70,8 @@ struct nd_cmd_desc {
+ struct nd_interleave_set {
+       u64 cookie;
++      /* compatibility with initial buggy Linux implementation */
++      u64 altcookie;
+ };
+ struct nd_mapping_desc {
diff --git a/queue-4.9/nlm-ensure-callback-code-also-checks-that-the-files-match.patch b/queue-4.9/nlm-ensure-callback-code-also-checks-that-the-files-match.patch
new file mode 100644 (file)
index 0000000..7507757
--- /dev/null
@@ -0,0 +1,35 @@
+From 251af29c320d86071664f02c76f0d063a19fefdf Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Sat, 11 Feb 2017 10:37:38 -0500
+Subject: nlm: Ensure callback code also checks that the files match
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit 251af29c320d86071664f02c76f0d063a19fefdf upstream.
+
+It is not sufficient to just check that the lock pids match when
+granting a callback, we also need to ensure that we're granting
+the callback on the right file.
+
+Reported-by: Pankaj Singh <psingh.ait@gmail.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/lockd/lockd.h |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/include/linux/lockd/lockd.h
++++ b/include/linux/lockd/lockd.h
+@@ -355,7 +355,8 @@ static inline int nlm_privileged_request
+ static inline int nlm_compare_locks(const struct file_lock *fl1,
+                                   const struct file_lock *fl2)
+ {
+-      return  fl1->fl_pid   == fl2->fl_pid
++      return file_inode(fl1->fl_file) == file_inode(fl2->fl_file)
++           && fl1->fl_pid   == fl2->fl_pid
+            && fl1->fl_owner == fl2->fl_owner
+            && fl1->fl_start == fl2->fl_start
+            && fl1->fl_end   == fl2->fl_end
diff --git a/queue-4.9/orangefs-use-rcu-for-destroy_inode.patch b/queue-4.9/orangefs-use-rcu-for-destroy_inode.patch
new file mode 100644 (file)
index 0000000..0fa6de3
--- /dev/null
@@ -0,0 +1,44 @@
+From 0695d7dc1d9f19b82ec2cae24856bddce278cfe6 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 24 Feb 2017 16:43:36 +0100
+Subject: orangefs: Use RCU for destroy_inode
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0695d7dc1d9f19b82ec2cae24856bddce278cfe6 upstream.
+
+freeing of inodes must be RCU-delayed on all filesystems
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/super.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/orangefs/super.c
++++ b/fs/orangefs/super.c
+@@ -115,6 +115,13 @@ static struct inode *orangefs_alloc_inod
+       return &orangefs_inode->vfs_inode;
+ }
++static void orangefs_i_callback(struct rcu_head *head)
++{
++      struct inode *inode = container_of(head, struct inode, i_rcu);
++      struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
++      kmem_cache_free(orangefs_inode_cache, orangefs_inode);
++}
++
+ static void orangefs_destroy_inode(struct inode *inode)
+ {
+       struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+@@ -123,7 +130,7 @@ static void orangefs_destroy_inode(struc
+                       "%s: deallocated %p destroying inode %pU\n",
+                       __func__, orangefs_inode, get_khandle_from_ino(inode));
+-      kmem_cache_free(orangefs_inode_cache, orangefs_inode);
++      call_rcu(&inode->i_rcu, orangefs_i_callback);
+ }
+ /*
diff --git a/queue-4.9/pci-hotplug-pnv-php-disable-surprise-hotplug-capability-on-conflicts.patch b/queue-4.9/pci-hotplug-pnv-php-disable-surprise-hotplug-capability-on-conflicts.patch
new file mode 100644 (file)
index 0000000..d24ac25
--- /dev/null
@@ -0,0 +1,64 @@
+From 303529d6ef1293513c2c73c9ab86489eebb37d08 Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Thu, 16 Feb 2017 10:22:33 +1100
+Subject: pci/hotplug/pnv-php: Disable surprise hotplug capability on conflicts
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit 303529d6ef1293513c2c73c9ab86489eebb37d08 upstream.
+
+The root port or PCIe switch downstream port might have been associated
+with driver other than pnv-php. The MSI or MSIx might also have been
+enabled by that driver (e.g. pcieport_drv). Attempt to enable MSI incurs
+below backtrace:
+
+ PowerPC PowerNV PCI Hotplug Driver version: 0.1
+ ------------[ cut here ]------------
+ WARNING: CPU: 19 PID: 1004 at drivers/pci/msi.c:1071 \
+                              __pci_enable_msi_range+0x84/0x4e0
+ NIP [c000000000665c34] __pci_enable_msi_range+0x84/0x4e0
+ LR [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0
+ Call Trace:
+ [c000000384d67600] [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0
+ [c000000384d676e0] [d00000000aa31b04] pnv_php_register+0x564/0x5a0 [pnv_php]
+ [c000000384d677c0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php]
+ [c000000384d678a0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php]
+ [c000000384d67980] [d00000000aa31dfc] pnv_php_init+0x60/0x98 [pnv_php]
+ [c000000384d679f0] [c00000000000cfdc] do_one_initcall+0x6c/0x1d0
+ [c000000384d67ab0] [c000000000b92354] do_init_module+0x94/0x254
+ [c000000384d67b40] [c00000000019719c] load_module+0x258c/0x2c60
+ [c000000384d67d30] [c000000000197bb0] SyS_finit_module+0xf0/0x170
+ [c000000384d67e30] [c00000000000b184] system_call+0x38/0xe0
+
+This fixes the issue by skipping enabling the surprise hotplug
+capability if the MSI or MSIx on the PCI slot's upstream port has
+been enabled by other driver.
+
+Fixes: 360aebd85a4c ("drivers/pci/hotplug: Support surprise hotplug in powernv driver")
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Tested-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pnv_php.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/pci/hotplug/pnv_php.c
++++ b/drivers/pci/hotplug/pnv_php.c
+@@ -799,6 +799,14 @@ static void pnv_php_enable_irq(struct pn
+       struct pci_dev *pdev = php_slot->pdev;
+       int irq, ret;
++      /*
++       * The MSI/MSIx interrupt might have been occupied by other
++       * drivers. Don't populate the surprise hotplug capability
++       * in that case.
++       */
++      if (pci_dev_msi_enabled(pdev))
++              return;
++
+       ret = pci_enable_device(pdev);
+       if (ret) {
+               dev_warn(&pdev->dev, "Error %d enabling device\n", ret);
diff --git a/queue-4.9/pci-hotplug-pnv-php-remove-warn_on-in-pnv_php_put_slot.patch b/queue-4.9/pci-hotplug-pnv-php-remove-warn_on-in-pnv_php_put_slot.patch
new file mode 100644 (file)
index 0000000..780f8c3
--- /dev/null
@@ -0,0 +1,46 @@
+From 36c7c9da40c408a71e5e6bfe12e57dcf549a296d Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Thu, 16 Feb 2017 10:22:32 +1100
+Subject: pci/hotplug/pnv-php: Remove WARN_ON() in pnv_php_put_slot()
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit 36c7c9da40c408a71e5e6bfe12e57dcf549a296d upstream.
+
+The WARN_ON() causes an unnecessary backtrace when putting the parent
+slot, which is likely to be NULL.
+
+ WARNING: CPU: 2 PID: 1071 at drivers/pci/hotplug/pnv_php.c:85 \
+                              pnv_php_release+0xcc/0x150 [pnv_php]
+    :
+ Call Trace:
+ [c0000003bc007c10] [d00000000ad613c4] pnv_php_release+0x144/0x150 [pnv_php]
+ [c0000003bc007c40] [c0000000006641d8] pci_hp_deregister+0x238/0x330
+ [c0000003bc007cd0] [d00000000ad61440] pnv_php_unregister_one+0x70/0xa0 [pnv_php]
+ [c0000003bc007d10] [d00000000ad614c0] pnv_php_unregister+0x50/0x80 [pnv_php]
+ [c0000003bc007d40] [d00000000ad61e84] pnv_php_exit+0x50/0xcb4 [pnv_php]
+ [c0000003bc007d70] [c00000000019499c] SyS_delete_module+0x1fc/0x2a0
+ [c0000003bc007e30] [c00000000000b184] system_call+0x38/0xe0
+
+Fixes: 66725152fb9f ("PCI/hotplug: PowerPC PowerNV PCI hotplug driver")
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Tested-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pnv_php.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/pci/hotplug/pnv_php.c
++++ b/drivers/pci/hotplug/pnv_php.c
+@@ -82,7 +82,7 @@ static void pnv_php_free_slot(struct kre
+ static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot)
+ {
+-      if (WARN_ON(!php_slot))
++      if (!php_slot)
+               return;
+       kref_put(&php_slot->kref, pnv_php_free_slot);
diff --git a/queue-4.9/pwm-pca9685-fix-period-change-with-same-duty-cycle.patch b/queue-4.9/pwm-pca9685-fix-period-change-with-same-duty-cycle.patch
new file mode 100644 (file)
index 0000000..31a60e4
--- /dev/null
@@ -0,0 +1,55 @@
+From 8d254a340efb12b40c4c1ff25a48a4f48f7bbd6b Mon Sep 17 00:00:00 2001
+From: Clemens Gruber <clemens.gruber@pqgruber.com>
+Date: Tue, 13 Dec 2016 16:52:50 +0100
+Subject: pwm: pca9685: Fix period change with same duty cycle
+
+From: Clemens Gruber <clemens.gruber@pqgruber.com>
+
+commit 8d254a340efb12b40c4c1ff25a48a4f48f7bbd6b upstream.
+
+When first implementing support for changing the output frequency, an
+optimization was added to continue the PWM after changing the prescaler
+without having to reprogram the ON and OFF registers for the duty cycle,
+in case the duty cycle stayed the same. This was flawed, because we
+compared the absolute value of the duty cycle in nanoseconds instead of
+the ratio to the period.
+
+Fix the problem by removing the shortcut.
+
+Fixes: 01ec8472009c9 ("pwm-pca9685: Support changing the output frequency")
+Signed-off-by: Clemens Gruber <clemens.gruber@pqgruber.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pwm/pwm-pca9685.c |   11 -----------
+ 1 file changed, 11 deletions(-)
+
+--- a/drivers/pwm/pwm-pca9685.c
++++ b/drivers/pwm/pwm-pca9685.c
+@@ -65,7 +65,6 @@
+ #define PCA9685_MAXCHAN               0x10
+ #define LED_FULL              (1 << 4)
+-#define MODE1_RESTART         (1 << 7)
+ #define MODE1_SLEEP           (1 << 4)
+ #define MODE2_INVRT           (1 << 4)
+ #define MODE2_OUTDRV          (1 << 2)
+@@ -117,16 +116,6 @@ static int pca9685_pwm_config(struct pwm
+                       udelay(500);
+                       pca->period_ns = period_ns;
+-
+-                      /*
+-                       * If the duty cycle did not change, restart PWM with
+-                       * the same duty cycle to period ratio and return.
+-                       */
+-                      if (duty_ns == pca->duty_ns) {
+-                              regmap_update_bits(pca->regmap, PCA9685_MODE1,
+-                                                 MODE1_RESTART, 0x1);
+-                              return 0;
+-                      }
+               } else {
+                       dev_err(chip->dev,
+                               "prescaler not set: period out of bounds!\n");
diff --git a/queue-4.9/s390-chsc-add-exception-handler-for-chsc-instruction.patch b/queue-4.9/s390-chsc-add-exception-handler-for-chsc-instruction.patch
new file mode 100644 (file)
index 0000000..d1f767a
--- /dev/null
@@ -0,0 +1,44 @@
+From 77759137248f34864a8f7a58bbcebfcf1047504a Mon Sep 17 00:00:00 2001
+From: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+Date: Mon, 20 Feb 2017 14:52:58 +0100
+Subject: s390/chsc: Add exception handler for CHSC instruction
+
+From: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+
+commit 77759137248f34864a8f7a58bbcebfcf1047504a upstream.
+
+Prevent kernel crashes due to unhandled exceptions raised by the CHSC
+instruction which may for example be triggered by invalid ioctl data.
+
+Fixes: 64150adf89df ("s390/cio: Introduce generic synchronous CHSC IOCTL")
+Signed-off-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
+Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/cio/ioasm.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/s390/cio/ioasm.c
++++ b/drivers/s390/cio/ioasm.c
+@@ -165,13 +165,15 @@ int tpi(struct tpi_info *addr)
+ int chsc(void *chsc_area)
+ {
+       typedef struct { char _[4096]; } addr_type;
+-      int cc;
++      int cc = -EIO;
+       asm volatile(
+               "       .insn   rre,0xb25f0000,%2,0\n"
+-              "       ipm     %0\n"
++              "0:     ipm     %0\n"
+               "       srl     %0,28\n"
+-              : "=d" (cc), "=m" (*(addr_type *) chsc_area)
++              "1:\n"
++              EX_TABLE(0b, 1b)
++              : "+d" (cc), "=m" (*(addr_type *) chsc_area)
+               : "d" (chsc_area), "m" (*(addr_type *) chsc_area)
+               : "cc");
+       trace_s390_cio_chsc(chsc_area, cc);
diff --git a/queue-4.9/s390-dcssblk-fix-device-size-calculation-in-dcssblk_direct_access.patch b/queue-4.9/s390-dcssblk-fix-device-size-calculation-in-dcssblk_direct_access.patch
new file mode 100644 (file)
index 0000000..6f3002e
--- /dev/null
@@ -0,0 +1,36 @@
+From a63f53e34db8b49675448d03ae324f6c5bc04fe6 Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Date: Mon, 30 Jan 2017 15:52:14 +0100
+Subject: s390/dcssblk: fix device size calculation in dcssblk_direct_access()
+
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+
+commit a63f53e34db8b49675448d03ae324f6c5bc04fe6 upstream.
+
+Since commit dd22f551 "block: Change direct_access calling convention",
+the device size calculation in dcssblk_direct_access() is off-by-one.
+This results in bdev_direct_access() always returning -ENXIO because the
+returned value is not page aligned.
+
+Fix this by adding 1 to the dev_sz calculation.
+
+Fixes: dd22f551 ("block: Change direct_access calling convention")
+Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/block/dcssblk.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/s390/block/dcssblk.c
++++ b/drivers/s390/block/dcssblk.c
+@@ -892,7 +892,7 @@ dcssblk_direct_access (struct block_devi
+       dev_info = bdev->bd_disk->private_data;
+       if (!dev_info)
+               return -ENODEV;
+-      dev_sz = dev_info->end - dev_info->start;
++      dev_sz = dev_info->end - dev_info->start + 1;
+       offset = secnum * 512;
+       *kaddr = (void *) dev_info->start + offset;
+       *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
diff --git a/queue-4.9/s390-kdump-use-linux-elf-note-name-instead-of-core.patch b/queue-4.9/s390-kdump-use-linux-elf-note-name-instead-of-core.patch
new file mode 100644 (file)
index 0000000..d92ab6b
--- /dev/null
@@ -0,0 +1,47 @@
+From a4a81d8eebdc1d209d034f62a082a5131e4242b5 Mon Sep 17 00:00:00 2001
+From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+Date: Tue, 7 Feb 2017 18:09:14 +0100
+Subject: s390/kdump: Use "LINUX" ELF note name instead of "CORE"
+
+From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+
+commit a4a81d8eebdc1d209d034f62a082a5131e4242b5 upstream.
+
+In binutils/libbfd (bfd/elf.c) it is enforced that all s390 specific ELF
+notes like e.g. NT_S390_PREFIX or NT_S390_CTRS have "LINUX" specified
+as note name. Otherwise the notes are ignored.
+
+For /proc/vmcore we currently use "CORE" for these notes.
+
+Up to now this has not been a real problem because the dump analysis tool
+"crash" does not check the note name. But it will break all programs that
+use libbfd for processing ELF notes.
+
+So fix this and use "LINUX" for all s390 specific notes to comply with
+libbfd.
+
+Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
+Reviewed-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
+Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/crash_dump.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/crash_dump.c
++++ b/arch/s390/kernel/crash_dump.c
+@@ -329,7 +329,11 @@ static void *nt_init_name(void *buf, Elf
+ static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
+ {
+-      return nt_init_name(buf, type, desc, d_len, KEXEC_CORE_NOTE_NAME);
++      const char *note_name = "LINUX";
++
++      if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
++              note_name = KEXEC_CORE_NOTE_NAME;
++      return nt_init_name(buf, type, desc, d_len, note_name);
+ }
+ /*
diff --git a/queue-4.9/s390-make-setup_randomness-work.patch b/queue-4.9/s390-make-setup_randomness-work.patch
new file mode 100644 (file)
index 0000000..1965a9f
--- /dev/null
@@ -0,0 +1,44 @@
+From da8fd820f389a0e29080b14c61bf5cf1d8ef5ca1 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+Date: Sat, 4 Feb 2017 11:40:36 +0100
+Subject: s390: make setup_randomness work
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+commit da8fd820f389a0e29080b14c61bf5cf1d8ef5ca1 upstream.
+
+Commit bcfcbb6bae64 ("s390: add system information as device
+randomness") intended to add some virtual machine specific information
+to the randomness pool.
+
+Unfortunately it uses the page allocator before it is ready to use. As
+a result the page allocator always returns NULL and the setup_randomness
+function never adds anything to the randomness pool.
+
+To fix this use memblock_alloc and memblock_free instead.
+
+Fixes: bcfcbb6bae64 ("s390: add system information as device randomness")
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/setup.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/kernel/setup.c
++++ b/arch/s390/kernel/setup.c
+@@ -819,10 +819,10 @@ static void __init setup_randomness(void
+ {
+       struct sysinfo_3_2_2 *vmms;
+-      vmms = (struct sysinfo_3_2_2 *) alloc_page(GFP_KERNEL);
+-      if (vmms && stsi(vmms, 3, 2, 2) == 0 && vmms->count)
++      vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
++      if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+               add_device_randomness(&vmms, vmms->count);
+-      free_page((unsigned long) vmms);
++      memblock_free((unsigned long) vmms, PAGE_SIZE);
+ }
+ /*
diff --git a/queue-4.9/s390-qdio-clear-dsci-prior-to-scanning-multiple-input-queues.patch b/queue-4.9/s390-qdio-clear-dsci-prior-to-scanning-multiple-input-queues.patch
new file mode 100644 (file)
index 0000000..1111d96
--- /dev/null
@@ -0,0 +1,60 @@
+From 1e4a382fdc0ba8d1a85b758c0811de3a3631085e Mon Sep 17 00:00:00 2001
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Mon, 21 Nov 2016 13:37:48 +0100
+Subject: s390/qdio: clear DSCI prior to scanning multiple input queues
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+commit 1e4a382fdc0ba8d1a85b758c0811de3a3631085e upstream.
+
+For devices with multiple input queues, tiqdio_call_inq_handlers()
+iterates over all input queues and clears the device's DSCI
+during each iteration. If the DSCI is re-armed during one
+of the later iterations, we therefore do not scan the previous
+queues again.
+The re-arming also raises a new adapter interrupt. But its
+handler does not trigger a rescan for the device, as the DSCI
+has already been erroneously cleared.
+This can result in queue stalls on devices with multiple
+input queues.
+
+Fix it by clearing the DSCI just once, prior to scanning the queues.
+
+As the code is moved in front of the loop, we also need to access
+the DSCI directly (ie irq->dsci) instead of going via each queue's
+parent pointer to the same irq. This is not a functional change,
+and a follow-up patch will clean up the other users.
+
+In practice, this bug only affects CQ-enabled HiperSockets devices,
+ie. devices with sysfs-attribute "hsuid" set. Setting a hsuid is
+needed for AF_IUCV socket applications that use HiperSockets
+communication.
+
+Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks")
+Reviewed-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/cio/qdio_thinint.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/s390/cio/qdio_thinint.c
++++ b/drivers/s390/cio/qdio_thinint.c
+@@ -147,11 +147,11 @@ static inline void tiqdio_call_inq_handl
+       struct qdio_q *q;
+       int i;
+-      for_each_input_queue(irq, q, i) {
+-              if (!references_shared_dsci(irq) &&
+-                  has_multiple_inq_on_dsci(irq))
+-                      xchg(q->irq_ptr->dsci, 0);
++      if (!references_shared_dsci(irq) &&
++          has_multiple_inq_on_dsci(irq))
++              xchg(irq->dsci, 0);
++      for_each_input_queue(irq, q, i) {
+               if (q->u.in.queue_start_poll) {
+                       /* skip if polling is enabled or already in work */
+                       if (test_and_set_bit(QDIO_QUEUE_IRQS_DISABLED,
diff --git a/queue-4.9/s390-task_size-for-kernel-threads.patch b/queue-4.9/s390-task_size-for-kernel-threads.patch
new file mode 100644 (file)
index 0000000..6d785c2
--- /dev/null
@@ -0,0 +1,34 @@
+From fb94a687d96c570d46332a4a890f1dcb7310e643 Mon Sep 17 00:00:00 2001
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Date: Fri, 24 Feb 2017 07:43:51 +0100
+Subject: s390: TASK_SIZE for kernel threads
+
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+
+commit fb94a687d96c570d46332a4a890f1dcb7310e643 upstream.
+
+Return a sensible value if TASK_SIZE is called from a kernel thread.
+
+This gets us around an issue with copy_mount_options that does a magic
+size calculation "TASK_SIZE - (unsigned long)data" while in a kernel
+thread and data pointing to kernel space.
+
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/processor.h |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/include/asm/processor.h
++++ b/arch/s390/include/asm/processor.h
+@@ -89,7 +89,8 @@ extern void execve_tail(void);
+  * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
+  */
+-#define TASK_SIZE_OF(tsk)     ((tsk)->mm->context.asce_limit)
++#define TASK_SIZE_OF(tsk)     ((tsk)->mm ? \
++                               (tsk)->mm->context.asce_limit : TASK_MAX_SIZE)
+ #define TASK_UNMAPPED_BASE    (test_thread_flag(TIF_31BIT) ? \
+                                       (1UL << 30) : (1UL << 41))
+ #define TASK_SIZE             TASK_SIZE_OF(current)
diff --git a/queue-4.9/s390-use-correct-input-data-address-for-setup_randomness.patch b/queue-4.9/s390-use-correct-input-data-address-for-setup_randomness.patch
new file mode 100644 (file)
index 0000000..02ada63
--- /dev/null
@@ -0,0 +1,41 @@
+From 4920e3cf77347d7d7373552d4839e8d832321313 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+Date: Sun, 5 Feb 2017 23:03:18 +0100
+Subject: s390: use correct input data address for setup_randomness
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+commit 4920e3cf77347d7d7373552d4839e8d832321313 upstream.
+
+The current implementation of setup_randomness uses the stack address
+and therefore the pointer to the SYSIB 3.2.2 block as input data
+address. Furthermore the length of the input data is the number of
+virtual-machine description blocks which is typically one.
+
+This means that typically a single zero byte is fed to
+add_device_randomness.
+
+Fix both of these and use the address of the first virtual machine
+description block as input data address and also use the correct
+length.
+
+Fixes: bcfcbb6bae64 ("s390: add system information as device randomness")
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/setup.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/setup.c
++++ b/arch/s390/kernel/setup.c
+@@ -821,7 +821,7 @@ static void __init setup_randomness(void
+       vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+       if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+-              add_device_randomness(&vmms, vmms->count);
++              add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
+       memblock_free((unsigned long) vmms, PAGE_SIZE);
+ }
diff --git a/queue-4.9/serial-8250_pci-add-mks-tenta-scom-0800-and-scom-0801-cards.patch b/queue-4.9/serial-8250_pci-add-mks-tenta-scom-0800-and-scom-0801-cards.patch
new file mode 100644 (file)
index 0000000..0b168b9
--- /dev/null
@@ -0,0 +1,60 @@
+From 1c9c858e2ff8ae8024a3d75d2ed080063af43754 Mon Sep 17 00:00:00 2001
+From: Ian Abbott <abbotti@mev.co.uk>
+Date: Fri, 3 Feb 2017 20:25:00 +0000
+Subject: serial: 8250_pci: Add MKS Tenta SCOM-0800 and SCOM-0801 cards
+
+From: Ian Abbott <abbotti@mev.co.uk>
+
+commit 1c9c858e2ff8ae8024a3d75d2ed080063af43754 upstream.
+
+The MKS Instruments SCOM-0800 and SCOM-0801 cards (originally by Tenta
+Technologies) are 3U CompactPCI serial cards with 4 and 8 serial ports,
+respectively.  The first 4 ports are implemented by an OX16PCI954 chip,
+and the second 4 ports are implemented by an OX16C954 chip on a local
+bus, bridged by the second PCI function of the OX16PCI954.  The ports
+are jumper-selectable as RS-232 and RS-422/485, and the UARTs use a
+non-standard oscillator frequency of 20 MHz (base_baud = 1250000).
+
+Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/serial/8250/8250_pci.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -2688,6 +2688,8 @@ enum pci_board_num_t {
+       pbn_b0_4_1152000_200,
+       pbn_b0_8_1152000_200,
++      pbn_b0_4_1250000,
++
+       pbn_b0_2_1843200,
+       pbn_b0_4_1843200,
+@@ -2919,6 +2921,13 @@ static struct pciserial_board pci_boards
+               .uart_offset    = 0x200,
+       },
++      [pbn_b0_4_1250000] = {
++              .flags          = FL_BASE0,
++              .num_ports      = 4,
++              .base_baud      = 1250000,
++              .uart_offset    = 8,
++      },
++
+       [pbn_b0_2_1843200] = {
+               .flags          = FL_BASE0,
+               .num_ports      = 2,
+@@ -5549,6 +5558,10 @@ static struct pci_device_id serial_pci_t
+       { PCI_DEVICE(0x1c29, 0x1108), .driver_data = pbn_fintek_8 },
+       { PCI_DEVICE(0x1c29, 0x1112), .driver_data = pbn_fintek_12 },
++      /* MKS Tenta SCOM-080x serial cards */
++      { PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 },
++      { PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 },
++
+       /*
+        * These entries match devices with class COMMUNICATION_SERIAL,
+        * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL
index a6802e67ecb2677ba9526acf5a5c54452595c5b5..6b5ec12bd5f41c6e0319416326fc55caba6d56b4 100644 (file)
@@ -1 +1,41 @@
 tty-n_hdlc-get-rid-of-racy-n_hdlc.tbuf.patch
+serial-8250_pci-add-mks-tenta-scom-0800-and-scom-0801-cards.patch
+kvm-s390-disable-dirty-log-retrieval-for-ucontrol-guests.patch
+kvm-vmx-use-correct-vmcs_read-write-for-guest-segment-selector-base.patch
+bluetooth-add-another-ar3012-04ca-3018-device.patch
+s390-qdio-clear-dsci-prior-to-scanning-multiple-input-queues.patch
+s390-dcssblk-fix-device-size-calculation-in-dcssblk_direct_access.patch
+s390-kdump-use-linux-elf-note-name-instead-of-core.patch
+s390-chsc-add-exception-handler-for-chsc-instruction.patch
+s390-task_size-for-kernel-threads.patch
+s390-make-setup_randomness-work.patch
+s390-use-correct-input-data-address-for-setup_randomness.patch
+net-mvpp2-fix-dma-address-calculation-in-mvpp2_txq_inc_put.patch
+cxl-prevent-read-write-to-afu-config-space-while-afu-not-configured.patch
+cxl-fix-nested-locking-hang-during-eeh-hotplug.patch
+brcmfmac-fix-incorrect-event-channel-deduction.patch
+mnt-tuck-mounts-under-others-instead-of-creating-shadow-side-mounts.patch
+ib-ipoib-fix-deadlock-between-rmmod-and-set_mode.patch
+ib-ipoib-add-destination-address-when-re-queue-packet.patch
+ib-mlx5-fix-out-of-bound-access.patch
+ib-srp-avoid-using-ib_mr_type_sg_gaps.patch
+ib-srp-avoid-that-duplicate-responses-trigger-a-kernel-bug.patch
+ib-srp-fix-race-conditions-related-to-task-management.patch
+fs-better-permission-checking-for-submounts.patch
+orangefs-use-rcu-for-destroy_inode.patch
+memory-atmel-ebi-fix-ns-cycles-conversions.patch
+ktest-fix-child-exit-code-processing.patch
+ceph-remove-req-from-unsafe-list-when-unregistering-it.patch
+pci-hotplug-pnv-php-remove-warn_on-in-pnv_php_put_slot.patch
+pci-hotplug-pnv-php-disable-surprise-hotplug-capability-on-conflicts.patch
+target-fix-null-dereference-during-lun-lookup-active-i-o-shutdown.patch
+drivers-pci-hotplug-handle-presence-detection-change-properly.patch
+drivers-pci-hotplug-fix-initial-state-for-empty-slot.patch
+nlm-ensure-callback-code-also-checks-that-the-files-match.patch
+pwm-pca9685-fix-period-change-with-same-duty-cycle.patch
+xtensa-move-parse_tag_fdt-out-of-ifdef-config_blk_dev_initrd.patch
+nfit-libnvdimm-fix-interleave-set-cookie-calculation.patch
+mac80211-flush-delayed-work-when-entering-suspend.patch
+mac80211-don-t-reorder-frames-with-sn-smaller-than-ssn.patch
+mac80211-don-t-handle-filtered-frames-within-a-ba-session.patch
+mac80211-use-driver-indicated-transmitter-sta-only-for-data-frames.patch
diff --git a/queue-4.9/target-fix-null-dereference-during-lun-lookup-active-i-o-shutdown.patch b/queue-4.9/target-fix-null-dereference-during-lun-lookup-active-i-o-shutdown.patch
new file mode 100644 (file)
index 0000000..09712c8
--- /dev/null
@@ -0,0 +1,189 @@
+From bd4e2d2907fa23a11d46217064ecf80470ddae10 Mon Sep 17 00:00:00 2001
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+Date: Wed, 22 Feb 2017 22:06:32 -0800
+Subject: target: Fix NULL dereference during LUN lookup + active I/O shutdown
+
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+
+commit bd4e2d2907fa23a11d46217064ecf80470ddae10 upstream.
+
+When transport_clear_lun_ref() is shutting down a se_lun via
+configfs with new I/O in-flight, it's possible to trigger a
+NULL pointer dereference in transport_lookup_cmd_lun() due
+to the fact percpu_ref_get() doesn't do any __PERCPU_REF_DEAD
+checking before incrementing lun->lun_ref.count after
+lun->lun_ref has switched to atomic_t mode.
+
+This results in a NULL pointer dereference as LUN shutdown
+code in core_tpg_remove_lun() continues running after the
+existing ->release() -> core_tpg_lun_ref_release() callback
+completes, and clears the RCU protected se_lun->lun_se_dev
+pointer.
+
+During the OOPs, the state of lun->lun_ref in the process
+which triggered the NULL pointer dereference looks like
+the following on v4.1.y stable code:
+
+struct se_lun {
+  lun_link_magic = 4294932337,
+  lun_status = TRANSPORT_LUN_STATUS_FREE,
+
+  .....
+
+  lun_se_dev = 0x0,
+  lun_sep = 0x0,
+
+  .....
+
+  lun_ref = {
+    count = {
+      counter = 1
+    },
+    percpu_count_ptr = 3,
+    release = 0xffffffffa02fa1e0 <core_tpg_lun_ref_release>,
+    confirm_switch = 0x0,
+    force_atomic = false,
+    rcu = {
+      next = 0xffff88154fa1a5d0,
+      func = 0xffffffff8137c4c0 <percpu_ref_switch_to_atomic_rcu>
+    }
+  }
+}
+
+To address this bug, use percpu_ref_tryget_live() to ensure
+once __PERCPU_REF_DEAD is visible on all CPUs and ->lun_ref
+has switched to atomic_t, all new I/Os will fail to obtain
+a new lun->lun_ref reference.
+
+Also use an explicit percpu_ref_kill_and_confirm() callback
+to block on ->lun_ref_comp to allow the first stage and
+associated RCU grace period to complete, and then block on
+->lun_shutdown_comp waiting for the final percpu_ref_put()
+to drop the last reference via transport_lun_remove_cmd()
+before continuing with core_tpg_remove_lun() shutdown.
+
+Reported-by: Rob Millner <rlm@daterainc.com>
+Tested-by: Rob Millner <rlm@daterainc.com>
+Cc: Rob Millner <rlm@daterainc.com>
+Tested-by: Vaibhav Tandon <vst@datera.io>
+Cc: Vaibhav Tandon <vst@datera.io>
+Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/target/target_core_device.c    |   10 ++++++++--
+ drivers/target/target_core_tpg.c       |    3 ++-
+ drivers/target/target_core_transport.c |   31 ++++++++++++++++++++++++++++++-
+ include/target/target_core_base.h      |    1 +
+ 4 files changed, 41 insertions(+), 4 deletions(-)
+
+--- a/drivers/target/target_core_device.c
++++ b/drivers/target/target_core_device.c
+@@ -77,12 +77,16 @@ transport_lookup_cmd_lun(struct se_cmd *
+                                       &deve->read_bytes);
+               se_lun = rcu_dereference(deve->se_lun);
++
++              if (!percpu_ref_tryget_live(&se_lun->lun_ref)) {
++                      se_lun = NULL;
++                      goto out_unlock;
++              }
++
+               se_cmd->se_lun = rcu_dereference(deve->se_lun);
+               se_cmd->pr_res_key = deve->pr_res_key;
+               se_cmd->orig_fe_lun = unpacked_lun;
+               se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
+-
+-              percpu_ref_get(&se_lun->lun_ref);
+               se_cmd->lun_ref_active = true;
+               if ((se_cmd->data_direction == DMA_TO_DEVICE) &&
+@@ -96,6 +100,7 @@ transport_lookup_cmd_lun(struct se_cmd *
+                       goto ref_dev;
+               }
+       }
++out_unlock:
+       rcu_read_unlock();
+       if (!se_lun) {
+@@ -815,6 +820,7 @@ struct se_device *target_alloc_device(st
+       xcopy_lun = &dev->xcopy_lun;
+       rcu_assign_pointer(xcopy_lun->lun_se_dev, dev);
+       init_completion(&xcopy_lun->lun_ref_comp);
++      init_completion(&xcopy_lun->lun_shutdown_comp);
+       INIT_LIST_HEAD(&xcopy_lun->lun_deve_list);
+       INIT_LIST_HEAD(&xcopy_lun->lun_dev_link);
+       mutex_init(&xcopy_lun->lun_tg_pt_md_mutex);
+--- a/drivers/target/target_core_tpg.c
++++ b/drivers/target/target_core_tpg.c
+@@ -445,7 +445,7 @@ static void core_tpg_lun_ref_release(str
+ {
+       struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
+-      complete(&lun->lun_ref_comp);
++      complete(&lun->lun_shutdown_comp);
+ }
+ int core_tpg_register(
+@@ -571,6 +571,7 @@ struct se_lun *core_tpg_alloc_lun(
+       lun->lun_link_magic = SE_LUN_LINK_MAGIC;
+       atomic_set(&lun->lun_acl_count, 0);
+       init_completion(&lun->lun_ref_comp);
++      init_completion(&lun->lun_shutdown_comp);
+       INIT_LIST_HEAD(&lun->lun_deve_list);
+       INIT_LIST_HEAD(&lun->lun_dev_link);
+       atomic_set(&lun->lun_tg_pt_secondary_offline, 0);
+--- a/drivers/target/target_core_transport.c
++++ b/drivers/target/target_core_transport.c
+@@ -2702,10 +2702,39 @@ void target_wait_for_sess_cmds(struct se
+ }
+ EXPORT_SYMBOL(target_wait_for_sess_cmds);
++static void target_lun_confirm(struct percpu_ref *ref)
++{
++      struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
++
++      complete(&lun->lun_ref_comp);
++}
++
+ void transport_clear_lun_ref(struct se_lun *lun)
+ {
+-      percpu_ref_kill(&lun->lun_ref);
++      /*
++       * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop
++       * the initial reference and schedule confirm kill to be
++       * executed after one full RCU grace period has completed.
++       */
++      percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm);
++      /*
++       * The first completion waits for percpu_ref_switch_to_atomic_rcu()
++       * to call target_lun_confirm after lun->lun_ref has been marked
++       * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t
++       * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref
++       * fails for all new incoming I/O.
++       */
+       wait_for_completion(&lun->lun_ref_comp);
++      /*
++       * The second completion waits for percpu_ref_put_many() to
++       * invoke ->release() after lun->lun_ref has switched to
++       * atomic_t mode, and lun->lun_ref.count has reached zero.
++       *
++       * At this point all target-core lun->lun_ref references have
++       * been dropped via transport_lun_remove_cmd(), and it's safe
++       * to proceed with the remaining LUN shutdown.
++       */
++      wait_for_completion(&lun->lun_shutdown_comp);
+ }
+ static bool
+--- a/include/target/target_core_base.h
++++ b/include/target/target_core_base.h
+@@ -732,6 +732,7 @@ struct se_lun {
+       struct config_group     lun_group;
+       struct se_port_stat_grps port_stat_grps;
+       struct completion       lun_ref_comp;
++      struct completion       lun_shutdown_comp;
+       struct percpu_ref       lun_ref;
+       struct list_head        lun_dev_link;
+       struct hlist_node       link;
diff --git a/queue-4.9/xtensa-move-parse_tag_fdt-out-of-ifdef-config_blk_dev_initrd.patch b/queue-4.9/xtensa-move-parse_tag_fdt-out-of-ifdef-config_blk_dev_initrd.patch
new file mode 100644 (file)
index 0000000..32f76b9
--- /dev/null
@@ -0,0 +1,41 @@
+From 4ab18701c66552944188dbcd0ce0012729baab84 Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Tue, 3 Jan 2017 09:37:34 -0800
+Subject: xtensa: move parse_tag_fdt out of #ifdef CONFIG_BLK_DEV_INITRD
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit 4ab18701c66552944188dbcd0ce0012729baab84 upstream.
+
+FDT tag parsing is not related to whether BLK_DEV_INITRD is configured
+or not, move it out of the corresponding #ifdef/#endif block.
+This fixes passing external FDT to the kernel configured w/o
+BLK_DEV_INITRD support.
+
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/xtensa/kernel/setup.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/xtensa/kernel/setup.c
++++ b/arch/xtensa/kernel/setup.c
+@@ -133,6 +133,8 @@ static int __init parse_tag_initrd(const
+ __tagtable(BP_TAG_INITRD, parse_tag_initrd);
++#endif /* CONFIG_BLK_DEV_INITRD */
++
+ #ifdef CONFIG_OF
+ static int __init parse_tag_fdt(const bp_tag_t *tag)
+@@ -145,8 +147,6 @@ __tagtable(BP_TAG_FDT, parse_tag_fdt);
+ #endif /* CONFIG_OF */
+-#endif /* CONFIG_BLK_DEV_INITRD */
+-
+ static int __init parse_tag_cmdline(const bp_tag_t* tag)
+ {
+       strlcpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE);