--- /dev/null
+From 441ad62d6c3f131f1dbd7dcdd9cbe3f74dbd8501 Mon Sep 17 00:00:00 2001
+From: Dmitry Tunin <hanipouspilot@gmail.com>
+Date: Thu, 5 Jan 2017 13:19:53 +0300
+Subject: Bluetooth: Add another AR3012 04ca:3018 device
+
+From: Dmitry Tunin <hanipouspilot@gmail.com>
+
+commit 441ad62d6c3f131f1dbd7dcdd9cbe3f74dbd8501 upstream.
+
+T: Bus=01 Lev=01 Prnt=01 Port=07 Cnt=04 Dev#= 5 Spd=12 MxCh= 0
+D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1
+P: Vendor=04ca ProdID=3018 Rev=00.01
+C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
+I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
+I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
+
+Signed-off-by: Dmitry Tunin <hanipouspilot@gmail.com>
+Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/bluetooth/ath3k.c | 2 ++
+ drivers/bluetooth/btusb.c | 1 +
+ 2 files changed, 3 insertions(+)
+
+--- a/drivers/bluetooth/ath3k.c
++++ b/drivers/bluetooth/ath3k.c
+@@ -94,6 +94,7 @@ static const struct usb_device_id ath3k_
+ { USB_DEVICE(0x04CA, 0x300f) },
+ { USB_DEVICE(0x04CA, 0x3010) },
+ { USB_DEVICE(0x04CA, 0x3014) },
++ { USB_DEVICE(0x04CA, 0x3018) },
+ { USB_DEVICE(0x0930, 0x0219) },
+ { USB_DEVICE(0x0930, 0x021c) },
+ { USB_DEVICE(0x0930, 0x0220) },
+@@ -162,6 +163,7 @@ static const struct usb_device_id ath3k_
+ { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 },
++ { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
+--- a/drivers/bluetooth/btusb.c
++++ b/drivers/bluetooth/btusb.c
+@@ -209,6 +209,7 @@ static const struct usb_device_id blackl
+ { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 },
++ { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 },
+ { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
--- /dev/null
+From 8e290cecdd0178f3d4cf7d463c51dc7e462843b4 Mon Sep 17 00:00:00 2001
+From: Gavin Li <git@thegavinli.com>
+Date: Tue, 17 Jan 2017 15:24:05 -0800
+Subject: brcmfmac: fix incorrect event channel deduction
+
+From: Gavin Li <git@thegavinli.com>
+
+commit 8e290cecdd0178f3d4cf7d463c51dc7e462843b4 upstream.
+
+brcmf_sdio_fromevntchan() was being called on the data frame
+rather than the software header, causing some frames to be
+mischaracterized as on the event channel rather than the data channel.
+
+This fixes a major performance regression (due to dropped packets). With
+this patch the download speed jumped from 1Mbit/s back up to 40MBit/s due
+to the sheer amount of packets being incorrectly processed.
+
+Fixes: c56caa9db8ab ("brcmfmac: screening firmware event packet")
+Signed-off-by: Gavin Li <git@thegavinli.com>
+Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
+[kvalo@codeaurora.org: improve commit logs based on email discussion]
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+@@ -1660,7 +1660,7 @@ static u8 brcmf_sdio_rxglom(struct brcmf
+ pfirst->len, pfirst->next,
+ pfirst->prev);
+ skb_unlink(pfirst, &bus->glom);
+- if (brcmf_sdio_fromevntchan(pfirst->data))
++ if (brcmf_sdio_fromevntchan(&dptr[SDPCM_HWHDR_LEN]))
+ brcmf_rx_event(bus->sdiodev->dev, pfirst);
+ else
+ brcmf_rx_frame(bus->sdiodev->dev, pfirst,
--- /dev/null
+From df963ea8a082d31521a120e8e31a29ad8a1dc215 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@redhat.com>
+Date: Tue, 14 Feb 2017 10:09:40 -0500
+Subject: ceph: remove req from unsafe list when unregistering it
+
+From: Jeff Layton <jlayton@redhat.com>
+
+commit df963ea8a082d31521a120e8e31a29ad8a1dc215 upstream.
+
+There's no reason a request should ever be on a s_unsafe list but not
+in the request tree.
+
+Link: http://tracker.ceph.com/issues/18474
+Signed-off-by: Jeff Layton <jlayton@redhat.com>
+Reviewed-by: Yan, Zheng <zyan@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ceph/mds_client.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -628,6 +628,9 @@ static void __unregister_request(struct
+ {
+ dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+
++ /* Never leave an unregistered request on an unsafe list! */
++ list_del_init(&req->r_unsafe_item);
++
+ if (req->r_tid == mdsc->oldest_tid) {
+ struct rb_node *p = rb_next(&req->r_node);
+ mdsc->oldest_tid = 0;
+@@ -1036,7 +1039,6 @@ static void cleanup_session_requests(str
+ while (!list_empty(&session->s_unsafe)) {
+ req = list_first_entry(&session->s_unsafe,
+ struct ceph_mds_request, r_unsafe_item);
+- list_del_init(&req->r_unsafe_item);
+ pr_warn_ratelimited(" dropping unsafe request %llu\n",
+ req->r_tid);
+ __unregister_request(mdsc, req);
+@@ -2423,7 +2425,6 @@ static void handle_reply(struct ceph_mds
+ * useful we could do with a revised return value.
+ */
+ dout("got safe reply %llu, mds%d\n", tid, mds);
+- list_del_init(&req->r_unsafe_item);
+
+ /* last unsafe request during umount? */
+ if (mdsc->stopping && !__get_oldest_req(mdsc))
--- /dev/null
+From 171ed0fcd8966d82c45376f1434678e7b9d4d9b1 Mon Sep 17 00:00:00 2001
+From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Date: Mon, 6 Feb 2017 12:07:17 +1100
+Subject: cxl: fix nested locking hang during EEH hotplug
+
+From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+
+commit 171ed0fcd8966d82c45376f1434678e7b9d4d9b1 upstream.
+
+Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU
+not configured") introduced a rwsem to fix an invalid memory access that
+occurred when someone attempts to access the config space of an AFU on a
+vPHB whilst the AFU is deconfigured, such as during EEH recovery.
+
+It turns out that it's possible to run into a nested locking issue when EEH
+recovery fails and a full device hotplug is required.
+cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on
+configured_rwsem. When EEH recovery fails, the EEH code calls
+pci_hp_remove_devices() to remove the device, which in turn calls
+cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries
+to grab the writer lock that's already held.
+
+Standard rwsem semantics don't express what we really want to do here and
+don't allow for nested locking. Fix this by replacing the rwsem with an
+atomic_t which we can control more finely. Allow the AFU to be locked
+multiple times so long as there are no readers.
+
+Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured")
+Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/cxl/cxl.h | 5 +++--
+ drivers/misc/cxl/main.c | 3 +--
+ drivers/misc/cxl/pci.c | 11 +++++++++--
+ drivers/misc/cxl/vphb.c | 18 ++++++++++++++----
+ 4 files changed, 27 insertions(+), 10 deletions(-)
+
+--- a/drivers/misc/cxl/cxl.h
++++ b/drivers/misc/cxl/cxl.h
+@@ -418,8 +418,9 @@ struct cxl_afu {
+ struct dentry *debugfs;
+ struct mutex contexts_lock;
+ spinlock_t afu_cntl_lock;
+- /* Used to block access to AFU config space while deconfigured */
+- struct rw_semaphore configured_rwsem;
++
++ /* -1: AFU deconfigured/locked, >= 0: number of readers */
++ atomic_t configured_state;
+
+ /* AFU error buffer fields and bin attribute for sysfs */
+ u64 eb_len, eb_offset;
+--- a/drivers/misc/cxl/main.c
++++ b/drivers/misc/cxl/main.c
+@@ -268,8 +268,7 @@ struct cxl_afu *cxl_alloc_afu(struct cxl
+ idr_init(&afu->contexts_idr);
+ mutex_init(&afu->contexts_lock);
+ spin_lock_init(&afu->afu_cntl_lock);
+- init_rwsem(&afu->configured_rwsem);
+- down_write(&afu->configured_rwsem);
++ atomic_set(&afu->configured_state, -1);
+ afu->prefault_mode = CXL_PREFAULT_NONE;
+ afu->irqs_max = afu->adapter->user_irqs;
+
+--- a/drivers/misc/cxl/pci.c
++++ b/drivers/misc/cxl/pci.c
+@@ -1129,7 +1129,7 @@ static int pci_configure_afu(struct cxl_
+ if ((rc = cxl_native_register_psl_irq(afu)))
+ goto err2;
+
+- up_write(&afu->configured_rwsem);
++ atomic_set(&afu->configured_state, 0);
+ return 0;
+
+ err2:
+@@ -1142,7 +1142,14 @@ err1:
+
+ static void pci_deconfigure_afu(struct cxl_afu *afu)
+ {
+- down_write(&afu->configured_rwsem);
++ /*
++ * It's okay to deconfigure when AFU is already locked, otherwise wait
++ * until there are no readers
++ */
++ if (atomic_read(&afu->configured_state) != -1) {
++ while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1)
++ schedule();
++ }
+ cxl_native_release_psl_irq(afu);
+ if (afu->adapter->native->sl_ops->release_serr_irq)
+ afu->adapter->native->sl_ops->release_serr_irq(afu);
+--- a/drivers/misc/cxl/vphb.c
++++ b/drivers/misc/cxl/vphb.c
+@@ -83,6 +83,16 @@ static inline struct cxl_afu *pci_bus_to
+ return phb ? phb->private_data : NULL;
+ }
+
++static void cxl_afu_configured_put(struct cxl_afu *afu)
++{
++ atomic_dec_if_positive(&afu->configured_state);
++}
++
++static bool cxl_afu_configured_get(struct cxl_afu *afu)
++{
++ return atomic_inc_unless_negative(&afu->configured_state);
++}
++
+ static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
+ struct cxl_afu *afu, int *_record)
+ {
+@@ -107,7 +117,7 @@ static int cxl_pcie_read_config(struct p
+
+ afu = pci_bus_to_afu(bus);
+ /* Grab a reader lock on afu. */
+- if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
++ if (afu == NULL || !cxl_afu_configured_get(afu))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+@@ -132,7 +142,7 @@ static int cxl_pcie_read_config(struct p
+ }
+
+ out:
+- up_read(&afu->configured_rwsem);
++ cxl_afu_configured_put(afu);
+ return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+ }
+
+@@ -144,7 +154,7 @@ static int cxl_pcie_write_config(struct
+
+ afu = pci_bus_to_afu(bus);
+ /* Grab a reader lock on afu. */
+- if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
++ if (afu == NULL || !cxl_afu_configured_get(afu))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+@@ -166,7 +176,7 @@ static int cxl_pcie_write_config(struct
+ }
+
+ out:
+- up_read(&afu->configured_rwsem);
++ cxl_afu_configured_put(afu);
+ return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
+ }
+
--- /dev/null
+From 14a3ae34bfd0bcb1cc12d55b06a8584c11fac6fc Mon Sep 17 00:00:00 2001
+From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Date: Fri, 9 Dec 2016 17:18:50 +1100
+Subject: cxl: Prevent read/write to AFU config space while AFU not configured
+
+From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+
+commit 14a3ae34bfd0bcb1cc12d55b06a8584c11fac6fc upstream.
+
+During EEH recovery, we deconfigure all AFUs whilst leaving the
+corresponding vPHB and virtual PCI device in place.
+
+If something attempts to interact with the AFU's PCI config space (e.g.
+running lspci) after the AFU has been deconfigured and before it's
+reconfigured, cxl_pcie_{read,write}_config() will read invalid values from
+the deconfigured struct cxl_afu and proceed to Oops when they try to
+dereference pointers that have been set to NULL during deconfiguration.
+
+Add a rwsem to struct cxl_afu so we can prevent interaction with config
+space while the AFU is deconfigured.
+
+Reported-by: Pradipta Ghosh <pradghos@in.ibm.com>
+Suggested-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
+Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/misc/cxl/cxl.h | 2 +
+ drivers/misc/cxl/main.c | 3 +-
+ drivers/misc/cxl/pci.c | 2 +
+ drivers/misc/cxl/vphb.c | 51 +++++++++++++++++++++++++++---------------------
+ 4 files changed, 35 insertions(+), 23 deletions(-)
+
+--- a/drivers/misc/cxl/cxl.h
++++ b/drivers/misc/cxl/cxl.h
+@@ -418,6 +418,8 @@ struct cxl_afu {
+ struct dentry *debugfs;
+ struct mutex contexts_lock;
+ spinlock_t afu_cntl_lock;
++ /* Used to block access to AFU config space while deconfigured */
++ struct rw_semaphore configured_rwsem;
+
+ /* AFU error buffer fields and bin attribute for sysfs */
+ u64 eb_len, eb_offset;
+--- a/drivers/misc/cxl/main.c
++++ b/drivers/misc/cxl/main.c
+@@ -268,7 +268,8 @@ struct cxl_afu *cxl_alloc_afu(struct cxl
+ idr_init(&afu->contexts_idr);
+ mutex_init(&afu->contexts_lock);
+ spin_lock_init(&afu->afu_cntl_lock);
+-
++ init_rwsem(&afu->configured_rwsem);
++ down_write(&afu->configured_rwsem);
+ afu->prefault_mode = CXL_PREFAULT_NONE;
+ afu->irqs_max = afu->adapter->user_irqs;
+
+--- a/drivers/misc/cxl/pci.c
++++ b/drivers/misc/cxl/pci.c
+@@ -1129,6 +1129,7 @@ static int pci_configure_afu(struct cxl_
+ if ((rc = cxl_native_register_psl_irq(afu)))
+ goto err2;
+
++ up_write(&afu->configured_rwsem);
+ return 0;
+
+ err2:
+@@ -1141,6 +1142,7 @@ err1:
+
+ static void pci_deconfigure_afu(struct cxl_afu *afu)
+ {
++ down_write(&afu->configured_rwsem);
+ cxl_native_release_psl_irq(afu);
+ if (afu->adapter->native->sl_ops->release_serr_irq)
+ afu->adapter->native->sl_ops->release_serr_irq(afu);
+--- a/drivers/misc/cxl/vphb.c
++++ b/drivers/misc/cxl/vphb.c
+@@ -76,23 +76,22 @@ static int cxl_pcie_cfg_record(u8 bus, u
+ return (bus << 8) + devfn;
+ }
+
+-static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
+- struct cxl_afu **_afu, int *_record)
++static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus)
+ {
+- struct pci_controller *phb;
+- struct cxl_afu *afu;
+- int record;
++ struct pci_controller *phb = bus ? pci_bus_to_host(bus) : NULL;
+
+- phb = pci_bus_to_host(bus);
+- if (phb == NULL)
+- return PCIBIOS_DEVICE_NOT_FOUND;
++ return phb ? phb->private_data : NULL;
++}
++
++static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
++ struct cxl_afu *afu, int *_record)
++{
++ int record;
+
+- afu = (struct cxl_afu *)phb->private_data;
+ record = cxl_pcie_cfg_record(bus->number, devfn);
+ if (record > afu->crs_num)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+- *_afu = afu;
+ *_record = record;
+ return 0;
+ }
+@@ -106,9 +105,14 @@ static int cxl_pcie_read_config(struct p
+ u16 val16;
+ u32 val32;
+
+- rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
++ afu = pci_bus_to_afu(bus);
++ /* Grab a reader lock on afu. */
++ if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
++ return PCIBIOS_DEVICE_NOT_FOUND;
++
++ rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+ if (rc)
+- return rc;
++ goto out;
+
+ switch (len) {
+ case 1:
+@@ -127,10 +131,9 @@ static int cxl_pcie_read_config(struct p
+ WARN_ON(1);
+ }
+
+- if (rc)
+- return PCIBIOS_DEVICE_NOT_FOUND;
+-
+- return PCIBIOS_SUCCESSFUL;
++out:
++ up_read(&afu->configured_rwsem);
++ return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+ }
+
+ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+@@ -139,9 +142,14 @@ static int cxl_pcie_write_config(struct
+ int rc, record;
+ struct cxl_afu *afu;
+
+- rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
++ afu = pci_bus_to_afu(bus);
++ /* Grab a reader lock on afu. */
++ if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
++ return PCIBIOS_DEVICE_NOT_FOUND;
++
++ rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+ if (rc)
+- return rc;
++ goto out;
+
+ switch (len) {
+ case 1:
+@@ -157,10 +165,9 @@ static int cxl_pcie_write_config(struct
+ WARN_ON(1);
+ }
+
+- if (rc)
+- return PCIBIOS_SET_FAILED;
+-
+- return PCIBIOS_SUCCESSFUL;
++out:
++ up_read(&afu->configured_rwsem);
++ return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
+ }
+
+ static struct pci_ops cxl_pcie_pci_ops =
--- /dev/null
+From d0c424971f70501ec0a0364117b9934db039c9cc Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Wed, 11 Jan 2017 11:50:07 +1100
+Subject: drivers/pci/hotplug: Fix initial state for empty slot
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit d0c424971f70501ec0a0364117b9934db039c9cc upstream.
+
+In PowerNV PCI hotplug driver, the initial PCI slot's state is set
+to PNV_PHP_STATE_POPULATED if no PCI devices are connected to the
+slot. The PCI devices that are hot added to the slot won't be probed
+and populated because of the check in pnv_php_enable():
+
+ /* Check if the slot has been configured */
+ if (php_slot->state != PNV_PHP_STATE_REGISTERED)
+ return 0;
+
+This fixes the issue by leaving the slot in PNV_PHP_STATE_REGISTERED
+state initially if nothing is connected to the slot.
+
+Fixes: 360aebd85a4 ("drivers/pci/hotplug: Support surprise hotplug in powernv driver")
+Reported-by: Hank Chang <hankmax0000@gmail.com>
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Tested-by: Willie Liauw <williel@supermicro.com.tw>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pnv_php.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/drivers/pci/hotplug/pnv_php.c
++++ b/drivers/pci/hotplug/pnv_php.c
+@@ -436,9 +436,21 @@ static int pnv_php_enable(struct pnv_php
+ if (ret)
+ return ret;
+
+- /* Proceed if there have nothing behind the slot */
+- if (presence == OPAL_PCI_SLOT_EMPTY)
++ /*
++ * Proceed if there have nothing behind the slot. However,
++ * we should leave the slot in registered state at the
++ * beginning. Otherwise, the PCI devices inserted afterwards
++ * won't be probed and populated.
++ */
++ if (presence == OPAL_PCI_SLOT_EMPTY) {
++ if (!php_slot->power_state_check) {
++ php_slot->power_state_check = true;
++
++ return 0;
++ }
++
+ goto scan;
++ }
+
+ /*
+ * If the power supply to the slot is off, we can't detect
--- /dev/null
+From d7d55536c6cd1f80295b6d7483ad0587b148bde4 Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Wed, 11 Jan 2017 11:50:06 +1100
+Subject: drivers/pci/hotplug: Handle presence detection change properly
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit d7d55536c6cd1f80295b6d7483ad0587b148bde4 upstream.
+
+The surprise hotplug is driven by interrupt in PowerNV PCI hotplug
+driver. In the interrupt handler, pnv_php_interrupt(), we wrongly bail
+when pnv_pci_get_presence_state() returns zero (success). This causes
+the presence change event to always be ignored.
+
+This fixes the issue by bailing on error (non-zero value) returned
+from pnv_pci_get_presence_state().
+
+Fixes: 360aebd85a4 ("drivers/pci/hotplug: Support surprise hotplug in powernv driver")
+Reported-by: Hank Chang <hankmax0000@gmail.com>
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Tested-by: Willie Liauw <williel@supermicro.com.tw>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pnv_php.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/hotplug/pnv_php.c
++++ b/drivers/pci/hotplug/pnv_php.c
+@@ -713,8 +713,12 @@ static irqreturn_t pnv_php_interrupt(int
+ added = !!(lsts & PCI_EXP_LNKSTA_DLLLA);
+ } else if (sts & PCI_EXP_SLTSTA_PDC) {
+ ret = pnv_pci_get_presence_state(php_slot->id, &presence);
+- if (!ret)
++ if (ret) {
++ dev_warn(&pdev->dev, "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n",
++ php_slot->name, ret, sts);
+ return IRQ_HANDLED;
++ }
++
+ added = !!(presence == OPAL_PCI_SLOT_PRESENT);
+ } else {
+ return IRQ_NONE;
--- /dev/null
+From 93faccbbfa958a9668d3ab4e30f38dd205cee8d8 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 1 Feb 2017 06:06:16 +1300
+Subject: fs: Better permission checking for submounts
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 93faccbbfa958a9668d3ab4e30f38dd205cee8d8 upstream.
+
+To support unprivileged users mounting filesystems two permission
+checks have to be performed: a test to see if the user is allowed to
+create a mount in the mount namespace, and a test to see if
+the user is allowed to access the specified filesystem.
+
+The automount case is special in that mounting the original filesystem
+grants permission to mount the sub-filesystems, to any user who
+happens to stumble across their mountpoint and satisfies the
+ordinary filesystem permission checks.
+
+Attempting to handle the automount case by using override_creds
+almost works. It preserves the idea that permission to mount
+the original filesystem is permission to mount the sub-filesystem.
+Unfortunately using override_creds messes up the filesystem's
+ordinary permission checks.
+
+Solve this by being explicit that a mount is a submount by introducing
+vfs_submount, and using it where appropriate.
+
+vfs_submount uses a new internal mount flag, MS_SUBMOUNT, to let
+sget and friends know that a mount is a submount so they can take appropriate
+action.
+
+sget and sget_userns are modified to not perform any permission checks
+on submounts.
+
+follow_automount is modified to stop using override_creds as that
+has proven problematic.
+
+do_mount is modified to always remove the new MS_SUBMOUNT flag so
+that we know userspace will never be able to specify it.
+
+autofs4 is modified to stop using current_real_cred that was put in
+there to handle the previous version of submount permission checking.
+
+cifs is modified to pass the mountpoint all of the way down to vfs_submount.
+
+debugfs is modified to pass the mountpoint all of the way down to
+trace_automount by adding a new parameter. To make this change easier
+a new typedef debugfs_automount_t is introduced to capture the type of
+the debugfs automount function.
+
+Fixes: 069d5ac9ae0d ("autofs: Fix automounts by using current_real_cred()->uid")
+Fixes: aeaa4a79ff6a ("fs: Call d_automount with the filesystems creds")
+Reviewed-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Reviewed-by: Seth Forshee <seth.forshee@canonical.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/afs/mntpt.c | 2 +-
+ fs/autofs4/waitq.c | 4 ++--
+ fs/cifs/cifs_dfs_ref.c | 7 ++++---
+ fs/debugfs/inode.c | 8 ++++----
+ fs/namei.c | 3 ---
+ fs/namespace.c | 17 ++++++++++++++++-
+ fs/nfs/namespace.c | 2 +-
+ fs/nfs/nfs4namespace.c | 2 +-
+ fs/super.c | 13 ++++++++++---
+ include/linux/debugfs.h | 3 ++-
+ include/linux/mount.h | 3 +++
+ include/uapi/linux/fs.h | 1 +
+ kernel/trace/trace.c | 4 ++--
+ 13 files changed, 47 insertions(+), 22 deletions(-)
+
+--- a/fs/afs/mntpt.c
++++ b/fs/afs/mntpt.c
+@@ -202,7 +202,7 @@ static struct vfsmount *afs_mntpt_do_aut
+
+ /* try and do the mount */
+ _debug("--- attempting mount %s -o %s ---", devname, options);
+- mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
++ mnt = vfs_submount(mntpt, &afs_fs_type, devname, options);
+ _debug("--- mount result %p ---", mnt);
+
+ free_page((unsigned long) devname);
+--- a/fs/autofs4/waitq.c
++++ b/fs/autofs4/waitq.c
+@@ -431,8 +431,8 @@ int autofs4_wait(struct autofs_sb_info *
+ memcpy(&wq->name, &qstr, sizeof(struct qstr));
+ wq->dev = autofs4_get_dev(sbi);
+ wq->ino = autofs4_get_ino(sbi);
+- wq->uid = current_real_cred()->uid;
+- wq->gid = current_real_cred()->gid;
++ wq->uid = current_cred()->uid;
++ wq->gid = current_cred()->gid;
+ wq->pid = pid;
+ wq->tgid = tgid;
+ wq->status = -EINTR; /* Status return if interrupted */
+--- a/fs/cifs/cifs_dfs_ref.c
++++ b/fs/cifs/cifs_dfs_ref.c
+@@ -245,7 +245,8 @@ compose_mount_options_err:
+ * @fullpath: full path in UNC format
+ * @ref: server's referral
+ */
+-static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
++static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt,
++ struct cifs_sb_info *cifs_sb,
+ const char *fullpath, const struct dfs_info3_param *ref)
+ {
+ struct vfsmount *mnt;
+@@ -259,7 +260,7 @@ static struct vfsmount *cifs_dfs_do_refm
+ if (IS_ERR(mountdata))
+ return (struct vfsmount *)mountdata;
+
+- mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata);
++ mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata);
+ kfree(mountdata);
+ kfree(devname);
+ return mnt;
+@@ -334,7 +335,7 @@ static struct vfsmount *cifs_dfs_do_auto
+ mnt = ERR_PTR(-EINVAL);
+ break;
+ }
+- mnt = cifs_dfs_do_refmount(cifs_sb,
++ mnt = cifs_dfs_do_refmount(mntpt, cifs_sb,
+ full_path, referrals + i);
+ cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n",
+ __func__, referrals[i].node_name, mnt);
+--- a/fs/debugfs/inode.c
++++ b/fs/debugfs/inode.c
+@@ -187,9 +187,9 @@ static const struct super_operations deb
+
+ static struct vfsmount *debugfs_automount(struct path *path)
+ {
+- struct vfsmount *(*f)(void *);
+- f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
+- return f(d_inode(path->dentry)->i_private);
++ debugfs_automount_t f;
++ f = (debugfs_automount_t)path->dentry->d_fsdata;
++ return f(path->dentry, d_inode(path->dentry)->i_private);
+ }
+
+ static const struct dentry_operations debugfs_dops = {
+@@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
+ */
+ struct dentry *debugfs_create_automount(const char *name,
+ struct dentry *parent,
+- struct vfsmount *(*f)(void *),
++ debugfs_automount_t f,
+ void *data)
+ {
+ struct dentry *dentry = start_creating(name, parent);
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -1100,7 +1100,6 @@ static int follow_automount(struct path
+ bool *need_mntput)
+ {
+ struct vfsmount *mnt;
+- const struct cred *old_cred;
+ int err;
+
+ if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
+@@ -1129,9 +1128,7 @@ static int follow_automount(struct path
+ if (nd->total_link_count >= 40)
+ return -ELOOP;
+
+- old_cred = override_creds(&init_cred);
+ mnt = path->dentry->d_op->d_automount(path);
+- revert_creds(old_cred);
+ if (IS_ERR(mnt)) {
+ /*
+ * The filesystem is allowed to return -EISDIR here to indicate
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -995,6 +995,21 @@ vfs_kern_mount(struct file_system_type *
+ }
+ EXPORT_SYMBOL_GPL(vfs_kern_mount);
+
++struct vfsmount *
++vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
++ const char *name, void *data)
++{
++ /* Until it is worked out how to pass the user namespace
++ * through from the parent mount to the submount don't support
++ * unprivileged mounts with submounts.
++ */
++ if (mountpoint->d_sb->s_user_ns != &init_user_ns)
++ return ERR_PTR(-EPERM);
++
++ return vfs_kern_mount(type, MS_SUBMOUNT, name, data);
++}
++EXPORT_SYMBOL_GPL(vfs_submount);
++
+ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
+ int flag)
+ {
+@@ -2779,7 +2794,7 @@ long do_mount(const char *dev_name, cons
+
+ flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
+ MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
+- MS_STRICTATIME | MS_NOREMOTELOCK);
++ MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT);
+
+ if (flags & MS_REMOUNT)
+ retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
+--- a/fs/nfs/namespace.c
++++ b/fs/nfs/namespace.c
+@@ -226,7 +226,7 @@ static struct vfsmount *nfs_do_clone_mou
+ const char *devname,
+ struct nfs_clone_mount *mountdata)
+ {
+- return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
++ return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
+ }
+
+ /**
+--- a/fs/nfs/nfs4namespace.c
++++ b/fs/nfs/nfs4namespace.c
+@@ -279,7 +279,7 @@ static struct vfsmount *try_location(str
+ mountdata->hostname,
+ mountdata->mnt_path);
+
+- mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
++ mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
+ if (!IS_ERR(mnt))
+ break;
+ }
+--- a/fs/super.c
++++ b/fs/super.c
+@@ -470,7 +470,7 @@ struct super_block *sget_userns(struct f
+ struct super_block *old;
+ int err;
+
+- if (!(flags & MS_KERNMOUNT) &&
++ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
+ !(type->fs_flags & FS_USERNS_MOUNT) &&
+ !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+@@ -500,7 +500,7 @@ retry:
+ }
+ if (!s) {
+ spin_unlock(&sb_lock);
+- s = alloc_super(type, flags, user_ns);
++ s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
+ if (!s)
+ return ERR_PTR(-ENOMEM);
+ goto retry;
+@@ -541,8 +541,15 @@ struct super_block *sget(struct file_sys
+ {
+ struct user_namespace *user_ns = current_user_ns();
+
++ /* We don't yet pass the user namespace of the parent
++ * mount through to here so always use &init_user_ns
++ * until that changes.
++ */
++ if (flags & MS_SUBMOUNT)
++ user_ns = &init_user_ns;
++
+ /* Ensure the requestor has permissions over the target filesystem */
+- if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
++ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ return sget_userns(type, test, set, flags, user_ns, data);
+--- a/include/linux/debugfs.h
++++ b/include/linux/debugfs.h
+@@ -96,9 +96,10 @@ struct dentry *debugfs_create_dir(const
+ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
+ const char *dest);
+
++typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
+ struct dentry *debugfs_create_automount(const char *name,
+ struct dentry *parent,
+- struct vfsmount *(*f)(void *),
++ debugfs_automount_t f,
+ void *data);
+
+ void debugfs_remove(struct dentry *dentry);
+--- a/include/linux/mount.h
++++ b/include/linux/mount.h
+@@ -90,6 +90,9 @@ struct file_system_type;
+ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
+ int flags, const char *name,
+ void *data);
++extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
++ struct file_system_type *type,
++ const char *name, void *data);
+
+ extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
+ extern void mark_mounts_for_expiry(struct list_head *mounts);
+--- a/include/uapi/linux/fs.h
++++ b/include/uapi/linux/fs.h
+@@ -132,6 +132,7 @@ struct inodes_stat_t {
+ #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+
+ /* These sb flags are internal to the kernel */
++#define MS_SUBMOUNT (1<<26)
+ #define MS_NOREMOTELOCK (1<<27)
+ #define MS_NOSEC (1<<28)
+ #define MS_BORN (1<<29)
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -7241,7 +7241,7 @@ init_tracer_tracefs(struct trace_array *
+ ftrace_init_tracefs(tr, d_tracer);
+ }
+
+-static struct vfsmount *trace_automount(void *ingore)
++static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
+ {
+ struct vfsmount *mnt;
+ struct file_system_type *type;
+@@ -7254,7 +7254,7 @@ static struct vfsmount *trace_automount(
+ type = get_fs_type("tracefs");
+ if (!type)
+ return NULL;
+- mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
++ mnt = vfs_submount(mntpt, type, "tracefs", NULL);
+ put_filesystem(type);
+ if (IS_ERR(mnt))
+ return NULL;
--- /dev/null
+From 2b0841766a898aba84630fb723989a77a9d3b4e6 Mon Sep 17 00:00:00 2001
+From: Erez Shitrit <erezsh@mellanox.com>
+Date: Wed, 1 Feb 2017 19:10:05 +0200
+Subject: IB/IPoIB: Add destination address when re-queue packet
+
+From: Erez Shitrit <erezsh@mellanox.com>
+
+commit 2b0841766a898aba84630fb723989a77a9d3b4e6 upstream.
+
+When sending a packet to a destination that has not yet been resolved
+via a path query, the driver keeps the skb and tries to re-send it
+when the path is resolved.
+
+But when re-sending via dev_queue_xmit the kernel doesn't call
+dev_hard_header, so IPoIB needs to keep 20 bytes in the skb
+and to put the destination address inside them.
+
+In that way the dev_start_xmit will have the correct destination,
+and the driver won't take the destination from the skb->data, while
+nothing exists there, which causes the packet to be dropped.
+
+The test flow is:
+1. Run the SM on remote node,
+2. Restart the driver.
+3. Ping some destination,
+4. Observe that first ICMP request will be dropped.
+
+Fixes: fc791b633515 ("IB/ipoib: move back IB LL address into the hard header")
+Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
+Signed-off-by: Noa Osherovich <noaos@mellanox.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Tested-by: Yuval Shaia <yuval.shaia@oracle.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/ipoib/ipoib_main.c | 30 +++++++++++++++++-------------
+ 1 file changed, 17 insertions(+), 13 deletions(-)
+
+--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+@@ -701,6 +701,14 @@ int ipoib_check_sm_sendonly_fullmember_s
+ return ret;
+ }
+
++static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
++{
++ struct ipoib_pseudo_header *phdr;
++
++ phdr = (struct ipoib_pseudo_header *)skb_push(skb, sizeof(*phdr));
++ memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
++}
++
+ void ipoib_flush_paths(struct net_device *dev)
+ {
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+@@ -925,8 +933,7 @@ static void neigh_add_path(struct sk_buf
+ }
+ if (skb_queue_len(&neigh->queue) <
+ IPOIB_MAX_PATH_REC_QUEUE) {
+- /* put pseudoheader back on for next time */
+- skb_push(skb, IPOIB_PSEUDO_LEN);
++ push_pseudo_header(skb, neigh->daddr);
+ __skb_queue_tail(&neigh->queue, skb);
+ } else {
+ ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
+@@ -944,10 +951,12 @@ static void neigh_add_path(struct sk_buf
+
+ if (!path->query && path_rec_start(dev, path))
+ goto err_path;
+- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
++ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
++ push_pseudo_header(skb, neigh->daddr);
+ __skb_queue_tail(&neigh->queue, skb);
+- else
++ } else {
+ goto err_drop;
++ }
+ }
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+@@ -983,8 +992,7 @@ static void unicast_arp_send(struct sk_b
+ }
+ if (path) {
+ if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+- /* put pseudoheader back on for next time */
+- skb_push(skb, IPOIB_PSEUDO_LEN);
++ push_pseudo_header(skb, phdr->hwaddr);
+ __skb_queue_tail(&path->queue, skb);
+ } else {
+ ++dev->stats.tx_dropped;
+@@ -1016,8 +1024,7 @@ static void unicast_arp_send(struct sk_b
+ return;
+ } else if ((path->query || !path_rec_start(dev, path)) &&
+ skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+- /* put pseudoheader back on for next time */
+- skb_push(skb, IPOIB_PSEUDO_LEN);
++ push_pseudo_header(skb, phdr->hwaddr);
+ __skb_queue_tail(&path->queue, skb);
+ } else {
+ ++dev->stats.tx_dropped;
+@@ -1098,8 +1105,7 @@ send_using_neigh:
+ }
+
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+- /* put pseudoheader back on for next time */
+- skb_push(skb, sizeof(*phdr));
++ push_pseudo_header(skb, phdr->hwaddr);
+ spin_lock_irqsave(&priv->lock, flags);
+ __skb_queue_tail(&neigh->queue, skb);
+ spin_unlock_irqrestore(&priv->lock, flags);
+@@ -1131,7 +1137,6 @@ static int ipoib_hard_header(struct sk_b
+ unsigned short type,
+ const void *daddr, const void *saddr, unsigned len)
+ {
+- struct ipoib_pseudo_header *phdr;
+ struct ipoib_header *header;
+
+ header = (struct ipoib_header *) skb_push(skb, sizeof *header);
+@@ -1144,8 +1149,7 @@ static int ipoib_hard_header(struct sk_b
+ * destination address into skb hard header so we can figure out where
+ * to send the packet later.
+ */
+- phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
+- memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
++ push_pseudo_header(skb, daddr);
+
+ return IPOIB_HARD_LEN;
+ }
--- /dev/null
+From 0a0007f28304cb9fc87809c86abb80ec71317f20 Mon Sep 17 00:00:00 2001
+From: Feras Daoud <ferasda@mellanox.com>
+Date: Wed, 28 Dec 2016 14:47:23 +0200
+Subject: IB/ipoib: Fix deadlock between rmmod and set_mode
+
+From: Feras Daoud <ferasda@mellanox.com>
+
+commit 0a0007f28304cb9fc87809c86abb80ec71317f20 upstream.
+
+When calling set_mode from sysfs, the call flow takes the sysfs lock
+first and then tries to take rtnl_lock (when calling ipoib_set_mode).
+On the other hand, the rmmod call flow takes the rtnl_lock first
+(when calling unregister_netdev) and then tries to take the sysfs
+lock. Deadlock a->b, b->a.
+
+The problem starts when ipoib_set_mode releases the rtnl_lock and then
+tries to take it again.
+
+ set_mod:
+ [<ffffffff8104f2bd>] ? check_preempt_curr+0x6d/0x90
+ [<ffffffff814fee8e>] __mutex_lock_slowpath+0x13e/0x180
+ [<ffffffff81448655>] ? __rtnl_unlock+0x15/0x20
+ [<ffffffff814fed2b>] mutex_lock+0x2b/0x50
+ [<ffffffff81448675>] rtnl_lock+0x15/0x20
+ [<ffffffffa02ad807>] ipoib_set_mode+0x97/0x160 [ib_ipoib]
+ [<ffffffffa02b5f5b>] set_mode+0x3b/0x80 [ib_ipoib]
+ [<ffffffff8134b840>] dev_attr_store+0x20/0x30
+ [<ffffffff811f0fe5>] sysfs_write_file+0xe5/0x170
+ [<ffffffff8117b068>] vfs_write+0xb8/0x1a0
+ [<ffffffff8117ba81>] sys_write+0x51/0x90
+ [<ffffffff8100b0f2>] system_call_fastpath+0x16/0x1b
+
+ rmmod:
+ [<ffffffff81279ffc>] ? put_dec+0x10c/0x110
+ [<ffffffff8127a2ee>] ? number+0x2ee/0x320
+ [<ffffffff814fe6a5>] schedule_timeout+0x215/0x2e0
+ [<ffffffff8127cc04>] ? vsnprintf+0x484/0x5f0
+ [<ffffffff8127b550>] ? string+0x40/0x100
+ [<ffffffff814fe323>] wait_for_common+0x123/0x180
+ [<ffffffff81060250>] ? default_wake_function+0x0/0x20
+ [<ffffffff8119661e>] ? ifind_fast+0x5e/0xb0
+ [<ffffffff814fe43d>] wait_for_completion+0x1d/0x20
+ [<ffffffff811f2e68>] sysfs_addrm_finish+0x228/0x270
+ [<ffffffff811f2fb3>] sysfs_remove_dir+0xa3/0xf0
+ [<ffffffff81273f66>] kobject_del+0x16/0x40
+ [<ffffffff8134cd14>] device_del+0x184/0x1e0
+ [<ffffffff8144e59b>] netdev_unregister_kobject+0xab/0xc0
+ [<ffffffff8143c05e>] rollback_registered+0xae/0x130
+ [<ffffffff8143c102>] unregister_netdevice+0x22/0x70
+ [<ffffffff8143c16e>] unregister_netdev+0x1e/0x30
+ [<ffffffffa02a91b0>] ipoib_remove_one+0xe0/0x120 [ib_ipoib]
+ [<ffffffffa01ed95f>] ib_unregister_device+0x4f/0x100 [ib_core]
+ [<ffffffffa021f5e1>] mlx4_ib_remove+0x41/0x180 [mlx4_ib]
+ [<ffffffffa01ab771>] mlx4_remove_device+0x71/0x90 [mlx4_core]
+
+Fixes: 862096a8bbf8 ("IB/ipoib: Add more rtnl_link_ops callbacks")
+Cc: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: Feras Daoud <ferasda@mellanox.com>
+Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/ipoib/ipoib_cm.c | 12 +++++++-----
+ drivers/infiniband/ulp/ipoib/ipoib_main.c | 6 ++----
+ 2 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+@@ -1511,12 +1511,14 @@ static ssize_t set_mode(struct device *d
+
+ ret = ipoib_set_mode(dev, buf);
+
+- rtnl_unlock();
++ /* The assumption is that the function ipoib_set_mode returned
++ * with the rtnl held by it, if not the value -EBUSY returned,
++ * then no need to rtnl_unlock
++ */
++ if (ret != -EBUSY)
++ rtnl_unlock();
+
+- if (!ret)
+- return count;
+-
+- return ret;
++ return (!ret || ret == -EBUSY) ? count : ret;
+ }
+
+ static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
+--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
+@@ -468,8 +468,7 @@ int ipoib_set_mode(struct net_device *de
+ priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
+
+ ipoib_flush_paths(dev);
+- rtnl_lock();
+- return 0;
++ return (!rtnl_trylock()) ? -EBUSY : 0;
+ }
+
+ if (!strcmp(buf, "datagram\n")) {
+@@ -478,8 +477,7 @@ int ipoib_set_mode(struct net_device *de
+ dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
+ rtnl_unlock();
+ ipoib_flush_paths(dev);
+- rtnl_lock();
+- return 0;
++ return (!rtnl_trylock()) ? -EBUSY : 0;
+ }
+
+ return -EINVAL;
--- /dev/null
+From 0fd27a88c2e4f548937fd7d93fc6e65c4ad7c278 Mon Sep 17 00:00:00 2001
+From: Leon Romanovsky <leonro@mellanox.com>
+Date: Wed, 18 Jan 2017 14:10:30 +0200
+Subject: IB/mlx5: Fix out-of-bound access
+
+From: Leon Romanovsky <leonro@mellanox.com>
+
+commit 0fd27a88c2e4f548937fd7d93fc6e65c4ad7c278 upstream.
+
+When we initialize the buffer to create an SRQ in the kernel,
+the number of pages allocated was smaller than the number
+actually used by the following mlx5_fill_page_array() call.
+
+Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
+Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
+Reviewed-by: Eli Cohen <eli@mellanox.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/mlx5/srq.c | 11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5
+ int err;
+ int i;
+ struct mlx5_wqe_srq_next_seg *next;
+- int page_shift;
+- int npages;
+
+ err = mlx5_db_alloc(dev->mdev, &srq->db);
+ if (err) {
+@@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5
+ err = -ENOMEM;
+ goto err_db;
+ }
+- page_shift = srq->buf.page_shift;
+
+ srq->head = 0;
+ srq->tail = srq->msrq.max - 1;
+@@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5
+ cpu_to_be16((i + 1) & (srq->msrq.max - 1));
+ }
+
+- npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
+- mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
+- buf_size, page_shift, srq->buf.npages, npages);
+- in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
++ mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
++ in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
+ if (!in->pas) {
+ err = -ENOMEM;
+ goto err_buf;
+@@ -210,7 +205,7 @@ static int create_srq_kernel(struct mlx5
+ }
+ srq->wq_sig = !!srq_signature;
+
+- in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
++ in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
+ in->type == IB_SRQT_XRC)
+ in->user_index = MLX5_IB_DEFAULT_UIDX;
--- /dev/null
+From 6cb72bc1b40bb2c1750ee7a5ebade93bed49a5fb Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+Date: Tue, 14 Feb 2017 10:56:30 -0800
+Subject: IB/srp: Avoid that duplicate responses trigger a kernel bug
+
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+
+commit 6cb72bc1b40bb2c1750ee7a5ebade93bed49a5fb upstream.
+
+After srp_process_rsp() returns there is a short time during which
+the scsi_host_find_tag() call will return a pointer to the SCSI
+command that is being completed. If during that time a duplicate
+response is received, avoid that the following call stack appears:
+
+BUG: unable to handle kernel NULL pointer dereference at (null)
+IP: srp_recv_done+0x450/0x6b0 [ib_srp]
+Oops: 0000 [#1] SMP
+CPU: 10 PID: 0 Comm: swapper/10 Not tainted 4.10.0-rc7-dbg+ #1
+Call Trace:
+ <IRQ>
+ __ib_process_cq+0x4b/0xd0 [ib_core]
+ ib_poll_handler+0x1d/0x70 [ib_core]
+ irq_poll_softirq+0xba/0x120
+ __do_softirq+0xba/0x4c0
+ irq_exit+0xbe/0xd0
+ smp_apic_timer_interrupt+0x38/0x50
+ apic_timer_interrupt+0x90/0xa0
+ </IRQ>
+RIP: srp_recv_done+0x450/0x6b0 [ib_srp] RSP: ffff88046f483e20
+
+Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
+Cc: Israel Rukshin <israelr@mellanox.com>
+Cc: Max Gurtovoy <maxg@mellanox.com>
+Cc: Laurence Oberman <loberman@redhat.com>
+Cc: Steve Feeley <Steve.Feeley@sandisk.com>
+Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/srp/ib_srp.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -1880,9 +1880,11 @@ static void srp_process_rsp(struct srp_r
+ complete(&ch->tsk_mgmt_done);
+ } else {
+ scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
+- if (scmnd) {
++ if (scmnd && scmnd->host_scribble) {
+ req = (void *)scmnd->host_scribble;
+ scmnd = srp_claim_req(ch, req, NULL, scmnd);
++ } else {
++ scmnd = NULL;
+ }
+ if (!scmnd) {
+ shost_printk(KERN_ERR, target->scsi_host,
--- /dev/null
+From d6c58dc40fec35ff6cdb350b53bce0fcf9143709 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+Date: Tue, 14 Feb 2017 10:56:29 -0800
+Subject: IB/SRP: Avoid using IB_MR_TYPE_SG_GAPS
+
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+
+commit d6c58dc40fec35ff6cdb350b53bce0fcf9143709 upstream.
+
+Tests have shown that the following error message is reported when
+using SG-GAPS registration with an mlx5 adapter:
+
+scsi host1: ib_srp: failed RECV status WR flushed (5) for CQE ffff880bd4270eb0
+00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+00000000 0f007806 2500002a ad9fafd1
+scsi host1: ib_srp: reconnect succeeded
+mlx5_0:dump_cqe:262:(pid 7369): dump error cqe
+00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+00000000 0f007806 25000032 00105dd0
+scsi host1: ib_srp: failed FAST REG status memory management operation error (6) for CQE ffff880b92860138
+
+Hence avoid using SG-GAPS memory registrations. Additionally,
+always configure blk_queue_virt_boundary() to avoid triggering
+a mapping failure when using adapters that support SG-GAPS (e.g.
+mlx5).
+
+Fixes: commit ad8e66b4a801 ("IB/srp: fix mr allocation when the device supports sg gaps")
+Fixes: commit 509c5f33f4f6 ("IB/srp: Prevent mapping failures")
+Reported-by: Laurence Oberman <loberman@redhat.com>
+Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
+Cc: Israel Rukshin <israelr@mellanox.com>
+Cc: Max Gurtovoy <maxg@mellanox.com>
+Cc: Leon Romanovsky <leonro@mellanox.com>
+Cc: Mark Bloch <markb@mellanox.com>
+Cc: Yuval Shaia <yuval.shaia@oracle.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/srp/ib_srp.c | 12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -366,7 +366,6 @@ static struct srp_fr_pool *srp_create_fr
+ struct srp_fr_desc *d;
+ struct ib_mr *mr;
+ int i, ret = -EINVAL;
+- enum ib_mr_type mr_type;
+
+ if (pool_size <= 0)
+ goto err;
+@@ -380,13 +379,9 @@ static struct srp_fr_pool *srp_create_fr
+ spin_lock_init(&pool->lock);
+ INIT_LIST_HEAD(&pool->free_list);
+
+- if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+- mr_type = IB_MR_TYPE_SG_GAPS;
+- else
+- mr_type = IB_MR_TYPE_MEM_REG;
+-
+ for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
+- mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
++ mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
++ max_page_list_len);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto destroy_pool;
+@@ -2652,9 +2647,8 @@ static int srp_slave_alloc(struct scsi_d
+ struct Scsi_Host *shost = sdev->host;
+ struct srp_target_port *target = host_to_target(shost);
+ struct srp_device *srp_dev = target->srp_host->srp_dev;
+- struct ib_device *ibdev = srp_dev->dev;
+
+- if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
++ if (true)
+ blk_queue_virt_boundary(sdev->request_queue,
+ ~srp_dev->mr_page_mask);
+
--- /dev/null
+From 0a6fdbdeb1c25e31763c1fb333fa2723a7d2aba6 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+Date: Tue, 14 Feb 2017 10:56:31 -0800
+Subject: IB/srp: Fix race conditions related to task management
+
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+
+commit 0a6fdbdeb1c25e31763c1fb333fa2723a7d2aba6 upstream.
+
+Avoid that srp_process_rsp() overwrites the status information
+in ch if the SRP target response timed out and processing of
+another task management function has already started. Avoid that
+issuing multiple task management functions concurrently triggers
+list corruption. This patch prevents that the following stack
+trace appears in the system log:
+
+WARNING: CPU: 8 PID: 9269 at lib/list_debug.c:52 __list_del_entry_valid+0xbc/0xc0
+list_del corruption. prev->next should be ffffc90004bb7b00, but was ffff8804052ecc68
+CPU: 8 PID: 9269 Comm: sg_reset Tainted: G W 4.10.0-rc7-dbg+ #3
+Call Trace:
+ dump_stack+0x68/0x93
+ __warn+0xc6/0xe0
+ warn_slowpath_fmt+0x4a/0x50
+ __list_del_entry_valid+0xbc/0xc0
+ wait_for_completion_timeout+0x12e/0x170
+ srp_send_tsk_mgmt+0x1ef/0x2d0 [ib_srp]
+ srp_reset_device+0x5b/0x110 [ib_srp]
+ scsi_ioctl_reset+0x1c7/0x290
+ scsi_ioctl+0x12a/0x420
+ sd_ioctl+0x9d/0x100
+ blkdev_ioctl+0x51e/0x9f0
+ block_ioctl+0x38/0x40
+ do_vfs_ioctl+0x8f/0x700
+ SyS_ioctl+0x3c/0x70
+ entry_SYSCALL_64_fastpath+0x18/0xad
+
+Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
+Cc: Israel Rukshin <israelr@mellanox.com>
+Cc: Max Gurtovoy <maxg@mellanox.com>
+Cc: Laurence Oberman <loberman@redhat.com>
+Cc: Steve Feeley <Steve.Feeley@sandisk.com>
+Signed-off-by: Doug Ledford <dledford@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/ulp/srp/ib_srp.c | 45 +++++++++++++++++++++++-------------
+ drivers/infiniband/ulp/srp/ib_srp.h | 1
+ 2 files changed, 30 insertions(+), 16 deletions(-)
+
+--- a/drivers/infiniband/ulp/srp/ib_srp.c
++++ b/drivers/infiniband/ulp/srp/ib_srp.c
+@@ -1872,12 +1872,17 @@ static void srp_process_rsp(struct srp_r
+ if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
+ spin_lock_irqsave(&ch->lock, flags);
+ ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
++ if (rsp->tag == ch->tsk_mgmt_tag) {
++ ch->tsk_mgmt_status = -1;
++ if (be32_to_cpu(rsp->resp_data_len) >= 4)
++ ch->tsk_mgmt_status = rsp->data[3];
++ complete(&ch->tsk_mgmt_done);
++ } else {
++ shost_printk(KERN_ERR, target->scsi_host,
++ "Received tsk mgmt response too late for tag %#llx\n",
++ rsp->tag);
++ }
+ spin_unlock_irqrestore(&ch->lock, flags);
+-
+- ch->tsk_mgmt_status = -1;
+- if (be32_to_cpu(rsp->resp_data_len) >= 4)
+- ch->tsk_mgmt_status = rsp->data[3];
+- complete(&ch->tsk_mgmt_done);
+ } else {
+ scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
+ if (scmnd && scmnd->host_scribble) {
+@@ -2516,19 +2521,18 @@ srp_change_queue_depth(struct scsi_devic
+ }
+
+ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
+- u8 func)
++ u8 func, u8 *status)
+ {
+ struct srp_target_port *target = ch->target;
+ struct srp_rport *rport = target->rport;
+ struct ib_device *dev = target->srp_host->srp_dev->dev;
+ struct srp_iu *iu;
+ struct srp_tsk_mgmt *tsk_mgmt;
++ int res;
+
+ if (!ch->connected || target->qp_in_error)
+ return -1;
+
+- init_completion(&ch->tsk_mgmt_done);
+-
+ /*
+ * Lock the rport mutex to avoid that srp_create_ch_ib() is
+ * invoked while a task management function is being sent.
+@@ -2551,10 +2555,16 @@ static int srp_send_tsk_mgmt(struct srp_
+
+ tsk_mgmt->opcode = SRP_TSK_MGMT;
+ int_to_scsilun(lun, &tsk_mgmt->lun);
+- tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
+ tsk_mgmt->tsk_mgmt_func = func;
+ tsk_mgmt->task_tag = req_tag;
+
++ spin_lock_irq(&ch->lock);
++ ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
++ tsk_mgmt->tag = ch->tsk_mgmt_tag;
++ spin_unlock_irq(&ch->lock);
++
++ init_completion(&ch->tsk_mgmt_done);
++
+ ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
+ DMA_TO_DEVICE);
+ if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
+@@ -2563,13 +2573,15 @@ static int srp_send_tsk_mgmt(struct srp_
+
+ return -1;
+ }
++ res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
++ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
++ if (res > 0 && status)
++ *status = ch->tsk_mgmt_status;
+ mutex_unlock(&rport->mutex);
+
+- if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
+- msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
+- return -1;
++ WARN_ON_ONCE(res < 0);
+
+- return 0;
++ return res > 0 ? 0 : -1;
+ }
+
+ static int srp_abort(struct scsi_cmnd *scmnd)
+@@ -2595,7 +2607,7 @@ static int srp_abort(struct scsi_cmnd *s
+ shost_printk(KERN_ERR, target->scsi_host,
+ "Sending SRP abort for tag %#x\n", tag);
+ if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
+- SRP_TSK_ABORT_TASK) == 0)
++ SRP_TSK_ABORT_TASK, NULL) == 0)
+ ret = SUCCESS;
+ else if (target->rport->state == SRP_RPORT_LOST)
+ ret = FAST_IO_FAIL;
+@@ -2613,14 +2625,15 @@ static int srp_reset_device(struct scsi_
+ struct srp_target_port *target = host_to_target(scmnd->device->host);
+ struct srp_rdma_ch *ch;
+ int i;
++ u8 status;
+
+ shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
+
+ ch = &target->ch[0];
+ if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
+- SRP_TSK_LUN_RESET))
++ SRP_TSK_LUN_RESET, &status))
+ return FAILED;
+- if (ch->tsk_mgmt_status)
++ if (status)
+ return FAILED;
+
+ for (i = 0; i < target->ch_count; i++) {
+--- a/drivers/infiniband/ulp/srp/ib_srp.h
++++ b/drivers/infiniband/ulp/srp/ib_srp.h
+@@ -163,6 +163,7 @@ struct srp_rdma_ch {
+ int max_ti_iu_len;
+ int comp_vector;
+
++ u64 tsk_mgmt_tag;
+ struct completion tsk_mgmt_done;
+ u8 tsk_mgmt_status;
+ bool connected;
--- /dev/null
+From 32677207dcc5e594254b7fb4fb2352b1755b1d5b Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Tue, 7 Feb 2017 12:05:25 -0500
+Subject: ktest: Fix child exit code processing
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 32677207dcc5e594254b7fb4fb2352b1755b1d5b upstream.
+
+The child_exit errno needs to be shifted by 8 bits to compare against the
+return values for the bisect variables.
+
+Fixes: c5dacb88f0a64 ("ktest: Allow overriding bisect test results")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/ktest/ktest.pl | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/ktest/ktest.pl
++++ b/tools/testing/ktest/ktest.pl
+@@ -2629,7 +2629,7 @@ sub do_run_test {
+ }
+
+ waitpid $child_pid, 0;
+- $child_exit = $?;
++ $child_exit = $? >> 8;
+
+ my $end_time = time;
+ $test_time = $end_time - $start_time;
--- /dev/null
+From e1e8a9624f7ba8ead4f056ff558ed070e86fa747 Mon Sep 17 00:00:00 2001
+From: Janosch Frank <frankja@linux.vnet.ibm.com>
+Date: Thu, 2 Feb 2017 16:39:31 +0100
+Subject: KVM: s390: Disable dirty log retrieval for UCONTROL guests
+
+From: Janosch Frank <frankja@linux.vnet.ibm.com>
+
+commit e1e8a9624f7ba8ead4f056ff558ed070e86fa747 upstream.
+
+User controlled KVM guests do not support the dirty log, as they have
+no single gmap that we can check for changes.
+
+As they have no single gmap, kvm->arch.gmap is NULL and all further
+referencing to it for dirty checking will result in a NULL
+dereference.
+
+Let's return -EINVAL if a caller tries to sync dirty logs for a
+UCONTROL guest.
+
+Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.")
+Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
+Reported-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/kvm-s390.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -442,6 +442,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
+ struct kvm_memory_slot *memslot;
+ int is_dirty = 0;
+
++ if (kvm_is_ucontrol(kvm))
++ return -EINVAL;
++
+ mutex_lock(&kvm->slots_lock);
+
+ r = -EINVAL;
--- /dev/null
+From 96794e4ed4d758272c486e1529e431efb7045265 Mon Sep 17 00:00:00 2001
+From: Chao Peng <chao.p.peng@linux.intel.com>
+Date: Tue, 21 Feb 2017 03:50:01 -0500
+Subject: KVM: VMX: use correct vmcs_read/write for guest segment selector/base
+
+From: Chao Peng <chao.p.peng@linux.intel.com>
+
+commit 96794e4ed4d758272c486e1529e431efb7045265 upstream.
+
+Guest segment selector is 16 bit field and guest segment base is natural
+width field. Fix two incorrect invocations accordingly.
+
+Without this patch, build fails when aggressive inlining is used with ICC.
+
+Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3693,7 +3693,7 @@ static void fix_rmode_seg(int seg, struc
+ }
+
+ vmcs_write16(sf->selector, var.selector);
+- vmcs_write32(sf->base, var.base);
++ vmcs_writel(sf->base, var.base);
+ vmcs_write32(sf->limit, var.limit);
+ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
+ }
+@@ -8202,7 +8202,7 @@ static void kvm_flush_pml_buffers(struct
+ static void vmx_dump_sel(char *name, uint32_t sel)
+ {
+ pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
+- name, vmcs_read32(sel),
++ name, vmcs_read16(sel),
+ vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
+ vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
+ vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
--- /dev/null
+From 890030d3c425f49abaa4acf60e20f288b599f980 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 22 Feb 2017 16:16:07 +0100
+Subject: mac80211: don't handle filtered frames within a BA session
+
+From: Felix Fietkau <nbd@nbd.name>
+
+commit 890030d3c425f49abaa4acf60e20f288b599f980 upstream.
+
+When running a BA session, the driver (or the hardware) already takes
+care of retransmitting failed frames, since it has to keep the receiver
+reorder window in sync.
+
+Adding another layer of retransmit around that does not improve
+anything. In fact, it can only lead to some strong reordering with huge
+latency.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/status.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/mac80211/status.c
++++ b/net/mac80211/status.c
+@@ -51,7 +51,8 @@ static void ieee80211_handle_filtered_fr
+ struct ieee80211_hdr *hdr = (void *)skb->data;
+ int ac;
+
+- if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
++ if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER |
++ IEEE80211_TX_CTL_AMPDU)) {
+ ieee80211_free_txskb(&local->hw, skb);
+ return;
+ }
--- /dev/null
+From b7540d8f25c8034de7e4163fc23ac457bf057731 Mon Sep 17 00:00:00 2001
+From: Sara Sharon <sara.sharon@intel.com>
+Date: Mon, 6 Feb 2017 15:28:42 +0200
+Subject: mac80211: don't reorder frames with SN smaller than SSN
+
+From: Sara Sharon <sara.sharon@intel.com>
+
+commit b7540d8f25c8034de7e4163fc23ac457bf057731 upstream.
+
+When RX aggregation starts, the transmitter may continue to send frames
+with an SN smaller than the SSN until the AddBA response is received.
+However, the reorder buffer is already initialized at this point,
+which causes such frames to be dropped as duplicates, since the
+head SN of the reorder buffer is set to the SSN, which is bigger.
+
+Signed-off-by: Sara Sharon <sara.sharon@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/agg-rx.c | 1 +
+ net/mac80211/rx.c | 14 +++++++++++++-
+ net/mac80211/sta_info.h | 6 ++++--
+ 3 files changed, 18 insertions(+), 3 deletions(-)
+
+--- a/net/mac80211/agg-rx.c
++++ b/net/mac80211/agg-rx.c
+@@ -398,6 +398,7 @@ void __ieee80211_start_rx_ba_session(str
+ tid_agg_rx->timeout = timeout;
+ tid_agg_rx->stored_mpdu_num = 0;
+ tid_agg_rx->auto_seq = auto_seq;
++ tid_agg_rx->started = false;
+ tid_agg_rx->reorder_buf_filtered = 0;
+ status = WLAN_STATUS_SUCCESS;
+
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -4,7 +4,7 @@
+ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014 Intel Mobile Communications GmbH
+- * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
++ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+@@ -1034,6 +1034,18 @@ static bool ieee80211_sta_manage_reorder
+ buf_size = tid_agg_rx->buf_size;
+ head_seq_num = tid_agg_rx->head_seq_num;
+
++ /*
++ * If the current MPDU's SN is smaller than the SSN, it shouldn't
++ * be reordered.
++ */
++ if (unlikely(!tid_agg_rx->started)) {
++ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
++ ret = false;
++ goto out;
++ }
++ tid_agg_rx->started = true;
++ }
++
+ /* frame with out of date sequence number */
+ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
+ dev_kfree_skb(skb);
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -189,6 +189,7 @@ struct tid_ampdu_tx {
+ * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
+ * and ssn.
+ * @removed: this session is removed (but might have been found due to RCU)
++ * @started: this session has started (head ssn or higher was received)
+ *
+ * This structure's lifetime is managed by RCU, assignments to
+ * the array holding it must hold the aggregation mutex.
+@@ -212,8 +213,9 @@ struct tid_ampdu_rx {
+ u16 ssn;
+ u16 buf_size;
+ u16 timeout;
+- bool auto_seq;
+- bool removed;
++ u8 auto_seq:1,
++ removed:1,
++ started:1;
+ };
+
+ /**
--- /dev/null
+From a9e9200d8661c1a0be8c39f93deb383dc940de35 Mon Sep 17 00:00:00 2001
+From: Matt Chen <matt.chen@intel.com>
+Date: Sun, 22 Jan 2017 02:16:58 +0800
+Subject: mac80211: flush delayed work when entering suspend
+
+From: Matt Chen <matt.chen@intel.com>
+
+commit a9e9200d8661c1a0be8c39f93deb383dc940de35 upstream.
+
+The issue was found when entering suspend and resume.
+It triggers a warning in:
+mac80211/key.c: ieee80211_enable_keys()
+...
+WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
+ sdata->crypto_tx_tailroom_pending_dec);
+...
+
+It shows that sdata->crypto_tx_tailroom_pending_dec isn't cleaned up
+by the delayed work during suspend. Add a flush_delayed_work() call to fix it.
+
+Signed-off-by: Matt Chen <matt.chen@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/pm.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/mac80211/pm.c
++++ b/net/mac80211/pm.c
+@@ -168,6 +168,7 @@ int __ieee80211_suspend(struct ieee80211
+ break;
+ }
+
++ flush_delayed_work(&sdata->dec_tailroom_needed_wk);
+ drv_remove_interface(local, sdata);
+ }
+
--- /dev/null
+From 19d19e960598161be92a7e4828eb7706c6410ce6 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Mon, 27 Feb 2017 09:38:11 +0100
+Subject: mac80211: use driver-indicated transmitter STA only for data frames
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 19d19e960598161be92a7e4828eb7706c6410ce6 upstream.
+
+When I originally introduced using the driver-indicated station as an
+optimisation to avoid the hashtable lookup/iteration, of course it
+wasn't intended to really functionally change anything.
+
+I neglected, however, to take into account VLAN interfaces, which have
+the property that management and data frames are handled differently:
+data frames go directly to the station and the VLAN while management
+frames continue to be processed over the underlying/associated AP-type
+interface. As a consequence, when a driver used this optimisation for
+management frames and the user enabled VLANs, my change broke things
+since any management frames, particularly disassoc/deauth, were missed
+by hostapd.
+
+Fix this by restoring the original code path for non-data frames, they
+aren't critical for performance to begin with.
+
+This fixes https://bugzilla.kernel.org/show_bug.cgi?id=194713.
+
+Big thanks goes to Jarek who bisected the issue and provided a very
+detailed bug report, including the crucial information that he was
+using VLANs in his configuration.
+
+Fixes: 771e846bea9e ("mac80211: allow passing transmitter station on RX")
+Reported-and-tested-by: Jarek Kamiński <jarek@freeside.be>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/mac80211/rx.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -4092,15 +4092,17 @@ static void __ieee80211_rx_handle_packet
+ ieee80211_is_beacon(hdr->frame_control)))
+ ieee80211_scan_rx(local, skb);
+
+- if (pubsta) {
+- rx.sta = container_of(pubsta, struct sta_info, sta);
+- rx.sdata = rx.sta->sdata;
+- if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
+- return;
+- goto out;
+- } else if (ieee80211_is_data(fc)) {
++ if (ieee80211_is_data(fc)) {
+ struct sta_info *sta, *prev_sta;
+
++ if (pubsta) {
++ rx.sta = container_of(pubsta, struct sta_info, sta);
++ rx.sdata = rx.sta->sdata;
++ if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
++ return;
++ goto out;
++ }
++
+ prev_sta = NULL;
+
+ for_each_sta_info(local, hdr->addr2, sta, tmp) {
--- /dev/null
+From ee194289502a6901cc77dc9a893bf2afd351ac5e Mon Sep 17 00:00:00 2001
+From: Boris Brezillon <boris.brezillon@free-electrons.com>
+Date: Mon, 28 Nov 2016 16:17:56 +0100
+Subject: memory/atmel-ebi: Fix ns <-> cycles conversions
+
+From: Boris Brezillon <boris.brezillon@free-electrons.com>
+
+commit ee194289502a6901cc77dc9a893bf2afd351ac5e upstream.
+
+at91sam9_ebi_get_config() is incorrectly converting timings in clock
+cycles into timings in nanoseconds by multiplying the cycle values by
+the clk rate instead of the clk period.
+
+at91sam9_ebi_xslate_config() has the same problem for the
+tdf_ns -> tdf_cycles conversion.
+
+Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
+Reported-by: Chris Leahy <leahycm@gmail.com>
+Fixes: 6a4ec4cd0888 ("memory: add Atmel EBI (External Bus Interface) driver")
+Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/memory/atmel-ebi.c | 27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+--- a/drivers/memory/atmel-ebi.c
++++ b/drivers/memory/atmel-ebi.c
+@@ -93,7 +93,7 @@ static void at91sam9_ebi_get_config(stru
+ struct at91_ebi_dev_config *conf)
+ {
+ struct at91sam9_smc_generic_fields *fields = &ebid->ebi->sam9;
+- unsigned int clk_rate = clk_get_rate(ebid->ebi->clk);
++ unsigned int clk_period = NSEC_PER_SEC / clk_get_rate(ebid->ebi->clk);
+ struct at91sam9_ebi_dev_config *config = &conf->sam9;
+ struct at91sam9_smc_timings *timings = &config->timings;
+ unsigned int val;
+@@ -102,43 +102,43 @@ static void at91sam9_ebi_get_config(stru
+ config->mode = val & ~AT91_SMC_TDF;
+
+ val = (val & AT91_SMC_TDF) >> 16;
+- timings->tdf_ns = clk_rate * val;
++ timings->tdf_ns = clk_period * val;
+
+ regmap_fields_read(fields->setup, conf->cs, &val);
+ timings->ncs_rd_setup_ns = (val >> 24) & 0x1f;
+ timings->ncs_rd_setup_ns += ((val >> 29) & 0x1) * 128;
+- timings->ncs_rd_setup_ns *= clk_rate;
++ timings->ncs_rd_setup_ns *= clk_period;
+ timings->nrd_setup_ns = (val >> 16) & 0x1f;
+ timings->nrd_setup_ns += ((val >> 21) & 0x1) * 128;
+- timings->nrd_setup_ns *= clk_rate;
++ timings->nrd_setup_ns *= clk_period;
+ timings->ncs_wr_setup_ns = (val >> 8) & 0x1f;
+ timings->ncs_wr_setup_ns += ((val >> 13) & 0x1) * 128;
+- timings->ncs_wr_setup_ns *= clk_rate;
++ timings->ncs_wr_setup_ns *= clk_period;
+ timings->nwe_setup_ns = val & 0x1f;
+ timings->nwe_setup_ns += ((val >> 5) & 0x1) * 128;
+- timings->nwe_setup_ns *= clk_rate;
++ timings->nwe_setup_ns *= clk_period;
+
+ regmap_fields_read(fields->pulse, conf->cs, &val);
+ timings->ncs_rd_pulse_ns = (val >> 24) & 0x3f;
+ timings->ncs_rd_pulse_ns += ((val >> 30) & 0x1) * 256;
+- timings->ncs_rd_pulse_ns *= clk_rate;
++ timings->ncs_rd_pulse_ns *= clk_period;
+ timings->nrd_pulse_ns = (val >> 16) & 0x3f;
+ timings->nrd_pulse_ns += ((val >> 22) & 0x1) * 256;
+- timings->nrd_pulse_ns *= clk_rate;
++ timings->nrd_pulse_ns *= clk_period;
+ timings->ncs_wr_pulse_ns = (val >> 8) & 0x3f;
+ timings->ncs_wr_pulse_ns += ((val >> 14) & 0x1) * 256;
+- timings->ncs_wr_pulse_ns *= clk_rate;
++ timings->ncs_wr_pulse_ns *= clk_period;
+ timings->nwe_pulse_ns = val & 0x3f;
+ timings->nwe_pulse_ns += ((val >> 6) & 0x1) * 256;
+- timings->nwe_pulse_ns *= clk_rate;
++ timings->nwe_pulse_ns *= clk_period;
+
+ regmap_fields_read(fields->cycle, conf->cs, &val);
+ timings->nrd_cycle_ns = (val >> 16) & 0x7f;
+ timings->nrd_cycle_ns += ((val >> 23) & 0x3) * 256;
+- timings->nrd_cycle_ns *= clk_rate;
++ timings->nrd_cycle_ns *= clk_period;
+ timings->nwe_cycle_ns = val & 0x7f;
+ timings->nwe_cycle_ns += ((val >> 7) & 0x3) * 256;
+- timings->nwe_cycle_ns *= clk_rate;
++ timings->nwe_cycle_ns *= clk_period;
+ }
+
+ static int at91_xlate_timing(struct device_node *np, const char *prop,
+@@ -334,6 +334,7 @@ static int at91sam9_ebi_apply_config(str
+ struct at91_ebi_dev_config *conf)
+ {
+ unsigned int clk_rate = clk_get_rate(ebid->ebi->clk);
++ unsigned int clk_period = NSEC_PER_SEC / clk_rate;
+ struct at91sam9_ebi_dev_config *config = &conf->sam9;
+ struct at91sam9_smc_timings *timings = &config->timings;
+ struct at91sam9_smc_generic_fields *fields = &ebid->ebi->sam9;
+@@ -376,7 +377,7 @@ static int at91sam9_ebi_apply_config(str
+ val |= AT91SAM9_SMC_NWECYCLE(coded_val);
+ regmap_fields_write(fields->cycle, conf->cs, val);
+
+- val = DIV_ROUND_UP(timings->tdf_ns, clk_rate);
++ val = DIV_ROUND_UP(timings->tdf_ns, clk_period);
+ if (val > AT91_SMC_TDF_MAX)
+ val = AT91_SMC_TDF_MAX;
+ regmap_fields_write(fields->mode, conf->cs,
--- /dev/null
+From 1064f874abc0d05eeed8993815f584d847b72486 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 20 Jan 2017 18:28:35 +1300
+Subject: mnt: Tuck mounts under others instead of creating shadow/side mounts.
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 1064f874abc0d05eeed8993815f584d847b72486 upstream.
+
+Ever since mount propagation was introduced, in cases where a mount is
+propagated to a parent mount and mountpoint pair that is already in use, the
+code has placed the new mount behind the old mount in the mount hash
+table.
+
+This implementation detail is problematic as it allows creating
+arbitrary length mount hash chains.
+
+Furthermore, it invalidates the constraint maintained elsewhere in the
+mount code that a parent mount and a mountpoint pair will have exactly
+one mount upon them, making it hard to deal with and to talk about
+this special case in the mount code.
+
+Modify mount propagation to notice when there is already a mount at
+the parent mount and mountpoint where a new mount is propagating to
+and place that preexisting mount on top of the new mount.
+
+Modify unmount propagation to notice when a mount that is being
+unmounted has another mount on top of it (and no other children), and
+to replace the unmounted mount with the mount on top of it.
+
+Move the MNT_UMOUNT test from __lookup_mnt_last into
+__propagate_umount as that is the only call of __lookup_mnt_last where
+MNT_UMOUNT may be set on any mount visible in the mount hash table.
+
+These modifications allow:
+ - __lookup_mnt_last to be removed.
+ - attach_shadowed to be renamed __attach_mnt and its shadow
+ handling to be removed.
+ - commit_tree to be simplified
+ - copy_tree to be simplified
+
+The result is an easier to understand tree of mounts that does not
+allow creation of arbitrary length hash chains in the mount hash table.
+
+The result is also a very slight userspace visible difference in semantics.
+The following two cases now behave identically, where before order
+mattered:
+
+case 1: (explicit user action)
+ B is a slave of A
+ mount something on A/a , it will propagate to B/a
+ and then mount something on B/a
+
+case 2: (tucked mount)
+ B is a slave of A
+ mount something on B/a
+ and then mount something on A/a
+
+Historically umount A/a would fail in case 1 and succeed in case 2.
+Now umount A/a succeeds in both configurations.
+
+This very small change in semantics appears, if anything, to be a bug
+fix to me, and my survey of userspace leads me to believe that no programs
+will notice or care about this subtle semantic change.
+
+v2: Updated to mnt_change_mountpoint to not call dput or mntput
+and instead to decrement the counts directly. It is guaranteed
+that there will be other references when mnt_change_mountpoint is
+called so this is safe.
+
+v3: Moved put_mountpoint under mount_lock in attach_recursive_mnt
+ As the locking in fs/namespace.c changed between v2 and v3.
+
+v4: Reworked the logic in propagate_mount_busy and __propagate_umount
+ that detects when a mount completely covers another mount.
+
+v5: Removed unnecessary tests whose result is always true in
+ find_topper and attach_recursive_mnt.
+
+v6: Document the user space visible semantic difference.
+
+Fixes: b90fa9ae8f51 ("[PATCH] shared mount handling: bind and rbind")
+Tested-by: Andrei Vagin <avagin@virtuozzo.com>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/mount.h | 1
+ fs/namespace.c | 110 +++++++++++++++++++++++++++++++--------------------------
+ fs/pnode.c | 61 +++++++++++++++++++++++++------
+ fs/pnode.h | 2 +
+ 4 files changed, 111 insertions(+), 63 deletions(-)
+
+--- a/fs/mount.h
++++ b/fs/mount.h
+@@ -89,7 +89,6 @@ static inline int is_mounted(struct vfsm
+ }
+
+ extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
+-extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
+
+ extern int __legitimize_mnt(struct vfsmount *, unsigned);
+ extern bool legitimize_mnt(struct vfsmount *, unsigned);
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -641,28 +641,6 @@ struct mount *__lookup_mnt(struct vfsmou
+ }
+
+ /*
+- * find the last mount at @dentry on vfsmount @mnt.
+- * mount_lock must be held.
+- */
+-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
+-{
+- struct mount *p, *res = NULL;
+- p = __lookup_mnt(mnt, dentry);
+- if (!p)
+- goto out;
+- if (!(p->mnt.mnt_flags & MNT_UMOUNT))
+- res = p;
+- hlist_for_each_entry_continue(p, mnt_hash) {
+- if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
+- break;
+- if (!(p->mnt.mnt_flags & MNT_UMOUNT))
+- res = p;
+- }
+-out:
+- return res;
+-}
+-
+-/*
+ * lookup_mnt - Return the first child mount mounted at path
+ *
+ * "First" means first mounted chronologically. If you create the
+@@ -882,6 +860,13 @@ void mnt_set_mountpoint(struct mount *mn
+ hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
+ }
+
++static void __attach_mnt(struct mount *mnt, struct mount *parent)
++{
++ hlist_add_head_rcu(&mnt->mnt_hash,
++ m_hash(&parent->mnt, mnt->mnt_mountpoint));
++ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
++}
++
+ /*
+ * vfsmount lock must be held for write
+ */
+@@ -890,28 +875,45 @@ static void attach_mnt(struct mount *mnt
+ struct mountpoint *mp)
+ {
+ mnt_set_mountpoint(parent, mp, mnt);
+- hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
+- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
++ __attach_mnt(mnt, parent);
+ }
+
+-static void attach_shadowed(struct mount *mnt,
+- struct mount *parent,
+- struct mount *shadows)
++void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
+ {
+- if (shadows) {
+- hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
+- list_add(&mnt->mnt_child, &shadows->mnt_child);
+- } else {
+- hlist_add_head_rcu(&mnt->mnt_hash,
+- m_hash(&parent->mnt, mnt->mnt_mountpoint));
+- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+- }
++ struct mountpoint *old_mp = mnt->mnt_mp;
++ struct dentry *old_mountpoint = mnt->mnt_mountpoint;
++ struct mount *old_parent = mnt->mnt_parent;
++
++ list_del_init(&mnt->mnt_child);
++ hlist_del_init(&mnt->mnt_mp_list);
++ hlist_del_init_rcu(&mnt->mnt_hash);
++
++ attach_mnt(mnt, parent, mp);
++
++ put_mountpoint(old_mp);
++
++ /*
++ * Safely avoid even the suggestion this code might sleep or
++ * lock the mount hash by taking advantage of the knowledge that
++ * mnt_change_mountpoint will not release the final reference
++ * to a mountpoint.
++ *
++ * During mounting, the mount passed in as the parent mount will
++ * continue to use the old mountpoint and during unmounting, the
++ * old mountpoint will continue to exist until namespace_unlock,
++ * which happens well after mnt_change_mountpoint.
++ */
++ spin_lock(&old_mountpoint->d_lock);
++ old_mountpoint->d_lockref.count--;
++ spin_unlock(&old_mountpoint->d_lock);
++
++ mnt_add_count(old_parent, -1);
+ }
+
+ /*
+ * vfsmount lock must be held for write
+ */
+-static void commit_tree(struct mount *mnt, struct mount *shadows)
++static void commit_tree(struct mount *mnt)
+ {
+ struct mount *parent = mnt->mnt_parent;
+ struct mount *m;
+@@ -929,7 +931,7 @@ static void commit_tree(struct mount *mn
+ n->mounts += n->pending_mounts;
+ n->pending_mounts = 0;
+
+- attach_shadowed(mnt, parent, shadows);
++ __attach_mnt(mnt, parent);
+ touch_mnt_namespace(n);
+ }
+
+@@ -1737,7 +1739,6 @@ struct mount *copy_tree(struct mount *mn
+ continue;
+
+ for (s = r; s; s = next_mnt(s, r)) {
+- struct mount *t = NULL;
+ if (!(flag & CL_COPY_UNBINDABLE) &&
+ IS_MNT_UNBINDABLE(s)) {
+ s = skip_mnt_tree(s);
+@@ -1759,14 +1760,7 @@ struct mount *copy_tree(struct mount *mn
+ goto out;
+ lock_mount_hash();
+ list_add_tail(&q->mnt_list, &res->mnt_list);
+- mnt_set_mountpoint(parent, p->mnt_mp, q);
+- if (!list_empty(&parent->mnt_mounts)) {
+- t = list_last_entry(&parent->mnt_mounts,
+- struct mount, mnt_child);
+- if (t->mnt_mp != p->mnt_mp)
+- t = NULL;
+- }
+- attach_shadowed(q, parent, t);
++ attach_mnt(q, parent, p->mnt_mp);
+ unlock_mount_hash();
+ }
+ }
+@@ -1967,10 +1961,18 @@ static int attach_recursive_mnt(struct m
+ {
+ HLIST_HEAD(tree_list);
+ struct mnt_namespace *ns = dest_mnt->mnt_ns;
++ struct mountpoint *smp;
+ struct mount *child, *p;
+ struct hlist_node *n;
+ int err;
+
++ /* Preallocate a mountpoint in case the new mounts need
++ * to be tucked under other mounts.
++ */
++ smp = get_mountpoint(source_mnt->mnt.mnt_root);
++ if (IS_ERR(smp))
++ return PTR_ERR(smp);
++
+ /* Is there space to add these mounts to the mount namespace? */
+ if (!parent_path) {
+ err = count_mounts(ns, source_mnt);
+@@ -1997,16 +1999,19 @@ static int attach_recursive_mnt(struct m
+ touch_mnt_namespace(source_mnt->mnt_ns);
+ } else {
+ mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
+- commit_tree(source_mnt, NULL);
++ commit_tree(source_mnt);
+ }
+
+ hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
+ struct mount *q;
+ hlist_del_init(&child->mnt_hash);
+- q = __lookup_mnt_last(&child->mnt_parent->mnt,
+- child->mnt_mountpoint);
+- commit_tree(child, q);
++ q = __lookup_mnt(&child->mnt_parent->mnt,
++ child->mnt_mountpoint);
++ if (q)
++ mnt_change_mountpoint(child, smp, q);
++ commit_tree(child);
+ }
++ put_mountpoint(smp);
+ unlock_mount_hash();
+
+ return 0;
+@@ -2021,6 +2026,11 @@ static int attach_recursive_mnt(struct m
+ cleanup_group_ids(source_mnt, NULL);
+ out:
+ ns->pending_mounts = 0;
++
++ read_seqlock_excl(&mount_lock);
++ put_mountpoint(smp);
++ read_sequnlock_excl(&mount_lock);
++
+ return err;
+ }
+
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -324,6 +324,21 @@ out:
+ return ret;
+ }
+
++static struct mount *find_topper(struct mount *mnt)
++{
++ /* If there is exactly one mount covering mnt completely return it. */
++ struct mount *child;
++
++ if (!list_is_singular(&mnt->mnt_mounts))
++ return NULL;
++
++ child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
++ if (child->mnt_mountpoint != mnt->mnt.mnt_root)
++ return NULL;
++
++ return child;
++}
++
+ /*
+ * return true if the refcount is greater than count
+ */
+@@ -344,9 +359,8 @@ static inline int do_refcount_check(stru
+ */
+ int propagate_mount_busy(struct mount *mnt, int refcnt)
+ {
+- struct mount *m, *child;
++ struct mount *m, *child, *topper;
+ struct mount *parent = mnt->mnt_parent;
+- int ret = 0;
+
+ if (mnt == parent)
+ return do_refcount_check(mnt, refcnt);
+@@ -361,12 +375,24 @@ int propagate_mount_busy(struct mount *m
+
+ for (m = propagation_next(parent, parent); m;
+ m = propagation_next(m, parent)) {
+- child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
+- if (child && list_empty(&child->mnt_mounts) &&
+- (ret = do_refcount_check(child, 1)))
+- break;
++ int count = 1;
++ child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
++ if (!child)
++ continue;
++
++ /* Is there exactly one mount on the child that covers
++ * it completely whose reference should be ignored?
++ */
++ topper = find_topper(child);
++ if (topper)
++ count += 1;
++ else if (!list_empty(&child->mnt_mounts))
++ continue;
++
++ if (do_refcount_check(child, count))
++ return 1;
+ }
+- return ret;
++ return 0;
+ }
+
+ /*
+@@ -383,7 +409,7 @@ void propagate_mount_unlock(struct mount
+
+ for (m = propagation_next(parent, parent); m;
+ m = propagation_next(m, parent)) {
+- child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
++ child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
+ if (child)
+ child->mnt.mnt_flags &= ~MNT_LOCKED;
+ }
+@@ -401,9 +427,11 @@ static void mark_umount_candidates(struc
+
+ for (m = propagation_next(parent, parent); m;
+ m = propagation_next(m, parent)) {
+- struct mount *child = __lookup_mnt_last(&m->mnt,
++ struct mount *child = __lookup_mnt(&m->mnt,
+ mnt->mnt_mountpoint);
+- if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
++ if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
++ continue;
++ if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
+ SET_MNT_MARK(child);
+ }
+ }
+@@ -422,8 +450,8 @@ static void __propagate_umount(struct mo
+
+ for (m = propagation_next(parent, parent); m;
+ m = propagation_next(m, parent)) {
+-
+- struct mount *child = __lookup_mnt_last(&m->mnt,
++ struct mount *topper;
++ struct mount *child = __lookup_mnt(&m->mnt,
+ mnt->mnt_mountpoint);
+ /*
+ * umount the child only if the child has no children
+@@ -432,6 +460,15 @@ static void __propagate_umount(struct mo
+ if (!child || !IS_MNT_MARKED(child))
+ continue;
+ CLEAR_MNT_MARK(child);
++
++ /* If there is exactly one mount covering all of child
++ * replace child with that mount.
++ */
++ topper = find_topper(child);
++ if (topper)
++ mnt_change_mountpoint(child->mnt_parent, child->mnt_mp,
++ topper);
++
+ if (list_empty(&child->mnt_mounts)) {
+ list_del_init(&child->mnt_child);
+ child->mnt.mnt_flags |= MNT_UMOUNT;
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -49,6 +49,8 @@ int get_dominating_id(struct mount *mnt,
+ unsigned int mnt_get_count(struct mount *mnt);
+ void mnt_set_mountpoint(struct mount *, struct mountpoint *,
+ struct mount *);
++void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
++ struct mount *mnt);
+ struct mount *copy_tree(struct mount *, struct dentry *, int);
+ bool is_path_reachable(struct mount *, struct dentry *,
+ const struct path *root);
--- /dev/null
+From 239a3b663647869330955ec59caac0100ef9b60a Mon Sep 17 00:00:00 2001
+From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Date: Tue, 21 Feb 2017 11:28:01 +0100
+Subject: net: mvpp2: fix DMA address calculation in mvpp2_txq_inc_put()
+
+From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+
+commit 239a3b663647869330955ec59caac0100ef9b60a upstream.
+
+When TX descriptors are filled in, the buffer DMA address is split
+between the tx_desc->buf_phys_addr field (high-order bits) and
+tx_desc->packet_offset field (5 low-order bits).
+
+However, when we re-calculate the DMA address from the TX descriptor in
+mvpp2_txq_inc_put(), we do not take tx_desc->packet_offset into
+account. This means that when the DMA address is not aligned on a 32
+bytes boundary, we end up calling dma_unmap_single() with a DMA address
+that was not the one returned by dma_map_single().
+
+This inconsistency is detected by the kernel when DMA_API_DEBUG is
+enabled. We fix this problem by properly calculating the DMA address in
+mvpp2_txq_inc_put().
+
+Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/marvell/mvpp2.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2.c
++++ b/drivers/net/ethernet/marvell/mvpp2.c
+@@ -991,7 +991,7 @@ static void mvpp2_txq_inc_put(struct mvp
+ txq_pcpu->buffs + txq_pcpu->txq_put_index;
+ tx_buf->skb = skb;
+ tx_buf->size = tx_desc->data_size;
+- tx_buf->phys = tx_desc->buf_phys_addr;
++ tx_buf->phys = tx_desc->buf_phys_addr + tx_desc->packet_offset;
+ txq_pcpu->txq_put_index++;
+ if (txq_pcpu->txq_put_index == txq_pcpu->size)
+ txq_pcpu->txq_put_index = 0;
--- /dev/null
+From 86ef58a4e35e8fa66afb5898cf6dec6a3bb29f67 Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Tue, 28 Feb 2017 18:32:48 -0800
+Subject: nfit, libnvdimm: fix interleave set cookie calculation
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 86ef58a4e35e8fa66afb5898cf6dec6a3bb29f67 upstream.
+
+The interleave-set cookie is a sum that sanity checks the composition of
+an interleave set has not changed from when the namespace was initially
+created. The checksum is calculated by sorting the DIMMs by their
+location in the interleave-set. The comparison for the sort must be
+64-bit wide, not byte-by-byte as performed by memcmp() in the broken
+case.
+
+Fix the implementation to accept correct cookie values in addition to
+the Linux "memcmp" order cookies, but only allow correct cookies to be
+generated going forward. It does mean that namespaces created by
+third-party-tooling, or created by newer kernels with this fix, will not
+validate on older kernels. However, there are a couple mitigating
+conditions:
+
+ 1/ platforms with namespace-label capable NVDIMMs are not widely
+ available.
+
+ 2/ interleave-sets with a single-dimm are by definition not affected
+ (nothing to sort). This covers the QEMU-KVM NVDIMM emulation case.
+
+The cookie stored in the namespace label will be fixed by any write to the
+namespace label; the most straightforward way to achieve this is to
+write to the "alt_name" attribute of a namespace in sysfs.
+
+Fixes: eaf961536e16 ("libnvdimm, nfit: add interleave-set state-tracking infrastructure")
+Reported-by: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
+Tested-by: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/nfit/core.c | 16 +++++++++++++++-
+ drivers/nvdimm/namespace_devs.c | 18 ++++++++++++++----
+ drivers/nvdimm/nd.h | 1 +
+ drivers/nvdimm/region_devs.c | 9 +++++++++
+ include/linux/libnvdimm.h | 2 ++
+ 5 files changed, 41 insertions(+), 5 deletions(-)
+
+--- a/drivers/acpi/nfit/core.c
++++ b/drivers/acpi/nfit/core.c
+@@ -1603,7 +1603,7 @@ static size_t sizeof_nfit_set_info(int n
+ + num_mappings * sizeof(struct nfit_set_info_map);
+ }
+
+-static int cmp_map(const void *m0, const void *m1)
++static int cmp_map_compat(const void *m0, const void *m1)
+ {
+ const struct nfit_set_info_map *map0 = m0;
+ const struct nfit_set_info_map *map1 = m1;
+@@ -1612,6 +1612,14 @@ static int cmp_map(const void *m0, const
+ sizeof(u64));
+ }
+
++static int cmp_map(const void *m0, const void *m1)
++{
++ const struct nfit_set_info_map *map0 = m0;
++ const struct nfit_set_info_map *map1 = m1;
++
++ return map0->region_offset - map1->region_offset;
++}
++
+ /* Retrieve the nth entry referencing this spa */
+ static struct acpi_nfit_memory_map *memdev_from_spa(
+ struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
+@@ -1667,6 +1675,12 @@ static int acpi_nfit_init_interleave_set
+ sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
+ cmp_map, NULL);
+ nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
++
++ /* support namespaces created with the wrong sort order */
++ sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
++ cmp_map_compat, NULL);
++ nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
++
+ ndr_desc->nd_set = nd_set;
+ devm_kfree(dev, info);
+
+--- a/drivers/nvdimm/namespace_devs.c
++++ b/drivers/nvdimm/namespace_devs.c
+@@ -1700,6 +1700,7 @@ static int select_pmem_id(struct nd_regi
+ struct device *create_namespace_pmem(struct nd_region *nd_region,
+ struct nd_namespace_label *nd_label)
+ {
++ u64 altcookie = nd_region_interleave_set_altcookie(nd_region);
+ u64 cookie = nd_region_interleave_set_cookie(nd_region);
+ struct nd_label_ent *label_ent;
+ struct nd_namespace_pmem *nspm;
+@@ -1718,7 +1719,11 @@ struct device *create_namespace_pmem(str
+ if (__le64_to_cpu(nd_label->isetcookie) != cookie) {
+ dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n",
+ nd_label->uuid);
+- return ERR_PTR(-EAGAIN);
++ if (__le64_to_cpu(nd_label->isetcookie) != altcookie)
++ return ERR_PTR(-EAGAIN);
++
++ dev_dbg(&nd_region->dev, "valid altcookie in label: %pUb\n",
++ nd_label->uuid);
+ }
+
+ nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+@@ -1733,9 +1738,14 @@ struct device *create_namespace_pmem(str
+ res->name = dev_name(&nd_region->dev);
+ res->flags = IORESOURCE_MEM;
+
+- for (i = 0; i < nd_region->ndr_mappings; i++)
+- if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
+- break;
++ for (i = 0; i < nd_region->ndr_mappings; i++) {
++ if (has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
++ continue;
++ if (has_uuid_at_pos(nd_region, nd_label->uuid, altcookie, i))
++ continue;
++ break;
++ }
++
+ if (i < nd_region->ndr_mappings) {
+ struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+
+--- a/drivers/nvdimm/nd.h
++++ b/drivers/nvdimm/nd.h
+@@ -327,6 +327,7 @@ struct nd_region *to_nd_region(struct de
+ int nd_region_to_nstype(struct nd_region *nd_region);
+ int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
+ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region);
++u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region);
+ void nvdimm_bus_lock(struct device *dev);
+ void nvdimm_bus_unlock(struct device *dev);
+ bool is_nvdimm_bus_locked(struct device *dev);
+--- a/drivers/nvdimm/region_devs.c
++++ b/drivers/nvdimm/region_devs.c
+@@ -505,6 +505,15 @@ u64 nd_region_interleave_set_cookie(stru
+ return 0;
+ }
+
++u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
++{
++ struct nd_interleave_set *nd_set = nd_region->nd_set;
++
++ if (nd_set)
++ return nd_set->altcookie;
++ return 0;
++}
++
+ void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
+ {
+ struct nd_label_ent *label_ent, *e;
+--- a/include/linux/libnvdimm.h
++++ b/include/linux/libnvdimm.h
+@@ -70,6 +70,8 @@ struct nd_cmd_desc {
+
+ struct nd_interleave_set {
+ u64 cookie;
++ /* compatibility with initial buggy Linux implementation */
++ u64 altcookie;
+ };
+
+ struct nd_mapping_desc {
--- /dev/null
+From 251af29c320d86071664f02c76f0d063a19fefdf Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Sat, 11 Feb 2017 10:37:38 -0500
+Subject: nlm: Ensure callback code also checks that the files match
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit 251af29c320d86071664f02c76f0d063a19fefdf upstream.
+
+It is not sufficient to just check that the lock pids match when
+granting a callback, we also need to ensure that we're granting
+the callback on the right file.
+
+Reported-by: Pankaj Singh <psingh.ait@gmail.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/lockd/lockd.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/include/linux/lockd/lockd.h
++++ b/include/linux/lockd/lockd.h
+@@ -355,7 +355,8 @@ static inline int nlm_privileged_request
+ static inline int nlm_compare_locks(const struct file_lock *fl1,
+ const struct file_lock *fl2)
+ {
+- return fl1->fl_pid == fl2->fl_pid
++ return file_inode(fl1->fl_file) == file_inode(fl2->fl_file)
++ && fl1->fl_pid == fl2->fl_pid
+ && fl1->fl_owner == fl2->fl_owner
+ && fl1->fl_start == fl2->fl_start
+ && fl1->fl_end == fl2->fl_end
--- /dev/null
+From 0695d7dc1d9f19b82ec2cae24856bddce278cfe6 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 24 Feb 2017 16:43:36 +0100
+Subject: orangefs: Use RCU for destroy_inode
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0695d7dc1d9f19b82ec2cae24856bddce278cfe6 upstream.
+
+freeing of inodes must be RCU-delayed on all filesystems
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/orangefs/super.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/orangefs/super.c
++++ b/fs/orangefs/super.c
+@@ -115,6 +115,13 @@ static struct inode *orangefs_alloc_inod
+ return &orangefs_inode->vfs_inode;
+ }
+
++static void orangefs_i_callback(struct rcu_head *head)
++{
++ struct inode *inode = container_of(head, struct inode, i_rcu);
++ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
++ kmem_cache_free(orangefs_inode_cache, orangefs_inode);
++}
++
+ static void orangefs_destroy_inode(struct inode *inode)
+ {
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+@@ -123,7 +130,7 @@ static void orangefs_destroy_inode(struc
+ "%s: deallocated %p destroying inode %pU\n",
+ __func__, orangefs_inode, get_khandle_from_ino(inode));
+
+- kmem_cache_free(orangefs_inode_cache, orangefs_inode);
++ call_rcu(&inode->i_rcu, orangefs_i_callback);
+ }
+
+ /*
--- /dev/null
+From 303529d6ef1293513c2c73c9ab86489eebb37d08 Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Thu, 16 Feb 2017 10:22:33 +1100
+Subject: pci/hotplug/pnv-php: Disable surprise hotplug capability on conflicts
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit 303529d6ef1293513c2c73c9ab86489eebb37d08 upstream.
+
+The root port or PCIe switch downstream port might have been associated
+with a driver other than pnv-php. The MSI or MSIx might also have been
+enabled by that driver (e.g. pcieport_drv). An attempt to enable MSI then
+triggers the backtrace below:
+
+ PowerPC PowerNV PCI Hotplug Driver version: 0.1
+ ------------[ cut here ]------------
+ WARNING: CPU: 19 PID: 1004 at drivers/pci/msi.c:1071 \
+ __pci_enable_msi_range+0x84/0x4e0
+ NIP [c000000000665c34] __pci_enable_msi_range+0x84/0x4e0
+ LR [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0
+ Call Trace:
+ [c000000384d67600] [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0
+ [c000000384d676e0] [d00000000aa31b04] pnv_php_register+0x564/0x5a0 [pnv_php]
+ [c000000384d677c0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php]
+ [c000000384d678a0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php]
+ [c000000384d67980] [d00000000aa31dfc] pnv_php_init+0x60/0x98 [pnv_php]
+ [c000000384d679f0] [c00000000000cfdc] do_one_initcall+0x6c/0x1d0
+ [c000000384d67ab0] [c000000000b92354] do_init_module+0x94/0x254
+ [c000000384d67b40] [c00000000019719c] load_module+0x258c/0x2c60
+ [c000000384d67d30] [c000000000197bb0] SyS_finit_module+0xf0/0x170
+ [c000000384d67e30] [c00000000000b184] system_call+0x38/0xe0
+
+This fixes the issue by skipping enabling the surprise hotplug
+capability if the MSI or MSIx on the PCI slot's upstream port has
+been enabled by another driver.
+
+Fixes: 360aebd85a4c ("drivers/pci/hotplug: Support surprise hotplug in powernv driver")
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Tested-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pnv_php.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/pci/hotplug/pnv_php.c
++++ b/drivers/pci/hotplug/pnv_php.c
+@@ -799,6 +799,14 @@ static void pnv_php_enable_irq(struct pn
+ struct pci_dev *pdev = php_slot->pdev;
+ int irq, ret;
+
++ /*
++ * The MSI/MSIx interrupt might have been occupied by other
++ * drivers. Don't populate the surprise hotplug capability
++ * in that case.
++ */
++ if (pci_dev_msi_enabled(pdev))
++ return;
++
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_warn(&pdev->dev, "Error %d enabling device\n", ret);
--- /dev/null
+From 36c7c9da40c408a71e5e6bfe12e57dcf549a296d Mon Sep 17 00:00:00 2001
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Date: Thu, 16 Feb 2017 10:22:32 +1100
+Subject: pci/hotplug/pnv-php: Remove WARN_ON() in pnv_php_put_slot()
+
+From: Gavin Shan <gwshan@linux.vnet.ibm.com>
+
+commit 36c7c9da40c408a71e5e6bfe12e57dcf549a296d upstream.
+
+The WARN_ON() causes unnecessary backtrace when putting the parent
+slot, which is likely to be NULL.
+
+ WARNING: CPU: 2 PID: 1071 at drivers/pci/hotplug/pnv_php.c:85 \
+ pnv_php_release+0xcc/0x150 [pnv_php]
+ :
+ Call Trace:
+ [c0000003bc007c10] [d00000000ad613c4] pnv_php_release+0x144/0x150 [pnv_php]
+ [c0000003bc007c40] [c0000000006641d8] pci_hp_deregister+0x238/0x330
+ [c0000003bc007cd0] [d00000000ad61440] pnv_php_unregister_one+0x70/0xa0 [pnv_php]
+ [c0000003bc007d10] [d00000000ad614c0] pnv_php_unregister+0x50/0x80 [pnv_php]
+ [c0000003bc007d40] [d00000000ad61e84] pnv_php_exit+0x50/0xcb4 [pnv_php]
+ [c0000003bc007d70] [c00000000019499c] SyS_delete_module+0x1fc/0x2a0
+ [c0000003bc007e30] [c00000000000b184] system_call+0x38/0xe0
+
+Fixes: 66725152fb9f ("PCI/hotplug: PowerPC PowerNV PCI hotplug driver")
+Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
+Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
+Tested-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pnv_php.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/pci/hotplug/pnv_php.c
++++ b/drivers/pci/hotplug/pnv_php.c
+@@ -82,7 +82,7 @@ static void pnv_php_free_slot(struct kre
+ static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot)
+ {
+
+- if (WARN_ON(!php_slot))
++ if (!php_slot)
+ return;
+
+ kref_put(&php_slot->kref, pnv_php_free_slot);
--- /dev/null
+From 8d254a340efb12b40c4c1ff25a48a4f48f7bbd6b Mon Sep 17 00:00:00 2001
+From: Clemens Gruber <clemens.gruber@pqgruber.com>
+Date: Tue, 13 Dec 2016 16:52:50 +0100
+Subject: pwm: pca9685: Fix period change with same duty cycle
+
+From: Clemens Gruber <clemens.gruber@pqgruber.com>
+
+commit 8d254a340efb12b40c4c1ff25a48a4f48f7bbd6b upstream.
+
+When first implementing support for changing the output frequency, an
+optimization was added to continue the PWM after changing the prescaler
+without having to reprogram the ON and OFF registers for the duty cycle,
+in case the duty cycle stayed the same. This was flawed, because we
+compared the absolute value of the duty cycle in nanoseconds instead of
+the ratio to the period.
+
+Fix the problem by removing the shortcut.
+
+Fixes: 01ec8472009c9 ("pwm-pca9685: Support changing the output frequency")
+Signed-off-by: Clemens Gruber <clemens.gruber@pqgruber.com>
+Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pwm/pwm-pca9685.c | 11 -----------
+ 1 file changed, 11 deletions(-)
+
+--- a/drivers/pwm/pwm-pca9685.c
++++ b/drivers/pwm/pwm-pca9685.c
+@@ -65,7 +65,6 @@
+ #define PCA9685_MAXCHAN 0x10
+
+ #define LED_FULL (1 << 4)
+-#define MODE1_RESTART (1 << 7)
+ #define MODE1_SLEEP (1 << 4)
+ #define MODE2_INVRT (1 << 4)
+ #define MODE2_OUTDRV (1 << 2)
+@@ -117,16 +116,6 @@ static int pca9685_pwm_config(struct pwm
+ udelay(500);
+
+ pca->period_ns = period_ns;
+-
+- /*
+- * If the duty cycle did not change, restart PWM with
+- * the same duty cycle to period ratio and return.
+- */
+- if (duty_ns == pca->duty_ns) {
+- regmap_update_bits(pca->regmap, PCA9685_MODE1,
+- MODE1_RESTART, 0x1);
+- return 0;
+- }
+ } else {
+ dev_err(chip->dev,
+ "prescaler not set: period out of bounds!\n");
--- /dev/null
+From 77759137248f34864a8f7a58bbcebfcf1047504a Mon Sep 17 00:00:00 2001
+From: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+Date: Mon, 20 Feb 2017 14:52:58 +0100
+Subject: s390/chsc: Add exception handler for CHSC instruction
+
+From: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+
+commit 77759137248f34864a8f7a58bbcebfcf1047504a upstream.
+
+Prevent kernel crashes due to unhandled exceptions raised by the CHSC
+instruction which may for example be triggered by invalid ioctl data.
+
+Fixes: 64150adf89df ("s390/cio: Introduce generic synchronous CHSC IOCTL")
+Signed-off-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
+Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/cio/ioasm.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/s390/cio/ioasm.c
++++ b/drivers/s390/cio/ioasm.c
+@@ -165,13 +165,15 @@ int tpi(struct tpi_info *addr)
+ int chsc(void *chsc_area)
+ {
+ typedef struct { char _[4096]; } addr_type;
+- int cc;
++ int cc = -EIO;
+
+ asm volatile(
+ " .insn rre,0xb25f0000,%2,0\n"
+- " ipm %0\n"
++ "0: ipm %0\n"
+ " srl %0,28\n"
+- : "=d" (cc), "=m" (*(addr_type *) chsc_area)
++ "1:\n"
++ EX_TABLE(0b, 1b)
++ : "+d" (cc), "=m" (*(addr_type *) chsc_area)
+ : "d" (chsc_area), "m" (*(addr_type *) chsc_area)
+ : "cc");
+ trace_s390_cio_chsc(chsc_area, cc);
--- /dev/null
+From a63f53e34db8b49675448d03ae324f6c5bc04fe6 Mon Sep 17 00:00:00 2001
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Date: Mon, 30 Jan 2017 15:52:14 +0100
+Subject: s390/dcssblk: fix device size calculation in dcssblk_direct_access()
+
+From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+
+commit a63f53e34db8b49675448d03ae324f6c5bc04fe6 upstream.
+
+Since commit dd22f551 "block: Change direct_access calling convention",
+the device size calculation in dcssblk_direct_access() is off-by-one.
+This results in bdev_direct_access() always returning -ENXIO because the
+returned value is not page aligned.
+
+Fix this by adding 1 to the dev_sz calculation.
+
+Fixes: dd22f551 ("block: Change direct_access calling convention")
+Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/block/dcssblk.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/s390/block/dcssblk.c
++++ b/drivers/s390/block/dcssblk.c
+@@ -892,7 +892,7 @@ dcssblk_direct_access (struct block_devi
+ dev_info = bdev->bd_disk->private_data;
+ if (!dev_info)
+ return -ENODEV;
+- dev_sz = dev_info->end - dev_info->start;
++ dev_sz = dev_info->end - dev_info->start + 1;
+ offset = secnum * 512;
+ *kaddr = (void *) dev_info->start + offset;
+ *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
--- /dev/null
+From a4a81d8eebdc1d209d034f62a082a5131e4242b5 Mon Sep 17 00:00:00 2001
+From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+Date: Tue, 7 Feb 2017 18:09:14 +0100
+Subject: s390/kdump: Use "LINUX" ELF note name instead of "CORE"
+
+From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+
+commit a4a81d8eebdc1d209d034f62a082a5131e4242b5 upstream.
+
+In binutils/libbfd (bfd/elf.c) it is enforced that all s390 specific ELF
+notes like e.g. NT_S390_PREFIX or NT_S390_CTRS have "LINUX" specified
+as note name. Otherwise the notes are ignored.
+
+For /proc/vmcore we currently use "CORE" for these notes.
+
+Up to now this has not been a real problem because the dump analysis tool
+"crash" does not check the note name. But it will break all programs that
+use libbfd for processing ELF notes.
+
+So fix this and use "LINUX" for all s390 specific notes to comply with
+libbfd.
+
+Reported-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
+Reviewed-by: Philipp Rudo <prudo@linux.vnet.ibm.com>
+Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/crash_dump.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/crash_dump.c
++++ b/arch/s390/kernel/crash_dump.c
+@@ -329,7 +329,11 @@ static void *nt_init_name(void *buf, Elf
+
+ static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
+ {
+- return nt_init_name(buf, type, desc, d_len, KEXEC_CORE_NOTE_NAME);
++ const char *note_name = "LINUX";
++
++ if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
++ note_name = KEXEC_CORE_NOTE_NAME;
++ return nt_init_name(buf, type, desc, d_len, note_name);
+ }
+
+ /*
--- /dev/null
+From da8fd820f389a0e29080b14c61bf5cf1d8ef5ca1 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+Date: Sat, 4 Feb 2017 11:40:36 +0100
+Subject: s390: make setup_randomness work
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+commit da8fd820f389a0e29080b14c61bf5cf1d8ef5ca1 upstream.
+
+Commit bcfcbb6bae64 ("s390: add system information as device
+randomness") intended to add some virtual machine specific information
+to the randomness pool.
+
+Unfortunately it uses the page allocator before it is ready to use. As a
+result the page allocator always returns NULL and the setup_randomness
+function never adds anything to the randomness pool.
+
+To fix this use memblock_alloc and memblock_free instead.
+
+Fixes: bcfcbb6bae64 ("s390: add system information as device randomness")
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/setup.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/kernel/setup.c
++++ b/arch/s390/kernel/setup.c
+@@ -819,10 +819,10 @@ static void __init setup_randomness(void
+ {
+ struct sysinfo_3_2_2 *vmms;
+
+- vmms = (struct sysinfo_3_2_2 *) alloc_page(GFP_KERNEL);
+- if (vmms && stsi(vmms, 3, 2, 2) == 0 && vmms->count)
++ vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
++ if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+ add_device_randomness(&vmms, vmms->count);
+- free_page((unsigned long) vmms);
++ memblock_free((unsigned long) vmms, PAGE_SIZE);
+ }
+
+ /*
--- /dev/null
+From 1e4a382fdc0ba8d1a85b758c0811de3a3631085e Mon Sep 17 00:00:00 2001
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Mon, 21 Nov 2016 13:37:48 +0100
+Subject: s390/qdio: clear DSCI prior to scanning multiple input queues
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+commit 1e4a382fdc0ba8d1a85b758c0811de3a3631085e upstream.
+
+For devices with multiple input queues, tiqdio_call_inq_handlers()
+iterates over all input queues and clears the device's DSCI
+during each iteration. If the DSCI is re-armed during one
+of the later iterations, we therefore do not scan the previous
+queues again.
+The re-arming also raises a new adapter interrupt. But its
+handler does not trigger a rescan for the device, as the DSCI
+has already been erroneously cleared.
+This can result in queue stalls on devices with multiple
+input queues.
+
+Fix it by clearing the DSCI just once, prior to scanning the queues.
+
+As the code is moved in front of the loop, we also need to access
+the DSCI directly (ie irq->dsci) instead of going via each queue's
+parent pointer to the same irq. This is not a functional change,
+and a follow-up patch will clean up the other users.
+
+In practice, this bug only affects CQ-enabled HiperSockets devices,
+ie. devices with sysfs-attribute "hsuid" set. Setting a hsuid is
+needed for AF_IUCV socket applications that use HiperSockets
+communication.
+
+Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks")
+Reviewed-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/cio/qdio_thinint.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/s390/cio/qdio_thinint.c
++++ b/drivers/s390/cio/qdio_thinint.c
+@@ -147,11 +147,11 @@ static inline void tiqdio_call_inq_handl
+ struct qdio_q *q;
+ int i;
+
+- for_each_input_queue(irq, q, i) {
+- if (!references_shared_dsci(irq) &&
+- has_multiple_inq_on_dsci(irq))
+- xchg(q->irq_ptr->dsci, 0);
++ if (!references_shared_dsci(irq) &&
++ has_multiple_inq_on_dsci(irq))
++ xchg(irq->dsci, 0);
+
++ for_each_input_queue(irq, q, i) {
+ if (q->u.in.queue_start_poll) {
+ /* skip if polling is enabled or already in work */
+ if (test_and_set_bit(QDIO_QUEUE_IRQS_DISABLED,
--- /dev/null
+From fb94a687d96c570d46332a4a890f1dcb7310e643 Mon Sep 17 00:00:00 2001
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Date: Fri, 24 Feb 2017 07:43:51 +0100
+Subject: s390: TASK_SIZE for kernel threads
+
+From: Martin Schwidefsky <schwidefsky@de.ibm.com>
+
+commit fb94a687d96c570d46332a4a890f1dcb7310e643 upstream.
+
+Return a sensible value if TASK_SIZE is called from a kernel thread.
+
+This gets us around an issue with copy_mount_options that does a magic
+size calculation "TASK_SIZE - (unsigned long)data" while in a kernel
+thread and data pointing to kernel space.
+
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/processor.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/include/asm/processor.h
++++ b/arch/s390/include/asm/processor.h
+@@ -89,7 +89,8 @@ extern void execve_tail(void);
+ * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
+ */
+
+-#define TASK_SIZE_OF(tsk) ((tsk)->mm->context.asce_limit)
++#define TASK_SIZE_OF(tsk) ((tsk)->mm ? \
++ (tsk)->mm->context.asce_limit : TASK_MAX_SIZE)
+ #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
+ (1UL << 30) : (1UL << 41))
+ #define TASK_SIZE TASK_SIZE_OF(current)
--- /dev/null
+From 4920e3cf77347d7d7373552d4839e8d832321313 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+Date: Sun, 5 Feb 2017 23:03:18 +0100
+Subject: s390: use correct input data address for setup_randomness
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+commit 4920e3cf77347d7d7373552d4839e8d832321313 upstream.
+
+The current implementation of setup_randomness uses the stack address
+and therefore the pointer to the SYSIB 3.2.2 block as input data
+address. Furthermore the length of the input data is the number of
+virtual-machine description blocks which is typically one.
+
+This means that typically a single zero byte is fed to
+add_device_randomness.
+
+Fix both of these and use the address of the first virtual machine
+description block as input data address and also use the correct
+length.
+
+Fixes: bcfcbb6bae64 ("s390: add system information as device randomness")
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/setup.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/s390/kernel/setup.c
++++ b/arch/s390/kernel/setup.c
+@@ -821,7 +821,7 @@ static void __init setup_randomness(void
+
+ vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+- add_device_randomness(&vmms, vmms->count);
++ add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
+ memblock_free((unsigned long) vmms, PAGE_SIZE);
+ }
+
--- /dev/null
+From 1c9c858e2ff8ae8024a3d75d2ed080063af43754 Mon Sep 17 00:00:00 2001
+From: Ian Abbott <abbotti@mev.co.uk>
+Date: Fri, 3 Feb 2017 20:25:00 +0000
+Subject: serial: 8250_pci: Add MKS Tenta SCOM-0800 and SCOM-0801 cards
+
+From: Ian Abbott <abbotti@mev.co.uk>
+
+commit 1c9c858e2ff8ae8024a3d75d2ed080063af43754 upstream.
+
+The MKS Instruments SCOM-0800 and SCOM-0801 cards (originally by Tenta
+Technologies) are 3U CompactPCI serial cards with 4 and 8 serial ports,
+respectively. The first 4 ports are implemented by an OX16PCI954 chip,
+and the second 4 ports are implemented by an OX16C954 chip on a local
+bus, bridged by the second PCI function of the OX16PCI954. The ports
+are jumper-selectable as RS-232 and RS-422/485, and the UARTs use a
+non-standard oscillator frequency of 20 MHz (base_baud = 1250000).
+
+Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/serial/8250/8250_pci.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -2688,6 +2688,8 @@ enum pci_board_num_t {
+ pbn_b0_4_1152000_200,
+ pbn_b0_8_1152000_200,
+
++ pbn_b0_4_1250000,
++
+ pbn_b0_2_1843200,
+ pbn_b0_4_1843200,
+
+@@ -2919,6 +2921,13 @@ static struct pciserial_board pci_boards
+ .uart_offset = 0x200,
+ },
+
++ [pbn_b0_4_1250000] = {
++ .flags = FL_BASE0,
++ .num_ports = 4,
++ .base_baud = 1250000,
++ .uart_offset = 8,
++ },
++
+ [pbn_b0_2_1843200] = {
+ .flags = FL_BASE0,
+ .num_ports = 2,
+@@ -5549,6 +5558,10 @@ static struct pci_device_id serial_pci_t
+ { PCI_DEVICE(0x1c29, 0x1108), .driver_data = pbn_fintek_8 },
+ { PCI_DEVICE(0x1c29, 0x1112), .driver_data = pbn_fintek_12 },
+
++ /* MKS Tenta SCOM-080x serial cards */
++ { PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 },
++ { PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 },
++
+ /*
+ * These entries match devices with class COMMUNICATION_SERIAL,
+ * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL
tty-n_hdlc-get-rid-of-racy-n_hdlc.tbuf.patch
+serial-8250_pci-add-mks-tenta-scom-0800-and-scom-0801-cards.patch
+kvm-s390-disable-dirty-log-retrieval-for-ucontrol-guests.patch
+kvm-vmx-use-correct-vmcs_read-write-for-guest-segment-selector-base.patch
+bluetooth-add-another-ar3012-04ca-3018-device.patch
+s390-qdio-clear-dsci-prior-to-scanning-multiple-input-queues.patch
+s390-dcssblk-fix-device-size-calculation-in-dcssblk_direct_access.patch
+s390-kdump-use-linux-elf-note-name-instead-of-core.patch
+s390-chsc-add-exception-handler-for-chsc-instruction.patch
+s390-task_size-for-kernel-threads.patch
+s390-make-setup_randomness-work.patch
+s390-use-correct-input-data-address-for-setup_randomness.patch
+net-mvpp2-fix-dma-address-calculation-in-mvpp2_txq_inc_put.patch
+cxl-prevent-read-write-to-afu-config-space-while-afu-not-configured.patch
+cxl-fix-nested-locking-hang-during-eeh-hotplug.patch
+brcmfmac-fix-incorrect-event-channel-deduction.patch
+mnt-tuck-mounts-under-others-instead-of-creating-shadow-side-mounts.patch
+ib-ipoib-fix-deadlock-between-rmmod-and-set_mode.patch
+ib-ipoib-add-destination-address-when-re-queue-packet.patch
+ib-mlx5-fix-out-of-bound-access.patch
+ib-srp-avoid-using-ib_mr_type_sg_gaps.patch
+ib-srp-avoid-that-duplicate-responses-trigger-a-kernel-bug.patch
+ib-srp-fix-race-conditions-related-to-task-management.patch
+fs-better-permission-checking-for-submounts.patch
+orangefs-use-rcu-for-destroy_inode.patch
+memory-atmel-ebi-fix-ns-cycles-conversions.patch
+ktest-fix-child-exit-code-processing.patch
+ceph-remove-req-from-unsafe-list-when-unregistering-it.patch
+pci-hotplug-pnv-php-remove-warn_on-in-pnv_php_put_slot.patch
+pci-hotplug-pnv-php-disable-surprise-hotplug-capability-on-conflicts.patch
+target-fix-null-dereference-during-lun-lookup-active-i-o-shutdown.patch
+drivers-pci-hotplug-handle-presence-detection-change-properly.patch
+drivers-pci-hotplug-fix-initial-state-for-empty-slot.patch
+nlm-ensure-callback-code-also-checks-that-the-files-match.patch
+pwm-pca9685-fix-period-change-with-same-duty-cycle.patch
+xtensa-move-parse_tag_fdt-out-of-ifdef-config_blk_dev_initrd.patch
+nfit-libnvdimm-fix-interleave-set-cookie-calculation.patch
+mac80211-flush-delayed-work-when-entering-suspend.patch
+mac80211-don-t-reorder-frames-with-sn-smaller-than-ssn.patch
+mac80211-don-t-handle-filtered-frames-within-a-ba-session.patch
+mac80211-use-driver-indicated-transmitter-sta-only-for-data-frames.patch
--- /dev/null
+From bd4e2d2907fa23a11d46217064ecf80470ddae10 Mon Sep 17 00:00:00 2001
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+Date: Wed, 22 Feb 2017 22:06:32 -0800
+Subject: target: Fix NULL dereference during LUN lookup + active I/O shutdown
+
+From: Nicholas Bellinger <nab@linux-iscsi.org>
+
+commit bd4e2d2907fa23a11d46217064ecf80470ddae10 upstream.
+
+When transport_clear_lun_ref() is shutting down a se_lun via
+configfs with new I/O in-flight, it's possible to trigger a
+NULL pointer dereference in transport_lookup_cmd_lun() due
+to the fact percpu_ref_get() doesn't do any __PERCPU_REF_DEAD
+checking before incrementing lun->lun_ref.count after
+lun->lun_ref has switched to atomic_t mode.
+
+This results in a NULL pointer dereference as LUN shutdown
+code in core_tpg_remove_lun() continues running after the
+existing ->release() -> core_tpg_lun_ref_release() callback
+completes, and clears the RCU protected se_lun->lun_se_dev
+pointer.
+
+During the OOPs, the state of lun->lun_ref in the process
+which triggered the NULL pointer dereference looks like
+the following on v4.1.y stable code:
+
+struct se_lun {
+ lun_link_magic = 4294932337,
+ lun_status = TRANSPORT_LUN_STATUS_FREE,
+
+ .....
+
+ lun_se_dev = 0x0,
+ lun_sep = 0x0,
+
+ .....
+
+ lun_ref = {
+ count = {
+ counter = 1
+ },
+ percpu_count_ptr = 3,
+ release = 0xffffffffa02fa1e0 <core_tpg_lun_ref_release>,
+ confirm_switch = 0x0,
+ force_atomic = false,
+ rcu = {
+ next = 0xffff88154fa1a5d0,
+ func = 0xffffffff8137c4c0 <percpu_ref_switch_to_atomic_rcu>
+ }
+ }
+}
+
+To address this bug, use percpu_ref_tryget_live() to ensure
+once __PERCPU_REF_DEAD is visible on all CPUs and ->lun_ref
+has switched to atomic_t, all new I/Os will fail to obtain
+a new lun->lun_ref reference.
+
+Also use an explicit percpu_ref_kill_and_confirm() callback
+to block on ->lun_ref_comp to allow the first stage and
+associated RCU grace period to complete, and then block on
+->lun_ref_shutdown waiting for the final percpu_ref_put()
+to drop the last reference via transport_lun_remove_cmd()
+before continuing with core_tpg_remove_lun() shutdown.
+
+Reported-by: Rob Millner <rlm@daterainc.com>
+Tested-by: Rob Millner <rlm@daterainc.com>
+Cc: Rob Millner <rlm@daterainc.com>
+Tested-by: Vaibhav Tandon <vst@datera.io>
+Cc: Vaibhav Tandon <vst@datera.io>
+Tested-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/target/target_core_device.c | 10 ++++++++--
+ drivers/target/target_core_tpg.c | 3 ++-
+ drivers/target/target_core_transport.c | 31 ++++++++++++++++++++++++++++++-
+ include/target/target_core_base.h | 1 +
+ 4 files changed, 41 insertions(+), 4 deletions(-)
+
+--- a/drivers/target/target_core_device.c
++++ b/drivers/target/target_core_device.c
+@@ -77,12 +77,16 @@ transport_lookup_cmd_lun(struct se_cmd *
+ &deve->read_bytes);
+
+ se_lun = rcu_dereference(deve->se_lun);
++
++ if (!percpu_ref_tryget_live(&se_lun->lun_ref)) {
++ se_lun = NULL;
++ goto out_unlock;
++ }
++
+ se_cmd->se_lun = rcu_dereference(deve->se_lun);
+ se_cmd->pr_res_key = deve->pr_res_key;
+ se_cmd->orig_fe_lun = unpacked_lun;
+ se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
+-
+- percpu_ref_get(&se_lun->lun_ref);
+ se_cmd->lun_ref_active = true;
+
+ if ((se_cmd->data_direction == DMA_TO_DEVICE) &&
+@@ -96,6 +100,7 @@ transport_lookup_cmd_lun(struct se_cmd *
+ goto ref_dev;
+ }
+ }
++out_unlock:
+ rcu_read_unlock();
+
+ if (!se_lun) {
+@@ -815,6 +820,7 @@ struct se_device *target_alloc_device(st
+ xcopy_lun = &dev->xcopy_lun;
+ rcu_assign_pointer(xcopy_lun->lun_se_dev, dev);
+ init_completion(&xcopy_lun->lun_ref_comp);
++ init_completion(&xcopy_lun->lun_shutdown_comp);
+ INIT_LIST_HEAD(&xcopy_lun->lun_deve_list);
+ INIT_LIST_HEAD(&xcopy_lun->lun_dev_link);
+ mutex_init(&xcopy_lun->lun_tg_pt_md_mutex);
+--- a/drivers/target/target_core_tpg.c
++++ b/drivers/target/target_core_tpg.c
+@@ -445,7 +445,7 @@ static void core_tpg_lun_ref_release(str
+ {
+ struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
+
+- complete(&lun->lun_ref_comp);
++ complete(&lun->lun_shutdown_comp);
+ }
+
+ int core_tpg_register(
+@@ -571,6 +571,7 @@ struct se_lun *core_tpg_alloc_lun(
+ lun->lun_link_magic = SE_LUN_LINK_MAGIC;
+ atomic_set(&lun->lun_acl_count, 0);
+ init_completion(&lun->lun_ref_comp);
++ init_completion(&lun->lun_shutdown_comp);
+ INIT_LIST_HEAD(&lun->lun_deve_list);
+ INIT_LIST_HEAD(&lun->lun_dev_link);
+ atomic_set(&lun->lun_tg_pt_secondary_offline, 0);
+--- a/drivers/target/target_core_transport.c
++++ b/drivers/target/target_core_transport.c
+@@ -2702,10 +2702,39 @@ void target_wait_for_sess_cmds(struct se
+ }
+ EXPORT_SYMBOL(target_wait_for_sess_cmds);
+
++static void target_lun_confirm(struct percpu_ref *ref)
++{
++ struct se_lun *lun = container_of(ref, struct se_lun, lun_ref);
++
++ complete(&lun->lun_ref_comp);
++}
++
+ void transport_clear_lun_ref(struct se_lun *lun)
+ {
+- percpu_ref_kill(&lun->lun_ref);
++ /*
++ * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop
++ * the initial reference and schedule confirm kill to be
++ * executed after one full RCU grace period has completed.
++ */
++ percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm);
++ /*
++ * The first completion waits for percpu_ref_switch_to_atomic_rcu()
++ * to call target_lun_confirm after lun->lun_ref has been marked
++ * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t
++ * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref
++ * fails for all new incoming I/O.
++ */
+ wait_for_completion(&lun->lun_ref_comp);
++ /*
++ * The second completion waits for percpu_ref_put_many() to
++ * invoke ->release() after lun->lun_ref has switched to
++ * atomic_t mode, and lun->lun_ref.count has reached zero.
++ *
++ * At this point all target-core lun->lun_ref references have
++ * been dropped via transport_lun_remove_cmd(), and it's safe
++ * to proceed with the remaining LUN shutdown.
++ */
++ wait_for_completion(&lun->lun_shutdown_comp);
+ }
+
+ static bool
+--- a/include/target/target_core_base.h
++++ b/include/target/target_core_base.h
+@@ -732,6 +732,7 @@ struct se_lun {
+ struct config_group lun_group;
+ struct se_port_stat_grps port_stat_grps;
+ struct completion lun_ref_comp;
++ struct completion lun_shutdown_comp;
+ struct percpu_ref lun_ref;
+ struct list_head lun_dev_link;
+ struct hlist_node link;
--- /dev/null
+From 4ab18701c66552944188dbcd0ce0012729baab84 Mon Sep 17 00:00:00 2001
+From: Max Filippov <jcmvbkbc@gmail.com>
+Date: Tue, 3 Jan 2017 09:37:34 -0800
+Subject: xtensa: move parse_tag_fdt out of #ifdef CONFIG_BLK_DEV_INITRD
+
+From: Max Filippov <jcmvbkbc@gmail.com>
+
+commit 4ab18701c66552944188dbcd0ce0012729baab84 upstream.
+
+FDT tag parsing is not related to whether BLK_DEV_INITRD is configured
+or not, move it out of the corresponding #ifdef/#endif block.
+This fixes passing external FDT to the kernel configured w/o
+BLK_DEV_INITRD support.
+
+Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/xtensa/kernel/setup.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/xtensa/kernel/setup.c
++++ b/arch/xtensa/kernel/setup.c
+@@ -133,6 +133,8 @@ static int __init parse_tag_initrd(const
+
+ __tagtable(BP_TAG_INITRD, parse_tag_initrd);
+
++#endif /* CONFIG_BLK_DEV_INITRD */
++
+ #ifdef CONFIG_OF
+
+ static int __init parse_tag_fdt(const bp_tag_t *tag)
+@@ -145,8 +147,6 @@ __tagtable(BP_TAG_FDT, parse_tag_fdt);
+
+ #endif /* CONFIG_OF */
+
+-#endif /* CONFIG_BLK_DEV_INITRD */
+-
+ static int __init parse_tag_cmdline(const bp_tag_t* tag)
+ {
+ strlcpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE);