From 5d44b2020722da73e9b6b446053f6bccfef9996b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 Jul 2017 13:15:58 +0200 Subject: [PATCH] 4.4-stable patches added patches: amd-xgbe-check-xgbe_init-return-code.patch drm-amdgpu-check-ring-being-ready-before-using.patch net-dsa-check-return-value-of-phy_connect_direct.patch net-sctp-fix-array-overrun-read-on-sctp_timer_tbl.patch platform-x86-ideapad-laptop-handle-acpi-event-1.patch scsi-virtio_scsi-reject-commands-when-virtqueue-is-broken.patch swiotlb-xen-update-dev_addr-after-swapping-pages.patch virtio_console-fix-a-crash-in-config_work_handler.patch xen-netfront-fix-rx-stall-during-network-stress-and-oom.patch --- ...amd-xgbe-check-xgbe_init-return-code.patch | 51 +++++++++ ...-check-ring-being-ready-before-using.patch | 46 ++++++++ ...k-return-value-of-phy_connect_direct.patch | 53 +++++++++ ...array-overrun-read-on-sctp_timer_tbl.patch | 36 ++++++ ...6-ideapad-laptop-handle-acpi-event-1.patch | 40 +++++++ ...ct-commands-when-virtqueue-is-broken.patch | 105 ++++++++++++++++++ queue-4.4/series | 9 ++ ...update-dev_addr-after-swapping-pages.patch | 65 +++++++++++ ...e-fix-a-crash-in-config_work_handler.patch | 41 +++++++ ...-stall-during-network-stress-and-oom.patch | 44 ++++++++ 10 files changed, 490 insertions(+) create mode 100644 queue-4.4/amd-xgbe-check-xgbe_init-return-code.patch create mode 100644 queue-4.4/drm-amdgpu-check-ring-being-ready-before-using.patch create mode 100644 queue-4.4/net-dsa-check-return-value-of-phy_connect_direct.patch create mode 100644 queue-4.4/net-sctp-fix-array-overrun-read-on-sctp_timer_tbl.patch create mode 100644 queue-4.4/platform-x86-ideapad-laptop-handle-acpi-event-1.patch create mode 100644 queue-4.4/scsi-virtio_scsi-reject-commands-when-virtqueue-is-broken.patch create mode 100644 queue-4.4/swiotlb-xen-update-dev_addr-after-swapping-pages.patch create mode 100644 queue-4.4/virtio_console-fix-a-crash-in-config_work_handler.patch create mode 100644 
queue-4.4/xen-netfront-fix-rx-stall-during-network-stress-and-oom.patch diff --git a/queue-4.4/amd-xgbe-check-xgbe_init-return-code.patch b/queue-4.4/amd-xgbe-check-xgbe_init-return-code.patch new file mode 100644 index 00000000000..01330bdd1e7 --- /dev/null +++ b/queue-4.4/amd-xgbe-check-xgbe_init-return-code.patch @@ -0,0 +1,51 @@ +From foo@baz Mon Jul 3 13:09:11 CEST 2017 +From: "Lendacky, Thomas" +Date: Fri, 20 Jan 2017 12:14:13 -0600 +Subject: amd-xgbe: Check xgbe_init() return code + +From: "Lendacky, Thomas" + + +[ Upstream commit 738f7f647371ff4cfc9646c99dba5b58ad142db3 ] + +The xgbe_init() routine returns a return code indicating success or +failure, but the return code is not checked. Add code to xgbe_init() +to issue a message when failures are seen and add code to check the +xgbe_init() return code. + +Signed-off-by: Tom Lendacky +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 4 +++- + drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 +++- + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +@@ -2732,8 +2732,10 @@ static int xgbe_init(struct xgbe_prv_dat + + /* Flush Tx queues */ + ret = xgbe_flush_tx_queues(pdata); +- if (ret) ++ if (ret) { ++ netdev_err(pdata->netdev, "error flushing TX queues\n"); + return ret; ++ } + + /* + * Initialize DMA related features +--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +@@ -877,7 +877,9 @@ static int xgbe_start(struct xgbe_prv_da + + DBGPR("-->xgbe_start\n"); + +- hw_if->init(pdata); ++ ret = hw_if->init(pdata); ++ if (ret) ++ return ret; + + ret = phy_if->phy_start(pdata); + if (ret) diff --git a/queue-4.4/drm-amdgpu-check-ring-being-ready-before-using.patch b/queue-4.4/drm-amdgpu-check-ring-being-ready-before-using.patch new file mode 100644 index 00000000000..fa6d3b213e6 --- 
/dev/null +++ b/queue-4.4/drm-amdgpu-check-ring-being-ready-before-using.patch @@ -0,0 +1,46 @@ +From foo@baz Mon Jul 3 13:09:11 CEST 2017 +From: Ding Pixel +Date: Wed, 18 Jan 2017 17:26:38 +0800 +Subject: drm/amdgpu: check ring being ready before using + +From: Ding Pixel + + +[ Upstream commit c5f21c9f878b8dcd54d0b9739c025ca73cb4c091 ] + +Return success when the ring is properly initialized, otherwise return +failure. + +Tonga SRIOV VF doesn't have UVD and VCE engines, the initialization of +these IPs is bypassed. The system crashes if application submit IB to +their rings which are not ready to use. It could be a common issue if +IP having ring buffer is disabled for some reason on specific ASIC, so +it should check the ring being ready to use. + +Bug: amdgpu_test crashes system on Tonga VF. + +Signed-off-by: Ding Pixel +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -124,6 +124,13 @@ int amdgpu_cs_get_ring(struct amdgpu_dev + } + break; + } ++ ++ if (!(*out_ring && (*out_ring)->adev)) { ++ DRM_ERROR("Ring %d is not initialized on IP %d\n", ++ ring, ip_type); ++ return -EINVAL; ++ } ++ + return 0; + } + diff --git a/queue-4.4/net-dsa-check-return-value-of-phy_connect_direct.patch b/queue-4.4/net-dsa-check-return-value-of-phy_connect_direct.patch new file mode 100644 index 00000000000..4fbd4c1f4c7 --- /dev/null +++ b/queue-4.4/net-dsa-check-return-value-of-phy_connect_direct.patch @@ -0,0 +1,53 @@ +From foo@baz Mon Jul 3 13:09:11 CEST 2017 +From: Florian Fainelli +Date: Fri, 20 Jan 2017 16:05:05 -0800 +Subject: net: dsa: Check return value of phy_connect_direct() + +From: Florian Fainelli + + +[ Upstream commit 4078b76cac68e50ccf1f76a74e7d3d5788aec3fe ] + +We need to check the return value of 
phy_connect_direct() in +dsa_slave_phy_connect() otherwise we may be continuing the +initialization of a slave network device with a PHY that already +attached somewhere else and which will soon be in error because the PHY +device is in error. + +The conditions for such an error to occur are that we have a port of our +switch that is not disabled, and has the same port number as a PHY +address (say both 5) that can be probed using the DSA slave MII bus. We +end-up having this slave network device find a PHY at the same address +as our port number, and we try to attach to it. + +A slave network (e.g: port 0) has already attached to our PHY device, +and we try to re-attach it with a different network device, but since we +ignore the error we would end-up initializing incorrect device +references by the time the slave network interface is opened. + +The code has been (re)organized several times, making it hard to provide +an exact Fixes tag, this is a bugfix nonetheless. + +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/slave.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/net/dsa/slave.c ++++ b/net/dsa/slave.c +@@ -1006,10 +1006,8 @@ static int dsa_slave_phy_connect(struct + /* Use already configured phy mode */ + if (p->phy_interface == PHY_INTERFACE_MODE_NA) + p->phy_interface = p->phy->interface; +- phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, +- p->phy_interface); +- +- return 0; ++ return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, ++ p->phy_interface); + } + + static int dsa_slave_phy_setup(struct dsa_slave_priv *p, diff --git a/queue-4.4/net-sctp-fix-array-overrun-read-on-sctp_timer_tbl.patch b/queue-4.4/net-sctp-fix-array-overrun-read-on-sctp_timer_tbl.patch new file mode 100644 index 00000000000..98e138cdc6c --- /dev/null +++ b/queue-4.4/net-sctp-fix-array-overrun-read-on-sctp_timer_tbl.patch @@ -0,0 +1,36 @@ +From foo@baz Mon Jul 3 13:09:11 CEST 2017 +From: Colin Ian King +Date: Fri, 20 Jan 2017 13:01:57 +0000 +Subject: net: sctp: fix array overrun read on sctp_timer_tbl + +From: Colin Ian King + + +[ Upstream commit 0e73fc9a56f22f2eec4d2b2910c649f7af67b74d ] + +The comparison on the timeout can lead to an array overrun +read on sctp_timer_tbl because of an off-by-one error. Fix +this by using < instead of <= and also compare to the array +size rather than SCTP_EVENT_TIMEOUT_MAX. + +Fixes CoverityScan CID#1397639 ("Out-of-bounds read") + +Signed-off-by: Colin Ian King +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/debug.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/debug.c ++++ b/net/sctp/debug.c +@@ -166,7 +166,7 @@ static const char *const sctp_timer_tbl[ + /* Lookup timer debug name. 
*/ + const char *sctp_tname(const sctp_subtype_t id) + { +- if (id.timeout <= SCTP_EVENT_TIMEOUT_MAX) ++ if (id.timeout < ARRAY_SIZE(sctp_timer_tbl)) + return sctp_timer_tbl[id.timeout]; + return "unknown_timer"; + } diff --git a/queue-4.4/platform-x86-ideapad-laptop-handle-acpi-event-1.patch b/queue-4.4/platform-x86-ideapad-laptop-handle-acpi-event-1.patch new file mode 100644 index 00000000000..ec3c1d8a4d4 --- /dev/null +++ b/queue-4.4/platform-x86-ideapad-laptop-handle-acpi-event-1.patch @@ -0,0 +1,40 @@ +From foo@baz Mon Jul 3 13:09:11 CEST 2017 +From: Zach Ploskey +Date: Sun, 22 Jan 2017 00:47:19 -0800 +Subject: platform/x86: ideapad-laptop: handle ACPI event 1 + +From: Zach Ploskey + + +[ Upstream commit cfee5d63767b2e7997c1f36420d008abbe61565c ] + +On Ideapad laptops, ACPI event 1 is currently not handled. Many models +log "ideapad_laptop: Unknown event: 1" every 20 seconds or so while +running on battery power. Some convertible laptops receive this event +when switching in and out of tablet mode. + +This adds an additional case for event 1 in ideapad_acpi_notify to call +ideapad_input_report(priv, vpc_bit), so that the event is reported to +userspace and we avoid unnecessary logging. 
+ +Fixes bug #107481 (https://bugzilla.kernel.org/show_bug.cgi?id=107481) +Fixes bug #65751 (https://bugzilla.kernel.org/show_bug.cgi?id=65751) + +Signed-off-by: Zach Ploskey +Signed-off-by: Andy Shevchenko +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/ideapad-laptop.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/platform/x86/ideapad-laptop.c ++++ b/drivers/platform/x86/ideapad-laptop.c +@@ -807,6 +807,7 @@ static void ideapad_acpi_notify(acpi_han + case 11: + case 7: + case 6: ++ case 1: + ideapad_input_report(priv, vpc_bit); + break; + case 5: diff --git a/queue-4.4/scsi-virtio_scsi-reject-commands-when-virtqueue-is-broken.patch b/queue-4.4/scsi-virtio_scsi-reject-commands-when-virtqueue-is-broken.patch new file mode 100644 index 00000000000..d08452bf467 --- /dev/null +++ b/queue-4.4/scsi-virtio_scsi-reject-commands-when-virtqueue-is-broken.patch @@ -0,0 +1,105 @@ +From foo@baz Mon Jul 3 13:09:11 CEST 2017 +From: Eric Farman +Date: Fri, 13 Jan 2017 12:48:06 -0500 +Subject: scsi: virtio_scsi: Reject commands when virtqueue is broken + +From: Eric Farman + + +[ Upstream commit 773c7220e22d193e5667c352fcbf8d47eefc817f ] + +In the case of a graceful set of detaches, where the virtio-scsi-ccw +disk is removed from the guest prior to the controller, the guest +behaves quite normally. Specifically, the detach gets us into +sd_sync_cache to issue a Synchronize Cache(10) command, which +immediately fails (and is retried a couple of times) because the device +has been removed. Later, the removal of the controller sees two CRWs +presented, but there's no further indication of the removal from the +guest viewpoint. 
+ + [ 17.217458] sd 0:0:0:0: [sda] Synchronizing SCSI cache + [ 17.219257] sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK + [ 21.449400] crw_info : CRW reports slct=0, oflw=0, chn=1, rsc=3, anc=0, erc=4, rsid=2 + [ 21.449406] crw_info : CRW reports slct=0, oflw=0, chn=0, rsc=3, anc=0, erc=4, rsid=0 + +However, on s390, the SCSI disks can be removed "by surprise" when an +entire controller (host) is removed and all associated disks are removed +via the loop in scsi_forget_host. The same call to sd_sync_cache is +made, but because the controller has already been removed, the +Synchronize Cache(10) command is neither issued (and then failed) nor +rejected. + +That the I/O isn't returned means the guest cannot have other devices +added nor removed, and other tasks (such as shutdown or reboot) issued +by the guest will not complete either. The virtio ring has already been +marked as broken (via virtio_break_device in virtio_ccw_remove), but we +still attempt to queue the command only to have it remain there. The +calling sequence provides a bit of distinction for us: + + virtscsi_queuecommand() + -> virtscsi_kick_cmd() + -> virtscsi_add_cmd() + -> virtqueue_add_sgs() + -> virtqueue_add() + if success + return 0 + elseif vq->broken or vring_mapping_error() + return -EIO + else + return -ENOSPC + +A return of ENOSPC is generally a temporary condition, so returning +"host busy" from virtscsi_queuecommand makes sense here, to have it +redriven in a moment or two. But the EIO return code is more of a +permanent error and so it would be wise to return the I/O itself and +allow the calling thread to finish gracefully. 
The result is these four +kernel messages in the guest (the fourth one does not occur prior to +this patch): + + [ 22.921562] crw_info : CRW reports slct=0, oflw=0, chn=1, rsc=3, anc=0, erc=4, rsid=2 + [ 22.921580] crw_info : CRW reports slct=0, oflw=0, chn=0, rsc=3, anc=0, erc=4, rsid=0 + [ 22.921978] sd 0:0:0:0: [sda] Synchronizing SCSI cache + [ 22.921993] sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK + +I opted to fill in the same response data that is returned from the more +graceful device detach, where the disk device is removed prior to the +controller device. + +Signed-off-by: Eric Farman +Reviewed-by: Fam Zheng +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/virtio_scsi.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/virtio_scsi.c ++++ b/drivers/scsi/virtio_scsi.c +@@ -533,7 +533,9 @@ static int virtscsi_queuecommand(struct + { + struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev); + struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc); ++ unsigned long flags; + int req_size; ++ int ret; + + BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize); + +@@ -561,8 +563,15 @@ static int virtscsi_queuecommand(struct + req_size = sizeof(cmd->req.cmd); + } + +- if (virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)) != 0) ++ ret = virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)); ++ if (ret == -EIO) { ++ cmd->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; ++ spin_lock_irqsave(&req_vq->vq_lock, flags); ++ virtscsi_complete_cmd(vscsi, cmd); ++ spin_unlock_irqrestore(&req_vq->vq_lock, flags); ++ } else if (ret != 0) { + return SCSI_MLQUEUE_HOST_BUSY; ++ } + return 0; + } + diff --git a/queue-4.4/series b/queue-4.4/series index 5b727abec04..44a3abce080 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -52,3 +52,12 @@ net-bgmac-remove-superflous-netif_carrier_on.patch 
powerpc-eeh-enable-io-path-on-permanent-error.patch gianfar-do-not-reuse-pages-from-emergency-reserve.patch btrfs-fix-truncate-down-when-no_holes-feature-is-enabled.patch +virtio_console-fix-a-crash-in-config_work_handler.patch +swiotlb-xen-update-dev_addr-after-swapping-pages.patch +net-sctp-fix-array-overrun-read-on-sctp_timer_tbl.patch +xen-netfront-fix-rx-stall-during-network-stress-and-oom.patch +scsi-virtio_scsi-reject-commands-when-virtqueue-is-broken.patch +platform-x86-ideapad-laptop-handle-acpi-event-1.patch +amd-xgbe-check-xgbe_init-return-code.patch +net-dsa-check-return-value-of-phy_connect_direct.patch +drm-amdgpu-check-ring-being-ready-before-using.patch diff --git a/queue-4.4/swiotlb-xen-update-dev_addr-after-swapping-pages.patch b/queue-4.4/swiotlb-xen-update-dev_addr-after-swapping-pages.patch new file mode 100644 index 00000000000..477c8bffbd8 --- /dev/null +++ b/queue-4.4/swiotlb-xen-update-dev_addr-after-swapping-pages.patch @@ -0,0 +1,65 @@ +From foo@baz Mon Jul 3 13:09:11 CEST 2017 +From: Stefano Stabellini +Date: Thu, 19 Jan 2017 10:39:09 -0800 +Subject: swiotlb-xen: update dev_addr after swapping pages + +From: Stefano Stabellini + + +[ Upstream commit f1225ee4c8fcf09afaa199b8b1f0450f38b8cd11 ] + +In xen_swiotlb_map_page and xen_swiotlb_map_sg_attrs, if the original +page is not suitable, we swap it for another page from the swiotlb +pool. + +In these cases, we don't update the previously calculated dma address +for the page before calling xen_dma_map_page. Thus, we end up calling +xen_dma_map_page passing the wrong dev_addr, resulting in +xen_dma_map_page mistakenly assuming that the page is foreign when it is +local. + +Fix the bug by updating dev_addr appropriately. + +This change has no effect on x86, because xen_dma_map_page is a stub +there. 
+ +Signed-off-by: Stefano Stabellini +Signed-off-by: Pooya Keshavarzi +Tested-by: Pooya Keshavarzi +Reviewed-by: Boris Ostrovsky +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/swiotlb-xen.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/xen/swiotlb-xen.c ++++ b/drivers/xen/swiotlb-xen.c +@@ -409,9 +409,9 @@ dma_addr_t xen_swiotlb_map_page(struct d + if (map == SWIOTLB_MAP_ERROR) + return DMA_ERROR_CODE; + ++ dev_addr = xen_phys_to_bus(map); + xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT), + dev_addr, map & ~PAGE_MASK, size, dir, attrs); +- dev_addr = xen_phys_to_bus(map); + + /* + * Ensure that the address returned is DMA'ble +@@ -567,13 +567,14 @@ xen_swiotlb_map_sg_attrs(struct device * + sg_dma_len(sgl) = 0; + return 0; + } ++ dev_addr = xen_phys_to_bus(map); + xen_dma_map_page(hwdev, pfn_to_page(map >> PAGE_SHIFT), + dev_addr, + map & ~PAGE_MASK, + sg->length, + dir, + attrs); +- sg->dma_address = xen_phys_to_bus(map); ++ sg->dma_address = dev_addr; + } else { + /* we are not interested in the dma_addr returned by + * xen_dma_map_page, only in the potential cache flushes executed diff --git a/queue-4.4/virtio_console-fix-a-crash-in-config_work_handler.patch b/queue-4.4/virtio_console-fix-a-crash-in-config_work_handler.patch new file mode 100644 index 00000000000..039e673beea --- /dev/null +++ b/queue-4.4/virtio_console-fix-a-crash-in-config_work_handler.patch @@ -0,0 +1,41 @@ +From foo@baz Mon Jul 3 13:09:11 CEST 2017 +From: "G. Campana" +Date: Thu, 19 Jan 2017 23:37:46 +0200 +Subject: virtio_console: fix a crash in config_work_handler + +From: "G. Campana" + + +[ Upstream commit 8379cadf71c3ee8173a1c6fc1ea7762a9638c047 ] + +Using control_work instead of config_work as the 3rd argument to +container_of results in an invalid portdev pointer. 
Indeed, the work +structure is initialized as below: + + INIT_WORK(&portdev->config_work, &config_work_handler); + +It leads to a crash when portdev->vdev is dereferenced later. This +bug +is triggered when the guest uses a virtio-console without multiport +feature and receives a config_changed virtio interrupt. + +Signed-off-by: G. Campana +Reviewed-by: Amit Shah +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/virtio_console.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/char/virtio_console.c ++++ b/drivers/char/virtio_console.c +@@ -1864,7 +1864,7 @@ static void config_work_handler(struct w + { + struct ports_device *portdev; + +- portdev = container_of(work, struct ports_device, control_work); ++ portdev = container_of(work, struct ports_device, config_work); + if (!use_multiport(portdev)) { + struct virtio_device *vdev; + struct port *port; diff --git a/queue-4.4/xen-netfront-fix-rx-stall-during-network-stress-and-oom.patch b/queue-4.4/xen-netfront-fix-rx-stall-during-network-stress-and-oom.patch new file mode 100644 index 00000000000..af82238b023 --- /dev/null +++ b/queue-4.4/xen-netfront-fix-rx-stall-during-network-stress-and-oom.patch @@ -0,0 +1,44 @@ +From foo@baz Mon Jul 3 13:09:11 CEST 2017 +From: Vineeth Remanan Pillai +Date: Thu, 19 Jan 2017 08:35:39 -0800 +Subject: xen-netfront: Fix Rx stall during network stress and OOM + +From: Vineeth Remanan Pillai + + +[ Upstream commit 90c311b0eeead647b708a723dbdde1eda3dcad05 ] + +During an OOM scenario, request slots could not be created as skb +allocation fails. So the netback cannot pass in packets and netfront +wrongly assumes that there is no more work to be done and it disables +polling. This causes Rx to stall. + +The issue is with the retry logic which schedules the timer if the +created slots are less than NET_RX_SLOTS_MIN. 
The count of new request +slots to be pushed are calculated as a difference between new req_prod +and rsp_cons which could be more than the actual slots, if there are +unconsumed responses. + +The fix is to calculate the count of newly created slots as the +difference between new req_prod and old req_prod. + +Signed-off-by: Vineeth Remanan Pillai +Reviewed-by: Juergen Gross +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -321,7 +321,7 @@ static void xennet_alloc_rx_buffers(stru + queue->rx.req_prod_pvt = req_prod; + + /* Not enough requests? Try again later. */ +- if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN) { ++ if (req_prod - queue->rx.sring->req_prod < NET_RX_SLOTS_MIN) { + mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10)); + return; + } -- 2.47.3