--- /dev/null
+From 29bf70edaca4c49955ed9ec154d1dc8bf0a483ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2019 11:51:46 +0900
+Subject: ath9k_htc: Discard undersized packets
+
+From: Masashi Honma <masashi.honma@gmail.com>
+
+[ Upstream commit cd486e627e67ee9ab66914d36d3127ef057cc010 ]
+
+Sometimes the hardware will push small packets that trigger a WARN_ON
+in mac80211. Discard them early to avoid this issue.
+
+This patch ports 2 patches from ath9k to ath9k_htc.
+commit 3c0efb745a172bfe96459e20cbd37b0c945d5f8d "ath9k: discard
+undersized packets".
+commit df5c4150501ee7e86383be88f6490d970adcf157 "ath9k: correctly
+handle short radar pulses".
+
+[ 112.835889] ------------[ cut here ]------------
+[ 112.835971] WARNING: CPU: 5 PID: 0 at net/mac80211/rx.c:804 ieee80211_rx_napi+0xaac/0xb40 [mac80211]
+[ 112.835973] Modules linked in: ath9k_htc ath9k_common ath9k_hw ath mac80211 cfg80211 libarc4 nouveau snd_hda_codec_hdmi intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_hda_codec video snd_hda_core ttm snd_hwdep drm_kms_helper snd_pcm crct10dif_pclmul snd_seq_midi drm snd_seq_midi_event crc32_pclmul snd_rawmidi ghash_clmulni_intel snd_seq aesni_intel aes_x86_64 crypto_simd cryptd snd_seq_device glue_helper snd_timer sch_fq_codel i2c_algo_bit fb_sys_fops snd input_leds syscopyarea sysfillrect sysimgblt intel_cstate mei_me intel_rapl_perf soundcore mxm_wmi lpc_ich mei kvm_intel kvm mac_hid irqbypass parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear e1000e ahci libahci wmi
+[ 112.836022] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.3.0-wt #1
+[ 112.836023] Hardware name: MouseComputer Co.,Ltd. X99-S01/X99-S01, BIOS 1.0C-W7 04/01/2015
+[ 112.836056] RIP: 0010:ieee80211_rx_napi+0xaac/0xb40 [mac80211]
+[ 112.836059] Code: 00 00 66 41 89 86 b0 00 00 00 e9 c8 fa ff ff 4c 89 b5 40 ff ff ff 49 89 c6 e9 c9 fa ff ff 48 c7 c7 e0 a2 a5 c0 e8 47 41 b0 e9 <0f> 0b 48 89 df e8 5a 94 2d ea e9 02 f9 ff ff 41 39 c1 44 89 85 60
+[ 112.836060] RSP: 0018:ffffaa6180220da8 EFLAGS: 00010286
+[ 112.836062] RAX: 0000000000000024 RBX: ffff909a20eeda00 RCX: 0000000000000000
+[ 112.836064] RDX: 0000000000000000 RSI: ffff909a2f957448 RDI: ffff909a2f957448
+[ 112.836065] RBP: ffffaa6180220e78 R08: 00000000000006e9 R09: 0000000000000004
+[ 112.836066] R10: 000000000000000a R11: 0000000000000001 R12: 0000000000000000
+[ 112.836068] R13: ffff909a261a47a0 R14: 0000000000000000 R15: 0000000000000004
+[ 112.836070] FS: 0000000000000000(0000) GS:ffff909a2f940000(0000) knlGS:0000000000000000
+[ 112.836071] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 112.836073] CR2: 00007f4e3ffffa08 CR3: 00000001afc0a006 CR4: 00000000001606e0
+[ 112.836074] Call Trace:
+[ 112.836076] <IRQ>
+[ 112.836083] ? finish_td+0xb3/0xf0
+[ 112.836092] ? ath9k_rx_prepare.isra.11+0x22f/0x2a0 [ath9k_htc]
+[ 112.836099] ath9k_rx_tasklet+0x10b/0x1d0 [ath9k_htc]
+[ 112.836105] tasklet_action_common.isra.22+0x63/0x110
+[ 112.836108] tasklet_action+0x22/0x30
+[ 112.836115] __do_softirq+0xe4/0x2da
+[ 112.836118] irq_exit+0xae/0xb0
+[ 112.836121] do_IRQ+0x86/0xe0
+[ 112.836125] common_interrupt+0xf/0xf
+[ 112.836126] </IRQ>
+[ 112.836130] RIP: 0010:cpuidle_enter_state+0xa9/0x440
+[ 112.836133] Code: 3d bc 20 38 55 e8 f7 1d 84 ff 49 89 c7 0f 1f 44 00 00 31 ff e8 28 29 84 ff 80 7d d3 00 0f 85 e6 01 00 00 fb 66 0f 1f 44 00 00 <45> 85 ed 0f 89 ff 01 00 00 41 c7 44 24 10 00 00 00 00 48 83 c4 18
+[ 112.836134] RSP: 0018:ffffaa61800e3e48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde
+[ 112.836136] RAX: ffff909a2f96b340 RBX: ffffffffabb58200 RCX: 000000000000001f
+[ 112.836137] RDX: 0000001a458adc5d RSI: 0000000026c9b581 RDI: 0000000000000000
+[ 112.836139] RBP: ffffaa61800e3e88 R08: 0000000000000002 R09: 000000000002abc0
+[ 112.836140] R10: ffffaa61800e3e18 R11: 000000000000002d R12: ffffca617fb40b00
+[ 112.836141] R13: 0000000000000002 R14: ffffffffabb582d8 R15: 0000001a458adc5d
+[ 112.836145] ? cpuidle_enter_state+0x98/0x440
+[ 112.836149] ? menu_select+0x370/0x600
+[ 112.836151] cpuidle_enter+0x2e/0x40
+[ 112.836154] call_cpuidle+0x23/0x40
+[ 112.836156] do_idle+0x204/0x280
+[ 112.836159] cpu_startup_entry+0x1d/0x20
+[ 112.836164] start_secondary+0x167/0x1c0
+[ 112.836169] secondary_startup_64+0xa4/0xb0
+[ 112.836173] ---[ end trace 9f4cd18479cc5ae5 ]---
+
+Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 23 +++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+index baacbd11eb43..b5d7ef4da17f 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+@@ -973,6 +973,8 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+ struct ath_htc_rx_status *rxstatus;
+ struct ath_rx_status rx_stats;
+ bool decrypt_error = false;
++ __be16 rs_datalen;
++ bool is_phyerr;
+
+ if (skb->len < HTC_RX_FRAME_HEADER_SIZE) {
+ ath_err(common, "Corrupted RX frame, dropping (len: %d)\n",
+@@ -982,11 +984,24 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+
+ rxstatus = (struct ath_htc_rx_status *)skb->data;
+
+- if (be16_to_cpu(rxstatus->rs_datalen) -
+- (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) {
++ rs_datalen = be16_to_cpu(rxstatus->rs_datalen);
++ if (unlikely(rs_datalen -
++ (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0)) {
+ ath_err(common,
+ "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n",
+- be16_to_cpu(rxstatus->rs_datalen), skb->len);
++ rs_datalen, skb->len);
++ goto rx_next;
++ }
++
++ is_phyerr = rxstatus->rs_status & ATH9K_RXERR_PHY;
++ /*
++ * Discard zero-length packets and packets smaller than an ACK
++ * which are not PHY_ERROR (short radar pulses have a length of 3)
++ */
++ if (unlikely(!rs_datalen || (rs_datalen < 10 && !is_phyerr))) {
++ ath_warn(common,
++ "Short RX data len, dropping (dlen: %d)\n",
++ rs_datalen);
+ goto rx_next;
+ }
+
+@@ -1011,7 +1026,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+ * Process PHY errors and return so that the packet
+ * can be dropped.
+ */
+- if (rx_stats.rs_status & ATH9K_RXERR_PHY) {
++ if (unlikely(is_phyerr)) {
+ /* TODO: Not using DFS processing now. */
+ if (ath_cmn_process_fft(&priv->spec_priv, hdr,
+ &rx_stats, rx_status->mactime)) {
+--
+2.20.1
+
--- /dev/null
+From 48f6231084ef1c482c7b9dd442d8c0111b0ca403 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2019 11:51:45 +0900
+Subject: ath9k_htc: Modify byte order for an error message
+
+From: Masashi Honma <masashi.honma@gmail.com>
+
+[ Upstream commit e01fddc19d215f6ad397894ec2a851d99bf154e2 ]
+
+rs_datalen is be16 so we need to convert it before printing.
+
+Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+index 799010ed04e0..baacbd11eb43 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+@@ -986,7 +986,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+ (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) {
+ ath_err(common,
+ "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n",
+- rxstatus->rs_datalen, skb->len);
++ be16_to_cpu(rxstatus->rs_datalen), skb->len);
+ goto rx_next;
+ }
+
+--
+2.20.1
+
--- /dev/null
+From 7dd3619af053abf402857259709330d8ab1514d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jun 2019 16:12:36 -0600
+Subject: coresight: etb10: Do not call smp_processor_id from preemptible
+
+From: Suzuki K Poulose <suzuki.poulose@arm.com>
+
+[ Upstream commit 730766bae3280a25d40ea76a53dc6342e84e6513 ]
+
+During a perf session we try to allocate buffers on the "node" associated
+with the CPU the event is bound to. If it is not bound to a CPU, we
+use the current CPU node, using smp_processor_id(). However this is unsafe
+in a pre-emptible context and could generate the splats as below :
+
+ BUG: using smp_processor_id() in preemptible [00000000] code: perf/2544
+
+Use NUMA_NO_NODE hint instead of using the current node for events
+not bound to CPUs.
+
+Fixes: 2997aa4063d97fdb39 ("coresight: etb10: implementing AUX API")
+Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
+Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+Cc: stable <stable@vger.kernel.org> # 4.6+
+Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Link: https://lore.kernel.org/r/20190620221237.3536-5-mathieu.poirier@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/coresight/coresight-etb10.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
+index 0dad8626bcfb..0a59bf3af40b 100644
+--- a/drivers/hwtracing/coresight/coresight-etb10.c
++++ b/drivers/hwtracing/coresight/coresight-etb10.c
+@@ -275,9 +275,7 @@ static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu,
+ int node;
+ struct cs_buffers *buf;
+
+- if (cpu == -1)
+- cpu = smp_processor_id();
+- node = cpu_to_node(cpu);
++ node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+
+ buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
+ if (!buf)
+--
+2.20.1
+
--- /dev/null
+From 41d6b7b0ed03946188d6f6eea3e432bce64d3c0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jun 2019 16:12:35 -0600
+Subject: coresight: tmc-etf: Do not call smp_processor_id from preemptible
+
+From: Suzuki K Poulose <suzuki.poulose@arm.com>
+
+[ Upstream commit 024c1fd9dbcc1d8a847f1311f999d35783921b7f ]
+
+During a perf session we try to allocate buffers on the "node" associated
+with the CPU the event is bound to. If it is not bound to a CPU, we
+use the current CPU node, using smp_processor_id(). However this is unsafe
+in a pre-emptible context and could generate the splats as below :
+
+ BUG: using smp_processor_id() in preemptible [00000000] code: perf/2544
+ caller is tmc_alloc_etf_buffer+0x5c/0x60
+ CPU: 2 PID: 2544 Comm: perf Not tainted 5.1.0-rc6-147786-g116841e #344
+ Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Feb 1 2019
+ Call trace:
+ dump_backtrace+0x0/0x150
+ show_stack+0x14/0x20
+ dump_stack+0x9c/0xc4
+ debug_smp_processor_id+0x10c/0x110
+ tmc_alloc_etf_buffer+0x5c/0x60
+ etm_setup_aux+0x1c4/0x230
+ rb_alloc_aux+0x1b8/0x2b8
+ perf_mmap+0x35c/0x478
+ mmap_region+0x34c/0x4f0
+ do_mmap+0x2d8/0x418
+ vm_mmap_pgoff+0xd0/0xf8
+ ksys_mmap_pgoff+0x88/0xf8
+ __arm64_sys_mmap+0x28/0x38
+ el0_svc_handler+0xd8/0x138
+ el0_svc+0x8/0xc
+
+Use NUMA_NO_NODE hint instead of using the current node for events
+not bound to CPUs.
+
+Fixes: 2e499bbc1a929ac ("coresight: tmc: implementing TMC-ETF AUX space API")
+Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
+Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+Cc: stable <stable@vger.kernel.org> # 4.7+
+Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Link: https://lore.kernel.org/r/20190620221237.3536-4-mathieu.poirier@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/coresight/coresight-tmc-etf.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
+index e31061308e19..4644ac5582cf 100644
+--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
++++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
+@@ -304,9 +304,7 @@ static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu,
+ int node;
+ struct cs_buffers *buf;
+
+- if (cpu == -1)
+- cpu = smp_processor_id();
+- node = cpu_to_node(cpu);
++ node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+
+ /* Allocate memory structure for interaction with Perf */
+ buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
+--
+2.20.1
+
--- /dev/null
+From 7ab2b97949080bd284fc2d112cdc5e4306e52fcb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2019 00:24:33 +0300
+Subject: drm/mst: Fix MST sideband up-reply failure handling
+
+From: Imre Deak <imre.deak@intel.com>
+
+[ Upstream commit d8fd3722207f154b53c80eee2cf4977c3fc25a92 ]
+
+Fix the breakage resulting in the stacktrace below, due to tx queue
+being full when trying to send an up-reply. txmsg->seqno is -1 in this
+case leading to a corruption of the mstb object by
+
+ txmsg->dst->tx_slots[txmsg->seqno] = NULL;
+
+in process_single_up_tx_qlock().
+
+[ +0,005162] [drm:process_single_tx_qlock [drm_kms_helper]] set_hdr_from_dst_qlock: failed to find slot
+[ +0,000015] [drm:drm_dp_send_up_ack_reply.constprop.19 [drm_kms_helper]] failed to send msg in q -11
+[ +0,000939] BUG: kernel NULL pointer dereference, address: 00000000000005a0
+[ +0,006982] #PF: supervisor write access in kernel mode
+[ +0,005223] #PF: error_code(0x0002) - not-present page
+[ +0,005135] PGD 0 P4D 0
+[ +0,002581] Oops: 0002 [#1] PREEMPT SMP NOPTI
+[ +0,004359] CPU: 1 PID: 1200 Comm: kworker/u16:3 Tainted: G U 5.2.0-rc1+ #410
+[ +0,008433] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP, BIOS ICLSFWR1.R00.3175.A00.1904261428 04/26/2019
+[ +0,013323] Workqueue: i915-dp i915_digport_work_func [i915]
+[ +0,005676] RIP: 0010:queue_work_on+0x19/0x70
+[ +0,004372] Code: ff ff ff 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 41 56 49 89 f6 41 55 41 89 fd 41 54 55 53 48 89 d3 9c 5d fa e8 e7 81 0c 00 <f0> 48 0f ba 2b 00 73 31 45 31 e4 f7 c5 00 02 00 00 74 13 e8 cf 7f
+[ +0,018750] RSP: 0018:ffffc900007dfc50 EFLAGS: 00010006
+[ +0,005222] RAX: 0000000000000046 RBX: 00000000000005a0 RCX: 0000000000000001
+[ +0,007133] RDX: 000000000001b608 RSI: 0000000000000000 RDI: ffffffff82121972
+[ +0,007129] RBP: 0000000000000202 R08: 0000000000000000 R09: 0000000000000001
+[ +0,007129] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88847bfa5096
+[ +0,007131] R13: 0000000000000010 R14: ffff88849c08f3f8 R15: 0000000000000000
+[ +0,007128] FS: 0000000000000000(0000) GS:ffff88849dc80000(0000) knlGS:0000000000000000
+[ +0,008083] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ +0,005749] CR2: 00000000000005a0 CR3: 0000000005210006 CR4: 0000000000760ee0
+[ +0,007128] PKRU: 55555554
+[ +0,002722] Call Trace:
+[ +0,002458] drm_dp_mst_handle_up_req+0x517/0x540 [drm_kms_helper]
+[ +0,006197] ? drm_dp_mst_hpd_irq+0x5b/0x9c0 [drm_kms_helper]
+[ +0,005764] drm_dp_mst_hpd_irq+0x5b/0x9c0 [drm_kms_helper]
+[ +0,005623] ? intel_dp_hpd_pulse+0x205/0x370 [i915]
+[ +0,005018] intel_dp_hpd_pulse+0x205/0x370 [i915]
+[ +0,004836] i915_digport_work_func+0xbb/0x140 [i915]
+[ +0,005108] process_one_work+0x245/0x610
+[ +0,004027] worker_thread+0x37/0x380
+[ +0,003684] ? process_one_work+0x610/0x610
+[ +0,004184] kthread+0x119/0x130
+[ +0,003240] ? kthread_park+0x80/0x80
+[ +0,003668] ret_from_fork+0x24/0x50
+
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20190523212433.9058-1-imre.deak@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/drm_dp_mst_topology.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
+index 65f58e23e03d..77347a258f6c 100644
+--- a/drivers/gpu/drm/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/drm_dp_mst_topology.c
+@@ -1582,7 +1582,11 @@ static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr,
+ if (ret != 1)
+ DRM_DEBUG_KMS("failed to send msg in q %d\n", ret);
+
+- txmsg->dst->tx_slots[txmsg->seqno] = NULL;
++ if (txmsg->seqno != -1) {
++ WARN_ON((unsigned int)txmsg->seqno >
++ ARRAY_SIZE(txmsg->dst->tx_slots));
++ txmsg->dst->tx_slots[txmsg->seqno] = NULL;
++ }
+ }
+
+ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
+--
+2.20.1
+
--- /dev/null
+From 65ef3476172506d84aa78a75d33d0cccd452d727 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Sep 2019 17:31:55 -0500
+Subject: KVM: PPC: Book3S HV: use smp_mb() when setting/clearing host_ipi flag
+
+From: Michael Roth <mdroth@linux.vnet.ibm.com>
+
+[ Upstream commit 3a83f677a6eeff65751b29e3648d7c69c3be83f3 ]
+
+On a 2-socket Power9 system with 32 cores/128 threads (SMT4) and 1TB
+of memory running the following guest configs:
+
+ guest A:
+ - 224GB of memory
+ - 56 VCPUs (sockets=1,cores=28,threads=2), where:
+ VCPUs 0-1 are pinned to CPUs 0-3,
+ VCPUs 2-3 are pinned to CPUs 4-7,
+ ...
+ VCPUs 54-55 are pinned to CPUs 108-111
+
+ guest B:
+ - 4GB of memory
+ - 4 VCPUs (sockets=1,cores=4,threads=1)
+
+with the following workloads (with KSM and THP enabled in all):
+
+ guest A:
+ stress --cpu 40 --io 20 --vm 20 --vm-bytes 512M
+
+ guest B:
+ stress --cpu 4 --io 4 --vm 4 --vm-bytes 512M
+
+ host:
+ stress --cpu 4 --io 4 --vm 2 --vm-bytes 256M
+
+the below soft-lockup traces were observed after an hour or so and
+persisted until the host was reset (this was found to be reliably
+reproducible for this configuration, for kernels 4.15, 4.18, 5.0,
+and 5.3-rc5):
+
+ [ 1253.183290] rcu: INFO: rcu_sched self-detected stall on CPU
+ [ 1253.183319] rcu: 124-....: (5250 ticks this GP) idle=10a/1/0x4000000000000002 softirq=5408/5408 fqs=1941
+ [ 1256.287426] watchdog: BUG: soft lockup - CPU#105 stuck for 23s! [CPU 52/KVM:19709]
+ [ 1264.075773] watchdog: BUG: soft lockup - CPU#24 stuck for 23s! [worker:19913]
+ [ 1264.079769] watchdog: BUG: soft lockup - CPU#31 stuck for 23s! [worker:20331]
+ [ 1264.095770] watchdog: BUG: soft lockup - CPU#45 stuck for 23s! [worker:20338]
+ [ 1264.131773] watchdog: BUG: soft lockup - CPU#64 stuck for 23s! [avocado:19525]
+ [ 1280.408480] watchdog: BUG: soft lockup - CPU#124 stuck for 22s! [ksmd:791]
+ [ 1316.198012] rcu: INFO: rcu_sched self-detected stall on CPU
+ [ 1316.198032] rcu: 124-....: (21003 ticks this GP) idle=10a/1/0x4000000000000002 softirq=5408/5408 fqs=8243
+ [ 1340.411024] watchdog: BUG: soft lockup - CPU#124 stuck for 22s! [ksmd:791]
+ [ 1379.212609] rcu: INFO: rcu_sched self-detected stall on CPU
+ [ 1379.212629] rcu: 124-....: (36756 ticks this GP) idle=10a/1/0x4000000000000002 softirq=5408/5408 fqs=14714
+ [ 1404.413615] watchdog: BUG: soft lockup - CPU#124 stuck for 22s! [ksmd:791]
+ [ 1442.227095] rcu: INFO: rcu_sched self-detected stall on CPU
+ [ 1442.227115] rcu: 124-....: (52509 ticks this GP) idle=10a/1/0x4000000000000002 softirq=5408/5408 fqs=21403
+ [ 1455.111787] INFO: task worker:19907 blocked for more than 120 seconds.
+ [ 1455.111822] Tainted: G L 5.3.0-rc5-mdr-vanilla+ #1
+ [ 1455.111833] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ [ 1455.111884] INFO: task worker:19908 blocked for more than 120 seconds.
+ [ 1455.111905] Tainted: G L 5.3.0-rc5-mdr-vanilla+ #1
+ [ 1455.111925] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ [ 1455.111966] INFO: task worker:20328 blocked for more than 120 seconds.
+ [ 1455.111986] Tainted: G L 5.3.0-rc5-mdr-vanilla+ #1
+ [ 1455.111998] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ [ 1455.112048] INFO: task worker:20330 blocked for more than 120 seconds.
+ [ 1455.112068] Tainted: G L 5.3.0-rc5-mdr-vanilla+ #1
+ [ 1455.112097] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ [ 1455.112138] INFO: task worker:20332 blocked for more than 120 seconds.
+ [ 1455.112159] Tainted: G L 5.3.0-rc5-mdr-vanilla+ #1
+ [ 1455.112179] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ [ 1455.112210] INFO: task worker:20333 blocked for more than 120 seconds.
+ [ 1455.112231] Tainted: G L 5.3.0-rc5-mdr-vanilla+ #1
+ [ 1455.112242] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ [ 1455.112282] INFO: task worker:20335 blocked for more than 120 seconds.
+ [ 1455.112303] Tainted: G L 5.3.0-rc5-mdr-vanilla+ #1
+ [ 1455.112332] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ [ 1455.112372] INFO: task worker:20336 blocked for more than 120 seconds.
+ [ 1455.112392] Tainted: G L 5.3.0-rc5-mdr-vanilla+ #1
+
+CPUs 45, 24, and 124 are stuck on spin locks, likely held by
+CPUs 105 and 31.
+
+CPUs 105 and 31 are stuck in smp_call_function_many(), waiting on
+target CPU 42. For instance:
+
+ # CPU 105 registers (via xmon)
+ R00 = c00000000020b20c R16 = 00007d1bcd800000
+ R01 = c00000363eaa7970 R17 = 0000000000000001
+ R02 = c0000000019b3a00 R18 = 000000000000006b
+ R03 = 000000000000002a R19 = 00007d537d7aecf0
+ R04 = 000000000000002a R20 = 60000000000000e0
+ R05 = 000000000000002a R21 = 0801000000000080
+ R06 = c0002073fb0caa08 R22 = 0000000000000d60
+ R07 = c0000000019ddd78 R23 = 0000000000000001
+ R08 = 000000000000002a R24 = c00000000147a700
+ R09 = 0000000000000001 R25 = c0002073fb0ca908
+ R10 = c000008ffeb4e660 R26 = 0000000000000000
+ R11 = c0002073fb0ca900 R27 = c0000000019e2464
+ R12 = c000000000050790 R28 = c0000000000812b0
+ R13 = c000207fff623e00 R29 = c0002073fb0ca808
+ R14 = 00007d1bbee00000 R30 = c0002073fb0ca800
+ R15 = 00007d1bcd600000 R31 = 0000000000000800
+ pc = c00000000020b260 smp_call_function_many+0x3d0/0x460
+ cfar= c00000000020b270 smp_call_function_many+0x3e0/0x460
+ lr = c00000000020b20c smp_call_function_many+0x37c/0x460
+ msr = 900000010288b033 cr = 44024824
+ ctr = c000000000050790 xer = 0000000000000000 trap = 100
+
+CPU 42 is running normally, doing VCPU work:
+
+ # CPU 42 stack trace (via xmon)
+ [link register ] c00800001be17188 kvmppc_book3s_radix_page_fault+0x90/0x2b0 [kvm_hv]
+ [c000008ed3343820] c000008ed3343850 (unreliable)
+ [c000008ed33438d0] c00800001be11b6c kvmppc_book3s_hv_page_fault+0x264/0xe30 [kvm_hv]
+ [c000008ed33439d0] c00800001be0d7b4 kvmppc_vcpu_run_hv+0x8dc/0xb50 [kvm_hv]
+ [c000008ed3343ae0] c00800001c10891c kvmppc_vcpu_run+0x34/0x48 [kvm]
+ [c000008ed3343b00] c00800001c10475c kvm_arch_vcpu_ioctl_run+0x244/0x420 [kvm]
+ [c000008ed3343b90] c00800001c0f5a78 kvm_vcpu_ioctl+0x470/0x7c8 [kvm]
+ [c000008ed3343d00] c000000000475450 do_vfs_ioctl+0xe0/0xc70
+ [c000008ed3343db0] c0000000004760e4 ksys_ioctl+0x104/0x120
+ [c000008ed3343e00] c000000000476128 sys_ioctl+0x28/0x80
+ [c000008ed3343e20] c00000000000b388 system_call+0x5c/0x70
+ --- Exception: c00 (System Call) at 00007d545cfd7694
+ SP (7d53ff7edf50) is in userspace
+
+It was subsequently found that ipi_message[PPC_MSG_CALL_FUNCTION]
+was set for CPU 42 by at least 1 of the CPUs waiting in
+smp_call_function_many(), but somehow the corresponding
+call_single_queue entries were never processed by CPU 42, causing the
+callers to spin in csd_lock_wait() indefinitely.
+
+Nick Piggin suggested something similar to the following sequence as
+a possible explanation (interleaving of CALL_FUNCTION/RESCHEDULE
+IPI messages seems to be most common, but any mix of CALL_FUNCTION and
+!CALL_FUNCTION messages could trigger it):
+
+ CPU
+ X: smp_muxed_ipi_set_message():
+ X: smp_mb()
+ X: message[RESCHEDULE] = 1
+ X: doorbell_global_ipi(42):
+ X: kvmppc_set_host_ipi(42, 1)
+ X: ppc_msgsnd_sync()/smp_mb()
+ X: ppc_msgsnd() -> 42
+ 42: doorbell_exception(): // from CPU X
+ 42: ppc_msgsync()
+ 105: smp_muxed_ipi_set_message():
+ 105: smb_mb()
+ // STORE DEFERRED DUE TO RE-ORDERING
+ --105: message[CALL_FUNCTION] = 1
+ | 105: doorbell_global_ipi(42):
+ | 105: kvmppc_set_host_ipi(42, 1)
+ | 42: kvmppc_set_host_ipi(42, 0)
+ | 42: smp_ipi_demux_relaxed()
+ | 42: // returns to executing guest
+ | // RE-ORDERED STORE COMPLETES
+ ->105: message[CALL_FUNCTION] = 1
+ 105: ppc_msgsnd_sync()/smp_mb()
+ 105: ppc_msgsnd() -> 42
+ 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ 105: // hangs waiting on 42 to process messages/call_single_queue
+
+This can be prevented with an smp_mb() at the beginning of
+kvmppc_set_host_ipi(), such that stores to message[<type>] (or other
+state indicated by the host_ipi flag) are ordered vs. the store to
+to host_ipi.
+
+However, doing so might still allow for the following scenario (not
+yet observed):
+
+ CPU
+ X: smp_muxed_ipi_set_message():
+ X: smp_mb()
+ X: message[RESCHEDULE] = 1
+ X: doorbell_global_ipi(42):
+ X: kvmppc_set_host_ipi(42, 1)
+ X: ppc_msgsnd_sync()/smp_mb()
+ X: ppc_msgsnd() -> 42
+ 42: doorbell_exception(): // from CPU X
+ 42: ppc_msgsync()
+ // STORE DEFERRED DUE TO RE-ORDERING
+ -- 42: kvmppc_set_host_ipi(42, 0)
+ | 42: smp_ipi_demux_relaxed()
+ | 105: smp_muxed_ipi_set_message():
+ | 105: smb_mb()
+ | 105: message[CALL_FUNCTION] = 1
+ | 105: doorbell_global_ipi(42):
+ | 105: kvmppc_set_host_ipi(42, 1)
+ | // RE-ORDERED STORE COMPLETES
+ -> 42: kvmppc_set_host_ipi(42, 0)
+ 42: // returns to executing guest
+ 105: ppc_msgsnd_sync()/smp_mb()
+ 105: ppc_msgsnd() -> 42
+ 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ 105: // hangs waiting on 42 to process messages/call_single_queue
+
+Fixing this scenario would require an smp_mb() *after* clearing
+host_ipi flag in kvmppc_set_host_ipi() to order the store vs.
+subsequent processing of IPI messages.
+
+To handle both cases, this patch splits kvmppc_set_host_ipi() into
+separate set/clear functions, where we execute smp_mb() prior to
+setting host_ipi flag, and after clearing host_ipi flag. These
+functions pair with each other to synchronize the sender and receiver
+sides.
+
+With that change in place the above workload ran for 20 hours without
+triggering any lock-ups.
+
+Fixes: 755563bc79c7 ("powerpc/powernv: Fixes for hypervisor doorbell handling") # v4.0
+Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
+Acked-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190911223155.16045-1-mdroth@linux.vnet.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/kvm_ppc.h | 100 +++++++++++++++++++++++++-
+ arch/powerpc/kernel/dbell.c | 6 +-
+ arch/powerpc/kvm/book3s_hv_rm_xics.c | 2 +-
+ arch/powerpc/platforms/powernv/smp.c | 2 +-
+ arch/powerpc/sysdev/xics/icp-native.c | 6 +-
+ arch/powerpc/sysdev/xics/icp-opal.c | 6 +-
+ 6 files changed, 108 insertions(+), 14 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
+index e991821dd7fa..a061c3d48c48 100644
+--- a/arch/powerpc/include/asm/kvm_ppc.h
++++ b/arch/powerpc/include/asm/kvm_ppc.h
+@@ -458,9 +458,100 @@ static inline u32 kvmppc_get_xics_latch(void)
+ return xirr;
+ }
+
+-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
++/*
++ * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
++ * a CPU thread that's running/napping inside of a guest is by default regarded
++ * as a request to wake the CPU (if needed) and continue execution within the
++ * guest, potentially to process new state like externally-generated
++ * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
++ *
++ * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
++ * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
++ * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
++ * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
++ * the receiving side prior to processing the IPI work.
++ *
++ * NOTE:
++ *
++ * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
++ * This is to guard against sequences such as the following:
++ *
++ * CPU
++ * X: smp_muxed_ipi_set_message():
++ * X: smp_mb()
++ * X: message[RESCHEDULE] = 1
++ * X: doorbell_global_ipi(42):
++ * X: kvmppc_set_host_ipi(42)
++ * X: ppc_msgsnd_sync()/smp_mb()
++ * X: ppc_msgsnd() -> 42
++ * 42: doorbell_exception(): // from CPU X
++ * 42: ppc_msgsync()
++ * 105: smp_muxed_ipi_set_message():
++ * 105: smb_mb()
++ * // STORE DEFERRED DUE TO RE-ORDERING
++ * --105: message[CALL_FUNCTION] = 1
++ * | 105: doorbell_global_ipi(42):
++ * | 105: kvmppc_set_host_ipi(42)
++ * | 42: kvmppc_clear_host_ipi(42)
++ * | 42: smp_ipi_demux_relaxed()
++ * | 42: // returns to executing guest
++ * | // RE-ORDERED STORE COMPLETES
++ * ->105: message[CALL_FUNCTION] = 1
++ * 105: ppc_msgsnd_sync()/smp_mb()
++ * 105: ppc_msgsnd() -> 42
++ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
++ * 105: // hangs waiting on 42 to process messages/call_single_queue
++ *
++ * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
++ * to guard against sequences such as the following (as well as to create
++ * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
++ *
++ * CPU
++ * X: smp_muxed_ipi_set_message():
++ * X: smp_mb()
++ * X: message[RESCHEDULE] = 1
++ * X: doorbell_global_ipi(42):
++ * X: kvmppc_set_host_ipi(42)
++ * X: ppc_msgsnd_sync()/smp_mb()
++ * X: ppc_msgsnd() -> 42
++ * 42: doorbell_exception(): // from CPU X
++ * 42: ppc_msgsync()
++ * // STORE DEFERRED DUE TO RE-ORDERING
++ * -- 42: kvmppc_clear_host_ipi(42)
++ * | 42: smp_ipi_demux_relaxed()
++ * | 105: smp_muxed_ipi_set_message():
++ * | 105: smb_mb()
++ * | 105: message[CALL_FUNCTION] = 1
++ * | 105: doorbell_global_ipi(42):
++ * | 105: kvmppc_set_host_ipi(42)
++ * | // RE-ORDERED STORE COMPLETES
++ * -> 42: kvmppc_clear_host_ipi(42)
++ * 42: // returns to executing guest
++ * 105: ppc_msgsnd_sync()/smp_mb()
++ * 105: ppc_msgsnd() -> 42
++ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
++ * 105: // hangs waiting on 42 to process messages/call_single_queue
++ */
++static inline void kvmppc_set_host_ipi(int cpu)
+ {
+- paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
++ /*
++ * order stores of IPI messages vs. setting of host_ipi flag
++ *
++ * pairs with the barrier in kvmppc_clear_host_ipi()
++ */
++ smp_mb();
++ paca_ptrs[cpu]->kvm_hstate.host_ipi = 1;
++}
++
++static inline void kvmppc_clear_host_ipi(int cpu)
++{
++ paca_ptrs[cpu]->kvm_hstate.host_ipi = 0;
++ /*
++ * order clearing of host_ipi flag vs. processing of IPI messages
++ *
++ * pairs with the barrier in kvmppc_set_host_ipi()
++ */
++ smp_mb();
+ }
+
+ static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+@@ -489,7 +580,10 @@ static inline u32 kvmppc_get_xics_latch(void)
+ return 0;
+ }
+
+-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
++static inline void kvmppc_set_host_ipi(int cpu)
++{}
++
++static inline void kvmppc_clear_host_ipi(int cpu)
+ {}
+
+ static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
+index b6fe883b1016..5828144555af 100644
+--- a/arch/powerpc/kernel/dbell.c
++++ b/arch/powerpc/kernel/dbell.c
+@@ -36,7 +36,7 @@ void doorbell_global_ipi(int cpu)
+ {
+ u32 tag = get_hard_smp_processor_id(cpu);
+
+- kvmppc_set_host_ipi(cpu, 1);
++ kvmppc_set_host_ipi(cpu);
+ /* Order previous accesses vs. msgsnd, which is treated as a store */
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+@@ -51,7 +51,7 @@ void doorbell_core_ipi(int cpu)
+ {
+ u32 tag = cpu_thread_in_core(cpu);
+
+- kvmppc_set_host_ipi(cpu, 1);
++ kvmppc_set_host_ipi(cpu);
+ /* Order previous accesses vs. msgsnd, which is treated as a store */
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+@@ -86,7 +86,7 @@ void doorbell_exception(struct pt_regs *regs)
+
+ may_hard_irq_enable();
+
+- kvmppc_set_host_ipi(smp_processor_id(), 0);
++ kvmppc_clear_host_ipi(smp_processor_id());
+ __this_cpu_inc(irq_stat.doorbell_irqs);
+
+ smp_ipi_demux_relaxed(); /* already performed the barrier */
+diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
+index 758d1d23215e..aaafb9f080d5 100644
+--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
+@@ -61,7 +61,7 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
+ hcpu = hcore << threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
+ smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
+- kvmppc_set_host_ipi(hcpu, 1);
++ kvmppc_set_host_ipi(hcpu);
+ smp_mb();
+ kvmhv_rm_send_ipi(hcpu);
+ }
+diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
+index fdd9577d1798..3d3c989e44dd 100644
+--- a/arch/powerpc/platforms/powernv/smp.c
++++ b/arch/powerpc/platforms/powernv/smp.c
+@@ -223,7 +223,7 @@ static void pnv_smp_cpu_kill_self(void)
+ * for coming online, which are handled via
+ * generic_check_cpu_restart() calls.
+ */
+- kvmppc_set_host_ipi(cpu, 0);
++ kvmppc_clear_host_ipi(cpu);
+
+ srr1 = pnv_cpu_offline(cpu);
+
+diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
+index 37bfbc54aacb..340de58a15bd 100644
+--- a/arch/powerpc/sysdev/xics/icp-native.c
++++ b/arch/powerpc/sysdev/xics/icp-native.c
+@@ -145,7 +145,7 @@ static unsigned int icp_native_get_irq(void)
+
+ static void icp_native_cause_ipi(int cpu)
+ {
+- kvmppc_set_host_ipi(cpu, 1);
++ kvmppc_set_host_ipi(cpu);
+ icp_native_set_qirr(cpu, IPI_PRIORITY);
+ }
+
+@@ -184,7 +184,7 @@ void icp_native_flush_interrupt(void)
+ if (vec == XICS_IPI) {
+ /* Clear pending IPI */
+ int cpu = smp_processor_id();
+- kvmppc_set_host_ipi(cpu, 0);
++ kvmppc_clear_host_ipi(cpu);
+ icp_native_set_qirr(cpu, 0xff);
+ } else {
+ pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
+@@ -205,7 +205,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
+ {
+ int cpu = smp_processor_id();
+
+- kvmppc_set_host_ipi(cpu, 0);
++ kvmppc_clear_host_ipi(cpu);
+ icp_native_set_qirr(cpu, 0xff);
+
+ return smp_ipi_demux();
+diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
+index c71d2ea42627..e3e52cf035a9 100644
+--- a/arch/powerpc/sysdev/xics/icp-opal.c
++++ b/arch/powerpc/sysdev/xics/icp-opal.c
+@@ -130,7 +130,7 @@ static void icp_opal_cause_ipi(int cpu)
+ {
+ int hw_cpu = get_hard_smp_processor_id(cpu);
+
+- kvmppc_set_host_ipi(cpu, 1);
++ kvmppc_set_host_ipi(cpu);
+ opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
+ }
+
+@@ -138,7 +138,7 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
+ {
+ int cpu = smp_processor_id();
+
+- kvmppc_set_host_ipi(cpu, 0);
++ kvmppc_clear_host_ipi(cpu);
+ opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+
+ return smp_ipi_demux();
+@@ -161,7 +161,7 @@ void icp_opal_flush_interrupt(void)
+ if (vec == XICS_IPI) {
+ /* Clear pending IPI */
+ int cpu = smp_processor_id();
+- kvmppc_set_host_ipi(cpu, 0);
++ kvmppc_clear_host_ipi(cpu);
+ opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+ } else {
+ pr_err("XICS: hw interrupt 0x%x to offline cpu, "
+--
+2.20.1
+
--- /dev/null
+From 5964d1bf3c23ca33377a84e082b1df0590533f6d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Nov 2019 18:29:11 -0800
+Subject: net: add annotations on hh->hh_len lockless accesses
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c305c6ae79e2ce20c22660ceda94f0d86d639a82 ]
+
+KCSAN reported a data-race [1]
+
+While we can use READ_ONCE() on the read sides,
+we need to make sure hh->hh_len is written last.
+
+[1]
+
+BUG: KCSAN: data-race in eth_header_cache / neigh_resolve_output
+
+write to 0xffff8880b9dedcb8 of 4 bytes by task 29760 on cpu 0:
+ eth_header_cache+0xa9/0xd0 net/ethernet/eth.c:247
+ neigh_hh_init net/core/neighbour.c:1463 [inline]
+ neigh_resolve_output net/core/neighbour.c:1480 [inline]
+ neigh_resolve_output+0x415/0x470 net/core/neighbour.c:1470
+ neigh_output include/net/neighbour.h:511 [inline]
+ ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116
+ __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
+ __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
+ ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
+ NF_HOOK_COND include/linux/netfilter.h:294 [inline]
+ ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
+ dst_output include/net/dst.h:436 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505
+ ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647
+ rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615
+ process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
+ worker_thread+0xa0/0x800 kernel/workqueue.c:2415
+ kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352
+
+read to 0xffff8880b9dedcb8 of 4 bytes by task 29572 on cpu 1:
+ neigh_resolve_output net/core/neighbour.c:1479 [inline]
+ neigh_resolve_output+0x113/0x470 net/core/neighbour.c:1470
+ neigh_output include/net/neighbour.h:511 [inline]
+ ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116
+ __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
+ __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
+ ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
+ NF_HOOK_COND include/linux/netfilter.h:294 [inline]
+ ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
+ dst_output include/net/dst.h:436 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505
+ ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647
+ rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615
+ process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
+ worker_thread+0xa0/0x800 kernel/workqueue.c:2415
+ kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 29572 Comm: kworker/1:4 Not tainted 5.4.0-rc6+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: events rt6_probe_deferred
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firewire/net.c | 6 +++++-
+ include/net/neighbour.h | 2 +-
+ net/core/neighbour.c | 4 ++--
+ net/ethernet/eth.c | 7 ++++++-
+ 4 files changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
+index 82ba110d9d1a..bbabfca812bb 100644
+--- a/drivers/firewire/net.c
++++ b/drivers/firewire/net.c
+@@ -249,7 +249,11 @@ static int fwnet_header_cache(const struct neighbour *neigh,
+ h = (struct fwnet_header *)((u8 *)hh->hh_data + HH_DATA_OFF(sizeof(*h)));
+ h->h_proto = type;
+ memcpy(h->h_dest, neigh->ha, net->addr_len);
+- hh->hh_len = FWNET_HLEN;
++
++ /* Pairs with the READ_ONCE() in neigh_resolve_output(),
++ * neigh_hh_output() and neigh_update_hhs().
++ */
++ smp_store_release(&hh->hh_len, FWNET_HLEN);
+
+ return 0;
+ }
+diff --git a/include/net/neighbour.h b/include/net/neighbour.h
+index c84807c1c5bd..5ce035984a4d 100644
+--- a/include/net/neighbour.h
++++ b/include/net/neighbour.h
+@@ -459,7 +459,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
+
+ do {
+ seq = read_seqbegin(&hh->hh_lock);
+- hh_len = hh->hh_len;
++ hh_len = READ_ONCE(hh->hh_len);
+ if (likely(hh_len <= HH_DATA_MOD)) {
+ hh_alen = HH_DATA_MOD;
+
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index 7597afee7068..e260d44ebdca 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -1097,7 +1097,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
+
+ if (update) {
+ hh = &neigh->hh;
+- if (hh->hh_len) {
++ if (READ_ONCE(hh->hh_len)) {
+ write_seqlock_bh(&hh->hh_lock);
+ update(hh, neigh->dev, neigh->ha);
+ write_sequnlock_bh(&hh->hh_lock);
+@@ -1360,7 +1360,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
+ struct net_device *dev = neigh->dev;
+ unsigned int seq;
+
+- if (dev->header_ops->cache && !neigh->hh.hh_len)
++ if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
+ neigh_hh_init(neigh);
+
+ do {
+diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
+index fd8faa0dfa61..ca06e9a53d15 100644
+--- a/net/ethernet/eth.c
++++ b/net/ethernet/eth.c
+@@ -239,7 +239,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16
+ eth->h_proto = type;
+ memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
+ memcpy(eth->h_dest, neigh->ha, ETH_ALEN);
+- hh->hh_len = ETH_HLEN;
++
++ /* Pairs with READ_ONCE() in neigh_resolve_output(),
++ * neigh_hh_output() and neigh_update_hhs().
++ */
++ smp_store_release(&hh->hh_len, ETH_HLEN);
++
+ return 0;
+ }
+ EXPORT_SYMBOL(eth_header_cache);
+--
+2.20.1
+
--- /dev/null
+From 4ae9b1f9371093b86c74c1905accbd1aaf0a0dce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Oct 2019 18:47:50 +0000
+Subject: net: core: limit nested device depth
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 5343da4c17429efaa5fb1594ea96aee1a283e694 ]
+
+Current code doesn't limit the number of nested devices.
+Nested devices would be handled recursively and this needs huge stack
+memory. So, unlimited nested devices could make stack overflow.
+
+This patch adds upper_level and lower_level, they are common variables
+and represent maximum lower/upper depth.
+When upper/lower device is attached or detached,
+{lower/upper}_level are updated, and if maximum depth is bigger than 8,
+attach routine fails and returns -EMLINK.
+
+In addition, this patch converts recursive routine of
+netdev_walk_all_{lower/upper} to iterator routine.
+
+Test commands:
+ ip link add dummy0 type dummy
+ ip link add link dummy0 name vlan1 type vlan id 1
+ ip link set vlan1 up
+
+ for i in {2..55}
+ do
+ let A=$i-1
+
+ ip link add vlan$i link vlan$A type vlan id $i
+ done
+ ip link del dummy0
+
+Splat looks like:
+[ 155.513226][ T908] BUG: KASAN: use-after-free in __unwind_start+0x71/0x850
+[ 155.514162][ T908] Write of size 88 at addr ffff8880608a6cc0 by task ip/908
+[ 155.515048][ T908]
+[ 155.515333][ T908] CPU: 0 PID: 908 Comm: ip Not tainted 5.4.0-rc3+ #96
+[ 155.516147][ T908] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[ 155.517233][ T908] Call Trace:
+[ 155.517627][ T908]
+[ 155.517918][ T908] Allocated by task 0:
+[ 155.518412][ T908] (stack is not available)
+[ 155.518955][ T908]
+[ 155.519228][ T908] Freed by task 0:
+[ 155.519885][ T908] (stack is not available)
+[ 155.520452][ T908]
+[ 155.520729][ T908] The buggy address belongs to the object at ffff8880608a6ac0
+[ 155.520729][ T908] which belongs to the cache names_cache of size 4096
+[ 155.522387][ T908] The buggy address is located 512 bytes inside of
+[ 155.522387][ T908] 4096-byte region [ffff8880608a6ac0, ffff8880608a7ac0)
+[ 155.523920][ T908] The buggy address belongs to the page:
+[ 155.524552][ T908] page:ffffea0001822800 refcount:1 mapcount:0 mapping:ffff88806c657cc0 index:0x0 compound_mapcount:0
+[ 155.525836][ T908] flags: 0x100000000010200(slab|head)
+[ 155.526445][ T908] raw: 0100000000010200 ffffea0001813808 ffffea0001a26c08 ffff88806c657cc0
+[ 155.527424][ T908] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000
+[ 155.528429][ T908] page dumped because: kasan: bad access detected
+[ 155.529158][ T908]
+[ 155.529410][ T908] Memory state around the buggy address:
+[ 155.530060][ T908] ffff8880608a6b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 155.530971][ T908] ffff8880608a6c00: fb fb fb fb fb f1 f1 f1 f1 00 f2 f2 f2 f3 f3 f3
+[ 155.531889][ T908] >ffff8880608a6c80: f3 fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 155.532806][ T908] ^
+[ 155.533509][ T908] ffff8880608a6d00: fb fb fb fb fb fb fb fb fb f1 f1 f1 f1 00 00 00
+[ 155.534436][ T908] ffff8880608a6d80: f2 f3 f3 f3 f3 fb fb fb 00 00 00 00 00 00 00 00
+[ ... ]
+
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 4 +
+ net/core/dev.c | 272 +++++++++++++++++++++++++++++++-------
+ 2 files changed, 231 insertions(+), 45 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 9dfa0ae173ac..d5527e3828d1 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1619,6 +1619,8 @@ enum netdev_priv_flags {
+ * @perm_addr: Permanent hw address
+ * @addr_assign_type: Hw address assignment type
+ * @addr_len: Hardware address length
++ * @upper_level: Maximum depth level of upper devices.
++ * @lower_level: Maximum depth level of lower devices.
+ * @neigh_priv_len: Used in neigh_alloc()
+ * @dev_id: Used to differentiate devices that share
+ * the same link layer address
+@@ -1853,6 +1855,8 @@ struct net_device {
+ unsigned char perm_addr[MAX_ADDR_LEN];
+ unsigned char addr_assign_type;
+ unsigned char addr_len;
++ unsigned char upper_level;
++ unsigned char lower_level;
+ unsigned short neigh_priv_len;
+ unsigned short dev_id;
+ unsigned short dev_port;
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 8ff21d461f08..a26d87073f71 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -149,6 +149,7 @@
+ #include "net-sysfs.h"
+
+ #define MAX_GRO_SKBS 8
++#define MAX_NEST_DEV 8
+
+ /* This should be increased if a protocol with a bigger head is added. */
+ #define GRO_MAX_HEAD (MAX_HEADER + 128)
+@@ -6542,6 +6543,21 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ }
+ EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
++static struct net_device *netdev_next_upper_dev(struct net_device *dev,
++ struct list_head **iter)
++{
++ struct netdev_adjacent *upper;
++
++ upper = list_entry((*iter)->next, struct netdev_adjacent, list);
++
++ if (&upper->list == &dev->adj_list.upper)
++ return NULL;
++
++ *iter = &upper->list;
++
++ return upper->dev;
++}
++
+ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
+ {
+@@ -6559,28 +6575,93 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
+ return upper->dev;
+ }
+
++static int netdev_walk_all_upper_dev(struct net_device *dev,
++ int (*fn)(struct net_device *dev,
++ void *data),
++ void *data)
++{
++ struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
++ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
++ int ret, cur = 0;
++
++ now = dev;
++ iter = &dev->adj_list.upper;
++
++ while (1) {
++ if (now != dev) {
++ ret = fn(now, data);
++ if (ret)
++ return ret;
++ }
++
++ next = NULL;
++ while (1) {
++ udev = netdev_next_upper_dev(now, &iter);
++ if (!udev)
++ break;
++
++ next = udev;
++ niter = &udev->adj_list.upper;
++ dev_stack[cur] = now;
++ iter_stack[cur++] = iter;
++ break;
++ }
++
++ if (!next) {
++ if (!cur)
++ return 0;
++ next = dev_stack[--cur];
++ niter = iter_stack[cur];
++ }
++
++ now = next;
++ iter = niter;
++ }
++
++ return 0;
++}
++
+ int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+ {
+- struct net_device *udev;
+- struct list_head *iter;
+- int ret;
++ struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
++ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
++ int ret, cur = 0;
+
+- for (iter = &dev->adj_list.upper,
+- udev = netdev_next_upper_dev_rcu(dev, &iter);
+- udev;
+- udev = netdev_next_upper_dev_rcu(dev, &iter)) {
+- /* first is the upper device itself */
+- ret = fn(udev, data);
+- if (ret)
+- return ret;
++ now = dev;
++ iter = &dev->adj_list.upper;
+
+- /* then look at all of its upper devices */
+- ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
+- if (ret)
+- return ret;
++ while (1) {
++ if (now != dev) {
++ ret = fn(now, data);
++ if (ret)
++ return ret;
++ }
++
++ next = NULL;
++ while (1) {
++ udev = netdev_next_upper_dev_rcu(now, &iter);
++ if (!udev)
++ break;
++
++ next = udev;
++ niter = &udev->adj_list.upper;
++ dev_stack[cur] = now;
++ iter_stack[cur++] = iter;
++ break;
++ }
++
++ if (!next) {
++ if (!cur)
++ return 0;
++ next = dev_stack[--cur];
++ niter = iter_stack[cur];
++ }
++
++ now = next;
++ iter = niter;
+ }
+
+ return 0;
+@@ -6688,23 +6769,42 @@ int netdev_walk_all_lower_dev(struct net_device *dev,
+ void *data),
+ void *data)
+ {
+- struct net_device *ldev;
+- struct list_head *iter;
+- int ret;
++ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
++ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
++ int ret, cur = 0;
+
+- for (iter = &dev->adj_list.lower,
+- ldev = netdev_next_lower_dev(dev, &iter);
+- ldev;
+- ldev = netdev_next_lower_dev(dev, &iter)) {
+- /* first is the lower device itself */
+- ret = fn(ldev, data);
+- if (ret)
+- return ret;
++ now = dev;
++ iter = &dev->adj_list.lower;
+
+- /* then look at all of its lower devices */
+- ret = netdev_walk_all_lower_dev(ldev, fn, data);
+- if (ret)
+- return ret;
++ while (1) {
++ if (now != dev) {
++ ret = fn(now, data);
++ if (ret)
++ return ret;
++ }
++
++ next = NULL;
++ while (1) {
++ ldev = netdev_next_lower_dev(now, &iter);
++ if (!ldev)
++ break;
++
++ next = ldev;
++ niter = &ldev->adj_list.lower;
++ dev_stack[cur] = now;
++ iter_stack[cur++] = iter;
++ break;
++ }
++
++ if (!next) {
++ if (!cur)
++ return 0;
++ next = dev_stack[--cur];
++ niter = iter_stack[cur];
++ }
++
++ now = next;
++ iter = niter;
+ }
+
+ return 0;
+@@ -6725,28 +6825,93 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+ return lower->dev;
+ }
+
+-int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+- int (*fn)(struct net_device *dev,
+- void *data),
+- void *data)
++static u8 __netdev_upper_depth(struct net_device *dev)
++{
++ struct net_device *udev;
++ struct list_head *iter;
++ u8 max_depth = 0;
++
++ for (iter = &dev->adj_list.upper,
++ udev = netdev_next_upper_dev(dev, &iter);
++ udev;
++ udev = netdev_next_upper_dev(dev, &iter)) {
++ if (max_depth < udev->upper_level)
++ max_depth = udev->upper_level;
++ }
++
++ return max_depth;
++}
++
++static u8 __netdev_lower_depth(struct net_device *dev)
+ {
+ struct net_device *ldev;
+ struct list_head *iter;
+- int ret;
++ u8 max_depth = 0;
+
+ for (iter = &dev->adj_list.lower,
+- ldev = netdev_next_lower_dev_rcu(dev, &iter);
++ ldev = netdev_next_lower_dev(dev, &iter);
+ ldev;
+- ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
+- /* first is the lower device itself */
+- ret = fn(ldev, data);
+- if (ret)
+- return ret;
++ ldev = netdev_next_lower_dev(dev, &iter)) {
++ if (max_depth < ldev->lower_level)
++ max_depth = ldev->lower_level;
++ }
+
+- /* then look at all of its lower devices */
+- ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
+- if (ret)
+- return ret;
++ return max_depth;
++}
++
++static int __netdev_update_upper_level(struct net_device *dev, void *data)
++{
++ dev->upper_level = __netdev_upper_depth(dev) + 1;
++ return 0;
++}
++
++static int __netdev_update_lower_level(struct net_device *dev, void *data)
++{
++ dev->lower_level = __netdev_lower_depth(dev) + 1;
++ return 0;
++}
++
++int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
++ int (*fn)(struct net_device *dev,
++ void *data),
++ void *data)
++{
++ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
++ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
++ int ret, cur = 0;
++
++ now = dev;
++ iter = &dev->adj_list.lower;
++
++ while (1) {
++ if (now != dev) {
++ ret = fn(now, data);
++ if (ret)
++ return ret;
++ }
++
++ next = NULL;
++ while (1) {
++ ldev = netdev_next_lower_dev_rcu(now, &iter);
++ if (!ldev)
++ break;
++
++ next = ldev;
++ niter = &ldev->adj_list.lower;
++ dev_stack[cur] = now;
++ iter_stack[cur++] = iter;
++ break;
++ }
++
++ if (!next) {
++ if (!cur)
++ return 0;
++ next = dev_stack[--cur];
++ niter = iter_stack[cur];
++ }
++
++ now = next;
++ iter = niter;
+ }
+
+ return 0;
+@@ -7003,6 +7168,9 @@ static int __netdev_upper_dev_link(struct net_device *dev,
+ if (netdev_has_upper_dev(upper_dev, dev))
+ return -EBUSY;
+
++ if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
++ return -EMLINK;
++
+ if (!master) {
+ if (netdev_has_upper_dev(dev, upper_dev))
+ return -EEXIST;
+@@ -7029,6 +7197,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
+ if (ret)
+ goto rollback;
+
++ __netdev_update_upper_level(dev, NULL);
++ netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
++
++ __netdev_update_lower_level(upper_dev, NULL);
++ netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL);
++
+ return 0;
+
+ rollback:
+@@ -7111,6 +7285,12 @@ void netdev_upper_dev_unlink(struct net_device *dev,
+
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
+ &changeupper_info.info);
++
++ __netdev_update_upper_level(dev, NULL);
++ netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
++
++ __netdev_update_lower_level(upper_dev, NULL);
++ netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL);
+ }
+ EXPORT_SYMBOL(netdev_upper_dev_unlink);
+
+@@ -8978,6 +9158,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+
+ dev->gso_max_size = GSO_MAX_SIZE;
+ dev->gso_max_segs = GSO_MAX_SEGS;
++ dev->upper_level = 1;
++ dev->lower_level = 1;
+
+ INIT_LIST_HEAD(&dev->napi_list);
+ INIT_LIST_HEAD(&dev->unreg_list);
+--
+2.20.1
+
--- /dev/null
+From 17029394b8fb11ceac999eeed1081584640412a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Dec 2019 17:28:52 +0300
+Subject: perf/x86/intel/bts: Fix the use of page_private()
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit ff61541cc6c1962957758ba433c574b76f588d23 ]
+
+Commit
+
+ 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver")
+
+brought in a warning with the BTS buffer initialization
+that is easily tripped with (assuming KPTI is disabled):
+
+instantly throwing:
+
+> ------------[ cut here ]------------
+> WARNING: CPU: 2 PID: 326 at arch/x86/events/intel/bts.c:86 bts_buffer_setup_aux+0x117/0x3d0
+> Modules linked in:
+> CPU: 2 PID: 326 Comm: perf Not tainted 5.4.0-rc8-00291-gceb9e77324fa #904
+> RIP: 0010:bts_buffer_setup_aux+0x117/0x3d0
+> Call Trace:
+> rb_alloc_aux+0x339/0x550
+> perf_mmap+0x607/0xc70
+> mmap_region+0x76b/0xbd0
+...
+
+It appears to assume (for lost raisins) that PagePrivate() is set,
+while later it actually tests for PagePrivate() before using
+page_private().
+
+Make it consistent and always check PagePrivate() before using
+page_private().
+
+Fixes: 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver")
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Link: https://lkml.kernel.org/r/20191205142853.28894-2-alexander.shishkin@linux.intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/events/intel/bts.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
+index 7139f6bf27ad..510f9461407e 100644
+--- a/arch/x86/events/intel/bts.c
++++ b/arch/x86/events/intel/bts.c
+@@ -71,9 +71,17 @@ struct bts_buffer {
+
+ static struct pmu bts_pmu;
+
++static int buf_nr_pages(struct page *page)
++{
++ if (!PagePrivate(page))
++ return 1;
++
++ return 1 << page_private(page);
++}
++
+ static size_t buf_size(struct page *page)
+ {
+- return 1 << (PAGE_SHIFT + page_private(page));
++ return buf_nr_pages(page) * PAGE_SIZE;
+ }
+
+ static void *
+@@ -91,9 +99,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
+ /* count all the high order buffers */
+ for (pg = 0, nbuf = 0; pg < nr_pages;) {
+ page = virt_to_page(pages[pg]);
+- if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
+- return NULL;
+- pg += 1 << page_private(page);
++ pg += buf_nr_pages(page);
+ nbuf++;
+ }
+
+@@ -117,7 +123,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
+ unsigned int __nr_pages;
+
+ page = virt_to_page(pages[pg]);
+- __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
++ __nr_pages = buf_nr_pages(page);
+ buf->buf[nbuf].page = page;
+ buf->buf[nbuf].offset = offset;
+ buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+--
+2.20.1
+
--- /dev/null
+From acbf6f08f638cff2c287a07e09819ad9fee1c897 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2019 16:56:57 +1000
+Subject: powerpc/pseries/hvconsole: Fix stack overread via udbg
+
+From: Daniel Axtens <dja@axtens.net>
+
+[ Upstream commit 934bda59f286d0221f1a3ebab7f5156a996cc37d ]
+
+While developing KASAN for 64-bit book3s, I hit the following stack
+over-read.
+
+It occurs because the hypercall to put characters onto the terminal
+takes 2 longs (128 bits/16 bytes) of characters at a time, and so
+hvc_put_chars() would unconditionally copy 16 bytes from the argument
+buffer, regardless of supplied length. However, udbg_hvc_putc() can
+call hvc_put_chars() with a single-byte buffer, leading to the error.
+
+ ==================================================================
+ BUG: KASAN: stack-out-of-bounds in hvc_put_chars+0xdc/0x110
+ Read of size 8 at addr c0000000023e7a90 by task swapper/0
+
+ CPU: 0 PID: 0 Comm: swapper Not tainted 5.2.0-rc2-next-20190528-02824-g048a6ab4835b #113
+ Call Trace:
+ dump_stack+0x104/0x154 (unreliable)
+ print_address_description+0xa0/0x30c
+ __kasan_report+0x20c/0x224
+ kasan_report+0x18/0x30
+ __asan_report_load8_noabort+0x24/0x40
+ hvc_put_chars+0xdc/0x110
+ hvterm_raw_put_chars+0x9c/0x110
+ udbg_hvc_putc+0x154/0x200
+ udbg_write+0xf0/0x240
+ console_unlock+0x868/0xd30
+ register_console+0x970/0xe90
+ register_early_udbg_console+0xf8/0x114
+ setup_arch+0x108/0x790
+ start_kernel+0x104/0x784
+ start_here_common+0x1c/0x534
+
+ Memory state around the buggy address:
+ c0000000023e7980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ c0000000023e7a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1
+ >c0000000023e7a80: f1 f1 01 f2 f2 f2 00 00 00 00 00 00 00 00 00 00
+ ^
+ c0000000023e7b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ c0000000023e7b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ==================================================================
+
+Document that a 16-byte buffer is required, and provide it in udbg.
+
+Signed-off-by: Daniel Axtens <dja@axtens.net>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/platforms/pseries/hvconsole.c | 2 +-
+ drivers/tty/hvc/hvc_vio.c | 16 +++++++++++++++-
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
+index 74da18de853a..73ec15cd2708 100644
+--- a/arch/powerpc/platforms/pseries/hvconsole.c
++++ b/arch/powerpc/platforms/pseries/hvconsole.c
+@@ -62,7 +62,7 @@ EXPORT_SYMBOL(hvc_get_chars);
+ * @vtermno: The vtermno or unit_address of the adapter from which the data
+ * originated.
+ * @buf: The character buffer that contains the character data to send to
+- * firmware.
++ * firmware. Must be at least 16 bytes, even if count is less than 16.
+ * @count: Send this number of characters.
+ */
+ int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
+diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c
+index 59eaa620bf13..80fd06fbd712 100644
+--- a/drivers/tty/hvc/hvc_vio.c
++++ b/drivers/tty/hvc/hvc_vio.c
+@@ -107,6 +107,14 @@ static int hvterm_raw_get_chars(uint32_t vtermno, char *buf, int count)
+ return got;
+ }
+
++/**
++ * hvterm_raw_put_chars: send characters to firmware for given vterm adapter
++ * @vtermno: The virtual terminal number.
++ * @buf: The characters to send. Because of the underlying hypercall in
++ * hvc_put_chars(), this buffer must be at least 16 bytes long, even if
++ * you are sending fewer chars.
++ * @count: number of chars to send.
++ */
+ static int hvterm_raw_put_chars(uint32_t vtermno, const char *buf, int count)
+ {
+ struct hvterm_priv *pv = hvterm_privs[vtermno];
+@@ -219,6 +227,7 @@ static const struct hv_ops hvterm_hvsi_ops = {
+ static void udbg_hvc_putc(char c)
+ {
+ int count = -1;
++ unsigned char bounce_buffer[16];
+
+ if (!hvterm_privs[0])
+ return;
+@@ -229,7 +238,12 @@ static void udbg_hvc_putc(char c)
+ do {
+ switch(hvterm_privs[0]->proto) {
+ case HV_PROTOCOL_RAW:
+- count = hvterm_raw_put_chars(0, &c, 1);
++ /*
++ * hvterm_raw_put_chars requires at least a 16-byte
++ * buffer, so go via the bounce buffer
++ */
++ bounce_buffer[0] = c;
++ count = hvterm_raw_put_chars(0, bounce_buffer, 1);
+ break;
+ case HV_PROTOCOL_HVSI:
+ count = hvterm_hvsi_put_chars(0, &c, 1);
+--
+2.20.1
+
--- /dev/null
+From 27bc67f62237f2288b91ce198d12b9913182c428 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Oct 2019 15:52:34 +0100
+Subject: rxrpc: Fix possible NULL pointer access in ICMP handling
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit f0308fb0708078d6c1d8a4d533941a7a191af634 ]
+
+If an ICMP packet comes in on the UDP socket backing an AF_RXRPC socket as
+the UDP socket is being shut down, rxrpc_error_report() may get called to
+deal with it after sk_user_data on the UDP socket has been cleared, leading
+to a NULL pointer access when this local endpoint record gets accessed.
+
+Fix this by just returning immediately if sk_user_data was NULL.
+
+The oops looks like the following:
+
+#PF: supervisor read access in kernel mode
+#PF: error_code(0x0000) - not-present page
+...
+RIP: 0010:rxrpc_error_report+0x1bd/0x6a9
+...
+Call Trace:
+ ? sock_queue_err_skb+0xbd/0xde
+ ? __udp4_lib_err+0x313/0x34d
+ __udp4_lib_err+0x313/0x34d
+ icmp_unreach+0x1ee/0x207
+ icmp_rcv+0x25b/0x28f
+ ip_protocol_deliver_rcu+0x95/0x10e
+ ip_local_deliver+0xe9/0x148
+ __netif_receive_skb_one_core+0x52/0x6e
+ process_backlog+0xdc/0x177
+ net_rx_action+0xf9/0x270
+ __do_softirq+0x1b6/0x39a
+ ? smpboot_register_percpu_thread+0xce/0xce
+ run_ksoftirqd+0x1d/0x42
+ smpboot_thread_fn+0x19e/0x1b3
+ kthread+0xf1/0xf6
+ ? kthread_delayed_work_timer_fn+0x83/0x83
+ ret_from_fork+0x24/0x30
+
+Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
+Reported-by: syzbot+611164843bd48cc2190c@syzkaller.appspotmail.com
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rxrpc/peer_event.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
+index dc7fdaf20445..42582a9ff81d 100644
+--- a/net/rxrpc/peer_event.c
++++ b/net/rxrpc/peer_event.c
+@@ -153,6 +153,9 @@ void rxrpc_error_report(struct sock *sk)
+ struct rxrpc_peer *peer;
+ struct sk_buff *skb;
+
++ if (unlikely(!local))
++ return;
++
+ _enter("%p{%d}", sk, local->debug_id);
+
+ skb = sock_dequeue_err_skb(sk);
+--
+2.20.1
+
--- /dev/null
+From 2a2d16b8855ba8661bab7c8fb144c37a91bfcc32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Nov 2019 14:55:38 +0100
+Subject: s390/smp: fix physical to logical CPU map for SMT
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+[ Upstream commit 72a81ad9d6d62dcb79f7e8ad66ffd1c768b72026 ]
+
+If an SMT capable system is not IPL'ed from the first CPU the setup of
+the physical to logical CPU mapping is broken: the IPL core gets CPU
+number 0, but then the next core gets CPU number 1. Correct would be
+that all SMT threads of CPU 0 get the subsequent logical CPU numbers.
+
+This is important since a lot of code (like e.g. the CPU topology
+code) assumes that CPU maps are setup like this. If the mapping is
+broken the system will not IPL due to broken topology masks:
+
+[ 1.716341] BUG: arch topology broken
+[ 1.716342] the SMT domain not a subset of the MC domain
+[ 1.716343] BUG: arch topology broken
+[ 1.716344] the MC domain not a subset of the BOOK domain
+
+This scenario can usually not happen since LPARs are always IPL'ed
+from CPU 0 and also re-IPL is initiated from CPU 0. However older
+kernels did initiate re-IPL on an arbitrary CPU. If therefore a re-IPL
+from an old kernel into a new kernel is initiated this may lead to
+a crash.
+
+Fix this by setting up the physical to logical CPU mapping correctly.
+
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/smp.c | 80 ++++++++++++++++++++++++++++--------------
+ 1 file changed, 54 insertions(+), 26 deletions(-)
+
+diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
+index df2413f26a8f..ecd24711f3aa 100644
+--- a/arch/s390/kernel/smp.c
++++ b/arch/s390/kernel/smp.c
+@@ -715,39 +715,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
+
+ static int smp_add_present_cpu(int cpu);
+
+-static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
++static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
++ bool configured, bool early)
+ {
+ struct pcpu *pcpu;
+- cpumask_t avail;
+- int cpu, nr, i, j;
++ int cpu, nr, i;
+ u16 address;
+
+ nr = 0;
+- cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+- cpu = cpumask_first(&avail);
+- for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
+- if (sclp.has_core_type && info->core[i].type != boot_core_type)
++ if (sclp.has_core_type && core->type != boot_core_type)
++ return nr;
++ cpu = cpumask_first(avail);
++ address = core->core_id << smp_cpu_mt_shift;
++ for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
++ if (pcpu_find_address(cpu_present_mask, address + i))
+ continue;
+- address = info->core[i].core_id << smp_cpu_mt_shift;
+- for (j = 0; j <= smp_cpu_mtid; j++) {
+- if (pcpu_find_address(cpu_present_mask, address + j))
+- continue;
+- pcpu = pcpu_devices + cpu;
+- pcpu->address = address + j;
+- pcpu->state =
+- (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
+- CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+- set_cpu_present(cpu, true);
+- if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+- set_cpu_present(cpu, false);
+- else
+- nr++;
+- cpu = cpumask_next(cpu, &avail);
+- if (cpu >= nr_cpu_ids)
++ pcpu = pcpu_devices + cpu;
++ pcpu->address = address + i;
++ if (configured)
++ pcpu->state = CPU_STATE_CONFIGURED;
++ else
++ pcpu->state = CPU_STATE_STANDBY;
++ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
++ set_cpu_present(cpu, true);
++ if (!early && smp_add_present_cpu(cpu) != 0)
++ set_cpu_present(cpu, false);
++ else
++ nr++;
++ cpumask_clear_cpu(cpu, avail);
++ cpu = cpumask_next(cpu, avail);
++ }
++ return nr;
++}
++
++static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
++{
++ struct sclp_core_entry *core;
++ cpumask_t avail;
++ bool configured;
++ u16 core_id;
++ int nr, i;
++
++ nr = 0;
++ cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
++ /*
++ * Add IPL core first (which got logical CPU number 0) to make sure
++ * that all SMT threads get subsequent logical CPU numbers.
++ */
++ if (early) {
++ core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
++ for (i = 0; i < info->configured; i++) {
++ core = &info->core[i];
++ if (core->core_id == core_id) {
++ nr += smp_add_core(core, &avail, true, early);
+ break;
++ }
+ }
+ }
++ for (i = 0; i < info->combined; i++) {
++ configured = i < info->configured;
++ nr += smp_add_core(&info->core[i], &avail, configured, early);
++ }
+ return nr;
+ }
+
+@@ -793,7 +821,7 @@ void __init smp_detect_cpus(void)
+
+ /* Add CPUs present at boot */
+ get_online_cpus();
+- __smp_rescan_cpus(info, 0);
++ __smp_rescan_cpus(info, true);
+ put_online_cpus();
+ memblock_free_early((unsigned long)info, sizeof(*info));
+ }
+@@ -1145,7 +1173,7 @@ int __ref smp_rescan_cpus(void)
+ smp_get_core_info(info, 0);
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
+- nr = __smp_rescan_cpus(info, 1);
++ nr = __smp_rescan_cpus(info, false);
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
+ kfree(info);
+--
+2.20.1
+
--- /dev/null
+From 857a9311a2ece78520554227b748d3193c5b83ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Mar 2019 00:38:33 -0700
+Subject: scsi: qedf: Do not retry ELS request if qedf_alloc_cmd fails
+
+From: Chad Dupuis <cdupuis@marvell.com>
+
+[ Upstream commit f1c43590365bac054d753d808dbbd207d09e088d ]
+
+If we cannot allocate an ELS middlepath request, simply fail instead of
+trying to delay and then reallocate. This delay logic is causing soft
+lockup messages:
+
+NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [kworker/2:1:7639]
+Modules linked in: xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun devlink ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter dm_service_time vfat fat rpcrdma sunrpc ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm
+irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd iTCO_wdt iTCO_vendor_support qedr(OE) ib_core joydev ipmi_ssif pcspkr hpilo hpwdt sg ipmi_si ipmi_devintf ipmi_msghandler ioatdma shpchp lpc_ich wmi dca acpi_power_meter dm_multipath ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic qedf(OE) libfcoe mgag200 libfc i2c_algo_bit drm_kms_helper scsi_transport_fc qede(OE) syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qed(OE) drm crct10dif_pclmul e1000e crct10dif_common crc32c_intel scsi_tgt hpsa i2c_core ptp scsi_transport_sas pps_core dm_mirror dm_region_hash dm_log dm_mod
+CPU: 2 PID: 7639 Comm: kworker/2:1 Kdump: loaded Tainted: G OEL ------------ 3.10.0-861.el7.x86_64 #1
+Hardware name: HP ProLiant DL580 Gen9/ProLiant DL580 Gen9, BIOS U17 07/21/2016
+Workqueue: qedf_2_dpc qedf_handle_rrq [qedf]
+task: ffff959edd628fd0 ti: ffff959ed6f08000 task.ti: ffff959ed6f08000
+RIP: 0010:[<ffffffff8355913a>] [<ffffffff8355913a>] delay_tsc+0x3a/0x60
+RSP: 0018:ffff959ed6f0bd30 EFLAGS: 00000246
+RAX: 000000008ef5f791 RBX: 5f646d635f666465 RCX: 0000025b8ededa2f
+RDX: 000000000000025b RSI: 0000000000000002 RDI: 0000000000217d1e
+RBP: ffff959ed6f0bd30 R08: ffffffffc079aae8 R09: 0000000000000200
+R10: ffffffffc07952c6 R11: 0000000000000000 R12: 6c6c615f66646571
+R13: ffff959ed6f0bcc8 R14: ffff959ed6f0bd08 R15: ffff959e00000028
+FS: 0000000000000000(0000) GS:ffff959eff480000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f4117fa1eb0 CR3: 0000002039e66000 CR4: 00000000003607e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+[<ffffffff8355907d>] __const_udelay+0x2d/0x30
+[<ffffffffc079444a>] qedf_initiate_els+0x13a/0x450 [qedf]
+[<ffffffffc0794210>] ? qedf_srr_compl+0x2a0/0x2a0 [qedf]
+[<ffffffffc0795337>] qedf_send_rrq+0x127/0x230 [qedf]
+[<ffffffffc078ed55>] qedf_handle_rrq+0x15/0x20 [qedf]
+[<ffffffff832b2dff>] process_one_work+0x17f/0x440
+[<ffffffff832b3ac6>] worker_thread+0x126/0x3c0
+[<ffffffff832b39a0>] ? manage_workers.isra.24+0x2a0/0x2a0
+[<ffffffff832bae31>] kthread+0xd1/0xe0
+[<ffffffff832bad60>] ? insert_kthread_work+0x40/0x40
+[<ffffffff8391f637>] ret_from_fork_nospec_begin+0x21/0x21
+[<ffffffff832bad60>] ? insert_kthread_work+0x40/0x40
+
+Signed-off-by: Chad Dupuis <cdupuis@marvell.com>
+Signed-off-by: Saurav Kashyap <skashyap@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qedf/qedf_els.c | 16 ++++------------
+ 1 file changed, 4 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c
+index 04f0c4d2e256..5178cd03666a 100644
+--- a/drivers/scsi/qedf/qedf_els.c
++++ b/drivers/scsi/qedf/qedf_els.c
+@@ -23,8 +23,6 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op,
+ int rc = 0;
+ uint32_t did, sid;
+ uint16_t xid;
+- uint32_t start_time = jiffies / HZ;
+- uint32_t current_time;
+ struct fcoe_wqe *sqe;
+ unsigned long flags;
+ u16 sqe_idx;
+@@ -59,18 +57,12 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op,
+ goto els_err;
+ }
+
+-retry_els:
+ els_req = qedf_alloc_cmd(fcport, QEDF_ELS);
+ if (!els_req) {
+- current_time = jiffies / HZ;
+- if ((current_time - start_time) > 10) {
+- QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
+- "els: Failed els 0x%x\n", op);
+- rc = -ENOMEM;
+- goto els_err;
+- }
+- mdelay(20 * USEC_PER_MSEC);
+- goto retry_els;
++ QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS,
++ "Failed to alloc ELS request 0x%x\n", op);
++ rc = -ENOMEM;
++ goto els_err;
+ }
+
+ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "initiate_els els_req = "
+--
+2.20.1
+
--- /dev/null
+From 9f69075abd69c61b38a0c3f17a52bce8789ad71d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jun 2019 16:02:28 +0200
+Subject: selftests: rtnetlink: add addresses with fixed life time
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 3cfa148826e3c666da1cc2a43fbe8689e2650636 ]
+
+This exercises kernel code paths that deal with addresses that have
+a limited lifetime.
+
+Without previous fix, this triggers following crash on net-next:
+ BUG: KASAN: null-ptr-deref in check_lifetime+0x403/0x670
+ Read of size 8 at addr 0000000000000010 by task kworker [..]
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/rtnetlink.sh | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
+index e101af52d1d6..ff665de788ef 100755
+--- a/tools/testing/selftests/net/rtnetlink.sh
++++ b/tools/testing/selftests/net/rtnetlink.sh
+@@ -234,6 +234,26 @@ kci_test_route_get()
+ echo "PASS: route get"
+ }
+
++kci_test_addrlft()
++{
++ for i in $(seq 10 100) ;do
++ lft=$(((RANDOM%3) + 1))
++ ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
++ check_err $?
++ done
++
++ sleep 5
++
++ ip addr show dev "$devdummy" | grep "10.23.11."
++ if [ $? -eq 0 ]; then
++ echo "FAIL: preferred_lft addresses remaining"
++ check_err 1
++ return
++ fi
++
++ echo "PASS: preferred_lft addresses have expired"
++}
++
+ kci_test_addrlabel()
+ {
+ ret=0
+@@ -965,6 +985,7 @@ kci_test_rtnl()
+
+ kci_test_polrouting
+ kci_test_route_get
++ kci_test_addrlft
+ kci_test_tc
+ kci_test_gre
+ kci_test_gretap
+--
+2.20.1
+
fix-compat-handling-of-ficlonerange-fideduperange-and-fs_ioc_fiemap.patch
bdev-factor-out-bdev-revalidation-into-a-common-helper.patch
bdev-refresh-bdev-size-for-disks-without-partitioning.patch
+scsi-qedf-do-not-retry-els-request-if-qedf_alloc_cmd.patch
+drm-mst-fix-mst-sideband-up-reply-failure-handling.patch
+powerpc-pseries-hvconsole-fix-stack-overread-via-udb.patch
+selftests-rtnetlink-add-addresses-with-fixed-life-ti.patch
+coresight-tmc-etf-do-not-call-smp_processor_id-from-.patch
+coresight-etb10-do-not-call-smp_processor_id-from-pr.patch
+kvm-ppc-book3s-hv-use-smp_mb-when-setting-clearing-h.patch
+rxrpc-fix-possible-null-pointer-access-in-icmp-handl.patch
+tcp-annotate-tp-rcv_nxt-lockless-reads.patch
+net-core-limit-nested-device-depth.patch
+ath9k_htc-modify-byte-order-for-an-error-message.patch
+ath9k_htc-discard-undersized-packets.patch
+xfs-periodically-yield-scrub-threads-to-the-schedule.patch
+net-add-annotations-on-hh-hh_len-lockless-accesses.patch
+ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch
+s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch
+xen-blkback-avoid-unmapping-unmapped-grant-pages.patch
+perf-x86-intel-bts-fix-the-use-of-page_private.patch
--- /dev/null
+From 1c3f6c1668634fd4180dd2eba22513aa770d7952 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Oct 2019 20:17:39 -0700
+Subject: tcp: annotate tp->rcv_nxt lockless reads
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit dba7d9b8c739df27ff3a234c81d6c6b23e3986fa ]
+
+There are few places where we fetch tp->rcv_nxt while
+this field can change from IRQ or other cpu.
+
+We need to add READ_ONCE() annotations, and also make
+sure write sides use corresponding WRITE_ONCE() to avoid
+store-tearing.
+
+Note that tcp_inq_hint() was already using READ_ONCE(tp->rcv_nxt)
+
+syzbot reported :
+
+BUG: KCSAN: data-race in tcp_poll / tcp_queue_rcv
+
+write to 0xffff888120425770 of 4 bytes by interrupt on cpu 0:
+ tcp_rcv_nxt_update net/ipv4/tcp_input.c:3365 [inline]
+ tcp_queue_rcv+0x180/0x380 net/ipv4/tcp_input.c:4638
+ tcp_rcv_established+0xbf1/0xf50 net/ipv4/tcp_input.c:5616
+ tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1542
+ tcp_v4_rcv+0x1a03/0x1bf0 net/ipv4/tcp_ipv4.c:1923
+ ip_protocol_deliver_rcu+0x51/0x470 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5118
+ netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208
+ napi_skb_finish net/core/dev.c:5671 [inline]
+ napi_gro_receive+0x28f/0x330 net/core/dev.c:5704
+ receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061
+
+read to 0xffff888120425770 of 4 bytes by task 7254 on cpu 1:
+ tcp_stream_is_readable net/ipv4/tcp.c:480 [inline]
+ tcp_poll+0x204/0x6b0 net/ipv4/tcp.c:554
+ sock_poll+0xed/0x250 net/socket.c:1256
+ vfs_poll include/linux/poll.h:90 [inline]
+ ep_item_poll.isra.0+0x90/0x190 fs/eventpoll.c:892
+ ep_send_events_proc+0x113/0x5c0 fs/eventpoll.c:1749
+ ep_scan_ready_list.constprop.0+0x189/0x500 fs/eventpoll.c:704
+ ep_send_events fs/eventpoll.c:1793 [inline]
+ ep_poll+0xe3/0x900 fs/eventpoll.c:1930
+ do_epoll_wait+0x162/0x180 fs/eventpoll.c:2294
+ __do_sys_epoll_pwait fs/eventpoll.c:2325 [inline]
+ __se_sys_epoll_pwait fs/eventpoll.c:2311 [inline]
+ __x64_sys_epoll_pwait+0xcd/0x170 fs/eventpoll.c:2311
+ do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 7254 Comm: syz-fuzzer Not tainted 5.3.0+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 4 ++--
+ net/ipv4/tcp_diag.c | 2 +-
+ net/ipv4/tcp_input.c | 6 +++---
+ net/ipv4/tcp_ipv4.c | 3 ++-
+ net/ipv4/tcp_minisocks.c | 7 +++++--
+ net/ipv6/tcp_ipv6.c | 3 ++-
+ 6 files changed, 15 insertions(+), 10 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index a7a804bece7a..7561fa1bcc3e 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -488,7 +488,7 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
+ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
+ int target, struct sock *sk)
+ {
+- return (tp->rcv_nxt - tp->copied_seq >= target) ||
++ return (READ_ONCE(tp->rcv_nxt) - tp->copied_seq >= target) ||
+ (sk->sk_prot->stream_memory_read ?
+ sk->sk_prot->stream_memory_read(sk) : false);
+ }
+@@ -2866,7 +2866,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+ else if (tp->repair_queue == TCP_SEND_QUEUE)
+ tp->write_seq = val;
+ else if (tp->repair_queue == TCP_RECV_QUEUE)
+- tp->rcv_nxt = val;
++ WRITE_ONCE(tp->rcv_nxt, val);
+ else
+ err = -EINVAL;
+ break;
+diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
+index 81148f7a2323..c9e97f304f98 100644
+--- a/net/ipv4/tcp_diag.c
++++ b/net/ipv4/tcp_diag.c
+@@ -30,7 +30,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+ } else if (sk->sk_type == SOCK_STREAM) {
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+- r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
++ r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - tp->copied_seq, 0);
+ r->idiag_wqueue = tp->write_seq - tp->snd_una;
+ }
+ if (info)
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 57e8dad956ec..3a08ee81cbc3 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3348,7 +3348,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+
+ sock_owned_by_me((struct sock *)tp);
+ tp->bytes_received += delta;
+- tp->rcv_nxt = seq;
++ WRITE_ONCE(tp->rcv_nxt, seq);
+ }
+
+ /* Update our send window.
+@@ -5829,7 +5829,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
+ /* Ok.. it's good. Set up sequence numbers and
+ * move to established.
+ */
+- tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
++ WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
+ tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
+
+ /* RFC1323: The window in SYN & SYN/ACK segments is
+@@ -5932,7 +5932,7 @@ discard:
+ tp->tcp_header_len = sizeof(struct tcphdr);
+ }
+
+- tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
++ WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
+ tp->copied_seq = tp->rcv_nxt;
+ tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 5553f6a833f3..6da393016c11 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2330,7 +2330,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
+ /* Because we don't lock the socket,
+ * we might find a transient negative value.
+ */
+- rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
++ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
++ tp->copied_seq, 0);
+
+ seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
+ "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index 12affb7864d9..7ba8a90772b0 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -454,6 +454,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+ struct tcp_request_sock *treq = tcp_rsk(req);
+ struct inet_connection_sock *newicsk;
+ struct tcp_sock *oldtp, *newtp;
++ u32 seq;
+
+ if (!newsk)
+ return NULL;
+@@ -467,8 +468,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+ /* Now setup tcp_sock */
+ newtp->pred_flags = 0;
+
+- newtp->rcv_wup = newtp->copied_seq =
+- newtp->rcv_nxt = treq->rcv_isn + 1;
++ seq = treq->rcv_isn + 1;
++ newtp->rcv_wup = seq;
++ newtp->copied_seq = seq;
++ WRITE_ONCE(newtp->rcv_nxt, seq);
+ newtp->segs_in = 1;
+
+ newtp->snd_sml = newtp->snd_una =
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 9a117a79af65..c5f4e89b6ff3 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1839,7 +1839,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
+ /* Because we don't lock the socket,
+ * we might find a transient negative value.
+ */
+- rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
++ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
++ tp->copied_seq, 0);
+
+ seq_printf(seq,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+--
+2.20.1
+
--- /dev/null
+From 65e42bdf75d428d21e34608124dd3004a3250dc4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 20 Jul 2019 14:05:20 +0800
+Subject: ubifs: ubifs_tnc_start_commit: Fix OOB in layout_in_gaps
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+[ Upstream commit 6abf57262166b4f4294667fb5206ae7ba1ba96f5 ]
+
+Running stress-test test_2 in mtd-utils on ubi device, sometimes we can
+get following oops message:
+
+ BUG: unable to handle page fault for address: ffffffff00000140
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 280a067 P4D 280a067 PUD 0
+ Oops: 0000 [#1] SMP
+ CPU: 0 PID: 60 Comm: kworker/u16:1 Kdump: loaded Not tainted 5.2.0 #13
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0
+ -0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+ Workqueue: writeback wb_workfn (flush-ubifs_0_0)
+ RIP: 0010:rb_next_postorder+0x2e/0xb0
+ Code: 80 db 03 01 48 85 ff 0f 84 97 00 00 00 48 8b 17 48 83 05 bc 80 db
+ 03 01 48 83 e2 fc 0f 84 82 00 00 00 48 83 05 b2 80 db 03 01 <48> 3b 7a
+ 10 48 89 d0 74 02 f3 c3 48 8b 52 08 48 83 05 a3 80 db 03
+ RSP: 0018:ffffc90000887758 EFLAGS: 00010202
+ RAX: ffff888129ae4700 RBX: ffff888138b08400 RCX: 0000000080800001
+ RDX: ffffffff00000130 RSI: 0000000080800024 RDI: ffff888138b08400
+ RBP: ffff888138b08400 R08: ffffea0004a6b920 R09: 0000000000000000
+ R10: ffffc90000887740 R11: 0000000000000001 R12: ffff888128d48000
+ R13: 0000000000000800 R14: 000000000000011e R15: 00000000000007c8
+ FS: 0000000000000000(0000) GS:ffff88813ba00000(0000)
+ knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: ffffffff00000140 CR3: 000000013789d000 CR4: 00000000000006f0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ destroy_old_idx+0x5d/0xa0 [ubifs]
+ ubifs_tnc_start_commit+0x4fe/0x1380 [ubifs]
+ do_commit+0x3eb/0x830 [ubifs]
+ ubifs_run_commit+0xdc/0x1c0 [ubifs]
+
+The above Oops is due to a slab-out-of-bounds access that happened in
+the do-while loop of function layout_in_gaps, indirectly called by
+ubifs_tnc_start_commit. In
+function layout_in_gaps, there is a do-while loop placing index nodes
+into the gaps created by obsolete index nodes in non-empty index LEBs
+until rest index nodes can totally be placed into pre-allocated empty
+LEBs. @c->gap_lebs points to a memory area(integer array) which records
+LEB numbers used by 'in-the-gaps' method. Whenever a fitable index LEB
+is found, corresponding lnum will be incrementally written into the
+memory area pointed by @c->gap_lebs. The size
+((@c->lst.idx_lebs + 1) * sizeof(int)) of memory area is allocated before
+do-while loop and can not be changed in the loop. But @c->lst.idx_lebs
+could be increased by function ubifs_change_lp (called by
+layout_leb_in_gaps->ubifs_find_dirty_idx_leb->get_idx_gc_leb) during the
+loop. So, sometimes oob happens when number of cycles in do-while loop
+exceeds the original value of @c->lst.idx_lebs. See detail in
+https://bugzilla.kernel.org/show_bug.cgi?id=204229.
+This patch fixes oob in layout_in_gaps.
+
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ubifs/tnc_commit.c | 34 +++++++++++++++++++++++++++-------
+ 1 file changed, 27 insertions(+), 7 deletions(-)
+
+diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
+index dba87d09b989..95630f9f40dd 100644
+--- a/fs/ubifs/tnc_commit.c
++++ b/fs/ubifs/tnc_commit.c
+@@ -219,7 +219,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
+ /**
+ * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
+ * @c: UBIFS file-system description object
+- * @p: return LEB number here
++ * @p: return LEB number in @c->gap_lebs[p]
+ *
+ * This function lays out new index nodes for dirty znodes using in-the-gaps
+ * method of TNC commit.
+@@ -228,7 +228,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
+ * This function returns the number of index nodes written into the gaps, or a
+ * negative error code on failure.
+ */
+-static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
++static int layout_leb_in_gaps(struct ubifs_info *c, int p)
+ {
+ struct ubifs_scan_leb *sleb;
+ struct ubifs_scan_node *snod;
+@@ -243,7 +243,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
+ * filled, however we do not check there at present.
+ */
+ return lnum; /* Error code */
+- *p = lnum;
++ c->gap_lebs[p] = lnum;
+ dbg_gc("LEB %d", lnum);
+ /*
+ * Scan the index LEB. We use the generic scan for this even though
+@@ -362,7 +362,7 @@ static int get_leb_cnt(struct ubifs_info *c, int cnt)
+ */
+ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+ {
+- int err, leb_needed_cnt, written, *p;
++ int err, leb_needed_cnt, written, p = 0, old_idx_lebs, *gap_lebs;
+
+ dbg_gc("%d znodes to write", cnt);
+
+@@ -371,9 +371,9 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+ if (!c->gap_lebs)
+ return -ENOMEM;
+
+- p = c->gap_lebs;
++ old_idx_lebs = c->lst.idx_lebs;
+ do {
+- ubifs_assert(c, p < c->gap_lebs + c->lst.idx_lebs);
++ ubifs_assert(c, p < c->lst.idx_lebs);
+ written = layout_leb_in_gaps(c, p);
+ if (written < 0) {
+ err = written;
+@@ -399,9 +399,29 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+ leb_needed_cnt = get_leb_cnt(c, cnt);
+ dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt,
+ leb_needed_cnt, c->ileb_cnt);
++ /*
++ * Dynamically change the size of @c->gap_lebs to prevent
++ * oob, because @c->lst.idx_lebs could be increased by
++ * function @get_idx_gc_leb (called by layout_leb_in_gaps->
++ * ubifs_find_dirty_idx_leb) during loop. Only enlarge
++ * @c->gap_lebs when needed.
++ *
++ */
++ if (leb_needed_cnt > c->ileb_cnt && p >= old_idx_lebs &&
++ old_idx_lebs < c->lst.idx_lebs) {
++ old_idx_lebs = c->lst.idx_lebs;
++ gap_lebs = krealloc(c->gap_lebs, sizeof(int) *
++ (old_idx_lebs + 1), GFP_NOFS);
++ if (!gap_lebs) {
++ kfree(c->gap_lebs);
++ c->gap_lebs = NULL;
++ return -ENOMEM;
++ }
++ c->gap_lebs = gap_lebs;
++ }
+ } while (leb_needed_cnt > c->ileb_cnt);
+
+- *p = -1;
++ c->gap_lebs[p] = -1;
+ return 0;
+ }
+
+--
+2.20.1
+
--- /dev/null
+From 9073ea24e25c62bad1801cba51d67378478c94fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Nov 2019 16:36:05 +0100
+Subject: xen/blkback: Avoid unmapping unmapped grant pages
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: SeongJae Park <sjpark@amazon.de>
+
+[ Upstream commit f9bd84a8a845d82f9b5a081a7ae68c98a11d2e84 ]
+
+For each I/O request, blkback first maps the foreign pages for the
+request to its local pages. If an allocation of a local page for the
+mapping fails, it should unmap every mapping already made for the
+request.
+
+However, blkback's handling mechanism for the allocation failure does
+not mark the remaining foreign pages as unmapped. Therefore, the unmap
+function merely tries to unmap every valid grant page for the request,
+including the pages not mapped due to the allocation failure. On a
+system that fails the allocation frequently, this problem leads to
+following kernel crash.
+
+ [ 372.012538] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
+ [ 372.012546] IP: [<ffffffff814071ac>] gnttab_unmap_refs.part.7+0x1c/0x40
+ [ 372.012557] PGD 16f3e9067 PUD 16426e067 PMD 0
+ [ 372.012562] Oops: 0002 [#1] SMP
+ [ 372.012566] Modules linked in: act_police sch_ingress cls_u32
+ ...
+ [ 372.012746] Call Trace:
+ [ 372.012752] [<ffffffff81407204>] gnttab_unmap_refs+0x34/0x40
+ [ 372.012759] [<ffffffffa0335ae3>] xen_blkbk_unmap+0x83/0x150 [xen_blkback]
+ ...
+ [ 372.012802] [<ffffffffa0336c50>] dispatch_rw_block_io+0x970/0x980 [xen_blkback]
+ ...
+ Decompressing Linux... Parsing ELF... done.
+ Booting the kernel.
+ [ 0.000000] Initializing cgroup subsys cpuset
+
+This commit fixes this problem by marking the grant pages of the given
+request that weren't mapped due to the allocation failure as invalid.
+
+Fixes: c6cc142dac52 ("xen-blkback: use balloon pages for all mappings")
+
+Reviewed-by: David Woodhouse <dwmw@amazon.de>
+Reviewed-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: Paul Durrant <pdurrant@amazon.co.uk>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: SeongJae Park <sjpark@amazon.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/xen-blkback/blkback.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
+index fd1e19f1a49f..3666afa639d1 100644
+--- a/drivers/block/xen-blkback/blkback.c
++++ b/drivers/block/xen-blkback/blkback.c
+@@ -936,6 +936,8 @@ next:
+ out_of_memory:
+ pr_alert("%s: out of memory\n", __func__);
+ put_free_pages(ring, pages_to_gnt, segs_to_map);
++ for (i = last_map; i < num; i++)
++ pages[i]->handle = BLKBACK_INVALID_HANDLE;
+ return -ENOMEM;
+ }
+
+--
+2.20.1
+
--- /dev/null
+From c14d20357188794b827cde596fdc7cca0f7d8233 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Nov 2019 15:33:57 -0800
+Subject: xfs: periodically yield scrub threads to the scheduler
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+[ Upstream commit 5d1116d4c6af3e580f1ed0382ca5a94bd65a34cf ]
+
+Christoph Hellwig complained about the following soft lockup warning
+when running scrub after generic/175 when preemption is disabled and
+slub debugging is enabled:
+
+watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [xfs_scrub:161]
+Modules linked in:
+irq event stamp: 41692326
+hardirqs last enabled at (41692325): [<ffffffff8232c3b7>] _raw_0
+hardirqs last disabled at (41692326): [<ffffffff81001c5a>] trace0
+softirqs last enabled at (41684994): [<ffffffff8260031f>] __do_e
+softirqs last disabled at (41684987): [<ffffffff81127d8c>] irq_e0
+CPU: 3 PID: 16189 Comm: xfs_scrub Not tainted 5.4.0-rc3+ #30
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.124
+RIP: 0010:_raw_spin_unlock_irqrestore+0x39/0x40
+Code: 89 f3 be 01 00 00 00 e8 d5 3a e5 fe 48 89 ef e8 ed 87 e5 f2
+RSP: 0018:ffffc9000233f970 EFLAGS: 00000286 ORIG_RAX: ffffffffff3
+RAX: ffff88813b398040 RBX: 0000000000000286 RCX: 0000000000000006
+RDX: 0000000000000006 RSI: ffff88813b3988c0 RDI: ffff88813b398040
+RBP: ffff888137958640 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffffea00042b0c00
+R13: 0000000000000001 R14: ffff88810ac32308 R15: ffff8881376fc040
+FS: 00007f6113dea700(0000) GS:ffff88813bb80000(0000) knlGS:00000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f6113de8ff8 CR3: 000000012f290000 CR4: 00000000000006e0
+Call Trace:
+ free_debug_processing+0x1dd/0x240
+ __slab_free+0x231/0x410
+ kmem_cache_free+0x30e/0x360
+ xchk_ag_btcur_free+0x76/0xb0
+ xchk_ag_free+0x10/0x80
+ xchk_bmap_iextent_xref.isra.14+0xd9/0x120
+ xchk_bmap_iextent+0x187/0x210
+ xchk_bmap+0x2e0/0x3b0
+ xfs_scrub_metadata+0x2e7/0x500
+ xfs_ioc_scrub_metadata+0x4a/0xa0
+ xfs_file_ioctl+0x58a/0xcd0
+ do_vfs_ioctl+0xa0/0x6f0
+ ksys_ioctl+0x5b/0x90
+ __x64_sys_ioctl+0x11/0x20
+ do_syscall_64+0x4b/0x1a0
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+If preemption is disabled, all metadata buffers needed to perform the
+scrub are already in memory, and there are a lot of records to check,
+it's possible that the scrub thread will run for an extended period of
+time without sleeping for IO or any other reason. Then the watchdog
+timer or the RCU stall timeout can trigger, producing the backtrace
+above.
+
+To fix this problem, call cond_resched() from the scrub thread so that
+we back out to the scheduler whenever necessary.
+
+Reported-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/scrub/common.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
+index 2d4324d12f9a..51ea2ab124b7 100644
+--- a/fs/xfs/scrub/common.h
++++ b/fs/xfs/scrub/common.h
+@@ -14,8 +14,15 @@
+ static inline bool
+ xchk_should_terminate(
+ struct xfs_scrub *sc,
+- int *error)
++ int *error)
+ {
++ /*
++ * If preemption is disabled, we need to yield to the scheduler every
++ * few seconds so that we don't run afoul of the soft lockup watchdog
++ * or RCU stall detector.
++ */
++ cond_resched();
++
+ if (fatal_signal_pending(current)) {
+ if (*error == 0)
+ *error = -EAGAIN;
+--
+2.20.1
+