git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
fixes for 4.19
author: Sasha Levin <sashal@kernel.org>
Tue, 7 Jan 2020 18:07:24 +0000 (13:07 -0500)
committer: Sasha Levin <sashal@kernel.org>
Tue, 7 Jan 2020 18:07:24 +0000 (13:07 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
19 files changed:
queue-4.19/ath9k_htc-discard-undersized-packets.patch [new file with mode: 0644]
queue-4.19/ath9k_htc-modify-byte-order-for-an-error-message.patch [new file with mode: 0644]
queue-4.19/coresight-etb10-do-not-call-smp_processor_id-from-pr.patch [new file with mode: 0644]
queue-4.19/coresight-tmc-etf-do-not-call-smp_processor_id-from-.patch [new file with mode: 0644]
queue-4.19/drm-mst-fix-mst-sideband-up-reply-failure-handling.patch [new file with mode: 0644]
queue-4.19/kvm-ppc-book3s-hv-use-smp_mb-when-setting-clearing-h.patch [new file with mode: 0644]
queue-4.19/net-add-annotations-on-hh-hh_len-lockless-accesses.patch [new file with mode: 0644]
queue-4.19/net-core-limit-nested-device-depth.patch [new file with mode: 0644]
queue-4.19/perf-x86-intel-bts-fix-the-use-of-page_private.patch [new file with mode: 0644]
queue-4.19/powerpc-pseries-hvconsole-fix-stack-overread-via-udb.patch [new file with mode: 0644]
queue-4.19/rxrpc-fix-possible-null-pointer-access-in-icmp-handl.patch [new file with mode: 0644]
queue-4.19/s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch [new file with mode: 0644]
queue-4.19/scsi-qedf-do-not-retry-els-request-if-qedf_alloc_cmd.patch [new file with mode: 0644]
queue-4.19/selftests-rtnetlink-add-addresses-with-fixed-life-ti.patch [new file with mode: 0644]
queue-4.19/series
queue-4.19/tcp-annotate-tp-rcv_nxt-lockless-reads.patch [new file with mode: 0644]
queue-4.19/ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch [new file with mode: 0644]
queue-4.19/xen-blkback-avoid-unmapping-unmapped-grant-pages.patch [new file with mode: 0644]
queue-4.19/xfs-periodically-yield-scrub-threads-to-the-schedule.patch [new file with mode: 0644]

diff --git a/queue-4.19/ath9k_htc-discard-undersized-packets.patch b/queue-4.19/ath9k_htc-discard-undersized-packets.patch
new file mode 100644 (file)
index 0000000..84d81c7
--- /dev/null
@@ -0,0 +1,124 @@
+From 29bf70edaca4c49955ed9ec154d1dc8bf0a483ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2019 11:51:46 +0900
+Subject: ath9k_htc: Discard undersized packets
+
+From: Masashi Honma <masashi.honma@gmail.com>
+
+[ Upstream commit cd486e627e67ee9ab66914d36d3127ef057cc010 ]
+
+Sometimes the hardware will push small packets that trigger a WARN_ON
+in mac80211. Discard them early to avoid this issue.
+
+This patch ports 2 patches from ath9k to ath9k_htc.
+commit 3c0efb745a172bfe96459e20cbd37b0c945d5f8d "ath9k: discard
+undersized packets".
+commit df5c4150501ee7e86383be88f6490d970adcf157 "ath9k: correctly
+handle short radar pulses".
+
+[  112.835889] ------------[ cut here ]------------
+[  112.835971] WARNING: CPU: 5 PID: 0 at net/mac80211/rx.c:804 ieee80211_rx_napi+0xaac/0xb40 [mac80211]
+[  112.835973] Modules linked in: ath9k_htc ath9k_common ath9k_hw ath mac80211 cfg80211 libarc4 nouveau snd_hda_codec_hdmi intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_hda_codec video snd_hda_core ttm snd_hwdep drm_kms_helper snd_pcm crct10dif_pclmul snd_seq_midi drm snd_seq_midi_event crc32_pclmul snd_rawmidi ghash_clmulni_intel snd_seq aesni_intel aes_x86_64 crypto_simd cryptd snd_seq_device glue_helper snd_timer sch_fq_codel i2c_algo_bit fb_sys_fops snd input_leds syscopyarea sysfillrect sysimgblt intel_cstate mei_me intel_rapl_perf soundcore mxm_wmi lpc_ich mei kvm_intel kvm mac_hid irqbypass parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear e1000e ahci libahci wmi
+[  112.836022] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.3.0-wt #1
+[  112.836023] Hardware name: MouseComputer Co.,Ltd. X99-S01/X99-S01, BIOS 1.0C-W7 04/01/2015
+[  112.836056] RIP: 0010:ieee80211_rx_napi+0xaac/0xb40 [mac80211]
+[  112.836059] Code: 00 00 66 41 89 86 b0 00 00 00 e9 c8 fa ff ff 4c 89 b5 40 ff ff ff 49 89 c6 e9 c9 fa ff ff 48 c7 c7 e0 a2 a5 c0 e8 47 41 b0 e9 <0f> 0b 48 89 df e8 5a 94 2d ea e9 02 f9 ff ff 41 39 c1 44 89 85 60
+[  112.836060] RSP: 0018:ffffaa6180220da8 EFLAGS: 00010286
+[  112.836062] RAX: 0000000000000024 RBX: ffff909a20eeda00 RCX: 0000000000000000
+[  112.836064] RDX: 0000000000000000 RSI: ffff909a2f957448 RDI: ffff909a2f957448
+[  112.836065] RBP: ffffaa6180220e78 R08: 00000000000006e9 R09: 0000000000000004
+[  112.836066] R10: 000000000000000a R11: 0000000000000001 R12: 0000000000000000
+[  112.836068] R13: ffff909a261a47a0 R14: 0000000000000000 R15: 0000000000000004
+[  112.836070] FS:  0000000000000000(0000) GS:ffff909a2f940000(0000) knlGS:0000000000000000
+[  112.836071] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  112.836073] CR2: 00007f4e3ffffa08 CR3: 00000001afc0a006 CR4: 00000000001606e0
+[  112.836074] Call Trace:
+[  112.836076]  <IRQ>
+[  112.836083]  ? finish_td+0xb3/0xf0
+[  112.836092]  ? ath9k_rx_prepare.isra.11+0x22f/0x2a0 [ath9k_htc]
+[  112.836099]  ath9k_rx_tasklet+0x10b/0x1d0 [ath9k_htc]
+[  112.836105]  tasklet_action_common.isra.22+0x63/0x110
+[  112.836108]  tasklet_action+0x22/0x30
+[  112.836115]  __do_softirq+0xe4/0x2da
+[  112.836118]  irq_exit+0xae/0xb0
+[  112.836121]  do_IRQ+0x86/0xe0
+[  112.836125]  common_interrupt+0xf/0xf
+[  112.836126]  </IRQ>
+[  112.836130] RIP: 0010:cpuidle_enter_state+0xa9/0x440
+[  112.836133] Code: 3d bc 20 38 55 e8 f7 1d 84 ff 49 89 c7 0f 1f 44 00 00 31 ff e8 28 29 84 ff 80 7d d3 00 0f 85 e6 01 00 00 fb 66 0f 1f 44 00 00 <45> 85 ed 0f 89 ff 01 00 00 41 c7 44 24 10 00 00 00 00 48 83 c4 18
+[  112.836134] RSP: 0018:ffffaa61800e3e48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde
+[  112.836136] RAX: ffff909a2f96b340 RBX: ffffffffabb58200 RCX: 000000000000001f
+[  112.836137] RDX: 0000001a458adc5d RSI: 0000000026c9b581 RDI: 0000000000000000
+[  112.836139] RBP: ffffaa61800e3e88 R08: 0000000000000002 R09: 000000000002abc0
+[  112.836140] R10: ffffaa61800e3e18 R11: 000000000000002d R12: ffffca617fb40b00
+[  112.836141] R13: 0000000000000002 R14: ffffffffabb582d8 R15: 0000001a458adc5d
+[  112.836145]  ? cpuidle_enter_state+0x98/0x440
+[  112.836149]  ? menu_select+0x370/0x600
+[  112.836151]  cpuidle_enter+0x2e/0x40
+[  112.836154]  call_cpuidle+0x23/0x40
+[  112.836156]  do_idle+0x204/0x280
+[  112.836159]  cpu_startup_entry+0x1d/0x20
+[  112.836164]  start_secondary+0x167/0x1c0
+[  112.836169]  secondary_startup_64+0xa4/0xb0
+[  112.836173] ---[ end trace 9f4cd18479cc5ae5 ]---
+
+Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 23 +++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+index baacbd11eb43..b5d7ef4da17f 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+@@ -973,6 +973,8 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+       struct ath_htc_rx_status *rxstatus;
+       struct ath_rx_status rx_stats;
+       bool decrypt_error = false;
++      __be16 rs_datalen;
++      bool is_phyerr;
+       if (skb->len < HTC_RX_FRAME_HEADER_SIZE) {
+               ath_err(common, "Corrupted RX frame, dropping (len: %d)\n",
+@@ -982,11 +984,24 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+       rxstatus = (struct ath_htc_rx_status *)skb->data;
+-      if (be16_to_cpu(rxstatus->rs_datalen) -
+-          (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) {
++      rs_datalen = be16_to_cpu(rxstatus->rs_datalen);
++      if (unlikely(rs_datalen -
++          (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0)) {
+               ath_err(common,
+                       "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n",
+-                      be16_to_cpu(rxstatus->rs_datalen), skb->len);
++                      rs_datalen, skb->len);
++              goto rx_next;
++      }
++
++      is_phyerr = rxstatus->rs_status & ATH9K_RXERR_PHY;
++      /*
++       * Discard zero-length packets and packets smaller than an ACK
++       * which are not PHY_ERROR (short radar pulses have a length of 3)
++       */
++      if (unlikely(!rs_datalen || (rs_datalen < 10 && !is_phyerr))) {
++              ath_warn(common,
++                       "Short RX data len, dropping (dlen: %d)\n",
++                       rs_datalen);
+               goto rx_next;
+       }
+@@ -1011,7 +1026,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+        * Process PHY errors and return so that the packet
+        * can be dropped.
+        */
+-      if (rx_stats.rs_status & ATH9K_RXERR_PHY) {
++      if (unlikely(is_phyerr)) {
+               /* TODO: Not using DFS processing now. */
+               if (ath_cmn_process_fft(&priv->spec_priv, hdr,
+                                   &rx_stats, rx_status->mactime)) {
+-- 
+2.20.1
+
diff --git a/queue-4.19/ath9k_htc-modify-byte-order-for-an-error-message.patch b/queue-4.19/ath9k_htc-modify-byte-order-for-an-error-message.patch
new file mode 100644 (file)
index 0000000..c5a002c
--- /dev/null
@@ -0,0 +1,34 @@
+From 48f6231084ef1c482c7b9dd442d8c0111b0ca403 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2019 11:51:45 +0900
+Subject: ath9k_htc: Modify byte order for an error message
+
+From: Masashi Honma <masashi.honma@gmail.com>
+
+[ Upstream commit e01fddc19d215f6ad397894ec2a851d99bf154e2 ]
+
+rs_datalen is be16 so we need to convert it before printing.
+
+Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+index 799010ed04e0..baacbd11eb43 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+@@ -986,7 +986,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+           (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) {
+               ath_err(common,
+                       "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n",
+-                      rxstatus->rs_datalen, skb->len);
++                      be16_to_cpu(rxstatus->rs_datalen), skb->len);
+               goto rx_next;
+       }
+-- 
+2.20.1
+
diff --git a/queue-4.19/coresight-etb10-do-not-call-smp_processor_id-from-pr.patch b/queue-4.19/coresight-etb10-do-not-call-smp_processor_id-from-pr.patch
new file mode 100644 (file)
index 0000000..bacbe8c
--- /dev/null
@@ -0,0 +1,49 @@
+From 7dd3619af053abf402857259709330d8ab1514d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jun 2019 16:12:36 -0600
+Subject: coresight: etb10: Do not call smp_processor_id from preemptible
+
+From: Suzuki K Poulose <suzuki.poulose@arm.com>
+
+[ Upstream commit 730766bae3280a25d40ea76a53dc6342e84e6513 ]
+
+During a perf session we try to allocate buffers on the "node" associated
+with the CPU the event is bound to. If it is not bound to a CPU, we
+use the current CPU node, using smp_processor_id(). However this is unsafe
+in a pre-emptible context and could generate the splats as below :
+
+ BUG: using smp_processor_id() in preemptible [00000000] code: perf/2544
+
+Use NUMA_NO_NODE hint instead of using the current node for events
+not bound to CPUs.
+
+Fixes: 2997aa4063d97fdb39 ("coresight: etb10: implementing AUX API")
+Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
+Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+Cc: stable <stable@vger.kernel.org> # 4.6+
+Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Link: https://lore.kernel.org/r/20190620221237.3536-5-mathieu.poirier@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/coresight/coresight-etb10.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
+index 0dad8626bcfb..0a59bf3af40b 100644
+--- a/drivers/hwtracing/coresight/coresight-etb10.c
++++ b/drivers/hwtracing/coresight/coresight-etb10.c
+@@ -275,9 +275,7 @@ static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu,
+       int node;
+       struct cs_buffers *buf;
+-      if (cpu == -1)
+-              cpu = smp_processor_id();
+-      node = cpu_to_node(cpu);
++      node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+       buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
+       if (!buf)
+-- 
+2.20.1
+
diff --git a/queue-4.19/coresight-tmc-etf-do-not-call-smp_processor_id-from-.patch b/queue-4.19/coresight-tmc-etf-do-not-call-smp_processor_id-from-.patch
new file mode 100644 (file)
index 0000000..66a2fde
--- /dev/null
@@ -0,0 +1,68 @@
+From 41d6b7b0ed03946188d6f6eea3e432bce64d3c0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jun 2019 16:12:35 -0600
+Subject: coresight: tmc-etf: Do not call smp_processor_id from preemptible
+
+From: Suzuki K Poulose <suzuki.poulose@arm.com>
+
+[ Upstream commit 024c1fd9dbcc1d8a847f1311f999d35783921b7f ]
+
+During a perf session we try to allocate buffers on the "node" associated
+with the CPU the event is bound to. If it is not bound to a CPU, we
+use the current CPU node, using smp_processor_id(). However this is unsafe
+in a pre-emptible context and could generate the splats as below :
+
+ BUG: using smp_processor_id() in preemptible [00000000] code: perf/2544
+ caller is tmc_alloc_etf_buffer+0x5c/0x60
+ CPU: 2 PID: 2544 Comm: perf Not tainted 5.1.0-rc6-147786-g116841e #344
+ Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Feb  1 2019
+ Call trace:
+  dump_backtrace+0x0/0x150
+  show_stack+0x14/0x20
+  dump_stack+0x9c/0xc4
+  debug_smp_processor_id+0x10c/0x110
+  tmc_alloc_etf_buffer+0x5c/0x60
+  etm_setup_aux+0x1c4/0x230
+  rb_alloc_aux+0x1b8/0x2b8
+  perf_mmap+0x35c/0x478
+  mmap_region+0x34c/0x4f0
+  do_mmap+0x2d8/0x418
+  vm_mmap_pgoff+0xd0/0xf8
+  ksys_mmap_pgoff+0x88/0xf8
+  __arm64_sys_mmap+0x28/0x38
+  el0_svc_handler+0xd8/0x138
+  el0_svc+0x8/0xc
+
+Use NUMA_NO_NODE hint instead of using the current node for events
+not bound to CPUs.
+
+Fixes: 2e499bbc1a929ac ("coresight: tmc: implementing TMC-ETF AUX space API")
+Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
+Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+Cc: stable <stable@vger.kernel.org> # 4.7+
+Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Link: https://lore.kernel.org/r/20190620221237.3536-4-mathieu.poirier@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/coresight/coresight-tmc-etf.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
+index e31061308e19..4644ac5582cf 100644
+--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
++++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
+@@ -304,9 +304,7 @@ static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu,
+       int node;
+       struct cs_buffers *buf;
+-      if (cpu == -1)
+-              cpu = smp_processor_id();
+-      node = cpu_to_node(cpu);
++      node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu);
+       /* Allocate memory structure for interaction with Perf */
+       buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
+-- 
+2.20.1
+
diff --git a/queue-4.19/drm-mst-fix-mst-sideband-up-reply-failure-handling.patch b/queue-4.19/drm-mst-fix-mst-sideband-up-reply-failure-handling.patch
new file mode 100644 (file)
index 0000000..01ead5e
--- /dev/null
@@ -0,0 +1,83 @@
+From 7ab2b97949080bd284fc2d112cdc5e4306e52fcb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2019 00:24:33 +0300
+Subject: drm/mst: Fix MST sideband up-reply failure handling
+
+From: Imre Deak <imre.deak@intel.com>
+
+[ Upstream commit d8fd3722207f154b53c80eee2cf4977c3fc25a92 ]
+
+Fix the breakage resulting in the stacktrace below, due to tx queue
+being full when trying to send an up-reply. txmsg->seqno is -1 in this
+case leading to a corruption of the mstb object by
+
+       txmsg->dst->tx_slots[txmsg->seqno] = NULL;
+
+in process_single_up_tx_qlock().
+
+[  +0,005162] [drm:process_single_tx_qlock [drm_kms_helper]] set_hdr_from_dst_qlock: failed to find slot
+[  +0,000015] [drm:drm_dp_send_up_ack_reply.constprop.19 [drm_kms_helper]] failed to send msg in q -11
+[  +0,000939] BUG: kernel NULL pointer dereference, address: 00000000000005a0
+[  +0,006982] #PF: supervisor write access in kernel mode
+[  +0,005223] #PF: error_code(0x0002) - not-present page
+[  +0,005135] PGD 0 P4D 0
+[  +0,002581] Oops: 0002 [#1] PREEMPT SMP NOPTI
+[  +0,004359] CPU: 1 PID: 1200 Comm: kworker/u16:3 Tainted: G     U            5.2.0-rc1+ #410
+[  +0,008433] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP, BIOS ICLSFWR1.R00.3175.A00.1904261428 04/26/2019
+[  +0,013323] Workqueue: i915-dp i915_digport_work_func [i915]
+[  +0,005676] RIP: 0010:queue_work_on+0x19/0x70
+[  +0,004372] Code: ff ff ff 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 41 56 49 89 f6 41 55 41 89 fd 41 54 55 53 48 89 d3 9c 5d fa e8 e7 81 0c 00 <f0> 48 0f ba 2b 00 73 31 45 31 e4 f7 c5 00 02 00 00 74 13 e8 cf 7f
+[  +0,018750] RSP: 0018:ffffc900007dfc50 EFLAGS: 00010006
+[  +0,005222] RAX: 0000000000000046 RBX: 00000000000005a0 RCX: 0000000000000001
+[  +0,007133] RDX: 000000000001b608 RSI: 0000000000000000 RDI: ffffffff82121972
+[  +0,007129] RBP: 0000000000000202 R08: 0000000000000000 R09: 0000000000000001
+[  +0,007129] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88847bfa5096
+[  +0,007131] R13: 0000000000000010 R14: ffff88849c08f3f8 R15: 0000000000000000
+[  +0,007128] FS:  0000000000000000(0000) GS:ffff88849dc80000(0000) knlGS:0000000000000000
+[  +0,008083] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  +0,005749] CR2: 00000000000005a0 CR3: 0000000005210006 CR4: 0000000000760ee0
+[  +0,007128] PKRU: 55555554
+[  +0,002722] Call Trace:
+[  +0,002458]  drm_dp_mst_handle_up_req+0x517/0x540 [drm_kms_helper]
+[  +0,006197]  ? drm_dp_mst_hpd_irq+0x5b/0x9c0 [drm_kms_helper]
+[  +0,005764]  drm_dp_mst_hpd_irq+0x5b/0x9c0 [drm_kms_helper]
+[  +0,005623]  ? intel_dp_hpd_pulse+0x205/0x370 [i915]
+[  +0,005018]  intel_dp_hpd_pulse+0x205/0x370 [i915]
+[  +0,004836]  i915_digport_work_func+0xbb/0x140 [i915]
+[  +0,005108]  process_one_work+0x245/0x610
+[  +0,004027]  worker_thread+0x37/0x380
+[  +0,003684]  ? process_one_work+0x610/0x610
+[  +0,004184]  kthread+0x119/0x130
+[  +0,003240]  ? kthread_park+0x80/0x80
+[  +0,003668]  ret_from_fork+0x24/0x50
+
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20190523212433.9058-1-imre.deak@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/drm_dp_mst_topology.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
+index 65f58e23e03d..77347a258f6c 100644
+--- a/drivers/gpu/drm/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/drm_dp_mst_topology.c
+@@ -1582,7 +1582,11 @@ static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr,
+       if (ret != 1)
+               DRM_DEBUG_KMS("failed to send msg in q %d\n", ret);
+-      txmsg->dst->tx_slots[txmsg->seqno] = NULL;
++      if (txmsg->seqno != -1) {
++              WARN_ON((unsigned int)txmsg->seqno >
++                      ARRAY_SIZE(txmsg->dst->tx_slots));
++              txmsg->dst->tx_slots[txmsg->seqno] = NULL;
++      }
+ }
+ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
+-- 
+2.20.1
+
diff --git a/queue-4.19/kvm-ppc-book3s-hv-use-smp_mb-when-setting-clearing-h.patch b/queue-4.19/kvm-ppc-book3s-hv-use-smp_mb-when-setting-clearing-h.patch
new file mode 100644 (file)
index 0000000..4f61e3a
--- /dev/null
@@ -0,0 +1,466 @@
+From 65ef3476172506d84aa78a75d33d0cccd452d727 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Sep 2019 17:31:55 -0500
+Subject: KVM: PPC: Book3S HV: use smp_mb() when setting/clearing host_ipi flag
+
+From: Michael Roth <mdroth@linux.vnet.ibm.com>
+
+[ Upstream commit 3a83f677a6eeff65751b29e3648d7c69c3be83f3 ]
+
+On a 2-socket Power9 system with 32 cores/128 threads (SMT4) and 1TB
+of memory running the following guest configs:
+
+  guest A:
+    - 224GB of memory
+    - 56 VCPUs (sockets=1,cores=28,threads=2), where:
+      VCPUs 0-1 are pinned to CPUs 0-3,
+      VCPUs 2-3 are pinned to CPUs 4-7,
+      ...
+      VCPUs 54-55 are pinned to CPUs 108-111
+
+  guest B:
+    - 4GB of memory
+    - 4 VCPUs (sockets=1,cores=4,threads=1)
+
+with the following workloads (with KSM and THP enabled in all):
+
+  guest A:
+    stress --cpu 40 --io 20 --vm 20 --vm-bytes 512M
+
+  guest B:
+    stress --cpu 4 --io 4 --vm 4 --vm-bytes 512M
+
+  host:
+    stress --cpu 4 --io 4 --vm 2 --vm-bytes 256M
+
+the below soft-lockup traces were observed after an hour or so and
+persisted until the host was reset (this was found to be reliably
+reproducible for this configuration, for kernels 4.15, 4.18, 5.0,
+and 5.3-rc5):
+
+  [ 1253.183290] rcu: INFO: rcu_sched self-detected stall on CPU
+  [ 1253.183319] rcu:     124-....: (5250 ticks this GP) idle=10a/1/0x4000000000000002 softirq=5408/5408 fqs=1941
+  [ 1256.287426] watchdog: BUG: soft lockup - CPU#105 stuck for 23s! [CPU 52/KVM:19709]
+  [ 1264.075773] watchdog: BUG: soft lockup - CPU#24 stuck for 23s! [worker:19913]
+  [ 1264.079769] watchdog: BUG: soft lockup - CPU#31 stuck for 23s! [worker:20331]
+  [ 1264.095770] watchdog: BUG: soft lockup - CPU#45 stuck for 23s! [worker:20338]
+  [ 1264.131773] watchdog: BUG: soft lockup - CPU#64 stuck for 23s! [avocado:19525]
+  [ 1280.408480] watchdog: BUG: soft lockup - CPU#124 stuck for 22s! [ksmd:791]
+  [ 1316.198012] rcu: INFO: rcu_sched self-detected stall on CPU
+  [ 1316.198032] rcu:     124-....: (21003 ticks this GP) idle=10a/1/0x4000000000000002 softirq=5408/5408 fqs=8243
+  [ 1340.411024] watchdog: BUG: soft lockup - CPU#124 stuck for 22s! [ksmd:791]
+  [ 1379.212609] rcu: INFO: rcu_sched self-detected stall on CPU
+  [ 1379.212629] rcu:     124-....: (36756 ticks this GP) idle=10a/1/0x4000000000000002 softirq=5408/5408 fqs=14714
+  [ 1404.413615] watchdog: BUG: soft lockup - CPU#124 stuck for 22s! [ksmd:791]
+  [ 1442.227095] rcu: INFO: rcu_sched self-detected stall on CPU
+  [ 1442.227115] rcu:     124-....: (52509 ticks this GP) idle=10a/1/0x4000000000000002 softirq=5408/5408 fqs=21403
+  [ 1455.111787] INFO: task worker:19907 blocked for more than 120 seconds.
+  [ 1455.111822]       Tainted: G             L    5.3.0-rc5-mdr-vanilla+ #1
+  [ 1455.111833] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  [ 1455.111884] INFO: task worker:19908 blocked for more than 120 seconds.
+  [ 1455.111905]       Tainted: G             L    5.3.0-rc5-mdr-vanilla+ #1
+  [ 1455.111925] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  [ 1455.111966] INFO: task worker:20328 blocked for more than 120 seconds.
+  [ 1455.111986]       Tainted: G             L    5.3.0-rc5-mdr-vanilla+ #1
+  [ 1455.111998] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  [ 1455.112048] INFO: task worker:20330 blocked for more than 120 seconds.
+  [ 1455.112068]       Tainted: G             L    5.3.0-rc5-mdr-vanilla+ #1
+  [ 1455.112097] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  [ 1455.112138] INFO: task worker:20332 blocked for more than 120 seconds.
+  [ 1455.112159]       Tainted: G             L    5.3.0-rc5-mdr-vanilla+ #1
+  [ 1455.112179] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  [ 1455.112210] INFO: task worker:20333 blocked for more than 120 seconds.
+  [ 1455.112231]       Tainted: G             L    5.3.0-rc5-mdr-vanilla+ #1
+  [ 1455.112242] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  [ 1455.112282] INFO: task worker:20335 blocked for more than 120 seconds.
+  [ 1455.112303]       Tainted: G             L    5.3.0-rc5-mdr-vanilla+ #1
+  [ 1455.112332] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  [ 1455.112372] INFO: task worker:20336 blocked for more than 120 seconds.
+  [ 1455.112392]       Tainted: G             L    5.3.0-rc5-mdr-vanilla+ #1
+
+CPUs 45, 24, and 124 are stuck on spin locks, likely held by
+CPUs 105 and 31.
+
+CPUs 105 and 31 are stuck in smp_call_function_many(), waiting on
+target CPU 42. For instance:
+
+  # CPU 105 registers (via xmon)
+  R00 = c00000000020b20c   R16 = 00007d1bcd800000
+  R01 = c00000363eaa7970   R17 = 0000000000000001
+  R02 = c0000000019b3a00   R18 = 000000000000006b
+  R03 = 000000000000002a   R19 = 00007d537d7aecf0
+  R04 = 000000000000002a   R20 = 60000000000000e0
+  R05 = 000000000000002a   R21 = 0801000000000080
+  R06 = c0002073fb0caa08   R22 = 0000000000000d60
+  R07 = c0000000019ddd78   R23 = 0000000000000001
+  R08 = 000000000000002a   R24 = c00000000147a700
+  R09 = 0000000000000001   R25 = c0002073fb0ca908
+  R10 = c000008ffeb4e660   R26 = 0000000000000000
+  R11 = c0002073fb0ca900   R27 = c0000000019e2464
+  R12 = c000000000050790   R28 = c0000000000812b0
+  R13 = c000207fff623e00   R29 = c0002073fb0ca808
+  R14 = 00007d1bbee00000   R30 = c0002073fb0ca800
+  R15 = 00007d1bcd600000   R31 = 0000000000000800
+  pc  = c00000000020b260 smp_call_function_many+0x3d0/0x460
+  cfar= c00000000020b270 smp_call_function_many+0x3e0/0x460
+  lr  = c00000000020b20c smp_call_function_many+0x37c/0x460
+  msr = 900000010288b033   cr  = 44024824
+  ctr = c000000000050790   xer = 0000000000000000   trap =  100
+
+CPU 42 is running normally, doing VCPU work:
+
+  # CPU 42 stack trace (via xmon)
+  [link register   ] c00800001be17188 kvmppc_book3s_radix_page_fault+0x90/0x2b0 [kvm_hv]
+  [c000008ed3343820] c000008ed3343850 (unreliable)
+  [c000008ed33438d0] c00800001be11b6c kvmppc_book3s_hv_page_fault+0x264/0xe30 [kvm_hv]
+  [c000008ed33439d0] c00800001be0d7b4 kvmppc_vcpu_run_hv+0x8dc/0xb50 [kvm_hv]
+  [c000008ed3343ae0] c00800001c10891c kvmppc_vcpu_run+0x34/0x48 [kvm]
+  [c000008ed3343b00] c00800001c10475c kvm_arch_vcpu_ioctl_run+0x244/0x420 [kvm]
+  [c000008ed3343b90] c00800001c0f5a78 kvm_vcpu_ioctl+0x470/0x7c8 [kvm]
+  [c000008ed3343d00] c000000000475450 do_vfs_ioctl+0xe0/0xc70
+  [c000008ed3343db0] c0000000004760e4 ksys_ioctl+0x104/0x120
+  [c000008ed3343e00] c000000000476128 sys_ioctl+0x28/0x80
+  [c000008ed3343e20] c00000000000b388 system_call+0x5c/0x70
+  --- Exception: c00 (System Call) at 00007d545cfd7694
+  SP (7d53ff7edf50) is in userspace
+
+It was subsequently found that ipi_message[PPC_MSG_CALL_FUNCTION]
+was set for CPU 42 by at least 1 of the CPUs waiting in
+smp_call_function_many(), but somehow the corresponding
+call_single_queue entries were never processed by CPU 42, causing the
+callers to spin in csd_lock_wait() indefinitely.
+
+Nick Piggin suggested something similar to the following sequence as
+a possible explanation (interleaving of CALL_FUNCTION/RESCHEDULE
+IPI messages seems to be most common, but any mix of CALL_FUNCTION and
+!CALL_FUNCTION messages could trigger it):
+
+    CPU
+      X: smp_muxed_ipi_set_message():
+      X:   smp_mb()
+      X:   message[RESCHEDULE] = 1
+      X: doorbell_global_ipi(42):
+      X:   kvmppc_set_host_ipi(42, 1)
+      X:   ppc_msgsnd_sync()/smp_mb()
+      X:   ppc_msgsnd() -> 42
+     42: doorbell_exception(): // from CPU X
+     42:   ppc_msgsync()
+    105: smp_muxed_ipi_set_message():
+    105:   smb_mb()
+         // STORE DEFERRED DUE TO RE-ORDERING
+  --105:   message[CALL_FUNCTION] = 1
+  | 105: doorbell_global_ipi(42):
+  | 105:   kvmppc_set_host_ipi(42, 1)
+  |  42:   kvmppc_set_host_ipi(42, 0)
+  |  42: smp_ipi_demux_relaxed()
+  |  42: // returns to executing guest
+  |      // RE-ORDERED STORE COMPLETES
+  ->105:   message[CALL_FUNCTION] = 1
+    105:   ppc_msgsnd_sync()/smp_mb()
+    105:   ppc_msgsnd() -> 42
+     42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+    105: // hangs waiting on 42 to process messages/call_single_queue
+
+This can be prevented with an smp_mb() at the beginning of
+kvmppc_set_host_ipi(), such that stores to message[<type>] (or other
+state indicated by the host_ipi flag) are ordered vs. the store to
+to host_ipi.
+
+However, doing so might still allow for the following scenario (not
+yet observed):
+
+    CPU
+      X: smp_muxed_ipi_set_message():
+      X:   smp_mb()
+      X:   message[RESCHEDULE] = 1
+      X: doorbell_global_ipi(42):
+      X:   kvmppc_set_host_ipi(42, 1)
+      X:   ppc_msgsnd_sync()/smp_mb()
+      X:   ppc_msgsnd() -> 42
+     42: doorbell_exception(): // from CPU X
+     42:   ppc_msgsync()
+         // STORE DEFERRED DUE TO RE-ORDERING
+  -- 42:   kvmppc_set_host_ipi(42, 0)
+  |  42: smp_ipi_demux_relaxed()
+  | 105: smp_muxed_ipi_set_message():
+  | 105:   smb_mb()
+  | 105:   message[CALL_FUNCTION] = 1
+  | 105: doorbell_global_ipi(42):
+  | 105:   kvmppc_set_host_ipi(42, 1)
+  |      // RE-ORDERED STORE COMPLETES
+  -> 42:   kvmppc_set_host_ipi(42, 0)
+     42: // returns to executing guest
+    105:   ppc_msgsnd_sync()/smp_mb()
+    105:   ppc_msgsnd() -> 42
+     42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+    105: // hangs waiting on 42 to process messages/call_single_queue
+
+Fixing this scenario would require an smp_mb() *after* clearing
+host_ipi flag in kvmppc_set_host_ipi() to order the store vs.
+subsequent processing of IPI messages.
+
+To handle both cases, this patch splits kvmppc_set_host_ipi() into
+separate set/clear functions, where we execute smp_mb() prior to
+setting host_ipi flag, and after clearing host_ipi flag. These
+functions pair with each other to synchronize the sender and receiver
+sides.
+
+With that change in place the above workload ran for 20 hours without
+triggering any lock-ups.
+
+Fixes: 755563bc79c7 ("powerpc/powernv: Fixes for hypervisor doorbell handling") # v4.0
+Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
+Acked-by: Paul Mackerras <paulus@ozlabs.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20190911223155.16045-1-mdroth@linux.vnet.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/kvm_ppc.h    | 100 +++++++++++++++++++++++++-
+ arch/powerpc/kernel/dbell.c           |   6 +-
+ arch/powerpc/kvm/book3s_hv_rm_xics.c  |   2 +-
+ arch/powerpc/platforms/powernv/smp.c  |   2 +-
+ arch/powerpc/sysdev/xics/icp-native.c |   6 +-
+ arch/powerpc/sysdev/xics/icp-opal.c   |   6 +-
+ 6 files changed, 108 insertions(+), 14 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
+index e991821dd7fa..a061c3d48c48 100644
+--- a/arch/powerpc/include/asm/kvm_ppc.h
++++ b/arch/powerpc/include/asm/kvm_ppc.h
+@@ -458,9 +458,100 @@ static inline u32 kvmppc_get_xics_latch(void)
+       return xirr;
+ }
+-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
++/*
++ * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
++ * a CPU thread that's running/napping inside of a guest is by default regarded
++ * as a request to wake the CPU (if needed) and continue execution within the
++ * guest, potentially to process new state like externally-generated
++ * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
++ *
++ * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
++ * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
++ * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
++ * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
++ * the receiving side prior to processing the IPI work.
++ *
++ * NOTE:
++ *
++ * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
++ * This is to guard against sequences such as the following:
++ *
++ *      CPU
++ *        X: smp_muxed_ipi_set_message():
++ *        X:   smp_mb()
++ *        X:   message[RESCHEDULE] = 1
++ *        X: doorbell_global_ipi(42):
++ *        X:   kvmppc_set_host_ipi(42)
++ *        X:   ppc_msgsnd_sync()/smp_mb()
++ *        X:   ppc_msgsnd() -> 42
++ *       42: doorbell_exception(): // from CPU X
++ *       42:   ppc_msgsync()
++ *      105: smp_muxed_ipi_set_message():
+ *      105:   smp_mb()
++ *           // STORE DEFERRED DUE TO RE-ORDERING
++ *    --105:   message[CALL_FUNCTION] = 1
++ *    | 105: doorbell_global_ipi(42):
++ *    | 105:   kvmppc_set_host_ipi(42)
++ *    |  42:   kvmppc_clear_host_ipi(42)
++ *    |  42: smp_ipi_demux_relaxed()
++ *    |  42: // returns to executing guest
++ *    |      // RE-ORDERED STORE COMPLETES
++ *    ->105:   message[CALL_FUNCTION] = 1
++ *      105:   ppc_msgsnd_sync()/smp_mb()
++ *      105:   ppc_msgsnd() -> 42
++ *       42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
++ *      105: // hangs waiting on 42 to process messages/call_single_queue
++ *
++ * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
++ * to guard against sequences such as the following (as well as to create
++ * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
++ *
++ *      CPU
++ *        X: smp_muxed_ipi_set_message():
++ *        X:   smp_mb()
++ *        X:   message[RESCHEDULE] = 1
++ *        X: doorbell_global_ipi(42):
++ *        X:   kvmppc_set_host_ipi(42)
++ *        X:   ppc_msgsnd_sync()/smp_mb()
++ *        X:   ppc_msgsnd() -> 42
++ *       42: doorbell_exception(): // from CPU X
++ *       42:   ppc_msgsync()
++ *           // STORE DEFERRED DUE TO RE-ORDERING
++ *    -- 42:   kvmppc_clear_host_ipi(42)
++ *    |  42: smp_ipi_demux_relaxed()
++ *    | 105: smp_muxed_ipi_set_message():
+ *    | 105:   smp_mb()
++ *    | 105:   message[CALL_FUNCTION] = 1
++ *    | 105: doorbell_global_ipi(42):
++ *    | 105:   kvmppc_set_host_ipi(42)
++ *    |      // RE-ORDERED STORE COMPLETES
++ *    -> 42:   kvmppc_clear_host_ipi(42)
++ *       42: // returns to executing guest
++ *      105:   ppc_msgsnd_sync()/smp_mb()
++ *      105:   ppc_msgsnd() -> 42
++ *       42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
++ *      105: // hangs waiting on 42 to process messages/call_single_queue
++ */
++static inline void kvmppc_set_host_ipi(int cpu)
+ {
+-      paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
++      /*
++       * order stores of IPI messages vs. setting of host_ipi flag
++       *
++       * pairs with the barrier in kvmppc_clear_host_ipi()
++       */
++      smp_mb();
++      paca_ptrs[cpu]->kvm_hstate.host_ipi = 1;
++}
++
++static inline void kvmppc_clear_host_ipi(int cpu)
++{
++      paca_ptrs[cpu]->kvm_hstate.host_ipi = 0;
++      /*
++       * order clearing of host_ipi flag vs. processing of IPI messages
++       *
++       * pairs with the barrier in kvmppc_set_host_ipi()
++       */
++      smp_mb();
+ }
+ static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+@@ -489,7 +580,10 @@ static inline u32 kvmppc_get_xics_latch(void)
+       return 0;
+ }
+-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
++static inline void kvmppc_set_host_ipi(int cpu)
++{}
++
++static inline void kvmppc_clear_host_ipi(int cpu)
+ {}
+ static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
+index b6fe883b1016..5828144555af 100644
+--- a/arch/powerpc/kernel/dbell.c
++++ b/arch/powerpc/kernel/dbell.c
+@@ -36,7 +36,7 @@ void doorbell_global_ipi(int cpu)
+ {
+       u32 tag = get_hard_smp_processor_id(cpu);
+-      kvmppc_set_host_ipi(cpu, 1);
++      kvmppc_set_host_ipi(cpu);
+       /* Order previous accesses vs. msgsnd, which is treated as a store */
+       ppc_msgsnd_sync();
+       ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+@@ -51,7 +51,7 @@ void doorbell_core_ipi(int cpu)
+ {
+       u32 tag = cpu_thread_in_core(cpu);
+-      kvmppc_set_host_ipi(cpu, 1);
++      kvmppc_set_host_ipi(cpu);
+       /* Order previous accesses vs. msgsnd, which is treated as a store */
+       ppc_msgsnd_sync();
+       ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+@@ -86,7 +86,7 @@ void doorbell_exception(struct pt_regs *regs)
+       may_hard_irq_enable();
+-      kvmppc_set_host_ipi(smp_processor_id(), 0);
++      kvmppc_clear_host_ipi(smp_processor_id());
+       __this_cpu_inc(irq_stat.doorbell_irqs);
+       smp_ipi_demux_relaxed(); /* already performed the barrier */
+diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
+index 758d1d23215e..aaafb9f080d5 100644
+--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
++++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
+@@ -61,7 +61,7 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
+       hcpu = hcore << threads_shift;
+       kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
+       smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
+-      kvmppc_set_host_ipi(hcpu, 1);
++      kvmppc_set_host_ipi(hcpu);
+       smp_mb();
+       kvmhv_rm_send_ipi(hcpu);
+ }
+diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
+index fdd9577d1798..3d3c989e44dd 100644
+--- a/arch/powerpc/platforms/powernv/smp.c
++++ b/arch/powerpc/platforms/powernv/smp.c
+@@ -223,7 +223,7 @@ static void pnv_smp_cpu_kill_self(void)
+                * for coming online, which are handled via
+                * generic_check_cpu_restart() calls.
+                */
+-              kvmppc_set_host_ipi(cpu, 0);
++              kvmppc_clear_host_ipi(cpu);
+               srr1 = pnv_cpu_offline(cpu);
+diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
+index 37bfbc54aacb..340de58a15bd 100644
+--- a/arch/powerpc/sysdev/xics/icp-native.c
++++ b/arch/powerpc/sysdev/xics/icp-native.c
+@@ -145,7 +145,7 @@ static unsigned int icp_native_get_irq(void)
+ static void icp_native_cause_ipi(int cpu)
+ {
+-      kvmppc_set_host_ipi(cpu, 1);
++      kvmppc_set_host_ipi(cpu);
+       icp_native_set_qirr(cpu, IPI_PRIORITY);
+ }
+@@ -184,7 +184,7 @@ void icp_native_flush_interrupt(void)
+       if (vec == XICS_IPI) {
+               /* Clear pending IPI */
+               int cpu = smp_processor_id();
+-              kvmppc_set_host_ipi(cpu, 0);
++              kvmppc_clear_host_ipi(cpu);
+               icp_native_set_qirr(cpu, 0xff);
+       } else {
+               pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
+@@ -205,7 +205,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
+ {
+       int cpu = smp_processor_id();
+-      kvmppc_set_host_ipi(cpu, 0);
++      kvmppc_clear_host_ipi(cpu);
+       icp_native_set_qirr(cpu, 0xff);
+       return smp_ipi_demux();
+diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
+index c71d2ea42627..e3e52cf035a9 100644
+--- a/arch/powerpc/sysdev/xics/icp-opal.c
++++ b/arch/powerpc/sysdev/xics/icp-opal.c
+@@ -130,7 +130,7 @@ static void icp_opal_cause_ipi(int cpu)
+ {
+       int hw_cpu = get_hard_smp_processor_id(cpu);
+-      kvmppc_set_host_ipi(cpu, 1);
++      kvmppc_set_host_ipi(cpu);
+       opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
+ }
+@@ -138,7 +138,7 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
+ {
+       int cpu = smp_processor_id();
+-      kvmppc_set_host_ipi(cpu, 0);
++      kvmppc_clear_host_ipi(cpu);
+       opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+       return smp_ipi_demux();
+@@ -161,7 +161,7 @@ void icp_opal_flush_interrupt(void)
+               if (vec == XICS_IPI) {
+                       /* Clear pending IPI */
+                       int cpu = smp_processor_id();
+-                      kvmppc_set_host_ipi(cpu, 0);
++                      kvmppc_clear_host_ipi(cpu);
+                       opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+               } else {
+                       pr_err("XICS: hw interrupt 0x%x to offline cpu, "
+-- 
+2.20.1
+
diff --git a/queue-4.19/net-add-annotations-on-hh-hh_len-lockless-accesses.patch b/queue-4.19/net-add-annotations-on-hh-hh_len-lockless-accesses.patch
new file mode 100644 (file)
index 0000000..a75ae6c
--- /dev/null
@@ -0,0 +1,149 @@
+From 5964d1bf3c23ca33377a84e082b1df0590533f6d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Nov 2019 18:29:11 -0800
+Subject: net: add annotations on hh->hh_len lockless accesses
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c305c6ae79e2ce20c22660ceda94f0d86d639a82 ]
+
+KCSAN reported a data-race [1]
+
+While we can use READ_ONCE() on the read sides,
+we need to make sure hh->hh_len is written last.
+
+[1]
+
+BUG: KCSAN: data-race in eth_header_cache / neigh_resolve_output
+
+write to 0xffff8880b9dedcb8 of 4 bytes by task 29760 on cpu 0:
+ eth_header_cache+0xa9/0xd0 net/ethernet/eth.c:247
+ neigh_hh_init net/core/neighbour.c:1463 [inline]
+ neigh_resolve_output net/core/neighbour.c:1480 [inline]
+ neigh_resolve_output+0x415/0x470 net/core/neighbour.c:1470
+ neigh_output include/net/neighbour.h:511 [inline]
+ ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116
+ __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
+ __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
+ ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
+ NF_HOOK_COND include/linux/netfilter.h:294 [inline]
+ ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
+ dst_output include/net/dst.h:436 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505
+ ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647
+ rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615
+ process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
+ worker_thread+0xa0/0x800 kernel/workqueue.c:2415
+ kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352
+
+read to 0xffff8880b9dedcb8 of 4 bytes by task 29572 on cpu 1:
+ neigh_resolve_output net/core/neighbour.c:1479 [inline]
+ neigh_resolve_output+0x113/0x470 net/core/neighbour.c:1470
+ neigh_output include/net/neighbour.h:511 [inline]
+ ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116
+ __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
+ __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
+ ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
+ NF_HOOK_COND include/linux/netfilter.h:294 [inline]
+ ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
+ dst_output include/net/dst.h:436 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505
+ ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647
+ rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615
+ process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
+ worker_thread+0xa0/0x800 kernel/workqueue.c:2415
+ kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 29572 Comm: kworker/1:4 Not tainted 5.4.0-rc6+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: events rt6_probe_deferred
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firewire/net.c  | 6 +++++-
+ include/net/neighbour.h | 2 +-
+ net/core/neighbour.c    | 4 ++--
+ net/ethernet/eth.c      | 7 ++++++-
+ 4 files changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
+index 82ba110d9d1a..bbabfca812bb 100644
+--- a/drivers/firewire/net.c
++++ b/drivers/firewire/net.c
+@@ -249,7 +249,11 @@ static int fwnet_header_cache(const struct neighbour *neigh,
+       h = (struct fwnet_header *)((u8 *)hh->hh_data + HH_DATA_OFF(sizeof(*h)));
+       h->h_proto = type;
+       memcpy(h->h_dest, neigh->ha, net->addr_len);
+-      hh->hh_len = FWNET_HLEN;
++
++      /* Pairs with the READ_ONCE() in neigh_resolve_output(),
++       * neigh_hh_output() and neigh_update_hhs().
++       */
++      smp_store_release(&hh->hh_len, FWNET_HLEN);
+       return 0;
+ }
+diff --git a/include/net/neighbour.h b/include/net/neighbour.h
+index c84807c1c5bd..5ce035984a4d 100644
+--- a/include/net/neighbour.h
++++ b/include/net/neighbour.h
+@@ -459,7 +459,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
+       do {
+               seq = read_seqbegin(&hh->hh_lock);
+-              hh_len = hh->hh_len;
++              hh_len = READ_ONCE(hh->hh_len);
+               if (likely(hh_len <= HH_DATA_MOD)) {
+                       hh_alen = HH_DATA_MOD;
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index 7597afee7068..e260d44ebdca 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -1097,7 +1097,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
+       if (update) {
+               hh = &neigh->hh;
+-              if (hh->hh_len) {
++              if (READ_ONCE(hh->hh_len)) {
+                       write_seqlock_bh(&hh->hh_lock);
+                       update(hh, neigh->dev, neigh->ha);
+                       write_sequnlock_bh(&hh->hh_lock);
+@@ -1360,7 +1360,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
+               struct net_device *dev = neigh->dev;
+               unsigned int seq;
+-              if (dev->header_ops->cache && !neigh->hh.hh_len)
++              if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
+                       neigh_hh_init(neigh);
+               do {
+diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
+index fd8faa0dfa61..ca06e9a53d15 100644
+--- a/net/ethernet/eth.c
++++ b/net/ethernet/eth.c
+@@ -239,7 +239,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16
+       eth->h_proto = type;
+       memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
+       memcpy(eth->h_dest, neigh->ha, ETH_ALEN);
+-      hh->hh_len = ETH_HLEN;
++
++      /* Pairs with READ_ONCE() in neigh_resolve_output(),
++       * neigh_hh_output() and neigh_update_hhs().
++       */
++      smp_store_release(&hh->hh_len, ETH_HLEN);
++
+       return 0;
+ }
+ EXPORT_SYMBOL(eth_header_cache);
+-- 
+2.20.1
+
diff --git a/queue-4.19/net-core-limit-nested-device-depth.patch b/queue-4.19/net-core-limit-nested-device-depth.patch
new file mode 100644 (file)
index 0000000..417a355
--- /dev/null
@@ -0,0 +1,457 @@
+From 4ae9b1f9371093b86c74c1905accbd1aaf0a0dce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Oct 2019 18:47:50 +0000
+Subject: net: core: limit nested device depth
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 5343da4c17429efaa5fb1594ea96aee1a283e694 ]
+
+Current code doesn't limit the number of nested devices.
+Nested devices would be handled recursively and this needs huge stack
+memory. So, unlimited nested devices could make stack overflow.
+
+This patch adds upper_level and lower_level, they are common variables
+and represent maximum lower/upper depth.
+When an upper/lower device is attached or detached,
+{lower/upper}_level are updated, and if maximum depth is bigger than 8,
+attach routine fails and returns -EMLINK.
+
+In addition, this patch converts recursive routine of
+netdev_walk_all_{lower/upper} to iterator routine.
+
+Test commands:
+    ip link add dummy0 type dummy
+    ip link add link dummy0 name vlan1 type vlan id 1
+    ip link set vlan1 up
+
+    for i in {2..55}
+    do
+           let A=$i-1
+
+           ip link add vlan$i link vlan$A type vlan id $i
+    done
+    ip link del dummy0
+
+Splat looks like:
+[  155.513226][  T908] BUG: KASAN: use-after-free in __unwind_start+0x71/0x850
+[  155.514162][  T908] Write of size 88 at addr ffff8880608a6cc0 by task ip/908
+[  155.515048][  T908]
+[  155.515333][  T908] CPU: 0 PID: 908 Comm: ip Not tainted 5.4.0-rc3+ #96
+[  155.516147][  T908] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[  155.517233][  T908] Call Trace:
+[  155.517627][  T908]
+[  155.517918][  T908] Allocated by task 0:
+[  155.518412][  T908] (stack is not available)
+[  155.518955][  T908]
+[  155.519228][  T908] Freed by task 0:
+[  155.519885][  T908] (stack is not available)
+[  155.520452][  T908]
+[  155.520729][  T908] The buggy address belongs to the object at ffff8880608a6ac0
+[  155.520729][  T908]  which belongs to the cache names_cache of size 4096
+[  155.522387][  T908] The buggy address is located 512 bytes inside of
+[  155.522387][  T908]  4096-byte region [ffff8880608a6ac0, ffff8880608a7ac0)
+[  155.523920][  T908] The buggy address belongs to the page:
+[  155.524552][  T908] page:ffffea0001822800 refcount:1 mapcount:0 mapping:ffff88806c657cc0 index:0x0 compound_mapcount:0
+[  155.525836][  T908] flags: 0x100000000010200(slab|head)
+[  155.526445][  T908] raw: 0100000000010200 ffffea0001813808 ffffea0001a26c08 ffff88806c657cc0
+[  155.527424][  T908] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000
+[  155.528429][  T908] page dumped because: kasan: bad access detected
+[  155.529158][  T908]
+[  155.529410][  T908] Memory state around the buggy address:
+[  155.530060][  T908]  ffff8880608a6b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  155.530971][  T908]  ffff8880608a6c00: fb fb fb fb fb f1 f1 f1 f1 00 f2 f2 f2 f3 f3 f3
+[  155.531889][  T908] >ffff8880608a6c80: f3 fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  155.532806][  T908]                                            ^
+[  155.533509][  T908]  ffff8880608a6d00: fb fb fb fb fb fb fb fb fb f1 f1 f1 f1 00 00 00
+[  155.534436][  T908]  ffff8880608a6d80: f2 f3 f3 f3 f3 fb fb fb 00 00 00 00 00 00 00 00
+[ ... ]
+
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h |   4 +
+ net/core/dev.c            | 272 +++++++++++++++++++++++++++++++-------
+ 2 files changed, 231 insertions(+), 45 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 9dfa0ae173ac..d5527e3828d1 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1619,6 +1619,8 @@ enum netdev_priv_flags {
+  *    @perm_addr:             Permanent hw address
+  *    @addr_assign_type:      Hw address assignment type
+  *    @addr_len:              Hardware address length
++ *    @upper_level:           Maximum depth level of upper devices.
++ *    @lower_level:           Maximum depth level of lower devices.
+  *    @neigh_priv_len:        Used in neigh_alloc()
+  *    @dev_id:                Used to differentiate devices that share
+  *                            the same link layer address
+@@ -1853,6 +1855,8 @@ struct net_device {
+       unsigned char           perm_addr[MAX_ADDR_LEN];
+       unsigned char           addr_assign_type;
+       unsigned char           addr_len;
++      unsigned char           upper_level;
++      unsigned char           lower_level;
+       unsigned short          neigh_priv_len;
+       unsigned short          dev_id;
+       unsigned short          dev_port;
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 8ff21d461f08..a26d87073f71 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -149,6 +149,7 @@
+ #include "net-sysfs.h"
+ #define MAX_GRO_SKBS 8
++#define MAX_NEST_DEV 8
+ /* This should be increased if a protocol with a bigger head is added. */
+ #define GRO_MAX_HEAD (MAX_HEADER + 128)
+@@ -6542,6 +6543,21 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ }
+ EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
++static struct net_device *netdev_next_upper_dev(struct net_device *dev,
++                                              struct list_head **iter)
++{
++      struct netdev_adjacent *upper;
++
++      upper = list_entry((*iter)->next, struct netdev_adjacent, list);
++
++      if (&upper->list == &dev->adj_list.upper)
++              return NULL;
++
++      *iter = &upper->list;
++
++      return upper->dev;
++}
++
+ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
+                                                   struct list_head **iter)
+ {
+@@ -6559,28 +6575,93 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
+       return upper->dev;
+ }
++static int netdev_walk_all_upper_dev(struct net_device *dev,
++                                   int (*fn)(struct net_device *dev,
++                                             void *data),
++                                   void *data)
++{
++      struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
++      struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
++      int ret, cur = 0;
++
++      now = dev;
++      iter = &dev->adj_list.upper;
++
++      while (1) {
++              if (now != dev) {
++                      ret = fn(now, data);
++                      if (ret)
++                              return ret;
++              }
++
++              next = NULL;
++              while (1) {
++                      udev = netdev_next_upper_dev(now, &iter);
++                      if (!udev)
++                              break;
++
++                      next = udev;
++                      niter = &udev->adj_list.upper;
++                      dev_stack[cur] = now;
++                      iter_stack[cur++] = iter;
++                      break;
++              }
++
++              if (!next) {
++                      if (!cur)
++                              return 0;
++                      next = dev_stack[--cur];
++                      niter = iter_stack[cur];
++              }
++
++              now = next;
++              iter = niter;
++      }
++
++      return 0;
++}
++
+ int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
+                                 int (*fn)(struct net_device *dev,
+                                           void *data),
+                                 void *data)
+ {
+-      struct net_device *udev;
+-      struct list_head *iter;
+-      int ret;
++      struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
++      struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
++      int ret, cur = 0;
+-      for (iter = &dev->adj_list.upper,
+-           udev = netdev_next_upper_dev_rcu(dev, &iter);
+-           udev;
+-           udev = netdev_next_upper_dev_rcu(dev, &iter)) {
+-              /* first is the upper device itself */
+-              ret = fn(udev, data);
+-              if (ret)
+-                      return ret;
++      now = dev;
++      iter = &dev->adj_list.upper;
+-              /* then look at all of its upper devices */
+-              ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
+-              if (ret)
+-                      return ret;
++      while (1) {
++              if (now != dev) {
++                      ret = fn(now, data);
++                      if (ret)
++                              return ret;
++              }
++
++              next = NULL;
++              while (1) {
++                      udev = netdev_next_upper_dev_rcu(now, &iter);
++                      if (!udev)
++                              break;
++
++                      next = udev;
++                      niter = &udev->adj_list.upper;
++                      dev_stack[cur] = now;
++                      iter_stack[cur++] = iter;
++                      break;
++              }
++
++              if (!next) {
++                      if (!cur)
++                              return 0;
++                      next = dev_stack[--cur];
++                      niter = iter_stack[cur];
++              }
++
++              now = next;
++              iter = niter;
+       }
+       return 0;
+@@ -6688,23 +6769,42 @@ int netdev_walk_all_lower_dev(struct net_device *dev,
+                                       void *data),
+                             void *data)
+ {
+-      struct net_device *ldev;
+-      struct list_head *iter;
+-      int ret;
++      struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
++      struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
++      int ret, cur = 0;
+-      for (iter = &dev->adj_list.lower,
+-           ldev = netdev_next_lower_dev(dev, &iter);
+-           ldev;
+-           ldev = netdev_next_lower_dev(dev, &iter)) {
+-              /* first is the lower device itself */
+-              ret = fn(ldev, data);
+-              if (ret)
+-                      return ret;
++      now = dev;
++      iter = &dev->adj_list.lower;
+-              /* then look at all of its lower devices */
+-              ret = netdev_walk_all_lower_dev(ldev, fn, data);
+-              if (ret)
+-                      return ret;
++      while (1) {
++              if (now != dev) {
++                      ret = fn(now, data);
++                      if (ret)
++                              return ret;
++              }
++
++              next = NULL;
++              while (1) {
++                      ldev = netdev_next_lower_dev(now, &iter);
++                      if (!ldev)
++                              break;
++
++                      next = ldev;
++                      niter = &ldev->adj_list.lower;
++                      dev_stack[cur] = now;
++                      iter_stack[cur++] = iter;
++                      break;
++              }
++
++              if (!next) {
++                      if (!cur)
++                              return 0;
++                      next = dev_stack[--cur];
++                      niter = iter_stack[cur];
++              }
++
++              now = next;
++              iter = niter;
+       }
+       return 0;
+@@ -6725,28 +6825,93 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+       return lower->dev;
+ }
+-int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+-                                int (*fn)(struct net_device *dev,
+-                                          void *data),
+-                                void *data)
++static u8 __netdev_upper_depth(struct net_device *dev)
++{
++      struct net_device *udev;
++      struct list_head *iter;
++      u8 max_depth = 0;
++
++      for (iter = &dev->adj_list.upper,
++           udev = netdev_next_upper_dev(dev, &iter);
++           udev;
++           udev = netdev_next_upper_dev(dev, &iter)) {
++              if (max_depth < udev->upper_level)
++                      max_depth = udev->upper_level;
++      }
++
++      return max_depth;
++}
++
++static u8 __netdev_lower_depth(struct net_device *dev)
+ {
+       struct net_device *ldev;
+       struct list_head *iter;
+-      int ret;
++      u8 max_depth = 0;
+       for (iter = &dev->adj_list.lower,
+-           ldev = netdev_next_lower_dev_rcu(dev, &iter);
++           ldev = netdev_next_lower_dev(dev, &iter);
+            ldev;
+-           ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
+-              /* first is the lower device itself */
+-              ret = fn(ldev, data);
+-              if (ret)
+-                      return ret;
++           ldev = netdev_next_lower_dev(dev, &iter)) {
++              if (max_depth < ldev->lower_level)
++                      max_depth = ldev->lower_level;
++      }
+-              /* then look at all of its lower devices */
+-              ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
+-              if (ret)
+-                      return ret;
++      return max_depth;
++}
++
++static int __netdev_update_upper_level(struct net_device *dev, void *data)
++{
++      dev->upper_level = __netdev_upper_depth(dev) + 1;
++      return 0;
++}
++
++static int __netdev_update_lower_level(struct net_device *dev, void *data)
++{
++      dev->lower_level = __netdev_lower_depth(dev) + 1;
++      return 0;
++}
++
++int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
++                                int (*fn)(struct net_device *dev,
++                                          void *data),
++                                void *data)
++{
++      struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
++      struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
++      int ret, cur = 0;
++
++      now = dev;
++      iter = &dev->adj_list.lower;
++
++      while (1) {
++              if (now != dev) {
++                      ret = fn(now, data);
++                      if (ret)
++                              return ret;
++              }
++
++              next = NULL;
++              while (1) {
++                      ldev = netdev_next_lower_dev_rcu(now, &iter);
++                      if (!ldev)
++                              break;
++
++                      next = ldev;
++                      niter = &ldev->adj_list.lower;
++                      dev_stack[cur] = now;
++                      iter_stack[cur++] = iter;
++                      break;
++              }
++
++              if (!next) {
++                      if (!cur)
++                              return 0;
++                      next = dev_stack[--cur];
++                      niter = iter_stack[cur];
++              }
++
++              now = next;
++              iter = niter;
+       }
+       return 0;
+@@ -7003,6 +7168,9 @@ static int __netdev_upper_dev_link(struct net_device *dev,
+       if (netdev_has_upper_dev(upper_dev, dev))
+               return -EBUSY;
++      if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
++              return -EMLINK;
++
+       if (!master) {
+               if (netdev_has_upper_dev(dev, upper_dev))
+                       return -EEXIST;
+@@ -7029,6 +7197,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
+       if (ret)
+               goto rollback;
++      __netdev_update_upper_level(dev, NULL);
++      netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
++
++      __netdev_update_lower_level(upper_dev, NULL);
++      netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL);
++
+       return 0;
+ rollback:
+@@ -7111,6 +7285,12 @@ void netdev_upper_dev_unlink(struct net_device *dev,
+       call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
+                                     &changeupper_info.info);
++
++      __netdev_update_upper_level(dev, NULL);
++      netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
++
++      __netdev_update_lower_level(upper_dev, NULL);
++      netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL);
+ }
+ EXPORT_SYMBOL(netdev_upper_dev_unlink);
+@@ -8978,6 +9158,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+       dev->gso_max_size = GSO_MAX_SIZE;
+       dev->gso_max_segs = GSO_MAX_SEGS;
++      dev->upper_level = 1;
++      dev->lower_level = 1;
+       INIT_LIST_HEAD(&dev->napi_list);
+       INIT_LIST_HEAD(&dev->unreg_list);
+-- 
+2.20.1
+
diff --git a/queue-4.19/perf-x86-intel-bts-fix-the-use-of-page_private.patch b/queue-4.19/perf-x86-intel-bts-fix-the-use-of-page_private.patch
new file mode 100644 (file)
index 0000000..49444ca
--- /dev/null
@@ -0,0 +1,95 @@
+From 17029394b8fb11ceac999eeed1081584640412a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Dec 2019 17:28:52 +0300
+Subject: perf/x86/intel/bts: Fix the use of page_private()
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit ff61541cc6c1962957758ba433c574b76f588d23 ]
+
+Commit
+
+  8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver")
+
+brought in a warning with the BTS buffer initialization
+that is easily tripped with (assuming KPTI is disabled):
+
+instantly throwing:
+
+> ------------[ cut here ]------------
+> WARNING: CPU: 2 PID: 326 at arch/x86/events/intel/bts.c:86 bts_buffer_setup_aux+0x117/0x3d0
+> Modules linked in:
+> CPU: 2 PID: 326 Comm: perf Not tainted 5.4.0-rc8-00291-gceb9e77324fa #904
+> RIP: 0010:bts_buffer_setup_aux+0x117/0x3d0
+> Call Trace:
+>  rb_alloc_aux+0x339/0x550
+>  perf_mmap+0x607/0xc70
+>  mmap_region+0x76b/0xbd0
+...
+
+It appears to assume (for lost raisins) that PagePrivate() is set,
+while later it actually tests for PagePrivate() before using
+page_private().
+
+Make it consistent and always check PagePrivate() before using
+page_private().
+
+Fixes: 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver")
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Link: https://lkml.kernel.org/r/20191205142853.28894-2-alexander.shishkin@linux.intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/events/intel/bts.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
+index 7139f6bf27ad..510f9461407e 100644
+--- a/arch/x86/events/intel/bts.c
++++ b/arch/x86/events/intel/bts.c
+@@ -71,9 +71,17 @@ struct bts_buffer {
+ static struct pmu bts_pmu;
++static int buf_nr_pages(struct page *page)
++{
++      if (!PagePrivate(page))
++              return 1;
++
++      return 1 << page_private(page);
++}
++
+ static size_t buf_size(struct page *page)
+ {
+-      return 1 << (PAGE_SHIFT + page_private(page));
++      return buf_nr_pages(page) * PAGE_SIZE;
+ }
+ static void *
+@@ -91,9 +99,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
+       /* count all the high order buffers */
+       for (pg = 0, nbuf = 0; pg < nr_pages;) {
+               page = virt_to_page(pages[pg]);
+-              if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
+-                      return NULL;
+-              pg += 1 << page_private(page);
++              pg += buf_nr_pages(page);
+               nbuf++;
+       }
+@@ -117,7 +123,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
+               unsigned int __nr_pages;
+               page = virt_to_page(pages[pg]);
+-              __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
++              __nr_pages = buf_nr_pages(page);
+               buf->buf[nbuf].page = page;
+               buf->buf[nbuf].offset = offset;
+               buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+-- 
+2.20.1
+
diff --git a/queue-4.19/powerpc-pseries-hvconsole-fix-stack-overread-via-udb.patch b/queue-4.19/powerpc-pseries-hvconsole-fix-stack-overread-via-udb.patch
new file mode 100644 (file)
index 0000000..c9e628e
--- /dev/null
@@ -0,0 +1,116 @@
+From acbf6f08f638cff2c287a07e09819ad9fee1c897 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jun 2019 16:56:57 +1000
+Subject: powerpc/pseries/hvconsole: Fix stack overread via udbg
+
+From: Daniel Axtens <dja@axtens.net>
+
+[ Upstream commit 934bda59f286d0221f1a3ebab7f5156a996cc37d ]
+
+While developing KASAN for 64-bit book3s, I hit the following stack
+over-read.
+
+It occurs because the hypercall to put characters onto the terminal
+takes 2 longs (128 bits/16 bytes) of characters at a time, and so
+hvc_put_chars() would unconditionally copy 16 bytes from the argument
+buffer, regardless of supplied length. However, udbg_hvc_putc() can
+call hvc_put_chars() with a single-byte buffer, leading to the error.
+
+  ==================================================================
+  BUG: KASAN: stack-out-of-bounds in hvc_put_chars+0xdc/0x110
+  Read of size 8 at addr c0000000023e7a90 by task swapper/0
+
+  CPU: 0 PID: 0 Comm: swapper Not tainted 5.2.0-rc2-next-20190528-02824-g048a6ab4835b #113
+  Call Trace:
+    dump_stack+0x104/0x154 (unreliable)
+    print_address_description+0xa0/0x30c
+    __kasan_report+0x20c/0x224
+    kasan_report+0x18/0x30
+    __asan_report_load8_noabort+0x24/0x40
+    hvc_put_chars+0xdc/0x110
+    hvterm_raw_put_chars+0x9c/0x110
+    udbg_hvc_putc+0x154/0x200
+    udbg_write+0xf0/0x240
+    console_unlock+0x868/0xd30
+    register_console+0x970/0xe90
+    register_early_udbg_console+0xf8/0x114
+    setup_arch+0x108/0x790
+    start_kernel+0x104/0x784
+    start_here_common+0x1c/0x534
+
+  Memory state around the buggy address:
+   c0000000023e7980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+   c0000000023e7a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1
+  >c0000000023e7a80: f1 f1 01 f2 f2 f2 00 00 00 00 00 00 00 00 00 00
+                           ^
+   c0000000023e7b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+   c0000000023e7b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+  ==================================================================
+
+Document that a 16-byte buffer is requred, and provide it in udbg.
+
+Signed-off-by: Daniel Axtens <dja@axtens.net>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/platforms/pseries/hvconsole.c |  2 +-
+ drivers/tty/hvc/hvc_vio.c                  | 16 +++++++++++++++-
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
+index 74da18de853a..73ec15cd2708 100644
+--- a/arch/powerpc/platforms/pseries/hvconsole.c
++++ b/arch/powerpc/platforms/pseries/hvconsole.c
+@@ -62,7 +62,7 @@ EXPORT_SYMBOL(hvc_get_chars);
+  * @vtermno: The vtermno or unit_address of the adapter from which the data
+  *    originated.
+  * @buf: The character buffer that contains the character data to send to
+- *    firmware.
++ *    firmware. Must be at least 16 bytes, even if count is less than 16.
+  * @count: Send this number of characters.
+  */
+ int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
+diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c
+index 59eaa620bf13..80fd06fbd712 100644
+--- a/drivers/tty/hvc/hvc_vio.c
++++ b/drivers/tty/hvc/hvc_vio.c
+@@ -107,6 +107,14 @@ static int hvterm_raw_get_chars(uint32_t vtermno, char *buf, int count)
+       return got;
+ }
++/**
++ * hvterm_raw_put_chars: send characters to firmware for given vterm adapter
++ * @vtermno: The virtual terminal number.
++ * @buf: The characters to send. Because of the underlying hypercall in
++ *       hvc_put_chars(), this buffer must be at least 16 bytes long, even if
++ *       you are sending fewer chars.
++ * @count: number of chars to send.
++ */
+ static int hvterm_raw_put_chars(uint32_t vtermno, const char *buf, int count)
+ {
+       struct hvterm_priv *pv = hvterm_privs[vtermno];
+@@ -219,6 +227,7 @@ static const struct hv_ops hvterm_hvsi_ops = {
+ static void udbg_hvc_putc(char c)
+ {
+       int count = -1;
++      unsigned char bounce_buffer[16];
+       if (!hvterm_privs[0])
+               return;
+@@ -229,7 +238,12 @@ static void udbg_hvc_putc(char c)
+       do {
+               switch(hvterm_privs[0]->proto) {
+               case HV_PROTOCOL_RAW:
+-                      count = hvterm_raw_put_chars(0, &c, 1);
++                      /*
++                       * hvterm_raw_put_chars requires at least a 16-byte
++                       * buffer, so go via the bounce buffer
++                       */
++                      bounce_buffer[0] = c;
++                      count = hvterm_raw_put_chars(0, bounce_buffer, 1);
+                       break;
+               case HV_PROTOCOL_HVSI:
+                       count = hvterm_hvsi_put_chars(0, &c, 1);
+-- 
+2.20.1
+
diff --git a/queue-4.19/rxrpc-fix-possible-null-pointer-access-in-icmp-handl.patch b/queue-4.19/rxrpc-fix-possible-null-pointer-access-in-icmp-handl.patch
new file mode 100644 (file)
index 0000000..5ac8bd7
--- /dev/null
@@ -0,0 +1,68 @@
+From 27bc67f62237f2288b91ce198d12b9913182c428 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Oct 2019 15:52:34 +0100
+Subject: rxrpc: Fix possible NULL pointer access in ICMP handling
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit f0308fb0708078d6c1d8a4d533941a7a191af634 ]
+
+If an ICMP packet comes in on the UDP socket backing an AF_RXRPC socket as
+the UDP socket is being shut down, rxrpc_error_report() may get called to
+deal with it after sk_user_data on the UDP socket has been cleared, leading
+to a NULL pointer access when this local endpoint record gets accessed.
+
+Fix this by just returning immediately if sk_user_data was NULL.
+
+The oops looks like the following:
+
+#PF: supervisor read access in kernel mode
+#PF: error_code(0x0000) - not-present page
+...
+RIP: 0010:rxrpc_error_report+0x1bd/0x6a9
+...
+Call Trace:
+ ? sock_queue_err_skb+0xbd/0xde
+ ? __udp4_lib_err+0x313/0x34d
+ __udp4_lib_err+0x313/0x34d
+ icmp_unreach+0x1ee/0x207
+ icmp_rcv+0x25b/0x28f
+ ip_protocol_deliver_rcu+0x95/0x10e
+ ip_local_deliver+0xe9/0x148
+ __netif_receive_skb_one_core+0x52/0x6e
+ process_backlog+0xdc/0x177
+ net_rx_action+0xf9/0x270
+ __do_softirq+0x1b6/0x39a
+ ? smpboot_register_percpu_thread+0xce/0xce
+ run_ksoftirqd+0x1d/0x42
+ smpboot_thread_fn+0x19e/0x1b3
+ kthread+0xf1/0xf6
+ ? kthread_delayed_work_timer_fn+0x83/0x83
+ ret_from_fork+0x24/0x30
+
+Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
+Reported-by: syzbot+611164843bd48cc2190c@syzkaller.appspotmail.com
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rxrpc/peer_event.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
+index dc7fdaf20445..42582a9ff81d 100644
+--- a/net/rxrpc/peer_event.c
++++ b/net/rxrpc/peer_event.c
+@@ -153,6 +153,9 @@ void rxrpc_error_report(struct sock *sk)
+       struct rxrpc_peer *peer;
+       struct sk_buff *skb;
++      if (unlikely(!local))
++              return;
++
+       _enter("%p{%d}", sk, local->debug_id);
+       skb = sock_dequeue_err_skb(sk);
+-- 
+2.20.1
+
diff --git a/queue-4.19/s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch b/queue-4.19/s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch
new file mode 100644 (file)
index 0000000..ff07c93
--- /dev/null
@@ -0,0 +1,155 @@
+From 2a2d16b8855ba8661bab7c8fb144c37a91bfcc32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Nov 2019 14:55:38 +0100
+Subject: s390/smp: fix physical to logical CPU map for SMT
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+[ Upstream commit 72a81ad9d6d62dcb79f7e8ad66ffd1c768b72026 ]
+
+If an SMT capable system is not IPL'ed from the first CPU the setup of
+the physical to logical CPU mapping is broken: the IPL core gets CPU
+number 0, but then the next core gets CPU number 1. Correct would be
+that all SMT threads of CPU 0 get the subsequent logical CPU numbers.
+
+This is important since a lot of code (like e.g. the CPU topology
+code) assumes that CPU maps are setup like this. If the mapping is
+broken the system will not IPL due to broken topology masks:
+
+[    1.716341] BUG: arch topology broken
+[    1.716342]      the SMT domain not a subset of the MC domain
+[    1.716343] BUG: arch topology broken
+[    1.716344]      the MC domain not a subset of the BOOK domain
+
+This scenario can usually not happen since LPARs are always IPL'ed
+from CPU 0 and also re-IPL is intiated from CPU 0. However older
+kernels did initiate re-IPL on an arbitrary CPU. If therefore a re-IPL
+from an old kernel into a new kernel is initiated this may lead to
+crash.
+
+Fix this by setting up the physical to logical CPU mapping correctly.
+
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/smp.c | 80 ++++++++++++++++++++++++++++--------------
+ 1 file changed, 54 insertions(+), 26 deletions(-)
+
+diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
+index df2413f26a8f..ecd24711f3aa 100644
+--- a/arch/s390/kernel/smp.c
++++ b/arch/s390/kernel/smp.c
+@@ -715,39 +715,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
+ static int smp_add_present_cpu(int cpu);
+-static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
++static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
++                      bool configured, bool early)
+ {
+       struct pcpu *pcpu;
+-      cpumask_t avail;
+-      int cpu, nr, i, j;
++      int cpu, nr, i;
+       u16 address;
+       nr = 0;
+-      cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+-      cpu = cpumask_first(&avail);
+-      for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
+-              if (sclp.has_core_type && info->core[i].type != boot_core_type)
++      if (sclp.has_core_type && core->type != boot_core_type)
++              return nr;
++      cpu = cpumask_first(avail);
++      address = core->core_id << smp_cpu_mt_shift;
++      for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
++              if (pcpu_find_address(cpu_present_mask, address + i))
+                       continue;
+-              address = info->core[i].core_id << smp_cpu_mt_shift;
+-              for (j = 0; j <= smp_cpu_mtid; j++) {
+-                      if (pcpu_find_address(cpu_present_mask, address + j))
+-                              continue;
+-                      pcpu = pcpu_devices + cpu;
+-                      pcpu->address = address + j;
+-                      pcpu->state =
+-                              (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
+-                              CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+-                      smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+-                      set_cpu_present(cpu, true);
+-                      if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+-                              set_cpu_present(cpu, false);
+-                      else
+-                              nr++;
+-                      cpu = cpumask_next(cpu, &avail);
+-                      if (cpu >= nr_cpu_ids)
++              pcpu = pcpu_devices + cpu;
++              pcpu->address = address + i;
++              if (configured)
++                      pcpu->state = CPU_STATE_CONFIGURED;
++              else
++                      pcpu->state = CPU_STATE_STANDBY;
++              smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
++              set_cpu_present(cpu, true);
++              if (!early && smp_add_present_cpu(cpu) != 0)
++                      set_cpu_present(cpu, false);
++              else
++                      nr++;
++              cpumask_clear_cpu(cpu, avail);
++              cpu = cpumask_next(cpu, avail);
++      }
++      return nr;
++}
++
++static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
++{
++      struct sclp_core_entry *core;
++      cpumask_t avail;
++      bool configured;
++      u16 core_id;
++      int nr, i;
++
++      nr = 0;
++      cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
++      /*
++       * Add IPL core first (which got logical CPU number 0) to make sure
++       * that all SMT threads get subsequent logical CPU numbers.
++       */
++      if (early) {
++              core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
++              for (i = 0; i < info->configured; i++) {
++                      core = &info->core[i];
++                      if (core->core_id == core_id) {
++                              nr += smp_add_core(core, &avail, true, early);
+                               break;
++                      }
+               }
+       }
++      for (i = 0; i < info->combined; i++) {
++              configured = i < info->configured;
++              nr += smp_add_core(&info->core[i], &avail, configured, early);
++      }
+       return nr;
+ }
+@@ -793,7 +821,7 @@ void __init smp_detect_cpus(void)
+       /* Add CPUs present at boot */
+       get_online_cpus();
+-      __smp_rescan_cpus(info, 0);
++      __smp_rescan_cpus(info, true);
+       put_online_cpus();
+       memblock_free_early((unsigned long)info, sizeof(*info));
+ }
+@@ -1145,7 +1173,7 @@ int __ref smp_rescan_cpus(void)
+       smp_get_core_info(info, 0);
+       get_online_cpus();
+       mutex_lock(&smp_cpu_state_mutex);
+-      nr = __smp_rescan_cpus(info, 1);
++      nr = __smp_rescan_cpus(info, false);
+       mutex_unlock(&smp_cpu_state_mutex);
+       put_online_cpus();
+       kfree(info);
+-- 
+2.20.1
+
diff --git a/queue-4.19/scsi-qedf-do-not-retry-els-request-if-qedf_alloc_cmd.patch b/queue-4.19/scsi-qedf-do-not-retry-els-request-if-qedf_alloc_cmd.patch
new file mode 100644 (file)
index 0000000..7d2ef96
--- /dev/null
@@ -0,0 +1,93 @@
+From 857a9311a2ece78520554227b748d3193c5b83ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Mar 2019 00:38:33 -0700
+Subject: scsi: qedf: Do not retry ELS request if qedf_alloc_cmd fails
+
+From: Chad Dupuis <cdupuis@marvell.com>
+
+[ Upstream commit f1c43590365bac054d753d808dbbd207d09e088d ]
+
+If we cannot allocate an ELS middlepath request, simply fail instead of
+trying to delay and then reallocate.  This delay logic is causing soft
+lockup messages:
+
+NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [kworker/2:1:7639]
+Modules linked in: xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun devlink ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter dm_service_time vfat fat rpcrdma sunrpc ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm
+irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd iTCO_wdt iTCO_vendor_support qedr(OE) ib_core joydev ipmi_ssif pcspkr hpilo hpwdt sg ipmi_si ipmi_devintf ipmi_msghandler ioatdma shpchp lpc_ich wmi dca acpi_power_meter dm_multipath ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic qedf(OE) libfcoe mgag200 libfc i2c_algo_bit drm_kms_helper scsi_transport_fc qede(OE) syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qed(OE) drm crct10dif_pclmul e1000e crct10dif_common crc32c_intel scsi_tgt hpsa i2c_core ptp scsi_transport_sas pps_core dm_mirror dm_region_hash dm_log dm_mod
+CPU: 2 PID: 7639 Comm: kworker/2:1 Kdump: loaded Tainted: G           OEL ------------   3.10.0-861.el7.x86_64 #1
+Hardware name: HP ProLiant DL580 Gen9/ProLiant DL580 Gen9, BIOS U17 07/21/2016
+Workqueue: qedf_2_dpc qedf_handle_rrq [qedf]
+task: ffff959edd628fd0 ti: ffff959ed6f08000 task.ti: ffff959ed6f08000
+RIP: 0010:[<ffffffff8355913a>]  [<ffffffff8355913a>] delay_tsc+0x3a/0x60
+RSP: 0018:ffff959ed6f0bd30  EFLAGS: 00000246
+RAX: 000000008ef5f791 RBX: 5f646d635f666465 RCX: 0000025b8ededa2f
+RDX: 000000000000025b RSI: 0000000000000002 RDI: 0000000000217d1e
+RBP: ffff959ed6f0bd30 R08: ffffffffc079aae8 R09: 0000000000000200
+R10: ffffffffc07952c6 R11: 0000000000000000 R12: 6c6c615f66646571
+R13: ffff959ed6f0bcc8 R14: ffff959ed6f0bd08 R15: ffff959e00000028
+FS:  0000000000000000(0000) GS:ffff959eff480000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f4117fa1eb0 CR3: 0000002039e66000 CR4: 00000000003607e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+[<ffffffff8355907d>] __const_udelay+0x2d/0x30
+[<ffffffffc079444a>] qedf_initiate_els+0x13a/0x450 [qedf]
+[<ffffffffc0794210>] ? qedf_srr_compl+0x2a0/0x2a0 [qedf]
+[<ffffffffc0795337>] qedf_send_rrq+0x127/0x230 [qedf]
+[<ffffffffc078ed55>] qedf_handle_rrq+0x15/0x20 [qedf]
+[<ffffffff832b2dff>] process_one_work+0x17f/0x440
+[<ffffffff832b3ac6>] worker_thread+0x126/0x3c0
+[<ffffffff832b39a0>] ? manage_workers.isra.24+0x2a0/0x2a0
+[<ffffffff832bae31>] kthread+0xd1/0xe0
+[<ffffffff832bad60>] ? insert_kthread_work+0x40/0x40
+[<ffffffff8391f637>] ret_from_fork_nospec_begin+0x21/0x21
+[<ffffffff832bad60>] ? insert_kthread_work+0x40/0x40
+
+Signed-off-by: Chad Dupuis <cdupuis@marvell.com>
+Signed-off-by: Saurav Kashyap <skashyap@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qedf/qedf_els.c | 16 ++++------------
+ 1 file changed, 4 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c
+index 04f0c4d2e256..5178cd03666a 100644
+--- a/drivers/scsi/qedf/qedf_els.c
++++ b/drivers/scsi/qedf/qedf_els.c
+@@ -23,8 +23,6 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op,
+       int rc = 0;
+       uint32_t did, sid;
+       uint16_t xid;
+-      uint32_t start_time = jiffies / HZ;
+-      uint32_t current_time;
+       struct fcoe_wqe *sqe;
+       unsigned long flags;
+       u16 sqe_idx;
+@@ -59,18 +57,12 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op,
+               goto els_err;
+       }
+-retry_els:
+       els_req = qedf_alloc_cmd(fcport, QEDF_ELS);
+       if (!els_req) {
+-              current_time = jiffies / HZ;
+-              if ((current_time - start_time) > 10) {
+-                      QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS,
+-                                 "els: Failed els 0x%x\n", op);
+-                      rc = -ENOMEM;
+-                      goto els_err;
+-              }
+-              mdelay(20 * USEC_PER_MSEC);
+-              goto retry_els;
++              QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS,
++                        "Failed to alloc ELS request 0x%x\n", op);
++              rc = -ENOMEM;
++              goto els_err;
+       }
+       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "initiate_els els_req = "
+-- 
+2.20.1
+
diff --git a/queue-4.19/selftests-rtnetlink-add-addresses-with-fixed-life-ti.patch b/queue-4.19/selftests-rtnetlink-add-addresses-with-fixed-life-ti.patch
new file mode 100644 (file)
index 0000000..a7cc8e8
--- /dev/null
@@ -0,0 +1,65 @@
+From 9f69075abd69c61b38a0c3f17a52bce8789ad71d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jun 2019 16:02:28 +0200
+Subject: selftests: rtnetlink: add addresses with fixed life time
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 3cfa148826e3c666da1cc2a43fbe8689e2650636 ]
+
+This exercises kernel code path that deal with addresses that have
+a limited lifetime.
+
+Without previous fix, this triggers following crash on net-next:
+ BUG: KASAN: null-ptr-deref in check_lifetime+0x403/0x670
+ Read of size 8 at addr 0000000000000010 by task kworker [..]
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/rtnetlink.sh | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
+index e101af52d1d6..ff665de788ef 100755
+--- a/tools/testing/selftests/net/rtnetlink.sh
++++ b/tools/testing/selftests/net/rtnetlink.sh
+@@ -234,6 +234,26 @@ kci_test_route_get()
+       echo "PASS: route get"
+ }
++kci_test_addrlft()
++{
++      for i in $(seq 10 100) ;do
++              lft=$(((RANDOM%3) + 1))
++              ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
++              check_err $?
++      done
++
++      sleep 5
++
++      ip addr show dev "$devdummy" | grep "10.23.11."
++      if [ $? -eq 0 ]; then
++              echo "FAIL: preferred_lft addresses remaining"
++              check_err 1
++              return
++      fi
++
++      echo "PASS: preferred_lft addresses have expired"
++}
++
+ kci_test_addrlabel()
+ {
+       ret=0
+@@ -965,6 +985,7 @@ kci_test_rtnl()
+       kci_test_polrouting
+       kci_test_route_get
++      kci_test_addrlft
+       kci_test_tc
+       kci_test_gre
+       kci_test_gretap
+-- 
+2.20.1
+
index 8463b03e471e8b3ec657b9ba9b003aed23b43050..716a81803574b319a1f0e9b174e5963a5d7812f7 100644 (file)
@@ -94,3 +94,21 @@ tty-serial-msm_serial-fix-lockup-for-sysrq-and-oops.patch
 fix-compat-handling-of-ficlonerange-fideduperange-and-fs_ioc_fiemap.patch
 bdev-factor-out-bdev-revalidation-into-a-common-helper.patch
 bdev-refresh-bdev-size-for-disks-without-partitioning.patch
+scsi-qedf-do-not-retry-els-request-if-qedf_alloc_cmd.patch
+drm-mst-fix-mst-sideband-up-reply-failure-handling.patch
+powerpc-pseries-hvconsole-fix-stack-overread-via-udb.patch
+selftests-rtnetlink-add-addresses-with-fixed-life-ti.patch
+coresight-tmc-etf-do-not-call-smp_processor_id-from-.patch
+coresight-etb10-do-not-call-smp_processor_id-from-pr.patch
+kvm-ppc-book3s-hv-use-smp_mb-when-setting-clearing-h.patch
+rxrpc-fix-possible-null-pointer-access-in-icmp-handl.patch
+tcp-annotate-tp-rcv_nxt-lockless-reads.patch
+net-core-limit-nested-device-depth.patch
+ath9k_htc-modify-byte-order-for-an-error-message.patch
+ath9k_htc-discard-undersized-packets.patch
+xfs-periodically-yield-scrub-threads-to-the-schedule.patch
+net-add-annotations-on-hh-hh_len-lockless-accesses.patch
+ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch
+s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch
+xen-blkback-avoid-unmapping-unmapped-grant-pages.patch
+perf-x86-intel-bts-fix-the-use-of-page_private.patch
diff --git a/queue-4.19/tcp-annotate-tp-rcv_nxt-lockless-reads.patch b/queue-4.19/tcp-annotate-tp-rcv_nxt-lockless-reads.patch
new file mode 100644 (file)
index 0000000..8ea48c9
--- /dev/null
@@ -0,0 +1,201 @@
+From 1c3f6c1668634fd4180dd2eba22513aa770d7952 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Oct 2019 20:17:39 -0700
+Subject: tcp: annotate tp->rcv_nxt lockless reads
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit dba7d9b8c739df27ff3a234c81d6c6b23e3986fa ]
+
+There are few places where we fetch tp->rcv_nxt while
+this field can change from IRQ or other cpu.
+
+We need to add READ_ONCE() annotations, and also make
+sure write sides use corresponding WRITE_ONCE() to avoid
+store-tearing.
+
+Note that tcp_inq_hint() was already using READ_ONCE(tp->rcv_nxt)
+
+syzbot reported :
+
+BUG: KCSAN: data-race in tcp_poll / tcp_queue_rcv
+
+write to 0xffff888120425770 of 4 bytes by interrupt on cpu 0:
+ tcp_rcv_nxt_update net/ipv4/tcp_input.c:3365 [inline]
+ tcp_queue_rcv+0x180/0x380 net/ipv4/tcp_input.c:4638
+ tcp_rcv_established+0xbf1/0xf50 net/ipv4/tcp_input.c:5616
+ tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1542
+ tcp_v4_rcv+0x1a03/0x1bf0 net/ipv4/tcp_ipv4.c:1923
+ ip_protocol_deliver_rcu+0x51/0x470 net/ipv4/ip_input.c:204
+ ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
+ dst_input include/net/dst.h:442 [inline]
+ ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ NF_HOOK include/linux/netfilter.h:299 [inline]
+ ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
+ __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004
+ __netif_receive_skb+0x37/0xf0 net/core/dev.c:5118
+ netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208
+ napi_skb_finish net/core/dev.c:5671 [inline]
+ napi_gro_receive+0x28f/0x330 net/core/dev.c:5704
+ receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061
+
+read to 0xffff888120425770 of 4 bytes by task 7254 on cpu 1:
+ tcp_stream_is_readable net/ipv4/tcp.c:480 [inline]
+ tcp_poll+0x204/0x6b0 net/ipv4/tcp.c:554
+ sock_poll+0xed/0x250 net/socket.c:1256
+ vfs_poll include/linux/poll.h:90 [inline]
+ ep_item_poll.isra.0+0x90/0x190 fs/eventpoll.c:892
+ ep_send_events_proc+0x113/0x5c0 fs/eventpoll.c:1749
+ ep_scan_ready_list.constprop.0+0x189/0x500 fs/eventpoll.c:704
+ ep_send_events fs/eventpoll.c:1793 [inline]
+ ep_poll+0xe3/0x900 fs/eventpoll.c:1930
+ do_epoll_wait+0x162/0x180 fs/eventpoll.c:2294
+ __do_sys_epoll_pwait fs/eventpoll.c:2325 [inline]
+ __se_sys_epoll_pwait fs/eventpoll.c:2311 [inline]
+ __x64_sys_epoll_pwait+0xcd/0x170 fs/eventpoll.c:2311
+ do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 7254 Comm: syz-fuzzer Not tainted 5.3.0+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c           | 4 ++--
+ net/ipv4/tcp_diag.c      | 2 +-
+ net/ipv4/tcp_input.c     | 6 +++---
+ net/ipv4/tcp_ipv4.c      | 3 ++-
+ net/ipv4/tcp_minisocks.c | 7 +++++--
+ net/ipv6/tcp_ipv6.c      | 3 ++-
+ 6 files changed, 15 insertions(+), 10 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index a7a804bece7a..7561fa1bcc3e 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -488,7 +488,7 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
+ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
+                                         int target, struct sock *sk)
+ {
+-      return (tp->rcv_nxt - tp->copied_seq >= target) ||
++      return (READ_ONCE(tp->rcv_nxt) - tp->copied_seq >= target) ||
+               (sk->sk_prot->stream_memory_read ?
+               sk->sk_prot->stream_memory_read(sk) : false);
+ }
+@@ -2866,7 +2866,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
+               else if (tp->repair_queue == TCP_SEND_QUEUE)
+                       tp->write_seq = val;
+               else if (tp->repair_queue == TCP_RECV_QUEUE)
+-                      tp->rcv_nxt = val;
++                      WRITE_ONCE(tp->rcv_nxt, val);
+               else
+                       err = -EINVAL;
+               break;
+diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
+index 81148f7a2323..c9e97f304f98 100644
+--- a/net/ipv4/tcp_diag.c
++++ b/net/ipv4/tcp_diag.c
+@@ -30,7 +30,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+       } else if (sk->sk_type == SOCK_STREAM) {
+               const struct tcp_sock *tp = tcp_sk(sk);
+-              r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
++              r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - tp->copied_seq, 0);
+               r->idiag_wqueue = tp->write_seq - tp->snd_una;
+       }
+       if (info)
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 57e8dad956ec..3a08ee81cbc3 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3348,7 +3348,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+       sock_owned_by_me((struct sock *)tp);
+       tp->bytes_received += delta;
+-      tp->rcv_nxt = seq;
++      WRITE_ONCE(tp->rcv_nxt, seq);
+ }
+ /* Update our send window.
+@@ -5829,7 +5829,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
+               /* Ok.. it's good. Set up sequence numbers and
+                * move to established.
+                */
+-              tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
++              WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
+               tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
+               /* RFC1323: The window in SYN & SYN/ACK segments is
+@@ -5932,7 +5932,7 @@ discard:
+                       tp->tcp_header_len = sizeof(struct tcphdr);
+               }
+-              tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
++              WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
+               tp->copied_seq = tp->rcv_nxt;
+               tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 5553f6a833f3..6da393016c11 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2330,7 +2330,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
+               /* Because we don't lock the socket,
+                * we might find a transient negative value.
+                */
+-              rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
++              rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
++                                    tp->copied_seq, 0);
+       seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
+                       "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index 12affb7864d9..7ba8a90772b0 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -454,6 +454,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+       struct tcp_request_sock *treq = tcp_rsk(req);
+       struct inet_connection_sock *newicsk;
+       struct tcp_sock *oldtp, *newtp;
++      u32 seq;
+       if (!newsk)
+               return NULL;
+@@ -467,8 +468,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+       /* Now setup tcp_sock */
+       newtp->pred_flags = 0;
+-      newtp->rcv_wup = newtp->copied_seq =
+-      newtp->rcv_nxt = treq->rcv_isn + 1;
++      seq = treq->rcv_isn + 1;
++      newtp->rcv_wup = seq;
++      newtp->copied_seq = seq;
++      WRITE_ONCE(newtp->rcv_nxt, seq);
+       newtp->segs_in = 1;
+       newtp->snd_sml = newtp->snd_una =
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 9a117a79af65..c5f4e89b6ff3 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1839,7 +1839,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
+               /* Because we don't lock the socket,
+                * we might find a transient negative value.
+                */
+-              rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
++              rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
++                                    tp->copied_seq, 0);
+       seq_printf(seq,
+                  "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+-- 
+2.20.1
+
diff --git a/queue-4.19/ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch b/queue-4.19/ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch
new file mode 100644 (file)
index 0000000..feb3439
--- /dev/null
@@ -0,0 +1,154 @@
+From 65e42bdf75d428d21e34608124dd3004a3250dc4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 20 Jul 2019 14:05:20 +0800
+Subject: ubifs: ubifs_tnc_start_commit: Fix OOB in layout_in_gaps
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+[ Upstream commit 6abf57262166b4f4294667fb5206ae7ba1ba96f5 ]
+
+Running stress-test test_2 in mtd-utils on ubi device, sometimes we can
+get following oops message:
+
+  BUG: unable to handle page fault for address: ffffffff00000140
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 280a067 P4D 280a067 PUD 0
+  Oops: 0000 [#1] SMP
+  CPU: 0 PID: 60 Comm: kworker/u16:1 Kdump: loaded Not tainted 5.2.0 #13
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0
+  -0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+  Workqueue: writeback wb_workfn (flush-ubifs_0_0)
+  RIP: 0010:rb_next_postorder+0x2e/0xb0
+  Code: 80 db 03 01 48 85 ff 0f 84 97 00 00 00 48 8b 17 48 83 05 bc 80 db
+  03 01 48 83 e2 fc 0f 84 82 00 00 00 48 83 05 b2 80 db 03 01 <48> 3b 7a
+  10 48 89 d0 74 02 f3 c3 48 8b 52 08 48 83 05 a3 80 db 03
+  RSP: 0018:ffffc90000887758 EFLAGS: 00010202
+  RAX: ffff888129ae4700 RBX: ffff888138b08400 RCX: 0000000080800001
+  RDX: ffffffff00000130 RSI: 0000000080800024 RDI: ffff888138b08400
+  RBP: ffff888138b08400 R08: ffffea0004a6b920 R09: 0000000000000000
+  R10: ffffc90000887740 R11: 0000000000000001 R12: ffff888128d48000
+  R13: 0000000000000800 R14: 000000000000011e R15: 00000000000007c8
+  FS:  0000000000000000(0000) GS:ffff88813ba00000(0000)
+  knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: ffffffff00000140 CR3: 000000013789d000 CR4: 00000000000006f0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+    destroy_old_idx+0x5d/0xa0 [ubifs]
+    ubifs_tnc_start_commit+0x4fe/0x1380 [ubifs]
+    do_commit+0x3eb/0x830 [ubifs]
+    ubifs_run_commit+0xdc/0x1c0 [ubifs]
+
+Above Oops are due to the slab-out-of-bounds happened in do-while of
+function layout_in_gaps indirectly called by ubifs_tnc_start_commit. In
+function layout_in_gaps, there is a do-while loop placing index nodes
+into the gaps created by obsolete index nodes in non-empty index LEBs
+until rest index nodes can totally be placed into pre-allocated empty
+LEBs. @c->gap_lebs points to a memory area(integer array) which records
+LEB numbers used by 'in-the-gaps' method. Whenever a fitable index LEB
+is found, corresponding lnum will be incrementally written into the
+memory area pointed by @c->gap_lebs. The size
+((@c->lst.idx_lebs + 1) * sizeof(int)) of memory area is allocated before
+do-while loop and can not be changed in the loop. But @c->lst.idx_lebs
+could be increased by function ubifs_change_lp (called by
+layout_leb_in_gaps->ubifs_find_dirty_idx_leb->get_idx_gc_leb) during the
+loop. So, sometimes oob happens when number of cycles in do-while loop
+exceeds the original value of @c->lst.idx_lebs. See detail in
+https://bugzilla.kernel.org/show_bug.cgi?id=204229.
+This patch fixes oob in layout_in_gaps.
+
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ubifs/tnc_commit.c | 34 +++++++++++++++++++++++++++-------
+ 1 file changed, 27 insertions(+), 7 deletions(-)
+
+diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
+index dba87d09b989..95630f9f40dd 100644
+--- a/fs/ubifs/tnc_commit.c
++++ b/fs/ubifs/tnc_commit.c
+@@ -219,7 +219,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
+ /**
+  * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
+  * @c: UBIFS file-system description object
+- * @p: return LEB number here
++ * @p: return LEB number in @c->gap_lebs[p]
+  *
+  * This function lays out new index nodes for dirty znodes using in-the-gaps
+  * method of TNC commit.
+@@ -228,7 +228,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
+  * This function returns the number of index nodes written into the gaps, or a
+  * negative error code on failure.
+  */
+-static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
++static int layout_leb_in_gaps(struct ubifs_info *c, int p)
+ {
+       struct ubifs_scan_leb *sleb;
+       struct ubifs_scan_node *snod;
+@@ -243,7 +243,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
+                * filled, however we do not check there at present.
+                */
+               return lnum; /* Error code */
+-      *p = lnum;
++      c->gap_lebs[p] = lnum;
+       dbg_gc("LEB %d", lnum);
+       /*
+        * Scan the index LEB.  We use the generic scan for this even though
+@@ -362,7 +362,7 @@ static int get_leb_cnt(struct ubifs_info *c, int cnt)
+  */
+ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+ {
+-      int err, leb_needed_cnt, written, *p;
++      int err, leb_needed_cnt, written, p = 0, old_idx_lebs, *gap_lebs;
+       dbg_gc("%d znodes to write", cnt);
+@@ -371,9 +371,9 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+       if (!c->gap_lebs)
+               return -ENOMEM;
+-      p = c->gap_lebs;
++      old_idx_lebs = c->lst.idx_lebs;
+       do {
+-              ubifs_assert(c, p < c->gap_lebs + c->lst.idx_lebs);
++              ubifs_assert(c, p < c->lst.idx_lebs);
+               written = layout_leb_in_gaps(c, p);
+               if (written < 0) {
+                       err = written;
+@@ -399,9 +399,29 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+               leb_needed_cnt = get_leb_cnt(c, cnt);
+               dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt,
+                      leb_needed_cnt, c->ileb_cnt);
++              /*
++               * Dynamically change the size of @c->gap_lebs to prevent
++               * oob, because @c->lst.idx_lebs could be increased by
++               * function @get_idx_gc_leb (called by layout_leb_in_gaps->
++               * ubifs_find_dirty_idx_leb) during loop. Only enlarge
++               * @c->gap_lebs when needed.
++               *
++               */
++              if (leb_needed_cnt > c->ileb_cnt && p >= old_idx_lebs &&
++                  old_idx_lebs < c->lst.idx_lebs) {
++                      old_idx_lebs = c->lst.idx_lebs;
++                      gap_lebs = krealloc(c->gap_lebs, sizeof(int) *
++                                             (old_idx_lebs + 1), GFP_NOFS);
++                      if (!gap_lebs) {
++                              kfree(c->gap_lebs);
++                              c->gap_lebs = NULL;
++                              return -ENOMEM;
++                      }
++                      c->gap_lebs = gap_lebs;
++              }
+       } while (leb_needed_cnt > c->ileb_cnt);
+-      *p = -1;
++      c->gap_lebs[p] = -1;
+       return 0;
+ }
+-- 
+2.20.1
+
diff --git a/queue-4.19/xen-blkback-avoid-unmapping-unmapped-grant-pages.patch b/queue-4.19/xen-blkback-avoid-unmapping-unmapped-grant-pages.patch
new file mode 100644 (file)
index 0000000..fa90154
--- /dev/null
@@ -0,0 +1,72 @@
+From 9073ea24e25c62bad1801cba51d67378478c94fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Nov 2019 16:36:05 +0100
+Subject: xen/blkback: Avoid unmapping unmapped grant pages
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: SeongJae Park <sjpark@amazon.de>
+
+[ Upstream commit f9bd84a8a845d82f9b5a081a7ae68c98a11d2e84 ]
+
+For each I/O request, blkback first maps the foreign pages for the
+request to its local pages.  If an allocation of a local page for the
+mapping fails, it should unmap every mapping already made for the
+request.
+
+However, blkback's handling mechanism for the allocation failure does
+not mark the remaining foreign pages as unmapped.  Therefore, the unmap
+function merely tries to unmap every valid grant page for the request,
+including the pages not mapped due to the allocation failure.  On a
+system that fails the allocation frequently, this problem leads to
+following kernel crash.
+
+  [  372.012538] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
+  [  372.012546] IP: [<ffffffff814071ac>] gnttab_unmap_refs.part.7+0x1c/0x40
+  [  372.012557] PGD 16f3e9067 PUD 16426e067 PMD 0
+  [  372.012562] Oops: 0002 [#1] SMP
+  [  372.012566] Modules linked in: act_police sch_ingress cls_u32
+  ...
+  [  372.012746] Call Trace:
+  [  372.012752]  [<ffffffff81407204>] gnttab_unmap_refs+0x34/0x40
+  [  372.012759]  [<ffffffffa0335ae3>] xen_blkbk_unmap+0x83/0x150 [xen_blkback]
+  ...
+  [  372.012802]  [<ffffffffa0336c50>] dispatch_rw_block_io+0x970/0x980 [xen_blkback]
+  ...
+  Decompressing Linux... Parsing ELF... done.
+  Booting the kernel.
+  [    0.000000] Initializing cgroup subsys cpuset
+
+This commit fixes this problem by marking the grant pages of the given
+request that didn't mapped due to the allocation failure as invalid.
+
+Fixes: c6cc142dac52 ("xen-blkback: use balloon pages for all mappings")
+
+Reviewed-by: David Woodhouse <dwmw@amazon.de>
+Reviewed-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: Paul Durrant <pdurrant@amazon.co.uk>
+Reviewed-by: Roger Pau MonnĂ© <roger.pau@citrix.com>
+Signed-off-by: SeongJae Park <sjpark@amazon.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/xen-blkback/blkback.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
+index fd1e19f1a49f..3666afa639d1 100644
+--- a/drivers/block/xen-blkback/blkback.c
++++ b/drivers/block/xen-blkback/blkback.c
+@@ -936,6 +936,8 @@ next:
+ out_of_memory:
+       pr_alert("%s: out of memory\n", __func__);
+       put_free_pages(ring, pages_to_gnt, segs_to_map);
++      for (i = last_map; i < num; i++)
++              pages[i]->handle = BLKBACK_INVALID_HANDLE;
+       return -ENOMEM;
+ }
+-- 
+2.20.1
+
diff --git a/queue-4.19/xfs-periodically-yield-scrub-threads-to-the-schedule.patch b/queue-4.19/xfs-periodically-yield-scrub-threads-to-the-schedule.patch
new file mode 100644 (file)
index 0000000..05e6c3e
--- /dev/null
@@ -0,0 +1,93 @@
+From c14d20357188794b827cde596fdc7cca0f7d8233 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Nov 2019 15:33:57 -0800
+Subject: xfs: periodically yield scrub threads to the scheduler
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+[ Upstream commit 5d1116d4c6af3e580f1ed0382ca5a94bd65a34cf ]
+
+Christoph Hellwig complained about the following soft lockup warning
+when running scrub after generic/175 when preemption is disabled and
+slub debugging is enabled:
+
+watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [xfs_scrub:161]
+Modules linked in:
+irq event stamp: 41692326
+hardirqs last  enabled at (41692325): [<ffffffff8232c3b7>] _raw_0
+hardirqs last disabled at (41692326): [<ffffffff81001c5a>] trace0
+softirqs last  enabled at (41684994): [<ffffffff8260031f>] __do_e
+softirqs last disabled at (41684987): [<ffffffff81127d8c>] irq_e0
+CPU: 3 PID: 16189 Comm: xfs_scrub Not tainted 5.4.0-rc3+ #30
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.124
+RIP: 0010:_raw_spin_unlock_irqrestore+0x39/0x40
+Code: 89 f3 be 01 00 00 00 e8 d5 3a e5 fe 48 89 ef e8 ed 87 e5 f2
+RSP: 0018:ffffc9000233f970 EFLAGS: 00000286 ORIG_RAX: ffffffffff3
+RAX: ffff88813b398040 RBX: 0000000000000286 RCX: 0000000000000006
+RDX: 0000000000000006 RSI: ffff88813b3988c0 RDI: ffff88813b398040
+RBP: ffff888137958640 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffffea00042b0c00
+R13: 0000000000000001 R14: ffff88810ac32308 R15: ffff8881376fc040
+FS:  00007f6113dea700(0000) GS:ffff88813bb80000(0000) knlGS:00000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f6113de8ff8 CR3: 000000012f290000 CR4: 00000000000006e0
+Call Trace:
+ free_debug_processing+0x1dd/0x240
+ __slab_free+0x231/0x410
+ kmem_cache_free+0x30e/0x360
+ xchk_ag_btcur_free+0x76/0xb0
+ xchk_ag_free+0x10/0x80
+ xchk_bmap_iextent_xref.isra.14+0xd9/0x120
+ xchk_bmap_iextent+0x187/0x210
+ xchk_bmap+0x2e0/0x3b0
+ xfs_scrub_metadata+0x2e7/0x500
+ xfs_ioc_scrub_metadata+0x4a/0xa0
+ xfs_file_ioctl+0x58a/0xcd0
+ do_vfs_ioctl+0xa0/0x6f0
+ ksys_ioctl+0x5b/0x90
+ __x64_sys_ioctl+0x11/0x20
+ do_syscall_64+0x4b/0x1a0
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+If preemption is disabled, all metadata buffers needed to perform the
+scrub are already in memory, and there are a lot of records to check,
+it's possible that the scrub thread will run for an extended period of
+time without sleeping for IO or any other reason.  Then the watchdog
+timer or the RCU stall timeout can trigger, producing the backtrace
+above.
+
+To fix this problem, call cond_resched() from the scrub thread so that
+we back out to the scheduler whenever necessary.
+
+Reported-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/scrub/common.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
+index 2d4324d12f9a..51ea2ab124b7 100644
+--- a/fs/xfs/scrub/common.h
++++ b/fs/xfs/scrub/common.h
+@@ -14,8 +14,15 @@
+ static inline bool
+ xchk_should_terminate(
+       struct xfs_scrub        *sc,
+-      int                             *error)
++      int                     *error)
+ {
++      /*
++       * If preemption is disabled, we need to yield to the scheduler every
++       * few seconds so that we don't run afoul of the soft lockup watchdog
++       * or RCU stall detector.
++       */
++      cond_resched();
++
+       if (fatal_signal_pending(current)) {
+               if (*error == 0)
+                       *error = -EAGAIN;
+-- 
+2.20.1
+