--- /dev/null
+From ea86c5f95edea1e25b0f86df7f2438c980b28a1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2019 11:51:46 +0900
+Subject: ath9k_htc: Discard undersized packets
+
+From: Masashi Honma <masashi.honma@gmail.com>
+
+[ Upstream commit cd486e627e67ee9ab66914d36d3127ef057cc010 ]
+
+Sometimes the hardware will push small packets that trigger a WARN_ON
+in mac80211. Discard them early to avoid this issue.
+
+This patch ports two patches from ath9k to ath9k_htc:
+commit 3c0efb745a172bfe96459e20cbd37b0c945d5f8d ("ath9k: discard
+undersized packets")
+commit df5c4150501ee7e86383be88f6490d970adcf157 ("ath9k: correctly
+handle short radar pulses")
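+
+With both changes applied, the receive path drops these frames up
+front; the combined check is roughly the following sketch (see the
+hunk below):
+
+    /* zero length, or shorter than an ACK and not a PHY error */
+    if (unlikely(!rs_datalen || (rs_datalen < 10 && !is_phyerr)))
+            goto rx_next;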
+
+[ 112.835889] ------------[ cut here ]------------
+[ 112.835971] WARNING: CPU: 5 PID: 0 at net/mac80211/rx.c:804 ieee80211_rx_napi+0xaac/0xb40 [mac80211]
+[ 112.835973] Modules linked in: ath9k_htc ath9k_common ath9k_hw ath mac80211 cfg80211 libarc4 nouveau snd_hda_codec_hdmi intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_hda_codec video snd_hda_core ttm snd_hwdep drm_kms_helper snd_pcm crct10dif_pclmul snd_seq_midi drm snd_seq_midi_event crc32_pclmul snd_rawmidi ghash_clmulni_intel snd_seq aesni_intel aes_x86_64 crypto_simd cryptd snd_seq_device glue_helper snd_timer sch_fq_codel i2c_algo_bit fb_sys_fops snd input_leds syscopyarea sysfillrect sysimgblt intel_cstate mei_me intel_rapl_perf soundcore mxm_wmi lpc_ich mei kvm_intel kvm mac_hid irqbypass parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear e1000e ahci libahci wmi
+[ 112.836022] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.3.0-wt #1
+[ 112.836023] Hardware name: MouseComputer Co.,Ltd. X99-S01/X99-S01, BIOS 1.0C-W7 04/01/2015
+[ 112.836056] RIP: 0010:ieee80211_rx_napi+0xaac/0xb40 [mac80211]
+[ 112.836059] Code: 00 00 66 41 89 86 b0 00 00 00 e9 c8 fa ff ff 4c 89 b5 40 ff ff ff 49 89 c6 e9 c9 fa ff ff 48 c7 c7 e0 a2 a5 c0 e8 47 41 b0 e9 <0f> 0b 48 89 df e8 5a 94 2d ea e9 02 f9 ff ff 41 39 c1 44 89 85 60
+[ 112.836060] RSP: 0018:ffffaa6180220da8 EFLAGS: 00010286
+[ 112.836062] RAX: 0000000000000024 RBX: ffff909a20eeda00 RCX: 0000000000000000
+[ 112.836064] RDX: 0000000000000000 RSI: ffff909a2f957448 RDI: ffff909a2f957448
+[ 112.836065] RBP: ffffaa6180220e78 R08: 00000000000006e9 R09: 0000000000000004
+[ 112.836066] R10: 000000000000000a R11: 0000000000000001 R12: 0000000000000000
+[ 112.836068] R13: ffff909a261a47a0 R14: 0000000000000000 R15: 0000000000000004
+[ 112.836070] FS: 0000000000000000(0000) GS:ffff909a2f940000(0000) knlGS:0000000000000000
+[ 112.836071] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 112.836073] CR2: 00007f4e3ffffa08 CR3: 00000001afc0a006 CR4: 00000000001606e0
+[ 112.836074] Call Trace:
+[ 112.836076] <IRQ>
+[ 112.836083] ? finish_td+0xb3/0xf0
+[ 112.836092] ? ath9k_rx_prepare.isra.11+0x22f/0x2a0 [ath9k_htc]
+[ 112.836099] ath9k_rx_tasklet+0x10b/0x1d0 [ath9k_htc]
+[ 112.836105] tasklet_action_common.isra.22+0x63/0x110
+[ 112.836108] tasklet_action+0x22/0x30
+[ 112.836115] __do_softirq+0xe4/0x2da
+[ 112.836118] irq_exit+0xae/0xb0
+[ 112.836121] do_IRQ+0x86/0xe0
+[ 112.836125] common_interrupt+0xf/0xf
+[ 112.836126] </IRQ>
+[ 112.836130] RIP: 0010:cpuidle_enter_state+0xa9/0x440
+[ 112.836133] Code: 3d bc 20 38 55 e8 f7 1d 84 ff 49 89 c7 0f 1f 44 00 00 31 ff e8 28 29 84 ff 80 7d d3 00 0f 85 e6 01 00 00 fb 66 0f 1f 44 00 00 <45> 85 ed 0f 89 ff 01 00 00 41 c7 44 24 10 00 00 00 00 48 83 c4 18
+[ 112.836134] RSP: 0018:ffffaa61800e3e48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde
+[ 112.836136] RAX: ffff909a2f96b340 RBX: ffffffffabb58200 RCX: 000000000000001f
+[ 112.836137] RDX: 0000001a458adc5d RSI: 0000000026c9b581 RDI: 0000000000000000
+[ 112.836139] RBP: ffffaa61800e3e88 R08: 0000000000000002 R09: 000000000002abc0
+[ 112.836140] R10: ffffaa61800e3e18 R11: 000000000000002d R12: ffffca617fb40b00
+[ 112.836141] R13: 0000000000000002 R14: ffffffffabb582d8 R15: 0000001a458adc5d
+[ 112.836145] ? cpuidle_enter_state+0x98/0x440
+[ 112.836149] ? menu_select+0x370/0x600
+[ 112.836151] cpuidle_enter+0x2e/0x40
+[ 112.836154] call_cpuidle+0x23/0x40
+[ 112.836156] do_idle+0x204/0x280
+[ 112.836159] cpu_startup_entry+0x1d/0x20
+[ 112.836164] start_secondary+0x167/0x1c0
+[ 112.836169] secondary_startup_64+0xa4/0xb0
+[ 112.836173] ---[ end trace 9f4cd18479cc5ae5 ]---
+
+Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 23 +++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+index aba0d454c381..9cec5c216e1f 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+@@ -973,6 +973,8 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+ struct ath_htc_rx_status *rxstatus;
+ struct ath_rx_status rx_stats;
+ bool decrypt_error = false;
++ __be16 rs_datalen;
++ bool is_phyerr;
+
+ if (skb->len < HTC_RX_FRAME_HEADER_SIZE) {
+ ath_err(common, "Corrupted RX frame, dropping (len: %d)\n",
+@@ -982,11 +984,24 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+
+ rxstatus = (struct ath_htc_rx_status *)skb->data;
+
+- if (be16_to_cpu(rxstatus->rs_datalen) -
+- (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) {
++ rs_datalen = be16_to_cpu(rxstatus->rs_datalen);
++ if (unlikely(rs_datalen -
++ (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0)) {
+ ath_err(common,
+ "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n",
+- be16_to_cpu(rxstatus->rs_datalen), skb->len);
++ rs_datalen, skb->len);
++ goto rx_next;
++ }
++
++ is_phyerr = rxstatus->rs_status & ATH9K_RXERR_PHY;
++ /*
++ * Discard zero-length packets and packets smaller than an ACK
++ * which are not PHY_ERROR (short radar pulses have a length of 3)
++ */
++ if (unlikely(!rs_datalen || (rs_datalen < 10 && !is_phyerr))) {
++ ath_warn(common,
++ "Short RX data len, dropping (dlen: %d)\n",
++ rs_datalen);
+ goto rx_next;
+ }
+
+@@ -1011,7 +1026,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+ * Process PHY errors and return so that the packet
+ * can be dropped.
+ */
+- if (rx_stats.rs_status & ATH9K_RXERR_PHY) {
++ if (unlikely(is_phyerr)) {
+ /* TODO: Not using DFS processing now. */
+ if (ath_cmn_process_fft(&priv->spec_priv, hdr,
+ &rx_stats, rx_status->mactime)) {
+--
+2.20.1
+
--- /dev/null
+From 0eb0bef03873075031fd47bfbd3a891463c56968 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2019 11:51:45 +0900
+Subject: ath9k_htc: Modify byte order for an error message
+
+From: Masashi Honma <masashi.honma@gmail.com>
+
+[ Upstream commit e01fddc19d215f6ad397894ec2a851d99bf154e2 ]
+
+rs_datalen is be16 so we need to convert it before printing.
+
+Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+index 4e8e80ac8341..aba0d454c381 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+@@ -986,7 +986,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+ (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) {
+ ath_err(common,
+ "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n",
+- rxstatus->rs_datalen, skb->len);
++ be16_to_cpu(rxstatus->rs_datalen), skb->len);
+ goto rx_next;
+ }
+
+--
+2.20.1
+
--- /dev/null
+From 01b911ec79fc1cd8f5b865e80a19bb62e7614ac7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Sep 2019 11:30:57 -0700
+Subject: btrfs: get rid of unique workqueue helper functions
+
+From: Omar Sandoval <osandov@fb.com>
+
+[ Upstream commit a0cac0ec961f0d42828eeef196ac2246a2f07659 ]
+
+Commit 9e0af2376434 ("Btrfs: fix task hang under heavy compressed
+write") worked around the issue that a recycled work item could get a
+false dependency on the original work item due to how the workqueue code
+guarantees non-reentrancy. It did so by giving different work functions
+to different types of work.
+
+However, the fixes in the previous few patches are more complete, as
+they prevent a work item from being recycled at all (except for a tiny
+window that the kernel workqueue code handles for us). This obsoletes
+the previous fix, so we don't need the unique helpers for correctness.
+The only other reason to keep them would be so they show up in stack
+traces, but they always seem to be optimized to a tail call, so they
+don't show up anyways. So, let's just get rid of the extra indirection.
+
+While we're here, rename normal_work_helper() to the more informative
+btrfs_work_helper().
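+
+For callers the change is mechanical; a representative call site
+(sketch, see the hunks below) goes from
+
+    btrfs_init_work(&end_io_wq->work, btrfs_endio_helper,
+                    end_workqueue_fn, NULL, NULL);
+
+to
+
+    btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
+
+with INIT_WORK() now always pointing at btrfs_work_helper().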
+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/async-thread.c | 58 +++++++++-------------------------------
+ fs/btrfs/async-thread.h | 33 ++---------------------
+ fs/btrfs/block-group.c | 3 +--
+ fs/btrfs/delayed-inode.c | 4 +--
+ fs/btrfs/disk-io.c | 34 ++++++++---------------
+ fs/btrfs/inode.c | 36 ++++++++-----------------
+ fs/btrfs/ordered-data.c | 1 -
+ fs/btrfs/qgroup.c | 1 -
+ fs/btrfs/raid56.c | 5 ++--
+ fs/btrfs/reada.c | 3 +--
+ fs/btrfs/scrub.c | 14 +++++-----
+ fs/btrfs/volumes.c | 3 +--
+ 12 files changed, 50 insertions(+), 145 deletions(-)
+
+diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
+index 10a04b99798a..3f3110975f88 100644
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -53,16 +53,6 @@ struct btrfs_workqueue {
+ struct __btrfs_workqueue *high;
+ };
+
+-static void normal_work_helper(struct btrfs_work *work);
+-
+-#define BTRFS_WORK_HELPER(name) \
+-noinline_for_stack void btrfs_##name(struct work_struct *arg) \
+-{ \
+- struct btrfs_work *work = container_of(arg, struct btrfs_work, \
+- normal_work); \
+- normal_work_helper(work); \
+-}
+-
+ struct btrfs_fs_info *
+ btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
+ {
+@@ -89,29 +79,6 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
+ return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
+ }
+
+-BTRFS_WORK_HELPER(worker_helper);
+-BTRFS_WORK_HELPER(delalloc_helper);
+-BTRFS_WORK_HELPER(flush_delalloc_helper);
+-BTRFS_WORK_HELPER(cache_helper);
+-BTRFS_WORK_HELPER(submit_helper);
+-BTRFS_WORK_HELPER(fixup_helper);
+-BTRFS_WORK_HELPER(endio_helper);
+-BTRFS_WORK_HELPER(endio_meta_helper);
+-BTRFS_WORK_HELPER(endio_meta_write_helper);
+-BTRFS_WORK_HELPER(endio_raid56_helper);
+-BTRFS_WORK_HELPER(endio_repair_helper);
+-BTRFS_WORK_HELPER(rmw_helper);
+-BTRFS_WORK_HELPER(endio_write_helper);
+-BTRFS_WORK_HELPER(freespace_write_helper);
+-BTRFS_WORK_HELPER(delayed_meta_helper);
+-BTRFS_WORK_HELPER(readahead_helper);
+-BTRFS_WORK_HELPER(qgroup_rescan_helper);
+-BTRFS_WORK_HELPER(extent_refs_helper);
+-BTRFS_WORK_HELPER(scrub_helper);
+-BTRFS_WORK_HELPER(scrubwrc_helper);
+-BTRFS_WORK_HELPER(scrubnc_helper);
+-BTRFS_WORK_HELPER(scrubparity_helper);
+-
+ static struct __btrfs_workqueue *
+ __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
+ unsigned int flags, int limit_active, int thresh)
+@@ -302,12 +269,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq,
+ * original work item cannot depend on the recycled work
+ * item in that case (see find_worker_executing_work()).
+ *
+- * Note that the work of one Btrfs filesystem may depend
+- * on the work of another Btrfs filesystem via, e.g., a
+- * loop device. Therefore, we must not allow the current
+- * work item to be recycled until we are really done,
+- * otherwise we break the above assumption and can
+- * deadlock.
++ * Note that different types of Btrfs work can depend on
++ * each other, and one type of work on one Btrfs
++ * filesystem may even depend on the same type of work
++ * on another Btrfs filesystem via, e.g., a loop device.
++ * Therefore, we must not allow the current work item to
++ * be recycled until we are really done, otherwise we
++ * break the above assumption and can deadlock.
+ */
+ free_self = true;
+ } else {
+@@ -331,8 +299,10 @@ static void run_ordered_work(struct __btrfs_workqueue *wq,
+ }
+ }
+
+-static void normal_work_helper(struct btrfs_work *work)
++static void btrfs_work_helper(struct work_struct *normal_work)
+ {
++ struct btrfs_work *work = container_of(normal_work, struct btrfs_work,
++ normal_work);
+ struct __btrfs_workqueue *wq;
+ void *wtag;
+ int need_order = 0;
+@@ -362,15 +332,13 @@ static void normal_work_helper(struct btrfs_work *work)
+ trace_btrfs_all_work_done(wq->fs_info, wtag);
+ }
+
+-void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
+- btrfs_func_t func,
+- btrfs_func_t ordered_func,
+- btrfs_func_t ordered_free)
++void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
++ btrfs_func_t ordered_func, btrfs_func_t ordered_free)
+ {
+ work->func = func;
+ work->ordered_func = ordered_func;
+ work->ordered_free = ordered_free;
+- INIT_WORK(&work->normal_work, uniq_func);
++ INIT_WORK(&work->normal_work, btrfs_work_helper);
+ INIT_LIST_HEAD(&work->ordered_list);
+ work->flags = 0;
+ }
+diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
+index 7861c9feba5f..c5bf2b117c05 100644
+--- a/fs/btrfs/async-thread.h
++++ b/fs/btrfs/async-thread.h
+@@ -29,42 +29,13 @@ struct btrfs_work {
+ unsigned long flags;
+ };
+
+-#define BTRFS_WORK_HELPER_PROTO(name) \
+-void btrfs_##name(struct work_struct *arg)
+-
+-BTRFS_WORK_HELPER_PROTO(worker_helper);
+-BTRFS_WORK_HELPER_PROTO(delalloc_helper);
+-BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
+-BTRFS_WORK_HELPER_PROTO(cache_helper);
+-BTRFS_WORK_HELPER_PROTO(submit_helper);
+-BTRFS_WORK_HELPER_PROTO(fixup_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_repair_helper);
+-BTRFS_WORK_HELPER_PROTO(rmw_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_write_helper);
+-BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
+-BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
+-BTRFS_WORK_HELPER_PROTO(readahead_helper);
+-BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
+-BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
+-BTRFS_WORK_HELPER_PROTO(scrub_helper);
+-BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
+-BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
+-BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
+-
+-
+ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
+ const char *name,
+ unsigned int flags,
+ int limit_active,
+ int thresh);
+-void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
+- btrfs_func_t func,
+- btrfs_func_t ordered_func,
+- btrfs_func_t ordered_free);
++void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
++ btrfs_func_t ordered_func, btrfs_func_t ordered_free);
+ void btrfs_queue_work(struct btrfs_workqueue *wq,
+ struct btrfs_work *work);
+ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index 0d2da2366869..7dcfa7d7632a 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -695,8 +695,7 @@ int btrfs_cache_block_group(struct btrfs_block_group_cache *cache,
+ caching_ctl->block_group = cache;
+ caching_ctl->progress = cache->key.objectid;
+ refcount_set(&caching_ctl->count, 1);
+- btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
+- caching_thread, NULL, NULL);
++ btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
+
+ spin_lock(&cache->lock);
+ /*
+diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
+index 57a9ad3e8c29..c7a53e79c66d 100644
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1367,8 +1367,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
+ return -ENOMEM;
+
+ async_work->delayed_root = delayed_root;
+- btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
+- btrfs_async_run_delayed_root, NULL, NULL);
++ btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
++ NULL);
+ async_work->nr = nr;
+
+ btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 3895c21853cc..bae334212ee2 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -706,43 +706,31 @@ static void end_workqueue_bio(struct bio *bio)
+ struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_workqueue *wq;
+- btrfs_work_func_t func;
+
+ fs_info = end_io_wq->info;
+ end_io_wq->status = bio->bi_status;
+
+ if (bio_op(bio) == REQ_OP_WRITE) {
+- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
++ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
+ wq = fs_info->endio_meta_write_workers;
+- func = btrfs_endio_meta_write_helper;
+- } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
++ else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
+ wq = fs_info->endio_freespace_worker;
+- func = btrfs_freespace_write_helper;
+- } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
++ else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+ wq = fs_info->endio_raid56_workers;
+- func = btrfs_endio_raid56_helper;
+- } else {
++ else
+ wq = fs_info->endio_write_workers;
+- func = btrfs_endio_write_helper;
+- }
+ } else {
+- if (unlikely(end_io_wq->metadata ==
+- BTRFS_WQ_ENDIO_DIO_REPAIR)) {
++ if (unlikely(end_io_wq->metadata == BTRFS_WQ_ENDIO_DIO_REPAIR))
+ wq = fs_info->endio_repair_workers;
+- func = btrfs_endio_repair_helper;
+- } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
++ else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+ wq = fs_info->endio_raid56_workers;
+- func = btrfs_endio_raid56_helper;
+- } else if (end_io_wq->metadata) {
++ else if (end_io_wq->metadata)
+ wq = fs_info->endio_meta_workers;
+- func = btrfs_endio_meta_helper;
+- } else {
++ else
+ wq = fs_info->endio_workers;
+- func = btrfs_endio_helper;
+- }
+ }
+
+- btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
++ btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
+ btrfs_queue_work(wq, &end_io_wq->work);
+ }
+
+@@ -835,8 +823,8 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+ async->mirror_num = mirror_num;
+ async->submit_bio_start = submit_bio_start;
+
+- btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
+- run_one_async_done, run_one_async_free);
++ btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
++ run_one_async_free);
+
+ async->bio_offset = bio_offset;
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index bc6e7d15577a..dc14fc2e4206 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1268,10 +1268,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
+ async_chunk[i].write_flags = write_flags;
+ INIT_LIST_HEAD(&async_chunk[i].extents);
+
+- btrfs_init_work(&async_chunk[i].work,
+- btrfs_delalloc_helper,
+- async_cow_start, async_cow_submit,
+- async_cow_free);
++ btrfs_init_work(&async_chunk[i].work, async_cow_start,
++ async_cow_submit, async_cow_free);
+
+ nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
+ atomic_add(nr_pages, &fs_info->async_delalloc_pages);
+@@ -2264,8 +2262,7 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
+
+ SetPageChecked(page);
+ get_page(page);
+- btrfs_init_work(&fixup->work, btrfs_fixup_helper,
+- btrfs_writepage_fixup_worker, NULL, NULL);
++ btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
+ fixup->page = page;
+ btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
+ return -EBUSY;
+@@ -3258,7 +3255,6 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_ordered_extent *ordered_extent = NULL;
+ struct btrfs_workqueue *wq;
+- btrfs_work_func_t func;
+
+ trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
+
+@@ -3267,16 +3263,12 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
+ end - start + 1, uptodate))
+ return;
+
+- if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
++ if (btrfs_is_free_space_inode(BTRFS_I(inode)))
+ wq = fs_info->endio_freespace_worker;
+- func = btrfs_freespace_write_helper;
+- } else {
++ else
+ wq = fs_info->endio_write_workers;
+- func = btrfs_endio_write_helper;
+- }
+
+- btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
+- NULL);
++ btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
+ btrfs_queue_work(wq, &ordered_extent->work);
+ }
+
+@@ -8213,18 +8205,14 @@ static void __endio_write_update_ordered(struct inode *inode,
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_ordered_extent *ordered = NULL;
+ struct btrfs_workqueue *wq;
+- btrfs_work_func_t func;
+ u64 ordered_offset = offset;
+ u64 ordered_bytes = bytes;
+ u64 last_offset;
+
+- if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
++ if (btrfs_is_free_space_inode(BTRFS_I(inode)))
+ wq = fs_info->endio_freespace_worker;
+- func = btrfs_freespace_write_helper;
+- } else {
++ else
+ wq = fs_info->endio_write_workers;
+- func = btrfs_endio_write_helper;
+- }
+
+ while (ordered_offset < offset + bytes) {
+ last_offset = ordered_offset;
+@@ -8232,9 +8220,8 @@ static void __endio_write_update_ordered(struct inode *inode,
+ &ordered_offset,
+ ordered_bytes,
+ uptodate)) {
+- btrfs_init_work(&ordered->work, func,
+- finish_ordered_fn,
+- NULL, NULL);
++ btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
++ NULL);
+ btrfs_queue_work(wq, &ordered->work);
+ }
+ /*
+@@ -10119,8 +10106,7 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
+ init_completion(&work->completion);
+ INIT_LIST_HEAD(&work->list);
+ work->inode = inode;
+- btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
+- btrfs_run_delalloc_work, NULL, NULL);
++ btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
+
+ return work;
+ }
+diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
+index 24b6c72b9a59..6240a5a1f2c0 100644
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -547,7 +547,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
+ spin_unlock(&root->ordered_extent_lock);
+
+ btrfs_init_work(&ordered->flush_work,
+- btrfs_flush_delalloc_helper,
+ btrfs_run_ordered_extent_work, NULL, NULL);
+ list_add_tail(&ordered->work_list, &works);
+ btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index 3ad151655eb8..27a903aaf43b 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3280,7 +3280,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
+ memset(&fs_info->qgroup_rescan_work, 0,
+ sizeof(fs_info->qgroup_rescan_work));
+ btrfs_init_work(&fs_info->qgroup_rescan_work,
+- btrfs_qgroup_rescan_helper,
+ btrfs_qgroup_rescan_worker, NULL, NULL);
+ return 0;
+ }
+diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
+index 57a2ac721985..8f47a85944eb 100644
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -190,7 +190,7 @@ static void scrub_parity_work(struct btrfs_work *work);
+
+ static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
+ {
+- btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
++ btrfs_init_work(&rbio->work, work_func, NULL, NULL);
+ btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
+ }
+
+@@ -1743,8 +1743,7 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
+ plug = container_of(cb, struct btrfs_plug_cb, cb);
+
+ if (from_schedule) {
+- btrfs_init_work(&plug->work, btrfs_rmw_helper,
+- unplug_work, NULL, NULL);
++ btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
+ btrfs_queue_work(plug->info->rmw_workers,
+ &plug->work);
+ return;
+diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
+index dd4f9c2b7107..1feaeadc8cf5 100644
+--- a/fs/btrfs/reada.c
++++ b/fs/btrfs/reada.c
+@@ -819,8 +819,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
+ /* FIXME we cannot handle this properly right now */
+ BUG();
+ }
+- btrfs_init_work(&rmw->work, btrfs_readahead_helper,
+- reada_start_machine_worker, NULL, NULL);
++ btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
+ rmw->fs_info = fs_info;
+
+ btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
+diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
+index a0770a6aee00..a7b043fd7a57 100644
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -598,8 +598,8 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
+ sbio->index = i;
+ sbio->sctx = sctx;
+ sbio->page_count = 0;
+- btrfs_init_work(&sbio->work, btrfs_scrub_helper,
+- scrub_bio_end_io_worker, NULL, NULL);
++ btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
++ NULL);
+
+ if (i != SCRUB_BIOS_PER_SCTX - 1)
+ sctx->bios[i]->next_free = i + 1;
+@@ -1720,8 +1720,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
+ sbio->status = bio->bi_status;
+ sbio->bio = bio;
+
+- btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
+- scrub_wr_bio_end_io_worker, NULL, NULL);
++ btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
+ btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
+ }
+
+@@ -2203,8 +2202,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
+ raid56_add_scrub_pages(rbio, spage->page, spage->logical);
+ }
+
+- btrfs_init_work(&sblock->work, btrfs_scrub_helper,
+- scrub_missing_raid56_worker, NULL, NULL);
++ btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
+ scrub_block_get(sblock);
+ scrub_pending_bio_inc(sctx);
+ raid56_submit_missing_rbio(rbio);
+@@ -2742,8 +2740,8 @@ static void scrub_parity_bio_endio(struct bio *bio)
+
+ bio_put(bio);
+
+- btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
+- scrub_parity_bio_endio_worker, NULL, NULL);
++ btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
++ NULL);
+ btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
+ }
+
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index e04409f85063..d8d7b1ee83ca 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -6676,8 +6676,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+ else
+ generate_random_uuid(dev->uuid);
+
+- btrfs_init_work(&dev->work, btrfs_submit_helper,
+- pending_bios_fn, NULL, NULL);
++ btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL);
+
+ return dev;
+ }
+--
+2.20.1
+
--- /dev/null
+From b2ff5cd6ecab58eb1374bc235d8c19cd937011a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Jul 2019 12:28:16 -0700
+Subject: Btrfs: only associate the locked page with one async_chunk struct
+
+From: Chris Mason <clm@fb.com>
+
+[ Upstream commit 1d53c9e6723022b12e4a5ed4b141f67c834b7f6f ]
+
+The btrfs writepages function collects a large range of pages flagged
+for delayed allocation, and then sends them down through the COW code
+for processing. When compression is on, we allocate one async_chunk
+structure for every 512K, and then run those pages through the
+compression code for IO submission.
+
+writepages starts all of this off with a single page, locked by the
+original call to extent_write_cache_pages(), and it's important to keep
+track of this page because it has already been through
+clear_page_dirty_for_io().
+
+The btrfs async_chunk struct has a pointer to the locked_page, and when
+we're redirtying the page because compression had to fallback to
+uncompressed IO, we use page->index to decide if a given async_chunk
+struct really owns that page.
+
+But, this is racy. If a given delalloc range is broken up into two
+async_chunks (chunkA and chunkB), we can end up with something like
+this:
+
+ compress_file_range(chunkA)
+ submit_compress_extents(chunkA)
+ submit compressed bios(chunkA)
+ put_page(locked_page)
+
+ compress_file_range(chunkB)
+ ...
+
+Or:
+
+ async_cow_submit
+ submit_compressed_extents <--- falls back to buffered writeout
+ cow_file_range
+ extent_clear_unlock_delalloc
+ __process_pages_contig
+ put_page(locked_pages)
+
+ async_cow_submit
+
+The end result is that chunkA is completed and cleaned up before chunkB
+even starts processing. This means we can free locked_page and reuse
+it elsewhere. If we get really lucky, it'll have the same page->index
+in its new home as it did before.
+
+While we're processing chunkB, we might decide we need to fall back to
+uncompressed IO, and so compress_file_range() will call
+__set_page_dirty_nobuffers() on chunkB->locked_page.
+
+Without cgroups in use, this creates a phantom dirty page, which
+isn't great but isn't the end of the world. What can happen then is
+that it goes through the fixup worker and the whole COW machinery
+again:
+
+in submit_compressed_extents():
+ while (async extents) {
+ ...
+ cow_file_range
+ if (!page_started ...)
+ extent_write_locked_range
+ else if (...)
+ unlock_page
+ continue;
+
+This hasn't been observed in practice but is still possible.
+
+With cgroups in use, we might crash in the accounting code because
+page->mapping->i_wb isn't set.
+
+ BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0
+ IP: percpu_counter_add_batch+0x11/0x70
+ PGD 66534e067 P4D 66534e067 PUD 66534f067 PMD 0
+ Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
+ CPU: 16 PID: 2172 Comm: rm Not tainted
+ RIP: 0010:percpu_counter_add_batch+0x11/0x70
+ RSP: 0018:ffffc9000a97bbe0 EFLAGS: 00010286
+ RAX: 0000000000000005 RBX: 0000000000000090 RCX: 0000000000026115
+ RDX: 0000000000000030 RSI: ffffffffffffffff RDI: 0000000000000090
+ RBP: 0000000000000000 R08: fffffffffffffff5 R09: 0000000000000000
+ R10: 00000000000260c0 R11: ffff881037fc26c0 R12: ffffffffffffffff
+ R13: ffff880fe4111548 R14: ffffc9000a97bc90 R15: 0000000000000001
+ FS: 00007f5503ced480(0000) GS:ffff880ff7200000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00000000000000d0 CR3: 00000001e0459005 CR4: 0000000000360ee0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ account_page_cleaned+0x15b/0x1f0
+ __cancel_dirty_page+0x146/0x200
+ truncate_cleanup_page+0x92/0xb0
+ truncate_inode_pages_range+0x202/0x7d0
+ btrfs_evict_inode+0x92/0x5a0
+ evict+0xc1/0x190
+ do_unlinkat+0x176/0x280
+ do_syscall_64+0x63/0x1a0
+ entry_SYSCALL_64_after_hwframe+0x42/0xb7
+
+The fix here is to make async_chunk->locked_page NULL everywhere but the
+one async_chunk struct that's allowed to do things to the locked page.
+
+Link: https://lore.kernel.org/linux-btrfs/c2419d01-5c84-3fb4-189e-4db519d08796@suse.com/
+Fixes: 771ed689d2cd ("Btrfs: Optimize compressed writeback and reads")
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+[ update changelog from mail thread discussion ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent_io.c | 2 +-
+ fs/btrfs/inode.c | 25 +++++++++++++++++++++----
+ 2 files changed, 22 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index be9dc78aa727..33c6b191ca59 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1899,7 +1899,7 @@ static int __process_pages_contig(struct address_space *mapping,
+ if (page_ops & PAGE_SET_PRIVATE2)
+ SetPagePrivate2(pages[i]);
+
+- if (pages[i] == locked_page) {
++ if (locked_page && pages[i] == locked_page) {
+ put_page(pages[i]);
+ pages_locked++;
+ continue;
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index dc14fc2e4206..0b2758961b1c 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -712,10 +712,12 @@ cleanup_and_bail_uncompressed:
+ * to our extent and set things up for the async work queue to run
+ * cow_file_range to do the normal delalloc dance.
+ */
+- if (page_offset(async_chunk->locked_page) >= start &&
+- page_offset(async_chunk->locked_page) <= end)
++ if (async_chunk->locked_page &&
++ (page_offset(async_chunk->locked_page) >= start &&
++ page_offset(async_chunk->locked_page)) <= end) {
+ __set_page_dirty_nobuffers(async_chunk->locked_page);
+ /* unlocked later on in the async handlers */
++ }
+
+ if (redirty)
+ extent_range_redirty_for_io(inode, start, end);
+@@ -795,7 +797,7 @@ retry:
+ async_extent->start +
+ async_extent->ram_size - 1,
+ WB_SYNC_ALL);
+- else if (ret)
++ else if (ret && async_chunk->locked_page)
+ unlock_page(async_chunk->locked_page);
+ kfree(async_extent);
+ cond_resched();
+@@ -1264,10 +1266,25 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
+ async_chunk[i].inode = inode;
+ async_chunk[i].start = start;
+ async_chunk[i].end = cur_end;
+- async_chunk[i].locked_page = locked_page;
+ async_chunk[i].write_flags = write_flags;
+ INIT_LIST_HEAD(&async_chunk[i].extents);
+
++ /*
++ * The locked_page comes all the way from writepage and its
++ * the original page we were actually given. As we spread
++ * this large delalloc region across multiple async_chunk
++ * structs, only the first struct needs a pointer to locked_page
++ *
++ * This way we don't need racey decisions about who is supposed
++ * to unlock it.
++ */
++ if (locked_page) {
++ async_chunk[i].locked_page = locked_page;
++ locked_page = NULL;
++ } else {
++ async_chunk[i].locked_page = NULL;
++ }
++
+ btrfs_init_work(&async_chunk[i].work, async_cow_start,
+ async_cow_submit, async_cow_free);
+
+--
+2.20.1
+
--- /dev/null
+From a2e6f02f652ac665c135fba479c66440e4c2336c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Oct 2019 20:47:58 +0100
+Subject: drm/i915/execlists: Fix annotation for decoupling virtual request
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+[ Upstream commit 08ad9a3846fc72b047b110b36d162ffbcf298fa2 ]
+
+As we may signal a request and take the engine->active.lock within the
+signaler, the engine submission paths have to use a nested annotation on
+their requests -- but we guarantee that we can never submit on the same
+engine as the signaling fence.
+
+<4>[ 723.763281] WARNING: possible circular locking dependency detected
+<4>[ 723.763285] 5.3.0-g80fa0e042cdb-drmtip_379+ #1 Tainted: G U
+<4>[ 723.763288] ------------------------------------------------------
+<4>[ 723.763291] gem_exec_await/1388 is trying to acquire lock:
+<4>[ 723.763294] ffff93a7b53221d8 (&engine->active.lock){..-.}, at: execlists_submit_request+0x2b/0x1e0 [i915]
+<4>[ 723.763378]
+ but task is already holding lock:
+<4>[ 723.763381] ffff93a7c25f6d20 (&i915_request_get(rq)->submit/1){-.-.}, at: __i915_sw_fence_complete+0x1b2/0x250 [i915]
+<4>[ 723.763420]
+ which lock already depends on the new lock.
+
+<4>[ 723.763423]
+ the existing dependency chain (in reverse order) is:
+<4>[ 723.763427]
+ -> #2 (&i915_request_get(rq)->submit/1){-.-.}:
+<4>[ 723.763434] _raw_spin_lock_irqsave_nested+0x39/0x50
+<4>[ 723.763478] __i915_sw_fence_complete+0x1b2/0x250 [i915]
+<4>[ 723.763513] intel_engine_breadcrumbs_irq+0x3aa/0x5e0 [i915]
+<4>[ 723.763600] cs_irq_handler+0x49/0x50 [i915]
+<4>[ 723.763659] gen11_gt_irq_handler+0x17b/0x280 [i915]
+<4>[ 723.763690] gen11_irq_handler+0x54/0xf0 [i915]
+<4>[ 723.763695] __handle_irq_event_percpu+0x41/0x2d0
+<4>[ 723.763699] handle_irq_event_percpu+0x2b/0x70
+<4>[ 723.763702] handle_irq_event+0x2f/0x50
+<4>[ 723.763706] handle_edge_irq+0xee/0x1a0
+<4>[ 723.763709] do_IRQ+0x7e/0x160
+<4>[ 723.763712] ret_from_intr+0x0/0x1d
+<4>[ 723.763717] __slab_alloc.isra.28.constprop.33+0x4f/0x70
+<4>[ 723.763720] kmem_cache_alloc+0x28d/0x2f0
+<4>[ 723.763724] vm_area_dup+0x15/0x40
+<4>[ 723.763727] dup_mm+0x2dd/0x550
+<4>[ 723.763730] copy_process+0xf21/0x1ef0
+<4>[ 723.763734] _do_fork+0x71/0x670
+<4>[ 723.763737] __se_sys_clone+0x6e/0xa0
+<4>[ 723.763741] do_syscall_64+0x4f/0x210
+<4>[ 723.763744] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+<4>[ 723.763747]
+ -> #1 (&(&rq->lock)->rlock#2){-.-.}:
+<4>[ 723.763752] _raw_spin_lock+0x2a/0x40
+<4>[ 723.763789] __unwind_incomplete_requests+0x3eb/0x450 [i915]
+<4>[ 723.763825] __execlists_submission_tasklet+0x9ec/0x1d60 [i915]
+<4>[ 723.763864] execlists_submission_tasklet+0x34/0x50 [i915]
+<4>[ 723.763874] tasklet_action_common.isra.5+0x47/0xb0
+<4>[ 723.763878] __do_softirq+0xd8/0x4ae
+<4>[ 723.763881] irq_exit+0xa9/0xc0
+<4>[ 723.763883] smp_apic_timer_interrupt+0xb7/0x280
+<4>[ 723.763887] apic_timer_interrupt+0xf/0x20
+<4>[ 723.763892] cpuidle_enter_state+0xae/0x450
+<4>[ 723.763895] cpuidle_enter+0x24/0x40
+<4>[ 723.763899] do_idle+0x1e7/0x250
+<4>[ 723.763902] cpu_startup_entry+0x14/0x20
+<4>[ 723.763905] start_secondary+0x15f/0x1b0
+<4>[ 723.763908] secondary_startup_64+0xa4/0xb0
+<4>[ 723.763911]
+ -> #0 (&engine->active.lock){..-.}:
+<4>[ 723.763916] __lock_acquire+0x15d8/0x1ea0
+<4>[ 723.763919] lock_acquire+0xa6/0x1c0
+<4>[ 723.763922] _raw_spin_lock_irqsave+0x33/0x50
+<4>[ 723.763956] execlists_submit_request+0x2b/0x1e0 [i915]
+<4>[ 723.764002] submit_notify+0xa8/0x13c [i915]
+<4>[ 723.764035] __i915_sw_fence_complete+0x81/0x250 [i915]
+<4>[ 723.764054] i915_sw_fence_wake+0x51/0x64 [i915]
+<4>[ 723.764054] __i915_sw_fence_complete+0x1ee/0x250 [i915]
+<4>[ 723.764054] dma_i915_sw_fence_wake_timer+0x14/0x20 [i915]
+<4>[ 723.764054] dma_fence_signal_locked+0x9e/0x1c0
+<4>[ 723.764054] dma_fence_signal+0x1f/0x40
+<4>[ 723.764054] vgem_fence_signal_ioctl+0x67/0xc0 [vgem]
+<4>[ 723.764054] drm_ioctl_kernel+0x83/0xf0
+<4>[ 723.764054] drm_ioctl+0x2f3/0x3b0
+<4>[ 723.764054] do_vfs_ioctl+0xa0/0x6f0
+<4>[ 723.764054] ksys_ioctl+0x35/0x60
+<4>[ 723.764054] __x64_sys_ioctl+0x11/0x20
+<4>[ 723.764054] do_syscall_64+0x4f/0x210
+<4>[ 723.764054] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+<4>[ 723.764054]
+ other info that might help us debug this:
+
+<4>[ 723.764054] Chain exists of:
+ &engine->active.lock --> &(&rq->lock)->rlock#2 --> &i915_request_get(rq)->submit/1
+
+<4>[ 723.764054] Possible unsafe locking scenario:
+
+<4>[ 723.764054] CPU0 CPU1
+<4>[ 723.764054] ---- ----
+<4>[ 723.764054] lock(&i915_request_get(rq)->submit/1);
+<4>[ 723.764054] lock(&(&rq->lock)->rlock#2);
+<4>[ 723.764054] lock(&i915_request_get(rq)->submit/1);
+<4>[ 723.764054] lock(&engine->active.lock);
+<4>[ 723.764054]
+ *** DEADLOCK ***
+
+Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111862
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20191004194758.19679-1-chris@chris-wilson.co.uk
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/intel_lrc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
+index 06a506c29463..d564bfcab6a3 100644
+--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
++++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
+@@ -525,7 +525,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
+ */
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &rq->fence.flags)) {
+- spin_lock(&rq->lock);
++ spin_lock_nested(&rq->lock,
++ SINGLE_DEPTH_NESTING);
+ i915_request_cancel_breadcrumb(rq);
+ spin_unlock(&rq->lock);
+ }
+--
+2.20.1
+
--- /dev/null
+From b6d34c969bf5712aa744486463eed1c7889ae67f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Dec 2019 10:09:45 +0100
+Subject: efi: Don't attempt to map RCI2 config table if it doesn't exist
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+[ Upstream commit a470552ee8965da0fe6fd4df0aa39c4cda652c7c ]
+
+Commit:
+
+ 1c5fecb61255aa12 ("efi: Export Runtime Configuration Interface table to sysfs")
+
+... added support for a Dell specific UEFI configuration table, but
+failed to take into account that mapping the table should not be
+attempted unless the table actually exists. If it doesn't exist,
+the code usually fails silently unless pr_debug() prints are
+enabled. However, on 32-bit PAE x86, the splat below is produced due
+to the attempt to map the placeholder value EFI_INVALID_TABLE_ADDR
+which we use for non-existing UEFI configuration tables, and which
+equals ULONG_MAX.
+
+ memremap attempted on mixed range 0x00000000ffffffff size: 0x1e
+ WARNING: CPU: 1 PID: 1 at kernel/iomem.c:81 memremap+0x1a3/0x1c0
+ Modules linked in:
+ CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.4.2-smp-mine #1
+ Hardware name: Hewlett-Packard HP Z400 Workstation/0B4Ch, BIOS 786G3 v03.61 03/05/2018
+ EIP: memremap+0x1a3/0x1c0
+ ...
+ Call Trace:
+ ? map_properties+0x473/0x473
+ ? efi_rci2_sysfs_init+0x2c/0x154
+ ? map_properties+0x473/0x473
+ ? do_one_initcall+0x49/0x1d4
+ ? parse_args+0x1e8/0x2a0
+ ? do_early_param+0x7a/0x7a
+ ? kernel_init_freeable+0x139/0x1c2
+ ? rest_init+0x8e/0x8e
+ ? kernel_init+0xd/0xf2
+ ? ret_from_fork+0x2e/0x38
+
+Fix this by checking whether the table exists before attempting to map it.
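+
+In code, the guard is simply (sketch of the hunk below):
+
+    if (rci2_table_phys == EFI_INVALID_TABLE_ADDR)
+            return 0;       /* table not published by the firmware */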
+
+Reported-by: Richard Narron <comet.berkeley@gmail.com>
+Tested-by: Richard Narron <comet.berkeley@gmail.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Cc: linux-efi@vger.kernel.org
+Fixes: 1c5fecb61255aa12 ("efi: Export Runtime Configuration Interface table to sysfs")
+Link: https://lkml.kernel.org/r/20191210090945.11501-2-ardb@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/rci2-table.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c
+index 76b0c354a027..de1a9a1f9f14 100644
+--- a/drivers/firmware/efi/rci2-table.c
++++ b/drivers/firmware/efi/rci2-table.c
+@@ -81,6 +81,9 @@ static int __init efi_rci2_sysfs_init(void)
+ struct kobject *tables_kobj;
+ int ret = -ENOMEM;
+
++ if (rci2_table_phys == EFI_INVALID_TABLE_ADDR)
++ return 0;
++
+ rci2_base = memremap(rci2_table_phys,
+ sizeof(struct rci2_table_global_hdr),
+ MEMREMAP_WB);
+--
+2.20.1
+
--- /dev/null
+From 529aff430f3aba5d90ad2d6995674fdce1e041ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 Dec 2019 11:25:27 +0000
+Subject: hsr: avoid debugfs warning message when module is removed
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 84bb59d773853bc2dda2ac1ef8474c40eb33a3c6 ]
+
+When the hsr module is being removed, debugfs_remove() is called to
+remove both the debugfs directory and file.
+
+While the module is being removed, its state is changed to
+MODULE_STATE_GOING and then exit() is called.
+At this point the module can no longer be held, so try_module_get()
+fails.
+
+debugfs's open() callback tries to hold the module if .owner is set.
+If that fails, a warning message is printed.
+
+CPU0 CPU1
+delete_module()
+ try_stop_module()
+ hsr_exit() open() <-- WARNING
+ debugfs_remove()
+
+In order to avoid the warning message, this patch makes the hsr module
+not set .owner. Unsetting .owner is safe because these operations are
+protected by inode_lock().
+
+Test commands:
+ #SHELL1
+ ip link add dummy0 type dummy
+ ip link add dummy1 type dummy
+ while :
+ do
+ ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1
+ modprobe -rv hsr
+ done
+
+ #SHELL2
+ while :
+ do
+ cat /sys/kernel/debug/hsr0/node_table
+ done
+
+Splat looks like:
+[ 101.223783][ T1271] ------------[ cut here ]------------
+[ 101.230309][ T1271] debugfs file owner did not clean up at exit: node_table
+[ 101.230380][ T1271] WARNING: CPU: 3 PID: 1271 at fs/debugfs/file.c:309 full_proxy_open+0x10f/0x650
+[ 101.233153][ T1271] Modules linked in: hsr(-) dummy veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_d]
+[ 101.237112][ T1271] CPU: 3 PID: 1271 Comm: cat Tainted: G W 5.5.0-rc1+ #204
+[ 101.238270][ T1271] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[ 101.240379][ T1271] RIP: 0010:full_proxy_open+0x10f/0x650
+[ 101.241166][ T1271] Code: 48 c1 ea 03 80 3c 02 00 0f 85 c1 04 00 00 49 8b 3c 24 e8 04 86 7e ff 84 c0 75 2d 4c 8
+[ 101.251985][ T1271] RSP: 0018:ffff8880ca22fa38 EFLAGS: 00010286
+[ 101.273355][ T1271] RAX: dffffc0000000008 RBX: ffff8880cc6e6200 RCX: 0000000000000000
+[ 101.274466][ T1271] RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffff8880c4dd5c14
+[ 101.275581][ T1271] RBP: 0000000000000000 R08: fffffbfff2922f5d R09: 0000000000000000
+[ 101.276733][ T1271] R10: 0000000000000001 R11: 0000000000000000 R12: ffffffffc0551bc0
+[ 101.277853][ T1271] R13: ffff8880c4059a48 R14: ffff8880be50a5e0 R15: ffffffff941adaa0
+[ 101.278956][ T1271] FS: 00007f8871cda540(0000) GS:ffff8880da800000(0000) knlGS:0000000000000000
+[ 101.280216][ T1271] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 101.282832][ T1271] CR2: 00007f88717cfd10 CR3: 00000000b9440005 CR4: 00000000000606e0
+[ 101.283974][ T1271] Call Trace:
+[ 101.285328][ T1271] do_dentry_open+0x63c/0xf50
+[ 101.286077][ T1271] ? open_proxy_open+0x270/0x270
+[ 101.288271][ T1271] ? __x64_sys_fchdir+0x180/0x180
+[ 101.288987][ T1271] ? inode_permission+0x65/0x390
+[ 101.289682][ T1271] path_openat+0x701/0x2810
+[ 101.290294][ T1271] ? path_lookupat+0x880/0x880
+[ 101.290957][ T1271] ? check_chain_key+0x236/0x5d0
+[ 101.291676][ T1271] ? __lock_acquire+0xdfe/0x3de0
+[ 101.292358][ T1271] ? sched_clock+0x5/0x10
+[ 101.292962][ T1271] ? sched_clock_cpu+0x18/0x170
+[ 101.293644][ T1271] ? find_held_lock+0x39/0x1d0
+[ 101.305616][ T1271] do_filp_open+0x17a/0x270
+[ 101.306061][ T1271] ? may_open_dev+0xc0/0xc0
+[ ... ]
+
+Fixes: fc4ecaeebd26 ("net: hsr: add debugfs support for display node list")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_debugfs.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
+index 94447974a3c0..6135706f03d5 100644
+--- a/net/hsr/hsr_debugfs.c
++++ b/net/hsr/hsr_debugfs.c
+@@ -64,7 +64,6 @@ hsr_node_table_open(struct inode *inode, struct file *filp)
+ }
+
+ static const struct file_operations hsr_fops = {
+- .owner = THIS_MODULE,
+ .open = hsr_node_table_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+--
+2.20.1
+
--- /dev/null
+From b5bd241db1f7df2a9b3f1f57d1ea66770f0bbf46 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 Dec 2019 11:26:54 +0000
+Subject: hsr: fix a race condition in node list insertion and deletion
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 92a35678ec075100ce666a2fb6969151affb0e5d ]
+
+hsr nodes are protected by RCU and there is no write-side lock.
+But node insertions and deletions can run concurrently,
+so write-side locking is needed.
+
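+The pattern introduced below is to take the new hsr->list_lock around
+every node list mutation and to defer frees until RCU readers are
+done, roughly (sketch; see the per-file hunks):
+
+    spin_lock_bh(&hsr->list_lock);
+    list_add_tail_rcu(&new_node->mac_list, node_db);
+    spin_unlock_bh(&hsr->list_lock);
+
+    spin_lock_bh(&hsr->list_lock);
+    list_del_rcu(&node->mac_list);
+    spin_unlock_bh(&hsr->list_lock);
+    kfree_rcu(node, rcu_head);
+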
+Test commands:
+ ip netns add nst
+ ip link add veth0 type veth peer name veth1
+ ip link add veth2 type veth peer name veth3
+ ip link set veth1 netns nst
+ ip link set veth3 netns nst
+ ip link set veth0 up
+ ip link set veth2 up
+ ip link add hsr0 type hsr slave1 veth0 slave2 veth2
+ ip a a 192.168.100.1/24 dev hsr0
+ ip link set hsr0 up
+ ip netns exec nst ip link set veth1 up
+ ip netns exec nst ip link set veth3 up
+ ip netns exec nst ip link add hsr1 type hsr slave1 veth1 slave2 veth3
+ ip netns exec nst ip a a 192.168.100.2/24 dev hsr1
+ ip netns exec nst ip link set hsr1 up
+
+ for i in {0..9}
+ do
+ for j in {0..9}
+ do
+ for k in {0..9}
+ do
+ for l in {0..9}
+ do
+ arping 192.168.100.2 -I hsr0 -s 00:01:3$i:4$j:5$k:6$l -c1 &
+ done
+ done
+ done
+ done
+
+Splat looks like:
+[ 236.066091][ T3286] list_add corruption. next->prev should be prev (ffff8880a5940300), but was ffff8880a5940d0.
+[ 236.069617][ T3286] ------------[ cut here ]------------
+[ 236.070545][ T3286] kernel BUG at lib/list_debug.c:25!
+[ 236.071391][ T3286] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[ 236.072343][ T3286] CPU: 0 PID: 3286 Comm: arping Tainted: G W 5.5.0-rc1+ #209
+[ 236.073463][ T3286] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[ 236.074695][ T3286] RIP: 0010:__list_add_valid+0x74/0xd0
+[ 236.075499][ T3286] Code: 48 39 da 75 27 48 39 f5 74 36 48 39 dd 74 31 48 83 c4 08 b8 01 00 00 00 5b 5d c3 48 b
+[ 236.078277][ T3286] RSP: 0018:ffff8880aaa97648 EFLAGS: 00010286
+[ 236.086991][ T3286] RAX: 0000000000000075 RBX: ffff8880d4624c20 RCX: 0000000000000000
+[ 236.088000][ T3286] RDX: 0000000000000075 RSI: 0000000000000008 RDI: ffffed1015552ebf
+[ 236.098897][ T3286] RBP: ffff88809b53d200 R08: ffffed101b3c04f9 R09: ffffed101b3c04f9
+[ 236.099960][ T3286] R10: 00000000308769a1 R11: ffffed101b3c04f8 R12: ffff8880d4624c28
+[ 236.100974][ T3286] R13: ffff8880d4624c20 R14: 0000000040310100 R15: ffff8880ce17ee02
+[ 236.138967][ T3286] FS: 00007f23479fa680(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000
+[ 236.144852][ T3286] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 236.145720][ T3286] CR2: 00007f4a14bab210 CR3: 00000000a61c6001 CR4: 00000000000606f0
+[ 236.146776][ T3286] Call Trace:
+[ 236.147222][ T3286] hsr_add_node+0x314/0x490 [hsr]
+[ 236.153633][ T3286] hsr_forward_skb+0x2b6/0x1bc0 [hsr]
+[ 236.154362][ T3286] ? rcu_read_lock_sched_held+0x90/0xc0
+[ 236.155091][ T3286] ? rcu_read_lock_bh_held+0xa0/0xa0
+[ 236.156607][ T3286] hsr_dev_xmit+0x70/0xd0 [hsr]
+[ 236.157254][ T3286] dev_hard_start_xmit+0x160/0x740
+[ 236.157941][ T3286] __dev_queue_xmit+0x1961/0x2e10
+[ 236.158565][ T3286] ? netdev_core_pick_tx+0x2e0/0x2e0
+[ ... ]
+
+Reported-by: syzbot+3924327f9ad5f4d2b343@syzkaller.appspotmail.com
+Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_device.c | 7 ++--
+ net/hsr/hsr_framereg.c | 73 ++++++++++++++++++++++++++----------------
+ net/hsr/hsr_framereg.h | 6 ++--
+ net/hsr/hsr_main.c | 2 +-
+ net/hsr/hsr_main.h | 5 +--
+ 5 files changed, 56 insertions(+), 37 deletions(-)
+
+diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
+index e73549075a03..62c03f0d0079 100644
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -368,7 +368,7 @@ static void hsr_dev_destroy(struct net_device *hsr_dev)
+ del_timer_sync(&hsr->prune_timer);
+ del_timer_sync(&hsr->announce_timer);
+
+- hsr_del_self_node(&hsr->self_node_db);
++ hsr_del_self_node(hsr);
+ hsr_del_nodes(&hsr->node_db);
+ }
+
+@@ -440,11 +440,12 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+ INIT_LIST_HEAD(&hsr->ports);
+ INIT_LIST_HEAD(&hsr->node_db);
+ INIT_LIST_HEAD(&hsr->self_node_db);
++ spin_lock_init(&hsr->list_lock);
+
+ ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr);
+
+ /* Make sure we recognize frames from ourselves in hsr_rcv() */
+- res = hsr_create_self_node(&hsr->self_node_db, hsr_dev->dev_addr,
++ res = hsr_create_self_node(hsr, hsr_dev->dev_addr,
+ slave[1]->dev_addr);
+ if (res < 0)
+ return res;
+@@ -502,7 +503,7 @@ err_unregister:
+ list_for_each_entry_safe(port, tmp, &hsr->ports, port_list)
+ hsr_del_port(port);
+ err_add_master:
+- hsr_del_self_node(&hsr->self_node_db);
++ hsr_del_self_node(hsr);
+
+ return res;
+ }
+diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
+index 292be446007b..27dc65d7de67 100644
+--- a/net/hsr/hsr_framereg.c
++++ b/net/hsr/hsr_framereg.c
+@@ -75,10 +75,11 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
+ /* Helper for device init; the self_node_db is used in hsr_rcv() to recognize
+ * frames from self that's been looped over the HSR ring.
+ */
+-int hsr_create_self_node(struct list_head *self_node_db,
++int hsr_create_self_node(struct hsr_priv *hsr,
+ unsigned char addr_a[ETH_ALEN],
+ unsigned char addr_b[ETH_ALEN])
+ {
++ struct list_head *self_node_db = &hsr->self_node_db;
+ struct hsr_node *node, *oldnode;
+
+ node = kmalloc(sizeof(*node), GFP_KERNEL);
+@@ -88,33 +89,33 @@ int hsr_create_self_node(struct list_head *self_node_db,
+ ether_addr_copy(node->macaddress_A, addr_a);
+ ether_addr_copy(node->macaddress_B, addr_b);
+
+- rcu_read_lock();
++ spin_lock_bh(&hsr->list_lock);
+ oldnode = list_first_or_null_rcu(self_node_db,
+ struct hsr_node, mac_list);
+ if (oldnode) {
+ list_replace_rcu(&oldnode->mac_list, &node->mac_list);
+- rcu_read_unlock();
+- synchronize_rcu();
+- kfree(oldnode);
++ spin_unlock_bh(&hsr->list_lock);
++ kfree_rcu(oldnode, rcu_head);
+ } else {
+- rcu_read_unlock();
+ list_add_tail_rcu(&node->mac_list, self_node_db);
++ spin_unlock_bh(&hsr->list_lock);
+ }
+
+ return 0;
+ }
+
+-void hsr_del_self_node(struct list_head *self_node_db)
++void hsr_del_self_node(struct hsr_priv *hsr)
+ {
++ struct list_head *self_node_db = &hsr->self_node_db;
+ struct hsr_node *node;
+
+- rcu_read_lock();
++ spin_lock_bh(&hsr->list_lock);
+ node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+- rcu_read_unlock();
+ if (node) {
+ list_del_rcu(&node->mac_list);
+- kfree(node);
++ kfree_rcu(node, rcu_head);
+ }
++ spin_unlock_bh(&hsr->list_lock);
+ }
+
+ void hsr_del_nodes(struct list_head *node_db)
+@@ -130,30 +131,43 @@ void hsr_del_nodes(struct list_head *node_db)
+ * seq_out is used to initialize filtering of outgoing duplicate frames
+ * originating from the newly added node.
+ */
+-struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
+- u16 seq_out)
++static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
++ struct list_head *node_db,
++ unsigned char addr[],
++ u16 seq_out)
+ {
+- struct hsr_node *node;
++ struct hsr_node *new_node, *node;
+ unsigned long now;
+ int i;
+
+- node = kzalloc(sizeof(*node), GFP_ATOMIC);
+- if (!node)
++ new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
++ if (!new_node)
+ return NULL;
+
+- ether_addr_copy(node->macaddress_A, addr);
++ ether_addr_copy(new_node->macaddress_A, addr);
+
+ /* We are only interested in time diffs here, so use current jiffies
+ * as initialization. (0 could trigger an spurious ring error warning).
+ */
+ now = jiffies;
+ for (i = 0; i < HSR_PT_PORTS; i++)
+- node->time_in[i] = now;
++ new_node->time_in[i] = now;
+ for (i = 0; i < HSR_PT_PORTS; i++)
+- node->seq_out[i] = seq_out;
+-
+- list_add_tail_rcu(&node->mac_list, node_db);
++ new_node->seq_out[i] = seq_out;
+
++ spin_lock_bh(&hsr->list_lock);
++ list_for_each_entry_rcu(node, node_db, mac_list) {
++ if (ether_addr_equal(node->macaddress_A, addr))
++ goto out;
++ if (ether_addr_equal(node->macaddress_B, addr))
++ goto out;
++ }
++ list_add_tail_rcu(&new_node->mac_list, node_db);
++ spin_unlock_bh(&hsr->list_lock);
++ return new_node;
++out:
++ spin_unlock_bh(&hsr->list_lock);
++ kfree(new_node);
+ return node;
+ }
+
+@@ -163,6 +177,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
+ bool is_sup)
+ {
+ struct list_head *node_db = &port->hsr->node_db;
++ struct hsr_priv *hsr = port->hsr;
+ struct hsr_node *node;
+ struct ethhdr *ethhdr;
+ u16 seq_out;
+@@ -196,7 +211,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
+ seq_out = HSR_SEQNR_START;
+ }
+
+- return hsr_add_node(node_db, ethhdr->h_source, seq_out);
++ return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out);
+ }
+
+ /* Use the Supervision frame's info about an eventual macaddress_B for merging
+@@ -206,10 +221,11 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
+ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+ struct hsr_port *port_rcv)
+ {
+- struct ethhdr *ethhdr;
+- struct hsr_node *node_real;
++ struct hsr_priv *hsr = port_rcv->hsr;
+ struct hsr_sup_payload *hsr_sp;
++ struct hsr_node *node_real;
+ struct list_head *node_db;
++ struct ethhdr *ethhdr;
+ int i;
+
+ ethhdr = (struct ethhdr *)skb_mac_header(skb);
+@@ -231,7 +247,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+ node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
+ if (!node_real)
+ /* No frame received from AddrA of this node yet */
+- node_real = hsr_add_node(node_db, hsr_sp->macaddress_A,
++ node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
+ HSR_SEQNR_START - 1);
+ if (!node_real)
+ goto done; /* No mem */
+@@ -252,7 +268,9 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+ }
+ node_real->addr_B_port = port_rcv->type;
+
++ spin_lock_bh(&hsr->list_lock);
+ list_del_rcu(&node_curr->mac_list);
++ spin_unlock_bh(&hsr->list_lock);
+ kfree_rcu(node_curr, rcu_head);
+
+ done:
+@@ -368,12 +386,13 @@ void hsr_prune_nodes(struct timer_list *t)
+ {
+ struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
+ struct hsr_node *node;
++ struct hsr_node *tmp;
+ struct hsr_port *port;
+ unsigned long timestamp;
+ unsigned long time_a, time_b;
+
+- rcu_read_lock();
+- list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
++ spin_lock_bh(&hsr->list_lock);
++ list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) {
+ /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A]
+ * nor time_in[HSR_PT_SLAVE_B], will ever be updated for
+ * the master port. Thus the master node will be repeatedly
+@@ -421,7 +440,7 @@ void hsr_prune_nodes(struct timer_list *t)
+ kfree_rcu(node, rcu_head);
+ }
+ }
+- rcu_read_unlock();
++ spin_unlock_bh(&hsr->list_lock);
+
+ /* Restart timer */
+ mod_timer(&hsr->prune_timer,
+diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
+index 89a3ce38151d..0f0fa12b4329 100644
+--- a/net/hsr/hsr_framereg.h
++++ b/net/hsr/hsr_framereg.h
+@@ -12,10 +12,8 @@
+
+ struct hsr_node;
+
+-void hsr_del_self_node(struct list_head *self_node_db);
++void hsr_del_self_node(struct hsr_priv *hsr);
+ void hsr_del_nodes(struct list_head *node_db);
+-struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
+- u16 seq_out);
+ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
+ bool is_sup);
+ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+@@ -33,7 +31,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
+
+ void hsr_prune_nodes(struct timer_list *t);
+
+-int hsr_create_self_node(struct list_head *self_node_db,
++int hsr_create_self_node(struct hsr_priv *hsr,
+ unsigned char addr_a[ETH_ALEN],
+ unsigned char addr_b[ETH_ALEN]);
+
+diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
+index b9988a662ee1..6deb8fa8d5c8 100644
+--- a/net/hsr/hsr_main.c
++++ b/net/hsr/hsr_main.c
+@@ -64,7 +64,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
+
+ /* Make sure we recognize frames from ourselves in hsr_rcv() */
+ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
+- res = hsr_create_self_node(&hsr->self_node_db,
++ res = hsr_create_self_node(hsr,
+ master->dev->dev_addr,
+ port ?
+ port->dev->dev_addr :
+diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
+index acab9c353a49..9ec38e33b8b1 100644
+--- a/net/hsr/hsr_main.h
++++ b/net/hsr/hsr_main.h
+@@ -160,8 +160,9 @@ struct hsr_priv {
+ int announce_count;
+ u16 sequence_nr;
+ u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
+- u8 prot_version; /* Indicate if HSRv0 or HSRv1. */
+- spinlock_t seqnr_lock; /* locking for sequence_nr */
++ u8 prot_version; /* Indicate if HSRv0 or HSRv1. */
++ spinlock_t seqnr_lock; /* locking for sequence_nr */
++ spinlock_t list_lock; /* locking for node list */
+ unsigned char sup_multicast_addr[ETH_ALEN];
+ #ifdef CONFIG_DEBUG_FS
+ struct dentry *node_tbl_root;
+--
+2.20.1
+
--- /dev/null
+From 938841d9f18e2125949badb44d0ee7ccfddabec9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 Dec 2019 11:26:15 +0000
+Subject: hsr: fix error handling routine in hsr_dev_finalize()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 1d19e2d53e8ed9e4c98fc95e0067492cda7288b0 ]
+
+hsr_dev_finalize() is called to create a new hsr interface.
+Its error handling has several problems:
+
+1. Wrong check of the return value of debugfs_create_{dir,file}().
+These functions don't return NULL; if an error occurs, they return
+an error pointer. So the code should check for an error pointer
+instead of NULL.
+
+2. The interface is not unregistered if hsr interface setup fails.
+If initialization of the hsr interface fails after register_netdevice(),
+unregister_netdevice() should be called.
+
+3. Failures to create debugfs entries should be ignored.
+If creating the debugfs dir or file fails, creating the hsr interface
+currently fails as well. But debugfs does not affect the actual logic
+of the hsr module, so ignoring such failures is more correct and
+matches common practice.
+
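+A rough sketch of the first point (illustration only, not the exact
+driver code; example_debugfs_init(), struct my_priv and the "example"
+name are placeholders): debugfs_create_dir() and debugfs_create_file()
+signal failure through an error pointer, so the check must use IS_ERR():
+
+	static void example_debugfs_init(struct my_priv *priv)
+	{
+		struct dentry *de;
+
+		de = debugfs_create_dir("example", NULL);
+		if (IS_ERR(de))		/* never NULL on failure */
+			return;		/* debugfs is optional, just skip it */
+		priv->root = de;
+	}
+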
+Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_debugfs.c | 15 +++++++--------
+ net/hsr/hsr_device.c | 19 ++++++++++---------
+ net/hsr/hsr_main.h | 11 ++++-------
+ 3 files changed, 21 insertions(+), 24 deletions(-)
+
+diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
+index 6135706f03d5..6618a9d8e58e 100644
+--- a/net/hsr/hsr_debugfs.c
++++ b/net/hsr/hsr_debugfs.c
+@@ -77,15 +77,14 @@ static const struct file_operations hsr_fops = {
+ * When debugfs is configured this routine sets up the node_table file per
+ * hsr device for dumping the node_table entries
+ */
+-int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
++void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
+ {
+- int rc = -1;
+ struct dentry *de = NULL;
+
+ de = debugfs_create_dir(hsr_dev->name, NULL);
+- if (!de) {
++ if (IS_ERR(de)) {
+ pr_err("Cannot create hsr debugfs root\n");
+- return rc;
++ return;
+ }
+
+ priv->node_tbl_root = de;
+@@ -93,13 +92,13 @@ int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
+ de = debugfs_create_file("node_table", S_IFREG | 0444,
+ priv->node_tbl_root, priv,
+ &hsr_fops);
+- if (!de) {
++ if (IS_ERR(de)) {
+ pr_err("Cannot create hsr node_table directory\n");
+- return rc;
++ debugfs_remove(priv->node_tbl_root);
++ priv->node_tbl_root = NULL;
++ return;
+ }
+ priv->node_tbl_file = de;
+-
+- return 0;
+ }
+
+ /* hsr_debugfs_term - Tear down debugfs intrastructure
+diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
+index b01e1bae4ddc..e73549075a03 100644
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -477,30 +477,31 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+
+ res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
+ if (res)
+- goto err_add_port;
++ goto err_add_master;
+
+ res = register_netdevice(hsr_dev);
+ if (res)
+- goto fail;
++ goto err_unregister;
+
+ res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A);
+ if (res)
+- goto fail;
++ goto err_add_slaves;
++
+ res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B);
+ if (res)
+- goto fail;
++ goto err_add_slaves;
+
++ hsr_debugfs_init(hsr, hsr_dev);
+ mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
+- res = hsr_debugfs_init(hsr, hsr_dev);
+- if (res)
+- goto fail;
+
+ return 0;
+
+-fail:
++err_add_slaves:
++ unregister_netdevice(hsr_dev);
++err_unregister:
+ list_for_each_entry_safe(port, tmp, &hsr->ports, port_list)
+ hsr_del_port(port);
+-err_add_port:
++err_add_master:
+ hsr_del_self_node(&hsr->self_node_db);
+
+ return res;
+diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
+index 96fac696a1e1..acab9c353a49 100644
+--- a/net/hsr/hsr_main.h
++++ b/net/hsr/hsr_main.h
+@@ -184,15 +184,12 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb)
+ }
+
+ #if IS_ENABLED(CONFIG_DEBUG_FS)
+-int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
++void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
+ void hsr_debugfs_term(struct hsr_priv *priv);
+ #else
+-static inline int hsr_debugfs_init(struct hsr_priv *priv,
+- struct net_device *hsr_dev)
+-{
+- return 0;
+-}
+-
++static inline void hsr_debugfs_init(struct hsr_priv *priv,
++ struct net_device *hsr_dev)
++{}
+ static inline void hsr_debugfs_term(struct hsr_priv *priv)
+ {}
+ #endif
+--
+2.20.1
+
--- /dev/null
+From 3cc96c5410f0a8c6558b5b56a0878ef44ee707a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Dec 2019 16:52:53 -0800
+Subject: lib/ubsan: don't serialize UBSAN report
+
+From: Julien Grall <julien.grall@arm.com>
+
+[ Upstream commit ce5c31db3645b649a31044a4d8b6057f6c723702 ]
+
+At the moment, UBSAN reports are serialized using a spin_lock(). On
+RT systems, spinlocks are turned into rt_spin_lock and may sleep. This
+will result in the following splat if the undefined behavior is hit in a
+context that cannot sleep:
+
+ BUG: sleeping function called from invalid context at /src/linux/kernel/locking/rtmutex.c:968
+ in_atomic(): 1, irqs_disabled(): 128, pid: 3447, name: make
+ 1 lock held by make/3447:
+ #0: 000000009a966332 (&mm->mmap_sem){++++}, at: do_page_fault+0x140/0x4f8
+ irq event stamp: 6284
+ hardirqs last enabled at (6283): [<ffff000011326520>] _raw_spin_unlock_irqrestore+0x90/0xa0
+ hardirqs last disabled at (6284): [<ffff0000113262b0>] _raw_spin_lock_irqsave+0x30/0x78
+ softirqs last enabled at (2430): [<ffff000010088ef8>] fpsimd_restore_current_state+0x60/0xe8
+ softirqs last disabled at (2427): [<ffff000010088ec0>] fpsimd_restore_current_state+0x28/0xe8
+ Preemption disabled at:
+ [<ffff000011324a4c>] rt_mutex_futex_unlock+0x4c/0xb0
+ CPU: 3 PID: 3447 Comm: make Tainted: G W 5.2.14-rt7-01890-ge6e057589653 #911
+ Call trace:
+ dump_backtrace+0x0/0x148
+ show_stack+0x14/0x20
+ dump_stack+0xbc/0x104
+ ___might_sleep+0x154/0x210
+ rt_spin_lock+0x68/0xa0
+ ubsan_prologue+0x30/0x68
+ handle_overflow+0x64/0xe0
+ __ubsan_handle_add_overflow+0x10/0x18
+ __lock_acquire+0x1c28/0x2a28
+ lock_acquire+0xf0/0x370
+ _raw_spin_lock_irqsave+0x58/0x78
+ rt_mutex_futex_unlock+0x4c/0xb0
+ rt_spin_unlock+0x28/0x70
+ get_page_from_freelist+0x428/0x2b60
+ __alloc_pages_nodemask+0x174/0x1708
+ alloc_pages_vma+0x1ac/0x238
+ __handle_mm_fault+0x4ac/0x10b0
+ handle_mm_fault+0x1d8/0x3b0
+ do_page_fault+0x1c8/0x4f8
+ do_translation_fault+0xb8/0xe0
+ do_mem_abort+0x3c/0x98
+ el0_da+0x20/0x24
+
+The spin_lock() protects against multiple CPUs outputting a report at
+the same time, presumably to prevent the reports from being interleaved.
+However, they can still interleave with other messages (and even with
+the splat from __might_sleep).
+
+So the usefulness of the lock seems pretty limited. Rather than trying
+to accommodate RT systems by switching to a raw_spin_lock(), the lock is
+now completely dropped.
+
+Link: http://lkml.kernel.org/r/20190920100835.14999-1-julien.grall@arm.com
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Reported-by: Andre Przywara <andre.przywara@arm.com>
+Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/ubsan.c | 64 +++++++++++++++++++----------------------------------
+ 1 file changed, 23 insertions(+), 41 deletions(-)
+
+diff --git a/lib/ubsan.c b/lib/ubsan.c
+index 0c4681118fcd..f007a406f89c 100644
+--- a/lib/ubsan.c
++++ b/lib/ubsan.c
+@@ -140,25 +140,21 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type,
+ }
+ }
+
+-static DEFINE_SPINLOCK(report_lock);
+-
+-static void ubsan_prologue(struct source_location *location,
+- unsigned long *flags)
++static void ubsan_prologue(struct source_location *location)
+ {
+ current->in_ubsan++;
+- spin_lock_irqsave(&report_lock, *flags);
+
+ pr_err("========================================"
+ "========================================\n");
+ print_source_location("UBSAN: Undefined behaviour in", location);
+ }
+
+-static void ubsan_epilogue(unsigned long *flags)
++static void ubsan_epilogue(void)
+ {
+ dump_stack();
+ pr_err("========================================"
+ "========================================\n");
+- spin_unlock_irqrestore(&report_lock, *flags);
++
+ current->in_ubsan--;
+ }
+
+@@ -167,14 +163,13 @@ static void handle_overflow(struct overflow_data *data, void *lhs,
+ {
+
+ struct type_descriptor *type = data->type;
+- unsigned long flags;
+ char lhs_val_str[VALUE_LENGTH];
+ char rhs_val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+- ubsan_prologue(&data->location, &flags);
++ ubsan_prologue(&data->location);
+
+ val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs);
+ val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs);
+@@ -186,7 +181,7 @@ static void handle_overflow(struct overflow_data *data, void *lhs,
+ rhs_val_str,
+ type->type_name);
+
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ }
+
+ void __ubsan_handle_add_overflow(struct overflow_data *data,
+@@ -214,20 +209,19 @@ EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
+ void __ubsan_handle_negate_overflow(struct overflow_data *data,
+ void *old_val)
+ {
+- unsigned long flags;
+ char old_val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+- ubsan_prologue(&data->location, &flags);
++ ubsan_prologue(&data->location);
+
+ val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val);
+
+ pr_err("negation of %s cannot be represented in type %s:\n",
+ old_val_str, data->type->type_name);
+
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ }
+ EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
+
+@@ -235,13 +229,12 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
+ void __ubsan_handle_divrem_overflow(struct overflow_data *data,
+ void *lhs, void *rhs)
+ {
+- unsigned long flags;
+ char rhs_val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+- ubsan_prologue(&data->location, &flags);
++ ubsan_prologue(&data->location);
+
+ val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs);
+
+@@ -251,58 +244,52 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data,
+ else
+ pr_err("division by zero\n");
+
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ }
+ EXPORT_SYMBOL(__ubsan_handle_divrem_overflow);
+
+ static void handle_null_ptr_deref(struct type_mismatch_data_common *data)
+ {
+- unsigned long flags;
+-
+ if (suppress_report(data->location))
+ return;
+
+- ubsan_prologue(data->location, &flags);
++ ubsan_prologue(data->location);
+
+ pr_err("%s null pointer of type %s\n",
+ type_check_kinds[data->type_check_kind],
+ data->type->type_name);
+
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ }
+
+ static void handle_misaligned_access(struct type_mismatch_data_common *data,
+ unsigned long ptr)
+ {
+- unsigned long flags;
+-
+ if (suppress_report(data->location))
+ return;
+
+- ubsan_prologue(data->location, &flags);
++ ubsan_prologue(data->location);
+
+ pr_err("%s misaligned address %p for type %s\n",
+ type_check_kinds[data->type_check_kind],
+ (void *)ptr, data->type->type_name);
+ pr_err("which requires %ld byte alignment\n", data->alignment);
+
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ }
+
+ static void handle_object_size_mismatch(struct type_mismatch_data_common *data,
+ unsigned long ptr)
+ {
+- unsigned long flags;
+-
+ if (suppress_report(data->location))
+ return;
+
+- ubsan_prologue(data->location, &flags);
++ ubsan_prologue(data->location);
+ pr_err("%s address %p with insufficient space\n",
+ type_check_kinds[data->type_check_kind],
+ (void *) ptr);
+ pr_err("for an object of type %s\n", data->type->type_name);
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ }
+
+ static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data,
+@@ -351,25 +338,23 @@ EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1);
+
+ void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index)
+ {
+- unsigned long flags;
+ char index_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+- ubsan_prologue(&data->location, &flags);
++ ubsan_prologue(&data->location);
+
+ val_to_string(index_str, sizeof(index_str), data->index_type, index);
+ pr_err("index %s is out of range for type %s\n", index_str,
+ data->array_type->type_name);
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ }
+ EXPORT_SYMBOL(__ubsan_handle_out_of_bounds);
+
+ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
+ void *lhs, void *rhs)
+ {
+- unsigned long flags;
+ struct type_descriptor *rhs_type = data->rhs_type;
+ struct type_descriptor *lhs_type = data->lhs_type;
+ char rhs_str[VALUE_LENGTH];
+@@ -379,7 +364,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
+ if (suppress_report(&data->location))
+ goto out;
+
+- ubsan_prologue(&data->location, &flags);
++ ubsan_prologue(&data->location);
+
+ val_to_string(rhs_str, sizeof(rhs_str), rhs_type, rhs);
+ val_to_string(lhs_str, sizeof(lhs_str), lhs_type, lhs);
+@@ -402,7 +387,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
+ lhs_str, rhs_str,
+ lhs_type->type_name);
+
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ out:
+ user_access_restore(ua_flags);
+ }
+@@ -411,11 +396,9 @@ EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds);
+
+ void __ubsan_handle_builtin_unreachable(struct unreachable_data *data)
+ {
+- unsigned long flags;
+-
+- ubsan_prologue(&data->location, &flags);
++ ubsan_prologue(&data->location);
+ pr_err("calling __builtin_unreachable()\n");
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ panic("can't return from __builtin_unreachable()");
+ }
+ EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
+@@ -423,19 +406,18 @@ EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
+ void __ubsan_handle_load_invalid_value(struct invalid_value_data *data,
+ void *val)
+ {
+- unsigned long flags;
+ char val_str[VALUE_LENGTH];
+
+ if (suppress_report(&data->location))
+ return;
+
+- ubsan_prologue(&data->location, &flags);
++ ubsan_prologue(&data->location);
+
+ val_to_string(val_str, sizeof(val_str), data->type, val);
+
+ pr_err("load of value %s is not a valid value for type %s\n",
+ val_str, data->type->type_name);
+
+- ubsan_epilogue(&flags);
++ ubsan_epilogue();
+ }
+ EXPORT_SYMBOL(__ubsan_handle_load_invalid_value);
+--
+2.20.1
+
--- /dev/null
+From 86910105f43f432765737a14792160d01b65d2d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Jan 2020 13:00:15 -0800
+Subject: mm/hugetlb: defer freeing of huge pages if in non-task context
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit c77c0a8ac4c522638a8242fcb9de9496e3cdbb2d ]
+
+The following lockdep splat was observed when a certain hugetlbfs test
+was run:
+
+ ================================
+ WARNING: inconsistent lock state
+ 4.18.0-159.el8.x86_64+debug #1 Tainted: G W --------- - -
+ --------------------------------
+ inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
+ swapper/30/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
+ ffffffff9acdc038 (hugetlb_lock){+.?.}, at: free_huge_page+0x36f/0xaa0
+ {SOFTIRQ-ON-W} state was registered at:
+ lock_acquire+0x14f/0x3b0
+ _raw_spin_lock+0x30/0x70
+ __nr_hugepages_store_common+0x11b/0xb30
+ hugetlb_sysctl_handler_common+0x209/0x2d0
+ proc_sys_call_handler+0x37f/0x450
+ vfs_write+0x157/0x460
+ ksys_write+0xb8/0x170
+ do_syscall_64+0xa5/0x4d0
+ entry_SYSCALL_64_after_hwframe+0x6a/0xdf
+ irq event stamp: 691296
+ hardirqs last enabled at (691296): [<ffffffff99bb034b>] _raw_spin_unlock_irqrestore+0x4b/0x60
+ hardirqs last disabled at (691295): [<ffffffff99bb0ad2>] _raw_spin_lock_irqsave+0x22/0x81
+ softirqs last enabled at (691284): [<ffffffff97ff0c63>] irq_enter+0xc3/0xe0
+ softirqs last disabled at (691285): [<ffffffff97ff0ebe>] irq_exit+0x23e/0x2b0
+
+ other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+ CPU0
+ ----
+ lock(hugetlb_lock);
+ <Interrupt>
+ lock(hugetlb_lock);
+
+ *** DEADLOCK ***
+ :
+ Call Trace:
+ <IRQ>
+ __lock_acquire+0x146b/0x48c0
+ lock_acquire+0x14f/0x3b0
+ _raw_spin_lock+0x30/0x70
+ free_huge_page+0x36f/0xaa0
+ bio_check_pages_dirty+0x2fc/0x5c0
+ clone_endio+0x17f/0x670 [dm_mod]
+ blk_update_request+0x276/0xe50
+ scsi_end_request+0x7b/0x6a0
+ scsi_io_completion+0x1c6/0x1570
+ blk_done_softirq+0x22e/0x350
+ __do_softirq+0x23d/0xad8
+ irq_exit+0x23e/0x2b0
+ do_IRQ+0x11a/0x200
+ common_interrupt+0xf/0xf
+ </IRQ>
+
+Both the hugetlb_lock and the subpool lock can be acquired in
+free_huge_page(). One way to solve the problem is to make both locks
+irq-safe. However, Mike Kravetz pointed out that the hugetlb_lock is
+held for a linear scan of ALL hugetlb pages during a cgroup reparenting
+operation. That is just too long to have irqs disabled unless we can
+break hugetlb_lock down into finer-grained locks with shorter lock hold
+times.
+
+Another alternative is to defer the freeing to a workqueue job. This
+patch implements the deferred freeing by adding a free_hpage_workfn()
+work function to do the actual freeing. The free_huge_page() call in a
+non-task context saves the page to be freed in the hpage_freelist linked
+list in a lockless manner using the llist APIs.
+
+The generic workqueue is used to process the work, but a dedicated
+workqueue can be used instead if it is desirable to have the huge page
+freed ASAP.
+
+Thanks to Kirill Tkhai <ktkhai@virtuozzo.com> for suggesting the use of
+llist APIs which simplify the code.
+
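+A condensed sketch of that deferral pattern (simplified names such as
+deferred_pages and free_page_deferred(); the real hugetlb code is in the
+hunk below):
+
+	static LLIST_HEAD(deferred_pages);
+
+	static void deferred_free_workfn(struct work_struct *work)
+	{
+		struct llist_node *node = llist_del_all(&deferred_pages);
+
+		while (node) {
+			struct page *page = container_of((struct address_space **)node,
+							 struct page, mapping);
+			node = node->next;
+			/* do the actual freeing here, now in task context */
+		}
+	}
+	static DECLARE_WORK(deferred_free_work, deferred_free_workfn);
+
+	void free_page_deferred(struct page *page)
+	{
+		/* llist_add() returns true only when the list was empty,
+		 * so the work is scheduled at most once per batch.
+		 */
+		if (llist_add((struct llist_node *)&page->mapping,
+			      &deferred_pages))
+			schedule_work(&deferred_free_work);
+	}
+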
+Link: http://lkml.kernel.org/r/20191217170331.30893-1-longman@redhat.com
+Signed-off-by: Waiman Long <longman@redhat.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Davidlohr Bueso <dbueso@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/hugetlb.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 50 insertions(+), 1 deletion(-)
+
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index b45a95363a84..e0afd582ca01 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -27,6 +27,7 @@
+ #include <linux/swapops.h>
+ #include <linux/jhash.h>
+ #include <linux/numa.h>
++#include <linux/llist.h>
+
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+@@ -1255,7 +1256,7 @@ static inline void ClearPageHugeTemporary(struct page *page)
+ page[2].mapping = NULL;
+ }
+
+-void free_huge_page(struct page *page)
++static void __free_huge_page(struct page *page)
+ {
+ /*
+ * Can't pass hstate in here because it is called from the
+@@ -1318,6 +1319,54 @@ void free_huge_page(struct page *page)
+ spin_unlock(&hugetlb_lock);
+ }
+
++/*
++ * As free_huge_page() can be called from a non-task context, we have
++ * to defer the actual freeing in a workqueue to prevent potential
++ * hugetlb_lock deadlock.
++ *
++ * free_hpage_workfn() locklessly retrieves the linked list of pages to
++ * be freed and frees them one-by-one. As the page->mapping pointer is
++ * going to be cleared in __free_huge_page() anyway, it is reused as the
++ * llist_node structure of a lockless linked list of huge pages to be freed.
++ */
++static LLIST_HEAD(hpage_freelist);
++
++static void free_hpage_workfn(struct work_struct *work)
++{
++ struct llist_node *node;
++ struct page *page;
++
++ node = llist_del_all(&hpage_freelist);
++
++ while (node) {
++ page = container_of((struct address_space **)node,
++ struct page, mapping);
++ node = node->next;
++ __free_huge_page(page);
++ }
++}
++static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
++
++void free_huge_page(struct page *page)
++{
++ /*
++ * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
++ */
++ if (!in_task()) {
++ /*
++ * Only call schedule_work() if hpage_freelist is previously
++ * empty. Otherwise, schedule_work() had been called but the
++ * workfn hasn't retrieved the list yet.
++ */
++ if (llist_add((struct llist_node *)&page->mapping,
++ &hpage_freelist))
++ schedule_work(&free_hpage_work);
++ return;
++ }
++
++ __free_huge_page(page);
++}
++
+ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+ {
+ INIT_LIST_HEAD(&page->lru);
+--
+2.20.1
+
--- /dev/null
+From f6cef242c3be3091330006837025227760bc95e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Nov 2019 17:54:24 -0800
+Subject: mm/sparse.c: mark populate_section_memmap as __meminit
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+[ Upstream commit 030eab4f9ffb469344c10a46bc02c5149db0a2a9 ]
+
+Building the kernel on s390 with -Og produces the following warning:
+
+ WARNING: vmlinux.o(.text+0x28dabe): Section mismatch in reference from the function populate_section_memmap() to the function .meminit.text:__populate_section_memmap()
+ The function populate_section_memmap() references
+ the function __meminit __populate_section_memmap().
+ This is often because populate_section_memmap lacks a __meminit
+ annotation or the annotation of __populate_section_memmap is wrong.
+
+While -Og is not supported, in theory this might still happen with
+another compiler or on another architecture. So fix this by using the
+correct section annotations.
+
+[iii@linux.ibm.com: v2]
+ Link: http://lkml.kernel.org/r/20191030151639.41486-1-iii@linux.ibm.com
+Link: http://lkml.kernel.org/r/20191028165549.14478-1-iii@linux.ibm.com
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: Oscar Salvador <OSalvador@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/sparse.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/mm/sparse.c b/mm/sparse.c
+index f6891c1992b1..c2c01b6330af 100644
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -647,7 +647,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
+ #endif
+
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+-static struct page *populate_section_memmap(unsigned long pfn,
++static struct page * __meminit populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
+ {
+ return __populate_section_memmap(pfn, nr_pages, nid, altmap);
+@@ -669,7 +669,7 @@ static void free_map_bootmem(struct page *memmap)
+ vmemmap_free(start, end, NULL);
+ }
+ #else
+-struct page *populate_section_memmap(unsigned long pfn,
++struct page * __meminit populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
+ {
+ struct page *page, *ret;
+--
+2.20.1
+
--- /dev/null
+From e52a366deaff0f9f0630fa809f426735e5beb3aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Nov 2019 18:29:11 -0800
+Subject: net: add annotations on hh->hh_len lockless accesses
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c305c6ae79e2ce20c22660ceda94f0d86d639a82 ]
+
+KCSAN reported a data-race [1]
+
+While we can use READ_ONCE() on the read sides,
+we need to make sure hh->hh_len is written last.
+
+[1]
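+The intended pairing, roughly (a sketch only; hdr, hdr_len and
+use_cached_header() are placeholders for the real header data and
+consumers such as neigh_hh_output()):
+
+	/* writer: fill in the cached header first, publish the length last */
+	memcpy(hh->hh_data, hdr, hdr_len);
+	smp_store_release(&hh->hh_len, hdr_len);
+
+	/* reader: a non-zero length means hh_data is fully initialized */
+	if (READ_ONCE(hh->hh_len))
+		use_cached_header(hh);
+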
+
+BUG: KCSAN: data-race in eth_header_cache / neigh_resolve_output
+
+write to 0xffff8880b9dedcb8 of 4 bytes by task 29760 on cpu 0:
+ eth_header_cache+0xa9/0xd0 net/ethernet/eth.c:247
+ neigh_hh_init net/core/neighbour.c:1463 [inline]
+ neigh_resolve_output net/core/neighbour.c:1480 [inline]
+ neigh_resolve_output+0x415/0x470 net/core/neighbour.c:1470
+ neigh_output include/net/neighbour.h:511 [inline]
+ ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116
+ __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
+ __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
+ ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
+ NF_HOOK_COND include/linux/netfilter.h:294 [inline]
+ ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
+ dst_output include/net/dst.h:436 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505
+ ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647
+ rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615
+ process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
+ worker_thread+0xa0/0x800 kernel/workqueue.c:2415
+ kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352
+
+read to 0xffff8880b9dedcb8 of 4 bytes by task 29572 on cpu 1:
+ neigh_resolve_output net/core/neighbour.c:1479 [inline]
+ neigh_resolve_output+0x113/0x470 net/core/neighbour.c:1470
+ neigh_output include/net/neighbour.h:511 [inline]
+ ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116
+ __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
+ __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
+ ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
+ NF_HOOK_COND include/linux/netfilter.h:294 [inline]
+ ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
+ dst_output include/net/dst.h:436 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505
+ ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647
+ rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615
+ process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
+ worker_thread+0xa0/0x800 kernel/workqueue.c:2415
+ kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 29572 Comm: kworker/1:4 Not tainted 5.4.0-rc6+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: events rt6_probe_deferred
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firewire/net.c | 6 +++++-
+ include/net/neighbour.h | 2 +-
+ net/core/neighbour.c | 4 ++--
+ net/ethernet/eth.c | 7 ++++++-
+ 4 files changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
+index b132ab9ad607..715e491dfbc3 100644
+--- a/drivers/firewire/net.c
++++ b/drivers/firewire/net.c
+@@ -250,7 +250,11 @@ static int fwnet_header_cache(const struct neighbour *neigh,
+ h = (struct fwnet_header *)((u8 *)hh->hh_data + HH_DATA_OFF(sizeof(*h)));
+ h->h_proto = type;
+ memcpy(h->h_dest, neigh->ha, net->addr_len);
+- hh->hh_len = FWNET_HLEN;
++
++ /* Pairs with the READ_ONCE() in neigh_resolve_output(),
++ * neigh_hh_output() and neigh_update_hhs().
++ */
++ smp_store_release(&hh->hh_len, FWNET_HLEN);
+
+ return 0;
+ }
+diff --git a/include/net/neighbour.h b/include/net/neighbour.h
+index 5e679c8dae0b..8ec77bfdc1a4 100644
+--- a/include/net/neighbour.h
++++ b/include/net/neighbour.h
+@@ -467,7 +467,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
+
+ do {
+ seq = read_seqbegin(&hh->hh_lock);
+- hh_len = hh->hh_len;
++ hh_len = READ_ONCE(hh->hh_len);
+ if (likely(hh_len <= HH_DATA_MOD)) {
+ hh_alen = HH_DATA_MOD;
+
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index 08ebc3ac5343..f2452496ad9f 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -1194,7 +1194,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
+
+ if (update) {
+ hh = &neigh->hh;
+- if (hh->hh_len) {
++ if (READ_ONCE(hh->hh_len)) {
+ write_seqlock_bh(&hh->hh_lock);
+ update(hh, neigh->dev, neigh->ha);
+ write_sequnlock_bh(&hh->hh_lock);
+@@ -1473,7 +1473,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
+ struct net_device *dev = neigh->dev;
+ unsigned int seq;
+
+- if (dev->header_ops->cache && !neigh->hh.hh_len)
++ if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
+ neigh_hh_init(neigh);
+
+ do {
+diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
+index 17374afee28f..9040fe55e0f5 100644
+--- a/net/ethernet/eth.c
++++ b/net/ethernet/eth.c
+@@ -244,7 +244,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16
+ eth->h_proto = type;
+ memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
+ memcpy(eth->h_dest, neigh->ha, ETH_ALEN);
+- hh->hh_len = ETH_HLEN;
++
++ /* Pairs with READ_ONCE() in neigh_resolve_output(),
++ * neigh_hh_output() and neigh_update_hhs().
++ */
++ smp_store_release(&hh->hh_len, ETH_HLEN);
++
+ return 0;
+ }
+ EXPORT_SYMBOL(eth_header_cache);
+--
+2.20.1
+
--- /dev/null
+From 7a48ede0f00838af038f9bca5c71d2ba8e75e29a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Dec 2019 18:51:03 -0800
+Subject: net: annotate lockless accesses to sk->sk_pacing_shift
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7c68fa2bddda6d942bd387c9ba5b4300737fd991 ]
+
+sk->sk_pacing_shift can be read and written without lock
+synchronization. This patch adds annotations to
+document this fact and avoid future syzbot complaints.
+
+This might also avoid unexpected false sharing
+in sk_pacing_shift_update(), as the compiler
+could remove the conditional check and always
+write over sk->sk_pacing_shift :
+
+if (sk->sk_pacing_shift != val)
+ sk->sk_pacing_shift = val;
+
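+With the annotations the update becomes (a sketch of the intent,
+matching the sk_pacing_shift_update() hunk below):
+
+	if (READ_ONCE(sk->sk_pacing_shift) != val)
+		WRITE_ONCE(sk->sk_pacing_shift, val);
+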
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h | 4 ++--
+ net/core/sock.c | 2 +-
+ net/ipv4/tcp_bbr.c | 3 ++-
+ net/ipv4/tcp_output.c | 4 ++--
+ 4 files changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index e09e2886a836..6c5a3809483e 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2589,9 +2589,9 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+ */
+ static inline void sk_pacing_shift_update(struct sock *sk, int val)
+ {
+- if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
++ if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val)
+ return;
+- sk->sk_pacing_shift = val;
++ WRITE_ONCE(sk->sk_pacing_shift, val);
+ }
+
+ /* if a socket is bound to a device, check that the given device
+diff --git a/net/core/sock.c b/net/core/sock.c
+index ac78a570e43a..b4d1112174c1 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2918,7 +2918,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
+
+ sk->sk_max_pacing_rate = ~0UL;
+ sk->sk_pacing_rate = ~0UL;
+- sk->sk_pacing_shift = 10;
++ WRITE_ONCE(sk->sk_pacing_shift, 10);
+ sk->sk_incoming_cpu = -1;
+
+ sk_rx_queue_clear(sk);
+diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
+index 32772d6ded4e..a6545ef0d27b 100644
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -306,7 +306,8 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
+ /* Sort of tcp_tso_autosize() but ignoring
+ * driver provided sk_gso_max_size.
+ */
+- bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift,
++ bytes = min_t(unsigned long,
++ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
+ GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 0269584e9cf7..e4ba915c4bb5 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1728,7 +1728,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ u32 bytes, segs;
+
+ bytes = min_t(unsigned long,
+- sk->sk_pacing_rate >> sk->sk_pacing_shift,
++ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
+ sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+
+ /* Goal is to send at least one packet per ms,
+@@ -2263,7 +2263,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+
+ limit = max_t(unsigned long,
+ 2 * skb->truesize,
+- sk->sk_pacing_rate >> sk->sk_pacing_shift);
++ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ limit = min_t(unsigned long, limit,
+ sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+--
+2.20.1
+
--- /dev/null
+From 91c475cf7bc81d3399504610ebe07789ba462887 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Dec 2019 17:28:52 +0300
+Subject: perf/x86/intel/bts: Fix the use of page_private()
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit ff61541cc6c1962957758ba433c574b76f588d23 ]
+
+Commit
+
+ 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver")
+
+brought in a warning with the BTS buffer initialization
+that is easily tripped with (assuming KPTI is disabled):
+
+instantly throwing:
+
+> ------------[ cut here ]------------
+> WARNING: CPU: 2 PID: 326 at arch/x86/events/intel/bts.c:86 bts_buffer_setup_aux+0x117/0x3d0
+> Modules linked in:
+> CPU: 2 PID: 326 Comm: perf Not tainted 5.4.0-rc8-00291-gceb9e77324fa #904
+> RIP: 0010:bts_buffer_setup_aux+0x117/0x3d0
+> Call Trace:
+> rb_alloc_aux+0x339/0x550
+> perf_mmap+0x607/0xc70
+> mmap_region+0x76b/0xbd0
+...
+
+It appears to assume (for lost raisins) that PagePrivate() is set,
+while later it actually tests for PagePrivate() before using
+page_private().
+
+Make it consistent and always check PagePrivate() before using
+page_private().
+
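+The consistent form boils down to a small helper along these lines
+(sketch of the idea; the helper actually added below is buf_nr_pages()):
+
+	/* a page without PagePrivate() set is a single, order-0 page */
+	static int nr_pages_of(struct page *page)
+	{
+		if (!PagePrivate(page))
+			return 1;
+		return 1 << page_private(page);
+	}
+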
+Fixes: 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver")
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Link: https://lkml.kernel.org/r/20191205142853.28894-2-alexander.shishkin@linux.intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/events/intel/bts.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
+index 5ee3fed881d3..741540d849f3 100644
+--- a/arch/x86/events/intel/bts.c
++++ b/arch/x86/events/intel/bts.c
+@@ -63,9 +63,17 @@ struct bts_buffer {
+
+ static struct pmu bts_pmu;
+
++static int buf_nr_pages(struct page *page)
++{
++ if (!PagePrivate(page))
++ return 1;
++
++ return 1 << page_private(page);
++}
++
+ static size_t buf_size(struct page *page)
+ {
+- return 1 << (PAGE_SHIFT + page_private(page));
++ return buf_nr_pages(page) * PAGE_SIZE;
+ }
+
+ static void *
+@@ -83,9 +91,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
+ /* count all the high order buffers */
+ for (pg = 0, nbuf = 0; pg < nr_pages;) {
+ page = virt_to_page(pages[pg]);
+- if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
+- return NULL;
+- pg += 1 << page_private(page);
++ pg += buf_nr_pages(page);
+ nbuf++;
+ }
+
+@@ -109,7 +115,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
+ unsigned int __nr_pages;
+
+ page = virt_to_page(pages[pg]);
+- __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
++ __nr_pages = buf_nr_pages(page);
+ buf->buf[nbuf].page = page;
+ buf->buf[nbuf].offset = offset;
+ buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+--
+2.20.1
+
--- /dev/null
+From 452dca8cd002d7bc535424a4dfa0fe177b38dbcb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Nov 2019 14:55:38 +0100
+Subject: s390/smp: fix physical to logical CPU map for SMT
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+[ Upstream commit 72a81ad9d6d62dcb79f7e8ad66ffd1c768b72026 ]
+
+If an SMT capable system is not IPL'ed from the first CPU, the setup of
+the physical to logical CPU mapping is broken: the IPL core gets CPU
+number 0, but then the next core gets CPU number 1. Correct would be
+that all SMT threads of CPU 0 get the subsequent logical CPU numbers.
+
+This is important since a lot of code (like e.g. the CPU topology
+code) assumes that CPU maps are setup like this. If the mapping is
+broken the system will not IPL due to broken topology masks:
+
+[ 1.716341] BUG: arch topology broken
+[ 1.716342] the SMT domain not a subset of the MC domain
+[ 1.716343] BUG: arch topology broken
+[ 1.716344] the MC domain not a subset of the BOOK domain
+
+This scenario usually cannot happen since LPARs are always IPL'ed
+from CPU 0 and re-IPL is also initiated from CPU 0. However, older
+kernels did initiate re-IPL on an arbitrary CPU. Therefore, if a re-IPL
+from an old kernel into a new kernel is initiated, this may lead to a
+crash.
+
+Fix this by setting up the physical to logical CPU mapping correctly.
+
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/smp.c | 80 ++++++++++++++++++++++++++++--------------
+ 1 file changed, 54 insertions(+), 26 deletions(-)
+
+diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
+index d95c85780e07..06dddd7c4290 100644
+--- a/arch/s390/kernel/smp.c
++++ b/arch/s390/kernel/smp.c
+@@ -727,39 +727,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
+
+ static int smp_add_present_cpu(int cpu);
+
+-static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
++static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
++ bool configured, bool early)
+ {
+ struct pcpu *pcpu;
+- cpumask_t avail;
+- int cpu, nr, i, j;
++ int cpu, nr, i;
+ u16 address;
+
+ nr = 0;
+- cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+- cpu = cpumask_first(&avail);
+- for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
+- if (sclp.has_core_type && info->core[i].type != boot_core_type)
++ if (sclp.has_core_type && core->type != boot_core_type)
++ return nr;
++ cpu = cpumask_first(avail);
++ address = core->core_id << smp_cpu_mt_shift;
++ for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
++ if (pcpu_find_address(cpu_present_mask, address + i))
+ continue;
+- address = info->core[i].core_id << smp_cpu_mt_shift;
+- for (j = 0; j <= smp_cpu_mtid; j++) {
+- if (pcpu_find_address(cpu_present_mask, address + j))
+- continue;
+- pcpu = pcpu_devices + cpu;
+- pcpu->address = address + j;
+- pcpu->state =
+- (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
+- CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+- set_cpu_present(cpu, true);
+- if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+- set_cpu_present(cpu, false);
+- else
+- nr++;
+- cpu = cpumask_next(cpu, &avail);
+- if (cpu >= nr_cpu_ids)
++ pcpu = pcpu_devices + cpu;
++ pcpu->address = address + i;
++ if (configured)
++ pcpu->state = CPU_STATE_CONFIGURED;
++ else
++ pcpu->state = CPU_STATE_STANDBY;
++ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
++ set_cpu_present(cpu, true);
++ if (!early && smp_add_present_cpu(cpu) != 0)
++ set_cpu_present(cpu, false);
++ else
++ nr++;
++ cpumask_clear_cpu(cpu, avail);
++ cpu = cpumask_next(cpu, avail);
++ }
++ return nr;
++}
++
++static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
++{
++ struct sclp_core_entry *core;
++ cpumask_t avail;
++ bool configured;
++ u16 core_id;
++ int nr, i;
++
++ nr = 0;
++ cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
++ /*
++ * Add IPL core first (which got logical CPU number 0) to make sure
++ * that all SMT threads get subsequent logical CPU numbers.
++ */
++ if (early) {
++ core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
++ for (i = 0; i < info->configured; i++) {
++ core = &info->core[i];
++ if (core->core_id == core_id) {
++ nr += smp_add_core(core, &avail, true, early);
+ break;
++ }
+ }
+ }
++ for (i = 0; i < info->combined; i++) {
++ configured = i < info->configured;
++ nr += smp_add_core(&info->core[i], &avail, configured, early);
++ }
+ return nr;
+ }
+
+@@ -808,7 +836,7 @@ void __init smp_detect_cpus(void)
+
+ /* Add CPUs present at boot */
+ get_online_cpus();
+- __smp_rescan_cpus(info, 0);
++ __smp_rescan_cpus(info, true);
+ put_online_cpus();
+ memblock_free_early((unsigned long)info, sizeof(*info));
+ }
+@@ -1153,7 +1181,7 @@ int __ref smp_rescan_cpus(void)
+ smp_get_core_info(info, 0);
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
+- nr = __smp_rescan_cpus(info, 1);
++ nr = __smp_rescan_cpus(info, false);
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
+ kfree(info);
+--
+2.20.1
+
cifs-fix-lookup-of-root-ses-in-dfs-referral-cache.patch
fs-cifs-fix-atime-update-check-vs-mtime.patch
fix-compat-handling-of-ficlonerange-fideduperange-and-fs_ioc_fiemap.patch
+ath9k_htc-modify-byte-order-for-an-error-message.patch
+ath9k_htc-discard-undersized-packets.patch
+drm-i915-execlists-fix-annotation-for-decoupling-vir.patch
+xfs-periodically-yield-scrub-threads-to-the-schedule.patch
+net-add-annotations-on-hh-hh_len-lockless-accesses.patch
+ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch
+btrfs-get-rid-of-unique-workqueue-helper-functions.patch
+btrfs-only-associate-the-locked-page-with-one-async_.patch
+s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch
+mm-sparse.c-mark-populate_section_memmap-as-__memini.patch
+xen-blkback-avoid-unmapping-unmapped-grant-pages.patch
+lib-ubsan-don-t-serialize-ubsan-report.patch
+efi-don-t-attempt-to-map-rci2-config-table-if-it-doe.patch
+perf-x86-intel-bts-fix-the-use-of-page_private.patch
+net-annotate-lockless-accesses-to-sk-sk_pacing_shift.patch
+hsr-avoid-debugfs-warning-message-when-module-is-rem.patch
+hsr-fix-error-handling-routine-in-hsr_dev_finalize.patch
+hsr-fix-a-race-condition-in-node-list-insertion-and-.patch
+mm-hugetlb-defer-freeing-of-huge-pages-if-in-non-tas.patch
--- /dev/null
+From 94ae1299adf76cf1547149c3ae54ba40d5911708 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 20 Jul 2019 14:05:20 +0800
+Subject: ubifs: ubifs_tnc_start_commit: Fix OOB in layout_in_gaps
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+[ Upstream commit 6abf57262166b4f4294667fb5206ae7ba1ba96f5 ]
+
+Running stress-test test_2 in mtd-utils on ubi device, sometimes we can
+get following oops message:
+
+ BUG: unable to handle page fault for address: ffffffff00000140
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 280a067 P4D 280a067 PUD 0
+ Oops: 0000 [#1] SMP
+ CPU: 0 PID: 60 Comm: kworker/u16:1 Kdump: loaded Not tainted 5.2.0 #13
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0
+ -0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+ Workqueue: writeback wb_workfn (flush-ubifs_0_0)
+ RIP: 0010:rb_next_postorder+0x2e/0xb0
+ Code: 80 db 03 01 48 85 ff 0f 84 97 00 00 00 48 8b 17 48 83 05 bc 80 db
+ 03 01 48 83 e2 fc 0f 84 82 00 00 00 48 83 05 b2 80 db 03 01 <48> 3b 7a
+ 10 48 89 d0 74 02 f3 c3 48 8b 52 08 48 83 05 a3 80 db 03
+ RSP: 0018:ffffc90000887758 EFLAGS: 00010202
+ RAX: ffff888129ae4700 RBX: ffff888138b08400 RCX: 0000000080800001
+ RDX: ffffffff00000130 RSI: 0000000080800024 RDI: ffff888138b08400
+ RBP: ffff888138b08400 R08: ffffea0004a6b920 R09: 0000000000000000
+ R10: ffffc90000887740 R11: 0000000000000001 R12: ffff888128d48000
+ R13: 0000000000000800 R14: 000000000000011e R15: 00000000000007c8
+ FS: 0000000000000000(0000) GS:ffff88813ba00000(0000)
+ knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: ffffffff00000140 CR3: 000000013789d000 CR4: 00000000000006f0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ destroy_old_idx+0x5d/0xa0 [ubifs]
+ ubifs_tnc_start_commit+0x4fe/0x1380 [ubifs]
+ do_commit+0x3eb/0x830 [ubifs]
+ ubifs_run_commit+0xdc/0x1c0 [ubifs]
+
+The above Oops is due to a slab out-of-bounds access that happens in the
+do-while loop of layout_in_gaps(), indirectly called by
+ubifs_tnc_start_commit(). In layout_in_gaps() there is a do-while loop
+that places index nodes into the gaps created by obsolete index nodes in
+non-empty index LEBs, until the remaining index nodes can all be placed
+into pre-allocated empty LEBs. @c->gap_lebs points to a memory area (an
+integer array) which records the LEB numbers used by the 'in-the-gaps'
+method. Whenever a suitable index LEB is found, the corresponding lnum
+is incrementally written into the memory area pointed to by
+@c->gap_lebs. The size of this memory area,
+((@c->lst.idx_lebs + 1) * sizeof(int)), is allocated before the do-while
+loop and cannot be changed inside the loop. But @c->lst.idx_lebs can be
+increased by ubifs_change_lp() (called via
+layout_leb_in_gaps->ubifs_find_dirty_idx_leb->get_idx_gc_leb) during the
+loop. So an out-of-bounds access sometimes happens when the number of
+loop iterations exceeds the original value of @c->lst.idx_lebs. See
+details in https://bugzilla.kernel.org/show_bug.cgi?id=204229.
+This patch fixes the OOB access in layout_in_gaps().
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
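+The shape of the fix is the usual "grow the array on demand" pattern
+(a sketch with simplified conditions and names, resembling the hunk
+below rather than reproducing it exactly):
+
+	/* @p indexes the next slot; grow gap_lebs if idx_lebs has grown */
+	if (p >= old_idx_lebs && old_idx_lebs < c->lst.idx_lebs) {
+		int *new_lebs;
+
+		old_idx_lebs = c->lst.idx_lebs;
+		new_lebs = krealloc(c->gap_lebs,
+				    sizeof(int) * (old_idx_lebs + 1), GFP_NOFS);
+		if (!new_lebs) {
+			/* krealloc() left the old buffer intact, free it */
+			kfree(c->gap_lebs);
+			c->gap_lebs = NULL;
+			return -ENOMEM;
+		}
+		c->gap_lebs = new_lebs;
+	}
+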
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ubifs/tnc_commit.c | 34 +++++++++++++++++++++++++++-------
+ 1 file changed, 27 insertions(+), 7 deletions(-)
+
+diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
+index a384a0f9ff32..234be1c4dc87 100644
+--- a/fs/ubifs/tnc_commit.c
++++ b/fs/ubifs/tnc_commit.c
+@@ -212,7 +212,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
+ /**
+ * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
+ * @c: UBIFS file-system description object
+- * @p: return LEB number here
++ * @p: return LEB number in @c->gap_lebs[p]
+ *
+ * This function lays out new index nodes for dirty znodes using in-the-gaps
+ * method of TNC commit.
+@@ -221,7 +221,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
+ * This function returns the number of index nodes written into the gaps, or a
+ * negative error code on failure.
+ */
+-static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
++static int layout_leb_in_gaps(struct ubifs_info *c, int p)
+ {
+ struct ubifs_scan_leb *sleb;
+ struct ubifs_scan_node *snod;
+@@ -236,7 +236,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
+ * filled, however we do not check there at present.
+ */
+ return lnum; /* Error code */
+- *p = lnum;
++ c->gap_lebs[p] = lnum;
+ dbg_gc("LEB %d", lnum);
+ /*
+ * Scan the index LEB. We use the generic scan for this even though
+@@ -355,7 +355,7 @@ static int get_leb_cnt(struct ubifs_info *c, int cnt)
+ */
+ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+ {
+- int err, leb_needed_cnt, written, *p;
++ int err, leb_needed_cnt, written, p = 0, old_idx_lebs, *gap_lebs;
+
+ dbg_gc("%d znodes to write", cnt);
+
+@@ -364,9 +364,9 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+ if (!c->gap_lebs)
+ return -ENOMEM;
+
+- p = c->gap_lebs;
++ old_idx_lebs = c->lst.idx_lebs;
+ do {
+- ubifs_assert(c, p < c->gap_lebs + c->lst.idx_lebs);
++ ubifs_assert(c, p < c->lst.idx_lebs);
+ written = layout_leb_in_gaps(c, p);
+ if (written < 0) {
+ err = written;
+@@ -392,9 +392,29 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+ leb_needed_cnt = get_leb_cnt(c, cnt);
+ dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt,
+ leb_needed_cnt, c->ileb_cnt);
++ /*
++ * Dynamically change the size of @c->gap_lebs to prevent
++ * oob, because @c->lst.idx_lebs could be increased by
++ * function @get_idx_gc_leb (called by layout_leb_in_gaps->
++ * ubifs_find_dirty_idx_leb) during loop. Only enlarge
++ * @c->gap_lebs when needed.
++ *
++ */
++ if (leb_needed_cnt > c->ileb_cnt && p >= old_idx_lebs &&
++ old_idx_lebs < c->lst.idx_lebs) {
++ old_idx_lebs = c->lst.idx_lebs;
++ gap_lebs = krealloc(c->gap_lebs, sizeof(int) *
++ (old_idx_lebs + 1), GFP_NOFS);
++ if (!gap_lebs) {
++ kfree(c->gap_lebs);
++ c->gap_lebs = NULL;
++ return -ENOMEM;
++ }
++ c->gap_lebs = gap_lebs;
++ }
+ } while (leb_needed_cnt > c->ileb_cnt);
+
+- *p = -1;
++ c->gap_lebs[p] = -1;
+ return 0;
+ }
+
+--
+2.20.1
+
--- /dev/null
+From 6e86974fa7e00a71d39242261a04b8add0d08e3c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Nov 2019 16:36:05 +0100
+Subject: xen/blkback: Avoid unmapping unmapped grant pages
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: SeongJae Park <sjpark@amazon.de>
+
+[ Upstream commit f9bd84a8a845d82f9b5a081a7ae68c98a11d2e84 ]
+
+For each I/O request, blkback first maps the foreign pages for the
+request to its local pages. If an allocation of a local page for the
+mapping fails, it should unmap every mapping already made for the
+request.
+
+However, blkback's handling mechanism for the allocation failure does
+not mark the remaining foreign pages as unmapped. Therefore, the unmap
+function merely tries to unmap every valid grant page for the request,
+including the pages not mapped due to the allocation failure. On a
+system that fails the allocation frequently, this problem leads to the
+following kernel crash.
+
+ [ 372.012538] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
+ [ 372.012546] IP: [<ffffffff814071ac>] gnttab_unmap_refs.part.7+0x1c/0x40
+ [ 372.012557] PGD 16f3e9067 PUD 16426e067 PMD 0
+ [ 372.012562] Oops: 0002 [#1] SMP
+ [ 372.012566] Modules linked in: act_police sch_ingress cls_u32
+ ...
+ [ 372.012746] Call Trace:
+ [ 372.012752] [<ffffffff81407204>] gnttab_unmap_refs+0x34/0x40
+ [ 372.012759] [<ffffffffa0335ae3>] xen_blkbk_unmap+0x83/0x150 [xen_blkback]
+ ...
+ [ 372.012802] [<ffffffffa0336c50>] dispatch_rw_block_io+0x970/0x980 [xen_blkback]
+ ...
+ Decompressing Linux... Parsing ELF... done.
+ Booting the kernel.
+ [ 0.000000] Initializing cgroup subsys cpuset
+
+This commit fixes this problem by marking the grant pages of the given
+request that were not mapped due to the allocation failure as invalid.
+
+Fixes: c6cc142dac52 ("xen-blkback: use balloon pages for all mappings")
+
+Reviewed-by: David Woodhouse <dwmw@amazon.de>
+Reviewed-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: Paul Durrant <pdurrant@amazon.co.uk>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: SeongJae Park <sjpark@amazon.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/xen-blkback/blkback.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
+index fd1e19f1a49f..3666afa639d1 100644
+--- a/drivers/block/xen-blkback/blkback.c
++++ b/drivers/block/xen-blkback/blkback.c
+@@ -936,6 +936,8 @@ next:
+ out_of_memory:
+ pr_alert("%s: out of memory\n", __func__);
+ put_free_pages(ring, pages_to_gnt, segs_to_map);
++ for (i = last_map; i < num; i++)
++ pages[i]->handle = BLKBACK_INVALID_HANDLE;
+ return -ENOMEM;
+ }
+
+--
+2.20.1
+
--- /dev/null
+From e75720ba35fd5a8ca6a895f912bc1176b676e5eb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Nov 2019 15:33:57 -0800
+Subject: xfs: periodically yield scrub threads to the scheduler
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+[ Upstream commit 5d1116d4c6af3e580f1ed0382ca5a94bd65a34cf ]
+
+Christoph Hellwig complained about the following soft lockup warning
+when running scrub after generic/175 when preemption is disabled and
+slub debugging is enabled:
+
+watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [xfs_scrub:161]
+Modules linked in:
+irq event stamp: 41692326
+hardirqs last enabled at (41692325): [<ffffffff8232c3b7>] _raw_0
+hardirqs last disabled at (41692326): [<ffffffff81001c5a>] trace0
+softirqs last enabled at (41684994): [<ffffffff8260031f>] __do_e
+softirqs last disabled at (41684987): [<ffffffff81127d8c>] irq_e0
+CPU: 3 PID: 16189 Comm: xfs_scrub Not tainted 5.4.0-rc3+ #30
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.124
+RIP: 0010:_raw_spin_unlock_irqrestore+0x39/0x40
+Code: 89 f3 be 01 00 00 00 e8 d5 3a e5 fe 48 89 ef e8 ed 87 e5 f2
+RSP: 0018:ffffc9000233f970 EFLAGS: 00000286 ORIG_RAX: ffffffffff3
+RAX: ffff88813b398040 RBX: 0000000000000286 RCX: 0000000000000006
+RDX: 0000000000000006 RSI: ffff88813b3988c0 RDI: ffff88813b398040
+RBP: ffff888137958640 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffffea00042b0c00
+R13: 0000000000000001 R14: ffff88810ac32308 R15: ffff8881376fc040
+FS: 00007f6113dea700(0000) GS:ffff88813bb80000(0000) knlGS:00000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f6113de8ff8 CR3: 000000012f290000 CR4: 00000000000006e0
+Call Trace:
+ free_debug_processing+0x1dd/0x240
+ __slab_free+0x231/0x410
+ kmem_cache_free+0x30e/0x360
+ xchk_ag_btcur_free+0x76/0xb0
+ xchk_ag_free+0x10/0x80
+ xchk_bmap_iextent_xref.isra.14+0xd9/0x120
+ xchk_bmap_iextent+0x187/0x210
+ xchk_bmap+0x2e0/0x3b0
+ xfs_scrub_metadata+0x2e7/0x500
+ xfs_ioc_scrub_metadata+0x4a/0xa0
+ xfs_file_ioctl+0x58a/0xcd0
+ do_vfs_ioctl+0xa0/0x6f0
+ ksys_ioctl+0x5b/0x90
+ __x64_sys_ioctl+0x11/0x20
+ do_syscall_64+0x4b/0x1a0
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+If preemption is disabled, all metadata buffers needed to perform the
+scrub are already in memory, and there are a lot of records to check,
+then it's possible that the scrub thread will run for an extended period
+time without sleeping for IO or any other reason. Then the watchdog
+timer or the RCU stall timeout can trigger, producing the backtrace
+above.
+
+To fix this problem, call cond_resched() from the scrub thread so that
+we back out to the scheduler whenever necessary.
+
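+The pattern is simply a periodic yield inside the long-running check
+loop, e.g. (a sketch; nr_records and check_one_record() are placeholders
+for the actual scrub work):
+
+	int i, error;
+
+	for (i = 0; i < nr_records; i++) {
+		/* give the scheduler, soft lockup watchdog and RCU a chance */
+		cond_resched();
+		if (fatal_signal_pending(current))
+			return -EAGAIN;
+		error = check_one_record(i);
+		if (error)
+			return error;
+	}
+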
+Reported-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/scrub/common.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
+index 003a772cd26c..2e50d146105d 100644
+--- a/fs/xfs/scrub/common.h
++++ b/fs/xfs/scrub/common.h
+@@ -14,8 +14,15 @@
+ static inline bool
+ xchk_should_terminate(
+ struct xfs_scrub *sc,
+- int *error)
++ int *error)
+ {
++ /*
++ * If preemption is disabled, we need to yield to the scheduler every
++ * few seconds so that we don't run afoul of the soft lockup watchdog
++ * or RCU stall detector.
++ */
++ cond_resched();
++
+ if (fatal_signal_pending(current)) {
+ if (*error == 0)
+ *error = -EAGAIN;
+--
+2.20.1
+