git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
fixes for 5.4
author Sasha Levin <sashal@kernel.org>
Tue, 7 Jan 2020 18:07:23 +0000 (13:07 -0500)
committer Sasha Levin <sashal@kernel.org>
Tue, 7 Jan 2020 18:07:23 +0000 (13:07 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
20 files changed:
queue-5.4/ath9k_htc-discard-undersized-packets.patch [new file with mode: 0644]
queue-5.4/ath9k_htc-modify-byte-order-for-an-error-message.patch [new file with mode: 0644]
queue-5.4/btrfs-get-rid-of-unique-workqueue-helper-functions.patch [new file with mode: 0644]
queue-5.4/btrfs-only-associate-the-locked-page-with-one-async_.patch [new file with mode: 0644]
queue-5.4/drm-i915-execlists-fix-annotation-for-decoupling-vir.patch [new file with mode: 0644]
queue-5.4/efi-don-t-attempt-to-map-rci2-config-table-if-it-doe.patch [new file with mode: 0644]
queue-5.4/hsr-avoid-debugfs-warning-message-when-module-is-rem.patch [new file with mode: 0644]
queue-5.4/hsr-fix-a-race-condition-in-node-list-insertion-and-.patch [new file with mode: 0644]
queue-5.4/hsr-fix-error-handling-routine-in-hsr_dev_finalize.patch [new file with mode: 0644]
queue-5.4/lib-ubsan-don-t-serialize-ubsan-report.patch [new file with mode: 0644]
queue-5.4/mm-hugetlb-defer-freeing-of-huge-pages-if-in-non-tas.patch [new file with mode: 0644]
queue-5.4/mm-sparse.c-mark-populate_section_memmap-as-__memini.patch [new file with mode: 0644]
queue-5.4/net-add-annotations-on-hh-hh_len-lockless-accesses.patch [new file with mode: 0644]
queue-5.4/net-annotate-lockless-accesses-to-sk-sk_pacing_shift.patch [new file with mode: 0644]
queue-5.4/perf-x86-intel-bts-fix-the-use-of-page_private.patch [new file with mode: 0644]
queue-5.4/s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch [new file with mode: 0644]
queue-5.4/xen-blkback-avoid-unmapping-unmapped-grant-pages.patch [new file with mode: 0644]
queue-5.4/xfs-periodically-yield-scrub-threads-to-the-schedule.patch [new file with mode: 0644]

diff --git a/queue-5.4/ath9k_htc-discard-undersized-packets.patch b/queue-5.4/ath9k_htc-discard-undersized-packets.patch
new file mode 100644 (file)
index 0000000..71973a5
--- /dev/null
@@ -0,0 +1,124 @@
+From ea86c5f95edea1e25b0f86df7f2438c980b28a1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2019 11:51:46 +0900
+Subject: ath9k_htc: Discard undersized packets
+
+From: Masashi Honma <masashi.honma@gmail.com>
+
+[ Upstream commit cd486e627e67ee9ab66914d36d3127ef057cc010 ]
+
+Sometimes the hardware will push small packets that trigger a WARN_ON
+in mac80211. Discard them early to avoid this issue.
+
+This patch ports 2 patches from ath9k to ath9k_htc.
+commit 3c0efb745a172bfe96459e20cbd37b0c945d5f8d "ath9k: discard
+undersized packets".
+commit df5c4150501ee7e86383be88f6490d970adcf157 "ath9k: correctly
+handle short radar pulses".
+
+[  112.835889] ------------[ cut here ]------------
+[  112.835971] WARNING: CPU: 5 PID: 0 at net/mac80211/rx.c:804 ieee80211_rx_napi+0xaac/0xb40 [mac80211]
+[  112.835973] Modules linked in: ath9k_htc ath9k_common ath9k_hw ath mac80211 cfg80211 libarc4 nouveau snd_hda_codec_hdmi intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_hda_codec video snd_hda_core ttm snd_hwdep drm_kms_helper snd_pcm crct10dif_pclmul snd_seq_midi drm snd_seq_midi_event crc32_pclmul snd_rawmidi ghash_clmulni_intel snd_seq aesni_intel aes_x86_64 crypto_simd cryptd snd_seq_device glue_helper snd_timer sch_fq_codel i2c_algo_bit fb_sys_fops snd input_leds syscopyarea sysfillrect sysimgblt intel_cstate mei_me intel_rapl_perf soundcore mxm_wmi lpc_ich mei kvm_intel kvm mac_hid irqbypass parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear e1000e ahci libahci wmi
+[  112.836022] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.3.0-wt #1
+[  112.836023] Hardware name: MouseComputer Co.,Ltd. X99-S01/X99-S01, BIOS 1.0C-W7 04/01/2015
+[  112.836056] RIP: 0010:ieee80211_rx_napi+0xaac/0xb40 [mac80211]
+[  112.836059] Code: 00 00 66 41 89 86 b0 00 00 00 e9 c8 fa ff ff 4c 89 b5 40 ff ff ff 49 89 c6 e9 c9 fa ff ff 48 c7 c7 e0 a2 a5 c0 e8 47 41 b0 e9 <0f> 0b 48 89 df e8 5a 94 2d ea e9 02 f9 ff ff 41 39 c1 44 89 85 60
+[  112.836060] RSP: 0018:ffffaa6180220da8 EFLAGS: 00010286
+[  112.836062] RAX: 0000000000000024 RBX: ffff909a20eeda00 RCX: 0000000000000000
+[  112.836064] RDX: 0000000000000000 RSI: ffff909a2f957448 RDI: ffff909a2f957448
+[  112.836065] RBP: ffffaa6180220e78 R08: 00000000000006e9 R09: 0000000000000004
+[  112.836066] R10: 000000000000000a R11: 0000000000000001 R12: 0000000000000000
+[  112.836068] R13: ffff909a261a47a0 R14: 0000000000000000 R15: 0000000000000004
+[  112.836070] FS:  0000000000000000(0000) GS:ffff909a2f940000(0000) knlGS:0000000000000000
+[  112.836071] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  112.836073] CR2: 00007f4e3ffffa08 CR3: 00000001afc0a006 CR4: 00000000001606e0
+[  112.836074] Call Trace:
+[  112.836076]  <IRQ>
+[  112.836083]  ? finish_td+0xb3/0xf0
+[  112.836092]  ? ath9k_rx_prepare.isra.11+0x22f/0x2a0 [ath9k_htc]
+[  112.836099]  ath9k_rx_tasklet+0x10b/0x1d0 [ath9k_htc]
+[  112.836105]  tasklet_action_common.isra.22+0x63/0x110
+[  112.836108]  tasklet_action+0x22/0x30
+[  112.836115]  __do_softirq+0xe4/0x2da
+[  112.836118]  irq_exit+0xae/0xb0
+[  112.836121]  do_IRQ+0x86/0xe0
+[  112.836125]  common_interrupt+0xf/0xf
+[  112.836126]  </IRQ>
+[  112.836130] RIP: 0010:cpuidle_enter_state+0xa9/0x440
+[  112.836133] Code: 3d bc 20 38 55 e8 f7 1d 84 ff 49 89 c7 0f 1f 44 00 00 31 ff e8 28 29 84 ff 80 7d d3 00 0f 85 e6 01 00 00 fb 66 0f 1f 44 00 00 <45> 85 ed 0f 89 ff 01 00 00 41 c7 44 24 10 00 00 00 00 48 83 c4 18
+[  112.836134] RSP: 0018:ffffaa61800e3e48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde
+[  112.836136] RAX: ffff909a2f96b340 RBX: ffffffffabb58200 RCX: 000000000000001f
+[  112.836137] RDX: 0000001a458adc5d RSI: 0000000026c9b581 RDI: 0000000000000000
+[  112.836139] RBP: ffffaa61800e3e88 R08: 0000000000000002 R09: 000000000002abc0
+[  112.836140] R10: ffffaa61800e3e18 R11: 000000000000002d R12: ffffca617fb40b00
+[  112.836141] R13: 0000000000000002 R14: ffffffffabb582d8 R15: 0000001a458adc5d
+[  112.836145]  ? cpuidle_enter_state+0x98/0x440
+[  112.836149]  ? menu_select+0x370/0x600
+[  112.836151]  cpuidle_enter+0x2e/0x40
+[  112.836154]  call_cpuidle+0x23/0x40
+[  112.836156]  do_idle+0x204/0x280
+[  112.836159]  cpu_startup_entry+0x1d/0x20
+[  112.836164]  start_secondary+0x167/0x1c0
+[  112.836169]  secondary_startup_64+0xa4/0xb0
+[  112.836173] ---[ end trace 9f4cd18479cc5ae5 ]---
+
+Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 23 +++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+index aba0d454c381..9cec5c216e1f 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+@@ -973,6 +973,8 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+       struct ath_htc_rx_status *rxstatus;
+       struct ath_rx_status rx_stats;
+       bool decrypt_error = false;
++      __be16 rs_datalen;
++      bool is_phyerr;
+       if (skb->len < HTC_RX_FRAME_HEADER_SIZE) {
+               ath_err(common, "Corrupted RX frame, dropping (len: %d)\n",
+@@ -982,11 +984,24 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+       rxstatus = (struct ath_htc_rx_status *)skb->data;
+-      if (be16_to_cpu(rxstatus->rs_datalen) -
+-          (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) {
++      rs_datalen = be16_to_cpu(rxstatus->rs_datalen);
++      if (unlikely(rs_datalen -
++          (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0)) {
+               ath_err(common,
+                       "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n",
+-                      be16_to_cpu(rxstatus->rs_datalen), skb->len);
++                      rs_datalen, skb->len);
++              goto rx_next;
++      }
++
++      is_phyerr = rxstatus->rs_status & ATH9K_RXERR_PHY;
++      /*
++       * Discard zero-length packets and packets smaller than an ACK
++       * which are not PHY_ERROR (short radar pulses have a length of 3)
++       */
++      if (unlikely(!rs_datalen || (rs_datalen < 10 && !is_phyerr))) {
++              ath_warn(common,
++                       "Short RX data len, dropping (dlen: %d)\n",
++                       rs_datalen);
+               goto rx_next;
+       }
+@@ -1011,7 +1026,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+        * Process PHY errors and return so that the packet
+        * can be dropped.
+        */
+-      if (rx_stats.rs_status & ATH9K_RXERR_PHY) {
++      if (unlikely(is_phyerr)) {
+               /* TODO: Not using DFS processing now. */
+               if (ath_cmn_process_fft(&priv->spec_priv, hdr,
+                                   &rx_stats, rx_status->mactime)) {
+-- 
+2.20.1
+
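For context, a minimal userspace sketch of the drop rule this patch adds: zero-length frames are always discarded, and frames shorter than an ACK are discarded unless they carry the PHY-error flag (short radar pulses are only 3 bytes long). The RXERR_PHY value and the 10-byte threshold below are illustrative stand-ins mirroring the diff, not the driver's real definitions.

/* Sketch of the length check introduced above (not kernel code). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RXERR_PHY 0x02          /* stand-in for ATH9K_RXERR_PHY */
#define MIN_NON_PHYERR_LEN 10   /* length of an ACK frame */

static bool keep_rx_frame(uint16_t datalen, uint8_t status)
{
	bool is_phyerr = status & RXERR_PHY;

	if (datalen == 0)
		return false;                        /* always drop empty frames */
	if (datalen < MIN_NON_PHYERR_LEN && !is_phyerr)
		return false;                        /* undersized and not a PHY error */
	return true;
}

int main(void)
{
	printf("len 3, phyerr: %d\n", keep_rx_frame(3, RXERR_PHY)); /* kept */
	printf("len 3, normal: %d\n", keep_rx_frame(3, 0));         /* dropped */
	printf("len 0, phyerr: %d\n", keep_rx_frame(0, RXERR_PHY)); /* dropped */
	return 0;
}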
diff --git a/queue-5.4/ath9k_htc-modify-byte-order-for-an-error-message.patch b/queue-5.4/ath9k_htc-modify-byte-order-for-an-error-message.patch
new file mode 100644 (file)
index 0000000..fdfe8db
--- /dev/null
@@ -0,0 +1,34 @@
+From 0eb0bef03873075031fd47bfbd3a891463c56968 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2019 11:51:45 +0900
+Subject: ath9k_htc: Modify byte order for an error message
+
+From: Masashi Honma <masashi.honma@gmail.com>
+
+[ Upstream commit e01fddc19d215f6ad397894ec2a851d99bf154e2 ]
+
+rs_datalen is be16 so we need to convert it before printing.
+
+Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+index 4e8e80ac8341..aba0d454c381 100644
+--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
++++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+@@ -986,7 +986,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
+           (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) {
+               ath_err(common,
+                       "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n",
+-                      rxstatus->rs_datalen, skb->len);
++                      be16_to_cpu(rxstatus->rs_datalen), skb->len);
+               goto rx_next;
+       }
+-- 
+2.20.1
+
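As a quick illustration of the endianness issue fixed here, the sketch below (plain C using glibc's htobe16()/be16toh()) shows how a big-endian 16-bit length prints incorrectly on a little-endian host unless it is converted first; the value 60 is an arbitrary example.

/* Illustration only: convert a big-endian u16 to host order before printing. */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t rs_datalen_be = htobe16(60);   /* as it arrives from the device */

	printf("raw (wrong on little-endian): %u\n", (unsigned)rs_datalen_be);
	printf("converted (correct):          %u\n", (unsigned)be16toh(rs_datalen_be));
	return 0;
}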
diff --git a/queue-5.4/btrfs-get-rid-of-unique-workqueue-helper-functions.patch b/queue-5.4/btrfs-get-rid-of-unique-workqueue-helper-functions.patch
new file mode 100644 (file)
index 0000000..f6e9ba7
--- /dev/null
@@ -0,0 +1,517 @@
+From 01b911ec79fc1cd8f5b865e80a19bb62e7614ac7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Sep 2019 11:30:57 -0700
+Subject: btrfs: get rid of unique workqueue helper functions
+
+From: Omar Sandoval <osandov@fb.com>
+
+[ Upstream commit a0cac0ec961f0d42828eeef196ac2246a2f07659 ]
+
+Commit 9e0af2376434 ("Btrfs: fix task hang under heavy compressed
+write") worked around the issue that a recycled work item could get a
+false dependency on the original work item due to how the workqueue code
+guarantees non-reentrancy. It did so by giving different work functions
+to different types of work.
+
+However, the fixes in the previous few patches are more complete, as
+they prevent a work item from being recycled at all (except for a tiny
+window that the kernel workqueue code handles for us). This obsoletes
+the previous fix, so we don't need the unique helpers for correctness.
+The only other reason to keep them would be so they show up in stack
+traces, but they always seem to be optimized to a tail call, so they
+don't show up anyways. So, let's just get rid of the extra indirection.
+
+While we're here, rename normal_work_helper() to the more informative
+btrfs_work_helper().
+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/async-thread.c  | 58 +++++++++-------------------------------
+ fs/btrfs/async-thread.h  | 33 ++---------------------
+ fs/btrfs/block-group.c   |  3 +--
+ fs/btrfs/delayed-inode.c |  4 +--
+ fs/btrfs/disk-io.c       | 34 ++++++++---------------
+ fs/btrfs/inode.c         | 36 ++++++++-----------------
+ fs/btrfs/ordered-data.c  |  1 -
+ fs/btrfs/qgroup.c        |  1 -
+ fs/btrfs/raid56.c        |  5 ++--
+ fs/btrfs/reada.c         |  3 +--
+ fs/btrfs/scrub.c         | 14 +++++-----
+ fs/btrfs/volumes.c       |  3 +--
+ 12 files changed, 50 insertions(+), 145 deletions(-)
+
+diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
+index 10a04b99798a..3f3110975f88 100644
+--- a/fs/btrfs/async-thread.c
++++ b/fs/btrfs/async-thread.c
+@@ -53,16 +53,6 @@ struct btrfs_workqueue {
+       struct __btrfs_workqueue *high;
+ };
+-static void normal_work_helper(struct btrfs_work *work);
+-
+-#define BTRFS_WORK_HELPER(name)                                       \
+-noinline_for_stack void btrfs_##name(struct work_struct *arg)         \
+-{                                                                     \
+-      struct btrfs_work *work = container_of(arg, struct btrfs_work,  \
+-                                             normal_work);            \
+-      normal_work_helper(work);                                       \
+-}
+-
+ struct btrfs_fs_info *
+ btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
+ {
+@@ -89,29 +79,6 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
+       return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
+ }
+-BTRFS_WORK_HELPER(worker_helper);
+-BTRFS_WORK_HELPER(delalloc_helper);
+-BTRFS_WORK_HELPER(flush_delalloc_helper);
+-BTRFS_WORK_HELPER(cache_helper);
+-BTRFS_WORK_HELPER(submit_helper);
+-BTRFS_WORK_HELPER(fixup_helper);
+-BTRFS_WORK_HELPER(endio_helper);
+-BTRFS_WORK_HELPER(endio_meta_helper);
+-BTRFS_WORK_HELPER(endio_meta_write_helper);
+-BTRFS_WORK_HELPER(endio_raid56_helper);
+-BTRFS_WORK_HELPER(endio_repair_helper);
+-BTRFS_WORK_HELPER(rmw_helper);
+-BTRFS_WORK_HELPER(endio_write_helper);
+-BTRFS_WORK_HELPER(freespace_write_helper);
+-BTRFS_WORK_HELPER(delayed_meta_helper);
+-BTRFS_WORK_HELPER(readahead_helper);
+-BTRFS_WORK_HELPER(qgroup_rescan_helper);
+-BTRFS_WORK_HELPER(extent_refs_helper);
+-BTRFS_WORK_HELPER(scrub_helper);
+-BTRFS_WORK_HELPER(scrubwrc_helper);
+-BTRFS_WORK_HELPER(scrubnc_helper);
+-BTRFS_WORK_HELPER(scrubparity_helper);
+-
+ static struct __btrfs_workqueue *
+ __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
+                       unsigned int flags, int limit_active, int thresh)
+@@ -302,12 +269,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq,
+                        * original work item cannot depend on the recycled work
+                        * item in that case (see find_worker_executing_work()).
+                        *
+-                       * Note that the work of one Btrfs filesystem may depend
+-                       * on the work of another Btrfs filesystem via, e.g., a
+-                       * loop device. Therefore, we must not allow the current
+-                       * work item to be recycled until we are really done,
+-                       * otherwise we break the above assumption and can
+-                       * deadlock.
++                       * Note that different types of Btrfs work can depend on
++                       * each other, and one type of work on one Btrfs
++                       * filesystem may even depend on the same type of work
++                       * on another Btrfs filesystem via, e.g., a loop device.
++                       * Therefore, we must not allow the current work item to
++                       * be recycled until we are really done, otherwise we
++                       * break the above assumption and can deadlock.
+                        */
+                       free_self = true;
+               } else {
+@@ -331,8 +299,10 @@ static void run_ordered_work(struct __btrfs_workqueue *wq,
+       }
+ }
+-static void normal_work_helper(struct btrfs_work *work)
++static void btrfs_work_helper(struct work_struct *normal_work)
+ {
++      struct btrfs_work *work = container_of(normal_work, struct btrfs_work,
++                                             normal_work);
+       struct __btrfs_workqueue *wq;
+       void *wtag;
+       int need_order = 0;
+@@ -362,15 +332,13 @@ static void normal_work_helper(struct btrfs_work *work)
+               trace_btrfs_all_work_done(wq->fs_info, wtag);
+ }
+-void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
+-                   btrfs_func_t func,
+-                   btrfs_func_t ordered_func,
+-                   btrfs_func_t ordered_free)
++void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
++                   btrfs_func_t ordered_func, btrfs_func_t ordered_free)
+ {
+       work->func = func;
+       work->ordered_func = ordered_func;
+       work->ordered_free = ordered_free;
+-      INIT_WORK(&work->normal_work, uniq_func);
++      INIT_WORK(&work->normal_work, btrfs_work_helper);
+       INIT_LIST_HEAD(&work->ordered_list);
+       work->flags = 0;
+ }
+diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
+index 7861c9feba5f..c5bf2b117c05 100644
+--- a/fs/btrfs/async-thread.h
++++ b/fs/btrfs/async-thread.h
+@@ -29,42 +29,13 @@ struct btrfs_work {
+       unsigned long flags;
+ };
+-#define BTRFS_WORK_HELPER_PROTO(name)                                 \
+-void btrfs_##name(struct work_struct *arg)
+-
+-BTRFS_WORK_HELPER_PROTO(worker_helper);
+-BTRFS_WORK_HELPER_PROTO(delalloc_helper);
+-BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
+-BTRFS_WORK_HELPER_PROTO(cache_helper);
+-BTRFS_WORK_HELPER_PROTO(submit_helper);
+-BTRFS_WORK_HELPER_PROTO(fixup_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_repair_helper);
+-BTRFS_WORK_HELPER_PROTO(rmw_helper);
+-BTRFS_WORK_HELPER_PROTO(endio_write_helper);
+-BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
+-BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
+-BTRFS_WORK_HELPER_PROTO(readahead_helper);
+-BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
+-BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
+-BTRFS_WORK_HELPER_PROTO(scrub_helper);
+-BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
+-BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
+-BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
+-
+-
+ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
+                                             const char *name,
+                                             unsigned int flags,
+                                             int limit_active,
+                                             int thresh);
+-void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
+-                   btrfs_func_t func,
+-                   btrfs_func_t ordered_func,
+-                   btrfs_func_t ordered_free);
++void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
++                   btrfs_func_t ordered_func, btrfs_func_t ordered_free);
+ void btrfs_queue_work(struct btrfs_workqueue *wq,
+                     struct btrfs_work *work);
+ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index 0d2da2366869..7dcfa7d7632a 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -695,8 +695,7 @@ int btrfs_cache_block_group(struct btrfs_block_group_cache *cache,
+       caching_ctl->block_group = cache;
+       caching_ctl->progress = cache->key.objectid;
+       refcount_set(&caching_ctl->count, 1);
+-      btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
+-                      caching_thread, NULL, NULL);
++      btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
+       spin_lock(&cache->lock);
+       /*
+diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
+index 57a9ad3e8c29..c7a53e79c66d 100644
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1367,8 +1367,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
+               return -ENOMEM;
+       async_work->delayed_root = delayed_root;
+-      btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
+-                      btrfs_async_run_delayed_root, NULL, NULL);
++      btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
++                      NULL);
+       async_work->nr = nr;
+       btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 3895c21853cc..bae334212ee2 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -706,43 +706,31 @@ static void end_workqueue_bio(struct bio *bio)
+       struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_workqueue *wq;
+-      btrfs_work_func_t func;
+       fs_info = end_io_wq->info;
+       end_io_wq->status = bio->bi_status;
+       if (bio_op(bio) == REQ_OP_WRITE) {
+-              if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
++              if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
+                       wq = fs_info->endio_meta_write_workers;
+-                      func = btrfs_endio_meta_write_helper;
+-              } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
++              else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
+                       wq = fs_info->endio_freespace_worker;
+-                      func = btrfs_freespace_write_helper;
+-              } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
++              else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+                       wq = fs_info->endio_raid56_workers;
+-                      func = btrfs_endio_raid56_helper;
+-              } else {
++              else
+                       wq = fs_info->endio_write_workers;
+-                      func = btrfs_endio_write_helper;
+-              }
+       } else {
+-              if (unlikely(end_io_wq->metadata ==
+-                           BTRFS_WQ_ENDIO_DIO_REPAIR)) {
++              if (unlikely(end_io_wq->metadata == BTRFS_WQ_ENDIO_DIO_REPAIR))
+                       wq = fs_info->endio_repair_workers;
+-                      func = btrfs_endio_repair_helper;
+-              } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
++              else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+                       wq = fs_info->endio_raid56_workers;
+-                      func = btrfs_endio_raid56_helper;
+-              } else if (end_io_wq->metadata) {
++              else if (end_io_wq->metadata)
+                       wq = fs_info->endio_meta_workers;
+-                      func = btrfs_endio_meta_helper;
+-              } else {
++              else
+                       wq = fs_info->endio_workers;
+-                      func = btrfs_endio_helper;
+-              }
+       }
+-      btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
++      btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
+       btrfs_queue_work(wq, &end_io_wq->work);
+ }
+@@ -835,8 +823,8 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
+       async->mirror_num = mirror_num;
+       async->submit_bio_start = submit_bio_start;
+-      btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
+-                      run_one_async_done, run_one_async_free);
++      btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
++                      run_one_async_free);
+       async->bio_offset = bio_offset;
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index bc6e7d15577a..dc14fc2e4206 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1268,10 +1268,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
+               async_chunk[i].write_flags = write_flags;
+               INIT_LIST_HEAD(&async_chunk[i].extents);
+-              btrfs_init_work(&async_chunk[i].work,
+-                              btrfs_delalloc_helper,
+-                              async_cow_start, async_cow_submit,
+-                              async_cow_free);
++              btrfs_init_work(&async_chunk[i].work, async_cow_start,
++                              async_cow_submit, async_cow_free);
+               nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
+               atomic_add(nr_pages, &fs_info->async_delalloc_pages);
+@@ -2264,8 +2262,7 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
+       SetPageChecked(page);
+       get_page(page);
+-      btrfs_init_work(&fixup->work, btrfs_fixup_helper,
+-                      btrfs_writepage_fixup_worker, NULL, NULL);
++      btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
+       fixup->page = page;
+       btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
+       return -EBUSY;
+@@ -3258,7 +3255,6 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_ordered_extent *ordered_extent = NULL;
+       struct btrfs_workqueue *wq;
+-      btrfs_work_func_t func;
+       trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
+@@ -3267,16 +3263,12 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
+                                           end - start + 1, uptodate))
+               return;
+-      if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
++      if (btrfs_is_free_space_inode(BTRFS_I(inode)))
+               wq = fs_info->endio_freespace_worker;
+-              func = btrfs_freespace_write_helper;
+-      } else {
++      else
+               wq = fs_info->endio_write_workers;
+-              func = btrfs_endio_write_helper;
+-      }
+-      btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
+-                      NULL);
++      btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
+       btrfs_queue_work(wq, &ordered_extent->work);
+ }
+@@ -8213,18 +8205,14 @@ static void __endio_write_update_ordered(struct inode *inode,
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_ordered_extent *ordered = NULL;
+       struct btrfs_workqueue *wq;
+-      btrfs_work_func_t func;
+       u64 ordered_offset = offset;
+       u64 ordered_bytes = bytes;
+       u64 last_offset;
+-      if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
++      if (btrfs_is_free_space_inode(BTRFS_I(inode)))
+               wq = fs_info->endio_freespace_worker;
+-              func = btrfs_freespace_write_helper;
+-      } else {
++      else
+               wq = fs_info->endio_write_workers;
+-              func = btrfs_endio_write_helper;
+-      }
+       while (ordered_offset < offset + bytes) {
+               last_offset = ordered_offset;
+@@ -8232,9 +8220,8 @@ static void __endio_write_update_ordered(struct inode *inode,
+                                                          &ordered_offset,
+                                                          ordered_bytes,
+                                                          uptodate)) {
+-                      btrfs_init_work(&ordered->work, func,
+-                                      finish_ordered_fn,
+-                                      NULL, NULL);
++                      btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
++                                      NULL);
+                       btrfs_queue_work(wq, &ordered->work);
+               }
+               /*
+@@ -10119,8 +10106,7 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
+       init_completion(&work->completion);
+       INIT_LIST_HEAD(&work->list);
+       work->inode = inode;
+-      btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
+-                      btrfs_run_delalloc_work, NULL, NULL);
++      btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
+       return work;
+ }
+diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
+index 24b6c72b9a59..6240a5a1f2c0 100644
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -547,7 +547,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
+               spin_unlock(&root->ordered_extent_lock);
+               btrfs_init_work(&ordered->flush_work,
+-                              btrfs_flush_delalloc_helper,
+                               btrfs_run_ordered_extent_work, NULL, NULL);
+               list_add_tail(&ordered->work_list, &works);
+               btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
+diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
+index 3ad151655eb8..27a903aaf43b 100644
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3280,7 +3280,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
+       memset(&fs_info->qgroup_rescan_work, 0,
+              sizeof(fs_info->qgroup_rescan_work));
+       btrfs_init_work(&fs_info->qgroup_rescan_work,
+-                      btrfs_qgroup_rescan_helper,
+                       btrfs_qgroup_rescan_worker, NULL, NULL);
+       return 0;
+ }
+diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
+index 57a2ac721985..8f47a85944eb 100644
+--- a/fs/btrfs/raid56.c
++++ b/fs/btrfs/raid56.c
+@@ -190,7 +190,7 @@ static void scrub_parity_work(struct btrfs_work *work);
+ static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
+ {
+-      btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
++      btrfs_init_work(&rbio->work, work_func, NULL, NULL);
+       btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
+ }
+@@ -1743,8 +1743,7 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
+       plug = container_of(cb, struct btrfs_plug_cb, cb);
+       if (from_schedule) {
+-              btrfs_init_work(&plug->work, btrfs_rmw_helper,
+-                              unplug_work, NULL, NULL);
++              btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
+               btrfs_queue_work(plug->info->rmw_workers,
+                                &plug->work);
+               return;
+diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
+index dd4f9c2b7107..1feaeadc8cf5 100644
+--- a/fs/btrfs/reada.c
++++ b/fs/btrfs/reada.c
+@@ -819,8 +819,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
+               /* FIXME we cannot handle this properly right now */
+               BUG();
+       }
+-      btrfs_init_work(&rmw->work, btrfs_readahead_helper,
+-                      reada_start_machine_worker, NULL, NULL);
++      btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
+       rmw->fs_info = fs_info;
+       btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
+diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
+index a0770a6aee00..a7b043fd7a57 100644
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -598,8 +598,8 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
+               sbio->index = i;
+               sbio->sctx = sctx;
+               sbio->page_count = 0;
+-              btrfs_init_work(&sbio->work, btrfs_scrub_helper,
+-                              scrub_bio_end_io_worker, NULL, NULL);
++              btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
++                              NULL);
+               if (i != SCRUB_BIOS_PER_SCTX - 1)
+                       sctx->bios[i]->next_free = i + 1;
+@@ -1720,8 +1720,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
+       sbio->status = bio->bi_status;
+       sbio->bio = bio;
+-      btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
+-                       scrub_wr_bio_end_io_worker, NULL, NULL);
++      btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
+       btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
+ }
+@@ -2203,8 +2202,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
+               raid56_add_scrub_pages(rbio, spage->page, spage->logical);
+       }
+-      btrfs_init_work(&sblock->work, btrfs_scrub_helper,
+-                      scrub_missing_raid56_worker, NULL, NULL);
++      btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
+       scrub_block_get(sblock);
+       scrub_pending_bio_inc(sctx);
+       raid56_submit_missing_rbio(rbio);
+@@ -2742,8 +2740,8 @@ static void scrub_parity_bio_endio(struct bio *bio)
+       bio_put(bio);
+-      btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
+-                      scrub_parity_bio_endio_worker, NULL, NULL);
++      btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
++                      NULL);
+       btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
+ }
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index e04409f85063..d8d7b1ee83ca 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -6676,8 +6676,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+       else
+               generate_random_uuid(dev->uuid);
+-      btrfs_init_work(&dev->work, btrfs_submit_helper,
+-                      pending_bios_fn, NULL, NULL);
++      btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL);
+       return dev;
+ }
+-- 
+2.20.1
+
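The pattern the patch converges on -- one generic work helper that recovers the containing work item with container_of() and calls its stored function pointer, instead of one stub helper per work type -- can be sketched in plain C as below. The struct and function names are simplified stand-ins, not the btrfs API.

/* Sketch of a single container_of()-based work helper (stand-in types). */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };

struct btrfs_like_work {
	struct work_struct normal_work;
	void (*func)(struct btrfs_like_work *work);
};

/* single helper, analogous in spirit to btrfs_work_helper() */
static void work_helper(struct work_struct *normal_work)
{
	struct btrfs_like_work *work =
		container_of(normal_work, struct btrfs_like_work, normal_work);

	work->func(work);
}

static void caching_thread(struct btrfs_like_work *work)
{
	(void)work;
	printf("caching work ran\n");
}

int main(void)
{
	struct btrfs_like_work w = { .func = caching_thread };

	work_helper(&w.normal_work);	/* what the workqueue would do */
	return 0;
}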
diff --git a/queue-5.4/btrfs-only-associate-the-locked-page-with-one-async_.patch b/queue-5.4/btrfs-only-associate-the-locked-page-with-one-async_.patch
new file mode 100644 (file)
index 0000000..4dbd969
--- /dev/null
@@ -0,0 +1,190 @@
+From b2ff5cd6ecab58eb1374bc235d8c19cd937011a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Jul 2019 12:28:16 -0700
+Subject: Btrfs: only associate the locked page with one async_chunk struct
+
+From: Chris Mason <clm@fb.com>
+
+[ Upstream commit 1d53c9e6723022b12e4a5ed4b141f67c834b7f6f ]
+
+The btrfs writepages function collects a large range of pages flagged
+for delayed allocation, and then sends them down through the COW code
+for processing.  When compression is on, we allocate one async_chunk
+structure for every 512K, and then run those pages through the
+compression code for IO submission.
+
+writepages starts all of this off with a single page, locked by the
+original call to extent_write_cache_pages(), and it's important to keep
+track of this page because it has already been through
+clear_page_dirty_for_io().
+
+The btrfs async_chunk struct has a pointer to the locked_page, and when
+we're redirtying the page because compression had to fallback to
+uncompressed IO, we use page->index to decide if a given async_chunk
+struct really owns that page.
+
+But, this is racey.  If a given delalloc range is broken up into two
+async_chunks (chunkA and chunkB), we can end up with something like
+this:
+
+ compress_file_range(chunkA)
+ submit_compress_extents(chunkA)
+ submit compressed bios(chunkA)
+ put_page(locked_page)
+
+                                compress_file_range(chunkB)
+                                ...
+
+Or:
+
+ async_cow_submit
+  submit_compressed_extents <--- falls back to buffered writeout
+   cow_file_range
+    extent_clear_unlock_delalloc
+     __process_pages_contig
+       put_page(locked_pages)
+
+                                           async_cow_submit
+
+The end result is that chunkA is completed and cleaned up before chunkB
+even starts processing.  This means we can free locked_page() and reuse
+it elsewhere.  If we get really lucky, it'll have the same page->index
+in its new home as it did before.
+
+While we're processing chunkB, we might decide we need to fall back to
+uncompressed IO, and so compress_file_range() will call
+__set_page_dirty_nobufers() on chunkB->locked_page.
+
+Without cgroups in use, this creates a phantom dirty page, which
+isn't great but isn't the end of the world. Worse, it can then go
+through the fixup worker and the whole COW machinery again:
+
+in submit_compressed_extents():
+  while (async extents) {
+  ...
+    cow_file_range
+    if (!page_started ...)
+      extent_write_locked_range
+    else if (...)
+      unlock_page
+    continue;
+
+This hasn't been observed in practice but is still possible.
+
+With cgroups in use, we might crash in the accounting code because
+page->mapping->i_wb isn't set.
+
+  BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0
+  IP: percpu_counter_add_batch+0x11/0x70
+  PGD 66534e067 P4D 66534e067 PUD 66534f067 PMD 0
+  Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
+  CPU: 16 PID: 2172 Comm: rm Not tainted
+  RIP: 0010:percpu_counter_add_batch+0x11/0x70
+  RSP: 0018:ffffc9000a97bbe0 EFLAGS: 00010286
+  RAX: 0000000000000005 RBX: 0000000000000090 RCX: 0000000000026115
+  RDX: 0000000000000030 RSI: ffffffffffffffff RDI: 0000000000000090
+  RBP: 0000000000000000 R08: fffffffffffffff5 R09: 0000000000000000
+  R10: 00000000000260c0 R11: ffff881037fc26c0 R12: ffffffffffffffff
+  R13: ffff880fe4111548 R14: ffffc9000a97bc90 R15: 0000000000000001
+  FS:  00007f5503ced480(0000) GS:ffff880ff7200000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00000000000000d0 CR3: 00000001e0459005 CR4: 0000000000360ee0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+   account_page_cleaned+0x15b/0x1f0
+   __cancel_dirty_page+0x146/0x200
+   truncate_cleanup_page+0x92/0xb0
+   truncate_inode_pages_range+0x202/0x7d0
+   btrfs_evict_inode+0x92/0x5a0
+   evict+0xc1/0x190
+   do_unlinkat+0x176/0x280
+   do_syscall_64+0x63/0x1a0
+   entry_SYSCALL_64_after_hwframe+0x42/0xb7
+
+The fix here is to make async_chunk->locked_page NULL everywhere but the
+one async_chunk struct that's allowed to do things to the locked page.
+
+Link: https://lore.kernel.org/linux-btrfs/c2419d01-5c84-3fb4-189e-4db519d08796@suse.com/
+Fixes: 771ed689d2cd ("Btrfs: Optimize compressed writeback and reads")
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+[ update changelog from mail thread discussion ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent_io.c |  2 +-
+ fs/btrfs/inode.c     | 25 +++++++++++++++++++++----
+ 2 files changed, 22 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index be9dc78aa727..33c6b191ca59 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1899,7 +1899,7 @@ static int __process_pages_contig(struct address_space *mapping,
+                       if (page_ops & PAGE_SET_PRIVATE2)
+                               SetPagePrivate2(pages[i]);
+-                      if (pages[i] == locked_page) {
++                      if (locked_page && pages[i] == locked_page) {
+                               put_page(pages[i]);
+                               pages_locked++;
+                               continue;
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index dc14fc2e4206..0b2758961b1c 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -712,10 +712,12 @@ cleanup_and_bail_uncompressed:
+        * to our extent and set things up for the async work queue to run
+        * cow_file_range to do the normal delalloc dance.
+        */
+-      if (page_offset(async_chunk->locked_page) >= start &&
+-          page_offset(async_chunk->locked_page) <= end)
++      if (async_chunk->locked_page &&
++          (page_offset(async_chunk->locked_page) >= start &&
++           page_offset(async_chunk->locked_page)) <= end) {
+               __set_page_dirty_nobuffers(async_chunk->locked_page);
+               /* unlocked later on in the async handlers */
++      }
+       if (redirty)
+               extent_range_redirty_for_io(inode, start, end);
+@@ -795,7 +797,7 @@ retry:
+                                                 async_extent->start +
+                                                 async_extent->ram_size - 1,
+                                                 WB_SYNC_ALL);
+-                      else if (ret)
++                      else if (ret && async_chunk->locked_page)
+                               unlock_page(async_chunk->locked_page);
+                       kfree(async_extent);
+                       cond_resched();
+@@ -1264,10 +1266,25 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
+               async_chunk[i].inode = inode;
+               async_chunk[i].start = start;
+               async_chunk[i].end = cur_end;
+-              async_chunk[i].locked_page = locked_page;
+               async_chunk[i].write_flags = write_flags;
+               INIT_LIST_HEAD(&async_chunk[i].extents);
++              /*
++               * The locked_page comes all the way from writepage and its
++               * the original page we were actually given.  As we spread
++               * this large delalloc region across multiple async_chunk
++               * structs, only the first struct needs a pointer to locked_page
++               *
++               * This way we don't need racey decisions about who is supposed
++               * to unlock it.
++               */
++              if (locked_page) {
++                      async_chunk[i].locked_page = locked_page;
++                      locked_page = NULL;
++              } else {
++                      async_chunk[i].locked_page = NULL;
++              }
++
+               btrfs_init_work(&async_chunk[i].work, async_cow_start,
+                               async_cow_submit, async_cow_free);
+-- 
+2.20.1
+
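The ownership rule introduced here can be summarized with a small sketch: when one delalloc range is split across several chunks, only the first chunk keeps the pointer to the locked page and every later chunk gets NULL, so there is never any ambiguity about who unlocks it. The types below are simplified stand-ins, not btrfs structures.

/* Sketch: only the first chunk takes ownership of the locked page. */
#include <stddef.h>
#include <stdio.h>

struct page;
struct chunk { struct page *locked_page; };

static void split_range(struct chunk *chunks, int nr, struct page *locked_page)
{
	for (int i = 0; i < nr; i++) {
		if (locked_page) {
			chunks[i].locked_page = locked_page;
			locked_page = NULL;	/* first chunk takes ownership */
		} else {
			chunks[i].locked_page = NULL;
		}
	}
}

int main(void)
{
	struct chunk chunks[3];
	struct page *fake = (struct page *)0x1;	/* placeholder, never dereferenced */

	split_range(chunks, 3, fake);
	for (int i = 0; i < 3; i++)
		printf("chunk %d owns locked_page: %s\n", i,
		       chunks[i].locked_page ? "yes" : "no");
	return 0;
}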
diff --git a/queue-5.4/drm-i915-execlists-fix-annotation-for-decoupling-vir.patch b/queue-5.4/drm-i915-execlists-fix-annotation-for-decoupling-vir.patch
new file mode 100644 (file)
index 0000000..49aeca5
--- /dev/null
@@ -0,0 +1,131 @@
+From a2e6f02f652ac665c135fba479c66440e4c2336c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Oct 2019 20:47:58 +0100
+Subject: drm/i915/execlists: Fix annotation for decoupling virtual request
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+[ Upstream commit 08ad9a3846fc72b047b110b36d162ffbcf298fa2 ]
+
+As we may signal a request and take the engine->active.lock within the
+signaler, the engine submission paths have to use a nested annotation on
+their requests -- but we guarantee that we can never submit on the same
+engine as the signaling fence.
+
+<4>[  723.763281] WARNING: possible circular locking dependency detected
+<4>[  723.763285] 5.3.0-g80fa0e042cdb-drmtip_379+ #1 Tainted: G     U
+<4>[  723.763288] ------------------------------------------------------
+<4>[  723.763291] gem_exec_await/1388 is trying to acquire lock:
+<4>[  723.763294] ffff93a7b53221d8 (&engine->active.lock){..-.}, at: execlists_submit_request+0x2b/0x1e0 [i915]
+<4>[  723.763378]
+                  but task is already holding lock:
+<4>[  723.763381] ffff93a7c25f6d20 (&i915_request_get(rq)->submit/1){-.-.}, at: __i915_sw_fence_complete+0x1b2/0x250 [i915]
+<4>[  723.763420]
+                  which lock already depends on the new lock.
+
+<4>[  723.763423]
+                  the existing dependency chain (in reverse order) is:
+<4>[  723.763427]
+                  -> #2 (&i915_request_get(rq)->submit/1){-.-.}:
+<4>[  723.763434]        _raw_spin_lock_irqsave_nested+0x39/0x50
+<4>[  723.763478]        __i915_sw_fence_complete+0x1b2/0x250 [i915]
+<4>[  723.763513]        intel_engine_breadcrumbs_irq+0x3aa/0x5e0 [i915]
+<4>[  723.763600]        cs_irq_handler+0x49/0x50 [i915]
+<4>[  723.763659]        gen11_gt_irq_handler+0x17b/0x280 [i915]
+<4>[  723.763690]        gen11_irq_handler+0x54/0xf0 [i915]
+<4>[  723.763695]        __handle_irq_event_percpu+0x41/0x2d0
+<4>[  723.763699]        handle_irq_event_percpu+0x2b/0x70
+<4>[  723.763702]        handle_irq_event+0x2f/0x50
+<4>[  723.763706]        handle_edge_irq+0xee/0x1a0
+<4>[  723.763709]        do_IRQ+0x7e/0x160
+<4>[  723.763712]        ret_from_intr+0x0/0x1d
+<4>[  723.763717]        __slab_alloc.isra.28.constprop.33+0x4f/0x70
+<4>[  723.763720]        kmem_cache_alloc+0x28d/0x2f0
+<4>[  723.763724]        vm_area_dup+0x15/0x40
+<4>[  723.763727]        dup_mm+0x2dd/0x550
+<4>[  723.763730]        copy_process+0xf21/0x1ef0
+<4>[  723.763734]        _do_fork+0x71/0x670
+<4>[  723.763737]        __se_sys_clone+0x6e/0xa0
+<4>[  723.763741]        do_syscall_64+0x4f/0x210
+<4>[  723.763744]        entry_SYSCALL_64_after_hwframe+0x49/0xbe
+<4>[  723.763747]
+                  -> #1 (&(&rq->lock)->rlock#2){-.-.}:
+<4>[  723.763752]        _raw_spin_lock+0x2a/0x40
+<4>[  723.763789]        __unwind_incomplete_requests+0x3eb/0x450 [i915]
+<4>[  723.763825]        __execlists_submission_tasklet+0x9ec/0x1d60 [i915]
+<4>[  723.763864]        execlists_submission_tasklet+0x34/0x50 [i915]
+<4>[  723.763874]        tasklet_action_common.isra.5+0x47/0xb0
+<4>[  723.763878]        __do_softirq+0xd8/0x4ae
+<4>[  723.763881]        irq_exit+0xa9/0xc0
+<4>[  723.763883]        smp_apic_timer_interrupt+0xb7/0x280
+<4>[  723.763887]        apic_timer_interrupt+0xf/0x20
+<4>[  723.763892]        cpuidle_enter_state+0xae/0x450
+<4>[  723.763895]        cpuidle_enter+0x24/0x40
+<4>[  723.763899]        do_idle+0x1e7/0x250
+<4>[  723.763902]        cpu_startup_entry+0x14/0x20
+<4>[  723.763905]        start_secondary+0x15f/0x1b0
+<4>[  723.763908]        secondary_startup_64+0xa4/0xb0
+<4>[  723.763911]
+                  -> #0 (&engine->active.lock){..-.}:
+<4>[  723.763916]        __lock_acquire+0x15d8/0x1ea0
+<4>[  723.763919]        lock_acquire+0xa6/0x1c0
+<4>[  723.763922]        _raw_spin_lock_irqsave+0x33/0x50
+<4>[  723.763956]        execlists_submit_request+0x2b/0x1e0 [i915]
+<4>[  723.764002]        submit_notify+0xa8/0x13c [i915]
+<4>[  723.764035]        __i915_sw_fence_complete+0x81/0x250 [i915]
+<4>[  723.764054]        i915_sw_fence_wake+0x51/0x64 [i915]
+<4>[  723.764054]        __i915_sw_fence_complete+0x1ee/0x250 [i915]
+<4>[  723.764054]        dma_i915_sw_fence_wake_timer+0x14/0x20 [i915]
+<4>[  723.764054]        dma_fence_signal_locked+0x9e/0x1c0
+<4>[  723.764054]        dma_fence_signal+0x1f/0x40
+<4>[  723.764054]        vgem_fence_signal_ioctl+0x67/0xc0 [vgem]
+<4>[  723.764054]        drm_ioctl_kernel+0x83/0xf0
+<4>[  723.764054]        drm_ioctl+0x2f3/0x3b0
+<4>[  723.764054]        do_vfs_ioctl+0xa0/0x6f0
+<4>[  723.764054]        ksys_ioctl+0x35/0x60
+<4>[  723.764054]        __x64_sys_ioctl+0x11/0x20
+<4>[  723.764054]        do_syscall_64+0x4f/0x210
+<4>[  723.764054]        entry_SYSCALL_64_after_hwframe+0x49/0xbe
+<4>[  723.764054]
+                  other info that might help us debug this:
+
+<4>[  723.764054] Chain exists of:
+                    &engine->active.lock --> &(&rq->lock)->rlock#2 --> &i915_request_get(rq)->submit/1
+
+<4>[  723.764054]  Possible unsafe locking scenario:
+
+<4>[  723.764054]        CPU0                    CPU1
+<4>[  723.764054]        ----                    ----
+<4>[  723.764054]   lock(&i915_request_get(rq)->submit/1);
+<4>[  723.764054]                                lock(&(&rq->lock)->rlock#2);
+<4>[  723.764054]                                lock(&i915_request_get(rq)->submit/1);
+<4>[  723.764054]   lock(&engine->active.lock);
+<4>[  723.764054]
+                   *** DEADLOCK ***
+
+Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111862
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20191004194758.19679-1-chris@chris-wilson.co.uk
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/intel_lrc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
+index 06a506c29463..d564bfcab6a3 100644
+--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
++++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
+@@ -525,7 +525,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
+                        */
+                       if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+                                    &rq->fence.flags)) {
+-                              spin_lock(&rq->lock);
++                              spin_lock_nested(&rq->lock,
++                                               SINGLE_DEPTH_NESTING);
+                               i915_request_cancel_breadcrumb(rq);
+                               spin_unlock(&rq->lock);
+                       }
+-- 
+2.20.1
+
diff --git a/queue-5.4/efi-don-t-attempt-to-map-rci2-config-table-if-it-doe.patch b/queue-5.4/efi-don-t-attempt-to-map-rci2-config-table-if-it-doe.patch
new file mode 100644 (file)
index 0000000..c581786
--- /dev/null
@@ -0,0 +1,72 @@
+From b6d34c969bf5712aa744486463eed1c7889ae67f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 Dec 2019 10:09:45 +0100
+Subject: efi: Don't attempt to map RCI2 config table if it doesn't exist
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+[ Upstream commit a470552ee8965da0fe6fd4df0aa39c4cda652c7c ]
+
+Commit:
+
+  1c5fecb61255aa12 ("efi: Export Runtime Configuration Interface table to sysfs")
+
+... added support for a Dell specific UEFI configuration table, but
+failed to take into account that mapping the table should not be
+attempted unless the table actually exists. If it doesn't exist,
+the code usually fails silently unless pr_debug() prints are
+enabled. However, on 32-bit PAE x86, the splat below is produced due
+to the attempt to map the placeholder value EFI_INVALID_TABLE_ADDR
+which we use for non-existing UEFI configuration tables, and which
+equals ULONG_MAX.
+
+   memremap attempted on mixed range 0x00000000ffffffff size: 0x1e
+   WARNING: CPU: 1 PID: 1 at kernel/iomem.c:81 memremap+0x1a3/0x1c0
+   Modules linked in:
+   CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.4.2-smp-mine #1
+   Hardware name: Hewlett-Packard HP Z400 Workstation/0B4Ch, BIOS 786G3 v03.61 03/05/2018
+   EIP: memremap+0x1a3/0x1c0
+  ...
+   Call Trace:
+    ? map_properties+0x473/0x473
+    ? efi_rci2_sysfs_init+0x2c/0x154
+    ? map_properties+0x473/0x473
+    ? do_one_initcall+0x49/0x1d4
+    ? parse_args+0x1e8/0x2a0
+    ? do_early_param+0x7a/0x7a
+    ? kernel_init_freeable+0x139/0x1c2
+    ? rest_init+0x8e/0x8e
+    ? kernel_init+0xd/0xf2
+    ? ret_from_fork+0x2e/0x38
+
+Fix this by checking whether the table exists before attempting to map it.
+
+Reported-by: Richard Narron <comet.berkeley@gmail.com>
+Tested-by: Richard Narron <comet.berkeley@gmail.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Cc: linux-efi@vger.kernel.org
+Fixes: 1c5fecb61255aa12 ("efi: Export Runtime Configuration Interface table to sysfs")
+Link: https://lkml.kernel.org/r/20191210090945.11501-2-ardb@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/rci2-table.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c
+index 76b0c354a027..de1a9a1f9f14 100644
+--- a/drivers/firmware/efi/rci2-table.c
++++ b/drivers/firmware/efi/rci2-table.c
+@@ -81,6 +81,9 @@ static int __init efi_rci2_sysfs_init(void)
+       struct kobject *tables_kobj;
+       int ret = -ENOMEM;
++      if (rci2_table_phys == EFI_INVALID_TABLE_ADDR)
++              return 0;
++
+       rci2_base = memremap(rci2_table_phys,
+                            sizeof(struct rci2_table_global_hdr),
+                            MEMREMAP_WB);
+-- 
+2.20.1
+
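A minimal sketch of the guard being added: if the recorded table address equals the "not present" sentinel (ULONG_MAX in the kernel, spelled EFI_INVALID_TABLE_ADDR), bail out before ever calling the mapping routine. The names below are stand-ins for illustration only.

/* Sketch: never hand the "table absent" sentinel to the mapping code. */
#include <limits.h>
#include <stdio.h>

#define INVALID_TABLE_ADDR ULONG_MAX	/* analogous to EFI_INVALID_TABLE_ADDR */

static int sysfs_init(unsigned long table_phys)
{
	if (table_phys == INVALID_TABLE_ADDR)
		return 0;	/* table absent: succeed quietly, map nothing */

	printf("would memremap() 0x%lx here\n", table_phys);
	return 0;
}

int main(void)
{
	sysfs_init(INVALID_TABLE_ADDR);	/* table missing: no mapping attempt */
	sysfs_init(0x7f000000UL);	/* table present: mapping would proceed */
	return 0;
}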
diff --git a/queue-5.4/hsr-avoid-debugfs-warning-message-when-module-is-rem.patch b/queue-5.4/hsr-avoid-debugfs-warning-message-when-module-is-rem.patch
new file mode 100644 (file)
index 0000000..0b75e64
--- /dev/null
@@ -0,0 +1,103 @@
+From 529aff430f3aba5d90ad2d6995674fdce1e041ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 Dec 2019 11:25:27 +0000
+Subject: hsr: avoid debugfs warning message when module is removed
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 84bb59d773853bc2dda2ac1ef8474c40eb33a3c6 ]
+
+When the hsr module is being removed, debugfs_remove() is called to remove
+both the debugfs directory and file.
+
+While the module is being removed, its state is changed to
+MODULE_STATE_GOING and then exit() is called.
+At this point the module can no longer be held, so try_module_get()
+will fail.
+
+debugfs's open() callback tries to hold the module if .owner is set.
+If that fails, a warning message is printed.
+
+CPU0                           CPU1
+delete_module()
+    try_stop_module()
+    hsr_exit()                 open() <-- WARNING
+        debugfs_remove()
+
+In order to avoid the warning message, this patch makes the hsr module
+not set .owner. Leaving .owner unset is safe because these operations are
+protected by inode_lock().
+
+Test commands:
+    #SHELL1
+    ip link add dummy0 type dummy
+    ip link add dummy1 type dummy
+    while :
+    do
+        ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1
+       modprobe -rv hsr
+    done
+
+    #SHELL2
+    while :
+    do
+        cat /sys/kernel/debug/hsr0/node_table
+    done
+
+Splat looks like:
+[  101.223783][ T1271] ------------[ cut here ]------------
+[  101.230309][ T1271] debugfs file owner did not clean up at exit: node_table
+[  101.230380][ T1271] WARNING: CPU: 3 PID: 1271 at fs/debugfs/file.c:309 full_proxy_open+0x10f/0x650
+[  101.233153][ T1271] Modules linked in: hsr(-) dummy veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_d]
+[  101.237112][ T1271] CPU: 3 PID: 1271 Comm: cat Tainted: G        W         5.5.0-rc1+ #204
+[  101.238270][ T1271] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[  101.240379][ T1271] RIP: 0010:full_proxy_open+0x10f/0x650
+[  101.241166][ T1271] Code: 48 c1 ea 03 80 3c 02 00 0f 85 c1 04 00 00 49 8b 3c 24 e8 04 86 7e ff 84 c0 75 2d 4c 8
+[  101.251985][ T1271] RSP: 0018:ffff8880ca22fa38 EFLAGS: 00010286
+[  101.273355][ T1271] RAX: dffffc0000000008 RBX: ffff8880cc6e6200 RCX: 0000000000000000
+[  101.274466][ T1271] RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffff8880c4dd5c14
+[  101.275581][ T1271] RBP: 0000000000000000 R08: fffffbfff2922f5d R09: 0000000000000000
+[  101.276733][ T1271] R10: 0000000000000001 R11: 0000000000000000 R12: ffffffffc0551bc0
+[  101.277853][ T1271] R13: ffff8880c4059a48 R14: ffff8880be50a5e0 R15: ffffffff941adaa0
+[  101.278956][ T1271] FS:  00007f8871cda540(0000) GS:ffff8880da800000(0000) knlGS:0000000000000000
+[  101.280216][ T1271] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  101.282832][ T1271] CR2: 00007f88717cfd10 CR3: 00000000b9440005 CR4: 00000000000606e0
+[  101.283974][ T1271] Call Trace:
+[  101.285328][ T1271]  do_dentry_open+0x63c/0xf50
+[  101.286077][ T1271]  ? open_proxy_open+0x270/0x270
+[  101.288271][ T1271]  ? __x64_sys_fchdir+0x180/0x180
+[  101.288987][ T1271]  ? inode_permission+0x65/0x390
+[  101.289682][ T1271]  path_openat+0x701/0x2810
+[  101.290294][ T1271]  ? path_lookupat+0x880/0x880
+[  101.290957][ T1271]  ? check_chain_key+0x236/0x5d0
+[  101.291676][ T1271]  ? __lock_acquire+0xdfe/0x3de0
+[  101.292358][ T1271]  ? sched_clock+0x5/0x10
+[  101.292962][ T1271]  ? sched_clock_cpu+0x18/0x170
+[  101.293644][ T1271]  ? find_held_lock+0x39/0x1d0
+[  101.305616][ T1271]  do_filp_open+0x17a/0x270
+[  101.306061][ T1271]  ? may_open_dev+0xc0/0xc0
+[ ... ]
+
+Fixes: fc4ecaeebd26 ("net: hsr: add debugfs support for display node list")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_debugfs.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
+index 94447974a3c0..6135706f03d5 100644
+--- a/net/hsr/hsr_debugfs.c
++++ b/net/hsr/hsr_debugfs.c
+@@ -64,7 +64,6 @@ hsr_node_table_open(struct inode *inode, struct file *filp)
+ }
+ static const struct file_operations hsr_fops = {
+-      .owner  = THIS_MODULE,
+       .open   = hsr_node_table_open,
+       .read   = seq_read,
+       .llseek = seq_lseek,
+-- 
+2.20.1
+
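The change above works because debugfs's open() only pins a module when
file_operations.owner is set, while removal of the entries is still serialized
by the VFS (the inode_lock() reasoning in the commit message). A minimal,
self-contained sketch of the same pattern, using a hypothetical demo module
rather than hsr code, might look like this:

/* Hypothetical module: debugfs file registered without .owner, so a
 * racing open() during module removal does not trip the debugfs WARN.
 */
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *demo_dir;

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello from debugfs\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, demo_show, inode->i_private);
}

static const struct file_operations demo_fops = {
	/* .owner deliberately left unset, as in the patch above */
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init demo_init(void)
{
	demo_dir = debugfs_create_dir("owner_demo", NULL);
	debugfs_create_file("state", 0444, demo_dir, NULL, &demo_fops);
	return 0;
}

static void __exit demo_exit(void)
{
	debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");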
diff --git a/queue-5.4/hsr-fix-a-race-condition-in-node-list-insertion-and-.patch b/queue-5.4/hsr-fix-a-race-condition-in-node-list-insertion-and-.patch
new file mode 100644 (file)
index 0000000..f970794
--- /dev/null
@@ -0,0 +1,368 @@
+From b5bd241db1f7df2a9b3f1f57d1ea66770f0bbf46 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 Dec 2019 11:26:54 +0000
+Subject: hsr: fix a race condition in node list insertion and deletion
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 92a35678ec075100ce666a2fb6969151affb0e5d ]
+
+hsr nodes are protected by RCU and there is no write-side lock.
+But node insertions and deletions can be performed concurrently,
+so write-side locking is needed.
+
+Test commands:
+    ip netns add nst
+    ip link add veth0 type veth peer name veth1
+    ip link add veth2 type veth peer name veth3
+    ip link set veth1 netns nst
+    ip link set veth3 netns nst
+    ip link set veth0 up
+    ip link set veth2 up
+    ip link add hsr0 type hsr slave1 veth0 slave2 veth2
+    ip a a 192.168.100.1/24 dev hsr0
+    ip link set hsr0 up
+    ip netns exec nst ip link set veth1 up
+    ip netns exec nst ip link set veth3 up
+    ip netns exec nst ip link add hsr1 type hsr slave1 veth1 slave2 veth3
+    ip netns exec nst ip a a 192.168.100.2/24 dev hsr1
+    ip netns exec nst ip link set hsr1 up
+
+    for i in {0..9}
+    do
+        for j in {0..9}
+        do
+            for k in {0..9}
+            do
+                for l in {0..9}
+                do
+                    arping 192.168.100.2 -I hsr0 -s 00:01:3$i:4$j:5$k:6$l -c1 &
+                done
+            done
+        done
+    done
+
+Splat looks like:
+[  236.066091][ T3286] list_add corruption. next->prev should be prev (ffff8880a5940300), but was ffff8880a5940d0.
+[  236.069617][ T3286] ------------[ cut here ]------------
+[  236.070545][ T3286] kernel BUG at lib/list_debug.c:25!
+[  236.071391][ T3286] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[  236.072343][ T3286] CPU: 0 PID: 3286 Comm: arping Tainted: G        W         5.5.0-rc1+ #209
+[  236.073463][ T3286] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[  236.074695][ T3286] RIP: 0010:__list_add_valid+0x74/0xd0
+[  236.075499][ T3286] Code: 48 39 da 75 27 48 39 f5 74 36 48 39 dd 74 31 48 83 c4 08 b8 01 00 00 00 5b 5d c3 48 b
+[  236.078277][ T3286] RSP: 0018:ffff8880aaa97648 EFLAGS: 00010286
+[  236.086991][ T3286] RAX: 0000000000000075 RBX: ffff8880d4624c20 RCX: 0000000000000000
+[  236.088000][ T3286] RDX: 0000000000000075 RSI: 0000000000000008 RDI: ffffed1015552ebf
+[  236.098897][ T3286] RBP: ffff88809b53d200 R08: ffffed101b3c04f9 R09: ffffed101b3c04f9
+[  236.099960][ T3286] R10: 00000000308769a1 R11: ffffed101b3c04f8 R12: ffff8880d4624c28
+[  236.100974][ T3286] R13: ffff8880d4624c20 R14: 0000000040310100 R15: ffff8880ce17ee02
+[  236.138967][ T3286] FS:  00007f23479fa680(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000
+[  236.144852][ T3286] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  236.145720][ T3286] CR2: 00007f4a14bab210 CR3: 00000000a61c6001 CR4: 00000000000606f0
+[  236.146776][ T3286] Call Trace:
+[  236.147222][ T3286]  hsr_add_node+0x314/0x490 [hsr]
+[  236.153633][ T3286]  hsr_forward_skb+0x2b6/0x1bc0 [hsr]
+[  236.154362][ T3286]  ? rcu_read_lock_sched_held+0x90/0xc0
+[  236.155091][ T3286]  ? rcu_read_lock_bh_held+0xa0/0xa0
+[  236.156607][ T3286]  hsr_dev_xmit+0x70/0xd0 [hsr]
+[  236.157254][ T3286]  dev_hard_start_xmit+0x160/0x740
+[  236.157941][ T3286]  __dev_queue_xmit+0x1961/0x2e10
+[  236.158565][ T3286]  ? netdev_core_pick_tx+0x2e0/0x2e0
+[ ... ]
+
+Reported-by: syzbot+3924327f9ad5f4d2b343@syzkaller.appspotmail.com
+Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_device.c   |  7 ++--
+ net/hsr/hsr_framereg.c | 73 ++++++++++++++++++++++++++----------------
+ net/hsr/hsr_framereg.h |  6 ++--
+ net/hsr/hsr_main.c     |  2 +-
+ net/hsr/hsr_main.h     |  5 +--
+ 5 files changed, 56 insertions(+), 37 deletions(-)
+
+diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
+index e73549075a03..62c03f0d0079 100644
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -368,7 +368,7 @@ static void hsr_dev_destroy(struct net_device *hsr_dev)
+       del_timer_sync(&hsr->prune_timer);
+       del_timer_sync(&hsr->announce_timer);
+-      hsr_del_self_node(&hsr->self_node_db);
++      hsr_del_self_node(hsr);
+       hsr_del_nodes(&hsr->node_db);
+ }
+@@ -440,11 +440,12 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+       INIT_LIST_HEAD(&hsr->ports);
+       INIT_LIST_HEAD(&hsr->node_db);
+       INIT_LIST_HEAD(&hsr->self_node_db);
++      spin_lock_init(&hsr->list_lock);
+       ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr);
+       /* Make sure we recognize frames from ourselves in hsr_rcv() */
+-      res = hsr_create_self_node(&hsr->self_node_db, hsr_dev->dev_addr,
++      res = hsr_create_self_node(hsr, hsr_dev->dev_addr,
+                                  slave[1]->dev_addr);
+       if (res < 0)
+               return res;
+@@ -502,7 +503,7 @@ err_unregister:
+       list_for_each_entry_safe(port, tmp, &hsr->ports, port_list)
+               hsr_del_port(port);
+ err_add_master:
+-      hsr_del_self_node(&hsr->self_node_db);
++      hsr_del_self_node(hsr);
+       return res;
+ }
+diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
+index 292be446007b..27dc65d7de67 100644
+--- a/net/hsr/hsr_framereg.c
++++ b/net/hsr/hsr_framereg.c
+@@ -75,10 +75,11 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
+ /* Helper for device init; the self_node_db is used in hsr_rcv() to recognize
+  * frames from self that's been looped over the HSR ring.
+  */
+-int hsr_create_self_node(struct list_head *self_node_db,
++int hsr_create_self_node(struct hsr_priv *hsr,
+                        unsigned char addr_a[ETH_ALEN],
+                        unsigned char addr_b[ETH_ALEN])
+ {
++      struct list_head *self_node_db = &hsr->self_node_db;
+       struct hsr_node *node, *oldnode;
+       node = kmalloc(sizeof(*node), GFP_KERNEL);
+@@ -88,33 +89,33 @@ int hsr_create_self_node(struct list_head *self_node_db,
+       ether_addr_copy(node->macaddress_A, addr_a);
+       ether_addr_copy(node->macaddress_B, addr_b);
+-      rcu_read_lock();
++      spin_lock_bh(&hsr->list_lock);
+       oldnode = list_first_or_null_rcu(self_node_db,
+                                        struct hsr_node, mac_list);
+       if (oldnode) {
+               list_replace_rcu(&oldnode->mac_list, &node->mac_list);
+-              rcu_read_unlock();
+-              synchronize_rcu();
+-              kfree(oldnode);
++              spin_unlock_bh(&hsr->list_lock);
++              kfree_rcu(oldnode, rcu_head);
+       } else {
+-              rcu_read_unlock();
+               list_add_tail_rcu(&node->mac_list, self_node_db);
++              spin_unlock_bh(&hsr->list_lock);
+       }
+       return 0;
+ }
+-void hsr_del_self_node(struct list_head *self_node_db)
++void hsr_del_self_node(struct hsr_priv *hsr)
+ {
++      struct list_head *self_node_db = &hsr->self_node_db;
+       struct hsr_node *node;
+-      rcu_read_lock();
++      spin_lock_bh(&hsr->list_lock);
+       node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
+-      rcu_read_unlock();
+       if (node) {
+               list_del_rcu(&node->mac_list);
+-              kfree(node);
++              kfree_rcu(node, rcu_head);
+       }
++      spin_unlock_bh(&hsr->list_lock);
+ }
+ void hsr_del_nodes(struct list_head *node_db)
+@@ -130,30 +131,43 @@ void hsr_del_nodes(struct list_head *node_db)
+  * seq_out is used to initialize filtering of outgoing duplicate frames
+  * originating from the newly added node.
+  */
+-struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
+-                            u16 seq_out)
++static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
++                                   struct list_head *node_db,
++                                   unsigned char addr[],
++                                   u16 seq_out)
+ {
+-      struct hsr_node *node;
++      struct hsr_node *new_node, *node;
+       unsigned long now;
+       int i;
+-      node = kzalloc(sizeof(*node), GFP_ATOMIC);
+-      if (!node)
++      new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
++      if (!new_node)
+               return NULL;
+-      ether_addr_copy(node->macaddress_A, addr);
++      ether_addr_copy(new_node->macaddress_A, addr);
+       /* We are only interested in time diffs here, so use current jiffies
+        * as initialization. (0 could trigger an spurious ring error warning).
+        */
+       now = jiffies;
+       for (i = 0; i < HSR_PT_PORTS; i++)
+-              node->time_in[i] = now;
++              new_node->time_in[i] = now;
+       for (i = 0; i < HSR_PT_PORTS; i++)
+-              node->seq_out[i] = seq_out;
+-
+-      list_add_tail_rcu(&node->mac_list, node_db);
++              new_node->seq_out[i] = seq_out;
++      spin_lock_bh(&hsr->list_lock);
++      list_for_each_entry_rcu(node, node_db, mac_list) {
++              if (ether_addr_equal(node->macaddress_A, addr))
++                      goto out;
++              if (ether_addr_equal(node->macaddress_B, addr))
++                      goto out;
++      }
++      list_add_tail_rcu(&new_node->mac_list, node_db);
++      spin_unlock_bh(&hsr->list_lock);
++      return new_node;
++out:
++      spin_unlock_bh(&hsr->list_lock);
++      kfree(new_node);
+       return node;
+ }
+@@ -163,6 +177,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
+                             bool is_sup)
+ {
+       struct list_head *node_db = &port->hsr->node_db;
++      struct hsr_priv *hsr = port->hsr;
+       struct hsr_node *node;
+       struct ethhdr *ethhdr;
+       u16 seq_out;
+@@ -196,7 +211,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
+               seq_out = HSR_SEQNR_START;
+       }
+-      return hsr_add_node(node_db, ethhdr->h_source, seq_out);
++      return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out);
+ }
+ /* Use the Supervision frame's info about an eventual macaddress_B for merging
+@@ -206,10 +221,11 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
+ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+                         struct hsr_port *port_rcv)
+ {
+-      struct ethhdr *ethhdr;
+-      struct hsr_node *node_real;
++      struct hsr_priv *hsr = port_rcv->hsr;
+       struct hsr_sup_payload *hsr_sp;
++      struct hsr_node *node_real;
+       struct list_head *node_db;
++      struct ethhdr *ethhdr;
+       int i;
+       ethhdr = (struct ethhdr *)skb_mac_header(skb);
+@@ -231,7 +247,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+       node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
+       if (!node_real)
+               /* No frame received from AddrA of this node yet */
+-              node_real = hsr_add_node(node_db, hsr_sp->macaddress_A,
++              node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
+                                        HSR_SEQNR_START - 1);
+       if (!node_real)
+               goto done; /* No mem */
+@@ -252,7 +268,9 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+       }
+       node_real->addr_B_port = port_rcv->type;
++      spin_lock_bh(&hsr->list_lock);
+       list_del_rcu(&node_curr->mac_list);
++      spin_unlock_bh(&hsr->list_lock);
+       kfree_rcu(node_curr, rcu_head);
+ done:
+@@ -368,12 +386,13 @@ void hsr_prune_nodes(struct timer_list *t)
+ {
+       struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
+       struct hsr_node *node;
++      struct hsr_node *tmp;
+       struct hsr_port *port;
+       unsigned long timestamp;
+       unsigned long time_a, time_b;
+-      rcu_read_lock();
+-      list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
++      spin_lock_bh(&hsr->list_lock);
++      list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) {
+               /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A]
+                * nor time_in[HSR_PT_SLAVE_B], will ever be updated for
+                * the master port. Thus the master node will be repeatedly
+@@ -421,7 +440,7 @@ void hsr_prune_nodes(struct timer_list *t)
+                       kfree_rcu(node, rcu_head);
+               }
+       }
+-      rcu_read_unlock();
++      spin_unlock_bh(&hsr->list_lock);
+       /* Restart timer */
+       mod_timer(&hsr->prune_timer,
+diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
+index 89a3ce38151d..0f0fa12b4329 100644
+--- a/net/hsr/hsr_framereg.h
++++ b/net/hsr/hsr_framereg.h
+@@ -12,10 +12,8 @@
+ struct hsr_node;
+-void hsr_del_self_node(struct list_head *self_node_db);
++void hsr_del_self_node(struct hsr_priv *hsr);
+ void hsr_del_nodes(struct list_head *node_db);
+-struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
+-                            u16 seq_out);
+ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
+                             bool is_sup);
+ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
+@@ -33,7 +31,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
+ void hsr_prune_nodes(struct timer_list *t);
+-int hsr_create_self_node(struct list_head *self_node_db,
++int hsr_create_self_node(struct hsr_priv *hsr,
+                        unsigned char addr_a[ETH_ALEN],
+                        unsigned char addr_b[ETH_ALEN]);
+diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
+index b9988a662ee1..6deb8fa8d5c8 100644
+--- a/net/hsr/hsr_main.c
++++ b/net/hsr/hsr_main.c
+@@ -64,7 +64,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
+               /* Make sure we recognize frames from ourselves in hsr_rcv() */
+               port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
+-              res = hsr_create_self_node(&hsr->self_node_db,
++              res = hsr_create_self_node(hsr,
+                                          master->dev->dev_addr,
+                                          port ?
+                                               port->dev->dev_addr :
+diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
+index acab9c353a49..9ec38e33b8b1 100644
+--- a/net/hsr/hsr_main.h
++++ b/net/hsr/hsr_main.h
+@@ -160,8 +160,9 @@ struct hsr_priv {
+       int announce_count;
+       u16 sequence_nr;
+       u16 sup_sequence_nr;    /* For HSRv1 separate seq_nr for supervision */
+-      u8 prot_version;                /* Indicate if HSRv0 or HSRv1. */
+-      spinlock_t seqnr_lock;                  /* locking for sequence_nr */
++      u8 prot_version;        /* Indicate if HSRv0 or HSRv1. */
++      spinlock_t seqnr_lock;  /* locking for sequence_nr */
++      spinlock_t list_lock;   /* locking for node list */
+       unsigned char           sup_multicast_addr[ETH_ALEN];
+ #ifdef        CONFIG_DEBUG_FS
+       struct dentry *node_tbl_root;
+-- 
+2.20.1
+
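The locking scheme introduced above follows a common RCU idiom: readers keep
traversing the node list under rcu_read_lock(), a spinlock serializes only the
writers, duplicate checks are redone under that lock, and frees go through
kfree_rcu(). A generic sketch of the idiom with hypothetical demo_* names (an
illustration of the pattern, not the hsr data structures):

/* Readers: rcu_read_lock() + list_for_each_entry_rcu(), unchanged.
 * Writers: serialize insert/delete with a spinlock, free via kfree_rcu().
 */
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/rculist.h>

struct demo_node {
	struct list_head list;
	int id;
	struct rcu_head rcu;
};

static LIST_HEAD(demo_db);
static DEFINE_SPINLOCK(demo_lock);	/* write side only */

static struct demo_node *demo_add(int id)
{
	struct demo_node *new_node, *node;

	new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
	if (!new_node)
		return NULL;
	new_node->id = id;

	spin_lock_bh(&demo_lock);
	/* re-check under the lock so two concurrent adders cannot both insert */
	list_for_each_entry(node, &demo_db, list) {
		if (node->id == id) {
			spin_unlock_bh(&demo_lock);
			kfree(new_node);
			return node;
		}
	}
	list_add_tail_rcu(&new_node->list, &demo_db);
	spin_unlock_bh(&demo_lock);
	return new_node;
}

static void demo_del(struct demo_node *node)
{
	spin_lock_bh(&demo_lock);
	list_del_rcu(&node->list);
	spin_unlock_bh(&demo_lock);
	kfree_rcu(node, rcu);	/* readers may still be walking the list */
}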
diff --git a/queue-5.4/hsr-fix-error-handling-routine-in-hsr_dev_finalize.patch b/queue-5.4/hsr-fix-error-handling-routine-in-hsr_dev_finalize.patch
new file mode 100644 (file)
index 0000000..aef59eb
--- /dev/null
@@ -0,0 +1,149 @@
+From 938841d9f18e2125949badb44d0ee7ccfddabec9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 Dec 2019 11:26:15 +0000
+Subject: hsr: fix error handling routine in hsr_dev_finalize()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 1d19e2d53e8ed9e4c98fc95e0067492cda7288b0 ]
+
+hsr_dev_finalize() is called to create a new hsr interface.
+It contains some incorrect error handling code.
+
+1. Wrong check of the return value of debugfs_create_{dir/file}.
+These functions don't return NULL. If an error occurs in them,
+they return an error pointer.
+So, the code should check for an error pointer instead of NULL.
+
+2. It doesn't unregister the interface if it fails to set up the hsr interface.
+If initializing the hsr interface fails after register_netdevice(),
+it should call unregister_netdevice().
+
+3. Ignore failures to create debugfs entries.
+If creating the debugfs dir and file fails, creating the hsr interface
+currently fails as well. But debugfs doesn't affect the actual logic of
+the hsr module, so ignoring the failure is more correct, and this
+behavior is more common.
+
+Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/hsr/hsr_debugfs.c | 15 +++++++--------
+ net/hsr/hsr_device.c  | 19 ++++++++++---------
+ net/hsr/hsr_main.h    | 11 ++++-------
+ 3 files changed, 21 insertions(+), 24 deletions(-)
+
+diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
+index 6135706f03d5..6618a9d8e58e 100644
+--- a/net/hsr/hsr_debugfs.c
++++ b/net/hsr/hsr_debugfs.c
+@@ -77,15 +77,14 @@ static const struct file_operations hsr_fops = {
+  * When debugfs is configured this routine sets up the node_table file per
+  * hsr device for dumping the node_table entries
+  */
+-int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
++void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
+ {
+-      int rc = -1;
+       struct dentry *de = NULL;
+       de = debugfs_create_dir(hsr_dev->name, NULL);
+-      if (!de) {
++      if (IS_ERR(de)) {
+               pr_err("Cannot create hsr debugfs root\n");
+-              return rc;
++              return;
+       }
+       priv->node_tbl_root = de;
+@@ -93,13 +92,13 @@ int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
+       de = debugfs_create_file("node_table", S_IFREG | 0444,
+                                priv->node_tbl_root, priv,
+                                &hsr_fops);
+-      if (!de) {
++      if (IS_ERR(de)) {
+               pr_err("Cannot create hsr node_table directory\n");
+-              return rc;
++              debugfs_remove(priv->node_tbl_root);
++              priv->node_tbl_root = NULL;
++              return;
+       }
+       priv->node_tbl_file = de;
+-
+-      return 0;
+ }
+ /* hsr_debugfs_term - Tear down debugfs intrastructure
+diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
+index b01e1bae4ddc..e73549075a03 100644
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -477,30 +477,31 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+       res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
+       if (res)
+-              goto err_add_port;
++              goto err_add_master;
+       res = register_netdevice(hsr_dev);
+       if (res)
+-              goto fail;
++              goto err_unregister;
+       res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A);
+       if (res)
+-              goto fail;
++              goto err_add_slaves;
++
+       res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B);
+       if (res)
+-              goto fail;
++              goto err_add_slaves;
++      hsr_debugfs_init(hsr, hsr_dev);
+       mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
+-      res = hsr_debugfs_init(hsr, hsr_dev);
+-      if (res)
+-              goto fail;
+       return 0;
+-fail:
++err_add_slaves:
++      unregister_netdevice(hsr_dev);
++err_unregister:
+       list_for_each_entry_safe(port, tmp, &hsr->ports, port_list)
+               hsr_del_port(port);
+-err_add_port:
++err_add_master:
+       hsr_del_self_node(&hsr->self_node_db);
+       return res;
+diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
+index 96fac696a1e1..acab9c353a49 100644
+--- a/net/hsr/hsr_main.h
++++ b/net/hsr/hsr_main.h
+@@ -184,15 +184,12 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb)
+ }
+ #if IS_ENABLED(CONFIG_DEBUG_FS)
+-int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
++void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
+ void hsr_debugfs_term(struct hsr_priv *priv);
+ #else
+-static inline int hsr_debugfs_init(struct hsr_priv *priv,
+-                                 struct net_device *hsr_dev)
+-{
+-      return 0;
+-}
+-
++static inline void hsr_debugfs_init(struct hsr_priv *priv,
++                                  struct net_device *hsr_dev)
++{}
+ static inline void hsr_debugfs_term(struct hsr_priv *priv)
+ {}
+ #endif
+-- 
+2.20.1
+
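Points 1 and 3 above reduce to one rule for callers of the debugfs API:
debugfs_create_dir() and debugfs_create_file() signal failure with ERR_PTR()
values rather than NULL, and because debugfs is purely diagnostic, a failure
should be logged and swallowed, never propagated to the main setup path. A
small sketch of that rule, with hypothetical demo_* names:

/* Optional debugfs setup: checks for error pointers, never returns an
 * error to the caller.
 */
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/printk.h>

static struct dentry *demo_root;

static void demo_debugfs_init(const char *name,
			      const struct file_operations *fops, void *priv)
{
	struct dentry *de;

	de = debugfs_create_dir(name, NULL);
	if (IS_ERR(de)) {	/* an error pointer, not NULL */
		pr_err("cannot create %s debugfs root\n", name);
		return;
	}
	demo_root = de;

	de = debugfs_create_file("node_table", 0444, demo_root, priv, fops);
	if (IS_ERR(de)) {
		pr_err("cannot create %s node_table file\n", name);
		debugfs_remove(demo_root);
		demo_root = NULL;
	}
}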
diff --git a/queue-5.4/lib-ubsan-don-t-serialize-ubsan-report.patch b/queue-5.4/lib-ubsan-don-t-serialize-ubsan-report.patch
new file mode 100644 (file)
index 0000000..4b45245
--- /dev/null
@@ -0,0 +1,320 @@
+From 3cc96c5410f0a8c6558b5b56a0878ef44ee707a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Dec 2019 16:52:53 -0800
+Subject: lib/ubsan: don't serialize UBSAN report
+
+From: Julien Grall <julien.grall@arm.com>
+
+[ Upstream commit ce5c31db3645b649a31044a4d8b6057f6c723702 ]
+
+At the moment, UBSAN reports are serialized using a spin_lock().  On
+RT systems, spinlocks are turned into rt_spin_lock and may sleep.  This
+will result in the following splat if the undefined behavior is hit in a
+context that cannot sleep:
+
+  BUG: sleeping function called from invalid context at /src/linux/kernel/locking/rtmutex.c:968
+  in_atomic(): 1, irqs_disabled(): 128, pid: 3447, name: make
+  1 lock held by make/3447:
+   #0: 000000009a966332 (&mm->mmap_sem){++++}, at: do_page_fault+0x140/0x4f8
+  irq event stamp: 6284
+  hardirqs last  enabled at (6283): [<ffff000011326520>] _raw_spin_unlock_irqrestore+0x90/0xa0
+  hardirqs last disabled at (6284): [<ffff0000113262b0>] _raw_spin_lock_irqsave+0x30/0x78
+  softirqs last  enabled at (2430): [<ffff000010088ef8>] fpsimd_restore_current_state+0x60/0xe8
+  softirqs last disabled at (2427): [<ffff000010088ec0>] fpsimd_restore_current_state+0x28/0xe8
+  Preemption disabled at:
+  [<ffff000011324a4c>] rt_mutex_futex_unlock+0x4c/0xb0
+  CPU: 3 PID: 3447 Comm: make Tainted: G        W         5.2.14-rt7-01890-ge6e057589653 #911
+  Call trace:
+    dump_backtrace+0x0/0x148
+    show_stack+0x14/0x20
+    dump_stack+0xbc/0x104
+    ___might_sleep+0x154/0x210
+    rt_spin_lock+0x68/0xa0
+    ubsan_prologue+0x30/0x68
+    handle_overflow+0x64/0xe0
+    __ubsan_handle_add_overflow+0x10/0x18
+    __lock_acquire+0x1c28/0x2a28
+    lock_acquire+0xf0/0x370
+    _raw_spin_lock_irqsave+0x58/0x78
+    rt_mutex_futex_unlock+0x4c/0xb0
+    rt_spin_unlock+0x28/0x70
+    get_page_from_freelist+0x428/0x2b60
+    __alloc_pages_nodemask+0x174/0x1708
+    alloc_pages_vma+0x1ac/0x238
+    __handle_mm_fault+0x4ac/0x10b0
+    handle_mm_fault+0x1d8/0x3b0
+    do_page_fault+0x1c8/0x4f8
+    do_translation_fault+0xb8/0xe0
+    do_mem_abort+0x3c/0x98
+    el0_da+0x20/0x24
+
+The spin_lock() protects against multiple CPUs printing a report at the
+same time, presumably to prevent the reports from being interleaved.
+However, they can still interleave with other messages (and even with the
+splat from __might_sleep).
+
+So the lock's usefulness seems pretty limited.  Rather than trying to
+accommodate RT systems by switching to a raw_spin_lock(), the lock is now
+completely dropped.
+
+Link: http://lkml.kernel.org/r/20190920100835.14999-1-julien.grall@arm.com
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Reported-by: Andre Przywara <andre.przywara@arm.com>
+Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/ubsan.c | 64 +++++++++++++++++++----------------------------------
+ 1 file changed, 23 insertions(+), 41 deletions(-)
+
+diff --git a/lib/ubsan.c b/lib/ubsan.c
+index 0c4681118fcd..f007a406f89c 100644
+--- a/lib/ubsan.c
++++ b/lib/ubsan.c
+@@ -140,25 +140,21 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type,
+       }
+ }
+-static DEFINE_SPINLOCK(report_lock);
+-
+-static void ubsan_prologue(struct source_location *location,
+-                      unsigned long *flags)
++static void ubsan_prologue(struct source_location *location)
+ {
+       current->in_ubsan++;
+-      spin_lock_irqsave(&report_lock, *flags);
+       pr_err("========================================"
+               "========================================\n");
+       print_source_location("UBSAN: Undefined behaviour in", location);
+ }
+-static void ubsan_epilogue(unsigned long *flags)
++static void ubsan_epilogue(void)
+ {
+       dump_stack();
+       pr_err("========================================"
+               "========================================\n");
+-      spin_unlock_irqrestore(&report_lock, *flags);
++
+       current->in_ubsan--;
+ }
+@@ -167,14 +163,13 @@ static void handle_overflow(struct overflow_data *data, void *lhs,
+ {
+       struct type_descriptor *type = data->type;
+-      unsigned long flags;
+       char lhs_val_str[VALUE_LENGTH];
+       char rhs_val_str[VALUE_LENGTH];
+       if (suppress_report(&data->location))
+               return;
+-      ubsan_prologue(&data->location, &flags);
++      ubsan_prologue(&data->location);
+       val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs);
+       val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs);
+@@ -186,7 +181,7 @@ static void handle_overflow(struct overflow_data *data, void *lhs,
+               rhs_val_str,
+               type->type_name);
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+ }
+ void __ubsan_handle_add_overflow(struct overflow_data *data,
+@@ -214,20 +209,19 @@ EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
+ void __ubsan_handle_negate_overflow(struct overflow_data *data,
+                               void *old_val)
+ {
+-      unsigned long flags;
+       char old_val_str[VALUE_LENGTH];
+       if (suppress_report(&data->location))
+               return;
+-      ubsan_prologue(&data->location, &flags);
++      ubsan_prologue(&data->location);
+       val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val);
+       pr_err("negation of %s cannot be represented in type %s:\n",
+               old_val_str, data->type->type_name);
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+ }
+ EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
+@@ -235,13 +229,12 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
+ void __ubsan_handle_divrem_overflow(struct overflow_data *data,
+                               void *lhs, void *rhs)
+ {
+-      unsigned long flags;
+       char rhs_val_str[VALUE_LENGTH];
+       if (suppress_report(&data->location))
+               return;
+-      ubsan_prologue(&data->location, &flags);
++      ubsan_prologue(&data->location);
+       val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs);
+@@ -251,58 +244,52 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data,
+       else
+               pr_err("division by zero\n");
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+ }
+ EXPORT_SYMBOL(__ubsan_handle_divrem_overflow);
+ static void handle_null_ptr_deref(struct type_mismatch_data_common *data)
+ {
+-      unsigned long flags;
+-
+       if (suppress_report(data->location))
+               return;
+-      ubsan_prologue(data->location, &flags);
++      ubsan_prologue(data->location);
+       pr_err("%s null pointer of type %s\n",
+               type_check_kinds[data->type_check_kind],
+               data->type->type_name);
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+ }
+ static void handle_misaligned_access(struct type_mismatch_data_common *data,
+                               unsigned long ptr)
+ {
+-      unsigned long flags;
+-
+       if (suppress_report(data->location))
+               return;
+-      ubsan_prologue(data->location, &flags);
++      ubsan_prologue(data->location);
+       pr_err("%s misaligned address %p for type %s\n",
+               type_check_kinds[data->type_check_kind],
+               (void *)ptr, data->type->type_name);
+       pr_err("which requires %ld byte alignment\n", data->alignment);
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+ }
+ static void handle_object_size_mismatch(struct type_mismatch_data_common *data,
+                                       unsigned long ptr)
+ {
+-      unsigned long flags;
+-
+       if (suppress_report(data->location))
+               return;
+-      ubsan_prologue(data->location, &flags);
++      ubsan_prologue(data->location);
+       pr_err("%s address %p with insufficient space\n",
+               type_check_kinds[data->type_check_kind],
+               (void *) ptr);
+       pr_err("for an object of type %s\n", data->type->type_name);
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+ }
+ static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data,
+@@ -351,25 +338,23 @@ EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1);
+ void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index)
+ {
+-      unsigned long flags;
+       char index_str[VALUE_LENGTH];
+       if (suppress_report(&data->location))
+               return;
+-      ubsan_prologue(&data->location, &flags);
++      ubsan_prologue(&data->location);
+       val_to_string(index_str, sizeof(index_str), data->index_type, index);
+       pr_err("index %s is out of range for type %s\n", index_str,
+               data->array_type->type_name);
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+ }
+ EXPORT_SYMBOL(__ubsan_handle_out_of_bounds);
+ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
+                                       void *lhs, void *rhs)
+ {
+-      unsigned long flags;
+       struct type_descriptor *rhs_type = data->rhs_type;
+       struct type_descriptor *lhs_type = data->lhs_type;
+       char rhs_str[VALUE_LENGTH];
+@@ -379,7 +364,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
+       if (suppress_report(&data->location))
+               goto out;
+-      ubsan_prologue(&data->location, &flags);
++      ubsan_prologue(&data->location);
+       val_to_string(rhs_str, sizeof(rhs_str), rhs_type, rhs);
+       val_to_string(lhs_str, sizeof(lhs_str), lhs_type, lhs);
+@@ -402,7 +387,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
+                       lhs_str, rhs_str,
+                       lhs_type->type_name);
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+ out:
+       user_access_restore(ua_flags);
+ }
+@@ -411,11 +396,9 @@ EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds);
+ void __ubsan_handle_builtin_unreachable(struct unreachable_data *data)
+ {
+-      unsigned long flags;
+-
+-      ubsan_prologue(&data->location, &flags);
++      ubsan_prologue(&data->location);
+       pr_err("calling __builtin_unreachable()\n");
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+       panic("can't return from __builtin_unreachable()");
+ }
+ EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
+@@ -423,19 +406,18 @@ EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
+ void __ubsan_handle_load_invalid_value(struct invalid_value_data *data,
+                               void *val)
+ {
+-      unsigned long flags;
+       char val_str[VALUE_LENGTH];
+       if (suppress_report(&data->location))
+               return;
+-      ubsan_prologue(&data->location, &flags);
++      ubsan_prologue(&data->location);
+       val_to_string(val_str, sizeof(val_str), data->type, val);
+       pr_err("load of value %s is not a valid value for type %s\n",
+               val_str, data->type->type_name);
+-      ubsan_epilogue(&flags);
++      ubsan_epilogue();
+ }
+ EXPORT_SYMBOL(__ubsan_handle_load_invalid_value);
+-- 
+2.20.1
+
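With the lock gone, the only state left in the report path is the per-task
in_ubsan counter that suppresses recursive reports, so the path never sleeps
and is safe from any context. A rough, simplified sketch of that shape (not
the exact lib/ubsan.c code; current->in_ubsan only exists with CONFIG_UBSAN=y):

#include <linux/sched.h>
#include <linux/printk.h>

static bool demo_report_enter(void)
{
	if (current->in_ubsan)	/* already reporting in this task: avoid recursion */
		return false;
	current->in_ubsan++;
	return true;
}

static void demo_report_exit(void)
{
	current->in_ubsan--;
}

static void demo_report(const char *what)
{
	if (!demo_report_enter())
		return;
	pr_err("========================================\n");
	pr_err("demo: undefined behaviour: %s\n", what);
	dump_stack();
	pr_err("========================================\n");
	demo_report_exit();
}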
diff --git a/queue-5.4/mm-hugetlb-defer-freeing-of-huge-pages-if-in-non-tas.patch b/queue-5.4/mm-hugetlb-defer-freeing-of-huge-pages-if-in-non-tas.patch
new file mode 100644 (file)
index 0000000..b170204
--- /dev/null
@@ -0,0 +1,180 @@
+From 86910105f43f432765737a14792160d01b65d2d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Jan 2020 13:00:15 -0800
+Subject: mm/hugetlb: defer freeing of huge pages if in non-task context
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit c77c0a8ac4c522638a8242fcb9de9496e3cdbb2d ]
+
+The following lockdep splat was observed when a certain hugetlbfs test
+was run:
+
+  ================================
+  WARNING: inconsistent lock state
+  4.18.0-159.el8.x86_64+debug #1 Tainted: G        W --------- -  -
+  --------------------------------
+  inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
+  swapper/30/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
+  ffffffff9acdc038 (hugetlb_lock){+.?.}, at: free_huge_page+0x36f/0xaa0
+  {SOFTIRQ-ON-W} state was registered at:
+    lock_acquire+0x14f/0x3b0
+    _raw_spin_lock+0x30/0x70
+    __nr_hugepages_store_common+0x11b/0xb30
+    hugetlb_sysctl_handler_common+0x209/0x2d0
+    proc_sys_call_handler+0x37f/0x450
+    vfs_write+0x157/0x460
+    ksys_write+0xb8/0x170
+    do_syscall_64+0xa5/0x4d0
+    entry_SYSCALL_64_after_hwframe+0x6a/0xdf
+  irq event stamp: 691296
+  hardirqs last  enabled at (691296): [<ffffffff99bb034b>] _raw_spin_unlock_irqrestore+0x4b/0x60
+  hardirqs last disabled at (691295): [<ffffffff99bb0ad2>] _raw_spin_lock_irqsave+0x22/0x81
+  softirqs last  enabled at (691284): [<ffffffff97ff0c63>] irq_enter+0xc3/0xe0
+  softirqs last disabled at (691285): [<ffffffff97ff0ebe>] irq_exit+0x23e/0x2b0
+
+  other info that might help us debug this:
+   Possible unsafe locking scenario:
+
+         CPU0
+         ----
+    lock(hugetlb_lock);
+    <Interrupt>
+      lock(hugetlb_lock);
+
+   *** DEADLOCK ***
+      :
+  Call Trace:
+   <IRQ>
+   __lock_acquire+0x146b/0x48c0
+   lock_acquire+0x14f/0x3b0
+   _raw_spin_lock+0x30/0x70
+   free_huge_page+0x36f/0xaa0
+   bio_check_pages_dirty+0x2fc/0x5c0
+   clone_endio+0x17f/0x670 [dm_mod]
+   blk_update_request+0x276/0xe50
+   scsi_end_request+0x7b/0x6a0
+   scsi_io_completion+0x1c6/0x1570
+   blk_done_softirq+0x22e/0x350
+   __do_softirq+0x23d/0xad8
+   irq_exit+0x23e/0x2b0
+   do_IRQ+0x11a/0x200
+   common_interrupt+0xf/0xf
+   </IRQ>
+
+Both the hugetlb_lock and the subpool lock can be acquired in
+free_huge_page().  One way to solve the problem is to make both locks
+irq-safe.  However, Mike Kravetz had learned that the hugetlb_lock is
+held for a linear scan of ALL hugetlb pages during a cgroup reparenting
+operation.  So it is just too long to have irqs disabled unless we can
+break hugetlb_lock down into finer-grained locks with shorter lock hold
+times.
+
+Another alternative is to defer the freeing to a workqueue job.  This
+patch implements the deferred freeing by adding a free_hpage_workfn()
+work function to do the actual freeing.  The free_huge_page() call in a
+non-task context saves the page to be freed in the hpage_freelist linked
+list in a lockless manner using the llist APIs.
+
+The generic workqueue is used to process the work, but a dedicated
+workqueue can be used instead if it is desirable to have the huge page
+freed ASAP.
+
+Thanks to Kirill Tkhai <ktkhai@virtuozzo.com> for suggesting the use of
+llist APIs which simplify the code.
+
+Link: http://lkml.kernel.org/r/20191217170331.30893-1-longman@redhat.com
+Signed-off-by: Waiman Long <longman@redhat.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Davidlohr Bueso <dbueso@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Andi Kleen <ak@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/hugetlb.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 50 insertions(+), 1 deletion(-)
+
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index b45a95363a84..e0afd582ca01 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -27,6 +27,7 @@
+ #include <linux/swapops.h>
+ #include <linux/jhash.h>
+ #include <linux/numa.h>
++#include <linux/llist.h>
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+@@ -1255,7 +1256,7 @@ static inline void ClearPageHugeTemporary(struct page *page)
+       page[2].mapping = NULL;
+ }
+-void free_huge_page(struct page *page)
++static void __free_huge_page(struct page *page)
+ {
+       /*
+        * Can't pass hstate in here because it is called from the
+@@ -1318,6 +1319,54 @@ void free_huge_page(struct page *page)
+       spin_unlock(&hugetlb_lock);
+ }
++/*
++ * As free_huge_page() can be called from a non-task context, we have
++ * to defer the actual freeing in a workqueue to prevent potential
++ * hugetlb_lock deadlock.
++ *
++ * free_hpage_workfn() locklessly retrieves the linked list of pages to
++ * be freed and frees them one-by-one. As the page->mapping pointer is
++ * going to be cleared in __free_huge_page() anyway, it is reused as the
++ * llist_node structure of a lockless linked list of huge pages to be freed.
++ */
++static LLIST_HEAD(hpage_freelist);
++
++static void free_hpage_workfn(struct work_struct *work)
++{
++      struct llist_node *node;
++      struct page *page;
++
++      node = llist_del_all(&hpage_freelist);
++
++      while (node) {
++              page = container_of((struct address_space **)node,
++                                   struct page, mapping);
++              node = node->next;
++              __free_huge_page(page);
++      }
++}
++static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
++
++void free_huge_page(struct page *page)
++{
++      /*
++       * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
++       */
++      if (!in_task()) {
++              /*
++               * Only call schedule_work() if hpage_freelist is previously
++               * empty. Otherwise, schedule_work() had been called but the
++               * workfn hasn't retrieved the list yet.
++               */
++              if (llist_add((struct llist_node *)&page->mapping,
++                            &hpage_freelist))
++                      schedule_work(&free_hpage_work);
++              return;
++      }
++
++      __free_huge_page(page);
++}
++
+ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+ {
+       INIT_LIST_HEAD(&page->lru);
+-- 
+2.20.1
+
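The mechanism added above is a reusable pattern: producers that may run in
softirq/irq context push objects onto a lockless llist and schedule a work
item only when the list transitions from empty to non-empty; the work item
then drains the list in task context, where the irq-unsafe locks may be taken.
A generic sketch with a hypothetical demo_obj type (the real patch cannot add
a field to struct page, so it reuses page->mapping as the llist_node instead):

#include <linux/llist.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/preempt.h>

struct demo_obj {
	struct llist_node lnode;
	/* payload ... */
};

static LLIST_HEAD(demo_freelist);

static void demo_free_now(struct demo_obj *obj)
{
	/* may take locks that must not be taken from softirq context */
	kfree(obj);
}

static void demo_free_workfn(struct work_struct *work)
{
	struct llist_node *node = llist_del_all(&demo_freelist);
	struct demo_obj *obj, *tmp;

	llist_for_each_entry_safe(obj, tmp, node, lnode)
		demo_free_now(obj);
}
static DECLARE_WORK(demo_free_work, demo_free_workfn);

void demo_free(struct demo_obj *obj)
{
	if (!in_task()) {
		/* llist_add() returns true only when the list was empty,
		 * so the work is scheduled exactly once per batch.
		 */
		if (llist_add(&obj->lnode, &demo_freelist))
			schedule_work(&demo_free_work);
		return;
	}
	demo_free_now(obj);
}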
diff --git a/queue-5.4/mm-sparse.c-mark-populate_section_memmap-as-__memini.patch b/queue-5.4/mm-sparse.c-mark-populate_section_memmap-as-__memini.patch
new file mode 100644 (file)
index 0000000..4f84e19
--- /dev/null
@@ -0,0 +1,61 @@
+From f6cef242c3be3091330006837025227760bc95e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Nov 2019 17:54:24 -0800
+Subject: mm/sparse.c: mark populate_section_memmap as __meminit
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+[ Upstream commit 030eab4f9ffb469344c10a46bc02c5149db0a2a9 ]
+
+Building the kernel on s390 with -Og produces the following warning:
+
+  WARNING: vmlinux.o(.text+0x28dabe): Section mismatch in reference from the function populate_section_memmap() to the function .meminit.text:__populate_section_memmap()
+  The function populate_section_memmap() references
+  the function __meminit __populate_section_memmap().
+  This is often because populate_section_memmap lacks a __meminit
+  annotation or the annotation of __populate_section_memmap is wrong.
+
+While -Og is not supported, in theory this might still happen with
+another compiler or on another architecture.  So fix this by using the
+correct section annotations.
+
+[iii@linux.ibm.com: v2]
+  Link: http://lkml.kernel.org/r/20191030151639.41486-1-iii@linux.ibm.com
+Link: http://lkml.kernel.org/r/20191028165549.14478-1-iii@linux.ibm.com
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: Oscar Salvador <OSalvador@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/sparse.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/mm/sparse.c b/mm/sparse.c
+index f6891c1992b1..c2c01b6330af 100644
+--- a/mm/sparse.c
++++ b/mm/sparse.c
+@@ -647,7 +647,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
+ #endif
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+-static struct page *populate_section_memmap(unsigned long pfn,
++static struct page * __meminit populate_section_memmap(unsigned long pfn,
+               unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
+ {
+       return __populate_section_memmap(pfn, nr_pages, nid, altmap);
+@@ -669,7 +669,7 @@ static void free_map_bootmem(struct page *memmap)
+       vmemmap_free(start, end, NULL);
+ }
+ #else
+-struct page *populate_section_memmap(unsigned long pfn,
++struct page * __meminit populate_section_memmap(unsigned long pfn,
+               unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
+ {
+       struct page *page, *ret;
+-- 
+2.20.1
+
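The warning above is modpost's section-mismatch check: __meminit code lives in
.meminit.text, which can be discarded when memory hotplug is not configured,
so any function calling into it must carry a compatible annotation. A tiny
illustration of the rule with hypothetical demo_* functions:

#include <linux/init.h>

/* callee in .meminit.text; may be discarded after boot without hotplug */
static int __meminit demo_populate_one(unsigned long pfn)
{
	return 0;
}

/* caller must be __meminit too, otherwise modpost reports a mismatch */
static int __meminit demo_populate_range(unsigned long start_pfn,
					 unsigned long nr_pages)
{
	unsigned long i;

	for (i = 0; i < nr_pages; i++) {
		int ret = demo_populate_one(start_pfn + i);

		if (ret)
			return ret;
	}
	return 0;
}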
diff --git a/queue-5.4/net-add-annotations-on-hh-hh_len-lockless-accesses.patch b/queue-5.4/net-add-annotations-on-hh-hh_len-lockless-accesses.patch
new file mode 100644 (file)
index 0000000..d2601b4
--- /dev/null
@@ -0,0 +1,149 @@
+From e52a366deaff0f9f0630fa809f426735e5beb3aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Nov 2019 18:29:11 -0800
+Subject: net: add annotations on hh->hh_len lockless accesses
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c305c6ae79e2ce20c22660ceda94f0d86d639a82 ]
+
+KCSAN reported a data-race [1]
+
+While we can use READ_ONCE() on the read sides,
+we need to make sure hh->hh_len is written last.
+
+[1]
+
+BUG: KCSAN: data-race in eth_header_cache / neigh_resolve_output
+
+write to 0xffff8880b9dedcb8 of 4 bytes by task 29760 on cpu 0:
+ eth_header_cache+0xa9/0xd0 net/ethernet/eth.c:247
+ neigh_hh_init net/core/neighbour.c:1463 [inline]
+ neigh_resolve_output net/core/neighbour.c:1480 [inline]
+ neigh_resolve_output+0x415/0x470 net/core/neighbour.c:1470
+ neigh_output include/net/neighbour.h:511 [inline]
+ ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116
+ __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
+ __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
+ ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
+ NF_HOOK_COND include/linux/netfilter.h:294 [inline]
+ ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
+ dst_output include/net/dst.h:436 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505
+ ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647
+ rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615
+ process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
+ worker_thread+0xa0/0x800 kernel/workqueue.c:2415
+ kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352
+
+read to 0xffff8880b9dedcb8 of 4 bytes by task 29572 on cpu 1:
+ neigh_resolve_output net/core/neighbour.c:1479 [inline]
+ neigh_resolve_output+0x113/0x470 net/core/neighbour.c:1470
+ neigh_output include/net/neighbour.h:511 [inline]
+ ip6_finish_output2+0x7a2/0xec0 net/ipv6/ip6_output.c:116
+ __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
+ __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
+ ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
+ NF_HOOK_COND include/linux/netfilter.h:294 [inline]
+ ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
+ dst_output include/net/dst.h:436 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ndisc_send_skb+0x459/0x5f0 net/ipv6/ndisc.c:505
+ ndisc_send_ns+0x207/0x430 net/ipv6/ndisc.c:647
+ rt6_probe_deferred+0x98/0xf0 net/ipv6/route.c:615
+ process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
+ worker_thread+0xa0/0x800 kernel/workqueue.c:2415
+ kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 29572 Comm: kworker/1:4 Not tainted 5.4.0-rc6+ #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: events rt6_probe_deferred
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firewire/net.c  | 6 +++++-
+ include/net/neighbour.h | 2 +-
+ net/core/neighbour.c    | 4 ++--
+ net/ethernet/eth.c      | 7 ++++++-
+ 4 files changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
+index b132ab9ad607..715e491dfbc3 100644
+--- a/drivers/firewire/net.c
++++ b/drivers/firewire/net.c
+@@ -250,7 +250,11 @@ static int fwnet_header_cache(const struct neighbour *neigh,
+       h = (struct fwnet_header *)((u8 *)hh->hh_data + HH_DATA_OFF(sizeof(*h)));
+       h->h_proto = type;
+       memcpy(h->h_dest, neigh->ha, net->addr_len);
+-      hh->hh_len = FWNET_HLEN;
++
++      /* Pairs with the READ_ONCE() in neigh_resolve_output(),
++       * neigh_hh_output() and neigh_update_hhs().
++       */
++      smp_store_release(&hh->hh_len, FWNET_HLEN);
+       return 0;
+ }
+diff --git a/include/net/neighbour.h b/include/net/neighbour.h
+index 5e679c8dae0b..8ec77bfdc1a4 100644
+--- a/include/net/neighbour.h
++++ b/include/net/neighbour.h
+@@ -467,7 +467,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
+       do {
+               seq = read_seqbegin(&hh->hh_lock);
+-              hh_len = hh->hh_len;
++              hh_len = READ_ONCE(hh->hh_len);
+               if (likely(hh_len <= HH_DATA_MOD)) {
+                       hh_alen = HH_DATA_MOD;
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index 08ebc3ac5343..f2452496ad9f 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -1194,7 +1194,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
+       if (update) {
+               hh = &neigh->hh;
+-              if (hh->hh_len) {
++              if (READ_ONCE(hh->hh_len)) {
+                       write_seqlock_bh(&hh->hh_lock);
+                       update(hh, neigh->dev, neigh->ha);
+                       write_sequnlock_bh(&hh->hh_lock);
+@@ -1473,7 +1473,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
+               struct net_device *dev = neigh->dev;
+               unsigned int seq;
+-              if (dev->header_ops->cache && !neigh->hh.hh_len)
++              if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
+                       neigh_hh_init(neigh);
+               do {
+diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
+index 17374afee28f..9040fe55e0f5 100644
+--- a/net/ethernet/eth.c
++++ b/net/ethernet/eth.c
+@@ -244,7 +244,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16
+       eth->h_proto = type;
+       memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
+       memcpy(eth->h_dest, neigh->ha, ETH_ALEN);
+-      hh->hh_len = ETH_HLEN;
++
++      /* Pairs with READ_ONCE() in neigh_resolve_output(),
++       * neigh_hh_output() and neigh_update_hhs().
++       */
++      smp_store_release(&hh->hh_len, ETH_HLEN);
++
+       return 0;
+ }
+ EXPORT_SYMBOL(eth_header_cache);
+-- 
+2.20.1
+
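The write side above is the classic publish pattern: fill in the cached header
bytes first, then publish the length with smp_store_release() so the data is
visible before a non-zero hh_len; the readers keep plain READ_ONCE() because
they additionally sit inside the hh_lock seqlock loop visible in the hunk. A
generic, hypothetical sketch of the pattern (using smp_load_acquire() on the
reader side, since this sketch has no seqlock):

#include <linux/compiler.h>
#include <asm/barrier.h>
#include <linux/string.h>

struct demo_hh {
	unsigned int	len;	/* 0 means "not cached yet" */
	unsigned char	data[16];
};

static void demo_publish(struct demo_hh *hh, const void *hdr, unsigned int len)
{
	memcpy(hh->data, hdr, len);
	/* order the memcpy() before the length becoming visible */
	smp_store_release(&hh->len, len);
}

static int demo_consume(const struct demo_hh *hh, void *out)
{
	unsigned int len = smp_load_acquire(&hh->len);

	if (!len)
		return -1;	/* nothing published yet */
	memcpy(out, hh->data, len);
	return len;
}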
diff --git a/queue-5.4/net-annotate-lockless-accesses-to-sk-sk_pacing_shift.patch b/queue-5.4/net-annotate-lockless-accesses-to-sk-sk_pacing_shift.patch
new file mode 100644 (file)
index 0000000..87c1a9a
--- /dev/null
@@ -0,0 +1,99 @@
+From 7a48ede0f00838af038f9bca5c71d2ba8e75e29a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Dec 2019 18:51:03 -0800
+Subject: net: annotate lockless accesses to sk->sk_pacing_shift
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 7c68fa2bddda6d942bd387c9ba5b4300737fd991 ]
+
+sk->sk_pacing_shift can be read and written without lock
+synchronization. This patch adds annotations to
+document this fact and avoid future syzbot complaints.
+
+This might also avoid unexpected false sharing
+in sk_pacing_shift_update(), as the compiler
+could remove the conditional check and always
+write over sk->sk_pacing_shift:
+
+if (sk->sk_pacing_shift != val)
+       sk->sk_pacing_shift = val;
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h    | 4 ++--
+ net/core/sock.c       | 2 +-
+ net/ipv4/tcp_bbr.c    | 3 ++-
+ net/ipv4/tcp_output.c | 4 ++--
+ 4 files changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index e09e2886a836..6c5a3809483e 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2589,9 +2589,9 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+  */
+ static inline void sk_pacing_shift_update(struct sock *sk, int val)
+ {
+-      if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
++      if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val)
+               return;
+-      sk->sk_pacing_shift = val;
++      WRITE_ONCE(sk->sk_pacing_shift, val);
+ }
+ /* if a socket is bound to a device, check that the given device
+diff --git a/net/core/sock.c b/net/core/sock.c
+index ac78a570e43a..b4d1112174c1 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2918,7 +2918,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
+       sk->sk_max_pacing_rate = ~0UL;
+       sk->sk_pacing_rate = ~0UL;
+-      sk->sk_pacing_shift = 10;
++      WRITE_ONCE(sk->sk_pacing_shift, 10);
+       sk->sk_incoming_cpu = -1;
+       sk_rx_queue_clear(sk);
+diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
+index 32772d6ded4e..a6545ef0d27b 100644
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -306,7 +306,8 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
+       /* Sort of tcp_tso_autosize() but ignoring
+        * driver provided sk_gso_max_size.
+        */
+-      bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift,
++      bytes = min_t(unsigned long,
++                    sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
+                     GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+       segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 0269584e9cf7..e4ba915c4bb5 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1728,7 +1728,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+       u32 bytes, segs;
+       bytes = min_t(unsigned long,
+-                    sk->sk_pacing_rate >> sk->sk_pacing_shift,
++                    sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
+                     sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+       /* Goal is to send at least one packet per ms,
+@@ -2263,7 +2263,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+       limit = max_t(unsigned long,
+                     2 * skb->truesize,
+-                    sk->sk_pacing_rate >> sk->sk_pacing_shift);
++                    sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+       if (sk->sk_pacing_status == SK_PACING_NONE)
+               limit = min_t(unsigned long, limit,
+                             sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+-- 
+2.20.1
+
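The annotations above serve the two purposes spelled out in the message: they
document that the field is read and written locklessly, and they keep the
compiler from rewriting the "compare, then conditionally store" sequence into
an unconditional store of the same value, which would dirty the cache line and
cause false sharing. A hypothetical sketch of the same idiom outside of
struct sock:

#include <linux/compiler.h>

struct demo_sock {
	int pacing_shift;	/* read/written without a lock */
	/* other hot fields sharing this cache line ... */
};

static inline void demo_pacing_shift_update(struct demo_sock *dsk, int val)
{
	/* skip the store entirely when the value is already right */
	if (READ_ONCE(dsk->pacing_shift) == val)
		return;
	WRITE_ONCE(dsk->pacing_shift, val);
}

static inline unsigned long demo_pacing_bytes(const struct demo_sock *dsk,
					      unsigned long pacing_rate)
{
	return pacing_rate >> READ_ONCE(dsk->pacing_shift);
}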
diff --git a/queue-5.4/perf-x86-intel-bts-fix-the-use-of-page_private.patch b/queue-5.4/perf-x86-intel-bts-fix-the-use-of-page_private.patch
new file mode 100644 (file)
index 0000000..c65d14f
--- /dev/null
@@ -0,0 +1,95 @@
+From 91c475cf7bc81d3399504610ebe07789ba462887 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Dec 2019 17:28:52 +0300
+Subject: perf/x86/intel/bts: Fix the use of page_private()
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit ff61541cc6c1962957758ba433c574b76f588d23 ]
+
+Commit
+
+  8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver")
+
+brought in a warning with the BTS buffer initialization
+that is easily tripped with (assuming KPTI is disabled):
+
+instantly throwing:
+
+> ------------[ cut here ]------------
+> WARNING: CPU: 2 PID: 326 at arch/x86/events/intel/bts.c:86 bts_buffer_setup_aux+0x117/0x3d0
+> Modules linked in:
+> CPU: 2 PID: 326 Comm: perf Not tainted 5.4.0-rc8-00291-gceb9e77324fa #904
+> RIP: 0010:bts_buffer_setup_aux+0x117/0x3d0
+> Call Trace:
+>  rb_alloc_aux+0x339/0x550
+>  perf_mmap+0x607/0xc70
+>  mmap_region+0x76b/0xbd0
+...
+
+It appears to assume (for lost raisins) that PagePrivate() is set,
+while later it actually tests for PagePrivate() before using
+page_private().
+
+Make it consistent and always check PagePrivate() before using
+page_private().
+
+Fixes: 8062382c8dbe2 ("perf/x86/intel/bts: Add BTS PMU driver")
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Vince Weaver <vincent.weaver@maine.edu>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
+Link: https://lkml.kernel.org/r/20191205142853.28894-2-alexander.shishkin@linux.intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/events/intel/bts.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
+index 5ee3fed881d3..741540d849f3 100644
+--- a/arch/x86/events/intel/bts.c
++++ b/arch/x86/events/intel/bts.c
+@@ -63,9 +63,17 @@ struct bts_buffer {
+ static struct pmu bts_pmu;
++static int buf_nr_pages(struct page *page)
++{
++      if (!PagePrivate(page))
++              return 1;
++
++      return 1 << page_private(page);
++}
++
+ static size_t buf_size(struct page *page)
+ {
+-      return 1 << (PAGE_SHIFT + page_private(page));
++      return buf_nr_pages(page) * PAGE_SIZE;
+ }
+ static void *
+@@ -83,9 +91,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
+       /* count all the high order buffers */
+       for (pg = 0, nbuf = 0; pg < nr_pages;) {
+               page = virt_to_page(pages[pg]);
+-              if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
+-                      return NULL;
+-              pg += 1 << page_private(page);
++              pg += buf_nr_pages(page);
+               nbuf++;
+       }
+@@ -109,7 +115,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
+               unsigned int __nr_pages;
+               page = virt_to_page(pages[pg]);
+-              __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
++              __nr_pages = buf_nr_pages(page);
+               buf->buf[nbuf].page = page;
+               buf->buf[nbuf].offset = offset;
+               buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+-- 
+2.20.1
+
diff --git a/queue-5.4/s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch b/queue-5.4/s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch
new file mode 100644 (file)
index 0000000..70b3bea
--- /dev/null
@@ -0,0 +1,155 @@
+From 452dca8cd002d7bc535424a4dfa0fe177b38dbcb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Nov 2019 14:55:38 +0100
+Subject: s390/smp: fix physical to logical CPU map for SMT
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+[ Upstream commit 72a81ad9d6d62dcb79f7e8ad66ffd1c768b72026 ]
+
+If an SMT capable system is not IPL'ed from the first CPU, the setup of
+the physical to logical CPU mapping is broken: the IPL core gets CPU
+number 0, but the next core gets CPU number 1. Instead, all SMT threads
+of CPU 0 should get the subsequent logical CPU numbers.
+
+This is important since a lot of code (e.g. the CPU topology code)
+assumes that CPU maps are set up like this. If the mapping is broken,
+the system will not IPL due to broken topology masks:
+
+[    1.716341] BUG: arch topology broken
+[    1.716342]      the SMT domain not a subset of the MC domain
+[    1.716343] BUG: arch topology broken
+[    1.716344]      the MC domain not a subset of the BOOK domain
+
+This scenario usually cannot happen, since LPARs are always IPL'ed
+from CPU 0 and re-IPL is also initiated from CPU 0. However, older
+kernels did initiate re-IPL on an arbitrary CPU. If a re-IPL from such
+an old kernel into a new kernel is initiated, this may therefore lead
+to a crash.
+
+Fix this by setting up the physical to logical CPU mapping correctly.
+
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/smp.c | 80 ++++++++++++++++++++++++++++--------------
+ 1 file changed, 54 insertions(+), 26 deletions(-)
+
+diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
+index d95c85780e07..06dddd7c4290 100644
+--- a/arch/s390/kernel/smp.c
++++ b/arch/s390/kernel/smp.c
+@@ -727,39 +727,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
+ static int smp_add_present_cpu(int cpu);
+-static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
++static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
++                      bool configured, bool early)
+ {
+       struct pcpu *pcpu;
+-      cpumask_t avail;
+-      int cpu, nr, i, j;
++      int cpu, nr, i;
+       u16 address;
+       nr = 0;
+-      cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+-      cpu = cpumask_first(&avail);
+-      for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
+-              if (sclp.has_core_type && info->core[i].type != boot_core_type)
++      if (sclp.has_core_type && core->type != boot_core_type)
++              return nr;
++      cpu = cpumask_first(avail);
++      address = core->core_id << smp_cpu_mt_shift;
++      for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
++              if (pcpu_find_address(cpu_present_mask, address + i))
+                       continue;
+-              address = info->core[i].core_id << smp_cpu_mt_shift;
+-              for (j = 0; j <= smp_cpu_mtid; j++) {
+-                      if (pcpu_find_address(cpu_present_mask, address + j))
+-                              continue;
+-                      pcpu = pcpu_devices + cpu;
+-                      pcpu->address = address + j;
+-                      pcpu->state =
+-                              (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
+-                              CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+-                      smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+-                      set_cpu_present(cpu, true);
+-                      if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+-                              set_cpu_present(cpu, false);
+-                      else
+-                              nr++;
+-                      cpu = cpumask_next(cpu, &avail);
+-                      if (cpu >= nr_cpu_ids)
++              pcpu = pcpu_devices + cpu;
++              pcpu->address = address + i;
++              if (configured)
++                      pcpu->state = CPU_STATE_CONFIGURED;
++              else
++                      pcpu->state = CPU_STATE_STANDBY;
++              smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
++              set_cpu_present(cpu, true);
++              if (!early && smp_add_present_cpu(cpu) != 0)
++                      set_cpu_present(cpu, false);
++              else
++                      nr++;
++              cpumask_clear_cpu(cpu, avail);
++              cpu = cpumask_next(cpu, avail);
++      }
++      return nr;
++}
++
++static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
++{
++      struct sclp_core_entry *core;
++      cpumask_t avail;
++      bool configured;
++      u16 core_id;
++      int nr, i;
++
++      nr = 0;
++      cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
++      /*
++       * Add IPL core first (which got logical CPU number 0) to make sure
++       * that all SMT threads get subsequent logical CPU numbers.
++       */
++      if (early) {
++              core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
++              for (i = 0; i < info->configured; i++) {
++                      core = &info->core[i];
++                      if (core->core_id == core_id) {
++                              nr += smp_add_core(core, &avail, true, early);
+                               break;
++                      }
+               }
+       }
++      for (i = 0; i < info->combined; i++) {
++              configured = i < info->configured;
++              nr += smp_add_core(&info->core[i], &avail, configured, early);
++      }
+       return nr;
+ }
+@@ -808,7 +836,7 @@ void __init smp_detect_cpus(void)
+       /* Add CPUs present at boot */
+       get_online_cpus();
+-      __smp_rescan_cpus(info, 0);
++      __smp_rescan_cpus(info, true);
+       put_online_cpus();
+       memblock_free_early((unsigned long)info, sizeof(*info));
+ }
+@@ -1153,7 +1181,7 @@ int __ref smp_rescan_cpus(void)
+       smp_get_core_info(info, 0);
+       get_online_cpus();
+       mutex_lock(&smp_cpu_state_mutex);
+-      nr = __smp_rescan_cpus(info, 1);
++      nr = __smp_rescan_cpus(info, false);
+       mutex_unlock(&smp_cpu_state_mutex);
+       put_online_cpus();
+       kfree(info);
+-- 
+2.20.1
+
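The essence of the fix is ordering: enumerate the IPL core first so that its SMT threads receive logical CPU numbers 0..mtid, and only then number the remaining cores. The user-space sketch below illustrates that ordering with hypothetical data structures and sizes; it is not the s390 implementation.

/*
 * Illustrative user-space sketch (not the kernel code): number the boot
 * core's SMT threads first, then all other cores. Core list, thread count
 * and addresses are hypothetical simplifications of the SCLP core info.
 */
#include <stdio.h>

#define MAX_CPUS 16

static int logical_of[MAX_CPUS];        /* logical CPU -> physical thread address */
static int next_logical;
static const int threads_per_core = 2;  /* i.e. smp_cpu_mtid + 1 */

static void add_core(int core_id)
{
	for (int t = 0; t < threads_per_core && next_logical < MAX_CPUS; t++)
		logical_of[next_logical++] = core_id * threads_per_core + t;
}

int main(void)
{
	const int cores[] = { 0, 1, 2, 3 };
	const int boot_core = 2;            /* system was IPL'ed from core 2 */

	add_core(boot_core);                /* boot core first: logical CPUs 0,1 */
	for (unsigned i = 0; i < sizeof(cores) / sizeof(cores[0]); i++)
		if (cores[i] != boot_core)
			add_core(cores[i]);

	for (int cpu = 0; cpu < next_logical; cpu++)
		printf("logical CPU %d -> thread address %d\n", cpu, logical_of[cpu]);
	return 0;
}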
diff --git a/queue-5.4/series b/queue-5.4/series
index d64f03e8e689860d296fb50d0d2609072a4eaa3b..5e79c811841c31fdcf9d7cd4ff6edd6fb9dc37bd 100644 (file)
@@ -172,3 +172,22 @@ tty-serial-msm_serial-fix-lockup-for-sysrq-and-oops.patch
 cifs-fix-lookup-of-root-ses-in-dfs-referral-cache.patch
 fs-cifs-fix-atime-update-check-vs-mtime.patch
 fix-compat-handling-of-ficlonerange-fideduperange-and-fs_ioc_fiemap.patch
+ath9k_htc-modify-byte-order-for-an-error-message.patch
+ath9k_htc-discard-undersized-packets.patch
+drm-i915-execlists-fix-annotation-for-decoupling-vir.patch
+xfs-periodically-yield-scrub-threads-to-the-schedule.patch
+net-add-annotations-on-hh-hh_len-lockless-accesses.patch
+ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch
+btrfs-get-rid-of-unique-workqueue-helper-functions.patch
+btrfs-only-associate-the-locked-page-with-one-async_.patch
+s390-smp-fix-physical-to-logical-cpu-map-for-smt.patch
+mm-sparse.c-mark-populate_section_memmap-as-__memini.patch
+xen-blkback-avoid-unmapping-unmapped-grant-pages.patch
+lib-ubsan-don-t-serialize-ubsan-report.patch
+efi-don-t-attempt-to-map-rci2-config-table-if-it-doe.patch
+perf-x86-intel-bts-fix-the-use-of-page_private.patch
+net-annotate-lockless-accesses-to-sk-sk_pacing_shift.patch
+hsr-avoid-debugfs-warning-message-when-module-is-rem.patch
+hsr-fix-error-handling-routine-in-hsr_dev_finalize.patch
+hsr-fix-a-race-condition-in-node-list-insertion-and-.patch
+mm-hugetlb-defer-freeing-of-huge-pages-if-in-non-tas.patch
diff --git a/queue-5.4/ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch b/queue-5.4/ubifs-ubifs_tnc_start_commit-fix-oob-in-layout_in_ga.patch
new file mode 100644 (file)
index 0000000..928605d
--- /dev/null
@@ -0,0 +1,154 @@
+From 94ae1299adf76cf1547149c3ae54ba40d5911708 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 20 Jul 2019 14:05:20 +0800
+Subject: ubifs: ubifs_tnc_start_commit: Fix OOB in layout_in_gaps
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+[ Upstream commit 6abf57262166b4f4294667fb5206ae7ba1ba96f5 ]
+
+Running stress-test test_2 from mtd-utils on a ubi device, we can
+sometimes get the following oops message:
+
+  BUG: unable to handle page fault for address: ffffffff00000140
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 280a067 P4D 280a067 PUD 0
+  Oops: 0000 [#1] SMP
+  CPU: 0 PID: 60 Comm: kworker/u16:1 Kdump: loaded Not tainted 5.2.0 #13
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0
+  -0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+  Workqueue: writeback wb_workfn (flush-ubifs_0_0)
+  RIP: 0010:rb_next_postorder+0x2e/0xb0
+  Code: 80 db 03 01 48 85 ff 0f 84 97 00 00 00 48 8b 17 48 83 05 bc 80 db
+  03 01 48 83 e2 fc 0f 84 82 00 00 00 48 83 05 b2 80 db 03 01 <48> 3b 7a
+  10 48 89 d0 74 02 f3 c3 48 8b 52 08 48 83 05 a3 80 db 03
+  RSP: 0018:ffffc90000887758 EFLAGS: 00010202
+  RAX: ffff888129ae4700 RBX: ffff888138b08400 RCX: 0000000080800001
+  RDX: ffffffff00000130 RSI: 0000000080800024 RDI: ffff888138b08400
+  RBP: ffff888138b08400 R08: ffffea0004a6b920 R09: 0000000000000000
+  R10: ffffc90000887740 R11: 0000000000000001 R12: ffff888128d48000
+  R13: 0000000000000800 R14: 000000000000011e R15: 00000000000007c8
+  FS:  0000000000000000(0000) GS:ffff88813ba00000(0000)
+  knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: ffffffff00000140 CR3: 000000013789d000 CR4: 00000000000006f0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+    destroy_old_idx+0x5d/0xa0 [ubifs]
+    ubifs_tnc_start_commit+0x4fe/0x1380 [ubifs]
+    do_commit+0x3eb/0x830 [ubifs]
+    ubifs_run_commit+0xdc/0x1c0 [ubifs]
+
+The above Oops is caused by a slab-out-of-bounds access in the do-while
+loop of function layout_in_gaps, which is indirectly called by
+ubifs_tnc_start_commit. In layout_in_gaps, a do-while loop places index
+nodes into the gaps created by obsolete index nodes in non-empty index
+LEBs until the remaining index nodes can all be placed into
+pre-allocated empty LEBs. @c->gap_lebs points to a memory area (an
+integer array) which records the LEB numbers used by the 'in-the-gaps'
+method. Whenever a suitable index LEB is found, the corresponding lnum
+is written into the next slot of the memory area pointed to by
+@c->gap_lebs. That area, of size ((@c->lst.idx_lebs + 1) * sizeof(int)),
+is allocated before the do-while loop and cannot be resized inside it.
+However, @c->lst.idx_lebs can be increased by function ubifs_change_lp
+(called by layout_leb_in_gaps->ubifs_find_dirty_idx_leb->get_idx_gc_leb)
+during the loop. So the out-of-bounds access happens when the number of
+loop iterations exceeds the original value of @c->lst.idx_lebs. See
+details at https://bugzilla.kernel.org/show_bug.cgi?id=204229.
+This patch fixes the out-of-bounds access in layout_in_gaps.
+
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ubifs/tnc_commit.c | 34 +++++++++++++++++++++++++++-------
+ 1 file changed, 27 insertions(+), 7 deletions(-)
+
+diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
+index a384a0f9ff32..234be1c4dc87 100644
+--- a/fs/ubifs/tnc_commit.c
++++ b/fs/ubifs/tnc_commit.c
+@@ -212,7 +212,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
+ /**
+  * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
+  * @c: UBIFS file-system description object
+- * @p: return LEB number here
++ * @p: return LEB number in @c->gap_lebs[p]
+  *
+  * This function lays out new index nodes for dirty znodes using in-the-gaps
+  * method of TNC commit.
+@@ -221,7 +221,7 @@ static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
+  * This function returns the number of index nodes written into the gaps, or a
+  * negative error code on failure.
+  */
+-static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
++static int layout_leb_in_gaps(struct ubifs_info *c, int p)
+ {
+       struct ubifs_scan_leb *sleb;
+       struct ubifs_scan_node *snod;
+@@ -236,7 +236,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
+                * filled, however we do not check there at present.
+                */
+               return lnum; /* Error code */
+-      *p = lnum;
++      c->gap_lebs[p] = lnum;
+       dbg_gc("LEB %d", lnum);
+       /*
+        * Scan the index LEB.  We use the generic scan for this even though
+@@ -355,7 +355,7 @@ static int get_leb_cnt(struct ubifs_info *c, int cnt)
+  */
+ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+ {
+-      int err, leb_needed_cnt, written, *p;
++      int err, leb_needed_cnt, written, p = 0, old_idx_lebs, *gap_lebs;
+       dbg_gc("%d znodes to write", cnt);
+@@ -364,9 +364,9 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+       if (!c->gap_lebs)
+               return -ENOMEM;
+-      p = c->gap_lebs;
++      old_idx_lebs = c->lst.idx_lebs;
+       do {
+-              ubifs_assert(c, p < c->gap_lebs + c->lst.idx_lebs);
++              ubifs_assert(c, p < c->lst.idx_lebs);
+               written = layout_leb_in_gaps(c, p);
+               if (written < 0) {
+                       err = written;
+@@ -392,9 +392,29 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
+               leb_needed_cnt = get_leb_cnt(c, cnt);
+               dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt,
+                      leb_needed_cnt, c->ileb_cnt);
++              /*
++               * Dynamically change the size of @c->gap_lebs to prevent
++               * oob, because @c->lst.idx_lebs could be increased by
++               * function @get_idx_gc_leb (called by layout_leb_in_gaps->
++               * ubifs_find_dirty_idx_leb) during loop. Only enlarge
++               * @c->gap_lebs when needed.
++               *
++               */
++              if (leb_needed_cnt > c->ileb_cnt && p >= old_idx_lebs &&
++                  old_idx_lebs < c->lst.idx_lebs) {
++                      old_idx_lebs = c->lst.idx_lebs;
++                      gap_lebs = krealloc(c->gap_lebs, sizeof(int) *
++                                             (old_idx_lebs + 1), GFP_NOFS);
++                      if (!gap_lebs) {
++                              kfree(c->gap_lebs);
++                              c->gap_lebs = NULL;
++                              return -ENOMEM;
++                      }
++                      c->gap_lebs = gap_lebs;
++              }
+       } while (leb_needed_cnt > c->ileb_cnt);
+-      *p = -1;
++      c->gap_lebs[p] = -1;
+       return 0;
+ }
+-- 
+2.20.1
+
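The mechanical core of the fix is growing @c->gap_lebs with krealloc() whenever the loop discovers it needs more slots than were sized up front, and freeing the old buffer if the reallocation fails. The user-space sketch below shows that grow-or-free pattern with realloc(); the helper name and sizes are hypothetical, not the UBIFS code.

/*
 * Illustrative user-space sketch (not the UBIFS code): grow an array whose
 * required length can increase while a loop runs, using a temporary pointer
 * so the original allocation is not leaked when realloc() fails.
 */
#include <stdio.h>
#include <stdlib.h>

static int *grow_gap_lebs(int *gap_lebs, size_t new_len)
{
	int *tmp = realloc(gap_lebs, new_len * sizeof(*tmp));

	if (!tmp) {
		free(gap_lebs);   /* mirror the patch: drop the old buffer on failure */
		return NULL;
	}
	return tmp;
}

int main(void)
{
	size_t len = 4;
	int *gap_lebs = malloc(len * sizeof(*gap_lebs));

	if (!gap_lebs)
		return 1;

	/* ... the loop discovers it needs more slots than originally sized ... */
	len = 8;
	gap_lebs = grow_gap_lebs(gap_lebs, len);
	if (!gap_lebs)
		return 1;

	gap_lebs[len - 1] = -1;   /* terminator, as in layout_in_gaps() */
	printf("resized to %zu slots\n", len);
	free(gap_lebs);
	return 0;
}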
diff --git a/queue-5.4/xen-blkback-avoid-unmapping-unmapped-grant-pages.patch b/queue-5.4/xen-blkback-avoid-unmapping-unmapped-grant-pages.patch
new file mode 100644 (file)
index 0000000..bbcf914
--- /dev/null
@@ -0,0 +1,72 @@
+From 6e86974fa7e00a71d39242261a04b8add0d08e3c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Nov 2019 16:36:05 +0100
+Subject: xen/blkback: Avoid unmapping unmapped grant pages
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: SeongJae Park <sjpark@amazon.de>
+
+[ Upstream commit f9bd84a8a845d82f9b5a081a7ae68c98a11d2e84 ]
+
+For each I/O request, blkback first maps the foreign pages for the
+request to its local pages.  If an allocation of a local page for the
+mapping fails, it should unmap every mapping already made for the
+request.
+
+However, blkback's handling mechanism for the allocation failure does
+not mark the remaining foreign pages as unmapped.  Therefore, the unmap
+function merely tries to unmap every valid grant page for the request,
+including the pages not mapped due to the allocation failure.  On a
+system that fails the allocation frequently, this problem leads to the
+following kernel crash.
+
+  [  372.012538] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
+  [  372.012546] IP: [<ffffffff814071ac>] gnttab_unmap_refs.part.7+0x1c/0x40
+  [  372.012557] PGD 16f3e9067 PUD 16426e067 PMD 0
+  [  372.012562] Oops: 0002 [#1] SMP
+  [  372.012566] Modules linked in: act_police sch_ingress cls_u32
+  ...
+  [  372.012746] Call Trace:
+  [  372.012752]  [<ffffffff81407204>] gnttab_unmap_refs+0x34/0x40
+  [  372.012759]  [<ffffffffa0335ae3>] xen_blkbk_unmap+0x83/0x150 [xen_blkback]
+  ...
+  [  372.012802]  [<ffffffffa0336c50>] dispatch_rw_block_io+0x970/0x980 [xen_blkback]
+  ...
+  Decompressing Linux... Parsing ELF... done.
+  Booting the kernel.
+  [    0.000000] Initializing cgroup subsys cpuset
+
+This commit fixes the problem by marking as invalid the grant pages of
+the given request that were not mapped due to the allocation failure.
+
+Fixes: c6cc142dac52 ("xen-blkback: use balloon pages for all mappings")
+
+Reviewed-by: David Woodhouse <dwmw@amazon.de>
+Reviewed-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: Paul Durrant <pdurrant@amazon.co.uk>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: SeongJae Park <sjpark@amazon.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/xen-blkback/blkback.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
+index fd1e19f1a49f..3666afa639d1 100644
+--- a/drivers/block/xen-blkback/blkback.c
++++ b/drivers/block/xen-blkback/blkback.c
+@@ -936,6 +936,8 @@ next:
+ out_of_memory:
+       pr_alert("%s: out of memory\n", __func__);
+       put_free_pages(ring, pages_to_gnt, segs_to_map);
++      for (i = last_map; i < num; i++)
++              pages[i]->handle = BLKBACK_INVALID_HANDLE;
+       return -ENOMEM;
+ }
+-- 
+2.20.1
+
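The fix relies on a common idiom: when setup fails part-way through, explicitly mark every slot that was never set up with an invalid sentinel so the shared cleanup path skips it instead of undoing work that was never done. The sketch below illustrates the idiom in user space with hypothetical handle values and helpers; it is not the blkback code.

/*
 * Illustrative user-space sketch (not the blkback code): on a mid-loop
 * failure, invalidate every remaining entry so cleanup only touches the
 * entries that were actually mapped.
 */
#include <stdio.h>

#define NR_SEGS        8
#define INVALID_HANDLE (-1)

static int handles[NR_SEGS];

static int map_segment(int i)
{
	/* Pretend the allocation backing segment 5 fails. */
	return (i == 5) ? -1 : i + 100;
}

static void cleanup(void)
{
	for (int i = 0; i < NR_SEGS; i++) {
		if (handles[i] == INVALID_HANDLE)
			continue;           /* never mapped: nothing to undo */
		printf("unmapping segment %d (handle %d)\n", i, handles[i]);
	}
}

int main(void)
{
	int i;

	for (i = 0; i < NR_SEGS; i++) {
		int h = map_segment(i);

		if (h < 0)
			break;              /* allocation failure mid-request */
		handles[i] = h;
	}
	/* The fix: explicitly invalidate everything that was not mapped. */
	for (; i < NR_SEGS; i++)
		handles[i] = INVALID_HANDLE;

	cleanup();
	return 0;
}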
diff --git a/queue-5.4/xfs-periodically-yield-scrub-threads-to-the-schedule.patch b/queue-5.4/xfs-periodically-yield-scrub-threads-to-the-schedule.patch
new file mode 100644 (file)
index 0000000..8aa1ca3
--- /dev/null
@@ -0,0 +1,93 @@
+From e75720ba35fd5a8ca6a895f912bc1176b676e5eb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Nov 2019 15:33:57 -0800
+Subject: xfs: periodically yield scrub threads to the scheduler
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+[ Upstream commit 5d1116d4c6af3e580f1ed0382ca5a94bd65a34cf ]
+
+Christoph Hellwig complained about the following soft lockup warning
+when running scrub after generic/175 when preemption is disabled and
+slub debugging is enabled:
+
+watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [xfs_scrub:161]
+Modules linked in:
+irq event stamp: 41692326
+hardirqs last  enabled at (41692325): [<ffffffff8232c3b7>] _raw_0
+hardirqs last disabled at (41692326): [<ffffffff81001c5a>] trace0
+softirqs last  enabled at (41684994): [<ffffffff8260031f>] __do_e
+softirqs last disabled at (41684987): [<ffffffff81127d8c>] irq_e0
+CPU: 3 PID: 16189 Comm: xfs_scrub Not tainted 5.4.0-rc3+ #30
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.124
+RIP: 0010:_raw_spin_unlock_irqrestore+0x39/0x40
+Code: 89 f3 be 01 00 00 00 e8 d5 3a e5 fe 48 89 ef e8 ed 87 e5 f2
+RSP: 0018:ffffc9000233f970 EFLAGS: 00000286 ORIG_RAX: ffffffffff3
+RAX: ffff88813b398040 RBX: 0000000000000286 RCX: 0000000000000006
+RDX: 0000000000000006 RSI: ffff88813b3988c0 RDI: ffff88813b398040
+RBP: ffff888137958640 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffffea00042b0c00
+R13: 0000000000000001 R14: ffff88810ac32308 R15: ffff8881376fc040
+FS:  00007f6113dea700(0000) GS:ffff88813bb80000(0000) knlGS:00000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f6113de8ff8 CR3: 000000012f290000 CR4: 00000000000006e0
+Call Trace:
+ free_debug_processing+0x1dd/0x240
+ __slab_free+0x231/0x410
+ kmem_cache_free+0x30e/0x360
+ xchk_ag_btcur_free+0x76/0xb0
+ xchk_ag_free+0x10/0x80
+ xchk_bmap_iextent_xref.isra.14+0xd9/0x120
+ xchk_bmap_iextent+0x187/0x210
+ xchk_bmap+0x2e0/0x3b0
+ xfs_scrub_metadata+0x2e7/0x500
+ xfs_ioc_scrub_metadata+0x4a/0xa0
+ xfs_file_ioctl+0x58a/0xcd0
+ do_vfs_ioctl+0xa0/0x6f0
+ ksys_ioctl+0x5b/0x90
+ __x64_sys_ioctl+0x11/0x20
+ do_syscall_64+0x4b/0x1a0
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+If preemption is disabled, all metadata buffers needed to perform the
+scrub are already in memory, and there are a lot of records to check,
+then it's possible that the scrub thread will run for an extended period of
+time without sleeping for IO or any other reason.  Then the watchdog
+timer or the RCU stall timeout can trigger, producing the backtrace
+above.
+
+To fix this problem, call cond_resched() from the scrub thread so that
+we back out to the scheduler whenever necessary.
+
+Reported-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/scrub/common.h | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
+index 003a772cd26c..2e50d146105d 100644
+--- a/fs/xfs/scrub/common.h
++++ b/fs/xfs/scrub/common.h
+@@ -14,8 +14,15 @@
+ static inline bool
+ xchk_should_terminate(
+       struct xfs_scrub        *sc,
+-      int                             *error)
++      int                     *error)
+ {
++      /*
++       * If preemption is disabled, we need to yield to the scheduler every
++       * few seconds so that we don't run afoul of the soft lockup watchdog
++       * or RCU stall detector.
++       */
++      cond_resched();
++
+       if (fatal_signal_pending(current)) {
+               if (*error == 0)
+                       *error = -EAGAIN;
+-- 
+2.20.1
+
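The change itself is small: xchk_should_terminate() now calls cond_resched() on every invocation, so a CPU-bound scrub loop periodically gives the scheduler a chance to run before checking for a fatal signal. The user-space sketch below mimics that shape with sched_yield(); the iteration counts and helper name are hypothetical, and it is an analogue rather than the XFS code.

/*
 * Illustrative user-space sketch (not the XFS code): a long CPU-bound loop
 * that periodically yields to the scheduler from its termination check,
 * roughly what cond_resched() achieves inside xchk_should_terminate().
 */
#include <sched.h>
#include <stdio.h>
#include <stdbool.h>

static bool should_terminate(long i, long total)
{
	sched_yield();          /* analogue of cond_resched(): give up the CPU */
	return i >= total;      /* analogue of the fatal-signal check          */
}

int main(void)
{
	const long total = 1000000;
	long checked = 0;

	for (long i = 0; ; i++) {
		if ((i % 4096) == 0 && should_terminate(i, total))
			break;
		checked++;          /* pretend to verify one record */
	}
	printf("checked %ld records\n", checked);
	return 0;
}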