From fabce5f86569335b54acd2867e8cfa77d278e6c8 Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Mon, 4 Jan 2021 10:53:14 -0500
Subject: [PATCH] Fixes for 5.4
Signed-off-by: Sasha Levin
---
 ...he-full-allocated-memory-at-hw_param.patch | 52 +++
 ...erity-work-if-i-o-error-when-system-.patch | 59 +++
 ...id-race-condition-for-shrinker-count.patch | 236 ++++++++++
 ...arn-if-mnt_count-has-become-negative.patch | 87 ++++
 ...issing-destroy_workqueue-on-error-in.patch | 47 ++
 ...ject-uevent-until-after-module-init-.patch | 72 +++
 ...e_state_going-state-when-a-module-fa.patch | 36 ++
 ...-layout-related-use-after-free-race-.patch | 131 ++++++
 ...dd-missing-iounmap-on-error-in-mpic_.patch | 39 ++
 ...ta-don-t-overflow-quota-file-offsets.patch | 66 +++
 ...031-fix-resource-leak-in-pl031_probe.patch | 42 ++
 ...6i-fix-memleak-in-sun6i_rtc_clk_init.patch | 65 +++
 queue-5.4/series | 14 +
 ...sched-remove-bogus-boot-safety-check.patch | 49 ++
 ...-submit-all-data-segments-atomically.patch | 434 ++++++++++++++++++
 15 files changed, 1429 insertions(+)
 create mode 100644 queue-5.4/alsa-pcm-clear-the-full-allocated-memory-at-hw_param.patch
 create mode 100644 queue-5.4/dm-verity-skip-verity-work-if-i-o-error-when-system-.patch
 create mode 100644 queue-5.4/f2fs-avoid-race-condition-for-shrinker-count.patch
 create mode 100644 queue-5.4/fs-namespace.c-warn-if-mnt_count-has-become-negative.patch
 create mode 100644 queue-5.4/i3c-master-fix-missing-destroy_workqueue-on-error-in.patch
 create mode 100644 queue-5.4/module-delay-kobject-uevent-until-after-module-init-.patch
 create mode 100644 queue-5.4/module-set-module_state_going-state-when-a-module-fa.patch
 create mode 100644 queue-5.4/nfsv4-fix-a-pnfs-layout-related-use-after-free-race-.patch
 create mode 100644 queue-5.4/powerpc-sysdev-add-missing-iounmap-on-error-in-mpic_.patch
 create mode 100644 queue-5.4/quota-don-t-overflow-quota-file-offsets.patch
 create mode 100644 queue-5.4/rtc-pl031-fix-resource-leak-in-pl031_probe.patch
 create mode 100644 queue-5.4/rtc-sun6i-fix-memleak-in-sun6i_rtc_clk_init.patch
 create mode 100644 queue-5.4/tick-sched-remove-bogus-boot-safety-check.patch
 create mode 100644 queue-5.4/um-ubd-submit-all-data-segments-atomically.patch

diff --git a/queue-5.4/alsa-pcm-clear-the-full-allocated-memory-at-hw_param.patch b/queue-5.4/alsa-pcm-clear-the-full-allocated-memory-at-hw_param.patch
new file mode 100644
index 00000000000..d91ce231b22
--- /dev/null
+++ b/queue-5.4/alsa-pcm-clear-the-full-allocated-memory-at-hw_param.patch
@@ -0,0 +1,52 @@
+From ba166a6f9c66e8727e70bc4aaa18684f8c69ae65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 18 Dec 2020 15:56:25 +0100
+Subject: ALSA: pcm: Clear the full allocated memory at hw_params
+
+From: Takashi Iwai
+
+[ Upstream commit 618de0f4ef11acd8cf26902e65493d46cc20cc89 ]
+
+The PCM hw_params core function tries to clear up the PCM buffer
+before actually using it, to avoid leaking information from previous
+usages or from the usage before a new allocation. It performs the
+memset() with runtime->dma_bytes, but this might still leave some
+remaining bytes untouched; namely, the PCM buffer size is page-aligned
+for mmap, hence runtime->dma_bytes doesn't necessarily cover all PCM
+buffer pages, and the remaining bytes are exposed via mmap.
+
+This patch changes the memory clearance to cover all the buffer pages
+if the stream is supposed to be mmap-ready (which guarantees that the
+buffer size is page-aligned). 
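+
+In isolation, the clearing logic after this change reads roughly as
+below (a sketch mirroring the hunk that follows; PAGE_ALIGN() rounds
+the clear size up to the next page boundary, so the page-padded tail
+visible through mmap is zeroed as well):
+
+        if (runtime->dma_area && !substream->ops->copy_user) {
+                size_t size = runtime->dma_bytes;
+
+                /* mmap exposes whole pages; clear the padded tail too */
+                if (runtime->info & SNDRV_PCM_INFO_MMAP)
+                        size = PAGE_ALIGN(size);
+                memset(runtime->dma_area, 0, size);
+        }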
+ +Reviewed-by: Lars-Peter Clausen +Link: https://lore.kernel.org/r/20201218145625.2045-3-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/pcm_native.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c +index ec501fbaabe49..0c5b7a54ca81c 100644 +--- a/sound/core/pcm_native.c ++++ b/sound/core/pcm_native.c +@@ -717,8 +717,13 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, + runtime->boundary *= 2; + + /* clear the buffer for avoiding possible kernel info leaks */ +- if (runtime->dma_area && !substream->ops->copy_user) +- memset(runtime->dma_area, 0, runtime->dma_bytes); ++ if (runtime->dma_area && !substream->ops->copy_user) { ++ size_t size = runtime->dma_bytes; ++ ++ if (runtime->info & SNDRV_PCM_INFO_MMAP) ++ size = PAGE_ALIGN(size); ++ memset(runtime->dma_area, 0, size); ++ } + + snd_pcm_timer_resolution_change(substream); + snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); +-- +2.27.0 + diff --git a/queue-5.4/dm-verity-skip-verity-work-if-i-o-error-when-system-.patch b/queue-5.4/dm-verity-skip-verity-work-if-i-o-error-when-system-.patch new file mode 100644 index 00000000000..342aa4b8efb --- /dev/null +++ b/queue-5.4/dm-verity-skip-verity-work-if-i-o-error-when-system-.patch @@ -0,0 +1,59 @@ +From 94e1690d8252cd05b52d476b98c00daecde676ce Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 3 Dec 2020 09:46:59 +0900 +Subject: dm verity: skip verity work if I/O error when system is shutting down + +From: Hyeongseok Kim + +[ Upstream commit 252bd1256396cebc6fc3526127fdb0b317601318 ] + +If emergency system shutdown is called, like by thermal shutdown, +a dm device could be alive when the block device couldn't process +I/O requests anymore. In this state, the handling of I/O errors +by new dm I/O requests or by those already in-flight can lead to +a verity corruption state, which is a misjudgment. + +So, skip verity work in response to I/O error when system is shutting +down. + +Signed-off-by: Hyeongseok Kim +Reviewed-by: Sami Tolvanen +Signed-off-by: Mike Snitzer +Signed-off-by: Sasha Levin +--- + drivers/md/dm-verity-target.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c +index 4fb33e7562c52..2aeb922e2365c 100644 +--- a/drivers/md/dm-verity-target.c ++++ b/drivers/md/dm-verity-target.c +@@ -533,6 +533,15 @@ static int verity_verify_io(struct dm_verity_io *io) + return 0; + } + ++/* ++ * Skip verity work in response to I/O error when system is shutting down. ++ */ ++static inline bool verity_is_system_shutting_down(void) ++{ ++ return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ++ || system_state == SYSTEM_RESTART; ++} ++ + /* + * End one "io" structure with a given error. 
+ */
+@@ -560,7 +569,8 @@ static void verity_end_io(struct bio *bio)
+ {
+ struct dm_verity_io *io = bio->bi_private;
+
+- if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
++ if (bio->bi_status &&
++ (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) {
+ verity_finish_io(io, bio->bi_status);
+ return;
+ }
+--
+2.27.0
+
diff --git a/queue-5.4/f2fs-avoid-race-condition-for-shrinker-count.patch b/queue-5.4/f2fs-avoid-race-condition-for-shrinker-count.patch
new file mode 100644
index 00000000000..55bacdad2b1
--- /dev/null
+++ b/queue-5.4/f2fs-avoid-race-condition-for-shrinker-count.patch
@@ -0,0 +1,236 @@
+From fe3750d3a733333f196ccdc053a644368b407f6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 6 Nov 2020 13:22:05 -0800
+Subject: f2fs: avoid race condition for shrinker count
+
+From: Jaegeuk Kim
+
+[ Upstream commit a95ba66ac1457b76fe472c8e092ab1006271f16c ]
+
+Light reported that sometimes the shrinker sees nat_cnt < dirty_nat_cnt,
+resulting in wrong shrinker work. Let's avoid returning an insanely
+overflowed value by adding a single tracking value.
+
+Reported-by: Light Hsieh
+Reviewed-by: Chao Yu
+Signed-off-by: Jaegeuk Kim
+Signed-off-by: Sasha Levin
+---
+ fs/f2fs/checkpoint.c | 2 +-
+ fs/f2fs/debug.c | 11 ++++++-----
+ fs/f2fs/f2fs.h | 10 ++++++++--
+ fs/f2fs/node.c | 29 ++++++++++++++++-----------
+ fs/f2fs/node.h | 4 ++--
+ fs/f2fs/shrinker.c | 4 +---
+ 6 files changed, 36 insertions(+), 24 deletions(-)
+
+diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
+index c966ccc44c157..a57219c51c01a 100644
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -1596,7 +1596,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+ goto out;
+ }
+
+- if (NM_I(sbi)->dirty_nat_cnt == 0 &&
++ if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 &&
+ SIT_I(sbi)->dirty_sentries == 0 &&
+ prefree_segments(sbi) == 0) {
+ f2fs_flush_sit_entries(sbi, cpc);
+diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
+index 9b0bedd82581b..d8d64447bc947 100644
+--- a/fs/f2fs/debug.c
++++ b/fs/f2fs/debug.c
+@@ -107,8 +107,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
+ si->node_pages = NODE_MAPPING(sbi)->nrpages;
+ if (sbi->meta_inode)
+ si->meta_pages = META_MAPPING(sbi)->nrpages;
+- si->nats = NM_I(sbi)->nat_cnt;
+- si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
++ si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT];
++ si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT];
+ si->sits = MAIN_SEGS(sbi);
+ si->dirty_sits = SIT_I(sbi)->dirty_sentries;
+ si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
+@@ -254,9 +254,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
+ si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
+ NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
+ sizeof(struct free_nid);
+- si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
+- si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
+- sizeof(struct nat_entry_set);
++ si->cache_mem += NM_I(sbi)->nat_cnt[TOTAL_NAT] *
++ sizeof(struct nat_entry);
++ si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] *
++ sizeof(struct nat_entry_set);
+ si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
+ for (i = 0; i < MAX_INO_ENTRY; i++)
+ si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
+diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
+index 0ddc4a74b9d43..4ca3c2a0a0f5b 100644
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -797,6 +797,13 @@ enum nid_state {
+ MAX_NID_STATE,
+ };
+
++enum nat_state {
++ TOTAL_NAT,
++ DIRTY_NAT,
++ RECLAIMABLE_NAT,
++ MAX_NAT_STATE,
++};
++
+ struct f2fs_nm_info {
+ block_t 
nat_blkaddr; /* base disk address of NAT */ + nid_t max_nid; /* maximum possible node ids */ +@@ -812,8 +819,7 @@ struct f2fs_nm_info { + struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ + struct list_head nat_entries; /* cached nat entry list (clean) */ + spinlock_t nat_list_lock; /* protect clean nat entry list */ +- unsigned int nat_cnt; /* the # of cached nat entries */ +- unsigned int dirty_nat_cnt; /* total num of nat entries in set */ ++ unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ + unsigned int nat_blocks; /* # of nat blocks */ + + /* free node ids management */ +diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c +index 3ac2a4b32375d..7ce33698ae381 100644 +--- a/fs/f2fs/node.c ++++ b/fs/f2fs/node.c +@@ -62,8 +62,8 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) + sizeof(struct free_nid)) >> PAGE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); + } else if (type == NAT_ENTRIES) { +- mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> +- PAGE_SHIFT; ++ mem_size = (nm_i->nat_cnt[TOTAL_NAT] * ++ sizeof(struct nat_entry)) >> PAGE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); + if (excess_cached_nats(sbi)) + res = false; +@@ -177,7 +177,8 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, + list_add_tail(&ne->list, &nm_i->nat_entries); + spin_unlock(&nm_i->nat_list_lock); + +- nm_i->nat_cnt++; ++ nm_i->nat_cnt[TOTAL_NAT]++; ++ nm_i->nat_cnt[RECLAIMABLE_NAT]++; + return ne; + } + +@@ -207,7 +208,8 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, + static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) + { + radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); +- nm_i->nat_cnt--; ++ nm_i->nat_cnt[TOTAL_NAT]--; ++ nm_i->nat_cnt[RECLAIMABLE_NAT]--; + __free_nat_entry(e); + } + +@@ -253,7 +255,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, + if (get_nat_flag(ne, IS_DIRTY)) + goto refresh_list; + +- nm_i->dirty_nat_cnt++; ++ nm_i->nat_cnt[DIRTY_NAT]++; ++ nm_i->nat_cnt[RECLAIMABLE_NAT]--; + set_nat_flag(ne, IS_DIRTY, true); + refresh_list: + spin_lock(&nm_i->nat_list_lock); +@@ -273,7 +276,8 @@ static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, + + set_nat_flag(ne, IS_DIRTY, false); + set->entry_cnt--; +- nm_i->dirty_nat_cnt--; ++ nm_i->nat_cnt[DIRTY_NAT]--; ++ nm_i->nat_cnt[RECLAIMABLE_NAT]++; + } + + static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, +@@ -2881,14 +2885,17 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) + LIST_HEAD(sets); + int err = 0; + +- /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ ++ /* ++ * during unmount, let's flush nat_bits before checking ++ * nat_cnt[DIRTY_NAT]. ++ */ + if (enabled_nat_bits(sbi, cpc)) { + down_write(&nm_i->nat_tree_lock); + remove_nats_in_journal(sbi); + up_write(&nm_i->nat_tree_lock); + } + +- if (!nm_i->dirty_nat_cnt) ++ if (!nm_i->nat_cnt[DIRTY_NAT]) + return 0; + + down_write(&nm_i->nat_tree_lock); +@@ -2899,7 +2906,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) + * into nat entry set. 
+ */ + if (enabled_nat_bits(sbi, cpc) || +- !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) ++ !__has_cursum_space(journal, ++ nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL)) + remove_nats_in_journal(sbi); + + while ((found = __gang_lookup_nat_set(nm_i, +@@ -3023,7 +3031,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi) + F2FS_RESERVED_NODE_NUM; + nm_i->nid_cnt[FREE_NID] = 0; + nm_i->nid_cnt[PREALLOC_NID] = 0; +- nm_i->nat_cnt = 0; + nm_i->ram_thresh = DEF_RAM_THRESHOLD; + nm_i->ra_nid_pages = DEF_RA_NID_PAGES; + nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; +@@ -3160,7 +3167,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) + __del_from_nat_cache(nm_i, natvec[idx]); + } + } +- f2fs_bug_on(sbi, nm_i->nat_cnt); ++ f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]); + + /* destroy nat set cache */ + nid = 0; +diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h +index e05af5df56485..4a2e7eaf2b028 100644 +--- a/fs/f2fs/node.h ++++ b/fs/f2fs/node.h +@@ -123,13 +123,13 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, + + static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi) + { +- return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid * ++ return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid * + NM_I(sbi)->dirty_nats_ratio / 100; + } + + static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) + { +- return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; ++ return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD; + } + + static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi) +diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c +index a467aca29cfef..3ceebaaee3840 100644 +--- a/fs/f2fs/shrinker.c ++++ b/fs/f2fs/shrinker.c +@@ -18,9 +18,7 @@ static unsigned int shrinker_run_no; + + static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) + { +- long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt; +- +- return count > 0 ? count : 0; ++ return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT]; + } + + static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) +-- +2.27.0 + diff --git a/queue-5.4/fs-namespace.c-warn-if-mnt_count-has-become-negative.patch b/queue-5.4/fs-namespace.c-warn-if-mnt_count-has-become-negative.patch new file mode 100644 index 00000000000..643b791cec3 --- /dev/null +++ b/queue-5.4/fs-namespace.c-warn-if-mnt_count-has-become-negative.patch @@ -0,0 +1,87 @@ +From 3f03fcb4296348f5d4648c7206f20217ed0c03da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 31 Oct 2020 21:40:21 -0700 +Subject: fs/namespace.c: WARN if mnt_count has become negative + +From: Eric Biggers + +[ Upstream commit edf7ddbf1c5eb98b720b063b73e20e8a4a1ce673 ] + +Missing calls to mntget() (or equivalently, too many calls to mntput()) +are hard to detect because mntput() delays freeing mounts using +task_work_add(), then again using call_rcu(). As a result, mnt_count +can often be decremented to -1 without getting a KASAN use-after-free +report. Such cases are still bugs though, and they point to real +use-after-frees being possible. + +For an example of this, see the bug fixed by commit 1b0b9cc8d379 +("vfs: fsmount: add missing mntget()"), discussed at +https://lkml.kernel.org/linux-fsdevel/20190605135401.GB30925@xxxxxxxxxxxxxxxxxxxxxxxxx/T/#u. +This bug *should* have been trivial to find. But actually, it wasn't +found until syzkaller happened to use fchdir() to manipulate the +reference count just right for the bug to be noticeable. 
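+
+To make such bugs detectable, the slow path of mntput_no_expire() now
+reads the count once after the final decrement (a sketch of the
+fs/namespace.c hunk below); mnt_get_count() is also switched from
+unsigned int to int so that a negative sum of the per-CPU counters
+survives the summation:
+
+        mnt_add_count(mnt, -1);
+        count = mnt_get_count(mnt);
+        if (count != 0) {
+                WARN_ON(count < 0);     /* too many mntput()s */
+                rcu_read_unlock();
+                unlock_mount_hash();
+                return;
+        }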
+
+Address this by making mntput_no_expire() issue a WARN if mnt_count has
+become negative.
+
+Suggested-by: Miklos Szeredi
+Signed-off-by: Eric Biggers
+Signed-off-by: Al Viro
+Signed-off-by: Sasha Levin
+---
+ fs/namespace.c | 9 ++++++---
+ fs/pnode.h | 2 +-
+ 2 files changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 2adfe7b166a3e..76ea92994d26d 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -156,10 +156,10 @@ static inline void mnt_add_count(struct mount *mnt, int n)
+ /*
+ * vfsmount lock must be held for write
+ */
+-unsigned int mnt_get_count(struct mount *mnt)
++int mnt_get_count(struct mount *mnt)
+ {
+ #ifdef CONFIG_SMP
+- unsigned int count = 0;
++ int count = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+@@ -1123,6 +1123,7 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
+ static void mntput_no_expire(struct mount *mnt)
+ {
+ LIST_HEAD(list);
++ int count;
+
+ rcu_read_lock();
+ if (likely(READ_ONCE(mnt->mnt_ns))) {
+@@ -1146,7 +1147,9 @@ static void mntput_no_expire(struct mount *mnt)
+ */
+ smp_mb();
+ mnt_add_count(mnt, -1);
+- if (mnt_get_count(mnt)) {
++ count = mnt_get_count(mnt);
++ if (count != 0) {
++ WARN_ON(count < 0);
+ rcu_read_unlock();
+ unlock_mount_hash();
+ return;
+diff --git a/fs/pnode.h b/fs/pnode.h
+index 49a058c73e4c7..26f74e092bd98 100644
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -44,7 +44,7 @@ int propagate_mount_busy(struct mount *, int);
+ void propagate_mount_unlock(struct mount *);
+ void mnt_release_group_id(struct mount *);
+ int get_dominating_id(struct mount *mnt, const struct path *root);
+-unsigned int mnt_get_count(struct mount *mnt);
++int mnt_get_count(struct mount *mnt);
+ void mnt_set_mountpoint(struct mount *, struct mountpoint *,
+ struct mount *);
+ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
+--
+2.27.0
+
diff --git a/queue-5.4/i3c-master-fix-missing-destroy_workqueue-on-error-in.patch b/queue-5.4/i3c-master-fix-missing-destroy_workqueue-on-error-in.patch
new file mode 100644
index 00000000000..a640de911bb
--- /dev/null
+++ b/queue-5.4/i3c-master-fix-missing-destroy_workqueue-on-error-in.patch
@@ -0,0 +1,47 @@
+From fef746ab879b9e123042abf4d72e43a26e7f6bf9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 28 Oct 2020 17:15:43 +0800
+Subject: i3c master: fix missing destroy_workqueue() on error in
+ i3c_master_register
+
+From: Qinglang Miao
+
+[ Upstream commit 59165d16c699182b86b5c65181013f1fd88feb62 ]
+
+Add the missing destroy_workqueue() before returning from
+i3c_master_register() in the error handling case. 
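+
+After the change, the error labels in i3c_master_register() unwind in
+reverse order of setup and fall through to each other (a sketch of the
+hunks below):
+
+        err_cleanup_bus:
+                i3c_master_bus_cleanup(master);
+
+        err_destroy_wq:
+                destroy_workqueue(master->wq);
+
+        err_put_dev:
+                put_device(&master->dev);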
+ +Signed-off-by: Qinglang Miao +Signed-off-by: Boris Brezillon +Link: https://lore.kernel.org/linux-i3c/20201028091543.136167-1-miaoqinglang@huawei.com +Signed-off-by: Sasha Levin +--- + drivers/i3c/master.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c +index 6cc71c90f85ea..19337aed9f235 100644 +--- a/drivers/i3c/master.c ++++ b/drivers/i3c/master.c +@@ -2492,7 +2492,7 @@ int i3c_master_register(struct i3c_master_controller *master, + + ret = i3c_master_bus_init(master); + if (ret) +- goto err_put_dev; ++ goto err_destroy_wq; + + ret = device_add(&master->dev); + if (ret) +@@ -2523,6 +2523,9 @@ int i3c_master_register(struct i3c_master_controller *master, + err_cleanup_bus: + i3c_master_bus_cleanup(master); + ++err_destroy_wq: ++ destroy_workqueue(master->wq); ++ + err_put_dev: + put_device(&master->dev); + +-- +2.27.0 + diff --git a/queue-5.4/module-delay-kobject-uevent-until-after-module-init-.patch b/queue-5.4/module-delay-kobject-uevent-until-after-module-init-.patch new file mode 100644 index 00000000000..0d0bd31cffb --- /dev/null +++ b/queue-5.4/module-delay-kobject-uevent-until-after-module-init-.patch @@ -0,0 +1,72 @@ +From 97ab2a6e4b692ca101ecbb8508ac3ee4c2e8bfcd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Nov 2020 10:09:39 +0100 +Subject: module: delay kobject uevent until after module init call + +From: Jessica Yu + +[ Upstream commit 38dc717e97153e46375ee21797aa54777e5498f3 ] + +Apparently there has been a longstanding race between udev/systemd and +the module loader. Currently, the module loader sends a uevent right +after sysfs initialization, but before the module calls its init +function. However, some udev rules expect that the module has +initialized already upon receiving the uevent. + +This race has been triggered recently (see link in references) in some +systemd mount unit files. For instance, the configfs module creates the +/sys/kernel/config mount point in its init function, however the module +loader issues the uevent before this happens. sys-kernel-config.mount +expects to be able to mount /sys/kernel/config upon receipt of the +module loading uevent, but if the configfs module has not called its +init function yet, then this directory will not exist and the mount unit +fails. A similar situation exists for sys-fs-fuse-connections.mount, as +the fuse sysfs mount point is created during the fuse module's init +function. If udev is faster than module initialization then the mount +unit would fail in a similar fashion. + +To fix this race, delay the module KOBJ_ADD uevent until after the +module has finished calling its init routine. 
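+
+Concretely (matching the hunks below), the KOBJ_ADD uevent moves out
+of mod_sysfs_setup(), which runs before the init call, and into
+do_init_module() right after the MODULE_STATE_LIVE notification:
+
+        blocking_notifier_call_chain(&module_notify_list,
+                                     MODULE_STATE_LIVE, mod);
+
+        /* Delay uevent until module has finished its init routine */
+        kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);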
+ +References: https://github.com/systemd/systemd/issues/17586 +Reviewed-by: Greg Kroah-Hartman +Tested-By: Nicolas Morey-Chaisemartin +Signed-off-by: Jessica Yu +Signed-off-by: Sasha Levin +--- + kernel/module.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/kernel/module.c b/kernel/module.c +index 806a7196754a7..9e9af40698ffe 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1863,7 +1863,6 @@ static int mod_sysfs_init(struct module *mod) + if (err) + mod_kobject_put(mod); + +- /* delay uevent until full sysfs population */ + out: + return err; + } +@@ -1900,7 +1899,6 @@ static int mod_sysfs_setup(struct module *mod, + add_sect_attrs(mod, info); + add_notes_attrs(mod, info); + +- kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); + return 0; + + out_unreg_modinfo_attrs: +@@ -3608,6 +3606,9 @@ static noinline int do_init_module(struct module *mod) + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_LIVE, mod); + ++ /* Delay uevent until module has finished its init routine */ ++ kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); ++ + /* + * We need to finish all async code before the module init sequence + * is done. This has potential to deadlock. For example, a newly +-- +2.27.0 + diff --git a/queue-5.4/module-set-module_state_going-state-when-a-module-fa.patch b/queue-5.4/module-set-module_state_going-state-when-a-module-fa.patch new file mode 100644 index 00000000000..eefe929f0c1 --- /dev/null +++ b/queue-5.4/module-set-module_state_going-state-when-a-module-fa.patch @@ -0,0 +1,36 @@ +From b3c139ab7a42ab22e90e8c73324eba6dd77f4c01 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 27 Oct 2020 15:03:36 +0100 +Subject: module: set MODULE_STATE_GOING state when a module fails to load + +From: Miroslav Benes + +[ Upstream commit 5e8ed280dab9eeabc1ba0b2db5dbe9fe6debb6b5 ] + +If a module fails to load due to an error in prepare_coming_module(), +the following error handling in load_module() runs with +MODULE_STATE_COMING in module's state. Fix it by correctly setting +MODULE_STATE_GOING under "bug_cleanup" label. + +Signed-off-by: Miroslav Benes +Signed-off-by: Jessica Yu +Signed-off-by: Sasha Levin +--- + kernel/module.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/module.c b/kernel/module.c +index 45513909b01d5..806a7196754a7 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -3953,6 +3953,7 @@ static int load_module(struct load_info *info, const char __user *uargs, + MODULE_STATE_GOING, mod); + klp_module_going(mod); + bug_cleanup: ++ mod->state = MODULE_STATE_GOING; + /* module_bug_cleanup needs module_mutex protection */ + mutex_lock(&module_mutex); + module_bug_cleanup(mod); +-- +2.27.0 + diff --git a/queue-5.4/nfsv4-fix-a-pnfs-layout-related-use-after-free-race-.patch b/queue-5.4/nfsv4-fix-a-pnfs-layout-related-use-after-free-race-.patch new file mode 100644 index 00000000000..db90896cffe --- /dev/null +++ b/queue-5.4/nfsv4-fix-a-pnfs-layout-related-use-after-free-race-.patch @@ -0,0 +1,131 @@ +From 44695bd003de671d21b516200d99f4801cd7854f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Nov 2020 12:06:14 -0500 +Subject: NFSv4: Fix a pNFS layout related use-after-free race when freeing the + inode + +From: Trond Myklebust + +[ Upstream commit b6d49ecd1081740b6e632366428b960461f8158b ] + +When returning the layout in nfs4_evict_inode(), we need to ensure that +the layout is actually done being freed before we can proceed to free the +inode itself. 
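+
+The change below pairs a wait in the eviction path with a wake-up in
+pnfs_put_layout_hdr() once the header of an inode in I_FREEING or
+I_CLEAR state has actually been freed (a sketch of the hunks below):
+
+        /* eviction side */
+        lo = __pnfs_destroy_layout(nfsi);
+        if (lo)
+                wait_var_event(lo, pnfs_layout_removed(nfsi, lo));
+
+        /* put side, after pnfs_free_layout_hdr(lo) */
+        if (i_state & (I_FREEING | I_CLEAR))
+                wake_up_var(lo);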
+ +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4super.c | 2 +- + fs/nfs/pnfs.c | 33 +++++++++++++++++++++++++++++++-- + fs/nfs/pnfs.h | 5 +++++ + 3 files changed, 37 insertions(+), 3 deletions(-) + +diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c +index 04c57066a11af..b90642b022eb9 100644 +--- a/fs/nfs/nfs4super.c ++++ b/fs/nfs/nfs4super.c +@@ -96,7 +96,7 @@ static void nfs4_evict_inode(struct inode *inode) + nfs_inode_return_delegation_noreclaim(inode); + /* Note that above delegreturn would trigger pnfs return-on-close */ + pnfs_return_layout(inode); +- pnfs_destroy_layout(NFS_I(inode)); ++ pnfs_destroy_layout_final(NFS_I(inode)); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); + } +diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c +index 9c2b07ce57b27..9fd115c4d0a2f 100644 +--- a/fs/nfs/pnfs.c ++++ b/fs/nfs/pnfs.c +@@ -294,6 +294,7 @@ void + pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) + { + struct inode *inode; ++ unsigned long i_state; + + if (!lo) + return; +@@ -304,8 +305,12 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) + if (!list_empty(&lo->plh_segs)) + WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); + pnfs_detach_layout_hdr(lo); ++ i_state = inode->i_state; + spin_unlock(&inode->i_lock); + pnfs_free_layout_hdr(lo); ++ /* Notify pnfs_destroy_layout_final() that we're done */ ++ if (i_state & (I_FREEING | I_CLEAR)) ++ wake_up_var(lo); + } + } + +@@ -723,8 +728,7 @@ pnfs_free_lseg_list(struct list_head *free_me) + } + } + +-void +-pnfs_destroy_layout(struct nfs_inode *nfsi) ++static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi) + { + struct pnfs_layout_hdr *lo; + LIST_HEAD(tmp_list); +@@ -742,9 +746,34 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) + pnfs_put_layout_hdr(lo); + } else + spin_unlock(&nfsi->vfs_inode.i_lock); ++ return lo; ++} ++ ++void pnfs_destroy_layout(struct nfs_inode *nfsi) ++{ ++ __pnfs_destroy_layout(nfsi); + } + EXPORT_SYMBOL_GPL(pnfs_destroy_layout); + ++static bool pnfs_layout_removed(struct nfs_inode *nfsi, ++ struct pnfs_layout_hdr *lo) ++{ ++ bool ret; ++ ++ spin_lock(&nfsi->vfs_inode.i_lock); ++ ret = nfsi->layout != lo; ++ spin_unlock(&nfsi->vfs_inode.i_lock); ++ return ret; ++} ++ ++void pnfs_destroy_layout_final(struct nfs_inode *nfsi) ++{ ++ struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi); ++ ++ if (lo) ++ wait_var_event(lo, pnfs_layout_removed(nfsi, lo)); ++} ++ + static bool + pnfs_layout_add_bulk_destroy_list(struct inode *inode, + struct list_head *layout_list) +diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h +index f8a38065c7e47..63da33a92d831 100644 +--- a/fs/nfs/pnfs.h ++++ b/fs/nfs/pnfs.h +@@ -255,6 +255,7 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); + void pnfs_layoutget_free(struct nfs4_layoutget *lgp); + void pnfs_free_lseg_list(struct list_head *tmp_list); + void pnfs_destroy_layout(struct nfs_inode *); ++void pnfs_destroy_layout_final(struct nfs_inode *); + void pnfs_destroy_all_layouts(struct nfs_client *); + int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, + struct nfs_fsid *fsid, +@@ -651,6 +652,10 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) + { + } + ++static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi) ++{ ++} ++ + static inline struct pnfs_layout_segment * + pnfs_get_lseg(struct pnfs_layout_segment *lseg) + { +-- +2.27.0 + diff --git a/queue-5.4/powerpc-sysdev-add-missing-iounmap-on-error-in-mpic_.patch 
b/queue-5.4/powerpc-sysdev-add-missing-iounmap-on-error-in-mpic_.patch
new file mode 100644
index 00000000000..787de6ee08e
--- /dev/null
+++ b/queue-5.4/powerpc-sysdev-add-missing-iounmap-on-error-in-mpic_.patch
@@ -0,0 +1,39 @@
+From 340830345af52e3f7d391c9daae17225600e3aec Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 28 Oct 2020 17:15:51 +0800
+Subject: powerpc: sysdev: add missing iounmap() on error in mpic_msgr_probe()
+
+From: Qinglang Miao
+
+[ Upstream commit ffa1797040c5da391859a9556be7b735acbe1242 ]
+
+I noticed that the iounmap() of msgr_block_addr before returning from
+mpic_msgr_probe() in the error handling case is missing. So use
+devm_ioremap() instead of plain ioremap() when remapping the message
+register block, so the mapping will be automatically released on
+probe failure.
+
+Signed-off-by: Qinglang Miao
+Signed-off-by: Michael Ellerman
+Link: https://lore.kernel.org/r/20201028091551.136400-1-miaoqinglang@huawei.com
+Signed-off-by: Sasha Levin
+---
+ arch/powerpc/sysdev/mpic_msgr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
+index f6b253e2be409..36ec0bdd8b63c 100644
+--- a/arch/powerpc/sysdev/mpic_msgr.c
++++ b/arch/powerpc/sysdev/mpic_msgr.c
+@@ -191,7 +191,7 @@ static int mpic_msgr_probe(struct platform_device *dev)
+
+ /* IO map the message register block. */
+ of_address_to_resource(np, 0, &rsrc);
+- msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc));
++ msgr_block_addr = devm_ioremap(&dev->dev, rsrc.start, resource_size(&rsrc));
+ if (!msgr_block_addr) {
+ dev_err(&dev->dev, "Failed to iomap MPIC message registers");
+ return -EFAULT;
+--
+2.27.0
+
diff --git a/queue-5.4/quota-don-t-overflow-quota-file-offsets.patch b/queue-5.4/quota-don-t-overflow-quota-file-offsets.patch
new file mode 100644
index 00000000000..9855dc477f9
--- /dev/null
+++ b/queue-5.4/quota-don-t-overflow-quota-file-offsets.patch
@@ -0,0 +1,66 @@
+From 2f3a8ca680aca1db92199c743ab0e607d2c77c59 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 2 Nov 2020 16:32:10 +0100
+Subject: quota: Don't overflow quota file offsets
+
+From: Jan Kara
+
+[ Upstream commit 10f04d40a9fa29785206c619f80d8beedb778837 ]
+
+The on-disk quota format supports quota files with up to 2^32 blocks.
+Be careful when computing quota file offsets in the quota files from
+block numbers as they can overflow 32-bit types. Since quota files
+larger than 4GB would require ~26 million quota users, this is mostly a
+theoretical concern now, but better to be careful; fuzzers would find
+the problem sooner or later anyway... 
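+
+The fix below is purely a type-widening cast: "blk" is a 32-bit block
+number, so "blk << info->dqi_blocksize_bits" is evaluated in 32 bits
+and can wrap before it is widened to loff_t. Promoting the block
+number first keeps the whole shift in 64 bits (schematically, with
+"off" standing in for whichever offset is being computed):
+
+        loff_t off;
+
+        /* wraps once blk >= 1U << (32 - info->dqi_blocksize_bits) */
+        off = blk << info->dqi_blocksize_bits;
+
+        /* 64-bit shift, no wrap */
+        off = (loff_t)blk << info->dqi_blocksize_bits;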
+
+Reviewed-by: Andreas Dilger
+Signed-off-by: Jan Kara
+Signed-off-by: Sasha Levin
+---
+ fs/quota/quota_tree.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
+index a6f856f341dc7..c5562c871c8be 100644
+--- a/fs/quota/quota_tree.c
++++ b/fs/quota/quota_tree.c
+@@ -62,7 +62,7 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
+
+ memset(buf, 0, info->dqi_usable_bs);
+ return sb->s_op->quota_read(sb, info->dqi_type, buf,
+- info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
++ info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
+ }
+
+ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
+@@ -71,7 +71,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
+ ssize_t ret;
+
+ ret = sb->s_op->quota_write(sb, info->dqi_type, buf,
+- info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
++ info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
+ if (ret != info->dqi_usable_bs) {
+ quota_error(sb, "dquota write failed");
+ if (ret >= 0)
+@@ -284,7 +284,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
+ blk);
+ goto out_buf;
+ }
+- dquot->dq_off = (blk << info->dqi_blocksize_bits) +
++ dquot->dq_off = ((loff_t)blk << info->dqi_blocksize_bits) +
+ sizeof(struct qt_disk_dqdbheader) +
+ i * info->dqi_entry_size;
+ kfree(buf);
+@@ -559,7 +559,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
+ ret = -EIO;
+ goto out_buf;
+ } else {
+- ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
++ ret = ((loff_t)blk << info->dqi_blocksize_bits) + sizeof(struct
+ qt_disk_dqdbheader) + i * info->dqi_entry_size;
+ }
+ out_buf:
+--
+2.27.0
+
diff --git a/queue-5.4/rtc-pl031-fix-resource-leak-in-pl031_probe.patch b/queue-5.4/rtc-pl031-fix-resource-leak-in-pl031_probe.patch
new file mode 100644
index 00000000000..d100d065077
--- /dev/null
+++ b/queue-5.4/rtc-pl031-fix-resource-leak-in-pl031_probe.patch
@@ -0,0 +1,42 @@
+From fea427d7471b4b666521a64a484698fe42b04204 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 12 Nov 2020 17:31:39 +0800
+Subject: rtc: pl031: fix resource leak in pl031_probe
+
+From: Zheng Liang
+
+[ Upstream commit 1eab0fea2514b269e384c117f5b5772b882761f0 ]
+
+When devm_rtc_allocate_device() fails in pl031_probe(), the mem
+regions requested for the device should be released before returning. 
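+
+A sketch of the fix below: instead of returning the error directly,
+the failure path now jumps to the function's existing "out" label
+(whose body is not part of this hunk) so the resources acquired
+earlier in pl031_probe() can be unwound:
+
+        ldata->rtc = devm_rtc_allocate_device(&adev->dev);
+        if (IS_ERR(ldata->rtc)) {
+                ret = PTR_ERR(ldata->rtc);
+                goto out;       /* unwind via the existing label */
+        }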
+ +Reported-by: Hulk Robot +Signed-off-by: Zheng Liang +Signed-off-by: Alexandre Belloni +Acked-by: Linus Walleij +Link: https://lore.kernel.org/r/20201112093139.32566-1-zhengliang6@huawei.com +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-pl031.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c +index 180caebbd3552..9566958476dfc 100644 +--- a/drivers/rtc/rtc-pl031.c ++++ b/drivers/rtc/rtc-pl031.c +@@ -379,8 +379,10 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) + + device_init_wakeup(&adev->dev, true); + ldata->rtc = devm_rtc_allocate_device(&adev->dev); +- if (IS_ERR(ldata->rtc)) +- return PTR_ERR(ldata->rtc); ++ if (IS_ERR(ldata->rtc)) { ++ ret = PTR_ERR(ldata->rtc); ++ goto out; ++ } + + ldata->rtc->ops = ops; + +-- +2.27.0 + diff --git a/queue-5.4/rtc-sun6i-fix-memleak-in-sun6i_rtc_clk_init.patch b/queue-5.4/rtc-sun6i-fix-memleak-in-sun6i_rtc_clk_init.patch new file mode 100644 index 00000000000..f4ae24fa8e7 --- /dev/null +++ b/queue-5.4/rtc-sun6i-fix-memleak-in-sun6i_rtc_clk_init.patch @@ -0,0 +1,65 @@ +From 8555f3a228677127b174a6d7f37bafca0b8551ed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Oct 2020 14:12:26 +0800 +Subject: rtc: sun6i: Fix memleak in sun6i_rtc_clk_init + +From: Dinghao Liu + +[ Upstream commit 28d211919e422f58c1e6c900e5810eee4f1ce4c8 ] + +When clk_hw_register_fixed_rate_with_accuracy() fails, +clk_data should be freed. It's the same for the subsequent +two error paths, but we should also unregister the already +registered clocks in them. + +Signed-off-by: Dinghao Liu +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20201020061226.6572-1-dinghao.liu@zju.edu.cn +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-sun6i.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c +index fc32be687606c..c41bc8084d7cc 100644 +--- a/drivers/rtc/rtc-sun6i.c ++++ b/drivers/rtc/rtc-sun6i.c +@@ -276,7 +276,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, + 300000000); + if (IS_ERR(rtc->int_osc)) { + pr_crit("Couldn't register the internal oscillator\n"); +- return; ++ goto err; + } + + parents[0] = clk_hw_get_name(rtc->int_osc); +@@ -292,7 +292,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, + rtc->losc = clk_register(NULL, &rtc->hw); + if (IS_ERR(rtc->losc)) { + pr_crit("Couldn't register the LOSC clock\n"); +- return; ++ goto err_register; + } + + of_property_read_string_index(node, "clock-output-names", 1, +@@ -303,7 +303,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, + &rtc->lock); + if (IS_ERR(rtc->ext_losc)) { + pr_crit("Couldn't register the LOSC external gate\n"); +- return; ++ goto err_register; + } + + clk_data->num = 2; +@@ -316,6 +316,8 @@ static void __init sun6i_rtc_clk_init(struct device_node *node, + of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data); + return; + ++err_register: ++ clk_hw_unregister_fixed_rate(rtc->int_osc); + err: + kfree(clk_data); + } +-- +2.27.0 + diff --git a/queue-5.4/series b/queue-5.4/series index b51748ded65..c284d27a6ce 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -31,3 +31,17 @@ alsa-seq-use-bool-for-snd_seq_queue-internal-flags.patch alsa-rawmidi-access-runtime-avail-always-in-spinlock.patch bfs-don-t-use-warning-string-when-it-s-just-info.patch fcntl-fix-potential-deadlock-in-send_sig-io-urg.patch 
+rtc-sun6i-fix-memleak-in-sun6i_rtc_clk_init.patch +module-set-module_state_going-state-when-a-module-fa.patch +quota-don-t-overflow-quota-file-offsets.patch +rtc-pl031-fix-resource-leak-in-pl031_probe.patch +powerpc-sysdev-add-missing-iounmap-on-error-in-mpic_.patch +i3c-master-fix-missing-destroy_workqueue-on-error-in.patch +nfsv4-fix-a-pnfs-layout-related-use-after-free-race-.patch +f2fs-avoid-race-condition-for-shrinker-count.patch +module-delay-kobject-uevent-until-after-module-init-.patch +fs-namespace.c-warn-if-mnt_count-has-become-negative.patch +um-ubd-submit-all-data-segments-atomically.patch +tick-sched-remove-bogus-boot-safety-check.patch +alsa-pcm-clear-the-full-allocated-memory-at-hw_param.patch +dm-verity-skip-verity-work-if-i-o-error-when-system-.patch diff --git a/queue-5.4/tick-sched-remove-bogus-boot-safety-check.patch b/queue-5.4/tick-sched-remove-bogus-boot-safety-check.patch new file mode 100644 index 00000000000..7259c9155e8 --- /dev/null +++ b/queue-5.4/tick-sched-remove-bogus-boot-safety-check.patch @@ -0,0 +1,49 @@ +From 169d1c17ff6a8d83c331db7d57006e7bf2ede98e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 6 Dec 2020 22:12:55 +0100 +Subject: tick/sched: Remove bogus boot "safety" check + +From: Thomas Gleixner + +[ Upstream commit ba8ea8e7dd6e1662e34e730eadfc52aa6816f9dd ] + +can_stop_idle_tick() checks whether the do_timer() duty has been taken over +by a CPU on boot. That's silly because the boot CPU always takes over with +the initial clockevent device. + +But even if no CPU would have installed a clockevent and taken over the +duty then the question whether the tick on the current CPU can be stopped +or not is moot. In that case the current CPU would have no clockevent +either, so there would be nothing to keep ticking. + +Remove it. + +Signed-off-by: Thomas Gleixner +Acked-by: Frederic Weisbecker +Link: https://lore.kernel.org/r/20201206212002.725238293@linutronix.de +Signed-off-by: Sasha Levin +--- + kernel/time/tick-sched.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index 5c9fcc72460df..4419486d7413c 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -916,13 +916,6 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) + */ + if (tick_do_timer_cpu == cpu) + return false; +- /* +- * Boot safety: make sure the timekeeping duty has been +- * assigned before entering dyntick-idle mode, +- * tick_do_timer_cpu is TICK_DO_TIMER_BOOT +- */ +- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT)) +- return false; + + /* Should not happen for nohz-full */ + if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) +-- +2.27.0 + diff --git a/queue-5.4/um-ubd-submit-all-data-segments-atomically.patch b/queue-5.4/um-ubd-submit-all-data-segments-atomically.patch new file mode 100644 index 00000000000..f10ffc6ac14 --- /dev/null +++ b/queue-5.4/um-ubd-submit-all-data-segments-atomically.patch @@ -0,0 +1,434 @@ +From 1861098fff6f35dd7c115d0e27c5f37ba9126291 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 21 Nov 2020 23:13:56 -0500 +Subject: um: ubd: Submit all data segments atomically + +From: Gabriel Krisman Bertazi + +[ Upstream commit fc6b6a872dcd48c6f39c7975836d75113db67d37 ] + +Internally, UBD treats each physical IO segment as a separate command to +be submitted in the execution pipe. 
If the pipe returns a transient
+error after a few segments have already been written, UBD will tell the
+block layer to requeue the request, but there is no way to reclaim the
+segments already submitted. When a new attempt to dispatch the request
+is made, those segments already submitted will get duplicated, causing
+the WARN_ON below in the best case, and potentially data corruption.
+
+In my system, running a UML instance with 2GB of RAM and a 50M UBD disk,
+I can reproduce the WARN_ON by simply running mkfs.vfat against the
+disk on a freshly booted system.
+
+There are a few ways to work around this, like reducing the pressure on
+the pipe by reducing the queue depth, which almost eliminates the
+occurrence of the problem, increasing the pipe buffer size on the host
+system, or by limiting the request to one physical segment, which causes
+the block layer to submit way more requests to resolve a single
+operation.
+
+Instead, this patch modifies the format of a UBD command, such that all
+segments are sent through a single element in the communication pipe,
+making the command submission atomic from the point of view of the
+block layer. The new format has a variable size, depending on the
+number of elements, and looks like this:
+
++------------+-----------+-----------+------------
+| cmd_header | segment 0 | segment 1 | segment ...
++------------+-----------+-----------+------------
+
+With this format, we push a pointer to cmd_header in the submission
+pipe.
+
+This has the advantage of reducing the memory footprint of executing a
+single request, since it allows us to merge some fields in the header.
+It is possible to reduce each segment's memory footprint even further,
+by merging bitmap_words and cow_offset, for instance, but this is not
+the focus of this patch and is left as future work. One issue with the
+patch is that for a big number of segments, we now perform one big
+memory allocation instead of multiple small ones, but I wasn't able to
+trigger any real issues or -ENOMEM because of this change that couldn't
+be reproduced otherwise.
+
+This was tested using fio with the verify-crc32 option, and by running
+an ext4 filesystem over this UBD device.
+
+The original WARN_ON was:
+
+------------[ cut here ]------------
+WARNING: CPU: 0 PID: 0 at lib/refcount.c:28 refcount_warn_saturate+0x13f/0x141
+refcount_t: underflow; use-after-free.
+Modules linked in:
+CPU: 0 PID: 0 Comm: swapper Not tainted 5.5.0-rc6-00002-g2a5bb2cf75c8 #346
+Stack:
+ 6084eed0 6063dc77 00000009 6084ef60
+ 00000000 604b8d9f 6084eee0 6063dcbc
+ 6084ef40 6006ab8d e013d780 1c00000000
+Call Trace:
+ [<600a0c1c>] ? printk+0x0/0x94
+ [<6004a888>] show_stack+0x13b/0x155
+ [<6063dc77>] ? dump_stack_print_info+0xdf/0xe8
+ [<604b8d9f>] ? refcount_warn_saturate+0x13f/0x141
+ [<6063dcbc>] dump_stack+0x2a/0x2c
+ [<6006ab8d>] __warn+0x107/0x134
+ [<6008da6c>] ? wake_up_process+0x17/0x19
+ [<60487628>] ? blk_queue_max_discard_sectors+0x0/0xd
+ [<6006b05f>] warn_slowpath_fmt+0xd1/0xdf
+ [<6006af8e>] ? warn_slowpath_fmt+0x0/0xdf
+ [<600acc14>] ? raw_read_seqcount_begin.constprop.0+0x0/0x15
+ [<600619ae>] ? os_nsecs+0x1d/0x2b
+ [<604b8d9f>] refcount_warn_saturate+0x13f/0x141
+ [<6048bc8f>] refcount_sub_and_test.constprop.0+0x2f/0x37
+ [<6048c8de>] blk_mq_free_request+0xf1/0x10d
+ [<6048ca06>] __blk_mq_end_request+0x10c/0x114
+ [<6005ac0f>] ubd_intr+0xb5/0x169
+ [<600a1a37>] __handle_irq_event_percpu+0x6b/0x17e
+ [<600a1b70>] handle_irq_event_percpu+0x26/0x69
+ [<600a1bd9>] handle_irq_event+0x26/0x34
+ [<600a1bb3>] ? 
handle_irq_event+0x0/0x34 + [<600a5186>] ? unmask_irq+0x0/0x37 + [<600a57e6>] handle_edge_irq+0xbc/0xd6 + [<600a131a>] generic_handle_irq+0x21/0x29 + [<60048f6e>] do_IRQ+0x39/0x54 + [...] +---[ end trace c6e7444e55386c0f ]--- + +Cc: Christopher Obbard +Reported-by: Martyn Welch +Signed-off-by: Gabriel Krisman Bertazi +Tested-by: Christopher Obbard +Acked-by: Anton Ivanov +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + arch/um/drivers/ubd_kern.c | 191 ++++++++++++++++++++++--------------- + 1 file changed, 115 insertions(+), 76 deletions(-) + +diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c +index 0f5d0a699a49b..4e59ab817d3e7 100644 +--- a/arch/um/drivers/ubd_kern.c ++++ b/arch/um/drivers/ubd_kern.c +@@ -47,18 +47,25 @@ + /* Max request size is determined by sector mask - 32K */ + #define UBD_MAX_REQUEST (8 * sizeof(long)) + ++struct io_desc { ++ char *buffer; ++ unsigned long length; ++ unsigned long sector_mask; ++ unsigned long long cow_offset; ++ unsigned long bitmap_words[2]; ++}; ++ + struct io_thread_req { + struct request *req; + int fds[2]; + unsigned long offsets[2]; + unsigned long long offset; +- unsigned long length; +- char *buffer; + int sectorsize; +- unsigned long sector_mask; +- unsigned long long cow_offset; +- unsigned long bitmap_words[2]; + int error; ++ ++ int desc_cnt; ++ /* io_desc has to be the last element of the struct */ ++ struct io_desc io_desc[]; + }; + + +@@ -524,12 +531,7 @@ static void ubd_handler(void) + blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); + } +- if ((io_req->error) || (io_req->buffer == NULL)) +- blk_mq_end_request(io_req->req, io_req->error); +- else { +- if (!blk_update_request(io_req->req, io_req->error, io_req->length)) +- __blk_mq_end_request(io_req->req, io_req->error); +- } ++ blk_mq_end_request(io_req->req, io_req->error); + kfree(io_req); + } + } +@@ -945,6 +947,7 @@ static int ubd_add(int n, char **error_out) + blk_queue_write_cache(ubd_dev->queue, true, false); + + blk_queue_max_segments(ubd_dev->queue, MAX_SG); ++ blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); + err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); + if(err){ + *error_out = "Failed to register device"; +@@ -1288,37 +1291,74 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + *cow_offset += bitmap_offset; + } + +-static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, ++static void cowify_req(struct io_thread_req *req, struct io_desc *segment, ++ unsigned long offset, unsigned long *bitmap, + __u64 bitmap_offset, __u64 bitmap_len) + { +- __u64 sector = req->offset >> SECTOR_SHIFT; ++ __u64 sector = offset >> SECTOR_SHIFT; + int i; + +- if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT) ++ if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT) + panic("Operation too long"); + + if (req_op(req->req) == REQ_OP_READ) { +- for (i = 0; i < req->length >> SECTOR_SHIFT; i++) { ++ for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) { + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + ubd_set_bit(i, (unsigned char *) +- &req->sector_mask); ++ &segment->sector_mask); ++ } ++ } else { ++ cowify_bitmap(offset, segment->length, &segment->sector_mask, ++ &segment->cow_offset, bitmap, bitmap_offset, ++ segment->bitmap_words, bitmap_len); ++ } ++} ++ ++static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req, ++ struct request *req) ++{ ++ 
struct bio_vec bvec; ++ struct req_iterator iter; ++ int i = 0; ++ unsigned long byte_offset = io_req->offset; ++ int op = req_op(req); ++ ++ if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) { ++ io_req->io_desc[0].buffer = NULL; ++ io_req->io_desc[0].length = blk_rq_bytes(req); ++ } else { ++ rq_for_each_segment(bvec, req, iter) { ++ BUG_ON(i >= io_req->desc_cnt); ++ ++ io_req->io_desc[i].buffer = ++ page_address(bvec.bv_page) + bvec.bv_offset; ++ io_req->io_desc[i].length = bvec.bv_len; ++ i++; ++ } ++ } ++ ++ if (dev->cow.file) { ++ for (i = 0; i < io_req->desc_cnt; i++) { ++ cowify_req(io_req, &io_req->io_desc[i], byte_offset, ++ dev->cow.bitmap, dev->cow.bitmap_offset, ++ dev->cow.bitmap_len); ++ byte_offset += io_req->io_desc[i].length; + } ++ + } +- else cowify_bitmap(req->offset, req->length, &req->sector_mask, +- &req->cow_offset, bitmap, bitmap_offset, +- req->bitmap_words, bitmap_len); + } + +-static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, +- u64 off, struct bio_vec *bvec) ++static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req, ++ int desc_cnt) + { +- struct ubd *dev = hctx->queue->queuedata; + struct io_thread_req *io_req; +- int ret; ++ int i; + +- io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); ++ io_req = kmalloc(sizeof(*io_req) + ++ (desc_cnt * sizeof(struct io_desc)), ++ GFP_ATOMIC); + if (!io_req) +- return -ENOMEM; ++ return NULL; + + io_req->req = req; + if (dev->cow.file) +@@ -1326,26 +1366,41 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, + else + io_req->fds[0] = dev->fd; + io_req->error = 0; +- +- if (bvec != NULL) { +- io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset; +- io_req->length = bvec->bv_len; +- } else { +- io_req->buffer = NULL; +- io_req->length = blk_rq_bytes(req); +- } +- + io_req->sectorsize = SECTOR_SIZE; + io_req->fds[1] = dev->fd; +- io_req->cow_offset = -1; +- io_req->offset = off; +- io_req->sector_mask = 0; ++ io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT; + io_req->offsets[0] = 0; + io_req->offsets[1] = dev->cow.data_offset; + +- if (dev->cow.file) +- cowify_req(io_req, dev->cow.bitmap, +- dev->cow.bitmap_offset, dev->cow.bitmap_len); ++ for (i = 0 ; i < desc_cnt; i++) { ++ io_req->io_desc[i].sector_mask = 0; ++ io_req->io_desc[i].cow_offset = -1; ++ } ++ ++ return io_req; ++} ++ ++static int ubd_submit_request(struct ubd *dev, struct request *req) ++{ ++ int segs = 0; ++ struct io_thread_req *io_req; ++ int ret; ++ int op = req_op(req); ++ ++ if (op == REQ_OP_FLUSH) ++ segs = 0; ++ else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) ++ segs = 1; ++ else ++ segs = blk_rq_nr_phys_segments(req); ++ ++ io_req = ubd_alloc_req(dev, req, segs); ++ if (!io_req) ++ return -ENOMEM; ++ ++ io_req->desc_cnt = segs; ++ if (segs) ++ ubd_map_req(dev, io_req, req); + + ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); + if (ret != sizeof(io_req)) { +@@ -1356,22 +1411,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, + return ret; + } + +-static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req) +-{ +- struct req_iterator iter; +- struct bio_vec bvec; +- int ret; +- u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT; +- +- rq_for_each_segment(bvec, req, iter) { +- ret = ubd_queue_one_vec(hctx, req, off, &bvec); +- if (ret < 0) +- return ret; +- off += bvec.bv_len; +- } +- return 0; +-} +- + static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct 
blk_mq_queue_data *bd) + { +@@ -1384,17 +1423,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + spin_lock_irq(&ubd_dev->lock); + + switch (req_op(req)) { +- /* operations with no lentgth/offset arguments */ + case REQ_OP_FLUSH: +- ret = ubd_queue_one_vec(hctx, req, 0, NULL); +- break; + case REQ_OP_READ: + case REQ_OP_WRITE: +- ret = queue_rw_req(hctx, req); +- break; + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: +- ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL); ++ ret = ubd_submit_request(ubd_dev, req); + break; + default: + WARN_ON_ONCE(1); +@@ -1482,22 +1516,22 @@ static int map_error(int error_code) + * will result in unpredictable behaviour and/or crashes. + */ + +-static int update_bitmap(struct io_thread_req *req) ++static int update_bitmap(struct io_thread_req *req, struct io_desc *segment) + { + int n; + +- if(req->cow_offset == -1) ++ if (segment->cow_offset == -1) + return map_error(0); + +- n = os_pwrite_file(req->fds[1], &req->bitmap_words, +- sizeof(req->bitmap_words), req->cow_offset); +- if (n != sizeof(req->bitmap_words)) ++ n = os_pwrite_file(req->fds[1], &segment->bitmap_words, ++ sizeof(segment->bitmap_words), segment->cow_offset); ++ if (n != sizeof(segment->bitmap_words)) + return map_error(-n); + + return map_error(0); + } + +-static void do_io(struct io_thread_req *req) ++static void do_io(struct io_thread_req *req, struct io_desc *desc) + { + char *buf = NULL; + unsigned long len; +@@ -1512,21 +1546,20 @@ static void do_io(struct io_thread_req *req) + return; + } + +- nsectors = req->length / req->sectorsize; ++ nsectors = desc->length / req->sectorsize; + start = 0; + do { +- bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); ++ bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask); + end = start; + while((end < nsectors) && +- (ubd_test_bit(end, (unsigned char *) +- &req->sector_mask) == bit)) ++ (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit)) + end++; + + off = req->offset + req->offsets[bit] + + start * req->sectorsize; + len = (end - start) * req->sectorsize; +- if (req->buffer != NULL) +- buf = &req->buffer[start * req->sectorsize]; ++ if (desc->buffer != NULL) ++ buf = &desc->buffer[start * req->sectorsize]; + + switch (req_op(req->req)) { + case REQ_OP_READ: +@@ -1566,7 +1599,8 @@ static void do_io(struct io_thread_req *req) + start = end; + } while(start < nsectors); + +- req->error = update_bitmap(req); ++ req->offset += len; ++ req->error = update_bitmap(req, desc); + } + + /* Changed in start_io_thread, which is serialized by being called only +@@ -1599,8 +1633,13 @@ int io_thread(void *arg) + } + + for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { ++ struct io_thread_req *req = (*io_req_buffer)[count]; ++ int i; ++ + io_count++; +- do_io((*io_req_buffer)[count]); ++ for (i = 0; !req->error && i < req->desc_cnt; i++) ++ do_io(req, &(req->io_desc[i])); ++ + } + + written = 0; +-- +2.27.0 + -- 2.47.3