From eda0fb45a47683510c5d6a106333f6afe07f558b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 16 Nov 2020 17:17:41 +0100 Subject: [PATCH] 4.4-stable patches added patches: mei-protect-mei_cl_mtu-from-null-dereference.patch ocfs2-initialize-ip_next_orphan.patch --- ...ect-mei_cl_mtu-from-null-dereference.patch | 41 ++++++++ .../ocfs2-initialize-ip_next_orphan.patch | 93 +++++++++++++++++++ queue-4.4/series | 2 + 3 files changed, 136 insertions(+) create mode 100644 queue-4.4/mei-protect-mei_cl_mtu-from-null-dereference.patch create mode 100644 queue-4.4/ocfs2-initialize-ip_next_orphan.patch diff --git a/queue-4.4/mei-protect-mei_cl_mtu-from-null-dereference.patch b/queue-4.4/mei-protect-mei_cl_mtu-from-null-dereference.patch new file mode 100644 index 00000000000..ee68db8b937 --- /dev/null +++ b/queue-4.4/mei-protect-mei_cl_mtu-from-null-dereference.patch @@ -0,0 +1,41 @@ +From bcbc0b2e275f0a797de11a10eff495b4571863fc Mon Sep 17 00:00:00 2001 +From: Alexander Usyskin +Date: Thu, 29 Oct 2020 11:54:42 +0200 +Subject: mei: protect mei_cl_mtu from null dereference + +From: Alexander Usyskin + +commit bcbc0b2e275f0a797de11a10eff495b4571863fc upstream. + +A receive callback is queued while the client is still connected +but can still be called after the client was disconnected. Upon +disconnect cl->me_cl is set to NULL, hence we need to check +that ME client is not-NULL in mei_cl_mtu to avoid +null dereference. 
+ +Cc: +Signed-off-by: Alexander Usyskin +Signed-off-by: Tomas Winkler +Link: https://lore.kernel.org/r/20201029095444.957924-2-tomas.winkler@intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/misc/mei/client.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/misc/mei/client.h ++++ b/drivers/misc/mei/client.h +@@ -156,11 +156,11 @@ static inline u8 mei_cl_me_id(const stru + * + * @cl: host client + * +- * Return: mtu ++ * Return: mtu or 0 if client is not connected + */ + static inline size_t mei_cl_mtu(const struct mei_cl *cl) + { +- return cl->me_cl->props.max_msg_length; ++ return cl->me_cl ? cl->me_cl->props.max_msg_length : 0; + } + + /** diff --git a/queue-4.4/ocfs2-initialize-ip_next_orphan.patch b/queue-4.4/ocfs2-initialize-ip_next_orphan.patch new file mode 100644 index 00000000000..9f0c75e98c2 --- /dev/null +++ b/queue-4.4/ocfs2-initialize-ip_next_orphan.patch @@ -0,0 +1,93 @@ +From f5785283dd64867a711ca1fb1f5bb172f252ecdf Mon Sep 17 00:00:00 2001 +From: Wengang Wang +Date: Fri, 13 Nov 2020 22:52:23 -0800 +Subject: ocfs2: initialize ip_next_orphan + +From: Wengang Wang + +commit f5785283dd64867a711ca1fb1f5bb172f252ecdf upstream. + +Though problem if found on a lower 4.1.12 kernel, I think upstream has +same issue. + +In one node in the cluster, there is the following callback trace: + + # cat /proc/21473/stack + __ocfs2_cluster_lock.isra.36+0x336/0x9e0 [ocfs2] + ocfs2_inode_lock_full_nested+0x121/0x520 [ocfs2] + ocfs2_evict_inode+0x152/0x820 [ocfs2] + evict+0xae/0x1a0 + iput+0x1c6/0x230 + ocfs2_orphan_filldir+0x5d/0x100 [ocfs2] + ocfs2_dir_foreach_blk+0x490/0x4f0 [ocfs2] + ocfs2_dir_foreach+0x29/0x30 [ocfs2] + ocfs2_recover_orphans+0x1b6/0x9a0 [ocfs2] + ocfs2_complete_recovery+0x1de/0x5c0 [ocfs2] + process_one_work+0x169/0x4a0 + worker_thread+0x5b/0x560 + kthread+0xcb/0xf0 + ret_from_fork+0x61/0x90 + +The above stack is not reasonable, the final iput shouldn't happen in +ocfs2_orphan_filldir() function. 
Looking at the code, + + 2067 /* Skip inodes which are already added to recover list, since dio may + 2068 * happen concurrently with unlink/rename */ + 2069 if (OCFS2_I(iter)->ip_next_orphan) { + 2070 iput(iter); + 2071 return 0; + 2072 } + 2073 + +The logic thinks the inode is already in recover list on seeing +ip_next_orphan is non-NULL, so it skips this inode after dropping a +reference which was incremented in ocfs2_iget(). + +While, if the inode is already in recover list, it should have another +reference and the iput() at line 2070 should not be the final iput +(dropping the last reference). So I don't think the inode is really in +the recover list (no vmcore to confirm). + +Note that ocfs2_queue_orphans(), though not shown up in the call back +trace, is holding cluster lock on the orphan directory when looking up +for unlinked inodes. The on disk inode eviction could involve a lot of +IOs which may need long time to finish. That means this node could hold +the cluster lock for very long time, that can lead to the lock requests +(from other nodes) to the orphan directory hang for long time. + +Looking at more on ip_next_orphan, I found it's not initialized when +allocating a new ocfs2_inode_info structure. + +This causes the reflink operations from some nodes to hang for very long +time waiting for the cluster lock on the orphan directory. + +Fix: initialize ip_next_orphan as NULL. 
+ +Signed-off-by: Wengang Wang +Signed-off-by: Andrew Morton +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Gang He +Cc: Jun Piao +Cc: +Link: https://lkml.kernel.org/r/20201109171746.27884-1-wen.gang.wang@oracle.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/super.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/ocfs2/super.c ++++ b/fs/ocfs2/super.c +@@ -1751,6 +1751,7 @@ static void ocfs2_inode_init_once(void * + + oi->ip_blkno = 0ULL; + oi->ip_clusters = 0; ++ oi->ip_next_orphan = NULL; + + ocfs2_resv_init_once(&oi->ip_la_data_resv); + diff --git a/queue-4.4/series b/queue-4.4/series index 185fce52fe5..0fa85ef9ff3 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -32,3 +32,5 @@ perf-fix-get_recursion_context.patch ext4-correctly-report-not-supported-for-usr-grp-jquota-when-config_quota.patch ext4-unlock-xattr_sem-properly-in-ext4_inline_data_truncate.patch usb-cdc-acm-add-disable_echo-for-renesas-usb-download-mode.patch +mei-protect-mei_cl_mtu-from-null-dereference.patch +ocfs2-initialize-ip_next_orphan.patch -- 2.47.3