1 From e091eab028f9253eac5c04f9141bbc9d170acab3 Mon Sep 17 00:00:00 2001
2 From: Shuning Zhang <sunny.s.zhang@oracle.com>
3 Date: Mon, 13 May 2019 17:15:56 -0700
4 Subject: ocfs2: fix ocfs2 read inode data panic in ocfs2_iget
6 From: Shuning Zhang <sunny.s.zhang@oracle.com>
8 commit e091eab028f9253eac5c04f9141bbc9d170acab3 upstream.
10 In some cases, ocfs2_iget() reads the data of an inode which has been
11 deleted for some reason. That will make the system panic. So we should
12 check whether this inode has been deleted, and tell the caller that the inode is a bad inode.
15 For example, ocfs2 is used as the backend of nfs, and the client is
16 nfsv3. This issue can be reproduced by the following steps.
18 on the nfs server side,
21 Step 1: The process A was scheduled before calling the function fh_verify.
23 Step 2: The process B is removing the 'pathb', and just completed the call
24 to function dput. Then the dentry of 'pathb' has been deleted from the
25 dcache, and all ancestors have been deleted also. The relationship of
26 dentry and inode was deleted through the function hlist_del_init. The
27 following is the call stack.
28 dentry_iput->hlist_del_init(&dentry->d_u.d_alias)
30 At this time, the inode is still in the dcache.
32 Step 3: Process A calls the function ocfs2_get_dentry, which gets the
33 inode from dcache. Then the refcount of the inode is 1. The following is the call stack.
35 nfsd3_proc_getacl->fh_verify->exportfs_decode_fh->fh_to_dentry(ocfs2_get_dentry)
37 Step 4: Dirty pages are flushed by bdi threads. So the inode of 'patha'
38 is evicted, and this directory was deleted. But the inode of 'pathb'
39 can't be evicted, because the refcount of the inode was 1.
41 Step 5: Process A keeps running and calls the function
42 reconnect_path (in exportfs_decode_fh), which calls the function
43 ocfs2_get_parent of ocfs2. It gets the block number of the parent
44 directory (patha) by looking up the name "..", then reads the data from disk by that
45 block number. But this inode has been deleted, so the system panics.
48 1. in nfsd3_proc_getacl |
50 3. fh_to_dentry(ocfs2_get_dentry) |
51 4. bdi flush dirty cache |
54 [283465.542049] OCFS2: ERROR (device sdp): ocfs2_validate_inode_block:
55 Invalid dinode #580640: OCFS2_VALID_FL not set
57 [283465.545490] Kernel panic - not syncing: OCFS2: (device sdp): panic forced
60 [283465.546889] CPU: 5 PID: 12416 Comm: nfsd Tainted: G W
61 4.1.12-124.18.6.el6uek.bug28762940v3.x86_64 #2
62 [283465.548382] Hardware name: VMware, Inc. VMware Virtual Platform/440BX
63 Desktop Reference Platform, BIOS 6.00 09/21/2015
64 [283465.549657] 0000000000000000 ffff8800a56fb7b8 ffffffff816e839c
66 [283465.550392] 000000000008dc20 ffff8800a56fb838 ffffffff816e62d3
68 [283465.551056] ffff880000000010 ffff8800a56fb848 ffff8800a56fb7e8
70 [283465.551710] Call Trace:
71 [283465.552516] [<ffffffff816e839c>] dump_stack+0x63/0x81
72 [283465.553291] [<ffffffff816e62d3>] panic+0xcb/0x21b
73 [283465.554037] [<ffffffffa04e66b0>] ocfs2_handle_error+0xf0/0xf0 [ocfs2]
74 [283465.554882] [<ffffffffa04e7737>] __ocfs2_error+0x67/0x70 [ocfs2]
75 [283465.555768] [<ffffffffa049c0f9>] ocfs2_validate_inode_block+0x229/0x230
77 [283465.556683] [<ffffffffa047bcbc>] ocfs2_read_blocks+0x46c/0x7b0 [ocfs2]
78 [283465.557408] [<ffffffffa049bed0>] ? ocfs2_inode_cache_io_unlock+0x20/0x20
80 [283465.557973] [<ffffffffa049f0eb>] ocfs2_read_inode_block_full+0x3b/0x60
82 [283465.558525] [<ffffffffa049f5ba>] ocfs2_iget+0x4aa/0x880 [ocfs2]
83 [283465.559082] [<ffffffffa049146e>] ocfs2_get_parent+0x9e/0x220 [ocfs2]
84 [283465.559622] [<ffffffff81297c05>] reconnect_path+0xb5/0x300
85 [283465.560156] [<ffffffff81297f46>] exportfs_decode_fh+0xf6/0x2b0
86 [283465.560708] [<ffffffffa062faf0>] ? nfsd_proc_getattr+0xa0/0xa0 [nfsd]
87 [283465.561262] [<ffffffff810a8196>] ? prepare_creds+0x26/0x110
88 [283465.561932] [<ffffffffa0630860>] fh_verify+0x350/0x660 [nfsd]
89 [283465.562862] [<ffffffffa0637804>] ? nfsd_cache_lookup+0x44/0x630 [nfsd]
90 [283465.563697] [<ffffffffa063a8b9>] nfsd3_proc_getattr+0x69/0xf0 [nfsd]
91 [283465.564510] [<ffffffffa062cf60>] nfsd_dispatch+0xe0/0x290 [nfsd]
92 [283465.565358] [<ffffffffa05eb892>] ? svc_tcp_adjust_wspace+0x12/0x30
94 [283465.566272] [<ffffffffa05ea652>] svc_process_common+0x412/0x6a0 [sunrpc]
95 [283465.567155] [<ffffffffa05eaa03>] svc_process+0x123/0x210 [sunrpc]
96 [283465.568020] [<ffffffffa062c90f>] nfsd+0xff/0x170 [nfsd]
97 [283465.568962] [<ffffffffa062c810>] ? nfsd_destroy+0x80/0x80 [nfsd]
98 [283465.570112] [<ffffffff810a622b>] kthread+0xcb/0xf0
99 [283465.571099] [<ffffffff810a6160>] ? kthread_create_on_node+0x180/0x180
100 [283465.572114] [<ffffffff816f11b8>] ret_from_fork+0x58/0x90
101 [283465.573156] [<ffffffff810a6160>] ? kthread_create_on_node+0x180/0x180
103 Link: http://lkml.kernel.org/r/1554185919-3010-1-git-send-email-sunny.s.zhang@oracle.com
104 Signed-off-by: Shuning Zhang <sunny.s.zhang@oracle.com>
105 Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
106 Cc: Mark Fasheh <mark@fasheh.com>
107 Cc: Joel Becker <jlbec@evilplan.org>
108 Cc: Junxiao Bi <junxiao.bi@oracle.com>
109 Cc: Changwei Ge <gechangwei@live.cn>
110 Cc: piaojun <piaojun@huawei.com>
111 Cc: "Gang He" <ghe@suse.com>
112 Cc: <stable@vger.kernel.org>
113 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
114 Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
115 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
118 fs/ocfs2/export.c | 30 +++++++++++++++++++++++++++++-
119 1 file changed, 29 insertions(+), 1 deletion(-)
121 --- a/fs/ocfs2/export.c
122 +++ b/fs/ocfs2/export.c
123 @@ -148,16 +148,24 @@ static struct dentry *ocfs2_get_parent(s
125 struct dentry *parent;
126 struct inode *dir = d_inode(child);
129 trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name,
130 (unsigned long long)OCFS2_I(dir)->ip_blkno);
132 + status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1);
134 + mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
135 + parent = ERR_PTR(status);
139 status = ocfs2_inode_lock(dir, NULL, 0);
141 if (status != -ENOENT)
143 parent = ERR_PTR(status);
145 + goto unlock_nfs_sync;
148 status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno);
149 @@ -166,11 +174,31 @@ static struct dentry *ocfs2_get_parent(s
153 + status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set);
155 + if (status == -EINVAL) {
158 + mlog(ML_ERROR, "test inode bit failed %d\n", status);
159 + parent = ERR_PTR(status);
163 + trace_ocfs2_get_dentry_test_bit(status, set);
166 + parent = ERR_PTR(status);
170 parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
173 ocfs2_inode_unlock(dir, 0);
176 + ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1);
179 trace_ocfs2_get_parent_end(parent);