From foo@baz Fri Jan 18 09:16:11 CET 2019
From: Chao Yu <yuchao0@huawei.com>
Date: Sat, 7 May 2016 16:15:05 +0800
Subject: f2fs: fix inode cache leak

From: Chao Yu <yuchao0@huawei.com>

commit f61cce5b81f91ba336184008b24baec84afbb3dd upstream.

When testing f2fs with the inline_dentry option, generic/342 reports:
VFS: Busy inodes after unmount of dm-0. Self-destruct in 5 seconds. Have a nice day...

After rmmod of the f2fs module, the kernel shows the following dmesg:
 =============================================================================
 BUG f2fs_inode_cache (Tainted: G O ): Objects remaining in f2fs_inode_cache on __kmem_cache_shutdown()
 -----------------------------------------------------------------------------

 Disabling lock debugging due to kernel taint
 INFO: Slab 0xf51ca0e0 objects=22 used=1 fp=0xd1e6fc60 flags=0x40004080
 CPU: 3 PID: 7455 Comm: rmmod Tainted: G B O 4.6.0-rc4+ #16
 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
 00000086 00000086 d062fe18 c13a83a0 f51ca0e0 d062fe38 d062fea4 c11c7276
 c1981040 f51ca0e0 00000016 00000001 d1e6fc60 40004080 656a624f 20737463
 616d6572 6e696e69 6e692067 66326620 6e695f73 5f65646f 68636163 6e6f2065
 Call Trace:
 [<c13a83a0>] dump_stack+0x5f/0x8f
 [<c11c7276>] slab_err+0x76/0x80
 [<c11cbfc0>] ? __kmem_cache_shutdown+0x100/0x2f0
 [<c11cbfc0>] ? __kmem_cache_shutdown+0x100/0x2f0
 [<c11cbfe5>] __kmem_cache_shutdown+0x125/0x2f0
 [<c1198a38>] kmem_cache_destroy+0x158/0x1f0
 [<c176b43d>] ? mutex_unlock+0xd/0x10
 [<f8f15aa3>] exit_f2fs_fs+0x4b/0x5a8 [f2fs]
 [<c10f596c>] SyS_delete_module+0x16c/0x1d0
 [<c1001b10>] ? do_fast_syscall_32+0x30/0x1c0
 [<c13c59bf>] ? __this_cpu_preempt_check+0xf/0x20
 [<c10afa7d>] ? trace_hardirqs_on_caller+0xdd/0x210
 [<c10ad50b>] ? trace_hardirqs_off+0xb/0x10
 [<c1001b81>] do_fast_syscall_32+0xa1/0x1c0
 [<c176d888>] sysenter_past_esp+0x45/0x74
 INFO: Object 0xd1e6d9e0 @offset=6624
 kmem_cache_destroy f2fs_inode_cache: Slab cache still has objects
 CPU: 3 PID: 7455 Comm: rmmod Tainted: G B O 4.6.0-rc4+ #16
 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
 00000286 00000286 d062fef4 c13a83a0 f174b000 d062ff14 d062ff28 c1198ac7
 c197fe18 f3c5b980 d062ff20 000d04f2 d062ff0c d062ff0c d062ff14 d062ff14
 f8f20dc0 fffffff5 d062e000 d062ff30 f8f15aa3 d062ff7c c10f596c 73663266
 Call Trace:
 [<c13a83a0>] dump_stack+0x5f/0x8f
 [<c1198ac7>] kmem_cache_destroy+0x1e7/0x1f0
 [<f8f15aa3>] exit_f2fs_fs+0x4b/0x5a8 [f2fs]
 [<c10f596c>] SyS_delete_module+0x16c/0x1d0
 [<c1001b10>] ? do_fast_syscall_32+0x30/0x1c0
 [<c13c59bf>] ? __this_cpu_preempt_check+0xf/0x20
 [<c10afa7d>] ? trace_hardirqs_on_caller+0xdd/0x210
 [<c10ad50b>] ? trace_hardirqs_off+0xb/0x10
 [<c1001b81>] do_fast_syscall_32+0xa1/0x1c0
 [<c176d888>] sysenter_past_esp+0x45/0x74

The reason is: in the recovery flow, we use a delayed iput mechanism for
directories that have recovered dentry blocks, which means the inode
reference is held until the last dirty dentry page has been written back.

But when we mount f2fs with the inline_dentry option, a dirent may only be
recovered into the dir inode page rather than into a dentry page during
recovery, so there is no chance for us to release the inode reference in
->writepage when writing back the last dentry page.

We could call paired iget/iput explicitly for the inline_dentry case, but
for the non-inline_dentry case iput would call writeback_single_inode to
write all data pages synchronously, while during recovery ->writepages of
f2fs skips writing any pages, resulting in lost dirents.

This patch fixes the issue by obsoleting the old mechanism and introducing
a new dir_list that holds all directory inodes with recovered data until
recovery finishes.
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
[bwh: Backported to 4.4:
 - Deleted add_dirty_dir_inode() function is different
 - Adjust context]
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/f2fs/checkpoint.c |   24 ---------------------
 fs/f2fs/f2fs.h       |    2 -
 fs/f2fs/recovery.c   |   56 ++++++++++++++++++++++++++++-----------------------
 3 files changed, 31 insertions(+), 51 deletions(-)

--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -771,24 +771,6 @@ out:
 	f2fs_trace_pid(page);
 }
 
-void add_dirty_dir_inode(struct inode *inode)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct inode_entry *new =
-			f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
-	int ret = 0;
-
-	new->inode = inode;
-	INIT_LIST_HEAD(&new->list);
-
-	spin_lock(&sbi->dir_inode_lock);
-	ret = __add_dirty_inode(inode, new);
-	spin_unlock(&sbi->dir_inode_lock);
-
-	if (ret)
-		kmem_cache_free(inode_entry_slab, new);
-}
-
 void remove_dirty_dir_inode(struct inode *inode)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -811,12 +793,6 @@ void remove_dirty_dir_inode(struct inode
 	stat_dec_dirty_dir(sbi);
 	spin_unlock(&sbi->dir_inode_lock);
 	kmem_cache_free(inode_entry_slab, entry);
-
-	/* Only from the recovery routine */
-	if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
-		clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
-		iput(inode);
-	}
 }
 
 void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1402,7 +1402,6 @@ enum {
 	FI_NO_ALLOC,		/* should not allocate any blocks */
 	FI_FREE_NID,		/* free allocated nide */
 	FI_UPDATE_DIR,		/* should update inode block for consistency */
-	FI_DELAY_IPUT,		/* used for the recovery */
 	FI_NO_EXTENT,		/* not to use the extent cache */
 	FI_INLINE_XATTR,	/* used for inline xattr */
 	FI_INLINE_DATA,		/* used for inline data*/
@@ -1828,7 +1827,6 @@ void remove_orphan_inode(struct f2fs_sb_
 int recover_orphan_inodes(struct f2fs_sb_info *);
 int get_valid_checkpoint(struct f2fs_sb_info *);
 void update_dirty_page(struct inode *, struct page *);
-void add_dirty_dir_inode(struct inode *);
 void remove_dirty_dir_inode(struct inode *);
 void sync_dirty_dir_inodes(struct f2fs_sb_info *);
 void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -89,7 +89,8 @@ static void del_fsync_inode(struct fsync
 	kmem_cache_free(fsync_entry_slab, entry);
 }
 
-static int recover_dentry(struct inode *inode, struct page *ipage)
+static int recover_dentry(struct inode *inode, struct page *ipage,
+						struct list_head *dir_list)
 {
 	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
 	nid_t pino = le32_to_cpu(raw_inode->i_pino);
@@ -97,18 +98,29 @@ static int recover_dentry(struct inode *
 	struct qstr name;
 	struct page *page;
 	struct inode *dir, *einode;
+	struct fsync_inode_entry *entry;
 	int err = 0;
 
-	dir = f2fs_iget(inode->i_sb, pino);
-	if (IS_ERR(dir)) {
-		err = PTR_ERR(dir);
-		goto out;
+	entry = get_fsync_inode(dir_list, pino);
+	if (!entry) {
+		dir = f2fs_iget(inode->i_sb, pino);
+		if (IS_ERR(dir)) {
+			err = PTR_ERR(dir);
+			goto out;
+		}
+
+		entry = add_fsync_inode(dir_list, dir);
+		if (!entry) {
+			err = -ENOMEM;
+			iput(dir);
+			goto out;
+		}
 	}
 
-	if (file_enc_name(inode)) {
-		iput(dir);
+	dir = entry->inode;
+
+	if (file_enc_name(inode))
 		return 0;
-	}
 
 	name.len = le32_to_cpu(raw_inode->i_namelen);
 	name.name = raw_inode->i_name;
@@ -116,7 +128,7 @@ static int recover_dentry(struct inode *
 	if (unlikely(name.len > F2FS_NAME_LEN)) {
 		WARN_ON(1);
 		err = -ENAMETOOLONG;
-		goto out_err;
+		goto out;
 	}
 retry:
 	de = f2fs_find_entry(dir, &name, &page);
@@ -142,23 +154,12 @@ retry:
 		goto retry;
 	}
 	err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
-	if (err)
-		goto out_err;
-
-	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
-		iput(dir);
-	} else {
-		add_dirty_dir_inode(dir);
-		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
-	}
 
 	goto out;
 
 out_unmap_put:
 	f2fs_dentry_kunmap(dir, page);
 	f2fs_put_page(page, 0);
-out_err:
-	iput(dir);
 out:
 	f2fs_msg(inode->i_sb, KERN_NOTICE,
 			"%s: ino = %x, name = %s, dir = %lx, err = %d",
@@ -479,7 +480,8 @@ out:
 	return err;
 }
 
-static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
+static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
+						struct list_head *dir_list)
 {
 	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
 	struct curseg_info *curseg;
@@ -506,7 +508,7 @@ static int recover_data(struct f2fs_sb_i
 			break;
 		}
 
-		entry = get_fsync_inode(head, ino_of_node(page));
+		entry = get_fsync_inode(inode_list, ino_of_node(page));
 		if (!entry)
 			goto next;
 		/*
@@ -517,7 +519,7 @@ static int recover_data(struct f2fs_sb_i
 		if (entry->last_inode == blkaddr)
 			recover_inode(entry->inode, page);
 		if (entry->last_dentry == blkaddr) {
-			err = recover_dentry(entry->inode, page);
+			err = recover_dentry(entry->inode, page, dir_list);
 			if (err) {
 				f2fs_put_page(page, 1);
 				break;
@@ -545,6 +547,7 @@ int recover_fsync_data(struct f2fs_sb_in
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 	struct list_head inode_list;
+	struct list_head dir_list;
 	block_t blkaddr;
 	int err;
 	int ret = 0;
@@ -556,6 +559,7 @@ int recover_fsync_data(struct f2fs_sb_in
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&inode_list);
+	INIT_LIST_HEAD(&dir_list);
 
 	/* prevent checkpoint */
 	mutex_lock(&sbi->cp_mutex);
@@ -575,12 +579,11 @@ int recover_fsync_data(struct f2fs_sb_in
 		need_writecp = true;
 
 	/* step #2: recover data */
-	err = recover_data(sbi, &inode_list);
+	err = recover_data(sbi, &inode_list, &dir_list);
 	if (!err)
 		f2fs_bug_on(sbi, !list_empty(&inode_list));
 out:
 	destroy_fsync_dnodes(&inode_list);
-	kmem_cache_destroy(fsync_entry_slab);
 
 	/* truncate meta pages to be used by the recovery */
 	truncate_inode_pages_range(META_MAPPING(sbi),
@@ -618,5 +621,8 @@ out:
 	} else {
 		mutex_unlock(&sbi->cp_mutex);
 	}
+
+	destroy_fsync_dnodes(&dir_list);
+	kmem_cache_destroy(fsync_entry_slab);
 	return ret ? ret: err;
 }