]>
Commit | Line | Data |
---|---|---|
c2a518a0 GKH |
1 | From foo@baz Fri Jan 18 09:16:11 CET 2019 |
2 | From: Jaegeuk Kim <jaegeuk@kernel.org> | |
3 | Date: Mon, 19 Sep 2016 17:55:10 -0700 | |
4 | Subject: f2fs: use crc and cp version to determine roll-forward recovery | |
5 | ||
6 | From: Jaegeuk Kim <jaegeuk@kernel.org> | |
7 | ||
8 | commit a468f0ef516fda9c7d91bb550d458e853d76955e upstream. | |
9 | ||
10 | Previously, we used cp_version only to detect recoverable dnodes. | |
11 | In order to avoid same garbage cp_version, we needed to truncate the next | |
12 | dnode during checkpoint, resulting in additional discard or data write. | |
13 | If we can distinguish this by using crc in addition to cp_version, we can | |
14 | remove this overhead. | |
15 | ||
16 | There is a backward compatibility concern where it changes the node_footer layout. | |
17 | So, this patch introduces a new checkpoint flag, CP_CRC_RECOVERY_FLAG, to | |
18 | detect new layout. New layout will be activated only when this flag is set. | |
19 | ||
20 | Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> | |
21 | [bwh: Backported to 4.4: | |
22 | - Deleted code is slightly different | |
23 | - Adjust context] | |
24 | Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk> | |
25 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
26 | --- | |
27 | fs/f2fs/checkpoint.c | 21 +------------ | |
28 | fs/f2fs/f2fs.h | 1 | |
29 | fs/f2fs/node.h | 77 ++++++++++++++++++++++++++++++------------------ | |
30 | fs/f2fs/recovery.c | 30 +++--------------- | |
31 | fs/f2fs/segment.c | 22 ------------- | |
32 | fs/f2fs/super.c | 5 ++- | |
33 | include/linux/f2fs_fs.h | 1 | |
34 | 7 files changed, 63 insertions(+), 94 deletions(-) | |
35 | ||
36 | --- a/fs/f2fs/checkpoint.c | |
37 | +++ b/fs/f2fs/checkpoint.c | |
38 | @@ -902,7 +902,6 @@ static void wait_on_all_pages_writeback( | |
39 | static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |
40 | { | |
41 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | |
42 | - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); | |
43 | struct f2fs_nm_info *nm_i = NM_I(sbi); | |
44 | unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; | |
45 | nid_t last_nid = nm_i->next_scan_nid; | |
46 | @@ -911,15 +910,6 @@ static void do_checkpoint(struct f2fs_sb | |
47 | __u32 crc32 = 0; | |
48 | int i; | |
49 | int cp_payload_blks = __cp_payload(sbi); | |
50 | - block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg); | |
51 | - bool invalidate = false; | |
52 | - | |
53 | - /* | |
54 | - * This avoids to conduct wrong roll-forward operations and uses | |
55 | - * metapages, so should be called prior to sync_meta_pages below. | |
56 | - */ | |
57 | - if (discard_next_dnode(sbi, discard_blk)) | |
58 | - invalidate = true; | |
59 | ||
60 | /* Flush all the NAT/SIT pages */ | |
61 | while (get_pages(sbi, F2FS_DIRTY_META)) { | |
62 | @@ -996,6 +986,9 @@ static void do_checkpoint(struct f2fs_sb | |
63 | if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) | |
64 | set_ckpt_flags(ckpt, CP_FSCK_FLAG); | |
65 | ||
66 | + /* set this flag to activate crc|cp_ver for recovery */ | |
67 | + set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); | |
68 | + | |
69 | /* update SIT/NAT bitmap */ | |
70 | get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); | |
71 | get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); | |
72 | @@ -1053,14 +1046,6 @@ static void do_checkpoint(struct f2fs_sb | |
73 | /* wait for previous submitted meta pages writeback */ | |
74 | wait_on_all_pages_writeback(sbi); | |
75 | ||
76 | - /* | |
77 | - * invalidate meta page which is used temporarily for zeroing out | |
78 | - * block at the end of warm node chain. | |
79 | - */ | |
80 | - if (invalidate) | |
81 | - invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, | |
82 | - discard_blk); | |
83 | - | |
84 | release_dirty_inode(sbi); | |
85 | ||
86 | if (unlikely(f2fs_cp_error(sbi))) | |
87 | --- a/fs/f2fs/f2fs.h | |
88 | +++ b/fs/f2fs/f2fs.h | |
89 | @@ -1780,7 +1780,6 @@ bool is_checkpointed_data(struct f2fs_sb | |
90 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); | |
91 | void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); | |
92 | void release_discard_addrs(struct f2fs_sb_info *); | |
93 | -bool discard_next_dnode(struct f2fs_sb_info *, block_t); | |
94 | int npages_for_summary_flush(struct f2fs_sb_info *, bool); | |
95 | void allocate_new_segments(struct f2fs_sb_info *); | |
96 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); | |
97 | --- a/fs/f2fs/node.h | |
98 | +++ b/fs/f2fs/node.h | |
99 | @@ -212,6 +212,37 @@ static inline void set_to_next_nat(struc | |
100 | f2fs_change_bit(block_off, nm_i->nat_bitmap); | |
101 | } | |
102 | ||
103 | +static inline nid_t ino_of_node(struct page *node_page) | |
104 | +{ | |
105 | + struct f2fs_node *rn = F2FS_NODE(node_page); | |
106 | + return le32_to_cpu(rn->footer.ino); | |
107 | +} | |
108 | + | |
109 | +static inline nid_t nid_of_node(struct page *node_page) | |
110 | +{ | |
111 | + struct f2fs_node *rn = F2FS_NODE(node_page); | |
112 | + return le32_to_cpu(rn->footer.nid); | |
113 | +} | |
114 | + | |
115 | +static inline unsigned int ofs_of_node(struct page *node_page) | |
116 | +{ | |
117 | + struct f2fs_node *rn = F2FS_NODE(node_page); | |
118 | + unsigned flag = le32_to_cpu(rn->footer.flag); | |
119 | + return flag >> OFFSET_BIT_SHIFT; | |
120 | +} | |
121 | + | |
122 | +static inline __u64 cpver_of_node(struct page *node_page) | |
123 | +{ | |
124 | + struct f2fs_node *rn = F2FS_NODE(node_page); | |
125 | + return le64_to_cpu(rn->footer.cp_ver); | |
126 | +} | |
127 | + | |
128 | +static inline block_t next_blkaddr_of_node(struct page *node_page) | |
129 | +{ | |
130 | + struct f2fs_node *rn = F2FS_NODE(node_page); | |
131 | + return le32_to_cpu(rn->footer.next_blkaddr); | |
132 | +} | |
133 | + | |
134 | static inline void fill_node_footer(struct page *page, nid_t nid, | |
135 | nid_t ino, unsigned int ofs, bool reset) | |
136 | { | |
137 | @@ -242,40 +273,30 @@ static inline void fill_node_footer_blka | |
138 | { | |
139 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); | |
140 | struct f2fs_node *rn = F2FS_NODE(page); | |
141 | + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); | |
142 | + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver); | |
143 | ||
144 | - rn->footer.cp_ver = ckpt->checkpoint_ver; | |
145 | + if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { | |
146 | + __u64 crc = le32_to_cpu(*((__le32 *) | |
147 | + ((unsigned char *)ckpt + crc_offset))); | |
148 | + cp_ver |= (crc << 32); | |
149 | + } | |
150 | + rn->footer.cp_ver = cpu_to_le64(cp_ver); | |
151 | rn->footer.next_blkaddr = cpu_to_le32(blkaddr); | |
152 | } | |
153 | ||
154 | -static inline nid_t ino_of_node(struct page *node_page) | |
155 | -{ | |
156 | - struct f2fs_node *rn = F2FS_NODE(node_page); | |
157 | - return le32_to_cpu(rn->footer.ino); | |
158 | -} | |
159 | - | |
160 | -static inline nid_t nid_of_node(struct page *node_page) | |
161 | -{ | |
162 | - struct f2fs_node *rn = F2FS_NODE(node_page); | |
163 | - return le32_to_cpu(rn->footer.nid); | |
164 | -} | |
165 | - | |
166 | -static inline unsigned int ofs_of_node(struct page *node_page) | |
167 | -{ | |
168 | - struct f2fs_node *rn = F2FS_NODE(node_page); | |
169 | - unsigned flag = le32_to_cpu(rn->footer.flag); | |
170 | - return flag >> OFFSET_BIT_SHIFT; | |
171 | -} | |
172 | - | |
173 | -static inline unsigned long long cpver_of_node(struct page *node_page) | |
174 | +static inline bool is_recoverable_dnode(struct page *page) | |
175 | { | |
176 | - struct f2fs_node *rn = F2FS_NODE(node_page); | |
177 | - return le64_to_cpu(rn->footer.cp_ver); | |
178 | -} | |
179 | + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); | |
180 | + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); | |
181 | + __u64 cp_ver = cur_cp_version(ckpt); | |
182 | ||
183 | -static inline block_t next_blkaddr_of_node(struct page *node_page) | |
184 | -{ | |
185 | - struct f2fs_node *rn = F2FS_NODE(node_page); | |
186 | - return le32_to_cpu(rn->footer.next_blkaddr); | |
187 | + if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { | |
188 | + __u64 crc = le32_to_cpu(*((__le32 *) | |
189 | + ((unsigned char *)ckpt + crc_offset))); | |
190 | + cp_ver |= (crc << 32); | |
191 | + } | |
192 | + return cpu_to_le64(cp_ver) == cpver_of_node(page); | |
193 | } | |
194 | ||
195 | /* | |
196 | --- a/fs/f2fs/recovery.c | |
197 | +++ b/fs/f2fs/recovery.c | |
198 | @@ -193,7 +193,6 @@ static void recover_inode(struct inode * | |
199 | ||
200 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |
201 | { | |
202 | - unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | |
203 | struct curseg_info *curseg; | |
204 | struct inode *inode; | |
205 | struct page *page = NULL; | |
206 | @@ -214,7 +213,7 @@ static int find_fsync_dnodes(struct f2fs | |
207 | ||
208 | page = get_tmp_page(sbi, blkaddr); | |
209 | ||
210 | - if (cp_ver != cpver_of_node(page)) | |
211 | + if (!is_recoverable_dnode(page)) | |
212 | break; | |
213 | ||
214 | if (!is_fsync_dnode(page)) | |
215 | @@ -483,7 +482,6 @@ out: | |
216 | static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, | |
217 | struct list_head *dir_list) | |
218 | { | |
219 | - unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | |
220 | struct curseg_info *curseg; | |
221 | struct page *page = NULL; | |
222 | int err = 0; | |
223 | @@ -503,7 +501,7 @@ static int recover_data(struct f2fs_sb_i | |
224 | ||
225 | page = get_tmp_page(sbi, blkaddr); | |
226 | ||
227 | - if (cp_ver != cpver_of_node(page)) { | |
228 | + if (!is_recoverable_dnode(page)) { | |
229 | f2fs_put_page(page, 1); | |
230 | break; | |
231 | } | |
232 | @@ -595,31 +593,15 @@ out: | |
233 | } | |
234 | ||
235 | clear_sbi_flag(sbi, SBI_POR_DOING); | |
236 | - if (err) { | |
237 | - bool invalidate = false; | |
238 | - | |
239 | - if (discard_next_dnode(sbi, blkaddr)) | |
240 | - invalidate = true; | |
241 | - | |
242 | - /* Flush all the NAT/SIT pages */ | |
243 | - while (get_pages(sbi, F2FS_DIRTY_META)) | |
244 | - sync_meta_pages(sbi, META, LONG_MAX); | |
245 | - | |
246 | - /* invalidate temporary meta page */ | |
247 | - if (invalidate) | |
248 | - invalidate_mapping_pages(META_MAPPING(sbi), | |
249 | - blkaddr, blkaddr); | |
250 | - | |
251 | + if (err) | |
252 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | |
253 | - mutex_unlock(&sbi->cp_mutex); | |
254 | - } else if (need_writecp) { | |
255 | + mutex_unlock(&sbi->cp_mutex); | |
256 | + | |
257 | + if (!err && need_writecp) { | |
258 | struct cp_control cpc = { | |
259 | .reason = CP_RECOVERY, | |
260 | }; | |
261 | - mutex_unlock(&sbi->cp_mutex); | |
262 | write_checkpoint(sbi, &cpc); | |
263 | - } else { | |
264 | - mutex_unlock(&sbi->cp_mutex); | |
265 | } | |
266 | ||
267 | destroy_fsync_dnodes(&dir_list); | |
268 | --- a/fs/f2fs/segment.c | |
269 | +++ b/fs/f2fs/segment.c | |
270 | @@ -519,28 +519,6 @@ static int f2fs_issue_discard(struct f2f | |
271 | return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); | |
272 | } | |
273 | ||
274 | -bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) | |
275 | -{ | |
276 | - int err = -ENOTSUPP; | |
277 | - | |
278 | - if (test_opt(sbi, DISCARD)) { | |
279 | - struct seg_entry *se = get_seg_entry(sbi, | |
280 | - GET_SEGNO(sbi, blkaddr)); | |
281 | - unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); | |
282 | - | |
283 | - if (f2fs_test_bit(offset, se->discard_map)) | |
284 | - return false; | |
285 | - | |
286 | - err = f2fs_issue_discard(sbi, blkaddr, 1); | |
287 | - } | |
288 | - | |
289 | - if (err) { | |
290 | - update_meta_page(sbi, NULL, blkaddr); | |
291 | - return true; | |
292 | - } | |
293 | - return false; | |
294 | -} | |
295 | - | |
296 | static void __add_discard_entry(struct f2fs_sb_info *sbi, | |
297 | struct cp_control *cpc, struct seg_entry *se, | |
298 | unsigned int start, unsigned int end) | |
299 | --- a/fs/f2fs/super.c | |
300 | +++ b/fs/f2fs/super.c | |
301 | @@ -1457,6 +1457,9 @@ try_onemore: | |
302 | if (need_fsck) | |
303 | set_sbi_flag(sbi, SBI_NEED_FSCK); | |
304 | ||
305 | + if (!retry) | |
306 | + goto skip_recovery; | |
307 | + | |
308 | err = recover_fsync_data(sbi, false); | |
309 | if (err < 0) { | |
310 | need_fsck = true; | |
311 | @@ -1474,7 +1477,7 @@ try_onemore: | |
312 | goto free_kobj; | |
313 | } | |
314 | } | |
315 | - | |
316 | +skip_recovery: | |
317 | /* recover_fsync_data() cleared this already */ | |
318 | clear_sbi_flag(sbi, SBI_POR_DOING); | |
319 | ||
320 | --- a/include/linux/f2fs_fs.h | |
321 | +++ b/include/linux/f2fs_fs.h | |
322 | @@ -99,6 +99,7 @@ struct f2fs_super_block { | |
323 | /* | |
324 | * For checkpoint | |
325 | */ | |
326 | +#define CP_CRC_RECOVERY_FLAG 0x00000040 | |
327 | #define CP_FASTBOOT_FLAG 0x00000020 | |
328 | #define CP_FSCK_FLAG 0x00000010 | |
329 | #define CP_ERROR_FLAG 0x00000008 |