// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/gc.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "gc.h"
#include <trace/events/f2fs.h>

static int gc_thread_func(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
	unsigned int wait_ms;

	wait_ms = gc_th->min_sleep_time;

	set_freezable();
	do {
		wait_event_interruptible_timeout(*wq,
				kthread_should_stop() || freezing(current) ||
				gc_th->gc_wake,
				msecs_to_jiffies(wait_ms));

		/* give it a try one time */
		if (gc_th->gc_wake)
			gc_th->gc_wake = 0;

		if (try_to_freeze())
			continue;
		if (kthread_should_stop())
			break;

		if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
			increase_sleep_time(gc_th, &wait_ms);
			continue;
		}

		if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
			f2fs_show_injection_info(FAULT_CHECKPOINT);
			f2fs_stop_checkpoint(sbi, false);
		}

		if (!sb_start_write_trylock(sbi->sb))
			continue;

		/*
		 * [GC triggering condition]
		 * 0. GC is not conducted currently.
		 * 1. There are enough dirty segments.
		 * 2. IO subsystem is idle by checking the # of writeback pages.
		 * 3. IO subsystem is idle by checking the # of requests in
		 *    bdev's request list.
		 *
		 * Note) We have to avoid triggering GCs frequently.
		 * Because it is possible that some segments can be
		 * invalidated soon after by user update or deletion.
		 * So, I'd like to wait some time to collect dirty segments.
		 */
		if (sbi->gc_mode == GC_URGENT) {
			wait_ms = gc_th->urgent_sleep_time;
			mutex_lock(&sbi->gc_mutex);
			goto do_gc;
		}

		if (!mutex_trylock(&sbi->gc_mutex))
			goto next;

		if (!is_idle(sbi, GC_TIME)) {
			increase_sleep_time(gc_th, &wait_ms);
			mutex_unlock(&sbi->gc_mutex);
			goto next;
		}

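		/*
		 * Widen or shrink the polling interval between the min and
		 * max sleep times depending on how much invalid data has
		 * accumulated; when no victim is selected below, back off
		 * to no_gc_sleep_time.  (With the default tunables this is
		 * roughly a 30s-60s range with a ~5 min back-off, but those
		 * values come from gc.h/sysfs and are only illustrative.)
		 */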
		if (has_enough_invalid_blocks(sbi))
			decrease_sleep_time(gc_th, &wait_ms);
		else
			increase_sleep_time(gc_th, &wait_ms);
do_gc:
		stat_inc_bggc_count(sbi);

		/* if return value is not zero, no victim was selected */
		if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO))
			wait_ms = gc_th->no_gc_sleep_time;

		trace_f2fs_background_gc(sbi->sb, wait_ms,
				prefree_segments(sbi), free_segments(sbi));

		/* balancing f2fs's metadata periodically */
		f2fs_balance_fs_bg(sbi);
next:
		sb_end_write(sbi->sb);

	} while (!kthread_should_stop());
	return 0;
}

int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th;
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	int err = 0;

	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
	if (!gc_th) {
		err = -ENOMEM;
		goto out;
	}

	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
	gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
	gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;

	gc_th->gc_wake = 0;

	sbi->gc_thread = gc_th;
	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(gc_th->f2fs_gc_task)) {
		err = PTR_ERR(gc_th->f2fs_gc_task);
		kfree(gc_th);
		sbi->gc_thread = NULL;
	}
out:
	return err;
}

void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	if (!gc_th)
		return;
	kthread_stop(gc_th->f2fs_gc_task);
	kfree(gc_th);
	sbi->gc_thread = NULL;
}

static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
{
	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;

	switch (sbi->gc_mode) {
	case GC_IDLE_CB:
		gc_mode = GC_CB;
		break;
	case GC_IDLE_GREEDY:
	case GC_URGENT:
		gc_mode = GC_GREEDY;
		break;
	}
	return gc_mode;
}

static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
			int type, struct victim_sel_policy *p)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (p->alloc_mode == SSR) {
		p->gc_mode = GC_GREEDY;
		p->dirty_segmap = dirty_i->dirty_segmap[type];
		p->max_search = dirty_i->nr_dirty[type];
		p->ofs_unit = 1;
	} else {
		p->gc_mode = select_gc_type(sbi, gc_type);
		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
		p->max_search = dirty_i->nr_dirty[DIRTY];
		p->ofs_unit = sbi->segs_per_sec;
	}

	/* we need to check every dirty segment in the FG_GC case */
	if (gc_type != FG_GC &&
			(sbi->gc_mode != GC_URGENT) &&
			p->max_search > sbi->max_victim_search)
		p->max_search = sbi->max_victim_search;

	/* let's select beginning hot/small space first in no_heap mode */
	if (test_opt(sbi, NOHEAP) &&
		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
		p->offset = 0;
	else
		p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
}

static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
				struct victim_sel_policy *p)
{
	/* SSR allocates in a segment unit */
	if (p->alloc_mode == SSR)
		return sbi->blocks_per_seg;
	if (p->gc_mode == GC_GREEDY)
		return 2 * sbi->blocks_per_seg * p->ofs_unit;
	else if (p->gc_mode == GC_CB)
		return UINT_MAX;
	else /* No other gc_mode */
		return 0;
}

static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int secno;

	/*
	 * If the gc_type is FG_GC, we can select victim segments
	 * selected by background GC before.
	 * Those segments guarantee they have small valid blocks.
	 */
	for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
		if (sec_usage_check(sbi, secno))
			continue;
		clear_bit(secno, dirty_i->victim_secmap);
		return GET_SEG_FROM_SEC(sbi, secno);
	}
	return NULL_SEGNO;
}

static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
	unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
	unsigned long long mtime = 0;
	unsigned int vblocks;
	unsigned char age = 0;
	unsigned char u;
	unsigned int i;

	for (i = 0; i < sbi->segs_per_sec; i++)
		mtime += get_seg_entry(sbi, start + i)->mtime;
	vblocks = get_valid_blocks(sbi, segno, true);

	mtime = div_u64(mtime, sbi->segs_per_sec);
	vblocks = div_u64(vblocks, sbi->segs_per_sec);

	u = (vblocks * 100) >> sbi->log_blocks_per_seg;

	/* Handle if the system time has changed by the user */
	if (mtime < sit_i->min_mtime)
		sit_i->min_mtime = mtime;
	if (mtime > sit_i->max_mtime)
		sit_i->max_mtime = mtime;
	if (sit_i->max_mtime != sit_i->min_mtime)
		age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
				sit_i->max_mtime - sit_i->min_mtime);

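	/*
	 * Cost-benefit selection: a candidate is more attractive when it has
	 * few valid blocks (low utilization u) and has stayed cold for a long
	 * time (high age).  Returning UINT_MAX minus the benefit lets
	 * get_victim_by_default() simply pick the minimum cost.  Rough
	 * example: u = 20, age = 80 gives a benefit of
	 * (100 * 80 * 80) / 120 ~= 5333, which beats u = 60, age = 80
	 * (benefit 2000), so the emptier, colder candidate wins.
	 */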
	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}

static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
			unsigned int segno, struct victim_sel_policy *p)
{
	if (p->alloc_mode == SSR)
		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;

	/* alloc_mode == LFS */
	if (p->gc_mode == GC_GREEDY)
		return get_valid_blocks(sbi, segno, true);
	else
		return get_cb_cost(sbi, segno);
}

static unsigned int count_bits(const unsigned long *addr,
				unsigned int offset, unsigned int len)
{
	unsigned int end = offset + len, sum = 0;

	while (offset < end) {
		if (test_bit(offset++, addr))
			++sum;
	}
	return sum;
}

/*
 * This function is called from two paths.
 * One is garbage collection and the other is SSR segment selection.
 * When it is called during GC, it just gets a victim segment
 * and it does not remove it from dirty seglist.
 * When it is called from SSR segment selection, it finds a segment
 * which has minimum valid blocks and removes it from dirty seglist.
 */
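/*
 * Note: the scan below resumes from SIT_I(sbi)->last_victim[] (see
 * select_policy()), so repeated calls walk the dirty segmap round-robin
 * instead of always rescanning from segment 0.  It stops after
 * p.max_search candidates; for foreground or urgent GC that cap covers
 * every dirty segment.
 */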
static int get_victim_by_default(struct f2fs_sb_info *sbi,
		unsigned int *result, int gc_type, int type, char alloc_mode)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct sit_info *sm = SIT_I(sbi);
	struct victim_sel_policy p;
	unsigned int secno, last_victim;
	unsigned int last_segment = MAIN_SEGS(sbi);
	unsigned int nsearched = 0;

	mutex_lock(&dirty_i->seglist_lock);

	p.alloc_mode = alloc_mode;
	select_policy(sbi, gc_type, type, &p);

	p.min_segno = NULL_SEGNO;
	p.min_cost = get_max_cost(sbi, &p);

	if (*result != NULL_SEGNO) {
		if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
			get_valid_blocks(sbi, *result, false) &&
			!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
			p.min_segno = *result;
		goto out;
	}

	if (p.max_search == 0)
		goto out;

	last_victim = sm->last_victim[p.gc_mode];
	if (p.alloc_mode == LFS && gc_type == FG_GC) {
		p.min_segno = check_bg_victims(sbi);
		if (p.min_segno != NULL_SEGNO)
			goto got_it;
	}

	while (1) {
		unsigned long cost;
		unsigned int segno;

		segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
		if (segno >= last_segment) {
			if (sm->last_victim[p.gc_mode]) {
				last_segment =
					sm->last_victim[p.gc_mode];
				sm->last_victim[p.gc_mode] = 0;
				p.offset = 0;
				continue;
			}
			break;
		}

		p.offset = segno + p.ofs_unit;
		if (p.ofs_unit > 1) {
			p.offset -= segno % p.ofs_unit;
			nsearched += count_bits(p.dirty_segmap,
						p.offset - p.ofs_unit,
						p.ofs_unit);
		} else {
			nsearched++;
		}

		secno = GET_SEC_FROM_SEG(sbi, segno);

		if (sec_usage_check(sbi, secno))
			goto next;
		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
			goto next;

		cost = get_gc_cost(sbi, segno, &p);

		if (p.min_cost > cost) {
			p.min_segno = segno;
			p.min_cost = cost;
		}
next:
		if (nsearched >= p.max_search) {
			if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
				sm->last_victim[p.gc_mode] = last_victim + 1;
			else
				sm->last_victim[p.gc_mode] = segno + 1;
			sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
			break;
		}
	}
	if (p.min_segno != NULL_SEGNO) {
got_it:
		if (p.alloc_mode == LFS) {
			secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
			if (gc_type == FG_GC)
				sbi->cur_victim_sec = secno;
			else
				set_bit(secno, dirty_i->victim_secmap);
		}
		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;

		trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
				sbi->cur_victim_sec,
				prefree_segments(sbi), free_segments(sbi));
	}
out:
	mutex_unlock(&dirty_i->seglist_lock);

	return (p.min_segno == NULL_SEGNO) ? 0 : 1;
}

static const struct victim_selection default_v_ops = {
	.get_victim = get_victim_by_default,
};

static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
{
	struct inode_entry *ie;

	ie = radix_tree_lookup(&gc_list->iroot, ino);
	if (ie)
		return ie->inode;
	return NULL;
}

static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
{
	struct inode_entry *new_ie;

	if (inode == find_gc_inode(gc_list, inode->i_ino)) {
		iput(inode);
		return;
	}
	new_ie = f2fs_kmem_cache_alloc(f2fs_inode_entry_slab, GFP_NOFS);
	new_ie->inode = inode;

	f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
	list_add_tail(&new_ie->list, &gc_list->ilist);
}

static void put_gc_inode(struct gc_inode_list *gc_list)
{
	struct inode_entry *ie, *next_ie;
	list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
		radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
		iput(ie->inode);
		list_del(&ie->list);
		kmem_cache_free(f2fs_inode_entry_slab, ie);
	}
}

static int check_valid_map(struct f2fs_sb_info *sbi,
				unsigned int segno, int offset)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct seg_entry *sentry;
	int ret;

	down_read(&sit_i->sentry_lock);
	sentry = get_seg_entry(sbi, segno);
	ret = f2fs_test_bit(offset, sentry->cur_valid_map);
	up_read(&sit_i->sentry_lock);
	return ret;
}

/*
 * This function compares node address got in summary with that in NAT.
 * On validity, copy that node with cold status, otherwise (invalid node)
 * ignore that.
 */
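/*
 * Rough outline: the summary entries are walked three times - phase 0
 * reads ahead the NAT blocks, phase 1 reads ahead the node pages, and
 * phase 2 re-checks validity and migrates each live node block with
 * f2fs_move_node_page().
 */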
static void gc_node_segment(struct f2fs_sb_info *sbi,
		struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;
	bool fggc = (gc_type == FG_GC);

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	if (fggc && phase == 2)
		atomic_inc(&sbi->wb_sync_req[NODE]);

	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		nid_t nid = le32_to_cpu(entry->nid);
		struct page *node_page;
		struct node_info ni;

		/* stop BG_GC if there are not enough free sections. */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
			return;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
							META_NAT, true);
			continue;
		}

		if (phase == 1) {
			f2fs_ra_node_page(sbi, nid);
			continue;
		}

		/* phase == 2 */
		node_page = f2fs_get_node_page(sbi, nid);
		if (IS_ERR(node_page))
			continue;

		/* block may become invalid during f2fs_get_node_page */
		if (check_valid_map(sbi, segno, off) == 0) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		if (f2fs_get_node_info(sbi, nid, &ni)) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		if (ni.blk_addr != start_addr + off) {
			f2fs_put_page(node_page, 1);
			continue;
		}

		f2fs_move_node_page(node_page, gc_type);
		stat_inc_node_blk_count(sbi, 1, gc_type);
	}

	if (++phase < 3)
		goto next_step;

	if (fggc)
		atomic_dec(&sbi->wb_sync_req[NODE]);
}

/*
 * Calculate start block index indicating the given node offset.
 * Be careful, caller should give this node offset only indicating direct node
 * blocks. If any node offsets, which point the other types of node blocks such
 * as indirect or double indirect node blocks, are given, it must be a caller's
 * bug.
 */
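/*
 * Illustration (assuming the usual f2fs node-tree layout): node_ofs 1 and 2
 * are the two direct node blocks right after the inode and map to bidx 0
 * and 1; node_ofs 4, the first direct node under the first indirect node,
 * maps to bidx 2.  The result is scaled by ADDRS_PER_BLOCK and offset by
 * ADDRS_PER_INODE(inode) to get the file-relative block index.
 */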
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
{
	unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
	unsigned int bidx;

	if (node_ofs == 0)
		return 0;

	if (node_ofs <= 2) {
		bidx = node_ofs - 1;
	} else if (node_ofs <= indirect_blks) {
		int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 2 - dec;
	} else {
		int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 5 - dec;
	}
	return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
}

static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{
	struct page *node_page;
	nid_t nid;
	unsigned int ofs_in_node;
	block_t source_blkaddr;

	nid = le32_to_cpu(sum->nid);
	ofs_in_node = le16_to_cpu(sum->ofs_in_node);

	node_page = f2fs_get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return false;

	if (f2fs_get_node_info(sbi, nid, dni)) {
		f2fs_put_page(node_page, 1);
		return false;
	}

	if (sum->version != dni->version) {
		f2fs_msg(sbi->sb, KERN_WARNING,
				"%s: valid data with mismatched node version.",
				__func__);
		set_sbi_flag(sbi, SBI_NEED_FSCK);
	}

	*nofs = ofs_of_node(node_page);
	source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
	f2fs_put_page(node_page, 1);

	if (source_blkaddr != blkaddr)
		return false;
	return true;
}

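/*
 * Read ahead one block of a post-read (e.g. encrypted) inode into
 * META_MAPPING so the later move_data_block() pass finds it cached;
 * if this fails, gc_data_segment() simply skips the inode for this round.
 */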
static int ra_data_block(struct inode *inode, pgoff_t index)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei = {0, 0, 0};
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.temp = COLD,
		.op = REQ_OP_READ,
		.op_flags = 0,
		.encrypted_page = NULL,
		.in_list = false,
		.retry = false,
	};
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page)
		return -ENOMEM;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_page;
	f2fs_put_dnode(&dn);

	if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
						DATA_GENERIC))) {
		err = -EFAULT;
		goto put_page;
	}
got_it:
	/* read page */
	fio.page = page;
	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;

	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
					dn.data_blkaddr,
					FGP_LOCK | FGP_CREAT, GFP_NOFS);
	if (!fio.encrypted_page) {
		err = -ENOMEM;
		goto put_page;
	}

	err = f2fs_submit_page_bio(&fio);
	if (err)
		goto put_encrypted_page;
	f2fs_put_page(fio.encrypted_page, 0);
	f2fs_put_page(page, 1);
	return 0;
put_encrypted_page:
	f2fs_put_page(fio.encrypted_page, 1);
put_page:
	f2fs_put_page(page, 1);
	return err;
}

/*
 * Move data block via META_MAPPING while keeping locked data page.
 * This can be used to move blocks, aka LBAs, directly on disk.
 */
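/*
 * Outline of the move: pin the data page, allocate a new block address via
 * f2fs_allocate_data_block(), copy or read the old block into a META_MAPPING
 * page at the new address, write it out with REQ_SYNC, and only then point
 * the dnode at newaddr; on failure the allocation is rolled back through
 * f2fs_do_replace_block().
 */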
static void move_data_block(struct inode *inode, block_t bidx,
				int gc_type, unsigned int segno, int off)
{
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(inode),
		.ino = inode->i_ino,
		.type = DATA,
		.temp = COLD,
		.op = REQ_OP_READ,
		.op_flags = 0,
		.encrypted_page = NULL,
		.in_list = false,
		.retry = false,
	};
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	struct page *page, *mpage;
	block_t newaddr;
	int err;
	bool lfs_mode = test_opt(fio.sbi, LFS);

	/* do not read out */
	page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
	if (!page)
		return;

	if (!check_valid_map(F2FS_I_SB(inode), segno, off))
		goto out;

	if (f2fs_is_atomic_file(inode)) {
		F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
		F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
		goto out;
	}

	if (f2fs_is_pinned_file(inode)) {
		f2fs_pin_file_control(inode, true);
		goto out;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
	if (err)
		goto out;

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		ClearPageUptodate(page);
		goto put_out;
	}

	/*
	 * don't cache encrypted data into meta inode until previous dirty
	 * data were written back, to avoid racing between GC and flush.
	 */
	f2fs_wait_on_page_writeback(page, DATA, true);

	err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
	if (err)
		goto put_out;

	set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

	/* read page */
	fio.page = page;
	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;

	if (lfs_mode)
		down_write(&fio.sbi->io_order_lock);

	f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
					&sum, CURSEG_COLD_DATA, NULL, false);

	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
				newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
	if (!fio.encrypted_page) {
		err = -ENOMEM;
		goto recover_block;
	}

	mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
					fio.old_blkaddr, FGP_LOCK, GFP_NOFS);
	if (mpage) {
		bool updated = false;

		if (PageUptodate(mpage)) {
			memcpy(page_address(fio.encrypted_page),
					page_address(mpage), PAGE_SIZE);
			updated = true;
		}
		f2fs_put_page(mpage, 1);
		invalidate_mapping_pages(META_MAPPING(fio.sbi),
					fio.old_blkaddr, fio.old_blkaddr);
		if (updated)
			goto write_page;
	}

	err = f2fs_submit_page_bio(&fio);
	if (err)
		goto put_page_out;

	/* write page */
	lock_page(fio.encrypted_page);

	if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) {
		err = -EIO;
		goto put_page_out;
	}
	if (unlikely(!PageUptodate(fio.encrypted_page))) {
		err = -EIO;
		goto put_page_out;
	}

write_page:
	set_page_dirty(fio.encrypted_page);
	f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
	if (clear_page_dirty_for_io(fio.encrypted_page))
		dec_page_count(fio.sbi, F2FS_DIRTY_META);

	set_page_writeback(fio.encrypted_page);
	ClearPageError(page);

	/* allocate block address */
	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

	fio.op = REQ_OP_WRITE;
	fio.op_flags = REQ_SYNC;
	fio.new_blkaddr = newaddr;
	f2fs_submit_page_write(&fio);
	if (fio.retry) {
		if (PageWriteback(fio.encrypted_page))
			end_page_writeback(fio.encrypted_page);
		goto put_page_out;
	}

	f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE);

	f2fs_update_data_blkaddr(&dn, newaddr);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
put_page_out:
	f2fs_put_page(fio.encrypted_page, 1);
recover_block:
	if (lfs_mode)
		up_write(&fio.sbi->io_order_lock);
	if (err)
		f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
								true, true);
put_out:
	f2fs_put_dnode(&dn);
out:
	f2fs_put_page(page, 1);
}

static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
						unsigned int segno, int off)
{
	struct page *page;

	page = f2fs_get_lock_data_page(inode, bidx, true);
	if (IS_ERR(page))
		return;

	if (!check_valid_map(F2FS_I_SB(inode), segno, off))
		goto out;

	if (f2fs_is_atomic_file(inode)) {
		F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
		F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
		goto out;
	}
	if (f2fs_is_pinned_file(inode)) {
		if (gc_type == FG_GC)
			f2fs_pin_file_control(inode, true);
		goto out;
	}

	if (gc_type == BG_GC) {
		if (PageWriteback(page))
			goto out;
		set_page_dirty(page);
		set_cold_data(page);
	} else {
		struct f2fs_io_info fio = {
			.sbi = F2FS_I_SB(inode),
			.ino = inode->i_ino,
			.type = DATA,
			.temp = COLD,
			.op = REQ_OP_WRITE,
			.op_flags = REQ_SYNC,
			.old_blkaddr = NULL_ADDR,
			.page = page,
			.encrypted_page = NULL,
			.need_lock = LOCK_REQ,
			.io_type = FS_GC_DATA_IO,
		};
		bool is_dirty = PageDirty(page);
		int err;

retry:
		set_page_dirty(page);
		f2fs_wait_on_page_writeback(page, DATA, true);
		if (clear_page_dirty_for_io(page)) {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}

		set_cold_data(page);

		err = f2fs_do_write_data_page(&fio);
		if (err) {
			clear_cold_data(page);
			if (err == -ENOMEM) {
				congestion_wait(BLK_RW_ASYNC, HZ/50);
				goto retry;
			}
			if (is_dirty)
				set_page_dirty(page);
		}
	}
out:
	f2fs_put_page(page, 1);
}

/*
 * This function tries to get parent node of victim data block, and identifies
 * data block validity. If the block is valid, copy that with cold status and
 * modify parent node.
 * If the parent node is not valid or the data block address is different,
 * the victim data block is ignored.
 */
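/*
 * The summary entries are walked five times: phase 0 reads ahead NAT blocks,
 * phase 1 reads ahead node pages, phase 2 reads ahead the owning inodes'
 * node pages, phase 3 igets the inodes and readaheads their data, and
 * phase 4 actually moves the blocks via move_data_block()/move_data_page().
 */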
static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
	struct super_block *sb = sbi->sb;
	struct f2fs_summary *entry;
	block_t start_addr;
	int off;
	int phase = 0;

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;

	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		struct page *data_page;
		struct inode *inode;
		struct node_info dni; /* dnode info for the data */
		unsigned int ofs_in_node, nofs;
		block_t start_bidx;
		nid_t nid = le32_to_cpu(entry->nid);

		/* stop BG_GC if there are not enough free sections. */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
			return;

		if (check_valid_map(sbi, segno, off) == 0)
			continue;

		if (phase == 0) {
			f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1,
							META_NAT, true);
			continue;
		}

		if (phase == 1) {
			f2fs_ra_node_page(sbi, nid);
			continue;
		}

		/* Get an inode by ino with checking validity */
		if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
			continue;

		if (phase == 2) {
			f2fs_ra_node_page(sbi, dni.ino);
			continue;
		}

		ofs_in_node = le16_to_cpu(entry->ofs_in_node);

		if (phase == 3) {
			inode = f2fs_iget(sb, dni.ino);
			if (IS_ERR(inode) || is_bad_inode(inode))
				continue;

			if (!down_write_trylock(
				&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
				iput(inode);
				sbi->skipped_gc_rwsem++;
				continue;
			}

			start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
								ofs_in_node;

			if (f2fs_post_read_required(inode)) {
				int err = ra_data_block(inode, start_bidx);

				up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
				if (err) {
					iput(inode);
					continue;
				}
				add_gc_inode(gc_list, inode);
				continue;
			}

			data_page = f2fs_get_read_data_page(inode,
						start_bidx, REQ_RAHEAD, true);
			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			if (IS_ERR(data_page)) {
				iput(inode);
				continue;
			}

			f2fs_put_page(data_page, 0);
			add_gc_inode(gc_list, inode);
			continue;
		}

		/* phase 4 */
		inode = find_gc_inode(gc_list, dni.ino);
		if (inode) {
			struct f2fs_inode_info *fi = F2FS_I(inode);
			bool locked = false;

			if (S_ISREG(inode->i_mode)) {
				if (!down_write_trylock(&fi->i_gc_rwsem[READ]))
					continue;
				if (!down_write_trylock(
						&fi->i_gc_rwsem[WRITE])) {
					sbi->skipped_gc_rwsem++;
					up_write(&fi->i_gc_rwsem[READ]);
					continue;
				}
				locked = true;

				/* wait for all inflight aio data */
				inode_dio_wait(inode);
			}

			start_bidx = f2fs_start_bidx_of_node(nofs, inode)
								+ ofs_in_node;
			if (f2fs_post_read_required(inode))
				move_data_block(inode, start_bidx, gc_type,
								segno, off);
			else
				move_data_page(inode, start_bidx, gc_type,
								segno, off);

			if (locked) {
				up_write(&fi->i_gc_rwsem[WRITE]);
				up_write(&fi->i_gc_rwsem[READ]);
			}

			stat_inc_data_blk_count(sbi, 1, gc_type);
		}
	}

	if (++phase < 5)
		goto next_step;
}

static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
			int gc_type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	int ret;

	down_write(&sit_i->sentry_lock);
	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
					      NO_CHECK_TYPE, LFS);
	up_write(&sit_i->sentry_lock);
	return ret;
}

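/*
 * One call collects a whole section (sbi->segs_per_sec segments): every
 * summary page of the section is referenced up front so the per-segment
 * loop below can run under a single blk_plug.
 */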
static int do_garbage_collect(struct f2fs_sb_info *sbi,
				unsigned int start_segno,
				struct gc_inode_list *gc_list, int gc_type)
{
	struct page *sum_page;
	struct f2fs_summary_block *sum;
	struct blk_plug plug;
	unsigned int segno = start_segno;
	unsigned int end_segno = start_segno + sbi->segs_per_sec;
	int seg_freed = 0;
	unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
						SUM_TYPE_DATA : SUM_TYPE_NODE;

	/* readahead multi ssa blocks that have contiguous addresses */
	if (sbi->segs_per_sec > 1)
		f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
					sbi->segs_per_sec, META_SSA, true);

	/* reference all summary pages */
	while (segno < end_segno) {
		sum_page = f2fs_get_sum_page(sbi, segno++);
		unlock_page(sum_page);
	}

	blk_start_plug(&plug);

	for (segno = start_segno; segno < end_segno; segno++) {

		/* find segment summary of victim */
		sum_page = find_get_page(META_MAPPING(sbi),
					GET_SUM_BLOCK(sbi, segno));
		f2fs_put_page(sum_page, 0);

		if (get_valid_blocks(sbi, segno, false) == 0 ||
				!PageUptodate(sum_page) ||
				unlikely(f2fs_cp_error(sbi)))
			goto next;

		sum = page_address(sum_page);
		if (type != GET_SUM_TYPE((&sum->footer))) {
			f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) "
				"type [%d, %d] in SSA and SIT",
				segno, type, GET_SUM_TYPE((&sum->footer)));
			set_sbi_flag(sbi, SBI_NEED_FSCK);
			goto next;
		}

		/*
		 * this is to avoid deadlock:
		 * - lock_page(sum_page)         - f2fs_replace_block
		 *  - check_valid_map()            - down_write(sentry_lock)
		 *   - down_read(sentry_lock)     - change_curseg()
		 *                                  - lock_page(sum_page)
		 */
		if (type == SUM_TYPE_NODE)
			gc_node_segment(sbi, sum->entries, segno, gc_type);
		else
			gc_data_segment(sbi, sum->entries, gc_list, segno,
							gc_type);

		stat_inc_seg_count(sbi, type, gc_type);

		if (gc_type == FG_GC &&
				get_valid_blocks(sbi, segno, false) == 0)
			seg_freed++;
next:
		f2fs_put_page(sum_page, 0);
	}

	if (gc_type == FG_GC)
		f2fs_submit_merged_write(sbi,
				(type == SUM_TYPE_NODE) ? NODE : DATA);

	blk_finish_plug(&plug);

	stat_inc_call_count(sbi->stat_info);

	return seg_freed;
}

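/*
 * Entry point for both background and foreground GC; callers hold
 * sbi->gc_mutex, which is released here before returning.  While free
 * sections remain scarce the function loops back to gc_more, counting
 * rounds that made no progress because of atomic files or contended
 * i_gc_rwsem so that it can eventually drop in-memory pages or give up.
 */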
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
			bool background, unsigned int segno)
{
	int gc_type = sync ? FG_GC : BG_GC;
	int sec_freed = 0, seg_freed = 0, total_freed = 0;
	int ret = 0;
	struct cp_control cpc;
	unsigned int init_segno = segno;
	struct gc_inode_list gc_list = {
		.ilist = LIST_HEAD_INIT(gc_list.ilist),
		.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
	};
	unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
	unsigned long long first_skipped;
	unsigned int skipped_round = 0, round = 0;

	trace_f2fs_gc_begin(sbi->sb, sync, background,
				get_pages(sbi, F2FS_DIRTY_NODES),
				get_pages(sbi, F2FS_DIRTY_DENTS),
				get_pages(sbi, F2FS_DIRTY_IMETA),
				free_sections(sbi),
				free_segments(sbi),
				reserved_segments(sbi),
				prefree_segments(sbi));

	cpc.reason = __get_cp_reason(sbi);
	sbi->skipped_gc_rwsem = 0;
	first_skipped = last_skipped;
gc_more:
	if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
		ret = -EINVAL;
		goto stop;
	}
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto stop;
	}

	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
		/*
		 * For example, if there are many prefree_segments below given
		 * threshold, we can make them free by checkpoint. Then, we
		 * secure free segments which don't need fggc any more.
		 */
		if (prefree_segments(sbi)) {
			ret = f2fs_write_checkpoint(sbi, &cpc);
			if (ret)
				goto stop;
		}
		if (has_not_enough_free_secs(sbi, 0, 0))
			gc_type = FG_GC;
	}

	/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
	if (gc_type == BG_GC && !background) {
		ret = -EINVAL;
		goto stop;
	}
	if (!__get_victim(sbi, &segno, gc_type)) {
		ret = -ENODATA;
		goto stop;
	}

	seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
	if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
		sec_freed++;
	total_freed += seg_freed;

	if (gc_type == FG_GC) {
		if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
						sbi->skipped_gc_rwsem)
			skipped_round++;
		last_skipped = sbi->skipped_atomic_files[FG_GC];
		round++;
	}

	if (gc_type == FG_GC)
		sbi->cur_victim_sec = NULL_SEGNO;

	if (sync)
		goto stop;

	if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
		if (skipped_round <= MAX_SKIP_GC_COUNT ||
					skipped_round * 2 < round) {
			segno = NULL_SEGNO;
			goto gc_more;
		}

		if (first_skipped < last_skipped &&
				(last_skipped - first_skipped) >
					sbi->skipped_gc_rwsem) {
			f2fs_drop_inmem_pages_all(sbi, true);
			segno = NULL_SEGNO;
			goto gc_more;
		}
		if (gc_type == FG_GC)
			ret = f2fs_write_checkpoint(sbi, &cpc);
	}
stop:
	SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
	SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;

	trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed,
				get_pages(sbi, F2FS_DIRTY_NODES),
				get_pages(sbi, F2FS_DIRTY_DENTS),
				get_pages(sbi, F2FS_DIRTY_IMETA),
				free_sections(sbi),
				free_segments(sbi),
				reserved_segments(sbi),
				prefree_segments(sbi));

	mutex_unlock(&sbi->gc_mutex);

	put_gc_inode(&gc_list);

	if (sync)
		ret = sec_freed ? 0 : -EAGAIN;
	return ret;
}

void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
{
	DIRTY_I(sbi)->v_ops = &default_v_ops;

	sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;

	/* give warm/cold data area from slower device */
	if (sbi->s_ndevs && sbi->segs_per_sec == 1)
		SIT_I(sbi)->last_victim[ALLOC_NEXT] =
				GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}