// SPDX-License-Identifier: GPL-2.0
/*
 * background writeback - scan btree for dirty data and write it to the backing
 * device
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "writeback.h"

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/sched/clock.h>
#include <trace/events/bcache.h>

static void update_gc_after_writeback(struct cache_set *c)
{
	if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) ||
	    c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD)
		return;

	c->gc_after_writeback |= BCH_DO_AUTO_GC;
}

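/*
 * Illustrative note (not in the original source): BCH_DO_AUTO_GC armed here
 * is consumed by bch_writeback_thread() further down -- once writeback has
 * drained all dirty data, the flag is cleared and the garbage collection
 * thread is woken to compact the btree and reclaim bucket space.
 */
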
static uint64_t __calc_target_rate(struct cached_dev *dc)
{
	struct cache_set *c = dc->disk.c;

	/*
	 * This is the size of the cache, minus the amount used for
	 * flash-only devices
	 */
	uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size -
				atomic_long_read(&c->flash_dev_dirty_sectors);

	/*
	 * Unfortunately there is no control of global dirty data. If the
	 * user states that they want 10% dirty data in the cache, and has,
	 * e.g., 5 backing volumes of equal size, we try and ensure each
	 * backing volume uses about 2% of the cache for dirty data.
	 */
	uint32_t bdev_share =
		div64_u64(bdev_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT,
			  c->cached_dev_sectors);

	uint64_t cache_dirty_target =
		div_u64(cache_sectors * dc->writeback_percent, 100);

	/* Ensure each backing dev gets at least one dirty share */
	if (bdev_share < 1)
		bdev_share = 1;

	return (cache_dirty_target * bdev_share) >> WRITEBACK_SHARE_SHIFT;
}

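/*
 * Illustrative worked example (numbers assumed, not from the original
 * source): with writeback_percent = 10 and five equally sized backing
 * volumes sharing the cache, each volume's bdev_share is about 1/5 (scaled
 * by WRITEBACK_SHARE_SHIFT), so its dirty target comes out to roughly 2% of
 * cache_sectors, matching the comment above.
 */
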
static void __update_writeback_rate(struct cached_dev *dc)
{
	/*
	 * PI controller:
	 * Figures out the amount that should be written per second.
	 *
	 * First, the error (number of sectors that are dirty beyond our
	 * target) is calculated. The error is accumulated (numerically
	 * integrated).
	 *
	 * Then, the proportional value and integral value are scaled
	 * based on configured values. These are stored as inverses to
	 * avoid fixed point math and to make configuration easy -- e.g.
	 * the default value of 40 for writeback_rate_p_term_inverse
	 * attempts to write at a rate that would retire all the dirty
	 * blocks in 40 seconds.
	 *
	 * The writeback_rate_i_inverse value of 10000 means that 1/10000th
	 * of the error is accumulated in the integral term per second.
	 * This acts as a slow, long-term average that is not subject to
	 * variations in usage like the p term.
	 */
	int64_t target = __calc_target_rate(dc);
	int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
	int64_t error = dirty - target;
	int64_t proportional_scaled =
		div_s64(error, dc->writeback_rate_p_term_inverse);
	int64_t integral_scaled;
	uint32_t new_rate;

	if ((error < 0 && dc->writeback_rate_integral > 0) ||
	    (error > 0 && time_before64(local_clock(),
			 dc->writeback_rate.next + NSEC_PER_MSEC))) {
		/*
		 * Only decrease the integral term if it's more than
		 * zero. Only increase the integral term if the device
		 * is keeping up. (Don't wind up the integral
		 * ineffectively in either case).
		 *
		 * It's necessary to scale this by
		 * writeback_rate_update_seconds to keep the integral
		 * term dimensioned properly.
		 */
		dc->writeback_rate_integral += error *
			dc->writeback_rate_update_seconds;
	}

	integral_scaled = div_s64(dc->writeback_rate_integral,
			dc->writeback_rate_i_term_inverse);

	new_rate = clamp_t(int32_t, (proportional_scaled + integral_scaled),
			dc->writeback_rate_minimum, NSEC_PER_SEC);

	dc->writeback_rate_proportional = proportional_scaled;
	dc->writeback_rate_integral_scaled = integral_scaled;
	dc->writeback_rate_change = new_rate -
			atomic_long_read(&dc->writeback_rate.rate);
	atomic_long_set(&dc->writeback_rate.rate, new_rate);
	dc->writeback_rate_target = target;
}

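/*
 * Illustrative worked example (values assumed): with the default
 * writeback_rate_p_term_inverse of 40, an error of 40000 sectors above the
 * target gives proportional_scaled = 40000 / 40 = 1000 sectors/sec, i.e. a
 * rate that would retire the excess in about 40 seconds. The integral term
 * (inverse 10000) only nudges the rate slowly over time.
 */
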
static bool set_at_max_writeback_rate(struct cache_set *c,
				      struct cached_dev *dc)
{
	/*
	 * Idle_counter is increased every time update_writeback_rate() is
	 * called. If all backing devices attached to the same cache set have
	 * identical dc->writeback_rate_update_seconds values, it is about 6
	 * rounds of update_writeback_rate() on each backing device before
	 * c->at_max_writeback_rate is set to 1, and then the max writeback
	 * rate is set to each dc->writeback_rate.rate.
	 * In order to avoid the extra locking cost of counting the exact
	 * number of dirty cached devices, c->attached_dev_nr is used to
	 * calculate the idle threshold. It might be bigger if not all cached
	 * devices are in writeback mode, but it still works well with a
	 * limited number of extra rounds of update_writeback_rate().
	 */
	if (atomic_inc_return(&c->idle_counter) <
	    atomic_read(&c->attached_dev_nr) * 6)
		return false;

	if (atomic_read(&c->at_max_writeback_rate) != 1)
		atomic_set(&c->at_max_writeback_rate, 1);

	atomic_long_set(&dc->writeback_rate.rate, INT_MAX);

	/* keep writeback_rate_target as existing value */
	dc->writeback_rate_proportional = 0;
	dc->writeback_rate_integral_scaled = 0;
	dc->writeback_rate_change = 0;

	/*
	 * Check c->idle_counter and c->at_max_writeback_rate again in case
	 * new I/O arrives before set_at_max_writeback_rate() returns.
	 * Then the writeback rate is set to 1, and its new value should be
	 * decided via __update_writeback_rate().
	 */
	if ((atomic_read(&c->idle_counter) <
	     atomic_read(&c->attached_dev_nr) * 6) ||
	    !atomic_read(&c->at_max_writeback_rate))
		return false;

	return true;
}

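/*
 * Illustrative note: per the comment above, with N backing devices attached
 * the cache set is considered idle only after roughly N * 6 consecutive
 * calls to update_writeback_rate() without new I/O resetting c->idle_counter;
 * only then is every device's writeback rate pushed to INT_MAX.
 */
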
static void update_writeback_rate(struct work_struct *work)
{
	struct cached_dev *dc = container_of(to_delayed_work(work),
					     struct cached_dev,
					     writeback_rate_update);
	struct cache_set *c = dc->disk.c;

	/*
	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
	 * cancel_delayed_work_sync().
	 */
	set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
	smp_mb__after_atomic();

	/*
	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
	 * check it here too.
	 */
	if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
	    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
		/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
		smp_mb__after_atomic();
		return;
	}

	if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
		/*
		 * If the whole cache set is idle, set_at_max_writeback_rate()
		 * will set the writeback rate to a max number. Then it is
		 * unnecessary to update the writeback rate for an idle cache
		 * set in maximum writeback rate number(s).
		 */
		if (!set_at_max_writeback_rate(c, dc)) {
			down_read(&dc->writeback_lock);
			__update_writeback_rate(dc);
			update_gc_after_writeback(c);
			up_read(&dc->writeback_lock);
		}
	}

	/*
	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
	 * check it here too.
	 */
	if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
	    !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		schedule_delayed_work(&dc->writeback_rate_update,
			      dc->writeback_rate_update_seconds * HZ);
	}

	/*
	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
	 * cancel_delayed_work_sync().
	 */
	clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
	smp_mb__after_atomic();
}

static unsigned int writeback_delay(struct cached_dev *dc,
				    unsigned int sectors)
{
	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
	    !dc->writeback_percent)
		return 0;

	return bch_next_delay(&dc->writeback_rate, sectors);
}

struct dirty_io {
	struct closure		cl;
	struct cached_dev	*dc;
	uint16_t		sequence;
	struct bio		bio;
};

static void dirty_init(struct keybuf_key *w)
{
	struct dirty_io *io = w->private;
	struct bio *bio = &io->bio;

	bio_init(bio, bio->bi_inline_vecs,
		 DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS));
	if (!io->dc->writeback_percent)
		bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	bio->bi_iter.bi_size	= KEY_SIZE(&w->key) << 9;
	bio->bi_private		= w;
	bch_bio_map(bio, NULL);
}

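/*
 * Illustrative note: KEY_SIZE() is in 512-byte sectors, so the left shift by
 * 9 above converts the key's size to the byte count bio->bi_iter.bi_size
 * expects.
 */
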
static void dirty_io_destructor(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);

	kfree(io);
}

static void write_dirty_finish(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
	struct keybuf_key *w = io->bio.bi_private;
	struct cached_dev *dc = io->dc;

	bio_free_pages(&io->bio);

	/* This is kind of a dumb way of signalling errors. */
	if (KEY_DIRTY(&w->key)) {
		int ret;
		unsigned int i;
		struct keylist keys;

		bch_keylist_init(&keys);

		bkey_copy(keys.top, &w->key);
		SET_KEY_DIRTY(keys.top, false);
		bch_keylist_push(&keys);

		for (i = 0; i < KEY_PTRS(&w->key); i++)
			atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);

		ret = bch_btree_insert(dc->disk.c, &keys, NULL, &w->key);

		if (ret)
			trace_bcache_writeback_collision(&w->key);

		atomic_long_inc(ret
				? &dc->disk.c->writeback_keys_failed
				: &dc->disk.c->writeback_keys_done);
	}

	bch_keybuf_del(&dc->writeback_keys, w);
	up(&dc->in_flight);

	closure_return_with_destructor(cl, dirty_io_destructor);
}

static void dirty_endio(struct bio *bio)
{
	struct keybuf_key *w = bio->bi_private;
	struct dirty_io *io = w->private;

	if (bio->bi_status) {
		SET_KEY_DIRTY(&w->key, false);
		bch_count_backing_io_errors(io->dc, bio);
	}

	closure_put(&io->cl);
}

static void write_dirty(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
	struct keybuf_key *w = io->bio.bi_private;
	struct cached_dev *dc = io->dc;

	uint16_t next_sequence;

	if (atomic_read(&dc->writeback_sequence_next) != io->sequence) {
		/* Not our turn to write; wait for a write to complete */
		closure_wait(&dc->writeback_ordering_wait, cl);

		if (atomic_read(&dc->writeback_sequence_next) == io->sequence) {
			/*
			 * Edge case -- it happened in indeterminate order
			 * relative to when we were added to the wait list.
			 */
			closure_wake_up(&dc->writeback_ordering_wait);
		}

		continue_at(cl, write_dirty, io->dc->writeback_write_wq);
		return;
	}

	next_sequence = io->sequence + 1;

	/*
	 * IO errors are signalled using the dirty bit on the key.
	 * If we failed to read, we should not attempt to write to the
	 * backing device. Instead, immediately go to write_dirty_finish
	 * to clean up.
	 */
	if (KEY_DIRTY(&w->key)) {
		dirty_init(w);
		bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
		io->bio.bi_iter.bi_sector = KEY_START(&w->key);
		bio_set_dev(&io->bio, io->dc->bdev);
		io->bio.bi_end_io	= dirty_endio;

		/* I/O request sent to backing device */
		closure_bio_submit(io->dc->disk.c, &io->bio, cl);
	}

	atomic_set(&dc->writeback_sequence_next, next_sequence);
	closure_wake_up(&dc->writeback_ordering_wait);

	continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
}

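/*
 * Illustrative note: writes to the backing device are kept in submission
 * order via dc->writeback_sequence_next. A dirty_io whose sequence is not
 * next in line parks itself on writeback_ordering_wait and is re-driven from
 * writeback_write_wq once the preceding write bumps the counter.
 */
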
static void read_dirty_endio(struct bio *bio)
{
	struct keybuf_key *w = bio->bi_private;
	struct dirty_io *io = w->private;

	/* is_read = 1 */
	bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
			    bio->bi_status, 1,
			    "reading dirty data from cache");

	dirty_endio(bio);
}

static void read_dirty_submit(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);

	closure_bio_submit(io->dc->disk.c, &io->bio, cl);

	continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}

static void read_dirty(struct cached_dev *dc)
{
	unsigned int delay = 0;
	struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w;
	size_t size;
	int nk, i;
	struct dirty_io *io;
	struct closure cl;
	uint16_t sequence = 0;

	BUG_ON(!llist_empty(&dc->writeback_ordering_wait.list));
	atomic_set(&dc->writeback_sequence_next, sequence);
	closure_init_stack(&cl);

	/*
	 * XXX: if we error, background writeback just spins. Should use some
	 * mempools.
	 */

	next = bch_keybuf_next(&dc->writeback_keys);

	while (!kthread_should_stop() &&
	       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
	       next) {
		size = 0;
		nk = 0;

		do {
			BUG_ON(ptr_stale(dc->disk.c, &next->key, 0));

			/*
			 * Don't combine too many operations, even if they
			 * are all small.
			 */
			if (nk >= MAX_WRITEBACKS_IN_PASS)
				break;

			/*
			 * If the current operation is very large, don't
			 * further combine operations.
			 */
			if (size >= MAX_WRITESIZE_IN_PASS)
				break;

			/*
			 * Operations are only eligible to be combined
			 * if they are contiguous.
			 *
			 * TODO: add a heuristic willing to fire a
			 * certain amount of non-contiguous IO per pass,
			 * so that we can benefit from backing device
			 * command merging.
			 */
			if ((nk != 0) && bkey_cmp(&keys[nk-1]->key,
						&START_KEY(&next->key)))
				break;

			size += KEY_SIZE(&next->key);
			keys[nk++] = next;
		} while ((next = bch_keybuf_next(&dc->writeback_keys)));

		/* Now we have gathered a set of 1..5 keys to write back. */
		for (i = 0; i < nk; i++) {
			w = keys[i];

			io = kzalloc(sizeof(struct dirty_io) +
				     sizeof(struct bio_vec) *
				     DIV_ROUND_UP(KEY_SIZE(&w->key),
						  PAGE_SECTORS),
				     GFP_KERNEL);
			if (!io)
				goto err;

			w->private	= io;
			io->dc		= dc;
			io->sequence	= sequence++;

			dirty_init(w);
			bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
			io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
			bio_set_dev(&io->bio,
				    PTR_CACHE(dc->disk.c, &w->key, 0)->bdev);
			io->bio.bi_end_io	= read_dirty_endio;

			if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
				goto err_free;

			trace_bcache_writeback(&w->key);

			down(&dc->in_flight);

			/*
			 * We've acquired a semaphore for the maximum
			 * simultaneous number of writebacks; from here
			 * everything happens asynchronously.
			 */
			closure_call(&io->cl, read_dirty_submit, NULL, &cl);
		}

		delay = writeback_delay(dc, size);

		while (!kthread_should_stop() &&
		       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
		       delay) {
			schedule_timeout_interruptible(delay);
			delay = writeback_delay(dc, 0);
		}
	}

	if (0) {
err_free:
		kfree(w->private);
err:
		bch_keybuf_del(&dc->writeback_keys, w);
	}

	/*
	 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
	 * freed) before refilling again
	 */
	closure_sync(&cl);
}

/* Scan for dirty data */

void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
				  uint64_t offset, int nr_sectors)
{
	struct bcache_device *d = c->devices[inode];
	unsigned int stripe_offset, stripe, sectors_dirty;

	if (!d)
		return;

	if (UUID_FLASH_ONLY(&c->uuids[inode]))
		atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors);

	stripe = offset_to_stripe(d, offset);
	stripe_offset = offset & (d->stripe_size - 1);

	while (nr_sectors) {
		int s = min_t(unsigned int, abs(nr_sectors),
			      d->stripe_size - stripe_offset);

		if (nr_sectors < 0)
			s = -s;

		if (stripe >= d->nr_stripes)
			return;

		sectors_dirty = atomic_add_return(s,
					d->stripe_sectors_dirty + stripe);
		if (sectors_dirty == d->stripe_size)
			set_bit(stripe, d->full_dirty_stripes);
		else
			clear_bit(stripe, d->full_dirty_stripes);

		nr_sectors -= s;
		stripe_offset = 0;
		stripe++;
	}
}

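/*
 * Illustrative note: stripe_offset is computed as offset & (stripe_size - 1),
 * which only yields the in-stripe remainder when d->stripe_size is a power of
 * two; the dirty count is then applied stripe by stripe, and a stripe is
 * marked in full_dirty_stripes once every sector in it is dirty.
 */
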
static bool dirty_pred(struct keybuf *buf, struct bkey *k)
{
	struct cached_dev *dc = container_of(buf,
					     struct cached_dev,
					     writeback_keys);

	BUG_ON(KEY_INODE(k) != dc->disk.id);

	return KEY_DIRTY(k);
}

static void refill_full_stripes(struct cached_dev *dc)
{
	struct keybuf *buf = &dc->writeback_keys;
	unsigned int start_stripe, stripe, next_stripe;
	bool wrapped = false;

	stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned));

	if (stripe >= dc->disk.nr_stripes)
		stripe = 0;

	start_stripe = stripe;

	while (1) {
		stripe = find_next_bit(dc->disk.full_dirty_stripes,
				       dc->disk.nr_stripes, stripe);

		if (stripe == dc->disk.nr_stripes)
			goto next;

		next_stripe = find_next_zero_bit(dc->disk.full_dirty_stripes,
						 dc->disk.nr_stripes, stripe);

		buf->last_scanned = KEY(dc->disk.id,
					stripe * dc->disk.stripe_size, 0);

		bch_refill_keybuf(dc->disk.c, buf,
				  &KEY(dc->disk.id,
				       next_stripe * dc->disk.stripe_size, 0),
				  dirty_pred);

		if (array_freelist_empty(&buf->freelist))
			return;

		stripe = next_stripe;
next:
		if (wrapped && stripe > start_stripe)
			return;

		if (stripe == dc->disk.nr_stripes) {
			stripe = 0;
			wrapped = true;
		}
	}
}

/*
 * Returns true if we scanned the entire disk
 */
static bool refill_dirty(struct cached_dev *dc)
{
	struct keybuf *buf = &dc->writeback_keys;
	struct bkey start = KEY(dc->disk.id, 0, 0);
	struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
	struct bkey start_pos;

	/*
	 * make sure keybuf pos is inside the range for this disk - at bringup
	 * we might not be attached yet so this disk's inode nr isn't
	 * initialized then
	 */
	if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
	    bkey_cmp(&buf->last_scanned, &end) > 0)
		buf->last_scanned = start;

	if (dc->partial_stripes_expensive) {
		refill_full_stripes(dc);
		if (array_freelist_empty(&buf->freelist))
			return false;
	}

	start_pos = buf->last_scanned;
	bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);

	if (bkey_cmp(&buf->last_scanned, &end) < 0)
		return false;

	/*
	 * If we get to the end start scanning again from the beginning, and
	 * only scan up to where we initially started scanning from:
	 */
	buf->last_scanned = start;
	bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);

	return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
}

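/*
 * Illustrative note: the keybuf scan is circular. If a pass reaches the end
 * of this device's key range, scanning restarts from offset 0 and stops at
 * the position where the pass began (start_pos), so refill_dirty() reports
 * true only once a full lap of the disk has been searched.
 */
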
static int bch_writeback_thread(void *arg)
{
	struct cached_dev *dc = arg;
	struct cache_set *c = dc->disk.c;
	bool searched_full_index;

	bch_ratelimit_reset(&dc->writeback_rate);

	while (!kthread_should_stop() &&
	       !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		down_write(&dc->writeback_lock);
		set_current_state(TASK_INTERRUPTIBLE);
		/*
		 * If the bcache device is detaching, skip here and continue
		 * to perform writeback. Otherwise, if no dirty data on cache,
		 * or there is dirty data on cache but writeback is disabled,
		 * the writeback thread should sleep here and wait for others
		 * to wake it up.
		 */
		if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
		    (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
			up_write(&dc->writeback_lock);

			if (kthread_should_stop() ||
			    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
				set_current_state(TASK_RUNNING);
				break;
			}

			schedule();
			continue;
		}
		set_current_state(TASK_RUNNING);

		searched_full_index = refill_dirty(dc);

		if (searched_full_index &&
		    RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
			atomic_set(&dc->has_dirty, 0);
			SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
			bch_write_bdev_super(dc, NULL);
			/*
			 * If the bcache device is detaching via the sysfs
			 * interface, the writeback thread should stop once
			 * there is no dirty data on cache. The
			 * BCACHE_DEV_DETACHING flag is set in
			 * bch_cached_dev_detach().
			 */
			if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
				up_write(&dc->writeback_lock);
				break;
			}

			/*
			 * When the dirty data rate is high (e.g. 50%+), there
			 * might be heavy bucket fragmentation after writeback
			 * finishes, which hurts subsequent write performance.
			 * If users really care about write performance they
			 * may set BCH_ENABLE_AUTO_GC via sysfs; then, when
			 * BCH_DO_AUTO_GC is set, the garbage collection thread
			 * is woken up here. After moving gc, the shrunk btree
			 * and discarded free bucket SSD space may be helpful
			 * for subsequent write requests.
			 */
			if (c->gc_after_writeback ==
			    (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) {
				c->gc_after_writeback &= ~BCH_DO_AUTO_GC;
				force_wake_up_gc(c);
			}
		}

		up_write(&dc->writeback_lock);

		read_dirty(dc);

		if (searched_full_index) {
			unsigned int delay = dc->writeback_delay * HZ;

			while (delay &&
			       !kthread_should_stop() &&
			       !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
			       !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
				delay = schedule_timeout_interruptible(delay);

			bch_ratelimit_reset(&dc->writeback_rate);
		}
	}

	if (dc->writeback_write_wq) {
		flush_workqueue(dc->writeback_write_wq);
		destroy_workqueue(dc->writeback_write_wq);
	}
	cached_dev_put(dc);
	wait_for_kthread_stop();

	return 0;
}

/* Init */
#define INIT_KEYS_EACH_TIME	500000
#define INIT_KEYS_SLEEP_MS	100

struct sectors_dirty_init {
	struct btree_op	op;
	unsigned int	inode;
	size_t		count;
	struct bkey	start;
};

static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
				 struct bkey *k)
{
	struct sectors_dirty_init *op = container_of(_op,
					struct sectors_dirty_init, op);
	if (KEY_INODE(k) > op->inode)
		return MAP_DONE;

	if (KEY_DIRTY(k))
		bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
					     KEY_START(k), KEY_SIZE(k));

	op->count++;
	if (atomic_read(&b->c->search_inflight) &&
	    !(op->count % INIT_KEYS_EACH_TIME)) {
		bkey_copy_key(&op->start, k);
		return -EAGAIN;
	}

	return MAP_CONTINUE;
}

void bch_sectors_dirty_init(struct bcache_device *d)
{
	struct sectors_dirty_init op;
	int ret;

	bch_btree_op_init(&op.op, -1);
	op.inode = d->id;
	op.count = 0;
	op.start = KEY(op.inode, 0, 0);

	do {
		ret = bch_btree_map_keys(&op.op, d->c, &op.start,
					 sectors_dirty_init_fn, 0);
		if (ret == -EAGAIN)
			schedule_timeout_interruptible(
				msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
		else if (ret < 0) {
			pr_warn("sectors dirty init failed, ret=%d!", ret);
			break;
		}
	} while (ret == -EAGAIN);
}

void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
	sema_init(&dc->in_flight, 64);
	init_rwsem(&dc->writeback_lock);
	bch_keybuf_init(&dc->writeback_keys);

	dc->writeback_metadata		= true;
	dc->writeback_running		= false;
	dc->writeback_percent		= 10;
	dc->writeback_delay		= 30;
	atomic_long_set(&dc->writeback_rate.rate, 1024);
	dc->writeback_rate_minimum	= 8;

	dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
	dc->writeback_rate_p_term_inverse = 40;
	dc->writeback_rate_i_term_inverse = 10000;

	WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}

int bch_cached_dev_writeback_start(struct cached_dev *dc)
{
	dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq",
						 WQ_MEM_RECLAIM, 0);
	if (!dc->writeback_write_wq)
		return -ENOMEM;

	cached_dev_get(dc);
	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
					      "bcache_writeback");
	if (IS_ERR(dc->writeback_thread)) {
		cached_dev_put(dc);
		return PTR_ERR(dc->writeback_thread);
	}
	dc->writeback_running = true;

	WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	schedule_delayed_work(&dc->writeback_rate_update,
			      dc->writeback_rate_update_seconds * HZ);

	bch_writeback_queue(dc);

	return 0;
}