#include "subvolume.h"
#include "trace.h"
+#include <linux/ioprio.h>
+
static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op));
}
-void bch2_data_update_read_done(struct data_update *m,
- struct bch_extent_crc_unpacked crc)
+void bch2_data_update_read_done(struct data_update *m)
{
m->read_done = true;
/* write bio must own pages: */
BUG_ON(!m->op.wbio.bio.bi_vcnt);
- m->op.crc = crc;
- m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
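+ /* the write op reuses the crc (checksum/compression info) of the extent we just read: */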
+ m->op.crc = m->rbio.pick.crc;
+ m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
this_cpu_add(m->op.c->counters[BCH_COUNTER_move_extent_write], m->k.k->k.size);
bch2_bkey_buf_exit(&update->k, c);
bch2_disk_reservation_put(c, &update->op.res);
bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
+ kfree(update->bvecs);
+ update->bvecs = NULL;
}
static int bch2_update_unwritten_extent(struct btree_trans *trans,
goto out;
}
+ /* write path might have to decompress data: */
+ unsigned buf_bytes = 0;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
+
+ unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
+
+ m->bvecs = kmalloc_array(nr_vecs, sizeof(*m->bvecs), GFP_KERNEL);
+ if (!m->bvecs)
+ goto enomem;
+
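+ /* read bio and write bio share the same bvec array: the read fills the pages the write will consume: */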
+ bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ);
+ bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
+
+ if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL))
+ goto enomem;
+
+ rbio_init(&m->rbio.bio, c, io_opts, NULL);
+ m->rbio.bio.bi_iter.bi_size = buf_bytes;
+ m->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
+ m->op.wbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
+
return 0;
+enomem:
+ ret = -ENOMEM;
out:
bch2_data_update_exit(m);
return ret ?: -BCH_ERR_data_update_done;
struct bch_read_bio rbio;
struct bch_write_op op;
- /* Must be last since it is variable size */
- struct bio_vec bi_inline_vecs[];
+ struct bio_vec *bvecs;
};
void bch2_data_update_to_text(struct printbuf *, struct data_update *);
int bch2_data_update_index_update(struct bch_write_op *);
-void bch2_data_update_read_done(struct data_update *,
- struct bch_extent_crc_unpacked);
+void bch2_data_update_read_done(struct data_update *);
int bch2_extent_drop_ptrs(struct btree_trans *,
struct btree_iter *,
return 0;
}
-static void promote_free(struct bch_fs *c, struct promote_op *op)
+static noinline void promote_free(struct bch_read_bio *rbio)
{
- int ret;
+ struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
+ struct bch_fs *c = rbio->c;
+
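+ /* this promote is finished: drop it from the table of in-flight promotes */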
+ int ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params);
+ BUG_ON(ret);
bch2_data_update_exit(&op->write);
- ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
- bch_promote_params);
- BUG_ON(ret);
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
kfree_rcu(op, rcu);
}
static void promote_done(struct bch_write_op *wop)
{
- struct promote_op *op =
- container_of(wop, struct promote_op, write.op);
- struct bch_fs *c = op->write.op.c;
+ struct promote_op *op = container_of(wop, struct promote_op, write.op);
+ struct bch_fs *c = op->write.rbio.c;
- bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
- op->start_time);
- promote_free(c, op);
+ bch2_time_stats_update(&c->times[BCH_TIME_data_promote], op->start_time);
+ promote_free(&op->write.rbio);
}
-static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
+static noinline void promote_start(struct bch_read_bio *rbio)
{
- struct bio *bio = &op->write.op.wbio.bio;
+ struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
trace_and_count(op->write.op.c, read_promote, &rbio->bio);
- /* we now own pages: */
- BUG_ON(!rbio->bounce);
- BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
-
- memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
- sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
- swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
-
- bch2_data_update_read_done(&op->write, rbio->pick.crc);
+ bch2_data_update_read_done(&op->write);
}
static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
struct bch_io_failures *failed)
{
struct bch_fs *c = trans->c;
- struct promote_op *op = NULL;
- struct bio *bio;
- unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
int ret;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
return ERR_PTR(-BCH_ERR_nopromote_no_writes);
- op = kzalloc(struct_size(op, bi_inline_vecs, pages), GFP_KERNEL);
+ struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
if (!op) {
ret = -BCH_ERR_nopromote_enomem;
goto err_put;
op->start_time = local_clock();
op->pos = pos;
- rbio_init_fragment(&op->write.rbio.bio, orig);
- bio_init(&op->write.rbio.bio,
- NULL,
- op->write.bi_inline_vecs,
- pages, 0);
-
- if (bch2_bio_alloc_pages(&op->write.rbio.bio, sectors << 9, GFP_KERNEL)) {
- ret = -BCH_ERR_nopromote_enomem;
- goto err;
- }
-
- op->write.rbio.bounce = true;
- op->write.rbio.promote = true;
-
if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
bch_promote_params)) {
ret = -BCH_ERR_nopromote_in_flight;
goto err;
}
- bio = &op->write.op.wbio.bio;
- bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
-
struct data_update_opts update_opts = {};
if (!have_io_error(failed)) {
* possible errors: -BCH_ERR_nocow_lock_blocked,
* -BCH_ERR_ENOSPC_disk_reservation:
*/
- if (ret) {
- BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
- bch_promote_params));
- goto err;
- }
+ if (ret)
+ goto err_remove_hash;
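+ /* the embedded rbio is a fragment of the original read; the promote flag routes its completion to promote_start()/promote_free(): */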
+ rbio_init_fragment(&op->write.rbio.bio, orig);
+ op->write.rbio.bounce = true;
+ op->write.rbio.promote = true;
op->write.op.end_io = promote_done;
+
return &op->write.rbio;
+err_remove_hash:
+ BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
+ bch_promote_params));
err:
- bio_free_pages(&op->write.rbio.bio);
+ bio_free_pages(&op->write.op.wbio.bio);
/* We may have added to the rhashtable and thus need rcu freeing: */
kfree_rcu(op, rcu);
err_put:
if (rbio->split) {
struct bch_read_bio *parent = rbio->parent;
- if (rbio->promote) {
- struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
-
- if (!rbio->bio.bi_status) {
- promote_start(op, rbio);
- } else {
- bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
- promote_free(rbio->c, op);
- }
+ if (unlikely(rbio->promote)) {
+ if (!rbio->bio.bi_status)
+ promote_start(rbio);
+ else
+ promote_free(rbio);
} else {
if (rbio->bounce)
bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
}
if (flags & BCH_READ_data_update) {
+ struct data_update *u = container_of(orig, struct data_update, rbio);
+
/*
* can happen if we retry, and the extent we were going to read
* has been merged in the meantime:
*/
- if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) {
+ if (pick.crc.compressed_size > (u->op.wbio.bio.bi_iter.bi_size >> 9)) {
if (ca)
percpu_ref_put(&ca->io_ref);
goto hole;
atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
atomic_inc(&io->write.ctxt->write_ios);
- bch2_data_update_read_done(&io->write, io->write.rbio.pick.crc);
+ bch2_data_update_read_done(&io->write);
}
struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- struct moving_io *io;
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- unsigned sectors = k.k->size, pages;
int ret = -ENOMEM;
trace_move_extent2(c, k, &io_opts, &data_opts);
*/
bch2_trans_unlock(trans);
- /* write path might have to decompress data: */
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
-
- pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
- io = kzalloc(sizeof(struct moving_io) +
- sizeof(struct bio_vec) * pages, GFP_KERNEL);
+ struct moving_io *io = kzalloc(sizeof(struct moving_io), GFP_KERNEL);
if (!io)
goto err;
io->read_sectors = k.k->size;
io->write_sectors = k.k->size;
- bio_init(&io->write.op.wbio.bio, NULL, io->write.bi_inline_vecs, pages, 0);
- io->write.op.wbio.bio.bi_ioprio =
- IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
-
- if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
- GFP_KERNEL))
- goto err_free;
-
- bio_init(&io->write.rbio.bio, NULL, io->write.bi_inline_vecs, pages, 0);
- io->write.rbio.bio.bi_vcnt = pages;
- io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
- io->write.rbio.bio.bi_iter.bi_size = sectors << 9;
-
- io->write.rbio.bio.bi_opf = REQ_OP_READ;
- io->write.rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
-
- rbio_init(&io->write.rbio.bio,
- c,
- io_opts,
- move_read_endio);
-
ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
io_opts, data_opts, iter->btree_id, k);
if (ret)
- goto err_free_pages;
+ goto err_free;
+
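+ /* background moves issue their reads at idle I/O priority; move_read_endio hands completed reads back to the moving context: */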
+ io->write.rbio.bio.bi_end_io = move_read_endio;
+ io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
io->write.op.end_io = move_write_done;
BCH_READ_data_update|
BCH_READ_last_fragment);
return 0;
-err_free_pages:
- bio_free_pages(&io->write.op.wbio.bio);
err_free:
kfree(io);
err:
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
continue;
- if (ret2 == -ENOMEM) {
+ if (bch2_err_matches(ret2, ENOMEM)) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(ctxt);
continue;