diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f4564f32f6d93b61b7952e13dbf2a071c0dadf46..e84d22c5c6a833fa111e77ac6057513f0f243bd7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -136,109 +136,14 @@ static int compression_decompress(int type, struct list_head *ws,
 
 static int btrfs_decompress_bio(struct compressed_bio *cb);
 
-static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
-                                     unsigned long disk_size)
-{
-       return sizeof(struct compressed_bio) +
-               (DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * fs_info->csum_size;
-}
-
-static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
-                                u64 disk_start)
-{
-       struct btrfs_fs_info *fs_info = inode->root->fs_info;
-       SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
-       const u32 csum_size = fs_info->csum_size;
-       const u32 sectorsize = fs_info->sectorsize;
-       struct page *page;
-       unsigned int i;
-       char *kaddr;
-       u8 csum[BTRFS_CSUM_SIZE];
-       struct compressed_bio *cb = bio->bi_private;
-       u8 *cb_sum = cb->sums;
-
-       if ((inode->flags & BTRFS_INODE_NODATASUM) ||
-           test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
-               return 0;
-
-       shash->tfm = fs_info->csum_shash;
-
-       for (i = 0; i < cb->nr_pages; i++) {
-               u32 pg_offset;
-               u32 bytes_left = PAGE_SIZE;
-               page = cb->compressed_pages[i];
-
-               /* Determine the remaining bytes inside the page first */
-               if (i == cb->nr_pages - 1)
-                       bytes_left = cb->compressed_len - i * PAGE_SIZE;
-
-               /* Hash through the page sector by sector */
-               for (pg_offset = 0; pg_offset < bytes_left;
-                    pg_offset += sectorsize) {
-                       kaddr = kmap_atomic(page);
-                       crypto_shash_digest(shash, kaddr + pg_offset,
-                                           sectorsize, csum);
-                       kunmap_atomic(kaddr);
-
-                       if (memcmp(&csum, cb_sum, csum_size) != 0) {
-                               btrfs_print_data_csum_error(inode, disk_start,
-                                               csum, cb_sum, cb->mirror_num);
-                               if (btrfs_bio(bio)->device)
-                                       btrfs_dev_stat_inc_and_print(
-                                               btrfs_bio(bio)->device,
-                                               BTRFS_DEV_STAT_CORRUPTION_ERRS);
-                               return -EIO;
-                       }
-                       cb_sum += csum_size;
-                       disk_start += sectorsize;
-               }
-       }
-       return 0;
-}
-
-/*
- * Reduce bio and io accounting for a compressed_bio with its corresponding bio.
- *
- * Return true if there is no pending bio nor io.
- * Return false otherwise.
- */
-static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
-{
-       struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
-       unsigned int bi_size = 0;
-       bool last_io = false;
-       struct bio_vec *bvec;
-       struct bvec_iter_all iter_all;
-
-       /*
-        * At endio time, bi_iter.bi_size doesn't represent the real bio size.
-        * Thus here we have to iterate through all segments to grab correct
-        * bio size.
-        */
-       bio_for_each_segment_all(bvec, bio, iter_all)
-               bi_size += bvec->bv_len;
-
-       if (bio->bi_status)
-               cb->status = bio->bi_status;
-
-       ASSERT(bi_size && bi_size <= cb->compressed_len);
-       last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
-                                       &cb->pending_sectors);
-       /*
-        * Here we must wake up the possible error handler after all other
-        * operations on @cb finished, or we can race with
-        * finish_compressed_bio_*() which may free @cb.
-        */
-       wake_up_var(cb);
-
-       return last_io;
-}
-
 static void finish_compressed_bio_read(struct compressed_bio *cb)
 {
        unsigned int index;
        struct page *page;
 
+       if (cb->status == BLK_STS_OK)
+               cb->status = errno_to_blk_status(btrfs_decompress_bio(cb));
+
        /* Release the compressed pages */
        for (index = 0; index < cb->nr_pages; index++) {
                page = cb->compressed_pages[index];
@@ -247,85 +152,63 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
        }
 
        /* Do io completion on the original bio */
-       if (cb->status != BLK_STS_OK) {
+       if (cb->status != BLK_STS_OK)
                cb->orig_bio->bi_status = cb->status;
-               bio_endio(cb->orig_bio);
-       } else {
-               struct bio_vec *bvec;
-               struct bvec_iter_all iter_all;
-
-               /*
-                * We have verified the checksum already, set page checked so
-                * the end_io handlers know about it
-                */
-               ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
-               bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
-                       u64 bvec_start = page_offset(bvec->bv_page) +
-                                        bvec->bv_offset;
-
-                       btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
-                                       bvec->bv_page, bvec_start,
-                                       bvec->bv_len);
-               }
-
-               bio_endio(cb->orig_bio);
-       }
+       bio_endio(cb->orig_bio);
 
        /* Finally free the cb struct */
        kfree(cb->compressed_pages);
        kfree(cb);
 }
 
-/* when we finish reading compressed pages from the disk, we
- * decompress them and then run the bio end_io routines on the
- * decompressed pages (in the inode address space).
- *
- * This allows the checksumming and other IO error handling routines
- * to work normally
- *
- * The compressed pages are freed here, and it must be run
- * in process context
+/*
+ * Verify the checksums and kick off repair if needed on the compressed data
+ * before decompressing it into the original bio and freeing the compressed
+ * pages.
  */
 static void end_compressed_bio_read(struct bio *bio)
 {
        struct compressed_bio *cb = bio->bi_private;
-       struct inode *inode;
-       unsigned int mirror = btrfs_bio(bio)->mirror_num;
-       int ret = 0;
-
-       if (!dec_and_test_compressed_bio(cb, bio))
-               goto out;
-
-       /*
-        * Record the correct mirror_num in cb->orig_bio so that
-        * read-repair can work properly.
-        */
-       btrfs_bio(cb->orig_bio)->mirror_num = mirror;
-       cb->mirror_num = mirror;
-
-       /*
-        * Some IO in this cb have failed, just skip checksum as there
-        * is no way it could be correct.
-        */
-       if (cb->status != BLK_STS_OK)
-               goto csum_failed;
-
-       inode = cb->inode;
-       ret = check_compressed_csum(BTRFS_I(inode), bio,
-                                   bio->bi_iter.bi_sector << 9);
-       if (ret)
-               goto csum_failed;
+       struct inode *inode = cb->inode;
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct btrfs_inode *bi = BTRFS_I(inode);
+       bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
+                   !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
+       blk_status_t status = bio->bi_status;
+       struct btrfs_bio *bbio = btrfs_bio(bio);
+       struct bvec_iter iter;
+       struct bio_vec bv;
+       u32 offset;
+
+       btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
+               u64 start = bbio->file_offset + offset;
+
+               if (!status &&
+                   (!csum || !btrfs_check_data_csum(inode, bbio, offset,
+                                                    bv.bv_page, bv.bv_offset))) {
+                       clean_io_failure(fs_info, &bi->io_failure_tree,
+                                        &bi->io_tree, start, bv.bv_page,
+                                        btrfs_ino(bi), bv.bv_offset);
+               } else {
+                       int ret;
+
+                       refcount_inc(&cb->pending_ios);
+                       ret = btrfs_repair_one_sector(inode, bbio, offset,
+                                                     bv.bv_page, bv.bv_offset,
+                                                     btrfs_submit_data_read_bio);
+                       if (ret) {
+                               refcount_dec(&cb->pending_ios);
+                               status = errno_to_blk_status(ret);
+                       }
+               }
+       }
 
-       /* ok, we're the last bio for this extent, lets start
-        * the decompression.
-        */
-       ret = btrfs_decompress_bio(cb);
+       if (status)
+               cb->status = status;
 
-csum_failed:
-       if (ret)
-               cb->status = errno_to_blk_status(ret);
-       finish_compressed_bio_read(cb);
-out:
+       if (refcount_dec_and_test(&cb->pending_ios))
+               finish_compressed_bio_read(cb);
+       btrfs_bio_free_csum(bbio);
        bio_put(bio);
 }
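
The rework above drops the separate check_compressed_csum() pass: end_compressed_bio_read() now verifies every sector inline via btrfs_check_data_csum() as it walks the bio, and takes an extra pending_ios reference for each sector it hands to btrfs_repair_one_sector(). As a rough picture of a per-sector checksum pass, here is a minimal userspace sketch; sum32() and the fixed 4K sector are stand-ins, not the btrfs checksum code:

/*
 * Illustration only: verify a completed read buffer one sector at a time
 * against a table of expected checksums.  sum32() is a toy checksum, not
 * the filesystem's crc32c/xxhash/sha256, and the layout is invented.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SECTOR_SIZE 4096u

static uint32_t sum32(const uint8_t *buf, size_t len)
{
	uint32_t s = 0;
	size_t i;

	for (i = 0; i < len; i++)
		s = s * 31 + buf[i];
	return s;
}

/* Return the index of the first bad sector, or -1 if all of them match. */
static long check_sectors(const uint8_t *buf, size_t len, const uint32_t *expected)
{
	size_t off, i;

	for (off = 0, i = 0; off < len; off += SECTOR_SIZE, i++)
		if (sum32(buf + off, SECTOR_SIZE) != expected[i])
			return (long)i;
	return -1;
}

int main(void)
{
	static uint8_t data[2 * SECTOR_SIZE];
	uint32_t expected[2];

	memset(data, 0xab, sizeof(data));
	expected[0] = sum32(data, SECTOR_SIZE);
	expected[1] = sum32(data + SECTOR_SIZE, SECTOR_SIZE);

	data[SECTOR_SIZE + 10] ^= 0xff;		/* corrupt the second sector */
	printf("first bad sector: %ld\n",
	       check_sectors(data, sizeof(data), expected));
	return 0;
}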
 
@@ -403,6 +286,14 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
        kfree(cb);
 }
 
+static void btrfs_finish_compressed_write_work(struct work_struct *work)
+{
+       struct compressed_bio *cb =
+               container_of(work, struct compressed_bio, write_end_work);
+
+       finish_compressed_bio_write(cb);
+}
+
 /*
  * Do the cleanup once all the compressed pages hit the disk.  This will clear
  * writeback on the file pages and free the compressed pages.
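
The new btrfs_finish_compressed_write_work() recovers its compressed_bio from the embedded write_end_work via container_of(), so end_compressed_bio_write() only has to queue the work item on fs_info->compressed_write_workers. A toy userspace sketch of that embed-and-recover pattern; the structs and the direct call standing in for queue_work() are invented for the example:

/*
 * Illustration only: recover the containing structure from an embedded
 * work item, the same container_of() trick the new work handler uses.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_item {
	void (*fn)(struct work_item *work);
};

struct fake_compressed_bio {
	int id;
	struct work_item write_end_work;
};

static void finish_write_work(struct work_item *work)
{
	struct fake_compressed_bio *cb =
		container_of(work, struct fake_compressed_bio, write_end_work);

	printf("finishing compressed write %d\n", cb->id);
}

int main(void)
{
	struct fake_compressed_bio cb = {
		.id = 7,
		.write_end_work = { .fn = finish_write_work },
	};

	/* Stand-in for queue_work(): just invoke the handler directly. */
	cb.write_end_work.fn(&cb.write_end_work);
	return 0;
}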
@@ -414,29 +305,18 @@ static void end_compressed_bio_write(struct bio *bio)
 {
        struct compressed_bio *cb = bio->bi_private;
 
-       if (!dec_and_test_compressed_bio(cb, bio))
-               goto out;
+       if (bio->bi_status)
+               cb->status = bio->bi_status;
 
-       btrfs_record_physical_zoned(cb->inode, cb->start, bio);
+       if (refcount_dec_and_test(&cb->pending_ios)) {
+               struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
 
-       finish_compressed_bio_write(cb);
-out:
+               btrfs_record_physical_zoned(cb->inode, cb->start, bio);
+               queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
+       }
        bio_put(bio);
 }
 
-static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
-                                         struct bio *bio, int mirror_num)
-{
-       blk_status_t ret;
-
-       ASSERT(bio->bi_iter.bi_size);
-       ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-       if (ret)
-               return ret;
-       ret = btrfs_map_bio(fs_info, bio, mirror_num);
-       return ret;
-}
-
 /*
  * Allocate a compressed_bio, which will be used to read/write on-disk
  * (aka, compressed) * data.
@@ -455,7 +335,7 @@ static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
 
 
 static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
-                                       unsigned int opf, bio_end_io_t endio_func,
+                                       blk_opf_t opf, bio_end_io_t endio_func,
                                        u64 *next_stripe_start)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
@@ -487,7 +367,7 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
                return ERR_PTR(ret);
        }
        *next_stripe_start = disk_bytenr + geom.len;
-
+       refcount_inc(&cb->pending_ios);
        return bio;
 }
 
@@ -505,7 +385,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                                 unsigned int compressed_len,
                                 struct page **compressed_pages,
                                 unsigned int nr_pages,
-                                unsigned int write_flags,
+                                blk_opf_t write_flags,
                                 struct cgroup_subsys_state *blkcg_css,
                                 bool writeback)
 {
@@ -514,26 +394,25 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
        struct compressed_bio *cb;
        u64 cur_disk_bytenr = disk_start;
        u64 next_stripe_start;
-       blk_status_t ret;
+       blk_status_t ret = BLK_STS_OK;
        int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
        const bool use_append = btrfs_use_zone_append(inode, disk_start);
-       const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
+       const enum req_op bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
 
        ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
               IS_ALIGNED(len, fs_info->sectorsize));
-       cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
+       cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
        if (!cb)
                return BLK_STS_RESOURCE;
-       refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
+       refcount_set(&cb->pending_ios, 1);
        cb->status = BLK_STS_OK;
        cb->inode = &inode->vfs_inode;
        cb->start = start;
        cb->len = len;
-       cb->mirror_num = 0;
        cb->compressed_pages = compressed_pages;
        cb->compressed_len = compressed_len;
        cb->writeback = writeback;
-       cb->orig_bio = NULL;
+       INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
        cb->nr_pages = nr_pages;
 
        if (blkcg_css)
@@ -554,8 +433,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                                &next_stripe_start);
                        if (IS_ERR(bio)) {
                                ret = errno_to_blk_status(PTR_ERR(bio));
-                               bio = NULL;
-                               goto finish_cb;
+                               break;
                        }
                        if (blkcg_css)
                                bio->bi_opf |= REQ_CGROUP_PUNT;
@@ -599,44 +477,25 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                if (submit) {
                        if (!skip_sum) {
                                ret = btrfs_csum_one_bio(inode, bio, start, true);
-                               if (ret)
-                                       goto finish_cb;
+                               if (ret) {
+                                       bio->bi_status = ret;
+                                       bio_endio(bio);
+                                       break;
+                               }
                        }
 
-                       ret = submit_compressed_bio(fs_info, bio, 0);
-                       if (ret)
-                               goto finish_cb;
+                       ASSERT(bio->bi_iter.bi_size);
+                       btrfs_submit_bio(fs_info, bio, 0);
                        bio = NULL;
                }
                cond_resched();
        }
-       if (blkcg_css)
-               kthread_associate_blkcg(NULL);
-
-       return 0;
 
-finish_cb:
        if (blkcg_css)
                kthread_associate_blkcg(NULL);
 
-       if (bio) {
-               bio->bi_status = ret;
-               bio_endio(bio);
-       }
-       /* Last byte of @cb is submitted, endio will free @cb */
-       if (cur_disk_bytenr == disk_start + compressed_len)
-               return ret;
-
-       wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
-                          (disk_start + compressed_len - cur_disk_bytenr) >>
-                          fs_info->sectorsize_bits);
-       /*
-        * Even with previous bio ended, we should still have io not yet
-        * submitted, thus need to finish manually.
-        */
-       ASSERT(refcount_read(&cb->pending_sectors));
-       /* Now we are the only one referring @cb, can finish it safely. */
-       finish_compressed_bio_write(cb);
+       if (refcount_dec_and_test(&cb->pending_ios))
+               finish_compressed_bio_write(cb);
        return ret;
 }
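
Both submission paths now track completion with cb->pending_ios instead of a per-sector count: the refcount starts at 1 for the submitter, alloc_compressed_bio() takes one reference per bio it hands out, and each endio plus the final submitter drop one, so whoever reaches zero runs the finish routine exactly once. A small self-contained sketch of that ownership scheme, using C11 atomics rather than the kernel's refcount_t and made-up names:

/*
 * Illustration only: "one reference per in-flight bio, plus one held by the
 * submitter" completion counting, the pattern behind cb->pending_ios.
 */
#include <stdatomic.h>
#include <stdio.h>

struct ctx {
	atomic_uint pending;
};

static void finish(struct ctx *c)
{
	(void)c;
	puts("all I/O finished, run the completion exactly once");
}

/* Drop one reference; whoever drops the last one finishes. */
static void put(struct ctx *c)
{
	if (atomic_fetch_sub(&c->pending, 1) == 1)
		finish(c);
}

/* Each submitted unit of I/O takes a reference before it goes out... */
static void submit_one(struct ctx *c)
{
	atomic_fetch_add(&c->pending, 1);
	/* ...and its completion handler drops it; simulated synchronously. */
	put(c);
}

int main(void)
{
	struct ctx c = { .pending = 1 };	/* the submitter's own reference */
	int i;

	for (i = 0; i < 3; i++)
		submit_one(&c);

	put(&c);	/* drop the submitter reference; may run finish() */
	return 0;
}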
 
@@ -765,7 +624,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
                                int zeros;
                                zeros = PAGE_SIZE - zero_offset;
                                memzero_page(page, zero_offset, zeros);
-                               flush_dcache_page(page);
                        }
                }
 
@@ -819,7 +677,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        blk_status_t ret;
        int ret2;
        int i;
-       u8 *sums;
 
        em_tree = &BTRFS_I(inode)->extent_tree;
 
@@ -837,17 +694,15 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
        ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
        compressed_len = em->block_len;
-       cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
+       cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
        if (!cb) {
                ret = BLK_STS_RESOURCE;
                goto out;
        }
 
-       refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
+       refcount_set(&cb->pending_ios, 1);
        cb->status = BLK_STS_OK;
        cb->inode = inode;
-       cb->mirror_num = mirror_num;
-       sums = cb->sums;
 
        cb->start = em->orig_start;
        em_len = em->len;
@@ -893,9 +748,8 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                        REQ_OP_READ, end_compressed_bio_read,
                                        &next_stripe_start);
                        if (IS_ERR(comp_bio)) {
-                               ret = errno_to_blk_status(PTR_ERR(comp_bio));
-                               comp_bio = NULL;
-                               goto finish_cb;
+                               cb->status = errno_to_blk_status(PTR_ERR(comp_bio));
+                               break;
                        }
                }
                /*
@@ -931,22 +785,33 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                        submit = true;
 
                if (submit) {
-                       unsigned int nr_sectors;
+                       /* Save the original iter for read repair */
+                       if (bio_op(comp_bio) == REQ_OP_READ)
+                               btrfs_bio(comp_bio)->iter = comp_bio->bi_iter;
 
-                       ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
-                       if (ret)
-                               goto finish_cb;
+                       /*
+                        * Save the initial offset of this chunk, as there
+                        * is no direct correlation between compressed pages and
+                        * the original file offset.  The field is only used for
+                        * printing error messages.
+                        */
+                       btrfs_bio(comp_bio)->file_offset = file_offset;
 
-                       nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
-                                                 fs_info->sectorsize);
-                       sums += fs_info->csum_size * nr_sectors;
+                       ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
+                       if (ret) {
+                               comp_bio->bi_status = ret;
+                               bio_endio(comp_bio);
+                               break;
+                       }
 
-                       ret = submit_compressed_bio(fs_info, comp_bio, mirror_num);
-                       if (ret)
-                               goto finish_cb;
+                       ASSERT(comp_bio->bi_iter.bi_size);
+                       btrfs_submit_bio(fs_info, comp_bio, mirror_num);
                        comp_bio = NULL;
                }
        }
+
+       if (refcount_dec_and_test(&cb->pending_ios))
+               finish_compressed_bio_read(cb);
        return;
 
 fail:
@@ -964,25 +829,6 @@ out:
        bio->bi_status = ret;
        bio_endio(bio);
        return;
-finish_cb:
-       if (comp_bio) {
-               comp_bio->bi_status = ret;
-               bio_endio(comp_bio);
-       }
-       /* All bytes of @cb is submitted, endio will free @cb */
-       if (cur_disk_byte == disk_bytenr + compressed_len)
-               return;
-
-       wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
-                          (disk_bytenr + compressed_len - cur_disk_byte) >>
-                          fs_info->sectorsize_bits);
-       /*
-        * Even with previous bio ended, we should still have io not yet
-        * submitted, thus need to finish @cb manually.
-        */
-       ASSERT(refcount_read(&cb->pending_sectors));
-       /* Now we are the only one referring @cb, can finish it safely. */
-       finish_compressed_bio_read(cb);
 }
 
 /*
@@ -1481,7 +1327,6 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
                ASSERT(copy_start - decompressed < buf_len);
                memcpy_to_page(bvec.bv_page, bvec.bv_offset,
                               buf + copy_start - decompressed, copy_len);
-               flush_dcache_page(bvec.bv_page);
                cur_offset += copy_len;
 
                bio_advance(orig_bio, copy_len);