btrfs: remove btrfs_end_io_wq

[people/ms/linux.git] / fs / btrfs / inode.c
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 05e0c4a5affda0268cbce2260ff84998d862266e..9cce0a3228f831ae065e57aee353da3841534db5 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -560,8 +560,8 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
          * will unlock the full page.
          */
         if (fs_info->sectorsize < PAGE_SIZE) {
-               if (!IS_ALIGNED(start, PAGE_SIZE) ||
-                   !IS_ALIGNED(end + 1, PAGE_SIZE))
+               if (!PAGE_ALIGNED(start) ||
+                   !PAGE_ALIGNED(end + 1))
                         return 0;
         }
  
@@ -678,8 +678,8 @@ again:
          * Thus we must also check against @actual_end, not just @end.
          */
         if (blocksize < PAGE_SIZE) {
-               if (!IS_ALIGNED(start, PAGE_SIZE) ||
-                   !IS_ALIGNED(round_up(actual_end, blocksize), PAGE_SIZE))
+               if (!PAGE_ALIGNED(start) ||
+                   !PAGE_ALIGNED(round_up(actual_end, blocksize)))
                         goto cleanup_and_bail_uncompressed;
         }
  
@@ -2580,90 +2580,74 @@ out:
         return errno_to_blk_status(ret);
  }
  
-/*
- * extent_io.c submission hook. This does the right thing for csum calculation
- * on write, or reading the csums from the tree before a read.
- *
- * Rules about async/sync submit,
- * a) read:                            sync submit
- *
- * b) write without checksum:          sync submit
- *
- * c) write with checksum:
- *    c-1) if bio is issued by fsync:  sync submit
- *         (sync_writers != 0)
- *
- *    c-2) if root is reloc root:      sync submit
- *         (only in case of buffered IO)
- *
- *    c-3) otherwise:                  async submit
- */
-void btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
-                          int mirror_num, enum btrfs_compression_type compress_type)
+void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirror_num)
  {
         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
-       blk_status_t ret = 0;
-       int skip_sum;
-       int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
-
-       skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
-               test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
-
-       if (btrfs_is_free_space_inode(BTRFS_I(inode)))
-               metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
+       struct btrfs_inode *bi = BTRFS_I(inode);
+       blk_status_t ret;
  
         if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-               struct page *page = bio_first_bvec_all(bio)->bv_page;
-               loff_t file_offset = page_offset(page);
-
-               ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
+               ret = extract_ordered_extent(bi, bio,
+                               page_offset(bio_first_bvec_all(bio)->bv_page));
                 if (ret)
                         goto out;
         }
  
-       if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-               ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
-               if (ret)
-                       goto out;
-
-               if (compress_type != BTRFS_COMPRESS_NONE) {
-                       /*
-                        * btrfs_submit_compressed_read will handle completing
-                        * the bio if there were any errors, so just return
-                        * here.
-                        */
-                       btrfs_submit_compressed_read(inode, bio, mirror_num);
-                       return;
-               } else {
-                       /*
-                        * Lookup bio sums does extra checks around whether we
-                        * need to csum or not, which is why we ignore skip_sum
-                        * here.
-                        */
-                       ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+       /*
+        * Rules for async/sync submit:
+        *   a) write without checksum:                 sync submit
+        *   b) write with checksum:
+        *      b-1) if bio is issued by fsync:         sync submit
+        *           (sync_writers != 0)
+        *      b-2) if root is reloc root:             sync submit
+        *           (only in case of buffered IO)
+        *      b-3) otherwise:                         async submit
+        */
+       if (!(bi->flags & BTRFS_INODE_NODATASUM) &&
+           !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
+               if (atomic_read(&bi->sync_writers)) {
+                       ret = btrfs_csum_one_bio(bi, bio, (u64)-1, false);
                         if (ret)
                                 goto out;
-               }
-               goto mapit;
-       } else if (async && !skip_sum) {
-               /* csum items have already been cloned */
-               if (btrfs_is_data_reloc_root(root))
-                       goto mapit;
-               /* we're doing a write, do the async checksumming */
-               ret = btrfs_wq_submit_bio(inode, bio, mirror_num,
-                                         0, btrfs_submit_bio_start);
-               goto out;
-       } else if (!skip_sum) {
-               ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
-               if (ret)
+               } else if (btrfs_is_data_reloc_root(bi->root)) {
+                       ; /* Csum items have already been cloned */
+               } else {
+                       ret = btrfs_wq_submit_bio(inode, bio, mirror_num, 0,
+                                                 btrfs_submit_bio_start);
                         goto out;
+               }
         }
-
-mapit:
         ret = btrfs_map_bio(fs_info, bio, mirror_num);
+out:
+       if (ret) {
+               bio->bi_status = ret;
+               bio_endio(bio);
+       }
+}
+
+void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
+                       int mirror_num, enum btrfs_compression_type compress_type)
+{
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       blk_status_t ret;
  
+       if (compress_type != BTRFS_COMPRESS_NONE) {
+               /*
+                * btrfs_submit_compressed_read will handle completing the bio
+                * if there were any errors, so just return here.
+                */
+               btrfs_submit_compressed_read(inode, bio, mirror_num);
+               return;
+       }
+
+       /*
+        * Lookup bio sums does extra checks around whether we need to csum or
+        * not, which is why we ignore skip_sum here.
+        */
+       ret = btrfs_lookup_bio_sums(inode, bio, NULL);
+       if (ret)
+               goto out;
+       ret = btrfs_map_bio(fs_info, bio, mirror_num);
  out:
         if (ret) {
                 bio->bi_status = ret;
@@ -3328,6 +3312,29 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
                                        finish_ordered_fn, uptodate);
  }
  
+/*
+ * Verify the checksum for a single sector without any extra actions that
+ * depend on the type of I/O.
+ */
+int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
+                           u32 pgoff, u8 *csum, const u8 * const csum_expected)
+{
+       SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+       char *kaddr;
+
+       ASSERT(pgoff + fs_info->sectorsize <= PAGE_SIZE);
+
+       shash->tfm = fs_info->csum_shash;
+
+       kaddr = kmap_local_page(page) + pgoff;
+       crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
+       kunmap_local(kaddr);
+
+       if (memcmp(csum, csum_expected, fs_info->csum_size))
+               return -EIO;
+       return 0;
+}
+
  /*
   * check_data_csum - verify checksum of one sector of uncompressed data
   * @inode:     inode
@@ -3338,35 +3345,27 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
   * @start:     logical offset in the file
   *
   * The length of such check is always one sector size.
+ *
+ * When a csum mismatch is detected, we also report the error and fill the
+ * corrupted range with zeros, which is why the extra parameters are needed.
   */
  static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
                            u32 bio_offset, struct page *page, u32 pgoff,
                            u64 start)
  {
         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
-       char *kaddr;
         u32 len = fs_info->sectorsize;
-       const u32 csum_size = fs_info->csum_size;
-       unsigned int offset_sectors;
         u8 *csum_expected;
         u8 csum[BTRFS_CSUM_SIZE];
  
         ASSERT(pgoff + len <= PAGE_SIZE);
  
-       offset_sectors = bio_offset >> fs_info->sectorsize_bits;
-       csum_expected = ((u8 *)bbio->csum) + offset_sectors * csum_size;
+       csum_expected = btrfs_csum_ptr(fs_info, bbio->csum, bio_offset);
  
-       kaddr = kmap_atomic(page);
-       shash->tfm = fs_info->csum_shash;
-
-       crypto_shash_digest(shash, kaddr + pgoff, len, csum);
-       kunmap_atomic(kaddr);
-
-       if (memcmp(csum, csum_expected, csum_size))
+       if (btrfs_check_sector_csum(fs_info, page, pgoff, csum, csum_expected))
                 goto zeroit;
-
         return 0;
+
  zeroit:
         btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
                                     bbio->mirror_num);
@@ -3578,7 +3577,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
         u64 last_objectid = 0;
         int ret = 0, nr_unlink = 0;
  
-       /* Bail out if the cleanup is already running. */
         if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
                 return 0;
  
@@ -3661,17 +3659,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                          *
                          * btrfs_find_orphan_roots() ran before us, which has
                          * found all deleted roots and loaded them into
-                        * fs_info->fs_roots. So here we can find if an
+                        * fs_info->fs_roots_radix. So here we can find if an
                          * orphan item corresponds to a deleted root by looking
-                        * up the root from that xarray.
+                        * up the root from that radix tree.
                          */
  
-                       spin_lock(&fs_info->fs_roots_lock);
-                       dead_root = xa_load(&fs_info->fs_roots,
-                                           (unsigned long)found_key.objectid);
+                       spin_lock(&fs_info->fs_roots_radix_lock);
+                       dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
+                                                        (unsigned long)found_key.objectid);
                         if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
                                 is_dead_root = 1;
-                       spin_unlock(&fs_info->fs_roots_lock);
+                       spin_unlock(&fs_info->fs_roots_radix_lock);
  
                         if (is_dead_root) {
                                 /* prevent this orphan from being found again */
@@ -3911,7 +3909,7 @@ cache_index:
          * cache.
          *
          * This is required for both inode re-read from disk and delayed inode
-        * in the delayed_nodes xarray.
+        * in delayed_nodes_tree.
          */
         if (BTRFS_I(inode)->last_trans == fs_info->generation)
                 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
@@ -4229,7 +4227,7 @@ skip_backref:
         /*
          * If we are in a rename context, we don't need to update anything in the
          * log. That will be done later during the rename by btrfs_log_new_name().
-        * Besides that, doing it here would only cause extra unncessary btree
+        * Besides that, doing it here would only cause extra unnecessary btree
          * operations on the log tree, increasing latency for applications.
          */
         if (!rename_ctx) {
@@ -4859,7 +4857,6 @@ again:
                 else
                         memzero_page(page, (block_start - page_offset(page)) + offset,
                                      len);
-               flush_dcache_page(page);
         }
         btrfs_page_clear_checked(fs_info, page, block_start,
                                  block_end + 1 - block_start);
@@ -6367,7 +6364,13 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
         }
  
         btrfs_mark_buffer_dirty(path->nodes[0]);
-       btrfs_release_path(path);
+       /*
+        * We don't need the path anymore. Also, inheriting properties, adding
+        * ACLs, security xattrs, an orphan item or the link will result in
+        * allocating yet another path, so just free our path.
+        */
+       btrfs_free_path(path);
+       path = NULL;
  
         if (args->subvol) {
                 struct inode *parent;
@@ -6424,8 +6427,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
                 goto discard;
         }
  
-       ret = 0;
-       goto out;
+       return 0;
  
  discard:
         /*
@@ -7681,7 +7683,19 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
         if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
             em->block_start == EXTENT_MAP_INLINE) {
                 free_extent_map(em);
-               ret = -ENOTBLK;
+               /*
+                * If we are in a NOWAIT context, return -EAGAIN in order to
+                * fall back to buffered IO. This is not only because we can
+                * block with buffered IO (no support for NOWAIT semantics at
+                * the moment) but also to avoid returning short reads to user
+                * space - this happens if we were able to read some data from
+                * previous non-compressed extents and then when we fall back to
+                * buffered IO, at btrfs_file_read_iter() by calling
+                * filemap_read(), we fail to fault in pages for the read buffer,
+                * in which case filemap_read() returns a short read (the number
+                * of bytes previously read is > 0, so it does not return -EFAULT).
+                */
+               ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK;
                 goto unlock_err;
         }
  
@@ -7859,9 +7873,6 @@ static void submit_dio_repair_bio(struct inode *inode, struct bio *bio,
  
         BUG_ON(bio_op(bio) == REQ_OP_WRITE);
  
-       if (btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA))
-               return;
-
         refcount_inc(&dip->refs);
         if (btrfs_map_bio(fs_info, bio, mirror_num))
                 refcount_dec(&dip->refs);
@@ -7873,47 +7884,35 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
  {
         struct inode *inode = dip->inode;
         struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
-       const u32 sectorsize = fs_info->sectorsize;
         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
         const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
-       struct bio_vec bvec;
-       struct bvec_iter iter;
-       u32 bio_offset = 0;
         blk_status_t err = BLK_STS_OK;
+       struct bvec_iter iter;
+       struct bio_vec bv;
+       u32 offset;
+
+       btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
+               u64 start = bbio->file_offset + offset;
+
+               if (uptodate &&
+                   (!csum || !check_data_csum(inode, bbio, offset, bv.bv_page,
+                                              bv.bv_offset, start))) {
+                       clean_io_failure(fs_info, failure_tree, io_tree, start,
+                                        bv.bv_page, btrfs_ino(BTRFS_I(inode)),
+                                        bv.bv_offset);
+               } else {
+                       int ret;
  
-       __bio_for_each_segment(bvec, &bbio->bio, iter, bbio->iter) {
-               unsigned int i, nr_sectors, pgoff;
-
-               nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
-               pgoff = bvec.bv_offset;
-               for (i = 0; i < nr_sectors; i++) {
-                       u64 start = bbio->file_offset + bio_offset;
-
-                       ASSERT(pgoff < PAGE_SIZE);
-                       if (uptodate &&
-                           (!csum || !check_data_csum(inode, bbio,
-                                                      bio_offset, bvec.bv_page,
-                                                      pgoff, start))) {
-                               clean_io_failure(fs_info, failure_tree, io_tree,
-                                                start, bvec.bv_page,
-                                                btrfs_ino(BTRFS_I(inode)),
-                                                pgoff);
-                       } else {
-                               int ret;
-
-                               ret = btrfs_repair_one_sector(inode, &bbio->bio,
-                                               bio_offset, bvec.bv_page, pgoff,
-                                               start, bbio->mirror_num,
-                                               submit_dio_repair_bio);
-                               if (ret)
-                                       err = errno_to_blk_status(ret);
-                       }
-                       ASSERT(bio_offset + sectorsize > bio_offset);
-                       bio_offset += sectorsize;
-                       pgoff += sectorsize;
+                       ret = btrfs_repair_one_sector(inode, &bbio->bio, offset,
+                                       bv.bv_page, bv.bv_offset, start,
+                                       bbio->mirror_num,
+                                       submit_dio_repair_bio);
+                       if (ret)
+                               err = errno_to_blk_status(ret);
                 }
         }
+
         return err;
  }
  
@@ -7962,46 +7961,29 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
  {
         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
         struct btrfs_dio_private *dip = bio->bi_private;
-       bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
         blk_status_t ret;
  
-       /* Check btrfs_submit_bio_hook() for rules about async submit. */
-       if (async_submit)
-               async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
-
-       if (!write) {
-               ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-               if (ret)
-                       goto err;
-       }
-
         if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
                 goto map;
  
-       if (write && async_submit) {
-               ret = btrfs_wq_submit_bio(inode, bio, 0, file_offset,
-                                         btrfs_submit_bio_start_direct_io);
-               goto err;
-       } else if (write) {
+       if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
+               /* Check btrfs_submit_data_write_bio() for async submit rules */
+               if (async_submit && !atomic_read(&BTRFS_I(inode)->sync_writers))
+                       return btrfs_wq_submit_bio(inode, bio, 0, file_offset,
+                                       btrfs_submit_bio_start_direct_io);
                 /*
                  * If we aren't doing async submit, calculate the csum of the
                  * bio now.
                  */
                 ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
                 if (ret)
-                       goto err;
+                       return ret;
         } else {
-               u64 csum_offset;
-
-               csum_offset = file_offset - dip->file_offset;
-               csum_offset >>= fs_info->sectorsize_bits;
-               csum_offset *= fs_info->csum_size;
-               btrfs_bio(bio)->csum = dip->csums + csum_offset;
+               btrfs_bio(bio)->csum = btrfs_csum_ptr(fs_info, dip->csums,
+                                                     file_offset - dip->file_offset);
         }
  map:
-       ret = btrfs_map_bio(fs_info, bio, 0);
-err:
-       return ret;
+       return btrfs_map_bio(fs_info, bio, 0);
  }
  
  static void btrfs_submit_direct(const struct iomap_iter *iter,
@@ -8588,10 +8570,9 @@ again:
         else
                 zero_start = PAGE_SIZE;
  
-       if (zero_start != PAGE_SIZE) {
+       if (zero_start != PAGE_SIZE)
                 memzero_page(page, zero_start, PAGE_SIZE - zero_start);
-               flush_dcache_page(page);
-       }
+
         btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);
         btrfs_page_set_dirty(fs_info, page, page_start, end + 1 - page_start);
         btrfs_page_set_uptodate(fs_info, page, page_start, end + 1 - page_start);
@@ -9549,15 +9530,21 @@ static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_di
                          struct dentry *old_dentry, struct inode *new_dir,
                          struct dentry *new_dentry, unsigned int flags)
  {
+       int ret;
+
         if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                 return -EINVAL;
  
         if (flags & RENAME_EXCHANGE)
-               return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
-                                         new_dentry);
+               ret = btrfs_rename_exchange(old_dir, old_dentry, new_dir,
+                                           new_dentry);
+       else
+               ret = btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir,
+                                  new_dentry, flags);
  
-       return btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir,
-                           new_dentry, flags);
+       btrfs_btree_balance_dirty(BTRFS_I(new_dir)->root->fs_info);
+
+       return ret;
  }
  
  struct btrfs_delalloc_work {
@@ -10177,9 +10164,8 @@ void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
         }
  }
  
-static int btrfs_encoded_io_compression_from_extent(
-                               struct btrfs_fs_info *fs_info,
-                               int compress_type)
+int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
+                                            int compress_type)
  {
         switch (compress_type) {
         case BTRFS_COMPRESS_NONE:
@@ -10312,12 +10298,6 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
                         return ret;
         }
  
-       ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
-       if (ret) {
-               btrfs_bio_free_csum(bbio);
-               return ret;
-       }
-
         atomic_inc(&priv->pending);
         ret = btrfs_map_bio(fs_info, bio, mirror_num);
         if (ret) {
@@ -10384,11 +10364,9 @@ static void btrfs_encoded_read_endio(struct bio *bio)
         bio_put(bio);
  }
  
-static int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
-                                                u64 file_offset,
-                                                u64 disk_bytenr,
-                                                u64 disk_io_size,
-                                                struct page **pages)
+int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
+                                         u64 file_offset, u64 disk_bytenr,
+                                         u64 disk_io_size, struct page **pages)
  {
         struct btrfs_fs_info *fs_info = inode->root->fs_info;
         struct btrfs_encoded_read_private priv = {