diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 2dd8754cb990dd79a65210e5f27152b2bc183bc8..435559ba94fa00d31f21fa56ec97ec40c55282bc 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -9,6 +9,7 @@
 #include "ordered-data.h"
 #include "transaction.h"
 #include "block-group.h"
+#include "zoned.h"
 
 /*
  * HOW DOES SPACE RESERVATION WORK
@@ -187,6 +188,37 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
  */
 #define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH                     (75)
 
+/*
+ * Calculate chunk size depending on volume type (regular or zoned).
+ */
+static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
+{
+       if (btrfs_is_zoned(fs_info))
+               return fs_info->zone_size;
+
+       ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+
+       if (flags & BTRFS_BLOCK_GROUP_DATA)
+               return BTRFS_MAX_DATA_CHUNK_SIZE;
+       else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+               return SZ_32M;
+
+       /* Handle BTRFS_BLOCK_GROUP_METADATA */
+       if (fs_info->fs_devices->total_rw_bytes > 50ULL * SZ_1G)
+               return SZ_1G;
+
+       return SZ_256M;
+}
+
+/*
+ * Update default chunk size.
+ */
+void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
+                                       u64 chunk_size)
+{
+       WRITE_ONCE(space_info->chunk_size, chunk_size);
+}
+
 static int create_space_info(struct btrfs_fs_info *info, u64 flags)
 {
 
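
As a quick reference, the sizing policy in calc_chunk_size() reduces to a small lookup. Below is an illustrative userspace model, not kernel code; the SZ_* values mirror the kernel's size macros, and the 10 GiB data cap is an assumption about BTRFS_MAX_DATA_CHUNK_SIZE:

```c
#include <stdint.h>
#include <stdio.h>

#define SZ_32M  (32ULL << 20)
#define SZ_256M (256ULL << 20)
#define SZ_1G   (1ULL << 30)
#define SZ_10G  (10ULL << 30)	/* assumed BTRFS_MAX_DATA_CHUNK_SIZE */

enum bg_type { BG_DATA, BG_SYSTEM, BG_METADATA };

/* Model of calc_chunk_size() for a regular (non-zoned) filesystem. */
static uint64_t chunk_size_for(enum bg_type type, uint64_t total_rw_bytes)
{
	if (type == BG_DATA)
		return SZ_10G;
	if (type == BG_SYSTEM)
		return SZ_32M;
	/* Metadata: 1 GiB on filesystems over 50 GiB, 256 MiB below. */
	return total_rw_bytes > 50ULL * SZ_1G ? SZ_1G : SZ_256M;
}

int main(void)
{
	printf("metadata chunk on a 100 GiB fs: %llu MiB\n",
	       (unsigned long long)(chunk_size_for(BG_METADATA,
						   100ULL * SZ_1G) >> 20));
	return 0;
}
```

On a zoned filesystem the policy is bypassed entirely: every chunk must be exactly fs_info->zone_size, since a block group maps onto whole zones.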
@@ -208,6 +240,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
        INIT_LIST_HEAD(&space_info->tickets);
        INIT_LIST_HEAD(&space_info->priority_tickets);
        space_info->clamp = 1;
+       btrfs_update_space_info_chunk_size(space_info, calc_chunk_size(info, flags));
 
        if (btrfs_is_zoned(info))
                space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
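
The setter uses WRITE_ONCE() because chunk_size can also be changed locklessly after mount (e.g. from sysfs), so readers would pair it with READ_ONCE(). A rough userspace analogue of that single-word, tear-free pattern, using C11 relaxed atomics (names are illustrative, not the kernel's):

```c
#include <stdatomic.h>
#include <stdint.h>

struct space_info_model {
	_Atomic uint64_t chunk_size;
};

/* Writer side: one atomic store, no lock, no ordering guarantee
 * beyond readers never observing a torn value. */
static void update_chunk_size(struct space_info_model *si, uint64_t sz)
{
	atomic_store_explicit(&si->chunk_size, sz, memory_order_relaxed);
}

/* Reader side: a single atomic load, the analogue of READ_ONCE(). */
static uint64_t read_chunk_size(struct space_info_model *si)
{
	return atomic_load_explicit(&si->chunk_size, memory_order_relaxed);
}
```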
@@ -263,7 +296,7 @@ out:
 void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
                             u64 total_bytes, u64 bytes_used,
                             u64 bytes_readonly, u64 bytes_zone_unusable,
-                            struct btrfs_space_info **space_info)
+                            bool active, struct btrfs_space_info **space_info)
 {
        struct btrfs_space_info *found;
        int factor;
@@ -274,6 +307,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
        ASSERT(found);
        spin_lock(&found->lock);
        found->total_bytes += total_bytes;
+       if (active)
+               found->active_total_bytes += total_bytes;
        found->disk_total += total_bytes * factor;
        found->bytes_used += bytes_used;
        found->disk_used += bytes_used * factor;
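
With the new active flag, a zone-active block group is counted in both totals, so active_total_bytes can never exceed total_bytes. A minimal sketch of that bookkeeping invariant (field and function names are illustrative):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct space_totals {
	uint64_t total_bytes;
	uint64_t active_total_bytes;
};

/* Every block group grows total_bytes; only zone-active ones also
 * grow active_total_bytes, keeping active_total_bytes <= total_bytes. */
static void account_block_group(struct space_totals *t, uint64_t size,
				bool active)
{
	t->total_bytes += size;
	if (active)
		t->active_total_bytes += size;
	assert(t->active_total_bytes <= t->total_bytes);
}
```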
@@ -337,6 +372,22 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
        return avail;
 }
 
+static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info,
+                                      struct btrfs_space_info *space_info)
+{
+       /*
+        * On a regular filesystem, all total_bytes are always writable. On a
+        * zoned filesystem, there may be a limitation imposed by
+        * max_active_zones. For metadata allocation, we cannot finish an
+        * existing active block group to avoid a deadlock. Thus, we need to
+        * consider only the active block groups as writable for metadata space.
+        */
+       if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
+               return space_info->total_bytes;
+
+       return space_info->active_total_bytes;
+}
+
 int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
                         struct btrfs_space_info *space_info, u64 bytes,
                         enum btrfs_reserve_flush_enum flush)
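
The helper boils down to a two-input decision: only zoned metadata and system space is capped to the active total, while data space and regular filesystems always see the full total. A compact model of that rule (parameter names are illustrative):

```c
#include <stdbool.h>
#include <stdint.h>

/* Model of writable_total_bytes(): zoned metadata/system space is
 * limited to zone-active block groups; everything else can use the
 * full total. */
static uint64_t writable_total(bool zoned, bool is_data,
			       uint64_t total, uint64_t active_total)
{
	if (!zoned || is_data)
		return total;
	return active_total;
}
```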
@@ -349,9 +400,12 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
                return 0;
 
        used = btrfs_space_info_used(space_info, true);
-       avail = calc_available_free_space(fs_info, space_info, flush);
+       if (btrfs_is_zoned(fs_info) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
+               avail = 0;
+       else
+               avail = calc_available_free_space(fs_info, space_info, flush);
 
-       if (used + bytes < space_info->total_bytes + avail)
+       if (used + bytes < writable_total_bytes(fs_info, space_info) + avail)
                return 1;
        return 0;
 }
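
With avail forced to zero, the zoned-metadata overcommit check degenerates to `used + bytes < active_total`: a reservation only succeeds if it fits in already-active block groups. A small model with concrete numbers (a hedged sketch, not the kernel function):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Model of the adjusted check: zoned metadata may not overcommit
 * (avail = 0) and is bounded by the writable (active) total. */
static bool can_overcommit(bool zoned_meta, uint64_t used, uint64_t bytes,
			   uint64_t writable_total, uint64_t calc_avail)
{
	uint64_t avail = zoned_meta ? 0 : calc_avail;

	return used + bytes < writable_total + avail;
}

int main(void)
{
	/* 900 MiB used, asking for 200 MiB against 1 GiB of active
	 * metadata: refused on zoned, allowed on regular with 512 MiB
	 * of calculated headroom. */
	printf("%d\n", can_overcommit(true,  900 << 20, 200 << 20,
				      1ULL << 30, 512 << 20));
	printf("%d\n", can_overcommit(false, 900 << 20, 200 << 20,
				      1ULL << 30, 512 << 20));
	return 0;
}
```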
@@ -387,7 +441,7 @@ again:
                ticket = list_first_entry(head, struct reserve_ticket, list);
 
                /* Check and see if our ticket can be satisfied now. */
-               if ((used + ticket->bytes <= space_info->total_bytes) ||
+               if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) ||
                    btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
                                         flush)) {
                        btrfs_space_info_update_bytes_may_use(fs_info,
@@ -671,6 +725,18 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                break;
        case ALLOC_CHUNK:
        case ALLOC_CHUNK_FORCE:
+               /*
+                * For metadata space on a zoned filesystem, reaching here means
+                * we don't have enough space left in active_total_bytes. Try to
+                * activate a block group first, because we may already have an
+                * inactive block group allocated.
+                */
+               ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false);
+               if (ret < 0)
+                       break;
+               else if (ret == 1)
+                       break;
+
                trans = btrfs_join_transaction(root);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
@@ -681,6 +747,23 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                                (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
                                        CHUNK_ALLOC_FORCE);
                btrfs_end_transaction(trans);
+
+               /*
+                * For metadata space on zoned filesystem, allocating a new chunk
+                * For metadata space on a zoned filesystem, allocating a new
+                * chunk is not enough. We still need to activate the block
+                * group. Activate the newly allocated block group by (maybe)
+                * finishing a block group.
+               if (ret == 1) {
+                       ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
+                       /*
+                        * Revert to the original ret regardless of whether we
+                        * could finish one block group or not.
+                        */
+                       if (ret >= 0)
+                               ret = 1;
+               }
+
                if (ret > 0 || ret == -ENOSPC)
                        ret = 0;
                break;
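
Putting the two hunks together, the zoned ALLOC_CHUNK path now runs a three-step sequence. The sketch below models only the control flow, with stub helpers standing in for btrfs_zoned_activate_one_bg() and btrfs_chunk_alloc(); the return convention (<0 error, 1 progress, 0 nothing done) follows the comments above, and none of this is the kernel code itself:

```c
#include <errno.h>
#include <stdbool.h>

/* Illustrative stubs; not the kernel API. */
static int activate_one_bg(bool may_finish) { (void)may_finish; return 0; }
static int alloc_chunk(void) { return 1; }

static int alloc_chunk_zoned_sketch(void)
{
	int ret;

	/* 1. Prefer activating an already allocated, inactive block
	 *    group; don't finish another group for it. */
	ret = activate_one_bg(false);
	if (ret)
		return ret < 0 ? ret : 0;	/* error, or done */

	/* 2. Otherwise allocate a new chunk. */
	ret = alloc_chunk();
	if (ret < 0 && ret != -ENOSPC)
		return ret;

	/* 3. Activate the new block group, (maybe) finishing another
	 *    one to free up an active zone; then restore ret. */
	if (ret == 1) {
		ret = activate_one_bg(true);
		return ret < 0 ? ret : 0;
	}
	return 0;
}

int main(void) { return alloc_chunk_zoned_sketch(); }
```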
@@ -718,6 +801,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
 {
        u64 used;
        u64 avail;
+       u64 total;
        u64 to_reclaim = space_info->reclaim_size;
 
        lockdep_assert_held(&space_info->lock);
@@ -732,8 +816,9 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
         * space.  If that's the case add in our overage so we make sure to put
         * appropriate pressure on the flushing state machine.
         */
-       if (space_info->total_bytes + avail < used)
-               to_reclaim += used - (space_info->total_bytes + avail);
+       total = writable_total_bytes(fs_info, space_info);
+       if (total + avail < used)
+               to_reclaim += used - (total + avail);
 
        return to_reclaim;
 }
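
The overage term is plain arithmetic: whenever used outruns writable-plus-available space, the excess is added on top of reclaim_size. For example, with 800 MiB writable, 100 MiB available and 1 GiB used, 124 MiB of overage is added:

```c
#include <stdint.h>

/* Worked model of the overage term in
 * btrfs_calc_reclaim_metadata_size(). */
static uint64_t reclaim_with_overage(uint64_t reclaim_size, uint64_t total,
				     uint64_t avail, uint64_t used)
{
	uint64_t to_reclaim = reclaim_size;

	if (total + avail < used)
		to_reclaim += used - (total + avail);
	return to_reclaim;
}
/* reclaim_with_overage(0, 800 MiB, 100 MiB, 1 GiB) == 124 MiB */
```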
@@ -743,9 +828,12 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 {
        u64 global_rsv_size = fs_info->global_block_rsv.reserved;
        u64 ordered, delalloc;
-       u64 thresh = div_factor_fine(space_info->total_bytes, 90);
+       u64 total = writable_total_bytes(fs_info, space_info);
+       u64 thresh;
        u64 used;
 
+       thresh = div_factor_fine(total, 90);
+
        lockdep_assert_held(&space_info->lock);
 
        /* If we're just plain full then async reclaim just slows us down. */
@@ -807,8 +895,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
                                           BTRFS_RESERVE_FLUSH_ALL);
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_readonly + global_rsv_size;
-       if (used < space_info->total_bytes)
-               thresh += space_info->total_bytes - used;
+       if (used < total)
+               thresh += total - used;
        thresh >>= space_info->clamp;
 
        used = space_info->bytes_pinned;
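
Here div_factor_fine(total, 90) is simply `total * 90 / 100` (the body below is an assumption mirroring the helper in fs/btrfs/misc.h). The threshold starts at 90% of the writable total, is widened by whatever headroom is still unused, and is finally scaled down by the clamp shift:

```c
#include <stdint.h>

/* Assumed equivalent of div_factor_fine(num, factor). */
static uint64_t div_factor_fine(uint64_t num, int factor)
{
	return num * factor / 100;
}

/* Threshold shaping from need_preemptive_reclaim(): 90% of the
 * writable total, plus unused headroom, scaled down by the clamp. */
static uint64_t preemptive_thresh(uint64_t total, uint64_t used, int clamp)
{
	uint64_t thresh = div_factor_fine(total, 90);

	if (used < total)
		thresh += total - used;
	return thresh >> clamp;
}
```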
@@ -1280,7 +1368,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
        to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
        /*
         * This is the priority reclaim path, so to_reclaim could be >0 still
-        * because we may have only satisified the priority tickets and still
+        * because we may have only satisfied the priority tickets and still
         * left non priority tickets on the list.  We would then have
         * to_reclaim but ->bytes == 0.
         */
@@ -1525,7 +1613,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
         * can_overcommit() to ensure we can overcommit to continue.
         */
        if (!pending_tickets &&
-           ((used + orig_bytes <= space_info->total_bytes) ||
+           ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) ||
             btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
                btrfs_space_info_update_bytes_may_use(fs_info, space_info,
                                                      orig_bytes);