{
struct btrfs_fs_info *fs_info = bg->fs_info;
u64 stripe_nr = 0, stripe_offset = 0;
+ u64 prev_offset = 0;
u32 stripe_index = 0;
+ bool has_partial = false, has_conventional = false;
if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
return -EINVAL;
}
+ /*
+ * When the last extent is removed, last_alloc can be smaller than the position
+ * implied by the stripes' write pointers. In that case, last_alloc should be
+ * moved forward to the corresponding write pointer position.
+ */
+ for (int i = 0; i < map->num_stripes; i++) {
+ u64 alloc;
+
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
+ zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ continue;
+
+ stripe_nr = zone_info[i].alloc_offset >> BTRFS_STRIPE_LEN_SHIFT;
+ stripe_offset = zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK;
+ if (stripe_offset == 0 && stripe_nr > 0) {
+ stripe_nr--;
+ stripe_offset = BTRFS_STRIPE_LEN;
+ }
+ alloc = ((stripe_nr * map->num_stripes + i) << BTRFS_STRIPE_LEN_SHIFT) +
+ stripe_offset;
+ last_alloc = max(last_alloc, alloc);
+
+ /* Partially written stripe found. It should be last. */
+ if (zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK)
+ break;
+ }
+ stripe_nr = 0;
+ stripe_offset = 0;
+
if (last_alloc) {
u32 factor = map->num_stripes;
continue;
if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
-
+ has_conventional = true;
zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr);
if (stripe_index > i)
zone_info[i].alloc_offset += stripe_offset;
}
+ /* Verify offsets do not increase across stripes and at most one stripe is partial. */
+ if (i != 0) {
+ if (unlikely(prev_offset < zone_info[i].alloc_offset)) {
+ btrfs_err(fs_info,
+ "zoned: stripe position disorder found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+
+ if (unlikely(has_partial &&
+ (zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK))) {
+ btrfs_err(fs_info,
+ "zoned: multiple partial written stripe found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+ }
+ prev_offset = zone_info[i].alloc_offset;
+
+ if ((zone_info[i].alloc_offset & BTRFS_STRIPE_LEN_MASK) != 0)
+ has_partial = true;
+
if (test_bit(0, active) != test_bit(i, active)) {
if (unlikely(!btrfs_zone_activate(bg)))
return -EIO;
bg->alloc_offset += zone_info[i].alloc_offset;
}
+ /* Check if all devices stay in the same stripe row. */
+ if (unlikely(zone_info[0].alloc_offset -
+ zone_info[map->num_stripes - 1].alloc_offset > BTRFS_STRIPE_LEN)) {
+ btrfs_err(fs_info, "zoned: stripe gap too large in block group %llu", bg->start);
+ return -EIO;
+ }
+
+ if (unlikely(has_conventional && bg->alloc_offset < last_alloc)) {
+ btrfs_err(fs_info, "zoned: allocated extent stays beyond write pointers %llu %llu",
+ bg->alloc_offset, last_alloc);
+ return -EIO;
+ }
+
return 0;
}
u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
+ u64 AUTO_KFREE(raid0_allocs);
u64 stripe_nr = 0, stripe_offset = 0;
u32 stripe_index = 0;
+ bool has_partial = false, has_conventional = false;
+ u64 prev_offset = 0;
if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) {
btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
return -EINVAL;
}
+ raid0_allocs = kcalloc(map->num_stripes / map->sub_stripes, sizeof(*raid0_allocs),
+ GFP_NOFS);
+ if (!raid0_allocs)
+ return -ENOMEM;
+
+ /*
+ * When the last extent is removed, last_alloc can be smaller than the position
+ * implied by the stripes' write pointers. In that case, last_alloc should be
+ * moved forward to the corresponding write pointer position.
+ */
+ for (int i = 0; i < map->num_stripes; i += map->sub_stripes) {
+ u64 alloc = zone_info[i].alloc_offset;
+
+ for (int j = 1; j < map->sub_stripes; j++) {
+ int idx = i + j;
+
+ if (zone_info[idx].alloc_offset == WP_MISSING_DEV ||
+ zone_info[idx].alloc_offset == WP_CONVENTIONAL)
+ continue;
+ if (alloc == WP_MISSING_DEV || alloc == WP_CONVENTIONAL) {
+ alloc = zone_info[idx].alloc_offset;
+ } else if (unlikely(zone_info[idx].alloc_offset != alloc)) {
+ btrfs_err(fs_info,
+ "zoned: write pointer mismatch found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+ }
+
+ raid0_allocs[i / map->sub_stripes] = alloc;
+ if (alloc == WP_CONVENTIONAL)
+ continue;
+ if (unlikely(alloc == WP_MISSING_DEV)) {
+ btrfs_err(fs_info,
+ "zoned: cannot recover write pointer of block group %llu due to missing device",
+ bg->start);
+ return -EIO;
+ }
+
+ stripe_nr = alloc >> BTRFS_STRIPE_LEN_SHIFT;
+ stripe_offset = alloc & BTRFS_STRIPE_LEN_MASK;
+ if (stripe_offset == 0 && stripe_nr > 0) {
+ stripe_nr--;
+ stripe_offset = BTRFS_STRIPE_LEN;
+ }
+
+ alloc = ((stripe_nr * (map->num_stripes / map->sub_stripes) +
+ (i / map->sub_stripes)) <<
+ BTRFS_STRIPE_LEN_SHIFT) + stripe_offset;
+ last_alloc = max(last_alloc, alloc);
+ }
+ stripe_nr = 0;
+ stripe_offset = 0;
+
if (last_alloc) {
u32 factor = map->num_stripes / map->sub_stripes;
}
for (int i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV)
- continue;
+ int idx = i / map->sub_stripes;
- if (test_bit(0, active) != test_bit(i, active)) {
- if (unlikely(!btrfs_zone_activate(bg)))
- return -EIO;
- } else {
- if (test_bit(0, active))
- set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
+ if (raid0_allocs[idx] == WP_CONVENTIONAL) {
+ has_conventional = true;
+ raid0_allocs[idx] = btrfs_stripe_nr_to_offset(stripe_nr);
+
+ if (stripe_index > idx)
+ raid0_allocs[idx] += BTRFS_STRIPE_LEN;
+ else if (stripe_index == idx)
+ raid0_allocs[idx] += stripe_offset;
}
- if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
- zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr);
+ if ((i % map->sub_stripes) == 0) {
+ /* Verify offsets do not increase across groups and at most one is partial. */
+ if (i != 0) {
+ if (unlikely(prev_offset < raid0_allocs[idx])) {
+ btrfs_err(fs_info,
+ "zoned: stripe position disorder found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
- if (stripe_index > (i / map->sub_stripes))
- zone_info[i].alloc_offset += BTRFS_STRIPE_LEN;
- else if (stripe_index == (i / map->sub_stripes))
- zone_info[i].alloc_offset += stripe_offset;
+ if (unlikely(has_partial &&
+ (raid0_allocs[idx] & BTRFS_STRIPE_LEN_MASK))) {
+ btrfs_err(fs_info,
+ "zoned: multiple partial written stripe found in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+ }
+ prev_offset = raid0_allocs[idx];
+
+ if ((raid0_allocs[idx] & BTRFS_STRIPE_LEN_MASK) != 0)
+ has_partial = true;
+ }
+
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
+ zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ zone_info[i].alloc_offset = raid0_allocs[idx];
+
+ if (test_bit(0, active) != test_bit(i, active)) {
+ if (unlikely(!btrfs_zone_activate(bg)))
+ return -EIO;
+ } else if (test_bit(0, active)) {
+ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
}
if ((i % map->sub_stripes) == 0) {
}
}
+ /* Check if all devices stay in the same stripe row. */
+ if (unlikely(zone_info[0].alloc_offset -
+ zone_info[map->num_stripes - 1].alloc_offset > BTRFS_STRIPE_LEN)) {
+ btrfs_err(fs_info, "zoned: stripe gap too large in block group %llu",
+ bg->start);
+ return -EIO;
+ }
+
+ if (unlikely(has_conventional && bg->alloc_offset < last_alloc)) {
+ btrfs_err(fs_info, "zoned: allocated extent stays beyond write pointers %llu %llu",
+ bg->alloc_offset, last_alloc);
+ return -EIO;
+ }
+
return 0;
}