// SPDX-License-Identifier: GPL-2.0+
#include <stdlib.h>
#include <common.h>
#include <fs_internal.h>
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"

const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = {
		.sub_stripes = 2,
		.dev_stripes = 1,
		.devs_max = 0,	/* 0 == as many as possible */
		.devs_min = 4,
		.tolerated_failures = 1,
		.devs_increment = 2,
		.ncopies = 2,
		.nparity = 0,
		.raid_name = "raid10",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID10,
	},
	[BTRFS_RAID_RAID1] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 2,
		.devs_min = 2,
		.tolerated_failures = 1,
		.devs_increment = 2,
		.ncopies = 2,
		.nparity = 0,
		.raid_name = "raid1",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID1,
	},
	[BTRFS_RAID_RAID1C3] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 3,
		.devs_min = 3,
		.tolerated_failures = 2,
		.devs_increment = 3,
		.ncopies = 3,
		.nparity = 0,
		.raid_name = "raid1c3",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID1C3,
	},
	[BTRFS_RAID_RAID1C4] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 4,
		.devs_min = 4,
		.tolerated_failures = 3,
		.devs_increment = 4,
		.ncopies = 4,
		.nparity = 0,
		.raid_name = "raid1c4",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID1C4,
	},
	[BTRFS_RAID_DUP] = {
		.sub_stripes = 1,
		.dev_stripes = 2,
		.devs_max = 1,
		.devs_min = 1,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 2,
		.nparity = 0,
		.raid_name = "dup",
		.bg_flag = BTRFS_BLOCK_GROUP_DUP,
	},
	[BTRFS_RAID_RAID0] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 2,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 0,
		.raid_name = "raid0",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID0,
	},
	[BTRFS_RAID_SINGLE] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 1,
		.devs_min = 1,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 0,
		.raid_name = "single",
		.bg_flag = 0,
	},
	[BTRFS_RAID_RAID5] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 2,
		.tolerated_failures = 1,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 1,
		.raid_name = "raid5",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID5,
	},
	[BTRFS_RAID_RAID6] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 3,
		.tolerated_failures = 2,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 2,
		.raid_name = "raid6",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID6,
	},
};
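
/*
 * Reading the table above, e.g. for RAID10: each logical byte is stored
 * ncopies = 2 times, mirroring is done across pairs of sub_stripes = 2
 * devices within each stripe, at least devs_min = 4 devices are needed,
 * and the profile survives tolerated_failures = 1 lost device.  nparity
 * is only non-zero for the parity-based profiles RAID5 and RAID6.
 */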

struct stripe {
	struct btrfs_device *dev;
	u64 physical;
};

static inline int nr_parity_stripes(struct map_lookup *map)
{
	if (map->type & BTRFS_BLOCK_GROUP_RAID5)
		return 1;
	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
		return 2;
	else
		return 0;
}

static inline int nr_data_stripes(struct map_lookup *map)
{
	return map->num_stripes - nr_parity_stripes(map);
}
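
/*
 * Example: a RAID6 chunk with num_stripes = 5 has nr_parity_stripes() = 2,
 * so nr_data_stripes() = 3 of the five stripes in each horizontal stripe
 * set carry data.
 */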

#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) )

static LIST_HEAD(fs_uuids);

/*
 * Find a device specified by @devid or @uuid in the list of @fs_devices, or
 * return NULL.
 *
 * If devid and uuid are both specified, the match must be exact, otherwise
 * only devid is used.
 */
static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
					u64 devid, u8 *uuid)
{
	struct list_head *head = &fs_devices->devices;
	struct btrfs_device *dev;

	list_for_each_entry(dev, head, dev_list) {
		if (dev->devid == devid &&
		    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
			return dev;
		}
	}
	return NULL;
}

static struct btrfs_fs_devices *find_fsid(u8 *fsid, u8 *metadata_uuid)
{
	struct btrfs_fs_devices *fs_devices;

	list_for_each_entry(fs_devices, &fs_uuids, list) {
		if (metadata_uuid && (memcmp(fsid, fs_devices->fsid,
					     BTRFS_FSID_SIZE) == 0) &&
		    (memcmp(metadata_uuid, fs_devices->metadata_uuid,
			    BTRFS_FSID_SIZE) == 0)) {
			return fs_devices;
		} else if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) {
			return fs_devices;
		}
	}
	return NULL;
}

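/*
 * Register the device described by @disk_super in the global fs_uuids
 * list: find (or create) the btrfs_fs_devices matching the fsid (and the
 * metadata_uuid when the METADATA_UUID incompat flag is set), then add
 * or refresh the btrfs_device entry for @devid.  A superblock whose
 * generation is older than the one already recorded for that device is
 * treated as stale and rejected with -EEXIST.
 */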
static int device_list_add(struct btrfs_super_block *disk_super,
			   u64 devid, struct blk_desc *desc,
			   struct disk_partition *part,
			   struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices;
	u64 found_transid = btrfs_super_generation(disk_super);
	bool metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
		BTRFS_FEATURE_INCOMPAT_METADATA_UUID);

	if (metadata_uuid)
		fs_devices = find_fsid(disk_super->fsid,
				       disk_super->metadata_uuid);
	else
		fs_devices = find_fsid(disk_super->fsid, NULL);

	if (!fs_devices) {
		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
		if (!fs_devices)
			return -ENOMEM;
		INIT_LIST_HEAD(&fs_devices->devices);
		list_add(&fs_devices->list, &fs_uuids);
		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
		if (metadata_uuid)
			memcpy(fs_devices->metadata_uuid,
			       disk_super->metadata_uuid, BTRFS_FSID_SIZE);
		else
			memcpy(fs_devices->metadata_uuid, fs_devices->fsid,
			       BTRFS_FSID_SIZE);

		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
		fs_devices->lowest_devid = (u64)-1;
		device = NULL;
	} else {
		device = find_device(fs_devices, devid,
				     disk_super->dev_item.uuid);
	}
	if (!device) {
		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device) {
			/* we can safely leave the fs_devices entry around */
			return -ENOMEM;
		}
		device->devid = devid;
		device->desc = desc;
		device->part = part;
		device->generation = found_transid;
		memcpy(device->uuid, disk_super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
		device->total_devs = btrfs_super_num_devices(disk_super);
		device->super_bytes_used = btrfs_super_bytes_used(disk_super);
		device->total_bytes =
			btrfs_stack_device_total_bytes(&disk_super->dev_item);
		device->bytes_used =
			btrfs_stack_device_bytes_used(&disk_super->dev_item);
		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
	} else if (!device->desc || !device->part) {
		/*
		 * The existing device has newer generation, so this one could
		 * be a stale one, don't add it.
		 */
		if (found_transid < device->generation) {
			error(
	"adding devid %llu gen %llu but found an existing device gen %llu",
			      device->devid, found_transid,
			      device->generation);
			return -EEXIST;
		} else {
			device->desc = desc;
			device->part = part;
		}
	}

	if (found_transid > fs_devices->latest_trans) {
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
	}
	if (fs_devices->lowest_devid > devid) {
		fs_devices->lowest_devid = devid;
	}
	*fs_devices_ret = fs_devices;
	return 0;
}

int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_fs_devices *seed_devices;
	struct btrfs_device *device;
	int ret = 0;

again:
	if (!fs_devices)
		return 0;
	while (!list_empty(&fs_devices->devices)) {
		device = list_entry(fs_devices->devices.next,
				    struct btrfs_device, dev_list);
		list_del(&device->dev_list);
		/* free the memory */
		free(device);
	}

	seed_devices = fs_devices->seed;
	fs_devices->seed = NULL;
	if (seed_devices) {
		struct btrfs_fs_devices *orig;

		orig = fs_devices;
		fs_devices = seed_devices;
		list_del(&orig->list);
		free(orig);
		goto again;
	} else {
		list_del(&fs_devices->list);
		free(fs_devices);
	}

	return ret;
}

void btrfs_close_all_devices(void)
{
	struct btrfs_fs_devices *fs_devices;

	while (!list_empty(&fs_uuids)) {
		fs_devices = list_entry(fs_uuids.next, struct btrfs_fs_devices,
					list);
		btrfs_close_devices(fs_devices);
	}
}

int btrfs_open_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device;

	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		if (!device->desc || !device->part) {
			printf("no device found for devid %llu, skip it\n",
			       device->devid);
			continue;
		}
	}
	return 0;
}

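/*
 * Read the btrfs super block from @desc/@part and, when it is valid,
 * register the device via device_list_add().  On success, *total_devs is
 * the number of devices of this filesystem, forced to 1 for metadump
 * images (which carry BTRFS_SUPER_FLAG_METADUMP).
 */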
int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part,
			  struct btrfs_fs_devices **fs_devices_ret,
			  u64 *total_devs)
{
	struct btrfs_super_block *disk_super;
	char buf[BTRFS_SUPER_INFO_SIZE];
	int ret;
	u64 devid;

	disk_super = (struct btrfs_super_block *)buf;
	ret = btrfs_read_dev_super(desc, part, disk_super);
	if (ret < 0)
		return -EIO;
	devid = btrfs_stack_device_id(&disk_super->dev_item);
	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP)
		*total_devs = 1;
	else
		*total_devs = btrfs_super_num_devices(disk_super);

	ret = device_list_add(disk_super, devid, desc, part, fs_devices_ret);

	return ret;
}

struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
				       u8 *uuid, u8 *fsid)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *cur_devices;

	cur_devices = fs_info->fs_devices;
	while (cur_devices) {
		if (!fsid ||
		    !memcmp(cur_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
			device = find_device(cur_devices, devid, uuid);
			if (device)
				return device;
		}
		cur_devices = cur_devices->seed;
	}
	return NULL;
}

/*
 * slot == -1: SYSTEM chunk from the superblock's sys_chunk_array
 * return 0 if the chunk is valid, -EUCLEAN or -EIO on error
 */
int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
			    struct extent_buffer *leaf,
			    struct btrfs_chunk *chunk,
			    int slot, u64 logical)
{
	u64 length;
	u64 stripe_len;
	u16 num_stripes;
	u16 sub_stripes;
	u64 type;
	u32 chunk_ondisk_size;
	u32 sectorsize = fs_info->sectorsize;

	/*
	 * Basic chunk item size check. Note that btrfs_chunk already contains
	 * one stripe, so no "==" check.
	 */
	if (slot >= 0 &&
	    btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) {
		error("invalid chunk item size, have %u expect [%zu, %zu)",
		      btrfs_item_size_nr(leaf, slot),
		      sizeof(struct btrfs_chunk),
		      BTRFS_LEAF_DATA_SIZE(fs_info));
		return -EUCLEAN;
	}
	length = btrfs_chunk_length(leaf, chunk);
	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
	type = btrfs_chunk_type(leaf, chunk);

	if (num_stripes == 0) {
		error("invalid num_stripes, have %u expect non-zero",
		      num_stripes);
		return -EUCLEAN;
	}
	if (slot >= 0 && btrfs_chunk_item_size(num_stripes) !=
	    btrfs_item_size_nr(leaf, slot)) {
		error("invalid chunk item size, have %u expect %lu",
		      btrfs_item_size_nr(leaf, slot),
		      btrfs_chunk_item_size(num_stripes));
		return -EUCLEAN;
	}

	/*
	 * These validity checks may be insufficient to cover every corner
	 * case.
	 */
	if (!IS_ALIGNED(logical, sectorsize)) {
		error("invalid chunk logical %llu", logical);
		return -EIO;
	}
	if (btrfs_chunk_sector_size(leaf, chunk) != sectorsize) {
		error("invalid chunk sectorsize %llu",
		      (unsigned long long)btrfs_chunk_sector_size(leaf, chunk));
		return -EIO;
	}
	if (!length || !IS_ALIGNED(length, sectorsize)) {
		error("invalid chunk length %llu", length);
		return -EIO;
	}
	if (stripe_len != BTRFS_STRIPE_LEN) {
		error("invalid chunk stripe length: %llu", stripe_len);
		return -EIO;
	}
	/* Check on chunk item type */
	if (slot == -1 && (type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
		error("invalid chunk type %llu", type);
		return -EIO;
	}
	if (type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
		     BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
		error("unrecognized chunk type: %llu",
		      ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
			BTRFS_BLOCK_GROUP_PROFILE_MASK) & type);
		return -EIO;
	}
	if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
		error("missing chunk type flag: %llu", type);
		return -EIO;
	}
	if (!(is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) ||
	      (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)) {
		error("conflicting chunk type detected: %llu", type);
		return -EIO;
	}
	if ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
	    !is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
		error("conflicting chunk profile detected: %llu", type);
		return -EIO;
	}

	chunk_ondisk_size = btrfs_chunk_item_size(num_stripes);
	/*
	 * btrfs_chunk contains at least one stripe, and for sys_chunk it
	 * can't exceed the system chunk array size.
	 * For a regular chunk, it should match its chunk item size.
	 */
	if (num_stripes < 1 ||
	    (slot == -1 && chunk_ondisk_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) ||
	    (slot >= 0 && chunk_ondisk_size > btrfs_item_size_nr(leaf, slot))) {
		error("invalid num_stripes: %u", num_stripes);
		return -EIO;
	}
	/*
	 * Device number check against profile
	 */
	if ((type & BTRFS_BLOCK_GROUP_RAID10 && (sub_stripes != 2 ||
		  !IS_ALIGNED(num_stripes, sub_stripes))) ||
	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
	    (type & BTRFS_BLOCK_GROUP_RAID1C3 && num_stripes < 3) ||
	    (type & BTRFS_BLOCK_GROUP_RAID1C4 && num_stripes < 4) ||
	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
	    (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
	    (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
	    ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
	     num_stripes != 1)) {
		error("Invalid num_stripes:sub_stripes %u:%u for profile %llu",
		      num_stripes, sub_stripes,
		      type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
		return -EIO;
	}

	return 0;
}

/*
 * Get stripe length from chunk item and its stripe items
 *
 * Caller should only call this function after validating the chunk item
 * by using btrfs_check_chunk_valid().
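 *
 * Example (illustrative numbers): for a RAID5 chunk of length 192MiB
 * with num_stripes = 4, the result is 192MiB / (4 - 1) = 64MiB, i.e.
 * the portion of the chunk that lands on each individual device.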
 */
u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
			struct extent_buffer *leaf,
			struct btrfs_chunk *chunk)
{
	u64 stripe_len;
	u64 chunk_len;
	u32 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	u64 profile = btrfs_chunk_type(leaf, chunk) &
		      BTRFS_BLOCK_GROUP_PROFILE_MASK;

	chunk_len = btrfs_chunk_length(leaf, chunk);

	switch (profile) {
	case 0: /* Single profile */
	case BTRFS_BLOCK_GROUP_RAID1:
	case BTRFS_BLOCK_GROUP_RAID1C3:
	case BTRFS_BLOCK_GROUP_RAID1C4:
	case BTRFS_BLOCK_GROUP_DUP:
		stripe_len = chunk_len;
		break;
	case BTRFS_BLOCK_GROUP_RAID0:
		stripe_len = chunk_len / num_stripes;
		break;
	case BTRFS_BLOCK_GROUP_RAID5:
		stripe_len = chunk_len / (num_stripes - 1);
		break;
	case BTRFS_BLOCK_GROUP_RAID6:
		stripe_len = chunk_len / (num_stripes - 2);
		break;
	case BTRFS_BLOCK_GROUP_RAID10:
		stripe_len = chunk_len / (num_stripes /
				btrfs_chunk_sub_stripes(leaf, chunk));
		break;
	default:
		/* Invalid chunk profile found */
		BUG_ON(1);
	}
	return stripe_len;
}

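/*
 * Return how many times a block at @logical can be read from distinct
 * locations: the full stripe count for the mirrored profiles (DUP,
 * RAID1, RAID1C3/C4), sub_stripes for RAID10, and 2 resp. 3 for
 * RAID5/RAID6, where the extra "copies" are reconstructions from parity
 * rather than literal duplicates.
 */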
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
{
	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
	struct cache_extent *ce;
	struct map_lookup *map;
	int ret;

	ce = search_cache_extent(&map_tree->cache_tree, logical);
	if (!ce) {
		fprintf(stderr, "No mapping for %llu-%llu\n",
			(unsigned long long)logical,
			(unsigned long long)logical+len);
		return 1;
	}
	if (ce->start > logical || ce->start + ce->size < logical) {
		fprintf(stderr, "Invalid mapping for %llu-%llu, got "
			"%llu-%llu\n", (unsigned long long)logical,
			(unsigned long long)logical+len,
			(unsigned long long)ce->start,
			(unsigned long long)ce->start + ce->size);
		return 1;
	}
	map = container_of(ce, struct map_lookup, ce);

	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
			 BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4))
		ret = map->num_stripes;
	else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
		ret = map->sub_stripes;
	else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
		ret = 2;
	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
		ret = 3;
	else
		ret = 1;
	return ret;
}

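/*
 * Find the block group of the given @type at or after *logical.  A
 * caller can walk all block groups with a loop like the following
 * (a sketch, assuming no block group starts at logical address 0):
 *
 *	u64 logical = 0;
 *	u64 size;
 *
 *	while (btrfs_next_bg(fs_info, &logical, &size, type) == 0) {
 *		... process the bg at [logical, logical + size) ...
 *	}
 */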
int btrfs_next_bg(struct btrfs_fs_info *fs_info, u64 *logical,
		  u64 *size, u64 type)
{
	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
	struct cache_extent *ce;
	struct map_lookup *map;
	u64 cur = *logical;

	ce = search_cache_extent(&map_tree->cache_tree, cur);

	while (ce) {
		/*
		 * Only jump to the next bg if cur is non-zero.
		 * The initial logical passed to btrfs_next_bg() is 0, and
		 * jumping ahead right away would skip a valid bg.
		 */
		if (cur) {
			ce = next_cache_extent(ce);
			if (!ce)
				return -ENOENT;
		}

		cur = ce->start;
		map = container_of(ce, struct map_lookup, ce);
		if (map->type & type) {
			*logical = ce->start;
			*size = ce->size;
			return 0;
		}
		if (!cur)
			ce = next_cache_extent(ce);
	}

	return -ENOENT;
}

static inline int parity_smaller(u64 a, u64 b)
{
	return a > b;
}

/*
 * Bubble-sort the stripe set to put the parity/syndrome stripes last.
 * Stripe counts are small, so a bubble sort is perfectly adequate here.
 */
static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map)
{
	struct btrfs_bio_stripe s;
	int i;
	u64 l;
	int again = 1;

	while (again) {
		again = 0;
		for (i = 0; i < bbio->num_stripes - 1; i++) {
			if (parity_smaller(raid_map[i], raid_map[i+1])) {
				s = bbio->stripes[i];
				l = raid_map[i];
				bbio->stripes[i] = bbio->stripes[i+1];
				raid_map[i] = raid_map[i+1];
				bbio->stripes[i+1] = s;
				raid_map[i+1] = l;
				again = 1;
			}
		}
	}
}

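/*
 * Map a logical address to the physical stripes that store it.
 *
 * @rw:          READ or WRITE; writes (and RAID5/6 recovery reads)
 *               return all stripes, plain reads a single mirror
 * @logical:     logical address to map
 * @length:      out, clamped to what fits in one stripe of the chunk
 * @type:        optional out, the block group type of the chunk
 * @multi_ret:   optional out, the resulting physical stripes
 * @mirror_num:  0 lets the function pick a mirror, >= 1 selects one
 * @raid_map_ret: optional out, the RAID5/6 logical->stripe map with the
 *               parity stripes sorted last
 */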
int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
		      u64 logical, u64 *length, u64 *type,
		      struct btrfs_multi_bio **multi_ret, int mirror_num,
		      u64 **raid_map_ret)
{
	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
	struct cache_extent *ce;
	struct map_lookup *map;
	u64 offset;
	u64 stripe_offset;
	u64 *raid_map = NULL;
	int stripe_nr;
	int stripes_allocated = 8;
	int stripes_required = 1;
	int stripe_index;
	int i;
	struct btrfs_multi_bio *multi = NULL;

	if (multi_ret && rw == READ) {
		stripes_allocated = 1;
	}
again:
	ce = search_cache_extent(&map_tree->cache_tree, logical);
	if (!ce) {
		kfree(multi);
		*length = (u64)-1;
		return -ENOENT;
	}
	if (ce->start > logical) {
		kfree(multi);
		*length = ce->start - logical;
		return -ENOENT;
	}

	if (multi_ret) {
		multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
				GFP_NOFS);
		if (!multi)
			return -ENOMEM;
	}
	map = container_of(ce, struct map_lookup, ce);
	offset = logical - ce->start;

	if (rw == WRITE) {
		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
				 BTRFS_BLOCK_GROUP_RAID1C3 |
				 BTRFS_BLOCK_GROUP_RAID1C4 |
				 BTRFS_BLOCK_GROUP_DUP)) {
			stripes_required = map->num_stripes;
		} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
			stripes_required = map->sub_stripes;
		}
	}
	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)
	    && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) {
		/* RAID[56] write or recovery. Return all stripes */
		stripes_required = map->num_stripes;

		/* Only allocate the map if we've already got a large enough multi_ret */
		if (stripes_allocated >= stripes_required) {
			raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
			if (!raid_map) {
				kfree(multi);
				return -ENOMEM;
			}
		}
	}

	/* if our multi bio struct is too small, back off and try again */
	if (multi_ret && stripes_allocated < stripes_required) {
		stripes_allocated = stripes_required;
		kfree(multi);
		multi = NULL;
		goto again;
	}
	stripe_nr = offset;
	/*
	 * stripe_nr counts the total number of stripes we have to stride
	 * to get to this block
	 */
	stripe_nr = stripe_nr / map->stripe_len;

	stripe_offset = stripe_nr * map->stripe_len;
	BUG_ON(offset < stripe_offset);

	/* stripe_offset is the offset of this block in its stripe */
	stripe_offset = offset - stripe_offset;
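
	/*
	 * Worked example (illustrative): with stripe_len = 64KiB and
	 * offset = 200KiB, stripe_nr = 200KiB / 64KiB = 3 and
	 * stripe_offset = 200KiB - 3 * 64KiB = 8KiB, i.e. the block
	 * starts 8KiB into the fourth stripe.
	 */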

	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
			 BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4 |
			 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
			 BTRFS_BLOCK_GROUP_RAID10 |
			 BTRFS_BLOCK_GROUP_DUP)) {
		/* we limit the length of each bio to what fits in a stripe */
		*length = min_t(u64, ce->size - offset,
				map->stripe_len - stripe_offset);
	} else {
		*length = ce->size - offset;
	}

	if (!multi_ret)
		goto out;

	multi->num_stripes = 1;
	stripe_index = 0;
	if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
			 BTRFS_BLOCK_GROUP_RAID1C3 |
			 BTRFS_BLOCK_GROUP_RAID1C4)) {
		if (rw == WRITE)
			multi->num_stripes = map->num_stripes;
		else if (mirror_num)
			stripe_index = mirror_num - 1;
		else
			stripe_index = stripe_nr % map->num_stripes;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
		int factor = map->num_stripes / map->sub_stripes;

		stripe_index = stripe_nr % factor;
		stripe_index *= map->sub_stripes;

		if (rw == WRITE)
			multi->num_stripes = map->sub_stripes;
		else if (mirror_num)
			stripe_index += mirror_num - 1;

		stripe_nr = stripe_nr / factor;
	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		if (rw == WRITE)
			multi->num_stripes = map->num_stripes;
		else if (mirror_num)
			stripe_index = mirror_num - 1;
	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
				BTRFS_BLOCK_GROUP_RAID6)) {

		if (raid_map) {
			int rot;
			u64 tmp;
			u64 raid56_full_stripe_start;
			u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len;

			/*
			 * align the start of our data stripe in the logical
			 * address space
			 */
			raid56_full_stripe_start = offset / full_stripe_len;
			raid56_full_stripe_start *= full_stripe_len;

			/* get the data stripe number */
			stripe_nr = raid56_full_stripe_start / map->stripe_len;
			stripe_nr = stripe_nr / nr_data_stripes(map);

			/* Work out the disk rotation on this stripe-set */
			rot = stripe_nr % map->num_stripes;
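
			/*
			 * Illustrative example: on a 3-device RAID5 map,
			 * full stripe 0 gets rot 0 and full stripe 1 gets
			 * rot 1, so the parity stripe (and the Q stripe
			 * for RAID6) rotates to a different device on
			 * each consecutive full stripe.
			 */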

			/* Fill in the logical address of each stripe */
			tmp = stripe_nr * nr_data_stripes(map);

			for (i = 0; i < nr_data_stripes(map); i++)
				raid_map[(i+rot) % map->num_stripes] =
					ce->start + (tmp + i) * map->stripe_len;

			raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE;
			if (map->type & BTRFS_BLOCK_GROUP_RAID6)
				raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE;

			*length = map->stripe_len;
			stripe_index = 0;
			stripe_offset = 0;
			multi->num_stripes = map->num_stripes;
		} else {
			stripe_index = stripe_nr % nr_data_stripes(map);
			stripe_nr = stripe_nr / nr_data_stripes(map);

			/*
			 * Mirror #0 or #1 means the original data block.
			 * Mirror #2 is RAID5 parity block.
			 * Mirror #3 is RAID6 Q block.
			 */
			if (mirror_num > 1)
				stripe_index = nr_data_stripes(map) + mirror_num - 2;

			/* We distribute the parity blocks across stripes */
			stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
		}
	} else {
		/*
		 * After this division, stripe_nr is the number of stripes
		 * on this device we have to walk to find the data, and
		 * stripe_index is the number of our device in the stripe
		 * array.
		 */
		stripe_index = stripe_nr % map->num_stripes;
		stripe_nr = stripe_nr / map->num_stripes;
	}
	BUG_ON(stripe_index >= map->num_stripes);

	for (i = 0; i < multi->num_stripes; i++) {
		multi->stripes[i].physical =
			map->stripes[stripe_index].physical + stripe_offset +
			stripe_nr * map->stripe_len;
		multi->stripes[i].dev = map->stripes[stripe_index].dev;
		stripe_index++;
	}
	*multi_ret = multi;

	if (type)
		*type = map->type;

	if (raid_map) {
		sort_parity_stripes(multi, raid_map);
		*raid_map_ret = raid_map;
	}
out:
	return 0;
}

int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
		    u64 logical, u64 *length,
		    struct btrfs_multi_bio **multi_ret, int mirror_num,
		    u64 **raid_map_ret)
{
	return __btrfs_map_block(fs_info, rw, logical, length, NULL,
				 multi_ret, mirror_num, raid_map_ret);
}