]>
Commit | Line | Data |
---|---|---|
b1f0067a QW |
1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | #include <stdlib.h> | |
3 | #include <common.h> | |
4 | #include <fs_internal.h> | |
5 | #include "ctree.h" | |
6 | #include "disk-io.h" | |
7 | #include "volumes.h" | |
57f24f10 | 8 | #include "extent-io.h" |
b1f0067a QW |
9 | |
10 | const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { | |
11 | [BTRFS_RAID_RAID10] = { | |
12 | .sub_stripes = 2, | |
13 | .dev_stripes = 1, | |
14 | .devs_max = 0, /* 0 == as many as possible */ | |
15 | .devs_min = 4, | |
16 | .tolerated_failures = 1, | |
17 | .devs_increment = 2, | |
18 | .ncopies = 2, | |
19 | .nparity = 0, | |
20 | .raid_name = "raid10", | |
21 | .bg_flag = BTRFS_BLOCK_GROUP_RAID10, | |
22 | }, | |
23 | [BTRFS_RAID_RAID1] = { | |
24 | .sub_stripes = 1, | |
25 | .dev_stripes = 1, | |
26 | .devs_max = 2, | |
27 | .devs_min = 2, | |
28 | .tolerated_failures = 1, | |
29 | .devs_increment = 2, | |
30 | .ncopies = 2, | |
31 | .nparity = 0, | |
32 | .raid_name = "raid1", | |
33 | .bg_flag = BTRFS_BLOCK_GROUP_RAID1, | |
34 | }, | |
35 | [BTRFS_RAID_RAID1C3] = { | |
36 | .sub_stripes = 1, | |
37 | .dev_stripes = 1, | |
38 | .devs_max = 3, | |
39 | .devs_min = 3, | |
40 | .tolerated_failures = 2, | |
41 | .devs_increment = 3, | |
42 | .ncopies = 3, | |
43 | .raid_name = "raid1c3", | |
44 | .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3, | |
45 | }, | |
46 | [BTRFS_RAID_RAID1C4] = { | |
47 | .sub_stripes = 1, | |
48 | .dev_stripes = 1, | |
49 | .devs_max = 4, | |
50 | .devs_min = 4, | |
51 | .tolerated_failures = 3, | |
52 | .devs_increment = 4, | |
53 | .ncopies = 4, | |
54 | .raid_name = "raid1c4", | |
55 | .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4, | |
56 | }, | |
57 | [BTRFS_RAID_DUP] = { | |
58 | .sub_stripes = 1, | |
59 | .dev_stripes = 2, | |
60 | .devs_max = 1, | |
61 | .devs_min = 1, | |
62 | .tolerated_failures = 0, | |
63 | .devs_increment = 1, | |
64 | .ncopies = 2, | |
65 | .nparity = 0, | |
66 | .raid_name = "dup", | |
67 | .bg_flag = BTRFS_BLOCK_GROUP_DUP, | |
68 | }, | |
69 | [BTRFS_RAID_RAID0] = { | |
70 | .sub_stripes = 1, | |
71 | .dev_stripes = 1, | |
72 | .devs_max = 0, | |
73 | .devs_min = 2, | |
74 | .tolerated_failures = 0, | |
75 | .devs_increment = 1, | |
76 | .ncopies = 1, | |
77 | .nparity = 0, | |
78 | .raid_name = "raid0", | |
79 | .bg_flag = BTRFS_BLOCK_GROUP_RAID0, | |
80 | }, | |
81 | [BTRFS_RAID_SINGLE] = { | |
82 | .sub_stripes = 1, | |
83 | .dev_stripes = 1, | |
84 | .devs_max = 1, | |
85 | .devs_min = 1, | |
86 | .tolerated_failures = 0, | |
87 | .devs_increment = 1, | |
88 | .ncopies = 1, | |
89 | .nparity = 0, | |
90 | .raid_name = "single", | |
91 | .bg_flag = 0, | |
92 | }, | |
93 | [BTRFS_RAID_RAID5] = { | |
94 | .sub_stripes = 1, | |
95 | .dev_stripes = 1, | |
96 | .devs_max = 0, | |
97 | .devs_min = 2, | |
98 | .tolerated_failures = 1, | |
99 | .devs_increment = 1, | |
100 | .ncopies = 1, | |
101 | .nparity = 1, | |
102 | .raid_name = "raid5", | |
103 | .bg_flag = BTRFS_BLOCK_GROUP_RAID5, | |
104 | }, | |
105 | [BTRFS_RAID_RAID6] = { | |
106 | .sub_stripes = 1, | |
107 | .dev_stripes = 1, | |
108 | .devs_max = 0, | |
109 | .devs_min = 3, | |
110 | .tolerated_failures = 2, | |
111 | .devs_increment = 1, | |
112 | .ncopies = 1, | |
113 | .nparity = 2, | |
114 | .raid_name = "raid6", | |
115 | .bg_flag = BTRFS_BLOCK_GROUP_RAID6, | |
116 | }, | |
117 | }; | |
118 | ||
119 | struct stripe { | |
120 | struct btrfs_device *dev; | |
121 | u64 physical; | |
122 | }; | |
123 | ||
124 | static inline int nr_parity_stripes(struct map_lookup *map) | |
125 | { | |
126 | if (map->type & BTRFS_BLOCK_GROUP_RAID5) | |
127 | return 1; | |
128 | else if (map->type & BTRFS_BLOCK_GROUP_RAID6) | |
129 | return 2; | |
130 | else | |
131 | return 0; | |
132 | } | |
133 | ||
134 | static inline int nr_data_stripes(struct map_lookup *map) | |
135 | { | |
136 | return map->num_stripes - nr_parity_stripes(map); | |
137 | } | |
138 | ||
139 | #define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) ) | |
140 | ||
141 | static LIST_HEAD(fs_uuids); | |
142 | ||
143 | /* | |
144 | * Find a device specified by @devid or @uuid in the list of @fs_devices, or | |
145 | * return NULL. | |
146 | * | |
147 | * If devid and uuid are both specified, the match must be exact, otherwise | |
148 | * only devid is used. | |
149 | */ | |
150 | static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices, | |
151 | u64 devid, u8 *uuid) | |
152 | { | |
153 | struct list_head *head = &fs_devices->devices; | |
154 | struct btrfs_device *dev; | |
155 | ||
156 | list_for_each_entry(dev, head, dev_list) { | |
157 | if (dev->devid == devid && | |
158 | (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { | |
159 | return dev; | |
160 | } | |
161 | } | |
162 | return NULL; | |
163 | } | |
164 | ||
165 | static struct btrfs_fs_devices *find_fsid(u8 *fsid, u8 *metadata_uuid) | |
166 | { | |
167 | struct btrfs_fs_devices *fs_devices; | |
168 | ||
169 | list_for_each_entry(fs_devices, &fs_uuids, list) { | |
170 | if (metadata_uuid && (memcmp(fsid, fs_devices->fsid, | |
171 | BTRFS_FSID_SIZE) == 0) && | |
172 | (memcmp(metadata_uuid, fs_devices->metadata_uuid, | |
173 | BTRFS_FSID_SIZE) == 0)) { | |
174 | return fs_devices; | |
175 | } else if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0){ | |
176 | return fs_devices; | |
177 | } | |
178 | } | |
179 | return NULL; | |
180 | } | |
181 | ||
182 | static int device_list_add(struct btrfs_super_block *disk_super, | |
183 | u64 devid, struct blk_desc *desc, | |
184 | struct disk_partition *part, | |
185 | struct btrfs_fs_devices **fs_devices_ret) | |
186 | { | |
187 | struct btrfs_device *device; | |
188 | struct btrfs_fs_devices *fs_devices; | |
189 | u64 found_transid = btrfs_super_generation(disk_super); | |
190 | bool metadata_uuid = (btrfs_super_incompat_flags(disk_super) & | |
191 | BTRFS_FEATURE_INCOMPAT_METADATA_UUID); | |
192 | ||
193 | if (metadata_uuid) | |
194 | fs_devices = find_fsid(disk_super->fsid, | |
195 | disk_super->metadata_uuid); | |
196 | else | |
197 | fs_devices = find_fsid(disk_super->fsid, NULL); | |
198 | ||
199 | if (!fs_devices) { | |
200 | fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); | |
201 | if (!fs_devices) | |
202 | return -ENOMEM; | |
203 | INIT_LIST_HEAD(&fs_devices->devices); | |
204 | list_add(&fs_devices->list, &fs_uuids); | |
205 | memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); | |
206 | if (metadata_uuid) | |
207 | memcpy(fs_devices->metadata_uuid, | |
208 | disk_super->metadata_uuid, BTRFS_FSID_SIZE); | |
209 | else | |
210 | memcpy(fs_devices->metadata_uuid, fs_devices->fsid, | |
211 | BTRFS_FSID_SIZE); | |
212 | ||
213 | fs_devices->latest_devid = devid; | |
214 | fs_devices->latest_trans = found_transid; | |
215 | fs_devices->lowest_devid = (u64)-1; | |
216 | device = NULL; | |
217 | } else { | |
218 | device = find_device(fs_devices, devid, | |
219 | disk_super->dev_item.uuid); | |
220 | } | |
221 | if (!device) { | |
222 | device = kzalloc(sizeof(*device), GFP_NOFS); | |
223 | if (!device) { | |
224 | /* we can safely leave the fs_devices entry around */ | |
225 | return -ENOMEM; | |
226 | } | |
227 | device->devid = devid; | |
228 | device->desc = desc; | |
229 | device->part = part; | |
230 | device->generation = found_transid; | |
231 | memcpy(device->uuid, disk_super->dev_item.uuid, | |
232 | BTRFS_UUID_SIZE); | |
233 | device->total_devs = btrfs_super_num_devices(disk_super); | |
234 | device->super_bytes_used = btrfs_super_bytes_used(disk_super); | |
235 | device->total_bytes = | |
236 | btrfs_stack_device_total_bytes(&disk_super->dev_item); | |
237 | device->bytes_used = | |
238 | btrfs_stack_device_bytes_used(&disk_super->dev_item); | |
239 | list_add(&device->dev_list, &fs_devices->devices); | |
240 | device->fs_devices = fs_devices; | |
241 | } else if (!device->desc || !device->part) { | |
242 | /* | |
243 | * The existing device has newer generation, so this one could | |
244 | * be a stale one, don't add it. | |
245 | */ | |
246 | if (found_transid < device->generation) { | |
247 | error( | |
248 | "adding devid %llu gen %llu but found an existing device gen %llu", | |
249 | device->devid, found_transid, | |
250 | device->generation); | |
251 | return -EEXIST; | |
252 | } else { | |
253 | device->desc = desc; | |
254 | device->part = part; | |
255 | } | |
256 | } | |
257 | ||
258 | ||
259 | if (found_transid > fs_devices->latest_trans) { | |
260 | fs_devices->latest_devid = devid; | |
261 | fs_devices->latest_trans = found_transid; | |
262 | } | |
263 | if (fs_devices->lowest_devid > devid) { | |
264 | fs_devices->lowest_devid = devid; | |
265 | } | |
266 | *fs_devices_ret = fs_devices; | |
267 | return 0; | |
268 | } | |
269 | ||
270 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |
271 | { | |
272 | struct btrfs_fs_devices *seed_devices; | |
273 | struct btrfs_device *device; | |
274 | int ret = 0; | |
275 | ||
276 | again: | |
277 | if (!fs_devices) | |
278 | return 0; | |
279 | while (!list_empty(&fs_devices->devices)) { | |
280 | device = list_entry(fs_devices->devices.next, | |
281 | struct btrfs_device, dev_list); | |
282 | list_del(&device->dev_list); | |
283 | /* free the memory */ | |
284 | free(device); | |
285 | } | |
286 | ||
287 | seed_devices = fs_devices->seed; | |
288 | fs_devices->seed = NULL; | |
289 | if (seed_devices) { | |
290 | struct btrfs_fs_devices *orig; | |
291 | ||
292 | orig = fs_devices; | |
293 | fs_devices = seed_devices; | |
294 | list_del(&orig->list); | |
295 | free(orig); | |
296 | goto again; | |
297 | } else { | |
298 | list_del(&fs_devices->list); | |
299 | free(fs_devices); | |
300 | } | |
301 | ||
302 | return ret; | |
303 | } | |
304 | ||
305 | void btrfs_close_all_devices(void) | |
306 | { | |
307 | struct btrfs_fs_devices *fs_devices; | |
308 | ||
309 | while (!list_empty(&fs_uuids)) { | |
310 | fs_devices = list_entry(fs_uuids.next, struct btrfs_fs_devices, | |
311 | list); | |
312 | btrfs_close_devices(fs_devices); | |
313 | } | |
314 | } | |
315 | ||
316 | int btrfs_open_devices(struct btrfs_fs_devices *fs_devices) | |
317 | { | |
318 | struct btrfs_device *device; | |
319 | ||
320 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | |
321 | if (!device->desc || !device->part) { | |
322 | printf("no device found for devid %llu, skip it \n", | |
323 | device->devid); | |
324 | continue; | |
325 | } | |
326 | } | |
327 | return 0; | |
328 | } | |
329 | ||
330 | int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part, | |
331 | struct btrfs_fs_devices **fs_devices_ret, | |
332 | u64 *total_devs) | |
333 | { | |
334 | struct btrfs_super_block *disk_super; | |
335 | char buf[BTRFS_SUPER_INFO_SIZE]; | |
336 | int ret; | |
337 | u64 devid; | |
338 | ||
339 | disk_super = (struct btrfs_super_block *)buf; | |
340 | ret = btrfs_read_dev_super(desc, part, disk_super); | |
341 | if (ret < 0) | |
342 | return -EIO; | |
343 | devid = btrfs_stack_device_id(&disk_super->dev_item); | |
344 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP) | |
345 | *total_devs = 1; | |
346 | else | |
347 | *total_devs = btrfs_super_num_devices(disk_super); | |
348 | ||
349 | ret = device_list_add(disk_super, devid, desc, part, fs_devices_ret); | |
350 | ||
351 | return ret; | |
352 | } | |
353 | ||
354 | struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, | |
355 | u8 *uuid, u8 *fsid) | |
356 | { | |
357 | struct btrfs_device *device; | |
358 | struct btrfs_fs_devices *cur_devices; | |
359 | ||
360 | cur_devices = fs_info->fs_devices; | |
361 | while (cur_devices) { | |
362 | if (!fsid || | |
363 | !memcmp(cur_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) { | |
364 | device = find_device(cur_devices, devid, uuid); | |
365 | if (device) | |
366 | return device; | |
367 | } | |
368 | cur_devices = cur_devices->seed; | |
369 | } | |
370 | return NULL; | |
371 | } | |
372 | ||
57f24f10 QW |
373 | static struct btrfs_device *fill_missing_device(u64 devid) |
374 | { | |
375 | struct btrfs_device *device; | |
376 | ||
377 | device = kzalloc(sizeof(*device), GFP_NOFS); | |
378 | return device; | |
379 | } | |
380 | ||
b1f0067a QW |
381 | /* |
382 | * slot == -1: SYSTEM chunk | |
383 | * return -EIO on error, otherwise return 0 | |
384 | */ | |
385 | int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, | |
386 | struct extent_buffer *leaf, | |
387 | struct btrfs_chunk *chunk, | |
388 | int slot, u64 logical) | |
389 | { | |
390 | u64 length; | |
391 | u64 stripe_len; | |
392 | u16 num_stripes; | |
393 | u16 sub_stripes; | |
394 | u64 type; | |
395 | u32 chunk_ondisk_size; | |
396 | u32 sectorsize = fs_info->sectorsize; | |
397 | ||
398 | /* | |
399 | * Basic chunk item size check. Note that btrfs_chunk already contains | |
400 | * one stripe, so no "==" check. | |
401 | */ | |
402 | if (slot >= 0 && | |
403 | btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) { | |
404 | error("invalid chunk item size, have %u expect [%zu, %zu)", | |
405 | btrfs_item_size_nr(leaf, slot), | |
406 | sizeof(struct btrfs_chunk), | |
407 | BTRFS_LEAF_DATA_SIZE(fs_info)); | |
408 | return -EUCLEAN; | |
409 | } | |
410 | length = btrfs_chunk_length(leaf, chunk); | |
411 | stripe_len = btrfs_chunk_stripe_len(leaf, chunk); | |
412 | num_stripes = btrfs_chunk_num_stripes(leaf, chunk); | |
413 | sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); | |
414 | type = btrfs_chunk_type(leaf, chunk); | |
415 | ||
416 | if (num_stripes == 0) { | |
417 | error("invalid num_stripes, have %u expect non-zero", | |
418 | num_stripes); | |
419 | return -EUCLEAN; | |
420 | } | |
421 | if (slot >= 0 && btrfs_chunk_item_size(num_stripes) != | |
422 | btrfs_item_size_nr(leaf, slot)) { | |
423 | error("invalid chunk item size, have %u expect %lu", | |
424 | btrfs_item_size_nr(leaf, slot), | |
425 | btrfs_chunk_item_size(num_stripes)); | |
426 | return -EUCLEAN; | |
427 | } | |
428 | ||
429 | /* | |
430 | * These valid checks may be insufficient to cover every corner cases. | |
431 | */ | |
432 | if (!IS_ALIGNED(logical, sectorsize)) { | |
433 | error("invalid chunk logical %llu", logical); | |
434 | return -EIO; | |
435 | } | |
436 | if (btrfs_chunk_sector_size(leaf, chunk) != sectorsize) { | |
437 | error("invalid chunk sectorsize %llu", | |
438 | (unsigned long long)btrfs_chunk_sector_size(leaf, chunk)); | |
439 | return -EIO; | |
440 | } | |
441 | if (!length || !IS_ALIGNED(length, sectorsize)) { | |
442 | error("invalid chunk length %llu", length); | |
443 | return -EIO; | |
444 | } | |
445 | if (stripe_len != BTRFS_STRIPE_LEN) { | |
446 | error("invalid chunk stripe length: %llu", stripe_len); | |
447 | return -EIO; | |
448 | } | |
449 | /* Check on chunk item type */ | |
450 | if (slot == -1 && (type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) { | |
451 | error("invalid chunk type %llu", type); | |
452 | return -EIO; | |
453 | } | |
454 | if (type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK | | |
455 | BTRFS_BLOCK_GROUP_PROFILE_MASK)) { | |
456 | error("unrecognized chunk type: %llu", | |
457 | ~(BTRFS_BLOCK_GROUP_TYPE_MASK | | |
458 | BTRFS_BLOCK_GROUP_PROFILE_MASK) & type); | |
459 | return -EIO; | |
460 | } | |
461 | if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) { | |
462 | error("missing chunk type flag: %llu", type); | |
463 | return -EIO; | |
464 | } | |
465 | if (!(is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) || | |
466 | (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)) { | |
467 | error("conflicting chunk type detected: %llu", type); | |
468 | return -EIO; | |
469 | } | |
470 | if ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && | |
471 | !is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK)) { | |
472 | error("conflicting chunk profile detected: %llu", type); | |
473 | return -EIO; | |
474 | } | |
475 | ||
476 | chunk_ondisk_size = btrfs_chunk_item_size(num_stripes); | |
477 | /* | |
478 | * Btrfs_chunk contains at least one stripe, and for sys_chunk | |
479 | * it can't exceed the system chunk array size | |
480 | * For normal chunk, it should match its chunk item size. | |
481 | */ | |
482 | if (num_stripes < 1 || | |
483 | (slot == -1 && chunk_ondisk_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) || | |
484 | (slot >= 0 && chunk_ondisk_size > btrfs_item_size_nr(leaf, slot))) { | |
485 | error("invalid num_stripes: %u", num_stripes); | |
486 | return -EIO; | |
487 | } | |
488 | /* | |
489 | * Device number check against profile | |
490 | */ | |
491 | if ((type & BTRFS_BLOCK_GROUP_RAID10 && (sub_stripes != 2 || | |
492 | !IS_ALIGNED(num_stripes, sub_stripes))) || | |
493 | (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || | |
494 | (type & BTRFS_BLOCK_GROUP_RAID1C3 && num_stripes < 3) || | |
495 | (type & BTRFS_BLOCK_GROUP_RAID1C4 && num_stripes < 4) || | |
496 | (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || | |
497 | (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || | |
498 | (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) || | |
499 | ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && | |
500 | num_stripes != 1)) { | |
501 | error("Invalid num_stripes:sub_stripes %u:%u for profile %llu", | |
502 | num_stripes, sub_stripes, | |
503 | type & BTRFS_BLOCK_GROUP_PROFILE_MASK); | |
504 | return -EIO; | |
505 | } | |
506 | ||
507 | return 0; | |
508 | } | |
509 | ||
57f24f10 QW |
510 | /* |
511 | * Slot is used to verify the chunk item is valid | |
512 | * | |
513 | * For sys chunk in superblock, pass -1 to indicate sys chunk. | |
514 | */ | |
515 | static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, | |
516 | struct extent_buffer *leaf, | |
517 | struct btrfs_chunk *chunk, int slot) | |
518 | { | |
519 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | |
520 | struct map_lookup *map; | |
521 | struct cache_extent *ce; | |
522 | u64 logical; | |
523 | u64 length; | |
524 | u64 devid; | |
525 | u8 uuid[BTRFS_UUID_SIZE]; | |
526 | int num_stripes; | |
527 | int ret; | |
528 | int i; | |
529 | ||
530 | logical = key->offset; | |
531 | length = btrfs_chunk_length(leaf, chunk); | |
532 | num_stripes = btrfs_chunk_num_stripes(leaf, chunk); | |
533 | /* Validation check */ | |
534 | ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, slot, logical); | |
535 | if (ret) { | |
536 | error("%s checksums match, but it has an invalid chunk, %s", | |
537 | (slot == -1) ? "Superblock" : "Metadata", | |
538 | (slot == -1) ? "try btrfsck --repair -s <superblock> ie, 0,1,2" : ""); | |
539 | return ret; | |
540 | } | |
541 | ||
542 | ce = search_cache_extent(&map_tree->cache_tree, logical); | |
543 | ||
544 | /* already mapped? */ | |
545 | if (ce && ce->start <= logical && ce->start + ce->size > logical) { | |
546 | return 0; | |
547 | } | |
548 | ||
549 | map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS); | |
550 | if (!map) | |
551 | return -ENOMEM; | |
552 | ||
553 | map->ce.start = logical; | |
554 | map->ce.size = length; | |
555 | map->num_stripes = num_stripes; | |
556 | map->io_width = btrfs_chunk_io_width(leaf, chunk); | |
557 | map->io_align = btrfs_chunk_io_align(leaf, chunk); | |
558 | map->sector_size = btrfs_chunk_sector_size(leaf, chunk); | |
559 | map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); | |
560 | map->type = btrfs_chunk_type(leaf, chunk); | |
561 | map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); | |
562 | ||
563 | for (i = 0; i < num_stripes; i++) { | |
564 | map->stripes[i].physical = | |
565 | btrfs_stripe_offset_nr(leaf, chunk, i); | |
566 | devid = btrfs_stripe_devid_nr(leaf, chunk, i); | |
567 | read_extent_buffer(leaf, uuid, (unsigned long) | |
568 | btrfs_stripe_dev_uuid_nr(chunk, i), | |
569 | BTRFS_UUID_SIZE); | |
570 | map->stripes[i].dev = btrfs_find_device(fs_info, devid, uuid, | |
571 | NULL); | |
572 | if (!map->stripes[i].dev) { | |
573 | map->stripes[i].dev = fill_missing_device(devid); | |
574 | printf("warning, device %llu is missing\n", | |
575 | (unsigned long long)devid); | |
576 | list_add(&map->stripes[i].dev->dev_list, | |
577 | &fs_info->fs_devices->devices); | |
578 | } | |
579 | ||
580 | } | |
581 | ret = insert_cache_extent(&map_tree->cache_tree, &map->ce); | |
582 | if (ret < 0) { | |
583 | errno = -ret; | |
584 | error("failed to add chunk map start=%llu len=%llu: %d (%m)", | |
585 | map->ce.start, map->ce.size, ret); | |
586 | } | |
587 | ||
588 | return ret; | |
589 | } | |
590 | ||
591 | static int fill_device_from_item(struct extent_buffer *leaf, | |
592 | struct btrfs_dev_item *dev_item, | |
593 | struct btrfs_device *device) | |
594 | { | |
595 | unsigned long ptr; | |
596 | ||
597 | device->devid = btrfs_device_id(leaf, dev_item); | |
598 | device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); | |
599 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); | |
600 | device->type = btrfs_device_type(leaf, dev_item); | |
601 | device->io_align = btrfs_device_io_align(leaf, dev_item); | |
602 | device->io_width = btrfs_device_io_width(leaf, dev_item); | |
603 | device->sector_size = btrfs_device_sector_size(leaf, dev_item); | |
604 | ||
605 | ptr = (unsigned long)btrfs_device_uuid(dev_item); | |
606 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); | |
607 | ||
608 | return 0; | |
609 | } | |
610 | ||
611 | static int read_one_dev(struct btrfs_fs_info *fs_info, | |
612 | struct extent_buffer *leaf, | |
613 | struct btrfs_dev_item *dev_item) | |
614 | { | |
615 | struct btrfs_device *device; | |
616 | u64 devid; | |
617 | int ret = 0; | |
618 | u8 fs_uuid[BTRFS_UUID_SIZE]; | |
619 | u8 dev_uuid[BTRFS_UUID_SIZE]; | |
620 | ||
621 | devid = btrfs_device_id(leaf, dev_item); | |
622 | read_extent_buffer(leaf, dev_uuid, | |
623 | (unsigned long)btrfs_device_uuid(dev_item), | |
624 | BTRFS_UUID_SIZE); | |
625 | read_extent_buffer(leaf, fs_uuid, | |
626 | (unsigned long)btrfs_device_fsid(dev_item), | |
627 | BTRFS_FSID_SIZE); | |
628 | ||
629 | if (memcmp(fs_uuid, fs_info->fs_devices->fsid, BTRFS_UUID_SIZE)) { | |
630 | error("Seed device is not yet supported\n"); | |
631 | return -ENOTSUPP; | |
632 | } | |
633 | ||
634 | device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid); | |
635 | if (!device) { | |
636 | device = kzalloc(sizeof(*device), GFP_NOFS); | |
637 | if (!device) | |
638 | return -ENOMEM; | |
639 | list_add(&device->dev_list, | |
640 | &fs_info->fs_devices->devices); | |
641 | } | |
642 | ||
643 | fill_device_from_item(leaf, dev_item, device); | |
644 | fs_info->fs_devices->total_rw_bytes += | |
645 | btrfs_device_total_bytes(leaf, dev_item); | |
646 | return ret; | |
647 | } | |
648 | ||
649 | int btrfs_read_sys_array(struct btrfs_fs_info *fs_info) | |
650 | { | |
651 | struct btrfs_super_block *super_copy = fs_info->super_copy; | |
652 | struct extent_buffer *sb; | |
653 | struct btrfs_disk_key *disk_key; | |
654 | struct btrfs_chunk *chunk; | |
655 | u8 *array_ptr; | |
656 | unsigned long sb_array_offset; | |
657 | int ret = 0; | |
658 | u32 num_stripes; | |
659 | u32 array_size; | |
660 | u32 len = 0; | |
661 | u32 cur_offset; | |
662 | struct btrfs_key key; | |
663 | ||
664 | if (fs_info->nodesize < BTRFS_SUPER_INFO_SIZE) { | |
665 | printf("ERROR: nodesize %u too small to read superblock\n", | |
666 | fs_info->nodesize); | |
667 | return -EINVAL; | |
668 | } | |
669 | sb = alloc_dummy_extent_buffer(fs_info, BTRFS_SUPER_INFO_OFFSET, | |
670 | BTRFS_SUPER_INFO_SIZE); | |
671 | if (!sb) | |
672 | return -ENOMEM; | |
673 | btrfs_set_buffer_uptodate(sb); | |
674 | write_extent_buffer(sb, super_copy, 0, sizeof(*super_copy)); | |
675 | array_size = btrfs_super_sys_array_size(super_copy); | |
676 | ||
677 | array_ptr = super_copy->sys_chunk_array; | |
678 | sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array); | |
679 | cur_offset = 0; | |
680 | ||
681 | while (cur_offset < array_size) { | |
682 | disk_key = (struct btrfs_disk_key *)array_ptr; | |
683 | len = sizeof(*disk_key); | |
684 | if (cur_offset + len > array_size) | |
685 | goto out_short_read; | |
686 | ||
687 | btrfs_disk_key_to_cpu(&key, disk_key); | |
688 | ||
689 | array_ptr += len; | |
690 | sb_array_offset += len; | |
691 | cur_offset += len; | |
692 | ||
693 | if (key.type == BTRFS_CHUNK_ITEM_KEY) { | |
694 | chunk = (struct btrfs_chunk *)sb_array_offset; | |
695 | /* | |
696 | * At least one btrfs_chunk with one stripe must be | |
697 | * present, exact stripe count check comes afterwards | |
698 | */ | |
699 | len = btrfs_chunk_item_size(1); | |
700 | if (cur_offset + len > array_size) | |
701 | goto out_short_read; | |
702 | ||
703 | num_stripes = btrfs_chunk_num_stripes(sb, chunk); | |
704 | if (!num_stripes) { | |
705 | printk( | |
706 | "ERROR: invalid number of stripes %u in sys_array at offset %u\n", | |
707 | num_stripes, cur_offset); | |
708 | ret = -EIO; | |
709 | break; | |
710 | } | |
711 | ||
712 | len = btrfs_chunk_item_size(num_stripes); | |
713 | if (cur_offset + len > array_size) | |
714 | goto out_short_read; | |
715 | ||
716 | ret = read_one_chunk(fs_info, &key, sb, chunk, -1); | |
717 | if (ret) | |
718 | break; | |
719 | } else { | |
720 | printk( | |
721 | "ERROR: unexpected item type %u in sys_array at offset %u\n", | |
722 | (u32)key.type, cur_offset); | |
723 | ret = -EIO; | |
724 | break; | |
725 | } | |
726 | array_ptr += len; | |
727 | sb_array_offset += len; | |
728 | cur_offset += len; | |
729 | } | |
730 | free_extent_buffer(sb); | |
731 | return ret; | |
732 | ||
733 | out_short_read: | |
734 | printk("ERROR: sys_array too short to read %u bytes at offset %u\n", | |
735 | len, cur_offset); | |
736 | free_extent_buffer(sb); | |
737 | return -EIO; | |
738 | } | |
739 | ||
740 | int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) | |
741 | { | |
742 | struct btrfs_path *path; | |
743 | struct extent_buffer *leaf; | |
744 | struct btrfs_key key; | |
745 | struct btrfs_key found_key; | |
746 | struct btrfs_root *root = fs_info->chunk_root; | |
747 | int ret; | |
748 | int slot; | |
749 | ||
750 | path = btrfs_alloc_path(); | |
751 | if (!path) | |
752 | return -ENOMEM; | |
753 | ||
754 | /* | |
755 | * Read all device items, and then all the chunk items. All | |
756 | * device items are found before any chunk item (their object id | |
757 | * is smaller than the lowest possible object id for a chunk | |
758 | * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). | |
759 | */ | |
760 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | |
761 | key.offset = 0; | |
762 | key.type = 0; | |
763 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | |
764 | if (ret < 0) | |
765 | goto error; | |
766 | while(1) { | |
767 | leaf = path->nodes[0]; | |
768 | slot = path->slots[0]; | |
769 | if (slot >= btrfs_header_nritems(leaf)) { | |
770 | ret = btrfs_next_leaf(root, path); | |
771 | if (ret == 0) | |
772 | continue; | |
773 | if (ret < 0) | |
774 | goto error; | |
775 | break; | |
776 | } | |
777 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | |
778 | if (found_key.type == BTRFS_DEV_ITEM_KEY) { | |
779 | struct btrfs_dev_item *dev_item; | |
780 | dev_item = btrfs_item_ptr(leaf, slot, | |
781 | struct btrfs_dev_item); | |
782 | ret = read_one_dev(fs_info, leaf, dev_item); | |
783 | if (ret < 0) | |
784 | goto error; | |
785 | } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { | |
786 | struct btrfs_chunk *chunk; | |
787 | chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); | |
788 | ret = read_one_chunk(fs_info, &found_key, leaf, chunk, | |
789 | slot); | |
790 | if (ret < 0) | |
791 | goto error; | |
792 | } | |
793 | path->slots[0]++; | |
794 | } | |
795 | ||
796 | ret = 0; | |
797 | error: | |
798 | btrfs_free_path(path); | |
799 | return ret; | |
800 | } | |
801 | ||
b1f0067a QW |
802 | /* |
803 | * Get stripe length from chunk item and its stripe items | |
804 | * | |
805 | * Caller should only call this function after validating the chunk item | |
806 | * by using btrfs_check_chunk_valid(). | |
807 | */ | |
808 | u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info, | |
809 | struct extent_buffer *leaf, | |
810 | struct btrfs_chunk *chunk) | |
811 | { | |
812 | u64 stripe_len; | |
813 | u64 chunk_len; | |
814 | u32 num_stripes = btrfs_chunk_num_stripes(leaf, chunk); | |
815 | u64 profile = btrfs_chunk_type(leaf, chunk) & | |
816 | BTRFS_BLOCK_GROUP_PROFILE_MASK; | |
817 | ||
818 | chunk_len = btrfs_chunk_length(leaf, chunk); | |
819 | ||
820 | switch (profile) { | |
821 | case 0: /* Single profile */ | |
822 | case BTRFS_BLOCK_GROUP_RAID1: | |
823 | case BTRFS_BLOCK_GROUP_RAID1C3: | |
824 | case BTRFS_BLOCK_GROUP_RAID1C4: | |
825 | case BTRFS_BLOCK_GROUP_DUP: | |
826 | stripe_len = chunk_len; | |
827 | break; | |
828 | case BTRFS_BLOCK_GROUP_RAID0: | |
829 | stripe_len = chunk_len / num_stripes; | |
830 | break; | |
831 | case BTRFS_BLOCK_GROUP_RAID5: | |
832 | stripe_len = chunk_len / (num_stripes - 1); | |
833 | break; | |
834 | case BTRFS_BLOCK_GROUP_RAID6: | |
835 | stripe_len = chunk_len / (num_stripes - 2); | |
836 | break; | |
837 | case BTRFS_BLOCK_GROUP_RAID10: | |
838 | stripe_len = chunk_len / (num_stripes / | |
839 | btrfs_chunk_sub_stripes(leaf, chunk)); | |
840 | break; | |
841 | default: | |
842 | /* Invalid chunk profile found */ | |
843 | BUG_ON(1); | |
844 | } | |
845 | return stripe_len; | |
846 | } | |
847 | ||
848 | int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) | |
849 | { | |
850 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | |
851 | struct cache_extent *ce; | |
852 | struct map_lookup *map; | |
853 | int ret; | |
854 | ||
855 | ce = search_cache_extent(&map_tree->cache_tree, logical); | |
856 | if (!ce) { | |
857 | fprintf(stderr, "No mapping for %llu-%llu\n", | |
858 | (unsigned long long)logical, | |
859 | (unsigned long long)logical+len); | |
860 | return 1; | |
861 | } | |
862 | if (ce->start > logical || ce->start + ce->size < logical) { | |
863 | fprintf(stderr, "Invalid mapping for %llu-%llu, got " | |
864 | "%llu-%llu\n", (unsigned long long)logical, | |
865 | (unsigned long long)logical+len, | |
866 | (unsigned long long)ce->start, | |
867 | (unsigned long long)ce->start + ce->size); | |
868 | return 1; | |
869 | } | |
870 | map = container_of(ce, struct map_lookup, ce); | |
871 | ||
872 | if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | |
873 | BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)) | |
874 | ret = map->num_stripes; | |
875 | else if (map->type & BTRFS_BLOCK_GROUP_RAID10) | |
876 | ret = map->sub_stripes; | |
877 | else if (map->type & BTRFS_BLOCK_GROUP_RAID5) | |
878 | ret = 2; | |
879 | else if (map->type & BTRFS_BLOCK_GROUP_RAID6) | |
880 | ret = 3; | |
881 | else | |
882 | ret = 1; | |
883 | return ret; | |
884 | } | |
885 | ||
886 | int btrfs_next_bg(struct btrfs_fs_info *fs_info, u64 *logical, | |
887 | u64 *size, u64 type) | |
888 | { | |
889 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | |
890 | struct cache_extent *ce; | |
891 | struct map_lookup *map; | |
892 | u64 cur = *logical; | |
893 | ||
894 | ce = search_cache_extent(&map_tree->cache_tree, cur); | |
895 | ||
896 | while (ce) { | |
897 | /* | |
898 | * only jump to next bg if our cur is not 0 | |
899 | * As the initial logical for btrfs_next_bg() is 0, and | |
900 | * if we jump to next bg, we skipped a valid bg. | |
901 | */ | |
902 | if (cur) { | |
903 | ce = next_cache_extent(ce); | |
904 | if (!ce) | |
905 | return -ENOENT; | |
906 | } | |
907 | ||
908 | cur = ce->start; | |
909 | map = container_of(ce, struct map_lookup, ce); | |
910 | if (map->type & type) { | |
911 | *logical = ce->start; | |
912 | *size = ce->size; | |
913 | return 0; | |
914 | } | |
915 | if (!cur) | |
916 | ce = next_cache_extent(ce); | |
917 | } | |
918 | ||
919 | return -ENOENT; | |
920 | } | |
921 | ||
922 | static inline int parity_smaller(u64 a, u64 b) | |
923 | { | |
924 | return a > b; | |
925 | } | |
926 | ||
927 | /* Bubble-sort the stripe set to put the parity/syndrome stripes last */ | |
928 | static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map) | |
929 | { | |
930 | struct btrfs_bio_stripe s; | |
931 | int i; | |
932 | u64 l; | |
933 | int again = 1; | |
934 | ||
935 | while (again) { | |
936 | again = 0; | |
937 | for (i = 0; i < bbio->num_stripes - 1; i++) { | |
938 | if (parity_smaller(raid_map[i], raid_map[i+1])) { | |
939 | s = bbio->stripes[i]; | |
940 | l = raid_map[i]; | |
941 | bbio->stripes[i] = bbio->stripes[i+1]; | |
942 | raid_map[i] = raid_map[i+1]; | |
943 | bbio->stripes[i+1] = s; | |
944 | raid_map[i+1] = l; | |
945 | again = 1; | |
946 | } | |
947 | } | |
948 | } | |
949 | } | |
950 | ||
951 | int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |
952 | u64 logical, u64 *length, u64 *type, | |
953 | struct btrfs_multi_bio **multi_ret, int mirror_num, | |
954 | u64 **raid_map_ret) | |
955 | { | |
956 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | |
957 | struct cache_extent *ce; | |
958 | struct map_lookup *map; | |
511a1303 | 959 | u64 orig_len = *length; |
b1f0067a QW |
960 | u64 offset; |
961 | u64 stripe_offset; | |
962 | u64 *raid_map = NULL; | |
963 | int stripe_nr; | |
964 | int stripes_allocated = 8; | |
965 | int stripes_required = 1; | |
966 | int stripe_index; | |
967 | int i; | |
968 | struct btrfs_multi_bio *multi = NULL; | |
969 | ||
970 | if (multi_ret && rw == READ) { | |
971 | stripes_allocated = 1; | |
972 | } | |
973 | again: | |
974 | ce = search_cache_extent(&map_tree->cache_tree, logical); | |
975 | if (!ce) { | |
976 | kfree(multi); | |
977 | *length = (u64)-1; | |
978 | return -ENOENT; | |
979 | } | |
980 | if (ce->start > logical) { | |
981 | kfree(multi); | |
982 | *length = ce->start - logical; | |
983 | return -ENOENT; | |
984 | } | |
985 | ||
986 | if (multi_ret) { | |
987 | multi = kzalloc(btrfs_multi_bio_size(stripes_allocated), | |
988 | GFP_NOFS); | |
989 | if (!multi) | |
990 | return -ENOMEM; | |
991 | } | |
992 | map = container_of(ce, struct map_lookup, ce); | |
993 | offset = logical - ce->start; | |
994 | ||
995 | if (rw == WRITE) { | |
996 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | |
997 | BTRFS_BLOCK_GROUP_RAID1C3 | | |
998 | BTRFS_BLOCK_GROUP_RAID1C4 | | |
999 | BTRFS_BLOCK_GROUP_DUP)) { | |
1000 | stripes_required = map->num_stripes; | |
1001 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | |
1002 | stripes_required = map->sub_stripes; | |
1003 | } | |
1004 | } | |
1005 | if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) | |
1006 | && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) { | |
1007 | /* RAID[56] write or recovery. Return all stripes */ | |
1008 | stripes_required = map->num_stripes; | |
1009 | ||
1010 | /* Only allocate the map if we've already got a large enough multi_ret */ | |
1011 | if (stripes_allocated >= stripes_required) { | |
1012 | raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); | |
1013 | if (!raid_map) { | |
1014 | kfree(multi); | |
1015 | return -ENOMEM; | |
1016 | } | |
1017 | } | |
1018 | } | |
1019 | ||
1020 | /* if our multi bio struct is too small, back off and try again */ | |
1021 | if (multi_ret && stripes_allocated < stripes_required) { | |
1022 | stripes_allocated = stripes_required; | |
1023 | kfree(multi); | |
1024 | multi = NULL; | |
1025 | goto again; | |
1026 | } | |
1027 | stripe_nr = offset; | |
1028 | /* | |
1029 | * stripe_nr counts the total number of stripes we have to stride | |
1030 | * to get to this block | |
1031 | */ | |
1032 | stripe_nr = stripe_nr / map->stripe_len; | |
1033 | ||
3b72612a | 1034 | stripe_offset = stripe_nr * (u64)map->stripe_len; |
b1f0067a QW |
1035 | BUG_ON(offset < stripe_offset); |
1036 | ||
1037 | /* stripe_offset is the offset of this block in its stripe*/ | |
1038 | stripe_offset = offset - stripe_offset; | |
1039 | ||
1040 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | |
1041 | BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4 | | |
1042 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | | |
1043 | BTRFS_BLOCK_GROUP_RAID10 | | |
1044 | BTRFS_BLOCK_GROUP_DUP)) { | |
1045 | /* we limit the length of each bio to what fits in a stripe */ | |
1046 | *length = min_t(u64, ce->size - offset, | |
1047 | map->stripe_len - stripe_offset); | |
1048 | } else { | |
1049 | *length = ce->size - offset; | |
1050 | } | |
511a1303 | 1051 | *length = min_t(u64, *length, orig_len); |
b1f0067a QW |
1052 | |
1053 | if (!multi_ret) | |
1054 | goto out; | |
1055 | ||
1056 | multi->num_stripes = 1; | |
1057 | stripe_index = 0; | |
1058 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | |
1059 | BTRFS_BLOCK_GROUP_RAID1C3 | | |
1060 | BTRFS_BLOCK_GROUP_RAID1C4)) { | |
1061 | if (rw == WRITE) | |
1062 | multi->num_stripes = map->num_stripes; | |
1063 | else if (mirror_num) | |
1064 | stripe_index = mirror_num - 1; | |
1065 | else | |
1066 | stripe_index = stripe_nr % map->num_stripes; | |
1067 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | |
1068 | int factor = map->num_stripes / map->sub_stripes; | |
1069 | ||
1070 | stripe_index = stripe_nr % factor; | |
1071 | stripe_index *= map->sub_stripes; | |
1072 | ||
1073 | if (rw == WRITE) | |
1074 | multi->num_stripes = map->sub_stripes; | |
1075 | else if (mirror_num) | |
1076 | stripe_index += mirror_num - 1; | |
1077 | ||
1078 | stripe_nr = stripe_nr / factor; | |
1079 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | |
1080 | if (rw == WRITE) | |
1081 | multi->num_stripes = map->num_stripes; | |
1082 | else if (mirror_num) | |
1083 | stripe_index = mirror_num - 1; | |
1084 | } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | | |
1085 | BTRFS_BLOCK_GROUP_RAID6)) { | |
1086 | ||
1087 | if (raid_map) { | |
1088 | int rot; | |
1089 | u64 tmp; | |
1090 | u64 raid56_full_stripe_start; | |
1091 | u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len; | |
1092 | ||
1093 | /* | |
1094 | * align the start of our data stripe in the logical | |
1095 | * address space | |
1096 | */ | |
1097 | raid56_full_stripe_start = offset / full_stripe_len; | |
1098 | raid56_full_stripe_start *= full_stripe_len; | |
1099 | ||
1100 | /* get the data stripe number */ | |
1101 | stripe_nr = raid56_full_stripe_start / map->stripe_len; | |
1102 | stripe_nr = stripe_nr / nr_data_stripes(map); | |
1103 | ||
1104 | /* Work out the disk rotation on this stripe-set */ | |
1105 | rot = stripe_nr % map->num_stripes; | |
1106 | ||
1107 | /* Fill in the logical address of each stripe */ | |
3b72612a | 1108 | tmp = (u64)stripe_nr * nr_data_stripes(map); |
b1f0067a QW |
1109 | |
1110 | for (i = 0; i < nr_data_stripes(map); i++) | |
1111 | raid_map[(i+rot) % map->num_stripes] = | |
1112 | ce->start + (tmp + i) * map->stripe_len; | |
1113 | ||
1114 | raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE; | |
1115 | if (map->type & BTRFS_BLOCK_GROUP_RAID6) | |
1116 | raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE; | |
1117 | ||
1118 | *length = map->stripe_len; | |
1119 | stripe_index = 0; | |
1120 | stripe_offset = 0; | |
1121 | multi->num_stripes = map->num_stripes; | |
1122 | } else { | |
1123 | stripe_index = stripe_nr % nr_data_stripes(map); | |
1124 | stripe_nr = stripe_nr / nr_data_stripes(map); | |
1125 | ||
1126 | /* | |
1127 | * Mirror #0 or #1 means the original data block. | |
1128 | * Mirror #2 is RAID5 parity block. | |
1129 | * Mirror #3 is RAID6 Q block. | |
1130 | */ | |
1131 | if (mirror_num > 1) | |
1132 | stripe_index = nr_data_stripes(map) + mirror_num - 2; | |
1133 | ||
1134 | /* We distribute the parity blocks across stripes */ | |
1135 | stripe_index = (stripe_nr + stripe_index) % map->num_stripes; | |
1136 | } | |
1137 | } else { | |
1138 | /* | |
1139 | * after this do_div call, stripe_nr is the number of stripes | |
1140 | * on this device we have to walk to find the data, and | |
1141 | * stripe_index is the number of our device in the stripe array | |
1142 | */ | |
1143 | stripe_index = stripe_nr % map->num_stripes; | |
1144 | stripe_nr = stripe_nr / map->num_stripes; | |
1145 | } | |
1146 | BUG_ON(stripe_index >= map->num_stripes); | |
1147 | ||
1148 | for (i = 0; i < multi->num_stripes; i++) { | |
1149 | multi->stripes[i].physical = | |
1150 | map->stripes[stripe_index].physical + stripe_offset + | |
1151 | stripe_nr * map->stripe_len; | |
1152 | multi->stripes[i].dev = map->stripes[stripe_index].dev; | |
1153 | stripe_index++; | |
1154 | } | |
1155 | *multi_ret = multi; | |
1156 | ||
1157 | if (type) | |
1158 | *type = map->type; | |
1159 | ||
1160 | if (raid_map) { | |
1161 | sort_parity_stripes(multi, raid_map); | |
1162 | *raid_map_ret = raid_map; | |
1163 | } | |
1164 | out: | |
1165 | return 0; | |
1166 | } | |
1167 | ||
1168 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |
1169 | u64 logical, u64 *length, | |
1170 | struct btrfs_multi_bio **multi_ret, int mirror_num, | |
1171 | u64 **raid_map_ret) | |
1172 | { | |
1173 | return __btrfs_map_block(fs_info, rw, logical, length, NULL, | |
1174 | multi_ret, mirror_num, raid_map_ret); | |
1175 | } |