default: /* no layout */
layout = 0;
break;
+ case 0:
+ layout = RAID0_ORIG_LAYOUT;
+ break;
case 10:
layout = 0x102; /* near=2, far=1 */
if (verbose > 0)
if (rv) {
pr_err("ADD_NEW_DISK for %s failed: %s\n",
dv->devname, strerror(errno));
+ if (errno == EINVAL &&
+ info.array.level == 0) {
+ pr_err("Possibly your kernel doesn't support RAID0 layouts.\n");
+ pr_err("Either upgrade, or use --layout=dangerous\n");
+ }
goto abort_locked;
}
break;
if (ioctl(mdfd, RUN_ARRAY, &param)) {
pr_err("RUN_ARRAY failed: %s\n",
strerror(errno));
+ /* 524 is the Linux kernel-internal ENOTSUPP, which has no name in
+  * userspace <errno.h> (it is not ENOTSUP), so it is matched by number.
+  * For a RAID0 array, point the user at the explicit layout options.
+  */
+ if (errno == 524 /* kernel ENOTSUPP */ &&
+ info.array.level == 0)
+ cont_err("Please use --layout=original or --layout=alternate\n");
if (info.array.chunk_size & (info.array.chunk_size-1)) {
cont_err("Problem may be that chunk size is not a power of 2\n");
}
printf(" Layout : %s\n",
str ? str : "-unknown-");
}
+ if (array.level == 0 && array.layout) {
+ str = map_num(r0layout, array.layout);
+ printf(" Layout : %s\n",
+ str ? str : "-unknown-");
+ }
if (array.level == 6) {
str = map_num(r6layout, array.layout);
printf(" Layout : %s\n",
{ NULL, UnSet }
};
+/* raid0 layout is only needed because of a bug in Linux 3.14 which changed
+ * the effective layout of raid0 arrays with varying device sizes.
+ *
+ * This table maps the layout names accepted for raid0 to their numeric
+ * values.  "1" and "2" are numeric spellings of "original" and
+ * "alternate"; "dangerous" maps to 0.  The named entries come first, and
+ * the table ends with a { NULL, UnSet } sentinel.
+ */
+mapping_t r0layout[] = {
+ { "original", RAID0_ORIG_LAYOUT},
+ { "alternate", RAID0_ALT_MULTIZONE_LAYOUT},
+ { "1", 1}, /* aka ORIG: same value as RAID0_ORIG_LAYOUT */
+ { "2", 2}, /* aka ALT: same value as RAID0_ALT_MULTIZONE_LAYOUT */
+ { "dangerous", 0},
+ { NULL, UnSet},
+};
+
mapping_t pers[] = {
{ "linear", LEVEL_LINEAR},
{ "raid0", 0},
collecting chunks into smaller stripes that only span the drives which
still have remaining space.
+A bug was introduced in Linux 3.14 which changed the layout of blocks in
+a RAID0 beyond the region that is striped over all devices. This bug
+does not affect an array with all devices the same size, but can affect
+other RAID0 arrays.
+
+Linux 5.4 (and some stable kernels to which the change was backported)
+will not normally assemble such an array as it cannot know which layout
+to use. There is a module parameter "raid0.default_layout" which can be
+set to "1" to force the kernel to use the pre-3.14 layout or to "2" to
+force it to use the 3.14-and-later layout. When creating a new RAID0
+array,
+.I mdadm
+will record the chosen layout in the metadata in a way that allows newer
+kernels to assemble the array without needing a module parameter.
.SS RAID1
This option configures the fine details of data layout for RAID5, RAID6,
and RAID10 arrays, and controls the failure modes for
.IR faulty .
+It can also be used for working around a kernel bug with RAID0, but generally
+doesn't need to be used explicitly.
The layout of the RAID5 parity block can be one of
.BR left\-asymmetric ,
"clear" or "none" will remove any pending or periodic failure modes,
and "flush" will clear any persistent faults.
-Finally, the layout options for RAID10 are one of 'n', 'o' or 'f' followed
+The layout options for RAID10 are one of 'n', 'o' or 'f' followed
by a small number. The default is 'n2'. The supported options are:
.I 'n'
number (e.g. it is perfectly legal to have an 'n2' layout for an array
with an odd number of devices).
+A bug introduced in Linux 3.14 means that RAID0 arrays
+.B "with devices of differing sizes"
+started using a different layout. This could lead to
+data corruption. Since Linux 5.4 (and various stable releases that received
+backports), the kernel will not accept such an array unless
+a layout is explicitly set. It can be set to
+.RB ' original '
+or
+.RB ' alternate '.
+When creating a new array,
+.I mdadm
+will select
+.RB ' original '
+by default, so the layout does not normally need to be set.
+An array created for either
+.RB ' original '
+or
+.RB ' alternate '
+will not be recognized by an (unpatched) kernel prior to 5.4. To create
+a RAID0 array with devices of differing sizes that can be used on an
+older kernel, you can set the layout to
+.RB ' dangerous '.
+This will use whichever layout the running kernel supports, so the data
+on the array may become corrupt when changing kernel from pre-3.14 to a
+later kernel.
+
When an array is converted between RAID5 and RAID6 an intermediate
RAID6 layout is used in which the second parity block (Q) is always on
the last device. To convert a RAID5 to RAID6 and leave it in this new
pr_err("raid level must be given before layout.\n");
exit(2);
+ case 0:
+ s.layout = map_name(r0layout, optarg);
+ if (s.layout == UnSet) {
+ pr_err("layout %s not understood for raid0.\n",
+ optarg);
+ exit(2);
+ }
+ break;
case 5:
s.layout = map_name(r5layout, optarg);
if (s.layout == UnSet) {
extern char *map_num(mapping_t *map, int num);
extern int map_name(mapping_t *map, char *name);
-extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
+extern mapping_t r0layout[], r5layout[], r6layout[],
+ pers[], modes[], faultylayout[];
extern mapping_t consistency_policies[], sysfs_array_states[];
extern char *map_dev_preferred(int major, int minor, int create,
#define makedev(M,m) (((M)<<8) | (m))
#endif
+enum r0layout { /* numeric raid0 layout values */
+ RAID0_ORIG_LAYOUT = 1, /* listed as "original" (and "1") in r0layout[] */
+ RAID0_ALT_MULTIZONE_LAYOUT = 2, /* listed as "alternate" (and "2") in r0layout[] */
+};
+
/* for raid4/5/6 */
#define ALGORITHM_LEFT_ASYMMETRIC 0
#define ALGORITHM_RIGHT_ASYMMETRIC 1
if (*chunk == UnSet)
*chunk = DEFAULT_CHUNK;
+ if (level == 0 && layout != UnSet) {
+ if (verbose)
+ pr_err("0.90 metadata does not support layouts for RAID0\n");
+ return 0;
+ }
+
if (!subdev)
return 1;
__u64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
__u32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */
- __u32 layout; /* only for raid5 currently */
+ __u32 layout; /* used for raid5, raid6, raid10, and raid0 */
__u64 size; /* used size of component devices, in 512byte sectors */
__u32 chunksize; /* in 512byte sectors */
#define MD_FEATURE_JOURNAL 512 /* support write journal */
#define MD_FEATURE_PPL 1024 /* support PPL */
#define MD_FEATURE_MUTLIPLE_PPLS 2048 /* support for multiple PPLs */
+#define MD_FEATURE_RAID0_LAYOUT 4096 /* layout is meaningful in RAID0 */
/* Mask formed by OR-ing the feature bits listed below.  Every name used
 * here must match its #define exactly, including the "MUTLIPLE" spelling
 * of the multiple-PPLs flag; otherwise the mask fails to compile when it
 * is expanded.
 */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
|MD_FEATURE_JOURNAL \
|MD_FEATURE_PPL \
|MD_FEATURE_MUTLIPLE_PPLS \
+ |MD_FEATURE_RAID0_LAYOUT \
)
static int role_from_sb(struct mdp_superblock_1 *sb)
printf(" Events : %llu\n",
(unsigned long long)__le64_to_cpu(sb->events));
printf("\n");
+ if (__le32_to_cpu(sb->level) == 0 &&
+ (sb->feature_map & __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT))) {
+ c = map_num(r0layout, __le32_to_cpu(sb->layout));
+ printf(" Layout : %s\n", c?c:"-unknown-");
+ }
if (__le32_to_cpu(sb->level) == 5) {
c = map_num(r5layout, __le32_to_cpu(sb->layout));
printf(" Layout : %s\n", c?c:"-unknown-");
int fd;
char *devname;
long long data_offset;
+ unsigned long long dev_size;
mdu_disk_info_t disk;
struct devinfo *next;
};
di->devname = devname;
di->disk = *dk;
di->data_offset = data_offset;
+ get_dev_size(fd, NULL, &di->dev_size);
di->next = NULL;
*dip = di;
unsigned long long sb_offset;
unsigned long long data_offset;
long bm_offset;
+ int raid0_need_layout = 0;
for (di = st->info; di; di = di->next) {
if (di->disk.state & (1 << MD_DISK_JOURNAL))
sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL);
+ if (sb->level == 0 && sb->layout != 0) {
+ struct devinfo *di2 = st->info;
+ unsigned long long s1, s2;
+ s1 = di->dev_size;
+ if (di->data_offset != INVALID_SECTORS)
+ s1 -= di->data_offset;
+ s1 /= __le32_to_cpu(sb->chunksize);
+ s2 = di2->dev_size;
+ if (di2->data_offset != INVALID_SECTORS)
+ s2 -= di2->data_offset;
+ s2 /= __le32_to_cpu(sb->chunksize);
+ if (s1 != s2)
+ raid0_need_layout = 1;
+ }
}
for (di = st->info; di; di = di->next) {
sb->bblog_offset = 0;
}
+ /* RAID0 needs a layout if devices aren't all the same size */
+ if (raid0_need_layout)
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
+
sb->sb_csum = calc_sb_1_csum(sb);
rv = store_super1(st, di->fd);