/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2016 Neil Brown <neilb@suse.com>
*
*
* This program is free software; you can redistribute it and/or modify
__u32 chunksize; /* in 512byte sectors */
__u32 raid_disks;
- __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
- * NOTE: signed, so bitmap can be before superblock
- * only meaningful of feature_map[0] is set.
- */
+ union {
+ __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
+ * NOTE: signed, so bitmap can be before superblock
+ * only meaningful if feature_map[0] is set.
+ */
+
+ /* only meaningful when feature_map[MD_FEATURE_PPL] is set */
+ struct {
+ __s16 offset; /* sectors from start of superblock that ppl starts */
+ __u16 size; /* ppl size in sectors */
+ } ppl;
+ };
/* These are only valid with feature bit '4' */
__u32 new_level; /* new level we are reshaping to */
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
__u8 devflags; /* per-device flags. Only one defined...*/
#define WriteMostly1 1 /* mask for writemostly flag in above */
+#define FailFast1 2 /* Device should get FailFast requests */
/* bad block log. If there are any bad blocks the feature flag is set.
* if offset and size are non-zero, that space is reserved and available.
*/
#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */
#define MD_FEATURE_BITMAP_VERSIONED 256 /* bitmap version number checked properly */
#define MD_FEATURE_JOURNAL 512 /* support write journal */
+#define MD_FEATURE_PPL 1024 /* support PPL */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
|MD_FEATURE_NEW_OFFSET \
|MD_FEATURE_BITMAP_VERSIONED \
|MD_FEATURE_JOURNAL \
+ |MD_FEATURE_PPL \
)
+#ifndef MDASSEMBLE
static int role_from_sb(struct mdp_superblock_1 *sb)
{
unsigned int d;
role = MD_DISK_ROLE_SPARE;
return role;
}
+#endif
/* return how many bytes are needed for bitmap, for cluster-md each node
* should have it's own bitmap */
{
unsigned long long bits, bytes;
- bits = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
+ bits = bitmap_bits(__le64_to_cpu(bms->sync_size),
+ __le32_to_cpu(bms->chunksize));
bytes = (bits+7) >> 3;
bytes += sizeof(bitmap_super_t);
bytes = ROUND_UP(bytes, boundary);
+/*
+ * Initialise an align_fd wrapper around 'fd'.
+ *
+ * Stores 'fd' in afd->fd and asks get_dev_sector_size() for the device
+ * sector size, passing &afd->blk_sz as the output location (presumably
+ * filled in by that helper - confirm against its definition).  When the
+ * helper returns zero, afd->blk_sz falls back to 512.
+ */
static void init_afd(struct align_fd *afd, int fd)
{
afd->fd = fd;
-
- if (ioctl(afd->fd, BLKSSZGET, &afd->blk_sz) != 0)
+ if (!get_dev_sector_size(afd->fd, NULL, (unsigned int *)&afd->blk_sz))
afd->blk_sz = 512;
}
return len;
}
+/*
+ * Work out how much space to reserve for the PPL.
+ *
+ * The result is PPL_HEADER_SIZE converted from bytes to 512-byte sectors
+ * (>> 9) plus the larger of 'chunk' and 128*2.
+ */
+static inline unsigned int choose_ppl_space(int chunk)
+{
+	int log_sectors = 128*2;
+
+	/* never reserve less than the 128*2 minimum for the log itself */
+	if (chunk > log_sectors)
+		log_sectors = chunk;
+
+	return (PPL_HEADER_SIZE >> 9) + log_sectors;
+}
+
#ifndef MDASSEMBLE
static void examine_super1(struct supertype *st, char *homehost)
{
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
printf("Internal Bitmap : %ld sectors from superblock\n",
(long)(int32_t)__le32_to_cpu(sb->bitmap_offset));
+ } else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_PPL)) {
+ printf(" PPL : %u sectors at offset %d sectors from superblock\n",
+ __le16_to_cpu(sb->ppl.size),
+ __le16_to_cpu(sb->ppl.offset));
}
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)) {
printf(" Reshape pos'n : %llu%s\n", (unsigned long long)__le64_to_cpu(sb->reshape_position)/2,
printf(" Flags :");
if (sb->devflags & WriteMostly1)
printf(" write-mostly");
+ if (sb->devflags & FailFast1)
+ printf(" failfast");
printf("\n");
}
}
if (super.bblog_size != 0 &&
- __le32_to_cpu(super.bblog_size) <= 100 &&
+ __le16_to_cpu(super.bblog_size) <= 100 &&
super.bblog_offset != 0 &&
(super.feature_map & __le32_to_cpu(MD_FEATURE_BAD_BLOCKS))) {
/* There is a bad block log */
unsigned long long bb_offset = sb_offset;
- int bytes = __le32_to_cpu(super.bblog_size) * 512;
+ int bytes = __le16_to_cpu(super.bblog_size) * 512;
int written = 0;
struct align_fd afrom, ato;
int i;
int l = homehost ? strlen(homehost) : 0;
- printf(" Name : %.32s", sb->set_name);
+ printf(" Name : %.32s", sb->set_name);
if (l > 0 && l < 32 &&
sb->set_name[l] == ':' &&
strncmp(sb->set_name, homehost, l) == 0)
printf(" (local to host %s)", homehost);
if (bms->nodes > 0 && (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET))
- printf("\n Cluster Name : %-64s", bms->cluster_name);
- printf("\n UUID : ");
+ printf("\n Cluster Name : %-64s", bms->cluster_name);
+ printf("\n UUID : ");
for (i=0; i<16; i++) {
if ((i&3)==0 && i != 0) printf(":");
printf("%02x", sb->set_uuid[i]);
}
- printf("\n Events : %llu\n\n", (unsigned long long)__le64_to_cpu(sb->events));
+ printf("\n Events : %llu\n\n",
+ (unsigned long long)__le64_to_cpu(sb->events));
}
static void brief_detail_super1(struct supertype *st)
__u64 *bbl, *bbp;
int i;
- if (!sb->bblog_size || __le32_to_cpu(sb->bblog_size) > 100
+ if (!sb->bblog_size || __le16_to_cpu(sb->bblog_size) > 100
|| !sb->bblog_offset){
printf("No bad-blocks list configured on %s\n", devname);
return 0;
return 0;
}
- size = __le32_to_cpu(sb->bblog_size)* 512;
+ size = __le16_to_cpu(sb->bblog_size)* 512;
if (posix_memalign((void**)&bbl, 4096, size) != 0) {
pr_err("could not allocate badblocks list\n");
return 0;
if (__le32_to_cpu(bsb->nodes) > 1)
info->array.state |= (1 << MD_SB_CLUSTERED);
+ super_offset = __le64_to_cpu(sb->super_offset);
info->data_offset = __le64_to_cpu(sb->data_offset);
info->component_size = __le64_to_cpu(sb->size);
- if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET))
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) {
info->bitmap_offset = (int32_t)__le32_to_cpu(sb->bitmap_offset);
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_PPL)) {
+ info->ppl_offset = __le16_to_cpu(sb->ppl.offset);
+ info->ppl_size = __le16_to_cpu(sb->ppl.size);
+ info->ppl_sector = super_offset + info->ppl_offset;
+ }
info->disk.major = 0;
info->disk.minor = 0;
else
role = __le16_to_cpu(sb->dev_roles[__le32_to_cpu(sb->dev_number)]);
- super_offset = __le64_to_cpu(sb->super_offset);
if (info->array.level <= 0)
data_size = __le64_to_cpu(sb->data_size);
else
end = bboffset;
}
- if (super_offset + info->bitmap_offset < end)
- end = super_offset + info->bitmap_offset;
+ if (super_offset + info->bitmap_offset + info->ppl_offset < end)
+ end = super_offset + info->bitmap_offset + info->ppl_offset;
if (info->data_offset + data_size < end)
info->space_after = end - data_size - info->data_offset;
earliest = super_offset + (32+4)*2; /* match kernel */
if (info->bitmap_offset > 0) {
unsigned long long bmend = info->bitmap_offset;
- unsigned long long size = __le64_to_cpu(bsb->sync_size);
- size /= __le32_to_cpu(bsb->chunksize) >> 9;
- size = (size + 7) >> 3;
- size += sizeof(bitmap_super_t);
- size = ROUND_UP(size, 4096);
+ unsigned long long size = calc_bitmap_size(bsb, 4096);
size /= 512;
bmend += size;
if (bmend > earliest)
earliest = bmend;
+ } else if (info->ppl_offset > 0) {
+ unsigned long long pplend = info->ppl_offset +
+ info->ppl_size;
+ if (pplend > earliest)
+ earliest = pplend;
}
if (sb->bblog_offset && sb->bblog_size) {
unsigned long long bbend = super_offset;
bbend += (int32_t)__le32_to_cpu(sb->bblog_offset);
- bbend += __le32_to_cpu(sb->bblog_size);
+ bbend += __le16_to_cpu(sb->bblog_size);
if (bbend > earliest)
earliest = bbend;
}
info->disk.state = 0; /* spare: not active, not sync, not faulty */
break;
case MD_DISK_ROLE_FAULTY:
- info->disk.state = 1; /* faulty */
+ info->disk.state = (1 << MD_DISK_FAULTY); /* faulty */
break;
case MD_DISK_ROLE_JOURNAL:
info->disk.state = (1 << MD_DISK_JOURNAL);
}
if (sb->devflags & WriteMostly1)
info->disk.state |= (1 << MD_DISK_WRITEMOSTLY);
+ if (sb->devflags & FailFast1)
+ info->disk.state |= (1 << MD_DISK_FAILFAST);
info->events = __le64_to_cpu(sb->events);
sprintf(info->text_version, "1.%d", st->minor_version);
info->safe_mode_delay = 200;
}
info->array.working_disks = working;
- if (sb->feature_map & __le32_to_cpu(MD_FEATURE_JOURNAL))
+
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_JOURNAL)) {
info->journal_device_required = 1;
+ info->consistency_policy = CONSISTENCY_POLICY_JOURNAL;
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_PPL)) {
+ info->consistency_policy = CONSISTENCY_POLICY_PPL;
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) {
+ info->consistency_policy = CONSISTENCY_POLICY_BITMAP;
+ } else if (info->array.level <= 0) {
+ info->consistency_policy = CONSISTENCY_POLICY_NONE;
+ } else {
+ info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+ }
+
info->journal_clean = 0;
}
}
} else if (strcmp(update, "linear-grow-new") == 0) {
unsigned int i;
- int rfd, fd;
+ int fd;
unsigned int max = __le32_to_cpu(sb->max_dev);
for (i=0 ; i < max ; i++)
if (max >= __le32_to_cpu(sb->max_dev))
sb->max_dev = __cpu_to_le32(max+1);
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->device_uuid, 16) != 16) {
- __u32 r[4] = {random(), random(), random(), random()};
- memcpy(sb->device_uuid, r, 16);
- }
- if (rfd >= 0)
- close(rfd);
+ random_uuid(sb->device_uuid);
sb->dev_roles[i] =
__cpu_to_le16(info->disk.raid_disk);
} else if (strcmp(update, "uuid") == 0) {
copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
- if (__le32_to_cpu(sb->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
- struct bitmap_super_s *bm;
- bm = (struct bitmap_super_s*)(st->sb+MAX_SB_SIZE);
- memcpy(bm->uuid, sb->set_uuid, 16);
- }
+ if (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)
+ memcpy(bms->uuid, sb->set_uuid, 16);
} else if (strcmp(update, "no-bitmap") == 0) {
sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
} else if (strcmp(update, "bbl") == 0) {
*/
unsigned long long sb_offset = __le64_to_cpu(sb->super_offset);
unsigned long long data_offset = __le64_to_cpu(sb->data_offset);
- long bitmap_offset = (long)(int32_t)__le32_to_cpu(sb->bitmap_offset);
+ long bitmap_offset = 0;
long bm_sectors = 0;
long space;
#ifndef MDASSEMBLE
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
- struct bitmap_super_s *bsb;
- bsb = (struct bitmap_super_s *)(((char*)sb)+MAX_SB_SIZE);
- bm_sectors = bitmap_sectors(bsb);
+ bitmap_offset = (long)__le32_to_cpu(sb->bitmap_offset);
+ bm_sectors = calc_bitmap_size(bms, 4096) >> 9;
+ } else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_PPL)) {
+ bitmap_offset = (long)__le16_to_cpu(sb->ppl.offset);
+ bm_sectors = (long)__le16_to_cpu(sb->ppl.size);
}
#endif
if (sb_offset < data_offset) {
sb->bblog_size = 0;
sb->bblog_shift = 0;
sb->bblog_offset = 0;
+ } else if (strcmp(update, "ppl") == 0) {
+ unsigned long long sb_offset = __le64_to_cpu(sb->super_offset);
+ unsigned long long data_offset = __le64_to_cpu(sb->data_offset);
+ unsigned long long data_size = __le64_to_cpu(sb->data_size);
+ long bb_offset = __le32_to_cpu(sb->bblog_offset);
+ int space;
+ int optimal_space;
+ int offset;
+
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
+ pr_err("Cannot add PPL to array with bitmap\n");
+ return -2;
+ }
+
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_JOURNAL)) {
+ pr_err("Cannot add PPL to array with journal\n");
+ return -2;
+ }
+
+ if (sb_offset < data_offset) {
+ if (bb_offset)
+ space = bb_offset - 8;
+ else
+ space = data_offset - sb_offset - 8;
+ offset = 8;
+ } else {
+ offset = -(sb_offset - data_offset - data_size);
+ if (offset < INT16_MIN)
+ offset = INT16_MIN;
+ space = -(offset - bb_offset);
+ }
+
+ if (space < (PPL_HEADER_SIZE >> 9) + 8) {
+ pr_err("Not enough space to add ppl\n");
+ return -2;
+ }
+
+ optimal_space = choose_ppl_space(__le32_to_cpu(sb->chunksize));
+
+ if (space > optimal_space)
+ space = optimal_space;
+ if (space > UINT16_MAX)
+ space = UINT16_MAX;
+
+ sb->ppl.offset = __cpu_to_le16(offset);
+ sb->ppl.size = __cpu_to_le16(space);
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_PPL);
+ } else if (strcmp(update, "no-ppl") == 0) {
+ sb->feature_map &= ~ __cpu_to_le32(MD_FEATURE_PPL);
} else if (strcmp(update, "name") == 0) {
if (info->name[0] == 0)
sprintf(info->name, "%d", info->array.md_minor);
strcat(sb->set_name, ":");
strcat(sb->set_name, info->name);
} else
- strcpy(sb->set_name, info->name);
+ strncpy(sb->set_name, info->name, sizeof(sb->set_name));
} else if (strcmp(update, "devicesize") == 0 &&
__le64_to_cpu(sb->super_offset) <
__le64_to_cpu(sb->data_offset)) {
sb->devflags |= WriteMostly1;
else if (strcmp(update, "readwrite")==0)
sb->devflags &= ~WriteMostly1;
+ else if (strcmp(update, "failfast") == 0)
+ sb->devflags |= FailFast1;
+ else if (strcmp(update, "nofailfast") == 0)
+ sb->devflags &= ~FailFast1;
else
rv = -1;
}
static int init_super1(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *name, char *homehost,
+ struct shape *s, char *name, char *homehost,
int *uuid, unsigned long long data_offset)
{
struct mdp_superblock_1 *sb;
int spares;
- int rfd;
char defname[10];
int sbsize;
if (uuid)
copy_uuid(sb->set_uuid, uuid, super1.swapuuid);
- else {
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->set_uuid, 16) != 16) {
- __u32 r[4] = {random(), random(), random(), random()};
- memcpy(sb->set_uuid, r, 16);
- }
- if (rfd >= 0) close(rfd);
- }
+ else
+ random_uuid(sb->set_uuid);;
if (name == NULL || *name == 0) {
sprintf(defname, "%d", info->md_minor);
strcat(sb->set_name, ":");
strcat(sb->set_name, name);
} else
- strcpy(sb->set_name, name);
+ strncpy(sb->set_name, name, sizeof(sb->set_name));
sb->ctime = __cpu_to_le64((unsigned long long)time(0));
sb->level = __cpu_to_le32(info->level);
sb->layout = __cpu_to_le32(info->layout);
- sb->size = __cpu_to_le64(size*2ULL);
+ sb->size = __cpu_to_le64(s->size*2ULL);
sb->chunksize = __cpu_to_le32(info->chunk_size>>9);
sb->raid_disks = __cpu_to_le32(info->raid_disks);
memset(sb->dev_roles, 0xff, MAX_SB_SIZE - sizeof(struct mdp_superblock_1));
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_PPL);
+
return 1;
}
struct devinfo *di, **dip;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
int rv, lockid;
+ int dk_state;
if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) {
rv = cluster_get_dlmlock(&lockid);
}
}
- if ((dk->state & 6) == 6) /* active, sync */
+ dk_state = dk->state & ~(1<<MD_DISK_FAILFAST);
+ if ((dk_state & (1<<MD_DISK_ACTIVE)) &&
+ (dk_state & (1<<MD_DISK_SYNC)))/* active, sync */
*rp = __cpu_to_le16(dk->raid_disk);
- else if (dk->state & (1<<MD_DISK_JOURNAL))
+ else if (dk_state & (1<<MD_DISK_JOURNAL))
*rp = MD_DISK_ROLE_JOURNAL;
- else if ((dk->state & ~2) == 0) /* active or idle -> spare */
+ else if ((dk_state & ~(1<<MD_DISK_ACTIVE)) == 0) /* active or idle -> spare */
*rp = MD_DISK_ROLE_SPARE;
else
*rp = MD_DISK_ROLE_FAULTY;
}
#endif
-static int locate_bitmap1(struct supertype *st, int fd);
+static int locate_bitmap1(struct supertype *st, int fd, int node_num);
static int store_super1(struct supertype *st, int fd)
{
struct bitmap_super_s *bm = (struct bitmap_super_s*)
(((char*)sb)+MAX_SB_SIZE);
if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC) {
- locate_bitmap1(st, fd);
+ locate_bitmap1(st, fd, 0);
if (awrite(&afd, bm, sizeof(*bm)) != sizeof(*bm))
return 5;
}
* NOTE: result must be multiple of 4K else bad things happen
* on 4K-sector devices.
*/
- if (devsize < 64*2) return 0;
+ if (devsize < 64*2)
+ return 0;
if (devsize - 64*2 >= 200*1024*1024*2)
return 128*2;
if (devsize - 4*2 > 8*1024*1024*2)
static void free_super1(struct supertype *st);
-#define META_BLOCK_SIZE 4096
+#ifndef MDASSEMBLE
+
__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
+/*
+ * Write an initial PPL header to a member device.
+ *
+ * A PPL_HEADER_SIZE-byte, 4096-aligned buffer is zeroed, its 'reserved'
+ * area is filled with 0xff, the signature is set to the inverted crc32c
+ * of the array UUID (sb->set_uuid) and the checksum to the inverted
+ * crc32c of the whole buffer.  The buffer is then written to 'fd' at
+ * byte offset info->ppl_sector * 512 and the descriptor is fsync'ed.
+ *
+ * Returns 0 on success, otherwise a non-zero error number: the
+ * posix_memalign() result, the errno of a failed lseek64()/write(), or
+ * EIO when write() stores fewer than PPL_HEADER_SIZE bytes.
+ */
+static int write_init_ppl1(struct supertype *st, struct mdinfo *info, int fd)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	void *buf;
+	struct ppl_header *ppl_hdr;
+	ssize_t written;
+	int ret;
+
+	ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+	if (ret) {
+		pr_err("Failed to allocate PPL header buffer\n");
+		return ret;
+	}
+
+	memset(buf, 0, PPL_HEADER_SIZE);
+	ppl_hdr = buf;
+	memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
+	ppl_hdr->signature = __cpu_to_le32(~crc32c_le(~0, sb->set_uuid,
+						       sizeof(sb->set_uuid)));
+	/* the checksum must be computed last: it covers the full header buffer */
+	ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
+
+	if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
+		ret = errno;
+		perror("Failed to seek to PPL header location");
+	}
+
+	if (!ret) {
+		written = write(fd, buf, PPL_HEADER_SIZE);
+		if (written < 0) {
+			ret = errno;
+			perror("Write PPL header failed");
+		} else if (written != PPL_HEADER_SIZE) {
+			/*
+			 * A short write does not set errno, so copying errno
+			 * here could yield 0 and report success for a
+			 * truncated header.  Report EIO instead.
+			 */
+			ret = EIO;
+			pr_err("Write PPL header failed: short write\n");
+		}
+	}
+
+	if (!ret)
+		fsync(fd);
+
+	free(buf);
+	return ret;
+}
+
+#define META_BLOCK_SIZE 4096
+
static int write_empty_r5l_meta_block(struct supertype *st, int fd)
{
struct r5l_meta_block *mb;
crc = crc32c_le(crc, (void *)mb, META_BLOCK_SIZE);
mb->checksum = crc;
- if (lseek64(fd, (sb->data_offset) * 512, 0) < 0LL) {
+ if (lseek64(fd, __le64_to_cpu(sb->data_offset) * 512, 0) < 0LL) {
pr_err("cannot seek to offset of the meta block\n");
goto fail_to_write;
}
return 1;
}
-#ifndef MDASSEMBLE
static int write_init_super1(struct supertype *st)
{
struct mdp_superblock_1 *sb = st->sb;
struct supertype *refst;
- int rfd;
int rv = 0;
unsigned long long bm_space;
struct devinfo *di;
unsigned long long dsize, array_size;
unsigned long long sb_offset;
unsigned long long data_offset;
+ long bm_offset;
for (di = st->info; di; di = di->next) {
if (di->disk.state & (1 << MD_DISK_JOURNAL))
- sb->feature_map |= MD_FEATURE_JOURNAL;
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL);
}
for (di = st->info; di; di = di->next) {
sb->devflags |= WriteMostly1;
else
sb->devflags &= ~WriteMostly1;
+ if (di->disk.state & (1<<MD_DISK_FAILFAST))
+ sb->devflags |= FailFast1;
+ else
+ sb->devflags &= ~FailFast1;
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->device_uuid, 16) != 16) {
- __u32 r[4] = {random(), random(), random(), random()};
- memcpy(sb->device_uuid, r, 16);
- }
- if (rfd >= 0)
- close(rfd);
+ random_uuid(sb->device_uuid);
if (!(di->disk.state & (1<<MD_DISK_JOURNAL)))
sb->events = 0;
* data_offset has already been set.
*/
array_size = __le64_to_cpu(sb->size);
- /* work out how much space we left for a bitmap,
- * Add 8 sectors for bad block log */
- bm_space = choose_bm_space(array_size) + 8;
+
+ /* work out how much space we left for a bitmap */
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
+ bitmap_super_t *bms = (bitmap_super_t *)
+ (((char *)sb) + MAX_SB_SIZE);
+ bm_space = calc_bitmap_size(bms, 4096) >> 9;
+ bm_offset = (long)__le32_to_cpu(sb->bitmap_offset);
+ } else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_PPL)) {
+ bm_space = choose_ppl_space(__le32_to_cpu(sb->chunksize));
+ if (bm_space > UINT16_MAX)
+ bm_space = UINT16_MAX;
+ if (st->minor_version == 0) {
+ bm_offset = -bm_space - 8;
+ if (bm_offset < INT16_MIN) {
+ bm_offset = INT16_MIN;
+ bm_space = -bm_offset - 8;
+ }
+ } else {
+ bm_offset = 8;
+ }
+ sb->ppl.offset = __cpu_to_le16(bm_offset);
+ sb->ppl.size = __cpu_to_le16(bm_space);
+ } else {
+ bm_space = choose_bm_space(array_size);
+ bm_offset = 8;
+ }
data_offset = di->data_offset;
if (data_offset == INVALID_SECTORS)
data_offset = st->data_offset;
switch(st->minor_version) {
case 0:
+ /* Add 8 sectors for bad block log */
+ bm_space += 8;
if (data_offset == INVALID_SECTORS)
data_offset = 0;
sb_offset = dsize;
}
break;
case 1:
- sb->super_offset = __cpu_to_le64(0);
- if (data_offset == INVALID_SECTORS)
- data_offset = 16;
-
- sb->data_offset = __cpu_to_le64(data_offset);
- sb->data_size = __cpu_to_le64(dsize - data_offset);
- if (data_offset >= 8 + 32*2 + 8) {
- sb->bblog_size = __cpu_to_le16(8);
- sb->bblog_offset = __cpu_to_le32(8 + 32*2);
- } else if (data_offset >= 16) {
- sb->bblog_size = __cpu_to_le16(8);
- sb->bblog_offset = __cpu_to_le32(data_offset-8);
- }
- break;
case 2:
- sb_offset = 4*2;
+ sb_offset = st->minor_version == 2 ? 8 : 0;
sb->super_offset = __cpu_to_le64(sb_offset);
if (data_offset == INVALID_SECTORS)
- data_offset = 24;
+ data_offset = sb_offset + 16;
sb->data_offset = __cpu_to_le64(data_offset);
sb->data_size = __cpu_to_le64(dsize - data_offset);
- if (data_offset >= 16 + 32*2 + 8) {
+ if (data_offset >= sb_offset+bm_offset+bm_space+8) {
sb->bblog_size = __cpu_to_le16(8);
- sb->bblog_offset = __cpu_to_le32(8 + 32*2);
- } else if (data_offset >= 16+16) {
+ sb->bblog_offset = __cpu_to_le32(bm_offset +
+ bm_space);
+ } else if (data_offset >= sb_offset + 16) {
sb->bblog_size = __cpu_to_le16(8);
- /* '8' sectors for the bblog, and another '8'
+ /* '8' sectors for the bblog, and 'sb_offset'
* because we want offset from superblock, not
* start of device.
*/
- sb->bblog_offset = __cpu_to_le32(data_offset-8-8);
+ sb->bblog_offset = __cpu_to_le32(data_offset -
+ 8 - sb_offset);
}
break;
default:
goto error_out;
}
- if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
- rv = st->ss->write_bitmap(st, di->fd, NoUpdate);
+ if (rv == 0 &&
+ (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {
+ rv = st->ss->write_bitmap(st, di->fd, NodeNumUpdate);
+ } else if (rv == 0 &&
+ (__le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL)) {
+ struct mdinfo info;
+
+ st->ss->getinfo_super(st, &info, NULL);
+ rv = st->ss->write_init_ppl(st, &info, di->fd);
+ }
+
close(di->fd);
di->fd = -1;
if (rv)
return 1;
}
+ memset(super, 0, SUPER1_SIZE);
+
if (aread(&afd, super, MAX_SB_SIZE) != MAX_SB_SIZE) {
if (devname)
pr_err("Cannot read superblock on %s\n",
* valid. If it doesn't clear the bit. An --assemble --force
* should get that written out.
*/
- locate_bitmap1(st, fd);
+ locate_bitmap1(st, fd, 0);
if (aread(&afd, bsb, 512) != 512)
goto no_bitmap;
return 0;
#ifndef MDASSEMBLE
- if (__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
+ if (__le32_to_cpu(super->feature_map) & MD_FEATURE_BITMAP_OFFSET) {
/* hot-add. allow for actual size of bitmap */
struct bitmap_super_s *bsb;
bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
- bmspace = bitmap_sectors(bsb);
+ bmspace = calc_bitmap_size(bsb, 4096) >> 9;
+ } else if (__le32_to_cpu(super->feature_map) & MD_FEATURE_PPL) {
+ bmspace = __le16_to_cpu(super->ppl.size);
}
#endif
/* Allow space for bad block log */
unsigned long long chunk = *chunkp;
int room = 0;
int creating = 0;
+ int len;
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
int uuid[4];
}
break;
default:
- return 0;
+ return -ENOSPC;
}
room -= bbl_size;
if (room <= 1)
/* No room for a bitmap */
- return 0;
+ return -ENOSPC;
max_bits = (room * 512 - sizeof(bitmap_super_t)) * 8;
if (chunk < 64*1024*1024)
chunk = 64*1024*1024;
} else if (chunk < min_chunk)
- return 0; /* chunk size too small */
+ return -EINVAL; /* chunk size too small */
if (chunk == 0) /* rounding problem */
- return 0;
+ return -EINVAL;
if (offset == 0) {
/* start bitmap on a 4K boundary with enough space for
if (st->nodes)
sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map)
| MD_FEATURE_BITMAP_VERSIONED);
- if (st->cluster_name)
- strncpy((char *)bms->cluster_name,
- st->cluster_name, strlen(st->cluster_name));
+ if (st->cluster_name) {
+ len = sizeof(bms->cluster_name);
+ strncpy((char *)bms->cluster_name, st->cluster_name, len);
+ bms->cluster_name[len - 1] = '\0';
+ }
*chunkp = chunk;
- return 1;
+ return 0;
}
-static int locate_bitmap1(struct supertype *st, int fd)
+static int locate_bitmap1(struct supertype *st, int fd, int node_num)
{
unsigned long long offset;
struct mdp_superblock_1 *sb;
else
ret = -1;
offset = __le64_to_cpu(sb->super_offset);
- offset += (int32_t) __le32_to_cpu(sb->bitmap_offset);
+ offset += (int32_t) __le32_to_cpu(sb->bitmap_offset) * (node_num + 1);
if (mustfree)
free(sb);
lseek64(fd, offset<<9, 0);
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
int rv = 0;
void *buf;
- int towrite, n;
+ int towrite, n, len;
struct align_fd afd;
unsigned int i = 0;
unsigned long long total_bm_space, bm_space_per_node;
case NameUpdate:
/* update cluster name */
if (st->cluster_name) {
- memset((char *)bms->cluster_name, 0, sizeof(bms->cluster_name));
- strncpy((char *)bms->cluster_name, st->cluster_name, 64);
+ len = sizeof(bms->cluster_name);
+ memset((char *)bms->cluster_name, 0, len);
+ strncpy((char *)bms->cluster_name,
+ st->cluster_name, len);
+ bms->cluster_name[len - 1] = '\0';
}
break;
case NodeNumUpdate:
/* cluster md only supports superblock 1.2 now */
- if (st->minor_version != 2) {
+ if (st->minor_version != 2 && bms->version == BITMAP_MAJOR_CLUSTERED) {
pr_err("Warning: cluster md only works with superblock 1.2\n");
return -EINVAL;
}
+ if (bms->version == BITMAP_MAJOR_CLUSTERED) {
+ if (__cpu_to_le32(st->nodes) < bms->nodes) {
+ /* The node count is not being increased, so there is no need to check
+ * whether there is enough space; just update bms->nodes */
+ bms->nodes = __cpu_to_le32(st->nodes);
+ break;
+ }
+ } else {
+ /* no need to change bms->nodes for other bitmap types */
+ if (st->nodes)
+ pr_err("Warning: --nodes option is only suitable for clustered bitmap\n");
+ break;
+ }
+
/* Each node has an independent bitmap, it is necessary to calculate the
* space is enough or not, first get how many bytes for the total bitmap */
bm_space_per_node = calc_bitmap_size(bms, 4096);
init_afd(&afd, fd);
- locate_bitmap1(st, fd);
+ locate_bitmap1(st, fd, 0);
if (posix_memalign(&buf, 4096, 4096))
return -ENOMEM;
memset(buf, 0xff, 4096);
memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
- towrite = calc_bitmap_size(bms, 4096);
+ /*
+ * use 4096 boundary if bitmap_offset is aligned
+ * with 8 sectors, then it should be compatible with
+ * older mdadm.
+ */
+ if (__le32_to_cpu(sb->bitmap_offset) & 7)
+ towrite = calc_bitmap_size(bms, 512);
+ else
+ towrite = calc_bitmap_size(bms, 4096);
while (towrite > 0) {
n = towrite;
if (n > 4096)
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
unsigned long long ldsize, devsize;
int bmspace;
return 0;
}
- /* creating: allow suitable space for bitmap */
- bmspace = choose_bm_space(devsize);
+ /* creating: allow suitable space for bitmap or PPL */
+ bmspace = consistency_policy == CONSISTENCY_POLICY_PPL ?
+ choose_ppl_space((*chunk)*2) : choose_bm_space(devsize);
if (data_offset == INVALID_SECTORS)
data_offset = st->data_offset;
switch(st->minor_version) {
case 0: /* metadata at end. Round down and subtract space to reserve */
devsize = (devsize & ~(4ULL*2-1));
- /* space for metadata, bblog, bitmap */
+ /* space for metadata, bblog, bitmap/ppl */
devsize -= 8*2 + 8 + bmspace;
break;
case 1:
void *ret;
struct mdp_superblock_1 *sb;
int i;
- int rfd;
unsigned long long offset;
if (posix_memalign(&ret, 4096, 1024) != 0)
sb->super_offset = __cpu_to_le64(offset);
//*(__u64*)(st->other + 128 + 8 + 8) = __cpu_to_le64(offset);
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->device_uuid, 16) != 16) {
- __u32 r[4] = {random(), random(), random(), random()};
- memcpy(sb->device_uuid, r, 16);
- }
- if (rfd >= 0)
- close(rfd);
+ random_uuid(sb->device_uuid);
for (i = 0; i < MD_SB_DISKS; i++) {
int state = sb0->disks[i].state;
.add_to_super = add_to_super1,
.examine_badblocks = examine_badblocks_super1,
.copy_metadata = copy_metadata1,
+ .write_init_ppl = write_init_ppl1,
#endif
.match_home = match_home1,
.uuid_from_super = uuid_from_super1,