unsigned long long minsize=0, maxsize=0;
char *mindisc = NULL;
char *maxdisc = NULL;
- int dnum;
+ int dnum, raid_disk_num;
struct mddev_dev *dv;
int fail=0, warn=0;
struct stat stb;
pr_err("This metadata type does not support spare disks at create time\n");
return 1;
}
- if (subdevs > s->raiddisks+s->sparedisks) {
+ if (subdevs > s->raiddisks+s->sparedisks+s->journaldisks) {
pr_err("You have listed more devices (%d) than are in the array(%d)!\n", subdevs, s->raiddisks+s->sparedisks);
return 1;
}
- if (!have_container && subdevs < s->raiddisks+s->sparedisks) {
+ if (!have_container && subdevs < s->raiddisks+s->sparedisks+s->journaldisks) {
pr_err("You haven't given enough devices (real or missing) to create this array\n");
return 1;
}
}
}
+ if (dv->disposition == 'j')
+ continue; /* skip write journal for size check */
+
freesize /= 2; /* convert to K */
if (s->chunk && s->chunk != UnSet) {
/* round to chunk size */
for (pass=1; pass <=2 ; pass++) {
struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
- for (dnum=0, dv = devlist ; dv ;
+ for (dnum=0, raid_disk_num=0, dv = devlist ; dv ;
dv=(dv->next)?(dv->next):moved_disk, dnum++) {
int fd;
struct stat stb;
*inf = info;
inf->disk.number = dnum;
- inf->disk.raid_disk = dnum;
- if (inf->disk.raid_disk < s->raiddisks)
+ inf->disk.raid_disk = raid_disk_num++;
+
+ if (dv->disposition == 'j') {
+ inf->disk.raid_disk = MD_DISK_ROLE_JOURNAL;
+ inf->disk.state = (1<<MD_DISK_JOURNAL);
+ raid_disk_num--;
+ } else if (inf->disk.raid_disk < s->raiddisks)
inf->disk.state = (1<<MD_DISK_ACTIVE) |
(1<<MD_DISK_SYNC);
else
{"data-offset",1, 0, DataOffset},
{"nodes",1, 0, Nodes}, /* also for --assemble */
{"home-cluster",1, 0, ClusterName},
+ {"write-journal",1, 0, WriteJournal},
/* For assemble */
{"uuid", 1, 0, 'u'},
return (ev<<32)| sb->events_lo;
}
+struct r5l_payload_header {
+ __u16 type;
+ __u16 flags;
+} __attribute__ ((__packed__));
+
+enum r5l_payload_type {
+ R5LOG_PAYLOAD_DATA = 0,
+ R5LOG_PAYLOAD_PARITY = 1,
+ R5LOG_PAYLOAD_FLUSH = 2,
+};
+
+struct r5l_payload_data_parity {
+ struct r5l_payload_header header;
+ __u32 size; /* sector. data/parity size. each 4k has a checksum */
+ __u64 location; /* sector. For data, it's raid sector. For
+ parity, it's stripe sector */
+ __u32 checksum[];
+} __attribute__ ((__packed__));
+
+enum r5l_payload_data_parity_flag {
+ R5LOG_PAYLOAD_FLAG_DISCARD = 1, /* payload is discard */
+ /*
+ * RESHAPED/RESHAPING is only set when there is reshape activity. Note,
+ * both data/parity of a stripe should have the same flag set
+ *
+ * RESHAPED: reshape is running, and this stripe finished reshape
+ * RESHAPING: reshape is running, and this stripe isn't reshaped
+ * */
+ R5LOG_PAYLOAD_FLAG_RESHAPED = 2,
+ R5LOG_PAYLOAD_FLAG_RESHAPING = 3,
+};
+
+struct r5l_payload_flush {
+ struct r5l_payload_header header;
+ __u32 size; /* flush_stripes size, bytes */
+ __u64 flush_stripes[];
+} __attribute__ ((__packed__));
+
+enum r5l_payload_flush_flag {
+ R5LOG_PAYLOAD_FLAG_FLUSH_STRIPE = 1, /* data represents whole stripe */
+};
+
+struct r5l_meta_block {
+ __u32 magic;
+ __u32 checksum;
+ __u8 version;
+ __u8 __zero_pading_1;
+ __u16 __zero_pading_2;
+ __u32 meta_size; /* whole size of the block */
+
+ __u64 seq;
+ __u64 position; /* sector, start from rdev->data_offset, current position */
+ struct r5l_payload_header payloads[];
+} __attribute__ ((__packed__));
+
+#define R5LOG_VERSION 0x1
+#define R5LOG_MAGIC 0x6433c509
+
#endif
.require_homehost = 1,
};
struct shape s = {
+ .journaldisks = 0,
.level = UnSet,
.layout = UnSet,
.bitmap_chunk = UnSet,
case O(INCREMENTAL, IncrementalPath):
remove_path = optarg;
continue;
+ case O(CREATE, WriteJournal):
+ if (s.journaldisks) {
+ pr_err("Please specify only one journal device for the array.\n");
+ pr_err("Ignoring --write-journal %s...\n", optarg);
+ continue;
+ }
+ dv = xmalloc(sizeof(*dv));
+ dv->devname = optarg;
+ dv->disposition = 'j'; /* WriteJournal */
+ dv->used = 0;
+ dv->next = NULL;
+ *devlistend = dv;
+ devlistend = &dv->next;
+ devs_found++;
+
+ s.journaldisks = 1;
+ continue;
}
/* We have now processed all the valid options. Anything else is
* an error
exit(0);
}
+ if (s.journaldisks && (s.level < 4 || s.level > 6)) {
+ pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
+ exit(2);
+ }
+
if (!mode && devs_found) {
mode = MISC;
devmode = 'Q';
Nodes,
ClusterName,
ClusterConfirm,
+ WriteJournal,
};
enum prefix_standard {
struct shape {
int raiddisks;
int sparedisks;
+ int journaldisks;
int level;
int layout;
char *layout_str;
__u64 data_offset; /* sector start of data, often 0 */
__u64 data_size; /* sectors in this device that can be used for data */
__u64 super_offset; /* sector start of this superblock */
- __u64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
+ union {
+ __u64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
+ __u64 journal_tail;/* journal tail of journal device (from data_offset) */
+ };
__u32 dev_number; /* permanent identifier of this device - not role in raid */
__u32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
if ((dk->state & 6) == 6) /* active, sync */
*rp = __cpu_to_le16(dk->raid_disk);
+ else if (dk->state & (1<<MD_DISK_JOURNAL))
+ *rp = MD_DISK_ROLE_JOURNAL;
else if ((dk->state & ~2) == 0) /* active or idle -> spare */
*rp = MD_DISK_ROLE_SPARE;
else
static void free_super1(struct supertype *st);
+#define META_BLOCK_SIZE 4096
+unsigned long crc32(
+ unsigned long crc,
+ const unsigned char *buf,
+ unsigned len);
+
+static int write_empty_r5l_meta_block(struct supertype *st, int fd)
+{
+ struct r5l_meta_block *mb;
+ struct mdp_superblock_1 *sb = st->sb;
+ struct align_fd afd;
+ __u32 crc;
+
+ init_afd(&afd, fd);
+
+ if (posix_memalign((void**)&mb, 4096, META_BLOCK_SIZE) != 0) {
+ pr_err("Could not allocate memory for the meta block.\n");
+ return 1;
+ }
+
+ memset(mb, 0, META_BLOCK_SIZE);
+
+ mb->magic = __cpu_to_le32(R5LOG_MAGIC);
+ mb->version = R5LOG_VERSION;
+ mb->meta_size = __cpu_to_le32(sizeof(struct r5l_meta_block));
+ mb->seq = __cpu_to_le64(random32());
+ mb->position = __cpu_to_le64(0);
+
+ crc = crc32(0xffffffff, sb->set_uuid, sizeof(sb->set_uuid));
+ crc = crc32(crc, (void *)mb, META_BLOCK_SIZE);
+ mb->checksum = __cpu_to_le32(crc);
+
+ if (lseek64(fd, (sb->data_offset) * 512, 0) < 0LL) {
+ pr_err("cannot seek to offset of the meta block\n");
+ goto fail_to_write;
+ }
+
+ if (awrite(&afd, mb, META_BLOCK_SIZE) != META_BLOCK_SIZE) {
+ pr_err("failed to store write the meta block \n");
+ goto fail_to_write;
+ }
+ fsync(fd);
+
+ free(mb);
+ return 0;
+
+fail_to_write:
+ free(mb);
+ return 1;
+}
+
#ifndef MDASSEMBLE
static int write_init_super1(struct supertype *st)
{
unsigned long long sb_offset;
unsigned long long data_offset;
+ for (di = st->info; di; di = di->next) {
+ if (di->disk.state & (1 << MD_DISK_JOURNAL))
+ sb->feature_map |= MD_FEATURE_JOURNAL;
+ }
+
for (di = st->info; di; di = di->next) {
if (di->disk.state & (1 << MD_DISK_FAULTY))
continue;
sb->sb_csum = calc_sb_1_csum(sb);
rv = store_super1(st, di->fd);
+
+ if (rv == 0 && (di->disk.state & (1 << MD_DISK_JOURNAL))) {
+ rv = write_empty_r5l_meta_block(st, di->fd);
+ if (rv)
+ goto error_out;
+ }
+
if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
rv = st->ss->write_bitmap(st, di->fd, NoUpdate);
close(di->fd);