X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super1.c;h=4fef3789d39a00d0c8acd7b3461b9be20cf655cc;hb=2d2b0eb7b9d5603edb2e41e2c2860042fbbe8c92;hp=ba74a33126631f0058b916dc68a1cb83e41f96cd;hpb=7e6e839a265190e15742c4ecdd050aa1d9f208c6;p=thirdparty%2Fmdadm.git diff --git a/super1.c b/super1.c index ba74a331..4fef3789 100644 --- a/super1.c +++ b/super1.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2009 Neil Brown + * Copyright (C) 2001-2016 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -68,7 +68,10 @@ struct mdp_superblock_1 { __u64 data_offset; /* sector start of data, often 0 */ __u64 data_size; /* sectors in this device that can be used for data */ __u64 super_offset; /* sector start of this superblock */ - __u64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ + union { + __u64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ + __u64 journal_tail;/* journal tail of journal device (from data_offset) */ + }; __u32 dev_number; /* permanent identifier of this device - not role in raid */ __u32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ @@ -125,6 +128,8 @@ struct misc_dev_info { * backwards anyway. */ #define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ +#define MD_FEATURE_BITMAP_VERSIONED 256 /* bitmap version number checked properly */ +#define MD_FEATURE_JOURNAL 512 /* support write journal */ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RESHAPE_ACTIVE \ @@ -132,8 +137,25 @@ struct misc_dev_info { |MD_FEATURE_REPLACEMENT \ |MD_FEATURE_RESHAPE_BACKWARDS \ |MD_FEATURE_NEW_OFFSET \ + |MD_FEATURE_BITMAP_VERSIONED \ + |MD_FEATURE_JOURNAL \ ) +#ifndef MDASSEMBLE +static int role_from_sb(struct mdp_superblock_1 *sb) +{ + unsigned int d; + int role; + + d = __le32_to_cpu(sb->dev_number); + if (d < __le32_to_cpu(sb->max_dev)) + role = __le16_to_cpu(sb->dev_roles[d]); + else + role = MD_DISK_ROLE_SPARE; + return role; +} +#endif + /* return how many bytes are needed for bitmap, for cluster-md each node * should have it's own bitmap */ static unsigned int calc_bitmap_size(bitmap_super_t *bms, unsigned int boundary) @@ -304,8 +326,8 @@ static void examine_super1(struct supertype *st, char *homehost) strncmp(sb->set_name, homehost, l) == 0) printf(" (local to host %s)", homehost); printf("\n"); - if (bms->nodes > 0) - printf("Cluster Name : %s", bms->cluster_name); + if (bms->nodes > 0 && (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) + printf(" Cluster Name : %-64s\n", bms->cluster_name); atime = __le64_to_cpu(sb->ctime) & 0xFFFFFFFFFFULL; printf(" Creation Time : %.24s\n", ctime(&atime)); c=map_num(pers, __le32_to_cpu(sb->level)); @@ -463,25 +485,23 @@ static void examine_super1(struct supertype *st, char *homehost) /* This turns out to just be confusing */ printf(" Array Slot : %d (", __le32_to_cpu(sb->dev_number)); for (i= __le32_to_cpu(sb->max_dev); i> 0 ; i--) - if (__le16_to_cpu(sb->dev_roles[i-1]) != 0xffff) + if (__le16_to_cpu(sb->dev_roles[i-1]) != MD_DISK_ROLE_SPARE) break; for (d=0; d < i; d++) { int role = __le16_to_cpu(sb->dev_roles[d]); if (d) printf(", "); - if (role == 0xffff) printf("empty"); - else if(role == 0xfffe) printf("failed"); + if (role == MD_DISK_ROLE_SPARE) printf("empty"); + else if(role == MD_DISK_ROLE_FAULTY) printf("failed"); else printf("%d", role); } printf(")\n"); #endif printf(" Device Role : "); - d = __le32_to_cpu(sb->dev_number); - if (d < __le32_to_cpu(sb->max_dev)) - role = __le16_to_cpu(sb->dev_roles[d]); - else - role = 0xFFFF; - if (role >= 0xFFFE) + role = role_from_sb(sb); + if (role >= MD_DISK_ROLE_FAULTY) printf("spare\n"); + else if (role == MD_DISK_ROLE_JOURNAL) + printf("Journal\n"); else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_REPLACEMENT)) printf("Replacement device %d\n", role); else @@ -510,7 +530,7 @@ static void examine_super1(struct supertype *st, char *homehost) faulty = 0; for (i=0; i< __le32_to_cpu(sb->max_dev); i++) { int role = __le16_to_cpu(sb->dev_roles[i]); - if (role == 0xFFFE) + if (role == MD_DISK_ROLE_FAULTY) faulty++; } if (faulty) printf(" %d failed", faulty); @@ -698,12 +718,8 @@ static int copy_metadata1(struct supertype *st, int from, int to) /* have the header, can calculate * correct bitmap bytes */ bitmap_super_t *bms; - int bits; bms = (void*)buf; - bits = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9); - bytes = (bits+7) >> 3; - bytes += sizeof(bitmap_super_t); - bytes = ROUND_UP(bytes, 512); + bytes = calc_bitmap_size(bms, 512); if (n > bytes) n = bytes; } @@ -714,12 +730,12 @@ static int copy_metadata1(struct supertype *st, int from, int to) } if (super.bblog_size != 0 && - __le32_to_cpu(super.bblog_size) <= 100 && + __le16_to_cpu(super.bblog_size) <= 100 && super.bblog_offset != 0 && (super.feature_map & __le32_to_cpu(MD_FEATURE_BAD_BLOCKS))) { /* There is a bad block log */ unsigned long long bb_offset = sb_offset; - int bytes = __le32_to_cpu(super.bblog_size) * 512; + int bytes = __le16_to_cpu(super.bblog_size) * 512; int written = 0; struct align_fd afrom, ato; @@ -766,8 +782,8 @@ static void detail_super1(struct supertype *st, char *homehost) sb->set_name[l] == ':' && strncmp(sb->set_name, homehost, l) == 0) printf(" (local to host %s)", homehost); - if (bms->nodes > 0) - printf("Cluster Name : %64s", bms->cluster_name); + if (bms->nodes > 0 && (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) + printf("\n Cluster Name : %-64s", bms->cluster_name); printf("\n UUID : "); for (i=0; i<16; i++) { if ((i&3)==0 && i != 0) printf(":"); @@ -816,7 +832,7 @@ static int examine_badblocks_super1(struct supertype *st, int fd, char *devname) __u64 *bbl, *bbp; int i; - if (!sb->bblog_size || __le32_to_cpu(sb->bblog_size) > 100 + if (!sb->bblog_size || __le16_to_cpu(sb->bblog_size) > 100 || !sb->bblog_offset){ printf("No bad-blocks list configured on %s\n", devname); return 0; @@ -827,7 +843,7 @@ static int examine_badblocks_super1(struct supertype *st, int fd, char *devname) return 0; } - size = __le32_to_cpu(sb->bblog_size)* 512; + size = __le16_to_cpu(sb->bblog_size)* 512; if (posix_memalign((void**)&bbl, 4096, size) != 0) { pr_err("could not allocate badblocks list\n"); return 0; @@ -924,7 +940,7 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map) info->disk.number = __le32_to_cpu(sb->dev_number); if (__le32_to_cpu(sb->dev_number) >= __le32_to_cpu(sb->max_dev) || __le32_to_cpu(sb->dev_number) >= MAX_DEVS) - role = 0xfffe; + role = MD_DISK_ROLE_FAULTY; else role = __le16_to_cpu(sb->dev_roles[__le32_to_cpu(sb->dev_number)]); @@ -965,12 +981,12 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map) size /= 512; bmend += size; if (bmend > earliest) - bmend = earliest; + earliest = bmend; } if (sb->bblog_offset && sb->bblog_size) { unsigned long long bbend = super_offset; bbend += (int32_t)__le32_to_cpu(sb->bblog_offset); - bbend += __le32_to_cpu(sb->bblog_size); + bbend += __le16_to_cpu(sb->bblog_size); if (bbend > earliest) earliest = bbend; } @@ -991,12 +1007,17 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map) info->disk.raid_disk = -1; switch(role) { - case 0xFFFF: + case MD_DISK_ROLE_SPARE: info->disk.state = 0; /* spare: not active, not sync, not faulty */ break; - case 0xFFFE: + case MD_DISK_ROLE_FAULTY: info->disk.state = 1; /* faulty */ break; + case MD_DISK_ROLE_JOURNAL: + info->disk.state = (1 << MD_DISK_JOURNAL); + info->disk.raid_disk = role; + info->space_after = (misc->device_size - info->data_offset) % 8; /* journal uses all 4kB blocks*/ + break; default: info->disk.state = 6; /* active and in sync */ info->disk.raid_disk = role; @@ -1044,7 +1065,7 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map) map[i] = 0; for (i = 0; i < __le32_to_cpu(sb->max_dev); i++) { role = __le16_to_cpu(sb->dev_roles[i]); - if (/*role == 0xFFFF || */role < (unsigned) info->array.raid_disks) { + if (/*role == MD_DISK_ROLE_SPARE || */role < (unsigned) info->array.raid_disks) { working++; if (map && role < map_disks) map[role] = 1; @@ -1052,6 +1073,9 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map) } info->array.working_disks = working; + if (sb->feature_map & __le32_to_cpu(MD_FEATURE_JOURNAL)) + info->journal_device_required = 1; + info->journal_clean = 0; } static struct mdinfo *container_content1(struct supertype *st, char *subarray) @@ -1076,7 +1100,18 @@ static int update_super1(struct supertype *st, struct mdinfo *info, * ignored. */ int rv = 0; + int lockid; struct mdp_superblock_1 *sb = st->sb; + bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE); + + if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) { + rv = cluster_get_dlmlock(&lockid); + if (rv) { + pr_err("Cannot get dlmlock in %s return %d\n", __func__, rv); + cluster_release_dlmlock(lockid); + return rv; + } + } if (strcmp(update, "homehost") == 0 && homehost) { @@ -1116,8 +1151,10 @@ static int update_super1(struct supertype *st, struct mdinfo *info, int want; if (info->disk.state & (1<disk.raid_disk; + else if (info->disk.state & (1<dev_roles[d] != __cpu_to_le16(want)) { sb->dev_roles[d] = __cpu_to_le16(want); rv = 1; @@ -1138,24 +1175,18 @@ static int update_super1(struct supertype *st, struct mdinfo *info, } } else if (strcmp(update, "linear-grow-new") == 0) { unsigned int i; - int rfd, fd; + int fd; unsigned int max = __le32_to_cpu(sb->max_dev); for (i=0 ; i < max ; i++) - if (__le16_to_cpu(sb->dev_roles[i]) >= 0xfffe) + if (__le16_to_cpu(sb->dev_roles[i]) >= MD_DISK_ROLE_FAULTY) break; sb->dev_number = __cpu_to_le32(i); info->disk.number = i; if (max >= __le32_to_cpu(sb->max_dev)) sb->max_dev = __cpu_to_le32(max+1); - if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || - read(rfd, sb->device_uuid, 16) != 16) { - __u32 r[4] = {random(), random(), random(), random()}; - memcpy(sb->device_uuid, r, 16); - } - if (rfd >= 0) - close(rfd); + random_uuid(sb->device_uuid); sb->dev_roles[i] = __cpu_to_le16(info->disk.raid_disk); @@ -1247,6 +1278,11 @@ static int update_super1(struct supertype *st, struct mdinfo *info, sb->bblog_shift = 0; sb->bblog_offset = 0; } + } else if (strcmp(update, "force-no-bbl") == 0) { + sb->feature_map &= ~ __cpu_to_le32(MD_FEATURE_BAD_BLOCKS); + sb->bblog_size = 0; + sb->bblog_shift = 0; + sb->bblog_offset = 0; } else if (strcmp(update, "name") == 0) { if (info->name[0] == 0) sprintf(info->name, "%d", info->array.md_minor); @@ -1258,7 +1294,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info, strcat(sb->set_name, ":"); strcat(sb->set_name, info->name); } else - strcpy(sb->set_name, info->name); + strncpy(sb->set_name, info->name, sizeof(sb->set_name)); } else if (strcmp(update, "devicesize") == 0 && __le64_to_cpu(sb->super_offset) < __le64_to_cpu(sb->data_offset)) { @@ -1267,7 +1303,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info, (st->sb + MAX_SB_SIZE + BM_SUPER_SIZE); sb->data_size = __cpu_to_le64( misc->device_size - __le64_to_cpu(sb->data_offset)); - } else if (strcmp(update, "revert-reshape") == 0) { + } else if (strncmp(update, "revert-reshape", 14) == 0) { rv = -2; if (!(sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE))) pr_err("No active reshape to revert on %s\n", @@ -1277,6 +1313,24 @@ static int update_super1(struct supertype *st, struct mdinfo *info, unsigned long long reshape_sectors; long reshape_chunk; rv = 0; + /* If the reshape hasn't started, just stop it. + * It is conceivable that a stripe was modified but + * the metadata not updated. In that case the backup + * should have been used to get passed the critical stage. + * If that couldn't happen, the "-nobackup" version + * will be used. + */ + if (strcmp(update, "revert-reshape-nobackup") == 0 && + sb->reshape_position == 0 && + (__le32_to_cpu(sb->delta_disks) > 0 || + (__le32_to_cpu(sb->delta_disks) == 0 && + !(sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS))))) { + sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); + sb->raid_disks = __cpu_to_le32(__le32_to_cpu(sb->raid_disks) - + __le32_to_cpu(sb->delta_disks)); + sb->delta_disks = 0; + goto done; + } /* reshape_position is a little messy. * Its value must be a multiple of the larger * chunk size, and of the "after" data disks. @@ -1323,6 +1377,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info, sb->new_offset = __cpu_to_le32(-offset_delta); sb->data_size = __cpu_to_le64(__le64_to_cpu(sb->data_size) - offset_delta); } + done:; } } else if (strcmp(update, "_reshape_progress")==0) sb->reshape_position = __cpu_to_le64(info->reshape_progress); @@ -1334,6 +1389,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info, rv = -1; sb->sb_csum = calc_sb_1_csum(sb); + if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) + cluster_release_dlmlock(lockid); + return rv; } @@ -1343,7 +1401,6 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info, { struct mdp_superblock_1 *sb; int spares; - int rfd; char defname[10]; int sbsize; @@ -1373,14 +1430,8 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info, if (uuid) copy_uuid(sb->set_uuid, uuid, super1.swapuuid); - else { - if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || - read(rfd, sb->set_uuid, 16) != 16) { - __u32 r[4] = {random(), random(), random(), random()}; - memcpy(sb->set_uuid, r, 16); - } - if (rfd >= 0) close(rfd); - } + else + random_uuid(sb->set_uuid);; if (name == NULL || *name == 0) { sprintf(defname, "%d", info->md_minor); @@ -1393,7 +1444,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info, strcat(sb->set_name, ":"); strcat(sb->set_name, name); } else - strcpy(sb->set_name, name); + strncpy(sb->set_name, name, sizeof(sb->set_name)); sb->ctime = __cpu_to_le64((unsigned long long)time(0)); sb->level = __cpu_to_le32(info->level); @@ -1437,13 +1488,26 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk, struct mdp_superblock_1 *sb = st->sb; __u16 *rp = sb->dev_roles + dk->number; struct devinfo *di, **dip; + bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE); + int rv, lockid; + + if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) { + rv = cluster_get_dlmlock(&lockid); + if (rv) { + pr_err("Cannot get dlmlock in %s return %d\n", __func__, rv); + cluster_release_dlmlock(lockid); + return rv; + } + } if ((dk->state & 6) == 6) /* active, sync */ *rp = __cpu_to_le16(dk->raid_disk); + else if (dk->state & (1<state & ~2) == 0) /* active or idle -> spare */ - *rp = 0xffff; + *rp = MD_DISK_ROLE_SPARE; else - *rp = 0xfffe; + *rp = MD_DISK_ROLE_FAULTY; if (dk->number >= (int)__le32_to_cpu(sb->max_dev) && __le32_to_cpu(sb->max_dev) < MAX_DEVS) @@ -1464,11 +1528,14 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk, di->next = NULL; *dip = di; + if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) + cluster_release_dlmlock(lockid); + return 0; } #endif -static void locate_bitmap1(struct supertype *st, int fd); +static int locate_bitmap1(struct supertype *st, int fd, int node_num); static int store_super1(struct supertype *st, int fd) { @@ -1477,6 +1544,17 @@ static int store_super1(struct supertype *st, int fd) struct align_fd afd; int sbsize; unsigned long long dsize; + bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE); + int rv, lockid; + + if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) { + rv = cluster_get_dlmlock(&lockid); + if (rv) { + pr_err("Cannot get dlmlock in %s return %d\n", __func__, rv); + cluster_release_dlmlock(lockid); + return rv; + } + } if (!get_dev_size(fd, NULL, &dsize)) return 1; @@ -1531,12 +1609,15 @@ static int store_super1(struct supertype *st, int fd) struct bitmap_super_s *bm = (struct bitmap_super_s*) (((char*)sb)+MAX_SB_SIZE); if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC) { - locate_bitmap1(st, fd); + locate_bitmap1(st, fd, 0); if (awrite(&afd, bm, sizeof(*bm)) != sizeof(*bm)) return 5; } } fsync(fd); + if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) + cluster_release_dlmlock(lockid); + return 0; } @@ -1549,7 +1630,8 @@ static unsigned long choose_bm_space(unsigned long devsize) * NOTE: result must be multiple of 4K else bad things happen * on 4K-sector devices. */ - if (devsize < 64*2) return 0; + if (devsize < 64*2) + return 0; if (devsize - 64*2 >= 200*1024*1024*2) return 128*2; if (devsize - 4*2 > 8*1024*1024*2) @@ -1559,12 +1641,59 @@ static unsigned long choose_bm_space(unsigned long devsize) static void free_super1(struct supertype *st); +#define META_BLOCK_SIZE 4096 +__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len); + #ifndef MDASSEMBLE +static int write_empty_r5l_meta_block(struct supertype *st, int fd) +{ + struct r5l_meta_block *mb; + struct mdp_superblock_1 *sb = st->sb; + struct align_fd afd; + __u32 crc; + + init_afd(&afd, fd); + + if (posix_memalign((void**)&mb, 4096, META_BLOCK_SIZE) != 0) { + pr_err("Could not allocate memory for the meta block.\n"); + return 1; + } + + memset(mb, 0, META_BLOCK_SIZE); + + mb->magic = __cpu_to_le32(R5LOG_MAGIC); + mb->version = R5LOG_VERSION; + mb->meta_size = __cpu_to_le32(sizeof(struct r5l_meta_block)); + mb->seq = __cpu_to_le64(random32()); + mb->position = __cpu_to_le64(0); + + crc = crc32c_le(0xffffffff, sb->set_uuid, sizeof(sb->set_uuid)); + crc = crc32c_le(crc, (void *)mb, META_BLOCK_SIZE); + mb->checksum = crc; + + if (lseek64(fd, (sb->data_offset) * 512, 0) < 0LL) { + pr_err("cannot seek to offset of the meta block\n"); + goto fail_to_write; + } + + if (awrite(&afd, mb, META_BLOCK_SIZE) != META_BLOCK_SIZE) { + pr_err("failed to store write the meta block \n"); + goto fail_to_write; + } + fsync(fd); + + free(mb); + return 0; + +fail_to_write: + free(mb); + return 1; +} + static int write_init_super1(struct supertype *st) { struct mdp_superblock_1 *sb = st->sb; struct supertype *refst; - int rfd; int rv = 0; unsigned long long bm_space; struct devinfo *di; @@ -1572,6 +1701,11 @@ static int write_init_super1(struct supertype *st) unsigned long long sb_offset; unsigned long long data_offset; + for (di = st->info; di; di = di->next) { + if (di->disk.state & (1 << MD_DISK_JOURNAL)) + sb->feature_map |= MD_FEATURE_JOURNAL; + } + for (di = st->info; di; di = di->next) { if (di->disk.state & (1 << MD_DISK_FAULTY)) continue; @@ -1587,15 +1721,10 @@ static int write_init_super1(struct supertype *st) else sb->devflags &= ~WriteMostly1; - if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || - read(rfd, sb->device_uuid, 16) != 16) { - __u32 r[4] = {random(), random(), random(), random()}; - memcpy(sb->device_uuid, r, 16); - } - if (rfd >= 0) - close(rfd); + random_uuid(sb->device_uuid); - sb->events = 0; + if (!(di->disk.state & (1<events = 0; refst = dup_super(st); if (load_super1(refst, di->fd, NULL)==0) { @@ -1703,15 +1832,23 @@ static int write_init_super1(struct supertype *st) rv = -EINVAL; goto out; } - if (conf_get_create_info()->bblist == 0) { + /* Disable badblock log on clusters, or when explicitly requested */ + if (st->nodes > 0 || conf_get_create_info()->bblist == 0) { sb->bblog_size = 0; sb->bblog_offset = 0; } sb->sb_csum = calc_sb_1_csum(sb); rv = store_super1(st, di->fd); + + if (rv == 0 && (di->disk.state & (1 << MD_DISK_JOURNAL))) { + rv = write_empty_r5l_meta_block(st, di->fd); + if (rv) + goto error_out; + } + if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1)) - rv = st->ss->write_bitmap(st, di->fd, NoUpdate); + rv = st->ss->write_bitmap(st, di->fd, NodeNumUpdate); close(di->fd); di->fd = -1; if (rv) @@ -1860,6 +1997,8 @@ static int load_super1(struct supertype *st, int fd, char *devname) return 1; } + memset(super, 0, SUPER1_SIZE); + if (aread(&afd, super, MAX_SB_SIZE) != MAX_SB_SIZE) { if (devname) pr_err("Cannot read superblock on %s\n", @@ -1906,7 +2045,7 @@ static int load_super1(struct supertype *st, int fd, char *devname) * valid. If it doesn't clear the bit. An --assemble --force * should get that written out. */ - locate_bitmap1(st, fd); + locate_bitmap1(st, fd, 0); if (aread(&afd, bsb, 512) != 512) goto no_bitmap; @@ -2045,6 +2184,7 @@ add_internal_bitmap1(struct supertype *st, unsigned long long chunk = *chunkp; int room = 0; int creating = 0; + int len; struct mdp_superblock_1 *sb = st->sb; bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE); int uuid[4]; @@ -2111,7 +2251,7 @@ add_internal_bitmap1(struct supertype *st, } break; default: - return 0; + return -ENOSPC; } room -= bbl_size; @@ -2121,7 +2261,7 @@ add_internal_bitmap1(struct supertype *st, if (room <= 1) /* No room for a bitmap */ - return 0; + return -ENOSPC; max_bits = (room * 512 - sizeof(bitmap_super_t)) * 8; @@ -2139,9 +2279,9 @@ add_internal_bitmap1(struct supertype *st, if (chunk < 64*1024*1024) chunk = 64*1024*1024; } else if (chunk < min_chunk) - return 0; /* chunk size too small */ + return -EINVAL; /* chunk size too small */ if (chunk == 0) /* rounding problem */ - return 0; + return -EINVAL; if (offset == 0) { /* start bitmap on a 4K boundary with enough space for @@ -2167,32 +2307,43 @@ add_internal_bitmap1(struct supertype *st, bms->sync_size = __cpu_to_le64(size); bms->write_behind = __cpu_to_le32(write_behind); bms->nodes = __cpu_to_le32(st->nodes); - if (st->cluster_name) - strncpy((char *)bms->cluster_name, - st->cluster_name, strlen(st->cluster_name)); + if (st->nodes) + sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map) + | MD_FEATURE_BITMAP_VERSIONED); + if (st->cluster_name) { + len = sizeof(bms->cluster_name); + strncpy((char *)bms->cluster_name, st->cluster_name, len); + bms->cluster_name[len - 1] = '\0'; + } *chunkp = chunk; - return 1; + return 0; } -static void locate_bitmap1(struct supertype *st, int fd) +static int locate_bitmap1(struct supertype *st, int fd, int node_num) { unsigned long long offset; struct mdp_superblock_1 *sb; int mustfree = 0; + int ret; if (!st->sb) { if (st->ss->load_super(st, fd, NULL)) - return; /* no error I hope... */ + return -1; /* no error I hope... */ mustfree = 1; } sb = st->sb; + if ((__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) + ret = 0; + else + ret = -1; offset = __le64_to_cpu(sb->super_offset); - offset += (int32_t) __le32_to_cpu(sb->bitmap_offset); + offset += (int32_t) __le32_to_cpu(sb->bitmap_offset) * (node_num + 1); if (mustfree) free(sb); lseek64(fd, offset<<9, 0); + return ret; } static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update) @@ -2201,7 +2352,7 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE); int rv = 0; void *buf; - int towrite, n; + int towrite, n, len; struct align_fd afd; unsigned int i = 0; unsigned long long total_bm_space, bm_space_per_node; @@ -2210,17 +2361,41 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update case NameUpdate: /* update cluster name */ if (st->cluster_name) { - memset((char *)bms->cluster_name, 0, sizeof(bms->cluster_name)); - strncpy((char *)bms->cluster_name, st->cluster_name, 64); + len = sizeof(bms->cluster_name); + memset((char *)bms->cluster_name, 0, len); + strncpy((char *)bms->cluster_name, + st->cluster_name, len); + bms->cluster_name[len - 1] = '\0'; } break; case NodeNumUpdate: /* cluster md only supports superblock 1.2 now */ - if (st->minor_version != 2) { + if (st->minor_version != 2 && bms->version == BITMAP_MAJOR_CLUSTERED) { pr_err("Warning: cluster md only works with superblock 1.2\n"); return -EINVAL; } + if (bms->version == BITMAP_MAJOR_CLUSTERED) { + if (st->nodes == 1) { + /* the parameter for nodes is not valid */ + pr_err("Warning: cluster-md at least needs two nodes\n"); + return -EINVAL; + } else if (st->nodes == 0) + /* --nodes is not specified */ + break; + else if (__cpu_to_le32(st->nodes) < bms->nodes) { + /* Since the nodes num is not increased, no need to check the space + * is enough or not, just update bms->nodes */ + bms->nodes = __cpu_to_le32(st->nodes); + break; + } + } else { + /* no need to change bms->nodes for other bitmap types */ + if (st->nodes) + pr_err("Warning: --nodes option is only suitable for clustered bitmap\n"); + break; + } + /* Each node has an independent bitmap, it is necessary to calculate the * space is enough or not, first get how many bytes for the total bitmap */ bm_space_per_node = calc_bitmap_size(bms, 4096); @@ -2243,7 +2418,7 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update init_afd(&afd, fd); - locate_bitmap1(st, fd); + locate_bitmap1(st, fd, 0); if (posix_memalign(&buf, 4096, 4096)) return -ENOMEM; @@ -2258,11 +2433,15 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update memset(buf, 0xff, 4096); memcpy(buf, (char *)bms, sizeof(bitmap_super_t)); - towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9); - towrite = (towrite+7) >> 3; /* bits to bytes */ - towrite += sizeof(bitmap_super_t); - /* we need the bitmaps to be at 4k boundary */ - towrite = ROUND_UP(towrite, 4096); + /* + * use 4096 boundary if bitmap_offset is aligned + * with 8 sectors, then it should compatible with + * older mdadm. + */ + if (__le32_to_cpu(sb->bitmap_offset) & 7) + towrite = calc_bitmap_size(bms, 512); + else + towrite = calc_bitmap_size(bms, 4096); while (towrite > 0) { n = towrite; if (n > 4096) @@ -2290,6 +2469,7 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update static void free_super1(struct supertype *st) { + if (st->sb) free(st->sb); while (st->info) { @@ -2405,7 +2585,6 @@ void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0 void *ret; struct mdp_superblock_1 *sb; int i; - int rfd; unsigned long long offset; if (posix_memalign(&ret, 4096, 1024) != 0) @@ -2437,17 +2616,11 @@ void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0 sb->super_offset = __cpu_to_le64(offset); //*(__u64*)(st->other + 128 + 8 + 8) = __cpu_to_le64(offset); - if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || - read(rfd, sb->device_uuid, 16) != 16) { - __u32 r[4] = {random(), random(), random(), random()}; - memcpy(sb->device_uuid, r, 16); - } - if (rfd >= 0) - close(rfd); + random_uuid(sb->device_uuid); for (i = 0; i < MD_SB_DISKS; i++) { int state = sb0->disks[i].state; - sb->dev_roles[i] = 0xFFFF; + sb->dev_roles[i] = MD_DISK_ROLE_SPARE; if ((state & (1<dev_roles[i] = __cpu_to_le16(sb0->disks[i].raid_disk);