X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=Grow.c;h=c4f417ee283527c86f7344a4df7396c0541625ab;hb=bf08f6b1efed94b80c4cfb433db7202b6b7f794c;hp=327764919d0f8e1fa5decf80d33fc5e7ddb2239c;hpb=4abcbc21b90f614ec8eeb2da6eafffcb8ed10be7;p=thirdparty%2Fmdadm.git diff --git a/Grow.c b/Grow.c old mode 100644 new mode 100755 index 32776491..c4f417ee --- a/Grow.c +++ b/Grow.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2012 Neil Brown + * Copyright (C) 2001-2013 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -24,6 +24,10 @@ #include "mdadm.h" #include "dlink.h" #include +#include +#include +#include +#include #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN) #error no endian defined @@ -31,15 +35,11 @@ #include "md_u.h" #include "md_p.h" -#ifndef offsetof -#define offsetof(t,f) ((size_t)&(((t*)0)->f)) -#endif - int restore_backup(struct supertype *st, struct mdinfo *content, int working_disks, int next_spare, - char *backup_file, + char **backup_filep, int verbose) { int i; @@ -47,10 +47,12 @@ int restore_backup(struct supertype *st, struct mdinfo *dev; int err; int disk_count = next_spare + working_disks; + char *backup_file = *backup_filep; dprintf("Called restore_backup()\n"); fdlist = xmalloc(sizeof(int) * disk_count); + enable_fds(next_spare); for (i = 0; i < next_spare; i++) fdlist[i] = -1; for (dev = content->devs; dev; dev = dev->next) { @@ -67,6 +69,11 @@ int restore_backup(struct supertype *st, fdlist[next_spare++] = fd; } + if (!backup_file) { + backup_file = locate_backup(content->sys_name); + *backup_filep = backup_file; + } + if (st->ss->external && st->ss->recover_backup) err = st->ss->recover_backup(st, content); else @@ -80,11 +87,9 @@ int restore_backup(struct supertype *st, } free(fdlist); if (err) { - pr_err("Failed to restore critical" - " section for reshape - sorry.\n"); + pr_err("Failed to restore critical section for reshape - sorry.\n"); if (!backup_file) - pr_err("Possibly you need" - " to specify a --backup-file\n"); + pr_err("Possibly you need to specify a --backup-file\n"); return 1; } @@ -288,12 +293,13 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s) if (vers < 9003) { major = BITMAP_MAJOR_HOSTENDIAN; - pr_err("Warning - bitmaps created on this kernel" - " are not portable\n" - " between different architectures. Consider upgrading" - " the Linux kernel.\n"); + pr_err("Warning - bitmaps created on this kernel are not portable\n" + " between different architectures. Consider upgrading the Linux kernel.\n"); } + if (s->bitmap_file && strcmp(s->bitmap_file, "clustered") == 0) + major = BITMAP_MAJOR_CLUSTERED; + if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) { if (errno == ENOMEM) pr_err("Memory allocation failure.\n"); @@ -322,13 +328,15 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s) if (strcmp(s->bitmap_file, "none")==0) { array.state &= ~(1<bitmap_file, "internal") == 0) { + if (strcmp(s->bitmap_file, "internal") == 0 || + strcmp(s->bitmap_file, "clustered") == 0) { int rv; int d; int offset_setable = 0; struct mdinfo *mdi; if (st->ss->add_internal_bitmap == NULL) { - pr_err("Internal bitmaps not supported " - "with %s metadata\n", st->ss->name); + pr_err("Internal bitmaps not supported with %s metadata\n", st->ss->name); return 1; } + st->nodes = c->nodes; + st->cluster_name = c->homecluster; mdi = sysfs_read(fd, NULL, GET_BITMAP_LOCATION); if (mdi) offset_setable = 1; @@ -408,10 +418,9 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s) bitmapsize, offset_setable, major) ) - st->ss->write_bitmap(st, fd2); + st->ss->write_bitmap(st, fd2, NoUpdate); else { - pr_err("failed to create internal bitmap" - " - chunksize problem.\n"); + pr_err("failed to create internal bitmap - chunksize problem.\n"); close(fd2); return 1; } @@ -425,13 +434,14 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s) rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location", mdi->bitmap_offset); } else { + if (strcmp(s->bitmap_file, "clustered") == 0) + array.state |= (1<devnm, GET_VERSION); + char buf[20]; - if (sra) + if (sra && + sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0 + && strcmp(buf, "frozen\n") == 0) sysfs_set_str(sra, NULL, "sync_action", "idle"); sysfs_free(sra); } @@ -624,13 +636,9 @@ static void wait_reshape(struct mdinfo *sra) if (fd < 0) return; - while (sysfs_fd_get_str(fd, action, 20) > 0 && - strncmp(action, "reshape", 7) == 0) { - fd_set rfds; - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - select(fd+1, NULL, NULL, &rfds, NULL); - } + while (sysfs_fd_get_str(fd, action, 20) > 0 && + strncmp(action, "reshape", 7) == 0) + sysfs_wait(fd, NULL); close(fd); } @@ -718,20 +726,34 @@ int start_reshape(struct mdinfo *sra, int already_running, if (!already_running) sysfs_set_num(sra, NULL, "sync_min", sync_max_to_set); err = err ?: sysfs_set_num(sra, NULL, "sync_max", sync_max_to_set); - if (!already_running) - err = err ?: sysfs_set_str(sra, NULL, "sync_action", "reshape"); - + if (!already_running && err == 0) { + int cnt = 5; + do { + err = sysfs_set_str(sra, NULL, "sync_action", "reshape"); + if (err) + sleep(1); + } while (err && errno == EBUSY && cnt-- > 0); + } return err; } void abort_reshape(struct mdinfo *sra) { sysfs_set_str(sra, NULL, "sync_action", "idle"); + /* + * Prior to kernel commit: 23ddff3792f6 ("md: allow suspend_lo and + * suspend_hi to decrease as well as increase.") + * you could only increase suspend_{lo,hi} unless the region they + * covered was empty. So to reset to 0, you need to push suspend_lo + * up past suspend_hi first. So to maximize the chance of mdadm + * working on all kernels, we want to keep doing that. + */ sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL); sysfs_set_num(sra, NULL, "suspend_hi", 0); sysfs_set_num(sra, NULL, "suspend_lo", 0); sysfs_set_num(sra, NULL, "sync_min", 0); - sysfs_set_str(sra, NULL, "sync_max", "max"); + // It isn't safe to reset sync_max as we aren't monitoring. + // Array really should be stopped at this point. } int remove_disks_for_takeover(struct supertype *st, @@ -837,13 +859,15 @@ int reshape_prepare_fdlist(char *devname, int d = 0; struct mdinfo *sd; + enable_fds(nrdisks); for (d = 0; d <= nrdisks; d++) fdlist[d] = -1; d = raid_disks; for (sd = sra->devs; sd; sd = sd->next) { if (sd->disk.state & (1<disk.state & (1<disk.state & (1<disk.raid_disk < raid_disks) { char *dn = map_dev(sd->disk.major, sd->disk.minor, 1); fdlist[sd->disk.raid_disk] @@ -880,6 +904,7 @@ int reshape_open_backup_file(char *backup_file, long blocks, int *fdlist, unsigned long long *offsets, + char *sys_name, int restart) { /* Return 1 on success, 0 on any form of failure */ @@ -906,8 +931,7 @@ int reshape_open_backup_file(char *backup_file, dev = stb.st_dev; fstat(fd, &stb); if (stb.st_rdev == dev) { - pr_err("backup file must NOT be" - " on the array being reshaped.\n"); + pr_err("backup file must NOT be on the array being reshaped.\n"); close(*fdlist); return 0; } @@ -915,8 +939,7 @@ int reshape_open_backup_file(char *backup_file, memset(buf, 0, 512); for (i=0; i < blocks + 8 ; i++) { if (write(*fdlist, buf, 512) != 512) { - pr_err("%s: cannot create" - " backup file %s: %s\n", + pr_err("%s: cannot create backup file %s: %s\n", devname, backup_file, strerror(errno)); return 0; } @@ -927,18 +950,15 @@ int reshape_open_backup_file(char *backup_file, return 0; } - return 1; -} - -unsigned long GCD(unsigned long a, unsigned long b) -{ - while (a != b) { - if (a < b) - b -= a; - if (b < a) - a -= b; + if (!restart && strncmp(backup_file, MAP_DIR, strlen(MAP_DIR)) != 0) { + char *bu = make_backup(sys_name); + if (symlink(backup_file, bu)) + pr_err("Recording backup file in " MAP_DIR " failed: %s\n", + strerror(errno)); + free(bu); } - return a; + + return 1; } unsigned long compute_backup_blocks(int nchunk, int ochunk, @@ -960,7 +980,7 @@ unsigned long compute_backup_blocks(int nchunk, int ochunk, return blocks; } -char *analyse_change(struct mdinfo *info, struct reshape *re) +char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re) { /* Based on the current array state in info->array and * the changes in info->new_* etc, determine: @@ -1001,9 +1021,16 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) /* chunk size is meaningful, must divide component_size * evenly */ - if (info->component_size % (info->new_chunk/512)) - return "New chunk size does not" - " divide component size"; + if (info->component_size % (info->new_chunk/512)) { + unsigned long long shrink = info->component_size; + shrink &= ~(unsigned long long)(info->new_chunk/512-1); + pr_err("New chunk size (%dK) does not evenly divide device size (%lluk)\n", + info->new_chunk/1024, info->component_size/2); + pr_err("After shrinking any filesystem, \"mdadm --grow %s --size %llu\"\n", + devname, shrink/2); + pr_err("will shrink the array so the given chunk size would work.\n"); + return ""; + } break; default: return "chunk size not meaningful for this level"; @@ -1013,7 +1040,12 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) switch (info->array.level) { default: - return "Cannot understand this RAID level"; + return "No reshape is possibly for this RAID level"; + case LEVEL_LINEAR: + if (info->delta_disks != UnSet) + return "Only --add is supported for LINEAR, setting --raid-disks is not needed"; + else + return "Only --add is supported for LINEAR, other --grow options are not meaningful"; case 1: /* RAID1 can convert to RAID1 with different disks, or * raid5 with 2 disks, or @@ -1021,13 +1053,11 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) */ if (info->new_level > 1 && (info->component_size & 7)) - return "Cannot convert RAID1 of this size - " - "reduce size to multiple of 4K first."; + return "Cannot convert RAID1 of this size - reduce size to multiple of 4K first."; if (info->new_level == 0) { if (info->delta_disks != UnSet && info->delta_disks != 0) - return "Cannot change number of disks " - "with RAID1->RAID0 conversion"; + return "Cannot change number of disks with RAID1->RAID0 conversion"; re->level = 0; re->before.data_disks = 1; re->after.data_disks = 1; @@ -1113,8 +1143,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) if (far > 1 && !offset) return "Cannot reshape RAID10 to far-mode"; if (near * far != copies) - return "Cannot change number of copies" - " when reshaping RAID10"; + return "Cannot change number of copies when reshaping RAID10"; } if (info->delta_disks == UnSet) info->delta_disks = 0; @@ -1164,15 +1193,13 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) / info->array.raid_disks); if (info->array.raid_disks * (copies-1) != info->delta_disks) - return "Impossible number of devices" - " for RAID0->RAID10"; + return "Impossible number of devices for RAID0->RAID10"; info->new_layout = 0x100 + copies; } if (info->delta_disks == UnSet) { int copies = info->new_layout & 0xff; if (info->new_layout != 0x100 + copies) - return "New layout impossible" - " for RAID0->RAID10";; + return "New layout impossible for RAID0->RAID10";; info->delta_disks = (copies - 1) * info->array.raid_disks; } @@ -1202,11 +1229,15 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) delta_parity = 1; re->level = 5; re->before.layout = ALGORITHM_PARITY_N; + if (info->new_layout == UnSet) + info->new_layout = map_name(r5layout, "default"); break; case 6: delta_parity = 2; re->level = 6; re->before.layout = ALGORITHM_PARITY_N; + if (info->new_layout == UnSet) + info->new_layout = map_name(r6layout, "default"); break; default: return "Impossible level change requested"; @@ -1263,8 +1294,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) return "Can only convert a 2-device array to RAID1"; if (info->delta_disks != UnSet && info->delta_disks != 0) - return "Cannot set raid_disk when " - "converting RAID5->RAID1"; + return "Cannot set raid_disk when converting RAID5->RAID1"; re->level = 1; info->new_chunk = 0; return NULL; @@ -1321,7 +1351,6 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) switch (re->level) { case 4: - re->before.layout = 0; re->after.layout = 0; break; case 5: @@ -1360,8 +1389,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) strcat(strcpy(layout, ls), "-6"); l = map_name(r6layout, layout); if (l == UnSet) - return "Cannot find RAID6 layout" - " to convert to"; + return "Cannot find RAID6 layout to convert to"; } else { /* Current RAID6 has no equivalent. * If it is already a '-6' layout we @@ -1409,6 +1437,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re) /* So we have a restripe operation, we need to calculate the number * of blocks per reshape operation. */ + re->new_size = info->component_size * re->before.data_disks; if (info->new_chunk == 0) info->new_chunk = info->array.chunk_size; if (re->after.data_disks == re->before.data_disks && @@ -1472,8 +1501,8 @@ static int set_array_size(struct supertype *st, struct mdinfo *sra, ret_val = 0; dprintf("Array size changed"); } - dprintf(" from %llu to %llu.\n", - current_size, new_size); + dprintf_cont(" from %llu to %llu.\n", + current_size, new_size); } sysfs_free(info); } else @@ -1493,8 +1522,8 @@ static int reshape_container(char *container, char *devname, struct supertype *st, struct mdinfo *info, int force, - char *backup_file, - int verbose, int restart, int freeze_reshape); + char *backup_file, int verbose, + int forked, int restart, int freeze_reshape); int Grow_reshape(char *devname, int fd, struct mddev_dev *devlist, @@ -1534,7 +1563,6 @@ int Grow_reshape(char *devname, int fd, struct mdinfo info; struct mdinfo *sra; - if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) { pr_err("%s is not an active md array - aborting\n", devname); @@ -1548,10 +1576,8 @@ int Grow_reshape(char *devname, int fd, if (s->size > 0 && (s->chunk || s->level!= UnSet || s->layout_str || s->raiddisks)) { - pr_err("cannot change component size at the same time " - "as other changes.\n" - " Change size first, then check data is intact before " - "making other changes.\n"); + pr_err("cannot change component size at the same time as other changes.\n" + " Change size first, then check data is intact before making other changes.\n"); return 1; } @@ -1569,10 +1595,18 @@ int Grow_reshape(char *devname, int fd, return 1; } if (s->raiddisks > st->max_devs) { - pr_err("Cannot increase raid-disks on this array" - " beyond %d\n", st->max_devs); + pr_err("Cannot increase raid-disks on this array beyond %d\n", st->max_devs); return 1; } + if (s->level == 0 && + (array.state & (1<raiddisks > array.raid_disks && array.spare_disks +added_disks < (s->raiddisks - array.raid_disks) && !c->force) { - pr_err("Need %d spare%s to avoid degraded array," - " and only have %d.\n" + pr_err("Need %d spare%s to avoid degraded array, and only have %d.\n" " Use --force to over-ride this check.\n", s->raiddisks - array.raid_disks, s->raiddisks - array.raid_disks == 1 ? "" : "s", @@ -1672,8 +1703,7 @@ int Grow_reshape(char *devname, int fd, sysfs_free(sra); return 1; } else if (frozen < 0) { - pr_err("%s is performing resync/recovery and cannot" - " be reshaped\n", devname); + pr_err("%s is performing resync/recovery and cannot be reshaped\n", devname); sysfs_free(sra); return 1; } @@ -1714,8 +1744,7 @@ int Grow_reshape(char *devname, int fd, sizeinfo->array.layout, sizeinfo->array.raid_disks); new_size /= data_disks; - dprintf("Metadata size correction from %llu to " - "%llu (%llu)\n", orig_size, new_size, + dprintf("Metadata size correction from %llu to %llu (%llu)\n", orig_size, new_size, new_size * data_disks); s->size = new_size; sysfs_free(sizeinfo); @@ -1752,13 +1781,11 @@ int Grow_reshape(char *devname, int fd, } } if (rv) { - pr_err("Cannot set size on " - "array members.\n"); + pr_err("Cannot set size on array members.\n"); goto size_change_error; } if (min_csize && s->size > min_csize) { - pr_err("Cannot safely make this array " - "use more than 2TB per device on this kernel.\n"); + pr_err("Cannot safely make this array use more than 2TB per device on this kernel.\n"); rv = 1; goto size_change_error; } @@ -1766,8 +1793,7 @@ int Grow_reshape(char *devname, int fd, /* Don't let the kernel choose a size - it will get * it wrong */ - pr_err("Limited v0.90 array to " - "2TB per device\n"); + pr_err("Limited v0.90 array to 2TB per device\n"); s->size = min_csize; } if (st->ss->external) { @@ -1776,8 +1802,8 @@ int Grow_reshape(char *devname, int fd, "raid5"); if (!rv) { raid0_takeover = 1; - /* get array parametes after takeover - * to chane one parameter at time only + /* get array parameters after takeover + * to change one parameter at time only */ rv = ioctl(fd, GET_ARRAY_INFO, &array); } @@ -1795,7 +1821,7 @@ int Grow_reshape(char *devname, int fd, if (s->size == MAX_SIZE) s->size = 0; array.size = s->size; - if ((unsigned)array.size != s->size) { + if (s->size & ~INT32_MAX) { /* got truncated to 32bit, write to * component_size instead */ @@ -1856,12 +1882,10 @@ size_change_error: s->size = array.size; if (c->verbose >= 0) { if (s->size == orig_size) - pr_err("component size of %s " - "unchanged at %lluK\n", + pr_err("component size of %s unchanged at %lluK\n", devname, s->size); else - pr_err("component size of %s " - "has been set to %lluK\n", + pr_err("component size of %s has been set to %lluK\n", devname, s->size); } changed = 1; @@ -1875,6 +1899,7 @@ size_change_error: if ((s->level == UnSet || s->level == array.level) && (s->layout_str == NULL) && (s->chunk == 0 || s->chunk == array.chunk_size) && + data_offset == INVALID_SECTORS && (s->raiddisks == 0 || s->raiddisks == array.raid_disks)) { /* Nothing more to do */ if (!changed && c->verbose >= 0) @@ -1895,7 +1920,7 @@ size_change_error: int err; err = remove_disks_for_takeover(st, sra, array.layout); if (err) { - dprintf(Name": Array cannot be reshaped\n"); + dprintf("Array cannot be reshaped\n"); if (cfd > -1) close(cfd); rv = 1; @@ -1928,12 +1953,9 @@ size_change_error: if (info.array.level == 6 && (info.new_level == 6 || info.new_level == UnSet) && info.array.layout >= 16) { - pr_err("%s has a non-standard layout. If you" - " wish to preserve this\n", devname); - cont_err("during the reshape, please specify" - " --layout=preserve\n"); - cont_err("If you want to change it, specify a" - " layout or use --layout=normalise\n"); + pr_err("%s has a non-standard layout. If you wish to preserve this\n", devname); + cont_err("during the reshape, please specify --layout=preserve\n"); + cont_err("If you want to change it, specify a layout or use --layout=normalise\n"); rv = 1; goto release; } @@ -1950,8 +1972,7 @@ size_change_error: info.new_layout = map_name(r6layout, l); } } else { - pr_err("%s is only meaningful when reshaping" - " a RAID6 array.\n", s->layout_str); + pr_err("%s is only meaningful when reshaping a RAID6 array.\n", s->layout_str); rv = 1; goto release; } @@ -1973,8 +1994,7 @@ size_change_error: strcat(l, "-6"); info.new_layout = map_name(r6layout, l); } else { - pr_err("%s in only meaningful when reshaping" - " to RAID6\n", s->layout_str); + pr_err("%s in only meaningful when reshaping to RAID6\n", s->layout_str); rv = 1; goto release; } @@ -1996,14 +2016,12 @@ size_change_error: info.new_layout = parse_layout_faulty(s->layout_str); break; default: - pr_err("layout not meaningful" - " with this level\n"); + pr_err("layout not meaningful with this level\n"); rv = 1; goto release; } if (info.new_layout == UnSet) { - pr_err("layout %s not understood" - " for this level\n", + pr_err("layout %s not understood for this level\n", s->layout_str); rv = 1; goto release; @@ -2046,7 +2064,7 @@ size_change_error: * performed at the level of the container */ rv = reshape_container(container, devname, -1, st, &info, - c->force, c->backup_file, c->verbose, 0, 0); + c->force, c->backup_file, c->verbose, 0, 0, 0); frozen = 0; } else { /* get spare devices from external metadata @@ -2109,27 +2127,22 @@ static int verify_reshape_position(struct mdinfo *info, int level) char *ep; unsigned long long position = strtoull(buf, &ep, 0); - dprintf(Name": Read sync_max sysfs entry is: %s\n", buf); + dprintf("Read sync_max sysfs entry is: %s\n", buf); if (!(ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))) { position *= get_data_disks(level, info->new_layout, info->array.raid_disks); if (info->reshape_progress < position) { - dprintf("Corrected reshape progress (%llu) to " - "md position (%llu)\n", + dprintf("Corrected reshape progress (%llu) to md position (%llu)\n", info->reshape_progress, position); info->reshape_progress = position; ret_val = 1; } else if (info->reshape_progress > position) { - pr_err("Fatal error: array " - "reshape was not properly frozen " - "(expected reshape position is %llu, " - "but reshape progress is %llu.\n", + pr_err("Fatal error: array reshape was not properly frozen (expected reshape position is %llu, but reshape progress is %llu.\n", position, info->reshape_progress); ret_val = -1; } else { - dprintf("Reshape position in md and metadata " - "are the same;"); + dprintf("Reshape position in md and metadata are the same;"); ret_val = 1; } } @@ -2143,15 +2156,63 @@ static int verify_reshape_position(struct mdinfo *info, int level) return ret_val; } +static unsigned long long choose_offset(unsigned long long lo, + unsigned long long hi, + unsigned long long min, + unsigned long long max) +{ + /* Choose a new offset between hi and lo. + * It must be between min and max, but + * we would prefer something near the middle of hi/lo, and also + * prefer to be aligned to a big power of 2. + * + * So we start with the middle, then for each bit, + * starting at '1' and increasing, if it is set, we either + * add it or subtract it if possible, preferring the option + * which is furthest from the boundary. + * + * We stop once we get a 1MB alignment. As units are in sectors, + * 1MB = 2*1024 sectors. + */ + unsigned long long choice = (lo + hi) / 2; + unsigned long long bit = 1; + + for (bit = 1; bit < 2*1024; bit = bit << 1) { + unsigned long long bigger, smaller; + if (! (bit & choice)) + continue; + bigger = choice + bit; + smaller = choice - bit; + if (bigger > max && smaller < min) + break; + if (bigger > max) + choice = smaller; + else if (smaller < min) + choice = bigger; + else if (hi - bigger > smaller - lo) + choice = bigger; + else + choice = smaller; + } + return choice; +} + static int set_new_data_offset(struct mdinfo *sra, struct supertype *st, char *devname, int delta_disks, unsigned long long data_offset, - unsigned long long min) + unsigned long long min, + int can_fallback) { struct mdinfo *sd; int dir = 0; int err = 0; + unsigned long long before, after; + /* Need to find min space before and after so same is used + * on all devices + */ + before = UINT64_MAX; + after = UINT64_MAX; for (sd = sra->devs; sd; sd = sd->next) { char *dn; int dfd; @@ -2183,113 +2244,161 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st, if (info2.space_before == 0 && info2.space_after == 0) { /* Metadata doesn't support data_offset changes */ - return 1; + if (!can_fallback) + pr_err("%s: Metadata version doesn't support data_offset changes\n", + devname); + goto fallback; + } + if (before > info2.space_before) + before = info2.space_before; + if (after > info2.space_after) + after = info2.space_after; + + if (data_offset != INVALID_SECTORS) { + if (dir == 0) { + if (info2.data_offset == data_offset) { + pr_err("%s: already has that data_offset\n", + dn); + goto release; + } + if (data_offset < info2.data_offset) + dir = -1; + else + dir = 1; + } else if ((data_offset <= info2.data_offset && dir == 1) || + (data_offset >= info2.data_offset && dir == -1)) { + pr_err("%s: differing data offsets on devices make this --data-offset setting impossible\n", + dn); + goto release; + } } + } + if (before == UINT64_MAX) + /* impossible really, there must be no devices */ + return 1; + + for (sd = sra->devs; sd; sd = sd->next) { + char *dn = map_dev(sd->disk.major, sd->disk.minor, 0); + unsigned long long new_data_offset; + + if (sd->disk.state & (1<data_offset + min; else { - if (data_offset < info2.data_offset + min) { + if (data_offset < sd->data_offset + min) { pr_err("--data-offset too small for %s\n", dn); goto release; } - info2.new_data_offset = data_offset; + new_data_offset = data_offset; } } else if (delta_disks > 0) { /* need space before */ - if (info2.space_before < min) { + if (before < min) { + if (can_fallback) + goto fallback; pr_err("Insufficient head-space for reshape on %s\n", dn); goto release; } if (data_offset == INVALID_SECTORS) - info2.new_data_offset = info2.data_offset - min; + new_data_offset = sd->data_offset - min; else { - if (data_offset > info2.data_offset - min) { + if (data_offset > sd->data_offset - min) { pr_err("--data-offset too large for %s\n", dn); goto release; } - info2.new_data_offset = data_offset; + new_data_offset = data_offset; } } else { if (dir == 0) { - /* can move up or down. 'data_offset' - * might guide us, otherwise choose - * direction with most space + /* can move up or down. If 'data_offset' + * was set we would have already decided, + * so just choose direction with most space. */ - if (data_offset == INVALID_SECTORS) { - if (info2.space_before > info2.space_after) - dir = -1; - else - dir = 1; - } else if (data_offset < info2.data_offset) + if (before > after) dir = -1; else dir = 1; - sysfs_set_str(sra, NULL, "reshape_direction", - dir == 1 ? "backwards" : "forwards"); } - switch (dir) { - case 1: /* Increase data offset */ - if (info2.space_after < min) { + sysfs_set_str(sra, NULL, "reshape_direction", + dir == 1 ? "backwards" : "forwards"); + if (dir > 0) { + /* Increase data offset */ + if (after < min) { + if (can_fallback) + goto fallback; pr_err("Insufficient tail-space for reshape on %s\n", dn); goto release; } if (data_offset != INVALID_SECTORS && - data_offset < info2.data_offset + min) { + data_offset < sd->data_offset + min) { pr_err("--data-offset too small on %s\n", dn); goto release; } if (data_offset != INVALID_SECTORS) - info2.new_data_offset = data_offset; - else { - unsigned long long off = - info2.space_after / 2; - off &= ~7ULL; - if (off < min) - off = min; - info2.new_data_offset = - info2.data_offset + off; - } - break; - case -1: /* Decrease data offset */ - if (info2.space_before < min) { + new_data_offset = data_offset; + else + new_data_offset = choose_offset(sd->data_offset, + sd->data_offset + after, + sd->data_offset + min, + sd->data_offset + after); + } else { + /* Decrease data offset */ + if (before < min) { + if (can_fallback) + goto fallback; pr_err("insufficient head-room on %s\n", dn); goto release; } if (data_offset != INVALID_SECTORS && - data_offset < info2.data_offset - min) { + data_offset < sd->data_offset - min) { pr_err("--data-offset too small on %s\n", dn); goto release; } if (data_offset != INVALID_SECTORS) - info2.new_data_offset = data_offset; - else { - unsigned long long off = - info2.space_before / 2; - off &= ~7ULL; - if (off < min) - off = min; - info2.new_data_offset = - info2.data_offset - off; - } - break; + new_data_offset = data_offset; + else + new_data_offset = choose_offset(sd->data_offset - before, + sd->data_offset, + sd->data_offset - before, + sd->data_offset - min); } } - if (sysfs_set_num(sra, sd, "new_offset", - info2.new_data_offset) < 0) { - err = errno; - if (sd == sra->devs && err == ENOENT) - /* Early kernel, no 'new_offset' file. + err = sysfs_set_num(sra, sd, "new_offset", new_data_offset); + if (err < 0 && errno == E2BIG) { + /* try again after increasing data size to max */ + err = sysfs_set_num(sra, sd, "size", 0); + if (err < 0 && errno == EINVAL && + !(sd->disk.state & (1<component_size + after)/2); + } + err = sysfs_set_num(sra, sd, "new_offset", + new_data_offset); + } + if (err < 0) { + if (errno == E2BIG && data_offset != INVALID_SECTORS) { + pr_err("data-offset is too big for %s\n", + dn); + goto release; + } + if (sd == sra->devs && + (errno == ENOENT || errno == E2BIG)) + /* Early kernel, no 'new_offset' file, + * or kernel doesn't like us. * For RAID5/6 this is not fatal */ return 1; @@ -2301,6 +2410,9 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st, return err; release: return -1; +fallback: + /* Just use a backup file */ + return 1; } static int raid10_reshape(char *container, int fd, char *devname, @@ -2357,7 +2469,7 @@ static int raid10_reshape(char *container, int fd, char *devname, } } err = set_new_data_offset(sra, st, devname, info->delta_disks, data_offset, - min); + min, 0); if (err == 1) { pr_err("Cannot set new_data_offset: RAID10 reshape not\n"); cont_err("supported on this kernel\n"); @@ -2380,8 +2492,7 @@ static int raid10_reshape(char *container, int fd, char *devname, devname); if (err == EBUSY && (info->array.state & (1<cache_size < cache) subarray_set_num(container, sra, "stripe_cache_size", @@ -2505,8 +2616,7 @@ static int impose_reshape(struct mdinfo *sra, if (err == EBUSY && (array.state & (1<= 4 && array.level <= 6)) { + /* To convert to RAID0 we need to fail and + * remove any non-data devices. */ + int found = 0; + int d; + int data_disks = array.raid_disks - 1; + if (array.level == 6) + data_disks -= 1; + if (array.level == 5 && + array.layout != ALGORITHM_PARITY_N) + return -1; + if (array.level == 6 && + array.layout != ALGORITHM_PARITY_N_6) + return -1; + sysfs_set_str(&info, NULL,"sync_action", "idle"); + /* First remove any spares so no recovery starts */ + for (d = 0, found = 0; + d < MAX_DISKS && found < array.nr_disks; + d++) { + mdu_disk_info_t disk; + disk.number = d; + if (ioctl(fd, GET_DISK_INFO, &disk) < 0) + continue; + if (disk.major == 0 && disk.minor == 0) + continue; + found++; + if ((disk.state & (1 << MD_DISK_ACTIVE)) + && disk.raid_disk < data_disks) + /* keep this */ + continue; + ioctl(fd, HOT_REMOVE_DISK, + makedev(disk.major, disk.minor)); + } + /* Now fail anything left */ + ioctl(fd, GET_ARRAY_INFO, &array); + for (d = 0, found = 0; + d < MAX_DISKS && found < array.nr_disks; + d++) { + int cnt; + mdu_disk_info_t disk; + disk.number = d; + if (ioctl(fd, GET_DISK_INFO, &disk) < 0) + continue; + if (disk.major == 0 && disk.minor == 0) + continue; + found++; + if ((disk.state & (1 << MD_DISK_ACTIVE)) + && disk.raid_disk < data_disks) + /* keep this */ + continue; + ioctl(fd, SET_DISK_FAULTY, + makedev(disk.major, disk.minor)); + cnt = 5; + while (ioctl(fd, HOT_REMOVE_DISK, + makedev(disk.major, disk.minor)) < 0 + && errno == EBUSY + && cnt--) { + usleep(10000); + } + } + } + c = map_num(pers, level); + if (c) { + int err = sysfs_set_str(&info, NULL, "level", c); + if (err) { + err = errno; + pr_err("%s: could not set level to %s\n", + devname, c); + if (err == EBUSY && + (array.state & (1<= 0) + pr_err("level of %s changed to %s\n", + devname, c); + } + return 0; +} + +int sigterm = 0; +static void catch_term(int sig) +{ + sigterm = 1; +} + +static int continue_via_systemd(char *devnm) +{ + int skipped, i, pid, status; + char pathbuf[1024]; + /* In a systemd/udev world, it is best to get systemd to + * run "mdadm --grow --continue" rather than running in the + * background. + */ + switch(fork()) { + case 0: + /* FIXME yuk. CLOSE_EXEC?? */ + skipped = 0; + for (i = 3; skipped < 20; i++) + if (close(i) < 0) + skipped++; + else + skipped = 0; + + /* Don't want to see error messages from + * systemctl. If the service doesn't exist, + * we fork ourselves. + */ + close(2); + open("/dev/null", O_WRONLY); + snprintf(pathbuf, sizeof(pathbuf), "mdadm-grow-continue@%s.service", + devnm); + status = execl("/usr/bin/systemctl", "systemctl", + "start", + pathbuf, NULL); + status = execl("/bin/systemctl", "systemctl", "start", + pathbuf, NULL); + exit(1); + case -1: /* Just do it ourselves. */ + break; + default: /* parent - good */ + pid = wait(&status); + if (pid >= 0 && status == 0) + return 1; + } + return 0; +} + static int reshape_array(char *container, int fd, char *devname, struct supertype *st, struct mdinfo *info, int force, struct mddev_dev *devlist, @@ -2569,6 +2815,7 @@ static int reshape_array(char *container, int fd, char *devname, unsigned long long array_size; int done; struct mdinfo *sra = NULL; + char buf[20]; /* when reshaping a RAID0, the component_size might be zero. * So try to fix that up. @@ -2591,7 +2838,7 @@ static int reshape_array(char *container, int fd, char *devname, info->new_level = UnSet; if (info->delta_disks > 0) info->array.raid_disks -= info->delta_disks; - msg = analyse_change(info, &reshape); + msg = analyse_change(devname, info, &reshape); info->new_level = new_level; if (info->delta_disks > 0) info->array.raid_disks += info->delta_disks; @@ -2599,9 +2846,11 @@ static int reshape_array(char *container, int fd, char *devname, /* Make sure the array isn't read-only */ ioctl(fd, RESTART_ARRAY_RW, 0); } else - msg = analyse_change(info, &reshape); + msg = analyse_change(devname, info, &reshape); if (msg) { - pr_err("%s\n", msg); + /* if msg == "", error has already been printed */ + if (msg[0]) + pr_err("%s\n", msg); goto release; } if (restart && @@ -2609,12 +2858,13 @@ static int reshape_array(char *container, int fd, char *devname, reshape.before.layout != info->array.layout || reshape.before.data_disks + reshape.parity != info->array.raid_disks - max(0, info->delta_disks))) { - pr_err("reshape info is not in native format -" - " cannot continue.\n"); + pr_err("reshape info is not in native format - cannot continue.\n"); goto release; } - if (st->ss->external && restart && (info->reshape_progress == 0)) { + if (st->ss->external && restart && (info->reshape_progress == 0) && + !((sysfs_get_str(info, NULL, "sync_action", buf, sizeof(buf)) > 0) && + (strncmp(buf, "reshape", 7) == 0))) { /* When reshape is restarted from '0', very begin of array * it is possible that for external metadata reshape and array * configuration doesn't happen. @@ -2630,6 +2880,22 @@ static int reshape_array(char *container, int fd, char *devname, /* reshape already started. just skip to monitoring the reshape */ if (reshape.backup_blocks == 0) return 0; + if (restart & RESHAPE_NO_BACKUP) + return 0; + + /* Need 'sra' down at 'started:' */ + sra = sysfs_read(fd, NULL, + GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK| + GET_CACHE); + if (!sra) { + pr_err("%s: Cannot get array details from sysfs\n", + devname); + goto release; + } + + if (!backup_file) + backup_file = locate_backup(sra->sys_name); + goto started; } /* The container is frozen but the array may not be. @@ -2649,8 +2915,7 @@ static int reshape_array(char *container, int fd, char *devname, if (!force && info->new_level > 1 && info->array.level > 1 && spares_needed > info->array.spare_disks + added_disks) { - pr_err("Need %d spare%s to avoid degraded array," - " and only have %d.\n" + pr_err("Need %d spare%s to avoid degraded array, and only have %d.\n" " Use --force to over-ride this check.\n", spares_needed, spares_needed == 1 ? "" : "s", @@ -2663,8 +2928,7 @@ static int reshape_array(char *container, int fd, char *devname, - array.raid_disks; if ((info->new_level > 1 || info->new_level == 0) && spares_needed > info->array.spare_disks +added_disks) { - pr_err("Need %d spare%s to create working array," - " and only have %d.\n", + pr_err("Need %d spare%s to create working array, and only have %d.\n", spares_needed, spares_needed == 1 ? "" : "s", info->array.spare_disks + added_disks); @@ -2672,25 +2936,11 @@ static int reshape_array(char *container, int fd, char *devname, } if (reshape.level != array.level) { - char *c = map_num(pers, reshape.level); - int err; - if (c == NULL) - goto release; - - err = sysfs_set_str(info, NULL, "level", c); - if (err) { - err = errno; - pr_err("%s: could not set level to %s\n", - devname, c); - if (err == EBUSY && - (info->array.state & (1<= 0) - pr_err("level of %s changed to %s\n", - devname, c); + info->new_layout = UnSet; /* after level change, + * layout is meaningless */ orig_level = array.level; sysfs_freeze_array(info); @@ -2749,6 +2999,8 @@ static int reshape_array(char *container, int fd, char *devname, Manage_subdevs(devname, fd, devlist, verbose, 0,NULL, 0); + if (reshape.backup_blocks == 0 && data_offset != INVALID_SECTORS) + reshape.backup_blocks = reshape.before.data_disks * info->array.chunk_size/512; if (reshape.backup_blocks == 0) { /* No restriping needed, but we might need to impose * some more changes: layout, raid_disks, chunk_size @@ -2839,8 +3091,6 @@ static int reshape_array(char *container, int fd, char *devname, goto release; } -started: - if (array.level == 10) { /* Reshaping RAID10 does not require any data backup by * user-space. Instead it requires that the data_offset @@ -2863,9 +3113,11 @@ started: goto release; } - switch(set_new_data_offset(sra, st, devname, info->delta_disks, - data_offset, - reshape.min_offset_change)) { + if (!backup_file) + switch(set_new_data_offset(sra, st, devname, + reshape.after.data_disks - reshape.before.data_disks, + data_offset, + reshape.min_offset_change, 1)) { case -1: goto release; case 0: @@ -2883,10 +3135,37 @@ started: devname, container, &reshape) < 0) goto release; if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) { - pr_err("Failed to initiate reshape!\n"); - goto release; + struct mdinfo *sd; + if (errno != EINVAL) { + pr_err("Failed to initiate reshape!\n"); + goto release; + } + /* revert data_offset and try the old way */ + for (sd = sra->devs; sd; sd = sd->next) { + sysfs_set_num(sra, sd, "new_offset", + sd->data_offset); + sysfs_set_str(sra, NULL, "reshape_direction", + "forwards"); + } + break; } - + if (info->new_level == reshape.level) + return 0; + /* need to adjust level when reshape completes */ + switch(fork()) { + case -1: /* ignore error, but don't wait */ + return 0; + default: /* parent */ + return 0; + case 0: + map_fork(); + break; + } + close(fd); + wait_reshape(sra); + fd = open_dev(sra->sys_name); + if (fd >= 0) + impose_level(fd, info->new_level, devname, verbose); return 0; case 1: /* Couldn't set data_offset, try the old way */ if (data_offset != INVALID_SECTORS) { @@ -2896,6 +3175,7 @@ started: break; } +started: /* Decide how many blocks (sectors) for a reshape * unit. The number we have so far is just a minimum */ @@ -2910,12 +3190,10 @@ started: blocks < 16*1024*2) blocks *= 2; } else - pr_err("Need to backup %luK of critical " - "section..\n", blocks/2); + pr_err("Need to backup %luK of critical section..\n", blocks/2); if (blocks >= sra->component_size/2) { - pr_err("%s: Something wrong" - " - reshape aborted\n", + pr_err("%s: Something wrong - reshape aborted\n", devname); goto release; } @@ -2932,7 +3210,7 @@ started: d = reshape_prepare_fdlist(devname, sra, odisks, nrdisks, blocks, backup_file, fdlist, offsets); - if (d < 0) { + if (d < odisks) { goto release; } if ((st->ss->manage_reshape == NULL) || @@ -2940,19 +3218,19 @@ started: if (backup_file == NULL) { if (reshape.after.data_disks <= reshape.before.data_disks) { - pr_err("%s: Cannot grow - " - "need backup-file\n", devname); + pr_err("%s: Cannot grow - need backup-file\n", + devname); + pr_err(" Please provide one with \"--backup=...\"\n"); goto release; - } else if (sra->array.spare_disks == 0) { - pr_err("%s: Cannot grow - " - "need a spare or backup-file to backup " - "critical section\n", devname); + } else if (d == odisks) { + pr_err("%s: Cannot grow - need a spare or backup-file to backup critical section\n", devname); goto release; } } else { if (!reshape_open_backup_file(backup_file, fd, devname, (signed)blocks, fdlist+d, offsets+d, + sra->sys_name, restart)) { goto release; } @@ -2988,12 +3266,19 @@ started: free(fdlist); free(offsets); sysfs_free(sra); - pr_err("Reshape has to be continued from" - " location %llu when root filesystem has been mounted.\n", + pr_err("Reshape has to be continued from location %llu when root filesystem has been mounted.\n", sra->reshape_progress); return 1; } + if (!forked && !check_env("MDADM_NO_SYSTEMCTL")) + if (continue_via_systemd(container ?: sra->sys_name)) { + free(fdlist); + free(offsets); + sysfs_free(sra); + return 0; + } + /* Now we just need to kick off the reshape and watch, while * handling backups of the data... * This is all done by a forked background process. @@ -3024,7 +3309,7 @@ started: do { struct mdstat_ent *mds, *m; delayed = 0; - mds = mdstat_read(0, 0); + mds = mdstat_read(1, 0); for (m = mds; m; m = m->next) if (strcmp(m->devnm, sra->sys_name) == 0) { if (m->resync && @@ -3044,9 +3329,9 @@ started: delayed = 0; } if (delayed) - sleep(30 - (delayed-1) * 25); + mdstat_wait(30 - (delayed-1) * 25); } while (delayed); - + mdstat_close(); close(fd); if (check_env("MDADM_GROW_VERIFY")) fd = open(devname, O_RDONLY | O_DIRECT); @@ -3054,6 +3339,8 @@ started: fd = -1; mlockall(MCL_FUTURE); + signal(SIGTERM, catch_term); + if (st->ss->external) { /* metadata handler takes it from here */ done = st->ss->manage_reshape( @@ -3071,8 +3358,21 @@ started: free(fdlist); free(offsets); - if (backup_file && done) + if (backup_file && done) { + char *bul; + bul = make_backup(sra->sys_name); + if (bul) { + char buf[1024]; + int l = readlink(bul, buf, sizeof(buf) - 1); + if (l > 0) { + buf[l]=0; + unlink(buf); + } + unlink(bul); + free(bul); + } unlink(backup_file); + } if (!done) { abort_reshape(sra); goto out; @@ -3111,14 +3411,10 @@ started: set_array_size(st, info, info->text_version); if (info->new_level != reshape.level) { - - c = map_num(pers, info->new_level); - if (c) { - err = sysfs_set_str(sra, NULL, "level", c); - if (err) - pr_err("%s: could not set level " - "to %s\n", devname, c); - } + if (fd < 0) + fd = open(devname, O_RDONLY); + impose_level(fd, info->new_level, devname, verbose); + close(fd); if (info->new_level == 0) st->update_tail = NULL; } @@ -3150,8 +3446,8 @@ int reshape_container(char *container, char *devname, struct supertype *st, struct mdinfo *info, int force, - char *backup_file, - int verbose, int restart, int freeze_reshape) + char *backup_file, int verbose, + int forked, int restart, int freeze_reshape) { struct mdinfo *cc = NULL; int rv = restart; @@ -3176,15 +3472,18 @@ int reshape_container(char *container, char *devname, */ ping_monitor(container); - switch (fork()) { + if (!forked && !freeze_reshape && !check_env("MDADM_NO_SYSTEMCTL")) + if (continue_via_systemd(container)) + return 0; + + switch (forked ? 0 : fork()) { case -1: /* error */ perror("Cannot fork to complete reshape\n"); unfreeze(st); return 1; default: /* parent */ if (!freeze_reshape) - printf(Name ": multi-array reshape continues" - " in background\n"); + printf("%s: multi-array reshape continues in background\n", Name); return 0; case 0: /* child */ map_fork(); @@ -3246,8 +3545,7 @@ int reshape_container(char *container, char *devname, fd = open_dev(mdstat->devnm); if (fd < 0) { - printf(Name ": Device %s cannot be opened for reshape.", - adev); + pr_err("Device %s cannot be opened for reshape.\n", adev); break; } @@ -3262,8 +3560,7 @@ int reshape_container(char *container, char *devname, * This is possibly interim until the behaviour of * reshape_array is resolved(). */ - printf(Name ": Multiple reshape execution detected for " - "device %s.", adev); + printf("%s: Multiple reshape execution detected for device %s.\n", Name, adev); close(fd); break; } @@ -3275,7 +3572,7 @@ int reshape_container(char *container, char *devname, flush_mdmon(container); rv = reshape_array(container, fd, adev, st, - content, force, NULL, 0ULL, + content, force, NULL, INVALID_SECTORS, backup_file, verbose, 1, restart, freeze_reshape); close(fd); @@ -3326,7 +3623,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape, unsigned long long backup_point, unsigned long long wait_point, unsigned long long *suspend_point, - unsigned long long *reshape_completed) + unsigned long long *reshape_completed, int *frozen) { /* This function is called repeatedly by the reshape manager. * It determines how much progress can safely be made and allows @@ -3543,7 +3840,8 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape, wait_point = info->component_size - wait_point; } - sysfs_set_num(info, NULL, "sync_max", max_progress); + if (!*frozen) + sysfs_set_num(info, NULL, "sync_max", max_progress); /* Now wait. If we have already reached the point that we were * asked to wait to, don't wait at all, else wait for any change. @@ -3563,7 +3861,6 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape, * waiting forever on a dead array */ char action[20]; - fd_set rfds; if (sysfs_get_str(info, NULL, "sync_action", action, 20) <= 0 || strncmp(action, "reshape", 7) != 0) @@ -3573,34 +3870,44 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape, * So we need these extra tests. */ if (completed == 0 && advancing + && strncmp(action, "idle", 4) == 0 && info->reshape_progress > 0) break; if (completed == 0 && !advancing + && strncmp(action, "idle", 4) == 0 && info->reshape_progress < (info->component_size * reshape->after.data_disks)) break; - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - select(fd+1, NULL, NULL, &rfds, NULL); + sysfs_wait(fd, NULL); if (sysfs_fd_get_ll(fd, &completed) < 0) goto check_progress; } /* Some kernels reset 'sync_completed' to zero, - * we need to have real point we are in md + * we need to have real point we are in md. + * So in that case, read 'reshape_position' from sysfs. */ - if (completed == 0) - completed = max_progress; - - /* some kernels can give an incorrectly high 'completed' number */ - completed /= (info->new_chunk/512); - completed *= (info->new_chunk/512); - /* Convert 'completed' back in to a 'progress' number */ - completed *= reshape->after.data_disks; - if (!advancing) { - completed = info->component_size * reshape->after.data_disks - - completed; + if (completed == 0) { + unsigned long long reshapep; + char action[20]; + if (sysfs_get_str(info, NULL, "sync_action", + action, 20) > 0 && + strncmp(action, "idle", 4) == 0 && + sysfs_get_ll(info, NULL, + "reshape_position", &reshapep) == 0) + *reshape_completed = reshapep; + } else { + /* some kernels can give an incorrectly high + * 'completed' number, so round down */ + completed /= (info->new_chunk/512); + completed *= (info->new_chunk/512); + /* Convert 'completed' back in to a 'progress' number */ + completed *= reshape->after.data_disks; + if (!advancing) + completed = (info->component_size + * reshape->after.data_disks + - completed); + *reshape_completed = completed; } - *reshape_completed = completed; close(fd); @@ -3620,29 +3927,29 @@ check_progress: * it was just a device failure that leaves us degraded but * functioning. */ - strcpy(buf, "hi"); if (sysfs_get_str(info, NULL, "reshape_position", buf, sizeof(buf)) < 0 || strncmp(buf, "none", 4) != 0) { /* The abort might only be temporary. Wait up to 10 * seconds for fd to contain a valid number again. */ - struct timeval tv; + int wait = 10000; int rv = -2; - tv.tv_sec = 10; - tv.tv_usec = 0; - while (fd >= 0 && rv < 0 && tv.tv_sec > 0) { - fd_set rfds; - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - if (select(fd+1, NULL, NULL, &rfds, &tv) != 1) + unsigned long long new_sync_max; + while (fd >= 0 && rv < 0 && wait > 0) { + if (sysfs_wait(fd, &wait) != 1) break; switch (sysfs_fd_get_ll(fd, &completed)) { case 0: /* all good again */ rv = 1; + /* If "sync_max" is no longer max_progress + * we need to freeze things + */ + sysfs_get_ll(info, NULL, "sync_max", &new_sync_max); + *frozen = (new_sync_max != max_progress); break; case -2: /* read error - abort */ - tv.tv_sec = 0; + wait = 0; break; } } @@ -3934,6 +4241,7 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape, struct mdinfo *sd; unsigned long stripes; int uuid[4]; + int frozen = 0; /* set up the backup-super-block. This requires the * uuid from the array. @@ -4011,9 +4319,11 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape, wait_point = __le64_to_cpu(bsb.arraystart2); } + reshape_completed = sra->reshape_progress; rv = progress_reshape(sra, reshape, backup_point, wait_point, - &suspend_point, &reshape_completed); + &suspend_point, &reshape_completed, + &frozen); /* external metadata would need to ping_monitor here */ sra->reshape_progress = reshape_completed; @@ -4039,7 +4349,8 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape, forget_backup(dests, destfd, destoffsets, 1); } - + if (sigterm) + rv = -2; if (rv < 0) { if (rv == -1) done = 1; @@ -4047,6 +4358,7 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape, } if (rv == 0 && increasing && !st->ss->external) { /* No longer need to monitor this reshape */ + sysfs_set_str(sra, NULL, "sync_max", "max"); done = 1; break; } @@ -4100,7 +4412,12 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape, } /* FIXME maybe call progress_reshape one more time instead */ - abort_reshape(sra); /* remove any remaining suspension */ + /* remove any remaining suspension */ + sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL); + sysfs_set_num(sra, NULL, "suspend_hi", 0); + sysfs_set_num(sra, NULL, "suspend_lo", 0); + sysfs_set_num(sra, NULL, "sync_min", 0); + if (reshape->before.data_disks == reshape->after.data_disks) sysfs_set_num(sra, NULL, "sync_speed_min", speed); free(buf); @@ -4206,11 +4523,10 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt * sometimes they aren't... So allow considerable flexability in matching, and allow * this test to be overridden by an environment variable. */ - if (info->array.utime > (int)__le64_to_cpu(bsb.mtime) + 2*60*60 || - info->array.utime < (int)__le64_to_cpu(bsb.mtime) - 10*60) { + if(time_after(info->array.utime, (unsigned int)__le64_to_cpu(bsb.mtime) + 2*60*60) || + time_before(info->array.utime, (unsigned int)__le64_to_cpu(bsb.mtime) - 10*60)) { if (check_env("MDADM_GROW_ALLOW_OLD")) { - pr_err("accepting backup with timestamp %lu " - "for array with timestamp %lu\n", + pr_err("accepting backup with timestamp %lu for array with timestamp %lu\n", (unsigned long)__le64_to_cpu(bsb.mtime), (unsigned long)info->array.utime); } else { @@ -4291,7 +4607,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt st->ss->free_super(st); offsets[j] = dinfo.data_offset * 512; } - printf(Name ": restoring critical section\n"); + printf("%s: restoring critical section\n", Name); if (restore_stripes(fdlist, offsets, info->array.raid_disks, @@ -4450,10 +4766,9 @@ int Grow_continue_command(char *devname, int fd, dprintf("Grow continue is run for "); if (st->ss->external == 0) { int d; - dprintf("native array (%s)\n", devname); + dprintf_cont("native array (%s)\n", devname); if (ioctl(fd, GET_ARRAY_INFO, &array.array) < 0) { - pr_err("%s is not an active md array -" - " aborting\n", devname); + pr_err("%s is not an active md array - aborting\n", devname); ret_val = 1; goto Grow_continue_command_exit; } @@ -4481,6 +4796,8 @@ int Grow_continue_command(char *devname, int fd, continue; err = st->ss->load_super(st, fd2, NULL); close(fd2); + /* invalidate fd2 to avoid possible double close() */ + fd2 = -1; if (err) continue; break; @@ -4496,19 +4813,18 @@ int Grow_continue_command(char *devname, int fd, char *container; if (subarray) { - dprintf("subarray (%s)\n", subarray); + dprintf_cont("subarray (%s)\n", subarray); container = st->container_devnm; cfd = open_dev_excl(st->container_devnm); } else { container = st->devnm; close(fd); cfd = open_dev_excl(st->devnm); - dprintf("container (%s)\n", container); + dprintf_cont("container (%s)\n", container); fd = cfd; } if (cfd < 0) { - pr_err("Unable to open container " - "for %s\n", devname); + pr_err("Unable to open container for %s\n", devname); ret_val = 1; goto Grow_continue_command_exit; } @@ -4544,9 +4860,7 @@ int Grow_continue_command(char *devname, int fd, allow_reshape = 0; if (!allow_reshape) { - pr_err("cannot continue reshape of an array" - " in container with unsupported" - " metadata: %s(%s)\n", + pr_err("cannot continue reshape of an array in container with unsupported metadata: %s(%s)\n", devname, container); ret_val = 1; goto Grow_continue_command_exit; @@ -4566,8 +4880,7 @@ int Grow_continue_command(char *devname, int fd, break; } if (!content) { - pr_err("Unable to determine reshaped " - "array for %s\n", devname); + pr_err("Unable to determine reshaped array for %s\n", devname); ret_val = 1; goto Grow_continue_command_exit; } @@ -4580,6 +4893,9 @@ int Grow_continue_command(char *devname, int fd, sysfs_init(content, fd2, mdstat->devnm); + close(fd2); + fd2 = -1; + /* start mdmon in case it is not running */ if (!mdmon_running(container)) @@ -4589,8 +4905,7 @@ int Grow_continue_command(char *devname, int fd, if (mdmon_running(container)) st->update_tail = &st->updates; else { - pr_err("No mdmon found. " - "Grow cannot continue.\n"); + pr_err("No mdmon found. Grow cannot continue.\n"); ret_val = 1; goto Grow_continue_command_exit; } @@ -4606,7 +4921,7 @@ int Grow_continue_command(char *devname, int fd, /* continue reshape */ - ret_val = Grow_continue(fd, st, content, backup_file, 0); + ret_val = Grow_continue(fd, st, content, backup_file, 1, 0); Grow_continue_command_exit: if (fd2 > -1) @@ -4622,7 +4937,7 @@ Grow_continue_command_exit: } int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, - char *backup_file, int freeze_reshape) + char *backup_file, int forked, int freeze_reshape) { int ret_val = 2; @@ -4639,11 +4954,40 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, close(cfd); ret_val = reshape_container(st->container_devnm, NULL, mdfd, st, info, 0, backup_file, - 0, 1, freeze_reshape); + 0, forked, + 1 | info->reshape_active, + freeze_reshape); } else ret_val = reshape_array(NULL, mdfd, "array", st, info, 1, - NULL, 0ULL, backup_file, 0, 0, 1, + NULL, INVALID_SECTORS, + backup_file, 0, forked, + 1 | info->reshape_active, freeze_reshape); return ret_val; } + +char *make_backup(char *name) +{ + char *base = "backup_file-"; + int len; + char *fname; + + len = strlen(MAP_DIR) + 1 + strlen(base) + strlen(name)+1; + fname = xmalloc(len); + sprintf(fname, "%s/%s%s", MAP_DIR, base, name); + return fname; +} + +char *locate_backup(char *name) +{ + char *fl = make_backup(name); + struct stat stb; + + if (stat(fl, &stb) == 0 && + S_ISREG(stb.st_mode)) + return fl; + + free(fl); + return NULL; +}