From c82f047cfceb479c9c6b56b44c196018af050e45 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Tue, 7 Jun 2005 23:16:35 +0000 Subject: [PATCH] Initial bitmap support Signed-off-by: Neil Brown --- Assemble.c | 7 ++ Build.c | 34 +++++- Create.c | 24 +++- Makefile | 8 +- ReadMe.c | 23 +++- bitmap.c | 315 +++++++++++++++++++++++++++++++++++++++++++++++++++++ bitmap.h | 273 ++++++++++++++++++++++++++++++++++++++++++++++ md_u.h | 7 ++ mdadm.c | 72 +++++++++++- mdadm.h | 15 ++- 10 files changed, 765 insertions(+), 13 deletions(-) create mode 100644 bitmap.c create mode 100644 bitmap.h diff --git a/Assemble.c b/Assemble.c index 7cab81a5..71aaa60c 100644 --- a/Assemble.c +++ b/Assemble.c @@ -531,6 +531,13 @@ int Assemble(struct supertype *st, char *mddev, int mdfd, mddev, strerror(errno)); return 1; } + if (ident->bitmap_fd > 0) { /* main() presets -1 for "no bitmap file"; 0 keeps meaning "unset" as before */ + if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) { + fprintf(stderr, Name ": SET_BITMAP_FILE failed.\n"); + return 1; + } + } + /* First, add the raid disks, but add the chosen one last */ for (i=0; i<= bestcnt; i++) { int j; diff --git a/Build.c b/Build.c index 6bb3a626..b1fa1225 100644 --- a/Build.c +++ b/Build.c @@ -35,7 +35,8 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout, int raiddisks, - mddev_dev_t devlist, int assume_clean) + mddev_dev_t devlist, int assume_clean, + char *bitmap_file, int bitmap_chunk, int delay) { /* Build a linear or raid0 arrays without superblocks * We cannot really do any checks, we just do it.
@@ -56,6 +57,7 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout, struct stat stb; int subdevs = 0; mddev_dev_t dv; + int bitmap_fd; /* scan all devices, make sure they really are block devices */ for (dv = devlist; dv; dv=dv->next) { @@ -135,6 +137,9 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout, mddev, strerror(errno)); return 1; } + } else if (bitmap_file) { + fprintf(stderr, Name ": bitmaps not supported with this kernel\n"); + return 1; } /* now add the devices */ for ((i=0), (dv = devlist) ; dv ; i++, dv=dv->next) { @@ -171,6 +176,33 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout, /* now to start it */ if (vers >= 9000) { mdu_param_t param; /* not used by syscall */ + if (bitmap_file) { + bitmap_fd = open(bitmap_file, O_RDWR); + if (bitmap_fd < 0) { + if (bitmap_chunk == UnSet) { + fprintf(stderr, Name ": %s cannot be openned.\n", + bitmap_file); + return 1; + } + if (CreateBitmap(bitmap_file, 1, NULL, bitmap_chunk, + delay, 0/* FIXME size */)) { + return 1; + } + bitmap_fd = open(bitmap_file, O_RDWR); + if (bitmap_fd < 0) { + fprintf(stderr, Name ": %s cannot be openned.\n", + bitmap_file); + return 1; + } + } + if (bitmap_fd >= 0) { + if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) { + fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n", + mddev, strerror(errno)); + return 1; + } + } + } if (ioctl(mdfd, RUN_ARRAY, &param)) { fprintf(stderr, Name ": RUN_ARRAY failed: %s\n", strerror(errno)); diff --git a/Create.c b/Create.c index 1717240b..e1b17373 100644 --- a/Create.c +++ b/Create.c @@ -34,7 +34,8 @@ int Create(struct supertype *st, char *mddev, int mdfd, int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks, int subdevs, mddev_dev_t devlist, - int runstop, int verbose, int force) + int runstop, int verbose, int force, + char *bitmap_file, int bitmap_chunk, int delay) { /* * Create a new raid array.
@@ -66,6 +67,7 @@ int Create(struct supertype *st, char *mddev, int mdfd, int pass; int vers; int rv; + int bitmap_fd; mdu_array_info_t array; @@ -358,6 +360,26 @@ int Create(struct supertype *st, char *mddev, int mdfd, return 1; } + if (bitmap_file) { + int uuid[4]; + st->ss->uuid_from_super(uuid, super); + if (CreateBitmap(bitmap_file, force, (char*)uuid, bitmap_chunk, delay, + array.size*2ULL /* FIXME wrong for raid10 */)) { + return 1; + } + bitmap_fd = open(bitmap_file, O_RDWR); + if (bitmap_fd < 0) { + fprintf(stderr, Name ": weird: %s cannot be openned\n", + bitmap_file); + return 1; + } + if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) { + fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n", + mddev, strerror(errno)); + return 1; + } + } + for (pass=1; pass <=2 ; pass++) { diff --git a/Makefile b/Makefile index dcb9b0fb..fd086057 100644 --- a/Makefile +++ b/Makefile @@ -58,9 +58,11 @@ MAN5DIR = $(MANDIR)/man5 MAN8DIR = $(MANDIR)/man8 OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \ - Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o mdopen.o super0.o super1.o + Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \ + mdopen.o super0.o super1.o bitmap.o SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \ - Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c mdopen.c super0.c super1.c + Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \ + mdopen.c super0.c super1.c bitmap.c ASSEMBLE_SRCS := mdassemble.c Assemble.c config.c dlink.c util.c super0.c super1.c ifdef MDASSEMBLE_AUTO @@ -114,7 +116,7 @@ md.man : md.4 mdadm.conf.man : mdadm.conf.5 nroff -man mdadm.conf.5 > mdadm.conf.man -$(OBJS) : mdadm.h +$(OBJS) : mdadm.h bitmap.h install : mdadm mdadm.8 md.4 mdadm.conf.5 $(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm diff --git a/ReadMe.c b/ReadMe.c index 367bc0b7..856a8eff 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ 
-91,7 +91,7 @@ char Version[] = Name " - v1.11.0 - 11 April 2005\n"; * At the time if writing, there is only minimal support. */ -char short_options[]="-ABCDEFGQhVvbc:i:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1te:"; +char short_options[]="-ABCDEFGQhVXvb::c:i:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1te:"; struct option long_options[] = { {"manage", 0, 0, '@'}, {"misc", 0, 0, '#'}, @@ -104,6 +104,7 @@ struct option long_options[] = { {"grow", 0, 0, 'G'}, {"zero-superblock", 0, 0, 'K'}, /* deliberately no a short_option */ {"query", 0, 0, 'Q'}, + {"examine-bitmap", 0, 0, 'X'}, /* synonyms */ {"monitor", 0, 0, 'F'}, @@ -125,9 +126,11 @@ struct option long_options[] = { {"spare-disks",1,0, 'x'}, {"spare-devices",1,0, 'x'}, {"size", 1, 0, 'z'}, - {"auto", 2, 0, 'a'}, /* also for --assemble */ + {"auto", 1, 0, 'a'}, /* also for --assemble */ {"assume-clean",0,0, 3 }, {"metadata", 1, 0, 'e'}, /* superblock format */ + {"bitmap", 1, 0, 'b'}, + {"bitmap-chunk", 1, 0, 4}, /* For assemble */ {"uuid", 1, 0, 'u'}, @@ -188,6 +191,7 @@ char Help[] = char OptionHelp[] = "Any parameter that does not start with '-' is treated as a device name\n" +"or, for --examine-bitmap, a file name.\n" "The first such name is often the name of an md device. Subsequent\n" "names are often names of component devices." "\n" @@ -205,6 +209,7 @@ char OptionHelp[] = " --create -C : Create a new array\n" " --detail -D : Display details of an array\n" " --examine -E : Examine superblock on an array component\n" +" --examine-bitmap -X: Display the detail of a bitmap file\n" " --monitor -F : monitor (follow) some arrays\n" " --query -Q : Display general information about how a\n" " device relates to the md driver\n" @@ -212,6 +217,7 @@ char OptionHelp[] = /* "\n" " For create or build:\n" +" --bitmap= -b : File to store bitmap in - may pre-exist for --build\n" " --chunk= -c : chunk size of kibibytes\n" " --rounding= : rounding factor for linear array (==chunk size)\n" " --level= -l : raid level: 0,1,4,5,6,linear,mp. 
0 or linear for build\n" @@ -224,8 +230,11 @@ char OptionHelp[] = " : insert a missing drive for RAID5.\n" " --auto(=p) -a : Automatically allocate new (partitioned) md array if needed.\n" " --assume-clean : Assume the array is already in-sync. This is dangerous.\n" +" --bitmap-chunk= : chunksize of bitmap in bitmap file (Kilobytes)\n" +" --delay= -d : seconds between bitmap updates\n" "\n" " For assemble:\n" +" --bitmap= -b : File to find bitmap information in\n" " --uuid= -u : uuid of array to assemble. Devices which don't\n" " have this uuid are excluded\n" " --super-minor= -m : minor number to look for in super-block when\n" @@ -280,6 +289,7 @@ char Help_create[] = " other levels.\n" "\n" " Options that are valid with --create (-C) are:\n" +" --bitmap= : Create a bitmap for the array with the given filename\n" " --chunk= -c : chunk size of kibibytes\n" " --rounding= : rounding factor for linear array (==chunk size)\n" " --level= -l : raid level: 0,1,4,5,6,linear,multipath and synonyms\n" @@ -293,6 +303,8 @@ char Help_create[] = " --run -R : insist of running the array even if not all\n" " : devices are present or some look odd.\n" " --readonly -o : start the array readonly - not supported yet.\n" +" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n" +" --delay= -d : bitmap update delay in seconds.\n" "\n" ; @@ -308,10 +320,13 @@ char Help_build[] = " The level may only be 0, raid0, or linear.\n" " All devices must be listed and the array will be started once complete.\n" " Options that are valid with --build (-B) are:\n" +" --bitmap= : file to store/find bitmap information in.\n" " --chunk= -c : chunk size of kibibytes\n" " --rounding= : rounding factor for linear array (==chunk size)\n" " --level= -l : 0, raid0, or linear\n" -" --raid-devices= -n : number of active devices in array\n" +" --raid-devices= -n : number of active devices in array\n" +" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n" +" --delay= -d : bitmap update delay in seconds.\n" ; char 
Help_assemble[] = @@ -347,6 +362,7 @@ char Help_assemble[] = " and components are determined from the config file.\n" "\n" "Options that are valid with --assemble (-A) are:\n" +" --bitmap= : bitmap file to use wit the array\n" " --uuid= -u : uuid of array to assemble. Devices which don't\n" " have this uuid are excluded\n" " --super-minor= -m : minor number to look for in super-block when\n" @@ -393,6 +409,7 @@ char Help_misc[] = " device relates to the md driver\n" " --detail -D : Display details of an array\n" " --examine -E : Examine superblock on an array component\n" +" --examine-bitmap -X: Display contents of a bitmap file\n" " --zero-superblock : erase the MD superblock from a device.\n" " --run -R : start a partially built array\n" " --stop -S : deactivate array, releasing all resources\n" diff --git a/bitmap.c b/bitmap.c new file mode 100644 index 00000000..57969a67 --- /dev/null +++ b/bitmap.c @@ -0,0 +1,315 @@ +/* + * mdadm - manage Linux "md" devices aka RAID arrays. + * + * Copyright (C) 2004 Paul Clements, SteelEye Technology, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include "mdadm.h" +#include + +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) +/* static: a plain C99 'inline' definition emits no out-of-line symbol, so a non-inlined call would fail to link */ +static inline void sb_le_to_cpu(bitmap_super_t *sb) +{ + sb->magic = __le32_to_cpu(sb->magic); + sb->version = __le32_to_cpu(sb->version); + /* uuid gets no translation */ + sb->events = __le64_to_cpu(sb->events); + sb->events_cleared = __le64_to_cpu(sb->events_cleared); + sb->state = __le32_to_cpu(sb->state); + sb->chunksize = __le32_to_cpu(sb->chunksize); + sb->daemon_sleep = __le32_to_cpu(sb->daemon_sleep); + sb->sync_size = __le64_to_cpu(sb->sync_size); +} + +static inline void sb_cpu_to_le(bitmap_super_t *sb) +{ + sb_le_to_cpu(sb); /* these are really the same thing */ +} + +mapping_t bitmap_states[] = { + { "OK", 0 }, + { "Out of date", 2 }, + { NULL, -1 } +}; + +const char *bitmap_state(int state_num) +{ + char *state = map_num(bitmap_states, state_num); + return state ? state : "Unknown"; +} + +const char *human_chunksize(unsigned long bytes) +{ + static char buf[16]; + char *suffixes[] = { "B", "KB", "MB", "GB", "TB", NULL }; + int i = 0; + /* scale by 1024 per step, but never step onto the NULL terminator of suffixes[] */ + while ((bytes >> 10) && suffixes[i + 1] != NULL) { + bytes >>= 10; + i++; + } + + snprintf(buf, sizeof(buf), "%lu %s", bytes, suffixes[i]); + /* buf is static: the returned string is overwritten by the next call */ + return buf; +} + +typedef struct bitmap_info_s { + bitmap_super_t sb; + unsigned long long total_bits; + unsigned long long dirty_bits; +} bitmap_info_t; + +/* count the dirty bits in the first num_bits of byte */ +static inline int count_dirty_bits_byte(char byte, int num_bits) +{ + int num = 0; + + switch (num_bits) { /* fall through...
*/ + case 8: if (byte & 128) num++; + case 7: if (byte & 64) num++; + case 6: if (byte & 32) num++; + case 5: if (byte & 16) num++; + case 4: if (byte & 8) num++; + case 3: if (byte & 4) num++; + case 2: if (byte & 2) num++; + case 1: if (byte & 1) num++; + default: break; + } + + return num; +} + +int count_dirty_bits(char *buf, int num_bits) +{ + int i, num = 0; + + for (i=0; i < num_bits / 8; i++) + num += count_dirty_bits_byte(buf[i], 8); + + if (num_bits % 8) /* not an even byte boundary */ + num += count_dirty_bits_byte(buf[i], num_bits % 8); + + return num; +} + +/* calculate the size of the bitmap given the array size and bitmap chunksize */ +unsigned long long bitmap_bits(unsigned long long array_size, + unsigned long chunksize) +{ + return (array_size * 512 + chunksize - 1) / chunksize; +} + +bitmap_info_t *bitmap_fd_read(int fd, int brief) +{ + unsigned long long total_bits = 0, read_bits = 0, dirty_bits = 0; + bitmap_info_t *info; + char buf[512]; + int n; + + info = malloc(sizeof(*info)); + if (info == NULL) { + fprintf(stderr, Name ": failed to allocate %d bytes\n", + (int)sizeof(*info)); + return NULL; + } + + if (read(fd, &info->sb, sizeof(info->sb)) != sizeof(info->sb)) { + fprintf(stderr, Name ": failed to read superblock of bitmap " + "file: %s\n", strerror(errno)); + free(info); + return NULL; + } + + sb_le_to_cpu(&info->sb); /* convert superblock to CPU byte ordering */ + + if (brief || info->sb.sync_size == 0 || info->sb.chunksize == 0) + goto out; /* a zero chunksize (corrupt file) would divide by zero in bitmap_bits() */ + + /* read the rest of the file counting total bits and dirty bits -- + * we stop when either: + * 1) we hit EOF, in which case we assume the rest of the bits (if any) + * are dirty + * 2) we've read the full bitmap, in which case we ignore any trailing + * data in the file + */ + total_bits = bitmap_bits(info->sb.sync_size, info->sb.chunksize); + + while ((n = read(fd, buf, sizeof(buf))) > 0) { + unsigned long long remaining = total_bits - read_bits; + + if (remaining > sizeof(buf) * 8) /* we want the full buffer */ +
remaining = sizeof(buf) * 8; + if (remaining > n * 8) /* the file is truncated */ + remaining = n * 8; + dirty_bits += count_dirty_bits(buf, remaining); + + read_bits += remaining; + if (read_bits >= total_bits) /* we've got what we want */ + break; + } + + if (read_bits < total_bits) { /* file truncated... */ + fprintf(stderr, Name ": WARNING: bitmap file is not large " + "enough for array size %llu!\n\n", info->sb.sync_size); + total_bits = read_bits; + } +out: + info->total_bits = total_bits; + info->dirty_bits = dirty_bits; + return info; +} + +bitmap_info_t *bitmap_file_read(char *filename, int brief) +{ + int fd; + bitmap_info_t *info; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, Name ": failed to open bitmap file %s: %s\n", + filename, strerror(errno)); + return NULL; + } + + info = bitmap_fd_read(fd, brief); + close(fd); + return info; +} + +int ExamineBitmap(char *filename, int brief) +{ + /* + * Read the bitmap file and display its contents + */ + + bitmap_super_t *sb; + bitmap_info_t *info; + int rv = 1; + + info = bitmap_file_read(filename, brief); + if (!info) + return rv; + + sb = &info->sb; + printf(" Filename : %s\n", filename); + printf(" Magic : %08x\n", sb->magic); + if (sb->magic != BITMAP_MAGIC) { + fprintf(stderr, Name ": invalid bitmap magic 0x%x, the bitmap file appears to be corrupted\n", sb->magic); + } + printf(" Version : %d\n", sb->version); + if (sb->version != BITMAP_MAJOR) { + fprintf(stderr, Name ": unknown bitmap version %d, either the bitmap file is corrupted or you need to upgrade your tools\n", sb->version); + goto free_info; + } + + rv = 0; + printf(" UUID : %08x.%08x.%08x.%08x\n", + *(__u32 *)(sb->uuid+0), + *(__u32 *)(sb->uuid+4), + *(__u32 *)(sb->uuid+8), + *(__u32 *)(sb->uuid+12)); + printf(" Events : %llu\n", sb->events); + printf(" Events Cleared : %llu\n", sb->events_cleared); + printf(" State : %s\n", bitmap_state(sb->state)); + printf(" Chunksize : %s\n", human_chunksize(sb->chunksize)); +
printf(" Daemon : %ds flush period\n", sb->daemon_sleep); + printf(" Sync Size : %llu%s\n", sb->sync_size, + human_size(sb->sync_size * 512)); /* sync_size is in 512-byte sectors (cf. bitmap_bits()); human_size() presumably takes bytes - confirm */ + if (brief) + goto free_info; + printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n", + info->total_bits, info->dirty_bits, + 100.0 * info->dirty_bits / (info->total_bits + 1)); +free_info: + free(info); + return rv; +} + +int CreateBitmap(char *filename, int force, char uuid[16], + unsigned long chunksize, unsigned long daemon_sleep, + unsigned long long array_size) +{ + /* + * Create a bitmap file with a superblock and (optionally) a full bitmap + */ + + FILE *fp; + int rv = 1; + char block[512]; + bitmap_super_t sb; + long long bytes, filesize; + + if (!force && access(filename, F_OK) == 0) { + fprintf(stderr, Name ": bitmap file %s already exists, use --force to overwrite\n", filename); + return rv; + } + + fp = fopen(filename, "w"); + if (fp == NULL) { + fprintf(stderr, Name ": failed to open bitmap file %s: %s\n", + filename, strerror(errno)); + return rv; + } + + memset(&sb, 0, sizeof(sb)); + sb.magic = BITMAP_MAGIC; + sb.version = BITMAP_MAJOR; + if (uuid != NULL) + memcpy(sb.uuid, uuid, 16); + sb.chunksize = chunksize; + sb.daemon_sleep = daemon_sleep; + sb.sync_size = array_size; + + sb_cpu_to_le(&sb); /* convert to on-disk byte ordering */ + + if (fwrite(&sb, sizeof(sb), 1, fp) != 1) { + fprintf(stderr, Name ": failed to write superblock to bitmap file %s: %s\n", filename, strerror(errno)); + goto out; + } + + /* calculate the size of the bitmap and write it to disk */ + bytes = (bitmap_bits(array_size, chunksize) + 7) / 8; + if (!bytes) { + rv = 0; + goto out; + } + + filesize = bytes + sizeof(sb); + + memset(block, 0xff, sizeof(block)); + + while (bytes > 0) { + if (fwrite(block, sizeof(block), 1, fp) != 1) { + fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno)); + goto out; + } + bytes -= sizeof(block); + } + + rv = 0; + /* make the file be the right size (well,
to the nearest byte) */ + if (fflush(fp) != 0 || ftruncate(fileno(fp), filesize) != 0) { fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno)); rv = 1; } /* flush stdio first, otherwise fclose() would write buffered data past filesize */ +out: + fclose(fp); + if (rv) + unlink(filename); /* possibly corrupted, better get rid of it */ + return rv; +} diff --git a/bitmap.h b/bitmap.h new file mode 100644 index 00000000..17027e08 --- /dev/null +++ b/bitmap.h @@ -0,0 +1,273 @@ +/* + * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 + * + * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. + */ +#ifndef BITMAP_H +#define BITMAP_H 1 + +#define BITMAP_MAJOR 3 +#define BITMAP_MINOR 38 + +/* + * in-memory bitmap: + * + * Use 16 bit block counters to track pending writes to each "chunk". + * The 2 high order bits are special-purpose, the first is a flag indicating + * whether a resync is needed. The second is a flag indicating whether a + * resync is active. + * This means that the counter is actually 14 bits: + * + * +--------+--------+------------------------------------------------+ + * | resync | resync | counter | + * | needed | active | | + * | (0-1) | (0-1) | (0-16383) | + * +--------+--------+------------------------------------------------+ + * + * The "resync needed" bit is set when: + * a '1' bit is read from storage at startup. + * a write request fails on some drives + * a resync is aborted on a chunk with 'resync active' set + * It is cleared (and resync-active set) when a resync starts across all drives + * of the chunk. + * + * + * The "resync active" bit is set when: + * a resync is started on all drives, and resync_needed is set. + * resync_needed will be cleared (as long as resync_active wasn't already set). + * It is cleared when a resync completes. + * + * The counter counts pending write requests, plus the on-disk bit. + * When the counter is '1' and the resync bits are clear, the on-disk + * bit can be cleared as well, thus setting the counter to 0. + * When we set a bit, or in the counter (to start a write), if the field is + * 0, we first set the disk bit and set the counter to 1.
+ * + * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block + * counters as a fallback when "page" memory cannot be allocated: + * + * Normal case (page memory allocated): + * + * page pointer (32-bit) + * + * [ ] ------+ + * | + * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) + * c1 c2 c2048 + * + * Hijacked case (page memory allocation failed): + * + * hijacked page pointer (32-bit) + * + * [ ][ ] (no page memory allocated) + * counter #1 (16-bit) counter #2 (16-bit) + * + */ + +#ifdef __KERNEL__ + +#define PAGE_BITS (PAGE_SIZE << 3) +#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) + +typedef __u16 bitmap_counter_t; +#define COUNTER_BITS 16 +#define COUNTER_BIT_SHIFT 4 +#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8) +#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) + +#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) +#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) +#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) +#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) +#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) +#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) + +/* how many counters per page? */ +#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) +/* same, except a shift value for more efficient bitops */ +#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) +/* same, except a mask value for more efficient bitops */ +#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) + +#define BITMAP_BLOCK_SIZE 512 +#define BITMAP_BLOCK_SHIFT 9 + +/* how many blocks per chunk? 
(this is variable) */ +#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT) +#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT) +#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1) + +/* when hijacked, the counters and bits represent even larger "chunks" */ +/* there will be 1024 chunks represented by each counter in the page pointers */ +#define PAGEPTR_BLOCK_RATIO(bitmap) \ + (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1) +#define PAGEPTR_BLOCK_SHIFT(bitmap) \ + (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1) +#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1) + +/* + * on-disk bitmap: + * + * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap + * file a page at a time. There's a superblock at the start of the file. + */ + +/* map chunks (bits) to file pages - offset by the size of the superblock */ +#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3)) + +#endif + +/* + * bitmap structures: + */ + +#define BITMAP_MAGIC 0x6d746962 + +/* use these for bitmap->flags and bitmap->sb->state bit-fields */ +enum bitmap_state { + BITMAP_ACTIVE = 0x001, /* the bitmap is in use */ + BITMAP_STALE = 0x002 /* the bitmap file is out of date or had -EIO */ +}; + +/* the superblock at the front of the bitmap file -- little endian */ +typedef struct bitmap_super_s { + __u32 magic; /* 0 BITMAP_MAGIC */ + __u32 version; /* 4 the bitmap major for now, could change... 
*/ + __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ + __u64 events; /* 24 event counter for the bitmap (1)*/ + __u64 events_cleared;/*32 event counter when last bit cleared (2) */ + __u64 sync_size; /* 40 the size of the md device's sync range(3) */ + __u32 state; /* 48 bitmap state information */ + __u32 chunksize; /* 52 the bitmap chunk size in bytes */ + __u32 daemon_sleep; /* 56 seconds between disk flushes */ + + __u8 pad[4096 - 60]; /* set to zero */ +} bitmap_super_t; + +/* notes: + * (1) This event counter is updated before the eventcounter in the md superblock + * When a bitmap is loaded, it is only accepted if this event counter is equal + * to, or one greater than, the event counter in the superblock. + * (2) This event counter is updated when the other one is *if*and*only*if* the + * array is not degraded. As bits are not cleared when the array is degraded, + * this represents the last time that any bits were cleared. + * If a device is being added that has an event count with this value or + * higher, it is accepted as conforming to the bitmap. + * (3)This is the number of sectors represented by the bitmap, and is the range that + * resync happens across. For raid1 and raid5/6 it is the size of individual + * devices. For raid10 it is the size of the array. 
+ */ + +#ifdef __KERNEL__ + +/* the in-memory bitmap is represented by bitmap_pages */ +struct bitmap_page { + /* + * map points to the actual memory page + */ + char *map; + /* + * in emergencies (when map cannot be alloced), hijack the map + * pointer and use it as two counters itself + */ + unsigned int hijacked; + /* + * count of dirty bits on the page + */ + int count; +}; + +/* keep track of bitmap file pages that have pending writes on them */ +struct page_list { + struct list_head list; + struct page *page; +}; + +/* the main bitmap structure - one per mddev */ +struct bitmap { + struct bitmap_page *bp; + unsigned long pages; /* total number of pages in the bitmap */ + unsigned long missing_pages; /* number of pages not yet allocated */ + + mddev_t *mddev; /* the md device that the bitmap is for */ + + int counter_bits; /* how many bits per block counter */ + + /* bitmap chunksize -- how much data does each bit represent? */ + unsigned long chunksize; + unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */ + unsigned long chunks; /* total number of data chunks for the array */ + + /* We hold a count on the chunk currently being synced, and drop + * it when the last block is started. If the resync is aborted + * midway, we need to be able to drop that count, so we remember + * the counted chunk.. 
+ */ + unsigned long syncchunk; + + __u64 events_cleared; + + /* bitmap spinlock */ + spinlock_t lock; + + struct file *file; /* backing disk file */ + struct page *sb_page; /* cached copy of the bitmap file superblock */ + struct page **filemap; /* list of cache pages for the file */ + unsigned long *filemap_attr; /* attributes associated w/ filemap pages */ + unsigned long file_pages; /* number of pages in the file */ + + unsigned long flags; + + /* + * the bitmap daemon - periodically wakes up and sweeps the bitmap + * file, cleaning up bits and flushing out pages to disk as necessary + */ + mdk_thread_t *daemon; + unsigned long daemon_sleep; /* how many seconds between updates? */ + + /* + * bitmap write daemon - this daemon performs writes to the bitmap file + * this thread is only needed because of a limitation in ext3 (jbd) + * that does not allow a task to have two journal transactions ongoing + * simultaneously (even if the transactions are for two different + * filesystems) -- in the case of bitmap, that would be the filesystem + * that the bitmap file resides on and the filesystem that is mounted + * on the md device -- see current->journal_info in jbd/transaction.c + */ + mdk_thread_t *write_daemon; + mdk_thread_t *writeback_daemon; + spinlock_t write_lock; + struct semaphore write_ready; + struct semaphore write_done; + unsigned long writes_pending; + wait_queue_head_t write_wait; + struct list_head write_pages; + struct list_head complete_pages; + mempool_t *write_pool; +}; + +/* the bitmap API */ + +/* these are used only by md/bitmap */ +int bitmap_create(mddev_t *mddev); +void bitmap_destroy(mddev_t *mddev); +int bitmap_active(struct bitmap *bitmap); + +char *file_path(struct file *file, char *buf, int count); +void bitmap_print_sb(struct bitmap *bitmap); +int bitmap_update_sb(struct bitmap *bitmap); + +int bitmap_setallbits(struct bitmap *bitmap); + +/* these are exported */ +void bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned 
long sectors); +void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, + int success); +int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks); +void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted); +void bitmap_close_sync(struct bitmap *bitmap); + +int bitmap_unplug(struct bitmap *bitmap); +#endif + +#endif diff --git a/md_u.h b/md_u.h index 22a15438..6b067c6a 100644 --- a/md_u.h +++ b/md_u.h @@ -23,6 +23,7 @@ #define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) #define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) #define RAID_AUTORUN _IO (MD_MAJOR, 0x14) +#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t) /* configuration */ #define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) @@ -35,6 +36,7 @@ #define PROTECT_ARRAY _IO (MD_MAJOR, 0x27) #define HOT_ADD_DISK _IO (MD_MAJOR, 0x28) #define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29) +#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int) /* usage */ #define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t) @@ -105,6 +107,11 @@ typedef struct mdu_start_info_s { } mdu_start_info_t; +typedef struct mdu_bitmap_file_s +{ + char pathname[4096]; +} mdu_bitmap_file_t; + typedef struct mdu_param_s { int personality; /* 1,2,3,4 */ diff --git a/mdadm.c b/mdadm.c index 47ea2fe3..d8d46ee8 100644 --- a/mdadm.c +++ b/mdadm.c @@ -25,6 +25,9 @@ * The University of New South Wales * Sydney, 2052 * Australia + * + * Additions for bitmap and async RAID options, Copyright (C) 2003-2004, + * Paul Clements, SteelEye Technology, Inc. 
*/ #include "mdadm.h" @@ -56,6 +59,9 @@ int main(int argc, char *argv[]) char devmode = 0; int runstop = 0; int readonly = 0; + int bitmap_fd = -1; + char *bitmap_file = NULL; + int bitmap_chunk = UnSet; int SparcAdjust = 0; mddev_dev_t devlist = NULL; mddev_dev_t *devlistend = & devlist; @@ -95,6 +101,7 @@ int main(int argc, char *argv[]) ident.spare_group = NULL; ident.autof = 0; ident.st = NULL; + ident.bitmap_fd = -1; while ((option_index = -1) , (opt=getopt_long(argc, argv, @@ -128,7 +135,10 @@ int main(int argc, char *argv[]) case 'v': verbose = 1; continue; - case 'b': brief = 1; + case 'b': + if (mode == ASSEMBLE || mode == BUILD || mode == CREATE) + break; /* b means bitmap */ + brief = 1; continue; case ':': @@ -159,6 +169,7 @@ int main(int argc, char *argv[]) case '#': case 'D': case 'E': + case 'X': case 'Q': newmode = MISC; break; case 'R': case 'S': @@ -574,6 +585,8 @@ int main(int argc, char *argv[]) continue; case O(MONITOR,'d'): /* delay in seconds */ + case O(BUILD,'d'): /* delay for bitmap updates */ + case O(CREATE,'d'): if (delay) fprintf(stderr, Name ": only specify delay once. 
%s ignored.\n", optarg); @@ -655,6 +668,7 @@ int main(int argc, char *argv[]) case O(MISC,'K'): case O(MISC,'R'): case O(MISC,'S'): + case O(MISC,'X'): case O(MISC,'o'): case O(MISC,'w'): if (devmode && devmode != opt && @@ -676,6 +690,36 @@ int main(int argc, char *argv[]) } SparcAdjust = 1; continue; + + case O(ASSEMBLE,'b'): /* here we simply set the bitmap file */ + if (!optarg) { + fprintf(stderr, Name ": bitmap file needed with -b in --assemble mode\n"); + exit(2); + } + bitmap_fd = open(optarg, O_RDWR); + if (!*optarg || bitmap_fd < 0) { + fprintf(stderr, Name ": cannot open bitmap file %s: %s\n", optarg, strerror(errno)); + exit(2); + } + ident.bitmap_fd = bitmap_fd; /* for Assemble */ + continue; + case O(BUILD,'b'): + case O(CREATE,'b'): /* here we create the bitmap */ + bitmap_file = optarg; + continue; + + case O(BUILD,4): + case O(CREATE,4): /* bitmap chunksize */ + bitmap_chunk = strtol(optarg, &c, 10); + if (!optarg[0] || *c || bitmap_chunk < 0 || + bitmap_chunk & (bitmap_chunk - 1)) { + fprintf(stderr, Name ": invalid bitmap chunksize: %s\n", + optarg); + exit(2); + } + /* convert K to B, chunk of 0K means 512B */ + bitmap_chunk = bitmap_chunk ? bitmap_chunk * 1024 : 512; + continue; } /* We have now processed all the valid options. 
Anything else is * an error @@ -726,6 +770,7 @@ int main(int argc, char *argv[]) } } + rv = 0; switch(mode) { case MANAGE: @@ -813,9 +858,27 @@ int main(int argc, char *argv[]) } break; case BUILD: - rv = Build(devlist->devname, mdfd, chunk, level, layout, raiddisks, devlist->next, assume_clean); + if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK; + if (delay == 0) delay = DEFAULT_BITMAP_DELAY; + if (bitmap_file) { + bitmap_fd = open(bitmap_file, O_RDWR,0); + if (bitmap_fd < 0 && errno != ENOENT) { + perror(Name ": cannot create bitmap file"); + rv |= 1; + break; + } + if (bitmap_fd < 0) { + bitmap_fd = CreateBitmap(bitmap_file, force, NULL, + bitmap_chunk, delay, size); + } + } + rv = Build(devlist->devname, mdfd, chunk, level, layout, + raiddisks, devlist->next, assume_clean, + bitmap_file, bitmap_chunk, delay); break; case CREATE: + if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK; + if (delay == 0) delay = DEFAULT_BITMAP_DELAY; if (ss == NULL) { for(i=0; !ss && superlist[i]; i++) ss = superlist[i]->match_metadata_desc("default"); @@ -827,7 +890,8 @@ int main(int argc, char *argv[]) rv = Create(ss, devlist->devname, mdfd, chunk, level, layout, size<0 ? 
0 : size, raiddisks, sparedisks, - devs_found-1, devlist->next, runstop, verbose, force); + devs_found-1, devlist->next, runstop, verbose, force, + bitmap_file, bitmap_chunk, delay); break; case MISC: @@ -891,6 +955,8 @@ int main(int argc, char *argv[]) rv |= Kill(dv->devname, force); continue; case 'Q': rv |= Query(dv->devname); continue; + case 'X': + rv |= ExamineBitmap(dv->devname, brief); continue; } mdfd = open_mddev(dv->devname, 0); if (mdfd>=0) { diff --git a/mdadm.h b/mdadm.h index d42e853c..5733a038 100644 --- a/mdadm.h +++ b/mdadm.h @@ -61,9 +61,12 @@ char *strncpy(char *dest, const char *src, size_t n) __THROW; #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ #endif +#define DEFAULT_BITMAP_CHUNK 4096 +#define DEFAULT_BITMAP_DELAY 5 #include "md_u.h" #include "md_p.h" +#include "bitmap.h" /* general information that might be extracted from a superblock */ struct mdinfo { @@ -119,6 +122,7 @@ typedef struct mddev_ident_s { struct supertype *st; int autof; /* 1 for normal, 2 for partitioned */ char *spare_group; + int bitmap_fd; struct mddev_ident_s *next; } *mddev_ident_t; @@ -212,13 +216,15 @@ extern int Assemble(struct supertype *st, char *mddev, int mdfd, extern int Build(char *mddev, int mdfd, int chunk, int level, int layout, int raiddisks, - mddev_dev_t devlist, int assume_clean); + mddev_dev_t devlist, int assume_clean, + char *bitmap_file, int bitmap_chunk, int delay); extern int Create(struct supertype *st, char *mddev, int mdfd, int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks, int subdevs, mddev_dev_t devlist, - int runstop, int verbose, int force); + int runstop, int verbose, int force, + char *bitmap_file, int bitmap_chunk, int delay); extern int Detail(char *dev, int brief, int test); extern int Query(char *dev); @@ -231,6 +237,11 @@ extern int Monitor(mddev_dev_t devlist, extern int Kill(char *dev, int force); +extern int CreateBitmap(char *filename, int force, char 
uuid[16], + unsigned long chunksize, unsigned long daemon_sleep, + unsigned long long array_size); +extern int ExamineBitmap(char *filename, int brief); + extern int md_get_version(int fd); extern int get_linux_version(void); extern int parse_uuid(char *str, int uuid[4]); -- 2.39.2