git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
Add write-behind support
author Neil Brown <neilb@suse.de>
Tue, 9 Aug 2005 04:25:47 +0000 (04:25 +0000)
committer Neil Brown <neilb@suse.de>
Tue, 9 Aug 2005 04:25:47 +0000 (04:25 +0000)
Currently this includes
  --write-behind  to set level of write-behind supported
  --write-mostly  to flag devices as write-mostly.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
16 files changed:
Build.c
ChangeLog
Create.c
Detail.c
Grow.c
Manage.c
ReadMe.c
bitmap.c
bitmap.h
md_p.h
mdadm.8
mdadm.c
mdadm.h
super0.c
super1.c
tests/06wrmostly [new file with mode: 0644]

diff --git a/Build.c b/Build.c
index 5537b46d1befc040d2f502c1d045bd9ee5e5d475..6489d8451ce6d940edf1fd2942089055ae49768e 100644 (file)
--- a/Build.c
+++ b/Build.c
@@ -36,7 +36,7 @@
 int Build(char *mddev, int mdfd, int chunk, int level, int layout,
          int raiddisks,
          mddev_dev_t devlist, int assume_clean,
-         char *bitmap_file, int bitmap_chunk, int delay)
+         char *bitmap_file, int bitmap_chunk, int write_behind, int delay)
 {
        /* Build a linear or raid0 arrays without superblocks
         * We cannot really do any checks, we just do it.
@@ -164,7 +164,9 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
                        mdu_disk_info_t disk;
                        disk.number = i;
                        disk.raid_disk = i;
-                       disk.state = 6;
+                       disk.state = (1<<MD_DISK_SYNC) | (1<<MD_DISK_ACTIVE);
+                       if (dv->writemostly)
+                               disk.state |= 1<<MD_DISK_WRITEMOSTLY;
                        disk.major = major(stb.st_rdev);
                        disk.minor = minor(stb.st_rdev);
                        if (ioctl(mdfd, ADD_NEW_DISK, &disk)) {
@@ -192,7 +194,7 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
                                        return 1;
                                }
                                if (CreateBitmap(bitmap_file, 1, NULL, bitmap_chunk,
-                                                delay, 0/* FIXME size */)) {
+                                                delay, write_behind, 0/* FIXME size */)) {
                                        return 1;
                                }
                                bitmap_fd = open(bitmap_file, O_RDWR);
index 2cb01c17c89ebeebed5e086eddc4d92569e8fd4b..56bf07d7712b4ed71cb1689a1a5e2ecfe6446a7b 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 Changes Prior to this release
     -   Support assembling from byte-swapped superblocks
        metadata type "0.swap" and --update=byteorder
+    -   write-mostly and write-behind support for raid1.
 
 Changes Prior to 2.0-devel-3 release
     -   Assorted fixes for multiple bugs...
index 735b8d799a858ae3d69f66acf8a88c53b91e0de0..87a9a2e71c3c2ffd27eb02936bcc152d4b099b2b 100644 (file)
--- a/Create.c
+++ b/Create.c
@@ -35,7 +35,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
           int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
           int subdevs, mddev_dev_t devlist,
           int runstop, int verbose, int force,
-          char *bitmap_file, int bitmap_chunk, int delay)
+          char *bitmap_file, int bitmap_chunk, int write_behind, int delay)
 {
        /*
         * Create a new raid array.
@@ -351,7 +351,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
                        fprintf(stderr, Name ": internal bitmaps not supported by this kernel.\n");
                        return 1;
                }
-               if (!st->ss->add_internal_bitmap(super, bitmap_chunk, delay, 
+               if (!st->ss->add_internal_bitmap(super, bitmap_chunk, delay, write_behind,
                                                 size ? size : maxsize)) {
                        fprintf(stderr, Name ": Given bitmap chunk size not supported.\n");
                        return 1;
@@ -382,7 +382,8 @@ int Create(struct supertype *st, char *mddev, int mdfd,
                        bitmap_chunk = DEFAULT_BITMAP_CHUNK;
 
                st->ss->uuid_from_super(uuid, super);
-               if (CreateBitmap(bitmap_file, force, (char*)uuid, bitmap_chunk, delay,
+               if (CreateBitmap(bitmap_file, force, (char*)uuid, bitmap_chunk,
+                       delay, write_behind,
                                 array.size*2ULL /* FIXME wrong for raid10 */)) {
                        return 1;
                }
@@ -416,14 +417,18 @@ int Create(struct supertype *st, char *mddev, int mdfd,
                        }
                        disk.raid_disk = disk.number;
                        if (disk.raid_disk < raiddisks)
-                               disk.state = 6; /* active and in sync */
+                               disk.state = (1<<MD_DISK_ACTIVE) |
+                                               (1<<MD_DISK_SYNC);
                        else
                                disk.state = 0;
+                       if (dv->writemostly)
+                               disk.state |= (1<<MD_DISK_WRITEMOSTLY);
+
                        if (dnum == insert_point ||
                            strcasecmp(dv->devname, "missing")==0) {
                                disk.major = 0;
                                disk.minor = 0;
-                               disk.state = 1; /* faulty */
+                               disk.state = (1<<MD_DISK_FAULTY);
                        } else {
                                fd = open(dv->devname, O_RDONLY|O_EXCL, 0);
                                if (fd < 0) {
index 46b483c4bd023cd9cf3d7073fa10b6231fb8b9c3..4c93eff95f51b3d7bddbdfda716172aa493d0812 100644 (file)
--- a/Detail.c
+++ b/Detail.c
@@ -216,6 +216,8 @@ int Detail(char *dev, int brief, int test)
        for (d= 0; d < max_disks; d++) {
                mdu_disk_info_t disk;
                char *dv;
+               int wonly = disk.state & (1<<MD_DISK_WRITEMOSTLY);
+               disk.state &= ~(1<<MD_DISK_WRITEMOSTLY);
                disk.number = d;
                if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
                        if (d < array.raid_disks)
@@ -244,6 +246,7 @@ int Detail(char *dev, int brief, int test)
                        if (disk.state & (1<<MD_DISK_ACTIVE)) printf(" active");
                        if (disk.state & (1<<MD_DISK_SYNC)) printf(" sync");
                        if (disk.state & (1<<MD_DISK_REMOVED)) printf(" removed");
+                       if (wonly) printf(" writeonly");
                        if (disk.state == 0) printf(" spare");
                        if (disk.state == 0) {
                                if (is_26) {
diff --git a/Grow.c b/Grow.c
index d20bc6edc8f17a9388902ecad60276ae43e78a6d..ce536d5e43e13947f825f97da8332acaa31385c4 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -192,7 +192,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
        return 0;
 }
 
-int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay)
+int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind)
 {
        /*
         * First check that array doesn't have a bitmap
@@ -255,7 +255,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay)
                                        continue;
                                if (st->ss->load_super(st, fd2, &super, NULL)==0) {
                                        st->ss->add_internal_bitmap(super, 
-                                                                   chunk, delay,
+                                                                   chunk, delay, write_behind,
                                                                    array.size);
                                        st->ss->write_bitmap(st, fd2, super);
                                }
index 53accd7e9a1722013dc77d28408e3bc5ae0d913f..a91e467c8ff7916ba321d366042b77d3172b6973 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -266,6 +266,8 @@ int Manage_subdevs(char *devname, int fd,
                        disc.minor = minor(stb.st_rdev);
                        disc.number =j;
                        disc.state = 0;
+                       if (dv->writemostly)
+                               disc.state |= 1 << MD_DISK_WRITEMOSTLY;
                        st->ss->add_to_super(dsuper, &disc);
                        if (st->ss->write_init_super(st, dsuper, &disc, dv->devname))
                                return 1;
index 1d28adf56b8e0957171ba227f32c933a136af2c8..1ba7301b956fb3e1bf762869f4e47131e7fe38f6 100644 (file)
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -131,6 +131,8 @@ struct option long_options[] = {
     {"metadata",  1, 0, 'e'}, /* superblock format */
     {"bitmap",   1, 0, 'b'},
     {"bitmap-chunk", 1, 0, 4},
+    {"write-behind", 2, 0, 5},
+    {"write-mostly",0, 0, 'W'},
 
     /* For assemble */
     {"uuid",      1, 0, 'u'},
@@ -139,6 +141,7 @@ struct option long_options[] = {
     {"scan",      0, 0, 's'},
     {"force",    0, 0, 'f'},
     {"update",   1, 0, 'U'},
+
     /* Management */
     {"add",       0, 0, 'a'},
     {"remove",    0, 0, 'r'},
@@ -232,6 +235,7 @@ char OptionHelp[] =
 "  --assume-clean     : Assume the array is already in-sync. This is dangerous.\n"
 "  --bitmap-chunk=    : chunksize of bitmap in bitmap file (Kilobytes)\n"
 "  --delay=      -d   : seconds between bitmap updates\n"
+"  --write-behind=    : number of simultaneous write-behind requests to allow (requires bitmap)\n"
 "\n"
 " For assemble:\n"
 "  --bitmap=     -b   : File to find bitmap information in\n"
index 96a26f9a96195446c6a35c6e28fcffd7a60b62f6..0a2ed5d6bd804f8bcd28e614864205b4039c76e2 100644 (file)
--- a/bitmap.c
+++ b/bitmap.c
@@ -215,6 +215,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
        bitmap_super_t *sb;
        bitmap_info_t *info;
        int rv = 1;
+       char buf[64];
 
        info = bitmap_file_read(filename, brief, st);
        if (!info)
@@ -243,6 +244,11 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
        printf("           State : %s\n", bitmap_state(sb->state));
        printf("       Chunksize : %s\n", human_chunksize(sb->chunksize));
        printf("          Daemon : %ds flush period\n", sb->daemon_sleep);
+       if (sb->write_behind)
+               sprintf(buf, "Allow write behind, max %d", sb->write_behind);
+       else
+               sprintf(buf, "Normal");
+       printf("      Write Mode : %s\n", buf);
        printf("       Sync Size : %llu%s\n", sb->sync_size/2,
                                        human_size(sb->sync_size * 512));
        if (brief)
@@ -257,6 +263,7 @@ free_info:
 
 int CreateBitmap(char *filename, int force, char uuid[16],
                unsigned long chunksize, unsigned long daemon_sleep,
+               unsigned long write_behind,
                unsigned long long array_size)
 {
        /*
@@ -288,6 +295,7 @@ int CreateBitmap(char *filename, int force, char uuid[16],
                memcpy(sb.uuid, uuid, 16);
        sb.chunksize = chunksize;
        sb.daemon_sleep = daemon_sleep;
+       sb.write_behind = write_behind;
        sb.sync_size = array_size;
 
        sb_cpu_to_le(&sb); /* convert to on-disk byte ordering */
index 811485d14082750c6d1840303209bebba04bd2cd..02a4e97acabc507e7f75697e39116589435e67bd 100644 (file)
--- a/bitmap.h
+++ b/bitmap.h
@@ -7,7 +7,7 @@
 #define BITMAP_H 1
 
 #define BITMAP_MAJOR 3
-#define BITMAP_MINOR 38
+#define BITMAP_MINOR 39
 
 /*
  * in-memory bitmap:
  * When we set a bit, or in the counter (to start a write), if the fields is
  * 0, we first set the disk bit and set the counter to 1.
  *
+ * If the counter is 0, the on-disk bit is clear and the stripe is clean.
+ * Anything that dirties the stripe pushes the counter to 2 (at least)
+ * and sets the on-disk bit (lazily).
+ * If a periodic sweep finds the counter at 2, it is decremented to 1.
+ * If the sweep finds the counter at 1, the on-disk bit is cleared and the
+ * counter goes to zero.
+ *
  * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
  * counters as a fallback when "page" memory cannot be allocated:
  *
@@ -140,8 +147,9 @@ typedef struct bitmap_super_s {
        __u32 state;        /* 48  bitmap state information */
        __u32 chunksize;    /* 52  the bitmap chunk size in bytes */
        __u32 daemon_sleep; /* 56  seconds between disk flushes */
+       __u32 write_behind; /* 60  number of outstanding write-behind writes */
 
-       __u8  pad[256 - 60]; /* set to zero */
+       __u8  pad[256 - 64]; /* set to zero */
 } bitmap_super_t;
 
 /* notes:
diff --git a/md_p.h b/md_p.h
index 31eaafd2cc0f8afb27b2a8d176a4b00e105d832a..0a0b38156fd38d3e5a6dc706fd7c8e9b0d9b8e42 100644 (file)
--- a/md_p.h
+++ b/md_p.h
 #define MD_DISK_SYNC           2 /* disk is in sync with the raid set */
 #define MD_DISK_REMOVED                3 /* disk is in sync with the raid set */
 
+#define        MD_DISK_WRITEMOSTLY     9 /* disk is "write-mostly" in RAID1 config.
+                                  * read requests will only be sent here in 
+                                  * dire need
+                                  */
+
 typedef struct mdp_device_descriptor_s {
        __u32 number;           /* 0 Device number in the entire set          */
        __u32 major;            /* 1 Device major number                      */
diff --git a/mdadm.8 b/mdadm.8
index 1ecdac29130080495e3ba8c8a38c4cdb1af47da7..48d6c53a01dc22ad341c4ebfe6f49ee0de3daa2e 100644 (file)
--- a/mdadm.8
+++ b/mdadm.8
@@ -204,6 +204,18 @@ with
 .B --verbose
 gives an intermediate level of verbosity.
 
+.TP
+.BR -W ", " --write-mostly
+subsequent devices listed in a
+.BR --build ,
+.BR --create ,
+or
+.B --add
+command will be flagged as 'write-mostly'.  This is valid for RAID1
+only and means that the 'md' driver will avoid reading from these
+devices if at all possible.  This can be useful if mirroring over a
+slow link.
+
 .TP
 .BR -b ", " --bitmap=
 Give the name of a bitmap file to use with this array.  Can be used
@@ -215,6 +227,15 @@ exist).
 Set the Chunksize of the bitmap. Each bit corresponds to that many
 Kilobytes of storage. Default is 4.
 
+.TP
+.BR --write-behind=
+Specify that write-behind mode should be enabled (valid for RAID1
+only). If an argument is specified, it will set the maximum number
+of outstanding writes allowed. The default value is 256.
+A write-intent bitmap is required in order to use write-behind
+mode, and write-behind is only attempted on drives marked as
+.IR write-mostly .
+
 
 .TP
 .BR -f ", " --force
@@ -1218,9 +1239,15 @@ For this to work, the kernel must support the necessary change.
 Various types of growth may be added during 2.6 development, possibly
 including restructuring a raid5 array to have more active devices.
 
-Currently the only support available is to change the "size" attribute
-for arrays with redundancy, and the raid-disks attribute of RAID1
-arrays.
+Currently the only support available is to
+.IP \(bu 4
+change the "size" attribute
+for RAID1, RAID5 and RAID6.
+.IP \(bu 4
+change the "raid-disks" attribute of RAID1.
+.IP \(bu 4
+add a write-intent bitmap to a RAID1 array.
+.PP
 
 Normally when an array is build the "size" it taken from the smallest
 of the drives.  If all the small drives in an arrays are, one at a
diff --git a/mdadm.c b/mdadm.c
index 2b2b9be7043df8e8707c7a6e293d062a09b952c6..4dd6524515a816d3dccfa3bc9c8d9bd7af7cc458 100644 (file)
--- a/mdadm.c
+++ b/mdadm.c
@@ -26,7 +26,7 @@
  *           Sydney, 2052
  *           Australia
  *
- *    Additions for bitmap and async RAID options, Copyright (C) 2003-2004, 
+ *    Additions for bitmap and write-behind RAID options, Copyright (C) 2003-2004, 
  *    Paul Clements, SteelEye Technology, Inc.
  */
 
@@ -60,6 +60,7 @@ int main(int argc, char *argv[])
        char devmode = 0;
        int runstop = 0;
        int readonly = 0;
+       int write_behind = 0;
        int bitmap_fd = -1;
        char *bitmap_file = NULL;
        int bitmap_chunk = UnSet;
@@ -89,6 +90,7 @@ int main(int argc, char *argv[])
        char *pidfile = NULL;
        int oneshot = 0;
        struct supertype *ss = NULL;
+       int writemostly = 0;
 
        int copies;
 
@@ -214,6 +216,7 @@ int main(int argc, char *argv[])
                                        }
                                        dv->devname = optarg;
                                        dv->disposition = devmode;
+                                       dv->writemostly = writemostly;
                                        dv->next = NULL;
                                        *devlistend = dv;
                                        devlistend = &dv->next;
@@ -262,6 +265,7 @@ int main(int argc, char *argv[])
                        }
                        dv->devname = optarg;
                        dv->disposition = devmode;
+                       dv->writemostly = writemostly;
                        dv->next = NULL;
                        *devlistend = dv;
                        devlistend = &dv->next;
@@ -306,6 +310,13 @@ int main(int argc, char *argv[])
                        max_disks = ss->max_devs;
                        continue;
 
+               case O(MANAGE,'W'):
+               case O(BUILD,'W'):
+               case O(CREATE,'W'):
+                       /* set write-mostly for following devices */
+                       writemostly = 1;
+                       continue;
+
                case O(GROW,'z'):
                case O(CREATE,'z'): /* size */
                        if (size >= 0) {
@@ -741,6 +752,19 @@ int main(int argc, char *argv[])
                        /* convert K to B, chunk of 0K means 512B */
                        bitmap_chunk = bitmap_chunk ? bitmap_chunk * 1024 : 512;
                        continue;
+
+               case O(BUILD, 5):
+               case O(CREATE, 5): /* write-behind mode */
+                       write_behind = DEFAULT_MAX_WRITE_BEHIND;
+                       if (optarg) {
+                               write_behind = strtol(optarg, &c, 10);
+                               if (write_behind < 0 || *c ||
+                                   write_behind > 16383) {
+                                       fprintf(stderr, Name ": Invalid value for maximum outstanding write-behind writes: %s.\n\tMust be between 0 and 16383.\n", optarg);
+                                       exit(2);
+                               }
+                       }
+                       continue;
                }
                /* We have now processed all the valid options. Anything else is
                 * an error
@@ -904,6 +928,12 @@ int main(int argc, char *argv[])
        case BUILD:
                if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK;
                if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
+               if (write_behind && !bitmap_file) {
+                       fprintf(stderr, Name ": write-behind mode requires a bitmap.\n");
+                       rv = 1;
+                       break;
+               }
+
                if (bitmap_file) {
                        if (strcmp(bitmap_file, "internal")==0) {
                                fprintf(stderr, Name ": 'internal' bitmaps not supported with --build\n");
@@ -918,15 +948,20 @@ int main(int argc, char *argv[])
                        }
                        if (bitmap_fd < 0) {
                                bitmap_fd = CreateBitmap(bitmap_file, force, NULL,
-                                                        bitmap_chunk, delay, size);
+                                                        bitmap_chunk, delay, write_behind, size);
                        }
                }
                rv = Build(devlist->devname, mdfd, chunk, level, layout,
                           raiddisks, devlist->next, assume_clean,
-                          bitmap_file, bitmap_chunk, delay);
+                          bitmap_file, bitmap_chunk, write_behind, delay);
                break;
        case CREATE:
                if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
+               if (write_behind && !bitmap_file) {
+                       fprintf(stderr, Name ": write-behind mode requires a bitmap.\n");
+                       rv = 1;
+                       break;
+               }
                if (ss == NULL) {
                        for(i=0; !ss && superlist[i]; i++) 
                                ss = superlist[i]->match_metadata_desc("default");
@@ -939,7 +974,7 @@ int main(int argc, char *argv[])
                rv = Create(ss, devlist->devname, mdfd, chunk, level, layout, size<0 ? 0 : size,
                            raiddisks, sparedisks,
                            devs_found-1, devlist->next, runstop, verbose, force,
-                           bitmap_file, bitmap_chunk, delay);
+                           bitmap_file, bitmap_chunk, write_behind, delay);
                break;
        case MISC:
 
@@ -1078,7 +1113,7 @@ int main(int argc, char *argv[])
                else if (bitmap_file) {
                        if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
                        rv = Grow_addbitmap(devlist->devname, mdfd, bitmap_file,
-                                           bitmap_chunk, delay);
+                                           bitmap_chunk, delay, write_behind);
                } else
                        fprintf(stderr, Name ": no changes to --grow\n");
                break;
diff --git a/mdadm.h b/mdadm.h
index fa8ea69bcd7fa3692566bd993da11511c9a4ac6f..8b58afc067dfc4ee30541582edef224ac3e0301f 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -64,6 +64,7 @@ char *strncpy(char *dest, const char *src, size_t n) __THROW;
 
 #define DEFAULT_BITMAP_CHUNK 4096
 #define DEFAULT_BITMAP_DELAY 5
+#define DEFAULT_MAX_WRITE_BEHIND 256
 
 #include       "md_u.h"
 #include       "md_p.h"
@@ -134,6 +135,7 @@ typedef struct mddev_dev_s {
        char disposition;       /* 'a' for add, 'r' for remove, 'f' for fail.
                                 * Not set for names read from .config
                                 */
+       char writemostly;
        struct mddev_dev_s *next;
 } *mddev_dev_t;
 
@@ -186,7 +188,7 @@ extern struct superswitch {
        int (*load_super)(struct supertype *st, int fd, void **sbp, char *devname);
        struct supertype * (*match_metadata_desc)(char *arg);
        __u64 (*avail_size)(__u64 size);
-       int (*add_internal_bitmap)(void *sbv, int chunk, int delay, unsigned long long size);
+       int (*add_internal_bitmap)(void *sbv, int chunk, int delay, int write_behind, unsigned long long size);
        void (*locate_bitmap)(struct supertype *st, int fd);
        int (*write_bitmap)(struct supertype *st, int fd, void *sbv);
        int major;
@@ -223,7 +225,7 @@ extern int Manage_reconfig(char *devname, int fd, int layout);
 extern int Manage_subdevs(char *devname, int fd,
                          mddev_dev_t devlist);
 extern int Grow_Add_device(char *devname, int fd, char *newdev);
-extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay);
+extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind);
 
 
 extern int Assemble(struct supertype *st, char *mddev, int mdfd,
@@ -237,14 +239,14 @@ extern int Assemble(struct supertype *st, char *mddev, int mdfd,
 extern int Build(char *mddev, int mdfd, int chunk, int level, int layout,
                 int raiddisks,
                 mddev_dev_t devlist, int assume_clean,
-                char *bitmap_file, int bitmap_chunk, int delay);
+                char *bitmap_file, int bitmap_chunk, int write_behind, int delay);
 
 
 extern int Create(struct supertype *st, char *mddev, int mdfd,
                  int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
                  int subdevs, mddev_dev_t devlist,
                  int runstop, int verbose, int force,
-                 char *bitmap_file, int bitmap_chunk, int delay);
+                 char *bitmap_file, int bitmap_chunk, int write_behind, int delay);
 
 extern int Detail(char *dev, int brief, int test);
 extern int Query(char *dev);
@@ -259,6 +261,7 @@ extern int Kill(char *dev, int force);
 
 extern int CreateBitmap(char *filename, int force, char uuid[16],
                        unsigned long chunksize, unsigned long daemon_sleep,
+                       unsigned long write_behind,
                        unsigned long long array_size);
 extern int ExamineBitmap(char *filename, int brief, struct supertype *st);
 
index 7a306b524f12637c002b57444b95e470120c32ee..e33643905624033cbd93d1d7ca753ae1f1974414 100644 (file)
--- a/super0.c
+++ b/super0.c
@@ -148,15 +148,19 @@ static void examine_super0(void *sbv)
                mdp_disk_t *dp;
                char *dv;
                char nb[5];
+               int wonly;
                if (d>=0) dp = &sb->disks[d];
                else dp = &sb->this_disk;
                snprintf(nb, sizeof(nb), "%4d", d);
                printf("%4s %5d   %5d    %5d    %5d     ", d < 0 ? "this" :  nb,
                       dp->number, dp->major, dp->minor, dp->raid_disk);
+               wonly = dp->state & (1<<MD_DISK_WRITEMOSTLY);
+               dp->state &= ~(1<<MD_DISK_WRITEMOSTLY);
                if (dp->state & (1<<MD_DISK_FAULTY)) printf(" faulty");
                if (dp->state & (1<<MD_DISK_ACTIVE)) printf(" active");
                if (dp->state & (1<<MD_DISK_SYNC)) printf(" sync");
                if (dp->state & (1<<MD_DISK_REMOVED)) printf(" removed");
+               if (wonly) printf(" write-mostly");
                if (dp->state == 0) printf(" spare");
                if ((dv=map_dev(dp->major, dp->minor)))
                        printf("   %s", dv);
@@ -312,8 +316,10 @@ static int update_super0(struct mdinfo *info, void *sbv, char *update, char *dev
        }
        if (strcmp(update, "assemble")==0) {
                int d = info->disk.number;
+               int wonly = sb->disks[d].state & (1<<MD_DISK_WRITEMOSTLY);
+               sb->disks[d].state &= ~(1<<MD_DISK_WRITEMOSTLY);
                if (sb->disks[d].state != info->disk.state) {
-                       sb->disks[d].state = info->disk.state;
+                       sb->disks[d].state = info->disk.state & wonly;
                        rv = 1;
                }
        }
@@ -467,7 +473,7 @@ static int store_super0(struct supertype *st, int fd, void *sbv)
 static int write_init_super0(struct supertype *st, void *sbv, mdu_disk_info_t *dinfo, char *devname)
 {
        mdp_super_t *sb = sbv;
-       int fd = open(devname, O_RDWRO_EXCL);
+       int fd = open(devname, O_RDWR|O_EXCL);
        int rv;
 
        if (fd < 0) {
@@ -485,6 +491,7 @@ static int write_init_super0(struct supertype *st, void *sbv, mdu_disk_info_t *d
        if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
                int towrite, n;
                char buf[4096];
+
                write(fd, ((char*)sb)+MD_SB_BYTES, sizeof(bitmap_super_t));
                towrite = 64*1024 - MD_SB_BYTES - sizeof(bitmap_super_t);
                memset(buf, 0xff, sizeof(buf));
@@ -498,6 +505,7 @@ static int write_init_super0(struct supertype *st, void *sbv, mdu_disk_info_t *d
                        else
                                break;
                }
+               fsync(fd);
                if (towrite)
                        rv = -2;
        }
@@ -661,7 +669,7 @@ static __u64 avail_size0(__u64 devsize)
        return MD_NEW_SIZE_SECTORS(devsize);
 }
 
-static int add_internal_bitmap0(void *sbv, int chunk, int delay, unsigned long long size)
+static int add_internal_bitmap0(void *sbv, int chunk, int delay, int write_behind, unsigned long long size)
 {
        /*
         * The bitmap comes immediately after the superblock and must be 60K in size
@@ -690,12 +698,13 @@ static int add_internal_bitmap0(void *sbv, int chunk, int delay, unsigned long l
        sb->state |= (1<<MD_SB_BITMAP_PRESENT);
 
        memset(bms, sizeof(*bms), 0);
-       bms->magic = __le32_to_cpu(BITMAP_MAGIC);
-       bms->version = __le32_to_cpu(BITMAP_MAJOR);
+       bms->magic = __cpu_to_le32(BITMAP_MAGIC);
+       bms->version = __cpu_to_le32(BITMAP_MAJOR);
        uuid_from_super0((int*)bms->uuid, sb);
-       bms->chunksize = __le32_to_cpu(chunk);
-       bms->daemon_sleep = __le32_to_cpu(delay);
-       bms->sync_size = __le64_to_cpu(size);
+       bms->chunksize = __cpu_to_le32(chunk);
+       bms->daemon_sleep = __cpu_to_le32(delay);
+       bms->sync_size = __cpu_to_le64(size);
+       bms->write_behind = __cpu_to_le32(write_behind);
 
 
 
@@ -776,6 +785,7 @@ int write_bitmap0(struct supertype *st, int fd, void *sbv)
                else
                        break;
        }
+       fsync(fd);
        if (towrite)
                rv = -2;
 
index 3c24f3481553dc1b953e9da860064f34cf9f17cd..f59eff06d6566473426e4e7b55805023d89c394f 100644 (file)
--- a/super1.c
+++ b/super1.c
@@ -64,7 +64,9 @@ struct mdp_superblock_1 {
        __u32   dev_number;     /* permanent identifier of this  device - not role in raid */
        __u32   cnt_corrected_read; /* number of read errors that were corrected by re-writing */
        __u8    device_uuid[16]; /* user-space setable, ignored by kernel */
-       __u8    pad2[64-56];    /* set to 0 when writing */
+        __u8    devflags;        /* per-device flags.  Only one defined...*/
+#define WriteMostly1    1        /* mask for writemostly flag in above */
+       __u8    pad2[64-57];    /* set to 0 when writing */
 
        /* array state information - 64 bytes */
        __u64   utime;          /* 40 bits second, 24 btes microseconds */
@@ -153,6 +155,12 @@ static void examine_super1(void *sbv)
                if ((i&3)==0 && i != 0) printf(":");
        }
        printf("\n");
+       if (sb->devflags) {
+               printf("      Flags :");
+               if (sb->devflags & WriteMostly1) 
+                       printf(" write-mostly");
+               printf("\n");
+       }
 
        atime = __le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL;
        printf("    Update Time : %.24s\n", ctime(&atime));
@@ -429,7 +437,7 @@ static void add_to_super1(void *sbv, mdu_disk_info_t *dk)
 {
        struct mdp_superblock_1 *sb = sbv;
        __u16 *rp = sb->dev_roles + dk->number;
-       if (dk->state == 6) /* active, sync */
+       if ((dk->state & 6) == 6) /* active, sync */
                *rp = __cpu_to_le16(dk->raid_disk);
        else if ((dk->state & ~2) == 0) /* active or idle -> spare */
                *rp = 0xffff;
@@ -517,6 +525,8 @@ static int write_init_super1(struct supertype *st, void *sbv, mdu_disk_info_t *d
        }
 
        sb->dev_number = __cpu_to_le32(dinfo->number);
+       if (dinfo->state & (1<<MD_DISK_WRITEMOSTLY)) 
+               sb->devflags |= WriteMostly1;
 
        if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
            read(rfd, sb->device_uuid, 16) != 16) {
diff --git a/tests/06wrmostly b/tests/06wrmostly
new file mode 100644 (file)
index 0000000..51fff60
--- /dev/null
@@ -0,0 +1,15 @@
+set -e
+
+# create a raid1 array with a write-mostly device
+
+$mdadm -CR $md0 -l1 -n3 $dev0 $dev1 --write-mostly $dev2
+sh tests/testdev $md0 1 $mdsize0 64
+
+# unfortunately, we cannot measure if any read requests are going to $dev2
+
+$mdadm -S $md0
+
+$mdadm -CR $md0 -l1 -n3 --write-behind --bitmap=internal $dev0 $dev1 --write-mostly $dev2
+sh tests/testdev $md0 1 $mdsize0 64
+$mdadm -S $md0
+