/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
- * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
*
*
* This program is free software; you can redistribute it and/or modify
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
- * Email: <neilb@cse.unsw.edu.au>
- * Paper: Neil Brown
- * School of Computer Science and Engineering
- * The University of New South Wales
- * Sydney, 2052
- * Australia
+ * Email: <neilb@suse.de>
*/
#define _GNU_SOURCE
#define DEFAULT_BITMAP_DELAY 5
#define DEFAULT_MAX_WRITE_BEHIND 256
+#define VAR_RUN "/var/run/mdadm"
+/* ALT_RUN should be somewhere that persists across the pivotroot
+ * from early boot to late boot.
+ * If you don't have /lib/init/rw you might want to use /dev/.something
+ */
+#ifndef ALT_RUN
+#define ALT_RUN "/lib/init/rw/mdadm"
+#endif /* ALT_RUN */
+
#include "md_u.h"
#include "md_p.h"
#include "bitmap.h"
#endif /* __KLIBC__ */
+/*
+ * min()/max() macros that also do strict type-checking.
+ * Each argument is copied into a temporary declared with typeof(), so
+ * it is evaluated exactly once.  The otherwise "unnecessary" comparison
+ * of the two temporaries' addresses is the type check: the compiler is
+ * expected to diagnose it when x and y have different types.
+ * Both macros rely on the GNU C extensions typeof and ({ ... }).
+ */
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
/* general information that might be extracted from a superblock */
struct mdinfo {
unsigned long long component_size; /* same as array.size, except in
* sectors and up to 64bits.
*/
+ unsigned long long custom_array_size; /* size for non-default sized
+ * arrays (in sectors)
+ */
int reshape_active;
unsigned long long reshape_progress;
- unsigned long long resync_start;
+ union {
+ unsigned long long resync_start; /* per-array resync position */
+ unsigned long long recovery_start; /* per-device rebuild position */
+ #define MaxSector (~0ULL) /* resync/recovery complete position */
+ };
unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
int errors;
int cache_size; /* size of raid456 stripe cache*/
int mismatch_cnt;
char text_version[50];
+ void *update_private; /* for passing metadata-format
+ * specific update data
+ * between successive calls to
+ * update_super()
+ */
int container_member; /* for assembling external-metatdata arrays
* This is to be used internally by metadata
struct mdinfo *next;
/* Device info for mdmon: */
+ int recovery_fd;
int state_fd;
#define DS_FAULTY 1
#define DS_INSYNC 2
Symlinks,
AutoDetect,
Waitclean,
+ DetailPlatform,
};
/* structures read from config file */
char *bitmap_file;
int bitmap_fd;
- char *container; /* /dev/whatever name of container. You
- * would expect this to be the 'devname'
+ char *container; /* /dev/whatever name of container, or
+ * uuid of container. You would expect
+ * this to be the 'devname' or UUID
* of some other entry.
*/
char *member; /* subarray within a container */
struct mddev_ident_s *next;
+ union {
+ /* fields needed by different users of this structure */
+ int assembled; /* set when assembly succeeds */
+ };
} *mddev_ident_t;
/* List of device names - wildcards expanded */
char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */
char re_add;
char used; /* set when used */
+ struct mdinfo *content; /* If devname is a container, this might list
+ * the remaining member arrays. */
struct mddev_dev_s *next;
} *mddev_dev_t;
int devnum;
char metadata[20];
int uuid[4];
+ int bad;
char *path;
};
extern int map_update(struct map_ent **mpp, int devnum, char *metadata,
int uuid[4], char *path);
extern struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4]);
+extern struct map_ent *map_by_devnum(struct map_ent **map, int devnum);
+extern struct map_ent *map_by_name(struct map_ent **map, char *name);
extern void map_read(struct map_ent **melp);
extern int map_write(struct map_ent *mel);
extern void map_delete(struct map_ent **mapp, int devnum);
extern void map_free(struct map_ent *map);
extern void map_add(struct map_ent **melp,
int devnum, char *metadata, int uuid[4], char *path);
+extern int map_lock(struct map_ent **melp);
+extern void map_unlock(struct map_ent **melp);
/* various details can be requested */
-#define GET_LEVEL 1
-#define GET_LAYOUT 2
-#define GET_COMPONENT 4
-#define GET_CHUNK 8
-#define GET_CACHE 16
-#define GET_MISMATCH 32
-#define GET_VERSION 64
-#define GET_DISKS 128
-#define GET_DEGRADED 256
-#define GET_SAFEMODE 512
-
-#define GET_DEVS 1024 /* gets role, major, minor */
-#define GET_OFFSET 2048
-#define GET_SIZE 4096
-#define GET_STATE 8192
-#define GET_ERROR 16384
+enum sysfs_read_flags { /* bit flags: each value is a distinct single bit */
+ GET_LEVEL = (1 << 0),
+ GET_LAYOUT = (1 << 1),
+ GET_COMPONENT = (1 << 2),
+ GET_CHUNK = (1 << 3),
+ GET_CACHE = (1 << 4),
+ GET_MISMATCH = (1 << 5),
+ GET_VERSION = (1 << 6),
+ GET_DISKS = (1 << 7),
+ GET_DEGRADED = (1 << 8),
+ GET_SAFEMODE = (1 << 9),
+ GET_DEVS = (1 << 10), /* gets role, major, minor */
+ GET_OFFSET = (1 << 11),
+ GET_SIZE = (1 << 12),
+ GET_STATE = (1 << 13),
+ GET_ERROR = (1 << 14),
+ SKIP_GONE_DEVS = (1 << 15),
+};
/* If fd >= 0, get the array it is open on,
* else use devnum. >=0 -> major9. <0.....
char *name, char *val);
extern int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long val);
+extern int sysfs_uevent(struct mdinfo *sra, char *event);
+extern int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
+ char *name);
+extern int sysfs_fd_get_ll(int fd, unsigned long long *val);
extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long *val);
+extern int sysfs_fd_get_str(int fd, char *val, int size);
+extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, char *val, int size);
extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
extern int sysfs_set_array(struct mdinfo *info, int vers);
-extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd);
+extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume);
extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
extern int sysfs_unique_holder(int devnum, long rdev);
extern int load_sys(char *path, char *buf);
extern int save_stripes(int *source, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int nwrites, int *dest,
- unsigned long long start, unsigned long long length);
+ unsigned long long start, unsigned long long length,
+ char *buf);
extern int restore_stripes(int *dest, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int source, unsigned long long read_offset,
extern char *map_num(mapping_t *map, int num);
extern int map_name(mapping_t *map, char *name);
-extern mapping_t r5layout[], pers[], modes[], faultylayout[];
+extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
extern char *map_dev(int major, int minor, int create);
* device. ->load_super has been called.
*/
void (*examine_super)(struct supertype *st, char *homehost);
- void (*brief_examine_super)(struct supertype *st);
+ void (*brief_examine_super)(struct supertype *st, int verbose);
+ void (*brief_examine_subarrays)(struct supertype *st, int verbose);
void (*export_examine_super)(struct supertype *st);
/* Used to report details of an active array.
void (*brief_detail_super)(struct supertype *st);
void (*export_detail_super)(struct supertype *st);
+ /* Optional: platform hardware / firmware details */
+ int (*detail_platform)(int verbose, int enumerate_only);
+
/* Used:
* to get uuid to storing in bitmap metadata
* and 'reshape' backup-data metadata
*/
void (*uuid_from_super)(struct supertype *st, int uuid[4]);
- /* Extra generic details from metadata. This could be details about
+ /* Extract generic details from metadata. This could be details about
* the container, or about an individual array within the container.
* The determination is made either by:
* load_super being given a 'component' string.
/* update the metadata to include new device, either at create or
* when hot-adding a spare.
*/
- void (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo,
+ int (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo,
int fd, char *devname);
/* Write metadata to one device when fixing problems or adding
* added to validate changing size and new devices. If there are
* inter-device dependencies, it should record sufficient details
* so these can be validated.
+ * Both 'size' and '*freesize' are in sectors; 'chunk' is in bytes.
*/
int (*validate_geometry)(struct supertype *st, int level, int layout,
int raiddisks,
int verbose);
struct mdinfo *(*container_content)(struct supertype *st);
+ /* Allow a metadata handler to override mdadm's default layouts */
+ int (*default_layout)(int level); /* optional */
/* for mdmon */
int (*open_new)(struct supertype *c, struct active_array *a,
int swapuuid; /* true if uuid is bigending rather than hostendian */
int external;
+ const char *name; /* canonical metadata name */
} super0, super1, super_ddf, *superlist[];
extern struct superswitch super_imsm;
* external:/md0/12
*/
int devcnt;
- char *device_name; /* e.g. /dev/md/whatever */
struct mdinfo *devs;
extern int get_dev_size(int fd, char *dname, unsigned long long *sizep);
extern void get_one_disk(int mdfd, mdu_array_info_t *ainf,
mdu_disk_info_t *disk);
+void wait_for(char *dev, int fd);
#if __GNUC__ < 3
struct stat64;
#define HAVE_NFTW we assume
#define HAVE_FTW
-#ifdef UCLIBC
+#ifdef __UCLIBC__
# include <features.h>
+# ifndef __UCLIBC_HAS_LFS__
+# define lseek64 lseek
+# endif
# ifndef __UCLIBC_HAS_FTW__
# undef HAVE_FTW
# undef HAVE_NFTW
extern int Manage_ro(char *devname, int fd, int readonly);
extern int Manage_runstop(char *devname, int fd, int runstop, int quiet);
extern int Manage_resize(char *devname, int fd, long long size, int raid_disks);
-extern int Manage_reconfig(char *devname, int fd, int layout);
extern int Manage_subdevs(char *devname, int fd,
mddev_dev_t devlist, int verbose);
extern int autodetect(void);
extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force);
extern int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
long long size,
- int level, int layout, int chunksize, int raid_disks);
+ int level, char *layout_str, int chunksize, int raid_disks);
extern int Grow_restart(struct supertype *st, struct mdinfo *info,
- int *fdlist, int cnt, char *backup_file);
-
+ int *fdlist, int cnt, char *backup_file, int verbose);
+extern int Grow_continue(int mdfd, struct supertype *st,
+ struct mdinfo *info, char *backup_file);
-extern int Assemble(struct supertype *st, char *mddev, int mdfd,
+extern int Assemble(struct supertype *st, char *mddev,
mddev_ident_t ident,
mddev_dev_t devlist, char *backup_file,
int readonly, int runstop,
- char *update, char *homehost,
+ char *update, char *homehost, int require_homehost,
int verbose, int force);
-extern int Build(char *mddev, int mdfd, int chunk, int level, int layout,
- int raiddisks,
- mddev_dev_t devlist, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int write_behind, int delay, int verbose);
+extern int Build(char *mddev, int chunk, int level, int layout,
+ int raiddisks, mddev_dev_t devlist, int assume_clean,
+ char *bitmap_file, int bitmap_chunk, int write_behind,
+ int delay, int verbose, int autof, unsigned long long size);
-extern int Create(struct supertype *st, char *mddev, int mdfd,
+extern int Create(struct supertype *st, char *mddev,
int chunk, int level, int layout, unsigned long long size, int raiddisks, int sparedisks,
char *name, char *homehost, int *uuid,
int subdevs, mddev_dev_t devlist,
int runstop, int verbose, int force, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int write_behind, int delay);
+ char *bitmap_file, int bitmap_chunk, int write_behind, int delay, int autof);
extern int Detail(char *dev, int brief, int export, int test, char *homehost);
+extern int Detail_Platform(struct superswitch *ss, int scan, int verbose);
extern int Query(char *dev);
extern int Examine(mddev_dev_t devlist, int brief, int export, int scan,
int SparcAdjust, struct supertype *forcest, char *homehost);
extern int Monitor(mddev_dev_t devlist,
char *mailaddr, char *alert_cmd,
int period, int daemonise, int scan, int oneshot,
- int dosyslog, int test, char *pidfile);
+ int dosyslog, int test, char *pidfile, int increments);
-extern int Kill(char *dev, int force, int quiet, int noexcl);
+extern int Kill(char *dev, struct supertype *st, int force, int quiet, int noexcl);
extern int Wait(char *dev);
-extern int WaitClean(char *dev, int verbose);
+extern int WaitClean(char *dev, int sock, int verbose);
extern int Incremental(char *devname, int verbose, int runstop,
- struct supertype *st, char *homehost, int autof);
+ struct supertype *st, char *homehost, int require_homehost,
+ int autof);
extern int Incremental_container(struct supertype *st, char *devname,
- int verbose, int runstop, int autof);
+ int verbose, int runstop, int autof,
+ int trustworthy);
extern void RebuildMap(void);
extern int IncrementalScan(int verbose);
extern int md_get_version(int fd);
extern int get_linux_version(void);
+extern long long parse_size(char *size);
extern int parse_uuid(char *str, int uuid[4]);
+extern int parse_layout_10(char *layout);
+extern int parse_layout_faulty(char *layout);
extern int check_ext2(int fd, char *name);
extern int check_reiser(int fd, char *name);
extern int check_raid(int fd, char *name);
+extern int check_partitions(int fd, char *dname, unsigned long long freesize);
extern int get_mdp_major(void);
extern int dev_open(char *dev, int flags);
+extern int open_dev(int devnum);
extern int open_dev_excl(int devnum);
extern int is_standard(char *dev, int *nump);
+extern int same_dev(char *one, char *two);
extern int parse_auto(char *str, char *msg, int config);
extern mddev_ident_t conf_get_ident(char *dev);
extern mddev_dev_t conf_get_devs(void);
extern int conf_test_dev(char *devname);
+extern int conf_test_metadata(const char *version);
extern struct createinfo *conf_get_create_info(void);
extern void set_conffile(char *file);
extern char *conf_get_mailaddr(void);
extern char *conf_get_mailfrom(void);
extern char *conf_get_program(void);
-extern char *conf_get_homehost(void);
+extern char *conf_get_homehost(int *require_homehostp);
extern char *conf_line(FILE *file);
extern char *conf_word(FILE *file, int allow_key);
+extern int conf_name_is_free(char *name);
+extern int devname_matches(char *name, char *match);
+extern struct mddev_ident_s *conf_match(struct mdinfo *info, struct supertype *st);
+
extern void free_line(char *line);
extern int match_oneof(char *devices, char *devname);
extern void uuid_from_super(int uuid[4], mdp_super_t *super);
extern const int uuid_match_any[4];
extern int same_uuid(int a[4], int b[4], int swapuuid);
extern void copy_uuid(void *a, int b[4], int swapuuid);
+extern char *__fname_from_uuid(int id[4], int swap, char *buf, char sep);
extern char *fname_from_uuid(struct supertype *st,
struct mdinfo *info, char *buf, char sep);
extern unsigned long calc_csum(void *super, int bytes);
int chunksize, unsigned long long devsize);
extern int flush_metadata_updates(struct supertype *st);
extern void append_metadata_update(struct supertype *st, void *buf, int len);
-extern struct superswitch *find_metadata_methods(char *vers);
+extern int assemble_container_content(struct supertype *st, int mdfd,
+ struct mdinfo *content, int runstop,
+ char *chosen_name, int verbose);
extern int add_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
+unsigned long long min_recovery_start(struct mdinfo *array);
extern char *human_size(long long bytes);
extern char *human_size_brief(long long bytes);
extern char DefaultConfFile[];
-extern int open_mddev(char *dev, int autof);
-extern int open_mddev_devnum(char *devname, int devnum, char *name,
- char *chosen_name, int parts);
+extern int create_mddev(char *dev, char *name, int autof, int trustworthy,
+ char *chosen);
+/* values for 'trustworthy' */
+#define LOCAL 1
+#define FOREIGN 2
+#define METADATA 3
+extern int open_mddev(char *dev, int report_errors);
extern int open_container(int fd);
+extern char *pid_dir;
extern int mdmon_running(int devnum);
-extern int signal_mdmon(int devnum);
-extern int env_no_mdmon(void);
+extern int mdmon_pid(int devnum);
+extern int check_env(char *name);
+extern __u32 random32(void);
extern int start_mdmon(int devnum);
extern char *devnum2devname(int num);
#define dprintf(fmt, arg...) \
({ if (0) fprintf(stderr, fmt, ##arg); 0; })
#endif
+#include <assert.h>
+#include <stdarg.h>
+static inline int xasprintf(char **strp, const char *fmt, ...) { /* checked
+ * wrapper: forwards strp, fmt and the variadic arguments to vasprintf()
+ * and returns its result after asserting that the result is not negative.
+ */
+ va_list ap;
+ int ret;
+ va_start(ap, fmt);
+ ret = vasprintf(strp, fmt, ap);
+ va_end(ap);
+ assert(ret >= 0); /* NOTE(review): assert() is a no-op when NDEBUG is defined, so a negative result would then be returned unchecked - confirm build flags */
+ return ret;
+}
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)
#define makedev(M,m) (((M)<<8) | (m))
#endif
-/* for raid5 */
+/* for raid4/5/6 */
#define ALGORITHM_LEFT_ASYMMETRIC 0
#define ALGORITHM_RIGHT_ASYMMETRIC 1
#define ALGORITHM_LEFT_SYMMETRIC 2
#define ALGORITHM_RIGHT_SYMMETRIC 3
+
+/* Define non-rotating (raid4) algorithms. These allow
+ * conversion of raid4 to raid5.
+ */
+#define ALGORITHM_PARITY_0 4 /* P or P,Q are initial devices */
+#define ALGORITHM_PARITY_N 5 /* P or P,Q are final devices. */
+
+/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
+ * Firstly, the exact positioning of the parity block is slightly
+ * different between the 'LEFT_*' modes of md and the "_N_*" modes
+ * of DDF.
+ * Secondly, the order of datablocks over which the Q syndrome is computed
+ * is different.
+ * Consequently we have different layouts for DDF/raid6 than md/raid6.
+ * These layouts are from the DDFv1.2 spec.
+ * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
+ * leaves RLQ=3 as 'Vendor Specific'
+ */
+
+#define ALGORITHM_ROTATING_ZERO_RESTART 8 /* DDF PRL=6 RLQ=1 */
+#define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */
+#define ALGORITHM_ROTATING_N_CONTINUE 10 /*DDF PRL=6 RLQ=3 */
+
+
+/* For every RAID5 algorithm we define a RAID6 algorithm
+ * with exactly the same layout for data and parity, and
+ * with the Q block always on the last device (N-1).
+ * This allows trivial conversion from RAID5 to RAID6
+ */
+#define ALGORITHM_LEFT_ASYMMETRIC_6 16
+#define ALGORITHM_RIGHT_ASYMMETRIC_6 17
+#define ALGORITHM_LEFT_SYMMETRIC_6 18
+#define ALGORITHM_RIGHT_SYMMETRIC_6 19
+#define ALGORITHM_PARITY_0_6 20
+#define ALGORITHM_PARITY_N_6 ALGORITHM_PARITY_N
+
+/* Define PATH_MAX in case we don't use glibc or the standard library does
+ * not define PATH_MAX. Assume the maximum path length is 4K characters.
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+