]> git.ipfire.org Git - thirdparty/mdadm.git/blob - mdmon.c
Don't allow spares when creating 'external' arrays.
[thirdparty/mdadm.git] / mdmon.c
1
2 /*
3 * md array manager.
4 * When md arrays have user-space managed metadata, this is the program
5 * that does the managing.
6 *
7 * Given one argument: the name of the array (e.g. /dev/md0) that is
8 * the container.
9 * We fork off a helper that runs high priority and mlocked. It responds to
10 * device failures and other events that might stop writeout, or that are
11 * trivial to deal with.
12 * The main thread then watches for new arrays being created in the container
13 * and starts monitoring them too ... along with a few other tasks.
14 *
15 * The main thread communicates with the priority thread by writing over
16 * a pipe.
17 * Separate programs can communicate with the main thread via Unix-domain
18 * socket.
19 * The two threads share address space and open file table.
20 *
21 */
22
23 #ifndef _GNU_SOURCE
24 #define _GNU_SOURCE
25 #endif
26
27 #include <unistd.h>
28 #include <stdlib.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/socket.h>
32 #include <sys/un.h>
33 #include <sys/mman.h>
34 #include <sys/syscall.h>
35 #include <sys/wait.h>
36 #include <stdio.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <fcntl.h>
40 #include <signal.h>
41
42 #include <sched.h>
43
44 #include "mdadm.h"
45 #include "mdmon.h"
46
/* Array hand-off slots; not used in this part of the file.
 * NOTE(review): presumably used by the manager/monitor code to retire
 * arrays - confirm against their users.
 */
struct active_array *discard_this;
struct active_array *pending_discard;

/* Thread ids recorded by clone_monitor(): the monitor thread ... */
int mon_tid;
/* ... and the thread that called clone_monitor(). */
int mgr_tid;
51
/*
 * Entry point handed to clone(): receives the container as an untyped
 * pointer, runs the monitor on it and returns 0 when the monitor returns.
 */
int run_child(void *v)
{
	do_monitor((struct supertype *)v);

	return 0;
}
59
60 int clone_monitor(struct supertype *container)
61 {
62 static char stack[4096];
63
64 mon_tid = clone(run_child, stack+4096-64,
65 CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
66 container);
67
68 mgr_tid = syscall(SYS_gettid);
69
70 return mon_tid;
71 }
72
73 static struct superswitch *find_metadata_methods(char *vers)
74 {
75 if (strcmp(vers, "ddf") == 0)
76 return &super_ddf;
77 if (strcmp(vers, "imsm") == 0)
78 return &super_imsm;
79 return NULL;
80 }
81
82
/*
 * Create /var/run/mdadm/<devname>.pid containing our pid.
 *
 * devname: array name used to build the file name.
 * o_excl:  extra open(2) flags, normally O_EXCL or 0.
 *
 * Returns 0 on success or a negative errno value:
 *   -ENAMETOOLONG if the path does not fit the local buffer,
 *   -errno from open/write/close otherwise (-EIO on a short write).
 */
int make_pidfile(char *devname, int o_excl)
{
	char path[100];
	char pid[32];
	int fd;
	int len;
	int rv = 0;
	ssize_t written;

	/* Refuse to truncate: a cut-off name would refer to another file */
	len = snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -ENAMETOOLONG;

	fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
	if (fd < 0)
		return -errno;

	len = snprintf(pid, sizeof(pid), "%d\n", (int)getpid());
	written = write(fd, pid, len);
	if (written < 0)
		rv = -errno;
	else if (written != len)
		rv = -EIO;

	/* Report a close failure only if nothing went wrong earlier */
	if (close(fd) < 0 && rv == 0)
		rv = -errno;

	return rv;
}
98
/*
 * Best-effort removal of a stale monitor for 'devname'.
 *
 * Reads the pid stored in /var/run/mdadm/<devname>.pid, then looks at
 * /proc/<pid>/cmdline and sends SIGTERM only if the start of the command
 * line mentions "mdmon".  Any failure along the way silently gives up.
 */
static void try_kill_monitor(char *devname)
{
	char buf[100];
	int fd;
	int len;
	ssize_t n;
	pid_t pid;

	len = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(buf))
		return;
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;

	/* leave room for the terminator strtoul() needs */
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return;
	buf[n] = '\0';

	pid = strtoul(buf, NULL, 10);
	/* kill() treats 0 and negative pids as process groups - never
	 * let a garbled pid file turn into that.
	 */
	if (pid <= 0)
		return;

	/* kill this process if it is mdmon */
	snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", (unsigned long) pid);
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;

	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n < 0)
		return;
	/* cmdline is NUL-separated; terminating it bounds the search to
	 * what was actually read (effectively argv[0]).
	 */
	buf[n] = '\0';

	if (strstr(buf, "mdmon") != NULL)
		kill(pid, SIGTERM);
}
132
/*
 * Remove /var/run/mdadm/<devname>.pid.
 * Does nothing if the resulting path would not fit the local buffer;
 * unlink() errors are ignored.
 */
void remove_pidfile(char *devname)
{
	char buf[100];
	int len;

	len = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	/* never unlink a truncated (i.e. different) path */
	if (len < 0 || (size_t)len >= sizeof(buf))
		return;

	unlink(buf);
}
140
/*
 * Create the non-blocking, listening Unix-domain control socket
 * /var/run/mdadm/<devname>.sock, replacing any stale socket file.
 *
 * Returns the socket descriptor, or -1 if the path is too long or any
 * of socket/bind/listen/fcntl fails.
 */
int make_control_sock(char *devname)
{
	char path[100];
	int sfd;
	int len;
	long fl;
	struct sockaddr_un addr;

	len = snprintf(path, sizeof(path), "/var/run/mdadm/%s.sock", devname);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;

	/* get rid of a socket left behind by a previous instance */
	unlink(path);
	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
	if (sfd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = PF_LOCAL;
	snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path);
	if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(sfd);
		return -1;
	}

	if (listen(sfd, 10) < 0)
		goto fail_bound;

	fl = fcntl(sfd, F_GETFL, 0);
	if (fl < 0 || fcntl(sfd, F_SETFL, fl | O_NONBLOCK) < 0)
		goto fail_bound;

	return sfd;

fail_bound:
	/* bind() created the socket file; do not leave a dead one behind */
	close(sfd);
	unlink(path);
	return -1;
}
166
/* Flag raised by the SIGHUP handler below. */
int socket_hup_requested;

/* SIGHUP handler: only records that a hang-up was requested. */
static void hup(int sig)
{
	(void)sig;

	socket_hup_requested = 1;
}
172
/* Signal handler that intentionally does nothing. */
static void wake_me(int sig)
{
	(void)sig;
}
177
/*
 * Decide whether mdmon should fork into the background.
 * Always 1, except in DEBUG builds where env_no_mdmon() being true
 * (started by hand for debugging) yields 0.
 */
static int do_fork(void)
{
	int should_fork = 1;

#ifdef DEBUG
	if (env_no_mdmon())
		should_fork = 0;
#endif

	return should_fork;
}
188
189
190
191 int main(int argc, char *argv[])
192 {
193 int mdfd;
194 struct mdinfo *mdi, *di;
195 struct supertype *container;
196 sigset_t set;
197 struct sigaction act;
198 int pfd[2];
199 int status;
200
201 if (argc != 2) {
202 fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
203 exit(2);
204 }
205 mdfd = open(argv[1], O_RDWR);
206 if (mdfd < 0) {
207 fprintf(stderr, "md-manage: %s: %s\n", argv[1],
208 strerror(errno));
209 exit(1);
210 }
211 if (md_get_version(mdfd) < 0) {
212 fprintf(stderr, "md-manage: %s: Not an md device\n",
213 argv[1]);
214 exit(1);
215 }
216
217 /* Fork, and have the child tell us when they are ready */
218 if (do_fork()) {
219 pipe(pfd);
220 switch(fork()) {
221 case -1:
222 fprintf(stderr, "mdmon: failed to fork: %s\n",
223 strerror(errno));
224 exit(1);
225 case 0: /* child */
226 close(pfd[0]);
227 break;
228 default: /* parent */
229 close(pfd[1]);
230 if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
231 wait(&status);
232 status = WEXITSTATUS(status);
233 }
234 exit(status);
235 }
236 } else
237 pfd[0] = pfd[1] = -1;
238 /* hopefully it is a container - we'll check later */
239
240 container = malloc(sizeof(*container));
241 container->devnum = fd2devnum(mdfd);
242 container->devname = devnum2devname(container->devnum);
243 container->device_name = argv[1];
244
245 /* If this fails, we hope it already exists */
246 mkdir("/var/run/mdadm", 0600);
247 /* pid file lives in /var/run/mdadm/mdXX.pid */
248 if (make_pidfile(container->devname, O_EXCL) < 0) {
249 if (ping_monitor(container->devname) == 0) {
250 fprintf(stderr, "mdmon: %s already managed\n",
251 container->devname);
252 exit(3);
253 } else {
254 int err;
255
256 /* cleanup the old monitor, this one is taking over */
257 try_kill_monitor(container->devname);
258 err = make_pidfile(container->devname, 0);
259 if (err < 0) {
260 fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
261 container->devname);
262 if (err == -EROFS) {
263 /* FIXME implement a mechanism to
264 * prevent duplicate monitor instances
265 */
266 fprintf(stderr,
267 "mdmon: continuing on read-only file system\n");
268 } else
269 exit(3);
270 }
271 }
272 }
273
274 container->sock = make_control_sock(container->devname);
275 if (container->sock < 0)
276 fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n");
277 container->arrays = NULL;
278
279 mdi = sysfs_read(mdfd, container->devnum,
280 GET_VERSION|GET_LEVEL|GET_DEVS);
281
282 if (!mdi) {
283 fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
284 container->devname);
285 exit(3);
286 }
287 if (mdi->array.level != UnSet) {
288 fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
289 argv[1]);
290 exit(3);
291 }
292 if (mdi->array.major_version != -1 ||
293 mdi->array.minor_version != -2) {
294 fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
295 argv[1]);
296 exit(3);
297 }
298
299 container->ss = find_metadata_methods(mdi->text_version);
300 if (container->ss == NULL) {
301 fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
302 argv[1], mdi->text_version);
303 exit(3);
304 }
305
306 container->devs = NULL;
307 for (di = mdi->devs; di; di = di->next) {
308 struct mdinfo *cd = malloc(sizeof(*cd));
309 *cd = *di;
310 cd->next = container->devs;
311 container->devs = cd;
312 }
313 sysfs_free(mdi);
314
315
316 if (container->ss->load_super(container, mdfd, argv[1])) {
317 fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
318 argv[1]);
319 exit(3);
320 }
321
322 /* Ok, this is close enough. We can say goodbye to our parent now.
323 */
324 status = 0;
325 write(pfd[1], &status, sizeof(status));
326 close(pfd[1]);
327
328 chdir("/");
329 setsid();
330 close(0);
331 open("/dev/null", O_RDWR);
332 close(1);
333 dup(0);
334 #ifndef DEBUG
335 close(2);
336 dup(0);
337 #endif
338
339 mlockall(MCL_FUTURE);
340
341 /* SIGUSR is sent between parent and child. So both block it
342 * and enable it only with pselect.
343 */
344 sigemptyset(&set);
345 sigaddset(&set, SIGUSR1);
346 sigaddset(&set, SIGHUP);
347 sigprocmask(SIG_BLOCK, &set, NULL);
348 act.sa_handler = wake_me;
349 act.sa_flags = 0;
350 sigaction(SIGUSR1, &act, NULL);
351 act.sa_handler = hup;
352 sigaction(SIGHUP, &act, NULL);
353 act.sa_handler = SIG_IGN;
354 sigaction(SIGPIPE, &act, NULL);
355
356 if (clone_monitor(container) < 0) {
357 fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
358 strerror(errno));
359 exit(2);
360 }
361
362 do_manager(container);
363
364 exit(0);
365 }