]> git.ipfire.org Git - thirdparty/mdadm.git/blob - mdmon.c
Merge branch 'master' into devel-3.0
[thirdparty/mdadm.git] / mdmon.c
1
2 /*
3 * md array manager.
4 * When md arrays have user-space managed metadata, this is the program
5 * that does the managing.
6 *
7 * Given one argument: the name of the array (e.g. /dev/md0) that is
8 * the container.
9 * We fork off a helper that runs high priority and mlocked. It responds to
10 * device failures and other events that might stop writeout, or that are
11 * trivial to deal with.
12 * The main thread then watches for new arrays being created in the container
13 * and starts monitoring them too ... along with a few other tasks.
14 *
15 * The main thread communicates with the priority thread by writing over
16 * a pipe.
17 * Separate programs can communicate with the main thread via Unix-domain
18 * socket.
19 * The two threads share address space and open file table.
20 *
21 */
22
23 #ifndef _GNU_SOURCE
24 #define _GNU_SOURCE
25 #endif
26
27 #include <unistd.h>
28 #include <stdlib.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/socket.h>
32 #include <sys/un.h>
33 #include <sys/mman.h>
34 #include <sys/syscall.h>
35 #include <sys/wait.h>
36 #include <stdio.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <fcntl.h>
40 #include <signal.h>
41 #include <dirent.h>
42
43 #include <sched.h>
44
45 #include "mdadm.h"
46 #include "mdmon.h"
47
/* Array hand-off pointers.
 * NOTE(review): neither pointer is referenced in this file; presumably
 * they are shared with the manager/monitor code - confirm there.
 */
struct active_array *discard_this;
struct active_array *pending_discard;

/* Thread ids recorded by clone_monitor(): mon_tid is the value returned
 * by clone(), mgr_tid is the tid of the thread that called it.
 */
int mon_tid;
int mgr_tid;

/* Set to 1 by the SIGTERM handler.  Once set, make_pidfile(),
 * remove_pidfile() and make_control_sock() all return without acting.
 */
int sigterm;
54
/* Start routine handed to clone(): treat the opaque argument as the
 * container's supertype, run do_monitor() on it and return 0 once
 * do_monitor() comes back.
 */
int run_child(void *v)
{
	do_monitor((struct supertype *)v);
	return 0;
}
62
63 int clone_monitor(struct supertype *container)
64 {
65 static char stack[4096];
66
67 mon_tid = clone(run_child, stack+4096-64,
68 CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
69 container);
70
71 mgr_tid = syscall(SYS_gettid);
72
73 return mon_tid;
74 }
75
76 static struct superswitch *find_metadata_methods(char *vers)
77 {
78 if (strcmp(vers, "ddf") == 0)
79 return &super_ddf;
80 if (strcmp(vers, "imsm") == 0)
81 return &super_imsm;
82 return NULL;
83 }
84
85
86 int make_pidfile(char *devname, int o_excl)
87 {
88 char path[100];
89 char pid[10];
90 int fd;
91 int n;
92
93 if (sigterm)
94 return -1;
95
96 sprintf(path, "/var/run/mdadm/%s.pid", devname);
97
98 fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
99 if (fd < 0)
100 return -errno;
101 sprintf(pid, "%d\n", getpid());
102 n = write(fd, pid, strlen(pid));
103 close(fd);
104 if (n < 0)
105 return -errno;
106 return 0;
107 }
108
109 int is_container_member(struct mdstat_ent *mdstat, char *container)
110 {
111 if (mdstat->metadata_version == NULL ||
112 strncmp(mdstat->metadata_version, "external:", 9) != 0 ||
113 !is_subarray(mdstat->metadata_version+9) ||
114 strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 ||
115 mdstat->metadata_version[10+strlen(container)] != '/')
116 return 0;
117
118 return 1;
119 }
120
121 void remove_pidfile(char *devname);
122 static void try_kill_monitor(char *devname)
123 {
124 char buf[100];
125 int fd;
126 pid_t pid;
127 struct mdstat_ent *mdstat;
128
129 sprintf(buf, "/var/run/mdadm/%s.pid", devname);
130 fd = open(buf, O_RDONLY);
131 if (fd < 0)
132 return;
133
134 if (read(fd, buf, sizeof(buf)) < 0) {
135 close(fd);
136 return;
137 }
138
139 close(fd);
140 pid = strtoul(buf, NULL, 10);
141
142 /* first rule of survival... don't off yourself */
143 if (pid == getpid())
144 return;
145
146 /* kill this process if it is mdmon */
147 sprintf(buf, "/proc/%lu/cmdline", (unsigned long) pid);
148 fd = open(buf, O_RDONLY);
149 if (fd < 0)
150 return;
151
152 if (read(fd, buf, sizeof(buf)) < 0) {
153 close(fd);
154 return;
155 }
156
157 if (!strstr(buf, "mdmon"))
158 return;
159
160 kill(pid, SIGTERM);
161
162 mdstat = mdstat_read(0, 0);
163 for ( ; mdstat; mdstat = mdstat->next)
164 if (is_container_member(mdstat, devname)) {
165 sprintf(buf, "/dev/%s", mdstat->dev);
166 WaitClean(buf, 0);
167 }
168 free_mdstat(mdstat);
169 remove_pidfile(devname);
170 }
171
172 void remove_pidfile(char *devname)
173 {
174 char buf[100];
175
176 if (sigterm)
177 return;
178
179 sprintf(buf, "/var/run/mdadm/%s.pid", devname);
180 unlink(buf);
181 sprintf(buf, "/var/run/mdadm/%s.sock", devname);
182 unlink(buf);
183 }
184
185 int make_control_sock(char *devname)
186 {
187 char path[100];
188 int sfd;
189 long fl;
190 struct sockaddr_un addr;
191
192 if (sigterm)
193 return -1;
194
195 sprintf(path, "/var/run/mdadm/%s.sock", devname);
196 unlink(path);
197 sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
198 if (sfd < 0)
199 return -1;
200
201 addr.sun_family = PF_LOCAL;
202 strcpy(addr.sun_path, path);
203 if (bind(sfd, &addr, sizeof(addr)) < 0) {
204 close(sfd);
205 return -1;
206 }
207 listen(sfd, 10);
208 fl = fcntl(sfd, F_GETFL, 0);
209 fl |= O_NONBLOCK;
210 fcntl(sfd, F_SETFL, fl);
211 return sfd;
212 }
213
/* Raised to 1 by the SIGHUP handler below. */
int socket_hup_requested;

/* SIGHUP handler: just note that a hangup was requested. */
static void hup(int sig)
{
	(void)sig;
	socket_hup_requested = 1;
}
219
220 static void term(int sig)
221 {
222 sigterm = 1;
223 }
224
/* Deliberately empty handler: installing it lets the signal be
 * delivered without the default action being taken.
 */
static void wake_me(int sig)
{
	(void)sig;
}
229
/* Decide whether mdmon should fork into the background.
 * Always 1, except in DEBUG builds where env_no_mdmon() returning
 * non-zero turns forking off so mdmon can be started by hand.
 */
static int do_fork(void)
{
	int should_fork = 1;

#ifdef DEBUG
	if (env_no_mdmon())
		should_fork = 0;
#endif

	return should_fork;
}
240
/* Print the command-line synopsis on stderr and exit with status 2. */
void usage(void)
{
	fputs("Usage: mdmon [--switch-root dir] /device/name/for/container\n",
	      stderr);
	exit(2);
}
246
247 int main(int argc, char *argv[])
248 {
249 int mdfd;
250 struct mdinfo *mdi, *di;
251 struct supertype *container;
252 sigset_t set;
253 struct sigaction act;
254 int pfd[2];
255 int status;
256 int ignore;
257 char *container_name = NULL;
258 char *switchroot = NULL;
259
260 switch (argc) {
261 case 2:
262 container_name = argv[1];
263 break;
264 case 4:
265 if (strcmp(argv[1], "--switch-root") != 0) {
266 fprintf(stderr, "mdmon: unknown argument %s\n", argv[1]);
267 usage();
268 }
269 switchroot = argv[2];
270 container_name = argv[3];
271 break;
272 default:
273 usage();
274 }
275
276 mdfd = open(container_name, O_RDWR);
277 if (mdfd < 0) {
278 fprintf(stderr, "mdmon: %s: %s\n", container_name,
279 strerror(errno));
280 exit(1);
281 }
282 if (md_get_version(mdfd) < 0) {
283 fprintf(stderr, "mdmon: %s: Not an md device\n",
284 container_name);
285 exit(1);
286 }
287
288 /* Fork, and have the child tell us when they are ready */
289 if (do_fork()) {
290 if (pipe(pfd) != 0) {
291 fprintf(stderr, "mdmon: failed to create pipe\n");
292 exit(1);
293 }
294 switch(fork()) {
295 case -1:
296 fprintf(stderr, "mdmon: failed to fork: %s\n",
297 strerror(errno));
298 exit(1);
299 case 0: /* child */
300 close(pfd[0]);
301 break;
302 default: /* parent */
303 close(pfd[1]);
304 if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
305 wait(&status);
306 status = WEXITSTATUS(status);
307 }
308 exit(status);
309 }
310 } else
311 pfd[0] = pfd[1] = -1;
312
313 container = malloc(sizeof(*container));
314 container->devnum = fd2devnum(mdfd);
315 container->devname = devnum2devname(container->devnum);
316 container->device_name = container_name;
317 container->arrays = NULL;
318
319 if (!container->devname) {
320 fprintf(stderr, "mdmon: failed to allocate container name string\n");
321 exit(3);
322 }
323
324 mdi = sysfs_read(mdfd, container->devnum,
325 GET_VERSION|GET_LEVEL|GET_DEVS);
326
327 if (!mdi) {
328 fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
329 container->devname);
330 exit(3);
331 }
332 if (mdi->array.level != UnSet) {
333 fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
334 container_name);
335 exit(3);
336 }
337 if (mdi->array.major_version != -1 ||
338 mdi->array.minor_version != -2) {
339 fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
340 container_name);
341 exit(3);
342 }
343
344 container->ss = find_metadata_methods(mdi->text_version);
345 if (container->ss == NULL) {
346 fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
347 container_name, mdi->text_version);
348 exit(3);
349 }
350
351 container->devs = NULL;
352 for (di = mdi->devs; di; di = di->next) {
353 struct mdinfo *cd = malloc(sizeof(*cd));
354 *cd = *di;
355 cd->next = container->devs;
356 container->devs = cd;
357 }
358 sysfs_free(mdi);
359
360 /* SIGUSR is sent between parent and child. So both block it
361 * and enable it only with pselect.
362 */
363 sigemptyset(&set);
364 sigaddset(&set, SIGUSR1);
365 sigaddset(&set, SIGHUP);
366 sigaddset(&set, SIGALRM);
367 sigaddset(&set, SIGTERM);
368 sigprocmask(SIG_BLOCK, &set, NULL);
369 act.sa_handler = wake_me;
370 act.sa_flags = 0;
371 sigaction(SIGUSR1, &act, NULL);
372 sigaction(SIGALRM, &act, NULL);
373 act.sa_handler = hup;
374 sigaction(SIGHUP, &act, NULL);
375 act.sa_handler = term;
376 sigaction(SIGTERM, &act, NULL);
377 act.sa_handler = SIG_IGN;
378 sigaction(SIGPIPE, &act, NULL);
379
380 if (switchroot) {
381 /* we assume we assume that /sys /proc /dev are available in
382 * the new root (see nash:setuproot)
383 *
384 * kill any monitors in the current namespace and change
385 * to the new one
386 */
387 try_kill_monitor(container->devname);
388 if (chroot(switchroot) != 0) {
389 fprintf(stderr, "mdmon: failed to chroot to '%s': %s\n",
390 switchroot, strerror(errno));
391 exit(4);
392 }
393 }
394
395 /* If this fails, we hope it already exists
396 * pid file lives in /var/run/mdadm/mdXX.pid
397 */
398 mkdir("/var", 0600);
399 mkdir("/var/run", 0600);
400 mkdir("/var/run/mdadm", 0600);
401 ignore = chdir("/");
402 if (make_pidfile(container->devname, O_EXCL) < 0) {
403 if (ping_monitor(container->devname) == 0) {
404 fprintf(stderr, "mdmon: %s already managed\n",
405 container->devname);
406 exit(3);
407 } else {
408 int err;
409
410 /* cleanup the old monitor, this one is taking over */
411 try_kill_monitor(container->devname);
412 err = make_pidfile(container->devname, 0);
413 if (err < 0) {
414 fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
415 container->devname);
416 if (err == -EROFS) {
417 /* FIXME implement a mechanism to
418 * prevent duplicate monitor instances
419 */
420 fprintf(stderr,
421 "mdmon: continuing on read-only file system\n");
422 } else
423 exit(3);
424 }
425 }
426 }
427 container->sock = make_control_sock(container->devname);
428
429 if (container->ss->load_super(container, mdfd, container_name)) {
430 fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
431 container_name);
432 exit(3);
433 }
434
435 /* Ok, this is close enough. We can say goodbye to our parent now.
436 */
437 status = 0;
438 if (write(pfd[1], &status, sizeof(status)) < 0)
439 fprintf(stderr, "mdmon: failed to notify our parent: %d\n",
440 getppid());
441 close(pfd[1]);
442
443 setsid();
444 close(0);
445 open("/dev/null", O_RDWR);
446 close(1);
447 ignore = dup(0);
448 #ifndef DEBUG
449 close(2);
450 ignore = dup(0);
451 #endif
452
453 mlockall(MCL_FUTURE);
454
455 if (clone_monitor(container) < 0) {
456 fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
457 strerror(errno));
458 exit(2);
459 }
460
461 do_manager(container);
462
463 exit(0);
464 }