mdmon: wait after trying to kill
[thirdparty/mdadm.git] / mdmon.c
CommitLineData
549e9569
NB
1
2/*
3 * md array manager.
4 * When md arrays have user-space managed metadata, this is the program
5 * that does the managing.
6 *
7 * Given one argument: the name of the array (e.g. /dev/md0) that is
8 * the container.
9 * We fork off a helper that runs high priority and mlocked. It responds to
10 * device failures and other events that might stop writeout, or that are
11 * trivial to deal with.
12 * The main thread then watches for new arrays being created in the container
13 * and starts monitoring them too ... along with a few other tasks.
14 *
15 * The main thread communicates with the priority thread by writing over
16 * a pipe.
17 * Separate programs can communicate with the main thread via Unix-domain
18 * socket.
19 * The two threads share address space and open file table.
20 *
21 */
22
23#ifndef _GNU_SOURCE
24#define _GNU_SOURCE
25#endif
26
27#include <unistd.h>
28#include <stdlib.h>
4d43913c 29#include <sys/types.h>
549e9569
NB
30#include <sys/stat.h>
31#include <sys/socket.h>
32#include <sys/un.h>
33#include <sys/mman.h>
4d43913c 34#include <sys/syscall.h>
9fe32043 35#include <sys/wait.h>
549e9569
NB
36#include <stdio.h>
37#include <errno.h>
38#include <string.h>
39#include <fcntl.h>
b109d928 40#include <signal.h>
549e9569
NB
41
42#include <sched.h>
43
44#include "mdadm.h"
45#include "mdmon.h"
46
549e9569
NB
47struct active_array *discard_this;
48struct active_array *pending_discard;
4d43913c
NB
49
50int mon_tid, mgr_tid;
549e9569 51
6144ed44
DW
52int sigterm;
53
549e9569
NB
/*
 * Start routine handed to clone() by clone_monitor().
 * @v: the container to monitor, passed through clone() as an opaque pointer.
 * Returns 0 once do_monitor() returns.
 */
int run_child(void *v)
{
	do_monitor((struct supertype *)v);

	return 0;
}
61
62int clone_monitor(struct supertype *container)
63{
549e9569 64 static char stack[4096];
549e9569 65
2cc98f9e 66 mon_tid = clone(run_child, stack+4096-64,
549e9569
NB
67 CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
68 container);
3e70c845 69
4d43913c 70 mgr_tid = syscall(SYS_gettid);
2cc98f9e
DW
71
72 return mon_tid;
549e9569
NB
73}
74
75static struct superswitch *find_metadata_methods(char *vers)
76{
77 if (strcmp(vers, "ddf") == 0)
78 return &super_ddf;
5b65005f
DW
79 if (strcmp(vers, "imsm") == 0)
80 return &super_imsm;
549e9569
NB
81 return NULL;
82}
83
84
295646b3 85int make_pidfile(char *devname, int o_excl)
549e9569
NB
86{
87 char path[100];
88 char pid[10];
89 int fd;
3d2c4fc7
DW
90 int n;
91
6144ed44
DW
92 if (sigterm)
93 return -1;
94
549e9569
NB
95 sprintf(path, "/var/run/mdadm/%s.pid", devname);
96
b109d928 97 fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
549e9569 98 if (fd < 0)
295646b3 99 return -errno;
549e9569 100 sprintf(pid, "%d\n", getpid());
3d2c4fc7 101 n = write(fd, pid, strlen(pid));
549e9569 102 close(fd);
3d2c4fc7
DW
103 if (n < 0)
104 return -errno;
549e9569
NB
105 return 0;
106}
107
883a6142
DW
108int is_container_member(struct mdstat_ent *mdstat, char *container)
109{
110 if (mdstat->metadata_version == NULL ||
111 strncmp(mdstat->metadata_version, "external:", 9) != 0 ||
112 !is_subarray(mdstat->metadata_version+9) ||
113 strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 ||
114 mdstat->metadata_version[10+strlen(container)] != '/')
115 return 0;
116
117 return 1;
118}
119
120void remove_pidfile(char *devname);
b109d928
DW
121static void try_kill_monitor(char *devname)
122{
123 char buf[100];
124 int fd;
125 pid_t pid;
883a6142 126 struct mdstat_ent *mdstat;
b109d928
DW
127
128 sprintf(buf, "/var/run/mdadm/%s.pid", devname);
129 fd = open(buf, O_RDONLY);
130 if (fd < 0)
131 return;
132
133 if (read(fd, buf, sizeof(buf)) < 0) {
134 close(fd);
135 return;
136 }
137
138 close(fd);
139 pid = strtoul(buf, NULL, 10);
140
141 /* kill this process if it is mdmon */
142 sprintf(buf, "/proc/%lu/cmdline", (unsigned long) pid);
143 fd = open(buf, O_RDONLY);
144 if (fd < 0)
145 return;
146
147 if (read(fd, buf, sizeof(buf)) < 0) {
148 close(fd);
149 return;
150 }
151
883a6142
DW
152 if (!strstr(buf, "mdmon"))
153 return;
154
155 kill(pid, SIGTERM);
156
157 mdstat = mdstat_read(0, 0);
158 for ( ; mdstat; mdstat = mdstat->next)
159 if (is_container_member(mdstat, devname)) {
160 sprintf(buf, "/dev/%s", mdstat->dev);
161 WaitClean(buf);
162 }
163 free_mdstat(mdstat);
164 remove_pidfile(devname);
b109d928
DW
165}
166
e0d6609f
NB
167void remove_pidfile(char *devname)
168{
169 char buf[100];
170
6144ed44
DW
171 if (sigterm)
172 return;
173
e0d6609f
NB
174 sprintf(buf, "/var/run/mdadm/%s.pid", devname);
175 unlink(buf);
57752795
N
176 sprintf(buf, "/var/run/mdadm/%s.sock", devname);
177 unlink(buf);
e0d6609f
NB
178}
179
295646b3 180int make_control_sock(char *devname)
549e9569
NB
181{
182 char path[100];
183 int sfd;
184 long fl;
185 struct sockaddr_un addr;
186
6144ed44
DW
187 if (sigterm)
188 return -1;
189
549e9569
NB
190 sprintf(path, "/var/run/mdadm/%s.sock", devname);
191 unlink(path);
192 sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
193 if (sfd < 0)
194 return -1;
195
196 addr.sun_family = PF_LOCAL;
197 strcpy(addr.sun_path, path);
198 if (bind(sfd, &addr, sizeof(addr)) < 0) {
199 close(sfd);
200 return -1;
201 }
202 listen(sfd, 10);
203 fl = fcntl(sfd, F_GETFL, 0);
204 fl |= O_NONBLOCK;
205 fcntl(sfd, F_SETFL, fl);
206 return sfd;
207}
208
295646b3
DW
/* Set to 1 by the SIGHUP handler below. */
int socket_hup_requested;

/* SIGHUP handler: just record that the signal arrived. */
static void hup(int sig)
{
	(void)sig;

	socket_hup_requested = 1;
}
214
6144ed44
DW
215static void term(int sig)
216{
217 sigterm = 1;
218}
219
4d43913c
NB
/*
 * Handler installed for SIGUSR1 and SIGALRM.  Intentionally does nothing:
 * main() blocks these signals and its comment says they are enabled only
 * with pselect, so delivery itself is the notification.
 */
static void wake_me(int sig)
{
	(void)sig;
}
224
16ddab0d
DW
/*
 * Decide whether main() should fork.
 * Returns 1 normally.  In a DEBUG build, returns 0 when env_no_mdmon()
 * is true, so that an mdmon started by hand stays in the foreground.
 */
static int do_fork(void)
{
	int should_fork = 1;

#ifdef DEBUG
	if (env_no_mdmon())
		should_fork = 0;
#endif

	return should_fork;
}
235
236
237
549e9569
NB
238int main(int argc, char *argv[])
239{
240 int mdfd;
549e9569
NB
241 struct mdinfo *mdi, *di;
242 struct supertype *container;
4d43913c 243 sigset_t set;
bfa44e2e 244 struct sigaction act;
9fe32043
N
245 int pfd[2];
246 int status;
3d2c4fc7 247 int ignore;
4d43913c 248
549e9569
NB
249 if (argc != 2) {
250 fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
251 exit(2);
252 }
253 mdfd = open(argv[1], O_RDWR);
254 if (mdfd < 0) {
255 fprintf(stderr, "md-manage: %s: %s\n", argv[1],
256 strerror(errno));
257 exit(1);
258 }
259 if (md_get_version(mdfd) < 0) {
260 fprintf(stderr, "md-manage: %s: Not an md device\n",
261 argv[1]);
262 exit(1);
263 }
264
9fe32043 265 /* Fork, and have the child tell us when they are ready */
16ddab0d 266 if (do_fork()) {
3d2c4fc7
DW
267 if (pipe(pfd) != 0) {
268 fprintf(stderr, "mdmon: failed to create pipe\n");
269 exit(1);
270 }
16ddab0d
DW
271 switch(fork()) {
272 case -1:
273 fprintf(stderr, "mdmon: failed to fork: %s\n",
274 strerror(errno));
275 exit(1);
276 case 0: /* child */
277 close(pfd[0]);
278 break;
279 default: /* parent */
280 close(pfd[1]);
281 if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
282 wait(&status);
283 status = WEXITSTATUS(status);
284 }
285 exit(status);
9fe32043 286 }
16ddab0d
DW
287 } else
288 pfd[0] = pfd[1] = -1;
549e9569
NB
289 /* hopefully it is a container - we'll check later */
290
291 container = malloc(sizeof(*container));
549e9569
NB
292 container->devnum = fd2devnum(mdfd);
293 container->devname = devnum2devname(container->devnum);
e0d6609f 294 container->device_name = argv[1];
549e9569 295
883a6142
DW
296 /* SIGUSR is sent between parent and child. So both block it
297 * and enable it only with pselect.
298 */
299 sigemptyset(&set);
300 sigaddset(&set, SIGUSR1);
301 sigaddset(&set, SIGHUP);
302 sigaddset(&set, SIGALRM);
303 sigaddset(&set, SIGTERM);
304 sigprocmask(SIG_BLOCK, &set, NULL);
305 act.sa_handler = wake_me;
306 act.sa_flags = 0;
307 sigaction(SIGUSR1, &act, NULL);
308 sigaction(SIGALRM, &act, NULL);
309 act.sa_handler = hup;
310 sigaction(SIGHUP, &act, NULL);
311 act.sa_handler = term;
312 sigaction(SIGTERM, &act, NULL);
313 act.sa_handler = SIG_IGN;
314 sigaction(SIGPIPE, &act, NULL);
315
549e9569
NB
316 /* If this fails, we hope it already exists */
317 mkdir("/var/run/mdadm", 0600);
318 /* pid file lives in /var/run/mdadm/mdXX.pid */
b109d928
DW
319 if (make_pidfile(container->devname, O_EXCL) < 0) {
320 if (ping_monitor(container->devname) == 0) {
321 fprintf(stderr, "mdmon: %s already managed\n",
322 container->devname);
323 exit(3);
324 } else {
295646b3
DW
325 int err;
326
b109d928
DW
327 /* cleanup the old monitor, this one is taking over */
328 try_kill_monitor(container->devname);
295646b3
DW
329 err = make_pidfile(container->devname, 0);
330 if (err < 0) {
b109d928
DW
331 fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
332 container->devname);
295646b3
DW
333 if (err == -EROFS) {
334 /* FIXME implement a mechanism to
335 * prevent duplicate monitor instances
336 */
337 fprintf(stderr,
338 "mdmon: continuing on read-only file system\n");
339 } else
340 exit(3);
b109d928
DW
341 }
342 }
549e9569
NB
343 }
344
345 container->sock = make_control_sock(container->devname);
549e9569
NB
346 container->arrays = NULL;
347
348 mdi = sysfs_read(mdfd, container->devnum,
349 GET_VERSION|GET_LEVEL|GET_DEVS);
350
351 if (!mdi) {
352 fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
353 container->devname);
354 exit(3);
355 }
356 if (mdi->array.level != UnSet) {
357 fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
358 argv[1]);
359 exit(3);
360 }
361 if (mdi->array.major_version != -1 ||
362 mdi->array.minor_version != -2) {
363 fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
364 argv[1]);
365 exit(3);
366 }
367
368 container->ss = find_metadata_methods(mdi->text_version);
369 if (container->ss == NULL) {
370 fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
371 argv[1], mdi->text_version);
372 exit(3);
373 }
374
375 container->devs = NULL;
376 for (di = mdi->devs; di; di = di->next) {
377 struct mdinfo *cd = malloc(sizeof(*cd));
7bc1962f 378 *cd = *di;
549e9569
NB
379 cd->next = container->devs;
380 container->devs = cd;
381 }
382 sysfs_free(mdi);
383
384
385 if (container->ss->load_super(container, mdfd, argv[1])) {
386 fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
387 argv[1]);
388 exit(3);
389 }
549e9569 390
9fe32043
N
391 /* Ok, this is close enough. We can say goodbye to our parent now.
392 */
393 status = 0;
3d2c4fc7
DW
394 if (write(pfd[1], &status, sizeof(status)) < 0)
395 fprintf(stderr, "mdmon: failed to notify our parent: %d\n",
396 getppid());
9fe32043
N
397 close(pfd[1]);
398
3d2c4fc7 399 ignore = chdir("/");
9fe32043
N
400 setsid();
401 close(0);
402 open("/dev/null", O_RDWR);
403 close(1);
3d2c4fc7 404 ignore = dup(0);
9fe32043
N
405#ifndef DEBUG
406 close(2);
3d2c4fc7 407 ignore = dup(0);
9fe32043
N
408#endif
409
549e9569
NB
410 mlockall(MCL_FUTURE);
411
3e70c845 412 if (clone_monitor(container) < 0) {
295646b3 413 fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
549e9569
NB
414 strerror(errno));
415 exit(2);
416 }
417
418 do_manager(container);
419
420 exit(0);
421}