mdmon: terminate clean
[thirdparty/mdadm.git] / mdmon.c
CommitLineData
549e9569
NB
1
2/*
3 * md array manager.
4 * When md arrays have user-space managed metadata, this is the program
5 * that does the managing.
6 *
7 * Given one argument: the name of the array (e.g. /dev/md0) that is
8 * the container.
9 * We fork off a helper that runs high priority and mlocked. It responds to
10 * device failures and other events that might stop writeout, or that are
11 * trivial to deal with.
12 * The main thread then watches for new arrays being created in the container
13 * and starts monitoring them too ... along with a few other tasks.
14 *
15 * The main thread communicates with the priority thread by writing over
16 * a pipe.
17 * Separate programs can communicate with the main thread via Unix-domain
18 * socket.
19 * The two threads share address space and open file table.
20 *
21 */
22
23#ifndef _GNU_SOURCE
24#define _GNU_SOURCE
25#endif
26
27#include <unistd.h>
28#include <stdlib.h>
4d43913c 29#include <sys/types.h>
549e9569
NB
30#include <sys/stat.h>
31#include <sys/socket.h>
32#include <sys/un.h>
33#include <sys/mman.h>
4d43913c 34#include <sys/syscall.h>
9fe32043 35#include <sys/wait.h>
549e9569
NB
36#include <stdio.h>
37#include <errno.h>
38#include <string.h>
39#include <fcntl.h>
b109d928 40#include <signal.h>
549e9569
NB
41
42#include <sched.h>
43
44#include "mdadm.h"
45#include "mdmon.h"
46
549e9569
NB
/*
 * Array hand-off slots.  Nothing in this file touches them.
 * NOTE(review): presumably used by the manager/monitor code to retire
 * arrays that are no longer active - confirm against their users.
 */
struct active_array *discard_this;
struct active_array *pending_discard;

/*
 * Thread ids recorded by clone_monitor(): mon_tid is the value returned
 * by clone() for the monitor thread, mgr_tid is the tid of the thread
 * that called clone_monitor().
 */
int mon_tid;
int mgr_tid;

/*
 * Set to 1 by the SIGTERM handler term().  Once set, make_pidfile() and
 * make_control_sock() refuse to create anything and remove_pidfile()
 * leaves the files alone.
 */
int sigterm;
53
549e9569
NB
/*
 * Entry point handed to clone() by clone_monitor().
 *
 * v: the container (struct supertype *) passed through clone()'s
 *    opaque argument.
 *
 * Runs do_monitor() on the container; returns 0 if that ever returns.
 */
int run_child(void *v)
{
	do_monitor((struct supertype *)v);

	return 0;
}
61
62int clone_monitor(struct supertype *container)
63{
549e9569 64 static char stack[4096];
549e9569 65
2cc98f9e 66 mon_tid = clone(run_child, stack+4096-64,
549e9569
NB
67 CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
68 container);
3e70c845 69
4d43913c 70 mgr_tid = syscall(SYS_gettid);
2cc98f9e
DW
71
72 return mon_tid;
549e9569
NB
73}
74
75static struct superswitch *find_metadata_methods(char *vers)
76{
77 if (strcmp(vers, "ddf") == 0)
78 return &super_ddf;
5b65005f
DW
79 if (strcmp(vers, "imsm") == 0)
80 return &super_imsm;
549e9569
NB
81 return NULL;
82}
83
84
295646b3 85int make_pidfile(char *devname, int o_excl)
549e9569
NB
86{
87 char path[100];
88 char pid[10];
89 int fd;
3d2c4fc7
DW
90 int n;
91
6144ed44
DW
92 if (sigterm)
93 return -1;
94
549e9569
NB
95 sprintf(path, "/var/run/mdadm/%s.pid", devname);
96
b109d928 97 fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
549e9569 98 if (fd < 0)
295646b3 99 return -errno;
549e9569 100 sprintf(pid, "%d\n", getpid());
3d2c4fc7 101 n = write(fd, pid, strlen(pid));
549e9569 102 close(fd);
3d2c4fc7
DW
103 if (n < 0)
104 return -errno;
549e9569
NB
105 return 0;
106}
107
b109d928
DW
/*
 * Best-effort termination of a previous monitor for this container.
 *
 * Reads the pid stored in /var/run/mdadm/<devname>.pid and sends SIGTERM
 * to that process, but only if /proc/<pid>/cmdline contains "mdmon".
 * Any failure along the way (missing file, unreadable data, process
 * gone) silently ends the attempt.
 *
 * devname: container device name used to build the pid file name.
 */
static void try_kill_monitor(char *devname)
{
	char buf[100];
	int fd;
	int n;
	pid_t pid;

	n = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (n < 0 || (size_t)n >= sizeof(buf))
		return;
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;

	/* leave room for a terminator so strtoul() sees a real string */
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return;
	buf[n] = '\0';

	pid = strtoul(buf, NULL, 10);
	/* kill() gives 0 and negative pids a special meaning; never pass them */
	if (pid <= 0)
		return;

	/* kill this process if it is mdmon */
	snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", (unsigned long) pid);
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;

	n = read(fd, buf, sizeof(buf) - 1);
	/* close on every path; the success path used to leak this fd */
	close(fd);
	if (n <= 0)
		return;
	buf[n] = '\0';

	/*
	 * cmdline separates arguments with NUL bytes, so this only looks at
	 * the first argument (the program name).
	 */
	if (strstr(buf, "mdmon") != NULL)
		kill(pid, SIGTERM);
}
141
e0d6609f
NB
142void remove_pidfile(char *devname)
143{
144 char buf[100];
145
6144ed44
DW
146 if (sigterm)
147 return;
148
e0d6609f
NB
149 sprintf(buf, "/var/run/mdadm/%s.pid", devname);
150 unlink(buf);
57752795
N
151 sprintf(buf, "/var/run/mdadm/%s.sock", devname);
152 unlink(buf);
e0d6609f
NB
153}
154
295646b3 155int make_control_sock(char *devname)
549e9569
NB
156{
157 char path[100];
158 int sfd;
159 long fl;
160 struct sockaddr_un addr;
161
6144ed44
DW
162 if (sigterm)
163 return -1;
164
549e9569
NB
165 sprintf(path, "/var/run/mdadm/%s.sock", devname);
166 unlink(path);
167 sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
168 if (sfd < 0)
169 return -1;
170
171 addr.sun_family = PF_LOCAL;
172 strcpy(addr.sun_path, path);
173 if (bind(sfd, &addr, sizeof(addr)) < 0) {
174 close(sfd);
175 return -1;
176 }
177 listen(sfd, 10);
178 fl = fcntl(sfd, F_GETFL, 0);
179 fl |= O_NONBLOCK;
180 fcntl(sfd, F_SETFL, fl);
181 return sfd;
182}
183
295646b3
DW
/* Set to 1 by the SIGHUP handler below; nothing in this file reads it. */
int socket_hup_requested;

/*
 * SIGHUP handler: just records that the signal arrived.
 *
 * sig: signal number (unused).
 */
static void hup(int sig)
{
	(void)sig;

	socket_hup_requested = 1;
}
189
6144ed44
DW
190static void term(int sig)
191{
192 sigterm = 1;
193}
194
4d43913c
NB
/*
 * Deliberately empty signal handler, installed by main() for SIGUSR1 and
 * SIGALRM so that delivery of those signals does nothing by itself.
 *
 * sig: signal number (unused).
 */
static void wake_me(int sig)
{
	(void)sig;
}
199
16ddab0d
DW
/*
 * Decide whether main() should fork into the background.
 *
 * Returns 1 normally.  In DEBUG builds returns 0 when env_no_mdmon()
 * is true, so that mdmon can be started by hand and stay in the
 * foreground.
 */
static int do_fork(void)
{
	int should_fork = 1;

#ifdef DEBUG
	if (env_no_mdmon())
		should_fork = 0;
#endif

	return should_fork;
}
210
211
212
549e9569
NB
213int main(int argc, char *argv[])
214{
215 int mdfd;
549e9569
NB
216 struct mdinfo *mdi, *di;
217 struct supertype *container;
4d43913c 218 sigset_t set;
bfa44e2e 219 struct sigaction act;
9fe32043
N
220 int pfd[2];
221 int status;
3d2c4fc7 222 int ignore;
4d43913c 223
549e9569
NB
224 if (argc != 2) {
225 fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
226 exit(2);
227 }
228 mdfd = open(argv[1], O_RDWR);
229 if (mdfd < 0) {
230 fprintf(stderr, "md-manage: %s: %s\n", argv[1],
231 strerror(errno));
232 exit(1);
233 }
234 if (md_get_version(mdfd) < 0) {
235 fprintf(stderr, "md-manage: %s: Not an md device\n",
236 argv[1]);
237 exit(1);
238 }
239
9fe32043 240 /* Fork, and have the child tell us when they are ready */
16ddab0d 241 if (do_fork()) {
3d2c4fc7
DW
242 if (pipe(pfd) != 0) {
243 fprintf(stderr, "mdmon: failed to create pipe\n");
244 exit(1);
245 }
16ddab0d
DW
246 switch(fork()) {
247 case -1:
248 fprintf(stderr, "mdmon: failed to fork: %s\n",
249 strerror(errno));
250 exit(1);
251 case 0: /* child */
252 close(pfd[0]);
253 break;
254 default: /* parent */
255 close(pfd[1]);
256 if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
257 wait(&status);
258 status = WEXITSTATUS(status);
259 }
260 exit(status);
9fe32043 261 }
16ddab0d
DW
262 } else
263 pfd[0] = pfd[1] = -1;
549e9569
NB
264 /* hopefully it is a container - we'll check later */
265
266 container = malloc(sizeof(*container));
549e9569
NB
267 container->devnum = fd2devnum(mdfd);
268 container->devname = devnum2devname(container->devnum);
e0d6609f 269 container->device_name = argv[1];
549e9569
NB
270
271 /* If this fails, we hope it already exists */
272 mkdir("/var/run/mdadm", 0600);
273 /* pid file lives in /var/run/mdadm/mdXX.pid */
b109d928
DW
274 if (make_pidfile(container->devname, O_EXCL) < 0) {
275 if (ping_monitor(container->devname) == 0) {
276 fprintf(stderr, "mdmon: %s already managed\n",
277 container->devname);
278 exit(3);
279 } else {
295646b3
DW
280 int err;
281
b109d928
DW
282 /* cleanup the old monitor, this one is taking over */
283 try_kill_monitor(container->devname);
295646b3
DW
284 err = make_pidfile(container->devname, 0);
285 if (err < 0) {
b109d928
DW
286 fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
287 container->devname);
295646b3
DW
288 if (err == -EROFS) {
289 /* FIXME implement a mechanism to
290 * prevent duplicate monitor instances
291 */
292 fprintf(stderr,
293 "mdmon: continuing on read-only file system\n");
294 } else
295 exit(3);
b109d928
DW
296 }
297 }
549e9569
NB
298 }
299
300 container->sock = make_control_sock(container->devname);
549e9569
NB
301 container->arrays = NULL;
302
303 mdi = sysfs_read(mdfd, container->devnum,
304 GET_VERSION|GET_LEVEL|GET_DEVS);
305
306 if (!mdi) {
307 fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
308 container->devname);
309 exit(3);
310 }
311 if (mdi->array.level != UnSet) {
312 fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
313 argv[1]);
314 exit(3);
315 }
316 if (mdi->array.major_version != -1 ||
317 mdi->array.minor_version != -2) {
318 fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
319 argv[1]);
320 exit(3);
321 }
322
323 container->ss = find_metadata_methods(mdi->text_version);
324 if (container->ss == NULL) {
325 fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
326 argv[1], mdi->text_version);
327 exit(3);
328 }
329
330 container->devs = NULL;
331 for (di = mdi->devs; di; di = di->next) {
332 struct mdinfo *cd = malloc(sizeof(*cd));
7bc1962f 333 *cd = *di;
549e9569
NB
334 cd->next = container->devs;
335 container->devs = cd;
336 }
337 sysfs_free(mdi);
338
339
340 if (container->ss->load_super(container, mdfd, argv[1])) {
341 fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
342 argv[1]);
343 exit(3);
344 }
549e9569 345
9fe32043
N
346 /* Ok, this is close enough. We can say goodbye to our parent now.
347 */
348 status = 0;
3d2c4fc7
DW
349 if (write(pfd[1], &status, sizeof(status)) < 0)
350 fprintf(stderr, "mdmon: failed to notify our parent: %d\n",
351 getppid());
9fe32043
N
352 close(pfd[1]);
353
3d2c4fc7 354 ignore = chdir("/");
9fe32043
N
355 setsid();
356 close(0);
357 open("/dev/null", O_RDWR);
358 close(1);
3d2c4fc7 359 ignore = dup(0);
9fe32043
N
360#ifndef DEBUG
361 close(2);
3d2c4fc7 362 ignore = dup(0);
9fe32043
N
363#endif
364
549e9569
NB
365 mlockall(MCL_FUTURE);
366
4d43913c
NB
367 /* SIGUSR is sent between parent and child. So both block it
368 * and enable it only with pselect.
369 */
370 sigemptyset(&set);
371 sigaddset(&set, SIGUSR1);
295646b3 372 sigaddset(&set, SIGHUP);
695154b2 373 sigaddset(&set, SIGALRM);
6144ed44 374 sigaddset(&set, SIGTERM);
4d43913c 375 sigprocmask(SIG_BLOCK, &set, NULL);
bfa44e2e
NB
376 act.sa_handler = wake_me;
377 act.sa_flags = 0;
378 sigaction(SIGUSR1, &act, NULL);
695154b2 379 sigaction(SIGALRM, &act, NULL);
295646b3
DW
380 act.sa_handler = hup;
381 sigaction(SIGHUP, &act, NULL);
6144ed44
DW
382 act.sa_handler = term;
383 sigaction(SIGTERM, &act, NULL);
bfa44e2e
NB
384 act.sa_handler = SIG_IGN;
385 sigaction(SIGPIPE, &act, NULL);
4d43913c 386
3e70c845 387 if (clone_monitor(container) < 0) {
295646b3 388 fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
549e9569
NB
389 strerror(errno));
390 exit(2);
391 }
392
393 do_manager(container);
394
395 exit(0);
396}