]> git.ipfire.org Git - thirdparty/mdadm.git/blob - mdmon.c
mdmon: terminate clean
[thirdparty/mdadm.git] / mdmon.c
1
2 /*
3 * md array manager.
4 * When md arrays have user-space managed metadata, this is the program
5 * that does the managing.
6 *
7 * Given one argument: the name of the array (e.g. /dev/md0) that is
8 * the container.
9 * We fork off a helper that runs high priority and mlocked. It responds to
10 * device failures and other events that might stop writeout, or that are
11 * trivial to deal with.
12 * The main thread then watches for new arrays being created in the container
13 * and starts monitoring them too ... along with a few other tasks.
14 *
15 * The main thread communicates with the priority thread by writing over
16 * a pipe.
17 * Separate programs can communicate with the main thread via Unix-domain
18 * socket.
19 * The two threads share address space and open file table.
20 *
21 */
22
23 #ifndef _GNU_SOURCE
24 #define _GNU_SOURCE
25 #endif
26
27 #include <unistd.h>
28 #include <stdlib.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/socket.h>
32 #include <sys/un.h>
33 #include <sys/mman.h>
34 #include <sys/syscall.h>
35 #include <sys/wait.h>
36 #include <stdio.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <fcntl.h>
40 #include <signal.h>
41
42 #include <sched.h>
43
44 #include "mdadm.h"
45 #include "mdmon.h"
46
/* Active-array bookkeeping pointers. They are only defined here; nothing
 * else in this file reads or writes them.
 */
struct active_array *discard_this;
struct active_array *pending_discard;

/* Thread ids recorded by clone_monitor(): mon_tid is the value returned by
 * clone() for the monitor thread, mgr_tid is the caller's own gettid().
 */
int mon_tid;
int mgr_tid;

/* Set to 1 by the SIGTERM handler.  The pidfile and control-socket helpers
 * in this file refuse to do anything once it is non-zero.
 */
int sigterm;
53
/* Entry point handed to clone() by clone_monitor().
 * 'v' is the container (struct supertype *) passed through clone()'s
 * opaque argument; it is forwarded to do_monitor().
 * Always returns 0 once do_monitor() returns.
 */
int run_child(void *v)
{
	do_monitor((struct supertype *)v);

	return 0;
}
61
62 int clone_monitor(struct supertype *container)
63 {
64 static char stack[4096];
65
66 mon_tid = clone(run_child, stack+4096-64,
67 CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
68 container);
69
70 mgr_tid = syscall(SYS_gettid);
71
72 return mon_tid;
73 }
74
75 static struct superswitch *find_metadata_methods(char *vers)
76 {
77 if (strcmp(vers, "ddf") == 0)
78 return &super_ddf;
79 if (strcmp(vers, "imsm") == 0)
80 return &super_imsm;
81 return NULL;
82 }
83
84
85 int make_pidfile(char *devname, int o_excl)
86 {
87 char path[100];
88 char pid[10];
89 int fd;
90 int n;
91
92 if (sigterm)
93 return -1;
94
95 sprintf(path, "/var/run/mdadm/%s.pid", devname);
96
97 fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
98 if (fd < 0)
99 return -errno;
100 sprintf(pid, "%d\n", getpid());
101 n = write(fd, pid, strlen(pid));
102 close(fd);
103 if (n < 0)
104 return -errno;
105 return 0;
106 }
107
/* Best-effort termination of a previous monitor for 'devname'.
 *
 * Reads a pid from /var/run/mdadm/<devname>.pid, then inspects
 * /proc/<pid>/cmdline; SIGTERM is sent only if the start of that command
 * line (up to its first NUL) contains "mdmon".  Every failure along the
 * way is silently ignored.
 */
static void try_kill_monitor(char *devname)
{
	char buf[100];
	int fd;
	int len;
	ssize_t n;
	pid_t pid;

	len = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(buf))
		return;
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;

	/* leave room for the terminator strtoul() relies on */
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return;
	buf[n] = '\0';

	pid = strtoul(buf, NULL, 10);
	/* 0 and negative values have special meanings for kill(2) */
	if (pid <= 0)
		return;

	/* kill this process if it is mdmon */
	snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", (unsigned long) pid);
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;

	n = read(fd, buf, sizeof(buf) - 1);
	/* close on every path; the descriptor is not needed past this point */
	close(fd);
	if (n < 0)
		return;
	buf[n] = '\0';

	if (strstr(buf, "mdmon") != NULL)
		kill(pid, SIGTERM);
}
141
142 void remove_pidfile(char *devname)
143 {
144 char buf[100];
145
146 if (sigterm)
147 return;
148
149 sprintf(buf, "/var/run/mdadm/%s.pid", devname);
150 unlink(buf);
151 sprintf(buf, "/var/run/mdadm/%s.sock", devname);
152 unlink(buf);
153 }
154
155 int make_control_sock(char *devname)
156 {
157 char path[100];
158 int sfd;
159 long fl;
160 struct sockaddr_un addr;
161
162 if (sigterm)
163 return -1;
164
165 sprintf(path, "/var/run/mdadm/%s.sock", devname);
166 unlink(path);
167 sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
168 if (sfd < 0)
169 return -1;
170
171 addr.sun_family = PF_LOCAL;
172 strcpy(addr.sun_path, path);
173 if (bind(sfd, &addr, sizeof(addr)) < 0) {
174 close(sfd);
175 return -1;
176 }
177 listen(sfd, 10);
178 fl = fcntl(sfd, F_GETFL, 0);
179 fl |= O_NONBLOCK;
180 fcntl(sfd, F_SETFL, fl);
181 return sfd;
182 }
183
/* Becomes 1 when SIGHUP is delivered; nothing in this file clears it. */
int socket_hup_requested;

/* SIGHUP handler: just note that the signal arrived. */
static void hup(int sig)
{
	(void)sig;
	socket_hup_requested = 1;
}
189
190 static void term(int sig)
191 {
192 sigterm = 1;
193 }
194
/* Deliberately empty handler, installed by main() for SIGUSR1 and SIGALRM.
 * Installing a handler (rather than ignoring the signal) means delivery
 * can interrupt a blocking call.
 */
static void wake_me(int sig)
{
	(void)sig;
}
199
/* Decide whether main() should fork into the background.
 * Normally the answer is yes (1).  In DEBUG builds the answer is no (0)
 * when env_no_mdmon() reports true, so that mdmon can be started by hand.
 */
static int do_fork(void)
{
	int should_fork = 1;

#ifdef DEBUG
	if (env_no_mdmon())
		should_fork = 0;
#endif

	return should_fork;
}
210
211
212
213 int main(int argc, char *argv[])
214 {
215 int mdfd;
216 struct mdinfo *mdi, *di;
217 struct supertype *container;
218 sigset_t set;
219 struct sigaction act;
220 int pfd[2];
221 int status;
222 int ignore;
223
224 if (argc != 2) {
225 fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
226 exit(2);
227 }
228 mdfd = open(argv[1], O_RDWR);
229 if (mdfd < 0) {
230 fprintf(stderr, "md-manage: %s: %s\n", argv[1],
231 strerror(errno));
232 exit(1);
233 }
234 if (md_get_version(mdfd) < 0) {
235 fprintf(stderr, "md-manage: %s: Not an md device\n",
236 argv[1]);
237 exit(1);
238 }
239
240 /* Fork, and have the child tell us when they are ready */
241 if (do_fork()) {
242 if (pipe(pfd) != 0) {
243 fprintf(stderr, "mdmon: failed to create pipe\n");
244 exit(1);
245 }
246 switch(fork()) {
247 case -1:
248 fprintf(stderr, "mdmon: failed to fork: %s\n",
249 strerror(errno));
250 exit(1);
251 case 0: /* child */
252 close(pfd[0]);
253 break;
254 default: /* parent */
255 close(pfd[1]);
256 if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
257 wait(&status);
258 status = WEXITSTATUS(status);
259 }
260 exit(status);
261 }
262 } else
263 pfd[0] = pfd[1] = -1;
264 /* hopefully it is a container - we'll check later */
265
266 container = malloc(sizeof(*container));
267 container->devnum = fd2devnum(mdfd);
268 container->devname = devnum2devname(container->devnum);
269 container->device_name = argv[1];
270
271 /* If this fails, we hope it already exists */
272 mkdir("/var/run/mdadm", 0600);
273 /* pid file lives in /var/run/mdadm/mdXX.pid */
274 if (make_pidfile(container->devname, O_EXCL) < 0) {
275 if (ping_monitor(container->devname) == 0) {
276 fprintf(stderr, "mdmon: %s already managed\n",
277 container->devname);
278 exit(3);
279 } else {
280 int err;
281
282 /* cleanup the old monitor, this one is taking over */
283 try_kill_monitor(container->devname);
284 err = make_pidfile(container->devname, 0);
285 if (err < 0) {
286 fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
287 container->devname);
288 if (err == -EROFS) {
289 /* FIXME implement a mechanism to
290 * prevent duplicate monitor instances
291 */
292 fprintf(stderr,
293 "mdmon: continuing on read-only file system\n");
294 } else
295 exit(3);
296 }
297 }
298 }
299
300 container->sock = make_control_sock(container->devname);
301 container->arrays = NULL;
302
303 mdi = sysfs_read(mdfd, container->devnum,
304 GET_VERSION|GET_LEVEL|GET_DEVS);
305
306 if (!mdi) {
307 fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
308 container->devname);
309 exit(3);
310 }
311 if (mdi->array.level != UnSet) {
312 fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
313 argv[1]);
314 exit(3);
315 }
316 if (mdi->array.major_version != -1 ||
317 mdi->array.minor_version != -2) {
318 fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
319 argv[1]);
320 exit(3);
321 }
322
323 container->ss = find_metadata_methods(mdi->text_version);
324 if (container->ss == NULL) {
325 fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
326 argv[1], mdi->text_version);
327 exit(3);
328 }
329
330 container->devs = NULL;
331 for (di = mdi->devs; di; di = di->next) {
332 struct mdinfo *cd = malloc(sizeof(*cd));
333 *cd = *di;
334 cd->next = container->devs;
335 container->devs = cd;
336 }
337 sysfs_free(mdi);
338
339
340 if (container->ss->load_super(container, mdfd, argv[1])) {
341 fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
342 argv[1]);
343 exit(3);
344 }
345
346 /* Ok, this is close enough. We can say goodbye to our parent now.
347 */
348 status = 0;
349 if (write(pfd[1], &status, sizeof(status)) < 0)
350 fprintf(stderr, "mdmon: failed to notify our parent: %d\n",
351 getppid());
352 close(pfd[1]);
353
354 ignore = chdir("/");
355 setsid();
356 close(0);
357 open("/dev/null", O_RDWR);
358 close(1);
359 ignore = dup(0);
360 #ifndef DEBUG
361 close(2);
362 ignore = dup(0);
363 #endif
364
365 mlockall(MCL_FUTURE);
366
367 /* SIGUSR is sent between parent and child. So both block it
368 * and enable it only with pselect.
369 */
370 sigemptyset(&set);
371 sigaddset(&set, SIGUSR1);
372 sigaddset(&set, SIGHUP);
373 sigaddset(&set, SIGALRM);
374 sigaddset(&set, SIGTERM);
375 sigprocmask(SIG_BLOCK, &set, NULL);
376 act.sa_handler = wake_me;
377 act.sa_flags = 0;
378 sigaction(SIGUSR1, &act, NULL);
379 sigaction(SIGALRM, &act, NULL);
380 act.sa_handler = hup;
381 sigaction(SIGHUP, &act, NULL);
382 act.sa_handler = term;
383 sigaction(SIGTERM, &act, NULL);
384 act.sa_handler = SIG_IGN;
385 sigaction(SIGPIPE, &act, NULL);
386
387 if (clone_monitor(container) < 0) {
388 fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
389 strerror(errno));
390 exit(2);
391 }
392
393 do_manager(container);
394
395 exit(0);
396 }