imsm: use component_size not total array size in container_content_imsm
[thirdparty/mdadm.git] / mdmon.c
CommitLineData
549e9569
NB
1
/*
 * md array manager.
 * When md arrays have user-space managed metadata, this is the program
 * that does the managing.
 *
 * Given one argument: the name of the array (e.g. /dev/md0) that is
 * the container.
 * We fork off a helper that runs at high priority and is mlocked.  It
 * responds to device failures and other events that might stop writeout,
 * or that are trivial to deal with.
 * The main thread then watches for new arrays being created in the container
 * and starts monitoring them too ... along with a few other tasks.
 *
 * The main thread communicates with the priority thread by writing over
 * a pipe.
 * Separate programs can communicate with the main thread via a Unix-domain
 * socket.
 * The two threads share address space and open file table.
 *
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
22
23#ifndef _GNU_SOURCE
24#define _GNU_SOURCE
25#endif
26
27#include <unistd.h>
28#include <stdlib.h>
4d43913c 29#include <sys/types.h>
549e9569
NB
30#include <sys/stat.h>
31#include <sys/socket.h>
32#include <sys/un.h>
33#include <sys/mman.h>
4d43913c 34#include <sys/syscall.h>
9fe32043 35#include <sys/wait.h>
549e9569
NB
36#include <stdio.h>
37#include <errno.h>
38#include <string.h>
39#include <fcntl.h>
b109d928 40#include <signal.h>
549e9569
NB
41
42#include <sched.h>
43
44#include "mdadm.h"
45#include "mdmon.h"
46
549e9569
NB
/* NOTE(review): neither pointer is used in the part of the file reviewed
 * here; presumably they hand retired arrays between the manager and the
 * monitor -- confirm against their users elsewhere in mdmon.
 */
struct active_array *discard_this;
struct active_array *pending_discard;

/* Thread ids filled in by clone_monitor(): mon_tid receives the value
 * returned by clone() for the monitor thread, mgr_tid the gettid() result
 * of the thread that called clone_monitor().
 */
int mon_tid;
int mgr_tid;
549e9569
NB
51
/* Thread entry point passed to clone() by clone_monitor().
 *
 * v is the container's struct supertype, carried through clone()'s
 * opaque argument.  Calls do_monitor() on it and returns 0 once that
 * call comes back.
 */
int run_child(void *v)
{
	do_monitor((struct supertype *)v);

	return 0;
}
59
60int clone_monitor(struct supertype *container)
61{
549e9569 62 static char stack[4096];
549e9569 63
2cc98f9e 64 mon_tid = clone(run_child, stack+4096-64,
549e9569
NB
65 CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
66 container);
3e70c845 67
4d43913c 68 mgr_tid = syscall(SYS_gettid);
2cc98f9e
DW
69
70 return mon_tid;
549e9569
NB
71}
72
73static struct superswitch *find_metadata_methods(char *vers)
74{
75 if (strcmp(vers, "ddf") == 0)
76 return &super_ddf;
5b65005f
DW
77 if (strcmp(vers, "imsm") == 0)
78 return &super_imsm;
549e9569
NB
79 return NULL;
80}
81
82
/* Write this process's pid to /var/run/mdadm/<devname>.pid.
 *
 * devname: name used to build the pid file path.
 * o_excl:  extra open(2) flags; callers pass O_EXCL to fail when the file
 *          already exists, or 0 to overwrite an existing file.
 *
 * Returns 0 on success and -1 on failure.  A devname that does not fit in
 * the path buffer is rejected with errno set to ENAMETOOLONG before the
 * filesystem is touched.
 */
static int make_pidfile(char *devname, int o_excl)
{
	char path[100];
	char pid[32];	/* room for any int plus newline and NUL */
	int fd;
	int n;

	n = snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
	if (n < 0 || (size_t)n >= sizeof(path)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	/* O_TRUNC so that overwriting an older, longer pid file cannot
	 * leave stale trailing bytes behind the new pid.
	 */
	fd = open(path, O_RDWR|O_CREAT|O_TRUNC|o_excl, 0600);
	if (fd < 0)
		return -1;

	n = snprintf(pid, sizeof(pid), "%d\n", (int)getpid());
	if (n < 0 || (size_t)n >= sizeof(pid) ||
	    write(fd, pid, (size_t)n) != n) {
		close(fd);
		return -1;
	}

	close(fd);
	return 0;
}
98
b109d928
DW
/* Best-effort termination of a previous monitor for devname.
 *
 * Reads a pid from /var/run/mdadm/<devname>.pid, then looks at
 * /proc/<pid>/cmdline and sends SIGTERM to that pid only if the start of
 * the command line (up to its first NUL) contains "mdmon".  Any failure
 * along the way simply returns without signalling anything.
 */
static void try_kill_monitor(char *devname)
{
	char buf[100];
	char *end;
	unsigned long pid;
	ssize_t len;
	int fd;
	int n;

	n = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (n < 0 || (size_t)n >= sizeof(buf))
		return;

	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;
	/* leave room for the terminator strtoul() relies on */
	len = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (len <= 0)
		return;
	buf[len] = '\0';

	errno = 0;
	pid = strtoul(buf, &end, 10);
	/* Reject anything that is not a plain positive pid: kill() gives
	 * zero and negative values process-group / broadcast meanings.
	 */
	if (end == buf || errno == ERANGE ||
	    (pid_t)pid <= 0 || (unsigned long)(pid_t)pid != pid)
		return;

	/* kill this process if it is mdmon */
	snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", pid);
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;
	len = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (len < 0)
		return;
	buf[len] = '\0';

	if (strstr(buf, "mdmon") != NULL)
		kill((pid_t)pid, SIGTERM);
}
132
e0d6609f
NB
/* Remove /var/run/mdadm/<devname>.pid.
 *
 * Best effort: the unlink() result is not reported, and a devname too
 * long for the path buffer is ignored rather than overflowing it.
 */
void remove_pidfile(char *devname)
{
	char buf[100];
	int n;

	n = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (n < 0 || (size_t)n >= sizeof(buf))
		return;

	unlink(buf);
}
140
549e9569
NB
/* Create the non-blocking, listening Unix-domain control socket at
 * /var/run/mdadm/<devname>.sock, replacing any stale socket file.
 *
 * Returns the listening descriptor, or -1 on failure.  A devname that
 * does not fit in the path buffer (or in sun_path) is rejected with
 * errno set to ENAMETOOLONG before anything is created or removed.
 */
static int make_control_sock(char *devname)
{
	char path[100];
	struct sockaddr_un addr;
	int sfd;
	int fl;
	int n;

	n = snprintf(path, sizeof(path), "/var/run/mdadm/%s.sock", devname);
	if (n < 0 || (size_t)n >= sizeof(path) ||
	    (size_t)n >= sizeof(addr.sun_path)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	/* a leftover socket file would make bind() fail */
	unlink(path);
	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
	if (sfd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	memcpy(addr.sun_path, path, (size_t)n + 1);
	if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto fail;
	if (listen(sfd, 10) < 0)
		goto fail;

	fl = fcntl(sfd, F_GETFL, 0);
	if (fl < 0 || fcntl(sfd, F_SETFL, fl | O_NONBLOCK) < 0)
		goto fail;

	return sfd;

fail:
	close(sfd);
	return -1;
}
166
4d43913c
NB
/* Do-nothing signal handler; main() installs it for SIGUSR1.
 * NOTE(review): presumably its only job is to let a blocked call be
 * interrupted by the signal -- confirm against the pselect users.
 */
static void wake_me(int sig)
{
	(void)sig;
}
171
549e9569
NB
172int main(int argc, char *argv[])
173{
174 int mdfd;
549e9569
NB
175 struct mdinfo *mdi, *di;
176 struct supertype *container;
4d43913c 177 sigset_t set;
bfa44e2e 178 struct sigaction act;
9fe32043
N
179 int pfd[2];
180 int status;
4d43913c 181
549e9569
NB
182 if (argc != 2) {
183 fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
184 exit(2);
185 }
186 mdfd = open(argv[1], O_RDWR);
187 if (mdfd < 0) {
188 fprintf(stderr, "md-manage: %s: %s\n", argv[1],
189 strerror(errno));
190 exit(1);
191 }
192 if (md_get_version(mdfd) < 0) {
193 fprintf(stderr, "md-manage: %s: Not an md device\n",
194 argv[1]);
195 exit(1);
196 }
197
9fe32043
N
198 /* Fork, and have the child tell us when they are ready */
199 pipe(pfd);
200 switch(fork()){
201 case -1:
202 fprintf(stderr, "mdmon: failed to fork: %s\n",
203 strerror(errno));
204 exit(1);
205 case 0: /* child */
206 close(pfd[0]);
207 break;
208 default: /* parent */
209 close(pfd[1]);
210 if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
211 wait(&status);
212 status = WEXITSTATUS(status);
213 }
214 exit(status);
215 }
549e9569
NB
216 /* hopefully it is a container - we'll check later */
217
218 container = malloc(sizeof(*container));
549e9569
NB
219 container->devnum = fd2devnum(mdfd);
220 container->devname = devnum2devname(container->devnum);
e0d6609f 221 container->device_name = argv[1];
549e9569
NB
222
223 /* If this fails, we hope it already exists */
224 mkdir("/var/run/mdadm", 0600);
225 /* pid file lives in /var/run/mdadm/mdXX.pid */
b109d928
DW
226 if (make_pidfile(container->devname, O_EXCL) < 0) {
227 if (ping_monitor(container->devname) == 0) {
228 fprintf(stderr, "mdmon: %s already managed\n",
229 container->devname);
230 exit(3);
231 } else {
232 /* cleanup the old monitor, this one is taking over */
233 try_kill_monitor(container->devname);
234 if (make_pidfile(container->devname, 0) < 0) {
235 fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
236 container->devname);
237 exit(3);
238 }
239 }
549e9569
NB
240 }
241
242 container->sock = make_control_sock(container->devname);
243 if (container->sock < 0) {
244 fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n");
245 exit(3);
246 }
247 container->arrays = NULL;
248
249 mdi = sysfs_read(mdfd, container->devnum,
250 GET_VERSION|GET_LEVEL|GET_DEVS);
251
252 if (!mdi) {
253 fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
254 container->devname);
255 exit(3);
256 }
257 if (mdi->array.level != UnSet) {
258 fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
259 argv[1]);
260 exit(3);
261 }
262 if (mdi->array.major_version != -1 ||
263 mdi->array.minor_version != -2) {
264 fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
265 argv[1]);
266 exit(3);
267 }
268
269 container->ss = find_metadata_methods(mdi->text_version);
270 if (container->ss == NULL) {
271 fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
272 argv[1], mdi->text_version);
273 exit(3);
274 }
275
276 container->devs = NULL;
277 for (di = mdi->devs; di; di = di->next) {
278 struct mdinfo *cd = malloc(sizeof(*cd));
279 cd = di;
280 cd->next = container->devs;
281 container->devs = cd;
282 }
283 sysfs_free(mdi);
284
285
286 if (container->ss->load_super(container, mdfd, argv[1])) {
287 fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
288 argv[1]);
289 exit(3);
290 }
549e9569 291
9fe32043
N
292 /* Ok, this is close enough. We can say goodbye to our parent now.
293 */
294 status = 0;
295 write(pfd[1], &status, sizeof(status));
296 close(pfd[1]);
297
298 chdir("/");
299 setsid();
300 close(0);
301 open("/dev/null", O_RDWR);
302 close(1);
303 dup(0);
304#ifndef DEBUG
305 close(2);
306 dup(0);
307#endif
308
549e9569
NB
309 mlockall(MCL_FUTURE);
310
4d43913c
NB
311 /* SIGUSR is sent between parent and child. So both block it
312 * and enable it only with pselect.
313 */
314 sigemptyset(&set);
315 sigaddset(&set, SIGUSR1);
316 sigprocmask(SIG_BLOCK, &set, NULL);
bfa44e2e
NB
317 act.sa_handler = wake_me;
318 act.sa_flags = 0;
319 sigaction(SIGUSR1, &act, NULL);
320 act.sa_handler = SIG_IGN;
321 sigaction(SIGPIPE, &act, NULL);
4d43913c 322
3e70c845 323 if (clone_monitor(container) < 0) {
549e9569
NB
324 fprintf(stderr, "md-manage: failed to start monitor process: %s\n",
325 strerror(errno));
326 exit(2);
327 }
328
329 do_manager(container);
330
331 exit(0);
332}