]> git.ipfire.org Git - thirdparty/mdadm.git/blob - mdmon.c
Add some comments to explain some of the bits of superswitch.
[thirdparty/mdadm.git] / mdmon.c
1
2 /*
3 * md array manager.
4 * When md arrays have user-space managed metadata, this is the program
5 * that does the managing.
6 *
7 * Given one argument: the name of the array (e.g. /dev/md0) that is
8 * the container.
9 * We fork off a helper that runs high priority and mlocked. It responds to
10 * device failures and other events that might stop writeout, or that are
11 * trivial to deal with.
12 * The main thread then watches for new arrays being created in the container
13 * and starts monitoring them too ... along with a few other tasks.
14 *
15 * The main thread communicates with the priority thread by writing over
16 * a pipe.
17 * Separate programs can communicate with the main thread via Unix-domain
18 * socket.
19 * The two threads share address space and open file table.
20 *
21 */
22
23 #ifndef _GNU_SOURCE
24 #define _GNU_SOURCE
25 #endif
26
27 #include <unistd.h>
28 #include <stdlib.h>
29 #include <sys/stat.h>
30 #include <sys/socket.h>
31 #include <sys/un.h>
32 #include <sys/mman.h>
33 #include <stdio.h>
34 #include <errno.h>
35 #include <string.h>
36 #include <fcntl.h>
37 #include <signal.h>
38
39 #include <sched.h>
40
41 #include "mdadm.h"
42 #include "mdmon.h"
43
/*
 * Global state defined in this file.
 * NOTE(review): nothing in this file reads or writes these; the roles
 * noted below are guessed from the names only - confirm against the
 * code that uses them.
 */
struct active_array *discard_this;	/* presumably an array being discarded */
struct active_array *pending_discard;	/* presumably an array queued for discard */
struct md_generic_cmd *active_cmd;	/* presumably the command in progress */
47
48 int run_child(void *v)
49 {
50 struct supertype *c = v;
51 sigset_t set;
52 /* SIGUSR is sent from child to parent, So child must block it */
53 sigemptyset(&set);
54 sigaddset(&set, SIGUSR1);
55 sigprocmask(SIG_BLOCK, &set, NULL);
56
57 do_monitor(c);
58 return 0;
59 }
60
61 int clone_monitor(struct supertype *container)
62 {
63 static char stack[4096];
64 int rv;
65
66 rv = pipe(container->mgr_pipe);
67 if (rv < 0)
68 return rv;
69 rv = pipe(container->mon_pipe);
70 if (rv < 0)
71 goto err_mon_pipe;
72
73 rv = clone(run_child, stack+4096-64,
74 CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
75 container);
76 if (rv < 0)
77 goto err_clone;
78 else
79 return rv;
80
81 err_clone:
82 close(container->mon_pipe[0]);
83 close(container->mon_pipe[1]);
84 err_mon_pipe:
85 close(container->mgr_pipe[0]);
86 close(container->mgr_pipe[1]);
87
88 return rv;
89 }
90
91 static struct superswitch *find_metadata_methods(char *vers)
92 {
93 if (strcmp(vers, "ddf") == 0)
94 return &super_ddf;
95 if (strcmp(vers, "imsm") == 0)
96 return &super_imsm;
97 return NULL;
98 }
99
100
/*
 * Write our pid to /var/run/mdadm/<devname>.pid.
 *
 * devname: name used to build the pid file path.
 * o_excl:  extra open(2) flags; callers pass O_EXCL to insist that the
 *          file does not exist yet, or 0 to reuse an existing file.
 *
 * Returns 0 on success, -1 if the path does not fit the local buffer,
 * the file cannot be opened, or the pid cannot be written completely.
 */
static int make_pidfile(char *devname, int o_excl)
{
	char path[100];
	char pid[32];	/* room for any int plus newline and NUL */
	int fd;
	int len;
	ssize_t written;

	len = snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;	/* refuse to use a truncated path */

	fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
	if (fd < 0)
		return -1;

	len = snprintf(pid, sizeof(pid), "%d\n", (int)getpid());
	written = write(fd, pid, len);
	if (close(fd) < 0 || written != len) {
		/* do not leave a pid file with missing or partial content */
		unlink(path);
		return -1;
	}
	return 0;
}
116
/*
 * Best-effort attempt to terminate a previous monitor for 'devname'.
 *
 * Reads a pid from /var/run/mdadm/<devname>.pid, then inspects
 * /proc/<pid>/cmdline and sends SIGTERM only if the text read from it
 * contains "mdmon".  Any failure along the way is silently ignored.
 */
static void try_kill_monitor(char *devname)
{
	char buf[100];
	int fd;
	int len;
	ssize_t n;
	pid_t pid;

	len = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(buf))
		return;	/* refuse to use a truncated path */
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;

	/* leave room for the terminator that strtoul() needs */
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return;
	buf[n] = '\0';

	pid = strtoul(buf, NULL, 10);
	if (pid <= 0)
		return;	/* kill() treats 0 and negative pids as groups */

	/* kill this process if it is mdmon */
	snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", (unsigned long) pid);
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;

	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n < 0)
		return;
	buf[n] = '\0';

	/*
	 * strstr() stops at the first NUL, so only the leading string of
	 * the cmdline data is examined.
	 */
	if (strstr(buf, "mdmon") != NULL)
		kill(pid, SIGTERM);
}
150
/*
 * Remove /var/run/mdadm/<devname>.pid.
 *
 * Nothing is removed if the path would not fit the local buffer; the
 * result of unlink() is not reported to the caller.
 */
void remove_pidfile(char *devname)
{
	char buf[100];
	int len;

	len = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(buf))
		return;	/* never unlink a truncated (wrong) path */
	unlink(buf);
}
158
/*
 * Create the non-blocking, listening Unix-domain control socket at
 * /var/run/mdadm/<devname>.sock, removing any stale socket file first.
 *
 * Returns the socket descriptor on success, or -1 if the path does not
 * fit, or if socket(), bind(), listen() or fcntl() fails.
 */
static int make_control_sock(char *devname)
{
	char path[100];
	int sfd;
	int len;
	long fl;
	struct sockaddr_un addr;

	len = snprintf(path, sizeof(path), "/var/run/mdadm/%s.sock", devname);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;	/* refuse to use a truncated path */

	unlink(path);
	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
	if (sfd < 0)
		return -1;

	/* start from a fully initialised address structure */
	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_LOCAL;
	snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path);
	if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto fail;

	if (listen(sfd, 10) < 0)
		goto fail;

	fl = fcntl(sfd, F_GETFL, 0);
	if (fl < 0 || fcntl(sfd, F_SETFL, fl | O_NONBLOCK) < 0)
		goto fail;

	return sfd;

fail:
	close(sfd);
	return -1;
}
184
185 int main(int argc, char *argv[])
186 {
187 int mdfd;
188 struct mdinfo *mdi, *di;
189 struct supertype *container;
190 if (argc != 2) {
191 fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
192 exit(2);
193 }
194 mdfd = open(argv[1], O_RDWR);
195 if (mdfd < 0) {
196 fprintf(stderr, "md-manage: %s: %s\n", argv[1],
197 strerror(errno));
198 exit(1);
199 }
200 if (md_get_version(mdfd) < 0) {
201 fprintf(stderr, "md-manage: %s: Not an md device\n",
202 argv[1]);
203 exit(1);
204 }
205
206 /* hopefully it is a container - we'll check later */
207
208 container = malloc(sizeof(*container));
209 container->devnum = fd2devnum(mdfd);
210 container->devname = devnum2devname(container->devnum);
211 container->device_name = argv[1];
212
213 /* If this fails, we hope it already exists */
214 mkdir("/var/run/mdadm", 0600);
215 /* pid file lives in /var/run/mdadm/mdXX.pid */
216 if (make_pidfile(container->devname, O_EXCL) < 0) {
217 if (ping_monitor(container->devname) == 0) {
218 fprintf(stderr, "mdmon: %s already managed\n",
219 container->devname);
220 exit(3);
221 } else {
222 /* cleanup the old monitor, this one is taking over */
223 try_kill_monitor(container->devname);
224 if (make_pidfile(container->devname, 0) < 0) {
225 fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
226 container->devname);
227 exit(3);
228 }
229 }
230 }
231
232 container->sock = make_control_sock(container->devname);
233 if (container->sock < 0) {
234 fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n");
235 exit(3);
236 }
237 container->arrays = NULL;
238
239 mdi = sysfs_read(mdfd, container->devnum,
240 GET_VERSION|GET_LEVEL|GET_DEVS);
241
242 if (!mdi) {
243 fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
244 container->devname);
245 exit(3);
246 }
247 if (mdi->array.level != UnSet) {
248 fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
249 argv[1]);
250 exit(3);
251 }
252 if (mdi->array.major_version != -1 ||
253 mdi->array.minor_version != -2) {
254 fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
255 argv[1]);
256 exit(3);
257 }
258
259 container->ss = find_metadata_methods(mdi->text_version);
260 if (container->ss == NULL) {
261 fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
262 argv[1], mdi->text_version);
263 exit(3);
264 }
265
266 container->devs = NULL;
267 for (di = mdi->devs; di; di = di->next) {
268 struct mdinfo *cd = malloc(sizeof(*cd));
269 cd = di;
270 cd->next = container->devs;
271 container->devs = cd;
272 }
273 sysfs_free(mdi);
274
275
276 if (container->ss->load_super(container, mdfd, argv[1])) {
277 fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
278 argv[1]);
279 exit(3);
280 }
281 close(mdfd);
282 close(mdfd);
283
284 mlockall(MCL_FUTURE);
285
286 if (clone_monitor(container) < 0) {
287 fprintf(stderr, "md-manage: failed to start monitor process: %s\n",
288 strerror(errno));
289 exit(2);
290 }
291
292 do_manager(container);
293
294 exit(0);
295 }