]> git.ipfire.org Git - thirdparty/mdadm.git/blob - mdmon.c
Keep container device open in monitor
[thirdparty/mdadm.git] / mdmon.c
1
2 /*
3 * md array manager.
4 * When md arrays have user-space managed metadata, this is the program
5 * that does the managing.
6 *
7 * Given one argument: the name of the array (e.g. /dev/md0) that is
8 * the container.
9 * We fork off a helper that runs high priority and mlocked. It responds to
10 * device failures and other events that might stop writeout, or that are
11 * trivial to deal with.
12 * The main thread then watches for new arrays being created in the container
13 * and starts monitoring them too ... along with a few other tasks.
14 *
15 * The main thread communicates with the priority thread by writing over
16 * a pipe.
17 * Separate programs can communicate with the main thread via Unix-domain
18 * socket.
19 * The two threads share address space and open file table.
20 *
21 */
22
23 #ifndef _GNU_SOURCE
24 #define _GNU_SOURCE
25 #endif
26
27 #include <unistd.h>
28 #include <stdlib.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/socket.h>
32 #include <sys/un.h>
33 #include <sys/mman.h>
34 #include <sys/syscall.h>
35 #include <stdio.h>
36 #include <errno.h>
37 #include <string.h>
38 #include <fcntl.h>
39 #include <signal.h>
40
41 #include <sched.h>
42
43 #include "mdadm.h"
44 #include "mdmon.h"
45
/*
 * Shared pointers to active_array objects.
 * NOTE(review): neither is referenced in this file; presumably they are
 * used by the manager/monitor code to hand over arrays that are being
 * retired - confirm against the users.
 */
struct active_array *discard_this;
struct active_array *pending_discard;

/* Kernel thread id of the monitor thread; written by run_child(). */
int mon_tid;
/* Kernel thread id of the manager (main) thread; written by clone_monitor(). */
int mgr_tid;
50
51 int run_child(void *v)
52 {
53 struct supertype *c = v;
54
55 mon_tid = syscall(SYS_gettid);
56 do_monitor(c);
57 return 0;
58 }
59
60 int clone_monitor(struct supertype *container)
61 {
62 static char stack[4096];
63 int rv;
64
65
66 rv = clone(run_child, stack+4096-64,
67 CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
68 container);
69
70 mgr_tid = syscall(SYS_gettid);
71
72 return rv;
73 }
74
75 static struct superswitch *find_metadata_methods(char *vers)
76 {
77 if (strcmp(vers, "ddf") == 0)
78 return &super_ddf;
79 if (strcmp(vers, "imsm") == 0)
80 return &super_imsm;
81 return NULL;
82 }
83
84
/*
 * Write our pid to /var/run/mdadm/<devname>.pid.
 *
 * 'o_excl' is OR-ed into the open() flags; pass O_EXCL to fail when the
 * file already exists, or 0 to take over an existing file.
 *
 * Returns 0 on success, -1 if the path does not fit the local buffer,
 * the file cannot be opened/created, or the pid cannot be written in
 * full.
 */
static int make_pidfile(char *devname, int o_excl)
{
	char path[100];
	char pid[20];
	int fd;
	int len;
	ssize_t written;

	len = snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;

	/* O_TRUNC: an existing file being taken over must not keep stale
	 * bytes from a previous, longer pid.
	 */
	fd = open(path, O_RDWR|O_CREAT|O_TRUNC|o_excl, 0600);
	if (fd < 0)
		return -1;

	len = snprintf(pid, sizeof(pid), "%d\n", (int)getpid());
	written = write(fd, pid, (size_t)len);
	close(fd);

	if (written != (ssize_t)len)
		return -1;
	return 0;
}
100
/*
 * Best-effort removal of a stale monitor for 'devname'.
 *
 * Reads the pid stored in /var/run/mdadm/<devname>.pid, and if
 * /proc/<pid>/cmdline shows that the process is an mdmon, sends it
 * SIGTERM.  Any failure along the way (missing file, unreadable or
 * nonsensical pid, process gone) silently ends the attempt.
 */
static void try_kill_monitor(char *devname)
{
	char buf[100];
	char *end;
	unsigned long val;
	pid_t pid;
	ssize_t n;
	int len;
	int fd;

	len = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(buf))
		return;

	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;
	/* leave room for the terminator that strtoul() relies on */
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return;
	buf[n] = '\0';

	val = strtoul(buf, &end, 10);
	pid = (pid_t)val;
	/* Never hand kill() a pid of 0 or a negative value: those address
	 * whole process groups rather than a single process.
	 */
	if (end == buf || pid <= 0 || (unsigned long)pid != val)
		return;

	/* kill this process if it is mdmon */
	snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", (unsigned long)pid);
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n < 0)
		return;
	buf[n] = '\0';

	if (strstr(buf, "mdmon") != NULL)
		kill(pid, SIGTERM);
}
134
/*
 * Remove /var/run/mdadm/<devname>.pid.
 *
 * Best effort: a name too long for the local path buffer, or a failing
 * unlink(), is silently ignored.
 */
void remove_pidfile(char *devname)
{
	char buf[100];
	int len;

	len = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(buf))
		return;

	unlink(buf);
}
142
/*
 * Create the control socket /var/run/mdadm/<devname>.sock.
 *
 * Any existing file of that name is removed first.  The socket is a
 * Unix-domain stream socket, bound, listening (backlog 10) and switched
 * to non-blocking mode.
 *
 * Returns the listening descriptor, or -1 if the path does not fit or
 * any step of the setup fails.
 */
static int make_control_sock(char *devname)
{
	char path[100];
	struct sockaddr_un addr;
	long fl;
	int len;
	int sfd;

	len = snprintf(path, sizeof(path), "/var/run/mdadm/%s.sock", devname);
	if (len < 0 || (size_t)len >= sizeof(path) ||
	    (size_t)len >= sizeof(addr.sun_path))
		return -1;

	unlink(path);
	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
	if (sfd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_LOCAL;
	memcpy(addr.sun_path, path, (size_t)len + 1);
	if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto fail;
	if (listen(sfd, 10) < 0)
		goto fail;

	fl = fcntl(sfd, F_GETFL, 0);
	if (fl < 0 || fcntl(sfd, F_SETFL, fl | O_NONBLOCK) < 0)
		goto fail;

	return sfd;

fail:
	close(sfd);
	return -1;
}
168
/*
 * Handler that main() installs for SIGUSR1.
 *
 * Deliberately does nothing: the signal only needs to be delivered, no
 * work is done in the handler itself.
 */
static void wake_me(int sig)
{
	(void)sig;
}
173
174 int main(int argc, char *argv[])
175 {
176 int mdfd;
177 struct mdinfo *mdi, *di;
178 struct supertype *container;
179 sigset_t set;
180 struct sigaction act;
181
182 if (argc != 2) {
183 fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
184 exit(2);
185 }
186 mdfd = open(argv[1], O_RDWR);
187 if (mdfd < 0) {
188 fprintf(stderr, "md-manage: %s: %s\n", argv[1],
189 strerror(errno));
190 exit(1);
191 }
192 if (md_get_version(mdfd) < 0) {
193 fprintf(stderr, "md-manage: %s: Not an md device\n",
194 argv[1]);
195 exit(1);
196 }
197
198 /* hopefully it is a container - we'll check later */
199
200 container = malloc(sizeof(*container));
201 container->devnum = fd2devnum(mdfd);
202 container->devname = devnum2devname(container->devnum);
203 container->device_name = argv[1];
204
205 /* If this fails, we hope it already exists */
206 mkdir("/var/run/mdadm", 0600);
207 /* pid file lives in /var/run/mdadm/mdXX.pid */
208 if (make_pidfile(container->devname, O_EXCL) < 0) {
209 if (ping_monitor(container->devname) == 0) {
210 fprintf(stderr, "mdmon: %s already managed\n",
211 container->devname);
212 exit(3);
213 } else {
214 /* cleanup the old monitor, this one is taking over */
215 try_kill_monitor(container->devname);
216 if (make_pidfile(container->devname, 0) < 0) {
217 fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
218 container->devname);
219 exit(3);
220 }
221 }
222 }
223
224 container->sock = make_control_sock(container->devname);
225 if (container->sock < 0) {
226 fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n");
227 exit(3);
228 }
229 container->arrays = NULL;
230
231 mdi = sysfs_read(mdfd, container->devnum,
232 GET_VERSION|GET_LEVEL|GET_DEVS);
233
234 if (!mdi) {
235 fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
236 container->devname);
237 exit(3);
238 }
239 if (mdi->array.level != UnSet) {
240 fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
241 argv[1]);
242 exit(3);
243 }
244 if (mdi->array.major_version != -1 ||
245 mdi->array.minor_version != -2) {
246 fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
247 argv[1]);
248 exit(3);
249 }
250
251 container->ss = find_metadata_methods(mdi->text_version);
252 if (container->ss == NULL) {
253 fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
254 argv[1], mdi->text_version);
255 exit(3);
256 }
257
258 container->devs = NULL;
259 for (di = mdi->devs; di; di = di->next) {
260 struct mdinfo *cd = malloc(sizeof(*cd));
261 cd = di;
262 cd->next = container->devs;
263 container->devs = cd;
264 }
265 sysfs_free(mdi);
266
267
268 if (container->ss->load_super(container, mdfd, argv[1])) {
269 fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
270 argv[1]);
271 exit(3);
272 }
273
274 mlockall(MCL_FUTURE);
275
276 /* SIGUSR is sent between parent and child. So both block it
277 * and enable it only with pselect.
278 */
279 sigemptyset(&set);
280 sigaddset(&set, SIGUSR1);
281 sigprocmask(SIG_BLOCK, &set, NULL);
282 act.sa_handler = wake_me;
283 act.sa_flags = 0;
284 sigaction(SIGUSR1, &act, NULL);
285 act.sa_handler = SIG_IGN;
286 sigaction(SIGPIPE, &act, NULL);
287
288 if (clone_monitor(container) < 0) {
289 fprintf(stderr, "md-manage: failed to start monitor process: %s\n",
290 strerror(errno));
291 exit(2);
292 }
293
294 do_manager(container);
295
296 exit(0);
297 }