sysfs: avoid possible data corruption in sys_load.
[thirdparty/mdadm.git] / mdmon.c
CommitLineData
549e9569
NB
1
2/*
3 * md array manager.
4 * When md arrays have user-space managed metadata, this is the program
5 * that does the managing.
6 *
7 * Given one argument: the name of the array (e.g. /dev/md0) that is
8 * the container.
9 * We fork off a helper that runs high priority and mlocked. It responds to
10 * device failures and other events that might stop writeout, or that are
11 * trivial to deal with.
12 * The main thread then watches for new arrays being created in the container
13 * and starts monitoring them too ... along with a few other tasks.
14 *
15 * The main thread communicates with the priority thread by writing over
16 * a pipe.
17 * Separate programs can communicate with the main thread via Unix-domain
18 * socket.
19 * The two threads share address space and open file table.
20 *
21 */
22
23#ifndef _GNU_SOURCE
24#define _GNU_SOURCE
25#endif
26
27#include <unistd.h>
28#include <stdlib.h>
4d43913c 29#include <sys/types.h>
549e9569
NB
30#include <sys/stat.h>
31#include <sys/socket.h>
32#include <sys/un.h>
33#include <sys/mman.h>
4d43913c 34#include <sys/syscall.h>
549e9569
NB
35#include <stdio.h>
36#include <errno.h>
37#include <string.h>
38#include <fcntl.h>
b109d928 39#include <signal.h>
549e9569
NB
40
41#include <sched.h>
42
43#include "mdadm.h"
44#include "mdmon.h"
45
549e9569
NB
/*
 * Not referenced elsewhere in this file; presumably shared with the
 * monitor/manager code declared in mdmon.h - TODO confirm.
 */
struct active_array *discard_this, *pending_discard;

/*
 * Thread ids assigned in clone_monitor(): mon_tid is the value returned by
 * clone(), mgr_tid is the calling thread's own tid.
 */
int mon_tid;
int mgr_tid;
549e9569
NB
50
/*
 * Thread entry point handed to clone() by clone_monitor().
 *
 * v is the container (struct supertype *) passed through clone()'s opaque
 * argument; it is forwarded to do_monitor().  Always returns 0.
 */
int run_child(void *v)
{
	do_monitor((struct supertype *)v);

	return 0;
}
58
59int clone_monitor(struct supertype *container)
60{
549e9569 61 static char stack[4096];
549e9569 62
2cc98f9e 63 mon_tid = clone(run_child, stack+4096-64,
549e9569
NB
64 CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD,
65 container);
3e70c845 66
4d43913c 67 mgr_tid = syscall(SYS_gettid);
2cc98f9e
DW
68
69 return mon_tid;
549e9569
NB
70}
71
72static struct superswitch *find_metadata_methods(char *vers)
73{
74 if (strcmp(vers, "ddf") == 0)
75 return &super_ddf;
5b65005f
DW
76 if (strcmp(vers, "imsm") == 0)
77 return &super_imsm;
549e9569
NB
78 return NULL;
79}
80
81
/*
 * Write this process's pid to /var/run/mdadm/<devname>.pid.
 *
 * o_excl is OR-ed into the open() flags: pass O_EXCL to fail when the file
 * already exists, or 0 to overwrite an existing file.
 *
 * Returns 0 on success, -1 if the path does not fit the buffer, the file
 * cannot be opened, or the pid cannot be written in full.
 */
static int make_pidfile(char *devname, int o_excl)
{
	char path[100];
	char pid[16];
	int fd;
	int len;
	int rv = 0;

	len = snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;

	/* O_TRUNC: do not leave the tail of a longer, stale pid behind */
	fd = open(path, O_RDWR|O_CREAT|O_TRUNC|o_excl, 0600);
	if (fd < 0)
		return -1;

	len = snprintf(pid, sizeof(pid), "%d\n", (int)getpid());
	if (len < 0 || (size_t)len >= sizeof(pid) ||
	    write(fd, pid, len) != len)
		rv = -1;

	close(fd);
	return rv;
}
97
b109d928
DW
/*
 * Best-effort termination of a previous monitor for 'devname'.
 *
 * Reads a pid from /var/run/mdadm/<devname>.pid, then reads
 * /proc/<pid>/cmdline and sends SIGTERM to that pid only if the text read
 * (up to its first NUL byte) contains "mdmon".  Any failure along the way
 * is silently ignored.
 */
static void try_kill_monitor(char *devname)
{
	char buf[100];
	char *end;
	unsigned long val;
	ssize_t n;
	pid_t pid;
	int len;
	int fd;

	len = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(buf))
		return;

	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;
	/* leave room for the terminator that strtoul() relies on */
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return;
	buf[n] = '\0';

	val = strtoul(buf, &end, 10);
	pid = (pid_t)val;
	/* reject non-numeric, zero, negative or out-of-range pids */
	if (end == buf || pid <= 0 || (unsigned long)pid != val)
		return;

	/* kill this process if it is mdmon */
	snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", val);
	fd = open(buf, O_RDONLY);
	if (fd < 0)
		return;
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return;
	buf[n] = '\0';

	if (strstr(buf, "mdmon") != NULL)
		kill(pid, SIGTERM);
}
131
e0d6609f
NB
/*
 * Remove /var/run/mdadm/<devname>.pid.
 *
 * Failures are ignored.  A devname too long for the path buffer is treated
 * as "nothing to remove" instead of overflowing the buffer.
 */
void remove_pidfile(char *devname)
{
	char buf[100];
	int len;

	len = snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
	if (len < 0 || (size_t)len >= sizeof(buf))
		return;

	unlink(buf);
}
139
549e9569
NB
/*
 * Create the listening control socket /var/run/mdadm/<devname>.sock.
 *
 * Any existing file at that path is unlinked first.  The socket is a
 * Unix-domain stream socket with a listen backlog of 10, switched to
 * non-blocking mode.
 *
 * Returns the socket descriptor, or -1 if the path does not fit or any of
 * socket/bind/listen/fcntl fails (the descriptor is closed in that case).
 */
static int make_control_sock(char *devname)
{
	struct sockaddr_un addr;
	long fl;
	int sfd;
	int len;

	/* zero the whole address so no uninitialised bytes reach bind() */
	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_LOCAL;
	len = snprintf(addr.sun_path, sizeof(addr.sun_path),
		       "/var/run/mdadm/%s.sock", devname);
	if (len < 0 || (size_t)len >= sizeof(addr.sun_path))
		return -1;

	unlink(addr.sun_path);
	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
	if (sfd < 0)
		return -1;

	if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto fail;
	if (listen(sfd, 10) < 0)
		goto fail;

	fl = fcntl(sfd, F_GETFL, 0);
	if (fl < 0 || fcntl(sfd, F_SETFL, fl | O_NONBLOCK) < 0)
		goto fail;

	return sfd;

fail:
	close(sfd);
	return -1;
}
165
4d43913c
NB
/*
 * Deliberately empty signal handler; main() installs it for SIGUSR1.
 * Presumably its only purpose is to interrupt a blocking call rather than
 * have the signal ignored or kill the process - confirm against the
 * pselect() users.
 */
static void wake_me(int sig)
{
	(void)sig;
}
170
549e9569
NB
171int main(int argc, char *argv[])
172{
173 int mdfd;
549e9569
NB
174 struct mdinfo *mdi, *di;
175 struct supertype *container;
4d43913c 176 sigset_t set;
bfa44e2e 177 struct sigaction act;
4d43913c 178
549e9569
NB
179 if (argc != 2) {
180 fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
181 exit(2);
182 }
183 mdfd = open(argv[1], O_RDWR);
184 if (mdfd < 0) {
185 fprintf(stderr, "md-manage: %s: %s\n", argv[1],
186 strerror(errno));
187 exit(1);
188 }
189 if (md_get_version(mdfd) < 0) {
190 fprintf(stderr, "md-manage: %s: Not an md device\n",
191 argv[1]);
192 exit(1);
193 }
194
195 /* hopefully it is a container - we'll check later */
196
197 container = malloc(sizeof(*container));
549e9569
NB
198 container->devnum = fd2devnum(mdfd);
199 container->devname = devnum2devname(container->devnum);
e0d6609f 200 container->device_name = argv[1];
549e9569
NB
201
202 /* If this fails, we hope it already exists */
203 mkdir("/var/run/mdadm", 0600);
204 /* pid file lives in /var/run/mdadm/mdXX.pid */
b109d928
DW
205 if (make_pidfile(container->devname, O_EXCL) < 0) {
206 if (ping_monitor(container->devname) == 0) {
207 fprintf(stderr, "mdmon: %s already managed\n",
208 container->devname);
209 exit(3);
210 } else {
211 /* cleanup the old monitor, this one is taking over */
212 try_kill_monitor(container->devname);
213 if (make_pidfile(container->devname, 0) < 0) {
214 fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
215 container->devname);
216 exit(3);
217 }
218 }
549e9569
NB
219 }
220
221 container->sock = make_control_sock(container->devname);
222 if (container->sock < 0) {
223 fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n");
224 exit(3);
225 }
226 container->arrays = NULL;
227
228 mdi = sysfs_read(mdfd, container->devnum,
229 GET_VERSION|GET_LEVEL|GET_DEVS);
230
231 if (!mdi) {
232 fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
233 container->devname);
234 exit(3);
235 }
236 if (mdi->array.level != UnSet) {
237 fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
238 argv[1]);
239 exit(3);
240 }
241 if (mdi->array.major_version != -1 ||
242 mdi->array.minor_version != -2) {
243 fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
244 argv[1]);
245 exit(3);
246 }
247
248 container->ss = find_metadata_methods(mdi->text_version);
249 if (container->ss == NULL) {
250 fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
251 argv[1], mdi->text_version);
252 exit(3);
253 }
254
255 container->devs = NULL;
256 for (di = mdi->devs; di; di = di->next) {
257 struct mdinfo *cd = malloc(sizeof(*cd));
258 cd = di;
259 cd->next = container->devs;
260 container->devs = cd;
261 }
262 sysfs_free(mdi);
263
264
265 if (container->ss->load_super(container, mdfd, argv[1])) {
266 fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
267 argv[1]);
268 exit(3);
269 }
549e9569
NB
270
271 mlockall(MCL_FUTURE);
272
4d43913c
NB
273 /* SIGUSR is sent between parent and child. So both block it
274 * and enable it only with pselect.
275 */
276 sigemptyset(&set);
277 sigaddset(&set, SIGUSR1);
278 sigprocmask(SIG_BLOCK, &set, NULL);
bfa44e2e
NB
279 act.sa_handler = wake_me;
280 act.sa_flags = 0;
281 sigaction(SIGUSR1, &act, NULL);
282 act.sa_handler = SIG_IGN;
283 sigaction(SIGPIPE, &act, NULL);
4d43913c 284
3e70c845 285 if (clone_monitor(container) < 0) {
549e9569
NB
286 fprintf(stderr, "md-manage: failed to start monitor process: %s\n",
287 strerror(errno));
288 exit(2);
289 }
290
291 do_manager(container);
292
293 exit(0);
294}