]> git.ipfire.org Git - thirdparty/mdadm.git/blame - mdopen.c
mdadm: improve the dlm locking mechanism for clustered raid
[thirdparty/mdadm.git] / mdopen.c
CommitLineData
b5e64645
NB
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
6f02172d 4 * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
b5e64645
NB
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
e736b623 22 * Email: <neilb@suse.de>
b5e64645
NB
23 */
24
25#include "mdadm.h"
26#include "md_p.h"
27#include <ctype.h>
28
/*
 * make_parts - create 'cnt' partition entries for 'dev'.
 *
 * dev: path of an existing block device node or symlink.
 * cnt: number of partition entries to create; 0 means 4.
 *
 * If dev is a block device we use the major/minor from dev and add
 * 1..cnt to the minor.  If it is a symlink, we make similar symlinks
 * pointing at the link target with the partition suffix appended.
 * If dev ends with a digit, we append "p%d", else "%d".
 * If the partition name already exists, we use its owner/mode,
 * else that of dev.
 *
 * Failures are reported with perror() where applicable and the
 * remaining partitions are still attempted; nothing is returned.
 */
void make_parts(char *dev, int cnt)
{
	struct stat stb;
	int major_num;
	int minor_num;
	int odig;
	int i;
	size_t devlen = strlen(dev);
	int nlen = devlen + 20;
	char *name;
	int dig;
	char orig[1024];
	char sym[1024];
	int err;

	/* An empty path can never be lstat()ed; bail out before
	 * looking at the "last" character of it.
	 */
	if (devlen == 0)
		return;
	/* <ctype.h> functions need an unsigned char value */
	dig = isdigit((unsigned char)dev[devlen - 1]);

	if (cnt == 0)
		cnt = 4;
	if (lstat(dev, &stb) != 0)
		return;

	if (S_ISBLK(stb.st_mode)) {
		major_num = major(stb.st_rdev);
		minor_num = minor(stb.st_rdev);
		odig = -1;
	} else if (S_ISLNK(stb.st_mode)) {
		int len = readlink(dev, orig, sizeof(orig));

		/* reject errors, empty targets, and targets that would
		 * leave no room for the partition suffix in 'sym'
		 */
		if (len <= 0 || len > 1000)
			return;
		orig[len] = 0;
		odig = isdigit((unsigned char)orig[len - 1]);
		major_num = -1;
		minor_num = -1;
	} else
		return;

	name = xmalloc(nlen);
	for (i = 1; i <= cnt; i++) {
		struct stat stb2;

		snprintf(name, nlen, "%s%s%d", dev, dig ? "p" : "", i);
		if (stat(name, &stb2) == 0) {
			/* Only an existing block device, for a block
			 * device parent, is ever replaced.
			 */
			if (!S_ISBLK(stb2.st_mode) || !S_ISBLK(stb.st_mode))
				continue;
			/* already the right device: nothing to do */
			if (stb2.st_rdev == makedev(major_num, minor_num + i))
				continue;
			unlink(name);
		} else {
			/* new entry: inherit owner/mode from 'dev' */
			stb2 = stb;
		}

		if (S_ISBLK(stb.st_mode)) {
			if (mknod(name, S_IFBLK | 0600,
				  makedev(major_num, minor_num + i))) {
				perror("mknod");
				/* Do not chown/chmod/register whatever
				 * else might be sitting at this path.
				 */
				err = -1;
			} else {
				if (chown(name, stb2.st_uid, stb2.st_gid))
					perror("chown");
				if (chmod(name, stb2.st_mode & 07777))
					perror("chmod");
				err = 0;
			}
		} else {
			snprintf(sym, sizeof(sym), "%s%s%d", orig,
				 odig ? "p" : "", i);
			err = symlink(sym, name);
		}

		if (err == 0 && stat(name, &stb2) == 0)
			add_dev(name, &stb2, 0, NULL);
	}
	free(name);
}
102
7105228e
ZL
/*
 * create_named_array - ask the md module to create array 'devnm'.
 *
 * Writes 'devnm' to the md_mod "new_array" module parameter file.
 * If that file does not exist (ENOENT), "modprobe md_mod" is run
 * and, when that succeeds, the open is retried once.
 *
 * Returns 1 if the whole name was written, otherwise reports an
 * error with pr_err() and returns 0.
 */
int create_named_array(char *devnm)
{
	static const char new_array_file[] = {
		"/sys/module/md_mod/parameters/new_array"
	};
	int written = -1;
	int sysfs_fd = open(new_array_file, O_WRONLY);

	if (sysfs_fd < 0 && errno == ENOENT &&
	    system("modprobe md_mod") == 0)
		sysfs_fd = open(new_array_file, O_WRONLY);

	if (sysfs_fd >= 0) {
		written = write(sysfs_fd, devnm, strlen(devnm));
		close(sysfs_fd);
	}

	/* success only when the file was opened and took the full name */
	if (sysfs_fd >= 0 && written == (int)strlen(devnm))
		return 1;

	pr_err("Fail create %s when using %s\n", devnm, new_array_file);
	return 0;
}
127
b5e64645 128/*
69207ff6
N
129 * We need a new md device to assemble/build/create an array.
130 * 'dev' is a name given us by the user (command line or mdadm.conf)
131 * It might start with /dev or /dev/md any might end with a digit
132 * string.
133 * If it starts with just /dev, it must be /dev/mdX or /dev/md_dX
134 * If it ends with a digit string, then it must be as above, or
135 * 'trustworthy' must be 'METADATA' and the 'dev' must be
136 * /dev/md/'name'NN or 'name'NN
137 * If it doesn't end with a digit string, it must be /dev/md/'name'
138 * or 'name' or must be NULL.
139 * If the digit string is present, it gives the minor number to use
140 * If not, we choose a high, unused minor number.
141 * If the 'dev' is a standard name, it devices whether 'md' or 'mdp'.
142 * else if the name is 'd[0-9]+' then we use mdp
143 * else if trustworthy is 'METADATA' we use md
144 * else the choice depends on 'autof'.
145 * If name is NULL it is assumed to match whatever dev provides.
146 * If both name and dev are NULL, we choose a name 'mdXX' or 'mdpXX'
147 *
148 * If 'name' is given, and 'trustworthy' is 'foreign' and name is not
149 * supported by 'dev', we add a "_%d" suffix based on the minor number
150 * use that.
151 *
ca3b6696 152 * If udev is configured, we create a temporary device, open it, and
69207ff6 153 * unlink it.
eca944fa 154 * If not, we create the /dev/mdXX device, and if name is usable,
69207ff6
N
155 * /dev/md/name
156 * In any case we return /dev/md/name or (if that isn't available)
157 * /dev/mdXX in 'chosen'.
158 *
159 * When we create devices, we use uid/gid/umask from config file.
b5e64645 160 */
69207ff6
N
161
162int create_mddev(char *dev, char *name, int autof, int trustworthy,
cd6cbb08 163 char *chosen, int block_udev)
b5e64645
NB
164{
165 int mdfd;
166 struct stat stb;
69207ff6
N
167 int num = -1;
168 int use_mdp = -1;
8aec876d 169 struct createinfo *ci = conf_get_create_info();
f1ae21c4 170 int parts;
69207ff6 171 char *cname;
bd1fd72e 172 char devname[37];
4dd2df09 173 char devnm[32];
69207ff6 174 char cbuf[400];
cd6cbb08
N
175
176 if (!use_udev())
177 block_udev = 0;
178
69207ff6
N
179 if (chosen == NULL)
180 chosen = cbuf;
181
5bbb4842
NB
182 if (autof == 0)
183 autof = ci->autof;
184
f1ae21c4
NB
185 parts = autof >> 3;
186 autof &= 7;
187
69207ff6
N
188 strcpy(chosen, "/dev/md/");
189 cname = chosen + strlen(chosen);
190
69207ff6 191 if (dev) {
69207ff6
N
192 if (strncmp(dev, "/dev/md/", 8) == 0) {
193 strcpy(cname, dev+8);
194 } else if (strncmp(dev, "/dev/", 5) == 0) {
195 char *e = dev + strlen(dev);
196 while (e > dev && isdigit(e[-1]))
197 e--;
198 if (e[0])
199 num = strtoul(e, NULL, 10);
200 strcpy(cname, dev+5);
201 cname[e-(dev+5)] = 0;
202 /* name *must* be mdXX or md_dXX in this context */
203 if (num < 0 ||
204 (strcmp(cname, "md") != 0 && strcmp(cname, "md_d") != 0)) {
7a862a02 205 pr_err("%s is an invalid name for an md device. Try /dev/md/%s\n",
69207ff6 206 dev, dev+5);
f1ae21c4
NB
207 return -1;
208 }
69207ff6
N
209 if (strcmp(cname, "md") == 0)
210 use_mdp = 0;
211 else
212 use_mdp = 1;
11fb4c05
N
213 /* recreate name: /dev/md/0 or /dev/md/d0 */
214 sprintf(cname, "%s%d", use_mdp?"d":"", num);
69207ff6
N
215 } else
216 strcpy(cname, dev);
217
11fb4c05
N
218 /* 'cname' must not contain a slash, and may not be
219 * empty.
69207ff6 220 */
11fb4c05 221 if (strchr(cname, '/') != NULL) {
7a862a02 222 pr_err("%s is an invalid name for an md device.\n", dev);
69207ff6 223 return -1;
f1ae21c4 224 }
11fb4c05 225 if (cname[0] == 0) {
8e5b52cd 226 pr_err("%s is an invalid name for an md device (empty!).\n", dev);
69207ff6 227 return -1;
b5e64645 228 }
11fb4c05
N
229 if (num < 0) {
230 /* If cname is 'N' or 'dN', we get dev number
231 * from there.
232 */
233 char *sp = cname;
234 char *ep;
235 if (cname[0] == 'd')
236 sp++;
4cda8682
JM
237 if (isdigit(sp[0]))
238 num = strtoul(sp, &ep, 10);
239 else
240 ep = sp;
11fb4c05
N
241 if (ep == sp || *ep || num < 0)
242 num = -1;
243 else if (cname[0] == 'd')
244 use_mdp = 1;
245 else
246 use_mdp = 0;
247 }
69207ff6 248 }
f1ae21c4 249
69207ff6
N
250 /* Now determine device number */
251 /* named 'METADATA' cannot use 'mdp'. */
252 if (name && name[0] == 0)
253 name = NULL;
254 if (name && trustworthy == METADATA && use_mdp == 1) {
7a862a02 255 pr_err("%s is not allowed for a %s container. Consider /dev/md%d.\n", dev, name, num);
69207ff6
N
256 return -1;
257 }
258 if (name && trustworthy == METADATA)
259 use_mdp = 0;
260 if (use_mdp == -1) {
261 if (autof == 4 || autof == 6)
262 use_mdp = 1;
f1ae21c4 263 else
69207ff6
N
264 use_mdp = 0;
265 }
266 if (num < 0 && trustworthy == LOCAL && name) {
ca3b6696 267 /* if name is numeric, possibly prefixed by
d7ba0c55 268 * 'md' or '/dev/md', use that for num
bde2c6e2 269 * if it is not already in use */
69207ff6 270 char *ep;
d7ba0c55
N
271 char *n2 = name;
272 if (strncmp(n2, "/dev/", 5) == 0)
273 n2 += 5;
274 if (strncmp(n2, "md", 2) == 0)
275 n2 += 2;
276 if (*n2 == '/')
277 n2++;
278 num = strtoul(n2, &ep, 10);
279 if (ep == n2 || *ep)
69207ff6 280 num = -1;
4dd2df09
N
281 else {
282 sprintf(devnm, "md%s%d", use_mdp ? "_d":"", num);
283 if (mddev_busy(devnm))
284 num = -1;
285 }
69207ff6
N
286 }
287
69207ff6
N
288 if (cname[0] == 0 && name) {
289 /* Need to find a name if we can
290 * We don't completely trust 'name'. Truncate to
291 * reasonable length and remove '/'
8d80900b 292 */
69207ff6 293 char *cp;
f2e55ecc
N
294 struct map_ent *map = NULL;
295 int conflict = 1;
296 int unum = 0;
297 int cnlen;
69207ff6
N
298 strncpy(cname, name, 200);
299 cname[200] = 0;
7103b9b8
N
300 for (cp = cname; *cp ; cp++)
301 switch (*cp) {
302 case '/':
303 *cp = '-';
304 break;
305 case ' ':
306 case '\t':
307 *cp = '_';
308 break;
309 }
310
f2e55ecc
N
311 if (trustworthy == LOCAL ||
312 (trustworthy == FOREIGN && strchr(cname, ':') != NULL)) {
313 /* Only need suffix if there is a conflict */
314 if (map_by_name(&map, cname) == NULL)
315 conflict = 0;
316 }
317 cnlen = strlen(cname);
318 while (conflict) {
add394f3 319 if (trustworthy == METADATA && !isdigit(cname[cnlen-1]))
f2e55ecc
N
320 sprintf(cname+cnlen, "%d", unum);
321 else
ca3b6696 322 /* add _%d to FOREIGN array that don't
f2e55ecc
N
323 * a 'host:' prefix
324 */
325 sprintf(cname+cnlen, "_%d", unum);
326 unum++;
327 if (map_by_name(&map, cname) == NULL)
328 conflict = 0;
329 }
69207ff6 330 }
9a40c327 331
eca944fa
N
332 devnm[0] = 0;
333 if (num < 0 && cname && ci->names) {
eca944fa 334 sprintf(devnm, "md_%s", cname);
cd6cbb08
N
335 if (block_udev)
336 udev_block(devnm);
7105228e 337 if (!create_named_array(devnm)) {
eca944fa 338 devnm[0] = 0;
cd6cbb08
N
339 udev_unblock();
340 }
eca944fa 341 }
039df362 342 if (num >= 0) {
039df362 343 sprintf(devnm, "md%d", num);
cd6cbb08
N
344 if (block_udev)
345 udev_block(devnm);
7105228e 346 if (!create_named_array(devnm)) {
039df362 347 devnm[0] = 0;
cd6cbb08 348 udev_unblock();
eca944fa 349 }
cd6cbb08
N
350 }
351 if (devnm[0] == 0) {
352 if (num < 0) {
353 /* need to choose a free number. */
354 char *_devnm = find_free_devnm(use_mdp);
355 if (_devnm == NULL) {
356 pr_err("No avail md devices - aborting\n");
357 return -1;
358 }
359 strcpy(devnm, _devnm);
360 } else {
361 sprintf(devnm, "%s%d", use_mdp?"md_d":"md", num);
362 if (mddev_busy(devnm)) {
363 pr_err("%s is already in use.\n",
364 dev);
365 return -1;
366 }
eca944fa 367 }
cd6cbb08
N
368 if (block_udev)
369 udev_block(devnm);
eca944fa
N
370 }
371
372 sprintf(devname, "/dev/%s", devnm);
373
5ac6db12 374 if (dev && dev[0] == '/')
9a40c327
N
375 strcpy(chosen, dev);
376 else if (cname[0] == 0)
69207ff6 377 strcpy(chosen, devname);
8d80900b 378
69207ff6 379 /* We have a device number and name.
11fb4c05
N
380 * If we cannot detect udev, we need to make
381 * devices and links ourselves.
69207ff6 382 */
06d2ffc3 383 if (!use_udev()) {
69207ff6
N
384 /* Make sure 'devname' exists and 'chosen' is a symlink to it */
385 if (lstat(devname, &stb) == 0) {
386 /* Must be the correct device, else error */
387 if ((stb.st_mode&S_IFMT) != S_IFBLK ||
13db17bd 388 stb.st_rdev != devnm2devid(devnm)) {
e7b84f9d 389 pr_err("%s exists but looks wrong, please fix\n",
69207ff6
N
390 devname);
391 return -1;
0a6e1c67 392 }
69207ff6
N
393 } else {
394 if (mknod(devname, S_IFBLK|0600,
4dd2df09 395 devnm2devid(devnm)) != 0) {
e7b84f9d 396 pr_err("failed to create %s\n",
69207ff6 397 devname);
b5e64645
NB
398 return -1;
399 }
69207ff6
N
400 if (chown(devname, ci->uid, ci->gid))
401 perror("chown");
402 if (chmod(devname, ci->mode))
403 perror("chmod");
404 stat(devname, &stb);
405 add_dev(devname, &stb, 0, NULL);
406 }
c4fe2d4f
N
407 if (use_mdp == 1)
408 make_parts(devname, parts);
69207ff6 409
ca3b6696
N
410 if (strcmp(chosen, devname) != 0) {
411 if (mkdir("/dev/md",0700) == 0) {
69207ff6
N
412 if (chown("/dev/md", ci->uid, ci->gid))
413 perror("chown /dev/md");
414 if (chmod("/dev/md", ci->mode| ((ci->mode>>2) & 0111)))
415 perror("chmod /dev/md");
8d80900b 416 }
69207ff6
N
417
418 if (dev && strcmp(chosen, dev) == 0)
419 /* We know we are allowed to use this name */
420 unlink(chosen);
421
422 if (lstat(chosen, &stb) == 0) {
423 char buf[300];
fca13185
TJ
424 ssize_t link_len = readlink(chosen, buf, sizeof(buf)-1);
425 if (link_len >= 0)
426 buf[link_len] = '\0';
427
69207ff6 428 if ((stb.st_mode & S_IFMT) != S_IFLNK ||
fca13185 429 link_len < 0 ||
69207ff6 430 strcmp(buf, devname) != 0) {
e7b84f9d 431 pr_err("%s exists - ignoring\n",
69207ff6
N
432 chosen);
433 strcpy(chosen, devname);
434 }
614825ea 435 } else if (symlink(devname, chosen) != 0)
e7b84f9d 436 pr_err("failed to create %s: %s\n",
614825ea 437 chosen, strerror(errno));
c4fe2d4f
N
438 if (use_mdp && strcmp(chosen, devname) != 0)
439 make_parts(chosen, parts);
b5e64645 440 }
b5e64645 441 }
4dd2df09 442 mdfd = open_dev_excl(devnm);
69207ff6 443 if (mdfd < 0)
e7b84f9d 444 pr_err("unexpected failure opening %s\n",
69207ff6 445 devname);
6be1d39d
N
446 return mdfd;
447}
448
/* Open 'dev' read-only and check that it is an md device.
 * If 'report_errors' is non-zero, failures are reported via pr_err().
 * On success, return the file descriptor.
 * On failure, return -1 if it could not be opened,
 * or -2 if it was opened but md_array_valid() rejected it
 * (the descriptor is closed again in that case).
 */
int open_mddev(char *dev, int report_errors)
{
	int fd = open(dev, O_RDONLY);

	if (fd < 0) {
		if (report_errors)
			pr_err("error opening %s: %s\n",
				dev, strerror(errno));
		return -1;
	}

	if (md_array_valid(fd) != 0)
		return fd;

	close(fd);
	if (report_errors)
		pr_err("%s does not appear to be an md device\n", dev);
	return -2;
}
289c74f8
N
474
/*
 * find_free_devnm - pick an md device name that is not in use.
 *
 * Candidates are tried from 127 down to 0, then from 511 down to
 * 129; 128 serves as the stop marker and is itself never tried.
 * The name is "md_d<N>" when 'use_partitions' is non-zero, else
 * "md<N>".  A candidate is skipped if mddev_busy() reports it busy,
 * if conf_name_is_free() rejects it, or - when udev is not in use -
 * if map_dev() finds an entry for its device number that
 * is_standard() does not accept.
 *
 * Returns a pointer to a static buffer holding the name (overwritten
 * by the next call), or NULL if every candidate was rejected.
 */
char *find_free_devnm(int use_partitions)
{
	static char devnm[32];
	int candidate = 127;

	while (candidate != 128) {
		int usable;

		sprintf(devnm, "%s%d", use_partitions ? "md_d" : "md",
			candidate);

		usable = !mddev_busy(devnm) && conf_name_is_free(devnm);
		if (usable && !use_udev()) {
			/* make sure it is new to /dev too, at least as a
			 * non-standard */
			dev_t devid = devnm2devid(devnm);

			if (devid) {
				char *dn = map_dev(major(devid),
						   minor(devid), 0);
				if (dn && !is_standard(dn, NULL))
					usable = 0;
			}
		}
		if (usable)
			return devnm;

		/* count down to 0, then wrap to 511 and keep going */
		candidate = candidate ? candidate - 1 : (1<<9) - 1;
	}
	return NULL;
}
506}