]>
Commit | Line | Data |
---|---|---|
2ce07e76 GKH |
1 | From b0140891a8cea36469f58d23859e599b1122bd37 Mon Sep 17 00:00:00 2001 |
2 | From: NeilBrown <neilb@suse.de> | |
3 | Date: Tue, 10 May 2011 17:49:01 +1000 | |
4 | Subject: md: Fix race when creating a new md device. | |
5 | ||
6 | From: NeilBrown <neilb@suse.de> | |
7 | ||
8 | commit b0140891a8cea36469f58d23859e599b1122bd37 upstream. | |
9 | ||
10 | There is a race when creating an md device by opening /dev/mdXX. | |
11 | ||
12 | If two processes do this at much the same time they will follow the | |
13 | call path | |
14 | __blkdev_get -> get_gendisk -> kobj_lookup | |
15 | ||
16 | The first will call | |
17 | -> md_probe -> md_alloc -> add_disk -> blk_register_region | |
18 | ||
19 | and the race happens when the second gets to kobj_lookup after | |
20 | add_disk has called blk_register_region but before it returns to | |
21 | md_alloc. | |
22 | ||
23 | In that case the second will not call md_probe (as the probe is already | |
24 | done) but will get a handle on the gendisk, return to __blkdev_get | |
25 | which will then call md_open (via the ->open pointer). | |
26 | ||
27 | As mddev->gendisk hasn't been set yet, md_open will think something is | |
28 | wrong and return with ERESTARTSYS. | |
29 | ||
30 | This can loop endlessly while the first thread makes no progress | |
31 | through add_disk. Nothing is blocking it, but due to scheduler | |
32 | behaviour it doesn't get a turn. | |
33 | So this is essentially a live-lock. | |
34 | ||
35 | We fix this by simply moving the assignment to mddev->gendisk before | |
36 | the call to add_disk() so md_open doesn't get confused. | |
37 | Also move blk_queue_flush earlier because add_disk should be as late | |
38 | as possible. | |
39 | ||
40 | To make sure that md_open doesn't complete until md_alloc has done all | |
41 | that is needed, we take mddev->open_mutex during the last part of | |
42 | md_alloc. md_open will wait for this. | |
43 | ||
44 | This can cause a lock-up on boot so Cc:ing for stable. | |
45 | For 2.6.36 and earlier a different patch will be needed as the | |
46 | 'blk_queue_flush' call isn't there. | |
47 | ||
48 | Signed-off-by: NeilBrown <neilb@suse.de> | |
49 | Reported-by: Thomas Jarosch <thomas.jarosch@intra2net.com> | |
50 | Tested-by: Thomas Jarosch <thomas.jarosch@intra2net.com> | |
51 | Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> | |
52 | ||
53 | --- | |
54 | drivers/md/md.c | 11 ++++++++--- | |
55 | 1 file changed, 8 insertions(+), 3 deletions(-) | |
56 | ||
57 | --- a/drivers/md/md.c | |
58 | +++ b/drivers/md/md.c | |
59 | @@ -4335,13 +4335,19 @@ static int md_alloc(dev_t dev, char *nam | |
60 | disk->fops = &md_fops; | |
61 | disk->private_data = mddev; | |
62 | disk->queue = mddev->queue; | |
63 | + blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA); | |
64 | /* Allow extended partitions. This makes the | |
65 | * 'mdp' device redundant, but we can't really | |
66 | * remove it now. | |
67 | */ | |
68 | disk->flags |= GENHD_FL_EXT_DEVT; | |
69 | - add_disk(disk); | |
70 | mddev->gendisk = disk; | |
71 | + /* As soon as we call add_disk(), another thread could get | |
72 | + * through to md_open, so make sure it doesn't get too far | |
73 | + */ | |
74 | + mutex_lock(&mddev->open_mutex); | |
75 | + add_disk(disk); | |
76 | + | |
77 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, | |
78 | &disk_to_dev(disk)->kobj, "%s", "md"); | |
79 | if (error) { | |
80 | @@ -4355,8 +4361,7 @@ static int md_alloc(dev_t dev, char *nam | |
81 | if (mddev->kobj.sd && | |
82 | sysfs_create_group(&mddev->kobj, &md_bitmap_group)) | |
83 | printk(KERN_DEBUG "pointless warning\n"); | |
84 | - | |
85 | - blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA); | |
86 | + mutex_unlock(&mddev->open_mutex); | |
87 | abort: | |
88 | mutex_unlock(&disks_mutex); | |
89 | if (!error && mddev->kobj.sd) { |