]> git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blame - src/patches/suse-2.6.27.25/patches.fixes/dm-table-ref-count
Reenabled linux-xen and xen-image build
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.fixes / dm-table-ref-count
CommitLineData
00e5a55c
BS
1From: Mikulas Patocka <mpatocka@redhat.com>
2Subject: dm table: rework reference counting
3Patch-mainline: 2.6.28
4References: bnc#457205
5Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
6
7[PATCH 3/3] for bnc 457205.
8
9mainline commit d58168763f74d1edbc296d7038c60efe6493fdd4
10
11
12 Rework table reference counting.
13
14 The existing code uses a reference counter. When the last reference is
15 dropped and the counter reaches zero, the table destructor is called.
16 Table reference counters are acquired/released from upcalls from other
17 kernel code (dm_any_congested, dm_merge_bvec, dm_unplug_all).
18 If the reference counter reaches zero in one of the upcalls, the table
19 destructor is called from almost random kernel code.
20
21 This leads to various problems:
22 * dm_any_congested being called under a spinlock, which calls the
23 destructor, which calls some sleeping function.
24 * the destructor attempting to take a lock that is already taken by the
25 same process.
26 * stale reference from some other kernel code keeps the table
27 constructed, which keeps some devices open, even after successful
28 return from "dmsetup remove". This can confuse lvm and prevent closing
29 of underlying devices or reusing device minor numbers.
30
31 The patch changes reference counting so that the table destructor can be
32 called only at predetermined places.
33
34 The table has always exactly one reference from either mapped_device->map
35 or hash_cell->new_map. After this patch, this reference is not counted
36 in table->holders. A pair of dm_table_create/dm_table_destroy functions
37 is used for table creation/destruction.
38
39 Temporary references from the other code increase table->holders. A pair
40 of dm_table_get/dm_table_put functions is used to manipulate it.
41
42 When the table is about to be destroyed, we wait for table->holders to
43 reach 0. Then, we call the table destructor. We use active waiting with
44 msleep(1), because the situation happens rarely (to one user in 5 years)
45 and removing the device isn't a performance-critical task: the user doesn't
46 care if it takes one tick more or not.
47
48 This way, the destructor is called only at specific points
49 (dm_table_destroy function) and the above problems associated with lazy
50 destruction can't happen.
51
52 Finally remove the temporary protection added to dm_any_congested().
53
54 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
55 Signed-off-by: Alasdair G Kergon <agk@redhat.com>
56
57---
58 drivers/md/dm-ioctl.c | 10 ++++------
59 drivers/md/dm-table.c | 28 +++++++++++++++++++++++-----
60 drivers/md/dm.c | 14 +++++---------
61 drivers/md/dm.h | 1 +
62 4 files changed, 33 insertions(+), 20 deletions(-)
63
64--- a/drivers/md/dm.c
65+++ b/drivers/md/dm.c
66@@ -1644,8 +1644,6 @@ static int dm_any_congested(void *conges
67 struct mapped_device *md = (struct mapped_device *) congested_data;
68 struct dm_table *map;
69
70- atomic_inc(&md->pending);
71-
72 if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
73 map = dm_get_table(md);
74 if (map) {
75@@ -1662,10 +1660,6 @@ static int dm_any_congested(void *conges
76 }
77
78
79- if (!atomic_dec_return(&md->pending))
80- /* nudge anyone waiting on suspend queue */
81- wake_up(&md->wait);
82-
83 return r;
84 }
85
86@@ -1926,10 +1920,12 @@ static int __bind(struct mapped_device *
87
88 if (md->suspended_bdev)
89 __set_size(md, size);
90- if (size == 0)
91+
92+ if (!size) {
93+ dm_table_destroy(t);
94 return 0;
95+ }
96
97- dm_table_get(t);
98 dm_table_event_callback(t, event_callback, md);
99
100 /*
101@@ -1967,7 +1963,7 @@ static void __unbind(struct mapped_devic
102 write_lock(&md->map_lock);
103 md->map = NULL;
104 write_unlock(&md->map_lock);
105- dm_table_put(map);
106+ dm_table_destroy(map);
107 }
108
109 /*
110--- a/drivers/md/dm.h
111+++ b/drivers/md/dm.h
112@@ -46,6 +46,7 @@ struct dm_table;
113 /*-----------------------------------------------------------------
114 * Internal table functions.
115 *---------------------------------------------------------------*/
116+void dm_table_destroy(struct dm_table *t);
117 void dm_table_event_callback(struct dm_table *t,
118 void (*fn)(void *), void *context);
119 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
120--- a/drivers/md/dm-ioctl.c
121+++ b/drivers/md/dm-ioctl.c
122@@ -233,7 +233,7 @@ static void __hash_remove(struct hash_ce
123 }
124
125 if (hc->new_map)
126- dm_table_put(hc->new_map);
127+ dm_table_destroy(hc->new_map);
128 dm_put(hc->md);
129 free_cell(hc);
130 }
131@@ -828,8 +828,8 @@ static int do_resume(struct dm_ioctl *pa
132
133 r = dm_swap_table(md, new_map);
134 if (r) {
135+ dm_table_destroy(new_map);
136 dm_put(md);
137- dm_table_put(new_map);
138 return r;
139 }
140
141@@ -837,8 +837,6 @@ static int do_resume(struct dm_ioctl *pa
142 set_disk_ro(dm_disk(md), 0);
143 else
144 set_disk_ro(dm_disk(md), 1);
145-
146- dm_table_put(new_map);
147 }
148
149 if (dm_suspended(md))
150@@ -1094,7 +1092,7 @@ static int table_load(struct dm_ioctl *p
151 }
152
153 if (hc->new_map)
154- dm_table_put(hc->new_map);
155+ dm_table_destroy(hc->new_map);
156 hc->new_map = t;
157 up_write(&_hash_lock);
158
159@@ -1123,7 +1121,7 @@ static int table_clear(struct dm_ioctl *
160 }
161
162 if (hc->new_map) {
163- dm_table_put(hc->new_map);
164+ dm_table_destroy(hc->new_map);
165 hc->new_map = NULL;
166 }
167
168--- a/drivers/md/dm-table.c
169+++ b/drivers/md/dm-table.c
170@@ -1,6 +1,6 @@
171 /*
172 * Copyright (C) 2001 Sistina Software (UK) Limited.
173- * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
174+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
175 *
176 * This file is released under the GPL.
177 */
178@@ -15,6 +15,7 @@
179 #include <linux/slab.h>
180 #include <linux/interrupt.h>
181 #include <linux/mutex.h>
182+#include <linux/delay.h>
183 #include <asm/atomic.h>
184
185 #define DM_MSG_PREFIX "table"
186@@ -24,6 +25,19 @@
187 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
188 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
189
190+/*
191+ * The table has always exactly one reference from either mapped_device->map
192+ * or hash_cell->new_map. This reference is not counted in table->holders.
193+ * A pair of dm_create_table/dm_destroy_table functions is used for table
194+ * creation/destruction.
195+ *
196+ * Temporary references from the other code increase table->holders. A pair
197+ * of dm_table_get/dm_table_put functions is used to manipulate it.
198+ *
199+ * When the table is about to be destroyed, we wait for table->holders to
200+ * drop to zero.
201+ */
202+
203 struct dm_table {
204 struct mapped_device *md;
205 atomic_t holders;
206@@ -231,7 +245,7 @@ int dm_table_create(struct dm_table **re
207 return -ENOMEM;
208
209 INIT_LIST_HEAD(&t->devices);
210- atomic_set(&t->holders, 1);
211+ atomic_set(&t->holders, 0);
212
213 if (!num_targets)
214 num_targets = KEYS_PER_NODE;
215@@ -260,10 +274,14 @@ static void free_devices(struct list_hea
216 }
217 }
218
219-static void table_destroy(struct dm_table *t)
220+void dm_table_destroy(struct dm_table *t)
221 {
222 unsigned int i;
223
224+ while (atomic_read(&t->holders))
225+ msleep(1);
226+ smp_mb();
227+
228 /* free the indexes (see dm_table_complete) */
229 if (t->depth >= 2)
230 vfree(t->index[t->depth - 2]);
231@@ -301,8 +319,8 @@ void dm_table_put(struct dm_table *t)
232 if (!t)
233 return;
234
235- if (atomic_dec_and_test(&t->holders))
236- table_destroy(t);
237+ smp_mb__before_atomic_dec();
238+ atomic_dec(&t->holders);
239 }
240
241 /*