From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Subject: Request-based multipath patches
References: FATE#302108

This is the latest version of the request-based multipathing patches,
posted to dm-devel and linux-scsi on 03.10.2008.

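For readers new to the series: with these patches a mapped device operates
in one of two modes, decided per-table at load time.  Bio-based targets keep
the existing .map/.end_io hooks; request-based targets instead implement the
.map_rq/.rq_end_io hooks (plus an optional .busy hook) added below.  As a
rough sketch, using only the hooks this patch introduces (the example_*
names are hypothetical, not part of the patch):

	static int example_map_rq(struct dm_target *ti, struct request *clone,
				  union map_info *map_context)
	{
		/* Remap the clone to an underlying queue and let dm core
		 * dispatch it; DM_MAPIO_SUBMITTED and DM_MAPIO_REQUEUE are
		 * the other valid returns, as in multipath_map() below. */
		return DM_MAPIO_REMAPPED;
	}

	static int example_rq_end_io(struct dm_target *ti, struct request *clone,
				     int error, union map_info *map_context)
	{
		/* Return the error to complete, or DM_ENDIO_REQUEUE to have
		 * dm core re-clone and resubmit the original request. */
		return error;
	}

	static struct target_type example_target = {
		.name      = "example",
		.module    = THIS_MODULE,
		.map_rq    = example_map_rq,	/* instead of .map */
		.rq_end_io = example_rq_end_io,	/* instead of .end_io */
	};
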
Signed-off-by: Hannes Reinecke <hare@suse.de>

---
 drivers/md/dm-ioctl.c         |   13 
 drivers/md/dm-mpath.c         |  192 +++++---
 drivers/md/dm-table.c         |   82 +++
 drivers/md/dm.c               |  952 +++++++++++++++++++++++++++++++++++++++---
 drivers/md/dm.h               |   17 
 include/linux/device-mapper.h |   24 +
 6 files changed, 1158 insertions(+), 122 deletions(-)

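A note on the trickiest part of the dm.c changes, suspend: for a
request-based device the request_queue itself is the holding area, so a
flush suspend inserts a marker request (md->suspend_rq) and lets
dm_prep_fn() detect quiescence.  In outline (a paraphrase of
dm_start_suspend() and dm_prep_fn() below, not additional code):

	/* suspend side (dm_start_suspend) */
	if (noflush)
		stop_queue(q);			/* just stop dispatching */
	else
		blk_insert_request(q, &md->suspend_rq, 0, NULL); /* marker */

	/* dm_prep_fn(), on seeing the marker */
	if (q->in_flight)
		return BLKPREP_DEFER;		/* drain in-flight I/O first */
	__stop_queue(q);			/* device is quiet now */
	wake_up(&md->wait);			/* release dm_wait_for_completion() */
	return BLKPREP_KILL;			/* the marker itself is discarded */
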
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1046,6 +1046,12 @@ static int populate_table(struct dm_tabl
 		next = spec->next;
 	}
 
+	r = dm_table_set_type(table);
+	if (r) {
+		DMWARN("unable to set table type");
+		return r;
+	}
+
 	return dm_table_complete(table);
 }
 
@@ -1069,6 +1075,13 @@ static int table_load(struct dm_ioctl *p
 		dm_table_put(t);
 		goto out;
 	}
+
+	r = dm_init_md_mempool(md, dm_table_get_type(t));
+	if (r) {
+		DMWARN("unable to initialize the md mempools for this table");
+		dm_table_put(t);
+		goto out;
+	}
 
 	down_write(&_hash_lock);
 	hc = dm_get_mdptr(md);
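
Note on the two hunks above: they pin down the load-time ordering.  The
table type is decided before dm_table_complete(), and table_load() then
sizes the md's mempools for that type before the table is bound.  A sketch
of the resulting call sequence (illustrative, not part of the patch):

	dm_table_set_type(t);				/* bio- or request-based? */
	dm_table_complete(t);
	...
	dm_init_md_mempool(md, dm_table_get_type(t));	/* pools sized for that type */
	/* only then is the table bound to the device */
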
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -7,8 +7,6 @@
 
 #include "dm.h"
 #include "dm-path-selector.h"
-#include "dm-bio-list.h"
-#include "dm-bio-record.h"
 #include "dm-uevent.h"
 
 #include <linux/ctype.h>
@@ -83,7 +81,7 @@ struct multipath {
 	unsigned pg_init_count;		/* Number of times pg_init called */
 
 	struct work_struct process_queued_ios;
-	struct bio_list queued_ios;
+	struct list_head queued_ios;
 	unsigned queue_size;
 
 	struct work_struct trigger_event;
@@ -100,7 +98,6 @@ struct multipath {
  */
 struct dm_mpath_io {
 	struct pgpath *pgpath;
-	struct dm_bio_details details;
 };
 
 typedef int (*action_fn) (struct pgpath *pgpath);
@@ -197,6 +194,7 @@ static struct multipath *alloc_multipath
 	m = kzalloc(sizeof(*m), GFP_KERNEL);
 	if (m) {
 		INIT_LIST_HEAD(&m->priority_groups);
+		INIT_LIST_HEAD(&m->queued_ios);
 		spin_lock_init(&m->lock);
 		m->queue_io = 1;
 		INIT_WORK(&m->process_queued_ios, process_queued_ios);
@@ -321,12 +319,13 @@ static int __must_push_back(struct multi
 		 dm_noflush_suspending(m->ti));
 }
 
-static int map_io(struct multipath *m, struct bio *bio,
+static int map_io(struct multipath *m, struct request *clone,
 		  struct dm_mpath_io *mpio, unsigned was_queued)
 {
 	int r = DM_MAPIO_REMAPPED;
 	unsigned long flags;
 	struct pgpath *pgpath;
+	struct block_device *bdev;
 
 	spin_lock_irqsave(&m->lock, flags);
 
@@ -343,16 +342,18 @@ static int map_io(struct multipath *m, s
 	if ((pgpath && m->queue_io) ||
 	    (!pgpath && m->queue_if_no_path)) {
 		/* Queue for the daemon to resubmit */
-		bio_list_add(&m->queued_ios, bio);
+		list_add_tail(&clone->queuelist, &m->queued_ios);
 		m->queue_size++;
 		if ((m->pg_init_required && !m->pg_init_in_progress) ||
 		    !m->queue_io)
 			queue_work(kmultipathd, &m->process_queued_ios);
 		pgpath = NULL;
 		r = DM_MAPIO_SUBMITTED;
-	} else if (pgpath)
-		bio->bi_bdev = pgpath->path.dev->bdev;
-	else if (__must_push_back(m))
+	} else if (pgpath) {
+		bdev = pgpath->path.dev->bdev;
+		clone->q = bdev_get_queue(bdev);
+		clone->rq_disk = bdev->bd_disk;
+	} else if (__must_push_back(m))
 		r = DM_MAPIO_REQUEUE;
 	else
 		r = -EIO;	/* Failed */
@@ -395,30 +396,31 @@ static void dispatch_queued_ios(struct m
 {
 	int r;
 	unsigned long flags;
-	struct bio *bio = NULL, *next;
 	struct dm_mpath_io *mpio;
 	union map_info *info;
+	struct request *clone, *n;
+	LIST_HEAD(cl);
 
 	spin_lock_irqsave(&m->lock, flags);
-	bio = bio_list_get(&m->queued_ios);
+	list_splice_init(&m->queued_ios, &cl);
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	while (bio) {
-		next = bio->bi_next;
-		bio->bi_next = NULL;
+	list_for_each_entry_safe(clone, n, &cl, queuelist) {
+		list_del_init(&clone->queuelist);
 
-		info = dm_get_mapinfo(bio);
+		info = dm_get_rq_mapinfo(clone);
 		mpio = info->ptr;
 
-		r = map_io(m, bio, mpio, 1);
-		if (r < 0)
-			bio_endio(bio, r);
-		else if (r == DM_MAPIO_REMAPPED)
-			generic_make_request(bio);
-		else if (r == DM_MAPIO_REQUEUE)
-			bio_endio(bio, -EIO);
-
-		bio = next;
+		r = map_io(m, clone, mpio, 1);
+		if (r < 0) {
+			mempool_free(mpio, m->mpio_pool);
+			dm_kill_request(clone, r);
+		} else if (r == DM_MAPIO_REMAPPED)
+			dm_dispatch_request(clone);
+		else if (r == DM_MAPIO_REQUEUE) {
+			mempool_free(mpio, m->mpio_pool);
+			dm_requeue_request(clone);
+		}
 	}
 }
 
@@ -844,21 +846,24 @@ static void multipath_dtr(struct dm_targ
 }
 
 /*
- * Map bios, recording original fields for later in case we have to resubmit
+ * Map cloned requests
 */
-static int multipath_map(struct dm_target *ti, struct bio *bio,
+static int multipath_map(struct dm_target *ti, struct request *clone,
			 union map_info *map_context)
 {
 	int r;
 	struct dm_mpath_io *mpio;
 	struct multipath *m = (struct multipath *) ti->private;
 
-	mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
-	dm_bio_record(&mpio->details, bio);
+	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
+	if (!mpio)
+		/* ENOMEM, requeue */
+		return DM_MAPIO_REQUEUE;
+	memset(mpio, 0, sizeof(*mpio));
 
 	map_context->ptr = mpio;
-	bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
-	r = map_io(m, bio, mpio, 0);
+	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+	r = map_io(m, clone, mpio, 0);
 	if (r < 0 || r == DM_MAPIO_REQUEUE)
 		mempool_free(mpio, m->mpio_pool);
 
@@ -1140,53 +1145,41 @@ static void activate_path(struct work_st
 /*
 * end_io handling
 */
-static int do_end_io(struct multipath *m, struct bio *bio,
+static int do_end_io(struct multipath *m, struct request *clone,
		     int error, struct dm_mpath_io *mpio)
 {
+	/*
+	 * We don't queue any clone request inside the multipath target
+	 * during end I/O handling, since those clone requests don't have
+	 * bio clones.  If we queued them here, we would have to make bio
+	 * clones, which requires memory allocation.
+	 * (See drivers/md/dm.c:end_clone_bio() for why the clone requests
+	 * don't have bio clones.)
+	 * Instead of queueing the clone request here, we queue the original
+	 * request into dm core, which will remake a clone request and
+	 * clone bios for it, and resubmit it later.
+	 */
+	int r = DM_ENDIO_REQUEUE;
 	unsigned long flags;
 
-	if (!error)
+	if (!error && !clone->errors)
 		return 0;	/* I/O complete */
 
-	if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
-		return error;
-
 	if (error == -EOPNOTSUPP)
 		return error;
 
-	spin_lock_irqsave(&m->lock, flags);
-	if (!m->nr_valid_paths) {
-		if (__must_push_back(m)) {
-			spin_unlock_irqrestore(&m->lock, flags);
-			return DM_ENDIO_REQUEUE;
-		} else if (!m->queue_if_no_path) {
-			spin_unlock_irqrestore(&m->lock, flags);
-			return -EIO;
-		} else {
-			spin_unlock_irqrestore(&m->lock, flags);
-			goto requeue;
-		}
-	}
-	spin_unlock_irqrestore(&m->lock, flags);
-
 	if (mpio->pgpath)
 		fail_path(mpio->pgpath);
 
- requeue:
-	dm_bio_restore(&mpio->details, bio);
-
-	/* queue for the daemon to resubmit or fail */
 	spin_lock_irqsave(&m->lock, flags);
-	bio_list_add(&m->queued_ios, bio);
-	m->queue_size++;
-	if (!m->queue_io)
-		queue_work(kmultipathd, &m->process_queued_ios);
+	if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
+		r = -EIO;
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	return DM_ENDIO_INCOMPLETE;	/* io not complete */
+	return r;
 }
 
-static int multipath_end_io(struct dm_target *ti, struct bio *bio,
+static int multipath_end_io(struct dm_target *ti, struct request *clone,
			    int error, union map_info *map_context)
 {
 	struct multipath *m = ti->private;
@@ -1195,14 +1188,13 @@ static int multipath_end_io(struct dm_ta
 	struct path_selector *ps;
 	int r;
 
-	r = do_end_io(m, bio, error, mpio);
+	r = do_end_io(m, clone, error, mpio);
 	if (pgpath) {
 		ps = &pgpath->pg->ps;
 		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path);
 	}
-	if (r != DM_ENDIO_INCOMPLETE)
-		mempool_free(mpio, m->mpio_pool);
+	mempool_free(mpio, m->mpio_pool);
 
 	return r;
 }
@@ -1438,6 +1430,75 @@ static int multipath_ioctl(struct dm_tar
					  bdev->bd_disk, cmd, arg);
 }
 
+static int __pgpath_busy(struct pgpath *pgpath)
+{
+	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
+
+	return dm_underlying_device_busy(q);
+}
+
+/*
+ * We return "busy" only when we can map I/Os but the underlying devices
+ * are busy (so even if we mapped I/Os now, they would just wait on the
+ * underlying queue).
+ * In other words, if we want to kill I/Os or queue them inside us
+ * due to map unavailability, we don't return "busy".  Otherwise,
+ * dm core won't give us the I/Os and we can't do what we want.
+ */
+static int multipath_busy(struct dm_target *ti)
+{
+	int busy = 0, has_active = 0;
+	struct multipath *m = (struct multipath *) ti->private;
+	struct priority_group *pg;
+	struct pgpath *pgpath;
+	unsigned long flags;
+
+	spin_lock_irqsave(&m->lock, flags);
+
+	/* Guess which priority_group will be used at next mapping time */
+	if (unlikely(!m->current_pgpath && m->next_pg))
+		pg = m->next_pg;
+	else if (likely(m->current_pg))
+		pg = m->current_pg;
+	else
+		/*
+		 * We don't know which pg will be used at next mapping time.
+		 * We don't call __choose_pgpath() here, to avoid triggering
+		 * pg_init just by a busy check.
+		 * So we don't know whether the underlying devices we will be
+		 * using at next mapping time are busy or not.  Just try
+		 * mapping.
+		 */
+		goto out;
+
+	/*
+	 * If there is at least one non-busy active path, the path selector
+	 * will be able to select it.  So we consider such a pg as not busy.
+	 */
+	busy = 1;
+	list_for_each_entry(pgpath, &pg->pgpaths, list)
+		if (pgpath->is_active) {
+			has_active = 1;
+
+			if (!__pgpath_busy(pgpath)) {
+				busy = 0;
+				break;
+			}
+		}
+
+	if (!has_active)
+		/*
+		 * No active path in this pg, so this pg won't be used and
+		 * the current_pg will be changed at next mapping time.
+		 * We need to try mapping to determine it.
+		 */
+		busy = 0;
+
+out:
+	spin_unlock_irqrestore(&m->lock, flags);
+
+	return busy;
+}
+
 /*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
@@ -1447,13 +1508,14 @@ static struct target_type multipath_targ
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
-	.map = multipath_map,
-	.end_io = multipath_end_io,
+	.map_rq = multipath_map,
+	.rq_end_io = multipath_end_io,
 	.presuspend = multipath_presuspend,
 	.resume = multipath_resume,
 	.status = multipath_status,
 	.message = multipath_message,
 	.ioctl = multipath_ioctl,
+	.busy = multipath_busy,
 };
 
 static int __init dm_multipath_init(void)
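
Note on the multipath changes above: queued I/O now holds struct request
clones on m->queued_ios via clone->queuelist instead of a bio_list, and
mpio ownership follows a simple contract.  A sketch (mirroring
multipath_map() above, not additional code):

	r = map_io(m, clone, mpio, 0);
	if (r < 0 || r == DM_MAPIO_REQUEUE)
		mempool_free(mpio, m->mpio_pool);	/* mapper frees on failure */
	/* on DM_MAPIO_SUBMITTED/REMAPPED, mpio is freed in multipath_end_io(),
	 * which is why do_end_io() no longer returns DM_ENDIO_INCOMPLETE */
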
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -108,6 +108,8 @@ static void combine_restrictions_low(str
 	lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
 
 	lhs->no_cluster |= rhs->no_cluster;
+
+	lhs->no_request_stacking |= rhs->no_request_stacking;
 }
 
 /*
@@ -522,6 +524,8 @@ void dm_set_device_limits(struct dm_targ
 	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
 
 	rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+
+	rs->no_request_stacking |= !blk_queue_stackable(q);
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
@@ -731,6 +735,66 @@ int dm_table_add_target(struct dm_table
 	return r;
 }
 
+int dm_table_set_type(struct dm_table *t)
+{
+	int i;
+	int bio_based = 0, request_based = 0;
+	struct dm_target *tgt;
+
+	for (i = 0; i < t->num_targets; i++) {
+		tgt = t->targets + i;
+		if (tgt->type->map_rq)
+			request_based = 1;
+		else
+			bio_based = 1;
+
+		if (bio_based && request_based) {
+			DMWARN("Inconsistent table: different target types"
+			       " can't be mixed");
+			return -EINVAL;
+		}
+	}
+
+	if (bio_based) {
+		/* We must use this table as bio-based */
+		t->limits.no_request_stacking = 1;
+		return 0;
+	}
+
+	BUG_ON(!request_based); /* No targets in this table */
+
+	/* Non-request-stackable devices can't be used for request-based dm */
+	if (t->limits.no_request_stacking) {
+		DMWARN("table load rejected: including non-request-stackable"
+		       " devices");
+		return -EINVAL;
+	}
+
+	/*
+	 * Request-based dm supports only tables that have a single target now.
+	 * To support multiple targets, request splitting support is needed,
+	 * and that needs lots of changes in the block-layer.
+	 * (e.g. request completion process for partial completion.)
+	 */
+	if (t->num_targets > 1) {
+		DMWARN("Request-based dm doesn't support multiple targets yet");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int dm_table_get_type(struct dm_table *t)
+{
+	return t->limits.no_request_stacking ?
+		DM_TYPE_BIO_BASED : DM_TYPE_REQUEST_BASED;
+}
+
+int dm_table_request_based(struct dm_table *t)
+{
+	return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
+}
+
 static int setup_indexes(struct dm_table *t)
 {
 	int i;
@@ -861,6 +925,10 @@ void dm_table_set_restrictions(struct dm
 	else
 		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
 
+	if (t->limits.no_request_stacking)
+		queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, q);
+	else
+		queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -949,6 +1017,20 @@ int dm_table_any_congested(struct dm_tab
 	return r;
 }
 
+int dm_table_any_busy_target(struct dm_table *t)
+{
+	int i;
+	struct dm_target *ti;
+
+	for (i = 0; i < t->num_targets; i++) {
+		ti = t->targets + i;
+		if (ti->type->busy && ti->type->busy(ti))
+			return 1;
+	}
+
+	return 0;
+}
+
 void dm_table_unplug_all(struct dm_table *t)
 {
 	struct dm_dev *dd;
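
Note on dm_table_set_type() above: the decision reduces to a small ladder.
An illustrative restatement (the helper name is hypothetical, not in the
patch):

	static int effective_type(struct dm_table *t)
	{
		if (any_target_lacks_map_rq(t))		/* some bio-based target */
			return DM_TYPE_BIO_BASED;
		if (t->limits.no_request_stacking)	/* non-stackable device */
			return -EINVAL;			/* table load rejected */
		if (t->num_targets > 1)			/* no request splitting yet */
			return -EINVAL;			/* table load rejected */
		return DM_TYPE_REQUEST_BASED;
	}
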
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -32,6 +32,7 @@ static unsigned int _major = 0;
 
 static DEFINE_SPINLOCK(_minor_lock);
 /*
+ * For bio based dm.
 * One of these is allocated per bio.
 */
 struct dm_io {
@@ -43,6 +44,7 @@ struct dm_io {
 };
 
 /*
+ * For bio based dm.
 * One of these is allocated per target within a bio. Hopefully
 * this will be simplified out one day.
 */
@@ -52,6 +54,31 @@ struct dm_target_io {
 	union map_info info;
 };
 
+/*
+ * For request based dm.
+ * One of these is allocated per request.
+ *
+ * Since we assume "original request : cloned request = 1 : 1", a counter
+ * for the number of clones (like struct dm_io.io_count) isn't needed, and
+ * struct dm_io and struct dm_target_io can be merged into this one.
+ */
+struct dm_rq_target_io {
+	struct mapped_device *md;
+	struct dm_target *ti;
+	struct request *orig, clone;
+	int error;
+	union map_info info;
+};
+
+/*
+ * For request based dm.
+ * One of these is allocated per bio.
+ */
+struct dm_clone_bio_info {
+	struct bio *orig;
+	struct request *rq;
+};
+
 union map_info *dm_get_mapinfo(struct bio *bio)
 {
 	if (bio && bio->bi_private)
@@ -59,6 +86,14 @@ union map_info *dm_get_mapinfo(struct bi
 	return NULL;
 }
 
+union map_info *dm_get_rq_mapinfo(struct request *rq)
+{
+	if (rq && rq->end_io_data)
+		return &((struct dm_rq_target_io *)rq->end_io_data)->info;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
+
 #define MINOR_ALLOCED ((void *)-1)
 
 /*
@@ -76,7 +111,6 @@ union map_info *dm_get_mapinfo(struct bi
 */
 struct dm_wq_req {
 	enum {
-		DM_WQ_FLUSH_ALL,
 		DM_WQ_FLUSH_DEFERRED,
 	} type;
 	struct work_struct work;
@@ -126,6 +160,8 @@ struct mapped_device {
 
 	struct bio_set *bs;
 
+	unsigned int mempool_type; /* Type of mempools above. */
+
 	/*
 	 * Event handling.
 	 */
@@ -143,52 +179,74 @@ struct mapped_device {
 
 	/* forced geometry settings */
 	struct hd_geometry geometry;
+
+	/* marker of flush suspend for request-based dm */
+	struct request suspend_rq;
+
+	/* For saving the address of __make_request for request based dm */
+	make_request_fn *saved_make_request_fn;
 };
 
 #define MIN_IOS 256
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_tio_cache;
+static struct kmem_cache *_rq_tio_cache;
+static struct kmem_cache *_bio_info_cache;
 
 static int __init local_init(void)
 {
-	int r;
+	int r = -ENOMEM;
 
 	/* allocate a slab for the dm_ios */
 	_io_cache = KMEM_CACHE(dm_io, 0);
 	if (!_io_cache)
-		return -ENOMEM;
+		return r;
 
 	/* allocate a slab for the target ios */
 	_tio_cache = KMEM_CACHE(dm_target_io, 0);
-	if (!_tio_cache) {
-		kmem_cache_destroy(_io_cache);
-		return -ENOMEM;
-	}
+	if (!_tio_cache)
+		goto out_free_io_cache;
+
+	_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
+	if (!_rq_tio_cache)
+		goto out_free_tio_cache;
+
+	_bio_info_cache = KMEM_CACHE(dm_clone_bio_info, 0);
+	if (!_bio_info_cache)
+		goto out_free_rq_tio_cache;
 
 	r = dm_uevent_init();
-	if (r) {
-		kmem_cache_destroy(_tio_cache);
-		kmem_cache_destroy(_io_cache);
-		return r;
-	}
+	if (r)
+		goto out_free_bio_info_cache;
 
 	_major = major;
 	r = register_blkdev(_major, _name);
-	if (r < 0) {
-		kmem_cache_destroy(_tio_cache);
-		kmem_cache_destroy(_io_cache);
-		dm_uevent_exit();
-		return r;
-	}
+	if (r < 0)
+		goto out_uevent_exit;
 
 	if (!_major)
 		_major = r;
 
 	return 0;
+
+out_uevent_exit:
+	dm_uevent_exit();
+out_free_bio_info_cache:
+	kmem_cache_destroy(_bio_info_cache);
+out_free_rq_tio_cache:
+	kmem_cache_destroy(_rq_tio_cache);
+out_free_tio_cache:
+	kmem_cache_destroy(_tio_cache);
+out_free_io_cache:
+	kmem_cache_destroy(_io_cache);
+
+	return r;
 }
 
 static void local_exit(void)
 {
+	kmem_cache_destroy(_bio_info_cache);
+	kmem_cache_destroy(_rq_tio_cache);
 	kmem_cache_destroy(_tio_cache);
 	kmem_cache_destroy(_io_cache);
 	unregister_blkdev(_major, _name);
@@ -380,6 +438,28 @@ static void free_tio(struct mapped_devic
 	mempool_free(tio, md->tio_pool);
 }
 
+static inline struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
+{
+	return mempool_alloc(md->tio_pool, GFP_ATOMIC);
+}
+
+static inline void free_rq_tio(struct mapped_device *md,
+			       struct dm_rq_target_io *tio)
+{
+	mempool_free(tio, md->tio_pool);
+}
+
+static inline struct dm_clone_bio_info *alloc_bio_info(struct mapped_device *md)
+{
+	return mempool_alloc(md->io_pool, GFP_ATOMIC);
+}
+
+static inline void free_bio_info(struct mapped_device *md,
+				 struct dm_clone_bio_info *info)
+{
+	mempool_free(info, md->io_pool);
+}
+
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
@@ -568,6 +648,266 @@ static void clone_endio(struct bio *bio,
 	free_tio(md, tio);
 }
 
+/*
+ * Partial completion handling for request-based dm
+ */
+static void end_clone_bio(struct bio *clone, int error)
+{
+	struct dm_clone_bio_info *info = clone->bi_private;
+	struct dm_rq_target_io *tio = info->rq->end_io_data;
+	struct bio *bio = info->orig;
+	unsigned int nr_bytes = info->orig->bi_size;
+
+	free_bio_info(tio->md, info);
+	clone->bi_private = tio->md->bs;
+	bio_put(clone);
+
+	if (tio->error) {
+		/*
+		 * An error has already been detected on the request.
+		 * Once an error has occurred, just let clone->end_io()
+		 * handle the remainder.
+		 */
+		return;
+	} else if (error) {
+		/*
+		 * Don't notify the upper layer of the error yet.
+		 * The error handling decision is made by the target driver
+		 * when the request is completed.
+		 */
+		tio->error = error;
+		return;
+	}
+
+	/*
+	 * I/O for the bio successfully completed.
+	 * Notify the upper layer of the data completion.
+	 */
+
+	/*
+	 * bios are processed from the head of the list.
+	 * So the completing bio should always be rq->bio.
+	 * If it's not, something is going wrong.
+	 */
+	if (tio->orig->bio != bio)
+		DMERR("bio completion is going in the middle of the request");
+
+	/*
+	 * Update the original request.
+	 * Do not use blk_end_request() here, because it may complete
+	 * the original request before the clone, and break the ordering.
+	 */
+	blk_update_request(tio->orig, 0, nr_bytes);
+}
+
+static void free_bio_clone(struct request *clone)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct mapped_device *md = tio->md;
+	struct bio *bio;
+	struct dm_clone_bio_info *info;
+
+	while ((bio = clone->bio) != NULL) {
+		clone->bio = bio->bi_next;
+
+		info = bio->bi_private;
+		free_bio_info(md, info);
+
+		bio->bi_private = md->bs;
+		bio_put(bio);
+	}
+}
+
+static void dec_rq_pending(struct dm_rq_target_io *tio)
+{
+	if (!atomic_dec_return(&tio->md->pending))
+		/* nudge anyone waiting on suspend queue */
+		wake_up(&tio->md->wait);
+}
+
+static void dm_unprep_request(struct request *rq)
+{
+	struct request *clone = rq->special;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+
+	rq->special = NULL;
+	rq->cmd_flags &= ~REQ_DONTPREP;
+
+	free_bio_clone(clone);
+	dec_rq_pending(tio);
+	free_rq_tio(tio->md, tio);
+}
+
+/*
+ * Requeue the original request of a clone.
+ */
+void dm_requeue_request(struct request *clone)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct request *rq = tio->orig;
+	struct request_queue *q = rq->q;
+	unsigned long flags;
+
+	dm_unprep_request(rq);
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (elv_queue_empty(q))
+		blk_plug_device(q);
+	blk_requeue_request(q, rq);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(dm_requeue_request);
+
+static inline void __stop_queue(struct request_queue *q)
+{
+	blk_stop_queue(q);
+}
+
+static void stop_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	__stop_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static inline void __start_queue(struct request_queue *q)
+{
+	if (blk_queue_stopped(q))
+		blk_start_queue(q);
+}
+
+static void start_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	__start_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/*
+ * Complete the clone and the original request
+ */
+static void dm_end_request(struct request *clone, int error)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct request *rq = tio->orig;
+	struct request_queue *q = rq->q;
+	unsigned int nr_bytes = blk_rq_bytes(rq);
+
+	if (blk_pc_request(rq)) {
+		rq->errors = clone->errors;
+		rq->data_len = clone->data_len;
+
+		if (rq->sense)
+			/*
+			 * We are using the sense buffer of the original
+			 * request.
+			 * So setting the length of the sense data is enough.
+			 */
+			rq->sense_len = clone->sense_len;
+	}
+
+	free_bio_clone(clone);
+	dec_rq_pending(tio);
+	free_rq_tio(tio->md, tio);
+
+	if (unlikely(blk_end_request(rq, error, nr_bytes)))
+		BUG();
+
+	blk_run_queue(q);
+}
+
+/*
+ * Request completion handler for request-based dm
+ */
+static void dm_softirq_done(struct request *rq)
+{
+	struct request *clone = rq->completion_data;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
+	int error = tio->error;
+	int r;
+
+	if (rq->cmd_flags & REQ_FAILED)
+		goto end_request;
+
+	if (rq_end_io) {
+		r = rq_end_io(tio->ti, clone, error, &tio->info);
+		if (r <= 0)
+			/* The target wants to complete the I/O */
+			error = r;
+		else if (r == DM_ENDIO_INCOMPLETE)
+			/* The target will handle the I/O */
+			return;
+		else if (r == DM_ENDIO_REQUEUE) {
+			/*
+			 * The target wants to requeue the I/O.
+			 * Don't invoke blk_run_queue() so that the requeued
+			 * request won't be dispatched again soon.
+			 */
+			dm_requeue_request(clone);
+			return;
+		} else {
+			DMWARN("unimplemented target endio return value: %d",
+			       r);
+			BUG();
+		}
+	}
+
+end_request:
+	dm_end_request(clone, error);
+}
+
+/*
+ * Called with the queue lock held
+ */
+static void end_clone_request(struct request *clone, int error)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct request *rq = tio->orig;
+
+	/*
+	 * This is just for cleaning up the information of the queue in
+	 * which the clone was dispatched.
+	 * The clone is *NOT* actually freed here, because it was allocated
+	 * from dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
+	 */
+	__blk_put_request(clone->q, clone);
+
+	/*
+	 * Actual request completion is done in a softirq context which
+	 * doesn't hold the queue lock.  Otherwise, deadlock could occur
+	 * because:
+	 *   - another request may be submitted by the upper level driver
+	 *     of the stacking during the completion
+	 *   - the submission which requires the queue lock may be done
+	 *     against this queue
+	 */
+	tio->error = error;
+	rq->completion_data = clone;
+	blk_complete_request(rq);
+}
+
+/*
+ * Complete the original request of a clone with an error status.
+ * The target's rq_end_io() function isn't called.
+ * This may be used by a target's map_rq() function when the mapping fails.
+ */
+void dm_kill_request(struct request *clone, int error)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct request *rq = tio->orig;
+
+	tio->error = error;
+	/* Avoid printing "I/O error", since no I/O was actually performed */
+	rq->cmd_flags |= (REQ_FAILED | REQ_QUIET);
+	rq->completion_data = clone;
+	blk_complete_request(rq);
+}
+EXPORT_SYMBOL_GPL(dm_kill_request);
+
 static sector_t max_io_len(struct mapped_device *md,
			   sector_t sector, struct dm_target *ti)
 {
@@ -886,7 +1226,7 @@ out:
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
-static int dm_request(struct request_queue *q, struct bio *bio)
+static int _dm_request(struct request_queue *q, struct bio *bio)
 {
 	int r = -EIO;
 	int rw = bio_data_dir(bio);
@@ -936,12 +1276,335 @@ out_req:
 	return 0;
 }
 
+static int dm_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct mapped_device *md = (struct mapped_device *)q->queuedata;
+
+	if (unlikely(bio_barrier(bio))) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
+	}
+
+	if (unlikely(!md->map)) {
+		bio_endio(bio, -EIO);
+		return 0;
+	}
+
+	return md->saved_make_request_fn(q, bio); /* call __make_request() */
+}
+
+static inline int dm_request_based(struct mapped_device *md)
+{
+	return blk_queue_stackable(md->queue);
+}
+
+static int dm_request(struct request_queue *q, struct bio *bio)
+{
+	struct mapped_device *md = q->queuedata;
+
+	if (dm_request_based(md))
+		return dm_make_request(q, bio);
+
+	return _dm_request(q, bio);
+}
+
+void dm_dispatch_request(struct request *rq)
+{
+	int r;
+
+	rq->start_time = jiffies;
+	r = blk_insert_cloned_request(rq->q, rq);
+	if (r)
+		dm_kill_request(rq, r);
+}
+EXPORT_SYMBOL_GPL(dm_dispatch_request);
+
+static void copy_request_info(struct request *clone, struct request *rq)
+{
+	clone->cmd_flags = (rq_data_dir(rq) | REQ_NOMERGE);
+	clone->cmd_type = rq->cmd_type;
+	clone->sector = rq->sector;
+	clone->hard_sector = rq->hard_sector;
+	clone->nr_sectors = rq->nr_sectors;
+	clone->hard_nr_sectors = rq->hard_nr_sectors;
+	clone->current_nr_sectors = rq->current_nr_sectors;
+	clone->hard_cur_sectors = rq->hard_cur_sectors;
+	clone->nr_phys_segments = rq->nr_phys_segments;
+	clone->ioprio = rq->ioprio;
+	clone->buffer = rq->buffer;
+	clone->cmd_len = rq->cmd_len;
+	if (rq->cmd_len)
+		clone->cmd = rq->cmd;
+	clone->data_len = rq->data_len;
+	clone->extra_len = rq->extra_len;
+	clone->sense_len = rq->sense_len;
+	clone->data = rq->data;
+	clone->sense = rq->sense;
+}
+
+static int clone_request_bios(struct request *clone, struct request *rq,
+			      struct mapped_device *md)
+{
+	struct bio *bio, *clone_bio;
+	struct dm_clone_bio_info *info;
+
+	for (bio = rq->bio; bio; bio = bio->bi_next) {
+		info = alloc_bio_info(md);
+		if (!info)
+			goto free_and_out;
+
+		clone_bio = bio_alloc_bioset(GFP_ATOMIC, bio->bi_max_vecs,
+					     md->bs);
+		if (!clone_bio) {
+			free_bio_info(md, info);
+			goto free_and_out;
+		}
+
+		__bio_clone(clone_bio, bio);
+		clone_bio->bi_destructor = dm_bio_destructor;
+		clone_bio->bi_end_io = end_clone_bio;
+		info->rq = clone;
+		info->orig = bio;
+		clone_bio->bi_private = info;
+
+		if (clone->bio) {
+			clone->biotail->bi_next = clone_bio;
+			clone->biotail = clone_bio;
+		} else
+			clone->bio = clone->biotail = clone_bio;
+	}
+
+	return 0;
+
+free_and_out:
+	free_bio_clone(clone);
+
+	return -ENOMEM;
+}
+
+static int setup_clone(struct request *clone, struct request *rq,
+		       struct dm_rq_target_io *tio)
+{
+	int r;
+
+	blk_rq_init(NULL, clone);
+
+	r = clone_request_bios(clone, rq, tio->md);
+	if (r)
+		return r;
+
+	copy_request_info(clone, rq);
+	clone->start_time = jiffies;
+	clone->end_io = end_clone_request;
+	clone->end_io_data = tio;
+
+	return 0;
+}
+
+static inline int dm_flush_suspending(struct mapped_device *md)
+{
+	return !md->suspend_rq.data;
+}
+
+/*
+ * Called with the queue lock held.
+ */
+static int dm_prep_fn(struct request_queue *q, struct request *rq)
+{
+	struct mapped_device *md = (struct mapped_device *)q->queuedata;
+	struct dm_rq_target_io *tio;
+	struct request *clone;
+
+	if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend marker */
+		if (dm_flush_suspending(md)) {
+			if (q->in_flight)
+				return BLKPREP_DEFER;
+			else {
+				/* This device should be quiet now */
+				__stop_queue(q);
+				smp_mb();
+				BUG_ON(atomic_read(&md->pending));
+				wake_up(&md->wait);
+				return BLKPREP_KILL;
+			}
+		} else
+			/*
+			 * The suspend process was interrupted.
+			 * So no need to suspend now.
+			 */
+			return BLKPREP_KILL;
+	}
+
+	if (unlikely(rq->special)) {
+		DMWARN("Already has something in rq->special.");
+		return BLKPREP_KILL;
+	}
+
+	if (unlikely(!dm_request_based(md))) {
+		DMWARN("Request was queued into bio-based device");
+		return BLKPREP_KILL;
+	}
+
+	tio = alloc_rq_tio(md); /* Only one for each original request */
+	if (!tio)
+		/* -ENOMEM */
+		return BLKPREP_DEFER;
+
+	tio->md = md;
+	tio->ti = NULL;
+	tio->orig = rq;
+	tio->error = 0;
+	memset(&tio->info, 0, sizeof(tio->info));
+
+	clone = &tio->clone;
+	if (setup_clone(clone, rq, tio)) {
+		/* -ENOMEM */
+		free_rq_tio(md, tio);
+		return BLKPREP_DEFER;
+	}
+
+	rq->special = clone;
+	rq->cmd_flags |= REQ_DONTPREP;
+
+	return BLKPREP_OK;
+}
+
+static void map_request(struct dm_target *ti, struct request *rq,
+			struct mapped_device *md)
+{
+	int r;
+	struct request *clone = rq->special;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+
+	tio->ti = ti;
+	atomic_inc(&md->pending);
+
+	/*
+	 * Although requests submitted to the md->queue are checked against
+	 * the table/queue limitations at submission time, the limitations
+	 * may be changed by a table swap while those already-checked
+	 * requests are still in the md->queue.
+	 * If the limitations have shrunk in such a situation, we may end up
+	 * dispatching requests here that violate the current limitations.
+	 * Since struct request is relied upon by the block layer and device
+	 * drivers, dispatching such requests is dangerous.
+	 * (e.g. it may easily cause a kernel panic.)
+	 * So avoid dispatching such problematic requests in request-based dm.
+	 *
+	 * Since dm_kill_request() decrements md->pending, this has to
+	 * be done after incrementing md->pending.
+	 */
+	r = blk_rq_check_limits(rq->q, rq);
+	if (unlikely(r)) {
+		DMWARN("request violates the queue limits; the limits may"
+		       " have been shrunk while requests were in the queue.");
+		dm_kill_request(clone, r);
+		return;
+	}
+
+	r = ti->type->map_rq(ti, clone, &tio->info);
+	switch (r) {
+	case DM_MAPIO_SUBMITTED:
+		/* The target has taken the I/O to submit by itself later */
+		break;
+	case DM_MAPIO_REMAPPED:
+		/* The target has remapped the I/O so dispatch it */
+		dm_dispatch_request(clone);
+		break;
+	case DM_MAPIO_REQUEUE:
+		/* The target wants to requeue the I/O */
+		dm_requeue_request(clone);
+		break;
+	default:
+		if (r > 0) {
+			DMWARN("unimplemented target map return value: %d", r);
+			BUG();
+		}
+
+		/* The target wants to complete the I/O */
+		dm_kill_request(clone, r);
+		break;
+	}
+}
+
+/*
+ * q->request_fn for request-based dm.
+ * Called with the queue lock held.
+ */
+static void dm_request_fn(struct request_queue *q)
+{
+	struct mapped_device *md = (struct mapped_device *)q->queuedata;
+	struct dm_table *map = dm_get_table(md);
+	struct dm_target *ti;
+	struct request *rq;
+
+	/*
+	 * The check for blk_queue_stopped() is needed here, because:
+	 *   - device suspend uses blk_stop_queue() and expects that
+	 *     no I/O will be dispatched any more after the queue stop
+	 *   - generic_unplug_device() doesn't call q->request_fn()
+	 *     when the queue is stopped, so there is no problem there
+	 *   - but underlying device drivers may call q->request_fn()
+	 *     without that check, through blk_run_queue()
+	 */
+	while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
+		rq = elv_next_request(q);
+		if (!rq)
+			goto plug_and_out;
+
+		ti = dm_table_find_target(map, rq->sector);
+		if (ti->type->busy && ti->type->busy(ti))
+			goto plug_and_out;
+
+		blkdev_dequeue_request(rq);
+		spin_unlock(q->queue_lock);
+		map_request(ti, rq, md);
+		spin_lock_irq(q->queue_lock);
+	}
+
+	goto out;
+
+plug_and_out:
+	if (!elv_queue_empty(q))
+		/* Some requests still remain, retry later */
+		blk_plug_device(q);
+
+out:
+	dm_table_put(map);
+
+	return;
+}
+
+int dm_underlying_device_busy(struct request_queue *q)
+{
+	return blk_lld_busy(q);
+}
+EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
+
+static int dm_lld_busy(struct request_queue *q)
+{
+	int r;
+	struct mapped_device *md = q->queuedata;
+	struct dm_table *map = dm_get_table(md);
+
+	if (!map || test_bit(DMF_BLOCK_IO, &md->flags))
+		r = 1;
+	else
+		r = dm_table_any_busy_target(map);
+
+	dm_table_put(map);
+	return r;
+}
+
 static void dm_unplug_all(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
 	struct dm_table *map = dm_get_table(md);
 
 	if (map) {
+		if (dm_request_based(md))
+			generic_unplug_device(q);
+
 		dm_table_unplug_all(map);
 		dm_table_put(map);
 	}
@@ -955,6 +1618,12 @@ static int dm_any_congested(void *conges
 
 	if (!map || test_bit(DMF_BLOCK_IO, &md->flags))
 		r = bdi_bits;
+	else if (dm_request_based(md))
+		/*
+		 * Request-based dm cares only about its own queue when
+		 * queried for the congestion status of a request_queue
+		 */
+		r = md->queue->backing_dev_info.state & bdi_bits;
 	else
 		r = dm_table_any_congested(map, bdi_bits);
 
@@ -1075,10 +1744,22 @@ static struct mapped_device *alloc_dev(i
 	INIT_LIST_HEAD(&md->uevent_list);
 	spin_lock_init(&md->uevent_lock);
 
-	md->queue = blk_alloc_queue(GFP_KERNEL);
+	md->queue = blk_init_queue(dm_request_fn, NULL);
 	if (!md->queue)
 		goto bad_queue;
 
+	/*
+	 * Request-based dm devices cannot be stacked on top of bio-based dm
+	 * devices.  The type of this dm device has not been decided yet,
+	 * although we initialized the queue using blk_init_queue().
+	 * The type is decided at the first table loading time.
+	 * To prevent problematic device stacking, clear the queue flag
+	 * for request stacking support until then.
+	 *
+	 * This queue is new, so no concurrency on the queue_flags.
+	 */
+	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+	md->saved_make_request_fn = md->queue->make_request_fn;
 	md->queue->queuedata = md;
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
 	md->queue->backing_dev_info.congested_data = md;
@@ -1086,18 +1767,9 @@ static struct mapped_device *alloc_dev(i
 	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 	md->queue->unplug_fn = dm_unplug_all;
 	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
-
-	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
-	if (!md->io_pool)
-		goto bad_io_pool;
-
-	md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
-	if (!md->tio_pool)
-		goto bad_tio_pool;
-
-	md->bs = bioset_create(16, 16);
-	if (!md->bs)
-		goto bad_no_bioset;
+	blk_queue_softirq_done(md->queue, dm_softirq_done);
+	blk_queue_prep_rq(md->queue, dm_prep_fn);
+	blk_queue_lld_busy(md->queue, dm_lld_busy);
 
 	md->disk = alloc_disk(1);
 	if (!md->disk)
@@ -1132,12 +1804,6 @@ static struct mapped_device *alloc_dev(i
 bad_thread:
 	put_disk(md->disk);
 bad_disk:
-	bioset_free(md->bs);
-bad_no_bioset:
-	mempool_destroy(md->tio_pool);
-bad_tio_pool:
-	mempool_destroy(md->io_pool);
-bad_io_pool:
 	blk_cleanup_queue(md->queue);
 bad_queue:
 	free_minor(minor);
@@ -1159,9 +1825,12 @@ static void free_dev(struct mapped_devic
 		bdput(md->suspended_bdev);
 	}
 	destroy_workqueue(md->wq);
-	mempool_destroy(md->tio_pool);
-	mempool_destroy(md->io_pool);
-	bioset_free(md->bs);
+	if (md->tio_pool)
+		mempool_destroy(md->tio_pool);
+	if (md->io_pool)
+		mempool_destroy(md->io_pool);
+	if (md->bs)
+		bioset_free(md->bs);
 	del_gendisk(md->disk);
 	free_minor(minor);
 
@@ -1224,6 +1893,16 @@ static int __bind(struct mapped_device *
 	dm_table_get(t);
 	dm_table_event_callback(t, event_callback, md);
 
+	/*
+	 * If the old table wasn't request-based, the queue hasn't been
+	 * stopped during suspension.  So stop it now, to prevent I/O
+	 * from being mapped before resume.
+	 * This must be done before setting the queue restrictions,
+	 * because request-based dm may start running right after they are set.
+	 */
+	if (dm_table_request_based(t) && !blk_queue_stopped(q))
+		stop_queue(q);
+
 	write_lock(&md->map_lock);
 	md->map = t;
 	dm_table_set_restrictions(t, q);
@@ -1346,7 +2025,11 @@ static int dm_wait_for_completion(struct
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		smp_mb();
-		if (!atomic_read(&md->pending))
+		if (dm_request_based(md)) {
+			if (!atomic_read(&md->pending) &&
+			    blk_queue_stopped(md->queue))
+				break;
+		} else if (!atomic_read(&md->pending))
			break;
 
		if (signal_pending(current)) {
@@ -1369,7 +2052,13 @@ static void __flush_deferred_io(struct m
 	struct bio *c;
 
 	while ((c = bio_list_pop(&md->deferred))) {
-		if (__split_bio(md, c))
+		/*
+		 * Some bios might have been queued here during suspension,
+		 * before the device was switched to request-based in resume
+		 */
+		if (dm_request_based(md))
+			generic_make_request(c);
+		else if (__split_bio(md, c))
 			bio_io_error(c);
 	}
 
@@ -1394,9 +2083,6 @@ static void dm_wq_work(struct work_struc
 
 	down_write(&md->io_lock);
 	switch (req->type) {
-	case DM_WQ_FLUSH_ALL:
-		__merge_pushback_list(md);
-		/* pass through */
 	case DM_WQ_FLUSH_DEFERRED:
 		__flush_deferred_io(md);
 		break;
@@ -1451,6 +2137,88 @@ out:
 	return r;
 }
 
+static inline void dm_invalidate_flush_suspend(struct mapped_device *md)
+{
+	md->suspend_rq.data = (void *)0x1;
+}
+
+static void dm_abort_suspend(struct mapped_device *md, int noflush)
+{
+	struct request_queue *q = md->queue;
+	unsigned long flags;
+
+	/*
+	 * For flush suspend, invalidation and queue restart must be protected
+	 * by a single queue lock to prevent a race with dm_prep_fn().
+	 */
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!noflush)
+		dm_invalidate_flush_suspend(md);
+	__start_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/*
+ * Additional suspend work for request-based dm.
+ *
+ * In request-based dm, stopping the request_queue prevents mapping.
+ * Even after stopping the request_queue, requests submitted from the upper
+ * layer can be inserted to the request_queue.  So original (unmapped)
+ * requests are kept in the request_queue during suspension.
+ */
+static void dm_start_suspend(struct mapped_device *md, int noflush)
+{
+	struct request *rq = &md->suspend_rq;
+	struct request_queue *q = md->queue;
+	unsigned long flags;
+
+	if (noflush) {
+		stop_queue(q);
+		return;
+	}
+
+	/*
+	 * For flush suspend, we need a marker to indicate the boundary
+	 * between the I/Os that need flushing and the deferred I/Os, since
+	 * all I/Os are queued in the request_queue during suspension.
+	 *
+	 * This marker must be inserted after setting DMF_BLOCK_IO,
+	 * because dm_prep_fn() treats a missing DMF_BLOCK_IO as
+	 * a suspend interruption.
+	 */
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (unlikely(rq->ref_count)) {
+		/*
+		 * This can happen when the previous suspend was interrupted,
+		 * the suspend_rq inserted for that suspend is still in the
+		 * queue, and this suspend has been invoked.
+		 *
+		 * We could re-insert the suspend_rq by deleting it from
+		 * the queue forcibly using list_del_init(&rq->queuelist).
+		 * But it would break the block layer easily.
+		 * So we don't re-insert the suspend_rq in such a case.
+		 * The suspend_rq should already have been invalidated during
+		 * the previous suspend interruption, so just wait for it
+		 * to be completed.
+		 *
+		 * This suspend will never complete, so warn the user to
+		 * interrupt this suspend and retry later.
+		 */
+		BUG_ON(!rq->data);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+
+		DMWARN("Invalidating the previous suspend is still in"
+		       " progress.  This suspend will never complete."
+		       " Please interrupt this suspend and retry later.");
+		return;
+	}
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
1540 | + /* Now no user of the suspend_rq */ | |
1541 | + blk_rq_init(q, rq); | |
1542 | + blk_insert_request(q, rq, 0, NULL); | |
1543 | +} | |
1544 | + | |
1545 | /* | |
1546 | * Functions to lock and unlock any filesystem running on the | |
1547 | * device. | |
1548 | @@ -1526,7 +2294,7 @@ int dm_suspend(struct mapped_device *md, | |
1549 | if (!md->suspended_bdev) { | |
1550 | DMWARN("bdget failed in dm_suspend"); | |
1551 | r = -ENOMEM; | |
1552 | - goto flush_and_out; | |
1553 | + goto out; | |
1554 | } | |
1555 | ||
1556 | /* | |
1557 | @@ -1549,6 +2317,9 @@ int dm_suspend(struct mapped_device *md, | |
1558 | add_wait_queue(&md->wait, &wait); | |
1559 | up_write(&md->io_lock); | |
1560 | ||
1561 | + if (dm_request_based(md)) | |
1562 | + dm_start_suspend(md, noflush); | |
1563 | + | |
1564 | /* unplug */ | |
1565 | if (map) | |
1566 | dm_table_unplug_all(map); | |
1567 | @@ -1561,14 +2332,22 @@ int dm_suspend(struct mapped_device *md, | |
1568 | down_write(&md->io_lock); | |
1569 | remove_wait_queue(&md->wait, &wait); | |
1570 | ||
1571 | - if (noflush) | |
1572 | - __merge_pushback_list(md); | |
1573 | + if (noflush) { | |
1574 | + if (dm_request_based(md)) | |
1575 | + /* All requeued requests are already in md->queue */ | |
1576 | + clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); | |
1577 | + else | |
1578 | + __merge_pushback_list(md); | |
1579 | + } | |
1580 | up_write(&md->io_lock); | |
1581 | ||
1582 | /* were we interrupted ? */ | |
1583 | if (r < 0) { | |
1584 | dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); | |
1585 | ||
1586 | + if (dm_request_based(md)) | |
1587 | + dm_abort_suspend(md, noflush); | |
1588 | + | |
1589 | unlock_fs(md); | |
1590 | goto out; /* pushback list is already flushed, so skip flush */ | |
1591 | } | |
1592 | @@ -1577,14 +2356,6 @@ int dm_suspend(struct mapped_device *md, | |
1593 | ||
1594 | set_bit(DMF_SUSPENDED, &md->flags); | |
1595 | ||
1596 | -flush_and_out: | |
1597 | - if (r && noflush) | |
1598 | - /* | |
1599 | - * Because there may be already I/Os in the pushback list, | |
1600 | - * flush them before return. | |
1601 | - */ | |
1602 | - dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL); | |
1603 | - | |
1604 | out: | |
1605 | if (r && md->suspended_bdev) { | |
1606 | bdput(md->suspended_bdev); | |
1607 | @@ -1617,6 +2388,14 @@ int dm_resume(struct mapped_device *md) | |
1608 | ||
1609 | dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); | |
1610 | ||
1611 | + /* | |
1612 | + * Flushing deferred I/Os must be done after targets are resumed | |
1613 | + * so that the targets' mappings can work correctly. | |
1614 | + * Request-based dm queues deferred I/Os in its request_queue. | |
1615 | + */ | |
1616 | + if (dm_request_based(md)) | |
1617 | + start_queue(md->queue); | |
1618 | + | |
1619 | unlock_fs(md); | |
1620 | ||
1621 | if (md->suspended_bdev) { | |
1622 | @@ -1698,6 +2477,65 @@ int dm_noflush_suspending(struct dm_targ | |
1623 | } | |
1624 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); | |
1625 | ||
1626 | +int dm_init_md_mempool(struct mapped_device *md, int type) | |
1627 | +{ | |
1628 | + if (unlikely(type == DM_TYPE_NONE)) { | |
1629 | + DMWARN("no type is specified, can't initialize mempool"); | |
1630 | + return -EINVAL; | |
1631 | + } | |
1632 | + | |
1633 | + if (md->mempool_type == type) | |
1634 | + return 0; | |
1635 | + | |
1636 | + if (md->map) { | |
1637 | + /* The md is in use; the mempool type can't be changed */ | |
1638 | + DMWARN("can't change mempool type after a table is bound"); | |
1639 | + return -EINVAL; | |
1640 | + } | |
1641 | + | |
1642 | + /* The md isn't in use yet, so we can still change the mempool type */ | |
1643 | + if (md->mempool_type != DM_TYPE_NONE) { | |
1644 | + mempool_destroy(md->io_pool); | |
1645 | + md->io_pool = NULL; | |
1646 | + mempool_destroy(md->tio_pool); | |
1647 | + md->tio_pool = NULL; | |
1648 | + bioset_free(md->bs); | |
1649 | + md->bs = NULL; | |
1650 | + md->mempool_type = DM_TYPE_NONE; | |
1651 | + } | |
1652 | + | |
1653 | + md->io_pool = (type == DM_TYPE_BIO_BASED) ? | |
1654 | + mempool_create_slab_pool(MIN_IOS, _io_cache) : | |
1655 | + mempool_create_slab_pool(MIN_IOS, _bio_info_cache); | |
1656 | + if (!md->io_pool) | |
1657 | + return -ENOMEM; | |
1658 | + | |
1659 | + md->tio_pool = (type == DM_TYPE_BIO_BASED) ? | |
1660 | + mempool_create_slab_pool(MIN_IOS, _tio_cache) : | |
1661 | + mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); | |
1662 | + if (!md->tio_pool) | |
1663 | + goto free_io_pool_and_out; | |
1664 | + | |
1665 | + md->bs = (type == DM_TYPE_BIO_BASED) ? | |
1666 | + bioset_create(16, 16) : bioset_create(MIN_IOS, MIN_IOS); | |
1667 | + if (!md->bs) | |
1668 | + goto free_tio_pool_and_out; | |
1669 | + | |
1670 | + md->mempool_type = type; | |
1671 | + | |
1672 | + return 0; | |
1673 | + | |
1674 | +free_tio_pool_and_out: | |
1675 | + mempool_destroy(md->tio_pool); | |
1676 | + md->tio_pool = NULL; | |
1677 | + | |
1678 | +free_io_pool_and_out: | |
1679 | + mempool_destroy(md->io_pool); | |
1680 | + md->io_pool = NULL; | |
1681 | + | |
1682 | + return -ENOMEM; | |
1683 | +} | |
1684 | + | |
1685 | static struct block_device_operations dm_blk_dops = { | |
1686 | .open = dm_blk_open, | |
1687 | .release = dm_blk_close, | |
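A note on the calling convention for dm_init_md_mempool() above: the mempool
type must be settled before the first table is bound to the md, and a repeated
call with the type already in effect is a no-op. Below is a minimal caller
sketch, assuming a hypothetical cleanup label and a table t whose type has
been set via dm_table_set_type(); only the constants and return codes come
from this patch.

	int r = dm_init_md_mempool(md, dm_table_get_type(t));
	if (r) {
		/*
		 * -EINVAL: the type is DM_TYPE_NONE, or a table is already
		 * bound (md->map set) and the type would change.
		 * -ENOMEM: pool allocation failed; the md is left with no
		 * usable mempools and its type reset to DM_TYPE_NONE.
		 */
		goto bad;	/* hypothetical error path */
	}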
1688 | --- a/drivers/md/dm.h | |
1689 | +++ b/drivers/md/dm.h | |
1690 | @@ -23,6 +23,13 @@ | |
1691 | #define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) | |
1692 | ||
1693 | /* | |
1694 | + * Type of table and mapped_device's mempool | |
1695 | + */ | |
1696 | +#define DM_TYPE_NONE 0 | |
1697 | +#define DM_TYPE_BIO_BASED 1 | |
1698 | +#define DM_TYPE_REQUEST_BASED 2 | |
1699 | + | |
1700 | +/* | |
1701 | * List of devices that a metadevice uses and should open/close. | |
1702 | */ | |
1703 | struct dm_dev { | |
1704 | @@ -49,6 +56,10 @@ void dm_table_presuspend_targets(struct | |
1705 | void dm_table_postsuspend_targets(struct dm_table *t); | |
1706 | int dm_table_resume_targets(struct dm_table *t); | |
1707 | int dm_table_any_congested(struct dm_table *t, int bdi_bits); | |
1708 | +int dm_table_any_busy_target(struct dm_table *t); | |
1709 | +int dm_table_set_type(struct dm_table *t); | |
1710 | +int dm_table_get_type(struct dm_table *t); | |
1711 | +int dm_table_request_based(struct dm_table *t); | |
1712 | void dm_table_unplug_all(struct dm_table *t); | |
1713 | ||
1714 | /* | |
1715 | @@ -97,10 +108,16 @@ void *dm_vcalloc(unsigned long nmemb, un | |
1716 | union map_info *dm_get_mapinfo(struct bio *bio); | |
1717 | int dm_open_count(struct mapped_device *md); | |
1718 | int dm_lock_for_deletion(struct mapped_device *md); | |
1719 | +union map_info *dm_get_rq_mapinfo(struct request *rq); | |
1720 | ||
1721 | void dm_kobject_uevent(struct mapped_device *md); | |
1722 | ||
1723 | int dm_kcopyd_init(void); | |
1724 | void dm_kcopyd_exit(void); | |
1725 | ||
1726 | +/* | |
1727 | + * Mempool initializer for a mapped_device | |
1728 | + */ | |
1729 | +int dm_init_md_mempool(struct mapped_device *md, int type); | |
1730 | + | |
1731 | #endif | |
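The DM_TYPE_* constants above drive both the mempool selection in
dm_init_md_mempool() and whether the md dispatches requests or bios. As a
sketch of how the declared helpers fit together (illustrative only; the real
dm_table_request_based() in dm-table.c presumably reduces to the same test):

	/* Illustrative helper, not part of the patch. */
	static inline int example_table_is_request_based(struct dm_table *t)
	{
		return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
	}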
1732 | --- a/include/linux/device-mapper.h | |
1733 | +++ b/include/linux/device-mapper.h | |
1734 | @@ -46,6 +46,8 @@ typedef void (*dm_dtr_fn) (struct dm_tar | |
1735 | */ | |
1736 | typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, | |
1737 | union map_info *map_context); | |
1738 | +typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, | |
1739 | + union map_info *map_context); | |
1740 | ||
1741 | /* | |
1742 | * Returns: | |
1743 | @@ -58,6 +60,9 @@ typedef int (*dm_map_fn) (struct dm_targ | |
1744 | typedef int (*dm_endio_fn) (struct dm_target *ti, | |
1745 | struct bio *bio, int error, | |
1746 | union map_info *map_context); | |
1747 | +typedef int (*dm_request_endio_fn) (struct dm_target *ti, | |
1748 | + struct request *clone, int error, | |
1749 | + union map_info *map_context); | |
1750 | ||
1751 | typedef void (*dm_flush_fn) (struct dm_target *ti); | |
1752 | typedef void (*dm_presuspend_fn) (struct dm_target *ti); | |
1753 | @@ -77,6 +82,13 @@ typedef int (*dm_ioctl_fn) (struct dm_ta | |
1754 | typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, | |
1755 | struct bio_vec *biovec, int max_size); | |
1756 | ||
1757 | +/* | |
1758 | + * Returns: | |
1759 | + * 0: The target can handle the next I/O immediately. | |
1760 | + * 1: The target can't handle the next I/O immediately. | |
1761 | + */ | |
1762 | +typedef int (*dm_busy_fn) (struct dm_target *ti); | |
1763 | + | |
1764 | void dm_error(const char *message); | |
1765 | ||
1766 | /* | |
1767 | @@ -103,7 +115,9 @@ struct target_type { | |
1768 | dm_ctr_fn ctr; | |
1769 | dm_dtr_fn dtr; | |
1770 | dm_map_fn map; | |
1771 | + dm_map_request_fn map_rq; | |
1772 | dm_endio_fn end_io; | |
1773 | + dm_request_endio_fn rq_end_io; | |
1774 | dm_flush_fn flush; | |
1775 | dm_presuspend_fn presuspend; | |
1776 | dm_postsuspend_fn postsuspend; | |
1777 | @@ -113,6 +127,7 @@ struct target_type { | |
1778 | dm_message_fn message; | |
1779 | dm_ioctl_fn ioctl; | |
1780 | dm_merge_fn merge; | |
1781 | + dm_busy_fn busy; | |
1782 | }; | |
1783 | ||
1784 | struct io_restrictions { | |
1785 | @@ -125,6 +140,7 @@ struct io_restrictions { | |
1786 | unsigned short max_hw_segments; | |
1787 | unsigned short max_phys_segments; | |
1788 | unsigned char no_cluster; /* inverted so that 0 is default */ | |
1789 | + unsigned char no_request_stacking; | |
1790 | }; | |
1791 | ||
1792 | struct dm_target { | |
1793 | @@ -348,4 +364,12 @@ static inline unsigned long to_bytes(sec | |
1794 | return (n << SECTOR_SHIFT); | |
1795 | } | |
1796 | ||
1797 | +/*----------------------------------------------------------------- | |
1798 | + * Helper for block layer and dm core operations | |
1799 | + *---------------------------------------------------------------*/ | |
1800 | +void dm_dispatch_request(struct request *rq); | |
1801 | +void dm_requeue_request(struct request *rq); | |
1802 | +void dm_kill_request(struct request *rq, int error); | |
1803 | +int dm_underlying_device_busy(struct request_queue *q); | |
1804 | + | |
1805 | #endif /* _LINUX_DEVICE_MAPPER_H */ |
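Taken together, the new hooks let a target operate on struct request clones
instead of bios. Below is a skeleton of a pass-through request-based target,
as a sketch only: every example_* name is hypothetical, the mandatory ctr/dtr
methods are elided, and only the hook signatures, the busy() return
convention, and DM_MAPIO_REMAPPED come from dm core.

	static int example_map_rq(struct dm_target *ti, struct request *clone,
				  union map_info *map_context)
	{
		/* Keep no per-request state; let dm core dispatch the clone. */
		map_context->ptr = NULL;
		return DM_MAPIO_REMAPPED;
	}

	static int example_rq_end_io(struct dm_target *ti, struct request *clone,
				     int error, union map_info *map_context)
	{
		/* Pass the completion status up unchanged (assumed to be the
		 * neutral behaviour, mirroring bio-based end_io). */
		return error;
	}

	static int example_busy(struct dm_target *ti)
	{
		/* 0: ready for the next request; 1: dm core should back off
		 * and retry later (see dm_busy_fn above). */
		return 0;
	}

	static struct target_type example_target = {
		.name      = "example-rq",
		.version   = {0, 0, 1},
		.module    = THIS_MODULE,
		.map_rq    = example_map_rq,
		.rq_end_io = example_rq_end_io,
		.busy      = example_busy,
	};

A busy() result of 1 is presumably what dm_table_any_busy_target(), declared
in dm.h above, aggregates across the table's targets.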