From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Subject: Block layer fixes for request-based multipathing
References: FATE#302108

This is a combined patch from linux-2.6.git. Commit-IDs:

d6c578ec08b3f07050401ed83193b3f21729213b
afac32f0c9c68698eaf7688d52de859301a0539f
ebd2bf40e9cfa4ebfa614703944f4eafdf0d2c64
509395182b6b7cf7e3c1ca2cd669506d8f43ee01
88171cad9ace4b67c5298e6504d70454296afb76

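The exports below give request stacking drivers a dispatch path that
re-validates a cloned request against the lower device's queue limits.
As a rough sketch of the intended use (illustrative only, not part of
this patch; stacking_dispatch(), lower_q and clone_rq are hypothetical
names), a request-based stacking driver would dispatch a clone roughly
like this:

  /*
   * Illustrative sketch only.  A request-based stacking driver
   * (e.g. request-based dm) dispatches a clone of an upper-level
   * request to the underlying device's queue.
   */
  static int stacking_dispatch(struct request_queue *lower_q,
                               struct request *clone_rq)
  {
          /*
           * blk_lld_busy() calls the lld_busy_fn hook that the
           * low-level driver registered with blk_queue_lld_busy().
           * Holding back while the LLD is busy keeps requests on the
           * stacking driver's queue, where they can still be merged.
           */
          if (blk_lld_busy(lower_q))
                  return -EBUSY;

          /*
           * blk_insert_cloned_request() re-checks the clone against
           * lower_q's limits via blk_rq_check_limits() and, on
           * success, queues it at the back of the dispatch queue.
           * It returns -EIO if the clone violates those limits.
           */
          return blk_insert_cloned_request(lower_q, clone_rq);
  }
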
Signed-off-by: Hannes Reinecke <hare@suse.de>

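On the completion side, blk_update_request() lets the stacking driver
advance the original upper-level request by however many bytes a clone
has finished, without the block layer completing the request structure.
A minimal sketch, again illustrative and with hypothetical names
(stacking_end_clone_io(), orig_rq); the final blk_end_request() call
assumes all bytes of the original request have been accounted for:

  /*
   * Illustrative sketch only.  Called when nr_bytes of a clone have
   * completed with the given error code.
   */
  static void stacking_end_clone_io(struct request *orig_rq, int error,
                                    unsigned int nr_bytes)
  {
          /*
           * Unlike blk_end_request(), blk_update_request() never
           * frees the request structure, so the stacking driver
           * stays in control of orig_rq across partial completions.
           */
          blk_update_request(orig_rq, error, nr_bytes);

          if (!orig_rq->bio)
                  /* no bios left: now really complete the original */
                  blk_end_request(orig_rq, error, 0);
  }
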
---
 block/blk-core.c        |  169 +++++++++++++++++++++++++++++++++++++++++++++---
 block/blk-settings.c    |    6 +
 drivers/scsi/scsi_lib.c |   32 +++++++++
 include/linux/blkdev.h  |   12 +++
 4 files changed, 209 insertions(+), 10 deletions(-)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -592,7 +592,8 @@ blk_init_queue_node(request_fn_proc *rfn
         q->request_fn = rfn;
         q->prep_rq_fn = NULL;
         q->unplug_fn = generic_unplug_device;
-        q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
+        q->queue_flags = (1 << QUEUE_FLAG_CLUSTER |
+                           1 << QUEUE_FLAG_STACKABLE);
         q->queue_lock = lock;

         blk_queue_segment_boundary(q, 0xffffffff);
@@ -1586,6 +1587,87 @@ void blkdev_dequeue_request(struct reque
 EXPORT_SYMBOL(blkdev_dequeue_request);

 /**
+ * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * @q:  the queue
+ * @rq: the request being checked
+ *
+ * Description:
+ *    @rq may have been made based on weaker limitations of upper-level queues
+ *    in request stacking drivers, and it may violate the limitation of @q.
+ *    Since the block layer and the underlying device driver trust @rq
+ *    after it is inserted to @q, it should be checked against @q before
+ *    the insertion using this generic function.
+ *
+ *    This function should also be useful for request stacking drivers
+ *    in some cases below, so export this function.
+ *    Request stacking drivers like request-based dm may change the queue
+ *    limits while requests are in the queue (e.g. dm's table swapping).
+ *    Such request stacking drivers should check those requests against
+ *    the new queue limits again when they dispatch those requests,
+ *    although such checks are also done against the old queue limits
+ *    when submitting requests.
+ */
+int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+{
+        if (rq->nr_sectors > q->max_sectors ||
+            rq->data_len > q->max_hw_sectors << 9) {
+                printk(KERN_ERR "%s: over max size limit.\n", __func__);
+                return -EIO;
+        }
+
+        /*
+         * The queue's settings related to segment counting, like
+         * q->bounce_pfn, may differ from those of other stacking queues.
+         * Recalculate the segment count to check the request correctly
+         * against this queue's limits.
+         */
+        blk_recalc_rq_segments(rq);
+        if (rq->nr_phys_segments > q->max_phys_segments ||
+            rq->nr_phys_segments > q->max_hw_segments) {
+                printk(KERN_ERR "%s: over max segments limit.\n", __func__);
+                return -EIO;
+        }
+
+        return 0;
+}
+EXPORT_SYMBOL_GPL(blk_rq_check_limits);
+
+/**
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request
+ * @q:  the queue to submit the request
+ * @rq: the request being queued
+ */
+int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+{
+        unsigned long flags;
+
+        if (blk_rq_check_limits(q, rq))
+                return -EIO;
+
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+        if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
+            should_fail(&fail_make_request, blk_rq_bytes(rq)))
+                return -EIO;
+#endif
+
+        spin_lock_irqsave(q->queue_lock, flags);
+
+        /*
+         * The submitting request must be dequeued before calling this
+         * function because it will be linked to another request_queue.
+         */
+        BUG_ON(blk_queued_rq(rq));
+
+        drive_stat_acct(rq, 1);
+        __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
+
+        spin_unlock_irqrestore(q->queue_lock, flags);
+
+        return 0;
+}
+EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
+
+/**
  * __end_that_request_first - end I/O on a request
  * @req: the request being processed
  * @error: %0 for success, < %0 for error
@@ -1857,6 +1939,22 @@ void end_request(struct request *req, in
 }
 EXPORT_SYMBOL(end_request);

+static int end_that_request_data(struct request *rq, int error,
+                                 unsigned int nr_bytes, unsigned int bidi_bytes)
+{
+        if (rq->bio) {
+                if (__end_that_request_first(rq, error, nr_bytes))
+                        return 1;
+
+                /* Bidi request must be completed as a whole */
+                if (blk_bidi_rq(rq) &&
+                    __end_that_request_first(rq->next_rq, error, bidi_bytes))
+                        return 1;
+        }
+
+        return 0;
+}
+
 /**
  * blk_end_io - Generic end_io function to complete a request.
  * @rq: the request being processed
@@ -1883,15 +1981,8 @@ static int blk_end_io(struct request *rq
         struct request_queue *q = rq->q;
         unsigned long flags = 0UL;

-        if (rq->bio) {
-                if (__end_that_request_first(rq, error, nr_bytes))
-                        return 1;
-
-                /* Bidi request must be completed as a whole */
-                if (blk_bidi_rq(rq) &&
-                    __end_that_request_first(rq->next_rq, error, bidi_bytes))
-                        return 1;
-        }
+        if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
+                return 1;

         /* Special feature for tricky drivers */
         if (drv_callback && drv_callback(rq))
@@ -1974,6 +2065,36 @@ int blk_end_bidi_request(struct request
 EXPORT_SYMBOL_GPL(blk_end_bidi_request);

 /**
+ * blk_update_request - Special helper function for request stacking drivers
+ * @rq:       the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete @rq
+ *
+ * Description:
+ *    Ends I/O on a number of bytes attached to @rq, but doesn't complete
+ *    the request structure even if @rq has no data left.
+ *    If @rq has data left, sets it up for the next range of segments.
+ *
+ *    This special helper function is only for request stacking drivers
+ *    (e.g. request-based dm) so that they can handle partial completion.
+ *    Actual device drivers should use blk_end_request instead.
+ */
+void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
+{
+        if (!end_that_request_data(rq, error, nr_bytes, 0)) {
+                /*
+                 * These members are not updated in end_that_request_data()
+                 * when all bios are completed.
+                 * Update them so that the request stacking driver can find
+                 * how many bytes remain in the request later.
+                 */
+                rq->nr_sectors = rq->hard_nr_sectors = 0;
+                rq->current_nr_sectors = rq->hard_cur_sectors = 0;
+        }
+}
+EXPORT_SYMBOL_GPL(blk_update_request);
+
+/**
  * blk_end_request_callback - Special helper function for tricky drivers
  * @rq: the request being processed
  * @error: %0 for success, < %0 for error
@@ -2028,6 +2149,34 @@ void blk_rq_bio_prep(struct request_queu
         rq->rq_disk = bio->bi_bdev->bd_disk;
 }

+/**
+ * blk_lld_busy - Check if underlying low-level drivers of a device are busy
+ * @q: the queue of the device being checked
+ *
+ * Description:
+ *    Check if underlying low-level drivers of a device are busy.
+ *    If the drivers want to export their busy state, they must set their
+ *    own exporting function using blk_queue_lld_busy() first.
+ *
+ *    Basically, this function is used only by request stacking drivers
+ *    to stop dispatching requests to underlying devices when the underlying
+ *    devices are busy.  This behavior helps more I/O merging on the queue
+ *    of the request stacking driver and prevents I/O throughput regression
+ *    under burst I/O load.
+ *
+ * Return:
+ *    0 - Not busy (the request stacking driver should dispatch requests)
+ *    1 - Busy     (the request stacking driver should stop dispatching requests)
+ */
+int blk_lld_busy(struct request_queue *q)
+{
+        if (q->lld_busy_fn)
+                return q->lld_busy_fn(q);
+
+        return 0;
+}
+EXPORT_SYMBOL_GPL(blk_lld_busy);
+
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
         return queue_work(kblockd_workqueue, work);
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -89,6 +89,12 @@ void blk_queue_rq_timed_out(struct reque
 }
 EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);

+void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
+{
+        q->lld_busy_fn = fn;
+}
+EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q: the request queue for the device to be affected
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1465,6 +1465,37 @@ static inline int scsi_host_queue_ready(
 }

 /*
+ * Busy state exporting function for request stacking drivers.
+ *
+ * For efficiency, no lock is taken to check the busy state of
+ * shost/starget/sdev, since the returned value is not guaranteed and
+ * may change after request stacking drivers call the function,
+ * regardless of whether a lock is taken or not.
+ *
+ * When scsi can't dispatch I/Os anymore and needs to kill I/Os
+ * (e.g. !sdev), scsi needs to return 'not busy'.
+ * Otherwise, request stacking drivers may hold requests forever.
+ */
+static int scsi_lld_busy(struct request_queue *q)
+{
+        struct scsi_device *sdev = q->queuedata;
+        struct Scsi_Host *shost;
+        struct scsi_target *starget;
+
+        if (!sdev)
+                return 0;
+
+        shost = sdev->host;
+        starget = scsi_target(sdev);
+
+        if (scsi_host_in_recovery(shost) || scsi_host_is_busy(shost) ||
+            scsi_target_is_busy(starget) || scsi_device_is_busy(sdev))
+                return 1;
+
+        return 0;
+}
+
+/*
  * Kill a request for a dead device
  */
 static void scsi_kill_request(struct request *req, struct request_queue *q)
@@ -1778,6 +1809,7 @@ struct request_queue *scsi_alloc_queue(s
         blk_queue_prep_rq(q, scsi_prep_fn);
         blk_queue_softirq_done(q, scsi_softirq_done);
         blk_queue_rq_timed_out(q, scsi_times_out);
+        blk_queue_lld_busy(q, scsi_lld_busy);
         return q;
 }

--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -272,6 +272,7 @@ typedef int (merge_bvec_fn) (struct requ
 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
 typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
+typedef int (lld_busy_fn) (struct request_queue *q);

 enum blk_eh_timer_return {
         BLK_EH_NOT_HANDLED,
@@ -328,6 +329,7 @@ struct request_queue
         softirq_done_fn *softirq_done_fn;
         rq_timed_out_fn *rq_timed_out_fn;
         dma_drain_needed_fn *dma_drain_needed;
+        lld_busy_fn *lld_busy_fn;

         /*
          * Dispatch queue sorting
@@ -443,6 +445,7 @@ struct request_queue
 #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */
+#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */

 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -549,6 +552,8 @@ enum {
 #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_flushing(q) ((q)->ordseq)
+#define blk_queue_stackable(q) \
+        test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)

 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
 #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
@@ -695,6 +700,10 @@ extern void __blk_put_request(struct req
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
+extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
+extern int blk_lld_busy(struct request_queue *q);
+extern int blk_insert_cloned_request(struct request_queue *q,
+                                     struct request *rq);
 extern void blk_plug_device(struct request_queue *);
 extern void blk_plug_device_unlocked(struct request_queue *);
 extern int blk_remove_plug(struct request_queue *);
@@ -792,6 +801,8 @@ extern void blk_complete_request(struct
 extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
 extern void blk_abort_queue(struct request_queue *);
+extern void blk_update_request(struct request *rq, int error,
+                               unsigned int nr_bytes);

 /*
  * blk_end_request() takes bytes instead of sectors as a complete size.
@@ -821,6 +832,7 @@ extern void blk_queue_update_dma_pad(str
 extern int blk_queue_dma_drain(struct request_queue *q,
                                dma_drain_needed_fn *dma_drain_needed,
                                void *buf, unsigned int size);
+extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);