From: Jens Axboe <jens.axboe@oracle.com>
Subject: Block layer fixes for 2.6.28
Patch-Mainline: 2.6.28

This is a combined patchset with block layer fixes from 2.6.28.
Commit IDs:
97dee27d1c4d6041ff1cc8150db95fe3eab6be5a
00bbda44114e70fc9879731be3c888122b1de8b1
7452d2a2be657becb2f385d0e0864ba51f1ae694
075a108f7d4dd24b8b69e59edcdf1a0fd84e6541
7a1b6029bf9ff3d0636e318d2482031dc493df16
b3a5faf3cefbff4b69ca181767b882bbd6189aaf
8fe902de23b4f4012db91f538cafd864c63308e7
dfef13dad8d34d0a9e83adee3e8cd9f94cca465e
d2629dd70132f90f9d1bca07572197e9adea25b1
1f08a4484a223cb337e0466042005421cd55d22b
fcdc7361d2925596d69d0538d738c08c221a69c9
cd93bcfa9ca9b15051220614160131c53d7f33f0
d371ca6b8a21a617b8607d23f7202197ad40482a
910ee03b1e61d5cfb121dfb1ee7c127f18bdae01

Signed-off-by: Hannes Reinecke <hare@suse.de>

---
 Documentation/DocBook/kernel-api.tmpl    |    4
 Documentation/block/deadline-iosched.txt |   14 +-
 block/Makefile                           |    4
 block/blk-core.c                         |  166 +++++++------------------------
 block/blk-exec.c                         |    6 -
 block/blk-integrity.c                    |    4
 block/blk-map.c                          |   16 +-
 block/blk-merge.c                        |  100 ------------------
 block/blk-settings.c                     |    8 -
 block/blk-softirq.c                      |  103 +++++++++++++++++++
 block/blk-tag.c                          |    8 -
 block/cfq-iosched.c                      |   47 +++++++-
 block/deadline-iosched.c                 |   40 ++-----
 block/elevator.c                         |    5
 block/genhd.c                            |    5
 drivers/block/ps3disk.c                  |    9 +
 drivers/block/virtio_blk.c               |    4
 drivers/md/raid1.c                       |    4
 drivers/md/raid10.c                      |    4
 drivers/md/raid5.c                       |   66 +++++++---
 fs/bio.c                                 |   16 --
 include/linux/bio.h                      |   33 ------
 include/linux/blkdev.h                   |   18 +--
 23 files changed, 310 insertions(+), 374 deletions(-)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -26,8 +26,6 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/task_io_accounting_ops.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>

@@ -50,8 +48,6 @@ struct kmem_cache *blk_requestq_cachep;
  */
 static struct workqueue_struct *kblockd_workqueue;

-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
-
 static void drive_stat_acct(struct request *rq, int new_io)
 {
	struct hd_struct *part;
@@ -531,7 +527,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
  * request queue; this lock will be taken also from interrupt context, so irq
  * disabling is needed for it.
  *
- * Function returns a pointer to the initialized request queue, or NULL if
+ * Function returns a pointer to the initialized request queue, or %NULL if
  * it didn't succeed.
  *
  * Note:
@@ -913,7 +909,7 @@ void blk_requeue_request(struct request_
 EXPORT_SYMBOL(blk_requeue_request);

 /**
- * blk_insert_request - insert a special request in to a request queue
+ * blk_insert_request - insert a special request into a request queue
  * @q: request queue where request should be inserted
  * @rq: request to be inserted
  * @at_head: insert request at head or tail of queue
@@ -923,8 +919,8 @@ EXPORT_SYMBOL(blk_requeue_request);
  * Many block devices need to execute commands asynchronously, so they don't
  * block the whole kernel from preemption during request execution. This is
  * accomplished normally by inserting aritficial requests tagged as
- * REQ_SPECIAL in to the corresponding request queue, and letting them be
- * scheduled for actual execution by the request queue.
+ * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
+ * be scheduled for actual execution by the request queue.
  *
  * We have the option of inserting the head or the tail of the queue.
  * Typically we use the tail for new ioctls and so forth. We use the head
@@ -1322,7 +1318,7 @@ static inline int bio_check_eod(struct b
 }

 /**
- * generic_make_request: hand a buffer to its device driver for I/O
+ * generic_make_request - hand a buffer to its device driver for I/O
  * @bio: The bio describing the location in memory and on the device.
  *
  * generic_make_request() is used to make I/O requests of block
@@ -1480,13 +1476,13 @@ void generic_make_request(struct bio *bi
 EXPORT_SYMBOL(generic_make_request);

 /**
- * submit_bio: submit a bio to the block device layer for I/O
+ * submit_bio - submit a bio to the block device layer for I/O
  * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
  * @bio: The &struct bio which describes the I/O
  *
  * submit_bio() is very similar in purpose to generic_make_request(), and
  * uses that function to do most of the work. Both are fairly rough
- * interfaces, @bio must be presetup and ready for I/O.
+ * interfaces; @bio must be presetup and ready for I/O.
  *
  */
 void submit_bio(int rw, struct bio *bio)
@@ -1524,7 +1520,7 @@ EXPORT_SYMBOL(submit_bio);
 /**
  * __end_that_request_first - end I/O on a request
  * @req: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -1532,8 +1528,8 @@ EXPORT_SYMBOL(submit_bio);
  * for the next range of segments (if any) in the cluster.
  *
  * Return:
- * 0 - we are done with this request, call end_that_request_last()
- * 1 - still buffers pending for this request
+ * %0 - we are done with this request, call end_that_request_last()
+ * %1 - still buffers pending for this request
 **/
 static int __end_that_request_first(struct request *req, int error,
				    int nr_bytes)
@@ -1544,7 +1540,7 @@ static int __end_that_request_first(stru
	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);

	/*
-	 * for a REQ_BLOCK_PC request, we want to carry any eventual
+	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
	 * sense key with us all the way through
	 */
	if (!blk_pc_request(req))
@@ -1646,82 +1642,6 @@ static int __end_that_request_first(stru
 }

 /*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
-	struct list_head *cpu_list, local_list;
-
-	local_irq_disable();
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_replace_init(cpu_list, &local_list);
-	local_irq_enable();
-
-	while (!list_empty(&local_list)) {
-		struct request *rq;
-
-		rq = list_entry(local_list.next, struct request, donelist);
-		list_del_init(&rq->donelist);
-		rq->q->softirq_done_fn(rq);
-	}
-}
-
-static int __cpuinit blk_cpu_notify(struct notifier_block *self,
-				    unsigned long action, void *hcpu)
-{
-	/*
-	 * If a CPU goes away, splice its entries to the current CPU
-	 * and trigger a run of the softirq
-	 */
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		int cpu = (unsigned long) hcpu;
-
-		local_irq_disable();
-		list_splice_init(&per_cpu(blk_cpu_done, cpu),
-				 &__get_cpu_var(blk_cpu_done));
-		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-		local_irq_enable();
-	}
-
-	return NOTIFY_OK;
-}
-
-
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
-	.notifier_call = blk_cpu_notify,
-};
-
-/**
- * blk_complete_request - end I/O on a request
- * @req: the request being processed
- *
- * Description:
- *     Ends all I/O on a request. It does not handle partial completions,
- *     unless the driver actually implements this in its completion callback
- *     through requeueing. The actual completion happens out-of-order,
- *     through a softirq handler. The user must have registered a completion
- *     callback through blk_queue_softirq_done().
- **/
-
-void blk_complete_request(struct request *req)
-{
-	struct list_head *cpu_list;
-	unsigned long flags;
-
-	BUG_ON(!req->q->softirq_done_fn);
-
-	local_irq_save(flags);
-
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_add_tail(&req->donelist, cpu_list);
-	raise_softirq_irqoff(BLOCK_SOFTIRQ);
-
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_complete_request);
-
-/*
  * queue lock must be held
  */
 static void end_that_request_last(struct request *req, int error)
@@ -1810,11 +1730,11 @@ EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
 /**
  * end_queued_request - end all I/O on a queued request
  * @rq: the request being processed
- * @uptodate: error value or 0/1 uptodate flag
+ * @uptodate: error value or %0/%1 uptodate flag
  *
  * Description:
  *     Ends all I/O on a request, and removes it from the block layer queues.
- *     Not suitable for normal IO completion, unless the driver still has
+ *     Not suitable for normal I/O completion, unless the driver still has
  *     the request attached to the block layer.
  *
 **/
@@ -1827,7 +1747,7 @@ EXPORT_SYMBOL(end_queued_request);
 /**
  * end_dequeued_request - end all I/O on a dequeued request
  * @rq: the request being processed
- * @uptodate: error value or 0/1 uptodate flag
+ * @uptodate: error value or %0/%1 uptodate flag
  *
  * Description:
  *     Ends all I/O on a request. The request must already have been
@@ -1845,14 +1765,14 @@ EXPORT_SYMBOL(end_dequeued_request);
 /**
  * end_request - end I/O on the current segment of the request
  * @req: the request being processed
- * @uptodate: error value or 0/1 uptodate flag
+ * @uptodate: error value or %0/%1 uptodate flag
  *
  * Description:
  *     Ends I/O on the current segment of a request. If that is the only
  *     remaining segment, the request is also completed and freed.
  *
- *     This is a remnant of how older block drivers handled IO completions.
- *     Modern drivers typically end IO on the full request in one go, unless
+ *     This is a remnant of how older block drivers handled I/O completions.
+ *     Modern drivers typically end I/O on the full request in one go, unless
  *     they have a residual value to account for. For that case this function
  *     isn't really useful, unless the residual just happens to be the
  *     full current segment. In other words, don't use this function in new
@@ -1870,12 +1790,12 @@ EXPORT_SYMBOL(end_request);
 /**
  * blk_end_io - Generic end_io function to complete a request.
  * @rq: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  * @drv_callback: function called between completion of bios in the request
  *                and completion of the request.
- *                If the callback returns non 0, this helper returns without
+ *                If the callback returns non %0, this helper returns without
  *                completion of the request.
  *
  * Description:
@@ -1883,8 +1803,8 @@ EXPORT_SYMBOL(end_request);
  *     If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - this request is not freed yet, it still has pending buffers.
+ *     %0 - we are done with this request
+ *     %1 - this request is not freed yet, it still has pending buffers.
 **/
 static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
		      unsigned int bidi_bytes,
@@ -1893,7 +1813,7 @@ static int blk_end_io(struct request *rq
	struct request_queue *q = rq->q;
	unsigned long flags = 0UL;

-	if (bio_has_data(rq->bio) || blk_discard_rq(rq)) {
+	if (rq->bio) {
		if (__end_that_request_first(rq, error, nr_bytes))
			return 1;

@@ -1919,7 +1839,7 @@ static int blk_end_io(struct request *rq
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -1927,8 +1847,8 @@ static int blk_end_io(struct request *rq
  *     If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
 **/
 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
@@ -1939,20 +1859,19 @@ EXPORT_SYMBOL_GPL(blk_end_request);
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
  *     Must be called with queue lock held unlike blk_end_request().
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
 **/
 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) &&
-	    __end_that_request_first(rq, error, nr_bytes))
+	if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
		return 1;

	add_disk_randomness(rq->rq_disk);
@@ -1966,7 +1885,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
 /**
  * blk_end_bidi_request - Helper function for drivers to complete bidi request.
  * @rq: the bidi request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -1974,8 +1893,8 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
 **/
 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
			 unsigned int bidi_bytes)
@@ -1987,11 +1906,11 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 /**
  * blk_end_request_callback - Special helper function for tricky drivers
  * @rq: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  * @drv_callback: function called between completion of bios in the request
  *                and completion of the request.
- *                If the callback returns non 0, this helper returns without
+ *                If the callback returns non %0, this helper returns without
  *                completion of the request.
  *
  * Description:
@@ -2004,10 +1923,10 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
  *     Don't use this interface in other places anymore.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - this request is not freed yet.
- *         this request still has pending buffers or
- *         the driver doesn't want to finish this request yet.
+ *     %0 - we are done with this request
+ *     %1 - this request is not freed yet.
+ *          this request still has pending buffers or
+ *          the driver doesn't want to finish this request yet.
 **/
 int blk_end_request_callback(struct request *rq, int error,
			     unsigned int nr_bytes,
@@ -2026,7 +1945,6 @@ void blk_rq_bio_prep(struct request_queu

	if (bio_has_data(bio)) {
		rq->nr_phys_segments = bio_phys_segments(q, bio);
-		rq->nr_hw_segments = bio_hw_segments(q, bio);
		rq->buffer = bio_data(bio);
	}
	rq->current_nr_sectors = bio_cur_sectors(bio);
@@ -2054,8 +1972,6 @@ EXPORT_SYMBOL(kblockd_flush_work);

 int __init blk_dev_init(void)
 {
-	int i;
-
	kblockd_workqueue = create_workqueue("kblockd");
	if (!kblockd_workqueue)
		panic("Failed to create kblockd\n");
@@ -2066,12 +1982,6 @@ int __init blk_dev_init(void)
	blk_requestq_cachep = kmem_cache_create("blkdev_queue",
			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);

-	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
-
-	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
-	register_hotcpu_notifier(&blk_cpu_notifier);
-
	return 0;
 }

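The completion helpers above differ mainly in locking and granularity:
blk_end_request() takes the queue lock itself, __blk_end_request() expects the
caller to already hold it, and both return 0 once the whole request has been
completed and freed. As a rough sketch (not part of the patch; the driver name
and error handling are hypothetical), a typical full-request completion looks
like:

	#include <linux/blkdev.h>

	/* hypothetical driver: complete the whole request in one go */
	static void mydrv_complete_rq(struct request *rq, int error)
	{
		/*
		 * blk_end_request() returns 0 when all bytes were completed
		 * and the request was freed; non-zero means buffers are
		 * still pending, e.g. after a partial (residual) completion.
		 */
		if (blk_end_request(rq, error, blk_rq_bytes(rq)))
			blk_dump_rq_flags(rq, "mydrv: partial completion");
	}
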
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -16,7 +16,7 @@
 /**
  * blk_end_sync_rq - executes a completion event on a request
  * @rq: request to complete
- * @error: end io status of the request
+ * @error: end I/O status of the request
  */
 static void blk_end_sync_rq(struct request *rq, int error)
 {
@@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct reque
  * @done: I/O completion handler
  *
  * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
  *    for execution. Don't wait for completion.
  */
 void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait)
  * @at_head: insert request at head or tail of queue
  *
  * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
  *    for execution and wait for completion.
  */
 int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
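
blk_execute_rq() is the synchronous counterpart of blk_execute_rq_nowait():
it inserts the prepared request and blocks until the completion event fires.
A minimal sketch of the usual calling pattern, assuming a REQ_TYPE_BLOCK_PC
command (the opcode, timeout and driver name below are made up for
illustration):

	#include <linux/blkdev.h>

	/* hypothetical: issue a prepared BLOCK_PC command and wait for it */
	static int mydrv_send_command(struct request_queue *q,
				      struct gendisk *disk)
	{
		struct request *rq;
		int err;

		rq = blk_get_request(q, READ, __GFP_WAIT);
		if (!rq)
			return -ENOMEM;

		rq->cmd_type = REQ_TYPE_BLOCK_PC;
		rq->cmd[0] = 0x00;	/* made-up opcode */
		rq->cmd_len = 6;
		rq->timeout = 60 * HZ;

		/* queue at the tail (at_head == 0), wait for completion */
		err = blk_execute_rq(q, disk, rq, 0);

		blk_put_request(rq);
		return err;
	}
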
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -109,8 +109,8 @@ EXPORT_SYMBOL(blk_rq_map_integrity_sg);

 /**
  * blk_integrity_compare - Compare integrity profile of two block devices
- * @b1: Device to compare
- * @b2: Device to compare
+ * @bd1: Device to compare
+ * @bd2: Device to compare
  *
  * Description: Meta-devices like DM and MD need to verify that all
  * sub-devices use the same integrity format before advertising to
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -85,17 +85,17 @@ static int __blk_rq_map_user(struct requ
 }

 /**
- * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q: request queue where request should be inserted
  * @rq: request structure to fill
  * @ubuf: the user buffer
  * @len: length of user data
  *
  * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
  *    a kernel bounce buffer is used.
  *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
  *    still in process context.
  *
  *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -154,7 +154,7 @@ unmap_rq:
 EXPORT_SYMBOL(blk_rq_map_user);

 /**
- * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q: request queue where request should be inserted
  * @rq: request to map data to
  * @iov: pointer to the iovec
@@ -162,10 +162,10 @@ EXPORT_SYMBOL(blk_rq_map_user);
  * @len: I/O byte count
  *
  * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
  *    a kernel bounce buffer is used.
  *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
  *    still in process context.
  *
  *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -224,7 +224,7 @@ int blk_rq_map_user_iov(struct request_q
  * Description:
  *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
  *    supply the original rq->bio from the blk_rq_map_user() return, since
- *    the io completion may have changed rq->bio.
+ *    the I/O completion may have changed rq->bio.
  */
 int blk_rq_unmap_user(struct bio *bio)
 {
@@ -250,7 +250,7 @@ int blk_rq_unmap_user(struct bio *bio)
 EXPORT_SYMBOL(blk_rq_unmap_user);

 /**
- * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q: request queue where request should be inserted
  * @rq: request to fill
  * @kbuf: the kernel buffer
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct reques
 void blk_recalc_rq_segments(struct request *rq)
 {
	int nr_phys_segs;
-	int nr_hw_segs;
	unsigned int phys_size;
-	unsigned int hw_size;
	struct bio_vec *bv, *bvprv = NULL;
	int seg_size;
-	int hw_seg_size;
	int cluster;
	struct req_iterator iter;
	int high, highprv = 1;
@@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct reque
		return;

	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
-	hw_seg_size = seg_size = 0;
-	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+	seg_size = 0;
+	phys_size = nr_phys_segs = 0;
	rq_for_each_segment(bv, rq, iter) {
		/*
		 * the trick here is making sure that a high page is never
@@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct reque
		 */
		high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
		if (high || highprv)
-			goto new_hw_segment;
+			goto new_segment;
		if (cluster) {
			if (seg_size + bv->bv_len > q->max_segment_size)
				goto new_segment;
@@ -74,27 +71,12 @@ void blk_recalc_rq_segments(struct reque
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
				goto new_segment;
-			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-				goto new_hw_segment;

			seg_size += bv->bv_len;
-			hw_seg_size += bv->bv_len;
			bvprv = bv;
			continue;
		}
 new_segment:
-		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
-		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-			hw_seg_size += bv->bv_len;
-		else {
-new_hw_segment:
-			if (nr_hw_segs == 1 &&
-			    hw_seg_size > rq->bio->bi_hw_front_size)
-				rq->bio->bi_hw_front_size = hw_seg_size;
-			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
-			nr_hw_segs++;
-		}
-
		if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
			rq->bio->bi_seg_front_size = seg_size;

@@ -104,17 +86,11 @@ new_hw_segment:
		highprv = high;
	}

-	if (nr_hw_segs == 1 &&
-	    hw_seg_size > rq->bio->bi_hw_front_size)
-		rq->bio->bi_hw_front_size = hw_seg_size;
-	if (hw_seg_size > rq->biotail->bi_hw_back_size)
-		rq->biotail->bi_hw_back_size = hw_seg_size;
	if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
		rq->bio->bi_seg_front_size = seg_size;
	if (seg_size > rq->biotail->bi_seg_back_size)
		rq->biotail->bi_seg_back_size = seg_size;
	rq->nr_phys_segments = nr_phys_segs;
-	rq->nr_hw_segments = nr_hw_segs;
 }

 void blk_recount_segments(struct request_queue *q, struct bio *bio)
@@ -127,7 +103,6 @@ void blk_recount_segments(struct request
	blk_recalc_rq_segments(&rq);
	bio->bi_next = nxt;
	bio->bi_phys_segments = rq.nr_phys_segments;
-	bio->bi_hw_segments = rq.nr_hw_segments;
	bio->bi_flags |= (1 << BIO_SEG_VALID);
 }
 EXPORT_SYMBOL(blk_recount_segments);
@@ -158,23 +133,6 @@ static int blk_phys_contig_segment(struc
	return 0;
 }

-static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
-				 struct bio *nxt)
-{
-	if (!bio_flagged(bio, BIO_SEG_VALID))
-		blk_recount_segments(q, bio);
-	if (!bio_flagged(nxt, BIO_SEG_VALID))
-		blk_recount_segments(q, nxt);
-	if (bio_has_data(bio) &&
-	    (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
-	     BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)))
-		return 0;
-	if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
-		return 0;
-
-	return 1;
-}
-
 /*
  * map a request to scatterlist, return number of sg entries setup. Caller
  * must make sure sg can hold rq->nr_phys_segments entries
@@ -288,10 +246,9 @@ static inline int ll_new_hw_segment(stru
				    struct request *req,
				    struct bio *bio)
 {
-	int nr_hw_segs = bio_hw_segments(q, bio);
	int nr_phys_segs = bio_phys_segments(q, bio);

-	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
+	if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
@@ -303,7 +260,6 @@ static inline int ll_new_hw_segment(stru
	 * This will form the start of a new hw segment. Bump both
	 * counters.
	 */
-	req->nr_hw_segments += nr_hw_segs;
	req->nr_phys_segments += nr_phys_segs;
	return 1;
 }
@@ -312,7 +268,6 @@ int ll_back_merge_fn(struct request_queu
		     struct bio *bio)
 {
	unsigned short max_sectors;
-	int len;

	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
@@ -329,20 +284,6 @@ int ll_back_merge_fn(struct request_queu
		blk_recount_segments(q, req->biotail);
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
-	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
-	if (!bio_has_data(bio) ||
-	    (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
-	     && !BIOVEC_VIRT_OVERSIZE(len))) {
-		int mergeable = ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (req->nr_hw_segments == 1)
-				req->bio->bi_hw_front_size = len;
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}

	return ll_new_hw_segment(q, req, bio);
 }
@@ -351,7 +292,6 @@ int ll_front_merge_fn(struct request_que
		      struct bio *bio)
 {
	unsigned short max_sectors;
-	int len;

	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
@@ -365,24 +305,10 @@ int ll_front_merge_fn(struct request_que
		q->last_merge = NULL;
		return 0;
	}
-	len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
	if (!bio_flagged(req->bio, BIO_SEG_VALID))
		blk_recount_segments(q, req->bio);
-	if (!bio_has_data(bio) ||
-	    (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
-	     !BIOVEC_VIRT_OVERSIZE(len))) {
-		int mergeable = ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_front_size = len;
-			if (req->nr_hw_segments == 1)
-				req->biotail->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}

	return ll_new_hw_segment(q, req, bio);
 }
@@ -391,7 +317,6 @@ static int ll_merge_requests_fn(struct r
				struct request *next)
 {
	int total_phys_segments;
-	int total_hw_segments;
	unsigned int seg_size =
		req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

@@ -420,26 +345,11 @@ static int ll_merge_requests_fn(struct r
	if (total_phys_segments > q->max_phys_segments)
		return 0;

-	total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
-	if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
-		int len = req->biotail->bi_hw_back_size +
-				next->bio->bi_hw_front_size;
-		/*
-		 * propagate the combined length to the end of the requests
-		 */
-		if (req->nr_hw_segments == 1)
-			req->bio->bi_hw_front_size = len;
-		if (next->nr_hw_segments == 1)
-			next->biotail->bi_hw_back_size = len;
-		total_hw_segments--;
-	}
-
-	if (total_hw_segments > q->max_hw_segments)
+	if (total_phys_segments > q->max_hw_segments)
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
-	req->nr_hw_segments = total_hw_segments;
	return 1;
 }

--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -144,7 +144,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
  *    Different hardware can have different requirements as to what pages
  *    it can do I/O directly to. A low level driver can call
  *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
- *    buffers for doing I/O to pages residing above @page.
+ *    buffers for doing I/O to pages residing above @dma_addr.
 **/
 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
 {
@@ -229,7 +229,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segment
  * Description:
  *    Enables a low level driver to set an upper limit on the number of
  *    hw data segments in a request. This would be the largest number of
- *    address/length pairs the host adapter can actually give as once
+ *    address/length pairs the host adapter can actually give at once
  *    to the device.
 **/
 void blk_queue_max_hw_segments(struct request_queue *q,
@@ -410,7 +410,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary
  * @mask: alignment mask
  *
  * description:
- *    set required memory and length aligment for direct dma transactions.
+ *    set required memory and length alignment for direct dma transactions.
  *    this is used when buiding direct io requests for the queue.
  *
 **/
@@ -426,7 +426,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment);
  * @mask: alignment mask
  *
  * description:
- *    update required memory and length aligment for direct dma transactions.
+ *    update required memory and length alignment for direct dma transactions.
  *    If the requested alignment is larger than the current alignment, then
  *    the current queue alignment is updated to the new value, otherwise it
  *    is left alone. The design of this is to allow multiple objects
--- /dev/null
+++ b/block/blk-softirq.c
@@ -0,0 +1,103 @@
+/*
+ * Functions related to softirq rq completions
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+
+#include "blk.h"
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+
+static int __cpuinit blk_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
+{
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		int cpu = (unsigned long) hcpu;
+
+		local_irq_disable();
+		list_splice_init(&per_cpu(blk_cpu_done, cpu),
+				 &__get_cpu_var(blk_cpu_done));
+		raise_softirq_irqoff(BLOCK_SOFTIRQ);
+		local_irq_enable();
+	}
+
+	return NOTIFY_OK;
+}
+
+
+static struct notifier_block blk_cpu_notifier __cpuinitdata = {
+	.notifier_call = blk_cpu_notify,
+};
+
+/*
+ * splice the completion data to a local structure and hand off to
+ * process_completion_queue() to complete the requests
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+	struct list_head *cpu_list, local_list;
+
+	local_irq_disable();
+	cpu_list = &__get_cpu_var(blk_cpu_done);
+	list_replace_init(cpu_list, &local_list);
+	local_irq_enable();
+
+	while (!list_empty(&local_list)) {
+		struct request *rq;
+
+		rq = list_entry(local_list.next, struct request, donelist);
+		list_del_init(&rq->donelist);
+		rq->q->softirq_done_fn(rq);
+	}
+}
+
+/**
+ * blk_complete_request - end I/O on a request
+ * @req: the request being processed
+ *
+ * Description:
+ *     Ends all I/O on a request. It does not handle partial completions,
+ *     unless the driver actually implements this in its completion callback
+ *     through requeueing. The actual completion happens out-of-order,
+ *     through a softirq handler. The user must have registered a completion
+ *     callback through blk_queue_softirq_done().
+ **/
+
+void blk_complete_request(struct request *req)
+{
+	struct list_head *cpu_list;
+	unsigned long flags;
+
+	BUG_ON(!req->q->softirq_done_fn);
+
+	local_irq_save(flags);
+
+	cpu_list = &__get_cpu_var(blk_cpu_done);
+	list_add_tail(&req->donelist, cpu_list);
+	raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(blk_complete_request);
+
+int __init blk_softirq_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+
+	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
+	register_hotcpu_notifier(&blk_cpu_notifier);
+	return 0;
+}
+subsys_initcall(blk_softirq_init);
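
blk_complete_request() itself only queues the request on the per-CPU done
list and raises BLOCK_SOFTIRQ; the driver's softirq_done_fn later does the
heavyweight end-of-request work. A hypothetical driver wiring for this split
(the fetch helper and all names are illustrative, not from the patch):

	#include <linux/blkdev.h>
	#include <linux/interrupt.h>

	/* hypothetical helper returning the hardware-completed request */
	extern struct request *mydrv_fetch_completed(void *data);

	/* softirq context: the real completion work happens here */
	static void mydrv_softirq_done(struct request *rq)
	{
		blk_end_request(rq, rq->errors ? -EIO : 0, blk_rq_bytes(rq));
	}

	/* hard-irq context: keep it short and defer to the softirq */
	static irqreturn_t mydrv_isr(int irq, void *data)
	{
		blk_complete_request(mydrv_fetch_completed(data));
		return IRQ_HANDLED;
	}

	/* queue setup: register the softirq completion callback */
	static void mydrv_init_queue(struct request_queue *q)
	{
		blk_queue_softirq_done(q, mydrv_softirq_done);
	}
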
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag);
  * __blk_free_tags - release a given set of tag maintenance info
  * @bqt: the tag map to free
  *
- * Tries to free the specified @bqt@. Returns true if it was
+ * Tries to free the specified @bqt. Returns true if it was
  * actually freed and false if there are still references using it
  */
 static int __blk_free_tags(struct blk_queue_tag *bqt)
@@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct reques
  * blk_free_tags - release a given set of tag maintenance info
  * @bqt: the tag map to free
  *
- * For externally managed @bqt@ frees the map. Callers of this
+ * For externally managed @bqt frees the map. Callers of this
  * function must guarantee to have released all the queues that
  * might have been using this tag map.
  */
@@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags);
  * @q: the request queue for the device
  *
  * Notes:
- *	This is used to disabled tagged queuing to a device, yet leave
+ *	This is used to disable tagged queuing to a device, yet leave
  *	queue in function.
 **/
 void blk_queue_free_tags(struct request_queue *q)
@@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
  * @rq: the request that has completed
  *
  * Description:
- *    Typically called when end_that_request_first() returns 0, meaning
+ *    Typically called when end_that_request_first() returns %0, meaning
  *    all transfers have been done for a request. It's important to call
  *    this function before end_that_request_last(), as that will put the
  *    request back on the free list thus corrupting the internal tag list.
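
The ordering note above is the important part: a tagged request must have its
tag released via blk_queue_end_tag() while the driver still owns it, i.e.
before the final completion frees the request, and with the queue lock held.
A hedged sketch of that pattern (driver name hypothetical):

	#include <linux/blkdev.h>

	/* hypothetical completion path for a tagged request */
	static void mydrv_end_tagged(struct request_queue *q,
				     struct request *rq, int error)
	{
		unsigned long flags;

		spin_lock_irqsave(q->queue_lock, flags);
		if (blk_rq_tagged(rq))
			blk_queue_end_tag(q, rq);	/* release tag first */
		__blk_end_request(rq, error, blk_rq_bytes(rq));
		spin_unlock_irqrestore(q->queue_lock, flags);
	}
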
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -39,6 +39,7 @@ static int cfq_slice_idle = HZ / 125;
 #define CFQ_MIN_TT (2)

 #define CFQ_SLICE_SCALE (5)
+#define CFQ_HW_QUEUE_MIN (5)

 #define RQ_CIC(rq) \
	((struct cfq_io_context *) (rq)->elevator_private)
@@ -86,7 +87,14 @@ struct cfq_data {

	int rq_in_driver;
	int sync_flight;
+
+	/*
+	 * queue-depth detection
+	 */
+	int rq_queued;
	int hw_tag;
+	int hw_tag_samples;
+	int rq_in_driver_peak;

	/*
	 * idle window management
@@ -654,15 +662,6 @@ static void cfq_activate_request(struct
	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
						cfqd->rq_in_driver);

-	/*
-	 * If the depth is larger 1, it really could be queueing. But lets
-	 * make the mark a little higher - idling could still be good for
-	 * low queueing, and a low queueing number could also just indicate
-	 * a SCSI mid layer like behaviour where limit+1 is often seen.
-	 */
-	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
-		cfqd->hw_tag = 1;
-
	cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
 }

@@ -686,6 +685,7 @@ static void cfq_remove_request(struct re
	list_del_init(&rq->queuelist);
	cfq_del_rq_rb(rq);

+	cfqq->cfqd->rq_queued--;
	if (rq_is_meta(rq)) {
		WARN_ON(!cfqq->meta_pending);
		cfqq->meta_pending--;
@@ -1833,6 +1833,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, s
 {
	struct cfq_io_context *cic = RQ_CIC(rq);

+	cfqd->rq_queued++;
	if (rq_is_meta(rq))
		cfqq->meta_pending++;

@@ -1880,6 +1881,31 @@ static void cfq_insert_request(struct re
	cfq_rq_enqueued(cfqd, cfqq, rq);
 }

+/*
+ * Update hw_tag based on peak queue depth over 50 samples under
+ * sufficient load.
+ */
+static void cfq_update_hw_tag(struct cfq_data *cfqd)
+{
+	if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
+		cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
+
+	if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
+	    cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
+		return;
+
+	if (cfqd->hw_tag_samples++ < 50)
+		return;
+
+	if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN)
+		cfqd->hw_tag = 1;
+	else
+		cfqd->hw_tag = 0;
+
+	cfqd->hw_tag_samples = 0;
+	cfqd->rq_in_driver_peak = 0;
+}
+
 static void cfq_completed_request(struct request_queue *q, struct request *rq)
 {
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
@@ -1890,6 +1916,8 @@ static void cfq_completed_request(struct
	now = jiffies;
	cfq_log_cfqq(cfqd, cfqq, "complete");

+	cfq_update_hw_tag(cfqd);
+
	WARN_ON(!cfqd->rq_in_driver);
	WARN_ON(!cfqq->dispatched);
	cfqd->rq_in_driver--;
@@ -2200,6 +2228,7 @@ static void *cfq_init_queue(struct reque
	cfqd->cfq_slice[1] = cfq_slice_sync;
	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->hw_tag = 1;

	return cfqd;
 }
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -33,7 +33,7 @@ struct deadline_data {
	 */
	struct rb_root sort_list[2];
	struct list_head fifo_list[2];
-
+
	/*
	 * next in sort order. read, write or both are NULL
	 */
@@ -53,7 +53,11 @@ struct deadline_data {

 static void deadline_move_request(struct deadline_data *, struct request *);

-#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))])
+static inline struct rb_root *
+deadline_rb_root(struct deadline_data *dd, struct request *rq)
+{
+	return &dd->sort_list[rq_data_dir(rq)];
+}

 /*
  * get the request after `rq' in sector-sorted order
@@ -72,15 +76,11 @@ deadline_latter_request(struct request *
 static void
 deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
 {
-	struct rb_root *root = RQ_RB_ROOT(dd, rq);
+	struct rb_root *root = deadline_rb_root(dd, rq);
	struct request *__alias;

-retry:
-	__alias = elv_rb_add(root, rq);
-	if (unlikely(__alias)) {
+	while (unlikely(__alias = elv_rb_add(root, rq)))
		deadline_move_request(dd, __alias);
-		goto retry;
-	}
 }

 static inline void
@@ -91,7 +91,7 @@ deadline_del_rq_rb(struct deadline_data
	if (dd->next_rq[data_dir] == rq)
		dd->next_rq[data_dir] = deadline_latter_request(rq);

-	elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
+	elv_rb_del(deadline_rb_root(dd, rq), rq);
 }

 /*
@@ -106,7 +106,7 @@ deadline_add_request(struct request_queu
	deadline_add_rq_rb(dd, rq);

	/*
-	 * set expire time (only used for reads) and add to fifo list
+	 * set expire time and add to fifo list
	 */
	rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
@@ -162,7 +162,7 @@ static void deadline_merged_request(stru
	 * if the merge was a front merge, we need to reposition request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
-		elv_rb_del(RQ_RB_ROOT(dd, req), req);
+		elv_rb_del(deadline_rb_root(dd, req), req);
		deadline_add_rq_rb(dd, req);
	}
 }
@@ -212,7 +212,7 @@ deadline_move_request(struct deadline_da
	dd->next_rq[WRITE] = NULL;
	dd->next_rq[data_dir] = deadline_latter_request(rq);

-	dd->last_sector = rq->sector + rq->nr_sectors;
+	dd->last_sector = rq_end_sector(rq);

	/*
	 * take it off the sort and fifo list, move
@@ -222,7 +222,7 @@ deadline_move_request(struct deadline_da
 }

 /*
- * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
+ * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
  */
 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
@@ -258,17 +258,9 @@ static int deadline_dispatch_requests(st
	else
		rq = dd->next_rq[READ];

-	if (rq) {
-		/* we have a "next request" */
-
-		if (dd->last_sector != rq->sector)
-			/* end the batch on a non sequential request */
-			dd->batching += dd->fifo_batch;
-
-		if (dd->batching < dd->fifo_batch)
-			/* we are still entitled to batch */
-			goto dispatch_request;
-	}
+	if (rq && dd->batching < dd->fifo_batch)
+		/* we have a next request and are still entitled to batch */
+		goto dispatch_request;

	/*
	 * at this point we are not running a batch. select the appropriate
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -34,8 +34,7 @@
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
 #include <linux/hash.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>

 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
@@ -790,7 +789,6 @@ struct request *elv_next_request(struct
			 * device can handle
			 */
			rq->nr_phys_segments++;
-			rq->nr_hw_segments++;
		}

		if (!q->prep_rq_fn)
@@ -813,7 +811,6 @@ struct request *elv_next_request(struct
			 * so that we don't add it again
			 */
			--rq->nr_phys_segments;
-			--rq->nr_hw_segments;
		}

		rq = NULL;
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -211,10 +211,11 @@ void unlink_gendisk(struct gendisk *disk

 /**
  * get_gendisk - get partitioning information for a given device
- * @dev: device to get partitioning information for
+ * @devt: device to get partitioning information for
+ * @part: returned partition index
  *
  * This function gets the structure containing partitioning
- * information for the given device @dev.
+ * information for the given device @devt.
  */
 struct gendisk *get_gendisk(dev_t devt, int *part)
 {
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,8 +4,8 @@

 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
-			blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
-			cmd-filter.o
+			blk-exec.o blk-merge.o blk-softirq.o ioctl.o genhd.o \
+			scsi_ioctl.o cmd-filter.o

 obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
 obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
--- a/Documentation/block/deadline-iosched.txt
+++ b/Documentation/block/deadline-iosched.txt
@@ -30,12 +30,18 @@ write_expire (in ms)
 Similar to read_expire mentioned above, but for writes.


-fifo_batch
+fifo_batch (number of requests)
 ----------

-When a read request expires its deadline, we must move some requests from
-the sorted io scheduler list to the block device dispatch queue. fifo_batch
-controls how many requests we move.
+Requests are grouped into ``batches'' of a particular data direction (read or
+write) which are serviced in increasing sector order. To limit extra seeking,
+deadline expiries are only checked between batches. fifo_batch controls the
+maximum number of requests per batch.
+
+This parameter tunes the balance between per-request latency and aggregate
+throughput. When low latency is the primary concern, smaller is better (where
+a value of 1 yields first-come first-served behaviour). Increasing fifo_batch
+generally improves throughput, at the cost of latency variation.


 writes_starved (number of dispatches)
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -364,6 +364,10 @@ X!Edrivers/pnp/system.c
 !Eblock/blk-barrier.c
 !Eblock/blk-tag.c
 !Iblock/blk-tag.c
+!Eblock/blk-integrity.c
+!Iblock/blktrace.c
+!Iblock/genhd.c
+!Eblock/genhd.c
 </chapter>

 <chapter id="chrdev">
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -199,7 +199,8 @@ static void ps3disk_do_request(struct ps
		if (blk_fs_request(req)) {
			if (ps3disk_submit_request_sg(dev, req))
				break;
-		} else if (req->cmd_type == REQ_TYPE_FLUSH) {
+		} else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+			   req->cmd[0] == REQ_LB_OP_FLUSH) {
			if (ps3disk_submit_flush_request(dev, req))
				break;
		} else {
@@ -257,7 +258,8 @@ static irqreturn_t ps3disk_interrupt(int
		return IRQ_HANDLED;
	}

-	if (req->cmd_type == REQ_TYPE_FLUSH) {
+	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+	    req->cmd[0] == REQ_LB_OP_FLUSH) {
		read = 0;
		num_sectors = req->hard_cur_sectors;
		op = "flush";
@@ -405,7 +407,8 @@ static void ps3disk_prepare_flush(struct

	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);

-	req->cmd_type = REQ_TYPE_FLUSH;
+	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	req->cmd[0] = REQ_LB_OP_FLUSH;
 }

 static unsigned long ps3disk_mask;
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -84,11 +84,11 @@ static bool do_req(struct request_queue
	if (blk_fs_request(vbr->req)) {
		vbr->out_hdr.type = 0;
		vbr->out_hdr.sector = vbr->req->sector;
-		vbr->out_hdr.ioprio = vbr->req->ioprio;
+		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
	} else if (blk_pc_request(vbr->req)) {
		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
		vbr->out_hdr.sector = 0;
-		vbr->out_hdr.ioprio = vbr->req->ioprio;
+		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
	} else {
		/* We don't put anything else in the queue. */
		BUG();
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1346,9 +1346,6 @@ static void sync_request_write(mddev_t *
		tbio->bi_size = r10_bio->sectors << 9;
		tbio->bi_idx = 0;
		tbio->bi_phys_segments = 0;
-		tbio->bi_hw_segments = 0;
-		tbio->bi_hw_front_size = 0;
-		tbio->bi_hw_back_size = 0;
		tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
		tbio->bi_flags |= 1 << BIO_UPTODATE;
		tbio->bi_next = NULL;
@@ -1948,7 +1945,6 @@ static sector_t sync_request(mddev_t *md
		bio->bi_vcnt = 0;
		bio->bi_idx = 0;
		bio->bi_phys_segments = 0;
-		bio->bi_hw_segments = 0;
		bio->bi_size = 0;
	}

--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1303,9 +1303,6 @@ static void sync_request_write(mddev_t *
		sbio->bi_size = r1_bio->sectors << 9;
		sbio->bi_idx = 0;
		sbio->bi_phys_segments = 0;
-		sbio->bi_hw_segments = 0;
-		sbio->bi_hw_front_size = 0;
-		sbio->bi_hw_back_size = 0;
		sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
		sbio->bi_flags |= 1 << BIO_UPTODATE;
		sbio->bi_next = NULL;
@@ -1791,7 +1788,6 @@ static sector_t sync_request(mddev_t *md
		bio->bi_vcnt = 0;
		bio->bi_idx = 0;
		bio->bi_phys_segments = 0;
-		bio->bi_hw_segments = 0;
		bio->bi_size = 0;
		bio->bi_end_io = NULL;
		bio->bi_private = NULL;
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -101,6 +101,40 @@
 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
 #endif

+/*
+ * We maintain a biased count of active stripes in the bottom 16 bits of
+ * bi_phys_segments, and a count of processed stripes in the upper 16 bits
+ */
+static inline int raid5_bi_phys_segments(struct bio *bio)
+{
+	return bio->bi_phys_segments & 0xffff;
+}
+
+static inline int raid5_bi_hw_segments(struct bio *bio)
+{
+	return (bio->bi_phys_segments >> 16) & 0xffff;
+}
+
+static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+{
+	--bio->bi_phys_segments;
+	return raid5_bi_phys_segments(bio);
+}
+
+static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+{
+	unsigned short val = raid5_bi_hw_segments(bio);
+
+	--val;
+	bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
+	return val;
+}
+
+static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+{
1388 | + bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16); | |
1389 | +} | |
1390 | + | |
1391 | static inline int raid6_next_disk(int disk, int raid_disks) | |
1392 | { | |
1393 | disk++; | |
1394 | @@ -507,7 +541,7 @@ static void ops_complete_biofill(void *s | |
1395 | while (rbi && rbi->bi_sector < | |
1396 | dev->sector + STRIPE_SECTORS) { | |
1397 | rbi2 = r5_next_bio(rbi, dev->sector); | |
1398 | - if (--rbi->bi_phys_segments == 0) { | |
1399 | + if (!raid5_dec_bi_phys_segments(rbi)) { | |
1400 | rbi->bi_next = return_bi; | |
1401 | return_bi = rbi; | |
1402 | } | |
1403 | @@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_ | |
1404 | if (*bip) | |
1405 | bi->bi_next = *bip; | |
1406 | *bip = bi; | |
1407 | - bi->bi_phys_segments ++; | |
1408 | + bi->bi_phys_segments++; | |
1409 | spin_unlock_irq(&conf->device_lock); | |
1410 | spin_unlock(&sh->lock); | |
1411 | ||
1412 | @@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf, | |
1413 | sh->dev[i].sector + STRIPE_SECTORS) { | |
1414 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); | |
1415 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | |
1416 | - if (--bi->bi_phys_segments == 0) { | |
1417 | + if (!raid5_dec_bi_phys_segments(bi)) { | |
1418 | md_write_end(conf->mddev); | |
1419 | bi->bi_next = *return_bi; | |
1420 | *return_bi = bi; | |
1421 | @@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf, | |
1422 | sh->dev[i].sector + STRIPE_SECTORS) { | |
1423 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); | |
1424 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | |
1425 | - if (--bi->bi_phys_segments == 0) { | |
1426 | + if (!raid5_dec_bi_phys_segments(bi)) { | |
1427 | md_write_end(conf->mddev); | |
1428 | bi->bi_next = *return_bi; | |
1429 | *return_bi = bi; | |
1430 | @@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf, | |
1431 | struct bio *nextbi = | |
1432 | r5_next_bio(bi, sh->dev[i].sector); | |
1433 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | |
1434 | - if (--bi->bi_phys_segments == 0) { | |
1435 | + if (!raid5_dec_bi_phys_segments(bi)) { | |
1436 | bi->bi_next = *return_bi; | |
1437 | *return_bi = bi; | |
1438 | } | |
1439 | @@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(ra | |
1440 | while (wbi && wbi->bi_sector < | |
1441 | dev->sector + STRIPE_SECTORS) { | |
1442 | wbi2 = r5_next_bio(wbi, dev->sector); | |
1443 | - if (--wbi->bi_phys_segments == 0) { | |
1444 | + if (!raid5_dec_bi_phys_segments(wbi)) { | |
1445 | md_write_end(conf->mddev); | |
1446 | wbi->bi_next = *return_bi; | |
1447 | *return_bi = wbi; | |
1448 | @@ -2814,7 +2848,7 @@ static bool handle_stripe6(struct stripe | |
1449 | copy_data(0, rbi, dev->page, dev->sector); | |
1450 | rbi2 = r5_next_bio(rbi, dev->sector); | |
1451 | spin_lock_irq(&conf->device_lock); | |
1452 | - if (--rbi->bi_phys_segments == 0) { | |
1453 | + if (!raid5_dec_bi_phys_segments(rbi)) { | |
1454 | rbi->bi_next = return_bi; | |
1455 | return_bi = rbi; | |
1456 | } | |
1457 | @@ -3155,8 +3189,11 @@ static struct bio *remove_bio_from_retry | |
1458 | if(bi) { | |
1459 | conf->retry_read_aligned_list = bi->bi_next; | |
1460 | bi->bi_next = NULL; | |
1461 | + /* | |
1462 | + * this sets the active stripe count to 1 and the processed | |
1463 | + * stripe count (upper 16 bits) to zero | |
1464 | + */ | |
1465 | bi->bi_phys_segments = 1; /* biased count of active stripes */ | |
1466 | - bi->bi_hw_segments = 0; /* count of processed stripes */ | |
1467 | } | |
1468 | ||
1469 | return bi; | |
1470 | @@ -3206,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi) | |
1471 | if ((bi->bi_size>>9) > q->max_sectors) | |
1472 | return 0; | |
1473 | blk_recount_segments(q, bi); | |
1474 | - if (bi->bi_phys_segments > q->max_phys_segments || | |
1475 | - bi->bi_hw_segments > q->max_hw_segments) | |
1476 | + if (bi->bi_phys_segments > q->max_phys_segments) | |
1477 | return 0; | |
1478 | ||
1479 | if (q->merge_bvec_fn) | |
1480 | @@ -3468,7 +3504,7 @@ static int make_request(struct request_q | |
1481 | ||
1482 | } | |
1483 | spin_lock_irq(&conf->device_lock); | |
1484 | - remaining = --bi->bi_phys_segments; | |
1485 | + remaining = raid5_dec_bi_phys_segments(bi); | |
1486 | spin_unlock_irq(&conf->device_lock); | |
1487 | if (remaining == 0) { | |
1488 | ||
1489 | @@ -3752,7 +3788,7 @@ static int retry_aligned_read(raid5_con | |
1490 | sector += STRIPE_SECTORS, | |
1491 | scnt++) { | |
1492 | ||
1493 | - if (scnt < raid_bio->bi_hw_segments) | |
1494 | + if (scnt < raid5_bi_hw_segments(raid_bio)) | |
1495 | /* already done this stripe */ | |
1496 | continue; | |
1497 | ||
1498 | @@ -3760,7 +3796,7 @@ static int retry_aligned_read(raid5_con | |
1499 | ||
1500 | if (!sh) { | |
1501 | /* failed to get a stripe - must wait */ | |
1502 | - raid_bio->bi_hw_segments = scnt; | |
1503 | + raid5_set_bi_hw_segments(raid_bio, scnt); | |
1504 | conf->retry_read_aligned = raid_bio; | |
1505 | return handled; | |
1506 | } | |
1507 | @@ -3768,7 +3804,7 @@ static int retry_aligned_read(raid5_con | |
1508 | set_bit(R5_ReadError, &sh->dev[dd_idx].flags); | |
1509 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { | |
1510 | release_stripe(sh); | |
1511 | - raid_bio->bi_hw_segments = scnt; | |
1512 | + raid5_set_bi_hw_segments(raid_bio, scnt); | |
1513 | conf->retry_read_aligned = raid_bio; | |
1514 | return handled; | |
1515 | } | |
1516 | @@ -3778,7 +3814,7 @@ static int retry_aligned_read(raid5_con | |
1517 | handled++; | |
1518 | } | |
1519 | spin_lock_irq(&conf->device_lock); | |
1520 | - remaining = --raid_bio->bi_phys_segments; | |
1521 | + remaining = raid5_dec_bi_phys_segments(raid_bio); | |
1522 | spin_unlock_irq(&conf->device_lock); | |
1523 | if (remaining == 0) | |
1524 | bio_endio(raid_bio, 0); | |
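The raid5_*_segments() helpers added at the top of this file pack two counters into one field: the biased active-stripe count in the low 16 bits of bi_phys_segments, the processed-stripe count in the high 16 bits. A standalone userspace illustration of the arithmetic (not kernel code), mirroring how remove_bio_from_retry() and retry_aligned_read() above use it:

	#include <assert.h>

	int main(void)
	{
		/* remove_bio_from_retry(): active count biased to 1, processed = 0 */
		unsigned int seg = 1;

		/* raid5_set_bi_hw_segments(bio, 3): record 3 processed stripes */
		seg = (3u << 16) | (seg & 0xffff);
		assert(((seg >> 16) & 0xffff) == 3);	/* raid5_bi_hw_segments() */
		assert((seg & 0xffff) == 1);		/* raid5_bi_phys_segments() */

		/* raid5_dec_bi_phys_segments(): drop the bias; 0 means the bio
		 * can be completed */
		--seg;
		assert((seg & 0xffff) == 0);
		assert(((seg >> 16) & 0xffff) == 3);	/* upper half untouched */
		return 0;
	}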
1525 | --- a/fs/bio.c | |
1526 | +++ b/fs/bio.c | |
1527 | @@ -208,14 +208,6 @@ inline int bio_phys_segments(struct requ | |
1528 | return bio->bi_phys_segments; | |
1529 | } | |
1530 | ||
1531 | -inline int bio_hw_segments(struct request_queue *q, struct bio *bio) | |
1532 | -{ | |
1533 | - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | |
1534 | - blk_recount_segments(q, bio); | |
1535 | - | |
1536 | - return bio->bi_hw_segments; | |
1537 | -} | |
1538 | - | |
1539 | /** | |
1540 | * __bio_clone - clone a bio | |
1541 | * @bio: destination bio | |
1542 | @@ -350,8 +342,7 @@ static int __bio_add_page(struct request | |
1543 | */ | |
1544 | ||
1545 | while (bio->bi_phys_segments >= q->max_phys_segments | |
1546 | - || bio->bi_hw_segments >= q->max_hw_segments | |
1547 | - || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { | |
1548 | + || bio->bi_phys_segments >= q->max_hw_segments) { | |
1549 | ||
1550 | if (retried_segments) | |
1551 | return 0; | |
1552 | @@ -395,13 +386,11 @@ static int __bio_add_page(struct request | |
1553 | } | |
1554 | ||
1555 | /* If we may be able to merge these biovecs, force a recount */ | |
1556 | - if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || | |
1557 | - BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) | |
1558 | + if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) | |
1559 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); | |
1560 | ||
1561 | bio->bi_vcnt++; | |
1562 | bio->bi_phys_segments++; | |
1563 | - bio->bi_hw_segments++; | |
1564 | done: | |
1565 | bio->bi_size += len; | |
1566 | return len; | |
1567 | @@ -1393,7 +1382,6 @@ EXPORT_SYMBOL(bio_init); | |
1568 | EXPORT_SYMBOL(__bio_clone); | |
1569 | EXPORT_SYMBOL(bio_clone); | |
1570 | EXPORT_SYMBOL(bio_phys_segments); | |
1571 | -EXPORT_SYMBOL(bio_hw_segments); | |
1572 | EXPORT_SYMBOL(bio_add_page); | |
1573 | EXPORT_SYMBOL(bio_add_pc_page); | |
1574 | EXPORT_SYMBOL(bio_get_nr_vecs); | |
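With virtual merging gone, the growth check in __bio_add_page() (second hunk above) bounds a single counter by both queue limits, i.e. effectively by the smaller of the two. A condensed sketch; bio_at_segment_limit() is an illustrative name, not a function this patch adds:

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	static inline int bio_at_segment_limit(struct request_queue *q,
					       struct bio *bio)
	{
		/* bi_phys_segments is now the only per-bio segment count */
		return bio->bi_phys_segments >= q->max_phys_segments ||
		       bio->bi_phys_segments >= q->max_hw_segments;
	}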
1575 | --- a/include/linux/bio.h | |
1576 | +++ b/include/linux/bio.h | |
1577 | @@ -26,21 +26,8 @@ | |
1578 | ||
1579 | #ifdef CONFIG_BLOCK | |
1580 | ||
1581 | -/* Platforms may set this to teach the BIO layer about IOMMU hardware. */ | |
1582 | #include <asm/io.h> | |
1583 | ||
1584 | -#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY) | |
1585 | -#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1)) | |
1586 | -#define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE) | |
1587 | -#else | |
1588 | -#define BIOVEC_VIRT_START_SIZE(x) 0 | |
1589 | -#define BIOVEC_VIRT_OVERSIZE(x) 0 | |
1590 | -#endif | |
1591 | - | |
1592 | -#ifndef BIO_VMERGE_BOUNDARY | |
1593 | -#define BIO_VMERGE_BOUNDARY 0 | |
1594 | -#endif | |
1595 | - | |
1596 | #define BIO_DEBUG | |
1597 | ||
1598 | #ifdef BIO_DEBUG | |
1599 | @@ -88,12 +75,7 @@ struct bio { | |
1600 | /* Number of segments in this BIO after | |
1601 | * physical address coalescing is performed. | |
1602 | */ | |
1603 | - unsigned short bi_phys_segments; | |
1604 | - | |
1605 | - /* Number of segments after physical and DMA remapping | |
1606 | - * hardware coalescing is performed. | |
1607 | - */ | |
1608 | - unsigned short bi_hw_segments; | |
1609 | + unsigned int bi_phys_segments; | |
1610 | ||
1611 | unsigned int bi_size; /* residual I/O count */ | |
1612 | ||
1613 | @@ -104,14 +86,6 @@ struct bio { | |
1614 | unsigned int bi_seg_front_size; | |
1615 | unsigned int bi_seg_back_size; | |
1616 | ||
1617 | - /* | |
1618 | - * To keep track of the max hw size, we account for the | |
1619 | - * sizes of the first and last virtually mergeable segments | |
1620 | - * in this bio | |
1621 | - */ | |
1622 | - unsigned int bi_hw_front_size; | |
1623 | - unsigned int bi_hw_back_size; | |
1624 | - | |
1625 | unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ | |
1626 | ||
1627 | struct bio_vec *bi_io_vec; /* the actual vec list */ | |
1628 | @@ -133,7 +107,7 @@ struct bio { | |
1629 | #define BIO_UPTODATE 0 /* ok after I/O completion */ | |
1630 | #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ | |
1631 | #define BIO_EOF 2 /* out-of-bounds error */ | |
1632 | -#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ | |
1633 | +#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ | |
1634 | #define BIO_CLONED 4 /* doesn't own data */ | |
1635 | #define BIO_BOUNCED 5 /* bio is a bounce bio */ | |
1636 | #define BIO_USER_MAPPED 6 /* contains user pages */ | |
1637 | @@ -247,8 +221,6 @@ static inline void *bio_data(struct bio | |
1638 | ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) | |
1639 | #endif | |
1640 | ||
1641 | -#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \ | |
1642 | - ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0) | |
1643 | #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ | |
1644 | (((addr1) | (mask)) == (((addr2) - 1) | (mask))) | |
1645 | #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ | |
1646 | @@ -346,7 +318,6 @@ extern void bio_free(struct bio *, struc | |
1647 | extern void bio_endio(struct bio *, int); | |
1648 | struct request_queue; | |
1649 | extern int bio_phys_segments(struct request_queue *, struct bio *); | |
1650 | -extern int bio_hw_segments(struct request_queue *, struct bio *); | |
1651 | ||
1652 | extern void __bio_clone(struct bio *, struct bio *); | |
1653 | extern struct bio *bio_clone(struct bio *, gfp_t); | |
1654 | --- a/include/linux/blkdev.h | |
1655 | +++ b/include/linux/blkdev.h | |
1656 | @@ -54,7 +54,6 @@ enum rq_cmd_type_bits { | |
1657 | REQ_TYPE_PM_SUSPEND, /* suspend request */ | |
1658 | REQ_TYPE_PM_RESUME, /* resume request */ | |
1659 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ | |
1660 | - REQ_TYPE_FLUSH, /* flush request */ | |
1661 | REQ_TYPE_SPECIAL, /* driver defined type */ | |
1662 | REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ | |
1663 | /* | |
1664 | @@ -76,11 +75,8 @@ enum rq_cmd_type_bits { | |
1665 | * | |
1666 | */ | |
1667 | enum { | |
1668 | - /* | |
1669 | - * just examples for now | |
1670 | - */ | |
1671 | REQ_LB_OP_EJECT = 0x40, /* eject request */ | |
1672 | - REQ_LB_OP_FLUSH = 0x41, /* flush device */ | |
1673 | + REQ_LB_OP_FLUSH = 0x41, /* flush request */ | |
1674 | REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ | |
1675 | }; | |
1676 | ||
1677 | @@ -193,13 +189,6 @@ struct request { | |
1678 | */ | |
1679 | unsigned short nr_phys_segments; | |
1680 | ||
1681 | - /* Number of scatter-gather addr+len pairs after | |
1682 | - * physical and DMA remapping hardware coalescing is performed. | |
1683 | - * This is the number of scatter-gather entries the driver | |
1684 | - * will actually have to deal with after DMA mapping is done. | |
1685 | - */ | |
1686 | - unsigned short nr_hw_segments; | |
1687 | - | |
1688 | unsigned short ioprio; | |
1689 | ||
1690 | void *special; | |
1691 | @@ -236,6 +225,11 @@ struct request { | |
1692 | struct request *next_rq; | |
1693 | }; | |
1694 | ||
1695 | +static inline unsigned short req_get_ioprio(struct request *req) | |
1696 | +{ | |
1697 | + return req->ioprio; | |
1698 | +} | |
1699 | + | |
1700 | /* | |
1701 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME | |
1702 | * requests. Some step values could eventually be made generic. |
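The req_get_ioprio() accessor defined above is what the virtio_blk hunk converts to; any driver that read req->ioprio directly changes the same way. A sketch with a hypothetical caller (foo_out_hdr and foo_fill_header are illustrative, not from this patch):

	#include <linux/blkdev.h>

	struct foo_out_hdr {
		unsigned short ioprio;
	};

	static void foo_fill_header(struct foo_out_hdr *hdr, struct request *req)
	{
		/* was: hdr->ioprio = req->ioprio; */
		hdr->ioprio = req_get_ioprio(req);
	}

Going through the helper keeps callers unchanged should the priority's representation inside struct request ever move or change.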