1From: Jens Axboe <jens.axboe@oracle.com>
2Subject: Block layer fixes for 2.6.28
3Patch-Mainline: 2.6.28
4
5This is a combined patchset with block layer fixes from 2.6.28.
6Commit IDs:
797dee27d1c4d6041ff1cc8150db95fe3eab6be5a
800bbda44114e70fc9879731be3c888122b1de8b1
97452d2a2be657becb2f385d0e0864ba51f1ae694
10075a108f7d4dd24b8b69e59edcdf1a0fd84e6541
117a1b6029bf9ff3d0636e318d2482031dc493df16
12b3a5faf3cefbff4b69ca181767b882bbd6189aaf
138fe902de23b4f4012db91f538cafd864c63308e7
14dfef13dad8d34d0a9e83adee3e8cd9f94cca465e
15d2629dd70132f90f9d1bca07572197e9adea25b1
161f08a4484a223cb337e0466042005421cd55d22b
17fcdc7361d2925596d69d0538d738c08c221a69c9
18cd93bcfa9ca9b15051220614160131c53d7f33f0
19d371ca6b8a21a617b8607d23f7202197ad40482a
20910ee03b1e61d5cfb121dfb1ee7c127f18bdae01
21
22Signed-off-by: Hannes Reinecke <hare@suse.de>
23
24---
25 Documentation/DocBook/kernel-api.tmpl | 4
26 Documentation/block/deadline-iosched.txt | 14 +-
27 block/Makefile | 4
28 block/blk-core.c | 166 +++++++------------------------
29 block/blk-exec.c | 6 -
30 block/blk-integrity.c | 4
31 block/blk-map.c | 16 +-
32 block/blk-merge.c | 100 ------------------
33 block/blk-settings.c | 8 -
34 block/blk-softirq.c | 103 +++++++++++++++++++
35 block/blk-tag.c | 8 -
36 block/cfq-iosched.c | 47 +++++++-
37 block/deadline-iosched.c | 40 ++-----
38 block/elevator.c | 5
39 block/genhd.c | 5
40 drivers/block/ps3disk.c | 9 +
41 drivers/block/virtio_blk.c | 4
42 drivers/md/raid1.c | 4
43 drivers/md/raid10.c | 4
44 drivers/md/raid5.c | 66 +++++++++---
45 fs/bio.c | 16 --
46 include/linux/bio.h | 33 ------
47 include/linux/blkdev.h | 18 +--
48 23 files changed, 310 insertions(+), 374 deletions(-)
49
50--- a/block/blk-core.c
51+++ b/block/blk-core.c
52@@ -26,8 +26,6 @@
53 #include <linux/swap.h>
54 #include <linux/writeback.h>
55 #include <linux/task_io_accounting_ops.h>
56-#include <linux/interrupt.h>
57-#include <linux/cpu.h>
58 #include <linux/blktrace_api.h>
59 #include <linux/fault-inject.h>
60
61@@ -50,8 +48,6 @@ struct kmem_cache *blk_requestq_cachep;
62 */
63 static struct workqueue_struct *kblockd_workqueue;
64
65-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
66-
67 static void drive_stat_acct(struct request *rq, int new_io)
68 {
69 struct hd_struct *part;
70@@ -531,7 +527,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
71 * request queue; this lock will be taken also from interrupt context, so irq
72 * disabling is needed for it.
73 *
74- * Function returns a pointer to the initialized request queue, or NULL if
75+ * Function returns a pointer to the initialized request queue, or %NULL if
76 * it didn't succeed.
77 *
78 * Note:
79@@ -913,7 +909,7 @@ void blk_requeue_request(struct request_
80 EXPORT_SYMBOL(blk_requeue_request);
81
82 /**
83- * blk_insert_request - insert a special request in to a request queue
84+ * blk_insert_request - insert a special request into a request queue
85 * @q: request queue where request should be inserted
86 * @rq: request to be inserted
87 * @at_head: insert request at head or tail of queue
88@@ -923,8 +919,8 @@ EXPORT_SYMBOL(blk_requeue_request);
89 * Many block devices need to execute commands asynchronously, so they don't
90 * block the whole kernel from preemption during request execution. This is
91 * accomplished normally by inserting aritficial requests tagged as
92- * REQ_SPECIAL in to the corresponding request queue, and letting them be
93- * scheduled for actual execution by the request queue.
94+ * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them
95+ * be scheduled for actual execution by the request queue.
96 *
97 * We have the option of inserting the head or the tail of the queue.
98 * Typically we use the tail for new ioctls and so forth. We use the head
99@@ -1322,7 +1318,7 @@ static inline int bio_check_eod(struct b
100 }
101
102 /**
103- * generic_make_request: hand a buffer to its device driver for I/O
104+ * generic_make_request - hand a buffer to its device driver for I/O
105 * @bio: The bio describing the location in memory and on the device.
106 *
107 * generic_make_request() is used to make I/O requests of block
108@@ -1480,13 +1476,13 @@ void generic_make_request(struct bio *bi
109 EXPORT_SYMBOL(generic_make_request);
110
111 /**
112- * submit_bio: submit a bio to the block device layer for I/O
113+ * submit_bio - submit a bio to the block device layer for I/O
114 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
115 * @bio: The &struct bio which describes the I/O
116 *
117 * submit_bio() is very similar in purpose to generic_make_request(), and
118 * uses that function to do most of the work. Both are fairly rough
119- * interfaces, @bio must be presetup and ready for I/O.
120+ * interfaces; @bio must be presetup and ready for I/O.
121 *
122 */
123 void submit_bio(int rw, struct bio *bio)
124@@ -1524,7 +1520,7 @@ EXPORT_SYMBOL(submit_bio);
125 /**
126 * __end_that_request_first - end I/O on a request
127 * @req: the request being processed
128- * @error: 0 for success, < 0 for error
129+ * @error: %0 for success, < %0 for error
130 * @nr_bytes: number of bytes to complete
131 *
132 * Description:
133@@ -1532,8 +1528,8 @@ EXPORT_SYMBOL(submit_bio);
134 * for the next range of segments (if any) in the cluster.
135 *
136 * Return:
137- * 0 - we are done with this request, call end_that_request_last()
138- * 1 - still buffers pending for this request
139+ * %0 - we are done with this request, call end_that_request_last()
140+ * %1 - still buffers pending for this request
141 **/
142 static int __end_that_request_first(struct request *req, int error,
143 int nr_bytes)
144@@ -1544,7 +1540,7 @@ static int __end_that_request_first(stru
145 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
146
147 /*
148- * for a REQ_BLOCK_PC request, we want to carry any eventual
149+ * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
150 * sense key with us all the way through
151 */
152 if (!blk_pc_request(req))
153@@ -1646,82 +1642,6 @@ static int __end_that_request_first(stru
154 }
155
156 /*
157- * splice the completion data to a local structure and hand off to
158- * process_completion_queue() to complete the requests
159- */
160-static void blk_done_softirq(struct softirq_action *h)
161-{
162- struct list_head *cpu_list, local_list;
163-
164- local_irq_disable();
165- cpu_list = &__get_cpu_var(blk_cpu_done);
166- list_replace_init(cpu_list, &local_list);
167- local_irq_enable();
168-
169- while (!list_empty(&local_list)) {
170- struct request *rq;
171-
172- rq = list_entry(local_list.next, struct request, donelist);
173- list_del_init(&rq->donelist);
174- rq->q->softirq_done_fn(rq);
175- }
176-}
177-
178-static int __cpuinit blk_cpu_notify(struct notifier_block *self,
179- unsigned long action, void *hcpu)
180-{
181- /*
182- * If a CPU goes away, splice its entries to the current CPU
183- * and trigger a run of the softirq
184- */
185- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
186- int cpu = (unsigned long) hcpu;
187-
188- local_irq_disable();
189- list_splice_init(&per_cpu(blk_cpu_done, cpu),
190- &__get_cpu_var(blk_cpu_done));
191- raise_softirq_irqoff(BLOCK_SOFTIRQ);
192- local_irq_enable();
193- }
194-
195- return NOTIFY_OK;
196-}
197-
198-
199-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
200- .notifier_call = blk_cpu_notify,
201-};
202-
203-/**
204- * blk_complete_request - end I/O on a request
205- * @req: the request being processed
206- *
207- * Description:
208- * Ends all I/O on a request. It does not handle partial completions,
209- * unless the driver actually implements this in its completion callback
210- * through requeueing. The actual completion happens out-of-order,
211- * through a softirq handler. The user must have registered a completion
212- * callback through blk_queue_softirq_done().
213- **/
214-
215-void blk_complete_request(struct request *req)
216-{
217- struct list_head *cpu_list;
218- unsigned long flags;
219-
220- BUG_ON(!req->q->softirq_done_fn);
221-
222- local_irq_save(flags);
223-
224- cpu_list = &__get_cpu_var(blk_cpu_done);
225- list_add_tail(&req->donelist, cpu_list);
226- raise_softirq_irqoff(BLOCK_SOFTIRQ);
227-
228- local_irq_restore(flags);
229-}
230-EXPORT_SYMBOL(blk_complete_request);
231-
232-/*
233 * queue lock must be held
234 */
235 static void end_that_request_last(struct request *req, int error)
236@@ -1810,11 +1730,11 @@ EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
237 /**
238 * end_queued_request - end all I/O on a queued request
239 * @rq: the request being processed
240- * @uptodate: error value or 0/1 uptodate flag
241+ * @uptodate: error value or %0/%1 uptodate flag
242 *
243 * Description:
244 * Ends all I/O on a request, and removes it from the block layer queues.
245- * Not suitable for normal IO completion, unless the driver still has
246+ * Not suitable for normal I/O completion, unless the driver still has
247 * the request attached to the block layer.
248 *
249 **/
250@@ -1827,7 +1747,7 @@ EXPORT_SYMBOL(end_queued_request);
251 /**
252 * end_dequeued_request - end all I/O on a dequeued request
253 * @rq: the request being processed
254- * @uptodate: error value or 0/1 uptodate flag
255+ * @uptodate: error value or %0/%1 uptodate flag
256 *
257 * Description:
258 * Ends all I/O on a request. The request must already have been
259@@ -1845,14 +1765,14 @@ EXPORT_SYMBOL(end_dequeued_request);
260 /**
261 * end_request - end I/O on the current segment of the request
262 * @req: the request being processed
263- * @uptodate: error value or 0/1 uptodate flag
264+ * @uptodate: error value or %0/%1 uptodate flag
265 *
266 * Description:
267 * Ends I/O on the current segment of a request. If that is the only
268 * remaining segment, the request is also completed and freed.
269 *
270- * This is a remnant of how older block drivers handled IO completions.
271- * Modern drivers typically end IO on the full request in one go, unless
272+ * This is a remnant of how older block drivers handled I/O completions.
273+ * Modern drivers typically end I/O on the full request in one go, unless
274 * they have a residual value to account for. For that case this function
275 * isn't really useful, unless the residual just happens to be the
276 * full current segment. In other words, don't use this function in new
277@@ -1870,12 +1790,12 @@ EXPORT_SYMBOL(end_request);
278 /**
279 * blk_end_io - Generic end_io function to complete a request.
280 * @rq: the request being processed
281- * @error: 0 for success, < 0 for error
282+ * @error: %0 for success, < %0 for error
283 * @nr_bytes: number of bytes to complete @rq
284 * @bidi_bytes: number of bytes to complete @rq->next_rq
285 * @drv_callback: function called between completion of bios in the request
286 * and completion of the request.
287- * If the callback returns non 0, this helper returns without
288+ * If the callback returns non %0, this helper returns without
289 * completion of the request.
290 *
291 * Description:
292@@ -1883,8 +1803,8 @@ EXPORT_SYMBOL(end_request);
293 * If @rq has leftover, sets it up for the next range of segments.
294 *
295 * Return:
296- * 0 - we are done with this request
297- * 1 - this request is not freed yet, it still has pending buffers.
298+ * %0 - we are done with this request
299+ * %1 - this request is not freed yet, it still has pending buffers.
300 **/
301 static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
302 unsigned int bidi_bytes,
303@@ -1893,7 +1813,7 @@ static int blk_end_io(struct request *rq
304 struct request_queue *q = rq->q;
305 unsigned long flags = 0UL;
306
307- if (bio_has_data(rq->bio) || blk_discard_rq(rq)) {
308+ if (rq->bio) {
309 if (__end_that_request_first(rq, error, nr_bytes))
310 return 1;
311
312@@ -1919,7 +1839,7 @@ static int blk_end_io(struct request *rq
313 /**
314 * blk_end_request - Helper function for drivers to complete the request.
315 * @rq: the request being processed
316- * @error: 0 for success, < 0 for error
317+ * @error: %0 for success, < %0 for error
318 * @nr_bytes: number of bytes to complete
319 *
320 * Description:
321@@ -1927,8 +1847,8 @@ static int blk_end_io(struct request *rq
322 * If @rq has leftover, sets it up for the next range of segments.
323 *
324 * Return:
325- * 0 - we are done with this request
326- * 1 - still buffers pending for this request
327+ * %0 - we are done with this request
328+ * %1 - still buffers pending for this request
329 **/
330 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
331 {
332@@ -1939,20 +1859,19 @@ EXPORT_SYMBOL_GPL(blk_end_request);
333 /**
334 * __blk_end_request - Helper function for drivers to complete the request.
335 * @rq: the request being processed
336- * @error: 0 for success, < 0 for error
337+ * @error: %0 for success, < %0 for error
338 * @nr_bytes: number of bytes to complete
339 *
340 * Description:
341 * Must be called with queue lock held unlike blk_end_request().
342 *
343 * Return:
344- * 0 - we are done with this request
345- * 1 - still buffers pending for this request
346+ * %0 - we are done with this request
347+ * %1 - still buffers pending for this request
348 **/
349 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
350 {
351- if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) &&
352- __end_that_request_first(rq, error, nr_bytes))
353+ if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
354 return 1;
355
356 add_disk_randomness(rq->rq_disk);
357@@ -1966,7 +1885,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
358 /**
359 * blk_end_bidi_request - Helper function for drivers to complete bidi request.
360 * @rq: the bidi request being processed
361- * @error: 0 for success, < 0 for error
362+ * @error: %0 for success, < %0 for error
363 * @nr_bytes: number of bytes to complete @rq
364 * @bidi_bytes: number of bytes to complete @rq->next_rq
365 *
366@@ -1974,8 +1893,8 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
367 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
368 *
369 * Return:
370- * 0 - we are done with this request
371- * 1 - still buffers pending for this request
372+ * %0 - we are done with this request
373+ * %1 - still buffers pending for this request
374 **/
375 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
376 unsigned int bidi_bytes)
377@@ -1987,11 +1906,11 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
378 /**
379 * blk_end_request_callback - Special helper function for tricky drivers
380 * @rq: the request being processed
381- * @error: 0 for success, < 0 for error
382+ * @error: %0 for success, < %0 for error
383 * @nr_bytes: number of bytes to complete
384 * @drv_callback: function called between completion of bios in the request
385 * and completion of the request.
386- * If the callback returns non 0, this helper returns without
387+ * If the callback returns non %0, this helper returns without
388 * completion of the request.
389 *
390 * Description:
391@@ -2004,10 +1923,10 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
392 * Don't use this interface in other places anymore.
393 *
394 * Return:
395- * 0 - we are done with this request
396- * 1 - this request is not freed yet.
397- * this request still has pending buffers or
398- * the driver doesn't want to finish this request yet.
399+ * %0 - we are done with this request
400+ * %1 - this request is not freed yet.
401+ * this request still has pending buffers or
402+ * the driver doesn't want to finish this request yet.
403 **/
404 int blk_end_request_callback(struct request *rq, int error,
405 unsigned int nr_bytes,
406@@ -2026,7 +1945,6 @@ void blk_rq_bio_prep(struct request_queu
407
408 if (bio_has_data(bio)) {
409 rq->nr_phys_segments = bio_phys_segments(q, bio);
410- rq->nr_hw_segments = bio_hw_segments(q, bio);
411 rq->buffer = bio_data(bio);
412 }
413 rq->current_nr_sectors = bio_cur_sectors(bio);
414@@ -2054,8 +1972,6 @@ EXPORT_SYMBOL(kblockd_flush_work);
415
416 int __init blk_dev_init(void)
417 {
418- int i;
419-
420 kblockd_workqueue = create_workqueue("kblockd");
421 if (!kblockd_workqueue)
422 panic("Failed to create kblockd\n");
423@@ -2066,12 +1982,6 @@ int __init blk_dev_init(void)
424 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
425 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
426
427- for_each_possible_cpu(i)
428- INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
429-
430- open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
431- register_hotcpu_notifier(&blk_cpu_notifier);
432-
433 return 0;
434 }
435
436--- a/block/blk-exec.c
437+++ b/block/blk-exec.c
438@@ -16,7 +16,7 @@
439 /**
440 * blk_end_sync_rq - executes a completion event on a request
441 * @rq: request to complete
442- * @error: end io status of the request
443+ * @error: end I/O status of the request
444 */
445 static void blk_end_sync_rq(struct request *rq, int error)
446 {
447@@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct reque
448 * @done: I/O completion handler
449 *
450 * Description:
451- * Insert a fully prepared request at the back of the io scheduler queue
452+ * Insert a fully prepared request at the back of the I/O scheduler queue
453 * for execution. Don't wait for completion.
454 */
455 void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
456@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait)
457 * @at_head: insert request at head or tail of queue
458 *
459 * Description:
460- * Insert a fully prepared request at the back of the io scheduler queue
461+ * Insert a fully prepared request at the back of the I/O scheduler queue
462 * for execution and wait for completion.
463 */
464 int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
465--- a/block/blk-integrity.c
466+++ b/block/blk-integrity.c
467@@ -109,8 +109,8 @@ EXPORT_SYMBOL(blk_rq_map_integrity_sg);
468
469 /**
470 * blk_integrity_compare - Compare integrity profile of two block devices
471- * @b1: Device to compare
472- * @b2: Device to compare
473+ * @bd1: Device to compare
474+ * @bd2: Device to compare
475 *
476 * Description: Meta-devices like DM and MD need to verify that all
477 * sub-devices use the same integrity format before advertising to
478--- a/block/blk-map.c
479+++ b/block/blk-map.c
480@@ -85,17 +85,17 @@ static int __blk_rq_map_user(struct requ
481 }
482
483 /**
484- * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
485+ * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
486 * @q: request queue where request should be inserted
487 * @rq: request structure to fill
488 * @ubuf: the user buffer
489 * @len: length of user data
490 *
491 * Description:
492- * Data will be mapped directly for zero copy io, if possible. Otherwise
493+ * Data will be mapped directly for zero copy I/O, if possible. Otherwise
494 * a kernel bounce buffer is used.
495 *
496- * A matching blk_rq_unmap_user() must be issued at the end of io, while
497+ * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
498 * still in process context.
499 *
500 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
501@@ -154,7 +154,7 @@ unmap_rq:
502 EXPORT_SYMBOL(blk_rq_map_user);
503
504 /**
505- * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
506+ * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
507 * @q: request queue where request should be inserted
508 * @rq: request to map data to
509 * @iov: pointer to the iovec
510@@ -162,10 +162,10 @@ EXPORT_SYMBOL(blk_rq_map_user);
511 * @len: I/O byte count
512 *
513 * Description:
514- * Data will be mapped directly for zero copy io, if possible. Otherwise
515+ * Data will be mapped directly for zero copy I/O, if possible. Otherwise
516 * a kernel bounce buffer is used.
517 *
518- * A matching blk_rq_unmap_user() must be issued at the end of io, while
519+ * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
520 * still in process context.
521 *
522 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
523@@ -224,7 +224,7 @@ int blk_rq_map_user_iov(struct request_q
524 * Description:
525 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must
526 * supply the original rq->bio from the blk_rq_map_user() return, since
527- * the io completion may have changed rq->bio.
528+ * the I/O completion may have changed rq->bio.
529 */
530 int blk_rq_unmap_user(struct bio *bio)
531 {
532@@ -250,7 +250,7 @@ int blk_rq_unmap_user(struct bio *bio)
533 EXPORT_SYMBOL(blk_rq_unmap_user);
534
535 /**
536- * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
537+ * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
538 * @q: request queue where request should be inserted
539 * @rq: request to fill
540 * @kbuf: the kernel buffer
541--- a/block/blk-merge.c
542+++ b/block/blk-merge.c
543@@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct reques
544 void blk_recalc_rq_segments(struct request *rq)
545 {
546 int nr_phys_segs;
547- int nr_hw_segs;
548 unsigned int phys_size;
549- unsigned int hw_size;
550 struct bio_vec *bv, *bvprv = NULL;
551 int seg_size;
552- int hw_seg_size;
553 int cluster;
554 struct req_iterator iter;
555 int high, highprv = 1;
556@@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct reque
557 return;
558
559 cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
560- hw_seg_size = seg_size = 0;
561- phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
562+ seg_size = 0;
563+ phys_size = nr_phys_segs = 0;
564 rq_for_each_segment(bv, rq, iter) {
565 /*
566 * the trick here is making sure that a high page is never
567@@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct reque
568 */
569 high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
570 if (high || highprv)
571- goto new_hw_segment;
572+ goto new_segment;
573 if (cluster) {
574 if (seg_size + bv->bv_len > q->max_segment_size)
575 goto new_segment;
576@@ -74,27 +71,12 @@ void blk_recalc_rq_segments(struct reque
577 goto new_segment;
578 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
579 goto new_segment;
580- if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
581- goto new_hw_segment;
582
583 seg_size += bv->bv_len;
584- hw_seg_size += bv->bv_len;
585 bvprv = bv;
586 continue;
587 }
588 new_segment:
589- if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
590- !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
591- hw_seg_size += bv->bv_len;
592- else {
593-new_hw_segment:
594- if (nr_hw_segs == 1 &&
595- hw_seg_size > rq->bio->bi_hw_front_size)
596- rq->bio->bi_hw_front_size = hw_seg_size;
597- hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
598- nr_hw_segs++;
599- }
600-
601 if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
602 rq->bio->bi_seg_front_size = seg_size;
603
604@@ -104,17 +86,11 @@ new_hw_segment:
605 highprv = high;
606 }
607
608- if (nr_hw_segs == 1 &&
609- hw_seg_size > rq->bio->bi_hw_front_size)
610- rq->bio->bi_hw_front_size = hw_seg_size;
611- if (hw_seg_size > rq->biotail->bi_hw_back_size)
612- rq->biotail->bi_hw_back_size = hw_seg_size;
613 if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
614 rq->bio->bi_seg_front_size = seg_size;
615 if (seg_size > rq->biotail->bi_seg_back_size)
616 rq->biotail->bi_seg_back_size = seg_size;
617 rq->nr_phys_segments = nr_phys_segs;
618- rq->nr_hw_segments = nr_hw_segs;
619 }
620
621 void blk_recount_segments(struct request_queue *q, struct bio *bio)
622@@ -127,7 +103,6 @@ void blk_recount_segments(struct request
623 blk_recalc_rq_segments(&rq);
624 bio->bi_next = nxt;
625 bio->bi_phys_segments = rq.nr_phys_segments;
626- bio->bi_hw_segments = rq.nr_hw_segments;
627 bio->bi_flags |= (1 << BIO_SEG_VALID);
628 }
629 EXPORT_SYMBOL(blk_recount_segments);
630@@ -158,23 +133,6 @@ static int blk_phys_contig_segment(struc
631 return 0;
632 }
633
634-static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
635- struct bio *nxt)
636-{
637- if (!bio_flagged(bio, BIO_SEG_VALID))
638- blk_recount_segments(q, bio);
639- if (!bio_flagged(nxt, BIO_SEG_VALID))
640- blk_recount_segments(q, nxt);
641- if (bio_has_data(bio) &&
642- (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
643- BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)))
644- return 0;
645- if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
646- return 0;
647-
648- return 1;
649-}
650-
651 /*
652 * map a request to scatterlist, return number of sg entries setup. Caller
653 * must make sure sg can hold rq->nr_phys_segments entries
654@@ -288,10 +246,9 @@ static inline int ll_new_hw_segment(stru
655 struct request *req,
656 struct bio *bio)
657 {
658- int nr_hw_segs = bio_hw_segments(q, bio);
659 int nr_phys_segs = bio_phys_segments(q, bio);
660
661- if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
662+ if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
663 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
664 req->cmd_flags |= REQ_NOMERGE;
665 if (req == q->last_merge)
666@@ -303,7 +260,6 @@ static inline int ll_new_hw_segment(stru
667 * This will form the start of a new hw segment. Bump both
668 * counters.
669 */
670- req->nr_hw_segments += nr_hw_segs;
671 req->nr_phys_segments += nr_phys_segs;
672 return 1;
673 }
674@@ -312,7 +268,6 @@ int ll_back_merge_fn(struct request_queu
675 struct bio *bio)
676 {
677 unsigned short max_sectors;
678- int len;
679
680 if (unlikely(blk_pc_request(req)))
681 max_sectors = q->max_hw_sectors;
682@@ -329,20 +284,6 @@ int ll_back_merge_fn(struct request_queu
683 blk_recount_segments(q, req->biotail);
684 if (!bio_flagged(bio, BIO_SEG_VALID))
685 blk_recount_segments(q, bio);
686- len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
687- if (!bio_has_data(bio) ||
688- (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
689- && !BIOVEC_VIRT_OVERSIZE(len))) {
690- int mergeable = ll_new_mergeable(q, req, bio);
691-
692- if (mergeable) {
693- if (req->nr_hw_segments == 1)
694- req->bio->bi_hw_front_size = len;
695- if (bio->bi_hw_segments == 1)
696- bio->bi_hw_back_size = len;
697- }
698- return mergeable;
699- }
700
701 return ll_new_hw_segment(q, req, bio);
702 }
703@@ -351,7 +292,6 @@ int ll_front_merge_fn(struct request_que
704 struct bio *bio)
705 {
706 unsigned short max_sectors;
707- int len;
708
709 if (unlikely(blk_pc_request(req)))
710 max_sectors = q->max_hw_sectors;
711@@ -365,24 +305,10 @@ int ll_front_merge_fn(struct request_que
712 q->last_merge = NULL;
713 return 0;
714 }
715- len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
716 if (!bio_flagged(bio, BIO_SEG_VALID))
717 blk_recount_segments(q, bio);
718 if (!bio_flagged(req->bio, BIO_SEG_VALID))
719 blk_recount_segments(q, req->bio);
720- if (!bio_has_data(bio) ||
721- (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
722- !BIOVEC_VIRT_OVERSIZE(len))) {
723- int mergeable = ll_new_mergeable(q, req, bio);
724-
725- if (mergeable) {
726- if (bio->bi_hw_segments == 1)
727- bio->bi_hw_front_size = len;
728- if (req->nr_hw_segments == 1)
729- req->biotail->bi_hw_back_size = len;
730- }
731- return mergeable;
732- }
733
734 return ll_new_hw_segment(q, req, bio);
735 }
736@@ -391,7 +317,6 @@ static int ll_merge_requests_fn(struct r
737 struct request *next)
738 {
739 int total_phys_segments;
740- int total_hw_segments;
741 unsigned int seg_size =
742 req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;
743
744@@ -420,26 +345,11 @@ static int ll_merge_requests_fn(struct r
745 if (total_phys_segments > q->max_phys_segments)
746 return 0;
747
748- total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
749- if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
750- int len = req->biotail->bi_hw_back_size +
751- next->bio->bi_hw_front_size;
752- /*
753- * propagate the combined length to the end of the requests
754- */
755- if (req->nr_hw_segments == 1)
756- req->bio->bi_hw_front_size = len;
757- if (next->nr_hw_segments == 1)
758- next->biotail->bi_hw_back_size = len;
759- total_hw_segments--;
760- }
761-
762- if (total_hw_segments > q->max_hw_segments)
763+ if (total_phys_segments > q->max_hw_segments)
764 return 0;
765
766 /* Merge is OK... */
767 req->nr_phys_segments = total_phys_segments;
768- req->nr_hw_segments = total_hw_segments;
769 return 1;
770 }
771
772--- a/block/blk-settings.c
773+++ b/block/blk-settings.c
774@@ -144,7 +144,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
775 * Different hardware can have different requirements as to what pages
776 * it can do I/O directly to. A low level driver can call
777 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
778- * buffers for doing I/O to pages residing above @page.
779+ * buffers for doing I/O to pages residing above @dma_addr.
780 **/
781 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
782 {
783@@ -229,7 +229,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segment
784 * Description:
785 * Enables a low level driver to set an upper limit on the number of
786 * hw data segments in a request. This would be the largest number of
787- * address/length pairs the host adapter can actually give as once
788+ * address/length pairs the host adapter can actually give at once
789 * to the device.
790 **/
791 void blk_queue_max_hw_segments(struct request_queue *q,
792@@ -410,7 +410,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary
793 * @mask: alignment mask
794 *
795 * description:
796- * set required memory and length aligment for direct dma transactions.
797+ * set required memory and length alignment for direct dma transactions.
798 * this is used when buiding direct io requests for the queue.
799 *
800 **/
801@@ -426,7 +426,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment);
802 * @mask: alignment mask
803 *
804 * description:
805- * update required memory and length aligment for direct dma transactions.
806+ * update required memory and length alignment for direct dma transactions.
807 * If the requested alignment is larger than the current alignment, then
808 * the current queue alignment is updated to the new value, otherwise it
809 * is left alone. The design of this is to allow multiple objects
810--- /dev/null
811+++ b/block/blk-softirq.c
812@@ -0,0 +1,103 @@
813+/*
814+ * Functions related to softirq rq completions
815+ */
816+#include <linux/kernel.h>
817+#include <linux/module.h>
818+#include <linux/init.h>
819+#include <linux/bio.h>
820+#include <linux/blkdev.h>
821+#include <linux/interrupt.h>
822+#include <linux/cpu.h>
823+
824+#include "blk.h"
825+
826+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
827+
828+static int __cpuinit blk_cpu_notify(struct notifier_block *self,
829+ unsigned long action, void *hcpu)
830+{
831+ /*
832+ * If a CPU goes away, splice its entries to the current CPU
833+ * and trigger a run of the softirq
834+ */
835+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
836+ int cpu = (unsigned long) hcpu;
837+
838+ local_irq_disable();
839+ list_splice_init(&per_cpu(blk_cpu_done, cpu),
840+ &__get_cpu_var(blk_cpu_done));
841+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
842+ local_irq_enable();
843+ }
844+
845+ return NOTIFY_OK;
846+}
847+
848+
849+static struct notifier_block blk_cpu_notifier __cpuinitdata = {
850+ .notifier_call = blk_cpu_notify,
851+};
852+
853+/*
854+ * splice the completion data to a local structure and hand off to
855+ * process_completion_queue() to complete the requests
856+ */
857+static void blk_done_softirq(struct softirq_action *h)
858+{
859+ struct list_head *cpu_list, local_list;
860+
861+ local_irq_disable();
862+ cpu_list = &__get_cpu_var(blk_cpu_done);
863+ list_replace_init(cpu_list, &local_list);
864+ local_irq_enable();
865+
866+ while (!list_empty(&local_list)) {
867+ struct request *rq;
868+
869+ rq = list_entry(local_list.next, struct request, donelist);
870+ list_del_init(&rq->donelist);
871+ rq->q->softirq_done_fn(rq);
872+ }
873+}
874+
875+/**
876+ * blk_complete_request - end I/O on a request
877+ * @req: the request being processed
878+ *
879+ * Description:
880+ * Ends all I/O on a request. It does not handle partial completions,
881+ * unless the driver actually implements this in its completion callback
882+ * through requeueing. The actual completion happens out-of-order,
883+ * through a softirq handler. The user must have registered a completion
884+ * callback through blk_queue_softirq_done().
885+ **/
886+
887+void blk_complete_request(struct request *req)
888+{
889+ struct list_head *cpu_list;
890+ unsigned long flags;
891+
892+ BUG_ON(!req->q->softirq_done_fn);
893+
894+ local_irq_save(flags);
895+
896+ cpu_list = &__get_cpu_var(blk_cpu_done);
897+ list_add_tail(&req->donelist, cpu_list);
898+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
899+
900+ local_irq_restore(flags);
901+}
902+EXPORT_SYMBOL(blk_complete_request);
903+
904+int __init blk_softirq_init(void)
905+{
906+ int i;
907+
908+ for_each_possible_cpu(i)
909+ INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
910+
911+ open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
912+ register_hotcpu_notifier(&blk_cpu_notifier);
913+ return 0;
914+}
915+subsys_initcall(blk_softirq_init);
916--- a/block/blk-tag.c
917+++ b/block/blk-tag.c
918@@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag);
919 * __blk_free_tags - release a given set of tag maintenance info
920 * @bqt: the tag map to free
921 *
922- * Tries to free the specified @bqt@. Returns true if it was
923+ * Tries to free the specified @bqt. Returns true if it was
924 * actually freed and false if there are still references using it
925 */
926 static int __blk_free_tags(struct blk_queue_tag *bqt)
927@@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct reques
928 * blk_free_tags - release a given set of tag maintenance info
929 * @bqt: the tag map to free
930 *
931- * For externally managed @bqt@ frees the map. Callers of this
932+ * For externally managed @bqt frees the map. Callers of this
933 * function must guarantee to have released all the queues that
934 * might have been using this tag map.
935 */
936@@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags);
937 * @q: the request queue for the device
938 *
939 * Notes:
940- * This is used to disabled tagged queuing to a device, yet leave
941+ * This is used to disable tagged queuing to a device, yet leave
942 * queue in function.
943 **/
944 void blk_queue_free_tags(struct request_queue *q)
945@@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
946 * @rq: the request that has completed
947 *
948 * Description:
949- * Typically called when end_that_request_first() returns 0, meaning
950+ * Typically called when end_that_request_first() returns %0, meaning
951 * all transfers have been done for a request. It's important to call
952 * this function before end_that_request_last(), as that will put the
953 * request back on the free list thus corrupting the internal tag list.
954--- a/block/cfq-iosched.c
955+++ b/block/cfq-iosched.c
956@@ -39,6 +39,7 @@ static int cfq_slice_idle = HZ / 125;
957 #define CFQ_MIN_TT (2)
958
959 #define CFQ_SLICE_SCALE (5)
960+#define CFQ_HW_QUEUE_MIN (5)
961
962 #define RQ_CIC(rq) \
963 ((struct cfq_io_context *) (rq)->elevator_private)
964@@ -86,7 +87,14 @@ struct cfq_data {
965
966 int rq_in_driver;
967 int sync_flight;
968+
969+ /*
970+ * queue-depth detection
971+ */
972+ int rq_queued;
973 int hw_tag;
974+ int hw_tag_samples;
975+ int rq_in_driver_peak;
976
977 /*
978 * idle window management
979@@ -654,15 +662,6 @@ static void cfq_activate_request(struct
980 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
981 cfqd->rq_in_driver);
982
983- /*
984- * If the depth is larger 1, it really could be queueing. But lets
985- * make the mark a little higher - idling could still be good for
986- * low queueing, and a low queueing number could also just indicate
987- * a SCSI mid layer like behaviour where limit+1 is often seen.
988- */
989- if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
990- cfqd->hw_tag = 1;
991-
992 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
993 }
994
995@@ -686,6 +685,7 @@ static void cfq_remove_request(struct re
996 list_del_init(&rq->queuelist);
997 cfq_del_rq_rb(rq);
998
999+ cfqq->cfqd->rq_queued--;
1000 if (rq_is_meta(rq)) {
1001 WARN_ON(!cfqq->meta_pending);
1002 cfqq->meta_pending--;
1003@@ -1833,6 +1833,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, s
1004 {
1005 struct cfq_io_context *cic = RQ_CIC(rq);
1006
1007+ cfqd->rq_queued++;
1008 if (rq_is_meta(rq))
1009 cfqq->meta_pending++;
1010
1011@@ -1880,6 +1881,31 @@ static void cfq_insert_request(struct re
1012 cfq_rq_enqueued(cfqd, cfqq, rq);
1013 }
1014
1015+/*
1016+ * Update hw_tag based on peak queue depth over 50 samples under
1017+ * sufficient load.
1018+ */
1019+static void cfq_update_hw_tag(struct cfq_data *cfqd)
1020+{
1021+ if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
1022+ cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
1023+
1024+ if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
1025+ cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
1026+ return;
1027+
1028+ if (cfqd->hw_tag_samples++ < 50)
1029+ return;
1030+
1031+ if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN)
1032+ cfqd->hw_tag = 1;
1033+ else
1034+ cfqd->hw_tag = 0;
1035+
1036+ cfqd->hw_tag_samples = 0;
1037+ cfqd->rq_in_driver_peak = 0;
1038+}
1039+
1040 static void cfq_completed_request(struct request_queue *q, struct request *rq)
1041 {
1042 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1043@@ -1890,6 +1916,8 @@ static void cfq_completed_request(struct
1044 now = jiffies;
1045 cfq_log_cfqq(cfqd, cfqq, "complete");
1046
1047+ cfq_update_hw_tag(cfqd);
1048+
1049 WARN_ON(!cfqd->rq_in_driver);
1050 WARN_ON(!cfqq->dispatched);
1051 cfqd->rq_in_driver--;
1052@@ -2200,6 +2228,7 @@ static void *cfq_init_queue(struct reque
1053 cfqd->cfq_slice[1] = cfq_slice_sync;
1054 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
1055 cfqd->cfq_slice_idle = cfq_slice_idle;
1056+ cfqd->hw_tag = 1;
1057
1058 return cfqd;
1059 }
1060--- a/block/deadline-iosched.c
1061+++ b/block/deadline-iosched.c
1062@@ -33,7 +33,7 @@ struct deadline_data {
1063 */
1064 struct rb_root sort_list[2];
1065 struct list_head fifo_list[2];
1066-
1067+
1068 /*
1069 * next in sort order. read, write or both are NULL
1070 */
1071@@ -53,7 +53,11 @@ struct deadline_data {
1072
1073 static void deadline_move_request(struct deadline_data *, struct request *);
1074
1075-#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))])
1076+static inline struct rb_root *
1077+deadline_rb_root(struct deadline_data *dd, struct request *rq)
1078+{
1079+ return &dd->sort_list[rq_data_dir(rq)];
1080+}
1081
1082 /*
1083 * get the request after `rq' in sector-sorted order
1084@@ -72,15 +76,11 @@ deadline_latter_request(struct request *
1085 static void
1086 deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
1087 {
1088- struct rb_root *root = RQ_RB_ROOT(dd, rq);
1089+ struct rb_root *root = deadline_rb_root(dd, rq);
1090 struct request *__alias;
1091
1092-retry:
1093- __alias = elv_rb_add(root, rq);
1094- if (unlikely(__alias)) {
1095+ while (unlikely(__alias = elv_rb_add(root, rq)))
1096 deadline_move_request(dd, __alias);
1097- goto retry;
1098- }
1099 }
1100
1101 static inline void
1102@@ -91,7 +91,7 @@ deadline_del_rq_rb(struct deadline_data
1103 if (dd->next_rq[data_dir] == rq)
1104 dd->next_rq[data_dir] = deadline_latter_request(rq);
1105
1106- elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
1107+ elv_rb_del(deadline_rb_root(dd, rq), rq);
1108 }
1109
1110 /*
1111@@ -106,7 +106,7 @@ deadline_add_request(struct request_queu
1112 deadline_add_rq_rb(dd, rq);
1113
1114 /*
1115- * set expire time (only used for reads) and add to fifo list
1116+ * set expire time and add to fifo list
1117 */
1118 rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
1119 list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
1120@@ -162,7 +162,7 @@ static void deadline_merged_request(stru
1121 * if the merge was a front merge, we need to reposition request
1122 */
1123 if (type == ELEVATOR_FRONT_MERGE) {
1124- elv_rb_del(RQ_RB_ROOT(dd, req), req);
1125+ elv_rb_del(deadline_rb_root(dd, req), req);
1126 deadline_add_rq_rb(dd, req);
1127 }
1128 }
1129@@ -212,7 +212,7 @@ deadline_move_request(struct deadline_da
1130 dd->next_rq[WRITE] = NULL;
1131 dd->next_rq[data_dir] = deadline_latter_request(rq);
1132
1133- dd->last_sector = rq->sector + rq->nr_sectors;
1134+ dd->last_sector = rq_end_sector(rq);
1135
1136 /*
1137 * take it off the sort and fifo list, move
1138@@ -222,7 +222,7 @@ deadline_move_request(struct deadline_da
1139 }
1140
1141 /*
1142- * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
1143+ * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
1144 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
1145 */
1146 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
1147@@ -258,17 +258,9 @@ static int deadline_dispatch_requests(st
1148 else
1149 rq = dd->next_rq[READ];
1150
1151- if (rq) {
1152- /* we have a "next request" */
1153-
1154- if (dd->last_sector != rq->sector)
1155- /* end the batch on a non sequential request */
1156- dd->batching += dd->fifo_batch;
1157-
1158- if (dd->batching < dd->fifo_batch)
1159- /* we are still entitled to batch */
1160- goto dispatch_request;
1161- }
1162+ if (rq && dd->batching < dd->fifo_batch)
1163+ /* we have a next request and are still entitled to batch */
1164+ goto dispatch_request;
1165
1166 /*
1167 * at this point we are not running a batch. select the appropriate
1168--- a/block/elevator.c
1169+++ b/block/elevator.c
1170@@ -34,8 +34,7 @@
1171 #include <linux/delay.h>
1172 #include <linux/blktrace_api.h>
1173 #include <linux/hash.h>
1174-
1175-#include <asm/uaccess.h>
1176+#include <linux/uaccess.h>
1177
1178 static DEFINE_SPINLOCK(elv_list_lock);
1179 static LIST_HEAD(elv_list);
1180@@ -790,7 +789,6 @@ struct request *elv_next_request(struct
1181 * device can handle
1182 */
1183 rq->nr_phys_segments++;
1184- rq->nr_hw_segments++;
1185 }
1186
1187 if (!q->prep_rq_fn)
1188@@ -813,7 +811,6 @@ struct request *elv_next_request(struct
1189 * so that we don't add it again
1190 */
1191 --rq->nr_phys_segments;
1192- --rq->nr_hw_segments;
1193 }
1194
1195 rq = NULL;
1196--- a/block/genhd.c
1197+++ b/block/genhd.c
1198@@ -211,10 +211,11 @@ void unlink_gendisk(struct gendisk *disk
1199
1200 /**
1201 * get_gendisk - get partitioning information for a given device
1202- * @dev: device to get partitioning information for
1203+ * @devt: device to get partitioning information for
1204+ * @part: returned partition index
1205 *
1206 * This function gets the structure containing partitioning
1207- * information for the given device @dev.
1208+ * information for the given device @devt.
1209 */
1210 struct gendisk *get_gendisk(dev_t devt, int *part)
1211 {
1212--- a/block/Makefile
1213+++ b/block/Makefile
1214@@ -4,8 +4,8 @@
1215
1216 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
1217 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
1218- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
1219- cmd-filter.o
1220+ blk-exec.o blk-merge.o blk-softirq.o ioctl.o genhd.o \
1221+ scsi_ioctl.o cmd-filter.o
1222
1223 obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
1224 obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
1225--- a/Documentation/block/deadline-iosched.txt
1226+++ b/Documentation/block/deadline-iosched.txt
1227@@ -30,12 +30,18 @@ write_expire (in ms)
1228 Similar to read_expire mentioned above, but for writes.
1229
1230
1231-fifo_batch
1232+fifo_batch (number of requests)
1233 ----------
1234
1235-When a read request expires its deadline, we must move some requests from
1236-the sorted io scheduler list to the block device dispatch queue. fifo_batch
1237-controls how many requests we move.
1238+Requests are grouped into ``batches'' of a particular data direction (read or
1239+write) which are serviced in increasing sector order. To limit extra seeking,
1240+deadline expiries are only checked between batches. fifo_batch controls the
1241+maximum number of requests per batch.
1242+
1243+This parameter tunes the balance between per-request latency and aggregate
1244+throughput. When low latency is the primary concern, smaller is better (where
1245+a value of 1 yields first-come first-served behaviour). Increasing fifo_batch
1246+generally improves throughput, at the cost of latency variation.
1247
1248
1249 writes_starved (number of dispatches)
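
For illustration only, not part of the patch: a minimal C sketch of how the fifo_batch tunable described above can be inspected and lowered through sysfs. The device name (sda) and hence the exact path /sys/block/sda/queue/iosched/fifo_batch are assumptions for the example; the tunable is only present while the deadline scheduler is active on that queue.

/*
 * Illustration only (not part of the patch): read the current fifo_batch
 * value and shrink it towards first-come first-served behaviour, trading
 * throughput for lower per-request latency. Device name is an assumption.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/block/sda/queue/iosched/fifo_batch";
	FILE *f = fopen(path, "r");
	int batch;

	if (!f || fscanf(f, "%d", &batch) != 1) {
		perror(path);
		return 1;
	}
	fclose(f);
	printf("current fifo_batch: %d\n", batch);

	/* favour per-request latency: a value of 1 is effectively FCFS */
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "1\n");
	fclose(f);
	return 0;
}
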
1250--- a/Documentation/DocBook/kernel-api.tmpl
1251+++ b/Documentation/DocBook/kernel-api.tmpl
1252@@ -364,6 +364,10 @@ X!Edrivers/pnp/system.c
1253 !Eblock/blk-barrier.c
1254 !Eblock/blk-tag.c
1255 !Iblock/blk-tag.c
1256+!Eblock/blk-integrity.c
1257+!Iblock/blktrace.c
1258+!Iblock/genhd.c
1259+!Eblock/genhd.c
1260 </chapter>
1261
1262 <chapter id="chrdev">
1263--- a/drivers/block/ps3disk.c
1264+++ b/drivers/block/ps3disk.c
1265@@ -199,7 +199,8 @@ static void ps3disk_do_request(struct ps
1266 if (blk_fs_request(req)) {
1267 if (ps3disk_submit_request_sg(dev, req))
1268 break;
1269- } else if (req->cmd_type == REQ_TYPE_FLUSH) {
1270+ } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
1271+ req->cmd[0] == REQ_LB_OP_FLUSH) {
1272 if (ps3disk_submit_flush_request(dev, req))
1273 break;
1274 } else {
1275@@ -257,7 +258,8 @@ static irqreturn_t ps3disk_interrupt(int
1276 return IRQ_HANDLED;
1277 }
1278
1279- if (req->cmd_type == REQ_TYPE_FLUSH) {
1280+ if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
1281+ req->cmd[0] == REQ_LB_OP_FLUSH) {
1282 read = 0;
1283 num_sectors = req->hard_cur_sectors;
1284 op = "flush";
1285@@ -405,7 +407,8 @@ static void ps3disk_prepare_flush(struct
1286
1287 dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
1288
1289- req->cmd_type = REQ_TYPE_FLUSH;
1290+ req->cmd_type = REQ_TYPE_LINUX_BLOCK;
1291+ req->cmd[0] = REQ_LB_OP_FLUSH;
1292 }
1293
1294 static unsigned long ps3disk_mask;
1295--- a/drivers/block/virtio_blk.c
1296+++ b/drivers/block/virtio_blk.c
1297@@ -84,11 +84,11 @@ static bool do_req(struct request_queue
1298 if (blk_fs_request(vbr->req)) {
1299 vbr->out_hdr.type = 0;
1300 vbr->out_hdr.sector = vbr->req->sector;
1301- vbr->out_hdr.ioprio = vbr->req->ioprio;
1302+ vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
1303 } else if (blk_pc_request(vbr->req)) {
1304 vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
1305 vbr->out_hdr.sector = 0;
1306- vbr->out_hdr.ioprio = vbr->req->ioprio;
1307+ vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
1308 } else {
1309 /* We don't put anything else in the queue. */
1310 BUG();
1311--- a/drivers/md/raid10.c
1312+++ b/drivers/md/raid10.c
1313@@ -1346,9 +1346,6 @@ static void sync_request_write(mddev_t *
1314 tbio->bi_size = r10_bio->sectors << 9;
1315 tbio->bi_idx = 0;
1316 tbio->bi_phys_segments = 0;
1317- tbio->bi_hw_segments = 0;
1318- tbio->bi_hw_front_size = 0;
1319- tbio->bi_hw_back_size = 0;
1320 tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1321 tbio->bi_flags |= 1 << BIO_UPTODATE;
1322 tbio->bi_next = NULL;
1323@@ -1948,7 +1945,6 @@ static sector_t sync_request(mddev_t *md
1324 bio->bi_vcnt = 0;
1325 bio->bi_idx = 0;
1326 bio->bi_phys_segments = 0;
1327- bio->bi_hw_segments = 0;
1328 bio->bi_size = 0;
1329 }
1330
1331--- a/drivers/md/raid1.c
1332+++ b/drivers/md/raid1.c
1333@@ -1303,9 +1303,6 @@ static void sync_request_write(mddev_t *
1334 sbio->bi_size = r1_bio->sectors << 9;
1335 sbio->bi_idx = 0;
1336 sbio->bi_phys_segments = 0;
1337- sbio->bi_hw_segments = 0;
1338- sbio->bi_hw_front_size = 0;
1339- sbio->bi_hw_back_size = 0;
1340 sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1341 sbio->bi_flags |= 1 << BIO_UPTODATE;
1342 sbio->bi_next = NULL;
1343@@ -1791,7 +1788,6 @@ static sector_t sync_request(mddev_t *md
1344 bio->bi_vcnt = 0;
1345 bio->bi_idx = 0;
1346 bio->bi_phys_segments = 0;
1347- bio->bi_hw_segments = 0;
1348 bio->bi_size = 0;
1349 bio->bi_end_io = NULL;
1350 bio->bi_private = NULL;
1351--- a/drivers/md/raid5.c
1352+++ b/drivers/md/raid5.c
1353@@ -101,6 +101,40 @@
1354 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
1355 #endif
1356
1357+/*
1358+ * We maintain a biased count of active stripes in the bottom 16 bits of
1359+ * bi_phys_segments, and a count of processed stripes in the upper 16 bits
1360+ */
1361+static inline int raid5_bi_phys_segments(struct bio *bio)
1362+{
1363+ return bio->bi_phys_segments & 0xffff;
1364+}
1365+
1366+static inline int raid5_bi_hw_segments(struct bio *bio)
1367+{
1368+ return (bio->bi_phys_segments >> 16) & 0xffff;
1369+}
1370+
1371+static inline int raid5_dec_bi_phys_segments(struct bio *bio)
1372+{
1373+ --bio->bi_phys_segments;
1374+ return raid5_bi_phys_segments(bio);
1375+}
1376+
1377+static inline int raid5_dec_bi_hw_segments(struct bio *bio)
1378+{
1379+ unsigned short val = raid5_bi_hw_segments(bio);
1380+
1381+ --val;
1382+ bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
1383+ return val;
1384+}
1385+
1386+static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
1387+{
1388+ bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
1389+}
1390+
1391 static inline int raid6_next_disk(int disk, int raid_disks)
1392 {
1393 disk++;
1394@@ -507,7 +541,7 @@ static void ops_complete_biofill(void *s
1395 while (rbi && rbi->bi_sector <
1396 dev->sector + STRIPE_SECTORS) {
1397 rbi2 = r5_next_bio(rbi, dev->sector);
1398- if (--rbi->bi_phys_segments == 0) {
1399+ if (!raid5_dec_bi_phys_segments(rbi)) {
1400 rbi->bi_next = return_bi;
1401 return_bi = rbi;
1402 }
1403@@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_
1404 if (*bip)
1405 bi->bi_next = *bip;
1406 *bip = bi;
1407- bi->bi_phys_segments ++;
1408+ bi->bi_phys_segments++;
1409 spin_unlock_irq(&conf->device_lock);
1410 spin_unlock(&sh->lock);
1411
1412@@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf,
1413 sh->dev[i].sector + STRIPE_SECTORS) {
1414 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
1415 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1416- if (--bi->bi_phys_segments == 0) {
1417+ if (!raid5_dec_bi_phys_segments(bi)) {
1418 md_write_end(conf->mddev);
1419 bi->bi_next = *return_bi;
1420 *return_bi = bi;
1421@@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf,
1422 sh->dev[i].sector + STRIPE_SECTORS) {
1423 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
1424 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1425- if (--bi->bi_phys_segments == 0) {
1426+ if (!raid5_dec_bi_phys_segments(bi)) {
1427 md_write_end(conf->mddev);
1428 bi->bi_next = *return_bi;
1429 *return_bi = bi;
1430@@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf,
1431 struct bio *nextbi =
1432 r5_next_bio(bi, sh->dev[i].sector);
1433 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1434- if (--bi->bi_phys_segments == 0) {
1435+ if (!raid5_dec_bi_phys_segments(bi)) {
1436 bi->bi_next = *return_bi;
1437 *return_bi = bi;
1438 }
1439@@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(ra
1440 while (wbi && wbi->bi_sector <
1441 dev->sector + STRIPE_SECTORS) {
1442 wbi2 = r5_next_bio(wbi, dev->sector);
1443- if (--wbi->bi_phys_segments == 0) {
1444+ if (!raid5_dec_bi_phys_segments(wbi)) {
1445 md_write_end(conf->mddev);
1446 wbi->bi_next = *return_bi;
1447 *return_bi = wbi;
1448@@ -2814,7 +2848,7 @@ static bool handle_stripe6(struct stripe
1449 copy_data(0, rbi, dev->page, dev->sector);
1450 rbi2 = r5_next_bio(rbi, dev->sector);
1451 spin_lock_irq(&conf->device_lock);
1452- if (--rbi->bi_phys_segments == 0) {
1453+ if (!raid5_dec_bi_phys_segments(rbi)) {
1454 rbi->bi_next = return_bi;
1455 return_bi = rbi;
1456 }
1457@@ -3155,8 +3189,11 @@ static struct bio *remove_bio_from_retry
1458 if(bi) {
1459 conf->retry_read_aligned_list = bi->bi_next;
1460 bi->bi_next = NULL;
1461+ /*
1462+ * this sets the active stripe count to 1 and the processed
1463+ * stripe count to zero (upper 16 bits)
1464+ */
1465 bi->bi_phys_segments = 1; /* biased count of active stripes */
1466- bi->bi_hw_segments = 0; /* count of processed stripes */
1467 }
1468
1469 return bi;
1470@@ -3206,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi)
1471 if ((bi->bi_size>>9) > q->max_sectors)
1472 return 0;
1473 blk_recount_segments(q, bi);
1474- if (bi->bi_phys_segments > q->max_phys_segments ||
1475- bi->bi_hw_segments > q->max_hw_segments)
1476+ if (bi->bi_phys_segments > q->max_phys_segments)
1477 return 0;
1478
1479 if (q->merge_bvec_fn)
1480@@ -3468,7 +3504,7 @@ static int make_request(struct request_q
1481
1482 }
1483 spin_lock_irq(&conf->device_lock);
1484- remaining = --bi->bi_phys_segments;
1485+ remaining = raid5_dec_bi_phys_segments(bi);
1486 spin_unlock_irq(&conf->device_lock);
1487 if (remaining == 0) {
1488
1489@@ -3752,7 +3788,7 @@ static int retry_aligned_read(raid5_con
1490 sector += STRIPE_SECTORS,
1491 scnt++) {
1492
1493- if (scnt < raid_bio->bi_hw_segments)
1494+ if (scnt < raid5_bi_hw_segments(raid_bio))
1495 /* already done this stripe */
1496 continue;
1497
1498@@ -3760,7 +3796,7 @@ static int retry_aligned_read(raid5_con
1499
1500 if (!sh) {
1501 /* failed to get a stripe - must wait */
1502- raid_bio->bi_hw_segments = scnt;
1503+ raid5_set_bi_hw_segments(raid_bio, scnt);
1504 conf->retry_read_aligned = raid_bio;
1505 return handled;
1506 }
1507@@ -3768,7 +3804,7 @@ static int retry_aligned_read(raid5_con
1508 set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
1509 if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
1510 release_stripe(sh);
1511- raid_bio->bi_hw_segments = scnt;
1512+ raid5_set_bi_hw_segments(raid_bio, scnt);
1513 conf->retry_read_aligned = raid_bio;
1514 return handled;
1515 }
1516@@ -3778,7 +3814,7 @@ static int retry_aligned_read(raid5_con
1517 handled++;
1518 }
1519 spin_lock_irq(&conf->device_lock);
1520- remaining = --raid_bio->bi_phys_segments;
1521+ remaining = raid5_dec_bi_phys_segments(raid_bio);
1522 spin_unlock_irq(&conf->device_lock);
1523 if (remaining == 0)
1524 bio_endio(raid_bio, 0);
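
For illustration only, not part of the patch: a small user-space sketch of the 16/16 bit packing that the raid5_bi_*_segments() helpers added above implement. The low 16 bits of bi_phys_segments carry the biased count of active stripes, the high 16 bits the count of processed stripes; bitwise OR and shifts keep the two counters independent.

/*
 * Illustration only (not part of the patch): user-space model of the
 * packing scheme used by the raid5 helpers above.
 */
#include <assert.h>
#include <stdio.h>

static unsigned int pack(unsigned int active, unsigned int processed)
{
	return (active & 0xffff) | (processed << 16);
}

static unsigned int active_part(unsigned int v)
{
	return v & 0xffff;	/* biased count of active stripes */
}

static unsigned int processed_part(unsigned int v)
{
	return (v >> 16) & 0xffff;	/* count of processed stripes */
}

int main(void)
{
	/* bias the active count to 1, no stripes processed yet */
	unsigned int v = pack(1, 0);

	/* record that three stripes have been handled so far */
	v = pack(active_part(v), 3);

	printf("raw=0x%08x active=%u processed=%u\n",
	       v, active_part(v), processed_part(v));
	assert(active_part(v) == 1 && processed_part(v) == 3);
	return 0;
}
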
1525--- a/fs/bio.c
1526+++ b/fs/bio.c
1527@@ -208,14 +208,6 @@ inline int bio_phys_segments(struct requ
1528 return bio->bi_phys_segments;
1529 }
1530
1531-inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
1532-{
1533- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1534- blk_recount_segments(q, bio);
1535-
1536- return bio->bi_hw_segments;
1537-}
1538-
1539 /**
1540 * __bio_clone - clone a bio
1541 * @bio: destination bio
1542@@ -350,8 +342,7 @@ static int __bio_add_page(struct request
1543 */
1544
1545 while (bio->bi_phys_segments >= q->max_phys_segments
1546- || bio->bi_hw_segments >= q->max_hw_segments
1547- || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
1548+ || bio->bi_phys_segments >= q->max_hw_segments) {
1549
1550 if (retried_segments)
1551 return 0;
1552@@ -395,13 +386,11 @@ static int __bio_add_page(struct request
1553 }
1554
1555 /* If we may be able to merge these biovecs, force a recount */
1556- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
1557- BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
1558+ if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
1559 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
1560
1561 bio->bi_vcnt++;
1562 bio->bi_phys_segments++;
1563- bio->bi_hw_segments++;
1564 done:
1565 bio->bi_size += len;
1566 return len;
1567@@ -1393,7 +1382,6 @@ EXPORT_SYMBOL(bio_init);
1568 EXPORT_SYMBOL(__bio_clone);
1569 EXPORT_SYMBOL(bio_clone);
1570 EXPORT_SYMBOL(bio_phys_segments);
1571-EXPORT_SYMBOL(bio_hw_segments);
1572 EXPORT_SYMBOL(bio_add_page);
1573 EXPORT_SYMBOL(bio_add_pc_page);
1574 EXPORT_SYMBOL(bio_get_nr_vecs);
1575--- a/include/linux/bio.h
1576+++ b/include/linux/bio.h
1577@@ -26,21 +26,8 @@
1578
1579 #ifdef CONFIG_BLOCK
1580
1581-/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
1582 #include <asm/io.h>
1583
1584-#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY)
1585-#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1))
1586-#define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE)
1587-#else
1588-#define BIOVEC_VIRT_START_SIZE(x) 0
1589-#define BIOVEC_VIRT_OVERSIZE(x) 0
1590-#endif
1591-
1592-#ifndef BIO_VMERGE_BOUNDARY
1593-#define BIO_VMERGE_BOUNDARY 0
1594-#endif
1595-
1596 #define BIO_DEBUG
1597
1598 #ifdef BIO_DEBUG
1599@@ -88,12 +75,7 @@ struct bio {
1600 /* Number of segments in this BIO after
1601 * physical address coalescing is performed.
1602 */
1603- unsigned short bi_phys_segments;
1604-
1605- /* Number of segments after physical and DMA remapping
1606- * hardware coalescing is performed.
1607- */
1608- unsigned short bi_hw_segments;
1609+ unsigned int bi_phys_segments;
1610
1611 unsigned int bi_size; /* residual I/O count */
1612
1613@@ -104,14 +86,6 @@ struct bio {
1614 unsigned int bi_seg_front_size;
1615 unsigned int bi_seg_back_size;
1616
1617- /*
1618- * To keep track of the max hw size, we account for the
1619- * sizes of the first and last virtually mergeable segments
1620- * in this bio
1621- */
1622- unsigned int bi_hw_front_size;
1623- unsigned int bi_hw_back_size;
1624-
1625 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
1626
1627 struct bio_vec *bi_io_vec; /* the actual vec list */
1628@@ -133,7 +107,7 @@ struct bio {
1629 #define BIO_UPTODATE 0 /* ok after I/O completion */
1630 #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */
1631 #define BIO_EOF 2 /* out-out-bounds error */
1632-#define BIO_SEG_VALID 3 /* nr_hw_seg valid */
1633+#define BIO_SEG_VALID 3 /* bi_phys_segments valid */
1634 #define BIO_CLONED 4 /* doesn't own data */
1635 #define BIO_BOUNCED 5 /* bio is a bounce bio */
1636 #define BIO_USER_MAPPED 6 /* contains user pages */
1637@@ -247,8 +221,6 @@ static inline void *bio_data(struct bio
1638 ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
1639 #endif
1640
1641-#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \
1642- ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
1643 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
1644 (((addr1) | (mask)) == (((addr2) - 1) | (mask)))
1645 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
1646@@ -346,7 +318,6 @@ extern void bio_free(struct bio *, struc
1647 extern void bio_endio(struct bio *, int);
1648 struct request_queue;
1649 extern int bio_phys_segments(struct request_queue *, struct bio *);
1650-extern int bio_hw_segments(struct request_queue *, struct bio *);
1651
1652 extern void __bio_clone(struct bio *, struct bio *);
1653 extern struct bio *bio_clone(struct bio *, gfp_t);
1654--- a/include/linux/blkdev.h
1655+++ b/include/linux/blkdev.h
1656@@ -54,7 +54,6 @@ enum rq_cmd_type_bits {
1657 REQ_TYPE_PM_SUSPEND, /* suspend request */
1658 REQ_TYPE_PM_RESUME, /* resume request */
1659 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
1660- REQ_TYPE_FLUSH, /* flush request */
1661 REQ_TYPE_SPECIAL, /* driver defined type */
1662 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
1663 /*
1664@@ -76,11 +75,8 @@ enum rq_cmd_type_bits {
1665 *
1666 */
1667 enum {
1668- /*
1669- * just examples for now
1670- */
1671 REQ_LB_OP_EJECT = 0x40, /* eject request */
1672- REQ_LB_OP_FLUSH = 0x41, /* flush device */
1673+ REQ_LB_OP_FLUSH = 0x41, /* flush request */
1674 REQ_LB_OP_DISCARD = 0x42, /* discard sectors */
1675 };
1676
1677@@ -193,13 +189,6 @@ struct request {
1678 */
1679 unsigned short nr_phys_segments;
1680
1681- /* Number of scatter-gather addr+len pairs after
1682- * physical and DMA remapping hardware coalescing is performed.
1683- * This is the number of scatter-gather entries the driver
1684- * will actually have to deal with after DMA mapping is done.
1685- */
1686- unsigned short nr_hw_segments;
1687-
1688 unsigned short ioprio;
1689
1690 void *special;
1691@@ -236,6 +225,11 @@ struct request {
1692 struct request *next_rq;
1693 };
1694
1695+static inline unsigned short req_get_ioprio(struct request *req)
1696+{
1697+ return req->ioprio;
1698+}
1699+
1700 /*
1701 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
1702 * requests. Some step values could eventually be made generic.