1From: Jens Axboe <jens.axboe@oracle.com>
2Subject: Block layer fixes for 2.6.28
3Patch-Mainline: 2.6.28
4
5This is a combined patchset with block layer fixes from 2.6.28.
6Commit IDs:
797dee27d1c4d6041ff1cc8150db95fe3eab6be5a
800bbda44114e70fc9879731be3c888122b1de8b1
97452d2a2be657becb2f385d0e0864ba51f1ae694
10075a108f7d4dd24b8b69e59edcdf1a0fd84e6541
117a1b6029bf9ff3d0636e318d2482031dc493df16
12b3a5faf3cefbff4b69ca181767b882bbd6189aaf
138fe902de23b4f4012db91f538cafd864c63308e7
14dfef13dad8d34d0a9e83adee3e8cd9f94cca465e
15d2629dd70132f90f9d1bca07572197e9adea25b1
161f08a4484a223cb337e0466042005421cd55d22b
17fcdc7361d2925596d69d0538d738c08c221a69c9
18cd93bcfa9ca9b15051220614160131c53d7f33f0
19d371ca6b8a21a617b8607d23f7202197ad40482a
20910ee03b1e61d5cfb121dfb1ee7c127f18bdae01
21
22Signed-off-by: Hannes Reinecke <hare@suse.de>
23
24---
25 Documentation/DocBook/kernel-api.tmpl | 4
26 Documentation/block/deadline-iosched.txt | 14 +-
27 block/Makefile | 4
28 block/blk-core.c | 166 +++++++------------------------
29 block/blk-exec.c | 6 -
30 block/blk-integrity.c | 4
31 block/blk-map.c | 16 +-
32 block/blk-merge.c | 100 ------------------
33 block/blk-settings.c | 8 -
34 block/blk-softirq.c | 103 +++++++++++++++++++
35 block/blk-tag.c | 8 -
36 block/cfq-iosched.c | 47 +++++++-
37 block/deadline-iosched.c | 40 ++-----
38 block/elevator.c | 5
39 block/genhd.c | 5
40 drivers/block/ps3disk.c | 9 +
41 drivers/block/virtio_blk.c | 4
42 drivers/md/raid1.c | 4
43 drivers/md/raid10.c | 4
44 drivers/md/raid5.c | 66 +++++++++---
45 fs/bio.c | 16 --
46 include/linux/bio.h | 33 ------
47 include/linux/blkdev.h | 18 +--
48 23 files changed, 310 insertions(+), 374 deletions(-)
49
50--- a/Documentation/DocBook/kernel-api.tmpl
51+++ b/Documentation/DocBook/kernel-api.tmpl
52@@ -364,6 +364,10 @@ X!Edrivers/pnp/system.c
53 !Eblock/blk-barrier.c
54 !Eblock/blk-tag.c
55 !Iblock/blk-tag.c
56+!Eblock/blk-integrity.c
57+!Iblock/blktrace.c
58+!Iblock/genhd.c
59+!Eblock/genhd.c
60 </chapter>
61
62 <chapter id="chrdev">
63--- a/Documentation/block/deadline-iosched.txt
64+++ b/Documentation/block/deadline-iosched.txt
65@@ -30,12 +30,18 @@ write_expire (in ms)
66 Similar to read_expire mentioned above, but for writes.
67
68
69-fifo_batch
70+fifo_batch (number of requests)
71 ----------
72
73-When a read request expires its deadline, we must move some requests from
74-the sorted io scheduler list to the block device dispatch queue. fifo_batch
75-controls how many requests we move.
76+Requests are grouped into ``batches'' of a particular data direction (read or
77+write) which are serviced in increasing sector order. To limit extra seeking,
78+deadline expiries are only checked between batches. fifo_batch controls the
79+maximum number of requests per batch.
80+
81+This parameter tunes the balance between per-request latency and aggregate
82+throughput. When low latency is the primary concern, smaller is better (where
83+a value of 1 yields first-come first-served behaviour). Increasing fifo_batch
84+generally improves throughput, at the cost of latency variation.
85
86
87 writes_starved (number of dispatches)
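
The fifo_batch tunable documented above can be changed at runtime through sysfs.
A minimal sketch (assuming a disk named sda with deadline as its active elevator;
the device name and the value written are illustrative, not part of this patch):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* deadline's per-queue tunables live under queue/iosched/ in sysfs */
	FILE *f = fopen("/sys/block/sda/queue/iosched/fifo_batch", "w");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	/* smaller values favour latency, larger values favour throughput */
	fprintf(f, "%d\n", 16);
	fclose(f);
	return EXIT_SUCCESS;
}
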
88--- a/block/Makefile
89+++ b/block/Makefile
90@@ -4,8 +4,8 @@
91
92 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
93 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
94- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
95- cmd-filter.o
96+ blk-exec.o blk-merge.o blk-softirq.o ioctl.o genhd.o \
97+ scsi_ioctl.o cmd-filter.o
98
99 obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
100 obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
101--- a/block/blk-core.c
102+++ b/block/blk-core.c
103@@ -26,8 +26,6 @@
104 #include <linux/swap.h>
105 #include <linux/writeback.h>
106 #include <linux/task_io_accounting_ops.h>
107-#include <linux/interrupt.h>
108-#include <linux/cpu.h>
109 #include <linux/blktrace_api.h>
110 #include <linux/fault-inject.h>
111
112@@ -50,8 +48,6 @@ struct kmem_cache *blk_requestq_cachep;
113 */
114 static struct workqueue_struct *kblockd_workqueue;
115
116-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
117-
118 static void drive_stat_acct(struct request *rq, int new_io)
119 {
120 struct hd_struct *part;
121@@ -531,7 +527,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
122 * request queue; this lock will be taken also from interrupt context, so irq
123 * disabling is needed for it.
124 *
125- * Function returns a pointer to the initialized request queue, or NULL if
126+ * Function returns a pointer to the initialized request queue, or %NULL if
127 * it didn't succeed.
128 *
129 * Note:
130@@ -913,7 +909,7 @@ void blk_requeue_request(struct request_
131 EXPORT_SYMBOL(blk_requeue_request);
132
133 /**
134- * blk_insert_request - insert a special request in to a request queue
135+ * blk_insert_request - insert a special request into a request queue
136 * @q: request queue where request should be inserted
137 * @rq: request to be inserted
138 * @at_head: insert request at head or tail of queue
139@@ -923,8 +919,8 @@ EXPORT_SYMBOL(blk_requeue_request);
140 * Many block devices need to execute commands asynchronously, so they don't
141 * block the whole kernel from preemption during request execution. This is
142 * accomplished normally by inserting aritficial requests tagged as
143- * REQ_SPECIAL in to the corresponding request queue, and letting them be
144- * scheduled for actual execution by the request queue.
145+ * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
146+ * be scheduled for actual execution by the request queue.
147 *
148 * We have the option of inserting the head or the tail of the queue.
149 * Typically we use the tail for new ioctls and so forth. We use the head
150@@ -1322,7 +1318,7 @@ static inline int bio_check_eod(struct b
151 }
152
153 /**
154- * generic_make_request: hand a buffer to its device driver for I/O
155+ * generic_make_request - hand a buffer to its device driver for I/O
156 * @bio: The bio describing the location in memory and on the device.
157 *
158 * generic_make_request() is used to make I/O requests of block
159@@ -1480,13 +1476,13 @@ void generic_make_request(struct bio *bi
160 EXPORT_SYMBOL(generic_make_request);
161
162 /**
163- * submit_bio: submit a bio to the block device layer for I/O
164+ * submit_bio - submit a bio to the block device layer for I/O
165 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
166 * @bio: The &struct bio which describes the I/O
167 *
168 * submit_bio() is very similar in purpose to generic_make_request(), and
169 * uses that function to do most of the work. Both are fairly rough
170- * interfaces, @bio must be presetup and ready for I/O.
171+ * interfaces; @bio must be presetup and ready for I/O.
172 *
173 */
174 void submit_bio(int rw, struct bio *bio)
175@@ -1524,7 +1520,7 @@ EXPORT_SYMBOL(submit_bio);
176 /**
177 * __end_that_request_first - end I/O on a request
178 * @req: the request being processed
179- * @error: 0 for success, < 0 for error
180+ * @error: %0 for success, < %0 for error
181 * @nr_bytes: number of bytes to complete
182 *
183 * Description:
184@@ -1532,8 +1528,8 @@ EXPORT_SYMBOL(submit_bio);
185 * for the next range of segments (if any) in the cluster.
186 *
187 * Return:
188- * 0 - we are done with this request, call end_that_request_last()
189- * 1 - still buffers pending for this request
190+ * %0 - we are done with this request, call end_that_request_last()
191+ * %1 - still buffers pending for this request
192 **/
193 static int __end_that_request_first(struct request *req, int error,
194 int nr_bytes)
195@@ -1544,7 +1540,7 @@ static int __end_that_request_first(stru
196 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
197
198 /*
199- * for a REQ_BLOCK_PC request, we want to carry any eventual
200+ * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
201 * sense key with us all the way through
202 */
203 if (!blk_pc_request(req))
204@@ -1646,82 +1642,6 @@ static int __end_that_request_first(stru
205 }
206
207 /*
208- * splice the completion data to a local structure and hand off to
209- * process_completion_queue() to complete the requests
210- */
211-static void blk_done_softirq(struct softirq_action *h)
212-{
213- struct list_head *cpu_list, local_list;
214-
215- local_irq_disable();
216- cpu_list = &__get_cpu_var(blk_cpu_done);
217- list_replace_init(cpu_list, &local_list);
218- local_irq_enable();
219-
220- while (!list_empty(&local_list)) {
221- struct request *rq;
222-
223- rq = list_entry(local_list.next, struct request, donelist);
224- list_del_init(&rq->donelist);
225- rq->q->softirq_done_fn(rq);
226- }
227-}
228-
229-static int __cpuinit blk_cpu_notify(struct notifier_block *self,
230- unsigned long action, void *hcpu)
231-{
232- /*
233- * If a CPU goes away, splice its entries to the current CPU
234- * and trigger a run of the softirq
235- */
236- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
237- int cpu = (unsigned long) hcpu;
238-
239- local_irq_disable();
240- list_splice_init(&per_cpu(blk_cpu_done, cpu),
241- &__get_cpu_var(blk_cpu_done));
242- raise_softirq_irqoff(BLOCK_SOFTIRQ);
243- local_irq_enable();
244- }
245-
246- return NOTIFY_OK;
247-}
248-
249-
250-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
251- .notifier_call = blk_cpu_notify,
252-};
253-
254-/**
255- * blk_complete_request - end I/O on a request
256- * @req: the request being processed
257- *
258- * Description:
259- * Ends all I/O on a request. It does not handle partial completions,
260- * unless the driver actually implements this in its completion callback
261- * through requeueing. The actual completion happens out-of-order,
262- * through a softirq handler. The user must have registered a completion
263- * callback through blk_queue_softirq_done().
264- **/
265-
266-void blk_complete_request(struct request *req)
267-{
268- struct list_head *cpu_list;
269- unsigned long flags;
270-
271- BUG_ON(!req->q->softirq_done_fn);
272-
273- local_irq_save(flags);
274-
275- cpu_list = &__get_cpu_var(blk_cpu_done);
276- list_add_tail(&req->donelist, cpu_list);
277- raise_softirq_irqoff(BLOCK_SOFTIRQ);
278-
279- local_irq_restore(flags);
280-}
281-EXPORT_SYMBOL(blk_complete_request);
282-
283-/*
284 * queue lock must be held
285 */
286 static void end_that_request_last(struct request *req, int error)
287@@ -1810,11 +1730,11 @@ EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
288 /**
289 * end_queued_request - end all I/O on a queued request
290 * @rq: the request being processed
291- * @uptodate: error value or 0/1 uptodate flag
292+ * @uptodate: error value or %0/%1 uptodate flag
293 *
294 * Description:
295 * Ends all I/O on a request, and removes it from the block layer queues.
296- * Not suitable for normal IO completion, unless the driver still has
297+ * Not suitable for normal I/O completion, unless the driver still has
298 * the request attached to the block layer.
299 *
300 **/
301@@ -1827,7 +1747,7 @@ EXPORT_SYMBOL(end_queued_request);
302 /**
303 * end_dequeued_request - end all I/O on a dequeued request
304 * @rq: the request being processed
305- * @uptodate: error value or 0/1 uptodate flag
306+ * @uptodate: error value or %0/%1 uptodate flag
307 *
308 * Description:
309 * Ends all I/O on a request. The request must already have been
310@@ -1845,14 +1765,14 @@ EXPORT_SYMBOL(end_dequeued_request);
311 /**
312 * end_request - end I/O on the current segment of the request
313 * @req: the request being processed
314- * @uptodate: error value or 0/1 uptodate flag
315+ * @uptodate: error value or %0/%1 uptodate flag
316 *
317 * Description:
318 * Ends I/O on the current segment of a request. If that is the only
319 * remaining segment, the request is also completed and freed.
320 *
321- * This is a remnant of how older block drivers handled IO completions.
322- * Modern drivers typically end IO on the full request in one go, unless
323+ * This is a remnant of how older block drivers handled I/O completions.
324+ * Modern drivers typically end I/O on the full request in one go, unless
325 * they have a residual value to account for. For that case this function
326 * isn't really useful, unless the residual just happens to be the
327 * full current segment. In other words, don't use this function in new
328@@ -1870,12 +1790,12 @@ EXPORT_SYMBOL(end_request);
329 /**
330 * blk_end_io - Generic end_io function to complete a request.
331 * @rq: the request being processed
332- * @error: 0 for success, < 0 for error
333+ * @error: %0 for success, < %0 for error
334 * @nr_bytes: number of bytes to complete @rq
335 * @bidi_bytes: number of bytes to complete @rq->next_rq
336 * @drv_callback: function called between completion of bios in the request
337 * and completion of the request.
338- * If the callback returns non 0, this helper returns without
339+ * If the callback returns non %0, this helper returns without
340 * completion of the request.
341 *
342 * Description:
343@@ -1883,8 +1803,8 @@ EXPORT_SYMBOL(end_request);
344 * If @rq has leftover, sets it up for the next range of segments.
345 *
346 * Return:
347- * 0 - we are done with this request
348- * 1 - this request is not freed yet, it still has pending buffers.
349+ * %0 - we are done with this request
350+ * %1 - this request is not freed yet, it still has pending buffers.
351 **/
352 static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
353 unsigned int bidi_bytes,
354@@ -1893,7 +1813,7 @@ static int blk_end_io(struct request *rq
355 struct request_queue *q = rq->q;
356 unsigned long flags = 0UL;
357
358- if (bio_has_data(rq->bio) || blk_discard_rq(rq)) {
359+ if (rq->bio) {
360 if (__end_that_request_first(rq, error, nr_bytes))
361 return 1;
362
363@@ -1919,7 +1839,7 @@ static int blk_end_io(struct request *rq
364 /**
365 * blk_end_request - Helper function for drivers to complete the request.
366 * @rq: the request being processed
367- * @error: 0 for success, < 0 for error
368+ * @error: %0 for success, < %0 for error
369 * @nr_bytes: number of bytes to complete
370 *
371 * Description:
372@@ -1927,8 +1847,8 @@ static int blk_end_io(struct request *rq
373 * If @rq has leftover, sets it up for the next range of segments.
374 *
375 * Return:
376- * 0 - we are done with this request
377- * 1 - still buffers pending for this request
378+ * %0 - we are done with this request
379+ * %1 - still buffers pending for this request
380 **/
381 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
382 {
383@@ -1939,20 +1859,19 @@ EXPORT_SYMBOL_GPL(blk_end_request);
384 /**
385 * __blk_end_request - Helper function for drivers to complete the request.
386 * @rq: the request being processed
387- * @error: 0 for success, < 0 for error
388+ * @error: %0 for success, < %0 for error
389 * @nr_bytes: number of bytes to complete
390 *
391 * Description:
392 * Must be called with queue lock held unlike blk_end_request().
393 *
394 * Return:
395- * 0 - we are done with this request
396- * 1 - still buffers pending for this request
397+ * %0 - we are done with this request
398+ * %1 - still buffers pending for this request
399 **/
400 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
401 {
402- if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) &&
403- __end_that_request_first(rq, error, nr_bytes))
404+ if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
405 return 1;
406
407 add_disk_randomness(rq->rq_disk);
408@@ -1966,7 +1885,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
409 /**
410 * blk_end_bidi_request - Helper function for drivers to complete bidi request.
411 * @rq: the bidi request being processed
412- * @error: 0 for success, < 0 for error
413+ * @error: %0 for success, < %0 for error
414 * @nr_bytes: number of bytes to complete @rq
415 * @bidi_bytes: number of bytes to complete @rq->next_rq
416 *
417@@ -1974,8 +1893,8 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
418 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
419 *
420 * Return:
421- * 0 - we are done with this request
422- * 1 - still buffers pending for this request
423+ * %0 - we are done with this request
424+ * %1 - still buffers pending for this request
425 **/
426 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
427 unsigned int bidi_bytes)
428@@ -1987,11 +1906,11 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
429 /**
430 * blk_end_request_callback - Special helper function for tricky drivers
431 * @rq: the request being processed
432- * @error: 0 for success, < 0 for error
433+ * @error: %0 for success, < %0 for error
434 * @nr_bytes: number of bytes to complete
435 * @drv_callback: function called between completion of bios in the request
436 * and completion of the request.
437- * If the callback returns non 0, this helper returns without
438+ * If the callback returns non %0, this helper returns without
439 * completion of the request.
440 *
441 * Description:
442@@ -2004,10 +1923,10 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
443 * Don't use this interface in other places anymore.
444 *
445 * Return:
446- * 0 - we are done with this request
447- * 1 - this request is not freed yet.
448- * this request still has pending buffers or
449- * the driver doesn't want to finish this request yet.
450+ * %0 - we are done with this request
451+ * %1 - this request is not freed yet.
452+ * this request still has pending buffers or
453+ * the driver doesn't want to finish this request yet.
454 **/
455 int blk_end_request_callback(struct request *rq, int error,
456 unsigned int nr_bytes,
457@@ -2026,7 +1945,6 @@ void blk_rq_bio_prep(struct request_queu
458
459 if (bio_has_data(bio)) {
460 rq->nr_phys_segments = bio_phys_segments(q, bio);
461- rq->nr_hw_segments = bio_hw_segments(q, bio);
462 rq->buffer = bio_data(bio);
463 }
464 rq->current_nr_sectors = bio_cur_sectors(bio);
465@@ -2054,8 +1972,6 @@ EXPORT_SYMBOL(kblockd_flush_work);
466
467 int __init blk_dev_init(void)
468 {
469- int i;
470-
471 kblockd_workqueue = create_workqueue("kblockd");
472 if (!kblockd_workqueue)
473 panic("Failed to create kblockd\n");
474@@ -2066,12 +1982,6 @@ int __init blk_dev_init(void)
475 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
476 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
477
478- for_each_possible_cpu(i)
479- INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
480-
481- open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
482- register_hotcpu_notifier(&blk_cpu_notifier);
483-
484 return 0;
485 }
486
487--- a/block/blk-exec.c
488+++ b/block/blk-exec.c
489@@ -16,7 +16,7 @@
490 /**
491 * blk_end_sync_rq - executes a completion event on a request
492 * @rq: request to complete
493- * @error: end io status of the request
494+ * @error: end I/O status of the request
495 */
496 static void blk_end_sync_rq(struct request *rq, int error)
497 {
498@@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct reque
499 * @done: I/O completion handler
500 *
501 * Description:
502- * Insert a fully prepared request at the back of the io scheduler queue
503+ * Insert a fully prepared request at the back of the I/O scheduler queue
504 * for execution. Don't wait for completion.
505 */
506 void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
507@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait)
508 * @at_head: insert request at head or tail of queue
509 *
510 * Description:
511- * Insert a fully prepared request at the back of the io scheduler queue
512+ * Insert a fully prepared request at the back of the I/O scheduler queue
513 * for execution and wait for completion.
514 */
515 int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
516--- a/block/blk-integrity.c
517+++ b/block/blk-integrity.c
518@@ -109,8 +109,8 @@ EXPORT_SYMBOL(blk_rq_map_integrity_sg);
519
520 /**
521 * blk_integrity_compare - Compare integrity profile of two block devices
522- * @b1: Device to compare
523- * @b2: Device to compare
524+ * @bd1: Device to compare
525+ * @bd2: Device to compare
526 *
527 * Description: Meta-devices like DM and MD need to verify that all
528 * sub-devices use the same integrity format before advertising to
529--- a/block/blk-map.c
530+++ b/block/blk-map.c
531@@ -85,17 +85,17 @@ static int __blk_rq_map_user(struct requ
532 }
533
534 /**
535- * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
536+ * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
537 * @q: request queue where request should be inserted
538 * @rq: request structure to fill
539 * @ubuf: the user buffer
540 * @len: length of user data
541 *
542 * Description:
543- * Data will be mapped directly for zero copy io, if possible. Otherwise
544+ * Data will be mapped directly for zero copy I/O, if possible. Otherwise
545 * a kernel bounce buffer is used.
546 *
547- * A matching blk_rq_unmap_user() must be issued at the end of io, while
548+ * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
549 * still in process context.
550 *
551 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
552@@ -154,7 +154,7 @@ unmap_rq:
553 EXPORT_SYMBOL(blk_rq_map_user);
554
555 /**
556- * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
557+ * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
558 * @q: request queue where request should be inserted
559 * @rq: request to map data to
560 * @iov: pointer to the iovec
561@@ -162,10 +162,10 @@ EXPORT_SYMBOL(blk_rq_map_user);
562 * @len: I/O byte count
563 *
564 * Description:
565- * Data will be mapped directly for zero copy io, if possible. Otherwise
566+ * Data will be mapped directly for zero copy I/O, if possible. Otherwise
567 * a kernel bounce buffer is used.
568 *
569- * A matching blk_rq_unmap_user() must be issued at the end of io, while
570+ * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
571 * still in process context.
572 *
573 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
574@@ -224,7 +224,7 @@ int blk_rq_map_user_iov(struct request_q
575 * Description:
576 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must
577 * supply the original rq->bio from the blk_rq_map_user() return, since
578- * the io completion may have changed rq->bio.
579+ * the I/O completion may have changed rq->bio.
580 */
581 int blk_rq_unmap_user(struct bio *bio)
582 {
583@@ -250,7 +250,7 @@ int blk_rq_unmap_user(struct bio *bio)
584 EXPORT_SYMBOL(blk_rq_unmap_user);
585
586 /**
587- * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
588+ * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
589 * @q: request queue where request should be inserted
590 * @rq: request to fill
591 * @kbuf: the kernel buffer
592--- a/block/blk-merge.c
593+++ b/block/blk-merge.c
594@@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct reques
595 void blk_recalc_rq_segments(struct request *rq)
596 {
597 int nr_phys_segs;
598- int nr_hw_segs;
599 unsigned int phys_size;
600- unsigned int hw_size;
601 struct bio_vec *bv, *bvprv = NULL;
602 int seg_size;
603- int hw_seg_size;
604 int cluster;
605 struct req_iterator iter;
606 int high, highprv = 1;
607@@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct reque
608 return;
609
610 cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
611- hw_seg_size = seg_size = 0;
612- phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
613+ seg_size = 0;
614+ phys_size = nr_phys_segs = 0;
615 rq_for_each_segment(bv, rq, iter) {
616 /*
617 * the trick here is making sure that a high page is never
618@@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct reque
619 */
620 high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
621 if (high || highprv)
622- goto new_hw_segment;
623+ goto new_segment;
624 if (cluster) {
625 if (seg_size + bv->bv_len > q->max_segment_size)
626 goto new_segment;
627@@ -74,27 +71,12 @@ void blk_recalc_rq_segments(struct reque
628 goto new_segment;
629 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
630 goto new_segment;
631- if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
632- goto new_hw_segment;
633
634 seg_size += bv->bv_len;
635- hw_seg_size += bv->bv_len;
636 bvprv = bv;
637 continue;
638 }
639 new_segment:
640- if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
641- !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
642- hw_seg_size += bv->bv_len;
643- else {
644-new_hw_segment:
645- if (nr_hw_segs == 1 &&
646- hw_seg_size > rq->bio->bi_hw_front_size)
647- rq->bio->bi_hw_front_size = hw_seg_size;
648- hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
649- nr_hw_segs++;
650- }
651-
652 if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
653 rq->bio->bi_seg_front_size = seg_size;
654
655@@ -104,17 +86,11 @@ new_hw_segment:
656 highprv = high;
657 }
658
659- if (nr_hw_segs == 1 &&
660- hw_seg_size > rq->bio->bi_hw_front_size)
661- rq->bio->bi_hw_front_size = hw_seg_size;
662- if (hw_seg_size > rq->biotail->bi_hw_back_size)
663- rq->biotail->bi_hw_back_size = hw_seg_size;
664 if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
665 rq->bio->bi_seg_front_size = seg_size;
666 if (seg_size > rq->biotail->bi_seg_back_size)
667 rq->biotail->bi_seg_back_size = seg_size;
668 rq->nr_phys_segments = nr_phys_segs;
669- rq->nr_hw_segments = nr_hw_segs;
670 }
671
672 void blk_recount_segments(struct request_queue *q, struct bio *bio)
673@@ -127,7 +103,6 @@ void blk_recount_segments(struct request
674 blk_recalc_rq_segments(&rq);
675 bio->bi_next = nxt;
676 bio->bi_phys_segments = rq.nr_phys_segments;
677- bio->bi_hw_segments = rq.nr_hw_segments;
678 bio->bi_flags |= (1 << BIO_SEG_VALID);
679 }
680 EXPORT_SYMBOL(blk_recount_segments);
681@@ -158,23 +133,6 @@ static int blk_phys_contig_segment(struc
682 return 0;
683 }
684
685-static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
686- struct bio *nxt)
687-{
688- if (!bio_flagged(bio, BIO_SEG_VALID))
689- blk_recount_segments(q, bio);
690- if (!bio_flagged(nxt, BIO_SEG_VALID))
691- blk_recount_segments(q, nxt);
692- if (bio_has_data(bio) &&
693- (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
694- BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)))
695- return 0;
696- if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
697- return 0;
698-
699- return 1;
700-}
701-
702 /*
703 * map a request to scatterlist, return number of sg entries setup. Caller
704 * must make sure sg can hold rq->nr_phys_segments entries
705@@ -288,10 +246,9 @@ static inline int ll_new_hw_segment(stru
706 struct request *req,
707 struct bio *bio)
708 {
709- int nr_hw_segs = bio_hw_segments(q, bio);
710 int nr_phys_segs = bio_phys_segments(q, bio);
711
712- if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
713+ if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
714 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
715 req->cmd_flags |= REQ_NOMERGE;
716 if (req == q->last_merge)
717@@ -303,7 +260,6 @@ static inline int ll_new_hw_segment(stru
718 * This will form the start of a new hw segment. Bump both
719 * counters.
720 */
721- req->nr_hw_segments += nr_hw_segs;
722 req->nr_phys_segments += nr_phys_segs;
723 return 1;
724 }
725@@ -312,7 +268,6 @@ int ll_back_merge_fn(struct request_queu
726 struct bio *bio)
727 {
728 unsigned short max_sectors;
729- int len;
730
731 if (unlikely(blk_pc_request(req)))
732 max_sectors = q->max_hw_sectors;
733@@ -329,20 +284,6 @@ int ll_back_merge_fn(struct request_queu
734 blk_recount_segments(q, req->biotail);
735 if (!bio_flagged(bio, BIO_SEG_VALID))
736 blk_recount_segments(q, bio);
737- len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
738- if (!bio_has_data(bio) ||
739- (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
740- && !BIOVEC_VIRT_OVERSIZE(len))) {
741- int mergeable = ll_new_mergeable(q, req, bio);
742-
743- if (mergeable) {
744- if (req->nr_hw_segments == 1)
745- req->bio->bi_hw_front_size = len;
746- if (bio->bi_hw_segments == 1)
747- bio->bi_hw_back_size = len;
748- }
749- return mergeable;
750- }
751
752 return ll_new_hw_segment(q, req, bio);
753 }
754@@ -351,7 +292,6 @@ int ll_front_merge_fn(struct request_que
755 struct bio *bio)
756 {
757 unsigned short max_sectors;
758- int len;
759
760 if (unlikely(blk_pc_request(req)))
761 max_sectors = q->max_hw_sectors;
762@@ -365,24 +305,10 @@ int ll_front_merge_fn(struct request_que
763 q->last_merge = NULL;
764 return 0;
765 }
766- len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
767 if (!bio_flagged(bio, BIO_SEG_VALID))
768 blk_recount_segments(q, bio);
769 if (!bio_flagged(req->bio, BIO_SEG_VALID))
770 blk_recount_segments(q, req->bio);
771- if (!bio_has_data(bio) ||
772- (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
773- !BIOVEC_VIRT_OVERSIZE(len))) {
774- int mergeable = ll_new_mergeable(q, req, bio);
775-
776- if (mergeable) {
777- if (bio->bi_hw_segments == 1)
778- bio->bi_hw_front_size = len;
779- if (req->nr_hw_segments == 1)
780- req->biotail->bi_hw_back_size = len;
781- }
782- return mergeable;
783- }
784
785 return ll_new_hw_segment(q, req, bio);
786 }
787@@ -391,7 +317,6 @@ static int ll_merge_requests_fn(struct r
788 struct request *next)
789 {
790 int total_phys_segments;
791- int total_hw_segments;
792 unsigned int seg_size =
793 req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;
794
795@@ -420,26 +345,11 @@ static int ll_merge_requests_fn(struct r
796 if (total_phys_segments > q->max_phys_segments)
797 return 0;
798
799- total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
800- if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
801- int len = req->biotail->bi_hw_back_size +
802- next->bio->bi_hw_front_size;
803- /*
804- * propagate the combined length to the end of the requests
805- */
806- if (req->nr_hw_segments == 1)
807- req->bio->bi_hw_front_size = len;
808- if (next->nr_hw_segments == 1)
809- next->biotail->bi_hw_back_size = len;
810- total_hw_segments--;
811- }
812-
813- if (total_hw_segments > q->max_hw_segments)
814+ if (total_phys_segments > q->max_hw_segments)
815 return 0;
816
817 /* Merge is OK... */
818 req->nr_phys_segments = total_phys_segments;
819- req->nr_hw_segments = total_hw_segments;
820 return 1;
821 }
822
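
With the hw-segment bookkeeping removed above, nr_phys_segments is the only
per-request segment count left, and it is now checked against both
q->max_phys_segments and q->max_hw_segments. A minimal driver-side sketch of
what that single count feeds (the example_* names and the limit value are
illustrative, not taken from this patch):

#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#define EXAMPLE_MAX_SEGS 128		/* illustrative hardware limit */

static void example_set_limits(struct request_queue *q)
{
	/* both limits now bound the same nr_phys_segments counter */
	blk_queue_max_phys_segments(q, EXAMPLE_MAX_SEGS);
	blk_queue_max_hw_segments(q, EXAMPLE_MAX_SEGS);
}

static int example_map_request(struct request_queue *q, struct request *rq,
			       struct scatterlist *sg)
{
	/* sg must provide room for rq->nr_phys_segments entries */
	sg_init_table(sg, rq->nr_phys_segments);
	return blk_rq_map_sg(q, rq, sg);
}
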
823--- a/block/blk-settings.c
824+++ b/block/blk-settings.c
825@@ -144,7 +144,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
826 * Different hardware can have different requirements as to what pages
827 * it can do I/O directly to. A low level driver can call
828 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
829- * buffers for doing I/O to pages residing above @page.
830+ * buffers for doing I/O to pages residing above @dma_addr.
831 **/
832 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
833 {
834@@ -229,7 +229,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segment
835 * Description:
836 * Enables a low level driver to set an upper limit on the number of
837 * hw data segments in a request. This would be the largest number of
838- * address/length pairs the host adapter can actually give as once
839+ * address/length pairs the host adapter can actually give at once
840 * to the device.
841 **/
842 void blk_queue_max_hw_segments(struct request_queue *q,
843@@ -410,7 +410,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary
844 * @mask: alignment mask
845 *
846 * description:
847- * set required memory and length aligment for direct dma transactions.
848+ * set required memory and length alignment for direct dma transactions.
849 * this is used when buiding direct io requests for the queue.
850 *
851 **/
852@@ -426,7 +426,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment);
853 * @mask: alignment mask
854 *
855 * description:
856- * update required memory and length aligment for direct dma transactions.
857+ * update required memory and length alignment for direct dma transactions.
858 * If the requested alignment is larger than the current alignment, then
859 * the current queue alignment is updated to the new value, otherwise it
860 * is left alone. The design of this is to allow multiple objects
861--- /dev/null
862+++ b/block/blk-softirq.c
863@@ -0,0 +1,103 @@
864+/*
865+ * Functions related to softirq rq completions
866+ */
867+#include <linux/kernel.h>
868+#include <linux/module.h>
869+#include <linux/init.h>
870+#include <linux/bio.h>
871+#include <linux/blkdev.h>
872+#include <linux/interrupt.h>
873+#include <linux/cpu.h>
874+
875+#include "blk.h"
876+
877+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
878+
879+static int __cpuinit blk_cpu_notify(struct notifier_block *self,
880+ unsigned long action, void *hcpu)
881+{
882+ /*
883+ * If a CPU goes away, splice its entries to the current CPU
884+ * and trigger a run of the softirq
885+ */
886+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
887+ int cpu = (unsigned long) hcpu;
888+
889+ local_irq_disable();
890+ list_splice_init(&per_cpu(blk_cpu_done, cpu),
891+ &__get_cpu_var(blk_cpu_done));
892+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
893+ local_irq_enable();
894+ }
895+
896+ return NOTIFY_OK;
897+}
898+
899+
900+static struct notifier_block blk_cpu_notifier __cpuinitdata = {
901+ .notifier_call = blk_cpu_notify,
902+};
903+
904+/*
905+ * splice the completion data to a local structure and hand off to
906+ * process_completion_queue() to complete the requests
907+ */
908+static void blk_done_softirq(struct softirq_action *h)
909+{
910+ struct list_head *cpu_list, local_list;
911+
912+ local_irq_disable();
913+ cpu_list = &__get_cpu_var(blk_cpu_done);
914+ list_replace_init(cpu_list, &local_list);
915+ local_irq_enable();
916+
917+ while (!list_empty(&local_list)) {
918+ struct request *rq;
919+
920+ rq = list_entry(local_list.next, struct request, donelist);
921+ list_del_init(&rq->donelist);
922+ rq->q->softirq_done_fn(rq);
923+ }
924+}
925+
926+/**
927+ * blk_complete_request - end I/O on a request
928+ * @req: the request being processed
929+ *
930+ * Description:
931+ * Ends all I/O on a request. It does not handle partial completions,
932+ * unless the driver actually implements this in its completion callback
933+ * through requeueing. The actual completion happens out-of-order,
934+ * through a softirq handler. The user must have registered a completion
935+ * callback through blk_queue_softirq_done().
936+ **/
937+
938+void blk_complete_request(struct request *req)
939+{
940+ struct list_head *cpu_list;
941+ unsigned long flags;
942+
943+ BUG_ON(!req->q->softirq_done_fn);
944+
945+ local_irq_save(flags);
946+
947+ cpu_list = &__get_cpu_var(blk_cpu_done);
948+ list_add_tail(&req->donelist, cpu_list);
949+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
950+
951+ local_irq_restore(flags);
952+}
953+EXPORT_SYMBOL(blk_complete_request);
954+
955+int __init blk_softirq_init(void)
956+{
957+ int i;
958+
959+ for_each_possible_cpu(i)
960+ INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
961+
962+ open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
963+ register_hotcpu_notifier(&blk_cpu_notifier);
964+ return 0;
965+}
966+subsys_initcall(blk_softirq_init);
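
blk-softirq.c above only moves the existing softirq completion machinery out of
blk-core.c; the driver-facing contract is unchanged. A minimal sketch of that
contract (the example_* names and the inflight bookkeeping are illustrative,
not part of this patch): register a completion callback at queue setup, call
blk_complete_request() from hard-irq context, and finish the request from the
softirq callback:

#include <linux/blkdev.h>
#include <linux/interrupt.h>

struct example_dev {
	struct request *inflight_rq;	/* request the hardware just finished */
};

/* runs in softirq context via blk_done_softirq() */
static void example_softirq_done(struct request *rq)
{
	int error = 0;		/* illustrative: derive from hardware status */

	/* blk_end_request() takes the queue lock itself, so it is safe here */
	blk_end_request(rq, error, blk_rq_bytes(rq));
}

static void example_queue_setup(struct request_queue *q)
{
	blk_queue_softirq_done(q, example_softirq_done);
}

static irqreturn_t example_isr(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;

	/* cheap in hard-irq context: just queue the request for the softirq */
	blk_complete_request(dev->inflight_rq);
	return IRQ_HANDLED;
}
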
967--- a/block/blk-tag.c
968+++ b/block/blk-tag.c
969@@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag);
970 * __blk_free_tags - release a given set of tag maintenance info
971 * @bqt: the tag map to free
972 *
973- * Tries to free the specified @bqt@. Returns true if it was
974+ * Tries to free the specified @bqt. Returns true if it was
975 * actually freed and false if there are still references using it
976 */
977 static int __blk_free_tags(struct blk_queue_tag *bqt)
978@@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct reques
979 * blk_free_tags - release a given set of tag maintenance info
980 * @bqt: the tag map to free
981 *
982- * For externally managed @bqt@ frees the map. Callers of this
983+ * For externally managed @bqt frees the map. Callers of this
984 * function must guarantee to have released all the queues that
985 * might have been using this tag map.
986 */
987@@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags);
988 * @q: the request queue for the device
989 *
990 * Notes:
991- * This is used to disabled tagged queuing to a device, yet leave
992+ * This is used to disable tagged queuing to a device, yet leave
993 * queue in function.
994 **/
995 void blk_queue_free_tags(struct request_queue *q)
996@@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
997 * @rq: the request that has completed
998 *
999 * Description:
1000- * Typically called when end_that_request_first() returns 0, meaning
1001+ * Typically called when end_that_request_first() returns %0, meaning
1002 * all transfers have been done for a request. It's important to call
1003 * this function before end_that_request_last(), as that will put the
1004 * request back on the free list thus corrupting the internal tag list.
1005--- a/block/cfq-iosched.c
1006+++ b/block/cfq-iosched.c
1007@@ -39,6 +39,7 @@ static int cfq_slice_idle = HZ / 125;
1008 #define CFQ_MIN_TT (2)
1009
1010 #define CFQ_SLICE_SCALE (5)
1011+#define CFQ_HW_QUEUE_MIN (5)
1012
1013 #define RQ_CIC(rq) \
1014 ((struct cfq_io_context *) (rq)->elevator_private)
1015@@ -86,7 +87,14 @@ struct cfq_data {
1016
1017 int rq_in_driver;
1018 int sync_flight;
1019+
1020+ /*
1021+ * queue-depth detection
1022+ */
1023+ int rq_queued;
1024 int hw_tag;
1025+ int hw_tag_samples;
1026+ int rq_in_driver_peak;
1027
1028 /*
1029 * idle window management
1030@@ -654,15 +662,6 @@ static void cfq_activate_request(struct
1031 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
1032 cfqd->rq_in_driver);
1033
1034- /*
1035- * If the depth is larger 1, it really could be queueing. But lets
1036- * make the mark a little higher - idling could still be good for
1037- * low queueing, and a low queueing number could also just indicate
1038- * a SCSI mid layer like behaviour where limit+1 is often seen.
1039- */
1040- if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
1041- cfqd->hw_tag = 1;
1042-
1043 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
1044 }
1045
1046@@ -686,6 +685,7 @@ static void cfq_remove_request(struct re
1047 list_del_init(&rq->queuelist);
1048 cfq_del_rq_rb(rq);
1049
1050+ cfqq->cfqd->rq_queued--;
1051 if (rq_is_meta(rq)) {
1052 WARN_ON(!cfqq->meta_pending);
1053 cfqq->meta_pending--;
1054@@ -1833,6 +1833,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, s
1055 {
1056 struct cfq_io_context *cic = RQ_CIC(rq);
1057
1058+ cfqd->rq_queued++;
1059 if (rq_is_meta(rq))
1060 cfqq->meta_pending++;
1061
1062@@ -1880,6 +1881,31 @@ static void cfq_insert_request(struct re
1063 cfq_rq_enqueued(cfqd, cfqq, rq);
1064 }
1065
1066+/*
1067+ * Update hw_tag based on peak queue depth over 50 samples under
1068+ * sufficient load.
1069+ */
1070+static void cfq_update_hw_tag(struct cfq_data *cfqd)
1071+{
1072+ if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
1073+ cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
1074+
1075+ if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
1076+ cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
1077+ return;
1078+
1079+ if (cfqd->hw_tag_samples++ < 50)
1080+ return;
1081+
1082+ if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN)
1083+ cfqd->hw_tag = 1;
1084+ else
1085+ cfqd->hw_tag = 0;
1086+
1087+ cfqd->hw_tag_samples = 0;
1088+ cfqd->rq_in_driver_peak = 0;
1089+}
1090+
1091 static void cfq_completed_request(struct request_queue *q, struct request *rq)
1092 {
1093 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1094@@ -1890,6 +1916,8 @@ static void cfq_completed_request(struct
1095 now = jiffies;
1096 cfq_log_cfqq(cfqd, cfqq, "complete");
1097
1098+ cfq_update_hw_tag(cfqd);
1099+
1100 WARN_ON(!cfqd->rq_in_driver);
1101 WARN_ON(!cfqq->dispatched);
1102 cfqd->rq_in_driver--;
1103@@ -2200,6 +2228,7 @@ static void *cfq_init_queue(struct reque
1104 cfqd->cfq_slice[1] = cfq_slice_sync;
1105 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
1106 cfqd->cfq_slice_idle = cfq_slice_idle;
1107+ cfqd->hw_tag = 1;
1108
1109 return cfqd;
1110 }
1111--- a/block/deadline-iosched.c
1112+++ b/block/deadline-iosched.c
1113@@ -33,7 +33,7 @@ struct deadline_data {
1114 */
1115 struct rb_root sort_list[2];
1116 struct list_head fifo_list[2];
1117-
1118+
1119 /*
1120 * next in sort order. read, write or both are NULL
1121 */
1122@@ -53,7 +53,11 @@ struct deadline_data {
1123
1124 static void deadline_move_request(struct deadline_data *, struct request *);
1125
1126-#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))])
1127+static inline struct rb_root *
1128+deadline_rb_root(struct deadline_data *dd, struct request *rq)
1129+{
1130+ return &dd->sort_list[rq_data_dir(rq)];
1131+}
1132
1133 /*
1134 * get the request after `rq' in sector-sorted order
1135@@ -72,15 +76,11 @@ deadline_latter_request(struct request *
1136 static void
1137 deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
1138 {
1139- struct rb_root *root = RQ_RB_ROOT(dd, rq);
1140+ struct rb_root *root = deadline_rb_root(dd, rq);
1141 struct request *__alias;
1142
1143-retry:
1144- __alias = elv_rb_add(root, rq);
1145- if (unlikely(__alias)) {
1146+ while (unlikely(__alias = elv_rb_add(root, rq)))
1147 deadline_move_request(dd, __alias);
1148- goto retry;
1149- }
1150 }
1151
1152 static inline void
1153@@ -91,7 +91,7 @@ deadline_del_rq_rb(struct deadline_data
1154 if (dd->next_rq[data_dir] == rq)
1155 dd->next_rq[data_dir] = deadline_latter_request(rq);
1156
1157- elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
1158+ elv_rb_del(deadline_rb_root(dd, rq), rq);
1159 }
1160
1161 /*
1162@@ -106,7 +106,7 @@ deadline_add_request(struct request_queu
1163 deadline_add_rq_rb(dd, rq);
1164
1165 /*
1166- * set expire time (only used for reads) and add to fifo list
1167+ * set expire time and add to fifo list
1168 */
1169 rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
1170 list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
1171@@ -162,7 +162,7 @@ static void deadline_merged_request(stru
1172 * if the merge was a front merge, we need to reposition request
1173 */
1174 if (type == ELEVATOR_FRONT_MERGE) {
1175- elv_rb_del(RQ_RB_ROOT(dd, req), req);
1176+ elv_rb_del(deadline_rb_root(dd, req), req);
1177 deadline_add_rq_rb(dd, req);
1178 }
1179 }
1180@@ -212,7 +212,7 @@ deadline_move_request(struct deadline_da
1181 dd->next_rq[WRITE] = NULL;
1182 dd->next_rq[data_dir] = deadline_latter_request(rq);
1183
1184- dd->last_sector = rq->sector + rq->nr_sectors;
1185+ dd->last_sector = rq_end_sector(rq);
1186
1187 /*
1188 * take it off the sort and fifo list, move
1189@@ -222,7 +222,7 @@ deadline_move_request(struct deadline_da
1190 }
1191
1192 /*
1193- * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
1194+ * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
1195 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
1196 */
1197 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
1198@@ -258,17 +258,9 @@ static int deadline_dispatch_requests(st
1199 else
1200 rq = dd->next_rq[READ];
1201
1202- if (rq) {
1203- /* we have a "next request" */
1204-
1205- if (dd->last_sector != rq->sector)
1206- /* end the batch on a non sequential request */
1207- dd->batching += dd->fifo_batch;
1208-
1209- if (dd->batching < dd->fifo_batch)
1210- /* we are still entitled to batch */
1211- goto dispatch_request;
1212- }
1213+ if (rq && dd->batching < dd->fifo_batch)
1214+ /* we have a next request are still entitled to batch */
1215+ goto dispatch_request;
1216
1217 /*
1218 * at this point we are not running a batch. select the appropriate
1219--- a/block/elevator.c
1220+++ b/block/elevator.c
1221@@ -34,8 +34,7 @@
1222 #include <linux/delay.h>
1223 #include <linux/blktrace_api.h>
1224 #include <linux/hash.h>
1225-
1226-#include <asm/uaccess.h>
1227+#include <linux/uaccess.h>
1228
1229 static DEFINE_SPINLOCK(elv_list_lock);
1230 static LIST_HEAD(elv_list);
1231@@ -790,7 +789,6 @@ struct request *elv_next_request(struct
1232 * device can handle
1233 */
1234 rq->nr_phys_segments++;
1235- rq->nr_hw_segments++;
1236 }
1237
1238 if (!q->prep_rq_fn)
1239@@ -813,7 +811,6 @@ struct request *elv_next_request(struct
1240 * so that we don't add it again
1241 */
1242 --rq->nr_phys_segments;
1243- --rq->nr_hw_segments;
1244 }
1245
1246 rq = NULL;
1247--- a/block/genhd.c
1248+++ b/block/genhd.c
1249@@ -211,10 +211,11 @@ void unlink_gendisk(struct gendisk *disk
1250
1251 /**
1252 * get_gendisk - get partitioning information for a given device
1253- * @dev: device to get partitioning information for
1254+ * @devt: device to get partitioning information for
1255+ * @part: returned partition index
1256 *
1257 * This function gets the structure containing partitioning
1258- * information for the given device @dev.
1259+ * information for the given device @devt.
1260 */
1261 struct gendisk *get_gendisk(dev_t devt, int *part)
1262 {
1263--- a/drivers/block/ps3disk.c
1264+++ b/drivers/block/ps3disk.c
1265@@ -199,7 +199,8 @@ static void ps3disk_do_request(struct ps
1266 if (blk_fs_request(req)) {
1267 if (ps3disk_submit_request_sg(dev, req))
1268 break;
1269- } else if (req->cmd_type == REQ_TYPE_FLUSH) {
1270+ } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
1271+ req->cmd[0] == REQ_LB_OP_FLUSH) {
1272 if (ps3disk_submit_flush_request(dev, req))
1273 break;
1274 } else {
1275@@ -257,7 +258,8 @@ static irqreturn_t ps3disk_interrupt(int
1276 return IRQ_HANDLED;
1277 }
1278
1279- if (req->cmd_type == REQ_TYPE_FLUSH) {
1280+ if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
1281+ req->cmd[0] == REQ_LB_OP_FLUSH) {
1282 read = 0;
1283 num_sectors = req->hard_cur_sectors;
1284 op = "flush";
1285@@ -405,7 +407,8 @@ static void ps3disk_prepare_flush(struct
1286
1287 dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
1288
1289- req->cmd_type = REQ_TYPE_FLUSH;
1290+ req->cmd_type = REQ_TYPE_LINUX_BLOCK;
1291+ req->cmd[0] = REQ_LB_OP_FLUSH;
1292 }
1293
1294 static unsigned long ps3disk_mask;
1295--- a/drivers/block/virtio_blk.c
1296+++ b/drivers/block/virtio_blk.c
1297@@ -84,11 +84,11 @@ static bool do_req(struct request_queue
1298 if (blk_fs_request(vbr->req)) {
1299 vbr->out_hdr.type = 0;
1300 vbr->out_hdr.sector = vbr->req->sector;
1301- vbr->out_hdr.ioprio = vbr->req->ioprio;
1302+ vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
1303 } else if (blk_pc_request(vbr->req)) {
1304 vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
1305 vbr->out_hdr.sector = 0;
1306- vbr->out_hdr.ioprio = vbr->req->ioprio;
1307+ vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
1308 } else {
1309 /* We don't put anything else in the queue. */
1310 BUG();
1311--- a/drivers/md/raid1.c
1312+++ b/drivers/md/raid1.c
1313@@ -1303,9 +1303,6 @@ static void sync_request_write(mddev_t *
1314 sbio->bi_size = r1_bio->sectors << 9;
1315 sbio->bi_idx = 0;
1316 sbio->bi_phys_segments = 0;
1317- sbio->bi_hw_segments = 0;
1318- sbio->bi_hw_front_size = 0;
1319- sbio->bi_hw_back_size = 0;
1320 sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1321 sbio->bi_flags |= 1 << BIO_UPTODATE;
1322 sbio->bi_next = NULL;
1323@@ -1791,7 +1788,6 @@ static sector_t sync_request(mddev_t *md
1324 bio->bi_vcnt = 0;
1325 bio->bi_idx = 0;
1326 bio->bi_phys_segments = 0;
1327- bio->bi_hw_segments = 0;
1328 bio->bi_size = 0;
1329 bio->bi_end_io = NULL;
1330 bio->bi_private = NULL;
1331--- a/drivers/md/raid10.c
1332+++ b/drivers/md/raid10.c
1333@@ -1346,9 +1346,6 @@ static void sync_request_write(mddev_t *
1334 tbio->bi_size = r10_bio->sectors << 9;
1335 tbio->bi_idx = 0;
1336 tbio->bi_phys_segments = 0;
1337- tbio->bi_hw_segments = 0;
1338- tbio->bi_hw_front_size = 0;
1339- tbio->bi_hw_back_size = 0;
1340 tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1341 tbio->bi_flags |= 1 << BIO_UPTODATE;
1342 tbio->bi_next = NULL;
1343@@ -1948,7 +1945,6 @@ static sector_t sync_request(mddev_t *md
1344 bio->bi_vcnt = 0;
1345 bio->bi_idx = 0;
1346 bio->bi_phys_segments = 0;
1347- bio->bi_hw_segments = 0;
1348 bio->bi_size = 0;
1349 }
1350
1351--- a/drivers/md/raid5.c
1352+++ b/drivers/md/raid5.c
1353@@ -101,6 +101,40 @@
1354 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
1355 #endif
1356
1357+/*
1358+ * We maintain a biased count of active stripes in the bottom 16 bits of
1359+ * bi_phys_segments, and a count of processed stripes in the upper 16 bits
1360+ */
1361+static inline int raid5_bi_phys_segments(struct bio *bio)
1362+{
1363+ return bio->bi_phys_segments & 0xffff;
1364+}
1365+
1366+static inline int raid5_bi_hw_segments(struct bio *bio)
1367+{
1368+ return (bio->bi_phys_segments >> 16) & 0xffff;
1369+}
1370+
1371+static inline int raid5_dec_bi_phys_segments(struct bio *bio)
1372+{
1373+ --bio->bi_phys_segments;
1374+ return raid5_bi_phys_segments(bio);
1375+}
1376+
1377+static inline int raid5_dec_bi_hw_segments(struct bio *bio)
1378+{
1379+ unsigned short val = raid5_bi_hw_segments(bio);
1380+
1381+ --val;
1382+ bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
1383+ return val;
1384+}
1385+
1386+static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
1387+{
1388+	bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
1389+}
1390+
1391 static inline int raid6_next_disk(int disk, int raid_disks)
1392 {
1393 disk++;
1394@@ -507,7 +541,7 @@ static void ops_complete_biofill(void *s
1395 while (rbi && rbi->bi_sector <
1396 dev->sector + STRIPE_SECTORS) {
1397 rbi2 = r5_next_bio(rbi, dev->sector);
1398- if (--rbi->bi_phys_segments == 0) {
1399+ if (!raid5_dec_bi_phys_segments(rbi)) {
1400 rbi->bi_next = return_bi;
1401 return_bi = rbi;
1402 }
1403@@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_
1404 if (*bip)
1405 bi->bi_next = *bip;
1406 *bip = bi;
1407- bi->bi_phys_segments ++;
1408+ bi->bi_phys_segments++;
1409 spin_unlock_irq(&conf->device_lock);
1410 spin_unlock(&sh->lock);
1411
1412@@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf,
1413 sh->dev[i].sector + STRIPE_SECTORS) {
1414 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
1415 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1416- if (--bi->bi_phys_segments == 0) {
1417+ if (!raid5_dec_bi_phys_segments(bi)) {
1418 md_write_end(conf->mddev);
1419 bi->bi_next = *return_bi;
1420 *return_bi = bi;
1421@@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf,
1422 sh->dev[i].sector + STRIPE_SECTORS) {
1423 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
1424 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1425- if (--bi->bi_phys_segments == 0) {
1426+ if (!raid5_dec_bi_phys_segments(bi)) {
1427 md_write_end(conf->mddev);
1428 bi->bi_next = *return_bi;
1429 *return_bi = bi;
1430@@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf,
1431 struct bio *nextbi =
1432 r5_next_bio(bi, sh->dev[i].sector);
1433 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1434- if (--bi->bi_phys_segments == 0) {
1435+ if (!raid5_dec_bi_phys_segments(bi)) {
1436 bi->bi_next = *return_bi;
1437 *return_bi = bi;
1438 }
1439@@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(ra
1440 while (wbi && wbi->bi_sector <
1441 dev->sector + STRIPE_SECTORS) {
1442 wbi2 = r5_next_bio(wbi, dev->sector);
1443- if (--wbi->bi_phys_segments == 0) {
1444+ if (!raid5_dec_bi_phys_segments(wbi)) {
1445 md_write_end(conf->mddev);
1446 wbi->bi_next = *return_bi;
1447 *return_bi = wbi;
1448@@ -2814,7 +2848,7 @@ static bool handle_stripe6(struct stripe
1449 copy_data(0, rbi, dev->page, dev->sector);
1450 rbi2 = r5_next_bio(rbi, dev->sector);
1451 spin_lock_irq(&conf->device_lock);
1452- if (--rbi->bi_phys_segments == 0) {
1453+ if (!raid5_dec_bi_phys_segments(rbi)) {
1454 rbi->bi_next = return_bi;
1455 return_bi = rbi;
1456 }
1457@@ -3155,8 +3189,11 @@ static struct bio *remove_bio_from_retry
1458 if(bi) {
1459 conf->retry_read_aligned_list = bi->bi_next;
1460 bi->bi_next = NULL;
1461+ /*
1462+		 * this sets the active stripe count to 1 and the processed
1463+		 * stripe count to zero (upper 16 bits)
1464+ */
1465 bi->bi_phys_segments = 1; /* biased count of active stripes */
1466- bi->bi_hw_segments = 0; /* count of processed stripes */
1467 }
1468
1469 return bi;
1470@@ -3206,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi)
1471 if ((bi->bi_size>>9) > q->max_sectors)
1472 return 0;
1473 blk_recount_segments(q, bi);
1474- if (bi->bi_phys_segments > q->max_phys_segments ||
1475- bi->bi_hw_segments > q->max_hw_segments)
1476+ if (bi->bi_phys_segments > q->max_phys_segments)
1477 return 0;
1478
1479 if (q->merge_bvec_fn)
1480@@ -3469,7 +3505,7 @@ static int make_request(struct request_q
1481
1482 }
1483 spin_lock_irq(&conf->device_lock);
1484- remaining = --bi->bi_phys_segments;
1485+ remaining = raid5_dec_bi_phys_segments(bi);
1486 spin_unlock_irq(&conf->device_lock);
1487 if (remaining == 0) {
1488
1489@@ -3753,7 +3789,7 @@ static int retry_aligned_read(raid5_con
1490 sector += STRIPE_SECTORS,
1491 scnt++) {
1492
1493- if (scnt < raid_bio->bi_hw_segments)
1494+ if (scnt < raid5_bi_hw_segments(raid_bio))
1495 /* already done this stripe */
1496 continue;
1497
1498@@ -3761,7 +3797,7 @@ static int retry_aligned_read(raid5_con
1499
1500 if (!sh) {
1501 /* failed to get a stripe - must wait */
1502- raid_bio->bi_hw_segments = scnt;
1503+ raid5_set_bi_hw_segments(raid_bio, scnt);
1504 conf->retry_read_aligned = raid_bio;
1505 return handled;
1506 }
1507@@ -3769,7 +3805,7 @@ static int retry_aligned_read(raid5_con
1508 set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
1509 if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
1510 release_stripe(sh);
1511- raid_bio->bi_hw_segments = scnt;
1512+ raid5_set_bi_hw_segments(raid_bio, scnt);
1513 conf->retry_read_aligned = raid_bio;
1514 return handled;
1515 }
1516@@ -3779,7 +3815,7 @@ static int retry_aligned_read(raid5_con
1517 handled++;
1518 }
1519 spin_lock_irq(&conf->device_lock);
1520- remaining = --raid_bio->bi_phys_segments;
1521+ remaining = raid5_dec_bi_phys_segments(raid_bio);
1522 spin_unlock_irq(&conf->device_lock);
1523 if (remaining == 0)
1524 bio_endio(raid_bio, 0);
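
The raid5 changes above fold the old bi_hw_segments counter into the upper 16
bits of bi_phys_segments. A stand-alone userspace sketch of that packing scheme
(stub structure and helper names are illustrative, not kernel code from this
patch), handy for convincing yourself the helpers round-trip correctly:

#include <assert.h>
#include <stdio.h>

struct bio_stub {
	unsigned int bi_phys_segments;	/* stub: the only field the helpers touch */
};

static int active_stripes(const struct bio_stub *bio)
{
	return bio->bi_phys_segments & 0xffff;		/* bottom 16 bits */
}

static int processed_stripes(const struct bio_stub *bio)
{
	return (bio->bi_phys_segments >> 16) & 0xffff;	/* upper 16 bits */
}

static void set_processed_stripes(struct bio_stub *bio, unsigned int cnt)
{
	/* bitwise OR keeps the active count in the low bits intact */
	bio->bi_phys_segments = active_stripes(bio) | (cnt << 16);
}

int main(void)
{
	struct bio_stub bio = { .bi_phys_segments = 1 };  /* biased active count */

	set_processed_stripes(&bio, 3);
	assert(active_stripes(&bio) == 1);
	assert(processed_stripes(&bio) == 3);
	printf("active=%d processed=%d\n",
	       active_stripes(&bio), processed_stripes(&bio));
	return 0;
}
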
1525--- a/fs/bio.c
1526+++ b/fs/bio.c
1527@@ -208,14 +208,6 @@ inline int bio_phys_segments(struct requ
1528 return bio->bi_phys_segments;
1529 }
1530
1531-inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
1532-{
1533- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1534- blk_recount_segments(q, bio);
1535-
1536- return bio->bi_hw_segments;
1537-}
1538-
1539 /**
1540 * __bio_clone - clone a bio
1541 * @bio: destination bio
1542@@ -350,8 +342,7 @@ static int __bio_add_page(struct request
1543 */
1544
1545 while (bio->bi_phys_segments >= q->max_phys_segments
1546- || bio->bi_hw_segments >= q->max_hw_segments
1547- || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
1548+ || bio->bi_phys_segments >= q->max_hw_segments) {
1549
1550 if (retried_segments)
1551 return 0;
1552@@ -395,13 +386,11 @@ static int __bio_add_page(struct request
1553 }
1554
1555 /* If we may be able to merge these biovecs, force a recount */
1556- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
1557- BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
1558+ if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
1559 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
1560
1561 bio->bi_vcnt++;
1562 bio->bi_phys_segments++;
1563- bio->bi_hw_segments++;
1564 done:
1565 bio->bi_size += len;
1566 return len;
1567@@ -1393,7 +1382,6 @@ EXPORT_SYMBOL(bio_init);
1568 EXPORT_SYMBOL(__bio_clone);
1569 EXPORT_SYMBOL(bio_clone);
1570 EXPORT_SYMBOL(bio_phys_segments);
1571-EXPORT_SYMBOL(bio_hw_segments);
1572 EXPORT_SYMBOL(bio_add_page);
1573 EXPORT_SYMBOL(bio_add_pc_page);
1574 EXPORT_SYMBOL(bio_get_nr_vecs);
1575--- a/include/linux/bio.h
1576+++ b/include/linux/bio.h
1577@@ -26,21 +26,8 @@
1578
1579 #ifdef CONFIG_BLOCK
1580
1581-/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
1582 #include <asm/io.h>
1583
1584-#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY)
1585-#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1))
1586-#define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE)
1587-#else
1588-#define BIOVEC_VIRT_START_SIZE(x) 0
1589-#define BIOVEC_VIRT_OVERSIZE(x) 0
1590-#endif
1591-
1592-#ifndef BIO_VMERGE_BOUNDARY
1593-#define BIO_VMERGE_BOUNDARY 0
1594-#endif
1595-
1596 #define BIO_DEBUG
1597
1598 #ifdef BIO_DEBUG
1599@@ -88,12 +75,7 @@ struct bio {
1600 /* Number of segments in this BIO after
1601 * physical address coalescing is performed.
1602 */
1603- unsigned short bi_phys_segments;
1604-
1605- /* Number of segments after physical and DMA remapping
1606- * hardware coalescing is performed.
1607- */
1608- unsigned short bi_hw_segments;
1609+ unsigned int bi_phys_segments;
1610
1611 unsigned int bi_size; /* residual I/O count */
1612
1613@@ -104,14 +86,6 @@ struct bio {
1614 unsigned int bi_seg_front_size;
1615 unsigned int bi_seg_back_size;
1616
1617- /*
1618- * To keep track of the max hw size, we account for the
1619- * sizes of the first and last virtually mergeable segments
1620- * in this bio
1621- */
1622- unsigned int bi_hw_front_size;
1623- unsigned int bi_hw_back_size;
1624-
1625 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
1626
1627 struct bio_vec *bi_io_vec; /* the actual vec list */
1628@@ -133,7 +107,7 @@ struct bio {
1629 #define BIO_UPTODATE 0 /* ok after I/O completion */
1630 #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */
1631 #define BIO_EOF 2 /* out-out-bounds error */
1632-#define BIO_SEG_VALID 3 /* nr_hw_seg valid */
1633+#define BIO_SEG_VALID 3 /* bi_phys_segments valid */
1634 #define BIO_CLONED 4 /* doesn't own data */
1635 #define BIO_BOUNCED 5 /* bio is a bounce bio */
1636 #define BIO_USER_MAPPED 6 /* contains user pages */
1637@@ -247,8 +221,6 @@ static inline void *bio_data(struct bio
1638 ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
1639 #endif
1640
1641-#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \
1642- ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
1643 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
1644 (((addr1) | (mask)) == (((addr2) - 1) | (mask)))
1645 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
1646@@ -346,7 +318,6 @@ extern void bio_free(struct bio *, struc
1647 extern void bio_endio(struct bio *, int);
1648 struct request_queue;
1649 extern int bio_phys_segments(struct request_queue *, struct bio *);
1650-extern int bio_hw_segments(struct request_queue *, struct bio *);
1651
1652 extern void __bio_clone(struct bio *, struct bio *);
1653 extern struct bio *bio_clone(struct bio *, gfp_t);
1654--- a/include/linux/blkdev.h
1655+++ b/include/linux/blkdev.h
1656@@ -54,7 +54,6 @@ enum rq_cmd_type_bits {
1657 REQ_TYPE_PM_SUSPEND, /* suspend request */
1658 REQ_TYPE_PM_RESUME, /* resume request */
1659 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
1660- REQ_TYPE_FLUSH, /* flush request */
1661 REQ_TYPE_SPECIAL, /* driver defined type */
1662 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
1663 /*
1664@@ -76,11 +75,8 @@ enum rq_cmd_type_bits {
1665 *
1666 */
1667 enum {
1668- /*
1669- * just examples for now
1670- */
1671 REQ_LB_OP_EJECT = 0x40, /* eject request */
1672- REQ_LB_OP_FLUSH = 0x41, /* flush device */
1673+ REQ_LB_OP_FLUSH = 0x41, /* flush request */
1674 REQ_LB_OP_DISCARD = 0x42, /* discard sectors */
1675 };
1676
1677@@ -193,13 +189,6 @@ struct request {
1678 */
1679 unsigned short nr_phys_segments;
1680
1681- /* Number of scatter-gather addr+len pairs after
1682- * physical and DMA remapping hardware coalescing is performed.
1683- * This is the number of scatter-gather entries the driver
1684- * will actually have to deal with after DMA mapping is done.
1685- */
1686- unsigned short nr_hw_segments;
1687-
1688 unsigned short ioprio;
1689
1690 void *special;
1691@@ -236,6 +225,11 @@ struct request {
1692 struct request *next_rq;
1693 };
1694
1695+static inline unsigned short req_get_ioprio(struct request *req)
1696+{
1697+ return req->ioprio;
1698+}
1699+
1700 /*
1701 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
1702 * requests. Some step values could eventually be made generic.