]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Jens Axboe <jens.axboe@oracle.com> |
2 | Subject: Block layer fixes for 2.6.28 | |
3 | Patch-Mainline: 2.6.28 | |
4 | ||
5 | This is a combined patchset with block layer fixes from 2.6.28. | |
6 | Commit IDs: | |
7 | 97dee27d1c4d6041ff1cc8150db95fe3eab6be5a | |
8 | 00bbda44114e70fc9879731be3c888122b1de8b1 | |
9 | 7452d2a2be657becb2f385d0e0864ba51f1ae694 | |
10 | 075a108f7d4dd24b8b69e59edcdf1a0fd84e6541 | |
11 | 7a1b6029bf9ff3d0636e318d2482031dc493df16 | |
12 | b3a5faf3cefbff4b69ca181767b882bbd6189aaf | |
13 | 8fe902de23b4f4012db91f538cafd864c63308e7 | |
14 | dfef13dad8d34d0a9e83adee3e8cd9f94cca465e | |
15 | d2629dd70132f90f9d1bca07572197e9adea25b1 | |
16 | 1f08a4484a223cb337e0466042005421cd55d22b | |
17 | fcdc7361d2925596d69d0538d738c08c221a69c9 | |
18 | cd93bcfa9ca9b15051220614160131c53d7f33f0 | |
19 | d371ca6b8a21a617b8607d23f7202197ad40482a | |
20 | 910ee03b1e61d5cfb121dfb1ee7c127f18bdae01 | |
21 | ||
22 | Signed-off-by: Hannes Reinecke <hare@suse.de> | |
23 | ||
24 | --- | |
25 | Documentation/DocBook/kernel-api.tmpl | 4 | |
26 | Documentation/block/deadline-iosched.txt | 14 +- | |
27 | block/Makefile | 4 | |
28 | block/blk-core.c | 166 +++++++------------------------ | |
29 | block/blk-exec.c | 6 - | |
30 | block/blk-integrity.c | 4 | |
31 | block/blk-map.c | 16 +- | |
32 | block/blk-merge.c | 100 ------------------ | |
33 | block/blk-settings.c | 8 - | |
34 | block/blk-softirq.c | 103 +++++++++++++++++++ | |
35 | block/blk-tag.c | 8 - | |
36 | block/cfq-iosched.c | 47 +++++++- | |
37 | block/deadline-iosched.c | 40 ++----- | |
38 | block/elevator.c | 5 | |
39 | block/genhd.c | 5 | |
40 | drivers/block/ps3disk.c | 9 + | |
41 | drivers/block/virtio_blk.c | 4 | |
42 | drivers/md/raid1.c | 4 | |
43 | drivers/md/raid10.c | 4 | |
44 | drivers/md/raid5.c | 66 +++++++++--- | |
45 | fs/bio.c | 16 -- | |
46 | include/linux/bio.h | 33 ------ | |
47 | include/linux/blkdev.h | 18 +-- | |
48 | 23 files changed, 310 insertions(+), 374 deletions(-) | |
49 | ||
50 | --- a/Documentation/DocBook/kernel-api.tmpl | |
51 | +++ b/Documentation/DocBook/kernel-api.tmpl | |
52 | @@ -364,6 +364,10 @@ X!Edrivers/pnp/system.c | |
53 | !Eblock/blk-barrier.c | |
54 | !Eblock/blk-tag.c | |
55 | !Iblock/blk-tag.c | |
56 | +!Eblock/blk-integrity.c | |
57 | +!Iblock/blktrace.c | |
58 | +!Iblock/genhd.c | |
59 | +!Eblock/genhd.c | |
60 | </chapter> | |
61 | ||
62 | <chapter id="chrdev"> | |
63 | --- a/Documentation/block/deadline-iosched.txt | |
64 | +++ b/Documentation/block/deadline-iosched.txt | |
65 | @@ -30,12 +30,18 @@ write_expire (in ms) | |
66 | Similar to read_expire mentioned above, but for writes. | |
67 | ||
68 | ||
69 | -fifo_batch | |
70 | +fifo_batch (number of requests) | |
71 | ---------- | |
72 | ||
73 | -When a read request expires its deadline, we must move some requests from | |
74 | -the sorted io scheduler list to the block device dispatch queue. fifo_batch | |
75 | -controls how many requests we move. | |
76 | +Requests are grouped into ``batches'' of a particular data direction (read or | |
77 | +write) which are serviced in increasing sector order. To limit extra seeking, | |
78 | +deadline expiries are only checked between batches. fifo_batch controls the | |
79 | +maximum number of requests per batch. | |
80 | + | |
81 | +This parameter tunes the balance between per-request latency and aggregate | |
82 | +throughput. When low latency is the primary concern, smaller is better (where | |
83 | +a value of 1 yields first-come first-served behaviour). Increasing fifo_batch | |
84 | +generally improves throughput, at the cost of latency variation. | |
85 | ||
86 | ||
87 | writes_starved (number of dispatches) | |
88 | --- a/block/Makefile | |
89 | +++ b/block/Makefile | |
90 | @@ -4,8 +4,8 @@ | |
91 | ||
92 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | |
93 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ | |
94 | - blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \ | |
95 | - cmd-filter.o | |
96 | + blk-exec.o blk-merge.o blk-softirq.o ioctl.o genhd.o \ | |
97 | + scsi_ioctl.o cmd-filter.o | |
98 | ||
99 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | |
100 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o | |
101 | --- a/block/blk-core.c | |
102 | +++ b/block/blk-core.c | |
103 | @@ -26,8 +26,6 @@ | |
104 | #include <linux/swap.h> | |
105 | #include <linux/writeback.h> | |
106 | #include <linux/task_io_accounting_ops.h> | |
107 | -#include <linux/interrupt.h> | |
108 | -#include <linux/cpu.h> | |
109 | #include <linux/blktrace_api.h> | |
110 | #include <linux/fault-inject.h> | |
111 | ||
112 | @@ -50,8 +48,6 @@ struct kmem_cache *blk_requestq_cachep; | |
113 | */ | |
114 | static struct workqueue_struct *kblockd_workqueue; | |
115 | ||
116 | -static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | |
117 | - | |
118 | static void drive_stat_acct(struct request *rq, int new_io) | |
119 | { | |
120 | struct hd_struct *part; | |
121 | @@ -531,7 +527,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node); | |
122 | * request queue; this lock will be taken also from interrupt context, so irq | |
123 | * disabling is needed for it. | |
124 | * | |
125 | - * Function returns a pointer to the initialized request queue, or NULL if | |
126 | + * Function returns a pointer to the initialized request queue, or %NULL if | |
127 | * it didn't succeed. | |
128 | * | |
129 | * Note: | |
130 | @@ -913,7 +909,7 @@ void blk_requeue_request(struct request_ | |
131 | EXPORT_SYMBOL(blk_requeue_request); | |
132 | ||
133 | /** | |
134 | - * blk_insert_request - insert a special request in to a request queue | |
135 | + * blk_insert_request - insert a special request into a request queue | |
136 | * @q: request queue where request should be inserted | |
137 | * @rq: request to be inserted | |
138 | * @at_head: insert request at head or tail of queue | |
139 | @@ -923,8 +919,8 @@ EXPORT_SYMBOL(blk_requeue_request); | |
140 | * Many block devices need to execute commands asynchronously, so they don't | |
141 | * block the whole kernel from preemption during request execution. This is | |
142 | * accomplished normally by inserting aritficial requests tagged as | |
143 | - * REQ_SPECIAL in to the corresponding request queue, and letting them be | |
144 | - * scheduled for actual execution by the request queue. | |
145 | + * REQ_TYPE_SPECIAL into the corresponding request queue, and letting them | |
146 | + * be scheduled for actual execution by the request queue. | |
147 | * | |
148 | * We have the option of inserting the head or the tail of the queue. | |
149 | * Typically we use the tail for new ioctls and so forth. We use the head | |
150 | @@ -1322,7 +1318,7 @@ static inline int bio_check_eod(struct b | |
151 | } | |
152 | ||
153 | /** | |
154 | - * generic_make_request: hand a buffer to its device driver for I/O | |
155 | + * generic_make_request - hand a buffer to its device driver for I/O | |
156 | * @bio: The bio describing the location in memory and on the device. | |
157 | * | |
158 | * generic_make_request() is used to make I/O requests of block | |
159 | @@ -1480,13 +1476,13 @@ void generic_make_request(struct bio *bi | |
160 | EXPORT_SYMBOL(generic_make_request); | |
161 | ||
162 | /** | |
163 | - * submit_bio: submit a bio to the block device layer for I/O | |
164 | + * submit_bio - submit a bio to the block device layer for I/O | |
165 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | |
166 | * @bio: The &struct bio which describes the I/O | |
167 | * | |
168 | * submit_bio() is very similar in purpose to generic_make_request(), and | |
169 | * uses that function to do most of the work. Both are fairly rough | |
170 | - * interfaces, @bio must be presetup and ready for I/O. | |
171 | + * interfaces; @bio must be presetup and ready for I/O. | |
172 | * | |
173 | */ | |
174 | void submit_bio(int rw, struct bio *bio) | |
175 | @@ -1524,7 +1520,7 @@ EXPORT_SYMBOL(submit_bio); | |
176 | /** | |
177 | * __end_that_request_first - end I/O on a request | |
178 | * @req: the request being processed | |
179 | - * @error: 0 for success, < 0 for error | |
180 | + * @error: %0 for success, < %0 for error | |
181 | * @nr_bytes: number of bytes to complete | |
182 | * | |
183 | * Description: | |
184 | @@ -1532,8 +1528,8 @@ EXPORT_SYMBOL(submit_bio); | |
185 | * for the next range of segments (if any) in the cluster. | |
186 | * | |
187 | * Return: | |
188 | - * 0 - we are done with this request, call end_that_request_last() | |
189 | - * 1 - still buffers pending for this request | |
190 | + * %0 - we are done with this request, call end_that_request_last() | |
191 | + * %1 - still buffers pending for this request | |
192 | **/ | |
193 | static int __end_that_request_first(struct request *req, int error, | |
194 | int nr_bytes) | |
195 | @@ -1544,7 +1540,7 @@ static int __end_that_request_first(stru | |
196 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); | |
197 | ||
198 | /* | |
199 | - * for a REQ_BLOCK_PC request, we want to carry any eventual | |
200 | + * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual | |
201 | * sense key with us all the way through | |
202 | */ | |
203 | if (!blk_pc_request(req)) | |
204 | @@ -1646,82 +1642,6 @@ static int __end_that_request_first(stru | |
205 | } | |
206 | ||
207 | /* | |
208 | - * splice the completion data to a local structure and hand off to | |
209 | - * process_completion_queue() to complete the requests | |
210 | - */ | |
211 | -static void blk_done_softirq(struct softirq_action *h) | |
212 | -{ | |
213 | - struct list_head *cpu_list, local_list; | |
214 | - | |
215 | - local_irq_disable(); | |
216 | - cpu_list = &__get_cpu_var(blk_cpu_done); | |
217 | - list_replace_init(cpu_list, &local_list); | |
218 | - local_irq_enable(); | |
219 | - | |
220 | - while (!list_empty(&local_list)) { | |
221 | - struct request *rq; | |
222 | - | |
223 | - rq = list_entry(local_list.next, struct request, donelist); | |
224 | - list_del_init(&rq->donelist); | |
225 | - rq->q->softirq_done_fn(rq); | |
226 | - } | |
227 | -} | |
228 | - | |
229 | -static int __cpuinit blk_cpu_notify(struct notifier_block *self, | |
230 | - unsigned long action, void *hcpu) | |
231 | -{ | |
232 | - /* | |
233 | - * If a CPU goes away, splice its entries to the current CPU | |
234 | - * and trigger a run of the softirq | |
235 | - */ | |
236 | - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | |
237 | - int cpu = (unsigned long) hcpu; | |
238 | - | |
239 | - local_irq_disable(); | |
240 | - list_splice_init(&per_cpu(blk_cpu_done, cpu), | |
241 | - &__get_cpu_var(blk_cpu_done)); | |
242 | - raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
243 | - local_irq_enable(); | |
244 | - } | |
245 | - | |
246 | - return NOTIFY_OK; | |
247 | -} | |
248 | - | |
249 | - | |
250 | -static struct notifier_block blk_cpu_notifier __cpuinitdata = { | |
251 | - .notifier_call = blk_cpu_notify, | |
252 | -}; | |
253 | - | |
254 | -/** | |
255 | - * blk_complete_request - end I/O on a request | |
256 | - * @req: the request being processed | |
257 | - * | |
258 | - * Description: | |
259 | - * Ends all I/O on a request. It does not handle partial completions, | |
260 | - * unless the driver actually implements this in its completion callback | |
261 | - * through requeueing. The actual completion happens out-of-order, | |
262 | - * through a softirq handler. The user must have registered a completion | |
263 | - * callback through blk_queue_softirq_done(). | |
264 | - **/ | |
265 | - | |
266 | -void blk_complete_request(struct request *req) | |
267 | -{ | |
268 | - struct list_head *cpu_list; | |
269 | - unsigned long flags; | |
270 | - | |
271 | - BUG_ON(!req->q->softirq_done_fn); | |
272 | - | |
273 | - local_irq_save(flags); | |
274 | - | |
275 | - cpu_list = &__get_cpu_var(blk_cpu_done); | |
276 | - list_add_tail(&req->donelist, cpu_list); | |
277 | - raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
278 | - | |
279 | - local_irq_restore(flags); | |
280 | -} | |
281 | -EXPORT_SYMBOL(blk_complete_request); | |
282 | - | |
283 | -/* | |
284 | * queue lock must be held | |
285 | */ | |
286 | static void end_that_request_last(struct request *req, int error) | |
287 | @@ -1810,11 +1730,11 @@ EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); | |
288 | /** | |
289 | * end_queued_request - end all I/O on a queued request | |
290 | * @rq: the request being processed | |
291 | - * @uptodate: error value or 0/1 uptodate flag | |
292 | + * @uptodate: error value or %0/%1 uptodate flag | |
293 | * | |
294 | * Description: | |
295 | * Ends all I/O on a request, and removes it from the block layer queues. | |
296 | - * Not suitable for normal IO completion, unless the driver still has | |
297 | + * Not suitable for normal I/O completion, unless the driver still has | |
298 | * the request attached to the block layer. | |
299 | * | |
300 | **/ | |
301 | @@ -1827,7 +1747,7 @@ EXPORT_SYMBOL(end_queued_request); | |
302 | /** | |
303 | * end_dequeued_request - end all I/O on a dequeued request | |
304 | * @rq: the request being processed | |
305 | - * @uptodate: error value or 0/1 uptodate flag | |
306 | + * @uptodate: error value or %0/%1 uptodate flag | |
307 | * | |
308 | * Description: | |
309 | * Ends all I/O on a request. The request must already have been | |
310 | @@ -1845,14 +1765,14 @@ EXPORT_SYMBOL(end_dequeued_request); | |
311 | /** | |
312 | * end_request - end I/O on the current segment of the request | |
313 | * @req: the request being processed | |
314 | - * @uptodate: error value or 0/1 uptodate flag | |
315 | + * @uptodate: error value or %0/%1 uptodate flag | |
316 | * | |
317 | * Description: | |
318 | * Ends I/O on the current segment of a request. If that is the only | |
319 | * remaining segment, the request is also completed and freed. | |
320 | * | |
321 | - * This is a remnant of how older block drivers handled IO completions. | |
322 | - * Modern drivers typically end IO on the full request in one go, unless | |
323 | + * This is a remnant of how older block drivers handled I/O completions. | |
324 | + * Modern drivers typically end I/O on the full request in one go, unless | |
325 | * they have a residual value to account for. For that case this function | |
326 | * isn't really useful, unless the residual just happens to be the | |
327 | * full current segment. In other words, don't use this function in new | |
328 | @@ -1870,12 +1790,12 @@ EXPORT_SYMBOL(end_request); | |
329 | /** | |
330 | * blk_end_io - Generic end_io function to complete a request. | |
331 | * @rq: the request being processed | |
332 | - * @error: 0 for success, < 0 for error | |
333 | + * @error: %0 for success, < %0 for error | |
334 | * @nr_bytes: number of bytes to complete @rq | |
335 | * @bidi_bytes: number of bytes to complete @rq->next_rq | |
336 | * @drv_callback: function called between completion of bios in the request | |
337 | * and completion of the request. | |
338 | - * If the callback returns non 0, this helper returns without | |
339 | + * If the callback returns non %0, this helper returns without | |
340 | * completion of the request. | |
341 | * | |
342 | * Description: | |
343 | @@ -1883,8 +1803,8 @@ EXPORT_SYMBOL(end_request); | |
344 | * If @rq has leftover, sets it up for the next range of segments. | |
345 | * | |
346 | * Return: | |
347 | - * 0 - we are done with this request | |
348 | - * 1 - this request is not freed yet, it still has pending buffers. | |
349 | + * %0 - we are done with this request | |
350 | + * %1 - this request is not freed yet, it still has pending buffers. | |
351 | **/ | |
352 | static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, | |
353 | unsigned int bidi_bytes, | |
354 | @@ -1893,7 +1813,7 @@ static int blk_end_io(struct request *rq | |
355 | struct request_queue *q = rq->q; | |
356 | unsigned long flags = 0UL; | |
357 | ||
358 | - if (bio_has_data(rq->bio) || blk_discard_rq(rq)) { | |
359 | + if (rq->bio) { | |
360 | if (__end_that_request_first(rq, error, nr_bytes)) | |
361 | return 1; | |
362 | ||
363 | @@ -1919,7 +1839,7 @@ static int blk_end_io(struct request *rq | |
364 | /** | |
365 | * blk_end_request - Helper function for drivers to complete the request. | |
366 | * @rq: the request being processed | |
367 | - * @error: 0 for success, < 0 for error | |
368 | + * @error: %0 for success, < %0 for error | |
369 | * @nr_bytes: number of bytes to complete | |
370 | * | |
371 | * Description: | |
372 | @@ -1927,8 +1847,8 @@ static int blk_end_io(struct request *rq | |
373 | * If @rq has leftover, sets it up for the next range of segments. | |
374 | * | |
375 | * Return: | |
376 | - * 0 - we are done with this request | |
377 | - * 1 - still buffers pending for this request | |
378 | + * %0 - we are done with this request | |
379 | + * %1 - still buffers pending for this request | |
380 | **/ | |
381 | int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | |
382 | { | |
383 | @@ -1939,20 +1859,19 @@ EXPORT_SYMBOL_GPL(blk_end_request); | |
384 | /** | |
385 | * __blk_end_request - Helper function for drivers to complete the request. | |
386 | * @rq: the request being processed | |
387 | - * @error: 0 for success, < 0 for error | |
388 | + * @error: %0 for success, < %0 for error | |
389 | * @nr_bytes: number of bytes to complete | |
390 | * | |
391 | * Description: | |
392 | * Must be called with queue lock held unlike blk_end_request(). | |
393 | * | |
394 | * Return: | |
395 | - * 0 - we are done with this request | |
396 | - * 1 - still buffers pending for this request | |
397 | + * %0 - we are done with this request | |
398 | + * %1 - still buffers pending for this request | |
399 | **/ | |
400 | int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | |
401 | { | |
402 | - if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) && | |
403 | - __end_that_request_first(rq, error, nr_bytes)) | |
404 | + if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) | |
405 | return 1; | |
406 | ||
407 | add_disk_randomness(rq->rq_disk); | |
408 | @@ -1966,7 +1885,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request); | |
409 | /** | |
410 | * blk_end_bidi_request - Helper function for drivers to complete bidi request. | |
411 | * @rq: the bidi request being processed | |
412 | - * @error: 0 for success, < 0 for error | |
413 | + * @error: %0 for success, < %0 for error | |
414 | * @nr_bytes: number of bytes to complete @rq | |
415 | * @bidi_bytes: number of bytes to complete @rq->next_rq | |
416 | * | |
417 | @@ -1974,8 +1893,8 @@ EXPORT_SYMBOL_GPL(__blk_end_request); | |
418 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. | |
419 | * | |
420 | * Return: | |
421 | - * 0 - we are done with this request | |
422 | - * 1 - still buffers pending for this request | |
423 | + * %0 - we are done with this request | |
424 | + * %1 - still buffers pending for this request | |
425 | **/ | |
426 | int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, | |
427 | unsigned int bidi_bytes) | |
428 | @@ -1987,11 +1906,11 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request); | |
429 | /** | |
430 | * blk_end_request_callback - Special helper function for tricky drivers | |
431 | * @rq: the request being processed | |
432 | - * @error: 0 for success, < 0 for error | |
433 | + * @error: %0 for success, < %0 for error | |
434 | * @nr_bytes: number of bytes to complete | |
435 | * @drv_callback: function called between completion of bios in the request | |
436 | * and completion of the request. | |
437 | - * If the callback returns non 0, this helper returns without | |
438 | + * If the callback returns non %0, this helper returns without | |
439 | * completion of the request. | |
440 | * | |
441 | * Description: | |
442 | @@ -2004,10 +1923,10 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request); | |
443 | * Don't use this interface in other places anymore. | |
444 | * | |
445 | * Return: | |
446 | - * 0 - we are done with this request | |
447 | - * 1 - this request is not freed yet. | |
448 | - * this request still has pending buffers or | |
449 | - * the driver doesn't want to finish this request yet. | |
450 | + * %0 - we are done with this request | |
451 | + * %1 - this request is not freed yet. | |
452 | + * this request still has pending buffers or | |
453 | + * the driver doesn't want to finish this request yet. | |
454 | **/ | |
455 | int blk_end_request_callback(struct request *rq, int error, | |
456 | unsigned int nr_bytes, | |
457 | @@ -2026,7 +1945,6 @@ void blk_rq_bio_prep(struct request_queu | |
458 | ||
459 | if (bio_has_data(bio)) { | |
460 | rq->nr_phys_segments = bio_phys_segments(q, bio); | |
461 | - rq->nr_hw_segments = bio_hw_segments(q, bio); | |
462 | rq->buffer = bio_data(bio); | |
463 | } | |
464 | rq->current_nr_sectors = bio_cur_sectors(bio); | |
465 | @@ -2054,8 +1972,6 @@ EXPORT_SYMBOL(kblockd_flush_work); | |
466 | ||
467 | int __init blk_dev_init(void) | |
468 | { | |
469 | - int i; | |
470 | - | |
471 | kblockd_workqueue = create_workqueue("kblockd"); | |
472 | if (!kblockd_workqueue) | |
473 | panic("Failed to create kblockd\n"); | |
474 | @@ -2066,12 +1982,6 @@ int __init blk_dev_init(void) | |
475 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", | |
476 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); | |
477 | ||
478 | - for_each_possible_cpu(i) | |
479 | - INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); | |
480 | - | |
481 | - open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); | |
482 | - register_hotcpu_notifier(&blk_cpu_notifier); | |
483 | - | |
484 | return 0; | |
485 | } | |
486 | ||
487 | --- a/block/blk-exec.c | |
488 | +++ b/block/blk-exec.c | |
489 | @@ -16,7 +16,7 @@ | |
490 | /** | |
491 | * blk_end_sync_rq - executes a completion event on a request | |
492 | * @rq: request to complete | |
493 | - * @error: end io status of the request | |
494 | + * @error: end I/O status of the request | |
495 | */ | |
496 | static void blk_end_sync_rq(struct request *rq, int error) | |
497 | { | |
498 | @@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct reque | |
499 | * @done: I/O completion handler | |
500 | * | |
501 | * Description: | |
502 | - * Insert a fully prepared request at the back of the io scheduler queue | |
503 | + * Insert a fully prepared request at the back of the I/O scheduler queue | |
504 | * for execution. Don't wait for completion. | |
505 | */ | |
506 | void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | |
507 | @@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait) | |
508 | * @at_head: insert request at head or tail of queue | |
509 | * | |
510 | * Description: | |
511 | - * Insert a fully prepared request at the back of the io scheduler queue | |
512 | + * Insert a fully prepared request at the back of the I/O scheduler queue | |
513 | * for execution and wait for completion. | |
514 | */ | |
515 | int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, | |
516 | --- a/block/blk-integrity.c | |
517 | +++ b/block/blk-integrity.c | |
518 | @@ -109,8 +109,8 @@ EXPORT_SYMBOL(blk_rq_map_integrity_sg); | |
519 | ||
520 | /** | |
521 | * blk_integrity_compare - Compare integrity profile of two block devices | |
522 | - * @b1: Device to compare | |
523 | - * @b2: Device to compare | |
524 | + * @bd1: Device to compare | |
525 | + * @bd2: Device to compare | |
526 | * | |
527 | * Description: Meta-devices like DM and MD need to verify that all | |
528 | * sub-devices use the same integrity format before advertising to | |
529 | --- a/block/blk-map.c | |
530 | +++ b/block/blk-map.c | |
531 | @@ -85,17 +85,17 @@ static int __blk_rq_map_user(struct requ | |
532 | } | |
533 | ||
534 | /** | |
535 | - * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage | |
536 | + * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage | |
537 | * @q: request queue where request should be inserted | |
538 | * @rq: request structure to fill | |
539 | * @ubuf: the user buffer | |
540 | * @len: length of user data | |
541 | * | |
542 | * Description: | |
543 | - * Data will be mapped directly for zero copy io, if possible. Otherwise | |
544 | + * Data will be mapped directly for zero copy I/O, if possible. Otherwise | |
545 | * a kernel bounce buffer is used. | |
546 | * | |
547 | - * A matching blk_rq_unmap_user() must be issued at the end of io, while | |
548 | + * A matching blk_rq_unmap_user() must be issued at the end of I/O, while | |
549 | * still in process context. | |
550 | * | |
551 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | |
552 | @@ -154,7 +154,7 @@ unmap_rq: | |
553 | EXPORT_SYMBOL(blk_rq_map_user); | |
554 | ||
555 | /** | |
556 | - * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage | |
557 | + * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage | |
558 | * @q: request queue where request should be inserted | |
559 | * @rq: request to map data to | |
560 | * @iov: pointer to the iovec | |
561 | @@ -162,10 +162,10 @@ EXPORT_SYMBOL(blk_rq_map_user); | |
562 | * @len: I/O byte count | |
563 | * | |
564 | * Description: | |
565 | - * Data will be mapped directly for zero copy io, if possible. Otherwise | |
566 | + * Data will be mapped directly for zero copy I/O, if possible. Otherwise | |
567 | * a kernel bounce buffer is used. | |
568 | * | |
569 | - * A matching blk_rq_unmap_user() must be issued at the end of io, while | |
570 | + * A matching blk_rq_unmap_user() must be issued at the end of I/O, while | |
571 | * still in process context. | |
572 | * | |
573 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | |
574 | @@ -224,7 +224,7 @@ int blk_rq_map_user_iov(struct request_q | |
575 | * Description: | |
576 | * Unmap a rq previously mapped by blk_rq_map_user(). The caller must | |
577 | * supply the original rq->bio from the blk_rq_map_user() return, since | |
578 | - * the io completion may have changed rq->bio. | |
579 | + * the I/O completion may have changed rq->bio. | |
580 | */ | |
581 | int blk_rq_unmap_user(struct bio *bio) | |
582 | { | |
583 | @@ -250,7 +250,7 @@ int blk_rq_unmap_user(struct bio *bio) | |
584 | EXPORT_SYMBOL(blk_rq_unmap_user); | |
585 | ||
586 | /** | |
587 | - * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage | |
588 | + * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage | |
589 | * @q: request queue where request should be inserted | |
590 | * @rq: request to fill | |
591 | * @kbuf: the kernel buffer | |
592 | --- a/block/blk-merge.c | |
593 | +++ b/block/blk-merge.c | |
594 | @@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct reques | |
595 | void blk_recalc_rq_segments(struct request *rq) | |
596 | { | |
597 | int nr_phys_segs; | |
598 | - int nr_hw_segs; | |
599 | unsigned int phys_size; | |
600 | - unsigned int hw_size; | |
601 | struct bio_vec *bv, *bvprv = NULL; | |
602 | int seg_size; | |
603 | - int hw_seg_size; | |
604 | int cluster; | |
605 | struct req_iterator iter; | |
606 | int high, highprv = 1; | |
607 | @@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct reque | |
608 | return; | |
609 | ||
610 | cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); | |
611 | - hw_seg_size = seg_size = 0; | |
612 | - phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; | |
613 | + seg_size = 0; | |
614 | + phys_size = nr_phys_segs = 0; | |
615 | rq_for_each_segment(bv, rq, iter) { | |
616 | /* | |
617 | * the trick here is making sure that a high page is never | |
618 | @@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct reque | |
619 | */ | |
620 | high = page_to_pfn(bv->bv_page) > q->bounce_pfn; | |
621 | if (high || highprv) | |
622 | - goto new_hw_segment; | |
623 | + goto new_segment; | |
624 | if (cluster) { | |
625 | if (seg_size + bv->bv_len > q->max_segment_size) | |
626 | goto new_segment; | |
627 | @@ -74,27 +71,12 @@ void blk_recalc_rq_segments(struct reque | |
628 | goto new_segment; | |
629 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) | |
630 | goto new_segment; | |
631 | - if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) | |
632 | - goto new_hw_segment; | |
633 | ||
634 | seg_size += bv->bv_len; | |
635 | - hw_seg_size += bv->bv_len; | |
636 | bvprv = bv; | |
637 | continue; | |
638 | } | |
639 | new_segment: | |
640 | - if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && | |
641 | - !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) | |
642 | - hw_seg_size += bv->bv_len; | |
643 | - else { | |
644 | -new_hw_segment: | |
645 | - if (nr_hw_segs == 1 && | |
646 | - hw_seg_size > rq->bio->bi_hw_front_size) | |
647 | - rq->bio->bi_hw_front_size = hw_seg_size; | |
648 | - hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; | |
649 | - nr_hw_segs++; | |
650 | - } | |
651 | - | |
652 | if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) | |
653 | rq->bio->bi_seg_front_size = seg_size; | |
654 | ||
655 | @@ -104,17 +86,11 @@ new_hw_segment: | |
656 | highprv = high; | |
657 | } | |
658 | ||
659 | - if (nr_hw_segs == 1 && | |
660 | - hw_seg_size > rq->bio->bi_hw_front_size) | |
661 | - rq->bio->bi_hw_front_size = hw_seg_size; | |
662 | - if (hw_seg_size > rq->biotail->bi_hw_back_size) | |
663 | - rq->biotail->bi_hw_back_size = hw_seg_size; | |
664 | if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) | |
665 | rq->bio->bi_seg_front_size = seg_size; | |
666 | if (seg_size > rq->biotail->bi_seg_back_size) | |
667 | rq->biotail->bi_seg_back_size = seg_size; | |
668 | rq->nr_phys_segments = nr_phys_segs; | |
669 | - rq->nr_hw_segments = nr_hw_segs; | |
670 | } | |
671 | ||
672 | void blk_recount_segments(struct request_queue *q, struct bio *bio) | |
673 | @@ -127,7 +103,6 @@ void blk_recount_segments(struct request | |
674 | blk_recalc_rq_segments(&rq); | |
675 | bio->bi_next = nxt; | |
676 | bio->bi_phys_segments = rq.nr_phys_segments; | |
677 | - bio->bi_hw_segments = rq.nr_hw_segments; | |
678 | bio->bi_flags |= (1 << BIO_SEG_VALID); | |
679 | } | |
680 | EXPORT_SYMBOL(blk_recount_segments); | |
681 | @@ -158,23 +133,6 @@ static int blk_phys_contig_segment(struc | |
682 | return 0; | |
683 | } | |
684 | ||
685 | -static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, | |
686 | - struct bio *nxt) | |
687 | -{ | |
688 | - if (!bio_flagged(bio, BIO_SEG_VALID)) | |
689 | - blk_recount_segments(q, bio); | |
690 | - if (!bio_flagged(nxt, BIO_SEG_VALID)) | |
691 | - blk_recount_segments(q, nxt); | |
692 | - if (bio_has_data(bio) && | |
693 | - (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || | |
694 | - BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))) | |
695 | - return 0; | |
696 | - if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) | |
697 | - return 0; | |
698 | - | |
699 | - return 1; | |
700 | -} | |
701 | - | |
702 | /* | |
703 | * map a request to scatterlist, return number of sg entries setup. Caller | |
704 | * must make sure sg can hold rq->nr_phys_segments entries | |
705 | @@ -288,10 +246,9 @@ static inline int ll_new_hw_segment(stru | |
706 | struct request *req, | |
707 | struct bio *bio) | |
708 | { | |
709 | - int nr_hw_segs = bio_hw_segments(q, bio); | |
710 | int nr_phys_segs = bio_phys_segments(q, bio); | |
711 | ||
712 | - if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments | |
713 | + if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments | |
714 | || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { | |
715 | req->cmd_flags |= REQ_NOMERGE; | |
716 | if (req == q->last_merge) | |
717 | @@ -303,7 +260,6 @@ static inline int ll_new_hw_segment(stru | |
718 | * This will form the start of a new hw segment. Bump both | |
719 | * counters. | |
720 | */ | |
721 | - req->nr_hw_segments += nr_hw_segs; | |
722 | req->nr_phys_segments += nr_phys_segs; | |
723 | return 1; | |
724 | } | |
725 | @@ -312,7 +268,6 @@ int ll_back_merge_fn(struct request_queu | |
726 | struct bio *bio) | |
727 | { | |
728 | unsigned short max_sectors; | |
729 | - int len; | |
730 | ||
731 | if (unlikely(blk_pc_request(req))) | |
732 | max_sectors = q->max_hw_sectors; | |
733 | @@ -329,20 +284,6 @@ int ll_back_merge_fn(struct request_queu | |
734 | blk_recount_segments(q, req->biotail); | |
735 | if (!bio_flagged(bio, BIO_SEG_VALID)) | |
736 | blk_recount_segments(q, bio); | |
737 | - len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; | |
738 | - if (!bio_has_data(bio) || | |
739 | - (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) | |
740 | - && !BIOVEC_VIRT_OVERSIZE(len))) { | |
741 | - int mergeable = ll_new_mergeable(q, req, bio); | |
742 | - | |
743 | - if (mergeable) { | |
744 | - if (req->nr_hw_segments == 1) | |
745 | - req->bio->bi_hw_front_size = len; | |
746 | - if (bio->bi_hw_segments == 1) | |
747 | - bio->bi_hw_back_size = len; | |
748 | - } | |
749 | - return mergeable; | |
750 | - } | |
751 | ||
752 | return ll_new_hw_segment(q, req, bio); | |
753 | } | |
754 | @@ -351,7 +292,6 @@ int ll_front_merge_fn(struct request_que | |
755 | struct bio *bio) | |
756 | { | |
757 | unsigned short max_sectors; | |
758 | - int len; | |
759 | ||
760 | if (unlikely(blk_pc_request(req))) | |
761 | max_sectors = q->max_hw_sectors; | |
762 | @@ -365,24 +305,10 @@ int ll_front_merge_fn(struct request_que | |
763 | q->last_merge = NULL; | |
764 | return 0; | |
765 | } | |
766 | - len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; | |
767 | if (!bio_flagged(bio, BIO_SEG_VALID)) | |
768 | blk_recount_segments(q, bio); | |
769 | if (!bio_flagged(req->bio, BIO_SEG_VALID)) | |
770 | blk_recount_segments(q, req->bio); | |
771 | - if (!bio_has_data(bio) || | |
772 | - (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && | |
773 | - !BIOVEC_VIRT_OVERSIZE(len))) { | |
774 | - int mergeable = ll_new_mergeable(q, req, bio); | |
775 | - | |
776 | - if (mergeable) { | |
777 | - if (bio->bi_hw_segments == 1) | |
778 | - bio->bi_hw_front_size = len; | |
779 | - if (req->nr_hw_segments == 1) | |
780 | - req->biotail->bi_hw_back_size = len; | |
781 | - } | |
782 | - return mergeable; | |
783 | - } | |
784 | ||
785 | return ll_new_hw_segment(q, req, bio); | |
786 | } | |
787 | @@ -391,7 +317,6 @@ static int ll_merge_requests_fn(struct r | |
788 | struct request *next) | |
789 | { | |
790 | int total_phys_segments; | |
791 | - int total_hw_segments; | |
792 | unsigned int seg_size = | |
793 | req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size; | |
794 | ||
795 | @@ -420,26 +345,11 @@ static int ll_merge_requests_fn(struct r | |
796 | if (total_phys_segments > q->max_phys_segments) | |
797 | return 0; | |
798 | ||
799 | - total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; | |
800 | - if (blk_hw_contig_segment(q, req->biotail, next->bio)) { | |
801 | - int len = req->biotail->bi_hw_back_size + | |
802 | - next->bio->bi_hw_front_size; | |
803 | - /* | |
804 | - * propagate the combined length to the end of the requests | |
805 | - */ | |
806 | - if (req->nr_hw_segments == 1) | |
807 | - req->bio->bi_hw_front_size = len; | |
808 | - if (next->nr_hw_segments == 1) | |
809 | - next->biotail->bi_hw_back_size = len; | |
810 | - total_hw_segments--; | |
811 | - } | |
812 | - | |
813 | - if (total_hw_segments > q->max_hw_segments) | |
814 | + if (total_phys_segments > q->max_hw_segments) | |
815 | return 0; | |
816 | ||
817 | /* Merge is OK... */ | |
818 | req->nr_phys_segments = total_phys_segments; | |
819 | - req->nr_hw_segments = total_hw_segments; | |
820 | return 1; | |
821 | } | |
822 | ||
823 | --- a/block/blk-settings.c | |
824 | +++ b/block/blk-settings.c | |
825 | @@ -144,7 +144,7 @@ EXPORT_SYMBOL(blk_queue_make_request); | |
826 | * Different hardware can have different requirements as to what pages | |
827 | * it can do I/O directly to. A low level driver can call | |
828 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce | |
829 | - * buffers for doing I/O to pages residing above @page. | |
830 | + * buffers for doing I/O to pages residing above @dma_addr. | |
831 | **/ | |
832 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) | |
833 | { | |
834 | @@ -229,7 +229,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segment | |
835 | * Description: | |
836 | * Enables a low level driver to set an upper limit on the number of | |
837 | * hw data segments in a request. This would be the largest number of | |
838 | - * address/length pairs the host adapter can actually give as once | |
839 | + * address/length pairs the host adapter can actually give at once | |
840 | * to the device. | |
841 | **/ | |
842 | void blk_queue_max_hw_segments(struct request_queue *q, | |
843 | @@ -410,7 +410,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary | |
844 | * @mask: alignment mask | |
845 | * | |
846 | * description: | |
847 | - * set required memory and length aligment for direct dma transactions. | |
848 | + * set required memory and length alignment for direct dma transactions. | |
849 | * this is used when buiding direct io requests for the queue. | |
850 | * | |
851 | **/ | |
852 | @@ -426,7 +426,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment); | |
853 | * @mask: alignment mask | |
854 | * | |
855 | * description: | |
856 | - * update required memory and length aligment for direct dma transactions. | |
857 | + * update required memory and length alignment for direct dma transactions. | |
858 | * If the requested alignment is larger than the current alignment, then | |
859 | * the current queue alignment is updated to the new value, otherwise it | |
860 | * is left alone. The design of this is to allow multiple objects | |
861 | --- /dev/null | |
862 | +++ b/block/blk-softirq.c | |
863 | @@ -0,0 +1,103 @@ | |
864 | +/* | |
865 | + * Functions related to softirq rq completions | |
866 | + */ | |
867 | +#include <linux/kernel.h> | |
868 | +#include <linux/module.h> | |
869 | +#include <linux/init.h> | |
870 | +#include <linux/bio.h> | |
871 | +#include <linux/blkdev.h> | |
872 | +#include <linux/interrupt.h> | |
873 | +#include <linux/cpu.h> | |
874 | + | |
875 | +#include "blk.h" | |
876 | + | |
877 | +static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | |
878 | + | |
879 | +static int __cpuinit blk_cpu_notify(struct notifier_block *self, | |
880 | + unsigned long action, void *hcpu) | |
881 | +{ | |
882 | + /* | |
883 | + * If a CPU goes away, splice its entries to the current CPU | |
884 | + * and trigger a run of the softirq | |
885 | + */ | |
886 | + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | |
887 | + int cpu = (unsigned long) hcpu; | |
888 | + | |
889 | + local_irq_disable(); | |
890 | + list_splice_init(&per_cpu(blk_cpu_done, cpu), | |
891 | + &__get_cpu_var(blk_cpu_done)); | |
892 | + raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
893 | + local_irq_enable(); | |
894 | + } | |
895 | + | |
896 | + return NOTIFY_OK; | |
897 | +} | |
898 | + | |
899 | + | |
900 | +static struct notifier_block blk_cpu_notifier __cpuinitdata = { | |
901 | + .notifier_call = blk_cpu_notify, | |
902 | +}; | |
903 | + | |
904 | +/* | |
905 | + * splice the completion data to a local list and hand each request to | |
906 | + * its queue's softirq_done_fn() to complete it | |
907 | + */ | |
908 | +static void blk_done_softirq(struct softirq_action *h) | |
909 | +{ | |
910 | + struct list_head *cpu_list, local_list; | |
911 | + | |
912 | + local_irq_disable(); | |
913 | + cpu_list = &__get_cpu_var(blk_cpu_done); | |
914 | + list_replace_init(cpu_list, &local_list); | |
915 | + local_irq_enable(); | |
916 | + | |
917 | + while (!list_empty(&local_list)) { | |
918 | + struct request *rq; | |
919 | + | |
920 | + rq = list_entry(local_list.next, struct request, donelist); | |
921 | + list_del_init(&rq->donelist); | |
922 | + rq->q->softirq_done_fn(rq); | |
923 | + } | |
924 | +} | |
925 | + | |
926 | +/** | |
927 | + * blk_complete_request - end I/O on a request | |
928 | + * @req: the request being processed | |
929 | + * | |
930 | + * Description: | |
931 | + * Ends all I/O on a request. It does not handle partial completions, | |
932 | + * unless the driver actually implements this in its completion callback | |
933 | + * through requeueing. The actual completion happens out-of-order, | |
934 | + * through a softirq handler. The user must have registered a completion | |
935 | + * callback through blk_queue_softirq_done(). | |
936 | + **/ | |
937 | + | |
938 | +void blk_complete_request(struct request *req) | |
939 | +{ | |
940 | + struct list_head *cpu_list; | |
941 | + unsigned long flags; | |
942 | + | |
943 | + BUG_ON(!req->q->softirq_done_fn); | |
944 | + | |
945 | + local_irq_save(flags); | |
946 | + | |
947 | + cpu_list = &__get_cpu_var(blk_cpu_done); | |
948 | + list_add_tail(&req->donelist, cpu_list); | |
949 | + raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
950 | + | |
951 | + local_irq_restore(flags); | |
952 | +} | |
953 | +EXPORT_SYMBOL(blk_complete_request); | |
954 | + | |
955 | +int __init blk_softirq_init(void) | |
956 | +{ | |
957 | + int i; | |
958 | + | |
959 | + for_each_possible_cpu(i) | |
960 | + INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); | |
961 | + | |
962 | + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); | |
963 | + register_hotcpu_notifier(&blk_cpu_notifier); | |
964 | + return 0; | |
965 | +} | |
966 | +subsys_initcall(blk_softirq_init); | |
967 | --- a/block/blk-tag.c | |
968 | +++ b/block/blk-tag.c | |
969 | @@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag); | |
970 | * __blk_free_tags - release a given set of tag maintenance info | |
971 | * @bqt: the tag map to free | |
972 | * | |
973 | - * Tries to free the specified @bqt@. Returns true if it was | |
974 | + * Tries to free the specified @bqt. Returns true if it was | |
975 | * actually freed and false if there are still references using it | |
976 | */ | |
977 | static int __blk_free_tags(struct blk_queue_tag *bqt) | |
978 | @@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct reques | |
979 | * blk_free_tags - release a given set of tag maintenance info | |
980 | * @bqt: the tag map to free | |
981 | * | |
982 | - * For externally managed @bqt@ frees the map. Callers of this | |
983 | + * For externally managed @bqt frees the map. Callers of this | |
984 | * function must guarantee to have released all the queues that | |
985 | * might have been using this tag map. | |
986 | */ | |
987 | @@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags); | |
988 | * @q: the request queue for the device | |
989 | * | |
990 | * Notes: | |
991 | - * This is used to disabled tagged queuing to a device, yet leave | |
992 | + * This is used to disable tagged queuing to a device, yet leave | |
993 | * queue in function. | |
994 | **/ | |
995 | void blk_queue_free_tags(struct request_queue *q) | |
996 | @@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags); | |
997 | * @rq: the request that has completed | |
998 | * | |
999 | * Description: | |
1000 | - * Typically called when end_that_request_first() returns 0, meaning | |
1001 | + * Typically called when end_that_request_first() returns %0, meaning | |
1002 | * all transfers have been done for a request. It's important to call | |
1003 | * this function before end_that_request_last(), as that will put the | |
1004 | * request back on the free list thus corrupting the internal tag list. | |
1005 | --- a/block/cfq-iosched.c | |
1006 | +++ b/block/cfq-iosched.c | |
1007 | @@ -39,6 +39,7 @@ static int cfq_slice_idle = HZ / 125; | |
1008 | #define CFQ_MIN_TT (2) | |
1009 | ||
1010 | #define CFQ_SLICE_SCALE (5) | |
1011 | +#define CFQ_HW_QUEUE_MIN (5) | |
1012 | ||
1013 | #define RQ_CIC(rq) \ | |
1014 | ((struct cfq_io_context *) (rq)->elevator_private) | |
1015 | @@ -86,7 +87,14 @@ struct cfq_data { | |
1016 | ||
1017 | int rq_in_driver; | |
1018 | int sync_flight; | |
1019 | + | |
1020 | + /* | |
1021 | + * queue-depth detection | |
1022 | + */ | |
1023 | + int rq_queued; | |
1024 | int hw_tag; | |
1025 | + int hw_tag_samples; | |
1026 | + int rq_in_driver_peak; | |
1027 | ||
1028 | /* | |
1029 | * idle window management | |
1030 | @@ -654,15 +662,6 @@ static void cfq_activate_request(struct | |
1031 | cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", | |
1032 | cfqd->rq_in_driver); | |
1033 | ||
1034 | - /* | |
1035 | - * If the depth is larger 1, it really could be queueing. But lets | |
1036 | - * make the mark a little higher - idling could still be good for | |
1037 | - * low queueing, and a low queueing number could also just indicate | |
1038 | - * a SCSI mid layer like behaviour where limit+1 is often seen. | |
1039 | - */ | |
1040 | - if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) | |
1041 | - cfqd->hw_tag = 1; | |
1042 | - | |
1043 | cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; | |
1044 | } | |
1045 | ||
1046 | @@ -686,6 +685,7 @@ static void cfq_remove_request(struct re | |
1047 | list_del_init(&rq->queuelist); | |
1048 | cfq_del_rq_rb(rq); | |
1049 | ||
1050 | + cfqq->cfqd->rq_queued--; | |
1051 | if (rq_is_meta(rq)) { | |
1052 | WARN_ON(!cfqq->meta_pending); | |
1053 | cfqq->meta_pending--; | |
1054 | @@ -1833,6 +1833,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, s | |
1055 | { | |
1056 | struct cfq_io_context *cic = RQ_CIC(rq); | |
1057 | ||
1058 | + cfqd->rq_queued++; | |
1059 | if (rq_is_meta(rq)) | |
1060 | cfqq->meta_pending++; | |
1061 | ||
1062 | @@ -1880,6 +1881,31 @@ static void cfq_insert_request(struct re | |
1063 | cfq_rq_enqueued(cfqd, cfqq, rq); | |
1064 | } | |
1065 | ||
1066 | +/* | |
1067 | + * Update hw_tag based on peak queue depth over 50 samples under | |
1068 | + * sufficient load. | |
1069 | + */ | |
1070 | +static void cfq_update_hw_tag(struct cfq_data *cfqd) | |
1071 | +{ | |
1072 | + if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak) | |
1073 | + cfqd->rq_in_driver_peak = cfqd->rq_in_driver; | |
1074 | + | |
1075 | + if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && | |
1076 | + cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) | |
1077 | + return; | |
1078 | + | |
1079 | + if (cfqd->hw_tag_samples++ < 50) | |
1080 | + return; | |
1081 | + | |
1082 | + if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN) | |
1083 | + cfqd->hw_tag = 1; | |
1084 | + else | |
1085 | + cfqd->hw_tag = 0; | |
1086 | + | |
1087 | + cfqd->hw_tag_samples = 0; | |
1088 | + cfqd->rq_in_driver_peak = 0; | |
1089 | +} | |
1090 | + | |
1091 | static void cfq_completed_request(struct request_queue *q, struct request *rq) | |
1092 | { | |
1093 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | |
1094 | @@ -1890,6 +1916,8 @@ static void cfq_completed_request(struct | |
1095 | now = jiffies; | |
1096 | cfq_log_cfqq(cfqd, cfqq, "complete"); | |
1097 | ||
1098 | + cfq_update_hw_tag(cfqd); | |
1099 | + | |
1100 | WARN_ON(!cfqd->rq_in_driver); | |
1101 | WARN_ON(!cfqq->dispatched); | |
1102 | cfqd->rq_in_driver--; | |
1103 | @@ -2200,6 +2228,7 @@ static void *cfq_init_queue(struct reque | |
1104 | cfqd->cfq_slice[1] = cfq_slice_sync; | |
1105 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; | |
1106 | cfqd->cfq_slice_idle = cfq_slice_idle; | |
1107 | + cfqd->hw_tag = 1; | |
1108 | ||
1109 | return cfqd; | |
1110 | } | |
1111 | --- a/block/deadline-iosched.c | |
1112 | +++ b/block/deadline-iosched.c | |
1113 | @@ -33,7 +33,7 @@ struct deadline_data { | |
1114 | */ | |
1115 | struct rb_root sort_list[2]; | |
1116 | struct list_head fifo_list[2]; | |
1117 | - | |
1118 | + | |
1119 | /* | |
1120 | * next in sort order. read, write or both are NULL | |
1121 | */ | |
1122 | @@ -53,7 +53,11 @@ struct deadline_data { | |
1123 | ||
1124 | static void deadline_move_request(struct deadline_data *, struct request *); | |
1125 | ||
1126 | -#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))]) | |
1127 | +static inline struct rb_root * | |
1128 | +deadline_rb_root(struct deadline_data *dd, struct request *rq) | |
1129 | +{ | |
1130 | + return &dd->sort_list[rq_data_dir(rq)]; | |
1131 | +} | |
1132 | ||
1133 | /* | |
1134 | * get the request after `rq' in sector-sorted order | |
1135 | @@ -72,15 +76,11 @@ deadline_latter_request(struct request * | |
1136 | static void | |
1137 | deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) | |
1138 | { | |
1139 | - struct rb_root *root = RQ_RB_ROOT(dd, rq); | |
1140 | + struct rb_root *root = deadline_rb_root(dd, rq); | |
1141 | struct request *__alias; | |
1142 | ||
1143 | -retry: | |
1144 | - __alias = elv_rb_add(root, rq); | |
1145 | - if (unlikely(__alias)) { | |
1146 | + while (unlikely(__alias = elv_rb_add(root, rq))) | |
1147 | deadline_move_request(dd, __alias); | |
1148 | - goto retry; | |
1149 | - } | |
1150 | } | |
1151 | ||
1152 | static inline void | |
1153 | @@ -91,7 +91,7 @@ deadline_del_rq_rb(struct deadline_data | |
1154 | if (dd->next_rq[data_dir] == rq) | |
1155 | dd->next_rq[data_dir] = deadline_latter_request(rq); | |
1156 | ||
1157 | - elv_rb_del(RQ_RB_ROOT(dd, rq), rq); | |
1158 | + elv_rb_del(deadline_rb_root(dd, rq), rq); | |
1159 | } | |
1160 | ||
1161 | /* | |
1162 | @@ -106,7 +106,7 @@ deadline_add_request(struct request_queu | |
1163 | deadline_add_rq_rb(dd, rq); | |
1164 | ||
1165 | /* | |
1166 | - * set expire time (only used for reads) and add to fifo list | |
1167 | + * set expire time and add to fifo list | |
1168 | */ | |
1169 | rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]); | |
1170 | list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); | |
1171 | @@ -162,7 +162,7 @@ static void deadline_merged_request(stru | |
1172 | * if the merge was a front merge, we need to reposition request | |
1173 | */ | |
1174 | if (type == ELEVATOR_FRONT_MERGE) { | |
1175 | - elv_rb_del(RQ_RB_ROOT(dd, req), req); | |
1176 | + elv_rb_del(deadline_rb_root(dd, req), req); | |
1177 | deadline_add_rq_rb(dd, req); | |
1178 | } | |
1179 | } | |
1180 | @@ -212,7 +212,7 @@ deadline_move_request(struct deadline_da | |
1181 | dd->next_rq[WRITE] = NULL; | |
1182 | dd->next_rq[data_dir] = deadline_latter_request(rq); | |
1183 | ||
1184 | - dd->last_sector = rq->sector + rq->nr_sectors; | |
1185 | + dd->last_sector = rq_end_sector(rq); | |
1186 | ||
1187 | /* | |
1188 | * take it off the sort and fifo list, move | |
1189 | @@ -222,7 +222,7 @@ deadline_move_request(struct deadline_da | |
1190 | } | |
1191 | ||
1192 | /* | |
1193 | - * deadline_check_fifo returns 0 if there are no expired reads on the fifo, | |
1194 | + * deadline_check_fifo returns 0 if there are no expired requests on the fifo, | |
1195 | * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) | |
1196 | */ | |
1197 | static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) | |
1198 | @@ -258,17 +258,9 @@ static int deadline_dispatch_requests(st | |
1199 | else | |
1200 | rq = dd->next_rq[READ]; | |
1201 | ||
1202 | - if (rq) { | |
1203 | - /* we have a "next request" */ | |
1204 | - | |
1205 | - if (dd->last_sector != rq->sector) | |
1206 | - /* end the batch on a non sequential request */ | |
1207 | - dd->batching += dd->fifo_batch; | |
1208 | - | |
1209 | - if (dd->batching < dd->fifo_batch) | |
1210 | - /* we are still entitled to batch */ | |
1211 | - goto dispatch_request; | |
1212 | - } | |
1213 | + if (rq && dd->batching < dd->fifo_batch) | |
1214 | + /* we have a next request and are still entitled to batch */ | |
1215 | + goto dispatch_request; | |
1216 | ||
1217 | /* | |
1218 | * at this point we are not running a batch. select the appropriate | |
1219 | --- a/block/elevator.c | |
1220 | +++ b/block/elevator.c | |
1221 | @@ -34,8 +34,7 @@ | |
1222 | #include <linux/delay.h> | |
1223 | #include <linux/blktrace_api.h> | |
1224 | #include <linux/hash.h> | |
1225 | - | |
1226 | -#include <asm/uaccess.h> | |
1227 | +#include <linux/uaccess.h> | |
1228 | ||
1229 | static DEFINE_SPINLOCK(elv_list_lock); | |
1230 | static LIST_HEAD(elv_list); | |
1231 | @@ -790,7 +789,6 @@ struct request *elv_next_request(struct | |
1232 | * device can handle | |
1233 | */ | |
1234 | rq->nr_phys_segments++; | |
1235 | - rq->nr_hw_segments++; | |
1236 | } | |
1237 | ||
1238 | if (!q->prep_rq_fn) | |
1239 | @@ -813,7 +811,6 @@ struct request *elv_next_request(struct | |
1240 | * so that we don't add it again | |
1241 | */ | |
1242 | --rq->nr_phys_segments; | |
1243 | - --rq->nr_hw_segments; | |
1244 | } | |
1245 | ||
1246 | rq = NULL; | |
1247 | --- a/block/genhd.c | |
1248 | +++ b/block/genhd.c | |
1249 | @@ -211,10 +211,11 @@ void unlink_gendisk(struct gendisk *disk | |
1250 | ||
1251 | /** | |
1252 | * get_gendisk - get partitioning information for a given device | |
1253 | - * @dev: device to get partitioning information for | |
1254 | + * @devt: device to get partitioning information for | |
1255 | + * @part: returned partition index | |
1256 | * | |
1257 | * This function gets the structure containing partitioning | |
1258 | - * information for the given device @dev. | |
1259 | + * information for the given device @devt. | |
1260 | */ | |
1261 | struct gendisk *get_gendisk(dev_t devt, int *part) | |
1262 | { | |
1263 | --- a/drivers/block/ps3disk.c | |
1264 | +++ b/drivers/block/ps3disk.c | |
1265 | @@ -199,7 +199,8 @@ static void ps3disk_do_request(struct ps | |
1266 | if (blk_fs_request(req)) { | |
1267 | if (ps3disk_submit_request_sg(dev, req)) | |
1268 | break; | |
1269 | - } else if (req->cmd_type == REQ_TYPE_FLUSH) { | |
1270 | + } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && | |
1271 | + req->cmd[0] == REQ_LB_OP_FLUSH) { | |
1272 | if (ps3disk_submit_flush_request(dev, req)) | |
1273 | break; | |
1274 | } else { | |
1275 | @@ -257,7 +258,8 @@ static irqreturn_t ps3disk_interrupt(int | |
1276 | return IRQ_HANDLED; | |
1277 | } | |
1278 | ||
1279 | - if (req->cmd_type == REQ_TYPE_FLUSH) { | |
1280 | + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && | |
1281 | + req->cmd[0] == REQ_LB_OP_FLUSH) { | |
1282 | read = 0; | |
1283 | num_sectors = req->hard_cur_sectors; | |
1284 | op = "flush"; | |
1285 | @@ -405,7 +407,8 @@ static void ps3disk_prepare_flush(struct | |
1286 | ||
1287 | dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); | |
1288 | ||
1289 | - req->cmd_type = REQ_TYPE_FLUSH; | |
1290 | + req->cmd_type = REQ_TYPE_LINUX_BLOCK; | |
1291 | + req->cmd[0] = REQ_LB_OP_FLUSH; | |
1292 | } | |
1293 | ||
1294 | static unsigned long ps3disk_mask; | |
1295 | --- a/drivers/block/virtio_blk.c | |
1296 | +++ b/drivers/block/virtio_blk.c | |
1297 | @@ -84,11 +84,11 @@ static bool do_req(struct request_queue | |
1298 | if (blk_fs_request(vbr->req)) { | |
1299 | vbr->out_hdr.type = 0; | |
1300 | vbr->out_hdr.sector = vbr->req->sector; | |
1301 | - vbr->out_hdr.ioprio = vbr->req->ioprio; | |
1302 | + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); | |
1303 | } else if (blk_pc_request(vbr->req)) { | |
1304 | vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; | |
1305 | vbr->out_hdr.sector = 0; | |
1306 | - vbr->out_hdr.ioprio = vbr->req->ioprio; | |
1307 | + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); | |
1308 | } else { | |
1309 | /* We don't put anything else in the queue. */ | |
1310 | BUG(); | |
1311 | --- a/drivers/md/raid1.c | |
1312 | +++ b/drivers/md/raid1.c | |
1313 | @@ -1303,9 +1303,6 @@ static void sync_request_write(mddev_t * | |
1314 | sbio->bi_size = r1_bio->sectors << 9; | |
1315 | sbio->bi_idx = 0; | |
1316 | sbio->bi_phys_segments = 0; | |
1317 | - sbio->bi_hw_segments = 0; | |
1318 | - sbio->bi_hw_front_size = 0; | |
1319 | - sbio->bi_hw_back_size = 0; | |
1320 | sbio->bi_flags &= ~(BIO_POOL_MASK - 1); | |
1321 | sbio->bi_flags |= 1 << BIO_UPTODATE; | |
1322 | sbio->bi_next = NULL; | |
1323 | @@ -1791,7 +1788,6 @@ static sector_t sync_request(mddev_t *md | |
1324 | bio->bi_vcnt = 0; | |
1325 | bio->bi_idx = 0; | |
1326 | bio->bi_phys_segments = 0; | |
1327 | - bio->bi_hw_segments = 0; | |
1328 | bio->bi_size = 0; | |
1329 | bio->bi_end_io = NULL; | |
1330 | bio->bi_private = NULL; | |
1331 | --- a/drivers/md/raid10.c | |
1332 | +++ b/drivers/md/raid10.c | |
1333 | @@ -1346,9 +1346,6 @@ static void sync_request_write(mddev_t * | |
1334 | tbio->bi_size = r10_bio->sectors << 9; | |
1335 | tbio->bi_idx = 0; | |
1336 | tbio->bi_phys_segments = 0; | |
1337 | - tbio->bi_hw_segments = 0; | |
1338 | - tbio->bi_hw_front_size = 0; | |
1339 | - tbio->bi_hw_back_size = 0; | |
1340 | tbio->bi_flags &= ~(BIO_POOL_MASK - 1); | |
1341 | tbio->bi_flags |= 1 << BIO_UPTODATE; | |
1342 | tbio->bi_next = NULL; | |
1343 | @@ -1948,7 +1945,6 @@ static sector_t sync_request(mddev_t *md | |
1344 | bio->bi_vcnt = 0; | |
1345 | bio->bi_idx = 0; | |
1346 | bio->bi_phys_segments = 0; | |
1347 | - bio->bi_hw_segments = 0; | |
1348 | bio->bi_size = 0; | |
1349 | } | |
1350 | ||
1351 | --- a/drivers/md/raid5.c | |
1352 | +++ b/drivers/md/raid5.c | |
1353 | @@ -101,6 +101,40 @@ | |
1354 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | |
1355 | #endif | |
1356 | ||
1357 | +/* | |
1358 | + * We maintain a biased count of active stripes in the bottom 16 bits of | |
1359 | + * bi_phys_segments, and a count of processed stripes in the upper 16 bits | |
1360 | + */ | |
1361 | +static inline int raid5_bi_phys_segments(struct bio *bio) | |
1362 | +{ | |
1363 | + return bio->bi_phys_segments & 0xffff; | |
1364 | +} | |
1365 | + | |
1366 | +static inline int raid5_bi_hw_segments(struct bio *bio) | |
1367 | +{ | |
1368 | + return (bio->bi_phys_segments >> 16) & 0xffff; | |
1369 | +} | |
1370 | + | |
1371 | +static inline int raid5_dec_bi_phys_segments(struct bio *bio) | |
1372 | +{ | |
1373 | + --bio->bi_phys_segments; | |
1374 | + return raid5_bi_phys_segments(bio); | |
1375 | +} | |
1376 | + | |
1377 | +static inline int raid5_dec_bi_hw_segments(struct bio *bio) | |
1378 | +{ | |
1379 | + unsigned short val = raid5_bi_hw_segments(bio); | |
1380 | + | |
1381 | + --val; | |
1382 | + bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); | |
1383 | + return val; | |
1384 | +} | |
1385 | + | |
1386 | +static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) | |
1387 | +{ | |
1388 | + bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16); | |
1389 | +} | |
1390 | + | |
1391 | static inline int raid6_next_disk(int disk, int raid_disks) | |
1392 | { | |
1393 | disk++; | |
1394 | @@ -507,7 +541,7 @@ static void ops_complete_biofill(void *s | |
1395 | while (rbi && rbi->bi_sector < | |
1396 | dev->sector + STRIPE_SECTORS) { | |
1397 | rbi2 = r5_next_bio(rbi, dev->sector); | |
1398 | - if (--rbi->bi_phys_segments == 0) { | |
1399 | + if (!raid5_dec_bi_phys_segments(rbi)) { | |
1400 | rbi->bi_next = return_bi; | |
1401 | return_bi = rbi; | |
1402 | } | |
1403 | @@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_ | |
1404 | if (*bip) | |
1405 | bi->bi_next = *bip; | |
1406 | *bip = bi; | |
1407 | - bi->bi_phys_segments ++; | |
1408 | + bi->bi_phys_segments++; | |
1409 | spin_unlock_irq(&conf->device_lock); | |
1410 | spin_unlock(&sh->lock); | |
1411 | ||
1412 | @@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf, | |
1413 | sh->dev[i].sector + STRIPE_SECTORS) { | |
1414 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); | |
1415 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | |
1416 | - if (--bi->bi_phys_segments == 0) { | |
1417 | + if (!raid5_dec_bi_phys_segments(bi)) { | |
1418 | md_write_end(conf->mddev); | |
1419 | bi->bi_next = *return_bi; | |
1420 | *return_bi = bi; | |
1421 | @@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf, | |
1422 | sh->dev[i].sector + STRIPE_SECTORS) { | |
1423 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); | |
1424 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | |
1425 | - if (--bi->bi_phys_segments == 0) { | |
1426 | + if (!raid5_dec_bi_phys_segments(bi)) { | |
1427 | md_write_end(conf->mddev); | |
1428 | bi->bi_next = *return_bi; | |
1429 | *return_bi = bi; | |
1430 | @@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf, | |
1431 | struct bio *nextbi = | |
1432 | r5_next_bio(bi, sh->dev[i].sector); | |
1433 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | |
1434 | - if (--bi->bi_phys_segments == 0) { | |
1435 | + if (!raid5_dec_bi_phys_segments(bi)) { | |
1436 | bi->bi_next = *return_bi; | |
1437 | *return_bi = bi; | |
1438 | } | |
1439 | @@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(ra | |
1440 | while (wbi && wbi->bi_sector < | |
1441 | dev->sector + STRIPE_SECTORS) { | |
1442 | wbi2 = r5_next_bio(wbi, dev->sector); | |
1443 | - if (--wbi->bi_phys_segments == 0) { | |
1444 | + if (!raid5_dec_bi_phys_segments(wbi)) { | |
1445 | md_write_end(conf->mddev); | |
1446 | wbi->bi_next = *return_bi; | |
1447 | *return_bi = wbi; | |
1448 | @@ -2814,7 +2848,7 @@ static bool handle_stripe6(struct stripe | |
1449 | copy_data(0, rbi, dev->page, dev->sector); | |
1450 | rbi2 = r5_next_bio(rbi, dev->sector); | |
1451 | spin_lock_irq(&conf->device_lock); | |
1452 | - if (--rbi->bi_phys_segments == 0) { | |
1453 | + if (!raid5_dec_bi_phys_segments(rbi)) { | |
1454 | rbi->bi_next = return_bi; | |
1455 | return_bi = rbi; | |
1456 | } | |
1457 | @@ -3155,8 +3189,11 @@ static struct bio *remove_bio_from_retry | |
1458 | if(bi) { | |
1459 | conf->retry_read_aligned_list = bi->bi_next; | |
1460 | bi->bi_next = NULL; | |
1461 | + /* | |
1462 | + * this sets the active stripe count to 1 and the processed | |
1463 | + * stripe count to zero (upper 16 bits) | |
1464 | + */ | |
1465 | bi->bi_phys_segments = 1; /* biased count of active stripes */ | |
1466 | - bi->bi_hw_segments = 0; /* count of processed stripes */ | |
1467 | } | |
1468 | ||
1469 | return bi; | |
1470 | @@ -3206,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi) | |
1471 | if ((bi->bi_size>>9) > q->max_sectors) | |
1472 | return 0; | |
1473 | blk_recount_segments(q, bi); | |
1474 | - if (bi->bi_phys_segments > q->max_phys_segments || | |
1475 | - bi->bi_hw_segments > q->max_hw_segments) | |
1476 | + if (bi->bi_phys_segments > q->max_phys_segments) | |
1477 | return 0; | |
1478 | ||
1479 | if (q->merge_bvec_fn) | |
1480 | @@ -3469,7 +3505,7 @@ static int make_request(struct request_q | |
1481 | ||
1482 | } | |
1483 | spin_lock_irq(&conf->device_lock); | |
1484 | - remaining = --bi->bi_phys_segments; | |
1485 | + remaining = raid5_dec_bi_phys_segments(bi); | |
1486 | spin_unlock_irq(&conf->device_lock); | |
1487 | if (remaining == 0) { | |
1488 | ||
1489 | @@ -3753,7 +3789,7 @@ static int retry_aligned_read(raid5_con | |
1490 | sector += STRIPE_SECTORS, | |
1491 | scnt++) { | |
1492 | ||
1493 | - if (scnt < raid_bio->bi_hw_segments) | |
1494 | + if (scnt < raid5_bi_hw_segments(raid_bio)) | |
1495 | /* already done this stripe */ | |
1496 | continue; | |
1497 | ||
1498 | @@ -3761,7 +3797,7 @@ static int retry_aligned_read(raid5_con | |
1499 | ||
1500 | if (!sh) { | |
1501 | /* failed to get a stripe - must wait */ | |
1502 | - raid_bio->bi_hw_segments = scnt; | |
1503 | + raid5_set_bi_hw_segments(raid_bio, scnt); | |
1504 | conf->retry_read_aligned = raid_bio; | |
1505 | return handled; | |
1506 | } | |
1507 | @@ -3769,7 +3805,7 @@ static int retry_aligned_read(raid5_con | |
1508 | set_bit(R5_ReadError, &sh->dev[dd_idx].flags); | |
1509 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { | |
1510 | release_stripe(sh); | |
1511 | - raid_bio->bi_hw_segments = scnt; | |
1512 | + raid5_set_bi_hw_segments(raid_bio, scnt); | |
1513 | conf->retry_read_aligned = raid_bio; | |
1514 | return handled; | |
1515 | } | |
1516 | @@ -3779,7 +3815,7 @@ static int retry_aligned_read(raid5_con | |
1517 | handled++; | |
1518 | } | |
1519 | spin_lock_irq(&conf->device_lock); | |
1520 | - remaining = --raid_bio->bi_phys_segments; | |
1521 | + remaining = raid5_dec_bi_phys_segments(raid_bio); | |
1522 | spin_unlock_irq(&conf->device_lock); | |
1523 | if (remaining == 0) | |
1524 | bio_endio(raid_bio, 0); | |
1525 | --- a/fs/bio.c | |
1526 | +++ b/fs/bio.c | |
1527 | @@ -208,14 +208,6 @@ inline int bio_phys_segments(struct requ | |
1528 | return bio->bi_phys_segments; | |
1529 | } | |
1530 | ||
1531 | -inline int bio_hw_segments(struct request_queue *q, struct bio *bio) | |
1532 | -{ | |
1533 | - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | |
1534 | - blk_recount_segments(q, bio); | |
1535 | - | |
1536 | - return bio->bi_hw_segments; | |
1537 | -} | |
1538 | - | |
1539 | /** | |
1540 | * __bio_clone - clone a bio | |
1541 | * @bio: destination bio | |
1542 | @@ -350,8 +342,7 @@ static int __bio_add_page(struct request | |
1543 | */ | |
1544 | ||
1545 | while (bio->bi_phys_segments >= q->max_phys_segments | |
1546 | - || bio->bi_hw_segments >= q->max_hw_segments | |
1547 | - || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { | |
1548 | + || bio->bi_phys_segments >= q->max_hw_segments) { | |
1549 | ||
1550 | if (retried_segments) | |
1551 | return 0; | |
1552 | @@ -395,13 +386,11 @@ static int __bio_add_page(struct request | |
1553 | } | |
1554 | ||
1555 | /* If we may be able to merge these biovecs, force a recount */ | |
1556 | - if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || | |
1557 | - BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) | |
1558 | + if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) | |
1559 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); | |
1560 | ||
1561 | bio->bi_vcnt++; | |
1562 | bio->bi_phys_segments++; | |
1563 | - bio->bi_hw_segments++; | |
1564 | done: | |
1565 | bio->bi_size += len; | |
1566 | return len; | |
1567 | @@ -1393,7 +1382,6 @@ EXPORT_SYMBOL(bio_init); | |
1568 | EXPORT_SYMBOL(__bio_clone); | |
1569 | EXPORT_SYMBOL(bio_clone); | |
1570 | EXPORT_SYMBOL(bio_phys_segments); | |
1571 | -EXPORT_SYMBOL(bio_hw_segments); | |
1572 | EXPORT_SYMBOL(bio_add_page); | |
1573 | EXPORT_SYMBOL(bio_add_pc_page); | |
1574 | EXPORT_SYMBOL(bio_get_nr_vecs); | |
1575 | --- a/include/linux/bio.h | |
1576 | +++ b/include/linux/bio.h | |
1577 | @@ -26,21 +26,8 @@ | |
1578 | ||
1579 | #ifdef CONFIG_BLOCK | |
1580 | ||
1581 | -/* Platforms may set this to teach the BIO layer about IOMMU hardware. */ | |
1582 | #include <asm/io.h> | |
1583 | ||
1584 | -#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY) | |
1585 | -#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1)) | |
1586 | -#define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE) | |
1587 | -#else | |
1588 | -#define BIOVEC_VIRT_START_SIZE(x) 0 | |
1589 | -#define BIOVEC_VIRT_OVERSIZE(x) 0 | |
1590 | -#endif | |
1591 | - | |
1592 | -#ifndef BIO_VMERGE_BOUNDARY | |
1593 | -#define BIO_VMERGE_BOUNDARY 0 | |
1594 | -#endif | |
1595 | - | |
1596 | #define BIO_DEBUG | |
1597 | ||
1598 | #ifdef BIO_DEBUG | |
1599 | @@ -88,12 +75,7 @@ struct bio { | |
1600 | /* Number of segments in this BIO after | |
1601 | * physical address coalescing is performed. | |
1602 | */ | |
1603 | - unsigned short bi_phys_segments; | |
1604 | - | |
1605 | - /* Number of segments after physical and DMA remapping | |
1606 | - * hardware coalescing is performed. | |
1607 | - */ | |
1608 | - unsigned short bi_hw_segments; | |
1609 | + unsigned int bi_phys_segments; | |
1610 | ||
1611 | unsigned int bi_size; /* residual I/O count */ | |
1612 | ||
1613 | @@ -104,14 +86,6 @@ struct bio { | |
1614 | unsigned int bi_seg_front_size; | |
1615 | unsigned int bi_seg_back_size; | |
1616 | ||
1617 | - /* | |
1618 | - * To keep track of the max hw size, we account for the | |
1619 | - * sizes of the first and last virtually mergeable segments | |
1620 | - * in this bio | |
1621 | - */ | |
1622 | - unsigned int bi_hw_front_size; | |
1623 | - unsigned int bi_hw_back_size; | |
1624 | - | |
1625 | unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ | |
1626 | ||
1627 | struct bio_vec *bi_io_vec; /* the actual vec list */ | |
1628 | @@ -133,7 +107,7 @@ struct bio { | |
1629 | #define BIO_UPTODATE 0 /* ok after I/O completion */ | |
1630 | #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ | |
1631 | #define BIO_EOF 2 /* out-out-bounds error */ | |
1632 | -#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ | |
1633 | +#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ | |
1634 | #define BIO_CLONED 4 /* doesn't own data */ | |
1635 | #define BIO_BOUNCED 5 /* bio is a bounce bio */ | |
1636 | #define BIO_USER_MAPPED 6 /* contains user pages */ | |
1637 | @@ -247,8 +221,6 @@ static inline void *bio_data(struct bio | |
1638 | ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) | |
1639 | #endif | |
1640 | ||
1641 | -#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \ | |
1642 | - ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0) | |
1643 | #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ | |
1644 | (((addr1) | (mask)) == (((addr2) - 1) | (mask))) | |
1645 | #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ | |
1646 | @@ -346,7 +318,6 @@ extern void bio_free(struct bio *, struc | |
1647 | extern void bio_endio(struct bio *, int); | |
1648 | struct request_queue; | |
1649 | extern int bio_phys_segments(struct request_queue *, struct bio *); | |
1650 | -extern int bio_hw_segments(struct request_queue *, struct bio *); | |
1651 | ||
1652 | extern void __bio_clone(struct bio *, struct bio *); | |
1653 | extern struct bio *bio_clone(struct bio *, gfp_t); | |
1654 | --- a/include/linux/blkdev.h | |
1655 | +++ b/include/linux/blkdev.h | |
1656 | @@ -54,7 +54,6 @@ enum rq_cmd_type_bits { | |
1657 | REQ_TYPE_PM_SUSPEND, /* suspend request */ | |
1658 | REQ_TYPE_PM_RESUME, /* resume request */ | |
1659 | REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ | |
1660 | - REQ_TYPE_FLUSH, /* flush request */ | |
1661 | REQ_TYPE_SPECIAL, /* driver defined type */ | |
1662 | REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ | |
1663 | /* | |
1664 | @@ -76,11 +75,8 @@ enum rq_cmd_type_bits { | |
1665 | * | |
1666 | */ | |
1667 | enum { | |
1668 | - /* | |
1669 | - * just examples for now | |
1670 | - */ | |
1671 | REQ_LB_OP_EJECT = 0x40, /* eject request */ | |
1672 | - REQ_LB_OP_FLUSH = 0x41, /* flush device */ | |
1673 | + REQ_LB_OP_FLUSH = 0x41, /* flush request */ | |
1674 | REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ | |
1675 | }; | |
1676 | ||
1677 | @@ -193,13 +189,6 @@ struct request { | |
1678 | */ | |
1679 | unsigned short nr_phys_segments; | |
1680 | ||
1681 | - /* Number of scatter-gather addr+len pairs after | |
1682 | - * physical and DMA remapping hardware coalescing is performed. | |
1683 | - * This is the number of scatter-gather entries the driver | |
1684 | - * will actually have to deal with after DMA mapping is done. | |
1685 | - */ | |
1686 | - unsigned short nr_hw_segments; | |
1687 | - | |
1688 | unsigned short ioprio; | |
1689 | ||
1690 | void *special; | |
1691 | @@ -236,6 +225,11 @@ struct request { | |
1692 | struct request *next_rq; | |
1693 | }; | |
1694 | ||
1695 | +static inline unsigned short req_get_ioprio(struct request *req) | |
1696 | +{ | |
1697 | + return req->ioprio; | |
1698 | +} | |
1699 | + | |
1700 | /* | |
1701 | * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME | |
1702 | * requests. Some step values could eventually be made generic. |