From: Jens Axboe <jens.axboe@oracle.com>
Subject: Implement rq affinity
Patch-Mainline: 2.6.28

This is a combined patchset from linux-2.6.git. Commit IDs:
700e1be34289bde0359c15d6507d4cf90e5a5a7d
9f6dd15ebf6591fb2aff8aa774b1b9f4f8d8535d
962b69a665ed7e8aa3d8b9b9b318f9133501f866

Signed-off-by: Hannes Reinecke <hare@suse.de>

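For context, and not part of the change itself: the patch keeps the existing softirq
completion path, but when the new rq_affinity queue attribute is enabled
(echo 1 > /sys/block/<dev>/queue/rq_affinity) the BLOCK_SOFTIRQ work is steered back to
the CPU group that submitted the request; an individual bio can ask for the same
treatment via the new BIO_CPU_AFFINE flag / bio_set_completion_cpu(). The sketch below
only illustrates the driver-side interfaces this builds on (blk_queue_softirq_done()
and blk_complete_request()); the mydrv_* names are hypothetical.

    #include <linux/blkdev.h>
    #include <linux/interrupt.h>

    /* Runs in BLOCK_SOFTIRQ context; with rq_affinity enabled this is the
     * submitting CPU's group rather than the CPU that took the IRQ. */
    static void mydrv_softirq_done(struct request *rq)
    {
            blk_end_request(rq, rq->errors ? -EIO : 0, blk_rq_bytes(rq));
    }

    static irqreturn_t mydrv_irq(int irq, void *dev_id)
    {
            struct request *rq = mydrv_next_completed(dev_id); /* hypothetical helper */

            blk_complete_request(rq);  /* defer the heavy lifting to the softirq */
            return IRQ_HANDLED;
    }

    static void mydrv_init_queue(struct request_queue *q)
    {
            /* register the softirq completion callback at queue setup time */
            blk_queue_softirq_done(q, mydrv_softirq_done);
    }
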
---
 block/as-iosched.c       |    6 +-
 block/blk-core.c         |   54 ++++++++++----------
 block/blk-merge.c        |    2
 block/blk-settings.c     |    2
 block/blk-softirq.c      |  126 +++++++++++++++++++++++++++++++++++------------
 block/blk-sysfs.c        |   31 +++++++++++
 block/blk.h              |   12 ++++
 block/cfq-iosched.c      |    2
 fs/bio.c                 |    1
 include/linux/bio.h      |   11 ++++
 include/linux/blkdev.h   |    8 ++
 include/linux/elevator.h |    8 +-
 12 files changed, 196 insertions(+), 67 deletions(-)

--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -462,7 +462,7 @@ static void as_antic_stop(struct as_data
del_timer(&ad->antic_timer);
ad->antic_status = ANTIC_FINISHED;
/* see as_work_handler */
- kblockd_schedule_work(&ad->antic_work);
+ kblockd_schedule_work(ad->q, &ad->antic_work);
}
}

@@ -483,7 +483,7 @@ static void as_antic_timeout(unsigned lo
aic = ad->io_context->aic;

ad->antic_status = ANTIC_FINISHED;
- kblockd_schedule_work(&ad->antic_work);
+ kblockd_schedule_work(q, &ad->antic_work);

if (aic->ttime_samples == 0) {
/* process anticipated on has exited or timed out*/
@@ -844,7 +844,7 @@ static void as_completed_request(struct
if (ad->changed_batch && ad->nr_dispatched == 1) {
ad->current_batch_expires = jiffies +
ad->batch_expire[ad->batch_data_dir];
- kblockd_schedule_work(&ad->antic_work);
+ kblockd_schedule_work(q, &ad->antic_work);
ad->changed_batch = 0;

if (ad->batch_data_dir == REQ_SYNC)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -109,7 +109,7 @@ void blk_rq_init(struct request_queue *q
memset(rq, 0, sizeof(*rq));

INIT_LIST_HEAD(&rq->queuelist);
- INIT_LIST_HEAD(&rq->donelist);
+ rq->cpu = -1;
rq->q = q;
rq->sector = rq->hard_sector = (sector_t) -1;
INIT_HLIST_NODE(&rq->hash);
@@ -304,7 +304,7 @@ void blk_unplug_timeout(unsigned long da
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
q->rq.count[READ] + q->rq.count[WRITE]);

- kblockd_schedule_work(&q->unplug_work);
+ kblockd_schedule_work(q, &q->unplug_work);
}

void blk_unplug(struct request_queue *q)
@@ -321,6 +321,21 @@ void blk_unplug(struct request_queue *q)
}
EXPORT_SYMBOL(blk_unplug);

+static void blk_invoke_request_fn(struct request_queue *q)
+{
+ /*
+ * one level of recursion is ok and is much faster than kicking
+ * the unplug handling
+ */
+ if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+ q->request_fn(q);
+ queue_flag_clear(QUEUE_FLAG_REENTER, q);
+ } else {
+ queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+ kblockd_schedule_work(q, &q->unplug_work);
+ }
+}
+
/**
* blk_start_queue - restart a previously stopped queue
* @q: The &struct request_queue in question
@@ -335,18 +350,7 @@ void blk_start_queue(struct request_queu
WARN_ON(!irqs_disabled());

queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
- /*
- * one level of recursion is ok and is much faster than kicking
- * the unplug handling
- */
- if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
- q->request_fn(q);
- queue_flag_clear(QUEUE_FLAG_REENTER, q);
- } else {
- blk_plug_device(q);
- kblockd_schedule_work(&q->unplug_work);
- }
+ blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(blk_start_queue);

@@ -404,15 +408,8 @@ void __blk_run_queue(struct request_queu
* Only recurse once to avoid overrunning the stack, let the unplug
* handling reinvoke the handler shortly if we already got there.
*/
- if (!elv_queue_empty(q)) {
- if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
- q->request_fn(q);
- queue_flag_clear(QUEUE_FLAG_REENTER, q);
- } else {
- blk_plug_device(q);
- kblockd_schedule_work(&q->unplug_work);
- }
- }
+ if (!elv_queue_empty(q))
+ blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(__blk_run_queue);

@@ -1062,6 +1059,7 @@ EXPORT_SYMBOL(blk_put_request);

void init_request_from_bio(struct request *req, struct bio *bio)
{
+ req->cpu = bio->bi_comp_cpu;
req->cmd_type = REQ_TYPE_FS;

/*
@@ -1142,6 +1140,8 @@ static int __make_request(struct request
req->biotail = bio;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
+ if (!blk_rq_cpu_valid(req))
+ req->cpu = bio->bi_comp_cpu;
drive_stat_acct(req, 0);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
@@ -1169,6 +1169,8 @@ static int __make_request(struct request
req->sector = req->hard_sector = bio->bi_sector;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
+ if (!blk_rq_cpu_valid(req))
+ req->cpu = bio->bi_comp_cpu;
drive_stat_acct(req, 0);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
@@ -1204,13 +1206,15 @@ get_rq:
init_request_from_bio(req, bio);

spin_lock_irq(q->queue_lock);
+ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+ bio_flagged(bio, BIO_CPU_AFFINE))
+ req->cpu = blk_cpu_to_group(smp_processor_id());
if (elv_queue_empty(q))
blk_plug_device(q);
add_request(q, req);
out:
if (sync)
__generic_unplug_device(q);
-
spin_unlock_irq(q->queue_lock);
return 0;

@@ -1958,7 +1962,7 @@ void blk_rq_bio_prep(struct request_queu
rq->rq_disk = bio->bi_bdev->bd_disk;
}

-int kblockd_schedule_work(struct work_struct *work)
+int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
{
return queue_work(kblockd_workqueue, work);
}
--- a/block/blk.h
+++ b/block/blk.h
@@ -59,4 +59,16 @@ static inline int queue_congestion_off_t

#endif /* BLK_DEV_INTEGRITY */

+static inline int blk_cpu_to_group(int cpu)
+{
+#ifdef CONFIG_SCHED_MC
+ cpumask_t mask = cpu_coregroup_map(cpu);
+ return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
+ return first_cpu(per_cpu(cpu_sibling_map, cpu));
+#else
+ return cpu;
+#endif
+}
+
#endif
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -413,6 +413,8 @@ static int attempt_merge(struct request_
}

req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+ if (blk_rq_cpu_valid(next))
+ req->cpu = next->cpu;

__blk_put_request(q, next);
return 1;
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -443,7 +443,7 @@ void blk_queue_update_dma_alignment(stru
}
EXPORT_SYMBOL(blk_queue_update_dma_alignment);

-static int __init blk_settings_init(void)
+int __init blk_settings_init(void)
{
blk_max_low_pfn = max_low_pfn - 1;
blk_max_pfn = max_pfn - 1;
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -13,6 +13,70 @@

static DEFINE_PER_CPU(struct list_head, blk_cpu_done);

+/*
+ * Softirq action handler - move entries to local list and loop over them
+ * while passing them to the queue registered handler.
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+ struct list_head *cpu_list, local_list;
+
+ local_irq_disable();
+ cpu_list = &__get_cpu_var(blk_cpu_done);
+ list_replace_init(cpu_list, &local_list);
+ local_irq_enable();
+
+ while (!list_empty(&local_list)) {
+ struct request *rq;
+
+ rq = list_entry(local_list.next, struct request, csd.list);
+ list_del_init(&rq->csd.list);
+ rq->q->softirq_done_fn(rq);
+ }
+}
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+static void trigger_softirq(void *data)
+{
+ struct request *rq = data;
+ unsigned long flags;
+ struct list_head *list;
+
+ local_irq_save(flags);
+ list = &__get_cpu_var(blk_cpu_done);
+ list_add_tail(&rq->csd.list, list);
+
+ if (list->next == &rq->csd.list)
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Setup and invoke a run of 'trigger_softirq' on the given cpu.
+ */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+ if (cpu_online(cpu)) {
+ struct call_single_data *data = &rq->csd;
+
+ data->func = trigger_softirq;
+ data->info = rq;
+ data->flags = 0;
+
+ __smp_call_function_single(cpu, data);
+ return 0;
+ }
+
+ return 1;
+}
+#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+ return 1;
+}
+#endif
+
static int __cpuinit blk_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -33,33 +97,10 @@ static int __cpuinit blk_cpu_notify(stru
return NOTIFY_OK;
}

-
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
+static struct notifier_block __cpuinitdata blk_cpu_notifier = {
.notifier_call = blk_cpu_notify,
};

-/*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
- struct list_head *cpu_list, local_list;
-
- local_irq_disable();
- cpu_list = &__get_cpu_var(blk_cpu_done);
- list_replace_init(cpu_list, &local_list);
- local_irq_enable();
-
- while (!list_empty(&local_list)) {
- struct request *rq;
-
- rq = list_entry(local_list.next, struct request, donelist);
- list_del_init(&rq->donelist);
- rq->q->softirq_done_fn(rq);
- }
-}
-
/**
* blk_complete_request - end I/O on a request
* @req: the request being processed
@@ -71,25 +112,48 @@ static void blk_done_softirq(struct soft
* through a softirq handler. The user must have registered a completion
* callback through blk_queue_softirq_done().
**/
-
void blk_complete_request(struct request *req)
{
- struct list_head *cpu_list;
+ struct request_queue *q = req->q;
unsigned long flags;
+ int ccpu, cpu, group_cpu;

- BUG_ON(!req->q->softirq_done_fn);
+ BUG_ON(!q->softirq_done_fn);

local_irq_save(flags);
+ cpu = smp_processor_id();
+ group_cpu = blk_cpu_to_group(cpu);

- cpu_list = &__get_cpu_var(blk_cpu_done);
- list_add_tail(&req->donelist, cpu_list);
- raise_softirq_irqoff(BLOCK_SOFTIRQ);
+ /*
+ * Select completion CPU
+ */
+ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+ ccpu = req->cpu;
+ else
+ ccpu = cpu;
+
+ if (ccpu == cpu || ccpu == group_cpu) {
+ struct list_head *list;
+do_local:
+ list = &__get_cpu_var(blk_cpu_done);
+ list_add_tail(&req->csd.list, list);
+
+ /*
+ * if the list only contains our just added request,
+ * signal a raise of the softirq. If there are already
+ * entries there, someone already raised the irq but it
+ * hasn't run yet.
+ */
+ if (list->next == &req->csd.list)
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+ } else if (raise_blk_irq(ccpu, req))
+ goto do_local;

local_irq_restore(flags);
}
EXPORT_SYMBOL(blk_complete_request);

-int __init blk_softirq_init(void)
+__init int blk_softirq_init(void)
{
int i;

--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(stru
return ret;
}

+static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
+{
+ unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+
+ return queue_var_show(set != 0, page);
+}
+
+static ssize_t
+queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
+{
+ ssize_t ret = -EINVAL;
+#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+ unsigned long val;
+
+ ret = queue_var_store(&val, page, count);
+ spin_lock_irq(q->queue_lock);
+ if (val)
+ queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+ else
+ queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ spin_unlock_irq(q->queue_lock);
+#endif
+ return ret;
+}

static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_no
.store = queue_nomerges_store,
};

+static struct queue_sysfs_entry queue_rq_affinity_entry = {
+ .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_rq_affinity_show,
+ .store = queue_rq_affinity_store,
+};
+
static struct attribute *default_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
@@ -205,6 +235,7 @@ static struct attribute *default_attrs[]
&queue_iosched_entry.attr,
&queue_hw_sector_size_entry.attr,
&queue_nomerges_entry.attr,
+ &queue_rq_affinity_entry.attr,
NULL,
};

--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -252,7 +252,7 @@ static inline void cfq_schedule_dispatch
{
if (cfqd->busy_queues) {
cfq_log(cfqd, "schedule dispatch");
- kblockd_schedule_work(&cfqd->unplug_work);
+ kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
}
}

--- a/fs/bio.c
+++ b/fs/bio.c
@@ -111,6 +111,7 @@ void bio_init(struct bio *bio)
{
memset(bio, 0, sizeof(*bio));
bio->bi_flags = 1 << BIO_UPTODATE;
+ bio->bi_comp_cpu = -1;
atomic_set(&bio->bi_cnt, 1);
}

--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -88,6 +88,8 @@ struct bio {

unsigned int bi_max_vecs; /* max bvl_vecs we can hold */

+ unsigned int bi_comp_cpu; /* completion CPU */
+
struct bio_vec *bi_io_vec; /* the actual vec list */

bio_end_io_t *bi_end_io;
@@ -112,6 +114,7 @@ struct bio {
#define BIO_BOUNCED 5 /* bio is a bounce bio */
#define BIO_USER_MAPPED 6 /* contains user pages */
#define BIO_EOPNOTSUPP 7 /* not supported */
+#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))

/*
@@ -350,6 +353,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp
extern unsigned int bvec_nr_vecs(unsigned short idx);

/*
+ * Allow queuer to specify a completion CPU for this bio
+ */
+static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
+{
+ bio->bi_comp_cpu = cpu;
+}
+
+/*
* bio_set is used to allow other portions of the IO system to
* allocate their own private memory pools for bio and iovec structures.
* These memory pools in turn all allocate from the bio_slab
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/stringify.h>
#include <linux/bsg.h>
+#include <linux/smp.h>

#include <asm/scatterlist.h>

@@ -139,7 +140,8 @@ enum rq_flag_bits {
*/
struct request {
struct list_head queuelist;
- struct list_head donelist;
+ struct call_single_data csd;
+ int cpu;

struct request_queue *q;

@@ -420,6 +422,7 @@ struct request_queue
#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */

static inline int queue_is_locked(struct request_queue *q)
{
@@ -542,6 +545,7 @@ enum {
#define blk_pm_request(rq) \
(blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))

+#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
@@ -913,7 +917,7 @@ static inline void put_dev_sector(Sector
}

struct work_struct;
-int kblockd_schedule_work(struct work_struct *work);
+int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
void kblockd_flush_work(struct work_struct *work);

#define MODULE_ALIAS_BLOCKDEV(major,minor) \
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -173,15 +173,15 @@ enum {
#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)

/*
- * Hack to reuse the donelist list_head as the fifo time holder while
+ * Hack to reuse the csd.list list_head as the fifo time holder while
* the request is in the io scheduler. Saves an unsigned long in rq.
*/
-#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next)
-#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp))
+#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next)
+#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp))
#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
#define rq_fifo_clear(rq) do { \
list_del_init(&(rq)->queuelist); \
- INIT_LIST_HEAD(&(rq)->donelist); \
+ INIT_LIST_HEAD(&(rq)->csd.list); \
} while (0)

/*