From: Jens Axboe <jens.axboe@oracle.com>
Subject: Implement rq affinity
Patch-Mainline: 2.6.28

This is a combined patchset from linux-2.6.git. Commit IDs:
700e1be34289bde0359c15d6507d4cf90e5a5a7d
9f6dd15ebf6591fb2aff8aa774b1b9f4f8d8535d
962b69a665ed7e8aa3d8b9b9b318f9133501f866

Signed-off-by: Hannes Reinecke <hare@suse.de>

---
 block/as-iosched.c       |    6 +-
 block/blk-core.c         |   54 ++++++++++----------
 block/blk-merge.c        |    2
 block/blk-settings.c     |    2
 block/blk-softirq.c      |  126 +++++++++++++++++++++++++++++++++++------------
 block/blk-sysfs.c        |   31 +++++++++++
 block/blk.h              |   12 ++++
 block/cfq-iosched.c      |    2
 fs/bio.c                 |    1
 include/linux/bio.h      |   11 ++++
 include/linux/blkdev.h   |    8 ++
 include/linux/elevator.h |    8 +-
 12 files changed, 196 insertions(+), 67 deletions(-)

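For orientation: the changes below thread a completion CPU through the I/O path
(bio->bi_comp_cpu feeds rq->cpu), let blk_complete_request() steer the completion
softirq to that CPU via the generic SMP helpers, and expose a per-queue toggle as
/sys/block/<dev>/queue/rq_affinity which sets QUEUE_FLAG_SAME_COMP. As a rough
sketch only (not part of the patch; the function name my_submit_bio and its rw
argument are made up for illustration), a submitter could mark an individual bio
with its preferred completion CPU using the helpers added in include/linux/bio.h:

/*
 * Illustrative sketch, not part of this patch: tag a bio so that its
 * request carries the submitting CPU as the preferred completion CPU.
 */
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/smp.h>

static void my_submit_bio(int rw, struct bio *bio)
{
	int cpu = get_cpu();	/* stable CPU id while we tag the bio */

	bio_set_completion_cpu(bio, cpu);
	bio->bi_flags |= 1 << BIO_CPU_AFFINE;
	put_cpu();

	submit_bio(rw, bio);
}

Queue-wide steering works without per-bio tagging: writing 1 to rq_affinity
(block/blk-sysfs.c below) sets QUEUE_FLAG_SAME_COMP, and blk_complete_request()
then redirects completions to the CPU group recorded in rq->cpu.
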
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -462,7 +462,7 @@ static void as_antic_stop(struct as_data
 			del_timer(&ad->antic_timer);
 		ad->antic_status = ANTIC_FINISHED;
 		/* see as_work_handler */
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(ad->q, &ad->antic_work);
 	}
 }
 
@@ -483,7 +483,7 @@ static void as_antic_timeout(unsigned lo
 		aic = ad->io_context->aic;
 
 		ad->antic_status = ANTIC_FINISHED;
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
 
 		if (aic->ttime_samples == 0) {
 			/* process anticipated on has exited or timed out*/
@@ -844,7 +844,7 @@ static void as_completed_request(struct
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
 		ad->current_batch_expires = jiffies +
 					ad->batch_expire[ad->batch_data_dir];
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
 		ad->changed_batch = 0;
 
 		if (ad->batch_data_dir == REQ_SYNC)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -109,7 +109,7 @@ void blk_rq_init(struct request_queue *q
 	memset(rq, 0, sizeof(*rq));
 
 	INIT_LIST_HEAD(&rq->queuelist);
-	INIT_LIST_HEAD(&rq->donelist);
+	rq->cpu = -1;
 	rq->q = q;
 	rq->sector = rq->hard_sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
@@ -304,7 +304,7 @@ void blk_unplug_timeout(unsigned long da
 	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
 				q->rq.count[READ] + q->rq.count[WRITE]);
 
-	kblockd_schedule_work(&q->unplug_work);
+	kblockd_schedule_work(q, &q->unplug_work);
 }
 
 void blk_unplug(struct request_queue *q)
@@ -321,6 +321,21 @@ void blk_unplug(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_unplug);
 
+static void blk_invoke_request_fn(struct request_queue *q)
+{
+	/*
+	 * one level of recursion is ok and is much faster than kicking
+	 * the unplug handling
+	 */
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+		q->request_fn(q);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
+	} else {
+		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+		kblockd_schedule_work(q, &q->unplug_work);
+	}
+}
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q: The &struct request_queue in question
@@ -335,18 +350,7 @@ void blk_start_queue(struct request_queu
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
-	/*
-	 * one level of recursion is ok and is much faster than kicking
-	 * the unplug handling
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		blk_plug_device(q);
-		kblockd_schedule_work(&q->unplug_work);
-	}
+	blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -404,15 +408,8 @@ void __blk_run_queue(struct request_queu
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!elv_queue_empty(q)) {
-		if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-			q->request_fn(q);
-			queue_flag_clear(QUEUE_FLAG_REENTER, q);
-		} else {
-			blk_plug_device(q);
-			kblockd_schedule_work(&q->unplug_work);
-		}
-	}
+	if (!elv_queue_empty(q))
+		blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -1062,6 +1059,7 @@ EXPORT_SYMBOL(blk_put_request);
 
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
+	req->cpu = bio->bi_comp_cpu;
 	req->cmd_type = REQ_TYPE_FS;
 
 	/*
@@ -1142,6 +1140,8 @@ static int __make_request(struct request
 		req->biotail = bio;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_back_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1169,6 +1169,8 @@ static int __make_request(struct request
 		req->sector = req->hard_sector = bio->bi_sector;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_front_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1204,13 +1206,15 @@ get_rq:
 	init_request_from_bio(req, bio);
 
 	spin_lock_irq(q->queue_lock);
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+	    bio_flagged(bio, BIO_CPU_AFFINE))
+		req->cpu = blk_cpu_to_group(smp_processor_id());
 	if (elv_queue_empty(q))
 		blk_plug_device(q);
 	add_request(q, req);
out:
 	if (sync)
 		__generic_unplug_device(q);
-
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 
@@ -1958,7 +1962,7 @@ void blk_rq_bio_prep(struct request_queu
 		rq->rq_disk = bio->bi_bdev->bd_disk;
 }
 
-int kblockd_schedule_work(struct work_struct *work)
+int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
 }
--- a/block/blk.h
+++ b/block/blk.h
@@ -59,4 +59,16 @@ static inline int queue_congestion_off_t
 
 #endif /* BLK_DEV_INTEGRITY */
 
+static inline int blk_cpu_to_group(int cpu)
+{
+#ifdef CONFIG_SCHED_MC
+	cpumask_t mask = cpu_coregroup_map(cpu);
+	return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
+	return first_cpu(per_cpu(cpu_sibling_map, cpu));
+#else
+	return cpu;
+#endif
+}
+
 #endif
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -413,6 +413,8 @@ static int attempt_merge(struct request_
 	}
 
 	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+	if (blk_rq_cpu_valid(next))
+		req->cpu = next->cpu;
 
 	__blk_put_request(q, next);
 	return 1;
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -443,7 +443,7 @@ void blk_queue_update_dma_alignment(stru
 }
 EXPORT_SYMBOL(blk_queue_update_dma_alignment);
 
-static int __init blk_settings_init(void)
+int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
 	blk_max_pfn = max_pfn - 1;
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -13,6 +13,70 @@
 
 static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
 
+/*
+ * Softirq action handler - move entries to local list and loop over them
+ * while passing them to the queue registered handler.
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+	struct list_head *cpu_list, local_list;
+
+	local_irq_disable();
+	cpu_list = &__get_cpu_var(blk_cpu_done);
+	list_replace_init(cpu_list, &local_list);
+	local_irq_enable();
+
+	while (!list_empty(&local_list)) {
+		struct request *rq;
+
+		rq = list_entry(local_list.next, struct request, csd.list);
+		list_del_init(&rq->csd.list);
+		rq->q->softirq_done_fn(rq);
+	}
+}
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+static void trigger_softirq(void *data)
+{
+	struct request *rq = data;
+	unsigned long flags;
+	struct list_head *list;
+
+	local_irq_save(flags);
+	list = &__get_cpu_var(blk_cpu_done);
+	list_add_tail(&rq->csd.list, list);
+
+	if (list->next == &rq->csd.list)
+		raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Setup and invoke a run of 'trigger_softirq' on the given cpu.
+ */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+	if (cpu_online(cpu)) {
+		struct call_single_data *data = &rq->csd;
+
+		data->func = trigger_softirq;
+		data->info = rq;
+		data->flags = 0;
+
+		__smp_call_function_single(cpu, data);
+		return 0;
+	}
+
+	return 1;
+}
+#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+	return 1;
+}
+#endif
+
 static int __cpuinit blk_cpu_notify(struct notifier_block *self,
 				  unsigned long action, void *hcpu)
 {
@@ -33,33 +97,10 @@ static int __cpuinit blk_cpu_notify(stru
 	return NOTIFY_OK;
 }
 
-
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
+static struct notifier_block __cpuinitdata blk_cpu_notifier = {
 	.notifier_call = blk_cpu_notify,
 };
 
-/*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
-	struct list_head *cpu_list, local_list;
-
-	local_irq_disable();
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_replace_init(cpu_list, &local_list);
-	local_irq_enable();
-
-	while (!list_empty(&local_list)) {
-		struct request *rq;
-
-		rq = list_entry(local_list.next, struct request, donelist);
-		list_del_init(&rq->donelist);
-		rq->q->softirq_done_fn(rq);
-	}
-}
-
 /**
  * blk_complete_request - end I/O on a request
  * @req: the request being processed
@@ -71,25 +112,48 @@ static void blk_done_softirq(struct soft
  * through a softirq handler. The user must have registered a completion
  * callback through blk_queue_softirq_done().
 **/
-
 void blk_complete_request(struct request *req)
 {
-	struct list_head *cpu_list;
+	struct request_queue *q = req->q;
 	unsigned long flags;
+	int ccpu, cpu, group_cpu;
 
-	BUG_ON(!req->q->softirq_done_fn);
+	BUG_ON(!q->softirq_done_fn);
 
 	local_irq_save(flags);
+	cpu = smp_processor_id();
+	group_cpu = blk_cpu_to_group(cpu);
 
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_add_tail(&req->donelist, cpu_list);
-	raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	/*
+	 * Select completion CPU
+	 */
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+		ccpu = req->cpu;
+	else
+		ccpu = cpu;
+
+	if (ccpu == cpu || ccpu == group_cpu) {
+		struct list_head *list;
+do_local:
+		list = &__get_cpu_var(blk_cpu_done);
+		list_add_tail(&req->csd.list, list);
+
+		/*
+		 * if the list only contains our just added request,
+		 * signal a raise of the softirq. If there are already
+		 * entries there, someone already raised the irq but it
+		 * hasn't run yet.
+		 */
+		if (list->next == &req->csd.list)
+			raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	} else if (raise_blk_irq(ccpu, req))
+		goto do_local;
 
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(blk_complete_request);
 
-int __init blk_softirq_init(void)
+__init int blk_softirq_init(void)
 {
 	int i;
 
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(stru
 	return ret;
 }
 
+static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
+{
+	unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+
+	return queue_var_show(set != 0, page);
+}
+
+static ssize_t
+queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
+{
+	ssize_t ret = -EINVAL;
+#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+	unsigned long val;
+
+	ret = queue_var_store(&val, page, count);
+	spin_lock_irq(q->queue_lock);
+	if (val)
+		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+	else
+		queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+	spin_unlock_irq(q->queue_lock);
+#endif
+	return ret;
+}
 
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_no
 	.store = queue_nomerges_store,
 };
 
+static struct queue_sysfs_entry queue_rq_affinity_entry = {
+	.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_rq_affinity_show,
+	.store = queue_rq_affinity_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -205,6 +235,7 @@ static struct attribute *default_attrs[]
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
 	&queue_nomerges_entry.attr,
+	&queue_rq_affinity_entry.attr,
 	NULL,
 };
 
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -252,7 +252,7 @@ static inline void cfq_schedule_dispatch
 {
 	if (cfqd->busy_queues) {
 		cfq_log(cfqd, "schedule dispatch");
-		kblockd_schedule_work(&cfqd->unplug_work);
+		kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
 	}
 }
 
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -111,6 +111,7 @@ void bio_init(struct bio *bio)
 {
 	memset(bio, 0, sizeof(*bio));
 	bio->bi_flags = 1 << BIO_UPTODATE;
+	bio->bi_comp_cpu = -1;
 	atomic_set(&bio->bi_cnt, 1);
 }
 
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -88,6 +88,8 @@ struct bio {
 
 	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
 
+	unsigned int		bi_comp_cpu;	/* completion CPU */
+
 	struct bio_vec		*bi_io_vec;	/* the actual vec list */
 
 	bio_end_io_t		*bi_end_io;
@@ -112,6 +114,7 @@ struct bio {
 #define BIO_BOUNCED	5	/* bio is a bounce bio */
 #define BIO_USER_MAPPED 6	/* contains user pages */
 #define BIO_EOPNOTSUPP	7	/* not supported */
+#define BIO_CPU_AFFINE	8	/* complete bio on same CPU as submitted */
 #define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
 
 /*
@@ -350,6 +353,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 /*
+ * Allow queuer to specify a completion CPU for this bio
+ */
+static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
+{
+	bio->bi_comp_cpu = cpu;
+}
+
+/*
  * bio_set is used to allow other portions of the IO system to
  * allocate their own private memory pools for bio and iovec structures.
  * These memory pools in turn all allocate from the bio_slab
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/stringify.h>
 #include <linux/bsg.h>
+#include <linux/smp.h>
 
 #include <asm/scatterlist.h>
 
@@ -139,7 +140,8 @@ enum rq_flag_bits {
  */
 struct request {
 	struct list_head queuelist;
-	struct list_head donelist;
+	struct call_single_data csd;
+	int cpu;
 
 	struct request_queue *q;
 
@@ -420,6 +422,7 @@ struct request_queue
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES    10	/* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP   11	/* force complete on same CPU */
 
 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -542,6 +545,7 @@ enum {
 #define blk_pm_request(rq)	\
 	(blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
 
+#define blk_rq_cpu_valid(rq)	((rq)->cpu != -1)
 #define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
 #define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
@@ -913,7 +917,7 @@ static inline void put_dev_sector(Sector
 }
 
 struct work_struct;
-int kblockd_schedule_work(struct work_struct *work);
+int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
 void kblockd_flush_work(struct work_struct *work);
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -173,15 +173,15 @@ enum {
 #define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
 /*
- * Hack to reuse the donelist list_head as the fifo time holder while
+ * Hack to reuse the csd.list list_head as the fifo time holder while
  * the request is in the io scheduler. Saves an unsigned long in rq.
  */
-#define rq_fifo_time(rq)	((unsigned long) (rq)->donelist.next)
-#define rq_set_fifo_time(rq,exp)	((rq)->donelist.next = (void *) (exp))
+#define rq_fifo_time(rq)	((unsigned long) (rq)->csd.list.next)
+#define rq_set_fifo_time(rq,exp)	((rq)->csd.list.next = (void *) (exp))
 #define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 #define rq_fifo_clear(rq)	do {		\
 	list_del_init(&(rq)->queuelist);	\
-	INIT_LIST_HEAD(&(rq)->donelist);	\
+	INIT_LIST_HEAD(&(rq)->csd.list);	\
 	} while (0)
 
 /*