]> git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blame - src/patches/suse-2.6.27.25/patches.suse/dm-mpath-service-time-load-balancing
Updated xen patches taken from suse.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.suse / dm-mpath-service-time-load-balancing
CommitLineData
00e5a55c
BS
1From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
2Subject: dm-mpath: add service-time oriented dynamic load balancer
3References: FATE#303862,FATE#302108
4
5This patch adds a service time oriented dynamic load balancer,
6dm-service-time.
7
8
9Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
10Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
11Signed-off-by: Hannes Reinecke <hare@suse.de>
12
13Index: linux-2.6.27/drivers/md/Makefile
14===================================================================
15--- linux-2.6.27.orig/drivers/md/Makefile
16+++ linux-2.6.27/drivers/md/Makefile
17@@ -33,7 +33,8 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
18 obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
19 obj-$(CONFIG_DM_DELAY) += dm-delay.o
20 obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o \
21- dm-least-pending.o dm-queue-length.o
22+ dm-least-pending.o dm-queue-length.o \
23+ dm-service-time.o
24 obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
25 obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-regions.o dm-log.o
26 obj-$(CONFIG_DM_RAID45) += dm-raid45.o dm-log.o dm-memcache.o \
27Index: linux-2.6.27/drivers/md/dm-service-time.c
28===================================================================
29--- /dev/null
30+++ linux-2.6.27/drivers/md/dm-service-time.c
31@@ -0,0 +1,312 @@
32+/*
33+ * Copyright (C) 2007-2008 NEC Corporation. All Rights Reserved.
34+ *
35+ * Module Author: Kiyoshi Ueda
36+ *
37+ * This file is released under the GPL.
38+ *
39+ * Throughput oriented path selector.
40+ */
41+
42+#include "dm.h"
43+#include "dm-path-selector.h"
44+
45+#define DM_MSG_PREFIX "multipath service-time" /* presumably consumed by the DMERR()/DMINFO() macros - confirm in the dm headers */
46+#define ST_MIN_IO 2 /* default repeat_count when a path is added without an argument */
47+#define ST_VERSION "0.1.0" /* version string logged by dm_st_init() */
48+
49+struct selector { /* Per-selector state, stored in path_selector->context */
50+ struct list_head valid_paths; /* path_info entries st_select_path() may choose from */
51+ struct list_head failed_paths; /* path_info entries moved here by st_fail_path() */
52+};
53+
54+struct path_info { /* Per-path state, stored in dm_path->pscontext */
55+ struct list_head list; /* link in selector->valid_paths or ->failed_paths */
56+ struct dm_path *path; /* the path this entry describes */
57+ unsigned int repeat_count; /* reported to the caller by st_select_path() */
58+
59+ atomic_t in_flight; /* Total size of in-flight I/Os (bytes added by st_start_io) */
60+ size_t perf; /* Recent performance: bytes per msec of io_ticks; 0 = no data yet */
61+ sector_t last_sectors; /* READ+WRITE sector total at the last sample */
62+ size_t last_io_ticks; /* io_ticks value at the last sample */
63+};
64+
65+static struct selector *alloc_selector(void)
66+{
67+	struct selector *sel = kzalloc(sizeof(*sel), GFP_KERNEL);
68+
69+	/* Nothing to set up if the allocation failed; the caller gets NULL. */
70+	if (!sel)
71+		return NULL;
72+	INIT_LIST_HEAD(&sel->valid_paths);
73+	INIT_LIST_HEAD(&sel->failed_paths);
74+	return sel;
75+}
76+
77+static int st_create(struct path_selector *ps, unsigned argc, char **argv)
78+{
79+	struct selector *sel = alloc_selector();
80+	/* ps->context is written only once the selector exists. */
81+	if (sel) {
82+		ps->context = sel;
83+		return 0;
84+	}
85+	return -ENOMEM;
86+}
87+
88+static void free_paths(struct list_head *paths)
89+{
90+	struct path_info *info, *tmp;
91+	/* Unlink and free every path_info queued on @paths. */
92+	list_for_each_entry_safe(info, tmp, paths, list) {
93+		info->path->pscontext = NULL;	/* path keeps no stale context */
94+		list_del(&info->list);
95+		kfree(info);
96+	}
97+}
98+
99+static void st_destroy(struct path_selector *ps)
100+{
101+	struct selector *sel = ps->context;
102+
103+	ps->context = NULL;
104+	free_paths(&sel->valid_paths);
105+	free_paths(&sel->failed_paths);
106+	kfree(sel);	/* both lists have been emptied above */
107+}
108+
109+/* Emit status via DMEMIT(): "0 " when @path is NULL, else per-@type path data. Returns sz. */
110+static int st_status(struct path_selector *ps, struct dm_path *path,
111+		     status_type_t type, char *result, unsigned int maxlen)
112+{
113+	int sz = 0;
114+	struct path_info *pi;
115+
116+	if (!path) {
117+		DMEMIT("0 ");
118+		return sz;
119+	}
120+	pi = path->pscontext;
121+	switch (type) {
122+	case STATUSTYPE_INFO:
123+		/* pi->perf is a size_t: cast it so the argument matches %lu. */
124+		DMEMIT("if:%08lu pf:%06lu ",
125+		       (unsigned long) atomic_read(&pi->in_flight),
126+		       (unsigned long) pi->perf);
127+		break;
128+	case STATUSTYPE_TABLE:
129+		DMEMIT("%u ", pi->repeat_count);
130+		break;
131+	}
132+	return sz;
133+}
134+
135+static int st_add_path(struct path_selector *ps, struct dm_path *path,
136+		       int argc, char **argv, char **error)
137+{
138+	struct selector *sel = ps->context;
139+	struct gendisk *gd = path->dev->bdev->bd_disk;
140+	unsigned int count = ST_MIN_IO;
141+	struct path_info *info;
142+
143+	/* At most one optional per-path argument is accepted. */
144+	if (argc > 1) {
145+		*error = "service-time ps: incorrect number of arguments";
146+		return -EINVAL;
147+	}
148+
149+	/* That argument is the number of I/Os before switching path. */
150+	if (argc == 1 && sscanf(argv[0], "%u", &count) != 1) {
151+		*error = "service-time ps: invalid repeat count";
152+		return -EINVAL;
153+	}
154+
155+	info = kmalloc(sizeof(*info), GFP_KERNEL);
156+	if (!info) {
157+		*error = "service-time ps: Error allocating path context";
158+		return -ENOMEM;
159+	}
160+
161+	/* Baseline the counters so the first stats_update() sees a delta. */
162+	info->last_sectors = disk_stat_read(gd, sectors[READ])
163+			   + disk_stat_read(gd, sectors[WRITE]);
164+	info->last_io_ticks = disk_stat_read(gd, io_ticks);
165+	info->perf = 0;
166+	atomic_set(&info->in_flight, 0);
167+	info->repeat_count = count;
168+	info->path = path;
169+
170+	/* Attach the context to the path and queue it as selectable. */
171+	path->pscontext = info;
172+	list_add_tail(&info->list, &sel->valid_paths);
173+
174+	return 0;
175+}
176+
177+static void st_fail_path(struct path_selector *ps, struct dm_path *path)
178+{
179+	struct path_info *info = path->pscontext;
180+	struct selector *sel = ps->context;
181+
182+	list_move(&info->list, &sel->failed_paths);	/* out of the selectable set */
183+}
184+
185+static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
186+{
187+	struct path_info *info = path->pscontext;
188+	struct selector *sel = ps->context;
189+
190+	/* A reinstated path rejoins at the tail of the valid list. */
191+	list_move_tail(&info->list, &sel->valid_paths);
192+	return 0;
193+}
194+
195+/* Refresh pi->perf: bytes moved per msec of io_ticks since the last sample. */
196+static void stats_update(struct path_info *pi)
197+{
198+	struct gendisk *disk = pi->path->dev->bdev->bd_disk;
199+	sector_t sectors = disk_stat_read(disk, sectors[READ])
200+			 + disk_stat_read(disk, sectors[WRITE]);
201+	size_t io_ticks = disk_stat_read(disk, io_ticks);
202+	unsigned int msecs = jiffies_to_msecs(io_ticks - pi->last_io_ticks);
203+	u64 bytes = (u64)(sectors - pi->last_sectors) << 9;
204+
205+	/* Keep the old sample until sectors moved and the divisor is non-zero. */
206+	if (sectors == pi->last_sectors || !msecs)
207+		return;
208+
209+	do_div(bytes, msecs);	/* do_div() requires a 64-bit dividend */
210+	pi->perf = bytes;
211+	pi->last_sectors = sectors;
212+	pi->last_io_ticks = io_ticks;
213+}
214+
215+/*
216+ * Compare the estimated service time of two paths for an I/O of @new_io
217+ * bytes.  Returns <0 if @pi1 is the better choice, >0 if @pi2 is, and 0
218+ * if they rank the same.  Only the sign of the result is meaningful.
219+ */
220+static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
221+			   size_t new_io)
222+{
223+	u64 if1 = atomic_read(&pi1->in_flight);
224+	u64 if2 = atomic_read(&pi2->in_flight);
225+	u64 st1, st2;
226+
227+	/* Case 1: No performance data available. Choose less loaded path. */
228+	if (!pi1->perf || !pi2->perf)
229+		return (if1 > if2) - (if1 < if2);
230+
231+	/*
232+	 * Case 2: Compare service times (in-flight + new_io) / perf and
233+	 * choose the path that would finish sooner.  The quotients are
234+	 * compared by cross-multiplying in 64 bits rather than dividing:
235+	 * do_div() needs a 64-bit dividend, which size_t is not on 32-bit
236+	 * builds, and scaling by "<< 10" could overflow there.
237+	 * NOTE(review): assumes the products fit in 64 bits.
238+	 */
239+	st1 = (if1 + new_io) * pi2->perf;
240+	st2 = (if2 + new_io) * pi1->perf;
241+	if (st1 != st2)
242+		return (st1 > st2) - (st1 < st2);
243+
244+	/*
245+	 * Case 3: Service time is equal. Choose faster path.  Compare
246+	 * explicitly: a size_t difference truncated to int can flip sign.
247+	 */
248+	return (pi2->perf > pi1->perf) - (pi2->perf < pi1->perf);
249+}
250+
251+static struct dm_path *st_select_path(struct path_selector *ps,
252+				      unsigned *repeat_count, size_t nr_bytes)
253+{
254+	struct selector *sel = ps->context;
255+	struct path_info *cur, *winner = NULL;
256+
257+	/* No usable path at all. */
258+	if (list_empty(&sel->valid_paths))
259+		return NULL;
260+
261+	/* Rotate the list: the first entry wins ties, so vary who is first. */
262+	list_move_tail(sel->valid_paths.next, &sel->valid_paths);
263+
264+	/* Refresh every path's performance figure before comparing any. */
265+	list_for_each_entry(cur, &sel->valid_paths, list)
266+		stats_update(cur);
267+
268+	/* Keep the candidate that st_compare_load() ranks strictly better. */
269+	list_for_each_entry(cur, &sel->valid_paths, list) {
270+		if (!winner || st_compare_load(cur, winner, nr_bytes) < 0)
271+			winner = cur;
272+	}
273+
274+	if (!winner)
275+		return NULL;
276+
277+	*repeat_count = winner->repeat_count;
278+
279+	return winner->path;
280+}
281+
282+static int st_start_io(struct path_selector *ps, struct dm_path *path,
283+		       size_t nr_bytes)
284+{
285+	struct path_info *info;
286+	/* Add @nr_bytes to the in-flight total of @path. */
287+	info = path->pscontext;
288+	atomic_add(nr_bytes, &info->in_flight);
289+	return 0;
290+}
291+
292+static int st_end_io(struct path_selector *ps, struct dm_path *path,
293+		     size_t nr_bytes)
294+{
295+	struct path_info *info;
296+	/* Subtract @nr_bytes from the in-flight total of @path. */
297+	info = path->pscontext;
298+	atomic_sub(nr_bytes, &info->in_flight);
299+	return 0;
300+}
301+
302+static struct path_selector_type st_ps = { /* Operations table passed to dm_register_path_selector() */
303+ .name = "service-time",
304+ .module = THIS_MODULE,
305+ .table_args = 1, /* presumably the repeat_count st_status() emits for STATUSTYPE_TABLE - confirm */
306+ .info_args = 2, /* presumably the "if:" and "pf:" fields st_status() emits for STATUSTYPE_INFO - confirm */
307+ .create = st_create,
308+ .destroy = st_destroy,
309+ .status = st_status,
310+ .add_path = st_add_path,
311+ .fail_path = st_fail_path,
312+ .reinstate_path = st_reinstate_path,
313+ .select_path = st_select_path,
314+ .start_io = st_start_io,
315+ .end_io = st_end_io,
316+};
317+
318+static int __init dm_st_init(void)
319+{
320+	int r = dm_register_path_selector(&st_ps);
321+
322+	/* Register st_ps; on failure report it and do not announce "loaded". */
323+	if (r < 0)
324+		DMERR("register failed %d", r);
325+	else
326+		DMINFO("version " ST_VERSION " loaded");
327+	return r;
328+}
329+
330+static void __exit dm_st_exit(void)
331+{
332+	int rc = dm_unregister_path_selector(&st_ps);
333+	/* Module exit cannot fail, so a failed unregister is only logged. */
334+	if (rc < 0)
335+		DMERR("unregister failed %d", rc);
336+}
337+
338+module_init(dm_st_init); /* registers st_ps */
339+module_exit(dm_st_exit); /* unregisters st_ps */
340+/* Module metadata */
341+MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
342+MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>");
343+MODULE_LICENSE("GPL");