git.ipfire.org Git - people/teissler/ipfire-2.x.git/blob - src/patches/suse-2.6.27.31/patches.suse/dm-mpath-service-time-load-balancing
Merge branch 'master' of git://git.ipfire.org/ipfire-2.x
[people/teissler/ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.suse / dm-mpath-service-time-load-balancing
1 From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
2 Subject: dm-mpath: add service-time oriented dynamic load balancer
3 References: FATE#303862,FATE#302108
4
5 This patch adds a service time oriented dynamic load balancer,
6 dm-service-time.
7
8
9 Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
10 Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
11 Signed-off-by: Hannes Reinecke <hare@suse.de>
12
13 Index: linux-2.6.27/drivers/md/Makefile
14 ===================================================================
15 --- linux-2.6.27.orig/drivers/md/Makefile
16 +++ linux-2.6.27/drivers/md/Makefile
17 @@ -33,7 +33,8 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
18 obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
19 obj-$(CONFIG_DM_DELAY) += dm-delay.o
20 obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o \
21 - dm-least-pending.o dm-queue-length.o
22 + dm-least-pending.o dm-queue-length.o \
23 + dm-service-time.o
24 obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
25 obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-regions.o dm-log.o
26 obj-$(CONFIG_DM_RAID45) += dm-raid45.o dm-log.o dm-memcache.o \
27 Index: linux-2.6.27/drivers/md/dm-service-time.c
28 ===================================================================
29 --- /dev/null
30 +++ linux-2.6.27/drivers/md/dm-service-time.c
31 @@ -0,0 +1,312 @@
32 +/*
33 + * Copyright (C) 2007-2008 NEC Corporation. All Rights Reserved.
34 + *
35 + * Module Author: Kiyoshi Ueda
36 + *
37 + * This file is released under the GPL.
38 + *
39 + * Throughput oriented path selector.
40 + */
41 +
42 +#include "dm.h"
43 +#include "dm-path-selector.h"
44 +
45 +#define DM_MSG_PREFIX "multipath service-time" /* presumably prepended to DMERR()/DMINFO() output -- confirm in the dm headers */
46 +#define ST_MIN_IO 2 /* default repeat_count when a path supplies no argument */
47 +#define ST_VERSION "0.1.0" /* version string used in the module-load message */
48 +
49 +struct selector { /* per-selector state, stored in path_selector->context */
50 + struct list_head valid_paths; /* path_info entries eligible for selection */
51 + struct list_head failed_paths; /* path_info entries moved here by st_fail_path() */
52 +};
53 +
54 +struct path_info { /* per-path state, stored in dm_path->pscontext */
55 + struct list_head list; /* link on the selector's valid_paths or failed_paths */
56 + struct dm_path *path; /* the path this entry describes */
57 + unsigned int repeat_count; /* handed back to the caller by st_select_path() */
58 +
59 + atomic_t in_flight; /* Bytes of I/O started but not yet ended */
60 + size_t perf; /* Recent throughput: bytes per msec of io_ticks */
61 + sector_t last_sectors; /* Read+write sectors at the last perf update */
62 + size_t last_io_ticks; /* io_ticks at the last perf update */
63 +};
64 +
65 +/* Allocate a selector with empty valid/failed lists; NULL if allocation fails. */
66 +static struct selector *alloc_selector(void)
67 +{
68 +	struct selector *sel = kzalloc(sizeof(*sel), GFP_KERNEL);
69 +
70 +	if (!sel)
71 +		return NULL;
72 +	INIT_LIST_HEAD(&sel->valid_paths);
73 +	INIT_LIST_HEAD(&sel->failed_paths);
74 +	return sel;
75 +}
76 +
77 +/* Constructor: hang a new selector off ps->context; argc/argv are ignored. */
78 +static int st_create(struct path_selector *ps, unsigned argc, char **argv)
79 +{
80 +	struct selector *sel = alloc_selector();
81 +
82 +	if (sel)
83 +		ps->context = sel;
84 +
85 +	return sel ? 0 : -ENOMEM;
86 +}
87 +
88 +/* Unlink and kfree every path_info on @paths, clearing each path's pscontext. */
89 +static void free_paths(struct list_head *paths)
90 +{
91 +	struct path_info *cur, *tmp;
92 +	list_for_each_entry_safe(cur, tmp, paths, list) {
93 +		cur->path->pscontext = NULL;
94 +		list_del(&cur->list);
95 +		kfree(cur);
96 +	}
97 +}
98 +
99 +/* Destructor: free all per-path state and the selector; clears ps->context. */
100 +static void st_destroy(struct path_selector *ps)
101 +{
102 +	struct selector *sel = ps->context;
103 +	ps->context = NULL;
104 +	free_paths(&sel->failed_paths);
105 +	free_paths(&sel->valid_paths);
106 +	kfree(sel);
107 +}
108 +
109 +/*
110 + * Emit status into @result: "0 " if @path is NULL, else in-flight bytes and
111 + * perf (INFO) or repeat_count (TABLE); perf is cast to match its %lu.
112 + */
113 +static int st_status(struct path_selector *ps, struct dm_path *path,
114 +		     status_type_t type, char *result, unsigned int maxlen)
115 +{
116 +	int sz = 0;
117 +	struct path_info *pi;
118 +
119 +	if (!path) {
120 +		DMEMIT("0 ");
121 +		return sz;
122 +	}
123 +
124 +	pi = path->pscontext;
125 +	if (type == STATUSTYPE_INFO)
126 +		DMEMIT("if:%08lu pf:%06lu ",
127 +		       (unsigned long) atomic_read(&pi->in_flight),
128 +		       (unsigned long) pi->perf);
129 +	else if (type == STATUSTYPE_TABLE)
130 +		DMEMIT("%u ", pi->repeat_count);
131 +
132 +	return sz;
133 +}
134 +
135 +/*
136 + * Register @path.  An optional single argument gives the repeat count
137 + * (default ST_MIN_IO).  Returns 0, or -EINVAL/-ENOMEM with *error set.
138 + */
139 +static int st_add_path(struct path_selector *ps, struct dm_path *path,
140 +		       int argc, char **argv, char **error)
141 +{
142 +	struct gendisk *gd = path->dev->bdev->bd_disk;
143 +	struct selector *sel = ps->context;
144 +	unsigned int repeat_count = ST_MIN_IO;
145 +	struct path_info *info;
146 +
147 +	if (argc > 1) {
148 +		*error = "service-time ps: incorrect number of arguments";
149 +		return -EINVAL;
150 +	}
151 +
152 +	/* argv[0], when given, is the number of I/Os before switching path. */
153 +	if (argc == 1 && sscanf(argv[0], "%u", &repeat_count) != 1) {
154 +		*error = "service-time ps: invalid repeat count";
155 +		return -EINVAL;
156 +	}
157 +
158 +	info = kmalloc(sizeof(*info), GFP_KERNEL);
159 +	if (!info) {
160 +		*error = "service-time ps: Error allocating path context";
161 +		return -ENOMEM;
162 +	}
163 +
164 +	info->path = path;
165 +	info->repeat_count = repeat_count;
166 +	atomic_set(&info->in_flight, 0);
167 +	info->perf = 0;
168 +	info->last_sectors = disk_stat_read(gd, sectors[READ]) +
169 +			     disk_stat_read(gd, sectors[WRITE]);
170 +	info->last_io_ticks = disk_stat_read(gd, io_ticks);
171 +
172 +	path->pscontext = info;
173 +	list_add_tail(&info->list, &sel->valid_paths);
174 +	return 0;
175 +}
176 +
177 +/* Take @path out of rotation by moving its path_info onto failed_paths. */
178 +static void st_fail_path(struct path_selector *ps, struct dm_path *path)
179 +{
180 +	struct path_info *info = path->pscontext;
181 +	struct selector *sel = ps->context;
182 +	list_move(&info->list, &sel->failed_paths);
183 +}
184 +
185 +/* Return @path to service at the tail of valid_paths; always returns 0. */
186 +static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
187 +{
188 +	struct path_info *info = path->pscontext;
189 +	struct selector *sel = ps->context;
190 +
191 +	list_move_tail(&info->list, &sel->valid_paths);
192 +	return 0;
193 +}
194 +
195 +/* Recompute pi->perf (bytes per msec) once both disk counters have moved. */
196 +static void stats_update(struct path_info *pi)
197 +{
198 +	struct gendisk *disk = pi->path->dev->bdev->bd_disk;
199 +	sector_t sectors = disk_stat_read(disk, sectors[READ]) +
200 +			   disk_stat_read(disk, sectors[WRITE]);
201 +	size_t io_ticks = disk_stat_read(disk, io_ticks);
202 +	u64 bytes;	/* do_div() needs a 64-bit dividend; size_t may be 32-bit */
203 +
204 +	if (sectors == pi->last_sectors || io_ticks == pi->last_io_ticks)
205 +		return;
206 +
207 +	/* 512-byte sectors -> bytes, widened first so the shift cannot wrap */
208 +	bytes = (u64)(sectors - pi->last_sectors) << 9;
209 +	do_div(bytes, jiffies_to_msecs(io_ticks - pi->last_io_ticks));
210 +	pi->perf = bytes;
211 +	pi->last_sectors = sectors;
212 +	pi->last_io_ticks = io_ticks;
213 +}
214 +
215 +/*
216 + * Compare the expected load of two paths if @new_io more bytes were queued.
217 + * Returns <0 when @pi1 is the better choice, >0 when @pi2 is, 0 on a tie.
218 + */
219 +static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
220 +			   size_t new_io)
221 +{
222 +	/* u64: do_div() needs a 64-bit dividend and "<< 10" must not wrap */
223 +	u64 st1 = atomic_read(&pi1->in_flight);
224 +	u64 st2 = atomic_read(&pi2->in_flight);
225 +
226 +	/*
227 +	 * Case 1: No performance data available. Choose less loaded path.
228 +	 * Results are built from comparisons because narrowing an unsigned
229 +	 * difference to int can yield the wrong sign.
230 +	 */
231 +	if (!pi1->perf || !pi2->perf)
232 +		return (st1 > st2) - (st1 < st2);
233 +
234 +	/*
235 +	 * Case 2: Calculate service time, (in_flight + new_io) / perf,
236 +	 * scaled by 1024 to keep precision. Choose the shorter one.
237 +	 */
238 +	st1 = (st1 + new_io) << 10;
239 +	st2 = (st2 + new_io) << 10;
240 +	do_div(st1, pi1->perf);
241 +	do_div(st2, pi2->perf);
242 +	if (st1 != st2)
243 +		return (st1 > st2) - (st1 < st2);
244 +
245 +	/*
246 +	 * Case 3: Service time is equal. Choose faster path.
247 +	 */
248 +	return (pi2->perf > pi1->perf) - (pi2->perf < pi1->perf);
249 +}
250 +
251 +/*
252 + * Pick the valid path that st_compare_load() ranks lowest.  Returns its
253 + * dm_path and stores its repeat count in *repeat_count; NULL if none.
254 + */
255 +static struct dm_path *st_select_path(struct path_selector *ps,
256 +				      unsigned *repeat_count, size_t nr_bytes)
257 +{
258 +	struct selector *sel = ps->context;
259 +	struct list_head *valid = &sel->valid_paths;
260 +	struct path_info *cur, *winner = NULL;
261 +
262 +	if (list_empty(valid))
263 +		return NULL;
264 +
265 +	/* Rotate the head to the tail so ties do not always favour one path. */
266 +	list_move_tail(valid->next, valid);
267 +
268 +	/* Refresh every path's perf figure before comparing any of them. */
269 +	list_for_each_entry(cur, valid, list)
270 +		stats_update(cur);
271 +
272 +	list_for_each_entry(cur, valid, list)
273 +		if (!winner || st_compare_load(cur, winner, nr_bytes) < 0)
274 +			winner = cur;
275 +
276 +	if (!winner)
277 +		return NULL;
278 +	*repeat_count = winner->repeat_count;
279 +	return winner->path;
280 +}
281 +
282 +/* Account @nr_bytes of newly issued I/O against @path; always returns 0. */
283 +static int st_start_io(struct path_selector *ps, struct dm_path *path,
284 +		       size_t nr_bytes)
285 +{
286 +	struct path_info *info = path->pscontext;
287 +
288 +	atomic_add(nr_bytes, &info->in_flight);
289 +	return 0;
290 +}
291 +
292 +/* Drop @nr_bytes of finished I/O from @path's in-flight total; returns 0. */
293 +static int st_end_io(struct path_selector *ps, struct dm_path *path,
294 +		     size_t nr_bytes)
295 +{
296 +	struct path_info *info = path->pscontext;
297 +
298 +	atomic_sub(nr_bytes, &info->in_flight);
299 +	return 0;
300 +}
301 +
302 +static struct path_selector_type st_ps = { /* ops table handed to dm_register_path_selector() */
303 + .name = "service-time",
304 + .module = THIS_MODULE,
305 + .table_args = 1, /* st_status() emits one TABLE field: repeat_count */
306 + .info_args = 2, /* st_status() emits two INFO fields: "if:" and "pf:" */
307 + .create = st_create,
308 + .destroy = st_destroy,
309 + .status = st_status,
310 + .add_path = st_add_path,
311 + .fail_path = st_fail_path,
312 + .reinstate_path = st_reinstate_path,
313 + .select_path = st_select_path,
314 + .start_io = st_start_io, /* adds nr_bytes to the path's in_flight */
315 + .end_io = st_end_io, /* subtracts nr_bytes from the path's in_flight */
316 +};
317 +
318 +/* Module init: register the selector; announce the load only on success. */
319 +static int __init dm_st_init(void)
320 +{
321 +	int r = dm_register_path_selector(&st_ps);
322 +
323 +	if (r < 0)
324 +		DMERR("register failed %d", r);
325 +	else
326 +		DMINFO("version " ST_VERSION " loaded");
327 +	return r;
328 +}
329 +
330 +static void __exit dm_st_exit(void)
331 +{
332 + int r = dm_unregister_path_selector(&st_ps);
333 +
334 + if (r < 0)
335 + DMERR("unregister failed %d", r);
336 +}
337 +
338 +module_init(dm_st_init); /* dm_st_init() registers the path selector */
339 +module_exit(dm_st_exit); /* dm_st_exit() unregisters it again */
340 +
341 +MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
342 +MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>");
343 +MODULE_LICENSE("GPL");