]>
Commit | Line | Data |
---|---|---|
e86c9dd6 NB |
1 | /* |
2 | * mdadm - manage Linux "md" devices aka RAID arrays. | |
3 | * | |
4 | * Copyright (C) 2006 Neil Brown <neilb@suse.de> | |
5 | * | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | * | |
21 | * Author: Neil Brown | |
22 | * Email: <neilb@suse.de> | |
23 | */ | |
24 | ||
25 | #include "mdadm.h" | |
26 | ||
27 | /* To restripe, we read from old geometry to a buffer, and | |
28 | * write from buffer to new geometry. | |
29 | * When reading we don't worry about parity. When writing we do. | |
30 | * | |
31 | */ | |
32 | ||
33 | static int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout) | |
34 | { | |
48327135 | 35 | /* On the given stripe, find which disk in the array will have |
e86c9dd6 | 36 | * block numbered 'block'. |
48327135 NB |
37 | * '-1' means the parity block. |
38 | * '-2' means the Q syndrome. | |
e86c9dd6 NB |
39 | */ |
40 | int pd; | |
41 | ||
42 | switch(level*100 + layout) { | |
43 | case 000: | |
44 | case 400: | |
45 | /* raid 4 isn't messed around by parity blocks */ | |
46 | if (block == -1) | |
47 | return raid_disks-1; /* parity block */ | |
48 | return block; | |
49 | case 500 + ALGORITHM_LEFT_ASYMMETRIC: | |
50 | pd = (raid_disks-1) - stripe % raid_disks; | |
51 | if (block == -1) return pd; | |
52 | if (block >= pd) | |
53 | block++; | |
54 | return block; | |
55 | ||
56 | case 500 + ALGORITHM_RIGHT_ASYMMETRIC: | |
57 | pd = stripe % raid_disks; | |
58 | if (block == -1) return pd; | |
59 | if (block >= pd) | |
60 | block++; | |
61 | return block; | |
62 | ||
63 | case 500 + ALGORITHM_LEFT_SYMMETRIC: | |
64 | pd = (raid_disks - 1) - stripe % raid_disks; | |
65 | if (block == -1) return pd; | |
66 | return (pd + 1 + block) % raid_disks; | |
67 | ||
68 | case 500 + ALGORITHM_RIGHT_SYMMETRIC: | |
69 | pd = stripe % raid_disks; | |
70 | if (block == -1) return pd; | |
71 | return (pd + 1 + block) % raid_disks; | |
72 | ||
73 | case 600 + ALGORITHM_LEFT_ASYMMETRIC: | |
74 | pd = raid_disks - 1 - (stripe % raid_disks); | |
75 | if (block == -1) return pd; | |
48327135 | 76 | if (block == -2) return (pd+1) % raid_disks; |
e86c9dd6 NB |
77 | if (pd == raid_disks - 1) |
78 | return block+1; | |
79 | if (block >= pd) | |
80 | return block+2; | |
81 | return block; | |
82 | ||
83 | case 600 + ALGORITHM_RIGHT_ASYMMETRIC: | |
84 | pd = stripe % raid_disks; | |
85 | if (block == -1) return pd; | |
48327135 | 86 | if (block == -2) return (pd+1) % raid_disks; |
e86c9dd6 NB |
87 | if (pd == raid_disks - 1) |
88 | return block+1; | |
89 | if (block >= pd) | |
90 | return block+2; | |
91 | return block; | |
92 | ||
93 | case 600 + ALGORITHM_LEFT_SYMMETRIC: | |
94 | pd = raid_disks - 1 - (stripe % raid_disks); | |
95 | if (block == -1) return pd; | |
48327135 | 96 | if (block == -2) return (pd+1) % raid_disks; |
e86c9dd6 NB |
97 | return (pd + 2 + block) % raid_disks; |
98 | ||
99 | case 600 + ALGORITHM_RIGHT_SYMMETRIC: | |
100 | pd = stripe % raid_disks; | |
101 | if (block == -1) return pd; | |
48327135 | 102 | if (block == -2) return (pd+1) % raid_disks; |
e86c9dd6 NB |
103 | return (pd + 2 + block) % raid_disks; |
104 | } | |
105 | return -1; | |
106 | } | |
107 | ||
108 | ||
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	/* Store in target[0..size-1] the byte-wise XOR of the first
	 * 'size' bytes of each of the 'disks' buffers in 'sources'.
	 * With no sources the target is filled with zeros.
	 * Simple rather than fast: one byte at a time.
	 */
	int pos;

	for (pos = 0; pos < size; pos++) {
		char acc = 0;
		int src = 0;

		while (src < disks)
			acc ^= sources[src++][pos];
		target[pos] = acc;
	}
}
120 | ||
48327135 NB |
static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
{
	/* Compute the RAID6 P (XOR parity) and Q (syndrome) blocks.
	 *
	 *  p, q     output buffers, each at least 'size' bytes.
	 *  sources  'disks' data buffers, each at least 'size' bytes.
	 *  disks    number of data buffers; must be at least 1, as
	 *           sources[disks-1] is always read.
	 *  size     number of bytes to process.
	 *
	 * For every byte position Q is built by Horner's rule, starting
	 * from the last source: multiply the running value by 2 in
	 * GF(2^8) and add (XOR) the next source byte.
	 *
	 * The working bytes are unsigned: with a signed 'char' the
	 * shift below would left-shift a negative value, which is
	 * undefined behaviour.
	 */
	int d, z;
	unsigned char wq0, wp0, wd0, w10, w20;

	for (d = 0; d < size; d++) {
		wq0 = wp0 = (unsigned char)sources[disks-1][d];
		for (z = disks-2; z >= 0; z--) {
			wd0 = (unsigned char)sources[z][d];
			wp0 ^= wd0;
			/* multiply wq0 by 2: shift, and if the top bit
			 * fell off, fold it back in with 0x1d
			 */
			w20 = (wq0 & 0x80) ? 0x1d : 0x00;
			w10 = (wq0 << 1) & 0xff;
			wq0 = w10 ^ w20 ^ wd0;
		}
		p[d] = (char)wp0;
		q[d] = (char)wq0;
	}
}
140 | ||
e86c9dd6 NB |
/* Save data:
 * Copy a range of array data, in array (logical) order, from the
 * member devices to one or more backup targets.
 * We are given:
 *  A list of 'fds' of the active disks ('source') and a per-device
 *    offset ('offsets') that is added to every device position.
 *    For now we require all devices to be present.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  A list of 'fds' for mirrored targets ('dest', 'nwrites' of them).
 *    They are already seeked to the right (Write) location.
 *  A start and length, in bytes of array data.
 *
 * Returns 0 on success and -1 on failure: a seek, read or write
 * failed or was short, the geometry is unusable (chunk_size or the
 * number of data disks is not positive), or the level/layout
 * combination is not known to geo_map().
 */

int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length)
{
	char abuf[8192+512];
	/* bounce buffer: 8192 bytes, rounded up to a 512 byte boundary */
	char *buf = (char*)(((unsigned long)abuf+511)&~511UL);
	int cpos;			/* where in chunk we are up to */
	int len;
	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
	int disk;

	/* Both values are used as divisors below. */
	if (chunk_size <= 0 || data_disks <= 0)
		return -1;

	cpos = start % chunk_size;

	while (length > 0) {
		unsigned long long stripe = start/chunk_size/data_disks;
		unsigned long long offset;
		int i;

		/* Never cross a chunk boundary, overflow the buffer,
		 * or copy more than was asked for.
		 */
		len = chunk_size - cpos;
		if (len > 8192) len = 8192;
		if ((unsigned long long)len > length) len = length;
		/* len bytes to be moved from one device */

		offset = stripe*chunk_size + cpos;
		disk = start/chunk_size % data_disks;
		disk = geo_map(disk, stripe, raid_disks, level, layout);
		/* geo_map gives -1 for a geometry it does not know;
		 * don't index the arrays with that.
		 */
		if (disk < 0 || disk >= raid_disks)
			return -1;
		if (lseek64(source[disk], offsets[disk]+offset, 0) < 0)
			return -1;
		if (read(source[disk], buf, len) != len)
			return -1;
		for (i=0; i<nwrites; i++)
			if (write(dest[i], buf, len) != len)
				return -1;
		length -= len;
		start += len;
		cpos += len;
		while (cpos >= chunk_size) cpos -= chunk_size;
	}
	return 0;
}
188 | ||
/* Restore data:
 * Read array data from a backup and write it, with freshly computed
 * parity, to the member devices.
 * We are given:
 *  A list of 'fds' of the active disks ('dest') and a per-device
 *    offset ('offsets') that is added to every device position.
 *    Some fds may be '-1' for not-available; those are skipped.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  An 'fd' to read from ('source') and the position in it
 *    ('read_offset') at which the saved data begins.
 *  A start and length, in bytes of array data.
 *    The length must be a multiple of the stripe size.
 *
 * We build a full stripe in memory and then write it out.
 * We assume that there are enough working devices.
 *
 * Returns:
 *   0  success
 *  -1  a seek, read or write failed or was short, the geometry is
 *      unusable, or the level/layout is not known to geo_map()
 *  -2  out of memory
 *  -3  length is not a multiple of the stripe size
 * All working buffers are released on every return path.
 */
int restore_stripes(int *dest, unsigned long long *offsets,
		    int raid_disks, int chunk_size, int level, int layout,
		    int source, unsigned long long read_offset,
		    unsigned long long start, unsigned long long length)
{
	char *stripe_buf = NULL;	/* one chunk per device, contiguous */
	char **stripes = NULL;		/* stripes[d]: chunk for device d */
	char **blocks = NULL;		/* blocks[i]: chunk of data block i */
	int i;
	int rv = 0;

	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);

	/* chunk_size and data_disks are used as divisors below. */
	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -1;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL) {
		rv = -2;
		goto out;
	}
	for (i=0; i<raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		unsigned long long len = (unsigned long long)data_disks * chunk_size;
		unsigned long long stripe = start/chunk_size/data_disks;
		unsigned long long offset;
		int disk, qdisk;

		if (length < len) {
			rv = -3;
			goto out;
		}
		/* Read the data blocks straight into the slot of the
		 * device that they belong on.
		 */
		for (i=0; i < data_disks; i++) {
			disk = geo_map(i, stripe, raid_disks, level, layout);
			/* -1 means geo_map does not know this geometry */
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			blocks[i] = stripes[disk];
			if ((unsigned long long)lseek64(source, read_offset, 0)
			    != read_offset) {
				rv = -1;
				goto out;
			}
			if (read(source, stripes[disk], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
			read_offset += chunk_size;
		}
		/* We have the data, now do the parity */
		offset = stripe * chunk_size;
		switch (level) {
		case 4:
		case 5:
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
			break;
		case 6:
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			qdisk = geo_map(-2, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks ||
			    qdisk < 0 || qdisk >= raid_disks) {
				rv = -1;
				goto out;
			}
			qsyndrome(stripes[disk], stripes[qdisk], blocks,
				  data_disks, chunk_size);
			break;
		}
		/* Write the whole stripe to every device that is present. */
		for (i=0; i < raid_disks ; i++)
			if (dest[i] >= 0) {
				if (lseek64(dest[i], offsets[i]+offset, 0) < 0) {
					rv = -1;
					goto out;
				}
				if (write(dest[i], stripes[i], chunk_size) != chunk_size) {
					rv = -1;
					goto out;
				}
			}
		length -= len;
		start += len;
	}
out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	return rv;
}
267 | ||
268 | #ifdef MAIN | |
269 | ||
48327135 NB |
int test_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 unsigned long long start, unsigned long long length)
{
	/* Read the data and P (and Q) blocks, and check we got them right.
	 *
	 * One chunk is read from every device at offsets[i]+start, the
	 * block-to-device mapping is printed, and for level 6 the P and Q
	 * blocks are recomputed and compared with what is on disk; any
	 * mismatch is reported on stdout.  'start' then advances by one
	 * chunk until 'length' is used up.
	 *
	 * Returns:
	 *   0  all chunks were read (mismatches are only printed)
	 *  -1  a seek or read failed or was short, the geometry is
	 *      unusable, or the level/layout is not known to geo_map()
	 *  -2  out of memory
	 *  -3  length is not a multiple of chunk_size
	 */
	char *stripe_buf = NULL;
	char **stripes = NULL;
	char **blocks = NULL;
	char *p = NULL;
	char *q = NULL;

	int i;
	int rv = 0;
	int data_disks = raid_disks - (level == 5 ? 1: 2);

	/* chunk_size is a divisor below and the loop step. */
	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -1;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	p = malloc(chunk_size);
	q = malloc(chunk_size);
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL ||
	    p == NULL || q == NULL) {
		rv = -2;
		goto out;
	}
	for ( i = 0 ; i < raid_disks ; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		int disk;

		/* 'length' is unsigned: stop rather than wrap around */
		if (length < (unsigned long long)chunk_size) {
			rv = -3;
			goto out;
		}
		for (i = 0 ; i < raid_disks ; i++) {
			if (lseek64(source[i], offsets[i]+start, 0) < 0 ||
			    read(source[i], stripes[i], chunk_size) != chunk_size) {
				rv = -1;
				goto out;
			}
		}
		for (i = 0 ; i < data_disks ; i++) {
			disk = geo_map(i, start/chunk_size, raid_disks,
				       level, layout);
			/* -1 means geo_map does not know this geometry */
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			blocks[i] = stripes[disk];
			printf("%d->%d\n", i, disk);
		}
		switch(level) {
		case 6:
			qsyndrome(p, q, blocks, data_disks, chunk_size);
			disk = geo_map(-1, start/chunk_size, raid_disks,
				       level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			if (memcmp(p, stripes[disk], chunk_size) != 0) {
				printf("P(%d) wrong at %llu\n", disk,
				       start / chunk_size);
			}
			disk = geo_map(-2, start/chunk_size, raid_disks,
				       level, layout);
			if (disk < 0 || disk >= raid_disks) {
				rv = -1;
				goto out;
			}
			if (memcmp(q, stripes[disk], chunk_size) != 0) {
				printf("Q(%d) wrong at %llu\n", disk,
				       start / chunk_size);
			}
			break;
		}
		length -= chunk_size;
		start += chunk_size;
	}
out:
	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(p);
	free(q);
	return rv;
}
321 | ||
e86c9dd6 NB |
unsigned long long getnum(char *str, char **err)
{
	/* Parse 'str' as a decimal number.
	 * The whole string must be consumed and must contain at least
	 * one character that strtoull accepts.  If not, '*err' is set
	 * to 'str' and 0 is returned.  On success '*err' is left
	 * untouched, so one 'err' can collect the outcome of several
	 * calls.
	 */
	char *end;
	unsigned long long value = strtoull(str, &end, 10);
	int parsed_all = (end != str) && (*end == '\0');

	if (parsed_all)
		return value;

	*err = str;
	return 0;
}
332 | ||
int main(int argc, char *argv[])
{
	/* save/restore/test file raid_disks chunk_size level layout start length devices...
	 *
	 * Stand-alone driver (built with -DMAIN) for the routines above:
	 *  save     copy array data from the devices into 'file'
	 *  restore  write the data in 'file' back to the devices
	 *  test     check P/Q on the devices ('file' is opened but unused)
	 *
	 * Exit status: 0 success, 1 usage error or the operation failed,
	 * 2 bad argument, 3 a file could not be opened or out of memory.
	 */
	int save;		/* 1 = save, 0 = restore, 2 = test */
	int *fds;
	char *file;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int i;

	char *err = NULL;
	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore/test file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save")==0)
		save = 1;
	else if (strcmp(argv[1], "restore") == 0)
		save = 0;
	else if (strcmp(argv[1], "test") == 0)
		save = 2;
	else {
		fprintf(stderr, "test_stripe: must give 'save', 'restore' or 'test'.\n");
		exit(2);
	}

	file = argv[2];
	/* getnum only ever sets 'err', so one check covers all six */
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	/* As argc >= 10, passing this check also means raid_disks >= 1 */
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	/* chunk_size is used as a divisor by all three operations */
	if (chunk_size <= 0) {
		fprintf(stderr, "test_stripe: chunk_size must be positive\n");
		exit(2);
	}
	fds = malloc(raid_disks * sizeof(*fds));
	offsets = malloc(raid_disks * sizeof(*offsets));
	if (fds == NULL || offsets == NULL) {
		fprintf(stderr, "test_stripe: out of memory\n");
		exit(3);
	}
	/* data starts at the very beginning of every device */
	memset(offsets, 0, raid_disks * sizeof(*offsets));

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i=0; i<raid_disks; i++) {
		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	if (save == 1) {
		int rv = save_stripes(fds, offsets,
				      raid_disks, chunk_size, level, layout,
				      1, &storefd,
				      start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: save_stripes returned %d\n", rv);
			exit(1);
		}
	} else if (save == 2) {
		int rv = test_stripes(fds, offsets,
				      raid_disks, chunk_size, level, layout,
				      start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: test_stripes returned %d\n", rv);
			exit(1);
		}
	} else {
		int rv = restore_stripes(fds, offsets,
					 raid_disks, chunk_size, level, layout,
					 storefd, 0ULL,
					 start, length);
		if (rv != 0) {
			fprintf(stderr,
				"test_stripe: restore_stripes returned %d\n",
				rv);
			exit(1);
		}
	}
	exit(0);
}
431 | ||
432 | #endif /* MAIN */ |