2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2006 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
27 /* To restripe, we read from old geometry to a buffer, and
28 * write from buffer to new geometry.
29 * When reading we don't worry about parity. When writing we do.
33 static int geo_map(int block
, unsigned long long stripe
, int raid_disks
,
34 int level
, int layout
)
36 /* On the given stripe, find which disk in the array will have
37 * block numbered 'block'.
38 * '-1' means the parity block.
39 * '-2' means the Q syndrome.
43 switch(level
*100 + layout
) {
46 case 500 + ALGORITHM_PARITY_N
:
47 /* raid 4 isn't messed around by parity blocks */
49 return raid_disks
-1; /* parity block */
51 case 500 + ALGORITHM_LEFT_ASYMMETRIC
:
52 pd
= (raid_disks
-1) - stripe
% raid_disks
;
53 if (block
== -1) return pd
;
58 case 500 + ALGORITHM_RIGHT_ASYMMETRIC
:
59 pd
= stripe
% raid_disks
;
60 if (block
== -1) return pd
;
65 case 500 + ALGORITHM_LEFT_SYMMETRIC
:
66 pd
= (raid_disks
- 1) - stripe
% raid_disks
;
67 if (block
== -1) return pd
;
68 return (pd
+ 1 + block
) % raid_disks
;
70 case 500 + ALGORITHM_RIGHT_SYMMETRIC
:
71 pd
= stripe
% raid_disks
;
72 if (block
== -1) return pd
;
73 return (pd
+ 1 + block
) % raid_disks
;
75 case 500 + ALGORITHM_PARITY_0
:
79 case 600 + ALGORITHM_PARITY_N_6
:
81 return raid_disks
- 1;
83 return raid_disks
- 2; /* parity block */
85 case 600 + ALGORITHM_LEFT_ASYMMETRIC_6
:
87 return raid_disks
- 1;
89 pd
= (raid_disks
-1) - stripe
% raid_disks
;
90 if (block
== -1) return pd
;
95 case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6
:
97 return raid_disks
- 1;
99 pd
= stripe
% raid_disks
;
100 if (block
== -1) return pd
;
105 case 600 + ALGORITHM_LEFT_SYMMETRIC_6
:
107 return raid_disks
- 1;
109 pd
= (raid_disks
- 1) - stripe
% raid_disks
;
110 if (block
== -1) return pd
;
111 return (pd
+ 1 + block
) % raid_disks
;
113 case 600 + ALGORITHM_RIGHT_SYMMETRIC_6
:
115 return raid_disks
- 1;
117 pd
= stripe
% raid_disks
;
118 if (block
== -1) return pd
;
119 return (pd
+ 1 + block
) % raid_disks
;
121 case 600 + ALGORITHM_PARITY_0_6
:
123 return raid_disks
- 1;
127 case 600 + ALGORITHM_PARITY_0
:
134 case 600 + ALGORITHM_LEFT_ASYMMETRIC
:
135 pd
= raid_disks
- 1 - (stripe
% raid_disks
);
136 if (block
== -1) return pd
;
137 if (block
== -2) return (pd
+1) % raid_disks
;
138 if (pd
== raid_disks
- 1)
144 case 600 + ALGORITHM_ROTATING_ZERO_RESTART
:
145 /* Different order for calculating Q, otherwise same as ... */
146 case 600 + ALGORITHM_RIGHT_ASYMMETRIC
:
147 pd
= stripe
% raid_disks
;
148 if (block
== -1) return pd
;
149 if (block
== -2) return (pd
+1) % raid_disks
;
150 if (pd
== raid_disks
- 1)
156 case 600 + ALGORITHM_LEFT_SYMMETRIC
:
157 pd
= raid_disks
- 1 - (stripe
% raid_disks
);
158 if (block
== -1) return pd
;
159 if (block
== -2) return (pd
+1) % raid_disks
;
160 return (pd
+ 2 + block
) % raid_disks
;
162 case 600 + ALGORITHM_RIGHT_SYMMETRIC
:
163 pd
= stripe
% raid_disks
;
164 if (block
== -1) return pd
;
165 if (block
== -2) return (pd
+1) % raid_disks
;
166 return (pd
+ 2 + block
) % raid_disks
;
169 case 600 + ALGORITHM_ROTATING_N_RESTART
:
170 /* Same as left_asymmetric, but first stripe is
171 * D D D P Q rather than
174 pd
= raid_disks
- 1 - ((stripe
+ 1) % raid_disks
);
175 if (block
== -1) return pd
;
176 if (block
== -2) return (pd
+1) % raid_disks
;
177 if (pd
== raid_disks
- 1)
183 case 600 + ALGORITHM_ROTATING_N_CONTINUE
:
184 /* Same as left_symmetric but Q is before P */
185 pd
= raid_disks
- 1 - (stripe
% raid_disks
);
186 if (block
== -1) return pd
;
187 if (block
== -2) return (pd
+raid_disks
-1) % raid_disks
;
188 return (pd
+ 1 + block
) % raid_disks
;
192 static int is_ddf(int layout
)
198 case ALGORITHM_ROTATING_N_CONTINUE
:
199 case ALGORITHM_ROTATING_N_RESTART
:
200 case ALGORITHM_ROTATING_ZERO_RESTART
:
206 static void xor_blocks(char *target
, char **sources
, int disks
, int size
)
209 /* Amazingly inefficient... */
210 for (i
=0; i
<size
; i
++) {
212 for (j
=0 ; j
<disks
; j
++)
218 static void qsyndrome(char *p
, char *q
, char **sources
, int disks
, int size
)
221 char wq0
, wp0
, wd0
, w10
, w20
;
222 for ( d
= 0; d
< size
; d
++) {
223 wq0
= wp0
= sources
[disks
-1][d
];
224 for ( z
= disks
-2 ; z
>= 0 ; z
-- ) {
227 w20
= (wq0
&0x80) ? 0xff : 0x00;
228 w10
= (wq0
<< 1) & 0xff;
240 * A list of 'fds' of the active disks. For now we require all to be present.
241 * A geometry: raid_disks, chunk_size, level, layout
242 * A list of 'fds' for mirrored targets. They are already seeked to
243 * the right (Write) location
247 int save_stripes(int *source
, unsigned long long *offsets
,
248 int raid_disks
, int chunk_size
, int level
, int layout
,
249 int nwrites
, int *dest
,
250 unsigned long long start
, unsigned long long length
)
253 char *buf
= (char*)(((unsigned long)abuf
+511)&~511UL);
254 int cpos
= start
% chunk_size
; /* where in chunk we are up to */
256 int data_disks
= raid_disks
- (level
== 0 ? 0 : level
<=5 ? 1 : 2);
260 unsigned long long offset
;
262 len
= chunk_size
- cpos
;
263 if (len
> 8192) len
= 8192;
264 if (len
> length
) len
= length
;
265 /* len bytes to be moved from one device */
267 offset
= (start
/chunk_size
/data_disks
)*chunk_size
+ cpos
;
268 disk
= start
/chunk_size
% data_disks
;
269 disk
= geo_map(disk
, start
/chunk_size
/data_disks
,
270 raid_disks
, level
, layout
);
271 if (lseek64(source
[disk
], offsets
[disk
]+offset
, 0) < 0)
273 if (read(source
[disk
], buf
, len
) != len
)
275 for (i
=0; i
<nwrites
; i
++)
276 if (write(dest
[i
], buf
, len
) != len
)
281 while (cpos
>= chunk_size
) cpos
-= chunk_size
;
288 * A list of 'fds' of the active disks. Some may be '-1' for not-available.
289 * A geometry: raid_disks, chunk_size, level, layout
290 * An 'fd' to read from. It is already seeked to the right (Read) location.
291 * A start and length.
292 * The length must be a multiple of the stripe size.
294 * We build a full stripe in memory and then write it out.
295 * We assume that there are enough working devices.
297 int restore_stripes(int *dest
, unsigned long long *offsets
,
298 int raid_disks
, int chunk_size
, int level
, int layout
,
299 int source
, unsigned long long read_offset
,
300 unsigned long long start
, unsigned long long length
)
302 char *stripe_buf
= malloc(raid_disks
* chunk_size
);
303 char **stripes
= malloc(raid_disks
* sizeof(char*));
304 char **blocks
= malloc(raid_disks
* sizeof(char*));
305 char *zero
= malloc(chunk_size
);
308 int data_disks
= raid_disks
- (level
== 0 ? 0 : level
<=5 ? 1 : 2);
310 if (stripe_buf
== NULL
|| stripes
== NULL
|| blocks
== NULL
318 memset(zero
, 0, chunk_size
);
319 for (i
=0; i
<raid_disks
; i
++)
320 stripes
[i
] = stripe_buf
+ i
* chunk_size
;
322 int len
= data_disks
* chunk_size
;
323 unsigned long long offset
;
327 for (i
=0; i
< data_disks
; i
++) {
328 int disk
= geo_map(i
, start
/chunk_size
/data_disks
,
329 raid_disks
, level
, layout
);
330 if (lseek64(source
, read_offset
, 0) != read_offset
)
332 if (read(source
, stripes
[disk
], chunk_size
) != chunk_size
)
334 read_offset
+= chunk_size
;
336 /* We have the data, now do the parity */
337 offset
= (start
/chunk_size
/data_disks
) * chunk_size
;
341 disk
= geo_map(-1, start
/chunk_size
/data_disks
,
342 raid_disks
, level
, layout
);
343 for (i
= 0; i
< data_disks
; i
++)
344 blocks
[i
] = stripes
[(disk
+1+i
) % raid_disks
];
345 xor_blocks(stripes
[disk
], blocks
, data_disks
, chunk_size
);
348 disk
= geo_map(-1, start
/chunk_size
/data_disks
,
349 raid_disks
, level
, layout
);
350 qdisk
= geo_map(-2, start
/chunk_size
/data_disks
,
351 raid_disks
, level
, layout
);
352 if (is_ddf(layout
)) {
353 /* q over 'raid_disks' blocks, in device order.
354 * 'p' and 'q' get to be all zero
356 for (i
= 0; i
< raid_disks
; i
++)
357 if (i
== disk
|| i
== qdisk
)
360 blocks
[i
] = stripes
[i
];
361 qsyndrome(stripes
[disk
], stripes
[qdisk
],
362 blocks
, raid_disks
, chunk_size
);
364 /* for md, 'q' is over 'data_disks' blocks,
365 * starting immediately after 'q'
367 for (i
= 0; i
< data_disks
; i
++)
368 blocks
[i
] = stripes
[(qdisk
+1+i
) % raid_disks
];
370 qsyndrome(stripes
[disk
], stripes
[qdisk
], blocks
,
371 data_disks
, chunk_size
);
375 for (i
=0; i
< raid_disks
; i
++)
377 if (lseek64(dest
[i
], offsets
[i
]+offset
, 0) < 0)
379 if (write(dest
[i
], stripes
[i
], chunk_size
) != chunk_size
)
390 int test_stripes(int *source
, unsigned long long *offsets
,
391 int raid_disks
, int chunk_size
, int level
, int layout
,
392 unsigned long long start
, unsigned long long length
)
394 /* read the data and p (and q) blocks, and check we got them right */
395 char *stripe_buf
= malloc(raid_disks
* chunk_size
);
396 char **stripes
= malloc(raid_disks
* sizeof(char*));
397 char **blocks
= malloc(raid_disks
* sizeof(char*));
398 char *p
= malloc(chunk_size
);
399 char *q
= malloc(chunk_size
);
402 int data_disks
= raid_disks
- (level
== 5 ? 1: 2);
403 for ( i
= 0 ; i
< raid_disks
; i
++)
404 stripes
[i
] = stripe_buf
+ i
* chunk_size
;
409 for (i
= 0 ; i
< raid_disks
; i
++) {
410 lseek64(source
[i
], offsets
[i
]+start
, 0);
411 read(source
[i
], stripes
[i
], chunk_size
);
413 for (i
= 0 ; i
< data_disks
; i
++) {
414 int disk
= geo_map(i
, start
/chunk_size
, raid_disks
,
416 blocks
[i
] = stripes
[disk
];
417 printf("%d->%d\n", i
, disk
);
421 qsyndrome(p
, q
, blocks
, data_disks
, chunk_size
);
422 disk
= geo_map(-1, start
/chunk_size
, raid_disks
,
424 if (memcmp(p
, stripes
[disk
], chunk_size
) != 0) {
425 printf("P(%d) wrong at %llu\n", disk
,
428 disk
= geo_map(-2, start
/chunk_size
, raid_disks
,
430 if (memcmp(q
, stripes
[disk
], chunk_size
) != 0) {
431 printf("Q(%d) wrong at %llu\n", disk
,
436 length
-= chunk_size
;
442 unsigned long long getnum(char *str
, char **err
)
445 unsigned long long rv
= strtoull(str
, &e
, 10);
453 main(int argc
, char *argv
[])
455 /* save/restore/test file raid_disks chunk_size level layout start length devices...
461 unsigned long long *offsets
;
462 int raid_disks
, chunk_size
, level
, layout
;
463 unsigned long long start
, length
;
468 fprintf(stderr
, "Usage: test_stripe save/restore file raid_disks"
469 " chunk_size level layout start length devices...\n");
472 if (strcmp(argv
[1], "save")==0)
474 else if (strcmp(argv
[1], "restore") == 0)
476 else if (strcmp(argv
[1], "test") == 0)
479 fprintf(stderr
, "test_stripe: must give 'save' or 'restore'.\n");
484 raid_disks
= getnum(argv
[3], &err
);
485 chunk_size
= getnum(argv
[4], &err
);
486 level
= getnum(argv
[5], &err
);
487 layout
= getnum(argv
[6], &err
);
488 start
= getnum(argv
[7], &err
);
489 length
= getnum(argv
[8], &err
);
491 fprintf(stderr
, "test_stripe: Bad number: %s\n", err
);
494 if (argc
!= raid_disks
+ 9) {
495 fprintf(stderr
, "test_stripe: wrong number of devices: want %d found %d\n",
499 fds
= malloc(raid_disks
* sizeof(*fds
));
500 offsets
= malloc(raid_disks
* sizeof(*offsets
));
501 memset(offsets
, 0, raid_disks
* sizeof(*offsets
));
503 storefd
= open(file
, O_RDWR
);
506 fprintf(stderr
, "test_stripe: could not open %s.\n", file
);
509 for (i
=0; i
<raid_disks
; i
++) {
510 fds
[i
] = open(argv
[9+i
], O_RDWR
);
513 fprintf(stderr
,"test_stripe: cannot open %s.\n", argv
[9+i
]);
519 int rv
= save_stripes(fds
, offsets
,
520 raid_disks
, chunk_size
, level
, layout
,
525 "test_stripe: save_stripes returned %d\n", rv
);
528 } else if (save
== 2) {
529 int rv
= test_stripes(fds
, offsets
,
530 raid_disks
, chunk_size
, level
, layout
,
534 "test_stripe: test_stripes returned %d\n", rv
);
538 int rv
= restore_stripes(fds
, offsets
,
539 raid_disks
, chunk_size
, level
, layout
,
544 "test_stripe: restore_stripes returned %d\n",