2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2006 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
27 /* To restripe, we read from old geometry to a buffer, and
28 * write from buffer to new geometry.
29 * When reading we don't worry about parity. When writing we do.
33 static int geo_map(int block
, unsigned long long stripe
, int raid_disks
, int level
, int layout
)
35 /* On the given stripe, find which disk in the array will have
36 * block numbered 'block'.
37 * '-1' means the parity block.
38 * '-2' means the Q syndrome.
42 switch(level
*100 + layout
) {
45 /* raid 4 isn't messed around by parity blocks */
47 return raid_disks
-1; /* parity block */
49 case 500 + ALGORITHM_LEFT_ASYMMETRIC
:
50 pd
= (raid_disks
-1) - stripe
% raid_disks
;
51 if (block
== -1) return pd
;
56 case 500 + ALGORITHM_RIGHT_ASYMMETRIC
:
57 pd
= stripe
% raid_disks
;
58 if (block
== -1) return pd
;
63 case 500 + ALGORITHM_LEFT_SYMMETRIC
:
64 pd
= (raid_disks
- 1) - stripe
% raid_disks
;
65 if (block
== -1) return pd
;
66 return (pd
+ 1 + block
) % raid_disks
;
68 case 500 + ALGORITHM_RIGHT_SYMMETRIC
:
69 pd
= stripe
% raid_disks
;
70 if (block
== -1) return pd
;
71 return (pd
+ 1 + block
) % raid_disks
;
73 case 600 + ALGORITHM_LEFT_ASYMMETRIC
:
74 pd
= raid_disks
- 1 - (stripe
% raid_disks
);
75 if (block
== -1) return pd
;
76 if (block
== -2) return (pd
+1) % raid_disks
;
77 if (pd
== raid_disks
- 1)
83 case 600 + ALGORITHM_RIGHT_ASYMMETRIC
:
84 pd
= stripe
% raid_disks
;
85 if (block
== -1) return pd
;
86 if (block
== -2) return (pd
+1) % raid_disks
;
87 if (pd
== raid_disks
- 1)
93 case 600 + ALGORITHM_LEFT_SYMMETRIC
:
94 pd
= raid_disks
- 1 - (stripe
% raid_disks
);
95 if (block
== -1) return pd
;
96 if (block
== -2) return (pd
+1) % raid_disks
;
97 return (pd
+ 2 + block
) % raid_disks
;
99 case 600 + ALGORITHM_RIGHT_SYMMETRIC
:
100 pd
= stripe
% raid_disks
;
101 if (block
== -1) return pd
;
102 if (block
== -2) return (pd
+1) % raid_disks
;
103 return (pd
+ 2 + block
) % raid_disks
;
109 static void xor_blocks(char *target
, char **sources
, int disks
, int size
)
112 /* Amazingly inefficient... */
113 for (i
=0; i
<size
; i
++) {
115 for (j
=0 ; j
<disks
; j
++)
121 static void qsyndrome(char *p
, char *q
, char **sources
, int disks
, int size
)
124 char wq0
, wp0
, wd0
, w10
, w20
;
125 for ( d
= 0; d
< size
; d
++) {
126 wq0
= wp0
= sources
[disks
-1][d
];
127 for ( z
= disks
-2 ; z
>= 0 ; z
-- ) {
130 w20
= (wq0
&0x80) ? 0xff : 0x00;
131 w10
= (wq0
<< 1) & 0xff;
143 * A list of 'fds' of the active disks. For now we require all to be present.
144 * A geometry: raid_disks, chunk_size, level, layout
145 * A list of 'fds' for mirrored targets. They are already seeked to
146 * the right (Write) location
150 int save_stripes(int *source
, unsigned long long *offsets
,
151 int raid_disks
, int chunk_size
, int level
, int layout
,
152 int nwrites
, int *dest
,
153 unsigned long long start
, unsigned long long length
)
156 char *buf
= (char*)(((unsigned long)abuf
+511)&~511UL);
157 int cpos
= start
% chunk_size
; /* where in chunk we are up to */
159 int data_disks
= raid_disks
- (level
== 0 ? 0 : level
<=5 ? 1 : 2);
163 unsigned long long offset
;
165 len
= chunk_size
- cpos
;
166 if (len
> 8192) len
= 8192;
167 if (len
> length
) len
= length
;
168 /* len bytes to be moved from one device */
170 offset
= (start
/chunk_size
/data_disks
)*chunk_size
+ cpos
;
171 disk
= start
/chunk_size
% data_disks
;
172 disk
= geo_map(disk
, start
/chunk_size
/data_disks
,
173 raid_disks
, level
, layout
);
174 if (lseek64(source
[disk
], offsets
[disk
]+offset
, 0) < 0)
176 if (read(source
[disk
], buf
, len
) != len
)
178 for (i
=0; i
<nwrites
; i
++)
179 if (write(dest
[i
], buf
, len
) != len
)
184 while (cpos
>= chunk_size
) cpos
-= chunk_size
;
191 * A list of 'fds' of the active disks. Some may be '-1' for not-available.
192 * A geometry: raid_disks, chunk_size, level, layout
193 * An 'fd' to read from. It is already seeked to the right (Read) location.
194 * A start and length.
195 * The length must be a multiple of the stripe size.
197 * We build a full stripe in memory and then write it out.
198 * We assume that there are enough working devices.
200 int restore_stripes(int *dest
, unsigned long long *offsets
,
201 int raid_disks
, int chunk_size
, int level
, int layout
,
202 int source
, unsigned long long read_offset
,
203 unsigned long long start
, unsigned long long length
)
205 char *stripe_buf
= malloc(raid_disks
* chunk_size
);
206 char **stripes
= malloc(raid_disks
* sizeof(char*));
207 char **blocks
= malloc(raid_disks
* sizeof(char*));
210 int data_disks
= raid_disks
- (level
== 0 ? 0 : level
<=5 ? 1 : 2);
212 if (stripe_buf
== NULL
|| stripes
== NULL
|| blocks
== NULL
) {
218 for (i
=0; i
<raid_disks
; i
++)
219 stripes
[i
] = stripe_buf
+ i
* chunk_size
;
221 int len
= data_disks
* chunk_size
;
222 unsigned long long offset
;
226 for (i
=0; i
< data_disks
; i
++) {
227 int disk
= geo_map(i
, start
/chunk_size
/data_disks
,
228 raid_disks
, level
, layout
);
229 blocks
[i
] = stripes
[disk
];
230 if (lseek64(source
, read_offset
, 0) != read_offset
)
232 if (read(source
, stripes
[disk
], chunk_size
) != chunk_size
)
234 read_offset
+= chunk_size
;
236 /* We have the data, now do the parity */
237 offset
= (start
/chunk_size
/data_disks
) * chunk_size
;
241 disk
= geo_map(-1, start
/chunk_size
/data_disks
,
242 raid_disks
, level
, layout
);
243 xor_blocks(stripes
[disk
], blocks
, data_disks
, chunk_size
);
246 disk
= geo_map(-1, start
/chunk_size
/data_disks
,
247 raid_disks
, level
, layout
);
248 qdisk
= geo_map(-2, start
/chunk_size
/data_disks
,
249 raid_disks
, level
, layout
);
251 qsyndrome(stripes
[disk
], stripes
[qdisk
], blocks
,
252 data_disks
, chunk_size
);
255 for (i
=0; i
< raid_disks
; i
++)
257 if (lseek64(dest
[i
], offsets
[i
]+offset
, 0) < 0)
259 if (write(dest
[i
], stripes
[i
], chunk_size
) != chunk_size
)
270 int test_stripes(int *source
, unsigned long long *offsets
,
271 int raid_disks
, int chunk_size
, int level
, int layout
,
272 unsigned long long start
, unsigned long long length
)
274 /* read the data and p (and q) blocks, and check we got them right */
275 char *stripe_buf
= malloc(raid_disks
* chunk_size
);
276 char **stripes
= malloc(raid_disks
* sizeof(char*));
277 char **blocks
= malloc(raid_disks
* sizeof(char*));
278 char *p
= malloc(chunk_size
);
279 char *q
= malloc(chunk_size
);
282 int data_disks
= raid_disks
- (level
== 5 ? 1: 2);
283 for ( i
= 0 ; i
< raid_disks
; i
++)
284 stripes
[i
] = stripe_buf
+ i
* chunk_size
;
289 for (i
= 0 ; i
< raid_disks
; i
++) {
290 lseek64(source
[i
], offsets
[i
]+start
, 0);
291 read(source
[i
], stripes
[i
], chunk_size
);
293 for (i
= 0 ; i
< data_disks
; i
++) {
294 int disk
= geo_map(i
, start
/chunk_size
, raid_disks
,
296 blocks
[i
] = stripes
[disk
];
297 printf("%d->%d\n", i
, disk
);
301 qsyndrome(p
, q
, blocks
, data_disks
, chunk_size
);
302 disk
= geo_map(-1, start
/chunk_size
, raid_disks
,
304 if (memcmp(p
, stripes
[disk
], chunk_size
) != 0) {
305 printf("P(%d) wrong at %llu\n", disk
,
308 disk
= geo_map(-2, start
/chunk_size
, raid_disks
,
310 if (memcmp(q
, stripes
[disk
], chunk_size
) != 0) {
311 printf("Q(%d) wrong at %llu\n", disk
,
316 length
-= chunk_size
;
322 unsigned long long getnum(char *str
, char **err
)
325 unsigned long long rv
= strtoull(str
, &e
, 10);
333 main(int argc
, char *argv
[])
335 /* save/restore file raid_disks chunk_size level layout start length devices...
341 unsigned long long *offsets
;
342 int raid_disks
, chunk_size
, level
, layout
;
343 unsigned long long start
, length
;
348 fprintf(stderr
, "Usage: test_stripe save/restore file raid_disks"
349 " chunk_size level layout start length devices...\n");
352 if (strcmp(argv
[1], "save")==0)
354 else if (strcmp(argv
[1], "restore") == 0)
356 else if (strcmp(argv
[1], "test") == 0)
359 fprintf(stderr
, "test_stripe: must give 'save' or 'restore'.\n");
364 raid_disks
= getnum(argv
[3], &err
);
365 chunk_size
= getnum(argv
[4], &err
);
366 level
= getnum(argv
[5], &err
);
367 layout
= getnum(argv
[6], &err
);
368 start
= getnum(argv
[7], &err
);
369 length
= getnum(argv
[8], &err
);
371 fprintf(stderr
, "test_stripe: Bad number: %s\n", err
);
374 if (argc
!= raid_disks
+ 9) {
375 fprintf(stderr
, "test_stripe: wrong number of devices: want %d found %d\n",
379 fds
= malloc(raid_disks
* sizeof(*fds
));
380 offsets
= malloc(raid_disks
* sizeof(*offsets
));
381 memset(offsets
, 0, raid_disks
* sizeof(*offsets
));
383 storefd
= open(file
, O_RDWR
);
386 fprintf(stderr
, "test_stripe: could not open %s.\n", file
);
389 for (i
=0; i
<raid_disks
; i
++) {
390 fds
[i
] = open(argv
[9+i
], O_RDWR
);
393 fprintf(stderr
,"test_stripe: cannot open %s.\n", argv
[9+i
]);
399 int rv
= save_stripes(fds
, offsets
,
400 raid_disks
, chunk_size
, level
, layout
,
405 "test_stripe: save_stripes returned %d\n", rv
);
408 } else if (save
== 2) {
409 int rv
= test_stripes(fds
, offsets
,
410 raid_disks
, chunk_size
, level
, layout
,
414 "test_stripe: test_stripes returned %d\n", rv
);
418 int rv
= restore_stripes(fds
, offsets
,
419 raid_disks
, chunk_size
, level
, layout
,
424 "test_stripe: restore_stripes returned %d\n",