2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2006 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
/* To restripe, we read from the old geometry into a buffer, and
 * write from the buffer out to the new geometry.
 * When reading we don't worry about parity. When writing we do.
 */
33 static int geo_map(int block
, unsigned long long stripe
, int raid_disks
, int level
, int layout
)
35 /* On the given stripe, find which disk in the array will have
36 * block numbered 'block'.
37 * '-1' means the parity block.
38 * '-2' means the Q syndrome.
42 switch(level
*100 + layout
) {
45 /* raid 4 isn't messed around by parity blocks */
47 return raid_disks
-1; /* parity block */
49 case 500 + ALGORITHM_LEFT_ASYMMETRIC
:
50 pd
= (raid_disks
-1) - stripe
% raid_disks
;
51 if (block
== -1) return pd
;
56 case 500 + ALGORITHM_RIGHT_ASYMMETRIC
:
57 pd
= stripe
% raid_disks
;
58 if (block
== -1) return pd
;
63 case 500 + ALGORITHM_LEFT_SYMMETRIC
:
64 pd
= (raid_disks
- 1) - stripe
% raid_disks
;
65 if (block
== -1) return pd
;
66 return (pd
+ 1 + block
) % raid_disks
;
68 case 500 + ALGORITHM_RIGHT_SYMMETRIC
:
69 pd
= stripe
% raid_disks
;
70 if (block
== -1) return pd
;
71 return (pd
+ 1 + block
) % raid_disks
;
73 case 600 + ALGORITHM_LEFT_ASYMMETRIC
:
74 pd
= raid_disks
- 1 - (stripe
% raid_disks
);
75 if (block
== -1) return pd
;
76 if (block
== -2) return (pd
+1) % raid_disks
;
77 if (pd
== raid_disks
- 1)
83 case 600 + ALGORITHM_RIGHT_ASYMMETRIC
:
84 pd
= stripe
% raid_disks
;
85 if (block
== -1) return pd
;
86 if (block
== -2) return (pd
+1) % raid_disks
;
87 if (pd
== raid_disks
- 1)
93 case 600 + ALGORITHM_LEFT_SYMMETRIC
:
94 pd
= raid_disks
- 1 - (stripe
% raid_disks
);
95 if (block
== -1) return pd
;
96 if (block
== -2) return (pd
+1) % raid_disks
;
97 return (pd
+ 2 + block
) % raid_disks
;
99 case 600 + ALGORITHM_RIGHT_SYMMETRIC
:
100 pd
= stripe
% raid_disks
;
101 if (block
== -1) return pd
;
102 if (block
== -2) return (pd
+1) % raid_disks
;
103 return (pd
+ 2 + block
) % raid_disks
;
/*
 * xor_blocks - store in 'target' the byte-wise XOR of 'disks' buffers.
 *
 *   target   output buffer of at least 'size' bytes.
 *   sources  array of 'disks' input buffers, each at least 'size' bytes.
 *   disks    number of input buffers; with 0 the target is zero-filled.
 *   size     number of bytes to process.
 *
 * NOTE(review): the accumulator statements were missing from the
 * listing this was restored from; the body is reconstructed from the
 * visible loop headers and the way restore_stripes() uses the result
 * as the parity block.
 */
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	int i, j;

	/* Amazingly inefficient... */
	for (i = 0; i < size; i++) {
		char c = 0;

		for (j = 0; j < disks; j++)
			c ^= sources[j][i];
		target[i] = c;
	}
}
/*
 * qsyndrome - compute the RAID-6 P and Q blocks for a set of data blocks.
 *
 *   p        output: byte-wise XOR of all data blocks ('size' bytes).
 *   q        output: Q syndrome of the data blocks ('size' bytes).
 *   sources  array of 'disks' data blocks, in data-block order.
 *   disks    number of data blocks; with 0 both outputs are zero-filled.
 *   size     number of bytes per block.
 *
 * The bytes are handled as 'unsigned char': plain 'char' may be signed,
 * and left-shifting a negative value is undefined behaviour.
 *
 * NOTE(review): the statements that fold the carry back in were missing
 * from the listing this was restored from.  The reduction constant 0x1d
 * is the conventional RAID-6 field polynomial (0x11d) - confirm against
 * upstream.
 */
static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
{
	int d, z;

	for (d = 0; d < size; d++) {
		unsigned char wp = 0;
		unsigned char wq = 0;

		/*
		 * Horner evaluation from the highest block down: on each
		 * step Q is multiplied by 2 in GF(2^8) and the next data
		 * byte is added.  Starting from zero makes the first step
		 * simply load sources[disks-1][d].
		 */
		for (z = disks - 1; z >= 0; z--) {
			unsigned char wd = (unsigned char)sources[z][d];
			/* reduction term, applied only if the top bit shifts out */
			unsigned char fold = (wq & 0x80) ? 0x1d : 0x00;
			unsigned char dbl = (unsigned char)((wq << 1) & 0xff);

			wp ^= wd;
			wq = (unsigned char)(dbl ^ fold ^ wd);
		}
		p[d] = (char)wp;
		q[d] = (char)wq;
	}
}
/*
 * save_stripes - copy a range of array data out to one or more targets.
 *
 * We are given:
 *  A list of 'fds' of the active disks ('source').  For now we require
 *    all to be present.
 *  The per-device data offsets ('offsets').
 *  A geometry: raid_disks, chunk_size, level, layout
 *  A list of 'nwrites' 'fds' for mirrored targets ('dest').  They are
 *    already seeked to the right (Write) location.
 *  A start and length, in bytes of array data.
 *
 * Only data blocks are read; parity is ignored.
 *
 * Returns 0 on success and -1 on a bad geometry or any seek/read/write
 * failure (including short reads and writes).
 *
 * NOTE(review): the buffer declaration, the 'return' statements and the
 * loop bookkeeping were missing from the listing this was restored from;
 * the 8192-byte buffer and the -1 error value are reconstructed - confirm
 * against upstream.
 */
int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length)
{
	char buf[8192];
	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
	int cpos;	/* where in chunk we are up to */

	/* Both values are used as divisors below. */
	if (chunk_size <= 0 || data_disks <= 0)
		return -1;
	cpos = start % chunk_size;

	while (length > 0) {
		unsigned long long chunk = start / chunk_size;
		unsigned long long stripe = chunk / data_disks;
		unsigned long long offset = stripe * chunk_size + cpos;
		int len = chunk_size - cpos;
		int disk;
		int i;

		/* len bytes to be moved from one device */
		if (len > (int)sizeof(buf))
			len = sizeof(buf);
		if ((unsigned long long)len > length)
			len = length;

		disk = geo_map(chunk % data_disks, stripe,
			       raid_disks, level, layout);
		/* geo_map() yields -1 for a geometry it does not know */
		if (disk < 0 || disk >= raid_disks)
			return -1;

		if (lseek64(source[disk], offsets[disk] + offset, SEEK_SET) < 0)
			return -1;
		if (read(source[disk], buf, len) != len)
			return -1;
		for (i = 0; i < nwrites; i++)
			if (write(dest[i], buf, len) != len)
				return -1;

		length -= len;
		start += len;
		cpos += len;
		while (cpos >= chunk_size)
			cpos -= chunk_size;
	}
	return 0;
}
/*
 * restore_stripes - write saved data back to the array, with parity.
 *
 * We are given:
 *  A list of 'fds' of the active disks ('dest').  Some may be '-1' for
 *    not-available.
 *  The per-device data offsets ('offsets').
 *  A geometry: raid_disks, chunk_size, level, layout
 *  An 'fd' to read from ('source'), and the offset in it to read at
 *    ('read_offset').
 *  A start and length.
 * The length must be a multiple of the stripe size.
 *
 * We build a full stripe in memory and then write it out.
 * We assume that there are enough working devices.
 *
 * Returns 0 on success, -1 on a bad geometry or seek/read/write failure,
 * -2 when memory cannot be allocated and -3 when 'length' does not cover
 * a whole stripe.  All working buffers are released on every path.
 *
 * NOTE(review): the error values, the 'switch (level)' around the parity
 * step and the test that skips unavailable devices were missing from the
 * listing this was restored from and are reconstructed - confirm against
 * upstream.
 */
int restore_stripes(int *dest, unsigned long long *offsets,
		    int raid_disks, int chunk_size, int level, int layout,
		    int source, unsigned long long read_offset,
		    unsigned long long start, unsigned long long length)
{
	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
	char *stripe_buf = NULL;
	char **stripes = NULL;
	char **blocks = NULL;
	int rv = -1;
	int i;

	/* chunk_size and data_disks are used as divisors below. */
	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -1;

	/* size_t arithmetic so that a large geometry cannot overflow 'int' */
	stripe_buf = malloc((size_t)raid_disks * (size_t)chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL) {
		rv = -2;
		goto out;
	}

	/* stripes[i] is the chunk that will be written to device i */
	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size / data_disks;
		unsigned long long len =
			(unsigned long long)data_disks * chunk_size;
		unsigned long long offset;
		int disk, qdisk;

		if (length < len) {
			rv = -3;
			goto out;
		}

		/* Read each data chunk straight into its device's slot. */
		for (i = 0; i < data_disks; i++) {
			disk = geo_map(i, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks)
				goto out;
			blocks[i] = stripes[disk];
			if (lseek64(source, read_offset, SEEK_SET) < 0)
				goto out;
			if (read(source, stripes[disk], chunk_size) != chunk_size)
				goto out;
			read_offset += chunk_size;
		}

		/* We have the data, now do the parity */
		offset = stripe * chunk_size;
		switch (level) {
		case 4:
		case 5:
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks)
				goto out;
			xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
			break;
		case 6:
			disk = geo_map(-1, stripe, raid_disks, level, layout);
			qdisk = geo_map(-2, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks ||
			    qdisk < 0 || qdisk >= raid_disks)
				goto out;
			qsyndrome(stripes[disk], stripes[qdisk], blocks,
				  data_disks, chunk_size);
			break;
		default:
			break;
		}

		for (i = 0; i < raid_disks; i++) {
			if (dest[i] < 0)
				continue;	/* device not available */
			if (lseek64(dest[i], offsets[i] + offset, SEEK_SET) < 0)
				goto out;
			if (write(dest[i], stripes[i], chunk_size) != chunk_size)
				goto out;
		}
		length -= len;
		start += len;
	}
	rv = 0;

out:
	free(blocks);
	free(stripes);
	free(stripe_buf);
	return rv;
}
/*
 * test_stripes - read the data and p (and q) blocks, and check we got
 * them right.
 *
 *   source      fds of the member devices, one per raid disk.
 *   offsets     per-device data offsets.
 *   raid_disks, chunk_size, level, layout
 *               the array geometry.
 *   start       per-device byte offset of the first chunk to check.
 *   length      number of bytes per device to check.
 *
 * For each chunk-sized step the chunk is read from every device, the
 * data-block mapping is printed, and P and Q are recomputed and compared
 * with what is on disk.  Mismatches are reported on stdout and do not
 * change the return value.
 *
 * Returns 0 when the scan completes, -1 on a bad geometry or a
 * seek/read failure, -2 when memory cannot be allocated.
 *
 * NOTE(review): the loop header, the condition restricting the check to
 * level 6 and the second argument of the "wrong at" messages were
 * missing from the listing this was restored from and are reconstructed
 * - confirm against upstream.
 */
int test_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 unsigned long long start, unsigned long long length)
{
	int data_disks = raid_disks - (level == 5 ? 1 : 2);
	char *stripe_buf = NULL;
	char **stripes = NULL;
	char **blocks = NULL;
	char *p = NULL;
	char *q = NULL;
	int rv = -1;
	int i;

	/* chunk_size is used as a divisor below. */
	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -1;

	stripe_buf = malloc((size_t)raid_disks * (size_t)chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	p = malloc(chunk_size);
	q = malloc(chunk_size);
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL ||
	    p == NULL || q == NULL) {
		rv = -2;
		goto out;
	}

	/* stripes[i] holds the chunk read from device i */
	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + (size_t)i * chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size;
		int disk;

		for (i = 0; i < raid_disks; i++) {
			if (lseek64(source[i], offsets[i] + start, SEEK_SET) < 0)
				goto out;
			if (read(source[i], stripes[i], chunk_size) != chunk_size)
				goto out;
		}
		for (i = 0; i < data_disks; i++) {
			disk = geo_map(i, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks)
				goto out;
			blocks[i] = stripes[disk];
			printf("%d->%d\n", i, disk);
		}

		if (level == 6) {
			qsyndrome(p, q, blocks, data_disks, chunk_size);

			disk = geo_map(-1, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks)
				goto out;
			if (memcmp(p, stripes[disk], chunk_size) != 0) {
				printf("P(%d) wrong at %llu\n", disk,
				       stripe);
			}

			disk = geo_map(-2, stripe, raid_disks, level, layout);
			if (disk < 0 || disk >= raid_disks)
				goto out;
			if (memcmp(q, stripes[disk], chunk_size) != 0) {
				printf("Q(%d) wrong at %llu\n", disk,
				       stripe);
			}
		}

		/* stop cleanly on a trailing partial chunk instead of wrapping */
		if (length < (unsigned long long)chunk_size)
			break;
		length -= chunk_size;
		start += chunk_size;
	}
	rv = 0;

out:
	free(q);
	free(p);
	free(blocks);
	free(stripes);
	free(stripe_buf);
	return rv;
}
/*
 * getnum - parse a non-negative decimal number.
 *
 *   str  the text to parse; it must consist of decimal digits only.
 *   err  on failure *err is set to 'str'; on success it is left alone,
 *        so a caller can parse several numbers and test it once.
 *
 * Returns the parsed value, or 0 on failure.
 *
 * strtoull() on its own skips leading white space and accepts a sign,
 * so "-5" would quietly become a huge value; requiring a leading digit
 * rejects those inputs.
 *
 * NOTE(review): the failure handling was missing from the listing this
 * was restored from; it is reconstructed from how main() reports 'err'.
 */
unsigned long long getnum(char *str, char **err)
{
	char *e;
	unsigned long long rv;

	if (str[0] < '0' || str[0] > '9') {
		*err = str;
		return 0;
	}
	rv = strtoull(str, &e, 10);
	if (e == str || *e) {
		/* nothing parsed, or trailing junk after the digits */
		*err = str;
		return 0;
	}
	return rv;
}
332 main(int argc
, char *argv
[])
334 /* save/restore file raid_disks chunk_size level layout start length devices...
340 unsigned long long *offsets
;
341 int raid_disks
, chunk_size
, level
, layout
;
342 unsigned long long start
, length
;
347 fprintf(stderr
, "Usage: test_stripe save/restore file raid_disks"
348 " chunk_size level layout start length devices...\n");
351 if (strcmp(argv
[1], "save")==0)
353 else if (strcmp(argv
[1], "restore") == 0)
355 else if (strcmp(argv
[1], "test") == 0)
358 fprintf(stderr
, "test_stripe: must give 'save' or 'restore'.\n");
363 raid_disks
= getnum(argv
[3], &err
);
364 chunk_size
= getnum(argv
[4], &err
);
365 level
= getnum(argv
[5], &err
);
366 layout
= getnum(argv
[6], &err
);
367 start
= getnum(argv
[7], &err
);
368 length
= getnum(argv
[8], &err
);
370 fprintf(stderr
, "test_stripe: Bad number: %s\n", err
);
373 if (argc
!= raid_disks
+ 9) {
374 fprintf(stderr
, "test_stripe: wrong number of devices: want %d found %d\n",
378 fds
= malloc(raid_disks
* sizeof(*fds
));
379 offsets
= malloc(raid_disks
* sizeof(*offsets
));
380 memset(offsets
, 0, raid_disks
* sizeof(*offsets
));
382 storefd
= open(file
, O_RDWR
);
385 fprintf(stderr
, "test_stripe: could not open %s.\n", file
);
388 for (i
=0; i
<raid_disks
; i
++) {
389 fds
[i
] = open(argv
[9+i
], O_RDWR
);
392 fprintf(stderr
,"test_stripe: cannot open %s.\n", argv
[9+i
]);
398 int rv
= save_stripes(fds
, offsets
,
399 raid_disks
, chunk_size
, level
, layout
,
404 "test_stripe: save_stripes returned %d\n", rv
);
407 } else if (save
== 2) {
408 int rv
= test_stripes(fds
, offsets
,
409 raid_disks
, chunk_size
, level
, layout
,
413 "test_stripe: test_stripes returned %d\n", rv
);
417 int rv
= restore_stripes(fds
, offsets
,
418 raid_disks
, chunk_size
, level
, layout
,
423 "test_stripe: restore_stripes returned %d\n",