]>
Commit | Line | Data |
---|---|---|
e86c9dd6 NB |
1 | /* |
2 | * mdadm - manage Linux "md" devices aka RAID arrays. | |
3 | * | |
4 | * Copyright (C) 2006 Neil Brown <neilb@suse.de> | |
5 | * | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | * | |
21 | * Author: Neil Brown | |
22 | * Email: <neilb@suse.de> | |
23 | */ | |
24 | ||
25 | #include "mdadm.h" | |
26 | ||
27 | /* To restripe, we read from old geometry to a buffer, and | |
28 | * write from buffer to new geometry. | |
29 | * When reading we don't worry about parity. When writing we do. | |
30 | * | |
31 | */ | |
32 | ||
33 | static int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout) | |
34 | { | |
35 | /* On the given stripe, find which disk in the array with have | |
36 | * block numbered 'block'. | |
37 | */ | |
38 | int pd; | |
39 | ||
40 | switch(level*100 + layout) { | |
41 | case 000: | |
42 | case 400: | |
43 | /* raid 4 isn't messed around by parity blocks */ | |
44 | if (block == -1) | |
45 | return raid_disks-1; /* parity block */ | |
46 | return block; | |
47 | case 500 + ALGORITHM_LEFT_ASYMMETRIC: | |
48 | pd = (raid_disks-1) - stripe % raid_disks; | |
49 | if (block == -1) return pd; | |
50 | if (block >= pd) | |
51 | block++; | |
52 | return block; | |
53 | ||
54 | case 500 + ALGORITHM_RIGHT_ASYMMETRIC: | |
55 | pd = stripe % raid_disks; | |
56 | if (block == -1) return pd; | |
57 | if (block >= pd) | |
58 | block++; | |
59 | return block; | |
60 | ||
61 | case 500 + ALGORITHM_LEFT_SYMMETRIC: | |
62 | pd = (raid_disks - 1) - stripe % raid_disks; | |
63 | if (block == -1) return pd; | |
64 | return (pd + 1 + block) % raid_disks; | |
65 | ||
66 | case 500 + ALGORITHM_RIGHT_SYMMETRIC: | |
67 | pd = stripe % raid_disks; | |
68 | if (block == -1) return pd; | |
69 | return (pd + 1 + block) % raid_disks; | |
70 | ||
71 | case 600 + ALGORITHM_LEFT_ASYMMETRIC: | |
72 | pd = raid_disks - 1 - (stripe % raid_disks); | |
73 | if (block == -1) return pd; | |
74 | if (pd == raid_disks - 1) | |
75 | return block+1; | |
76 | if (block >= pd) | |
77 | return block+2; | |
78 | return block; | |
79 | ||
80 | case 600 + ALGORITHM_RIGHT_ASYMMETRIC: | |
81 | pd = stripe % raid_disks; | |
82 | if (block == -1) return pd; | |
83 | if (pd == raid_disks - 1) | |
84 | return block+1; | |
85 | if (block >= pd) | |
86 | return block+2; | |
87 | return block; | |
88 | ||
89 | case 600 + ALGORITHM_LEFT_SYMMETRIC: | |
90 | pd = raid_disks - 1 - (stripe % raid_disks); | |
91 | if (block == -1) return pd; | |
92 | return (pd + 2 + block) % raid_disks; | |
93 | ||
94 | case 600 + ALGORITHM_RIGHT_SYMMETRIC: | |
95 | pd = stripe % raid_disks; | |
96 | if (block == -1) return pd; | |
97 | return (pd + 2 + block) % raid_disks; | |
98 | } | |
99 | return -1; | |
100 | } | |
101 | ||
102 | ||
/*
 * Store into 'target' the byte-wise XOR of 'disks' source buffers,
 * each of which is 'size' bytes long.  With disks == 0 the target is
 * filled with zeros.
 */
static void xor_blocks(char *target, char **sources, int disks, int size)
{
	int pos = 0;

	/* Amazingly inefficient: one byte at a time across every source. */
	while (pos < size) {
		char acc = 0;
		int src;

		for (src = 0; src < disks; src++)
			acc ^= sources[src][pos];
		target[pos] = acc;
		pos++;
	}
}
114 | ||
/* Save data:
 * We are given:
 *  A list of 'fds' of the active disks ('source'), with a per-disk data
 *    start in 'offsets'. For now we require all to be present.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  A list of 'nwrites' fds ('dest') for mirrored targets.  They are
 *    already seeked to the right (Write) location.
 *  A start and length, in bytes of array data.
 *
 * Data is copied in pieces that never cross a chunk boundary and never
 * exceed the local buffer.  Parity is not examined when reading.
 *
 * Returns 0 on success, -1 on an I/O error (including a short read or
 * write) or when the geometry is unusable: a non-positive chunk size,
 * no data disks, or a level/layout that geo_map() does not know.
 */
int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length)
{
	char buf[8192];
	int cpos;	/* where in chunk we are up to */
	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);

	/* Both values are used as divisors below. */
	if (chunk_size <= 0 || data_disks <= 0)
		return -1;

	cpos = start % chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size / data_disks;
		unsigned long long offset = stripe * chunk_size + cpos;
		int len = chunk_size - cpos;
		int disk;
		int i;

		/* len bytes to be moved from one device */
		if ((size_t)len > sizeof(buf))
			len = sizeof(buf);
		if ((unsigned long long)len > length)
			len = length;

		disk = geo_map(start / chunk_size % data_disks, stripe,
			       raid_disks, level, layout);
		/* geo_map() returns -1 for an unknown level/layout; never
		 * index the fd arrays with that.
		 */
		if (disk < 0 || disk >= raid_disks)
			return -1;

		if (lseek64(source[disk], offsets[disk] + offset, SEEK_SET) < 0)
			return -1;
		if (read(source[disk], buf, len) != len)
			return -1;
		for (i = 0; i < nwrites; i++)
			if (write(dest[i], buf, len) != len)
				return -1;

		length -= len;
		start += len;
		cpos += len;
		while (cpos >= chunk_size)
			cpos -= chunk_size;
	}
	return 0;
}
161 | ||
/* Restore data:
 * We are given:
 *  A list of 'fds' of the active disks ('dest'), with a per-disk data
 *    start in 'offsets'.  Some fds may be '-1' for not-available.
 *  A geometry: raid_disks, chunk_size, level, layout
 *  An 'fd' to read from ('source') and the offset in it ('read_offset')
 *    at which the saved data begins.
 *  A start and length.
 *    The length must be a multiple of the stripe size.
 *
 * We build a full stripe in memory and then write it out.
 * We assume that there are enough working devices.
 *
 * Returns:
 *   0  on success
 *  -1  on an I/O error (including a short read or write) or an
 *      unusable geometry (non-positive sizes, or a level/layout that
 *      geo_map() does not know)
 *  -2  if memory could not be allocated
 *  -3  if the remaining length is smaller than one full stripe
 *
 * All working buffers are released before returning, whatever the
 * outcome.
 */
int restore_stripes(int *dest, unsigned long long *offsets,
		    int raid_disks, int chunk_size, int level, int layout,
		    int source, unsigned long long read_offset,
		    unsigned long long start, unsigned long long length)
{
	char *stripe_buf = NULL;
	char **stripes = NULL;
	char **blocks = NULL;
	int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
	int rv = -1;
	int i;

	/* chunk_size and data_disks are used as divisors below. */
	if (raid_disks <= 0 || chunk_size <= 0 || data_disks <= 0)
		return -1;

	stripe_buf = malloc((size_t)raid_disks * chunk_size);
	stripes = malloc(raid_disks * sizeof(char*));
	blocks = malloc(raid_disks * sizeof(char*));
	if (stripe_buf == NULL || stripes == NULL || blocks == NULL) {
		rv = -2;
		goto out;
	}

	/* stripes[d] is the chunk destined for disk d */
	for (i = 0; i < raid_disks; i++)
		stripes[i] = stripe_buf + i * chunk_size;

	while (length > 0) {
		unsigned long long stripe = start / chunk_size / data_disks;
		unsigned long long offset = stripe * chunk_size;
		int len = data_disks * chunk_size;

		if (length < (unsigned long long)len) {
			rv = -3;
			goto out;
		}

		/* Read each data chunk straight into the slot of the disk
		 * it belongs on; blocks[] remembers them in data order for
		 * the parity calculation.
		 */
		for (i = 0; i < data_disks; i++) {
			int disk = geo_map(i, stripe, raid_disks, level, layout);

			if (disk < 0 || disk >= raid_disks)
				goto out;
			blocks[i] = stripes[disk];
			if (lseek64(source, read_offset, SEEK_SET) != read_offset)
				goto out;
			if (read(source, stripes[disk], chunk_size) != chunk_size)
				goto out;
			read_offset += chunk_size;
		}

		/* We have the data, now do the parity */
		if (level >= 4) {
			int disk = geo_map(-1, stripe, raid_disks, level, layout);

			if (disk < 0 || disk >= raid_disks)
				goto out;
			xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
			/* FIXME need to do raid6 Q as well */
		}

		for (i = 0; i < raid_disks; i++) {
			if (dest[i] < 0)
				continue;	/* device not available */
			if (lseek64(dest[i], offsets[i] + offset, SEEK_SET) < 0)
				goto out;
			if (write(dest[i], stripes[i], chunk_size) != chunk_size)
				goto out;
		}

		length -= len;
		start += len;
	}
	rv = 0;

out:
	free(blocks);
	free(stripes);
	free(stripe_buf);
	return rv;
}
228 | ||
229 | #ifdef MAIN | |
230 | ||
/*
 * Convert 'str' to a number with strtoull() in base 10.
 *
 * If no characters are consumed, or anything is left over after the
 * number, *err is set to 'str' and 0 is returned.  On success *err is
 * left untouched, so several calls can share one error slot.
 */
unsigned long long getnum(char *str, char **err)
{
	char *end = NULL;
	unsigned long long value = strtoull(str, &end, 10);

	if (end != str && *end == '\0')
		return value;

	*err = str;
	return 0;
}
241 | ||
/*
 * Test driver, built only when MAIN is defined.
 *
 * Usage:
 *   test_stripe save/restore file raid_disks chunk_size level layout
 *               start length devices...
 *
 * 'save' copies the given range of array data from the devices into
 * 'file'; 'restore' writes the contents of 'file' back across the
 * devices.  All device offsets are zero.
 *
 * Exit status: 0 on success, 1 on a usage error or a failed
 * save/restore, 2 on a bad argument, 3 if a file could not be opened
 * or memory could not be allocated.
 */
int main(int argc, char *argv[])
{
	/* save/restore file raid_disks chunk_size level layout start length devices...
	 */
	int save;
	int *fds;
	char *file;
	int storefd;
	unsigned long long *offsets;
	int raid_disks, chunk_size, level, layout;
	unsigned long long start, length;
	int i;
	int rv;

	char *err = NULL;
	if (argc < 10) {
		fprintf(stderr, "Usage: test_stripe save/restore file raid_disks"
			" chunk_size level layout start length devices...\n");
		exit(1);
	}
	if (strcmp(argv[1], "save")==0)
		save = 1;
	else if (strcmp(argv[1], "restore") == 0)
		save = 0;
	else {
		fprintf(stderr, "test_stripe: must give 'save' or 'restore'.\n");
		exit(2);
	}

	file = argv[2];
	raid_disks = getnum(argv[3], &err);
	chunk_size = getnum(argv[4], &err);
	level = getnum(argv[5], &err);
	layout = getnum(argv[6], &err);
	start = getnum(argv[7], &err);
	length = getnum(argv[8], &err);
	if (err) {
		fprintf(stderr, "test_stripe: Bad number: %s\n", err);
		exit(2);
	}
	/* argc >= 10 was checked above, so a match here also guarantees
	 * raid_disks >= 1.
	 */
	if (argc != raid_disks + 9) {
		fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
			raid_disks, argc-9);
		exit(2);
	}
	/* chunk_size is used as a divisor by save/restore. */
	if (chunk_size <= 0) {
		fprintf(stderr, "test_stripe: chunk_size must be positive.\n");
		exit(2);
	}

	fds = malloc(raid_disks * sizeof(*fds));
	/* calloc gives every device a zero data offset */
	offsets = calloc(raid_disks, sizeof(*offsets));
	if (fds == NULL || offsets == NULL) {
		fprintf(stderr, "test_stripe: out of memory.\n");
		exit(3);
	}

	storefd = open(file, O_RDWR);
	if (storefd < 0) {
		perror(file);
		fprintf(stderr, "test_stripe: could not open %s.\n", file);
		exit(3);
	}
	for (i=0; i<raid_disks; i++) {
		fds[i] = open(argv[9+i], O_RDWR);
		if (fds[i] < 0) {
			perror(argv[9+i]);
			fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
			exit(3);
		}
	}

	if (save) {
		rv = save_stripes(fds, offsets,
				  raid_disks, chunk_size, level, layout,
				  1, &storefd,
				  start, length);
		if (rv != 0) {
			fprintf(stderr, "test_stripe: save_stripes returned %d\n", rv);
			exit(1);
		}
	} else {
		rv = restore_stripes(fds, offsets,
				     raid_disks, chunk_size, level, layout,
				     storefd, 0ULL,
				     start, length);
		if (rv != 0) {
			fprintf(stderr, "test_stripe: restore_stripes returned %d\n", rv);
			exit(1);
		}
	}
	exit(0);
}
326 | ||
327 | #endif /* MAIN */ |