]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
tests/10ddf-fail-twice: New unit test
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
/* Unofficial T10 name placed in creation GUIDs */
static char T10[] = "Linux-MD";

/*
 * DDF timestamps are based at 1980 whereas Linux timestamps are based
 * at 1970.  DECADE is the number of seconds in the seventies (ten
 * years, two of them leap years); add it to a DDF timestamp to obtain
 * a Linux timestamp.
 */
#define DECADE (60 * 60 * 24 * (10 * 365 + 2))

/* Prototype of the CRC-32 routine called by calc_crc() */
unsigned long crc32(unsigned long crc,
		    const unsigned char *buf,
		    unsigned len);

/* Reserved unsigned values: all-ones, and all-ones minus one */
#define DDF_NOTFOUND (~0U)
#define DDF_CONTAINER (DDF_NOTFOUND - 1)
49
a322f70c
DW
50/* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
4d1bdc18 61typedef struct __be16 {
62 __u16 _v16;
63} be16;
64#define be16_eq(x, y) ((x)._v16 == (y)._v16)
a8173e43 65#define be16_and(x, y) ((x)._v16 & (y)._v16)
66#define be16_or(x, y) ((x)._v16 | (y)._v16)
67#define be16_clear(x, y) ((x)._v16 &= ~(y)._v16)
68#define be16_set(x, y) ((x)._v16 |= (y)._v16)
4d1bdc18 69
70typedef struct __be32 {
71 __u32 _v32;
72} be32;
73#define be32_eq(x, y) ((x)._v32 == (y)._v32)
74
75typedef struct __be64 {
76 __u64 _v64;
77} be64;
78#define be64_eq(x, y) ((x)._v64 == (y)._v64)
79
80#define be16_to_cpu(be) __be16_to_cpu((be)._v16)
81static inline be16 cpu_to_be16(__u16 x)
82{
83 be16 be = { ._v16 = __cpu_to_be16(x) };
84 return be;
85}
86
87#define be32_to_cpu(be) __be32_to_cpu((be)._v32)
88static inline be32 cpu_to_be32(__u32 x)
89{
90 be32 be = { ._v32 = __cpu_to_be32(x) };
91 return be;
92}
93
94#define be64_to_cpu(be) __be64_to_cpu((be)._v64)
95static inline be64 cpu_to_be64(__u64 x)
96{
97 be64 be = { ._v64 = __cpu_to_be64(x) };
98 return be;
99}
100
a322f70c
DW
/* Primary Raid Level (PRL), listed in ascending numeric order */
#define DDF_RAID0	0x00
#define DDF_RAID1	0x01
#define DDF_RAID3	0x03
#define DDF_RAID4	0x04
#define DDF_RAID5	0x05
#define DDF_RAID6	0x06
#define DDF_JBOD	0x0f
#define DDF_RAID1E	0x11
#define DDF_RAID5E	0x15
#define DDF_CONCAT	0x1f
#define DDF_RAID5EE	0x25

/* Raid Level Qualifier (RLQ) */
#define DDF_RAID0_SIMPLE	0x00
#define DDF_RAID1_SIMPLE	0x00 /* just 2 devices in this plex */
#define DDF_RAID1_MULTI		0x01 /* exactly 3 devices in this plex */
#define DDF_RAID3_0		0x00 /* parity in first extent */
#define DDF_RAID3_N		0x01 /* parity in last extent */
#define DDF_RAID4_0		0x00 /* parity in first extent */
#define DDF_RAID4_N		0x01 /* parity in last extent */
/* the RAID5 qualifiers are also used for raid5e and raid5ee */
#define DDF_RAID5_0_RESTART	0x00 /* same as 'right asymmetric' - layout 1 */
#define DDF_RAID6_0_RESTART	0x01 /* raid6 different from raid5 here!!! */
#define DDF_RAID5_N_RESTART	0x02 /* same as 'left asymmetric' - layout 0 */
#define DDF_RAID5_N_CONTINUE	0x03 /* same as 'left symmetric' - layout 2 */

#define DDF_RAID1E_ADJACENT	0x00 /* raid10 nearcopies==2 */
#define DDF_RAID1E_OFFSET	0x01 /* raid10 offsetcopies==2 */

/* Secondary RAID Level (SRL) */
#define DDF_2STRIPED	0x00 /* This is weirder than RAID0 !! */
#define DDF_2MIRRORED	0x01
#define DDF_2CONCAT	0x02
#define DDF_2SPANNED	0x03 /* This is also weird - be careful */
136
/* Magic numbers, already wrapped as big-endian 32-bit values */
#define DDF_HEADER_MAGIC	cpu_to_be32(0xDE11DE11)
#define DDF_CONTROLLER_MAGIC	cpu_to_be32(0xAD111111)
#define DDF_PHYS_RECORDS_MAGIC	cpu_to_be32(0x22222222)
#define DDF_PHYS_DATA_MAGIC	cpu_to_be32(0x33333333)
#define DDF_VIRT_RECORDS_MAGIC	cpu_to_be32(0xDDDDDDDD)
#define DDF_VD_CONF_MAGIC	cpu_to_be32(0xEEEEEEEE)
#define DDF_SPARE_ASSIGN_MAGIC	cpu_to_be32(0x55555555)
#define DDF_VU_CONF_MAGIC	cpu_to_be32(0x88888888)
#define DDF_VENDOR_LOG_MAGIC	cpu_to_be32(0x01dBEEF0)
#define DDF_BBM_LOG_MAGIC	cpu_to_be32(0xABADB10C)

/* Length in bytes of every GUID field */
#define DDF_GUID_LEN	24
/* 8-character revision strings compared against ddf_header.revision */
#define DDF_REVISION_0	"01.00.00"
#define DDF_REVISION_2	"01.02.00"
a322f70c
DW
152
153struct ddf_header {
60931cf9 154 be32 magic; /* DDF_HEADER_MAGIC */
155 be32 crc;
a322f70c 156 char guid[DDF_GUID_LEN];
59e36268 157 char revision[8]; /* 01.02.00 */
60931cf9 158 be32 seq; /* starts at '1' */
159 be32 timestamp;
a322f70c
DW
160 __u8 openflag;
161 __u8 foreignflag;
162 __u8 enforcegroups;
163 __u8 pad0; /* 0xff */
164 __u8 pad1[12]; /* 12 * 0xff */
165 /* 64 bytes so far */
166 __u8 header_ext[32]; /* reserved: fill with 0xff */
9d0c6b70 167 be64 primary_lba;
168 be64 secondary_lba;
a322f70c
DW
169 __u8 type;
170 __u8 pad2[3]; /* 0xff */
60931cf9 171 be32 workspace_len; /* sectors for vendor space -
a322f70c 172 * at least 32768(sectors) */
9d0c6b70 173 be64 workspace_lba;
a8173e43 174 be16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
175 be16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
176 be16 max_partitions; /* i.e. max num of configuration
a322f70c 177 record entries per disk */
a8173e43 178 be16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
a322f70c 179 *12/512) */
a8173e43 180 be16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
a322f70c
DW
181 __u8 pad3[54]; /* 0xff */
182 /* 192 bytes so far */
60931cf9 183 be32 controller_section_offset;
184 be32 controller_section_length;
185 be32 phys_section_offset;
186 be32 phys_section_length;
187 be32 virt_section_offset;
188 be32 virt_section_length;
189 be32 config_section_offset;
190 be32 config_section_length;
191 be32 data_section_offset;
192 be32 data_section_length;
193 be32 bbm_section_offset;
194 be32 bbm_section_length;
195 be32 diag_space_offset;
196 be32 diag_space_length;
197 be32 vendor_offset;
198 be32 vendor_length;
a322f70c
DW
199 /* 256 bytes so far */
200 __u8 pad4[256]; /* 0xff */
201};
202
/* Values of ddf_header.type */
#define DDF_HEADER_ANCHOR	0x00
#define DDF_HEADER_PRIMARY	0x01
#define DDF_HEADER_SECONDARY	0x02
207
208/* The content of the 'controller section' - global scope */
209struct ddf_controller_data {
60931cf9 210 be32 magic; /* DDF_CONTROLLER_MAGIC */
211 be32 crc;
a322f70c
DW
212 char guid[DDF_GUID_LEN];
213 struct controller_type {
a8173e43 214 be16 vendor_id;
215 be16 device_id;
216 be16 sub_vendor_id;
217 be16 sub_device_id;
a322f70c
DW
218 } type;
219 char product_id[16];
220 __u8 pad[8]; /* 0xff */
221 __u8 vendor_data[448];
222};
223
224/* The content of phys_section - global scope */
225struct phys_disk {
60931cf9 226 be32 magic; /* DDF_PHYS_RECORDS_MAGIC */
227 be32 crc;
a8173e43 228 be16 used_pdes;
229 be16 max_pdes;
a322f70c
DW
230 __u8 pad[52];
231 struct phys_disk_entry {
232 char guid[DDF_GUID_LEN];
60931cf9 233 be32 refnum;
a8173e43 234 be16 type;
235 be16 state;
9d0c6b70 236 be64 config_size; /* DDF structures must be after here */
a322f70c
DW
237 char path[18]; /* another horrible structure really */
238 __u8 pad[6];
239 } entries[0];
240};
241
/* Bits of phys_disk_entry.type (stored big-endian, remember) */
#define DDF_Forced_PD_GUID	1
#define DDF_Active_in_VD	2
#define DDF_Global_Spare	4	/* VD_CONF records are ignored */
#define DDF_Spare		8	/* overrides Global_spare */
#define DDF_Foreign		16
#define DDF_Legacy		32	/* no DDF on this device */

/* Interface field within phys_disk_entry.type */
#define DDF_Interface_mask	0xf00
#define DDF_Interface_SCSI	0x100
#define DDF_Interface_SAS	0x200
#define DDF_Interface_SATA	0x300
#define DDF_Interface_FC	0x400

/* Bits of phys_disk_entry.state (also stored big-endian) */
#define DDF_Online		1
#define DDF_Failed		2	/* overrides 1,4,8 */
#define DDF_Rebuilding		4
#define DDF_Transition		8
#define DDF_SMART		16
#define DDF_ReadErrors		32
#define DDF_Missing		64
264
265/* The content of the virt_section global scope */
266struct virtual_disk {
60931cf9 267 be32 magic; /* DDF_VIRT_RECORDS_MAGIC */
268 be32 crc;
a8173e43 269 be16 populated_vdes;
270 be16 max_vdes;
a322f70c
DW
271 __u8 pad[52];
272 struct virtual_entry {
273 char guid[DDF_GUID_LEN];
a8173e43 274 be16 unit;
a322f70c 275 __u16 pad0; /* 0xffff */
a8173e43 276 be16 guid_crc;
277 be16 type;
a322f70c
DW
278 __u8 state;
279 __u8 init_state;
280 __u8 pad1[14];
281 char name[16];
282 } entries[0];
283};
284
/* Bits of virtual_entry.type (stored big-endian) */
#define DDF_Shared		1
#define DDF_Enforce_Groups	2
#define DDF_Unicode		4
#define DDF_Owner_Valid		8

/* virtual_entry.state (one byte): low three bits select a state */
#define DDF_state_mask		0x7
#define DDF_state_optimal	0x0
#define DDF_state_degraded	0x1
#define DDF_state_deleted	0x2
#define DDF_state_missing	0x3
#define DDF_state_failed	0x4
#define DDF_state_part_optimal	0x5

/* ... and these are independent flag bits in the same byte */
#define DDF_state_morphing	0x8
#define DDF_state_inconsistent	0x10

/* virtual_entry.init_state (one byte) */
#define DDF_initstate_mask	0x03
#define DDF_init_not		0x00
#define DDF_init_quick		0x01	/* initialisation is in progress.
					 * i.e. 'state_inconsistent' */
#define DDF_init_full		0x02

#define DDF_access_mask		0xc0
#define DDF_access_rw		0x00
#define DDF_access_ro		0x80
#define DDF_access_blocked	0xc0
314
315/* The content of the config_section - local scope
316 * It has multiple records each config_record_len sectors
317 * They can be vd_config or spare_assign
318 */
319
320struct vd_config {
60931cf9 321 be32 magic; /* DDF_VD_CONF_MAGIC */
322 be32 crc;
a322f70c 323 char guid[DDF_GUID_LEN];
60931cf9 324 be32 timestamp;
325 be32 seqnum;
a322f70c 326 __u8 pad0[24];
a8173e43 327 be16 prim_elmnt_count;
a322f70c
DW
328 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
329 __u8 prl;
330 __u8 rlq;
331 __u8 sec_elmnt_count;
332 __u8 sec_elmnt_seq;
333 __u8 srl;
9d0c6b70 334 be64 blocks; /* blocks per component could be different
598f0d58
NB
335 * on different component devices...(only
336 * for concat I hope) */
9d0c6b70 337 be64 array_blocks; /* blocks in array */
a322f70c 338 __u8 pad1[8];
60931cf9 339 be32 spare_refs[8];
a322f70c
DW
340 __u8 cache_pol[8];
341 __u8 bg_rate;
342 __u8 pad2[3];
343 __u8 pad3[52];
344 __u8 pad4[192];
345 __u8 v0[32]; /* reserved- 0xff */
346 __u8 v1[32]; /* reserved- 0xff */
347 __u8 v2[16]; /* reserved- 0xff */
348 __u8 v3[16]; /* reserved- 0xff */
349 __u8 vendor[32];
60931cf9 350 be32 phys_refnum[0]; /* refnum of each disk in sequence */
a322f70c
DW
351 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
352 bvd are always the same size */
353};
9d0c6b70 354#define LBA_OFFSET(ddf, vd) ((be64 *) &(vd)->phys_refnum[(ddf)->mppe])
a322f70c
DW
355
/* Bits of vd_config.cache_pol[7] */
#define DDF_cache_writeback	1	/* else writethrough */
#define DDF_cache_wadaptive	2	/* only applies if writeback */
#define DDF_cache_readahead	4
#define DDF_cache_radaptive	8	/* only if doing read-ahead */
#define DDF_cache_ifnobatt	16	/* even to write cache if battery is poor */
#define DDF_cache_wallowed	32	/* enable write caching */
#define DDF_cache_rallowed	64	/* enable read caching */
364
365struct spare_assign {
60931cf9 366 be32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
367 be32 crc;
368 be32 timestamp;
a322f70c
DW
369 __u8 reserved[7];
370 __u8 type;
a8173e43 371 be16 populated; /* SAEs used */
372 be16 max; /* max SAEs */
a322f70c
DW
373 __u8 pad[8];
374 struct spare_assign_entry {
375 char guid[DDF_GUID_LEN];
a8173e43 376 be16 secondary_element;
a322f70c
DW
377 __u8 pad[6];
378 } spare_ents[0];
379};
/* Bits of spare_assign.type */
#define DDF_spare_dedicated	0x1	/* else global */
#define DDF_spare_revertible	0x2	/* else committable */
#define DDF_spare_active	0x4	/* else not active */
#define DDF_spare_affinity	0x8	/* enclosure affinity */
385
386/* The data_section contents - local scope */
387struct disk_data {
60931cf9 388 be32 magic; /* DDF_PHYS_DATA_MAGIC */
389 be32 crc;
a322f70c 390 char guid[DDF_GUID_LEN];
60931cf9 391 be32 refnum; /* crc of some magic drive data ... */
a322f70c
DW
392 __u8 forced_ref; /* set when above was not result of magic */
393 __u8 forced_guid; /* set if guid was forced rather than magic */
394 __u8 vendor[32];
395 __u8 pad[442];
396};
397
398/* bbm_section content */
399struct bad_block_log {
60931cf9 400 be32 magic;
401 be32 crc;
a8173e43 402 be16 entry_count;
60931cf9 403 be32 spare_count;
a322f70c 404 __u8 pad[10];
9d0c6b70 405 be64 first_spare;
a322f70c 406 struct mapped_block {
9d0c6b70 407 be64 defective_start;
60931cf9 408 be32 replacement_start;
a8173e43 409 be16 remap_count;
a322f70c
DW
410 __u8 pad[2];
411 } entries[0];
412};
413
414/* Struct for internally holding ddf structures */
415/* The DDF structure stored on each device is potentially
416 * quite different, as some data is global and some is local.
417 * The global data is:
418 * - ddf header
419 * - controller_data
420 * - Physical disk records
421 * - Virtual disk records
422 * The local data is:
423 * - Configuration records
424 * - Physical Disk data section
425 * ( and Bad block and vendor which I don't care about yet).
426 *
427 * The local data is parsed into separate lists as it is read
428 * and reconstructed for writing. This means that we only need
429 * to make config changes once and they are automatically
430 * propagated to all devices.
431 * Note that the ddf_super has space of the conf and disk data
432 * for this disk and also for a list of all such data.
433 * The list is only used for the superblock that is being
434 * built in Create or Assemble to describe the whole array.
435 */
436struct ddf_super {
6416d527 437 struct ddf_header anchor, primary, secondary;
a322f70c 438 struct ddf_controller_data controller;
6416d527 439 struct ddf_header *active;
a322f70c
DW
440 struct phys_disk *phys;
441 struct virtual_disk *virt;
442 int pdsize, vdsize;
f21e18ca 443 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 444 int currentdev;
18a2f463 445 int updates_pending;
a322f70c 446 struct vcl {
6416d527
NB
447 union {
448 char space[512];
449 struct {
450 struct vcl *next;
f21e18ca 451 unsigned int vcnum; /* index into ->virt */
8ec5d685 452 struct vd_config **other_bvds;
6416d527
NB
453 __u64 *block_sizes; /* NULL if all the same */
454 };
455 };
a322f70c 456 struct vd_config conf;
d2ca6449 457 } *conflist, *currentconf;
a322f70c 458 struct dl {
6416d527
NB
459 union {
460 char space[512];
461 struct {
462 struct dl *next;
463 int major, minor;
464 char *devname;
465 int fd;
466 unsigned long long size; /* sectors */
9d0c6b70 467 be64 primary_lba; /* sectors */
468 be64 secondary_lba; /* sectors */
469 be64 workspace_lba; /* sectors */
6416d527
NB
470 int pdnum; /* index in ->phys */
471 struct spare_assign *spare;
8592f29d
N
472 void *mdupdate; /* hold metadata update */
473
474 /* These fields used by auto-layout */
475 int raiddisk; /* slot to fill in autolayout */
476 __u64 esize;
6416d527
NB
477 };
478 };
a322f70c 479 struct disk_data disk;
b2280677 480 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 481 } *dlist, *add_list;
a322f70c
DW
482};
483
/* Fallback for environments where no header has supplied offsetof() */
#ifndef offsetof
#define offsetof(t, f) ((size_t)&(((t *)0)->f))
#endif
487
#if DEBUG
static int all_ff(const char *guid);

/*
 * Debug aid: print the state and init_state bytes of every virtual
 * disk entry for which all_ff() does not report true on its GUID,
 * prefixed with this function's name and 'msg'.
 */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	unsigned int vd;
	const unsigned int n_vds = be16_to_cpu(ddf->active->max_vd_entries);

	dprintf("%s/%s: ", __func__, msg);
	for (vd = 0; vd < n_vds; vd++) {
		const struct virtual_entry *ve = &ddf->virt->entries[vd];

		if (!all_ff(ve->guid))
			dprintf("%u(s=%02x i=%02x) ", vd,
				ve->state,
				ve->init_state);
	}
	dprintf("\n");
}
#else
/* Without DEBUG this is a no-op */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
506
35c3606d 507static void _ddf_set_updates_pending(struct ddf_super *ddf, const char *func)
508{
509 ddf->updates_pending = 1;
60931cf9 510 ddf->active->seq = cpu_to_be32((be32_to_cpu(ddf->active->seq)+1));
35c3606d 511 pr_state(ddf, func);
512}
513
514#define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
7d5a7ff3 515
fcc22180 516static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
60931cf9 517 be32 refnum, unsigned int nmax,
fcc22180 518 const struct vd_config **bvd,
519 unsigned int *idx);
520
60931cf9 521static be32 calc_crc(void *buf, int len)
a322f70c
DW
522{
523 /* crcs are always at the same place as in the ddf_header */
524 struct ddf_header *ddf = buf;
60931cf9 525 be32 oldcrc = ddf->crc;
a322f70c 526 __u32 newcrc;
60931cf9 527 ddf->crc = cpu_to_be32(0xffffffff);
a322f70c
DW
528
529 newcrc = crc32(0, buf, len);
530 ddf->crc = oldcrc;
4abe6b70
N
531 /* The crc is store (like everything) bigendian, so convert
532 * here for simplicity
533 */
60931cf9 534 return cpu_to_be32(newcrc);
a322f70c
DW
535}
536
a3163bf0 537#define DDF_INVALID_LEVEL 0xff
538#define DDF_NO_SECONDARY 0xff
539static int err_bad_md_layout(const mdu_array_info_t *array)
540{
541 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
542 array->level, array->layout, array->raid_disks);
2aba583f 543 return -1;
a3163bf0 544}
545
546static int layout_md2ddf(const mdu_array_info_t *array,
547 struct vd_config *conf)
548{
a8173e43 549 be16 prim_elmnt_count = cpu_to_be16(array->raid_disks);
a3163bf0 550 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
551 __u8 sec_elmnt_count = 1;
552 __u8 srl = DDF_NO_SECONDARY;
553
554 switch (array->level) {
555 case LEVEL_LINEAR:
556 prl = DDF_CONCAT;
557 break;
558 case 0:
559 rlq = DDF_RAID0_SIMPLE;
560 prl = DDF_RAID0;
561 break;
562 case 1:
563 switch (array->raid_disks) {
564 case 2:
565 rlq = DDF_RAID1_SIMPLE;
566 break;
567 case 3:
568 rlq = DDF_RAID1_MULTI;
569 break;
570 default:
571 return err_bad_md_layout(array);
572 }
573 prl = DDF_RAID1;
574 break;
575 case 4:
576 if (array->layout != 0)
577 return err_bad_md_layout(array);
578 rlq = DDF_RAID4_N;
579 prl = DDF_RAID4;
580 break;
581 case 5:
582 switch (array->layout) {
583 case ALGORITHM_LEFT_ASYMMETRIC:
584 rlq = DDF_RAID5_N_RESTART;
585 break;
586 case ALGORITHM_RIGHT_ASYMMETRIC:
587 rlq = DDF_RAID5_0_RESTART;
588 break;
589 case ALGORITHM_LEFT_SYMMETRIC:
590 rlq = DDF_RAID5_N_CONTINUE;
591 break;
592 case ALGORITHM_RIGHT_SYMMETRIC:
593 /* not mentioned in standard */
594 default:
595 return err_bad_md_layout(array);
596 }
597 prl = DDF_RAID5;
598 break;
599 case 6:
600 switch (array->layout) {
601 case ALGORITHM_ROTATING_N_RESTART:
602 rlq = DDF_RAID5_N_RESTART;
603 break;
604 case ALGORITHM_ROTATING_ZERO_RESTART:
605 rlq = DDF_RAID6_0_RESTART;
606 break;
607 case ALGORITHM_ROTATING_N_CONTINUE:
608 rlq = DDF_RAID5_N_CONTINUE;
609 break;
610 default:
611 return err_bad_md_layout(array);
612 }
613 prl = DDF_RAID6;
614 break;
615 case 10:
616 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
617 rlq = DDF_RAID1_SIMPLE;
a8173e43 618 prim_elmnt_count = cpu_to_be16(2);
a3163bf0 619 sec_elmnt_count = array->raid_disks / 2;
620 } else if (array->raid_disks % 3 == 0
621 && array->layout == 0x103) {
622 rlq = DDF_RAID1_MULTI;
a8173e43 623 prim_elmnt_count = cpu_to_be16(3);
a3163bf0 624 sec_elmnt_count = array->raid_disks / 3;
625 } else
626 return err_bad_md_layout(array);
627 srl = DDF_2SPANNED;
628 prl = DDF_RAID1;
629 break;
630 default:
631 return err_bad_md_layout(array);
632 }
633 conf->prl = prl;
634 conf->prim_elmnt_count = prim_elmnt_count;
635 conf->rlq = rlq;
636 conf->srl = srl;
637 conf->sec_elmnt_count = sec_elmnt_count;
638 return 0;
639}
640
8a2848a7 641static int err_bad_ddf_layout(const struct vd_config *conf)
642{
643 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
a8173e43 644 conf->prl, conf->rlq, be16_to_cpu(conf->prim_elmnt_count));
8a2848a7 645 return -1;
646}
647
648static int layout_ddf2md(const struct vd_config *conf,
649 mdu_array_info_t *array)
650{
651 int level = LEVEL_UNSUPPORTED;
652 int layout = 0;
a8173e43 653 int raiddisks = be16_to_cpu(conf->prim_elmnt_count);
8a2848a7 654
655 if (conf->sec_elmnt_count > 1) {
656 /* see also check_secondary() */
657 if (conf->prl != DDF_RAID1 ||
658 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
659 pr_err("Unsupported secondary RAID level %u/%u\n",
660 conf->prl, conf->srl);
661 return -1;
662 }
663 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
664 layout = 0x102;
665 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
666 layout = 0x103;
667 else
668 return err_bad_ddf_layout(conf);
669 raiddisks *= conf->sec_elmnt_count;
670 level = 10;
671 goto good;
672 }
673
674 switch (conf->prl) {
675 case DDF_CONCAT:
676 level = LEVEL_LINEAR;
677 break;
678 case DDF_RAID0:
679 if (conf->rlq != DDF_RAID0_SIMPLE)
680 return err_bad_ddf_layout(conf);
681 level = 0;
682 break;
683 case DDF_RAID1:
684 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
685 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
686 return err_bad_ddf_layout(conf);
687 level = 1;
688 break;
689 case DDF_RAID4:
690 if (conf->rlq != DDF_RAID4_N)
691 return err_bad_ddf_layout(conf);
692 level = 4;
693 break;
694 case DDF_RAID5:
695 switch (conf->rlq) {
696 case DDF_RAID5_N_RESTART:
697 layout = ALGORITHM_LEFT_ASYMMETRIC;
698 break;
699 case DDF_RAID5_0_RESTART:
700 layout = ALGORITHM_RIGHT_ASYMMETRIC;
701 break;
702 case DDF_RAID5_N_CONTINUE:
703 layout = ALGORITHM_LEFT_SYMMETRIC;
704 break;
705 default:
706 return err_bad_ddf_layout(conf);
707 }
708 level = 5;
709 break;
710 case DDF_RAID6:
711 switch (conf->rlq) {
712 case DDF_RAID5_N_RESTART:
713 layout = ALGORITHM_ROTATING_N_RESTART;
714 break;
715 case DDF_RAID6_0_RESTART:
716 layout = ALGORITHM_ROTATING_ZERO_RESTART;
717 break;
718 case DDF_RAID5_N_CONTINUE:
719 layout = ALGORITHM_ROTATING_N_CONTINUE;
720 break;
721 default:
722 return err_bad_ddf_layout(conf);
723 }
724 level = 6;
725 break;
726 default:
727 return err_bad_ddf_layout(conf);
728 };
729
730good:
731 array->level = level;
732 array->layout = layout;
733 array->raid_disks = raiddisks;
734 return 0;
735}
736
a322f70c
DW
737static int load_ddf_header(int fd, unsigned long long lba,
738 unsigned long long size,
739 int type,
740 struct ddf_header *hdr, struct ddf_header *anchor)
741{
742 /* read a ddf header (primary or secondary) from fd/lba
743 * and check that it is consistent with anchor
744 * Need to check:
745 * magic, crc, guid, rev, and LBA's header_type, and
746 * everything after header_type must be the same
747 */
748 if (lba >= size-1)
749 return 0;
750
751 if (lseek64(fd, lba<<9, 0) < 0)
752 return 0;
753
754 if (read(fd, hdr, 512) != 512)
755 return 0;
756
0e5fa862
MW
757 if (!be32_eq(hdr->magic, DDF_HEADER_MAGIC)) {
758 pr_err("%s: bad header magic\n", __func__);
a322f70c 759 return 0;
0e5fa862
MW
760 }
761 if (!be32_eq(calc_crc(hdr, 512), hdr->crc)) {
762 pr_err("%s: bad CRC\n", __func__);
a322f70c 763 return 0;
0e5fa862 764 }
a322f70c
DW
765 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
766 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
9d0c6b70 767 !be64_eq(anchor->primary_lba, hdr->primary_lba) ||
768 !be64_eq(anchor->secondary_lba, hdr->secondary_lba) ||
a322f70c
DW
769 hdr->type != type ||
770 memcmp(anchor->pad2, hdr->pad2, 512 -
0e5fa862
MW
771 offsetof(struct ddf_header, pad2)) != 0) {
772 pr_err("%s: header mismatch\n", __func__);
a322f70c 773 return 0;
0e5fa862 774 }
a322f70c
DW
775
776 /* Looks good enough to me... */
777 return 1;
778}
779
780static void *load_section(int fd, struct ddf_super *super, void *buf,
60931cf9 781 be32 offset_be, be32 len_be, int check)
a322f70c 782{
60931cf9 783 unsigned long long offset = be32_to_cpu(offset_be);
784 unsigned long long len = be32_to_cpu(len_be);
a322f70c
DW
785 int dofree = (buf == NULL);
786
787 if (check)
788 if (len != 2 && len != 8 && len != 32
789 && len != 128 && len != 512)
790 return NULL;
791
792 if (len > 1024)
793 return NULL;
794 if (buf) {
795 /* All pre-allocated sections are a single block */
796 if (len != 1)
797 return NULL;
3d2c4fc7
DW
798 } else if (posix_memalign(&buf, 512, len<<9) != 0)
799 buf = NULL;
6416d527 800
a322f70c
DW
801 if (!buf)
802 return NULL;
803
804 if (super->active->type == 1)
9d0c6b70 805 offset += be64_to_cpu(super->active->primary_lba);
a322f70c 806 else
9d0c6b70 807 offset += be64_to_cpu(super->active->secondary_lba);
a322f70c 808
f21e18ca 809 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
810 if (dofree)
811 free(buf);
812 return NULL;
813 }
f21e18ca 814 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
815 if (dofree)
816 free(buf);
817 return NULL;
818 }
819 return buf;
820}
821
822static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
823{
824 unsigned long long dsize;
825
826 get_dev_size(fd, NULL, &dsize);
827
828 if (lseek64(fd, dsize-512, 0) < 0) {
829 if (devname)
e7b84f9d
N
830 pr_err("Cannot seek to anchor block on %s: %s\n",
831 devname, strerror(errno));
a322f70c
DW
832 return 1;
833 }
834 if (read(fd, &super->anchor, 512) != 512) {
835 if (devname)
e7b84f9d
N
836 pr_err("Cannot read anchor block on %s: %s\n",
837 devname, strerror(errno));
a322f70c
DW
838 return 1;
839 }
60931cf9 840 if (!be32_eq(super->anchor.magic, DDF_HEADER_MAGIC)) {
a322f70c 841 if (devname)
e7b84f9d 842 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
843 devname);
844 return 2;
845 }
60931cf9 846 if (!be32_eq(calc_crc(&super->anchor, 512), super->anchor.crc)) {
a322f70c 847 if (devname)
e7b84f9d 848 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
849 devname);
850 return 2;
851 }
59e36268
NB
852 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
853 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 854 if (devname)
e7b84f9d 855 pr_err("can only support super revision"
59e36268
NB
856 " %.8s and earlier, not %.8s on %s\n",
857 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
858 return 2;
859 }
dbeb699a 860 super->active = NULL;
9d0c6b70 861 if (load_ddf_header(fd, be64_to_cpu(super->anchor.primary_lba),
a322f70c
DW
862 dsize >> 9, 1,
863 &super->primary, &super->anchor) == 0) {
864 if (devname)
e7b84f9d
N
865 pr_err("Failed to load primary DDF header "
866 "on %s\n", devname);
dbeb699a 867 } else
868 super->active = &super->primary;
60931cf9 869
9d0c6b70 870 if (load_ddf_header(fd, be64_to_cpu(super->anchor.secondary_lba),
a322f70c
DW
871 dsize >> 9, 2,
872 &super->secondary, &super->anchor)) {
3eff7c1d 873 if (super->active == NULL
60931cf9 874 || (be32_to_cpu(super->primary.seq)
875 < be32_to_cpu(super->secondary.seq) &&
3eff7c1d 876 !super->secondary.openflag)
60931cf9 877 || (be32_to_cpu(super->primary.seq)
878 == be32_to_cpu(super->secondary.seq) &&
a322f70c
DW
879 super->primary.openflag && !super->secondary.openflag)
880 )
881 super->active = &super->secondary;
dbeb699a 882 } else if (devname)
883 pr_err("Failed to load secondary DDF header on %s\n",
884 devname);
885 if (super->active == NULL)
886 return 2;
a322f70c
DW
887 return 0;
888}
889
890static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
891{
892 void *ok;
893 ok = load_section(fd, super, &super->controller,
894 super->active->controller_section_offset,
895 super->active->controller_section_length,
896 0);
897 super->phys = load_section(fd, super, NULL,
898 super->active->phys_section_offset,
899 super->active->phys_section_length,
900 1);
60931cf9 901 super->pdsize = be32_to_cpu(super->active->phys_section_length) * 512;
a322f70c
DW
902
903 super->virt = load_section(fd, super, NULL,
904 super->active->virt_section_offset,
905 super->active->virt_section_length,
906 1);
60931cf9 907 super->vdsize = be32_to_cpu(super->active->virt_section_length) * 512;
a322f70c
DW
908 if (!ok ||
909 !super->phys ||
910 !super->virt) {
911 free(super->phys);
912 free(super->virt);
a2349791
NB
913 super->phys = NULL;
914 super->virt = NULL;
a322f70c
DW
915 return 2;
916 }
917 super->conflist = NULL;
918 super->dlist = NULL;
8c3b8c2c 919
a8173e43 920 super->max_part = be16_to_cpu(super->active->max_partitions);
921 super->mppe = be16_to_cpu(super->active->max_primary_element_entries);
922 super->conf_rec_len = be16_to_cpu(super->active->config_record_len);
a322f70c
DW
923 return 0;
924}
925
3c48f7be 926#define DDF_UNUSED_BVD 0xff
927static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
928{
929 unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
930 unsigned int i, vdsize;
931 void *p;
932 if (n_vds == 0) {
933 vcl->other_bvds = NULL;
934 return 0;
935 }
936 vdsize = ddf->conf_rec_len * 512;
937 if (posix_memalign(&p, 512, n_vds *
938 (vdsize + sizeof(struct vd_config *))) != 0)
939 return -1;
940 vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
941 for (i = 0; i < n_vds; i++) {
942 vcl->other_bvds[i] = p + i * vdsize;
943 memset(vcl->other_bvds[i], 0, vdsize);
944 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
945 }
946 return 0;
947}
948
3dc821b0 949static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
950 unsigned int len)
951{
952 int i;
953 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 954 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
3dc821b0 955 break;
956
957 if (i < vcl->conf.sec_elmnt_count-1) {
60931cf9 958 if (be32_to_cpu(vd->seqnum) <=
959 be32_to_cpu(vcl->other_bvds[i]->seqnum))
3dc821b0 960 return;
961 } else {
962 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 963 if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
3dc821b0 964 break;
965 if (i == vcl->conf.sec_elmnt_count-1) {
966 pr_err("no space for sec level config %u, count is %u\n",
967 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
968 return;
969 }
3dc821b0 970 }
971 memcpy(vcl->other_bvds[i], vd, len);
972}
973
a322f70c
DW
974static int load_ddf_local(int fd, struct ddf_super *super,
975 char *devname, int keep)
976{
977 struct dl *dl;
978 struct stat stb;
979 char *conf;
f21e18ca
N
980 unsigned int i;
981 unsigned int confsec;
b2280677 982 int vnum;
a8173e43 983 unsigned int max_virt_disks = be16_to_cpu
984 (super->active->max_vd_entries);
d2ca6449 985 unsigned long long dsize;
a322f70c
DW
986
987 /* First the local disk info */
3d2c4fc7 988 if (posix_memalign((void**)&dl, 512,
6416d527 989 sizeof(*dl) +
3d2c4fc7 990 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 991 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
992 __func__);
993 return 1;
994 }
a322f70c
DW
995
996 load_section(fd, super, &dl->disk,
997 super->active->data_section_offset,
998 super->active->data_section_length,
999 0);
503975b9 1000 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 1001
a322f70c
DW
1002 fstat(fd, &stb);
1003 dl->major = major(stb.st_rdev);
1004 dl->minor = minor(stb.st_rdev);
1005 dl->next = super->dlist;
1006 dl->fd = keep ? fd : -1;
d2ca6449
NB
1007
1008 dl->size = 0;
1009 if (get_dev_size(fd, devname, &dsize))
1010 dl->size = dsize >> 9;
097bcf00 1011 /* If the disks have different sizes, the LBAs will differ
1012 * between phys disks.
1013 * At this point here, the values in super->active must be valid
1014 * for this phys disk. */
1015 dl->primary_lba = super->active->primary_lba;
1016 dl->secondary_lba = super->active->secondary_lba;
1017 dl->workspace_lba = super->active->workspace_lba;
b2280677 1018 dl->spare = NULL;
f21e18ca 1019 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
1020 dl->vlist[i] = NULL;
1021 super->dlist = dl;
59e36268 1022 dl->pdnum = -1;
a8173e43 1023 for (i = 0; i < be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
1024 if (memcmp(super->phys->entries[i].guid,
1025 dl->disk.guid, DDF_GUID_LEN) == 0)
1026 dl->pdnum = i;
1027
a322f70c
DW
1028 /* Now the config list. */
1029 /* 'conf' is an array of config entries, some of which are
1030 * probably invalid. Those which are good need to be copied into
1031 * the conflist
1032 */
a322f70c
DW
1033
1034 conf = load_section(fd, super, NULL,
1035 super->active->config_section_offset,
1036 super->active->config_section_length,
1037 0);
1038
b2280677 1039 vnum = 0;
e223334f 1040 for (confsec = 0;
60931cf9 1041 confsec < be32_to_cpu(super->active->config_section_length);
e223334f 1042 confsec += super->conf_rec_len) {
a322f70c 1043 struct vd_config *vd =
e223334f 1044 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
1045 struct vcl *vcl;
1046
60931cf9 1047 if (be32_eq(vd->magic, DDF_SPARE_ASSIGN_MAGIC)) {
b2280677
NB
1048 if (dl->spare)
1049 continue;
3d2c4fc7
DW
1050 if (posix_memalign((void**)&dl->spare, 512,
1051 super->conf_rec_len*512) != 0) {
e7b84f9d
N
1052 pr_err("%s could not allocate spare info buf\n",
1053 __func__);
3d2c4fc7
DW
1054 return 1;
1055 }
613b0d17 1056
b2280677
NB
1057 memcpy(dl->spare, vd, super->conf_rec_len*512);
1058 continue;
1059 }
60931cf9 1060 if (!be32_eq(vd->magic, DDF_VD_CONF_MAGIC))
a322f70c
DW
1061 continue;
1062 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1063 if (memcmp(vcl->conf.guid,
1064 vd->guid, DDF_GUID_LEN) == 0)
1065 break;
1066 }
1067
1068 if (vcl) {
b2280677 1069 dl->vlist[vnum++] = vcl;
3dc821b0 1070 if (vcl->other_bvds != NULL &&
1071 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1072 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1073 continue;
1074 }
60931cf9 1075 if (be32_to_cpu(vd->seqnum) <=
1076 be32_to_cpu(vcl->conf.seqnum))
a322f70c 1077 continue;
59e36268 1078 } else {
3d2c4fc7 1079 if (posix_memalign((void**)&vcl, 512,
6416d527 1080 (super->conf_rec_len*512 +
3d2c4fc7 1081 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
1082 pr_err("%s could not allocate vcl buf\n",
1083 __func__);
3d2c4fc7
DW
1084 return 1;
1085 }
a322f70c 1086 vcl->next = super->conflist;
59e36268 1087 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
3c48f7be 1088 vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1089 if (alloc_other_bvds(super, vcl) != 0) {
1090 pr_err("%s could not allocate other bvds\n",
1091 __func__);
1092 free(vcl);
1093 return 1;
1094 };
a322f70c 1095 super->conflist = vcl;
b2280677 1096 dl->vlist[vnum++] = vcl;
a322f70c 1097 }
8c3b8c2c 1098 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
59e36268
NB
1099 for (i=0; i < max_virt_disks ; i++)
1100 if (memcmp(super->virt->entries[i].guid,
1101 vcl->conf.guid, DDF_GUID_LEN)==0)
1102 break;
1103 if (i < max_virt_disks)
1104 vcl->vcnum = i;
a322f70c
DW
1105 }
1106 free(conf);
1107
1108 return 0;
1109}
1110
1111#ifndef MDASSEMBLE
1112static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 1113 void **sbp, char *devname);
a322f70c 1114#endif
37424f13
DW
1115
1116static void free_super_ddf(struct supertype *st);
1117
a322f70c
DW
1118static int load_super_ddf(struct supertype *st, int fd,
1119 char *devname)
1120{
1121 unsigned long long dsize;
1122 struct ddf_super *super;
1123 int rv;
1124
a322f70c
DW
1125 if (get_dev_size(fd, devname, &dsize) == 0)
1126 return 1;
1127
b31df436 1128 if (!st->ignore_hw_compat && test_partition(fd))
691c6ee1
N
1129 /* DDF is not allowed on partitions */
1130 return 1;
1131
a322f70c
DW
1132 /* 32M is a lower bound */
1133 if (dsize <= 32*1024*1024) {
97320d7c 1134 if (devname)
e7b84f9d
N
1135 pr_err("%s is too small for ddf: "
1136 "size is %llu sectors.\n",
1137 devname, dsize>>9);
97320d7c 1138 return 1;
a322f70c
DW
1139 }
1140 if (dsize & 511) {
97320d7c 1141 if (devname)
e7b84f9d
N
1142 pr_err("%s is an odd size for ddf: "
1143 "size is %llu bytes.\n",
1144 devname, dsize);
97320d7c 1145 return 1;
a322f70c
DW
1146 }
1147
37424f13
DW
1148 free_super_ddf(st);
1149
6416d527 1150 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 1151 pr_err("malloc of %zu failed.\n",
a322f70c
DW
1152 sizeof(*super));
1153 return 1;
1154 }
a2349791 1155 memset(super, 0, sizeof(*super));
a322f70c
DW
1156
1157 rv = load_ddf_headers(fd, super, devname);
1158 if (rv) {
1159 free(super);
1160 return rv;
1161 }
1162
1163 /* Have valid headers and have chosen the best. Let's read in the rest*/
1164
1165 rv = load_ddf_global(fd, super, devname);
1166
1167 if (rv) {
1168 if (devname)
e7b84f9d
N
1169 pr_err("Failed to load all information "
1170 "sections on %s\n", devname);
a322f70c
DW
1171 free(super);
1172 return rv;
1173 }
1174
3d2c4fc7
DW
1175 rv = load_ddf_local(fd, super, devname, 0);
1176
1177 if (rv) {
1178 if (devname)
e7b84f9d
N
1179 pr_err("Failed to load all information "
1180 "sections on %s\n", devname);
3d2c4fc7
DW
1181 free(super);
1182 return rv;
1183 }
a322f70c
DW
1184
1185 /* Should possibly check the sections .... */
1186
1187 st->sb = super;
1188 if (st->ss == NULL) {
1189 st->ss = &super_ddf;
1190 st->minor_version = 0;
1191 st->max_devs = 512;
1192 }
1193 return 0;
1194
1195}
1196
1197static void free_super_ddf(struct supertype *st)
1198{
1199 struct ddf_super *ddf = st->sb;
1200 if (ddf == NULL)
1201 return;
1202 free(ddf->phys);
1203 free(ddf->virt);
1204 while (ddf->conflist) {
1205 struct vcl *v = ddf->conflist;
1206 ddf->conflist = v->next;
59e36268
NB
1207 if (v->block_sizes)
1208 free(v->block_sizes);
3c48f7be 1209 if (v->other_bvds)
1210 /*
1211 v->other_bvds[0] points to beginning of buffer,
1212 see alloc_other_bvds()
1213 */
1214 free(v->other_bvds[0]);
a322f70c
DW
1215 free(v);
1216 }
1217 while (ddf->dlist) {
1218 struct dl *d = ddf->dlist;
1219 ddf->dlist = d->next;
1220 if (d->fd >= 0)
1221 close(d->fd);
b2280677
NB
1222 if (d->spare)
1223 free(d->spare);
a322f70c
DW
1224 free(d);
1225 }
8a38cb04
N
1226 while (ddf->add_list) {
1227 struct dl *d = ddf->add_list;
1228 ddf->add_list = d->next;
1229 if (d->fd >= 0)
1230 close(d->fd);
1231 if (d->spare)
1232 free(d->spare);
1233 free(d);
1234 }
a322f70c
DW
1235 free(ddf);
1236 st->sb = NULL;
1237}
1238
1239static struct supertype *match_metadata_desc_ddf(char *arg)
1240{
1241 /* 'ddf' only support containers */
1242 struct supertype *st;
1243 if (strcmp(arg, "ddf") != 0 &&
1244 strcmp(arg, "default") != 0
1245 )
1246 return NULL;
1247
503975b9 1248 st = xcalloc(1, sizeof(*st));
a322f70c
DW
1249 st->ss = &super_ddf;
1250 st->max_devs = 512;
1251 st->minor_version = 0;
1252 st->sb = NULL;
1253 return st;
1254}
1255
a322f70c
DW
1256#ifndef MDASSEMBLE
1257
1258static mapping_t ddf_state[] = {
1259 { "Optimal", 0},
1260 { "Degraded", 1},
1261 { "Deleted", 2},
1262 { "Missing", 3},
1263 { "Failed", 4},
1264 { "Partially Optimal", 5},
1265 { "-reserved-", 6},
1266 { "-reserved-", 7},
1267 { NULL, 0}
1268};
1269
1270static mapping_t ddf_init_state[] = {
1271 { "Not Initialised", 0},
1272 { "QuickInit in Progress", 1},
1273 { "Fully Initialised", 2},
1274 { "*UNKNOWN*", 3},
1275 { NULL, 0}
1276};
1277static mapping_t ddf_access[] = {
1278 { "Read/Write", 0},
1279 { "Reserved", 1},
1280 { "Read Only", 2},
1281 { "Blocked (no access)", 3},
1282 { NULL ,0}
1283};
1284
1285static mapping_t ddf_level[] = {
1286 { "RAID0", DDF_RAID0},
1287 { "RAID1", DDF_RAID1},
1288 { "RAID3", DDF_RAID3},
1289 { "RAID4", DDF_RAID4},
1290 { "RAID5", DDF_RAID5},
1291 { "RAID1E",DDF_RAID1E},
1292 { "JBOD", DDF_JBOD},
1293 { "CONCAT",DDF_CONCAT},
1294 { "RAID5E",DDF_RAID5E},
1295 { "RAID5EE",DDF_RAID5EE},
1296 { "RAID6", DDF_RAID6},
1297 { NULL, 0}
1298};
1299static mapping_t ddf_sec_level[] = {
1300 { "Striped", DDF_2STRIPED},
1301 { "Mirrored", DDF_2MIRRORED},
1302 { "Concat", DDF_2CONCAT},
1303 { "Spanned", DDF_2SPANNED},
1304 { NULL, 0}
1305};
1306#endif
1307
fb9d0acb 1308static int all_ff(const char *guid)
42dc2744
N
1309{
1310 int i;
1311 for (i = 0; i < DDF_GUID_LEN; i++)
1312 if (guid[i] != (char)0xff)
1313 return 0;
1314 return 1;
1315}
1316
4441541f
N
1317static const char *guid_str(const char *guid)
1318{
1319 static char buf[DDF_GUID_LEN*2+1];
1320 int i;
1321 char *p = buf;
1322 for (i = 0; i < DDF_GUID_LEN; i++) {
1323 unsigned char c = guid[i];
1324 if (c >= 32 && c < 127)
1325 p += sprintf(p, "%c", c);
1326 else
1327 p += sprintf(p, "%02x", c);
1328 }
1329 *p = '\0';
1330 return (const char *) buf;
1331}
1332
a322f70c
DW
1333#ifndef MDASSEMBLE
1334static void print_guid(char *guid, int tstamp)
1335{
1336 /* A GUIDs are part (or all) ASCII and part binary.
1337 * They tend to be space padded.
59e36268
NB
1338 * We print the GUID in HEX, then in parentheses add
1339 * any initial ASCII sequence, and a possible
1340 * time stamp from bytes 16-19
a322f70c
DW
1341 */
1342 int l = DDF_GUID_LEN;
1343 int i;
59e36268
NB
1344
1345 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1346 if ((i&3)==0 && i != 0) printf(":");
1347 printf("%02X", guid[i]&255);
1348 }
1349
cfccea8c 1350 printf("\n (");
a322f70c
DW
1351 while (l && guid[l-1] == ' ')
1352 l--;
1353 for (i=0 ; i<l ; i++) {
1354 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1355 fputc(guid[i], stdout);
1356 else
59e36268 1357 break;
a322f70c
DW
1358 }
1359 if (tstamp) {
1360 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1361 char tbuf[100];
1362 struct tm *tm;
1363 tm = localtime(&then);
59e36268 1364 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1365 fputs(tbuf, stdout);
1366 }
59e36268 1367 printf(")");
a322f70c
DW
1368}
1369
1370static void examine_vd(int n, struct ddf_super *sb, char *guid)
1371{
8c3b8c2c 1372 int crl = sb->conf_rec_len;
a322f70c
DW
1373 struct vcl *vcl;
1374
1375 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1376 unsigned int i;
a322f70c
DW
1377 struct vd_config *vc = &vcl->conf;
1378
60931cf9 1379 if (!be32_eq(calc_crc(vc, crl*512), vc->crc))
a322f70c
DW
1380 continue;
1381 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1382 continue;
1383
1384 /* Ok, we know about this VD, let's give more details */
b06e3095 1385 printf(" Raid Devices[%d] : %d (", n,
a8173e43 1386 be16_to_cpu(vc->prim_elmnt_count));
1387 for (i = 0; i < be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095 1388 int j;
a8173e43 1389 int cnt = be16_to_cpu(sb->phys->used_pdes);
b06e3095 1390 for (j=0; j<cnt; j++)
60931cf9 1391 if (be32_eq(vc->phys_refnum[i],
1392 sb->phys->entries[j].refnum))
b06e3095
N
1393 break;
1394 if (i) printf(" ");
1395 if (j < cnt)
1396 printf("%d", j);
1397 else
1398 printf("--");
1399 }
1400 printf(")\n");
1401 if (vc->chunk_shift != 255)
613b0d17
N
1402 printf(" Chunk Size[%d] : %d sectors\n", n,
1403 1 << vc->chunk_shift);
a322f70c
DW
1404 printf(" Raid Level[%d] : %s\n", n,
1405 map_num(ddf_level, vc->prl)?:"-unknown-");
1406 if (vc->sec_elmnt_count != 1) {
1407 printf(" Secondary Position[%d] : %d of %d\n", n,
1408 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1409 printf(" Secondary Level[%d] : %s\n", n,
1410 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1411 }
1412 printf(" Device Size[%d] : %llu\n", n,
9d0c6b70 1413 be64_to_cpu(vc->blocks)/2);
a322f70c 1414 printf(" Array Size[%d] : %llu\n", n,
9d0c6b70 1415 be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1416 }
1417}
1418
1419static void examine_vds(struct ddf_super *sb)
1420{
a8173e43 1421 int cnt = be16_to_cpu(sb->virt->populated_vdes);
fb9d0acb 1422 unsigned int i;
a322f70c
DW
1423 printf(" Virtual Disks : %d\n", cnt);
1424
a8173e43 1425 for (i = 0; i < be16_to_cpu(sb->virt->max_vdes); i++) {
a322f70c 1426 struct virtual_entry *ve = &sb->virt->entries[i];
fb9d0acb 1427 if (all_ff(ve->guid))
1428 continue;
b06e3095 1429 printf("\n");
a322f70c
DW
1430 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1431 printf("\n");
a8173e43 1432 printf(" unit[%d] : %d\n", i, be16_to_cpu(ve->unit));
a322f70c
DW
1433 printf(" state[%d] : %s, %s%s\n", i,
1434 map_num(ddf_state, ve->state & 7),
cc83a819
N
1435 (ve->state & DDF_state_morphing) ? "Morphing, ": "",
1436 (ve->state & DDF_state_inconsistent)? "Not Consistent" : "Consistent");
a322f70c 1437 printf(" init state[%d] : %s\n", i,
cc83a819 1438 map_num(ddf_init_state, ve->init_state&DDF_initstate_mask));
a322f70c 1439 printf(" access[%d] : %s\n", i,
cc83a819 1440 map_num(ddf_access, (ve->init_state & DDF_access_mask) >> 6));
a322f70c
DW
1441 printf(" Name[%d] : %.16s\n", i, ve->name);
1442 examine_vd(i, sb, ve->guid);
1443 }
1444 if (cnt) printf("\n");
1445}
1446
1447static void examine_pds(struct ddf_super *sb)
1448{
a8173e43 1449 int cnt = be16_to_cpu(sb->phys->used_pdes);
a322f70c
DW
1450 int i;
1451 struct dl *dl;
1452 printf(" Physical Disks : %d\n", cnt);
962371a5 1453 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1454
1455 for (i=0 ; i<cnt ; i++) {
1456 struct phys_disk_entry *pd = &sb->phys->entries[i];
a8173e43 1457 int type = be16_to_cpu(pd->type);
1458 int state = be16_to_cpu(pd->state);
a322f70c 1459
b06e3095
N
1460 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1461 //printf("\n");
1462 printf(" %3d %08x ", i,
60931cf9 1463 be32_to_cpu(pd->refnum));
613b0d17 1464 printf("%8lluK ",
9d0c6b70 1465 be64_to_cpu(pd->config_size)>>1);
b06e3095 1466 for (dl = sb->dlist; dl ; dl = dl->next) {
60931cf9 1467 if (be32_eq(dl->disk.refnum, pd->refnum)) {
b06e3095
N
1468 char *dv = map_dev(dl->major, dl->minor, 0);
1469 if (dv) {
962371a5 1470 printf("%-15s", dv);
b06e3095
N
1471 break;
1472 }
1473 }
1474 }
1475 if (!dl)
962371a5 1476 printf("%15s","");
b06e3095 1477 printf(" %s%s%s%s%s",
a322f70c 1478 (type&2) ? "active":"",
b06e3095 1479 (type&4) ? "Global-Spare":"",
a322f70c
DW
1480 (type&8) ? "spare" : "",
1481 (type&16)? ", foreign" : "",
1482 (type&32)? "pass-through" : "");
18cb4496
N
1483 if (state & DDF_Failed)
1484 /* This over-rides these three */
1485 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1486 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1487 (state&1)? "Online": "Offline",
1488 (state&2)? ", Failed": "",
1489 (state&4)? ", Rebuilding": "",
1490 (state&8)? ", in-transition": "",
b06e3095
N
1491 (state&16)? ", SMART-errors": "",
1492 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1493 (state&64)? ", Missing" : "");
a322f70c
DW
1494 printf("\n");
1495 }
1496}
1497
1498static void examine_super_ddf(struct supertype *st, char *homehost)
1499{
1500 struct ddf_super *sb = st->sb;
1501
60931cf9 1502 printf(" Magic : %08x\n", be32_to_cpu(sb->anchor.magic));
a322f70c 1503 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1504 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1505 printf("\n");
1506 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c 1507 printf("\n");
60931cf9 1508 printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
1509 printf(" Redundant hdr : %s\n", be32_eq(sb->secondary.magic,
1510 DDF_HEADER_MAGIC)
a322f70c
DW
1511 ?"yes" : "no");
1512 examine_vds(sb);
1513 examine_pds(sb);
1514}
1515
a5d85af7 1516static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1517
bedbf68a 1518static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
42dc2744 1519static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1520
bedbf68a 1521static unsigned int get_vd_num_of_subarray(struct supertype *st)
1522{
1523 /*
1524 * Figure out the VD number for this supertype.
1525 * Returns DDF_CONTAINER for the container itself,
1526 * and DDF_NOTFOUND on error.
1527 */
1528 struct ddf_super *ddf = st->sb;
1529 struct mdinfo *sra;
1530 char *sub, *end;
1531 unsigned int vcnum;
1532
1533 if (*st->container_devnm == '\0')
1534 return DDF_CONTAINER;
1535
1536 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1537 if (!sra || sra->array.major_version != -1 ||
1538 sra->array.minor_version != -2 ||
1539 !is_subarray(sra->text_version))
1540 return DDF_NOTFOUND;
1541
1542 sub = strchr(sra->text_version + 1, '/');
1543 if (sub != NULL)
1544 vcnum = strtoul(sub + 1, &end, 10);
1545 if (sub == NULL || *sub == '\0' || *end != '\0' ||
a8173e43 1546 vcnum >= be16_to_cpu(ddf->active->max_vd_entries))
bedbf68a 1547 return DDF_NOTFOUND;
1548
1549 return vcnum;
1550}
1551
061f2c6a 1552static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1553{
1554 /* We just write a generic DDF ARRAY entry
1555 */
1556 struct mdinfo info;
1557 char nbuf[64];
a5d85af7 1558 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1559 fname_from_uuid(st, &info, nbuf, ':');
1560
1561 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1562}
1563
1564static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1565{
1566 /* We just write a generic DDF ARRAY entry
a322f70c 1567 */
42dc2744 1568 struct ddf_super *ddf = st->sb;
ff54de6e 1569 struct mdinfo info;
f21e18ca 1570 unsigned int i;
ff54de6e 1571 char nbuf[64];
a5d85af7 1572 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1573 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1574
a8173e43 1575 for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1576 struct virtual_entry *ve = &ddf->virt->entries[i];
1577 struct vcl vcl;
1578 char nbuf1[64];
1579 if (all_ff(ve->guid))
1580 continue;
1581 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1582 ddf->currentconf =&vcl;
1583 uuid_from_super_ddf(st, info.uuid);
1584 fname_from_uuid(st, &info, nbuf1, ':');
1585 printf("ARRAY container=%s member=%d UUID=%s\n",
1586 nbuf+5, i, nbuf1+5);
1587 }
a322f70c
DW
1588}
1589
bceedeec
N
1590static void export_examine_super_ddf(struct supertype *st)
1591{
1592 struct mdinfo info;
1593 char nbuf[64];
a5d85af7 1594 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1595 fname_from_uuid(st, &info, nbuf, ':');
1596 printf("MD_METADATA=ddf\n");
1597 printf("MD_LEVEL=container\n");
1598 printf("MD_UUID=%s\n", nbuf+5);
1599}
bceedeec 1600
74db60b0
N
1601static int copy_metadata_ddf(struct supertype *st, int from, int to)
1602{
1603 void *buf;
1604 unsigned long long dsize, offset;
1605 int bytes;
1606 struct ddf_header *ddf;
1607 int written = 0;
1608
1609 /* The meta consists of an anchor, a primary, and a secondary.
1610 * This all lives at the end of the device.
1611 * So it is easiest to find the earliest of primary and
1612 * secondary, and copy everything from there.
1613 *
1614 * Anchor is 512 from end It contains primary_lba and secondary_lba
1615 * we choose one of those
1616 */
1617
1618 if (posix_memalign(&buf, 4096, 4096) != 0)
1619 return 1;
1620
1621 if (!get_dev_size(from, NULL, &dsize))
1622 goto err;
1623
1624 if (lseek64(from, dsize-512, 0) < 0)
1625 goto err;
1626 if (read(from, buf, 512) != 512)
1627 goto err;
1628 ddf = buf;
60931cf9 1629 if (!be32_eq(ddf->magic, DDF_HEADER_MAGIC) ||
1630 !be32_eq(calc_crc(ddf, 512), ddf->crc) ||
74db60b0
N
1631 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1632 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1633 goto err;
1634
1635 offset = dsize - 512;
9d0c6b70 1636 if ((be64_to_cpu(ddf->primary_lba) << 9) < offset)
1637 offset = be64_to_cpu(ddf->primary_lba) << 9;
1638 if ((be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1639 offset = be64_to_cpu(ddf->secondary_lba) << 9;
74db60b0
N
1640
1641 bytes = dsize - offset;
1642
1643 if (lseek64(from, offset, 0) < 0 ||
1644 lseek64(to, offset, 0) < 0)
1645 goto err;
1646 while (written < bytes) {
1647 int n = bytes - written;
1648 if (n > 4096)
1649 n = 4096;
1650 if (read(from, buf, n) != n)
1651 goto err;
1652 if (write(to, buf, n) != n)
1653 goto err;
1654 written += n;
1655 }
1656 free(buf);
1657 return 0;
1658err:
1659 free(buf);
1660 return 1;
1661}
1662
a322f70c
DW
/*
 * --detail output for DDF: intentionally prints nothing yet.
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* FIXME later
	 *  - could print the DDF GUID
	 *  - need to find which array this is
	 *  - for the whole container, briefly list all arrays
	 *  - for a single array, give its name
	 */
}
1672
1673static void brief_detail_super_ddf(struct supertype *st)
1674{
ff54de6e
N
1675 struct mdinfo info;
1676 char nbuf[64];
bedbf68a 1677 struct ddf_super *ddf = st->sb;
1678 unsigned int vcnum = get_vd_num_of_subarray(st);
1679 if (vcnum == DDF_CONTAINER)
1680 uuid_from_super_ddf(st, info.uuid);
1681 else if (vcnum == DDF_NOTFOUND)
1682 return;
1683 else
1684 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
ff54de6e
N
1685 fname_from_uuid(st, &info, nbuf,':');
1686 printf(" UUID=%s", nbuf + 5);
a322f70c 1687}
a322f70c
DW
1688#endif
1689
1690static int match_home_ddf(struct supertype *st, char *homehost)
1691{
1692 /* It matches 'this' host if the controller is a
1693 * Linux-MD controller with vendor_data matching
1694 * the hostname
1695 */
1696 struct ddf_super *ddf = st->sb;
f21e18ca 1697 unsigned int len;
d1d3482b
N
1698
1699 if (!homehost)
1700 return 0;
1701 len = strlen(homehost);
a322f70c
DW
1702
1703 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1704 len < sizeof(ddf->controller.vendor_data) &&
1705 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1706 ddf->controller.vendor_data[len] == 0);
1707}
1708
0e600426 1709#ifndef MDASSEMBLE
baba3f4e 1710static int find_index_in_bvd(const struct ddf_super *ddf,
1711 const struct vd_config *conf, unsigned int n,
1712 unsigned int *n_bvd)
1713{
1714 /*
1715 * Find the index of the n-th valid physical disk in this BVD
1716 */
1717 unsigned int i, j;
1718 for (i = 0, j = 0; i < ddf->mppe &&
a8173e43 1719 j < be16_to_cpu(conf->prim_elmnt_count); i++) {
60931cf9 1720 if (be32_to_cpu(conf->phys_refnum[i]) != 0xffffffff) {
baba3f4e 1721 if (n == j) {
1722 *n_bvd = i;
1723 return 1;
1724 }
1725 j++;
1726 }
1727 }
1728 dprintf("%s: couldn't find BVD member %u (total %u)\n",
a8173e43 1729 __func__, n, be16_to_cpu(conf->prim_elmnt_count));
baba3f4e 1730 return 0;
1731}
1732
1733static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1734 unsigned int n,
1735 unsigned int *n_bvd, struct vcl **vcl)
a322f70c 1736{
7a7cc504 1737 struct vcl *v;
59e36268 1738
baba3f4e 1739 for (v = ddf->conflist; v; v = v->next) {
84e32e19 1740 unsigned int nsec, ibvd = 0;
baba3f4e 1741 struct vd_config *conf;
1742 if (inst != v->vcnum)
1743 continue;
1744 conf = &v->conf;
1745 if (conf->sec_elmnt_count == 1) {
1746 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1747 *vcl = v;
1748 return conf;
1749 } else
1750 goto bad;
1751 }
1752 if (v->other_bvds == NULL) {
1753 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1754 __func__, conf->sec_elmnt_count);
1755 goto bad;
1756 }
a8173e43 1757 nsec = n / be16_to_cpu(conf->prim_elmnt_count);
baba3f4e 1758 if (conf->sec_elmnt_seq != nsec) {
1759 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
baba3f4e 1760 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1761 == nsec)
1762 break;
1763 }
1764 if (ibvd == conf->sec_elmnt_count)
1765 goto bad;
1766 conf = v->other_bvds[ibvd-1];
1767 }
1768 if (!find_index_in_bvd(ddf, conf,
1769 n - nsec*conf->sec_elmnt_count, n_bvd))
1770 goto bad;
1771 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
84e32e19 1772 , __func__, n, *n_bvd, ibvd, inst);
baba3f4e 1773 *vcl = v;
1774 return conf;
1775 }
1776bad:
1777 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
7a7cc504
NB
1778 return NULL;
1779}
0e600426 1780#endif
7a7cc504 1781
60931cf9 1782static int find_phys(const struct ddf_super *ddf, be32 phys_refnum)
7a7cc504
NB
1783{
1784 /* Find the entry in phys_disk which has the given refnum
1785 * and return it's index
1786 */
f21e18ca 1787 unsigned int i;
a8173e43 1788 for (i = 0; i < be16_to_cpu(ddf->phys->max_pdes); i++)
60931cf9 1789 if (be32_eq(ddf->phys->entries[i].refnum, phys_refnum))
7a7cc504
NB
1790 return i;
1791 return -1;
a322f70c
DW
1792}
1793
bedbf68a 1794static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1795{
1796 char buf[20];
1797 struct sha1_ctx ctx;
1798 sha1_init_ctx(&ctx);
1799 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1800 sha1_finish_ctx(&ctx, buf);
1801 memcpy(uuid, buf, 4*4);
1802}
1803
a322f70c
DW
1804static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1805{
1806 /* The uuid returned here is used for:
1807 * uuid to put into bitmap file (Create, Grow)
1808 * uuid for backup header when saving critical section (Grow)
1809 * comparing uuids when re-adding a device into an array
51006d85
N
1810 * In these cases the uuid required is that of the data-array,
1811 * not the device-set.
1812 * uuid to recognise same set when adding a missing device back
1813 * to an array. This is a uuid for the device-set.
613b0d17 1814 *
a322f70c
DW
1815 * For each of these we can make do with a truncated
1816 * or hashed uuid rather than the original, as long as
1817 * everyone agrees.
a322f70c
DW
1818 * In the case of SVD we assume the BVD is of interest,
1819 * though that might be the case if a bitmap were made for
1820 * a mirrored SVD - worry about that later.
1821 * So we need to find the VD configuration record for the
1822 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1823 * The first 16 bytes of the sha1 of these is used.
1824 */
1825 struct ddf_super *ddf = st->sb;
d2ca6449 1826 struct vcl *vcl = ddf->currentconf;
c5afc314 1827 char *guid;
a322f70c 1828
c5afc314
N
1829 if (vcl)
1830 guid = vcl->conf.guid;
1831 else
1832 guid = ddf->anchor.guid;
bedbf68a 1833 uuid_from_ddf_guid(guid, uuid);
a322f70c
DW
1834}
1835
a5d85af7 1836static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1837
a5d85af7 1838static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1839{
1840 struct ddf_super *ddf = st->sb;
a5d85af7 1841 int map_disks = info->array.raid_disks;
90fa1a29 1842 __u32 *cptr;
a322f70c 1843
78e44928 1844 if (ddf->currentconf) {
a5d85af7 1845 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1846 return;
1847 }
95eeceeb 1848 memset(info, 0, sizeof(*info));
78e44928 1849
a8173e43 1850 info->array.raid_disks = be16_to_cpu(ddf->phys->used_pdes);
a322f70c
DW
1851 info->array.level = LEVEL_CONTAINER;
1852 info->array.layout = 0;
1853 info->array.md_minor = -1;
90fa1a29
JS
1854 cptr = (__u32 *)(ddf->anchor.guid + 16);
1855 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1856
a322f70c
DW
1857 info->array.utime = 0;
1858 info->array.chunk_size = 0;
510242aa 1859 info->container_enough = 1;
a322f70c 1860
a322f70c
DW
1861 info->disk.major = 0;
1862 info->disk.minor = 0;
cba0191b 1863 if (ddf->dlist) {
60931cf9 1864 info->disk.number = be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1865 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449 1866
9d0c6b70 1867 info->data_offset = be64_to_cpu(ddf->phys->
613b0d17
N
1868 entries[info->disk.raid_disk].
1869 config_size);
d2ca6449 1870 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1871 } else {
1872 info->disk.number = -1;
661dce36 1873 info->disk.raid_disk = -1;
cba0191b
NB
1874// info->disk.raid_disk = find refnum in the table and use index;
1875 }
f22385f9 1876 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1877
921d9e16 1878 info->recovery_start = MaxSector;
a19c88b8 1879 info->reshape_active = 0;
6e75048b 1880 info->recovery_blocked = 0;
c5afc314 1881 info->name[0] = 0;
a322f70c 1882
f35f2525
N
1883 info->array.major_version = -1;
1884 info->array.minor_version = -2;
159c3a1a 1885 strcpy(info->text_version, "ddf");
a67dd8cc 1886 info->safe_mode_delay = 0;
159c3a1a 1887
c5afc314 1888 uuid_from_super_ddf(st, info->uuid);
a322f70c 1889
a5d85af7
N
1890 if (map) {
1891 int i;
1892 for (i = 0 ; i < map_disks; i++) {
1893 if (i < info->array.raid_disks &&
a8173e43 1894 (be16_to_cpu(ddf->phys->entries[i].state)
1895 & DDF_Online) &&
1896 !(be16_to_cpu(ddf->phys->entries[i].state)
1897 & DDF_Failed))
a5d85af7
N
1898 map[i] = 1;
1899 else
1900 map[i] = 0;
1901 }
1902 }
a322f70c
DW
1903}
1904
a5d85af7 1905static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1906{
1907 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1908 struct vcl *vc = ddf->currentconf;
1909 int cd = ddf->currentdev;
ddf94a43 1910 int n_prim;
db42fa9b 1911 int j;
8592f29d 1912 struct dl *dl;
a5d85af7 1913 int map_disks = info->array.raid_disks;
90fa1a29 1914 __u32 *cptr;
ddf94a43 1915 struct vd_config *conf;
a322f70c 1916
95eeceeb 1917 memset(info, 0, sizeof(*info));
8a2848a7 1918 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1919 return;
a322f70c 1920 info->array.md_minor = -1;
90fa1a29
JS
1921 cptr = (__u32 *)(vc->conf.guid + 16);
1922 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
60931cf9 1923 info->array.utime = DECADE + be32_to_cpu(vc->conf.timestamp);
d2ca6449 1924 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1925 info->custom_array_size = 0;
d2ca6449 1926
ddf94a43 1927 conf = &vc->conf;
a8173e43 1928 n_prim = be16_to_cpu(conf->prim_elmnt_count);
ddf94a43 1929 if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
1930 int ibvd = cd / n_prim - 1;
1931 cd %= n_prim;
1932 conf = vc->other_bvds[ibvd];
1933 }
1934
f21e18ca 1935 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
57a66662 1936 info->data_offset =
9d0c6b70 1937 be64_to_cpu(LBA_OFFSET(ddf, conf)[cd]);
d2ca6449
NB
1938 if (vc->block_sizes)
1939 info->component_size = vc->block_sizes[cd];
1940 else
9d0c6b70 1941 info->component_size = be64_to_cpu(conf->blocks);
d2ca6449 1942 }
a322f70c 1943
fb204fb2 1944 for (dl = ddf->dlist; dl ; dl = dl->next)
60931cf9 1945 if (be32_eq(dl->disk.refnum, conf->phys_refnum[cd]))
fb204fb2
N
1946 break;
1947
a322f70c
DW
1948 info->disk.major = 0;
1949 info->disk.minor = 0;
fb204fb2 1950 info->disk.state = 0;
8592f29d
N
1951 if (dl) {
1952 info->disk.major = dl->major;
1953 info->disk.minor = dl->minor;
7c3fb3ec 1954 info->disk.raid_disk = cd + conf->sec_elmnt_seq
a8173e43 1955 * be16_to_cpu(conf->prim_elmnt_count);
fb204fb2
N
1956 info->disk.number = dl->pdnum;
1957 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1958 }
a322f70c 1959
103f2410
NB
1960 info->container_member = ddf->currentconf->vcnum;
1961
921d9e16 1962 info->recovery_start = MaxSector;
80d26cb2 1963 info->resync_start = 0;
624c5ad4 1964 info->reshape_active = 0;
6e75048b 1965 info->recovery_blocked = 0;
80d26cb2
NB
1966 if (!(ddf->virt->entries[info->container_member].state
1967 & DDF_state_inconsistent) &&
1968 (ddf->virt->entries[info->container_member].init_state
1969 & DDF_initstate_mask)
1970 == DDF_init_full)
b7528a20 1971 info->resync_start = MaxSector;
80d26cb2 1972
a322f70c
DW
1973 uuid_from_super_ddf(st, info->uuid);
1974
f35f2525
N
1975 info->array.major_version = -1;
1976 info->array.minor_version = -2;
9b63e648 1977 sprintf(info->text_version, "/%s/%d",
4dd2df09 1978 st->container_devnm,
9b63e648 1979 info->container_member);
a67dd8cc 1980 info->safe_mode_delay = 200;
159c3a1a 1981
db42fa9b
N
1982 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1983 info->name[16]=0;
1984 for(j=0; j<16; j++)
1985 if (info->name[j] == ' ')
1986 info->name[j] = 0;
a5d85af7
N
1987
1988 if (map)
1989 for (j = 0; j < map_disks; j++) {
1990 map[j] = 0;
1991 if (j < info->array.raid_disks) {
1992 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1993 if (i >= 0 &&
a8173e43 1994 (be16_to_cpu(ddf->phys->entries[i].state)
1995 & DDF_Online) &&
1996 !(be16_to_cpu(ddf->phys->entries[i].state)
1997 & DDF_Failed))
a5d85af7
N
1998 map[i] = 1;
1999 }
2000 }
a322f70c
DW
2001}
2002
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* Apply a named metadata update to a DDF superblock.
	 *
	 * For 'assemble' and 'force' the caller needs a non-zero return
	 * if any change was made; for other updates the value is ignored.
	 *
	 * DDF does not need "force-*" or "assemble" handling: there is no
	 * need to 'trick' the kernel.  When the metadata is first updated
	 * to activate the array, all the implied modifications just happen.
	 *
	 * None of the updates below modifies the metadata yet, so the
	 * function reduces to a lookup of the result for each keyword:
	 *   grow              - FIXME, not implemented; returns 0
	 *   resync            - nothing recorded; returns 0
	 *   homehost          - would live in controller->vendor_data when
	 *                       we are the vendor; not implemented, -1
	 *   name              - would live in virtual_entry->name; not
	 *                       implemented, -1
	 *   _reshape_progress - reshape is not supported yet; returns 0
	 *   assemble          - nothing to do, just succeed; returns 0
	 * Anything else (uuid, sparc2.2, super-minor, summaries, ...) is
	 * rejected with -1.
	 */
	static const struct {
		const char *keyword;
		int result;
	} known[] = {
		{ "grow",               0 },
		{ "resync",             0 },
		{ "homehost",          -1 },
		{ "name",              -1 },
		{ "_reshape_progress",  0 },
		{ "assemble",           0 },
	};
	unsigned int k;

	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++)
		if (strcmp(update, known[k].keyword) == 0)
			return known[k].result;

	/* unknown or unsupported update */
	return -1;
}
2071
5f8097be
NB
2072static void make_header_guid(char *guid)
2073{
60931cf9 2074 be32 stamp;
5f8097be
NB
2075 /* Create a DDF Header of Virtual Disk GUID */
2076
2077 /* 24 bytes of fiction required.
2078 * first 8 are a 'vendor-id' - "Linux-MD"
2079 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2080 * Remaining 8 random number plus timestamp
2081 */
2082 memcpy(guid, T10, sizeof(T10));
60931cf9 2083 stamp = cpu_to_be32(0xdeadbeef);
5f8097be 2084 memcpy(guid+8, &stamp, 4);
60931cf9 2085 stamp = cpu_to_be32(0);
5f8097be 2086 memcpy(guid+12, &stamp, 4);
60931cf9 2087 stamp = cpu_to_be32(time(0) - DECADE);
5f8097be 2088 memcpy(guid+16, &stamp, 4);
60931cf9 2089 stamp._v32 = random32();
5f8097be 2090 memcpy(guid+20, &stamp, 4);
5f8097be 2091}
59e36268 2092
fb9d0acb 2093static unsigned int find_unused_vde(const struct ddf_super *ddf)
2094{
2095 unsigned int i;
a8173e43 2096 for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
fb9d0acb 2097 if (all_ff(ddf->virt->entries[i].guid))
2098 return i;
2099 }
2100 return DDF_NOTFOUND;
2101}
2102
2103static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2104 const char *name)
2105{
2106 unsigned int i;
2107 if (name == NULL)
2108 return DDF_NOTFOUND;
a8173e43 2109 for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
fb9d0acb 2110 if (all_ff(ddf->virt->entries[i].guid))
2111 continue;
2112 if (!strncmp(name, ddf->virt->entries[i].name,
2113 sizeof(ddf->virt->entries[i].name)))
2114 return i;
2115 }
2116 return DDF_NOTFOUND;
2117}
2118
4441541f 2119#ifndef MDASSEMBLE
fb9d0acb 2120static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2121 const char *guid)
2122{
2123 unsigned int i;
2124 if (guid == NULL || all_ff(guid))
2125 return DDF_NOTFOUND;
a8173e43 2126 for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++)
fb9d0acb 2127 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2128 return i;
2129 return DDF_NOTFOUND;
2130}
4441541f 2131#endif
fb9d0acb 2132
78e44928
NB
2133static int init_super_ddf_bvd(struct supertype *st,
2134 mdu_array_info_t *info,
2135 unsigned long long size,
2136 char *name, char *homehost,
83cd1e97 2137 int *uuid, unsigned long long data_offset);
78e44928 2138
a322f70c
DW
2139static int init_super_ddf(struct supertype *st,
2140 mdu_array_info_t *info,
2141 unsigned long long size, char *name, char *homehost,
83cd1e97 2142 int *uuid, unsigned long long data_offset)
a322f70c
DW
2143{
2144 /* This is primarily called by Create when creating a new array.
2145 * We will then get add_to_super called for each component, and then
2146 * write_init_super called to write it out to each device.
2147 * For DDF, Create can create on fresh devices or on a pre-existing
2148 * array.
2149 * To create on a pre-existing array a different method will be called.
2150 * This one is just for fresh drives.
2151 *
2152 * We need to create the entire 'ddf' structure which includes:
2153 * DDF headers - these are easy.
2154 * Controller data - a Sector describing this controller .. not that
2155 * this is a controller exactly.
2156 * Physical Disk Record - one entry per device, so
2157 * leave plenty of space.
2158 * Virtual Disk Records - again, just leave plenty of space.
2159 * This just lists VDs, doesn't give details
2160 * Config records - describes the VDs that use this disk
2161 * DiskData - describes 'this' device.
2162 * BadBlockManagement - empty
2163 * Diag Space - empty
2164 * Vendor Logs - Could we put bitmaps here?
2165 *
2166 */
2167 struct ddf_super *ddf;
2168 char hostname[17];
2169 int hostlen;
a322f70c
DW
2170 int max_phys_disks, max_virt_disks;
2171 unsigned long long sector;
2172 int clen;
2173 int i;
2174 int pdsize, vdsize;
2175 struct phys_disk *pd;
2176 struct virtual_disk *vd;
2177
83cd1e97 2178 if (data_offset != INVALID_SECTORS) {
ed503f89 2179 pr_err("data-offset not supported by DDF\n");
83cd1e97
N
2180 return 0;
2181 }
2182
78e44928 2183 if (st->sb)
83cd1e97
N
2184 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2185 data_offset);
ba7eb04f 2186
3d2c4fc7 2187 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 2188 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
2189 return 0;
2190 }
6264b437 2191 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
2192 ddf->dlist = NULL; /* no physical disks yet */
2193 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
2194 st->sb = ddf;
2195
2196 if (info == NULL) {
2197 /* zeroing superblock */
2198 return 0;
2199 }
a322f70c
DW
2200
2201 /* At least 32MB *must* be reserved for the ddf. So let's just
2202 * start 32MB from the end, and put the primary header there.
2203 * Don't do secondary for now.
2204 * We don't know exactly where that will be yet as it could be
2205 * different on each device. To just set up the lengths.
2206 *
2207 */
2208
2209 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 2210 make_header_guid(ddf->anchor.guid);
a322f70c 2211
59e36268 2212 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
60931cf9 2213 ddf->anchor.seq = cpu_to_be32(1);
2214 ddf->anchor.timestamp = cpu_to_be32(time(0) - DECADE);
a322f70c
DW
2215 ddf->anchor.openflag = 0xFF;
2216 ddf->anchor.foreignflag = 0;
2217 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2218 ddf->anchor.pad0 = 0xff;
2219 memset(ddf->anchor.pad1, 0xff, 12);
2220 memset(ddf->anchor.header_ext, 0xff, 32);
9d0c6b70 2221 ddf->anchor.primary_lba = cpu_to_be64(~(__u64)0);
2222 ddf->anchor.secondary_lba = cpu_to_be64(~(__u64)0);
a322f70c
DW
2223 ddf->anchor.type = DDF_HEADER_ANCHOR;
2224 memset(ddf->anchor.pad2, 0xff, 3);
60931cf9 2225 ddf->anchor.workspace_len = cpu_to_be32(32768); /* Must be reserved */
9d0c6b70 2226 /* Put this at bottom of 32M reserved.. */
2227 ddf->anchor.workspace_lba = cpu_to_be64(~(__u64)0);
a322f70c 2228 max_phys_disks = 1023; /* Should be enough */
a8173e43 2229 ddf->anchor.max_pd_entries = cpu_to_be16(max_phys_disks);
a322f70c 2230 max_virt_disks = 255;
a8173e43 2231 ddf->anchor.max_vd_entries = cpu_to_be16(max_virt_disks); /* ?? */
2232 ddf->anchor.max_partitions = cpu_to_be16(64); /* ?? */
a322f70c 2233 ddf->max_part = 64;
8c3b8c2c 2234 ddf->mppe = 256;
59e36268 2235 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
a8173e43 2236 ddf->anchor.config_record_len = cpu_to_be16(ddf->conf_rec_len);
2237 ddf->anchor.max_primary_element_entries = cpu_to_be16(ddf->mppe);
a322f70c 2238 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
2239 /* controller sections is one sector long immediately
2240 * after the ddf header */
2241 sector = 1;
60931cf9 2242 ddf->anchor.controller_section_offset = cpu_to_be32(sector);
2243 ddf->anchor.controller_section_length = cpu_to_be32(1);
a322f70c
DW
2244 sector += 1;
2245
2246 /* phys is 8 sectors after that */
2247 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2248 sizeof(struct phys_disk_entry)*max_phys_disks,
2249 512);
2250 switch(pdsize/512) {
2251 case 2: case 8: case 32: case 128: case 512: break;
2252 default: abort();
2253 }
60931cf9 2254 ddf->anchor.phys_section_offset = cpu_to_be32(sector);
a322f70c 2255 ddf->anchor.phys_section_length =
60931cf9 2256 cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
a322f70c
DW
2257 sector += pdsize/512;
2258
2259 /* virt is another 32 sectors */
2260 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2261 sizeof(struct virtual_entry) * max_virt_disks,
2262 512);
2263 switch(vdsize/512) {
2264 case 2: case 8: case 32: case 128: case 512: break;
2265 default: abort();
2266 }
60931cf9 2267 ddf->anchor.virt_section_offset = cpu_to_be32(sector);
a322f70c 2268 ddf->anchor.virt_section_length =
60931cf9 2269 cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
a322f70c
DW
2270 sector += vdsize/512;
2271
59e36268 2272 clen = ddf->conf_rec_len * (ddf->max_part+1);
60931cf9 2273 ddf->anchor.config_section_offset = cpu_to_be32(sector);
2274 ddf->anchor.config_section_length = cpu_to_be32(clen);
a322f70c
DW
2275 sector += clen;
2276
60931cf9 2277 ddf->anchor.data_section_offset = cpu_to_be32(sector);
2278 ddf->anchor.data_section_length = cpu_to_be32(1);
a322f70c
DW
2279 sector += 1;
2280
60931cf9 2281 ddf->anchor.bbm_section_length = cpu_to_be32(0);
2282 ddf->anchor.bbm_section_offset = cpu_to_be32(0xFFFFFFFF);
2283 ddf->anchor.diag_space_length = cpu_to_be32(0);
2284 ddf->anchor.diag_space_offset = cpu_to_be32(0xFFFFFFFF);
2285 ddf->anchor.vendor_length = cpu_to_be32(0);
2286 ddf->anchor.vendor_offset = cpu_to_be32(0xFFFFFFFF);
a322f70c
DW
2287
2288 memset(ddf->anchor.pad4, 0xff, 256);
2289
2290 memcpy(&ddf->primary, &ddf->anchor, 512);
2291 memcpy(&ddf->secondary, &ddf->anchor, 512);
2292
2293 ddf->primary.openflag = 1; /* I guess.. */
2294 ddf->primary.type = DDF_HEADER_PRIMARY;
2295
2296 ddf->secondary.openflag = 1; /* I guess.. */
2297 ddf->secondary.type = DDF_HEADER_SECONDARY;
2298
2299 ddf->active = &ddf->primary;
2300
2301 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2302
2303 /* 24 more bytes of fiction required.
2304 * first 8 are a 'vendor-id' - "Linux-MD"
2305 * Remaining 16 are serial number.... maybe a hostname would do?
2306 */
2307 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
2308 gethostname(hostname, sizeof(hostname));
2309 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
2310 hostlen = strlen(hostname);
2311 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2312 for (i = strlen(T10) ; i+hostlen < 24; i++)
2313 ddf->controller.guid[i] = ' ';
2314
a8173e43 2315 ddf->controller.type.vendor_id = cpu_to_be16(0xDEAD);
2316 ddf->controller.type.device_id = cpu_to_be16(0xBEEF);
2317 ddf->controller.type.sub_vendor_id = cpu_to_be16(0);
2318 ddf->controller.type.sub_device_id = cpu_to_be16(0);
a322f70c
DW
2319 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2320 memset(ddf->controller.pad, 0xff, 8);
2321 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
2322 if (homehost && strlen(homehost) < 440)
2323 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 2324
3d2c4fc7 2325 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 2326 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
2327 return 0;
2328 }
6416d527 2329 ddf->phys = pd;
a322f70c
DW
2330 ddf->pdsize = pdsize;
2331
2332 memset(pd, 0xff, pdsize);
2333 memset(pd, 0, sizeof(*pd));
076515ba 2334 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a8173e43 2335 pd->used_pdes = cpu_to_be16(0);
2336 pd->max_pdes = cpu_to_be16(max_phys_disks);
a322f70c 2337 memset(pd->pad, 0xff, 52);
4a3ca8ac 2338 for (i = 0; i < max_phys_disks; i++)
2339 memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
a322f70c 2340
3d2c4fc7 2341 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 2342 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
2343 return 0;
2344 }
6416d527 2345 ddf->virt = vd;
a322f70c
DW
2346 ddf->vdsize = vdsize;
2347 memset(vd, 0, vdsize);
2348 vd->magic = DDF_VIRT_RECORDS_MAGIC;
a8173e43 2349 vd->populated_vdes = cpu_to_be16(0);
2350 vd->max_vdes = cpu_to_be16(max_virt_disks);
a322f70c
DW
2351 memset(vd->pad, 0xff, 52);
2352
5f8097be
NB
2353 for (i=0; i<max_virt_disks; i++)
2354 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2355
a322f70c 2356 st->sb = ddf;
7d5a7ff3 2357 ddf_set_updates_pending(ddf);
a322f70c
DW
2358 return 1;
2359}
2360
5f8097be
NB
/* Convert a chunk size in bytes to the DDF chunk shift: the bit index
 * of the lowest set bit of the size expressed in 512-byte sectors.
 * A size below one sector yields -1.
 */
static int chunk_to_shift(int chunksize)
{
	const int sectors = chunksize / 512;

	return ffs(sectors) - 1;
}
2365
0e600426 2366#ifndef MDASSEMBLE
59e36268
NB
/* A used region on a physical device: first sector and length. */
struct extent {
	unsigned long long start, size;
};

/* qsort() comparator ordering extents by ascending start sector.
 * Returns -1, 0 or 1.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
2380
78e44928 2381static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
2382{
2383 /* find a list of used extents on the give physical device
2384 * (dnum) of the given ddf.
2385 * Return a malloced array of 'struct extent'
2386
613b0d17 2387 * FIXME ignore DDF_Legacy devices?
59e36268
NB
2388
2389 */
2390 struct extent *rv;
2391 int n = 0;
fcc22180 2392 unsigned int i;
59e36268 2393
503975b9 2394 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
2395
2396 for (i = 0; i < ddf->max_part; i++) {
fcc22180 2397 const struct vd_config *bvd;
2398 unsigned int ibvd;
59e36268 2399 struct vcl *v = dl->vlist[i];
fcc22180 2400 if (v == NULL ||
2401 get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
2402 &bvd, &ibvd) == DDF_NOTFOUND)
59e36268 2403 continue;
9d0c6b70 2404 rv[n].start = be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
2405 rv[n].size = be64_to_cpu(bvd->blocks);
fcc22180 2406 n++;
59e36268
NB
2407 }
2408 qsort(rv, n, sizeof(*rv), cmp_extent);
2409
9d0c6b70 2410 rv[n].start = be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
59e36268
NB
2411 rv[n].size = 0;
2412 return rv;
2413}
0e600426 2414#endif
59e36268 2415
5f8097be
NB
2416static int init_super_ddf_bvd(struct supertype *st,
2417 mdu_array_info_t *info,
2418 unsigned long long size,
2419 char *name, char *homehost,
83cd1e97 2420 int *uuid, unsigned long long data_offset)
5f8097be
NB
2421{
2422 /* We are creating a BVD inside a pre-existing container.
2423 * so st->sb is already set.
2424 * We need to create a new vd_config and a new virtual_entry
2425 */
2426 struct ddf_super *ddf = st->sb;
5aaf6c7b 2427 unsigned int venum, i;
5f8097be
NB
2428 struct virtual_entry *ve;
2429 struct vcl *vcl;
2430 struct vd_config *vc;
5f8097be 2431
fb9d0acb 2432 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2433 pr_err("This ddf already has an array called %s\n", name);
5f8097be
NB
2434 return 0;
2435 }
fb9d0acb 2436 venum = find_unused_vde(ddf);
2437 if (venum == DDF_NOTFOUND) {
2438 pr_err("Cannot find spare slot for virtual disk\n");
5f8097be
NB
2439 return 0;
2440 }
2441 ve = &ddf->virt->entries[venum];
2442
2443 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2444 * timestamp, random number
2445 */
2446 make_header_guid(ve->guid);
a8173e43 2447 ve->unit = cpu_to_be16(info->md_minor);
5f8097be 2448 ve->pad0 = 0xFFFF;
a8173e43 2449 ve->guid_crc._v16 = crc32(0, (unsigned char *)ddf->anchor.guid,
2450 DDF_GUID_LEN);
2451 ve->type = cpu_to_be16(0);
7a7cc504
NB
2452 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2453 if (info->state & 1) /* clean */
2454 ve->init_state = DDF_init_full;
2455 else
2456 ve->init_state = DDF_init_not;
2457
5f8097be
NB
2458 memset(ve->pad1, 0xff, 14);
2459 memset(ve->name, ' ', 16);
2460 if (name)
2461 strncpy(ve->name, name, 16);
2462 ddf->virt->populated_vdes =
a8173e43 2463 cpu_to_be16(be16_to_cpu(ddf->virt->populated_vdes)+1);
5f8097be
NB
2464
2465 /* Now create a new vd_config */
3d2c4fc7
DW
2466 if (posix_memalign((void**)&vcl, 512,
2467 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2468 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2469 return 0;
2470 }
59e36268
NB
2471 vcl->vcnum = venum;
2472 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
5f8097be
NB
2473 vc = &vcl->conf;
2474
2475 vc->magic = DDF_VD_CONF_MAGIC;
2476 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
60931cf9 2477 vc->timestamp = cpu_to_be32(time(0)-DECADE);
2478 vc->seqnum = cpu_to_be32(1);
5f8097be 2479 memset(vc->pad0, 0xff, 24);
5f8097be 2480 vc->chunk_shift = chunk_to_shift(info->chunk_size);
a3163bf0 2481 if (layout_md2ddf(info, vc) == -1 ||
a8173e43 2482 be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
a3163bf0 2483 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2484 __func__, info->level, info->layout, info->raid_disks);
2485 free(vcl);
2486 return 0;
2487 }
5f8097be 2488 vc->sec_elmnt_seq = 0;
3c48f7be 2489 if (alloc_other_bvds(ddf, vcl) != 0) {
2490 pr_err("%s could not allocate other bvds\n",
2491 __func__);
2492 free(vcl);
2493 return 0;
2494 }
9d0c6b70 2495 vc->blocks = cpu_to_be64(info->size * 2);
2496 vc->array_blocks = cpu_to_be64(
5f8097be
NB
2497 calc_array_size(info->level, info->raid_disks, info->layout,
2498 info->chunk_size, info->size*2));
2499 memset(vc->pad1, 0xff, 8);
60931cf9 2500 vc->spare_refs[0] = cpu_to_be32(0xffffffff);
2501 vc->spare_refs[1] = cpu_to_be32(0xffffffff);
2502 vc->spare_refs[2] = cpu_to_be32(0xffffffff);
2503 vc->spare_refs[3] = cpu_to_be32(0xffffffff);
2504 vc->spare_refs[4] = cpu_to_be32(0xffffffff);
2505 vc->spare_refs[5] = cpu_to_be32(0xffffffff);
2506 vc->spare_refs[6] = cpu_to_be32(0xffffffff);
2507 vc->spare_refs[7] = cpu_to_be32(0xffffffff);
5f8097be
NB
2508 memset(vc->cache_pol, 0, 8);
2509 vc->bg_rate = 0x80;
2510 memset(vc->pad2, 0xff, 3);
2511 memset(vc->pad3, 0xff, 52);
2512 memset(vc->pad4, 0xff, 192);
2513 memset(vc->v0, 0xff, 32);
2514 memset(vc->v1, 0xff, 32);
2515 memset(vc->v2, 0xff, 16);
2516 memset(vc->v3, 0xff, 16);
2517 memset(vc->vendor, 0xff, 32);
598f0d58 2518
8c3b8c2c 2519 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2520 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be 2521
5aaf6c7b 2522 for (i = 1; i < vc->sec_elmnt_count; i++) {
2523 memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
2524 vcl->other_bvds[i-1]->sec_elmnt_seq = i;
2525 }
2526
5f8097be
NB
2527 vcl->next = ddf->conflist;
2528 ddf->conflist = vcl;
d2ca6449 2529 ddf->currentconf = vcl;
7d5a7ff3 2530 ddf_set_updates_pending(ddf);
5f8097be
NB
2531 return 1;
2532}
2533
63eb2454 2534
0e600426 2535#ifndef MDASSEMBLE
4441541f
N
2536static int get_svd_state(const struct ddf_super *, const struct vcl *);
2537
5f8097be
NB
2538static void add_to_super_ddf_bvd(struct supertype *st,
2539 mdu_disk_info_t *dk, int fd, char *devname)
2540{
2541 /* fd and devname identify a device with-in the ddf container (st).
2542 * dk identifies a location in the new BVD.
2543 * We need to find suitable free space in that device and update
2544 * the phys_refnum and lba_offset for the newly created vd_config.
2545 * We might also want to update the type in the phys_disk
5575e7d9 2546 * section.
8592f29d
N
2547 *
2548 * Alternately: fd == -1 and we have already chosen which device to
2549 * use and recorded in dlist->raid_disk;
5f8097be
NB
2550 */
2551 struct dl *dl;
2552 struct ddf_super *ddf = st->sb;
2553 struct vd_config *vc;
f21e18ca 2554 unsigned int i;
59e36268
NB
2555 unsigned long long blocks, pos, esize;
2556 struct extent *ex;
475ccbdb 2557 unsigned int raid_disk = dk->raid_disk;
5f8097be 2558
8592f29d
N
2559 if (fd == -1) {
2560 for (dl = ddf->dlist; dl ; dl = dl->next)
2561 if (dl->raiddisk == dk->raid_disk)
2562 break;
2563 } else {
2564 for (dl = ddf->dlist; dl ; dl = dl->next)
2565 if (dl->major == dk->major &&
2566 dl->minor == dk->minor)
2567 break;
2568 }
5f8097be
NB
2569 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2570 return;
2571
d2ca6449 2572 vc = &ddf->currentconf->conf;
475ccbdb 2573 if (vc->sec_elmnt_count > 1) {
a8173e43 2574 unsigned int n = be16_to_cpu(vc->prim_elmnt_count);
475ccbdb 2575 if (raid_disk >= n)
2576 vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
2577 raid_disk %= n;
2578 }
59e36268
NB
2579
2580 ex = get_extents(ddf, dl);
2581 if (!ex)
2582 return;
2583
2584 i = 0; pos = 0;
9d0c6b70 2585 blocks = be64_to_cpu(vc->blocks);
d2ca6449
NB
2586 if (ddf->currentconf->block_sizes)
2587 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2588
2589 do {
2590 esize = ex[i].start - pos;
2591 if (esize >= blocks)
2592 break;
2593 pos = ex[i].start + ex[i].size;
2594 i++;
2595 } while (ex[i-1].size);
2596
2597 free(ex);
2598 if (esize < blocks)
2599 return;
2600
d2ca6449 2601 ddf->currentdev = dk->raid_disk;
475ccbdb 2602 vc->phys_refnum[raid_disk] = dl->disk.refnum;
9d0c6b70 2603 LBA_OFFSET(ddf, vc)[raid_disk] = cpu_to_be64(pos);
5f8097be 2604
f21e18ca 2605 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2606 if (dl->vlist[i] == NULL)
2607 break;
2608 if (i == ddf->max_part)
2609 return;
d2ca6449 2610 dl->vlist[i] = ddf->currentconf;
5f8097be 2611
8592f29d
N
2612 if (fd >= 0)
2613 dl->fd = fd;
2614 if (devname)
2615 dl->devname = devname;
7a7cc504 2616
63eb2454 2617 /* Check if we can mark array as optimal yet */
d2ca6449 2618 i = ddf->currentconf->vcnum;
63eb2454 2619 ddf->virt->entries[i].state =
2620 (ddf->virt->entries[i].state & ~DDF_state_mask)
2621 | get_svd_state(ddf, ddf->currentconf);
a8173e43 2622 be16_clear(ddf->phys->entries[dl->pdnum].type,
2623 cpu_to_be16(DDF_Global_Spare));
2624 be16_set(ddf->phys->entries[dl->pdnum].type,
2625 cpu_to_be16(DDF_Active_in_VD));
4f9bbe63 2626 dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
60931cf9 2627 __func__, dl->pdnum, be32_to_cpu(dl->disk.refnum),
4f9bbe63 2628 ddf->currentconf->vcnum, guid_str(vc->guid),
2629 dk->raid_disk);
7d5a7ff3 2630 ddf_set_updates_pending(ddf);
5f8097be
NB
2631}
2632
4a3ca8ac 2633static unsigned int find_unused_pde(const struct ddf_super *ddf)
2634{
2635 unsigned int i;
a8173e43 2636 for (i = 0; i < be16_to_cpu(ddf->phys->max_pdes); i++) {
4a3ca8ac 2637 if (all_ff(ddf->phys->entries[i].guid))
2638 return i;
2639 }
2640 return DDF_NOTFOUND;
2641}
2642
a322f70c
DW
2643/* add a device to a container, either while creating it or while
2644 * expanding a pre-existing container
2645 */
f20c3968 2646static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2647 mdu_disk_info_t *dk, int fd, char *devname,
2648 unsigned long long data_offset)
a322f70c
DW
2649{
2650 struct ddf_super *ddf = st->sb;
2651 struct dl *dd;
2652 time_t now;
2653 struct tm *tm;
2654 unsigned long long size;
2655 struct phys_disk_entry *pde;
f21e18ca 2656 unsigned int n, i;
a322f70c 2657 struct stat stb;
90fa1a29 2658 __u32 *tptr;
a322f70c 2659
78e44928
NB
2660 if (ddf->currentconf) {
2661 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2662 return 0;
78e44928
NB
2663 }
2664
a322f70c
DW
2665 /* This is device numbered dk->number. We need to create
2666 * a phys_disk entry and a more detailed disk_data entry.
2667 */
2668 fstat(fd, &stb);
4a3ca8ac 2669 n = find_unused_pde(ddf);
2670 if (n == DDF_NOTFOUND) {
2671 pr_err("%s: No free slot in array, cannot add disk\n",
2672 __func__);
2673 return 1;
2674 }
2675 pde = &ddf->phys->entries[n];
4ee8cca9 2676 get_dev_size(fd, NULL, &size);
2677 if (size <= 32*1024*1024) {
2678 pr_err("%s: device size must be at least 32MB\n",
2679 __func__);
2680 return 1;
2681 }
2682 size >>= 9;
4a3ca8ac 2683
3d2c4fc7
DW
2684 if (posix_memalign((void**)&dd, 512,
2685 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2686 pr_err("%s could allocate buffer for new disk, aborting\n",
2687 __func__);
f20c3968 2688 return 1;
3d2c4fc7 2689 }
a322f70c
DW
2690 dd->major = major(stb.st_rdev);
2691 dd->minor = minor(stb.st_rdev);
2692 dd->devname = devname;
a322f70c 2693 dd->fd = fd;
b2280677 2694 dd->spare = NULL;
a322f70c
DW
2695
2696 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2697 now = time(0);
2698 tm = localtime(&now);
2699 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2700 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2701 tptr = (__u32 *)(dd->disk.guid + 16);
2702 *tptr++ = random32();
2703 *tptr = random32();
a322f70c 2704
59e36268
NB
2705 do {
2706 /* Cannot be bothered finding a CRC of some irrelevant details*/
60931cf9 2707 dd->disk.refnum._v32 = random32();
a8173e43 2708 for (i = be16_to_cpu(ddf->active->max_pd_entries);
f21e18ca 2709 i > 0; i--)
60931cf9 2710 if (be32_eq(ddf->phys->entries[i-1].refnum,
2711 dd->disk.refnum))
59e36268 2712 break;
f21e18ca 2713 } while (i > 0);
59e36268 2714
a322f70c
DW
2715 dd->disk.forced_ref = 1;
2716 dd->disk.forced_guid = 1;
2717 memset(dd->disk.vendor, ' ', 32);
2718 memcpy(dd->disk.vendor, "Linux", 5);
2719 memset(dd->disk.pad, 0xff, 442);
b2280677 2720 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2721 dd->vlist[i] = NULL;
2722
5575e7d9
NB
2723 dd->pdnum = n;
2724
2cc2983d
N
2725 if (st->update_tail) {
2726 int len = (sizeof(struct phys_disk) +
2727 sizeof(struct phys_disk_entry));
2728 struct phys_disk *pd;
2729
503975b9 2730 pd = xmalloc(len);
2cc2983d 2731 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a8173e43 2732 pd->used_pdes = cpu_to_be16(n);
2cc2983d
N
2733 pde = &pd->entries[0];
2734 dd->mdupdate = pd;
4a3ca8ac 2735 } else
a8173e43 2736 ddf->phys->used_pdes = cpu_to_be16(
2737 1 + be16_to_cpu(ddf->phys->used_pdes));
a322f70c
DW
2738
2739 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2740 pde->refnum = dd->disk.refnum;
a8173e43 2741 pde->type = cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
2742 pde->state = cpu_to_be16(DDF_Online);
4ee8cca9 2743 dd->size = size;
2744 /*
2745 * If there is already a device in dlist, try to reserve the same
2746 * amount of workspace. Otherwise, use 32MB.
2747 * We checked disk size above already.
2748 */
2749#define __calc_lba(new, old, lba, mb) do { \
2750 unsigned long long dif; \
2751 if ((old) != NULL) \
9d0c6b70 2752 dif = (old)->size - be64_to_cpu((old)->lba); \
4ee8cca9 2753 else \
2754 dif = (new)->size; \
2755 if ((new)->size > dif) \
9d0c6b70 2756 (new)->lba = cpu_to_be64((new)->size - dif); \
4ee8cca9 2757 else \
9d0c6b70 2758 (new)->lba = cpu_to_be64((new)->size - (mb*1024*2)); \
4ee8cca9 2759 } while (0)
2760 __calc_lba(dd, ddf->dlist, workspace_lba, 32);
2761 __calc_lba(dd, ddf->dlist, primary_lba, 16);
2762 __calc_lba(dd, ddf->dlist, secondary_lba, 32);
2763 pde->config_size = dd->workspace_lba;
2764
a322f70c
DW
2765 sprintf(pde->path, "%17.17s","Information: nil") ;
2766 memset(pde->pad, 0xff, 6);
2767
2cc2983d
N
2768 if (st->update_tail) {
2769 dd->next = ddf->add_list;
2770 ddf->add_list = dd;
2771 } else {
2772 dd->next = ddf->dlist;
2773 ddf->dlist = dd;
7d5a7ff3 2774 ddf_set_updates_pending(ddf);
2cc2983d 2775 }
f20c3968
DW
2776
2777 return 0;
a322f70c
DW
2778}
2779
4dd968cc
N
2780static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2781{
2782 struct ddf_super *ddf = st->sb;
2783 struct dl *dl;
2784
2785 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2786 * disappeared from the container.
2787 * We need to arrange that it disappears from the metadata and
2788 * internal data structures too.
2789 * Most of the work is done by ddf_process_update which edits
2790 * the metadata and closes the file handle and attaches the memory
2791 * where free_updates will free it.
2792 */
2793 for (dl = ddf->dlist; dl ; dl = dl->next)
2794 if (dl->major == dk->major &&
2795 dl->minor == dk->minor)
2796 break;
2797 if (!dl)
2798 return -1;
2799
2800 if (st->update_tail) {
2801 int len = (sizeof(struct phys_disk) +
2802 sizeof(struct phys_disk_entry));
2803 struct phys_disk *pd;
2804
503975b9 2805 pd = xmalloc(len);
4dd968cc 2806 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a8173e43 2807 pd->used_pdes = cpu_to_be16(dl->pdnum);
2808 pd->entries[0].state = cpu_to_be16(DDF_Missing);
4dd968cc
N
2809 append_metadata_update(st, pd, len);
2810 }
2811 return 0;
2812}
4441541f 2813#endif
4dd968cc 2814
a322f70c
DW
2815/*
2816 * This is the write_init_super method for a ddf container. It is
2817 * called when creating a container or adding another device to a
2818 * container.
2819 */
42d5dfd9 2820#define NULL_CONF_SZ 4096
18a2f463 2821
8e9387ac 2822static char *null_aligned;
273989b9
N
2823static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
2824 int update)
a322f70c 2825{
7f798aca 2826 unsigned long long sector;
2827 struct ddf_header *header;
2828 int fd, i, n_config, conf_size;
a4057a88 2829 int ret = 0;
7f798aca 2830
8e9387ac 2831 if (null_aligned == NULL) {
2832 if (posix_memalign((void **)&null_aligned, 4096, NULL_CONF_SZ)
2833 != 0)
2834 return 0;
2835 memset(null_aligned, 0xff, NULL_CONF_SZ);
2836 }
2837
7f798aca 2838 fd = d->fd;
2839
2840 switch (type) {
2841 case DDF_HEADER_PRIMARY:
2842 header = &ddf->primary;
9d0c6b70 2843 sector = be64_to_cpu(header->primary_lba);
7f798aca 2844 break;
2845 case DDF_HEADER_SECONDARY:
2846 header = &ddf->secondary;
9d0c6b70 2847 sector = be64_to_cpu(header->secondary_lba);
7f798aca 2848 break;
2849 default:
2850 return 0;
2851 }
2852
2853 header->type = type;
a4057a88 2854 header->openflag = 1;
7f798aca 2855 header->crc = calc_crc(header, 512);
2856
2857 lseek64(fd, sector<<9, 0);
2858 if (write(fd, header, 512) < 0)
a4057a88 2859 goto out;
7f798aca 2860
2861 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2862 if (write(fd, &ddf->controller, 512) < 0)
a4057a88 2863 goto out;
a322f70c 2864
7f798aca 2865 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2866 if (write(fd, ddf->phys, ddf->pdsize) < 0)
a4057a88 2867 goto out;
7f798aca 2868 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2869 if (write(fd, ddf->virt, ddf->vdsize) < 0)
a4057a88 2870 goto out;
7f798aca 2871
2872 /* Now write lots of config records. */
2873 n_config = ddf->max_part;
2874 conf_size = ddf->conf_rec_len * 512;
2875 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2876 struct vcl *c;
2877 struct vd_config *vdc = NULL;
2878 if (i == n_config) {
7f798aca 2879 c = (struct vcl *)d->spare;
e3c2a365 2880 if (c)
2881 vdc = &c->conf;
2882 } else {
2883 unsigned int dummy;
2884 c = d->vlist[i];
2885 if (c)
2886 get_pd_index_from_refnum(
2887 c, d->disk.refnum,
2888 ddf->mppe,
2889 (const struct vd_config **)&vdc,
2890 &dummy);
2891 }
7f798aca 2892 if (c) {
be9b9ef4 2893 dprintf("writing conf record %i on disk %08x for %s/%u\n",
60931cf9 2894 i, be32_to_cpu(d->disk.refnum),
ad60eea1 2895 guid_str(vdc->guid),
be9b9ef4 2896 vdc->sec_elmnt_seq);
dacf3dc5 2897 vdc->seqnum = header->seq;
e3c2a365 2898 vdc->crc = calc_crc(vdc, conf_size);
2899 if (write(fd, vdc, conf_size) < 0)
7f798aca 2900 break;
273989b9 2901 } else if (!update) {
7f798aca 2902 unsigned int togo = conf_size;
2903 while (togo > NULL_CONF_SZ) {
2904 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2905 break;
2906 togo -= NULL_CONF_SZ;
2907 }
2908 if (write(fd, null_aligned, togo) < 0)
2909 break;
2910 }
2911 }
2912 if (i <= n_config)
a4057a88 2913 goto out;
7f798aca 2914
2915 d->disk.crc = calc_crc(&d->disk, 512);
2916 if (write(fd, &d->disk, 512) < 0)
a4057a88 2917 goto out;
7f798aca 2918
a4057a88 2919 ret = 1;
2920out:
2921 header->openflag = 0;
2922 header->crc = calc_crc(header, 512);
2923
2924 lseek64(fd, sector<<9, 0);
2925 if (write(fd, header, 512) < 0)
2926 ret = 0;
2927
2928 return ret;
7f798aca 2929}
2930
273989b9
N
2931static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d,
2932 int update)
9bf38704 2933{
2934 unsigned long long size;
2935 int fd = d->fd;
2936 if (fd < 0)
2937 return 0;
2938
2939 /* We need to fill in the primary, (secondary) and workspace
2940 * lba's in the headers, set their checksums,
2941 * Also checksum phys, virt....
2942 *
2943 * Then write everything out, finally the anchor is written.
2944 */
2945 get_dev_size(fd, NULL, &size);
2946 size /= 512;
9d0c6b70 2947 if (be64_to_cpu(d->workspace_lba) != 0ULL)
9bf38704 2948 ddf->anchor.workspace_lba = d->workspace_lba;
2949 else
2950 ddf->anchor.workspace_lba =
9d0c6b70 2951 cpu_to_be64(size - 32*1024*2);
2952 if (be64_to_cpu(d->primary_lba) != 0ULL)
9bf38704 2953 ddf->anchor.primary_lba = d->primary_lba;
2954 else
2955 ddf->anchor.primary_lba =
9d0c6b70 2956 cpu_to_be64(size - 16*1024*2);
2957 if (be64_to_cpu(d->secondary_lba) != 0ULL)
9bf38704 2958 ddf->anchor.secondary_lba = d->secondary_lba;
2959 else
2960 ddf->anchor.secondary_lba =
9d0c6b70 2961 cpu_to_be64(size - 32*1024*2);
9bf38704 2962 ddf->anchor.seq = ddf->active->seq;
2963 memcpy(&ddf->primary, &ddf->anchor, 512);
2964 memcpy(&ddf->secondary, &ddf->anchor, 512);
2965
2966 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
60931cf9 2967 ddf->anchor.seq = cpu_to_be32(0xFFFFFFFF); /* no sequencing in anchor */
9bf38704 2968 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2969
273989b9 2970 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY, update))
9bf38704 2971 return 0;
2972
273989b9 2973 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY, update))
9bf38704 2974 return 0;
2975
2976 lseek64(fd, (size-1)*512, SEEK_SET);
2977 if (write(fd, &ddf->anchor, 512) < 0)
2978 return 0;
2979
2980 return 1;
2981}
2982
4441541f 2983#ifndef MDASSEMBLE
273989b9 2984static int __write_init_super_ddf(struct supertype *st, int update)
7f798aca 2985{
a322f70c 2986 struct ddf_super *ddf = st->sb;
a322f70c 2987 struct dl *d;
175593bf
DW
2988 int attempts = 0;
2989 int successes = 0;
42d5dfd9 2990
7d5a7ff3 2991 pr_state(ddf, __func__);
a322f70c 2992
175593bf
DW
2993 /* try to write updated metadata,
2994 * if we catch a failure move on to the next disk
2995 */
a322f70c 2996 for (d = ddf->dlist; d; d=d->next) {
175593bf 2997 attempts++;
273989b9 2998 successes += _write_super_to_disk(ddf, d, update);
175593bf
DW
2999 }
3000
175593bf 3001 return attempts != successes;
a322f70c 3002}
7a7cc504
NB
3003
3004static int write_init_super_ddf(struct supertype *st)
3005{
9b1fb677
DW
3006 struct ddf_super *ddf = st->sb;
3007 struct vcl *currentconf = ddf->currentconf;
3008
3009 /* we are done with currentconf reset it to point st at the container */
3010 ddf->currentconf = NULL;
edd8d13c
NB
3011
3012 if (st->update_tail) {
3013 /* queue the virtual_disk and vd_config as metadata updates */
3014 struct virtual_disk *vd;
3015 struct vd_config *vc;
c5943560 3016 int len, tlen;
3017 unsigned int i;
edd8d13c 3018
9b1fb677 3019 if (!currentconf) {
2cc2983d
N
3020 int len = (sizeof(struct phys_disk) +
3021 sizeof(struct phys_disk_entry));
3022
3023 /* adding a disk to the container. */
3024 if (!ddf->add_list)
3025 return 0;
3026
3027 append_metadata_update(st, ddf->add_list->mdupdate, len);
3028 ddf->add_list->mdupdate = NULL;
3029 return 0;
3030 }
3031
3032 /* Newly created VD */
3033
edd8d13c
NB
3034 /* First the virtual disk. We have a slightly fake header */
3035 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 3036 vd = xmalloc(len);
edd8d13c 3037 *vd = *ddf->virt;
9b1fb677 3038 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
a8173e43 3039 vd->populated_vdes = cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
3040 append_metadata_update(st, vd, len);
3041
3042 /* Then the vd_config */
3043 len = ddf->conf_rec_len * 512;
c5943560 3044 tlen = len * currentconf->conf.sec_elmnt_count;
3045 vc = xmalloc(tlen);
9b1fb677 3046 memcpy(vc, &currentconf->conf, len);
c5943560 3047 for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
3048 memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
3049 len);
3050 append_metadata_update(st, vc, tlen);
edd8d13c
NB
3051
3052 /* FIXME I need to close the fds! */
3053 return 0;
613b0d17 3054 } else {
d682f344 3055 struct dl *d;
19041058 3056 if (!currentconf)
3057 for (d = ddf->dlist; d; d=d->next)
3058 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
273989b9 3059 return __write_init_super_ddf(st, 0);
d682f344 3060 }
7a7cc504
NB
3061}
3062
a322f70c
DW
3063#endif
3064
387fcd59
N
3065static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
3066 unsigned long long data_offset)
a322f70c
DW
3067{
3068 /* We must reserve the last 32Meg */
3069 if (devsize <= 32*1024*2)
3070 return 0;
3071 return devsize - 32*1024*2;
3072}
3073
3074#ifndef MDASSEMBLE
8592f29d
N
3075
3076static int reserve_space(struct supertype *st, int raiddisks,
3077 unsigned long long size, int chunk,
3078 unsigned long long *freesize)
3079{
3080 /* Find 'raiddisks' spare extents at least 'size' big (but
3081 * only caring about multiples of 'chunk') and remember
3082 * them.
3083 * If the cannot be found, fail.
3084 */
3085 struct dl *dl;
3086 struct ddf_super *ddf = st->sb;
3087 int cnt = 0;
3088
3089 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 3090 dl->raiddisk = -1;
8592f29d
N
3091 dl->esize = 0;
3092 }
3093 /* Now find largest extent on each device */
3094 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3095 struct extent *e = get_extents(ddf, dl);
3096 unsigned long long pos = 0;
3097 int i = 0;
3098 int found = 0;
3099 unsigned long long minsize = size;
3100
3101 if (size == 0)
3102 minsize = chunk;
3103
3104 if (!e)
3105 continue;
3106 do {
3107 unsigned long long esize;
3108 esize = e[i].start - pos;
3109 if (esize >= minsize) {
3110 found = 1;
3111 minsize = esize;
3112 }
3113 pos = e[i].start + e[i].size;
3114 i++;
3115 } while (e[i-1].size);
3116 if (found) {
3117 cnt++;
3118 dl->esize = minsize;
3119 }
3120 free(e);
3121 }
3122 if (cnt < raiddisks) {
e7b84f9d 3123 pr_err("not enough devices with space to create array.\n");
8592f29d
N
3124 return 0; /* No enough free spaces large enough */
3125 }
3126 if (size == 0) {
3127 /* choose the largest size of which there are at least 'raiddisk' */
3128 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3129 struct dl *dl2;
3130 if (dl->esize <= size)
3131 continue;
3132 /* This is bigger than 'size', see if there are enough */
3133 cnt = 0;
7b80ad6a 3134 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
3135 if (dl2->esize >= dl->esize)
3136 cnt++;
3137 if (cnt >= raiddisks)
3138 size = dl->esize;
3139 }
3140 if (chunk) {
3141 size = size / chunk;
3142 size *= chunk;
3143 }
3144 *freesize = size;
3145 if (size < 32) {
e7b84f9d 3146 pr_err("not enough spare devices to create array.\n");
8592f29d
N
3147 return 0;
3148 }
3149 }
3150 /* We have a 'size' of which there are enough spaces.
3151 * We simply do a first-fit */
3152 cnt = 0;
3153 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
3154 if (dl->esize < size)
3155 continue;
613b0d17 3156
8592f29d
N
3157 dl->raiddisk = cnt;
3158 cnt++;
3159 }
3160 return 1;
3161}
3162
2c514b71
NB
3163static int
3164validate_geometry_ddf_container(struct supertype *st,
3165 int level, int layout, int raiddisks,
3166 int chunk, unsigned long long size,
af4348dd 3167 unsigned long long data_offset,
2c514b71
NB
3168 char *dev, unsigned long long *freesize,
3169 int verbose);
78e44928
NB
3170
3171static int validate_geometry_ddf_bvd(struct supertype *st,
3172 int level, int layout, int raiddisks,
c21e737b 3173 int *chunk, unsigned long long size,
af4348dd 3174 unsigned long long data_offset,
2c514b71
NB
3175 char *dev, unsigned long long *freesize,
3176 int verbose);
78e44928
NB
3177
3178static int validate_geometry_ddf(struct supertype *st,
2c514b71 3179 int level, int layout, int raiddisks,
c21e737b 3180 int *chunk, unsigned long long size,
af4348dd 3181 unsigned long long data_offset,
2c514b71
NB
3182 char *dev, unsigned long long *freesize,
3183 int verbose)
a322f70c
DW
3184{
3185 int fd;
3186 struct mdinfo *sra;
3187 int cfd;
3188
3189 /* ddf potentially supports lots of things, but it depends on
3190 * what devices are offered (and maybe kernel version?)
3191 * If given unused devices, we will make a container.
3192 * If given devices in a container, we will make a BVD.
3193 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3194 */
3195
7ccc4cc4 3196 if (*chunk == UnSet)
bb7295f1
N
3197 *chunk = DEFAULT_CHUNK;
3198
542ef4ec 3199 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 3200 if (level == LEVEL_CONTAINER) {
78e44928
NB
3201 /* Must be a fresh device to add to a container */
3202 return validate_geometry_ddf_container(st, level, layout,
7ccc4cc4 3203 raiddisks, *chunk,
af4348dd
N
3204 size, data_offset, dev,
3205 freesize,
2c514b71 3206 verbose);
5f8097be
NB
3207 }
3208
78e44928 3209 if (!dev) {
a3163bf0 3210 mdu_array_info_t array = {
3211 .level = level, .layout = layout,
3212 .raid_disks = raiddisks
3213 };
3214 struct vd_config conf;
3215 if (layout_md2ddf(&array, &conf) == -1) {
b42f577a 3216 if (verbose)
94b08b7c 3217 pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
3218 level, layout, raiddisks);
78e44928 3219 return 0;
b42f577a 3220 }
78e44928 3221 /* Should check layout? etc */
8592f29d
N
3222
3223 if (st->sb && freesize) {
3224 /* --create was given a container to create in.
3225 * So we need to check that there are enough
3226 * free spaces and return the amount of space.
3227 * We may as well remember which drives were
3228 * chosen so that add_to_super/getinfo_super
3229 * can return them.
3230 */
7ccc4cc4 3231 return reserve_space(st, raiddisks, size, *chunk, freesize);
8592f29d 3232 }
a322f70c 3233 return 1;
78e44928 3234 }
a322f70c 3235
8592f29d
N
3236 if (st->sb) {
3237 /* A container has already been opened, so we are
3238 * creating in there. Maybe a BVD, maybe an SVD.
3239 * Should make a distinction one day.
3240 */
3241 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
3242 chunk, size, data_offset, dev,
3243 freesize,
8592f29d
N
3244 verbose);
3245 }
78e44928
NB
3246 /* This is the first device for the array.
3247 * If it is a container, we read it in and do automagic allocations,
3248 * no other devices should be given.
3249 * Otherwise it must be a member device of a container, and we
3250 * do manual allocation.
3251 * Later we should check for a BVD and make an SVD.
a322f70c 3252 */
a322f70c
DW
3253 fd = open(dev, O_RDONLY|O_EXCL, 0);
3254 if (fd >= 0) {
4dd2df09 3255 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
3256 close(fd);
3257 if (sra && sra->array.major_version == -1 &&
78e44928
NB
3258 strcmp(sra->text_version, "ddf") == 0) {
3259
3260 /* load super */
3261 /* find space for 'n' devices. */
3262 /* remember the devices */
3263 /* Somehow return the fact that we have enough */
a322f70c
DW
3264 }
3265
2c514b71 3266 if (verbose)
e7b84f9d
N
3267 pr_err("ddf: Cannot create this array "
3268 "on device %s - a container is required.\n",
3269 dev);
a322f70c
DW
3270 return 0;
3271 }
3272 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 3273 if (verbose)
e7b84f9d 3274 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3275 dev, strerror(errno));
a322f70c
DW
3276 return 0;
3277 }
3278 /* Well, it is in use by someone, maybe a 'ddf' container. */
3279 cfd = open_container(fd);
3280 if (cfd < 0) {
3281 close(fd);
2c514b71 3282 if (verbose)
e7b84f9d 3283 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 3284 dev, strerror(EBUSY));
a322f70c
DW
3285 return 0;
3286 }
4dd2df09 3287 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
3288 close(fd);
3289 if (sra && sra->array.major_version == -1 &&
3290 strcmp(sra->text_version, "ddf") == 0) {
3291 /* This is a member of a ddf container. Load the container
3292 * and try to create a bvd
3293 */
3294 struct ddf_super *ddf;
e1902a7b 3295 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 3296 st->sb = ddf;
4dd2df09 3297 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 3298 close(cfd);
78e44928 3299 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 3300 raiddisks, chunk, size,
af4348dd 3301 data_offset,
2c514b71
NB
3302 dev, freesize,
3303 verbose);
a322f70c
DW
3304 }
3305 close(cfd);
c42ec1ed
DW
3306 } else /* device may belong to a different container */
3307 return 0;
3308
a322f70c
DW
3309 return 1;
3310}
3311
2c514b71
NB
3312static int
3313validate_geometry_ddf_container(struct supertype *st,
3314 int level, int layout, int raiddisks,
3315 int chunk, unsigned long long size,
af4348dd 3316 unsigned long long data_offset,
2c514b71
NB
3317 char *dev, unsigned long long *freesize,
3318 int verbose)
a322f70c
DW
3319{
3320 int fd;
3321 unsigned long long ldsize;
3322
3323 if (level != LEVEL_CONTAINER)
3324 return 0;
3325 if (!dev)
3326 return 1;
3327
3328 fd = open(dev, O_RDONLY|O_EXCL, 0);
3329 if (fd < 0) {
2c514b71 3330 if (verbose)
e7b84f9d 3331 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3332 dev, strerror(errno));
a322f70c
DW
3333 return 0;
3334 }
3335 if (!get_dev_size(fd, dev, &ldsize)) {
3336 close(fd);
3337 return 0;
3338 }
3339 close(fd);
3340
387fcd59 3341 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
3342 if (*freesize == 0)
3343 return 0;
a322f70c
DW
3344
3345 return 1;
3346}
3347
78e44928
NB
3348static int validate_geometry_ddf_bvd(struct supertype *st,
3349 int level, int layout, int raiddisks,
c21e737b 3350 int *chunk, unsigned long long size,
af4348dd 3351 unsigned long long data_offset,
2c514b71
NB
3352 char *dev, unsigned long long *freesize,
3353 int verbose)
a322f70c
DW
3354{
3355 struct stat stb;
3356 struct ddf_super *ddf = st->sb;
3357 struct dl *dl;
5f8097be
NB
3358 unsigned long long pos = 0;
3359 unsigned long long maxsize;
3360 struct extent *e;
3361 int i;
a322f70c 3362 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
3363 if (level == LEVEL_CONTAINER) {
3364 if (verbose)
e7b84f9d 3365 pr_err("DDF cannot create a container within an container\n");
a322f70c 3366 return 0;
b42f577a 3367 }
a322f70c
DW
3368 /* We must have the container info already read in. */
3369 if (!ddf)
3370 return 0;
3371
5f8097be
NB
3372 if (!dev) {
3373 /* General test: make sure there is space for
3374 * 'raiddisks' device extents of size 'size'.
3375 */
3376 unsigned long long minsize = size;
3377 int dcnt = 0;
3378 if (minsize == 0)
3379 minsize = 8;
3380 for (dl = ddf->dlist; dl ; dl = dl->next)
3381 {
3382 int found = 0;
7e1432fb 3383 pos = 0;
5f8097be
NB
3384
3385 i = 0;
3386 e = get_extents(ddf, dl);
3387 if (!e) continue;
3388 do {
3389 unsigned long long esize;
3390 esize = e[i].start - pos;
3391 if (esize >= minsize)
3392 found = 1;
3393 pos = e[i].start + e[i].size;
3394 i++;
3395 } while (e[i-1].size);
3396 if (found)
3397 dcnt++;
3398 free(e);
3399 }
3400 if (dcnt < raiddisks) {
2c514b71 3401 if (verbose)
e7b84f9d
N
3402 pr_err("ddf: Not enough devices with "
3403 "space for this array (%d < %d)\n",
3404 dcnt, raiddisks);
5f8097be
NB
3405 return 0;
3406 }
3407 return 1;
3408 }
a322f70c
DW
3409 /* This device must be a member of the set */
3410 if (stat(dev, &stb) < 0)
3411 return 0;
3412 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3413 return 0;
3414 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
3415 if (dl->major == (int)major(stb.st_rdev) &&
3416 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
3417 break;
3418 }
5f8097be 3419 if (!dl) {
2c514b71 3420 if (verbose)
e7b84f9d 3421 pr_err("ddf: %s is not in the "
613b0d17
N
3422 "same DDF set\n",
3423 dev);
5f8097be
NB
3424 return 0;
3425 }
3426 e = get_extents(ddf, dl);
3427 maxsize = 0;
3428 i = 0;
3429 if (e) do {
613b0d17
N
3430 unsigned long long esize;
3431 esize = e[i].start - pos;
3432 if (esize >= maxsize)
3433 maxsize = esize;
3434 pos = e[i].start + e[i].size;
3435 i++;
3436 } while (e[i-1].size);
5f8097be 3437 *freesize = maxsize;
a322f70c
DW
3438 // FIXME here I am
3439
3440 return 1;
3441}
59e36268 3442
a322f70c 3443static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 3444 void **sbp, char *devname)
a322f70c
DW
3445{
3446 struct mdinfo *sra;
3447 struct ddf_super *super;
3448 struct mdinfo *sd, *best = NULL;
3449 int bestseq = 0;
3450 int seq;
3451 char nm[20];
3452 int dfd;
3453
b526e52d 3454 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
3455 if (!sra)
3456 return 1;
3457 if (sra->array.major_version != -1 ||
3458 sra->array.minor_version != -2 ||
3459 strcmp(sra->text_version, "ddf") != 0)
3460 return 1;
3461
6416d527 3462 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3463 return 1;
a2349791 3464 memset(super, 0, sizeof(*super));
a322f70c
DW
3465
3466 /* first, try each device, and choose the best ddf */
3467 for (sd = sra->devs ; sd ; sd = sd->next) {
3468 int rv;
3469 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3470 dfd = dev_open(nm, O_RDONLY);
3471 if (dfd < 0)
a322f70c
DW
3472 return 2;
3473 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3474 close(dfd);
a322f70c 3475 if (rv == 0) {
60931cf9 3476 seq = be32_to_cpu(super->active->seq);
a322f70c
DW
3477 if (super->active->openflag)
3478 seq--;
3479 if (!best || seq > bestseq) {
3480 bestseq = seq;
3481 best = sd;
3482 }
3483 }
3484 }
3485 if (!best)
3486 return 1;
3487 /* OK, load this ddf */
3488 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3489 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3490 if (dfd < 0)
a322f70c
DW
3491 return 1;
3492 load_ddf_headers(dfd, super, NULL);
3493 load_ddf_global(dfd, super, NULL);
3494 close(dfd);
3495 /* Now we need the device-local bits */
3496 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3497 int rv;
3498
a322f70c 3499 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3500 dfd = dev_open(nm, O_RDWR);
7a7cc504 3501 if (dfd < 0)
a322f70c 3502 return 2;
3d2c4fc7
DW
3503 rv = load_ddf_headers(dfd, super, NULL);
3504 if (rv == 0)
e1902a7b 3505 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3506 if (rv)
3507 return 1;
a322f70c 3508 }
33414a01 3509
a322f70c
DW
3510 *sbp = super;
3511 if (st->ss == NULL) {
78e44928 3512 st->ss = &super_ddf;
a322f70c
DW
3513 st->minor_version = 0;
3514 st->max_devs = 512;
3515 }
4dd2df09 3516 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3517 return 0;
3518}
2b959fbf
N
3519
3520static int load_container_ddf(struct supertype *st, int fd,
3521 char *devname)
3522{
3523 return load_super_ddf_all(st, fd, &st->sb, devname);
3524}
3525
0e600426 3526#endif /* MDASSEMBLE */
a322f70c 3527
a5c7adb3 3528static int check_secondary(const struct vcl *vc)
3529{
3530 const struct vd_config *conf = &vc->conf;
3531 int i;
3532
3533 /* The only DDF secondary RAID level md can support is
3534 * RAID 10, if the stripe sizes and Basic volume sizes
3535 * are all equal.
3536 * Other configurations could in theory be supported by exposing
3537 * the BVDs to user space and using device mapper for the secondary
3538 * mapping. So far we don't support that.
3539 */
3540
3541 __u64 sec_elements[4] = {0, 0, 0, 0};
3542#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3543#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3544
3545 if (vc->other_bvds == NULL) {
3546 pr_err("No BVDs for secondary RAID found\n");
3547 return -1;
3548 }
3549 if (conf->prl != DDF_RAID1) {
3550 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3551 return -1;
3552 }
3553 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3554 pr_err("Secondary RAID level %d is unsupported\n",
3555 conf->srl);
3556 return -1;
3557 }
3558 __set_sec_seen(conf->sec_elmnt_seq);
3559 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3560 const struct vd_config *bvd = vc->other_bvds[i];
3c48f7be 3561 if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
c98567ba 3562 continue;
a5c7adb3 3563 if (bvd->srl != conf->srl) {
3564 pr_err("Inconsistent secondary RAID level across BVDs\n");
3565 return -1;
3566 }
3567 if (bvd->prl != conf->prl) {
3568 pr_err("Different RAID levels for BVDs are unsupported\n");
3569 return -1;
3570 }
a8173e43 3571 if (!be16_eq(bvd->prim_elmnt_count, conf->prim_elmnt_count)) {
a5c7adb3 3572 pr_err("All BVDs must have the same number of primary elements\n");
3573 return -1;
3574 }
3575 if (bvd->chunk_shift != conf->chunk_shift) {
3576 pr_err("Different strip sizes for BVDs are unsupported\n");
3577 return -1;
3578 }
9d0c6b70 3579 if (!be64_eq(bvd->array_blocks, conf->array_blocks)) {
a5c7adb3 3580 pr_err("Different BVD sizes are unsupported\n");
3581 return -1;
3582 }
3583 __set_sec_seen(bvd->sec_elmnt_seq);
3584 }
3585 for (i = 0; i < conf->sec_elmnt_count; i++) {
3586 if (!__was_sec_seen(i)) {
3587 pr_err("BVD %d is missing\n", i);
3588 return -1;
3589 }
3590 }
3591 return 0;
3592}
3593
8a38db86 3594static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
60931cf9 3595 be32 refnum, unsigned int nmax,
4e587018 3596 const struct vd_config **bvd,
3597 unsigned int *idx)
8a38db86 3598{
4e587018 3599 unsigned int i, j, n, sec, cnt;
3600
a8173e43 3601 cnt = be16_to_cpu(vc->conf.prim_elmnt_count);
4e587018 3602 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3603
3604 for (i = 0, j = 0 ; i < nmax ; i++) {
3605 /* j counts valid entries for this BVD */
60931cf9 3606 if (be32_to_cpu(vc->conf.phys_refnum[i]) != 0xffffffff)
4e587018 3607 j++;
60931cf9 3608 if (be32_eq(vc->conf.phys_refnum[i], refnum)) {
4e587018 3609 *bvd = &vc->conf;
3610 *idx = i;
3611 return sec * cnt + j - 1;
3612 }
3613 }
3614 if (vc->other_bvds == NULL)
3615 goto bad;
3616
3617 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3618 struct vd_config *vd = vc->other_bvds[n-1];
4e587018 3619 sec = vd->sec_elmnt_seq;
3c48f7be 3620 if (sec == DDF_UNUSED_BVD)
3621 continue;
4e587018 3622 for (i = 0, j = 0 ; i < nmax ; i++) {
60931cf9 3623 if (be32_to_cpu(vd->phys_refnum[i]) != 0xffffffff)
4e587018 3624 j++;
60931cf9 3625 if (be32_eq(vd->phys_refnum[i], refnum)) {
4e587018 3626 *bvd = vd;
3627 *idx = i;
3628 return sec * cnt + j - 1;
3629 }
3630 }
3631 }
3632bad:
3633 *bvd = NULL;
d6e7b083 3634 return DDF_NOTFOUND;
8a38db86 3635}
3636
00bbdbda 3637static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3638{
3639 /* Given a container loaded by load_super_ddf_all,
3640 * extract information about all the arrays into
3641 * an mdinfo tree.
3642 *
3643 * For each vcl in conflist: create an mdinfo, fill it in,
3644 * then look for matching devices (phys_refnum) in dlist
3645 * and create appropriate device mdinfo.
3646 */
3647 struct ddf_super *ddf = st->sb;
3648 struct mdinfo *rest = NULL;
3649 struct vcl *vc;
3650
3651 for (vc = ddf->conflist ; vc ; vc=vc->next)
3652 {
f21e18ca
N
3653 unsigned int i;
3654 unsigned int j;
598f0d58 3655 struct mdinfo *this;
00bbdbda 3656 char *ep;
90fa1a29 3657 __u32 *cptr;
8a38db86 3658 unsigned int pd;
00bbdbda
N
3659
3660 if (subarray &&
3661 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3662 *ep != '\0'))
3663 continue;
3664
a5c7adb3 3665 if (vc->conf.sec_elmnt_count > 1) {
3666 if (check_secondary(vc) != 0)
3667 continue;
3668 }
3669
503975b9 3670 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3671 this->next = rest;
3672 rest = this;
3673
8a2848a7 3674 if (layout_ddf2md(&vc->conf, &this->array))
3675 continue;
598f0d58 3676 this->array.md_minor = -1;
f35f2525
N
3677 this->array.major_version = -1;
3678 this->array.minor_version = -2;
90fa1a29
JS
3679 cptr = (__u32 *)(vc->conf.guid + 16);
3680 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58 3681 this->array.utime = DECADE +
60931cf9 3682 be32_to_cpu(vc->conf.timestamp);
598f0d58
NB
3683 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3684
59e36268 3685 i = vc->vcnum;
7a7cc504
NB
3686 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3687 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3688 DDF_init_full) {
598f0d58 3689 this->array.state = 0;
ed9d66aa
NB
3690 this->resync_start = 0;
3691 } else {
598f0d58 3692 this->array.state = 1;
b7528a20 3693 this->resync_start = MaxSector;
ed9d66aa 3694 }
db42fa9b
N
3695 memcpy(this->name, ddf->virt->entries[i].name, 16);
3696 this->name[16]=0;
3697 for(j=0; j<16; j++)
3698 if (this->name[j] == ' ')
3699 this->name[j] = 0;
598f0d58
NB
3700
3701 memset(this->uuid, 0, sizeof(this->uuid));
9d0c6b70 3702 this->component_size = be64_to_cpu(vc->conf.blocks);
598f0d58 3703 this->array.size = this->component_size / 2;
5f2aace8 3704 this->container_member = i;
598f0d58 3705
c5afc314
N
3706 ddf->currentconf = vc;
3707 uuid_from_super_ddf(st, this->uuid);
f646805e 3708 if (!subarray)
3709 ddf->currentconf = NULL;
c5afc314 3710
60f18132 3711 sprintf(this->text_version, "/%s/%d",
4dd2df09 3712 st->container_devnm, this->container_member);
60f18132 3713
a8173e43 3714 for (pd = 0; pd < be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3715 struct mdinfo *dev;
3716 struct dl *d;
4e587018 3717 const struct vd_config *bvd;
3718 unsigned int iphys;
fa033bec 3719 int stt;
598f0d58 3720
60931cf9 3721 if (be32_to_cpu(ddf->phys->entries[pd].refnum)
3722 == 0xFFFFFFFF)
bc17324f 3723 continue;
0cf5ef67 3724
a8173e43 3725 stt = be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3726 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3727 != DDF_Online)
3728 continue;
3729
8a38db86 3730 i = get_pd_index_from_refnum(
4e587018 3731 vc, ddf->phys->entries[pd].refnum,
3732 ddf->mppe, &bvd, &iphys);
d6e7b083 3733 if (i == DDF_NOTFOUND)
8a38db86 3734 continue;
3735
fa033bec 3736 this->array.working_disks++;
bc17324f 3737
0cf5ef67 3738 for (d = ddf->dlist; d ; d=d->next)
60931cf9 3739 if (be32_eq(d->disk.refnum,
3740 ddf->phys->entries[pd].refnum))
0cf5ef67
N
3741 break;
3742 if (d == NULL)
3743 /* Haven't found that one yet, maybe there are others */
3744 continue;
3745
503975b9 3746 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3747 dev->next = this->devs;
3748 this->devs = dev;
3749
60931cf9 3750 dev->disk.number = be32_to_cpu(d->disk.refnum);
598f0d58
NB
3751 dev->disk.major = d->major;
3752 dev->disk.minor = d->minor;
3753 dev->disk.raid_disk = i;
3754 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3755 dev->recovery_start = MaxSector;
598f0d58 3756
60931cf9 3757 dev->events = be32_to_cpu(ddf->primary.seq);
57a66662 3758 dev->data_offset =
9d0c6b70 3759 be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
3760 dev->component_size = be64_to_cpu(bvd->blocks);
598f0d58
NB
3761 if (d->devname)
3762 strcpy(dev->name, d->devname);
3763 }
3764 }
3765 return rest;
3766}
3767
955e9ea1 3768static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3769{
955e9ea1 3770 struct ddf_super *ddf = st->sb;
a322f70c 3771 unsigned long long dsize;
6416d527 3772 void *buf;
3d2c4fc7 3773 int rc;
a322f70c 3774
955e9ea1
DW
3775 if (!ddf)
3776 return 1;
3777
a322f70c
DW
3778 if (!get_dev_size(fd, NULL, &dsize))
3779 return 1;
3780
dbf98368 3781 if (ddf->dlist || ddf->conflist) {
3782 struct stat sta;
3783 struct dl *dl;
3784 int ofd, ret;
3785
3786 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3787 pr_err("%s: file descriptor for invalid device\n",
3788 __func__);
3789 return 1;
3790 }
3791 for (dl = ddf->dlist; dl; dl = dl->next)
3792 if (dl->major == (int)major(sta.st_rdev) &&
3793 dl->minor == (int)minor(sta.st_rdev))
3794 break;
3795 if (!dl) {
3796 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3797 (int)major(sta.st_rdev),
3798 (int)minor(sta.st_rdev));
3799 return 1;
3800 }
dbf98368 3801 ofd = dl->fd;
3802 dl->fd = fd;
273989b9 3803 ret = (_write_super_to_disk(ddf, dl, 0) != 1);
dbf98368 3804 dl->fd = ofd;
3805 return ret;
3806 }
3807
3d2c4fc7
DW
3808 if (posix_memalign(&buf, 512, 512) != 0)
3809 return 1;
6416d527
NB
3810 memset(buf, 0, 512);
3811
a322f70c 3812 lseek64(fd, dsize-512, 0);
3d2c4fc7 3813 rc = write(fd, buf, 512);
6416d527 3814 free(buf);
3d2c4fc7
DW
3815 if (rc < 0)
3816 return 1;
a322f70c
DW
3817 return 0;
3818}
3819
a19c88b8
NB
3820static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3821{
3822 /*
3823 * return:
3824 * 0 same, or first was empty, and second was copied
3825 * 1 second had wrong number
3826 * 2 wrong uuid
3827 * 3 wrong other info
3828 */
3829 struct ddf_super *first = st->sb;
3830 struct ddf_super *second = tst->sb;
4eefd651 3831 struct dl *dl1, *dl2;
3832 struct vcl *vl1, *vl2;
2d210697 3833 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3834
3835 if (!first) {
3836 st->sb = tst->sb;
3837 tst->sb = NULL;
3838 return 0;
3839 }
3840
3841 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3842 return 2;
3843
60931cf9 3844 if (!be32_eq(first->anchor.seq, second->anchor.seq)) {
2d210697 3845 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
60931cf9 3846 be32_to_cpu(first->anchor.seq),
3847 be32_to_cpu(second->anchor.seq));
2d210697 3848 return 3;
3849 }
3850 if (first->max_part != second->max_part ||
a8173e43 3851 !be16_eq(first->phys->used_pdes, second->phys->used_pdes) ||
3852 !be16_eq(first->virt->populated_vdes,
3853 second->virt->populated_vdes)) {
2d210697 3854 dprintf("%s: PD/VD number mismatch\n", __func__);
3855 return 3;
3856 }
3857
a8173e43 3858 max_pds = be16_to_cpu(first->phys->used_pdes);
2d210697 3859 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3860 for (pd = 0; pd < max_pds; pd++)
60931cf9 3861 if (be32_eq(first->phys->entries[pd].refnum,
3862 dl2->disk.refnum))
2d210697 3863 break;
3864 if (pd == max_pds) {
3865 dprintf("%s: no match for disk %08x\n", __func__,
60931cf9 3866 be32_to_cpu(dl2->disk.refnum));
2d210697 3867 return 3;
3868 }
3869 }
3870
a8173e43 3871 max_vds = be16_to_cpu(first->active->max_vd_entries);
2d210697 3872 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
60931cf9 3873 if (!be32_eq(vl2->conf.magic, DDF_VD_CONF_MAGIC))
2d210697 3874 continue;
3875 for (vd = 0; vd < max_vds; vd++)
3876 if (!memcmp(first->virt->entries[vd].guid,
3877 vl2->conf.guid, DDF_GUID_LEN))
3878 break;
3879 if (vd == max_vds) {
3880 dprintf("%s: no match for VD config\n", __func__);
3881 return 3;
3882 }
3883 }
a19c88b8 3884 /* FIXME should I look at anything else? */
2d210697 3885
4eefd651 3886 /*
3887 At this point we are fairly sure that the meta data matches.
3888 But the new disk may contain additional local data.
3889 Add it to the super block.
3890 */
3891 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3892 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3893 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3894 DDF_GUID_LEN))
3895 break;
3896 if (vl1) {
3897 if (vl1->other_bvds != NULL &&
3898 vl1->conf.sec_elmnt_seq !=
3899 vl2->conf.sec_elmnt_seq) {
3900 dprintf("%s: adding BVD %u\n", __func__,
3901 vl2->conf.sec_elmnt_seq);
3902 add_other_bvd(vl1, &vl2->conf,
3903 first->conf_rec_len*512);
3904 }
3905 continue;
3906 }
3907
3908 if (posix_memalign((void **)&vl1, 512,
3909 (first->conf_rec_len*512 +
3910 offsetof(struct vcl, conf))) != 0) {
3911 pr_err("%s could not allocate vcl buf\n",
3912 __func__);
3913 return 3;
3914 }
3915
3916 vl1->next = first->conflist;
3917 vl1->block_sizes = NULL;
4eefd651 3918 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3c48f7be 3919 if (alloc_other_bvds(first, vl1) != 0) {
3920 pr_err("%s could not allocate other bvds\n",
3921 __func__);
3922 free(vl1);
3923 return 3;
3924 }
4eefd651 3925 for (vd = 0; vd < max_vds; vd++)
3926 if (!memcmp(first->virt->entries[vd].guid,
3927 vl1->conf.guid, DDF_GUID_LEN))
3928 break;
3929 vl1->vcnum = vd;
3930 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3931 first->conflist = vl1;
3932 }
3933
3934 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3935 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
60931cf9 3936 if (be32_eq(dl1->disk.refnum, dl2->disk.refnum))
4eefd651 3937 break;
3938 if (dl1)
3939 continue;
3940
3941 if (posix_memalign((void **)&dl1, 512,
3942 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3943 != 0) {
3944 pr_err("%s could not allocate disk info buffer\n",
3945 __func__);
3946 return 3;
3947 }
3948 memcpy(dl1, dl2, sizeof(*dl1));
3949 dl1->mdupdate = NULL;
3950 dl1->next = first->dlist;
3951 dl1->fd = -1;
3952 for (pd = 0; pd < max_pds; pd++)
60931cf9 3953 if (be32_eq(first->phys->entries[pd].refnum,
3954 dl1->disk.refnum))
4eefd651 3955 break;
3956 dl1->pdnum = pd;
3957 if (dl2->spare) {
3958 if (posix_memalign((void **)&dl1->spare, 512,
3959 first->conf_rec_len*512) != 0) {
3960 pr_err("%s could not allocate spare info buf\n",
3961 __func__);
3962 return 3;
3963 }
3964 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3965 }
3966 for (vd = 0 ; vd < first->max_part ; vd++) {
3967 if (!dl2->vlist[vd]) {
3968 dl1->vlist[vd] = NULL;
3969 continue;
3970 }
3971 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3972 if (!memcmp(vl1->conf.guid,
3973 dl2->vlist[vd]->conf.guid,
3974 DDF_GUID_LEN))
3975 break;
3976 dl1->vlist[vd] = vl1;
3977 }
3978 }
3979 first->dlist = dl1;
3980 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
60931cf9 3981 be32_to_cpu(dl1->disk.refnum));
4eefd651 3982 }
3983
a19c88b8
NB
3984 return 0;
3985}
3986
0e600426 3987#ifndef MDASSEMBLE
4e5528c6
NB
3988/*
3989 * A new array 'a' has been started which claims to be instance 'inst'
3990 * within container 'c'.
3991 * We need to confirm that the array matches the metadata in 'c' so
3992 * that we don't corrupt any metadata.
3993 */
cba0191b 3994static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3995{
a2aa439e 3996 struct ddf_super *ddf = c->sb;
3997 int n = atoi(inst);
fb9d0acb 3998 if (all_ff(ddf->virt->entries[n].guid)) {
3999 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
a2aa439e 4000 return -ENODEV;
4001 }
4002 dprintf("ddf: open_new %d\n", n);
4003 a->info.container_member = n;
549e9569
NB
4004 return 0;
4005}
4006
4e5528c6
NB
4007/*
4008 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 4009 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
4010 * clean up to the point (in sectors). If that cannot be recorded in the
4011 * metadata, then leave it as dirty.
4012 *
4013 * For DDF, we need to clear the DDF_state_inconsistent bit in the
4014 * !global! virtual_disk.virtual_entry structure.
4015 */
01f157d7 4016static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 4017{
4e5528c6
NB
4018 struct ddf_super *ddf = a->container->sb;
4019 int inst = a->info.container_member;
18a2f463 4020 int old = ddf->virt->entries[inst].state;
01f157d7
N
4021 if (consistent == 2) {
4022 /* Should check if a recovery should be started FIXME */
4023 consistent = 1;
b7941fd6 4024 if (!is_resync_complete(&a->info))
01f157d7
N
4025 consistent = 0;
4026 }
ed9d66aa
NB
4027 if (consistent)
4028 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
4029 else
4e5528c6 4030 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463 4031 if (old != ddf->virt->entries[inst].state)
7d5a7ff3 4032 ddf_set_updates_pending(ddf);
18a2f463
NB
4033
4034 old = ddf->virt->entries[inst].init_state;
ed9d66aa 4035 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 4036 if (is_resync_complete(&a->info))
ed9d66aa 4037 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 4038 else if (a->info.resync_start == 0)
ed9d66aa 4039 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 4040 else
ed9d66aa 4041 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463 4042 if (old != ddf->virt->entries[inst].init_state)
7d5a7ff3 4043 ddf_set_updates_pending(ddf);
ed9d66aa 4044
b27336a2 4045 dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
4046 guid_str(ddf->virt->entries[inst].guid), a->curr_state,
4047 consistent?"clean":"dirty",
b7941fd6 4048 a->info.resync_start);
01f157d7 4049 return consistent;
fd7cde1b
DW
4050}
4051
5ec636b7 4052static int get_bvd_state(const struct ddf_super *ddf,
4053 const struct vd_config *vc)
4054{
4055 unsigned int i, n_bvd, working = 0;
a8173e43 4056 unsigned int n_prim = be16_to_cpu(vc->prim_elmnt_count);
5ec636b7 4057 int pd, st, state;
4058 for (i = 0; i < n_prim; i++) {
4059 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
4060 continue;
4061 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
4062 if (pd < 0)
4063 continue;
a8173e43 4064 st = be16_to_cpu(ddf->phys->entries[pd].state);
5ec636b7 4065 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
4066 == DDF_Online)
4067 working++;
4068 }
4069
4070 state = DDF_state_degraded;
4071 if (working == n_prim)
4072 state = DDF_state_optimal;
4073 else
4074 switch (vc->prl) {
4075 case DDF_RAID0:
4076 case DDF_CONCAT:
4077 case DDF_JBOD:
4078 state = DDF_state_failed;
4079 break;
4080 case DDF_RAID1:
4081 if (working == 0)
4082 state = DDF_state_failed;
4083 else if (working >= 2)
4084 state = DDF_state_part_optimal;
4085 break;
4086 case DDF_RAID4:
4087 case DDF_RAID5:
4088 if (working < n_prim - 1)
4089 state = DDF_state_failed;
4090 break;
4091 case DDF_RAID6:
4092 if (working < n_prim - 2)
4093 state = DDF_state_failed;
4094 else if (working == n_prim - 1)
4095 state = DDF_state_part_optimal;
4096 break;
4097 }
4098 return state;
4099}
4100
0777d17d 4101static int secondary_state(int state, int other, int seclevel)
4102{
4103 if (state == DDF_state_optimal && other == DDF_state_optimal)
4104 return DDF_state_optimal;
4105 if (seclevel == DDF_2MIRRORED) {
4106 if (state == DDF_state_optimal || other == DDF_state_optimal)
4107 return DDF_state_part_optimal;
4108 if (state == DDF_state_failed && other == DDF_state_failed)
4109 return DDF_state_failed;
4110 return DDF_state_degraded;
4111 } else {
4112 if (state == DDF_state_failed || other == DDF_state_failed)
4113 return DDF_state_failed;
4114 if (state == DDF_state_degraded || other == DDF_state_degraded)
4115 return DDF_state_degraded;
4116 return DDF_state_part_optimal;
4117 }
4118}
4119
4120static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
4121{
4122 int state = get_bvd_state(ddf, &vcl->conf);
4123 unsigned int i;
4124 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
4125 state = secondary_state(
4126 state,
4127 get_bvd_state(ddf, vcl->other_bvds[i-1]),
4128 vcl->conf.srl);
4129 }
4130 return state;
4131}
4132
7a7cc504
NB
4133/*
4134 * The state of each disk is stored in the global phys_disk structure
4135 * in phys_disk.entries[n].state.
4136 * This makes various combinations awkward.
4137 * - When a device fails in any array, it must be failed in all arrays
4138 * that include a part of this device.
4139 * - When a component is rebuilding, we cannot include it officially in the
4140 * array unless this is the only array that uses the device.
4141 *
4142 * So: when transitioning:
4143 * Online -> failed, just set failed flag. monitor will propagate
4144 * spare -> online, the device might need to be added to the array.
4145 * spare -> failed, just set failed. Don't worry if in array or not.
4146 */
8d45d196 4147static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 4148{
7a7cc504 4149 struct ddf_super *ddf = a->container->sb;
baba3f4e 4150 unsigned int inst = a->info.container_member, n_bvd;
4151 struct vcl *vcl;
4152 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
4153 &n_bvd, &vcl);
4154 int pd;
e1316fab
N
4155 struct mdinfo *mdi;
4156 struct dl *dl;
7a7cc504 4157
ce6844b9 4158 dprintf("%s: %d to %x\n", __func__, n, state);
7a7cc504 4159 if (vc == NULL) {
2c514b71 4160 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
4161 return;
4162 }
e1316fab
N
4163 /* Find the matching slot in 'info'. */
4164 for (mdi = a->info.devs; mdi; mdi = mdi->next)
4165 if (mdi->disk.raid_disk == n)
4166 break;
ce6844b9
MW
4167 if (!mdi) {
4168 pr_err("%s: cannot find raid disk %d\n",
4169 __func__, n);
e1316fab 4170 return;
ce6844b9 4171 }
e1316fab
N
4172
4173 /* and find the 'dl' entry corresponding to that. */
4174 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
4175 if (mdi->state_fd >= 0 &&
4176 mdi->disk.major == dl->major &&
e1316fab
N
4177 mdi->disk.minor == dl->minor)
4178 break;
ce6844b9
MW
4179 if (!dl) {
4180 pr_err("%s: cannot find raid disk %d (%d/%d)\n",
4181 __func__, n,
4182 mdi->disk.major, mdi->disk.minor);
e1316fab 4183 return;
ce6844b9 4184 }
e1316fab 4185
baba3f4e 4186 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
e1316fab
N
4187 if (pd < 0 || pd != dl->pdnum) {
4188 /* disk doesn't currently exist or has changed.
4189 * If it is now in_sync, insert it. */
baba3f4e 4190 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4191 __func__, dl->pdnum, dl->major, dl->minor,
60931cf9 4192 be32_to_cpu(dl->disk.refnum));
baba3f4e 4193 dprintf("%s: array %u disk %u ref %08x pd %d\n",
60931cf9 4194 __func__, inst, n_bvd,
4195 be32_to_cpu(vc->phys_refnum[n_bvd]), pd);
7a7cc504 4196 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
baba3f4e 4197 pd = dl->pdnum; /* FIXME: is this really correct ? */
4198 vc->phys_refnum[n_bvd] = dl->disk.refnum;
57a66662 4199 LBA_OFFSET(ddf, vc)[n_bvd] =
9d0c6b70 4200 cpu_to_be64(mdi->data_offset);
a8173e43 4201 be16_clear(ddf->phys->entries[pd].type,
4202 cpu_to_be16(DDF_Global_Spare));
4203 be16_set(ddf->phys->entries[pd].type,
4204 cpu_to_be16(DDF_Active_in_VD));
7d5a7ff3 4205 ddf_set_updates_pending(ddf);
7a7cc504
NB
4206 }
4207 } else {
a8173e43 4208 be16 old = ddf->phys->entries[pd].state;
7a7cc504 4209 if (state & DS_FAULTY)
a8173e43 4210 be16_set(ddf->phys->entries[pd].state,
4211 cpu_to_be16(DDF_Failed));
7a7cc504 4212 if (state & DS_INSYNC) {
a8173e43 4213 be16_set(ddf->phys->entries[pd].state,
4214 cpu_to_be16(DDF_Online));
4215 be16_clear(ddf->phys->entries[pd].state,
4216 cpu_to_be16(DDF_Rebuilding));
7a7cc504 4217 }
a8173e43 4218 if (!be16_eq(old, ddf->phys->entries[pd].state))
7d5a7ff3 4219 ddf_set_updates_pending(ddf);
7a7cc504
NB
4220 }
4221
ce6844b9
MW
4222 dprintf("ddf: set_disk %d (%08x) to %x->%02x\n", n,
4223 be32_to_cpu(dl->disk.refnum), state,
4224 be16_to_cpu(ddf->phys->entries[pd].state));
7e1432fb 4225
7a7cc504
NB
4226 /* Now we need to check the state of the array and update
4227 * virtual_disk.entries[n].state.
4228 * It needs to be one of "optimal", "degraded", "failed".
4229 * I don't understand 'deleted' or 'missing'.
4230 */
0777d17d 4231 state = get_svd_state(ddf, vcl);
7a7cc504 4232
18a2f463
NB
4233 if (ddf->virt->entries[inst].state !=
4234 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4235 | state)) {
4236
4237 ddf->virt->entries[inst].state =
4238 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4239 | state;
7d5a7ff3 4240 ddf_set_updates_pending(ddf);
18a2f463 4241 }
7a7cc504 4242
549e9569
NB
4243}
4244
2e735d19 4245static void ddf_sync_metadata(struct supertype *st)
549e9569 4246{
7a7cc504
NB
4247
4248 /*
4249 * Write all data to all devices.
4250 * Later, we might be able to track whether only local changes
4251 * have been made, or whether any global data has been changed,
4252 * but ddf is sufficiently weird that it probably always
4253 * changes global data ....
4254 */
18a2f463
NB
4255 struct ddf_super *ddf = st->sb;
4256 if (!ddf->updates_pending)
4257 return;
4258 ddf->updates_pending = 0;
273989b9 4259 __write_init_super_ddf(st, 1);
2c514b71 4260 dprintf("ddf: sync_metadata\n");
549e9569
NB
4261}
4262
f646805e 4263static int del_from_conflist(struct vcl **list, const char *guid)
4264{
4265 struct vcl **p;
4266 int found = 0;
4267 for (p = list; p && *p; p = &((*p)->next))
4268 if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
4269 found = 1;
4270 *p = (*p)->next;
4271 }
4272 return found;
4273}
4274
4275static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
4276{
4277 struct dl *dl;
4278 unsigned int vdnum, i;
4279 vdnum = find_vde_by_guid(ddf, guid);
4280 if (vdnum == DDF_NOTFOUND) {
4281 pr_err("%s: could not find VD %s\n", __func__,
4282 guid_str(guid));
4283 return -1;
4284 }
4285 if (del_from_conflist(&ddf->conflist, guid) == 0) {
4286 pr_err("%s: could not find conf %s\n", __func__,
4287 guid_str(guid));
4288 return -1;
4289 }
4290 for (dl = ddf->dlist; dl; dl = dl->next)
4291 for (i = 0; i < ddf->max_part; i++)
4292 if (dl->vlist[i] != NULL &&
4293 !memcmp(dl->vlist[i]->conf.guid, guid,
4294 DDF_GUID_LEN))
4295 dl->vlist[i] = NULL;
4296 memset(ddf->virt->entries[vdnum].guid, 0xff, DDF_GUID_LEN);
4297 dprintf("%s: deleted %s\n", __func__, guid_str(guid));
4298 return 0;
4299}
4300
4301static int kill_subarray_ddf(struct supertype *st)
4302{
4303 struct ddf_super *ddf = st->sb;
4304 /*
4305 * currentconf is set in container_content_ddf,
4306 * called with subarray arg
4307 */
4308 struct vcl *victim = ddf->currentconf;
4309 struct vd_config *conf;
4310 ddf->currentconf = NULL;
4311 unsigned int vdnum;
4312 if (!victim) {
4313 pr_err("%s: nothing to kill\n", __func__);
4314 return -1;
4315 }
4316 conf = &victim->conf;
4317 vdnum = find_vde_by_guid(ddf, conf->guid);
4318 if (vdnum == DDF_NOTFOUND) {
4319 pr_err("%s: could not find VD %s\n", __func__,
4320 guid_str(conf->guid));
4321 return -1;
4322 }
4323 if (st->update_tail) {
4324 struct virtual_disk *vd;
4325 int len = sizeof(struct virtual_disk)
4326 + sizeof(struct virtual_entry);
4327 vd = xmalloc(len);
4328 if (vd == NULL) {
4329 pr_err("%s: failed to allocate %d bytes\n", __func__,
4330 len);
4331 return -1;
4332 }
4333 memset(vd, 0 , len);
4334 vd->magic = DDF_VIRT_RECORDS_MAGIC;
a8173e43 4335 vd->populated_vdes = cpu_to_be16(0);
f646805e 4336 memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
4337 /* we use DDF_state_deleted as marker */
4338 vd->entries[0].state = DDF_state_deleted;
4339 append_metadata_update(st, vd, len);
6a350d82 4340 } else {
f646805e 4341 _kill_subarray_ddf(ddf, conf->guid);
6a350d82 4342 ddf_set_updates_pending(ddf);
4343 ddf_sync_metadata(st);
4344 }
f646805e 4345 return 0;
4346}
4347
c5943560 4348static void copy_matching_bvd(struct ddf_super *ddf,
4349 struct vd_config *conf,
4350 const struct metadata_update *update)
4351{
4352 unsigned int mppe =
a8173e43 4353 be16_to_cpu(ddf->anchor.max_primary_element_entries);
c5943560 4354 unsigned int len = ddf->conf_rec_len * 512;
4355 char *p;
4356 struct vd_config *vc;
4357 for (p = update->buf; p < update->buf + update->len; p += len) {
4358 vc = (struct vd_config *) p;
4359 if (vc->sec_elmnt_seq == conf->sec_elmnt_seq) {
4360 memcpy(conf->phys_refnum, vc->phys_refnum,
4361 mppe * (sizeof(__u32) + sizeof(__u64)));
4362 return;
4363 }
4364 }
4365 pr_err("%s: no match for BVD %d of %s in update\n", __func__,
4366 conf->sec_elmnt_seq, guid_str(conf->guid));
4367}
4368
88c164f4
NB
4369static void ddf_process_update(struct supertype *st,
4370 struct metadata_update *update)
4371{
4372 /* Apply this update to the metadata.
4373 * The first 4 bytes are a DDF_*_MAGIC which guides
4374 * our actions.
4375 * Possible update are:
4376 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
4377 * Add a new physical device or remove an old one.
4378 * Changes to this record only happen implicitly.
88c164f4
NB
4379 * used_pdes is the device number.
4380 * DDF_VIRT_RECORDS_MAGIC
4381 * Add a new VD. Possibly also change the 'access' bits.
4382 * populated_vdes is the entry number.
4383 * DDF_VD_CONF_MAGIC
4384 * New or updated VD. the VIRT_RECORD must already
4385 * exist. For an update, phys_refnum and lba_offset
4386 * (at least) are updated, and the VD_CONF must
4387 * be written to precisely those devices listed with
4388 * a phys_refnum.
4389 * DDF_SPARE_ASSIGN_MAGIC
4390 * replacement Spare Assignment Record... but for which device?
4391 *
4392 * So, e.g.:
4393 * - to create a new array, we send a VIRT_RECORD and
4394 * a VD_CONF. Then assemble and start the array.
4395 * - to activate a spare we send a VD_CONF to add the phys_refnum
4396 * and offset. This will also mark the spare as active with
4397 * a spare-assignment record.
4398 */
4399 struct ddf_super *ddf = st->sb;
60931cf9 4400 be32 *magic = (be32 *)update->buf;
88c164f4
NB
4401 struct phys_disk *pd;
4402 struct virtual_disk *vd;
4403 struct vd_config *vc;
4404 struct vcl *vcl;
4405 struct dl *dl;
f21e18ca 4406 unsigned int ent;
c5943560 4407 unsigned int pdnum, pd2, len;
88c164f4 4408
60931cf9 4409 dprintf("Process update %x\n", be32_to_cpu(*magic));
7e1432fb 4410
60931cf9 4411 if (be32_eq(*magic, DDF_PHYS_RECORDS_MAGIC)) {
88c164f4
NB
4412
4413 if (update->len != (sizeof(struct phys_disk) +
4414 sizeof(struct phys_disk_entry)))
4415 return;
4416 pd = (struct phys_disk*)update->buf;
4417
a8173e43 4418 ent = be16_to_cpu(pd->used_pdes);
4419 if (ent >= be16_to_cpu(ddf->phys->max_pdes))
88c164f4 4420 return;
a8173e43 4421 if (be16_and(pd->entries[0].state, cpu_to_be16(DDF_Missing))) {
4dd968cc
N
4422 struct dl **dlp;
4423 /* removing this disk. */
a8173e43 4424 be16_set(ddf->phys->entries[ent].state,
4425 cpu_to_be16(DDF_Missing));
4dd968cc
N
4426 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4427 struct dl *dl = *dlp;
4428 if (dl->pdnum == (signed)ent) {
4429 close(dl->fd);
4430 dl->fd = -1;
4431 /* FIXME this doesn't free
4432 * dl->devname */
4433 update->space = dl;
4434 *dlp = dl->next;
4435 break;
4436 }
4437 }
7d5a7ff3 4438 ddf_set_updates_pending(ddf);
4dd968cc
N
4439 return;
4440 }
88c164f4
NB
4441 if (!all_ff(ddf->phys->entries[ent].guid))
4442 return;
4443 ddf->phys->entries[ent] = pd->entries[0];
a8173e43 4444 ddf->phys->used_pdes = cpu_to_be16
4445 (1 + be16_to_cpu(ddf->phys->used_pdes));
7d5a7ff3 4446 ddf_set_updates_pending(ddf);
2cc2983d
N
4447 if (ddf->add_list) {
4448 struct active_array *a;
4449 struct dl *al = ddf->add_list;
4450 ddf->add_list = al->next;
4451
4452 al->next = ddf->dlist;
4453 ddf->dlist = al;
4454
4455 /* As a device has been added, we should check
4456 * for any degraded devices that might make
4457 * use of this spare */
4458 for (a = st->arrays ; a; a=a->next)
4459 a->check_degraded = 1;
4460 }
60931cf9 4461 } else if (be32_eq(*magic, DDF_VIRT_RECORDS_MAGIC)) {
88c164f4
NB
4462
4463 if (update->len != (sizeof(struct virtual_disk) +
4464 sizeof(struct virtual_entry)))
4465 return;
4466 vd = (struct virtual_disk*)update->buf;
4467
f646805e 4468 if (vd->entries[0].state == DDF_state_deleted) {
4469 if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
4470 return;
4471 } else {
4472
6a7e7ecc 4473 ent = find_vde_by_guid(ddf, vd->entries[0].guid);
4474 if (ent != DDF_NOTFOUND) {
4475 dprintf("%s: VD %s exists already in slot %d\n",
4476 __func__, guid_str(vd->entries[0].guid),
4477 ent);
4478 return;
4479 }
f646805e 4480 ent = find_unused_vde(ddf);
4481 if (ent == DDF_NOTFOUND)
4482 return;
4483 ddf->virt->entries[ent] = vd->entries[0];
4484 ddf->virt->populated_vdes =
a8173e43 4485 cpu_to_be16(
4486 1 + be16_to_cpu(
f646805e 4487 ddf->virt->populated_vdes));
ed5ff7a2 4488 dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
4489 __func__, guid_str(vd->entries[0].guid), ent,
4490 ddf->virt->entries[ent].state,
4491 ddf->virt->entries[ent].init_state);
f646805e 4492 }
7d5a7ff3 4493 ddf_set_updates_pending(ddf);
60931cf9 4494 }
88c164f4 4495
60931cf9 4496 else if (be32_eq(*magic, DDF_VD_CONF_MAGIC)) {
88c164f4 4497 vc = (struct vd_config*)update->buf;
c5943560 4498 len = ddf->conf_rec_len * 512;
4499 if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
4500 pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
4501 __func__, guid_str(vc->guid), update->len,
4502 vc->sec_elmnt_count);
4503 return;
4504 }
88c164f4
NB
4505 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4506 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4507 break;
ed5ff7a2 4508 dprintf("%s: conf update for %s (%s)\n", __func__,
4509 guid_str(vc->guid), (vcl ? "old" : "new"));
88c164f4
NB
4510 if (vcl) {
4511 /* An update, just copy the phys_refnum and lba_offset
4512 * fields
4513 */
c5943560 4514 unsigned int i;
0847945b 4515 unsigned int k;
c5943560 4516 copy_matching_bvd(ddf, &vcl->conf, update);
0847945b
MW
4517 for (k = 0; k < be16_to_cpu(vc->prim_elmnt_count); k++)
4518 dprintf("BVD %u has %08x at %llu\n", 0,
4519 be32_to_cpu(vcl->conf.phys_refnum[k]),
4520 be64_to_cpu(LBA_OFFSET(ddf,
4521 &vcl->conf)[k]));
4522 for (i = 1; i < vc->sec_elmnt_count; i++) {
c5943560 4523 copy_matching_bvd(ddf, vcl->other_bvds[i-1],
4524 update);
0847945b
MW
4525 for (k = 0; k < be16_to_cpu(
4526 vc->prim_elmnt_count); k++)
4527 dprintf("BVD %u has %08x at %llu\n", i,
4528 be32_to_cpu
4529 (vcl->other_bvds[i-1]->
4530 phys_refnum[k]),
4531 be64_to_cpu
4532 (LBA_OFFSET
4533 (ddf,
4534 vcl->other_bvds[i-1])[k]));
4535 }
88c164f4
NB
4536 } else {
4537 /* A new VD_CONF */
c5943560 4538 unsigned int i;
e6b9548d
DW
4539 if (!update->space)
4540 return;
88c164f4
NB
4541 vcl = update->space;
4542 update->space = NULL;
4543 vcl->next = ddf->conflist;
c5943560 4544 memcpy(&vcl->conf, vc, len);
fb9d0acb 4545 ent = find_vde_by_guid(ddf, vc->guid);
4546 if (ent == DDF_NOTFOUND)
4547 return;
4548 vcl->vcnum = ent;
88c164f4 4549 ddf->conflist = vcl;
c5943560 4550 for (i = 1; i < vc->sec_elmnt_count; i++)
4551 memcpy(vcl->other_bvds[i-1],
4552 update->buf + len * i, len);
88c164f4 4553 }
c7079c84
N
4554 /* Set DDF_Transition on all Failed devices - to help
4555 * us detect those that are no longer in use
4556 */
a8173e43 4557 for (pdnum = 0; pdnum < be16_to_cpu(ddf->phys->used_pdes);
4558 pdnum++)
4559 if (be16_and(ddf->phys->entries[pdnum].state,
4560 cpu_to_be16(DDF_Failed)))
4561 be16_set(ddf->phys->entries[pdnum].state,
4562 cpu_to_be16(DDF_Transition));
88c164f4
NB
4563 /* Now make sure vlist is correct for each dl. */
4564 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca 4565 unsigned int vn = 0;
8401644c 4566 int in_degraded = 0;
5838fccd 4567 for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
4568 unsigned int dn, ibvd;
4569 const struct vd_config *conf;
4570 int vstate;
4571 dn = get_pd_index_from_refnum(vcl,
4572 dl->disk.refnum,
4573 ddf->mppe,
4574 &conf, &ibvd);
4575 if (dn == DDF_NOTFOUND)
4576 continue;
4577 dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
ad60eea1 4578 dl->pdnum,
60931cf9 4579 be32_to_cpu(dl->disk.refnum),
5838fccd 4580 guid_str(conf->guid),
4581 conf->sec_elmnt_seq, vn);
4582 /* Clear the Transition flag */
a8173e43 4583 if (be16_and
4584 (ddf->phys->entries[dl->pdnum].state,
4585 cpu_to_be16(DDF_Failed)))
4586 be16_clear(ddf->phys
4587 ->entries[dl->pdnum].state,
4588 cpu_to_be16(DDF_Transition));
5838fccd 4589 dl->vlist[vn++] = vcl;
4590 vstate = ddf->virt->entries[vcl->vcnum].state
4591 & DDF_state_mask;
4592 if (vstate == DDF_state_degraded ||
4593 vstate == DDF_state_part_optimal)
4594 in_degraded = 1;
4595 }
88c164f4
NB
4596 while (vn < ddf->max_part)
4597 dl->vlist[vn++] = NULL;
7e1432fb 4598 if (dl->vlist[0]) {
a8173e43 4599 be16_clear(ddf->phys->entries[dl->pdnum].type,
4600 cpu_to_be16(DDF_Global_Spare));
4601 if (!be16_and(ddf->phys
4602 ->entries[dl->pdnum].type,
4603 cpu_to_be16(DDF_Active_in_VD))) {
4604 be16_set(ddf->phys
4605 ->entries[dl->pdnum].type,
4606 cpu_to_be16(DDF_Active_in_VD));
613b0d17 4607 if (in_degraded)
a8173e43 4608 be16_set(ddf->phys
4609 ->entries[dl->pdnum]
4610 .state,
4611 cpu_to_be16
4612 (DDF_Rebuilding));
613b0d17 4613 }
7e1432fb
NB
4614 }
4615 if (dl->spare) {
a8173e43 4616 be16_clear(ddf->phys->entries[dl->pdnum].type,
4617 cpu_to_be16(DDF_Global_Spare));
4618 be16_set(ddf->phys->entries[dl->pdnum].type,
4619 cpu_to_be16(DDF_Spare));
7e1432fb
NB
4620 }
4621 if (!dl->vlist[0] && !dl->spare) {
a8173e43 4622 be16_set(ddf->phys->entries[dl->pdnum].type,
4623 cpu_to_be16(DDF_Global_Spare));
4624 be16_clear(ddf->phys->entries[dl->pdnum].type,
4625 cpu_to_be16(DDF_Spare));
4626 be16_clear(ddf->phys->entries[dl->pdnum].type,
4627 cpu_to_be16(DDF_Active_in_VD));
7e1432fb 4628 }
88c164f4 4629 }
c7079c84
N
4630
4631 /* Now remove any 'Failed' devices that are not part
4632 * of any VD. They will have the Transition flag set.
4633 * Once done, we need to update all dl->pdnum numbers.
4634 */
4635 pd2 = 0;
a8173e43 4636 for (pdnum = 0; pdnum < be16_to_cpu(ddf->phys->used_pdes);
4637 pdnum++)
4638 if (be16_and(ddf->phys->entries[pdnum].state,
4639 cpu_to_be16(DDF_Failed))
4640 && be16_and(ddf->phys->entries[pdnum].state,
4641 cpu_to_be16(DDF_Transition)))
c7079c84
N
4642 /* skip this one */;
4643 else if (pdnum == pd2)
4644 pd2++;
4645 else {
a8173e43 4646 ddf->phys->entries[pd2] =
4647 ddf->phys->entries[pdnum];
c7079c84
N
4648 for (dl = ddf->dlist; dl; dl = dl->next)
4649 if (dl->pdnum == (int)pdnum)
4650 dl->pdnum = pd2;
4651 pd2++;
4652 }
a8173e43 4653 ddf->phys->used_pdes = cpu_to_be16(pd2);
c7079c84 4654 while (pd2 < pdnum) {
a8173e43 4655 memset(ddf->phys->entries[pd2].guid, 0xff,
4656 DDF_GUID_LEN);
c7079c84
N
4657 pd2++;
4658 }
4659
7d5a7ff3 4660 ddf_set_updates_pending(ddf);
88c164f4 4661 }
60931cf9 4662 /* case DDF_SPARE_ASSIGN_MAGIC */
88c164f4
NB
4663}
4664
edd8d13c
NB
4665static void ddf_prepare_update(struct supertype *st,
4666 struct metadata_update *update)
4667{
4668 /* This update arrived at managemon.
4669 * We are about to pass it to monitor.
4670 * If a malloc is needed, do it here.
4671 */
4672 struct ddf_super *ddf = st->sb;
60931cf9 4673 be32 *magic = (be32 *)update->buf;
4674 if (be32_eq(*magic, DDF_VD_CONF_MAGIC)) {
c5943560 4675 struct vcl *vcl;
4676 struct vd_config *conf = (struct vd_config *) update->buf;
e6b9548d 4677 if (posix_memalign(&update->space, 512,
613b0d17 4678 offsetof(struct vcl, conf)
c5943560 4679 + ddf->conf_rec_len * 512) != 0) {
4680 update->space = NULL;
4681 return;
4682 }
4683 vcl = update->space;
4684 vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
4685 if (alloc_other_bvds(ddf, vcl) != 0) {
4686 free(update->space);
e6b9548d 4687 update->space = NULL;
c5943560 4688 }
4689 }
edd8d13c
NB
4690}
4691
7733b91d 4692/*
4693 * Check degraded state of a RAID10.
4694 * returns 2 for good, 1 for degraded, 0 for failed, and -1 for error
4695 */
4696static int raid10_degraded(struct mdinfo *info)
4697{
4698 int n_prim, n_bvds;
4699 int i;
9591a2de 4700 struct mdinfo *d;
7733b91d 4701 char *found;
4702 int ret = -1;
4703
7733b91d 4704 n_prim = info->array.layout & ~0x100;
4705 n_bvds = info->array.raid_disks / n_prim;
4706 found = xmalloc(n_bvds);
4707 if (found == NULL)
4708 return ret;
4709 memset(found, 0, n_bvds);
4710 for (d = info->devs; d; d = d->next) {
4711 i = d->disk.raid_disk / n_prim;
4712 if (i >= n_bvds) {
4713 pr_err("%s: BUG: invalid raid disk\n", __func__);
4714 goto out;
4715 }
4716 if (d->state_fd > 0)
4717 found[i]++;
4718 }
4719 ret = 2;
4720 for (i = 0; i < n_bvds; i++)
4721 if (!found[i]) {
4722 dprintf("%s: BVD %d/%d failed\n", __func__, i, n_bvds);
4723 ret = 0;
4724 goto out;
4725 } else if (found[i] < n_prim) {
4726 dprintf("%s: BVD %d/%d degraded\n", __func__, i,
4727 n_bvds);
4728 ret = 1;
4729 }
4730out:
4731 free(found);
4732 return ret;
4733}
4734
7e1432fb
NB
4735/*
4736 * Check if the array 'a' is degraded but not failed.
4737 * If it is, find as many spares as are available and needed and
4738 * arrange for their inclusion.
4739 * We only choose devices which are not already in the array,
4740 * and prefer those with a spare-assignment to this array.
4741 * otherwise we choose global spares - assuming always that
4742 * there is enough room.
4743 * For each spare that we assign, we return an 'mdinfo' which
4744 * describes the position for the device in the array.
4745 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4746 * the new phys_refnum and lba_offset values.
4747 *
4748 * Only worry about BVDs at the moment.
4749 */
4750static struct mdinfo *ddf_activate_spare(struct active_array *a,
4751 struct metadata_update **updates)
4752{
4753 int working = 0;
4754 struct mdinfo *d;
4755 struct ddf_super *ddf = a->container->sb;
4756 int global_ok = 0;
4757 struct mdinfo *rv = NULL;
4758 struct mdinfo *di;
4759 struct metadata_update *mu;
4760 struct dl *dl;
4761 int i;
0c78849f 4762 unsigned int j;
baba3f4e 4763 struct vcl *vcl;
7e1432fb 4764 struct vd_config *vc;
baba3f4e 4765 unsigned int n_bvd;
7e1432fb 4766
7e1432fb
NB
4767 for (d = a->info.devs ; d ; d = d->next) {
4768 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4769 d->state_fd >= 0)
7e1432fb
NB
4770 /* wait for Removal to happen */
4771 return NULL;
4772 if (d->state_fd >= 0)
4773 working ++;
4774 }
4775
7733b91d 4776 dprintf("%s: working=%d (%d) level=%d\n", __func__, working,
a8173e43 4777 a->info.array.raid_disks,
2c514b71 4778 a->info.array.level);
7e1432fb
NB
4779 if (working == a->info.array.raid_disks)
4780 return NULL; /* array not degraded */
4781 switch (a->info.array.level) {
4782 case 1:
4783 if (working == 0)
4784 return NULL; /* failed */
4785 break;
4786 case 4:
4787 case 5:
4788 if (working < a->info.array.raid_disks - 1)
4789 return NULL; /* failed */
4790 break;
4791 case 6:
4792 if (working < a->info.array.raid_disks - 2)
4793 return NULL; /* failed */
4794 break;
7733b91d 4795 case 10:
4796 if (raid10_degraded(&a->info) < 1)
4797 return NULL;
4798 break;
7e1432fb
NB
4799 default: /* concat or stripe */
4800 return NULL; /* failed */
4801 }
4802
4803 /* For each slot, if it is not working, find a spare */
4804 dl = ddf->dlist;
4805 for (i = 0; i < a->info.array.raid_disks; i++) {
4806 for (d = a->info.devs ; d ; d = d->next)
4807 if (d->disk.raid_disk == i)
4808 break;
2c514b71 4809 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4810 if (d && (d->state_fd >= 0))
4811 continue;
4812
4813 /* OK, this device needs recovery. Find a spare */
4814 again:
4815 for ( ; dl ; dl = dl->next) {
4816 unsigned long long esize;
4817 unsigned long long pos;
4818 struct mdinfo *d2;
4819 int is_global = 0;
4820 int is_dedicated = 0;
4821 struct extent *ex;
f21e18ca 4822 unsigned int j;
6f56dbb9
MW
4823 be16 state = ddf->phys->entries[dl->pdnum].state;
4824 if (be16_and(state,
4825 cpu_to_be16(DDF_Failed|DDF_Missing)) ||
4826 !be16_and(state,
4827 cpu_to_be16(DDF_Online)))
4828 continue;
4829
7e1432fb
NB
4830 /* If in this array, skip */
4831 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4832 if (d2->state_fd >= 0 &&
4833 d2->disk.major == dl->major &&
7e1432fb 4834 d2->disk.minor == dl->minor) {
2a645ee2
MW
4835 dprintf("%x:%x (%08x) already in array\n",
4836 dl->major, dl->minor,
4837 be32_to_cpu(dl->disk.refnum));
7e1432fb
NB
4838 break;
4839 }
4840 if (d2)
4841 continue;
a8173e43 4842 if (be16_and(ddf->phys->entries[dl->pdnum].type,
4843 cpu_to_be16(DDF_Spare))) {
7e1432fb
NB
4844 /* Check spare assign record */
4845 if (dl->spare) {
4846 if (dl->spare->type & DDF_spare_dedicated) {
4847 /* check spare_ents for guid */
4848 for (j = 0 ;
a8173e43 4849 j < be16_to_cpu
4850 (dl->spare
4851 ->populated);
7e1432fb
NB
4852 j++) {
4853 if (memcmp(dl->spare->spare_ents[j].guid,
4854 ddf->virt->entries[a->info.container_member].guid,
4855 DDF_GUID_LEN) == 0)
4856 is_dedicated = 1;
4857 }
4858 } else
4859 is_global = 1;
4860 }
a8173e43 4861 } else if (be16_and(ddf->phys->entries[dl->pdnum].type,
4862 cpu_to_be16(DDF_Global_Spare))) {
7e1432fb 4863 is_global = 1;
a8173e43 4864 } else if (!be16_and(ddf->phys
4865 ->entries[dl->pdnum].state,
4866 cpu_to_be16(DDF_Failed))) {
e0e7aeaa
N
4867 /* we can possibly use some of this */
4868 is_global = 1;
7e1432fb
NB
4869 }
4870 if ( ! (is_dedicated ||
4871 (is_global && global_ok))) {
2c514b71 4872 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4873 is_dedicated, is_global);
7e1432fb
NB
4874 continue;
4875 }
4876
4877 /* We are allowed to use this device - is there space?
4878 * We need a->info.component_size sectors */
4879 ex = get_extents(ddf, dl);
4880 if (!ex) {
2c514b71 4881 dprintf("cannot get extents\n");
7e1432fb
NB
4882 continue;
4883 }
4884 j = 0; pos = 0;
4885 esize = 0;
4886
4887 do {
4888 esize = ex[j].start - pos;
4889 if (esize >= a->info.component_size)
4890 break;
e5cc7d46
N
4891 pos = ex[j].start + ex[j].size;
4892 j++;
4893 } while (ex[j-1].size);
7e1432fb
NB
4894
4895 free(ex);
4896 if (esize < a->info.component_size) {
e5cc7d46
N
4897 dprintf("%x:%x has no room: %llu %llu\n",
4898 dl->major, dl->minor,
2c514b71 4899 esize, a->info.component_size);
7e1432fb
NB
4900 /* No room */
4901 continue;
4902 }
4903
4904 /* Cool, we have a device with some space at pos */
503975b9 4905 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4906 di->disk.number = i;
4907 di->disk.raid_disk = i;
4908 di->disk.major = dl->major;
4909 di->disk.minor = dl->minor;
4910 di->disk.state = 0;
d23534e4 4911 di->recovery_start = 0;
7e1432fb
NB
4912 di->data_offset = pos;
4913 di->component_size = a->info.component_size;
4914 di->container_member = dl->pdnum;
4915 di->next = rv;
4916 rv = di;
2a645ee2
MW
4917 dprintf("%x:%x (%08x) to be %d at %llu\n",
4918 dl->major, dl->minor,
4919 be32_to_cpu(dl->disk.refnum), i, pos);
7e1432fb
NB
4920
4921 break;
4922 }
4923 if (!dl && ! global_ok) {
4924 /* not enough dedicated spares, try global */
4925 global_ok = 1;
4926 dl = ddf->dlist;
4927 goto again;
4928 }
4929 }
4930
4931 if (!rv)
4932 /* No spares found */
4933 return rv;
4934 /* Now 'rv' has a list of devices to return.
4935 * Create a metadata_update record to update the
4936 * phys_refnum and lba_offset values
4937 */
0c78849f 4938 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4939 &n_bvd, &vcl);
4940 if (vc == NULL)
4941 return NULL;
4942
503975b9
N
4943 mu = xmalloc(sizeof(*mu));
4944 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4945 free(mu);
4946 mu = NULL;
4947 }
0c78849f 4948
4949 mu->len = ddf->conf_rec_len * 512 * vcl->conf.sec_elmnt_count;
4950 mu->buf = xmalloc(mu->len);
7590d562 4951 mu->space = NULL;
f50ae22e 4952 mu->space_list = NULL;
7e1432fb 4953 mu->next = *updates;
0c78849f 4954 memcpy(mu->buf, &vcl->conf, ddf->conf_rec_len * 512);
4955 for (j = 1; j < vcl->conf.sec_elmnt_count; j++)
4956 memcpy(mu->buf + j * ddf->conf_rec_len * 512,
4957 vcl->other_bvds[j-1], ddf->conf_rec_len * 512);
7e1432fb
NB
4958
4959 vc = (struct vd_config*)mu->buf;
7e1432fb 4960 for (di = rv ; di ; di = di->next) {
0c78849f 4961 unsigned int i_sec, i_prim;
4962 i_sec = di->disk.raid_disk
4963 / be16_to_cpu(vcl->conf.prim_elmnt_count);
4964 i_prim = di->disk.raid_disk
4965 % be16_to_cpu(vcl->conf.prim_elmnt_count);
4966 vc = (struct vd_config *)(mu->buf
4967 + i_sec * ddf->conf_rec_len * 512);
4968 for (dl = ddf->dlist; dl; dl = dl->next)
4969 if (dl->major == di->disk.major
4970 && dl->minor == di->disk.minor)
4971 break;
4972 if (!dl) {
4973 pr_err("%s: BUG: can't find disk %d (%d/%d)\n",
4974 __func__, di->disk.raid_disk,
4975 di->disk.major, di->disk.minor);
4976 return NULL;
4977 }
4978 vc->phys_refnum[i_prim] = ddf->phys->entries[dl->pdnum].refnum;
4979 LBA_OFFSET(ddf, vc)[i_prim] = cpu_to_be64(di->data_offset);
2a645ee2
MW
4980 dprintf("BVD %u gets %u: %08x at %llu\n", i_sec, i_prim,
4981 be32_to_cpu(vc->phys_refnum[i_prim]),
4982 be64_to_cpu(LBA_OFFSET(ddf, vc)[i_prim]));
7e1432fb
NB
4983 }
4984 *updates = mu;
4985 return rv;
4986}
0e600426 4987#endif /* MDASSEMBLE */
7e1432fb 4988
b640a252
N
4989static int ddf_level_to_layout(int level)
4990{
4991 switch(level) {
4992 case 0:
4993 case 1:
4994 return 0;
4995 case 5:
4996 return ALGORITHM_LEFT_SYMMETRIC;
4997 case 6:
4998 return ALGORITHM_ROTATING_N_CONTINUE;
4999 case 10:
5000 return 0x102;
5001 default:
5002 return UnSet;
5003 }
5004}
5005
30f58b22
DW
5006static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
5007{
5008 if (level && *level == UnSet)
5009 *level = LEVEL_CONTAINER;
5010
5011 if (level && layout && *layout == UnSet)
5012 *layout = ddf_level_to_layout(*level);
5013}
5014
a322f70c
DW
5015struct superswitch super_ddf = {
5016#ifndef MDASSEMBLE
5017 .examine_super = examine_super_ddf,
5018 .brief_examine_super = brief_examine_super_ddf,
4737ae25 5019 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 5020 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
5021 .detail_super = detail_super_ddf,
5022 .brief_detail_super = brief_detail_super_ddf,
5023 .validate_geometry = validate_geometry_ddf,
78e44928 5024 .write_init_super = write_init_super_ddf,
0e600426 5025 .add_to_super = add_to_super_ddf,
4dd968cc 5026 .remove_from_super = remove_from_super_ddf,
2b959fbf 5027 .load_container = load_container_ddf,
74db60b0 5028 .copy_metadata = copy_metadata_ddf,
4441541f 5029 .kill_subarray = kill_subarray_ddf,
a322f70c
DW
5030#endif
5031 .match_home = match_home_ddf,
5032 .uuid_from_super= uuid_from_super_ddf,
5033 .getinfo_super = getinfo_super_ddf,
5034 .update_super = update_super_ddf,
5035
5036 .avail_size = avail_size_ddf,
5037
a19c88b8
NB
5038 .compare_super = compare_super_ddf,
5039
a322f70c 5040 .load_super = load_super_ddf,
ba7eb04f 5041 .init_super = init_super_ddf,
955e9ea1 5042 .store_super = store_super_ddf,
a322f70c
DW
5043 .free_super = free_super_ddf,
5044 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 5045 .container_content = container_content_ddf,
30f58b22 5046 .default_geometry = default_geometry_ddf,
a322f70c 5047
a322f70c 5048 .external = 1,
549e9569 5049
0e600426 5050#ifndef MDASSEMBLE
549e9569
NB
5051/* for mdmon */
5052 .open_new = ddf_open_new,
ed9d66aa 5053 .set_array_state= ddf_set_array_state,
549e9569
NB
5054 .set_disk = ddf_set_disk,
5055 .sync_metadata = ddf_sync_metadata,
88c164f4 5056 .process_update = ddf_process_update,
edd8d13c 5057 .prepare_update = ddf_prepare_update,
7e1432fb 5058 .activate_spare = ddf_activate_spare,
0e600426 5059#endif
4cce4069 5060 .name = "ddf",
a322f70c 5061};