]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: convert big endian to be32 type
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
/*
 * mdadm - manage Linux "md" devices aka RAID arrays.
 *
 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
 *
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *    Author: Neil Brown
 *    Email: <neil@brown.name>
 *
 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
 * (July 28 2006).  Reused by permission of SNIA.
 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
34/* a non-official T10 name for creation GUIDs */
35static char T10[] = "Linux-MD";
36
/* DDF timestamps are 1980 based, so we need to add the number of
 * seconds in the 1970s to convert them to Linux (1970 based) timestamps:
 * 10 years, 2 of which are leap years.
 */
41#define DECADE (3600*24*(365*10+2))
42unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
bedbf68a 47#define DDF_NOTFOUND (~0U)
48#define DDF_CONTAINER (DDF_NOTFOUND-1)
49
a322f70c
DW
50/* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
4d1bdc18 61typedef struct __be16 {
62 __u16 _v16;
63} be16;
64#define be16_eq(x, y) ((x)._v16 == (y)._v16)
65
66typedef struct __be32 {
67 __u32 _v32;
68} be32;
69#define be32_eq(x, y) ((x)._v32 == (y)._v32)
70
71typedef struct __be64 {
72 __u64 _v64;
73} be64;
74#define be64_eq(x, y) ((x)._v64 == (y)._v64)
75
76#define be16_to_cpu(be) __be16_to_cpu((be)._v16)
77static inline be16 cpu_to_be16(__u16 x)
78{
79 be16 be = { ._v16 = __cpu_to_be16(x) };
80 return be;
81}
82
83#define be32_to_cpu(be) __be32_to_cpu((be)._v32)
84static inline be32 cpu_to_be32(__u32 x)
85{
86 be32 be = { ._v32 = __cpu_to_be32(x) };
87 return be;
88}
89
90#define be64_to_cpu(be) __be64_to_cpu((be)._v64)
91static inline be64 cpu_to_be64(__u64 x)
92{
93 be64 be = { ._v64 = __cpu_to_be64(x) };
94 return be;
95}
96
a322f70c
DW
97/* Primary Raid Level (PRL) */
98#define DDF_RAID0 0x00
99#define DDF_RAID1 0x01
100#define DDF_RAID3 0x03
101#define DDF_RAID4 0x04
102#define DDF_RAID5 0x05
103#define DDF_RAID1E 0x11
104#define DDF_JBOD 0x0f
105#define DDF_CONCAT 0x1f
106#define DDF_RAID5E 0x15
107#define DDF_RAID5EE 0x25
59e36268 108#define DDF_RAID6 0x06
a322f70c
DW
109
110/* Raid Level Qualifier (RLQ) */
111#define DDF_RAID0_SIMPLE 0x00
112#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
113#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
114#define DDF_RAID3_0 0x00 /* parity in first extent */
115#define DDF_RAID3_N 0x01 /* parity in last extent */
116#define DDF_RAID4_0 0x00 /* parity in first extent */
117#define DDF_RAID4_N 0x01 /* parity in last extent */
118/* these apply to raid5e and raid5ee as well */
119#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
59e36268 120#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
a322f70c
DW
121#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
122#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
123
124#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
125#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
126
127/* Secondary RAID Level (SRL) */
128#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
129#define DDF_2MIRRORED 0x01
130#define DDF_2CONCAT 0x02
131#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
132
133/* Magic numbers */
60931cf9 134#define DDF_HEADER_MAGIC cpu_to_be32(0xDE11DE11)
135#define DDF_CONTROLLER_MAGIC cpu_to_be32(0xAD111111)
136#define DDF_PHYS_RECORDS_MAGIC cpu_to_be32(0x22222222)
137#define DDF_PHYS_DATA_MAGIC cpu_to_be32(0x33333333)
138#define DDF_VIRT_RECORDS_MAGIC cpu_to_be32(0xDDDDDDDD)
139#define DDF_VD_CONF_MAGIC cpu_to_be32(0xEEEEEEEE)
140#define DDF_SPARE_ASSIGN_MAGIC cpu_to_be32(0x55555555)
141#define DDF_VU_CONF_MAGIC cpu_to_be32(0x88888888)
142#define DDF_VENDOR_LOG_MAGIC cpu_to_be32(0x01dBEEF0)
143#define DDF_BBM_LOG_MAGIC cpu_to_be32(0xABADB10C)
a322f70c
DW
144
145#define DDF_GUID_LEN 24
59e36268
NB
146#define DDF_REVISION_0 "01.00.00"
147#define DDF_REVISION_2 "01.02.00"
a322f70c
DW
148
149struct ddf_header {
60931cf9 150 be32 magic; /* DDF_HEADER_MAGIC */
151 be32 crc;
a322f70c 152 char guid[DDF_GUID_LEN];
59e36268 153 char revision[8]; /* 01.02.00 */
60931cf9 154 be32 seq; /* starts at '1' */
155 be32 timestamp;
a322f70c
DW
156 __u8 openflag;
157 __u8 foreignflag;
158 __u8 enforcegroups;
159 __u8 pad0; /* 0xff */
160 __u8 pad1[12]; /* 12 * 0xff */
161 /* 64 bytes so far */
162 __u8 header_ext[32]; /* reserved: fill with 0xff */
163 __u64 primary_lba;
164 __u64 secondary_lba;
165 __u8 type;
166 __u8 pad2[3]; /* 0xff */
60931cf9 167 be32 workspace_len; /* sectors for vendor space -
a322f70c
DW
168 * at least 32768(sectors) */
169 __u64 workspace_lba;
170 __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
171 __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
172 __u16 max_partitions; /* i.e. max num of configuration
173 record entries per disk */
174 __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
175 *12/512) */
176 __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
177 __u8 pad3[54]; /* 0xff */
178 /* 192 bytes so far */
60931cf9 179 be32 controller_section_offset;
180 be32 controller_section_length;
181 be32 phys_section_offset;
182 be32 phys_section_length;
183 be32 virt_section_offset;
184 be32 virt_section_length;
185 be32 config_section_offset;
186 be32 config_section_length;
187 be32 data_section_offset;
188 be32 data_section_length;
189 be32 bbm_section_offset;
190 be32 bbm_section_length;
191 be32 diag_space_offset;
192 be32 diag_space_length;
193 be32 vendor_offset;
194 be32 vendor_length;
a322f70c
DW
195 /* 256 bytes so far */
196 __u8 pad4[256]; /* 0xff */
197};
198
199/* type field */
200#define DDF_HEADER_ANCHOR 0x00
201#define DDF_HEADER_PRIMARY 0x01
202#define DDF_HEADER_SECONDARY 0x02
203
204/* The content of the 'controller section' - global scope */
205struct ddf_controller_data {
60931cf9 206 be32 magic; /* DDF_CONTROLLER_MAGIC */
207 be32 crc;
a322f70c
DW
208 char guid[DDF_GUID_LEN];
209 struct controller_type {
210 __u16 vendor_id;
211 __u16 device_id;
212 __u16 sub_vendor_id;
213 __u16 sub_device_id;
214 } type;
215 char product_id[16];
216 __u8 pad[8]; /* 0xff */
217 __u8 vendor_data[448];
218};
219
220/* The content of phys_section - global scope */
221struct phys_disk {
60931cf9 222 be32 magic; /* DDF_PHYS_RECORDS_MAGIC */
223 be32 crc;
a322f70c
DW
224 __u16 used_pdes;
225 __u16 max_pdes;
226 __u8 pad[52];
227 struct phys_disk_entry {
228 char guid[DDF_GUID_LEN];
60931cf9 229 be32 refnum;
a322f70c
DW
230 __u16 type;
231 __u16 state;
232 __u64 config_size; /* DDF structures must be after here */
233 char path[18]; /* another horrible structure really */
234 __u8 pad[6];
235 } entries[0];
236};
237
238/* phys_disk_entry.type is a bitmap - bigendian remember */
239#define DDF_Forced_PD_GUID 1
240#define DDF_Active_in_VD 2
88c164f4 241#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
a322f70c
DW
242#define DDF_Spare 8 /* overrides Global_spare */
243#define DDF_Foreign 16
244#define DDF_Legacy 32 /* no DDF on this device */
245
246#define DDF_Interface_mask 0xf00
247#define DDF_Interface_SCSI 0x100
248#define DDF_Interface_SAS 0x200
249#define DDF_Interface_SATA 0x300
250#define DDF_Interface_FC 0x400
251
252/* phys_disk_entry.state is a bigendian bitmap */
253#define DDF_Online 1
254#define DDF_Failed 2 /* overrides 1,4,8 */
255#define DDF_Rebuilding 4
256#define DDF_Transition 8
257#define DDF_SMART 16
258#define DDF_ReadErrors 32
259#define DDF_Missing 64
260
261/* The content of the virt_section global scope */
262struct virtual_disk {
60931cf9 263 be32 magic; /* DDF_VIRT_RECORDS_MAGIC */
264 be32 crc;
a322f70c
DW
265 __u16 populated_vdes;
266 __u16 max_vdes;
267 __u8 pad[52];
268 struct virtual_entry {
269 char guid[DDF_GUID_LEN];
270 __u16 unit;
271 __u16 pad0; /* 0xffff */
272 __u16 guid_crc;
273 __u16 type;
274 __u8 state;
275 __u8 init_state;
276 __u8 pad1[14];
277 char name[16];
278 } entries[0];
279};
280
281/* virtual_entry.type is a bitmap - bigendian */
282#define DDF_Shared 1
283#define DDF_Enforce_Groups 2
284#define DDF_Unicode 4
285#define DDF_Owner_Valid 8
286
287/* virtual_entry.state is a bigendian bitmap */
288#define DDF_state_mask 0x7
289#define DDF_state_optimal 0x0
290#define DDF_state_degraded 0x1
291#define DDF_state_deleted 0x2
292#define DDF_state_missing 0x3
293#define DDF_state_failed 0x4
7a7cc504 294#define DDF_state_part_optimal 0x5
a322f70c
DW
295
296#define DDF_state_morphing 0x8
297#define DDF_state_inconsistent 0x10
298
299/* virtual_entry.init_state is a bigendian bitmap */
300#define DDF_initstate_mask 0x03
301#define DDF_init_not 0x00
7a7cc504
NB
302#define DDF_init_quick 0x01 /* initialisation is progress.
303 * i.e. 'state_inconsistent' */
a322f70c
DW
304#define DDF_init_full 0x02
305
306#define DDF_access_mask 0xc0
307#define DDF_access_rw 0x00
308#define DDF_access_ro 0x80
309#define DDF_access_blocked 0xc0
310
311/* The content of the config_section - local scope
312 * It has multiple records each config_record_len sectors
313 * They can be vd_config or spare_assign
314 */
315
316struct vd_config {
60931cf9 317 be32 magic; /* DDF_VD_CONF_MAGIC */
318 be32 crc;
a322f70c 319 char guid[DDF_GUID_LEN];
60931cf9 320 be32 timestamp;
321 be32 seqnum;
a322f70c
DW
322 __u8 pad0[24];
323 __u16 prim_elmnt_count;
324 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
325 __u8 prl;
326 __u8 rlq;
327 __u8 sec_elmnt_count;
328 __u8 sec_elmnt_seq;
329 __u8 srl;
598f0d58
NB
330 __u64 blocks; /* blocks per component could be different
331 * on different component devices...(only
332 * for concat I hope) */
333 __u64 array_blocks; /* blocks in array */
a322f70c 334 __u8 pad1[8];
60931cf9 335 be32 spare_refs[8];
a322f70c
DW
336 __u8 cache_pol[8];
337 __u8 bg_rate;
338 __u8 pad2[3];
339 __u8 pad3[52];
340 __u8 pad4[192];
341 __u8 v0[32]; /* reserved- 0xff */
342 __u8 v1[32]; /* reserved- 0xff */
343 __u8 v2[16]; /* reserved- 0xff */
344 __u8 v3[16]; /* reserved- 0xff */
345 __u8 vendor[32];
60931cf9 346 be32 phys_refnum[0]; /* refnum of each disk in sequence */
a322f70c
DW
347 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
348 bvd are always the same size */
349};
57a66662 350#define LBA_OFFSET(ddf, vd) ((__u64 *) &(vd)->phys_refnum[(ddf)->mppe])
a322f70c
DW
351
352/* vd_config.cache_pol[7] is a bitmap */
353#define DDF_cache_writeback 1 /* else writethrough */
354#define DDF_cache_wadaptive 2 /* only applies if writeback */
355#define DDF_cache_readahead 4
356#define DDF_cache_radaptive 8 /* only if doing read-ahead */
357#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
358#define DDF_cache_wallowed 32 /* enable write caching */
359#define DDF_cache_rallowed 64 /* enable read caching */
360
361struct spare_assign {
60931cf9 362 be32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
363 be32 crc;
364 be32 timestamp;
a322f70c
DW
365 __u8 reserved[7];
366 __u8 type;
367 __u16 populated; /* SAEs used */
368 __u16 max; /* max SAEs */
369 __u8 pad[8];
370 struct spare_assign_entry {
371 char guid[DDF_GUID_LEN];
372 __u16 secondary_element;
373 __u8 pad[6];
374 } spare_ents[0];
375};
376/* spare_assign.type is a bitmap */
377#define DDF_spare_dedicated 0x1 /* else global */
378#define DDF_spare_revertible 0x2 /* else committable */
379#define DDF_spare_active 0x4 /* else not active */
380#define DDF_spare_affinity 0x8 /* enclosure affinity */
381
382/* The data_section contents - local scope */
383struct disk_data {
60931cf9 384 be32 magic; /* DDF_PHYS_DATA_MAGIC */
385 be32 crc;
a322f70c 386 char guid[DDF_GUID_LEN];
60931cf9 387 be32 refnum; /* crc of some magic drive data ... */
a322f70c
DW
388 __u8 forced_ref; /* set when above was not result of magic */
389 __u8 forced_guid; /* set if guid was forced rather than magic */
390 __u8 vendor[32];
391 __u8 pad[442];
392};
393
394/* bbm_section content */
395struct bad_block_log {
60931cf9 396 be32 magic;
397 be32 crc;
a322f70c 398 __u16 entry_count;
60931cf9 399 be32 spare_count;
a322f70c
DW
400 __u8 pad[10];
401 __u64 first_spare;
402 struct mapped_block {
403 __u64 defective_start;
60931cf9 404 be32 replacement_start;
a322f70c
DW
405 __u16 remap_count;
406 __u8 pad[2];
407 } entries[0];
408};
409
/* Struct for internally holding ddf structures */
/* The DDF structure stored on each device is potentially
 * quite different, as some data is global and some is local.
 * The global data is:
 *   - ddf header
 *   - controller_data
 *   - Physical disk records
 *   - Virtual disk records
 * The local data is:
 *   - Configuration records
 *   - Physical Disk data section
 *  (and Bad block and vendor which I don't care about yet).
 *
 * The local data is parsed into separate lists as it is read
 * and reconstructed for writing.  This means that we only need
 * to make config changes once and they are automatically
 * propagated to all devices.
 * Note that the ddf_super has space for the conf and disk data
 * for this disk and also for a list of all such data.
 * The list is only used for the superblock that is being
 * built in Create or Assemble to describe the whole array.
 */
432struct ddf_super {
6416d527 433 struct ddf_header anchor, primary, secondary;
a322f70c 434 struct ddf_controller_data controller;
6416d527 435 struct ddf_header *active;
a322f70c
DW
436 struct phys_disk *phys;
437 struct virtual_disk *virt;
438 int pdsize, vdsize;
f21e18ca 439 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 440 int currentdev;
18a2f463 441 int updates_pending;
a322f70c 442 struct vcl {
6416d527
NB
443 union {
444 char space[512];
445 struct {
446 struct vcl *next;
f21e18ca 447 unsigned int vcnum; /* index into ->virt */
8ec5d685 448 struct vd_config **other_bvds;
6416d527
NB
449 __u64 *block_sizes; /* NULL if all the same */
450 };
451 };
a322f70c 452 struct vd_config conf;
d2ca6449 453 } *conflist, *currentconf;
a322f70c 454 struct dl {
6416d527
NB
455 union {
456 char space[512];
457 struct {
458 struct dl *next;
459 int major, minor;
460 char *devname;
461 int fd;
462 unsigned long long size; /* sectors */
097bcf00 463 unsigned long long primary_lba; /* sectors */
464 unsigned long long secondary_lba; /* sectors */
465 unsigned long long workspace_lba; /* sectors */
6416d527
NB
466 int pdnum; /* index in ->phys */
467 struct spare_assign *spare;
8592f29d
N
468 void *mdupdate; /* hold metadata update */
469
470 /* These fields used by auto-layout */
471 int raiddisk; /* slot to fill in autolayout */
472 __u64 esize;
6416d527
NB
473 };
474 };
a322f70c 475 struct disk_data disk;
b2280677 476 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 477 } *dlist, *add_list;
a322f70c
DW
478};
479
480#ifndef offsetof
481#define offsetof(t,f) ((size_t)&(((t*)0)->f))
482#endif
483
#if DEBUG
static int all_ff(const char *guid);

/*
 * Debug aid: print "<caller>/<msg>: " followed by the index, state and
 * init_state of every virtual disk entry for which all_ff() on the guid
 * returns zero (presumably: every slot that is in use).
 */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	const unsigned int max_vds =
		__be16_to_cpu(ddf->active->max_vd_entries);
	unsigned int vd;

	dprintf("%s/%s: ", __func__, msg);
	for (vd = 0; vd < max_vds; vd++) {
		const struct virtual_entry *ve = &ddf->virt->entries[vd];

		if (!all_ff(ve->guid))
			dprintf("%u(s=%02x i=%02x) ", vd,
				ve->state, ve->init_state);
	}
	dprintf("\n");
}
#else
/* Non-debug builds: state tracing compiles to nothing. */
static void pr_state(const struct ddf_super *ddf, const char *msg)
{
}
#endif
502
35c3606d 503static void _ddf_set_updates_pending(struct ddf_super *ddf, const char *func)
504{
505 ddf->updates_pending = 1;
60931cf9 506 ddf->active->seq = cpu_to_be32((be32_to_cpu(ddf->active->seq)+1));
35c3606d 507 pr_state(ddf, func);
508}
509
510#define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
7d5a7ff3 511
fcc22180 512static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
60931cf9 513 be32 refnum, unsigned int nmax,
fcc22180 514 const struct vd_config **bvd,
515 unsigned int *idx);
516
60931cf9 517static be32 calc_crc(void *buf, int len)
a322f70c
DW
518{
519 /* crcs are always at the same place as in the ddf_header */
520 struct ddf_header *ddf = buf;
60931cf9 521 be32 oldcrc = ddf->crc;
a322f70c 522 __u32 newcrc;
60931cf9 523 ddf->crc = cpu_to_be32(0xffffffff);
a322f70c
DW
524
525 newcrc = crc32(0, buf, len);
526 ddf->crc = oldcrc;
4abe6b70
N
527 /* The crc is store (like everything) bigendian, so convert
528 * here for simplicity
529 */
60931cf9 530 return cpu_to_be32(newcrc);
a322f70c
DW
531}
532
a3163bf0 533#define DDF_INVALID_LEVEL 0xff
534#define DDF_NO_SECONDARY 0xff
535static int err_bad_md_layout(const mdu_array_info_t *array)
536{
537 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
538 array->level, array->layout, array->raid_disks);
2aba583f 539 return -1;
a3163bf0 540}
541
542static int layout_md2ddf(const mdu_array_info_t *array,
543 struct vd_config *conf)
544{
545 __u16 prim_elmnt_count = __cpu_to_be16(array->raid_disks);
546 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
547 __u8 sec_elmnt_count = 1;
548 __u8 srl = DDF_NO_SECONDARY;
549
550 switch (array->level) {
551 case LEVEL_LINEAR:
552 prl = DDF_CONCAT;
553 break;
554 case 0:
555 rlq = DDF_RAID0_SIMPLE;
556 prl = DDF_RAID0;
557 break;
558 case 1:
559 switch (array->raid_disks) {
560 case 2:
561 rlq = DDF_RAID1_SIMPLE;
562 break;
563 case 3:
564 rlq = DDF_RAID1_MULTI;
565 break;
566 default:
567 return err_bad_md_layout(array);
568 }
569 prl = DDF_RAID1;
570 break;
571 case 4:
572 if (array->layout != 0)
573 return err_bad_md_layout(array);
574 rlq = DDF_RAID4_N;
575 prl = DDF_RAID4;
576 break;
577 case 5:
578 switch (array->layout) {
579 case ALGORITHM_LEFT_ASYMMETRIC:
580 rlq = DDF_RAID5_N_RESTART;
581 break;
582 case ALGORITHM_RIGHT_ASYMMETRIC:
583 rlq = DDF_RAID5_0_RESTART;
584 break;
585 case ALGORITHM_LEFT_SYMMETRIC:
586 rlq = DDF_RAID5_N_CONTINUE;
587 break;
588 case ALGORITHM_RIGHT_SYMMETRIC:
589 /* not mentioned in standard */
590 default:
591 return err_bad_md_layout(array);
592 }
593 prl = DDF_RAID5;
594 break;
595 case 6:
596 switch (array->layout) {
597 case ALGORITHM_ROTATING_N_RESTART:
598 rlq = DDF_RAID5_N_RESTART;
599 break;
600 case ALGORITHM_ROTATING_ZERO_RESTART:
601 rlq = DDF_RAID6_0_RESTART;
602 break;
603 case ALGORITHM_ROTATING_N_CONTINUE:
604 rlq = DDF_RAID5_N_CONTINUE;
605 break;
606 default:
607 return err_bad_md_layout(array);
608 }
609 prl = DDF_RAID6;
610 break;
611 case 10:
612 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
613 rlq = DDF_RAID1_SIMPLE;
614 prim_elmnt_count = __cpu_to_be16(2);
615 sec_elmnt_count = array->raid_disks / 2;
616 } else if (array->raid_disks % 3 == 0
617 && array->layout == 0x103) {
618 rlq = DDF_RAID1_MULTI;
619 prim_elmnt_count = __cpu_to_be16(3);
620 sec_elmnt_count = array->raid_disks / 3;
621 } else
622 return err_bad_md_layout(array);
623 srl = DDF_2SPANNED;
624 prl = DDF_RAID1;
625 break;
626 default:
627 return err_bad_md_layout(array);
628 }
629 conf->prl = prl;
630 conf->prim_elmnt_count = prim_elmnt_count;
631 conf->rlq = rlq;
632 conf->srl = srl;
633 conf->sec_elmnt_count = sec_elmnt_count;
634 return 0;
635}
636
8a2848a7 637static int err_bad_ddf_layout(const struct vd_config *conf)
638{
639 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
640 conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
641 return -1;
642}
643
644static int layout_ddf2md(const struct vd_config *conf,
645 mdu_array_info_t *array)
646{
647 int level = LEVEL_UNSUPPORTED;
648 int layout = 0;
649 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
650
651 if (conf->sec_elmnt_count > 1) {
652 /* see also check_secondary() */
653 if (conf->prl != DDF_RAID1 ||
654 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
655 pr_err("Unsupported secondary RAID level %u/%u\n",
656 conf->prl, conf->srl);
657 return -1;
658 }
659 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
660 layout = 0x102;
661 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
662 layout = 0x103;
663 else
664 return err_bad_ddf_layout(conf);
665 raiddisks *= conf->sec_elmnt_count;
666 level = 10;
667 goto good;
668 }
669
670 switch (conf->prl) {
671 case DDF_CONCAT:
672 level = LEVEL_LINEAR;
673 break;
674 case DDF_RAID0:
675 if (conf->rlq != DDF_RAID0_SIMPLE)
676 return err_bad_ddf_layout(conf);
677 level = 0;
678 break;
679 case DDF_RAID1:
680 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
681 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
682 return err_bad_ddf_layout(conf);
683 level = 1;
684 break;
685 case DDF_RAID4:
686 if (conf->rlq != DDF_RAID4_N)
687 return err_bad_ddf_layout(conf);
688 level = 4;
689 break;
690 case DDF_RAID5:
691 switch (conf->rlq) {
692 case DDF_RAID5_N_RESTART:
693 layout = ALGORITHM_LEFT_ASYMMETRIC;
694 break;
695 case DDF_RAID5_0_RESTART:
696 layout = ALGORITHM_RIGHT_ASYMMETRIC;
697 break;
698 case DDF_RAID5_N_CONTINUE:
699 layout = ALGORITHM_LEFT_SYMMETRIC;
700 break;
701 default:
702 return err_bad_ddf_layout(conf);
703 }
704 level = 5;
705 break;
706 case DDF_RAID6:
707 switch (conf->rlq) {
708 case DDF_RAID5_N_RESTART:
709 layout = ALGORITHM_ROTATING_N_RESTART;
710 break;
711 case DDF_RAID6_0_RESTART:
712 layout = ALGORITHM_ROTATING_ZERO_RESTART;
713 break;
714 case DDF_RAID5_N_CONTINUE:
715 layout = ALGORITHM_ROTATING_N_CONTINUE;
716 break;
717 default:
718 return err_bad_ddf_layout(conf);
719 }
720 level = 6;
721 break;
722 default:
723 return err_bad_ddf_layout(conf);
724 };
725
726good:
727 array->level = level;
728 array->layout = layout;
729 array->raid_disks = raiddisks;
730 return 0;
731}
732
a322f70c
DW
733static int load_ddf_header(int fd, unsigned long long lba,
734 unsigned long long size,
735 int type,
736 struct ddf_header *hdr, struct ddf_header *anchor)
737{
738 /* read a ddf header (primary or secondary) from fd/lba
739 * and check that it is consistent with anchor
740 * Need to check:
741 * magic, crc, guid, rev, and LBA's header_type, and
742 * everything after header_type must be the same
743 */
744 if (lba >= size-1)
745 return 0;
746
747 if (lseek64(fd, lba<<9, 0) < 0)
748 return 0;
749
750 if (read(fd, hdr, 512) != 512)
751 return 0;
752
60931cf9 753 if (!be32_eq(hdr->magic, DDF_HEADER_MAGIC))
a322f70c 754 return 0;
60931cf9 755 if (!be32_eq(calc_crc(hdr, 512), hdr->crc))
a322f70c
DW
756 return 0;
757 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
758 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
759 anchor->primary_lba != hdr->primary_lba ||
760 anchor->secondary_lba != hdr->secondary_lba ||
761 hdr->type != type ||
762 memcmp(anchor->pad2, hdr->pad2, 512 -
763 offsetof(struct ddf_header, pad2)) != 0)
764 return 0;
765
766 /* Looks good enough to me... */
767 return 1;
768}
769
770static void *load_section(int fd, struct ddf_super *super, void *buf,
60931cf9 771 be32 offset_be, be32 len_be, int check)
a322f70c 772{
60931cf9 773 unsigned long long offset = be32_to_cpu(offset_be);
774 unsigned long long len = be32_to_cpu(len_be);
a322f70c
DW
775 int dofree = (buf == NULL);
776
777 if (check)
778 if (len != 2 && len != 8 && len != 32
779 && len != 128 && len != 512)
780 return NULL;
781
782 if (len > 1024)
783 return NULL;
784 if (buf) {
785 /* All pre-allocated sections are a single block */
786 if (len != 1)
787 return NULL;
3d2c4fc7
DW
788 } else if (posix_memalign(&buf, 512, len<<9) != 0)
789 buf = NULL;
6416d527 790
a322f70c
DW
791 if (!buf)
792 return NULL;
793
794 if (super->active->type == 1)
795 offset += __be64_to_cpu(super->active->primary_lba);
796 else
797 offset += __be64_to_cpu(super->active->secondary_lba);
798
f21e18ca 799 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
800 if (dofree)
801 free(buf);
802 return NULL;
803 }
f21e18ca 804 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
805 if (dofree)
806 free(buf);
807 return NULL;
808 }
809 return buf;
810}
811
812static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
813{
814 unsigned long long dsize;
815
816 get_dev_size(fd, NULL, &dsize);
817
818 if (lseek64(fd, dsize-512, 0) < 0) {
819 if (devname)
e7b84f9d
N
820 pr_err("Cannot seek to anchor block on %s: %s\n",
821 devname, strerror(errno));
a322f70c
DW
822 return 1;
823 }
824 if (read(fd, &super->anchor, 512) != 512) {
825 if (devname)
e7b84f9d
N
826 pr_err("Cannot read anchor block on %s: %s\n",
827 devname, strerror(errno));
a322f70c
DW
828 return 1;
829 }
60931cf9 830 if (!be32_eq(super->anchor.magic, DDF_HEADER_MAGIC)) {
a322f70c 831 if (devname)
e7b84f9d 832 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
833 devname);
834 return 2;
835 }
60931cf9 836 if (!be32_eq(calc_crc(&super->anchor, 512), super->anchor.crc)) {
a322f70c 837 if (devname)
e7b84f9d 838 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
839 devname);
840 return 2;
841 }
59e36268
NB
842 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
843 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 844 if (devname)
e7b84f9d 845 pr_err("can only support super revision"
59e36268
NB
846 " %.8s and earlier, not %.8s on %s\n",
847 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
848 return 2;
849 }
dbeb699a 850 super->active = NULL;
a322f70c
DW
851 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
852 dsize >> 9, 1,
853 &super->primary, &super->anchor) == 0) {
854 if (devname)
e7b84f9d
N
855 pr_err("Failed to load primary DDF header "
856 "on %s\n", devname);
dbeb699a 857 } else
858 super->active = &super->primary;
60931cf9 859
a322f70c
DW
860 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
861 dsize >> 9, 2,
862 &super->secondary, &super->anchor)) {
3eff7c1d 863 if (super->active == NULL
60931cf9 864 || (be32_to_cpu(super->primary.seq)
865 < be32_to_cpu(super->secondary.seq) &&
3eff7c1d 866 !super->secondary.openflag)
60931cf9 867 || (be32_to_cpu(super->primary.seq)
868 == be32_to_cpu(super->secondary.seq) &&
a322f70c
DW
869 super->primary.openflag && !super->secondary.openflag)
870 )
871 super->active = &super->secondary;
dbeb699a 872 } else if (devname)
873 pr_err("Failed to load secondary DDF header on %s\n",
874 devname);
875 if (super->active == NULL)
876 return 2;
a322f70c
DW
877 return 0;
878}
879
880static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
881{
882 void *ok;
883 ok = load_section(fd, super, &super->controller,
884 super->active->controller_section_offset,
885 super->active->controller_section_length,
886 0);
887 super->phys = load_section(fd, super, NULL,
888 super->active->phys_section_offset,
889 super->active->phys_section_length,
890 1);
60931cf9 891 super->pdsize = be32_to_cpu(super->active->phys_section_length) * 512;
a322f70c
DW
892
893 super->virt = load_section(fd, super, NULL,
894 super->active->virt_section_offset,
895 super->active->virt_section_length,
896 1);
60931cf9 897 super->vdsize = be32_to_cpu(super->active->virt_section_length) * 512;
a322f70c
DW
898 if (!ok ||
899 !super->phys ||
900 !super->virt) {
901 free(super->phys);
902 free(super->virt);
a2349791
NB
903 super->phys = NULL;
904 super->virt = NULL;
a322f70c
DW
905 return 2;
906 }
907 super->conflist = NULL;
908 super->dlist = NULL;
8c3b8c2c
NB
909
910 super->max_part = __be16_to_cpu(super->active->max_partitions);
911 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
912 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
a322f70c
DW
913 return 0;
914}
915
3c48f7be 916#define DDF_UNUSED_BVD 0xff
917static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
918{
919 unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
920 unsigned int i, vdsize;
921 void *p;
922 if (n_vds == 0) {
923 vcl->other_bvds = NULL;
924 return 0;
925 }
926 vdsize = ddf->conf_rec_len * 512;
927 if (posix_memalign(&p, 512, n_vds *
928 (vdsize + sizeof(struct vd_config *))) != 0)
929 return -1;
930 vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
931 for (i = 0; i < n_vds; i++) {
932 vcl->other_bvds[i] = p + i * vdsize;
933 memset(vcl->other_bvds[i], 0, vdsize);
934 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
935 }
936 return 0;
937}
938
3dc821b0 939static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
940 unsigned int len)
941{
942 int i;
943 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 944 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
3dc821b0 945 break;
946
947 if (i < vcl->conf.sec_elmnt_count-1) {
60931cf9 948 if (be32_to_cpu(vd->seqnum) <=
949 be32_to_cpu(vcl->other_bvds[i]->seqnum))
3dc821b0 950 return;
951 } else {
952 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 953 if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
3dc821b0 954 break;
955 if (i == vcl->conf.sec_elmnt_count-1) {
956 pr_err("no space for sec level config %u, count is %u\n",
957 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
958 return;
959 }
3dc821b0 960 }
961 memcpy(vcl->other_bvds[i], vd, len);
962}
963
a322f70c
DW
964static int load_ddf_local(int fd, struct ddf_super *super,
965 char *devname, int keep)
966{
967 struct dl *dl;
968 struct stat stb;
969 char *conf;
f21e18ca
N
970 unsigned int i;
971 unsigned int confsec;
b2280677 972 int vnum;
f21e18ca 973 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
d2ca6449 974 unsigned long long dsize;
a322f70c
DW
975
976 /* First the local disk info */
3d2c4fc7 977 if (posix_memalign((void**)&dl, 512,
6416d527 978 sizeof(*dl) +
3d2c4fc7 979 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 980 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
981 __func__);
982 return 1;
983 }
a322f70c
DW
984
985 load_section(fd, super, &dl->disk,
986 super->active->data_section_offset,
987 super->active->data_section_length,
988 0);
503975b9 989 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 990
a322f70c
DW
991 fstat(fd, &stb);
992 dl->major = major(stb.st_rdev);
993 dl->minor = minor(stb.st_rdev);
994 dl->next = super->dlist;
995 dl->fd = keep ? fd : -1;
d2ca6449
NB
996
997 dl->size = 0;
998 if (get_dev_size(fd, devname, &dsize))
999 dl->size = dsize >> 9;
097bcf00 1000 /* If the disks have different sizes, the LBAs will differ
1001 * between phys disks.
1002 * At this point here, the values in super->active must be valid
1003 * for this phys disk. */
1004 dl->primary_lba = super->active->primary_lba;
1005 dl->secondary_lba = super->active->secondary_lba;
1006 dl->workspace_lba = super->active->workspace_lba;
b2280677 1007 dl->spare = NULL;
f21e18ca 1008 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
1009 dl->vlist[i] = NULL;
1010 super->dlist = dl;
59e36268 1011 dl->pdnum = -1;
f21e18ca 1012 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
1013 if (memcmp(super->phys->entries[i].guid,
1014 dl->disk.guid, DDF_GUID_LEN) == 0)
1015 dl->pdnum = i;
1016
a322f70c
DW
1017 /* Now the config list. */
1018 /* 'conf' is an array of config entries, some of which are
1019 * probably invalid. Those which are good need to be copied into
1020 * the conflist
1021 */
a322f70c
DW
1022
1023 conf = load_section(fd, super, NULL,
1024 super->active->config_section_offset,
1025 super->active->config_section_length,
1026 0);
1027
b2280677 1028 vnum = 0;
e223334f 1029 for (confsec = 0;
60931cf9 1030 confsec < be32_to_cpu(super->active->config_section_length);
e223334f 1031 confsec += super->conf_rec_len) {
a322f70c 1032 struct vd_config *vd =
e223334f 1033 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
1034 struct vcl *vcl;
1035
60931cf9 1036 if (be32_eq(vd->magic, DDF_SPARE_ASSIGN_MAGIC)) {
b2280677
NB
1037 if (dl->spare)
1038 continue;
3d2c4fc7
DW
1039 if (posix_memalign((void**)&dl->spare, 512,
1040 super->conf_rec_len*512) != 0) {
e7b84f9d
N
1041 pr_err("%s could not allocate spare info buf\n",
1042 __func__);
3d2c4fc7
DW
1043 return 1;
1044 }
613b0d17 1045
b2280677
NB
1046 memcpy(dl->spare, vd, super->conf_rec_len*512);
1047 continue;
1048 }
60931cf9 1049 if (!be32_eq(vd->magic, DDF_VD_CONF_MAGIC))
a322f70c
DW
1050 continue;
1051 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1052 if (memcmp(vcl->conf.guid,
1053 vd->guid, DDF_GUID_LEN) == 0)
1054 break;
1055 }
1056
1057 if (vcl) {
b2280677 1058 dl->vlist[vnum++] = vcl;
3dc821b0 1059 if (vcl->other_bvds != NULL &&
1060 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1061 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1062 continue;
1063 }
60931cf9 1064 if (be32_to_cpu(vd->seqnum) <=
1065 be32_to_cpu(vcl->conf.seqnum))
a322f70c 1066 continue;
59e36268 1067 } else {
3d2c4fc7 1068 if (posix_memalign((void**)&vcl, 512,
6416d527 1069 (super->conf_rec_len*512 +
3d2c4fc7 1070 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
1071 pr_err("%s could not allocate vcl buf\n",
1072 __func__);
3d2c4fc7
DW
1073 return 1;
1074 }
a322f70c 1075 vcl->next = super->conflist;
59e36268 1076 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
3c48f7be 1077 vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1078 if (alloc_other_bvds(super, vcl) != 0) {
1079 pr_err("%s could not allocate other bvds\n",
1080 __func__);
1081 free(vcl);
1082 return 1;
1083 };
a322f70c 1084 super->conflist = vcl;
b2280677 1085 dl->vlist[vnum++] = vcl;
a322f70c 1086 }
8c3b8c2c 1087 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
59e36268
NB
1088 for (i=0; i < max_virt_disks ; i++)
1089 if (memcmp(super->virt->entries[i].guid,
1090 vcl->conf.guid, DDF_GUID_LEN)==0)
1091 break;
1092 if (i < max_virt_disks)
1093 vcl->vcnum = i;
a322f70c
DW
1094 }
1095 free(conf);
1096
1097 return 0;
1098}
1099
1100#ifndef MDASSEMBLE
1101static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 1102 void **sbp, char *devname);
a322f70c 1103#endif
37424f13
DW
1104
1105static void free_super_ddf(struct supertype *st);
1106
a322f70c
DW
1107static int load_super_ddf(struct supertype *st, int fd,
1108 char *devname)
1109{
1110 unsigned long long dsize;
1111 struct ddf_super *super;
1112 int rv;
1113
a322f70c
DW
1114 if (get_dev_size(fd, devname, &dsize) == 0)
1115 return 1;
1116
b31df436 1117 if (!st->ignore_hw_compat && test_partition(fd))
691c6ee1
N
1118 /* DDF is not allowed on partitions */
1119 return 1;
1120
a322f70c
DW
1121 /* 32M is a lower bound */
1122 if (dsize <= 32*1024*1024) {
97320d7c 1123 if (devname)
e7b84f9d
N
1124 pr_err("%s is too small for ddf: "
1125 "size is %llu sectors.\n",
1126 devname, dsize>>9);
97320d7c 1127 return 1;
a322f70c
DW
1128 }
1129 if (dsize & 511) {
97320d7c 1130 if (devname)
e7b84f9d
N
1131 pr_err("%s is an odd size for ddf: "
1132 "size is %llu bytes.\n",
1133 devname, dsize);
97320d7c 1134 return 1;
a322f70c
DW
1135 }
1136
37424f13
DW
1137 free_super_ddf(st);
1138
6416d527 1139 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 1140 pr_err("malloc of %zu failed.\n",
a322f70c
DW
1141 sizeof(*super));
1142 return 1;
1143 }
a2349791 1144 memset(super, 0, sizeof(*super));
a322f70c
DW
1145
1146 rv = load_ddf_headers(fd, super, devname);
1147 if (rv) {
1148 free(super);
1149 return rv;
1150 }
1151
1152 /* Have valid headers and have chosen the best. Let's read in the rest*/
1153
1154 rv = load_ddf_global(fd, super, devname);
1155
1156 if (rv) {
1157 if (devname)
e7b84f9d
N
1158 pr_err("Failed to load all information "
1159 "sections on %s\n", devname);
a322f70c
DW
1160 free(super);
1161 return rv;
1162 }
1163
3d2c4fc7
DW
1164 rv = load_ddf_local(fd, super, devname, 0);
1165
1166 if (rv) {
1167 if (devname)
e7b84f9d
N
1168 pr_err("Failed to load all information "
1169 "sections on %s\n", devname);
3d2c4fc7
DW
1170 free(super);
1171 return rv;
1172 }
a322f70c
DW
1173
1174 /* Should possibly check the sections .... */
1175
1176 st->sb = super;
1177 if (st->ss == NULL) {
1178 st->ss = &super_ddf;
1179 st->minor_version = 0;
1180 st->max_devs = 512;
1181 }
1182 return 0;
1183
1184}
1185
1186static void free_super_ddf(struct supertype *st)
1187{
1188 struct ddf_super *ddf = st->sb;
1189 if (ddf == NULL)
1190 return;
1191 free(ddf->phys);
1192 free(ddf->virt);
1193 while (ddf->conflist) {
1194 struct vcl *v = ddf->conflist;
1195 ddf->conflist = v->next;
59e36268
NB
1196 if (v->block_sizes)
1197 free(v->block_sizes);
3c48f7be 1198 if (v->other_bvds)
1199 /*
1200 v->other_bvds[0] points to beginning of buffer,
1201 see alloc_other_bvds()
1202 */
1203 free(v->other_bvds[0]);
a322f70c
DW
1204 free(v);
1205 }
1206 while (ddf->dlist) {
1207 struct dl *d = ddf->dlist;
1208 ddf->dlist = d->next;
1209 if (d->fd >= 0)
1210 close(d->fd);
b2280677
NB
1211 if (d->spare)
1212 free(d->spare);
a322f70c
DW
1213 free(d);
1214 }
8a38cb04
N
1215 while (ddf->add_list) {
1216 struct dl *d = ddf->add_list;
1217 ddf->add_list = d->next;
1218 if (d->fd >= 0)
1219 close(d->fd);
1220 if (d->spare)
1221 free(d->spare);
1222 free(d);
1223 }
a322f70c
DW
1224 free(ddf);
1225 st->sb = NULL;
1226}
1227
1228static struct supertype *match_metadata_desc_ddf(char *arg)
1229{
1230 /* 'ddf' only support containers */
1231 struct supertype *st;
1232 if (strcmp(arg, "ddf") != 0 &&
1233 strcmp(arg, "default") != 0
1234 )
1235 return NULL;
1236
503975b9 1237 st = xcalloc(1, sizeof(*st));
a322f70c
DW
1238 st->ss = &super_ddf;
1239 st->max_devs = 512;
1240 st->minor_version = 0;
1241 st->sb = NULL;
1242 return st;
1243}
1244
a322f70c
DW
1245#ifndef MDASSEMBLE
1246
1247static mapping_t ddf_state[] = {
1248 { "Optimal", 0},
1249 { "Degraded", 1},
1250 { "Deleted", 2},
1251 { "Missing", 3},
1252 { "Failed", 4},
1253 { "Partially Optimal", 5},
1254 { "-reserved-", 6},
1255 { "-reserved-", 7},
1256 { NULL, 0}
1257};
1258
1259static mapping_t ddf_init_state[] = {
1260 { "Not Initialised", 0},
1261 { "QuickInit in Progress", 1},
1262 { "Fully Initialised", 2},
1263 { "*UNKNOWN*", 3},
1264 { NULL, 0}
1265};
1266static mapping_t ddf_access[] = {
1267 { "Read/Write", 0},
1268 { "Reserved", 1},
1269 { "Read Only", 2},
1270 { "Blocked (no access)", 3},
1271 { NULL ,0}
1272};
1273
1274static mapping_t ddf_level[] = {
1275 { "RAID0", DDF_RAID0},
1276 { "RAID1", DDF_RAID1},
1277 { "RAID3", DDF_RAID3},
1278 { "RAID4", DDF_RAID4},
1279 { "RAID5", DDF_RAID5},
1280 { "RAID1E",DDF_RAID1E},
1281 { "JBOD", DDF_JBOD},
1282 { "CONCAT",DDF_CONCAT},
1283 { "RAID5E",DDF_RAID5E},
1284 { "RAID5EE",DDF_RAID5EE},
1285 { "RAID6", DDF_RAID6},
1286 { NULL, 0}
1287};
1288static mapping_t ddf_sec_level[] = {
1289 { "Striped", DDF_2STRIPED},
1290 { "Mirrored", DDF_2MIRRORED},
1291 { "Concat", DDF_2CONCAT},
1292 { "Spanned", DDF_2SPANNED},
1293 { NULL, 0}
1294};
1295#endif
1296
fb9d0acb 1297static int all_ff(const char *guid)
42dc2744
N
1298{
1299 int i;
1300 for (i = 0; i < DDF_GUID_LEN; i++)
1301 if (guid[i] != (char)0xff)
1302 return 0;
1303 return 1;
1304}
1305
a322f70c
DW
1306#ifndef MDASSEMBLE
1307static void print_guid(char *guid, int tstamp)
1308{
1309 /* A GUIDs are part (or all) ASCII and part binary.
1310 * They tend to be space padded.
59e36268
NB
1311 * We print the GUID in HEX, then in parentheses add
1312 * any initial ASCII sequence, and a possible
1313 * time stamp from bytes 16-19
a322f70c
DW
1314 */
1315 int l = DDF_GUID_LEN;
1316 int i;
59e36268
NB
1317
1318 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1319 if ((i&3)==0 && i != 0) printf(":");
1320 printf("%02X", guid[i]&255);
1321 }
1322
cfccea8c 1323 printf("\n (");
a322f70c
DW
1324 while (l && guid[l-1] == ' ')
1325 l--;
1326 for (i=0 ; i<l ; i++) {
1327 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1328 fputc(guid[i], stdout);
1329 else
59e36268 1330 break;
a322f70c
DW
1331 }
1332 if (tstamp) {
1333 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1334 char tbuf[100];
1335 struct tm *tm;
1336 tm = localtime(&then);
59e36268 1337 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1338 fputs(tbuf, stdout);
1339 }
59e36268 1340 printf(")");
a322f70c
DW
1341}
1342
be9b9ef4 1343static const char *guid_str(const char *guid)
1344{
1345 static char buf[DDF_GUID_LEN*2+1];
1346 int i;
1347 char *p = buf;
4a03cbd1 1348 for (i = 0; i < DDF_GUID_LEN; i++) {
1349 unsigned char c = guid[i];
1350 if (c >= 32 && c < 127)
1351 p += sprintf(p, "%c", c);
1352 else
1353 p += sprintf(p, "%02x", c);
1354 }
be9b9ef4 1355 *p = '\0';
1356 return (const char *) buf;
1357}
1358
a322f70c
DW
1359static void examine_vd(int n, struct ddf_super *sb, char *guid)
1360{
8c3b8c2c 1361 int crl = sb->conf_rec_len;
a322f70c
DW
1362 struct vcl *vcl;
1363
1364 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1365 unsigned int i;
a322f70c
DW
1366 struct vd_config *vc = &vcl->conf;
1367
60931cf9 1368 if (!be32_eq(calc_crc(vc, crl*512), vc->crc))
a322f70c
DW
1369 continue;
1370 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1371 continue;
1372
1373 /* Ok, we know about this VD, let's give more details */
b06e3095 1374 printf(" Raid Devices[%d] : %d (", n,
a322f70c 1375 __be16_to_cpu(vc->prim_elmnt_count));
f21e18ca 1376 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095
N
1377 int j;
1378 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1379 for (j=0; j<cnt; j++)
60931cf9 1380 if (be32_eq(vc->phys_refnum[i],
1381 sb->phys->entries[j].refnum))
b06e3095
N
1382 break;
1383 if (i) printf(" ");
1384 if (j < cnt)
1385 printf("%d", j);
1386 else
1387 printf("--");
1388 }
1389 printf(")\n");
1390 if (vc->chunk_shift != 255)
613b0d17
N
1391 printf(" Chunk Size[%d] : %d sectors\n", n,
1392 1 << vc->chunk_shift);
a322f70c
DW
1393 printf(" Raid Level[%d] : %s\n", n,
1394 map_num(ddf_level, vc->prl)?:"-unknown-");
1395 if (vc->sec_elmnt_count != 1) {
1396 printf(" Secondary Position[%d] : %d of %d\n", n,
1397 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1398 printf(" Secondary Level[%d] : %s\n", n,
1399 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1400 }
1401 printf(" Device Size[%d] : %llu\n", n,
c9b6907b 1402 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
a322f70c 1403 printf(" Array Size[%d] : %llu\n", n,
c9b6907b 1404 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1405 }
1406}
1407
1408static void examine_vds(struct ddf_super *sb)
1409{
1410 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
fb9d0acb 1411 unsigned int i;
a322f70c
DW
1412 printf(" Virtual Disks : %d\n", cnt);
1413
fb9d0acb 1414 for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
a322f70c 1415 struct virtual_entry *ve = &sb->virt->entries[i];
fb9d0acb 1416 if (all_ff(ve->guid))
1417 continue;
b06e3095 1418 printf("\n");
a322f70c
DW
1419 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1420 printf("\n");
1421 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1422 printf(" state[%d] : %s, %s%s\n", i,
1423 map_num(ddf_state, ve->state & 7),
1424 (ve->state & 8) ? "Morphing, ": "",
1425 (ve->state & 16)? "Not Consistent" : "Consistent");
1426 printf(" init state[%d] : %s\n", i,
1427 map_num(ddf_init_state, ve->init_state&3));
1428 printf(" access[%d] : %s\n", i,
1429 map_num(ddf_access, (ve->init_state>>6) & 3));
1430 printf(" Name[%d] : %.16s\n", i, ve->name);
1431 examine_vd(i, sb, ve->guid);
1432 }
1433 if (cnt) printf("\n");
1434}
1435
1436static void examine_pds(struct ddf_super *sb)
1437{
1438 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1439 int i;
1440 struct dl *dl;
1441 printf(" Physical Disks : %d\n", cnt);
962371a5 1442 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1443
1444 for (i=0 ; i<cnt ; i++) {
1445 struct phys_disk_entry *pd = &sb->phys->entries[i];
1446 int type = __be16_to_cpu(pd->type);
1447 int state = __be16_to_cpu(pd->state);
1448
b06e3095
N
1449 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1450 //printf("\n");
1451 printf(" %3d %08x ", i,
60931cf9 1452 be32_to_cpu(pd->refnum));
613b0d17 1453 printf("%8lluK ",
c9b6907b 1454 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
b06e3095 1455 for (dl = sb->dlist; dl ; dl = dl->next) {
60931cf9 1456 if (be32_eq(dl->disk.refnum, pd->refnum)) {
b06e3095
N
1457 char *dv = map_dev(dl->major, dl->minor, 0);
1458 if (dv) {
962371a5 1459 printf("%-15s", dv);
b06e3095
N
1460 break;
1461 }
1462 }
1463 }
1464 if (!dl)
962371a5 1465 printf("%15s","");
b06e3095 1466 printf(" %s%s%s%s%s",
a322f70c 1467 (type&2) ? "active":"",
b06e3095 1468 (type&4) ? "Global-Spare":"",
a322f70c
DW
1469 (type&8) ? "spare" : "",
1470 (type&16)? ", foreign" : "",
1471 (type&32)? "pass-through" : "");
18cb4496
N
1472 if (state & DDF_Failed)
1473 /* This over-rides these three */
1474 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1475 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1476 (state&1)? "Online": "Offline",
1477 (state&2)? ", Failed": "",
1478 (state&4)? ", Rebuilding": "",
1479 (state&8)? ", in-transition": "",
b06e3095
N
1480 (state&16)? ", SMART-errors": "",
1481 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1482 (state&64)? ", Missing" : "");
a322f70c
DW
1483 printf("\n");
1484 }
1485}
1486
1487static void examine_super_ddf(struct supertype *st, char *homehost)
1488{
1489 struct ddf_super *sb = st->sb;
1490
60931cf9 1491 printf(" Magic : %08x\n", be32_to_cpu(sb->anchor.magic));
a322f70c 1492 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1493 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1494 printf("\n");
1495 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c 1496 printf("\n");
60931cf9 1497 printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
1498 printf(" Redundant hdr : %s\n", be32_eq(sb->secondary.magic,
1499 DDF_HEADER_MAGIC)
a322f70c
DW
1500 ?"yes" : "no");
1501 examine_vds(sb);
1502 examine_pds(sb);
1503}
1504
a5d85af7 1505static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1506
bedbf68a 1507static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
42dc2744 1508static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1509
bedbf68a 1510static unsigned int get_vd_num_of_subarray(struct supertype *st)
1511{
1512 /*
1513 * Figure out the VD number for this supertype.
1514 * Returns DDF_CONTAINER for the container itself,
1515 * and DDF_NOTFOUND on error.
1516 */
1517 struct ddf_super *ddf = st->sb;
1518 struct mdinfo *sra;
1519 char *sub, *end;
1520 unsigned int vcnum;
1521
1522 if (*st->container_devnm == '\0')
1523 return DDF_CONTAINER;
1524
1525 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1526 if (!sra || sra->array.major_version != -1 ||
1527 sra->array.minor_version != -2 ||
1528 !is_subarray(sra->text_version))
1529 return DDF_NOTFOUND;
1530
1531 sub = strchr(sra->text_version + 1, '/');
1532 if (sub != NULL)
1533 vcnum = strtoul(sub + 1, &end, 10);
1534 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1535 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1536 return DDF_NOTFOUND;
1537
1538 return vcnum;
1539}
1540
061f2c6a 1541static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1542{
1543 /* We just write a generic DDF ARRAY entry
1544 */
1545 struct mdinfo info;
1546 char nbuf[64];
a5d85af7 1547 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1548 fname_from_uuid(st, &info, nbuf, ':');
1549
1550 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1551}
1552
1553static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1554{
1555 /* We just write a generic DDF ARRAY entry
a322f70c 1556 */
42dc2744 1557 struct ddf_super *ddf = st->sb;
ff54de6e 1558 struct mdinfo info;
f21e18ca 1559 unsigned int i;
ff54de6e 1560 char nbuf[64];
a5d85af7 1561 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1562 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1563
f21e18ca 1564 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1565 struct virtual_entry *ve = &ddf->virt->entries[i];
1566 struct vcl vcl;
1567 char nbuf1[64];
1568 if (all_ff(ve->guid))
1569 continue;
1570 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1571 ddf->currentconf =&vcl;
1572 uuid_from_super_ddf(st, info.uuid);
1573 fname_from_uuid(st, &info, nbuf1, ':');
1574 printf("ARRAY container=%s member=%d UUID=%s\n",
1575 nbuf+5, i, nbuf1+5);
1576 }
a322f70c
DW
1577}
1578
bceedeec
N
1579static void export_examine_super_ddf(struct supertype *st)
1580{
1581 struct mdinfo info;
1582 char nbuf[64];
a5d85af7 1583 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1584 fname_from_uuid(st, &info, nbuf, ':');
1585 printf("MD_METADATA=ddf\n");
1586 printf("MD_LEVEL=container\n");
1587 printf("MD_UUID=%s\n", nbuf+5);
1588}
bceedeec 1589
74db60b0
N
1590static int copy_metadata_ddf(struct supertype *st, int from, int to)
1591{
1592 void *buf;
1593 unsigned long long dsize, offset;
1594 int bytes;
1595 struct ddf_header *ddf;
1596 int written = 0;
1597
1598 /* The meta consists of an anchor, a primary, and a secondary.
1599 * This all lives at the end of the device.
1600 * So it is easiest to find the earliest of primary and
1601 * secondary, and copy everything from there.
1602 *
1603 * Anchor is 512 from end It contains primary_lba and secondary_lba
1604 * we choose one of those
1605 */
1606
1607 if (posix_memalign(&buf, 4096, 4096) != 0)
1608 return 1;
1609
1610 if (!get_dev_size(from, NULL, &dsize))
1611 goto err;
1612
1613 if (lseek64(from, dsize-512, 0) < 0)
1614 goto err;
1615 if (read(from, buf, 512) != 512)
1616 goto err;
1617 ddf = buf;
60931cf9 1618 if (!be32_eq(ddf->magic, DDF_HEADER_MAGIC) ||
1619 !be32_eq(calc_crc(ddf, 512), ddf->crc) ||
74db60b0
N
1620 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1621 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1622 goto err;
1623
1624 offset = dsize - 512;
1625 if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
1626 offset = __be64_to_cpu(ddf->primary_lba) << 9;
1627 if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1628 offset = __be64_to_cpu(ddf->secondary_lba) << 9;
1629
1630 bytes = dsize - offset;
1631
1632 if (lseek64(from, offset, 0) < 0 ||
1633 lseek64(to, offset, 0) < 0)
1634 goto err;
1635 while (written < bytes) {
1636 int n = bytes - written;
1637 if (n > 4096)
1638 n = 4096;
1639 if (read(from, buf, n) != n)
1640 goto err;
1641 if (write(to, buf, n) != n)
1642 goto err;
1643 written += n;
1644 }
1645 free(buf);
1646 return 0;
1647err:
1648 free(buf);
1649 return 1;
1650}
1651
a322f70c
DW
/*
 * Detailed output hook; intentionally prints nothing for now.
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* FIXME later:
	 *  - could print the DDF GUID
	 *  - need to find which array is meant
	 *  - for the whole container, briefly list all arrays
	 *  - for a single array, give its name
	 */
}
1661
1662static void brief_detail_super_ddf(struct supertype *st)
1663{
ff54de6e
N
1664 struct mdinfo info;
1665 char nbuf[64];
bedbf68a 1666 struct ddf_super *ddf = st->sb;
1667 unsigned int vcnum = get_vd_num_of_subarray(st);
1668 if (vcnum == DDF_CONTAINER)
1669 uuid_from_super_ddf(st, info.uuid);
1670 else if (vcnum == DDF_NOTFOUND)
1671 return;
1672 else
1673 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
ff54de6e
N
1674 fname_from_uuid(st, &info, nbuf,':');
1675 printf(" UUID=%s", nbuf + 5);
a322f70c 1676}
a322f70c
DW
1677#endif
1678
1679static int match_home_ddf(struct supertype *st, char *homehost)
1680{
1681 /* It matches 'this' host if the controller is a
1682 * Linux-MD controller with vendor_data matching
1683 * the hostname
1684 */
1685 struct ddf_super *ddf = st->sb;
f21e18ca 1686 unsigned int len;
d1d3482b
N
1687
1688 if (!homehost)
1689 return 0;
1690 len = strlen(homehost);
a322f70c
DW
1691
1692 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1693 len < sizeof(ddf->controller.vendor_data) &&
1694 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1695 ddf->controller.vendor_data[len] == 0);
1696}
1697
0e600426 1698#ifndef MDASSEMBLE
baba3f4e 1699static int find_index_in_bvd(const struct ddf_super *ddf,
1700 const struct vd_config *conf, unsigned int n,
1701 unsigned int *n_bvd)
1702{
1703 /*
1704 * Find the index of the n-th valid physical disk in this BVD
1705 */
1706 unsigned int i, j;
1707 for (i = 0, j = 0; i < ddf->mppe &&
1708 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
60931cf9 1709 if (be32_to_cpu(conf->phys_refnum[i]) != 0xffffffff) {
baba3f4e 1710 if (n == j) {
1711 *n_bvd = i;
1712 return 1;
1713 }
1714 j++;
1715 }
1716 }
1717 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1718 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1719 return 0;
1720}
1721
1722static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1723 unsigned int n,
1724 unsigned int *n_bvd, struct vcl **vcl)
a322f70c 1725{
7a7cc504 1726 struct vcl *v;
59e36268 1727
baba3f4e 1728 for (v = ddf->conflist; v; v = v->next) {
1729 unsigned int nsec, ibvd;
1730 struct vd_config *conf;
1731 if (inst != v->vcnum)
1732 continue;
1733 conf = &v->conf;
1734 if (conf->sec_elmnt_count == 1) {
1735 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1736 *vcl = v;
1737 return conf;
1738 } else
1739 goto bad;
1740 }
1741 if (v->other_bvds == NULL) {
1742 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1743 __func__, conf->sec_elmnt_count);
1744 goto bad;
1745 }
1746 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1747 if (conf->sec_elmnt_seq != nsec) {
1748 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
baba3f4e 1749 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1750 == nsec)
1751 break;
1752 }
1753 if (ibvd == conf->sec_elmnt_count)
1754 goto bad;
1755 conf = v->other_bvds[ibvd-1];
1756 }
1757 if (!find_index_in_bvd(ddf, conf,
1758 n - nsec*conf->sec_elmnt_count, n_bvd))
1759 goto bad;
1760 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1761 , __func__, n, *n_bvd, ibvd-1, inst);
1762 *vcl = v;
1763 return conf;
1764 }
1765bad:
1766 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
7a7cc504
NB
1767 return NULL;
1768}
0e600426 1769#endif
7a7cc504 1770
60931cf9 1771static int find_phys(const struct ddf_super *ddf, be32 phys_refnum)
7a7cc504
NB
1772{
1773 /* Find the entry in phys_disk which has the given refnum
1774 * and return it's index
1775 */
f21e18ca
N
1776 unsigned int i;
1777 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
60931cf9 1778 if (be32_eq(ddf->phys->entries[i].refnum, phys_refnum))
7a7cc504
NB
1779 return i;
1780 return -1;
a322f70c
DW
1781}
1782
bedbf68a 1783static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1784{
1785 char buf[20];
1786 struct sha1_ctx ctx;
1787 sha1_init_ctx(&ctx);
1788 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1789 sha1_finish_ctx(&ctx, buf);
1790 memcpy(uuid, buf, 4*4);
1791}
1792
a322f70c
DW
1793static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1794{
1795 /* The uuid returned here is used for:
1796 * uuid to put into bitmap file (Create, Grow)
1797 * uuid for backup header when saving critical section (Grow)
1798 * comparing uuids when re-adding a device into an array
51006d85
N
1799 * In these cases the uuid required is that of the data-array,
1800 * not the device-set.
1801 * uuid to recognise same set when adding a missing device back
1802 * to an array. This is a uuid for the device-set.
613b0d17 1803 *
a322f70c
DW
1804 * For each of these we can make do with a truncated
1805 * or hashed uuid rather than the original, as long as
1806 * everyone agrees.
a322f70c
DW
1807 * In the case of SVD we assume the BVD is of interest,
1808 * though that might be the case if a bitmap were made for
1809 * a mirrored SVD - worry about that later.
1810 * So we need to find the VD configuration record for the
1811 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1812 * The first 16 bytes of the sha1 of these is used.
1813 */
1814 struct ddf_super *ddf = st->sb;
d2ca6449 1815 struct vcl *vcl = ddf->currentconf;
c5afc314 1816 char *guid;
a322f70c 1817
c5afc314
N
1818 if (vcl)
1819 guid = vcl->conf.guid;
1820 else
1821 guid = ddf->anchor.guid;
bedbf68a 1822 uuid_from_ddf_guid(guid, uuid);
a322f70c
DW
1823}
1824
a5d85af7 1825static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1826
a5d85af7 1827static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1828{
1829 struct ddf_super *ddf = st->sb;
a5d85af7 1830 int map_disks = info->array.raid_disks;
90fa1a29 1831 __u32 *cptr;
a322f70c 1832
78e44928 1833 if (ddf->currentconf) {
a5d85af7 1834 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1835 return;
1836 }
95eeceeb 1837 memset(info, 0, sizeof(*info));
78e44928 1838
a322f70c
DW
1839 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1840 info->array.level = LEVEL_CONTAINER;
1841 info->array.layout = 0;
1842 info->array.md_minor = -1;
90fa1a29
JS
1843 cptr = (__u32 *)(ddf->anchor.guid + 16);
1844 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1845
a322f70c
DW
1846 info->array.utime = 0;
1847 info->array.chunk_size = 0;
510242aa 1848 info->container_enough = 1;
a322f70c 1849
a322f70c
DW
1850 info->disk.major = 0;
1851 info->disk.minor = 0;
cba0191b 1852 if (ddf->dlist) {
60931cf9 1853 info->disk.number = be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1854 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449
NB
1855
1856 info->data_offset = __be64_to_cpu(ddf->phys->
613b0d17
N
1857 entries[info->disk.raid_disk].
1858 config_size);
d2ca6449 1859 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1860 } else {
1861 info->disk.number = -1;
661dce36 1862 info->disk.raid_disk = -1;
cba0191b
NB
1863// info->disk.raid_disk = find refnum in the table and use index;
1864 }
f22385f9 1865 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1866
921d9e16 1867 info->recovery_start = MaxSector;
a19c88b8 1868 info->reshape_active = 0;
6e75048b 1869 info->recovery_blocked = 0;
c5afc314 1870 info->name[0] = 0;
a322f70c 1871
f35f2525
N
1872 info->array.major_version = -1;
1873 info->array.minor_version = -2;
159c3a1a 1874 strcpy(info->text_version, "ddf");
a67dd8cc 1875 info->safe_mode_delay = 0;
159c3a1a 1876
c5afc314 1877 uuid_from_super_ddf(st, info->uuid);
a322f70c 1878
a5d85af7
N
1879 if (map) {
1880 int i;
1881 for (i = 0 ; i < map_disks; i++) {
1882 if (i < info->array.raid_disks &&
1883 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1884 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1885 map[i] = 1;
1886 else
1887 map[i] = 0;
1888 }
1889 }
a322f70c
DW
1890}
1891
a5d85af7 1892static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1893{
1894 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1895 struct vcl *vc = ddf->currentconf;
1896 int cd = ddf->currentdev;
ddf94a43 1897 int n_prim;
db42fa9b 1898 int j;
8592f29d 1899 struct dl *dl;
a5d85af7 1900 int map_disks = info->array.raid_disks;
90fa1a29 1901 __u32 *cptr;
ddf94a43 1902 struct vd_config *conf;
a322f70c 1903
95eeceeb 1904 memset(info, 0, sizeof(*info));
8a2848a7 1905 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1906 return;
a322f70c 1907 info->array.md_minor = -1;
90fa1a29
JS
1908 cptr = (__u32 *)(vc->conf.guid + 16);
1909 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
60931cf9 1910 info->array.utime = DECADE + be32_to_cpu(vc->conf.timestamp);
d2ca6449 1911 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1912 info->custom_array_size = 0;
d2ca6449 1913
ddf94a43 1914 conf = &vc->conf;
1915 n_prim = __be16_to_cpu(conf->prim_elmnt_count);
1916 if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
1917 int ibvd = cd / n_prim - 1;
1918 cd %= n_prim;
1919 conf = vc->other_bvds[ibvd];
1920 }
1921
f21e18ca 1922 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
57a66662 1923 info->data_offset =
fbf0c2a7 1924 __be64_to_cpu(LBA_OFFSET(ddf, conf)[cd]);
d2ca6449
NB
1925 if (vc->block_sizes)
1926 info->component_size = vc->block_sizes[cd];
1927 else
fbf0c2a7 1928 info->component_size = __be64_to_cpu(conf->blocks);
d2ca6449 1929 }
a322f70c 1930
fb204fb2 1931 for (dl = ddf->dlist; dl ; dl = dl->next)
60931cf9 1932 if (be32_eq(dl->disk.refnum, conf->phys_refnum[cd]))
fb204fb2
N
1933 break;
1934
a322f70c
DW
1935 info->disk.major = 0;
1936 info->disk.minor = 0;
fb204fb2 1937 info->disk.state = 0;
8592f29d
N
1938 if (dl) {
1939 info->disk.major = dl->major;
1940 info->disk.minor = dl->minor;
7c3fb3ec 1941 info->disk.raid_disk = cd + conf->sec_elmnt_seq
1942 * __be16_to_cpu(conf->prim_elmnt_count);
fb204fb2
N
1943 info->disk.number = dl->pdnum;
1944 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1945 }
a322f70c 1946
103f2410
NB
1947 info->container_member = ddf->currentconf->vcnum;
1948
921d9e16 1949 info->recovery_start = MaxSector;
80d26cb2 1950 info->resync_start = 0;
624c5ad4 1951 info->reshape_active = 0;
6e75048b 1952 info->recovery_blocked = 0;
80d26cb2
NB
1953 if (!(ddf->virt->entries[info->container_member].state
1954 & DDF_state_inconsistent) &&
1955 (ddf->virt->entries[info->container_member].init_state
1956 & DDF_initstate_mask)
1957 == DDF_init_full)
b7528a20 1958 info->resync_start = MaxSector;
80d26cb2 1959
a322f70c
DW
1960 uuid_from_super_ddf(st, info->uuid);
1961
f35f2525
N
1962 info->array.major_version = -1;
1963 info->array.minor_version = -2;
9b63e648 1964 sprintf(info->text_version, "/%s/%d",
4dd2df09 1965 st->container_devnm,
9b63e648 1966 info->container_member);
a67dd8cc 1967 info->safe_mode_delay = 200;
159c3a1a 1968
db42fa9b
N
1969 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1970 info->name[16]=0;
1971 for(j=0; j<16; j++)
1972 if (info->name[j] == ' ')
1973 info->name[j] = 0;
a5d85af7
N
1974
1975 if (map)
1976 for (j = 0; j < map_disks; j++) {
1977 map[j] = 0;
1978 if (j < info->array.raid_disks) {
1979 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1980 if (i >= 0 &&
a5d85af7
N
1981 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1982 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1983 map[i] = 1;
1984 }
1985 }
a322f70c
DW
1986}
1987
/*
 * Apply the named update to the metadata.
 *
 * For 'assemble' and 'force' we need to return non-zero if any
 * change was made.  For others, the return value is ignored.
 * Update options are:
 *  force-one : This device looks a bit old but needs to be included,
 *        update age info appropriately.
 *  assemble: clear any 'faulty' flag to allow this device to
 *        be assembled.
 *  force-array: Array is degraded but being forced, mark it clean
 *        if that will be needed to assemble it.
 *
 *  newdev:  not used ????
 *  grow:  Array has gained a new device - this is currently for
 *        linear only
 *  resync: mark as dirty so a resync will happen.
 *  uuid:  Change the uuid of the array to match what is given
 *  homehost:  update the recorded homehost
 *  name:  update the name - preserving the homehost
 *  _reshape_progress: record new reshape_progress position.
 *
 * Following are not relevant for this version:
 *  sparc2.2 : update from old dodgey metadata
 *  super-minor: change the preferred_minor number
 *  summaries:  update redundant counters.
 *
 * Nothing is modified here yet.  "grow", "resync",
 * "_reshape_progress" and "assemble" are accepted and return 0; every
 * other update, including "homehost" and "name", returns -1.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* We don't need to handle "force-*" or "assemble" as there is
	 * no need to 'trick' the kernel.  When the metadata is first
	 * updated to activate the array, all the implied modifications
	 * will just happen.
	 *
	 * Not implemented yet:
	 *  homehost - would be stored in controller->vendor_data, but
	 *             only when we are the vendor
	 *  name     - would be stored in virtual_entry->name
	 *  reshape  - not supported
	 */
	static const char *const accepted[] = {
		"grow",			/* FIXME */
		"resync",
		"_reshape_progress",
		"assemble",		/* do nothing, just succeed */
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	return -1;
}
5f8097be
NB
2057static void make_header_guid(char *guid)
2058{
60931cf9 2059 be32 stamp;
5f8097be
NB
2060 /* Create a DDF Header of Virtual Disk GUID */
2061
2062 /* 24 bytes of fiction required.
2063 * first 8 are a 'vendor-id' - "Linux-MD"
2064 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2065 * Remaining 8 random number plus timestamp
2066 */
2067 memcpy(guid, T10, sizeof(T10));
60931cf9 2068 stamp = cpu_to_be32(0xdeadbeef);
5f8097be 2069 memcpy(guid+8, &stamp, 4);
60931cf9 2070 stamp = cpu_to_be32(0);
5f8097be 2071 memcpy(guid+12, &stamp, 4);
60931cf9 2072 stamp = cpu_to_be32(time(0) - DECADE);
5f8097be 2073 memcpy(guid+16, &stamp, 4);
60931cf9 2074 stamp._v32 = random32();
5f8097be 2075 memcpy(guid+20, &stamp, 4);
5f8097be 2076}
59e36268 2077
fb9d0acb 2078static unsigned int find_unused_vde(const struct ddf_super *ddf)
2079{
2080 unsigned int i;
2081 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2082 if (all_ff(ddf->virt->entries[i].guid))
2083 return i;
2084 }
2085 return DDF_NOTFOUND;
2086}
2087
2088static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2089 const char *name)
2090{
2091 unsigned int i;
2092 if (name == NULL)
2093 return DDF_NOTFOUND;
2094 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2095 if (all_ff(ddf->virt->entries[i].guid))
2096 continue;
2097 if (!strncmp(name, ddf->virt->entries[i].name,
2098 sizeof(ddf->virt->entries[i].name)))
2099 return i;
2100 }
2101 return DDF_NOTFOUND;
2102}
2103
2104static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2105 const char *guid)
2106{
2107 unsigned int i;
2108 if (guid == NULL || all_ff(guid))
2109 return DDF_NOTFOUND;
2110 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
2111 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2112 return i;
2113 return DDF_NOTFOUND;
2114}
2115
78e44928
NB
2116static int init_super_ddf_bvd(struct supertype *st,
2117 mdu_array_info_t *info,
2118 unsigned long long size,
2119 char *name, char *homehost,
83cd1e97 2120 int *uuid, unsigned long long data_offset);
78e44928 2121
a322f70c
DW
2122static int init_super_ddf(struct supertype *st,
2123 mdu_array_info_t *info,
2124 unsigned long long size, char *name, char *homehost,
83cd1e97 2125 int *uuid, unsigned long long data_offset)
a322f70c
DW
2126{
2127 /* This is primarily called by Create when creating a new array.
2128 * We will then get add_to_super called for each component, and then
2129 * write_init_super called to write it out to each device.
2130 * For DDF, Create can create on fresh devices or on a pre-existing
2131 * array.
2132 * To create on a pre-existing array a different method will be called.
2133 * This one is just for fresh drives.
2134 *
2135 * We need to create the entire 'ddf' structure which includes:
2136 * DDF headers - these are easy.
2137 * Controller data - a Sector describing this controller .. not that
2138 * this is a controller exactly.
2139 * Physical Disk Record - one entry per device, so
2140 * leave plenty of space.
2141 * Virtual Disk Records - again, just leave plenty of space.
2142 * This just lists VDs, doesn't give details
2143 * Config records - describes the VDs that use this disk
2144 * DiskData - describes 'this' device.
2145 * BadBlockManagement - empty
2146 * Diag Space - empty
2147 * Vendor Logs - Could we put bitmaps here?
2148 *
2149 */
2150 struct ddf_super *ddf;
2151 char hostname[17];
2152 int hostlen;
a322f70c
DW
2153 int max_phys_disks, max_virt_disks;
2154 unsigned long long sector;
2155 int clen;
2156 int i;
2157 int pdsize, vdsize;
2158 struct phys_disk *pd;
2159 struct virtual_disk *vd;
2160
83cd1e97 2161 if (data_offset != INVALID_SECTORS) {
ed503f89 2162 pr_err("data-offset not supported by DDF\n");
83cd1e97
N
2163 return 0;
2164 }
2165
78e44928 2166 if (st->sb)
83cd1e97
N
2167 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2168 data_offset);
ba7eb04f 2169
3d2c4fc7 2170 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 2171 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
2172 return 0;
2173 }
6264b437 2174 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
2175 ddf->dlist = NULL; /* no physical disks yet */
2176 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
2177 st->sb = ddf;
2178
2179 if (info == NULL) {
2180 /* zeroing superblock */
2181 return 0;
2182 }
a322f70c
DW
2183
2184 /* At least 32MB *must* be reserved for the ddf. So let's just
2185 * start 32MB from the end, and put the primary header there.
2186 * Don't do secondary for now.
2187 * We don't know exactly where that will be yet as it could be
2188 * different on each device. To just set up the lengths.
2189 *
2190 */
2191
2192 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 2193 make_header_guid(ddf->anchor.guid);
a322f70c 2194
59e36268 2195 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
60931cf9 2196 ddf->anchor.seq = cpu_to_be32(1);
2197 ddf->anchor.timestamp = cpu_to_be32(time(0) - DECADE);
a322f70c
DW
2198 ddf->anchor.openflag = 0xFF;
2199 ddf->anchor.foreignflag = 0;
2200 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2201 ddf->anchor.pad0 = 0xff;
2202 memset(ddf->anchor.pad1, 0xff, 12);
2203 memset(ddf->anchor.header_ext, 0xff, 32);
2204 ddf->anchor.primary_lba = ~(__u64)0;
2205 ddf->anchor.secondary_lba = ~(__u64)0;
2206 ddf->anchor.type = DDF_HEADER_ANCHOR;
2207 memset(ddf->anchor.pad2, 0xff, 3);
60931cf9 2208 ddf->anchor.workspace_len = cpu_to_be32(32768); /* Must be reserved */
a322f70c
DW
2209 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2210 of 32M reserved.. */
2211 max_phys_disks = 1023; /* Should be enough */
2212 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2213 max_virt_disks = 255;
2214 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2215 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2216 ddf->max_part = 64;
8c3b8c2c 2217 ddf->mppe = 256;
59e36268
NB
2218 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2219 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2220 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
a322f70c 2221 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
2222 /* controller sections is one sector long immediately
2223 * after the ddf header */
2224 sector = 1;
60931cf9 2225 ddf->anchor.controller_section_offset = cpu_to_be32(sector);
2226 ddf->anchor.controller_section_length = cpu_to_be32(1);
a322f70c
DW
2227 sector += 1;
2228
2229 /* phys is 8 sectors after that */
2230 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2231 sizeof(struct phys_disk_entry)*max_phys_disks,
2232 512);
2233 switch(pdsize/512) {
2234 case 2: case 8: case 32: case 128: case 512: break;
2235 default: abort();
2236 }
60931cf9 2237 ddf->anchor.phys_section_offset = cpu_to_be32(sector);
a322f70c 2238 ddf->anchor.phys_section_length =
60931cf9 2239 cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
a322f70c
DW
2240 sector += pdsize/512;
2241
2242 /* virt is another 32 sectors */
2243 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2244 sizeof(struct virtual_entry) * max_virt_disks,
2245 512);
2246 switch(vdsize/512) {
2247 case 2: case 8: case 32: case 128: case 512: break;
2248 default: abort();
2249 }
60931cf9 2250 ddf->anchor.virt_section_offset = cpu_to_be32(sector);
a322f70c 2251 ddf->anchor.virt_section_length =
60931cf9 2252 cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
a322f70c
DW
2253 sector += vdsize/512;
2254
59e36268 2255 clen = ddf->conf_rec_len * (ddf->max_part+1);
60931cf9 2256 ddf->anchor.config_section_offset = cpu_to_be32(sector);
2257 ddf->anchor.config_section_length = cpu_to_be32(clen);
a322f70c
DW
2258 sector += clen;
2259
60931cf9 2260 ddf->anchor.data_section_offset = cpu_to_be32(sector);
2261 ddf->anchor.data_section_length = cpu_to_be32(1);
a322f70c
DW
2262 sector += 1;
2263
60931cf9 2264 ddf->anchor.bbm_section_length = cpu_to_be32(0);
2265 ddf->anchor.bbm_section_offset = cpu_to_be32(0xFFFFFFFF);
2266 ddf->anchor.diag_space_length = cpu_to_be32(0);
2267 ddf->anchor.diag_space_offset = cpu_to_be32(0xFFFFFFFF);
2268 ddf->anchor.vendor_length = cpu_to_be32(0);
2269 ddf->anchor.vendor_offset = cpu_to_be32(0xFFFFFFFF);
a322f70c
DW
2270
2271 memset(ddf->anchor.pad4, 0xff, 256);
2272
2273 memcpy(&ddf->primary, &ddf->anchor, 512);
2274 memcpy(&ddf->secondary, &ddf->anchor, 512);
2275
2276 ddf->primary.openflag = 1; /* I guess.. */
2277 ddf->primary.type = DDF_HEADER_PRIMARY;
2278
2279 ddf->secondary.openflag = 1; /* I guess.. */
2280 ddf->secondary.type = DDF_HEADER_SECONDARY;
2281
2282 ddf->active = &ddf->primary;
2283
2284 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2285
2286 /* 24 more bytes of fiction required.
2287 * first 8 are a 'vendor-id' - "Linux-MD"
2288 * Remaining 16 are serial number.... maybe a hostname would do?
2289 */
2290 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
2291 gethostname(hostname, sizeof(hostname));
2292 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
2293 hostlen = strlen(hostname);
2294 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2295 for (i = strlen(T10) ; i+hostlen < 24; i++)
2296 ddf->controller.guid[i] = ' ';
2297
2298 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2299 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2300 ddf->controller.type.sub_vendor_id = 0;
2301 ddf->controller.type.sub_device_id = 0;
2302 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2303 memset(ddf->controller.pad, 0xff, 8);
2304 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
2305 if (homehost && strlen(homehost) < 440)
2306 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 2307
3d2c4fc7 2308 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 2309 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
2310 return 0;
2311 }
6416d527 2312 ddf->phys = pd;
a322f70c
DW
2313 ddf->pdsize = pdsize;
2314
2315 memset(pd, 0xff, pdsize);
2316 memset(pd, 0, sizeof(*pd));
076515ba 2317 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a322f70c
DW
2318 pd->used_pdes = __cpu_to_be16(0);
2319 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2320 memset(pd->pad, 0xff, 52);
4a3ca8ac 2321 for (i = 0; i < max_phys_disks; i++)
2322 memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
a322f70c 2323
3d2c4fc7 2324 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 2325 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
2326 return 0;
2327 }
6416d527 2328 ddf->virt = vd;
a322f70c
DW
2329 ddf->vdsize = vdsize;
2330 memset(vd, 0, vdsize);
2331 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2332 vd->populated_vdes = __cpu_to_be16(0);
2333 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2334 memset(vd->pad, 0xff, 52);
2335
5f8097be
NB
2336 for (i=0; i<max_virt_disks; i++)
2337 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2338
a322f70c 2339 st->sb = ddf;
7d5a7ff3 2340 ddf_set_updates_pending(ddf);
a322f70c
DW
2341 return 1;
2342}
2343
5f8097be
NB
/*
 * Convert a chunk size in bytes to a shift count relative to 512-byte
 * sectors: the zero-based position of the lowest set bit of
 * chunksize/512, or -1 when chunksize/512 is zero.
 */
static int chunk_to_shift(int chunksize)
{
	unsigned int sectors = (unsigned int)(chunksize / 512);
	int shift = 0;

	if (sectors == 0)
		return -1;

	while ((sectors & 1u) == 0) {
		sectors >>= 1;
		shift++;
	}
	return shift;
}
2348
0e600426 2349#ifndef MDASSEMBLE
59e36268
NB
/* A used region of a physical disk: first sector and length. */
struct extent {
	unsigned long long start, size;
};

/*
 * qsort() comparator ordering extents by ascending 'start'.
 * Returns -1, 0 or 1; 'size' is not considered.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const unsigned long long lhs = ((const struct extent *)av)->start;
	const unsigned long long rhs = ((const struct extent *)bv)->start;

	return (lhs > rhs) - (lhs < rhs);
}
2363
78e44928 2364static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
2365{
2366 /* find a list of used extents on the give physical device
2367 * (dnum) of the given ddf.
2368 * Return a malloced array of 'struct extent'
2369
613b0d17 2370 * FIXME ignore DDF_Legacy devices?
59e36268
NB
2371
2372 */
2373 struct extent *rv;
2374 int n = 0;
fcc22180 2375 unsigned int i;
59e36268 2376
503975b9 2377 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
2378
2379 for (i = 0; i < ddf->max_part; i++) {
fcc22180 2380 const struct vd_config *bvd;
2381 unsigned int ibvd;
59e36268 2382 struct vcl *v = dl->vlist[i];
fcc22180 2383 if (v == NULL ||
2384 get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
2385 &bvd, &ibvd) == DDF_NOTFOUND)
59e36268 2386 continue;
fcc22180 2387 rv[n].start = __be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
2388 rv[n].size = __be64_to_cpu(bvd->blocks);
2389 n++;
59e36268
NB
2390 }
2391 qsort(rv, n, sizeof(*rv), cmp_extent);
2392
2393 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2394 rv[n].size = 0;
2395 return rv;
2396}
0e600426 2397#endif
59e36268 2398
5f8097be
NB
2399static int init_super_ddf_bvd(struct supertype *st,
2400 mdu_array_info_t *info,
2401 unsigned long long size,
2402 char *name, char *homehost,
83cd1e97 2403 int *uuid, unsigned long long data_offset)
5f8097be
NB
2404{
2405 /* We are creating a BVD inside a pre-existing container.
2406 * so st->sb is already set.
2407 * We need to create a new vd_config and a new virtual_entry
2408 */
2409 struct ddf_super *ddf = st->sb;
5aaf6c7b 2410 unsigned int venum, i;
5f8097be
NB
2411 struct virtual_entry *ve;
2412 struct vcl *vcl;
2413 struct vd_config *vc;
5f8097be 2414
fb9d0acb 2415 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2416 pr_err("This ddf already has an array called %s\n", name);
5f8097be
NB
2417 return 0;
2418 }
fb9d0acb 2419 venum = find_unused_vde(ddf);
2420 if (venum == DDF_NOTFOUND) {
2421 pr_err("Cannot find spare slot for virtual disk\n");
5f8097be
NB
2422 return 0;
2423 }
2424 ve = &ddf->virt->entries[venum];
2425
2426 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2427 * timestamp, random number
2428 */
2429 make_header_guid(ve->guid);
2430 ve->unit = __cpu_to_be16(info->md_minor);
2431 ve->pad0 = 0xFFFF;
2432 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2433 ve->type = 0;
7a7cc504
NB
2434 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2435 if (info->state & 1) /* clean */
2436 ve->init_state = DDF_init_full;
2437 else
2438 ve->init_state = DDF_init_not;
2439
5f8097be
NB
2440 memset(ve->pad1, 0xff, 14);
2441 memset(ve->name, ' ', 16);
2442 if (name)
2443 strncpy(ve->name, name, 16);
2444 ddf->virt->populated_vdes =
2445 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2446
2447 /* Now create a new vd_config */
3d2c4fc7
DW
2448 if (posix_memalign((void**)&vcl, 512,
2449 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2450 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2451 return 0;
2452 }
59e36268
NB
2453 vcl->vcnum = venum;
2454 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
5f8097be
NB
2455 vc = &vcl->conf;
2456
2457 vc->magic = DDF_VD_CONF_MAGIC;
2458 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
60931cf9 2459 vc->timestamp = cpu_to_be32(time(0)-DECADE);
2460 vc->seqnum = cpu_to_be32(1);
5f8097be 2461 memset(vc->pad0, 0xff, 24);
5f8097be 2462 vc->chunk_shift = chunk_to_shift(info->chunk_size);
a3163bf0 2463 if (layout_md2ddf(info, vc) == -1 ||
2464 __be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
2465 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2466 __func__, info->level, info->layout, info->raid_disks);
2467 free(vcl);
2468 return 0;
2469 }
5f8097be 2470 vc->sec_elmnt_seq = 0;
3c48f7be 2471 if (alloc_other_bvds(ddf, vcl) != 0) {
2472 pr_err("%s could not allocate other bvds\n",
2473 __func__);
2474 free(vcl);
2475 return 0;
2476 }
5f8097be
NB
2477 vc->blocks = __cpu_to_be64(info->size * 2);
2478 vc->array_blocks = __cpu_to_be64(
2479 calc_array_size(info->level, info->raid_disks, info->layout,
2480 info->chunk_size, info->size*2));
2481 memset(vc->pad1, 0xff, 8);
60931cf9 2482 vc->spare_refs[0] = cpu_to_be32(0xffffffff);
2483 vc->spare_refs[1] = cpu_to_be32(0xffffffff);
2484 vc->spare_refs[2] = cpu_to_be32(0xffffffff);
2485 vc->spare_refs[3] = cpu_to_be32(0xffffffff);
2486 vc->spare_refs[4] = cpu_to_be32(0xffffffff);
2487 vc->spare_refs[5] = cpu_to_be32(0xffffffff);
2488 vc->spare_refs[6] = cpu_to_be32(0xffffffff);
2489 vc->spare_refs[7] = cpu_to_be32(0xffffffff);
5f8097be
NB
2490 memset(vc->cache_pol, 0, 8);
2491 vc->bg_rate = 0x80;
2492 memset(vc->pad2, 0xff, 3);
2493 memset(vc->pad3, 0xff, 52);
2494 memset(vc->pad4, 0xff, 192);
2495 memset(vc->v0, 0xff, 32);
2496 memset(vc->v1, 0xff, 32);
2497 memset(vc->v2, 0xff, 16);
2498 memset(vc->v3, 0xff, 16);
2499 memset(vc->vendor, 0xff, 32);
598f0d58 2500
8c3b8c2c 2501 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2502 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be 2503
5aaf6c7b 2504 for (i = 1; i < vc->sec_elmnt_count; i++) {
2505 memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
2506 vcl->other_bvds[i-1]->sec_elmnt_seq = i;
2507 }
2508
5f8097be
NB
2509 vcl->next = ddf->conflist;
2510 ddf->conflist = vcl;
d2ca6449 2511 ddf->currentconf = vcl;
7d5a7ff3 2512 ddf_set_updates_pending(ddf);
5f8097be
NB
2513 return 1;
2514}
2515
63eb2454 2516static int get_svd_state(const struct ddf_super *, const struct vcl *);
2517
0e600426 2518#ifndef MDASSEMBLE
5f8097be
NB
2519static void add_to_super_ddf_bvd(struct supertype *st,
2520 mdu_disk_info_t *dk, int fd, char *devname)
2521{
2522 /* fd and devname identify a device with-in the ddf container (st).
2523 * dk identifies a location in the new BVD.
2524 * We need to find suitable free space in that device and update
2525 * the phys_refnum and lba_offset for the newly created vd_config.
2526 * We might also want to update the type in the phys_disk
5575e7d9 2527 * section.
8592f29d
N
2528 *
2529 * Alternately: fd == -1 and we have already chosen which device to
2530 * use and recorded in dlist->raid_disk;
5f8097be
NB
2531 */
2532 struct dl *dl;
2533 struct ddf_super *ddf = st->sb;
2534 struct vd_config *vc;
f21e18ca 2535 unsigned int i;
59e36268
NB
2536 unsigned long long blocks, pos, esize;
2537 struct extent *ex;
475ccbdb 2538 unsigned int raid_disk = dk->raid_disk;
5f8097be 2539
8592f29d
N
2540 if (fd == -1) {
2541 for (dl = ddf->dlist; dl ; dl = dl->next)
2542 if (dl->raiddisk == dk->raid_disk)
2543 break;
2544 } else {
2545 for (dl = ddf->dlist; dl ; dl = dl->next)
2546 if (dl->major == dk->major &&
2547 dl->minor == dk->minor)
2548 break;
2549 }
5f8097be
NB
2550 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2551 return;
2552
d2ca6449 2553 vc = &ddf->currentconf->conf;
475ccbdb 2554 if (vc->sec_elmnt_count > 1) {
2555 unsigned int n = __be16_to_cpu(vc->prim_elmnt_count);
2556 if (raid_disk >= n)
2557 vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
2558 raid_disk %= n;
2559 }
59e36268
NB
2560
2561 ex = get_extents(ddf, dl);
2562 if (!ex)
2563 return;
2564
2565 i = 0; pos = 0;
2566 blocks = __be64_to_cpu(vc->blocks);
d2ca6449
NB
2567 if (ddf->currentconf->block_sizes)
2568 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2569
2570 do {
2571 esize = ex[i].start - pos;
2572 if (esize >= blocks)
2573 break;
2574 pos = ex[i].start + ex[i].size;
2575 i++;
2576 } while (ex[i-1].size);
2577
2578 free(ex);
2579 if (esize < blocks)
2580 return;
2581
d2ca6449 2582 ddf->currentdev = dk->raid_disk;
475ccbdb 2583 vc->phys_refnum[raid_disk] = dl->disk.refnum;
2584 LBA_OFFSET(ddf, vc)[raid_disk] = __cpu_to_be64(pos);
5f8097be 2585
f21e18ca 2586 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2587 if (dl->vlist[i] == NULL)
2588 break;
2589 if (i == ddf->max_part)
2590 return;
d2ca6449 2591 dl->vlist[i] = ddf->currentconf;
5f8097be 2592
8592f29d
N
2593 if (fd >= 0)
2594 dl->fd = fd;
2595 if (devname)
2596 dl->devname = devname;
7a7cc504 2597
63eb2454 2598 /* Check if we can mark array as optimal yet */
d2ca6449 2599 i = ddf->currentconf->vcnum;
63eb2454 2600 ddf->virt->entries[i].state =
2601 (ddf->virt->entries[i].state & ~DDF_state_mask)
2602 | get_svd_state(ddf, ddf->currentconf);
5575e7d9
NB
2603 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2604 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
4f9bbe63 2605 dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
60931cf9 2606 __func__, dl->pdnum, be32_to_cpu(dl->disk.refnum),
4f9bbe63 2607 ddf->currentconf->vcnum, guid_str(vc->guid),
2608 dk->raid_disk);
7d5a7ff3 2609 ddf_set_updates_pending(ddf);
5f8097be
NB
2610}
2611
4a3ca8ac 2612static unsigned int find_unused_pde(const struct ddf_super *ddf)
2613{
2614 unsigned int i;
2615 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++) {
2616 if (all_ff(ddf->phys->entries[i].guid))
2617 return i;
2618 }
2619 return DDF_NOTFOUND;
2620}
2621
a322f70c
DW
2622/* add a device to a container, either while creating it or while
2623 * expanding a pre-existing container
2624 */
f20c3968 2625static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2626 mdu_disk_info_t *dk, int fd, char *devname,
2627 unsigned long long data_offset)
a322f70c
DW
2628{
2629 struct ddf_super *ddf = st->sb;
2630 struct dl *dd;
2631 time_t now;
2632 struct tm *tm;
2633 unsigned long long size;
2634 struct phys_disk_entry *pde;
f21e18ca 2635 unsigned int n, i;
a322f70c 2636 struct stat stb;
90fa1a29 2637 __u32 *tptr;
a322f70c 2638
78e44928
NB
2639 if (ddf->currentconf) {
2640 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2641 return 0;
78e44928
NB
2642 }
2643
a322f70c
DW
2644 /* This is device numbered dk->number. We need to create
2645 * a phys_disk entry and a more detailed disk_data entry.
2646 */
2647 fstat(fd, &stb);
4a3ca8ac 2648 n = find_unused_pde(ddf);
2649 if (n == DDF_NOTFOUND) {
2650 pr_err("%s: No free slot in array, cannot add disk\n",
2651 __func__);
2652 return 1;
2653 }
2654 pde = &ddf->phys->entries[n];
4ee8cca9 2655 get_dev_size(fd, NULL, &size);
2656 if (size <= 32*1024*1024) {
2657 pr_err("%s: device size must be at least 32MB\n",
2658 __func__);
2659 return 1;
2660 }
2661 size >>= 9;
4a3ca8ac 2662
3d2c4fc7
DW
2663 if (posix_memalign((void**)&dd, 512,
2664 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2665 pr_err("%s could allocate buffer for new disk, aborting\n",
2666 __func__);
f20c3968 2667 return 1;
3d2c4fc7 2668 }
a322f70c
DW
2669 dd->major = major(stb.st_rdev);
2670 dd->minor = minor(stb.st_rdev);
2671 dd->devname = devname;
a322f70c 2672 dd->fd = fd;
b2280677 2673 dd->spare = NULL;
a322f70c
DW
2674
2675 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2676 now = time(0);
2677 tm = localtime(&now);
2678 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2679 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2680 tptr = (__u32 *)(dd->disk.guid + 16);
2681 *tptr++ = random32();
2682 *tptr = random32();
a322f70c 2683
59e36268
NB
2684 do {
2685 /* Cannot be bothered finding a CRC of some irrelevant details*/
60931cf9 2686 dd->disk.refnum._v32 = random32();
f21e18ca
N
2687 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2688 i > 0; i--)
60931cf9 2689 if (be32_eq(ddf->phys->entries[i-1].refnum,
2690 dd->disk.refnum))
59e36268 2691 break;
f21e18ca 2692 } while (i > 0);
59e36268 2693
a322f70c
DW
2694 dd->disk.forced_ref = 1;
2695 dd->disk.forced_guid = 1;
2696 memset(dd->disk.vendor, ' ', 32);
2697 memcpy(dd->disk.vendor, "Linux", 5);
2698 memset(dd->disk.pad, 0xff, 442);
b2280677 2699 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2700 dd->vlist[i] = NULL;
2701
5575e7d9
NB
2702 dd->pdnum = n;
2703
2cc2983d
N
2704 if (st->update_tail) {
2705 int len = (sizeof(struct phys_disk) +
2706 sizeof(struct phys_disk_entry));
2707 struct phys_disk *pd;
2708
503975b9 2709 pd = xmalloc(len);
2cc2983d
N
2710 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2711 pd->used_pdes = __cpu_to_be16(n);
2712 pde = &pd->entries[0];
2713 dd->mdupdate = pd;
4a3ca8ac 2714 } else
2715 ddf->phys->used_pdes = __cpu_to_be16(
2716 1 + __be16_to_cpu(ddf->phys->used_pdes));
a322f70c
DW
2717
2718 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2719 pde->refnum = dd->disk.refnum;
5575e7d9 2720 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
a322f70c 2721 pde->state = __cpu_to_be16(DDF_Online);
4ee8cca9 2722 dd->size = size;
2723 /*
2724 * If there is already a device in dlist, try to reserve the same
2725 * amount of workspace. Otherwise, use 32MB.
2726 * We checked disk size above already.
2727 */
2728#define __calc_lba(new, old, lba, mb) do { \
2729 unsigned long long dif; \
2730 if ((old) != NULL) \
2731 dif = (old)->size - __be64_to_cpu((old)->lba); \
2732 else \
2733 dif = (new)->size; \
2734 if ((new)->size > dif) \
2735 (new)->lba = __cpu_to_be64((new)->size - dif); \
2736 else \
2737 (new)->lba = __cpu_to_be64((new)->size - (mb*1024*2)); \
2738 } while (0)
2739 __calc_lba(dd, ddf->dlist, workspace_lba, 32);
2740 __calc_lba(dd, ddf->dlist, primary_lba, 16);
2741 __calc_lba(dd, ddf->dlist, secondary_lba, 32);
2742 pde->config_size = dd->workspace_lba;
2743
a322f70c
DW
2744 sprintf(pde->path, "%17.17s","Information: nil") ;
2745 memset(pde->pad, 0xff, 6);
2746
2cc2983d
N
2747 if (st->update_tail) {
2748 dd->next = ddf->add_list;
2749 ddf->add_list = dd;
2750 } else {
2751 dd->next = ddf->dlist;
2752 ddf->dlist = dd;
7d5a7ff3 2753 ddf_set_updates_pending(ddf);
2cc2983d 2754 }
f20c3968
DW
2755
2756 return 0;
a322f70c
DW
2757}
2758
4dd968cc
N
2759static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2760{
2761 struct ddf_super *ddf = st->sb;
2762 struct dl *dl;
2763
2764 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2765 * disappeared from the container.
2766 * We need to arrange that it disappears from the metadata and
2767 * internal data structures too.
2768 * Most of the work is done by ddf_process_update which edits
2769 * the metadata and closes the file handle and attaches the memory
2770 * where free_updates will free it.
2771 */
2772 for (dl = ddf->dlist; dl ; dl = dl->next)
2773 if (dl->major == dk->major &&
2774 dl->minor == dk->minor)
2775 break;
2776 if (!dl)
2777 return -1;
2778
2779 if (st->update_tail) {
2780 int len = (sizeof(struct phys_disk) +
2781 sizeof(struct phys_disk_entry));
2782 struct phys_disk *pd;
2783
503975b9 2784 pd = xmalloc(len);
4dd968cc
N
2785 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2786 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2787 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2788 append_metadata_update(st, pd, len);
2789 }
2790 return 0;
2791}
2792
a322f70c
DW
2793/*
2794 * This is the write_init_super method for a ddf container. It is
2795 * called when creating a container or adding another device to a
2796 * container.
2797 */
42d5dfd9 2798#define NULL_CONF_SZ 4096
18a2f463 2799
8e9387ac 2800static char *null_aligned;
2801static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
a322f70c 2802{
7f798aca 2803 unsigned long long sector;
2804 struct ddf_header *header;
2805 int fd, i, n_config, conf_size;
a4057a88 2806 int ret = 0;
7f798aca 2807
8e9387ac 2808 if (null_aligned == NULL) {
2809 if (posix_memalign((void **)&null_aligned, 4096, NULL_CONF_SZ)
2810 != 0)
2811 return 0;
2812 memset(null_aligned, 0xff, NULL_CONF_SZ);
2813 }
2814
7f798aca 2815 fd = d->fd;
2816
2817 switch (type) {
2818 case DDF_HEADER_PRIMARY:
2819 header = &ddf->primary;
2820 sector = __be64_to_cpu(header->primary_lba);
2821 break;
2822 case DDF_HEADER_SECONDARY:
2823 header = &ddf->secondary;
2824 sector = __be64_to_cpu(header->secondary_lba);
2825 break;
2826 default:
2827 return 0;
2828 }
2829
2830 header->type = type;
a4057a88 2831 header->openflag = 1;
7f798aca 2832 header->crc = calc_crc(header, 512);
2833
2834 lseek64(fd, sector<<9, 0);
2835 if (write(fd, header, 512) < 0)
a4057a88 2836 goto out;
7f798aca 2837
2838 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2839 if (write(fd, &ddf->controller, 512) < 0)
a4057a88 2840 goto out;
a322f70c 2841
7f798aca 2842 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2843 if (write(fd, ddf->phys, ddf->pdsize) < 0)
a4057a88 2844 goto out;
7f798aca 2845 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2846 if (write(fd, ddf->virt, ddf->vdsize) < 0)
a4057a88 2847 goto out;
7f798aca 2848
2849 /* Now write lots of config records. */
2850 n_config = ddf->max_part;
2851 conf_size = ddf->conf_rec_len * 512;
2852 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2853 struct vcl *c;
2854 struct vd_config *vdc = NULL;
2855 if (i == n_config) {
7f798aca 2856 c = (struct vcl *)d->spare;
e3c2a365 2857 if (c)
2858 vdc = &c->conf;
2859 } else {
2860 unsigned int dummy;
2861 c = d->vlist[i];
2862 if (c)
2863 get_pd_index_from_refnum(
2864 c, d->disk.refnum,
2865 ddf->mppe,
2866 (const struct vd_config **)&vdc,
2867 &dummy);
2868 }
7f798aca 2869 if (c) {
be9b9ef4 2870 dprintf("writing conf record %i on disk %08x for %s/%u\n",
60931cf9 2871 i, be32_to_cpu(d->disk.refnum),
ad60eea1 2872 guid_str(vdc->guid),
be9b9ef4 2873 vdc->sec_elmnt_seq);
dacf3dc5 2874 vdc->seqnum = header->seq;
e3c2a365 2875 vdc->crc = calc_crc(vdc, conf_size);
2876 if (write(fd, vdc, conf_size) < 0)
7f798aca 2877 break;
2878 } else {
2879 unsigned int togo = conf_size;
2880 while (togo > NULL_CONF_SZ) {
2881 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2882 break;
2883 togo -= NULL_CONF_SZ;
2884 }
2885 if (write(fd, null_aligned, togo) < 0)
2886 break;
2887 }
2888 }
2889 if (i <= n_config)
a4057a88 2890 goto out;
7f798aca 2891
2892 d->disk.crc = calc_crc(&d->disk, 512);
2893 if (write(fd, &d->disk, 512) < 0)
a4057a88 2894 goto out;
7f798aca 2895
a4057a88 2896 ret = 1;
2897out:
2898 header->openflag = 0;
2899 header->crc = calc_crc(header, 512);
2900
2901 lseek64(fd, sector<<9, 0);
2902 if (write(fd, header, 512) < 0)
2903 ret = 0;
2904
2905 return ret;
7f798aca 2906}
2907
9bf38704 2908static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
2909{
2910 unsigned long long size;
2911 int fd = d->fd;
2912 if (fd < 0)
2913 return 0;
2914
2915 /* We need to fill in the primary, (secondary) and workspace
2916 * lba's in the headers, set their checksums,
2917 * Also checksum phys, virt....
2918 *
2919 * Then write everything out, finally the anchor is written.
2920 */
2921 get_dev_size(fd, NULL, &size);
2922 size /= 512;
2923 if (d->workspace_lba != 0)
2924 ddf->anchor.workspace_lba = d->workspace_lba;
2925 else
2926 ddf->anchor.workspace_lba =
2927 __cpu_to_be64(size - 32*1024*2);
2928 if (d->primary_lba != 0)
2929 ddf->anchor.primary_lba = d->primary_lba;
2930 else
2931 ddf->anchor.primary_lba =
2932 __cpu_to_be64(size - 16*1024*2);
2933 if (d->secondary_lba != 0)
2934 ddf->anchor.secondary_lba = d->secondary_lba;
2935 else
2936 ddf->anchor.secondary_lba =
2937 __cpu_to_be64(size - 32*1024*2);
2938 ddf->anchor.seq = ddf->active->seq;
2939 memcpy(&ddf->primary, &ddf->anchor, 512);
2940 memcpy(&ddf->secondary, &ddf->anchor, 512);
2941
2942 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
60931cf9 2943 ddf->anchor.seq = cpu_to_be32(0xFFFFFFFF); /* no sequencing in anchor */
9bf38704 2944 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2945
2946 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
2947 return 0;
2948
2949 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
2950 return 0;
2951
2952 lseek64(fd, (size-1)*512, SEEK_SET);
2953 if (write(fd, &ddf->anchor, 512) < 0)
2954 return 0;
2955
2956 return 1;
2957}
2958
7f798aca 2959static int __write_init_super_ddf(struct supertype *st)
2960{
a322f70c 2961 struct ddf_super *ddf = st->sb;
a322f70c 2962 struct dl *d;
175593bf
DW
2963 int attempts = 0;
2964 int successes = 0;
42d5dfd9 2965
7d5a7ff3 2966 pr_state(ddf, __func__);
a322f70c 2967
175593bf
DW
2968 /* try to write updated metadata,
2969 * if we catch a failure move on to the next disk
2970 */
a322f70c 2971 for (d = ddf->dlist; d; d=d->next) {
175593bf 2972 attempts++;
9bf38704 2973 successes += _write_super_to_disk(ddf, d);
175593bf
DW
2974 }
2975
175593bf 2976 return attempts != successes;
a322f70c 2977}
7a7cc504
NB
2978
2979static int write_init_super_ddf(struct supertype *st)
2980{
9b1fb677
DW
2981 struct ddf_super *ddf = st->sb;
2982 struct vcl *currentconf = ddf->currentconf;
2983
2984 /* we are done with currentconf reset it to point st at the container */
2985 ddf->currentconf = NULL;
edd8d13c
NB
2986
2987 if (st->update_tail) {
2988 /* queue the virtual_disk and vd_config as metadata updates */
2989 struct virtual_disk *vd;
2990 struct vd_config *vc;
c5943560 2991 int len, tlen;
2992 unsigned int i;
edd8d13c 2993
9b1fb677 2994 if (!currentconf) {
2cc2983d
N
2995 int len = (sizeof(struct phys_disk) +
2996 sizeof(struct phys_disk_entry));
2997
2998 /* adding a disk to the container. */
2999 if (!ddf->add_list)
3000 return 0;
3001
3002 append_metadata_update(st, ddf->add_list->mdupdate, len);
3003 ddf->add_list->mdupdate = NULL;
3004 return 0;
3005 }
3006
3007 /* Newly created VD */
3008
edd8d13c
NB
3009 /* First the virtual disk. We have a slightly fake header */
3010 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 3011 vd = xmalloc(len);
edd8d13c 3012 *vd = *ddf->virt;
9b1fb677
DW
3013 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
3014 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
3015 append_metadata_update(st, vd, len);
3016
3017 /* Then the vd_config */
3018 len = ddf->conf_rec_len * 512;
c5943560 3019 tlen = len * currentconf->conf.sec_elmnt_count;
3020 vc = xmalloc(tlen);
9b1fb677 3021 memcpy(vc, &currentconf->conf, len);
c5943560 3022 for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
3023 memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
3024 len);
3025 append_metadata_update(st, vc, tlen);
edd8d13c
NB
3026
3027 /* FIXME I need to close the fds! */
3028 return 0;
613b0d17 3029 } else {
d682f344 3030 struct dl *d;
19041058 3031 if (!currentconf)
3032 for (d = ddf->dlist; d; d=d->next)
3033 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
1cc7f4fe 3034 return __write_init_super_ddf(st);
d682f344 3035 }
7a7cc504
NB
3036}
3037
a322f70c
DW
3038#endif
3039
387fcd59
N
/*
 * Usable size of a device once the metadata reservation at its end is
 * taken off.  'devsize' and the result use the same unit; 32*1024*2 of
 * those units are held back ("the last 32Meg").  Returns 0 when the
 * device is not larger than the reservation.
 * 'st' and 'data_offset' are not used.
 */
static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
			    unsigned long long data_offset)
{
	const __u64 reserved = 32*1024*2;

	return devsize > reserved ? devsize - reserved : 0;
}
3048
3049#ifndef MDASSEMBLE
8592f29d
N
3050
3051static int reserve_space(struct supertype *st, int raiddisks,
3052 unsigned long long size, int chunk,
3053 unsigned long long *freesize)
3054{
3055 /* Find 'raiddisks' spare extents at least 'size' big (but
3056 * only caring about multiples of 'chunk') and remember
3057 * them.
3058 * If the cannot be found, fail.
3059 */
3060 struct dl *dl;
3061 struct ddf_super *ddf = st->sb;
3062 int cnt = 0;
3063
3064 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 3065 dl->raiddisk = -1;
8592f29d
N
3066 dl->esize = 0;
3067 }
3068 /* Now find largest extent on each device */
3069 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3070 struct extent *e = get_extents(ddf, dl);
3071 unsigned long long pos = 0;
3072 int i = 0;
3073 int found = 0;
3074 unsigned long long minsize = size;
3075
3076 if (size == 0)
3077 minsize = chunk;
3078
3079 if (!e)
3080 continue;
3081 do {
3082 unsigned long long esize;
3083 esize = e[i].start - pos;
3084 if (esize >= minsize) {
3085 found = 1;
3086 minsize = esize;
3087 }
3088 pos = e[i].start + e[i].size;
3089 i++;
3090 } while (e[i-1].size);
3091 if (found) {
3092 cnt++;
3093 dl->esize = minsize;
3094 }
3095 free(e);
3096 }
3097 if (cnt < raiddisks) {
e7b84f9d 3098 pr_err("not enough devices with space to create array.\n");
8592f29d
N
3099 return 0; /* No enough free spaces large enough */
3100 }
3101 if (size == 0) {
3102 /* choose the largest size of which there are at least 'raiddisk' */
3103 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3104 struct dl *dl2;
3105 if (dl->esize <= size)
3106 continue;
3107 /* This is bigger than 'size', see if there are enough */
3108 cnt = 0;
7b80ad6a 3109 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
3110 if (dl2->esize >= dl->esize)
3111 cnt++;
3112 if (cnt >= raiddisks)
3113 size = dl->esize;
3114 }
3115 if (chunk) {
3116 size = size / chunk;
3117 size *= chunk;
3118 }
3119 *freesize = size;
3120 if (size < 32) {
e7b84f9d 3121 pr_err("not enough spare devices to create array.\n");
8592f29d
N
3122 return 0;
3123 }
3124 }
3125 /* We have a 'size' of which there are enough spaces.
3126 * We simply do a first-fit */
3127 cnt = 0;
3128 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
3129 if (dl->esize < size)
3130 continue;
613b0d17 3131
8592f29d
N
3132 dl->raiddisk = cnt;
3133 cnt++;
3134 }
3135 return 1;
3136}
3137
2c514b71
NB
3138static int
3139validate_geometry_ddf_container(struct supertype *st,
3140 int level, int layout, int raiddisks,
3141 int chunk, unsigned long long size,
af4348dd 3142 unsigned long long data_offset,
2c514b71
NB
3143 char *dev, unsigned long long *freesize,
3144 int verbose);
78e44928
NB
3145
3146static int validate_geometry_ddf_bvd(struct supertype *st,
3147 int level, int layout, int raiddisks,
c21e737b 3148 int *chunk, unsigned long long size,
af4348dd 3149 unsigned long long data_offset,
2c514b71
NB
3150 char *dev, unsigned long long *freesize,
3151 int verbose);
78e44928
NB
3152
3153static int validate_geometry_ddf(struct supertype *st,
2c514b71 3154 int level, int layout, int raiddisks,
c21e737b 3155 int *chunk, unsigned long long size,
af4348dd 3156 unsigned long long data_offset,
2c514b71
NB
3157 char *dev, unsigned long long *freesize,
3158 int verbose)
a322f70c
DW
3159{
3160 int fd;
3161 struct mdinfo *sra;
3162 int cfd;
3163
3164 /* ddf potentially supports lots of things, but it depends on
3165 * what devices are offered (and maybe kernel version?)
3166 * If given unused devices, we will make a container.
3167 * If given devices in a container, we will make a BVD.
3168 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3169 */
3170
bb7295f1
N
3171 if (chunk && *chunk == UnSet)
3172 *chunk = DEFAULT_CHUNK;
3173
542ef4ec 3174 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 3175 if (level == LEVEL_CONTAINER) {
78e44928
NB
3176 /* Must be a fresh device to add to a container */
3177 return validate_geometry_ddf_container(st, level, layout,
c21e737b 3178 raiddisks, chunk?*chunk:0,
af4348dd
N
3179 size, data_offset, dev,
3180 freesize,
2c514b71 3181 verbose);
5f8097be
NB
3182 }
3183
78e44928 3184 if (!dev) {
a3163bf0 3185 mdu_array_info_t array = {
3186 .level = level, .layout = layout,
3187 .raid_disks = raiddisks
3188 };
3189 struct vd_config conf;
3190 if (layout_md2ddf(&array, &conf) == -1) {
b42f577a 3191 if (verbose)
94b08b7c 3192 pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
3193 level, layout, raiddisks);
78e44928 3194 return 0;
b42f577a 3195 }
78e44928 3196 /* Should check layout? etc */
8592f29d
N
3197
3198 if (st->sb && freesize) {
3199 /* --create was given a container to create in.
3200 * So we need to check that there are enough
3201 * free spaces and return the amount of space.
3202 * We may as well remember which drives were
3203 * chosen so that add_to_super/getinfo_super
3204 * can return them.
3205 */
c21e737b 3206 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
8592f29d 3207 }
a322f70c 3208 return 1;
78e44928 3209 }
a322f70c 3210
8592f29d
N
3211 if (st->sb) {
3212 /* A container has already been opened, so we are
3213 * creating in there. Maybe a BVD, maybe an SVD.
3214 * Should make a distinction one day.
3215 */
3216 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
3217 chunk, size, data_offset, dev,
3218 freesize,
8592f29d
N
3219 verbose);
3220 }
78e44928
NB
3221 /* This is the first device for the array.
3222 * If it is a container, we read it in and do automagic allocations,
3223 * no other devices should be given.
3224 * Otherwise it must be a member device of a container, and we
3225 * do manual allocation.
3226 * Later we should check for a BVD and make an SVD.
a322f70c 3227 */
a322f70c
DW
3228 fd = open(dev, O_RDONLY|O_EXCL, 0);
3229 if (fd >= 0) {
4dd2df09 3230 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
3231 close(fd);
3232 if (sra && sra->array.major_version == -1 &&
78e44928
NB
3233 strcmp(sra->text_version, "ddf") == 0) {
3234
3235 /* load super */
3236 /* find space for 'n' devices. */
3237 /* remember the devices */
3238 /* Somehow return the fact that we have enough */
a322f70c
DW
3239 }
3240
2c514b71 3241 if (verbose)
e7b84f9d
N
3242 pr_err("ddf: Cannot create this array "
3243 "on device %s - a container is required.\n",
3244 dev);
a322f70c
DW
3245 return 0;
3246 }
3247 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 3248 if (verbose)
e7b84f9d 3249 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3250 dev, strerror(errno));
a322f70c
DW
3251 return 0;
3252 }
3253 /* Well, it is in use by someone, maybe a 'ddf' container. */
3254 cfd = open_container(fd);
3255 if (cfd < 0) {
3256 close(fd);
2c514b71 3257 if (verbose)
e7b84f9d 3258 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 3259 dev, strerror(EBUSY));
a322f70c
DW
3260 return 0;
3261 }
4dd2df09 3262 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
3263 close(fd);
3264 if (sra && sra->array.major_version == -1 &&
3265 strcmp(sra->text_version, "ddf") == 0) {
3266 /* This is a member of a ddf container. Load the container
3267 * and try to create a bvd
3268 */
3269 struct ddf_super *ddf;
e1902a7b 3270 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 3271 st->sb = ddf;
4dd2df09 3272 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 3273 close(cfd);
78e44928 3274 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 3275 raiddisks, chunk, size,
af4348dd 3276 data_offset,
2c514b71
NB
3277 dev, freesize,
3278 verbose);
a322f70c
DW
3279 }
3280 close(cfd);
c42ec1ed
DW
3281 } else /* device may belong to a different container */
3282 return 0;
3283
a322f70c
DW
3284 return 1;
3285}
3286
2c514b71
NB
3287static int
3288validate_geometry_ddf_container(struct supertype *st,
3289 int level, int layout, int raiddisks,
3290 int chunk, unsigned long long size,
af4348dd 3291 unsigned long long data_offset,
2c514b71
NB
3292 char *dev, unsigned long long *freesize,
3293 int verbose)
a322f70c
DW
3294{
3295 int fd;
3296 unsigned long long ldsize;
3297
3298 if (level != LEVEL_CONTAINER)
3299 return 0;
3300 if (!dev)
3301 return 1;
3302
3303 fd = open(dev, O_RDONLY|O_EXCL, 0);
3304 if (fd < 0) {
2c514b71 3305 if (verbose)
e7b84f9d 3306 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3307 dev, strerror(errno));
a322f70c
DW
3308 return 0;
3309 }
3310 if (!get_dev_size(fd, dev, &ldsize)) {
3311 close(fd);
3312 return 0;
3313 }
3314 close(fd);
3315
387fcd59 3316 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
3317 if (*freesize == 0)
3318 return 0;
a322f70c
DW
3319
3320 return 1;
3321}
3322
78e44928
NB
3323static int validate_geometry_ddf_bvd(struct supertype *st,
3324 int level, int layout, int raiddisks,
c21e737b 3325 int *chunk, unsigned long long size,
af4348dd 3326 unsigned long long data_offset,
2c514b71
NB
3327 char *dev, unsigned long long *freesize,
3328 int verbose)
a322f70c
DW
3329{
3330 struct stat stb;
3331 struct ddf_super *ddf = st->sb;
3332 struct dl *dl;
5f8097be
NB
3333 unsigned long long pos = 0;
3334 unsigned long long maxsize;
3335 struct extent *e;
3336 int i;
a322f70c 3337 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
3338 if (level == LEVEL_CONTAINER) {
3339 if (verbose)
e7b84f9d 3340 pr_err("DDF cannot create a container within an container\n");
a322f70c 3341 return 0;
b42f577a 3342 }
a322f70c
DW
3343 /* We must have the container info already read in. */
3344 if (!ddf)
3345 return 0;
3346
5f8097be
NB
3347 if (!dev) {
3348 /* General test: make sure there is space for
3349 * 'raiddisks' device extents of size 'size'.
3350 */
3351 unsigned long long minsize = size;
3352 int dcnt = 0;
3353 if (minsize == 0)
3354 minsize = 8;
3355 for (dl = ddf->dlist; dl ; dl = dl->next)
3356 {
3357 int found = 0;
7e1432fb 3358 pos = 0;
5f8097be
NB
3359
3360 i = 0;
3361 e = get_extents(ddf, dl);
3362 if (!e) continue;
3363 do {
3364 unsigned long long esize;
3365 esize = e[i].start - pos;
3366 if (esize >= minsize)
3367 found = 1;
3368 pos = e[i].start + e[i].size;
3369 i++;
3370 } while (e[i-1].size);
3371 if (found)
3372 dcnt++;
3373 free(e);
3374 }
3375 if (dcnt < raiddisks) {
2c514b71 3376 if (verbose)
e7b84f9d
N
3377 pr_err("ddf: Not enough devices with "
3378 "space for this array (%d < %d)\n",
3379 dcnt, raiddisks);
5f8097be
NB
3380 return 0;
3381 }
3382 return 1;
3383 }
a322f70c
DW
3384 /* This device must be a member of the set */
3385 if (stat(dev, &stb) < 0)
3386 return 0;
3387 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3388 return 0;
3389 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
3390 if (dl->major == (int)major(stb.st_rdev) &&
3391 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
3392 break;
3393 }
5f8097be 3394 if (!dl) {
2c514b71 3395 if (verbose)
e7b84f9d 3396 pr_err("ddf: %s is not in the "
613b0d17
N
3397 "same DDF set\n",
3398 dev);
5f8097be
NB
3399 return 0;
3400 }
3401 e = get_extents(ddf, dl);
3402 maxsize = 0;
3403 i = 0;
3404 if (e) do {
613b0d17
N
3405 unsigned long long esize;
3406 esize = e[i].start - pos;
3407 if (esize >= maxsize)
3408 maxsize = esize;
3409 pos = e[i].start + e[i].size;
3410 i++;
3411 } while (e[i-1].size);
5f8097be 3412 *freesize = maxsize;
a322f70c
DW
3413 // FIXME here I am
3414
3415 return 1;
3416}
59e36268 3417
a322f70c 3418static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 3419 void **sbp, char *devname)
a322f70c
DW
3420{
3421 struct mdinfo *sra;
3422 struct ddf_super *super;
3423 struct mdinfo *sd, *best = NULL;
3424 int bestseq = 0;
3425 int seq;
3426 char nm[20];
3427 int dfd;
3428
b526e52d 3429 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
3430 if (!sra)
3431 return 1;
3432 if (sra->array.major_version != -1 ||
3433 sra->array.minor_version != -2 ||
3434 strcmp(sra->text_version, "ddf") != 0)
3435 return 1;
3436
6416d527 3437 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3438 return 1;
a2349791 3439 memset(super, 0, sizeof(*super));
a322f70c
DW
3440
3441 /* first, try each device, and choose the best ddf */
3442 for (sd = sra->devs ; sd ; sd = sd->next) {
3443 int rv;
3444 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3445 dfd = dev_open(nm, O_RDONLY);
3446 if (dfd < 0)
a322f70c
DW
3447 return 2;
3448 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3449 close(dfd);
a322f70c 3450 if (rv == 0) {
60931cf9 3451 seq = be32_to_cpu(super->active->seq);
a322f70c
DW
3452 if (super->active->openflag)
3453 seq--;
3454 if (!best || seq > bestseq) {
3455 bestseq = seq;
3456 best = sd;
3457 }
3458 }
3459 }
3460 if (!best)
3461 return 1;
3462 /* OK, load this ddf */
3463 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3464 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3465 if (dfd < 0)
a322f70c
DW
3466 return 1;
3467 load_ddf_headers(dfd, super, NULL);
3468 load_ddf_global(dfd, super, NULL);
3469 close(dfd);
3470 /* Now we need the device-local bits */
3471 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3472 int rv;
3473
a322f70c 3474 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3475 dfd = dev_open(nm, O_RDWR);
7a7cc504 3476 if (dfd < 0)
a322f70c 3477 return 2;
3d2c4fc7
DW
3478 rv = load_ddf_headers(dfd, super, NULL);
3479 if (rv == 0)
e1902a7b 3480 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3481 if (rv)
3482 return 1;
a322f70c 3483 }
33414a01 3484
a322f70c
DW
3485 *sbp = super;
3486 if (st->ss == NULL) {
78e44928 3487 st->ss = &super_ddf;
a322f70c
DW
3488 st->minor_version = 0;
3489 st->max_devs = 512;
3490 }
4dd2df09 3491 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3492 return 0;
3493}
2b959fbf
N
3494
3495static int load_container_ddf(struct supertype *st, int fd,
3496 char *devname)
3497{
3498 return load_super_ddf_all(st, fd, &st->sb, devname);
3499}
3500
0e600426 3501#endif /* MDASSEMBLE */
a322f70c 3502
a5c7adb3 3503static int check_secondary(const struct vcl *vc)
3504{
3505 const struct vd_config *conf = &vc->conf;
3506 int i;
3507
3508 /* The only DDF secondary RAID level md can support is
3509 * RAID 10, if the stripe sizes and Basic volume sizes
3510 * are all equal.
3511 * Other configurations could in theory be supported by exposing
3512 * the BVDs to user space and using device mapper for the secondary
3513 * mapping. So far we don't support that.
3514 */
3515
3516 __u64 sec_elements[4] = {0, 0, 0, 0};
3517#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3518#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3519
3520 if (vc->other_bvds == NULL) {
3521 pr_err("No BVDs for secondary RAID found\n");
3522 return -1;
3523 }
3524 if (conf->prl != DDF_RAID1) {
3525 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3526 return -1;
3527 }
3528 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3529 pr_err("Secondary RAID level %d is unsupported\n",
3530 conf->srl);
3531 return -1;
3532 }
3533 __set_sec_seen(conf->sec_elmnt_seq);
3534 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3535 const struct vd_config *bvd = vc->other_bvds[i];
3c48f7be 3536 if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
c98567ba 3537 continue;
a5c7adb3 3538 if (bvd->srl != conf->srl) {
3539 pr_err("Inconsistent secondary RAID level across BVDs\n");
3540 return -1;
3541 }
3542 if (bvd->prl != conf->prl) {
3543 pr_err("Different RAID levels for BVDs are unsupported\n");
3544 return -1;
3545 }
3546 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3547 pr_err("All BVDs must have the same number of primary elements\n");
3548 return -1;
3549 }
3550 if (bvd->chunk_shift != conf->chunk_shift) {
3551 pr_err("Different strip sizes for BVDs are unsupported\n");
3552 return -1;
3553 }
3554 if (bvd->array_blocks != conf->array_blocks) {
3555 pr_err("Different BVD sizes are unsupported\n");
3556 return -1;
3557 }
3558 __set_sec_seen(bvd->sec_elmnt_seq);
3559 }
3560 for (i = 0; i < conf->sec_elmnt_count; i++) {
3561 if (!__was_sec_seen(i)) {
3562 pr_err("BVD %d is missing\n", i);
3563 return -1;
3564 }
3565 }
3566 return 0;
3567}
3568
8a38db86 3569static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
60931cf9 3570 be32 refnum, unsigned int nmax,
4e587018 3571 const struct vd_config **bvd,
3572 unsigned int *idx)
8a38db86 3573{
4e587018 3574 unsigned int i, j, n, sec, cnt;
3575
3576 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3577 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3578
3579 for (i = 0, j = 0 ; i < nmax ; i++) {
3580 /* j counts valid entries for this BVD */
60931cf9 3581 if (be32_to_cpu(vc->conf.phys_refnum[i]) != 0xffffffff)
4e587018 3582 j++;
60931cf9 3583 if (be32_eq(vc->conf.phys_refnum[i], refnum)) {
4e587018 3584 *bvd = &vc->conf;
3585 *idx = i;
3586 return sec * cnt + j - 1;
3587 }
3588 }
3589 if (vc->other_bvds == NULL)
3590 goto bad;
3591
3592 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3593 struct vd_config *vd = vc->other_bvds[n-1];
4e587018 3594 sec = vd->sec_elmnt_seq;
3c48f7be 3595 if (sec == DDF_UNUSED_BVD)
3596 continue;
4e587018 3597 for (i = 0, j = 0 ; i < nmax ; i++) {
60931cf9 3598 if (be32_to_cpu(vd->phys_refnum[i]) != 0xffffffff)
4e587018 3599 j++;
60931cf9 3600 if (be32_eq(vd->phys_refnum[i], refnum)) {
4e587018 3601 *bvd = vd;
3602 *idx = i;
3603 return sec * cnt + j - 1;
3604 }
3605 }
3606 }
3607bad:
3608 *bvd = NULL;
d6e7b083 3609 return DDF_NOTFOUND;
8a38db86 3610}
3611
00bbdbda 3612static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3613{
3614 /* Given a container loaded by load_super_ddf_all,
3615 * extract information about all the arrays into
3616 * an mdinfo tree.
3617 *
3618 * For each vcl in conflist: create an mdinfo, fill it in,
3619 * then look for matching devices (phys_refnum) in dlist
3620 * and create appropriate device mdinfo.
3621 */
3622 struct ddf_super *ddf = st->sb;
3623 struct mdinfo *rest = NULL;
3624 struct vcl *vc;
3625
3626 for (vc = ddf->conflist ; vc ; vc=vc->next)
3627 {
f21e18ca
N
3628 unsigned int i;
3629 unsigned int j;
598f0d58 3630 struct mdinfo *this;
00bbdbda 3631 char *ep;
90fa1a29 3632 __u32 *cptr;
8a38db86 3633 unsigned int pd;
00bbdbda
N
3634
3635 if (subarray &&
3636 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3637 *ep != '\0'))
3638 continue;
3639
a5c7adb3 3640 if (vc->conf.sec_elmnt_count > 1) {
3641 if (check_secondary(vc) != 0)
3642 continue;
3643 }
3644
503975b9 3645 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3646 this->next = rest;
3647 rest = this;
3648
8a2848a7 3649 if (layout_ddf2md(&vc->conf, &this->array))
3650 continue;
598f0d58 3651 this->array.md_minor = -1;
f35f2525
N
3652 this->array.major_version = -1;
3653 this->array.minor_version = -2;
90fa1a29
JS
3654 cptr = (__u32 *)(vc->conf.guid + 16);
3655 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58 3656 this->array.utime = DECADE +
60931cf9 3657 be32_to_cpu(vc->conf.timestamp);
598f0d58
NB
3658 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3659
59e36268 3660 i = vc->vcnum;
7a7cc504
NB
3661 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3662 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3663 DDF_init_full) {
598f0d58 3664 this->array.state = 0;
ed9d66aa
NB
3665 this->resync_start = 0;
3666 } else {
598f0d58 3667 this->array.state = 1;
b7528a20 3668 this->resync_start = MaxSector;
ed9d66aa 3669 }
db42fa9b
N
3670 memcpy(this->name, ddf->virt->entries[i].name, 16);
3671 this->name[16]=0;
3672 for(j=0; j<16; j++)
3673 if (this->name[j] == ' ')
3674 this->name[j] = 0;
598f0d58
NB
3675
3676 memset(this->uuid, 0, sizeof(this->uuid));
3677 this->component_size = __be64_to_cpu(vc->conf.blocks);
3678 this->array.size = this->component_size / 2;
5f2aace8 3679 this->container_member = i;
598f0d58 3680
c5afc314
N
3681 ddf->currentconf = vc;
3682 uuid_from_super_ddf(st, this->uuid);
f646805e 3683 if (!subarray)
3684 ddf->currentconf = NULL;
c5afc314 3685
60f18132 3686 sprintf(this->text_version, "/%s/%d",
4dd2df09 3687 st->container_devnm, this->container_member);
60f18132 3688
8a38db86 3689 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3690 struct mdinfo *dev;
3691 struct dl *d;
4e587018 3692 const struct vd_config *bvd;
3693 unsigned int iphys;
fa033bec 3694 int stt;
598f0d58 3695
60931cf9 3696 if (be32_to_cpu(ddf->phys->entries[pd].refnum)
3697 == 0xFFFFFFFF)
bc17324f 3698 continue;
0cf5ef67
N
3699
3700 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3701 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3702 != DDF_Online)
3703 continue;
3704
8a38db86 3705 i = get_pd_index_from_refnum(
4e587018 3706 vc, ddf->phys->entries[pd].refnum,
3707 ddf->mppe, &bvd, &iphys);
d6e7b083 3708 if (i == DDF_NOTFOUND)
8a38db86 3709 continue;
3710
fa033bec 3711 this->array.working_disks++;
bc17324f 3712
0cf5ef67 3713 for (d = ddf->dlist; d ; d=d->next)
60931cf9 3714 if (be32_eq(d->disk.refnum,
3715 ddf->phys->entries[pd].refnum))
0cf5ef67
N
3716 break;
3717 if (d == NULL)
3718 /* Haven't found that one yet, maybe there are others */
3719 continue;
3720
503975b9 3721 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3722 dev->next = this->devs;
3723 this->devs = dev;
3724
60931cf9 3725 dev->disk.number = be32_to_cpu(d->disk.refnum);
598f0d58
NB
3726 dev->disk.major = d->major;
3727 dev->disk.minor = d->minor;
3728 dev->disk.raid_disk = i;
3729 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3730 dev->recovery_start = MaxSector;
598f0d58 3731
60931cf9 3732 dev->events = be32_to_cpu(ddf->primary.seq);
57a66662 3733 dev->data_offset =
3734 __be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
4e587018 3735 dev->component_size = __be64_to_cpu(bvd->blocks);
598f0d58
NB
3736 if (d->devname)
3737 strcpy(dev->name, d->devname);
3738 }
3739 }
3740 return rest;
3741}
3742
955e9ea1 3743static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3744{
955e9ea1 3745 struct ddf_super *ddf = st->sb;
a322f70c 3746 unsigned long long dsize;
6416d527 3747 void *buf;
3d2c4fc7 3748 int rc;
a322f70c 3749
955e9ea1
DW
3750 if (!ddf)
3751 return 1;
3752
a322f70c
DW
3753 if (!get_dev_size(fd, NULL, &dsize))
3754 return 1;
3755
dbf98368 3756 if (ddf->dlist || ddf->conflist) {
3757 struct stat sta;
3758 struct dl *dl;
3759 int ofd, ret;
3760
3761 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3762 pr_err("%s: file descriptor for invalid device\n",
3763 __func__);
3764 return 1;
3765 }
3766 for (dl = ddf->dlist; dl; dl = dl->next)
3767 if (dl->major == (int)major(sta.st_rdev) &&
3768 dl->minor == (int)minor(sta.st_rdev))
3769 break;
3770 if (!dl) {
3771 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3772 (int)major(sta.st_rdev),
3773 (int)minor(sta.st_rdev));
3774 return 1;
3775 }
dbf98368 3776 ofd = dl->fd;
3777 dl->fd = fd;
9bf38704 3778 ret = (_write_super_to_disk(ddf, dl) != 1);
dbf98368 3779 dl->fd = ofd;
3780 return ret;
3781 }
3782
3d2c4fc7
DW
3783 if (posix_memalign(&buf, 512, 512) != 0)
3784 return 1;
6416d527
NB
3785 memset(buf, 0, 512);
3786
a322f70c 3787 lseek64(fd, dsize-512, 0);
3d2c4fc7 3788 rc = write(fd, buf, 512);
6416d527 3789 free(buf);
3d2c4fc7
DW
3790 if (rc < 0)
3791 return 1;
a322f70c
DW
3792 return 0;
3793}
3794
a19c88b8
NB
3795static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3796{
3797 /*
3798 * return:
3799 * 0 same, or first was empty, and second was copied
3800 * 1 second had wrong number
3801 * 2 wrong uuid
3802 * 3 wrong other info
3803 */
3804 struct ddf_super *first = st->sb;
3805 struct ddf_super *second = tst->sb;
4eefd651 3806 struct dl *dl1, *dl2;
3807 struct vcl *vl1, *vl2;
2d210697 3808 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3809
3810 if (!first) {
3811 st->sb = tst->sb;
3812 tst->sb = NULL;
3813 return 0;
3814 }
3815
3816 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3817 return 2;
3818
60931cf9 3819 if (!be32_eq(first->anchor.seq, second->anchor.seq)) {
2d210697 3820 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
60931cf9 3821 be32_to_cpu(first->anchor.seq),
3822 be32_to_cpu(second->anchor.seq));
2d210697 3823 return 3;
3824 }
3825 if (first->max_part != second->max_part ||
3826 first->phys->used_pdes != second->phys->used_pdes ||
3827 first->virt->populated_vdes != second->virt->populated_vdes) {
3828 dprintf("%s: PD/VD number mismatch\n", __func__);
3829 return 3;
3830 }
3831
3832 max_pds = __be16_to_cpu(first->phys->used_pdes);
3833 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3834 for (pd = 0; pd < max_pds; pd++)
60931cf9 3835 if (be32_eq(first->phys->entries[pd].refnum,
3836 dl2->disk.refnum))
2d210697 3837 break;
3838 if (pd == max_pds) {
3839 dprintf("%s: no match for disk %08x\n", __func__,
60931cf9 3840 be32_to_cpu(dl2->disk.refnum));
2d210697 3841 return 3;
3842 }
3843 }
3844
3845 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3846 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
60931cf9 3847 if (!be32_eq(vl2->conf.magic, DDF_VD_CONF_MAGIC))
2d210697 3848 continue;
3849 for (vd = 0; vd < max_vds; vd++)
3850 if (!memcmp(first->virt->entries[vd].guid,
3851 vl2->conf.guid, DDF_GUID_LEN))
3852 break;
3853 if (vd == max_vds) {
3854 dprintf("%s: no match for VD config\n", __func__);
3855 return 3;
3856 }
3857 }
a19c88b8 3858 /* FIXME should I look at anything else? */
2d210697 3859
4eefd651 3860 /*
3861 At this point we are fairly sure that the meta data matches.
3862 But the new disk may contain additional local data.
3863 Add it to the super block.
3864 */
3865 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3866 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3867 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3868 DDF_GUID_LEN))
3869 break;
3870 if (vl1) {
3871 if (vl1->other_bvds != NULL &&
3872 vl1->conf.sec_elmnt_seq !=
3873 vl2->conf.sec_elmnt_seq) {
3874 dprintf("%s: adding BVD %u\n", __func__,
3875 vl2->conf.sec_elmnt_seq);
3876 add_other_bvd(vl1, &vl2->conf,
3877 first->conf_rec_len*512);
3878 }
3879 continue;
3880 }
3881
3882 if (posix_memalign((void **)&vl1, 512,
3883 (first->conf_rec_len*512 +
3884 offsetof(struct vcl, conf))) != 0) {
3885 pr_err("%s could not allocate vcl buf\n",
3886 __func__);
3887 return 3;
3888 }
3889
3890 vl1->next = first->conflist;
3891 vl1->block_sizes = NULL;
4eefd651 3892 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3c48f7be 3893 if (alloc_other_bvds(first, vl1) != 0) {
3894 pr_err("%s could not allocate other bvds\n",
3895 __func__);
3896 free(vl1);
3897 return 3;
3898 }
4eefd651 3899 for (vd = 0; vd < max_vds; vd++)
3900 if (!memcmp(first->virt->entries[vd].guid,
3901 vl1->conf.guid, DDF_GUID_LEN))
3902 break;
3903 vl1->vcnum = vd;
3904 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3905 first->conflist = vl1;
3906 }
3907
3908 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3909 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
60931cf9 3910 if (be32_eq(dl1->disk.refnum, dl2->disk.refnum))
4eefd651 3911 break;
3912 if (dl1)
3913 continue;
3914
3915 if (posix_memalign((void **)&dl1, 512,
3916 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3917 != 0) {
3918 pr_err("%s could not allocate disk info buffer\n",
3919 __func__);
3920 return 3;
3921 }
3922 memcpy(dl1, dl2, sizeof(*dl1));
3923 dl1->mdupdate = NULL;
3924 dl1->next = first->dlist;
3925 dl1->fd = -1;
3926 for (pd = 0; pd < max_pds; pd++)
60931cf9 3927 if (be32_eq(first->phys->entries[pd].refnum,
3928 dl1->disk.refnum))
4eefd651 3929 break;
3930 dl1->pdnum = pd;
3931 if (dl2->spare) {
3932 if (posix_memalign((void **)&dl1->spare, 512,
3933 first->conf_rec_len*512) != 0) {
3934 pr_err("%s could not allocate spare info buf\n",
3935 __func__);
3936 return 3;
3937 }
3938 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3939 }
3940 for (vd = 0 ; vd < first->max_part ; vd++) {
3941 if (!dl2->vlist[vd]) {
3942 dl1->vlist[vd] = NULL;
3943 continue;
3944 }
3945 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3946 if (!memcmp(vl1->conf.guid,
3947 dl2->vlist[vd]->conf.guid,
3948 DDF_GUID_LEN))
3949 break;
3950 dl1->vlist[vd] = vl1;
3951 }
3952 }
3953 first->dlist = dl1;
3954 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
60931cf9 3955 be32_to_cpu(dl1->disk.refnum));
4eefd651 3956 }
3957
a19c88b8
NB
3958 return 0;
3959}
3960
0e600426 3961#ifndef MDASSEMBLE
4e5528c6
NB
3962/*
3963 * A new array 'a' has been started which claims to be instance 'inst'
3964 * within container 'c'.
3965 * We need to confirm that the array matches the metadata in 'c' so
3966 * that we don't corrupt any metadata.
3967 */
cba0191b 3968static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3969{
a2aa439e 3970 struct ddf_super *ddf = c->sb;
3971 int n = atoi(inst);
fb9d0acb 3972 if (all_ff(ddf->virt->entries[n].guid)) {
3973 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
a2aa439e 3974 return -ENODEV;
3975 }
3976 dprintf("ddf: open_new %d\n", n);
3977 a->info.container_member = n;
549e9569
NB
3978 return 0;
3979}
3980
4e5528c6
NB
3981/*
3982 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 3983 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
3984 * clean up to the point (in sectors). If that cannot be recorded in the
3985 * metadata, then leave it as dirty.
3986 *
3987 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3988 * !global! virtual_disk.virtual_entry structure.
3989 */
01f157d7 3990static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 3991{
4e5528c6
NB
3992 struct ddf_super *ddf = a->container->sb;
3993 int inst = a->info.container_member;
18a2f463 3994 int old = ddf->virt->entries[inst].state;
01f157d7
N
3995 if (consistent == 2) {
3996 /* Should check if a recovery should be started FIXME */
3997 consistent = 1;
b7941fd6 3998 if (!is_resync_complete(&a->info))
01f157d7
N
3999 consistent = 0;
4000 }
ed9d66aa
NB
4001 if (consistent)
4002 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
4003 else
4e5528c6 4004 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463 4005 if (old != ddf->virt->entries[inst].state)
7d5a7ff3 4006 ddf_set_updates_pending(ddf);
18a2f463
NB
4007
4008 old = ddf->virt->entries[inst].init_state;
ed9d66aa 4009 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 4010 if (is_resync_complete(&a->info))
ed9d66aa 4011 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 4012 else if (a->info.resync_start == 0)
ed9d66aa 4013 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 4014 else
ed9d66aa 4015 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463 4016 if (old != ddf->virt->entries[inst].init_state)
7d5a7ff3 4017 ddf_set_updates_pending(ddf);
ed9d66aa 4018
b27336a2 4019 dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
4020 guid_str(ddf->virt->entries[inst].guid), a->curr_state,
4021 consistent?"clean":"dirty",
b7941fd6 4022 a->info.resync_start);
01f157d7 4023 return consistent;
fd7cde1b
DW
4024}
4025
5ec636b7 4026static int get_bvd_state(const struct ddf_super *ddf,
4027 const struct vd_config *vc)
4028{
4029 unsigned int i, n_bvd, working = 0;
4030 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
4031 int pd, st, state;
4032 for (i = 0; i < n_prim; i++) {
4033 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
4034 continue;
4035 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
4036 if (pd < 0)
4037 continue;
4038 st = __be16_to_cpu(ddf->phys->entries[pd].state);
4039 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
4040 == DDF_Online)
4041 working++;
4042 }
4043
4044 state = DDF_state_degraded;
4045 if (working == n_prim)
4046 state = DDF_state_optimal;
4047 else
4048 switch (vc->prl) {
4049 case DDF_RAID0:
4050 case DDF_CONCAT:
4051 case DDF_JBOD:
4052 state = DDF_state_failed;
4053 break;
4054 case DDF_RAID1:
4055 if (working == 0)
4056 state = DDF_state_failed;
4057 else if (working >= 2)
4058 state = DDF_state_part_optimal;
4059 break;
4060 case DDF_RAID4:
4061 case DDF_RAID5:
4062 if (working < n_prim - 1)
4063 state = DDF_state_failed;
4064 break;
4065 case DDF_RAID6:
4066 if (working < n_prim - 2)
4067 state = DDF_state_failed;
4068 else if (working == n_prim - 1)
4069 state = DDF_state_part_optimal;
4070 break;
4071 }
4072 return state;
4073}
4074
0777d17d 4075static int secondary_state(int state, int other, int seclevel)
4076{
4077 if (state == DDF_state_optimal && other == DDF_state_optimal)
4078 return DDF_state_optimal;
4079 if (seclevel == DDF_2MIRRORED) {
4080 if (state == DDF_state_optimal || other == DDF_state_optimal)
4081 return DDF_state_part_optimal;
4082 if (state == DDF_state_failed && other == DDF_state_failed)
4083 return DDF_state_failed;
4084 return DDF_state_degraded;
4085 } else {
4086 if (state == DDF_state_failed || other == DDF_state_failed)
4087 return DDF_state_failed;
4088 if (state == DDF_state_degraded || other == DDF_state_degraded)
4089 return DDF_state_degraded;
4090 return DDF_state_part_optimal;
4091 }
4092}
4093
4094static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
4095{
4096 int state = get_bvd_state(ddf, &vcl->conf);
4097 unsigned int i;
4098 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
4099 state = secondary_state(
4100 state,
4101 get_bvd_state(ddf, vcl->other_bvds[i-1]),
4102 vcl->conf.srl);
4103 }
4104 return state;
4105}
4106
7a7cc504
NB
4107/*
4108 * The state of each disk is stored in the global phys_disk structure
4109 * in phys_disk.entries[n].state.
4110 * This makes various combinations awkward.
4111 * - When a device fails in any array, it must be failed in all arrays
4112 * that include a part of this device.
4113 * - When a component is rebuilding, we cannot include it officially in the
4114 * array unless this is the only array that uses the device.
4115 *
4116 * So: when transitioning:
4117 * Online -> failed, just set failed flag. monitor will propagate
4118 * spare -> online, the device might need to be added to the array.
4119 * spare -> failed, just set failed. Don't worry if in array or not.
4120 */
8d45d196 4121static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 4122{
7a7cc504 4123 struct ddf_super *ddf = a->container->sb;
baba3f4e 4124 unsigned int inst = a->info.container_member, n_bvd;
4125 struct vcl *vcl;
4126 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
4127 &n_bvd, &vcl);
4128 int pd;
e1316fab
N
4129 struct mdinfo *mdi;
4130 struct dl *dl;
7a7cc504
NB
4131
4132 if (vc == NULL) {
2c514b71 4133 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
4134 return;
4135 }
e1316fab
N
4136 /* Find the matching slot in 'info'. */
4137 for (mdi = a->info.devs; mdi; mdi = mdi->next)
4138 if (mdi->disk.raid_disk == n)
4139 break;
4140 if (!mdi)
4141 return;
4142
4143 /* and find the 'dl' entry corresponding to that. */
4144 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
4145 if (mdi->state_fd >= 0 &&
4146 mdi->disk.major == dl->major &&
e1316fab
N
4147 mdi->disk.minor == dl->minor)
4148 break;
4149 if (!dl)
4150 return;
4151
baba3f4e 4152 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
e1316fab
N
4153 if (pd < 0 || pd != dl->pdnum) {
4154 /* disk doesn't currently exist or has changed.
4155 * If it is now in_sync, insert it. */
baba3f4e 4156 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4157 __func__, dl->pdnum, dl->major, dl->minor,
60931cf9 4158 be32_to_cpu(dl->disk.refnum));
baba3f4e 4159 dprintf("%s: array %u disk %u ref %08x pd %d\n",
60931cf9 4160 __func__, inst, n_bvd,
4161 be32_to_cpu(vc->phys_refnum[n_bvd]), pd);
7a7cc504 4162 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
baba3f4e 4163 pd = dl->pdnum; /* FIXME: is this really correct ? */
4164 vc->phys_refnum[n_bvd] = dl->disk.refnum;
57a66662 4165 LBA_OFFSET(ddf, vc)[n_bvd] =
4166 __cpu_to_be64(mdi->data_offset);
e1316fab
N
4167 ddf->phys->entries[pd].type &=
4168 ~__cpu_to_be16(DDF_Global_Spare);
4169 ddf->phys->entries[pd].type |=
4170 __cpu_to_be16(DDF_Active_in_VD);
7d5a7ff3 4171 ddf_set_updates_pending(ddf);
7a7cc504
NB
4172 }
4173 } else {
18a2f463 4174 int old = ddf->phys->entries[pd].state;
7a7cc504
NB
4175 if (state & DS_FAULTY)
4176 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
4177 if (state & DS_INSYNC) {
4178 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
4179 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
4180 }
18a2f463 4181 if (old != ddf->phys->entries[pd].state)
7d5a7ff3 4182 ddf_set_updates_pending(ddf);
7a7cc504
NB
4183 }
4184
2c514b71 4185 dprintf("ddf: set_disk %d to %x\n", n, state);
7e1432fb 4186
7a7cc504
NB
4187 /* Now we need to check the state of the array and update
4188 * virtual_disk.entries[n].state.
4189 * It needs to be one of "optimal", "degraded", "failed".
4190 * I don't understand 'deleted' or 'missing'.
4191 */
0777d17d 4192 state = get_svd_state(ddf, vcl);
7a7cc504 4193
18a2f463
NB
4194 if (ddf->virt->entries[inst].state !=
4195 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4196 | state)) {
4197
4198 ddf->virt->entries[inst].state =
4199 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4200 | state;
7d5a7ff3 4201 ddf_set_updates_pending(ddf);
18a2f463 4202 }
7a7cc504 4203
549e9569
NB
4204}
4205
2e735d19 4206static void ddf_sync_metadata(struct supertype *st)
549e9569 4207{
7a7cc504
NB
4208
4209 /*
4210 * Write all data to all devices.
4211 * Later, we might be able to track whether only local changes
4212 * have been made, or whether any global data has been changed,
4213 * but ddf is sufficiently weird that it probably always
4214 * changes global data ....
4215 */
18a2f463
NB
4216 struct ddf_super *ddf = st->sb;
4217 if (!ddf->updates_pending)
4218 return;
4219 ddf->updates_pending = 0;
1cc7f4fe 4220 __write_init_super_ddf(st);
2c514b71 4221 dprintf("ddf: sync_metadata\n");
549e9569
NB
4222}
4223
f646805e 4224static int del_from_conflist(struct vcl **list, const char *guid)
4225{
4226 struct vcl **p;
4227 int found = 0;
4228 for (p = list; p && *p; p = &((*p)->next))
4229 if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
4230 found = 1;
4231 *p = (*p)->next;
4232 }
4233 return found;
4234}
4235
4236static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
4237{
4238 struct dl *dl;
4239 unsigned int vdnum, i;
4240 vdnum = find_vde_by_guid(ddf, guid);
4241 if (vdnum == DDF_NOTFOUND) {
4242 pr_err("%s: could not find VD %s\n", __func__,
4243 guid_str(guid));
4244 return -1;
4245 }
4246 if (del_from_conflist(&ddf->conflist, guid) == 0) {
4247 pr_err("%s: could not find conf %s\n", __func__,
4248 guid_str(guid));
4249 return -1;
4250 }
4251 for (dl = ddf->dlist; dl; dl = dl->next)
4252 for (i = 0; i < ddf->max_part; i++)
4253 if (dl->vlist[i] != NULL &&
4254 !memcmp(dl->vlist[i]->conf.guid, guid,
4255 DDF_GUID_LEN))
4256 dl->vlist[i] = NULL;
4257 memset(ddf->virt->entries[vdnum].guid, 0xff, DDF_GUID_LEN);
4258 dprintf("%s: deleted %s\n", __func__, guid_str(guid));
4259 return 0;
4260}
4261
4262static int kill_subarray_ddf(struct supertype *st)
4263{
4264 struct ddf_super *ddf = st->sb;
4265 /*
4266 * currentconf is set in container_content_ddf,
4267 * called with subarray arg
4268 */
4269 struct vcl *victim = ddf->currentconf;
4270 struct vd_config *conf;
4271 ddf->currentconf = NULL;
4272 unsigned int vdnum;
4273 if (!victim) {
4274 pr_err("%s: nothing to kill\n", __func__);
4275 return -1;
4276 }
4277 conf = &victim->conf;
4278 vdnum = find_vde_by_guid(ddf, conf->guid);
4279 if (vdnum == DDF_NOTFOUND) {
4280 pr_err("%s: could not find VD %s\n", __func__,
4281 guid_str(conf->guid));
4282 return -1;
4283 }
4284 if (st->update_tail) {
4285 struct virtual_disk *vd;
4286 int len = sizeof(struct virtual_disk)
4287 + sizeof(struct virtual_entry);
4288 vd = xmalloc(len);
4289 if (vd == NULL) {
4290 pr_err("%s: failed to allocate %d bytes\n", __func__,
4291 len);
4292 return -1;
4293 }
4294 memset(vd, 0 , len);
4295 vd->magic = DDF_VIRT_RECORDS_MAGIC;
4296 vd->populated_vdes = 0;
4297 memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
4298 /* we use DDF_state_deleted as marker */
4299 vd->entries[0].state = DDF_state_deleted;
4300 append_metadata_update(st, vd, len);
6a350d82 4301 } else {
f646805e 4302 _kill_subarray_ddf(ddf, conf->guid);
6a350d82 4303 ddf_set_updates_pending(ddf);
4304 ddf_sync_metadata(st);
4305 }
f646805e 4306 return 0;
4307}
4308
c5943560 4309static void copy_matching_bvd(struct ddf_super *ddf,
4310 struct vd_config *conf,
4311 const struct metadata_update *update)
4312{
4313 unsigned int mppe =
4314 __be16_to_cpu(ddf->anchor.max_primary_element_entries);
4315 unsigned int len = ddf->conf_rec_len * 512;
4316 char *p;
4317 struct vd_config *vc;
4318 for (p = update->buf; p < update->buf + update->len; p += len) {
4319 vc = (struct vd_config *) p;
4320 if (vc->sec_elmnt_seq == conf->sec_elmnt_seq) {
4321 memcpy(conf->phys_refnum, vc->phys_refnum,
4322 mppe * (sizeof(__u32) + sizeof(__u64)));
4323 return;
4324 }
4325 }
4326 pr_err("%s: no match for BVD %d of %s in update\n", __func__,
4327 conf->sec_elmnt_seq, guid_str(conf->guid));
4328}
4329
88c164f4
NB
4330static void ddf_process_update(struct supertype *st,
4331 struct metadata_update *update)
4332{
4333 /* Apply this update to the metadata.
4334 * The first 4 bytes are a DDF_*_MAGIC which guides
4335 * our actions.
4336 * Possible update are:
4337 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
4338 * Add a new physical device or remove an old one.
4339 * Changes to this record only happen implicitly.
88c164f4
NB
4340 * used_pdes is the device number.
4341 * DDF_VIRT_RECORDS_MAGIC
4342 * Add a new VD. Possibly also change the 'access' bits.
4343 * populated_vdes is the entry number.
4344 * DDF_VD_CONF_MAGIC
4345 * New or updated VD. the VIRT_RECORD must already
4346 * exist. For an update, phys_refnum and lba_offset
4347 * (at least) are updated, and the VD_CONF must
4348 * be written to precisely those devices listed with
4349 * a phys_refnum.
4350 * DDF_SPARE_ASSIGN_MAGIC
4351 * replacement Spare Assignment Record... but for which device?
4352 *
4353 * So, e.g.:
4354 * - to create a new array, we send a VIRT_RECORD and
4355 * a VD_CONF. Then assemble and start the array.
4356 * - to activate a spare we send a VD_CONF to add the phys_refnum
4357 * and offset. This will also mark the spare as active with
4358 * a spare-assignment record.
4359 */
4360 struct ddf_super *ddf = st->sb;
60931cf9 4361 be32 *magic = (be32 *)update->buf;
88c164f4
NB
4362 struct phys_disk *pd;
4363 struct virtual_disk *vd;
4364 struct vd_config *vc;
4365 struct vcl *vcl;
4366 struct dl *dl;
f21e18ca 4367 unsigned int ent;
c5943560 4368 unsigned int pdnum, pd2, len;
88c164f4 4369
60931cf9 4370 dprintf("Process update %x\n", be32_to_cpu(*magic));
7e1432fb 4371
60931cf9 4372 if (be32_eq(*magic, DDF_PHYS_RECORDS_MAGIC)) {
88c164f4
NB
4373
4374 if (update->len != (sizeof(struct phys_disk) +
4375 sizeof(struct phys_disk_entry)))
4376 return;
4377 pd = (struct phys_disk*)update->buf;
4378
4379 ent = __be16_to_cpu(pd->used_pdes);
4380 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
4381 return;
4dd968cc
N
4382 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
4383 struct dl **dlp;
4384 /* removing this disk. */
4385 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
4386 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4387 struct dl *dl = *dlp;
4388 if (dl->pdnum == (signed)ent) {
4389 close(dl->fd);
4390 dl->fd = -1;
4391 /* FIXME this doesn't free
4392 * dl->devname */
4393 update->space = dl;
4394 *dlp = dl->next;
4395 break;
4396 }
4397 }
7d5a7ff3 4398 ddf_set_updates_pending(ddf);
4dd968cc
N
4399 return;
4400 }
88c164f4
NB
4401 if (!all_ff(ddf->phys->entries[ent].guid))
4402 return;
4403 ddf->phys->entries[ent] = pd->entries[0];
4404 ddf->phys->used_pdes = __cpu_to_be16(1 +
613b0d17 4405 __be16_to_cpu(ddf->phys->used_pdes));
7d5a7ff3 4406 ddf_set_updates_pending(ddf);
2cc2983d
N
4407 if (ddf->add_list) {
4408 struct active_array *a;
4409 struct dl *al = ddf->add_list;
4410 ddf->add_list = al->next;
4411
4412 al->next = ddf->dlist;
4413 ddf->dlist = al;
4414
4415 /* As a device has been added, we should check
4416 * for any degraded devices that might make
4417 * use of this spare */
4418 for (a = st->arrays ; a; a=a->next)
4419 a->check_degraded = 1;
4420 }
60931cf9 4421 } else if (be32_eq(*magic, DDF_VIRT_RECORDS_MAGIC)) {
88c164f4
NB
4422
4423 if (update->len != (sizeof(struct virtual_disk) +
4424 sizeof(struct virtual_entry)))
4425 return;
4426 vd = (struct virtual_disk*)update->buf;
4427
f646805e 4428 if (vd->entries[0].state == DDF_state_deleted) {
4429 if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
4430 return;
4431 } else {
4432
6a7e7ecc 4433 ent = find_vde_by_guid(ddf, vd->entries[0].guid);
4434 if (ent != DDF_NOTFOUND) {
4435 dprintf("%s: VD %s exists already in slot %d\n",
4436 __func__, guid_str(vd->entries[0].guid),
4437 ent);
4438 return;
4439 }
f646805e 4440 ent = find_unused_vde(ddf);
4441 if (ent == DDF_NOTFOUND)
4442 return;
4443 ddf->virt->entries[ent] = vd->entries[0];
4444 ddf->virt->populated_vdes =
4445 __cpu_to_be16(
4446 1 + __be16_to_cpu(
4447 ddf->virt->populated_vdes));
ed5ff7a2 4448 dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
4449 __func__, guid_str(vd->entries[0].guid), ent,
4450 ddf->virt->entries[ent].state,
4451 ddf->virt->entries[ent].init_state);
f646805e 4452 }
7d5a7ff3 4453 ddf_set_updates_pending(ddf);
60931cf9 4454 }
88c164f4 4455
60931cf9 4456 else if (be32_eq(*magic, DDF_VD_CONF_MAGIC)) {
88c164f4 4457 vc = (struct vd_config*)update->buf;
c5943560 4458 len = ddf->conf_rec_len * 512;
4459 if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
4460 pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
4461 __func__, guid_str(vc->guid), update->len,
4462 vc->sec_elmnt_count);
4463 return;
4464 }
88c164f4
NB
4465 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4466 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4467 break;
ed5ff7a2 4468 dprintf("%s: conf update for %s (%s)\n", __func__,
4469 guid_str(vc->guid), (vcl ? "old" : "new"));
88c164f4
NB
4470 if (vcl) {
4471 /* An update, just copy the phys_refnum and lba_offset
4472 * fields
4473 */
c5943560 4474 unsigned int i;
4475 copy_matching_bvd(ddf, &vcl->conf, update);
4476 for (i = 1; i < vc->sec_elmnt_count; i++)
4477 copy_matching_bvd(ddf, vcl->other_bvds[i-1],
4478 update);
88c164f4
NB
4479 } else {
4480 /* A new VD_CONF */
c5943560 4481 unsigned int i;
e6b9548d
DW
4482 if (!update->space)
4483 return;
88c164f4
NB
4484 vcl = update->space;
4485 update->space = NULL;
4486 vcl->next = ddf->conflist;
c5943560 4487 memcpy(&vcl->conf, vc, len);
fb9d0acb 4488 ent = find_vde_by_guid(ddf, vc->guid);
4489 if (ent == DDF_NOTFOUND)
4490 return;
4491 vcl->vcnum = ent;
88c164f4 4492 ddf->conflist = vcl;
c5943560 4493 for (i = 1; i < vc->sec_elmnt_count; i++)
4494 memcpy(vcl->other_bvds[i-1],
4495 update->buf + len * i, len);
88c164f4 4496 }
c7079c84
N
4497 /* Set DDF_Transition on all Failed devices - to help
4498 * us detect those that are no longer in use
4499 */
4500 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4501 if (ddf->phys->entries[pdnum].state
4502 & __be16_to_cpu(DDF_Failed))
4503 ddf->phys->entries[pdnum].state
4504 |= __be16_to_cpu(DDF_Transition);
88c164f4
NB
4505 /* Now make sure vlist is correct for each dl. */
4506 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca 4507 unsigned int vn = 0;
8401644c 4508 int in_degraded = 0;
5838fccd 4509 for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
4510 unsigned int dn, ibvd;
4511 const struct vd_config *conf;
4512 int vstate;
4513 dn = get_pd_index_from_refnum(vcl,
4514 dl->disk.refnum,
4515 ddf->mppe,
4516 &conf, &ibvd);
4517 if (dn == DDF_NOTFOUND)
4518 continue;
4519 dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
ad60eea1 4520 dl->pdnum,
60931cf9 4521 be32_to_cpu(dl->disk.refnum),
5838fccd 4522 guid_str(conf->guid),
4523 conf->sec_elmnt_seq, vn);
4524 /* Clear the Transition flag */
4525 if (ddf->phys->entries[dl->pdnum].state
4526 & __be16_to_cpu(DDF_Failed))
4527 ddf->phys->entries[dl->pdnum].state &=
4528 ~__be16_to_cpu(DDF_Transition);
4529 dl->vlist[vn++] = vcl;
4530 vstate = ddf->virt->entries[vcl->vcnum].state
4531 & DDF_state_mask;
4532 if (vstate == DDF_state_degraded ||
4533 vstate == DDF_state_part_optimal)
4534 in_degraded = 1;
4535 }
88c164f4
NB
4536 while (vn < ddf->max_part)
4537 dl->vlist[vn++] = NULL;
7e1432fb
NB
4538 if (dl->vlist[0]) {
4539 ddf->phys->entries[dl->pdnum].type &=
4540 ~__cpu_to_be16(DDF_Global_Spare);
8401644c
N
4541 if (!(ddf->phys->entries[dl->pdnum].type &
4542 __cpu_to_be16(DDF_Active_in_VD))) {
613b0d17
N
4543 ddf->phys->entries[dl->pdnum].type |=
4544 __cpu_to_be16(DDF_Active_in_VD);
4545 if (in_degraded)
4546 ddf->phys->entries[dl->pdnum].state |=
4547 __cpu_to_be16(DDF_Rebuilding);
4548 }
7e1432fb
NB
4549 }
4550 if (dl->spare) {
4551 ddf->phys->entries[dl->pdnum].type &=
4552 ~__cpu_to_be16(DDF_Global_Spare);
4553 ddf->phys->entries[dl->pdnum].type |=
4554 __cpu_to_be16(DDF_Spare);
4555 }
4556 if (!dl->vlist[0] && !dl->spare) {
4557 ddf->phys->entries[dl->pdnum].type |=
4558 __cpu_to_be16(DDF_Global_Spare);
4559 ddf->phys->entries[dl->pdnum].type &=
4560 ~__cpu_to_be16(DDF_Spare |
4561 DDF_Active_in_VD);
4562 }
88c164f4 4563 }
c7079c84
N
4564
4565 /* Now remove any 'Failed' devices that are not part
4566 * of any VD. They will have the Transition flag set.
4567 * Once done, we need to update all dl->pdnum numbers.
4568 */
4569 pd2 = 0;
4570 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4571 if ((ddf->phys->entries[pdnum].state
4572 & __be16_to_cpu(DDF_Failed))
4573 && (ddf->phys->entries[pdnum].state
4574 & __be16_to_cpu(DDF_Transition)))
4575 /* skip this one */;
4576 else if (pdnum == pd2)
4577 pd2++;
4578 else {
4579 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
4580 for (dl = ddf->dlist; dl; dl = dl->next)
4581 if (dl->pdnum == (int)pdnum)
4582 dl->pdnum = pd2;
4583 pd2++;
4584 }
4585 ddf->phys->used_pdes = __cpu_to_be16(pd2);
4586 while (pd2 < pdnum) {
4587 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
4588 pd2++;
4589 }
4590
7d5a7ff3 4591 ddf_set_updates_pending(ddf);
88c164f4 4592 }
60931cf9 4593 /* case DDF_SPARE_ASSIGN_MAGIC */
88c164f4
NB
4594}
4595
edd8d13c
NB
4596static void ddf_prepare_update(struct supertype *st,
4597 struct metadata_update *update)
4598{
4599 /* This update arrived at managemon.
4600 * We are about to pass it to monitor.
4601 * If a malloc is needed, do it here.
4602 */
4603 struct ddf_super *ddf = st->sb;
60931cf9 4604 be32 *magic = (be32 *)update->buf;
4605 if (be32_eq(*magic, DDF_VD_CONF_MAGIC)) {
c5943560 4606 struct vcl *vcl;
4607 struct vd_config *conf = (struct vd_config *) update->buf;
e6b9548d 4608 if (posix_memalign(&update->space, 512,
613b0d17 4609 offsetof(struct vcl, conf)
c5943560 4610 + ddf->conf_rec_len * 512) != 0) {
4611 update->space = NULL;
4612 return;
4613 }
4614 vcl = update->space;
4615 vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
4616 if (alloc_other_bvds(ddf, vcl) != 0) {
4617 free(update->space);
e6b9548d 4618 update->space = NULL;
c5943560 4619 }
4620 }
edd8d13c
NB
4621}
4622
7e1432fb
NB
4623/*
4624 * Check if the array 'a' is degraded but not failed.
4625 * If it is, find as many spares as are available and needed and
4626 * arrange for their inclusion.
4627 * We only choose devices which are not already in the array,
4628 * and prefer those with a spare-assignment to this array.
4629 * otherwise we choose global spares - assuming always that
4630 * there is enough room.
4631 * For each spare that we assign, we return an 'mdinfo' which
4632 * describes the position for the device in the array.
4633 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4634 * the new phys_refnum and lba_offset values.
4635 *
4636 * Only worry about BVDs at the moment.
4637 */
4638static struct mdinfo *ddf_activate_spare(struct active_array *a,
4639 struct metadata_update **updates)
4640{
4641 int working = 0;
4642 struct mdinfo *d;
4643 struct ddf_super *ddf = a->container->sb;
4644 int global_ok = 0;
4645 struct mdinfo *rv = NULL;
4646 struct mdinfo *di;
4647 struct metadata_update *mu;
4648 struct dl *dl;
4649 int i;
baba3f4e 4650 struct vcl *vcl;
7e1432fb 4651 struct vd_config *vc;
baba3f4e 4652 unsigned int n_bvd;
7e1432fb 4653
7e1432fb
NB
4654 for (d = a->info.devs ; d ; d = d->next) {
4655 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4656 d->state_fd >= 0)
7e1432fb
NB
4657 /* wait for Removal to happen */
4658 return NULL;
4659 if (d->state_fd >= 0)
4660 working ++;
4661 }
4662
2c514b71
NB
4663 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4664 a->info.array.level);
7e1432fb
NB
4665 if (working == a->info.array.raid_disks)
4666 return NULL; /* array not degraded */
4667 switch (a->info.array.level) {
4668 case 1:
4669 if (working == 0)
4670 return NULL; /* failed */
4671 break;
4672 case 4:
4673 case 5:
4674 if (working < a->info.array.raid_disks - 1)
4675 return NULL; /* failed */
4676 break;
4677 case 6:
4678 if (working < a->info.array.raid_disks - 2)
4679 return NULL; /* failed */
4680 break;
4681 default: /* concat or stripe */
4682 return NULL; /* failed */
4683 }
4684
4685 /* For each slot, if it is not working, find a spare */
4686 dl = ddf->dlist;
4687 for (i = 0; i < a->info.array.raid_disks; i++) {
4688 for (d = a->info.devs ; d ; d = d->next)
4689 if (d->disk.raid_disk == i)
4690 break;
2c514b71 4691 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4692 if (d && (d->state_fd >= 0))
4693 continue;
4694
4695 /* OK, this device needs recovery. Find a spare */
4696 again:
4697 for ( ; dl ; dl = dl->next) {
4698 unsigned long long esize;
4699 unsigned long long pos;
4700 struct mdinfo *d2;
4701 int is_global = 0;
4702 int is_dedicated = 0;
4703 struct extent *ex;
f21e18ca 4704 unsigned int j;
7e1432fb
NB
4705 /* If in this array, skip */
4706 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4707 if (d2->state_fd >= 0 &&
4708 d2->disk.major == dl->major &&
7e1432fb 4709 d2->disk.minor == dl->minor) {
2c514b71 4710 dprintf("%x:%x already in array\n", dl->major, dl->minor);
7e1432fb
NB
4711 break;
4712 }
4713 if (d2)
4714 continue;
4715 if (ddf->phys->entries[dl->pdnum].type &
4716 __cpu_to_be16(DDF_Spare)) {
4717 /* Check spare assign record */
4718 if (dl->spare) {
4719 if (dl->spare->type & DDF_spare_dedicated) {
4720 /* check spare_ents for guid */
4721 for (j = 0 ;
4722 j < __be16_to_cpu(dl->spare->populated);
4723 j++) {
4724 if (memcmp(dl->spare->spare_ents[j].guid,
4725 ddf->virt->entries[a->info.container_member].guid,
4726 DDF_GUID_LEN) == 0)
4727 is_dedicated = 1;
4728 }
4729 } else
4730 is_global = 1;
4731 }
4732 } else if (ddf->phys->entries[dl->pdnum].type &
4733 __cpu_to_be16(DDF_Global_Spare)) {
4734 is_global = 1;
e0e7aeaa
N
4735 } else if (!(ddf->phys->entries[dl->pdnum].state &
4736 __cpu_to_be16(DDF_Failed))) {
4737 /* we can possibly use some of this */
4738 is_global = 1;
7e1432fb
NB
4739 }
4740 if ( ! (is_dedicated ||
4741 (is_global && global_ok))) {
2c514b71 4742 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4743 is_dedicated, is_global);
7e1432fb
NB
4744 continue;
4745 }
4746
4747 /* We are allowed to use this device - is there space?
4748 * We need a->info.component_size sectors */
4749 ex = get_extents(ddf, dl);
4750 if (!ex) {
2c514b71 4751 dprintf("cannot get extents\n");
7e1432fb
NB
4752 continue;
4753 }
4754 j = 0; pos = 0;
4755 esize = 0;
4756
4757 do {
4758 esize = ex[j].start - pos;
4759 if (esize >= a->info.component_size)
4760 break;
e5cc7d46
N
4761 pos = ex[j].start + ex[j].size;
4762 j++;
4763 } while (ex[j-1].size);
7e1432fb
NB
4764
4765 free(ex);
4766 if (esize < a->info.component_size) {
e5cc7d46
N
4767 dprintf("%x:%x has no room: %llu %llu\n",
4768 dl->major, dl->minor,
2c514b71 4769 esize, a->info.component_size);
7e1432fb
NB
4770 /* No room */
4771 continue;
4772 }
4773
4774 /* Cool, we have a device with some space at pos */
503975b9 4775 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4776 di->disk.number = i;
4777 di->disk.raid_disk = i;
4778 di->disk.major = dl->major;
4779 di->disk.minor = dl->minor;
4780 di->disk.state = 0;
d23534e4 4781 di->recovery_start = 0;
7e1432fb
NB
4782 di->data_offset = pos;
4783 di->component_size = a->info.component_size;
4784 di->container_member = dl->pdnum;
4785 di->next = rv;
4786 rv = di;
2c514b71
NB
4787 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4788 i, pos);
7e1432fb
NB
4789
4790 break;
4791 }
4792 if (!dl && ! global_ok) {
4793 /* not enough dedicated spares, try global */
4794 global_ok = 1;
4795 dl = ddf->dlist;
4796 goto again;
4797 }
4798 }
4799
4800 if (!rv)
4801 /* No spares found */
4802 return rv;
4803 /* Now 'rv' has a list of devices to return.
4804 * Create a metadata_update record to update the
4805 * phys_refnum and lba_offset values
4806 */
503975b9
N
4807 mu = xmalloc(sizeof(*mu));
4808 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4809 free(mu);
4810 mu = NULL;
4811 }
503975b9 4812 mu->buf = xmalloc(ddf->conf_rec_len * 512);
7590d562
N
4813 mu->len = ddf->conf_rec_len * 512;
4814 mu->space = NULL;
f50ae22e 4815 mu->space_list = NULL;
7e1432fb 4816 mu->next = *updates;
baba3f4e 4817 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4818 &n_bvd, &vcl);
7e1432fb
NB
4819 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4820
4821 vc = (struct vd_config*)mu->buf;
7e1432fb
NB
4822 for (di = rv ; di ; di = di->next) {
4823 vc->phys_refnum[di->disk.raid_disk] =
4824 ddf->phys->entries[dl->pdnum].refnum;
57a66662 4825 LBA_OFFSET(ddf, vc)[di->disk.raid_disk]
4826 = __cpu_to_be64(di->data_offset);
7e1432fb
NB
4827 }
4828 *updates = mu;
4829 return rv;
4830}
0e600426 4831#endif /* MDASSEMBLE */
7e1432fb 4832
b640a252
N
4833static int ddf_level_to_layout(int level)
4834{
4835 switch(level) {
4836 case 0:
4837 case 1:
4838 return 0;
4839 case 5:
4840 return ALGORITHM_LEFT_SYMMETRIC;
4841 case 6:
4842 return ALGORITHM_ROTATING_N_CONTINUE;
4843 case 10:
4844 return 0x102;
4845 default:
4846 return UnSet;
4847 }
4848}
4849
30f58b22
DW
4850static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4851{
4852 if (level && *level == UnSet)
4853 *level = LEVEL_CONTAINER;
4854
4855 if (level && layout && *layout == UnSet)
4856 *layout = ddf_level_to_layout(*level);
4857}
4858
a322f70c
DW
4859struct superswitch super_ddf = {
4860#ifndef MDASSEMBLE
4861 .examine_super = examine_super_ddf,
4862 .brief_examine_super = brief_examine_super_ddf,
4737ae25 4863 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 4864 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
4865 .detail_super = detail_super_ddf,
4866 .brief_detail_super = brief_detail_super_ddf,
4867 .validate_geometry = validate_geometry_ddf,
78e44928 4868 .write_init_super = write_init_super_ddf,
0e600426 4869 .add_to_super = add_to_super_ddf,
4dd968cc 4870 .remove_from_super = remove_from_super_ddf,
2b959fbf 4871 .load_container = load_container_ddf,
74db60b0 4872 .copy_metadata = copy_metadata_ddf,
a322f70c
DW
4873#endif
4874 .match_home = match_home_ddf,
4875 .uuid_from_super= uuid_from_super_ddf,
4876 .getinfo_super = getinfo_super_ddf,
4877 .update_super = update_super_ddf,
4878
4879 .avail_size = avail_size_ddf,
4880
a19c88b8
NB
4881 .compare_super = compare_super_ddf,
4882
a322f70c 4883 .load_super = load_super_ddf,
ba7eb04f 4884 .init_super = init_super_ddf,
955e9ea1 4885 .store_super = store_super_ddf,
a322f70c
DW
4886 .free_super = free_super_ddf,
4887 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 4888 .container_content = container_content_ddf,
30f58b22 4889 .default_geometry = default_geometry_ddf,
f646805e 4890 .kill_subarray = kill_subarray_ddf,
a322f70c 4891
a322f70c 4892 .external = 1,
549e9569 4893
0e600426 4894#ifndef MDASSEMBLE
549e9569
NB
4895/* for mdmon */
4896 .open_new = ddf_open_new,
ed9d66aa 4897 .set_array_state= ddf_set_array_state,
549e9569
NB
4898 .set_disk = ddf_set_disk,
4899 .sync_metadata = ddf_sync_metadata,
88c164f4 4900 .process_update = ddf_process_update,
edd8d13c 4901 .prepare_update = ddf_prepare_update,
7e1432fb 4902 .activate_spare = ddf_activate_spare,
0e600426 4903#endif
4cce4069 4904 .name = "ddf",
a322f70c 4905};