]> git.ipfire.org Git - thirdparty/mdadm.git/blame - platform-intel.c
imsm: support RAID 10 with more than 4 drives
[thirdparty/mdadm.git] / platform-intel.c
CommitLineData
b390f610
DW
1/*
2 * Intel(R) Matrix Storage Manager hardware and firmware support routines
3 *
4 * Copyright (C) 2008 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19#include "mdadm.h"
20#include "platform-intel.h"
21#include "probe_roms.h"
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include <unistd.h>
26#include <dirent.h>
27#include <fcntl.h>
28#include <sys/mman.h>
29#include <sys/types.h>
30#include <sys/stat.h>
fc13853f
LM
31#include <limits.h>
32
d835518b
BK
/* sysfs prefix used in this file to recognise paths that go through an NVMe subsystem */
#define NVME_SUBSYS_PATH "/sys/devices/virtual/nvme-subsystem/"
34
191e6ddb
MK
35static bool imsm_orom_has_raid0(const struct imsm_orom *orom)
36{
37 return imsm_rlc_has_bit(orom, IMSM_OROM_RLC_RAID0);
38}
39
40static bool imsm_orom_has_raid1(const struct imsm_orom *orom)
41{
42 return imsm_rlc_has_bit(orom, IMSM_OROM_RLC_RAID1);
43}
44
45static bool imsm_orom_has_raid10(const struct imsm_orom *orom)
46{
47 return imsm_rlc_has_bit(orom, IMSM_OROM_RLC_RAID10);
48}
49
50static bool imsm_orom_has_raid5(const struct imsm_orom *orom)
51{
52 return imsm_rlc_has_bit(orom, IMSM_OROM_RLC_RAID5);
53}
54
/*
 * IMSM platforms do not define how many disks are allowed for each level,
 * but there are some global limitations we need to follow.
 *
 * RAID0: no restriction is applied here, every member count is accepted.
 */
static bool imsm_orom_support_raid_disks_count_raid0(const int raid_disks)
{
	(void)raid_disks;	/* intentionally not consulted */

	return true;
}
62
/* RAID1: only a two-member array is accepted. */
static bool imsm_orom_support_raid_disks_count_raid1(const int raid_disks)
{
	return raid_disks == 2;
}
69
/* RAID5: at least three members are required. */
static bool imsm_orom_support_raid_disks_count_raid5(const int raid_disks)
{
	return raid_disks > 2;
}
76
/*
 * RAID10: the member count must be even and at least 4 (4 itself is
 * accepted, as are 6, 8, ...).
 */
static bool imsm_orom_support_raid_disks_count_raid10(const int raid_disks)
{
	const bool enough_disks = raid_disks >= 4;
	const bool even_count = (raid_disks & 1) == 0;

	return enough_disks && even_count;
}
84
85struct imsm_level_ops imsm_level_ops[] = {
86 {0, imsm_orom_has_raid0, imsm_orom_support_raid_disks_count_raid0, "raid0"},
87 {1, imsm_orom_has_raid1, imsm_orom_support_raid_disks_count_raid1, "raid1"},
88 {5, imsm_orom_has_raid5, imsm_orom_support_raid_disks_count_raid5, "raid5"},
89 {10, imsm_orom_has_raid10, imsm_orom_support_raid_disks_count_raid10, "raid10"},
90 {-1, NULL, NULL, NULL}
91};
92
9c747fa0
LM
/* Forward declaration: find_driver_devices() uses it before the definition. */
static int devpath_to_ll(const char *dev_path,
			 const char *entry,
			 unsigned long long *val);
95
9bc4ae77 96static void free_sys_dev(struct sys_dev **list)
b390f610
DW
97{
98 while (*list) {
99 struct sys_dev *next = (*list)->next;
100
101 if ((*list)->path)
102 free((*list)->path);
103 free(*list);
104 *list = next;
105 }
106}
107
108struct sys_dev *find_driver_devices(const char *bus, const char *driver)
109{
110 /* search sysfs for devices driven by 'driver' */
30730cb8
AO
111 char path[PATH_MAX];
112 char link[PATH_MAX];
113 char *c, *p;
b390f610
DW
114 DIR *driver_dir;
115 struct dirent *de;
116 struct sys_dev *head = NULL;
117 struct sys_dev *list = NULL;
60f0f54d 118 struct sys_dev *vmd = NULL;
a8e5382a 119 enum sys_dev_type type;
9c747fa0 120 unsigned long long dev_id;
6b781d33 121 unsigned long long class;
a8e5382a
LM
122
123 if (strcmp(driver, "isci") == 0)
124 type = SYS_DEV_SAS;
75350d87
KF
125 else if (strcmp(driver, "ahci") == 0) {
126 vmd = find_driver_devices("pci", "vmd");
a8e5382a 127 type = SYS_DEV_SATA;
75350d87 128 } else if (strcmp(driver, "nvme") == 0) {
60f0f54d
PB
129 /* if looking for nvme devs, first look for vmd */
130 vmd = find_driver_devices("pci", "vmd");
614902f6 131 type = SYS_DEV_NVME;
60f0f54d
PB
132 } else if (strcmp(driver, "vmd") == 0)
133 type = SYS_DEV_VMD;
a8e5382a
LM
134 else
135 type = SYS_DEV_UNKNOWN;
b390f610
DW
136
137 sprintf(path, "/sys/bus/%s/drivers/%s", bus, driver);
138 driver_dir = opendir(path);
60f0f54d
PB
139 if (!driver_dir) {
140 if (vmd)
141 free_sys_dev(&vmd);
b390f610 142 return NULL;
60f0f54d 143 }
b390f610 144 for (de = readdir(driver_dir); de; de = readdir(driver_dir)) {
5a1920f2 145 int n;
60f0f54d 146 int skip = 0;
5a1920f2 147
b390f610
DW
148 /* is 'de' a device? check that the 'subsystem' link exists and
149 * that its target matches 'bus'
150 */
151 sprintf(path, "/sys/bus/%s/drivers/%s/%s/subsystem",
152 bus, driver, de->d_name);
5a1920f2 153 n = readlink(path, link, sizeof(link));
f21e18ca 154 if (n < 0 || n >= (int)sizeof(link))
b390f610 155 continue;
5a1920f2 156 link[n] = '\0';
b390f610
DW
157 c = strrchr(link, '/');
158 if (!c)
159 continue;
160 if (strncmp(bus, c+1, strlen(bus)) != 0)
161 continue;
162
a8e5382a
LM
163 sprintf(path, "/sys/bus/%s/drivers/%s/%s",
164 bus, driver, de->d_name);
165
60f0f54d
PB
166 /* if searching for nvme - skip vmd connected one */
167 if (type == SYS_DEV_NVME) {
168 struct sys_dev *dev;
169 char *rp = realpath(path, NULL);
170 for (dev = vmd; dev; dev = dev->next) {
171 if ((strncmp(dev->path, rp, strlen(dev->path)) == 0))
172 skip = 1;
173 }
174 free(rp);
175 }
176
75350d87
KF
177 /* change sata type if under a vmd controller */
178 if (type == SYS_DEV_SATA) {
179 struct sys_dev *dev;
180 char *rp = realpath(path, NULL);
181 for (dev = vmd; dev; dev = dev->next) {
182 if ((strncmp(dev->path, rp, strlen(dev->path)) == 0))
183 type = SYS_DEV_SATA_VMD;
184 }
185 free(rp);
186 }
187
60f0f54d
PB
188 /* if it's not Intel device or mark as VMD connected - skip it. */
189 if (devpath_to_vendor(path) != 0x8086 || skip == 1)
a8e5382a
LM
190 continue;
191
9c747fa0
LM
192 if (devpath_to_ll(path, "device", &dev_id) != 0)
193 continue;
194
6b781d33
AP
195 if (devpath_to_ll(path, "class", &class) != 0)
196 continue;
197
30730cb8
AO
198 /*
199 * Each VMD device (domain) adds separate PCI bus, it is better
200 * to store path as a path to that bus (easier further
201 * determination which NVMe dev is connected to this particular
202 * VMD domain).
203 */
204 if (type == SYS_DEV_VMD) {
205 sprintf(path, "/sys/bus/%s/drivers/%s/%s/domain/device",
206 bus, driver, de->d_name);
207 }
208 p = realpath(path, NULL);
209 if (p == NULL) {
210 pr_err("Unable to get real path for '%s'\n", path);
211 continue;
212 }
213
b390f610
DW
214 /* start / add list entry */
215 if (!head) {
503975b9 216 head = xmalloc(sizeof(*head));
b390f610
DW
217 list = head;
218 } else {
503975b9 219 list->next = xmalloc(sizeof(*head));
b390f610
DW
220 list = list->next;
221 }
222
223 if (!list) {
224 free_sys_dev(&head);
225 break;
226 }
227
9c747fa0 228 list->dev_id = (__u16) dev_id;
6b781d33 229 list->class = (__u32) class;
a8e5382a 230 list->type = type;
b390f610 231 list->next = NULL;
30730cb8
AO
232 list->path = p;
233
a8e5382a
LM
234 if ((list->pci_id = strrchr(list->path, '/')) != NULL)
235 list->pci_id++;
b390f610 236 }
2a17c77b 237 closedir(driver_dir);
60f0f54d 238
75350d87
KF
239 /* nvme vmd needs a list separate from sata vmd */
240 if (vmd && type == SYS_DEV_NVME) {
60f0f54d
PB
241 if (list)
242 list->next = vmd;
243 else
244 head = vmd;
245 }
246
b390f610
DW
247 return head;
248}
249
/* List built by find_intel_devices() and the time() value recorded when it was built. */
static struct sys_dev *intel_devices;
static time_t valid_time;
9c747fa0 252
0858eccf
AP
253struct sys_dev *device_by_id(__u16 device_id)
254{
255 struct sys_dev *iter;
256
257 for (iter = intel_devices; iter != NULL; iter = iter->next)
258 if (iter->dev_id == device_id)
259 return iter;
260 return NULL;
261}
262
d3c11416
AO
263struct sys_dev *device_by_id_and_path(__u16 device_id, const char *path)
264{
265 struct sys_dev *iter;
266
267 for (iter = intel_devices; iter != NULL; iter = iter->next)
268 if ((iter->dev_id == device_id) && strstr(iter->path, path))
269 return iter;
270 return NULL;
271}
272
9c747fa0
LM
273static int devpath_to_ll(const char *dev_path, const char *entry, unsigned long long *val)
274{
dd5ab402 275 char path[strnlen(dev_path, PATH_MAX) + strnlen(entry, PATH_MAX) + 2];
9c747fa0
LM
276 int fd;
277 int n;
278
279 sprintf(path, "%s/%s", dev_path, entry);
280
281 fd = open(path, O_RDONLY);
282 if (fd < 0)
283 return -1;
284 n = sysfs_fd_get_ll(fd, val);
285 close(fd);
286 return n;
287}
288
60f0f54d 289__u16 devpath_to_vendor(const char *dev_path)
b390f610
DW
290{
291 char path[strlen(dev_path) + strlen("/vendor") + 1];
292 char vendor[7];
293 int fd;
294 __u16 id = 0xffff;
295 int n;
296
297 sprintf(path, "%s/vendor", dev_path);
298
299 fd = open(path, O_RDONLY);
300 if (fd < 0)
301 return 0xffff;
302
303 n = read(fd, vendor, sizeof(vendor));
304 if (n == sizeof(vendor)) {
305 vendor[n - 1] = '\0';
306 id = strtoul(vendor, NULL, 16);
307 }
308 close(fd);
309
310 return id;
311}
312
fcebeb77
MT
313/* Description: Read text value of dev_path/entry field
314 * Parameters:
315 * dev_path - sysfs path to the device
316 * entry - entry to be read
317 * buf - buffer for read value
318 * len - size of buf
319 * verbose - error logging level
320 */
321int devpath_to_char(const char *dev_path, const char *entry, char *buf, int len,
322 int verbose)
323{
324 char path[PATH_MAX];
325
326 snprintf(path, sizeof(path), "%s/%s", dev_path, entry);
327 if (load_sys(path, buf, len)) {
328 if (verbose)
329 pr_err("Cannot read %s, aborting\n", path);
330 return 1;
331 }
332
333 return 0;
334}
335
a8e5382a 336struct sys_dev *find_intel_devices(void)
b390f610 337{
614902f6 338 struct sys_dev *ahci, *isci, *nvme;
a8e5382a 339
9bc4ae77
N
340 if (valid_time > time(0) - 10)
341 return intel_devices;
342
343 if (intel_devices)
344 free_sys_dev(&intel_devices);
345
a8e5382a 346 isci = find_driver_devices("pci", "isci");
75350d87 347 /* Searching for AHCI will return list of SATA and SATA VMD controllers */
a8e5382a 348 ahci = find_driver_devices("pci", "ahci");
60f0f54d 349 /* Searching for NVMe will return list of NVMe and VMD controllers */
614902f6 350 nvme = find_driver_devices("pci", "nvme");
a8e5382a 351
614902f6
PB
352 if (!isci && !ahci) {
353 ahci = nvme;
354 } else if (!ahci) {
a8e5382a 355 ahci = isci;
614902f6
PB
356 struct sys_dev *elem = ahci;
357 while (elem->next)
358 elem = elem->next;
359 elem->next = nvme;
a8e5382a
LM
360 } else {
361 struct sys_dev *elem = ahci;
362 while (elem->next)
363 elem = elem->next;
364 elem->next = isci;
614902f6
PB
365 while (elem->next)
366 elem = elem->next;
367 elem->next = nvme;
a8e5382a 368 }
9bc4ae77
N
369 intel_devices = ahci;
370 valid_time = time(0);
371 return intel_devices;
b390f610
DW
372}
373
3c8bfb5d 374/*
fc13853f 375 * PCI Expansion ROM Data Structure Format */
3c8bfb5d
LM
376struct pciExpDataStructFormat {
377 __u8 ver[4];
378 __u16 vendorID;
379 __u16 deviceID;
6b781d33 380 __u16 devListOffset;
f666bcc6
PB
381 __u16 pciDataStructLen;
382 __u8 pciDataStructRev;
3c8bfb5d
LM
383} __attribute__ ((packed));
384
/* Head of the singly linked list of capability entries created by add_orom(). */
struct orom_entry *orom_entries;
0858eccf 386
72a45777 387const struct orom_entry *get_orom_entry_by_device_id(__u16 dev_id)
6b781d33 388{
5e1d6128
AP
389 struct orom_entry *entry;
390 struct devid_list *devid;
6b781d33 391
5e1d6128
AP
392 for (entry = orom_entries; entry; entry = entry->next) {
393 for (devid = entry->devid_list; devid; devid = devid->next) {
394 if (devid->devid == dev_id)
72a45777 395 return entry;
6b781d33
AP
396 }
397 }
5e1d6128 398
6b781d33
AP
399 return NULL;
400}
401
72a45777
PB
402const struct imsm_orom *get_orom_by_device_id(__u16 dev_id)
403{
404 const struct orom_entry *entry = get_orom_entry_by_device_id(dev_id);
405
406 if (entry)
407 return &entry->orom;
408
409 return NULL;
410}
411
5e1d6128 412static struct orom_entry *add_orom(const struct imsm_orom *orom)
6b781d33 413{
5e1d6128
AP
414 struct orom_entry *list;
415 struct orom_entry *prev = NULL;
416
417 for (list = orom_entries; list; prev = list, list = list->next)
418 ;
419
420 list = xmalloc(sizeof(struct orom_entry));
421 list->orom = *orom;
422 list->devid_list = NULL;
423 list->next = NULL;
424
425 if (prev == NULL)
426 orom_entries = list;
427 else
428 prev->next = list;
429
430 return list;
6b781d33
AP
431}
432
5e1d6128 433static void add_orom_device_id(struct orom_entry *entry, __u16 dev_id)
6b781d33 434{
6b781d33
AP
435 struct devid_list *list;
436 struct devid_list *prev = NULL;
437
5e1d6128
AP
438 for (list = entry->devid_list; list; prev = list, list = list->next) {
439 if (list->devid == dev_id)
6b781d33 440 return;
6b781d33 441 }
5e1d6128
AP
442 list = xmalloc(sizeof(struct devid_list));
443 list->devid = dev_id;
444 list->next = NULL;
445
446 if (prev == NULL)
447 entry->devid_list = list;
448 else
449 prev->next = list;
6b781d33 450}
fc13853f 451
3c8bfb5d 452static int scan(const void *start, const void *end, const void *data)
b390f610
DW
453{
454 int offset;
6b781d33 455 const struct imsm_orom *imsm_mem = NULL;
b390f610 456 int len = (end - start);
3c8bfb5d
LM
457 struct pciExpDataStructFormat *ptr= (struct pciExpDataStructFormat *)data;
458
9c747fa0
LM
459 if (data + 0x18 > end) {
460 dprintf("cannot find pciExpDataStruct \n");
461 return 0;
462 }
463
3c8bfb5d
LM
464 dprintf("ptr->vendorID: %lx __le16_to_cpu(ptr->deviceID): %lx \n",
465 (ulong) __le16_to_cpu(ptr->vendorID),
466 (ulong) __le16_to_cpu(ptr->deviceID));
467
6b781d33 468 if (__le16_to_cpu(ptr->vendorID) != 0x8086)
3c8bfb5d 469 return 0;
b390f610 470
5c4cc0c8
RS
471 if (get_orom_by_device_id(ptr->deviceID))
472 return 0;
473
b390f610 474 for (offset = 0; offset < len; offset += 4) {
6b781d33
AP
475 const void *mem = start + offset;
476
477 if ((memcmp(mem, IMSM_OROM_SIGNATURE, 4) == 0)) {
478 imsm_mem = mem;
479 break;
b390f610
DW
480 }
481 }
6b781d33
AP
482
483 if (!imsm_mem)
484 return 0;
485
5e1d6128 486 struct orom_entry *orom = add_orom(imsm_mem);
6b781d33 487
f666bcc6
PB
488 /* only PciDataStructure with revision 3 and above supports devices list. */
489 if (ptr->pciDataStructRev >= 3 && ptr->devListOffset) {
6b781d33
AP
490 const __u16 *dev_list = (void *)ptr + ptr->devListOffset;
491 int i;
492
493 for (i = 0; dev_list[i] != 0; i++)
494 add_orom_device_id(orom, dev_list[i]);
495 } else {
496 add_orom_device_id(orom, __le16_to_cpu(ptr->deviceID));
497 }
498
b390f610
DW
499 return 0;
500}
501
6b781d33 502const struct imsm_orom *imsm_platform_test(struct sys_dev *hba)
fc13853f 503{
6b781d33
AP
504 struct imsm_orom orom = {
505 .signature = IMSM_OROM_SIGNATURE,
506 .rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
507 IMSM_OROM_RLC_RAID10 | IMSM_OROM_RLC_RAID5,
508 .sss = IMSM_OROM_SSS_4kB | IMSM_OROM_SSS_8kB |
509 IMSM_OROM_SSS_16kB | IMSM_OROM_SSS_32kB |
510 IMSM_OROM_SSS_64kB | IMSM_OROM_SSS_128kB |
511 IMSM_OROM_SSS_256kB | IMSM_OROM_SSS_512kB |
512 IMSM_OROM_SSS_1MB | IMSM_OROM_SSS_2MB,
513 .dpa = IMSM_OROM_DISKS_PER_ARRAY,
514 .tds = IMSM_OROM_TOTAL_DISKS,
515 .vpa = IMSM_OROM_VOLUMES_PER_ARRAY,
516 .vphba = IMSM_OROM_VOLUMES_PER_HBA
517 };
518 orom.attr = orom.rlc | IMSM_OROM_ATTR_ChecksumVerify;
fc13853f
LM
519
520 if (check_env("IMSM_TEST_OROM_NORAID5")) {
6b781d33 521 orom.rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
fc13853f
LM
522 IMSM_OROM_RLC_RAID10;
523 }
6b781d33
AP
524 if (check_env("IMSM_TEST_AHCI_EFI_NORAID5") && (hba->type == SYS_DEV_SAS)) {
525 orom.rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
fc13853f
LM
526 IMSM_OROM_RLC_RAID10;
527 }
6b781d33
AP
528 if (check_env("IMSM_TEST_SCU_EFI_NORAID5") && (hba->type == SYS_DEV_SATA)) {
529 orom.rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
fc13853f 530 IMSM_OROM_RLC_RAID10;
cceebc67
DW
531 }
532
5e1d6128 533 struct orom_entry *ret = add_orom(&orom);
6b781d33
AP
534
535 add_orom_device_id(ret, hba->dev_id);
536
5e1d6128 537 return &ret->orom;
fc13853f
LM
538}
539
6b781d33 540static const struct imsm_orom *find_imsm_hba_orom(struct sys_dev *hba)
fc13853f
LM
541{
542 unsigned long align;
543
6b781d33
AP
544 if (check_env("IMSM_TEST_OROM"))
545 return imsm_platform_test(hba);
fc13853f 546
fc13853f 547 /* return empty OROM capabilities in EFI test mode */
6b781d33 548 if (check_env("IMSM_TEST_AHCI_EFI") || check_env("IMSM_TEST_SCU_EFI"))
fc13853f
LM
549 return NULL;
550
9bc4ae77 551 find_intel_devices();
9c747fa0
LM
552
553 if (intel_devices == NULL)
b390f610
DW
554 return NULL;
555
556 /* scan option-rom memory looking for an imsm signature */
969c2555
DW
557 if (check_env("IMSM_SAFE_OROM_SCAN"))
558 align = 2048;
559 else
560 align = 512;
561 if (probe_roms_init(align) != 0)
b390f610
DW
562 return NULL;
563 probe_roms();
9c747fa0 564 /* ignore return value - True is returned if both adapater roms are found */
fc13853f 565 scan_adapter_roms(scan);
b390f610
DW
566 probe_roms_exit();
567
6b781d33 568 return get_orom_by_device_id(hba->dev_id);
fc13853f
LM
569}
570
1a901471
LM
/*
 * Build a struct efi_guid from a 32-bit value, two 16-bit values and eight
 * single bytes. a, b and c are stored least significant byte first; d0..d7
 * are stored in the order given.
 */
#define EFI_GUID(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
((struct efi_guid) \
{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
   (b) & 0xff, ((b) >> 8) & 0xff, \
   (c) & 0xff, ((c) >> 8) & 0xff, \
   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
fc13853f 577
/* Old sysfs-efivars directory; read_efi_variable() falls back to it */
#define SYS_EFI_VAR_PATH "/sys/firmware/efi/vars"
/* efivarfs directory; tried first by read_efi_variable() */
#define SYS_EFIVARS_PATH "/sys/firmware/efi/efivars"

/* EFI variable names probed by find_imsm_efi() */
#define SCU_PROP "RstScuV"
#define AHCI_PROP "RstSataV"
#define AHCI_SSATA_PROP "RstsSatV"
#define AHCI_TSATA_PROP "RsttSatV"
#define VROC_VMD_PROP "RstUefiV"
#define RST_VMD_PROP "RstVmdV"

/* GUID passed along with every variable name above */
#define VENDOR_GUID \
	EFI_GUID(0x193dfefa, 0xa445, 0x4302, 0x99, 0xd8, 0xef, 0x3a, 0xad, 0x1a, 0x04, 0xc6)

/* 'class' value a SATA controller must report before find_imsm_efi() reads its EFI variables */
#define PCI_CLASS_RAID_CNTRL 0x010400
5a6baf66 591
f94df5cf
MT
592static int read_efi_var(void *buffer, ssize_t buf_size,
593 const char *variable_name, struct efi_guid guid)
88605db9
AP
594{
595 char path[PATH_MAX];
596 char buf[GUID_STR_MAX];
597 int fd;
598 ssize_t n;
599
600 snprintf(path, PATH_MAX, "%s/%s-%s", SYS_EFIVARS_PATH, variable_name, guid_str(buf, guid));
601
602 fd = open(path, O_RDONLY);
603 if (fd < 0)
604 return 1;
605
606 /* read the variable attributes and ignore it */
607 n = read(fd, buf, sizeof(__u32));
608 if (n < 0) {
609 close(fd);
610 return 1;
611 }
612
613 /* read the variable data */
614 n = read(fd, buffer, buf_size);
615 close(fd);
616 if (n < buf_size)
617 return 1;
618
619 return 0;
620}
621
f94df5cf
MT
622static int read_efi_variable(void *buffer, ssize_t buf_size,
623 const char *variable_name, struct efi_guid guid)
5a6baf66 624{
5a6baf66
LM
625 char path[PATH_MAX];
626 char buf[GUID_STR_MAX];
aae4c111
HCP
627 int dfd;
628 ssize_t n, var_data_len;
629
88605db9
AP
630 /* Try to read the variable using the new efivarfs interface first.
631 * If that fails, fall back to the old sysfs-efivars interface. */
632 if (!read_efi_var(buffer, buf_size, variable_name, guid))
633 return 0;
634
aae4c111
HCP
635 snprintf(path, PATH_MAX, "%s/%s-%s/size", SYS_EFI_VAR_PATH, variable_name, guid_str(buf, guid));
636
637 dprintf("EFI VAR: path=%s\n", path);
638 /* get size of variable data */
639 dfd = open(path, O_RDONLY);
640 if (dfd < 0)
641 return 1;
642
643 n = read(dfd, &buf, sizeof(buf));
644 close(dfd);
645 if (n < 0)
646 return 1;
647 buf[n] = '\0';
648
649 errno = 0;
650 var_data_len = strtoul(buf, NULL, 16);
d7be7d87
JS
651 if ((errno == ERANGE && (var_data_len == LONG_MAX)) ||
652 (errno != 0 && var_data_len == 0))
aae4c111
HCP
653 return 1;
654
655 /* get data */
656 snprintf(path, PATH_MAX, "%s/%s-%s/data", SYS_EFI_VAR_PATH, variable_name, guid_str(buf, guid));
657
658 dprintf("EFI VAR: path=%s\n", path);
659 dfd = open(path, O_RDONLY);
660 if (dfd < 0)
661 return 1;
662
663 n = read(dfd, buffer, buf_size);
664 close(dfd);
665 if (n != var_data_len || n < buf_size) {
666 return 1;
667 }
668
669 return 0;
670}
5a6baf66 671
6b781d33 672const struct imsm_orom *find_imsm_efi(struct sys_dev *hba)
aae4c111 673{
6b781d33 674 struct imsm_orom orom;
5e1d6128 675 struct orom_entry *ret;
f94df5cf
MT
676 static const char * const sata_efivars[] = {AHCI_PROP, AHCI_SSATA_PROP,
677 AHCI_TSATA_PROP};
ea7a02a3 678 static const char * const vmd_efivars[] = {VROC_VMD_PROP, RST_VMD_PROP};
f94df5cf 679 unsigned long i;
5a6baf66 680
6b781d33
AP
681 if (check_env("IMSM_TEST_AHCI_EFI") || check_env("IMSM_TEST_SCU_EFI"))
682 return imsm_platform_test(hba);
5a6baf66 683
5a6baf66 684 /* OROM test is set, return that there is no EFI capabilities */
aae4c111 685 if (check_env("IMSM_TEST_OROM"))
5a6baf66 686 return NULL;
5a6baf66 687
f94df5cf
MT
688 switch (hba->type) {
689 case SYS_DEV_SAS:
690 if (!read_efi_variable(&orom, sizeof(orom), SCU_PROP,
691 VENDOR_GUID))
692 break;
81188ef8 693
f94df5cf
MT
694 return NULL;
695 case SYS_DEV_SATA:
696 if (hba->class != PCI_CLASS_RAID_CNTRL)
697 return NULL;
81188ef8 698
f94df5cf
MT
699 for (i = 0; i < ARRAY_SIZE(sata_efivars); i++) {
700 if (!read_efi_variable(&orom, sizeof(orom),
701 sata_efivars[i], VENDOR_GUID))
702 break;
81188ef8 703
81188ef8 704 }
f94df5cf
MT
705 if (i == ARRAY_SIZE(sata_efivars))
706 return NULL;
81188ef8 707
f94df5cf
MT
708 break;
709 case SYS_DEV_VMD:
75350d87 710 case SYS_DEV_SATA_VMD:
ea7a02a3
OJ
711 for (i = 0; i < ARRAY_SIZE(vmd_efivars); i++) {
712 if (!read_efi_variable(&orom, sizeof(orom),
713 vmd_efivars[i], VENDOR_GUID))
714 break;
715 }
716
717 if (i == ARRAY_SIZE(vmd_efivars))
718 return NULL;
719
720 break;
f94df5cf
MT
721 default:
722 return NULL;
723 }
5a6baf66 724
6b781d33
AP
725 ret = add_orom(&orom);
726 add_orom_device_id(ret, hba->dev_id);
60f0f54d 727 ret->type = hba->type;
6b781d33 728
5e1d6128 729 return &ret->orom;
fc13853f
LM
730}
731
614902f6
PB
732const struct imsm_orom *find_imsm_nvme(struct sys_dev *hba)
733{
5e1d6128 734 static struct orom_entry *nvme_orom;
614902f6
PB
735
736 if (hba->type != SYS_DEV_NVME)
737 return NULL;
738
739 if (!nvme_orom) {
740 struct imsm_orom nvme_orom_compat = {
741 .signature = IMSM_NVME_OROM_COMPAT_SIGNATURE,
742 .rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
743 IMSM_OROM_RLC_RAID10 | IMSM_OROM_RLC_RAID5,
744 .sss = IMSM_OROM_SSS_4kB | IMSM_OROM_SSS_8kB |
745 IMSM_OROM_SSS_16kB | IMSM_OROM_SSS_32kB |
746 IMSM_OROM_SSS_64kB | IMSM_OROM_SSS_128kB,
747 .dpa = IMSM_OROM_DISKS_PER_ARRAY_NVME,
748 .tds = IMSM_OROM_TOTAL_DISKS_NVME,
749 .vpa = IMSM_OROM_VOLUMES_PER_ARRAY,
750 .vphba = IMSM_OROM_TOTAL_DISKS_NVME / 2 * IMSM_OROM_VOLUMES_PER_ARRAY,
751 .attr = IMSM_OROM_ATTR_2TB | IMSM_OROM_ATTR_2TB_DISK,
0858eccf 752 .driver_features = IMSM_OROM_CAPABILITIES_EnterpriseSystem
614902f6
PB
753 };
754 nvme_orom = add_orom(&nvme_orom_compat);
755 }
756 add_orom_device_id(nvme_orom, hba->dev_id);
60f0f54d 757 nvme_orom->type = SYS_DEV_NVME;
5e1d6128 758 return &nvme_orom->orom;
614902f6
PB
759}
760
8d1114be
MG
/* Offset inside the VMD controller's PCI 'config' file read by read_vmd_register() */
#define VMD_REGISTER_OFFSET 0x3FC
/* Shift/mask pairs add_vmd_orom() applies to the 32-bit register value */
#define VMD_REGISTER_SKU_SHIFT 1
#define VMD_REGISTER_SKU_MASK (0x00000007)
/* SKU value for which add_vmd_orom() enables RAID 0/1/10/5 */
#define VMD_REGISTER_SKU_PREMIUM 2
#define MD_REGISTER_VER_MAJOR_SHIFT 4
#define MD_REGISTER_VER_MAJOR_MASK (0x0000000F)
#define MD_REGISTER_VER_MINOR_SHIFT 8
#define MD_REGISTER_VER_MINOR_MASK (0x0000000F)
769
770/*
771 * read_vmd_register() - Reads VMD register and writes contents to buff ptr
772 * @buff: buffer for vmd register data, should be the size of uint32_t
773 *
774 * Return: 0 on success, 1 on error
775 */
776int read_vmd_register(uint32_t *buff, struct sys_dev *hba)
777{
778 int fd;
779 char vmd_pci_config_path[PATH_MAX];
780
781 if (!vmd_domain_to_controller(hba, vmd_pci_config_path))
782 return 1;
783
784 strncat(vmd_pci_config_path, "/config", PATH_MAX - strnlen(vmd_pci_config_path, PATH_MAX));
785
786 fd = open(vmd_pci_config_path, O_RDONLY);
787 if (fd < 0)
788 return 1;
789
790 if (pread(fd, buff, sizeof(uint32_t), VMD_REGISTER_OFFSET) != sizeof(uint32_t)) {
791 close(fd);
792 return 1;
793 }
794 close(fd);
795 return 0;
796}
797
798/*
799 * add_vmd_orom() - Adds VMD orom cap to orom list, writes orom_entry ptr into vmd_orom
800 * @vmd_orom: pointer to orom entry pointer
801 *
802 * Return: 0 on success, 1 on error
803 */
804int add_vmd_orom(struct orom_entry **vmd_orom, struct sys_dev *hba)
805{
806 uint8_t sku;
807 uint32_t vmd_register_data;
808 struct imsm_orom vmd_orom_cap = {
809 .signature = IMSM_VMD_OROM_COMPAT_SIGNATURE,
810 .sss = IMSM_OROM_SSS_4kB | IMSM_OROM_SSS_8kB |
811 IMSM_OROM_SSS_16kB | IMSM_OROM_SSS_32kB |
812 IMSM_OROM_SSS_64kB | IMSM_OROM_SSS_128kB,
813 .dpa = IMSM_OROM_DISKS_PER_ARRAY_NVME,
814 .tds = IMSM_OROM_TOTAL_DISKS_VMD,
815 .vpa = IMSM_OROM_VOLUMES_PER_ARRAY,
816 .vphba = IMSM_OROM_VOLUMES_PER_HBA_VMD,
817 .attr = IMSM_OROM_ATTR_2TB | IMSM_OROM_ATTR_2TB_DISK,
818 .driver_features = IMSM_OROM_CAPABILITIES_EnterpriseSystem |
819 IMSM_OROM_CAPABILITIES_TPV
820 };
821
822 if (read_vmd_register(&vmd_register_data, hba) != 0)
823 return 1;
824
825 sku = (uint8_t)((vmd_register_data >> VMD_REGISTER_SKU_SHIFT) &
826 VMD_REGISTER_SKU_MASK);
827
828 if (sku == VMD_REGISTER_SKU_PREMIUM)
829 vmd_orom_cap.rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
830 IMSM_OROM_RLC_RAID10 | IMSM_OROM_RLC_RAID5;
831 else
832 vmd_orom_cap.rlc = IMSM_OROM_RLC_RAID_CNG;
833
834 vmd_orom_cap.major_ver = (uint8_t)
835 ((vmd_register_data >> MD_REGISTER_VER_MAJOR_SHIFT) &
836 MD_REGISTER_VER_MAJOR_MASK);
837 vmd_orom_cap.minor_ver = (uint8_t)
838 ((vmd_register_data >> MD_REGISTER_VER_MINOR_SHIFT) &
839 MD_REGISTER_VER_MINOR_MASK);
840
841 *vmd_orom = add_orom(&vmd_orom_cap);
842
843 return 0;
844}
845
846const struct imsm_orom *find_imsm_vmd(struct sys_dev *hba)
847{
848 static struct orom_entry *vmd_orom;
849
850 if (hba->type != SYS_DEV_VMD)
851 return NULL;
852
853 if (!vmd_orom && add_vmd_orom(&vmd_orom, hba) != 0)
854 return NULL;
855
856 add_orom_device_id(vmd_orom, hba->dev_id);
857 vmd_orom->type = SYS_DEV_VMD;
858 return &vmd_orom->orom;
859}
860
6b781d33 861const struct imsm_orom *find_imsm_capability(struct sys_dev *hba)
fc13853f 862{
6b781d33
AP
863 const struct imsm_orom *cap = get_orom_by_device_id(hba->dev_id);
864
865 if (cap)
866 return cap;
fc13853f 867
614902f6
PB
868 if (hba->type == SYS_DEV_NVME)
869 return find_imsm_nvme(hba);
8d1114be
MG
870
871 cap = find_imsm_efi(hba);
872 if (cap)
5a6baf66 873 return cap;
8d1114be
MG
874
875 if (hba->type == SYS_DEV_VMD) {
876 cap = find_imsm_vmd(hba);
877 if (cap)
878 return cap;
879 }
880
881 cap = find_imsm_hba_orom(hba);
882 if (cap)
fc13853f 883 return cap;
614902f6 884
b390f610
DW
885 return NULL;
886}
25921536 887
d835518b
BK
888/* Check whether the nvme device is represented by nvme subsytem,
889 * if yes virtual path should be changed to hardware device path,
890 * to allow IMSM capabilities detection.
891 * Returns:
892 * hardware path to device - if the device is represented via
893 * nvme virtual subsytem
894 * NULL - if the device is not represented via nvme virtual subsytem
895 */
896char *get_nvme_multipath_dev_hw_path(const char *dev_path)
897{
898 DIR *dir;
899 struct dirent *ent;
900 char *rp = NULL;
901
902 if (strncmp(dev_path, NVME_SUBSYS_PATH, strlen(NVME_SUBSYS_PATH)) != 0)
903 return NULL;
904
905 dir = opendir(dev_path);
906 if (!dir)
907 return NULL;
908
909 for (ent = readdir(dir); ent; ent = readdir(dir)) {
910 char buf[strlen(dev_path) + strlen(ent->d_name) + 1];
911
912 /* Check if dir is a controller, ignore namespaces*/
913 if (!(strncmp(ent->d_name, "nvme", 4) == 0) ||
914 (strrchr(ent->d_name, 'n') != &ent->d_name[0]))
915 continue;
916
917 sprintf(buf, "%s/%s", dev_path, ent->d_name);
918 rp = realpath(buf, NULL);
919 break;
920 }
921
922 closedir(dir);
923 return rp;
924}
925
7c798f87
MT
926/* Description: Return part or whole realpath for the dev
927 * Parameters:
928 * dev - the device to be quered
929 * dev_level - level of "/device" entries. It allows to caller to access
930 * virtual or physical devices which are on "path" to quered
931 * one.
932 * buf - optional, must be PATH_MAX size. If set, then will be used.
933 */
934char *devt_to_devpath(dev_t dev, int dev_level, char *buf)
25921536 935{
7c798f87
MT
936 char device[PATH_MAX];
937 char *hw_path;
938 int i;
939 unsigned long device_free_len = sizeof(device) - 1;
940 char dev_str[] = "/device";
941 unsigned long dev_str_len = strlen(dev_str);
942
943 snprintf(device, sizeof(device), "/sys/dev/block/%d:%d", major(dev),
944 minor(dev));
945
946 /* If caller wants block device, return path to it even if it is exposed
947 * via virtual layer.
948 */
949 if (dev_level == 0)
950 return realpath(device, buf);
25921536 951
7c798f87
MT
952 device_free_len -= strlen(device);
953 for (i = 0; i < dev_level; i++) {
954 if (device_free_len < dev_str_len)
955 return NULL;
d835518b 956
7c798f87 957 strncat(device, dev_str, device_free_len);
d835518b 958
7c798f87
MT
959 /* Resolve nvme-subsystem abstraction if needed
960 */
961 device_free_len -= dev_str_len;
962 if (i == 0) {
963 char rp[PATH_MAX];
964
965 if (!realpath(device, rp))
966 return NULL;
967 hw_path = get_nvme_multipath_dev_hw_path(rp);
968 if (hw_path) {
969 strcpy(device, hw_path);
970 device_free_len = sizeof(device) -
971 strlen(device) - 1;
972 free(hw_path);
973 }
974 }
d835518b
BK
975 }
976
7c798f87 977 return realpath(device, buf);
25921536
DW
978}
979
/*
 * Return the sysfs device path for the block device open on @fd, as
 * produced by devt_to_devpath() for its st_rdev.
 *
 * Returns NULL when fstat() fails, when @fd does not refer to a block
 * device, or when devt_to_devpath() fails.
 */
char *diskfd_to_devpath(int fd, int dev_level, char *buf)
{
	struct stat info;

	if (fstat(fd, &info) != 0 || !S_ISBLK(info.st_mode))
		return NULL;

	return devt_to_devpath(info.st_rdev, dev_level, buf);
}
994
/*
 * Tell whether @disk_path lies below @hba_path, i.e. whether @hba_path is a
 * prefix of @disk_path.
 *
 * Returns 1 for a match, and always 1 when IMSM_TEST_AHCI_DEV or
 * IMSM_TEST_SCU_DEV is set; 0 otherwise, including when either path is
 * NULL.
 */
int path_attached_to_hba(const char *disk_path, const char *hba_path)
{
	if (check_env("IMSM_TEST_AHCI_DEV") ||
	    check_env("IMSM_TEST_SCU_DEV"))
		return 1;

	if (!disk_path || !hba_path)
		return 0;

	dprintf("hba: %s - disk: %s\n", hba_path, disk_path);

	return strncmp(disk_path, hba_path, strlen(hba_path)) == 0 ? 1 : 0;
}
1014
/*
 * Tell whether block device @dev is attached to the controller at
 * @hba_path; see path_attached_to_hba() for the result values.
 */
int devt_attached_to_hba(dev_t dev, const char *hba_path)
{
	char *resolved = devt_to_devpath(dev, 1, NULL);
	const int attached = path_attached_to_hba(resolved, hba_path);

	/* allocated by realpath(); free(NULL) is harmless */
	free(resolved);

	return attached;
}
1025
/*
 * Tell whether the block device open on @fd is attached to the controller
 * at @hba_path; see path_attached_to_hba() for the result values.
 */
int disk_attached_to_hba(int fd, const char *hba_path)
{
	char *resolved = diskfd_to_devpath(fd, 1, NULL);
	const int attached = path_attached_to_hba(resolved, hba_path);

	/* allocated by realpath(); free(NULL) is harmless */
	free(resolved);

	return attached;
}
60f0f54d
PB
1036
1037char *vmd_domain_to_controller(struct sys_dev *hba, char *buf)
1038{
1039 struct dirent *ent;
1040 DIR *dir;
1041 char path[PATH_MAX];
1042
1043 if (!hba)
1044 return NULL;
1045
1046 if (hba->type != SYS_DEV_VMD)
1047 return NULL;
1048
1049 dir = opendir("/sys/bus/pci/drivers/vmd");
b9135011
JS
1050 if (!dir)
1051 return NULL;
60f0f54d 1052
b9135011 1053 for (ent = readdir(dir); ent; ent = readdir(dir)) {
60f0f54d
PB
1054 sprintf(path, "/sys/bus/pci/drivers/vmd/%s/domain/device",
1055 ent->d_name);
1056
1057 if (!realpath(path, buf))
1058 continue;
1059
1060 if (strncmp(buf, hba->path, strlen(buf)) == 0) {
1061 sprintf(path, "/sys/bus/pci/drivers/vmd/%s", ent->d_name);
b9135011 1062 closedir(dir);
7071320a 1063 return realpath(path, buf);
60f0f54d
PB
1064 }
1065 }
b9135011
JS
1066
1067 closedir(dir);
60f0f54d
PB
1068 return NULL;
1069}
8662f92d
MT
1070
1071/* Scan over all controller's namespaces and compare nsid value to verify if
1072 * current one is supported. The routine doesn't check IMSM capabilities for
1073 * namespace. Only one nvme namespace is supported by IMSM.
1074 * Paramteres:
1075 * fd - open descriptor to the nvme namespace
1076 * verbose - error logging level
a8f3cfd5 1077 * Returns:
8662f92d
MT
1078 * 1 - if namespace is supported
1079 * 0 - otherwise
a8f3cfd5 1080 */
8662f92d 1081int imsm_is_nvme_namespace_supported(int fd, int verbose)
a8f3cfd5 1082{
8662f92d
MT
1083 DIR *dir = NULL;
1084 struct dirent *ent;
1085 char cntrl_path[PATH_MAX];
1086 char ns_path[PATH_MAX];
1087 unsigned long long lowest_nsid = ULLONG_MAX;
1088 unsigned long long this_nsid;
1089 int rv = 0;
a8f3cfd5 1090
a8f3cfd5 1091
8662f92d
MT
1092 if (!diskfd_to_devpath(fd, 1, cntrl_path) ||
1093 !diskfd_to_devpath(fd, 0, ns_path)) {
1094 if (verbose)
1095 pr_err("Cannot get device paths\n");
1096 goto abort;
1097 }
a8f3cfd5 1098
a8f3cfd5 1099
8662f92d 1100 if (devpath_to_ll(ns_path, "nsid", &this_nsid)) {
a8f3cfd5 1101 if (verbose)
8662f92d
MT
1102 pr_err("Cannot read nsid value for %s",
1103 basename(ns_path));
1104 goto abort;
a8f3cfd5 1105 }
8662f92d
MT
1106
1107 dir = opendir(cntrl_path);
1108 if (!dir)
1109 goto abort;
1110
1111 /* The lowest nvme namespace is supported */
1112 for (ent = readdir(dir); ent; ent = readdir(dir)) {
1113 unsigned long long curr_nsid;
1114 char curr_ns_path[PATH_MAX + 256];
1115
1116 if (!strstr(ent->d_name, "nvme"))
1117 continue;
1118
1119 snprintf(curr_ns_path, sizeof(curr_ns_path), "%s/%s",
1120 cntrl_path, ent->d_name);
1121
1122 if (devpath_to_ll(curr_ns_path, "nsid", &curr_nsid))
1123 goto abort;
1124
1125 if (lowest_nsid > curr_nsid)
1126 lowest_nsid = curr_nsid;
1127 }
1128
1129 if (this_nsid == lowest_nsid)
1130 rv = 1;
1131 else if (verbose)
1132 pr_err("IMSM is supported on the lowest NVMe namespace\n");
1133
1134abort:
1135 if (dir)
1136 closedir(dir);
1137
1138 return rv;
a8f3cfd5 1139}
d835518b
BK
1140
1141/* Verify if multipath is supported by NVMe controller
1142 * Returns:
1143 * 0 - not supported
1144 * 1 - supported
1145 */
1146int is_multipath_nvme(int disk_fd)
1147{
d835518b 1148 char ns_path[PATH_MAX];
d835518b 1149
7c798f87 1150 if (!diskfd_to_devpath(disk_fd, 0, ns_path))
d835518b
BK
1151 return 0;
1152
1153 if (strncmp(ns_path, NVME_SUBSYS_PATH, strlen(NVME_SUBSYS_PATH)) == 0)
1154 return 1;
1155
1156 return 0;
1157}