]>
git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - scrub/disk.c
1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
11 #include <sys/types.h>
12 #include <sys/statvfs.h>
16 #ifdef HAVE_HDIO_GETGEO
17 # include <linux/hdreg.h>
19 #include "platform_defs.h"
20 #include "libfrog/util.h"
21 #include "libfrog/paths.h"
22 #include "xfs_scrub.h"
25 #include "platform_defs.h"
28 # define BLKROTATIONAL _IO(0x12, 126)
34 * These routines help us to discover the geometry of a block device,
35 * estimate the amount of concurrent IOs that we can send to it, and
36 * abstract the process of performing read verification of disk blocks.
39 /* Figure out how many disk heads are available. */
46 int nproc
= platform_nproc();
50 /* If it's not a block device, throw all the CPUs at it. */
51 if (!S_ISBLK(disk
->d_sb
.st_mode
))
54 /* Non-rotational device? Throw all the CPUs at the problem. */
56 error
= ioctl(disk
->d_fd
, BLKROTATIONAL
, &rot
);
57 if (error
== 0 && rot
== 0)
61 * Sometimes we can infer the number of devices from the
62 * min/optimal IO sizes.
65 if (ioctl(disk
->d_fd
, BLKIOMIN
, &iomin
) == 0 &&
66 ioctl(disk
->d_fd
, BLKIOOPT
, &ioopt
) == 0 &&
67 iomin
> 0 && ioopt
> 0) {
68 return min(nproc
, max(1, ioopt
/ iomin
));
71 /* Rotating device? I guess? */
75 /* Figure out how many disk heads are available, unless the thread count was forced. */
81 return force_nr_threads
;
82 return __disk_heads(disk
);
86 * Execute a SCSI VERIFY(16) to verify disk contents.
87 * For devices that support this command, this can sharply reduce the
88 * runtime of the data block verification phase if the storage device's
89 * internal bandwidth exceeds its link bandwidth. However, it only
90 * works if we're talking to a raw SCSI device, and only if we trust the
94 # define SENSE_BUF_LEN 64
95 # define VERIFY16_CMDLEN 16
96 # define VERIFY16_CMD 0x8F
98 # ifndef SG_FLAG_Q_AT_TAIL
99 # define SG_FLAG_Q_AT_TAIL 0x10
104 uint64_t startblock
, /* lba */
105 uint64_t blockcount
) /* lba */
107 struct sg_io_hdr iohdr
;
108 unsigned char cdb
[VERIFY16_CMDLEN
];
109 unsigned char sense
[SENSE_BUF_LEN
];
111 uint64_t veri_len
= blockcount
;
114 assert(!debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY"));
116 llba
= startblock
+ (disk
->d_start
>> BBSHIFT
);
118 /* Borrowed from sg_verify */
119 cdb
[0] = VERIFY16_CMD
;
120 cdb
[1] = 0; /* skip PI, DPO, and byte check. */
121 cdb
[2] = (llba
>> 56) & 0xff;
122 cdb
[3] = (llba
>> 48) & 0xff;
123 cdb
[4] = (llba
>> 40) & 0xff;
124 cdb
[5] = (llba
>> 32) & 0xff;
125 cdb
[6] = (llba
>> 24) & 0xff;
126 cdb
[7] = (llba
>> 16) & 0xff;
127 cdb
[8] = (llba
>> 8) & 0xff;
128 cdb
[9] = llba
& 0xff;
129 cdb
[10] = (veri_len
>> 24) & 0xff;
130 cdb
[11] = (veri_len
>> 16) & 0xff;
131 cdb
[12] = (veri_len
>> 8) & 0xff;
132 cdb
[13] = veri_len
& 0xff;
135 memset(sense
, 0, SENSE_BUF_LEN
);
138 memset(&iohdr
, 0, sizeof(iohdr
));
139 iohdr
.interface_id
= 'S';
140 iohdr
.dxfer_direction
= SG_DXFER_NONE
;
142 iohdr
.cmd_len
= VERIFY16_CMDLEN
;
144 iohdr
.mx_sb_len
= SENSE_BUF_LEN
;
145 iohdr
.flags
|= SG_FLAG_Q_AT_TAIL
;
146 iohdr
.timeout
= 30000; /* 30s */
148 error
= ioctl(disk
->d_fd
, SG_IO
, &iohdr
);
152 dbg_printf("VERIFY(16) fd %d lba %"PRIu64
" len %"PRIu64
" info %x "
153 "status %d masked %d msg %d host %d driver %d "
154 "duration %d resid %d\n",
155 disk
->d_fd
, startblock
, blockcount
, iohdr
.info
,
156 iohdr
.status
, iohdr
.masked_status
, iohdr
.msg_status
,
157 iohdr
.host_status
, iohdr
.driver_status
, iohdr
.duration
,
160 if (iohdr
.info
& SG_INFO_CHECK
) {
161 dbg_printf("status: msg %x host %x driver %x\n",
162 iohdr
.msg_status
, iohdr
.host_status
,
163 iohdr
.driver_status
);
168 return blockcount
<< BBSHIFT
;
171 # define disk_scsi_verify(...) (ENOTTY)
172 #endif /* HAVE_SG_IO */
174 /* Test whether the device accepts the SCSI VERIFY command. */
176 disk_can_scsi_verify(
181 if (debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY"))
184 error
= disk_scsi_verify(disk
, 0, 1);
188 /* Open a disk device and discover its geometry. */
191 const char *pathname
)
193 #ifdef HAVE_HDIO_GETGEO
194 struct hd_geometry bdgeo
;
197 bool suspicious_disk
= false;
200 disk
= calloc(1, sizeof(struct disk
));
204 disk
->d_fd
= open(pathname
, O_RDONLY
| O_DIRECT
| O_NOATIME
);
208 /* Try to get LBA size. */
209 error
= ioctl(disk
->d_fd
, BLKSSZGET
, &disk
->d_lbasize
);
211 disk
->d_lbasize
= 512;
212 disk
->d_lbalog
= log2_roundup(disk
->d_lbasize
);
214 /* Obtain disk's stat info. */
215 error
= fstat(disk
->d_fd
, &disk
->d_sb
);
219 /* Determine bdev size, block size, and offset. */
220 if (S_ISBLK(disk
->d_sb
.st_mode
)) {
221 error
= ioctl(disk
->d_fd
, BLKGETSIZE64
, &disk
->d_size
);
224 error
= ioctl(disk
->d_fd
, BLKBSZGET
, &disk
->d_blksize
);
227 #ifdef HAVE_HDIO_GETGEO
228 error
= ioctl(disk
->d_fd
, HDIO_GETGEO
, &bdgeo
);
231 * dm devices will pass through ioctls, which means
232 * we can't use SCSI VERIFY unless the start is 0.
233 * Most dm devices don't set geometry (unlike scsi
234 * and nvme) so use a zeroed out CHS to screen them
237 if (bdgeo
.start
!= 0 &&
238 (unsigned long long)bdgeo
.heads
* bdgeo
.sectors
*
240 suspicious_disk
= true;
241 disk
->d_start
= bdgeo
.start
<< BBSHIFT
;
246 disk
->d_size
= disk
->d_sb
.st_size
;
247 disk
->d_blksize
= disk
->d_sb
.st_blksize
;
251 /* Can we issue SCSI VERIFY? */
252 if (!suspicious_disk
&& disk_can_scsi_verify(disk
))
253 disk
->d_flags
|= DISK_FLAG_SCSI_VERIFY
;
263 /* Close a disk device. */
271 error
= close(disk
->d_fd
);
/*
 * Byte-to-LBA conversion helpers.  (d) is a disk pointer; its d_lbalog
 * field is set from log2_roundup(d_lbasize) when the disk is opened.
 */
/* Convert a byte quantity to LBAs, truncating any partial block. */
277 #define BTOLBAT(d, bytes) ((uint64_t)(bytes) >> (d)->d_lbalog)
/* Size of one logical block in bytes, i.e. 2^d_lbalog. */
278 #define LBASIZE(d) (1ULL << (d)->d_lbalog)
/* Convert a byte quantity to LBAs, rounding up to cover a partial block. */
279 #define BTOLBA(d, bytes) (((uint64_t)(bytes) + LBASIZE(d) - 1) >> (d)->d_lbalog)
281 /* Simulate disk errors. */
283 disk_simulate_read_error(
288 static int64_t interval
;
289 uint64_t start_interval
;
291 /* Simulated disk errors are disabled. */
295 /* Figure out the disk read error interval. */
299 /* Pretend there's bad media every so often, in bytes. */
300 p
= getenv("XFS_SCRUB_DISK_ERROR_INTERVAL");
305 interval
= strtoull(p
, NULL
, 10);
306 interval
&= ~((1U << disk
->d_lbalog
) - 1);
314 * We simulate disk errors by pretending that there are media errors at
315 * predetermined intervals across the disk. If a read verify request
316 * crosses one of those intervals we shorten it so that the next read
317 * will start on an interval threshold. If the read verify request
318 * starts on an interval threshold, we send back EIO as if it had
321 if ((start
% interval
) == 0) {
322 dbg_printf("fd %d: simulating disk error at %"PRIu64
".\n",
327 start_interval
= start
/ interval
;
328 if (start_interval
!= (start
+ *length
) / interval
) {
329 *length
= ((start_interval
+ 1) * interval
) - start
;
331 "fd %d: simulating short read at %"PRIu64
" to length %"PRIu64
".\n",
332 disk
->d_fd
, start
, *length
);
338 /* Read-verify an extent of a disk device. */
349 ret
= disk_simulate_read_error(disk
, start
, &length
);
355 /* Don't actually issue the IO */
356 if (getenv("XFS_SCRUB_DISK_VERIFY_SKIP"))
360 /* Convert to logical block size. */
361 if (disk
->d_flags
& DISK_FLAG_SCSI_VERIFY
)
362 return disk_scsi_verify(disk
, BTOLBAT(disk
, start
),
363 BTOLBA(disk
, length
));
365 return pread(disk
->d_fd
, buf
, length
, start
);