]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/disk.c
xfs_scrub: fix weirdness in directory name check code
[thirdparty/xfsprogs-dev.git] / scrub / disk.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0+
a555a1f4
DW
2/*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
a555a1f4 4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
a555a1f4 5 */
a440f877 6#include "xfs.h"
a555a1f4
DW
7#include <stdint.h>
8#include <stdlib.h>
a555a1f4
DW
9#include <unistd.h>
10#include <fcntl.h>
11#include <sys/types.h>
a555a1f4 12#include <sys/statvfs.h>
03c0cd8f
DW
13#ifdef HAVE_SG_IO
14# include <scsi/sg.h>
15#endif
16#ifdef HAVE_HDIO_GETGEO
17# include <linux/hdreg.h>
18#endif
a555a1f4 19#include "platform_defs.h"
660b5d96 20#include "libfrog/util.h"
42b4c8e8 21#include "libfrog/paths.h"
a555a1f4 22#include "xfs_scrub.h"
03c0cd8f 23#include "common.h"
a555a1f4 24#include "disk.h"
ae14fe63 25#include "platform_defs.h"
a555a1f4
DW
26
27#ifndef BLKROTATIONAL
28# define BLKROTATIONAL _IO(0x12, 126)
29#endif
30
31/*
32 * Disk Abstraction
33 *
34 * These routines help us to discover the geometry of a block device,
35 * estimate the amount of concurrent IOs that we can send to it, and
36 * abstract the process of performing read verification of disk blocks.
37 */
38
39/* Figure out how many disk heads are available. */
40static unsigned int
41__disk_heads(
42 struct disk *disk)
43{
44 int iomin;
45 int ioopt;
ae14fe63 46 int nproc = platform_nproc();
a555a1f4
DW
47 unsigned short rot;
48 int error;
49
50 /* If it's not a block device, throw all the CPUs at it. */
51 if (!S_ISBLK(disk->d_sb.st_mode))
52 return nproc;
53
54 /* Non-rotational device? Throw all the CPUs at the problem. */
55 rot = 1;
56 error = ioctl(disk->d_fd, BLKROTATIONAL, &rot);
57 if (error == 0 && rot == 0)
58 return nproc;
59
60 /*
61 * Sometimes we can infer the number of devices from the
62 * min/optimal IO sizes.
63 */
64 iomin = ioopt = 0;
65 if (ioctl(disk->d_fd, BLKIOMIN, &iomin) == 0 &&
66 ioctl(disk->d_fd, BLKIOOPT, &ioopt) == 0 &&
67 iomin > 0 && ioopt > 0) {
68 return min(nproc, max(1, ioopt / iomin));
69 }
70
71 /* Rotating device? I guess? */
72 return 2;
73}
74
75/* Figure out how many disk heads are available. */
76unsigned int
77disk_heads(
78 struct disk *disk)
79{
32c6cc09
DW
80 if (force_nr_threads)
81 return force_nr_threads;
a555a1f4
DW
82 return __disk_heads(disk);
83}
84
03c0cd8f
DW
/*
 * Execute a SCSI VERIFY(16) to verify disk contents.
 * For devices that support this command, this can sharply reduce the
 * runtime of the data block verification phase if the storage device's
 * internal bandwidth exceeds its link bandwidth.  However, it only
 * works if we're talking to a raw SCSI device, and only if we trust the
 * firmware.
 */
#ifdef HAVE_SG_IO
# define SENSE_BUF_LEN		64
# define VERIFY16_CMDLEN	16
# define VERIFY16_CMD		0x8F

# ifndef SG_FLAG_Q_AT_TAIL
#  define SG_FLAG_Q_AT_TAIL	0x10
# endif

/*
 * Ask the device to verify @blockcount logical blocks starting at
 * @startblock.  Both values are in units of the device's logical block
 * size (1 << d_lbalog bytes), and @startblock is relative to d_start.
 *
 * Returns the number of bytes verified, which may be fewer than requested
 * if the request had to be shortened, or a negative value with errno set
 * if the command could not be issued or the device reported a problem.
 */
static int
disk_scsi_verify(
	struct disk		*disk,
	uint64_t		startblock, /* lba */
	uint64_t		blockcount) /* lba */
{
	struct sg_io_hdr	iohdr;
	unsigned char		cdb[VERIFY16_CMDLEN];
	unsigned char		sense[SENSE_BUF_LEN];
	uint64_t		max_blocks;
	uint64_t		llba;
	uint32_t		veri_len;
	int			error;

	assert(!debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY"));

	/*
	 * The CDB only carries a 32-bit verification length and we report
	 * the verified byte count through an int, so shorten oversized
	 * requests instead of silently truncating them.
	 */
	max_blocks = (uint64_t)INT32_MAX >> disk->d_lbalog;
	if (blockcount > max_blocks)
		blockcount = max_blocks;
	veri_len = blockcount;

	/* d_start is in bytes; the command wants device logical blocks. */
	llba = startblock + (disk->d_start >> disk->d_lbalog);

	/* Borrowed from sg_verify */
	cdb[0] = VERIFY16_CMD;
	cdb[1] = 0; /* skip PI, DPO, and byte check. */
	cdb[2] = (llba >> 56) & 0xff;
	cdb[3] = (llba >> 48) & 0xff;
	cdb[4] = (llba >> 40) & 0xff;
	cdb[5] = (llba >> 32) & 0xff;
	cdb[6] = (llba >> 24) & 0xff;
	cdb[7] = (llba >> 16) & 0xff;
	cdb[8] = (llba >> 8) & 0xff;
	cdb[9] = llba & 0xff;
	cdb[10] = (veri_len >> 24) & 0xff;
	cdb[11] = (veri_len >> 16) & 0xff;
	cdb[12] = (veri_len >> 8) & 0xff;
	cdb[13] = veri_len & 0xff;
	cdb[14] = 0;
	cdb[15] = 0;
	memset(sense, 0, SENSE_BUF_LEN);

	/* v3 SG_IO */
	memset(&iohdr, 0, sizeof(iohdr));
	iohdr.interface_id = 'S';
	iohdr.dxfer_direction = SG_DXFER_NONE;
	iohdr.cmdp = cdb;
	iohdr.cmd_len = VERIFY16_CMDLEN;
	iohdr.sbp = sense;
	iohdr.mx_sb_len = SENSE_BUF_LEN;
	iohdr.flags |= SG_FLAG_Q_AT_TAIL;
	iohdr.timeout = 30000; /* 30s */

	error = ioctl(disk->d_fd, SG_IO, &iohdr);
	if (error < 0)
		return error;

	dbg_printf("VERIFY(16) fd %d lba %"PRIu64" len %"PRIu64" info %x "
		   "status %d masked %d msg %d host %d driver %d "
		   "duration %d resid %d\n",
		   disk->d_fd, startblock, blockcount, iohdr.info,
		   iohdr.status, iohdr.masked_status, iohdr.msg_status,
		   iohdr.host_status, iohdr.driver_status, iohdr.duration,
		   iohdr.resid);

	/* The ioctl itself worked but the command did not complete cleanly. */
	if (iohdr.info & SG_INFO_CHECK) {
		dbg_printf("status: msg %x host %x driver %x\n",
			   iohdr.msg_status, iohdr.host_status,
			   iohdr.driver_status);
		errno = EIO;
		return -1;
	}

	/* Report bytes verified, in the same units the caller converted from. */
	return blockcount << disk->d_lbalog;
}
#else
/* No SG_IO support: fail the same way the real implementation does. */
# define disk_scsi_verify(...)		(errno = ENOTTY, -1)
#endif /* HAVE_SG_IO */
173
/*
 * Test whether SCSI VERIFY can be used on this disk by issuing a
 * one-block verify at the start of the device.  Always false when the
 * XFS_SCRUB_NO_SCSI_VERIFY debug tweak is set or when SG_IO support was
 * not compiled in.
 */
static bool
disk_can_scsi_verify(
	struct disk		*disk)
{
	if (debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY"))
		return false;

#ifdef HAVE_SG_IO
	/*
	 * disk_scsi_verify returns the number of bytes verified on success
	 * and a negative value on failure, so a working device answers this
	 * probe with a positive count, never zero.
	 */
	return disk_scsi_verify(disk, 0, 1) > 0;
#else
	return false;
#endif
}
187
a555a1f4
DW
188/* Open a disk device and discover its geometry. */
189struct disk *
190disk_open(
191 const char *pathname)
192{
03c0cd8f
DW
193#ifdef HAVE_HDIO_GETGEO
194 struct hd_geometry bdgeo;
195#endif
a555a1f4 196 struct disk *disk;
03c0cd8f 197 bool suspicious_disk = false;
a555a1f4
DW
198 int error;
199
200 disk = calloc(1, sizeof(struct disk));
201 if (!disk)
202 return NULL;
203
204 disk->d_fd = open(pathname, O_RDONLY | O_DIRECT | O_NOATIME);
205 if (disk->d_fd < 0)
206 goto out_free;
207
208 /* Try to get LBA size. */
20e10ad4 209 error = ioctl(disk->d_fd, BLKSSZGET, &disk->d_lbasize);
a555a1f4 210 if (error)
20e10ad4
DW
211 disk->d_lbasize = 512;
212 disk->d_lbalog = log2_roundup(disk->d_lbasize);
a555a1f4
DW
213
214 /* Obtain disk's stat info. */
215 error = fstat(disk->d_fd, &disk->d_sb);
216 if (error)
217 goto out_close;
218
219 /* Determine bdev size, block size, and offset. */
220 if (S_ISBLK(disk->d_sb.st_mode)) {
221 error = ioctl(disk->d_fd, BLKGETSIZE64, &disk->d_size);
222 if (error)
223 disk->d_size = 0;
224 error = ioctl(disk->d_fd, BLKBSZGET, &disk->d_blksize);
225 if (error)
226 disk->d_blksize = 0;
03c0cd8f
DW
227#ifdef HAVE_HDIO_GETGEO
228 error = ioctl(disk->d_fd, HDIO_GETGEO, &bdgeo);
229 if (!error) {
230 /*
231 * dm devices will pass through ioctls, which means
232 * we can't use SCSI VERIFY unless the start is 0.
233 * Most dm devices don't set geometry (unlike scsi
234 * and nvme) so use a zeroed out CHS to screen them
235 * out.
236 */
237 if (bdgeo.start != 0 &&
238 (unsigned long long)bdgeo.heads * bdgeo.sectors *
239 bdgeo.sectors == 0)
240 suspicious_disk = true;
241 disk->d_start = bdgeo.start << BBSHIFT;
242 } else
243#endif
244 disk->d_start = 0;
a555a1f4
DW
245 } else {
246 disk->d_size = disk->d_sb.st_size;
247 disk->d_blksize = disk->d_sb.st_blksize;
248 disk->d_start = 0;
249 }
250
03c0cd8f
DW
251 /* Can we issue SCSI VERIFY? */
252 if (!suspicious_disk && disk_can_scsi_verify(disk))
253 disk->d_flags |= DISK_FLAG_SCSI_VERIFY;
254
a555a1f4
DW
255 return disk;
256out_close:
257 close(disk->d_fd);
258out_free:
259 free(disk);
260 return NULL;
261}
262
263/* Close a disk device. */
264int
265disk_close(
266 struct disk *disk)
267{
268 int error = 0;
269
270 if (disk->d_fd >= 0)
271 error = close(disk->d_fd);
272 disk->d_fd = -1;
273 free(disk);
274 return error;
275}
276
03c0cd8f
DW
/* Size of one logical block of disk @d, in bytes. */
#define LBASIZE(d)		(1ULL << (d)->d_lbalog)
/* Convert bytes to logical blocks, discarding any partial block. */
#define BTOLBAT(d, bytes)	((uint64_t)(bytes) >> (d)->d_lbalog)
/* Convert bytes to logical blocks, counting a partial block as a whole one. */
#define BTOLBA(d, bytes)	\
	(BTOLBAT((d), (uint64_t)(bytes) + LBASIZE(d) - 1))
280
cac2b8b0
DW
281/* Simulate disk errors. */
282static int
283disk_simulate_read_error(
284 struct disk *disk,
285 uint64_t start,
286 uint64_t *length)
287{
288 static int64_t interval;
289 uint64_t start_interval;
290
291 /* Simulated disk errors are disabled. */
292 if (interval < 0)
293 return 0;
294
295 /* Figure out the disk read error interval. */
296 if (interval == 0) {
297 char *p;
298
299 /* Pretend there's bad media every so often, in bytes. */
300 p = getenv("XFS_SCRUB_DISK_ERROR_INTERVAL");
301 if (p == NULL) {
302 interval = -1;
303 return 0;
304 }
305 interval = strtoull(p, NULL, 10);
306 interval &= ~((1U << disk->d_lbalog) - 1);
307 }
b8302b7f
DW
308 if (interval <= 0) {
309 interval = -1;
310 return 0;
311 }
cac2b8b0
DW
312
313 /*
314 * We simulate disk errors by pretending that there are media errors at
315 * predetermined intervals across the disk. If a read verify request
316 * crosses one of those intervals we shorten it so that the next read
317 * will start on an interval threshold. If the read verify request
318 * starts on an interval threshold, we send back EIO as if it had
319 * failed.
320 */
321 if ((start % interval) == 0) {
322 dbg_printf("fd %d: simulating disk error at %"PRIu64".\n",
323 disk->d_fd, start);
324 return EIO;
325 }
326
327 start_interval = start / interval;
328 if (start_interval != (start + *length) / interval) {
329 *length = ((start_interval + 1) * interval) - start;
330 dbg_printf(
331"fd %d: simulating short read at %"PRIu64" to length %"PRIu64".\n",
332 disk->d_fd, start, *length);
333 }
334
335 return 0;
336}
337
a555a1f4
DW
338/* Read-verify an extent of a disk device. */
339ssize_t
340disk_read_verify(
341 struct disk *disk,
342 void *buf,
343 uint64_t start,
344 uint64_t length)
345{
cac2b8b0
DW
346 if (debug) {
347 int ret;
348
349 ret = disk_simulate_read_error(disk, start, &length);
350 if (ret) {
351 errno = ret;
352 return -1;
353 }
354
355 /* Don't actually issue the IO */
356 if (getenv("XFS_SCRUB_DISK_VERIFY_SKIP"))
357 return length;
358 }
359
03c0cd8f
DW
360 /* Convert to logical block size. */
361 if (disk->d_flags & DISK_FLAG_SCSI_VERIFY)
362 return disk_scsi_verify(disk, BTOLBAT(disk, start),
363 BTOLBA(disk, length));
364
a555a1f4
DW
365 return pread(disk->d_fd, buf, length, start);
366}