]>
Commit | Line | Data |
---|---|---|
a555a1f4 DW |
1 | /* |
2 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
3 | * | |
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it would be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write the Free Software Foundation, | |
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | */ | |
20 | #include <stdio.h> | |
21 | #include <stdint.h> | |
22 | #include <stdlib.h> | |
23 | #include <stdbool.h> | |
24 | #include <unistd.h> | |
25 | #include <fcntl.h> | |
26 | #include <sys/types.h> | |
27 | #include <sys/stat.h> | |
28 | #include <sys/ioctl.h> | |
29 | #include <sys/statvfs.h> | |
30 | #include <sys/vfs.h> | |
31 | #include <linux/fs.h> | |
03c0cd8f DW |
32 | #ifdef HAVE_SG_IO |
33 | # include <scsi/sg.h> | |
34 | #endif | |
35 | #ifdef HAVE_HDIO_GETGEO | |
36 | # include <linux/hdreg.h> | |
37 | #endif | |
a555a1f4 DW |
38 | #include "platform_defs.h" |
39 | #include "libfrog.h" | |
50a573a7 DW |
40 | #include "xfs.h" |
41 | #include "path.h" | |
42 | #include "xfs_fs.h" | |
a555a1f4 | 43 | #include "xfs_scrub.h" |
03c0cd8f | 44 | #include "common.h" |
a555a1f4 DW |
45 | #include "disk.h" |
46 | ||
47 | #ifndef BLKROTATIONAL | |
48 | # define BLKROTATIONAL _IO(0x12, 126) | |
49 | #endif | |
50 | ||
51 | /* | |
52 | * Disk Abstraction | |
53 | * | |
54 | * These routines help us to discover the geometry of a block device, | |
55 | * estimate the amount of concurrent IOs that we can send to it, and | |
56 | * abstract the process of performing read verification of disk blocks. | |
57 | */ | |
58 | ||
59 | /* Figure out how many disk heads are available. */ | |
60 | static unsigned int | |
61 | __disk_heads( | |
62 | struct disk *disk) | |
63 | { | |
64 | int iomin; | |
65 | int ioopt; | |
66 | unsigned short rot; | |
67 | int error; | |
68 | ||
69 | /* If it's not a block device, throw all the CPUs at it. */ | |
70 | if (!S_ISBLK(disk->d_sb.st_mode)) | |
71 | return nproc; | |
72 | ||
73 | /* Non-rotational device? Throw all the CPUs at the problem. */ | |
74 | rot = 1; | |
75 | error = ioctl(disk->d_fd, BLKROTATIONAL, &rot); | |
76 | if (error == 0 && rot == 0) | |
77 | return nproc; | |
78 | ||
79 | /* | |
80 | * Sometimes we can infer the number of devices from the | |
81 | * min/optimal IO sizes. | |
82 | */ | |
83 | iomin = ioopt = 0; | |
84 | if (ioctl(disk->d_fd, BLKIOMIN, &iomin) == 0 && | |
85 | ioctl(disk->d_fd, BLKIOOPT, &ioopt) == 0 && | |
86 | iomin > 0 && ioopt > 0) { | |
87 | return min(nproc, max(1, ioopt / iomin)); | |
88 | } | |
89 | ||
90 | /* Rotating device? I guess? */ | |
91 | return 2; | |
92 | } | |
93 | ||
94 | /* Figure out how many disk heads are available. */ | |
95 | unsigned int | |
96 | disk_heads( | |
97 | struct disk *disk) | |
98 | { | |
99 | if (nr_threads) | |
100 | return nr_threads; | |
101 | return __disk_heads(disk); | |
102 | } | |
103 | ||
03c0cd8f DW |
104 | /* |
105 | * Execute a SCSI VERIFY(16) to verify disk contents. | |
106 | * For devices that support this command, this can sharply reduce the | |
107 | * runtime of the data block verification phase if the storage device's | |
108 | * internal bandwidth exceeds its link bandwidth. However, it only | |
109 | * works if we're talking to a raw SCSI device, and only if we trust the | |
110 | * firmware. | |
111 | */ | |
112 | #ifdef HAVE_SG_IO | |
113 | # define SENSE_BUF_LEN 64 | |
114 | # define VERIFY16_CMDLEN 16 | |
115 | # define VERIFY16_CMD 0x8F | |
116 | ||
117 | # ifndef SG_FLAG_Q_AT_TAIL | |
118 | # define SG_FLAG_Q_AT_TAIL 0x10 | |
119 | # endif | |
120 | static int | |
121 | disk_scsi_verify( | |
122 | struct disk *disk, | |
123 | uint64_t startblock, /* lba */ | |
124 | uint64_t blockcount) /* lba */ | |
125 | { | |
126 | struct sg_io_hdr iohdr; | |
127 | unsigned char cdb[VERIFY16_CMDLEN]; | |
128 | unsigned char sense[SENSE_BUF_LEN]; | |
129 | uint64_t llba; | |
130 | uint64_t veri_len = blockcount; | |
131 | int error; | |
132 | ||
133 | assert(!debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY")); | |
134 | ||
135 | llba = startblock + (disk->d_start >> BBSHIFT); | |
136 | ||
137 | /* Borrowed from sg_verify */ | |
138 | cdb[0] = VERIFY16_CMD; | |
139 | cdb[1] = 0; /* skip PI, DPO, and byte check. */ | |
140 | cdb[2] = (llba >> 56) & 0xff; | |
141 | cdb[3] = (llba >> 48) & 0xff; | |
142 | cdb[4] = (llba >> 40) & 0xff; | |
143 | cdb[5] = (llba >> 32) & 0xff; | |
144 | cdb[6] = (llba >> 24) & 0xff; | |
145 | cdb[7] = (llba >> 16) & 0xff; | |
146 | cdb[8] = (llba >> 8) & 0xff; | |
147 | cdb[9] = llba & 0xff; | |
148 | cdb[10] = (veri_len >> 24) & 0xff; | |
149 | cdb[11] = (veri_len >> 16) & 0xff; | |
150 | cdb[12] = (veri_len >> 8) & 0xff; | |
151 | cdb[13] = veri_len & 0xff; | |
152 | cdb[14] = 0; | |
153 | cdb[15] = 0; | |
154 | memset(sense, 0, SENSE_BUF_LEN); | |
155 | ||
156 | /* v3 SG_IO */ | |
157 | memset(&iohdr, 0, sizeof(iohdr)); | |
158 | iohdr.interface_id = 'S'; | |
159 | iohdr.dxfer_direction = SG_DXFER_NONE; | |
160 | iohdr.cmdp = cdb; | |
161 | iohdr.cmd_len = VERIFY16_CMDLEN; | |
162 | iohdr.sbp = sense; | |
163 | iohdr.mx_sb_len = SENSE_BUF_LEN; | |
164 | iohdr.flags |= SG_FLAG_Q_AT_TAIL; | |
165 | iohdr.timeout = 30000; /* 30s */ | |
166 | ||
167 | error = ioctl(disk->d_fd, SG_IO, &iohdr); | |
168 | if (error) | |
169 | return error; | |
170 | ||
171 | dbg_printf("VERIFY(16) fd %d lba %"PRIu64" len %"PRIu64" info %x " | |
172 | "status %d masked %d msg %d host %d driver %d " | |
173 | "duration %d resid %d\n", | |
174 | disk->d_fd, startblock, blockcount, iohdr.info, | |
175 | iohdr.status, iohdr.masked_status, iohdr.msg_status, | |
176 | iohdr.host_status, iohdr.driver_status, iohdr.duration, | |
177 | iohdr.resid); | |
178 | ||
179 | if (iohdr.info & SG_INFO_CHECK) { | |
180 | dbg_printf("status: msg %x host %x driver %x\n", | |
181 | iohdr.msg_status, iohdr.host_status, | |
182 | iohdr.driver_status); | |
183 | errno = EIO; | |
184 | return -1; | |
185 | } | |
186 | ||
187 | return error; | |
188 | } | |
189 | #else | |
190 | # define disk_scsi_verify(...) (ENOTTY) | |
191 | #endif /* HAVE_SG_IO */ | |
192 | ||
/*
 * Find out whether disk_scsi_verify() works on this disk by verifying a
 * single block at LBA 0.  The XFS_SCRUB_NO_SCSI_VERIFY debug tweak turns
 * the probe off and makes this return false.
 */
static bool
disk_can_scsi_verify(
	struct disk		*disk)
{
	if (debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY"))
		return false;

	/* A zero return from the probe means the command went through. */
	return disk_scsi_verify(disk, 0, 1) == 0;
}
206 | ||
a555a1f4 DW |
207 | /* Open a disk device and discover its geometry. */ |
208 | struct disk * | |
209 | disk_open( | |
210 | const char *pathname) | |
211 | { | |
03c0cd8f DW |
212 | #ifdef HAVE_HDIO_GETGEO |
213 | struct hd_geometry bdgeo; | |
214 | #endif | |
a555a1f4 | 215 | struct disk *disk; |
03c0cd8f | 216 | bool suspicious_disk = false; |
a555a1f4 DW |
217 | int lba_sz; |
218 | int error; | |
219 | ||
220 | disk = calloc(1, sizeof(struct disk)); | |
221 | if (!disk) | |
222 | return NULL; | |
223 | ||
224 | disk->d_fd = open(pathname, O_RDONLY | O_DIRECT | O_NOATIME); | |
225 | if (disk->d_fd < 0) | |
226 | goto out_free; | |
227 | ||
228 | /* Try to get LBA size. */ | |
229 | error = ioctl(disk->d_fd, BLKSSZGET, &lba_sz); | |
230 | if (error) | |
231 | lba_sz = 512; | |
232 | disk->d_lbalog = log2_roundup(lba_sz); | |
233 | ||
234 | /* Obtain disk's stat info. */ | |
235 | error = fstat(disk->d_fd, &disk->d_sb); | |
236 | if (error) | |
237 | goto out_close; | |
238 | ||
239 | /* Determine bdev size, block size, and offset. */ | |
240 | if (S_ISBLK(disk->d_sb.st_mode)) { | |
241 | error = ioctl(disk->d_fd, BLKGETSIZE64, &disk->d_size); | |
242 | if (error) | |
243 | disk->d_size = 0; | |
244 | error = ioctl(disk->d_fd, BLKBSZGET, &disk->d_blksize); | |
245 | if (error) | |
246 | disk->d_blksize = 0; | |
03c0cd8f DW |
247 | #ifdef HAVE_HDIO_GETGEO |
248 | error = ioctl(disk->d_fd, HDIO_GETGEO, &bdgeo); | |
249 | if (!error) { | |
250 | /* | |
251 | * dm devices will pass through ioctls, which means | |
252 | * we can't use SCSI VERIFY unless the start is 0. | |
253 | * Most dm devices don't set geometry (unlike scsi | |
254 | * and nvme) so use a zeroed out CHS to screen them | |
255 | * out. | |
256 | */ | |
257 | if (bdgeo.start != 0 && | |
258 | (unsigned long long)bdgeo.heads * bdgeo.sectors * | |
259 | bdgeo.sectors == 0) | |
260 | suspicious_disk = true; | |
261 | disk->d_start = bdgeo.start << BBSHIFT; | |
262 | } else | |
263 | #endif | |
264 | disk->d_start = 0; | |
a555a1f4 DW |
265 | } else { |
266 | disk->d_size = disk->d_sb.st_size; | |
267 | disk->d_blksize = disk->d_sb.st_blksize; | |
268 | disk->d_start = 0; | |
269 | } | |
270 | ||
03c0cd8f DW |
271 | /* Can we issue SCSI VERIFY? */ |
272 | if (!suspicious_disk && disk_can_scsi_verify(disk)) | |
273 | disk->d_flags |= DISK_FLAG_SCSI_VERIFY; | |
274 | ||
a555a1f4 DW |
275 | return disk; |
276 | out_close: | |
277 | close(disk->d_fd); | |
278 | out_free: | |
279 | free(disk); | |
280 | return NULL; | |
281 | } | |
282 | ||
283 | /* Close a disk device. */ | |
284 | int | |
285 | disk_close( | |
286 | struct disk *disk) | |
287 | { | |
288 | int error = 0; | |
289 | ||
290 | if (disk->d_fd >= 0) | |
291 | error = close(disk->d_fd); | |
292 | disk->d_fd = -1; | |
293 | free(disk); | |
294 | return error; | |
295 | } | |
296 | ||
03c0cd8f DW |
/* Size in bytes of one logical block on disk @d. */
#define LBASIZE(d)		(1ULL << (d)->d_lbalog)
/* Convert a byte count to logical blocks, truncating any partial block. */
#define BTOLBAT(d, bytes)	((uint64_t)(bytes) >> (d)->d_lbalog)
/* Convert a byte count to logical blocks, rounding a partial block up. */
#define BTOLBA(d, bytes)	BTOLBAT((d), (uint64_t)(bytes) + LBASIZE(d) - 1)
300 | ||
a555a1f4 DW |
301 | /* Read-verify an extent of a disk device. */ |
302 | ssize_t | |
303 | disk_read_verify( | |
304 | struct disk *disk, | |
305 | void *buf, | |
306 | uint64_t start, | |
307 | uint64_t length) | |
308 | { | |
03c0cd8f DW |
309 | /* Convert to logical block size. */ |
310 | if (disk->d_flags & DISK_FLAG_SCSI_VERIFY) | |
311 | return disk_scsi_verify(disk, BTOLBAT(disk, start), | |
312 | BTOLBA(disk, length)); | |
313 | ||
a555a1f4 DW |
314 | return pread(disk->d_fd, buf, length, start); |
315 | } |