]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - libxfs/topology.c
7764687beac000379d0183e075514584a3981f6c
[thirdparty/xfsprogs-dev.git] / libxfs / topology.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6
7 #ifdef OVERRIDE_SYSTEM_STATX
8 #define statx sys_statx
9 #endif
10 #include <fcntl.h>
11 #include <sys/stat.h>
12
13 #include "libxfs_priv.h"
14 #include "libxcmd.h"
15 #include <blkid/blkid.h>
16 #include "xfs_multidisk.h"
17 #include "libfrog/platform.h"
18 #include "libfrog/statx.h"
19
/*
 * Convert a size expressed in binary megabytes, gigabytes or terabytes
 * into a number of blocks, where one block is 2^blklog bytes (callers in
 * this file pass the filesystem block size log).
 */
#define MEGABYTES(nr, blklog)	((uint64_t)(nr) << (20 - (blklog)))
#define GIGABYTES(nr, blklog)	((uint64_t)(nr) << (30 - (blklog)))
#define TERABYTES(nr, blklog)	((uint64_t)(nr) << (40 - (blklog)))
23
24 void
25 calc_default_ag_geometry(
26 int blocklog,
27 uint64_t dblocks,
28 int multidisk,
29 uint64_t *agsize,
30 uint64_t *agcount)
31 {
32 uint64_t blocks = 0;
33 int shift = 0;
34
35 /*
36 * First handle the high extreme - the point at which we will
37 * always use the maximum AG size.
38 *
39 * This applies regardless of storage configuration.
40 */
41 if (dblocks >= TERABYTES(32, blocklog)) {
42 blocks = XFS_AG_MAX_BLOCKS(blocklog);
43 goto done;
44 }
45
46 /*
47 * For a single underlying storage device over 4TB in size
48 * use the maximum AG size. Between 128MB and 4TB, just use
49 * 4 AGs and scale up smoothly between min/max AG sizes.
50 */
51 if (!multidisk) {
52 if (dblocks >= TERABYTES(4, blocklog)) {
53 blocks = XFS_AG_MAX_BLOCKS(blocklog);
54 goto done;
55 } else if (dblocks >= MEGABYTES(128, blocklog)) {
56 shift = XFS_NOMULTIDISK_AGLOG;
57 goto calc_blocks;
58 }
59 }
60
61 /*
62 * For the multidisk configs we choose an AG count based on the number
63 * of data blocks available, trying to keep the number of AGs higher
64 * than the single disk configurations. This makes the assumption that
65 * larger filesystems have more parallelism available to them.
66 */
67 shift = XFS_MULTIDISK_AGLOG;
68 if (dblocks <= GIGABYTES(512, blocklog))
69 shift--;
70 if (dblocks <= GIGABYTES(8, blocklog))
71 shift--;
72 if (dblocks < MEGABYTES(128, blocklog))
73 shift--;
74 if (dblocks < MEGABYTES(64, blocklog))
75 shift--;
76 if (dblocks < MEGABYTES(32, blocklog))
77 shift--;
78
79 /*
80 * If dblocks is not evenly divisible by the number of
81 * desired AGs, round "blocks" up so we don't lose the
82 * last bit of the filesystem. The same principle applies
83 * to the AG count, so we don't lose the last AG!
84 */
85 calc_blocks:
86 ASSERT(shift >= 0 && shift <= XFS_MULTIDISK_AGLOG);
87 blocks = dblocks >> shift;
88 if (dblocks & xfs_mask32lo(shift)) {
89 if (blocks < XFS_AG_MAX_BLOCKS(blocklog))
90 blocks++;
91 }
92 done:
93 *agsize = blocks;
94 *agcount = dblocks / blocks + (dblocks % blocks != 0);
95 }
96
97 void
98 calc_default_rtgroup_geometry(
99 int blocklog,
100 uint64_t rblocks,
101 uint64_t *rgsize,
102 uint64_t *rgcount)
103 {
104 uint64_t blocks = 0;
105 int shift = 0;
106
107 /*
108 * For a single underlying storage device over 4TB in size use the
109 * maximum rtgroup size. Between 128MB and 4TB, just use 4 rtgroups
110 * and scale up smoothly between min/max rtgroup sizes.
111 */
112 if (rblocks >= TERABYTES(4, blocklog)) {
113 blocks = XFS_MAX_RGBLOCKS;
114 goto done;
115 }
116 if (rblocks >= MEGABYTES(128, blocklog)) {
117 shift = XFS_NOMULTIDISK_AGLOG;
118 goto calc_blocks;
119 }
120
121 /*
122 * If rblocks is not evenly divisible by the number of desired rt
123 * groups, round "blocks" up so we don't lose the last bit of the
124 * filesystem. The same principle applies to the rt group count, so we
125 * don't lose the last rt group!
126 */
127 calc_blocks:
128 ASSERT(shift >= 0 && shift <= XFS_MULTIDISK_AGLOG);
129 blocks = rblocks >> shift;
130 if (rblocks & xfs_mask32lo(shift)) {
131 if (blocks < XFS_MAX_RGBLOCKS)
132 blocks++;
133 }
134 done:
135 *rgsize = blocks;
136 *rgcount = rblocks / blocks + (rblocks % blocks != 0);
137 }
138
139 /*
140 * Check for existing filesystem or partition table on device.
141 * Returns:
142 * 1 for existing fs or partition
143 * 0 for nothing found
144 * -1 for internal error
145 */
146 int
147 check_overwrite(
148 const char *device)
149 {
150 const char *type;
151 blkid_probe pr = NULL;
152 int ret;
153 int fd;
154 long long size;
155 int bsz;
156
157 if (!device || !*device)
158 return 0;
159
160 ret = -1; /* will reset on success of all setup calls */
161
162 fd = open(device, O_RDONLY);
163 if (fd < 0)
164 goto out;
165 platform_findsizes((char *)device, fd, &size, &bsz);
166 close(fd);
167
168 /* nothing to overwrite on a 0-length device */
169 if (size == 0) {
170 ret = 0;
171 goto out;
172 }
173
174 pr = blkid_new_probe_from_filename(device);
175 if (!pr)
176 goto out;
177
178 ret = blkid_probe_enable_partitions(pr, 1);
179 if (ret < 0)
180 goto out;
181
182 ret = blkid_do_fullprobe(pr);
183 if (ret < 0)
184 goto out;
185
186 /*
187 * Blkid returns 1 for nothing found and 0 when it finds a signature,
188 * but we want the exact opposite, so reverse the return value here.
189 *
190 * In addition print some useful diagnostics about what actually is
191 * on the device.
192 */
193 if (ret) {
194 ret = 0;
195 goto out;
196 }
197
198 if (!blkid_probe_lookup_value(pr, "TYPE", &type, NULL)) {
199 fprintf(stderr,
200 _("%s: %s appears to contain an existing "
201 "filesystem (%s).\n"), progname, device, type);
202 } else if (!blkid_probe_lookup_value(pr, "PTTYPE", &type, NULL)) {
203 fprintf(stderr,
204 _("%s: %s appears to contain a partition "
205 "table (%s).\n"), progname, device, type);
206 } else {
207 fprintf(stderr,
208 _("%s: %s appears to contain something weird "
209 "according to blkid\n"), progname, device);
210 }
211 ret = 1;
212 out:
213 if (pr)
214 blkid_free_probe(pr);
215 /* libblkid 2.38.1 lies and can return -EIO */
216 if (ret < 0)
217 fprintf(stderr,
218 _("%s: probe of %s failed, cannot detect "
219 "existing filesystem.\n"), progname, device);
220 return ret;
221 }
222
223 static void
224 blkid_get_topology(
225 const char *device,
226 struct device_topology *dt,
227 int force_overwrite)
228 {
229 blkid_topology tp;
230 blkid_probe pr;
231
232 pr = blkid_new_probe_from_filename(device);
233 if (!pr)
234 return;
235
236 tp = blkid_probe_get_topology(pr);
237 if (!tp)
238 goto out_free_probe;
239
240 dt->logical_sector_size = blkid_topology_get_logical_sector_size(tp);
241 dt->physical_sector_size = blkid_topology_get_physical_sector_size(tp);
242 dt->sunit = blkid_topology_get_minimum_io_size(tp);
243 dt->swidth = blkid_topology_get_optimal_io_size(tp);
244
245 /*
246 * If the reported values are the same as the physical sector size
247 * do not bother to report anything. It will only cause warnings
248 * if people specify larger stripe units or widths manually.
249 */
250 if (dt->sunit == dt->physical_sector_size ||
251 dt->swidth == dt->physical_sector_size) {
252 dt->sunit = 0;
253 dt->swidth = 0;
254 }
255
256 /*
257 * Blkid reports the information in terms of bytes, but we want it in
258 * terms of 512 bytes blocks (only to convert it to bytes later..)
259 */
260 dt->sunit >>= 9;
261 dt->swidth >>= 9;
262
263 if (blkid_topology_get_alignment_offset(tp) != 0) {
264 fprintf(stderr,
265 _("warning: device is not properly aligned %s\n"),
266 device);
267
268 if (!force_overwrite) {
269 fprintf(stderr,
270 _("Use -f to force usage of a misaligned device\n"));
271
272 exit(EXIT_FAILURE);
273 }
274 /* Do not use physical sector size if the device is misaligned */
275 dt->physical_sector_size = dt->logical_sector_size;
276 }
277
278 blkid_free_probe(pr);
279 return;
280
281 out_free_probe:
282 blkid_free_probe(pr);
283 fprintf(stderr,
284 _("warning: unable to probe device topology for device %s\n"),
285 device);
286 }
287
288 static void
289 get_hw_atomic_writes_topology(
290 struct libxfs_dev *dev,
291 struct device_topology *dt)
292 {
293 struct statx sx;
294 int fd;
295 int ret;
296
297 fd = open(dev->name, O_RDONLY);
298 if (fd < 0)
299 return;
300
301 ret = statx(fd, "", AT_EMPTY_PATH, STATX_WRITE_ATOMIC, &sx);
302 if (ret)
303 goto out_close;
304
305 if (!(sx.stx_mask & STATX_WRITE_ATOMIC))
306 goto out_close;
307
308 dt->awu_min = sx.stx_atomic_write_unit_min >> 9;
309 dt->awu_max = max(sx.stx_atomic_write_unit_max_opt,
310 sx.stx_atomic_write_unit_max) >> 9;
311
312 out_close:
313 close(fd);
314 }
315
316 static void
317 get_device_topology(
318 struct libxfs_dev *dev,
319 struct device_topology *dt,
320 int force_overwrite)
321 {
322 struct stat st;
323
324 /*
325 * Nothing to do if this particular subvolume doesn't exist.
326 */
327 if (!dev->name)
328 return;
329
330 /*
331 * If our target is a regular file, use platform_findsizes
332 * to try to obtain the underlying filesystem's requirements
333 * for direct IO; we'll set our sector size to that if possible.
334 */
335 if (dev->isfile || (!stat(dev->name, &st) && S_ISREG(st.st_mode))) {
336 int flags = O_RDONLY;
337 long long dummy;
338 int fd;
339
340 /* with xi->disfile we may not have the file yet! */
341 if (dev->isfile)
342 flags |= O_CREAT;
343
344 fd = open(dev->name, flags, 0666);
345 if (fd >= 0) {
346 platform_findsizes(dev->name, fd, &dummy,
347 &dt->logical_sector_size);
348 close(fd);
349 } else {
350 dt->logical_sector_size = BBSIZE;
351 }
352 } else {
353 blkid_get_topology(dev->name, dt, force_overwrite);
354 get_hw_atomic_writes_topology(dev, dt);
355 }
356
357 ASSERT(dt->logical_sector_size);
358
359 /*
360 * Older kernels may not have physical/logical distinction.
361 */
362 if (!dt->physical_sector_size)
363 dt->physical_sector_size = dt->logical_sector_size;
364 }
365
366 void
367 get_topology(
368 struct libxfs_init *xi,
369 struct fs_topology *ft,
370 int force_overwrite)
371 {
372 get_device_topology(&xi->data, &ft->data, force_overwrite);
373 get_device_topology(&xi->rt, &ft->rt, force_overwrite);
374 get_device_topology(&xi->log, &ft->log, force_overwrite);
375 }