]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2bd0ea18 | 2 | /* |
f1b058f9 | 3 | * Copyright (c) 2000-2006 Silicon Graphics, Inc. |
da23017d | 4 | * All Rights Reserved. |
2bd0ea18 NS |
5 | */ |
6 | ||
b626fb59 | 7 | |
9c799827 | 8 | #include "libxfs_priv.h" |
1aef52f8 | 9 | #include "init.h" |
b626fb59 DC |
10 | #include "xfs_fs.h" |
11 | #include "xfs_shared.h" | |
12 | #include "xfs_format.h" | |
13 | #include "xfs_log_format.h" | |
14 | #include "xfs_trans_resv.h" | |
15 | #include "xfs_mount.h" | |
16 | #include "xfs_inode_buf.h" | |
17 | #include "xfs_inode_fork.h" | |
18 | #include "xfs_inode.h" | |
19 | #include "xfs_trans.h" | |
c335b673 | 20 | #include "libfrog/platform.h" |
b626fb59 | 21 | |
ac7ad9aa | 22 | #include "libxfs.h" |
2bd0ea18 | 23 | |
1a12e432 DW |
24 | static void libxfs_brelse(struct cache_node *node); |
25 | ||
6af7c1ea DC |
26 | /* |
27 | * Important design/architecture note: | |
28 | * | |
29 | * The userspace code that uses the buffer cache is much less constrained than | |
30 | * the kernel code. The userspace code is pretty nasty in places, especially | |
31 | * when it comes to buffer error handling. Very little of the userspace code | |
32 | * outside libxfs clears bp->b_error - very little code even checks it - so the | |
33 | * libxfs code is tripping on stale errors left by the userspace code. | |
34 | * | |
8b4de37c | 35 | * We can't clear errors or zero buffer contents in libxfs_buf_get-* like we do |
6af7c1ea DC |
36 | * in the kernel, because those functions are used by the libxfs_readbuf_* |
37 | * functions and hence need to leave the buffers unchanged on cache hits. This | |
38 | * is actually the only way to gather a write error from a libxfs_writebuf() | |
39 | * call - you need to get the buffer again so you can check bp->b_error field - | |
40 | * assuming that the buffer is still in the cache when you check, that is. | |
41 | * | |
42 | * This is very different to the kernel code which does not release buffers on a | |
43 | * write so we can wait on IO and check errors. The kernel buffer cache also | |
44 | * guarantees a buffer of a known initial state from xfs_buf_get() even on a | |
45 | * cache hit. | |
46 | * | |
47 | * IOWs, userspace is behaving quite differently to the kernel and as a result | |
48 | * it leaks errors from reads, invalidations and writes through | |
361379e0 | 49 | * libxfs_buf_get/libxfs_buf_read. |
6af7c1ea DC |
50 | * |
51 | * The result of this is that until the userspace code outside libxfs is cleaned | |
52 | * up, functions that release buffers from userspace control (i.e | |
e02ba985 | 53 | * libxfs_writebuf/libxfs_buf_relse) need to zero bp->b_error to prevent |
6af7c1ea DC |
54 | * propagation of stale errors into future buffer operations. |
55 | */ | |
56 | ||
5000d01d | 57 | #define BDSTRAT_SIZE (256 * 1024) |
2bd0ea18 | 58 | |
2556c98b BN |
59 | #define IO_BCOMPARE_CHECK |
60 | ||
9542ae13 DC |
61 | /* XXX: (dgc) Propagate errors, only exit if fail-on-error flag set */ |
62 | int | |
75c8b434 | 63 | libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len) |
2bd0ea18 | 64 | { |
3cc4d0db NS |
65 | xfs_off_t start_offset, end_offset, offset; |
66 | ssize_t zsize, bytes; | |
9d6023a8 | 67 | size_t len_bytes; |
2bd0ea18 | 68 | char *z; |
9d6023a8 ES |
69 | int error, fd; |
70 | ||
71 | fd = libxfs_device_to_fd(btp->dev); | |
72 | start_offset = LIBXFS_BBTOOFF64(start); | |
73 | ||
74 | /* try to use special zeroing methods, fall back to writes if needed */ | |
75 | len_bytes = LIBXFS_BBTOOFF64(len); | |
76 | error = platform_zero_range(fd, start_offset, len_bytes); | |
77 | if (!error) | |
78 | return 0; | |
2bd0ea18 | 79 | |
3cc4d0db | 80 | zsize = min(BDSTRAT_SIZE, BBTOB(len)); |
b74a1f6a | 81 | if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) { |
9440d84d NS |
82 | fprintf(stderr, |
83 | _("%s: %s can't memalign %d bytes: %s\n"), | |
7dfd8291 | 84 | progname, __FUNCTION__, (int)zsize, strerror(errno)); |
2bd0ea18 NS |
85 | exit(1); |
86 | } | |
3cc4d0db NS |
87 | memset(z, 0, zsize); |
88 | ||
dc8878f4 | 89 | if ((lseek(fd, start_offset, SEEK_SET)) < 0) { |
3cc4d0db | 90 | fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"), |
7dfd8291 NS |
91 | progname, __FUNCTION__, |
92 | (unsigned long long)start_offset, strerror(errno)); | |
3cc4d0db NS |
93 | exit(1); |
94 | } | |
95 | ||
cb5b3ef4 | 96 | end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset; |
3cc4d0db NS |
97 | for (offset = 0; offset < end_offset; ) { |
98 | bytes = min((ssize_t)(end_offset - offset), zsize); | |
99 | if ((bytes = write(fd, z, bytes)) < 0) { | |
100 | fprintf(stderr, _("%s: %s write failed: %s\n"), | |
9440d84d | 101 | progname, __FUNCTION__, strerror(errno)); |
2bd0ea18 | 102 | exit(1); |
3cc4d0db NS |
103 | } else if (bytes == 0) { |
104 | fprintf(stderr, _("%s: %s not progressing?\n"), | |
105 | progname, __FUNCTION__); | |
106 | exit(1); | |
2bd0ea18 | 107 | } |
3cc4d0db | 108 | offset += bytes; |
2bd0ea18 NS |
109 | } |
110 | free(z); | |
9542ae13 | 111 | return 0; |
2bd0ea18 NS |
112 | } |
113 | ||
989b74bc | 114 | static void unmount_record(void *p) |
2bd0ea18 | 115 | { |
989b74bc | 116 | xlog_op_header_t *op = (xlog_op_header_t *)p; |
5000d01d SL |
117 | /* the data section must be 32 bit size aligned */ |
118 | struct { | |
14f8b681 DW |
119 | uint16_t magic; |
120 | uint16_t pad1; | |
121 | uint32_t pad2; /* may as well make it 64 bits */ | |
5000d01d SL |
122 | } magic = { XLOG_UNMOUNT_TYPE, 0, 0 }; |
123 | ||
989b74bc | 124 | memset(p, 0, BBSIZE); |
ad9b88eb BF |
125 | /* dummy tid to mark this as written from userspace */ |
126 | op->oh_tid = cpu_to_be32(0xb0c0d0d0); | |
5e656dbb BN |
127 | op->oh_len = cpu_to_be32(sizeof(magic)); |
128 | op->oh_clientid = XFS_LOG; | |
129 | op->oh_flags = XLOG_UNMOUNT_TRANS; | |
130 | op->oh_res2 = 0; | |
989b74bc NS |
131 | |
132 | /* and the data for this op */ | |
1552a820 | 133 | memcpy((char *)p + sizeof(xlog_op_header_t), &magic, sizeof(magic)); |
989b74bc NS |
134 | } |
135 | ||
1c12a814 BF |
136 | static char *next( |
137 | char *ptr, | |
138 | int offset, | |
139 | void *private) | |
989b74bc | 140 | { |
1c12a814 | 141 | struct xfs_buf *buf = (struct xfs_buf *)private; |
989b74bc | 142 | |
1c12a814 | 143 | if (buf && |
135e4bfe | 144 | (buf->b_bcount < (int)(ptr - (char *)buf->b_addr) + offset)) |
989b74bc | 145 | abort(); |
1c12a814 | 146 | |
989b74bc NS |
147 | return ptr + offset; |
148 | } | |
149 | ||
2556c98b BN |
150 | /* |
151 | * Simple I/O (buffer cache) interface | |
152 | */ | |
153 | ||
154 | ||
155 | #ifdef XFS_BUF_TRACING | |
156 | ||
8288ea3d | 157 | #undef libxfs_buf_read_map |
2556c98b | 158 | #undef libxfs_writebuf |
f315ae4f | 159 | #undef libxfs_buf_get_map |
2556c98b | 160 | |
4c947857 DW |
161 | int libxfs_buf_read_map(struct xfs_buftarg *btp, |
162 | struct xfs_buf_map *maps, int nmaps, int flags, | |
163 | struct xfs_buf **bpp, | |
164 | const struct xfs_buf_ops *ops); | |
2556c98b | 165 | int libxfs_writebuf(xfs_buf_t *, int); |
583ca112 DW |
166 | int libxfs_buf_get_map(struct xfs_buftarg *btp, |
167 | struct xfs_buf_map *maps, int nmaps, int flags, | |
168 | struct xfs_buf **bpp); | |
e02ba985 | 169 | void libxfs_buf_relse(struct xfs_buf *bp); |
2556c98b | 170 | |
a2ceac1f DC |
171 | #define __add_trace(bp, func, file, line) \ |
172 | do { \ | |
173 | if (bp) { \ | |
174 | (bp)->b_func = (func); \ | |
175 | (bp)->b_file = (file); \ | |
176 | (bp)->b_line = (line); \ | |
177 | } \ | |
178 | } while (0) | |
179 | ||
31079e67 | 180 | int |
361379e0 DW |
181 | libxfs_trace_readbuf( |
182 | const char *func, | |
183 | const char *file, | |
184 | int line, | |
185 | struct xfs_buftarg *btp, | |
186 | xfs_daddr_t blkno, | |
187 | size_t len, | |
188 | int flags, | |
31079e67 DW |
189 | const struct xfs_buf_ops *ops, |
190 | struct xfs_buf **bpp) | |
2556c98b | 191 | { |
31079e67 | 192 | int error; |
361379e0 | 193 | DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); |
2556c98b | 194 | |
31079e67 DW |
195 | error = libxfs_buf_read_map(btp, &map, 1, flags, bpp, ops); |
196 | __add_trace(*bpp, func, file, line); | |
197 | return error; | |
2556c98b BN |
198 | } |
199 | ||
/*
 * Traced front-end for libxfs_buf_read_map(): read a (possibly
 * discontiguous) buffer and stamp the caller's location on it.
 */
int
libxfs_trace_readbuf_map(
	const char		*func,
	const char		*file,
	int			line,
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps,
	int			flags,
	struct xfs_buf		**bpp,
	const struct xfs_buf_ops *ops)
{
	int			error;

	error = libxfs_buf_read_map(btp, map, nmaps, flags, bpp, ops);
	__add_trace(*bpp, func, file, line);
	return error;
}
218 | ||
/*
 * Traced front-end for libxfs_buf_mark_dirty(): record the caller's
 * location, then mark the buffer dirty.
 */
void
libxfs_trace_dirtybuf(
	const char		*func,
	const char		*file,
	int			line,
	struct xfs_buf		*bp)
{
	__add_trace(bp, func, file, line);
	libxfs_buf_mark_dirty(bp);
}
229 | ||
58a8b31f | 230 | int |
8b4de37c DW |
231 | libxfs_trace_getbuf( |
232 | const char *func, | |
233 | const char *file, | |
234 | int line, | |
235 | struct xfs_buftarg *btp, | |
236 | xfs_daddr_t blkno, | |
58a8b31f DW |
237 | size_t len, |
238 | struct xfs_buf **bpp) | |
2556c98b | 239 | { |
58a8b31f | 240 | int error; |
8b4de37c DW |
241 | DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); |
242 | ||
58a8b31f | 243 | error = libxfs_buf_get_map(target, &map, 1, 0, bpp); |
a2ceac1f | 244 | __add_trace(bp, func, file, line); |
58a8b31f | 245 | return error; |
a2ceac1f | 246 | } |
2556c98b | 247 | |
/*
 * Traced front-end for libxfs_buf_get_map(): look up / allocate a
 * (possibly discontiguous) buffer and stamp the caller's location on it.
 */
int
libxfs_trace_getbuf_map(
	const char		*func,
	const char		*file,
	int			line,
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps,
	int			flags,
	struct xfs_buf		**bpp)
{
	int			error;

	error = libxfs_buf_get_map(btp, map, nmaps, flags, bpp);
	__add_trace(*bpp, func, file, line);
	return error;
}
265 | ||
266 | void | |
267 | libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp) | |
268 | { | |
a2ceac1f | 269 | __add_trace(bp, func, file, line); |
e02ba985 | 270 | libxfs_buf_relse(bp); |
2556c98b BN |
271 | } |
272 | ||
273 | ||
274 | #endif | |
275 | ||
276 | ||
361379e0 DW |
277 | struct xfs_buf * |
278 | libxfs_getsb( | |
279 | struct xfs_mount *mp) | |
f1b058f9 | 280 | { |
31079e67 DW |
281 | struct xfs_buf *bp; |
282 | ||
283 | libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, XFS_FSS_TO_BB(mp, 1), | |
284 | 0, &bp, &xfs_sb_buf_ops); | |
285 | return bp; | |
f1b058f9 NS |
286 | } |
287 | ||
5e656dbb | 288 | kmem_zone_t *xfs_buf_zone; |
69ec88b5 BN |
289 | |
290 | static struct cache_mru xfs_buf_freelist = | |
291 | {{&xfs_buf_freelist.cm_list, &xfs_buf_freelist.cm_list}, | |
292 | 0, PTHREAD_MUTEX_INITIALIZER }; | |
f1b058f9 | 293 | |
a2ceac1f DC |
294 | /* |
295 | * The bufkey is used to pass the new buffer information to the cache object | |
296 | * allocation routine. Because discontiguous buffers need to pass different | |
297 | * information, we need fields to pass that information. However, because the | |
298 | * blkno and bblen is needed for the initial cache entry lookup (i.e. for | |
299 | * bcompare) the fact that the map/nmaps is non-null to switch to discontiguous | |
300 | * buffer initialisation instead of a contiguous buffer. | |
301 | */ | |
302 | struct xfs_bufkey { | |
75c8b434 | 303 | struct xfs_buftarg *buftarg; |
a2ceac1f DC |
304 | xfs_daddr_t blkno; |
305 | unsigned int bblen; | |
306 | struct xfs_buf_map *map; | |
307 | int nmaps; | |
308 | }; | |
f1b058f9 | 309 | |
602dcc0e DC |
310 | /* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ |
311 | #define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL | |
312 | #define CACHE_LINE_SIZE 64 | |
f1b058f9 | 313 | static unsigned int |
602dcc0e | 314 | libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift) |
f1b058f9 | 315 | { |
602dcc0e DC |
316 | uint64_t hashval = ((struct xfs_bufkey *)key)->blkno; |
317 | uint64_t tmp; | |
318 | ||
319 | tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE; | |
320 | tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift); | |
321 | return tmp % hashsize; | |
f1b058f9 NS |
322 | } |
323 | ||
324 | static int | |
325 | libxfs_bcompare(struct cache_node *node, cache_key_t key) | |
326 | { | |
063516bb DW |
327 | struct xfs_buf *bp = container_of(node, struct xfs_buf, |
328 | b_node); | |
329 | struct xfs_bufkey *bkey = (struct xfs_bufkey *)key; | |
f1b058f9 | 330 | |
75c8b434 | 331 | if (bp->b_target->dev == bkey->buftarg->dev && |
ba9ecd40 DC |
332 | bp->b_bn == bkey->blkno) { |
333 | if (bp->b_bcount == BBTOB(bkey->bblen)) | |
334 | return CACHE_HIT; | |
335 | #ifdef IO_BCOMPARE_CHECK | |
336 | if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) { | |
337 | fprintf(stderr, | |
338 | "%lx: Badness in key lookup (length)\n" | |
339 | "bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n", | |
340 | pthread_self(), | |
341 | (unsigned long long)bp->b_bn, (int)bp->b_bcount, | |
342 | (unsigned long long)bkey->blkno, | |
343 | BBTOB(bkey->bblen)); | |
344 | } | |
f1b058f9 | 345 | #endif |
ba9ecd40 DC |
346 | return CACHE_PURGE; |
347 | } | |
348 | return CACHE_MISS; | |
f1b058f9 NS |
349 | } |
350 | ||
e6b359b3 | 351 | static void |
75c8b434 DC |
352 | __initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, |
353 | unsigned int bytes) | |
e6b359b3 NS |
354 | { |
355 | bp->b_flags = 0; | |
5dfa5cd2 | 356 | bp->b_bn = bno; |
e6b359b3 | 357 | bp->b_bcount = bytes; |
a2ceac1f | 358 | bp->b_length = BTOBB(bytes); |
75c8b434 | 359 | bp->b_target = btp; |
7861ef77 | 360 | bp->b_mount = btp->bt_mount; |
a6a7776a | 361 | bp->b_error = 0; |
69ec88b5 BN |
362 | if (!bp->b_addr) |
363 | bp->b_addr = memalign(libxfs_device_alignment(), bytes); | |
e6b359b3 NS |
364 | if (!bp->b_addr) { |
365 | fprintf(stderr, | |
366 | _("%s: %s can't memalign %u bytes: %s\n"), | |
367 | progname, __FUNCTION__, bytes, | |
368 | strerror(errno)); | |
369 | exit(1); | |
370 | } | |
bf43fd28 | 371 | memset(bp->b_addr, 0, bytes); |
2556c98b BN |
372 | #ifdef XFS_BUF_TRACING |
373 | list_head_init(&bp->b_lock_list); | |
374 | #endif | |
375 | pthread_mutex_init(&bp->b_lock, NULL); | |
50722af1 CH |
376 | bp->b_holder = 0; |
377 | bp->b_recur = 0; | |
75c8b434 | 378 | bp->b_ops = NULL; |
2c6c6328 BF |
379 | |
380 | if (!bp->b_maps) { | |
381 | bp->b_nmaps = 1; | |
382 | bp->b_maps = &bp->__b_map; | |
383 | bp->b_maps[0].bm_bn = bp->b_bn; | |
384 | bp->b_maps[0].bm_len = bp->b_length; | |
385 | } | |
e6b359b3 NS |
386 | } |
387 | ||
a2ceac1f | 388 | static void |
75c8b434 DC |
389 | libxfs_initbuf(xfs_buf_t *bp, struct xfs_buftarg *btp, xfs_daddr_t bno, |
390 | unsigned int bytes) | |
a2ceac1f | 391 | { |
75c8b434 | 392 | __initbuf(bp, btp, bno, bytes); |
a2ceac1f DC |
393 | } |
394 | ||
395 | static void | |
75c8b434 DC |
396 | libxfs_initbuf_map(xfs_buf_t *bp, struct xfs_buftarg *btp, |
397 | struct xfs_buf_map *map, int nmaps) | |
a2ceac1f DC |
398 | { |
399 | unsigned int bytes = 0; | |
400 | int i; | |
401 | ||
402 | bytes = sizeof(struct xfs_buf_map) * nmaps; | |
85428dd2 DC |
403 | bp->b_maps = malloc(bytes); |
404 | if (!bp->b_maps) { | |
a2ceac1f DC |
405 | fprintf(stderr, |
406 | _("%s: %s can't malloc %u bytes: %s\n"), | |
407 | progname, __FUNCTION__, bytes, | |
408 | strerror(errno)); | |
409 | exit(1); | |
410 | } | |
411 | bp->b_nmaps = nmaps; | |
412 | ||
413 | bytes = 0; | |
414 | for ( i = 0; i < nmaps; i++) { | |
85428dd2 DC |
415 | bp->b_maps[i].bm_bn = map[i].bm_bn; |
416 | bp->b_maps[i].bm_len = map[i].bm_len; | |
a2ceac1f DC |
417 | bytes += BBTOB(map[i].bm_len); |
418 | } | |
419 | ||
75c8b434 | 420 | __initbuf(bp, btp, map[0].bm_bn, bytes); |
a2ceac1f DC |
421 | bp->b_flags |= LIBXFS_B_DISCONTIG; |
422 | } | |
423 | ||
00ff2b10 | 424 | static xfs_buf_t * |
a2ceac1f | 425 | __libxfs_getbufr(int blen) |
e6b359b3 NS |
426 | { |
427 | xfs_buf_t *bp; | |
69ec88b5 BN |
428 | |
429 | /* | |
430 | * first look for a buffer that can be used as-is, | |
431 | * if one cannot be found, see if there is a buffer, | |
ff1f79a7 | 432 | * and if so, free its buffer and set b_addr to NULL |
69ec88b5 BN |
433 | * before calling libxfs_initbuf. |
434 | */ | |
435 | pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); | |
436 | if (!list_empty(&xfs_buf_freelist.cm_list)) { | |
437 | list_for_each_entry(bp, &xfs_buf_freelist.cm_list, b_node.cn_mru) { | |
438 | if (bp->b_bcount == blen) { | |
439 | list_del_init(&bp->b_node.cn_mru); | |
440 | break; | |
441 | } | |
442 | } | |
443 | if (&bp->b_node.cn_mru == &xfs_buf_freelist.cm_list) { | |
444 | bp = list_entry(xfs_buf_freelist.cm_list.next, | |
445 | xfs_buf_t, b_node.cn_mru); | |
446 | list_del_init(&bp->b_node.cn_mru); | |
447 | free(bp->b_addr); | |
448 | bp->b_addr = NULL; | |
2c6c6328 BF |
449 | if (bp->b_maps != &bp->__b_map) |
450 | free(bp->b_maps); | |
85428dd2 | 451 | bp->b_maps = NULL; |
69ec88b5 BN |
452 | } |
453 | } else | |
5e656dbb | 454 | bp = kmem_zone_zalloc(xfs_buf_zone, 0); |
69ec88b5 | 455 | pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); |
e0607266 | 456 | bp->b_ops = NULL; |
0a7942b3 | 457 | if (bp->b_flags & LIBXFS_B_DIRTY) |
2ab6ea6a | 458 | fprintf(stderr, "found dirty buffer (bulk) on free list!\n"); |
e6b359b3 | 459 | |
a2ceac1f DC |
460 | return bp; |
461 | } | |
462 | ||
1a12e432 | 463 | static xfs_buf_t * |
75c8b434 | 464 | libxfs_getbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen) |
a2ceac1f DC |
465 | { |
466 | xfs_buf_t *bp; | |
467 | int blen = BBTOB(bblen); | |
468 | ||
469 | bp =__libxfs_getbufr(blen); | |
470 | if (bp) | |
75c8b434 | 471 | libxfs_initbuf(bp, btp, blkno, blen); |
2556c98b | 472 | #ifdef IO_DEBUG |
a2ceac1f | 473 | printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n", |
f63fd268 | 474 | pthread_self(), __FUNCTION__, blen, |
2556c98b BN |
475 | (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); |
476 | #endif | |
69ec88b5 | 477 | |
e6b359b3 NS |
478 | return bp; |
479 | } | |
480 | ||
00ff2b10 | 481 | static xfs_buf_t * |
75c8b434 | 482 | libxfs_getbufr_map(struct xfs_buftarg *btp, xfs_daddr_t blkno, int bblen, |
a2ceac1f DC |
483 | struct xfs_buf_map *map, int nmaps) |
484 | { | |
485 | xfs_buf_t *bp; | |
486 | int blen = BBTOB(bblen); | |
487 | ||
488 | if (!map || !nmaps) { | |
489 | fprintf(stderr, | |
490 | _("%s: %s invalid map %p or nmaps %d\n"), | |
491 | progname, __FUNCTION__, map, nmaps); | |
492 | exit(1); | |
493 | } | |
494 | ||
495 | if (blkno != map[0].bm_bn) { | |
496 | fprintf(stderr, | |
b47c8cae NS |
497 | _("%s: %s map blkno 0x%llx doesn't match key 0x%llx\n"), |
498 | progname, __FUNCTION__, (long long)map[0].bm_bn, | |
499 | (long long)blkno); | |
a2ceac1f DC |
500 | exit(1); |
501 | } | |
502 | ||
503 | bp =__libxfs_getbufr(blen); | |
504 | if (bp) | |
75c8b434 | 505 | libxfs_initbuf_map(bp, btp, map, nmaps); |
a2ceac1f DC |
506 | #ifdef IO_DEBUG |
507 | printf("%lx: %s: allocated %u bytes buffer, key=0x%llx(0x%llx), %p\n", | |
508 | pthread_self(), __FUNCTION__, blen, | |
509 | (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); | |
510 | #endif | |
511 | ||
512 | return bp; | |
513 | } | |
2556c98b BN |
514 | |
515 | #ifdef XFS_BUF_TRACING | |
516 | struct list_head lock_buf_list = {&lock_buf_list, &lock_buf_list}; | |
517 | int lock_buf_count = 0; | |
518 | #endif | |
e6b359b3 | 519 | |
b3b1affe DW |
520 | static int |
521 | __cache_lookup( | |
522 | struct xfs_bufkey *key, | |
523 | unsigned int flags, | |
524 | struct xfs_buf **bpp) | |
2bd0ea18 | 525 | { |
b3b1affe DW |
526 | struct cache_node *cn = NULL; |
527 | struct xfs_buf *bp; | |
2556c98b | 528 | |
b3b1affe DW |
529 | *bpp = NULL; |
530 | ||
531 | cache_node_get(libxfs_bcache, key, &cn); | |
532 | if (!cn) | |
533 | return -ENOMEM; | |
534 | bp = container_of(cn, struct xfs_buf, b_node); | |
2ae22647 CH |
535 | |
536 | if (use_xfs_buf_lock) { | |
b3b1affe | 537 | int ret; |
50722af1 CH |
538 | |
539 | ret = pthread_mutex_trylock(&bp->b_lock); | |
540 | if (ret) { | |
541 | ASSERT(ret == EAGAIN); | |
b3b1affe DW |
542 | if (flags & LIBXFS_GETBUF_TRYLOCK) { |
543 | cache_node_put(libxfs_bcache, cn); | |
544 | return -EAGAIN; | |
545 | } | |
50722af1 CH |
546 | |
547 | if (pthread_equal(bp->b_holder, pthread_self())) { | |
548 | fprintf(stderr, | |
549 | _("Warning: recursive buffer locking at block %" PRIu64 " detected\n"), | |
a2ceac1f | 550 | key->blkno); |
50722af1 | 551 | bp->b_recur++; |
b3b1affe DW |
552 | *bpp = bp; |
553 | return 0; | |
50722af1 CH |
554 | } else { |
555 | pthread_mutex_lock(&bp->b_lock); | |
2ae22647 | 556 | } |
2ae22647 | 557 | } |
50722af1 CH |
558 | |
559 | bp->b_holder = pthread_self(); | |
2ae22647 CH |
560 | } |
561 | ||
b3b1affe DW |
562 | cache_node_set_priority(libxfs_bcache, cn, |
563 | cache_node_get_priority(cn) - CACHE_PREFETCH_PRIORITY); | |
2556c98b | 564 | #ifdef XFS_BUF_TRACING |
2ae22647 CH |
565 | pthread_mutex_lock(&libxfs_bcache->c_mutex); |
566 | lock_buf_count++; | |
567 | list_add(&bp->b_lock_list, &lock_buf_list); | |
568 | pthread_mutex_unlock(&libxfs_bcache->c_mutex); | |
2556c98b | 569 | #endif |
2bd0ea18 | 570 | #ifdef IO_DEBUG |
a2ceac1f DC |
571 | printf("%lx %s: hit buffer %p for bno = 0x%llx/0x%llx\n", |
572 | pthread_self(), __FUNCTION__, | |
573 | bp, bp->b_bn, (long long)LIBXFS_BBTOOFF64(key->blkno)); | |
2bd0ea18 | 574 | #endif |
2556c98b | 575 | |
b3b1affe DW |
576 | *bpp = bp; |
577 | return 0; | |
f1b058f9 NS |
578 | } |
579 | ||
a5ab418c | 580 | static int |
b45650ab DW |
581 | libxfs_getbuf_flags( |
582 | struct xfs_buftarg *btp, | |
583 | xfs_daddr_t blkno, | |
584 | int len, | |
a5ab418c DW |
585 | unsigned int flags, |
586 | struct xfs_buf **bpp) | |
a2ceac1f | 587 | { |
b3b1affe | 588 | struct xfs_bufkey key = {NULL}; |
a2ceac1f | 589 | |
75c8b434 | 590 | key.buftarg = btp; |
a2ceac1f DC |
591 | key.blkno = blkno; |
592 | key.bblen = len; | |
593 | ||
a5ab418c | 594 | return __cache_lookup(&key, flags, bpp); |
a2ceac1f DC |
595 | } |
596 | ||
e8ecd760 DW |
597 | /* |
598 | * Clean the buffer flags for libxfs_getbuf*(), which wants to return | |
599 | * an unused buffer with clean state. This prevents CRC errors on a | |
600 | * re-read of a corrupt block that was prefetched and freed. This | |
601 | * can happen with a massively corrupt directory that is discarded, | |
602 | * but whose blocks are then recycled into expanding lost+found. | |
603 | * | |
604 | * Note however that if the buffer's dirty (prefetch calls getbuf) | |
605 | * we'll leave the state alone because we don't want to discard blocks | |
606 | * that have been fixed. | |
607 | */ | |
608 | static void | |
609 | reset_buf_state( | |
610 | struct xfs_buf *bp) | |
611 | { | |
612 | if (bp && !(bp->b_flags & LIBXFS_B_DIRTY)) | |
613 | bp->b_flags &= ~(LIBXFS_B_UNCHECKED | LIBXFS_B_STALE | | |
614 | LIBXFS_B_UPTODATE); | |
615 | } | |
616 | ||
583ca112 | 617 | static int |
b3b1affe DW |
618 | __libxfs_buf_get_map( |
619 | struct xfs_buftarg *btp, | |
620 | struct xfs_buf_map *map, | |
621 | int nmaps, | |
583ca112 DW |
622 | int flags, |
623 | struct xfs_buf **bpp) | |
a2ceac1f | 624 | { |
b3b1affe | 625 | struct xfs_bufkey key = {NULL}; |
b3b1affe | 626 | int i; |
a2ceac1f | 627 | |
583ca112 DW |
628 | if (nmaps == 1) |
629 | return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, | |
630 | flags, bpp); | |
f388124d | 631 | |
75c8b434 | 632 | key.buftarg = btp; |
a2ceac1f DC |
633 | key.blkno = map[0].bm_bn; |
634 | for (i = 0; i < nmaps; i++) { | |
635 | key.bblen += map[i].bm_len; | |
636 | } | |
637 | key.map = map; | |
638 | key.nmaps = nmaps; | |
639 | ||
583ca112 | 640 | return __cache_lookup(&key, flags, bpp); |
a2ceac1f DC |
641 | } |
642 | ||
/*
 * Public getbuf entry point: look up / create the buffer and scrub stale
 * verification state off it (see reset_buf_state) before returning.
 */
int
libxfs_buf_get_map(
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps,
	int			flags,
	struct xfs_buf		**bpp)
{
	int			error;

	error = __libxfs_buf_get_map(btp, map, nmaps, flags, bpp);
	if (error)
		return error;

	reset_buf_state(*bpp);
	return 0;
}
660 | ||
f1b058f9 | 661 | void |
e02ba985 DW |
662 | libxfs_buf_relse( |
663 | struct xfs_buf *bp) | |
f1b058f9 | 664 | { |
cee99cfa DC |
665 | /* |
666 | * ensure that any errors on this use of the buffer don't carry | |
667 | * over to the next user. | |
668 | */ | |
669 | bp->b_error = 0; | |
670 | ||
2556c98b BN |
671 | #ifdef XFS_BUF_TRACING |
672 | pthread_mutex_lock(&libxfs_bcache->c_mutex); | |
673 | lock_buf_count--; | |
674 | ASSERT(lock_buf_count >= 0); | |
675 | list_del_init(&bp->b_lock_list); | |
676 | pthread_mutex_unlock(&libxfs_bcache->c_mutex); | |
677 | #endif | |
50722af1 CH |
678 | if (use_xfs_buf_lock) { |
679 | if (bp->b_recur) { | |
680 | bp->b_recur--; | |
681 | } else { | |
682 | bp->b_holder = 0; | |
683 | pthread_mutex_unlock(&bp->b_lock); | |
684 | } | |
685 | } | |
6af7c1ea | 686 | |
cad15696 | 687 | if (!list_empty(&bp->b_node.cn_hash)) |
e7e49100 | 688 | cache_node_put(libxfs_bcache, &bp->b_node); |
1a12e432 DW |
689 | else if (--bp->b_node.cn_count == 0) { |
690 | if (bp->b_flags & LIBXFS_B_DIRTY) | |
691 | libxfs_bwrite(bp); | |
692 | libxfs_brelse(&bp->b_node); | |
693 | } | |
f1b058f9 NS |
694 | } |
695 | ||
f1b058f9 | 696 | static struct cache_node * |
e7e49100 DW |
697 | libxfs_balloc( |
698 | cache_key_t key) | |
f1b058f9 | 699 | { |
e7e49100 DW |
700 | struct xfs_bufkey *bufkey = (struct xfs_bufkey *)key; |
701 | struct xfs_buf *bp; | |
2556c98b | 702 | |
a2ceac1f | 703 | if (bufkey->map) |
e7e49100 DW |
704 | bp = libxfs_getbufr_map(bufkey->buftarg, bufkey->blkno, |
705 | bufkey->bblen, bufkey->map, bufkey->nmaps); | |
706 | else | |
707 | bp = libxfs_getbufr(bufkey->buftarg, bufkey->blkno, | |
708 | bufkey->bblen); | |
709 | return &bp->b_node; | |
2bd0ea18 NS |
710 | } |
711 | ||
a2ceac1f DC |
712 | |
713 | static int | |
714 | __read_buf(int fd, void *buf, int len, off64_t offset, int flags) | |
2bd0ea18 | 715 | { |
bcea58c7 | 716 | int sts; |
2bd0ea18 | 717 | |
2f9a125c | 718 | sts = pread(fd, buf, len, offset); |
bcea58c7 | 719 | if (sts < 0) { |
11202ec2 | 720 | int error = errno; |
9440d84d | 721 | fprintf(stderr, _("%s: read failed: %s\n"), |
c3928e39 | 722 | progname, strerror(error)); |
11202ec2 | 723 | return -error; |
a2ceac1f | 724 | } else if (sts != len) { |
bcea58c7 | 725 | fprintf(stderr, _("%s: error - read only %d of %d bytes\n"), |
a2ceac1f | 726 | progname, sts, len); |
12b53197 | 727 | return -EIO; |
2bd0ea18 | 728 | } |
a2ceac1f DC |
729 | return 0; |
730 | } | |
731 | ||
732 | int | |
75c8b434 DC |
733 | libxfs_readbufr(struct xfs_buftarg *btp, xfs_daddr_t blkno, xfs_buf_t *bp, |
734 | int len, int flags) | |
a2ceac1f | 735 | { |
75c8b434 | 736 | int fd = libxfs_device_to_fd(btp->dev); |
a2ceac1f DC |
737 | int bytes = BBTOB(len); |
738 | int error; | |
739 | ||
740 | ASSERT(BBTOB(len) <= bp->b_bcount); | |
741 | ||
742 | error = __read_buf(fd, bp->b_addr, bytes, LIBXFS_BBTOOFF64(blkno), flags); | |
743 | if (!error && | |
75c8b434 | 744 | bp->b_target->dev == btp->dev && |
5dfa5cd2 | 745 | bp->b_bn == blkno && |
f1b058f9 NS |
746 | bp->b_bcount == bytes) |
747 | bp->b_flags |= LIBXFS_B_UPTODATE; | |
a2ceac1f DC |
748 | #ifdef IO_DEBUG |
749 | printf("%lx: %s: read %u bytes, error %d, blkno=0x%llx(0x%llx), %p\n", | |
750 | pthread_self(), __FUNCTION__, bytes, error, | |
751 | (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp); | |
752 | #endif | |
e214b18a | 753 | bp->b_error = error; |
a2ceac1f | 754 | return error; |
2bd0ea18 NS |
755 | } |
756 | ||
456371d8 DW |
757 | int |
758 | libxfs_readbuf_verify( | |
759 | struct xfs_buf *bp, | |
760 | const struct xfs_buf_ops *ops) | |
adbb3573 DC |
761 | { |
762 | if (!ops) | |
456371d8 DW |
763 | return bp->b_error; |
764 | ||
adbb3573 DC |
765 | bp->b_ops = ops; |
766 | bp->b_ops->verify_read(bp); | |
767 | bp->b_flags &= ~LIBXFS_B_UNCHECKED; | |
456371d8 | 768 | return bp->b_error; |
adbb3573 DC |
769 | } |
770 | ||
800db1c1 | 771 | int |
6d5e5ee0 | 772 | libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) |
a2ceac1f | 773 | { |
d0bbcbcb | 774 | int fd; |
800db1c1 | 775 | int error = 0; |
04338619 | 776 | void *buf; |
800db1c1 | 777 | int i; |
75c8b434 | 778 | |
75c8b434 | 779 | fd = libxfs_device_to_fd(btp->dev); |
a2ceac1f DC |
780 | buf = bp->b_addr; |
781 | for (i = 0; i < bp->b_nmaps; i++) { | |
85428dd2 DC |
782 | off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); |
783 | int len = BBTOB(bp->b_maps[i].bm_len); | |
a2ceac1f | 784 | |
a2ceac1f DC |
785 | error = __read_buf(fd, buf, len, offset, flags); |
786 | if (error) { | |
787 | bp->b_error = error; | |
788 | break; | |
789 | } | |
790 | buf += len; | |
a2ceac1f DC |
791 | } |
792 | ||
64eb960f | 793 | if (!error) |
800db1c1 DC |
794 | bp->b_flags |= LIBXFS_B_UPTODATE; |
795 | #ifdef IO_DEBUG | |
15028317 DW |
796 | printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n", |
797 | pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error, | |
798 | (long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp); | |
800db1c1 DC |
799 | #endif |
800 | return error; | |
801 | } | |
802 | ||
4c947857 DW |
803 | int |
804 | libxfs_buf_read_map( | |
805 | struct xfs_buftarg *btp, | |
806 | struct xfs_buf_map *map, | |
807 | int nmaps, | |
808 | int flags, | |
809 | struct xfs_buf **bpp, | |
810 | const struct xfs_buf_ops *ops) | |
800db1c1 | 811 | { |
4c947857 DW |
812 | struct xfs_buf *bp; |
813 | bool salvage = flags & LIBXFS_READBUF_SALVAGE; | |
814 | int error = 0; | |
800db1c1 | 815 | |
4c947857 | 816 | *bpp = NULL; |
800db1c1 | 817 | if (nmaps == 1) |
e5008359 DW |
818 | error = libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, |
819 | 0, &bp); | |
820 | else | |
821 | error = __libxfs_buf_get_map(btp, map, nmaps, 0, &bp); | |
583ca112 | 822 | if (error) |
4c947857 | 823 | return error; |
800db1c1 | 824 | |
e5008359 DW |
825 | /* |
826 | * If the buffer was prefetched, it is likely that it was not validated. | |
827 | * Hence if we are supplied an ops function and the buffer is marked as | |
828 | * unchecked, we need to validate it now. | |
829 | * | |
830 | * We do this verification even if the buffer is dirty - the | |
831 | * verification is almost certainly going to fail the CRC check in this | |
832 | * case as a dirty buffer has not had the CRC recalculated. However, we | |
833 | * should not be dirtying unchecked buffers and therefore failing it | |
834 | * here because it's dirty and unchecked indicates we've screwed up | |
835 | * somewhere else. | |
4c947857 DW |
836 | * |
837 | * Note that if the caller passes in LIBXFS_READBUF_SALVAGE, that means | |
838 | * they want the buffer even if it fails verification. | |
e5008359 | 839 | */ |
800db1c1 | 840 | bp->b_error = 0; |
e5008359 | 841 | if (bp->b_flags & (LIBXFS_B_UPTODATE | LIBXFS_B_DIRTY)) { |
adbb3573 | 842 | if (bp->b_flags & LIBXFS_B_UNCHECKED) |
4c947857 DW |
843 | error = libxfs_readbuf_verify(bp, ops); |
844 | if (error && !salvage) | |
845 | goto err; | |
846 | goto ok; | |
75c8b434 | 847 | } |
e5008359 DW |
848 | |
849 | /* | |
850 | * Set the ops on a cache miss (i.e. first physical read) as the | |
851 | * verifier may change the ops to match the type of buffer it contains. | |
852 | * A cache hit might reset the verifier to the original type if we set | |
853 | * it again, but it won't get called again and set to match the buffer | |
854 | * contents. *cough* xfs_da_node_buf_ops *cough*. | |
855 | */ | |
856 | if (nmaps == 1) | |
857 | error = libxfs_readbufr(btp, map[0].bm_bn, bp, map[0].bm_len, | |
858 | flags); | |
859 | else | |
860 | error = libxfs_readbufr_map(btp, bp, flags); | |
4c947857 DW |
861 | if (error) |
862 | goto err; | |
adbb3573 | 863 | |
4c947857 DW |
864 | error = libxfs_readbuf_verify(bp, ops); |
865 | if (error && !salvage) | |
866 | goto err; | |
867 | ||
868 | ok: | |
15028317 | 869 | #ifdef IO_DEBUGX |
a2ceac1f DC |
870 | printf("%lx: %s: read %lu bytes, error %d, blkno=%llu(%llu), %p\n", |
871 | pthread_self(), __FUNCTION__, buf - (char *)bp->b_addr, error, | |
5dfa5cd2 | 872 | (long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn, bp); |
a2ceac1f | 873 | #endif |
4c947857 DW |
874 | *bpp = bp; |
875 | return 0; | |
876 | err: | |
877 | libxfs_buf_relse(bp); | |
878 | return error; | |
a2ceac1f DC |
879 | } |
880 | ||
3f8a028e DW |
881 | /* Allocate a raw uncached buffer. */ |
882 | static inline struct xfs_buf * | |
883 | libxfs_getbufr_uncached( | |
884 | struct xfs_buftarg *targ, | |
885 | xfs_daddr_t daddr, | |
886 | size_t bblen) | |
887 | { | |
888 | struct xfs_buf *bp; | |
889 | ||
890 | bp = libxfs_getbufr(targ, daddr, bblen); | |
891 | if (!bp) | |
892 | return NULL; | |
893 | ||
894 | INIT_LIST_HEAD(&bp->b_node.cn_hash); | |
895 | bp->b_node.cn_count = 1; | |
896 | return bp; | |
897 | } | |
898 | ||
899 | /* | |
900 | * Allocate an uncached buffer that points nowhere. The refcount will be 1, | |
901 | * and the cache node hash list will be empty to indicate that it's uncached. | |
902 | */ | |
d918bc57 | 903 | int |
3f8a028e DW |
904 | libxfs_buf_get_uncached( |
905 | struct xfs_buftarg *targ, | |
906 | size_t bblen, | |
d918bc57 DW |
907 | int flags, |
908 | struct xfs_buf **bpp) | |
3f8a028e | 909 | { |
d918bc57 DW |
910 | *bpp = libxfs_getbufr_uncached(targ, XFS_BUF_DADDR_NULL, bblen); |
911 | return *bpp != NULL ? 0 : -ENOMEM; | |
3f8a028e DW |
912 | } |
913 | ||
914 | /* | |
915 | * Allocate and read an uncached buffer. The refcount will be 1, and the cache | |
916 | * node hash list will be empty to indicate that it's uncached. | |
917 | */ | |
918 | int | |
919 | libxfs_buf_read_uncached( | |
920 | struct xfs_buftarg *targ, | |
921 | xfs_daddr_t daddr, | |
922 | size_t bblen, | |
923 | int flags, | |
924 | struct xfs_buf **bpp, | |
925 | const struct xfs_buf_ops *ops) | |
926 | { | |
927 | struct xfs_buf *bp; | |
928 | int error; | |
929 | ||
930 | *bpp = NULL; | |
931 | bp = libxfs_getbufr_uncached(targ, daddr, bblen); | |
932 | if (!bp) | |
933 | return -ENOMEM; | |
934 | ||
935 | error = libxfs_readbufr(targ, daddr, bp, bblen, flags); | |
936 | if (error) | |
937 | goto err; | |
938 | ||
939 | error = libxfs_readbuf_verify(bp, ops); | |
940 | if (error) | |
941 | goto err; | |
942 | ||
943 | *bpp = bp; | |
944 | return 0; | |
945 | err: | |
946 | libxfs_buf_relse(bp); | |
947 | return error; | |
948 | } | |
949 | ||
a2ceac1f DC |
950 | static int |
951 | __write_buf(int fd, void *buf, int len, off64_t offset, int flags) | |
2bd0ea18 NS |
952 | { |
953 | int sts; | |
2bd0ea18 | 954 | |
2f9a125c | 955 | sts = pwrite(fd, buf, len, offset); |
2bd0ea18 | 956 | if (sts < 0) { |
11202ec2 | 957 | int error = errno; |
2f9a125c | 958 | fprintf(stderr, _("%s: pwrite failed: %s\n"), |
c3928e39 | 959 | progname, strerror(error)); |
11202ec2 | 960 | return -error; |
a2ceac1f | 961 | } else if (sts != len) { |
2f9a125c | 962 | fprintf(stderr, _("%s: error - pwrite only %d of %d bytes\n"), |
a2ceac1f | 963 | progname, sts, len); |
12b53197 | 964 | return -EIO; |
2bd0ea18 | 965 | } |
a2ceac1f DC |
966 | return 0; |
967 | } | |
968 | ||
969 | int | |
331d5956 DW |
970 | libxfs_bwrite( |
971 | struct xfs_buf *bp) | |
a2ceac1f | 972 | { |
331d5956 | 973 | int fd = libxfs_device_to_fd(bp->b_target->dev); |
a2ceac1f | 974 | |
75c8b434 DC |
975 | /* |
976 | * we never write buffers that are marked stale. This indicates they | |
977 | * contain data that has been invalidated, and even if the buffer is | |
978 | * dirty it must *never* be written. Verifiers are wonderful for finding | |
979 | * bugs like this. Make sure the error is obvious as to the cause. | |
980 | */ | |
981 | if (bp->b_flags & LIBXFS_B_STALE) { | |
12b53197 | 982 | bp->b_error = -ESTALE; |
75c8b434 DC |
983 | return bp->b_error; |
984 | } | |
985 | ||
986 | /* | |
987 | * clear any pre-existing error status on the buffer. This can occur if | |
988 | * the buffer is corrupt on disk and the repair process doesn't clear | |
989 | * the error before fixing and writing it back. | |
990 | */ | |
991 | bp->b_error = 0; | |
992 | if (bp->b_ops) { | |
993 | bp->b_ops->verify_write(bp); | |
994 | if (bp->b_error) { | |
995 | fprintf(stderr, | |
eb20c4ca | 996 | _("%s: write verifier failed on %s bno 0x%llx/0x%x\n"), |
a3fac935 ES |
997 | __func__, bp->b_ops->name, |
998 | (long long)bp->b_bn, bp->b_bcount); | |
75c8b434 DC |
999 | return bp->b_error; |
1000 | } | |
1001 | } | |
1002 | ||
a2ceac1f | 1003 | if (!(bp->b_flags & LIBXFS_B_DISCONTIG)) { |
e8f1e8aa | 1004 | bp->b_error = __write_buf(fd, bp->b_addr, bp->b_bcount, |
5dfa5cd2 | 1005 | LIBXFS_BBTOOFF64(bp->b_bn), bp->b_flags); |
a2ceac1f DC |
1006 | } else { |
1007 | int i; | |
04338619 | 1008 | void *buf = bp->b_addr; |
a2ceac1f DC |
1009 | |
1010 | for (i = 0; i < bp->b_nmaps; i++) { | |
85428dd2 DC |
1011 | off64_t offset = LIBXFS_BBTOOFF64(bp->b_maps[i].bm_bn); |
1012 | int len = BBTOB(bp->b_maps[i].bm_len); | |
a2ceac1f | 1013 | |
e8f1e8aa DC |
1014 | bp->b_error = __write_buf(fd, buf, len, offset, |
1015 | bp->b_flags); | |
1016 | if (bp->b_error) | |
a2ceac1f | 1017 | break; |
a2ceac1f | 1018 | buf += len; |
a2ceac1f DC |
1019 | } |
1020 | } | |
1021 | ||
f1b058f9 | 1022 | #ifdef IO_DEBUG |
d085fb48 | 1023 | printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p, error %d\n", |
2556c98b | 1024 | pthread_self(), __FUNCTION__, bp->b_bcount, |
5dfa5cd2 | 1025 | (long long)LIBXFS_BBTOOFF64(bp->b_bn), |
e8f1e8aa | 1026 | (long long)bp->b_bn, bp, bp->b_error); |
f1b058f9 | 1027 | #endif |
9a54569a DW |
1028 | if (bp->b_error) { |
1029 | fprintf(stderr, | |
1030 | _("%s: write failed on %s bno 0x%llx/0x%x, err=%d\n"), | |
0a82d75e | 1031 | __func__, bp->b_ops ? bp->b_ops->name : "(unknown)", |
9a54569a DW |
1032 | (long long)bp->b_bn, bp->b_bcount, -bp->b_error); |
1033 | } else { | |
a2ceac1f | 1034 | bp->b_flags |= LIBXFS_B_UPTODATE; |
b98336dd | 1035 | bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_UNCHECKED); |
a2ceac1f | 1036 | } |
e8f1e8aa | 1037 | return bp->b_error; |
2bd0ea18 NS |
1038 | } |
1039 | ||
18b4f688 DW |
1040 | /* |
1041 | * Mark a buffer dirty. The dirty data will be written out when the cache | |
1042 | * is flushed (or at release time if the buffer is uncached). | |
1043 | */ | |
1044 | void | |
1045 | libxfs_buf_mark_dirty( | |
f524ae04 | 1046 | struct xfs_buf *bp) |
f1b058f9 | 1047 | { |
e0607266 DC |
1048 | #ifdef IO_DEBUG |
1049 | printf("%lx: %s: dirty blkno=%llu(%llu)\n", | |
1050 | pthread_self(), __FUNCTION__, | |
1051 | (long long)LIBXFS_BBTOOFF64(bp->b_bn), | |
1052 | (long long)bp->b_bn); | |
1053 | #endif | |
203d38cc DC |
1054 | /* |
1055 | * Clear any error hanging over from reading the buffer. This prevents | |
1056 | * subsequent reads after this write from seeing stale errors. | |
1057 | */ | |
1058 | bp->b_error = 0; | |
6af7c1ea | 1059 | bp->b_flags &= ~LIBXFS_B_STALE; |
f524ae04 | 1060 | bp->b_flags |= LIBXFS_B_DIRTY; |
2bd0ea18 NS |
1061 | } |
1062 | ||
c335b673 DW |
1063 | /* Complain about (and remember) dropping dirty buffers. */ |
1064 | static void | |
1065 | libxfs_whine_dirty_buf( | |
1066 | struct xfs_buf *bp) | |
1067 | { | |
1068 | fprintf(stderr, _("%s: Releasing dirty buffer to free list!\n"), | |
1069 | progname); | |
1070 | ||
1071 | if (bp->b_error == -EFSCORRUPTED) | |
1072 | bp->b_target->flags |= XFS_BUFTARG_CORRUPT_WRITE; | |
1073 | bp->b_target->flags |= XFS_BUFTARG_LOST_WRITE; | |
1074 | } | |
1075 | ||
33165ec3 | 1076 | static void |
0a7942b3 DC |
1077 | libxfs_brelse( |
1078 | struct cache_node *node) | |
33165ec3 | 1079 | { |
063516bb DW |
1080 | struct xfs_buf *bp = container_of(node, struct xfs_buf, |
1081 | b_node); | |
33165ec3 | 1082 | |
0a7942b3 DC |
1083 | if (!bp) |
1084 | return; | |
1085 | if (bp->b_flags & LIBXFS_B_DIRTY) | |
c335b673 | 1086 | libxfs_whine_dirty_buf(bp); |
0a7942b3 DC |
1087 | |
1088 | pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); | |
1089 | list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list); | |
1090 | pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); | |
33165ec3 BN |
1091 | } |
1092 | ||
e08f5594 | 1093 | static unsigned int |
69ec88b5 | 1094 | libxfs_bulkrelse( |
0a7942b3 DC |
1095 | struct cache *cache, |
1096 | struct list_head *list) | |
2556c98b | 1097 | { |
69ec88b5 | 1098 | xfs_buf_t *bp; |
e08f5594 | 1099 | int count = 0; |
2556c98b | 1100 | |
69ec88b5 | 1101 | if (list_empty(list)) |
e08f5594 | 1102 | return 0 ; |
69ec88b5 BN |
1103 | |
1104 | list_for_each_entry(bp, list, b_node.cn_mru) { | |
2556c98b | 1105 | if (bp->b_flags & LIBXFS_B_DIRTY) |
c335b673 | 1106 | libxfs_whine_dirty_buf(bp); |
e08f5594 | 1107 | count++; |
2556c98b | 1108 | } |
69ec88b5 BN |
1109 | |
1110 | pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); | |
0b90dda6 | 1111 | list_splice(list, &xfs_buf_freelist.cm_list); |
69ec88b5 | 1112 | pthread_mutex_unlock(&xfs_buf_freelist.cm_mutex); |
e08f5594 BN |
1113 | |
1114 | return count; | |
69ec88b5 BN |
1115 | } |
1116 | ||
864028ed ES |
1117 | /* |
1118 | * Free everything from the xfs_buf_freelist MRU, used at final teardown | |
1119 | */ | |
1120 | void | |
1121 | libxfs_bcache_free(void) | |
1122 | { | |
1123 | struct list_head *cm_list; | |
1124 | xfs_buf_t *bp, *next; | |
1125 | ||
1126 | cm_list = &xfs_buf_freelist.cm_list; | |
1127 | list_for_each_entry_safe(bp, next, cm_list, b_node.cn_mru) { | |
1128 | free(bp->b_addr); | |
1129 | if (bp->b_maps != &bp->__b_map) | |
1130 | free(bp->b_maps); | |
cef0cc3b | 1131 | kmem_cache_free(xfs_buf_zone, bp); |
864028ed ES |
1132 | } |
1133 | } | |
1134 | ||
e8f1e8aa DC |
1135 | /* |
1136 | * When a buffer is marked dirty, the error is cleared. Hence if we are trying | |
1137 | * to flush a buffer prior to cache reclaim that has an error on it it means | |
1138 | * we've already tried to flush it and it failed. Prevent repeated corruption | |
1139 | * errors from being reported by skipping such buffers - when the corruption is | |
1140 | * fixed the buffer will be marked dirty again and we can write it again. | |
1141 | */ | |
0a7942b3 DC |
1142 | static int |
1143 | libxfs_bflush( | |
1144 | struct cache_node *node) | |
69ec88b5 | 1145 | { |
063516bb DW |
1146 | struct xfs_buf *bp = container_of(node, struct xfs_buf, |
1147 | b_node); | |
69ec88b5 | 1148 | |
e8f1e8aa | 1149 | if (!bp->b_error && bp->b_flags & LIBXFS_B_DIRTY) |
331d5956 | 1150 | return libxfs_bwrite(bp); |
e8f1e8aa | 1151 | return bp->b_error; |
2556c98b BN |
1152 | } |
1153 | ||
f1b058f9 NS |
1154 | void |
1155 | libxfs_bcache_purge(void) | |
1156 | { | |
1157 | cache_purge(libxfs_bcache); | |
1158 | } | |
1159 | ||
e8cb94ee | 1160 | void |
33165ec3 BN |
1161 | libxfs_bcache_flush(void) |
1162 | { | |
1163 | cache_flush(libxfs_bcache); | |
1164 | } | |
1165 | ||
2556c98b BN |
1166 | int |
1167 | libxfs_bcache_overflowed(void) | |
1168 | { | |
1169 | return cache_overflowed(libxfs_bcache); | |
1170 | } | |
1171 | ||
f1b058f9 | 1172 | struct cache_operations libxfs_bcache_operations = { |
bd9cc49a ES |
1173 | .hash = libxfs_bhash, |
1174 | .alloc = libxfs_balloc, | |
1175 | .flush = libxfs_bflush, | |
1176 | .relse = libxfs_brelse, | |
1177 | .compare = libxfs_bcompare, | |
1178 | .bulkrelse = libxfs_bulkrelse | |
f1b058f9 NS |
1179 | }; |
1180 | ||
c08793bd BF |
1181 | /* |
1182 | * Verify an on-disk magic value against the magic value specified in the | |
1183 | * verifier structure. The verifier magic is in disk byte order so the caller is | |
1184 | * expected to pass the value directly from disk. | |
1185 | */ | |
1186 | bool | |
1187 | xfs_verify_magic( | |
1188 | struct xfs_buf *bp, | |
9e26de8f | 1189 | __be32 dmagic) |
c08793bd | 1190 | { |
7861ef77 | 1191 | struct xfs_mount *mp = bp->b_mount; |
c08793bd BF |
1192 | int idx; |
1193 | ||
1194 | idx = xfs_sb_version_hascrc(&mp->m_sb); | |
1195 | if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))) | |
1196 | return false; | |
1197 | return dmagic == bp->b_ops->magic[idx]; | |
9e26de8f DW |
1198 | } |
1199 | ||
1200 | /* | |
1201 | * Verify an on-disk magic value against the magic value specified in the | |
1202 | * verifier structure. The verifier magic is in disk byte order so the caller is | |
1203 | * expected to pass the value directly from disk. | |
1204 | */ | |
1205 | bool | |
1206 | xfs_verify_magic16( | |
1207 | struct xfs_buf *bp, | |
1208 | __be16 dmagic) | |
1209 | { | |
7861ef77 | 1210 | struct xfs_mount *mp = bp->b_mount; |
9e26de8f DW |
1211 | int idx; |
1212 | ||
1213 | idx = xfs_sb_version_hascrc(&mp->m_sb); | |
1214 | if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))) | |
1215 | return false; | |
1216 | return dmagic == bp->b_ops->magic16[idx]; | |
c08793bd | 1217 | } |
2bd0ea18 | 1218 | |
f1b058f9 | 1219 | /* |
3a19fb7d | 1220 | * Inode cache stubs. |
f1b058f9 NS |
1221 | */ |
1222 | ||
bf0e024f | 1223 | kmem_zone_t *xfs_inode_zone; |
5e656dbb | 1224 | extern kmem_zone_t *xfs_ili_zone; |
f1b058f9 | 1225 | |
2bd0ea18 | 1226 | int |
12ac6e04 DW |
1227 | libxfs_iget( |
1228 | struct xfs_mount *mp, | |
1229 | struct xfs_trans *tp, | |
1230 | xfs_ino_t ino, | |
1231 | uint lock_flags, | |
1fecabf9 | 1232 | struct xfs_inode **ipp) |
2bd0ea18 | 1233 | { |
12ac6e04 | 1234 | struct xfs_inode *ip; |
89522615 CH |
1235 | struct xfs_dinode *dip; |
1236 | struct xfs_buf *bp; | |
12ac6e04 | 1237 | int error = 0; |
2bd0ea18 | 1238 | |
3a19fb7d CH |
1239 | ip = kmem_zone_zalloc(xfs_inode_zone, 0); |
1240 | if (!ip) | |
12b53197 | 1241 | return -ENOMEM; |
2bd0ea18 | 1242 | |
3a19fb7d CH |
1243 | ip->i_ino = ino; |
1244 | ip->i_mount = mp; | |
89522615 CH |
1245 | error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, 0); |
1246 | if (error) | |
1247 | goto out_destroy; | |
1248 | ||
1249 | error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0); | |
1250 | if (error) | |
1251 | goto out_destroy; | |
1252 | ||
1253 | error = xfs_inode_from_disk(ip, dip); | |
1254 | if (!error) | |
1255 | xfs_buf_set_ref(bp, XFS_INO_REF); | |
1256 | xfs_trans_brelse(tp, bp); | |
1257 | ||
1258 | if (error) | |
1259 | goto out_destroy; | |
f1b058f9 | 1260 | |
3a19fb7d CH |
1261 | *ipp = ip; |
1262 | return 0; | |
89522615 CH |
1263 | |
1264 | out_destroy: | |
1265 | kmem_cache_free(xfs_inode_zone, ip); | |
1266 | *ipp = NULL; | |
1267 | return error; | |
f1b058f9 NS |
1268 | } |
1269 | ||
1270 | static void | |
014e5f6d ES |
1271 | libxfs_idestroy(xfs_inode_t *ip) |
1272 | { | |
e37bf53c | 1273 | switch (VFS_I(ip)->i_mode & S_IFMT) { |
014e5f6d ES |
1274 | case S_IFREG: |
1275 | case S_IFDIR: | |
1276 | case S_IFLNK: | |
a87a40a2 | 1277 | libxfs_idestroy_fork(&ip->i_df); |
014e5f6d ES |
1278 | break; |
1279 | } | |
a87a40a2 CH |
1280 | if (ip->i_afp) { |
1281 | libxfs_idestroy_fork(ip->i_afp); | |
1282 | kmem_cache_free(xfs_ifork_zone, ip->i_afp); | |
1283 | } | |
1284 | if (ip->i_cowfp) { | |
1285 | libxfs_idestroy_fork(ip->i_cowfp); | |
1286 | kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); | |
1287 | } | |
014e5f6d ES |
1288 | } |
1289 | ||
2bd0ea18 | 1290 | void |
31845e4c DW |
1291 | libxfs_irele( |
1292 | struct xfs_inode *ip) | |
2bd0ea18 | 1293 | { |
b1a24a81 | 1294 | ASSERT(ip->i_itemp == NULL); |
3a19fb7d | 1295 | libxfs_idestroy(ip); |
cef0cc3b | 1296 | kmem_cache_free(xfs_inode_zone, ip); |
2bd0ea18 | 1297 | } |
7db2e3c1 | 1298 | |
c335b673 DW |
1299 | /* |
1300 | * Flush everything dirty in the kernel and disk write caches to stable media. | |
1301 | * Returns 0 for success or a negative error code. | |
1302 | */ | |
1303 | int | |
1304 | libxfs_blkdev_issue_flush( | |
1305 | struct xfs_buftarg *btp) | |
1306 | { | |
1307 | int fd, ret; | |
1308 | ||
1309 | if (btp->dev == 0) | |
1310 | return 0; | |
1311 | ||
1312 | fd = libxfs_device_to_fd(btp->dev); | |
1313 | ret = platform_flush_device(fd, btp->dev); | |
1314 | return ret ? -errno : 0; | |
1315 | } | |
1316 | ||
7db2e3c1 DW |
1317 | /* |
1318 | * Write out a buffer list synchronously. | |
1319 | * | |
1320 | * This will take the @buffer_list, write all buffers out and wait for I/O | |
1321 | * completion on all of the buffers. @buffer_list is consumed by the function, | |
1322 | * so callers must have some other way of tracking buffers if they require such | |
1323 | * functionality. | |
1324 | */ | |
1325 | int | |
1326 | xfs_buf_delwri_submit( | |
1327 | struct list_head *buffer_list) | |
1328 | { | |
1329 | struct xfs_buf *bp, *n; | |
1330 | int error = 0, error2; | |
1331 | ||
1332 | list_for_each_entry_safe(bp, n, buffer_list, b_list) { | |
1333 | list_del_init(&bp->b_list); | |
331d5956 | 1334 | error2 = libxfs_bwrite(bp); |
7db2e3c1 DW |
1335 | if (!error) |
1336 | error = error2; | |
e02ba985 | 1337 | libxfs_buf_relse(bp); |
7db2e3c1 DW |
1338 | } |
1339 | ||
1340 | return error; | |
1341 | } | |
c92c796e | 1342 | |
e6d5a6f8 DW |
1343 | /* |
1344 | * Cancel a delayed write list. | |
1345 | * | |
1346 | * Remove each buffer from the list, clear the delwri queue flag and drop the | |
1347 | * associated buffer reference. | |
1348 | */ | |
1349 | void | |
1350 | xfs_buf_delwri_cancel( | |
1351 | struct list_head *list) | |
1352 | { | |
1353 | struct xfs_buf *bp; | |
1354 | ||
1355 | while (!list_empty(list)) { | |
1356 | bp = list_first_entry(list, struct xfs_buf, b_list); | |
1357 | ||
1358 | list_del_init(&bp->b_list); | |
1359 | libxfs_buf_relse(bp); | |
1360 | } | |
1361 | } | |
1362 | ||
c92c796e DW |
1363 | /* |
1364 | * Format the log. The caller provides either a buftarg which is used to access | |
1365 | * the log via buffers or a direct pointer to a buffer that encapsulates the | |
1366 | * entire log. | |
1367 | */ | |
1368 | int | |
1369 | libxfs_log_clear( | |
1370 | struct xfs_buftarg *btp, | |
1371 | char *dptr, | |
1372 | xfs_daddr_t start, | |
1373 | uint length, /* basic blocks */ | |
1374 | uuid_t *fs_uuid, | |
1375 | int version, | |
1376 | int sunit, /* bytes */ | |
1377 | int fmt, | |
1378 | int cycle, | |
1379 | bool max) | |
1380 | { | |
1381 | struct xfs_buf *bp = NULL; | |
1382 | int len; | |
1383 | xfs_lsn_t lsn; | |
1384 | xfs_lsn_t tail_lsn; | |
1385 | xfs_daddr_t blk; | |
1386 | xfs_daddr_t end_blk; | |
1387 | char *ptr; | |
1388 | ||
1389 | if (((btp && dptr) || (!btp && !dptr)) || | |
1390 | (btp && !btp->dev) || !fs_uuid) | |
1391 | return -EINVAL; | |
1392 | ||
1393 | /* first zero the log */ | |
1394 | if (btp) | |
1395 | libxfs_device_zero(btp, start, length); | |
1396 | else | |
1397 | memset(dptr, 0, BBTOB(length)); | |
1398 | ||
1399 | /* | |
1400 | * Initialize the log record length and LSNs. XLOG_INIT_CYCLE is a | |
1401 | * special reset case where we only write a single record where the lsn | |
1402 | * and tail_lsn match. Otherwise, the record lsn starts at block 0 of | |
1403 | * the specified cycle and points tail_lsn at the last record of the | |
1404 | * previous cycle. | |
1405 | */ | |
1406 | len = ((version == 2) && sunit) ? BTOBB(sunit) : 2; | |
1407 | len = max(len, 2); | |
1408 | lsn = xlog_assign_lsn(cycle, 0); | |
1409 | if (cycle == XLOG_INIT_CYCLE) | |
1410 | tail_lsn = lsn; | |
1411 | else | |
1412 | tail_lsn = xlog_assign_lsn(cycle - 1, length - len); | |
1413 | ||
1414 | /* write out the first log record */ | |
1415 | ptr = dptr; | |
1416 | if (btp) { | |
de319479 | 1417 | bp = libxfs_getbufr_uncached(btp, start, len); |
c92c796e DW |
1418 | ptr = bp->b_addr; |
1419 | } | |
1420 | libxfs_log_header(ptr, fs_uuid, version, sunit, fmt, lsn, tail_lsn, | |
1421 | next, bp); | |
18b4f688 | 1422 | if (bp) { |
f524ae04 | 1423 | libxfs_buf_mark_dirty(bp); |
18b4f688 DW |
1424 | libxfs_buf_relse(bp); |
1425 | } | |
c92c796e DW |
1426 | |
1427 | /* | |
1428 | * There's nothing else to do if this is a log reset. The kernel detects | |
1429 | * the rest of the log is zeroed and starts at cycle 1. | |
1430 | */ | |
1431 | if (cycle == XLOG_INIT_CYCLE) | |
1432 | return 0; | |
1433 | ||
1434 | /* | |
1435 | * Bump the record size for a full log format if the caller allows it. | |
1436 | * This is primarily for performance reasons and most callers don't care | |
1437 | * about record size since the log is clean after we're done. | |
1438 | */ | |
1439 | if (max) | |
1440 | len = BTOBB(BDSTRAT_SIZE); | |
1441 | ||
1442 | /* | |
1443 | * Otherwise, fill everything beyond the initial record with records of | |
1444 | * the previous cycle so the kernel head/tail detection works correctly. | |
1445 | * | |
1446 | * We don't particularly care about the record size or content here. | |
1447 | * It's only important that the headers are in place such that the | |
1448 | * kernel finds 1.) a clean log and 2.) the correct current cycle value. | |
1449 | * Therefore, bump up the record size to the max to use larger I/Os and | |
1450 | * improve performance. | |
1451 | */ | |
1452 | cycle--; | |
1453 | blk = start + len; | |
1454 | if (dptr) | |
1455 | dptr += BBTOB(len); | |
1456 | end_blk = start + length; | |
1457 | ||
1458 | len = min(end_blk - blk, len); | |
1459 | while (blk < end_blk) { | |
1460 | lsn = xlog_assign_lsn(cycle, blk - start); | |
1461 | tail_lsn = xlog_assign_lsn(cycle, blk - start - len); | |
1462 | ||
1463 | ptr = dptr; | |
1464 | if (btp) { | |
de319479 | 1465 | bp = libxfs_getbufr_uncached(btp, blk, len); |
c92c796e DW |
1466 | ptr = bp->b_addr; |
1467 | } | |
1468 | /* | |
1469 | * Note: pass the full buffer length as the sunit to initialize | |
1470 | * the entire buffer. | |
1471 | */ | |
1472 | libxfs_log_header(ptr, fs_uuid, version, BBTOB(len), fmt, lsn, | |
1473 | tail_lsn, next, bp); | |
18b4f688 | 1474 | if (bp) { |
f524ae04 | 1475 | libxfs_buf_mark_dirty(bp); |
18b4f688 DW |
1476 | libxfs_buf_relse(bp); |
1477 | } | |
c92c796e DW |
1478 | |
1479 | blk += len; | |
1480 | if (dptr) | |
1481 | dptr += BBTOB(len); | |
1482 | len = min(end_blk - blk, len); | |
1483 | } | |
1484 | ||
1485 | return 0; | |
1486 | } | |
1487 | ||
1488 | int | |
1489 | libxfs_log_header( | |
1490 | char *caddr, | |
1491 | uuid_t *fs_uuid, | |
1492 | int version, | |
1493 | int sunit, | |
1494 | int fmt, | |
1495 | xfs_lsn_t lsn, | |
1496 | xfs_lsn_t tail_lsn, | |
1497 | libxfs_get_block_t *nextfunc, | |
1498 | void *private) | |
1499 | { | |
1500 | xlog_rec_header_t *head = (xlog_rec_header_t *)caddr; | |
1501 | char *p = caddr; | |
1502 | __be32 cycle_lsn; | |
1503 | int i, len; | |
1504 | int hdrs = 1; | |
1505 | ||
1506 | if (lsn == NULLCOMMITLSN) | |
1507 | lsn = xlog_assign_lsn(XLOG_INIT_CYCLE, 0); | |
1508 | if (tail_lsn == NULLCOMMITLSN) | |
1509 | tail_lsn = lsn; | |
1510 | ||
1511 | len = ((version == 2) && sunit) ? BTOBB(sunit) : 1; | |
1512 | ||
1513 | memset(p, 0, BBSIZE); | |
1514 | head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); | |
1515 | head->h_cycle = cpu_to_be32(CYCLE_LSN(lsn)); | |
1516 | head->h_version = cpu_to_be32(version); | |
1517 | head->h_crc = cpu_to_le32(0); | |
1518 | head->h_prev_block = cpu_to_be32(-1); | |
1519 | head->h_num_logops = cpu_to_be32(1); | |
1520 | head->h_fmt = cpu_to_be32(fmt); | |
1521 | head->h_size = cpu_to_be32(max(sunit, XLOG_BIG_RECORD_BSIZE)); | |
1522 | ||
1523 | head->h_lsn = cpu_to_be64(lsn); | |
1524 | head->h_tail_lsn = cpu_to_be64(tail_lsn); | |
1525 | ||
1526 | memcpy(&head->h_fs_uuid, fs_uuid, sizeof(uuid_t)); | |
1527 | ||
1528 | /* | |
1529 | * The kernel expects to see either a log record header magic value or | |
1530 | * the LSN cycle at the top of every log block. The first word of each | |
1531 | * non-header block is copied to the record headers and replaced with | |
1532 | * the cycle value (see xlog_[un]pack_data() and xlog_get_cycle() for | |
1533 | * details). | |
1534 | * | |
1535 | * Even though we only ever write an unmount record (one block), we | |
1536 | * support writing log records up to the max log buffer size of 256k to | |
1537 | * improve log format performance. This means a record can require up | |
1538 | * to 8 headers (1 rec. header + 7 ext. headers) for the packed cycle | |
1539 | * data (each header supports 32k of data). | |
1540 | */ | |
1541 | cycle_lsn = CYCLE_LSN_DISK(head->h_lsn); | |
1542 | if (version == 2 && sunit > XLOG_HEADER_CYCLE_SIZE) { | |
1543 | hdrs = sunit / XLOG_HEADER_CYCLE_SIZE; | |
1544 | if (sunit % XLOG_HEADER_CYCLE_SIZE) | |
1545 | hdrs++; | |
1546 | } | |
1547 | ||
1548 | /* | |
1549 | * A fixed number of extended headers is expected based on h_size. If | |
1550 | * required, format those now so the unmount record is located | |
1551 | * correctly. | |
1552 | * | |
1553 | * Since we only write an unmount record, we only need one h_cycle_data | |
1554 | * entry for the unmount record block. The subsequent record data | |
1555 | * blocks are zeroed, which means we can stamp them directly with the | |
1556 | * cycle and zero the rest of the cycle data in the extended headers. | |
1557 | */ | |
1558 | if (hdrs > 1) { | |
1559 | for (i = 1; i < hdrs; i++) { | |
1560 | p = nextfunc(p, BBSIZE, private); | |
1561 | memset(p, 0, BBSIZE); | |
1562 | /* xlog_rec_ext_header.xh_cycle */ | |
1563 | *(__be32 *)p = cycle_lsn; | |
1564 | } | |
1565 | } | |
1566 | ||
1567 | /* | |
1568 | * The total length is the max of the stripe unit or 2 basic block | |
1569 | * minimum (1 hdr blk + 1 data blk). The record length is the total | |
1570 | * minus however many header blocks are required. | |
1571 | */ | |
1572 | head->h_len = cpu_to_be32(max(BBTOB(2), sunit) - hdrs * BBSIZE); | |
1573 | ||
1574 | /* | |
1575 | * Write out the unmount record, pack the first word into the record | |
1576 | * header and stamp the block with the cycle. | |
1577 | */ | |
1578 | p = nextfunc(p, BBSIZE, private); | |
1579 | unmount_record(p); | |
1580 | ||
1581 | head->h_cycle_data[0] = *(__be32 *)p; | |
1582 | *(__be32 *)p = cycle_lsn; | |
1583 | ||
1584 | /* | |
1585 | * Finally, zero all remaining blocks in the record and stamp each with | |
1586 | * the cycle. We don't need to pack any of these blocks because the | |
1587 | * cycle data in the headers has already been zeroed. | |
1588 | */ | |
1589 | len = max(len, hdrs + 1); | |
1590 | for (i = hdrs + 1; i < len; i++) { | |
1591 | p = nextfunc(p, BBSIZE, private); | |
1592 | memset(p, 0, BBSIZE); | |
1593 | *(__be32 *)p = cycle_lsn; | |
1594 | } | |
1595 | ||
1596 | return BBTOB(len); | |
1597 | } | |
1598 | ||
af60a998 DW |
1599 | void |
1600 | libxfs_buf_set_priority( | |
1601 | struct xfs_buf *bp, | |
1602 | int priority) | |
1603 | { | |
1604 | cache_node_set_priority(libxfs_bcache, &bp->b_node, priority); | |
1605 | } | |
1606 | ||
1607 | int | |
1608 | libxfs_buf_priority( | |
1609 | struct xfs_buf *bp) | |
1610 | { | |
1611 | return cache_node_get_priority(&bp->b_node); | |
1612 | } | |
1be76d11 DW |
1613 | |
1614 | /* | |
1615 | * Log a message about and stale a buffer that a caller has decided is corrupt. | |
1616 | * | |
1617 | * This function should be called for the kinds of metadata corruption that | |
1618 | * cannot be detect from a verifier, such as incorrect inter-block relationship | |
1619 | * data. Do /not/ call this function from a verifier function. | |
1620 | * | |
1621 | * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will | |
1622 | * be marked stale, but b_error will not be set. The caller is responsible for | |
1623 | * releasing the buffer or fixing it. | |
1624 | */ | |
1625 | void | |
1626 | __xfs_buf_mark_corrupt( | |
1627 | struct xfs_buf *bp, | |
1628 | xfs_failaddr_t fa) | |
1629 | { | |
1630 | ASSERT(bp->b_flags & XBF_DONE); | |
1631 | ||
9f09216e | 1632 | xfs_buf_corruption_error(bp, fa); |
1be76d11 DW |
1633 | xfs_buf_stale(bp); |
1634 | } |