// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
#include <linux/writeback.h>

/*
 * structure owned by writepages passed to individual writepage calls
 */
struct xfs_writepage_ctx {
	struct xfs_bmbt_irec	imap;
	unsigned int		io_type;
	unsigned int		data_seq;
	unsigned int		cow_seq;
	struct xfs_ioend	*ioend;
};

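/*
 * Return the block device backing an inode's data: the realtime device for
 * realtime inodes, otherwise the data device.
 */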
struct block_device *
xfs_find_bdev_for_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;

	if (XFS_IS_REALTIME_INODE(ip))
		return mp->m_rtdev_targp->bt_bdev;
	else
		return mp->m_ddev_targp->bt_bdev;
}

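/*
 * Like xfs_find_bdev_for_inode, but return the DAX device used for direct
 * access to the backing storage.
 */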
struct dax_device *
xfs_find_daxdev_for_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;

	if (XFS_IS_REALTIME_INODE(ip))
		return mp->m_rtdev_targp->bt_daxdev;
	else
		return mp->m_ddev_targp->bt_daxdev;
}

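/*
 * End writeback on a page once all block I/Os against it have completed,
 * using the per-page write_count kept in the iomap_page when the block size
 * is smaller than the page size.
 */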
static void
xfs_finish_page_writeback(
	struct inode		*inode,
	struct bio_vec		*bvec,
	int			error)
{
	struct iomap_page	*iop = to_iomap_page(bvec->bv_page);

	if (error) {
		SetPageError(bvec->bv_page);
		mapping_set_error(inode->i_mapping, -EIO);
	}

	ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
	ASSERT(!iop || atomic_read(&iop->write_count) > 0);

	if (!iop || atomic_dec_and_test(&iop->write_count))
		end_page_writeback(bvec->bv_page);
}

/*
 * We're now finished for good with this ioend structure. Update the page
 * state, release holds on bios, and finally free up memory. Do not use the
 * ioend after this.
 */
STATIC void
xfs_destroy_ioend(
	struct xfs_ioend	*ioend,
	int			error)
{
	struct inode		*inode = ioend->io_inode;
	struct bio		*bio = &ioend->io_inline_bio;
	struct bio		*last = ioend->io_bio, *next;
	u64			start = bio->bi_iter.bi_sector;
	bool			quiet = bio_flagged(bio, BIO_QUIET);

	for (bio = &ioend->io_inline_bio; bio; bio = next) {
		struct bio_vec	*bvec;
		int		i;

		/*
		 * For the last bio, bi_private points to the ioend, so we
		 * need to explicitly end the iteration here.
		 */
		if (bio == last)
			next = NULL;
		else
			next = bio->bi_private;

		/* walk each page on bio, ending page IO on them */
		bio_for_each_segment_all(bvec, bio, i)
			xfs_finish_page_writeback(inode, bvec, error);
		bio_put(bio);
	}

	if (unlikely(error && !quiet)) {
		xfs_err_ratelimited(XFS_I(inode)->i_mount,
			"writeback error on sector %llu", start);
	}
}

/*
 * Fast and loose check if this write could update the on-disk inode size.
 */
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
{
	return ioend->io_offset + ioend->io_size >
		XFS_I(ioend->io_inode)->i_d.di_size;
}

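/*
 * Allocate the transaction that will later be used to update the on-disk
 * inode size at I/O completion, and attach it to the ioend.
 */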
STATIC int
xfs_setfilesize_trans_alloc(
	struct xfs_ioend	*ioend)
{
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
				XFS_TRANS_NOFS, &tp);
	if (error)
		return error;

	ioend->io_append_trans = tp;

	/*
	 * We may pass freeze protection with a transaction.  So tell lockdep
	 * we released it.
	 */
	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
	/*
	 * We hand off the transaction to the completion thread now, so
	 * clear the flag here.
	 */
	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	return 0;
}

/*
 * Update on-disk file size now that data has been written to disk.
 */
STATIC int
__xfs_setfilesize(
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	xfs_off_t		offset,
	size_t			size)
{
	xfs_fsize_t		isize;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	isize = xfs_new_eof(ip, offset + size);
	if (!isize) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp);
		return 0;
	}

	trace_xfs_setfilesize(ip, offset, size);

	ip->i_d.di_size = isize;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	return xfs_trans_commit(tp);
}

int
xfs_setfilesize(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	size_t			size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
	if (error)
		return error;

	return __xfs_setfilesize(ip, tp, offset, size);
}

STATIC int
xfs_setfilesize_ioend(
	struct xfs_ioend	*ioend,
	int			error)
{
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	struct xfs_trans	*tp = ioend->io_append_trans;

	/*
	 * The transaction may have been allocated in the I/O submission thread,
	 * thus we need to mark ourselves as being in a transaction manually.
	 * Similarly for freeze protection.
	 */
	current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);

	/* we abort the update if there was an IO error */
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}

	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}

/*
 * IO write completion.
 */
STATIC void
xfs_end_io(
	struct work_struct	*work)
{
	struct xfs_ioend	*ioend =
		container_of(work, struct xfs_ioend, io_work);
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	xfs_off_t		offset = ioend->io_offset;
	size_t			size = ioend->io_size;
	int			error;

	/*
	 * Just clean up the in-memory structures if the fs has been shut down.
	 */
	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		error = -EIO;
		goto done;
	}

	/*
	 * Clean up any COW blocks on an I/O error.
	 */
	error = blk_status_to_errno(ioend->io_bio->bi_status);
	if (unlikely(error)) {
		switch (ioend->io_type) {
		case XFS_IO_COW:
			xfs_reflink_cancel_cow_range(ip, offset, size, true);
			break;
		}

		goto done;
	}

	/*
	 * Success: commit the COW or unwritten blocks if needed.
	 */
	switch (ioend->io_type) {
	case XFS_IO_COW:
		error = xfs_reflink_end_cow(ip, offset, size);
		break;
	case XFS_IO_UNWRITTEN:
		/* writeback should never update isize */
		error = xfs_iomap_write_unwritten(ip, offset, size, false);
		break;
	default:
		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
		break;
	}

done:
	if (ioend->io_append_trans)
		error = xfs_setfilesize_ioend(ioend, error);
	xfs_destroy_ioend(ioend, error);
}

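/*
 * Bio completion handler: punt unwritten extent conversion, COW remapping
 * and on-disk size updates to a workqueue, since they may need to run
 * transactions; everything else completes inline.
 */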
STATIC void
xfs_end_bio(
	struct bio		*bio)
{
	struct xfs_ioend	*ioend = bio->bi_private;
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;

	if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
	else if (ioend->io_append_trans)
		queue_work(mp->m_data_workqueue, &ioend->io_work);
	else
		xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
}

/*
 * Fast revalidation of the cached writeback mapping. Return true if the current
 * mapping is valid, false otherwise.
 */
static bool
xfs_imap_valid(
	struct xfs_writepage_ctx	*wpc,
	struct xfs_inode		*ip,
	xfs_fileoff_t			offset_fsb)
{
	if (offset_fsb < wpc->imap.br_startoff ||
	    offset_fsb >= wpc->imap.br_startoff + wpc->imap.br_blockcount)
		return false;
	/*
	 * If this is a COW mapping, it is sufficient to check that the mapping
	 * covers the offset. Be careful to check this first because the caller
	 * can revalidate a COW mapping without updating the data seqno.
	 */
	if (wpc->io_type == XFS_IO_COW)
		return true;

	/*
	 * This is not a COW mapping. Check the sequence number of the data fork
	 * because concurrent changes could have invalidated the extent. Check
	 * the COW fork because concurrent changes since the last time we
	 * checked (and found nothing at this offset) could have added
	 * overlapping blocks.
	 */
	if (wpc->data_seq != READ_ONCE(ip->i_df.if_seq))
		return false;
	if (xfs_inode_has_cow_data(ip) &&
	    wpc->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
		return false;
	return true;
}

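/*
 * Find or allocate the extent mapping that backs @offset for writeback,
 * caching the result in the writepage context so subsequent blocks on the
 * page can reuse it.
 */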
STATIC int
xfs_map_blocks(
	struct xfs_writepage_ctx *wpc,
	struct inode		*inode,
	loff_t			offset)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			count = i_blocksize(inode);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
	xfs_fileoff_t		cow_fsb = NULLFILEOFF;
	struct xfs_bmbt_irec	imap;
	int			whichfork = XFS_DATA_FORK;
	struct xfs_iext_cursor	icur;
	int			error = 0;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * COW fork blocks can overlap data fork blocks even if the blocks
	 * aren't shared.  COW I/O always takes precedence, so we must always
	 * check for overlap on reflink inodes unless the mapping is already a
	 * COW one, or the COW fork hasn't changed from the last time we looked
	 * at it.
	 *
	 * It's safe to check the COW fork if_seq here without the ILOCK because
	 * we've indirectly protected against concurrent updates: writeback has
	 * the page locked, which prevents concurrent invalidations by reflink
	 * and directio and prevents concurrent buffered writes to the same
	 * page.  Changes to if_seq always happen under i_lock, which protects
	 * against concurrent updates and provides a memory barrier on the way
	 * out that ensures that we always see the current value.
	 */
	if (xfs_imap_valid(wpc, ip, offset_fsb))
		return 0;

	/*
	 * If we don't have a valid map, now it's time to get a new one for this
	 * offset.  This will convert delayed allocations (including COW ones)
	 * into real extents.  If we return without a valid map, it means we
	 * landed in a hole and we skip the block.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       (ip->i_df.if_flags & XFS_IFEXTENTS));
	ASSERT(offset <= mp->m_super->s_maxbytes);

	if (offset > mp->m_super->s_maxbytes - count)
		count = mp->m_super->s_maxbytes - offset;
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);

	/*
	 * Check if this offset is covered by a COW extent, and if so use it
	 * directly instead of looking up anything in the data fork.
	 */
	if (xfs_inode_has_cow_data(ip) &&
	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
		cow_fsb = imap.br_startoff;
	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
		wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		/*
		 * Truncate can race with writeback since writeback doesn't
		 * take the iolock and truncate decreases the file size before
		 * it starts truncating the pages between new_size and old_size.
		 * Therefore, we can end up in the situation where writeback
		 * gets a CoW fork mapping but the truncate makes the mapping
		 * invalid and we end up in here trying to get a new mapping.
		 * Bail out here so that we simply never get a valid mapping
		 * and so we drop the write altogether.  The page truncation
		 * will kill the contents anyway.
		 */
		if (offset > i_size_read(inode)) {
			wpc->io_type = XFS_IO_HOLE;
			return 0;
		}
		whichfork = XFS_COW_FORK;
		wpc->io_type = XFS_IO_COW;
		goto allocate_blocks;
	}

	/*
	 * No COW extent overlap. Revalidate now that we may have updated
	 * ->cow_seq. If the data mapping is still valid, we're done.
	 */
	if (xfs_imap_valid(wpc, ip, offset_fsb)) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return 0;
	}

	/*
	 * If we don't have a valid map, now it's time to get a new one for this
	 * offset.  This will convert delayed allocations (including COW ones)
	 * into real extents.
	 */
	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
	wpc->data_seq = READ_ONCE(ip->i_df.if_seq);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (imap.br_startoff > offset_fsb) {
		/* landed in a hole or beyond EOF */
		imap.br_blockcount = imap.br_startoff - offset_fsb;
		imap.br_startoff = offset_fsb;
		imap.br_startblock = HOLESTARTBLOCK;
		wpc->io_type = XFS_IO_HOLE;
	} else {
		/*
		 * Truncate to the next COW extent if there is one.  This is the
		 * only opportunity to do this because we can skip COW fork
		 * lookups for the subsequent blocks in the mapping; however,
		 * the requirement to treat the COW range separately remains.
		 */
		if (cow_fsb != NULLFILEOFF &&
		    cow_fsb < imap.br_startoff + imap.br_blockcount)
			imap.br_blockcount = cow_fsb - imap.br_startoff;

		if (isnullstartblock(imap.br_startblock)) {
			/* got a delalloc extent */
			wpc->io_type = XFS_IO_DELALLOC;
			goto allocate_blocks;
		}

		if (imap.br_state == XFS_EXT_UNWRITTEN)
			wpc->io_type = XFS_IO_UNWRITTEN;
		else
			wpc->io_type = XFS_IO_OVERWRITE;
	}

	wpc->imap = imap;
	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
	return 0;
allocate_blocks:
	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
			whichfork == XFS_COW_FORK ?
					&wpc->cow_seq : &wpc->data_seq);
	if (error)
		return error;
	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
	wpc->imap = imap;
	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
	return 0;
}

/*
 * Submit the bio for an ioend. We are passed an ioend with a bio attached to
 * it, and we submit that bio. The ioend may be used for multiple bio
 * submissions, so we only want to allocate an append transaction for the ioend
 * once. In the case of multiple bio submission, each bio will take an IO
 * reference to the ioend to ensure that the ioend completion is only done once
 * all bios have been submitted and the ioend is really done.
 *
 * If @fail is non-zero, it means that we have a situation where some part of
 * the submission process has failed after we have marked pages for writeback
 * and unlocked them. In this situation, we need to fail the bio and ioend
 * rather than submit it to IO. This typically only happens on a filesystem
 * shutdown.
 */
STATIC int
xfs_submit_ioend(
	struct writeback_control *wbc,
	struct xfs_ioend	*ioend,
	int			status)
{
	/* Convert CoW extents to regular */
	if (!status && ioend->io_type == XFS_IO_COW) {
		/*
		 * Yuk. This can do memory allocation, but is not a
		 * transactional operation so everything is done in GFP_KERNEL
		 * context. That can deadlock, because we hold pages in
		 * writeback state and GFP_KERNEL allocations can block on them.
		 * Hence we must operate in nofs conditions here.
		 */
		unsigned nofs_flag;

		nofs_flag = memalloc_nofs_save();
		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
				ioend->io_offset, ioend->io_size);
		memalloc_nofs_restore(nofs_flag);
	}

	/* Reserve log space if we might write beyond the on-disk inode size. */
	if (!status &&
	    ioend->io_type != XFS_IO_UNWRITTEN &&
	    xfs_ioend_is_append(ioend) &&
	    !ioend->io_append_trans)
		status = xfs_setfilesize_trans_alloc(ioend);

	ioend->io_bio->bi_private = ioend;
	ioend->io_bio->bi_end_io = xfs_end_bio;
	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);

	/*
	 * If we are failing the IO now, just mark the ioend with an
	 * error and finish it. This will run IO completion immediately
	 * as there is only one reference to the ioend at this point in
	 * time.
	 */
	if (status) {
		ioend->io_bio->bi_status = errno_to_blk_status(status);
		bio_endio(ioend->io_bio);
		return status;
	}

	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
	submit_bio(ioend->io_bio);
	return 0;
}

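/*
 * Allocate a new ioend for the given range. The ioend is embedded in its
 * first bio, so a single mempool allocation covers both structures.
 */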
static struct xfs_ioend *
xfs_alloc_ioend(
	struct inode		*inode,
	unsigned int		type,
	xfs_off_t		offset,
	struct block_device	*bdev,
	sector_t		sector)
{
	struct xfs_ioend	*ioend;
	struct bio		*bio;

	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = sector;

	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
	INIT_LIST_HEAD(&ioend->io_list);
	ioend->io_type = type;
	ioend->io_inode = inode;
	ioend->io_size = 0;
	ioend->io_offset = offset;
	INIT_WORK(&ioend->io_work, xfs_end_io);
	ioend->io_append_trans = NULL;
	ioend->io_bio = bio;
	return ioend;
}

/*
 * Allocate a new bio, and chain the old bio to the new one.
 *
 * Note that we have to perform the chaining in this unintuitive order
 * so that the bi_private linkage is set up in the right direction for the
 * traversal in xfs_destroy_ioend().
 */
static void
xfs_chain_bio(
	struct xfs_ioend	*ioend,
	struct writeback_control *wbc,
	struct block_device	*bdev,
	sector_t		sector)
{
	struct bio *new;

	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
	bio_set_dev(new, bdev);
	new->bi_iter.bi_sector = sector;
	bio_chain(ioend->io_bio, new);
	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
	submit_bio(ioend->io_bio);
	ioend->io_bio = new;
}

/*
 * Test to see if we have an existing ioend structure that we could append to
 * first, otherwise finish off the current ioend and start another.
 */
STATIC void
xfs_add_to_ioend(
	struct inode		*inode,
	xfs_off_t		offset,
	struct page		*page,
	struct iomap_page	*iop,
	struct xfs_writepage_ctx *wpc,
	struct writeback_control *wbc,
	struct list_head	*iolist)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
	unsigned		len = i_blocksize(inode);
	unsigned		poff = offset & (PAGE_SIZE - 1);
	sector_t		sector;

	sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
		((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9);

	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
	    sector != bio_end_sector(wpc->ioend->io_bio) ||
	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
		if (wpc->ioend)
			list_add(&wpc->ioend->io_list, iolist);
		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset,
				bdev, sector);
	}

	if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
		if (iop)
			atomic_inc(&iop->write_count);
		if (bio_full(wpc->ioend->io_bio))
			xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
		__bio_add_page(wpc->ioend->io_bio, page, len, poff);
	}

	wpc->ioend->io_size += len;
}

STATIC void
xfs_vm_invalidatepage(
	struct page		*page,
	unsigned int		offset,
	unsigned int		length)
{
	trace_xfs_invalidatepage(page->mapping->host, page, offset, length);
	iomap_invalidatepage(page, offset, length);
}

/*
 * If the page has delalloc blocks on it, we need to punch them out before we
 * invalidate the page.  If we don't, we leave a stale delalloc mapping on the
 * inode that can trip up a later direct I/O read operation on the same region.
 *
 * We prevent this by truncating away the delalloc regions on the page.  Because
 * they are delalloc, we can do this without needing a transaction. Indeed - if
 * we get ENOSPC errors, we have to be able to do this truncation without a
 * transaction as there is no space left for block reservation (typically why we
 * see an ENOSPC in writeback).
 */
STATIC void
xfs_aops_discard_page(
	struct page		*page)
{
	struct inode		*inode = page->mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	loff_t			offset = page_offset(page);
	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, offset);
	int			error;

	if (XFS_FORCED_SHUTDOWN(mp))
		goto out_invalidate;

	xfs_alert(mp,
		"page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
			page, ip->i_ino, offset);

	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
			PAGE_SIZE / i_blocksize(inode));
	if (error && !XFS_FORCED_SHUTDOWN(mp))
		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
out_invalidate:
	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
}

/*
 * We implement an immediate ioend submission policy here to avoid needing to
 * chain multiple ioends and hence nest mempool allocations which can violate
 * forward progress guarantees we need to provide. The current ioend we are
 * adding blocks to is cached on the writepage context, and if the new block
 * does not append to the cached ioend it will create a new ioend and cache that
 * instead.
 *
 * If a new ioend is created and cached, the old ioend is returned and queued
 * locally for submission once the entire page is processed or an error has been
 * detected.  While ioends are submitted immediately after they are completed,
 * batching optimisations are provided by higher level block plugging.
 *
 * At the end of a writeback pass, there will be a cached ioend remaining on the
 * writepage context that the caller will need to submit.
 */
static int
xfs_writepage_map(
	struct xfs_writepage_ctx *wpc,
	struct writeback_control *wbc,
	struct inode		*inode,
	struct page		*page,
	uint64_t		end_offset)
{
	LIST_HEAD(submit_list);
	struct iomap_page	*iop = to_iomap_page(page);
	unsigned		len = i_blocksize(inode);
	struct xfs_ioend	*ioend, *next;
	uint64_t		file_offset;	/* file offset of page */
	int			error = 0, count = 0, i;

	ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
	ASSERT(!iop || atomic_read(&iop->write_count) == 0);

	/*
	 * Walk through the page to find areas to write back. If we run off the
	 * end of the current map or find the current map invalid, grab a new
	 * one.
	 */
	for (i = 0, file_offset = page_offset(page);
	     i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
	     i++, file_offset += len) {
		if (iop && !test_bit(i, iop->uptodate))
			continue;

		error = xfs_map_blocks(wpc, inode, file_offset);
		if (error)
			break;
		if (wpc->io_type == XFS_IO_HOLE)
			continue;
		xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
				 &submit_list);
		count++;
	}

	ASSERT(wpc->ioend || list_empty(&submit_list));
	ASSERT(PageLocked(page));
	ASSERT(!PageWriteback(page));

	/*
	 * On error, we have to fail the ioend here because we may have set
	 * pages under writeback, we have to make sure we run IO completion to
	 * mark the error state of the IO appropriately, so we can't cancel the
	 * ioend directly here.  That means we have to mark this page as under
	 * writeback if we included any blocks from it in the ioend chain so
	 * that completion treats it correctly.
	 *
	 * If we didn't include the page in the ioend, then on error we can
	 * simply discard and unlock it as there are no other users of the page
	 * now.  The caller will still need to trigger submission of outstanding
	 * ioends on the writepage context so they are treated correctly on
	 * error.
	 */
	if (unlikely(error)) {
		if (!count) {
			xfs_aops_discard_page(page);
			ClearPageUptodate(page);
			unlock_page(page);
			goto done;
		}

		/*
		 * If the page was not fully cleaned, we need to ensure that the
		 * higher layers come back to it correctly.  That means we need
		 * to keep the page dirty, and for WB_SYNC_ALL writeback we need
		 * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
		 * so another attempt to write this page in this writeback sweep
		 * will be made.
		 */
		set_page_writeback_keepwrite(page);
	} else {
		clear_page_dirty_for_io(page);
		set_page_writeback(page);
	}

	unlock_page(page);

	/*
	 * Preserve the original error if there was one, otherwise catch
	 * submission errors here and propagate into subsequent ioend
	 * submissions.
	 */
	list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
		int error2;

		list_del_init(&ioend->io_list);
		error2 = xfs_submit_ioend(wbc, ioend, error);
		if (error2 && !error)
			error = error2;
	}

	/*
	 * We can end up here with no error and nothing to write only if we race
	 * with a partial page truncate on a sub-page block sized filesystem.
	 */
	if (!count)
		end_page_writeback(page);
done:
	mapping_set_error(page->mapping, error);
	return error;
}

/*
 * Write out a dirty page.
 *
 * For delalloc space on the page we need to allocate space and flush it.
 * For unwritten space on the page we need to start the conversion to
 * regular allocated space.
 */
STATIC int
xfs_do_writepage(
	struct page		*page,
	struct writeback_control *wbc,
	void			*data)
{
	struct xfs_writepage_ctx *wpc = data;
	struct inode		*inode = page->mapping->host;
	loff_t			offset;
	uint64_t		end_offset;
	pgoff_t			end_index;

	trace_xfs_writepage(inode, page, 0, 0);

	/*
	 * Refuse to write the page out if we are called from reclaim context.
	 *
	 * This avoids stack overflows when called from deeply used stacks in
	 * random callers for direct reclaim or memcg reclaim.  We explicitly
	 * allow reclaim from kswapd as the stack usage there is relatively low.
	 *
	 * This should never happen except in the case of a VM regression so
	 * warn about it.
	 */
	if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
			PF_MEMALLOC))
		goto redirty;

	/*
	 * Given that we do not allow direct reclaim to call us, we should
	 * never be called while in a filesystem transaction.
	 */
	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
		goto redirty;

	/*
	 * Is this page beyond the end of the file?
	 *
	 * The page index is less than the end_index, adjust the end_offset
	 * to the highest offset that this page should represent.
	 * -----------------------------------------------------
	 * |			file mapping	       | <EOF> |
	 * -----------------------------------------------------
	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
	 * ^--------------------------------^----------|--------
	 * |     desired writeback range    |      see else    |
	 * ---------------------------------^------------------|
	 */
	offset = i_size_read(inode);
	end_index = offset >> PAGE_SHIFT;
	if (page->index < end_index)
		end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
	else {
		/*
		 * Check whether the page to write out is beyond or straddles
		 * i_size or not.
		 * -------------------------------------------------------
		 * |		file mapping		    | <EOF>    |
		 * -------------------------------------------------------
		 * | Page ... | Page N-2 | Page N-1 |  Page N  | Beyond |
		 * ^--------------------------------^----------|---------
		 * |				    |     Straddles     |
		 * ---------------------------------^----------|--------|
		 */
		unsigned offset_into_page = offset & (PAGE_SIZE - 1);

		/*
		 * Skip the page if it is fully outside i_size, e.g. due to a
		 * truncate operation that is in progress. We must redirty the
		 * page so that reclaim stops reclaiming it. Otherwise
		 * xfs_vm_releasepage() is called on it and gets confused.
		 *
		 * Note that the end_index is unsigned long, it would overflow
		 * if the given offset is greater than 16TB on 32-bit system
		 * and if we do check the page is fully outside i_size or not
		 * via "if (page->index >= end_index + 1)" as "end_index + 1"
		 * will be evaluated to 0.  Hence this page will be redirtied
		 * and be written out repeatedly which would result in an
		 * infinite loop, the user program that performs this operation
		 * will hang.  Instead, we can verify this situation by checking
		 * if the page to write is totally beyond the i_size or if its
		 * offset is just equal to the EOF.
		 */
		if (page->index > end_index ||
		    (page->index == end_index && offset_into_page == 0))
			goto redirty;

		/*
		 * The page straddles i_size.  It must be zeroed out on each
		 * and every writepage invocation because it may be mmapped.
		 * "A file is mapped in multiples of the page size.  For a file
		 * that is not a multiple of the page size, the remaining
		 * memory is zeroed when mapped, and writes to that region are
		 * not written out to the file."
		 */
		zero_user_segment(page, offset_into_page, PAGE_SIZE);

		/* Adjust the end_offset to the end of file */
		end_offset = offset;
	}

	return xfs_writepage_map(wpc, wbc, inode, page, end_offset);

redirty:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
}

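/*
 * ->writepage entry point: write a single page using a writepage context
 * that starts with no valid mapping (XFS_IO_HOLE), then submit any ioend
 * left cached on the context.
 */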
STATIC int
xfs_vm_writepage(
	struct page		*page,
	struct writeback_control *wbc)
{
	struct xfs_writepage_ctx wpc = {
		.io_type = XFS_IO_HOLE,
	};
	int			ret;

	ret = xfs_do_writepage(page, wbc, &wpc);
	if (wpc.ioend)
		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
	return ret;
}

STATIC int
xfs_vm_writepages(
	struct address_space	*mapping,
	struct writeback_control *wbc)
{
	struct xfs_writepage_ctx wpc = {
		.io_type = XFS_IO_HOLE,
	};
	int			ret;

	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
	if (wpc.ioend)
		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
	return ret;
}

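/*
 * DAX inodes bypass the page cache, so "writeback" here only needs to flush
 * dirty DAX entries for the mapped range via the DAX infrastructure.
 */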
STATIC int
xfs_dax_writepages(
	struct address_space	*mapping,
	struct writeback_control *wbc)
{
	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
	return dax_writeback_mapping_range(mapping,
			xfs_find_bdev_for_inode(mapping->host), wbc);
}

STATIC int
xfs_vm_releasepage(
	struct page		*page,
	gfp_t			gfp_mask)
{
	trace_xfs_releasepage(page->mapping->host, page, 0, 0);
	return iomap_releasepage(page, gfp_mask);
}

STATIC sector_t
xfs_vm_bmap(
	struct address_space	*mapping,
	sector_t		block)
{
	struct xfs_inode	*ip = XFS_I(mapping->host);

	trace_xfs_vm_bmap(ip);

	/*
	 * The swap code (ab-)uses ->bmap to get a block mapping and then
	 * bypasses the file system for actual I/O.  We really can't allow
	 * that on reflink inodes, so we have to skip out here.  And yes,
	 * 0 is the magic code for a bmap error.
	 *
	 * Since we don't pass back blockdev info, we can't return bmap
	 * information for rt files either.
	 */
	if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
		return 0;
	return iomap_bmap(mapping, block, &xfs_iomap_ops);
}

STATIC int
xfs_vm_readpage(
	struct file		*unused,
	struct page		*page)
{
	trace_xfs_vm_readpage(page->mapping->host, 1);
	return iomap_readpage(page, &xfs_iomap_ops);
}

STATIC int
xfs_vm_readpages(
	struct file		*unused,
	struct address_space	*mapping,
	struct list_head	*pages,
	unsigned		nr_pages)
{
	trace_xfs_vm_readpages(mapping->host, nr_pages);
	return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
}

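/* Wire the swap code up to the iomap-based block mapping helpers. */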
static int
xfs_iomap_swapfile_activate(
	struct swap_info_struct		*sis,
	struct file			*swap_file,
	sector_t			*span)
{
	sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
	return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
}

const struct address_space_operations xfs_address_space_operations = {
	.readpage		= xfs_vm_readpage,
	.readpages		= xfs_vm_readpages,
	.writepage		= xfs_vm_writepage,
	.writepages		= xfs_vm_writepages,
	.set_page_dirty		= iomap_set_page_dirty,
	.releasepage		= xfs_vm_releasepage,
	.invalidatepage		= xfs_vm_invalidatepage,
	.bmap			= xfs_vm_bmap,
	.direct_IO		= noop_direct_IO,
	.migratepage		= iomap_migrate_page,
	.is_partially_uptodate	= iomap_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
	.swap_activate		= xfs_iomap_swapfile_activate,
};

const struct address_space_operations xfs_dax_aops = {
	.writepages		= xfs_dax_writepages,
	.direct_IO		= noop_direct_IO,
	.set_page_dirty		= noop_set_page_dirty,
	.invalidatepage		= noop_invalidatepage,
	.swap_activate		= xfs_iomap_swapfile_activate,
};