/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Network filesystem support services.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * See:
 *
 *	Documentation/filesystems/netfs_library.rst
 *
 * for a description of the network filesystem interface declared here.
 */
14 #ifndef _LINUX_NETFS_H
15 #define _LINUX_NETFS_H
17 #include <linux/workqueue.h>
19 #include <linux/pagemap.h>
20 #include <linux/uio.h>
21 #include <linux/rolling_buffer.h>
23 enum netfs_sreq_ref_trace
;
24 typedef struct mempool_s mempool_t
;
/**
 * folio_start_private_2 - Start an fscache write on a folio.  [DEPRECATED]
 * @folio: The folio.
 *
 * Call this function before writing a folio to a local cache.  Starting a
 * second write before the first one finishes is not allowed.
 *
 * Note that this should no longer be used.
 */
static inline void folio_start_private_2(struct folio *folio)
{
	/* The private_2 flag must not already be set when a write starts. */
	VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);
	/*
	 * NOTE(review): this extract skips a line between the check above and
	 * the flag set below; confirm against the upstream header that no
	 * statement was lost here.
	 */
	folio_set_private_2(folio);
}
/*
 * Where a piece of I/O gets its data from or sends its data to.
 *
 * NOTE(review): only the enumerators visible in this extract are listed; the
 * extract has gaps before NETFS_FILL_WITH_ZEROES, around
 * NETFS_UPLOAD_TO_SERVER and before the closing brace, so further values and
 * any trailing attribute must be confirmed against the upstream header
 * before relying on the numeric values.
 */
enum netfs_io_source {
	NETFS_FILL_WITH_ZEROES,
	NETFS_DOWNLOAD_FROM_SERVER,
	NETFS_READ_FROM_CACHE,
	NETFS_UPLOAD_TO_SERVER,
};
/*
 * Completion callback type.  @priv is the caller's private pointer and
 * @transferred_or_error carries either a byte count or an error value.
 */
typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error);
56 * Per-inode context. This wraps the VFS inode.
59 struct inode inode
; /* The VFS inode */
60 const struct netfs_request_ops
*ops
;
61 #if IS_ENABLED(CONFIG_FSCACHE)
62 struct fscache_cookie
*cache
;
64 struct mutex wb_lock
; /* Writeback serialisation */
65 loff_t remote_i_size
; /* Size of the remote file */
66 loff_t zero_point
; /* Size after which we assume there's no data
68 atomic_t io_count
; /* Number of outstanding reqs */
70 #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
71 #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
72 #define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */
73 #define NETFS_ICTX_SINGLE_NO_UPLOAD 4 /* Monolithic payload, cache but no upload */
/*
 * A netfs group - for instance a ceph snap.  This is marked on dirty pages and
 * pages marked with a group must be flushed before they can be written under
 * the domain of another group.
 *
 * NOTE(review): only the ->free() hook is visible in this extract; a line is
 * missing between the struct opening and this member - confirm against the
 * upstream header.
 */
struct netfs_group {
	void (*free)(struct netfs_group *netfs_group);
};
/*
 * Information about a dirty page (attached only if necessary).
 */
struct netfs_folio {
	struct netfs_group	*netfs_group;	/* Filesystem's grouping marker (or NULL). */
	unsigned int		dirty_offset;	/* Write-streaming dirty data offset */
	unsigned int		dirty_len;	/* Write-streaming dirty data length */
};
#define NETFS_FOLIO_INFO		0x1UL	/* OR'd with folio->private. */
#define NETFS_FOLIO_COPY_TO_CACHE	((struct netfs_group *)0x356UL) /* Write to the cache only */

/*
 * Test whether a folio private value carries the NETFS_FOLIO_INFO tag bit.
 * Returns true if the low tag bit is set in @priv, false otherwise.
 */
static inline bool netfs_is_folio_info(const void *priv)
{
	return (unsigned long)priv & NETFS_FOLIO_INFO;
}
103 static inline struct netfs_folio
*__netfs_folio_info(const void *priv
)
105 if (netfs_is_folio_info(priv
))
106 return (struct netfs_folio
*)((unsigned long)priv
& ~NETFS_FOLIO_INFO
);
/*
 * Get the netfs_folio record for a folio by decoding the folio's private
 * value with __netfs_folio_info().
 */
static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
{
	return __netfs_folio_info(folio_get_private(folio));
}
115 static inline struct netfs_group
*netfs_folio_group(struct folio
*folio
)
117 struct netfs_folio
*finfo
;
118 void *priv
= folio_get_private(folio
);
120 finfo
= netfs_folio_info(folio
);
122 return finfo
->netfs_group
;
127 * Stream of I/O subrequests going to a particular destination, such as the
128 * server or the local cache. This is mainly intended for writing where we may
129 * have to write to multiple destinations concurrently.
131 struct netfs_io_stream
{
132 /* Submission tracking */
133 struct netfs_io_subrequest
*construct
; /* Op being constructed */
134 size_t sreq_max_len
; /* Maximum size of a subrequest */
135 unsigned int sreq_max_segs
; /* 0 or max number of segments in an iterator */
136 unsigned int submit_off
; /* Folio offset we're submitting from */
137 unsigned int submit_len
; /* Amount of data left to submit */
138 unsigned int submit_extendable_to
; /* Amount I/O can be rounded up to */
139 void (*prepare_write
)(struct netfs_io_subrequest
*subreq
);
140 void (*issue_write
)(struct netfs_io_subrequest
*subreq
);
141 /* Collection tracking */
142 struct list_head subrequests
; /* Contributory I/O operations */
143 struct netfs_io_subrequest
*front
; /* Op being collected */
144 unsigned long long collected_to
; /* Position we've collected results to */
145 size_t transferred
; /* The amount transferred from this stream */
146 unsigned short error
; /* Aggregate error for the stream */
147 enum netfs_io_source source
; /* Where to read from/write to */
148 unsigned char stream_nr
; /* Index of stream in parent table */
149 bool avail
; /* T if stream is available */
150 bool active
; /* T if stream is active */
151 bool need_retry
; /* T if this stream needs retrying */
152 bool failed
; /* T if this stream failed */
/*
 * Resources required to do operations on a cache.
 *
 * NOTE(review): this extract skips lines between ->ops and ->debug_id;
 * confirm against the upstream header whether members were lost there.
 */
struct netfs_cache_resources {
	const struct netfs_cache_ops	*ops;
	unsigned int			debug_id;	/* Cookie debug ID */
	unsigned int			inval_counter;	/* object->inval_counter at begin_op */
};
167 * Descriptor for a single component subrequest. Each operation represents an
168 * individual read/write from/to a server, a cache, a journal, etc..
170 * The buffer iterator is persistent for the life of the subrequest struct and
171 * the pages it points to can be relied on to exist for the duration.
173 struct netfs_io_subrequest
{
174 struct netfs_io_request
*rreq
; /* Supervising I/O request */
175 struct work_struct work
;
176 struct list_head rreq_link
; /* Link in rreq->subrequests */
177 struct iov_iter io_iter
; /* Iterator for this subrequest */
178 unsigned long long start
; /* Where to start the I/O */
179 size_t len
; /* Size of the I/O */
180 size_t transferred
; /* Amount of data transferred */
182 short error
; /* 0 or error that occurred */
183 unsigned short debug_index
; /* Index in list (for debugging output) */
184 unsigned int nr_segs
; /* Number of segs in io_iter */
185 u8 retry_count
; /* The number of retries (0 on initial pass) */
186 enum netfs_io_source source
; /* Where to read from/write to */
187 unsigned char stream_nr
; /* I/O stream this belongs to */
189 #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
190 #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
191 #define NETFS_SREQ_MADE_PROGRESS 4 /* Set if we transferred at least some data */
192 #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
193 #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */
194 #define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */
195 #define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */
196 #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */
197 #define NETFS_SREQ_FAILED 10 /* Set if the subreq failed unretryably */
/*
 * What triggered an I/O request.
 *
 * NOTE(review): the extract has a gap after the last visible enumerator, so a
 * trailing value and/or attribute may have been lost - confirm against the
 * upstream header.
 */
enum netfs_io_origin {
	NETFS_READAHEAD,		/* This read was triggered by readahead */
	NETFS_READPAGE,			/* This read is a synchronous read */
	NETFS_READ_GAPS,		/* This read is a synchronous read to fill gaps */
	NETFS_READ_SINGLE,		/* This read should be treated as a single object */
	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
	NETFS_UNBUFFERED_READ,		/* This is an unbuffered read */
	NETFS_DIO_READ,			/* This is a direct I/O read */
	NETFS_WRITEBACK,		/* This write was triggered by writepages */
	NETFS_WRITEBACK_SINGLE,		/* This monolithic write was triggered by writepages */
	NETFS_WRITETHROUGH,		/* This write was made by netfs_perform_write() */
	NETFS_UNBUFFERED_WRITE,		/* This is an unbuffered write */
	NETFS_DIO_WRITE,		/* This is a direct I/O write */
	NETFS_PGPRIV2_COPY_TO_CACHE,	/* [DEPRECATED] This is writing read data to the cache */
};
218 * Descriptor for an I/O helper request. This is used to make multiple I/O
219 * operations to a variety of data stores and then stitch the result together.
221 struct netfs_io_request
{
223 struct work_struct cleanup_work
; /* Deferred cleanup work */
226 struct work_struct work
; /* Result collector work */
227 struct inode
*inode
; /* The file being accessed */
228 struct address_space
*mapping
; /* The mapping being accessed */
229 struct kiocb
*iocb
; /* AIO completion vector */
230 struct netfs_cache_resources cache_resources
;
231 struct netfs_io_request
*copy_to_cache
; /* Request to write just-read data to the cache */
232 #ifdef CONFIG_PROC_FS
233 struct list_head proc_link
; /* Link in netfs_iorequests */
235 struct netfs_io_stream io_streams
[2]; /* Streams of parallel I/O operations */
236 #define NR_IO_STREAMS 2 //wreq->nr_io_streams
237 struct netfs_group
*group
; /* Writeback group being written back */
238 struct rolling_buffer buffer
; /* Unencrypted buffer */
239 #define NETFS_ROLLBUF_PUT_MARK ROLLBUF_MARK_1
240 #define NETFS_ROLLBUF_PAGECACHE_MARK ROLLBUF_MARK_2
241 wait_queue_head_t waitq
; /* Processor waiter */
242 void *netfs_priv
; /* Private data for the netfs */
243 void *netfs_priv2
; /* Private data for the netfs */
244 struct bio_vec
*direct_bv
; /* DIO buffer list (when handling iovec-iter) */
245 unsigned long long submitted
; /* Amount submitted for I/O so far */
246 unsigned long long len
; /* Length of the request */
247 size_t transferred
; /* Amount to be indicated as transferred */
248 long error
; /* 0 or error that occurred */
249 unsigned long long i_size
; /* Size of the file */
250 unsigned long long start
; /* Start position */
251 atomic64_t issued_to
; /* Write issuer folio cursor */
252 unsigned long long collected_to
; /* Point we've collected to */
253 unsigned long long cleaned_to
; /* Position we've cleaned folios to */
254 unsigned long long abandon_to
; /* Position to abandon folios to */
255 pgoff_t no_unlock_folio
; /* Don't unlock this folio after read */
256 unsigned int direct_bv_count
; /* Number of elements in direct_bv[] */
257 unsigned int debug_id
;
258 unsigned int rsize
; /* Maximum read size (0 for none) */
259 unsigned int wsize
; /* Maximum write size (0 for none) */
260 atomic_t subreq_counter
; /* Next subreq->debug_index */
261 unsigned int nr_group_rel
; /* Number of refs to release on ->group */
262 spinlock_t lock
; /* Lock for queuing subreqs */
263 unsigned char front_folio_order
; /* Order (size) of front folio */
264 enum netfs_io_origin origin
; /* Origin of the request */
265 bool direct_bv_unpin
; /* T if direct_bv[] must be unpinned */
268 #define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */
269 #define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */
270 #define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */
271 #define NETFS_RREQ_FAILED 3 /* The request failed */
272 #define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */
273 #define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */
274 #define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */
275 #define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */
276 #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */
277 #define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */
278 #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
279 #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
280 * write to cache on read */
281 const struct netfs_request_ops
*netfs_ops
;
285 * Operations the network filesystem can/must provide to the helpers.
287 struct netfs_request_ops
{
288 mempool_t
*request_pool
;
289 mempool_t
*subrequest_pool
;
290 int (*init_request
)(struct netfs_io_request
*rreq
, struct file
*file
);
291 void (*free_request
)(struct netfs_io_request
*rreq
);
292 void (*free_subrequest
)(struct netfs_io_subrequest
*rreq
);
294 /* Read request handling */
295 void (*expand_readahead
)(struct netfs_io_request
*rreq
);
296 int (*prepare_read
)(struct netfs_io_subrequest
*subreq
);
297 void (*issue_read
)(struct netfs_io_subrequest
*subreq
);
298 bool (*is_still_valid
)(struct netfs_io_request
*rreq
);
299 int (*check_write_begin
)(struct file
*file
, loff_t pos
, unsigned len
,
300 struct folio
**foliop
, void **_fsdata
);
301 void (*done
)(struct netfs_io_request
*rreq
);
303 /* Modification handling */
304 void (*update_i_size
)(struct inode
*inode
, loff_t i_size
);
305 void (*post_modify
)(struct inode
*inode
);
307 /* Write request handling */
308 void (*begin_writeback
)(struct netfs_io_request
*wreq
);
309 void (*prepare_write
)(struct netfs_io_subrequest
*subreq
);
310 void (*issue_write
)(struct netfs_io_subrequest
*subreq
);
311 void (*retry_request
)(struct netfs_io_request
*wreq
, struct netfs_io_stream
*stream
);
312 void (*invalidate_cache
)(struct netfs_io_request
*wreq
);
/*
 * How to handle reading from a hole.
 */
enum netfs_read_from_hole {
	NETFS_READ_HOLE_IGNORE,
	NETFS_READ_HOLE_FAIL,
};
324 * Table of operations for access to a cache.
326 struct netfs_cache_ops
{
327 /* End an operation */
328 void (*end_operation
)(struct netfs_cache_resources
*cres
);
330 /* Read data from the cache */
331 int (*read
)(struct netfs_cache_resources
*cres
,
333 struct iov_iter
*iter
,
334 enum netfs_read_from_hole read_hole
,
335 netfs_io_terminated_t term_func
,
336 void *term_func_priv
);
338 /* Write data to the cache */
339 int (*write
)(struct netfs_cache_resources
*cres
,
341 struct iov_iter
*iter
,
342 netfs_io_terminated_t term_func
,
343 void *term_func_priv
);
345 /* Write data to the cache from a netfs subrequest. */
346 void (*issue_write
)(struct netfs_io_subrequest
*subreq
);
348 /* Expand readahead request */
349 void (*expand_readahead
)(struct netfs_cache_resources
*cres
,
350 unsigned long long *_start
,
351 unsigned long long *_len
,
352 unsigned long long i_size
);
354 /* Prepare a read operation, shortening it to a cached/uncached
355 * boundary as appropriate.
357 enum netfs_io_source (*prepare_read
)(struct netfs_io_subrequest
*subreq
,
358 unsigned long long i_size
);
360 /* Prepare a write subrequest, working out if we're allowed to do it
361 * and finding out the maximum amount of data to gather before
362 * attempting to submit. If we're not permitted to do it, the
363 * subrequest should be marked failed.
365 void (*prepare_write_subreq
)(struct netfs_io_subrequest
*subreq
);
367 /* Prepare a write operation, working out what part of the write we can
370 int (*prepare_write
)(struct netfs_cache_resources
*cres
,
371 loff_t
*_start
, size_t *_len
, size_t upper_len
,
372 loff_t i_size
, bool no_space_allocated_yet
);
374 /* Prepare an on-demand read operation, shortening it to a cached/uncached
375 * boundary as appropriate.
377 enum netfs_io_source (*prepare_ondemand_read
)(struct netfs_cache_resources
*cres
,
378 loff_t start
, size_t *_len
,
380 unsigned long *_flags
, ino_t ino
);
382 /* Query the occupancy of the cache in a region, returning where the
383 * next chunk of data starts and how long it is.
385 int (*query_occupancy
)(struct netfs_cache_resources
*cres
,
386 loff_t start
, size_t len
, size_t granularity
,
387 loff_t
*_data_start
, size_t *_data_len
);
390 /* High-level read API. */
391 ssize_t
netfs_unbuffered_read_iter_locked(struct kiocb
*iocb
, struct iov_iter
*iter
);
392 ssize_t
netfs_unbuffered_read_iter(struct kiocb
*iocb
, struct iov_iter
*iter
);
393 ssize_t
netfs_buffered_read_iter(struct kiocb
*iocb
, struct iov_iter
*iter
);
394 ssize_t
netfs_file_read_iter(struct kiocb
*iocb
, struct iov_iter
*iter
);
396 /* High-level write API */
397 ssize_t
netfs_perform_write(struct kiocb
*iocb
, struct iov_iter
*iter
,
398 struct netfs_group
*netfs_group
);
399 ssize_t
netfs_buffered_write_iter_locked(struct kiocb
*iocb
, struct iov_iter
*from
,
400 struct netfs_group
*netfs_group
);
401 ssize_t
netfs_unbuffered_write_iter(struct kiocb
*iocb
, struct iov_iter
*from
);
402 ssize_t
netfs_unbuffered_write_iter_locked(struct kiocb
*iocb
, struct iov_iter
*iter
,
403 struct netfs_group
*netfs_group
);
404 ssize_t
netfs_file_write_iter(struct kiocb
*iocb
, struct iov_iter
*from
);
406 /* Single, monolithic object read/write API. */
407 void netfs_single_mark_inode_dirty(struct inode
*inode
);
408 ssize_t
netfs_read_single(struct inode
*inode
, struct file
*file
, struct iov_iter
*iter
);
409 int netfs_writeback_single(struct address_space
*mapping
,
410 struct writeback_control
*wbc
,
411 struct iov_iter
*iter
);
413 /* Address operations API */
414 struct readahead_control
;
415 void netfs_readahead(struct readahead_control
*);
416 int netfs_read_folio(struct file
*, struct folio
*);
417 int netfs_write_begin(struct netfs_inode
*, struct file
*,
418 struct address_space
*, loff_t pos
, unsigned int len
,
419 struct folio
**, void **fsdata
);
420 int netfs_writepages(struct address_space
*mapping
,
421 struct writeback_control
*wbc
);
422 bool netfs_dirty_folio(struct address_space
*mapping
, struct folio
*folio
);
423 int netfs_unpin_writeback(struct inode
*inode
, struct writeback_control
*wbc
);
424 void netfs_clear_inode_writeback(struct inode
*inode
, const void *aux
);
425 void netfs_invalidate_folio(struct folio
*folio
, size_t offset
, size_t length
);
426 bool netfs_release_folio(struct folio
*folio
, gfp_t gfp
);
428 /* VMA operations API. */
429 vm_fault_t
netfs_page_mkwrite(struct vm_fault
*vmf
, struct netfs_group
*netfs_group
);
431 /* (Sub)request management API. */
432 void netfs_read_subreq_progress(struct netfs_io_subrequest
*subreq
);
433 void netfs_read_subreq_terminated(struct netfs_io_subrequest
*subreq
);
434 void netfs_get_subrequest(struct netfs_io_subrequest
*subreq
,
435 enum netfs_sreq_ref_trace what
);
436 void netfs_put_subrequest(struct netfs_io_subrequest
*subreq
,
437 enum netfs_sreq_ref_trace what
);
438 ssize_t
netfs_extract_user_iter(struct iov_iter
*orig
, size_t orig_len
,
439 struct iov_iter
*new,
440 iov_iter_extraction_t extraction_flags
);
441 size_t netfs_limit_iter(const struct iov_iter
*iter
, size_t start_offset
,
442 size_t max_size
, size_t max_segs
);
443 void netfs_prepare_write_failed(struct netfs_io_subrequest
*subreq
);
444 void netfs_write_subrequest_terminated(void *_op
, ssize_t transferred_or_error
);
445 void netfs_queue_write_request(struct netfs_io_subrequest
*subreq
);
447 int netfs_start_io_read(struct inode
*inode
);
448 void netfs_end_io_read(struct inode
*inode
);
449 int netfs_start_io_write(struct inode
*inode
);
450 void netfs_end_io_write(struct inode
*inode
);
451 int netfs_start_io_direct(struct inode
*inode
);
452 void netfs_end_io_direct(struct inode
*inode
);
454 /* Miscellaneous APIs. */
455 struct folio_queue
*netfs_folioq_alloc(unsigned int rreq_id
, gfp_t gfp
,
456 unsigned int trace
/*enum netfs_folioq_trace*/);
457 void netfs_folioq_free(struct folio_queue
*folioq
,
458 unsigned int trace
/*enum netfs_trace_folioq*/);
460 /* Buffer wrangling helpers API. */
461 int netfs_alloc_folioq_buffer(struct address_space
*mapping
,
462 struct folio_queue
**_buffer
,
463 size_t *_cur_size
, ssize_t size
, gfp_t gfp
);
464 void netfs_free_folioq_buffer(struct folio_queue
*fq
);
467 * netfs_inode - Get the netfs inode context from the inode
468 * @inode: The inode to query
470 * Get the netfs lib inode context from the network filesystem's inode. The
471 * context struct is expected to directly follow on from the VFS inode struct.
473 static inline struct netfs_inode
*netfs_inode(struct inode
*inode
)
475 return container_of(inode
, struct netfs_inode
, inode
);
479 * netfs_inode_init - Initialise a netfslib inode context
480 * @ctx: The netfs inode to initialise
481 * @ops: The netfs's operations list
482 * @use_zero_point: True to use the zero_point read optimisation
484 * Initialise the netfs library context struct. This is expected to follow on
485 * directly from the VFS inode struct.
487 static inline void netfs_inode_init(struct netfs_inode
*ctx
,
488 const struct netfs_request_ops
*ops
,
492 ctx
->remote_i_size
= i_size_read(&ctx
->inode
);
493 ctx
->zero_point
= LLONG_MAX
;
495 atomic_set(&ctx
->io_count
, 0);
496 #if IS_ENABLED(CONFIG_FSCACHE)
499 mutex_init(&ctx
->wb_lock
);
500 /* ->releasepage() drives zero_point */
501 if (use_zero_point
) {
502 ctx
->zero_point
= ctx
->remote_i_size
;
503 mapping_set_release_always(ctx
->inode
.i_mapping
);
508 * netfs_resize_file - Note that a file got resized
509 * @ctx: The netfs inode being resized
510 * @new_i_size: The new file size
511 * @changed_on_server: The change was applied to the server
513 * Inform the netfs lib that a file got resized so that it can adjust its state.
515 static inline void netfs_resize_file(struct netfs_inode
*ctx
, loff_t new_i_size
,
516 bool changed_on_server
)
518 if (changed_on_server
)
519 ctx
->remote_i_size
= new_i_size
;
520 if (new_i_size
< ctx
->zero_point
)
521 ctx
->zero_point
= new_i_size
;
525 * netfs_i_cookie - Get the cache cookie from the inode
526 * @ctx: The netfs inode to query
528 * Get the caching cookie (if enabled) from the network filesystem's inode.
530 static inline struct fscache_cookie
*netfs_i_cookie(struct netfs_inode
*ctx
)
532 #if IS_ENABLED(CONFIG_FSCACHE)
540 * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
541 * @inode: The netfs inode to wait on
543 * Wait for outstanding I/O requests of any type to complete. This is intended
544 * to be called from inode eviction routines. This makes sure that any
545 * resources held by those requests are cleaned up before we let the inode get
548 static inline void netfs_wait_for_outstanding_io(struct inode
*inode
)
550 struct netfs_inode
*ictx
= netfs_inode(inode
);
552 wait_var_event(&ictx
->io_count
, atomic_read(&ictx
->io_count
) == 0);
555 #endif /* _LINUX_NETFS_H */