git.ipfire.org Git - thirdparty/kernel/linux.git/blob - fs/fuse/fuse_i.h
fuse: implement splice read/write passthrough
[thirdparty/kernel/linux.git] / fs / fuse / fuse_i.h
1 /*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7 */
8
9 #ifndef _FS_FUSE_I_H
10 #define _FS_FUSE_I_H
11
12 #ifndef pr_fmt
13 # define pr_fmt(fmt) "fuse: " fmt
14 #endif
15
16 #include <linux/fuse.h>
17 #include <linux/fs.h>
18 #include <linux/mount.h>
19 #include <linux/wait.h>
20 #include <linux/list.h>
21 #include <linux/spinlock.h>
22 #include <linux/mm.h>
23 #include <linux/backing-dev.h>
24 #include <linux/mutex.h>
25 #include <linux/rwsem.h>
26 #include <linux/rbtree.h>
27 #include <linux/poll.h>
28 #include <linux/workqueue.h>
29 #include <linux/kref.h>
30 #include <linux/xattr.h>
31 #include <linux/pid_namespace.h>
32 #include <linux/refcount.h>
33 #include <linux/user_namespace.h>
34
35 /** Default max number of pages that can be used in a single read request */
36 #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
37
38 /** Maximum of max_pages received in init_out */
39 #define FUSE_MAX_MAX_PAGES 256
40
41 /** Bias for fi->writectr, meaning new writepages must not be sent */
42 #define FUSE_NOWRITE INT_MIN
43
44 /** It could be as large as PATH_MAX, but would that have any uses? */
45 #define FUSE_NAME_MAX 1024
46
47 /** Number of dentries for each connection in the control filesystem */
48 #define FUSE_CTL_NUM_DENTRIES 5
49
50 /** List of active connections */
51 extern struct list_head fuse_conn_list;
52
53 /** Global mutex protecting fuse_conn_list and the control filesystem */
54 extern struct mutex fuse_mutex;
55
56 /** Module parameters */
57 extern unsigned max_user_bgreq;
58 extern unsigned max_user_congthresh;
59
60 /* One forget request */
61 struct fuse_forget_link {
62 struct fuse_forget_one forget_one;
63 struct fuse_forget_link *next;
64 };
65
66 /* Submount lookup tracking */
67 struct fuse_submount_lookup {
68 /** Refcount */
69 refcount_t count;
70
71 /** Unique ID, which identifies the inode between userspace
72 * and kernel */
73 u64 nodeid;
74
75 /** The request used for sending the FORGET message */
76 struct fuse_forget_link *forget;
77 };
78
79 /** Container for data related to mapping to backing file */
80 struct fuse_backing {
81 struct file *file;
82 struct cred *cred;
83
84 /** refcount */
85 refcount_t count;
86 struct rcu_head rcu;
87 };
88
89 /** FUSE inode */
90 struct fuse_inode {
91 /** Inode data */
92 struct inode inode;
93
94 /** Unique ID, which identifies the inode between userspace
95 * and kernel */
96 u64 nodeid;
97
98 /** Number of lookups on this inode */
99 u64 nlookup;
100
101 /** The request used for sending the FORGET message */
102 struct fuse_forget_link *forget;
103
104 /** Time in jiffies until the file attributes are valid */
105 u64 i_time;
106
107 /* Which attributes are invalid */
108 u32 inval_mask;
109
110 /** The sticky bit in inode->i_mode may have been removed, so
111 preserve the original mode */
112 umode_t orig_i_mode;
113
114 /* Cache birthtime */
115 struct timespec64 i_btime;
116
117 /** 64 bit inode number */
118 u64 orig_ino;
119
120 /** Version of last attribute change */
121 u64 attr_version;
122
123 union {
124 /* read/write io cache (regular file only) */
125 struct {
126 /* Files usable in writepage. Protected by fi->lock */
127 struct list_head write_files;
128
129 /* Writepages pending on truncate or fsync */
130 struct list_head queued_writes;
131
132 /* Number of sent writes, a negative bias
133 * (FUSE_NOWRITE) means more writes are blocked */
134 int writectr;
135
136 /** Number of files/maps using page cache */
137 int iocachectr;
138
139 /* Waitq for writepage completion */
140 wait_queue_head_t page_waitq;
141
142 /* waitq for direct-io completion */
143 wait_queue_head_t direct_io_waitq;
144
145 /* List of writepage requestst (pending or sent) */
146 struct rb_root writepages;
147 };
148
149 /* readdir cache (directory only) */
150 struct {
151 /* true if fully cached */
152 bool cached;
153
154 /* size of cache */
155 loff_t size;
156
157 /* position at end of cache (position of next entry) */
158 loff_t pos;
159
160 /* version of the cache */
161 u64 version;
162
163 /* modification time of directory when cache was
164 * started */
165 struct timespec64 mtime;
166
167 /* iversion of directory when cache was started */
168 u64 iversion;
169
170 /* protects above fields */
171 spinlock_t lock;
172 } rdc;
173 };
174
175 /** Miscellaneous bits describing inode state */
176 unsigned long state;
177
178 /** Lock for serializing lookup and readdir for back compatibility*/
179 struct mutex mutex;
180
181 /** Lock to protect write related fields */
182 spinlock_t lock;
183
184 #ifdef CONFIG_FUSE_DAX
185 /*
186 * Dax specific inode data
187 */
188 struct fuse_inode_dax *dax;
189 #endif
190 /** Submount specific lookup tracking */
191 struct fuse_submount_lookup *submount_lookup;
192 #ifdef CONFIG_FUSE_PASSTHROUGH
193 /** Reference to backing file in passthrough mode */
194 struct fuse_backing *fb;
195 #endif
196 };
197
/** Bit numbers used in fuse_inode->state */
enum {
	/** Advise readdirplus */
	FUSE_I_ADVISE_RDPLUS,
	/** Initialized with readdirplus */
	FUSE_I_INIT_RDPLUS,
	/** An operation changing the file size is in progress */
	FUSE_I_SIZE_UNSTABLE,
	/* Bad inode */
	FUSE_I_BAD,
	/* Has btime */
	FUSE_I_BTIME,
	/* Wants or already has page cache IO */
	FUSE_I_CACHE_IO_MODE,
};
213
214 struct fuse_conn;
215 struct fuse_mount;
216 union fuse_file_args;
217
218 /** FUSE specific file data */
219 struct fuse_file {
220 /** Fuse connection for this file */
221 struct fuse_mount *fm;
222
223 /* Argument space reserved for open/release */
224 union fuse_file_args *args;
225
226 /** Kernel file handle guaranteed to be unique */
227 u64 kh;
228
229 /** File handle used by userspace */
230 u64 fh;
231
232 /** Node id of this file */
233 u64 nodeid;
234
235 /** Refcount */
236 refcount_t count;
237
238 /** FOPEN_* flags returned by open */
239 u32 open_flags;
240
241 /** Entry on inode's write_files list */
242 struct list_head write_entry;
243
244 /* Readdir related */
245 struct {
246 /*
247 * Protects below fields against (crazy) parallel readdir on
248 * same open file. Uncontended in the normal case.
249 */
250 struct mutex lock;
251
252 /* Dir stream position */
253 loff_t pos;
254
255 /* Offset in cache */
256 loff_t cache_off;
257
258 /* Version of cache we are reading */
259 u64 version;
260
261 } readdir;
262
263 /** RB node to be linked on fuse_conn->polled_files */
264 struct rb_node polled_node;
265
266 /** Wait queue head for poll */
267 wait_queue_head_t poll_wait;
268
269 /** Does file hold a fi->iocachectr refcount? */
270 enum { IOM_NONE, IOM_CACHED, IOM_UNCACHED } iomode;
271
272 #ifdef CONFIG_FUSE_PASSTHROUGH
273 /** Reference to backing file in passthrough mode */
274 struct file *passthrough;
275 const struct cred *cred;
276 #endif
277
278 /** Has flock been performed on this file? */
279 bool flock:1;
280 };
281
/** A single input argument of a request: a read-only buffer and its size */
struct fuse_in_arg {
	unsigned int size;
	const void *value;
};
287
/** A single output argument of a request: a writable buffer and its size */
struct fuse_arg {
	unsigned int size;
	void *value;
};
293
/** FUSE page descriptor: length and offset of the data within a page */
struct fuse_page_desc {
	unsigned int length;
	unsigned int offset;
};
299
300 struct fuse_args {
301 uint64_t nodeid;
302 uint32_t opcode;
303 uint8_t in_numargs;
304 uint8_t out_numargs;
305 uint8_t ext_idx;
306 bool force:1;
307 bool noreply:1;
308 bool nocreds:1;
309 bool in_pages:1;
310 bool out_pages:1;
311 bool user_pages:1;
312 bool out_argvar:1;
313 bool page_zeroing:1;
314 bool page_replace:1;
315 bool may_block:1;
316 bool is_ext:1;
317 struct fuse_in_arg in_args[3];
318 struct fuse_arg out_args[2];
319 void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
320 };
321
322 struct fuse_args_pages {
323 struct fuse_args args;
324 struct page **pages;
325 struct fuse_page_desc *descs;
326 unsigned int num_pages;
327 };
328
329 struct fuse_release_args {
330 struct fuse_args args;
331 struct fuse_release_in inarg;
332 struct inode *inode;
333 };
334
335 union fuse_file_args {
336 /* Used during open() */
337 struct fuse_open_out open_outarg;
338 /* Used during release() */
339 struct fuse_release_args release_args;
340 };
341
342 #define FUSE_ARGS(args) struct fuse_args args = {}
343
344 /** The request IO state (for asynchronous processing) */
345 struct fuse_io_priv {
346 struct kref refcnt;
347 int async;
348 spinlock_t lock;
349 unsigned reqs;
350 ssize_t bytes;
351 size_t size;
352 __u64 offset;
353 bool write;
354 bool should_dirty;
355 int err;
356 struct kiocb *iocb;
357 struct completion *done;
358 bool blocking;
359 };
360
/*
 * Initializer for a synchronous struct fuse_io_priv: one reference,
 * async disabled, and @i as the kiocb.  The argument is parenthesized so
 * that any expression can be passed safely.
 */
#define FUSE_IO_PRIV_SYNC(i) \
{					\
	.refcnt = KREF_INIT(1),		\
	.async = 0,			\
	.iocb = (i),			\
}
367
/**
 * Request flags (bit numbers in fuse_req->flags)
 *
 * FR_ISREPLY:		the request has a reply
 * FR_FORCE:		force sending of the request even if interrupted
 * FR_BACKGROUND:	the request is sent in the background
 * FR_WAITING:		the request is counted as "waiting"
 * FR_ABORTED:		the request was aborted
 * FR_INTERRUPTED:	the request has been interrupted
 * FR_LOCKED:		data is being copied to/from the request
 * FR_PENDING:		the request is not yet in userspace
 * FR_SENT:		the request is in userspace, waiting for an answer
 * FR_FINISHED:		the request is finished
 * FR_PRIVATE:		the request is on a private list
 * FR_ASYNC:		the request is asynchronous
 */
enum fuse_req_flag {
	FR_ISREPLY,
	FR_FORCE,
	FR_BACKGROUND,
	FR_WAITING,
	FR_ABORTED,
	FR_INTERRUPTED,
	FR_LOCKED,
	FR_PENDING,
	FR_SENT,
	FR_FINISHED,
	FR_PRIVATE,
	FR_ASYNC,
};
398
399 /**
400 * A request to the client
401 *
402 * .waitq.lock protects the following fields:
403 * - FR_ABORTED
404 * - FR_LOCKED (may also be modified under fc->lock, tested under both)
405 */
406 struct fuse_req {
407 /** This can be on either pending processing or io lists in
408 fuse_conn */
409 struct list_head list;
410
411 /** Entry on the interrupts list */
412 struct list_head intr_entry;
413
414 /* Input/output arguments */
415 struct fuse_args *args;
416
417 /** refcount */
418 refcount_t count;
419
420 /* Request flags, updated with test/set/clear_bit() */
421 unsigned long flags;
422
423 /* The request input header */
424 struct {
425 struct fuse_in_header h;
426 } in;
427
428 /* The request output header */
429 struct {
430 struct fuse_out_header h;
431 } out;
432
433 /** Used to wake up the task waiting for completion of request*/
434 wait_queue_head_t waitq;
435
436 #if IS_ENABLED(CONFIG_VIRTIO_FS)
437 /** virtio-fs's physically contiguous buffer for in and out args */
438 void *argbuf;
439 #endif
440
441 /** fuse_mount this request belongs to */
442 struct fuse_mount *fm;
443 };
444
445 struct fuse_iqueue;
446
447 /**
448 * Input queue callbacks
449 *
450 * Input queue signalling is device-specific. For example, the /dev/fuse file
451 * uses fiq->waitq and fasync to wake processes that are waiting on queue
452 * readiness. These callbacks allow other device types to respond to input
453 * queue activity.
454 */
455 struct fuse_iqueue_ops {
456 /**
457 * Signal that a forget has been queued
458 */
459 void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq)
460 __releases(fiq->lock);
461
462 /**
463 * Signal that an INTERRUPT request has been queued
464 */
465 void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq)
466 __releases(fiq->lock);
467
468 /**
469 * Signal that a request has been queued
470 */
471 void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
472 __releases(fiq->lock);
473
474 /**
475 * Clean up when fuse_iqueue is destroyed
476 */
477 void (*release)(struct fuse_iqueue *fiq);
478 };
479
480 /** /dev/fuse input queue operations */
481 extern const struct fuse_iqueue_ops fuse_dev_fiq_ops;
482
483 struct fuse_iqueue {
484 /** Connection established */
485 unsigned connected;
486
487 /** Lock protecting accesses to members of this structure */
488 spinlock_t lock;
489
490 /** Readers of the connection are waiting on this */
491 wait_queue_head_t waitq;
492
493 /** The next unique request id */
494 u64 reqctr;
495
496 /** The list of pending requests */
497 struct list_head pending;
498
499 /** Pending interrupts */
500 struct list_head interrupts;
501
502 /** Queue of pending forgets */
503 struct fuse_forget_link forget_list_head;
504 struct fuse_forget_link *forget_list_tail;
505
506 /** Batching of FORGET requests (positive indicates FORGET batch) */
507 int forget_batch;
508
509 /** O_ASYNC requests */
510 struct fasync_struct *fasync;
511
512 /** Device-specific callbacks */
513 const struct fuse_iqueue_ops *ops;
514
515 /** Device-specific state */
516 void *priv;
517 };
518
519 #define FUSE_PQ_HASH_BITS 8
520 #define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS)
521
522 struct fuse_pqueue {
523 /** Connection established */
524 unsigned connected;
525
526 /** Lock protecting accessess to members of this structure */
527 spinlock_t lock;
528
529 /** Hash table of requests being processed */
530 struct list_head *processing;
531
532 /** The list of requests under I/O */
533 struct list_head io;
534 };
535
536 /**
537 * Fuse device instance
538 */
539 struct fuse_dev {
540 /** Fuse connection for this device */
541 struct fuse_conn *fc;
542
543 /** Processing queue */
544 struct fuse_pqueue pq;
545
546 /** list entry on fc->devices */
547 struct list_head entry;
548 };
549
/* DAX mode selected for a mount */
enum fuse_dax_mode {
	FUSE_DAX_INODE_DEFAULT,	/* default */
	FUSE_DAX_ALWAYS,	/* "-o dax=always" */
	FUSE_DAX_NEVER,		/* "-o dax=never" */
	FUSE_DAX_INODE_USER,	/* "-o dax=inode" */
};

/*
 * Return true for the two inode-based modes (FUSE_DAX_INODE_DEFAULT and
 * FUSE_DAX_INODE_USER), false for every other mode.
 */
static inline bool fuse_is_inode_dax_mode(enum fuse_dax_mode mode)
{
	switch (mode) {
	case FUSE_DAX_INODE_DEFAULT:
	case FUSE_DAX_INODE_USER:
		return true;
	default:
		return false;
	}
}
561
562 struct fuse_fs_context {
563 int fd;
564 struct file *file;
565 unsigned int rootmode;
566 kuid_t user_id;
567 kgid_t group_id;
568 bool is_bdev:1;
569 bool fd_present:1;
570 bool rootmode_present:1;
571 bool user_id_present:1;
572 bool group_id_present:1;
573 bool default_permissions:1;
574 bool allow_other:1;
575 bool destroy:1;
576 bool no_control:1;
577 bool no_force_umount:1;
578 bool legacy_opts_show:1;
579 enum fuse_dax_mode dax_mode;
580 unsigned int max_read;
581 unsigned int blksize;
582 const char *subtype;
583
584 /* DAX device, may be NULL */
585 struct dax_device *dax_dev;
586
587 /* fuse_dev pointer to fill in, should contain NULL on entry */
588 void **fudptr;
589 };
590
591 struct fuse_sync_bucket {
592 /* count is a possible scalability bottleneck */
593 atomic_t count;
594 wait_queue_head_t waitq;
595 struct rcu_head rcu;
596 };
597
598 /**
599 * A Fuse connection.
600 *
601 * This structure is created, when the root filesystem is mounted, and
602 * is destroyed, when the client device is closed and the last
603 * fuse_mount is destroyed.
604 */
605 struct fuse_conn {
606 /** Lock protecting accessess to members of this structure */
607 spinlock_t lock;
608
609 /** Refcount */
610 refcount_t count;
611
612 /** Number of fuse_dev's */
613 atomic_t dev_count;
614
615 struct rcu_head rcu;
616
617 /** The user id for this mount */
618 kuid_t user_id;
619
620 /** The group id for this mount */
621 kgid_t group_id;
622
623 /** The pid namespace for this mount */
624 struct pid_namespace *pid_ns;
625
626 /** The user namespace for this mount */
627 struct user_namespace *user_ns;
628
629 /** Maximum read size */
630 unsigned max_read;
631
632 /** Maximum write size */
633 unsigned max_write;
634
635 /** Maximum number of pages that can be used in a single request */
636 unsigned int max_pages;
637
638 /** Constrain ->max_pages to this value during feature negotiation */
639 unsigned int max_pages_limit;
640
641 /** Input queue */
642 struct fuse_iqueue iq;
643
644 /** The next unique kernel file handle */
645 atomic64_t khctr;
646
647 /** rbtree of fuse_files waiting for poll events indexed by ph */
648 struct rb_root polled_files;
649
650 /** Maximum number of outstanding background requests */
651 unsigned max_background;
652
653 /** Number of background requests at which congestion starts */
654 unsigned congestion_threshold;
655
656 /** Number of requests currently in the background */
657 unsigned num_background;
658
659 /** Number of background requests currently queued for userspace */
660 unsigned active_background;
661
662 /** The list of background requests set aside for later queuing */
663 struct list_head bg_queue;
664
665 /** Protects: max_background, congestion_threshold, num_background,
666 * active_background, bg_queue, blocked */
667 spinlock_t bg_lock;
668
669 /** Flag indicating that INIT reply has been received. Allocating
670 * any fuse request will be suspended until the flag is set */
671 int initialized;
672
673 /** Flag indicating if connection is blocked. This will be
674 the case before the INIT reply is received, and if there
675 are too many outstading backgrounds requests */
676 int blocked;
677
678 /** waitq for blocked connection */
679 wait_queue_head_t blocked_waitq;
680
681 /** Connection established, cleared on umount, connection
682 abort and device release */
683 unsigned connected;
684
685 /** Connection aborted via sysfs */
686 bool aborted;
687
688 /** Connection failed (version mismatch). Cannot race with
689 setting other bitfields since it is only set once in INIT
690 reply, before any other request, and never cleared */
691 unsigned conn_error:1;
692
693 /** Connection successful. Only set in INIT */
694 unsigned conn_init:1;
695
696 /** Do readahead asynchronously? Only set in INIT */
697 unsigned async_read:1;
698
699 /** Return an unique read error after abort. Only set in INIT */
700 unsigned abort_err:1;
701
702 /** Do not send separate SETATTR request before open(O_TRUNC) */
703 unsigned atomic_o_trunc:1;
704
705 /** Filesystem supports NFS exporting. Only set in INIT */
706 unsigned export_support:1;
707
708 /** write-back cache policy (default is write-through) */
709 unsigned writeback_cache:1;
710
711 /** allow parallel lookups and readdir (default is serialized) */
712 unsigned parallel_dirops:1;
713
714 /** handle fs handles killing suid/sgid/cap on write/chown/trunc */
715 unsigned handle_killpriv:1;
716
717 /** cache READLINK responses in page cache */
718 unsigned cache_symlinks:1;
719
720 /* show legacy mount options */
721 unsigned int legacy_opts_show:1;
722
723 /*
724 * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on
725 * write/trunc only if caller did not have CAP_FSETID. sgid is killed
726 * on write/truncate only if caller did not have CAP_FSETID as well as
727 * file has group execute permission.
728 */
729 unsigned handle_killpriv_v2:1;
730
731 /*
732 * The following bitfields are only for optimization purposes
733 * and hence races in setting them will not cause malfunction
734 */
735
736 /** Is open/release not implemented by fs? */
737 unsigned no_open:1;
738
739 /** Is opendir/releasedir not implemented by fs? */
740 unsigned no_opendir:1;
741
742 /** Is fsync not implemented by fs? */
743 unsigned no_fsync:1;
744
745 /** Is fsyncdir not implemented by fs? */
746 unsigned no_fsyncdir:1;
747
748 /** Is flush not implemented by fs? */
749 unsigned no_flush:1;
750
751 /** Is setxattr not implemented by fs? */
752 unsigned no_setxattr:1;
753
754 /** Does file server support extended setxattr */
755 unsigned setxattr_ext:1;
756
757 /** Is getxattr not implemented by fs? */
758 unsigned no_getxattr:1;
759
760 /** Is listxattr not implemented by fs? */
761 unsigned no_listxattr:1;
762
763 /** Is removexattr not implemented by fs? */
764 unsigned no_removexattr:1;
765
766 /** Are posix file locking primitives not implemented by fs? */
767 unsigned no_lock:1;
768
769 /** Is access not implemented by fs? */
770 unsigned no_access:1;
771
772 /** Is create not implemented by fs? */
773 unsigned no_create:1;
774
775 /** Is interrupt not implemented by fs? */
776 unsigned no_interrupt:1;
777
778 /** Is bmap not implemented by fs? */
779 unsigned no_bmap:1;
780
781 /** Is poll not implemented by fs? */
782 unsigned no_poll:1;
783
784 /** Do multi-page cached writes */
785 unsigned big_writes:1;
786
787 /** Don't apply umask to creation modes */
788 unsigned dont_mask:1;
789
790 /** Are BSD file locking primitives not implemented by fs? */
791 unsigned no_flock:1;
792
793 /** Is fallocate not implemented by fs? */
794 unsigned no_fallocate:1;
795
796 /** Is rename with flags implemented by fs? */
797 unsigned no_rename2:1;
798
799 /** Use enhanced/automatic page cache invalidation. */
800 unsigned auto_inval_data:1;
801
802 /** Filesystem is fully responsible for page cache invalidation. */
803 unsigned explicit_inval_data:1;
804
805 /** Does the filesystem support readdirplus? */
806 unsigned do_readdirplus:1;
807
808 /** Does the filesystem want adaptive readdirplus? */
809 unsigned readdirplus_auto:1;
810
811 /** Does the filesystem support asynchronous direct-IO submission? */
812 unsigned async_dio:1;
813
814 /** Is lseek not implemented by fs? */
815 unsigned no_lseek:1;
816
817 /** Does the filesystem support posix acls? */
818 unsigned posix_acl:1;
819
820 /** Check permissions based on the file mode or not? */
821 unsigned default_permissions:1;
822
823 /** Allow other than the mounter user to access the filesystem ? */
824 unsigned allow_other:1;
825
826 /** Does the filesystem support copy_file_range? */
827 unsigned no_copy_file_range:1;
828
829 /* Send DESTROY request */
830 unsigned int destroy:1;
831
832 /* Delete dentries that have gone stale */
833 unsigned int delete_stale:1;
834
835 /** Do not create entry in fusectl fs */
836 unsigned int no_control:1;
837
838 /** Do not allow MNT_FORCE umount */
839 unsigned int no_force_umount:1;
840
841 /* Auto-mount submounts announced by the server */
842 unsigned int auto_submounts:1;
843
844 /* Propagate syncfs() to server */
845 unsigned int sync_fs:1;
846
847 /* Initialize security xattrs when creating a new inode */
848 unsigned int init_security:1;
849
850 /* Add supplementary group info when creating a new inode */
851 unsigned int create_supp_group:1;
852
853 /* Does the filesystem support per inode DAX? */
854 unsigned int inode_dax:1;
855
856 /* Is tmpfile not implemented by fs? */
857 unsigned int no_tmpfile:1;
858
859 /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */
860 unsigned int direct_io_allow_mmap:1;
861
862 /* Is statx not implemented by fs? */
863 unsigned int no_statx:1;
864
865 /** Passthrough support for read/write IO */
866 unsigned int passthrough:1;
867
868 /** Maximum stack depth for passthrough backing files */
869 int max_stack_depth;
870
871 /** The number of requests waiting for completion */
872 atomic_t num_waiting;
873
874 /** Negotiated minor version */
875 unsigned minor;
876
877 /** Entry on the fuse_mount_list */
878 struct list_head entry;
879
880 /** Device ID from the root super block */
881 dev_t dev;
882
883 /** Dentries in the control filesystem */
884 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
885
886 /** number of dentries used in the above array */
887 int ctl_ndents;
888
889 /** Key for lock owner ID scrambling */
890 u32 scramble_key[4];
891
892 /** Version counter for attribute changes */
893 atomic64_t attr_version;
894
895 /** Called on final put */
896 void (*release)(struct fuse_conn *);
897
898 /**
899 * Read/write semaphore to hold when accessing the sb of any
900 * fuse_mount belonging to this connection
901 */
902 struct rw_semaphore killsb;
903
904 /** List of device instances belonging to this connection */
905 struct list_head devices;
906
907 #ifdef CONFIG_FUSE_DAX
908 /* Dax mode */
909 enum fuse_dax_mode dax_mode;
910
911 /* Dax specific conn data, non-NULL if DAX is enabled */
912 struct fuse_conn_dax *dax;
913 #endif
914
915 /** List of filesystems using this connection */
916 struct list_head mounts;
917
918 /* New writepages go into this bucket */
919 struct fuse_sync_bucket __rcu *curr_bucket;
920
921 #ifdef CONFIG_FUSE_PASSTHROUGH
922 /** IDR for backing files ids */
923 struct idr backing_files_map;
924 #endif
925 };
926
927 /*
928 * Represents a mounted filesystem, potentially a submount.
929 *
930 * This object allows sharing a fuse_conn between separate mounts to
931 * allow submounts with dedicated superblocks and thus separate device
932 * IDs.
933 */
934 struct fuse_mount {
935 /* Underlying (potentially shared) connection to the FUSE server */
936 struct fuse_conn *fc;
937
938 /*
939 * Super block for this connection (fc->killsb must be held when
940 * accessing this).
941 */
942 struct super_block *sb;
943
944 /* Entry on fc->mounts */
945 struct list_head fc_entry;
946 };
947
948 static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
949 {
950 return sb->s_fs_info;
951 }
952
953 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
954 {
955 return get_fuse_mount_super(sb)->fc;
956 }
957
958 static inline struct fuse_mount *get_fuse_mount(struct inode *inode)
959 {
960 return get_fuse_mount_super(inode->i_sb);
961 }
962
963 static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
964 {
965 return get_fuse_mount_super(inode->i_sb)->fc;
966 }
967
968 static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
969 {
970 return container_of(inode, struct fuse_inode, inode);
971 }
972
973 static inline u64 get_node_id(struct inode *inode)
974 {
975 return get_fuse_inode(inode)->nodeid;
976 }
977
978 static inline int invalid_nodeid(u64 nodeid)
979 {
980 return !nodeid || nodeid == FUSE_ROOT_ID;
981 }
982
983 static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
984 {
985 return atomic64_read(&fc->attr_version);
986 }
987
988 static inline bool fuse_stale_inode(const struct inode *inode, int generation,
989 struct fuse_attr *attr)
990 {
991 return inode->i_generation != generation ||
992 inode_wrong_type(inode, attr->mode);
993 }
994
995 static inline void fuse_make_bad(struct inode *inode)
996 {
997 remove_inode_hash(inode);
998 set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
999 }
1000
1001 static inline bool fuse_is_bad(struct inode *inode)
1002 {
1003 return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
1004 }
1005
1006 static inline struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
1007 struct fuse_page_desc **desc)
1008 {
1009 struct page **pages;
1010
1011 pages = kzalloc(npages * (sizeof(struct page *) +
1012 sizeof(struct fuse_page_desc)), flags);
1013 *desc = (void *) (pages + npages);
1014
1015 return pages;
1016 }
1017
1018 static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
1019 unsigned int index,
1020 unsigned int nr_pages)
1021 {
1022 int i;
1023
1024 for (i = index; i < index + nr_pages; i++)
1025 descs[i].length = PAGE_SIZE - descs[i].offset;
1026 }
1027
1028 static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
1029 {
1030 /* Need RCU protection to prevent use after free after the decrement */
1031 rcu_read_lock();
1032 if (atomic_dec_and_test(&bucket->count))
1033 wake_up(&bucket->waitq);
1034 rcu_read_unlock();
1035 }
1036
1037 /** Device operations */
1038 extern const struct file_operations fuse_dev_operations;
1039
1040 extern const struct dentry_operations fuse_dentry_operations;
1041 extern const struct dentry_operations fuse_root_dentry_operations;
1042
1043 /**
1044 * Get a filled in inode
1045 */
1046 struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
1047 int generation, struct fuse_attr *attr,
1048 u64 attr_valid, u64 attr_version);
1049
1050 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
1051 struct fuse_entry_out *outarg, struct inode **inode);
1052
1053 /**
1054 * Send FORGET command
1055 */
1056 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
1057 u64 nodeid, u64 nlookup);
1058
1059 struct fuse_forget_link *fuse_alloc_forget(void);
1060
1061 struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
1062 unsigned int max,
1063 unsigned int *countp);
1064
1065 /*
1066 * Initialize READ or READDIR request
1067 */
1068 struct fuse_io_args {
1069 union {
1070 struct {
1071 struct fuse_read_in in;
1072 u64 attr_ver;
1073 } read;
1074 struct {
1075 struct fuse_write_in in;
1076 struct fuse_write_out out;
1077 bool page_locked;
1078 } write;
1079 };
1080 struct fuse_args_pages ap;
1081 struct fuse_io_priv *io;
1082 struct fuse_file *ff;
1083 };
1084
1085 void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
1086 size_t count, int opcode);
1087
1088
1089 struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release);
1090 void fuse_file_free(struct fuse_file *ff);
1091 int fuse_finish_open(struct inode *inode, struct file *file);
1092
1093 void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
1094 unsigned int flags);
1095
1096 /**
1097 * Send RELEASE or RELEASEDIR request
1098 */
1099 void fuse_release_common(struct file *file, bool isdir);
1100
1101 /**
1102 * Send FSYNC or FSYNCDIR request
1103 */
1104 int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
1105 int datasync, int opcode);
1106
1107 /**
1108 * Notify poll wakeup
1109 */
1110 int fuse_notify_poll_wakeup(struct fuse_conn *fc,
1111 struct fuse_notify_poll_wakeup_out *outarg);
1112
1113 /**
1114 * Initialize file operations on a regular file
1115 */
1116 void fuse_init_file_inode(struct inode *inode, unsigned int flags);
1117
1118 /**
1119 * Initialize inode operations on regular files and special files
1120 */
1121 void fuse_init_common(struct inode *inode);
1122
1123 /**
1124 * Initialize inode and file operations on a directory
1125 */
1126 void fuse_init_dir(struct inode *inode);
1127
1128 /**
1129 * Initialize inode operations on a symlink
1130 */
1131 void fuse_init_symlink(struct inode *inode);
1132
1133 /**
1134 * Change attributes of an inode
1135 */
1136 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
1137 struct fuse_statx *sx,
1138 u64 attr_valid, u64 attr_version);
1139
1140 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
1141 struct fuse_statx *sx,
1142 u64 attr_valid, u32 cache_mask);
1143
1144 u32 fuse_get_cache_mask(struct inode *inode);
1145
1146 /**
1147 * Initialize the client device
1148 */
1149 int fuse_dev_init(void);
1150
1151 /**
1152 * Cleanup the client device
1153 */
1154 void fuse_dev_cleanup(void);
1155
1156 int fuse_ctl_init(void);
1157 void __exit fuse_ctl_cleanup(void);
1158
1159 /**
1160 * Simple request sending that does request allocation and freeing
1161 */
1162 ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args);
1163 int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
1164 gfp_t gfp_flags);
1165
1166 /**
1167 * End a finished request
1168 */
1169 void fuse_request_end(struct fuse_req *req);
1170
1171 /* Abort all requests */
1172 void fuse_abort_conn(struct fuse_conn *fc);
1173 void fuse_wait_aborted(struct fuse_conn *fc);
1174
1175 /**
1176 * Invalidate inode attributes
1177 */
1178
1179 /* Attributes possibly changed on data modification */
1180 #define FUSE_STATX_MODIFY (STATX_MTIME | STATX_CTIME | STATX_BLOCKS)
1181
1182 /* Attributes possibly changed on data and/or size modification */
1183 #define FUSE_STATX_MODSIZE (FUSE_STATX_MODIFY | STATX_SIZE)
1184
1185 void fuse_invalidate_attr(struct inode *inode);
1186 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask);
1187
1188 void fuse_invalidate_entry_cache(struct dentry *entry);
1189
1190 void fuse_invalidate_atime(struct inode *inode);
1191
1192 u64 fuse_time_to_jiffies(u64 sec, u32 nsec);
/*
 * Shorthand that passes the attr_valid and attr_valid_nsec members of the
 * structure pointed to by @o to fuse_time_to_jiffies().
 */
1193 #define ATTR_TIMEOUT(o) \
1194 fuse_time_to_jiffies((o)->attr_valid, (o)->attr_valid_nsec)
1195
1196 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o);
1197
1198 /**
1199 * Acquire reference to fuse_conn
1200 */
1201 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
1202
1203 /**
1204 * Initialize fuse_conn
1205 */
1206 void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
1207 struct user_namespace *user_ns,
1208 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv);
1209
1210 /**
1211 * Release reference to fuse_conn
1212 */
1213 void fuse_conn_put(struct fuse_conn *fc);
1214
1215 struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
1216 struct fuse_dev *fuse_dev_alloc(void);
1217 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
1218 void fuse_dev_free(struct fuse_dev *fud);
1219 void fuse_send_init(struct fuse_mount *fm);
1220
1221 /**
1222 * Fill in superblock and initialize fuse connection
1223 * @sb: partially-initialized superblock to fill in
1224 * @ctx: mount context
1225 */
1226 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx);
1227
1228 /*
1229 * Remove the mount from the connection
1230 *
1231 * Returns whether this was the last mount
1232 */
1233 bool fuse_mount_remove(struct fuse_mount *fm);
1234
1235 /*
1236 * Setup context ops for submounts
1237 */
1238 int fuse_init_fs_context_submount(struct fs_context *fsc);
1239
1240 /*
1241 * Shut down the connection (possibly sending DESTROY request).
1242 */
1243 void fuse_conn_destroy(struct fuse_mount *fm);
1244
1245 /* Drop the connection and free the fuse mount */
1246 void fuse_mount_destroy(struct fuse_mount *fm);
1247
1248 /**
1249 * Add connection to control filesystem
1250 */
1251 int fuse_ctl_add_conn(struct fuse_conn *fc);
1252
1253 /**
1254 * Remove connection from control filesystem
1255 */
1256 void fuse_ctl_remove_conn(struct fuse_conn *fc);
1257
1258 /**
1259 * Is file type valid?
1260 */
1261 int fuse_valid_type(int m);
1262
1263 bool fuse_invalid_attr(struct fuse_attr *attr);
1264
1265 /**
1266 * Is current process allowed to perform filesystem operation?
1267 */
1268 bool fuse_allow_current_process(struct fuse_conn *fc);
1269
1270 u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
1271
1272 void fuse_flush_time_update(struct inode *inode);
1273 void fuse_update_ctime(struct inode *inode);
1274
1275 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask);
1276
1277 void fuse_flush_writepages(struct inode *inode);
1278
1279 void fuse_set_nowrite(struct inode *inode);
1280 void fuse_release_nowrite(struct inode *inode);
1281
1282 /**
1283 * Scan all fuse_mounts belonging to fc to find the first where
1284 * ilookup5() returns a result. Return that result and the
1285 * respective fuse_mount in *fm (unless fm is NULL).
1286 *
1287 * The caller must hold fc->killsb.
1288 */
1289 struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
1290 struct fuse_mount **fm);
1291
1292 /**
1293 * File-system tells the kernel to invalidate cache for the given node id.
1294 */
1295 int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
1296 loff_t offset, loff_t len);
1297
1298 /**
1299 * File-system tells the kernel to invalidate parent attributes and
1300 * the dentry matching parent/name.
1301 *
1302 * If the child_nodeid is non-zero and:
1303 * - matches the inode number for the dentry matching parent/name,
1304 * - is not a mount point
1305 * - is a file or an empty directory
1306 * then the dentry is unhashed (d_delete()).
1307 */
1308 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1309 u64 child_nodeid, struct qstr *name, u32 flags);
1310
1311 int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
1312 bool isdir);
1313
1314 /**
1315 * fuse_direct_io() flags
1316 */
1317
1318 /** If set, it is WRITE; otherwise - READ */
1319 #define FUSE_DIO_WRITE (1 << 0)
1320
1321 /** CUSE passes fuse_direct_io() a file whose f_mapping->host is not from FUSE */
1322 #define FUSE_DIO_CUSE (1 << 1)
1323
1324 ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
1325 loff_t *ppos, int flags);
1326 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1327 unsigned int flags);
1328 long fuse_ioctl_common(struct file *file, unsigned int cmd,
1329 unsigned long arg, unsigned int flags);
1330 __poll_t fuse_file_poll(struct file *file, poll_table *wait);
1331 int fuse_dev_release(struct inode *inode, struct file *file);
1332
1333 bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written);
1334
1335 int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
1336 int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
1337
1338 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1339 struct file *file);
1340
1341 void fuse_set_initialized(struct fuse_conn *fc);
1342
1343 void fuse_unlock_inode(struct inode *inode, bool locked);
1344 bool fuse_lock_inode(struct inode *inode);
1345
1346 int fuse_setxattr(struct inode *inode, const char *name, const void *value,
1347 size_t size, int flags, unsigned int extra_flags);
1348 ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
1349 size_t size);
1350 ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
1351 int fuse_removexattr(struct inode *inode, const char *name);
1352 extern const struct xattr_handler * const fuse_xattr_handlers[];
1353
1354 struct posix_acl;
1355 struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu);
1356 struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap,
1357 struct dentry *dentry, int type);
1358 int fuse_set_acl(struct mnt_idmap *, struct dentry *dentry,
1359 struct posix_acl *acl, int type);
1360
1361 /* readdir.c */
1362 int fuse_readdir(struct file *file, struct dir_context *ctx);
1363
1364 /**
1365 * Return the number of bytes in an arguments list
1366 */
1367 unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
1368
1369 /**
1370 * Get the next unique ID for a request
1371 */
1372 u64 fuse_get_unique(struct fuse_iqueue *fiq);
1373 void fuse_free_conn(struct fuse_conn *fc);
1374
1375 /* dax.c */
1376
/*
 * Non-zero only when CONFIG_FUSE_DAX is enabled and IS_DAX(inode) holds.
 * Because of the && short-circuit, IS_DAX() is not evaluated when the
 * config option is off.
 */
1377 #define FUSE_IS_DAX(inode) (IS_ENABLED(CONFIG_FUSE_DAX) && IS_DAX(inode))
1378
1379 ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
1380 ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
1381 int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
1382 int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end);
1383 int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode,
1384 struct dax_device *dax_dev);
1385 void fuse_dax_conn_free(struct fuse_conn *fc);
1386 bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
1387 void fuse_dax_inode_init(struct inode *inode, unsigned int flags);
1388 void fuse_dax_inode_cleanup(struct inode *inode);
1389 void fuse_dax_dontcache(struct inode *inode, unsigned int flags);
1390 bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment);
1391 void fuse_dax_cancel_work(struct fuse_conn *fc);
1392
1393 /* ioctl.c */
1394 long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1395 long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
1396 unsigned long arg);
1397 int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa);
1398 int fuse_fileattr_set(struct mnt_idmap *idmap,
1399 struct dentry *dentry, struct fileattr *fa);
1400
1401 /* iomode.c */
1402 int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff);
1403 int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb);
1404 void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff);
1405
1406 int fuse_file_io_open(struct file *file, struct inode *inode);
1407 void fuse_file_io_release(struct fuse_file *ff, struct inode *inode);
1408
1409 /* file.c */
1410 struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
1411 unsigned int open_flags, bool isdir);
1412 void fuse_file_release(struct inode *inode, struct fuse_file *ff,
1413 unsigned int open_flags, fl_owner_t id, bool isdir);
1414
1415 /* passthrough.c */
/*
 * Return the fuse_backing pointer stored in @fi.
 *
 * With CONFIG_FUSE_PASSTHROUGH the fi->fb field is loaded with READ_ONCE();
 * without it this always returns NULL and @fi is not touched.
 */
1416 static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi)
1417 {
1418 #ifdef CONFIG_FUSE_PASSTHROUGH
1419 return READ_ONCE(fi->fb);
1420 #else
1421 return NULL;
1422 #endif
1423 }
1424
/*
 * Store @fb in @fi's fb field.
 *
 * With CONFIG_FUSE_PASSTHROUGH the store is done with xchg() and the result
 * of xchg() is returned (the value previously held in fi->fb, per xchg()
 * semantics). Without it, nothing is stored and NULL is returned.
 *
 * NOTE(review): what the caller is expected to do with the returned pointer
 * is not visible in this header - confirm against the callers.
 */
1425 static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi,
1426 struct fuse_backing *fb)
1427 {
1428 #ifdef CONFIG_FUSE_PASSTHROUGH
1429 return xchg(&fi->fb, fb);
1430 #else
1431 return NULL;
1432 #endif
1433 }
1434
/*
 * fuse_backing_get() / fuse_backing_put()
 *
 * With CONFIG_FUSE_PASSTHROUGH these are only declared here; the definitions
 * are not part of this header. Without it, the inline stubs below are used
 * so that calls still compile.
 */
1435 #ifdef CONFIG_FUSE_PASSTHROUGH
1436 struct fuse_backing *fuse_backing_get(struct fuse_backing *fb);
1437 void fuse_backing_put(struct fuse_backing *fb);
1438 #else
1439
/* Stub: ignores @fb and always returns NULL. */
1440 static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
1441 {
1442 return NULL;
1443 }
1444
/* Stub: does nothing. */
1445 static inline void fuse_backing_put(struct fuse_backing *fb)
1446 {
1447 }
1448 #endif
1449
1450 void fuse_backing_files_init(struct fuse_conn *fc);
1451 void fuse_backing_files_free(struct fuse_conn *fc);
1452 int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map);
1453 int fuse_backing_close(struct fuse_conn *fc, int backing_id);
1454
1455 struct fuse_backing *fuse_passthrough_open(struct file *file,
1456 struct inode *inode,
1457 int backing_id);
1458 void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb);
1459
/*
 * Return the passthrough file attached to @ff.
 *
 * With CONFIG_FUSE_PASSTHROUGH this is the ff->passthrough member; without
 * it this always returns NULL and @ff is not dereferenced.
 */
1460 static inline struct file *fuse_file_passthrough(struct fuse_file *ff)
1461 {
1462 #ifdef CONFIG_FUSE_PASSTHROUGH
1463 return ff->passthrough;
1464 #else
1465 return NULL;
1466 #endif
1467 }
1468
1469 ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter);
1470 ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *iter);
1471 ssize_t fuse_passthrough_splice_read(struct file *in, loff_t *ppos,
1472 struct pipe_inode_info *pipe,
1473 size_t len, unsigned int flags);
1474 ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
1475 struct file *out, loff_t *ppos,
1476 size_t len, unsigned int flags);
1477
1478 #endif /* _FS_FUSE_I_H */