// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 Red Hat, Inc.
 */
#include <linux/cred.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/backing-file.h>
#include "overlayfs.h"
/*
 * Classify @realinode relative to the overlay @inode as a single character,
 * used only in the debug message printed by ovl_open_realfile():
 *
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 *
 * NOTE(review): the return statements were missing from the damaged listing
 * and were restored; confirm the letters against the authoritative tree.
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';
	if (ovl_has_upperdata(inode))
		return 'u';
	else
		return 'm';
}
/* No atime modification on underlying */
#define OVL_OPEN_FLAGS (O_NOATIME)
32 static struct file
*ovl_open_realfile(const struct file
*file
,
33 const struct path
*realpath
)
35 struct inode
*realinode
= d_inode(realpath
->dentry
);
36 struct inode
*inode
= file_inode(file
);
37 struct mnt_idmap
*real_idmap
;
38 struct file
*realfile
;
39 const struct cred
*old_cred
;
40 int flags
= file
->f_flags
| OVL_OPEN_FLAGS
;
41 int acc_mode
= ACC_MODE(flags
);
45 acc_mode
|= MAY_APPEND
;
47 old_cred
= ovl_override_creds(inode
->i_sb
);
48 real_idmap
= mnt_idmap(realpath
->mnt
);
49 err
= inode_permission(real_idmap
, realinode
, MAY_OPEN
| acc_mode
);
51 realfile
= ERR_PTR(err
);
53 if (!inode_owner_or_capable(real_idmap
, realinode
))
56 realfile
= backing_file_open(&file
->f_path
, flags
, realpath
,
59 revert_creds(old_cred
);
61 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
62 file
, file
, ovl_whatisit(inode
, realinode
), file
->f_flags
,
63 realfile
, IS_ERR(realfile
) ? 0 : realfile
->f_flags
);
/* File status flags that ovl_change_flags() propagates to the real file */
#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
70 static int ovl_change_flags(struct file
*file
, unsigned int flags
)
72 struct inode
*inode
= file_inode(file
);
75 flags
&= OVL_SETFL_MASK
;
77 if (((flags
^ file
->f_flags
) & O_APPEND
) && IS_APPEND(inode
))
80 if ((flags
& O_DIRECT
) && !(file
->f_mode
& FMODE_CAN_ODIRECT
))
83 if (file
->f_op
->check_flags
) {
84 err
= file
->f_op
->check_flags(flags
);
89 spin_lock(&file
->f_lock
);
90 file
->f_flags
= (file
->f_flags
& ~OVL_SETFL_MASK
) | flags
;
91 file
->f_iocb_flags
= iocb_flags(file
);
92 spin_unlock(&file
->f_lock
);
97 static int ovl_real_fdget_meta(const struct file
*file
, struct fd
*real
,
100 struct dentry
*dentry
= file_dentry(file
);
101 struct path realpath
;
105 real
->file
= file
->private_data
;
108 ovl_path_real(dentry
, &realpath
);
110 /* lazy lookup and verify of lowerdata */
111 err
= ovl_verify_lowerdata(dentry
);
115 ovl_path_realdata(dentry
, &realpath
);
117 if (!realpath
.dentry
)
120 /* Has it been copied up since we'd opened it? */
121 if (unlikely(file_inode(real
->file
) != d_inode(realpath
.dentry
))) {
122 real
->flags
= FDPUT_FPUT
;
123 real
->file
= ovl_open_realfile(file
, &realpath
);
125 return PTR_ERR_OR_ZERO(real
->file
);
128 /* Did the flags change since open? */
129 if (unlikely((file
->f_flags
^ real
->file
->f_flags
) & ~OVL_OPEN_FLAGS
))
130 return ovl_change_flags(real
->file
, file
->f_flags
);
135 static int ovl_real_fdget(const struct file
*file
, struct fd
*real
)
137 if (d_is_dir(file_dentry(file
))) {
139 real
->file
= ovl_dir_real_file(file
, false);
141 return PTR_ERR_OR_ZERO(real
->file
);
144 return ovl_real_fdget_meta(file
, real
, false);
147 static int ovl_open(struct inode
*inode
, struct file
*file
)
149 struct dentry
*dentry
= file_dentry(file
);
150 struct file
*realfile
;
151 struct path realpath
;
154 /* lazy lookup and verify lowerdata */
155 err
= ovl_verify_lowerdata(dentry
);
159 err
= ovl_maybe_copy_up(dentry
, file
->f_flags
);
163 /* No longer need these flags, so don't pass them on to underlying fs */
164 file
->f_flags
&= ~(O_CREAT
| O_EXCL
| O_NOCTTY
| O_TRUNC
);
166 ovl_path_realdata(dentry
, &realpath
);
167 if (!realpath
.dentry
)
170 realfile
= ovl_open_realfile(file
, &realpath
);
171 if (IS_ERR(realfile
))
172 return PTR_ERR(realfile
);
174 file
->private_data
= realfile
;
179 static int ovl_release(struct inode
*inode
, struct file
*file
)
181 fput(file
->private_data
);
186 static loff_t
ovl_llseek(struct file
*file
, loff_t offset
, int whence
)
188 struct inode
*inode
= file_inode(file
);
190 const struct cred
*old_cred
;
194 * The two special cases below do not need to involve real fs,
195 * so we can optimizing concurrent callers.
198 if (whence
== SEEK_CUR
)
201 if (whence
== SEEK_SET
)
202 return vfs_setpos(file
, 0, 0);
205 ret
= ovl_real_fdget(file
, &real
);
210 * Overlay file f_pos is the master copy that is preserved
211 * through copy up and modified on read/write, but only real
212 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
213 * limitations that are more strict than ->s_maxbytes for specific
214 * files, so we use the real file to perform seeks.
216 ovl_inode_lock(inode
);
217 real
.file
->f_pos
= file
->f_pos
;
219 old_cred
= ovl_override_creds(inode
->i_sb
);
220 ret
= vfs_llseek(real
.file
, offset
, whence
);
221 revert_creds(old_cred
);
223 file
->f_pos
= real
.file
->f_pos
;
224 ovl_inode_unlock(inode
);
/*
 * Refresh the overlay inode's attributes after the real file was modified.
 * Installed as the backing-file ->end_write callback by ovl_write_iter().
 */
static void ovl_file_modified(struct file *file)
{
	/* Update size/mtime */
	ovl_copyattr(file_inode(file));
}
237 static void ovl_file_accessed(struct file
*file
)
239 struct inode
*inode
, *upperinode
;
240 struct timespec64 ctime
, uctime
;
241 struct timespec64 mtime
, umtime
;
243 if (file
->f_flags
& O_NOATIME
)
246 inode
= file_inode(file
);
247 upperinode
= ovl_inode_upper(inode
);
252 ctime
= inode_get_ctime(inode
);
253 uctime
= inode_get_ctime(upperinode
);
254 mtime
= inode_get_mtime(inode
);
255 umtime
= inode_get_mtime(upperinode
);
256 if ((!timespec64_equal(&mtime
, &umtime
)) ||
257 !timespec64_equal(&ctime
, &uctime
)) {
258 inode_set_mtime_to_ts(inode
, inode_get_mtime(upperinode
));
259 inode_set_ctime_to_ts(inode
, uctime
);
262 touch_atime(&file
->f_path
);
265 static ssize_t
ovl_read_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
267 struct file
*file
= iocb
->ki_filp
;
270 struct backing_file_ctx ctx
= {
271 .cred
= ovl_creds(file_inode(file
)->i_sb
),
273 .accessed
= ovl_file_accessed
,
276 if (!iov_iter_count(iter
))
279 ret
= ovl_real_fdget(file
, &real
);
283 ret
= backing_file_read_iter(real
.file
, iter
, iocb
, iocb
->ki_flags
,
290 static ssize_t
ovl_write_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
292 struct file
*file
= iocb
->ki_filp
;
293 struct inode
*inode
= file_inode(file
);
296 int ifl
= iocb
->ki_flags
;
297 struct backing_file_ctx ctx
= {
298 .cred
= ovl_creds(inode
->i_sb
),
300 .end_write
= ovl_file_modified
,
303 if (!iov_iter_count(iter
))
310 ret
= ovl_real_fdget(file
, &real
);
314 if (!ovl_should_sync(OVL_FS(inode
->i_sb
)))
315 ifl
&= ~(IOCB_DSYNC
| IOCB_SYNC
);
318 * Overlayfs doesn't support deferred completions, don't copy
319 * this property in case it is set by the issuer.
321 ifl
&= ~IOCB_DIO_CALLER_COMP
;
322 ret
= backing_file_write_iter(real
.file
, iter
, iocb
, ifl
, &ctx
);
331 static ssize_t
ovl_splice_read(struct file
*in
, loff_t
*ppos
,
332 struct pipe_inode_info
*pipe
, size_t len
,
335 const struct cred
*old_cred
;
339 ret
= ovl_real_fdget(in
, &real
);
343 old_cred
= ovl_override_creds(file_inode(in
)->i_sb
);
344 ret
= vfs_splice_read(real
.file
, ppos
, pipe
, len
, flags
);
345 revert_creds(old_cred
);
346 ovl_file_accessed(in
);
353 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
354 * due to lock order inversion between pipe->mutex in iter_file_splice_write()
355 * and file_start_write(real.file) in ovl_write_iter().
357 * So do everything ovl_write_iter() does and call iter_file_splice_write() on
360 static ssize_t
ovl_splice_write(struct pipe_inode_info
*pipe
, struct file
*out
,
361 loff_t
*ppos
, size_t len
, unsigned int flags
)
364 const struct cred
*old_cred
;
365 struct inode
*inode
= file_inode(out
);
371 ret
= file_remove_privs(out
);
375 ret
= ovl_real_fdget(out
, &real
);
379 old_cred
= ovl_override_creds(inode
->i_sb
);
380 file_start_write(real
.file
);
382 ret
= iter_file_splice_write(pipe
, real
.file
, ppos
, len
, flags
);
384 file_end_write(real
.file
);
386 ovl_file_modified(out
);
387 revert_creds(old_cred
);
396 static int ovl_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
399 const struct cred
*old_cred
;
402 ret
= ovl_sync_status(OVL_FS(file_inode(file
)->i_sb
));
406 ret
= ovl_real_fdget_meta(file
, &real
, !datasync
);
410 /* Don't sync lower file for fear of receiving EROFS error */
411 if (file_inode(real
.file
) == ovl_inode_upper(file_inode(file
))) {
412 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
413 ret
= vfs_fsync_range(real
.file
, start
, end
, datasync
);
414 revert_creds(old_cred
);
422 static int ovl_mmap(struct file
*file
, struct vm_area_struct
*vma
)
424 struct file
*realfile
= file
->private_data
;
425 const struct cred
*old_cred
;
428 if (!realfile
->f_op
->mmap
)
431 if (WARN_ON(file
!= vma
->vm_file
))
434 vma_set_file(vma
, realfile
);
436 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
437 ret
= call_mmap(vma
->vm_file
, vma
);
438 revert_creds(old_cred
);
439 ovl_file_accessed(file
);
444 static long ovl_fallocate(struct file
*file
, int mode
, loff_t offset
, loff_t len
)
446 struct inode
*inode
= file_inode(file
);
448 const struct cred
*old_cred
;
454 ret
= file_remove_privs(file
);
458 ret
= ovl_real_fdget(file
, &real
);
462 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
463 ret
= vfs_fallocate(real
.file
, mode
, offset
, len
);
464 revert_creds(old_cred
);
467 ovl_file_modified(file
);
477 static int ovl_fadvise(struct file
*file
, loff_t offset
, loff_t len
, int advice
)
480 const struct cred
*old_cred
;
483 ret
= ovl_real_fdget(file
, &real
);
487 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
488 ret
= vfs_fadvise(real
.file
, offset
, len
, advice
);
489 revert_creds(old_cred
);
502 static loff_t
ovl_copyfile(struct file
*file_in
, loff_t pos_in
,
503 struct file
*file_out
, loff_t pos_out
,
504 loff_t len
, unsigned int flags
, enum ovl_copyop op
)
506 struct inode
*inode_out
= file_inode(file_out
);
507 struct fd real_in
, real_out
;
508 const struct cred
*old_cred
;
511 inode_lock(inode_out
);
512 if (op
!= OVL_DEDUPE
) {
514 ovl_copyattr(inode_out
);
515 ret
= file_remove_privs(file_out
);
520 ret
= ovl_real_fdget(file_out
, &real_out
);
524 ret
= ovl_real_fdget(file_in
, &real_in
);
530 old_cred
= ovl_override_creds(file_inode(file_out
)->i_sb
);
533 ret
= vfs_copy_file_range(real_in
.file
, pos_in
,
534 real_out
.file
, pos_out
, len
, flags
);
538 ret
= vfs_clone_file_range(real_in
.file
, pos_in
,
539 real_out
.file
, pos_out
, len
, flags
);
543 ret
= vfs_dedupe_file_range_one(real_in
.file
, pos_in
,
544 real_out
.file
, pos_out
, len
,
548 revert_creds(old_cred
);
551 ovl_file_modified(file_out
);
557 inode_unlock(inode_out
);
562 static ssize_t
ovl_copy_file_range(struct file
*file_in
, loff_t pos_in
,
563 struct file
*file_out
, loff_t pos_out
,
564 size_t len
, unsigned int flags
)
566 return ovl_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
, flags
,
570 static loff_t
ovl_remap_file_range(struct file
*file_in
, loff_t pos_in
,
571 struct file
*file_out
, loff_t pos_out
,
572 loff_t len
, unsigned int remap_flags
)
576 if (remap_flags
& ~(REMAP_FILE_DEDUP
| REMAP_FILE_ADVISORY
))
579 if (remap_flags
& REMAP_FILE_DEDUP
)
585 * Don't copy up because of a dedupe request, this wouldn't make sense
586 * most of the time (data would be duplicated instead of deduplicated).
588 if (op
== OVL_DEDUPE
&&
589 (!ovl_inode_upper(file_inode(file_in
)) ||
590 !ovl_inode_upper(file_inode(file_out
))))
593 return ovl_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
,
597 static int ovl_flush(struct file
*file
, fl_owner_t id
)
600 const struct cred
*old_cred
;
603 err
= ovl_real_fdget(file
, &real
);
607 if (real
.file
->f_op
->flush
) {
608 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
609 err
= real
.file
->f_op
->flush(real
.file
, id
);
610 revert_creds(old_cred
);
617 const struct file_operations ovl_file_operations
= {
619 .release
= ovl_release
,
620 .llseek
= ovl_llseek
,
621 .read_iter
= ovl_read_iter
,
622 .write_iter
= ovl_write_iter
,
625 .fallocate
= ovl_fallocate
,
626 .fadvise
= ovl_fadvise
,
628 .splice_read
= ovl_splice_read
,
629 .splice_write
= ovl_splice_write
,
631 .copy_file_range
= ovl_copy_file_range
,
632 .remap_file_range
= ovl_remap_file_range
,