// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 * Copyright 2018 Omnibond Systems, L.L.C.
 *
 * See COPYING in top-level directory.
 */

/*
 * Linux VFS inode operations.
 */
13 #include <linux/blkdev.h>
14 #include <linux/fileattr.h>
16 #include "orangefs-kernel.h"
17 #include "orangefs-bufmap.h"
19 static int orangefs_writepage_locked(struct page
*page
,
20 struct writeback_control
*wbc
)
22 struct inode
*inode
= page
->mapping
->host
;
23 struct orangefs_write_range
*wr
= NULL
;
30 set_page_writeback(page
);
32 len
= i_size_read(inode
);
33 if (PagePrivate(page
)) {
34 wr
= (struct orangefs_write_range
*)page_private(page
);
35 WARN_ON(wr
->pos
>= len
);
37 if (off
+ wr
->len
> len
)
43 off
= page_offset(page
);
44 if (off
+ PAGE_SIZE
> len
)
49 /* Should've been handled in orangefs_invalidate_folio. */
50 WARN_ON(off
== len
|| off
+ wlen
> len
);
54 bv
.bv_offset
= off
% PAGE_SIZE
;
56 iov_iter_bvec(&iter
, WRITE
, &bv
, 1, wlen
);
58 ret
= wait_for_direct_io(ORANGEFS_IO_WRITE
, inode
, &off
, &iter
, wlen
,
62 mapping_set_error(page
->mapping
, ret
);
66 kfree(detach_page_private(page
));
/*
 * ->writepage: write the locked page, then unlock it and end writeback.
 */
static int orangefs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = orangefs_writepage_locked(page, wbc);
	unlock_page(page);
	end_page_writeback(page);
	return ret;
}
79 struct orangefs_writepages
{
90 static int orangefs_writepages_work(struct orangefs_writepages
*ow
,
91 struct writeback_control
*wbc
)
93 struct inode
*inode
= ow
->pages
[0]->mapping
->host
;
94 struct orangefs_write_range
*wrp
, wr
;
101 len
= i_size_read(inode
);
103 for (i
= 0; i
< ow
->npages
; i
++) {
104 set_page_writeback(ow
->pages
[i
]);
105 ow
->bv
[i
].bv_page
= ow
->pages
[i
];
106 ow
->bv
[i
].bv_len
= min(page_offset(ow
->pages
[i
]) + PAGE_SIZE
,
108 max(ow
->off
, page_offset(ow
->pages
[i
]));
110 ow
->bv
[i
].bv_offset
= ow
->off
-
111 page_offset(ow
->pages
[i
]);
113 ow
->bv
[i
].bv_offset
= 0;
115 iov_iter_bvec(&iter
, WRITE
, ow
->bv
, ow
->npages
, ow
->len
);
117 WARN_ON(ow
->off
>= len
);
118 if (ow
->off
+ ow
->len
> len
)
119 ow
->len
= len
- ow
->off
;
124 ret
= wait_for_direct_io(ORANGEFS_IO_WRITE
, inode
, &off
, &iter
, ow
->len
,
127 for (i
= 0; i
< ow
->npages
; i
++) {
128 SetPageError(ow
->pages
[i
]);
129 mapping_set_error(ow
->pages
[i
]->mapping
, ret
);
130 if (PagePrivate(ow
->pages
[i
])) {
131 wrp
= (struct orangefs_write_range
*)
132 page_private(ow
->pages
[i
]);
133 ClearPagePrivate(ow
->pages
[i
]);
134 put_page(ow
->pages
[i
]);
137 end_page_writeback(ow
->pages
[i
]);
138 unlock_page(ow
->pages
[i
]);
142 for (i
= 0; i
< ow
->npages
; i
++) {
143 if (PagePrivate(ow
->pages
[i
])) {
144 wrp
= (struct orangefs_write_range
*)
145 page_private(ow
->pages
[i
]);
146 ClearPagePrivate(ow
->pages
[i
]);
147 put_page(ow
->pages
[i
]);
150 end_page_writeback(ow
->pages
[i
]);
151 unlock_page(ow
->pages
[i
]);
157 static int orangefs_writepages_callback(struct page
*page
,
158 struct writeback_control
*wbc
, void *data
)
160 struct orangefs_writepages
*ow
= data
;
161 struct orangefs_write_range
*wr
;
164 if (!PagePrivate(page
)) {
166 /* It's not private so there's nothing to write, right? */
167 printk("writepages_callback not private!\n");
171 wr
= (struct orangefs_write_range
*)page_private(page
);
174 if (ow
->npages
== 0) {
179 ow
->pages
[ow
->npages
++] = page
;
183 if (!uid_eq(ow
->uid
, wr
->uid
) || !gid_eq(ow
->gid
, wr
->gid
)) {
184 orangefs_writepages_work(ow
, wbc
);
189 if (ow
->off
+ ow
->len
== wr
->pos
) {
191 ow
->pages
[ow
->npages
++] = page
;
198 orangefs_writepages_work(ow
, wbc
);
201 ret
= orangefs_writepage_locked(page
, wbc
);
202 mapping_set_error(page
->mapping
, ret
);
204 end_page_writeback(page
);
206 if (ow
->npages
== ow
->maxpages
) {
207 orangefs_writepages_work(ow
, wbc
);
214 static int orangefs_writepages(struct address_space
*mapping
,
215 struct writeback_control
*wbc
)
217 struct orangefs_writepages
*ow
;
218 struct blk_plug plug
;
220 ow
= kzalloc(sizeof(struct orangefs_writepages
), GFP_KERNEL
);
223 ow
->maxpages
= orangefs_bufmap_size_query()/PAGE_SIZE
;
224 ow
->pages
= kcalloc(ow
->maxpages
, sizeof(struct page
*), GFP_KERNEL
);
229 ow
->bv
= kcalloc(ow
->maxpages
, sizeof(struct bio_vec
), GFP_KERNEL
);
235 blk_start_plug(&plug
);
236 ret
= write_cache_pages(mapping
, wbc
, orangefs_writepages_callback
, ow
);
238 ret
= orangefs_writepages_work(ow
, wbc
);
239 blk_finish_plug(&plug
);
/* Forward declaration; defined below, used by readahead/read_folio paths. */
static int orangefs_launder_folio(struct folio *);
248 static void orangefs_readahead(struct readahead_control
*rac
)
251 struct iov_iter iter
;
252 struct inode
*inode
= rac
->mapping
->host
;
253 struct xarray
*i_pages
;
255 loff_t new_start
= readahead_pos(rac
);
259 loff_t bytes_remaining
= inode
->i_size
- readahead_pos(rac
);
260 loff_t pages_remaining
= bytes_remaining
/ PAGE_SIZE
;
262 if (pages_remaining
>= 1024)
264 else if (pages_remaining
> readahead_count(rac
))
265 new_len
= bytes_remaining
;
268 readahead_expand(rac
, new_start
, new_len
);
270 offset
= readahead_pos(rac
);
271 i_pages
= &rac
->mapping
->i_pages
;
273 iov_iter_xarray(&iter
, READ
, i_pages
, offset
, readahead_length(rac
));
275 /* read in the pages. */
276 if ((ret
= wait_for_direct_io(ORANGEFS_IO_READ
, inode
,
277 &offset
, &iter
, readahead_length(rac
),
278 inode
->i_size
, NULL
, NULL
, rac
->file
)) < 0)
279 gossip_debug(GOSSIP_FILE_DEBUG
,
280 "%s: wait_for_direct_io failed. \n", __func__
);
285 while ((page
= readahead_page(rac
))) {
286 page_endio(page
, false, ret
);
291 static int orangefs_read_folio(struct file
*file
, struct folio
*folio
)
293 struct inode
*inode
= folio
->mapping
->host
;
294 struct iov_iter iter
;
297 loff_t off
; /* offset of this folio in the file */
299 if (folio_test_dirty(folio
))
300 orangefs_launder_folio(folio
);
302 off
= folio_pos(folio
);
303 bv
.bv_page
= &folio
->page
;
304 bv
.bv_len
= folio_size(folio
);
306 iov_iter_bvec(&iter
, READ
, &bv
, 1, folio_size(folio
));
308 ret
= wait_for_direct_io(ORANGEFS_IO_READ
, inode
, &off
, &iter
,
309 folio_size(folio
), inode
->i_size
, NULL
, NULL
, file
);
310 /* this will only zero remaining unread portions of the page data */
311 iov_iter_zero(~0U, &iter
);
312 /* takes care of potential aliasing */
313 flush_dcache_folio(folio
);
315 folio_set_error(folio
);
317 folio_mark_uptodate(folio
);
318 if (folio_test_error(folio
))
319 folio_clear_error(folio
);
322 /* unlock the folio after the ->read_folio() routine completes */
327 static int orangefs_write_begin(struct file
*file
,
328 struct address_space
*mapping
, loff_t pos
, unsigned len
,
329 struct page
**pagep
, void **fsdata
)
331 struct orangefs_write_range
*wr
;
337 index
= pos
>> PAGE_SHIFT
;
339 page
= grab_cache_page_write_begin(mapping
, index
);
344 folio
= page_folio(page
);
346 if (folio_test_dirty(folio
) && !folio_test_private(folio
)) {
348 * Should be impossible. If it happens, launder the page
349 * since we don't know what's dirty. This will WARN in
350 * orangefs_writepage_locked.
352 ret
= orangefs_launder_folio(folio
);
356 if (folio_test_private(folio
)) {
357 struct orangefs_write_range
*wr
;
358 wr
= folio_get_private(folio
);
359 if (wr
->pos
+ wr
->len
== pos
&&
360 uid_eq(wr
->uid
, current_fsuid()) &&
361 gid_eq(wr
->gid
, current_fsgid())) {
365 ret
= orangefs_launder_folio(folio
);
371 wr
= kmalloc(sizeof *wr
, GFP_KERNEL
);
377 wr
->uid
= current_fsuid();
378 wr
->gid
= current_fsgid();
379 folio_attach_private(folio
, wr
);
384 static int orangefs_write_end(struct file
*file
, struct address_space
*mapping
,
385 loff_t pos
, unsigned len
, unsigned copied
, struct page
*page
, void *fsdata
)
387 struct inode
*inode
= page
->mapping
->host
;
388 loff_t last_pos
= pos
+ copied
;
391 * No need to use i_size_read() here, the i_size
392 * cannot change under us because we hold the i_mutex.
394 if (last_pos
> inode
->i_size
)
395 i_size_write(inode
, last_pos
);
397 /* zero the stale part of the page if we did a short copy */
398 if (!PageUptodate(page
)) {
399 unsigned from
= pos
& (PAGE_SIZE
- 1);
401 zero_user(page
, from
+ copied
, len
- copied
);
403 /* Set fully written pages uptodate. */
404 if (pos
== page_offset(page
) &&
405 (len
== PAGE_SIZE
|| pos
+ len
== inode
->i_size
)) {
406 zero_user_segment(page
, from
+ copied
, PAGE_SIZE
);
407 SetPageUptodate(page
);
411 set_page_dirty(page
);
415 mark_inode_dirty_sync(file_inode(file
));
419 static void orangefs_invalidate_folio(struct folio
*folio
,
420 size_t offset
, size_t length
)
422 struct orangefs_write_range
*wr
= folio_get_private(folio
);
424 if (offset
== 0 && length
== PAGE_SIZE
) {
425 kfree(folio_detach_private(folio
));
427 /* write range entirely within invalidate range (or equal) */
428 } else if (folio_pos(folio
) + offset
<= wr
->pos
&&
429 wr
->pos
+ wr
->len
<= folio_pos(folio
) + offset
+ length
) {
430 kfree(folio_detach_private(folio
));
431 /* XXX is this right? only caller in fs */
432 folio_cancel_dirty(folio
);
434 /* invalidate range chops off end of write range */
435 } else if (wr
->pos
< folio_pos(folio
) + offset
&&
436 wr
->pos
+ wr
->len
<= folio_pos(folio
) + offset
+ length
&&
437 folio_pos(folio
) + offset
< wr
->pos
+ wr
->len
) {
439 x
= wr
->pos
+ wr
->len
- (folio_pos(folio
) + offset
);
440 WARN_ON(x
> wr
->len
);
442 wr
->uid
= current_fsuid();
443 wr
->gid
= current_fsgid();
444 /* invalidate range chops off beginning of write range */
445 } else if (folio_pos(folio
) + offset
<= wr
->pos
&&
446 folio_pos(folio
) + offset
+ length
< wr
->pos
+ wr
->len
&&
447 wr
->pos
< folio_pos(folio
) + offset
+ length
) {
449 x
= folio_pos(folio
) + offset
+ length
- wr
->pos
;
450 WARN_ON(x
> wr
->len
);
453 wr
->uid
= current_fsuid();
454 wr
->gid
= current_fsgid();
455 /* invalidate range entirely within write range (punch hole) */
456 } else if (wr
->pos
< folio_pos(folio
) + offset
&&
457 folio_pos(folio
) + offset
+ length
< wr
->pos
+ wr
->len
) {
458 /* XXX what do we do here... should not WARN_ON */
462 * should we just ignore this and write it out anyway?
463 * it hardly makes sense
466 /* non-overlapping ranges */
468 /* WARN if they do overlap */
469 if (!((folio_pos(folio
) + offset
+ length
<= wr
->pos
) ^
470 (wr
->pos
+ wr
->len
<= folio_pos(folio
) + offset
))) {
472 printk("invalidate range offset %llu length %zu\n",
473 folio_pos(folio
) + offset
, length
);
474 printk("write range offset %llu length %zu\n",
481 * Above there are returns where wr is freed or where we WARN.
482 * Thus the following runs if wr was modified above.
485 orangefs_launder_folio(folio
);
488 static bool orangefs_release_folio(struct folio
*folio
, gfp_t foo
)
490 return !folio_test_private(folio
);
/* ->free_folio: free the attached write range, if any. */
static void orangefs_free_folio(struct folio *folio)
{
	kfree(folio_detach_private(folio));
}
498 static int orangefs_launder_folio(struct folio
*folio
)
501 struct writeback_control wbc
= {
502 .sync_mode
= WB_SYNC_ALL
,
505 folio_wait_writeback(folio
);
506 if (folio_clear_dirty_for_io(folio
)) {
507 r
= orangefs_writepage_locked(&folio
->page
, &wbc
);
508 folio_end_writeback(folio
);
513 static ssize_t
orangefs_direct_IO(struct kiocb
*iocb
,
514 struct iov_iter
*iter
)
517 * Comment from original do_readv_writev:
518 * Common entry point for read/write/readv/writev
519 * This function will dispatch it to either the direct I/O
520 * or buffered I/O path depending on the mount options and/or
521 * augmented/extended metadata attached to the file.
522 * Note: File extended attributes override any mount options.
524 struct file
*file
= iocb
->ki_filp
;
525 loff_t pos
= iocb
->ki_pos
;
526 enum ORANGEFS_io_type type
= iov_iter_rw(iter
) == WRITE
?
527 ORANGEFS_IO_WRITE
: ORANGEFS_IO_READ
;
528 loff_t
*offset
= &pos
;
529 struct inode
*inode
= file
->f_mapping
->host
;
530 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
531 struct orangefs_khandle
*handle
= &orangefs_inode
->refn
.khandle
;
532 size_t count
= iov_iter_count(iter
);
533 ssize_t total_count
= 0;
534 ssize_t ret
= -EINVAL
;
537 gossip_debug(GOSSIP_FILE_DEBUG
,
538 "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
543 if (type
== ORANGEFS_IO_WRITE
) {
544 gossip_debug(GOSSIP_FILE_DEBUG
,
545 "%s(%pU): proceeding with offset : %llu, "
558 while (iov_iter_count(iter
)) {
559 size_t each_count
= iov_iter_count(iter
);
563 /* how much to transfer in this loop iteration */
564 if (each_count
> orangefs_bufmap_size_query())
565 each_count
= orangefs_bufmap_size_query();
567 gossip_debug(GOSSIP_FILE_DEBUG
,
568 "%s(%pU): size of each_count(%d)\n",
572 gossip_debug(GOSSIP_FILE_DEBUG
,
573 "%s(%pU): BEFORE wait_for_io: offset is %d\n",
578 ret
= wait_for_direct_io(type
, inode
, offset
, iter
,
579 each_count
, 0, NULL
, NULL
, file
);
580 gossip_debug(GOSSIP_FILE_DEBUG
,
581 "%s(%pU): return from wait_for_io:%d\n",
593 gossip_debug(GOSSIP_FILE_DEBUG
,
594 "%s(%pU): AFTER wait_for_io: offset is %d\n",
600 * if we got a short I/O operations,
601 * fall out and return what we got so far
603 if (amt_complete
< each_count
)
611 if (type
== ORANGEFS_IO_READ
) {
614 file_update_time(file
);
615 if (*offset
> i_size_read(inode
))
616 i_size_write(inode
, *offset
);
620 gossip_debug(GOSSIP_FILE_DEBUG
,
621 "%s(%pU): Value(%d) returned.\n",
629 /** ORANGEFS2 implementation of address space operations */
630 static const struct address_space_operations orangefs_address_operations
= {
631 .writepage
= orangefs_writepage
,
632 .readahead
= orangefs_readahead
,
633 .read_folio
= orangefs_read_folio
,
634 .writepages
= orangefs_writepages
,
635 .dirty_folio
= filemap_dirty_folio
,
636 .write_begin
= orangefs_write_begin
,
637 .write_end
= orangefs_write_end
,
638 .invalidate_folio
= orangefs_invalidate_folio
,
639 .release_folio
= orangefs_release_folio
,
640 .free_folio
= orangefs_free_folio
,
641 .launder_folio
= orangefs_launder_folio
,
642 .direct_IO
= orangefs_direct_IO
,
645 vm_fault_t
orangefs_page_mkwrite(struct vm_fault
*vmf
)
647 struct folio
*folio
= page_folio(vmf
->page
);
648 struct inode
*inode
= file_inode(vmf
->vma
->vm_file
);
649 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
650 unsigned long *bitlock
= &orangefs_inode
->bitlock
;
652 struct orangefs_write_range
*wr
;
654 sb_start_pagefault(inode
->i_sb
);
656 if (wait_on_bit(bitlock
, 1, TASK_KILLABLE
)) {
657 ret
= VM_FAULT_RETRY
;
662 if (folio_test_dirty(folio
) && !folio_test_private(folio
)) {
664 * Should be impossible. If it happens, launder the folio
665 * since we don't know what's dirty. This will WARN in
666 * orangefs_writepage_locked.
668 if (orangefs_launder_folio(folio
)) {
669 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
673 if (folio_test_private(folio
)) {
674 wr
= folio_get_private(folio
);
675 if (uid_eq(wr
->uid
, current_fsuid()) &&
676 gid_eq(wr
->gid
, current_fsgid())) {
677 wr
->pos
= page_offset(vmf
->page
);
681 if (orangefs_launder_folio(folio
)) {
682 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
687 wr
= kmalloc(sizeof *wr
, GFP_KERNEL
);
689 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
692 wr
->pos
= page_offset(vmf
->page
);
694 wr
->uid
= current_fsuid();
695 wr
->gid
= current_fsgid();
696 folio_attach_private(folio
, wr
);
699 file_update_time(vmf
->vma
->vm_file
);
700 if (folio
->mapping
!= inode
->i_mapping
) {
702 ret
= VM_FAULT_LOCKED
|VM_FAULT_NOPAGE
;
707 * We mark the folio dirty already here so that when freeze is in
708 * progress, we are guaranteed that writeback during freezing will
709 * see the dirty folio and writeprotect it again.
711 folio_mark_dirty(folio
);
712 folio_wait_stable(folio
);
713 ret
= VM_FAULT_LOCKED
;
715 sb_end_pagefault(inode
->i_sb
);
719 static int orangefs_setattr_size(struct inode
*inode
, struct iattr
*iattr
)
721 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
722 struct orangefs_kernel_op_s
*new_op
;
726 gossip_debug(GOSSIP_INODE_DEBUG
,
727 "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
729 get_khandle_from_ino(inode
),
730 &orangefs_inode
->refn
.khandle
,
731 orangefs_inode
->refn
.fs_id
,
734 /* Ensure that we have a up to date size, so we know if it changed. */
735 ret
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_SIZE
);
739 gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
743 orig_size
= i_size_read(inode
);
745 /* This is truncate_setsize in a different order. */
746 truncate_pagecache(inode
, iattr
->ia_size
);
747 i_size_write(inode
, iattr
->ia_size
);
748 if (iattr
->ia_size
> orig_size
)
749 pagecache_isize_extended(inode
, orig_size
, iattr
->ia_size
);
751 new_op
= op_alloc(ORANGEFS_VFS_OP_TRUNCATE
);
755 new_op
->upcall
.req
.truncate
.refn
= orangefs_inode
->refn
;
756 new_op
->upcall
.req
.truncate
.size
= (__s64
) iattr
->ia_size
;
758 ret
= service_operation(new_op
,
760 get_interruptible_flag(inode
));
763 * the truncate has no downcall members to retrieve, but
764 * the status value tells us if it went through ok or not
766 gossip_debug(GOSSIP_INODE_DEBUG
, "%s: ret:%d:\n", __func__
, ret
);
773 if (orig_size
!= i_size_read(inode
))
774 iattr
->ia_valid
|= ATTR_CTIME
| ATTR_MTIME
;
779 int __orangefs_setattr(struct inode
*inode
, struct iattr
*iattr
)
783 if (iattr
->ia_valid
& ATTR_MODE
) {
784 if (iattr
->ia_mode
& (S_ISVTX
)) {
785 if (is_root_handle(inode
)) {
787 * allow sticky bit to be set on root (since
788 * it shows up that way by default anyhow),
789 * but don't show it to the server
791 iattr
->ia_mode
-= S_ISVTX
;
793 gossip_debug(GOSSIP_UTILS_DEBUG
,
794 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
799 if (iattr
->ia_mode
& (S_ISUID
)) {
800 gossip_debug(GOSSIP_UTILS_DEBUG
,
801 "Attempting to set setuid bit (not supported); returning EINVAL.\n");
807 if (iattr
->ia_valid
& ATTR_SIZE
) {
808 ret
= orangefs_setattr_size(inode
, iattr
);
814 spin_lock(&inode
->i_lock
);
815 if (ORANGEFS_I(inode
)->attr_valid
) {
816 if (uid_eq(ORANGEFS_I(inode
)->attr_uid
, current_fsuid()) &&
817 gid_eq(ORANGEFS_I(inode
)->attr_gid
, current_fsgid())) {
818 ORANGEFS_I(inode
)->attr_valid
= iattr
->ia_valid
;
820 spin_unlock(&inode
->i_lock
);
821 write_inode_now(inode
, 1);
825 ORANGEFS_I(inode
)->attr_valid
= iattr
->ia_valid
;
826 ORANGEFS_I(inode
)->attr_uid
= current_fsuid();
827 ORANGEFS_I(inode
)->attr_gid
= current_fsgid();
829 setattr_copy(&init_user_ns
, inode
, iattr
);
830 spin_unlock(&inode
->i_lock
);
831 mark_inode_dirty(inode
);
833 if (iattr
->ia_valid
& ATTR_MODE
)
834 /* change mod on a file that has ACLs */
835 ret
= posix_acl_chmod(&init_user_ns
, inode
, inode
->i_mode
);
843 * Change attributes of an object referenced by dentry.
845 int orangefs_setattr(struct user_namespace
*mnt_userns
, struct dentry
*dentry
,
849 gossip_debug(GOSSIP_INODE_DEBUG
, "__orangefs_setattr: called on %pd\n",
851 ret
= setattr_prepare(&init_user_ns
, dentry
, iattr
);
854 ret
= __orangefs_setattr(d_inode(dentry
), iattr
);
855 sync_inode_metadata(d_inode(dentry
), 1);
857 gossip_debug(GOSSIP_INODE_DEBUG
, "orangefs_setattr: returning %d\n",
863 * Obtain attributes of an object given a dentry
865 int orangefs_getattr(struct user_namespace
*mnt_userns
, const struct path
*path
,
866 struct kstat
*stat
, u32 request_mask
, unsigned int flags
)
869 struct inode
*inode
= path
->dentry
->d_inode
;
871 gossip_debug(GOSSIP_INODE_DEBUG
,
872 "orangefs_getattr: called on %pd mask %u\n",
873 path
->dentry
, request_mask
);
875 ret
= orangefs_inode_getattr(inode
,
876 request_mask
& STATX_SIZE
? ORANGEFS_GETATTR_SIZE
: 0);
878 generic_fillattr(&init_user_ns
, inode
, stat
);
880 /* override block size reported to stat */
881 if (!(request_mask
& STATX_SIZE
))
882 stat
->result_mask
&= ~STATX_SIZE
;
884 generic_fill_statx_attr(inode
, stat
);
889 int orangefs_permission(struct user_namespace
*mnt_userns
,
890 struct inode
*inode
, int mask
)
894 if (mask
& MAY_NOT_BLOCK
)
897 gossip_debug(GOSSIP_INODE_DEBUG
, "%s: refreshing\n", __func__
);
899 /* Make sure the permission (and other common attrs) are up to date. */
900 ret
= orangefs_inode_getattr(inode
, 0);
904 return generic_permission(&init_user_ns
, inode
, mask
);
907 int orangefs_update_time(struct inode
*inode
, struct timespec64
*time
, int flags
)
910 gossip_debug(GOSSIP_INODE_DEBUG
, "orangefs_update_time: %pU\n",
911 get_khandle_from_ino(inode
));
912 generic_update_time(inode
, time
, flags
);
913 memset(&iattr
, 0, sizeof iattr
);
915 iattr
.ia_valid
|= ATTR_ATIME
;
917 iattr
.ia_valid
|= ATTR_CTIME
;
919 iattr
.ia_valid
|= ATTR_MTIME
;
920 return __orangefs_setattr(inode
, &iattr
);
923 static int orangefs_fileattr_get(struct dentry
*dentry
, struct fileattr
*fa
)
928 gossip_debug(GOSSIP_FILE_DEBUG
, "%s: called on %pd\n", __func__
,
931 ret
= orangefs_inode_getxattr(d_inode(dentry
),
932 "user.pvfs2.meta_hint",
934 if (ret
< 0 && ret
!= -ENODATA
)
937 gossip_debug(GOSSIP_FILE_DEBUG
, "%s: flags=%u\n", __func__
, (u32
) val
);
939 fileattr_fill_flags(fa
, val
);
943 static int orangefs_fileattr_set(struct user_namespace
*mnt_userns
,
944 struct dentry
*dentry
, struct fileattr
*fa
)
948 gossip_debug(GOSSIP_FILE_DEBUG
, "%s: called on %pd\n", __func__
,
951 * ORANGEFS_MIRROR_FL is set internally when the mirroring mode is
952 * turned on for a file. The user is not allowed to turn on this bit,
953 * but the bit is present if the user first gets the flags and then
954 * updates the flags with some new settings. So, we ignore it in the
955 * following edit. bligon.
957 if (fileattr_has_fsx(fa
) ||
958 (fa
->flags
& ~(FS_IMMUTABLE_FL
| FS_APPEND_FL
| FS_NOATIME_FL
| ORANGEFS_MIRROR_FL
))) {
959 gossip_err("%s: only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n",
964 gossip_debug(GOSSIP_FILE_DEBUG
, "%s: flags=%u\n", __func__
, (u32
) val
);
965 return orangefs_inode_setxattr(d_inode(dentry
),
966 "user.pvfs2.meta_hint",
967 &val
, sizeof(val
), 0);
970 /* ORANGEFS2 implementation of VFS inode operations for files */
971 static const struct inode_operations orangefs_file_inode_operations
= {
972 .get_acl
= orangefs_get_acl
,
973 .set_acl
= orangefs_set_acl
,
974 .setattr
= orangefs_setattr
,
975 .getattr
= orangefs_getattr
,
976 .listxattr
= orangefs_listxattr
,
977 .permission
= orangefs_permission
,
978 .update_time
= orangefs_update_time
,
979 .fileattr_get
= orangefs_fileattr_get
,
980 .fileattr_set
= orangefs_fileattr_set
,
983 static int orangefs_init_iops(struct inode
*inode
)
985 inode
->i_mapping
->a_ops
= &orangefs_address_operations
;
987 switch (inode
->i_mode
& S_IFMT
) {
989 inode
->i_op
= &orangefs_file_inode_operations
;
990 inode
->i_fop
= &orangefs_file_operations
;
993 inode
->i_op
= &orangefs_symlink_inode_operations
;
996 inode
->i_op
= &orangefs_dir_inode_operations
;
997 inode
->i_fop
= &orangefs_dir_operations
;
1000 gossip_debug(GOSSIP_INODE_DEBUG
,
1001 "%s: unsupported mode\n",
1010 * Given an ORANGEFS object identifier (fsid, handle), convert it into
1011 * a ino_t type that will be used as a hash-index from where the handle will
1012 * be searched for in the VFS hash table of inodes.
1014 static inline ino_t
orangefs_handle_hash(struct orangefs_object_kref
*ref
)
1018 return orangefs_khandle_to_ino(&(ref
->khandle
));
1022 * Called to set up an inode from iget5_locked.
1024 static int orangefs_set_inode(struct inode
*inode
, void *data
)
1026 struct orangefs_object_kref
*ref
= (struct orangefs_object_kref
*) data
;
1027 ORANGEFS_I(inode
)->refn
.fs_id
= ref
->fs_id
;
1028 ORANGEFS_I(inode
)->refn
.khandle
= ref
->khandle
;
1029 ORANGEFS_I(inode
)->attr_valid
= 0;
1030 hash_init(ORANGEFS_I(inode
)->xattr_cache
);
1031 ORANGEFS_I(inode
)->mapping_time
= jiffies
- 1;
1032 ORANGEFS_I(inode
)->bitlock
= 0;
1037 * Called to determine if handles match.
1039 static int orangefs_test_inode(struct inode
*inode
, void *data
)
1041 struct orangefs_object_kref
*ref
= (struct orangefs_object_kref
*) data
;
1042 struct orangefs_inode_s
*orangefs_inode
= NULL
;
1044 orangefs_inode
= ORANGEFS_I(inode
);
1045 /* test handles and fs_ids... */
1046 return (!ORANGEFS_khandle_cmp(&(orangefs_inode
->refn
.khandle
),
1048 orangefs_inode
->refn
.fs_id
== ref
->fs_id
);
1052 * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
1055 * @sb: the file system super block instance.
1056 * @ref: The ORANGEFS object for which we are trying to locate an inode.
1058 struct inode
*orangefs_iget(struct super_block
*sb
,
1059 struct orangefs_object_kref
*ref
)
1061 struct inode
*inode
= NULL
;
1065 hash
= orangefs_handle_hash(ref
);
1066 inode
= iget5_locked(sb
,
1068 orangefs_test_inode
,
1073 return ERR_PTR(-ENOMEM
);
1075 if (!(inode
->i_state
& I_NEW
))
1078 error
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_NEW
);
1081 return ERR_PTR(error
);
1084 inode
->i_ino
= hash
; /* needed for stat etc */
1085 orangefs_init_iops(inode
);
1086 unlock_new_inode(inode
);
1088 gossip_debug(GOSSIP_INODE_DEBUG
,
1089 "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
1099 * Allocate an inode for a newly created file and insert it into the inode hash.
1101 struct inode
*orangefs_new_inode(struct super_block
*sb
, struct inode
*dir
,
1102 int mode
, dev_t dev
, struct orangefs_object_kref
*ref
)
1104 unsigned long hash
= orangefs_handle_hash(ref
);
1105 struct inode
*inode
;
1108 gossip_debug(GOSSIP_INODE_DEBUG
,
1109 "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
1116 inode
= new_inode(sb
);
1118 return ERR_PTR(-ENOMEM
);
1120 orangefs_set_inode(inode
, ref
);
1121 inode
->i_ino
= hash
; /* needed for stat etc */
1123 error
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_NEW
);
1127 orangefs_init_iops(inode
);
1128 inode
->i_rdev
= dev
;
1130 error
= insert_inode_locked4(inode
, hash
, orangefs_test_inode
, ref
);
1134 gossip_debug(GOSSIP_INODE_DEBUG
,
1135 "Initializing ACL's for inode %pU\n",
1136 get_khandle_from_ino(inode
));
1137 orangefs_init_acl(inode
, dir
);
1142 return ERR_PTR(error
);