1 // SPDX-License-Identifier: GPL-2.0
3 * (C) 2001 Clemson University and The University of Chicago
4 * Copyright 2018 Omnibond Systems, L.L.C.
6 * See COPYING in top-level directory.
10 * Linux VFS inode operations.
13 #include <linux/blkdev.h>
14 #include <linux/fileattr.h>
16 #include "orangefs-kernel.h"
17 #include "orangefs-bufmap.h"
19 static int orangefs_writepage_locked(struct page
*page
,
20 struct writeback_control
*wbc
)
22 struct inode
*inode
= page
->mapping
->host
;
23 struct orangefs_write_range
*wr
= NULL
;
30 set_page_writeback(page
);
32 len
= i_size_read(inode
);
33 if (PagePrivate(page
)) {
34 wr
= (struct orangefs_write_range
*)page_private(page
);
35 WARN_ON(wr
->pos
>= len
);
37 if (off
+ wr
->len
> len
)
43 off
= page_offset(page
);
44 if (off
+ PAGE_SIZE
> len
)
49 /* Should've been handled in orangefs_invalidate_folio. */
50 WARN_ON(off
== len
|| off
+ wlen
> len
);
53 bvec_set_page(&bv
, page
, wlen
, off
% PAGE_SIZE
);
54 iov_iter_bvec(&iter
, ITER_SOURCE
, &bv
, 1, wlen
);
56 ret
= wait_for_direct_io(ORANGEFS_IO_WRITE
, inode
, &off
, &iter
, wlen
,
60 mapping_set_error(page
->mapping
, ret
);
64 kfree(detach_page_private(page
));
68 static int orangefs_writepage(struct page
*page
, struct writeback_control
*wbc
)
71 ret
= orangefs_writepage_locked(page
, wbc
);
73 end_page_writeback(page
);
77 struct orangefs_writepages
{
88 static int orangefs_writepages_work(struct orangefs_writepages
*ow
,
89 struct writeback_control
*wbc
)
91 struct inode
*inode
= ow
->pages
[0]->mapping
->host
;
92 struct orangefs_write_range
*wrp
, wr
;
99 len
= i_size_read(inode
);
101 for (i
= 0; i
< ow
->npages
; i
++) {
102 set_page_writeback(ow
->pages
[i
]);
103 bvec_set_page(&ow
->bv
[i
], ow
->pages
[i
],
104 min(page_offset(ow
->pages
[i
]) + PAGE_SIZE
,
106 max(ow
->off
, page_offset(ow
->pages
[i
])),
107 i
== 0 ? ow
->off
- page_offset(ow
->pages
[i
]) : 0);
109 iov_iter_bvec(&iter
, ITER_SOURCE
, ow
->bv
, ow
->npages
, ow
->len
);
111 WARN_ON(ow
->off
>= len
);
112 if (ow
->off
+ ow
->len
> len
)
113 ow
->len
= len
- ow
->off
;
118 ret
= wait_for_direct_io(ORANGEFS_IO_WRITE
, inode
, &off
, &iter
, ow
->len
,
121 for (i
= 0; i
< ow
->npages
; i
++) {
122 SetPageError(ow
->pages
[i
]);
123 mapping_set_error(ow
->pages
[i
]->mapping
, ret
);
124 if (PagePrivate(ow
->pages
[i
])) {
125 wrp
= (struct orangefs_write_range
*)
126 page_private(ow
->pages
[i
]);
127 ClearPagePrivate(ow
->pages
[i
]);
128 put_page(ow
->pages
[i
]);
131 end_page_writeback(ow
->pages
[i
]);
132 unlock_page(ow
->pages
[i
]);
136 for (i
= 0; i
< ow
->npages
; i
++) {
137 if (PagePrivate(ow
->pages
[i
])) {
138 wrp
= (struct orangefs_write_range
*)
139 page_private(ow
->pages
[i
]);
140 ClearPagePrivate(ow
->pages
[i
]);
141 put_page(ow
->pages
[i
]);
144 end_page_writeback(ow
->pages
[i
]);
145 unlock_page(ow
->pages
[i
]);
151 static int orangefs_writepages_callback(struct folio
*folio
,
152 struct writeback_control
*wbc
, void *data
)
154 struct orangefs_writepages
*ow
= data
;
155 struct orangefs_write_range
*wr
= folio
->private;
160 /* It's not private so there's nothing to write, right? */
161 printk("writepages_callback not private!\n");
167 if (ow
->npages
== 0) {
172 ow
->pages
[ow
->npages
++] = &folio
->page
;
176 if (!uid_eq(ow
->uid
, wr
->uid
) || !gid_eq(ow
->gid
, wr
->gid
)) {
177 orangefs_writepages_work(ow
, wbc
);
182 if (ow
->off
+ ow
->len
== wr
->pos
) {
184 ow
->pages
[ow
->npages
++] = &folio
->page
;
191 orangefs_writepages_work(ow
, wbc
);
194 ret
= orangefs_writepage_locked(&folio
->page
, wbc
);
195 mapping_set_error(folio
->mapping
, ret
);
197 folio_end_writeback(folio
);
199 if (ow
->npages
== ow
->maxpages
) {
200 orangefs_writepages_work(ow
, wbc
);
207 static int orangefs_writepages(struct address_space
*mapping
,
208 struct writeback_control
*wbc
)
210 struct orangefs_writepages
*ow
;
211 struct blk_plug plug
;
213 ow
= kzalloc(sizeof(struct orangefs_writepages
), GFP_KERNEL
);
216 ow
->maxpages
= orangefs_bufmap_size_query()/PAGE_SIZE
;
217 ow
->pages
= kcalloc(ow
->maxpages
, sizeof(struct page
*), GFP_KERNEL
);
222 ow
->bv
= kcalloc(ow
->maxpages
, sizeof(struct bio_vec
), GFP_KERNEL
);
228 blk_start_plug(&plug
);
229 ret
= write_cache_pages(mapping
, wbc
, orangefs_writepages_callback
, ow
);
231 ret
= orangefs_writepages_work(ow
, wbc
);
232 blk_finish_plug(&plug
);
239 static int orangefs_launder_folio(struct folio
*);
241 static void orangefs_readahead(struct readahead_control
*rac
)
244 struct iov_iter iter
;
245 struct inode
*inode
= rac
->mapping
->host
;
246 struct xarray
*i_pages
;
248 loff_t new_start
= readahead_pos(rac
);
252 loff_t bytes_remaining
= inode
->i_size
- readahead_pos(rac
);
253 loff_t pages_remaining
= bytes_remaining
/ PAGE_SIZE
;
255 if (pages_remaining
>= 1024)
257 else if (pages_remaining
> readahead_count(rac
))
258 new_len
= bytes_remaining
;
261 readahead_expand(rac
, new_start
, new_len
);
263 offset
= readahead_pos(rac
);
264 i_pages
= &rac
->mapping
->i_pages
;
266 iov_iter_xarray(&iter
, ITER_DEST
, i_pages
, offset
, readahead_length(rac
));
268 /* read in the pages. */
269 if ((ret
= wait_for_direct_io(ORANGEFS_IO_READ
, inode
,
270 &offset
, &iter
, readahead_length(rac
),
271 inode
->i_size
, NULL
, NULL
, rac
->file
)) < 0)
272 gossip_debug(GOSSIP_FILE_DEBUG
,
273 "%s: wait_for_direct_io failed. \n", __func__
);
278 while ((page
= readahead_page(rac
))) {
279 page_endio(page
, false, ret
);
284 static int orangefs_read_folio(struct file
*file
, struct folio
*folio
)
286 struct inode
*inode
= folio
->mapping
->host
;
287 struct iov_iter iter
;
290 loff_t off
; /* offset of this folio in the file */
292 if (folio_test_dirty(folio
))
293 orangefs_launder_folio(folio
);
295 off
= folio_pos(folio
);
296 bvec_set_folio(&bv
, folio
, folio_size(folio
), 0);
297 iov_iter_bvec(&iter
, ITER_DEST
, &bv
, 1, folio_size(folio
));
299 ret
= wait_for_direct_io(ORANGEFS_IO_READ
, inode
, &off
, &iter
,
300 folio_size(folio
), inode
->i_size
, NULL
, NULL
, file
);
301 /* this will only zero remaining unread portions of the folio data */
302 iov_iter_zero(~0U, &iter
);
303 /* takes care of potential aliasing */
304 flush_dcache_folio(folio
);
306 folio_set_error(folio
);
308 folio_mark_uptodate(folio
);
311 /* unlock the folio after the ->read_folio() routine completes */
316 static int orangefs_write_begin(struct file
*file
,
317 struct address_space
*mapping
, loff_t pos
, unsigned len
,
318 struct page
**pagep
, void **fsdata
)
320 struct orangefs_write_range
*wr
;
326 index
= pos
>> PAGE_SHIFT
;
328 page
= grab_cache_page_write_begin(mapping
, index
);
333 folio
= page_folio(page
);
335 if (folio_test_dirty(folio
) && !folio_test_private(folio
)) {
337 * Should be impossible. If it happens, launder the page
338 * since we don't know what's dirty. This will WARN in
339 * orangefs_writepage_locked.
341 ret
= orangefs_launder_folio(folio
);
345 if (folio_test_private(folio
)) {
346 struct orangefs_write_range
*wr
;
347 wr
= folio_get_private(folio
);
348 if (wr
->pos
+ wr
->len
== pos
&&
349 uid_eq(wr
->uid
, current_fsuid()) &&
350 gid_eq(wr
->gid
, current_fsgid())) {
354 ret
= orangefs_launder_folio(folio
);
360 wr
= kmalloc(sizeof *wr
, GFP_KERNEL
);
366 wr
->uid
= current_fsuid();
367 wr
->gid
= current_fsgid();
368 folio_attach_private(folio
, wr
);
373 static int orangefs_write_end(struct file
*file
, struct address_space
*mapping
,
374 loff_t pos
, unsigned len
, unsigned copied
, struct page
*page
, void *fsdata
)
376 struct inode
*inode
= page
->mapping
->host
;
377 loff_t last_pos
= pos
+ copied
;
380 * No need to use i_size_read() here, the i_size
381 * cannot change under us because we hold the i_mutex.
383 if (last_pos
> inode
->i_size
)
384 i_size_write(inode
, last_pos
);
386 /* zero the stale part of the page if we did a short copy */
387 if (!PageUptodate(page
)) {
388 unsigned from
= pos
& (PAGE_SIZE
- 1);
390 zero_user(page
, from
+ copied
, len
- copied
);
392 /* Set fully written pages uptodate. */
393 if (pos
== page_offset(page
) &&
394 (len
== PAGE_SIZE
|| pos
+ len
== inode
->i_size
)) {
395 zero_user_segment(page
, from
+ copied
, PAGE_SIZE
);
396 SetPageUptodate(page
);
400 set_page_dirty(page
);
404 mark_inode_dirty_sync(file_inode(file
));
408 static void orangefs_invalidate_folio(struct folio
*folio
,
409 size_t offset
, size_t length
)
411 struct orangefs_write_range
*wr
= folio_get_private(folio
);
413 if (offset
== 0 && length
== PAGE_SIZE
) {
414 kfree(folio_detach_private(folio
));
416 /* write range entirely within invalidate range (or equal) */
417 } else if (folio_pos(folio
) + offset
<= wr
->pos
&&
418 wr
->pos
+ wr
->len
<= folio_pos(folio
) + offset
+ length
) {
419 kfree(folio_detach_private(folio
));
420 /* XXX is this right? only caller in fs */
421 folio_cancel_dirty(folio
);
423 /* invalidate range chops off end of write range */
424 } else if (wr
->pos
< folio_pos(folio
) + offset
&&
425 wr
->pos
+ wr
->len
<= folio_pos(folio
) + offset
+ length
&&
426 folio_pos(folio
) + offset
< wr
->pos
+ wr
->len
) {
428 x
= wr
->pos
+ wr
->len
- (folio_pos(folio
) + offset
);
429 WARN_ON(x
> wr
->len
);
431 wr
->uid
= current_fsuid();
432 wr
->gid
= current_fsgid();
433 /* invalidate range chops off beginning of write range */
434 } else if (folio_pos(folio
) + offset
<= wr
->pos
&&
435 folio_pos(folio
) + offset
+ length
< wr
->pos
+ wr
->len
&&
436 wr
->pos
< folio_pos(folio
) + offset
+ length
) {
438 x
= folio_pos(folio
) + offset
+ length
- wr
->pos
;
439 WARN_ON(x
> wr
->len
);
442 wr
->uid
= current_fsuid();
443 wr
->gid
= current_fsgid();
444 /* invalidate range entirely within write range (punch hole) */
445 } else if (wr
->pos
< folio_pos(folio
) + offset
&&
446 folio_pos(folio
) + offset
+ length
< wr
->pos
+ wr
->len
) {
447 /* XXX what do we do here... should not WARN_ON */
451 * should we just ignore this and write it out anyway?
452 * it hardly makes sense
455 /* non-overlapping ranges */
457 /* WARN if they do overlap */
458 if (!((folio_pos(folio
) + offset
+ length
<= wr
->pos
) ^
459 (wr
->pos
+ wr
->len
<= folio_pos(folio
) + offset
))) {
461 printk("invalidate range offset %llu length %zu\n",
462 folio_pos(folio
) + offset
, length
);
463 printk("write range offset %llu length %zu\n",
470 * Above there are returns where wr is freed or where we WARN.
471 * Thus the following runs if wr was modified above.
474 orangefs_launder_folio(folio
);
477 static bool orangefs_release_folio(struct folio
*folio
, gfp_t foo
)
479 return !folio_test_private(folio
);
/* ->free_folio: detach whatever private data the folio holds and free it. */
static void orangefs_free_folio(struct folio *folio)
{
	void *priv = folio_detach_private(folio);

	kfree(priv);
}
487 static int orangefs_launder_folio(struct folio
*folio
)
490 struct writeback_control wbc
= {
491 .sync_mode
= WB_SYNC_ALL
,
494 folio_wait_writeback(folio
);
495 if (folio_clear_dirty_for_io(folio
)) {
496 r
= orangefs_writepage_locked(&folio
->page
, &wbc
);
497 folio_end_writeback(folio
);
502 static ssize_t
orangefs_direct_IO(struct kiocb
*iocb
,
503 struct iov_iter
*iter
)
506 * Comment from original do_readv_writev:
507 * Common entry point for read/write/readv/writev
508 * This function will dispatch it to either the direct I/O
509 * or buffered I/O path depending on the mount options and/or
510 * augmented/extended metadata attached to the file.
511 * Note: File extended attributes override any mount options.
513 struct file
*file
= iocb
->ki_filp
;
514 loff_t pos
= iocb
->ki_pos
;
515 enum ORANGEFS_io_type type
= iov_iter_rw(iter
) == WRITE
?
516 ORANGEFS_IO_WRITE
: ORANGEFS_IO_READ
;
517 loff_t
*offset
= &pos
;
518 struct inode
*inode
= file
->f_mapping
->host
;
519 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
520 struct orangefs_khandle
*handle
= &orangefs_inode
->refn
.khandle
;
521 size_t count
= iov_iter_count(iter
);
522 ssize_t total_count
= 0;
523 ssize_t ret
= -EINVAL
;
525 gossip_debug(GOSSIP_FILE_DEBUG
,
526 "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
531 if (type
== ORANGEFS_IO_WRITE
) {
532 gossip_debug(GOSSIP_FILE_DEBUG
,
533 "%s(%pU): proceeding with offset : %llu, "
546 while (iov_iter_count(iter
)) {
547 size_t each_count
= iov_iter_count(iter
);
550 /* how much to transfer in this loop iteration */
551 if (each_count
> orangefs_bufmap_size_query())
552 each_count
= orangefs_bufmap_size_query();
554 gossip_debug(GOSSIP_FILE_DEBUG
,
555 "%s(%pU): size of each_count(%d)\n",
559 gossip_debug(GOSSIP_FILE_DEBUG
,
560 "%s(%pU): BEFORE wait_for_io: offset is %d\n",
565 ret
= wait_for_direct_io(type
, inode
, offset
, iter
,
566 each_count
, 0, NULL
, NULL
, file
);
567 gossip_debug(GOSSIP_FILE_DEBUG
,
568 "%s(%pU): return from wait_for_io:%d\n",
580 gossip_debug(GOSSIP_FILE_DEBUG
,
581 "%s(%pU): AFTER wait_for_io: offset is %d\n",
587 * if we got a short I/O operation,
588 * fall out and return what we got so far
590 if (amt_complete
< each_count
)
598 if (type
== ORANGEFS_IO_READ
) {
601 file_update_time(file
);
602 if (*offset
> i_size_read(inode
))
603 i_size_write(inode
, *offset
);
607 gossip_debug(GOSSIP_FILE_DEBUG
,
608 "%s(%pU): Value(%d) returned.\n",
616 /** ORANGEFS2 implementation of address space operations */
617 static const struct address_space_operations orangefs_address_operations
= {
618 .writepage
= orangefs_writepage
,
619 .readahead
= orangefs_readahead
,
620 .read_folio
= orangefs_read_folio
,
621 .writepages
= orangefs_writepages
,
622 .dirty_folio
= filemap_dirty_folio
,
623 .write_begin
= orangefs_write_begin
,
624 .write_end
= orangefs_write_end
,
625 .invalidate_folio
= orangefs_invalidate_folio
,
626 .release_folio
= orangefs_release_folio
,
627 .free_folio
= orangefs_free_folio
,
628 .launder_folio
= orangefs_launder_folio
,
629 .direct_IO
= orangefs_direct_IO
,
632 vm_fault_t
orangefs_page_mkwrite(struct vm_fault
*vmf
)
634 struct folio
*folio
= page_folio(vmf
->page
);
635 struct inode
*inode
= file_inode(vmf
->vma
->vm_file
);
636 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
637 unsigned long *bitlock
= &orangefs_inode
->bitlock
;
639 struct orangefs_write_range
*wr
;
641 sb_start_pagefault(inode
->i_sb
);
643 if (wait_on_bit(bitlock
, 1, TASK_KILLABLE
)) {
644 ret
= VM_FAULT_RETRY
;
649 if (folio_test_dirty(folio
) && !folio_test_private(folio
)) {
651 * Should be impossible. If it happens, launder the folio
652 * since we don't know what's dirty. This will WARN in
653 * orangefs_writepage_locked.
655 if (orangefs_launder_folio(folio
)) {
656 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
660 if (folio_test_private(folio
)) {
661 wr
= folio_get_private(folio
);
662 if (uid_eq(wr
->uid
, current_fsuid()) &&
663 gid_eq(wr
->gid
, current_fsgid())) {
664 wr
->pos
= page_offset(vmf
->page
);
668 if (orangefs_launder_folio(folio
)) {
669 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
674 wr
= kmalloc(sizeof *wr
, GFP_KERNEL
);
676 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
679 wr
->pos
= page_offset(vmf
->page
);
681 wr
->uid
= current_fsuid();
682 wr
->gid
= current_fsgid();
683 folio_attach_private(folio
, wr
);
686 file_update_time(vmf
->vma
->vm_file
);
687 if (folio
->mapping
!= inode
->i_mapping
) {
689 ret
= VM_FAULT_LOCKED
|VM_FAULT_NOPAGE
;
694 * We mark the folio dirty already here so that when freeze is in
695 * progress, we are guaranteed that writeback during freezing will
696 * see the dirty folio and writeprotect it again.
698 folio_mark_dirty(folio
);
699 folio_wait_stable(folio
);
700 ret
= VM_FAULT_LOCKED
;
702 sb_end_pagefault(inode
->i_sb
);
706 static int orangefs_setattr_size(struct inode
*inode
, struct iattr
*iattr
)
708 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
709 struct orangefs_kernel_op_s
*new_op
;
713 gossip_debug(GOSSIP_INODE_DEBUG
,
714 "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
716 get_khandle_from_ino(inode
),
717 &orangefs_inode
->refn
.khandle
,
718 orangefs_inode
->refn
.fs_id
,
721 /* Ensure that we have an up-to-date size, so we know if it changed. */
722 ret
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_SIZE
);
726 gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
730 orig_size
= i_size_read(inode
);
732 /* This is truncate_setsize in a different order. */
733 truncate_pagecache(inode
, iattr
->ia_size
);
734 i_size_write(inode
, iattr
->ia_size
);
735 if (iattr
->ia_size
> orig_size
)
736 pagecache_isize_extended(inode
, orig_size
, iattr
->ia_size
);
738 new_op
= op_alloc(ORANGEFS_VFS_OP_TRUNCATE
);
742 new_op
->upcall
.req
.truncate
.refn
= orangefs_inode
->refn
;
743 new_op
->upcall
.req
.truncate
.size
= (__s64
) iattr
->ia_size
;
745 ret
= service_operation(new_op
,
747 get_interruptible_flag(inode
));
750 * the truncate has no downcall members to retrieve, but
751 * the status value tells us if it went through ok or not
753 gossip_debug(GOSSIP_INODE_DEBUG
, "%s: ret:%d:\n", __func__
, ret
);
760 if (orig_size
!= i_size_read(inode
))
761 iattr
->ia_valid
|= ATTR_CTIME
| ATTR_MTIME
;
766 int __orangefs_setattr(struct inode
*inode
, struct iattr
*iattr
)
770 if (iattr
->ia_valid
& ATTR_MODE
) {
771 if (iattr
->ia_mode
& (S_ISVTX
)) {
772 if (is_root_handle(inode
)) {
774 * allow sticky bit to be set on root (since
775 * it shows up that way by default anyhow),
776 * but don't show it to the server
778 iattr
->ia_mode
-= S_ISVTX
;
780 gossip_debug(GOSSIP_UTILS_DEBUG
,
781 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
786 if (iattr
->ia_mode
& (S_ISUID
)) {
787 gossip_debug(GOSSIP_UTILS_DEBUG
,
788 "Attempting to set setuid bit (not supported); returning EINVAL.\n");
794 if (iattr
->ia_valid
& ATTR_SIZE
) {
795 ret
= orangefs_setattr_size(inode
, iattr
);
801 spin_lock(&inode
->i_lock
);
802 if (ORANGEFS_I(inode
)->attr_valid
) {
803 if (uid_eq(ORANGEFS_I(inode
)->attr_uid
, current_fsuid()) &&
804 gid_eq(ORANGEFS_I(inode
)->attr_gid
, current_fsgid())) {
805 ORANGEFS_I(inode
)->attr_valid
= iattr
->ia_valid
;
807 spin_unlock(&inode
->i_lock
);
808 write_inode_now(inode
, 1);
812 ORANGEFS_I(inode
)->attr_valid
= iattr
->ia_valid
;
813 ORANGEFS_I(inode
)->attr_uid
= current_fsuid();
814 ORANGEFS_I(inode
)->attr_gid
= current_fsgid();
816 setattr_copy(&nop_mnt_idmap
, inode
, iattr
);
817 spin_unlock(&inode
->i_lock
);
818 mark_inode_dirty(inode
);
825 int __orangefs_setattr_mode(struct dentry
*dentry
, struct iattr
*iattr
)
828 struct inode
*inode
= d_inode(dentry
);
830 ret
= __orangefs_setattr(inode
, iattr
);
831 /* change mode on a file that has ACLs */
832 if (!ret
&& (iattr
->ia_valid
& ATTR_MODE
))
833 ret
= posix_acl_chmod(&nop_mnt_idmap
, dentry
, inode
->i_mode
);
838 * Change attributes of an object referenced by dentry.
840 int orangefs_setattr(struct mnt_idmap
*idmap
, struct dentry
*dentry
,
844 gossip_debug(GOSSIP_INODE_DEBUG
, "__orangefs_setattr: called on %pd\n",
846 ret
= setattr_prepare(&nop_mnt_idmap
, dentry
, iattr
);
849 ret
= __orangefs_setattr_mode(dentry
, iattr
);
850 sync_inode_metadata(d_inode(dentry
), 1);
852 gossip_debug(GOSSIP_INODE_DEBUG
, "orangefs_setattr: returning %d\n",
858 * Obtain attributes of an object given a dentry
860 int orangefs_getattr(struct mnt_idmap
*idmap
, const struct path
*path
,
861 struct kstat
*stat
, u32 request_mask
, unsigned int flags
)
864 struct inode
*inode
= path
->dentry
->d_inode
;
866 gossip_debug(GOSSIP_INODE_DEBUG
,
867 "orangefs_getattr: called on %pd mask %u\n",
868 path
->dentry
, request_mask
);
870 ret
= orangefs_inode_getattr(inode
,
871 request_mask
& STATX_SIZE
? ORANGEFS_GETATTR_SIZE
: 0);
873 generic_fillattr(&nop_mnt_idmap
, inode
, stat
);
875 /* size was not requested, so do not report STATX_SIZE as valid */
876 if (!(request_mask
& STATX_SIZE
))
877 stat
->result_mask
&= ~STATX_SIZE
;
879 generic_fill_statx_attr(inode
, stat
);
884 int orangefs_permission(struct mnt_idmap
*idmap
,
885 struct inode
*inode
, int mask
)
889 if (mask
& MAY_NOT_BLOCK
)
892 gossip_debug(GOSSIP_INODE_DEBUG
, "%s: refreshing\n", __func__
);
894 /* Make sure the permission (and other common attrs) are up to date. */
895 ret
= orangefs_inode_getattr(inode
, 0);
899 return generic_permission(&nop_mnt_idmap
, inode
, mask
);
902 int orangefs_update_time(struct inode
*inode
, struct timespec64
*time
, int flags
)
905 gossip_debug(GOSSIP_INODE_DEBUG
, "orangefs_update_time: %pU\n",
906 get_khandle_from_ino(inode
));
907 generic_update_time(inode
, time
, flags
);
908 memset(&iattr
, 0, sizeof iattr
);
910 iattr
.ia_valid
|= ATTR_ATIME
;
912 iattr
.ia_valid
|= ATTR_CTIME
;
914 iattr
.ia_valid
|= ATTR_MTIME
;
915 return __orangefs_setattr(inode
, &iattr
);
918 static int orangefs_fileattr_get(struct dentry
*dentry
, struct fileattr
*fa
)
923 gossip_debug(GOSSIP_FILE_DEBUG
, "%s: called on %pd\n", __func__
,
926 ret
= orangefs_inode_getxattr(d_inode(dentry
),
927 "user.pvfs2.meta_hint",
929 if (ret
< 0 && ret
!= -ENODATA
)
932 gossip_debug(GOSSIP_FILE_DEBUG
, "%s: flags=%u\n", __func__
, (u32
) val
);
934 fileattr_fill_flags(fa
, val
);
938 static int orangefs_fileattr_set(struct mnt_idmap
*idmap
,
939 struct dentry
*dentry
, struct fileattr
*fa
)
943 gossip_debug(GOSSIP_FILE_DEBUG
, "%s: called on %pd\n", __func__
,
946 * ORANGEFS_MIRROR_FL is set internally when the mirroring mode is
947 * turned on for a file. The user is not allowed to turn on this bit,
948 * but the bit is present if the user first gets the flags and then
949 * updates the flags with some new settings. So, we ignore it in the
950 * following edit. bligon.
952 if (fileattr_has_fsx(fa
) ||
953 (fa
->flags
& ~(FS_IMMUTABLE_FL
| FS_APPEND_FL
| FS_NOATIME_FL
| ORANGEFS_MIRROR_FL
))) {
954 gossip_err("%s: only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n",
959 gossip_debug(GOSSIP_FILE_DEBUG
, "%s: flags=%u\n", __func__
, (u32
) val
);
960 return orangefs_inode_setxattr(d_inode(dentry
),
961 "user.pvfs2.meta_hint",
962 &val
, sizeof(val
), 0);
965 /* ORANGEFS2 implementation of VFS inode operations for files */
966 static const struct inode_operations orangefs_file_inode_operations
= {
967 .get_inode_acl
= orangefs_get_acl
,
968 .set_acl
= orangefs_set_acl
,
969 .setattr
= orangefs_setattr
,
970 .getattr
= orangefs_getattr
,
971 .listxattr
= orangefs_listxattr
,
972 .permission
= orangefs_permission
,
973 .update_time
= orangefs_update_time
,
974 .fileattr_get
= orangefs_fileattr_get
,
975 .fileattr_set
= orangefs_fileattr_set
,
978 static int orangefs_init_iops(struct inode
*inode
)
980 inode
->i_mapping
->a_ops
= &orangefs_address_operations
;
982 switch (inode
->i_mode
& S_IFMT
) {
984 inode
->i_op
= &orangefs_file_inode_operations
;
985 inode
->i_fop
= &orangefs_file_operations
;
988 inode
->i_op
= &orangefs_symlink_inode_operations
;
991 inode
->i_op
= &orangefs_dir_inode_operations
;
992 inode
->i_fop
= &orangefs_dir_operations
;
995 gossip_debug(GOSSIP_INODE_DEBUG
,
996 "%s: unsupported mode\n",
1005 * Given an ORANGEFS object identifier (fsid, handle), convert it into
1006 * a ino_t type that will be used as a hash-index from where the handle will
1007 * be searched for in the VFS hash table of inodes.
1009 static inline ino_t
orangefs_handle_hash(struct orangefs_object_kref
*ref
)
1013 return orangefs_khandle_to_ino(&(ref
->khandle
));
1017 * Called to set up an inode from iget5_locked.
1019 static int orangefs_set_inode(struct inode
*inode
, void *data
)
1021 struct orangefs_object_kref
*ref
= (struct orangefs_object_kref
*) data
;
1022 ORANGEFS_I(inode
)->refn
.fs_id
= ref
->fs_id
;
1023 ORANGEFS_I(inode
)->refn
.khandle
= ref
->khandle
;
1024 ORANGEFS_I(inode
)->attr_valid
= 0;
1025 hash_init(ORANGEFS_I(inode
)->xattr_cache
);
1026 ORANGEFS_I(inode
)->mapping_time
= jiffies
- 1;
1027 ORANGEFS_I(inode
)->bitlock
= 0;
1032 * Called to determine if handles match.
1034 static int orangefs_test_inode(struct inode
*inode
, void *data
)
1036 struct orangefs_object_kref
*ref
= (struct orangefs_object_kref
*) data
;
1037 struct orangefs_inode_s
*orangefs_inode
= NULL
;
1039 orangefs_inode
= ORANGEFS_I(inode
);
1040 /* test handles and fs_ids... */
1041 return (!ORANGEFS_khandle_cmp(&(orangefs_inode
->refn
.khandle
),
1043 orangefs_inode
->refn
.fs_id
== ref
->fs_id
);
1047 * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
1050 * @sb: the file system super block instance.
1051 * @ref: The ORANGEFS object for which we are trying to locate an inode.
1053 struct inode
*orangefs_iget(struct super_block
*sb
,
1054 struct orangefs_object_kref
*ref
)
1056 struct inode
*inode
= NULL
;
1060 hash
= orangefs_handle_hash(ref
);
1061 inode
= iget5_locked(sb
,
1063 orangefs_test_inode
,
1068 return ERR_PTR(-ENOMEM
);
1070 if (!(inode
->i_state
& I_NEW
))
1073 error
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_NEW
);
1076 return ERR_PTR(error
);
1079 inode
->i_ino
= hash
; /* needed for stat etc */
1080 orangefs_init_iops(inode
);
1081 unlock_new_inode(inode
);
1083 gossip_debug(GOSSIP_INODE_DEBUG
,
1084 "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
1094 * Allocate an inode for a newly created file and insert it into the inode hash.
1096 struct inode
*orangefs_new_inode(struct super_block
*sb
, struct inode
*dir
,
1097 umode_t mode
, dev_t dev
, struct orangefs_object_kref
*ref
)
1099 struct posix_acl
*acl
= NULL
, *default_acl
= NULL
;
1100 unsigned long hash
= orangefs_handle_hash(ref
);
1101 struct inode
*inode
;
1104 gossip_debug(GOSSIP_INODE_DEBUG
,
1105 "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
1112 inode
= new_inode(sb
);
1114 return ERR_PTR(-ENOMEM
);
1116 error
= posix_acl_create(dir
, &mode
, &default_acl
, &acl
);
1120 orangefs_set_inode(inode
, ref
);
1121 inode
->i_ino
= hash
; /* needed for stat etc */
1123 error
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_NEW
);
1127 orangefs_init_iops(inode
);
1128 inode
->i_rdev
= dev
;
1131 error
= __orangefs_set_acl(inode
, default_acl
,
1138 error
= __orangefs_set_acl(inode
, acl
, ACL_TYPE_ACCESS
);
1143 error
= insert_inode_locked4(inode
, hash
, orangefs_test_inode
, ref
);
1147 gossip_debug(GOSSIP_INODE_DEBUG
,
1148 "Initializing ACL's for inode %pU\n",
1149 get_khandle_from_ino(inode
));
1150 if (mode
!= inode
->i_mode
) {
1151 struct iattr iattr
= {
1153 .ia_valid
= ATTR_MODE
,
1155 inode
->i_mode
= mode
;
1156 __orangefs_setattr(inode
, &iattr
);
1157 __posix_acl_chmod(&acl
, GFP_KERNEL
, inode
->i_mode
);
1159 posix_acl_release(acl
);
1160 posix_acl_release(default_acl
);
1165 posix_acl_release(acl
);
1166 posix_acl_release(default_acl
);
1167 return ERR_PTR(error
);