mm,tmpfs: consider end of file write in shmem_is_huge
author     Rik van Riel <riel@surriel.com>            Tue, 3 Sep 2024 15:19:28 +0000 (11:19 -0400)
committer  Andrew Morton <akpm@linux-foundation.org>  Mon, 9 Sep 2024 23:39:12 +0000 (16:39 -0700)
Take the end of a file write into consideration when deciding whether or
not to use huge pages for tmpfs files when the tmpfs filesystem is mounted
with huge=within_size.

This allows large writes that append to the end of a file to automatically
use large pages.
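
The heart of the change is in the SHMEM_HUGE_WITHIN_SIZE case of mm/shmem.c
(see the hunk below): the size used for the huge-page decision is now the
larger of the current i_size and the end of the pending write.  A minimal
userspace model of that check, using illustrative names and constants rather
than the kernel's, is sketched here:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace model of the huge=within_size decision; names and constants are
 * illustrative, not the kernel's.  A PMD-sized folio is allowed when the
 * larger of the current file size and the end of the pending write reaches
 * the next 2MB boundary above the faulting index. */
#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12
#define HPAGE_PMD_NR	512UL	/* 2MB / 4kB pages */

static uint64_t round_up(uint64_t x, uint64_t to)
{
	return (x + to - 1) / to * to;
}

static bool within_size_allows_huge(uint64_t index, uint64_t i_size,
				    uint64_t write_end)
{
	index = round_up(index + 1, HPAGE_PMD_NR);
	/* New in this patch: a pending write can extend the effective size. */
	uint64_t size = write_end > i_size ? write_end : i_size;

	size = round_up(size, PAGE_SIZE);
	return (size >> PAGE_SHIFT) >= index;
}

int main(void)
{
	/* Empty file, 4MB write starting at offset 0: huge pages now allowed. */
	printf("%d\n", within_size_allows_huge(0, 0, 4UL << 20));	/* 1 */
	/* Same fault without a pending write (write_end == 0): not allowed. */
	printf("%d\n", within_size_allows_huge(0, 0, 0));		/* 0 */
	return 0;
}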

The benchmark does 4MB sequential writes without fallocate to a 16GB tmpfs
file with fio.  The numbers without THP or with huge=always stay the same,
but the performance with huge=within_size now matches that of huge=always.

huge          before      after
4kB pages     1560 MB/s   1560 MB/s
within_size   1560 MB/s   4720 MB/s
always        4720 MB/s   4720 MB/s
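
The commit does not include the fio job file, so the following is an assumed
C equivalent of the workload (mount point, file name, and mount size are
illustrative): 4MB sequential writes, no fallocate, appending to a file on a
tmpfs mount created with huge=within_size.

/* Assumed equivalent of the fio workload: 4MB sequential appends, no fallocate.
 * Illustrative mount: mount -t tmpfs -o size=17g,huge=within_size tmpfs /mnt/tmpfs
 */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define CHUNK	(4UL << 20)		/* 4MB per write() */
#define TOTAL	(16UL << 30)		/* 16GB file */

int main(void)
{
	char *buf = malloc(CHUNK);
	int fd;

	if (!buf)
		return 1;
	memset(buf, 0xab, CHUNK);

	fd = open("/mnt/tmpfs/testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0)
		return 1;

	/* Each write() extends the file; with this patch the write path passes
	 * pos + len as write_end, so huge=within_size can pick PMD-sized
	 * folios while the file is still growing. */
	for (unsigned long done = 0; done < TOTAL; done += CHUNK)
		if (write(fd, buf, CHUNK) != (ssize_t)CHUNK)
			return 1;

	close(fd);
	free(buf);
	return 0;
}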

[akpm@linux-foundation.org: coding-style cleanups]
Link: https://lkml.kernel.org/r/20240903111928.7171e60c@imladris.surriel.com
Signed-off-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
fs/xfs/scrub/xfile.c
fs/xfs/xfs_buf_mem.c
include/linux/shmem_fs.h
mm/huge_memory.c
mm/khugepaged.c
mm/shmem.c
mm/userfaultfd.c

diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
index 9b5d98fe1f8ab3125bce69622d8e63edd887d3fa..c753c79df203fe1b6ab5e4f582a70a83db6eab73 100644
@@ -126,7 +126,7 @@ xfile_load(
                unsigned int    len;
                unsigned int    offset;
 
-               if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
+               if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
                                SGP_READ) < 0)
                        break;
                if (!folio) {
@@ -196,7 +196,7 @@ xfile_store(
                unsigned int    len;
                unsigned int    offset;
 
-               if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
+               if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
                                SGP_CACHE) < 0)
                        break;
                if (filemap_check_wb_err(inode->i_mapping, 0)) {
@@ -267,7 +267,7 @@ xfile_get_folio(
                i_size_write(inode, pos + len);
 
        pflags = memalloc_nofs_save();
-       error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
+       error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
                        (flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
        memalloc_nofs_restore(pflags);
        if (error)
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index 9bb2d24de7094114a6f8c303647fe1b857805fb4..07bebbfb16ee183d0d896dc2a3e1d927a76ea73e 100644
@@ -149,7 +149,7 @@ xmbuf_map_page(
                return -ENOMEM;
        }
 
-       error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE);
+       error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
        if (error)
                return error;
 
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 1564d7d3ca6151e6b58ea8a70b2c63d66da8de22..515a9a6a3c6f82c55952d821887514217a6a00d1 100644
@@ -113,11 +113,11 @@ int shmem_unuse(unsigned int type);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
                                struct vm_area_struct *vma, pgoff_t index,
-                               bool shmem_huge_force);
+                               loff_t write_end, bool shmem_huge_force);
 #else
 static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
                                struct vm_area_struct *vma, pgoff_t index,
-                               bool shmem_huge_force)
+                               loff_t write_end, bool shmem_huge_force)
 {
        return 0;
 }
@@ -143,8 +143,8 @@ enum sgp_type {
        SGP_FALLOC,     /* like SGP_WRITE, but make existing page Uptodate */
 };
 
-int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
-               enum sgp_type sgp);
+int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
+               struct folio **foliop, enum sgp_type sgp);
 struct folio *shmem_read_folio_gfp(struct address_space *mapping,
                pgoff_t index, gfp_t gfp);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 691702e39f851ee1e92f5bc5300d99e78d4269be..77092581f90da18d5254f4467455ae1208a929a5 100644
@@ -164,7 +164,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
         */
        if (!in_pf && shmem_file(vma->vm_file))
                return shmem_allowable_huge_orders(file_inode(vma->vm_file),
-                                                  vma, vma->vm_pgoff,
+                                                  vma, vma->vm_pgoff, 0,
                                                   !enforce_sysfs);
 
        if (!vma_is_anonymous(vma)) {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 32100041aef3a7b077fa5a0bc4d9f1e66a157544..f9c39898eaff6563a4aae6274b2cc7fdb5dc22d0 100644
@@ -1870,7 +1870,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
                        if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
                                xas_unlock_irq(&xas);
                                /* swap in or instantiate fallocated page */
-                               if (shmem_get_folio(mapping->host, index,
+                               if (shmem_get_folio(mapping->host, index, 0,
                                                &folio, SGP_NOALLOC)) {
                                        result = SCAN_FAIL;
                                        goto xa_unlocked;
diff --git a/mm/shmem.c b/mm/shmem.c
index 553b99cb265e542948cf1f41b7503e89223f1566..74f093d88c782f892b51c9086f15b5d6df5fd7db 100644
@@ -549,7 +549,8 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
 
 static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-                                       bool shmem_huge_force, struct vm_area_struct *vma,
+                                       loff_t write_end, bool shmem_huge_force,
+                                       struct vm_area_struct *vma,
                                        unsigned long vm_flags)
 {
        struct mm_struct *mm = vma ? vma->vm_mm : NULL;
@@ -569,7 +570,8 @@ static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
                return true;
        case SHMEM_HUGE_WITHIN_SIZE:
                index = round_up(index + 1, HPAGE_PMD_NR);
-               i_size = round_up(i_size_read(inode), PAGE_SIZE);
+               i_size = max(write_end, i_size_read(inode));
+               i_size = round_up(i_size, PAGE_SIZE);
                if (i_size >> PAGE_SHIFT >= index)
                        return true;
                fallthrough;
@@ -583,14 +585,14 @@ static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
 }
 
 static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-                  bool shmem_huge_force, struct vm_area_struct *vma,
-                  unsigned long vm_flags)
+                  loff_t write_end, bool shmem_huge_force,
+                  struct vm_area_struct *vma, unsigned long vm_flags)
 {
        if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
                return false;
 
-       return __shmem_huge_global_enabled(inode, index, shmem_huge_force,
-                                          vma, vm_flags);
+       return __shmem_huge_global_enabled(inode, index, write_end,
+                                          shmem_huge_force, vma, vm_flags);
 }
 
 #if defined(CONFIG_SYSFS)
@@ -770,8 +772,8 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 }
 
 static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-               bool shmem_huge_force, struct vm_area_struct *vma,
-               unsigned long vm_flags)
+               loff_t write_end, bool shmem_huge_force,
+               struct vm_area_struct *vma, unsigned long vm_flags)
 {
        return false;
 }
@@ -978,7 +980,7 @@ static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
         * (although in some cases this is just a waste of time).
         */
        folio = NULL;
-       shmem_get_folio(inode, index, &folio, SGP_READ);
+       shmem_get_folio(inode, index, 0, &folio, SGP_READ);
        return folio;
 }
 
@@ -1166,7 +1168,7 @@ static int shmem_getattr(struct mnt_idmap *idmap,
                        STATX_ATTR_NODUMP);
        generic_fillattr(idmap, request_mask, inode, stat);
 
-       if (shmem_huge_global_enabled(inode, 0, false, NULL, 0))
+       if (shmem_huge_global_enabled(inode, 0, 0, false, NULL, 0))
                stat->blksize = HPAGE_PMD_SIZE;
 
        if (request_mask & STATX_BTIME) {
@@ -1653,7 +1655,7 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
                                struct vm_area_struct *vma, pgoff_t index,
-                               bool shmem_huge_force)
+                               loff_t write_end, bool shmem_huge_force)
 {
        unsigned long mask = READ_ONCE(huge_shmem_orders_always);
        unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
@@ -1670,8 +1672,8 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
        if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
                return 0;
 
-       global_huge = shmem_huge_global_enabled(inode, index, shmem_huge_force,
-                                               vma, vm_flags);
+       global_huge = shmem_huge_global_enabled(inode, index, write_end,
+                                       shmem_huge_force, vma, vm_flags);
        if (!vma || !vma_is_anon_shmem(vma)) {
                /*
                 * For tmpfs, we now only support PMD sized THP if huge page
@@ -2231,8 +2233,8 @@ unlock:
  * vmf and fault_type are only supplied by shmem_fault: otherwise they are NULL.
  */
 static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
-               struct folio **foliop, enum sgp_type sgp, gfp_t gfp,
-               struct vm_fault *vmf, vm_fault_t *fault_type)
+               loff_t write_end, struct folio **foliop, enum sgp_type sgp,
+               gfp_t gfp, struct vm_fault *vmf, vm_fault_t *fault_type)
 {
        struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
        struct mm_struct *fault_mm;
@@ -2312,7 +2314,7 @@ repeat:
        }
 
        /* Find hugepage orders that are allowed for anonymous shmem and tmpfs. */
-       orders = shmem_allowable_huge_orders(inode, vma, index, false);
+       orders = shmem_allowable_huge_orders(inode, vma, index, write_end, false);
        if (orders > 0) {
                gfp_t huge_gfp;
 
@@ -2413,6 +2415,7 @@ unlock:
  * shmem_get_folio - find, and lock a shmem folio.
  * @inode:     inode to search
  * @index:     the page index.
+ * @write_end: end of a write, could extend inode size
  * @foliop:    pointer to the folio if found
  * @sgp:       SGP_* flags to control behavior
  *
@@ -2432,10 +2435,10 @@ unlock:
  * Context: May sleep.
  * Return: 0 if successful, else a negative error code.
  */
-int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
-               enum sgp_type sgp)
+int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
+                   struct folio **foliop, enum sgp_type sgp)
 {
-       return shmem_get_folio_gfp(inode, index, foliop, sgp,
+       return shmem_get_folio_gfp(inode, index, write_end, foliop, sgp,
                        mapping_gfp_mask(inode->i_mapping), NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(shmem_get_folio);
@@ -2530,7 +2533,7 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
        }
 
        WARN_ON_ONCE(vmf->page != NULL);
-       err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE,
+       err = shmem_get_folio_gfp(inode, vmf->pgoff, 0, &folio, SGP_CACHE,
                                  gfp, vmf, &ret);
        if (err)
                return vmf_error(err);
@@ -3040,7 +3043,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
                        return -EPERM;
        }
 
-       ret = shmem_get_folio(inode, index, &folio, SGP_WRITE);
+       ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
        if (ret)
                return ret;
 
@@ -3111,7 +3114,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
                                break;
                }
 
-               error = shmem_get_folio(inode, index, &folio, SGP_READ);
+               error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
                if (error) {
                        if (error == -EINVAL)
                                error = 0;
@@ -3287,7 +3290,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
                if (*ppos >= i_size_read(inode))
                        break;
 
-               error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio,
+               error = shmem_get_folio(inode, *ppos / PAGE_SIZE, 0, &folio,
                                        SGP_READ);
                if (error) {
                        if (error == -EINVAL)
@@ -3477,8 +3480,8 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
                        error = -ENOMEM;
                else
-                       error = shmem_get_folio(inode, index, &folio,
-                                               SGP_FALLOC);
+                       error = shmem_get_folio(inode, index, offset + len,
+                                               &folio, SGP_FALLOC);
                if (error) {
                        info->fallocend = undo_fallocend;
                        /* Remove the !uptodate folios we added */
@@ -3829,7 +3832,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
        } else {
                inode_nohighmem(inode);
                inode->i_mapping->a_ops = &shmem_aops;
-               error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
+               error = shmem_get_folio(inode, 0, 0, &folio, SGP_WRITE);
                if (error)
                        goto out_remove_offset;
                inode->i_op = &shmem_symlink_inode_operations;
@@ -3875,7 +3878,7 @@ static const char *shmem_get_link(struct dentry *dentry, struct inode *inode,
                        return ERR_PTR(-ECHILD);
                }
        } else {
-               error = shmem_get_folio(inode, 0, &folio, SGP_READ);
+               error = shmem_get_folio(inode, 0, 0, &folio, SGP_READ);
                if (error)
                        return ERR_PTR(error);
                if (!folio)
@@ -5343,7 +5346,7 @@ struct folio *shmem_read_folio_gfp(struct address_space *mapping,
        struct folio *folio;
        int error;
 
-       error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE,
+       error = shmem_get_folio_gfp(inode, index, 0, &folio, SGP_CACHE,
                                    gfp, NULL, NULL);
        if (error)
                return ERR_PTR(error);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 966e6c81a685d5e205f1a7e914383d745eccab7d..a609b2927848e2967c0da82cd9783bb50d610258 100644
@@ -391,7 +391,7 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
        struct page *page;
        int ret;
 
-       ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC);
+       ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
        /* Our caller expects us to return -EFAULT if we failed to find folio */
        if (ret == -ENOENT)
                ret = -EFAULT;