From 45955f5d967360948cf0015a1058a53d60e46f55 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 23 Jun 2022 18:01:18 +0200 Subject: [PATCH] 5.18-stable patches added patches: s390-mm-use-non-quiescing-sske-for-kvm-switch-to-keyed-guest.patch zonefs-fix-zonefs_iomap_begin-for-reads.patch --- ...g-sske-for-kvm-switch-to-keyed-guest.patch | 35 +++ queue-5.18/series | 2 + ...efs-fix-zonefs_iomap_begin-for-reads.patch | 261 ++++++++++++++++++ 3 files changed, 298 insertions(+) create mode 100644 queue-5.18/s390-mm-use-non-quiescing-sske-for-kvm-switch-to-keyed-guest.patch create mode 100644 queue-5.18/zonefs-fix-zonefs_iomap_begin-for-reads.patch diff --git a/queue-5.18/s390-mm-use-non-quiescing-sske-for-kvm-switch-to-keyed-guest.patch b/queue-5.18/s390-mm-use-non-quiescing-sske-for-kvm-switch-to-keyed-guest.patch new file mode 100644 index 00000000000..3abe0ead18f --- /dev/null +++ b/queue-5.18/s390-mm-use-non-quiescing-sske-for-kvm-switch-to-keyed-guest.patch @@ -0,0 +1,35 @@ +From 3ae11dbcfac906a8c3a480e98660a823130dc16a Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Mon, 30 May 2022 11:27:06 +0200 +Subject: s390/mm: use non-quiescing sske for KVM switch to keyed guest + +From: Christian Borntraeger + +commit 3ae11dbcfac906a8c3a480e98660a823130dc16a upstream. + +The switch to a keyed guest does not require a classic sske as the other +guest CPUs are not accessing the key before the switch is complete. +By using the NQ SSKE things are faster especially with multiple guests. + +Signed-off-by: Christian Borntraeger +Suggested-by: Janis Schoetterl-Glausch +Reviewed-by: Claudio Imbrenda +Link: https://lore.kernel.org/r/20220530092706.11637-3-borntraeger@linux.ibm.com +Signed-off-by: Christian Borntraeger +Signed-off-by: Heiko Carstens +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/mm/pgtable.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/s390/mm/pgtable.c ++++ b/arch/s390/mm/pgtable.c +@@ -748,7 +748,7 @@ void ptep_zap_key(struct mm_struct *mm, + pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; + ptev = pte_val(*ptep); + if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) +- page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); ++ page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); + pgste_set_unlock(ptep, pgste); + preempt_enable(); + } diff --git a/queue-5.18/series b/queue-5.18/series index e69de29bb2d..e668d35b79b 100644 --- a/queue-5.18/series +++ b/queue-5.18/series @@ -0,0 +1,2 @@ +s390-mm-use-non-quiescing-sske-for-kvm-switch-to-keyed-guest.patch +zonefs-fix-zonefs_iomap_begin-for-reads.patch diff --git a/queue-5.18/zonefs-fix-zonefs_iomap_begin-for-reads.patch b/queue-5.18/zonefs-fix-zonefs_iomap_begin-for-reads.patch new file mode 100644 index 00000000000..8d2b5a63fec --- /dev/null +++ b/queue-5.18/zonefs-fix-zonefs_iomap_begin-for-reads.patch @@ -0,0 +1,261 @@ +From c1c1204c0d0c1dccc1310b9277fb2bd8b663d8fe Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Mon, 23 May 2022 16:29:10 +0900 +Subject: zonefs: fix zonefs_iomap_begin() for reads + +From: Damien Le Moal + +commit c1c1204c0d0c1dccc1310b9277fb2bd8b663d8fe upstream. + +If a readahead is issued to a sequential zone file with an offset +exactly equal to the current file size, the iomap type is set to +IOMAP_UNWRITTEN, which will prevent an IO, but the iomap length is +calculated as 0. This causes a WARN_ON() in iomap_iter(): + +[17309.548939] WARNING: CPU: 3 PID: 2137 at fs/iomap/iter.c:34 iomap_iter+0x9cf/0xe80 +[...] +[17309.650907] RIP: 0010:iomap_iter+0x9cf/0xe80 +[...] +[17309.754560] Call Trace: +[17309.757078] +[17309.759240] ? lock_is_held_type+0xd8/0x130 +[17309.763531] iomap_readahead+0x1a8/0x870 +[17309.767550] ? iomap_read_folio+0x4c0/0x4c0 +[17309.771817] ? lockdep_hardirqs_on_prepare+0x400/0x400 +[17309.778848] ? lock_release+0x370/0x750 +[17309.784462] ? folio_add_lru+0x217/0x3f0 +[17309.790220] ? reacquire_held_locks+0x4e0/0x4e0 +[17309.796543] read_pages+0x17d/0xb60 +[17309.801854] ? folio_add_lru+0x238/0x3f0 +[17309.807573] ? readahead_expand+0x5f0/0x5f0 +[17309.813554] ? policy_node+0xb5/0x140 +[17309.819018] page_cache_ra_unbounded+0x27d/0x450 +[17309.825439] filemap_get_pages+0x500/0x1450 +[17309.831444] ? filemap_add_folio+0x140/0x140 +[17309.837519] ? lock_is_held_type+0xd8/0x130 +[17309.843509] filemap_read+0x28c/0x9f0 +[17309.848953] ? zonefs_file_read_iter+0x1ea/0x4d0 [zonefs] +[17309.856162] ? trace_contention_end+0xd6/0x130 +[17309.862416] ? __mutex_lock+0x221/0x1480 +[17309.868151] ? zonefs_file_read_iter+0x166/0x4d0 [zonefs] +[17309.875364] ? filemap_get_pages+0x1450/0x1450 +[17309.881647] ? __mutex_unlock_slowpath+0x15e/0x620 +[17309.888248] ? wait_for_completion_io_timeout+0x20/0x20 +[17309.895231] ? lock_is_held_type+0xd8/0x130 +[17309.901115] ? lock_is_held_type+0xd8/0x130 +[17309.906934] zonefs_file_read_iter+0x356/0x4d0 [zonefs] +[17309.913750] new_sync_read+0x2d8/0x520 +[17309.919035] ? __x64_sys_lseek+0x1d0/0x1d0 + +Furthermore, this causes iomap_readahead() to loop forever as +iomap_readahead_iter() always returns 0, making no progress. + +Fix this by treating reads after the file size as access to holes, +setting the iomap type to IOMAP_HOLE, the iomap addr to IOMAP_NULL_ADDR +and using the length argument as is for the iomap length. To simplify +the code with this change, zonefs_iomap_begin() is split into the read +variant, zonefs_read_iomap_begin() and zonefs_read_iomap_ops, and the +write variant, zonefs_write_iomap_begin() and zonefs_write_iomap_ops. + +Reported-by: Jorgen Hansen +Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") +Signed-off-by: Damien Le Moal +Reviewed-by: Christoph Hellwig +Reviewed-by: Johannes Thumshirn +Reviewed-by: Jorgen Hansen +Signed-off-by: Greg Kroah-Hartman +--- + fs/zonefs/super.c | 94 ++++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 64 insertions(+), 30 deletions(-) + +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -72,15 +72,51 @@ static inline void zonefs_i_size_write(s + zi->i_flags &= ~ZONEFS_ZONE_OPEN; + } + +-static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, +- unsigned int flags, struct iomap *iomap, +- struct iomap *srcmap) ++static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, ++ loff_t length, unsigned int flags, ++ struct iomap *iomap, struct iomap *srcmap) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + +- /* All I/Os should always be within the file maximum size */ ++ /* ++ * All blocks are always mapped below EOF. If reading past EOF, ++ * act as if there is a hole up to the file maximum size. ++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ iomap->bdev = inode->i_sb->s_bdev; ++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); ++ isize = i_size_read(inode); ++ if (iomap->offset >= isize) { ++ iomap->type = IOMAP_HOLE; ++ iomap->addr = IOMAP_NULL_ADDR; ++ iomap->length = length; ++ } else { ++ iomap->type = IOMAP_MAPPED; ++ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; ++ iomap->length = isize - iomap->offset; ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ trace_zonefs_iomap_begin(inode, iomap); ++ ++ return 0; ++} ++ ++static const struct iomap_ops zonefs_read_iomap_ops = { ++ .iomap_begin = zonefs_read_iomap_begin, ++}; ++ ++static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, ++ loff_t length, unsigned int flags, ++ struct iomap *iomap, struct iomap *srcmap) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct super_block *sb = inode->i_sb; ++ loff_t isize; ++ ++ /* All write I/Os should always be within the file maximum size */ + if (WARN_ON_ONCE(offset + length > zi->i_max_size)) + return -EIO; + +@@ -90,7 +126,7 @@ static int zonefs_iomap_begin(struct ino + * operation. + */ + if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && +- (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT))) ++ !(flags & IOMAP_DIRECT))) + return -EIO; + + /* +@@ -99,47 +135,44 @@ static int zonefs_iomap_begin(struct ino + * write pointer) and unwriten beyond. + */ + mutex_lock(&zi->i_truncate_mutex); ++ iomap->bdev = inode->i_sb->s_bdev; ++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); ++ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; + isize = i_size_read(inode); +- if (offset >= isize) ++ if (iomap->offset >= isize) { + iomap->type = IOMAP_UNWRITTEN; +- else ++ iomap->length = zi->i_max_size - iomap->offset; ++ } else { + iomap->type = IOMAP_MAPPED; +- if (flags & IOMAP_WRITE) +- length = zi->i_max_size - offset; +- else +- length = min(length, isize - offset); ++ iomap->length = isize - iomap->offset; ++ } + mutex_unlock(&zi->i_truncate_mutex); + +- iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); +- iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset; +- iomap->bdev = inode->i_sb->s_bdev; +- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; +- + trace_zonefs_iomap_begin(inode, iomap); + + return 0; + } + +-static const struct iomap_ops zonefs_iomap_ops = { +- .iomap_begin = zonefs_iomap_begin, ++static const struct iomap_ops zonefs_write_iomap_ops = { ++ .iomap_begin = zonefs_write_iomap_begin, + }; + + static int zonefs_readpage(struct file *unused, struct page *page) + { +- return iomap_readpage(page, &zonefs_iomap_ops); ++ return iomap_readpage(page, &zonefs_read_iomap_ops); + } + + static void zonefs_readahead(struct readahead_control *rac) + { +- iomap_readahead(rac, &zonefs_iomap_ops); ++ iomap_readahead(rac, &zonefs_read_iomap_ops); + } + + /* + * Map blocks for page writeback. This is used only on conventional zone files, + * which implies that the page range can only be within the fixed inode size. + */ +-static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, +- struct inode *inode, loff_t offset) ++static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, ++ struct inode *inode, loff_t offset) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + +@@ -153,12 +186,12 @@ static int zonefs_map_blocks(struct ioma + offset < wpc->iomap.offset + wpc->iomap.length) + return 0; + +- return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset, +- IOMAP_WRITE, &wpc->iomap, NULL); ++ return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset, ++ IOMAP_WRITE, &wpc->iomap, NULL); + } + + static const struct iomap_writeback_ops zonefs_writeback_ops = { +- .map_blocks = zonefs_map_blocks, ++ .map_blocks = zonefs_write_map_blocks, + }; + + static int zonefs_writepage(struct page *page, struct writeback_control *wbc) +@@ -188,7 +221,8 @@ static int zonefs_swap_activate(struct s + return -EINVAL; + } + +- return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops); ++ return iomap_swapfile_activate(sis, swap_file, span, ++ &zonefs_read_iomap_ops); + } + + static const struct address_space_operations zonefs_file_aops = { +@@ -607,7 +641,7 @@ static vm_fault_t zonefs_filemap_page_mk + + /* Serialize against truncates */ + filemap_invalidate_lock_shared(inode->i_mapping); +- ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); ++ ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops); + filemap_invalidate_unlock_shared(inode->i_mapping); + + sb_end_pagefault(inode->i_sb); +@@ -860,7 +894,7 @@ static ssize_t zonefs_file_dio_write(str + if (append) + ret = zonefs_file_dio_append(iocb, from); + else +- ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, ++ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, + &zonefs_write_dio_ops, 0, 0); + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && + (ret > 0 || ret == -EIOCBQUEUED)) { +@@ -902,7 +936,7 @@ static ssize_t zonefs_file_buffered_writ + if (ret <= 0) + goto inode_unlock; + +- ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops); ++ ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops); + if (ret > 0) + iocb->ki_pos += ret; + else if (ret == -EIO) +@@ -995,7 +1029,7 @@ static ssize_t zonefs_file_read_iter(str + goto inode_unlock; + } + file_accessed(iocb->ki_filp); +- ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, ++ ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops, + &zonefs_read_dio_ops, 0, 0); + } else { + ret = generic_file_read_iter(iocb, to); -- 2.47.3