From: Greg Kroah-Hartman Date: Wed, 30 Nov 2022 17:39:51 +0000 (+0100) Subject: 6.0-stable patches X-Git-Tag: v5.10.157~19 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a661cb13d386809dd1b8636c4d5f58b96cac5871;p=thirdparty%2Fkernel%2Fstable-queue.git 6.0-stable patches added patches: btrfs-do-not-modify-log-tree-while-holding-a-leaf-from-fs-tree-locked.patch btrfs-free-btrfs_path-before-copying-fspath-to-userspace.patch btrfs-free-btrfs_path-before-copying-inodes-to-userspace.patch btrfs-free-btrfs_path-before-copying-root-refs-to-userspace.patch btrfs-free-btrfs_path-before-copying-subvol-info-to-userspace.patch btrfs-sysfs-normalize-the-error-handling-branch-in-btrfs_init_sysfs.patch btrfs-use-kvcalloc-in-btrfs_get_dev_zone_info.patch btrfs-zoned-fix-missing-endianness-conversion-in-sb_write_pointer.patch drm-amd-amdgpu-reserve-vm-invalidation-engine-for-firmware.patch drm-amd-dc-dce120-fix-audio-register-mapping-stop-triggering-kasan.patch drm-amd-display-no-display-after-resume-from-wb-cb.patch drm-amd-display-update-soc-bounding-box-for-dcn32-dcn321.patch drm-amdgpu-always-register-an-mmu-notifier-for-userptr.patch drm-amdgpu-enable-aldebaran-devices-to-report-cu-occupancy.patch drm-amdgpu-partially-revert-drm-amdgpu-update-drm_display_info-correctly-when-the-edid-is-read.patch drm-amdgpu-psp-don-t-free-psp-buffers-on-suspend.patch drm-display-dp_mst-fix-drm_dp_mst_add_affected_dsc_crtcs-return-code.patch drm-i915-ttm-never-purge-busy-objects.patch --- diff --git a/queue-6.0/btrfs-do-not-modify-log-tree-while-holding-a-leaf-from-fs-tree-locked.patch b/queue-6.0/btrfs-do-not-modify-log-tree-while-holding-a-leaf-from-fs-tree-locked.patch new file mode 100644 index 00000000000..eb60178b23e --- /dev/null +++ b/queue-6.0/btrfs-do-not-modify-log-tree-while-holding-a-leaf-from-fs-tree-locked.patch @@ -0,0 +1,298 @@ +From 796787c978efbbdb50e245718c784eb94f59eac4 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 21 Nov 2022 10:23:22 +0000 
+Subject: btrfs: do not modify log tree while holding a leaf from fs tree locked + +From: Filipe Manana + +commit 796787c978efbbdb50e245718c784eb94f59eac4 upstream. + +When logging an inode in full mode, or when logging xattrs or when logging +the dir index items of a directory, we are modifying the log tree while +holding a read lock on a leaf from the fs/subvolume tree. This can lead to +a deadlock in rare circumstances, but it is a real possibility, and it was +recently reported by syzbot with the following trace from lockdep: + + WARNING: possible circular locking dependency detected + 6.1.0-rc5-next-20221116-syzkaller #0 Not tainted + ------------------------------------------------------ + syz-executor.1/16154 is trying to acquire lock: + ffff88807e3084a0 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node.part.0+0xa1/0xf30 fs/btrfs/delayed-inode.c:256 + + but task is already holding lock: + ffff88807df33078 (btrfs-log-00){++++}-{3:3}, at: __btrfs_tree_lock+0x32/0x3d0 fs/btrfs/locking.c:197 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #2 (btrfs-log-00){++++}-{3:3}: + down_read_nested+0x9e/0x450 kernel/locking/rwsem.c:1634 + __btrfs_tree_read_lock+0x32/0x350 fs/btrfs/locking.c:135 + btrfs_tree_read_lock fs/btrfs/locking.c:141 [inline] + btrfs_read_lock_root_node+0x82/0x3a0 fs/btrfs/locking.c:280 + btrfs_search_slot_get_root fs/btrfs/ctree.c:1678 [inline] + btrfs_search_slot+0x3ca/0x2c70 fs/btrfs/ctree.c:1998 + btrfs_lookup_csum+0x116/0x3f0 fs/btrfs/file-item.c:209 + btrfs_csum_file_blocks+0x40e/0x1370 fs/btrfs/file-item.c:1021 + log_csums.isra.0+0x244/0x2d0 fs/btrfs/tree-log.c:4258 + copy_items.isra.0+0xbfb/0xed0 fs/btrfs/tree-log.c:4403 + copy_inode_items_to_log+0x13d6/0x1d90 fs/btrfs/tree-log.c:5873 + btrfs_log_inode+0xb19/0x4680 fs/btrfs/tree-log.c:6495 + btrfs_log_inode_parent+0x890/0x2a20 fs/btrfs/tree-log.c:6982 + btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7083 + btrfs_sync_file+0xa41/0x13c0 fs/btrfs/file.c:1921 + vfs_fsync_range+0x13e/0x230 fs/sync.c:188 + generic_write_sync include/linux/fs.h:2856 [inline] + iomap_dio_complete+0x73a/0x920 fs/iomap/direct-io.c:128 + btrfs_direct_write fs/btrfs/file.c:1536 [inline] + btrfs_do_write_iter+0xba2/0x1470 fs/btrfs/file.c:1668 + call_write_iter include/linux/fs.h:2160 [inline] + do_iter_readv_writev+0x20b/0x3b0 fs/read_write.c:735 + do_iter_write+0x182/0x700 fs/read_write.c:861 + vfs_iter_write+0x74/0xa0 fs/read_write.c:902 + iter_file_splice_write+0x745/0xc90 fs/splice.c:686 + do_splice_from fs/splice.c:764 [inline] + direct_splice_actor+0x114/0x180 fs/splice.c:931 + splice_direct_to_actor+0x335/0x8a0 fs/splice.c:886 + do_splice_direct+0x1ab/0x280 fs/splice.c:974 + do_sendfile+0xb19/0x1270 fs/read_write.c:1255 + __do_sys_sendfile64 fs/read_write.c:1323 [inline] + __se_sys_sendfile64 fs/read_write.c:1309 [inline] + __x64_sys_sendfile64+0x259/0x2c0 fs/read_write.c:1309 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 + 
entry_SYSCALL_64_after_hwframe+0x63/0xcd + + -> #1 (btrfs-tree-00){++++}-{3:3}: + __lock_release kernel/locking/lockdep.c:5382 [inline] + lock_release+0x371/0x810 kernel/locking/lockdep.c:5688 + up_write+0x2a/0x520 kernel/locking/rwsem.c:1614 + btrfs_tree_unlock_rw fs/btrfs/locking.h:189 [inline] + btrfs_unlock_up_safe+0x1e3/0x290 fs/btrfs/locking.c:238 + search_leaf fs/btrfs/ctree.c:1832 [inline] + btrfs_search_slot+0x265e/0x2c70 fs/btrfs/ctree.c:2074 + btrfs_insert_empty_items+0xbd/0x1c0 fs/btrfs/ctree.c:4133 + btrfs_insert_delayed_item+0x826/0xfa0 fs/btrfs/delayed-inode.c:746 + btrfs_insert_delayed_items fs/btrfs/delayed-inode.c:824 [inline] + __btrfs_commit_inode_delayed_items fs/btrfs/delayed-inode.c:1111 [inline] + __btrfs_run_delayed_items+0x280/0x590 fs/btrfs/delayed-inode.c:1153 + flush_space+0x147/0xe90 fs/btrfs/space-info.c:728 + btrfs_async_reclaim_metadata_space+0x541/0xc10 fs/btrfs/space-info.c:1086 + process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 + worker_thread+0x669/0x1090 kernel/workqueue.c:2436 + kthread+0x2e8/0x3a0 kernel/kthread.c:376 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 + + -> #0 (&delayed_node->mutex){+.+.}-{3:3}: + check_prev_add kernel/locking/lockdep.c:3097 [inline] + check_prevs_add kernel/locking/lockdep.c:3216 [inline] + validate_chain kernel/locking/lockdep.c:3831 [inline] + __lock_acquire+0x2a43/0x56d0 kernel/locking/lockdep.c:5055 + lock_acquire kernel/locking/lockdep.c:5668 [inline] + lock_acquire+0x1e3/0x630 kernel/locking/lockdep.c:5633 + __mutex_lock_common kernel/locking/mutex.c:603 [inline] + __mutex_lock+0x12f/0x1360 kernel/locking/mutex.c:747 + __btrfs_release_delayed_node.part.0+0xa1/0xf30 fs/btrfs/delayed-inode.c:256 + __btrfs_release_delayed_node fs/btrfs/delayed-inode.c:251 [inline] + btrfs_release_delayed_node fs/btrfs/delayed-inode.c:281 [inline] + btrfs_remove_delayed_node+0x52/0x60 fs/btrfs/delayed-inode.c:1285 + btrfs_evict_inode+0x511/0xf30 fs/btrfs/inode.c:5554 + evict+0x2ed/0x6b0 
fs/inode.c:664 + dispose_list+0x117/0x1e0 fs/inode.c:697 + prune_icache_sb+0xeb/0x150 fs/inode.c:896 + super_cache_scan+0x391/0x590 fs/super.c:106 + do_shrink_slab+0x464/0xce0 mm/vmscan.c:843 + shrink_slab_memcg mm/vmscan.c:912 [inline] + shrink_slab+0x388/0x660 mm/vmscan.c:991 + shrink_node_memcgs mm/vmscan.c:6088 [inline] + shrink_node+0x93d/0x1f30 mm/vmscan.c:6117 + shrink_zones mm/vmscan.c:6355 [inline] + do_try_to_free_pages+0x3b4/0x17a0 mm/vmscan.c:6417 + try_to_free_mem_cgroup_pages+0x3a4/0xa70 mm/vmscan.c:6732 + reclaim_high.constprop.0+0x182/0x230 mm/memcontrol.c:2393 + mem_cgroup_handle_over_high+0x190/0x520 mm/memcontrol.c:2578 + try_charge_memcg+0xe0c/0x12f0 mm/memcontrol.c:2816 + try_charge mm/memcontrol.c:2827 [inline] + charge_memcg+0x90/0x3b0 mm/memcontrol.c:6889 + __mem_cgroup_charge+0x2b/0x90 mm/memcontrol.c:6910 + mem_cgroup_charge include/linux/memcontrol.h:667 [inline] + __filemap_add_folio+0x615/0xf80 mm/filemap.c:852 + filemap_add_folio+0xaf/0x1e0 mm/filemap.c:934 + __filemap_get_folio+0x389/0xd80 mm/filemap.c:1976 + pagecache_get_page+0x2e/0x280 mm/folio-compat.c:104 + find_or_create_page include/linux/pagemap.h:612 [inline] + alloc_extent_buffer+0x2b9/0x1580 fs/btrfs/extent_io.c:4588 + btrfs_init_new_buffer fs/btrfs/extent-tree.c:4869 [inline] + btrfs_alloc_tree_block+0x2e1/0x1320 fs/btrfs/extent-tree.c:4988 + __btrfs_cow_block+0x3b2/0x1420 fs/btrfs/ctree.c:440 + btrfs_cow_block+0x2fa/0x950 fs/btrfs/ctree.c:595 + btrfs_search_slot+0x11b0/0x2c70 fs/btrfs/ctree.c:2038 + btrfs_update_root+0xdb/0x630 fs/btrfs/root-tree.c:137 + update_log_root fs/btrfs/tree-log.c:2841 [inline] + btrfs_sync_log+0xbfb/0x2870 fs/btrfs/tree-log.c:3064 + btrfs_sync_file+0xdb9/0x13c0 fs/btrfs/file.c:1947 + vfs_fsync_range+0x13e/0x230 fs/sync.c:188 + generic_write_sync include/linux/fs.h:2856 [inline] + iomap_dio_complete+0x73a/0x920 fs/iomap/direct-io.c:128 + btrfs_direct_write fs/btrfs/file.c:1536 [inline] + btrfs_do_write_iter+0xba2/0x1470 fs/btrfs/file.c:1668 + 
call_write_iter include/linux/fs.h:2160 [inline] + do_iter_readv_writev+0x20b/0x3b0 fs/read_write.c:735 + do_iter_write+0x182/0x700 fs/read_write.c:861 + vfs_iter_write+0x74/0xa0 fs/read_write.c:902 + iter_file_splice_write+0x745/0xc90 fs/splice.c:686 + do_splice_from fs/splice.c:764 [inline] + direct_splice_actor+0x114/0x180 fs/splice.c:931 + splice_direct_to_actor+0x335/0x8a0 fs/splice.c:886 + do_splice_direct+0x1ab/0x280 fs/splice.c:974 + do_sendfile+0xb19/0x1270 fs/read_write.c:1255 + __do_sys_sendfile64 fs/read_write.c:1323 [inline] + __se_sys_sendfile64 fs/read_write.c:1309 [inline] + __x64_sys_sendfile64+0x259/0x2c0 fs/read_write.c:1309 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + + other info that might help us debug this: + + Chain exists of: + &delayed_node->mutex --> btrfs-tree-00 --> btrfs-log-00 + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(btrfs-log-00); + lock(btrfs-tree-00); + lock(btrfs-log-00); + lock(&delayed_node->mutex); + +Holding a read lock on a leaf from a fs/subvolume tree creates a nasty +lock dependency when we are COWing extent buffers for the log tree and we +have two tasks modifying the log tree, with each one in one of the +following 2 scenarios: + +1) Modifying the log tree triggers an extent buffer allocation while + holding a write lock on a parent extent buffer from the log tree. + Allocating the pages for an extent buffer, or the extent buffer + struct, can trigger inode eviction and finally the inode eviction + will trigger a release/remove of a delayed node, which requires + taking the delayed node's mutex; + +2) Allocating a metadata extent for a log tree can trigger the async + reclaim thread and make us wait for it to release enough space and + unblock our reservation ticket. 
The reclaim thread can start flushing + delayed items, and that in turn results in the need to lock delayed + node mutexes and in the need to write lock extent buffers of a + subvolume tree - all this while holding a write lock on the parent + extent buffer in the log tree. + +So one task in scenario 1) running in parallel with another task in +scenario 2) could lead to a deadlock, one wanting to lock a delayed node +mutex while having a read lock on a leaf from the subvolume, while the +other is holding the delayed node's mutex and wants to write lock the same +subvolume leaf for flushing delayed items. + +Fix this by cloning the leaf of the fs/subvolume tree, release/unlock the +fs/subvolume leaf and use the clone leaf instead. + +Reported-by: syzbot+9b7c21f486f5e7f8d029@syzkaller.appspotmail.com +Link: https://lore.kernel.org/linux-btrfs/000000000000ccc93c05edc4d8cf@google.com/ +CC: stable@vger.kernel.org # 6.0+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/tree-log.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 55 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -3834,15 +3834,29 @@ static int process_dir_items_leaf(struct + u64 *last_old_dentry_offset) + { + struct btrfs_root *log = inode->root->log_root; +- struct extent_buffer *src = path->nodes[0]; +- const int nritems = btrfs_header_nritems(src); ++ struct extent_buffer *src; ++ const int nritems = btrfs_header_nritems(path->nodes[0]); + const u64 ino = btrfs_ino(inode); + bool last_found = false; + int batch_start = 0; + int batch_size = 0; + int i; + +- for (i = path->slots[0]; i < nritems; i++) { ++ /* ++ * We need to clone the leaf, release the read lock on it, and use the ++ * clone before modifying the log tree. See the comment at copy_items() ++ * about why we need to do this. 
++ */ ++ src = btrfs_clone_extent_buffer(path->nodes[0]); ++ if (!src) ++ return -ENOMEM; ++ ++ i = path->slots[0]; ++ btrfs_release_path(path); ++ path->nodes[0] = src; ++ path->slots[0] = i; ++ ++ for (; i < nritems; i++) { + struct btrfs_dir_item *di; + struct btrfs_key key; + int ret; +@@ -4414,7 +4428,7 @@ static noinline int copy_items(struct bt + { + struct btrfs_root *log = inode->root->log_root; + struct btrfs_file_extent_item *extent; +- struct extent_buffer *src = src_path->nodes[0]; ++ struct extent_buffer *src; + int ret = 0; + struct btrfs_key *ins_keys; + u32 *ins_sizes; +@@ -4425,6 +4439,43 @@ static noinline int copy_items(struct bt + const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM); + const u64 i_size = i_size_read(&inode->vfs_inode); + ++ /* ++ * To keep lockdep happy and avoid deadlocks, clone the source leaf and ++ * use the clone. This is because otherwise we would be changing the log ++ * tree, to insert items from the subvolume tree or insert csum items, ++ * while holding a read lock on a leaf from the subvolume tree, which ++ * creates a nasty lock dependency when COWing log tree nodes/leaves: ++ * ++ * 1) Modifying the log tree triggers an extent buffer allocation while ++ * holding a write lock on a parent extent buffer from the log tree. ++ * Allocating the pages for an extent buffer, or the extent buffer ++ * struct, can trigger inode eviction and finally the inode eviction ++ * will trigger a release/remove of a delayed node, which requires ++ * taking the delayed node's mutex; ++ * ++ * 2) Allocating a metadata extent for a log tree can trigger the async ++ * reclaim thread and make us wait for it to release enough space and ++ * unblock our reservation ticket. 
The reclaim thread can start ++ * flushing delayed items, and that in turn results in the need to ++ * lock delayed node mutexes and in the need to write lock extent ++ * buffers of a subvolume tree - all this while holding a write lock ++ * on the parent extent buffer in the log tree. ++ * ++ * So one task in scenario 1) running in parallel with another task in ++ * scenario 2) could lead to a deadlock, one wanting to lock a delayed ++ * node mutex while having a read lock on a leaf from the subvolume, ++ * while the other is holding the delayed node's mutex and wants to ++ * write lock the same subvolume leaf for flushing delayed items. ++ */ ++ src = btrfs_clone_extent_buffer(src_path->nodes[0]); ++ if (!src) ++ return -ENOMEM; ++ ++ i = src_path->slots[0]; ++ btrfs_release_path(src_path); ++ src_path->nodes[0] = src; ++ src_path->slots[0] = i; ++ + ins_data = kmalloc(nr * sizeof(struct btrfs_key) + + nr * sizeof(u32), GFP_NOFS); + if (!ins_data) diff --git a/queue-6.0/btrfs-free-btrfs_path-before-copying-fspath-to-userspace.patch b/queue-6.0/btrfs-free-btrfs_path-before-copying-fspath-to-userspace.patch new file mode 100644 index 00000000000..1822af559e0 --- /dev/null +++ b/queue-6.0/btrfs-free-btrfs_path-before-copying-fspath-to-userspace.patch @@ -0,0 +1,35 @@ +From 8cf96b409d9b3946ece58ced13f92d0f775b0442 Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Thu, 10 Nov 2022 11:36:29 +0530 +Subject: btrfs: free btrfs_path before copying fspath to userspace + +From: Anand Jain + +commit 8cf96b409d9b3946ece58ced13f92d0f775b0442 upstream. + +btrfs_ioctl_ino_to_path() frees the search path after the userspace copy +from the temp buffer @ipath->fspath. Which potentially can lead to a lock +splat warning. + +Fix this by freeing the path before we copy it to userspace. 
+ +CC: stable@vger.kernel.org # 4.19+ +Signed-off-by: Anand Jain +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -4232,6 +4232,8 @@ static long btrfs_ioctl_ino_to_path(stru + ipath->fspath->val[i] = rel_ptr; + } + ++ btrfs_free_path(path); ++ path = NULL; + ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, + ipath->fspath, size); + if (ret) { diff --git a/queue-6.0/btrfs-free-btrfs_path-before-copying-inodes-to-userspace.patch b/queue-6.0/btrfs-free-btrfs_path-before-copying-inodes-to-userspace.patch new file mode 100644 index 00000000000..99f0781719c --- /dev/null +++ b/queue-6.0/btrfs-free-btrfs_path-before-copying-inodes-to-userspace.patch @@ -0,0 +1,63 @@ +From 418ffb9e3cf6c4e2574d3a732b724916684bd133 Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Thu, 10 Nov 2022 11:36:28 +0530 +Subject: btrfs: free btrfs_path before copying inodes to userspace + +From: Anand Jain + +commit 418ffb9e3cf6c4e2574d3a732b724916684bd133 upstream. + +btrfs_ioctl_logical_to_ino() frees the search path after the userspace +copy from the temp buffer @inodes. Which potentially can lead to a lock +splat. + +Fix this by freeing the path before we copy @inodes to userspace. 
+ +CC: stable@vger.kernel.org # 4.19+ +Signed-off-by: Anand Jain +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 16 +++++++--------- + 1 file changed, 7 insertions(+), 9 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -4282,21 +4282,20 @@ static long btrfs_ioctl_logical_to_ino(s + size = min_t(u32, loi->size, SZ_16M); + } + +- path = btrfs_alloc_path(); +- if (!path) { +- ret = -ENOMEM; +- goto out; +- } +- + inodes = init_data_container(size); + if (IS_ERR(inodes)) { + ret = PTR_ERR(inodes); +- inodes = NULL; +- goto out; ++ goto out_loi; + } + ++ path = btrfs_alloc_path(); ++ if (!path) { ++ ret = -ENOMEM; ++ goto out; ++ } + ret = iterate_inodes_from_logical(loi->logical, fs_info, path, + inodes, ignore_offset); ++ btrfs_free_path(path); + if (ret == -EINVAL) + ret = -ENOENT; + if (ret < 0) +@@ -4308,7 +4307,6 @@ static long btrfs_ioctl_logical_to_ino(s + ret = -EFAULT; + + out: +- btrfs_free_path(path); + kvfree(inodes); + out_loi: + kfree(loi); diff --git a/queue-6.0/btrfs-free-btrfs_path-before-copying-root-refs-to-userspace.patch b/queue-6.0/btrfs-free-btrfs_path-before-copying-root-refs-to-userspace.patch new file mode 100644 index 00000000000..be035864901 --- /dev/null +++ b/queue-6.0/btrfs-free-btrfs_path-before-copying-root-refs-to-userspace.patch @@ -0,0 +1,210 @@ +From b740d806166979488e798e41743aaec051f2443f Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 7 Nov 2022 11:44:51 -0500 +Subject: btrfs: free btrfs_path before copying root refs to userspace + +From: Josef Bacik + +commit b740d806166979488e798e41743aaec051f2443f upstream. 
+ +Syzbot reported the following lockdep splat + +====================================================== +WARNING: possible circular locking dependency detected +6.0.0-rc7-syzkaller-18095-gbbed346d5a96 #0 Not tainted +------------------------------------------------------ +syz-executor307/3029 is trying to acquire lock: +ffff0000c02525d8 (&mm->mmap_lock){++++}-{3:3}, at: __might_fault+0x54/0xb4 mm/memory.c:5576 + +but task is already holding lock: +ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock fs/btrfs/locking.c:134 [inline] +ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_tree_read_lock fs/btrfs/locking.c:140 [inline] +ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_read_lock_root_node+0x13c/0x1c0 fs/btrfs/locking.c:279 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #3 (btrfs-root-00){++++}-{3:3}: + down_read_nested+0x64/0x84 kernel/locking/rwsem.c:1624 + __btrfs_tree_read_lock fs/btrfs/locking.c:134 [inline] + btrfs_tree_read_lock fs/btrfs/locking.c:140 [inline] + btrfs_read_lock_root_node+0x13c/0x1c0 fs/btrfs/locking.c:279 + btrfs_search_slot_get_root+0x74/0x338 fs/btrfs/ctree.c:1637 + btrfs_search_slot+0x1b0/0xfd8 fs/btrfs/ctree.c:1944 + btrfs_update_root+0x6c/0x5a0 fs/btrfs/root-tree.c:132 + commit_fs_roots+0x1f0/0x33c fs/btrfs/transaction.c:1459 + btrfs_commit_transaction+0x89c/0x12d8 fs/btrfs/transaction.c:2343 + flush_space+0x66c/0x738 fs/btrfs/space-info.c:786 + btrfs_async_reclaim_metadata_space+0x43c/0x4e0 fs/btrfs/space-info.c:1059 + process_one_work+0x2d8/0x504 kernel/workqueue.c:2289 + worker_thread+0x340/0x610 kernel/workqueue.c:2436 + kthread+0x12c/0x158 kernel/kthread.c:376 + ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:860 + +-> #2 (&fs_info->reloc_mutex){+.+.}-{3:3}: + __mutex_lock_common+0xd4/0xca8 kernel/locking/mutex.c:603 + __mutex_lock kernel/locking/mutex.c:747 [inline] + mutex_lock_nested+0x38/0x44 kernel/locking/mutex.c:799 + 
btrfs_record_root_in_trans fs/btrfs/transaction.c:516 [inline] + start_transaction+0x248/0x944 fs/btrfs/transaction.c:752 + btrfs_start_transaction+0x34/0x44 fs/btrfs/transaction.c:781 + btrfs_create_common+0xf0/0x1b4 fs/btrfs/inode.c:6651 + btrfs_create+0x8c/0xb0 fs/btrfs/inode.c:6697 + lookup_open fs/namei.c:3413 [inline] + open_last_lookups fs/namei.c:3481 [inline] + path_openat+0x804/0x11c4 fs/namei.c:3688 + do_filp_open+0xdc/0x1b8 fs/namei.c:3718 + do_sys_openat2+0xb8/0x22c fs/open.c:1313 + do_sys_open fs/open.c:1329 [inline] + __do_sys_openat fs/open.c:1345 [inline] + __se_sys_openat fs/open.c:1340 [inline] + __arm64_sys_openat+0xb0/0xe0 fs/open.c:1340 + __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] + invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] + el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 + do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 + el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 + el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 + el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 + +-> #1 (sb_internal#2){.+.+}-{0:0}: + percpu_down_read include/linux/percpu-rwsem.h:51 [inline] + __sb_start_write include/linux/fs.h:1826 [inline] + sb_start_intwrite include/linux/fs.h:1948 [inline] + start_transaction+0x360/0x944 fs/btrfs/transaction.c:683 + btrfs_join_transaction+0x30/0x40 fs/btrfs/transaction.c:795 + btrfs_dirty_inode+0x50/0x140 fs/btrfs/inode.c:6103 + btrfs_update_time+0x1c0/0x1e8 fs/btrfs/inode.c:6145 + inode_update_time fs/inode.c:1872 [inline] + touch_atime+0x1f0/0x4a8 fs/inode.c:1945 + file_accessed include/linux/fs.h:2516 [inline] + btrfs_file_mmap+0x50/0x88 fs/btrfs/file.c:2407 + call_mmap include/linux/fs.h:2192 [inline] + mmap_region+0x7fc/0xc14 mm/mmap.c:1752 + do_mmap+0x644/0x97c mm/mmap.c:1540 + vm_mmap_pgoff+0xe8/0x1d0 mm/util.c:552 + ksys_mmap_pgoff+0x1cc/0x278 mm/mmap.c:1586 + __do_sys_mmap arch/arm64/kernel/sys.c:28 [inline] + __se_sys_mmap 
arch/arm64/kernel/sys.c:21 [inline] + __arm64_sys_mmap+0x58/0x6c arch/arm64/kernel/sys.c:21 + __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] + invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] + el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 + do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 + el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 + el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 + el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 + +-> #0 (&mm->mmap_lock){++++}-{3:3}: + check_prev_add kernel/locking/lockdep.c:3095 [inline] + check_prevs_add kernel/locking/lockdep.c:3214 [inline] + validate_chain kernel/locking/lockdep.c:3829 [inline] + __lock_acquire+0x1530/0x30a4 kernel/locking/lockdep.c:5053 + lock_acquire+0x100/0x1f8 kernel/locking/lockdep.c:5666 + __might_fault+0x7c/0xb4 mm/memory.c:5577 + _copy_to_user include/linux/uaccess.h:134 [inline] + copy_to_user include/linux/uaccess.h:160 [inline] + btrfs_ioctl_get_subvol_rootref+0x3a8/0x4bc fs/btrfs/ioctl.c:3203 + btrfs_ioctl+0xa08/0xa64 fs/btrfs/ioctl.c:5556 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:870 [inline] + __se_sys_ioctl fs/ioctl.c:856 [inline] + __arm64_sys_ioctl+0xd0/0x140 fs/ioctl.c:856 + __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] + invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] + el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 + do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 + el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 + el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 + el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 + +other info that might help us debug this: + +Chain exists of: + &mm->mmap_lock --> &fs_info->reloc_mutex --> btrfs-root-00 + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(btrfs-root-00); + lock(&fs_info->reloc_mutex); + lock(btrfs-root-00); + lock(&mm->mmap_lock); + + *** DEADLOCK *** + +1 
lock held by syz-executor307/3029: + #0: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock fs/btrfs/locking.c:134 [inline] + #0: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_tree_read_lock fs/btrfs/locking.c:140 [inline] + #0: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_read_lock_root_node+0x13c/0x1c0 fs/btrfs/locking.c:279 + +stack backtrace: +CPU: 0 PID: 3029 Comm: syz-executor307 Not tainted 6.0.0-rc7-syzkaller-18095-gbbed346d5a96 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/30/2022 +Call trace: + dump_backtrace+0x1c4/0x1f0 arch/arm64/kernel/stacktrace.c:156 + show_stack+0x2c/0x54 arch/arm64/kernel/stacktrace.c:163 + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x104/0x16c lib/dump_stack.c:106 + dump_stack+0x1c/0x58 lib/dump_stack.c:113 + print_circular_bug+0x2c4/0x2c8 kernel/locking/lockdep.c:2053 + check_noncircular+0x14c/0x154 kernel/locking/lockdep.c:2175 + check_prev_add kernel/locking/lockdep.c:3095 [inline] + check_prevs_add kernel/locking/lockdep.c:3214 [inline] + validate_chain kernel/locking/lockdep.c:3829 [inline] + __lock_acquire+0x1530/0x30a4 kernel/locking/lockdep.c:5053 + lock_acquire+0x100/0x1f8 kernel/locking/lockdep.c:5666 + __might_fault+0x7c/0xb4 mm/memory.c:5577 + _copy_to_user include/linux/uaccess.h:134 [inline] + copy_to_user include/linux/uaccess.h:160 [inline] + btrfs_ioctl_get_subvol_rootref+0x3a8/0x4bc fs/btrfs/ioctl.c:3203 + btrfs_ioctl+0xa08/0xa64 fs/btrfs/ioctl.c:5556 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:870 [inline] + __se_sys_ioctl fs/ioctl.c:856 [inline] + __arm64_sys_ioctl+0xd0/0x140 fs/ioctl.c:856 + __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] + invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] + el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 + do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 + el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 + 
el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 + el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 + +We do generally the right thing here, copying the references into a +temporary buffer, however we are still holding the path when we do +copy_to_user from the temporary buffer. Fix this by freeing the path +before we copy to user space. + +Reported-by: syzbot+4ef9e52e464c6ff47d9d@syzkaller.appspotmail.com +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Anand Jain +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3194,6 +3194,8 @@ static int btrfs_ioctl_get_subvol_rootre + } + + out: ++ btrfs_free_path(path); ++ + if (!ret || ret == -EOVERFLOW) { + rootrefs->num_items = found; + /* update min_treeid for next search */ +@@ -3205,7 +3207,6 @@ out: + } + + kfree(rootrefs); +- btrfs_free_path(path); + + return ret; + } diff --git a/queue-6.0/btrfs-free-btrfs_path-before-copying-subvol-info-to-userspace.patch b/queue-6.0/btrfs-free-btrfs_path-before-copying-subvol-info-to-userspace.patch new file mode 100644 index 00000000000..32be34a80a1 --- /dev/null +++ b/queue-6.0/btrfs-free-btrfs_path-before-copying-subvol-info-to-userspace.patch @@ -0,0 +1,35 @@ +From 013c1c5585ebcfb19c88efe79063d0463b1b6159 Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Thu, 10 Nov 2022 11:36:31 +0530 +Subject: btrfs: free btrfs_path before copying subvol info to userspace + +From: Anand Jain + +commit 013c1c5585ebcfb19c88efe79063d0463b1b6159 upstream. + +btrfs_ioctl_get_subvol_info() frees the search path after the userspace +copy from the temp buffer @subvol_info. This can lead to a lock splat +warning. + +Fix this by freeing the path before we copy it to userspace. 
+ +CC: stable@vger.kernel.org # 4.19+ +Signed-off-by: Anand Jain +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(s + } + } + ++ btrfs_free_path(path); ++ path = NULL; + if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) + ret = -EFAULT; + diff --git a/queue-6.0/btrfs-sysfs-normalize-the-error-handling-branch-in-btrfs_init_sysfs.patch b/queue-6.0/btrfs-sysfs-normalize-the-error-handling-branch-in-btrfs_init_sysfs.patch new file mode 100644 index 00000000000..94bcd22b327 --- /dev/null +++ b/queue-6.0/btrfs-sysfs-normalize-the-error-handling-branch-in-btrfs_init_sysfs.patch @@ -0,0 +1,39 @@ +From ffdbb44f2f23f963b8f5672e35c3a26088177a62 Mon Sep 17 00:00:00 2001 +From: Zhen Lei +Date: Tue, 22 Nov 2022 19:50:02 +0800 +Subject: btrfs: sysfs: normalize the error handling branch in btrfs_init_sysfs() + +From: Zhen Lei + +commit ffdbb44f2f23f963b8f5672e35c3a26088177a62 upstream. + +Although kset_unregister() can eventually remove all attribute files, +explicitly rolling back with the matching function makes the code logic +look clearer. 
+ +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Zhen Lei +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/sysfs.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -2251,8 +2251,11 @@ int __init btrfs_init_sysfs(void) + + #ifdef CONFIG_BTRFS_DEBUG + ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group); +- if (ret) +- goto out2; ++ if (ret) { ++ sysfs_unmerge_group(&btrfs_kset->kobj, ++ &btrfs_static_feature_attr_group); ++ goto out_remove_group; ++ } + #endif + + return 0; diff --git a/queue-6.0/btrfs-use-kvcalloc-in-btrfs_get_dev_zone_info.patch b/queue-6.0/btrfs-use-kvcalloc-in-btrfs_get_dev_zone_info.patch new file mode 100644 index 00000000000..a187246edf0 --- /dev/null +++ b/queue-6.0/btrfs-use-kvcalloc-in-btrfs_get_dev_zone_info.patch @@ -0,0 +1,86 @@ +From 8fe97d47b52ae1ad130470b1780f0ded4ba609a4 Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Sun, 20 Nov 2022 13:43:03 +0100 +Subject: btrfs: use kvcalloc in btrfs_get_dev_zone_info + +From: Christoph Hellwig + +commit 8fe97d47b52ae1ad130470b1780f0ded4ba609a4 upstream. + +Otherwise the kernel memory allocator seems to be unhappy about failing +order 6 allocations for the zones array, that cause 100% reproducible +mount failures in my qemu setup: + + [26.078981] mount: page allocation failure: order:6, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null) + [26.079741] CPU: 0 PID: 2965 Comm: mount Not tainted 6.1.0-rc5+ #185 + [26.080181] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 + [26.080950] Call Trace: + [26.081132] + [26.081291] dump_stack_lvl+0x56/0x6f + [26.081554] warn_alloc+0x117/0x140 + [26.081808] ? 
__alloc_pages_direct_compact+0x1b5/0x300 + [26.082174] __alloc_pages_slowpath.constprop.0+0xd0e/0xde0 + [26.082569] __alloc_pages+0x32a/0x340 + [26.082836] __kmalloc_large_node+0x4d/0xa0 + [26.083133] ? trace_kmalloc+0x29/0xd0 + [26.083399] kmalloc_large+0x14/0x60 + [26.083654] btrfs_get_dev_zone_info+0x1b9/0xc00 + [26.083980] ? _raw_spin_unlock_irqrestore+0x28/0x50 + [26.084328] btrfs_get_dev_zone_info_all_devices+0x54/0x80 + [26.084708] open_ctree+0xed4/0x1654 + [26.084974] btrfs_mount_root.cold+0x12/0xde + [26.085288] ? lock_is_held_type+0xe2/0x140 + [26.085603] legacy_get_tree+0x28/0x50 + [26.085876] vfs_get_tree+0x1d/0xb0 + [26.086139] vfs_kern_mount.part.0+0x6c/0xb0 + [26.086456] btrfs_mount+0x118/0x3a0 + [26.086728] ? lock_is_held_type+0xe2/0x140 + [26.087043] legacy_get_tree+0x28/0x50 + [26.087323] vfs_get_tree+0x1d/0xb0 + [26.087587] path_mount+0x2ba/0xbe0 + [26.087850] ? _raw_spin_unlock_irqrestore+0x38/0x50 + [26.088217] __x64_sys_mount+0xfe/0x140 + [26.088506] do_syscall_64+0x35/0x80 + [26.088776] entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Fixes: 5b316468983d ("btrfs: get zone information of zoned block devices") +CC: stable@vger.kernel.org # 5.15+ +Reviewed-by: Damien Le Moal +Reviewed-by: Johannes Thumshirn +Signed-off-by: Christoph Hellwig +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/zoned.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -467,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs + goto out; + } + +- zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); ++ zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); + if (!zones) { + ret = -ENOMEM; + goto out; +@@ -586,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs + } + + +- kfree(zones); ++ kvfree(zones); + + switch (bdev_zoned_model(bdev)) { + case BLK_ZONED_HM: +@@ -618,7 +618,7 @@ int 
btrfs_get_dev_zone_info(struct btrfs + return 0; + + out: +- kfree(zones); ++ kvfree(zones); + out_free_zone_info: + btrfs_destroy_dev_zone_info(device); + diff --git a/queue-6.0/btrfs-zoned-fix-missing-endianness-conversion-in-sb_write_pointer.patch b/queue-6.0/btrfs-zoned-fix-missing-endianness-conversion-in-sb_write_pointer.patch new file mode 100644 index 00000000000..ad216f56c76 --- /dev/null +++ b/queue-6.0/btrfs-zoned-fix-missing-endianness-conversion-in-sb_write_pointer.patch @@ -0,0 +1,36 @@ +From c51f0e6a1254b3ac2d308e1c6fd8fb936992b455 Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Tue, 15 Nov 2022 10:39:44 +0100 +Subject: btrfs: zoned: fix missing endianness conversion in sb_write_pointer + +From: Christoph Hellwig + +commit c51f0e6a1254b3ac2d308e1c6fd8fb936992b455 upstream. + +generation is an on-disk __le64 value, so use btrfs_super_generation to +convert it to host endian before comparing it. + +Fixes: 12659251ca5d ("btrfs: implement log-structured superblock for ZONED mode") +CC: stable@vger.kernel.org # 5.15+ +Reviewed-by: Johannes Thumshirn +Reviewed-by: Qu Wenruo +Signed-off-by: Christoph Hellwig +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/zoned.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -134,7 +134,8 @@ static int sb_write_pointer(struct block + super[i] = page_address(page[i]); + } + +- if (super[0]->generation > super[1]->generation) ++ if (btrfs_super_generation(super[0]) > ++ btrfs_super_generation(super[1])) + sector = zones[1].start; + else + sector = zones[0].start; diff --git a/queue-6.0/drm-amd-amdgpu-reserve-vm-invalidation-engine-for-firmware.patch b/queue-6.0/drm-amd-amdgpu-reserve-vm-invalidation-engine-for-firmware.patch new file mode 100644 index 00000000000..f4c79c3d14e --- /dev/null +++ b/queue-6.0/drm-amd-amdgpu-reserve-vm-invalidation-engine-for-firmware.patch @@ -0,0 +1,38 @@ +From 
91abf28a636291135ea5cab9af40f017cff6afce Mon Sep 17 00:00:00 2001 +From: Jack Xiao +Date: Wed, 16 Nov 2022 16:44:21 +0800 +Subject: drm/amd/amdgpu: reserve vm invalidation engine for firmware +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jack Xiao + +commit 91abf28a636291135ea5cab9af40f017cff6afce upstream. + +If MES is enabled, reserve VM invalidation engine 5 for firmware. + +Signed-off-by: Jack Xiao +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org # 6.0.x +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +@@ -479,6 +479,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struc + unsigned i; + unsigned vmhub, inv_eng; + ++ if (adev->enable_mes) { ++ /* reserve engine 5 for firmware */ ++ for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) ++ vm_inv_engs[vmhub] &= ~(1 << 5); ++ } ++ + for (i = 0; i < adev->num_rings; ++i) { + ring = adev->rings[i]; + vmhub = ring->funcs->vmhub; diff --git a/queue-6.0/drm-amd-dc-dce120-fix-audio-register-mapping-stop-triggering-kasan.patch b/queue-6.0/drm-amd-dc-dce120-fix-audio-register-mapping-stop-triggering-kasan.patch new file mode 100644 index 00000000000..df72058555b --- /dev/null +++ b/queue-6.0/drm-amd-dc-dce120-fix-audio-register-mapping-stop-triggering-kasan.patch @@ -0,0 +1,43 @@ +From 44035ec2fde1114254ee465f9ba3bb246b0b6283 Mon Sep 17 00:00:00 2001 +From: Lyude Paul +Date: Mon, 14 Nov 2022 17:20:45 -0500 +Subject: drm/amd/dc/dce120: Fix audio register mapping, stop triggering KASAN + +From: Lyude Paul + +commit 44035ec2fde1114254ee465f9ba3bb246b0b6283 upstream. 
+ +There's been a very long running bug that seems to have been neglected for +a while, where amdgpu consistently triggers a KASAN error at start: + + BUG: KASAN: global-out-of-bounds in read_indirect_azalia_reg+0x1d4/0x2a0 [amdgpu] + Read of size 4 at addr ffffffffc2274b28 by task modprobe/1889 + +After digging through amd's rather creative method for accessing registers, +I eventually discovered the problem likely has to do with the fact that on +my dce120 GPU there are supposedly 7 sets of audio registers. But we only +define a register mapping for 6 sets. + +So, fix this and fix the KASAN warning finally. + +Signed-off-by: Lyude Paul +Cc: stable@vger.kernel.org +Reviewed-by: Alex Deucher +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +@@ -359,7 +359,8 @@ static const struct dce_audio_registers + audio_regs(2), + audio_regs(3), + audio_regs(4), +- audio_regs(5) ++ audio_regs(5), ++ audio_regs(6), + }; + + #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ diff --git a/queue-6.0/drm-amd-display-no-display-after-resume-from-wb-cb.patch b/queue-6.0/drm-amd-display-no-display-after-resume-from-wb-cb.patch new file mode 100644 index 00000000000..8f9dbbbefe8 --- /dev/null +++ b/queue-6.0/drm-amd-display-no-display-after-resume-from-wb-cb.patch @@ -0,0 +1,79 @@ +From a6e1775da04ab042bc9e2e42399fa25714c253da Mon Sep 17 00:00:00 2001 +From: Tsung-hua Lin +Date: Wed, 9 Nov 2022 12:54:22 +0800 +Subject: drm/amd/display: No display after resume from WB/CB + +From: Tsung-hua Lin + +commit a6e1775da04ab042bc9e2e42399fa25714c253da upstream. + +[why] +First MST sideband message returns AUX_RET_ERROR_HPD_DISCON +on certain Intel platforms. The aux transaction is considered a failure +if HPD is unexpectedly pulled low. 
The actual aux transaction succeeds +in such a case, hence do not return an error. + +[how] +Do not return an error when AUX_RET_ERROR_HPD_DISCON is detected +on the first sideband message. + +v2: squash in fix (Alex) + +Reviewed-by: Jerry Zuo +Acked-by: Brian Chang +Signed-off-by: Tsung-hua Lin +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 37 ++++++++++++++++++++++ + 1 file changed, 37 insertions(+) + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -1371,7 +1371,44 @@ static const struct dmi_system_id hpd_di + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), + }, + }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), ++ }, ++ }, + {} ++ /* TODO: refactor this from a fixed table to a dynamic option */ + }; + + static void retrieve_dmi_info(struct amdgpu_display_manager *dm) diff --git a/queue-6.0/drm-amd-display-update-soc-bounding-box-for-dcn32-dcn321.patch b/queue-6.0/drm-amd-display-update-soc-bounding-box-for-dcn32-dcn321.patch new file mode 100644 index 00000000000..f216ad15061 --- /dev/null +++ 
b/queue-6.0/drm-amd-display-update-soc-bounding-box-for-dcn32-dcn321.patch @@ -0,0 +1,77 @@ +From 5d82c82f1dbee264f7a94587adbbfee607706902 Mon Sep 17 00:00:00 2001 +From: Dillon Varone +Date: Mon, 7 Nov 2022 15:18:47 -0500 +Subject: drm/amd/display: Update soc bounding box for dcn32/dcn321 + +From: Dillon Varone + +commit 5d82c82f1dbee264f7a94587adbbfee607706902 upstream. + +[Description] +New values for soc bounding box and dummy pstate. + +Reviewed-by: Jun Lei +Acked-by: Brian Chang +Signed-off-by: Dillon Varone +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org # 6.0.x +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 6 +++--- + drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 8 ++++---- + 2 files changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +@@ -157,7 +157,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3 + .dispclk_dppclk_vco_speed_mhz = 4300.0, + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, +- .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, ++ .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, + }; + + void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) +@@ -211,7 +211,7 @@ void dcn32_build_wm_range_table_fpu(stru + /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ + if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; +- clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; ++ clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = 
fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; +@@ -221,7 +221,7 @@ void dcn32_build_wm_range_table_fpu(stru + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; +- clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; ++ clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; + clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; +--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +@@ -125,9 +125,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3 + .sr_enter_plus_exit_z8_time_us = 320, + .writeback_latency_us = 12.0, + .round_trip_ping_latency_dcfclk_cycles = 263, +- .urgent_latency_pixel_data_only_us = 9.35, +- .urgent_latency_pixel_mixed_with_vm_data_us = 9.35, +- .urgent_latency_vm_data_only_us = 9.35, ++ .urgent_latency_pixel_data_only_us = 4, ++ .urgent_latency_pixel_mixed_with_vm_data_us = 4, ++ .urgent_latency_vm_data_only_us = 4, + .fclk_change_latency_us = 20, + .usr_retraining_latency_us = 2, + .smn_latency_us = 2, +@@ -155,7 +155,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3 + .dispclk_dppclk_vco_speed_mhz = 4300.0, + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, +- 
.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, ++ .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, + }; + + static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) diff --git a/queue-6.0/drm-amdgpu-always-register-an-mmu-notifier-for-userptr.patch b/queue-6.0/drm-amdgpu-always-register-an-mmu-notifier-for-userptr.patch new file mode 100644 index 00000000000..498f0950b65 --- /dev/null +++ b/queue-6.0/drm-amdgpu-always-register-an-mmu-notifier-for-userptr.patch @@ -0,0 +1,42 @@ +From b39df63b16b64a3af42695acb9bc567aad144776 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= +Date: Wed, 9 Nov 2022 12:14:44 +0100 +Subject: drm/amdgpu: always register an MMU notifier for userptr +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian König + +commit b39df63b16b64a3af42695acb9bc567aad144776 upstream. + +Since switching to HMM we always need that because we no longer grab +references to the pages. 
+ +Signed-off-by: Christian König +Reviewed-by: Alex Deucher +Acked-by: Felix Kuehling +CC: stable@vger.kernel.org +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +@@ -413,11 +413,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_ + if (r) + goto release_object; + +- if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) { +- r = amdgpu_mn_register(bo, args->addr); +- if (r) +- goto release_object; +- } ++ r = amdgpu_mn_register(bo, args->addr); ++ if (r) ++ goto release_object; + + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { + r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); diff --git a/queue-6.0/drm-amdgpu-enable-aldebaran-devices-to-report-cu-occupancy.patch b/queue-6.0/drm-amdgpu-enable-aldebaran-devices-to-report-cu-occupancy.patch new file mode 100644 index 00000000000..9995b917a45 --- /dev/null +++ b/queue-6.0/drm-amdgpu-enable-aldebaran-devices-to-report-cu-occupancy.patch @@ -0,0 +1,31 @@ +From b9ab82da8804ec22c7e91ffd9d56c7a3abff0c8e Mon Sep 17 00:00:00 2001 +From: Ramesh Errabolu +Date: Wed, 16 Nov 2022 10:46:08 -0600 +Subject: drm/amdgpu: Enable Aldebaran devices to report CU Occupancy + +From: Ramesh Errabolu + +commit b9ab82da8804ec22c7e91ffd9d56c7a3abff0c8e upstream. + +Allow user to know number of compute units (CU) that are in use at any +given moment. Enable access to the method kgd_gfx_v9_get_cu_occupancy +that computes CU occupancy. 
+ +Signed-off-by: Ramesh Errabolu +Reviewed-by: Felix Kuehling +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +@@ -41,5 +41,6 @@ const struct kfd2kgd_calls aldebaran_kfd + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, ++ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings + }; diff --git a/queue-6.0/drm-amdgpu-partially-revert-drm-amdgpu-update-drm_display_info-correctly-when-the-edid-is-read.patch b/queue-6.0/drm-amdgpu-partially-revert-drm-amdgpu-update-drm_display_info-correctly-when-the-edid-is-read.patch new file mode 100644 index 00000000000..c5f6d598696 --- /dev/null +++ b/queue-6.0/drm-amdgpu-partially-revert-drm-amdgpu-update-drm_display_info-correctly-when-the-edid-is-read.patch @@ -0,0 +1,36 @@ +From 602ad43c3cd8f15cbb25ce9bb494129edb2024ed Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Mon, 21 Nov 2022 12:34:14 -0500 +Subject: drm/amdgpu: Partially revert "drm/amdgpu: update drm_display_info correctly when the edid is read" + +From: Alex Deucher + +commit 602ad43c3cd8f15cbb25ce9bb494129edb2024ed upstream. + +This partially reverts 20543be93ca45968f344261c1a997177e51bd7e1. + +Calling drm_connector_update_edid_property() in +amdgpu_connector_free_edid() causes a noticeable pause in +the system every 10 seconds on polled outputs so revert this +part of the change. 
+ +Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2257 +Cc: Claudio Suarez +Acked-by: Luben Tuikov +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +@@ -328,7 +328,6 @@ static void amdgpu_connector_free_edid(s + + kfree(amdgpu_connector->edid); + amdgpu_connector->edid = NULL; +- drm_connector_update_edid_property(connector, NULL); + } + + static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) diff --git a/queue-6.0/drm-amdgpu-psp-don-t-free-psp-buffers-on-suspend.patch b/queue-6.0/drm-amdgpu-psp-don-t-free-psp-buffers-on-suspend.patch new file mode 100644 index 00000000000..2551f1241e0 --- /dev/null +++ b/queue-6.0/drm-amdgpu-psp-don-t-free-psp-buffers-on-suspend.patch @@ -0,0 +1,79 @@ +From 4f2bea62cf3874c5a58e987b0b472f9fb57117a2 Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Wed, 16 Nov 2022 11:26:53 -0500 +Subject: drm/amdgpu/psp: don't free PSP buffers on suspend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Deucher + +commit 4f2bea62cf3874c5a58e987b0b472f9fb57117a2 upstream. + +We can reuse the same buffers on resume. + +v2: squash in S4 fix from Shikai + +Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2213 +Reviewed-by: Christian König +Tested-by: Guilherme G. 
Piccoli +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +@@ -171,6 +171,7 @@ void psp_ta_free_shared_buf(struct ta_me + { + amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, + &mem_ctx->shared_buf); ++ mem_ctx->shared_bo = NULL; + } + + static void psp_free_shared_bufs(struct psp_context *psp) +@@ -181,6 +182,7 @@ static void psp_free_shared_bufs(struct + /* free TMR memory buffer */ + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); ++ psp->tmr_bo = NULL; + + /* free xgmi shared memory */ + psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context); +@@ -728,7 +730,7 @@ static int psp_load_toc(struct psp_conte + /* Set up Trusted Memory Region */ + static int psp_tmr_init(struct psp_context *psp) + { +- int ret; ++ int ret = 0; + int tmr_size; + void *tmr_buf; + void **pptr; +@@ -755,10 +757,12 @@ static int psp_tmr_init(struct psp_conte + } + } + +- pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; +- ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, +- AMDGPU_GEM_DOMAIN_VRAM, +- &psp->tmr_bo, &psp->tmr_mc_addr, pptr); ++ if (!psp->tmr_bo) { ++ pptr = amdgpu_sriov_vf(psp->adev) ? 
&tmr_buf : NULL; ++ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, ++ AMDGPU_GEM_DOMAIN_VRAM, ++ &psp->tmr_bo, &psp->tmr_mc_addr, pptr); ++ } + + return ret; + } +@@ -2720,8 +2724,6 @@ static int psp_suspend(void *handle) + } + + out: +- psp_free_shared_bufs(psp); +- + return ret; + } + diff --git a/queue-6.0/drm-display-dp_mst-fix-drm_dp_mst_add_affected_dsc_crtcs-return-code.patch b/queue-6.0/drm-display-dp_mst-fix-drm_dp_mst_add_affected_dsc_crtcs-return-code.patch new file mode 100644 index 00000000000..3d877c83aad --- /dev/null +++ b/queue-6.0/drm-display-dp_mst-fix-drm_dp_mst_add_affected_dsc_crtcs-return-code.patch @@ -0,0 +1,36 @@ +From 2f3a1273862cb82cca227630cc7f04ce0c94b6bb Mon Sep 17 00:00:00 2001 +From: Lyude Paul +Date: Mon, 14 Nov 2022 17:17:53 -0500 +Subject: drm/display/dp_mst: Fix drm_dp_mst_add_affected_dsc_crtcs() return code + +From: Lyude Paul + +commit 2f3a1273862cb82cca227630cc7f04ce0c94b6bb upstream. + +Looks like that we're accidentally dropping a pretty important return code +here. For some reason, we just return -EINVAL if we fail to get the MST +topology state. This is wrong: error codes are important and should never +be squashed without being handled, which here seems to have the potential +to cause a deadlock. 
+ +Signed-off-by: Lyude Paul +Reviewed-by: Wayne Lin +Fixes: 8ec046716ca8 ("drm/dp_mst: Add helper to trigger modeset on affected DSC MST CRTCs") +Cc: stable@vger.kernel.org # v5.6+ +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/display/drm_dp_mst_topology.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c ++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c +@@ -5293,7 +5293,7 @@ int drm_dp_mst_add_affected_dsc_crtcs(st + mst_state = drm_atomic_get_mst_topology_state(state, mgr); + + if (IS_ERR(mst_state)) +- return -EINVAL; ++ return PTR_ERR(mst_state); + + list_for_each_entry(pos, &mst_state->vcpis, next) { + diff --git a/queue-6.0/drm-i915-ttm-never-purge-busy-objects.patch b/queue-6.0/drm-i915-ttm-never-purge-busy-objects.patch new file mode 100644 index 00000000000..82edccb2da5 --- /dev/null +++ b/queue-6.0/drm-i915-ttm-never-purge-busy-objects.patch @@ -0,0 +1,92 @@ +From 00a6c36cca760d0b659f894dee728555b193c5e1 Mon Sep 17 00:00:00 2001 +From: Matthew Auld +Date: Tue, 15 Nov 2022 10:46:20 +0000 +Subject: drm/i915/ttm: never purge busy objects + +From: Matthew Auld + +commit 00a6c36cca760d0b659f894dee728555b193c5e1 upstream. + +In i915_gem_madvise_ioctl() we immediately purge the object if it is not +currently used, like when the mm.pages are NULL. With shmem the pages +might still be hanging around or are perhaps swapped out. Similarly with +ttm we might still have the pages hanging around on the ttm resource, +like with lmem or shmem, but here we need to be extra careful since +async unbinds are possible as well as in-progress kernel moves. In +i915_ttm_purge() we expect the pipeline-gutting to nuke the ttm resource +for us, however if it's busy the memory is only moved to a ghost object, +which then leads to broken behaviour when for example clearing the +i915_tt->filp, since the actual ttm_tt is still alive and populated, +even though it's been moved to the ghost object. 
When we later destroy +the ghost object we hit the following, since the filp is now NULL: + +[ +0.006982] #PF: supervisor read access in kernel mode +[ +0.005149] #PF: error_code(0x0000) - not-present page +[ +0.005147] PGD 11631d067 P4D 11631d067 PUD 115972067 PMD 0 +[ +0.005676] Oops: 0000 [#1] PREEMPT SMP NOPTI +[ +0.012962] Workqueue: events ttm_device_delayed_workqueue [ttm] +[ +0.006022] RIP: 0010:i915_ttm_tt_unpopulate+0x3a/0x70 [i915] +[ +0.005879] Code: 89 fb 48 85 f6 74 11 8b 55 4c 48 8b 7d 30 45 31 c0 31 c9 e8 18 6a e5 e0 80 7d 60 00 74 20 48 8b 45 68 +8b 55 08 4c 89 e7 5b 5d <48> 8b 40 20 83 e2 01 41 5c 89 d1 48 8b 70 + 30 e9 42 b2 ff ff 4c 89 +[ +0.018782] RSP: 0000:ffffc9000bf6fd70 EFLAGS: 00010202 +[ +0.005244] RAX: 0000000000000000 RBX: ffff8883e12ae380 RCX: 0000000000000000 +[ +0.007150] RDX: 000000008000000e RSI: ffffffff823559b4 RDI: ffff8883e12ae3c0 +[ +0.007142] RBP: ffff888103b65d48 R08: 0000000000000001 R09: 0000000000000001 +[ +0.007144] R10: 0000000000000001 R11: ffff88829c2c8040 R12: ffff8883e12ae3c0 +[ +0.007148] R13: 0000000000000001 R14: ffff888115184140 R15: ffff888115184248 +[ +0.007154] FS: 0000000000000000(0000) GS:ffff88844db00000(0000) knlGS:0000000000000000 +[ +0.008108] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ +0.005763] CR2: 0000000000000020 CR3: 000000013fdb4004 CR4: 00000000003706e0 +[ +0.007152] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ +0.007145] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ +0.007154] Call Trace: +[ +0.002459] +[ +0.002126] ttm_tt_unpopulate.part.0+0x17/0x70 [ttm] +[ +0.005068] ttm_bo_tt_destroy+0x1c/0x50 [ttm] +[ +0.004464] ttm_bo_cleanup_memtype_use+0x25/0x40 [ttm] +[ +0.005244] ttm_bo_cleanup_refs+0x90/0x2c0 [ttm] +[ +0.004721] ttm_bo_delayed_delete+0x235/0x250 [ttm] +[ +0.004981] ttm_device_delayed_workqueue+0x13/0x40 [ttm] +[ +0.005422] process_one_work+0x248/0x560 +[ +0.004028] worker_thread+0x4b/0x390 +[ +0.003682] ? 
process_one_work+0x560/0x560 +[ +0.004199] kthread+0xeb/0x120 +[ +0.003163] ? kthread_complete_and_exit+0x20/0x20 +[ +0.004815] ret_from_fork+0x1f/0x30 + +v2: + - Just use ttm_bo_wait() directly (Niranjana) + - Add testcase reference + +Testcase: igt@gem_madvise@dontneed-evict-race +Fixes: 213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend") +Reported-by: Niranjana Vishwanathapura +Signed-off-by: Matthew Auld +Cc: Andrzej Hajda +Cc: Nirmoy Das +Cc: stable@vger.kernel.org # v5.15+ +Reviewed-by: Niranjana Vishwanathapura +Acked-by: Nirmoy Das +Reviewed-by: Andrzej Hajda +Link: https://patchwork.freedesktop.org/patch/msgid/20221115104620.120432-1-matthew.auld@intel.com +(cherry picked from commit 5524b5e52e08f675116a93296fe5bee60bc43c03) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +@@ -642,6 +642,10 @@ static int i915_ttm_truncate(struct drm_ + + WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED); + ++ err = ttm_bo_wait(bo, true, false); ++ if (err) ++ return err; ++ + err = i915_ttm_move_notify(bo); + if (err) + return err; diff --git a/queue-6.0/series b/queue-6.0/series index 56d705307b5..72b23db7aa3 100644 --- a/queue-6.0/series +++ b/queue-6.0/series @@ -268,3 +268,21 @@ wifi-wilc1000-validate-pairwise-and-authentication-suite-offsets.patch wifi-wilc1000-validate-length-of-ieee80211_p2p_attr_oper_channel-attribute.patch wifi-wilc1000-validate-length-of-ieee80211_p2p_attr_channel_list-attribute.patch wifi-wilc1000-validate-number-of-channels.patch +btrfs-free-btrfs_path-before-copying-root-refs-to-userspace.patch +btrfs-free-btrfs_path-before-copying-inodes-to-userspace.patch +btrfs-free-btrfs_path-before-copying-fspath-to-userspace.patch +btrfs-free-btrfs_path-before-copying-subvol-info-to-userspace.patch 
+btrfs-zoned-fix-missing-endianness-conversion-in-sb_write_pointer.patch +btrfs-use-kvcalloc-in-btrfs_get_dev_zone_info.patch +btrfs-sysfs-normalize-the-error-handling-branch-in-btrfs_init_sysfs.patch +btrfs-do-not-modify-log-tree-while-holding-a-leaf-from-fs-tree-locked.patch +drm-i915-ttm-never-purge-busy-objects.patch +drm-display-dp_mst-fix-drm_dp_mst_add_affected_dsc_crtcs-return-code.patch +drm-amd-dc-dce120-fix-audio-register-mapping-stop-triggering-kasan.patch +drm-amd-display-no-display-after-resume-from-wb-cb.patch +drm-amdgpu-psp-don-t-free-psp-buffers-on-suspend.patch +drm-amdgpu-enable-aldebaran-devices-to-report-cu-occupancy.patch +drm-amd-amdgpu-reserve-vm-invalidation-engine-for-firmware.patch +drm-amd-display-update-soc-bounding-box-for-dcn32-dcn321.patch +drm-amdgpu-always-register-an-mmu-notifier-for-userptr.patch +drm-amdgpu-partially-revert-drm-amdgpu-update-drm_display_info-correctly-when-the-edid-is-read.patch