--- /dev/null
+From 0697d9a610998b8bdee6b2390836cb2391d8fd1a Mon Sep 17 00:00:00 2001
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Date: Wed, 18 Nov 2020 18:03:26 +0900
+Subject: btrfs: don't access possibly stale fs_info data for printing duplicate device
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+commit 0697d9a610998b8bdee6b2390836cb2391d8fd1a upstream.
+
+Syzbot reported a possible use-after-free when printing a duplicate device
+warning in device_list_add().
+
+At this point it can happen that a btrfs_device::fs_info is not correctly
+set up yet, so we're accessing stale data, when printing the warning
+message using the btrfs_printk() wrappers.
+
+ ==================================================================
+ BUG: KASAN: use-after-free in btrfs_printk+0x3eb/0x435 fs/btrfs/super.c:245
+ Read of size 8 at addr ffff8880878e06a8 by task syz-executor225/7068
+
+ CPU: 1 PID: 7068 Comm: syz-executor225 Not tainted 5.9.0-rc5-syzkaller #0
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+ Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x1d6/0x29e lib/dump_stack.c:118
+ print_address_description+0x66/0x620 mm/kasan/report.c:383
+ __kasan_report mm/kasan/report.c:513 [inline]
+ kasan_report+0x132/0x1d0 mm/kasan/report.c:530
+ btrfs_printk+0x3eb/0x435 fs/btrfs/super.c:245
+ device_list_add+0x1a88/0x1d60 fs/btrfs/volumes.c:943
+ btrfs_scan_one_device+0x196/0x490 fs/btrfs/volumes.c:1359
+ btrfs_mount_root+0x48f/0xb60 fs/btrfs/super.c:1634
+ legacy_get_tree+0xea/0x180 fs/fs_context.c:592
+ vfs_get_tree+0x88/0x270 fs/super.c:1547
+ fc_mount fs/namespace.c:978 [inline]
+ vfs_kern_mount+0xc9/0x160 fs/namespace.c:1008
+ btrfs_mount+0x33c/0xae0 fs/btrfs/super.c:1732
+ legacy_get_tree+0xea/0x180 fs/fs_context.c:592
+ vfs_get_tree+0x88/0x270 fs/super.c:1547
+ do_new_mount fs/namespace.c:2875 [inline]
+ path_mount+0x179d/0x29e0 fs/namespace.c:3192
+ do_mount fs/namespace.c:3205 [inline]
+ __do_sys_mount fs/namespace.c:3413 [inline]
+ __se_sys_mount+0x126/0x180 fs/namespace.c:3390
+ do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ RIP: 0033:0x44840a
+ RSP: 002b:00007ffedfffd608 EFLAGS: 00000293 ORIG_RAX: 00000000000000a5
+ RAX: ffffffffffffffda RBX: 00007ffedfffd670 RCX: 000000000044840a
+ RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffedfffd630
+ RBP: 00007ffedfffd630 R08: 00007ffedfffd670 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000001a
+ R13: 0000000000000004 R14: 0000000000000003 R15: 0000000000000003
+
+ Allocated by task 6945:
+ kasan_save_stack mm/kasan/common.c:48 [inline]
+ kasan_set_track mm/kasan/common.c:56 [inline]
+ __kasan_kmalloc+0x100/0x130 mm/kasan/common.c:461
+ kmalloc_node include/linux/slab.h:577 [inline]
+ kvmalloc_node+0x81/0x110 mm/util.c:574
+ kvmalloc include/linux/mm.h:757 [inline]
+ kvzalloc include/linux/mm.h:765 [inline]
+ btrfs_mount_root+0xd0/0xb60 fs/btrfs/super.c:1613
+ legacy_get_tree+0xea/0x180 fs/fs_context.c:592
+ vfs_get_tree+0x88/0x270 fs/super.c:1547
+ fc_mount fs/namespace.c:978 [inline]
+ vfs_kern_mount+0xc9/0x160 fs/namespace.c:1008
+ btrfs_mount+0x33c/0xae0 fs/btrfs/super.c:1732
+ legacy_get_tree+0xea/0x180 fs/fs_context.c:592
+ vfs_get_tree+0x88/0x270 fs/super.c:1547
+ do_new_mount fs/namespace.c:2875 [inline]
+ path_mount+0x179d/0x29e0 fs/namespace.c:3192
+ do_mount fs/namespace.c:3205 [inline]
+ __do_sys_mount fs/namespace.c:3413 [inline]
+ __se_sys_mount+0x126/0x180 fs/namespace.c:3390
+ do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ Freed by task 6945:
+ kasan_save_stack mm/kasan/common.c:48 [inline]
+ kasan_set_track+0x3d/0x70 mm/kasan/common.c:56
+ kasan_set_free_info+0x17/0x30 mm/kasan/generic.c:355
+ __kasan_slab_free+0xdd/0x110 mm/kasan/common.c:422
+ __cache_free mm/slab.c:3418 [inline]
+ kfree+0x113/0x200 mm/slab.c:3756
+ deactivate_locked_super+0xa7/0xf0 fs/super.c:335
+ btrfs_mount_root+0x72b/0xb60 fs/btrfs/super.c:1678
+ legacy_get_tree+0xea/0x180 fs/fs_context.c:592
+ vfs_get_tree+0x88/0x270 fs/super.c:1547
+ fc_mount fs/namespace.c:978 [inline]
+ vfs_kern_mount+0xc9/0x160 fs/namespace.c:1008
+ btrfs_mount+0x33c/0xae0 fs/btrfs/super.c:1732
+ legacy_get_tree+0xea/0x180 fs/fs_context.c:592
+ vfs_get_tree+0x88/0x270 fs/super.c:1547
+ do_new_mount fs/namespace.c:2875 [inline]
+ path_mount+0x179d/0x29e0 fs/namespace.c:3192
+ do_mount fs/namespace.c:3205 [inline]
+ __do_sys_mount fs/namespace.c:3413 [inline]
+ __se_sys_mount+0x126/0x180 fs/namespace.c:3390
+ do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ The buggy address belongs to the object at ffff8880878e0000
+ which belongs to the cache kmalloc-16k of size 16384
+ The buggy address is located 1704 bytes inside of
+ 16384-byte region [ffff8880878e0000, ffff8880878e4000)
+ The buggy address belongs to the page:
+ page:0000000060704f30 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x878e0
+ head:0000000060704f30 order:3 compound_mapcount:0 compound_pincount:0
+ flags: 0xfffe0000010200(slab|head)
+ raw: 00fffe0000010200 ffffea00028e9a08 ffffea00021e3608 ffff8880aa440b00
+ raw: 0000000000000000 ffff8880878e0000 0000000100000001 0000000000000000
+ page dumped because: kasan: bad access detected
+
+ Memory state around the buggy address:
+ ffff8880878e0580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880878e0600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ >ffff8880878e0680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ^
+ ffff8880878e0700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880878e0780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ==================================================================
+
+The syzkaller reproducer for this use-after-free crafts a filesystem image
+and loop mounts it twice in a loop. The mount will fail as the crafted
+image has an invalid chunk tree. When this happens btrfs_mount_root() will
+call deactivate_locked_super(), which then cleans up fs_info and
+fs_info::sb. If a second thread now adds the same block-device to the
+filesystem, it will get detected as a duplicate device and
+device_list_add() will reject the duplicate and print a warning. But as
+the fs_info pointer passed in is non-NULL this will result in a
+use-after-free.
+
+Instead of printing possibly uninitialized or already freed memory in
+btrfs_printk(), explicitly pass in a NULL fs_info so the printing of the
+device name will be skipped altogether.
+
+There was a slightly different approach discussed in
+https://lore.kernel.org/linux-btrfs/20200114060920.4527-1-anand.jain@oracle.com/t/#u
+
+Link: https://lore.kernel.org/linux-btrfs/000000000000c9e14b05afcc41ba@google.com
+Reported-by: syzbot+582e66e5edf36a22c7b0@syzkaller.appspotmail.com
+CC: stable@vger.kernel.org # 4.19+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -941,7 +941,13 @@ static noinline struct btrfs_device *dev
+ if (device->bdev != path_bdev) {
+ bdput(path_bdev);
+ mutex_unlock(&fs_devices->device_list_mutex);
+- btrfs_warn_in_rcu(device->fs_info,
++ /*
++ * device->fs_info may not be reliable here, so
++ * pass in a NULL instead. This avoids a
++ * possible use-after-free when the fs_info and
++ * fs_info->sb are already torn down.
++ */
++ btrfs_warn_in_rcu(NULL,
+ "duplicate device %s devid %llu generation %llu scanned by %s (%d)",
+ path, devid, found_transid,
+ current->comm,
--- /dev/null
+From 3d05cad3c357a2b749912914356072b38435edfa Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 23 Nov 2020 14:28:44 +0000
+Subject: btrfs: fix lockdep splat when reading qgroup config on mount
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 3d05cad3c357a2b749912914356072b38435edfa upstream.
+
+Lockdep reported the following splat when running test btrfs/190 from
+fstests:
+
+ [ 9482.126098] ======================================================
+ [ 9482.126184] WARNING: possible circular locking dependency detected
+ [ 9482.126281] 5.10.0-rc4-btrfs-next-73 #1 Not tainted
+ [ 9482.126365] ------------------------------------------------------
+ [ 9482.126456] mount/24187 is trying to acquire lock:
+ [ 9482.126534] ffffa0c869a7dac0 (&fs_info->qgroup_rescan_lock){+.+.}-{3:3}, at: qgroup_rescan_init+0x43/0xf0 [btrfs]
+ [ 9482.126647]
+ but task is already holding lock:
+ [ 9482.126777] ffffa0c892ebd3a0 (btrfs-quota-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x27/0x120 [btrfs]
+ [ 9482.126886]
+ which lock already depends on the new lock.
+
+ [ 9482.127078]
+ the existing dependency chain (in reverse order) is:
+ [ 9482.127213]
+ -> #1 (btrfs-quota-00){++++}-{3:3}:
+ [ 9482.127366] lock_acquire+0xd8/0x490
+ [ 9482.127436] down_read_nested+0x45/0x220
+ [ 9482.127528] __btrfs_tree_read_lock+0x27/0x120 [btrfs]
+ [ 9482.127613] btrfs_read_lock_root_node+0x41/0x130 [btrfs]
+ [ 9482.127702] btrfs_search_slot+0x514/0xc30 [btrfs]
+ [ 9482.127788] update_qgroup_status_item+0x72/0x140 [btrfs]
+ [ 9482.127877] btrfs_qgroup_rescan_worker+0xde/0x680 [btrfs]
+ [ 9482.127964] btrfs_work_helper+0xf1/0x600 [btrfs]
+ [ 9482.128039] process_one_work+0x24e/0x5e0
+ [ 9482.128110] worker_thread+0x50/0x3b0
+ [ 9482.128181] kthread+0x153/0x170
+ [ 9482.128256] ret_from_fork+0x22/0x30
+ [ 9482.128327]
+ -> #0 (&fs_info->qgroup_rescan_lock){+.+.}-{3:3}:
+ [ 9482.128464] check_prev_add+0x91/0xc60
+ [ 9482.128551] __lock_acquire+0x1740/0x3110
+ [ 9482.128623] lock_acquire+0xd8/0x490
+ [ 9482.130029] __mutex_lock+0xa3/0xb30
+ [ 9482.130590] qgroup_rescan_init+0x43/0xf0 [btrfs]
+ [ 9482.131577] btrfs_read_qgroup_config+0x43a/0x550 [btrfs]
+ [ 9482.132175] open_ctree+0x1228/0x18a0 [btrfs]
+ [ 9482.132756] btrfs_mount_root.cold+0x13/0xed [btrfs]
+ [ 9482.133325] legacy_get_tree+0x30/0x60
+ [ 9482.133866] vfs_get_tree+0x28/0xe0
+ [ 9482.134392] fc_mount+0xe/0x40
+ [ 9482.134908] vfs_kern_mount.part.0+0x71/0x90
+ [ 9482.135428] btrfs_mount+0x13b/0x3e0 [btrfs]
+ [ 9482.135942] legacy_get_tree+0x30/0x60
+ [ 9482.136444] vfs_get_tree+0x28/0xe0
+ [ 9482.136949] path_mount+0x2d7/0xa70
+ [ 9482.137438] do_mount+0x75/0x90
+ [ 9482.137923] __x64_sys_mount+0x8e/0xd0
+ [ 9482.138400] do_syscall_64+0x33/0x80
+ [ 9482.138873] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ [ 9482.139346]
+ other info that might help us debug this:
+
+ [ 9482.140735] Possible unsafe locking scenario:
+
+ [ 9482.141594] CPU0 CPU1
+ [ 9482.142011] ---- ----
+ [ 9482.142411] lock(btrfs-quota-00);
+ [ 9482.142806] lock(&fs_info->qgroup_rescan_lock);
+ [ 9482.143216] lock(btrfs-quota-00);
+ [ 9482.143629] lock(&fs_info->qgroup_rescan_lock);
+ [ 9482.144056]
+ *** DEADLOCK ***
+
+ [ 9482.145242] 2 locks held by mount/24187:
+ [ 9482.145637] #0: ffffa0c8411c40e8 (&type->s_umount_key#44/1){+.+.}-{3:3}, at: alloc_super+0xb9/0x400
+ [ 9482.146061] #1: ffffa0c892ebd3a0 (btrfs-quota-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x27/0x120 [btrfs]
+ [ 9482.146509]
+ stack backtrace:
+ [ 9482.147350] CPU: 1 PID: 24187 Comm: mount Not tainted 5.10.0-rc4-btrfs-next-73 #1
+ [ 9482.147788] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ [ 9482.148709] Call Trace:
+ [ 9482.149169] dump_stack+0x8d/0xb5
+ [ 9482.149628] check_noncircular+0xff/0x110
+ [ 9482.150090] check_prev_add+0x91/0xc60
+ [ 9482.150561] ? kvm_clock_read+0x14/0x30
+ [ 9482.151017] ? kvm_sched_clock_read+0x5/0x10
+ [ 9482.151470] __lock_acquire+0x1740/0x3110
+ [ 9482.151941] ? __btrfs_tree_read_lock+0x27/0x120 [btrfs]
+ [ 9482.152402] lock_acquire+0xd8/0x490
+ [ 9482.152887] ? qgroup_rescan_init+0x43/0xf0 [btrfs]
+ [ 9482.153354] __mutex_lock+0xa3/0xb30
+ [ 9482.153826] ? qgroup_rescan_init+0x43/0xf0 [btrfs]
+ [ 9482.154301] ? qgroup_rescan_init+0x43/0xf0 [btrfs]
+ [ 9482.154768] ? qgroup_rescan_init+0x43/0xf0 [btrfs]
+ [ 9482.155226] qgroup_rescan_init+0x43/0xf0 [btrfs]
+ [ 9482.155690] btrfs_read_qgroup_config+0x43a/0x550 [btrfs]
+ [ 9482.156160] open_ctree+0x1228/0x18a0 [btrfs]
+ [ 9482.156643] btrfs_mount_root.cold+0x13/0xed [btrfs]
+ [ 9482.157108] ? rcu_read_lock_sched_held+0x5d/0x90
+ [ 9482.157567] ? kfree+0x31f/0x3e0
+ [ 9482.158030] legacy_get_tree+0x30/0x60
+ [ 9482.158489] vfs_get_tree+0x28/0xe0
+ [ 9482.158947] fc_mount+0xe/0x40
+ [ 9482.159403] vfs_kern_mount.part.0+0x71/0x90
+ [ 9482.159875] btrfs_mount+0x13b/0x3e0 [btrfs]
+ [ 9482.160335] ? rcu_read_lock_sched_held+0x5d/0x90
+ [ 9482.160805] ? kfree+0x31f/0x3e0
+ [ 9482.161260] ? legacy_get_tree+0x30/0x60
+ [ 9482.161714] legacy_get_tree+0x30/0x60
+ [ 9482.162166] vfs_get_tree+0x28/0xe0
+ [ 9482.162616] path_mount+0x2d7/0xa70
+ [ 9482.163070] do_mount+0x75/0x90
+ [ 9482.163525] __x64_sys_mount+0x8e/0xd0
+ [ 9482.163986] do_syscall_64+0x33/0x80
+ [ 9482.164437] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ [ 9482.164902] RIP: 0033:0x7f51e907caaa
+
+This happens because at btrfs_read_qgroup_config() we can call
+qgroup_rescan_init() while holding a read lock on a quota btree leaf,
+acquired by the previous call to btrfs_search_slot_for_read(), and
+qgroup_rescan_init() acquires the mutex qgroup_rescan_lock.
+
+A qgroup rescan worker does the opposite: it acquires the mutex
+qgroup_rescan_lock, at btrfs_qgroup_rescan_worker(), and then tries to
+update the qgroup status item in the quota btree through the call to
+update_qgroup_status_item(). This inversion of locking order
+between the qgroup_rescan_lock mutex and quota btree locks causes the
+splat.
+
+Fix this simply by releasing and freeing the path before calling
+qgroup_rescan_init() at btrfs_read_qgroup_config().
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -497,13 +497,13 @@ next2:
+ break;
+ }
+ out:
++ btrfs_free_path(path);
+ fs_info->qgroup_flags |= flags;
+ if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+ clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+ else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
+ ret >= 0)
+ ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
+- btrfs_free_path(path);
+
+ if (ret < 0) {
+ ulist_free(fs_info->qgroup_ulist);
--- /dev/null
+From c334730988ee07908ba4eb816ce78d3fe06fecaa Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 4 Nov 2020 11:07:31 +0000
+Subject: btrfs: fix missing delalloc new bit for new delalloc ranges
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit c334730988ee07908ba4eb816ce78d3fe06fecaa upstream.
+
+When doing a buffered write, through one of the write family syscalls, we
+look for ranges which currently don't have allocated extents and set the
+'delalloc new' bit on them, so that we can report a correct number of used
+blocks to the stat(2) syscall until delalloc is flushed and ordered extents
+complete.
+
+However there are a few other places where we can do a buffered write
+against a range that is mapped to a hole (no extent allocated) and where
+we do not set the 'new delalloc' bit. Those places are:
+
+- Doing a memory mapped write against a hole;
+
+- Cloning an inline extent into a hole starting at file offset 0;
+
+- Calling btrfs_cont_expand() when the i_size of the file is not aligned
+ to the sector size and is located in a hole. For example when cloning
+ to a destination offset beyond EOF.
+
+So after such cases, until the corresponding delalloc range is flushed and
+the respective ordered extents complete, we can report an incorrect number
+of blocks used through the stat(2) syscall.
+
+In some cases we can end up reporting 0 used blocks to stat(2), which is a
+particular bad value to report as it may mislead tools to think a file is
+completely sparse when its i_size is not zero, making them skip reading
+any data, an undesired consequence for tools such as archivers and other
+backup tools, as reported a long time ago in the following thread (and
+other past threads):
+
+ https://lists.gnu.org/archive/html/bug-tar/2016-07/msg00001.html
+
+Example reproducer:
+
+ $ cat reproducer.sh
+ #!/bin/bash
+
+ MNT=/mnt/sdi
+ DEV=/dev/sdi
+
+ mkfs.btrfs -f $DEV > /dev/null
+ # mkfs.xfs -f $DEV > /dev/null
+ # mkfs.ext4 -F $DEV > /dev/null
+ # mkfs.f2fs -f $DEV > /dev/null
+ mount $DEV $MNT
+
+ xfs_io -f -c "truncate 64K" \
+ -c "mmap -w 0 64K" \
+ -c "mwrite -S 0xab 0 64K" \
+ -c "munmap" \
+ $MNT/foo
+
+ blocks_used=$(stat -c %b $MNT/foo)
+ echo "blocks used: $blocks_used"
+
+ if [ $blocks_used -eq 0 ]; then
+ echo "ERROR: blocks used is 0"
+ fi
+
+ umount $DEV
+
+ $ ./reproducer.sh
+ blocks used: 0
+ ERROR: blocks used is 0
+
+So move the logic that decides to set the 'delalloc new' bit into the
+function btrfs_set_extent_delalloc(), since that is what we use for all
+those missing cases as well as for the cases that currently work well.
+
+This change is also preparatory work for an upcoming patch that fixes
+other problems related to tracking and reporting the number of bytes used
+by an inode.
+
+CC: stable@vger.kernel.org # 4.19+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c | 57 ------------------------------------------
+ fs/btrfs/inode.c | 58 +++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/tests/inode-tests.c | 12 +++++---
+ 3 files changed, 66 insertions(+), 61 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -452,46 +452,6 @@ static void btrfs_drop_pages(struct page
+ }
+ }
+
+-static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
+- const u64 start,
+- const u64 len,
+- struct extent_state **cached_state)
+-{
+- u64 search_start = start;
+- const u64 end = start + len - 1;
+-
+- while (search_start < end) {
+- const u64 search_len = end - search_start + 1;
+- struct extent_map *em;
+- u64 em_len;
+- int ret = 0;
+-
+- em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
+- if (IS_ERR(em))
+- return PTR_ERR(em);
+-
+- if (em->block_start != EXTENT_MAP_HOLE)
+- goto next;
+-
+- em_len = em->len;
+- if (em->start < search_start)
+- em_len -= search_start - em->start;
+- if (em_len > search_len)
+- em_len = search_len;
+-
+- ret = set_extent_bit(&inode->io_tree, search_start,
+- search_start + em_len - 1,
+- EXTENT_DELALLOC_NEW,
+- NULL, cached_state, GFP_NOFS);
+-next:
+- search_start = extent_map_end(em);
+- free_extent_map(em);
+- if (ret)
+- return ret;
+- }
+- return 0;
+-}
+-
+ /*
+ * after copy_from_user, pages need to be dirtied and we need to make
+ * sure holes are created between the current EOF and the start of
+@@ -528,23 +488,6 @@ int btrfs_dirty_pages(struct btrfs_inode
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+ 0, 0, cached);
+
+- if (!btrfs_is_free_space_inode(inode)) {
+- if (start_pos >= isize &&
+- !(inode->flags & BTRFS_INODE_PREALLOC)) {
+- /*
+- * There can't be any extents following eof in this case
+- * so just set the delalloc new bit for the range
+- * directly.
+- */
+- extra_bits |= EXTENT_DELALLOC_NEW;
+- } else {
+- err = btrfs_find_new_delalloc_bytes(inode, start_pos,
+- num_bytes, cached);
+- if (err)
+- return err;
+- }
+- }
+-
+ err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
+ extra_bits, cached);
+ if (err)
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2262,11 +2262,69 @@ static noinline int add_pending_csums(st
+ return 0;
+ }
+
++static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
++ const u64 start,
++ const u64 len,
++ struct extent_state **cached_state)
++{
++ u64 search_start = start;
++ const u64 end = start + len - 1;
++
++ while (search_start < end) {
++ const u64 search_len = end - search_start + 1;
++ struct extent_map *em;
++ u64 em_len;
++ int ret = 0;
++
++ em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
++ if (IS_ERR(em))
++ return PTR_ERR(em);
++
++ if (em->block_start != EXTENT_MAP_HOLE)
++ goto next;
++
++ em_len = em->len;
++ if (em->start < search_start)
++ em_len -= search_start - em->start;
++ if (em_len > search_len)
++ em_len = search_len;
++
++ ret = set_extent_bit(&inode->io_tree, search_start,
++ search_start + em_len - 1,
++ EXTENT_DELALLOC_NEW,
++ NULL, cached_state, GFP_NOFS);
++next:
++ search_start = extent_map_end(em);
++ free_extent_map(em);
++ if (ret)
++ return ret;
++ }
++ return 0;
++}
++
+ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
+ unsigned int extra_bits,
+ struct extent_state **cached_state)
+ {
+ WARN_ON(PAGE_ALIGNED(end));
++
++ if (start >= i_size_read(&inode->vfs_inode) &&
++ !(inode->flags & BTRFS_INODE_PREALLOC)) {
++ /*
++ * There can't be any extents following eof in this case so just
++ * set the delalloc new bit for the range directly.
++ */
++ extra_bits |= EXTENT_DELALLOC_NEW;
++ } else {
++ int ret;
++
++ ret = btrfs_find_new_delalloc_bytes(inode, start,
++ end + 1 - start,
++ cached_state);
++ if (ret)
++ return ret;
++ }
++
+ return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
+ cached_state);
+ }
+--- a/fs/btrfs/tests/inode-tests.c
++++ b/fs/btrfs/tests/inode-tests.c
+@@ -986,7 +986,8 @@ static int test_extent_accounting(u32 se
+ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
+ BTRFS_MAX_EXTENT_SIZE >> 1,
+ (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
+- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
++ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
++ EXTENT_UPTODATE, 0, 0, NULL);
+ if (ret) {
+ test_err("clear_extent_bit returned %d", ret);
+ goto out;
+@@ -1053,7 +1054,8 @@ static int test_extent_accounting(u32 se
+ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
+ BTRFS_MAX_EXTENT_SIZE + sectorsize,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
+- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
++ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
++ EXTENT_UPTODATE, 0, 0, NULL);
+ if (ret) {
+ test_err("clear_extent_bit returned %d", ret);
+ goto out;
+@@ -1085,7 +1087,8 @@ static int test_extent_accounting(u32 se
+
+ /* Empty */
+ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
++ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
++ EXTENT_UPTODATE, 0, 0, NULL);
+ if (ret) {
+ test_err("clear_extent_bit returned %d", ret);
+ goto out;
+@@ -1100,7 +1103,8 @@ static int test_extent_accounting(u32 se
+ out:
+ if (ret)
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
++ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
++ EXTENT_UPTODATE, 0, 0, NULL);
+ iput(inode);
+ btrfs_free_dummy_root(root);
+ btrfs_free_dummy_fs_info(fs_info);
--- /dev/null
+From 1a49a97df657c63a4e8ffcd1ea9b6ed95581789b Mon Sep 17 00:00:00 2001
+From: Daniel Xu <dxu@dxuuu.xyz>
+Date: Thu, 12 Nov 2020 17:55:06 -0800
+Subject: btrfs: tree-checker: add missing return after error in root_item
+
+From: Daniel Xu <dxu@dxuuu.xyz>
+
+commit 1a49a97df657c63a4e8ffcd1ea9b6ed95581789b upstream.
+
+There's a missing return statement after an error is found in the
+root_item, this can cause further problems when a crafted image triggers
+the error.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=210181
+Fixes: 259ee7754b67 ("btrfs: tree-checker: Add ROOT_ITEM check")
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-checker.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -1068,6 +1068,7 @@ static int check_root_item(struct extent
+ "invalid root item size, have %u expect %zu or %u",
+ btrfs_item_size_nr(leaf, slot), sizeof(ri),
+ btrfs_legacy_root_item_size());
++ return -EUCLEAN;
+ }
+
+ /*
--- /dev/null
+From 6d06b0ad94d3dd7e3503d8ad39c39c4634884611 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Mon, 16 Nov 2020 19:53:52 +0100
+Subject: btrfs: tree-checker: add missing returns after data_ref alignment checks
+
+From: David Sterba <dsterba@suse.com>
+
+commit 6d06b0ad94d3dd7e3503d8ad39c39c4634884611 upstream.
+
+There are sectorsize alignment checks that are reported but then
+check_extent_data_ref continues. This was not intended, wrong alignment
+is not a minor problem and we should return with error.
+
+CC: stable@vger.kernel.org # 5.4+
+Fixes: 0785a9aacf9d ("btrfs: tree-checker: Add EXTENT_DATA_REF check")
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-checker.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -1424,6 +1424,7 @@ static int check_extent_data_ref(struct
+ "invalid item size, have %u expect aligned to %zu for key type %u",
+ btrfs_item_size_nr(leaf, slot),
+ sizeof(*dref), key->type);
++ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) {
+ generic_err(leaf, slot,
+@@ -1452,6 +1453,7 @@ static int check_extent_data_ref(struct
+ extent_err(leaf, slot,
+ "invalid extent data backref offset, have %llu expect aligned to %u",
+ offset, leaf->fs_info->sectorsize);
++ return -EUCLEAN;
+ }
+ }
+ return 0;
--- /dev/null
+From 3d2a9d642512c21a12d19b9250e7a835dcb41a79 Mon Sep 17 00:00:00 2001
+From: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+Date: Wed, 25 Nov 2020 16:01:12 -0500
+Subject: IB/hfi1: Ensure correct mm is used at all times
+
+From: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+
+commit 3d2a9d642512c21a12d19b9250e7a835dcb41a79 upstream.
+
+Two earlier bug fixes have created a security problem in the hfi1
+driver. One fix aimed to solve an issue where current->mm was not valid
+when closing the hfi1 cdev. It attempted to do this by saving a cached
+value of the current->mm pointer at file open time. This is a problem if
+another process with access to the FD calls in via write() or ioctl() to
+pin pages via the hfi driver. The other fix tried to solve a use after
+free by taking a reference on the mm.
+
+To fix this correctly we use the existing cached value of the mm in the
+mmu notifier. Now we can check in the insert, evict, etc. routines that
+current->mm matched what the notifier was registered for. If not, then
+don't allow access. The register of the mmu notifier will save the mm
+pointer.
+
+Since in do_exit() the exit_mm() is called before exit_files(), which
+would call our close routine a reference is needed on the mm. We rely on
+the mmgrab done by the registration of the notifier, whereas before it was
+explicit. The mmu notifier deregistration happens when the user context is
+torn down, the creation of which triggered the registration.
+
+Also of note is we do not do any explicit work to protect the interval
+tree notifier. It doesn't seem that this is going to be needed since we
+aren't actually doing anything with current->mm. The interval tree
+notifier stuff still has a FIXME noted from a previous commit that will be
+addressed in a follow on patch.
+
+Cc: <stable@vger.kernel.org>
+Fixes: e0cf75deab81 ("IB/hfi1: Fix mm_struct use after free")
+Fixes: 3faa3d9a308e ("IB/hfi1: Make use of mm consistent")
+Link: https://lore.kernel.org/r/20201125210112.104301.51331.stgit@awfm-01.aw.intel.com
+Suggested-by: Jann Horn <jannh@google.com>
+Reported-by: Jason Gunthorpe <jgg@nvidia.com>
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Reviewed-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/hfi1/file_ops.c | 4 -
+ drivers/infiniband/hw/hfi1/hfi.h | 2
+ drivers/infiniband/hw/hfi1/mmu_rb.c | 66 +++++++++++++++---------------
+ drivers/infiniband/hw/hfi1/mmu_rb.h | 16 ++++++-
+ drivers/infiniband/hw/hfi1/user_exp_rcv.c | 12 +++--
+ drivers/infiniband/hw/hfi1/user_exp_rcv.h | 6 ++
+ drivers/infiniband/hw/hfi1/user_sdma.c | 13 +++--
+ drivers/infiniband/hw/hfi1/user_sdma.h | 7 ++-
+ 8 files changed, 78 insertions(+), 48 deletions(-)
+
+--- a/drivers/infiniband/hw/hfi1/file_ops.c
++++ b/drivers/infiniband/hw/hfi1/file_ops.c
+@@ -1,4 +1,5 @@
+ /*
++ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+@@ -206,8 +207,6 @@ static int hfi1_file_open(struct inode *
+ spin_lock_init(&fd->tid_lock);
+ spin_lock_init(&fd->invalid_lock);
+ fd->rec_cpu_num = -1; /* no cpu affinity by default */
+- fd->mm = current->mm;
+- mmgrab(fd->mm);
+ fd->dd = dd;
+ fp->private_data = fd;
+ return 0;
+@@ -711,7 +710,6 @@ static int hfi1_file_close(struct inode
+
+ deallocate_ctxt(uctxt);
+ done:
+- mmdrop(fdata->mm);
+
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+--- a/drivers/infiniband/hw/hfi1/hfi.h
++++ b/drivers/infiniband/hw/hfi1/hfi.h
+@@ -1,6 +1,7 @@
+ #ifndef _HFI1_KERNEL_H
+ #define _HFI1_KERNEL_H
+ /*
++ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+@@ -1451,7 +1452,6 @@ struct hfi1_filedata {
+ u32 invalid_tid_idx;
+ /* protect invalid_tids array and invalid_tid_idx */
+ spinlock_t invalid_lock;
+- struct mm_struct *mm;
+ };
+
+ extern struct xarray hfi1_dev_table;
+--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
++++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
+@@ -1,4 +1,5 @@
+ /*
++ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2016 - 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+@@ -48,23 +49,11 @@
+ #include <linux/rculist.h>
+ #include <linux/mmu_notifier.h>
+ #include <linux/interval_tree_generic.h>
++#include <linux/sched/mm.h>
+
+ #include "mmu_rb.h"
+ #include "trace.h"
+
+-struct mmu_rb_handler {
+- struct mmu_notifier mn;
+- struct rb_root_cached root;
+- void *ops_arg;
+- spinlock_t lock; /* protect the RB tree */
+- struct mmu_rb_ops *ops;
+- struct mm_struct *mm;
+- struct list_head lru_list;
+- struct work_struct del_work;
+- struct list_head del_list;
+- struct workqueue_struct *wq;
+-};
+-
+ static unsigned long mmu_node_start(struct mmu_rb_node *);
+ static unsigned long mmu_node_last(struct mmu_rb_node *);
+ static int mmu_notifier_range_start(struct mmu_notifier *,
+@@ -92,37 +81,36 @@ static unsigned long mmu_node_last(struc
+ return PAGE_ALIGN(node->addr + node->len) - 1;
+ }
+
+-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
++int hfi1_mmu_rb_register(void *ops_arg,
+ struct mmu_rb_ops *ops,
+ struct workqueue_struct *wq,
+ struct mmu_rb_handler **handler)
+ {
+- struct mmu_rb_handler *handlr;
++ struct mmu_rb_handler *h;
+ int ret;
+
+- handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
+- if (!handlr)
++ h = kmalloc(sizeof(*h), GFP_KERNEL);
++ if (!h)
+ return -ENOMEM;
+
+- handlr->root = RB_ROOT_CACHED;
+- handlr->ops = ops;
+- handlr->ops_arg = ops_arg;
+- INIT_HLIST_NODE(&handlr->mn.hlist);
+- spin_lock_init(&handlr->lock);
+- handlr->mn.ops = &mn_opts;
+- handlr->mm = mm;
+- INIT_WORK(&handlr->del_work, handle_remove);
+- INIT_LIST_HEAD(&handlr->del_list);
+- INIT_LIST_HEAD(&handlr->lru_list);
+- handlr->wq = wq;
++ h->root = RB_ROOT_CACHED;
++ h->ops = ops;
++ h->ops_arg = ops_arg;
++ INIT_HLIST_NODE(&h->mn.hlist);
++ spin_lock_init(&h->lock);
++ h->mn.ops = &mn_opts;
++ INIT_WORK(&h->del_work, handle_remove);
++ INIT_LIST_HEAD(&h->del_list);
++ INIT_LIST_HEAD(&h->lru_list);
++ h->wq = wq;
+
+- ret = mmu_notifier_register(&handlr->mn, handlr->mm);
++ ret = mmu_notifier_register(&h->mn, current->mm);
+ if (ret) {
+- kfree(handlr);
++ kfree(h);
+ return ret;
+ }
+
+- *handler = handlr;
++ *handler = h;
+ return 0;
+ }
+
+@@ -134,7 +122,7 @@ void hfi1_mmu_rb_unregister(struct mmu_r
+ struct list_head del_list;
+
+ /* Unregister first so we don't get any more notifications. */
+- mmu_notifier_unregister(&handler->mn, handler->mm);
++ mmu_notifier_unregister(&handler->mn, handler->mn.mm);
+
+ /*
+ * Make sure the wq delete handler is finished running. It will not
+@@ -166,6 +154,10 @@ int hfi1_mmu_rb_insert(struct mmu_rb_han
+ int ret = 0;
+
+ trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
++
++ if (current->mm != handler->mn.mm)
++ return -EPERM;
++
+ spin_lock_irqsave(&handler->lock, flags);
+ node = __mmu_rb_search(handler, mnode->addr, mnode->len);
+ if (node) {
+@@ -180,6 +172,7 @@ int hfi1_mmu_rb_insert(struct mmu_rb_han
+ __mmu_int_rb_remove(mnode, &handler->root);
+ list_del(&mnode->list); /* remove from LRU list */
+ }
++ mnode->handler = handler;
+ unlock:
+ spin_unlock_irqrestore(&handler->lock, flags);
+ return ret;
+@@ -217,6 +210,9 @@ bool hfi1_mmu_rb_remove_unless_exact(str
+ unsigned long flags;
+ bool ret = false;
+
++ if (current->mm != handler->mn.mm)
++ return ret;
++
+ spin_lock_irqsave(&handler->lock, flags);
+ node = __mmu_rb_search(handler, addr, len);
+ if (node) {
+@@ -239,6 +235,9 @@ void hfi1_mmu_rb_evict(struct mmu_rb_han
+ unsigned long flags;
+ bool stop = false;
+
++ if (current->mm != handler->mn.mm)
++ return;
++
+ INIT_LIST_HEAD(&del_list);
+
+ spin_lock_irqsave(&handler->lock, flags);
+@@ -272,6 +271,9 @@ void hfi1_mmu_rb_remove(struct mmu_rb_ha
+ {
+ unsigned long flags;
+
++ if (current->mm != handler->mn.mm)
++ return;
++
+ /* Validity of handler and node pointers has been checked by caller. */
+ trace_hfi1_mmu_rb_remove(node->addr, node->len);
+ spin_lock_irqsave(&handler->lock, flags);
+--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
++++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
+@@ -1,4 +1,5 @@
+ /*
++ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+@@ -54,6 +55,7 @@ struct mmu_rb_node {
+ unsigned long len;
+ unsigned long __last;
+ struct rb_node node;
++ struct mmu_rb_handler *handler;
+ struct list_head list;
+ };
+
+@@ -71,7 +73,19 @@ struct mmu_rb_ops {
+ void *evict_arg, bool *stop);
+ };
+
+-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
++struct mmu_rb_handler {
++ struct mmu_notifier mn;
++ struct rb_root_cached root;
++ void *ops_arg;
++ spinlock_t lock; /* protect the RB tree */
++ struct mmu_rb_ops *ops;
++ struct list_head lru_list;
++ struct work_struct del_work;
++ struct list_head del_list;
++ struct workqueue_struct *wq;
++};
++
++int hfi1_mmu_rb_register(void *ops_arg,
+ struct mmu_rb_ops *ops,
+ struct workqueue_struct *wq,
+ struct mmu_rb_handler **handler);
+--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
++++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+@@ -1,4 +1,5 @@
+ /*
++ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+@@ -173,15 +174,18 @@ static void unpin_rcv_pages(struct hfi1_
+ {
+ struct page **pages;
+ struct hfi1_devdata *dd = fd->uctxt->dd;
++ struct mm_struct *mm;
+
+ if (mapped) {
+ pci_unmap_single(dd->pcidev, node->dma_addr,
+ node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ pages = &node->pages[idx];
++ mm = mm_from_tid_node(node);
+ } else {
+ pages = &tidbuf->pages[idx];
++ mm = current->mm;
+ }
+- hfi1_release_user_pages(fd->mm, pages, npages, mapped);
++ hfi1_release_user_pages(mm, pages, npages, mapped);
+ fd->tid_n_pinned -= npages;
+ }
+
+@@ -216,12 +220,12 @@ static int pin_rcv_pages(struct hfi1_fil
+ * pages, accept the amount pinned so far and program only that.
+ * User space knows how to deal with partially programmed buffers.
+ */
+- if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
++ if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
+ kfree(pages);
+ return -ENOMEM;
+ }
+
+- pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
++ pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
+ if (pinned <= 0) {
+ kfree(pages);
+ return pinned;
+@@ -756,7 +760,7 @@ static int set_rcvarray_entry(struct hfi
+
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+- &node->notifier, fd->mm,
++ &node->notifier, current->mm,
+ tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+ &tid_mn_ops);
+ if (ret)
+--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
++++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+@@ -1,6 +1,7 @@
+ #ifndef _HFI1_USER_EXP_RCV_H
+ #define _HFI1_USER_EXP_RCV_H
+ /*
++ * Copyright(c) 2020 - Cornelis Networks, Inc.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+@@ -95,4 +96,9 @@ int hfi1_user_exp_rcv_clear(struct hfi1_
+ int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
+
++static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node)
++{
++ return node->notifier.mm;
++}
++
+ #endif /* _HFI1_USER_EXP_RCV_H */
+--- a/drivers/infiniband/hw/hfi1/user_sdma.c
++++ b/drivers/infiniband/hw/hfi1/user_sdma.c
+@@ -1,4 +1,5 @@
+ /*
++ * Copyright(c) 2020 - Cornelis Networks, Inc.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+@@ -188,7 +189,6 @@ int hfi1_user_sdma_alloc_queues(struct h
+ atomic_set(&pq->n_reqs, 0);
+ init_waitqueue_head(&pq->wait);
+ atomic_set(&pq->n_locked, 0);
+- pq->mm = fd->mm;
+
+ iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
+ activate_packet_queue, NULL, NULL);
+@@ -230,7 +230,7 @@ int hfi1_user_sdma_alloc_queues(struct h
+
+ cq->nentries = hfi1_sdma_comp_ring_size;
+
+- ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
++ ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
+ &pq->handler);
+ if (ret) {
+ dd_dev_err(dd, "Failed to register with MMU %d", ret);
+@@ -980,13 +980,13 @@ static int pin_sdma_pages(struct user_sd
+
+ npages -= node->npages;
+ retry:
+- if (!hfi1_can_pin_pages(pq->dd, pq->mm,
++ if (!hfi1_can_pin_pages(pq->dd, current->mm,
+ atomic_read(&pq->n_locked), npages)) {
+ cleared = sdma_cache_evict(pq, npages);
+ if (cleared >= npages)
+ goto retry;
+ }
+- pinned = hfi1_acquire_user_pages(pq->mm,
++ pinned = hfi1_acquire_user_pages(current->mm,
+ ((unsigned long)iovec->iov.iov_base +
+ (node->npages * PAGE_SIZE)), npages, 0,
+ pages + node->npages);
+@@ -995,7 +995,7 @@ retry:
+ return pinned;
+ }
+ if (pinned != npages) {
+- unpin_vector_pages(pq->mm, pages, node->npages, pinned);
++ unpin_vector_pages(current->mm, pages, node->npages, pinned);
+ return -EFAULT;
+ }
+ kfree(node->pages);
+@@ -1008,7 +1008,8 @@ retry:
+ static void unpin_sdma_pages(struct sdma_mmu_node *node)
+ {
+ if (node->npages) {
+- unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
++ unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
++ node->npages);
+ atomic_sub(node->npages, &node->pq->n_locked);
+ }
+ }
+--- a/drivers/infiniband/hw/hfi1/user_sdma.h
++++ b/drivers/infiniband/hw/hfi1/user_sdma.h
+@@ -1,6 +1,7 @@
+ #ifndef _HFI1_USER_SDMA_H
+ #define _HFI1_USER_SDMA_H
+ /*
++ * Copyright(c) 2020 - Cornelis Networks, Inc.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+@@ -133,7 +134,6 @@ struct hfi1_user_sdma_pkt_q {
+ unsigned long unpinned;
+ struct mmu_rb_handler *handler;
+ atomic_t n_locked;
+- struct mm_struct *mm;
+ };
+
+ struct hfi1_user_sdma_comp_q {
+@@ -250,4 +250,9 @@ int hfi1_user_sdma_process_request(struc
+ struct iovec *iovec, unsigned long dim,
+ unsigned long *count);
+
++static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node)
++{
++ return node->rb.handler->mn.mm;
++}
++
+ #endif /* _HFI1_USER_SDMA_H */
--- /dev/null
+From 2ed381439e89fa6d1a0839ef45ccd45d99d8e915 Mon Sep 17 00:00:00 2001
+From: Shiraz Saleem <shiraz.saleem@intel.com>
+Date: Tue, 24 Nov 2020 18:56:16 -0600
+Subject: RDMA/i40iw: Address an mmap handler exploit in i40iw
+
+From: Shiraz Saleem <shiraz.saleem@intel.com>
+
+commit 2ed381439e89fa6d1a0839ef45ccd45d99d8e915 upstream.
+
+i40iw_mmap manipulates the vma->vm_pgoff to differentiate a push page mmap
+vs a doorbell mmap, and uses it to compute the pfn in remap_pfn_range
+without any validation. This is vulnerable to an mmap exploit as described
+in: https://lore.kernel.org/r/20201119093523.7588-1-zhudi21@huawei.com
+
+The push feature is disabled in the driver currently and therefore no push
+mmaps are issued from user-space. The feature does not work as expected in
+the x722 product.
+
+Remove the push module parameter and all VMA attribute manipulations for
+this feature in i40iw_mmap. Update i40iw_mmap to only allow DB user
+mmapings at offset = 0. Check vm_pgoff for zero and if the mmaps are bound
+to a single page.
+
+Cc: <stable@kernel.org>
+Fixes: d37498417947 ("i40iw: add files for iwarp interface")
+Link: https://lore.kernel.org/r/20201125005616.1800-2-shiraz.saleem@intel.com
+Reported-by: Di Zhu <zhudi21@huawei.com>
+Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/infiniband/hw/i40iw/i40iw_main.c | 5 ----
+ drivers/infiniband/hw/i40iw/i40iw_verbs.c | 37 +++++-------------------------
+ 2 files changed, 7 insertions(+), 35 deletions(-)
+
+--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
++++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
+@@ -54,10 +54,6 @@
+ #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
+ __stringify(DRV_VERSION_MINOR) "." __stringify(DRV_VERSION_BUILD)
+
+-static int push_mode;
+-module_param(push_mode, int, 0644);
+-MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled)");
+-
+ static int debug;
+ module_param(debug, int, 0644);
+ MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all");
+@@ -1580,7 +1576,6 @@ static enum i40iw_status_code i40iw_setu
+ if (status)
+ goto exit;
+ iwdev->obj_next = iwdev->obj_mem;
+- iwdev->push_mode = push_mode;
+
+ init_waitqueue_head(&iwdev->vchnl_waitq);
+ init_waitqueue_head(&dev->vf_reqs);
+--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
++++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+@@ -167,39 +167,16 @@ static void i40iw_dealloc_ucontext(struc
+ */
+ static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+ {
+- struct i40iw_ucontext *ucontext;
+- u64 db_addr_offset, push_offset, pfn;
++ struct i40iw_ucontext *ucontext = to_ucontext(context);
++ u64 dbaddr;
+
+- ucontext = to_ucontext(context);
+- if (ucontext->iwdev->sc_dev.is_pf) {
+- db_addr_offset = I40IW_DB_ADDR_OFFSET;
+- push_offset = I40IW_PUSH_OFFSET;
+- if (vma->vm_pgoff)
+- vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1;
+- } else {
+- db_addr_offset = I40IW_VF_DB_ADDR_OFFSET;
+- push_offset = I40IW_VF_PUSH_OFFSET;
+- if (vma->vm_pgoff)
+- vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1;
+- }
++ if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE)
++ return -EINVAL;
+
+- vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT;
++ dbaddr = I40IW_DB_ADDR_OFFSET + pci_resource_start(ucontext->iwdev->ldev->pcidev, 0);
+
+- if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) {
+- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+- } else {
+- if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2)
+- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+- else
+- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+- }
+-
+- pfn = vma->vm_pgoff +
+- (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >>
+- PAGE_SHIFT);
+-
+- return rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
+- vma->vm_page_prot, NULL);
++ return rdma_user_mmap_io(context, vma, dbaddr >> PAGE_SHIFT, PAGE_SIZE,
++ pgprot_noncached(vma->vm_page_prot), NULL);
+ }
+
+ /**
spi-bcm-qspi-fix-use-after-free-on-unbind.patch
spi-bcm2835-fix-use-after-free-on-unbind.patch
ipv4-use-is_enabled-instead-of-ifdef.patch
+ib-hfi1-ensure-correct-mm-is-used-at-all-times.patch
+rdma-i40iw-address-an-mmap-handler-exploit-in-i40iw.patch
+btrfs-fix-missing-delalloc-new-bit-for-new-delalloc-ranges.patch
+btrfs-tree-checker-add-missing-return-after-error-in-root_item.patch
+btrfs-tree-checker-add-missing-returns-after-data_ref-alignment-checks.patch
+btrfs-don-t-access-possibly-stale-fs_info-data-for-printing-duplicate-device.patch
+btrfs-fix-lockdep-splat-when-reading-qgroup-config-on-mount.patch