From: Greg Kroah-Hartman Date: Sat, 28 Nov 2020 12:38:41 +0000 (+0100) Subject: 5.9-stable patches X-Git-Tag: v4.4.247~41 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=06d3ca5474323d7adfefbe76a53b32caa2488c9e;p=thirdparty%2Fkernel%2Fstable-queue.git 5.9-stable patches added patches: btrfs-don-t-access-possibly-stale-fs_info-data-for-printing-duplicate-device.patch btrfs-fix-lockdep-splat-when-reading-qgroup-config-on-mount.patch btrfs-fix-missing-delalloc-new-bit-for-new-delalloc-ranges.patch btrfs-tree-checker-add-missing-return-after-error-in-root_item.patch btrfs-tree-checker-add-missing-returns-after-data_ref-alignment-checks.patch ib-hfi1-ensure-correct-mm-is-used-at-all-times.patch rdma-i40iw-address-an-mmap-handler-exploit-in-i40iw.patch --- diff --git a/queue-4.4/series b/queue-4.4/series new file mode 100644 index 00000000000..e69de29bb2d diff --git a/queue-5.9/btrfs-don-t-access-possibly-stale-fs_info-data-for-printing-duplicate-device.patch b/queue-5.9/btrfs-don-t-access-possibly-stale-fs_info-data-for-printing-duplicate-device.patch new file mode 100644 index 00000000000..4f185adea87 --- /dev/null +++ b/queue-5.9/btrfs-don-t-access-possibly-stale-fs_info-data-for-printing-duplicate-device.patch @@ -0,0 +1,171 @@ +From 0697d9a610998b8bdee6b2390836cb2391d8fd1a Mon Sep 17 00:00:00 2001 +From: Johannes Thumshirn +Date: Wed, 18 Nov 2020 18:03:26 +0900 +Subject: btrfs: don't access possibly stale fs_info data for printing duplicate device + +From: Johannes Thumshirn + +commit 0697d9a610998b8bdee6b2390836cb2391d8fd1a upstream. + +Syzbot reported a possible use-after-free when printing a duplicate device +warning device_list_add(). + +At this point it can happen that a btrfs_device::fs_info is not correctly +setup yet, so we're accessing stale data, when printing the warning +message using the btrfs_printk() wrappers. 
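In reduced form, the defensive pattern applied further below is a printk-style wrapper that only dereferences its context argument when the caller can vouch for it, and prints without the device name otherwise. The following is a minimal userspace sketch for illustration only; warn_in_rcu() and struct fs_info here are simplified stand-ins, not the real btrfs helpers.

  /* Stand-in for btrfs_warn_in_rcu(): dereference fs_info only if the
   * caller passed a pointer it can still trust; NULL skips the name. */
  #include <stdarg.h>
  #include <stdio.h>

  struct fs_info {                /* mock of struct btrfs_fs_info */
          char sb_id[32];         /* mock of fs_info->sb->s_id    */
  };

  static void warn_in_rcu(const struct fs_info *fs_info, const char *fmt, ...)
  {
          va_list ap;

          if (fs_info)
                  fprintf(stderr, "BTRFS warning (device %s): ", fs_info->sb_id);
          else
                  fprintf(stderr, "BTRFS warning: ");
          va_start(ap, fmt);
          vfprintf(stderr, fmt, ap);
          va_end(ap);
          fputc('\n', stderr);
  }

  int main(void)
  {
          /* duplicate-device path: fs_info may already be freed, pass NULL */
          warn_in_rcu(NULL, "duplicate device %s devid %llu", "/dev/loop0", 1ULL);
          return 0;
  }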
+ + ================================================================== + BUG: KASAN: use-after-free in btrfs_printk+0x3eb/0x435 fs/btrfs/super.c:245 + Read of size 8 at addr ffff8880878e06a8 by task syz-executor225/7068 + + CPU: 1 PID: 7068 Comm: syz-executor225 Not tainted 5.9.0-rc5-syzkaller #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1d6/0x29e lib/dump_stack.c:118 + print_address_description+0x66/0x620 mm/kasan/report.c:383 + __kasan_report mm/kasan/report.c:513 [inline] + kasan_report+0x132/0x1d0 mm/kasan/report.c:530 + btrfs_printk+0x3eb/0x435 fs/btrfs/super.c:245 + device_list_add+0x1a88/0x1d60 fs/btrfs/volumes.c:943 + btrfs_scan_one_device+0x196/0x490 fs/btrfs/volumes.c:1359 + btrfs_mount_root+0x48f/0xb60 fs/btrfs/super.c:1634 + legacy_get_tree+0xea/0x180 fs/fs_context.c:592 + vfs_get_tree+0x88/0x270 fs/super.c:1547 + fc_mount fs/namespace.c:978 [inline] + vfs_kern_mount+0xc9/0x160 fs/namespace.c:1008 + btrfs_mount+0x33c/0xae0 fs/btrfs/super.c:1732 + legacy_get_tree+0xea/0x180 fs/fs_context.c:592 + vfs_get_tree+0x88/0x270 fs/super.c:1547 + do_new_mount fs/namespace.c:2875 [inline] + path_mount+0x179d/0x29e0 fs/namespace.c:3192 + do_mount fs/namespace.c:3205 [inline] + __do_sys_mount fs/namespace.c:3413 [inline] + __se_sys_mount+0x126/0x180 fs/namespace.c:3390 + do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + RIP: 0033:0x44840a + RSP: 002b:00007ffedfffd608 EFLAGS: 00000293 ORIG_RAX: 00000000000000a5 + RAX: ffffffffffffffda RBX: 00007ffedfffd670 RCX: 000000000044840a + RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffedfffd630 + RBP: 00007ffedfffd630 R08: 00007ffedfffd670 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000001a + R13: 0000000000000004 R14: 0000000000000003 R15: 0000000000000003 + + Allocated by task 6945: + kasan_save_stack mm/kasan/common.c:48 [inline] + kasan_set_track mm/kasan/common.c:56 [inline] + __kasan_kmalloc+0x100/0x130 mm/kasan/common.c:461 + kmalloc_node include/linux/slab.h:577 [inline] + kvmalloc_node+0x81/0x110 mm/util.c:574 + kvmalloc include/linux/mm.h:757 [inline] + kvzalloc include/linux/mm.h:765 [inline] + btrfs_mount_root+0xd0/0xb60 fs/btrfs/super.c:1613 + legacy_get_tree+0xea/0x180 fs/fs_context.c:592 + vfs_get_tree+0x88/0x270 fs/super.c:1547 + fc_mount fs/namespace.c:978 [inline] + vfs_kern_mount+0xc9/0x160 fs/namespace.c:1008 + btrfs_mount+0x33c/0xae0 fs/btrfs/super.c:1732 + legacy_get_tree+0xea/0x180 fs/fs_context.c:592 + vfs_get_tree+0x88/0x270 fs/super.c:1547 + do_new_mount fs/namespace.c:2875 [inline] + path_mount+0x179d/0x29e0 fs/namespace.c:3192 + do_mount fs/namespace.c:3205 [inline] + __do_sys_mount fs/namespace.c:3413 [inline] + __se_sys_mount+0x126/0x180 fs/namespace.c:3390 + do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + Freed by task 6945: + kasan_save_stack mm/kasan/common.c:48 [inline] + kasan_set_track+0x3d/0x70 mm/kasan/common.c:56 + kasan_set_free_info+0x17/0x30 mm/kasan/generic.c:355 + __kasan_slab_free+0xdd/0x110 mm/kasan/common.c:422 + __cache_free mm/slab.c:3418 [inline] + kfree+0x113/0x200 mm/slab.c:3756 + deactivate_locked_super+0xa7/0xf0 fs/super.c:335 + btrfs_mount_root+0x72b/0xb60 fs/btrfs/super.c:1678 + legacy_get_tree+0xea/0x180 fs/fs_context.c:592 + vfs_get_tree+0x88/0x270 fs/super.c:1547 + fc_mount fs/namespace.c:978 [inline] + vfs_kern_mount+0xc9/0x160 
fs/namespace.c:1008 + btrfs_mount+0x33c/0xae0 fs/btrfs/super.c:1732 + legacy_get_tree+0xea/0x180 fs/fs_context.c:592 + vfs_get_tree+0x88/0x270 fs/super.c:1547 + do_new_mount fs/namespace.c:2875 [inline] + path_mount+0x179d/0x29e0 fs/namespace.c:3192 + do_mount fs/namespace.c:3205 [inline] + __do_sys_mount fs/namespace.c:3413 [inline] + __se_sys_mount+0x126/0x180 fs/namespace.c:3390 + do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + The buggy address belongs to the object at ffff8880878e0000 + which belongs to the cache kmalloc-16k of size 16384 + The buggy address is located 1704 bytes inside of + 16384-byte region [ffff8880878e0000, ffff8880878e4000) + The buggy address belongs to the page: + page:0000000060704f30 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x878e0 + head:0000000060704f30 order:3 compound_mapcount:0 compound_pincount:0 + flags: 0xfffe0000010200(slab|head) + raw: 00fffe0000010200 ffffea00028e9a08 ffffea00021e3608 ffff8880aa440b00 + raw: 0000000000000000 ffff8880878e0000 0000000100000001 0000000000000000 + page dumped because: kasan: bad access detected + + Memory state around the buggy address: + ffff8880878e0580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8880878e0600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + >ffff8880878e0680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff8880878e0700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8880878e0780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ================================================================== + +The syzkaller reproducer for this use-after-free crafts a filesystem image +and loop mounts it twice in a loop. The mount will fail as the crafted +image has an invalid chunk tree. When this happens btrfs_mount_root() will +call deactivate_locked_super(), which then cleans up fs_info and +fs_info::sb. If a second thread now adds the same block-device to the +filesystem, it will get detected as a duplicate device and +device_list_add() will reject the duplicate and print a warning. But as +the fs_info pointer passed in is non-NULL this will result in a +use-after-free. + +Instead of printing possibly uninitialized or already freed memory in +btrfs_printk(), explicitly pass in a NULL fs_info so the printing of the +device name will be skipped altogether. + +There was a slightly different approach discussed in +https://lore.kernel.org/linux-btrfs/20200114060920.4527-1-anand.jain@oracle.com/t/#u + +Link: https://lore.kernel.org/linux-btrfs/000000000000c9e14b05afcc41ba@google.com +Reported-by: syzbot+582e66e5edf36a22c7b0@syzkaller.appspotmail.com +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Nikolay Borisov +Reviewed-by: Anand Jain +Signed-off-by: Johannes Thumshirn +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/volumes.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -941,7 +941,13 @@ static noinline struct btrfs_device *dev + if (device->bdev != path_bdev) { + bdput(path_bdev); + mutex_unlock(&fs_devices->device_list_mutex); +- btrfs_warn_in_rcu(device->fs_info, ++ /* ++ * device->fs_info may not be reliable here, so ++ * pass in a NULL instead. This avoids a ++ * possible use-after-free when the fs_info and ++ * fs_info->sb are already torn down. 
++ */ ++ btrfs_warn_in_rcu(NULL, + "duplicate device %s devid %llu generation %llu scanned by %s (%d)", + path, devid, found_transid, + current->comm, diff --git a/queue-5.9/btrfs-fix-lockdep-splat-when-reading-qgroup-config-on-mount.patch b/queue-5.9/btrfs-fix-lockdep-splat-when-reading-qgroup-config-on-mount.patch new file mode 100644 index 00000000000..07f87918784 --- /dev/null +++ b/queue-5.9/btrfs-fix-lockdep-splat-when-reading-qgroup-config-on-mount.patch @@ -0,0 +1,162 @@ +From 3d05cad3c357a2b749912914356072b38435edfa Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 23 Nov 2020 14:28:44 +0000 +Subject: btrfs: fix lockdep splat when reading qgroup config on mount + +From: Filipe Manana + +commit 3d05cad3c357a2b749912914356072b38435edfa upstream. + +Lockdep reported the following splat when running test btrfs/190 from +fstests: + + [ 9482.126098] ====================================================== + [ 9482.126184] WARNING: possible circular locking dependency detected + [ 9482.126281] 5.10.0-rc4-btrfs-next-73 #1 Not tainted + [ 9482.126365] ------------------------------------------------------ + [ 9482.126456] mount/24187 is trying to acquire lock: + [ 9482.126534] ffffa0c869a7dac0 (&fs_info->qgroup_rescan_lock){+.+.}-{3:3}, at: qgroup_rescan_init+0x43/0xf0 [btrfs] + [ 9482.126647] + but task is already holding lock: + [ 9482.126777] ffffa0c892ebd3a0 (btrfs-quota-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x27/0x120 [btrfs] + [ 9482.126886] + which lock already depends on the new lock. + + [ 9482.127078] + the existing dependency chain (in reverse order) is: + [ 9482.127213] + -> #1 (btrfs-quota-00){++++}-{3:3}: + [ 9482.127366] lock_acquire+0xd8/0x490 + [ 9482.127436] down_read_nested+0x45/0x220 + [ 9482.127528] __btrfs_tree_read_lock+0x27/0x120 [btrfs] + [ 9482.127613] btrfs_read_lock_root_node+0x41/0x130 [btrfs] + [ 9482.127702] btrfs_search_slot+0x514/0xc30 [btrfs] + [ 9482.127788] update_qgroup_status_item+0x72/0x140 [btrfs] + [ 9482.127877] btrfs_qgroup_rescan_worker+0xde/0x680 [btrfs] + [ 9482.127964] btrfs_work_helper+0xf1/0x600 [btrfs] + [ 9482.128039] process_one_work+0x24e/0x5e0 + [ 9482.128110] worker_thread+0x50/0x3b0 + [ 9482.128181] kthread+0x153/0x170 + [ 9482.128256] ret_from_fork+0x22/0x30 + [ 9482.128327] + -> #0 (&fs_info->qgroup_rescan_lock){+.+.}-{3:3}: + [ 9482.128464] check_prev_add+0x91/0xc60 + [ 9482.128551] __lock_acquire+0x1740/0x3110 + [ 9482.128623] lock_acquire+0xd8/0x490 + [ 9482.130029] __mutex_lock+0xa3/0xb30 + [ 9482.130590] qgroup_rescan_init+0x43/0xf0 [btrfs] + [ 9482.131577] btrfs_read_qgroup_config+0x43a/0x550 [btrfs] + [ 9482.132175] open_ctree+0x1228/0x18a0 [btrfs] + [ 9482.132756] btrfs_mount_root.cold+0x13/0xed [btrfs] + [ 9482.133325] legacy_get_tree+0x30/0x60 + [ 9482.133866] vfs_get_tree+0x28/0xe0 + [ 9482.134392] fc_mount+0xe/0x40 + [ 9482.134908] vfs_kern_mount.part.0+0x71/0x90 + [ 9482.135428] btrfs_mount+0x13b/0x3e0 [btrfs] + [ 9482.135942] legacy_get_tree+0x30/0x60 + [ 9482.136444] vfs_get_tree+0x28/0xe0 + [ 9482.136949] path_mount+0x2d7/0xa70 + [ 9482.137438] do_mount+0x75/0x90 + [ 9482.137923] __x64_sys_mount+0x8e/0xd0 + [ 9482.138400] do_syscall_64+0x33/0x80 + [ 9482.138873] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [ 9482.139346] + other info that might help us debug this: + + [ 9482.140735] Possible unsafe locking scenario: + + [ 9482.141594] CPU0 CPU1 + [ 9482.142011] ---- ---- + [ 9482.142411] lock(btrfs-quota-00); + [ 9482.142806] lock(&fs_info->qgroup_rescan_lock); + [ 9482.143216] lock(btrfs-quota-00); + [ 
9482.143629] lock(&fs_info->qgroup_rescan_lock); + [ 9482.144056] + *** DEADLOCK *** + + [ 9482.145242] 2 locks held by mount/24187: + [ 9482.145637] #0: ffffa0c8411c40e8 (&type->s_umount_key#44/1){+.+.}-{3:3}, at: alloc_super+0xb9/0x400 + [ 9482.146061] #1: ffffa0c892ebd3a0 (btrfs-quota-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x27/0x120 [btrfs] + [ 9482.146509] + stack backtrace: + [ 9482.147350] CPU: 1 PID: 24187 Comm: mount Not tainted 5.10.0-rc4-btrfs-next-73 #1 + [ 9482.147788] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + [ 9482.148709] Call Trace: + [ 9482.149169] dump_stack+0x8d/0xb5 + [ 9482.149628] check_noncircular+0xff/0x110 + [ 9482.150090] check_prev_add+0x91/0xc60 + [ 9482.150561] ? kvm_clock_read+0x14/0x30 + [ 9482.151017] ? kvm_sched_clock_read+0x5/0x10 + [ 9482.151470] __lock_acquire+0x1740/0x3110 + [ 9482.151941] ? __btrfs_tree_read_lock+0x27/0x120 [btrfs] + [ 9482.152402] lock_acquire+0xd8/0x490 + [ 9482.152887] ? qgroup_rescan_init+0x43/0xf0 [btrfs] + [ 9482.153354] __mutex_lock+0xa3/0xb30 + [ 9482.153826] ? qgroup_rescan_init+0x43/0xf0 [btrfs] + [ 9482.154301] ? qgroup_rescan_init+0x43/0xf0 [btrfs] + [ 9482.154768] ? qgroup_rescan_init+0x43/0xf0 [btrfs] + [ 9482.155226] qgroup_rescan_init+0x43/0xf0 [btrfs] + [ 9482.155690] btrfs_read_qgroup_config+0x43a/0x550 [btrfs] + [ 9482.156160] open_ctree+0x1228/0x18a0 [btrfs] + [ 9482.156643] btrfs_mount_root.cold+0x13/0xed [btrfs] + [ 9482.157108] ? rcu_read_lock_sched_held+0x5d/0x90 + [ 9482.157567] ? kfree+0x31f/0x3e0 + [ 9482.158030] legacy_get_tree+0x30/0x60 + [ 9482.158489] vfs_get_tree+0x28/0xe0 + [ 9482.158947] fc_mount+0xe/0x40 + [ 9482.159403] vfs_kern_mount.part.0+0x71/0x90 + [ 9482.159875] btrfs_mount+0x13b/0x3e0 [btrfs] + [ 9482.160335] ? rcu_read_lock_sched_held+0x5d/0x90 + [ 9482.160805] ? kfree+0x31f/0x3e0 + [ 9482.161260] ? legacy_get_tree+0x30/0x60 + [ 9482.161714] legacy_get_tree+0x30/0x60 + [ 9482.162166] vfs_get_tree+0x28/0xe0 + [ 9482.162616] path_mount+0x2d7/0xa70 + [ 9482.163070] do_mount+0x75/0x90 + [ 9482.163525] __x64_sys_mount+0x8e/0xd0 + [ 9482.163986] do_syscall_64+0x33/0x80 + [ 9482.164437] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [ 9482.164902] RIP: 0033:0x7f51e907caaa + +This happens because at btrfs_read_qgroup_config() we can call +qgroup_rescan_init() while holding a read lock on a quota btree leaf, +acquired by the previous call to btrfs_search_slot_for_read(), and +qgroup_rescan_init() acquires the mutex qgroup_rescan_lock. + +A qgroup rescan worker does the opposite: it acquires the mutex +qgroup_rescan_lock, at btrfs_qgroup_rescan_worker(), and then tries to +update the qgroup status item in the quota btree through the call to +update_qgroup_status_item(). This inversion of locking order +between the qgroup_rescan_lock mutex and quota btree locks causes the +splat. + +Fix this simply by releasing and freeing the path before calling +qgroup_rescan_init() at btrfs_read_qgroup_config(). 
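The inversion can be reproduced in miniature with two plain mutexes. The sketch below uses pthreads and made-up thread names purely to illustrate why dropping the tree lock before taking qgroup_rescan_lock removes the cycle lockdep complains about; it is not btrfs code.

  #include <pthread.h>

  static pthread_mutex_t tree_lock   = PTHREAD_MUTEX_INITIALIZER; /* quota btree leaf lock        */
  static pthread_mutex_t rescan_lock = PTHREAD_MUTEX_INITIALIZER; /* fs_info->qgroup_rescan_lock  */

  /* mount path: with the fix, the tree lock is dropped before rescan init */
  static void *mount_path(void *arg)
  {
          (void)arg;
          pthread_mutex_lock(&tree_lock);      /* btrfs_search_slot_for_read() */
          /* ... read qgroup items ... */
          pthread_mutex_unlock(&tree_lock);    /* btrfs_free_path() comes first now */
          pthread_mutex_lock(&rescan_lock);    /* qgroup_rescan_init()         */
          pthread_mutex_unlock(&rescan_lock);
          return NULL;
  }

  /* rescan worker: always rescan_lock first, then the quota tree */
  static void *rescan_worker(void *arg)
  {
          (void)arg;
          pthread_mutex_lock(&rescan_lock);
          pthread_mutex_lock(&tree_lock);      /* update_qgroup_status_item()  */
          pthread_mutex_unlock(&tree_lock);
          pthread_mutex_unlock(&rescan_lock);
          return NULL;
  }

  int main(void)
  {
          pthread_t t1, t2;

          pthread_create(&t1, NULL, mount_path, NULL);
          pthread_create(&t2, NULL, rescan_worker, NULL);
          pthread_join(t1, NULL);
          pthread_join(t2, NULL);
          return 0;
  }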
+ +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/qgroup.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -497,13 +497,13 @@ next2: + break; + } + out: ++ btrfs_free_path(path); + fs_info->qgroup_flags |= flags; + if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && + ret >= 0) + ret = qgroup_rescan_init(fs_info, rescan_progress, 0); +- btrfs_free_path(path); + + if (ret < 0) { + ulist_free(fs_info->qgroup_ulist); diff --git a/queue-5.9/btrfs-fix-missing-delalloc-new-bit-for-new-delalloc-ranges.patch b/queue-5.9/btrfs-fix-missing-delalloc-new-bit-for-new-delalloc-ranges.patch new file mode 100644 index 00000000000..2c76d7b86f4 --- /dev/null +++ b/queue-5.9/btrfs-fix-missing-delalloc-new-bit-for-new-delalloc-ranges.patch @@ -0,0 +1,280 @@ +From c334730988ee07908ba4eb816ce78d3fe06fecaa Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Wed, 4 Nov 2020 11:07:31 +0000 +Subject: btrfs: fix missing delalloc new bit for new delalloc ranges + +From: Filipe Manana + +commit c334730988ee07908ba4eb816ce78d3fe06fecaa upstream. + +When doing a buffered write, through one of the write family syscalls, we +look for ranges which currently don't have allocated extents and set the +'delalloc new' bit on them, so that we can report a correct number of used +blocks to the stat(2) syscall until delalloc is flushed and ordered extents +complete. + +However there are a few other places where we can do a buffered write +against a range that is mapped to a hole (no extent allocated) and where +we do not set the 'new delalloc' bit. Those places are: + +- Doing a memory mapped write against a hole; + +- Cloning an inline extent into a hole starting at file offset 0; + +- Calling btrfs_cont_expand() when the i_size of the file is not aligned + to the sector size and is located in a hole. For example when cloning + to a destination offset beyond EOF. + +So after such cases, until the corresponding delalloc range is flushed and +the respective ordered extents complete, we can report an incorrect number +of blocks used through the stat(2) syscall. 
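Concretely, the number being misreported is the st_blocks field returned by stat(2), counted in 512-byte units. A minimal sketch of how a tool observes it, using the same file path as the reproducer further below, is:

  #include <stdio.h>
  #include <sys/stat.h>

  int main(void)
  {
          struct stat st;

          if (stat("/mnt/sdi/foo", &st) != 0) {
                  perror("stat");
                  return 1;
          }
          /* st_blocks counts 512-byte units actually allocated to the file */
          printf("size %lld bytes, %lld blocks allocated\n",
                 (long long)st.st_size, (long long)st.st_blocks);
          if (st.st_size > 0 && st.st_blocks == 0)
                  printf("file looks entirely sparse\n");
          return 0;
  }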
+ +In some cases we can end up reporting 0 used blocks to stat(2), which is a +particular bad value to report as it may mislead tools to think a file is +completely sparse when its i_size is not zero, making them skip reading +any data, an undesired consequence for tools such as archivers and other +backup tools, as reported a long time ago in the following thread (and +other past threads): + + https://lists.gnu.org/archive/html/bug-tar/2016-07/msg00001.html + +Example reproducer: + + $ cat reproducer.sh + #!/bin/bash + + MNT=/mnt/sdi + DEV=/dev/sdi + + mkfs.btrfs -f $DEV > /dev/null + # mkfs.xfs -f $DEV > /dev/null + # mkfs.ext4 -F $DEV > /dev/null + # mkfs.f2fs -f $DEV > /dev/null + mount $DEV $MNT + + xfs_io -f -c "truncate 64K" \ + -c "mmap -w 0 64K" \ + -c "mwrite -S 0xab 0 64K" \ + -c "munmap" \ + $MNT/foo + + blocks_used=$(stat -c %b $MNT/foo) + echo "blocks used: $blocks_used" + + if [ $blocks_used -eq 0 ]; then + echo "ERROR: blocks used is 0" + fi + + umount $DEV + + $ ./reproducer.sh + blocks used: 0 + ERROR: blocks used is 0 + +So move the logic that decides to set the 'delalloc bit' bit into the +function btrfs_set_extent_delalloc(), since that is what we use for all +those missing cases as well as for the cases that currently work well. + +This change is also preparatory work for an upcoming patch that fixes +other problems related to tracking and reporting the number of bytes used +by an inode. + +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file.c | 57 ------------------------------------------ + fs/btrfs/inode.c | 58 +++++++++++++++++++++++++++++++++++++++++++ + fs/btrfs/tests/inode-tests.c | 12 +++++--- + 3 files changed, 66 insertions(+), 61 deletions(-) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -452,46 +452,6 @@ static void btrfs_drop_pages(struct page + } + } + +-static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, +- const u64 start, +- const u64 len, +- struct extent_state **cached_state) +-{ +- u64 search_start = start; +- const u64 end = start + len - 1; +- +- while (search_start < end) { +- const u64 search_len = end - search_start + 1; +- struct extent_map *em; +- u64 em_len; +- int ret = 0; +- +- em = btrfs_get_extent(inode, NULL, 0, search_start, search_len); +- if (IS_ERR(em)) +- return PTR_ERR(em); +- +- if (em->block_start != EXTENT_MAP_HOLE) +- goto next; +- +- em_len = em->len; +- if (em->start < search_start) +- em_len -= search_start - em->start; +- if (em_len > search_len) +- em_len = search_len; +- +- ret = set_extent_bit(&inode->io_tree, search_start, +- search_start + em_len - 1, +- EXTENT_DELALLOC_NEW, +- NULL, cached_state, GFP_NOFS); +-next: +- search_start = extent_map_end(em); +- free_extent_map(em); +- if (ret) +- return ret; +- } +- return 0; +-} +- + /* + * after copy_from_user, pages need to be dirtied and we need to make + * sure holes are created between the current EOF and the start of +@@ -528,23 +488,6 @@ int btrfs_dirty_pages(struct btrfs_inode + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, + 0, 0, cached); + +- if (!btrfs_is_free_space_inode(inode)) { +- if (start_pos >= isize && +- !(inode->flags & BTRFS_INODE_PREALLOC)) { +- /* +- * There can't be any extents following eof in this case +- * so just set the delalloc new bit for the range +- * directly. 
+- */ +- extra_bits |= EXTENT_DELALLOC_NEW; +- } else { +- err = btrfs_find_new_delalloc_bytes(inode, start_pos, +- num_bytes, cached); +- if (err) +- return err; +- } +- } +- + err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, + extra_bits, cached); + if (err) +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -2262,11 +2262,69 @@ static noinline int add_pending_csums(st + return 0; + } + ++static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, ++ const u64 start, ++ const u64 len, ++ struct extent_state **cached_state) ++{ ++ u64 search_start = start; ++ const u64 end = start + len - 1; ++ ++ while (search_start < end) { ++ const u64 search_len = end - search_start + 1; ++ struct extent_map *em; ++ u64 em_len; ++ int ret = 0; ++ ++ em = btrfs_get_extent(inode, NULL, 0, search_start, search_len); ++ if (IS_ERR(em)) ++ return PTR_ERR(em); ++ ++ if (em->block_start != EXTENT_MAP_HOLE) ++ goto next; ++ ++ em_len = em->len; ++ if (em->start < search_start) ++ em_len -= search_start - em->start; ++ if (em_len > search_len) ++ em_len = search_len; ++ ++ ret = set_extent_bit(&inode->io_tree, search_start, ++ search_start + em_len - 1, ++ EXTENT_DELALLOC_NEW, ++ NULL, cached_state, GFP_NOFS); ++next: ++ search_start = extent_map_end(em); ++ free_extent_map(em); ++ if (ret) ++ return ret; ++ } ++ return 0; ++} ++ + int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, + unsigned int extra_bits, + struct extent_state **cached_state) + { + WARN_ON(PAGE_ALIGNED(end)); ++ ++ if (start >= i_size_read(&inode->vfs_inode) && ++ !(inode->flags & BTRFS_INODE_PREALLOC)) { ++ /* ++ * There can't be any extents following eof in this case so just ++ * set the delalloc new bit for the range directly. ++ */ ++ extra_bits |= EXTENT_DELALLOC_NEW; ++ } else { ++ int ret; ++ ++ ret = btrfs_find_new_delalloc_bytes(inode, start, ++ end + 1 - start, ++ cached_state); ++ if (ret) ++ return ret; ++ } ++ + return set_extent_delalloc(&inode->io_tree, start, end, extra_bits, + cached_state); + } +--- a/fs/btrfs/tests/inode-tests.c ++++ b/fs/btrfs/tests/inode-tests.c +@@ -986,7 +986,8 @@ static int test_extent_accounting(u32 se + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE >> 1, + (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, +- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); ++ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | ++ EXTENT_UPTODATE, 0, 0, NULL); + if (ret) { + test_err("clear_extent_bit returned %d", ret); + goto out; +@@ -1053,7 +1054,8 @@ static int test_extent_accounting(u32 se + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE + sectorsize, + BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, +- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); ++ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | ++ EXTENT_UPTODATE, 0, 0, NULL); + if (ret) { + test_err("clear_extent_bit returned %d", ret); + goto out; +@@ -1085,7 +1087,8 @@ static int test_extent_accounting(u32 se + + /* Empty */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, +- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); ++ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | ++ EXTENT_UPTODATE, 0, 0, NULL); + if (ret) { + test_err("clear_extent_bit returned %d", ret); + goto out; +@@ -1100,7 +1103,8 @@ static int test_extent_accounting(u32 se + out: + if (ret) + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, +- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); ++ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | ++ EXTENT_UPTODATE, 0, 0, NULL); + iput(inode); + 
btrfs_free_dummy_root(root); + btrfs_free_dummy_fs_info(fs_info); diff --git a/queue-5.9/btrfs-tree-checker-add-missing-return-after-error-in-root_item.patch b/queue-5.9/btrfs-tree-checker-add-missing-return-after-error-in-root_item.patch new file mode 100644 index 00000000000..5804554fe85 --- /dev/null +++ b/queue-5.9/btrfs-tree-checker-add-missing-return-after-error-in-root_item.patch @@ -0,0 +1,36 @@ +From 1a49a97df657c63a4e8ffcd1ea9b6ed95581789b Mon Sep 17 00:00:00 2001 +From: Daniel Xu +Date: Thu, 12 Nov 2020 17:55:06 -0800 +Subject: btrfs: tree-checker: add missing return after error in root_item + +From: Daniel Xu + +commit 1a49a97df657c63a4e8ffcd1ea9b6ed95581789b upstream. + +There's a missing return statement after an error is found in the +root_item, this can cause further problems when a crafted image triggers +the error. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=210181 +Fixes: 259ee7754b67 ("btrfs: tree-checker: Add ROOT_ITEM check") +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Daniel Xu +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-checker.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -1068,6 +1068,7 @@ static int check_root_item(struct extent + "invalid root item size, have %u expect %zu or %u", + btrfs_item_size_nr(leaf, slot), sizeof(ri), + btrfs_legacy_root_item_size()); ++ return -EUCLEAN; + } + + /* diff --git a/queue-5.9/btrfs-tree-checker-add-missing-returns-after-data_ref-alignment-checks.patch b/queue-5.9/btrfs-tree-checker-add-missing-returns-after-data_ref-alignment-checks.patch new file mode 100644 index 00000000000..8758d1e9cb3 --- /dev/null +++ b/queue-5.9/btrfs-tree-checker-add-missing-returns-after-data_ref-alignment-checks.patch @@ -0,0 +1,41 @@ +From 6d06b0ad94d3dd7e3503d8ad39c39c4634884611 Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Mon, 16 Nov 2020 19:53:52 +0100 +Subject: btrfs: tree-checker: add missing returns after data_ref alignment checks + +From: David Sterba + +commit 6d06b0ad94d3dd7e3503d8ad39c39c4634884611 upstream. + +There are sectorsize alignment checks that are reported but then +check_extent_data_ref continues. This was not intended, wrong alignment +is not a minor problem and we should return with error. 
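Both tree-checker fixes in this batch address the same bug class: a validation helper that logs the problem but forgets to return, so the caller keeps processing an item it has just declared invalid. A generic, self-contained illustration (the helper name and values are made up):

  #include <stdio.h>

  #define EUCLEAN 117   /* Linux "Structure needs cleaning" errno, as returned by the tree checker */

  static int check_alignment(unsigned long long offset, unsigned int sectorsize)
  {
          if (offset % sectorsize) {
                  fprintf(stderr, "invalid offset %llu, expect aligned to %u\n",
                          offset, sectorsize);
                  return -EUCLEAN;   /* the missing return: without it the caller
                                        would continue as if the item were valid */
          }
          return 0;
  }

  int main(void)
  {
          return check_alignment(4097, 4096) ? 1 : 0;
  }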
+ +CC: stable@vger.kernel.org # 5.4+ +Fixes: 0785a9aacf9d ("btrfs: tree-checker: Add EXTENT_DATA_REF check") +Reviewed-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-checker.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -1424,6 +1424,7 @@ static int check_extent_data_ref(struct + "invalid item size, have %u expect aligned to %zu for key type %u", + btrfs_item_size_nr(leaf, slot), + sizeof(*dref), key->type); ++ return -EUCLEAN; + } + if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) { + generic_err(leaf, slot, +@@ -1452,6 +1453,7 @@ static int check_extent_data_ref(struct + extent_err(leaf, slot, + "invalid extent data backref offset, have %llu expect aligned to %u", + offset, leaf->fs_info->sectorsize); ++ return -EUCLEAN; + } + } + return 0; diff --git a/queue-5.9/ib-hfi1-ensure-correct-mm-is-used-at-all-times.patch b/queue-5.9/ib-hfi1-ensure-correct-mm-is-used-at-all-times.patch new file mode 100644 index 00000000000..f5a1e0a36a8 --- /dev/null +++ b/queue-5.9/ib-hfi1-ensure-correct-mm-is-used-at-all-times.patch @@ -0,0 +1,444 @@ +From 3d2a9d642512c21a12d19b9250e7a835dcb41a79 Mon Sep 17 00:00:00 2001 +From: Dennis Dalessandro +Date: Wed, 25 Nov 2020 16:01:12 -0500 +Subject: IB/hfi1: Ensure correct mm is used at all times + +From: Dennis Dalessandro + +commit 3d2a9d642512c21a12d19b9250e7a835dcb41a79 upstream. + +Two earlier bug fixes have created a security problem in the hfi1 +driver. One fix aimed to solve an issue where current->mm was not valid +when closing the hfi1 cdev. It attempted to do this by saving a cached +value of the current->mm pointer at file open time. This is a problem if +another process with access to the FD calls in via write() or ioctl() to +pin pages via the hfi driver. The other fix tried to solve a use after +free by taking a reference on the mm. + +To fix this correctly we use the existing cached value of the mm in the +mmu notifier. Now we can check in the insert, evict, etc. routines that +current->mm matched what the notifier was registered for. If not, then +don't allow access. The register of the mmu notifier will save the mm +pointer. + +Since in do_exit() the exit_mm() is called before exit_files(), which +would call our close routine a reference is needed on the mm. We rely on +the mmgrab done by the registration of the notifier, whereas before it was +explicit. The mmu notifier deregistration happens when the user context is +torn down, the creation of which triggered the registration. + +Also of note is we do not do any explicit work to protect the interval +tree notifier. It doesn't seem that this is going to be needed since we +aren't actually doing anything with current->mm. The interval tree +notifier stuff still has a FIXME noted from a previous commit that will be +addressed in a follow on patch. 
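The guard being added is conceptually simple: remember which mm the mmu notifier was registered against and refuse insert/evict/remove calls issued from a different one. A userspace analogy, with the process id standing in for the saved mm pointer (illustrative only, not driver code):

  #include <errno.h>
  #include <stdio.h>
  #include <sys/types.h>
  #include <unistd.h>

  struct rb_handler {
          pid_t owner;              /* stands in for handler->mn.mm */
  };

  static void handler_register(struct rb_handler *h)
  {
          h->owner = getpid();      /* captured once, at registration time */
  }

  static int handler_insert(struct rb_handler *h)
  {
          if (getpid() != h->owner) /* caller does not match the registered context */
                  return -EPERM;
          /* ... pin pages / insert into the interval tree ... */
          return 0;
  }

  int main(void)
  {
          struct rb_handler h;

          handler_register(&h);
          if (handler_insert(&h) == 0)
                  puts("insert allowed: caller matches the registered context");
          return 0;
  }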
+ +Cc: +Fixes: e0cf75deab81 ("IB/hfi1: Fix mm_struct use after free") +Fixes: 3faa3d9a308e ("IB/hfi1: Make use of mm consistent") +Link: https://lore.kernel.org/r/20201125210112.104301.51331.stgit@awfm-01.aw.intel.com +Suggested-by: Jann Horn +Reported-by: Jason Gunthorpe +Reviewed-by: Ira Weiny +Reviewed-by: Mike Marciniszyn +Signed-off-by: Dennis Dalessandro +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/hfi1/file_ops.c | 4 - + drivers/infiniband/hw/hfi1/hfi.h | 2 + drivers/infiniband/hw/hfi1/mmu_rb.c | 66 +++++++++++++++--------------- + drivers/infiniband/hw/hfi1/mmu_rb.h | 16 ++++++- + drivers/infiniband/hw/hfi1/user_exp_rcv.c | 12 +++-- + drivers/infiniband/hw/hfi1/user_exp_rcv.h | 6 ++ + drivers/infiniband/hw/hfi1/user_sdma.c | 13 +++-- + drivers/infiniband/hw/hfi1/user_sdma.h | 7 ++- + 8 files changed, 78 insertions(+), 48 deletions(-) + +--- a/drivers/infiniband/hw/hfi1/file_ops.c ++++ b/drivers/infiniband/hw/hfi1/file_ops.c +@@ -1,4 +1,5 @@ + /* ++ * Copyright(c) 2020 Cornelis Networks, Inc. + * Copyright(c) 2015-2020 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or +@@ -206,8 +207,6 @@ static int hfi1_file_open(struct inode * + spin_lock_init(&fd->tid_lock); + spin_lock_init(&fd->invalid_lock); + fd->rec_cpu_num = -1; /* no cpu affinity by default */ +- fd->mm = current->mm; +- mmgrab(fd->mm); + fd->dd = dd; + fp->private_data = fd; + return 0; +@@ -711,7 +710,6 @@ static int hfi1_file_close(struct inode + + deallocate_ctxt(uctxt); + done: +- mmdrop(fdata->mm); + + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); +--- a/drivers/infiniband/hw/hfi1/hfi.h ++++ b/drivers/infiniband/hw/hfi1/hfi.h +@@ -1,6 +1,7 @@ + #ifndef _HFI1_KERNEL_H + #define _HFI1_KERNEL_H + /* ++ * Copyright(c) 2020 Cornelis Networks, Inc. + * Copyright(c) 2015-2020 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or +@@ -1451,7 +1452,6 @@ struct hfi1_filedata { + u32 invalid_tid_idx; + /* protect invalid_tids array and invalid_tid_idx */ + spinlock_t invalid_lock; +- struct mm_struct *mm; + }; + + extern struct xarray hfi1_dev_table; +--- a/drivers/infiniband/hw/hfi1/mmu_rb.c ++++ b/drivers/infiniband/hw/hfi1/mmu_rb.c +@@ -1,4 +1,5 @@ + /* ++ * Copyright(c) 2020 Cornelis Networks, Inc. + * Copyright(c) 2016 - 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. 
When using or +@@ -48,23 +49,11 @@ + #include + #include + #include ++#include + + #include "mmu_rb.h" + #include "trace.h" + +-struct mmu_rb_handler { +- struct mmu_notifier mn; +- struct rb_root_cached root; +- void *ops_arg; +- spinlock_t lock; /* protect the RB tree */ +- struct mmu_rb_ops *ops; +- struct mm_struct *mm; +- struct list_head lru_list; +- struct work_struct del_work; +- struct list_head del_list; +- struct workqueue_struct *wq; +-}; +- + static unsigned long mmu_node_start(struct mmu_rb_node *); + static unsigned long mmu_node_last(struct mmu_rb_node *); + static int mmu_notifier_range_start(struct mmu_notifier *, +@@ -92,37 +81,36 @@ static unsigned long mmu_node_last(struc + return PAGE_ALIGN(node->addr + node->len) - 1; + } + +-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, ++int hfi1_mmu_rb_register(void *ops_arg, + struct mmu_rb_ops *ops, + struct workqueue_struct *wq, + struct mmu_rb_handler **handler) + { +- struct mmu_rb_handler *handlr; ++ struct mmu_rb_handler *h; + int ret; + +- handlr = kmalloc(sizeof(*handlr), GFP_KERNEL); +- if (!handlr) ++ h = kmalloc(sizeof(*h), GFP_KERNEL); ++ if (!h) + return -ENOMEM; + +- handlr->root = RB_ROOT_CACHED; +- handlr->ops = ops; +- handlr->ops_arg = ops_arg; +- INIT_HLIST_NODE(&handlr->mn.hlist); +- spin_lock_init(&handlr->lock); +- handlr->mn.ops = &mn_opts; +- handlr->mm = mm; +- INIT_WORK(&handlr->del_work, handle_remove); +- INIT_LIST_HEAD(&handlr->del_list); +- INIT_LIST_HEAD(&handlr->lru_list); +- handlr->wq = wq; ++ h->root = RB_ROOT_CACHED; ++ h->ops = ops; ++ h->ops_arg = ops_arg; ++ INIT_HLIST_NODE(&h->mn.hlist); ++ spin_lock_init(&h->lock); ++ h->mn.ops = &mn_opts; ++ INIT_WORK(&h->del_work, handle_remove); ++ INIT_LIST_HEAD(&h->del_list); ++ INIT_LIST_HEAD(&h->lru_list); ++ h->wq = wq; + +- ret = mmu_notifier_register(&handlr->mn, handlr->mm); ++ ret = mmu_notifier_register(&h->mn, current->mm); + if (ret) { +- kfree(handlr); ++ kfree(h); + return ret; + } + +- *handler = handlr; ++ *handler = h; + return 0; + } + +@@ -134,7 +122,7 @@ void hfi1_mmu_rb_unregister(struct mmu_r + struct list_head del_list; + + /* Unregister first so we don't get any more notifications. */ +- mmu_notifier_unregister(&handler->mn, handler->mm); ++ mmu_notifier_unregister(&handler->mn, handler->mn.mm); + + /* + * Make sure the wq delete handler is finished running. 
It will not +@@ -166,6 +154,10 @@ int hfi1_mmu_rb_insert(struct mmu_rb_han + int ret = 0; + + trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len); ++ ++ if (current->mm != handler->mn.mm) ++ return -EPERM; ++ + spin_lock_irqsave(&handler->lock, flags); + node = __mmu_rb_search(handler, mnode->addr, mnode->len); + if (node) { +@@ -180,6 +172,7 @@ int hfi1_mmu_rb_insert(struct mmu_rb_han + __mmu_int_rb_remove(mnode, &handler->root); + list_del(&mnode->list); /* remove from LRU list */ + } ++ mnode->handler = handler; + unlock: + spin_unlock_irqrestore(&handler->lock, flags); + return ret; +@@ -217,6 +210,9 @@ bool hfi1_mmu_rb_remove_unless_exact(str + unsigned long flags; + bool ret = false; + ++ if (current->mm != handler->mn.mm) ++ return ret; ++ + spin_lock_irqsave(&handler->lock, flags); + node = __mmu_rb_search(handler, addr, len); + if (node) { +@@ -239,6 +235,9 @@ void hfi1_mmu_rb_evict(struct mmu_rb_han + unsigned long flags; + bool stop = false; + ++ if (current->mm != handler->mn.mm) ++ return; ++ + INIT_LIST_HEAD(&del_list); + + spin_lock_irqsave(&handler->lock, flags); +@@ -272,6 +271,9 @@ void hfi1_mmu_rb_remove(struct mmu_rb_ha + { + unsigned long flags; + ++ if (current->mm != handler->mn.mm) ++ return; ++ + /* Validity of handler and node pointers has been checked by caller. */ + trace_hfi1_mmu_rb_remove(node->addr, node->len); + spin_lock_irqsave(&handler->lock, flags); +--- a/drivers/infiniband/hw/hfi1/mmu_rb.h ++++ b/drivers/infiniband/hw/hfi1/mmu_rb.h +@@ -1,4 +1,5 @@ + /* ++ * Copyright(c) 2020 Cornelis Networks, Inc. + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or +@@ -54,6 +55,7 @@ struct mmu_rb_node { + unsigned long len; + unsigned long __last; + struct rb_node node; ++ struct mmu_rb_handler *handler; + struct list_head list; + }; + +@@ -71,7 +73,19 @@ struct mmu_rb_ops { + void *evict_arg, bool *stop); + }; + +-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, ++struct mmu_rb_handler { ++ struct mmu_notifier mn; ++ struct rb_root_cached root; ++ void *ops_arg; ++ spinlock_t lock; /* protect the RB tree */ ++ struct mmu_rb_ops *ops; ++ struct list_head lru_list; ++ struct work_struct del_work; ++ struct list_head del_list; ++ struct workqueue_struct *wq; ++}; ++ ++int hfi1_mmu_rb_register(void *ops_arg, + struct mmu_rb_ops *ops, + struct workqueue_struct *wq, + struct mmu_rb_handler **handler); +--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c ++++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c +@@ -1,4 +1,5 @@ + /* ++ * Copyright(c) 2020 Cornelis Networks, Inc. + * Copyright(c) 2015-2018 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or +@@ -173,15 +174,18 @@ static void unpin_rcv_pages(struct hfi1_ + { + struct page **pages; + struct hfi1_devdata *dd = fd->uctxt->dd; ++ struct mm_struct *mm; + + if (mapped) { + pci_unmap_single(dd->pcidev, node->dma_addr, + node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); + pages = &node->pages[idx]; ++ mm = mm_from_tid_node(node); + } else { + pages = &tidbuf->pages[idx]; ++ mm = current->mm; + } +- hfi1_release_user_pages(fd->mm, pages, npages, mapped); ++ hfi1_release_user_pages(mm, pages, npages, mapped); + fd->tid_n_pinned -= npages; + } + +@@ -216,12 +220,12 @@ static int pin_rcv_pages(struct hfi1_fil + * pages, accept the amount pinned so far and program only that. + * User space knows how to deal with partially programmed buffers. 
+ */ +- if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { ++ if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) { + kfree(pages); + return -ENOMEM; + } + +- pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); ++ pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages); + if (pinned <= 0) { + kfree(pages); + return pinned; +@@ -756,7 +760,7 @@ static int set_rcvarray_entry(struct hfi + + if (fd->use_mn) { + ret = mmu_interval_notifier_insert( +- &node->notifier, fd->mm, ++ &node->notifier, current->mm, + tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE, + &tid_mn_ops); + if (ret) +--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h ++++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h +@@ -1,6 +1,7 @@ + #ifndef _HFI1_USER_EXP_RCV_H + #define _HFI1_USER_EXP_RCV_H + /* ++ * Copyright(c) 2020 - Cornelis Networks, Inc. + * Copyright(c) 2015 - 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or +@@ -95,4 +96,9 @@ int hfi1_user_exp_rcv_clear(struct hfi1_ + int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo); + ++static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node) ++{ ++ return node->notifier.mm; ++} ++ + #endif /* _HFI1_USER_EXP_RCV_H */ +--- a/drivers/infiniband/hw/hfi1/user_sdma.c ++++ b/drivers/infiniband/hw/hfi1/user_sdma.c +@@ -1,4 +1,5 @@ + /* ++ * Copyright(c) 2020 - Cornelis Networks, Inc. + * Copyright(c) 2015 - 2018 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or +@@ -188,7 +189,6 @@ int hfi1_user_sdma_alloc_queues(struct h + atomic_set(&pq->n_reqs, 0); + init_waitqueue_head(&pq->wait); + atomic_set(&pq->n_locked, 0); +- pq->mm = fd->mm; + + iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue, + activate_packet_queue, NULL, NULL); +@@ -230,7 +230,7 @@ int hfi1_user_sdma_alloc_queues(struct h + + cq->nentries = hfi1_sdma_comp_ring_size; + +- ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq, ++ ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq, + &pq->handler); + if (ret) { + dd_dev_err(dd, "Failed to register with MMU %d", ret); +@@ -980,13 +980,13 @@ static int pin_sdma_pages(struct user_sd + + npages -= node->npages; + retry: +- if (!hfi1_can_pin_pages(pq->dd, pq->mm, ++ if (!hfi1_can_pin_pages(pq->dd, current->mm, + atomic_read(&pq->n_locked), npages)) { + cleared = sdma_cache_evict(pq, npages); + if (cleared >= npages) + goto retry; + } +- pinned = hfi1_acquire_user_pages(pq->mm, ++ pinned = hfi1_acquire_user_pages(current->mm, + ((unsigned long)iovec->iov.iov_base + + (node->npages * PAGE_SIZE)), npages, 0, + pages + node->npages); +@@ -995,7 +995,7 @@ retry: + return pinned; + } + if (pinned != npages) { +- unpin_vector_pages(pq->mm, pages, node->npages, pinned); ++ unpin_vector_pages(current->mm, pages, node->npages, pinned); + return -EFAULT; + } + kfree(node->pages); +@@ -1008,7 +1008,8 @@ retry: + static void unpin_sdma_pages(struct sdma_mmu_node *node) + { + if (node->npages) { +- unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); ++ unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0, ++ node->npages); + atomic_sub(node->npages, &node->pq->n_locked); + } + } +--- a/drivers/infiniband/hw/hfi1/user_sdma.h ++++ b/drivers/infiniband/hw/hfi1/user_sdma.h +@@ -1,6 +1,7 @@ + #ifndef _HFI1_USER_SDMA_H + #define _HFI1_USER_SDMA_H + /* ++ * Copyright(c) 2020 - Cornelis Networks, Inc. 
+ * Copyright(c) 2015 - 2018 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or +@@ -133,7 +134,6 @@ struct hfi1_user_sdma_pkt_q { + unsigned long unpinned; + struct mmu_rb_handler *handler; + atomic_t n_locked; +- struct mm_struct *mm; + }; + + struct hfi1_user_sdma_comp_q { +@@ -250,4 +250,9 @@ int hfi1_user_sdma_process_request(struc + struct iovec *iovec, unsigned long dim, + unsigned long *count); + ++static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node) ++{ ++ return node->rb.handler->mn.mm; ++} ++ + #endif /* _HFI1_USER_SDMA_H */ diff --git a/queue-5.9/rdma-i40iw-address-an-mmap-handler-exploit-in-i40iw.patch b/queue-5.9/rdma-i40iw-address-an-mmap-handler-exploit-in-i40iw.patch new file mode 100644 index 00000000000..e692987ef5f --- /dev/null +++ b/queue-5.9/rdma-i40iw-address-an-mmap-handler-exploit-in-i40iw.patch @@ -0,0 +1,106 @@ +From 2ed381439e89fa6d1a0839ef45ccd45d99d8e915 Mon Sep 17 00:00:00 2001 +From: Shiraz Saleem +Date: Tue, 24 Nov 2020 18:56:16 -0600 +Subject: RDMA/i40iw: Address an mmap handler exploit in i40iw + +From: Shiraz Saleem + +commit 2ed381439e89fa6d1a0839ef45ccd45d99d8e915 upstream. + +i40iw_mmap manipulates the vma->vm_pgoff to differentiate a push page mmap +vs a doorbell mmap, and uses it to compute the pfn in remap_pfn_range +without any validation. This is vulnerable to an mmap exploit as described +in: https://lore.kernel.org/r/20201119093523.7588-1-zhudi21@huawei.com + +The push feature is disabled in the driver currently and therefore no push +mmaps are issued from user-space. The feature does not work as expected in +the x722 product. + +Remove the push module parameter and all VMA attribute manipulations for +this feature in i40iw_mmap. Update i40iw_mmap to only allow DB user +mmapings at offset = 0. Check vm_pgoff for zero and if the mmaps are bound +to a single page. + +Cc: +Fixes: d37498417947 ("i40iw: add files for iwarp interface") +Link: https://lore.kernel.org/r/20201125005616.1800-2-shiraz.saleem@intel.com +Reported-by: Di Zhu +Signed-off-by: Shiraz Saleem +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/infiniband/hw/i40iw/i40iw_main.c | 5 ---- + drivers/infiniband/hw/i40iw/i40iw_verbs.c | 37 +++++------------------------- + 2 files changed, 7 insertions(+), 35 deletions(-) + +--- a/drivers/infiniband/hw/i40iw/i40iw_main.c ++++ b/drivers/infiniband/hw/i40iw/i40iw_main.c +@@ -54,10 +54,6 @@ + #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ + __stringify(DRV_VERSION_MINOR) "." 
__stringify(DRV_VERSION_BUILD) + +-static int push_mode; +-module_param(push_mode, int, 0644); +-MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled)"); +- + static int debug; + module_param(debug, int, 0644); + MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all"); +@@ -1580,7 +1576,6 @@ static enum i40iw_status_code i40iw_setu + if (status) + goto exit; + iwdev->obj_next = iwdev->obj_mem; +- iwdev->push_mode = push_mode; + + init_waitqueue_head(&iwdev->vchnl_waitq); + init_waitqueue_head(&dev->vf_reqs); +--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c ++++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c +@@ -167,39 +167,16 @@ static void i40iw_dealloc_ucontext(struc + */ + static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) + { +- struct i40iw_ucontext *ucontext; +- u64 db_addr_offset, push_offset, pfn; ++ struct i40iw_ucontext *ucontext = to_ucontext(context); ++ u64 dbaddr; + +- ucontext = to_ucontext(context); +- if (ucontext->iwdev->sc_dev.is_pf) { +- db_addr_offset = I40IW_DB_ADDR_OFFSET; +- push_offset = I40IW_PUSH_OFFSET; +- if (vma->vm_pgoff) +- vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1; +- } else { +- db_addr_offset = I40IW_VF_DB_ADDR_OFFSET; +- push_offset = I40IW_VF_PUSH_OFFSET; +- if (vma->vm_pgoff) +- vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1; +- } ++ if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE) ++ return -EINVAL; + +- vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT; ++ dbaddr = I40IW_DB_ADDR_OFFSET + pci_resource_start(ucontext->iwdev->ldev->pcidev, 0); + +- if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) { +- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +- } else { +- if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2) +- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +- else +- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); +- } +- +- pfn = vma->vm_pgoff + +- (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >> +- PAGE_SHIFT); +- +- return rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE, +- vma->vm_page_prot, NULL); ++ return rdma_user_mmap_io(context, vma, dbaddr >> PAGE_SHIFT, PAGE_SIZE, ++ pgprot_noncached(vma->vm_page_prot), NULL); + } + + /** diff --git a/queue-5.9/series b/queue-5.9/series index e421fbb407b..85cf3033dcf 100644 --- a/queue-5.9/series +++ b/queue-5.9/series @@ -3,3 +3,10 @@ io_uring-order-refnode-recycling.patch spi-bcm-qspi-fix-use-after-free-on-unbind.patch spi-bcm2835-fix-use-after-free-on-unbind.patch ipv4-use-is_enabled-instead-of-ifdef.patch +ib-hfi1-ensure-correct-mm-is-used-at-all-times.patch +rdma-i40iw-address-an-mmap-handler-exploit-in-i40iw.patch +btrfs-fix-missing-delalloc-new-bit-for-new-delalloc-ranges.patch +btrfs-tree-checker-add-missing-return-after-error-in-root_item.patch +btrfs-tree-checker-add-missing-returns-after-data_ref-alignment-checks.patch +btrfs-don-t-access-possibly-stale-fs_info-data-for-printing-duplicate-device.patch +btrfs-fix-lockdep-splat-when-reading-qgroup-config-on-mount.patch
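Stripped of driver specifics, the validation added to i40iw_mmap above accepts only a doorbell mapping at offset zero and exactly one page long, and never derives the target address from the caller-controlled offset. A made-up, self-contained sketch of that shape (the base address and page size are example values, not the real BAR layout):

  #include <errno.h>
  #include <stdio.h>

  #define PAGE_SIZE   4096UL
  #define DB_BAR_BASE 0xfe000000UL   /* hypothetical BAR 0 start */

  static int validate_db_mmap(unsigned long pgoff, unsigned long len,
                              unsigned long *phys)
  {
          if (pgoff != 0 || len != PAGE_SIZE)  /* only offset 0, exactly one page */
                  return -EINVAL;
          *phys = DB_BAR_BASE;                 /* fixed target, no pgoff arithmetic */
          return 0;
  }

  int main(void)
  {
          unsigned long phys = 0;

          printf("offset 0, one page -> %d\n", validate_db_mmap(0, PAGE_SIZE, &phys));
          printf("offset 3, one page -> %d\n", validate_db_mmap(3, PAGE_SIZE, &phys));
          return 0;
  }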