git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.8-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Sep 2020 16:20:58 +0000 (18:20 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Sep 2020 16:20:58 +0000 (18:20 +0200)
added patches:
arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch
arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch
btrfs-allocate-scrub-workqueues-outside-of-locks.patch
btrfs-block-group-fix-free-space-bitmap-threshold.patch
btrfs-drop-path-before-adding-new-uuid-tree-entry.patch
btrfs-fix-potential-deadlock-in-the-search-ioctl.patch
btrfs-set-the-correct-lockdep-class-for-new-nodes.patch
btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch
btrfs-tree-checker-fix-the-error-message-for-transid-error.patch
ext2-don-t-update-mtime-on-cow-faults.patch
iommu-vt-d-handle-36bit-addressing-for-x86-32.patch
tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch
x86-debug-allow-a-single-level-of-db-recursion.patch
x86-entry-fix-ac-assertion.patch
xfs-don-t-update-mtime-on-cow-faults.patch

16 files changed:
queue-5.8/arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch [new file with mode: 0644]
queue-5.8/arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch [new file with mode: 0644]
queue-5.8/btrfs-allocate-scrub-workqueues-outside-of-locks.patch [new file with mode: 0644]
queue-5.8/btrfs-block-group-fix-free-space-bitmap-threshold.patch [new file with mode: 0644]
queue-5.8/btrfs-drop-path-before-adding-new-uuid-tree-entry.patch [new file with mode: 0644]
queue-5.8/btrfs-fix-potential-deadlock-in-the-search-ioctl.patch [new file with mode: 0644]
queue-5.8/btrfs-set-the-correct-lockdep-class-for-new-nodes.patch [new file with mode: 0644]
queue-5.8/btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch [new file with mode: 0644]
queue-5.8/btrfs-tree-checker-fix-the-error-message-for-transid-error.patch [new file with mode: 0644]
queue-5.8/ext2-don-t-update-mtime-on-cow-faults.patch [new file with mode: 0644]
queue-5.8/iommu-vt-d-handle-36bit-addressing-for-x86-32.patch [new file with mode: 0644]
queue-5.8/series
queue-5.8/tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch [new file with mode: 0644]
queue-5.8/x86-debug-allow-a-single-level-of-db-recursion.patch [new file with mode: 0644]
queue-5.8/x86-entry-fix-ac-assertion.patch [new file with mode: 0644]
queue-5.8/xfs-don-t-update-mtime-on-cow-faults.patch [new file with mode: 0644]

diff --git a/queue-5.8/arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch b/queue-5.8/arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch
new file mode 100644 (file)
index 0000000..f489122
--- /dev/null
@@ -0,0 +1,106 @@
+From 4af22ded0ecf23adea1b26ea264c53f9f1cfc310 Mon Sep 17 00:00:00 2001
+From: Mike Rapoport <rppt@kernel.org>
+Date: Fri, 28 Aug 2020 19:39:02 +0300
+Subject: arc: fix memory initialization for systems with two memory banks
+
+From: Mike Rapoport <rppt@linux.ibm.com>
+
+commit 4af22ded0ecf23adea1b26ea264c53f9f1cfc310 upstream.
+
+Rework of memory map initialization broke initialization of ARC systems
+with two memory banks. Before these changes, memblock was not aware of
+nodes configuration and the memory map was always allocated from the
+"lowmem" bank. After the addition of node information to memblock, the core
+mm attempts to allocate the memory map for the "highmem" bank from its
+node. The access to this memory using __va() fails because it can only be
+accessed using kmap.
+
+Another problem that was uncovered is that {min,max}_high_pfn are calculated
+from the u64 high_mem_start variable, which prevents truncation to a 32-bit
+physical address, so the PFN values end up above the node and zone boundaries.
+
+Use the phys_addr_t type for high_mem_start and high_mem_sz to ensure
+correspondence between PFNs and highmem zone boundaries, and reserve the
+entire highmem bank until mem_init() to avoid accesses to it before highmem
+is enabled.
+
+To test this:
+1. Enable HIGHMEM in ARC config
+2. Enable 2 memory banks in haps_hs.dts (uncomment the 2nd bank)
+
+Fixes: 51930df5801e ("mm: free_area_init: allow defining max_zone_pfn in descending order")
+Cc: stable@vger.kernel.org   [5.8]
+Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
+Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
+[vgupta: added instructions to test highmem]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arc/mm/init.c |   27 ++++++++++++++++-----------
+ 1 file changed, 16 insertions(+), 11 deletions(-)
+
+--- a/arch/arc/mm/init.c
++++ b/arch/arc/mm/init.c
+@@ -27,8 +27,8 @@ static unsigned long low_mem_sz;
+ #ifdef CONFIG_HIGHMEM
+ static unsigned long min_high_pfn, max_high_pfn;
+-static u64 high_mem_start;
+-static u64 high_mem_sz;
++static phys_addr_t high_mem_start;
++static phys_addr_t high_mem_sz;
+ #endif
+ #ifdef CONFIG_DISCONTIGMEM
+@@ -70,6 +70,7 @@ void __init early_init_dt_add_memory_arc
+               high_mem_sz = size;
+               in_use = 1;
+               memblock_add_node(base, size, 1);
++              memblock_reserve(base, size);
+ #endif
+       }
+@@ -158,7 +159,7 @@ void __init setup_arch_memory(void)
+       min_high_pfn = PFN_DOWN(high_mem_start);
+       max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
+-      max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn;
++      max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn;
+       high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
+       kmap_init();
+@@ -167,22 +168,26 @@ void __init setup_arch_memory(void)
+       free_area_init(max_zone_pfn);
+ }
+-/*
+- * mem_init - initializes memory
+- *
+- * Frees up bootmem
+- * Calculates and displays memory available/used
+- */
+-void __init mem_init(void)
++static void __init highmem_init(void)
+ {
+ #ifdef CONFIG_HIGHMEM
+       unsigned long tmp;
+-      reset_all_zones_managed_pages();
++      memblock_free(high_mem_start, high_mem_sz);
+       for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++)
+               free_highmem_page(pfn_to_page(tmp));
+ #endif
++}
++/*
++ * mem_init - initializes memory
++ *
++ * Frees up bootmem
++ * Calculates and displays memory available/used
++ */
++void __init mem_init(void)
++{
+       memblock_free_all();
++      highmem_init();
+       mem_init_print_info(NULL);
+ }
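
As an aside, a minimal standalone C sketch of the PFN truncation described
above (constants and the 32-bit type are illustrative, not ARC's real
configuration):

	/* Sketch only: shows how the PFN differs when a >32-bit physical
	 * address is kept in a u64 versus truncated to a 32-bit type. */
	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12

	int main(void)
	{
		uint64_t start_u64 = 0x100000000ULL;      /* 36-bit address */
		uint32_t start_p32 = (uint32_t)start_u64; /* truncates to 0 */

		printf("PFN from u64:    0x%llx\n",
		       (unsigned long long)(start_u64 >> PAGE_SHIFT));
		printf("PFN from 32-bit: 0x%lx\n",
		       (unsigned long)(start_p32 >> PAGE_SHIFT));
		return 0;
	}
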
diff --git a/queue-5.8/arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch b/queue-5.8/arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch
new file mode 100644 (file)
index 0000000..7236b8c
--- /dev/null
@@ -0,0 +1,73 @@
+From feb92d7d3813456c11dce215b3421801a78a8986 Mon Sep 17 00:00:00 2001
+From: Vineet Gupta <vgupta@synopsys.com>
+Date: Sun, 26 Jul 2020 21:51:59 -0700
+Subject: ARC: perf: don't bail setup if pct irq missing in device-tree
+
+From: Vineet Gupta <vgupta@synopsys.com>
+
+commit feb92d7d3813456c11dce215b3421801a78a8986 upstream.
+
+Current code inadvertently bails if hardware supports sampling/overflow
+interrupts, but the irq is missing from the device tree.
+
+|
+| # perf stat -e cycles,instructions,major-faults,minor-faults ../hackbench
+| Running with 10 groups 400 process
+| Time: 0.921
+|
+| Performance counter stats for '../hackbench':
+|
+|   <not supported>      cycles
+|   <not supported>      instructions
+|                 0      major-faults
+|              8679      minor-faults
+
+This need not be the case, as we can still do simple counting-based perf stat.
+This unborks perf on HSDK-4xD.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arc/kernel/perf_event.c |   14 ++++----------
+ 1 file changed, 4 insertions(+), 10 deletions(-)
+
+--- a/arch/arc/kernel/perf_event.c
++++ b/arch/arc/kernel/perf_event.c
+@@ -562,7 +562,7 @@ static int arc_pmu_device_probe(struct p
+ {
+       struct arc_reg_pct_build pct_bcr;
+       struct arc_reg_cc_build cc_bcr;
+-      int i, has_interrupts;
++      int i, has_interrupts, irq;
+       int counter_size;       /* in bits */
+       union cc_name {
+@@ -637,13 +637,7 @@ static int arc_pmu_device_probe(struct p
+               .attr_groups    = arc_pmu->attr_groups,
+       };
+-      if (has_interrupts) {
+-              int irq = platform_get_irq(pdev, 0);
+-
+-              if (irq < 0) {
+-                      pr_err("Cannot get IRQ number for the platform\n");
+-                      return -ENODEV;
+-              }
++      if (has_interrupts && (irq = platform_get_irq(pdev, 0)) >= 0) {
+               arc_pmu->irq = irq;
+@@ -652,9 +646,9 @@ static int arc_pmu_device_probe(struct p
+                                  this_cpu_ptr(&arc_pmu_cpu));
+               on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
+-
+-      } else
++      } else {
+               arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
++      }
+       /*
+        * perf parser doesn't really like '-' symbol in events name, so let's
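
For illustration, a standalone C sketch of the operator-precedence pitfall
lurking in the hunk above (fake_get_irq() is a stand-in, not a real API):
without the extra parentheses, 'irq' would receive the result of the
comparison rather than the IRQ number.

	#include <stdio.h>

	static int fake_get_irq(void) { return 42; }

	int main(void)
	{
		int irq;

		if ((irq = fake_get_irq() >= 0))   /* wrong: irq == 1 */
			printf("buggy:   irq = %d\n", irq);
		if ((irq = fake_get_irq()) >= 0)   /* right: irq == 42 */
			printf("correct: irq = %d\n", irq);
		return 0;
	}
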
diff --git a/queue-5.8/btrfs-allocate-scrub-workqueues-outside-of-locks.patch b/queue-5.8/btrfs-allocate-scrub-workqueues-outside-of-locks.patch
new file mode 100644 (file)
index 0000000..d412c9e
--- /dev/null
@@ -0,0 +1,407 @@
+From e89c4a9c8e6ce3a84cab4f342687d3fbbb1234eb Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Mon, 10 Aug 2020 11:42:29 -0400
+Subject: btrfs: allocate scrub workqueues outside of locks
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit e89c4a9c8e6ce3a84cab4f342687d3fbbb1234eb upstream.
+
+I got the following lockdep splat while testing:
+
+  ======================================================
+  WARNING: possible circular locking dependency detected
+  5.8.0-rc7-00172-g021118712e59 #932 Not tainted
+  ------------------------------------------------------
+  btrfs/229626 is trying to acquire lock:
+  ffffffff828513f0 (cpu_hotplug_lock){++++}-{0:0}, at: alloc_workqueue+0x378/0x450
+
+  but task is already holding lock:
+  ffff889dd3889518 (&fs_info->scrub_lock){+.+.}-{3:3}, at: btrfs_scrub_dev+0x11c/0x630
+
+  which lock already depends on the new lock.
+
+  the existing dependency chain (in reverse order) is:
+
+  -> #7 (&fs_info->scrub_lock){+.+.}-{3:3}:
+        __mutex_lock+0x9f/0x930
+        btrfs_scrub_dev+0x11c/0x630
+        btrfs_dev_replace_by_ioctl.cold.21+0x10a/0x1d4
+        btrfs_ioctl+0x2799/0x30a0
+        ksys_ioctl+0x83/0xc0
+        __x64_sys_ioctl+0x16/0x20
+        do_syscall_64+0x50/0x90
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #6 (&fs_devs->device_list_mutex){+.+.}-{3:3}:
+        __mutex_lock+0x9f/0x930
+        btrfs_run_dev_stats+0x49/0x480
+        commit_cowonly_roots+0xb5/0x2a0
+        btrfs_commit_transaction+0x516/0xa60
+        sync_filesystem+0x6b/0x90
+        generic_shutdown_super+0x22/0x100
+        kill_anon_super+0xe/0x30
+        btrfs_kill_super+0x12/0x20
+        deactivate_locked_super+0x29/0x60
+        cleanup_mnt+0xb8/0x140
+        task_work_run+0x6d/0xb0
+        __prepare_exit_to_usermode+0x1cc/0x1e0
+        do_syscall_64+0x5c/0x90
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #5 (&fs_info->tree_log_mutex){+.+.}-{3:3}:
+        __mutex_lock+0x9f/0x930
+        btrfs_commit_transaction+0x4bb/0xa60
+        sync_filesystem+0x6b/0x90
+        generic_shutdown_super+0x22/0x100
+        kill_anon_super+0xe/0x30
+        btrfs_kill_super+0x12/0x20
+        deactivate_locked_super+0x29/0x60
+        cleanup_mnt+0xb8/0x140
+        task_work_run+0x6d/0xb0
+        __prepare_exit_to_usermode+0x1cc/0x1e0
+        do_syscall_64+0x5c/0x90
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #4 (&fs_info->reloc_mutex){+.+.}-{3:3}:
+        __mutex_lock+0x9f/0x930
+        btrfs_record_root_in_trans+0x43/0x70
+        start_transaction+0xd1/0x5d0
+        btrfs_dirty_inode+0x42/0xd0
+        touch_atime+0xa1/0xd0
+        btrfs_file_mmap+0x3f/0x60
+        mmap_region+0x3a4/0x640
+        do_mmap+0x376/0x580
+        vm_mmap_pgoff+0xd5/0x120
+        ksys_mmap_pgoff+0x193/0x230
+        do_syscall_64+0x50/0x90
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #3 (&mm->mmap_lock#2){++++}-{3:3}:
+        __might_fault+0x68/0x90
+        _copy_to_user+0x1e/0x80
+        perf_read+0x141/0x2c0
+        vfs_read+0xad/0x1b0
+        ksys_read+0x5f/0xe0
+        do_syscall_64+0x50/0x90
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #2 (&cpuctx_mutex){+.+.}-{3:3}:
+        __mutex_lock+0x9f/0x930
+        perf_event_init_cpu+0x88/0x150
+        perf_event_init+0x1db/0x20b
+        start_kernel+0x3ae/0x53c
+        secondary_startup_64+0xa4/0xb0
+
+  -> #1 (pmus_lock){+.+.}-{3:3}:
+        __mutex_lock+0x9f/0x930
+        perf_event_init_cpu+0x4f/0x150
+        cpuhp_invoke_callback+0xb1/0x900
+        _cpu_up.constprop.26+0x9f/0x130
+        cpu_up+0x7b/0xc0
+        bringup_nonboot_cpus+0x4f/0x60
+        smp_init+0x26/0x71
+        kernel_init_freeable+0x110/0x258
+        kernel_init+0xa/0x103
+        ret_from_fork+0x1f/0x30
+
+  -> #0 (cpu_hotplug_lock){++++}-{0:0}:
+        __lock_acquire+0x1272/0x2310
+        lock_acquire+0x9e/0x360
+        cpus_read_lock+0x39/0xb0
+        alloc_workqueue+0x378/0x450
+        __btrfs_alloc_workqueue+0x15d/0x200
+        btrfs_alloc_workqueue+0x51/0x160
+        scrub_workers_get+0x5a/0x170
+        btrfs_scrub_dev+0x18c/0x630
+        btrfs_dev_replace_by_ioctl.cold.21+0x10a/0x1d4
+        btrfs_ioctl+0x2799/0x30a0
+        ksys_ioctl+0x83/0xc0
+        __x64_sys_ioctl+0x16/0x20
+        do_syscall_64+0x50/0x90
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  other info that might help us debug this:
+
+  Chain exists of:
+    cpu_hotplug_lock --> &fs_devs->device_list_mutex --> &fs_info->scrub_lock
+
+   Possible unsafe locking scenario:
+
+        CPU0                    CPU1
+        ----                    ----
+    lock(&fs_info->scrub_lock);
+                                lock(&fs_devs->device_list_mutex);
+                                lock(&fs_info->scrub_lock);
+    lock(cpu_hotplug_lock);
+
+   *** DEADLOCK ***
+
+  2 locks held by btrfs/229626:
+   #0: ffff88bfe8bb86e0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_scrub_dev+0xbd/0x630
+   #1: ffff889dd3889518 (&fs_info->scrub_lock){+.+.}-{3:3}, at: btrfs_scrub_dev+0x11c/0x630
+
+  stack backtrace:
+  CPU: 15 PID: 229626 Comm: btrfs Kdump: loaded Not tainted 5.8.0-rc7-00172-g021118712e59 #932
+  Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018
+  Call Trace:
+   dump_stack+0x78/0xa0
+   check_noncircular+0x165/0x180
+   __lock_acquire+0x1272/0x2310
+   lock_acquire+0x9e/0x360
+   ? alloc_workqueue+0x378/0x450
+   cpus_read_lock+0x39/0xb0
+   ? alloc_workqueue+0x378/0x450
+   alloc_workqueue+0x378/0x450
+   ? rcu_read_lock_sched_held+0x52/0x80
+   __btrfs_alloc_workqueue+0x15d/0x200
+   btrfs_alloc_workqueue+0x51/0x160
+   scrub_workers_get+0x5a/0x170
+   btrfs_scrub_dev+0x18c/0x630
+   ? start_transaction+0xd1/0x5d0
+   btrfs_dev_replace_by_ioctl.cold.21+0x10a/0x1d4
+   btrfs_ioctl+0x2799/0x30a0
+   ? do_sigaction+0x102/0x250
+   ? lockdep_hardirqs_on_prepare+0xca/0x160
+   ? _raw_spin_unlock_irq+0x24/0x30
+   ? trace_hardirqs_on+0x1c/0xe0
+   ? _raw_spin_unlock_irq+0x24/0x30
+   ? do_sigaction+0x102/0x250
+   ? ksys_ioctl+0x83/0xc0
+   ksys_ioctl+0x83/0xc0
+   __x64_sys_ioctl+0x16/0x20
+   do_syscall_64+0x50/0x90
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+This happens because we're allocating the scrub workqueues under the
+scrub and device list mutex, which brings in a whole host of other
+dependencies.
+
+Because the workqueue allocation is done with GFP_KERNEL, it can
+trigger reclaim, which can lead to a transaction commit, which in turn
+needs the device_list_mutex, so it can lead to a deadlock. That is a
+different problem for which this fix is also a solution.
+
+Fix this by moving the actual allocation outside of the
+scrub lock, and then only take the lock once we're ready to actually
+assign them to the fs_info.  We'll now have to clean up the workqueues in
+a few more places, so I've added a helper to do the refcount dance to
+safely free the workqueues.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/scrub.c |  122 +++++++++++++++++++++++++++++++------------------------
+ 1 file changed, 70 insertions(+), 52 deletions(-)
+
+--- a/fs/btrfs/scrub.c
++++ b/fs/btrfs/scrub.c
+@@ -3783,50 +3783,84 @@ static noinline_for_stack int scrub_supe
+       return 0;
+ }
++static void scrub_workers_put(struct btrfs_fs_info *fs_info)
++{
++      if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
++                                      &fs_info->scrub_lock)) {
++              struct btrfs_workqueue *scrub_workers = NULL;
++              struct btrfs_workqueue *scrub_wr_comp = NULL;
++              struct btrfs_workqueue *scrub_parity = NULL;
++
++              scrub_workers = fs_info->scrub_workers;
++              scrub_wr_comp = fs_info->scrub_wr_completion_workers;
++              scrub_parity = fs_info->scrub_parity_workers;
++
++              fs_info->scrub_workers = NULL;
++              fs_info->scrub_wr_completion_workers = NULL;
++              fs_info->scrub_parity_workers = NULL;
++              mutex_unlock(&fs_info->scrub_lock);
++
++              btrfs_destroy_workqueue(scrub_workers);
++              btrfs_destroy_workqueue(scrub_wr_comp);
++              btrfs_destroy_workqueue(scrub_parity);
++      }
++}
++
+ /*
+  * get a reference count on fs_info->scrub_workers. start worker if necessary
+  */
+ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
+                                               int is_dev_replace)
+ {
++      struct btrfs_workqueue *scrub_workers = NULL;
++      struct btrfs_workqueue *scrub_wr_comp = NULL;
++      struct btrfs_workqueue *scrub_parity = NULL;
+       unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
+       int max_active = fs_info->thread_pool_size;
++      int ret = -ENOMEM;
+-      lockdep_assert_held(&fs_info->scrub_lock);
++      if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
++              return 0;
+-      if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
+-              ASSERT(fs_info->scrub_workers == NULL);
+-              fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
+-                              flags, is_dev_replace ? 1 : max_active, 4);
+-              if (!fs_info->scrub_workers)
+-                      goto fail_scrub_workers;
+-
+-              ASSERT(fs_info->scrub_wr_completion_workers == NULL);
+-              fs_info->scrub_wr_completion_workers =
+-                      btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
+-                                            max_active, 2);
+-              if (!fs_info->scrub_wr_completion_workers)
+-                      goto fail_scrub_wr_completion_workers;
++      scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", flags,
++                                            is_dev_replace ? 1 : max_active, 4);
++      if (!scrub_workers)
++              goto fail_scrub_workers;
+-              ASSERT(fs_info->scrub_parity_workers == NULL);
+-              fs_info->scrub_parity_workers =
+-                      btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
++      scrub_wr_comp = btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
+                                             max_active, 2);
+-              if (!fs_info->scrub_parity_workers)
+-                      goto fail_scrub_parity_workers;
++      if (!scrub_wr_comp)
++              goto fail_scrub_wr_completion_workers;
++      scrub_parity = btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
++                                           max_active, 2);
++      if (!scrub_parity)
++              goto fail_scrub_parity_workers;
++
++      mutex_lock(&fs_info->scrub_lock);
++      if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
++              ASSERT(fs_info->scrub_workers == NULL &&
++                     fs_info->scrub_wr_completion_workers == NULL &&
++                     fs_info->scrub_parity_workers == NULL);
++              fs_info->scrub_workers = scrub_workers;
++              fs_info->scrub_wr_completion_workers = scrub_wr_comp;
++              fs_info->scrub_parity_workers = scrub_parity;
+               refcount_set(&fs_info->scrub_workers_refcnt, 1);
+-      } else {
+-              refcount_inc(&fs_info->scrub_workers_refcnt);
++              mutex_unlock(&fs_info->scrub_lock);
++              return 0;
+       }
+-      return 0;
++      /* Other thread raced in and created the workers for us */
++      refcount_inc(&fs_info->scrub_workers_refcnt);
++      mutex_unlock(&fs_info->scrub_lock);
++      ret = 0;
++      btrfs_destroy_workqueue(scrub_parity);
+ fail_scrub_parity_workers:
+-      btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
++      btrfs_destroy_workqueue(scrub_wr_comp);
+ fail_scrub_wr_completion_workers:
+-      btrfs_destroy_workqueue(fs_info->scrub_workers);
++      btrfs_destroy_workqueue(scrub_workers);
+ fail_scrub_workers:
+-      return -ENOMEM;
++      return ret;
+ }
+ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
+@@ -3837,9 +3871,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info
+       int ret;
+       struct btrfs_device *dev;
+       unsigned int nofs_flag;
+-      struct btrfs_workqueue *scrub_workers = NULL;
+-      struct btrfs_workqueue *scrub_wr_comp = NULL;
+-      struct btrfs_workqueue *scrub_parity = NULL;
+       if (btrfs_fs_closing(fs_info))
+               return -EAGAIN;
+@@ -3886,13 +3917,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info
+       if (IS_ERR(sctx))
+               return PTR_ERR(sctx);
++      ret = scrub_workers_get(fs_info, is_dev_replace);
++      if (ret)
++              goto out_free_ctx;
++
+       mutex_lock(&fs_info->fs_devices->device_list_mutex);
+       dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
+       if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
+                    !is_dev_replace)) {
+               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+               ret = -ENODEV;
+-              goto out_free_ctx;
++              goto out;
+       }
+       if (!is_dev_replace && !readonly &&
+@@ -3901,7 +3936,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info
+               btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
+                               rcu_str_deref(dev->name));
+               ret = -EROFS;
+-              goto out_free_ctx;
++              goto out;
+       }
+       mutex_lock(&fs_info->scrub_lock);
+@@ -3910,7 +3945,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info
+               mutex_unlock(&fs_info->scrub_lock);
+               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+               ret = -EIO;
+-              goto out_free_ctx;
++              goto out;
+       }
+       down_read(&fs_info->dev_replace.rwsem);
+@@ -3921,17 +3956,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info
+               mutex_unlock(&fs_info->scrub_lock);
+               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+               ret = -EINPROGRESS;
+-              goto out_free_ctx;
++              goto out;
+       }
+       up_read(&fs_info->dev_replace.rwsem);
+-      ret = scrub_workers_get(fs_info, is_dev_replace);
+-      if (ret) {
+-              mutex_unlock(&fs_info->scrub_lock);
+-              mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+-              goto out_free_ctx;
+-      }
+-
+       sctx->readonly = readonly;
+       dev->scrub_ctx = sctx;
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+@@ -3984,24 +4012,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info
+       mutex_lock(&fs_info->scrub_lock);
+       dev->scrub_ctx = NULL;
+-      if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) {
+-              scrub_workers = fs_info->scrub_workers;
+-              scrub_wr_comp = fs_info->scrub_wr_completion_workers;
+-              scrub_parity = fs_info->scrub_parity_workers;
+-
+-              fs_info->scrub_workers = NULL;
+-              fs_info->scrub_wr_completion_workers = NULL;
+-              fs_info->scrub_parity_workers = NULL;
+-      }
+       mutex_unlock(&fs_info->scrub_lock);
+-      btrfs_destroy_workqueue(scrub_workers);
+-      btrfs_destroy_workqueue(scrub_wr_comp);
+-      btrfs_destroy_workqueue(scrub_parity);
++      scrub_workers_put(fs_info);
+       scrub_put_ctx(sctx);
+       return ret;
+-
++out:
++      scrub_workers_put(fs_info);
+ out_free_ctx:
+       scrub_free_ctx(sctx);
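
The pattern adopted above generalizes beyond btrfs. A minimal pthread
sketch (generic resource type and names, assumed for illustration):
allocate outside the lock, take the lock only to publish, and free the
local copy if another thread won the race.

	#include <pthread.h>
	#include <stdlib.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static struct resource { int dummy; } *shared;
	static int refcnt;

	static int resource_get(void)
	{
		struct resource *r = malloc(sizeof(*r)); /* may trigger reclaim */

		if (!r)
			return -1;

		pthread_mutex_lock(&lock);
		if (refcnt == 0) {
			shared = r;      /* we won the race: publish ours */
			refcnt = 1;
			pthread_mutex_unlock(&lock);
			return 0;
		}
		refcnt++;                /* raced: another thread published */
		pthread_mutex_unlock(&lock);
		free(r);                 /* drop the now-unneeded copy */
		return 0;
	}
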
diff --git a/queue-5.8/btrfs-block-group-fix-free-space-bitmap-threshold.patch b/queue-5.8/btrfs-block-group-fix-free-space-bitmap-threshold.patch
new file mode 100644 (file)
index 0000000..78f02a3
--- /dev/null
@@ -0,0 +1,84 @@
+From e3e39c72b99f93bbd0420d38c858e7c4a061bb63 Mon Sep 17 00:00:00 2001
+From: Marcos Paulo de Souza <mpdesouza@suse.com>
+Date: Fri, 21 Aug 2020 11:54:44 -0300
+Subject: btrfs: block-group: fix free-space bitmap threshold
+
+From: Marcos Paulo de Souza <mpdesouza@suse.com>
+
+commit e3e39c72b99f93bbd0420d38c858e7c4a061bb63 upstream.
+
+[BUG]
+After commit 9afc66498a0b ("btrfs: block-group: refactor how we read one
+block group item"), cache->length is being assigned after calling
+btrfs_create_block_group_cache. This causes a problem since
+set_free_space_tree_thresholds calculates the free-space threshold to
+decide if the free-space tree should convert from extents to bitmaps.
+
+The current code calls set_free_space_tree_thresholds with cache->length
+being 0, which then makes cache->bitmap_high_thresh zero. This implies
+the system will always use bitmap instead of extents, which is not
+desired if the block group is not fragmented.
+
+This behavior can be seen by a test that expects to repair systems
+with FREE_SPACE_EXTENT and FREE_SPACE_BITMAP, but the current code only
+created FREE_SPACE_BITMAP.
+
+[FIX]
+Call set_free_space_tree_thresholds after setting cache->length. There
+is now a WARN_ON in set_free_space_tree_thresholds to help prevent the
+same mistake from happening again in the future.
+
+Link: https://github.com/kdave/btrfs-progs/issues/251
+Fixes: 9afc66498a0b ("btrfs: block-group: refactor how we read one block group item")
+CC: stable@vger.kernel.org # 5.8+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/block-group.c     |    4 +++-
+ fs/btrfs/free-space-tree.c |    4 ++++
+ 2 files changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -1814,7 +1814,6 @@ static struct btrfs_block_group *btrfs_c
+       cache->fs_info = fs_info;
+       cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
+-      set_free_space_tree_thresholds(cache);
+       cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
+@@ -1928,6 +1927,8 @@ static int read_one_block_group(struct b
+       if (ret < 0)
+               goto error;
++      set_free_space_tree_thresholds(cache);
++
+       if (need_clear) {
+               /*
+                * When we mount with old space cache, we need to
+@@ -2148,6 +2149,7 @@ int btrfs_make_block_group(struct btrfs_
+               return -ENOMEM;
+       cache->length = size;
++      set_free_space_tree_thresholds(cache);
+       cache->used = bytes_used;
+       cache->flags = type;
+       cache->last_byte_to_unpin = (u64)-1;
+--- a/fs/btrfs/free-space-tree.c
++++ b/fs/btrfs/free-space-tree.c
+@@ -22,6 +22,10 @@ void set_free_space_tree_thresholds(stru
+       size_t bitmap_size;
+       u64 num_bitmaps, total_bitmap_size;
++      if (WARN_ON(cache->length == 0))
++              btrfs_warn(cache->fs_info, "block group %llu length is zero",
++                         cache->start);
++
+       /*
+        * We convert to bitmaps when the disk space required for using extents
+        * exceeds that required for using bitmaps.
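
A reduced C sketch of the ordering bug fixed above (types and the
threshold formula are simplified stand-ins): a threshold derived from
the block group length is meaningless while the length is still zero.

	struct group { unsigned long long length, bitmap_high_thresh; };

	/* stand-in for set_free_space_tree_thresholds() */
	static void set_thresholds(struct group *g)
	{
		g->bitmap_high_thresh = g->length / 2;
	}

	static void read_group_buggy(struct group *g, unsigned long long len)
	{
		set_thresholds(g);   /* g->length == 0: threshold is 0 */
		g->length = len;
	}

	static void read_group_fixed(struct group *g, unsigned long long len)
	{
		g->length = len;
		set_thresholds(g);   /* reflects the real length */
	}
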
diff --git a/queue-5.8/btrfs-drop-path-before-adding-new-uuid-tree-entry.patch b/queue-5.8/btrfs-drop-path-before-adding-new-uuid-tree-entry.patch
new file mode 100644 (file)
index 0000000..33e06b3
--- /dev/null
@@ -0,0 +1,145 @@
+From 9771a5cf937129307d9f58922d60484d58ababe7 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Mon, 10 Aug 2020 11:42:26 -0400
+Subject: btrfs: drop path before adding new uuid tree entry
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 9771a5cf937129307d9f58922d60484d58ababe7 upstream.
+
+With the conversion of the tree locks to rwsem I got the following
+lockdep splat:
+
+  ======================================================
+  WARNING: possible circular locking dependency detected
+  5.8.0-rc7-00167-g0d7ba0c5b375-dirty #925 Not tainted
+  ------------------------------------------------------
+  btrfs-uuid/7955 is trying to acquire lock:
+  ffff88bfbafec0f8 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
+
+  but task is already holding lock:
+  ffff88bfbafef2a8 (btrfs-uuid-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
+
+  which lock already depends on the new lock.
+
+  the existing dependency chain (in reverse order) is:
+
+  -> #1 (btrfs-uuid-00){++++}-{3:3}:
+        down_read_nested+0x3e/0x140
+        __btrfs_tree_read_lock+0x39/0x180
+        __btrfs_read_lock_root_node+0x3a/0x50
+        btrfs_search_slot+0x4bd/0x990
+        btrfs_uuid_tree_add+0x89/0x2d0
+        btrfs_uuid_scan_kthread+0x330/0x390
+        kthread+0x133/0x150
+        ret_from_fork+0x1f/0x30
+
+  -> #0 (btrfs-root-00){++++}-{3:3}:
+        __lock_acquire+0x1272/0x2310
+        lock_acquire+0x9e/0x360
+        down_read_nested+0x3e/0x140
+        __btrfs_tree_read_lock+0x39/0x180
+        __btrfs_read_lock_root_node+0x3a/0x50
+        btrfs_search_slot+0x4bd/0x990
+        btrfs_find_root+0x45/0x1b0
+        btrfs_read_tree_root+0x61/0x100
+        btrfs_get_root_ref.part.50+0x143/0x630
+        btrfs_uuid_tree_iterate+0x207/0x314
+        btrfs_uuid_rescan_kthread+0x12/0x50
+        kthread+0x133/0x150
+        ret_from_fork+0x1f/0x30
+
+  other info that might help us debug this:
+
+   Possible unsafe locking scenario:
+
+        CPU0                    CPU1
+        ----                    ----
+    lock(btrfs-uuid-00);
+                                lock(btrfs-root-00);
+                                lock(btrfs-uuid-00);
+    lock(btrfs-root-00);
+
+   *** DEADLOCK ***
+
+  1 lock held by btrfs-uuid/7955:
+   #0: ffff88bfbafef2a8 (btrfs-uuid-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
+
+  stack backtrace:
+  CPU: 73 PID: 7955 Comm: btrfs-uuid Kdump: loaded Not tainted 5.8.0-rc7-00167-g0d7ba0c5b375-dirty #925
+  Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018
+  Call Trace:
+   dump_stack+0x78/0xa0
+   check_noncircular+0x165/0x180
+   __lock_acquire+0x1272/0x2310
+   lock_acquire+0x9e/0x360
+   ? __btrfs_tree_read_lock+0x39/0x180
+   ? btrfs_root_node+0x1c/0x1d0
+   down_read_nested+0x3e/0x140
+   ? __btrfs_tree_read_lock+0x39/0x180
+   __btrfs_tree_read_lock+0x39/0x180
+   __btrfs_read_lock_root_node+0x3a/0x50
+   btrfs_search_slot+0x4bd/0x990
+   btrfs_find_root+0x45/0x1b0
+   btrfs_read_tree_root+0x61/0x100
+   btrfs_get_root_ref.part.50+0x143/0x630
+   btrfs_uuid_tree_iterate+0x207/0x314
+   ? btree_readpage+0x20/0x20
+   btrfs_uuid_rescan_kthread+0x12/0x50
+   kthread+0x133/0x150
+   ? kthread_create_on_node+0x60/0x60
+   ret_from_fork+0x1f/0x30
+
+This problem exists because we have two different rescan threads,
+btrfs_uuid_scan_kthread which creates the uuid tree, and
+btrfs_uuid_tree_iterate that goes through and updates or deletes any out
+of date roots.  The problem is they both do things in different order.
+btrfs_uuid_scan_kthread() reads the tree_root, and then inserts entries
+into the uuid_root.  btrfs_uuid_tree_iterate() scans the uuid_root, but
+then does a btrfs_get_fs_root() which can read from the tree_root.
+
+It's actually easy enough to not be holding the path in
+btrfs_uuid_scan_kthread() when we add a uuid entry, as we already drop
+it further down and re-start the search when we loop.  So simply move
+the path release before we add our entry to the uuid tree.
+
+This also fixes a problem where we're holding a path open after we do
+btrfs_end_transaction(), which has its own problems.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/volumes.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -4462,6 +4462,7 @@ int btrfs_uuid_scan_kthread(void *data)
+                       goto skip;
+               }
+ update_tree:
++              btrfs_release_path(path);
+               if (!btrfs_is_empty_uuid(root_item.uuid)) {
+                       ret = btrfs_uuid_tree_add(trans, root_item.uuid,
+                                                 BTRFS_UUID_KEY_SUBVOL,
+@@ -4486,6 +4487,7 @@ update_tree:
+               }
+ skip:
++              btrfs_release_path(path);
+               if (trans) {
+                       ret = btrfs_end_transaction(trans);
+                       trans = NULL;
+@@ -4493,7 +4495,6 @@ skip:
+                               break;
+               }
+-              btrfs_release_path(path);
+               if (key.offset < (u64)-1) {
+                       key.offset++;
+               } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
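
The splat above is the classic ABBA inversion; a generic pthread sketch
(illustrative names, not the actual btrfs locks) of the two orderings:

	#include <pthread.h>

	static pthread_mutex_t tree_root = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t uuid_root = PTHREAD_MUTEX_INITIALIZER;

	/* like btrfs_uuid_scan_kthread(): tree_root, then uuid_root */
	static void *scan_thread(void *arg)
	{
		pthread_mutex_lock(&tree_root);
		pthread_mutex_lock(&uuid_root);
		pthread_mutex_unlock(&uuid_root);
		pthread_mutex_unlock(&tree_root);
		return arg;
	}

	/* like btrfs_uuid_tree_iterate(): uuid_root, then tree_root */
	static void *iterate_thread(void *arg)
	{
		pthread_mutex_lock(&uuid_root);
		pthread_mutex_lock(&tree_root);   /* reversed: can deadlock */
		pthread_mutex_unlock(&tree_root);
		pthread_mutex_unlock(&uuid_root);
		return arg;
	}

The fix corresponds to releasing the first lock (dropping the path)
before acquiring the second, so no thread ever holds both at once.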
diff --git a/queue-5.8/btrfs-fix-potential-deadlock-in-the-search-ioctl.patch b/queue-5.8/btrfs-fix-potential-deadlock-in-the-search-ioctl.patch
new file mode 100644 (file)
index 0000000..8873ff2
--- /dev/null
@@ -0,0 +1,219 @@
+From a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Mon, 10 Aug 2020 11:42:27 -0400
+Subject: btrfs: fix potential deadlock in the search ioctl
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 upstream.
+
+With the conversion of the tree locks to rwsem I got the following
+lockdep splat:
+
+  ======================================================
+  WARNING: possible circular locking dependency detected
+  5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 Not tainted
+  ------------------------------------------------------
+  compsize/11122 is trying to acquire lock:
+  ffff889fabca8768 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0x3e/0x90
+
+  but task is already holding lock:
+  ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
+
+  which lock already depends on the new lock.
+
+  the existing dependency chain (in reverse order) is:
+
+  -> #2 (btrfs-fs-00){++++}-{3:3}:
+        down_write_nested+0x3b/0x70
+        __btrfs_tree_lock+0x24/0x120
+        btrfs_search_slot+0x756/0x990
+        btrfs_lookup_inode+0x3a/0xb4
+        __btrfs_update_delayed_inode+0x93/0x270
+        btrfs_async_run_delayed_root+0x168/0x230
+        btrfs_work_helper+0xd4/0x570
+        process_one_work+0x2ad/0x5f0
+        worker_thread+0x3a/0x3d0
+        kthread+0x133/0x150
+        ret_from_fork+0x1f/0x30
+
+  -> #1 (&delayed_node->mutex){+.+.}-{3:3}:
+        __mutex_lock+0x9f/0x930
+        btrfs_delayed_update_inode+0x50/0x440
+        btrfs_update_inode+0x8a/0xf0
+        btrfs_dirty_inode+0x5b/0xd0
+        touch_atime+0xa1/0xd0
+        btrfs_file_mmap+0x3f/0x60
+        mmap_region+0x3a4/0x640
+        do_mmap+0x376/0x580
+        vm_mmap_pgoff+0xd5/0x120
+        ksys_mmap_pgoff+0x193/0x230
+        do_syscall_64+0x50/0x90
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  -> #0 (&mm->mmap_lock#2){++++}-{3:3}:
+        __lock_acquire+0x1272/0x2310
+        lock_acquire+0x9e/0x360
+        __might_fault+0x68/0x90
+        _copy_to_user+0x1e/0x80
+        copy_to_sk.isra.32+0x121/0x300
+        search_ioctl+0x106/0x200
+        btrfs_ioctl_tree_search_v2+0x7b/0xf0
+        btrfs_ioctl+0x106f/0x30a0
+        ksys_ioctl+0x83/0xc0
+        __x64_sys_ioctl+0x16/0x20
+        do_syscall_64+0x50/0x90
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  other info that might help us debug this:
+
+  Chain exists of:
+    &mm->mmap_lock#2 --> &delayed_node->mutex --> btrfs-fs-00
+
+   Possible unsafe locking scenario:
+
+        CPU0                    CPU1
+        ----                    ----
+    lock(btrfs-fs-00);
+                                lock(&delayed_node->mutex);
+                                lock(btrfs-fs-00);
+    lock(&mm->mmap_lock#2);
+
+   *** DEADLOCK ***
+
+  1 lock held by compsize/11122:
+   #0: ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180
+
+  stack backtrace:
+  CPU: 17 PID: 11122 Comm: compsize Kdump: loaded Not tainted 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922
+  Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018
+  Call Trace:
+   dump_stack+0x78/0xa0
+   check_noncircular+0x165/0x180
+   __lock_acquire+0x1272/0x2310
+   lock_acquire+0x9e/0x360
+   ? __might_fault+0x3e/0x90
+   ? find_held_lock+0x72/0x90
+   __might_fault+0x68/0x90
+   ? __might_fault+0x3e/0x90
+   _copy_to_user+0x1e/0x80
+   copy_to_sk.isra.32+0x121/0x300
+   ? btrfs_search_forward+0x2a6/0x360
+   search_ioctl+0x106/0x200
+   btrfs_ioctl_tree_search_v2+0x7b/0xf0
+   btrfs_ioctl+0x106f/0x30a0
+   ? __do_sys_newfstat+0x5a/0x70
+   ? ksys_ioctl+0x83/0xc0
+   ksys_ioctl+0x83/0xc0
+   __x64_sys_ioctl+0x16/0x20
+   do_syscall_64+0x50/0x90
+   entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+The problem is we're doing a copy_to_user() while holding tree locks,
+which can deadlock if we have to do a page fault for the copy_to_user().
+This exists even without my locking changes, so it needs to be fixed.
+Rework the search ioctl to do the pre-fault and then use
+copy_to_user_nofault for the copying.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent_io.c |    8 ++++----
+ fs/btrfs/extent_io.h |    6 +++---
+ fs/btrfs/ioctl.c     |   27 ++++++++++++++++++++-------
+ 3 files changed, 27 insertions(+), 14 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -5640,9 +5640,9 @@ void read_extent_buffer(const struct ext
+       }
+ }
+-int read_extent_buffer_to_user(const struct extent_buffer *eb,
+-                             void __user *dstv,
+-                             unsigned long start, unsigned long len)
++int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
++                                     void __user *dstv,
++                                     unsigned long start, unsigned long len)
+ {
+       size_t cur;
+       size_t offset;
+@@ -5662,7 +5662,7 @@ int read_extent_buffer_to_user(const str
+               cur = min(len, (PAGE_SIZE - offset));
+               kaddr = page_address(page);
+-              if (copy_to_user(dst, kaddr + offset, cur)) {
++              if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
+                       ret = -EFAULT;
+                       break;
+               }
+--- a/fs/btrfs/extent_io.h
++++ b/fs/btrfs/extent_io.h
+@@ -241,9 +241,9 @@ int memcmp_extent_buffer(const struct ex
+ void read_extent_buffer(const struct extent_buffer *eb, void *dst,
+                       unsigned long start,
+                       unsigned long len);
+-int read_extent_buffer_to_user(const struct extent_buffer *eb,
+-                             void __user *dst, unsigned long start,
+-                             unsigned long len);
++int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
++                                     void __user *dst, unsigned long start,
++                                     unsigned long len);
+ void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
+ void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
+               const void *src);
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -2086,9 +2086,14 @@ static noinline int copy_to_sk(struct bt
+               sh.len = item_len;
+               sh.transid = found_transid;
+-              /* copy search result header */
+-              if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
+-                      ret = -EFAULT;
++              /*
++               * Copy search result header. If we fault then loop again so we
++               * can fault in the pages and -EFAULT there if there's a
++               * problem. Otherwise we'll fault and then copy the buffer in
++               * properly this next time through
++               */
++              if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
++                      ret = 0;
+                       goto out;
+               }
+@@ -2096,10 +2101,14 @@ static noinline int copy_to_sk(struct bt
+               if (item_len) {
+                       char __user *up = ubuf + *sk_offset;
+-                      /* copy the item */
+-                      if (read_extent_buffer_to_user(leaf, up,
+-                                                     item_off, item_len)) {
+-                              ret = -EFAULT;
++                      /*
++                       * Copy the item, same behavior as above, but reset the
++                       * sk_offset so we copy the full thing again.
++                       */
++                      if (read_extent_buffer_to_user_nofault(leaf, up,
++                                              item_off, item_len)) {
++                              ret = 0;
++                              *sk_offset -= sizeof(sh);
+                               goto out;
+                       }
+@@ -2184,6 +2193,10 @@ static noinline int search_ioctl(struct
+       key.offset = sk->min_offset;
+       while (1) {
++              ret = fault_in_pages_writeable(ubuf, *buf_size - sk_offset);
++              if (ret)
++                      break;
++
+               ret = btrfs_search_forward(root, &key, path, sk->min_transid);
+               if (ret != 0) {
+                       if (ret > 0)
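
A self-contained C sketch of the retry pattern introduced above
(prefault_writeable() and copy_nofault() are stand-ins for
fault_in_pages_writeable() and copy_to_user_nofault(), simplified here
to plain memory operations):

	#include <errno.h>
	#include <stddef.h>
	#include <string.h>

	static int prefault_writeable(void *dst, size_t len)
	{
		memset(dst, 0, len);   /* touch pages; no locks held yet */
		return 0;
	}

	static int copy_nofault(void *dst, const void *src, size_t len)
	{
		memcpy(dst, src, len); /* pretend this never takes a fault */
		return 0;
	}

	static int copy_out_locked(void *dst, const void *src, size_t len)
	{
		for (;;) {
			if (prefault_writeable(dst, len))
				return -EFAULT;
			/* ...take tree locks... */
			int ret = copy_nofault(dst, src, len);
			/* ...drop tree locks... */
			if (ret == 0)
				return 0;
			/* page vanished in between: loop and retry */
		}
	}
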
diff --git a/queue-5.8/btrfs-set-the-correct-lockdep-class-for-new-nodes.patch b/queue-5.8/btrfs-set-the-correct-lockdep-class-for-new-nodes.patch
new file mode 100644 (file)
index 0000000..0a74aa3
--- /dev/null
@@ -0,0 +1,43 @@
+From ad24466588ab7d7c879053c5afd919b0c555fec0 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Mon, 10 Aug 2020 11:42:30 -0400
+Subject: btrfs: set the correct lockdep class for new nodes
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit ad24466588ab7d7c879053c5afd919b0c555fec0 upstream.
+
+When flipping over to the rw_semaphore I noticed I'd get a lockdep splat
+in replace_path(), which is weird because we're swapping the reloc root
+with the actual target root.  Turns out this is because we're using the
+root->root_key.objectid as the root id for the newly allocated tree
+block when setting the lockdep class, however we need to be using the
+actual owner of this new block, which is saved in owner.
+
+The affected path is through btrfs_copy_root as all other callers of
+btrfs_alloc_tree_block (which calls init_new_buffer) have root_objectid
+== root->root_key.objectid.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4527,7 +4527,7 @@ btrfs_init_new_buffer(struct btrfs_trans
+               return ERR_PTR(-EUCLEAN);
+       }
+-      btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
++      btrfs_set_buffer_lockdep_class(owner, buf, level);
+       btrfs_tree_lock(buf);
+       btrfs_clean_tree_block(buf);
+       clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
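
A simplified kernel-flavoured sketch of the idea (NR_TREES and the
key selection are assumptions for illustration; the real logic lives in
btrfs_set_buffer_lockdep_class()): the lockdep class must follow the
buffer's owner, not root->root_key.objectid.

	#include <linux/lockdep.h>
	#include <linux/rwsem.h>
	#include <linux/types.h>

	#define NR_TREES 16
	static struct lock_class_key tree_keys[NR_TREES];

	static void set_buffer_class(struct rw_semaphore *lock, u64 owner)
	{
		/* key the class by the owning tree of the new buffer */
		lockdep_set_class(lock, &tree_keys[owner % NR_TREES]);
	}
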
diff --git a/queue-5.8/btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch b/queue-5.8/btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch
new file mode 100644 (file)
index 0000000..b1b63d0
--- /dev/null
@@ -0,0 +1,55 @@
+From d3beaa253fd6fa40b8b18a216398e6e5376a9d21 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Mon, 10 Aug 2020 11:42:31 -0400
+Subject: btrfs: set the lockdep class for log tree extent buffers
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit d3beaa253fd6fa40b8b18a216398e6e5376a9d21 upstream.
+
+These are special extent buffers that get rewound in order to look up
+the state of the tree at a specific point in time.  As such they do not
+go through the normal initialization paths that set their lockdep class,
+so handle them appropriately when they are created and before they are
+locked.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -1297,6 +1297,8 @@ tree_mod_log_rewind(struct btrfs_fs_info
+       btrfs_tree_read_unlock_blocking(eb);
+       free_extent_buffer(eb);
++      btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
++                                     eb_rewin, btrfs_header_level(eb_rewin));
+       btrfs_tree_read_lock(eb_rewin);
+       __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
+       WARN_ON(btrfs_header_nritems(eb_rewin) >
+@@ -1370,7 +1372,6 @@ get_old_root(struct btrfs_root *root, u6
+       if (!eb)
+               return NULL;
+-      btrfs_tree_read_lock(eb);
+       if (old_root) {
+               btrfs_set_header_bytenr(eb, eb->start);
+               btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
+@@ -1378,6 +1379,9 @@ get_old_root(struct btrfs_root *root, u6
+               btrfs_set_header_level(eb, old_root->level);
+               btrfs_set_header_generation(eb, old_generation);
+       }
++      btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
++                                     btrfs_header_level(eb));
++      btrfs_tree_read_lock(eb);
+       if (tm)
+               __tree_mod_log_rewind(fs_info, eb, time_seq, tm);
+       else
diff --git a/queue-5.8/btrfs-tree-checker-fix-the-error-message-for-transid-error.patch b/queue-5.8/btrfs-tree-checker-fix-the-error-message-for-transid-error.patch
new file mode 100644 (file)
index 0000000..114d4f4
--- /dev/null
@@ -0,0 +1,35 @@
+From f96d6960abbc52e26ad124e69e6815283d3e1674 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 25 Aug 2020 21:42:51 +0800
+Subject: btrfs: tree-checker: fix the error message for transid error
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit f96d6960abbc52e26ad124e69e6815283d3e1674 upstream.
+
+The error message for inode transid is the same as for inode generation,
+which makes us unable to detect the real problem.
+
+Reported-by: Tyler Richmond <t.d.richmond@gmail.com>
+Fixes: 496245cac57e ("btrfs: tree-checker: Verify inode item")
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Marcos Paulo de Souza <mpdesouza@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-checker.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -984,7 +984,7 @@ static int check_inode_item(struct exten
+       /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
+       if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
+               inode_item_err(leaf, slot,
+-                      "invalid inode generation: has %llu expect [0, %llu]",
++                      "invalid inode transid: has %llu expect [0, %llu]",
+                              btrfs_inode_transid(leaf, iitem), super_gen + 1);
+               return -EUCLEAN;
+       }
diff --git a/queue-5.8/ext2-don-t-update-mtime-on-cow-faults.patch b/queue-5.8/ext2-don-t-update-mtime-on-cow-faults.patch
new file mode 100644 (file)
index 0000000..528a66b
--- /dev/null
@@ -0,0 +1,61 @@
+From 1ef6ea0efe8e68d0299dad44c39dc6ad9e5d1f39 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Sat, 5 Sep 2020 08:12:01 -0400
+Subject: ext2: don't update mtime on COW faults
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 1ef6ea0efe8e68d0299dad44c39dc6ad9e5d1f39 upstream.
+
+When running in DAX mode, if the user maps a page with MAP_PRIVATE and
+PROT_WRITE, the ext2 filesystem would incorrectly update ctime and mtime
+when the user hits a COW fault.
+
+This breaks building of the Linux kernel.  How to reproduce:
+
+ 1. extract the Linux kernel tree on dax-mounted ext2 filesystem
+ 2. run make clean
+ 3. run make -j12
+ 4. run make -j12
+
+at step 4, make would incorrectly rebuild the whole kernel (although it
+was already built in step 3).
+
+The reason for the breakage is that almost all object files depend on
+objtool.  When we run objtool, it takes COW page fault on its .data
+section, and these faults will incorrectly update the timestamp of the
+objtool binary.  The updated timestamp causes make to rebuild the whole
+tree.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext2/file.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/ext2/file.c
++++ b/fs/ext2/file.c
+@@ -93,8 +93,10 @@ static vm_fault_t ext2_dax_fault(struct
+       struct inode *inode = file_inode(vmf->vma->vm_file);
+       struct ext2_inode_info *ei = EXT2_I(inode);
+       vm_fault_t ret;
++      bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
++              (vmf->vma->vm_flags & VM_SHARED);
+-      if (vmf->flags & FAULT_FLAG_WRITE) {
++      if (write) {
+               sb_start_pagefault(inode->i_sb);
+               file_update_time(vmf->vma->vm_file);
+       }
+@@ -103,7 +105,7 @@ static vm_fault_t ext2_dax_fault(struct
+       ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops);
+       up_read(&ei->dax_sem);
+-      if (vmf->flags & FAULT_FLAG_WRITE)
++      if (write)
+               sb_end_pagefault(inode->i_sb);
+       return ret;
+ }
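
A userspace probe (illustrative; "testfile" is assumed to exist on the
filesystem under test) for the behaviour fixed above: a store through a
MAP_PRIVATE mapping is a COW fault and must leave the file's mtime alone.

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		struct stat before, after;
		int fd = open("testfile", O_RDWR);
		char *p;

		if (fd < 0)
			return 1;
		fstat(fd, &before);
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE, fd, 0);
		if (p == MAP_FAILED)
			return 1;
		p[0] = 'x';   /* private COW write: file is untouched */
		fstat(fd, &after);
		printf("mtime %s\n", before.st_mtime == after.st_mtime ?
		       "unchanged (expected)" : "changed (the bug)");
		return 0;
	}

Per the commit message, the incorrect update only manifests on a DAX
mount, where every write fault goes through ext2_dax_fault().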
diff --git a/queue-5.8/iommu-vt-d-handle-36bit-addressing-for-x86-32.patch b/queue-5.8/iommu-vt-d-handle-36bit-addressing-for-x86-32.patch
new file mode 100644 (file)
index 0000000..d68ae8e
--- /dev/null
@@ -0,0 +1,74 @@
+From 29aaebbca4abc4cceb38738483051abefafb6950 Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat, 22 Aug 2020 17:02:09 +0100
+Subject: iommu/vt-d: Handle 36bit addressing for x86-32
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 29aaebbca4abc4cceb38738483051abefafb6950 upstream.
+
+Beware that the address size for x86-32 may exceed unsigned long.
+
+[    0.368971] UBSAN: shift-out-of-bounds in drivers/iommu/intel/iommu.c:128:14
+[    0.369055] shift exponent 36 is too large for 32-bit type 'long unsigned int'
+
+If we don't handle the wide addresses, the pages are mismapped and the
+device read/writes go astray, detected as DMAR faults and leading to
+device failure. The behaviour changed (from working to broken) in commit
+fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer"), but
+the error looks older.
+
+Fixes: fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer")
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
+Cc: James Sewart <jamessewart@arista.com>
+Cc: Lu Baolu <baolu.lu@linux.intel.com>
+Cc: Joerg Roedel <jroedel@suse.de>
+Cc: <stable@vger.kernel.org> # v5.3+
+Link: https://lore.kernel.org/r/20200822160209.28512-1-chris@chris-wilson.co.uk
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/iommu/intel/iommu.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -123,29 +123,29 @@ static inline unsigned int level_to_offs
+       return (level - 1) * LEVEL_STRIDE;
+ }
+-static inline int pfn_level_offset(unsigned long pfn, int level)
++static inline int pfn_level_offset(u64 pfn, int level)
+ {
+       return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
+ }
+-static inline unsigned long level_mask(int level)
++static inline u64 level_mask(int level)
+ {
+-      return -1UL << level_to_offset_bits(level);
++      return -1ULL << level_to_offset_bits(level);
+ }
+-static inline unsigned long level_size(int level)
++static inline u64 level_size(int level)
+ {
+-      return 1UL << level_to_offset_bits(level);
++      return 1ULL << level_to_offset_bits(level);
+ }
+-static inline unsigned long align_to_level(unsigned long pfn, int level)
++static inline u64 align_to_level(u64 pfn, int level)
+ {
+       return (pfn + level_size(level) - 1) & level_mask(level);
+ }
+ static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
+ {
+-      return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
++      return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
+ }
+ /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
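
A standalone C illustration of the UBSAN report above (the shift count
of 36 matches the report; types assume an ILP32 target):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int bits = 36;

		/* Undefined on 32-bit targets where unsigned long is 32 bits:
		 * unsigned long bad = 1UL << bits;
		 */
		uint64_t good = 1ULL << bits;  /* the patch's fix: widen first */
		printf("level size: 0x%llx\n", (unsigned long long)good);
		return 0;
	}
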
diff --git a/queue-5.8/series b/queue-5.8/series
index 649c97db770203eff2bd3da9e49e89dfe5dd4feb..254c80f81a4313ecfac2e8a82789278f1dace1f4 100644 (file)
--- a/queue-5.8/series
+++ b/queue-5.8/series
@@ -115,3 +115,18 @@ iommu-amd-restore-irte.remapen-bit-after-programming.patch
 iommu-amd-use-cmpxchg_double-when-updating-128-bit-i.patch
 net-packet-fix-overflow-in-tpacket_rcv.patch
 include-linux-log2.h-add-missing-around-n-in-roundup.patch
+iommu-vt-d-handle-36bit-addressing-for-x86-32.patch
+tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch
+x86-entry-fix-ac-assertion.patch
+x86-debug-allow-a-single-level-of-db-recursion.patch
+ext2-don-t-update-mtime-on-cow-faults.patch
+xfs-don-t-update-mtime-on-cow-faults.patch
+arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch
+arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch
+btrfs-drop-path-before-adding-new-uuid-tree-entry.patch
+btrfs-fix-potential-deadlock-in-the-search-ioctl.patch
+btrfs-allocate-scrub-workqueues-outside-of-locks.patch
+btrfs-set-the-correct-lockdep-class-for-new-nodes.patch
+btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch
+btrfs-block-group-fix-free-space-bitmap-threshold.patch
+btrfs-tree-checker-fix-the-error-message-for-transid-error.patch
diff --git a/queue-5.8/tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch b/queue-5.8/tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch
new file mode 100644 (file)
index 0000000..9432b6a
--- /dev/null
@@ -0,0 +1,43 @@
+From 2356bb4b8221d7dc8c7beb810418122ed90254c9 Mon Sep 17 00:00:00 2001
+From: Vamshi K Sthambamkadi <vamshi.k.sthambamkadi@gmail.com>
+Date: Fri, 28 Aug 2020 17:02:46 +0530
+Subject: tracing/kprobes, x86/ptrace: Fix regs argument order for i386
+
+From: Vamshi K Sthambamkadi <vamshi.k.sthambamkadi@gmail.com>
+
+commit 2356bb4b8221d7dc8c7beb810418122ed90254c9 upstream.
+
+On i386, the order of parameters passed in regs is eax, edx, and ecx
+(as per the regparm(3) calling convention).
+
+Change the mapping in regs_get_kernel_argument() so that arg1=ax,
+arg2=dx, and arg3=cx.
+
+With this fix, the selftest testcase kprobes_args_use.tc passes.
+
+Fixes: 3c88ee194c28 ("x86: ptrace: Add function argument access API")
+Signed-off-by: Vamshi K Sthambamkadi <vamshi.k.sthambamkadi@gmail.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20200828113242.GA1424@cosmos
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/ptrace.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/ptrace.h
++++ b/arch/x86/include/asm/ptrace.h
+@@ -322,8 +322,8 @@ static inline unsigned long regs_get_ker
+       static const unsigned int argument_offs[] = {
+ #ifdef __i386__
+               offsetof(struct pt_regs, ax),
+-              offsetof(struct pt_regs, cx),
+               offsetof(struct pt_regs, dx),
++              offsetof(struct pt_regs, cx),
+ #define NR_REG_ARGUMENTS 3
+ #else
+               offsetof(struct pt_regs, di),
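
The corrected order is easy to verify outside the kernel: GCC's
regparm(3) attribute uses the same eax/edx/ecx convention the message
describes. A small sketch (build with gcc -m32 -O2 -S and read the
generated assembly; the function and file names are illustrative):

/* regparm_demo.c */
static int __attribute__((regparm(3))) add3(int a, int b, int c)
{
	/* a arrives in %eax, b in %edx, c in %ecx */
	return a + b + c;
}

int caller(void)
{
	return add3(1, 2, 3);
}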
diff --git a/queue-5.8/x86-debug-allow-a-single-level-of-db-recursion.patch b/queue-5.8/x86-debug-allow-a-single-level-of-db-recursion.patch
new file mode 100644 (file)
index 0000000..67b7a8a
--- /dev/null
@@ -0,0 +1,161 @@
+From d5c678aed5eddb944b8e7ce451b107b39245962d Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Wed, 2 Sep 2020 15:25:51 +0200
+Subject: x86/debug: Allow a single level of #DB recursion
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit d5c678aed5eddb944b8e7ce451b107b39245962d upstream.
+
+Trying to clear DR7 around a #DB from usermode malfunctions if the task
+schedules when delivering SIGTRAP.
+
+Rather than trying to define a special no-recursion region, just allow a
+single level of recursion.  The same mechanism is used for NMI, and it
+hasn't caused any problems yet.
+
+Fixes: 9f58fdde95c9 ("x86/db: Split out dr6/7 handling")
+Reported-by: Kyle Huey <me@kylehuey.com>
+Debugged-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Daniel Thompson <daniel.thompson@linaro.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/8b9bd05f187231df008d48cf818a6a311cbd5c98.1597882384.git.luto@kernel.org
+Link: https://lore.kernel.org/r/20200902133200.726584153@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/traps.c |   66 +++++++++++++++++++++++-------------------------
+ 1 file changed, 32 insertions(+), 34 deletions(-)
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -733,20 +733,9 @@ static bool is_sysenter_singlestep(struc
+ #endif
+ }
+-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
++static __always_inline unsigned long debug_read_clear_dr6(void)
+ {
+-      /*
+-       * Disable breakpoints during exception handling; recursive exceptions
+-       * are exceedingly 'fun'.
+-       *
+-       * Since this function is NOKPROBE, and that also applies to
+-       * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
+-       * HW_BREAKPOINT_W on our stack)
+-       *
+-       * Entry text is excluded for HW_BP_X and cpu_entry_area, which
+-       * includes the entry stack is excluded for everything.
+-       */
+-      *dr7 = local_db_save();
++      unsigned long dr6;
+       /*
+        * The Intel SDM says:
+@@ -759,15 +748,12 @@ static __always_inline void debug_enter(
+        *
+        * Keep it simple: clear DR6 immediately.
+        */
+-      get_debugreg(*dr6, 6);
++      get_debugreg(dr6, 6);
+       set_debugreg(0, 6);
+       /* Filter out all the reserved bits which are preset to 1 */
+-      *dr6 &= ~DR6_RESERVED;
+-}
++      dr6 &= ~DR6_RESERVED;
+-static __always_inline void debug_exit(unsigned long dr7)
+-{
+-      local_db_restore(dr7);
++      return dr6;
+ }
+ /*
+@@ -867,6 +853,19 @@ out:
+ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
+                                            unsigned long dr6)
+ {
++      /*
++       * Disable breakpoints during exception handling; recursive exceptions
++       * are exceedingly 'fun'.
++       *
++       * Since this function is NOKPROBE, and that also applies to
++       * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
++       * HW_BREAKPOINT_W on our stack)
++       *
++       * Entry text is excluded for HW_BP_X and cpu_entry_area, which
++       * includes the entry stack is excluded for everything.
++       */
++      unsigned long dr7 = local_db_save();
++
+       nmi_enter();
+       instrumentation_begin();
+       trace_hardirqs_off_finish();
+@@ -890,6 +889,8 @@ static __always_inline void exc_debug_ke
+               trace_hardirqs_on_prepare();
+       instrumentation_end();
+       nmi_exit();
++
++      local_db_restore(dr7);
+ }
+ static __always_inline void exc_debug_user(struct pt_regs *regs,
+@@ -901,6 +902,15 @@ static __always_inline void exc_debug_us
+        */
+       WARN_ON_ONCE(!user_mode(regs));
++      /*
++       * NB: We can't easily clear DR7 here because
++       * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
++       * user memory, etc.  This means that a recursive #DB is possible.  If
++       * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
++       * Since we're not on the IST stack right now, everything will be
++       * fine.
++       */
++
+       idtentry_enter_user(regs);
+       instrumentation_begin();
+@@ -913,36 +923,24 @@ static __always_inline void exc_debug_us
+ /* IST stack entry */
+ DEFINE_IDTENTRY_DEBUG(exc_debug)
+ {
+-      unsigned long dr6, dr7;
+-
+-      debug_enter(&dr6, &dr7);
+-      exc_debug_kernel(regs, dr6);
+-      debug_exit(dr7);
++      exc_debug_kernel(regs, debug_read_clear_dr6());
+ }
+ /* User entry, runs on regular task stack */
+ DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
+ {
+-      unsigned long dr6, dr7;
+-
+-      debug_enter(&dr6, &dr7);
+-      exc_debug_user(regs, dr6);
+-      debug_exit(dr7);
++      exc_debug_user(regs, debug_read_clear_dr6());
+ }
+ #else
+ /* 32 bit does not have separate entry points. */
+ DEFINE_IDTENTRY_RAW(exc_debug)
+ {
+-      unsigned long dr6, dr7;
+-
+-      debug_enter(&dr6, &dr7);
++      unsigned long dr6 = debug_read_clear_dr6();
+       if (user_mode(regs))
+               exc_debug_user(regs, dr6);
+       else
+               exc_debug_kernel(regs, dr6);
+-
+-      debug_exit(dr7);
+ }
+ #endif
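
The pattern is the same one the NMI path relies on: save and zero the
breakpoint-enable state on entry, restore it on exit. A nested #DB then
saves an already-zero state and cannot recurse further. A user-space
sketch of that self-limiting behaviour (illustrative only; dr7_shadow
and the injected recursion are stand-ins, not kernel APIs):

#include <stdio.h>

static unsigned long dr7_shadow = 0x1;	/* stand-in DR7: breakpoints armed */

static unsigned long fake_db_save(void)
{
	unsigned long old = dr7_shadow;

	dr7_shadow = 0;			/* breakpoints off while handling */
	return old;
}

static void fake_db_restore(unsigned long v)
{
	dr7_shadow = v;
}

static void exc_debug(int depth)
{
	unsigned long dr7 = fake_db_save();

	printf("#DB depth %d (saved dr7=%#lx)\n", depth, dr7);

	/*
	 * Inject a nested #DB only if breakpoints were still armed on
	 * entry.  The nested call saves dr7 == 0, so it cannot recurse
	 * again: exactly one extra level is possible.
	 */
	if (dr7)
		exc_debug(depth + 1);

	fake_db_restore(dr7);
}

int main(void)
{
	exc_debug(0);
	return 0;
}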
diff --git a/queue-5.8/x86-entry-fix-ac-assertion.patch b/queue-5.8/x86-entry-fix-ac-assertion.patch
new file mode 100644 (file)
index 0000000..87cbdca
--- /dev/null
@@ -0,0 +1,50 @@
+From 662a0221893a3d58aa72719671844264306f6e4b Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 2 Sep 2020 15:25:50 +0200
+Subject: x86/entry: Fix AC assertion
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 662a0221893a3d58aa72719671844264306f6e4b upstream.
+
+The WARN added in commit 3c73b81a9164 ("x86/entry, selftests: Further
+improve user entry sanity checks") unconditionally triggers on an IVB
+machine because it does not support SMAP.
+
+For !SMAP hardware the CLAC/STAC instructions are patched out, and thus
+if userspace sets AC, it will still be set after entry.
+
+Fixes: 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity checks")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Daniel Thompson <daniel.thompson@linaro.org>
+Acked-by: Andy Lutomirski <luto@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20200902133200.666781610@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/common.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -55,8 +55,16 @@ static noinstr void check_user_regs(stru
+                * state, not the interrupt state as imagined by Xen.
+                */
+               unsigned long flags = native_save_fl();
+-              WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
+-                                    X86_EFLAGS_NT));
++              unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;
++
++              /*
++               * For !SMAP hardware we patch out CLAC on entry.
++               */
++              if (boot_cpu_has(X86_FEATURE_SMAP) ||
++                  (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV)))
++                      mask |= X86_EFLAGS_AC;
++
++              WARN_ON_ONCE(flags & mask);
+               /* We think we came from user mode. Make sure pt_regs agrees. */
+               WARN_ON_ONCE(!user_mode(regs));
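
EFLAGS.AC is bit 18, so the state the assertion inspects can be probed
from user space on any x86 machine. A minimal sketch (illustrative; on
SMAP hardware it is the kernel's CLAC on entry, not anything visible in
this user-space view, that keeps AC clear inside the kernel):

#include <stdio.h>

int main(void)
{
	unsigned long flags;

	/* pushf/pop reads the current EFLAGS (RFLAGS on 64-bit) */
	__asm__ volatile ("pushf\n\tpop %0" : "=r" (flags));
	printf("EFLAGS=%#lx AC=%lu\n", flags, (flags >> 18) & 1UL);
	return 0;
}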
diff --git a/queue-5.8/xfs-don-t-update-mtime-on-cow-faults.patch b/queue-5.8/xfs-don-t-update-mtime-on-cow-faults.patch
new file mode 100644 (file)
index 0000000..2a88060
--- /dev/null
@@ -0,0 +1,73 @@
+From b17164e258e3888d376a7434415013175d637377 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Sat, 5 Sep 2020 08:13:02 -0400
+Subject: xfs: don't update mtime on COW faults
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit b17164e258e3888d376a7434415013175d637377 upstream.
+
+When running in DAX mode, if the user maps a page with MAP_PRIVATE and
+PROT_WRITE, the xfs filesystem would incorrectly update ctime and mtime
+when the user hits a COW fault.
+
+This breaks building of the Linux kernel.  How to reproduce:
+
+ 1. extract the Linux kernel tree on dax-mounted xfs filesystem
+ 2. run make clean
+ 3. run make -j12
+ 4. run make -j12
+
+at step 4, make would incorrectly rebuild the whole kernel (although it
+was already built in step 3).
+
+The reason for the breakage is that almost all object files depend on
+objtool.  When we run objtool, it takes a COW page fault on its .data
+section, and these faults will incorrectly update the timestamp of the
+objtool binary.  The updated timestamp causes make to rebuild the whole
+tree.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_file.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -1220,6 +1220,14 @@ __xfs_filemap_fault(
+       return ret;
+ }
++static inline bool
++xfs_is_write_fault(
++      struct vm_fault         *vmf)
++{
++      return (vmf->flags & FAULT_FLAG_WRITE) &&
++             (vmf->vma->vm_flags & VM_SHARED);
++}
++
+ static vm_fault_t
+ xfs_filemap_fault(
+       struct vm_fault         *vmf)
+@@ -1227,7 +1235,7 @@ xfs_filemap_fault(
+       /* DAX can shortcut the normal fault path on write faults! */
+       return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
+                       IS_DAX(file_inode(vmf->vma->vm_file)) &&
+-                      (vmf->flags & FAULT_FLAG_WRITE));
++                      xfs_is_write_fault(vmf));
+ }
+ static vm_fault_t
+@@ -1240,7 +1248,7 @@ xfs_filemap_huge_fault(
+       /* DAX can shortcut the normal fault path on write faults! */
+       return __xfs_filemap_fault(vmf, pe_size,
+-                      (vmf->flags & FAULT_FLAG_WRITE));
++                      xfs_is_write_fault(vmf));
+ }
+ static vm_fault_t
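
The offending fault is easy to trigger from user space: map a file
MAP_PRIVATE with PROT_WRITE and store through the mapping. A sketch of
such a reproducer (illustrative; assumes a file named testfile on the
filesystem under test, and per the message above the spurious mtime
update only occurred on DAX mounts):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat before, after;
	int fd = open("testfile", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	fstat(fd, &before);

	/* MAP_PRIVATE: the write below is a COW fault, not a file write */
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	p[0] ^= 1;			/* take the COW fault */

	fstat(fd, &after);
	printf("mtime %s\n", before.st_mtime == after.st_mtime ?
	       "unchanged (expected)" : "changed (bug)");

	munmap(p, 4096);
	close(fd);
	return 0;
}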