From: Greg Kroah-Hartman Date: Mon, 7 Sep 2020 16:20:58 +0000 (+0200) Subject: 5.8-stable patches X-Git-Tag: v4.14.197~31 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e4c86329142c5ebb37dfa715edf58a565becb330;p=thirdparty%2Fkernel%2Fstable-queue.git 5.8-stable patches added patches: arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch btrfs-allocate-scrub-workqueues-outside-of-locks.patch btrfs-block-group-fix-free-space-bitmap-threshold.patch btrfs-drop-path-before-adding-new-uuid-tree-entry.patch btrfs-fix-potential-deadlock-in-the-search-ioctl.patch btrfs-set-the-correct-lockdep-class-for-new-nodes.patch btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch btrfs-tree-checker-fix-the-error-message-for-transid-error.patch ext2-don-t-update-mtime-on-cow-faults.patch iommu-vt-d-handle-36bit-addressing-for-x86-32.patch tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch x86-debug-allow-a-single-level-of-db-recursion.patch x86-entry-fix-ac-assertion.patch xfs-don-t-update-mtime-on-cow-faults.patch --- diff --git a/queue-5.8/arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch b/queue-5.8/arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch new file mode 100644 index 00000000000..f489122ec6b --- /dev/null +++ b/queue-5.8/arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch @@ -0,0 +1,106 @@ +From 4af22ded0ecf23adea1b26ea264c53f9f1cfc310 Mon Sep 17 00:00:00 2001 +From: Mike Rapoport +Date: Fri, 28 Aug 2020 19:39:02 +0300 +Subject: arc: fix memory initialization for systems with two memory banks + +From: Mike Rapoport + +commit 4af22ded0ecf23adea1b26ea264c53f9f1cfc310 upstream. + +Rework of memory map initialization broke initialization of ARC systems +with two memory banks. Before these changes, memblock was not aware of +nodes configuration and the memory map was always allocated from the +"lowmem" bank. After the addition of node information to memblock, the core +mm attempts to allocate the memory map for the "highmem" bank from its +node. The access to this memory using __va() fails because it can be only +accessed using kmap. + +Anther problem that was uncovered is that {min,max}_high_pfn are calculated +from u64 high_mem_start variable which prevents truncation to 32-bit +physical address and the PFN values are above the node and zone boundaries. + +Use phys_addr_t type for high_mem_start and high_mem_size to ensure +correspondence between PFNs and highmem zone boundaries and reserve the +entire highmem bank until mem_init() to avoid accesses to it before highmem +is enabled. + +To test this: +1. Enable HIGHMEM in ARC config +2. 
Enable 2 memory banks in haps_hs.dts (uncomment the 2nd bank) + +Fixes: 51930df5801e ("mm: free_area_init: allow defining max_zone_pfn in descending order") +Cc: stable@vger.kernel.org [5.8] +Signed-off-by: Mike Rapoport +Signed-off-by: Vineet Gupta +[vgupta: added instructions to test highmem] +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arc/mm/init.c | 27 ++++++++++++++++----------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +--- a/arch/arc/mm/init.c ++++ b/arch/arc/mm/init.c +@@ -27,8 +27,8 @@ static unsigned long low_mem_sz; + + #ifdef CONFIG_HIGHMEM + static unsigned long min_high_pfn, max_high_pfn; +-static u64 high_mem_start; +-static u64 high_mem_sz; ++static phys_addr_t high_mem_start; ++static phys_addr_t high_mem_sz; + #endif + + #ifdef CONFIG_DISCONTIGMEM +@@ -70,6 +70,7 @@ void __init early_init_dt_add_memory_arc + high_mem_sz = size; + in_use = 1; + memblock_add_node(base, size, 1); ++ memblock_reserve(base, size); + #endif + } + +@@ -158,7 +159,7 @@ void __init setup_arch_memory(void) + min_high_pfn = PFN_DOWN(high_mem_start); + max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); + +- max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; ++ max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn; + + high_memory = (void *)(min_high_pfn << PAGE_SHIFT); + kmap_init(); +@@ -167,22 +168,26 @@ void __init setup_arch_memory(void) + free_area_init(max_zone_pfn); + } + +-/* +- * mem_init - initializes memory +- * +- * Frees up bootmem +- * Calculates and displays memory available/used +- */ +-void __init mem_init(void) ++static void __init highmem_init(void) + { + #ifdef CONFIG_HIGHMEM + unsigned long tmp; + +- reset_all_zones_managed_pages(); ++ memblock_free(high_mem_start, high_mem_sz); + for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); + #endif ++} + ++/* ++ * mem_init - initializes memory ++ * ++ * Frees up bootmem ++ * Calculates and displays memory available/used ++ */ ++void __init mem_init(void) ++{ + memblock_free_all(); ++ highmem_init(); + mem_init_print_info(NULL); + } diff --git a/queue-5.8/arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch b/queue-5.8/arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch new file mode 100644 index 00000000000..7236b8cac87 --- /dev/null +++ b/queue-5.8/arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch @@ -0,0 +1,73 @@ +From feb92d7d3813456c11dce215b3421801a78a8986 Mon Sep 17 00:00:00 2001 +From: Vineet Gupta +Date: Sun, 26 Jul 2020 21:51:59 -0700 +Subject: ARC: perf: don't bail setup if pct irq missing in device-tree + +From: Vineet Gupta + +commit feb92d7d3813456c11dce215b3421801a78a8986 upstream. + +Current code inadventely bails if hardware supports sampling/overflow +interrupts, but the irq is missing from device tree. + +| +| # perf stat -e cycles,instructions,major-faults,minor-faults ../hackbench +| Running with 10 groups 400 process +| Time: 0.921 +| +| Performance counter stats for '../hackbench': +| +| cycles +| instructions +| 0 major-faults +| 8679 minor-faults + +This need not be as we can still do simple counting based perf stat. 
+This unborks perf on HSDK-4xD + +Cc: +Signed-off-by: Vineet Gupta +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arc/kernel/perf_event.c | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +--- a/arch/arc/kernel/perf_event.c ++++ b/arch/arc/kernel/perf_event.c +@@ -562,7 +562,7 @@ static int arc_pmu_device_probe(struct p + { + struct arc_reg_pct_build pct_bcr; + struct arc_reg_cc_build cc_bcr; +- int i, has_interrupts; ++ int i, has_interrupts, irq; + int counter_size; /* in bits */ + + union cc_name { +@@ -637,13 +637,7 @@ static int arc_pmu_device_probe(struct p + .attr_groups = arc_pmu->attr_groups, + }; + +- if (has_interrupts) { +- int irq = platform_get_irq(pdev, 0); +- +- if (irq < 0) { +- pr_err("Cannot get IRQ number for the platform\n"); +- return -ENODEV; +- } ++ if (has_interrupts && (irq = platform_get_irq(pdev, 0) >= 0)) { + + arc_pmu->irq = irq; + +@@ -652,9 +646,9 @@ static int arc_pmu_device_probe(struct p + this_cpu_ptr(&arc_pmu_cpu)); + + on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); +- +- } else ++ } else { + arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; ++ } + + /* + * perf parser doesn't really like '-' symbol in events name, so let's diff --git a/queue-5.8/btrfs-allocate-scrub-workqueues-outside-of-locks.patch b/queue-5.8/btrfs-allocate-scrub-workqueues-outside-of-locks.patch new file mode 100644 index 00000000000..d412c9e6f39 --- /dev/null +++ b/queue-5.8/btrfs-allocate-scrub-workqueues-outside-of-locks.patch @@ -0,0 +1,407 @@ +From e89c4a9c8e6ce3a84cab4f342687d3fbbb1234eb Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 10 Aug 2020 11:42:29 -0400 +Subject: btrfs: allocate scrub workqueues outside of locks + +From: Josef Bacik + +commit e89c4a9c8e6ce3a84cab4f342687d3fbbb1234eb upstream. + +I got the following lockdep splat while testing: + + ====================================================== + WARNING: possible circular locking dependency detected + 5.8.0-rc7-00172-g021118712e59 #932 Not tainted + ------------------------------------------------------ + btrfs/229626 is trying to acquire lock: + ffffffff828513f0 (cpu_hotplug_lock){++++}-{0:0}, at: alloc_workqueue+0x378/0x450 + + but task is already holding lock: + ffff889dd3889518 (&fs_info->scrub_lock){+.+.}-{3:3}, at: btrfs_scrub_dev+0x11c/0x630 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #7 (&fs_info->scrub_lock){+.+.}-{3:3}: + __mutex_lock+0x9f/0x930 + btrfs_scrub_dev+0x11c/0x630 + btrfs_dev_replace_by_ioctl.cold.21+0x10a/0x1d4 + btrfs_ioctl+0x2799/0x30a0 + ksys_ioctl+0x83/0xc0 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #6 (&fs_devs->device_list_mutex){+.+.}-{3:3}: + __mutex_lock+0x9f/0x930 + btrfs_run_dev_stats+0x49/0x480 + commit_cowonly_roots+0xb5/0x2a0 + btrfs_commit_transaction+0x516/0xa60 + sync_filesystem+0x6b/0x90 + generic_shutdown_super+0x22/0x100 + kill_anon_super+0xe/0x30 + btrfs_kill_super+0x12/0x20 + deactivate_locked_super+0x29/0x60 + cleanup_mnt+0xb8/0x140 + task_work_run+0x6d/0xb0 + __prepare_exit_to_usermode+0x1cc/0x1e0 + do_syscall_64+0x5c/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #5 (&fs_info->tree_log_mutex){+.+.}-{3:3}: + __mutex_lock+0x9f/0x930 + btrfs_commit_transaction+0x4bb/0xa60 + sync_filesystem+0x6b/0x90 + generic_shutdown_super+0x22/0x100 + kill_anon_super+0xe/0x30 + btrfs_kill_super+0x12/0x20 + deactivate_locked_super+0x29/0x60 + cleanup_mnt+0xb8/0x140 + task_work_run+0x6d/0xb0 + __prepare_exit_to_usermode+0x1cc/0x1e0 + do_syscall_64+0x5c/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #4 (&fs_info->reloc_mutex){+.+.}-{3:3}: + __mutex_lock+0x9f/0x930 + btrfs_record_root_in_trans+0x43/0x70 + start_transaction+0xd1/0x5d0 + btrfs_dirty_inode+0x42/0xd0 + touch_atime+0xa1/0xd0 + btrfs_file_mmap+0x3f/0x60 + mmap_region+0x3a4/0x640 + do_mmap+0x376/0x580 + vm_mmap_pgoff+0xd5/0x120 + ksys_mmap_pgoff+0x193/0x230 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #3 (&mm->mmap_lock#2){++++}-{3:3}: + __might_fault+0x68/0x90 + _copy_to_user+0x1e/0x80 + perf_read+0x141/0x2c0 + vfs_read+0xad/0x1b0 + ksys_read+0x5f/0xe0 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #2 (&cpuctx_mutex){+.+.}-{3:3}: + __mutex_lock+0x9f/0x930 + perf_event_init_cpu+0x88/0x150 + perf_event_init+0x1db/0x20b + start_kernel+0x3ae/0x53c + secondary_startup_64+0xa4/0xb0 + + -> #1 (pmus_lock){+.+.}-{3:3}: + __mutex_lock+0x9f/0x930 + perf_event_init_cpu+0x4f/0x150 + cpuhp_invoke_callback+0xb1/0x900 + _cpu_up.constprop.26+0x9f/0x130 + cpu_up+0x7b/0xc0 + bringup_nonboot_cpus+0x4f/0x60 + smp_init+0x26/0x71 + kernel_init_freeable+0x110/0x258 + kernel_init+0xa/0x103 + ret_from_fork+0x1f/0x30 + + -> #0 (cpu_hotplug_lock){++++}-{0:0}: + __lock_acquire+0x1272/0x2310 + lock_acquire+0x9e/0x360 + cpus_read_lock+0x39/0xb0 + alloc_workqueue+0x378/0x450 + __btrfs_alloc_workqueue+0x15d/0x200 + btrfs_alloc_workqueue+0x51/0x160 + scrub_workers_get+0x5a/0x170 + btrfs_scrub_dev+0x18c/0x630 + btrfs_dev_replace_by_ioctl.cold.21+0x10a/0x1d4 + btrfs_ioctl+0x2799/0x30a0 + ksys_ioctl+0x83/0xc0 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + other info that might help us debug this: + + Chain exists of: + cpu_hotplug_lock --> &fs_devs->device_list_mutex --> &fs_info->scrub_lock + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(&fs_info->scrub_lock); + lock(&fs_devs->device_list_mutex); + lock(&fs_info->scrub_lock); + lock(cpu_hotplug_lock); + + *** DEADLOCK *** + + 2 locks held by btrfs/229626: + #0: ffff88bfe8bb86e0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_scrub_dev+0xbd/0x630 + #1: ffff889dd3889518 (&fs_info->scrub_lock){+.+.}-{3:3}, at: btrfs_scrub_dev+0x11c/0x630 + + stack backtrace: + CPU: 15 PID: 229626 Comm: btrfs Kdump: 
loaded Not tainted 5.8.0-rc7-00172-g021118712e59 #932 + Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018 + Call Trace: + dump_stack+0x78/0xa0 + check_noncircular+0x165/0x180 + __lock_acquire+0x1272/0x2310 + lock_acquire+0x9e/0x360 + ? alloc_workqueue+0x378/0x450 + cpus_read_lock+0x39/0xb0 + ? alloc_workqueue+0x378/0x450 + alloc_workqueue+0x378/0x450 + ? rcu_read_lock_sched_held+0x52/0x80 + __btrfs_alloc_workqueue+0x15d/0x200 + btrfs_alloc_workqueue+0x51/0x160 + scrub_workers_get+0x5a/0x170 + btrfs_scrub_dev+0x18c/0x630 + ? start_transaction+0xd1/0x5d0 + btrfs_dev_replace_by_ioctl.cold.21+0x10a/0x1d4 + btrfs_ioctl+0x2799/0x30a0 + ? do_sigaction+0x102/0x250 + ? lockdep_hardirqs_on_prepare+0xca/0x160 + ? _raw_spin_unlock_irq+0x24/0x30 + ? trace_hardirqs_on+0x1c/0xe0 + ? _raw_spin_unlock_irq+0x24/0x30 + ? do_sigaction+0x102/0x250 + ? ksys_ioctl+0x83/0xc0 + ksys_ioctl+0x83/0xc0 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +This happens because we're allocating the scrub workqueues under the +scrub and device list mutex, which brings in a whole host of other +dependencies. + +Because the work queue allocation is done with GFP_KERNEL, it can +trigger reclaim, which can lead to a transaction commit, which in turns +needs the device_list_mutex, it can lead to a deadlock. A different +problem for which this fix is a solution. + +Fix this by moving the actual allocation outside of the +scrub lock, and then only take the lock once we're ready to actually +assign them to the fs_info. We'll now have to cleanup the workqueues in +a few more places, so I've added a helper to do the refcount dance to +safely free the workqueues. + +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/scrub.c | 122 +++++++++++++++++++++++++++++++------------------------ + 1 file changed, 70 insertions(+), 52 deletions(-) + +--- a/fs/btrfs/scrub.c ++++ b/fs/btrfs/scrub.c +@@ -3783,50 +3783,84 @@ static noinline_for_stack int scrub_supe + return 0; + } + ++static void scrub_workers_put(struct btrfs_fs_info *fs_info) ++{ ++ if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt, ++ &fs_info->scrub_lock)) { ++ struct btrfs_workqueue *scrub_workers = NULL; ++ struct btrfs_workqueue *scrub_wr_comp = NULL; ++ struct btrfs_workqueue *scrub_parity = NULL; ++ ++ scrub_workers = fs_info->scrub_workers; ++ scrub_wr_comp = fs_info->scrub_wr_completion_workers; ++ scrub_parity = fs_info->scrub_parity_workers; ++ ++ fs_info->scrub_workers = NULL; ++ fs_info->scrub_wr_completion_workers = NULL; ++ fs_info->scrub_parity_workers = NULL; ++ mutex_unlock(&fs_info->scrub_lock); ++ ++ btrfs_destroy_workqueue(scrub_workers); ++ btrfs_destroy_workqueue(scrub_wr_comp); ++ btrfs_destroy_workqueue(scrub_parity); ++ } ++} ++ + /* + * get a reference count on fs_info->scrub_workers. 
start worker if necessary + */ + static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, + int is_dev_replace) + { ++ struct btrfs_workqueue *scrub_workers = NULL; ++ struct btrfs_workqueue *scrub_wr_comp = NULL; ++ struct btrfs_workqueue *scrub_parity = NULL; + unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; + int max_active = fs_info->thread_pool_size; ++ int ret = -ENOMEM; + +- lockdep_assert_held(&fs_info->scrub_lock); ++ if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt)) ++ return 0; + +- if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) { +- ASSERT(fs_info->scrub_workers == NULL); +- fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", +- flags, is_dev_replace ? 1 : max_active, 4); +- if (!fs_info->scrub_workers) +- goto fail_scrub_workers; +- +- ASSERT(fs_info->scrub_wr_completion_workers == NULL); +- fs_info->scrub_wr_completion_workers = +- btrfs_alloc_workqueue(fs_info, "scrubwrc", flags, +- max_active, 2); +- if (!fs_info->scrub_wr_completion_workers) +- goto fail_scrub_wr_completion_workers; ++ scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", flags, ++ is_dev_replace ? 1 : max_active, 4); ++ if (!scrub_workers) ++ goto fail_scrub_workers; + +- ASSERT(fs_info->scrub_parity_workers == NULL); +- fs_info->scrub_parity_workers = +- btrfs_alloc_workqueue(fs_info, "scrubparity", flags, ++ scrub_wr_comp = btrfs_alloc_workqueue(fs_info, "scrubwrc", flags, + max_active, 2); +- if (!fs_info->scrub_parity_workers) +- goto fail_scrub_parity_workers; ++ if (!scrub_wr_comp) ++ goto fail_scrub_wr_completion_workers; + ++ scrub_parity = btrfs_alloc_workqueue(fs_info, "scrubparity", flags, ++ max_active, 2); ++ if (!scrub_parity) ++ goto fail_scrub_parity_workers; ++ ++ mutex_lock(&fs_info->scrub_lock); ++ if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) { ++ ASSERT(fs_info->scrub_workers == NULL && ++ fs_info->scrub_wr_completion_workers == NULL && ++ fs_info->scrub_parity_workers == NULL); ++ fs_info->scrub_workers = scrub_workers; ++ fs_info->scrub_wr_completion_workers = scrub_wr_comp; ++ fs_info->scrub_parity_workers = scrub_parity; + refcount_set(&fs_info->scrub_workers_refcnt, 1); +- } else { +- refcount_inc(&fs_info->scrub_workers_refcnt); ++ mutex_unlock(&fs_info->scrub_lock); ++ return 0; + } +- return 0; ++ /* Other thread raced in and created the workers for us */ ++ refcount_inc(&fs_info->scrub_workers_refcnt); ++ mutex_unlock(&fs_info->scrub_lock); + ++ ret = 0; ++ btrfs_destroy_workqueue(scrub_parity); + fail_scrub_parity_workers: +- btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); ++ btrfs_destroy_workqueue(scrub_wr_comp); + fail_scrub_wr_completion_workers: +- btrfs_destroy_workqueue(fs_info->scrub_workers); ++ btrfs_destroy_workqueue(scrub_workers); + fail_scrub_workers: +- return -ENOMEM; ++ return ret; + } + + int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, +@@ -3837,9 +3871,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info + int ret; + struct btrfs_device *dev; + unsigned int nofs_flag; +- struct btrfs_workqueue *scrub_workers = NULL; +- struct btrfs_workqueue *scrub_wr_comp = NULL; +- struct btrfs_workqueue *scrub_parity = NULL; + + if (btrfs_fs_closing(fs_info)) + return -EAGAIN; +@@ -3886,13 +3917,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info + if (IS_ERR(sctx)) + return PTR_ERR(sctx); + ++ ret = scrub_workers_get(fs_info, is_dev_replace); ++ if (ret) ++ goto out_free_ctx; ++ + mutex_lock(&fs_info->fs_devices->device_list_mutex); + dev = 
btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true); + if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) && + !is_dev_replace)) { + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + ret = -ENODEV; +- goto out_free_ctx; ++ goto out; + } + + if (!is_dev_replace && !readonly && +@@ -3901,7 +3936,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info + btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable", + rcu_str_deref(dev->name)); + ret = -EROFS; +- goto out_free_ctx; ++ goto out; + } + + mutex_lock(&fs_info->scrub_lock); +@@ -3910,7 +3945,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info + mutex_unlock(&fs_info->scrub_lock); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + ret = -EIO; +- goto out_free_ctx; ++ goto out; + } + + down_read(&fs_info->dev_replace.rwsem); +@@ -3921,17 +3956,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info + mutex_unlock(&fs_info->scrub_lock); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + ret = -EINPROGRESS; +- goto out_free_ctx; ++ goto out; + } + up_read(&fs_info->dev_replace.rwsem); + +- ret = scrub_workers_get(fs_info, is_dev_replace); +- if (ret) { +- mutex_unlock(&fs_info->scrub_lock); +- mutex_unlock(&fs_info->fs_devices->device_list_mutex); +- goto out_free_ctx; +- } +- + sctx->readonly = readonly; + dev->scrub_ctx = sctx; + mutex_unlock(&fs_info->fs_devices->device_list_mutex); +@@ -3984,24 +4012,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info + + mutex_lock(&fs_info->scrub_lock); + dev->scrub_ctx = NULL; +- if (refcount_dec_and_test(&fs_info->scrub_workers_refcnt)) { +- scrub_workers = fs_info->scrub_workers; +- scrub_wr_comp = fs_info->scrub_wr_completion_workers; +- scrub_parity = fs_info->scrub_parity_workers; +- +- fs_info->scrub_workers = NULL; +- fs_info->scrub_wr_completion_workers = NULL; +- fs_info->scrub_parity_workers = NULL; +- } + mutex_unlock(&fs_info->scrub_lock); + +- btrfs_destroy_workqueue(scrub_workers); +- btrfs_destroy_workqueue(scrub_wr_comp); +- btrfs_destroy_workqueue(scrub_parity); ++ scrub_workers_put(fs_info); + scrub_put_ctx(sctx); + + return ret; +- ++out: ++ scrub_workers_put(fs_info); + out_free_ctx: + scrub_free_ctx(sctx); + diff --git a/queue-5.8/btrfs-block-group-fix-free-space-bitmap-threshold.patch b/queue-5.8/btrfs-block-group-fix-free-space-bitmap-threshold.patch new file mode 100644 index 00000000000..78f02a3eb4c --- /dev/null +++ b/queue-5.8/btrfs-block-group-fix-free-space-bitmap-threshold.patch @@ -0,0 +1,84 @@ +From e3e39c72b99f93bbd0420d38c858e7c4a061bb63 Mon Sep 17 00:00:00 2001 +From: Marcos Paulo de Souza +Date: Fri, 21 Aug 2020 11:54:44 -0300 +Subject: btrfs: block-group: fix free-space bitmap threshold + +From: Marcos Paulo de Souza + +commit e3e39c72b99f93bbd0420d38c858e7c4a061bb63 upstream. + +[BUG] +After commit 9afc66498a0b ("btrfs: block-group: refactor how we read one +block group item"), cache->length is being assigned after calling +btrfs_create_block_group_cache. This causes a problem since +set_free_space_tree_thresholds calculates the free-space threshold to +decide if the free-space tree should convert from extents to bitmaps. + +The current code calls set_free_space_tree_thresholds with cache->length +being 0, which then makes cache->bitmap_high_thresh zero. This implies +the system will always use bitmap instead of extents, which is not +desired if the block group is not fragmented. 
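To make the conversion rule concrete: the free-space tree switches to bitmaps once recording the free space as individual extents would cost more metadata than one bitmap covering the whole group. A standalone sketch of that comparison, using illustrative constants rather than btrfs's exact on-disk sizes, shows why a zero length degenerates to "always bitmaps":

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative values, not btrfs's exact on-disk numbers: one bit
     * of bitmap per 4 KiB sector, ~50 bytes per free-space extent. */
    #define SECTORSIZE  4096ULL
    #define EXTENT_COST   50ULL

    static uint64_t bitmap_bytes(uint64_t length)
    {
        return (length / SECTORSIZE + 7) / 8;   /* one bit per sector */
    }

    int main(void)
    {
        uint64_t lengths[] = { 0, 1ULL << 30 };  /* the bug case, and 1 GiB */

        for (int i = 0; i < 2; i++) {
            /* convert to a bitmap once more extents than this exist */
            uint64_t high_thresh = bitmap_bytes(lengths[i]) / EXTENT_COST;

            printf("length %llu -> extent threshold %llu%s\n",
                   (unsigned long long)lengths[i],
                   (unsigned long long)high_thresh,
                   high_thresh ? "" : " (always use bitmaps)");
        }
        return 0;
    }

With length 0 the threshold is 0, which is exactly the degenerate behavior described above.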
+ +This behavior can be seen by a test that expects to repair systems +with FREE_SPACE_EXTENT and FREE_SPACE_BITMAP, but the current code only +created FREE_SPACE_BITMAP. + +[FIX] +Call set_free_space_tree_thresholds after setting cache->length. There +is now a WARN_ON in set_free_space_tree_thresholds to help preventing +the same mistake to happen again in the future. + +Link: https://github.com/kdave/btrfs-progs/issues/251 +Fixes: 9afc66498a0b ("btrfs: block-group: refactor how we read one block group item") +CC: stable@vger.kernel.org # 5.8+ +Reviewed-by: Qu Wenruo +Reviewed-by: Filipe Manana +Signed-off-by: Marcos Paulo de Souza +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/block-group.c | 4 +++- + fs/btrfs/free-space-tree.c | 4 ++++ + 2 files changed, 7 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/block-group.c ++++ b/fs/btrfs/block-group.c +@@ -1814,7 +1814,6 @@ static struct btrfs_block_group *btrfs_c + + cache->fs_info = fs_info; + cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start); +- set_free_space_tree_thresholds(cache); + + cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED; + +@@ -1928,6 +1927,8 @@ static int read_one_block_group(struct b + if (ret < 0) + goto error; + ++ set_free_space_tree_thresholds(cache); ++ + if (need_clear) { + /* + * When we mount with old space cache, we need to +@@ -2148,6 +2149,7 @@ int btrfs_make_block_group(struct btrfs_ + return -ENOMEM; + + cache->length = size; ++ set_free_space_tree_thresholds(cache); + cache->used = bytes_used; + cache->flags = type; + cache->last_byte_to_unpin = (u64)-1; +--- a/fs/btrfs/free-space-tree.c ++++ b/fs/btrfs/free-space-tree.c +@@ -22,6 +22,10 @@ void set_free_space_tree_thresholds(stru + size_t bitmap_size; + u64 num_bitmaps, total_bitmap_size; + ++ if (WARN_ON(cache->length == 0)) ++ btrfs_warn(cache->fs_info, "block group %llu length is zero", ++ cache->start); ++ + /* + * We convert to bitmaps when the disk space required for using extents + * exceeds that required for using bitmaps. diff --git a/queue-5.8/btrfs-drop-path-before-adding-new-uuid-tree-entry.patch b/queue-5.8/btrfs-drop-path-before-adding-new-uuid-tree-entry.patch new file mode 100644 index 00000000000..33e06b33e5f --- /dev/null +++ b/queue-5.8/btrfs-drop-path-before-adding-new-uuid-tree-entry.patch @@ -0,0 +1,145 @@ +From 9771a5cf937129307d9f58922d60484d58ababe7 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 10 Aug 2020 11:42:26 -0400 +Subject: btrfs: drop path before adding new uuid tree entry + +From: Josef Bacik + +commit 9771a5cf937129307d9f58922d60484d58ababe7 upstream. + +With the conversion of the tree locks to rwsem I got the following +lockdep splat: + + ====================================================== + WARNING: possible circular locking dependency detected + 5.8.0-rc7-00167-g0d7ba0c5b375-dirty #925 Not tainted + ------------------------------------------------------ + btrfs-uuid/7955 is trying to acquire lock: + ffff88bfbafec0f8 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180 + + but task is already holding lock: + ffff88bfbafef2a8 (btrfs-uuid-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #1 (btrfs-uuid-00){++++}-{3:3}: + down_read_nested+0x3e/0x140 + __btrfs_tree_read_lock+0x39/0x180 + __btrfs_read_lock_root_node+0x3a/0x50 + btrfs_search_slot+0x4bd/0x990 + btrfs_uuid_tree_add+0x89/0x2d0 + btrfs_uuid_scan_kthread+0x330/0x390 + kthread+0x133/0x150 + ret_from_fork+0x1f/0x30 + + -> #0 (btrfs-root-00){++++}-{3:3}: + __lock_acquire+0x1272/0x2310 + lock_acquire+0x9e/0x360 + down_read_nested+0x3e/0x140 + __btrfs_tree_read_lock+0x39/0x180 + __btrfs_read_lock_root_node+0x3a/0x50 + btrfs_search_slot+0x4bd/0x990 + btrfs_find_root+0x45/0x1b0 + btrfs_read_tree_root+0x61/0x100 + btrfs_get_root_ref.part.50+0x143/0x630 + btrfs_uuid_tree_iterate+0x207/0x314 + btrfs_uuid_rescan_kthread+0x12/0x50 + kthread+0x133/0x150 + ret_from_fork+0x1f/0x30 + + other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(btrfs-uuid-00); + lock(btrfs-root-00); + lock(btrfs-uuid-00); + lock(btrfs-root-00); + + *** DEADLOCK *** + + 1 lock held by btrfs-uuid/7955: + #0: ffff88bfbafef2a8 (btrfs-uuid-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180 + + stack backtrace: + CPU: 73 PID: 7955 Comm: btrfs-uuid Kdump: loaded Not tainted 5.8.0-rc7-00167-g0d7ba0c5b375-dirty #925 + Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018 + Call Trace: + dump_stack+0x78/0xa0 + check_noncircular+0x165/0x180 + __lock_acquire+0x1272/0x2310 + lock_acquire+0x9e/0x360 + ? __btrfs_tree_read_lock+0x39/0x180 + ? btrfs_root_node+0x1c/0x1d0 + down_read_nested+0x3e/0x140 + ? __btrfs_tree_read_lock+0x39/0x180 + __btrfs_tree_read_lock+0x39/0x180 + __btrfs_read_lock_root_node+0x3a/0x50 + btrfs_search_slot+0x4bd/0x990 + btrfs_find_root+0x45/0x1b0 + btrfs_read_tree_root+0x61/0x100 + btrfs_get_root_ref.part.50+0x143/0x630 + btrfs_uuid_tree_iterate+0x207/0x314 + ? btree_readpage+0x20/0x20 + btrfs_uuid_rescan_kthread+0x12/0x50 + kthread+0x133/0x150 + ? kthread_create_on_node+0x60/0x60 + ret_from_fork+0x1f/0x30 + +This problem exists because we have two different rescan threads, +btrfs_uuid_scan_kthread which creates the uuid tree, and +btrfs_uuid_tree_iterate that goes through and updates or deletes any out +of date roots. The problem is they both do things in different order. +btrfs_uuid_scan_kthread() reads the tree_root, and then inserts entries +into the uuid_root. btrfs_uuid_tree_iterate() scans the uuid_root, but +then does a btrfs_get_fs_root() which can read from the tree_root. + +It's actually easy enough to not be holding the path in +btrfs_uuid_scan_kthread() when we add a uuid entry, as we already drop +it further down and re-start the search when we loop. So simply move +the path release before we add our entry to the uuid tree. + +This also fixes a problem where we're holding a path open after we do +btrfs_end_transaction(), which has it's own problems. 
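The underlying pattern is a classic ABBA inversion, and the fix is the standard cure: copy out what you need and release the first lock before taking the second, so neither context holds one lock while waiting on the other. A minimal pthread model of the fixed ordering (lock and thread names are illustrative, not btrfs API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t tree_root = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t uuid_root = PTHREAD_MUTEX_INITIALIZER;
    static int item;

    /* models btrfs_uuid_scan_kthread(): tree root first, then uuid root */
    static void *scan_thread(void *arg)
    {
        pthread_mutex_lock(&tree_root);
        int local = item;                  /* copy out what we need */
        pthread_mutex_unlock(&tree_root);  /* the fix: drop it here */

        pthread_mutex_lock(&uuid_root);
        printf("scan: add uuid entry %d\n", local);
        pthread_mutex_unlock(&uuid_root);
        return arg;
    }

    /* models btrfs_uuid_tree_iterate(): uuid root first, then tree root */
    static void *iterate_thread(void *arg)
    {
        pthread_mutex_lock(&uuid_root);
        int local = item;                  /* note the entry to check */
        pthread_mutex_unlock(&uuid_root);  /* same rule, opposite order */

        pthread_mutex_lock(&tree_root);
        printf("iterate: look up root for %d\n", local);
        pthread_mutex_unlock(&tree_root);
        return arg;
    }

    int main(void)
    {
        pthread_t a, b;

        pthread_create(&a, NULL, scan_thread, NULL);
        pthread_create(&b, NULL, iterate_thread, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
    }

Had either thread kept its first mutex across the second acquisition, the two opposite orders could interleave into exactly the deadlock lockdep reports above.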
+ +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/volumes.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -4462,6 +4462,7 @@ int btrfs_uuid_scan_kthread(void *data) + goto skip; + } + update_tree: ++ btrfs_release_path(path); + if (!btrfs_is_empty_uuid(root_item.uuid)) { + ret = btrfs_uuid_tree_add(trans, root_item.uuid, + BTRFS_UUID_KEY_SUBVOL, +@@ -4486,6 +4487,7 @@ update_tree: + } + + skip: ++ btrfs_release_path(path); + if (trans) { + ret = btrfs_end_transaction(trans); + trans = NULL; +@@ -4493,7 +4495,6 @@ skip: + break; + } + +- btrfs_release_path(path); + if (key.offset < (u64)-1) { + key.offset++; + } else if (key.type < BTRFS_ROOT_ITEM_KEY) { diff --git a/queue-5.8/btrfs-fix-potential-deadlock-in-the-search-ioctl.patch b/queue-5.8/btrfs-fix-potential-deadlock-in-the-search-ioctl.patch new file mode 100644 index 00000000000..8873ff21aa5 --- /dev/null +++ b/queue-5.8/btrfs-fix-potential-deadlock-in-the-search-ioctl.patch @@ -0,0 +1,219 @@ +From a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 10 Aug 2020 11:42:27 -0400 +Subject: btrfs: fix potential deadlock in the search ioctl + +From: Josef Bacik + +commit a48b73eca4ceb9b8a4b97f290a065335dbcd8a04 upstream. + +With the conversion of the tree locks to rwsem I got the following +lockdep splat: + + ====================================================== + WARNING: possible circular locking dependency detected + 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 Not tainted + ------------------------------------------------------ + compsize/11122 is trying to acquire lock: + ffff889fabca8768 (&mm->mmap_lock#2){++++}-{3:3}, at: __might_fault+0x3e/0x90 + + but task is already holding lock: + ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #2 (btrfs-fs-00){++++}-{3:3}: + down_write_nested+0x3b/0x70 + __btrfs_tree_lock+0x24/0x120 + btrfs_search_slot+0x756/0x990 + btrfs_lookup_inode+0x3a/0xb4 + __btrfs_update_delayed_inode+0x93/0x270 + btrfs_async_run_delayed_root+0x168/0x230 + btrfs_work_helper+0xd4/0x570 + process_one_work+0x2ad/0x5f0 + worker_thread+0x3a/0x3d0 + kthread+0x133/0x150 + ret_from_fork+0x1f/0x30 + + -> #1 (&delayed_node->mutex){+.+.}-{3:3}: + __mutex_lock+0x9f/0x930 + btrfs_delayed_update_inode+0x50/0x440 + btrfs_update_inode+0x8a/0xf0 + btrfs_dirty_inode+0x5b/0xd0 + touch_atime+0xa1/0xd0 + btrfs_file_mmap+0x3f/0x60 + mmap_region+0x3a4/0x640 + do_mmap+0x376/0x580 + vm_mmap_pgoff+0xd5/0x120 + ksys_mmap_pgoff+0x193/0x230 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + -> #0 (&mm->mmap_lock#2){++++}-{3:3}: + __lock_acquire+0x1272/0x2310 + lock_acquire+0x9e/0x360 + __might_fault+0x68/0x90 + _copy_to_user+0x1e/0x80 + copy_to_sk.isra.32+0x121/0x300 + search_ioctl+0x106/0x200 + btrfs_ioctl_tree_search_v2+0x7b/0xf0 + btrfs_ioctl+0x106f/0x30a0 + ksys_ioctl+0x83/0xc0 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + + other info that might help us debug this: + + Chain exists of: + &mm->mmap_lock#2 --> &delayed_node->mutex --> btrfs-fs-00 + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(btrfs-fs-00); + lock(&delayed_node->mutex); + lock(btrfs-fs-00); + lock(&mm->mmap_lock#2); + + *** DEADLOCK *** + + 1 lock held by compsize/11122: + #0: ffff889fe720fe40 (btrfs-fs-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x39/0x180 + + stack backtrace: + CPU: 17 PID: 11122 Comm: compsize Kdump: loaded Not tainted 5.8.0-rc7-00165-g04ec4da5f45f-dirty #922 + Hardware name: Quanta Tioga Pass Single Side 01-0030993006/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018 + Call Trace: + dump_stack+0x78/0xa0 + check_noncircular+0x165/0x180 + __lock_acquire+0x1272/0x2310 + lock_acquire+0x9e/0x360 + ? __might_fault+0x3e/0x90 + ? find_held_lock+0x72/0x90 + __might_fault+0x68/0x90 + ? __might_fault+0x3e/0x90 + _copy_to_user+0x1e/0x80 + copy_to_sk.isra.32+0x121/0x300 + ? btrfs_search_forward+0x2a6/0x360 + search_ioctl+0x106/0x200 + btrfs_ioctl_tree_search_v2+0x7b/0xf0 + btrfs_ioctl+0x106f/0x30a0 + ? __do_sys_newfstat+0x5a/0x70 + ? ksys_ioctl+0x83/0xc0 + ksys_ioctl+0x83/0xc0 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x50/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +The problem is we're doing a copy_to_user() while holding tree locks, +which can deadlock if we have to do a page fault for the copy_to_user(). +This exists even without my locking changes, so it needs to be fixed. +Rework the search ioctl to do the pre-fault and then +copy_to_user_nofault for the copying. 
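In outline the reworked loop is: with no tree locks held, fault the destination pages in; then, under the locks, use only the non-faulting copy, and treat a failed copy as "go around again" rather than as an error. A stub model of that control flow (fault_in() and copy_nofault() are stand-ins for fault_in_pages_writeable() and copy_to_user_nofault(), not their real signatures):

    #include <stdbool.h>
    #include <stdio.h>

    static bool resident;      /* is the user page faulted in? */
    static int attempts;       /* used to simulate one reclaim */

    static int fault_in(void)          /* called with no locks held */
    {
        resident = true;
        return 0;                      /* nonzero would be a real -EFAULT */
    }

    static int copy_nofault(void)      /* must not sleep: fails, never faults */
    {
        if (attempts++ == 0)
            resident = false;          /* page reclaimed before the copy */
        return resident ? 0 : -1;
    }

    int main(void)
    {
        for (;;) {
            if (fault_in())            /* no tree locks held here */
                break;                 /* genuine fault: give up */

            /* tree read locks taken here; sleeping is forbidden below */
            int ret = copy_nofault();
            /* tree read locks dropped here */

            if (ret == 0) {
                puts("copied");
                break;
            }
            /* copy faulted under lock: loop and fault in again */
        }
        return 0;
    }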
+ +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent_io.c | 8 ++++---- + fs/btrfs/extent_io.h | 6 +++--- + fs/btrfs/ioctl.c | 27 ++++++++++++++++++++------- + 3 files changed, 27 insertions(+), 14 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -5640,9 +5640,9 @@ void read_extent_buffer(const struct ext + } + } + +-int read_extent_buffer_to_user(const struct extent_buffer *eb, +- void __user *dstv, +- unsigned long start, unsigned long len) ++int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb, ++ void __user *dstv, ++ unsigned long start, unsigned long len) + { + size_t cur; + size_t offset; +@@ -5662,7 +5662,7 @@ int read_extent_buffer_to_user(const str + + cur = min(len, (PAGE_SIZE - offset)); + kaddr = page_address(page); +- if (copy_to_user(dst, kaddr + offset, cur)) { ++ if (copy_to_user_nofault(dst, kaddr + offset, cur)) { + ret = -EFAULT; + break; + } +--- a/fs/btrfs/extent_io.h ++++ b/fs/btrfs/extent_io.h +@@ -241,9 +241,9 @@ int memcmp_extent_buffer(const struct ex + void read_extent_buffer(const struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +-int read_extent_buffer_to_user(const struct extent_buffer *eb, +- void __user *dst, unsigned long start, +- unsigned long len); ++int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb, ++ void __user *dst, unsigned long start, ++ unsigned long len); + void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src); + void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb, + const void *src); +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -2086,9 +2086,14 @@ static noinline int copy_to_sk(struct bt + sh.len = item_len; + sh.transid = found_transid; + +- /* copy search result header */ +- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { +- ret = -EFAULT; ++ /* ++ * Copy search result header. If we fault then loop again so we ++ * can fault in the pages and -EFAULT there if there's a ++ * problem. Otherwise we'll fault and then copy the buffer in ++ * properly this next time through ++ */ ++ if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) { ++ ret = 0; + goto out; + } + +@@ -2096,10 +2101,14 @@ static noinline int copy_to_sk(struct bt + + if (item_len) { + char __user *up = ubuf + *sk_offset; +- /* copy the item */ +- if (read_extent_buffer_to_user(leaf, up, +- item_off, item_len)) { +- ret = -EFAULT; ++ /* ++ * Copy the item, same behavior as above, but reset the ++ * * sk_offset so we copy the full thing again. 
++ */ ++ if (read_extent_buffer_to_user_nofault(leaf, up, ++ item_off, item_len)) { ++ ret = 0; ++ *sk_offset -= sizeof(sh); + goto out; + } + +@@ -2184,6 +2193,10 @@ static noinline int search_ioctl(struct + key.offset = sk->min_offset; + + while (1) { ++ ret = fault_in_pages_writeable(ubuf, *buf_size - sk_offset); ++ if (ret) ++ break; ++ + ret = btrfs_search_forward(root, &key, path, sk->min_transid); + if (ret != 0) { + if (ret > 0) diff --git a/queue-5.8/btrfs-set-the-correct-lockdep-class-for-new-nodes.patch b/queue-5.8/btrfs-set-the-correct-lockdep-class-for-new-nodes.patch new file mode 100644 index 00000000000..0a74aa39773 --- /dev/null +++ b/queue-5.8/btrfs-set-the-correct-lockdep-class-for-new-nodes.patch @@ -0,0 +1,43 @@ +From ad24466588ab7d7c879053c5afd919b0c555fec0 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 10 Aug 2020 11:42:30 -0400 +Subject: btrfs: set the correct lockdep class for new nodes + +From: Josef Bacik + +commit ad24466588ab7d7c879053c5afd919b0c555fec0 upstream. + +When flipping over to the rw_semaphore I noticed I'd get a lockdep splat +in replace_path(), which is weird because we're swapping the reloc root +with the actual target root. Turns out this is because we're using the +root->root_key.objectid as the root id for the newly allocated tree +block when setting the lockdep class, however we need to be using the +actual owner of this new block, which is saved in owner. + +The affected path is through btrfs_copy_root as all other callers of +btrfs_alloc_tree_block (which calls init_new_buffer) have root_objectid +== root->root_key.objectid . + +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Filipe Manana +Reviewed-by: Nikolay Borisov +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent-tree.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -4527,7 +4527,7 @@ btrfs_init_new_buffer(struct btrfs_trans + return ERR_PTR(-EUCLEAN); + } + +- btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); ++ btrfs_set_buffer_lockdep_class(owner, buf, level); + btrfs_tree_lock(buf); + btrfs_clean_tree_block(buf); + clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); diff --git a/queue-5.8/btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch b/queue-5.8/btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch new file mode 100644 index 00000000000..b1b63d0fc3a --- /dev/null +++ b/queue-5.8/btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch @@ -0,0 +1,55 @@ +From d3beaa253fd6fa40b8b18a216398e6e5376a9d21 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Mon, 10 Aug 2020 11:42:31 -0400 +Subject: btrfs: set the lockdep class for log tree extent buffers + +From: Josef Bacik + +commit d3beaa253fd6fa40b8b18a216398e6e5376a9d21 upstream. + +These are special extent buffers that get rewound in order to lookup +the state of the tree at a specific point in time. As such they do not +go through the normal initialization paths that set their lockdep class, +so handle them appropriately when they are created and before they are +locked. 
+ +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -1297,6 +1297,8 @@ tree_mod_log_rewind(struct btrfs_fs_info + btrfs_tree_read_unlock_blocking(eb); + free_extent_buffer(eb); + ++ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin), ++ eb_rewin, btrfs_header_level(eb_rewin)); + btrfs_tree_read_lock(eb_rewin); + __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm); + WARN_ON(btrfs_header_nritems(eb_rewin) > +@@ -1370,7 +1372,6 @@ get_old_root(struct btrfs_root *root, u6 + + if (!eb) + return NULL; +- btrfs_tree_read_lock(eb); + if (old_root) { + btrfs_set_header_bytenr(eb, eb->start); + btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); +@@ -1378,6 +1379,9 @@ get_old_root(struct btrfs_root *root, u6 + btrfs_set_header_level(eb, old_root->level); + btrfs_set_header_generation(eb, old_generation); + } ++ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, ++ btrfs_header_level(eb)); ++ btrfs_tree_read_lock(eb); + if (tm) + __tree_mod_log_rewind(fs_info, eb, time_seq, tm); + else diff --git a/queue-5.8/btrfs-tree-checker-fix-the-error-message-for-transid-error.patch b/queue-5.8/btrfs-tree-checker-fix-the-error-message-for-transid-error.patch new file mode 100644 index 00000000000..114d4f4eb15 --- /dev/null +++ b/queue-5.8/btrfs-tree-checker-fix-the-error-message-for-transid-error.patch @@ -0,0 +1,35 @@ +From f96d6960abbc52e26ad124e69e6815283d3e1674 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Tue, 25 Aug 2020 21:42:51 +0800 +Subject: btrfs: tree-checker: fix the error message for transid error + +From: Qu Wenruo + +commit f96d6960abbc52e26ad124e69e6815283d3e1674 upstream. + +The error message for inode transid is the same as for inode generation, +which makes us unable to detect the real problem. + +Reported-by: Tyler Richmond +Fixes: 496245cac57e ("btrfs: tree-checker: Verify inode item") +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Marcos Paulo de Souza +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-checker.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -984,7 +984,7 @@ static int check_inode_item(struct exten + /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */ + if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) { + inode_item_err(leaf, slot, +- "invalid inode generation: has %llu expect [0, %llu]", ++ "invalid inode transid: has %llu expect [0, %llu]", + btrfs_inode_transid(leaf, iitem), super_gen + 1); + return -EUCLEAN; + } diff --git a/queue-5.8/ext2-don-t-update-mtime-on-cow-faults.patch b/queue-5.8/ext2-don-t-update-mtime-on-cow-faults.patch new file mode 100644 index 00000000000..528a66b37ae --- /dev/null +++ b/queue-5.8/ext2-don-t-update-mtime-on-cow-faults.patch @@ -0,0 +1,61 @@ +From 1ef6ea0efe8e68d0299dad44c39dc6ad9e5d1f39 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Sat, 5 Sep 2020 08:12:01 -0400 +Subject: ext2: don't update mtime on COW faults + +From: Mikulas Patocka + +commit 1ef6ea0efe8e68d0299dad44c39dc6ad9e5d1f39 upstream. + +When running in a dax mode, if the user maps a page with MAP_PRIVATE and +PROT_WRITE, the ext2 filesystem would incorrectly update ctime and mtime +when the user hits a COW fault. 
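The key distinction the fix draws is "shared and writable", not merely "writable": stores into a MAP_PRIVATE mapping are COWed and never reach the file, so they must not bump file timestamps. A minimal reproducer sketch (the mount point and file name are illustrative; run it on a dax-mounted ext2 before and after the fix):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
        struct stat before, after;
        int fd = open("/mnt/dax/testfile", O_RDWR | O_CREAT, 0644);

        if (fd < 0 || ftruncate(fd, 4096) || fstat(fd, &before))
            return 1;
        sleep(1);                       /* make an mtime bump observable */

        /* MAP_PRIVATE + PROT_WRITE: the store below triggers a COW
         * fault; the file itself is never modified. */
        char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED)
            return 1;
        p[0] = 1;                       /* the COW fault */

        fstat(fd, &after);
        printf("mtime %s\n", before.st_mtime == after.st_mtime ?
               "unchanged (correct)" : "updated (the bug)");
        return 0;
    }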
+ +This breaks building of the Linux kernel. How to reproduce: + + 1. extract the Linux kernel tree on dax-mounted ext2 filesystem + 2. run make clean + 3. run make -j12 + 4. run make -j12 + +at step 4, make would incorrectly rebuild the whole kernel (although it +was already built in step 3). + +The reason for the breakage is that almost all object files depend on +objtool. When we run objtool, it takes COW page fault on its .data +section, and these faults will incorrectly update the timestamp of the +objtool binary. The updated timestamp causes make to rebuild the whole +tree. + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext2/file.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/ext2/file.c ++++ b/fs/ext2/file.c +@@ -93,8 +93,10 @@ static vm_fault_t ext2_dax_fault(struct + struct inode *inode = file_inode(vmf->vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + vm_fault_t ret; ++ bool write = (vmf->flags & FAULT_FLAG_WRITE) && ++ (vmf->vma->vm_flags & VM_SHARED); + +- if (vmf->flags & FAULT_FLAG_WRITE) { ++ if (write) { + sb_start_pagefault(inode->i_sb); + file_update_time(vmf->vma->vm_file); + } +@@ -103,7 +105,7 @@ static vm_fault_t ext2_dax_fault(struct + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops); + + up_read(&ei->dax_sem); +- if (vmf->flags & FAULT_FLAG_WRITE) ++ if (write) + sb_end_pagefault(inode->i_sb); + return ret; + } diff --git a/queue-5.8/iommu-vt-d-handle-36bit-addressing-for-x86-32.patch b/queue-5.8/iommu-vt-d-handle-36bit-addressing-for-x86-32.patch new file mode 100644 index 00000000000..d68ae8e5b94 --- /dev/null +++ b/queue-5.8/iommu-vt-d-handle-36bit-addressing-for-x86-32.patch @@ -0,0 +1,74 @@ +From 29aaebbca4abc4cceb38738483051abefafb6950 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Sat, 22 Aug 2020 17:02:09 +0100 +Subject: iommu/vt-d: Handle 36bit addressing for x86-32 + +From: Chris Wilson + +commit 29aaebbca4abc4cceb38738483051abefafb6950 upstream. + +Beware that the address size for x86-32 may exceed unsigned long. + +[ 0.368971] UBSAN: shift-out-of-bounds in drivers/iommu/intel/iommu.c:128:14 +[ 0.369055] shift exponent 36 is too large for 32-bit type 'long unsigned int' + +If we don't handle the wide addresses, the pages are mismapped and the +device read/writes go astray, detected as DMAR faults and leading to +device failure. The behaviour changed (from working to broken) in commit +fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer"), but +the error looks older. 
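The root cause is plain integer arithmetic: on x86-32, unsigned long is 32 bits wide, so a shift by 36 is undefined behavior and every mask or size derived from it is garbage. The fix simply performs the shifts in a 64-bit type. A standalone illustration (build with -m32 to match the report; the constant is the exponent from the UBSAN splat):

    #include <stdint.h>
    #include <stdio.h>

    #define OFFSET_BITS 36   /* the shift exponent UBSAN complained about */

    int main(void)
    {
        /* Broken form: with -m32, 1UL is 32 bits wide and this shift is
         * undefined ("shift exponent 36 is too large"). */
        /* unsigned long bad = 1UL << OFFSET_BITS; */

        /* Fixed form, as in the patched level_size()/level_mask(): do
         * the shift in a 64-bit type regardless of the ABI. */
        uint64_t size = 1ULL << OFFSET_BITS;
        uint64_t mask = -1ULL << OFFSET_BITS;

        printf("size = 0x%llx\nmask = 0x%llx\n",
               (unsigned long long)size, (unsigned long long)mask);
        return 0;
    }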
+ +Fixes: fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer") +Signed-off-by: Chris Wilson +Acked-by: Lu Baolu +Cc: James Sewart +Cc: Lu Baolu +Cc: Joerg Roedel +Cc: # v5.3+ +Link: https://lore.kernel.org/r/20200822160209.28512-1-chris@chris-wilson.co.uk +Signed-off-by: Joerg Roedel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/iommu/intel/iommu.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -123,29 +123,29 @@ static inline unsigned int level_to_offs + return (level - 1) * LEVEL_STRIDE; + } + +-static inline int pfn_level_offset(unsigned long pfn, int level) ++static inline int pfn_level_offset(u64 pfn, int level) + { + return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; + } + +-static inline unsigned long level_mask(int level) ++static inline u64 level_mask(int level) + { +- return -1UL << level_to_offset_bits(level); ++ return -1ULL << level_to_offset_bits(level); + } + +-static inline unsigned long level_size(int level) ++static inline u64 level_size(int level) + { +- return 1UL << level_to_offset_bits(level); ++ return 1ULL << level_to_offset_bits(level); + } + +-static inline unsigned long align_to_level(unsigned long pfn, int level) ++static inline u64 align_to_level(u64 pfn, int level) + { + return (pfn + level_size(level) - 1) & level_mask(level); + } + + static inline unsigned long lvl_to_nr_pages(unsigned int lvl) + { +- return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); ++ return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); + } + + /* VT-d pages must always be _smaller_ than MM pages. Otherwise things diff --git a/queue-5.8/series b/queue-5.8/series index 649c97db770..254c80f81a4 100644 --- a/queue-5.8/series +++ b/queue-5.8/series @@ -115,3 +115,18 @@ iommu-amd-restore-irte.remapen-bit-after-programming.patch iommu-amd-use-cmpxchg_double-when-updating-128-bit-i.patch net-packet-fix-overflow-in-tpacket_rcv.patch include-linux-log2.h-add-missing-around-n-in-roundup.patch +iommu-vt-d-handle-36bit-addressing-for-x86-32.patch +tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch +x86-entry-fix-ac-assertion.patch +x86-debug-allow-a-single-level-of-db-recursion.patch +ext2-don-t-update-mtime-on-cow-faults.patch +xfs-don-t-update-mtime-on-cow-faults.patch +arc-perf-don-t-bail-setup-if-pct-irq-missing-in-device-tree.patch +arc-fix-memory-initialization-for-systems-with-two-memory-banks.patch +btrfs-drop-path-before-adding-new-uuid-tree-entry.patch +btrfs-fix-potential-deadlock-in-the-search-ioctl.patch +btrfs-allocate-scrub-workqueues-outside-of-locks.patch +btrfs-set-the-correct-lockdep-class-for-new-nodes.patch +btrfs-set-the-lockdep-class-for-log-tree-extent-buffers.patch +btrfs-block-group-fix-free-space-bitmap-threshold.patch +btrfs-tree-checker-fix-the-error-message-for-transid-error.patch diff --git a/queue-5.8/tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch b/queue-5.8/tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch new file mode 100644 index 00000000000..9432b6a11b9 --- /dev/null +++ b/queue-5.8/tracing-kprobes-x86-ptrace-fix-regs-argument-order-for-i386.patch @@ -0,0 +1,43 @@ +From 2356bb4b8221d7dc8c7beb810418122ed90254c9 Mon Sep 17 00:00:00 2001 +From: Vamshi K Sthambamkadi +Date: Fri, 28 Aug 2020 17:02:46 +0530 +Subject: tracing/kprobes, x86/ptrace: Fix regs argument order for i386 + +From: Vamshi K Sthambamkadi + +commit 2356bb4b8221d7dc8c7beb810418122ed90254c9 
upstream. + +On i386, the order of parameters passed on regs is eax,edx,and ecx +(as per regparm(3) calling conventions). + +Change the mapping in regs_get_kernel_argument(), so that arg1=ax +arg2=dx, and arg3=cx. + +Running the selftests testcase kprobes_args_use.tc shows the result +as passed. + +Fixes: 3c88ee194c28 ("x86: ptrace: Add function argument access API") +Signed-off-by: Vamshi K Sthambamkadi +Signed-off-by: Borislav Petkov +Acked-by: Masami Hiramatsu +Acked-by: Peter Zijlstra (Intel) +Cc: +Link: https://lkml.kernel.org/r/20200828113242.GA1424@cosmos +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/ptrace.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/include/asm/ptrace.h ++++ b/arch/x86/include/asm/ptrace.h +@@ -322,8 +322,8 @@ static inline unsigned long regs_get_ker + static const unsigned int argument_offs[] = { + #ifdef __i386__ + offsetof(struct pt_regs, ax), +- offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), ++ offsetof(struct pt_regs, cx), + #define NR_REG_ARGUMENTS 3 + #else + offsetof(struct pt_regs, di), diff --git a/queue-5.8/x86-debug-allow-a-single-level-of-db-recursion.patch b/queue-5.8/x86-debug-allow-a-single-level-of-db-recursion.patch new file mode 100644 index 00000000000..67b7a8af29a --- /dev/null +++ b/queue-5.8/x86-debug-allow-a-single-level-of-db-recursion.patch @@ -0,0 +1,161 @@ +From d5c678aed5eddb944b8e7ce451b107b39245962d Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Wed, 2 Sep 2020 15:25:51 +0200 +Subject: x86/debug: Allow a single level of #DB recursion + +From: Andy Lutomirski + +commit d5c678aed5eddb944b8e7ce451b107b39245962d upstream. + +Trying to clear DR7 around a #DB from usermode malfunctions if the tasks +schedules when delivering SIGTRAP. + +Rather than trying to define a special no-recursion region, just allow a +single level of recursion. The same mechanism is used for NMI, and it +hasn't caused any problems yet. + +Fixes: 9f58fdde95c9 ("x86/db: Split out dr6/7 handling") +Reported-by: Kyle Huey +Debugged-by: Josh Poimboeuf +Signed-off-by: Andy Lutomirski +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Thomas Gleixner +Tested-by: Daniel Thompson +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/8b9bd05f187231df008d48cf818a6a311cbd5c98.1597882384.git.luto@kernel.org +Link: https://lore.kernel.org/r/20200902133200.726584153@infradead.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/traps.c | 66 +++++++++++++++++++++++------------------------- + 1 file changed, 32 insertions(+), 34 deletions(-) + +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -733,20 +733,9 @@ static bool is_sysenter_singlestep(struc + #endif + } + +-static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) ++static __always_inline unsigned long debug_read_clear_dr6(void) + { +- /* +- * Disable breakpoints during exception handling; recursive exceptions +- * are exceedingly 'fun'. +- * +- * Since this function is NOKPROBE, and that also applies to +- * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a +- * HW_BREAKPOINT_W on our stack) +- * +- * Entry text is excluded for HW_BP_X and cpu_entry_area, which +- * includes the entry stack is excluded for everything. +- */ +- *dr7 = local_db_save(); ++ unsigned long dr6; + + /* + * The Intel SDM says: +@@ -759,15 +748,12 @@ static __always_inline void debug_enter( + * + * Keep it simple: clear DR6 immediately. 
+ */ +- get_debugreg(*dr6, 6); ++ get_debugreg(dr6, 6); + set_debugreg(0, 6); + /* Filter out all the reserved bits which are preset to 1 */ +- *dr6 &= ~DR6_RESERVED; +-} ++ dr6 &= ~DR6_RESERVED; + +-static __always_inline void debug_exit(unsigned long dr7) +-{ +- local_db_restore(dr7); ++ return dr6; + } + + /* +@@ -867,6 +853,19 @@ out: + static __always_inline void exc_debug_kernel(struct pt_regs *regs, + unsigned long dr6) + { ++ /* ++ * Disable breakpoints during exception handling; recursive exceptions ++ * are exceedingly 'fun'. ++ * ++ * Since this function is NOKPROBE, and that also applies to ++ * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a ++ * HW_BREAKPOINT_W on our stack) ++ * ++ * Entry text is excluded for HW_BP_X and cpu_entry_area, which ++ * includes the entry stack is excluded for everything. ++ */ ++ unsigned long dr7 = local_db_save(); ++ + nmi_enter(); + instrumentation_begin(); + trace_hardirqs_off_finish(); +@@ -890,6 +889,8 @@ static __always_inline void exc_debug_ke + trace_hardirqs_on_prepare(); + instrumentation_end(); + nmi_exit(); ++ ++ local_db_restore(dr7); + } + + static __always_inline void exc_debug_user(struct pt_regs *regs, +@@ -901,6 +902,15 @@ static __always_inline void exc_debug_us + */ + WARN_ON_ONCE(!user_mode(regs)); + ++ /* ++ * NB: We can't easily clear DR7 here because ++ * idtentry_exit_to_usermode() can invoke ptrace, schedule, access ++ * user memory, etc. This means that a recursive #DB is possible. If ++ * this happens, that #DB will hit exc_debug_kernel() and clear DR7. ++ * Since we're not on the IST stack right now, everything will be ++ * fine. ++ */ ++ + idtentry_enter_user(regs); + instrumentation_begin(); + +@@ -913,36 +923,24 @@ static __always_inline void exc_debug_us + /* IST stack entry */ + DEFINE_IDTENTRY_DEBUG(exc_debug) + { +- unsigned long dr6, dr7; +- +- debug_enter(&dr6, &dr7); +- exc_debug_kernel(regs, dr6); +- debug_exit(dr7); ++ exc_debug_kernel(regs, debug_read_clear_dr6()); + } + + /* User entry, runs on regular task stack */ + DEFINE_IDTENTRY_DEBUG_USER(exc_debug) + { +- unsigned long dr6, dr7; +- +- debug_enter(&dr6, &dr7); +- exc_debug_user(regs, dr6); +- debug_exit(dr7); ++ exc_debug_user(regs, debug_read_clear_dr6()); + } + #else + /* 32 bit does not have separate entry points. */ + DEFINE_IDTENTRY_RAW(exc_debug) + { +- unsigned long dr6, dr7; +- +- debug_enter(&dr6, &dr7); ++ unsigned long dr6 = debug_read_clear_dr6(); + + if (user_mode(regs)) + exc_debug_user(regs, dr6); + else + exc_debug_kernel(regs, dr6); +- +- debug_exit(dr7); + } + #endif + diff --git a/queue-5.8/x86-entry-fix-ac-assertion.patch b/queue-5.8/x86-entry-fix-ac-assertion.patch new file mode 100644 index 00000000000..87cbdca9719 --- /dev/null +++ b/queue-5.8/x86-entry-fix-ac-assertion.patch @@ -0,0 +1,50 @@ +From 662a0221893a3d58aa72719671844264306f6e4b Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Wed, 2 Sep 2020 15:25:50 +0200 +Subject: x86/entry: Fix AC assertion + +From: Peter Zijlstra + +commit 662a0221893a3d58aa72719671844264306f6e4b upstream. + +The WARN added in commit 3c73b81a9164 ("x86/entry, selftests: Further +improve user entry sanity checks") unconditionally triggers on a IVB +machine because it does not support SMAP. + +For !SMAP hardware the CLAC/STAC instructions are patched out and thus if +userspace sets AC, it is still have set after entry. 
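The fix keys the assertion on the feature rather than on an assumption: AC is only expected to be clear on entry when CLAC/STAC are actually patched in, so it joins the warn mask only on SMAP hardware (or 64-bit Xen PV). The SMAP check is an architectural CPUID bit, which a userspace sketch can mirror (the EFLAGS bit values are architectural; the helper and mask construction are illustrative):

    #include <cpuid.h>
    #include <stdio.h>

    #define X86_EFLAGS_DF (1UL << 10)
    #define X86_EFLAGS_NT (1UL << 14)
    #define X86_EFLAGS_AC (1UL << 18)

    static int cpu_has_smap(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
            return 0;
        return (ebx >> 20) & 1;    /* CPUID.(EAX=7,ECX=0):EBX.SMAP[20] */
    }

    int main(void)
    {
        unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;

        /* Only assert a clear AC where CLAC/STAC are in use, mirroring
         * the patched check_user_regs(). */
        if (cpu_has_smap())
            mask |= X86_EFLAGS_AC;

        printf("warn mask: %#lx (SMAP: %s)\n", mask,
               cpu_has_smap() ? "yes" : "no");
        return 0;
    }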
+ +Fixes: 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity checks") +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Thomas Gleixner +Tested-by: Daniel Thompson +Acked-by: Andy Lutomirski +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20200902133200.666781610@infradead.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/common.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/arch/x86/entry/common.c ++++ b/arch/x86/entry/common.c +@@ -55,8 +55,16 @@ static noinstr void check_user_regs(stru + * state, not the interrupt state as imagined by Xen. + */ + unsigned long flags = native_save_fl(); +- WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF | +- X86_EFLAGS_NT)); ++ unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT; ++ ++ /* ++ * For !SMAP hardware we patch out CLAC on entry. ++ */ ++ if (boot_cpu_has(X86_FEATURE_SMAP) || ++ (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV))) ++ mask |= X86_EFLAGS_AC; ++ ++ WARN_ON_ONCE(flags & mask); + + /* We think we came from user mode. Make sure pt_regs agrees. */ + WARN_ON_ONCE(!user_mode(regs)); diff --git a/queue-5.8/xfs-don-t-update-mtime-on-cow-faults.patch b/queue-5.8/xfs-don-t-update-mtime-on-cow-faults.patch new file mode 100644 index 00000000000..2a88060bf66 --- /dev/null +++ b/queue-5.8/xfs-don-t-update-mtime-on-cow-faults.patch @@ -0,0 +1,73 @@ +From b17164e258e3888d376a7434415013175d637377 Mon Sep 17 00:00:00 2001 +From: Mikulas Patocka +Date: Sat, 5 Sep 2020 08:13:02 -0400 +Subject: xfs: don't update mtime on COW faults + +From: Mikulas Patocka + +commit b17164e258e3888d376a7434415013175d637377 upstream. + +When running in a dax mode, if the user maps a page with MAP_PRIVATE and +PROT_WRITE, the xfs filesystem would incorrectly update ctime and mtime +when the user hits a COW fault. + +This breaks building of the Linux kernel. How to reproduce: + + 1. extract the Linux kernel tree on dax-mounted xfs filesystem + 2. run make clean + 3. run make -j12 + 4. run make -j12 + +at step 4, make would incorrectly rebuild the whole kernel (although it +was already built in step 3). + +The reason for the breakage is that almost all object files depend on +objtool. When we run objtool, it takes COW page fault on its .data +section, and these faults will incorrectly update the timestamp of the +objtool binary. The updated timestamp causes make to rebuild the whole +tree. + +Signed-off-by: Mikulas Patocka +Cc: stable@vger.kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xfs/xfs_file.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -1220,6 +1220,14 @@ __xfs_filemap_fault( + return ret; + } + ++static inline bool ++xfs_is_write_fault( ++ struct vm_fault *vmf) ++{ ++ return (vmf->flags & FAULT_FLAG_WRITE) && ++ (vmf->vma->vm_flags & VM_SHARED); ++} ++ + static vm_fault_t + xfs_filemap_fault( + struct vm_fault *vmf) +@@ -1227,7 +1235,7 @@ xfs_filemap_fault( + /* DAX can shortcut the normal fault path on write faults! */ + return __xfs_filemap_fault(vmf, PE_SIZE_PTE, + IS_DAX(file_inode(vmf->vma->vm_file)) && +- (vmf->flags & FAULT_FLAG_WRITE)); ++ xfs_is_write_fault(vmf)); + } + + static vm_fault_t +@@ -1240,7 +1248,7 @@ xfs_filemap_huge_fault( + + /* DAX can shortcut the normal fault path on write faults! */ + return __xfs_filemap_fault(vmf, pe_size, +- (vmf->flags & FAULT_FLAG_WRITE)); ++ xfs_is_write_fault(vmf)); + } + + static vm_fault_t