From: Greg Kroah-Hartman Date: Mon, 27 May 2019 14:11:18 +0000 (+0200) Subject: 5.1-stable patches X-Git-Tag: v5.1.6~33 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=965e8ce1d25c78b197392b409c4c55fec56deee3;p=thirdparty%2Fkernel%2Fstable-queue.git 5.1-stable patches added patches: arm64-iommu-handle-non-remapped-addresses-in-mmap-and-get_sgtable.patch arm64-kconfig-make-arm64_pseudo_nmi-depend-on-broken-for-now.patch arm64-kernel-kaslr-reduce-module-randomization-range-to-2-gb.patch btrfs-avoid-fallback-to-transaction-commit-during-fsync-of-files-with-holes.patch btrfs-check-the-compression-level-before-getting-a-workspace.patch btrfs-do-not-abort-transaction-at-btrfs_update_root-after-failure-to-cow-path.patch btrfs-don-t-double-unlock-on-error-in-btrfs_punch_hole.patch btrfs-fix-race-between-ranged-fsync-and-writeback-of-adjacent-ranges.patch btrfs-sysfs-don-t-leak-memory-when-failing-add-fsid.patch btrfs-sysfs-fix-error-path-kobject-memory-leak.patch fbdev-fix-divide-error-in-fb_var_to_videomode.patch gfs2-fix-sign-extension-bug-in-gfs2_update_stats.patch --- diff --git a/queue-5.1/arm64-iommu-handle-non-remapped-addresses-in-mmap-and-get_sgtable.patch b/queue-5.1/arm64-iommu-handle-non-remapped-addresses-in-mmap-and-get_sgtable.patch new file mode 100644 index 00000000000..e4ea8854799 --- /dev/null +++ b/queue-5.1/arm64-iommu-handle-non-remapped-addresses-in-mmap-and-get_sgtable.patch @@ -0,0 +1,51 @@ +From a98d9ae937d256ed679a935fc82d9deaa710d98e Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Tue, 30 Apr 2019 06:51:50 -0400 +Subject: arm64/iommu: handle non-remapped addresses in ->mmap and ->get_sgtable + +From: Christoph Hellwig + +commit a98d9ae937d256ed679a935fc82d9deaa710d98e upstream. + +DMA allocations that can't sleep may return non-remapped addresses, but +we do not properly handle them in the mmap and get_sgtable methods. +Resolve non-vmalloc addresses using virt_to_page to handle this corner +case. 
+ +Cc: +Acked-by: Catalin Marinas +Reviewed-by: Robin Murphy +Signed-off-by: Christoph Hellwig +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/mm/dma-mapping.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/arch/arm64/mm/dma-mapping.c ++++ b/arch/arm64/mm/dma-mapping.c +@@ -249,6 +249,11 @@ static int __iommu_mmap_attrs(struct dev + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + ++ if (!is_vmalloc_addr(cpu_addr)) { ++ unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr)); ++ return __swiotlb_mmap_pfn(vma, pfn, size); ++ } ++ + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + /* + * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, +@@ -272,6 +277,11 @@ static int __iommu_get_sgtable(struct de + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area = find_vm_area(cpu_addr); + ++ if (!is_vmalloc_addr(cpu_addr)) { ++ struct page *page = virt_to_page(cpu_addr); ++ return __swiotlb_get_sgtable_page(sgt, page, size); ++ } ++ + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + /* + * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, diff --git a/queue-5.1/arm64-kconfig-make-arm64_pseudo_nmi-depend-on-broken-for-now.patch b/queue-5.1/arm64-kconfig-make-arm64_pseudo_nmi-depend-on-broken-for-now.patch new file mode 100644 index 00000000000..5b88c96b090 --- /dev/null +++ b/queue-5.1/arm64-kconfig-make-arm64_pseudo_nmi-depend-on-broken-for-now.patch @@ -0,0 +1,40 @@ +From 96a13f57b946be7a6c10405e4bd780c0b6b6fe63 Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Fri, 24 May 2019 14:15:34 +0100 +Subject: arm64: Kconfig: Make ARM64_PSEUDO_NMI depend on BROKEN for now + +From: Will Deacon + +commit 96a13f57b946be7a6c10405e4bd780c0b6b6fe63 upstream. + +Although we merged support for pseudo-nmi using interrupt priority +masking in 5.1, we've since uncovered a number of non-trivial issues +with the implementation. 
Although there are patches pending to address +these problems, we're facing issues that prevent us from merging them at +this current time: + + https://lkml.kernel.org/r/1556553607-46531-1-git-send-email-julien.thierry@arm.com + +For now, simply mark this optional feature as BROKEN in the hope that we +can fix things properly in the near future. + +Cc: # 5.1 +Cc: Julien Thierry +Acked-by: Marc Zyngier +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -1347,6 +1347,7 @@ config ARM64_MODULE_PLTS + + config ARM64_PSEUDO_NMI + bool "Support for NMI-like interrupts" ++ depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com + select CONFIG_ARM_GIC_V3 + help + Adds support for mimicking Non-Maskable Interrupts through the use of diff --git a/queue-5.1/arm64-kernel-kaslr-reduce-module-randomization-range-to-2-gb.patch b/queue-5.1/arm64-kernel-kaslr-reduce-module-randomization-range-to-2-gb.patch new file mode 100644 index 00000000000..06a906bbed6 --- /dev/null +++ b/queue-5.1/arm64-kernel-kaslr-reduce-module-randomization-range-to-2-gb.patch @@ -0,0 +1,85 @@ +From b2eed9b58811283d00fa861944cb75797d4e52a7 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Thu, 23 May 2019 10:17:37 +0100 +Subject: arm64/kernel: kaslr: reduce module randomization range to 2 GB + +From: Ard Biesheuvel + +commit b2eed9b58811283d00fa861944cb75797d4e52a7 upstream. + +The following commit + + 7290d5809571 ("module: use relative references for __ksymtab entries") + +updated the ksymtab handling of some KASLR capable architectures +so that ksymtab entries are emitted as pairs of 32-bit relative +references. 
This reduces the size of the entries, but more +importantly, it gets rid of statically assigned absolute +addresses, which require fixing up at boot time if the kernel +is self relocating (which takes a 24 byte RELA entry for each +member of the ksymtab struct). + +Since ksymtab entries are always part of the same module as the +symbol they export, it was assumed at the time that a 32-bit +relative reference is always sufficient to capture the offset +between a ksymtab entry and its target symbol. + +Unfortunately, this is not always true: in the case of per-CPU +variables, a per-CPU variable's base address (which usually differs +from the actual address of any of its per-CPU copies) is allocated +in the vicinity of the ..data.percpu section in the core kernel +(i.e., in the per-CPU reserved region which follows the section +containing the core kernel's statically allocated per-CPU variables). + +Since we randomize the module space over a 4 GB window covering +the core kernel (based on the -/+ 4 GB range of an ADRP/ADD pair), +we may end up putting the core kernel out of the -/+ 2 GB range of +32-bit relative references of module ksymtab entries that refer to +per-CPU variables. + +So reduce the module randomization range a bit further. We lose +1 bit of randomization this way, but this is something we can +tolerate. + +Cc: # v4.19+ +Signed-off-by: Ard Biesheuvel +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/kernel/kaslr.c | 6 +++--- + arch/arm64/kernel/module.c | 2 +- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/arch/arm64/kernel/kaslr.c ++++ b/arch/arm64/kernel/kaslr.c +@@ -145,15 +145,15 @@ u64 __init kaslr_early_init(u64 dt_phys) + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + /* +- * Randomize the module region over a 4 GB window covering the ++ * Randomize the module region over a 2 GB window covering the + * kernel. 
This reduces the risk of modules leaking information + * about the address of the kernel itself, but results in + * branches between modules and the core kernel that are + * resolved via PLTs. (Branches between modules will be + * resolved normally.) + */ +- module_range = SZ_4G - (u64)(_end - _stext); +- module_alloc_base = max((u64)_end + offset - SZ_4G, ++ module_range = SZ_2G - (u64)(_end - _stext); ++ module_alloc_base = max((u64)_end + offset - SZ_2G, + (u64)MODULES_VADDR); + } else { + /* +--- a/arch/arm64/kernel/module.c ++++ b/arch/arm64/kernel/module.c +@@ -56,7 +56,7 @@ void *module_alloc(unsigned long size) + * can simply omit this fallback in that case. + */ + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, +- module_alloc_base + SZ_4G, GFP_KERNEL, ++ module_alloc_base + SZ_2G, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + diff --git a/queue-5.1/btrfs-avoid-fallback-to-transaction-commit-during-fsync-of-files-with-holes.patch b/queue-5.1/btrfs-avoid-fallback-to-transaction-commit-during-fsync-of-files-with-holes.patch new file mode 100644 index 00000000000..e443551de64 --- /dev/null +++ b/queue-5.1/btrfs-avoid-fallback-to-transaction-commit-during-fsync-of-files-with-holes.patch @@ -0,0 +1,84 @@ +From ebb929060aeb162417b4c1307e63daee47b208d9 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 6 May 2019 16:43:51 +0100 +Subject: Btrfs: avoid fallback to transaction commit during fsync of files with holes + +From: Filipe Manana + +commit ebb929060aeb162417b4c1307e63daee47b208d9 upstream. 
+ +When we are doing a full fsync (bit BTRFS_INODE_NEEDS_FULL_SYNC set) of a +file that has holes and has file extent items spanning two or more leafs, +we can end up falling back to a full transaction commit due to a logic +bug that leads to failure to insert a duplicate file extent item that is +meant to represent a hole between the last file extent item of a leaf and +the first file extent item in the next leaf. The failure (EEXIST error) +leads to a transaction commit (as most errors when logging an inode do). + +For example, we have the two following leafs: + +Leaf N: + + ----------------------------------------------- + | ..., ..., ..., (257, FILE_EXTENT_ITEM, 64K) | + ----------------------------------------------- + The file extent item at the end of leaf N has a length of 4Kb, + representing the file range from 64K to 68K - 1. + +Leaf N + 1: + + ----------------------------------------------- + | (257, FILE_EXTENT_ITEM, 72K), ..., ..., ... | + ----------------------------------------------- + The file extent item at the first slot of leaf N + 1 has a length of + 4Kb too, representing the file range from 72K to 76K - 1. + +During the full fsync path, when we are at tree-log.c:copy_items() with +leaf N as a parameter, after processing the last file extent item, that +represents the extent at offset 64K, we take a look at the first file +extent item at the next leaf (leaf N + 1), and notice there's a 4K hole +between the two extents, and therefore we insert a file extent item +representing that hole, starting at file offset 68K and ending at offset +72K - 1. However we don't update the value of *last_extent, which is used +to represent the end offset (plus 1, non-inclusive end) of the last file +extent item inserted in the log, so it stays with a value of 68K and not +with a value of 72K. 
+ +Then, when copy_items() is called for leaf N + 1, because the value of +*last_extent is smaller than the offset of the first extent item in the +leaf (68K < 72K), we look at the last file extent item in the previous +leaf (leaf N) and see there's a 4K gap between it and our first file +extent item (again, 68K < 72K), so we decide to insert a file extent item +representing the hole, starting at file offset 68K and ending at offset +72K - 1, this insertion will fail with -EEXIST being returned from +btrfs_insert_file_extent() because we already inserted a file extent item +representing a hole for this offset (68K) in the previous call to +copy_items(), when processing leaf N. + +The -EEXIST error gets propagated to the fsync callback, btrfs_sync_file(), +which falls back to a full transaction commit. + +Fix this by adjusting *last_extent after inserting a hole when we had to +look at the next leaf. + +Fixes: 4ee3fad34a9c ("Btrfs: fix fsync after hole punching when using no-holes feature") +Cc: stable@vger.kernel.org # 4.14+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-log.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -4169,6 +4169,7 @@ fill_holes: + *last_extent, 0, + 0, len, 0, len, + 0, 0, 0); ++ *last_extent += len; + } + } + } diff --git a/queue-5.1/btrfs-check-the-compression-level-before-getting-a-workspace.patch b/queue-5.1/btrfs-check-the-compression-level-before-getting-a-workspace.patch new file mode 100644 index 00000000000..e633ab0420f --- /dev/null +++ b/queue-5.1/btrfs-check-the-compression-level-before-getting-a-workspace.patch @@ -0,0 +1,64 @@ +From 2b90883c561ddcc641741c2e4df1f702a4f2acb8 Mon Sep 17 00:00:00 2001 +From: Johnny Chang +Date: Fri, 26 Apr 2019 11:01:05 +0800 +Subject: btrfs: Check the compression level before getting a workspace + +From: Johnny Chang + +commit 
2b90883c561ddcc641741c2e4df1f702a4f2acb8 upstream. + +When a file's compression property is set as zlib or zstd but leave +the compression mount option not be set, that means btrfs will try +to compress the file with default compression level. But in +btrfs_compress_pages(), it calls get_workspace() with level = 0. +This will return a workspace with a wrong compression level. +For zlib, the compression level in the workspace will be 0 +(that means "store only"). And for zstd, the compression in the +workspace will be 1, not the default level 3. + +How to reproduce: + mkfs -t btrfs /dev/sdb + mount /dev/sdb /mnt/ + mkdir /mnt/zlib + btrfs property set /mnt/zlib/ compression zlib + dd if=/dev/zero of=/mnt/zlib/compression-friendly-file-10M bs=1M count=10 + sync + btrfs-debugfs -f /mnt/zlib/compression-friendly-file-10M + +btrfs-debugfs output: +* before: + ... + (258 9961472): ram 524288 disk 1106247680 disk_size 524288 + file: ... extents 20 disk size 10485760 logical size 10485760 ratio 1.00 + +* after: + ... + (258 10354688): ram 131072 disk 14217216 disk_size 4096 + file: ... extents 80 disk size 327680 logical size 10485760 ratio 32.00 + +The steps for zstd are similar, but need to put a debugging message to +show the level of the return workspace in zstd_get_workspace(). + +This commit adds a check of the compression level before getting a +workspace by set_level(). 
+ +CC: stable@vger.kernel.org # 5.1+ +Signed-off-by: Johnny Chang +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/compression.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/compression.c ++++ b/fs/btrfs/compression.c +@@ -1009,6 +1009,7 @@ int btrfs_compress_pages(unsigned int ty + struct list_head *workspace; + int ret; + ++ level = btrfs_compress_op[type]->set_level(level); + workspace = get_workspace(type, level); + ret = btrfs_compress_op[type]->compress_pages(workspace, mapping, + start, pages, diff --git a/queue-5.1/btrfs-do-not-abort-transaction-at-btrfs_update_root-after-failure-to-cow-path.patch b/queue-5.1/btrfs-do-not-abort-transaction-at-btrfs_update_root-after-failure-to-cow-path.patch new file mode 100644 index 00000000000..415e14f7840 --- /dev/null +++ b/queue-5.1/btrfs-do-not-abort-transaction-at-btrfs_update_root-after-failure-to-cow-path.patch @@ -0,0 +1,54 @@ +From 72bd2323ec87722c115a5906bc6a1b31d11e8f54 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 29 Apr 2019 13:08:14 +0100 +Subject: Btrfs: do not abort transaction at btrfs_update_root() after failure to COW path + +From: Filipe Manana + +commit 72bd2323ec87722c115a5906bc6a1b31d11e8f54 upstream. + +Currently when we fail to COW a path at btrfs_update_root() we end up +always aborting the transaction. However all the current callers of +btrfs_update_root() are able to deal with errors returned from it, many do +end up aborting the transaction themselves (directly or not, such as the +transaction commit path), other BUG_ON() or just gracefully cancel whatever +they were doing. + +When syncing the fsync log, we call btrfs_update_root() through +tree-log.c:update_log_root(), and if it returns an -ENOSPC error, the log +sync code does not abort the transaction, instead it gracefully handles +the error and returns -EAGAIN to the fsync handler, so that it falls back +to a transaction commit. 
Any other error different from -ENOSPC, makes the +log sync code abort the transaction. + +So remove the transaction abort from btrfs_update_root() when we fail to +COW a path to update the root item, so that if an -ENOSPC failure happens +we avoid aborting the current transaction and have a chance of the fsync +succeeding after falling back to a transaction commit. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203413 +Fixes: 79787eaab46121 ("btrfs: replace many BUG_ONs with proper error handling") +Cc: stable@vger.kernel.org # 4.4+ +Signed-off-by: Filipe Manana +Reviewed-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/root-tree.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/fs/btrfs/root-tree.c ++++ b/fs/btrfs/root-tree.c +@@ -132,10 +132,8 @@ int btrfs_update_root(struct btrfs_trans + return -ENOMEM; + + ret = btrfs_search_slot(trans, root, key, path, 0, 1); +- if (ret < 0) { +- btrfs_abort_transaction(trans, ret); ++ if (ret < 0) + goto out; +- } + + if (ret != 0) { + btrfs_print_leaf(path->nodes[0]); diff --git a/queue-5.1/btrfs-don-t-double-unlock-on-error-in-btrfs_punch_hole.patch b/queue-5.1/btrfs-don-t-double-unlock-on-error-in-btrfs_punch_hole.patch new file mode 100644 index 00000000000..6b48e3dd72c --- /dev/null +++ b/queue-5.1/btrfs-don-t-double-unlock-on-error-in-btrfs_punch_hole.patch @@ -0,0 +1,40 @@ +From 8fca955057b9c58467d1b231e43f19c4cf26ae8c Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 3 May 2019 11:10:06 -0400 +Subject: btrfs: don't double unlock on error in btrfs_punch_hole + +From: Josef Bacik + +commit 8fca955057b9c58467d1b231e43f19c4cf26ae8c upstream. + +If we have an error writing out a delalloc range in +btrfs_punch_hole_lock_range we'll unlock the inode and then goto +out_only_mutex, where we will again unlock the inode. This is bad, +don't do this. 
+ +Fixes: f27451f22996 ("Btrfs: add support for fallocate's zero range operation") +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2546,10 +2546,8 @@ static int btrfs_punch_hole(struct inode + + ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend, + &cached_state); +- if (ret) { +- inode_unlock(inode); ++ if (ret) + goto out_only_mutex; +- } + + path = btrfs_alloc_path(); + if (!path) { diff --git a/queue-5.1/btrfs-fix-race-between-ranged-fsync-and-writeback-of-adjacent-ranges.patch b/queue-5.1/btrfs-fix-race-between-ranged-fsync-and-writeback-of-adjacent-ranges.patch new file mode 100644 index 00000000000..630a7044a9b --- /dev/null +++ b/queue-5.1/btrfs-fix-race-between-ranged-fsync-and-writeback-of-adjacent-ranges.patch @@ -0,0 +1,243 @@ +From 0c713cbab6200b0ab6473b50435e450a6e1de85d Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 6 May 2019 16:44:02 +0100 +Subject: Btrfs: fix race between ranged fsync and writeback of adjacent ranges + +From: Filipe Manana + +commit 0c713cbab6200b0ab6473b50435e450a6e1de85d upstream. + +When we do a full fsync (the bit BTRFS_INODE_NEEDS_FULL_SYNC is set in the +inode) that happens to be ranged, which happens during a msync() or writes +for files opened with O_SYNC for example, we can end up with a corrupt log, +due to different file extent items representing ranges that overlap with +each other, or hit some assertion failures. + +When doing a ranged fsync we only flush delalloc and wait for ordered +extents within that range. 
If while we are logging items from our inode +ordered extents for adjacent ranges complete, we end up in a race that can +make us insert the file extent items that overlap with others we logged +previously and the assertion failures. + +For example, if tree-log.c:copy_items() receives a leaf that has the +following file extents items, all with a length of 4K and therefore there +is an implicit hole in the range 68K to 72K - 1: + + (257 EXTENT_ITEM 64K), (257 EXTENT_ITEM 72K), (257 EXTENT_ITEM 76K), ... + +It copies them to the log tree. However due to the need to detect implicit +holes, it may release the path, in order to look at the previous leaf to +detect an implicit hole, and then later it will search again in the tree +for the first file extent item key, with the goal of locking again the +leaf (which might have changed due to concurrent changes to other inodes). + +However when it locks again the leaf containing the first key, the key +corresponding to the extent at offset 72K may not be there anymore since +there is an ordered extent for that range that is finishing (that is, +somewhere in the middle of btrfs_finish_ordered_io()), and it just +removed the file extent item but has not yet replaced it with a new file +extent item, so the part of copy_items() that does hole detection will +decide that there is a hole in the range starting from 68K to 76K - 1, +and therefore insert a file extent item to represent that hole, having +a key offset of 68K. After that we now have a log tree with 2 different +extent items that have overlapping ranges: + + 1) The file extent item copied before copy_items() released the path, + which has a key offset of 72K and a length of 4K, representing the + file range 72K to 76K - 1. + + 2) And a file extent item representing a hole that has a key offset of + 68K and a length of 8K, representing the range 68K to 76K - 1. This + item was inserted after releasing the path, and overlaps with the + extent item inserted before. 
+ +The overlapping extent items can cause all sorts of unpredictable and +incorrect behaviour, either when replayed or if a fast (non full) fsync +happens later, which can trigger a BUG_ON() when calling +btrfs_set_item_key_safe() through __btrfs_drop_extents(), producing a +trace like the following: + + [61666.783269] ------------[ cut here ]------------ + [61666.783943] kernel BUG at fs/btrfs/ctree.c:3182! + [61666.784644] invalid opcode: 0000 [#1] PREEMPT SMP + (...) + [61666.786253] task: ffff880117b88c40 task.stack: ffffc90008168000 + [61666.786253] RIP: 0010:btrfs_set_item_key_safe+0x7c/0xd2 [btrfs] + [61666.786253] RSP: 0018:ffffc9000816b958 EFLAGS: 00010246 + [61666.786253] RAX: 0000000000000000 RBX: 000000000000000f RCX: 0000000000030000 + [61666.786253] RDX: 0000000000000000 RSI: ffffc9000816ba4f RDI: ffffc9000816b937 + [61666.786253] RBP: ffffc9000816b998 R08: ffff88011dae2428 R09: 0000000000001000 + [61666.786253] R10: 0000160000000000 R11: 6db6db6db6db6db7 R12: ffff88011dae2418 + [61666.786253] R13: ffffc9000816ba4f R14: ffff8801e10c4118 R15: ffff8801e715c000 + [61666.786253] FS: 00007f6060a18700(0000) GS:ffff88023f5c0000(0000) knlGS:0000000000000000 + [61666.786253] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [61666.786253] CR2: 00007f6060a28000 CR3: 0000000213e69000 CR4: 00000000000006e0 + [61666.786253] Call Trace: + [61666.786253] __btrfs_drop_extents+0x5e3/0xaad [btrfs] + [61666.786253] ? time_hardirqs_on+0x9/0x14 + [61666.786253] btrfs_log_changed_extents+0x294/0x4e0 [btrfs] + [61666.786253] ? release_extent_buffer+0x38/0xb4 [btrfs] + [61666.786253] btrfs_log_inode+0xb6e/0xcdc [btrfs] + [61666.786253] ? lock_acquire+0x131/0x1c5 + [61666.786253] ? btrfs_log_inode_parent+0xee/0x659 [btrfs] + [61666.786253] ? arch_local_irq_save+0x9/0xc + [61666.786253] ? btrfs_log_inode_parent+0x1f5/0x659 [btrfs] + [61666.786253] btrfs_log_inode_parent+0x223/0x659 [btrfs] + [61666.786253] ? arch_local_irq_save+0x9/0xc + [61666.786253] ? 
lockref_get_not_zero+0x2c/0x34 + [61666.786253] ? rcu_read_unlock+0x3e/0x5d + [61666.786253] btrfs_log_dentry_safe+0x60/0x7b [btrfs] + [61666.786253] btrfs_sync_file+0x317/0x42c [btrfs] + [61666.786253] vfs_fsync_range+0x8c/0x9e + [61666.786253] SyS_msync+0x13c/0x1c9 + [61666.786253] entry_SYSCALL_64_fastpath+0x18/0xad + +A sample of a corrupt log tree leaf with overlapping extents I got from +running btrfs/072: + + item 14 key (295 108 200704) itemoff 2599 itemsize 53 + extent data disk bytenr 0 nr 0 + extent data offset 0 nr 458752 ram 458752 + item 15 key (295 108 659456) itemoff 2546 itemsize 53 + extent data disk bytenr 4343541760 nr 770048 + extent data offset 606208 nr 163840 ram 770048 + item 16 key (295 108 663552) itemoff 2493 itemsize 53 + extent data disk bytenr 4343541760 nr 770048 + extent data offset 610304 nr 155648 ram 770048 + item 17 key (295 108 819200) itemoff 2440 itemsize 53 + extent data disk bytenr 4334788608 nr 4096 + extent data offset 0 nr 4096 ram 4096 + +The file extent item at offset 659456 (item 15) ends at offset 823296 +(659456 + 163840) while the next file extent item (item 16) starts at +offset 663552. + +Another different problem that the race can trigger is a failure in the +assertions at tree-log.c:copy_items(), which expect that the first file +extent item key we found before releasing the path exists after we have +released path and that the last key we found before releasing the path +also exists after releasing the path: + + $ cat -n fs/btrfs/tree-log.c + 4080 if (need_find_last_extent) { + 4081 /* btrfs_prev_leaf could return 1 without releasing the path */ + 4082 btrfs_release_path(src_path); + 4083 ret = btrfs_search_slot(NULL, inode->root, &first_key, + 4084 src_path, 0, 0); + 4085 if (ret < 0) + 4086 return ret; + 4087 ASSERT(ret == 0); + (...) 
+ 4103 if (i >= btrfs_header_nritems(src_path->nodes[0])) { + 4104 ret = btrfs_next_leaf(inode->root, src_path); + 4105 if (ret < 0) + 4106 return ret; + 4107 ASSERT(ret == 0); + 4108 src = src_path->nodes[0]; + 4109 i = 0; + 4110 need_find_last_extent = true; + 4111 } + (...) + +The second assertion implicitly expects that the last key before the path +release still exists, because the surrounding while loop only stops after +we have found that key. When this assertion fails it produces a stack like +this: + + [139590.037075] assertion failed: ret == 0, file: fs/btrfs/tree-log.c, line: 4107 + [139590.037406] ------------[ cut here ]------------ + [139590.037707] kernel BUG at fs/btrfs/ctree.h:3546! + [139590.038034] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI + [139590.038340] CPU: 1 PID: 31841 Comm: fsstress Tainted: G W 5.0.0-btrfs-next-46 #1 + (...) + [139590.039354] RIP: 0010:assfail.constprop.24+0x18/0x1a [btrfs] + (...) + [139590.040397] RSP: 0018:ffffa27f48f2b9b0 EFLAGS: 00010282 + [139590.040730] RAX: 0000000000000041 RBX: ffff897c635d92c8 RCX: 0000000000000000 + [139590.041105] RDX: 0000000000000000 RSI: ffff897d36a96868 RDI: ffff897d36a96868 + [139590.041470] RBP: ffff897d1b9a0708 R08: 0000000000000000 R09: 0000000000000000 + [139590.041815] R10: 0000000000000008 R11: 0000000000000000 R12: 0000000000000013 + [139590.042159] R13: 0000000000000227 R14: ffff897cffcbba88 R15: 0000000000000001 + [139590.042501] FS: 00007f2efc8dee80(0000) GS:ffff897d36a80000(0000) knlGS:0000000000000000 + [139590.042847] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [139590.043199] CR2: 00007f8c064935e0 CR3: 0000000232252002 CR4: 00000000003606e0 + [139590.043547] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [139590.043899] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + [139590.044250] Call Trace: + [139590.044631] copy_items+0xa3f/0x1000 [btrfs] + [139590.045009] ? 
generic_bin_search.constprop.32+0x61/0x200 [btrfs] + [139590.045396] btrfs_log_inode+0x7b3/0xd70 [btrfs] + [139590.045773] btrfs_log_inode_parent+0x2b3/0xce0 [btrfs] + [139590.046143] ? do_raw_spin_unlock+0x49/0xc0 + [139590.046510] btrfs_log_dentry_safe+0x4a/0x70 [btrfs] + [139590.046872] btrfs_sync_file+0x3b6/0x440 [btrfs] + [139590.047243] btrfs_file_write_iter+0x45b/0x5c0 [btrfs] + [139590.047592] __vfs_write+0x129/0x1c0 + [139590.047932] vfs_write+0xc2/0x1b0 + [139590.048270] ksys_write+0x55/0xc0 + [139590.048608] do_syscall_64+0x60/0x1b0 + [139590.048946] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [139590.049287] RIP: 0033:0x7f2efc4be190 + (...) + [139590.050342] RSP: 002b:00007ffe743243a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 + [139590.050701] RAX: ffffffffffffffda RBX: 0000000000008d58 RCX: 00007f2efc4be190 + [139590.051067] RDX: 0000000000008d58 RSI: 00005567eca0f370 RDI: 0000000000000003 + [139590.051459] RBP: 0000000000000024 R08: 0000000000000003 R09: 0000000000008d60 + [139590.051863] R10: 0000000000000078 R11: 0000000000000246 R12: 0000000000000003 + [139590.052252] R13: 00000000003d3507 R14: 00005567eca0f370 R15: 0000000000000000 + (...) + [139590.055128] ---[ end trace 193f35d0215cdeeb ]--- + +So fix this race between a full ranged fsync and writeback of adjacent +ranges by flushing all delalloc and waiting for all ordered extents to +complete before logging the inode. This is the simplest way to solve the +problem because currently the full fsync path does not deal with ranges +at all (it assumes a full range from 0 to LLONG_MAX) and it always needs +to look at adjacent ranges for hole detection. For use cases of ranged +fsyncs this can make a few fsyncs slower but on the other hand it can +make some following fsyncs to other ranges do less work or no need to do +anything at all. A full fsync is rare anyway and happens only once after +loading/creating an inode and once after less common operations such as a +shrinking truncate. 
+ +This is an issue that exists for a long time, and was often triggered by +generic/127, because it does mmap'ed writes and msync (which triggers a +ranged fsync). Adding support for the tree checker to detect overlapping +extents (next patch in the series) and trigger a WARN() when such cases +are found, and then calling btrfs_check_leaf_full() at the end of +btrfs_insert_file_extent() made the issue much easier to detect. Running +btrfs/072 with that change to the tree checker and making fsstress open +files always with O_SYNC made it much easier to trigger the issue (as +triggering it with generic/127 is very rare). + +CC: stable@vger.kernel.org # 3.16+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2059,6 +2059,18 @@ int btrfs_sync_file(struct file *file, l + u64 len; + + /* ++ * If the inode needs a full sync, make sure we use a full range to ++ * avoid log tree corruption, due to hole detection racing with ordered ++ * extent completion for adjacent ranges, and assertion failures during ++ * hole detection. ++ */ ++ if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, ++ &BTRFS_I(inode)->runtime_flags)) { ++ start = 0; ++ end = LLONG_MAX; ++ } ++ ++ /* + * The range length can be represented by u64, we have to do the typecasts + * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() + */ diff --git a/queue-5.1/btrfs-sysfs-don-t-leak-memory-when-failing-add-fsid.patch b/queue-5.1/btrfs-sysfs-don-t-leak-memory-when-failing-add-fsid.patch new file mode 100644 index 00000000000..d3da687f75b --- /dev/null +++ b/queue-5.1/btrfs-sysfs-don-t-leak-memory-when-failing-add-fsid.patch @@ -0,0 +1,53 @@ +From e32773357d5cc271b1d23550b3ed026eb5c2a468 Mon Sep 17 00:00:00 2001 +From: "Tobin C. 
Harding" +Date: Mon, 13 May 2019 13:39:12 +1000 +Subject: btrfs: sysfs: don't leak memory when failing add fsid + +From: Tobin C. Harding + +commit e32773357d5cc271b1d23550b3ed026eb5c2a468 upstream. + +A failed call to kobject_init_and_add() must be followed by a call to +kobject_put(). Currently in the error path when adding fs_devices we +are missing this call. This could be fixed by calling +btrfs_sysfs_remove_fsid() if btrfs_sysfs_add_fsid() returns an error or +by adding a call to kobject_put() directly in btrfs_sysfs_add_fsid(). +Here we choose the second option because it prevents the slightly +unusual error path handling requirements of kobject from leaking out +into btrfs functions. + +Add a call to kobject_put() in the error path of kobject_init_and_add(). +This causes the release method to be called if kobject_init_and_add() +fails. open_tree() is the function that calls btrfs_sysfs_add_fsid() +and the error code in this function is already written with the +assumption that the release method is called during the error path of +open_tree() (as seen by the call to btrfs_sysfs_remove_fsid() under the +fail_fsdev_sysfs label). + +Cc: stable@vger.kernel.org # v4.4+ +Reviewed-by: Greg Kroah-Hartman +Signed-off-by: Tobin C. 
Harding +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/sysfs.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -825,7 +825,12 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs + fs_devs->fsid_kobj.kset = btrfs_kset; + error = kobject_init_and_add(&fs_devs->fsid_kobj, + &btrfs_ktype, parent, "%pU", fs_devs->fsid); +- return error; ++ if (error) { ++ kobject_put(&fs_devs->fsid_kobj); ++ return error; ++ } ++ ++ return 0; + } + + int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info) diff --git a/queue-5.1/btrfs-sysfs-fix-error-path-kobject-memory-leak.patch b/queue-5.1/btrfs-sysfs-fix-error-path-kobject-memory-leak.patch new file mode 100644 index 00000000000..737fdbaa19f --- /dev/null +++ b/queue-5.1/btrfs-sysfs-fix-error-path-kobject-memory-leak.patch @@ -0,0 +1,42 @@ +From 450ff8348808a89cc27436771aa05c2b90c0eef1 Mon Sep 17 00:00:00 2001 +From: "Tobin C. Harding" +Date: Mon, 13 May 2019 13:39:11 +1000 +Subject: btrfs: sysfs: Fix error path kobject memory leak + +From: Tobin C. Harding + +commit 450ff8348808a89cc27436771aa05c2b90c0eef1 upstream. + +If a call to kobject_init_and_add() fails we must call kobject_put() +otherwise we leak memory. + +Calling kobject_put() when kobject_init_and_add() fails drops the +refcount back to 0 and calls the ktype release method (which in turn +calls the percpu destroy and kfree). + +Add call to kobject_put() in the error path of call to +kobject_init_and_add(). + +Cc: stable@vger.kernel.org # v4.4+ +Reviewed-by: Greg Kroah-Hartman +Signed-off-by: Tobin C. 
Harding +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent-tree.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -3981,8 +3981,7 @@ static int create_space_info(struct btrf + info->space_info_kobj, "%s", + alloc_name(space_info->flags)); + if (ret) { +- percpu_counter_destroy(&space_info->total_bytes_pinned); +- kfree(space_info); ++ kobject_put(&space_info->kobj); + return ret; + } + diff --git a/queue-5.1/fbdev-fix-divide-error-in-fb_var_to_videomode.patch b/queue-5.1/fbdev-fix-divide-error-in-fb_var_to_videomode.patch new file mode 100644 index 00000000000..18fd6c0b272 --- /dev/null +++ b/queue-5.1/fbdev-fix-divide-error-in-fb_var_to_videomode.patch @@ -0,0 +1,81 @@ +From cf84807f6dd0be5214378e66460cfc9187f532f9 Mon Sep 17 00:00:00 2001 +From: Shile Zhang +Date: Mon, 1 Apr 2019 17:47:00 +0200 +Subject: fbdev: fix divide error in fb_var_to_videomode + +From: Shile Zhang + +commit cf84807f6dd0be5214378e66460cfc9187f532f9 upstream. 
+ +To fix following divide-by-zero error found by Syzkaller: + + divide error: 0000 [#1] SMP PTI + CPU: 7 PID: 8447 Comm: test Kdump: loaded Not tainted 4.19.24-8.al7.x86_64 #1 + Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 + RIP: 0010:fb_var_to_videomode+0xae/0xc0 + Code: 04 44 03 46 78 03 4e 7c 44 03 46 68 03 4e 70 89 ce d1 ee 69 c0 e8 03 00 00 f6 c2 01 0f 45 ce 83 e2 02 8d 34 09 0f 45 ce 31 d2 <41> f7 f0 31 d2 f7 f1 89 47 08 f3 c3 66 0f 1f 44 00 00 0f 1f 44 00 + RSP: 0018:ffffb7e189347bf0 EFLAGS: 00010246 + RAX: 00000000e1692410 RBX: ffffb7e189347d60 RCX: 0000000000000000 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffb7e189347c10 + RBP: ffff99972a091c00 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000100 + R13: 0000000000010000 R14: 00007ffd66baf6d0 R15: 0000000000000000 + FS: 00007f2054d11740(0000) GS:ffff99972fbc0000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f205481fd20 CR3: 00000004288a0001 CR4: 00000000001606a0 + Call Trace: + fb_set_var+0x257/0x390 + ? lookup_fast+0xbb/0x2b0 + ? fb_open+0xc0/0x140 + ? chrdev_open+0xa6/0x1a0 + do_fb_ioctl+0x445/0x5a0 + do_vfs_ioctl+0x92/0x5f0 + ? 
__alloc_fd+0x3d/0x160 + ksys_ioctl+0x60/0x90 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x5b/0x190 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + RIP: 0033:0x7f20548258d7 + Code: 44 00 00 48 8b 05 b9 15 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 89 15 2d 00 f7 d8 64 89 01 48 + +It can be triggered easily with following test code: + + #include <linux/fb.h> + #include <fcntl.h> + #include <sys/ioctl.h> + int main(void) + { + struct fb_var_screeninfo var = {.activate = 0x100, .pixclock = 60}; + int fd = open("/dev/fb0", O_RDWR); + if (fd < 0) + return 1; + + if (ioctl(fd, FBIOPUT_VSCREENINFO, &var)) + return 1; + + return 0; + } + +Signed-off-by: Shile Zhang +Cc: Fredrik Noring +Cc: Daniel Vetter +Reviewed-by: Mukesh Ojha +Signed-off-by: Bartlomiej Zolnierkiewicz +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/video/fbdev/core/modedb.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/video/fbdev/core/modedb.c ++++ b/drivers/video/fbdev/core/modedb.c +@@ -935,6 +935,9 @@ void fb_var_to_videomode(struct fb_video + if (var->vmode & FB_VMODE_DOUBLE) + vtotal *= 2; + ++ if (!htotal || !vtotal) ++ return; ++ + hfreq = pixclock/htotal; + mode->refresh = hfreq/vtotal; + } diff --git a/queue-5.1/gfs2-fix-sign-extension-bug-in-gfs2_update_stats.patch b/queue-5.1/gfs2-fix-sign-extension-bug-in-gfs2_update_stats.patch new file mode 100644 index 00000000000..aea0f253905 --- /dev/null +++ b/queue-5.1/gfs2-fix-sign-extension-bug-in-gfs2_update_stats.patch @@ -0,0 +1,49 @@ +From 5a5ec83d6ac974b12085cd99b196795f14079037 Mon Sep 17 00:00:00 2001 +From: Andreas Gruenbacher +Date: Fri, 17 May 2019 19:18:43 +0100 +Subject: gfs2: Fix sign extension bug in gfs2_update_stats + +From: Andreas Gruenbacher + +commit 5a5ec83d6ac974b12085cd99b196795f14079037 upstream. + +Commit 4d207133e9c3 changed the types of the statistic values in struct +gfs2_lkstats from s64 to u64.
Because of that, what should be a signed +value in gfs2_update_stats turned into an unsigned value. When shifted +right, we end up with a large positive value instead of a small negative +value, which results in an incorrect variance estimate. + +Fixes: 4d207133e9c3 ("gfs2: Make statistics unsigned, suitable for use with do_div()") +Signed-off-by: Andreas Gruenbacher +Cc: stable@vger.kernel.org # v4.4+ +Signed-off-by: Greg Kroah-Hartman + +--- + fs/gfs2/lock_dlm.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/fs/gfs2/lock_dlm.c ++++ b/fs/gfs2/lock_dlm.c +@@ -31,9 +31,10 @@ + * @delta is the difference between the current rtt sample and the + * running average srtt. We add 1/8 of that to the srtt in order to + * update the current srtt estimate. The variance estimate is a bit +- * more complicated. We subtract the abs value of the @delta from +- * the current variance estimate and add 1/4 of that to the running +- * total. ++ * more complicated. We subtract the current variance estimate from ++ * the abs value of the @delta and add 1/4 of that to the running ++ * total. That's equivalent to 3/4 of the current variance ++ * estimate plus 1/4 of the abs of @delta. 
+ * + * Note that the index points at the array entry containing the smoothed + * mean value, and the variance is always in the following entry +@@ -49,7 +50,7 @@ static inline void gfs2_update_stats(str + s64 delta = sample - s->stats[index]; + s->stats[index] += (delta >> 3); + index++; +- s->stats[index] += ((abs(delta) - s->stats[index]) >> 2); ++ s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2; + } + + /** diff --git a/queue-5.1/series b/queue-5.1/series index ba68d9ec4b0..bbb8e796408 100644 --- a/queue-5.1/series +++ b/queue-5.1/series @@ -16,3 +16,15 @@ kvm-check-irqchip-mode-before-assign-irqfd.patch kvm-svm-avic-fix-off-by-one-in-checking-host-apic-id.patch kvm-nvmx-fix-using-__this_cpu_read-in-preemptible-context.patch libnvdimm-pmem-bypass-config_hardened_usercopy-overhead.patch +arm64-kernel-kaslr-reduce-module-randomization-range-to-2-gb.patch +arm64-kconfig-make-arm64_pseudo_nmi-depend-on-broken-for-now.patch +arm64-iommu-handle-non-remapped-addresses-in-mmap-and-get_sgtable.patch +gfs2-fix-sign-extension-bug-in-gfs2_update_stats.patch +btrfs-don-t-double-unlock-on-error-in-btrfs_punch_hole.patch +btrfs-check-the-compression-level-before-getting-a-workspace.patch +btrfs-do-not-abort-transaction-at-btrfs_update_root-after-failure-to-cow-path.patch +btrfs-avoid-fallback-to-transaction-commit-during-fsync-of-files-with-holes.patch +btrfs-fix-race-between-ranged-fsync-and-writeback-of-adjacent-ranges.patch +btrfs-sysfs-fix-error-path-kobject-memory-leak.patch +btrfs-sysfs-don-t-leak-memory-when-failing-add-fsid.patch +fbdev-fix-divide-error-in-fb_var_to_videomode.patch