--- /dev/null
+From a98d9ae937d256ed679a935fc82d9deaa710d98e Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Tue, 30 Apr 2019 06:51:50 -0400
+Subject: arm64/iommu: handle non-remapped addresses in ->mmap and ->get_sgtable
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit a98d9ae937d256ed679a935fc82d9deaa710d98e upstream.
+
+DMA allocations that can't sleep may return non-remapped addresses, but
+we do not properly handle them in the mmap and get_sgtable methods.
+Resolve non-vmalloc addresses using virt_to_page to handle this corner
+case.
+
+Cc: <stable@vger.kernel.org>
+Acked-by: Catalin Marinas <catalin.marinas@arm.com>
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/mm/dma-mapping.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/arm64/mm/dma-mapping.c
++++ b/arch/arm64/mm/dma-mapping.c
+@@ -249,6 +249,11 @@ static int __iommu_mmap_attrs(struct dev
+ if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
+ return ret;
+
++ if (!is_vmalloc_addr(cpu_addr)) {
++ unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
++ return __swiotlb_mmap_pfn(vma, pfn, size);
++ }
++
+ if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ /*
+ * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
+@@ -272,6 +277,11 @@ static int __iommu_get_sgtable(struct de
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct vm_struct *area = find_vm_area(cpu_addr);
+
++ if (!is_vmalloc_addr(cpu_addr)) {
++ struct page *page = virt_to_page(cpu_addr);
++ return __swiotlb_get_sgtable_page(sgt, page, size);
++ }
++
+ if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ /*
+ * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
--- /dev/null
+From 96a13f57b946be7a6c10405e4bd780c0b6b6fe63 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Fri, 24 May 2019 14:15:34 +0100
+Subject: arm64: Kconfig: Make ARM64_PSEUDO_NMI depend on BROKEN for now
+
+From: Will Deacon <will.deacon@arm.com>
+
+commit 96a13f57b946be7a6c10405e4bd780c0b6b6fe63 upstream.
+
+Although we merged support for pseudo-nmi using interrupt priority
+masking in 5.1, we've since uncovered a number of non-trivial issues
+with the implementation. Although there are patches pending to address
+these problems, we're facing issues that prevent us from merging them at
+this current time:
+
+ https://lkml.kernel.org/r/1556553607-46531-1-git-send-email-julien.thierry@arm.com
+
+For now, simply mark this optional feature as BROKEN in the hope that we
+can fix things properly in the near future.
+
+Cc: <stable@vger.kernel.org> # 5.1
+Cc: Julien Thierry <julien.thierry@arm.com>
+Acked-by: Marc Zyngier <marc.zyngier@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -1347,6 +1347,7 @@ config ARM64_MODULE_PLTS
+
+ config ARM64_PSEUDO_NMI
+ bool "Support for NMI-like interrupts"
++ depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com
+ select CONFIG_ARM_GIC_V3
+ help
+ Adds support for mimicking Non-Maskable Interrupts through the use of
--- /dev/null
+From b2eed9b58811283d00fa861944cb75797d4e52a7 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@arm.com>
+Date: Thu, 23 May 2019 10:17:37 +0100
+Subject: arm64/kernel: kaslr: reduce module randomization range to 2 GB
+
+From: Ard Biesheuvel <ard.biesheuvel@arm.com>
+
+commit b2eed9b58811283d00fa861944cb75797d4e52a7 upstream.
+
+The following commit
+
+ 7290d5809571 ("module: use relative references for __ksymtab entries")
+
+updated the ksymtab handling of some KASLR capable architectures
+so that ksymtab entries are emitted as pairs of 32-bit relative
+references. This reduces the size of the entries, but more
+importantly, it gets rid of statically assigned absolute
+addresses, which require fixing up at boot time if the kernel
+is self relocating (which takes a 24 byte RELA entry for each
+member of the ksymtab struct).
+
+Since ksymtab entries are always part of the same module as the
+symbol they export, it was assumed at the time that a 32-bit
+relative reference is always sufficient to capture the offset
+between a ksymtab entry and its target symbol.
+
+Unfortunately, this is not always true: in the case of per-CPU
+variables, a per-CPU variable's base address (which usually differs
+from the actual address of any of its per-CPU copies) is allocated
+in the vicinity of the ..data.percpu section in the core kernel
+(i.e., in the per-CPU reserved region which follows the section
+containing the core kernel's statically allocated per-CPU variables).
+
+Since we randomize the module space over a 4 GB window covering
+the core kernel (based on the -/+ 4 GB range of an ADRP/ADD pair),
+we may end up putting the core kernel out of the -/+ 2 GB range of
+32-bit relative references of module ksymtab entries that refer to
+per-CPU variables.
+
+So reduce the module randomization range a bit further. We lose
+1 bit of randomization this way, but this is something we can
+tolerate.
+
+Cc: <stable@vger.kernel.org> # v4.19+
+Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/kernel/kaslr.c | 6 +++---
+ arch/arm64/kernel/module.c | 2 +-
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/arm64/kernel/kaslr.c
++++ b/arch/arm64/kernel/kaslr.c
+@@ -145,15 +145,15 @@ u64 __init kaslr_early_init(u64 dt_phys)
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+ /*
+- * Randomize the module region over a 4 GB window covering the
++ * Randomize the module region over a 2 GB window covering the
+ * kernel. This reduces the risk of modules leaking information
+ * about the address of the kernel itself, but results in
+ * branches between modules and the core kernel that are
+ * resolved via PLTs. (Branches between modules will be
+ * resolved normally.)
+ */
+- module_range = SZ_4G - (u64)(_end - _stext);
+- module_alloc_base = max((u64)_end + offset - SZ_4G,
++ module_range = SZ_2G - (u64)(_end - _stext);
++ module_alloc_base = max((u64)_end + offset - SZ_2G,
+ (u64)MODULES_VADDR);
+ } else {
+ /*
+--- a/arch/arm64/kernel/module.c
++++ b/arch/arm64/kernel/module.c
+@@ -56,7 +56,7 @@ void *module_alloc(unsigned long size)
+ * can simply omit this fallback in that case.
+ */
+ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+- module_alloc_base + SZ_4G, GFP_KERNEL,
++ module_alloc_base + SZ_2G, GFP_KERNEL,
+ PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+
--- /dev/null
+From ebb929060aeb162417b4c1307e63daee47b208d9 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 6 May 2019 16:43:51 +0100
+Subject: Btrfs: avoid fallback to transaction commit during fsync of files with holes
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit ebb929060aeb162417b4c1307e63daee47b208d9 upstream.
+
+When we are doing a full fsync (bit BTRFS_INODE_NEEDS_FULL_SYNC set) of a
+file that has holes and has file extent items spanning two or more leafs,
+we can end up falling back to a full transaction commit due to a logic
+bug that leads to failure to insert a duplicate file extent item that is
+meant to represent a hole between the last file extent item of a leaf and
+the first file extent item in the next leaf. The failure (EEXIST error)
+leads to a transaction commit (as most errors when logging an inode do).
+
+For example, we have the two following leafs:
+
+Leaf N:
+
+ -----------------------------------------------
+ | ..., ..., ..., (257, FILE_EXTENT_ITEM, 64K) |
+ -----------------------------------------------
+ The file extent item at the end of leaf N has a length of 4Kb,
+ representing the file range from 64K to 68K - 1.
+
+Leaf N + 1:
+
+ -----------------------------------------------
+ | (257, FILE_EXTENT_ITEM, 72K), ..., ..., ... |
+ -----------------------------------------------
+ The file extent item at the first slot of leaf N + 1 has a length of
+ 4Kb too, representing the file range from 72K to 76K - 1.
+
+During the full fsync path, when we are at tree-log.c:copy_items() with
+leaf N as a parameter, after processing the last file extent item, that
+represents the extent at offset 64K, we take a look at the first file
+extent item at the next leaf (leaf N + 1), and notice there's a 4K hole
+between the two extents, and therefore we insert a file extent item
+representing that hole, starting at file offset 68K and ending at offset
+72K - 1. However we don't update the value of *last_extent, which is used
+to represent the end offset (plus 1, non-inclusive end) of the last file
+extent item inserted in the log, so it stays with a value of 68K and not
+with a value of 72K.
+
+Then, when copy_items() is called for leaf N + 1, because the value of
+*last_extent is smaller than the offset of the first extent item in the
+leaf (68K < 72K), we look at the last file extent item in the previous
+leaf (leaf N) and see there's a 4K gap between it and our first file
+extent item (again, 68K < 72K), so we decide to insert a file extent item
+representing the hole, starting at file offset 68K and ending at offset
+72K - 1, this insertion will fail with -EEXIST being returned from
+btrfs_insert_file_extent() because we already inserted a file extent item
+representing a hole for this offset (68K) in the previous call to
+copy_items(), when processing leaf N.
+
+The -EEXIST error gets propagated to the fsync callback, btrfs_sync_file(),
+which falls back to a full transaction commit.
+
+Fix this by adjusting *last_extent after inserting a hole when we had to
+look at the next leaf.
+
+Fixes: 4ee3fad34a9c ("Btrfs: fix fsync after hole punching when using no-holes feature")
+Cc: stable@vger.kernel.org # 4.14+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4169,6 +4169,7 @@ fill_holes:
+ *last_extent, 0,
+ 0, len, 0, len,
+ 0, 0, 0);
++ *last_extent += len;
+ }
+ }
+ }
--- /dev/null
+From 2b90883c561ddcc641741c2e4df1f702a4f2acb8 Mon Sep 17 00:00:00 2001
+From: Johnny Chang <johnnyc@synology.com>
+Date: Fri, 26 Apr 2019 11:01:05 +0800
+Subject: btrfs: Check the compression level before getting a workspace
+
+From: Johnny Chang <johnnyc@synology.com>
+
+commit 2b90883c561ddcc641741c2e4df1f702a4f2acb8 upstream.
+
+When a file's compression property is set to zlib or zstd but the
+compression mount option is not set, btrfs will try to compress the
+file with the default compression level. But in
+btrfs_compress_pages(), it calls get_workspace() with level = 0.
+This will return a workspace with a wrong compression level.
+For zlib, the compression level in the workspace will be 0
+(that means "store only"). And for zstd, the compression in the
+workspace will be 1, not the default level 3.
+
+How to reproduce:
+ mkfs -t btrfs /dev/sdb
+ mount /dev/sdb /mnt/
+ mkdir /mnt/zlib
+ btrfs property set /mnt/zlib/ compression zlib
+ dd if=/dev/zero of=/mnt/zlib/compression-friendly-file-10M bs=1M count=10
+ sync
+ btrfs-debugfs -f /mnt/zlib/compression-friendly-file-10M
+
+btrfs-debugfs output:
+* before:
+ ...
+ (258 9961472): ram 524288 disk 1106247680 disk_size 524288
+ file: ... extents 20 disk size 10485760 logical size 10485760 ratio 1.00
+
+* after:
+ ...
+ (258 10354688): ram 131072 disk 14217216 disk_size 4096
+ file: ... extents 80 disk size 327680 logical size 10485760 ratio 32.00
+
+The steps for zstd are similar, but need to put a debugging message to
+show the level of the return workspace in zstd_get_workspace().
+
+This commit adds a check of the compression level before getting a
+workspace by set_level().
+
+CC: stable@vger.kernel.org # 5.1+
+Signed-off-by: Johnny Chang <johnnyc@synology.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/compression.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/compression.c
++++ b/fs/btrfs/compression.c
+@@ -1009,6 +1009,7 @@ int btrfs_compress_pages(unsigned int ty
+ struct list_head *workspace;
+ int ret;
+
++ level = btrfs_compress_op[type]->set_level(level);
+ workspace = get_workspace(type, level);
+ ret = btrfs_compress_op[type]->compress_pages(workspace, mapping,
+ start, pages,
--- /dev/null
+From 72bd2323ec87722c115a5906bc6a1b31d11e8f54 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 29 Apr 2019 13:08:14 +0100
+Subject: Btrfs: do not abort transaction at btrfs_update_root() after failure to COW path
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 72bd2323ec87722c115a5906bc6a1b31d11e8f54 upstream.
+
+Currently when we fail to COW a path at btrfs_update_root() we end up
+always aborting the transaction. However all the current callers of
+btrfs_update_root() are able to deal with errors returned from it, many do
+end up aborting the transaction themselves (directly or not, such as the
+transaction commit path), others BUG_ON() or just gracefully cancel whatever
+they were doing.
+
+When syncing the fsync log, we call btrfs_update_root() through
+tree-log.c:update_log_root(), and if it returns an -ENOSPC error, the log
+sync code does not abort the transaction, instead it gracefully handles
+the error and returns -EAGAIN to the fsync handler, so that it falls back
+to a transaction commit. Any other error different from -ENOSPC, makes the
+log sync code abort the transaction.
+
+So remove the transaction abort from btrfs_update_root() when we fail to
+COW a path to update the root item, so that if an -ENOSPC failure happens
+we avoid aborting the current transaction and have a chance of the fsync
+succeeding after falling back to a transaction commit.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203413
+Fixes: 79787eaab46121 ("btrfs: replace many BUG_ONs with proper error handling")
+Cc: stable@vger.kernel.org # 4.4+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/root-tree.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/fs/btrfs/root-tree.c
++++ b/fs/btrfs/root-tree.c
+@@ -132,10 +132,8 @@ int btrfs_update_root(struct btrfs_trans
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+- if (ret < 0) {
+- btrfs_abort_transaction(trans, ret);
++ if (ret < 0)
+ goto out;
+- }
+
+ if (ret != 0) {
+ btrfs_print_leaf(path->nodes[0]);
--- /dev/null
+From 8fca955057b9c58467d1b231e43f19c4cf26ae8c Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 3 May 2019 11:10:06 -0400
+Subject: btrfs: don't double unlock on error in btrfs_punch_hole
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 8fca955057b9c58467d1b231e43f19c4cf26ae8c upstream.
+
+If we have an error writing out a delalloc range in
+btrfs_punch_hole_lock_range we'll unlock the inode and then goto
+out_only_mutex, where we will again unlock the inode. This is bad,
+don't do this.
+
+Fixes: f27451f22996 ("Btrfs: add support for fallocate's zero range operation")
+CC: stable@vger.kernel.org # 4.19+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2546,10 +2546,8 @@ static int btrfs_punch_hole(struct inode
+
+ ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
+ &cached_state);
+- if (ret) {
+- inode_unlock(inode);
++ if (ret)
+ goto out_only_mutex;
+- }
+
+ path = btrfs_alloc_path();
+ if (!path) {
--- /dev/null
+From 0c713cbab6200b0ab6473b50435e450a6e1de85d Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 6 May 2019 16:44:02 +0100
+Subject: Btrfs: fix race between ranged fsync and writeback of adjacent ranges
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 0c713cbab6200b0ab6473b50435e450a6e1de85d upstream.
+
+When we do a full fsync (the bit BTRFS_INODE_NEEDS_FULL_SYNC is set in the
+inode) that happens to be ranged, which happens during a msync() or writes
+for files opened with O_SYNC for example, we can end up with a corrupt log,
+due to different file extent items representing ranges that overlap with
+each other, or hit some assertion failures.
+
+When doing a ranged fsync we only flush delalloc and wait for ordered
+extents within that range. If, while we are logging items from our inode,
+ordered extents for adjacent ranges complete, we end up in a race that can
+make us insert file extent items that overlap with others we logged
+previously, or hit the assertion failures.
+
+For example, if tree-log.c:copy_items() receives a leaf that has the
+following file extents items, all with a length of 4K and therefore there
+is an implicit hole in the range 68K to 72K - 1:
+
+ (257 EXTENT_ITEM 64K), (257 EXTENT_ITEM 72K), (257 EXTENT_ITEM 76K), ...
+
+It copies them to the log tree. However due to the need to detect implicit
+holes, it may release the path, in order to look at the previous leaf to
+detect an implicit hole, and then later it will search again in the tree
+for the first file extent item key, with the goal of locking again the
+leaf (which might have changed due to concurrent changes to other inodes).
+
+However when it locks again the leaf containing the first key, the key
+corresponding to the extent at offset 72K may not be there anymore since
+there is an ordered extent for that range that is finishing (that is,
+somewhere in the middle of btrfs_finish_ordered_io()), and it just
+removed the file extent item but has not yet replaced it with a new file
+extent item, so the part of copy_items() that does hole detection will
+decide that there is a hole in the range starting from 68K to 76K - 1,
+and therefore insert a file extent item to represent that hole, having
+a key offset of 68K. After that we now have a log tree with 2 different
+extent items that have overlapping ranges:
+
+ 1) The file extent item copied before copy_items() released the path,
+ which has a key offset of 72K and a length of 4K, representing the
+ file range 72K to 76K - 1.
+
+ 2) And a file extent item representing a hole that has a key offset of
+ 68K and a length of 8K, representing the range 68K to 76K - 1. This
+ item was inserted after releasing the path, and overlaps with the
+ extent item inserted before.
+
+The overlapping extent items can cause all sorts of unpredictable and
+incorrect behaviour, either when replayed or if a fast (non full) fsync
+happens later, which can trigger a BUG_ON() when calling
+btrfs_set_item_key_safe() through __btrfs_drop_extents(), producing a
+trace like the following:
+
+ [61666.783269] ------------[ cut here ]------------
+ [61666.783943] kernel BUG at fs/btrfs/ctree.c:3182!
+ [61666.784644] invalid opcode: 0000 [#1] PREEMPT SMP
+ (...)
+ [61666.786253] task: ffff880117b88c40 task.stack: ffffc90008168000
+ [61666.786253] RIP: 0010:btrfs_set_item_key_safe+0x7c/0xd2 [btrfs]
+ [61666.786253] RSP: 0018:ffffc9000816b958 EFLAGS: 00010246
+ [61666.786253] RAX: 0000000000000000 RBX: 000000000000000f RCX: 0000000000030000
+ [61666.786253] RDX: 0000000000000000 RSI: ffffc9000816ba4f RDI: ffffc9000816b937
+ [61666.786253] RBP: ffffc9000816b998 R08: ffff88011dae2428 R09: 0000000000001000
+ [61666.786253] R10: 0000160000000000 R11: 6db6db6db6db6db7 R12: ffff88011dae2418
+ [61666.786253] R13: ffffc9000816ba4f R14: ffff8801e10c4118 R15: ffff8801e715c000
+ [61666.786253] FS: 00007f6060a18700(0000) GS:ffff88023f5c0000(0000) knlGS:0000000000000000
+ [61666.786253] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [61666.786253] CR2: 00007f6060a28000 CR3: 0000000213e69000 CR4: 00000000000006e0
+ [61666.786253] Call Trace:
+ [61666.786253] __btrfs_drop_extents+0x5e3/0xaad [btrfs]
+ [61666.786253] ? time_hardirqs_on+0x9/0x14
+ [61666.786253] btrfs_log_changed_extents+0x294/0x4e0 [btrfs]
+ [61666.786253] ? release_extent_buffer+0x38/0xb4 [btrfs]
+ [61666.786253] btrfs_log_inode+0xb6e/0xcdc [btrfs]
+ [61666.786253] ? lock_acquire+0x131/0x1c5
+ [61666.786253] ? btrfs_log_inode_parent+0xee/0x659 [btrfs]
+ [61666.786253] ? arch_local_irq_save+0x9/0xc
+ [61666.786253] ? btrfs_log_inode_parent+0x1f5/0x659 [btrfs]
+ [61666.786253] btrfs_log_inode_parent+0x223/0x659 [btrfs]
+ [61666.786253] ? arch_local_irq_save+0x9/0xc
+ [61666.786253] ? lockref_get_not_zero+0x2c/0x34
+ [61666.786253] ? rcu_read_unlock+0x3e/0x5d
+ [61666.786253] btrfs_log_dentry_safe+0x60/0x7b [btrfs]
+ [61666.786253] btrfs_sync_file+0x317/0x42c [btrfs]
+ [61666.786253] vfs_fsync_range+0x8c/0x9e
+ [61666.786253] SyS_msync+0x13c/0x1c9
+ [61666.786253] entry_SYSCALL_64_fastpath+0x18/0xad
+
+A sample of a corrupt log tree leaf with overlapping extents I got from
+running btrfs/072:
+
+ item 14 key (295 108 200704) itemoff 2599 itemsize 53
+ extent data disk bytenr 0 nr 0
+ extent data offset 0 nr 458752 ram 458752
+ item 15 key (295 108 659456) itemoff 2546 itemsize 53
+ extent data disk bytenr 4343541760 nr 770048
+ extent data offset 606208 nr 163840 ram 770048
+ item 16 key (295 108 663552) itemoff 2493 itemsize 53
+ extent data disk bytenr 4343541760 nr 770048
+ extent data offset 610304 nr 155648 ram 770048
+ item 17 key (295 108 819200) itemoff 2440 itemsize 53
+ extent data disk bytenr 4334788608 nr 4096
+ extent data offset 0 nr 4096 ram 4096
+
+The file extent item at offset 659456 (item 15) ends at offset 823296
+(659456 + 163840) while the next file extent item (item 16) starts at
+offset 663552.
+
+Another different problem that the race can trigger is a failure in the
+assertions at tree-log.c:copy_items(), which expect that the first file
+extent item key we found before releasing the path exists after we have
+released path and that the last key we found before releasing the path
+also exists after releasing the path:
+
+ $ cat -n fs/btrfs/tree-log.c
+ 4080 if (need_find_last_extent) {
+ 4081 /* btrfs_prev_leaf could return 1 without releasing the path */
+ 4082 btrfs_release_path(src_path);
+ 4083 ret = btrfs_search_slot(NULL, inode->root, &first_key,
+ 4084 src_path, 0, 0);
+ 4085 if (ret < 0)
+ 4086 return ret;
+ 4087 ASSERT(ret == 0);
+ (...)
+ 4103 if (i >= btrfs_header_nritems(src_path->nodes[0])) {
+ 4104 ret = btrfs_next_leaf(inode->root, src_path);
+ 4105 if (ret < 0)
+ 4106 return ret;
+ 4107 ASSERT(ret == 0);
+ 4108 src = src_path->nodes[0];
+ 4109 i = 0;
+ 4110 need_find_last_extent = true;
+ 4111 }
+ (...)
+
+The second assertion implicitly expects that the last key before the path
+release still exists, because the surrounding while loop only stops after
+we have found that key. When this assertion fails it produces a stack like
+this:
+
+ [139590.037075] assertion failed: ret == 0, file: fs/btrfs/tree-log.c, line: 4107
+ [139590.037406] ------------[ cut here ]------------
+ [139590.037707] kernel BUG at fs/btrfs/ctree.h:3546!
+ [139590.038034] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI
+ [139590.038340] CPU: 1 PID: 31841 Comm: fsstress Tainted: G W 5.0.0-btrfs-next-46 #1
+ (...)
+ [139590.039354] RIP: 0010:assfail.constprop.24+0x18/0x1a [btrfs]
+ (...)
+ [139590.040397] RSP: 0018:ffffa27f48f2b9b0 EFLAGS: 00010282
+ [139590.040730] RAX: 0000000000000041 RBX: ffff897c635d92c8 RCX: 0000000000000000
+ [139590.041105] RDX: 0000000000000000 RSI: ffff897d36a96868 RDI: ffff897d36a96868
+ [139590.041470] RBP: ffff897d1b9a0708 R08: 0000000000000000 R09: 0000000000000000
+ [139590.041815] R10: 0000000000000008 R11: 0000000000000000 R12: 0000000000000013
+ [139590.042159] R13: 0000000000000227 R14: ffff897cffcbba88 R15: 0000000000000001
+ [139590.042501] FS: 00007f2efc8dee80(0000) GS:ffff897d36a80000(0000) knlGS:0000000000000000
+ [139590.042847] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ [139590.043199] CR2: 00007f8c064935e0 CR3: 0000000232252002 CR4: 00000000003606e0
+ [139590.043547] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ [139590.043899] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ [139590.044250] Call Trace:
+ [139590.044631] copy_items+0xa3f/0x1000 [btrfs]
+ [139590.045009] ? generic_bin_search.constprop.32+0x61/0x200 [btrfs]
+ [139590.045396] btrfs_log_inode+0x7b3/0xd70 [btrfs]
+ [139590.045773] btrfs_log_inode_parent+0x2b3/0xce0 [btrfs]
+ [139590.046143] ? do_raw_spin_unlock+0x49/0xc0
+ [139590.046510] btrfs_log_dentry_safe+0x4a/0x70 [btrfs]
+ [139590.046872] btrfs_sync_file+0x3b6/0x440 [btrfs]
+ [139590.047243] btrfs_file_write_iter+0x45b/0x5c0 [btrfs]
+ [139590.047592] __vfs_write+0x129/0x1c0
+ [139590.047932] vfs_write+0xc2/0x1b0
+ [139590.048270] ksys_write+0x55/0xc0
+ [139590.048608] do_syscall_64+0x60/0x1b0
+ [139590.048946] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+ [139590.049287] RIP: 0033:0x7f2efc4be190
+ (...)
+ [139590.050342] RSP: 002b:00007ffe743243a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+ [139590.050701] RAX: ffffffffffffffda RBX: 0000000000008d58 RCX: 00007f2efc4be190
+ [139590.051067] RDX: 0000000000008d58 RSI: 00005567eca0f370 RDI: 0000000000000003
+ [139590.051459] RBP: 0000000000000024 R08: 0000000000000003 R09: 0000000000008d60
+ [139590.051863] R10: 0000000000000078 R11: 0000000000000246 R12: 0000000000000003
+ [139590.052252] R13: 00000000003d3507 R14: 00005567eca0f370 R15: 0000000000000000
+ (...)
+ [139590.055128] ---[ end trace 193f35d0215cdeeb ]---
+
+So fix this race between a full ranged fsync and writeback of adjacent
+ranges by flushing all delalloc and waiting for all ordered extents to
+complete before logging the inode. This is the simplest way to solve the
+problem because currently the full fsync path does not deal with ranges
+at all (it assumes a full range from 0 to LLONG_MAX) and it always needs
+to look at adjacent ranges for hole detection. For use cases of ranged
+fsyncs this can make a few fsyncs slower but on the other hand it can
+make some following fsyncs to other ranges do less work or not need to do
+anything at all. A full fsync is rare anyway and happens only once after
+loading/creating an inode and once after less common operations such as a
+shrinking truncate.
+
+This issue has existed for a long time, and was often triggered by
+generic/127, because it does mmap'ed writes and msync (which triggers a
+ranged fsync). Adding support for the tree checker to detect overlapping
+extents (next patch in the series) and trigger a WARN() when such cases
+are found, and then calling btrfs_check_leaf_full() at the end of
+btrfs_insert_file_extent() made the issue much easier to detect. Running
+btrfs/072 with that change to the tree checker and making fsstress open
+files always with O_SYNC made it much easier to trigger the issue (as
+triggering it with generic/127 is very rare).
+
+CC: stable@vger.kernel.org # 3.16+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2059,6 +2059,18 @@ int btrfs_sync_file(struct file *file, l
+ u64 len;
+
+ /*
++ * If the inode needs a full sync, make sure we use a full range to
++ * avoid log tree corruption, due to hole detection racing with ordered
++ * extent completion for adjacent ranges, and assertion failures during
++ * hole detection.
++ */
++ if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
++ &BTRFS_I(inode)->runtime_flags)) {
++ start = 0;
++ end = LLONG_MAX;
++ }
++
++ /*
+ * The range length can be represented by u64, we have to do the typecasts
+ * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
+ */
--- /dev/null
+From e32773357d5cc271b1d23550b3ed026eb5c2a468 Mon Sep 17 00:00:00 2001
+From: "Tobin C. Harding" <tobin@kernel.org>
+Date: Mon, 13 May 2019 13:39:12 +1000
+Subject: btrfs: sysfs: don't leak memory when failing add fsid
+
+From: Tobin C. Harding <tobin@kernel.org>
+
+commit e32773357d5cc271b1d23550b3ed026eb5c2a468 upstream.
+
+A failed call to kobject_init_and_add() must be followed by a call to
+kobject_put(). Currently in the error path when adding fs_devices we
+are missing this call. This could be fixed by calling
+btrfs_sysfs_remove_fsid() if btrfs_sysfs_add_fsid() returns an error or
+by adding a call to kobject_put() directly in btrfs_sysfs_add_fsid().
+Here we choose the second option because it prevents the slightly
+unusual error path handling requirements of kobject from leaking out
+into btrfs functions.
+
+Add a call to kobject_put() in the error path of kobject_init_and_add().
+This causes the release method to be called if kobject_init_and_add()
+fails. open_ctree() is the function that calls btrfs_sysfs_add_fsid()
+and the error code in this function is already written with the
+assumption that the release method is called during the error path of
+open_ctree() (as seen by the call to btrfs_sysfs_remove_fsid() under the
+fail_fsdev_sysfs label).
+
+Cc: stable@vger.kernel.org # v4.4+
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Tobin C. Harding <tobin@kernel.org>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/sysfs.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/sysfs.c
++++ b/fs/btrfs/sysfs.c
+@@ -825,7 +825,12 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs
+ fs_devs->fsid_kobj.kset = btrfs_kset;
+ error = kobject_init_and_add(&fs_devs->fsid_kobj,
+ &btrfs_ktype, parent, "%pU", fs_devs->fsid);
+- return error;
++ if (error) {
++ kobject_put(&fs_devs->fsid_kobj);
++ return error;
++ }
++
++ return 0;
+ }
+
+ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
--- /dev/null
+From 450ff8348808a89cc27436771aa05c2b90c0eef1 Mon Sep 17 00:00:00 2001
+From: "Tobin C. Harding" <tobin@kernel.org>
+Date: Mon, 13 May 2019 13:39:11 +1000
+Subject: btrfs: sysfs: Fix error path kobject memory leak
+
+From: Tobin C. Harding <tobin@kernel.org>
+
+commit 450ff8348808a89cc27436771aa05c2b90c0eef1 upstream.
+
+If a call to kobject_init_and_add() fails we must call kobject_put()
+otherwise we leak memory.
+
+Calling kobject_put() when kobject_init_and_add() fails drops the
+refcount back to 0 and calls the ktype release method (which in turn
+calls the percpu destroy and kfree).
+
+Add call to kobject_put() in the error path of call to
+kobject_init_and_add().
+
+Cc: stable@vger.kernel.org # v4.4+
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Tobin C. Harding <tobin@kernel.org>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3981,8 +3981,7 @@ static int create_space_info(struct btrf
+ info->space_info_kobj, "%s",
+ alloc_name(space_info->flags));
+ if (ret) {
+- percpu_counter_destroy(&space_info->total_bytes_pinned);
+- kfree(space_info);
++ kobject_put(&space_info->kobj);
+ return ret;
+ }
+
--- /dev/null
+From cf84807f6dd0be5214378e66460cfc9187f532f9 Mon Sep 17 00:00:00 2001
+From: Shile Zhang <shile.zhang@linux.alibaba.com>
+Date: Mon, 1 Apr 2019 17:47:00 +0200
+Subject: fbdev: fix divide error in fb_var_to_videomode
+
+From: Shile Zhang <shile.zhang@linux.alibaba.com>
+
+commit cf84807f6dd0be5214378e66460cfc9187f532f9 upstream.
+
+Fix the following divide-by-zero error found by Syzkaller:
+
+ divide error: 0000 [#1] SMP PTI
+ CPU: 7 PID: 8447 Comm: test Kdump: loaded Not tainted 4.19.24-8.al7.x86_64 #1
+ Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+ RIP: 0010:fb_var_to_videomode+0xae/0xc0
+ Code: 04 44 03 46 78 03 4e 7c 44 03 46 68 03 4e 70 89 ce d1 ee 69 c0 e8 03 00 00 f6 c2 01 0f 45 ce 83 e2 02 8d 34 09 0f 45 ce 31 d2 <41> f7 f0 31 d2 f7 f1 89 47 08 f3 c3 66 0f 1f 44 00 00 0f 1f 44 00
+ RSP: 0018:ffffb7e189347bf0 EFLAGS: 00010246
+ RAX: 00000000e1692410 RBX: ffffb7e189347d60 RCX: 0000000000000000
+ RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffb7e189347c10
+ RBP: ffff99972a091c00 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000100
+ R13: 0000000000010000 R14: 00007ffd66baf6d0 R15: 0000000000000000
+ FS: 00007f2054d11740(0000) GS:ffff99972fbc0000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007f205481fd20 CR3: 00000004288a0001 CR4: 00000000001606a0
+ Call Trace:
+ fb_set_var+0x257/0x390
+ ? lookup_fast+0xbb/0x2b0
+ ? fb_open+0xc0/0x140
+ ? chrdev_open+0xa6/0x1a0
+ do_fb_ioctl+0x445/0x5a0
+ do_vfs_ioctl+0x92/0x5f0
+ ? __alloc_fd+0x3d/0x160
+ ksys_ioctl+0x60/0x90
+ __x64_sys_ioctl+0x16/0x20
+ do_syscall_64+0x5b/0x190
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ RIP: 0033:0x7f20548258d7
+ Code: 44 00 00 48 8b 05 b9 15 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 89 15 2d 00 f7 d8 64 89 01 48
+
+It can be triggered easily with the following test code:
+
+ #include <linux/fb.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+ int main(void)
+ {
+ struct fb_var_screeninfo var = {.activate = 0x100, .pixclock = 60};
+ int fd = open("/dev/fb0", O_RDWR);
+ if (fd < 0)
+ return 1;
+
+ if (ioctl(fd, FBIOPUT_VSCREENINFO, &var))
+ return 1;
+
+ return 0;
+ }
+
+Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
+Cc: Fredrik Noring <noring@nocrew.org>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Reviewed-by: Mukesh Ojha <mojha@codeaurora.org>
+Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/fbdev/core/modedb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/video/fbdev/core/modedb.c
++++ b/drivers/video/fbdev/core/modedb.c
+@@ -935,6 +935,9 @@ void fb_var_to_videomode(struct fb_video
+ if (var->vmode & FB_VMODE_DOUBLE)
+ vtotal *= 2;
+
++ if (!htotal || !vtotal)
++ return;
++
+ hfreq = pixclock/htotal;
+ mode->refresh = hfreq/vtotal;
+ }
--- /dev/null
+From 5a5ec83d6ac974b12085cd99b196795f14079037 Mon Sep 17 00:00:00 2001
+From: Andreas Gruenbacher <agruenba@redhat.com>
+Date: Fri, 17 May 2019 19:18:43 +0100
+Subject: gfs2: Fix sign extension bug in gfs2_update_stats
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+commit 5a5ec83d6ac974b12085cd99b196795f14079037 upstream.
+
+Commit 4d207133e9c3 changed the types of the statistic values in struct
+gfs2_lkstats from s64 to u64. Because of that, what should be a signed
+value in gfs2_update_stats turned into an unsigned value. When shifted
+right, we end up with a large positive value instead of a small negative
+value, which results in an incorrect variance estimate.
+
+Fixes: 4d207133e9c3 ("gfs2: Make statistics unsigned, suitable for use with do_div()")
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Cc: stable@vger.kernel.org # v4.4+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/gfs2/lock_dlm.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/gfs2/lock_dlm.c
++++ b/fs/gfs2/lock_dlm.c
+@@ -31,9 +31,10 @@
+ * @delta is the difference between the current rtt sample and the
+ * running average srtt. We add 1/8 of that to the srtt in order to
+ * update the current srtt estimate. The variance estimate is a bit
+- * more complicated. We subtract the abs value of the @delta from
+- * the current variance estimate and add 1/4 of that to the running
+- * total.
++ * more complicated. We subtract the current variance estimate from
++ * the abs value of the @delta and add 1/4 of that to the running
++ * total. That's equivalent to 3/4 of the current variance
++ * estimate plus 1/4 of the abs of @delta.
+ *
+ * Note that the index points at the array entry containing the smoothed
+ * mean value, and the variance is always in the following entry
+@@ -49,7 +50,7 @@ static inline void gfs2_update_stats(str
+ s64 delta = sample - s->stats[index];
+ s->stats[index] += (delta >> 3);
+ index++;
+- s->stats[index] += ((abs(delta) - s->stats[index]) >> 2);
++ s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2;
+ }
+
+ /**
kvm-svm-avic-fix-off-by-one-in-checking-host-apic-id.patch
kvm-nvmx-fix-using-__this_cpu_read-in-preemptible-context.patch
libnvdimm-pmem-bypass-config_hardened_usercopy-overhead.patch
+arm64-kernel-kaslr-reduce-module-randomization-range-to-2-gb.patch
+arm64-kconfig-make-arm64_pseudo_nmi-depend-on-broken-for-now.patch
+arm64-iommu-handle-non-remapped-addresses-in-mmap-and-get_sgtable.patch
+gfs2-fix-sign-extension-bug-in-gfs2_update_stats.patch
+btrfs-don-t-double-unlock-on-error-in-btrfs_punch_hole.patch
+btrfs-check-the-compression-level-before-getting-a-workspace.patch
+btrfs-do-not-abort-transaction-at-btrfs_update_root-after-failure-to-cow-path.patch
+btrfs-avoid-fallback-to-transaction-commit-during-fsync-of-files-with-holes.patch
+btrfs-fix-race-between-ranged-fsync-and-writeback-of-adjacent-ranges.patch
+btrfs-sysfs-fix-error-path-kobject-memory-leak.patch
+btrfs-sysfs-don-t-leak-memory-when-failing-add-fsid.patch
+fbdev-fix-divide-error-in-fb_var_to_videomode.patch