]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 11 Nov 2018 20:56:52 +0000 (12:56 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 11 Nov 2018 20:56:52 +0000 (12:56 -0800)
added patches:
bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch
btrfs-don-t-run-delayed_iputs-in-commit.patch
btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch
btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch
btrfs-fix-deadlock-when-writing-out-free-space-caches.patch
btrfs-fix-insert_reserved-error-handling.patch
btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch
btrfs-fix-use-after-free-during-inode-eviction.patch
btrfs-fix-use-after-free-when-dumping-free-space.patch
btrfs-move-the-dio_sem-higher-up-the-callchain.patch
btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch
btrfs-reset-max_extent_size-properly.patch
btrfs-set-max_extent_size-properly.patch
net-sched-remove-tca_options-from-policy.patch
userns-also-map-extents-in-the-reverse-map-to-kernel-ids.patch
vga_switcheroo-fix-missing-gpu_bound-call-at-audio-client-registration.patch
vt-fix-broken-display-when-running-aptitude.patch

18 files changed:
queue-4.19/bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch [new file with mode: 0644]
queue-4.19/btrfs-don-t-run-delayed_iputs-in-commit.patch [new file with mode: 0644]
queue-4.19/btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch [new file with mode: 0644]
queue-4.19/btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch [new file with mode: 0644]
queue-4.19/btrfs-fix-deadlock-when-writing-out-free-space-caches.patch [new file with mode: 0644]
queue-4.19/btrfs-fix-insert_reserved-error-handling.patch [new file with mode: 0644]
queue-4.19/btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch [new file with mode: 0644]
queue-4.19/btrfs-fix-use-after-free-during-inode-eviction.patch [new file with mode: 0644]
queue-4.19/btrfs-fix-use-after-free-when-dumping-free-space.patch [new file with mode: 0644]
queue-4.19/btrfs-move-the-dio_sem-higher-up-the-callchain.patch [new file with mode: 0644]
queue-4.19/btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch [new file with mode: 0644]
queue-4.19/btrfs-reset-max_extent_size-properly.patch [new file with mode: 0644]
queue-4.19/btrfs-set-max_extent_size-properly.patch [new file with mode: 0644]
queue-4.19/net-sched-remove-tca_options-from-policy.patch [new file with mode: 0644]
queue-4.19/series
queue-4.19/userns-also-map-extents-in-the-reverse-map-to-kernel-ids.patch [new file with mode: 0644]
queue-4.19/vga_switcheroo-fix-missing-gpu_bound-call-at-audio-client-registration.patch [new file with mode: 0644]
queue-4.19/vt-fix-broken-display-when-running-aptitude.patch [new file with mode: 0644]

diff --git a/queue-4.19/bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch b/queue-4.19/bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch
new file mode 100644 (file)
index 0000000..65d319d
--- /dev/null
@@ -0,0 +1,65 @@
+From 1ae80cf31938c8f77c37a29bbe29e7f1cd492be8 Mon Sep 17 00:00:00 2001
+From: Daniel Colascione <dancol@google.com>
+Date: Fri, 12 Oct 2018 03:54:27 -0700
+Subject: bpf: wait for running BPF programs when updating map-in-map
+
+From: Daniel Colascione <dancol@google.com>
+
+commit 1ae80cf31938c8f77c37a29bbe29e7f1cd492be8 upstream.
+
+The map-in-map frequently serves as a mechanism for atomic
+snapshotting of state that a BPF program might record.  The current
+implementation is dangerous to use in this way, however, since
+userspace has no way of knowing when all programs that might have
+retrieved the "old" value of the map may have completed.
+
+This change ensures that map update operations on map-in-map map types
+always wait for all references to the old map to drop before returning
+to userspace.
+
+Signed-off-by: Daniel Colascione <dancol@google.com>
+Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Chenbo Feng <fengc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/syscall.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -741,6 +741,17 @@ err_put:
+       return err;
+ }
++static void maybe_wait_bpf_programs(struct bpf_map *map)
++{
++      /* Wait for any running BPF programs to complete so that
++       * userspace, when we return to it, knows that all programs
++       * that could be running use the new map value.
++       */
++      if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
++          map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
++              synchronize_rcu();
++}
++
+ #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
+ static int map_update_elem(union bpf_attr *attr)
+@@ -831,6 +842,7 @@ static int map_update_elem(union bpf_att
+       }
+       __this_cpu_dec(bpf_prog_active);
+       preempt_enable();
++      maybe_wait_bpf_programs(map);
+ out:
+ free_value:
+       kfree(value);
+@@ -883,6 +895,7 @@ static int map_delete_elem(union bpf_att
+       rcu_read_unlock();
+       __this_cpu_dec(bpf_prog_active);
+       preempt_enable();
++      maybe_wait_bpf_programs(map);
+ out:
+       kfree(key);
+ err_put:
diff --git a/queue-4.19/btrfs-don-t-run-delayed_iputs-in-commit.patch b/queue-4.19/btrfs-don-t-run-delayed_iputs-in-commit.patch
new file mode 100644 (file)
index 0000000..13da380
--- /dev/null
@@ -0,0 +1,52 @@
+From 30928e9baac238a7330085a1c5747f0b5df444b4 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Thu, 11 Oct 2018 15:54:31 -0400
+Subject: btrfs: don't run delayed_iputs in commit
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 30928e9baac238a7330085a1c5747f0b5df444b4 upstream.
+
+This could result in a really bad case where we do something like
+
+evict
+  evict_refill_and_join
+    btrfs_commit_transaction
+      btrfs_run_delayed_iputs
+        evict
+          evict_refill_and_join
+            btrfs_commit_transaction
+... forever
+
+We have plenty of other places where we run delayed iputs that are much
+safer, let those do the work.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/transaction.c |    9 ---------
+ 1 file changed, 9 deletions(-)
+
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -2280,15 +2280,6 @@ int btrfs_commit_transaction(struct btrf
+       kmem_cache_free(btrfs_trans_handle_cachep, trans);
+-      /*
+-       * If fs has been frozen, we can not handle delayed iputs, otherwise
+-       * it'll result in deadlock about SB_FREEZE_FS.
+-       */
+-      if (current != fs_info->transaction_kthread &&
+-          current != fs_info->cleaner_kthread &&
+-          !test_bit(BTRFS_FS_FROZEN, &fs_info->flags))
+-              btrfs_run_delayed_iputs(fs_info);
+-
+       return ret;
+ scrub_continue:
diff --git a/queue-4.19/btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch b/queue-4.19/btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch
new file mode 100644 (file)
index 0000000..ffee9cd
--- /dev/null
@@ -0,0 +1,56 @@
+From fb5c39d7a887108087de6ff93d3f326b01b4ef41 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Thu, 11 Oct 2018 15:54:09 -0400
+Subject: btrfs: don't use ctl->free_space for max_extent_size
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit fb5c39d7a887108087de6ff93d3f326b01b4ef41 upstream.
+
+max_extent_size is supposed to be the largest contiguous range for the
+space info, and ctl->free_space is the total free space in the block
+group.  We need to keep track of these separately and _only_ use the
+max_free_space if we don't have a max_extent_size, as that means our
+original request was too large to search any of the block groups for and
+therefore wouldn't have a max_extent_size set.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -7230,6 +7230,7 @@ static noinline int find_free_extent(str
+       struct btrfs_block_group_cache *block_group = NULL;
+       u64 search_start = 0;
+       u64 max_extent_size = 0;
++      u64 max_free_space = 0;
+       u64 empty_cluster = 0;
+       struct btrfs_space_info *space_info;
+       int loop = 0;
+@@ -7525,8 +7526,8 @@ unclustered_alloc:
+                       spin_lock(&ctl->tree_lock);
+                       if (ctl->free_space <
+                           num_bytes + empty_cluster + empty_size) {
+-                              if (ctl->free_space > max_extent_size)
+-                                      max_extent_size = ctl->free_space;
++                              max_free_space = max(max_free_space,
++                                                   ctl->free_space);
+                               spin_unlock(&ctl->tree_lock);
+                               goto loop;
+                       }
+@@ -7693,6 +7694,8 @@ loop:
+       }
+ out:
+       if (ret == -ENOSPC) {
++              if (!max_extent_size)
++                      max_extent_size = max_free_space;
+               spin_lock(&space_info->lock);
+               space_info->max_extent_size = max_extent_size;
+               spin_unlock(&space_info->lock);
diff --git a/queue-4.19/btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch b/queue-4.19/btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch
new file mode 100644 (file)
index 0000000..6004075
--- /dev/null
@@ -0,0 +1,59 @@
+From 7ed586d0a8241e81d58c656c5b315f781fa6fc97 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 15 Oct 2018 09:51:00 +0100
+Subject: Btrfs: fix assertion on fsync of regular file when using no-holes feature
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 7ed586d0a8241e81d58c656c5b315f781fa6fc97 upstream.
+
+When using the NO_HOLES feature and logging a regular file, we were
+expecting that if we find an inline extent, that either its size in RAM
+(uncompressed and unencoded) matches the size of the file or if it does
+not, that it matches the sector size and it represents compressed data.
+This assertion does not cover a case where the length of the inline extent
+is smaller than the sector size and also smaller than the file's size. Such
+a case is possible through fallocate. Example:
+
+  $ mkfs.btrfs -f -O no-holes /dev/sdb
+  $ mount /dev/sdb /mnt
+
+  $ xfs_io -f -c "pwrite -S 0xb60 0 21" /mnt/foobar
+  $ xfs_io -c "falloc 40 40" /mnt/foobar
+  $ xfs_io -c "fsync" /mnt/foobar
+
+In the above example we trigger the assertion because the inline extent's
+length is 21 bytes while the file size is 80 bytes. The fallocate() call
+merely updated the file's size and did not touch the existing inline
+extent, as expected.
+
+So fix this by adjusting the assertion so that an inline extent length
+smaller than the file size is valid if the file size is smaller than the
+filesystem's sector size.
+
+A test case for fstests follows soon.
+
+Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
+Fixes: a89ca6f24ffe ("Btrfs: fix fsync after truncate when no_holes feature is enabled")
+CC: stable@vger.kernel.org # 4.14+
+Link: https://lore.kernel.org/linux-btrfs/CAE5jQCfRSBC7n4pUTFJcmHh109=gwyT9mFkCOL+NKfzswmR=_Q@mail.gmail.com/
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/tree-log.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4655,7 +4655,8 @@ static int btrfs_log_trailing_hole(struc
+                       ASSERT(len == i_size ||
+                              (len == fs_info->sectorsize &&
+                               btrfs_file_extent_compression(leaf, extent) !=
+-                              BTRFS_COMPRESS_NONE));
++                              BTRFS_COMPRESS_NONE) ||
++                             (len < i_size && i_size < fs_info->sectorsize));
+                       return 0;
+               }
diff --git a/queue-4.19/btrfs-fix-deadlock-when-writing-out-free-space-caches.patch b/queue-4.19/btrfs-fix-deadlock-when-writing-out-free-space-caches.patch
new file mode 100644 (file)
index 0000000..2f8629f
--- /dev/null
@@ -0,0 +1,185 @@
+From 5ce555578e0919237fa4bda92b4670e2dd176f85 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Fri, 12 Oct 2018 10:03:55 +0100
+Subject: Btrfs: fix deadlock when writing out free space caches
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 5ce555578e0919237fa4bda92b4670e2dd176f85 upstream.
+
+When writing out a block group free space cache we can end up deadlocking
+with ourselves on an extent buffer lock resulting in a warning like the
+following:
+
+  [245043.379979] WARNING: CPU: 4 PID: 2608 at fs/btrfs/locking.c:251 btrfs_tree_lock+0x1be/0x1d0 [btrfs]
+  [245043.392792] CPU: 4 PID: 2608 Comm: btrfs-transacti Tainted: G
+    W I      4.16.8 #1
+  [245043.395489] RIP: 0010:btrfs_tree_lock+0x1be/0x1d0 [btrfs]
+  [245043.396791] RSP: 0018:ffffc9000424b840 EFLAGS: 00010246
+  [245043.398093] RAX: 0000000000000a30 RBX: ffff8807e20a3d20 RCX: 0000000000000001
+  [245043.399414] RDX: 0000000000000001 RSI: 0000000000000002 RDI: ffff8807e20a3d20
+  [245043.400732] RBP: 0000000000000001 R08: ffff88041f39a700 R09: ffff880000000000
+  [245043.402021] R10: 0000000000000040 R11: ffff8807e20a3d20 R12: ffff8807cb220630
+  [245043.403296] R13: 0000000000000001 R14: ffff8807cb220628 R15: ffff88041fbdf000
+  [245043.404780] FS:  0000000000000000(0000) GS:ffff88082fc80000(0000) knlGS:0000000000000000
+  [245043.406050] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  [245043.407321] CR2: 00007fffdbdb9f10 CR3: 0000000001c09005 CR4: 00000000000206e0
+  [245043.408670] Call Trace:
+  [245043.409977]  btrfs_search_slot+0x761/0xa60 [btrfs]
+  [245043.411278]  btrfs_insert_empty_items+0x62/0xb0 [btrfs]
+  [245043.412572]  btrfs_insert_item+0x5b/0xc0 [btrfs]
+  [245043.413922]  btrfs_create_pending_block_groups+0xfb/0x1e0 [btrfs]
+  [245043.415216]  do_chunk_alloc+0x1e5/0x2a0 [btrfs]
+  [245043.416487]  find_free_extent+0xcd0/0xf60 [btrfs]
+  [245043.417813]  btrfs_reserve_extent+0x96/0x1e0 [btrfs]
+  [245043.419105]  btrfs_alloc_tree_block+0xfb/0x4a0 [btrfs]
+  [245043.420378]  __btrfs_cow_block+0x127/0x550 [btrfs]
+  [245043.421652]  btrfs_cow_block+0xee/0x190 [btrfs]
+  [245043.422979]  btrfs_search_slot+0x227/0xa60 [btrfs]
+  [245043.424279]  ? btrfs_update_inode_item+0x59/0x100 [btrfs]
+  [245043.425538]  ? iput+0x72/0x1e0
+  [245043.426798]  write_one_cache_group.isra.49+0x20/0x90 [btrfs]
+  [245043.428131]  btrfs_start_dirty_block_groups+0x102/0x420 [btrfs]
+  [245043.429419]  btrfs_commit_transaction+0x11b/0x880 [btrfs]
+  [245043.430712]  ? start_transaction+0x8e/0x410 [btrfs]
+  [245043.432006]  transaction_kthread+0x184/0x1a0 [btrfs]
+  [245043.433341]  kthread+0xf0/0x130
+  [245043.434628]  ? btrfs_cleanup_transaction+0x4e0/0x4e0 [btrfs]
+  [245043.435928]  ? kthread_create_worker_on_cpu+0x40/0x40
+  [245043.437236]  ret_from_fork+0x1f/0x30
+  [245043.441054] ---[ end trace 15abaa2aaf36827f ]---
+
+This is because at write_one_cache_group() when we are COWing a leaf from
+the extent tree we end up allocating a new block group (chunk) and,
+because we have hit a threshold on the number of bytes reserved for system
+chunks, we attempt to finalize the creation of new block groups from the
+current transaction, by calling btrfs_create_pending_block_groups().
+However here we also need to modify the extent tree in order to insert
+a block group item, and if the location for this new block group item
+happens to be in the same leaf that we were COWing earlier, we deadlock
+since btrfs_search_slot() tries to write lock the extent buffer that we
+locked before at write_one_cache_group().
+
+We have already hit similar cases in the past and commit d9a0540a79f8
+("Btrfs: fix deadlock when finalizing block group creation") fixed some
+of those cases by delaying the creation of pending block groups at the
+known specific spots that could lead to a deadlock. This change reworks
+that commit to be more generic so that we don't have to add similar logic
+to every possible path that can lead to a deadlock. This is done by
+making __btrfs_cow_block() disallow the creation of new block groups
+(setting the transaction's can_flush_pending_bgs to false) before it
+attempts to allocate a new extent buffer for either the extent, chunk or
+device trees, since those are the trees that pending block creation
+modifies. Once the new extent buffer is allocated, it allows creation of
+pending block groups to happen again.
+
+This change depends on a recent patch from Josef which is not yet in
+Linus' tree, named "btrfs: make sure we create all new block groups" in
+order to avoid occasional warnings at btrfs_trans_release_chunk_metadata().
+
+Fixes: d9a0540a79f8 ("Btrfs: fix deadlock when finalizing block group creation")
+CC: stable@vger.kernel.org # 4.4+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199753
+Link: https://lore.kernel.org/linux-btrfs/CAJtFHUTHna09ST-_EEiyWmDH6gAqS6wa=zMNMBsifj8ABu99cw@mail.gmail.com/
+Reported-by: E V <eliventer@gmail.com>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c       |   17 +++++++++++++++++
+ fs/btrfs/extent-tree.c |   16 ++++++----------
+ 2 files changed, 23 insertions(+), 10 deletions(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -1050,9 +1050,26 @@ static noinline int __btrfs_cow_block(st
+       if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
+               parent_start = parent->start;
++      /*
++       * If we are COWing a node/leaf from the extent, chunk or device trees,
++       * make sure that we do not finish block group creation of pending block
++       * groups. We do this to avoid a deadlock.
++       * COWing can result in allocation of a new chunk, and flushing pending
++       * block groups (btrfs_create_pending_block_groups()) can be triggered
++       * when finishing allocation of a new chunk. Creation of a pending block
++       * group modifies the extent, chunk and device trees, therefore we could
++       * deadlock with ourselves since we are holding a lock on an extent
++       * buffer that btrfs_create_pending_block_groups() may try to COW later.
++       */
++      if (root == fs_info->extent_root ||
++          root == fs_info->chunk_root ||
++          root == fs_info->dev_root)
++              trans->can_flush_pending_bgs = false;
++
+       cow = btrfs_alloc_tree_block(trans, root, parent_start,
+                       root->root_key.objectid, &disk_key, level,
+                       search_start, empty_size);
++      trans->can_flush_pending_bgs = true;
+       if (IS_ERR(cow))
+               return PTR_ERR(cow);
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -2911,7 +2911,6 @@ int btrfs_run_delayed_refs(struct btrfs_
+       struct btrfs_delayed_ref_head *head;
+       int ret;
+       int run_all = count == (unsigned long)-1;
+-      bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
+       /* We'll clean this up in btrfs_cleanup_transaction */
+       if (trans->aborted)
+@@ -2928,7 +2927,6 @@ again:
+ #ifdef SCRAMBLE_DELAYED_REFS
+       delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
+ #endif
+-      trans->can_flush_pending_bgs = false;
+       ret = __btrfs_run_delayed_refs(trans, count);
+       if (ret < 0) {
+               btrfs_abort_transaction(trans, ret);
+@@ -2959,7 +2957,6 @@ again:
+               goto again;
+       }
+ out:
+-      trans->can_flush_pending_bgs = can_flush_pending_bgs;
+       return 0;
+ }
+@@ -4554,11 +4551,9 @@ out:
+        * the block groups that were made dirty during the lifetime of the
+        * transaction.
+        */
+-      if (trans->can_flush_pending_bgs &&
+-          trans->chunk_bytes_reserved >= (u64)SZ_2M) {
++      if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
+               btrfs_create_pending_block_groups(trans);
+-              btrfs_trans_release_chunk_metadata(trans);
+-      }
++
+       return ret;
+ }
+@@ -10099,9 +10094,10 @@ void btrfs_create_pending_block_groups(s
+       struct btrfs_block_group_item item;
+       struct btrfs_key key;
+       int ret = 0;
+-      bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
+-      trans->can_flush_pending_bgs = false;
++      if (!trans->can_flush_pending_bgs)
++              return;
++
+       while (!list_empty(&trans->new_bgs)) {
+               block_group = list_first_entry(&trans->new_bgs,
+                                              struct btrfs_block_group_cache,
+@@ -10126,7 +10122,7 @@ void btrfs_create_pending_block_groups(s
+ next:
+               list_del_init(&block_group->bg_list);
+       }
+-      trans->can_flush_pending_bgs = can_flush_pending_bgs;
++      btrfs_trans_release_chunk_metadata(trans);
+ }
+ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
diff --git a/queue-4.19/btrfs-fix-insert_reserved-error-handling.patch b/queue-4.19/btrfs-fix-insert_reserved-error-handling.patch
new file mode 100644 (file)
index 0000000..93a3a1d
--- /dev/null
@@ -0,0 +1,62 @@
+From 80ee54bfe8a3850015585ebc84e8d207fcae6831 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Thu, 11 Oct 2018 15:54:22 -0400
+Subject: btrfs: fix insert_reserved error handling
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 80ee54bfe8a3850015585ebc84e8d207fcae6831 upstream.
+
+We were not handling the reserved byte accounting properly for data
+references.  Metadata was fine, if it errored out the error paths would
+free the bytes_reserved count and pin the extent, but it even missed one
+of the error cases.  So instead move this handling up into
+run_one_delayed_ref so we are sure that both cases are properly cleaned
+up in case of a transaction abort.
+
+CC: stable@vger.kernel.org # 4.18+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |   12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -2366,6 +2366,9 @@ static int run_one_delayed_ref(struct bt
+                                          insert_reserved);
+       else
+               BUG();
++      if (ret && insert_reserved)
++              btrfs_pin_extent(trans->fs_info, node->bytenr,
++                               node->num_bytes, 1);
+       return ret;
+ }
+@@ -7977,21 +7980,14 @@ static int alloc_reserved_tree_block(str
+       }
+       path = btrfs_alloc_path();
+-      if (!path) {
+-              btrfs_free_and_pin_reserved_extent(fs_info,
+-                                                 extent_key.objectid,
+-                                                 fs_info->nodesize);
++      if (!path)
+               return -ENOMEM;
+-      }
+       path->leave_spinning = 1;
+       ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
+                                     &extent_key, size);
+       if (ret) {
+               btrfs_free_path(path);
+-              btrfs_free_and_pin_reserved_extent(fs_info,
+-                                                 extent_key.objectid,
+-                                                 fs_info->nodesize);
+               return ret;
+       }
diff --git a/queue-4.19/btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch b/queue-4.19/btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch
new file mode 100644 (file)
index 0000000..294b73d
--- /dev/null
@@ -0,0 +1,46 @@
+From 3527a018c00e5dbada2f9d7ed5576437b6dd5cfb Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sat, 13 Oct 2018 00:37:25 +0100
+Subject: Btrfs: fix null pointer dereference on compressed write path error
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 3527a018c00e5dbada2f9d7ed5576437b6dd5cfb upstream.
+
+At inode.c:compress_file_range(), under the "free_pages_out" label, we can
+end up dereferencing the "pages" pointer when it has a NULL value. This
+case happens when "start" has a value of 0 and we fail to allocate memory
+for the "pages" pointer. When that happens we jump to the "cont" label and
+then enter the "if (start == 0)" branch where we immediately call the
+cow_file_range_inline() function. If that function returns 0 (success
+creating an inline extent) or an error (like -ENOMEM for example) we jump
+to the "free_pages_out" label and then access "pages[i]" leading to a NULL
+pointer dereference, since "nr_pages" has a value greater than zero at
+that point.
+
+Fix this by setting "nr_pages" to 0 when we fail to allocate memory for
+the "pages" pointer.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201119
+Fixes: 771ed689d2cd ("Btrfs: Optimize compressed writeback and reads")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -503,6 +503,7 @@ again:
+               pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
+               if (!pages) {
+                       /* just bail out to the uncompressed code */
++                      nr_pages = 0;
+                       goto cont;
+               }
diff --git a/queue-4.19/btrfs-fix-use-after-free-during-inode-eviction.patch b/queue-4.19/btrfs-fix-use-after-free-during-inode-eviction.patch
new file mode 100644 (file)
index 0000000..0dc6e96
--- /dev/null
@@ -0,0 +1,61 @@
+From 421f0922a2cfb0c75acd9746454aaa576c711a65 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Fri, 12 Oct 2018 13:02:48 +0100
+Subject: Btrfs: fix use-after-free during inode eviction
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 421f0922a2cfb0c75acd9746454aaa576c711a65 upstream.
+
+At inode.c:evict_inode_truncate_pages(), when we iterate over the
+inode's extent states, we access an extent state record's "state" field
+after we unlocked the inode's io tree lock. This can lead to a
+use-after-free issue because after we unlock the io tree that extent
+state record might have been freed due to being merged into another
+adjacent extent state record (a previous inflight bio for a read
+operation finished in the meanwhile which unlocked a range in the io
+tree and caused a merge of extent state records, as explained in the
+comment before the while loop added in commit 6ca0709756710 ("Btrfs: fix
+hang during inode eviction due to concurrent readahead")).
+
+Fix this by keeping a copy of the extent state's flags in a local
+variable and using it after unlocking the io tree.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201189
+Fixes: b9d0b38928e2 ("btrfs: Add handler for invalidate page")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -5283,11 +5283,13 @@ static void evict_inode_truncate_pages(s
+               struct extent_state *cached_state = NULL;
+               u64 start;
+               u64 end;
++              unsigned state_flags;
+               node = rb_first(&io_tree->state);
+               state = rb_entry(node, struct extent_state, rb_node);
+               start = state->start;
+               end = state->end;
++              state_flags = state->state;
+               spin_unlock(&io_tree->lock);
+               lock_extent_bits(io_tree, start, end, &cached_state);
+@@ -5300,7 +5302,7 @@ static void evict_inode_truncate_pages(s
+                *
+                * Note, end is the bytenr of last byte, so we need + 1 here.
+                */
+-              if (state->state & EXTENT_DELALLOC)
++              if (state_flags & EXTENT_DELALLOC)
+                       btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
+               clear_extent_bit(io_tree, start, end,
diff --git a/queue-4.19/btrfs-fix-use-after-free-when-dumping-free-space.patch b/queue-4.19/btrfs-fix-use-after-free-when-dumping-free-space.patch
new file mode 100644 (file)
index 0000000..6ca5a4c
--- /dev/null
@@ -0,0 +1,221 @@
+From 9084cb6a24bf5838a665af92ded1af8363f9e563 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 22 Oct 2018 10:43:06 +0100
+Subject: Btrfs: fix use-after-free when dumping free space
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 9084cb6a24bf5838a665af92ded1af8363f9e563 upstream.
+
+We were iterating a block group's free space cache rbtree without first
+taking the lock that protects it (the free_space_ctl->free_space_offset
+rbtree is protected by the free_space_ctl->tree_lock spinlock).
+
+KASAN reported a use-after-free problem when iterating such a rbtree due
+to a concurrent rbtree delete:
+
+[ 9520.359168] ==================================================================
+[ 9520.359656] BUG: KASAN: use-after-free in rb_next+0x13/0x90
+[ 9520.359949] Read of size 8 at addr ffff8800b7ada500 by task btrfs-transacti/1721
+[ 9520.360357]
+[ 9520.360530] CPU: 4 PID: 1721 Comm: btrfs-transacti Tainted: G             L    4.19.0-rc8-nbor #555
+[ 9520.360990] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
+[ 9520.362682] Call Trace:
+[ 9520.362887]  dump_stack+0xa4/0xf5
+[ 9520.363146]  print_address_description+0x78/0x280
+[ 9520.363412]  kasan_report+0x263/0x390
+[ 9520.363650]  ? rb_next+0x13/0x90
+[ 9520.363873]  __asan_load8+0x54/0x90
+[ 9520.364102]  rb_next+0x13/0x90
+[ 9520.364380]  btrfs_dump_free_space+0x146/0x160 [btrfs]
+[ 9520.364697]  dump_space_info+0x2cd/0x310 [btrfs]
+[ 9520.364997]  btrfs_reserve_extent+0x1ee/0x1f0 [btrfs]
+[ 9520.365310]  __btrfs_prealloc_file_range+0x1cc/0x620 [btrfs]
+[ 9520.365646]  ? btrfs_update_time+0x180/0x180 [btrfs]
+[ 9520.365923]  ? _raw_spin_unlock+0x27/0x40
+[ 9520.366204]  ? btrfs_alloc_data_chunk_ondemand+0x2c0/0x5c0 [btrfs]
+[ 9520.366549]  btrfs_prealloc_file_range_trans+0x23/0x30 [btrfs]
+[ 9520.366880]  cache_save_setup+0x42e/0x580 [btrfs]
+[ 9520.367220]  ? btrfs_check_data_free_space+0xd0/0xd0 [btrfs]
+[ 9520.367518]  ? lock_downgrade+0x2f0/0x2f0
+[ 9520.367799]  ? btrfs_write_dirty_block_groups+0x11f/0x6e0 [btrfs]
+[ 9520.368104]  ? kasan_check_read+0x11/0x20
+[ 9520.368349]  ? do_raw_spin_unlock+0xa8/0x140
+[ 9520.368638]  btrfs_write_dirty_block_groups+0x2af/0x6e0 [btrfs]
+[ 9520.368978]  ? btrfs_start_dirty_block_groups+0x870/0x870 [btrfs]
+[ 9520.369282]  ? do_raw_spin_unlock+0xa8/0x140
+[ 9520.369534]  ? _raw_spin_unlock+0x27/0x40
+[ 9520.369811]  ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs]
+[ 9520.370137]  commit_cowonly_roots+0x4b9/0x610 [btrfs]
+[ 9520.370560]  ? commit_fs_roots+0x350/0x350 [btrfs]
+[ 9520.370926]  ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs]
+[ 9520.371285]  btrfs_commit_transaction+0x5e5/0x10e0 [btrfs]
+[ 9520.371612]  ? btrfs_apply_pending_changes+0x90/0x90 [btrfs]
+[ 9520.371943]  ? start_transaction+0x168/0x6c0 [btrfs]
+[ 9520.372257]  transaction_kthread+0x21c/0x240 [btrfs]
+[ 9520.372537]  kthread+0x1d2/0x1f0
+[ 9520.372793]  ? btrfs_cleanup_transaction+0xb50/0xb50 [btrfs]
+[ 9520.373090]  ? kthread_park+0xb0/0xb0
+[ 9520.373329]  ret_from_fork+0x3a/0x50
+[ 9520.373567]
+[ 9520.373738] Allocated by task 1804:
+[ 9520.373974]  kasan_kmalloc+0xff/0x180
+[ 9520.374208]  kasan_slab_alloc+0x11/0x20
+[ 9520.374447]  kmem_cache_alloc+0xfc/0x2d0
+[ 9520.374731]  __btrfs_add_free_space+0x40/0x580 [btrfs]
+[ 9520.375044]  unpin_extent_range+0x4f7/0x7a0 [btrfs]
+[ 9520.375383]  btrfs_finish_extent_commit+0x15f/0x4d0 [btrfs]
+[ 9520.375707]  btrfs_commit_transaction+0xb06/0x10e0 [btrfs]
+[ 9520.376027]  btrfs_alloc_data_chunk_ondemand+0x237/0x5c0 [btrfs]
+[ 9520.376365]  btrfs_check_data_free_space+0x81/0xd0 [btrfs]
+[ 9520.376689]  btrfs_delalloc_reserve_space+0x25/0x80 [btrfs]
+[ 9520.377018]  btrfs_direct_IO+0x42e/0x6d0 [btrfs]
+[ 9520.377284]  generic_file_direct_write+0x11e/0x220
+[ 9520.377587]  btrfs_file_write_iter+0x472/0xac0 [btrfs]
+[ 9520.377875]  aio_write+0x25c/0x360
+[ 9520.378106]  io_submit_one+0xaa0/0xdc0
+[ 9520.378343]  __se_sys_io_submit+0xfa/0x2f0
+[ 9520.378589]  __x64_sys_io_submit+0x43/0x50
+[ 9520.378840]  do_syscall_64+0x7d/0x240
+[ 9520.379081]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
+[ 9520.379387]
+[ 9520.379557] Freed by task 1802:
+[ 9520.379782]  __kasan_slab_free+0x173/0x260
+[ 9520.380028]  kasan_slab_free+0xe/0x10
+[ 9520.380262]  kmem_cache_free+0xc1/0x2c0
+[ 9520.380544]  btrfs_find_space_for_alloc+0x4cd/0x4e0 [btrfs]
+[ 9520.380866]  find_free_extent+0xa99/0x17e0 [btrfs]
+[ 9520.381166]  btrfs_reserve_extent+0xd5/0x1f0 [btrfs]
+[ 9520.381474]  btrfs_get_blocks_direct+0x60b/0xbd0 [btrfs]
+[ 9520.381761]  __blockdev_direct_IO+0x10ee/0x58a1
+[ 9520.382059]  btrfs_direct_IO+0x25a/0x6d0 [btrfs]
+[ 9520.382321]  generic_file_direct_write+0x11e/0x220
+[ 9520.382623]  btrfs_file_write_iter+0x472/0xac0 [btrfs]
+[ 9520.382904]  aio_write+0x25c/0x360
+[ 9520.383172]  io_submit_one+0xaa0/0xdc0
+[ 9520.383416]  __se_sys_io_submit+0xfa/0x2f0
+[ 9520.383678]  __x64_sys_io_submit+0x43/0x50
+[ 9520.383927]  do_syscall_64+0x7d/0x240
+[ 9520.384165]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
+[ 9520.384439]
+[ 9520.384610] The buggy address belongs to the object at ffff8800b7ada500
+                which belongs to the cache btrfs_free_space of size 72
+[ 9520.385175] The buggy address is located 0 bytes inside of
+                72-byte region [ffff8800b7ada500, ffff8800b7ada548)
+[ 9520.385691] The buggy address belongs to the page:
+[ 9520.385957] page:ffffea0002deb680 count:1 mapcount:0 mapping:ffff880108a1d700 index:0x0 compound_mapcount: 0
+[ 9520.388030] flags: 0x8100(slab|head)
+[ 9520.388281] raw: 0000000000008100 ffffea0002deb608 ffffea0002728808 ffff880108a1d700
+[ 9520.388722] raw: 0000000000000000 0000000000130013 00000001ffffffff 0000000000000000
+[ 9520.389169] page dumped because: kasan: bad access detected
+[ 9520.389473]
+[ 9520.389658] Memory state around the buggy address:
+[ 9520.389943]  ffff8800b7ada400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 9520.390368]  ffff8800b7ada480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 9520.390796] >ffff8800b7ada500: fb fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc
+[ 9520.391223]                    ^
+[ 9520.391461]  ffff8800b7ada580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 9520.391885]  ffff8800b7ada600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[ 9520.392313] ==================================================================
+[ 9520.392772] BTRFS critical (device vdc): entry offset 2258497536, bytes 131072, bitmap no
+[ 9520.393247] BUG: unable to handle kernel NULL pointer dereference at 0000000000000011
+[ 9520.393705] PGD 800000010dbab067 P4D 800000010dbab067 PUD 107551067 PMD 0
+[ 9520.394059] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[ 9520.394378] CPU: 4 PID: 1721 Comm: btrfs-transacti Tainted: G    B        L    4.19.0-rc8-nbor #555
+[ 9520.394858] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
+[ 9520.395350] RIP: 0010:rb_next+0x3c/0x90
+[ 9520.396461] RSP: 0018:ffff8801074ff780 EFLAGS: 00010292
+[ 9520.396762] RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff81b5ac4c
+[ 9520.397115] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000011
+[ 9520.397468] RBP: ffff8801074ff7a0 R08: ffffed0021d64ccc R09: ffffed0021d64ccc
+[ 9520.397821] R10: 0000000000000001 R11: ffffed0021d64ccb R12: ffff8800b91e0000
+[ 9520.398188] R13: ffff8800a3ceba48 R14: ffff8800b627bf80 R15: 0000000000020000
+[ 9520.398555] FS:  0000000000000000(0000) GS:ffff88010eb00000(0000) knlGS:0000000000000000
+[ 9520.399007] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 9520.399335] CR2: 0000000000000011 CR3: 0000000106b52000 CR4: 00000000000006a0
+[ 9520.399679] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 9520.400023] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 9520.400400] Call Trace:
+[ 9520.400648]  btrfs_dump_free_space+0x146/0x160 [btrfs]
+[ 9520.400974]  dump_space_info+0x2cd/0x310 [btrfs]
+[ 9520.401287]  btrfs_reserve_extent+0x1ee/0x1f0 [btrfs]
+[ 9520.401609]  __btrfs_prealloc_file_range+0x1cc/0x620 [btrfs]
+[ 9520.401952]  ? btrfs_update_time+0x180/0x180 [btrfs]
+[ 9520.402232]  ? _raw_spin_unlock+0x27/0x40
+[ 9520.402522]  ? btrfs_alloc_data_chunk_ondemand+0x2c0/0x5c0 [btrfs]
+[ 9520.402882]  btrfs_prealloc_file_range_trans+0x23/0x30 [btrfs]
+[ 9520.403261]  cache_save_setup+0x42e/0x580 [btrfs]
+[ 9520.403570]  ? btrfs_check_data_free_space+0xd0/0xd0 [btrfs]
+[ 9520.403871]  ? lock_downgrade+0x2f0/0x2f0
+[ 9520.404161]  ? btrfs_write_dirty_block_groups+0x11f/0x6e0 [btrfs]
+[ 9520.404481]  ? kasan_check_read+0x11/0x20
+[ 9520.404732]  ? do_raw_spin_unlock+0xa8/0x140
+[ 9520.405026]  btrfs_write_dirty_block_groups+0x2af/0x6e0 [btrfs]
+[ 9520.405375]  ? btrfs_start_dirty_block_groups+0x870/0x870 [btrfs]
+[ 9520.405694]  ? do_raw_spin_unlock+0xa8/0x140
+[ 9520.405958]  ? _raw_spin_unlock+0x27/0x40
+[ 9520.406243]  ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs]
+[ 9520.406574]  commit_cowonly_roots+0x4b9/0x610 [btrfs]
+[ 9520.406899]  ? commit_fs_roots+0x350/0x350 [btrfs]
+[ 9520.407253]  ? btrfs_run_delayed_refs+0x1b8/0x230 [btrfs]
+[ 9520.407589]  btrfs_commit_transaction+0x5e5/0x10e0 [btrfs]
+[ 9520.407925]  ? btrfs_apply_pending_changes+0x90/0x90 [btrfs]
+[ 9520.408262]  ? start_transaction+0x168/0x6c0 [btrfs]
+[ 9520.408582]  transaction_kthread+0x21c/0x240 [btrfs]
+[ 9520.408870]  kthread+0x1d2/0x1f0
+[ 9520.409138]  ? btrfs_cleanup_transaction+0xb50/0xb50 [btrfs]
+[ 9520.409440]  ? kthread_park+0xb0/0xb0
+[ 9520.409682]  ret_from_fork+0x3a/0x50
+[ 9520.410508] Dumping ftrace buffer:
+[ 9520.410764]    (ftrace buffer empty)
+[ 9520.411007] CR2: 0000000000000011
+[ 9520.411297] ---[ end trace 01a0863445cf360a ]---
+[ 9520.411568] RIP: 0010:rb_next+0x3c/0x90
+[ 9520.412644] RSP: 0018:ffff8801074ff780 EFLAGS: 00010292
+[ 9520.412932] RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff81b5ac4c
+[ 9520.413274] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000011
+[ 9520.413616] RBP: ffff8801074ff7a0 R08: ffffed0021d64ccc R09: ffffed0021d64ccc
+[ 9520.414007] R10: 0000000000000001 R11: ffffed0021d64ccb R12: ffff8800b91e0000
+[ 9520.414349] R13: ffff8800a3ceba48 R14: ffff8800b627bf80 R15: 0000000000020000
+[ 9520.416074] FS:  0000000000000000(0000) GS:ffff88010eb00000(0000) knlGS:0000000000000000
+[ 9520.416536] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 9520.416848] CR2: 0000000000000011 CR3: 0000000106b52000 CR4: 00000000000006a0
+[ 9520.418477] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 9520.418846] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 9520.419204] Kernel panic - not syncing: Fatal exception
+[ 9520.419666] Dumping ftrace buffer:
+[ 9520.419930]    (ftrace buffer empty)
+[ 9520.420168] Kernel Offset: disabled
+[ 9520.420406] ---[ end Kernel panic - not syncing: Fatal exception ]---
+
+Fix this by acquiring the respective lock before iterating the rbtree.
+
+Reported-by: Nikolay Borisov <nborisov@suse.com>
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/free-space-cache.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -2459,6 +2459,7 @@ void btrfs_dump_free_space(struct btrfs_
+       struct rb_node *n;
+       int count = 0;
++      spin_lock(&ctl->tree_lock);
+       for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
+               info = rb_entry(n, struct btrfs_free_space, offset_index);
+               if (info->bytes >= bytes && !block_group->ro)
+@@ -2467,6 +2468,7 @@ void btrfs_dump_free_space(struct btrfs_
+                          info->offset, info->bytes,
+                      (info->bitmap) ? "yes" : "no");
+       }
++      spin_unlock(&ctl->tree_lock);
+       btrfs_info(fs_info, "block group has cluster?: %s",
+              list_empty(&block_group->cluster_list) ? "no" : "yes");
+       btrfs_info(fs_info,
diff --git a/queue-4.19/btrfs-move-the-dio_sem-higher-up-the-callchain.patch b/queue-4.19/btrfs-move-the-dio_sem-higher-up-the-callchain.patch
new file mode 100644 (file)
index 0000000..61147dd
--- /dev/null
@@ -0,0 +1,239 @@
+From c495144bc6962186feae31d687596d2472000e45 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 12 Oct 2018 15:32:32 -0400
+Subject: btrfs: move the dio_sem higher up the callchain
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit c495144bc6962186feae31d687596d2472000e45 upstream.
+
+We're getting a lockdep splat because we take the dio_sem under the
+log_mutex.  What we really need is to protect fsync() from logging an
+extent map for an extent we never waited on higher up, so just guard the
+whole thing with dio_sem.
+
+======================================================
+WARNING: possible circular locking dependency detected
+4.18.0-rc4-xfstests-00025-g5de5edbaf1d4 #411 Not tainted
+------------------------------------------------------
+aio-dio-invalid/30928 is trying to acquire lock:
+0000000092621cfd (&mm->mmap_sem){++++}, at: get_user_pages_unlocked+0x5a/0x1e0
+
+but task is already holding lock:
+00000000cefe6b35 (&ei->dio_sem){++++}, at: btrfs_direct_IO+0x3be/0x400
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #5 (&ei->dio_sem){++++}:
+       lock_acquire+0xbd/0x220
+       down_write+0x51/0xb0
+       btrfs_log_changed_extents+0x80/0xa40
+       btrfs_log_inode+0xbaf/0x1000
+       btrfs_log_inode_parent+0x26f/0xa80
+       btrfs_log_dentry_safe+0x50/0x70
+       btrfs_sync_file+0x357/0x540
+       do_fsync+0x38/0x60
+       __ia32_sys_fdatasync+0x12/0x20
+       do_fast_syscall_32+0x9a/0x2f0
+       entry_SYSENTER_compat+0x84/0x96
+
+-> #4 (&ei->log_mutex){+.+.}:
+       lock_acquire+0xbd/0x220
+       __mutex_lock+0x86/0xa10
+       btrfs_record_unlink_dir+0x2a/0xa0
+       btrfs_unlink+0x5a/0xc0
+       vfs_unlink+0xb1/0x1a0
+       do_unlinkat+0x264/0x2b0
+       do_fast_syscall_32+0x9a/0x2f0
+       entry_SYSENTER_compat+0x84/0x96
+
+-> #3 (sb_internal#2){.+.+}:
+       lock_acquire+0xbd/0x220
+       __sb_start_write+0x14d/0x230
+       start_transaction+0x3e6/0x590
+       btrfs_evict_inode+0x475/0x640
+       evict+0xbf/0x1b0
+       btrfs_run_delayed_iputs+0x6c/0x90
+       cleaner_kthread+0x124/0x1a0
+       kthread+0x106/0x140
+       ret_from_fork+0x3a/0x50
+
+-> #2 (&fs_info->cleaner_delayed_iput_mutex){+.+.}:
+       lock_acquire+0xbd/0x220
+       __mutex_lock+0x86/0xa10
+       btrfs_alloc_data_chunk_ondemand+0x197/0x530
+       btrfs_check_data_free_space+0x4c/0x90
+       btrfs_delalloc_reserve_space+0x20/0x60
+       btrfs_page_mkwrite+0x87/0x520
+       do_page_mkwrite+0x31/0xa0
+       __handle_mm_fault+0x799/0xb00
+       handle_mm_fault+0x7c/0xe0
+       __do_page_fault+0x1d3/0x4a0
+       async_page_fault+0x1e/0x30
+
+-> #1 (sb_pagefaults){.+.+}:
+       lock_acquire+0xbd/0x220
+       __sb_start_write+0x14d/0x230
+       btrfs_page_mkwrite+0x6a/0x520
+       do_page_mkwrite+0x31/0xa0
+       __handle_mm_fault+0x799/0xb00
+       handle_mm_fault+0x7c/0xe0
+       __do_page_fault+0x1d3/0x4a0
+       async_page_fault+0x1e/0x30
+
+-> #0 (&mm->mmap_sem){++++}:
+       __lock_acquire+0x42e/0x7a0
+       lock_acquire+0xbd/0x220
+       down_read+0x48/0xb0
+       get_user_pages_unlocked+0x5a/0x1e0
+       get_user_pages_fast+0xa4/0x150
+       iov_iter_get_pages+0xc3/0x340
+       do_direct_IO+0xf93/0x1d70
+       __blockdev_direct_IO+0x32d/0x1c20
+       btrfs_direct_IO+0x227/0x400
+       generic_file_direct_write+0xcf/0x180
+       btrfs_file_write_iter+0x308/0x58c
+       aio_write+0xf8/0x1d0
+       io_submit_one+0x3a9/0x620
+       __ia32_compat_sys_io_submit+0xb2/0x270
+       do_int80_syscall_32+0x5b/0x1a0
+       entry_INT80_compat+0x88/0xa0
+
+other info that might help us debug this:
+
+Chain exists of:
+  &mm->mmap_sem --> &ei->log_mutex --> &ei->dio_sem
+
+ Possible unsafe locking scenario:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(&ei->dio_sem);
+                               lock(&ei->log_mutex);
+                               lock(&ei->dio_sem);
+  lock(&mm->mmap_sem);
+
+ *** DEADLOCK ***
+
+1 lock held by aio-dio-invalid/30928:
+ #0: 00000000cefe6b35 (&ei->dio_sem){++++}, at: btrfs_direct_IO+0x3be/0x400
+
+stack backtrace:
+CPU: 0 PID: 30928 Comm: aio-dio-invalid Not tainted 4.18.0-rc4-xfstests-00025-g5de5edbaf1d4 #411
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+Call Trace:
+ dump_stack+0x7c/0xbb
+ print_circular_bug.isra.37+0x297/0x2a4
+ check_prev_add.constprop.45+0x781/0x7a0
+ ? __lock_acquire+0x42e/0x7a0
+ validate_chain.isra.41+0x7f0/0xb00
+ __lock_acquire+0x42e/0x7a0
+ lock_acquire+0xbd/0x220
+ ? get_user_pages_unlocked+0x5a/0x1e0
+ down_read+0x48/0xb0
+ ? get_user_pages_unlocked+0x5a/0x1e0
+ get_user_pages_unlocked+0x5a/0x1e0
+ get_user_pages_fast+0xa4/0x150
+ iov_iter_get_pages+0xc3/0x340
+ do_direct_IO+0xf93/0x1d70
+ ? __alloc_workqueue_key+0x358/0x490
+ ? __blockdev_direct_IO+0x14b/0x1c20
+ __blockdev_direct_IO+0x32d/0x1c20
+ ? btrfs_run_delalloc_work+0x40/0x40
+ ? can_nocow_extent+0x490/0x490
+ ? kvm_clock_read+0x1f/0x30
+ ? can_nocow_extent+0x490/0x490
+ ? btrfs_run_delalloc_work+0x40/0x40
+ btrfs_direct_IO+0x227/0x400
+ ? btrfs_run_delalloc_work+0x40/0x40
+ generic_file_direct_write+0xcf/0x180
+ btrfs_file_write_iter+0x308/0x58c
+ aio_write+0xf8/0x1d0
+ ? kvm_clock_read+0x1f/0x30
+ ? __might_fault+0x3e/0x90
+ io_submit_one+0x3a9/0x620
+ ? io_submit_one+0xe5/0x620
+ __ia32_compat_sys_io_submit+0xb2/0x270
+ do_int80_syscall_32+0x5b/0x1a0
+ entry_INT80_compat+0x88/0xa0
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/file.c     |   12 ++++++++++++
+ fs/btrfs/tree-log.c |    2 --
+ 2 files changed, 12 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2078,6 +2078,14 @@ int btrfs_sync_file(struct file *file, l
+               goto out;
+       inode_lock(inode);
++
++      /*
++       * We take the dio_sem here because the tree log stuff can race with
++       * lockless dio writes and get an extent map logged for an extent we
++       * never waited on.  We need it this high up for lockdep reasons.
++       */
++      down_write(&BTRFS_I(inode)->dio_sem);
++
+       atomic_inc(&root->log_batch);
+       /*
+@@ -2086,6 +2094,7 @@ int btrfs_sync_file(struct file *file, l
+        */
+       ret = btrfs_wait_ordered_range(inode, start, len);
+       if (ret) {
++              up_write(&BTRFS_I(inode)->dio_sem);
+               inode_unlock(inode);
+               goto out;
+       }
+@@ -2109,6 +2118,7 @@ int btrfs_sync_file(struct file *file, l
+                * checked called fsync.
+                */
+               ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err);
++              up_write(&BTRFS_I(inode)->dio_sem);
+               inode_unlock(inode);
+               goto out;
+       }
+@@ -2127,6 +2137,7 @@ int btrfs_sync_file(struct file *file, l
+       trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
++              up_write(&BTRFS_I(inode)->dio_sem);
+               inode_unlock(inode);
+               goto out;
+       }
+@@ -2148,6 +2159,7 @@ int btrfs_sync_file(struct file *file, l
+        * file again, but that will end up using the synchronization
+        * inside btrfs_sync_log to keep things safe.
+        */
++      up_write(&BTRFS_I(inode)->dio_sem);
+       inode_unlock(inode);
+       /*
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4393,7 +4393,6 @@ static int btrfs_log_changed_extents(str
+       INIT_LIST_HEAD(&extents);
+-      down_write(&inode->dio_sem);
+       write_lock(&tree->lock);
+       test_gen = root->fs_info->last_trans_committed;
+       logged_start = start;
+@@ -4459,7 +4458,6 @@ process:
+       }
+       WARN_ON(!list_empty(&extents));
+       write_unlock(&tree->lock);
+-      up_write(&inode->dio_sem);
+       btrfs_release_path(path);
+       if (!ret)
diff --git a/queue-4.19/btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch b/queue-4.19/btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch
new file mode 100644 (file)
index 0000000..ff6a7b7
--- /dev/null
@@ -0,0 +1,66 @@
+From 49940bdd57779c78462da7aa5a8650b2fea8c2ff Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Thu, 11 Oct 2018 15:54:21 -0400
+Subject: btrfs: only free reserved extent if we didn't insert it
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 49940bdd57779c78462da7aa5a8650b2fea8c2ff upstream.
+
+When we insert the file extent once the ordered extent completes we free
+the reserved extent reservation as it'll have been migrated to the
+bytes_used counter.  However if we error out after this step we'll still
+clear the reserved extent reservation, resulting in a negative
+accounting of the reserved bytes for the block group and space info.
+Fix this by only doing the free if we didn't successfully insert a file
+extent for this extent.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Omar Sandoval <osandov@fb.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/inode.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2945,6 +2945,7 @@ static int btrfs_finish_ordered_io(struc
+       bool truncated = false;
+       bool range_locked = false;
+       bool clear_new_delalloc_bytes = false;
++      bool clear_reserved_extent = true;
+       if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+           !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
+@@ -3048,10 +3049,12 @@ static int btrfs_finish_ordered_io(struc
+                                               logical_len, logical_len,
+                                               compress_type, 0, 0,
+                                               BTRFS_FILE_EXTENT_REG);
+-              if (!ret)
++              if (!ret) {
++                      clear_reserved_extent = false;
+                       btrfs_release_delalloc_bytes(fs_info,
+                                                    ordered_extent->start,
+                                                    ordered_extent->disk_len);
++              }
+       }
+       unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+                          ordered_extent->file_offset, ordered_extent->len,
+@@ -3112,8 +3115,13 @@ out:
+                * wrong we need to return the space for this ordered extent
+                * back to the allocator.  We only free the extent in the
+                * truncated case if we didn't write out the extent at all.
++               *
++               * If we made it past insert_reserved_file_extent before we
++               * errored out then we don't need to do this as the accounting
++               * has already been done.
+                */
+               if ((ret || !logical_len) &&
++                  clear_reserved_extent &&
+                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+                   !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
+                       btrfs_free_reserved_extent(fs_info,
diff --git a/queue-4.19/btrfs-reset-max_extent_size-properly.patch b/queue-4.19/btrfs-reset-max_extent_size-properly.patch
new file mode 100644 (file)
index 0000000..03dbeb0
--- /dev/null
@@ -0,0 +1,42 @@
+From 21a94f7acf0f748599ea552af5d9ee7d7e41c72f Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Thu, 11 Oct 2018 15:54:03 -0400
+Subject: btrfs: reset max_extent_size properly
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 21a94f7acf0f748599ea552af5d9ee7d7e41c72f upstream.
+
+If we use up our block group before allocating a new one we'll easily
+get a max_extent_size that's set really really low, which will result in
+a lot of fragmentation.  We need to make sure we're resetting the
+max_extent_size when we add a new chunk or add new space.
+
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent-tree.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4530,6 +4530,7 @@ static int do_chunk_alloc(struct btrfs_t
+                       goto out;
+       } else {
+               ret = 1;
++              space_info->max_extent_size = 0;
+       }
+       space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+@@ -6431,6 +6432,7 @@ static int btrfs_free_reserved_bytes(str
+               space_info->bytes_readonly += num_bytes;
+       cache->reserved -= num_bytes;
+       space_info->bytes_reserved -= num_bytes;
++      space_info->max_extent_size = 0;
+       if (delalloc)
+               cache->delalloc_bytes -= num_bytes;
diff --git a/queue-4.19/btrfs-set-max_extent_size-properly.patch b/queue-4.19/btrfs-set-max_extent_size-properly.patch
new file mode 100644 (file)
index 0000000..886151d
--- /dev/null
@@ -0,0 +1,97 @@
+From ad22cf6ea47fa20fbe11ac324a0a15c0a9a4a2a9 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@fb.com>
+Date: Fri, 12 Oct 2018 15:32:33 -0400
+Subject: btrfs: set max_extent_size properly
+
+From: Josef Bacik <jbacik@fb.com>
+
+commit ad22cf6ea47fa20fbe11ac324a0a15c0a9a4a2a9 upstream.
+
+We can't use entry->bytes if our entry is a bitmap entry; we need to use
+entry->max_extent_size in that case.  Fix up all the logic to make this
+consistent.
+
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/free-space-cache.c |   30 ++++++++++++++++++++----------
+ 1 file changed, 20 insertions(+), 10 deletions(-)
+
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -1772,6 +1772,13 @@ static int search_bitmap(struct btrfs_fr
+       return -1;
+ }
++static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
++{
++      if (entry->bitmap)
++              return entry->max_extent_size;
++      return entry->bytes;
++}
++
+ /* Cache the size of the max extent in bytes */
+ static struct btrfs_free_space *
+ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
+@@ -1793,8 +1800,8 @@ find_free_space(struct btrfs_free_space_
+       for (node = &entry->offset_index; node; node = rb_next(node)) {
+               entry = rb_entry(node, struct btrfs_free_space, offset_index);
+               if (entry->bytes < *bytes) {
+-                      if (entry->bytes > *max_extent_size)
+-                              *max_extent_size = entry->bytes;
++                      *max_extent_size = max(get_max_extent_size(entry),
++                                             *max_extent_size);
+                       continue;
+               }
+@@ -1812,8 +1819,8 @@ find_free_space(struct btrfs_free_space_
+               }
+               if (entry->bytes < *bytes + align_off) {
+-                      if (entry->bytes > *max_extent_size)
+-                              *max_extent_size = entry->bytes;
++                      *max_extent_size = max(get_max_extent_size(entry),
++                                             *max_extent_size);
+                       continue;
+               }
+@@ -1825,8 +1832,10 @@ find_free_space(struct btrfs_free_space_
+                               *offset = tmp;
+                               *bytes = size;
+                               return entry;
+-                      } else if (size > *max_extent_size) {
+-                              *max_extent_size = size;
++                      } else {
++                              *max_extent_size =
++                                      max(get_max_extent_size(entry),
++                                          *max_extent_size);
+                       }
+                       continue;
+               }
+@@ -2686,8 +2695,8 @@ static u64 btrfs_alloc_from_bitmap(struc
+       err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
+       if (err) {
+-              if (search_bytes > *max_extent_size)
+-                      *max_extent_size = search_bytes;
++              *max_extent_size = max(get_max_extent_size(entry),
++                                     *max_extent_size);
+               return 0;
+       }
+@@ -2724,8 +2733,9 @@ u64 btrfs_alloc_from_cluster(struct btrf
+       entry = rb_entry(node, struct btrfs_free_space, offset_index);
+       while (1) {
+-              if (entry->bytes < bytes && entry->bytes > *max_extent_size)
+-                      *max_extent_size = entry->bytes;
++              if (entry->bytes < bytes)
++                      *max_extent_size = max(get_max_extent_size(entry),
++                                             *max_extent_size);
+               if (entry->bytes < bytes ||
+                   (!entry->bitmap && entry->offset < min_start)) {
diff --git a/queue-4.19/net-sched-remove-tca_options-from-policy.patch b/queue-4.19/net-sched-remove-tca_options-from-policy.patch
new file mode 100644 (file)
index 0000000..ada038e
--- /dev/null
@@ -0,0 +1,36 @@
+From e72bde6b66299602087c8c2350d36a525e75d06e Mon Sep 17 00:00:00 2001
+From: David Ahern <dsahern@gmail.com>
+Date: Wed, 24 Oct 2018 08:32:49 -0700
+Subject: net: sched: Remove TCA_OPTIONS from policy
+
+From: David Ahern <dsahern@gmail.com>
+
+commit e72bde6b66299602087c8c2350d36a525e75d06e upstream.
+
+Marco reported an error with hfsc:
+root@Calimero:~# tc qdisc add dev eth0 root handle 1:0 hfsc default 1
+Error: Attribute failed policy validation.
+
+Apparently a few implementations pass TCA_OPTIONS as a binary instead
+of a nested attribute, so drop TCA_OPTIONS from the policy.
+
+Fixes: 8b4c3cdd9dd8 ("net: sched: Add policy validation for tc attributes")
+Reported-by: Marco Berizzi <pupilla@libero.it>
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/sched/sch_api.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -1309,7 +1309,6 @@ check_loop_fn(struct Qdisc *q, unsigned
+ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
+       [TCA_KIND]              = { .type = NLA_STRING },
+-      [TCA_OPTIONS]           = { .type = NLA_NESTED },
+       [TCA_RATE]              = { .type = NLA_BINARY,
+                                   .len = sizeof(struct tc_estimator) },
+       [TCA_STAB]              = { .type = NLA_NESTED },
index a3a000968684e220e9febeaeb7de6b76ce165264..a21d1a3364352de47345d18548d2e1f3c3f400be 100644 (file)
@@ -342,3 +342,20 @@ btrfs-make-sure-we-create-all-new-block-groups.patch
 btrfs-fix-warning-when-replaying-log-after-fsync-of-a-tmpfile.patch
 btrfs-fix-wrong-dentries-after-fsync-of-file-that-got-its-parent-replaced.patch
 btrfs-qgroup-dirty-all-qgroups-before-rescan.patch
+btrfs-fix-null-pointer-dereference-on-compressed-write-path-error.patch
+btrfs-fix-assertion-on-fsync-of-regular-file-when-using-no-holes-feature.patch
+btrfs-fix-deadlock-when-writing-out-free-space-caches.patch
+btrfs-reset-max_extent_size-properly.patch
+btrfs-set-max_extent_size-properly.patch
+btrfs-don-t-use-ctl-free_space-for-max_extent_size.patch
+btrfs-only-free-reserved-extent-if-we-didn-t-insert-it.patch
+btrfs-fix-insert_reserved-error-handling.patch
+btrfs-don-t-run-delayed_iputs-in-commit.patch
+btrfs-move-the-dio_sem-higher-up-the-callchain.patch
+btrfs-fix-use-after-free-during-inode-eviction.patch
+btrfs-fix-use-after-free-when-dumping-free-space.patch
+net-sched-remove-tca_options-from-policy.patch
+vt-fix-broken-display-when-running-aptitude.patch
+userns-also-map-extents-in-the-reverse-map-to-kernel-ids.patch
+bpf-wait-for-running-bpf-programs-when-updating-map-in-map.patch
+vga_switcheroo-fix-missing-gpu_bound-call-at-audio-client-registration.patch
diff --git a/queue-4.19/userns-also-map-extents-in-the-reverse-map-to-kernel-ids.patch b/queue-4.19/userns-also-map-extents-in-the-reverse-map-to-kernel-ids.patch
new file mode 100644 (file)
index 0000000..9ada3ea
--- /dev/null
@@ -0,0 +1,65 @@
+From d2f007dbe7e4c9583eea6eb04d60001e85c6f1bd Mon Sep 17 00:00:00 2001
+From: Jann Horn <jannh@google.com>
+Date: Mon, 5 Nov 2018 20:55:09 +0100
+Subject: userns: also map extents in the reverse map to kernel IDs
+
+From: Jann Horn <jannh@google.com>
+
+commit d2f007dbe7e4c9583eea6eb04d60001e85c6f1bd upstream.
+
+The current logic first clones the extent array and sorts both copies, then
+maps the lower IDs of the forward mapping into the lower namespace, but
+doesn't map the lower IDs of the reverse mapping.
+
+This means that code in a nested user namespace with >5 extents will see
+incorrect IDs. It also breaks some access checks, like
+inode_owner_or_capable() and privileged_wrt_inode_uidgid(), so a process
+can incorrectly appear to be capable relative to an inode.
+
+To fix it, we have to make sure that the "lower_first" members of extents
+in both arrays are translated; and we have to make sure that the reverse
+map is sorted *after* the translation (since otherwise the translation can
+break the sorting).
+
+This is CVE-2018-18955.
+
+Fixes: 6397fac4915a ("userns: bump idmap limits to 340")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jann Horn <jannh@google.com>
+Tested-by: Eric W. Biederman <ebiederm@xmission.com>
+Reviewed-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/user_namespace.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -974,10 +974,6 @@ static ssize_t map_write(struct file *fi
+       if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
+               goto out;
+-      ret = sort_idmaps(&new_map);
+-      if (ret < 0)
+-              goto out;
+-
+       ret = -EPERM;
+       /* Map the lower ids from the parent user namespace to the
+        * kernel global id space.
+@@ -1004,6 +1000,14 @@ static ssize_t map_write(struct file *fi
+               e->lower_first = lower_first;
+       }
++      /*
++       * If we want to use binary search for lookup, this clones the extent
++       * array and sorts both copies.
++       */
++      ret = sort_idmaps(&new_map);
++      if (ret < 0)
++              goto out;
++
+       /* Install the map */
+       if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
+               memcpy(map->extent, new_map.extent,
diff --git a/queue-4.19/vga_switcheroo-fix-missing-gpu_bound-call-at-audio-client-registration.patch b/queue-4.19/vga_switcheroo-fix-missing-gpu_bound-call-at-audio-client-registration.patch
new file mode 100644 (file)
index 0000000..fe8d7fe
--- /dev/null
@@ -0,0 +1,43 @@
+From fc09ab7a767394f9ecdad84ea6e85d68b83c8e21 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Mon, 5 Nov 2018 11:52:50 +0100
+Subject: vga_switcheroo: Fix missing gpu_bound call at audio client registration
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit fc09ab7a767394f9ecdad84ea6e85d68b83c8e21 upstream.
+
+The commit 37a3a98ef601 ("ALSA: hda - Enable runtime PM only for
+discrete GPU") added a new ops gpu_bound to be called when GPU gets
+bound.  The patch overlooked, however, that vga_switcheroo_enable() is
+called only once, when the GPU is bound.  When an audio client is registered
+after that point, it would miss the gpu_bound call.  This leads to the
+unexpected lack of runtime PM on the HD-audio side.
+
+For addressing that regression, just call gpu_bound callback manually
+at vga_switcheroo_register_audio_client() when the GPU was already
+bound.
+
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201615
+Fixes: 37a3a98ef601 ("ALSA: hda - Enable runtime PM only for discrete GPU")
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/vga/vga_switcheroo.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/vga/vga_switcheroo.c
++++ b/drivers/gpu/vga/vga_switcheroo.c
+@@ -380,6 +380,9 @@ int vga_switcheroo_register_audio_client
+                       mutex_unlock(&vgasr_mutex);
+                       return -EINVAL;
+               }
++              /* notify if GPU has been already bound */
++              if (ops->gpu_bound)
++                      ops->gpu_bound(pdev, id);
+       }
+       mutex_unlock(&vgasr_mutex);
diff --git a/queue-4.19/vt-fix-broken-display-when-running-aptitude.patch b/queue-4.19/vt-fix-broken-display-when-running-aptitude.patch
new file mode 100644 (file)
index 0000000..7354c02
--- /dev/null
@@ -0,0 +1,35 @@
+From 943210ba807ec50aafa2fa7b13bd6d36a478969b Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Tue, 23 Oct 2018 11:28:28 -0400
+Subject: vt: fix broken display when running aptitude
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 943210ba807ec50aafa2fa7b13bd6d36a478969b upstream.
+
+If you run aptitude on a framebuffer console, the display is corrupted. The
+corruption is caused by commit d8ae72427187. That commit adds "offset" to
+"start" when calling scr_memsetw, but it forgets to do the same addition
+on a subsequent call to do_update_region.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Fixes: d8ae72427187 ("vt: preserve unicode values corresponding to screen characters")
+Reviewed-by: Nicolas Pitre <nico@linaro.org>
+Cc: stable@vger.kernel.org     # 4.19
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/vt/vt.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/tty/vt/vt.c
++++ b/drivers/tty/vt/vt.c
+@@ -1551,7 +1551,7 @@ static void csi_K(struct vc_data *vc, in
+       scr_memsetw(start + offset, vc->vc_video_erase_char, 2 * count);
+       vc->vc_need_wrap = 0;
+       if (con_should_update(vc))
+-              do_update_region(vc, (unsigned long) start, count);
++              do_update_region(vc, (unsigned long)(start + offset), count);
+ }
+ static void csi_X(struct vc_data *vc, int vpar) /* erase the following vpar positions */