From e2faafeb7bdad9aef61fc971a8babfcd756e7d7f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 15 Dec 2019 13:13:11 +0100 Subject: [PATCH] 4.19-stable patches added patches: btrfs-avoid-getting-stuck-during-cyclic-writebacks.patch btrfs-check-page-mapping-when-loading-free-space-cache.patch btrfs-fix-metadata-space-leak-on-fixup-worker-failure-to-set-range-as-delalloc.patch btrfs-fix-negative-subv_writers-counter-and-data-space-leak-after-buffered-write.patch btrfs-record-all-roots-for-rename-exchange-on-a-subvol.patch btrfs-remove-btrfs_bio-flags-member.patch btrfs-send-skip-backreference-walking-for-extents-with-many-references.patch btrfs-use-refcount_inc_not_zero-in-kill_all_nodes.patch dm-writecache-handle-req_fua.patch dm-zoned-reduce-overhead-of-backing-device-checks.patch hwrng-omap-fix-rng-wait-loop-timeout.patch lib-raid6-fix-awk-build-warnings.patch mtd-spear_smi-fix-write-burst-mode.patch ovl-fix-corner-case-of-non-unique-st_dev-st_ino.patch ovl-relax-warn_on-on-rename-to-self.patch phy-renesas-rcar-gen3-usb2-fix-sysfs-interface-of-role.patch rtlwifi-rtl8192de-fix-missing-callback-that-tests-for-hw-release-of-buffer.patch rtlwifi-rtl8192de-fix-missing-code-to-retrieve-rx-buffer-address.patch rtlwifi-rtl8192de-fix-missing-enable-interrupt-flag.patch tpm-add-check-after-commands-attribs-tab-allocation.patch usb-core-urb-fix-urb-structure-initialization-function.patch usb-dwc3-ep0-clear-started-flag-on-completion.patch usb-dwc3-gadget-fix-logical-condition.patch usb-dwc3-pci-add-id-for-the-intel-comet-lake-h-variant.patch usb-mon-fix-a-deadlock-in-usbmon-between-mmap-and-read.patch virtio-balloon-fix-managed-page-counts-when-migrating-pages-between-zones.patch workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch --- ...tting-stuck-during-cyclic-writebacks.patch | 99 +++++++ ...apping-when-loading-free-space-cache.patch | 76 +++++ ...ker-failure-to-set-range-as-delalloc.patch 
| 55 ++++ ...data-space-leak-after-buffered-write.patch | 86 ++++++ ...oots-for-rename-exchange-on-a-subvol.patch | 43 +++ .../btrfs-remove-btrfs_bio-flags-member.patch | 36 +++ ...ing-for-extents-with-many-references.patch | 89 ++++++ ...count_inc_not_zero-in-kill_all_nodes.patch | 67 +++++ queue-4.19/dm-writecache-handle-req_fua.patch | 33 +++ ...ce-overhead-of-backing-device-checks.patch | 261 ++++++++++++++++++ ...hwrng-omap-fix-rng-wait-loop-timeout.patch | 49 ++++ .../lib-raid6-fix-awk-build-warnings.patch | 38 +++ .../mtd-spear_smi-fix-write-burst-mode.patch | 107 +++++++ ...ner-case-of-non-unique-st_dev-st_ino.patch | 60 ++++ .../ovl-relax-warn_on-on-rename-to-self.patch | 39 +++ ...en3-usb2-fix-sysfs-interface-of-role.patch | 48 ++++ ...-that-tests-for-hw-release-of-buffer.patch | 72 +++++ ...g-code-to-retrieve-rx-buffer-address.patch | 46 +++ ...de-fix-missing-enable-interrupt-flag.patch | 67 +++++ queue-4.19/series | 28 ++ ...fter-commands-attribs-tab-allocation.patch | 37 +++ ...rb-structure-initialization-function.patch | 34 +++ ...ep0-clear-started-flag-on-completion.patch | 47 ++++ ...sb-dwc3-gadget-fix-logical-condition.patch | 36 +++ ...d-for-the-intel-comet-lake-h-variant.patch | 45 +++ ...lock-in-usbmon-between-mmap-and-read.patch | 104 +++++++ ...s-when-migrating-pages-between-zones.patch | 159 +++++++++++ ...e-fix-pwq-ref-leak-in-rescuer_thread.patch | 60 ++++ ...-check-failures-in-destroy_workqueue.patch | 83 ++++++ 29 files changed, 2004 insertions(+) create mode 100644 queue-4.19/btrfs-avoid-getting-stuck-during-cyclic-writebacks.patch create mode 100644 queue-4.19/btrfs-check-page-mapping-when-loading-free-space-cache.patch create mode 100644 queue-4.19/btrfs-fix-metadata-space-leak-on-fixup-worker-failure-to-set-range-as-delalloc.patch create mode 100644 queue-4.19/btrfs-fix-negative-subv_writers-counter-and-data-space-leak-after-buffered-write.patch create mode 100644 queue-4.19/btrfs-record-all-roots-for-rename-exchange-on-a-subvol.patch 
create mode 100644 queue-4.19/btrfs-remove-btrfs_bio-flags-member.patch create mode 100644 queue-4.19/btrfs-send-skip-backreference-walking-for-extents-with-many-references.patch create mode 100644 queue-4.19/btrfs-use-refcount_inc_not_zero-in-kill_all_nodes.patch create mode 100644 queue-4.19/dm-writecache-handle-req_fua.patch create mode 100644 queue-4.19/dm-zoned-reduce-overhead-of-backing-device-checks.patch create mode 100644 queue-4.19/hwrng-omap-fix-rng-wait-loop-timeout.patch create mode 100644 queue-4.19/lib-raid6-fix-awk-build-warnings.patch create mode 100644 queue-4.19/mtd-spear_smi-fix-write-burst-mode.patch create mode 100644 queue-4.19/ovl-fix-corner-case-of-non-unique-st_dev-st_ino.patch create mode 100644 queue-4.19/ovl-relax-warn_on-on-rename-to-self.patch create mode 100644 queue-4.19/phy-renesas-rcar-gen3-usb2-fix-sysfs-interface-of-role.patch create mode 100644 queue-4.19/rtlwifi-rtl8192de-fix-missing-callback-that-tests-for-hw-release-of-buffer.patch create mode 100644 queue-4.19/rtlwifi-rtl8192de-fix-missing-code-to-retrieve-rx-buffer-address.patch create mode 100644 queue-4.19/rtlwifi-rtl8192de-fix-missing-enable-interrupt-flag.patch create mode 100644 queue-4.19/tpm-add-check-after-commands-attribs-tab-allocation.patch create mode 100644 queue-4.19/usb-core-urb-fix-urb-structure-initialization-function.patch create mode 100644 queue-4.19/usb-dwc3-ep0-clear-started-flag-on-completion.patch create mode 100644 queue-4.19/usb-dwc3-gadget-fix-logical-condition.patch create mode 100644 queue-4.19/usb-dwc3-pci-add-id-for-the-intel-comet-lake-h-variant.patch create mode 100644 queue-4.19/usb-mon-fix-a-deadlock-in-usbmon-between-mmap-and-read.patch create mode 100644 queue-4.19/virtio-balloon-fix-managed-page-counts-when-migrating-pages-between-zones.patch create mode 100644 queue-4.19/workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch create mode 100644 queue-4.19/workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch diff --git 
a/queue-4.19/btrfs-avoid-getting-stuck-during-cyclic-writebacks.patch b/queue-4.19/btrfs-avoid-getting-stuck-during-cyclic-writebacks.patch new file mode 100644 index 00000000000..aee166297e9 --- /dev/null +++ b/queue-4.19/btrfs-avoid-getting-stuck-during-cyclic-writebacks.patch @@ -0,0 +1,99 @@ +From f7bddf1e27d18fbc7d3e3056ba449cfbe4e20b0a Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 3 Oct 2019 07:27:13 -0700 +Subject: btrfs: Avoid getting stuck during cyclic writebacks + +From: Tejun Heo + +commit f7bddf1e27d18fbc7d3e3056ba449cfbe4e20b0a upstream. + +During a cyclic writeback, extent_write_cache_pages() uses done_index +to update the writeback_index after the current run is over. However, +instead of current index + 1, it gets set to the current index itself. + +Unfortunately, this, combined with returning on EOF instead of looping +back, can lead to the following pathological behavior. + +1. There is a single file which has accumulated enough dirty pages to + trigger balance_dirty_pages() and the writer appending to the file + with a series of short writes. + +2. balance_dirty_pages kicks in, wakes up background writeback and sleeps. + +3. Writeback kicks in and the cursor is on the last page of the dirty + file. Writeback is started or skipped if already in progress. As + it's EOF, extent_write_cache_pages() returns and the cursor is set + to done_index which is pointing to the last page. + +4. Writeback is done. Nothing happens till balance_dirty_pages + finishes, at which point we go back to #1. + +This can almost completely stall out writing back of the file and keep +the system over dirty threshold for a long time which can mess up the +whole system. We encountered this issue in production with a package +handling application which can reliably reproduce the issue when +running under tight memory limits. 
+ +Reading the comment in the error handling section, this seems to be to +avoid accidentally skipping a page in case the write attempt on the +page doesn't succeed. However, this concern seems bogus. + +On each page, the code either: + +* Skips and moves onto the next page. + +* Fails issue and sets done_index to index + 1. + +* Successfully issues and continue to the next page if budget allows + and not EOF. + +IOW, as long as it's not EOF and there's budget, the code never +retries writing back the same page. Only when a page happens to be +the last page of a particular run, we end up retrying the page, which +can't possibly guarantee anything data integrity related. Besides, +cyclic writes are only used for non-syncing writebacks meaning that +there's no data integrity implication to begin with. + +Fix it by always setting done_index past the current page being +processed. + +Note that this problem exists in other writepages too. + +CC: stable@vger.kernel.org # 4.19+ +Signed-off-by: Tejun Heo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent_io.c | 12 +----------- + 1 file changed, 1 insertion(+), 11 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -3956,7 +3956,7 @@ retry: + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + +- done_index = page->index; ++ done_index = page->index + 1; + /* + * At this point we hold neither the i_pages lock nor + * the page lock: the page may be truncated or +@@ -3993,16 +3993,6 @@ retry: + ret = 0; + } + if (ret < 0) { +- /* +- * done_index is set past this page, +- * so media errors will not choke +- * background writeout for the entire +- * file. This has consequences for +- * range_cyclic semantics (ie. it may +- * not be suitable for data integrity +- * writeout). 
+- */ +- done_index = page->index + 1; + done = 1; + break; + } diff --git a/queue-4.19/btrfs-check-page-mapping-when-loading-free-space-cache.patch b/queue-4.19/btrfs-check-page-mapping-when-loading-free-space-cache.patch new file mode 100644 index 00000000000..b6e8b404712 --- /dev/null +++ b/queue-4.19/btrfs-check-page-mapping-when-loading-free-space-cache.patch @@ -0,0 +1,76 @@ +From 3797136b626ad4b6582223660c041efdea8f26b2 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Tue, 24 Sep 2019 16:50:43 -0400 +Subject: btrfs: check page->mapping when loading free space cache + +From: Josef Bacik + +commit 3797136b626ad4b6582223660c041efdea8f26b2 upstream. + +While testing 5.2 we ran into the following panic + +[52238.017028] BUG: kernel NULL pointer dereference, address: 0000000000000001 +[52238.105608] RIP: 0010:drop_buffers+0x3d/0x150 +[52238.304051] Call Trace: +[52238.308958] try_to_free_buffers+0x15b/0x1b0 +[52238.317503] shrink_page_list+0x1164/0x1780 +[52238.325877] shrink_inactive_list+0x18f/0x3b0 +[52238.334596] shrink_node_memcg+0x23e/0x7d0 +[52238.342790] ? do_shrink_slab+0x4f/0x290 +[52238.350648] shrink_node+0xce/0x4a0 +[52238.357628] balance_pgdat+0x2c7/0x510 +[52238.365135] kswapd+0x216/0x3e0 +[52238.371425] ? wait_woken+0x80/0x80 +[52238.378412] ? balance_pgdat+0x510/0x510 +[52238.386265] kthread+0x111/0x130 +[52238.392727] ? kthread_create_on_node+0x60/0x60 +[52238.401782] ret_from_fork+0x1f/0x30 + +The page we were trying to drop had a page->private, but had no +page->mapping and so called drop_buffers, assuming that we had a +buffer_head on the page, and then panic'ed trying to deref 1, which is +our page->private for data pages. + +This is happening because we're truncating the free space cache while +we're trying to load the free space cache. This isn't supposed to +happen, and I'll fix that in a followup patch. However we still +shouldn't allow those sort of mistakes to result in messing with pages +that do not belong to us. 
So add the page->mapping check to verify that +we still own this page after dropping and re-acquiring the page lock. + +This page being unlocked as: +btrfs_readpage + extent_read_full_page + __extent_read_full_page + __do_readpage + if (!nr) + unlock_page <-- nr can be 0 only if submit_extent_page + returns an error + +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Filipe Manana +Reviewed-by: Nikolay Borisov +Signed-off-by: Josef Bacik +[ add callchain ] +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/free-space-cache.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/btrfs/free-space-cache.c ++++ b/fs/btrfs/free-space-cache.c +@@ -382,6 +382,12 @@ static int io_ctl_prepare_pages(struct b + if (uptodate && !PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); ++ if (page->mapping != inode->i_mapping) { ++ btrfs_err(BTRFS_I(inode)->root->fs_info, ++ "free space cache page truncated"); ++ io_ctl_drop_pages(io_ctl); ++ return -EIO; ++ } + if (!PageUptodate(page)) { + btrfs_err(BTRFS_I(inode)->root->fs_info, + "error reading free space cache"); diff --git a/queue-4.19/btrfs-fix-metadata-space-leak-on-fixup-worker-failure-to-set-range-as-delalloc.patch b/queue-4.19/btrfs-fix-metadata-space-leak-on-fixup-worker-failure-to-set-range-as-delalloc.patch new file mode 100644 index 00000000000..a861c6c24d4 --- /dev/null +++ b/queue-4.19/btrfs-fix-metadata-space-leak-on-fixup-worker-failure-to-set-range-as-delalloc.patch @@ -0,0 +1,55 @@ +From 536870071dbc4278264f59c9a2f5f447e584d139 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Wed, 9 Oct 2019 17:43:59 +0100 +Subject: Btrfs: fix metadata space leak on fixup worker failure to set range as delalloc + +From: Filipe Manana + +commit 536870071dbc4278264f59c9a2f5f447e584d139 upstream. 
+ +In the fixup worker, if we fail to mark the range as delalloc in the io +tree, we must release the previously reserved metadata, as well as update +the outstanding extents counter for the inode, otherwise we leak metadata +space. + +In practice we can't return an error from btrfs_set_extent_delalloc(), +which is just a wrapper around __set_extent_bit(), as for most errors +__set_extent_bit() does a BUG_ON() (or panics which hits a BUG_ON() as +well) and returning an -EEXIST error doesn't happen in this case since +the exclusive bits parameter always has a value of 0 through this code +path. Nevertheless, just fix the error handling in the fixup worker, +in case one day __set_extent_bit() can return an error to this code +path. + +Fixes: f3038ee3a3f101 ("btrfs: Handle btrfs_set_extent_delalloc failure in fixup worker") +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Nikolay Borisov +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -2161,12 +2161,16 @@ again: + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + ClearPageChecked(page); +- goto out; ++ goto out_reserved; + } + + ClearPageChecked(page); + set_page_dirty(page); ++out_reserved: + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); ++ if (ret) ++ btrfs_delalloc_release_space(inode, data_reserved, page_start, ++ PAGE_SIZE, true); + out: + unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, + &cached_state); diff --git a/queue-4.19/btrfs-fix-negative-subv_writers-counter-and-data-space-leak-after-buffered-write.patch b/queue-4.19/btrfs-fix-negative-subv_writers-counter-and-data-space-leak-after-buffered-write.patch new file mode 100644 index 00000000000..ee3e9b50d4c --- /dev/null +++ 
b/queue-4.19/btrfs-fix-negative-subv_writers-counter-and-data-space-leak-after-buffered-write.patch @@ -0,0 +1,86 @@ +From a0e248bb502d5165b3314ac3819e888fdcdf7d9f Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 11 Oct 2019 16:41:20 +0100 +Subject: Btrfs: fix negative subv_writers counter and data space leak after buffered write + +From: Filipe Manana + +commit a0e248bb502d5165b3314ac3819e888fdcdf7d9f upstream. + +When doing a buffered write it's possible to leave the subv_writers +counter of the root, used for synchronization between buffered nocow +writers and snapshotting. This happens in an exceptional case like the +following: + +1) We fail to allocate data space for the write, since there's not + enough available data space nor enough unallocated space for allocating + a new data block group; + +2) Because of that failure, we try to go to NOCOW mode, which succeeds + and therefore we set the local variable 'only_release_metadata' to true + and set the root's sub_writers counter to 1 through the call to + btrfs_start_write_no_snapshotting() made by check_can_nocow(); + +3) The call to btrfs_copy_from_user() returns zero, which is very unlikely + to happen but not impossible; + +4) No pages are copied because btrfs_copy_from_user() returned zero; + +5) We call btrfs_end_write_no_snapshotting() which decrements the root's + subv_writers counter to 0; + +6) We don't set 'only_release_metadata' back to 'false' because we do + it only if 'copied', the value returned by btrfs_copy_from_user(), is + greater than zero; + +7) On the next iteration of the while loop, which processes the same + page range, we are now able to allocate data space for the write (we + got enough data space released in the meanwhile); + +8) After this if we fail at btrfs_delalloc_reserve_metadata(), because + now there isn't enough free metadata space, or in some other place + further below (prepare_pages(), lock_and_cleanup_extent_if_need(), + btrfs_dirty_pages()), we break out of 
the while loop with + 'only_release_metadata' having a value of 'true'; + +9) Because 'only_release_metadata' is 'true' we end up decrementing the + root's subv_writers counter to -1 (through a call to + btrfs_end_write_no_snapshotting()), and we also end up not releasing the + data space previously reserved through btrfs_check_data_free_space(). + As a consequence the mechanism for synchronizing NOCOW buffered writes + with snapshotting gets broken. + +Fix this by always setting 'only_release_metadata' to false at the start +of each iteration. + +Fixes: 8257b2dc3c1a ("Btrfs: introduce btrfs_{start, end}_nocow_write() for each subvolume") +Fixes: 7ee9e4405f26 ("Btrfs: check if we can nocow if we don't have data space") +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -1636,6 +1636,7 @@ static noinline ssize_t btrfs_buffered_w + break; + } + ++ only_release_metadata = false; + sector_offset = pos & (fs_info->sectorsize - 1); + reserve_bytes = round_up(write_bytes + sector_offset, + fs_info->sectorsize); +@@ -1791,7 +1792,6 @@ again: + set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, + lockend, EXTENT_NORESERVE, NULL, + NULL, GFP_NOFS); +- only_release_metadata = false; + } + + btrfs_drop_pages(pages, num_pages); diff --git a/queue-4.19/btrfs-record-all-roots-for-rename-exchange-on-a-subvol.patch b/queue-4.19/btrfs-record-all-roots-for-rename-exchange-on-a-subvol.patch new file mode 100644 index 00000000000..2a6566b9a2b --- /dev/null +++ b/queue-4.19/btrfs-record-all-roots-for-rename-exchange-on-a-subvol.patch @@ -0,0 +1,43 @@ +From 3e1740993e43116b3bc71b0aad1e6872f6ccf341 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 15 Nov 2019 15:43:06 -0500 +Subject: btrfs: record all roots for rename 
exchange on a subvol + +From: Josef Bacik + +commit 3e1740993e43116b3bc71b0aad1e6872f6ccf341 upstream. + +Testing with the new fsstress support for subvolumes uncovered a pretty +bad problem with rename exchange on subvolumes. We're modifying two +different subvolumes, but we only start the transaction on one of them, +so the other one is not added to the dirty root list. This is caught by +btrfs_cow_block() with a warning because the root has not been updated, +however if we do not modify this root again we'll end up pointing at an +invalid root because the root item is never updated. + +Fix this by making sure we add the destination root to the trans list, +the same as we do with normal renames. This fixes the corruption. + +Fixes: cdd1fedf8261 ("btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT") +CC: stable@vger.kernel.org # 4.9+ +Reviewed-by: Filipe Manana +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -9510,6 +9510,9 @@ static int btrfs_rename_exchange(struct + goto out_notrans; + } + ++ if (dest != root) ++ btrfs_record_root_in_trans(trans, dest); ++ + /* + * We need to find a free sequence number both in the source and + * in the destination directory for the exchange. diff --git a/queue-4.19/btrfs-remove-btrfs_bio-flags-member.patch b/queue-4.19/btrfs-remove-btrfs_bio-flags-member.patch new file mode 100644 index 00000000000..b99786f001d --- /dev/null +++ b/queue-4.19/btrfs-remove-btrfs_bio-flags-member.patch @@ -0,0 +1,36 @@ +From 34b127aecd4fe8e6a3903e10f204a7b7ffddca22 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Thu, 24 Oct 2019 09:38:29 +0800 +Subject: btrfs: Remove btrfs_bio::flags member + +From: Qu Wenruo + +commit 34b127aecd4fe8e6a3903e10f204a7b7ffddca22 upstream. 
+ +The last user of btrfs_bio::flags was removed in commit 326e1dbb5736 +("block: remove management of bi_remaining when restoring original +bi_end_io"), remove it. + +(Tagged for stable as the structure is heavily used and space savings +are desirable.) + +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/volumes.h | 1 - + 1 file changed, 1 deletion(-) + +--- a/fs/btrfs/volumes.h ++++ b/fs/btrfs/volumes.h +@@ -304,7 +304,6 @@ struct btrfs_bio { + u64 map_type; /* get from map_lookup->type */ + bio_end_io_t *end_io; + struct bio *orig_bio; +- unsigned long flags; + void *private; + atomic_t error; + int max_errors; diff --git a/queue-4.19/btrfs-send-skip-backreference-walking-for-extents-with-many-references.patch b/queue-4.19/btrfs-send-skip-backreference-walking-for-extents-with-many-references.patch new file mode 100644 index 00000000000..da9e9e197b6 --- /dev/null +++ b/queue-4.19/btrfs-send-skip-backreference-walking-for-extents-with-many-references.patch @@ -0,0 +1,89 @@ +From fd0ddbe2509568b00df364156f47561e9f469f15 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Wed, 30 Oct 2019 12:23:01 +0000 +Subject: Btrfs: send, skip backreference walking for extents with many references + +From: Filipe Manana + +commit fd0ddbe2509568b00df364156f47561e9f469f15 upstream. + +Backreference walking, which is used by send to figure if it can issue +clone operations instead of write operations, can be very slow and use +too much memory when extents have many references. This change simply +skips backreference walking when an extent has more than 64 references, +in which case we fallback to a write operation instead of a clone +operation. This limit is conservative and in practice I observed no +significant slowdown with up to 100 references and still low memory usage +up to that limit. 
+ +This is a temporary workaround until there are speedups in the backref +walking code, and as such it does not attempt to add extra interfaces or +knobs to tweak the threshold. + +Reported-by: Atemu +Link: https://lore.kernel.org/linux-btrfs/CAE4GHgkvqVADtS4AzcQJxo0Q1jKQgKaW3JGp3SGdoinVo=C9eQ@mail.gmail.com/T/#me55dc0987f9cc2acaa54372ce0492c65782be3fa +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/send.c | 25 ++++++++++++++++++++++++- + 1 file changed, 24 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -25,6 +25,14 @@ + #include "compression.h" + + /* ++ * Maximum number of references an extent can have in order for us to attempt to ++ * issue clone operations instead of write operations. This currently exists to ++ * avoid hitting limitations of the backreference walking code (taking a lot of ++ * time and using too much memory for extents with large number of references). ++ */ ++#define SEND_MAX_EXTENT_REFS 64 ++ ++/* + * A fs_path is a helper to dynamically build path names with unknown size. + * It reallocates the internal buffer on demand. 
+ * It allows fast adding of path elements on the right side (normal path) and +@@ -1303,6 +1311,7 @@ static int find_extent_clone(struct send + struct clone_root *cur_clone_root; + struct btrfs_key found_key; + struct btrfs_path *tmp_path; ++ struct btrfs_extent_item *ei; + int compressed; + u32 i; + +@@ -1352,7 +1361,6 @@ static int find_extent_clone(struct send + ret = extent_from_logical(fs_info, disk_byte, tmp_path, + &found_key, &flags); + up_read(&fs_info->commit_root_sem); +- btrfs_release_path(tmp_path); + + if (ret < 0) + goto out; +@@ -1361,6 +1369,21 @@ static int find_extent_clone(struct send + goto out; + } + ++ ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0], ++ struct btrfs_extent_item); ++ /* ++ * Backreference walking (iterate_extent_inodes() below) is currently ++ * too expensive when an extent has a large number of references, both ++ * in time spent and used memory. So for now just fallback to write ++ * operations instead of clone operations when an extent has more than ++ * a certain amount of references. ++ */ ++ if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) { ++ ret = -ENOENT; ++ goto out; ++ } ++ btrfs_release_path(tmp_path); ++ + /* + * Setup the clone roots. + */ diff --git a/queue-4.19/btrfs-use-refcount_inc_not_zero-in-kill_all_nodes.patch b/queue-4.19/btrfs-use-refcount_inc_not_zero-in-kill_all_nodes.patch new file mode 100644 index 00000000000..f45c29e7f9b --- /dev/null +++ b/queue-4.19/btrfs-use-refcount_inc_not_zero-in-kill_all_nodes.patch @@ -0,0 +1,67 @@ +From baf320b9d531f1cfbf64c60dd155ff80a58b3796 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 26 Sep 2019 08:29:32 -0400 +Subject: btrfs: use refcount_inc_not_zero in kill_all_nodes + +From: Josef Bacik + +commit baf320b9d531f1cfbf64c60dd155ff80a58b3796 upstream. 
+ +We hit the following warning while running down a different problem + +[ 6197.175850] ------------[ cut here ]------------ +[ 6197.185082] refcount_t: underflow; use-after-free. +[ 6197.194704] WARNING: CPU: 47 PID: 966 at lib/refcount.c:190 refcount_sub_and_test_checked+0x53/0x60 +[ 6197.521792] Call Trace: +[ 6197.526687] __btrfs_release_delayed_node+0x76/0x1c0 +[ 6197.536615] btrfs_kill_all_delayed_nodes+0xec/0x130 +[ 6197.546532] ? __btrfs_btree_balance_dirty+0x60/0x60 +[ 6197.556482] btrfs_clean_one_deleted_snapshot+0x71/0xd0 +[ 6197.566910] cleaner_kthread+0xfa/0x120 +[ 6197.574573] kthread+0x111/0x130 +[ 6197.581022] ? kthread_create_on_node+0x60/0x60 +[ 6197.590086] ret_from_fork+0x1f/0x30 +[ 6197.597228] ---[ end trace 424bb7ae00509f56 ]--- + +This is because the free side drops the ref without the lock, and then +takes the lock if our refcount is 0. So you can have nodes on the tree +that have a refcount of 0. Fix this by zero'ing out that element in our +temporary array so we don't try to kill it again. 
+ +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Nikolay Borisov +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +[ add comment ] +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/delayed-inode.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/delayed-inode.c ++++ b/fs/btrfs/delayed-inode.c +@@ -1939,12 +1939,19 @@ void btrfs_kill_all_delayed_nodes(struct + } + + inode_id = delayed_nodes[n - 1]->inode_id + 1; +- +- for (i = 0; i < n; i++) +- refcount_inc(&delayed_nodes[i]->refs); ++ for (i = 0; i < n; i++) { ++ /* ++ * Don't increase refs in case the node is dead and ++ * about to be removed from the tree in the loop below ++ */ ++ if (!refcount_inc_not_zero(&delayed_nodes[i]->refs)) ++ delayed_nodes[i] = NULL; ++ } + spin_unlock(&root->inode_lock); + + for (i = 0; i < n; i++) { ++ if (!delayed_nodes[i]) ++ continue; + __btrfs_kill_delayed_node(delayed_nodes[i]); + btrfs_release_delayed_node(delayed_nodes[i]); + } diff --git a/queue-4.19/dm-writecache-handle-req_fua.patch b/queue-4.19/dm-writecache-handle-req_fua.patch new file mode 100644 index 00000000000..5ccea66d9ff --- /dev/null +++ b/queue-4.19/dm-writecache-handle-req_fua.patch @@ -0,0 +1,33 @@ +From c1005322ff02110a4df7f0033368ea015062b583 Mon Sep 17 00:00:00 2001 +From: Maged Mokhtar +Date: Wed, 23 Oct 2019 22:41:17 +0200 +Subject: dm writecache: handle REQ_FUA + +From: Maged Mokhtar + +commit c1005322ff02110a4df7f0033368ea015062b583 upstream. + +Call writecache_flush() on REQ_FUA in writecache_map(). 
+ +Cc: stable@vger.kernel.org # 4.18+ +Signed-off-by: Maged Mokhtar +Acked-by: Mikulas Patocka +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-writecache.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/md/dm-writecache.c ++++ b/drivers/md/dm-writecache.c +@@ -1223,7 +1223,8 @@ bio_copy: + } + } while (bio->bi_iter.bi_size); + +- if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) ++ if (unlikely(bio->bi_opf & REQ_FUA || ++ wc->uncommitted_blocks >= wc->autocommit_blocks)) + writecache_flush(wc); + else + writecache_schedule_autocommit(wc); diff --git a/queue-4.19/dm-zoned-reduce-overhead-of-backing-device-checks.patch b/queue-4.19/dm-zoned-reduce-overhead-of-backing-device-checks.patch new file mode 100644 index 00000000000..0eebd31a360 --- /dev/null +++ b/queue-4.19/dm-zoned-reduce-overhead-of-backing-device-checks.patch @@ -0,0 +1,261 @@ +From e7fad909b68aa37470d9f2d2731b5bec355ee5d6 Mon Sep 17 00:00:00 2001 +From: Dmitry Fomichev +Date: Wed, 6 Nov 2019 14:34:35 -0800 +Subject: dm zoned: reduce overhead of backing device checks + +From: Dmitry Fomichev + +commit e7fad909b68aa37470d9f2d2731b5bec355ee5d6 upstream. + +Commit 75d66ffb48efb3 added backing device health checks and as a part +of these checks, check_events() block ops template call is invoked in +dm-zoned mapping path as well as in reclaim and flush path. Calling +check_events() with ATA or SCSI backing devices introduces a blocking +scsi_test_unit_ready() call being made in sd_check_events(). Even though +the overhead of calling scsi_test_unit_ready() is small for ATA zoned +devices, it is much larger for SCSI and it affects performance in a very +negative way. + +Fix this performance regression by executing check_events() only in case +of any I/O errors. The function dmz_bdev_is_dying() is modified to call +only blk_queue_dying(), while calls to check_events() are made in a new +helper function, dmz_check_bdev(). 
+ +Reported-by: zhangxiaoxu +Fixes: 75d66ffb48efb3 ("dm zoned: properly handle backing device failure") +Cc: stable@vger.kernel.org +Signed-off-by: Dmitry Fomichev +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-zoned-metadata.c | 29 ++++++++++++++-------- + drivers/md/dm-zoned-reclaim.c | 8 +----- + drivers/md/dm-zoned-target.c | 54 ++++++++++++++++++++++++++++------------- + drivers/md/dm-zoned.h | 2 + + 4 files changed, 61 insertions(+), 32 deletions(-) + +--- a/drivers/md/dm-zoned-metadata.c ++++ b/drivers/md/dm-zoned-metadata.c +@@ -552,6 +552,7 @@ static struct dmz_mblock *dmz_get_mblock + TASK_UNINTERRUPTIBLE); + if (test_bit(DMZ_META_ERROR, &mblk->state)) { + dmz_release_mblock(zmd, mblk); ++ dmz_check_bdev(zmd->dev); + return ERR_PTR(-EIO); + } + +@@ -623,6 +624,8 @@ static int dmz_rdwr_block(struct dmz_met + ret = submit_bio_wait(bio); + bio_put(bio); + ++ if (ret) ++ dmz_check_bdev(zmd->dev); + return ret; + } + +@@ -689,6 +692,7 @@ static int dmz_write_dirty_mblocks(struc + TASK_UNINTERRUPTIBLE); + if (test_bit(DMZ_META_ERROR, &mblk->state)) { + clear_bit(DMZ_META_ERROR, &mblk->state); ++ dmz_check_bdev(zmd->dev); + ret = -EIO; + } + nr_mblks_submitted--; +@@ -766,7 +770,7 @@ int dmz_flush_metadata(struct dmz_metada + /* If there are no dirty metadata blocks, just flush the device cache */ + if (list_empty(&write_list)) { + ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL); +- goto out; ++ goto err; + } + + /* +@@ -776,7 +780,7 @@ int dmz_flush_metadata(struct dmz_metada + */ + ret = dmz_log_dirty_mblocks(zmd, &write_list); + if (ret) +- goto out; ++ goto err; + + /* + * The log is on disk. 
It is now safe to update in place +@@ -784,11 +788,11 @@ int dmz_flush_metadata(struct dmz_metada + */ + ret = dmz_write_dirty_mblocks(zmd, &write_list, zmd->mblk_primary); + if (ret) +- goto out; ++ goto err; + + ret = dmz_write_sb(zmd, zmd->mblk_primary); + if (ret) +- goto out; ++ goto err; + + while (!list_empty(&write_list)) { + mblk = list_first_entry(&write_list, struct dmz_mblock, link); +@@ -803,16 +807,20 @@ int dmz_flush_metadata(struct dmz_metada + + zmd->sb_gen++; + out: +- if (ret && !list_empty(&write_list)) { +- spin_lock(&zmd->mblk_lock); +- list_splice(&write_list, &zmd->mblk_dirty_list); +- spin_unlock(&zmd->mblk_lock); +- } +- + dmz_unlock_flush(zmd); + up_write(&zmd->mblk_sem); + + return ret; ++ ++err: ++ if (!list_empty(&write_list)) { ++ spin_lock(&zmd->mblk_lock); ++ list_splice(&write_list, &zmd->mblk_dirty_list); ++ spin_unlock(&zmd->mblk_lock); ++ } ++ if (!dmz_check_bdev(zmd->dev)) ++ ret = -EIO; ++ goto out; + } + + /* +@@ -1235,6 +1243,7 @@ static int dmz_update_zone(struct dmz_me + if (ret) { + dmz_dev_err(zmd->dev, "Get zone %u report failed", + dmz_id(zmd, zone)); ++ dmz_check_bdev(zmd->dev); + return ret; + } + +--- a/drivers/md/dm-zoned-reclaim.c ++++ b/drivers/md/dm-zoned-reclaim.c +@@ -81,6 +81,7 @@ static int dmz_reclaim_align_wp(struct d + "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d", + dmz_id(zmd, zone), (unsigned long long)wp_block, + (unsigned long long)block, nr_blocks, ret); ++ dmz_check_bdev(zrc->dev); + return ret; + } + +@@ -488,12 +489,7 @@ static void dmz_reclaim_work(struct work + ret = dmz_do_reclaim(zrc); + if (ret) { + dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); +- if (ret == -EIO) +- /* +- * LLD might be performing some error handling sequence +- * at the underlying device. To not interfere, do not +- * attempt to schedule the next reclaim run immediately. 
+- */ ++ if (!dmz_check_bdev(zrc->dev)) + return; + } + +--- a/drivers/md/dm-zoned-target.c ++++ b/drivers/md/dm-zoned-target.c +@@ -79,6 +79,8 @@ static inline void dmz_bio_endio(struct + + if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK) + bio->bi_status = status; ++ if (bio->bi_status != BLK_STS_OK) ++ bioctx->target->dev->flags |= DMZ_CHECK_BDEV; + + if (atomic_dec_and_test(&bioctx->ref)) { + struct dm_zone *zone = bioctx->zone; +@@ -564,32 +566,52 @@ out: + } + + /* +- * Check the backing device availability. If it's on the way out, ++ * Check if the backing device is being removed. If it's on the way out, + * start failing I/O. Reclaim and metadata components also call this + * function to cleanly abort operation in the event of such failure. + */ + bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev) + { +- struct gendisk *disk; ++ if (dmz_dev->flags & DMZ_BDEV_DYING) ++ return true; + +- if (!(dmz_dev->flags & DMZ_BDEV_DYING)) { +- disk = dmz_dev->bdev->bd_disk; +- if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { +- dmz_dev_warn(dmz_dev, "Backing device queue dying"); +- dmz_dev->flags |= DMZ_BDEV_DYING; +- } else if (disk->fops->check_events) { +- if (disk->fops->check_events(disk, 0) & +- DISK_EVENT_MEDIA_CHANGE) { +- dmz_dev_warn(dmz_dev, "Backing device offline"); +- dmz_dev->flags |= DMZ_BDEV_DYING; +- } +- } ++ if (dmz_dev->flags & DMZ_CHECK_BDEV) ++ return !dmz_check_bdev(dmz_dev); ++ ++ if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { ++ dmz_dev_warn(dmz_dev, "Backing device queue dying"); ++ dmz_dev->flags |= DMZ_BDEV_DYING; + } + + return dmz_dev->flags & DMZ_BDEV_DYING; + } + + /* ++ * Check the backing device availability. This detects such events as ++ * backing device going offline due to errors, media removals, etc. ++ * This check is less efficient than dmz_bdev_is_dying() and should ++ * only be performed as a part of error handling. 
++ */ ++bool dmz_check_bdev(struct dmz_dev *dmz_dev) ++{ ++ struct gendisk *disk; ++ ++ dmz_dev->flags &= ~DMZ_CHECK_BDEV; ++ ++ if (dmz_bdev_is_dying(dmz_dev)) ++ return false; ++ ++ disk = dmz_dev->bdev->bd_disk; ++ if (disk->fops->check_events && ++ disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) { ++ dmz_dev_warn(dmz_dev, "Backing device offline"); ++ dmz_dev->flags |= DMZ_BDEV_DYING; ++ } ++ ++ return !(dmz_dev->flags & DMZ_BDEV_DYING); ++} ++ ++/* + * Process a new BIO. + */ + static int dmz_map(struct dm_target *ti, struct bio *bio) +@@ -902,8 +924,8 @@ static int dmz_prepare_ioctl(struct dm_t + { + struct dmz_target *dmz = ti->private; + +- if (dmz_bdev_is_dying(dmz->dev)) +- return -ENODEV; ++ if (!dmz_check_bdev(dmz->dev)) ++ return -EIO; + + *bdev = dmz->dev->bdev; + +--- a/drivers/md/dm-zoned.h ++++ b/drivers/md/dm-zoned.h +@@ -71,6 +71,7 @@ struct dmz_dev { + + /* Device flags. */ + #define DMZ_BDEV_DYING (1 << 0) ++#define DMZ_CHECK_BDEV (2 << 0) + + /* + * Zone descriptor. +@@ -254,5 +255,6 @@ void dmz_schedule_reclaim(struct dmz_rec + * Functions defined in dm-zoned-target.c + */ + bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev); ++bool dmz_check_bdev(struct dmz_dev *dmz_dev); + + #endif /* DM_ZONED_H */ diff --git a/queue-4.19/hwrng-omap-fix-rng-wait-loop-timeout.patch b/queue-4.19/hwrng-omap-fix-rng-wait-loop-timeout.patch new file mode 100644 index 00000000000..be925b9bd62 --- /dev/null +++ b/queue-4.19/hwrng-omap-fix-rng-wait-loop-timeout.patch @@ -0,0 +1,49 @@ +From be867f987a4e1222114dd07a01838a17c26f3fff Mon Sep 17 00:00:00 2001 +From: Sumit Garg +Date: Mon, 14 Oct 2019 17:32:45 +0530 +Subject: hwrng: omap - Fix RNG wait loop timeout + +From: Sumit Garg + +commit be867f987a4e1222114dd07a01838a17c26f3fff upstream. + +Existing RNG data read timeout is 200us but it doesn't cover EIP76 RNG +data rate which takes approx. 700us to produce 16 bytes of output data +as per testing results. 
So configure the timeout as 1000us to also take +account of lack of udelay()'s reliability. + +Fixes: 383212425c92 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") +Cc: +Signed-off-by: Sumit Garg +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/hw_random/omap-rng.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/drivers/char/hw_random/omap-rng.c ++++ b/drivers/char/hw_random/omap-rng.c +@@ -66,6 +66,13 @@ + #define OMAP4_RNG_OUTPUT_SIZE 0x8 + #define EIP76_RNG_OUTPUT_SIZE 0x10 + ++/* ++ * EIP76 RNG takes approx. 700us to produce 16 bytes of output data ++ * as per testing results. And to account for the lack of udelay()'s ++ * reliability, we keep the timeout as 1000us. ++ */ ++#define RNG_DATA_FILL_TIMEOUT 100 ++ + enum { + RNG_OUTPUT_0_REG = 0, + RNG_OUTPUT_1_REG, +@@ -176,7 +183,7 @@ static int omap_rng_do_read(struct hwrng + if (max < priv->pdata->data_size) + return 0; + +- for (i = 0; i < 20; i++) { ++ for (i = 0; i < RNG_DATA_FILL_TIMEOUT; i++) { + present = priv->pdata->data_present(priv); + if (present || !wait) + break; diff --git a/queue-4.19/lib-raid6-fix-awk-build-warnings.patch b/queue-4.19/lib-raid6-fix-awk-build-warnings.patch new file mode 100644 index 00000000000..009d71d6d9b --- /dev/null +++ b/queue-4.19/lib-raid6-fix-awk-build-warnings.patch @@ -0,0 +1,38 @@ +From 702600eef73033ddd4eafcefcbb6560f3e3a90f7 Mon Sep 17 00:00:00 2001 +From: Greg Kroah-Hartman +Date: Fri, 6 Dec 2019 16:26:00 +0100 +Subject: lib: raid6: fix awk build warnings + +From: Greg Kroah-Hartman + +commit 702600eef73033ddd4eafcefcbb6560f3e3a90f7 upstream. 
+ +Newer versions of awk spit out these fun warnings: + awk: ../lib/raid6/unroll.awk:16: warning: regexp escape sequence `\#' is not a known regexp operator + +As commit 700c1018b86d ("x86/insn: Fix awk regexp warnings") showed, it +turns out that there are a number of awk strings that do not need to be +escaped and newer versions of awk now warn about this. + +Fix the string up so that no warning is produced. The exact same kernel +module gets created before and after this patch, showing that it wasn't +needed. + +Link: https://lore.kernel.org/r/20191206152600.GA75093@kroah.com +Signed-off-by: Greg Kroah-Hartman + +--- + lib/raid6/unroll.awk | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/lib/raid6/unroll.awk ++++ b/lib/raid6/unroll.awk +@@ -13,7 +13,7 @@ BEGIN { + for (i = 0; i < rep; ++i) { + tmp = $0 + gsub(/\$\$/, i, tmp) +- gsub(/\$\#/, n, tmp) ++ gsub(/\$#/, n, tmp) + gsub(/\$\*/, "$", tmp) + print tmp + } diff --git a/queue-4.19/mtd-spear_smi-fix-write-burst-mode.patch b/queue-4.19/mtd-spear_smi-fix-write-burst-mode.patch new file mode 100644 index 00000000000..120e2847fc3 --- /dev/null +++ b/queue-4.19/mtd-spear_smi-fix-write-burst-mode.patch @@ -0,0 +1,107 @@ +From 69c7f4618c16b4678f8a4949b6bb5ace259c0033 Mon Sep 17 00:00:00 2001 +From: Miquel Raynal +Date: Tue, 22 Oct 2019 16:58:59 +0200 +Subject: mtd: spear_smi: Fix Write Burst mode + +From: Miquel Raynal + +commit 69c7f4618c16b4678f8a4949b6bb5ace259c0033 upstream. + +Any write with either dd or flashcp to a device driven by the +spear_smi.c driver will pass through the spear_smi_cpy_toio() +function. This function will get called for chunks of up to 256 bytes. +If the amount of data is smaller, we may have a problem if the data +length is not 4-byte aligned. 
In this situation, the kernel panics +during the memcpy: + + # dd if=/dev/urandom bs=1001 count=1 of=/dev/mtd6 + spear_smi_cpy_toio [620] dest c9070000, src c7be8800, len 256 + spear_smi_cpy_toio [620] dest c9070100, src c7be8900, len 256 + spear_smi_cpy_toio [620] dest c9070200, src c7be8a00, len 256 + spear_smi_cpy_toio [620] dest c9070300, src c7be8b00, len 233 + Unhandled fault: external abort on non-linefetch (0x808) at 0xc90703e8 + [...] + PC is at memcpy+0xcc/0x330 + +The above error occurs because the implementation of memcpy_toio() +tries to optimize the number of I/O by writing 4 bytes at a time as +much as possible, until there are less than 4 bytes left and then +switches to word or byte writes. + +Unfortunately, the specification states about the Write Burst mode: + + "the next AHB Write request should point to the next + incremented address and should have the same size (byte, + half-word or word)" + +This means ARM architecture implementation of memcpy_toio() cannot +reliably be used blindly here. Workaround this situation by update the +write path to stick to byte access when the burst length is not +multiple of 4. + +Fixes: f18dbbb1bfe0 ("mtd: ST SPEAr: Add SMI driver for serial NOR flash") +Cc: Russell King +Cc: Boris Brezillon +Cc: stable@vger.kernel.org +Signed-off-by: Miquel Raynal +Reviewed-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/devices/spear_smi.c | 38 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 37 insertions(+), 1 deletion(-) + +--- a/drivers/mtd/devices/spear_smi.c ++++ b/drivers/mtd/devices/spear_smi.c +@@ -592,6 +592,26 @@ static int spear_mtd_read(struct mtd_inf + return 0; + } + ++/* ++ * The purpose of this function is to ensure a memcpy_toio() with byte writes ++ * only. Its structure is inspired from the ARM implementation of _memcpy_toio() ++ * which also does single byte writes but cannot be used here as this is just an ++ * implementation detail and not part of the API. 
Not mentioning the comment ++ * stating that _memcpy_toio() should be optimized. ++ */ ++static void spear_smi_memcpy_toio_b(volatile void __iomem *dest, ++ const void *src, size_t len) ++{ ++ const unsigned char *from = src; ++ ++ while (len) { ++ len--; ++ writeb(*from, dest); ++ from++; ++ dest++; ++ } ++} ++ + static inline int spear_smi_cpy_toio(struct spear_smi *dev, u32 bank, + void __iomem *dest, const void *src, size_t len) + { +@@ -614,7 +634,23 @@ static inline int spear_smi_cpy_toio(str + ctrlreg1 = readl(dev->io_base + SMI_CR1); + writel((ctrlreg1 | WB_MODE) & ~SW_MODE, dev->io_base + SMI_CR1); + +- memcpy_toio(dest, src, len); ++ /* ++ * In Write Burst mode (WB_MODE), the specs states that writes must be: ++ * - incremental ++ * - of the same size ++ * The ARM implementation of memcpy_toio() will optimize the number of ++ * I/O by using as much 4-byte writes as possible, surrounded by ++ * 2-byte/1-byte access if: ++ * - the destination is not 4-byte aligned ++ * - the length is not a multiple of 4-byte. ++ * Avoid this alternance of write access size by using our own 'byte ++ * access' helper if at least one of the two conditions above is true. ++ */ ++ if (IS_ALIGNED(len, sizeof(u32)) && ++ IS_ALIGNED((uintptr_t)dest, sizeof(u32))) ++ memcpy_toio(dest, src, len); ++ else ++ spear_smi_memcpy_toio_b(dest, src, len); + + writel(ctrlreg1, dev->io_base + SMI_CR1); + diff --git a/queue-4.19/ovl-fix-corner-case-of-non-unique-st_dev-st_ino.patch b/queue-4.19/ovl-fix-corner-case-of-non-unique-st_dev-st_ino.patch new file mode 100644 index 00000000000..3652938cdac --- /dev/null +++ b/queue-4.19/ovl-fix-corner-case-of-non-unique-st_dev-st_ino.patch @@ -0,0 +1,60 @@ +From 9c6d8f13e9da10a26ad7f0a020ef86e8ef142835 Mon Sep 17 00:00:00 2001 +From: Amir Goldstein +Date: Sun, 17 Nov 2019 17:43:44 +0200 +Subject: ovl: fix corner case of non-unique st_dev;st_ino + +From: Amir Goldstein + +commit 9c6d8f13e9da10a26ad7f0a020ef86e8ef142835 upstream. 
+ +On non-samefs overlay without xino, non pure upper inodes should use a +pseudo_dev assigned to each unique lower fs and pure upper inodes use the +real upper st_dev. + +It is fine for an overlay pure upper inode to use the same st_dev;st_ino +values as the real upper inode, because the content of those two different +filesystem objects is always the same. + +In this case, however: + - two filesystems, A and B + - upper layer is on A + - lower layer 1 is also on A + - lower layer 2 is on B + +Non pure upper overlay inode, whose origin is in layer 1 will have the same +st_dev;st_ino values as the real lower inode. This may result with a false +positive results of 'diff' between the real lower and copied up overlay +inode. + +Fix this by using the upper st_dev;st_ino values in this case. This breaks +the property of constant st_dev;st_ino across copy up of this case. This +breakage will be fixed by a later patch. + +Fixes: 5148626b806a ("ovl: allocate anon bdev per unique lower fs") +Cc: stable@vger.kernel.org # v4.17+ +Signed-off-by: Amir Goldstein +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman + +--- + fs/overlayfs/inode.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/fs/overlayfs/inode.c ++++ b/fs/overlayfs/inode.c +@@ -203,8 +203,14 @@ int ovl_getattr(const struct path *path, + if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || + (!ovl_verify_lower(dentry->d_sb) && + (is_dir || lowerstat.nlink == 1))) { +- stat->ino = lowerstat.ino; + lower_layer = ovl_layer_lower(dentry); ++ /* ++ * Cannot use origin st_dev;st_ino because ++ * origin inode content may differ from overlay ++ * inode content. 
++ */ ++ if (samefs || lower_layer->fsid) ++ stat->ino = lowerstat.ino; + } + + /* diff --git a/queue-4.19/ovl-relax-warn_on-on-rename-to-self.patch b/queue-4.19/ovl-relax-warn_on-on-rename-to-self.patch new file mode 100644 index 00000000000..be709eb6722 --- /dev/null +++ b/queue-4.19/ovl-relax-warn_on-on-rename-to-self.patch @@ -0,0 +1,39 @@ +From 6889ee5a53b8d969aa542047f5ac8acdc0e79a91 Mon Sep 17 00:00:00 2001 +From: Amir Goldstein +Date: Fri, 6 Dec 2019 08:33:36 +0200 +Subject: ovl: relax WARN_ON() on rename to self + +From: Amir Goldstein + +commit 6889ee5a53b8d969aa542047f5ac8acdc0e79a91 upstream. + +In ovl_rename(), if new upper is hardlinked to old upper underneath +overlayfs before upper dirs are locked, user will get an ESTALE error +and a WARN_ON will be printed. + +Changes to underlying layers while overlayfs is mounted may result in +unexpected behavior, but it shouldn't crash the kernel and it shouldn't +trigger WARN_ON() either, so relax this WARN_ON(). + +Reported-by: syzbot+bb1836a212e69f8e201a@syzkaller.appspotmail.com +Fixes: 804032fabb3b ("ovl: don't check rename to self") +Cc: # v4.9+ +Signed-off-by: Amir Goldstein +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman + +--- + fs/overlayfs/dir.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/overlayfs/dir.c ++++ b/fs/overlayfs/dir.c +@@ -1174,7 +1174,7 @@ static int ovl_rename(struct inode *oldd + if (newdentry == trap) + goto out_dput; + +- if (WARN_ON(olddentry->d_inode == newdentry->d_inode)) ++ if (olddentry->d_inode == newdentry->d_inode) + goto out_dput; + + err = 0; diff --git a/queue-4.19/phy-renesas-rcar-gen3-usb2-fix-sysfs-interface-of-role.patch b/queue-4.19/phy-renesas-rcar-gen3-usb2-fix-sysfs-interface-of-role.patch new file mode 100644 index 00000000000..45c2e4f4175 --- /dev/null +++ b/queue-4.19/phy-renesas-rcar-gen3-usb2-fix-sysfs-interface-of-role.patch @@ -0,0 +1,48 @@ +From 4bd5ead82d4b877ebe41daf95f28cda53205b039 Mon Sep 17 00:00:00 2001 
+From: Yoshihiro Shimoda +Date: Mon, 7 Oct 2019 16:55:10 +0900 +Subject: phy: renesas: rcar-gen3-usb2: Fix sysfs interface of "role" + +From: Yoshihiro Shimoda + +commit 4bd5ead82d4b877ebe41daf95f28cda53205b039 upstream. + +Since the role_store() uses strncmp(), it's possible to refer +out-of-memory if the sysfs data size is smaller than strlen("host"). +This patch fixes it by using sysfs_streq() instead of strncmp(). + +Reported-by: Pavel Machek +Fixes: 9bb86777fb71 ("phy: rcar-gen3-usb2: add sysfs for usb role swap") +Cc: # v4.10+ +Signed-off-by: Yoshihiro Shimoda +Reviewed-by: Geert Uytterhoeven +Acked-by: Pavel Machek +Signed-off-by: Kishon Vijay Abraham I +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/phy/renesas/phy-rcar-gen3-usb2.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c ++++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -241,9 +242,9 @@ static ssize_t role_store(struct device + if (!ch->has_otg_pins || !ch->phy->init_count) + return -EIO; + +- if (!strncmp(buf, "host", strlen("host"))) ++ if (sysfs_streq(buf, "host")) + new_mode = PHY_MODE_USB_HOST; +- else if (!strncmp(buf, "peripheral", strlen("peripheral"))) ++ else if (sysfs_streq(buf, "peripheral")) + new_mode = PHY_MODE_USB_DEVICE; + else + return -EINVAL; diff --git a/queue-4.19/rtlwifi-rtl8192de-fix-missing-callback-that-tests-for-hw-release-of-buffer.patch b/queue-4.19/rtlwifi-rtl8192de-fix-missing-callback-that-tests-for-hw-release-of-buffer.patch new file mode 100644 index 00000000000..816240463fc --- /dev/null +++ b/queue-4.19/rtlwifi-rtl8192de-fix-missing-callback-that-tests-for-hw-release-of-buffer.patch @@ -0,0 +1,72 @@ +From 3155db7613edea8fb943624062baf1e4f9cfbfd6 Mon Sep 17 00:00:00 2001 +From: Larry Finger +Date: Mon, 11 Nov 2019 13:40:45 -0600 +Subject: rtlwifi: rtl8192de: Fix missing callback that tests for hw release of 
buffer + +From: Larry Finger + +commit 3155db7613edea8fb943624062baf1e4f9cfbfd6 upstream. + +In commit 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for +new drivers"), a callback needed to check if the hardware has released +a buffer indicating that a DMA operation is completed was not added. + +Fixes: 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers") +Cc: Stable # v3.18+ +Signed-off-by: Larry Finger +Signed-off-by: Kalle Valo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c | 1 + + drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c | 17 +++++++++++++++++ + drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h | 2 ++ + 3 files changed, 20 insertions(+) + +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +@@ -238,6 +238,7 @@ static struct rtl_hal_ops rtl8192de_hal_ + .led_control = rtl92de_led_control, + .set_desc = rtl92de_set_desc, + .get_desc = rtl92de_get_desc, ++ .is_tx_desc_closed = rtl92de_is_tx_desc_closed, + .tx_polling = rtl92de_tx_polling, + .enable_hw_sec = rtl92de_enable_hw_security_config, + .set_key = rtl92de_set_key, +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c +@@ -859,6 +859,23 @@ u64 rtl92de_get_desc(struct ieee80211_hw + return ret; + } + ++bool rtl92de_is_tx_desc_closed(struct ieee80211_hw *hw, ++ u8 hw_queue, u16 index) ++{ ++ struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); ++ struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; ++ u8 *entry = (u8 *)(&ring->desc[ring->idx]); ++ u8 own = (u8)rtl92de_get_desc(hw, entry, true, HW_DESC_OWN); ++ ++ /* a beacon packet will only use the first ++ * descriptor by defaut, and the own bit may not ++ * be cleared by the hardware ++ */ ++ if (own) ++ return false; ++ return true; ++} ++ + void rtl92de_tx_polling(struct ieee80211_hw *hw, u8 hw_queue) + { + struct rtl_priv *rtlpriv = 
rtl_priv(hw); +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h +@@ -737,6 +737,8 @@ void rtl92de_set_desc(struct ieee80211_h + u8 desc_name, u8 *val); + u64 rtl92de_get_desc(struct ieee80211_hw *hw, + u8 *p_desc, bool istx, u8 desc_name); ++bool rtl92de_is_tx_desc_closed(struct ieee80211_hw *hw, ++ u8 hw_queue, u16 index); + void rtl92de_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); + void rtl92de_tx_fill_cmddesc(struct ieee80211_hw *hw, u8 *pdesc, + bool b_firstseg, bool b_lastseg, diff --git a/queue-4.19/rtlwifi-rtl8192de-fix-missing-code-to-retrieve-rx-buffer-address.patch b/queue-4.19/rtlwifi-rtl8192de-fix-missing-code-to-retrieve-rx-buffer-address.patch new file mode 100644 index 00000000000..06f3a345a8e --- /dev/null +++ b/queue-4.19/rtlwifi-rtl8192de-fix-missing-code-to-retrieve-rx-buffer-address.patch @@ -0,0 +1,46 @@ +From 0e531cc575c4e9e3dd52ad287b49d3c2dc74c810 Mon Sep 17 00:00:00 2001 +From: Larry Finger +Date: Mon, 11 Nov 2019 13:40:44 -0600 +Subject: rtlwifi: rtl8192de: Fix missing code to retrieve RX buffer address + +From: Larry Finger + +commit 0e531cc575c4e9e3dd52ad287b49d3c2dc74c810 upstream. + +In commit 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for +new drivers"), a callback to get the RX buffer address was added to +the PCI driver. Unfortunately, driver rtl8192de was not modified +appropriately and the code runs into a WARN_ONCE() call. The use +of an incorrect array is also fixed. 
+ +Fixes: 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers") +Cc: Stable # 3.18+ +Signed-off-by: Larry Finger +Signed-off-by: Kalle Valo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c +@@ -840,13 +840,15 @@ u64 rtl92de_get_desc(struct ieee80211_hw + break; + } + } else { +- struct rx_desc_92c *pdesc = (struct rx_desc_92c *)p_desc; + switch (desc_name) { + case HW_DESC_OWN: +- ret = GET_RX_DESC_OWN(pdesc); ++ ret = GET_RX_DESC_OWN(p_desc); + break; + case HW_DESC_RXPKT_LEN: +- ret = GET_RX_DESC_PKT_LEN(pdesc); ++ ret = GET_RX_DESC_PKT_LEN(p_desc); ++ break; ++ case HW_DESC_RXBUFF_ADDR: ++ ret = GET_RX_DESC_BUFF_ADDR(p_desc); + break; + default: + WARN_ONCE(true, "rtl8192de: ERR rxdesc :%d not processed\n", diff --git a/queue-4.19/rtlwifi-rtl8192de-fix-missing-enable-interrupt-flag.patch b/queue-4.19/rtlwifi-rtl8192de-fix-missing-enable-interrupt-flag.patch new file mode 100644 index 00000000000..16e2fea318e --- /dev/null +++ b/queue-4.19/rtlwifi-rtl8192de-fix-missing-enable-interrupt-flag.patch @@ -0,0 +1,67 @@ +From 330bb7117101099c687e9c7f13d48068670b9c62 Mon Sep 17 00:00:00 2001 +From: Larry Finger +Date: Mon, 11 Nov 2019 13:40:46 -0600 +Subject: rtlwifi: rtl8192de: Fix missing enable interrupt flag + +From: Larry Finger + +commit 330bb7117101099c687e9c7f13d48068670b9c62 upstream. + +In commit 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for +new drivers"), the flag that indicates that interrupts are enabled was +never set. + +In addition, there are several places when enable/disable interrupts +were commented out are restored. A sychronize_interrupts() call is +removed. 
+ +Fixes: 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers") +Cc: Stable # v3.18+ +Signed-off-by: Larry Finger +Signed-off-by: Kalle Valo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c ++++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +@@ -1198,6 +1198,7 @@ void rtl92de_enable_interrupt(struct iee + + rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); + rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); ++ rtlpci->irq_enabled = true; + } + + void rtl92de_disable_interrupt(struct ieee80211_hw *hw) +@@ -1207,7 +1208,7 @@ void rtl92de_disable_interrupt(struct ie + + rtl_write_dword(rtlpriv, REG_HIMR, IMR8190_DISABLED); + rtl_write_dword(rtlpriv, REG_HIMRE, IMR8190_DISABLED); +- synchronize_irq(rtlpci->pdev->irq); ++ rtlpci->irq_enabled = false; + } + + static void _rtl92de_poweroff_adapter(struct ieee80211_hw *hw) +@@ -1373,7 +1374,7 @@ void rtl92de_set_beacon_related_register + + bcn_interval = mac->beacon_interval; + atim_window = 2; +- /*rtl92de_disable_interrupt(hw); */ ++ rtl92de_disable_interrupt(hw); + rtl_write_word(rtlpriv, REG_ATIMWND, atim_window); + rtl_write_word(rtlpriv, REG_BCN_INTERVAL, bcn_interval); + rtl_write_word(rtlpriv, REG_BCNTCFG, 0x660f); +@@ -1393,9 +1394,9 @@ void rtl92de_set_beacon_interval(struct + + RT_TRACE(rtlpriv, COMP_BEACON, DBG_DMESG, + "beacon_interval:%d\n", bcn_interval); +- /* rtl92de_disable_interrupt(hw); */ ++ rtl92de_disable_interrupt(hw); + rtl_write_word(rtlpriv, REG_BCN_INTERVAL, bcn_interval); +- /* rtl92de_enable_interrupt(hw); */ ++ rtl92de_enable_interrupt(hw); + } + + void rtl92de_update_interrupt_mask(struct ieee80211_hw *hw, diff --git a/queue-4.19/series b/queue-4.19/series index 68dad56ff56..c9d21fca70c 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -24,3 +24,31 @@ 
usb-idmouse-fix-interface-sanity-checks.patch usb-serial-io_edgeport-fix-epic-endpoint-lookup.patch usb-roles-fix-a-potential-use-after-free.patch usb-adutux-fix-interface-sanity-check.patch +usb-core-urb-fix-urb-structure-initialization-function.patch +usb-mon-fix-a-deadlock-in-usbmon-between-mmap-and-read.patch +tpm-add-check-after-commands-attribs-tab-allocation.patch +mtd-spear_smi-fix-write-burst-mode.patch +virtio-balloon-fix-managed-page-counts-when-migrating-pages-between-zones.patch +usb-dwc3-pci-add-id-for-the-intel-comet-lake-h-variant.patch +usb-dwc3-gadget-fix-logical-condition.patch +usb-dwc3-ep0-clear-started-flag-on-completion.patch +phy-renesas-rcar-gen3-usb2-fix-sysfs-interface-of-role.patch +btrfs-check-page-mapping-when-loading-free-space-cache.patch +btrfs-use-refcount_inc_not_zero-in-kill_all_nodes.patch +btrfs-fix-metadata-space-leak-on-fixup-worker-failure-to-set-range-as-delalloc.patch +btrfs-fix-negative-subv_writers-counter-and-data-space-leak-after-buffered-write.patch +btrfs-avoid-getting-stuck-during-cyclic-writebacks.patch +btrfs-remove-btrfs_bio-flags-member.patch +btrfs-send-skip-backreference-walking-for-extents-with-many-references.patch +btrfs-record-all-roots-for-rename-exchange-on-a-subvol.patch +rtlwifi-rtl8192de-fix-missing-code-to-retrieve-rx-buffer-address.patch +rtlwifi-rtl8192de-fix-missing-callback-that-tests-for-hw-release-of-buffer.patch +rtlwifi-rtl8192de-fix-missing-enable-interrupt-flag.patch +lib-raid6-fix-awk-build-warnings.patch +ovl-fix-corner-case-of-non-unique-st_dev-st_ino.patch +ovl-relax-warn_on-on-rename-to-self.patch +hwrng-omap-fix-rng-wait-loop-timeout.patch +dm-writecache-handle-req_fua.patch +dm-zoned-reduce-overhead-of-backing-device-checks.patch +workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch +workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch diff --git a/queue-4.19/tpm-add-check-after-commands-attribs-tab-allocation.patch 
b/queue-4.19/tpm-add-check-after-commands-attribs-tab-allocation.patch new file mode 100644 index 00000000000..6336b918b13 --- /dev/null +++ b/queue-4.19/tpm-add-check-after-commands-attribs-tab-allocation.patch @@ -0,0 +1,37 @@ +From f1689114acc5e89a196fec6d732dae3e48edb6ad Mon Sep 17 00:00:00 2001 +From: Tadeusz Struk +Date: Mon, 7 Oct 2019 14:46:37 -0700 +Subject: tpm: add check after commands attribs tab allocation + +From: Tadeusz Struk + +commit f1689114acc5e89a196fec6d732dae3e48edb6ad upstream. + +devm_kcalloc() can fail and return NULL so we need to check for that. + +Cc: stable@vger.kernel.org +Fixes: 58472f5cd4f6f ("tpm: validate TPM 2.0 commands") +Signed-off-by: Tadeusz Struk +Reviewed-by: Jerry Snitselaar +Reviewed-by: Jarkko Sakkinen +Tested-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/char/tpm/tpm2-cmd.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/char/tpm/tpm2-cmd.c ++++ b/drivers/char/tpm/tpm2-cmd.c +@@ -938,6 +938,10 @@ static int tpm2_get_cc_attrs_tbl(struct + + chip->cc_attrs_tbl = devm_kcalloc(&chip->dev, 4, nr_commands, + GFP_KERNEL); ++ if (!chip->cc_attrs_tbl) { ++ rc = -ENOMEM; ++ goto out; ++ } + + rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_GET_CAPABILITY); + if (rc) diff --git a/queue-4.19/usb-core-urb-fix-urb-structure-initialization-function.patch b/queue-4.19/usb-core-urb-fix-urb-structure-initialization-function.patch new file mode 100644 index 00000000000..34a9bedeb1f --- /dev/null +++ b/queue-4.19/usb-core-urb-fix-urb-structure-initialization-function.patch @@ -0,0 +1,34 @@ +From 1cd17f7f0def31e3695501c4f86cd3faf8489840 Mon Sep 17 00:00:00 2001 +From: Emiliano Ingrassia +Date: Wed, 27 Nov 2019 17:03:55 +0100 +Subject: usb: core: urb: fix URB structure initialization function + +From: Emiliano Ingrassia + +commit 1cd17f7f0def31e3695501c4f86cd3faf8489840 upstream. + +Explicitly initialize URB structure urb_list field in usb_init_urb(). 
+This field can be potentially accessed uninitialized and its +initialization is coherent with the usage of list_del_init() in +usb_hcd_unlink_urb_from_ep() and usb_giveback_urb_bh() and its +explicit initialization in usb_hcd_submit_urb() error path. + +Signed-off-by: Emiliano Ingrassia +Cc: stable +Link: https://lore.kernel.org/r/20191127160355.GA27196@ingrassia.epigenesys.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/core/urb.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/usb/core/urb.c ++++ b/drivers/usb/core/urb.c +@@ -45,6 +45,7 @@ void usb_init_urb(struct urb *urb) + if (urb) { + memset(urb, 0, sizeof(*urb)); + kref_init(&urb->kref); ++ INIT_LIST_HEAD(&urb->urb_list); + INIT_LIST_HEAD(&urb->anchor_list); + } + } diff --git a/queue-4.19/usb-dwc3-ep0-clear-started-flag-on-completion.patch b/queue-4.19/usb-dwc3-ep0-clear-started-flag-on-completion.patch new file mode 100644 index 00000000000..ff8fdca0697 --- /dev/null +++ b/queue-4.19/usb-dwc3-ep0-clear-started-flag-on-completion.patch @@ -0,0 +1,47 @@ +From 2d7b78f59e020b07fc6338eefe286f54ee2d6773 Mon Sep 17 00:00:00 2001 +From: Thinh Nguyen +Date: Wed, 27 Nov 2019 13:10:54 -0800 +Subject: usb: dwc3: ep0: Clear started flag on completion + +From: Thinh Nguyen + +commit 2d7b78f59e020b07fc6338eefe286f54ee2d6773 upstream. + +Clear ep0's DWC3_EP_TRANSFER_STARTED flag if the END_TRANSFER command is +completed. Otherwise, we can't start control transfer again after +END_TRANSFER. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Thinh Nguyen +Signed-off-by: Felipe Balbi +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/dwc3/ep0.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/usb/dwc3/ep0.c ++++ b/drivers/usb/dwc3/ep0.c +@@ -1110,6 +1110,9 @@ static void dwc3_ep0_xfernotready(struct + void dwc3_ep0_interrupt(struct dwc3 *dwc, + const struct dwc3_event_depevt *event) + { ++ struct dwc3_ep *dep = dwc->eps[event->endpoint_number]; ++ u8 cmd; ++ + switch (event->endpoint_event) { + case DWC3_DEPEVT_XFERCOMPLETE: + dwc3_ep0_xfer_complete(dwc, event); +@@ -1122,7 +1125,12 @@ void dwc3_ep0_interrupt(struct dwc3 *dwc + case DWC3_DEPEVT_XFERINPROGRESS: + case DWC3_DEPEVT_RXTXFIFOEVT: + case DWC3_DEPEVT_STREAMEVT: ++ break; + case DWC3_DEPEVT_EPCMDCMPLT: ++ cmd = DEPEVT_PARAMETER_CMD(event->parameters); ++ ++ if (cmd == DWC3_DEPCMD_ENDTRANSFER) ++ dep->flags &= ~DWC3_EP_TRANSFER_STARTED; + break; + } + } diff --git a/queue-4.19/usb-dwc3-gadget-fix-logical-condition.patch b/queue-4.19/usb-dwc3-gadget-fix-logical-condition.patch new file mode 100644 index 00000000000..4ca9e06783a --- /dev/null +++ b/queue-4.19/usb-dwc3-gadget-fix-logical-condition.patch @@ -0,0 +1,36 @@ +From 8c7d4b7b3d43c54c0b8c1e4adb917a151c754196 Mon Sep 17 00:00:00 2001 +From: Tejas Joglekar +Date: Wed, 13 Nov 2019 11:45:16 +0530 +Subject: usb: dwc3: gadget: Fix logical condition + +From: Tejas Joglekar + +commit 8c7d4b7b3d43c54c0b8c1e4adb917a151c754196 upstream. + +This patch corrects the condition to kick the transfer without +giving back the requests when either request has remaining data +or when there are pending SGs. The && check was introduced during +spliting up the dwc3_gadget_ep_cleanup_completed_requests() function. 
+ +Fixes: f38e35dd84e2 ("usb: dwc3: gadget: split dwc3_gadget_ep_cleanup_completed_requests()") + +Cc: stable@vger.kernel.org +Signed-off-by: Tejas Joglekar +Signed-off-by: Felipe Balbi +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/dwc3/gadget.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/usb/dwc3/gadget.c ++++ b/drivers/usb/dwc3/gadget.c +@@ -2295,7 +2295,7 @@ static int dwc3_gadget_ep_cleanup_comple + + req->request.actual = req->request.length - req->remaining; + +- if (!dwc3_gadget_ep_request_completed(req) && ++ if (!dwc3_gadget_ep_request_completed(req) || + req->num_pending_sgs) { + __dwc3_gadget_kick_transfer(dep); + goto out; diff --git a/queue-4.19/usb-dwc3-pci-add-id-for-the-intel-comet-lake-h-variant.patch b/queue-4.19/usb-dwc3-pci-add-id-for-the-intel-comet-lake-h-variant.patch new file mode 100644 index 00000000000..aae302bce3e --- /dev/null +++ b/queue-4.19/usb-dwc3-pci-add-id-for-the-intel-comet-lake-h-variant.patch @@ -0,0 +1,45 @@ +From 3c3caae4cd6e122472efcf64759ff6392fb6bce2 Mon Sep 17 00:00:00 2001 +From: Heikki Krogerus +Date: Thu, 12 Dec 2019 12:37:13 +0300 +Subject: usb: dwc3: pci: add ID for the Intel Comet Lake -H variant + +From: Heikki Krogerus + +commit 3c3caae4cd6e122472efcf64759ff6392fb6bce2 upstream. + +The original ID that was added for Comet Lake PCH was +actually for the -LP (low power) variant even though the +constant for it said CMLH. Changing that while at it. 
+ +Signed-off-by: Heikki Krogerus +Acked-by: Felipe Balbi +Cc: stable +Link: https://lore.kernel.org/r/20191212093713.60614-1-heikki.krogerus@linux.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/dwc3/dwc3-pci.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/usb/dwc3/dwc3-pci.c ++++ b/drivers/usb/dwc3/dwc3-pci.c +@@ -29,7 +29,8 @@ + #define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa + #define PCI_DEVICE_ID_INTEL_APL 0x5aaa + #define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 +-#define PCI_DEVICE_ID_INTEL_CMLH 0x02ee ++#define PCI_DEVICE_ID_INTEL_CMLLP 0x02ee ++#define PCI_DEVICE_ID_INTEL_CMLH 0x06ee + #define PCI_DEVICE_ID_INTEL_GLK 0x31aa + #define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee + #define PCI_DEVICE_ID_INTEL_CNPH 0xa36e +@@ -306,6 +307,9 @@ static const struct pci_device_id dwc3_p + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD), + (kernel_ulong_t) &dwc3_pci_mrfld_properties, }, + ++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLLP), ++ (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLH), + (kernel_ulong_t) &dwc3_pci_intel_properties, }, + diff --git a/queue-4.19/usb-mon-fix-a-deadlock-in-usbmon-between-mmap-and-read.patch b/queue-4.19/usb-mon-fix-a-deadlock-in-usbmon-between-mmap-and-read.patch new file mode 100644 index 00000000000..8e031012260 --- /dev/null +++ b/queue-4.19/usb-mon-fix-a-deadlock-in-usbmon-between-mmap-and-read.patch @@ -0,0 +1,104 @@ +From 19e6317d24c25ee737c65d1ffb7483bdda4bb54a Mon Sep 17 00:00:00 2001 +From: Pete Zaitcev +Date: Wed, 4 Dec 2019 20:39:41 -0600 +Subject: usb: mon: Fix a deadlock in usbmon between mmap and read + +From: Pete Zaitcev + +commit 19e6317d24c25ee737c65d1ffb7483bdda4bb54a upstream. + +The problem arises because our read() function grabs a lock of the +circular buffer, finds something of interest, then invokes copy_to_user() +straight from the buffer, which in turn takes mm->mmap_sem. 
At the same +time, the callback mon_bin_vma_fault() is invoked under mm->mmap_sem. +It attempts to take the fetch lock and deadlocks. + +This patch does away with protecting of our page list with any +semaphores, and instead relies on the kernel not closing the device +while mmap is active in a process. + +In addition, we prohibit re-sizing of a buffer while mmap is active. +This way, when (now unlocked) fault is processed, it works with the +page that is intended to be mapped-in, and not some other random page. +Note that this may have an ABI impact, but hopefully no legitimate +program is this wrong. + +Signed-off-by: Pete Zaitcev +Reported-by: syzbot+56f9673bb4cdcbeb0e92@syzkaller.appspotmail.com +Reviewed-by: Alan Stern +Fixes: 46eb14a6e158 ("USB: fix usbmon BUG trigger") +Cc: +Link: https://lore.kernel.org/r/20191204203941.3503452b@suzdal.zaitcev.lan +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/mon/mon_bin.c | 32 +++++++++++++++++++++----------- + 1 file changed, 21 insertions(+), 11 deletions(-) + +--- a/drivers/usb/mon/mon_bin.c ++++ b/drivers/usb/mon/mon_bin.c +@@ -1039,12 +1039,18 @@ static long mon_bin_ioctl(struct file *f + + mutex_lock(&rp->fetch_lock); + spin_lock_irqsave(&rp->b_lock, flags); +- mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); +- kfree(rp->b_vec); +- rp->b_vec = vec; +- rp->b_size = size; +- rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0; +- rp->cnt_lost = 0; ++ if (rp->mmap_active) { ++ mon_free_buff(vec, size/CHUNK_SIZE); ++ kfree(vec); ++ ret = -EBUSY; ++ } else { ++ mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); ++ kfree(rp->b_vec); ++ rp->b_vec = vec; ++ rp->b_size = size; ++ rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0; ++ rp->cnt_lost = 0; ++ } + spin_unlock_irqrestore(&rp->b_lock, flags); + mutex_unlock(&rp->fetch_lock); + } +@@ -1216,13 +1222,21 @@ mon_bin_poll(struct file *file, struct p + static void mon_bin_vma_open(struct vm_area_struct *vma) + { + struct mon_reader_bin *rp = vma->vm_private_data; ++
unsigned long flags; ++ ++ spin_lock_irqsave(&rp->b_lock, flags); + rp->mmap_active++; ++ spin_unlock_irqrestore(&rp->b_lock, flags); + } + + static void mon_bin_vma_close(struct vm_area_struct *vma) + { ++ unsigned long flags; ++ + struct mon_reader_bin *rp = vma->vm_private_data; ++ spin_lock_irqsave(&rp->b_lock, flags); + rp->mmap_active--; ++ spin_unlock_irqrestore(&rp->b_lock, flags); + } + + /* +@@ -1234,16 +1248,12 @@ static vm_fault_t mon_bin_vma_fault(stru + unsigned long offset, chunk_idx; + struct page *pageptr; + +- mutex_lock(&rp->fetch_lock); + offset = vmf->pgoff << PAGE_SHIFT; +- if (offset >= rp->b_size) { +- mutex_unlock(&rp->fetch_lock); ++ if (offset >= rp->b_size) + return VM_FAULT_SIGBUS; +- } + chunk_idx = offset / CHUNK_SIZE; + pageptr = rp->b_vec[chunk_idx].pg; + get_page(pageptr); +- mutex_unlock(&rp->fetch_lock); + vmf->page = pageptr; + return 0; + } diff --git a/queue-4.19/virtio-balloon-fix-managed-page-counts-when-migrating-pages-between-zones.patch b/queue-4.19/virtio-balloon-fix-managed-page-counts-when-migrating-pages-between-zones.patch new file mode 100644 index 00000000000..7ea8a9c0a8c --- /dev/null +++ b/queue-4.19/virtio-balloon-fix-managed-page-counts-when-migrating-pages-between-zones.patch @@ -0,0 +1,159 @@ +From 63341ab03706e11a31e3dd8ccc0fbc9beaf723f0 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 11 Dec 2019 12:11:52 +0100 +Subject: virtio-balloon: fix managed page counts when migrating pages between zones + +From: David Hildenbrand + +commit 63341ab03706e11a31e3dd8ccc0fbc9beaf723f0 upstream. + +In case we have to migrate a balloon page to a newpage of another zone, the +managed page count of both zones is wrong. Paired with memory offlining +(which will adjust the managed page count), we can trigger kernel crashes +and all kinds of different symptoms. + +One way to reproduce: +1. Start a QEMU guest with 4GB, no NUMA +2. Hotplug a 1GB DIMM and online the memory to ZONE_NORMAL +3.
Inflate the balloon to 1GB +4. Unplug the DIMM (be quick, otherwise unmovable data ends up on it) +5. Observe /proc/zoneinfo + Node 0, zone Normal + pages free 16810 + min 24848885473806 + low 18471592959183339 + high 36918337032892872 + spanned 262144 + present 262144 + managed 18446744073709533486 +6. Do anything that requires some memory (e.g., inflate the balloon some +more). The OOM goes crazy and the system crashes + [ 238.324946] Out of memory: Killed process 537 (login) total-vm:27584kB, anon-rss:860kB, file-rss:0kB, shmem-rss:00 + [ 238.338585] systemd invoked oom-killer: gfp_mask=0x100cca(GFP_HIGHUSER_MOVABLE), order=0, oom_score_adj=0 + [ 238.339420] CPU: 0 PID: 1 Comm: systemd Tainted: G D W 5.4.0-next-20191204+ #75 + [ 238.340139] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu4 + [ 238.341121] Call Trace: + [ 238.341337] dump_stack+0x8f/0xd0 + [ 238.341630] dump_header+0x61/0x5ea + [ 238.341942] oom_kill_process.cold+0xb/0x10 + [ 238.342299] out_of_memory+0x24d/0x5a0 + [ 238.342625] __alloc_pages_slowpath+0xd12/0x1020 + [ 238.343024] __alloc_pages_nodemask+0x391/0x410 + [ 238.343407] pagecache_get_page+0xc3/0x3a0 + [ 238.343757] filemap_fault+0x804/0xc30 + [ 238.344083] ? ext4_filemap_fault+0x28/0x42 + [ 238.344444] ext4_filemap_fault+0x30/0x42 + [ 238.344789] __do_fault+0x37/0x1a0 + [ 238.345087] __handle_mm_fault+0x104d/0x1ab0 + [ 238.345450] handle_mm_fault+0x169/0x360 + [ 238.345790] do_user_addr_fault+0x20d/0x490 + [ 238.346154] do_page_fault+0x31/0x210 + [ 238.346468] async_page_fault+0x43/0x50 + [ 238.346797] RIP: 0033:0x7f47eba4197e + [ 238.347110] Code: Bad RIP value. 
+ [ 238.347387] RSP: 002b:00007ffd7c0c1890 EFLAGS: 00010293 + [ 238.347834] RAX: 0000000000000002 RBX: 000055d196a20a20 RCX: 00007f47eba4197e + [ 238.348437] RDX: 0000000000000033 RSI: 00007ffd7c0c18c0 RDI: 0000000000000004 + [ 238.349047] RBP: 00007ffd7c0c1c20 R08: 0000000000000000 R09: 0000000000000033 + [ 238.349660] R10: 00000000ffffffff R11: 0000000000000293 R12: 0000000000000001 + [ 238.350261] R13: ffffffffffffffff R14: 0000000000000000 R15: 00007ffd7c0c18c0 + [ 238.350878] Mem-Info: + [ 238.351085] active_anon:3121 inactive_anon:51 isolated_anon:0 + [ 238.351085] active_file:12 inactive_file:7 isolated_file:0 + [ 238.351085] unevictable:0 dirty:0 writeback:0 unstable:0 + [ 238.351085] slab_reclaimable:5565 slab_unreclaimable:10170 + [ 238.351085] mapped:3 shmem:111 pagetables:155 bounce:0 + [ 238.351085] free:720717 free_pcp:2 free_cma:0 + [ 238.353757] Node 0 active_anon:12484kB inactive_anon:204kB active_file:48kB inactive_file:28kB unevictable:0kB iss + [ 238.355979] Node 0 DMA free:11556kB min:36kB low:48kB high:60kB reserved_highatomic:0KB active_anon:152kB inactivB + [ 238.358345] lowmem_reserve[]: 0 2955 2884 2884 2884 + [ 238.358761] Node 0 DMA32 free:2677864kB min:7004kB low:10028kB high:13052kB reserved_highatomic:0KB active_anon:0B + [ 238.361202] lowmem_reserve[]: 0 0 72057594037927865 72057594037927865 72057594037927865 + [ 238.361888] Node 0 Normal free:193448kB min:99395541895224kB low:73886371836733356kB high:147673348131571488kB reB + [ 238.364765] lowmem_reserve[]: 0 0 0 0 0 + [ 238.365101] Node 0 DMA: 7*4kB (U) 5*8kB (UE) 6*16kB (UME) 2*32kB (UM) 1*64kB (U) 2*128kB (UE) 3*256kB (UME) 2*512B + [ 238.366379] Node 0 DMA32: 0*4kB 1*8kB (U) 2*16kB (UM) 2*32kB (UM) 2*64kB (UM) 1*128kB (U) 1*256kB (U) 1*512kB (U)B + [ 238.367654] Node 0 Normal: 1985*4kB (UME) 1321*8kB (UME) 844*16kB (UME) 524*32kB (UME) 300*64kB (UME) 138*128kB (B + [ 238.369184] Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB + [ 238.369915] 130 
total pagecache pages + [ 238.370241] 0 pages in swap cache + [ 238.370533] Swap cache stats: add 0, delete 0, find 0/0 + [ 238.370981] Free swap = 0kB + [ 238.371239] Total swap = 0kB + [ 238.371488] 1048445 pages RAM + [ 238.371756] 0 pages HighMem/MovableOnly + [ 238.372090] 306992 pages reserved + [ 238.372376] 0 pages cma reserved + [ 238.372661] 0 pages hwpoisoned + +In another instance (older kernel), I was able to observe this +(negative page count :/): + [ 180.896971] Offlined Pages 32768 + [ 182.667462] Offlined Pages 32768 + [ 184.408117] Offlined Pages 32768 + [ 186.026321] Offlined Pages 32768 + [ 187.684861] Offlined Pages 32768 + [ 189.227013] Offlined Pages 32768 + [ 190.830303] Offlined Pages 32768 + [ 190.833071] Built 1 zonelists, mobility grouping on. Total pages: -36920272750453009 + +In another instance (older kernel), I was no longer able to start any +process: + [root@vm ~]# [ 214.348068] Offlined Pages 32768 + [ 215.973009] Offlined Pages 32768 + cat /proc/meminfo + -bash: fork: Cannot allocate memory + [root@vm ~]# cat /proc/meminfo + -bash: fork: Cannot allocate memory + +Fix it by properly adjusting the managed page count when migrating if +the zone changed. The managed page count of the zones now looks after +unplug of the DIMM (and after deflating the balloon) just like before +inflating the balloon (and plugging+onlining the DIMM). + +We'll temporarily modify the totalram page count. If this ever becomes a +problem, we can fine tune by providing helpers that don't touch +the totalram pages (e.g., adjust_zone_managed_page_count()). + +Please note that fixing up the managed page count is only necessary when +we adjusted the managed page count when inflating - only if we +don't have VIRTIO_BALLOON_F_DEFLATE_ON_OOM. With that feature, the +managed page count is not touched when inflating/deflating. + +Reported-by: Yumei Huang +Fixes: 3dcc0571cd64 ("mm: correctly update zone->managed_pages") +Cc: # v3.11+ +Cc: "Michael S. 
Tsirkin" +Cc: Jason Wang +Cc: Jiang Liu +Cc: Andrew Morton +Cc: Igor Mammedov +Cc: virtualization@lists.linux-foundation.org +Signed-off-by: David Hildenbrand +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/virtio/virtio_balloon.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/drivers/virtio/virtio_balloon.c ++++ b/drivers/virtio/virtio_balloon.c +@@ -468,6 +468,17 @@ static int virtballoon_migratepage(struc + + get_page(newpage); /* balloon reference */ + ++ /* ++ * When we migrate a page to a different zone and adjusted the ++ * managed page count when inflating, we have to fixup the count of ++ * both involved zones. ++ */ ++ if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM) && ++ page_zone(page) != page_zone(newpage)) { ++ adjust_managed_page_count(page, 1); ++ adjust_managed_page_count(newpage, -1); ++ } ++ + /* balloon's page migration 1st step -- inflate "newpage" */ + spin_lock_irqsave(&vb_dev_info->pages_lock, flags); + balloon_page_insert(vb_dev_info, newpage); diff --git a/queue-4.19/workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch b/queue-4.19/workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch new file mode 100644 index 00000000000..2467afa8991 --- /dev/null +++ b/queue-4.19/workqueue-fix-pwq-ref-leak-in-rescuer_thread.patch @@ -0,0 +1,60 @@ +From e66b39af00f426b3356b96433d620cb3367ba1ff Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 25 Sep 2019 06:59:15 -0700 +Subject: workqueue: Fix pwq ref leak in rescuer_thread() + +From: Tejun Heo + +commit e66b39af00f426b3356b96433d620cb3367ba1ff upstream. + +008847f66c3 ("workqueue: allow rescuer thread to do more work.") made +the rescuer worker requeue the pwq immediately if there may be more +work items which need rescuing instead of waiting for the next mayday +timer expiration. Unfortunately, it doesn't check whether the pwq is +already on the mayday list and unconditionally gets the ref and moves +it onto the list. 
This doesn't corrupt the list but creates an +additional reference to the pwq. It got queued twice but will only be +removed once. + +This leak later can trigger pwq refcnt warning on workqueue +destruction and prevent freeing of the workqueue. + +Signed-off-by: Tejun Heo +Cc: "Williams, Gerald S" +Cc: NeilBrown +Cc: stable@vger.kernel.org # v3.19+ +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/workqueue.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -2413,8 +2413,14 @@ repeat: + */ + if (need_to_create_worker(pool)) { + spin_lock(&wq_mayday_lock); +- get_pwq(pwq); +- list_move_tail(&pwq->mayday_node, &wq->maydays); ++ /* ++ * Queue iff we aren't racing destruction ++ * and somebody else hasn't queued it already. ++ */ ++ if (wq->rescuer && list_empty(&pwq->mayday_node)) { ++ get_pwq(pwq); ++ list_add_tail(&pwq->mayday_node, &wq->maydays); ++ } + spin_unlock(&wq_mayday_lock); + } + } +@@ -4478,7 +4484,8 @@ static void show_pwq(struct pool_workque + pr_info(" pwq %d:", pool->id); + pr_cont_pool_info(pool); + +- pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active, ++ pr_cont(" active=%d/%d refcnt=%d%s\n", ++ pwq->nr_active, pwq->max_active, pwq->refcnt, + !list_empty(&pwq->mayday_node) ? 
" MAYDAY" : ""); + + hash_for_each(pool->busy_hash, bkt, worker, hentry) { diff --git a/queue-4.19/workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch b/queue-4.19/workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch new file mode 100644 index 00000000000..90ab5e6c4bd --- /dev/null +++ b/queue-4.19/workqueue-fix-spurious-sanity-check-failures-in-destroy_workqueue.patch @@ -0,0 +1,83 @@ +From def98c84b6cdf2eeea19ec5736e90e316df5206b Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 18 Sep 2019 18:43:40 -0700 +Subject: workqueue: Fix spurious sanity check failures in destroy_workqueue() + +From: Tejun Heo + +commit def98c84b6cdf2eeea19ec5736e90e316df5206b upstream. + +Before actually destrying a workqueue, destroy_workqueue() checks +whether it's actually idle. If it isn't, it prints out a bunch of +warning messages and leaves the workqueue dangling. It unfortunately +has a couple issues. + +* Mayday list queueing increments pwq's refcnts which gets detected as + busy and fails the sanity checks. However, because mayday list + queueing is asynchronous, this condition can happen without any + actual work items left in the workqueue. + +* Sanity check failure leaves the sysfs interface behind too which can + lead to init failure of newer instances of the workqueue. + +This patch fixes the above two by + +* If a workqueue has a rescuer, disable and kill the rescuer before + sanity checks. Disabling and killing is guaranteed to flush the + existing mayday list. + +* Remove sysfs interface before sanity checks. 
+ +Signed-off-by: Tejun Heo +Reported-by: Marcin Pawlowski +Reported-by: "Williams, Gerald S" +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/workqueue.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -4154,9 +4154,28 @@ void destroy_workqueue(struct workqueue_ + struct pool_workqueue *pwq; + int node; + ++ /* ++ * Remove it from sysfs first so that sanity check failure doesn't ++ * lead to sysfs name conflicts. ++ */ ++ workqueue_sysfs_unregister(wq); ++ + /* drain it before proceeding with destruction */ + drain_workqueue(wq); + ++ /* kill rescuer, if sanity checks fail, leave it w/o rescuer */ ++ if (wq->rescuer) { ++ struct worker *rescuer = wq->rescuer; ++ ++ /* this prevents new queueing */ ++ spin_lock_irq(&wq_mayday_lock); ++ wq->rescuer = NULL; ++ spin_unlock_irq(&wq_mayday_lock); ++ ++ /* rescuer will empty maydays list before exiting */ ++ kthread_stop(rescuer->task); ++ } ++ + /* sanity checks */ + mutex_lock(&wq->mutex); + for_each_pwq(pwq, wq) { +@@ -4188,11 +4207,6 @@ void destroy_workqueue(struct workqueue_ + list_del_rcu(&wq->list); + mutex_unlock(&wq_pool_mutex); + +- workqueue_sysfs_unregister(wq); +- +- if (wq->rescuer) +- kthread_stop(wq->rescuer->task); +- + if (!(wq->flags & WQ_UNBOUND)) { + /* + * The base ref is never dropped on per-cpu pwqs. Directly -- 2.47.3