git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.2-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 3 Oct 2019 11:57:01 +0000 (13:57 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 3 Oct 2019 11:57:01 +0000 (13:57 +0200)
added patches:
ath10k-fix-channel-info-parsing-for-non-tlv-target.patch
block-fix-null-pointer-dereference-in-blk_mq_rq_timed_out.patch
block-mq-deadline-fix-queue-restart-handling.patch
btrfs-adjust-dirty_metadata_bytes-after-writeback-failure-of-extent-buffer.patch
btrfs-fix-allocation-of-free-space-cache-v1-bitmap-pages.patch
btrfs-fix-race-setting-up-and-completing-qgroup-rescan-workers.patch
btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch
btrfs-qgroup-fix-reserved-data-space-leak-if-we-have-multiple-reserve-calls.patch
btrfs-qgroup-fix-the-wrong-target-io_tree-when-freeing-reserved-data-space.patch
btrfs-relinquish-cpus-in-btrfs_compare_trees.patch
efifb-bgrt-improve-efifb_bgrt_sanity_check.patch
gfs2-clear-buf_in_tr-when-ending-a-transaction-in-sweep_bh_for_rgrps.patch
i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch
keys-trusted-correctly-initialize-digests-and-fix-locking-issue.patch
lib-lzo-lzo1x_compress.c-fix-alignment-bug-in-lzo-rle.patch
memcg-kmem-do-not-fail-__gfp_nofail-charges.patch
memcg-oom-don-t-require-__gfp_fs-when-invoking-memcg-oom-killer.patch
mm-compaction.c-clear-total_-migrate-free-_scanned-before-scanning-a-new-zone.patch
mt76-round-up-length-on-mt76_wr_copy.patch
ovl-filter-of-trusted-xattr-results-in-audit.patch
ovl-fix-dereferencing-possible-err_ptr.patch
rtw88-pci-rearrange-the-memory-usage-for-skb-in-rx-isr.patch
rtw88-pci-use-dma-sync-instead-of-remapping-in-rx-isr.patch
smb3-allow-disabling-requesting-leases.patch
smb3-fix-leak-in-open-on-server-perf-counter.patch
z3fold-fix-memory-leak-in-kmem-cache.patch
z3fold-fix-retry-mechanism-in-page-reclaim.patch

28 files changed:
queue-5.2/ath10k-fix-channel-info-parsing-for-non-tlv-target.patch [new file with mode: 0644]
queue-5.2/block-fix-null-pointer-dereference-in-blk_mq_rq_timed_out.patch [new file with mode: 0644]
queue-5.2/block-mq-deadline-fix-queue-restart-handling.patch [new file with mode: 0644]
queue-5.2/btrfs-adjust-dirty_metadata_bytes-after-writeback-failure-of-extent-buffer.patch [new file with mode: 0644]
queue-5.2/btrfs-fix-allocation-of-free-space-cache-v1-bitmap-pages.patch [new file with mode: 0644]
queue-5.2/btrfs-fix-race-setting-up-and-completing-qgroup-rescan-workers.patch [new file with mode: 0644]
queue-5.2/btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch [new file with mode: 0644]
queue-5.2/btrfs-qgroup-fix-reserved-data-space-leak-if-we-have-multiple-reserve-calls.patch [new file with mode: 0644]
queue-5.2/btrfs-qgroup-fix-the-wrong-target-io_tree-when-freeing-reserved-data-space.patch [new file with mode: 0644]
queue-5.2/btrfs-relinquish-cpus-in-btrfs_compare_trees.patch [new file with mode: 0644]
queue-5.2/efifb-bgrt-improve-efifb_bgrt_sanity_check.patch [new file with mode: 0644]
queue-5.2/gfs2-clear-buf_in_tr-when-ending-a-transaction-in-sweep_bh_for_rgrps.patch [new file with mode: 0644]
queue-5.2/i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch [new file with mode: 0644]
queue-5.2/keys-trusted-correctly-initialize-digests-and-fix-locking-issue.patch [new file with mode: 0644]
queue-5.2/lib-lzo-lzo1x_compress.c-fix-alignment-bug-in-lzo-rle.patch [new file with mode: 0644]
queue-5.2/memcg-kmem-do-not-fail-__gfp_nofail-charges.patch [new file with mode: 0644]
queue-5.2/memcg-oom-don-t-require-__gfp_fs-when-invoking-memcg-oom-killer.patch [new file with mode: 0644]
queue-5.2/mm-compaction.c-clear-total_-migrate-free-_scanned-before-scanning-a-new-zone.patch [new file with mode: 0644]
queue-5.2/mt76-round-up-length-on-mt76_wr_copy.patch [new file with mode: 0644]
queue-5.2/ovl-filter-of-trusted-xattr-results-in-audit.patch [new file with mode: 0644]
queue-5.2/ovl-fix-dereferencing-possible-err_ptr.patch [new file with mode: 0644]
queue-5.2/rtw88-pci-rearrange-the-memory-usage-for-skb-in-rx-isr.patch [new file with mode: 0644]
queue-5.2/rtw88-pci-use-dma-sync-instead-of-remapping-in-rx-isr.patch [new file with mode: 0644]
queue-5.2/series
queue-5.2/smb3-allow-disabling-requesting-leases.patch [new file with mode: 0644]
queue-5.2/smb3-fix-leak-in-open-on-server-perf-counter.patch [new file with mode: 0644]
queue-5.2/z3fold-fix-memory-leak-in-kmem-cache.patch [new file with mode: 0644]
queue-5.2/z3fold-fix-retry-mechanism-in-page-reclaim.patch [new file with mode: 0644]

diff --git a/queue-5.2/ath10k-fix-channel-info-parsing-for-non-tlv-target.patch b/queue-5.2/ath10k-fix-channel-info-parsing-for-non-tlv-target.patch
new file mode 100644 (file)
index 0000000..935fa8f
--- /dev/null
@@ -0,0 +1,95 @@
+From 6be6c04bcc2e8770b8637632789ff15765124894 Mon Sep 17 00:00:00 2001
+From: Rakesh Pillai <pillair@codeaurora.org>
+Date: Fri, 8 Mar 2019 16:56:06 +0530
+Subject: ath10k: fix channel info parsing for non tlv target
+
+From: Rakesh Pillai <pillair@codeaurora.org>
+
+commit 6be6c04bcc2e8770b8637632789ff15765124894 upstream.
+
+TLV targets such as WCN3990 send more data in the chan info event than
+non-TLV targets do. There is a minimum size check in the wmi event
+handling for non-TLV targets, so we cannot update the common channel info
+structure as was done in commit 13104929d2ec ("ath10k: fill the channel
+survey results for WCN3990 correctly"). That broke channel survey results
+on 10.x firmware versions.
+
+If the common channel info structure is updated, the size check for the
+chan info event on non-TLV targets fails, returning -EPROTO, and we see
+the error message below:
+
+   ath10k_pci 0000:01:00.0: failed to parse chan info event: -71
+
+Add a TLV-specific channel info structure and restore the original size
+of the common channel info structure to mitigate this issue.
+
+Tested HW: WCN3990
+          QCA9887
+Tested FW: WLAN.HL.3.1-00784-QCAHLSWMTPLZ-1
+          10.2.4-1.0-00037
+
+Fixes: 13104929d2ec ("ath10k: fill the channel survey results for WCN3990 correctly")
+Cc: stable@vger.kernel.org # 5.0
+Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/ath/ath10k/wmi-tlv.c |    2 +-
+ drivers/net/wireless/ath/ath10k/wmi-tlv.h |   16 ++++++++++++++++
+ drivers/net/wireless/ath/ath10k/wmi.h     |    8 --------
+ 3 files changed, 17 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
++++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+@@ -810,7 +810,7 @@ static int ath10k_wmi_tlv_op_pull_ch_inf
+                                            struct wmi_ch_info_ev_arg *arg)
+ {
+       const void **tb;
+-      const struct wmi_chan_info_event *ev;
++      const struct wmi_tlv_chan_info_event *ev;
+       int ret;
+       tb = ath10k_wmi_tlv_parse_alloc(ar, skb->data, skb->len, GFP_ATOMIC);
+--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h
++++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
+@@ -1607,6 +1607,22 @@ struct chan_info_params {
+ #define WMI_TLV_FLAG_MGMT_BUNDLE_TX_COMPL     BIT(9)
++struct wmi_tlv_chan_info_event {
++      __le32 err_code;
++      __le32 freq;
++      __le32 cmd_flags;
++      __le32 noise_floor;
++      __le32 rx_clear_count;
++      __le32 cycle_count;
++      __le32 chan_tx_pwr_range;
++      __le32 chan_tx_pwr_tp;
++      __le32 rx_frame_count;
++      __le32 my_bss_rx_cycle_count;
++      __le32 rx_11b_mode_data_duration;
++      __le32 tx_frame_cnt;
++      __le32 mac_clk_mhz;
++} __packed;
++
+ struct wmi_tlv_mgmt_tx_compl_ev {
+       __le32 desc_id;
+       __le32 status;
+--- a/drivers/net/wireless/ath/ath10k/wmi.h
++++ b/drivers/net/wireless/ath/ath10k/wmi.h
+@@ -6524,14 +6524,6 @@ struct wmi_chan_info_event {
+       __le32 noise_floor;
+       __le32 rx_clear_count;
+       __le32 cycle_count;
+-      __le32 chan_tx_pwr_range;
+-      __le32 chan_tx_pwr_tp;
+-      __le32 rx_frame_count;
+-      __le32 my_bss_rx_cycle_count;
+-      __le32 rx_11b_mode_data_duration;
+-      __le32 tx_frame_cnt;
+-      __le32 mac_clk_mhz;
+-
+ } __packed;
+ struct wmi_10_4_chan_info_event {
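
To illustrate the size-check problem outside the driver, here is a minimal
standalone C sketch; the struct layouts and names (chan_info_common,
chan_info_tlv, parse_non_tlv) are abbreviated stand-ins, not the actual
ath10k definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Abbreviated stand-in for the common (non-TLV) event layout. */
    struct chan_info_common {
        uint32_t err_code, freq, cmd_flags;
        uint32_t noise_floor, rx_clear_count, cycle_count;
    };

    /* TLV targets append extra fields, so they get their own struct. */
    struct chan_info_tlv {
        struct chan_info_common common;
        uint32_t chan_tx_pwr_range, chan_tx_pwr_tp, rx_frame_count;
    };

    /*
     * Non-TLV parser: rejects events shorter than the expected struct.
     * Had the TLV-only fields been folded into chan_info_common, real
     * events from 10.x firmware would fail this check with -EPROTO.
     */
    static int parse_non_tlv(const void *buf, uint32_t len)
    {
        (void)buf;
        if (len < sizeof(struct chan_info_common))
            return -71; /* -EPROTO */
        return 0;
    }

    int main(void)
    {
        unsigned char event[sizeof(struct chan_info_common)] = { 0 };

        /* 24 bytes: passes against the common struct ... */
        printf("common check: %d\n", parse_non_tlv(event, sizeof(event)));
        /* ... but would fail if the struct had grown to TLV size. */
        printf("grown check:  %d\n",
               sizeof(event) < sizeof(struct chan_info_tlv) ? -71 : 0);
        return 0;
    }
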
diff --git a/queue-5.2/block-fix-null-pointer-dereference-in-blk_mq_rq_timed_out.patch b/queue-5.2/block-fix-null-pointer-dereference-in-blk_mq_rq_timed_out.patch
new file mode 100644 (file)
index 0000000..c0de834
--- /dev/null
@@ -0,0 +1,137 @@
+From 8d6996630c03d7ceeabe2611378fea5ca1c3f1b3 Mon Sep 17 00:00:00 2001
+From: Yufen Yu <yuyufen@huawei.com>
+Date: Fri, 27 Sep 2019 16:19:55 +0800
+Subject: block: fix null pointer dereference in blk_mq_rq_timed_out()
+
+From: Yufen Yu <yuyufen@huawei.com>
+
+commit 8d6996630c03d7ceeabe2611378fea5ca1c3f1b3 upstream.
+
+We got a null pointer dereference BUG_ON in blk_mq_rq_timed_out()
+as follows:
+
+[  108.825472] BUG: kernel NULL pointer dereference, address: 0000000000000040
+[  108.827059] PGD 0 P4D 0
+[  108.827313] Oops: 0000 [#1] SMP PTI
+[  108.827657] CPU: 6 PID: 198 Comm: kworker/6:1H Not tainted 5.3.0-rc8+ #431
+[  108.829503] Workqueue: kblockd blk_mq_timeout_work
+[  108.829913] RIP: 0010:blk_mq_check_expired+0x258/0x330
+[  108.838191] Call Trace:
+[  108.838406]  bt_iter+0x74/0x80
+[  108.838665]  blk_mq_queue_tag_busy_iter+0x204/0x450
+[  108.839074]  ? __switch_to_asm+0x34/0x70
+[  108.839405]  ? blk_mq_stop_hw_queue+0x40/0x40
+[  108.839823]  ? blk_mq_stop_hw_queue+0x40/0x40
+[  108.840273]  ? syscall_return_via_sysret+0xf/0x7f
+[  108.840732]  blk_mq_timeout_work+0x74/0x200
+[  108.841151]  process_one_work+0x297/0x680
+[  108.841550]  worker_thread+0x29c/0x6f0
+[  108.841926]  ? rescuer_thread+0x580/0x580
+[  108.842344]  kthread+0x16a/0x1a0
+[  108.842666]  ? kthread_flush_work+0x170/0x170
+[  108.843100]  ret_from_fork+0x35/0x40
+
+The bug is caused by a race between the timeout handler and the
+completion of a flush request.
+
+When the timeout handler blk_mq_rq_timed_out() tries to read
+'req->q->mq_ops', the 'req' may already have been completed and
+reinitialized by the next flush request, which calls blk_rq_init()
+to clear 'req' to 0.
+
+After commit 12f5b93145 ("blk-mq: Remove generation seqeunce"), the
+lifetime of normal requests is protected by a refcount: a request can
+only really be freed once 'rq->ref' drops to zero, so such requests
+cannot be reused before the timeout handler finishes.
+
+However, a flush request has a .end_io defined, and rq->end_io() is
+still called even if 'rq->ref' has not dropped to zero. After that,
+the 'flush_rq' can be reused by the next flush request, resulting in
+the null pointer dereference BUG_ON above.
+
+We fix this by covering the flush request with 'rq->ref' as well: if
+the refcount is not yet zero, flush_end_io() records the status and
+returns, letting the last holder call it again. To record the request
+status, we add a new field, 'rq_status', used in flush_end_io().
+
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Keith Busch <keith.busch@intel.com>
+Cc: Bart Van Assche <bvanassche@acm.org>
+Cc: stable@vger.kernel.org # v4.18+
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Bob Liu <bob.liu@oracle.com>
+Signed-off-by: Yufen Yu <yuyufen@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+-------
+v2:
+ - move rq_status from struct request to struct blk_flush_queue
+v3:
+ - remove unnecessary '{}' pair.
+v4:
+ - let spinlock to protect 'fq->rq_status'
+v5:
+ - move rq_status after flush_running_idx member of struct blk_flush_queue
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+
+---
+ block/blk-flush.c |   10 ++++++++++
+ block/blk-mq.c    |    5 ++++-
+ block/blk.h       |    7 +++++++
+ 3 files changed, 21 insertions(+), 1 deletion(-)
+
+--- a/block/blk-flush.c
++++ b/block/blk-flush.c
+@@ -214,6 +214,16 @@ static void flush_end_io(struct request
+       /* release the tag's ownership to the req cloned from */
+       spin_lock_irqsave(&fq->mq_flush_lock, flags);
++
++      if (!refcount_dec_and_test(&flush_rq->ref)) {
++              fq->rq_status = error;
++              spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
++              return;
++      }
++
++      if (fq->rq_status != BLK_STS_OK)
++              error = fq->rq_status;
++
+       hctx = flush_rq->mq_hctx;
+       if (!q->elevator) {
+               blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -910,7 +910,10 @@ static bool blk_mq_check_expired(struct
+        */
+       if (blk_mq_req_expired(rq, next))
+               blk_mq_rq_timed_out(rq, reserved);
+-      if (refcount_dec_and_test(&rq->ref))
++
++      if (is_flush_rq(rq, hctx))
++              rq->end_io(rq, 0);
++      else if (refcount_dec_and_test(&rq->ref))
+               __blk_mq_free_request(rq);
+       return true;
+--- a/block/blk.h
++++ b/block/blk.h
+@@ -19,6 +19,7 @@ struct blk_flush_queue {
+       unsigned int            flush_queue_delayed:1;
+       unsigned int            flush_pending_idx:1;
+       unsigned int            flush_running_idx:1;
++      blk_status_t            rq_status;
+       unsigned long           flush_pending_since;
+       struct list_head        flush_queue[2];
+       struct list_head        flush_data_in_flight;
+@@ -47,6 +48,12 @@ static inline void __blk_get_queue(struc
+       kobject_get(&q->kobj);
+ }
++static inline bool
++is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
++{
++      return hctx->fq->flush_rq == req;
++}
++
+ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
+               int node, int cmd_size, gfp_t flags);
+ void blk_free_flush_queue(struct blk_flush_queue *q);
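
For reference, a minimal userspace C sketch of the refcount pattern the
patch applies; the struct and field names here (flush_rq, ref, status) are
illustrative stand-ins for rq->ref and fq->rq_status, not block-layer
code. Whichever caller drops the last reference performs the completion;
earlier callers only record the status:

    #include <stdatomic.h>
    #include <stdio.h>

    struct flush_rq {
        atomic_int ref;  /* models rq->ref */
        int status;      /* models fq->rq_status */
    };

    static void flush_end_io(struct flush_rq *rq, int error)
    {
        /* Not the last holder: stash the error; the final reference
         * drop will re-run the completion with it. */
        if (atomic_fetch_sub(&rq->ref, 1) != 1) {
            rq->status = error;
            return;
        }
        if (rq->status != 0)
            error = rq->status;
        printf("completing flush request, status %d\n", error);
    }

    int main(void)
    {
        struct flush_rq rq = { .ref = 2, .status = 0 };

        flush_end_io(&rq, -5); /* e.g. timeout path: defers completion */
        flush_end_io(&rq, 0);  /* last holder: completes with -5 */
        return 0;
    }
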
diff --git a/queue-5.2/block-mq-deadline-fix-queue-restart-handling.patch b/queue-5.2/block-mq-deadline-fix-queue-restart-handling.patch
new file mode 100644 (file)
index 0000000..04f7b61
--- /dev/null
@@ -0,0 +1,107 @@
+From cb8acabbe33b110157955a7425ee876fb81e6bbc Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <damien.lemoal@wdc.com>
+Date: Wed, 28 Aug 2019 13:40:20 +0900
+Subject: block: mq-deadline: Fix queue restart handling
+
+From: Damien Le Moal <damien.lemoal@wdc.com>
+
+commit cb8acabbe33b110157955a7425ee876fb81e6bbc upstream.
+
+Commit 7211aef86f79 ("block: mq-deadline: Fix write completion
+handling") added a call to blk_mq_sched_mark_restart_hctx() in
+dd_dispatch_request() to make sure that write request dispatching does
+not stall when all target zones are locked. This fix left a subtle race
+when a write completion happens during a dispatch execution on another
+CPU:
+
+CPU 0: Dispatch                        CPU1: write completion
+
+dd_dispatch_request()
+    lock(&dd->lock);
+    ...
+    lock(&dd->zone_lock);      dd_finish_request()
+    rq = find request          lock(&dd->zone_lock);
+    unlock(&dd->zone_lock);
+                               zone write unlock
+                               unlock(&dd->zone_lock);
+                               ...
+                               __blk_mq_free_request
+                                      check restart flag (not set)
+                                     -> queue not run
+    ...
+    if (!rq && have writes)
+        blk_mq_sched_mark_restart_hctx()
+    unlock(&dd->lock)
+
+Since the dispatch context finishes after the write request completion
+handling, marking the queue as needing a restart is not seen from
+__blk_mq_free_request() and blk_mq_sched_restart() not executed leading
+to the dispatch stall under 100% write workloads.
+
+Fix this by moving the call to blk_mq_sched_mark_restart_hctx() from
+dd_dispatch_request() into dd_finish_request() under the zone lock to
+ensure full mutual exclusion between write request dispatch selection
+and zone unlock on write request completion.
+
+Fixes: 7211aef86f79 ("block: mq-deadline: Fix write completion handling")
+Cc: stable@vger.kernel.org
+Reported-by: Hans Holmberg <Hans.Holmberg@wdc.com>
+Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/mq-deadline.c |   19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+--- a/block/mq-deadline.c
++++ b/block/mq-deadline.c
+@@ -377,13 +377,6 @@ done:
+  * hardware queue, but we may return a request that is for a
+  * different hardware queue. This is because mq-deadline has shared
+  * state for all hardware queues, in terms of sorting, FIFOs, etc.
+- *
+- * For a zoned block device, __dd_dispatch_request() may return NULL
+- * if all the queued write requests are directed at zones that are already
+- * locked due to on-going write requests. In this case, make sure to mark
+- * the queue as needing a restart to ensure that the queue is run again
+- * and the pending writes dispatched once the target zones for the ongoing
+- * write requests are unlocked in dd_finish_request().
+  */
+ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
+ {
+@@ -392,9 +385,6 @@ static struct request *dd_dispatch_reque
+       spin_lock(&dd->lock);
+       rq = __dd_dispatch_request(dd);
+-      if (!rq && blk_queue_is_zoned(hctx->queue) &&
+-          !list_empty(&dd->fifo_list[WRITE]))
+-              blk_mq_sched_mark_restart_hctx(hctx);
+       spin_unlock(&dd->lock);
+       return rq;
+@@ -560,6 +550,13 @@ static void dd_prepare_request(struct re
+  * spinlock so that the zone is never unlocked while deadline_fifo_request()
+  * or deadline_next_request() are executing. This function is called for
+  * all requests, whether or not these requests complete successfully.
++ *
++ * For a zoned block device, __dd_dispatch_request() may have stopped
++ * dispatching requests if all the queued requests are write requests directed
++ * at zones that are already locked due to on-going write requests. To ensure
++ * write request dispatch progress in this case, mark the queue as needing a
++ * restart to ensure that the queue is run again after completion of the
++ * request and zones being unlocked.
+  */
+ static void dd_finish_request(struct request *rq)
+ {
+@@ -571,6 +568,8 @@ static void dd_finish_request(struct req
+               spin_lock_irqsave(&dd->zone_lock, flags);
+               blk_req_zone_write_unlock(rq);
++              if (!list_empty(&dd->fifo_list[WRITE]))
++                      blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
+               spin_unlock_irqrestore(&dd->zone_lock, flags);
+       }
+ }
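
A rough userspace sketch of the locking idea, with a pthread mutex
standing in for dd->zone_lock and boolean flags standing in for the write
FIFO and the hctx restart flag (all names illustrative): the restart mark
is now set in the completion path under the same lock that serializes
zone write unlocking, so a dispatcher can no longer miss it:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t zone_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool writes_pending = true; /* models !list_empty(&dd->fifo_list[WRITE]) */
    static bool restart_marked;        /* models the hctx restart flag */

    /* Completion path: unlock the zone and mark the restart atomically
     * with respect to any dispatcher holding zone_lock. */
    static void dd_finish_request(void)
    {
        pthread_mutex_lock(&zone_lock);
        /* blk_req_zone_write_unlock() would run here */
        if (writes_pending)
            restart_marked = true;
        pthread_mutex_unlock(&zone_lock);
    }

    int main(void)
    {
        dd_finish_request();
        printf("restart_marked=%d\n", restart_marked);
        return 0;
    }
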
diff --git a/queue-5.2/btrfs-adjust-dirty_metadata_bytes-after-writeback-failure-of-extent-buffer.patch b/queue-5.2/btrfs-adjust-dirty_metadata_bytes-after-writeback-failure-of-extent-buffer.patch
new file mode 100644 (file)
index 0000000..42fc068
--- /dev/null
@@ -0,0 +1,50 @@
+From eb5b64f142504a597d67e2109d603055ff765e52 Mon Sep 17 00:00:00 2001
+From: Dennis Zhou <dennis@kernel.org>
+Date: Fri, 13 Sep 2019 14:54:07 +0100
+Subject: btrfs: adjust dirty_metadata_bytes after writeback failure of extent buffer
+
+From: Dennis Zhou <dennis@kernel.org>
+
+commit eb5b64f142504a597d67e2109d603055ff765e52 upstream.
+
+Before, if an eb failed to write out, we would end up triggering a
+BUG_ON(). As of f4340622e0226 ("btrfs: extent_io: Move the BUG_ON() in
+flush_write_bio() one level up"), we no longer BUG_ON(), so we should
+make life consistent and add back the unwritten bytes to
+dirty_metadata_bytes.
+
+Fixes: f4340622e022 ("btrfs: extent_io: Move the BUG_ON() in flush_write_bio() one level up")
+CC: stable@vger.kernel.org # 5.2+
+Reviewed-by: Filipe Manana <fdmanana@kernel.org>
+Signed-off-by: Dennis Zhou <dennis@kernel.org>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/extent_io.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -3708,12 +3708,21 @@ err_unlock:
+ static void set_btree_ioerr(struct page *page)
+ {
+       struct extent_buffer *eb = (struct extent_buffer *)page->private;
++      struct btrfs_fs_info *fs_info;
+       SetPageError(page);
+       if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
+               return;
+       /*
++       * If we error out, we should add back the dirty_metadata_bytes
++       * to make it consistent.
++       */
++      fs_info = eb->fs_info;
++      percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
++                               eb->len, fs_info->dirty_metadata_batch);
++
++      /*
+        * If writeback for a btree extent that doesn't belong to a log tree
+        * failed, increment the counter transaction->eb_write_errors.
+        * We do this because while the transaction is running and before it's
diff --git a/queue-5.2/btrfs-fix-allocation-of-free-space-cache-v1-bitmap-pages.patch b/queue-5.2/btrfs-fix-allocation-of-free-space-cache-v1-bitmap-pages.patch
new file mode 100644 (file)
index 0000000..a40de5d
--- /dev/null
@@ -0,0 +1,191 @@
+From 3acd48507dc43eeeb0a1fe965b8bad91cab904a7 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+Date: Wed, 21 Aug 2019 15:05:55 +0000
+Subject: btrfs: fix allocation of free space cache v1 bitmap pages
+
+From: Christophe Leroy <christophe.leroy@c-s.fr>
+
+commit 3acd48507dc43eeeb0a1fe965b8bad91cab904a7 upstream.
+
+Various notifications of type "BUG kmalloc-4096 () : Redzone
+overwritten" have been observed recently in various parts of the kernel.
+After some time, a relation was established with the use of the BTRFS
+filesystem and with SLUB_DEBUG turned on.
+
+[   22.809700] BUG kmalloc-4096 (Tainted: G        W        ): Redzone overwritten
+
+[   22.810286] INFO: 0xbe1a5921-0xfbfc06cd. First byte 0x0 instead of 0xcc
+[   22.810866] INFO: Allocated in __load_free_space_cache+0x588/0x780 [btrfs] age=22 cpu=0 pid=224
+[   22.811193]         __slab_alloc.constprop.26+0x44/0x70
+[   22.811345]         kmem_cache_alloc_trace+0xf0/0x2ec
+[   22.811588]         __load_free_space_cache+0x588/0x780 [btrfs]
+[   22.811848]         load_free_space_cache+0xf4/0x1b0 [btrfs]
+[   22.812090]         cache_block_group+0x1d0/0x3d0 [btrfs]
+[   22.812321]         find_free_extent+0x680/0x12a4 [btrfs]
+[   22.812549]         btrfs_reserve_extent+0xec/0x220 [btrfs]
+[   22.812785]         btrfs_alloc_tree_block+0x178/0x5f4 [btrfs]
+[   22.813032]         __btrfs_cow_block+0x150/0x5d4 [btrfs]
+[   22.813262]         btrfs_cow_block+0x194/0x298 [btrfs]
+[   22.813484]         commit_cowonly_roots+0x44/0x294 [btrfs]
+[   22.813718]         btrfs_commit_transaction+0x63c/0xc0c [btrfs]
+[   22.813973]         close_ctree+0xf8/0x2a4 [btrfs]
+[   22.814107]         generic_shutdown_super+0x80/0x110
+[   22.814250]         kill_anon_super+0x18/0x30
+[   22.814437]         btrfs_kill_super+0x18/0x90 [btrfs]
+[   22.814590] INFO: Freed in proc_cgroup_show+0xc0/0x248 age=41 cpu=0 pid=83
+[   22.814841]         proc_cgroup_show+0xc0/0x248
+[   22.814967]         proc_single_show+0x54/0x98
+[   22.815086]         seq_read+0x278/0x45c
+[   22.815190]         __vfs_read+0x28/0x17c
+[   22.815289]         vfs_read+0xa8/0x14c
+[   22.815381]         ksys_read+0x50/0x94
+[   22.815475]         ret_from_syscall+0x0/0x38
+
+Commit 69d2480456d1 ("btrfs: use copy_page for copying pages instead of
+memcpy") changed the way bitmap blocks are copied. But allthough bitmaps
+have the size of a page, they were allocated with kzalloc().
+
+Most of the time, kzalloc() allocates aligned blocks of memory, so
+copy_page() can be used. But when some debug options like SLAB_DEBUG are
+activated, kzalloc() may return unaligned pointer.
+
+On powerpc, memcpy(), copy_page() and other copying functions use
+'dcbz' instruction which provides an entire zeroed cacheline to avoid
+memory read when the intention is to overwrite a full line. Functions
+like memcpy() are writen to care about partial cachelines at the start
+and end of the destination, but copy_page() assumes it gets pages. As
+pages are naturally cache aligned, copy_page() doesn't care about
+partial lines. This means that when copy_page() is called with a
+misaligned pointer, a few leading bytes are zeroed.
+
+To fix it, allocate bitmaps through a kmem_cache instead of using kzalloc().
+The cache pool is created with a PAGE_SIZE alignment constraint.
+
+Reported-by: Erhard F. <erhard_f@mailbox.org>
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204371
+Fixes: 69d2480456d1 ("btrfs: use copy_page for copying pages instead of memcpy")
+Cc: stable@vger.kernel.org # 4.19+
+Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ rename to btrfs_free_space_bitmap ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.h            |    1 +
+ fs/btrfs/free-space-cache.c |   20 +++++++++++++-------
+ fs/btrfs/inode.c            |    8 ++++++++
+ 3 files changed, 22 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -40,6 +40,7 @@ extern struct kmem_cache *btrfs_trans_ha
+ extern struct kmem_cache *btrfs_bit_radix_cachep;
+ extern struct kmem_cache *btrfs_path_cachep;
+ extern struct kmem_cache *btrfs_free_space_cachep;
++extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
+ struct btrfs_ordered_sum;
+ struct btrfs_ref;
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -764,7 +764,8 @@ static int __load_free_space_cache(struc
+               } else {
+                       ASSERT(num_bitmaps);
+                       num_bitmaps--;
+-                      e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS);
++                      e->bitmap = kmem_cache_zalloc(
++                                      btrfs_free_space_bitmap_cachep, GFP_NOFS);
+                       if (!e->bitmap) {
+                               kmem_cache_free(
+                                       btrfs_free_space_cachep, e);
+@@ -1881,7 +1882,7 @@ static void free_bitmap(struct btrfs_fre
+                       struct btrfs_free_space *bitmap_info)
+ {
+       unlink_free_space(ctl, bitmap_info);
+-      kfree(bitmap_info->bitmap);
++      kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
+       kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
+       ctl->total_bitmaps--;
+       ctl->op->recalc_thresholds(ctl);
+@@ -2135,7 +2136,8 @@ new_bitmap:
+               }
+               /* allocate the bitmap */
+-              info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS);
++              info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
++                                               GFP_NOFS);
+               spin_lock(&ctl->tree_lock);
+               if (!info->bitmap) {
+                       ret = -ENOMEM;
+@@ -2146,7 +2148,9 @@ new_bitmap:
+ out:
+       if (info) {
+-              kfree(info->bitmap);
++              if (info->bitmap)
++                      kmem_cache_free(btrfs_free_space_bitmap_cachep,
++                                      info->bitmap);
+               kmem_cache_free(btrfs_free_space_cachep, info);
+       }
+@@ -2802,7 +2806,8 @@ out:
+       if (entry->bytes == 0) {
+               ctl->free_extents--;
+               if (entry->bitmap) {
+-                      kfree(entry->bitmap);
++                      kmem_cache_free(btrfs_free_space_bitmap_cachep,
++                                      entry->bitmap);
+                       ctl->total_bitmaps--;
+                       ctl->op->recalc_thresholds(ctl);
+               }
+@@ -3606,7 +3611,7 @@ again:
+       }
+       if (!map) {
+-              map = kzalloc(PAGE_SIZE, GFP_NOFS);
++              map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
+               if (!map) {
+                       kmem_cache_free(btrfs_free_space_cachep, info);
+                       return -ENOMEM;
+@@ -3635,7 +3640,8 @@ again:
+       if (info)
+               kmem_cache_free(btrfs_free_space_cachep, info);
+-      kfree(map);
++      if (map)
++              kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
+       return 0;
+ }
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -73,6 +73,7 @@ static struct kmem_cache *btrfs_inode_ca
+ struct kmem_cache *btrfs_trans_handle_cachep;
+ struct kmem_cache *btrfs_path_cachep;
+ struct kmem_cache *btrfs_free_space_cachep;
++struct kmem_cache *btrfs_free_space_bitmap_cachep;
+ static int btrfs_setsize(struct inode *inode, struct iattr *attr);
+ static int btrfs_truncate(struct inode *inode, bool skip_writeback);
+@@ -9361,6 +9362,7 @@ void __cold btrfs_destroy_cachep(void)
+       kmem_cache_destroy(btrfs_trans_handle_cachep);
+       kmem_cache_destroy(btrfs_path_cachep);
+       kmem_cache_destroy(btrfs_free_space_cachep);
++      kmem_cache_destroy(btrfs_free_space_bitmap_cachep);
+ }
+ int __init btrfs_init_cachep(void)
+@@ -9390,6 +9392,12 @@ int __init btrfs_init_cachep(void)
+       if (!btrfs_free_space_cachep)
+               goto fail;
++      btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
++                                                      PAGE_SIZE, PAGE_SIZE,
++                                                      SLAB_RED_ZONE, NULL);
++      if (!btrfs_free_space_bitmap_cachep)
++              goto fail;
++
+       return 0;
+ fail:
+       btrfs_destroy_cachep();
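
The userspace analogue of the fix is requesting an explicit alignment
from the allocator instead of relying on whatever a plain allocation
happens to return; a small sketch, with aligned_alloc() standing in for
the page-aligned kmem_cache:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
        /* Plain allocation: alignment is whatever the allocator gives,
         * the analogue of kzalloc() under SLAB/SLUB debugging. */
        void *plain = malloc(PAGE_SIZE);

        /* Explicit constraint: the analogue of a kmem_cache created
         * with PAGE_SIZE alignment. */
        void *aligned = aligned_alloc(PAGE_SIZE, PAGE_SIZE);

        printf("malloc        misalignment: %lu\n",
               (unsigned long)((uintptr_t)plain % PAGE_SIZE));
        printf("aligned_alloc misalignment: %lu\n",
               (unsigned long)((uintptr_t)aligned % PAGE_SIZE));

        free(plain);
        free(aligned);
        return 0;
    }
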
diff --git a/queue-5.2/btrfs-fix-race-setting-up-and-completing-qgroup-rescan-workers.patch b/queue-5.2/btrfs-fix-race-setting-up-and-completing-qgroup-rescan-workers.patch
new file mode 100644 (file)
index 0000000..6aa18c6
--- /dev/null
@@ -0,0 +1,202 @@
+From 13fc1d271a2e3ab8a02071e711add01fab9271f6 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 24 Sep 2019 10:49:54 +0100
+Subject: Btrfs: fix race setting up and completing qgroup rescan workers
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 13fc1d271a2e3ab8a02071e711add01fab9271f6 upstream.
+
+There is a race between setting up a qgroup rescan worker and completing
+a qgroup rescan worker that can lead to callers of the qgroup rescan wait
+ioctl to either not wait for the rescan worker to complete or to hang
+forever due to missing wake ups. The following diagram shows a sequence
+of steps that illustrates the race.
+
+        CPU 1                                                         CPU 2                                  CPU 3
+
+ btrfs_ioctl_quota_rescan()
+  btrfs_qgroup_rescan()
+   qgroup_rescan_init()
+    mutex_lock(&fs_info->qgroup_rescan_lock)
+    spin_lock(&fs_info->qgroup_lock)
+
+    fs_info->qgroup_flags |=
+      BTRFS_QGROUP_STATUS_FLAG_RESCAN
+
+    init_completion(
+      &fs_info->qgroup_rescan_completion)
+
+    fs_info->qgroup_rescan_running = true
+
+    mutex_unlock(&fs_info->qgroup_rescan_lock)
+    spin_unlock(&fs_info->qgroup_lock)
+
+    btrfs_init_work()
+     --> starts the worker
+
+                                                        btrfs_qgroup_rescan_worker()
+                                                         mutex_lock(&fs_info->qgroup_rescan_lock)
+
+                                                         fs_info->qgroup_flags &=
+                                                           ~BTRFS_QGROUP_STATUS_FLAG_RESCAN
+
+                                                         mutex_unlock(&fs_info->qgroup_rescan_lock)
+
+                                                         starts transaction, updates qgroup status
+                                                         item, etc
+
+                                                                                                           btrfs_ioctl_quota_rescan()
+                                                                                                            btrfs_qgroup_rescan()
+                                                                                                             qgroup_rescan_init()
+                                                                                                              mutex_lock(&fs_info->qgroup_rescan_lock)
+                                                                                                              spin_lock(&fs_info->qgroup_lock)
+
+                                                                                                              fs_info->qgroup_flags |=
+                                                                                                                BTRFS_QGROUP_STATUS_FLAG_RESCAN
+
+                                                                                                              init_completion(
+                                                                                                                &fs_info->qgroup_rescan_completion)
+
+                                                                                                              fs_info->qgroup_rescan_running = true
+
+                                                                                                              mutex_unlock(&fs_info->qgroup_rescan_lock)
+                                                                                                              spin_unlock(&fs_info->qgroup_lock)
+
+                                                                                                              btrfs_init_work()
+                                                                                                               --> starts another worker
+
+                                                         mutex_lock(&fs_info->qgroup_rescan_lock)
+
+                                                         fs_info->qgroup_rescan_running = false
+
+                                                         mutex_unlock(&fs_info->qgroup_rescan_lock)
+
+                                                        complete_all(&fs_info->qgroup_rescan_completion)
+
+Before the rescan worker started by the task at CPU 3 completes, if
+another task calls btrfs_ioctl_quota_rescan(), it will get -EINPROGRESS
+because the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN is set at
+fs_info->qgroup_flags, which is expected and correct behaviour.
+
+However, if another task calls btrfs_ioctl_quota_rescan_wait() before the
+rescan worker started by the task at CPU 3 completes, it will return
+immediately without waiting for the new rescan worker to complete,
+because fs_info->qgroup_rescan_running is set to false by CPU 2.
+
+This race makes test case btrfs/171 (from fstests) fail often:
+
+  btrfs/171 9s ... - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad)
+#      --- tests/btrfs/171.out     2018-09-16 21:30:48.505104287 +0100
+#      +++ /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad      2019-09-19 02:01:36.938486039 +0100
+#      @@ -1,2 +1,3 @@
+#       QA output created by 171
+#      +ERROR: quota rescan failed: Operation now in progress
+#       Silence is golden
+#      ...
+#      (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/171.out /home/fdmanana/git/hub/xfstests/results//btrfs/171.out.bad'  to see the entire diff)
+
+That is because the test calls the btrfs-progs commands "qgroup quota
+rescan -w", "qgroup assign" and "qgroup remove" in a sequence that makes
+calls to the rescan start ioctl fail with -EINPROGRESS (note the "btrfs"
+commands 'qgroup assign' and 'qgroup remove' often call the rescan start
+ioctl after calling the qgroup assign ioctl,
+btrfs_ioctl_qgroup_assign()), since previous waits didn't actually wait
+for a rescan worker to complete.
+
+Another problem the race can cause is missing wake ups for waiters,
+since the call to complete_all() happens outside a critical section and
+after clearing the flag BTRFS_QGROUP_STATUS_FLAG_RESCAN. In the sequence
+diagram above, if we have a waiter for the first rescan task (executed
+by CPU 2), then fs_info->qgroup_rescan_completion.wait is not empty, and
+if after the rescan worker clears BTRFS_QGROUP_STATUS_FLAG_RESCAN and
+before it calls complete_all() against
+fs_info->qgroup_rescan_completion, the task at CPU 3 calls
+init_completion() against fs_info->qgroup_rescan_completion which
+re-initializes its wait queue to an empty queue, therefore causing the
+rescan worker at CPU 2 to call complete_all() against an empty queue,
+never waking up the task waiting for that rescan worker.
+
+Fix this by clearing BTRFS_QGROUP_STATUS_FLAG_RESCAN and setting
+fs_info->qgroup_rescan_running to false in the same critical section,
+delimited by the mutex fs_info->qgroup_rescan_lock, as well as doing the
+call to complete_all() in that same critical section. This gives the
+protection needed to avoid rescan wait ioctl callers not waiting for a
+running rescan worker and the lost wake ups problem, since setting that
+rescan flag and boolean as well as initializing the wait queue is done
+already in a critical section delimited by that mutex (at
+qgroup_rescan_init()).
+
+Fixes: 57254b6ebce4ce ("Btrfs: add ioctl to wait for qgroup rescan completion")
+Fixes: d2c609b834d62f ("btrfs: properly track when rescan worker is running")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c |   33 +++++++++++++++++++--------------
+ 1 file changed, 19 insertions(+), 14 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3154,9 +3154,6 @@ out:
+       btrfs_free_path(path);
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+-      if (!btrfs_fs_closing(fs_info))
+-              fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+-
+       if (err > 0 &&
+           fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+               fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+@@ -3172,16 +3169,30 @@ out:
+       trans = btrfs_start_transaction(fs_info->quota_root, 1);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
++              trans = NULL;
+               btrfs_err(fs_info,
+                         "fail to start transaction for status update: %d",
+                         err);
+-              goto done;
+       }
+-      ret = update_qgroup_status_item(trans);
+-      if (ret < 0) {
+-              err = ret;
+-              btrfs_err(fs_info, "fail to update qgroup status: %d", err);
++
++      mutex_lock(&fs_info->qgroup_rescan_lock);
++      if (!btrfs_fs_closing(fs_info))
++              fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
++      if (trans) {
++              ret = update_qgroup_status_item(trans);
++              if (ret < 0) {
++                      err = ret;
++                      btrfs_err(fs_info, "fail to update qgroup status: %d",
++                                err);
++              }
+       }
++      fs_info->qgroup_rescan_running = false;
++      complete_all(&fs_info->qgroup_rescan_completion);
++      mutex_unlock(&fs_info->qgroup_rescan_lock);
++
++      if (!trans)
++              return;
++
+       btrfs_end_transaction(trans);
+       if (btrfs_fs_closing(fs_info)) {
+@@ -3192,12 +3203,6 @@ out:
+       } else {
+               btrfs_err(fs_info, "qgroup scan failed with %d", err);
+       }
+-
+-done:
+-      mutex_lock(&fs_info->qgroup_rescan_lock);
+-      fs_info->qgroup_rescan_running = false;
+-      mutex_unlock(&fs_info->qgroup_rescan_lock);
+-      complete_all(&fs_info->qgroup_rescan_completion);
+ }
+ /*
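
A userspace sketch of the fixed ordering, with a pthread condition
variable standing in for the completion and illustrative names
throughout: clearing the running flag and waking waiters happen inside
one critical section, the same one qgroup_rescan_init() would take, so
the wait queue can never be re-initialized between the two steps:

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t rescan_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t rescan_done = PTHREAD_COND_INITIALIZER;
    static bool rescan_running;

    /* Worker teardown: flag clear, status update and wakeup all happen
     * inside the same critical section. */
    static void rescan_worker_finish(void)
    {
        pthread_mutex_lock(&rescan_lock);
        /* clear BTRFS_QGROUP_STATUS_FLAG_RESCAN, update status item ... */
        rescan_running = false;
        pthread_cond_broadcast(&rescan_done); /* complete_all() analogue */
        pthread_mutex_unlock(&rescan_lock);
    }

    static void rescan_wait(void)
    {
        pthread_mutex_lock(&rescan_lock);
        while (rescan_running)
            pthread_cond_wait(&rescan_done, &rescan_lock);
        pthread_mutex_unlock(&rescan_lock);
    }

    int main(void)
    {
        rescan_running = true;
        rescan_worker_finish();
        rescan_wait(); /* returns immediately: no lost wakeup */
        return 0;
    }
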
diff --git a/queue-5.2/btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch b/queue-5.2/btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch
new file mode 100644 (file)
index 0000000..f0b6893
--- /dev/null
@@ -0,0 +1,99 @@
+From efad8a853ad2057f96664328a0d327a05ce39c76 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 12 Aug 2019 19:14:29 +0100
+Subject: Btrfs: fix use-after-free when using the tree modification log
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit efad8a853ad2057f96664328a0d327a05ce39c76 upstream.
+
+At ctree.c:get_old_root(), we are accessing a root's header owner field
+after we have freed the respective extent buffer. This results in an
+use-after-free that can lead to crashes, and when CONFIG_DEBUG_PAGEALLOC
+is set, results in a stack trace like the following:
+
+  [ 3876.799331] stack segment: 0000 [#1] SMP DEBUG_PAGEALLOC PTI
+  [ 3876.799363] CPU: 0 PID: 15436 Comm: pool Not tainted 5.3.0-rc3-btrfs-next-54 #1
+  [ 3876.799385] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
+  [ 3876.799433] RIP: 0010:btrfs_search_old_slot+0x652/0xd80 [btrfs]
+  (...)
+  [ 3876.799502] RSP: 0018:ffff9f08c1a2f9f0 EFLAGS: 00010286
+  [ 3876.799518] RAX: ffff8dd300000000 RBX: ffff8dd85a7a9348 RCX: 000000038da26000
+  [ 3876.799538] RDX: 0000000000000000 RSI: ffffe522ce368980 RDI: 0000000000000246
+  [ 3876.799559] RBP: dae1922adadad000 R08: 0000000008020000 R09: ffffe522c0000000
+  [ 3876.799579] R10: ffff8dd57fd788c8 R11: 000000007511b030 R12: ffff8dd781ddc000
+  [ 3876.799599] R13: ffff8dd9e6240578 R14: ffff8dd6896f7a88 R15: ffff8dd688cf90b8
+  [ 3876.799620] FS:  00007f23ddd97700(0000) GS:ffff8dda20200000(0000) knlGS:0000000000000000
+  [ 3876.799643] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  [ 3876.799660] CR2: 00007f23d4024000 CR3: 0000000710bb0005 CR4: 00000000003606f0
+  [ 3876.799682] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  [ 3876.799703] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  [ 3876.799723] Call Trace:
+  [ 3876.799735]  ? do_raw_spin_unlock+0x49/0xc0
+  [ 3876.799749]  ? _raw_spin_unlock+0x24/0x30
+  [ 3876.799779]  resolve_indirect_refs+0x1eb/0xc80 [btrfs]
+  [ 3876.799810]  find_parent_nodes+0x38d/0x1180 [btrfs]
+  [ 3876.799841]  btrfs_check_shared+0x11a/0x1d0 [btrfs]
+  [ 3876.799870]  ? extent_fiemap+0x598/0x6e0 [btrfs]
+  [ 3876.799895]  extent_fiemap+0x598/0x6e0 [btrfs]
+  [ 3876.799913]  do_vfs_ioctl+0x45a/0x700
+  [ 3876.799926]  ksys_ioctl+0x70/0x80
+  [ 3876.799938]  ? trace_hardirqs_off_thunk+0x1a/0x20
+  [ 3876.799953]  __x64_sys_ioctl+0x16/0x20
+  [ 3876.799965]  do_syscall_64+0x62/0x220
+  [ 3876.799977]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
+  [ 3876.799993] RIP: 0033:0x7f23e0013dd7
+  (...)
+  [ 3876.800056] RSP: 002b:00007f23ddd96ca8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+  [ 3876.800078] RAX: ffffffffffffffda RBX: 00007f23d80210f8 RCX: 00007f23e0013dd7
+  [ 3876.800099] RDX: 00007f23d80210f8 RSI: 00000000c020660b RDI: 0000000000000003
+  [ 3876.800626] RBP: 000055fa2a2a2440 R08: 0000000000000000 R09: 00007f23ddd96d7c
+  [ 3876.801143] R10: 00007f23d8022000 R11: 0000000000000246 R12: 00007f23ddd96d80
+  [ 3876.801662] R13: 00007f23ddd96d78 R14: 00007f23d80210f0 R15: 00007f23ddd96d80
+  (...)
+  [ 3876.805107] ---[ end trace e53161e179ef04f9 ]---
+
+Fix that by saving the root's header owner field into a local variable
+before freeing the root's extent buffer, and then use that local variable
+when needed.
+
+Fixes: 30b0463a9394d9 ("Btrfs: fix accessing the root pointer in tree mod log functions")
+CC: stable@vger.kernel.org # 3.10+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -1343,6 +1343,7 @@ get_old_root(struct btrfs_root *root, u6
+       struct tree_mod_elem *tm;
+       struct extent_buffer *eb = NULL;
+       struct extent_buffer *eb_root;
++      u64 eb_root_owner = 0;
+       struct extent_buffer *old;
+       struct tree_mod_root *old_root = NULL;
+       u64 old_generation = 0;
+@@ -1380,6 +1381,7 @@ get_old_root(struct btrfs_root *root, u6
+                       free_extent_buffer(old);
+               }
+       } else if (old_root) {
++              eb_root_owner = btrfs_header_owner(eb_root);
+               btrfs_tree_read_unlock(eb_root);
+               free_extent_buffer(eb_root);
+               eb = alloc_dummy_extent_buffer(fs_info, logical);
+@@ -1396,7 +1398,7 @@ get_old_root(struct btrfs_root *root, u6
+       if (old_root) {
+               btrfs_set_header_bytenr(eb, eb->start);
+               btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
+-              btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
++              btrfs_set_header_owner(eb, eb_root_owner);
+               btrfs_set_header_level(eb, old_root->level);
+               btrfs_set_header_generation(eb, old_generation);
+       }
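
The fix is an instance of a general pattern: copy any field you still
need into a local variable before freeing the object that holds it. A
trivial standalone sketch (names illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct buffer {
        uint64_t owner;
    };

    int main(void)
    {
        struct buffer *eb_root = malloc(sizeof(*eb_root));

        if (!eb_root)
            return 1;
        eb_root->owner = 42;

        /* Save the field first ... */
        uint64_t owner = eb_root->owner;

        free(eb_root);

        /* ... so nothing after the free touches eb_root. */
        printf("owner=%llu\n", (unsigned long long)owner);
        return 0;
    }
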
diff --git a/queue-5.2/btrfs-qgroup-fix-reserved-data-space-leak-if-we-have-multiple-reserve-calls.patch b/queue-5.2/btrfs-qgroup-fix-reserved-data-space-leak-if-we-have-multiple-reserve-calls.patch
new file mode 100644 (file)
index 0000000..082bd95
--- /dev/null
@@ -0,0 +1,92 @@
+From d4e204948fe3e0dc8e1fbf3f8f3290c9c2823be3 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 16 Sep 2019 20:02:39 +0800
+Subject: btrfs: qgroup: Fix reserved data space leak if we have multiple reserve calls
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit d4e204948fe3e0dc8e1fbf3f8f3290c9c2823be3 upstream.
+
+[BUG]
+The following script can cause btrfs qgroup data space leak:
+
+  mkfs.btrfs -f $dev
+  mount $dev -o nospace_cache $mnt
+
+  btrfs subv create $mnt/subv
+  btrfs quota en $mnt
+  btrfs quota rescan -w $mnt
+  btrfs qgroup limit 128m $mnt/subv
+
+  for (( i = 0; i < 3; i++)); do
+          # Create 3 64M holes for latter fallocate to fail
+          truncate -s 192m $mnt/subv/file
+          xfs_io -c "pwrite 64m 4k" $mnt/subv/file > /dev/null
+          xfs_io -c "pwrite 128m 4k" $mnt/subv/file > /dev/null
+          sync
+
+          # it's supposed to fail, and each failure will leak at least 64M
+          # data space
+          xfs_io -f -c "falloc 0 192m" $mnt/subv/file &> /dev/null
+          rm $mnt/subv/file
+          sync
+  done
+
+  # Shouldn't fail after we removed the file
+  xfs_io -f -c "falloc 0 64m" $mnt/subv/file
+
+[CAUSE]
+Btrfs qgroup data reserve code allows multiple reservations to happen on
+a single extent_changeset:
+E.g:
+       btrfs_qgroup_reserve_data(inode, &data_reserved, 0, SZ_1M);
+       btrfs_qgroup_reserve_data(inode, &data_reserved, SZ_1M, SZ_2M);
+       btrfs_qgroup_reserve_data(inode, &data_reserved, 0, SZ_4M);
+
+Btrfs qgroup code has its internal tracking to make sure we don't
+double-reserve in above example.
+
+The only pattern utilizing this feature is in the main while loop of
+btrfs_fallocate() function.
+
+However btrfs_qgroup_reserve_data()'s error handling has a bug in that
+on error it clears all ranges in the io_tree with the
+EXTENT_QGROUP_RESERVED flag but doesn't free previously reserved bytes.
+
+This bug has a two fold effect:
+- Clearing EXTENT_QGROUP_RESERVED ranges
+  This is the correct behavior, but it prevents
+  btrfs_qgroup_check_reserved_leak() to catch the leakage as the
+  detector is purely EXTENT_QGROUP_RESERVED flag based.
+
+- Leak the previously reserved data bytes.
+
+The bug manifests when N calls to btrfs_qgroup_reserve_data are made and
+the last one fails, leaking space reserved in the previous ones.
+
+[FIX]
+Also free previously reserved data bytes when btrfs_qgroup_reserve_data
+fails.
+
+Fixes: 524725537023 ("btrfs: qgroup: Introduce btrfs_qgroup_reserve_data function")
+CC: stable@vger.kernel.org # 4.4+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3425,6 +3425,9 @@ cleanup:
+       while ((unode = ulist_next(&reserved->range_changed, &uiter)))
+               clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
+                                unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
++      /* Also free data bytes of already reserved one */
++      btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid,
++                                orig_reserved, BTRFS_QGROUP_RSV_DATA);
+       extent_changeset_release(reserved);
+       return ret;
+ }
diff --git a/queue-5.2/btrfs-qgroup-fix-the-wrong-target-io_tree-when-freeing-reserved-data-space.patch b/queue-5.2/btrfs-qgroup-fix-the-wrong-target-io_tree-when-freeing-reserved-data-space.patch
new file mode 100644 (file)
index 0000000..cecc957
--- /dev/null
@@ -0,0 +1,81 @@
+From bab32fc069ce8829c416e8737c119f62a57970f9 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 16 Sep 2019 20:02:38 +0800
+Subject: btrfs: qgroup: Fix the wrong target io_tree when freeing reserved data space
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit bab32fc069ce8829c416e8737c119f62a57970f9 upstream.
+
+[BUG]
+Under the following case with qgroup enabled, if some error happened
+after we have reserved delalloc space, then in error handling path, we
+could cause qgroup data space leakage:
+
+From btrfs_truncate_block() in inode.c:
+
+       ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+                                          block_start, blocksize);
+       if (ret)
+               goto out;
+
+ again:
+       page = find_or_create_page(mapping, index, mask);
+       if (!page) {
+               btrfs_delalloc_release_space(inode, data_reserved,
+                                            block_start, blocksize, true);
+               btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
+               ret = -ENOMEM;
+               goto out;
+       }
+
+[CAUSE]
+In the above case, btrfs_delalloc_reserve_space() will call
+btrfs_qgroup_reserve_data() and mark the io_tree range with
+EXTENT_QGROUP_RESERVED flag.
+
+In the error handling path, we have the following call stack:
+btrfs_delalloc_release_space()
+|- btrfs_free_reserved_data_space()
+   |- btrsf_qgroup_free_data()
+      |- __btrfs_qgroup_release_data(reserved=@reserved, free=1)
+         |- qgroup_free_reserved_data(reserved=@reserved)
+            |- clear_record_extent_bits();
+            |- freed += changeset.bytes_changed;
+
+However, due to a completion bug, qgroup_free_reserved_data() will clear
+the EXTENT_QGROUP_RESERVED flag in BTRFS_I(inode)->io_failure_tree
+instead of the correct BTRFS_I(inode)->io_tree.
+Since io_failure_tree is never marked with that flag,
+btrfs_qgroup_free_data() will not free any reserved data space at all,
+causing a leak.
+
+This type of error handling can only be triggered by errors outside of
+qgroup code, so an EDQUOT error from qgroup can't trigger it.
+
+[FIX]
+Fix the wrong target io_tree.
+
+Reported-by: Josef Bacik <josef@toxicpanda.com>
+Fixes: bc42bda22345 ("btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges")
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/qgroup.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3469,7 +3469,7 @@ static int qgroup_free_reserved_data(str
+                * EXTENT_QGROUP_RESERVED, we won't double free.
+                * So not need to rush.
+                */
+-              ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree,
++              ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
+                               free_start, free_start + free_len - 1,
+                               EXTENT_QGROUP_RESERVED, &changeset);
+               if (ret < 0)
diff --git a/queue-5.2/btrfs-relinquish-cpus-in-btrfs_compare_trees.patch b/queue-5.2/btrfs-relinquish-cpus-in-btrfs_compare_trees.patch
new file mode 100644 (file)
index 0000000..48d7691
--- /dev/null
@@ -0,0 +1,69 @@
+From 6af112b11a4bc1b560f60a618ac9c1dcefe9836e Mon Sep 17 00:00:00 2001
+From: Nikolay Borisov <nborisov@suse.com>
+Date: Wed, 4 Sep 2019 19:33:58 +0300
+Subject: btrfs: Relinquish CPUs in btrfs_compare_trees
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+commit 6af112b11a4bc1b560f60a618ac9c1dcefe9836e upstream.
+
+When doing any form of incremental send, the parent and the child trees
+need to be compared via btrfs_compare_trees. This can result in long
+loops without ever relinquishing the CPU, which causes the softlockup
+detector to trigger when comparing trees with a lot of items. Example
+report:
+
+watchdog: BUG: soft lockup - CPU#0 stuck for 24s! [snapperd:16153]
+CPU: 0 PID: 16153 Comm: snapperd Not tainted 5.2.9-1-default #1 openSUSE Tumbleweed (unreleased)
+Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
+pstate: 40000005 (nZcv daif -PAN -UAO)
+pc : __ll_sc_arch_atomic_sub_return+0x14/0x20
+lr : btrfs_release_extent_buffer_pages+0xe0/0x1e8 [btrfs]
+sp : ffff00001273b7e0
+Call trace:
+ __ll_sc_arch_atomic_sub_return+0x14/0x20
+ release_extent_buffer+0xdc/0x120 [btrfs]
+ free_extent_buffer.part.0+0xb0/0x118 [btrfs]
+ free_extent_buffer+0x24/0x30 [btrfs]
+ btrfs_release_path+0x4c/0xa0 [btrfs]
+ btrfs_free_path.part.0+0x20/0x40 [btrfs]
+ btrfs_free_path+0x24/0x30 [btrfs]
+ get_inode_info+0xa8/0xf8 [btrfs]
+ finish_inode_if_needed+0xe0/0x6d8 [btrfs]
+ changed_cb+0x9c/0x410 [btrfs]
+ btrfs_compare_trees+0x284/0x648 [btrfs]
+ send_subvol+0x33c/0x520 [btrfs]
+ btrfs_ioctl_send+0x8a0/0xaf0 [btrfs]
+ btrfs_ioctl+0x199c/0x2288 [btrfs]
+ do_vfs_ioctl+0x4b0/0x820
+ ksys_ioctl+0x84/0xb8
+ __arm64_sys_ioctl+0x28/0x38
+ el0_svc_common.constprop.0+0x7c/0x188
+ el0_svc_handler+0x34/0x90
+ el0_svc+0x8/0xc
+
+Fix this by adding a call to cond_resched at the beginning of the main
+loop in btrfs_compare_trees.
+
+Fixes: 7069830a9e38 ("Btrfs: add btrfs_compare_trees function")
+CC: stable@vger.kernel.org # 4.4+
+Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/btrfs/ctree.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -5477,6 +5477,7 @@ int btrfs_compare_trees(struct btrfs_roo
+       advance_left = advance_right = 0;
+       while (1) {
++              cond_resched();
+               if (advance_left && !left_end_reached) {
+                       ret = tree_advance(left_path, &left_level,
+                                       left_root_level,
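
The rule the fix applies is that a loop whose iteration count depends on
on-disk data must give the scheduler a chance to run on every pass. A
rough userspace analogue, with sched_yield() standing in for
cond_resched() (which has no exact userspace equivalent):

    #include <sched.h>

    /* Stand-in for the btrfs_compare_trees() main loop: one yield per
     * iteration keeps a long comparison from monopolizing the CPU. */
    static void compare_items(long n)
    {
        for (long i = 0; i < n; i++) {
            sched_yield(); /* cond_resched() analogue */
            /* ... advance paths and compare one item ... */
        }
    }

    int main(void)
    {
        compare_items(100000);
        return 0;
    }
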
diff --git a/queue-5.2/efifb-bgrt-improve-efifb_bgrt_sanity_check.patch b/queue-5.2/efifb-bgrt-improve-efifb_bgrt_sanity_check.patch
new file mode 100644 (file)
index 0000000..67845f9
--- /dev/null
@@ -0,0 +1,71 @@
+From 51677dfcc17f88ed754143df670ff064eae67f84 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Sun, 21 Jul 2019 15:19:18 +0200
+Subject: efifb: BGRT: Improve efifb_bgrt_sanity_check
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 51677dfcc17f88ed754143df670ff064eae67f84 upstream.
+
+For various reasons, at least with x86 EFI firmwares, the xoffset and
+yoffset in the BGRT info are not always reliable.
+
+Extensive testing has shown that when the info is correct, the
+BGRT image is always exactly centered horizontally (the yoffset value
+varies more and is not always predictable).
+
+This commit simplifies the bgrt_sanity_check to just check that the
+BGRT image is exactly centered horizontally, and skips (re)drawing it
+when it is not.
+
+This fixes the BGRT image sometimes being drawn in the wrong place.
+
+Cc: stable@vger.kernel.org
+Fixes: 88fe4ceb2447 ("efifb: BGRT: Do not copy the boot graphics for non native resolutions")
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Cc: Peter Jones <pjones@redhat.com>
+Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20190721131918.10115-1-hdegoede@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/video/fbdev/efifb.c |   27 ++++++---------------------
+ 1 file changed, 6 insertions(+), 21 deletions(-)
+
+--- a/drivers/video/fbdev/efifb.c
++++ b/drivers/video/fbdev/efifb.c
+@@ -122,28 +122,13 @@ static void efifb_copy_bmp(u8 *src, u32
+  */
+ static bool efifb_bgrt_sanity_check(struct screen_info *si, u32 bmp_width)
+ {
+-      static const int default_resolutions[][2] = {
+-              {  800,  600 },
+-              { 1024,  768 },
+-              { 1280, 1024 },
+-      };
+-      u32 i, right_margin;
++      /*
++       * All x86 firmwares horizontally center the image (the yoffset
++       * calculations differ between boards, but xoffset is predictable).
++       */
++      u32 expected_xoffset = (si->lfb_width - bmp_width) / 2;
+-      for (i = 0; i < ARRAY_SIZE(default_resolutions); i++) {
+-              if (default_resolutions[i][0] == si->lfb_width &&
+-                  default_resolutions[i][1] == si->lfb_height)
+-                      break;
+-      }
+-      /* If not a default resolution used for textmode, this should be fine */
+-      if (i >= ARRAY_SIZE(default_resolutions))
+-              return true;
+-
+-      /* If the right margin is 5 times smaller then the left one, reject */
+-      right_margin = si->lfb_width - (bgrt_tab.image_offset_x + bmp_width);
+-      if (right_margin < (bgrt_tab.image_offset_x / 5))
+-              return false;
+-
+-      return true;
++      return bgrt_tab.image_offset_x == expected_xoffset;
+ }
+ #else
+ static bool efifb_bgrt_sanity_check(struct screen_info *si, u32 bmp_width)
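+
+The arithmetic of the new check can be verified in isolation. A small
+userspace sketch (the values are made up, not from real firmware):
+
+  #include <stdbool.h>
+  #include <stdint.h>
+  #include <stdio.h>
+
+  static bool bgrt_centered(uint32_t lfb_width, uint32_t bmp_width,
+                            uint32_t xoffset)
+  {
+          /* same integer division as the kernel code */
+          return xoffset == (lfb_width - bmp_width) / 2;
+  }
+
+  int main(void)
+  {
+          printf("%d\n", bgrt_centered(1920, 400, 760)); /* 1: centered */
+          printf("%d\n", bgrt_centered(1920, 400, 0));   /* 0: rejected */
+          return 0;
+  }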
diff --git a/queue-5.2/gfs2-clear-buf_in_tr-when-ending-a-transaction-in-sweep_bh_for_rgrps.patch b/queue-5.2/gfs2-clear-buf_in_tr-when-ending-a-transaction-in-sweep_bh_for_rgrps.patch
new file mode 100644 (file)
index 0000000..57c4d3a
--- /dev/null
@@ -0,0 +1,39 @@
+From f0b444b349e33ae0d3dd93e25ca365482a5d17d4 Mon Sep 17 00:00:00 2001
+From: Bob Peterson <rpeterso@redhat.com>
+Date: Thu, 12 Sep 2019 13:54:27 -0400
+Subject: gfs2: clear buf_in_tr when ending a transaction in sweep_bh_for_rgrps
+
+From: Bob Peterson <rpeterso@redhat.com>
+
+commit f0b444b349e33ae0d3dd93e25ca365482a5d17d4 upstream.
+
+Function sweep_bh_for_rgrps, a helper for punch_hole, uses the
+variable buf_in_tr to keep track of when it needs to commit
+pending block frees on a partial delete that overflows the
+transaction created for the delete. The problem is that the
+variable was initialized at the start of sweep_bh_for_rgrps
+but never cleared, even when starting a new transaction.
+
+This patch reinitializes the variable when the transaction is
+ended, so the next transaction starts out with it cleared.
+
+Fixes: d552a2b9b33e ("GFS2: Non-recursive delete")
+Cc: stable@vger.kernel.org # v4.12+
+Signed-off-by: Bob Peterson <rpeterso@redhat.com>
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/gfs2/bmap.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/gfs2/bmap.c
++++ b/fs/gfs2/bmap.c
+@@ -1670,6 +1670,7 @@ out_unlock:
+                       brelse(dibh);
+                       up_write(&ip->i_rw_mutex);
+                       gfs2_trans_end(sdp);
++                      buf_in_tr = false;
+               }
+               gfs2_glock_dq_uninit(rd_gh);
+               cond_resched();
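+
+The underlying rule: state that describes the current transaction must
+be re-initialized whenever that transaction ends. A sketch of the loop
+shape (helper names are illustrative, not the gfs2 API):
+
+  bool buf_in_tr = false;  /* "a buffer was added to the open transaction" */
+
+  while (have_more_blocks()) {
+          if (!buf_in_tr) {
+                  start_transaction();
+                  buf_in_tr = true;
+          }
+          free_blocks_in_transaction();
+          if (transaction_is_full()) {
+                  end_transaction();
+                  buf_in_tr = false;  /* the fix: clear per-transaction state */
+          }
+  }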
diff --git a/queue-5.2/i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch b/queue-5.2/i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch
new file mode 100644 (file)
index 0000000..67aa425
--- /dev/null
@@ -0,0 +1,74 @@
+From a7542b87607560d0b89e7ff81d870bd6ff8835cb Mon Sep 17 00:00:00 2001
+From: Stefan Assmann <sassmann@kpanic.de>
+Date: Wed, 21 Aug 2019 16:09:29 +0200
+Subject: i40e: check __I40E_VF_DISABLE bit in i40e_sync_filters_subtask
+
+From: Stefan Assmann <sassmann@kpanic.de>
+
+commit a7542b87607560d0b89e7ff81d870bd6ff8835cb upstream.
+
+While testing VF spawn/destroy the following panic occurred.
+
+BUG: unable to handle kernel NULL pointer dereference at 0000000000000029
+[...]
+Workqueue: i40e i40e_service_task [i40e]
+RIP: 0010:i40e_sync_vsi_filters+0x6fd/0xc60 [i40e]
+[...]
+Call Trace:
+ ? __switch_to_asm+0x35/0x70
+ ? __switch_to_asm+0x41/0x70
+ ? __switch_to_asm+0x35/0x70
+ ? _cond_resched+0x15/0x30
+ i40e_sync_filters_subtask+0x56/0x70 [i40e]
+ i40e_service_task+0x382/0x11b0 [i40e]
+ ? __switch_to_asm+0x41/0x70
+ ? __switch_to_asm+0x41/0x70
+ process_one_work+0x1a7/0x3b0
+ worker_thread+0x30/0x390
+ ? create_worker+0x1a0/0x1a0
+ kthread+0x112/0x130
+ ? kthread_bind+0x30/0x30
+ ret_from_fork+0x35/0x40
+
+Investigation revealed a race where pf->vf[vsi->vf_id].trusted may get
+accessed by the watchdog via i40e_sync_filters_subtask() although
+i40e_free_vfs() has already freed pf->vf.
+To avoid this the call to i40e_sync_vsi_filters() in
+i40e_sync_filters_subtask() needs to be guarded by __I40E_VF_DISABLE,
+which is also used by i40e_free_vfs().
+
+Note: put the __I40E_VF_DISABLE check after the
+__I40E_MACVLAN_SYNC_PENDING check as the latter is more likely to
+trigger.
+
+CC: stable@vger.kernel.org
+Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
+Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -2586,6 +2586,10 @@ static void i40e_sync_filters_subtask(st
+               return;
+       if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
+               return;
++      if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) {
++              set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
++              return;
++      }
+       for (v = 0; v < pf->num_alloc_vsi; v++) {
+               if (pf->vsi[v] &&
+@@ -2600,6 +2604,7 @@ static void i40e_sync_filters_subtask(st
+                       }
+               }
+       }
++      clear_bit(__I40E_VF_DISABLE, pf->state);
+ }
+ /**
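+
+The guard is the usual kernel idiom for excluding a periodic subtask
+from a teardown path: atomically take a state bit, back off if it is
+already held, and clear it when done. A hedged sketch (the bit name and
+helpers are placeholders, not the i40e API):
+
+  #include <linux/bitops.h>
+
+  static void sync_subtask(unsigned long *state)
+  {
+          if (test_and_set_bit(MY_DISABLE_BIT, state)) {
+                  mark_work_pending(state);  /* retry on the next run */
+                  return;
+          }
+
+          walk_vsis_and_sync_filters();      /* may touch pf->vf */
+
+          clear_bit(MY_DISABLE_BIT, state);
+  }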
diff --git a/queue-5.2/keys-trusted-correctly-initialize-digests-and-fix-locking-issue.patch b/queue-5.2/keys-trusted-correctly-initialize-digests-and-fix-locking-issue.patch
new file mode 100644 (file)
index 0000000..31073aa
--- /dev/null
@@ -0,0 +1,83 @@
+From 9f75c82246313d4c2a6bc77e947b45655b3b5ad5 Mon Sep 17 00:00:00 2001
+From: Roberto Sassu <roberto.sassu@huawei.com>
+Date: Fri, 13 Sep 2019 20:51:36 +0200
+Subject: KEYS: trusted: correctly initialize digests and fix locking issue
+
+From: Roberto Sassu <roberto.sassu@huawei.com>
+
+commit 9f75c82246313d4c2a6bc77e947b45655b3b5ad5 upstream.
+
+Commit 0b6cf6b97b7e ("tpm: pass an array of tpm_extend_digest structures to
+tpm_pcr_extend()") modifies tpm_pcr_extend() to accept a digest for each
+PCR bank. After modification, tpm_pcr_extend() expects that digests are
+passed in the same order as the algorithms set in chip->allocated_banks.
+
+This patch fixes two issues introduced in the last iterations of the patch
+set: missing initialization of the TPM algorithm ID in the tpm_digest
+structures passed to tpm_pcr_extend() by the trusted key module, and
+unreleased locks in the TPM driver due to returning from tpm_pcr_extend()
+without calling tpm_put_ops().
+
+Cc: stable@vger.kernel.org
+Fixes: 0b6cf6b97b7e ("tpm: pass an array of tpm_extend_digest structures to tpm_pcr_extend()")
+Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
+Suggested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
+Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/char/tpm/tpm-interface.c |   14 +++++++++-----
+ security/keys/trusted.c          |    5 +++++
+ 2 files changed, 14 insertions(+), 5 deletions(-)
+
+--- a/drivers/char/tpm/tpm-interface.c
++++ b/drivers/char/tpm/tpm-interface.c
+@@ -320,18 +320,22 @@ int tpm_pcr_extend(struct tpm_chip *chip
+       if (!chip)
+               return -ENODEV;
+-      for (i = 0; i < chip->nr_allocated_banks; i++)
+-              if (digests[i].alg_id != chip->allocated_banks[i].alg_id)
+-                      return -EINVAL;
++      for (i = 0; i < chip->nr_allocated_banks; i++) {
++              if (digests[i].alg_id != chip->allocated_banks[i].alg_id) {
++                      rc = -EINVAL;
++                      goto out;
++              }
++      }
+       if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+               rc = tpm2_pcr_extend(chip, pcr_idx, digests);
+-              tpm_put_ops(chip);
+-              return rc;
++              goto out;
+       }
+       rc = tpm1_pcr_extend(chip, pcr_idx, digests[0].digest,
+                            "attempting extend a PCR value");
++
++out:
+       tpm_put_ops(chip);
+       return rc;
+ }
+--- a/security/keys/trusted.c
++++ b/security/keys/trusted.c
+@@ -1228,11 +1228,16 @@ hashalg_fail:
+ static int __init init_digests(void)
+ {
++      int i;
++
+       digests = kcalloc(chip->nr_allocated_banks, sizeof(*digests),
+                         GFP_KERNEL);
+       if (!digests)
+               return -ENOMEM;
++      for (i = 0; i < chip->nr_allocated_banks; i++)
++              digests[i].alg_id = chip->allocated_banks[i].alg_id;
++
+       return 0;
+ }
diff --git a/queue-5.2/lib-lzo-lzo1x_compress.c-fix-alignment-bug-in-lzo-rle.patch b/queue-5.2/lib-lzo-lzo1x_compress.c-fix-alignment-bug-in-lzo-rle.patch
new file mode 100644 (file)
index 0000000..53db45d
--- /dev/null
@@ -0,0 +1,54 @@
+From 09b35b4192f6682dff96a093ab1930998cdb73b4 Mon Sep 17 00:00:00 2001
+From: Dave Rodgman <dave.rodgman@arm.com>
+Date: Wed, 25 Sep 2019 16:48:24 -0700
+Subject: lib/lzo/lzo1x_compress.c: fix alignment bug in lzo-rle
+
+From: Dave Rodgman <dave.rodgman@arm.com>
+
+commit 09b35b4192f6682dff96a093ab1930998cdb73b4 upstream.
+
+Fix an unaligned access which breaks on platforms where this is not
+permitted (e.g., Sparc).
+
+Link: http://lkml.kernel.org/r/20190912145502.35229-1-dave.rodgman@arm.com
+Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
+Cc: Dave Rodgman <dave.rodgman@arm.com>
+Cc: Markus F.X.J. Oberhumer <markus@oberhumer.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ lib/lzo/lzo1x_compress.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/lib/lzo/lzo1x_compress.c
++++ b/lib/lzo/lzo1x_compress.c
+@@ -83,17 +83,19 @@ next:
+                                       ALIGN((uintptr_t)ir, 4)) &&
+                                       (ir < limit) && (*ir == 0))
+                               ir++;
+-                      for (; (ir + 4) <= limit; ir += 4) {
+-                              dv = *((u32 *)ir);
+-                              if (dv) {
++                      if (IS_ALIGNED((uintptr_t)ir, 4)) {
++                              for (; (ir + 4) <= limit; ir += 4) {
++                                      dv = *((u32 *)ir);
++                                      if (dv) {
+ #  if defined(__LITTLE_ENDIAN)
+-                                      ir += __builtin_ctz(dv) >> 3;
++                                              ir += __builtin_ctz(dv) >> 3;
+ #  elif defined(__BIG_ENDIAN)
+-                                      ir += __builtin_clz(dv) >> 3;
++                                              ir += __builtin_clz(dv) >> 3;
+ #  else
+ #    error "missing endian definition"
+ #  endif
+-                                      break;
++                                              break;
++                                      }
+                               }
+                       }
+ #endif
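+
+Reduced to a userspace sketch, the fix only enters the word-at-a-time
+fast path once the pointer is provably 4-byte aligned, and falls back
+to byte scanning otherwise (an illustration, not the LZO code itself):
+
+  #include <stdint.h>
+
+  #define ALIGNED4(p) ((((uintptr_t)(p)) & 3) == 0)
+
+  static const uint8_t *skip_zeros(const uint8_t *p, const uint8_t *limit)
+  {
+          while (!ALIGNED4(p) && p < limit && *p == 0)
+                  p++;                    /* byte steps up to alignment */
+          if (ALIGNED4(p)) {
+                  while (p + 4 <= limit && *(const uint32_t *)p == 0)
+                          p += 4;         /* now-safe aligned word loads */
+          }
+          while (p < limit && *p == 0)
+                  p++;                    /* trailing bytes */
+          return p;
+  }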
diff --git a/queue-5.2/memcg-kmem-do-not-fail-__gfp_nofail-charges.patch b/queue-5.2/memcg-kmem-do-not-fail-__gfp_nofail-charges.patch
new file mode 100644 (file)
index 0000000..93aa0ce
--- /dev/null
@@ -0,0 +1,87 @@
+From e55d9d9bfb69405bd7615c0f8d229d8fafb3e9b8 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Wed, 25 Sep 2019 16:45:53 -0700
+Subject: memcg, kmem: do not fail __GFP_NOFAIL charges
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit e55d9d9bfb69405bd7615c0f8d229d8fafb3e9b8 upstream.
+
+Thomas has noticed the following NULL ptr dereference when using cgroup
+v1 kmem limit:
+BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
+PGD 0
+P4D 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+CPU: 3 PID: 16923 Comm: gtk-update-icon Not tainted 4.19.51 #42
+Hardware name: Gigabyte Technology Co., Ltd. Z97X-Gaming G1/Z97X-Gaming G1, BIOS F9 07/31/2015
+RIP: 0010:create_empty_buffers+0x24/0x100
+Code: cd 0f 1f 44 00 00 0f 1f 44 00 00 41 54 49 89 d4 ba 01 00 00 00 55 53 48 89 fb e8 97 fe ff ff 48 89 c5 48 89 c2 eb 03 48 89 ca <48> 8b 4a 08 4c 09 22 48 85 c9 75 f1 48 89 6a 08 48 8b 43 18 48 8d
+RSP: 0018:ffff927ac1b37bf8 EFLAGS: 00010286
+RAX: 0000000000000000 RBX: fffff2d4429fd740 RCX: 0000000100097149
+RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff9075a99fbe00
+RBP: 0000000000000000 R08: fffff2d440949cc8 R09: 00000000000960c0
+R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000000
+R13: ffff907601f18360 R14: 0000000000002000 R15: 0000000000001000
+FS:  00007fb55b288bc0(0000) GS:ffff90761f8c0000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000008 CR3: 000000007aebc002 CR4: 00000000001606e0
+Call Trace:
+ create_page_buffers+0x4d/0x60
+ __block_write_begin_int+0x8e/0x5a0
+ ? ext4_inode_attach_jinode.part.82+0xb0/0xb0
+ ? jbd2__journal_start+0xd7/0x1f0
+ ext4_da_write_begin+0x112/0x3d0
+ generic_perform_write+0xf1/0x1b0
+ ? file_update_time+0x70/0x140
+ __generic_file_write_iter+0x141/0x1a0
+ ext4_file_write_iter+0xef/0x3b0
+ __vfs_write+0x17e/0x1e0
+ vfs_write+0xa5/0x1a0
+ ksys_write+0x57/0xd0
+ do_syscall_64+0x55/0x160
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Tetsuo then noticed that this is because __memcg_kmem_charge_memcg
+fails a __GFP_NOFAIL charge when the kmem limit is reached.  This is
+wrong behavior because nofail allocations are not allowed to fail.  The
+normal charge path simply forces the charge even if that means crossing
+the limit.  Kmem accounting should do the same.
+
+Link: http://lkml.kernel.org/r/20190906125608.32129-1-mhocko@kernel.org
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Reported-by: Thomas Lindroth <thomas.lindroth@gmail.com>
+Debugged-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Cc: Thomas Lindroth <thomas.lindroth@gmail.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2719,6 +2719,16 @@ int __memcg_kmem_charge_memcg(struct pag
+       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
+           !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
++
++              /*
++               * Enforce __GFP_NOFAIL allocation because callers are not
++               * prepared to see failures and likely do not have any failure
++               * handling code.
++               */
++              if (gfp & __GFP_NOFAIL) {
++                      page_counter_charge(&memcg->kmem, nr_pages);
++                      return 0;
++              }
+               cancel_charge(memcg, nr_pages);
+               return -ENOMEM;
+       }
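+
+The contract being enforced, as a sketch: a charge path may not report
+failure to a __GFP_NOFAIL caller, so on a limit breach it charges anyway
+and temporarily overruns the limit (the counter helpers below are
+placeholders loosely mirroring page_counter semantics):
+
+  static int kmem_charge(struct counter *cnt, unsigned int nr_pages,
+                         gfp_t gfp)
+  {
+          if (counter_try_charge(cnt, nr_pages))
+                  return 0;                        /* fits under the limit */
+
+          if (gfp & __GFP_NOFAIL) {
+                  counter_force_charge(cnt, nr_pages); /* overrun the limit */
+                  return 0;                        /* nofail must not fail */
+          }
+
+          return -ENOMEM;
+  }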
diff --git a/queue-5.2/memcg-oom-don-t-require-__gfp_fs-when-invoking-memcg-oom-killer.patch b/queue-5.2/memcg-oom-don-t-require-__gfp_fs-when-invoking-memcg-oom-killer.patch
new file mode 100644 (file)
index 0000000..b986ebd
--- /dev/null
@@ -0,0 +1,182 @@
+From f9c645621a28e37813a1de96d9cbd89cde94a1e4 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Date: Mon, 23 Sep 2019 15:37:08 -0700
+Subject: memcg, oom: don't require __GFP_FS when invoking memcg OOM killer
+
+From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+
+commit f9c645621a28e37813a1de96d9cbd89cde94a1e4 upstream.
+
+Masoud Sharbiani noticed that commit 29ef680ae7c21110 ("memcg, oom: move
+out_of_memory back to the charge path") broke memcg OOM invoked from the
+__xfs_filemap_fault() path.  It turned out that try_charge() retries
+forever without making forward progress because mem_cgroup_oom(GFP_NOFS)
+cannot invoke the OOM killer due to commit 3da88fb3bacfaa33 ("mm, oom:
+move GFP_NOFS check to out_of_memory").
+
+Allowing a forced charge because the memcg OOM killer cannot be invoked
+would lead to a global OOM situation.  Also, just returning -ENOMEM is
+risky because the OOM path is lost and some paths (e.g. get_user_pages())
+will leak -ENOMEM.  Therefore, invoking the memcg OOM killer (despite
+GFP_NOFS) is the only viable choice for now.
+
+Until 29ef680ae7c21110, we were able to invoke the memcg OOM killer when
+GFP_KERNEL reclaim failed [1].  But since 29ef680ae7c21110, we need to
+invoke the memcg OOM killer when GFP_NOFS reclaim fails [2].  Although in
+the past we did invoke the memcg OOM killer for GFP_NOFS [3], we might get
+premature memcg OOM reports due to this patch.
+
+[1]
+
+ leaker invoked oom-killer: gfp_mask=0x6200ca(GFP_HIGHUSER_MOVABLE), nodemask=(null), order=0, oom_score_adj=0
+ CPU: 0 PID: 2746 Comm: leaker Not tainted 4.18.0+ #19
+ Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
+ Call Trace:
+  dump_stack+0x63/0x88
+  dump_header+0x67/0x27a
+  ? mem_cgroup_scan_tasks+0x91/0xf0
+  oom_kill_process+0x210/0x410
+  out_of_memory+0x10a/0x2c0
+  mem_cgroup_out_of_memory+0x46/0x80
+  mem_cgroup_oom_synchronize+0x2e4/0x310
+  ? high_work_func+0x20/0x20
+  pagefault_out_of_memory+0x31/0x76
+  mm_fault_error+0x55/0x115
+  ? handle_mm_fault+0xfd/0x220
+  __do_page_fault+0x433/0x4e0
+  do_page_fault+0x22/0x30
+  ? page_fault+0x8/0x30
+  page_fault+0x1e/0x30
+ RIP: 0033:0x4009f0
+ Code: 03 00 00 00 e8 71 fd ff ff 48 83 f8 ff 49 89 c6 74 74 48 89 c6 bf c0 0c 40 00 31 c0 e8 69 fd ff ff 45 85 ff 7e 21 31 c9 66 90 <41> 0f be 14 0e 01 d3 f7 c1 ff 0f 00 00 75 05 41 c6 04 0e 2a 48 83
+ RSP: 002b:00007ffe29ae96f0 EFLAGS: 00010206
+ RAX: 000000000000001b RBX: 0000000000000000 RCX: 0000000001ce1000
+ RDX: 0000000000000000 RSI: 000000007fffffe5 RDI: 0000000000000000
+ RBP: 000000000000000c R08: 0000000000000000 R09: 00007f94be09220d
+ R10: 0000000000000002 R11: 0000000000000246 R12: 00000000000186a0
+ R13: 0000000000000003 R14: 00007f949d845000 R15: 0000000002800000
+ Task in /leaker killed as a result of limit of /leaker
+ memory: usage 524288kB, limit 524288kB, failcnt 158965
+ memory+swap: usage 0kB, limit 9007199254740988kB, failcnt 0
+ kmem: usage 2016kB, limit 9007199254740988kB, failcnt 0
+ Memory cgroup stats for /leaker: cache:844KB rss:521136KB rss_huge:0KB shmem:0KB mapped_file:0KB dirty:132KB writeback:0KB inactive_anon:0KB active_anon:521224KB inactive_file:1012KB active_file:8KB unevictable:0KB
+ Memory cgroup out of memory: Kill process 2746 (leaker) score 998 or sacrifice child
+ Killed process 2746 (leaker) total-vm:536704kB, anon-rss:521176kB, file-rss:1208kB, shmem-rss:0kB
+ oom_reaper: reaped process 2746 (leaker), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
+
+[2]
+
+ leaker invoked oom-killer: gfp_mask=0x600040(GFP_NOFS), nodemask=(null), order=0, oom_score_adj=0
+ CPU: 1 PID: 2746 Comm: leaker Not tainted 4.18.0+ #20
+ Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
+ Call Trace:
+  dump_stack+0x63/0x88
+  dump_header+0x67/0x27a
+  ? mem_cgroup_scan_tasks+0x91/0xf0
+  oom_kill_process+0x210/0x410
+  out_of_memory+0x109/0x2d0
+  mem_cgroup_out_of_memory+0x46/0x80
+  try_charge+0x58d/0x650
+  ? __radix_tree_replace+0x81/0x100
+  mem_cgroup_try_charge+0x7a/0x100
+  __add_to_page_cache_locked+0x92/0x180
+  add_to_page_cache_lru+0x4d/0xf0
+  iomap_readpages_actor+0xde/0x1b0
+  ? iomap_zero_range_actor+0x1d0/0x1d0
+  iomap_apply+0xaf/0x130
+  iomap_readpages+0x9f/0x150
+  ? iomap_zero_range_actor+0x1d0/0x1d0
+  xfs_vm_readpages+0x18/0x20 [xfs]
+  read_pages+0x60/0x140
+  __do_page_cache_readahead+0x193/0x1b0
+  ondemand_readahead+0x16d/0x2c0
+  page_cache_async_readahead+0x9a/0xd0
+  filemap_fault+0x403/0x620
+  ? alloc_set_pte+0x12c/0x540
+  ? _cond_resched+0x14/0x30
+  __xfs_filemap_fault+0x66/0x180 [xfs]
+  xfs_filemap_fault+0x27/0x30 [xfs]
+  __do_fault+0x19/0x40
+  __handle_mm_fault+0x8e8/0xb60
+  handle_mm_fault+0xfd/0x220
+  __do_page_fault+0x238/0x4e0
+  do_page_fault+0x22/0x30
+  ? page_fault+0x8/0x30
+  page_fault+0x1e/0x30
+ RIP: 0033:0x4009f0
+ Code: 03 00 00 00 e8 71 fd ff ff 48 83 f8 ff 49 89 c6 74 74 48 89 c6 bf c0 0c 40 00 31 c0 e8 69 fd ff ff 45 85 ff 7e 21 31 c9 66 90 <41> 0f be 14 0e 01 d3 f7 c1 ff 0f 00 00 75 05 41 c6 04 0e 2a 48 83
+ RSP: 002b:00007ffda45c9290 EFLAGS: 00010206
+ RAX: 000000000000001b RBX: 0000000000000000 RCX: 0000000001a1e000
+ RDX: 0000000000000000 RSI: 000000007fffffe5 RDI: 0000000000000000
+ RBP: 000000000000000c R08: 0000000000000000 R09: 00007f6d061ff20d
+ R10: 0000000000000002 R11: 0000000000000246 R12: 00000000000186a0
+ R13: 0000000000000003 R14: 00007f6ce59b2000 R15: 0000000002800000
+ Task in /leaker killed as a result of limit of /leaker
+ memory: usage 524288kB, limit 524288kB, failcnt 7221
+ memory+swap: usage 0kB, limit 9007199254740988kB, failcnt 0
+ kmem: usage 1944kB, limit 9007199254740988kB, failcnt 0
+ Memory cgroup stats for /leaker: cache:3632KB rss:518232KB rss_huge:0KB shmem:0KB mapped_file:0KB dirty:0KB writeback:0KB inactive_anon:0KB active_anon:518408KB inactive_file:3908KB active_file:12KB unevictable:0KB
+ Memory cgroup out of memory: Kill process 2746 (leaker) score 992 or sacrifice child
+ Killed process 2746 (leaker) total-vm:536704kB, anon-rss:518264kB, file-rss:1188kB, shmem-rss:0kB
+ oom_reaper: reaped process 2746 (leaker), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
+
+[3]
+
+ leaker invoked oom-killer: gfp_mask=0x50, order=0, oom_score_adj=0
+ leaker cpuset=/ mems_allowed=0
+ CPU: 1 PID: 3206 Comm: leaker Not tainted 3.10.0-957.27.2.el7.x86_64 #1
+ Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
+ Call Trace:
+  [<ffffffffaf364147>] dump_stack+0x19/0x1b
+  [<ffffffffaf35eb6a>] dump_header+0x90/0x229
+  [<ffffffffaedbb456>] ? find_lock_task_mm+0x56/0xc0
+  [<ffffffffaee32a38>] ? try_get_mem_cgroup_from_mm+0x28/0x60
+  [<ffffffffaedbb904>] oom_kill_process+0x254/0x3d0
+  [<ffffffffaee36c36>] mem_cgroup_oom_synchronize+0x546/0x570
+  [<ffffffffaee360b0>] ? mem_cgroup_charge_common+0xc0/0xc0
+  [<ffffffffaedbc194>] pagefault_out_of_memory+0x14/0x90
+  [<ffffffffaf35d072>] mm_fault_error+0x6a/0x157
+  [<ffffffffaf3717c8>] __do_page_fault+0x3c8/0x4f0
+  [<ffffffffaf371925>] do_page_fault+0x35/0x90
+  [<ffffffffaf36d768>] page_fault+0x28/0x30
+ Task in /leaker killed as a result of limit of /leaker
+ memory: usage 524288kB, limit 524288kB, failcnt 20628
+ memory+swap: usage 524288kB, limit 9007199254740988kB, failcnt 0
+ kmem: usage 0kB, limit 9007199254740988kB, failcnt 0
+ Memory cgroup stats for /leaker: cache:840KB rss:523448KB rss_huge:0KB mapped_file:0KB swap:0KB inactive_anon:0KB active_anon:523448KB inactive_file:464KB active_file:376KB unevictable:0KB
+ Memory cgroup out of memory: Kill process 3206 (leaker) score 970 or sacrifice child
+ Killed process 3206 (leaker) total-vm:536692kB, anon-rss:523304kB, file-rss:412kB, shmem-rss:0kB
+
+Bisected by Masoud Sharbiani.
+
+Link: http://lkml.kernel.org/r/cbe54ed1-b6ba-a056-8899-2dc42526371d@i-love.sakura.ne.jp
+Fixes: 3da88fb3bacfaa33 ("mm, oom: move GFP_NOFS check to out_of_memory") [necessary after 29ef680ae7c21110]
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reported-by: Masoud Sharbiani <msharbiani@apple.com>
+Tested-by: Masoud Sharbiani <msharbiani@apple.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: <stable@vger.kernel.org>   [4.19+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/oom_kill.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -1060,9 +1060,10 @@ bool out_of_memory(struct oom_control *o
+        * The OOM killer does not compensate for IO-less reclaim.
+        * pagefault_out_of_memory lost its gfp context so we have to
+        * make sure exclude 0 mask - all other users should have at least
+-       * ___GFP_DIRECT_RECLAIM to get here.
++       * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
++       * invoke the OOM killer even if it is a GFP_NOFS allocation.
+        */
+-      if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
++      if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
+               return true;
+       /*
diff --git a/queue-5.2/mm-compaction.c-clear-total_-migrate-free-_scanned-before-scanning-a-new-zone.patch b/queue-5.2/mm-compaction.c-clear-total_-migrate-free-_scanned-before-scanning-a-new-zone.patch
new file mode 100644 (file)
index 0000000..d40636d
--- /dev/null
@@ -0,0 +1,127 @@
+From a94b525241c0fff3598809131d7cfcfe1d572d8c Mon Sep 17 00:00:00 2001
+From: Yafang Shao <laoar.shao@gmail.com>
+Date: Mon, 23 Sep 2019 15:36:54 -0700
+Subject: mm/compaction.c: clear total_{migrate,free}_scanned before scanning a new zone
+
+From: Yafang Shao <laoar.shao@gmail.com>
+
+commit a94b525241c0fff3598809131d7cfcfe1d572d8c upstream.
+
+total_{migrate,free}_scanned will be added to COMPACTMIGRATE_SCANNED and
+COMPACTFREE_SCANNED in compact_zone().  We should clear them before
+scanning a new zone.  In proc-triggered compaction, we forgot to clear
+them.
+
+[laoar.shao@gmail.com: introduce a helper compact_zone_counters_init()]
+  Link: http://lkml.kernel.org/r/1563869295-25748-1-git-send-email-laoar.shao@gmail.com
+[akpm@linux-foundation.org: expand compact_zone_counters_init() into its single callsite, per mhocko]
+[vbabka@suse.cz: squash compact_zone() list_head init as well]
+  Link: http://lkml.kernel.org/r/1fb6f7da-f776-9e42-22f8-bbb79b030b98@suse.cz
+[akpm@linux-foundation.org: kcompactd_do_work(): avoid unnecessary initialization of cc.zone]
+Link: http://lkml.kernel.org/r/1563789275-9639-1-git-send-email-laoar.shao@gmail.com
+Fixes: 7f354a548d1c ("mm, compaction: add vmstats for kcompactd work")
+Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Yafang Shao <shaoyafang@didiglobal.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/compaction.c |   35 +++++++++++++----------------------
+ 1 file changed, 13 insertions(+), 22 deletions(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -2078,6 +2078,17 @@ compact_zone(struct compact_control *cc,
+       const bool sync = cc->mode != MIGRATE_ASYNC;
+       bool update_cached;
++      /*
++       * These counters track activities during zone compaction.  Initialize
++       * them before compacting a new zone.
++       */
++      cc->total_migrate_scanned = 0;
++      cc->total_free_scanned = 0;
++      cc->nr_migratepages = 0;
++      cc->nr_freepages = 0;
++      INIT_LIST_HEAD(&cc->freepages);
++      INIT_LIST_HEAD(&cc->migratepages);
++
+       cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask);
+       ret = compaction_suitable(cc->zone, cc->order, cc->alloc_flags,
+                                                       cc->classzone_idx);
+@@ -2281,10 +2292,6 @@ static enum compact_result compact_zone_
+ {
+       enum compact_result ret;
+       struct compact_control cc = {
+-              .nr_freepages = 0,
+-              .nr_migratepages = 0,
+-              .total_migrate_scanned = 0,
+-              .total_free_scanned = 0,
+               .order = order,
+               .search_order = order,
+               .gfp_mask = gfp_mask,
+@@ -2305,8 +2312,6 @@ static enum compact_result compact_zone_
+       if (capture)
+               current->capture_control = &capc;
+-      INIT_LIST_HEAD(&cc.freepages);
+-      INIT_LIST_HEAD(&cc.migratepages);
+       ret = compact_zone(&cc, &capc);
+@@ -2408,8 +2413,6 @@ static void compact_node(int nid)
+       struct zone *zone;
+       struct compact_control cc = {
+               .order = -1,
+-              .total_migrate_scanned = 0,
+-              .total_free_scanned = 0,
+               .mode = MIGRATE_SYNC,
+               .ignore_skip_hint = true,
+               .whole_zone = true,
+@@ -2423,11 +2426,7 @@ static void compact_node(int nid)
+               if (!populated_zone(zone))
+                       continue;
+-              cc.nr_freepages = 0;
+-              cc.nr_migratepages = 0;
+               cc.zone = zone;
+-              INIT_LIST_HEAD(&cc.freepages);
+-              INIT_LIST_HEAD(&cc.migratepages);
+               compact_zone(&cc, NULL);
+@@ -2529,8 +2528,6 @@ static void kcompactd_do_work(pg_data_t
+       struct compact_control cc = {
+               .order = pgdat->kcompactd_max_order,
+               .search_order = pgdat->kcompactd_max_order,
+-              .total_migrate_scanned = 0,
+-              .total_free_scanned = 0,
+               .classzone_idx = pgdat->kcompactd_classzone_idx,
+               .mode = MIGRATE_SYNC_LIGHT,
+               .ignore_skip_hint = false,
+@@ -2554,16 +2551,10 @@ static void kcompactd_do_work(pg_data_t
+                                                       COMPACT_CONTINUE)
+                       continue;
+-              cc.nr_freepages = 0;
+-              cc.nr_migratepages = 0;
+-              cc.total_migrate_scanned = 0;
+-              cc.total_free_scanned = 0;
+-              cc.zone = zone;
+-              INIT_LIST_HEAD(&cc.freepages);
+-              INIT_LIST_HEAD(&cc.migratepages);
+-
+               if (kthread_should_stop())
+                       return;
++
++              cc.zone = zone;
+               status = compact_zone(&cc, NULL);
+               if (status == COMPACT_SUCCESS) {
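+
+The structural point generalizes: state that must be fresh for each
+zone is reset inside the function that consumes it, so no call site can
+forget it. A sketch (field names abridged from compact_control):
+
+  struct cc_state {
+          unsigned long total_migrate_scanned;
+          unsigned long total_free_scanned;
+  };
+
+  static void compact_one_zone(struct cc_state *cc)
+  {
+          /* reset where the scan starts, not at every caller */
+          cc->total_migrate_scanned = 0;
+          cc->total_free_scanned = 0;
+          /* ... scan the zone, accumulating into the counters ... */
+  }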
diff --git a/queue-5.2/mt76-round-up-length-on-mt76_wr_copy.patch b/queue-5.2/mt76-round-up-length-on-mt76_wr_copy.patch
new file mode 100644 (file)
index 0000000..c17d5fe
--- /dev/null
@@ -0,0 +1,46 @@
+From 850e8f6fbd5d0003b0f1119d19a01c6fef1644e2 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau <nbd@nbd.name>
+Date: Mon, 1 Jul 2019 13:15:07 +0200
+Subject: mt76: round up length on mt76_wr_copy
+
+From: Felix Fietkau <nbd@nbd.name>
+
+commit 850e8f6fbd5d0003b0f1119d19a01c6fef1644e2 upstream.
+
+When beacon length is not a multiple of 4, the beacon could be sent with
+the last 1-3 bytes corrupted. The skb data is guaranteed to have enough
+room for reading beyond the end, because it is always followed by
+skb_shared_info, so rounding up is safe.
+All other callers of mt76_wr_copy already pass a multiple-of-4 length.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/mediatek/mt76/mmio.c |    2 +-
+ drivers/net/wireless/mediatek/mt76/usb.c  |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/wireless/mediatek/mt76/mmio.c
++++ b/drivers/net/wireless/mediatek/mt76/mmio.c
+@@ -43,7 +43,7 @@ static u32 mt76_mmio_rmw(struct mt76_dev
+ static void mt76_mmio_copy(struct mt76_dev *dev, u32 offset, const void *data,
+                          int len)
+ {
+-      __iowrite32_copy(dev->mmio.regs + offset, data, len >> 2);
++      __iowrite32_copy(dev->mmio.regs + offset, data, DIV_ROUND_UP(len, 4));
+ }
+ static int mt76_mmio_wr_rp(struct mt76_dev *dev, u32 base,
+--- a/drivers/net/wireless/mediatek/mt76/usb.c
++++ b/drivers/net/wireless/mediatek/mt76/usb.c
+@@ -164,7 +164,7 @@ static void mt76u_copy(struct mt76_dev *
+       int i, ret;
+       mutex_lock(&usb->usb_ctrl_mtx);
+-      for (i = 0; i < (len / 4); i++) {
++      for (i = 0; i < DIV_ROUND_UP(len, 4); i++) {
+               put_unaligned_le32(val[i], usb->data);
+               ret = __mt76u_vendor_request(dev, MT_VEND_MULTI_WRITE,
+                                            USB_DIR_OUT | USB_TYPE_VENDOR,
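+
+The arithmetic in isolation: len >> 2 truncates and drops a final
+partial word, while DIV_ROUND_UP(len, 4) includes it. A userspace
+check (safe in the driver because the skb tail provides readable
+slack, per the commit message):
+
+  #include <stdio.h>
+
+  #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
+  int main(void)
+  {
+          unsigned int len = 7;  /* e.g. a 7-byte beacon */
+
+          printf("truncated:  %u words\n", len >> 2);             /* 1 */
+          printf("rounded up: %u words\n", DIV_ROUND_UP(len, 4)); /* 2 */
+          return 0;
+  }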
diff --git a/queue-5.2/ovl-filter-of-trusted-xattr-results-in-audit.patch b/queue-5.2/ovl-filter-of-trusted-xattr-results-in-audit.patch
new file mode 100644 (file)
index 0000000..6217951
--- /dev/null
@@ -0,0 +1,41 @@
+From 5c2e9f346b815841f9bed6029ebcb06415caf640 Mon Sep 17 00:00:00 2001
+From: Mark Salyzyn <salyzyn@android.com>
+Date: Thu, 29 Aug 2019 11:30:14 -0700
+Subject: ovl: filter of trusted xattr results in audit
+
+From: Mark Salyzyn <salyzyn@android.com>
+
+commit 5c2e9f346b815841f9bed6029ebcb06415caf640 upstream.
+
+When filtering the xattr list for reading, the presence of a trusted
+xattr results in a security audit log entry.  However, if there is other
+content no errno will be set, and if there isn't, the errno will be
+-ENODATA rather than the -EPERM usually associated with a lack of
+capability.  The check does not block the request to list the xattrs
+present.
+
+Switch to ns_capable_noaudit to reflect a more appropriate check.
+
+Signed-off-by: Mark Salyzyn <salyzyn@android.com>
+Cc: linux-security-module@vger.kernel.org
+Cc: kernel-team@android.com
+Cc: stable@vger.kernel.org # v3.18+
+Fixes: a082c6f680da ("ovl: filter trusted xattr for non-admin")
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/inode.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/overlayfs/inode.c
++++ b/fs/overlayfs/inode.c
+@@ -383,7 +383,8 @@ static bool ovl_can_list(const char *s)
+               return true;
+       /* Never list trusted.overlay, list other trusted for superuser only */
+-      return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
++      return !ovl_is_private_xattr(s) &&
++             ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+ }
+ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
diff --git a/queue-5.2/ovl-fix-dereferencing-possible-err_ptr.patch b/queue-5.2/ovl-fix-dereferencing-possible-err_ptr.patch
new file mode 100644 (file)
index 0000000..665e203
--- /dev/null
@@ -0,0 +1,35 @@
+From 97f024b9171e74c4443bbe8a8dce31b917f97ac5 Mon Sep 17 00:00:00 2001
+From: Ding Xiang <dingxiang@cmss.chinamobile.com>
+Date: Mon, 9 Sep 2019 16:29:56 +0800
+Subject: ovl: Fix dereferencing possible ERR_PTR()
+
+From: Ding Xiang <dingxiang@cmss.chinamobile.com>
+
+commit 97f024b9171e74c4443bbe8a8dce31b917f97ac5 upstream.
+
+If ovl_encode_real_fh() fails, no memory has been allocated and the
+error encoded in the error-valued pointer should be returned directly.
+
+Fixes: 9b6faee07470 ("ovl: check ERR_PTR() return value from ovl_encode_fh()")
+Signed-off-by: Ding Xiang <dingxiang@cmss.chinamobile.com>
+Cc: <stable@vger.kernel.org> # v4.16+
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/overlayfs/export.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/overlayfs/export.c
++++ b/fs/overlayfs/export.c
+@@ -227,9 +227,8 @@ static int ovl_d_to_fh(struct dentry *de
+       /* Encode an upper or lower file handle */
+       fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
+                               ovl_dentry_upper(dentry), !enc_lower);
+-      err = PTR_ERR(fh);
+       if (IS_ERR(fh))
+-              goto fail;
++              return PTR_ERR(fh);
+       err = -EOVERFLOW;
+       if (fh->len > buflen)
diff --git a/queue-5.2/rtw88-pci-rearrange-the-memory-usage-for-skb-in-rx-isr.patch b/queue-5.2/rtw88-pci-rearrange-the-memory-usage-for-skb-in-rx-isr.patch
new file mode 100644 (file)
index 0000000..b8a3aa2
--- /dev/null
@@ -0,0 +1,124 @@
+From ee6db78f5db9bfe426c57a1ec9713827ebccd2d4 Mon Sep 17 00:00:00 2001
+From: Jian-Hong Pan <jian-hong@endlessm.com>
+Date: Thu, 11 Jul 2019 13:24:26 +0800
+Subject: rtw88: pci: Rearrange the memory usage for skb in RX ISR
+
+From: Jian-Hong Pan <jian-hong@endlessm.com>
+
+commit ee6db78f5db9bfe426c57a1ec9713827ebccd2d4 upstream.
+
+When testing with RTL8822BE hardware under low available memory, we
+frequently see a kernel panic and system freeze.
+
+First, rtw_pci_rx_isr encounters a memory allocation failure (trimmed):
+
+rx routine starvation
+WARNING: CPU: 7 PID: 9871 at drivers/net/wireless/realtek/rtw88/pci.c:822 rtw_pci_rx_isr.constprop.25+0x35a/0x370 [rtwpci]
+[ 2356.580313] RIP: 0010:rtw_pci_rx_isr.constprop.25+0x35a/0x370 [rtwpci]
+
+Then we see a variety of different error conditions and kernel panics,
+such as this one (trimmed):
+
+rtw_pci 0000:02:00.0: pci bus timeout, check dma status
+skbuff: skb_over_panic: text:00000000091b6e66 len:415 put:415 head:00000000d2880c6f data:000000007a02b1ea tail:0x1df end:0xc0 dev:<NULL>
+------------[ cut here ]------------
+kernel BUG at net/core/skbuff.c:105!
+invalid opcode: 0000 [#1] SMP NOPTI
+RIP: 0010:skb_panic+0x43/0x45
+
+When skb allocation fails and the "rx routine starvation" warning is
+hit, the function returns immediately without updating the RX ring. At
+this point, the RX ring may continue referencing an old skb which was
+already handed off to ieee80211_rx_irqsafe(). When it is used again,
+bad things happen.
+
+This patch allocates a new, data-sized skb first in RX ISR. After
+copying the data in, we pass it to the upper layers. However, if skb
+allocation fails, we effectively drop the frame. In both cases, the
+original, full size ring skb is reused.
+
+In addition to fixing the kernel crash, the RX routine should now
+generally behave better under low-memory conditions.
+
+Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=204053
+Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/realtek/rtw88/pci.c |   49 +++++++++++++------------------
+ 1 file changed, 22 insertions(+), 27 deletions(-)
+
+--- a/drivers/net/wireless/realtek/rtw88/pci.c
++++ b/drivers/net/wireless/realtek/rtw88/pci.c
+@@ -763,6 +763,7 @@ static void rtw_pci_rx_isr(struct rtw_de
+       u32 pkt_offset;
+       u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
+       u32 buf_desc_sz = chip->rx_buf_desc_sz;
++      u32 new_len;
+       u8 *rx_desc;
+       dma_addr_t dma;
+@@ -790,40 +791,34 @@ static void rtw_pci_rx_isr(struct rtw_de
+               pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz +
+                            pkt_stat.shift;
+-              if (pkt_stat.is_c2h) {
+-                      /* keep rx_desc, halmac needs it */
+-                      skb_put(skb, pkt_stat.pkt_len + pkt_offset);
++              /* allocate a new skb for this frame,
++               * discard the frame if none available
++               */
++              new_len = pkt_stat.pkt_len + pkt_offset;
++              new = dev_alloc_skb(new_len);
++              if (WARN_ONCE(!new, "rx routine starvation\n"))
++                      goto next_rp;
++
++              /* put the DMA data including rx_desc from phy to new skb */
++              skb_put_data(new, skb->data, new_len);
+-                      /* pass offset for further operation */
+-                      *((u32 *)skb->cb) = pkt_offset;
+-                      skb_queue_tail(&rtwdev->c2h_queue, skb);
++              if (pkt_stat.is_c2h) {
++                       /* pass rx_desc & offset for further operation */
++                      *((u32 *)new->cb) = pkt_offset;
++                      skb_queue_tail(&rtwdev->c2h_queue, new);
+                       ieee80211_queue_work(rtwdev->hw, &rtwdev->c2h_work);
+               } else {
+-                      /* remove rx_desc, maybe use skb_pull? */
+-                      skb_put(skb, pkt_stat.pkt_len);
+-                      skb_reserve(skb, pkt_offset);
+-
+-                      /* alloc a smaller skb to mac80211 */
+-                      new = dev_alloc_skb(pkt_stat.pkt_len);
+-                      if (!new) {
+-                              new = skb;
+-                      } else {
+-                              skb_put_data(new, skb->data, skb->len);
+-                              dev_kfree_skb_any(skb);
+-                      }
+-                      /* TODO: merge into rx.c */
+-                      rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
++                      /* remove rx_desc */
++                      skb_pull(new, pkt_offset);
++
++                      rtw_rx_stats(rtwdev, pkt_stat.vif, new);
+                       memcpy(new->cb, &rx_status, sizeof(rx_status));
+                       ieee80211_rx_irqsafe(rtwdev->hw, new);
+               }
+-              /* skb delivered to mac80211, alloc a new one in rx ring */
+-              new = dev_alloc_skb(RTK_PCI_RX_BUF_SIZE);
+-              if (WARN(!new, "rx routine starvation\n"))
+-                      return;
+-
+-              ring->buf[cur_rp] = new;
+-              rtw_pci_reset_rx_desc(rtwdev, new, ring, cur_rp, buf_desc_sz);
++next_rp:
++              /* new skb delivered to mac80211, re-enable original skb DMA */
++              rtw_pci_reset_rx_desc(rtwdev, skb, ring, cur_rp, buf_desc_sz);
+               /* host read next element in ring */
+               if (++cur_rp >= ring->r.len)
diff --git a/queue-5.2/rtw88-pci-use-dma-sync-instead-of-remapping-in-rx-isr.patch b/queue-5.2/rtw88-pci-use-dma-sync-instead-of-remapping-in-rx-isr.patch
new file mode 100644 (file)
index 0000000..498152a
--- /dev/null
@@ -0,0 +1,68 @@
+From 29b68a920f6abb7b5ba21ab4b779f62d536bac9b Mon Sep 17 00:00:00 2001
+From: Jian-Hong Pan <jian-hong@endlessm.com>
+Date: Thu, 11 Jul 2019 13:24:27 +0800
+Subject: rtw88: pci: Use DMA sync instead of remapping in RX ISR
+
+From: Jian-Hong Pan <jian-hong@endlessm.com>
+
+commit 29b68a920f6abb7b5ba21ab4b779f62d536bac9b upstream.
+
+Since each skb in the RX ring is reused rather than newly allocated, we
+can handle the DMA more efficiently with DMA synchronization.
+
+Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/wireless/realtek/rtw88/pci.c |   24 +++++++++++++++++++++---
+ 1 file changed, 21 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/wireless/realtek/rtw88/pci.c
++++ b/drivers/net/wireless/realtek/rtw88/pci.c
+@@ -206,6 +206,23 @@ static int rtw_pci_reset_rx_desc(struct
+       return 0;
+ }
++static void rtw_pci_sync_rx_desc_device(struct rtw_dev *rtwdev, dma_addr_t dma,
++                                      struct rtw_pci_rx_ring *rx_ring,
++                                      u32 idx, u32 desc_sz)
++{
++      struct device *dev = rtwdev->dev;
++      struct rtw_pci_rx_buffer_desc *buf_desc;
++      int buf_sz = RTK_PCI_RX_BUF_SIZE;
++
++      dma_sync_single_for_device(dev, dma, buf_sz, DMA_FROM_DEVICE);
++
++      buf_desc = (struct rtw_pci_rx_buffer_desc *)(rx_ring->r.head +
++                                                   idx * desc_sz);
++      memset(buf_desc, 0, sizeof(*buf_desc));
++      buf_desc->buf_size = cpu_to_le16(RTK_PCI_RX_BUF_SIZE);
++      buf_desc->dma = cpu_to_le32(dma);
++}
++
+ static int rtw_pci_init_rx_ring(struct rtw_dev *rtwdev,
+                               struct rtw_pci_rx_ring *rx_ring,
+                               u8 desc_size, u32 len)
+@@ -782,8 +799,8 @@ static void rtw_pci_rx_isr(struct rtw_de
+               rtw_pci_dma_check(rtwdev, ring, cur_rp);
+               skb = ring->buf[cur_rp];
+               dma = *((dma_addr_t *)skb->cb);
+-              pci_unmap_single(rtwpci->pdev, dma, RTK_PCI_RX_BUF_SIZE,
+-                               PCI_DMA_FROMDEVICE);
++              dma_sync_single_for_cpu(rtwdev->dev, dma, RTK_PCI_RX_BUF_SIZE,
++                                      DMA_FROM_DEVICE);
+               rx_desc = skb->data;
+               chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat, &rx_status);
+@@ -818,7 +835,8 @@ static void rtw_pci_rx_isr(struct rtw_de
+ next_rp:
+               /* new skb delivered to mac80211, re-enable original skb DMA */
+-              rtw_pci_reset_rx_desc(rtwdev, skb, ring, cur_rp, buf_desc_sz);
++              rtw_pci_sync_rx_desc_device(rtwdev, dma, ring, cur_rp,
++                                          buf_desc_sz);
+               /* host read next element in ring */
+               if (++cur_rp >= ring->r.len)
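+
+Taken together with the previous rtw88 patch, the per-frame flow now
+keeps the ring buffer mapped for its whole lifetime and only bounces
+ownership between CPU and device. A sketch (constants and helpers are
+placeholders):
+
+  /* once, at ring setup */
+  dma = dma_map_single(dev, skb->data, RX_BUF_SIZE, DMA_FROM_DEVICE);
+
+  /* per received frame */
+  dma_sync_single_for_cpu(dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE);
+  copy_frame_to_new_skb(skb->data);  /* CPU may read the buffer now */
+  dma_sync_single_for_device(dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE);
+  /* the device may DMA into the buffer again */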
index 8c0209fe3ebc794b6d4ced427022a9be0f30c3f8..d3e6dad9d3520e93b3e8da9034c9aaa41312c85b 100644 (file)
@@ -260,3 +260,30 @@ iommu-arm-smmu-v3-disable-detection-of-ats-and-pri.patch
 alarmtimer-use-eopnotsupp-instead-of-enotsupp.patch
 iommu-vt-d-fix-wrong-analysis-whether-devices-share-the-same-bus.patch
 regulator-defer-init-completion-for-a-while-after-late_initcall.patch
+efifb-bgrt-improve-efifb_bgrt_sanity_check.patch
+gfs2-clear-buf_in_tr-when-ending-a-transaction-in-sweep_bh_for_rgrps.patch
+z3fold-fix-retry-mechanism-in-page-reclaim.patch
+z3fold-fix-memory-leak-in-kmem-cache.patch
+mm-compaction.c-clear-total_-migrate-free-_scanned-before-scanning-a-new-zone.patch
+memcg-oom-don-t-require-__gfp_fs-when-invoking-memcg-oom-killer.patch
+memcg-kmem-do-not-fail-__gfp_nofail-charges.patch
+lib-lzo-lzo1x_compress.c-fix-alignment-bug-in-lzo-rle.patch
+mt76-round-up-length-on-mt76_wr_copy.patch
+keys-trusted-correctly-initialize-digests-and-fix-locking-issue.patch
+rtw88-pci-rearrange-the-memory-usage-for-skb-in-rx-isr.patch
+rtw88-pci-use-dma-sync-instead-of-remapping-in-rx-isr.patch
+ath10k-fix-channel-info-parsing-for-non-tlv-target.patch
+i40e-check-__i40e_vf_disable-bit-in-i40e_sync_filters_subtask.patch
+block-mq-deadline-fix-queue-restart-handling.patch
+block-fix-null-pointer-dereference-in-blk_mq_rq_timed_out.patch
+smb3-allow-disabling-requesting-leases.patch
+smb3-fix-leak-in-open-on-server-perf-counter.patch
+ovl-fix-dereferencing-possible-err_ptr.patch
+ovl-filter-of-trusted-xattr-results-in-audit.patch
+btrfs-fix-allocation-of-free-space-cache-v1-bitmap-pages.patch
+btrfs-fix-use-after-free-when-using-the-tree-modification-log.patch
+btrfs-relinquish-cpus-in-btrfs_compare_trees.patch
+btrfs-adjust-dirty_metadata_bytes-after-writeback-failure-of-extent-buffer.patch
+btrfs-qgroup-fix-the-wrong-target-io_tree-when-freeing-reserved-data-space.patch
+btrfs-qgroup-fix-reserved-data-space-leak-if-we-have-multiple-reserve-calls.patch
+btrfs-fix-race-setting-up-and-completing-qgroup-rescan-workers.patch
diff --git a/queue-5.2/smb3-allow-disabling-requesting-leases.patch b/queue-5.2/smb3-allow-disabling-requesting-leases.patch
new file mode 100644 (file)
index 0000000..afd11bd
--- /dev/null
@@ -0,0 +1,118 @@
+From 3e7a02d47872081f4b6234a9f72500f1d10f060c Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Wed, 11 Sep 2019 21:46:20 -0500
+Subject: smb3: allow disabling requesting leases
+
+From: Steve French <stfrench@microsoft.com>
+
+commit 3e7a02d47872081f4b6234a9f72500f1d10f060c upstream.
+
+In some cases, to work around server bugs or performance problems, it
+can be helpful to be able to disable requesting SMB2.1/SMB3 leases on a
+particular mount (rather than on all servers and all shares we are
+mounted to). Add the new mount parm "nolease" which turns off
+requesting leases on directory or file opens.  Currently the only way
+to disable leases is globally, through a module load parameter; this
+new option is more granular.
+
+Suggested-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
+Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
+CC: Stable <stable@vger.kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/cifsfs.c   |    2 ++
+ fs/cifs/cifsglob.h |    2 ++
+ fs/cifs/connect.c  |    9 ++++++++-
+ fs/cifs/smb2pdu.c  |    2 +-
+ 4 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/fs/cifs/cifsfs.c
++++ b/fs/cifs/cifsfs.c
+@@ -433,6 +433,8 @@ cifs_show_options(struct seq_file *s, st
+       cifs_show_security(s, tcon->ses);
+       cifs_show_cache_flavor(s, cifs_sb);
++      if (tcon->no_lease)
++              seq_puts(s, ",nolease");
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
+               seq_puts(s, ",multiuser");
+       else if (tcon->ses->user_name)
+--- a/fs/cifs/cifsglob.h
++++ b/fs/cifs/cifsglob.h
+@@ -575,6 +575,7 @@ struct smb_vol {
+       bool noblocksnd:1;
+       bool noautotune:1;
+       bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
++      bool no_lease:1;     /* disable requesting leases */
+       bool fsc:1;     /* enable fscache */
+       bool mfsymlinks:1; /* use Minshall+French Symlinks */
+       bool multiuser:1;
+@@ -1079,6 +1080,7 @@ struct cifs_tcon {
+       bool need_reopen_files:1; /* need to reopen tcon file handles */
+       bool use_resilient:1; /* use resilient instead of durable handles */
+       bool use_persistent:1; /* use persistent instead of durable handles */
++      bool no_lease:1;    /* Do not request leases on files or directories */
+       __le32 capabilities;
+       __u32 share_flags;
+       __u32 maximal_access;
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -74,7 +74,7 @@ enum {
+       Opt_user_xattr, Opt_nouser_xattr,
+       Opt_forceuid, Opt_noforceuid,
+       Opt_forcegid, Opt_noforcegid,
+-      Opt_noblocksend, Opt_noautotune,
++      Opt_noblocksend, Opt_noautotune, Opt_nolease,
+       Opt_hard, Opt_soft, Opt_perm, Opt_noperm,
+       Opt_mapposix, Opt_nomapposix,
+       Opt_mapchars, Opt_nomapchars, Opt_sfu,
+@@ -133,6 +133,7 @@ static const match_table_t cifs_mount_op
+       { Opt_noforcegid, "noforcegid" },
+       { Opt_noblocksend, "noblocksend" },
+       { Opt_noautotune, "noautotune" },
++      { Opt_nolease, "nolease" },
+       { Opt_hard, "hard" },
+       { Opt_soft, "soft" },
+       { Opt_perm, "perm" },
+@@ -1709,6 +1710,9 @@ cifs_parse_mount_options(const char *mou
+               case Opt_noautotune:
+                       vol->noautotune = 1;
+                       break;
++              case Opt_nolease:
++                      vol->no_lease = 1;
++                      break;
+               case Opt_hard:
+                       vol->retry = 1;
+                       break;
+@@ -3230,6 +3234,8 @@ static int match_tcon(struct cifs_tcon *
+               return 0;
+       if (tcon->handle_timeout != volume_info->handle_timeout)
+               return 0;
++      if (tcon->no_lease != volume_info->no_lease)
++              return 0;
+       return 1;
+ }
+@@ -3444,6 +3450,7 @@ cifs_get_tcon(struct cifs_ses *ses, stru
+       tcon->nocase = volume_info->nocase;
+       tcon->nohandlecache = volume_info->nohandlecache;
+       tcon->local_lease = volume_info->local_lease;
++      tcon->no_lease = volume_info->no_lease;
+       INIT_LIST_HEAD(&tcon->pending_opens);
+       spin_lock(&cifs_tcp_ses_lock);
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -2370,7 +2370,7 @@ SMB2_open_init(struct cifs_tcon *tcon, s
+       iov[1].iov_len = uni_path_len;
+       iov[1].iov_base = path;
+-      if (!server->oplocks)
++      if ((!server->oplocks) || (tcon->no_lease))
+               *oplock = SMB2_OPLOCK_LEVEL_NONE;
+       if (!(server->capabilities & SMB2_GLOBAL_CAP_LEASING) ||
diff --git a/queue-5.2/smb3-fix-leak-in-open-on-server-perf-counter.patch b/queue-5.2/smb3-fix-leak-in-open-on-server-perf-counter.patch
new file mode 100644 (file)
index 0000000..fe53706
--- /dev/null
@@ -0,0 +1,61 @@
+From d2f15428d6a0ebfc0edc364094d7c4a2de7037ed Mon Sep 17 00:00:00 2001
+From: Steve French <stfrench@microsoft.com>
+Date: Sun, 22 Sep 2019 00:55:46 -0500
+Subject: smb3: fix leak in "open on server" perf counter
+
+From: Steve French <stfrench@microsoft.com>
+
+commit d2f15428d6a0ebfc0edc364094d7c4a2de7037ed upstream.
+
+In some cases we were not bumping up the "open on server"
+(num_remote_opens) counter on opens of the share root, so it
+could end up showing as a negative value.
+
+CC: Stable <stable@vger.kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/cifs/smb2ops.c |    5 +++++
+ fs/cifs/smb2pdu.c |    1 +
+ 2 files changed, 6 insertions(+)
+
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -743,6 +743,8 @@ int open_shroot(unsigned int xid, struct
+       if (rc)
+               goto oshr_exit;
++      atomic_inc(&tcon->num_remote_opens);
++
+       o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
+       oparms.fid->persistent_fid = o_rsp->PersistentFileId;
+       oparms.fid->volatile_fid = o_rsp->VolatileFileId;
+@@ -1167,6 +1169,7 @@ smb2_set_ea(const unsigned int xid, stru
+       rc = compound_send_recv(xid, ses, flags, 3, rqst,
+                               resp_buftype, rsp_iov);
++      /* no need to bump num_remote_opens because handle immediately closed */
+  sea_exit:
+       kfree(ea);
+@@ -1488,6 +1491,8 @@ smb2_ioctl_query_info(const unsigned int
+                               resp_buftype, rsp_iov);
+       if (rc)
+               goto iqinf_exit;
++
++      /* No need to bump num_remote_opens since handle immediately closed */
+       if (qi.flags & PASSTHRU_FSCTL) {
+               pqi = (struct smb_query_info __user *)arg;
+               io_rsp = (struct smb2_ioctl_rsp *)rsp_iov[1].iov_base;
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -2263,6 +2263,7 @@ int smb311_posix_mkdir(const unsigned in
+       rqst.rq_iov = iov;
+       rqst.rq_nvec = n_iov;
++      /* no need to inc num_remote_opens because we close it just below */
+       trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, CREATE_NOT_FILE,
+                                   FILE_WRITE_ATTRIBUTES);
+       /* resource #4: response buffer */
diff --git a/queue-5.2/z3fold-fix-memory-leak-in-kmem-cache.patch b/queue-5.2/z3fold-fix-memory-leak-in-kmem-cache.patch
new file mode 100644 (file)
index 0000000..d1fc1ed
--- /dev/null
@@ -0,0 +1,72 @@
+From 63398413c00c7836ea87a1fa205c91d2199b25cf Mon Sep 17 00:00:00 2001
+From: Vitaly Wool <vitalywool@gmail.com>
+Date: Mon, 23 Sep 2019 15:36:51 -0700
+Subject: z3fold: fix memory leak in kmem cache
+
+From: Vitaly Wool <vitalywool@gmail.com>
+
+commit 63398413c00c7836ea87a1fa205c91d2199b25cf upstream.
+
+Currently there is a leak in init_z3fold_page() -- it allocates handles
+from the kmem cache even for headless pages, but they are then never used
+and never freed, so eventually the kmem cache may get exhausted.  This
+patch provides a fix for that.
+
+Link: http://lkml.kernel.org/r/20190917185352.44cf285d3ebd9e64548de5de@gmail.com
+Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
+Reported-by: Markus Linnala <markus.linnala@gmail.com>
+Tested-by: Markus Linnala <markus.linnala@gmail.com>
+Cc: Dan Streetman <ddstreet@ieee.org>
+Cc: Henry Burns <henrywolfeburns@gmail.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/z3fold.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/mm/z3fold.c
++++ b/mm/z3fold.c
+@@ -297,14 +297,11 @@ static void z3fold_unregister_migration(
+  }
+ /* Initializes the z3fold header of a newly allocated z3fold page */
+-static struct z3fold_header *init_z3fold_page(struct page *page,
++static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
+                                       struct z3fold_pool *pool, gfp_t gfp)
+ {
+       struct z3fold_header *zhdr = page_address(page);
+-      struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);
+-
+-      if (!slots)
+-              return NULL;
++      struct z3fold_buddy_slots *slots;
+       INIT_LIST_HEAD(&page->lru);
+       clear_bit(PAGE_HEADLESS, &page->private);
+@@ -312,6 +309,12 @@ static struct z3fold_header *init_z3fold
+       clear_bit(NEEDS_COMPACTING, &page->private);
+       clear_bit(PAGE_STALE, &page->private);
+       clear_bit(PAGE_CLAIMED, &page->private);
++      if (headless)
++              return zhdr;
++
++      slots = alloc_slots(pool, gfp);
++      if (!slots)
++              return NULL;
+       spin_lock_init(&zhdr->page_lock);
+       kref_init(&zhdr->refcount);
+@@ -932,7 +935,7 @@ retry:
+       if (!page)
+               return -ENOMEM;
+-      zhdr = init_z3fold_page(page, pool, gfp);
++      zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
+       if (!zhdr) {
+               __free_page(page);
+               return -ENOMEM;
diff --git a/queue-5.2/z3fold-fix-retry-mechanism-in-page-reclaim.patch b/queue-5.2/z3fold-fix-retry-mechanism-in-page-reclaim.patch
new file mode 100644 (file)
index 0000000..ae1a1bd
--- /dev/null
@@ -0,0 +1,176 @@
+From 3f9d2b5766aea06042630ac60b7316fd0cebf06f Mon Sep 17 00:00:00 2001
+From: Vitaly Wool <vitalywool@gmail.com>
+Date: Mon, 23 Sep 2019 15:33:02 -0700
+Subject: z3fold: fix retry mechanism in page reclaim
+
+From: Vitaly Wool <vitalywool@gmail.com>
+
+commit 3f9d2b5766aea06042630ac60b7316fd0cebf06f upstream.
+
+z3fold_reclaim_page()'s retry mechanism is broken: on a second iteration
+it still holds zhdr from the first one, so zhdr is no longer in line
+with the current struct page.  That leads to crashes when the system is
+stressed.
+
+Fix that by moving the zhdr assignment up.
+
+While at it, protect against using already freed handles by using a
+local slots structure in z3fold_reclaim_page().
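+
+The pointer-staleness part, as a standalone userspace sketch (all names
+hypothetical; only the loop shape mirrors the kernel code) -- the fix is
+to re-derive the header from the page picked in the current iteration,
+so a retry can never leave the two out of sync:
+
+    #include <assert.h>
+
+    struct page_model { char mem[64]; };
+
+    static void *page_address_model(struct page_model *p)
+    {
+            return p->mem;
+    }
+
+    int main(void)
+    {
+            struct page_model pages[3];
+            void *hdr = NULL;
+
+            for (int i = 0; i < 3; i++) {
+                    struct page_model *page = &pages[i];
+
+                    /* Assign before any check that might "continue":
+                     * hdr now always matches this iteration's page. */
+                    hdr = page_address_model(page);
+
+                    if (i == 0)
+                            continue; /* a retry, as in reclaim */
+
+                    assert(hdr == (void *)page->mem);
+            }
+            return 0;
+    }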
+
+Link: http://lkml.kernel.org/r/20190908162919.830388dc7404d1e2c80f4095@gmail.com
+Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
+Reported-by: Markus Linnala <markus.linnala@gmail.com>
+Reported-by: Chris Murphy <bugzilla@colorremedies.com>
+Reported-by: Agustin Dall'Alba <agustin@dallalba.com.ar>
+Cc: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Henry Burns <henrywolfeburns@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/z3fold.c |   49 ++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 34 insertions(+), 15 deletions(-)
+
+--- a/mm/z3fold.c
++++ b/mm/z3fold.c
+@@ -368,9 +368,10 @@ static inline int __idx(struct z3fold_he
+  * Encodes the handle of a particular buddy within a z3fold page
+  * Pool lock should be held as this function accesses first_num
+  */
+-static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
++static unsigned long __encode_handle(struct z3fold_header *zhdr,
++                              struct z3fold_buddy_slots *slots,
++                              enum buddy bud)
+ {
+-      struct z3fold_buddy_slots *slots;
+       unsigned long h = (unsigned long)zhdr;
+       int idx = 0;
+@@ -387,11 +388,15 @@ static unsigned long encode_handle(struc
+       if (bud == LAST)
+               h |= (zhdr->last_chunks << BUDDY_SHIFT);
+-      slots = zhdr->slots;
+       slots->slot[idx] = h;
+       return (unsigned long)&slots->slot[idx];
+ }
++static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
++{
++      return __encode_handle(zhdr, zhdr->slots, bud);
++}
++
+ /* Returns the z3fold page where a given handle is stored */
+ static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h)
+ {
+@@ -626,6 +631,7 @@ static void do_compact_page(struct z3fol
+       }
+       if (unlikely(PageIsolated(page) ||
++                   test_bit(PAGE_CLAIMED, &page->private) ||
+                    test_bit(PAGE_STALE, &page->private))) {
+               z3fold_page_unlock(zhdr);
+               return;
+@@ -1102,6 +1108,7 @@ static int z3fold_reclaim_page(struct z3
+       struct z3fold_header *zhdr = NULL;
+       struct page *page = NULL;
+       struct list_head *pos;
++      struct z3fold_buddy_slots slots;
+       unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
+       spin_lock(&pool->lock);
+@@ -1120,16 +1127,22 @@ static int z3fold_reclaim_page(struct z3
+                       /* this bit could have been set by free, in which case
+                        * we pass over to the next page in the pool.
+                        */
+-                      if (test_and_set_bit(PAGE_CLAIMED, &page->private))
++                      if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
++                              page = NULL;
+                               continue;
++                      }
+-                      if (unlikely(PageIsolated(page)))
++                      if (unlikely(PageIsolated(page))) {
++                              clear_bit(PAGE_CLAIMED, &page->private);
++                              page = NULL;
+                               continue;
++                      }
++                      zhdr = page_address(page);
+                       if (test_bit(PAGE_HEADLESS, &page->private))
+                               break;
+-                      zhdr = page_address(page);
+                       if (!z3fold_page_trylock(zhdr)) {
++                              clear_bit(PAGE_CLAIMED, &page->private);
+                               zhdr = NULL;
+                               continue; /* can't evict at this point */
+                       }
+@@ -1147,26 +1160,30 @@ static int z3fold_reclaim_page(struct z3
+               if (!test_bit(PAGE_HEADLESS, &page->private)) {
+                       /*
+-                       * We need encode the handles before unlocking, since
+-                       * we can race with free that will set
+-                       * (first|last)_chunks to 0
++                       * We need encode the handles before unlocking, and
++                       * use our local slots structure because z3fold_free
++                       * can zero out zhdr->slots and we can't do much
++                       * about that
+                        */
+                       first_handle = 0;
+                       last_handle = 0;
+                       middle_handle = 0;
+                       if (zhdr->first_chunks)
+-                              first_handle = encode_handle(zhdr, FIRST);
++                              first_handle = __encode_handle(zhdr, &slots,
++                                                              FIRST);
+                       if (zhdr->middle_chunks)
+-                              middle_handle = encode_handle(zhdr, MIDDLE);
++                              middle_handle = __encode_handle(zhdr, &slots,
++                                                              MIDDLE);
+                       if (zhdr->last_chunks)
+-                              last_handle = encode_handle(zhdr, LAST);
++                              last_handle = __encode_handle(zhdr, &slots,
++                                                              LAST);
+                       /*
+                        * it's safe to unlock here because we hold a
+                        * reference to this page
+                        */
+                       z3fold_page_unlock(zhdr);
+               } else {
+-                      first_handle = encode_handle(zhdr, HEADLESS);
++                      first_handle = __encode_handle(zhdr, &slots, HEADLESS);
+                       last_handle = middle_handle = 0;
+               }
+@@ -1196,9 +1213,9 @@ next:
+                       spin_lock(&pool->lock);
+                       list_add(&page->lru, &pool->lru);
+                       spin_unlock(&pool->lock);
++                      clear_bit(PAGE_CLAIMED, &page->private);
+               } else {
+                       z3fold_page_lock(zhdr);
+-                      clear_bit(PAGE_CLAIMED, &page->private);
+                       if (kref_put(&zhdr->refcount,
+                                       release_z3fold_page_locked)) {
+                               atomic64_dec(&pool->pages_nr);
+@@ -1213,6 +1230,7 @@ next:
+                       list_add(&page->lru, &pool->lru);
+                       spin_unlock(&pool->lock);
+                       z3fold_page_unlock(zhdr);
++                      clear_bit(PAGE_CLAIMED, &page->private);
+               }
+               /* We started off locked to we need to lock the pool back */
+@@ -1317,7 +1335,8 @@ static bool z3fold_page_isolate(struct p
+       VM_BUG_ON_PAGE(!PageMovable(page), page);
+       VM_BUG_ON_PAGE(PageIsolated(page), page);
+-      if (test_bit(PAGE_HEADLESS, &page->private))
++      if (test_bit(PAGE_HEADLESS, &page->private) ||
++          test_bit(PAGE_CLAIMED, &page->private))
+               return false;
+       zhdr = page_address(page);