]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.18-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Nov 2017 09:41:13 +0000 (10:41 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Nov 2017 09:41:13 +0000 (10:41 +0100)
added patches:
blk-mq-fix-race-between-timeout-and-freeing-request.patch

queue-3.18/blk-mq-fix-race-between-timeout-and-freeing-request.patch [new file with mode: 0644]
queue-3.18/series [new file with mode: 0644]

diff --git a/queue-3.18/blk-mq-fix-race-between-timeout-and-freeing-request.patch b/queue-3.18/blk-mq-fix-race-between-timeout-and-freeing-request.patch
new file mode 100644 (file)
index 0000000..e5f29fa
--- /dev/null
@@ -0,0 +1,217 @@
+From 0048b4837affd153897ed1222283492070027aa9 Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@canonical.com>
+Date: Sun, 9 Aug 2015 03:41:51 -0400
+Subject: blk-mq: fix race between timeout and freeing request
+
+From: Ming Lei <ming.lei@canonical.com>
+
+commit 0048b4837affd153897ed1222283492070027aa9 upstream.
+
+Inside timeout handler, blk_mq_tag_to_rq() is called
+to retrieve the request from one tag. This way is obviously
+wrong because the request can be freed any time and some
+fiedds of the request can't be trusted, then kernel oops
+might be triggered[1].
+
+Currently wrt. blk_mq_tag_to_rq(), the only special case is
+that the flush request can share same tag with the request
+cloned from, and the two requests can't be active at the same
+time, so this patch fixes the above issue by updating tags->rqs[tag]
+with the active request(either flush rq or the request cloned
+from) of the tag.
+
+Also blk_mq_tag_to_rq() gets much simplified with this patch.
+
+Given blk_mq_tag_to_rq() is mainly for drivers and the caller must
+make sure the request can't be freed, so in bt_for_each() this
+helper is replaced with tags->rqs[tag].
+
+[1] kernel oops log
+[  439.696220] BUG: unable to handle kernel NULL pointer dereference at 0000000000000158^M
+[  439.697162] IP: [<ffffffff812d89ba>] blk_mq_tag_to_rq+0x21/0x6e^M
+[  439.700653] PGD 7ef765067 PUD 7ef764067 PMD 0 ^M
+[  439.700653] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC ^M
+[  439.700653] Dumping ftrace buffer:^M
+[  439.700653]    (ftrace buffer empty)^M
+[  439.700653] Modules linked in: nbd ipv6 kvm_intel kvm serio_raw^M
+[  439.700653] CPU: 6 PID: 2779 Comm: stress-ng-sigfd Not tainted 4.2.0-rc5-next-20150805+ #265^M
+[  439.730500] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011^M
+[  439.730500] task: ffff880605308000 ti: ffff88060530c000 task.ti: ffff88060530c000^M
+[  439.730500] RIP: 0010:[<ffffffff812d89ba>]  [<ffffffff812d89ba>] blk_mq_tag_to_rq+0x21/0x6e^M
+[  439.730500] RSP: 0018:ffff880819203da0  EFLAGS: 00010283^M
+[  439.730500] RAX: ffff880811b0e000 RBX: ffff8800bb465f00 RCX: 0000000000000002^M
+[  439.730500] RDX: 0000000000000000 RSI: 0000000000000202 RDI: 0000000000000000^M
+[  439.730500] RBP: ffff880819203db0 R08: 0000000000000002 R09: 0000000000000000^M
+[  439.730500] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000202^M
+[  439.730500] R13: ffff880814104800 R14: 0000000000000002 R15: ffff880811a2ea00^M
+[  439.730500] FS:  00007f165b3f5740(0000) GS:ffff880819200000(0000) knlGS:0000000000000000^M
+[  439.730500] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b^M
+[  439.730500] CR2: 0000000000000158 CR3: 00000007ef766000 CR4: 00000000000006e0^M
+[  439.730500] Stack:^M
+[  439.730500]  0000000000000008 ffff8808114eed90 ffff880819203e00 ffffffff812dc104^M
+[  439.755663]  ffff880819203e40 ffffffff812d9f5e 0000020000000000 ffff8808114eed80^M
+[  439.755663] Call Trace:^M
+[  439.755663]  <IRQ> ^M
+[  439.755663]  [<ffffffff812dc104>] bt_for_each+0x6e/0xc8^M
+[  439.755663]  [<ffffffff812d9f5e>] ? blk_mq_rq_timed_out+0x6a/0x6a^M
+[  439.755663]  [<ffffffff812d9f5e>] ? blk_mq_rq_timed_out+0x6a/0x6a^M
+[  439.755663]  [<ffffffff812dc1b3>] blk_mq_tag_busy_iter+0x55/0x5e^M
+[  439.755663]  [<ffffffff812d88b4>] ? blk_mq_bio_to_request+0x38/0x38^M
+[  439.755663]  [<ffffffff812d8911>] blk_mq_rq_timer+0x5d/0xd4^M
+[  439.755663]  [<ffffffff810a3e10>] call_timer_fn+0xf7/0x284^M
+[  439.755663]  [<ffffffff810a3d1e>] ? call_timer_fn+0x5/0x284^M
+[  439.755663]  [<ffffffff812d88b4>] ? blk_mq_bio_to_request+0x38/0x38^M
+[  439.755663]  [<ffffffff810a46d6>] run_timer_softirq+0x1ce/0x1f8^M
+[  439.755663]  [<ffffffff8104c367>] __do_softirq+0x181/0x3a4^M
+[  439.755663]  [<ffffffff8104c76e>] irq_exit+0x40/0x94^M
+[  439.755663]  [<ffffffff81031482>] smp_apic_timer_interrupt+0x33/0x3e^M
+[  439.755663]  [<ffffffff815559a4>] apic_timer_interrupt+0x84/0x90^M
+[  439.755663]  <EOI> ^M
+[  439.755663]  [<ffffffff81554350>] ? _raw_spin_unlock_irq+0x32/0x4a^M
+[  439.755663]  [<ffffffff8106a98b>] finish_task_switch+0xe0/0x163^M
+[  439.755663]  [<ffffffff8106a94d>] ? finish_task_switch+0xa2/0x163^M
+[  439.755663]  [<ffffffff81550066>] __schedule+0x469/0x6cd^M
+[  439.755663]  [<ffffffff8155039b>] schedule+0x82/0x9a^M
+[  439.789267]  [<ffffffff8119b28b>] signalfd_read+0x186/0x49a^M
+[  439.790911]  [<ffffffff8106d86a>] ? wake_up_q+0x47/0x47^M
+[  439.790911]  [<ffffffff811618c2>] __vfs_read+0x28/0x9f^M
+[  439.790911]  [<ffffffff8117a289>] ? __fget_light+0x4d/0x74^M
+[  439.790911]  [<ffffffff811620a7>] vfs_read+0x7a/0xc6^M
+[  439.790911]  [<ffffffff8116292b>] SyS_read+0x49/0x7f^M
+[  439.790911]  [<ffffffff81554c17>] entry_SYSCALL_64_fastpath+0x12/0x6f^M
+[  439.790911] Code: 48 89 e5 e8 a9 b8 e7 ff 5d c3 0f 1f 44 00 00 55 89
+f2 48 89 e5 41 54 41 89 f4 53 48 8b 47 60 48 8b 1c d0 48 8b 7b 30 48 8b
+53 38 <48> 8b 87 58 01 00 00 48 85 c0 75 09 48 8b 97 88 0c 00 00 eb 10
+^M
+[  439.790911] RIP  [<ffffffff812d89ba>] blk_mq_tag_to_rq+0x21/0x6e^M
+[  439.790911]  RSP <ffff880819203da0>^M
+[  439.790911] CR2: 0000000000000158^M
+[  439.790911] ---[ end trace d40af58949325661 ]---^M
+
+Signed-off-by: Ming Lei <ming.lei@canonical.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-flush.c  |   15 ++++++++++++++-
+ block/blk-mq-tag.c |    2 +-
+ block/blk-mq-tag.h |   12 ++++++++++++
+ block/blk-mq.c     |   16 +---------------
+ block/blk.h        |    6 ++++++
+ 5 files changed, 34 insertions(+), 17 deletions(-)
+
+--- a/block/blk-flush.c
++++ b/block/blk-flush.c
+@@ -73,6 +73,7 @@
+ #include "blk.h"
+ #include "blk-mq.h"
++#include "blk-mq-tag.h"
+ /* FLUSH/FUA sequences */
+ enum {
+@@ -226,7 +227,12 @@ static void flush_end_io(struct request
+       struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
+       if (q->mq_ops) {
++              struct blk_mq_hw_ctx *hctx;
++
++              /* release the tag's ownership to the req cloned from */
+               spin_lock_irqsave(&fq->mq_flush_lock, flags);
++              hctx = q->mq_ops->map_queue(q, flush_rq->mq_ctx->cpu);
++              blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
+               flush_rq->tag = -1;
+       }
+@@ -308,11 +314,18 @@ static bool blk_kick_flush(struct reques
+       /*
+        * Borrow tag from the first request since they can't
+-       * be in flight at the same time.
++       * be in flight at the same time. And acquire the tag's
++       * ownership for flush req.
+        */
+       if (q->mq_ops) {
++              struct blk_mq_hw_ctx *hctx;
++
+               flush_rq->mq_ctx = first_rq->mq_ctx;
+               flush_rq->tag = first_rq->tag;
++              fq->orig_rq = first_rq;
++
++              hctx = q->mq_ops->map_queue(q, first_rq->mq_ctx->cpu);
++              blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+       }
+       flush_rq->cmd_type = REQ_TYPE_FS;
+--- a/block/blk-mq-tag.c
++++ b/block/blk-mq-tag.c
+@@ -403,7 +403,7 @@ static void bt_for_each(struct blk_mq_hw
+               for (bit = find_first_bit(&bm->word, bm->depth);
+                    bit < bm->depth;
+                    bit = find_next_bit(&bm->word, bm->depth, bit + 1)) {
+-                      rq = blk_mq_tag_to_rq(hctx->tags, off + bit);
++                      rq = hctx->tags->rqs[off + bit];
+                       if (rq->q == hctx->queue)
+                               fn(hctx, rq, data, reserved);
+               }
+--- a/block/blk-mq-tag.h
++++ b/block/blk-mq-tag.h
+@@ -85,4 +85,16 @@ static inline void blk_mq_tag_idle(struc
+       __blk_mq_tag_idle(hctx);
+ }
++/*
++ * This helper should only be used for flush request to share tag
++ * with the request cloned from, and both the two requests can't be
++ * in flight at the same time. The caller has to make sure the tag
++ * can't be freed.
++ */
++static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
++              unsigned int tag, struct request *rq)
++{
++      hctx->tags->rqs[tag] = rq;
++}
++
+ #endif
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -498,23 +498,9 @@ void blk_mq_kick_requeue_list(struct req
+ }
+ EXPORT_SYMBOL(blk_mq_kick_requeue_list);
+-static inline bool is_flush_request(struct request *rq,
+-              struct blk_flush_queue *fq, unsigned int tag)
+-{
+-      return ((rq->cmd_flags & REQ_FLUSH_SEQ) &&
+-                      fq->flush_rq->tag == tag);
+-}
+-
+ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+ {
+-      struct request *rq = tags->rqs[tag];
+-      /* mq_ctx of flush rq is always cloned from the corresponding req */
+-      struct blk_flush_queue *fq = blk_get_flush_queue(rq->q, rq->mq_ctx);
+-
+-      if (!is_flush_request(rq, fq, tag))
+-              return rq;
+-
+-      return fq->flush_rq;
++      return tags->rqs[tag];
+ }
+ EXPORT_SYMBOL(blk_mq_tag_to_rq);
+--- a/block/blk.h
++++ b/block/blk.h
+@@ -22,6 +22,12 @@ struct blk_flush_queue {
+       struct list_head        flush_queue[2];
+       struct list_head        flush_data_in_flight;
+       struct request          *flush_rq;
++
++      /*
++       * flush_rq shares tag with this rq, both can't be active
++       * at the same time
++       */
++      struct request          *orig_rq;
+       spinlock_t              mq_flush_lock;
+ };
diff --git a/queue-3.18/series b/queue-3.18/series
new file mode 100644 (file)
index 0000000..32e83a6
--- /dev/null
@@ -0,0 +1 @@
+blk-mq-fix-race-between-timeout-and-freeing-request.patch