From: Greg Kroah-Hartman Date: Fri, 3 Nov 2017 09:41:13 +0000 (+0100) Subject: 3.18-stable patches X-Git-Tag: v3.18.80~24 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f3648d661c417c56746f1b8b20d99775b4e8e238;p=thirdparty%2Fkernel%2Fstable-queue.git 3.18-stable patches added patches: blk-mq-fix-race-between-timeout-and-freeing-request.patch --- diff --git a/queue-3.18/blk-mq-fix-race-between-timeout-and-freeing-request.patch b/queue-3.18/blk-mq-fix-race-between-timeout-and-freeing-request.patch new file mode 100644 index 00000000000..e5f29faa78e --- /dev/null +++ b/queue-3.18/blk-mq-fix-race-between-timeout-and-freeing-request.patch @@ -0,0 +1,217 @@ +From 0048b4837affd153897ed1222283492070027aa9 Mon Sep 17 00:00:00 2001 +From: Ming Lei +Date: Sun, 9 Aug 2015 03:41:51 -0400 +Subject: blk-mq: fix race between timeout and freeing request + +From: Ming Lei + +commit 0048b4837affd153897ed1222283492070027aa9 upstream. + +Inside timeout handler, blk_mq_tag_to_rq() is called +to retrieve the request from one tag. This way is obviously +wrong because the request can be freed any time and some +fields of the request can't be trusted, then kernel oops +might be triggered[1]. + +Currently wrt. blk_mq_tag_to_rq(), the only special case is +that the flush request can share same tag with the request +cloned from, and the two requests can't be active at the same +time, so this patch fixes the above issue by updating tags->rqs[tag] +with the active request (either flush rq or the request cloned +from) of the tag. + +Also blk_mq_tag_to_rq() gets much simplified with this patch. + +Given blk_mq_tag_to_rq() is mainly for drivers and the caller must +make sure the request can't be freed, so in bt_for_each() this +helper is replaced with tags->rqs[tag]. 
+ +[1] kernel oops log +[ 439.696220] BUG: unable to handle kernel NULL pointer dereference at 0000000000000158^M +[ 439.697162] IP: [] blk_mq_tag_to_rq+0x21/0x6e^M +[ 439.700653] PGD 7ef765067 PUD 7ef764067 PMD 0 ^M +[ 439.700653] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC ^M +[ 439.700653] Dumping ftrace buffer:^M +[ 439.700653] (ftrace buffer empty)^M +[ 439.700653] Modules linked in: nbd ipv6 kvm_intel kvm serio_raw^M +[ 439.700653] CPU: 6 PID: 2779 Comm: stress-ng-sigfd Not tainted 4.2.0-rc5-next-20150805+ #265^M +[ 439.730500] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011^M +[ 439.730500] task: ffff880605308000 ti: ffff88060530c000 task.ti: ffff88060530c000^M +[ 439.730500] RIP: 0010:[] [] blk_mq_tag_to_rq+0x21/0x6e^M +[ 439.730500] RSP: 0018:ffff880819203da0 EFLAGS: 00010283^M +[ 439.730500] RAX: ffff880811b0e000 RBX: ffff8800bb465f00 RCX: 0000000000000002^M +[ 439.730500] RDX: 0000000000000000 RSI: 0000000000000202 RDI: 0000000000000000^M +[ 439.730500] RBP: ffff880819203db0 R08: 0000000000000002 R09: 0000000000000000^M +[ 439.730500] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000202^M +[ 439.730500] R13: ffff880814104800 R14: 0000000000000002 R15: ffff880811a2ea00^M +[ 439.730500] FS: 00007f165b3f5740(0000) GS:ffff880819200000(0000) knlGS:0000000000000000^M +[ 439.730500] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b^M +[ 439.730500] CR2: 0000000000000158 CR3: 00000007ef766000 CR4: 00000000000006e0^M +[ 439.730500] Stack:^M +[ 439.730500] 0000000000000008 ffff8808114eed90 ffff880819203e00 ffffffff812dc104^M +[ 439.755663] ffff880819203e40 ffffffff812d9f5e 0000020000000000 ffff8808114eed80^M +[ 439.755663] Call Trace:^M +[ 439.755663] ^M +[ 439.755663] [] bt_for_each+0x6e/0xc8^M +[ 439.755663] [] ? blk_mq_rq_timed_out+0x6a/0x6a^M +[ 439.755663] [] ? blk_mq_rq_timed_out+0x6a/0x6a^M +[ 439.755663] [] blk_mq_tag_busy_iter+0x55/0x5e^M +[ 439.755663] [] ? 
blk_mq_bio_to_request+0x38/0x38^M +[ 439.755663] [] blk_mq_rq_timer+0x5d/0xd4^M +[ 439.755663] [] call_timer_fn+0xf7/0x284^M +[ 439.755663] [] ? call_timer_fn+0x5/0x284^M +[ 439.755663] [] ? blk_mq_bio_to_request+0x38/0x38^M +[ 439.755663] [] run_timer_softirq+0x1ce/0x1f8^M +[ 439.755663] [] __do_softirq+0x181/0x3a4^M +[ 439.755663] [] irq_exit+0x40/0x94^M +[ 439.755663] [] smp_apic_timer_interrupt+0x33/0x3e^M +[ 439.755663] [] apic_timer_interrupt+0x84/0x90^M +[ 439.755663] ^M +[ 439.755663] [] ? _raw_spin_unlock_irq+0x32/0x4a^M +[ 439.755663] [] finish_task_switch+0xe0/0x163^M +[ 439.755663] [] ? finish_task_switch+0xa2/0x163^M +[ 439.755663] [] __schedule+0x469/0x6cd^M +[ 439.755663] [] schedule+0x82/0x9a^M +[ 439.789267] [] signalfd_read+0x186/0x49a^M +[ 439.790911] [] ? wake_up_q+0x47/0x47^M +[ 439.790911] [] __vfs_read+0x28/0x9f^M +[ 439.790911] [] ? __fget_light+0x4d/0x74^M +[ 439.790911] [] vfs_read+0x7a/0xc6^M +[ 439.790911] [] SyS_read+0x49/0x7f^M +[ 439.790911] [] entry_SYSCALL_64_fastpath+0x12/0x6f^M +[ 439.790911] Code: 48 89 e5 e8 a9 b8 e7 ff 5d c3 0f 1f 44 00 00 55 89 +f2 48 89 e5 41 54 41 89 f4 53 48 8b 47 60 48 8b 1c d0 48 8b 7b 30 48 8b +53 38 <48> 8b 87 58 01 00 00 48 85 c0 75 09 48 8b 97 88 0c 00 00 eb 10 +^M +[ 439.790911] RIP [] blk_mq_tag_to_rq+0x21/0x6e^M +[ 439.790911] RSP ^M +[ 439.790911] CR2: 0000000000000158^M +[ 439.790911] ---[ end trace d40af58949325661 ]---^M + +Signed-off-by: Ming Lei +Signed-off-by: Jens Axboe +Signed-off-by: Dmitry Shmidt +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-flush.c | 15 ++++++++++++++- + block/blk-mq-tag.c | 2 +- + block/blk-mq-tag.h | 12 ++++++++++++ + block/blk-mq.c | 16 +--------------- + block/blk.h | 6 ++++++ + 5 files changed, 34 insertions(+), 17 deletions(-) + +--- a/block/blk-flush.c ++++ b/block/blk-flush.c +@@ -73,6 +73,7 @@ + + #include "blk.h" + #include "blk-mq.h" ++#include "blk-mq-tag.h" + + /* FLUSH/FUA sequences */ + enum { +@@ -226,7 +227,12 @@ static void flush_end_io(struct 
request + struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx); + + if (q->mq_ops) { ++ struct blk_mq_hw_ctx *hctx; ++ ++ /* release the tag's ownership to the req cloned from */ + spin_lock_irqsave(&fq->mq_flush_lock, flags); ++ hctx = q->mq_ops->map_queue(q, flush_rq->mq_ctx->cpu); ++ blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); + flush_rq->tag = -1; + } + +@@ -308,11 +314,18 @@ static bool blk_kick_flush(struct reques + + /* + * Borrow tag from the first request since they can't +- * be in flight at the same time. ++ * be in flight at the same time. And acquire the tag's ++ * ownership for flush req. + */ + if (q->mq_ops) { ++ struct blk_mq_hw_ctx *hctx; ++ + flush_rq->mq_ctx = first_rq->mq_ctx; + flush_rq->tag = first_rq->tag; ++ fq->orig_rq = first_rq; ++ ++ hctx = q->mq_ops->map_queue(q, first_rq->mq_ctx->cpu); ++ blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq); + } + + flush_rq->cmd_type = REQ_TYPE_FS; +--- a/block/blk-mq-tag.c ++++ b/block/blk-mq-tag.c +@@ -403,7 +403,7 @@ static void bt_for_each(struct blk_mq_hw + for (bit = find_first_bit(&bm->word, bm->depth); + bit < bm->depth; + bit = find_next_bit(&bm->word, bm->depth, bit + 1)) { +- rq = blk_mq_tag_to_rq(hctx->tags, off + bit); ++ rq = hctx->tags->rqs[off + bit]; + if (rq->q == hctx->queue) + fn(hctx, rq, data, reserved); + } +--- a/block/blk-mq-tag.h ++++ b/block/blk-mq-tag.h +@@ -85,4 +85,16 @@ static inline void blk_mq_tag_idle(struc + __blk_mq_tag_idle(hctx); + } + ++/* ++ * This helper should only be used for flush request to share tag ++ * with the request cloned from, and both the two requests can't be ++ * in flight at the same time. The caller has to make sure the tag ++ * can't be freed. 
++ */ ++static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx, ++ unsigned int tag, struct request *rq) ++{ ++ hctx->tags->rqs[tag] = rq; ++} ++ + #endif +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -498,23 +498,9 @@ void blk_mq_kick_requeue_list(struct req + } + EXPORT_SYMBOL(blk_mq_kick_requeue_list); + +-static inline bool is_flush_request(struct request *rq, +- struct blk_flush_queue *fq, unsigned int tag) +-{ +- return ((rq->cmd_flags & REQ_FLUSH_SEQ) && +- fq->flush_rq->tag == tag); +-} +- + struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) + { +- struct request *rq = tags->rqs[tag]; +- /* mq_ctx of flush rq is always cloned from the corresponding req */ +- struct blk_flush_queue *fq = blk_get_flush_queue(rq->q, rq->mq_ctx); +- +- if (!is_flush_request(rq, fq, tag)) +- return rq; +- +- return fq->flush_rq; ++ return tags->rqs[tag]; + } + EXPORT_SYMBOL(blk_mq_tag_to_rq); + +--- a/block/blk.h ++++ b/block/blk.h +@@ -22,6 +22,12 @@ struct blk_flush_queue { + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; ++ ++ /* ++ * flush_rq shares tag with this rq, both can't be active ++ * at the same time ++ */ ++ struct request *orig_rq; + spinlock_t mq_flush_lock; + }; + diff --git a/queue-3.18/series b/queue-3.18/series new file mode 100644 index 00000000000..32e83a66613 --- /dev/null +++ b/queue-3.18/series @@ -0,0 +1 @@ +blk-mq-fix-race-between-timeout-and-freeing-request.patch