git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
fixes for 5.4
author    Sasha Levin <sashal@kernel.org>
          Tue, 3 Mar 2020 21:02:38 +0000 (16:02 -0500)
committer Sasha Levin <sashal@kernel.org>
          Tue, 3 Mar 2020 21:03:25 +0000 (16:03 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.4/blktrace-protect-q-blk_trace-with-rcu.patch [new file with mode: 0644]
queue-5.4/series

diff --git a/queue-5.4/blktrace-protect-q-blk_trace-with-rcu.patch b/queue-5.4/blktrace-protect-q-blk_trace-with-rcu.patch
new file mode 100644
index 0000000..34846e1
--- /dev/null
@@ -0,0 +1,432 @@
+From 310642a760f309f54f3afbd6cc95fe4cabaf8d94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Feb 2020 15:28:12 +0100
+Subject: blktrace: Protect q->blk_trace with RCU
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit c780e86dd48ef6467a1146cf7d0fe1e05a635039 ]
+
+KASAN is reporting that __blk_add_trace() has a use-after-free issue
+when accessing q->blk_trace. Indeed the switching of block tracing (and
+thus eventual freeing of q->blk_trace) is completely unsynchronized with
+the currently running tracing and thus it can happen that the blk_trace
+structure is being freed just while __blk_add_trace() works on it.
+Protect accesses to q->blk_trace by RCU during tracing and make sure we
+wait for the end of an RCU grace period when shutting down tracing.
+Luckily that is a rare enough event that we can afford it. Note that
+postponing the freeing of blk_trace to an RCU callback is better
+avoided, as it could have unexpected user-visible side effects: debugfs
+files would still exist for a short while after block tracing has been
+shut down.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711
+CC: stable@vger.kernel.org
+Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Tested-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Reported-by: Tristan Madani <tristmd@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/blkdev.h       |   2 +-
+ include/linux/blktrace_api.h |  18 ++++--
+ kernel/trace/blktrace.c      | 114 +++++++++++++++++++++++++----------
+ 3 files changed, 97 insertions(+), 37 deletions(-)
+
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 51ccb4b8770ae..bff1def62eed9 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -531,7 +531,7 @@ struct request_queue {
+       unsigned int            sg_reserved_size;
+       int                     node;
+ #ifdef CONFIG_BLK_DEV_IO_TRACE
+-      struct blk_trace        *blk_trace;
++      struct blk_trace __rcu  *blk_trace;
+       struct mutex            blk_trace_mutex;
+ #endif
+       /*
+diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
+index 7bb2d8de9f308..3b6ff5902edce 100644
+--- a/include/linux/blktrace_api.h
++++ b/include/linux/blktrace_api.h
+@@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
+  **/
+ #define blk_add_cgroup_trace_msg(q, cg, fmt, ...)                     \
+       do {                                                            \
+-              struct blk_trace *bt = (q)->blk_trace;                  \
++              struct blk_trace *bt;                                   \
++                                                                      \
++              rcu_read_lock();                                        \
++              bt = rcu_dereference((q)->blk_trace);                   \
+               if (unlikely(bt))                                       \
+                       __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
++              rcu_read_unlock();                                      \
+       } while (0)
+ #define blk_add_trace_msg(q, fmt, ...)                                        \
+       blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__)
+@@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
+ static inline bool blk_trace_note_message_enabled(struct request_queue *q)
+ {
+-      struct blk_trace *bt = q->blk_trace;
+-      if (likely(!bt))
+-              return false;
+-      return bt->act_mask & BLK_TC_NOTIFY;
++      struct blk_trace *bt;
++      bool ret;
++
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      ret = bt && (bt->act_mask & BLK_TC_NOTIFY);
++      rcu_read_unlock();
++      return ret;
+ }
+ extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
+diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
+index 2d6e93ab04783..4b2ad374167bc 100644
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -336,6 +336,7 @@ static void put_probe_ref(void)
+ static void blk_trace_cleanup(struct blk_trace *bt)
+ {
++      synchronize_rcu();
+       blk_trace_free(bt);
+       put_probe_ref();
+ }
+@@ -630,8 +631,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
+ static int __blk_trace_startstop(struct request_queue *q, int start)
+ {
+       int ret;
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
++      bt = rcu_dereference_protected(q->blk_trace,
++                                     lockdep_is_held(&q->blk_trace_mutex));
+       if (bt == NULL)
+               return -EINVAL;
+@@ -741,8 +744,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+ void blk_trace_shutdown(struct request_queue *q)
+ {
+       mutex_lock(&q->blk_trace_mutex);
+-
+-      if (q->blk_trace) {
++      if (rcu_dereference_protected(q->blk_trace,
++                                    lockdep_is_held(&q->blk_trace_mutex))) {
+               __blk_trace_startstop(q, 0);
+               __blk_trace_remove(q);
+       }
+@@ -754,8 +757,10 @@ void blk_trace_shutdown(struct request_queue *q)
+ static union kernfs_node_id *
+ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
++      /* We don't use the 'bt' value here except as an optimization... */
++      bt = rcu_dereference_protected(q->blk_trace, 1);
+       if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
+               return NULL;
+@@ -800,10 +805,14 @@ static void blk_add_trace_rq(struct request *rq, int error,
+                            unsigned int nr_bytes, u32 what,
+                            union kernfs_node_id *cgid)
+ {
+-      struct blk_trace *bt = rq->q->blk_trace;
++      struct blk_trace *bt;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(rq->q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       if (blk_rq_is_passthrough(rq))
+               what |= BLK_TC_ACT(BLK_TC_PC);
+@@ -812,6 +821,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
+       __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
+                       rq->cmd_flags, what, error, 0, NULL, cgid);
++      rcu_read_unlock();
+ }
+ static void blk_add_trace_rq_insert(void *ignore,
+@@ -857,14 +867,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
+ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
+                             u32 what, int error)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
+                       bio_op(bio), bio->bi_opf, what, error, 0, NULL,
+                       blk_trace_bio_get_cgid(q, bio));
++      rcu_read_unlock();
+ }
+ static void blk_add_trace_bio_bounce(void *ignore,
+@@ -909,11 +924,14 @@ static void blk_add_trace_getrq(void *ignore,
+       if (bio)
+               blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
+       else {
+-              struct blk_trace *bt = q->blk_trace;
++              struct blk_trace *bt;
++              rcu_read_lock();
++              bt = rcu_dereference(q->blk_trace);
+               if (bt)
+                       __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
+                                       NULL, NULL);
++              rcu_read_unlock();
+       }
+ }
+@@ -925,27 +943,35 @@ static void blk_add_trace_sleeprq(void *ignore,
+       if (bio)
+               blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
+       else {
+-              struct blk_trace *bt = q->blk_trace;
++              struct blk_trace *bt;
++              rcu_read_lock();
++              bt = rcu_dereference(q->blk_trace);
+               if (bt)
+                       __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
+                                       0, 0, NULL, NULL);
++              rcu_read_unlock();
+       }
+ }
+ static void blk_add_trace_plug(void *ignore, struct request_queue *q)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
+       if (bt)
+               __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL);
++      rcu_read_unlock();
+ }
+ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
+                                   unsigned int depth, bool explicit)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
+       if (bt) {
+               __be64 rpdu = cpu_to_be64(depth);
+               u32 what;
+@@ -957,14 +983,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
+               __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL);
+       }
++      rcu_read_unlock();
+ }
+ static void blk_add_trace_split(void *ignore,
+                               struct request_queue *q, struct bio *bio,
+                               unsigned int pdu)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
+       if (bt) {
+               __be64 rpdu = cpu_to_be64(pdu);
+@@ -973,6 +1002,7 @@ static void blk_add_trace_split(void *ignore,
+                               BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
+                               &rpdu, blk_trace_bio_get_cgid(q, bio));
+       }
++      rcu_read_unlock();
+ }
+ /**
+@@ -992,11 +1022,15 @@ static void blk_add_trace_bio_remap(void *ignore,
+                                   struct request_queue *q, struct bio *bio,
+                                   dev_t dev, sector_t from)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
+       struct blk_io_trace_remap r;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       r.device_from = cpu_to_be32(dev);
+       r.device_to   = cpu_to_be32(bio_dev(bio));
+@@ -1005,6 +1039,7 @@ static void blk_add_trace_bio_remap(void *ignore,
+       __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
+                       bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
+                       sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
++      rcu_read_unlock();
+ }
+ /**
+@@ -1025,11 +1060,15 @@ static void blk_add_trace_rq_remap(void *ignore,
+                                  struct request *rq, dev_t dev,
+                                  sector_t from)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
+       struct blk_io_trace_remap r;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       r.device_from = cpu_to_be32(dev);
+       r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
+@@ -1038,6 +1077,7 @@ static void blk_add_trace_rq_remap(void *ignore,
+       __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+                       rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
+                       sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
++      rcu_read_unlock();
+ }
+ /**
+@@ -1055,14 +1095,19 @@ void blk_add_driver_data(struct request_queue *q,
+                        struct request *rq,
+                        void *data, size_t len)
+ {
+-      struct blk_trace *bt = q->blk_trace;
++      struct blk_trace *bt;
+-      if (likely(!bt))
++      rcu_read_lock();
++      bt = rcu_dereference(q->blk_trace);
++      if (likely(!bt)) {
++              rcu_read_unlock();
+               return;
++      }
+       __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
+                               BLK_TA_DRV_DATA, 0, len, data,
+                               blk_trace_request_get_cgid(q, rq));
++      rcu_read_unlock();
+ }
+ EXPORT_SYMBOL_GPL(blk_add_driver_data);
+@@ -1589,6 +1634,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
+               return -EINVAL;
+       put_probe_ref();
++      synchronize_rcu();
+       blk_trace_free(bt);
+       return 0;
+ }
+@@ -1750,6 +1796,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
+       struct hd_struct *p = dev_to_part(dev);
+       struct request_queue *q;
+       struct block_device *bdev;
++      struct blk_trace *bt;
+       ssize_t ret = -ENXIO;
+       bdev = bdget(part_devt(p));
+@@ -1762,21 +1809,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
+       mutex_lock(&q->blk_trace_mutex);
++      bt = rcu_dereference_protected(q->blk_trace,
++                                     lockdep_is_held(&q->blk_trace_mutex));
+       if (attr == &dev_attr_enable) {
+-              ret = sprintf(buf, "%u\n", !!q->blk_trace);
++              ret = sprintf(buf, "%u\n", !!bt);
+               goto out_unlock_bdev;
+       }
+-      if (q->blk_trace == NULL)
++      if (bt == NULL)
+               ret = sprintf(buf, "disabled\n");
+       else if (attr == &dev_attr_act_mask)
+-              ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
++              ret = blk_trace_mask2str(buf, bt->act_mask);
+       else if (attr == &dev_attr_pid)
+-              ret = sprintf(buf, "%u\n", q->blk_trace->pid);
++              ret = sprintf(buf, "%u\n", bt->pid);
+       else if (attr == &dev_attr_start_lba)
+-              ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
++              ret = sprintf(buf, "%llu\n", bt->start_lba);
+       else if (attr == &dev_attr_end_lba)
+-              ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
++              ret = sprintf(buf, "%llu\n", bt->end_lba);
+ out_unlock_bdev:
+       mutex_unlock(&q->blk_trace_mutex);
+@@ -1793,6 +1842,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+       struct block_device *bdev;
+       struct request_queue *q;
+       struct hd_struct *p;
++      struct blk_trace *bt;
+       u64 value;
+       ssize_t ret = -EINVAL;
+@@ -1823,8 +1873,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+       mutex_lock(&q->blk_trace_mutex);
++      bt = rcu_dereference_protected(q->blk_trace,
++                                     lockdep_is_held(&q->blk_trace_mutex));
+       if (attr == &dev_attr_enable) {
+-              if (!!value == !!q->blk_trace) {
++              if (!!value == !!bt) {
+                       ret = 0;
+                       goto out_unlock_bdev;
+               }
+@@ -1836,18 +1888,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+       }
+       ret = 0;
+-      if (q->blk_trace == NULL)
++      if (bt == NULL)
+               ret = blk_trace_setup_queue(q, bdev);
+       if (ret == 0) {
+               if (attr == &dev_attr_act_mask)
+-                      q->blk_trace->act_mask = value;
++                      bt->act_mask = value;
+               else if (attr == &dev_attr_pid)
+-                      q->blk_trace->pid = value;
++                      bt->pid = value;
+               else if (attr == &dev_attr_start_lba)
+-                      q->blk_trace->start_lba = value;
++                      bt->start_lba = value;
+               else if (attr == &dev_attr_end_lba)
+-                      q->blk_trace->end_lba = value;
++                      bt->end_lba = value;
+       }
+ out_unlock_bdev:
+-- 
+2.20.1
+
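
The patch above applies one pattern throughout: readers take rcu_read_lock() and
fetch q->blk_trace with rcu_dereference(), holders of blk_trace_mutex use
rcu_dereference_protected(), and teardown waits in synchronize_rcu() before
blk_trace_free(). What follows is a minimal, self-contained sketch of that
pattern, not code from the patch; the my_dev/my_state names are hypothetical.

#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_state {
	int data;
};

struct my_dev {
	struct my_state __rcu *state;	/* writes serialized by state_mutex */
	struct mutex state_mutex;
};

/* Read side: cheap, may run concurrently with my_teardown(). */
static void my_reader(struct my_dev *d)
{
	struct my_state *s;

	rcu_read_lock();
	s = rcu_dereference(d->state);		/* may observe NULL */
	if (s)
		pr_info("data=%d\n", s->data);	/* valid until rcu_read_unlock() */
	rcu_read_unlock();
}

/* Update side: unpublish the pointer, wait one grace period, then free. */
static void my_teardown(struct my_dev *d)
{
	struct my_state *s;

	mutex_lock(&d->state_mutex);
	s = rcu_dereference_protected(d->state,
				      lockdep_is_held(&d->state_mutex));
	RCU_INIT_POINTER(d->state, NULL);
	mutex_unlock(&d->state_mutex);

	/* Every reader that saw 's' has left its critical section. */
	synchronize_rcu();
	kfree(s);
}

Unpublishing before synchronize_rcu() means new readers immediately see the
facility as disabled, while readers already inside a read-side critical section
finish safely against the old structure before it is freed. The patch relies on
the same ordering: q->blk_trace is cleared (via xchg() in __blk_trace_remove(),
not shown in the hunks above) before blk_trace_cleanup() waits for the grace
period.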
diff --git a/queue-5.4/series b/queue-5.4/series
index 048dc22c271e01768a8d510edbbfe8c1877efeff..565c272397fbd15420721283276eef1fdaa261c6 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -148,3 +148,4 @@ mm-huge_memory.c-use-head-to-check-huge-zero-page.patch
 mm-thp-fix-defrag-setting-if-newline-is-not-used.patch
 kvm-nvmx-vmwrite-checks-vmcs-link-pointer-before-vmcs-field.patch
 kvm-nvmx-vmwrite-checks-unsupported-field-before-read-only-field.patch
+blktrace-protect-q-blk_trace-with-rcu.patch