From baef1c90aac7e5bf13f0360a3b334825a23d31a1 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Tue, 15 Jan 2019 14:54:47 -0800
Subject: soc: qcom: rpmh: Avoid accessing freed memory from batch API

From: Stephen Boyd <swboyd@chromium.org>

commit baef1c90aac7e5bf13f0360a3b334825a23d31a1 upstream.

Using the batch API from the interconnect driver sometimes leads to a
KASAN error due to an access to freed memory. This is easier to trigger
with threadirqs on the kernel commandline.

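For context, 'threadirqs' is the stock kernel parameter that forces
interrupt handlers into kernel threads. With the RSC interrupt handled
in a preemptible thread, the batch caller gets a much better chance to
wake up and free the messages while rpmh_tx_done() is still looking at
them, so booting with something along the lines of (illustrative only,
appended to whatever command line the board already uses):

	<existing bootargs> threadirqs

makes the use-after-free easy to reproduce.
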
BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c
Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57

CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72
Call trace:
dump_backtrace+0x0/0x2f8
show_stack+0x20/0x2c
__dump_stack+0x20/0x28
dump_stack+0xcc/0x10c
print_address_description+0x74/0x240
kasan_report+0x250/0x26c
__asan_report_load1_noabort+0x20/0x2c
rpmh_tx_done+0x114/0x12c
tcs_tx_done+0x450/0x768
irq_forced_thread_fn+0x58/0x9c
irq_thread+0x120/0x1dc
kthread+0x248/0x260
ret_from_fork+0x10/0x18

Allocated by task 385:
kasan_kmalloc+0xac/0x148
__kmalloc+0x170/0x1e4
rpmh_write_batch+0x174/0x540
qcom_icc_set+0x8dc/0x9ac
icc_set+0x288/0x2e8
a6xx_gmu_stop+0x320/0x3c0
a6xx_pm_suspend+0x108/0x124
adreno_suspend+0x50/0x60
pm_generic_runtime_suspend+0x60/0x78
__rpm_callback+0x214/0x32c
rpm_callback+0x54/0x184
rpm_suspend+0x3f8/0xa90
pm_runtime_work+0xb4/0x178
process_one_work+0x544/0xbc0
worker_thread+0x514/0x7d0
kthread+0x248/0x260
ret_from_fork+0x10/0x18

Freed by task 385:
__kasan_slab_free+0x12c/0x1e0
kasan_slab_free+0x10/0x1c
kfree+0x134/0x588
rpmh_write_batch+0x49c/0x540
qcom_icc_set+0x8dc/0x9ac
icc_set+0x288/0x2e8
a6xx_gmu_stop+0x320/0x3c0
a6xx_pm_suspend+0x108/0x124
adreno_suspend+0x50/0x60
cr50_spi spi5.0: SPI transfer timed out
pm_generic_runtime_suspend+0x60/0x78
__rpm_callback+0x214/0x32c
rpm_callback+0x54/0x184
rpm_suspend+0x3f8/0xa90
pm_runtime_work+0xb4/0x178
process_one_work+0x544/0xbc0
worker_thread+0x514/0x7d0
kthread+0x248/0x260
ret_from_fork+0x10/0x18

The buggy address belongs to the object at fffffff51414ac80
which belongs to the cache kmalloc-512 of size 512
The buggy address is located 260 bytes inside of
512-byte region [fffffff51414ac80, fffffff51414ae80)
The buggy address belongs to the page:
page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0
flags: 0x4000000000008100(slab|head)
raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680
raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
^
fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc

The batch API sets the same completion for each rpmh message that's sent
and then loops through all the messages and waits for that single
completion declared on the stack to be completed before returning from
the function and freeing the message structures. Unfortunately, some
messages may still be in process and 'stuck' in the TCS. At some later
point, the tcs_tx_done() interrupt will run and try to process messages
that have already been freed at the end of rpmh_write_batch(). This will
in turn access the 'needs_free' member of the rpmh_request structure and
cause KASAN to complain. Furthermore, if there's a message that's
completed in rpmh_tx_done() and freed immediately after the complete()
call is made we'll be racing with potentially freed memory when
accessing the 'needs_free' member:

	CPU0                           CPU1
	----                           ----
	rpmh_tx_done()
	  complete(&compl)
	                               wait_for_completion(&compl)
	                               kfree(rpm_msg)
	  if (rpm_msg->needs_free)
	  <KASAN warning splat>

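The completion side therefore has to stop touching the request once the
waiter is allowed to run. With this patch, rpmh_tx_done() snapshots
'needs_free' before signalling the completion; a condensed sketch of the
resulting callback (the error print and the exit label are trimmed, see
the first hunk below for the real change):

	void rpmh_tx_done(const struct tcs_request *msg, int r)
	{
		struct rpmh_request *rpm_msg =
			container_of(msg, struct rpmh_request, msg);
		struct completion *compl = rpm_msg->completion;
		/*
		 * Read the flag now: once complete() is called the batch
		 * caller may wake up, return and kfree() rpm_msg.
		 */
		bool free = rpm_msg->needs_free;

		rpm_msg->err = r;

		if (compl)
			complete(compl);	/* last access to rpm_msg */

		if (free)	/* only async requests own their own memory */
			kfree(rpm_msg);
	}
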
Let's fix this by allocating a chunk of completions for each message and
waiting for all of them to be completed before returning from the batch
API. Alternatively, we could wait for the last message in the batch, but
that may be a more complicated change because it looks like
tcs_tx_done() just iterates through the indices of the queue and
completes each message instead of tracking the last inserted message and
completing that first.

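Concretely, rpmh_write_batch() now carves the batch bookkeeping, the
rpmh_request array and one completion per message out of a single
allocation, and waits on every completion it actually queued. A
condensed sketch of the resulting flow (cache handling and error
reporting trimmed):

	/* One buffer: | batch_cache_req | rpm_msgs[count] | compls[count] | */
	ptr = kzalloc(sizeof(*req) +
		      count * (sizeof(req->rpm_msgs[0]) + sizeof(*compls)),
		      GFP_ATOMIC);
	req = ptr;
	compls = ptr + sizeof(*req) + count * sizeof(*rpm_msgs);
	rpm_msgs = req->rpm_msgs;

	for (i = 0; i < count; i++) {
		init_completion(&compls[i]);
		rpm_msgs[i].completion = &compls[i];
		ret = rpmh_rsc_send_data(ctrlr_to_drv(ctrlr), &rpm_msgs[i].msg);
		if (ret)
			break;		/* only wait for what was queued */
	}

	time_left = RPMH_TIMEOUT_MS;
	while (i--)			/* i == number of queued messages */
		time_left = wait_for_completion_timeout(&compls[i], time_left);

	kfree(ptr);			/* frees req, rpm_msgs and compls together */

The single time_left budget is shared across the waits, so the total
blocking time stays bounded by RPMH_TIMEOUT_MS instead of growing with
the batch size.
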
Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request")
Cc: Lina Iyer <ilina@codeaurora.org>
Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Evan Green <evgreen@chromium.org>
Cc: stable@vger.kernel.org
Reviewed-by: Lina Iyer <ilina@codeaurora.org>
Reviewed-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/soc/qcom/rpmh.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

--- a/drivers/soc/qcom/rpmh.c
+++ b/drivers/soc/qcom/rpmh.c
@@ -80,6 +80,7 @@ void rpmh_tx_done(const struct tcs_reque
 	struct rpmh_request *rpm_msg = container_of(msg, struct rpmh_request,
 						    msg);
 	struct completion *compl = rpm_msg->completion;
+	bool free = rpm_msg->needs_free;
 
 	rpm_msg->err = r;
 
@@ -94,7 +95,7 @@ void rpmh_tx_done(const struct tcs_reque
 		complete(compl);
 
 exit:
-	if (rpm_msg->needs_free)
+	if (free)
 		kfree(rpm_msg);
 }
 
@@ -348,11 +349,12 @@ int rpmh_write_batch(const struct device
 {
 	struct batch_cache_req *req;
 	struct rpmh_request *rpm_msgs;
-	DECLARE_COMPLETION_ONSTACK(compl);
+	struct completion *compls;
 	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
 	unsigned long time_left;
 	int count = 0;
-	int ret, i, j;
+	int ret, i;
+	void *ptr;
 
 	if (!cmd || !n)
 		return -EINVAL;
@@ -362,10 +364,15 @@ int rpmh_write_batch(const struct device
 	if (!count)
 		return -EINVAL;
 
-	req = kzalloc(sizeof(*req) + count * sizeof(req->rpm_msgs[0]),
+	ptr = kzalloc(sizeof(*req) +
+		      count * (sizeof(req->rpm_msgs[0]) + sizeof(*compls)),
 		      GFP_ATOMIC);
-	if (!req)
+	if (!ptr)
 		return -ENOMEM;
+
+	req = ptr;
+	compls = ptr + sizeof(*req) + count * sizeof(*rpm_msgs);
+
 	req->count = count;
 	rpm_msgs = req->rpm_msgs;
 
@@ -380,25 +387,26 @@ int rpmh_write_batch(const struct device
 	}
 
 	for (i = 0; i < count; i++) {
-		rpm_msgs[i].completion = &compl;
+		struct completion *compl = &compls[i];
+
+		init_completion(compl);
+		rpm_msgs[i].completion = compl;
 		ret = rpmh_rsc_send_data(ctrlr_to_drv(ctrlr), &rpm_msgs[i].msg);
 		if (ret) {
 			pr_err("Error(%d) sending RPMH message addr=%#x\n",
 			       ret, rpm_msgs[i].msg.cmds[0].addr);
-			for (j = i; j < count; j++)
-				rpmh_tx_done(&rpm_msgs[j].msg, ret);
 			break;
 		}
 	}
 
 	time_left = RPMH_TIMEOUT_MS;
-	for (i = 0; i < count; i++) {
-		time_left = wait_for_completion_timeout(&compl, time_left);
+	while (i--) {
+		time_left = wait_for_completion_timeout(&compls[i], time_left);
 		if (!time_left) {
 			/*
 			 * Better hope they never finish because they'll signal
-			 * the completion on our stack and that's bad once
-			 * we've returned from the function.
+			 * the completion that we're going to free once
+			 * we've returned from this function.
 			 */
 			WARN_ON(1);
 			ret = -ETIMEDOUT;
@@ -407,7 +415,7 @@ int rpmh_write_batch(const struct device
 	}
 
 exit:
-	kfree(req);
+	kfree(ptr);
 
 	return ret;
 }