git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.11-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 10 May 2021 08:56:28 +0000 (10:56 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 10 May 2021 08:56:28 +0000 (10:56 +0200)
added patches:
dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch
dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch
dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch
dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch
pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch
tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch
tracing-map-all-pids-to-command-lines.patch
tracing-restructure-trace_clock_global-to-never-block.patch

queue-5.11/dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch [new file with mode: 0644]
queue-5.11/dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch [new file with mode: 0644]
queue-5.11/dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch [new file with mode: 0644]
queue-5.11/dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch [new file with mode: 0644]
queue-5.11/pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch [new file with mode: 0644]
queue-5.11/series
queue-5.11/tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch [new file with mode: 0644]
queue-5.11/tracing-map-all-pids-to-command-lines.patch [new file with mode: 0644]
queue-5.11/tracing-restructure-trace_clock_global-to-never-block.patch [new file with mode: 0644]

diff --git a/queue-5.11/dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch b/queue-5.11/dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch
new file mode 100644 (file)
index 0000000..97ce03f
--- /dev/null
@@ -0,0 +1,28 @@
+From 17e9e134a8efabbbf689a0904eee92bb5a868172 Mon Sep 17 00:00:00 2001
+From: Tian Tao <tiantao6@hisilicon.com>
+Date: Wed, 14 Apr 2021 09:43:44 +0800
+Subject: dm integrity: fix missing goto in bitmap_flush_interval error handling
+
+From: Tian Tao <tiantao6@hisilicon.com>
+
+commit 17e9e134a8efabbbf689a0904eee92bb5a868172 upstream.
+
+Fixes: 468dfca38b1a ("dm integrity: add a bitmap mode")
+Cc: stable@vger.kernel.org
+Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-integrity.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -3929,6 +3929,7 @@ static int dm_integrity_ctr(struct dm_ta
+                       if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
+                               r = -EINVAL;
+                               ti->error = "Invalid bitmap_flush_interval argument";
++                              goto bad;
+                       }
+                       ic->bitmap_flush_interval = msecs_to_jiffies(val);
+               } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
diff --git a/queue-5.11/dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch b/queue-5.11/dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch
new file mode 100644 (file)
index 0000000..d76f912
--- /dev/null
@@ -0,0 +1,76 @@
+From a88b2358f1da2c9f9fcc432f2e0a79617fea397c Mon Sep 17 00:00:00 2001
+From: Joe Thornber <ejt@redhat.com>
+Date: Mon, 29 Mar 2021 16:34:57 +0100
+Subject: dm persistent data: packed struct should have an aligned() attribute too
+
+From: Joe Thornber <ejt@redhat.com>
+
+commit a88b2358f1da2c9f9fcc432f2e0a79617fea397c upstream.
+
+Otherwise most non-x86 architectures (e.g. riscv, arm) will resort to
+byte-by-byte access.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Joe Thornber <ejt@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/persistent-data/dm-btree-internal.h   |    4 ++--
+ drivers/md/persistent-data/dm-space-map-common.h |    8 ++++----
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/persistent-data/dm-btree-internal.h
++++ b/drivers/md/persistent-data/dm-btree-internal.h
+@@ -34,12 +34,12 @@ struct node_header {
+       __le32 max_entries;
+       __le32 value_size;
+       __le32 padding;
+-} __packed;
++} __attribute__((packed, aligned(8)));
+ struct btree_node {
+       struct node_header header;
+       __le64 keys[];
+-} __packed;
++} __attribute__((packed, aligned(8)));
+ /*
+--- a/drivers/md/persistent-data/dm-space-map-common.h
++++ b/drivers/md/persistent-data/dm-space-map-common.h
+@@ -33,7 +33,7 @@ struct disk_index_entry {
+       __le64 blocknr;
+       __le32 nr_free;
+       __le32 none_free_before;
+-} __packed;
++} __attribute__ ((packed, aligned(8)));
+ #define MAX_METADATA_BITMAPS 255
+@@ -43,7 +43,7 @@ struct disk_metadata_index {
+       __le64 blocknr;
+       struct disk_index_entry index[MAX_METADATA_BITMAPS];
+-} __packed;
++} __attribute__ ((packed, aligned(8)));
+ struct ll_disk;
+@@ -86,7 +86,7 @@ struct disk_sm_root {
+       __le64 nr_allocated;
+       __le64 bitmap_root;
+       __le64 ref_count_root;
+-} __packed;
++} __attribute__ ((packed, aligned(8)));
+ #define ENTRIES_PER_BYTE 4
+@@ -94,7 +94,7 @@ struct disk_bitmap_header {
+       __le32 csum;
+       __le32 not_used;
+       __le64 blocknr;
+-} __packed;
++} __attribute__ ((packed, aligned(8)));
+ enum allocation_event {
+       SM_NONE,
diff --git a/queue-5.11/dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch b/queue-5.11/dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch
new file mode 100644 (file)
index 0000000..a8fdc92
--- /dev/null
@@ -0,0 +1,91 @@
+From 8e947c8f4a5620df77e43c9c75310dc510250166 Mon Sep 17 00:00:00 2001
+From: Benjamin Block <bblock@linux.ibm.com>
+Date: Thu, 29 Apr 2021 23:37:00 +0200
+Subject: dm rq: fix double free of blk_mq_tag_set in dev remove after table load fails
+
+From: Benjamin Block <bblock@linux.ibm.com>
+
+commit 8e947c8f4a5620df77e43c9c75310dc510250166 upstream.
+
+When loading a device-mapper table for a request-based mapped device,
+and the allocation/initialization of the blk_mq_tag_set for the device
+fails, a following device remove will cause a double free.
+
+E.g. (dmesg):
+  device-mapper: core: Cannot initialize queue for request-based dm-mq mapped device
+  device-mapper: ioctl: unable to set up device queue for new table.
+  Unable to handle kernel pointer dereference in virtual kernel address space
+  Failing address: 0305e098835de000 TEID: 0305e098835de803
+  Fault in home space mode while using kernel ASCE.
+  AS:000000025efe0007 R3:0000000000000024
+  Oops: 0038 ilc:3 [#1] SMP
+  Modules linked in: ... lots of modules ...
+  Supported: Yes, External
+  CPU: 0 PID: 7348 Comm: multipathd Kdump: loaded Tainted: G        W      X    5.3.18-53-default #1 SLE15-SP3
+  Hardware name: IBM 8561 T01 7I2 (LPAR)
+  Krnl PSW : 0704e00180000000 000000025e368eca (kfree+0x42/0x330)
+             R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
+  Krnl GPRS: 000000000000004a 000000025efe5230 c1773200d779968d 0000000000000000
+             000000025e520270 000000025e8d1b40 0000000000000003 00000007aae10000
+             000000025e5202a2 0000000000000001 c1773200d779968d 0305e098835de640
+             00000007a8170000 000003ff80138650 000000025e5202a2 000003e00396faa8
+  Krnl Code: 000000025e368eb8: c4180041e100       lgrl    %r1,25eba50b8
+             000000025e368ebe: ecba06b93a55       risbg   %r11,%r10,6,185,58
+            #000000025e368ec4: e3b010000008       ag      %r11,0(%r1)
+            >000000025e368eca: e310b0080004       lg      %r1,8(%r11)
+             000000025e368ed0: a7110001           tmll    %r1,1
+             000000025e368ed4: a7740129           brc     7,25e369126
+             000000025e368ed8: e320b0080004       lg      %r2,8(%r11)
+             000000025e368ede: b904001b           lgr     %r1,%r11
+  Call Trace:
+   [<000000025e368eca>] kfree+0x42/0x330
+   [<000000025e5202a2>] blk_mq_free_tag_set+0x72/0xb8
+   [<000003ff801316a8>] dm_mq_cleanup_mapped_device+0x38/0x50 [dm_mod]
+   [<000003ff80120082>] free_dev+0x52/0xd0 [dm_mod]
+   [<000003ff801233f0>] __dm_destroy+0x150/0x1d0 [dm_mod]
+   [<000003ff8012bb9a>] dev_remove+0x162/0x1c0 [dm_mod]
+   [<000003ff8012a988>] ctl_ioctl+0x198/0x478 [dm_mod]
+   [<000003ff8012ac8a>] dm_ctl_ioctl+0x22/0x38 [dm_mod]
+   [<000000025e3b11ee>] ksys_ioctl+0xbe/0xe0
+   [<000000025e3b127a>] __s390x_sys_ioctl+0x2a/0x40
+   [<000000025e8c15ac>] system_call+0xd8/0x2c8
+  Last Breaking-Event-Address:
+   [<000000025e52029c>] blk_mq_free_tag_set+0x6c/0xb8
+  Kernel panic - not syncing: Fatal exception: panic_on_oops
+
+When allocation/initialization of the blk_mq_tag_set fails in
+dm_mq_init_request_queue(), it is uninitialized/freed, but the pointer
+is not reset to NULL; so when dev_remove() later gets into
+dm_mq_cleanup_mapped_device() it sees the pointer and tries to
+uninitialize and free it again.
+
+Fix this by setting the pointer to NULL in dm_mq_init_request_queue()
+error-handling. Also set it to NULL in dm_mq_cleanup_mapped_device().
+
+Cc: <stable@vger.kernel.org> # 4.6+
+Fixes: 1c357a1e86a4 ("dm: allocate blk_mq_tag_set rather than embed in mapped_device")
+Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-rq.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/dm-rq.c
++++ b/drivers/md/dm-rq.c
+@@ -569,6 +569,7 @@ out_tag_set:
+       blk_mq_free_tag_set(md->tag_set);
+ out_kfree_tag_set:
+       kfree(md->tag_set);
++      md->tag_set = NULL;
+       return err;
+ }
+@@ -578,6 +579,7 @@ void dm_mq_cleanup_mapped_device(struct
+       if (md->tag_set) {
+               blk_mq_free_tag_set(md->tag_set);
+               kfree(md->tag_set);
++              md->tag_set = NULL;
+       }
+ }
diff --git a/queue-5.11/dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch b/queue-5.11/dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch
new file mode 100644 (file)
index 0000000..9d9d9c1
--- /dev/null
@@ -0,0 +1,33 @@
+From 5208692e80a1f3c8ce2063a22b675dd5589d1d80 Mon Sep 17 00:00:00 2001
+From: Joe Thornber <ejt@redhat.com>
+Date: Tue, 13 Apr 2021 09:11:53 +0100
+Subject: dm space map common: fix division bug in sm_ll_find_free_block()
+
+From: Joe Thornber <ejt@redhat.com>
+
+commit 5208692e80a1f3c8ce2063a22b675dd5589d1d80 upstream.
+
+This division bug meant the search for free metadata space could skip
+the final allocation bitmap's worth of entries. Fix affects DM thinp,
+cache and era targets.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Joe Thornber <ejt@redhat.com>
+Tested-by: Ming-Hung Tsai <mtsai@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/persistent-data/dm-space-map-common.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/persistent-data/dm-space-map-common.c
++++ b/drivers/md/persistent-data/dm-space-map-common.c
+@@ -339,6 +339,8 @@ int sm_ll_find_free_block(struct ll_disk
+        */
+       begin = do_div(index_begin, ll->entries_per_block);
+       end = do_div(end, ll->entries_per_block);
++      if (end == 0)
++              end = ll->entries_per_block;
+       for (i = index_begin; i < index_end; i++, begin = 0) {
+               struct dm_block *blk;
diff --git a/queue-5.11/pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch b/queue-5.11/pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch
new file mode 100644 (file)
index 0000000..77e2615
--- /dev/null
@@ -0,0 +1,96 @@
+From 1d0bd580ef83b78a10c0b37f3313eaa59d8c80db Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?=
+ <zhouyanjie@wanyeetech.com>
+Date: Sun, 18 Apr 2021 22:44:23 +0800
+Subject: pinctrl: Ingenic: Add support for read the pin configuration of X1830.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+
+commit 1d0bd580ef83b78a10c0b37f3313eaa59d8c80db upstream.
+
+Add X1830 support in "ingenic_pinconf_get()", so that it can read the
+configuration of X1830 SoC correctly.
+
+Fixes: d7da2a1e4e08 ("pinctrl: Ingenic: Add pinctrl driver for X1830.")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Reviewed-by: Paul Cercueil <paul@crapouillou.net>
+Link: https://lore.kernel.org/r/1618757073-1724-3-git-send-email-zhouyanjie@wanyeetech.com
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pinctrl/pinctrl-ingenic.c |   40 +++++++++++++++++++++++++++++---------
+ 1 file changed, 31 insertions(+), 9 deletions(-)
+
+--- a/drivers/pinctrl/pinctrl-ingenic.c
++++ b/drivers/pinctrl/pinctrl-ingenic.c
+@@ -2089,26 +2089,48 @@ static int ingenic_pinconf_get(struct pi
+       enum pin_config_param param = pinconf_to_config_param(*config);
+       unsigned int idx = pin % PINS_PER_GPIO_CHIP;
+       unsigned int offt = pin / PINS_PER_GPIO_CHIP;
+-      bool pull;
++      unsigned int bias;
++      bool pull, pullup, pulldown;
+-      if (jzpc->info->version >= ID_JZ4770)
+-              pull = !ingenic_get_pin_config(jzpc, pin, JZ4770_GPIO_PEN);
+-      else
+-              pull = !ingenic_get_pin_config(jzpc, pin, JZ4740_GPIO_PULL_DIS);
++      if (jzpc->info->version >= ID_X1830) {
++              unsigned int half = PINS_PER_GPIO_CHIP / 2;
++              unsigned int idxh = (pin % half) * 2;
++
++              if (idx < half)
++                      regmap_read(jzpc->map, offt * jzpc->info->reg_offset +
++                                      X1830_GPIO_PEL, &bias);
++              else
++                      regmap_read(jzpc->map, offt * jzpc->info->reg_offset +
++                                      X1830_GPIO_PEH, &bias);
++
++              bias = (bias >> idxh) & (GPIO_PULL_UP | GPIO_PULL_DOWN);
++
++              pullup = (bias == GPIO_PULL_UP) && (jzpc->info->pull_ups[offt] & BIT(idx));
++              pulldown = (bias == GPIO_PULL_DOWN) && (jzpc->info->pull_downs[offt] & BIT(idx));
++
++      } else {
++              if (jzpc->info->version >= ID_JZ4770)
++                      pull = !ingenic_get_pin_config(jzpc, pin, JZ4770_GPIO_PEN);
++              else
++                      pull = !ingenic_get_pin_config(jzpc, pin, JZ4740_GPIO_PULL_DIS);
++
++              pullup = pull && (jzpc->info->pull_ups[offt] & BIT(idx));
++              pulldown = pull && (jzpc->info->pull_downs[offt] & BIT(idx));
++      }
+       switch (param) {
+       case PIN_CONFIG_BIAS_DISABLE:
+-              if (pull)
++              if (pullup || pulldown)
+                       return -EINVAL;
+               break;
+       case PIN_CONFIG_BIAS_PULL_UP:
+-              if (!pull || !(jzpc->info->pull_ups[offt] & BIT(idx)))
++              if (!pullup)
+                       return -EINVAL;
+               break;
+       case PIN_CONFIG_BIAS_PULL_DOWN:
+-              if (!pull || !(jzpc->info->pull_downs[offt] & BIT(idx)))
++              if (!pulldown)
+                       return -EINVAL;
+               break;
+@@ -2126,7 +2148,7 @@ static void ingenic_set_bias(struct inge
+       if (jzpc->info->version >= ID_X1830) {
+               unsigned int idx = pin % PINS_PER_GPIO_CHIP;
+               unsigned int half = PINS_PER_GPIO_CHIP / 2;
+-              unsigned int idxh = pin % half * 2;
++              unsigned int idxh = (pin % half) * 2;
+               unsigned int offt = pin / PINS_PER_GPIO_CHIP;
+               if (idx < half) {
diff --git a/queue-5.11/series b/queue-5.11/series
index 2d27448297e4905ed5a97965e6e498856ec1989e..b946a2d797c417af53785234597ff0a527e9934f 100644 (file)
@@ -329,3 +329,11 @@ usb-dwc2-fix-session-request-interrupt-handler.patch
 pci-dwc-move-iatu-detection-earlier.patch
 tty-fix-memory-leak-in-vc_deallocate.patch
 rsi-use-resume_noirq-for-sdio.patch
+tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch
+tracing-map-all-pids-to-command-lines.patch
+tracing-restructure-trace_clock_global-to-never-block.patch
+dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch
+dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch
+dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch
+dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch
+pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch
diff --git a/queue-5.11/tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch b/queue-5.11/tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch
new file mode 100644 (file)
index 0000000..655f7a8
--- /dev/null
@@ -0,0 +1,66 @@
+From 13a779de4175df602366d129e41782ad7168cef0 Mon Sep 17 00:00:00 2001
+From: Calvin Walton <calvin.walton@kepstin.ca>
+Date: Wed, 28 Apr 2021 17:09:16 +0800
+Subject: tools/power turbostat: Fix offset overflow issue in index converting
+
+From: Calvin Walton <calvin.walton@kepstin.ca>
+
+commit 13a779de4175df602366d129e41782ad7168cef0 upstream.
+
+The idx_to_offset() function returns type int (32-bit signed), but
+MSR_PKG_ENERGY_STAT is u32 and would be interpreted as a negative number.
+The end result is that it hits the if (offset < 0) check in update_msr_sum()
+which prevents the timer callback from updating the stat in the background when
+long durations are used. A similar issue exists in offset_to_idx() and
+update_msr_sum(). Fix this issue by converting the 'int' to 'off_t' accordingly.
+
+Fixes: 9972d5d84d76 ("tools/power turbostat: Enable accumulate RAPL display")
+Signed-off-by: Calvin Walton <calvin.walton@kepstin.ca>
+Signed-off-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/power/x86/turbostat/turbostat.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/tools/power/x86/turbostat/turbostat.c
++++ b/tools/power/x86/turbostat/turbostat.c
+@@ -291,9 +291,9 @@ struct msr_sum_array {
+ /* The percpu MSR sum array.*/
+ struct msr_sum_array *per_cpu_msr_sum;
+-int idx_to_offset(int idx)
++off_t idx_to_offset(int idx)
+ {
+-      int offset;
++      off_t offset;
+       switch (idx) {
+       case IDX_PKG_ENERGY:
+@@ -323,7 +323,7 @@ int idx_to_offset(int idx)
+       return offset;
+ }
+-int offset_to_idx(int offset)
++int offset_to_idx(off_t offset)
+ {
+       int idx;
+@@ -3276,7 +3276,7 @@ static int update_msr_sum(struct thread_
+       for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
+               unsigned long long msr_cur, msr_last;
+-              int offset;
++              off_t offset;
+               if (!idx_valid(i))
+                       continue;
+@@ -3285,7 +3285,8 @@ static int update_msr_sum(struct thread_
+                       continue;
+               ret = get_msr(cpu, offset, &msr_cur);
+               if (ret) {
+-                      fprintf(outf, "Can not update msr(0x%x)\n", offset);
++                      fprintf(outf, "Can not update msr(0x%llx)\n",
++                              (unsigned long long)offset);
+                       continue;
+               }
diff --git a/queue-5.11/tracing-map-all-pids-to-command-lines.patch b/queue-5.11/tracing-map-all-pids-to-command-lines.patch
new file mode 100644 (file)
index 0000000..b59529d
--- /dev/null
@@ -0,0 +1,123 @@
+From 785e3c0a3a870e72dc530856136ab4c8dd207128 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Tue, 27 Apr 2021 11:32:07 -0400
+Subject: tracing: Map all PIDs to command lines
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 785e3c0a3a870e72dc530856136ab4c8dd207128 upstream.
+
+The default max PID is set by PID_MAX_DEFAULT, and the tracing
+infrastructure uses this number to map PIDs to the comm names of the
+tasks, so that output of the trace can show names from the recorded PIDs in
+the ring buffer. This mapping is also exported to user space via the
+"saved_cmdlines" file in the tracefs directory.
+
+But currently the mapping expects the PIDs to be less than
+PID_MAX_DEFAULT, which is the default maximum and not the real maximum.
+Recently, systemd started increasing the maximum value of a PID on the system,
+and when tasks are traced that have a PID higher than PID_MAX_DEFAULT, their
+comm is not recorded. This leads to the entire trace having "<...>" as
+the comm name, which is pretty useless.
+
+Instead, keep the array mapping the size of PID_MAX_DEFAULT, but instead
+of just mapping the index to the comm, map a mask of the PID
+(PID_MAX_DEFAULT - 1) to the comm, and find the full PID from the
+map_cmdline_to_pid array (that already exists).
+
+This bug goes back to the beginning of ftrace, but hasn't been an issue
+until user space started increasing the maximum value of PIDs.
+
+Link: https://lkml.kernel.org/r/20210427113207.3c601884@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |   41 +++++++++++++++--------------------------
+ 1 file changed, 15 insertions(+), 26 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2387,14 +2387,13 @@ static void tracing_stop_tr(struct trace
+ static int trace_save_cmdline(struct task_struct *tsk)
+ {
+-      unsigned pid, idx;
++      unsigned tpid, idx;
+       /* treat recording of idle task as a success */
+       if (!tsk->pid)
+               return 1;
+-      if (unlikely(tsk->pid > PID_MAX_DEFAULT))
+-              return 0;
++      tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
+       /*
+        * It's not the end of the world if we don't get
+@@ -2405,26 +2404,15 @@ static int trace_save_cmdline(struct tas
+       if (!arch_spin_trylock(&trace_cmdline_lock))
+               return 0;
+-      idx = savedcmd->map_pid_to_cmdline[tsk->pid];
++      idx = savedcmd->map_pid_to_cmdline[tpid];
+       if (idx == NO_CMDLINE_MAP) {
+               idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
+-              /*
+-               * Check whether the cmdline buffer at idx has a pid
+-               * mapped. We are going to overwrite that entry so we
+-               * need to clear the map_pid_to_cmdline. Otherwise we
+-               * would read the new comm for the old pid.
+-               */
+-              pid = savedcmd->map_cmdline_to_pid[idx];
+-              if (pid != NO_CMDLINE_MAP)
+-                      savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
+-
+-              savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
+-              savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
+-
++              savedcmd->map_pid_to_cmdline[tpid] = idx;
+               savedcmd->cmdline_idx = idx;
+       }
++      savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
+       set_cmdline(idx, tsk->comm);
+       arch_spin_unlock(&trace_cmdline_lock);
+@@ -2435,6 +2423,7 @@ static int trace_save_cmdline(struct tas
+ static void __trace_find_cmdline(int pid, char comm[])
+ {
+       unsigned map;
++      int tpid;
+       if (!pid) {
+               strcpy(comm, "<idle>");
+@@ -2446,16 +2435,16 @@ static void __trace_find_cmdline(int pid
+               return;
+       }
+-      if (pid > PID_MAX_DEFAULT) {
+-              strcpy(comm, "<...>");
+-              return;
++      tpid = pid & (PID_MAX_DEFAULT - 1);
++      map = savedcmd->map_pid_to_cmdline[tpid];
++      if (map != NO_CMDLINE_MAP) {
++              tpid = savedcmd->map_cmdline_to_pid[map];
++              if (tpid == pid) {
++                      strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
++                      return;
++              }
+       }
+-
+-      map = savedcmd->map_pid_to_cmdline[pid];
+-      if (map != NO_CMDLINE_MAP)
+-              strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
+-      else
+-              strcpy(comm, "<...>");
++      strcpy(comm, "<...>");
+ }
+ void trace_find_cmdline(int pid, char comm[])
diff --git a/queue-5.11/tracing-restructure-trace_clock_global-to-never-block.patch b/queue-5.11/tracing-restructure-trace_clock_global-to-never-block.patch
new file mode 100644 (file)
index 0000000..a0d0d40
--- /dev/null
@@ -0,0 +1,150 @@
+From aafe104aa9096827a429bc1358f8260ee565b7cc Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Fri, 30 Apr 2021 12:17:58 -0400
+Subject: tracing: Restructure trace_clock_global() to never block
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit aafe104aa9096827a429bc1358f8260ee565b7cc upstream.
+
+It was reported that a fix to the ring buffer recursion detection would
+cause a hung machine when performing suspend / resume testing. The
+following backtrace was extracted from debugging that case:
+
+Call Trace:
+ trace_clock_global+0x91/0xa0
+ __rb_reserve_next+0x237/0x460
+ ring_buffer_lock_reserve+0x12a/0x3f0
+ trace_buffer_lock_reserve+0x10/0x50
+ __trace_graph_return+0x1f/0x80
+ trace_graph_return+0xb7/0xf0
+ ? trace_clock_global+0x91/0xa0
+ ftrace_return_to_handler+0x8b/0xf0
+ ? pv_hash+0xa0/0xa0
+ return_to_handler+0x15/0x30
+ ? ftrace_graph_caller+0xa0/0xa0
+ ? trace_clock_global+0x91/0xa0
+ ? __rb_reserve_next+0x237/0x460
+ ? ring_buffer_lock_reserve+0x12a/0x3f0
+ ? trace_event_buffer_lock_reserve+0x3c/0x120
+ ? trace_event_buffer_reserve+0x6b/0xc0
+ ? trace_event_raw_event_device_pm_callback_start+0x125/0x2d0
+ ? dpm_run_callback+0x3b/0xc0
+ ? pm_ops_is_empty+0x50/0x50
+ ? platform_get_irq_byname_optional+0x90/0x90
+ ? trace_device_pm_callback_start+0x82/0xd0
+ ? dpm_run_callback+0x49/0xc0
+
+With the following RIP:
+
+RIP: 0010:native_queued_spin_lock_slowpath+0x69/0x200
+
+Since the fix to the recursion detection would allow a single recursion to
+happen while tracing, this led to trace_clock_global() taking a spin
+lock and then trying to take it again:
+
+ring_buffer_lock_reserve() {
+  trace_clock_global() {
+    arch_spin_lock() {
+      queued_spin_lock_slowpath() {
+        /* lock taken */
+        (something else gets traced by function graph tracer)
+          ring_buffer_lock_reserve() {
+            trace_clock_global() {
+              arch_spin_lock() {
+                queued_spin_lock_slowpath() {
+                /* DEAD LOCK! */
+
+Tracing should *never* block, as it can lead to strange lockups like the
+above.
+
+Restructure the trace_clock_global() code so that, instead of taking a
+lock to update the recorded "prev_time", it simply uses it: when two events
+on two different CPUs call this at the same time, it really
+doesn't matter which one goes first. Use a trylock to grab the lock for
+updating the prev_time, and if it fails, simply try again the next time.
+If it failed to be taken, that means something else is already updating
+it.
+
+Link: https://lkml.kernel.org/r/20210430121758.650b6e8a@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Tested-by: Konstantin Kharlamov <hi-angel@yandex.ru>
+Tested-by: Todd Brandt <todd.e.brandt@linux.intel.com>
+Fixes: b02414c8f045 ("ring-buffer: Fix recursion protection transitions between interrupt context") # started showing the problem
+Fixes: 14131f2f98ac3 ("tracing: implement trace_clock_*() APIs") # where the bug happened
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212761
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_clock.c |   48 ++++++++++++++++++++++++++++++---------------
+ 1 file changed, 32 insertions(+), 16 deletions(-)
+
+--- a/kernel/trace/trace_clock.c
++++ b/kernel/trace/trace_clock.c
+@@ -95,33 +95,49 @@ u64 notrace trace_clock_global(void)
+ {
+       unsigned long flags;
+       int this_cpu;
+-      u64 now;
++      u64 now, prev_time;
+       raw_local_irq_save(flags);
+       this_cpu = raw_smp_processor_id();
+-      now = sched_clock_cpu(this_cpu);
++
+       /*
+-       * If in an NMI context then dont risk lockups and return the
+-       * cpu_clock() time:
++       * The global clock "guarantees" that the events are ordered
++       * between CPUs. But if two events on two different CPUS call
++       * trace_clock_global at roughly the same time, it really does
++       * not matter which one gets the earlier time. Just make sure
++       * that the same CPU will always show a monotonic clock.
++       *
++       * Use a read memory barrier to get the latest written
++       * time that was recorded.
+        */
+-      if (unlikely(in_nmi()))
+-              goto out;
++      smp_rmb();
++      prev_time = READ_ONCE(trace_clock_struct.prev_time);
++      now = sched_clock_cpu(this_cpu);
+-      arch_spin_lock(&trace_clock_struct.lock);
++      /* Make sure that now is always greater than prev_time */
++      if ((s64)(now - prev_time) < 0)
++              now = prev_time + 1;
+       /*
+-       * TODO: if this happens often then maybe we should reset
+-       * my_scd->clock to prev_time+1, to make sure
+-       * we start ticking with the local clock from now on?
++       * If in an NMI context then dont risk lockups and simply return
++       * the current time.
+        */
+-      if ((s64)(now - trace_clock_struct.prev_time) < 0)
+-              now = trace_clock_struct.prev_time + 1;
+-
+-      trace_clock_struct.prev_time = now;
+-
+-      arch_spin_unlock(&trace_clock_struct.lock);
++      if (unlikely(in_nmi()))
++              goto out;
++      /* Tracing can cause strange recursion, always use a try lock */
++      if (arch_spin_trylock(&trace_clock_struct.lock)) {
++              /* Reread prev_time in case it was already updated */
++              prev_time = READ_ONCE(trace_clock_struct.prev_time);
++              if ((s64)(now - prev_time) < 0)
++                      now = prev_time + 1;
++
++              trace_clock_struct.prev_time = now;
++
++              /* The unlock acts as the wmb for the above rmb */
++              arch_spin_unlock(&trace_clock_struct.lock);
++      }
+  out:
+       raw_local_irq_restore(flags);