--- /dev/null
+From 17e9e134a8efabbbf689a0904eee92bb5a868172 Mon Sep 17 00:00:00 2001
+From: Tian Tao <tiantao6@hisilicon.com>
+Date: Wed, 14 Apr 2021 09:43:44 +0800
+Subject: dm integrity: fix missing goto in bitmap_flush_interval error handling
+
+From: Tian Tao <tiantao6@hisilicon.com>
+
+commit 17e9e134a8efabbbf689a0904eee92bb5a868172 upstream.
+
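+Without the goto, an invalid "bitmap_flush_interval" argument sets
+r = -EINVAL and ti->error, but dm_integrity_ctr() keeps going instead
+of jumping to its error path, and the out-of-range value is still fed
+to msecs_to_jiffies().
+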
+Fixes: 468dfca38b1a ("dm integrity: add a bitmap mode")
+Cc: stable@vger.kernel.org
+Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-integrity.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/md/dm-integrity.c
++++ b/drivers/md/dm-integrity.c
+@@ -3929,6 +3929,7 @@ static int dm_integrity_ctr(struct dm_ta
+ if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
+ r = -EINVAL;
+ ti->error = "Invalid bitmap_flush_interval argument";
++ goto bad;
+ }
+ ic->bitmap_flush_interval = msecs_to_jiffies(val);
+ } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
--- /dev/null
+From a88b2358f1da2c9f9fcc432f2e0a79617fea397c Mon Sep 17 00:00:00 2001
+From: Joe Thornber <ejt@redhat.com>
+Date: Mon, 29 Mar 2021 16:34:57 +0100
+Subject: dm persistent data: packed struct should have an aligned() attribute too
+
+From: Joe Thornber <ejt@redhat.com>
+
+commit a88b2358f1da2c9f9fcc432f2e0a79617fea397c upstream.
+
+Otherwise most non-x86 architectures (e.g. riscv, arm) will resort to
+byte-by-byte access.
+
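+A minimal illustration of what the attribute changes, as a userspace
+sketch with made-up struct names (not the kernel structs themselves):
+
+    #include <stdio.h>
+
+    struct hdr_packed {
+        unsigned long long blocknr;
+        unsigned int csum;
+        unsigned int padding;
+    } __attribute__((packed));             /* alignof == 1 */
+
+    struct hdr_packed_aligned {
+        unsigned long long blocknr;
+        unsigned int csum;
+        unsigned int padding;
+    } __attribute__((packed, aligned(8))); /* alignof == 8 */
+
+    int main(void)
+    {
+        /* same 16-byte layout, but only the second type lets the
+         * compiler emit naturally aligned word-sized accesses */
+        printf("%zu %zu\n", _Alignof(struct hdr_packed),
+               _Alignof(struct hdr_packed_aligned));
+        return 0;
+    }
+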
+Cc: stable@vger.kernel.org
+Signed-off-by: Joe Thornber <ejt@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/persistent-data/dm-btree-internal.h | 4 ++--
+ drivers/md/persistent-data/dm-space-map-common.h | 8 ++++----
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/persistent-data/dm-btree-internal.h
++++ b/drivers/md/persistent-data/dm-btree-internal.h
+@@ -34,12 +34,12 @@ struct node_header {
+ __le32 max_entries;
+ __le32 value_size;
+ __le32 padding;
+-} __packed;
++} __attribute__((packed, aligned(8)));
+
+ struct btree_node {
+ struct node_header header;
+ __le64 keys[];
+-} __packed;
++} __attribute__((packed, aligned(8)));
+
+
+ /*
+--- a/drivers/md/persistent-data/dm-space-map-common.h
++++ b/drivers/md/persistent-data/dm-space-map-common.h
+@@ -33,7 +33,7 @@ struct disk_index_entry {
+ __le64 blocknr;
+ __le32 nr_free;
+ __le32 none_free_before;
+-} __packed;
++} __attribute__ ((packed, aligned(8)));
+
+
+ #define MAX_METADATA_BITMAPS 255
+@@ -43,7 +43,7 @@ struct disk_metadata_index {
+ __le64 blocknr;
+
+ struct disk_index_entry index[MAX_METADATA_BITMAPS];
+-} __packed;
++} __attribute__ ((packed, aligned(8)));
+
+ struct ll_disk;
+
+@@ -86,7 +86,7 @@ struct disk_sm_root {
+ __le64 nr_allocated;
+ __le64 bitmap_root;
+ __le64 ref_count_root;
+-} __packed;
++} __attribute__ ((packed, aligned(8)));
+
+ #define ENTRIES_PER_BYTE 4
+
+@@ -94,7 +94,7 @@ struct disk_bitmap_header {
+ __le32 csum;
+ __le32 not_used;
+ __le64 blocknr;
+-} __packed;
++} __attribute__ ((packed, aligned(8)));
+
+ enum allocation_event {
+ SM_NONE,
--- /dev/null
+From 8e947c8f4a5620df77e43c9c75310dc510250166 Mon Sep 17 00:00:00 2001
+From: Benjamin Block <bblock@linux.ibm.com>
+Date: Thu, 29 Apr 2021 23:37:00 +0200
+Subject: dm rq: fix double free of blk_mq_tag_set in dev remove after table load fails
+
+From: Benjamin Block <bblock@linux.ibm.com>
+
+commit 8e947c8f4a5620df77e43c9c75310dc510250166 upstream.
+
+When loading a device-mapper table for a request-based mapped device,
+and the allocation/initialization of the blk_mq_tag_set for the device
+fails, a subsequent device remove will cause a double free.
+
+E.g. (dmesg):
+ device-mapper: core: Cannot initialize queue for request-based dm-mq mapped device
+ device-mapper: ioctl: unable to set up device queue for new table.
+ Unable to handle kernel pointer dereference in virtual kernel address space
+ Failing address: 0305e098835de000 TEID: 0305e098835de803
+ Fault in home space mode while using kernel ASCE.
+ AS:000000025efe0007 R3:0000000000000024
+ Oops: 0038 ilc:3 [#1] SMP
+ Modules linked in: ... lots of modules ...
+ Supported: Yes, External
+ CPU: 0 PID: 7348 Comm: multipathd Kdump: loaded Tainted: G W X 5.3.18-53-default #1 SLE15-SP3
+ Hardware name: IBM 8561 T01 7I2 (LPAR)
+ Krnl PSW : 0704e00180000000 000000025e368eca (kfree+0x42/0x330)
+ R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
+ Krnl GPRS: 000000000000004a 000000025efe5230 c1773200d779968d 0000000000000000
+ 000000025e520270 000000025e8d1b40 0000000000000003 00000007aae10000
+ 000000025e5202a2 0000000000000001 c1773200d779968d 0305e098835de640
+ 00000007a8170000 000003ff80138650 000000025e5202a2 000003e00396faa8
+ Krnl Code: 000000025e368eb8: c4180041e100 lgrl %r1,25eba50b8
+ 000000025e368ebe: ecba06b93a55 risbg %r11,%r10,6,185,58
+ #000000025e368ec4: e3b010000008 ag %r11,0(%r1)
+ >000000025e368eca: e310b0080004 lg %r1,8(%r11)
+ 000000025e368ed0: a7110001 tmll %r1,1
+ 000000025e368ed4: a7740129 brc 7,25e369126
+ 000000025e368ed8: e320b0080004 lg %r2,8(%r11)
+ 000000025e368ede: b904001b lgr %r1,%r11
+ Call Trace:
+ [<000000025e368eca>] kfree+0x42/0x330
+ [<000000025e5202a2>] blk_mq_free_tag_set+0x72/0xb8
+ [<000003ff801316a8>] dm_mq_cleanup_mapped_device+0x38/0x50 [dm_mod]
+ [<000003ff80120082>] free_dev+0x52/0xd0 [dm_mod]
+ [<000003ff801233f0>] __dm_destroy+0x150/0x1d0 [dm_mod]
+ [<000003ff8012bb9a>] dev_remove+0x162/0x1c0 [dm_mod]
+ [<000003ff8012a988>] ctl_ioctl+0x198/0x478 [dm_mod]
+ [<000003ff8012ac8a>] dm_ctl_ioctl+0x22/0x38 [dm_mod]
+ [<000000025e3b11ee>] ksys_ioctl+0xbe/0xe0
+ [<000000025e3b127a>] __s390x_sys_ioctl+0x2a/0x40
+ [<000000025e8c15ac>] system_call+0xd8/0x2c8
+ Last Breaking-Event-Address:
+ [<000000025e52029c>] blk_mq_free_tag_set+0x6c/0xb8
+ Kernel panic - not syncing: Fatal exception: panic_on_oops
+
+When allocation/initialization of the blk_mq_tag_set fails in
+dm_mq_init_request_queue(), it is uninitialized/freed, but the pointer
+is not reset to NULL; so when dev_remove() later gets into
+dm_mq_cleanup_mapped_device() it sees the pointer and tries to
+uninitialize and free it again.
+
+Fix this by setting the pointer to NULL in dm_mq_init_request_queue()
+error-handling. Also set it to NULL in dm_mq_cleanup_mapped_device().
+
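+The underlying idiom, reduced to a userspace sketch (hypothetical
+struct and function names):
+
+    #include <stdlib.h>
+
+    struct mapped_dev { void *tag_set; };
+
+    static void cleanup(struct mapped_dev *md)
+    {
+        if (md->tag_set) {
+            free(md->tag_set);
+            md->tag_set = NULL; /* makes a second cleanup() a no-op */
+        }
+    }
+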
+Cc: <stable@vger.kernel.org> # 4.6+
+Fixes: 1c357a1e86a4 ("dm: allocate blk_mq_tag_set rather than embed in mapped_device")
+Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-rq.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/dm-rq.c
++++ b/drivers/md/dm-rq.c
+@@ -569,6 +569,7 @@ out_tag_set:
+ blk_mq_free_tag_set(md->tag_set);
+ out_kfree_tag_set:
+ kfree(md->tag_set);
++ md->tag_set = NULL;
+
+ return err;
+ }
+@@ -578,6 +579,7 @@ void dm_mq_cleanup_mapped_device(struct
+ if (md->tag_set) {
+ blk_mq_free_tag_set(md->tag_set);
+ kfree(md->tag_set);
++ md->tag_set = NULL;
+ }
+ }
+
--- /dev/null
+From 5208692e80a1f3c8ce2063a22b675dd5589d1d80 Mon Sep 17 00:00:00 2001
+From: Joe Thornber <ejt@redhat.com>
+Date: Tue, 13 Apr 2021 09:11:53 +0100
+Subject: dm space map common: fix division bug in sm_ll_find_free_block()
+
+From: Joe Thornber <ejt@redhat.com>
+
+commit 5208692e80a1f3c8ce2063a22b675dd5589d1d80 upstream.
+
+This division bug meant the search for free metadata space could skip
+the final allocation bitmap's worth of entries. The fix affects the DM
+thinp, cache and era targets.
+
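+The kernel's do_div() divides in place and returns the remainder, which
+is where the bug comes from; a userspace sketch with hypothetical
+numbers:
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    /* stand-in for the kernel's do_div(): divides *n in place and
+     * returns the remainder */
+    static uint32_t do_div_emul(uint64_t *n, uint32_t base)
+    {
+        uint32_t rem = *n % base;
+        *n /= base;
+        return rem;
+    }
+
+    int main(void)
+    {
+        uint64_t end = 16384;              /* range ends on a block boundary */
+        uint32_t entries_per_block = 8192;
+        uint32_t e = do_div_emul(&end, entries_per_block);
+
+        /* without the fix e == 0 here, so not a single entry of the
+         * final bitmap block would be searched */
+        if (e == 0)
+            e = entries_per_block;
+        printf("final block searched up to entry %u\n", e);
+        return 0;
+    }
+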
+Cc: stable@vger.kernel.org
+Signed-off-by: Joe Thornber <ejt@redhat.com>
+Tested-by: Ming-Hung Tsai <mtsai@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/persistent-data/dm-space-map-common.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/persistent-data/dm-space-map-common.c
++++ b/drivers/md/persistent-data/dm-space-map-common.c
+@@ -339,6 +339,8 @@ int sm_ll_find_free_block(struct ll_disk
+ */
+ begin = do_div(index_begin, ll->entries_per_block);
+ end = do_div(end, ll->entries_per_block);
++ if (end == 0)
++ end = ll->entries_per_block;
+
+ for (i = index_begin; i < index_end; i++, begin = 0) {
+ struct dm_block *blk;
--- /dev/null
+From 1d0bd580ef83b78a10c0b37f3313eaa59d8c80db Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?=
+ <zhouyanjie@wanyeetech.com>
+Date: Sun, 18 Apr 2021 22:44:23 +0800
+Subject: pinctrl: Ingenic: Add support for read the pin configuration of X1830.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+
+commit 1d0bd580ef83b78a10c0b37f3313eaa59d8c80db upstream.
+
+Add X1830 support to "ingenic_pinconf_get()", so that it can read the
+pin configuration of the X1830 SoC correctly.
+
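+On the X1830, the bias of each pin is encoded as two bits per pin split
+across the PEL register (lower half of the port) and the PEH register
+(upper half), so the single pull-enable test used for the older SoCs
+cannot be reused.
+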
+Fixes: d7da2a1e4e08 ("pinctrl: Ingenic: Add pinctrl driver for X1830.")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Reviewed-by: Paul Cercueil <paul@crapouillou.net>
+Link: https://lore.kernel.org/r/1618757073-1724-3-git-send-email-zhouyanjie@wanyeetech.com
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pinctrl/pinctrl-ingenic.c | 40 +++++++++++++++++++++++++++++---------
+ 1 file changed, 31 insertions(+), 9 deletions(-)
+
+--- a/drivers/pinctrl/pinctrl-ingenic.c
++++ b/drivers/pinctrl/pinctrl-ingenic.c
+@@ -2089,26 +2089,48 @@ static int ingenic_pinconf_get(struct pi
+ enum pin_config_param param = pinconf_to_config_param(*config);
+ unsigned int idx = pin % PINS_PER_GPIO_CHIP;
+ unsigned int offt = pin / PINS_PER_GPIO_CHIP;
+- bool pull;
++ unsigned int bias;
++ bool pull, pullup, pulldown;
+
+- if (jzpc->info->version >= ID_JZ4770)
+- pull = !ingenic_get_pin_config(jzpc, pin, JZ4770_GPIO_PEN);
+- else
+- pull = !ingenic_get_pin_config(jzpc, pin, JZ4740_GPIO_PULL_DIS);
++ if (jzpc->info->version >= ID_X1830) {
++ unsigned int half = PINS_PER_GPIO_CHIP / 2;
++ unsigned int idxh = (pin % half) * 2;
++
++ if (idx < half)
++ regmap_read(jzpc->map, offt * jzpc->info->reg_offset +
++ X1830_GPIO_PEL, &bias);
++ else
++ regmap_read(jzpc->map, offt * jzpc->info->reg_offset +
++ X1830_GPIO_PEH, &bias);
++
++ bias = (bias >> idxh) & (GPIO_PULL_UP | GPIO_PULL_DOWN);
++
++ pullup = (bias == GPIO_PULL_UP) && (jzpc->info->pull_ups[offt] & BIT(idx));
++ pulldown = (bias == GPIO_PULL_DOWN) && (jzpc->info->pull_downs[offt] & BIT(idx));
++
++ } else {
++ if (jzpc->info->version >= ID_JZ4770)
++ pull = !ingenic_get_pin_config(jzpc, pin, JZ4770_GPIO_PEN);
++ else
++ pull = !ingenic_get_pin_config(jzpc, pin, JZ4740_GPIO_PULL_DIS);
++
++ pullup = pull && (jzpc->info->pull_ups[offt] & BIT(idx));
++ pulldown = pull && (jzpc->info->pull_downs[offt] & BIT(idx));
++ }
+
+ switch (param) {
+ case PIN_CONFIG_BIAS_DISABLE:
+- if (pull)
++ if (pullup || pulldown)
+ return -EINVAL;
+ break;
+
+ case PIN_CONFIG_BIAS_PULL_UP:
+- if (!pull || !(jzpc->info->pull_ups[offt] & BIT(idx)))
++ if (!pullup)
+ return -EINVAL;
+ break;
+
+ case PIN_CONFIG_BIAS_PULL_DOWN:
+- if (!pull || !(jzpc->info->pull_downs[offt] & BIT(idx)))
++ if (!pulldown)
+ return -EINVAL;
+ break;
+
+@@ -2126,7 +2148,7 @@ static void ingenic_set_bias(struct inge
+ if (jzpc->info->version >= ID_X1830) {
+ unsigned int idx = pin % PINS_PER_GPIO_CHIP;
+ unsigned int half = PINS_PER_GPIO_CHIP / 2;
+- unsigned int idxh = pin % half * 2;
++ unsigned int idxh = (pin % half) * 2;
+ unsigned int offt = pin / PINS_PER_GPIO_CHIP;
+
+ if (idx < half) {
pci-dwc-move-iatu-detection-earlier.patch
tty-fix-memory-leak-in-vc_deallocate.patch
rsi-use-resume_noirq-for-sdio.patch
+tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch
+tracing-map-all-pids-to-command-lines.patch
+tracing-restructure-trace_clock_global-to-never-block.patch
+dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch
+dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch
+dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch
+dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch
+pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch
--- /dev/null
+From 13a779de4175df602366d129e41782ad7168cef0 Mon Sep 17 00:00:00 2001
+From: Calvin Walton <calvin.walton@kepstin.ca>
+Date: Wed, 28 Apr 2021 17:09:16 +0800
+Subject: tools/power turbostat: Fix offset overflow issue in index converting
+
+From: Calvin Walton <calvin.walton@kepstin.ca>
+
+commit 13a779de4175df602366d129e41782ad7168cef0 upstream.
+
+The idx_to_offset() function returns type int (32-bit signed), but
+MSR_PKG_ENERGY_STAT is a u32 and would be interpreted as a negative number.
+The end result is that it hits the if (offset < 0) check in update_msr_sum(),
+which prevents the timer callback from updating the stat in the background when
+long durations are used. A similar issue exists in offset_to_idx() and
+update_msr_sum(). Fix this issue by converting the 'int' to 'off_t' accordingly.
+
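+For instance, assuming turbostat's AMD definition of
+MSR_PKG_ENERGY_STAT as 0xc001029b, the truncation looks like this in
+isolation:
+
+    #include <stdio.h>
+    #include <sys/types.h>
+
+    int main(void)
+    {
+        unsigned int msr = 0xc001029b;  /* assumed MSR_PKG_ENERGY_STAT */
+        int as_int = (int)msr;          /* old idx_to_offset() return type */
+        off_t as_off = (off_t)msr;      /* new return type (64-bit off_t) */
+
+        /* prints "-1073675621 vs 3221291675"; the negative value trips
+         * the (offset < 0) check in update_msr_sum() */
+        printf("%d vs %lld\n", as_int, (long long)as_off);
+        return 0;
+    }
+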
+Fixes: 9972d5d84d76 ("tools/power turbostat: Enable accumulate RAPL display")
+Signed-off-by: Calvin Walton <calvin.walton@kepstin.ca>
+Signed-off-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/power/x86/turbostat/turbostat.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/tools/power/x86/turbostat/turbostat.c
++++ b/tools/power/x86/turbostat/turbostat.c
+@@ -291,9 +291,9 @@ struct msr_sum_array {
+ /* The percpu MSR sum array.*/
+ struct msr_sum_array *per_cpu_msr_sum;
+
+-int idx_to_offset(int idx)
++off_t idx_to_offset(int idx)
+ {
+- int offset;
++ off_t offset;
+
+ switch (idx) {
+ case IDX_PKG_ENERGY:
+@@ -323,7 +323,7 @@ int idx_to_offset(int idx)
+ return offset;
+ }
+
+-int offset_to_idx(int offset)
++int offset_to_idx(off_t offset)
+ {
+ int idx;
+
+@@ -3276,7 +3276,7 @@ static int update_msr_sum(struct thread_
+
+ for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
+ unsigned long long msr_cur, msr_last;
+- int offset;
++ off_t offset;
+
+ if (!idx_valid(i))
+ continue;
+@@ -3285,7 +3285,8 @@ static int update_msr_sum(struct thread_
+ continue;
+ ret = get_msr(cpu, offset, &msr_cur);
+ if (ret) {
+- fprintf(outf, "Can not update msr(0x%x)\n", offset);
++ fprintf(outf, "Can not update msr(0x%llx)\n",
++ (unsigned long long)offset);
+ continue;
+ }
+
--- /dev/null
+From 785e3c0a3a870e72dc530856136ab4c8dd207128 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Tue, 27 Apr 2021 11:32:07 -0400
+Subject: tracing: Map all PIDs to command lines
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit 785e3c0a3a870e72dc530856136ab4c8dd207128 upstream.
+
+The default max PID is set by PID_MAX_DEFAULT, and the tracing
+infrastructure uses this number to map PIDs to the comm names of the
+tasks, so that the output of the trace can show the names behind the
+PIDs recorded in the ring buffer. This mapping is also exported to user
+space via the "saved_cmdlines" file in the tracefs directory.
+
+But currently the mapping expects the PIDs to be less than
+PID_MAX_DEFAULT, which is the default maximum and not the real maximum.
+Recently, systemd has started increasing the maximum value of a PID on
+the system, and when tasks with a PID higher than PID_MAX_DEFAULT are
+traced, their comm is not recorded. This leads to the entire trace
+having "<...>" as the comm name, which is pretty useless.
+
+Instead, keep the mapping array at the size of PID_MAX_DEFAULT, but
+rather than indexing it by the raw PID, index it by the PID masked with
+(PID_MAX_DEFAULT - 1), and recover the full PID from the
+map_cmdline_to_pid array (which already exists).
+
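+A sketch of why the masking can collide, and hence why the lookup must
+verify the full PID (standalone example; the constant matches the
+kernel's PID_MAX_DEFAULT):
+
+    #include <stdio.h>
+
+    #define PID_MAX_DEFAULT 0x8000  /* 32768 */
+
+    int main(void)
+    {
+        int pid = 40000;  /* hypothetical PID above the default max */
+        int tpid = pid & (PID_MAX_DEFAULT - 1);
+
+        /* PIDs 40000 and 7232 share slot 7232, which is why
+         * __trace_find_cmdline() must compare map_cmdline_to_pid[map]
+         * against the full PID before trusting the comm */
+        printf("pid %d -> slot %d\n", pid, tpid);
+        return 0;
+    }
+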
+This bug goes back to the beginning of ftrace, but hasn't been an issue
+until user space started increasing the maximum value of PIDs.
+
+Link: https://lkml.kernel.org/r/20210427113207.3c601884@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c | 41 +++++++++++++++--------------------------
+ 1 file changed, 15 insertions(+), 26 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2387,14 +2387,13 @@ static void tracing_stop_tr(struct trace
+
+ static int trace_save_cmdline(struct task_struct *tsk)
+ {
+- unsigned pid, idx;
++ unsigned tpid, idx;
+
+ /* treat recording of idle task as a success */
+ if (!tsk->pid)
+ return 1;
+
+- if (unlikely(tsk->pid > PID_MAX_DEFAULT))
+- return 0;
++ tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
+
+ /*
+ * It's not the end of the world if we don't get
+@@ -2405,26 +2404,15 @@ static int trace_save_cmdline(struct tas
+ if (!arch_spin_trylock(&trace_cmdline_lock))
+ return 0;
+
+- idx = savedcmd->map_pid_to_cmdline[tsk->pid];
++ idx = savedcmd->map_pid_to_cmdline[tpid];
+ if (idx == NO_CMDLINE_MAP) {
+ idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
+
+- /*
+- * Check whether the cmdline buffer at idx has a pid
+- * mapped. We are going to overwrite that entry so we
+- * need to clear the map_pid_to_cmdline. Otherwise we
+- * would read the new comm for the old pid.
+- */
+- pid = savedcmd->map_cmdline_to_pid[idx];
+- if (pid != NO_CMDLINE_MAP)
+- savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
+-
+- savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
+- savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
+-
++ savedcmd->map_pid_to_cmdline[tpid] = idx;
+ savedcmd->cmdline_idx = idx;
+ }
+
++ savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
+ set_cmdline(idx, tsk->comm);
+
+ arch_spin_unlock(&trace_cmdline_lock);
+@@ -2435,6 +2423,7 @@ static int trace_save_cmdline(struct tas
+ static void __trace_find_cmdline(int pid, char comm[])
+ {
+ unsigned map;
++ int tpid;
+
+ if (!pid) {
+ strcpy(comm, "<idle>");
+@@ -2446,16 +2435,16 @@ static void __trace_find_cmdline(int pid
+ return;
+ }
+
+- if (pid > PID_MAX_DEFAULT) {
+- strcpy(comm, "<...>");
+- return;
++ tpid = pid & (PID_MAX_DEFAULT - 1);
++ map = savedcmd->map_pid_to_cmdline[tpid];
++ if (map != NO_CMDLINE_MAP) {
++ tpid = savedcmd->map_cmdline_to_pid[map];
++ if (tpid == pid) {
++ strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
++ return;
++ }
+ }
+-
+- map = savedcmd->map_pid_to_cmdline[pid];
+- if (map != NO_CMDLINE_MAP)
+- strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
+- else
+- strcpy(comm, "<...>");
++ strcpy(comm, "<...>");
+ }
+
+ void trace_find_cmdline(int pid, char comm[])
--- /dev/null
+From aafe104aa9096827a429bc1358f8260ee565b7cc Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Fri, 30 Apr 2021 12:17:58 -0400
+Subject: tracing: Restructure trace_clock_global() to never block
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit aafe104aa9096827a429bc1358f8260ee565b7cc upstream.
+
+It was reported that a fix to the ring buffer recursion detection would
+cause a hung machine when performing suspend / resume testing. The
+following backtrace was extracted from debugging that case:
+
+Call Trace:
+ trace_clock_global+0x91/0xa0
+ __rb_reserve_next+0x237/0x460
+ ring_buffer_lock_reserve+0x12a/0x3f0
+ trace_buffer_lock_reserve+0x10/0x50
+ __trace_graph_return+0x1f/0x80
+ trace_graph_return+0xb7/0xf0
+ ? trace_clock_global+0x91/0xa0
+ ftrace_return_to_handler+0x8b/0xf0
+ ? pv_hash+0xa0/0xa0
+ return_to_handler+0x15/0x30
+ ? ftrace_graph_caller+0xa0/0xa0
+ ? trace_clock_global+0x91/0xa0
+ ? __rb_reserve_next+0x237/0x460
+ ? ring_buffer_lock_reserve+0x12a/0x3f0
+ ? trace_event_buffer_lock_reserve+0x3c/0x120
+ ? trace_event_buffer_reserve+0x6b/0xc0
+ ? trace_event_raw_event_device_pm_callback_start+0x125/0x2d0
+ ? dpm_run_callback+0x3b/0xc0
+ ? pm_ops_is_empty+0x50/0x50
+ ? platform_get_irq_byname_optional+0x90/0x90
+ ? trace_device_pm_callback_start+0x82/0xd0
+ ? dpm_run_callback+0x49/0xc0
+
+With the following RIP:
+
+RIP: 0010:native_queued_spin_lock_slowpath+0x69/0x200
+
+Since the fix to the recursion detection would allow a single recursion
+to happen while tracing, this led to trace_clock_global() taking a spin
+lock and then trying to take it again:
+
+ring_buffer_lock_reserve() {
+ trace_clock_global() {
+ arch_spin_lock() {
+ queued_spin_lock_slowpath() {
+ /* lock taken */
+ (something else gets traced by function graph tracer)
+ ring_buffer_lock_reserve() {
+ trace_clock_global() {
+ arch_spin_lock() {
+ queued_spin_lock_slowpath() {
+ /* DEAD LOCK! */
+
+Tracing should *never* block, as it can lead to strange lockups like the
+above.
+
+Restructure the trace_clock_global() code so that, instead of taking a
+lock in order to update the recorded "prev_time", it simply uses it: for
+two events that happen on two different CPUs and call this at roughly
+the same time, it really doesn't matter which one goes first. Use a
+trylock to grab the lock for updating prev_time; if the trylock fails,
+something else is already updating it, so simply try again on the next
+call.
+
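+The resulting pattern, reduced to a compilable sketch with C11
+stand-ins for the kernel primitives (READ_ONCE becomes an atomic load,
+arch_spin_trylock an atomic flag):
+
+    #include <stdatomic.h>
+    #include <stdint.h>
+
+    static _Atomic uint64_t prev_time;
+    static atomic_flag lock = ATOMIC_FLAG_INIT;
+
+    uint64_t clock_global(uint64_t now)
+    {
+        uint64_t prev = atomic_load(&prev_time);
+
+        if ((int64_t)(now - prev) < 0)  /* never go backwards */
+            now = prev + 1;
+
+        if (!atomic_flag_test_and_set(&lock)) {  /* trylock, never spin */
+            prev = atomic_load(&prev_time);      /* may have advanced */
+            if ((int64_t)(now - prev) < 0)
+                now = prev + 1;
+            atomic_store(&prev_time, now);
+            atomic_flag_clear(&lock);
+        }
+        /* on trylock failure another CPU is updating prev_time; just
+         * return and try again on the next event */
+        return now;
+    }
+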
+Link: https://lkml.kernel.org/r/20210430121758.650b6e8a@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Tested-by: Konstantin Kharlamov <hi-angel@yandex.ru>
+Tested-by: Todd Brandt <todd.e.brandt@linux.intel.com>
+Fixes: b02414c8f045 ("ring-buffer: Fix recursion protection transitions between interrupt context") # started showing the problem
+Fixes: 14131f2f98ac3 ("tracing: implement trace_clock_*() APIs") # where the bug happened
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212761
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_clock.c | 48 ++++++++++++++++++++++++++++++---------------
+ 1 file changed, 32 insertions(+), 16 deletions(-)
+
+--- a/kernel/trace/trace_clock.c
++++ b/kernel/trace/trace_clock.c
+@@ -95,33 +95,49 @@ u64 notrace trace_clock_global(void)
+ {
+ unsigned long flags;
+ int this_cpu;
+- u64 now;
++ u64 now, prev_time;
+
+ raw_local_irq_save(flags);
+
+ this_cpu = raw_smp_processor_id();
+- now = sched_clock_cpu(this_cpu);
++
+ /*
+- * If in an NMI context then dont risk lockups and return the
+- * cpu_clock() time:
++ * The global clock "guarantees" that the events are ordered
++ * between CPUs. But if two events on two different CPUS call
++ * trace_clock_global at roughly the same time, it really does
++ * not matter which one gets the earlier time. Just make sure
++ * that the same CPU will always show a monotonic clock.
++ *
++ * Use a read memory barrier to get the latest written
++ * time that was recorded.
+ */
+- if (unlikely(in_nmi()))
+- goto out;
++ smp_rmb();
++ prev_time = READ_ONCE(trace_clock_struct.prev_time);
++ now = sched_clock_cpu(this_cpu);
+
+- arch_spin_lock(&trace_clock_struct.lock);
++ /* Make sure that now is always greater than prev_time */
++ if ((s64)(now - prev_time) < 0)
++ now = prev_time + 1;
+
+ /*
+- * TODO: if this happens often then maybe we should reset
+- * my_scd->clock to prev_time+1, to make sure
+- * we start ticking with the local clock from now on?
++ * If in an NMI context then dont risk lockups and simply return
++ * the current time.
+ */
+- if ((s64)(now - trace_clock_struct.prev_time) < 0)
+- now = trace_clock_struct.prev_time + 1;
+-
+- trace_clock_struct.prev_time = now;
+-
+- arch_spin_unlock(&trace_clock_struct.lock);
++ if (unlikely(in_nmi()))
++ goto out;
+
++ /* Tracing can cause strange recursion, always use a try lock */
++ if (arch_spin_trylock(&trace_clock_struct.lock)) {
++ /* Reread prev_time in case it was already updated */
++ prev_time = READ_ONCE(trace_clock_struct.prev_time);
++ if ((s64)(now - prev_time) < 0)
++ now = prev_time + 1;
++
++ trace_clock_struct.prev_time = now;
++
++ /* The unlock acts as the wmb for the above rmb */
++ arch_spin_unlock(&trace_clock_struct.lock);
++ }
+ out:
+ raw_local_irq_restore(flags);
+