From 7e753f6565195c428f1a1fe8f3bc0c46af90d09b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 May 2021 10:56:28 +0200 Subject: [PATCH] 5.11-stable patches added patches: dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch tracing-map-all-pids-to-command-lines.patch tracing-restructure-trace_clock_global-to-never-block.patch --- ...bitmap_flush_interval-error-handling.patch | 28 ++++ ...should-have-an-aligned-attribute-too.patch | 76 +++++++++ ...in-dev-remove-after-table-load-fails.patch | 91 +++++++++++ ...ivision-bug-in-sm_ll_find_free_block.patch | 33 ++++ ...-read-the-pin-configuration-of-x1830.patch | 96 +++++++++++ queue-5.11/series | 8 + ...t-overflow-issue-in-index-converting.patch | 66 ++++++++ ...racing-map-all-pids-to-command-lines.patch | 123 ++++++++++++++ ...re-trace_clock_global-to-never-block.patch | 150 ++++++++++++++++++ 9 files changed, 671 insertions(+) create mode 100644 queue-5.11/dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch create mode 100644 queue-5.11/dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch create mode 100644 queue-5.11/dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch create mode 100644 queue-5.11/dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch create mode 100644 queue-5.11/pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch create mode 100644 queue-5.11/tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch create mode 100644 
queue-5.11/tracing-map-all-pids-to-command-lines.patch create mode 100644 queue-5.11/tracing-restructure-trace_clock_global-to-never-block.patch diff --git a/queue-5.11/dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch b/queue-5.11/dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch new file mode 100644 index 00000000000..97ce03f28d6 --- /dev/null +++ b/queue-5.11/dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch @@ -0,0 +1,28 @@ +From 17e9e134a8efabbbf689a0904eee92bb5a868172 Mon Sep 17 00:00:00 2001 +From: Tian Tao +Date: Wed, 14 Apr 2021 09:43:44 +0800 +Subject: dm integrity: fix missing goto in bitmap_flush_interval error handling + +From: Tian Tao + +commit 17e9e134a8efabbbf689a0904eee92bb5a868172 upstream. + +Fixes: 468dfca38b1a ("dm integrity: add a bitmap mode") +Cc: stable@vger.kernel.org +Signed-off-by: Tian Tao +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-integrity.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -3929,6 +3929,7 @@ static int dm_integrity_ctr(struct dm_ta + if (val >= (uint64_t)UINT_MAX * 1000 / HZ) { + r = -EINVAL; + ti->error = "Invalid bitmap_flush_interval argument"; ++ goto bad; + } + ic->bitmap_flush_interval = msecs_to_jiffies(val); + } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { diff --git a/queue-5.11/dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch b/queue-5.11/dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch new file mode 100644 index 00000000000..d76f912a10f --- /dev/null +++ b/queue-5.11/dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch @@ -0,0 +1,76 @@ +From a88b2358f1da2c9f9fcc432f2e0a79617fea397c Mon Sep 17 00:00:00 2001 +From: Joe Thornber +Date: Mon, 29 Mar 2021 16:34:57 +0100 +Subject: dm persistent data: packed struct should have 
an aligned() attribute too + +From: Joe Thornber + +commit a88b2358f1da2c9f9fcc432f2e0a79617fea397c upstream. + +Otherwise most non-x86 architectures (e.g. riscv, arm) will resort to +byte-by-byte access. + +Cc: stable@vger.kernel.org +Signed-off-by: Joe Thornber +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/persistent-data/dm-btree-internal.h | 4 ++-- + drivers/md/persistent-data/dm-space-map-common.h | 8 ++++---- + 2 files changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/md/persistent-data/dm-btree-internal.h ++++ b/drivers/md/persistent-data/dm-btree-internal.h +@@ -34,12 +34,12 @@ struct node_header { + __le32 max_entries; + __le32 value_size; + __le32 padding; +-} __packed; ++} __attribute__((packed, aligned(8))); + + struct btree_node { + struct node_header header; + __le64 keys[]; +-} __packed; ++} __attribute__((packed, aligned(8))); + + + /* +--- a/drivers/md/persistent-data/dm-space-map-common.h ++++ b/drivers/md/persistent-data/dm-space-map-common.h +@@ -33,7 +33,7 @@ struct disk_index_entry { + __le64 blocknr; + __le32 nr_free; + __le32 none_free_before; +-} __packed; ++} __attribute__ ((packed, aligned(8))); + + + #define MAX_METADATA_BITMAPS 255 +@@ -43,7 +43,7 @@ struct disk_metadata_index { + __le64 blocknr; + + struct disk_index_entry index[MAX_METADATA_BITMAPS]; +-} __packed; ++} __attribute__ ((packed, aligned(8))); + + struct ll_disk; + +@@ -86,7 +86,7 @@ struct disk_sm_root { + __le64 nr_allocated; + __le64 bitmap_root; + __le64 ref_count_root; +-} __packed; ++} __attribute__ ((packed, aligned(8))); + + #define ENTRIES_PER_BYTE 4 + +@@ -94,7 +94,7 @@ struct disk_bitmap_header { + __le32 csum; + __le32 not_used; + __le64 blocknr; +-} __packed; ++} __attribute__ ((packed, aligned(8))); + + enum allocation_event { + SM_NONE, diff --git a/queue-5.11/dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch 
b/queue-5.11/dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch new file mode 100644 index 00000000000..a8fdc92e13e --- /dev/null +++ b/queue-5.11/dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch @@ -0,0 +1,91 @@ +From 8e947c8f4a5620df77e43c9c75310dc510250166 Mon Sep 17 00:00:00 2001 +From: Benjamin Block +Date: Thu, 29 Apr 2021 23:37:00 +0200 +Subject: dm rq: fix double free of blk_mq_tag_set in dev remove after table load fails + +From: Benjamin Block + +commit 8e947c8f4a5620df77e43c9c75310dc510250166 upstream. + +When loading a device-mapper table for a request-based mapped device, +and the allocation/initialization of the blk_mq_tag_set for the device +fails, a following device remove will cause a double free. + +E.g. (dmesg): + device-mapper: core: Cannot initialize queue for request-based dm-mq mapped device + device-mapper: ioctl: unable to set up device queue for new table. + Unable to handle kernel pointer dereference in virtual kernel address space + Failing address: 0305e098835de000 TEID: 0305e098835de803 + Fault in home space mode while using kernel ASCE. + AS:000000025efe0007 R3:0000000000000024 + Oops: 0038 ilc:3 [#1] SMP + Modules linked in: ... lots of modules ... 
+ Supported: Yes, External + CPU: 0 PID: 7348 Comm: multipathd Kdump: loaded Tainted: G W X 5.3.18-53-default #1 SLE15-SP3 + Hardware name: IBM 8561 T01 7I2 (LPAR) + Krnl PSW : 0704e00180000000 000000025e368eca (kfree+0x42/0x330) + R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 + Krnl GPRS: 000000000000004a 000000025efe5230 c1773200d779968d 0000000000000000 + 000000025e520270 000000025e8d1b40 0000000000000003 00000007aae10000 + 000000025e5202a2 0000000000000001 c1773200d779968d 0305e098835de640 + 00000007a8170000 000003ff80138650 000000025e5202a2 000003e00396faa8 + Krnl Code: 000000025e368eb8: c4180041e100 lgrl %r1,25eba50b8 + 000000025e368ebe: ecba06b93a55 risbg %r11,%r10,6,185,58 + #000000025e368ec4: e3b010000008 ag %r11,0(%r1) + >000000025e368eca: e310b0080004 lg %r1,8(%r11) + 000000025e368ed0: a7110001 tmll %r1,1 + 000000025e368ed4: a7740129 brc 7,25e369126 + 000000025e368ed8: e320b0080004 lg %r2,8(%r11) + 000000025e368ede: b904001b lgr %r1,%r11 + Call Trace: + [<000000025e368eca>] kfree+0x42/0x330 + [<000000025e5202a2>] blk_mq_free_tag_set+0x72/0xb8 + [<000003ff801316a8>] dm_mq_cleanup_mapped_device+0x38/0x50 [dm_mod] + [<000003ff80120082>] free_dev+0x52/0xd0 [dm_mod] + [<000003ff801233f0>] __dm_destroy+0x150/0x1d0 [dm_mod] + [<000003ff8012bb9a>] dev_remove+0x162/0x1c0 [dm_mod] + [<000003ff8012a988>] ctl_ioctl+0x198/0x478 [dm_mod] + [<000003ff8012ac8a>] dm_ctl_ioctl+0x22/0x38 [dm_mod] + [<000000025e3b11ee>] ksys_ioctl+0xbe/0xe0 + [<000000025e3b127a>] __s390x_sys_ioctl+0x2a/0x40 + [<000000025e8c15ac>] system_call+0xd8/0x2c8 + Last Breaking-Event-Address: + [<000000025e52029c>] blk_mq_free_tag_set+0x6c/0xb8 + Kernel panic - not syncing: Fatal exception: panic_on_oops + +When allocation/initialization of the blk_mq_tag_set fails in +dm_mq_init_request_queue(), it is uninitialized/freed, but the pointer +is not reset to NULL; so when dev_remove() later gets into +dm_mq_cleanup_mapped_device() it sees the pointer and tries to +uninitialize and free it 
again. + +Fix this by setting the pointer to NULL in dm_mq_init_request_queue() +error-handling. Also set it to NULL in dm_mq_cleanup_mapped_device(). + +Cc: stable@vger.kernel.org # 4.6+ +Fixes: 1c357a1e86a4 ("dm: allocate blk_mq_tag_set rather than embed in mapped_device") +Signed-off-by: Benjamin Block +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm-rq.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/md/dm-rq.c ++++ b/drivers/md/dm-rq.c +@@ -569,6 +569,7 @@ out_tag_set: + blk_mq_free_tag_set(md->tag_set); + out_kfree_tag_set: + kfree(md->tag_set); ++ md->tag_set = NULL; + + return err; + } +@@ -578,6 +579,7 @@ void dm_mq_cleanup_mapped_device(struct + if (md->tag_set) { + blk_mq_free_tag_set(md->tag_set); + kfree(md->tag_set); ++ md->tag_set = NULL; + } + } + diff --git a/queue-5.11/dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch b/queue-5.11/dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch new file mode 100644 index 00000000000..9d9d9c1a186 --- /dev/null +++ b/queue-5.11/dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch @@ -0,0 +1,33 @@ +From 5208692e80a1f3c8ce2063a22b675dd5589d1d80 Mon Sep 17 00:00:00 2001 +From: Joe Thornber +Date: Tue, 13 Apr 2021 09:11:53 +0100 +Subject: dm space map common: fix division bug in sm_ll_find_free_block() + +From: Joe Thornber + +commit 5208692e80a1f3c8ce2063a22b675dd5589d1d80 upstream. + +This division bug meant the search for free metadata space could skip +the final allocation bitmap's worth of entries. Fix affects DM thinp, +cache and era targets.
+ +Cc: stable@vger.kernel.org +Signed-off-by: Joe Thornber +Tested-by: Ming-Hung Tsai +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/persistent-data/dm-space-map-common.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/md/persistent-data/dm-space-map-common.c ++++ b/drivers/md/persistent-data/dm-space-map-common.c +@@ -339,6 +339,8 @@ int sm_ll_find_free_block(struct ll_disk + */ + begin = do_div(index_begin, ll->entries_per_block); + end = do_div(end, ll->entries_per_block); ++ if (end == 0) ++ end = ll->entries_per_block; + + for (i = index_begin; i < index_end; i++, begin = 0) { + struct dm_block *blk; diff --git a/queue-5.11/pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch b/queue-5.11/pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch new file mode 100644 index 00000000000..77e2615f8be --- /dev/null +++ b/queue-5.11/pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch @@ -0,0 +1,96 @@ +From 1d0bd580ef83b78a10c0b37f3313eaa59d8c80db Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?= + +Date: Sun, 18 Apr 2021 22:44:23 +0800 +Subject: pinctrl: Ingenic: Add support for read the pin configuration of X1830. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: 周琰杰 (Zhou Yanjie) + +commit 1d0bd580ef83b78a10c0b37f3313eaa59d8c80db upstream. + +Add X1830 support in "ingenic_pinconf_get()", so that it can read the +configuration of X1830 SoC correctly. 
+ +Fixes: d7da2a1e4e08 ("pinctrl: Ingenic: Add pinctrl driver for X1830.") +Cc: stable@vger.kernel.org +Signed-off-by: 周琰杰 (Zhou Yanjie) +Reviewed-by: Andy Shevchenko +Reviewed-by: Paul Cercueil +Link: https://lore.kernel.org/r/1618757073-1724-3-git-send-email-zhouyanjie@wanyeetech.com +Signed-off-by: Linus Walleij +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pinctrl/pinctrl-ingenic.c | 40 +++++++++++++++++++++++++++++--------- + 1 file changed, 31 insertions(+), 9 deletions(-) + +--- a/drivers/pinctrl/pinctrl-ingenic.c ++++ b/drivers/pinctrl/pinctrl-ingenic.c +@@ -2089,26 +2089,48 @@ static int ingenic_pinconf_get(struct pi + enum pin_config_param param = pinconf_to_config_param(*config); + unsigned int idx = pin % PINS_PER_GPIO_CHIP; + unsigned int offt = pin / PINS_PER_GPIO_CHIP; +- bool pull; ++ unsigned int bias; ++ bool pull, pullup, pulldown; + +- if (jzpc->info->version >= ID_JZ4770) +- pull = !ingenic_get_pin_config(jzpc, pin, JZ4770_GPIO_PEN); +- else +- pull = !ingenic_get_pin_config(jzpc, pin, JZ4740_GPIO_PULL_DIS); ++ if (jzpc->info->version >= ID_X1830) { ++ unsigned int half = PINS_PER_GPIO_CHIP / 2; ++ unsigned int idxh = (pin % half) * 2; ++ ++ if (idx < half) ++ regmap_read(jzpc->map, offt * jzpc->info->reg_offset + ++ X1830_GPIO_PEL, &bias); ++ else ++ regmap_read(jzpc->map, offt * jzpc->info->reg_offset + ++ X1830_GPIO_PEH, &bias); ++ ++ bias = (bias >> idxh) & (GPIO_PULL_UP | GPIO_PULL_DOWN); ++ ++ pullup = (bias == GPIO_PULL_UP) && (jzpc->info->pull_ups[offt] & BIT(idx)); ++ pulldown = (bias == GPIO_PULL_DOWN) && (jzpc->info->pull_downs[offt] & BIT(idx)); ++ ++ } else { ++ if (jzpc->info->version >= ID_JZ4770) ++ pull = !ingenic_get_pin_config(jzpc, pin, JZ4770_GPIO_PEN); ++ else ++ pull = !ingenic_get_pin_config(jzpc, pin, JZ4740_GPIO_PULL_DIS); ++ ++ pullup = pull && (jzpc->info->pull_ups[offt] & BIT(idx)); ++ pulldown = pull && (jzpc->info->pull_downs[offt] & BIT(idx)); ++ } + + switch (param) { + case PIN_CONFIG_BIAS_DISABLE: +- if (pull) ++ if (pullup ||
pulldown) + return -EINVAL; + break; + + case PIN_CONFIG_BIAS_PULL_UP: +- if (!pull || !(jzpc->info->pull_ups[offt] & BIT(idx))) ++ if (!pullup) + return -EINVAL; + break; + + case PIN_CONFIG_BIAS_PULL_DOWN: +- if (!pull || !(jzpc->info->pull_downs[offt] & BIT(idx))) ++ if (!pulldown) + return -EINVAL; + break; + +@@ -2126,7 +2148,7 @@ static void ingenic_set_bias(struct inge + if (jzpc->info->version >= ID_X1830) { + unsigned int idx = pin % PINS_PER_GPIO_CHIP; + unsigned int half = PINS_PER_GPIO_CHIP / 2; +- unsigned int idxh = pin % half * 2; ++ unsigned int idxh = (pin % half) * 2; + unsigned int offt = pin / PINS_PER_GPIO_CHIP; + + if (idx < half) { diff --git a/queue-5.11/series b/queue-5.11/series index 2d27448297e..b946a2d797c 100644 --- a/queue-5.11/series +++ b/queue-5.11/series @@ -329,3 +329,11 @@ usb-dwc2-fix-session-request-interrupt-handler.patch pci-dwc-move-iatu-detection-earlier.patch tty-fix-memory-leak-in-vc_deallocate.patch rsi-use-resume_noirq-for-sdio.patch +tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch +tracing-map-all-pids-to-command-lines.patch +tracing-restructure-trace_clock_global-to-never-block.patch +dm-persistent-data-packed-struct-should-have-an-aligned-attribute-too.patch +dm-space-map-common-fix-division-bug-in-sm_ll_find_free_block.patch +dm-integrity-fix-missing-goto-in-bitmap_flush_interval-error-handling.patch +dm-rq-fix-double-free-of-blk_mq_tag_set-in-dev-remove-after-table-load-fails.patch +pinctrl-ingenic-add-support-for-read-the-pin-configuration-of-x1830.patch diff --git a/queue-5.11/tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch b/queue-5.11/tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch new file mode 100644 index 00000000000..655f7a8604b --- /dev/null +++ b/queue-5.11/tools-power-turbostat-fix-offset-overflow-issue-in-index-converting.patch @@ -0,0 +1,66 @@ +From 13a779de4175df602366d129e41782ad7168cef0 Mon Sep 17 00:00:00 2001 +From: 
Calvin Walton +Date: Wed, 28 Apr 2021 17:09:16 +0800 +Subject: tools/power turbostat: Fix offset overflow issue in index converting + +From: Calvin Walton + +commit 13a779de4175df602366d129e41782ad7168cef0 upstream. + +The idx_to_offset() function returns type int (32-bit signed), but +MSR_PKG_ENERGY_STAT is u32 and would be interpreted as a negative number. +The end result is that it hits the if (offset < 0) check in update_msr_sum() +which prevents the timer callback from updating the stat in the background when +long durations are used. The similar issue exists in offset_to_idx() and +update_msr_sum(). Fix this issue by converting the 'int' to 'off_t' accordingly. + +Fixes: 9972d5d84d76 ("tools/power turbostat: Enable accumulate RAPL display") +Signed-off-by: Calvin Walton +Signed-off-by: Len Brown +Signed-off-by: Greg Kroah-Hartman +--- + tools/power/x86/turbostat/turbostat.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/tools/power/x86/turbostat/turbostat.c ++++ b/tools/power/x86/turbostat/turbostat.c +@@ -291,9 +291,9 @@ struct msr_sum_array { + /* The percpu MSR sum array.*/ + struct msr_sum_array *per_cpu_msr_sum; + +-int idx_to_offset(int idx) ++off_t idx_to_offset(int idx) + { +- int offset; ++ off_t offset; + + switch (idx) { + case IDX_PKG_ENERGY: +@@ -323,7 +323,7 @@ int idx_to_offset(int idx) + return offset; + } + +-int offset_to_idx(int offset) ++int offset_to_idx(off_t offset) + { + int idx; + +@@ -3276,7 +3276,7 @@ static int update_msr_sum(struct thread_ + + for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { + unsigned long long msr_cur, msr_last; +- int offset; ++ off_t offset; + + if (!idx_valid(i)) + continue; +@@ -3285,7 +3285,8 @@ static int update_msr_sum(struct thread_ + continue; + ret = get_msr(cpu, offset, &msr_cur); + if (ret) { +- fprintf(outf, "Can not update msr(0x%x)\n", offset); ++ fprintf(outf, "Can not update msr(0x%llx)\n", ++ (unsigned long long)offset); + continue; + } + diff --git 
a/queue-5.11/tracing-map-all-pids-to-command-lines.patch b/queue-5.11/tracing-map-all-pids-to-command-lines.patch new file mode 100644 index 00000000000..b59529dc4cc --- /dev/null +++ b/queue-5.11/tracing-map-all-pids-to-command-lines.patch @@ -0,0 +1,123 @@ +From 785e3c0a3a870e72dc530856136ab4c8dd207128 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Tue, 27 Apr 2021 11:32:07 -0400 +Subject: tracing: Map all PIDs to command lines + +From: Steven Rostedt (VMware) + +commit 785e3c0a3a870e72dc530856136ab4c8dd207128 upstream. + +The default max PID is set by PID_MAX_DEFAULT, and the tracing +infrastructure uses this number to map PIDs to the comm names of the +tasks, so that the output of the trace can show names from the recorded PIDs in +the ring buffer. This mapping is also exported to user space via the +"saved_cmdlines" file in the tracefs directory. + +But currently the mapping expects the PIDs to be less than +PID_MAX_DEFAULT, which is the default maximum and not the real maximum. +Recently, systemd has started increasing the maximum value of a PID on the system, +and when tasks are traced that have a PID higher than PID_MAX_DEFAULT, their +comms are not recorded. This leads to the entire trace having "<...>" as +the comm name, which is pretty useless. + +Instead, keep the array mapping the size of PID_MAX_DEFAULT, but instead +of just mapping the index to the comm, map a mask of the PID +(PID_MAX_DEFAULT - 1) to the comm, and find the full PID from the +map_cmdline_to_pid array (that already exists). + +This bug goes back to the beginning of ftrace, but hasn't been an issue +until user space started increasing the maximum value of PIDs.
+ +Link: https://lkml.kernel.org/r/20210427113207.3c601884@gandalf.local.home + +Cc: stable@vger.kernel.org +Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure") +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 41 +++++++++++++++-------------------------- + 1 file changed, 15 insertions(+), 26 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2387,14 +2387,13 @@ static void tracing_stop_tr(struct trace + + static int trace_save_cmdline(struct task_struct *tsk) + { +- unsigned pid, idx; ++ unsigned tpid, idx; + + /* treat recording of idle task as a success */ + if (!tsk->pid) + return 1; + +- if (unlikely(tsk->pid > PID_MAX_DEFAULT)) +- return 0; ++ tpid = tsk->pid & (PID_MAX_DEFAULT - 1); + + /* + * It's not the end of the world if we don't get +@@ -2405,26 +2404,15 @@ static int trace_save_cmdline(struct tas + if (!arch_spin_trylock(&trace_cmdline_lock)) + return 0; + +- idx = savedcmd->map_pid_to_cmdline[tsk->pid]; ++ idx = savedcmd->map_pid_to_cmdline[tpid]; + if (idx == NO_CMDLINE_MAP) { + idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num; + +- /* +- * Check whether the cmdline buffer at idx has a pid +- * mapped. We are going to overwrite that entry so we +- * need to clear the map_pid_to_cmdline. Otherwise we +- * would read the new comm for the old pid. 
+- */ +- pid = savedcmd->map_cmdline_to_pid[idx]; +- if (pid != NO_CMDLINE_MAP) +- savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; +- +- savedcmd->map_cmdline_to_pid[idx] = tsk->pid; +- savedcmd->map_pid_to_cmdline[tsk->pid] = idx; +- ++ savedcmd->map_pid_to_cmdline[tpid] = idx; + savedcmd->cmdline_idx = idx; + } + ++ savedcmd->map_cmdline_to_pid[idx] = tsk->pid; + set_cmdline(idx, tsk->comm); + + arch_spin_unlock(&trace_cmdline_lock); +@@ -2435,6 +2423,7 @@ static int trace_save_cmdline(struct tas + static void __trace_find_cmdline(int pid, char comm[]) + { + unsigned map; ++ int tpid; + + if (!pid) { + strcpy(comm, "<idle>"); +@@ -2446,16 +2435,16 @@ static void __trace_find_cmdline(int pid + return; + } + +- if (pid > PID_MAX_DEFAULT) { +- strcpy(comm, "<...>"); +- return; ++ tpid = pid & (PID_MAX_DEFAULT - 1); ++ map = savedcmd->map_pid_to_cmdline[tpid]; ++ if (map != NO_CMDLINE_MAP) { ++ tpid = savedcmd->map_cmdline_to_pid[map]; ++ if (tpid == pid) { ++ strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); ++ return; ++ } + } +- +- map = savedcmd->map_pid_to_cmdline[pid]; +- if (map != NO_CMDLINE_MAP) +- strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); +- else +- strcpy(comm, "<...>"); ++ strcpy(comm, "<...>"); + } + + void trace_find_cmdline(int pid, char comm[]) diff --git a/queue-5.11/tracing-restructure-trace_clock_global-to-never-block.patch b/queue-5.11/tracing-restructure-trace_clock_global-to-never-block.patch new file mode 100644 index 00000000000..a0d0d4048ba --- /dev/null +++ b/queue-5.11/tracing-restructure-trace_clock_global-to-never-block.patch @@ -0,0 +1,150 @@ +From aafe104aa9096827a429bc1358f8260ee565b7cc Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Fri, 30 Apr 2021 12:17:58 -0400 +Subject: tracing: Restructure trace_clock_global() to never block + +From: Steven Rostedt (VMware) + +commit aafe104aa9096827a429bc1358f8260ee565b7cc upstream.
+ +It was reported that a fix to the ring buffer recursion detection would +cause a hung machine when performing suspend / resume testing. The +following backtrace was extracted from debugging that case: + +Call Trace: + trace_clock_global+0x91/0xa0 + __rb_reserve_next+0x237/0x460 + ring_buffer_lock_reserve+0x12a/0x3f0 + trace_buffer_lock_reserve+0x10/0x50 + __trace_graph_return+0x1f/0x80 + trace_graph_return+0xb7/0xf0 + ? trace_clock_global+0x91/0xa0 + ftrace_return_to_handler+0x8b/0xf0 + ? pv_hash+0xa0/0xa0 + return_to_handler+0x15/0x30 + ? ftrace_graph_caller+0xa0/0xa0 + ? trace_clock_global+0x91/0xa0 + ? __rb_reserve_next+0x237/0x460 + ? ring_buffer_lock_reserve+0x12a/0x3f0 + ? trace_event_buffer_lock_reserve+0x3c/0x120 + ? trace_event_buffer_reserve+0x6b/0xc0 + ? trace_event_raw_event_device_pm_callback_start+0x125/0x2d0 + ? dpm_run_callback+0x3b/0xc0 + ? pm_ops_is_empty+0x50/0x50 + ? platform_get_irq_byname_optional+0x90/0x90 + ? trace_device_pm_callback_start+0x82/0xd0 + ? dpm_run_callback+0x49/0xc0 + +With the following RIP: + +RIP: 0010:native_queued_spin_lock_slowpath+0x69/0x200 + +Since the fix to the recursion detection would allow a single recursion to +happen while tracing, this led to trace_clock_global() taking a spin +lock and then trying to take it again: + +ring_buffer_lock_reserve() { + trace_clock_global() { + arch_spin_lock() { + queued_spin_lock_slowpath() { + /* lock taken */ + (something else gets traced by function graph tracer) + ring_buffer_lock_reserve() { + trace_clock_global() { + arch_spin_lock() { + queued_spin_lock_slowpath() { + /* DEAD LOCK! */ + +Tracing should *never* block, as it can lead to strange lockups like the +above. + +Restructure the trace_clock_global() code so that, instead of taking a +lock to update the recorded "prev_time", it simply uses it: when two events +on two different CPUs call this at the same time, it really +doesn't matter which one goes first.
Use a trylock to grab the lock for +updating the prev_time, and if it fails, simply try again the next time. +If it failed to be taken, that means something else is already updating +it. + +Link: https://lkml.kernel.org/r/20210430121758.650b6e8a@gandalf.local.home + +Cc: stable@vger.kernel.org +Tested-by: Konstantin Kharlamov +Tested-by: Todd Brandt +Fixes: b02414c8f045 ("ring-buffer: Fix recursion protection transitions between interrupt context") # started showing the problem +Fixes: 14131f2f98ac3 ("tracing: implement trace_clock_*() APIs") # where the bug happened +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212761 +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_clock.c | 48 ++++++++++++++++++++++++++++++--------------- + 1 file changed, 32 insertions(+), 16 deletions(-) + +--- a/kernel/trace/trace_clock.c ++++ b/kernel/trace/trace_clock.c +@@ -95,33 +95,49 @@ u64 notrace trace_clock_global(void) + { + unsigned long flags; + int this_cpu; +- u64 now; ++ u64 now, prev_time; + + raw_local_irq_save(flags); + + this_cpu = raw_smp_processor_id(); +- now = sched_clock_cpu(this_cpu); ++ + /* +- * If in an NMI context then dont risk lockups and return the +- * cpu_clock() time: ++ * The global clock "guarantees" that the events are ordered ++ * between CPUs. But if two events on two different CPUS call ++ * trace_clock_global at roughly the same time, it really does ++ * not matter which one gets the earlier time. Just make sure ++ * that the same CPU will always show a monotonic clock. ++ * ++ * Use a read memory barrier to get the latest written ++ * time that was recorded. 
+ */ +- if (unlikely(in_nmi())) +- goto out; ++ smp_rmb(); ++ prev_time = READ_ONCE(trace_clock_struct.prev_time); ++ now = sched_clock_cpu(this_cpu); + +- arch_spin_lock(&trace_clock_struct.lock); ++ /* Make sure that now is always greater than prev_time */ ++ if ((s64)(now - prev_time) < 0) ++ now = prev_time + 1; + + /* +- * TODO: if this happens often then maybe we should reset +- * my_scd->clock to prev_time+1, to make sure +- * we start ticking with the local clock from now on? ++ * If in an NMI context then dont risk lockups and simply return ++ * the current time. + */ +- if ((s64)(now - trace_clock_struct.prev_time) < 0) +- now = trace_clock_struct.prev_time + 1; +- +- trace_clock_struct.prev_time = now; +- +- arch_spin_unlock(&trace_clock_struct.lock); ++ if (unlikely(in_nmi())) ++ goto out; + ++ /* Tracing can cause strange recursion, always use a try lock */ ++ if (arch_spin_trylock(&trace_clock_struct.lock)) { ++ /* Reread prev_time in case it was already updated */ ++ prev_time = READ_ONCE(trace_clock_struct.prev_time); ++ if ((s64)(now - prev_time) < 0) ++ now = prev_time + 1; ++ ++ trace_clock_struct.prev_time = now; ++ ++ /* The unlock acts as the wmb for the above rmb */ ++ arch_spin_unlock(&trace_clock_struct.lock); ++ } + out: + raw_local_irq_restore(flags); + -- 2.47.3