5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 22 Jul 2025 10:03:29 +0000 (12:03 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 22 Jul 2025 10:03:29 +0000 (12:03 +0200)
added patches:
bpf-sockmap-fix-panic-when-calling-skb_linearize.patch
mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch
platform-x86-think-lmi-fix-kobject-cleanup.patch
powercap-intel_rapl-do-not-change-clamping-bit-if-enable-bit-cannot-be-changed.patch
sched-add-wrapper-for-get_wchan-to-keep-task-blocked.patch
x86-fix-__get_wchan-for-stacktrace.patch
x86-fix-get_wchan-to-support-the-orc-unwinder.patch
x86-pin-task-stack-in-__get_wchan.patch

queue-5.15/bpf-sockmap-fix-panic-when-calling-skb_linearize.patch [new file with mode: 0644]
queue-5.15/mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch [new file with mode: 0644]
queue-5.15/platform-x86-think-lmi-fix-kobject-cleanup.patch [new file with mode: 0644]
queue-5.15/powercap-intel_rapl-do-not-change-clamping-bit-if-enable-bit-cannot-be-changed.patch [new file with mode: 0644]
queue-5.15/sched-add-wrapper-for-get_wchan-to-keep-task-blocked.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/x86-fix-__get_wchan-for-stacktrace.patch [new file with mode: 0644]
queue-5.15/x86-fix-get_wchan-to-support-the-orc-unwinder.patch [new file with mode: 0644]
queue-5.15/x86-pin-task-stack-in-__get_wchan.patch [new file with mode: 0644]

diff --git a/queue-5.15/bpf-sockmap-fix-panic-when-calling-skb_linearize.patch b/queue-5.15/bpf-sockmap-fix-panic-when-calling-skb_linearize.patch
new file mode 100644
index 0000000..c5d489c
--- /dev/null
@@ -0,0 +1,208 @@
+From 5ca2e29f6834c64c0e5a9ccf1278c21fb49b827e Mon Sep 17 00:00:00 2001
+From: Jiayuan Chen <jiayuan.chen@linux.dev>
+Date: Mon, 7 Apr 2025 22:21:22 +0800
+Subject: bpf, sockmap: Fix panic when calling skb_linearize
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jiayuan Chen <jiayuan.chen@linux.dev>
+
+commit 5ca2e29f6834c64c0e5a9ccf1278c21fb49b827e upstream.
+
+The panic can be reproduced by executing the command:
+./bench sockmap -c 2 -p 1 -a --rx-verdict-ingress --rx-strp 100000
+
+Then a kernel panic was captured:
+'''
+[  657.460555] kernel BUG at net/core/skbuff.c:2178!
+[  657.462680] Tainted: [W]=WARN
+[  657.463287] Workqueue: events sk_psock_backlog
+...
+[  657.469610]  <TASK>
+[  657.469738]  ? die+0x36/0x90
+[  657.469916]  ? do_trap+0x1d0/0x270
+[  657.470118]  ? pskb_expand_head+0x612/0xf40
+[  657.470376]  ? pskb_expand_head+0x612/0xf40
+[  657.470620]  ? do_error_trap+0xa3/0x170
+[  657.470846]  ? pskb_expand_head+0x612/0xf40
+[  657.471092]  ? handle_invalid_op+0x2c/0x40
+[  657.471335]  ? pskb_expand_head+0x612/0xf40
+[  657.471579]  ? exc_invalid_op+0x2d/0x40
+[  657.471805]  ? asm_exc_invalid_op+0x1a/0x20
+[  657.472052]  ? pskb_expand_head+0xd1/0xf40
+[  657.472292]  ? pskb_expand_head+0x612/0xf40
+[  657.472540]  ? lock_acquire+0x18f/0x4e0
+[  657.472766]  ? find_held_lock+0x2d/0x110
+[  657.472999]  ? __pfx_pskb_expand_head+0x10/0x10
+[  657.473263]  ? __kmalloc_cache_noprof+0x5b/0x470
+[  657.473537]  ? __pfx___lock_release.isra.0+0x10/0x10
+[  657.473826]  __pskb_pull_tail+0xfd/0x1d20
+[  657.474062]  ? __kasan_slab_alloc+0x4e/0x90
+[  657.474707]  sk_psock_skb_ingress_enqueue+0x3bf/0x510
+[  657.475392]  ? __kasan_kmalloc+0xaa/0xb0
+[  657.476010]  sk_psock_backlog+0x5cf/0xd70
+[  657.476637]  process_one_work+0x858/0x1a20
+'''
+
+The panic originates from the assertion BUG_ON(skb_shared(skb)) in
+skb_linearize(). A previous commit (see the Fixes tag) introduced skb_get()
+to avoid race conditions between skb operations in the backlog and skb
+release in the recvmsg path. However, this causes the panic to occur
+whenever skb_linearize() is executed.
+
+The "--rx-strp 100000" parameter forces the RX path to use the strparser
+module which aggregates data until it reaches 100KB before calling sockmap
+logic. The 100KB payload exceeds MAX_MSG_FRAGS, triggering skb_linearize.
+
+To fix this issue, move skb_get() into sk_psock_skb_ingress_enqueue().
+
+'''
+sk_psock_backlog:
+    sk_psock_handle_skb
+       skb_get(skb) <== we move it into 'sk_psock_skb_ingress_enqueue'
+       sk_psock_skb_ingress____________
+                                       ↓
+                                       |
+                                       | → sk_psock_skb_ingress_self
+                                       |      sk_psock_skb_ingress_enqueue
+sk_psock_verdict_apply_________________↑          skb_linearize
+'''
+
+Note that for the verdict_apply path the skb_get() operation is unnecessary,
+so we add a 'take_ref' parameter to control its behavior.
+
+Fixes: a454d84ee20b ("bpf, sockmap: Fix skb refcnt race after locking changes")
+Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
+Link: https://lore.kernel.org/r/20250407142234.47591-4-jiayuan.chen@linux.dev
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+[ adapted skb_linearize() fix to 5.15's sk_psock_skb_ingress_enqueue implementation without the s_data parameter ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skmsg.c |   50 +++++++++++++++++++++++++++-----------------------
+ 1 file changed, 27 insertions(+), 23 deletions(-)
+
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -525,26 +525,35 @@ static int sk_psock_skb_ingress_enqueue(
+                                       u32 off, u32 len,
+                                       struct sk_psock *psock,
+                                       struct sock *sk,
+-                                      struct sk_msg *msg)
++                                      struct sk_msg *msg,
++                                      bool take_ref)
+ {
+       int num_sge, copied;
+-      /* skb linearize may fail with ENOMEM, but lets simply try again
+-       * later if this happens. Under memory pressure we don't want to
+-       * drop the skb. We need to linearize the skb so that the mapping
+-       * in skb_to_sgvec can not error.
++      /* skb_to_sgvec will fail when the total number of fragments in
++       * frag_list and frags exceeds MAX_MSG_FRAGS. For example, the
++       * caller may aggregate multiple skbs.
+        */
+-      if (skb_linearize(skb))
+-              return -EAGAIN;
+       num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
+-      if (unlikely(num_sge < 0))
+-              return num_sge;
++      if (num_sge < 0) {
++              /* skb linearize may fail with ENOMEM, but lets simply try again
++               * later if this happens. Under memory pressure we don't want to
++               * drop the skb. We need to linearize the skb so that the mapping
++               * in skb_to_sgvec can not error.
++               * Note that skb_linearize requires the skb not to be shared.
++               */
++              if (skb_linearize(skb))
++                      return -EAGAIN;
++              num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
++              if (unlikely(num_sge < 0))
++                      return num_sge;
++      }
+       copied = len;
+       msg->sg.start = 0;
+       msg->sg.size = copied;
+       msg->sg.end = num_sge;
+-      msg->skb = skb;
++      msg->skb = take_ref ? skb_get(skb) : skb;
+       sk_psock_queue_msg(psock, msg);
+       sk_psock_data_ready(sk, psock);
+@@ -552,7 +561,7 @@ static int sk_psock_skb_ingress_enqueue(
+ }
+ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+-                                   u32 off, u32 len);
++                                   u32 off, u32 len, bool take_ref);
+ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
+                               u32 off, u32 len)
+@@ -566,7 +575,7 @@ static int sk_psock_skb_ingress(struct s
+        * correctly.
+        */
+       if (unlikely(skb->sk == sk))
+-              return sk_psock_skb_ingress_self(psock, skb, off, len);
++              return sk_psock_skb_ingress_self(psock, skb, off, len, true);
+       msg = sk_psock_create_ingress_msg(sk, skb);
+       if (!msg)
+               return -EAGAIN;
+@@ -578,7 +587,7 @@ static int sk_psock_skb_ingress(struct s
+        * into user buffers.
+        */
+       skb_set_owner_r(skb, sk);
+-      err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
++      err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, true);
+       if (err < 0)
+               kfree(msg);
+       return err;
+@@ -589,7 +598,7 @@ static int sk_psock_skb_ingress(struct s
+  * because the skb is already accounted for here.
+  */
+ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+-                                   u32 off, u32 len)
++                                   u32 off, u32 len, bool take_ref)
+ {
+       struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+       struct sock *sk = psock->sk;
+@@ -599,7 +608,7 @@ static int sk_psock_skb_ingress_self(str
+               return -EAGAIN;
+       sk_msg_init(msg);
+       skb_set_owner_r(skb, sk);
+-      err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
++      err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
+       if (err < 0)
+               kfree(msg);
+       return err;
+@@ -608,18 +617,13 @@ static int sk_psock_skb_ingress_self(str
+ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
+                              u32 off, u32 len, bool ingress)
+ {
+-      int err = 0;
+-
+       if (!ingress) {
+               if (!sock_writeable(psock->sk))
+                       return -EAGAIN;
+               return skb_send_sock(psock->sk, skb, off, len);
+       }
+-      skb_get(skb);
+-      err = sk_psock_skb_ingress(psock, skb, off, len);
+-      if (err < 0)
+-              kfree_skb(skb);
+-      return err;
++
++      return sk_psock_skb_ingress(psock, skb, off, len);
+ }
+ static void sk_psock_skb_state(struct sk_psock *psock,
+@@ -1016,7 +1020,7 @@ static int sk_psock_verdict_apply(struct
+                               off = stm->offset;
+                               len = stm->full_len;
+                       }
+-                      err = sk_psock_skb_ingress_self(psock, skb, off, len);
++                      err = sk_psock_skb_ingress_self(psock, skb, off, len, false);
+               }
+               if (err < 0) {
+                       spin_lock_bh(&psock->ingress_lock);
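
Assembled from the hunks above, the patched 5.15 function takes this shape (a
condensed sketch, not verbatim kernel source): the scatter-gather mapping is
tried first, linearization remains only as a fallback for the over-fragmented
strparser case, and the reference is taken last, so the skb is still unshared
whenever skb_linearize() runs.

static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, u32 off, u32 len,
                                        struct sk_psock *psock,
                                        struct sock *sk, struct sk_msg *msg,
                                        bool take_ref)
{
        int num_sge, copied;

        num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
        if (num_sge < 0) {
                /* Too many fragments for MAX_MSG_FRAGS: linearize while the
                 * skb is still unshared, then retry the mapping.
                 */
                if (skb_linearize(skb))
                        return -EAGAIN;
                num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
                if (unlikely(num_sge < 0))
                        return num_sge;
        }
        copied = len;
        msg->sg.start = 0;
        msg->sg.size = copied;
        msg->sg.end = num_sge;
        /* Reference taken only now, and only on the backlog path. */
        msg->skb = take_ref ? skb_get(skb) : skb;

        sk_psock_queue_msg(psock, msg);
        sk_psock_data_ready(sk, psock);
        return copied;
}
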
diff --git a/queue-5.15/mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch b/queue-5.15/mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch
new file mode 100644
index 0000000..5addc7e
--- /dev/null
@@ -0,0 +1,63 @@
+From fea18c686320a53fce7ad62a87a3e1d10ad02f31 Mon Sep 17 00:00:00 2001
+From: Alexander Gordeev <agordeev@linux.ibm.com>
+Date: Mon, 23 Jun 2025 09:57:21 +0200
+Subject: mm/vmalloc: leave lazy MMU mode on PTE mapping error
+
+From: Alexander Gordeev <agordeev@linux.ibm.com>
+
+commit fea18c686320a53fce7ad62a87a3e1d10ad02f31 upstream.
+
+vmap_pages_pte_range() enters the lazy MMU mode, but fails to leave it in
+case an error is encountered.
+
+Link: https://lkml.kernel.org/r/20250623075721.2817094-1-agordeev@linux.ibm.com
+Fixes: 2ba3e6947aed ("mm/vmalloc: track which page-table levels were modified")
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
+Closes: https://lore.kernel.org/r/202506132017.T1l1l6ME-lkp@intel.com/
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c |   17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -460,6 +460,7 @@ static int vmap_pages_pte_range(pmd_t *p
+               unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+               pgtbl_mod_mask *mask)
+ {
++      int err = 0;
+       pte_t *pte;
+       /*
+@@ -473,15 +474,21 @@ static int vmap_pages_pte_range(pmd_t *p
+       do {
+               struct page *page = pages[*nr];
+-              if (WARN_ON(!pte_none(*pte)))
+-                      return -EBUSY;
+-              if (WARN_ON(!page))
+-                      return -ENOMEM;
++              if (WARN_ON(!pte_none(*pte))) {
++                      err = -EBUSY;
++                      break;
++              }
++              if (WARN_ON(!page)) {
++                      err = -ENOMEM;
++                      break;
++              }
++
+               set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+               (*nr)++;
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       *mask |= PGTBL_PTE_MODIFIED;
+-      return 0;
++
++      return err;
+ }
+ static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
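
The shape of the fix is the classic "break, don't return" cleanup pattern:
state entered before the loop (here, lazy MMU mode) must be left on every
exit path, so the early returns become breaks and the error is carried out in
err. An illustrative sketch with the lazy-MMU bracketing spelled out (the
bracketing calls sit in context lines not shown in the hunk above):

static int map_ptes(pte_t *pte, unsigned long addr, unsigned long end,
                    pgprot_t prot, struct page **pages, int *nr)
{
        int err = 0;

        arch_enter_lazy_mmu_mode();             /* entered once */
        do {
                struct page *page = pages[*nr];

                if (WARN_ON(!pte_none(*pte))) {
                        err = -EBUSY;           /* record the error ... */
                        break;                  /* ... but keep unwinding */
                }
                if (WARN_ON(!page)) {
                        err = -ENOMEM;
                        break;
                }
                set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
                (*nr)++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();             /* now reached on errors too */

        return err;
}
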
diff --git a/queue-5.15/platform-x86-think-lmi-fix-kobject-cleanup.patch b/queue-5.15/platform-x86-think-lmi-fix-kobject-cleanup.patch
new file mode 100644
index 0000000..803594f
--- /dev/null
@@ -0,0 +1,128 @@
+From 9110056fe10b0519529bdbbac37311a5037ea0c2 Mon Sep 17 00:00:00 2001
+From: Kurt Borja <kuurtb@gmail.com>
+Date: Mon, 30 Jun 2025 14:31:20 -0300
+Subject: platform/x86: think-lmi: Fix kobject cleanup
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kurt Borja <kuurtb@gmail.com>
+
+commit 9110056fe10b0519529bdbbac37311a5037ea0c2 upstream.
+
+In tlmi_analyze(), allocated structs with an embedded kobject are freed
+in error paths after they were already initialized.
+
+Fix this first by avoiding the initialization of kobjects in
+tlmi_analyze() and then by correctly cleaning them up in
+tlmi_release_attr() using their kset's kobject list.
+
+Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms")
+Fixes: 30e78435d3bf ("platform/x86: think-lmi: Split kobject_init() and kobject_add() calls")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mark Pearson <mpearson-lenovo@squebb.ca>
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Signed-off-by: Kurt Borja <kuurtb@gmail.com>
+Link: https://lore.kernel.org/r/20250630-lmi-fix-v3-2-ce4f81c9c481@gmail.com
+Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+[ Adapted kobject cleanup to only pwd_admin and pwd_power password types present in 5.15. ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/think-lmi.c |   27 +++++++++++++++------------
+ 1 file changed, 15 insertions(+), 12 deletions(-)
+
+--- a/drivers/platform/x86/think-lmi.c
++++ b/drivers/platform/x86/think-lmi.c
+@@ -765,25 +765,31 @@ static struct kobj_attribute debug_cmd =
+ /* ---- Initialisation --------------------------------------------------------- */
+ static void tlmi_release_attr(void)
+ {
++      struct kobject *pos, *n;
+       int i;
+       /* Attribute structures */
+       for (i = 0; i < TLMI_SETTINGS_COUNT; i++) {
+               if (tlmi_priv.setting[i]) {
+                       sysfs_remove_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group);
+-                      kobject_put(&tlmi_priv.setting[i]->kobj);
+               }
+       }
+       sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
+       if (tlmi_priv.can_debug_cmd && debug_support)
+               sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &debug_cmd.attr);
++
++      list_for_each_entry_safe(pos, n, &tlmi_priv.attribute_kset->list, entry)
++              kobject_put(pos);
++
+       kset_unregister(tlmi_priv.attribute_kset);
+       /* Authentication structures */
+       sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group);
+-      kobject_put(&tlmi_priv.pwd_admin->kobj);
+       sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group);
+-      kobject_put(&tlmi_priv.pwd_power->kobj);
++
++      list_for_each_entry_safe(pos, n, &tlmi_priv.authentication_kset->list, entry)
++              kobject_put(pos);
++
+       kset_unregister(tlmi_priv.authentication_kset);
+ }
+@@ -851,8 +857,8 @@ static int tlmi_sysfs_init(void)
+               /* Build attribute */
+               tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset;
+-              ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL,
+-                                "%s", tlmi_priv.setting[i]->display_name);
++              ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype,
++                                         NULL, "%s", tlmi_priv.setting[i]->display_name);
+               if (ret)
+                       goto fail_create_attr;
+@@ -872,7 +878,8 @@ static int tlmi_sysfs_init(void)
+       }
+       /* Create authentication entries */
+       tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset;
+-      ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin");
++      ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype,
++                                 NULL, "%s", "Admin");
+       if (ret)
+               goto fail_create_attr;
+@@ -881,7 +888,8 @@ static int tlmi_sysfs_init(void)
+               goto fail_create_attr;
+       tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset;
+-      ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "System");
++      ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype,
++                                 NULL, "%s", "Power-on");
+       if (ret)
+               goto fail_create_attr;
+@@ -995,7 +1003,6 @@ static int tlmi_analyze(void)
+               if (setting->possible_values)
+                       strreplace(setting->possible_values, ',', ';');
+-              kobject_init(&setting->kobj, &tlmi_attr_setting_ktype);
+               tlmi_priv.setting[i] = setting;
+               kfree(item);
+       }
+@@ -1021,8 +1028,6 @@ static int tlmi_analyze(void)
+       if (pwdcfg.password_state & TLMI_PAP_PWD)
+               tlmi_priv.pwd_admin->valid = true;
+-      kobject_init(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype);
+-
+       tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL);
+       if (!tlmi_priv.pwd_power) {
+               ret = -ENOMEM;
+@@ -1038,8 +1043,6 @@ static int tlmi_analyze(void)
+       if (pwdcfg.password_state & TLMI_POP_PWD)
+               tlmi_priv.pwd_power->valid = true;
+-      kobject_init(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype);
+-
+       return 0;
+ fail_free_pwd_admin:
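
The underlying rule is the kobject lifecycle contract: before kobject_init()
a failed allocation may simply be kfree()d, but afterwards the memory must go
through kobject_put() so the ktype's release() runs. Switching to
kobject_init_and_add() and walking each kset's kobject list on teardown keeps
init and cleanup symmetric. An illustrative sketch of the pattern (type and
names hypothetical, not the driver's actual code):

struct example_obj {
        struct kobject kobj;
        /* ... payload ... */
};

static void example_release(struct kobject *kobj)
{
        /* Runs when the last reference is dropped via kobject_put(). */
        kfree(container_of(kobj, struct example_obj, kobj));
}

static struct kobj_type example_ktype = {
        .release = example_release,
};

static int example_register(struct kset *kset, struct example_obj *obj,
                            const char *name)
{
        obj->kobj.kset = kset;
        /* Init and add in one step; on failure, kobject_put() cleans up. */
        return kobject_init_and_add(&obj->kobj, &example_ktype, NULL,
                                    "%s", name);
}

static void example_teardown(struct kset *kset)
{
        struct kobject *pos, *n;

        /* Drop every member before unregistering the kset itself. */
        list_for_each_entry_safe(pos, n, &kset->list, entry)
                kobject_put(pos);
        kset_unregister(kset);
}
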
diff --git a/queue-5.15/powercap-intel_rapl-do-not-change-clamping-bit-if-enable-bit-cannot-be-changed.patch b/queue-5.15/powercap-intel_rapl-do-not-change-clamping-bit-if-enable-bit-cannot-be-changed.patch
new file mode 100644
index 0000000..2741b86
--- /dev/null
@@ -0,0 +1,75 @@
+From 964209202ebe1569c858337441e87ef0f9d71416 Mon Sep 17 00:00:00 2001
+From: Zhang Rui <rui.zhang@intel.com>
+Date: Thu, 19 Jun 2025 15:13:40 +0800
+Subject: powercap: intel_rapl: Do not change CLAMPING bit if ENABLE bit cannot be changed
+
+From: Zhang Rui <rui.zhang@intel.com>
+
+commit 964209202ebe1569c858337441e87ef0f9d71416 upstream.
+
+PL1 cannot be disabled on some platforms. The ENABLE bit is still set
+after software clears it. This behavior leads to a scenario where, upon
+user request to disable the Power Limit through the powercap sysfs, the
+ENABLE bit remains set while the CLAMPING bit is inadvertently cleared.
+
+According to the Intel Software Developer's Manual, the CLAMPING bit,
+"When set, allows the processor to go below the OS requested P states in
+order to maintain the power below specified Platform Power Limit value."
+
+This means the system may operate at higher power levels than
+intended on such platforms.
+
+Enhance the code to check the ENABLE bit after writing to it, and stop
+further processing if the ENABLE bit cannot be changed.
+
+Reported-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+Fixes: 2d281d8196e3 ("PowerCap: Introduce Intel RAPL power capping driver")
+Cc: All applicable <stable@vger.kernel.org>
+Signed-off-by: Zhang Rui <rui.zhang@intel.com>
+Link: https://patch.msgid.link/20250619071340.384782-1-rui.zhang@intel.com
+[ rjw: Use str_enabled_disabled() instead of open-coded equivalent ]
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+[ replaced rapl_write_pl_data() and rapl_read_pl_data() with rapl_write_data_raw() and rapl_read_data_raw() ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/powercap/intel_rapl_common.c |   23 ++++++++++++++++++++++-
+ 1 file changed, 22 insertions(+), 1 deletion(-)
+
+--- a/drivers/powercap/intel_rapl_common.c
++++ b/drivers/powercap/intel_rapl_common.c
+@@ -212,12 +212,33 @@ static int find_nr_power_limit(struct ra
+ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
+ {
+       struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
++      u64 val;
++      int ret;
+       if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
+               return -EACCES;
+       cpus_read_lock();
+-      rapl_write_data_raw(rd, PL1_ENABLE, mode);
++      ret = rapl_write_data_raw(rd, PL1_ENABLE, mode);
++      if (ret) {
++              cpus_read_unlock();
++              return ret;
++      }
++
++      /* Check if the ENABLE bit was actually changed */
++      ret = rapl_read_data_raw(rd, PL1_ENABLE, true, &val);
++      if (ret) {
++              cpus_read_unlock();
++              return ret;
++      }
++
++      if (mode != val) {
++              pr_debug("%s cannot be %s\n", power_zone->name,
++                       mode ? "enabled" : "disabled");
++              cpus_read_unlock();
++              return 0;
++      }
++
+       if (rapl_defaults->set_floor_freq)
+               rapl_defaults->set_floor_freq(rd, mode);
+       cpus_read_unlock();
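
The fix follows a write-then-verify pattern: after attempting to flip ENABLE,
read it back and bail out before any dependent state is touched if the
hardware silently refused. A condensed sketch of the patched function
(restructured around a single unlock label for readability; semantics as in
the diff above):

static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
        struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
        u64 val;
        int ret;

        if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
                return -EACCES;

        cpus_read_lock();
        ret = rapl_write_data_raw(rd, PL1_ENABLE, mode);
        if (ret)
                goto out;

        /* Read ENABLE back: some platforms ignore the write entirely. */
        ret = rapl_read_data_raw(rd, PL1_ENABLE, true, &val);
        if (ret)
                goto out;

        if (mode != val) {
                /* ENABLE is stuck: stop before set_floor_freq(), which
                 * would otherwise toggle the CLAMPING bit.
                 */
                pr_debug("%s cannot be %s\n", power_zone->name,
                         mode ? "enabled" : "disabled");
                goto out;
        }

        if (rapl_defaults->set_floor_freq)
                rapl_defaults->set_floor_freq(rd, mode);
out:
        cpus_read_unlock();
        return ret;
}
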
diff --git a/queue-5.15/sched-add-wrapper-for-get_wchan-to-keep-task-blocked.patch b/queue-5.15/sched-add-wrapper-for-get_wchan-to-keep-task-blocked.patch
new file mode 100644
index 0000000..adfbe20
--- /dev/null
@@ -0,0 +1,841 @@
+From 42a20f86dc19f9282d974df0ba4d226c865ab9dd Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Wed, 29 Sep 2021 15:02:14 -0700
+Subject: sched: Add wrapper for get_wchan() to keep task blocked
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 42a20f86dc19f9282d974df0ba4d226c865ab9dd upstream.
+
+Having a stable wchan means the process must be blocked, and it must
+stay that way while stack unwinding is performed.
+
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Acked-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk> [arm]
+Tested-by: Mark Rutland <mark.rutland@arm.com> [arm64]
+Link: https://lkml.kernel.org/r/20211008111626.332092234@infradead.org
+Signed-off-by: Siddhi Katage <siddhi.katage@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/alpha/include/asm/processor.h      |    2 +-
+ arch/alpha/kernel/process.c             |    5 ++---
+ arch/arc/include/asm/processor.h        |    2 +-
+ arch/arc/kernel/stacktrace.c            |    4 ++--
+ arch/arm/include/asm/processor.h        |    2 +-
+ arch/arm/kernel/process.c               |    4 +---
+ arch/arm64/include/asm/processor.h      |    2 +-
+ arch/arm64/kernel/process.c             |    4 +---
+ arch/csky/include/asm/processor.h       |    2 +-
+ arch/csky/kernel/stacktrace.c           |    5 ++---
+ arch/h8300/include/asm/processor.h      |    2 +-
+ arch/h8300/kernel/process.c             |    5 +----
+ arch/hexagon/include/asm/processor.h    |    2 +-
+ arch/hexagon/kernel/process.c           |    4 +---
+ arch/ia64/include/asm/processor.h       |    2 +-
+ arch/ia64/kernel/process.c              |    5 +----
+ arch/m68k/include/asm/processor.h       |    2 +-
+ arch/m68k/kernel/process.c              |    4 +---
+ arch/microblaze/include/asm/processor.h |    2 +-
+ arch/microblaze/kernel/process.c        |    2 +-
+ arch/mips/include/asm/processor.h       |    2 +-
+ arch/mips/kernel/process.c              |    8 +++-----
+ arch/nds32/include/asm/processor.h      |    2 +-
+ arch/nds32/kernel/process.c             |    7 +------
+ arch/nios2/include/asm/processor.h      |    2 +-
+ arch/nios2/kernel/process.c             |    5 +----
+ arch/openrisc/include/asm/processor.h   |    2 +-
+ arch/openrisc/kernel/process.c          |    2 +-
+ arch/parisc/include/asm/processor.h     |    2 +-
+ arch/parisc/kernel/process.c            |    5 +----
+ arch/powerpc/include/asm/processor.h    |    2 +-
+ arch/powerpc/kernel/process.c           |    9 +++------
+ arch/riscv/include/asm/processor.h      |    2 +-
+ arch/riscv/kernel/stacktrace.c          |   12 +++++-------
+ arch/s390/include/asm/processor.h       |    2 +-
+ arch/s390/kernel/process.c              |    4 ++--
+ arch/sh/include/asm/processor_32.h      |    2 +-
+ arch/sh/kernel/process_32.c             |    5 +----
+ arch/sparc/include/asm/processor_32.h   |    2 +-
+ arch/sparc/include/asm/processor_64.h   |    2 +-
+ arch/sparc/kernel/process_32.c          |    5 +----
+ arch/sparc/kernel/process_64.c          |    5 +----
+ arch/um/include/asm/processor-generic.h |    2 +-
+ arch/um/kernel/process.c                |    5 +----
+ arch/x86/include/asm/processor.h        |    2 +-
+ arch/x86/kernel/process.c               |    5 +----
+ arch/xtensa/include/asm/processor.h     |    2 +-
+ arch/xtensa/kernel/process.c            |    5 +----
+ include/linux/sched.h                   |    1 +
+ kernel/sched/core.c                     |   19 +++++++++++++++++++
+ 50 files changed, 80 insertions(+), 112 deletions(-)
+
+--- a/arch/alpha/include/asm/processor.h
++++ b/arch/alpha/include/asm/processor.h
+@@ -38,7 +38,7 @@ extern void start_thread(struct pt_regs
+ struct task_struct;
+ extern void release_thread(struct task_struct *);
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
+--- a/arch/alpha/kernel/process.c
++++ b/arch/alpha/kernel/process.c
+@@ -376,12 +376,11 @@ thread_saved_pc(struct task_struct *t)
+ }
+ unsigned long
+-get_wchan(struct task_struct *p)
++__get_wchan(struct task_struct *p)
+ {
+       unsigned long schedule_frame;
+       unsigned long pc;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
++
+       /*
+        * This one depends on the frame size of schedule().  Do a
+        * "disass schedule" in gdb to find the frame size.  Also, the
+--- a/arch/arc/include/asm/processor.h
++++ b/arch/arc/include/asm/processor.h
+@@ -70,7 +70,7 @@ struct task_struct;
+ extern void start_thread(struct pt_regs * regs, unsigned long pc,
+                        unsigned long usp);
+-extern unsigned int get_wchan(struct task_struct *p);
++extern unsigned int __get_wchan(struct task_struct *p);
+ #endif /* !__ASSEMBLY__ */
+--- a/arch/arc/kernel/stacktrace.c
++++ b/arch/arc/kernel/stacktrace.c
+@@ -15,7 +15,7 @@
+  *      = specifics of data structs where trace is saved(CONFIG_STACKTRACE etc)
+  *
+  *  vineetg: March 2009
+- *  -Implemented correct versions of thread_saved_pc() and get_wchan()
++ *  -Implemented correct versions of thread_saved_pc() and __get_wchan()
+  *
+  *  rajeshwarr: 2008
+  *  -Initial implementation
+@@ -248,7 +248,7 @@ void show_stack(struct task_struct *tsk,
+  * Of course just returning schedule( ) would be pointless so unwind until
+  * the function is not in schedular code
+  */
+-unsigned int get_wchan(struct task_struct *tsk)
++unsigned int __get_wchan(struct task_struct *tsk)
+ {
+       return arc_unwind_core(tsk, NULL, __get_first_nonsched, NULL);
+ }
+--- a/arch/arm/include/asm/processor.h
++++ b/arch/arm/include/asm/processor.h
+@@ -84,7 +84,7 @@ struct task_struct;
+ /* Free all resources held by a thread. */
+ extern void release_thread(struct task_struct *);
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define task_pt_regs(p) \
+       ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
+--- a/arch/arm/kernel/process.c
++++ b/arch/arm/kernel/process.c
+@@ -276,13 +276,11 @@ int copy_thread(unsigned long clone_flag
+       return 0;
+ }
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       struct stackframe frame;
+       unsigned long stack_page;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+       frame.fp = thread_saved_fp(p);
+       frame.sp = thread_saved_sp(p);
+--- a/arch/arm64/include/asm/processor.h
++++ b/arch/arm64/include/asm/processor.h
+@@ -265,7 +265,7 @@ struct task_struct;
+ /* Free all resources held by a thread. */
+ extern void release_thread(struct task_struct *);
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ void update_sctlr_el1(u64 sctlr);
+--- a/arch/arm64/kernel/process.c
++++ b/arch/arm64/kernel/process.c
+@@ -523,13 +523,11 @@ __notrace_funcgraph struct task_struct *
+       return last;
+ }
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       struct stackframe frame;
+       unsigned long stack_page, ret = 0;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+       stack_page = (unsigned long)try_get_task_stack(p);
+       if (!stack_page)
+--- a/arch/csky/include/asm/processor.h
++++ b/arch/csky/include/asm/processor.h
+@@ -81,7 +81,7 @@ static inline void release_thread(struct
+ extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define KSTK_EIP(tsk)         (task_pt_regs(tsk)->pc)
+ #define KSTK_ESP(tsk)         (task_pt_regs(tsk)->usp)
+--- a/arch/csky/kernel/stacktrace.c
++++ b/arch/csky/kernel/stacktrace.c
+@@ -111,12 +111,11 @@ static bool save_wchan(unsigned long pc,
+       return false;
+ }
+-unsigned long get_wchan(struct task_struct *task)
++unsigned long __get_wchan(struct task_struct *task)
+ {
+       unsigned long pc = 0;
+-      if (likely(task && task != current && !task_is_running(task)))
+-              walk_stackframe(task, NULL, save_wchan, &pc);
++      walk_stackframe(task, NULL, save_wchan, &pc);
+       return pc;
+ }
+--- a/arch/h8300/include/asm/processor.h
++++ b/arch/h8300/include/asm/processor.h
+@@ -105,7 +105,7 @@ static inline void release_thread(struct
+ {
+ }
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define       KSTK_EIP(tsk)   \
+       ({                       \
+--- a/arch/h8300/kernel/process.c
++++ b/arch/h8300/kernel/process.c
+@@ -128,15 +128,12 @@ int copy_thread(unsigned long clone_flag
+       return 0;
+ }
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long fp, pc;
+       unsigned long stack_page;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+-
+       stack_page = (unsigned long)p;
+       fp = ((struct pt_regs *)p->thread.ksp)->er6;
+       do {
+--- a/arch/hexagon/include/asm/processor.h
++++ b/arch/hexagon/include/asm/processor.h
+@@ -64,7 +64,7 @@ struct thread_struct {
+ extern void release_thread(struct task_struct *dead_task);
+ /* Get wait channel for task P.  */
+-extern unsigned long get_wchan(struct task_struct *p);
++extern unsigned long __get_wchan(struct task_struct *p);
+ /*  The following stuff is pretty HEXAGON specific.  */
+--- a/arch/hexagon/kernel/process.c
++++ b/arch/hexagon/kernel/process.c
+@@ -130,13 +130,11 @@ void flush_thread(void)
+  * is an identification of the point at which the scheduler
+  * was invoked by a blocked thread.
+  */
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long fp, pc;
+       unsigned long stack_page;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+       stack_page = (unsigned long)task_stack_page(p);
+       fp = ((struct hexagon_switch_stack *)p->thread.switch_sp)->fp;
+--- a/arch/ia64/include/asm/processor.h
++++ b/arch/ia64/include/asm/processor.h
+@@ -330,7 +330,7 @@ struct task_struct;
+ #define release_thread(dead_task)
+ /* Get wait channel for task P.  */
+-extern unsigned long get_wchan (struct task_struct *p);
++extern unsigned long __get_wchan (struct task_struct *p);
+ /* Return instruction pointer of blocked task TSK.  */
+ #define KSTK_EIP(tsk)                                 \
+--- a/arch/ia64/kernel/process.c
++++ b/arch/ia64/kernel/process.c
+@@ -523,15 +523,12 @@ exit_thread (struct task_struct *tsk)
+ }
+ unsigned long
+-get_wchan (struct task_struct *p)
++__get_wchan (struct task_struct *p)
+ {
+       struct unw_frame_info info;
+       unsigned long ip;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+-
+       /*
+        * Note: p may not be a blocked task (it could be current or
+        * another process running on some other CPU.  Rather than
+--- a/arch/m68k/include/asm/processor.h
++++ b/arch/m68k/include/asm/processor.h
+@@ -150,7 +150,7 @@ static inline void release_thread(struct
+ {
+ }
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define       KSTK_EIP(tsk)   \
+     ({                        \
+--- a/arch/m68k/kernel/process.c
++++ b/arch/m68k/kernel/process.c
+@@ -263,13 +263,11 @@ int dump_fpu (struct pt_regs *regs, stru
+ }
+ EXPORT_SYMBOL(dump_fpu);
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long fp, pc;
+       unsigned long stack_page;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+       stack_page = (unsigned long)task_stack_page(p);
+       fp = ((struct switch_stack *)p->thread.ksp)->a6;
+--- a/arch/microblaze/include/asm/processor.h
++++ b/arch/microblaze/include/asm/processor.h
+@@ -68,7 +68,7 @@ static inline void release_thread(struct
+ {
+ }
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ /* The size allocated for kernel stacks. This _must_ be a power of two! */
+ # define KERNEL_STACK_SIZE    0x2000
+--- a/arch/microblaze/kernel/process.c
++++ b/arch/microblaze/kernel/process.c
+@@ -112,7 +112,7 @@ int copy_thread(unsigned long clone_flag
+       return 0;
+ }
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+ /* TBD (used by procfs) */
+       return 0;
+--- a/arch/mips/include/asm/processor.h
++++ b/arch/mips/include/asm/processor.h
+@@ -369,7 +369,7 @@ static inline void flush_thread(void)
+ {
+ }
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \
+                        THREAD_SIZE - 32 - sizeof(struct pt_regs))
+--- a/arch/mips/kernel/process.c
++++ b/arch/mips/kernel/process.c
+@@ -511,7 +511,7 @@ static int __init frame_info_init(void)
+       /*
+        * Without schedule() frame info, result given by
+-       * thread_saved_pc() and get_wchan() are not reliable.
++       * thread_saved_pc() and __get_wchan() are not reliable.
+        */
+       if (schedule_mfi.pc_offset < 0)
+               printk("Can't analyze schedule() prologue at %p\n", schedule);
+@@ -652,9 +652,9 @@ unsigned long unwind_stack(struct task_s
+ #endif
+ /*
+- * get_wchan - a maintenance nightmare^W^Wpain in the ass ...
++ * __get_wchan - a maintenance nightmare^W^Wpain in the ass ...
+  */
+-unsigned long get_wchan(struct task_struct *task)
++unsigned long __get_wchan(struct task_struct *task)
+ {
+       unsigned long pc = 0;
+ #ifdef CONFIG_KALLSYMS
+@@ -662,8 +662,6 @@ unsigned long get_wchan(struct task_stru
+       unsigned long ra = 0;
+ #endif
+-      if (!task || task == current || task_is_running(task))
+-              goto out;
+       if (!task_stack_page(task))
+               goto out;
+--- a/arch/nds32/include/asm/processor.h
++++ b/arch/nds32/include/asm/processor.h
+@@ -83,7 +83,7 @@ extern struct task_struct *last_task_use
+ /* Prepare to copy thread state - unlazy all lazy status */
+ #define prepare_to_copy(tsk)  do { } while (0)
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define cpu_relax()                   barrier()
+--- a/arch/nds32/kernel/process.c
++++ b/arch/nds32/kernel/process.c
+@@ -233,15 +233,12 @@ int dump_fpu(struct pt_regs *regs, elf_f
+ EXPORT_SYMBOL(dump_fpu);
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long fp, lr;
+       unsigned long stack_start, stack_end;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+-
+       if (IS_ENABLED(CONFIG_FRAME_POINTER)) {
+               stack_start = (unsigned long)end_of_stack(p);
+               stack_end = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+@@ -258,5 +255,3 @@ unsigned long get_wchan(struct task_stru
+       }
+       return 0;
+ }
+-
+-EXPORT_SYMBOL(get_wchan);
+--- a/arch/nios2/include/asm/processor.h
++++ b/arch/nios2/include/asm/processor.h
+@@ -69,7 +69,7 @@ static inline void release_thread(struct
+ {
+ }
+-extern unsigned long get_wchan(struct task_struct *p);
++extern unsigned long __get_wchan(struct task_struct *p);
+ #define task_pt_regs(p) \
+       ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
+--- a/arch/nios2/kernel/process.c
++++ b/arch/nios2/kernel/process.c
+@@ -217,15 +217,12 @@ void dump(struct pt_regs *fp)
+       pr_emerg("\n\n");
+ }
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long fp, pc;
+       unsigned long stack_page;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+-
+       stack_page = (unsigned long)p;
+       fp = ((struct switch_stack *)p->thread.ksp)->fp;        /* ;dgt2 */
+       do {
+--- a/arch/openrisc/include/asm/processor.h
++++ b/arch/openrisc/include/asm/processor.h
+@@ -73,7 +73,7 @@ struct thread_struct {
+ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
+ void release_thread(struct task_struct *);
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define cpu_relax()     barrier()
+--- a/arch/openrisc/kernel/process.c
++++ b/arch/openrisc/kernel/process.c
+@@ -263,7 +263,7 @@ void dump_elf_thread(elf_greg_t *dest, s
+       dest[35] = 0;
+ }
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       /* TODO */
+--- a/arch/parisc/include/asm/processor.h
++++ b/arch/parisc/include/asm/processor.h
+@@ -273,7 +273,7 @@ struct mm_struct;
+ /* Free all resources held by a thread. */
+ extern void release_thread(struct task_struct *);
+-extern unsigned long get_wchan(struct task_struct *p);
++extern unsigned long __get_wchan(struct task_struct *p);
+ #define KSTK_EIP(tsk) ((tsk)->thread.regs.iaoq[0])
+ #define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30])
+--- a/arch/parisc/kernel/process.c
++++ b/arch/parisc/kernel/process.c
+@@ -245,15 +245,12 @@ copy_thread(unsigned long clone_flags, u
+ }
+ unsigned long
+-get_wchan(struct task_struct *p)
++__get_wchan(struct task_struct *p)
+ {
+       struct unwind_frame_info info;
+       unsigned long ip;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+-
+       /*
+        * These bracket the sleeping functions..
+        */
+--- a/arch/powerpc/include/asm/processor.h
++++ b/arch/powerpc/include/asm/processor.h
+@@ -300,7 +300,7 @@ struct thread_struct {
+ #define task_pt_regs(tsk)     ((tsk)->thread.regs)
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define KSTK_EIP(tsk)  ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
+ #define KSTK_ESP(tsk)  ((tsk)->thread.regs? (tsk)->thread.regs->gpr[1]: 0)
+--- a/arch/powerpc/kernel/process.c
++++ b/arch/powerpc/kernel/process.c
+@@ -2111,14 +2111,11 @@ int validate_sp(unsigned long sp, struct
+ EXPORT_SYMBOL(validate_sp);
+-static unsigned long __get_wchan(struct task_struct *p)
++static unsigned long ___get_wchan(struct task_struct *p)
+ {
+       unsigned long ip, sp;
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+-
+       sp = p->thread.ksp;
+       if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+               return 0;
+@@ -2137,14 +2134,14 @@ static unsigned long __get_wchan(struct
+       return 0;
+ }
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long ret;
+       if (!try_get_task_stack(p))
+               return 0;
+-      ret = __get_wchan(p);
++      ret = ___get_wchan(p);
+       put_task_stack(p);
+--- a/arch/riscv/include/asm/processor.h
++++ b/arch/riscv/include/asm/processor.h
+@@ -66,7 +66,7 @@ static inline void release_thread(struct
+ {
+ }
+-extern unsigned long get_wchan(struct task_struct *p);
++extern unsigned long __get_wchan(struct task_struct *p);
+ static inline void wait_for_interrupt(void)
+--- a/arch/riscv/kernel/stacktrace.c
++++ b/arch/riscv/kernel/stacktrace.c
+@@ -148,16 +148,14 @@ static bool save_wchan(void *arg, unsign
+       return true;
+ }
+-unsigned long get_wchan(struct task_struct *task)
++unsigned long __get_wchan(struct task_struct *task)
+ {
+       unsigned long pc = 0;
+-      if (likely(task && task != current && !task_is_running(task))) {
+-              if (!try_get_task_stack(task))
+-                      return 0;
+-              walk_stackframe(task, NULL, save_wchan, &pc);
+-              put_task_stack(task);
+-      }
++      if (!try_get_task_stack(task))
++              return 0;
++      walk_stackframe(task, NULL, save_wchan, &pc);
++      put_task_stack(task);
+       return pc;
+ }
+--- a/arch/s390/include/asm/processor.h
++++ b/arch/s390/include/asm/processor.h
+@@ -192,7 +192,7 @@ static inline void release_thread(struct
+ void guarded_storage_release(struct task_struct *tsk);
+ void gs_load_bc_cb(struct pt_regs *regs);
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ #define task_pt_regs(tsk) ((struct pt_regs *) \
+         (task_stack_page(tsk) + THREAD_SIZE) - 1)
+ #define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr)
+--- a/arch/s390/kernel/process.c
++++ b/arch/s390/kernel/process.c
+@@ -191,12 +191,12 @@ void execve_tail(void)
+       asm volatile("sfpc %0" : : "d" (0));
+ }
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       struct unwind_state state;
+       unsigned long ip = 0;
+-      if (!p || p == current || task_is_running(p) || !task_stack_page(p))
++      if (!task_stack_page(p))
+               return 0;
+       if (!try_get_task_stack(p))
+--- a/arch/sh/include/asm/processor_32.h
++++ b/arch/sh/include/asm/processor_32.h
+@@ -181,7 +181,7 @@ static inline void show_code(struct pt_r
+ }
+ #endif
+-extern unsigned long get_wchan(struct task_struct *p);
++extern unsigned long __get_wchan(struct task_struct *p);
+ #define KSTK_EIP(tsk)  (task_pt_regs(tsk)->pc)
+ #define KSTK_ESP(tsk)  (task_pt_regs(tsk)->regs[15])
+--- a/arch/sh/kernel/process_32.c
++++ b/arch/sh/kernel/process_32.c
+@@ -182,13 +182,10 @@ __switch_to(struct task_struct *prev, st
+       return prev;
+ }
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long pc;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+-
+       /*
+        * The same comment as on the Alpha applies here, too ...
+        */
+--- a/arch/sparc/include/asm/processor_32.h
++++ b/arch/sparc/include/asm/processor_32.h
+@@ -89,7 +89,7 @@ static inline void start_thread(struct p
+ /* Free all resources held by a thread. */
+ #define release_thread(tsk)           do { } while(0)
+-unsigned long get_wchan(struct task_struct *);
++unsigned long __get_wchan(struct task_struct *);
+ #define task_pt_regs(tsk) ((tsk)->thread.kregs)
+ #define KSTK_EIP(tsk)  ((tsk)->thread.kregs->pc)
+--- a/arch/sparc/include/asm/processor_64.h
++++ b/arch/sparc/include/asm/processor_64.h
+@@ -183,7 +183,7 @@ do { \
+ /* Free all resources held by a thread. */
+ #define release_thread(tsk)           do { } while (0)
+-unsigned long get_wchan(struct task_struct *task);
++unsigned long __get_wchan(struct task_struct *task);
+ #define task_pt_regs(tsk) (task_thread_info(tsk)->kregs)
+ #define KSTK_EIP(tsk)  (task_pt_regs(tsk)->tpc)
+--- a/arch/sparc/kernel/process_32.c
++++ b/arch/sparc/kernel/process_32.c
+@@ -365,7 +365,7 @@ int copy_thread(unsigned long clone_flag
+       return 0;
+ }
+-unsigned long get_wchan(struct task_struct *task)
++unsigned long __get_wchan(struct task_struct *task)
+ {
+       unsigned long pc, fp, bias = 0;
+       unsigned long task_base = (unsigned long) task;
+@@ -373,9 +373,6 @@ unsigned long get_wchan(struct task_stru
+       struct reg_window32 *rw;
+       int count = 0;
+-      if (!task || task == current || task_is_running(task))
+-              goto out;
+-
+       fp = task_thread_info(task)->ksp + bias;
+       do {
+               /* Bogus frame pointer? */
+--- a/arch/sparc/kernel/process_64.c
++++ b/arch/sparc/kernel/process_64.c
+@@ -663,7 +663,7 @@ int arch_dup_task_struct(struct task_str
+       return 0;
+ }
+-unsigned long get_wchan(struct task_struct *task)
++unsigned long __get_wchan(struct task_struct *task)
+ {
+       unsigned long pc, fp, bias = 0;
+       struct thread_info *tp;
+@@ -671,9 +671,6 @@ unsigned long get_wchan(struct task_stru
+         unsigned long ret = 0;
+       int count = 0; 
+-      if (!task || task == current || task_is_running(task))
+-              goto out;
+-
+       tp = task_thread_info(task);
+       bias = STACK_BIAS;
+       fp = task_thread_info(task)->ksp + bias;
+--- a/arch/um/include/asm/processor-generic.h
++++ b/arch/um/include/asm/processor-generic.h
+@@ -106,6 +106,6 @@ extern struct cpuinfo_um boot_cpu_data;
+ #define cache_line_size()     (boot_cpu_data.cache_alignment)
+ #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
+-extern unsigned long get_wchan(struct task_struct *p);
++extern unsigned long __get_wchan(struct task_struct *p);
+ #endif
+--- a/arch/um/kernel/process.c
++++ b/arch/um/kernel/process.c
+@@ -364,14 +364,11 @@ unsigned long arch_align_stack(unsigned
+ }
+ #endif
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long stack_page, sp, ip;
+       bool seen_sched = 0;
+-      if ((p == NULL) || (p == current) || task_is_running(p))
+-              return 0;
+-
+       stack_page = (unsigned long) task_stack_page(p);
+       /* Bail if the process has no kernel stack for some reason */
+       if (stack_page == 0)
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -591,7 +591,7 @@ static inline void load_sp0(unsigned lon
+ /* Free all resources held by a thread. */
+ extern void release_thread(struct task_struct *);
+-unsigned long get_wchan(struct task_struct *p);
++unsigned long __get_wchan(struct task_struct *p);
+ /*
+  * Generic CPUID function
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -969,13 +969,10 @@ unsigned long arch_randomize_brk(struct
+  * because the task might wake up and we might look at a stack
+  * changing under us.
+  */
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long entry = 0;
+-      if (p == current || task_is_running(p))
+-              return 0;
+-
+       stack_trace_save_tsk(p, &entry, 1, 0);
+       return entry;
+ }
+--- a/arch/xtensa/include/asm/processor.h
++++ b/arch/xtensa/include/asm/processor.h
+@@ -215,7 +215,7 @@ struct mm_struct;
+ /* Free all resources held by a thread. */
+ #define release_thread(thread) do { } while(0)
+-extern unsigned long get_wchan(struct task_struct *p);
++extern unsigned long __get_wchan(struct task_struct *p);
+ #define KSTK_EIP(tsk)         (task_pt_regs(tsk)->pc)
+ #define KSTK_ESP(tsk)         (task_pt_regs(tsk)->areg[1])
+--- a/arch/xtensa/kernel/process.c
++++ b/arch/xtensa/kernel/process.c
+@@ -298,15 +298,12 @@ int copy_thread(unsigned long clone_flag
+  * These bracket the sleeping functions..
+  */
+-unsigned long get_wchan(struct task_struct *p)
++unsigned long __get_wchan(struct task_struct *p)
+ {
+       unsigned long sp, pc;
+       unsigned long stack_page = (unsigned long) task_stack_page(p);
+       int count = 0;
+-      if (!p || p == current || task_is_running(p))
+-              return 0;
+-
+       sp = p->thread.sp;
+       pc = MAKE_PC_FROM_RA(p->thread.ra, p->thread.sp);
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2141,6 +2141,7 @@ static inline void set_task_cpu(struct t
+ #endif /* CONFIG_SMP */
+ extern bool sched_task_on_rq(struct task_struct *p);
++extern unsigned long get_wchan(struct task_struct *p);
+ /*
+  * In order to reduce various lock holder preemption latencies provide an
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1963,6 +1963,25 @@ bool sched_task_on_rq(struct task_struct
+       return task_on_rq_queued(p);
+ }
++unsigned long get_wchan(struct task_struct *p)
++{
++      unsigned long ip = 0;
++      unsigned int state;
++
++      if (!p || p == current)
++              return 0;
++
++      /* Only get wchan if task is blocked and we can keep it that way. */
++      raw_spin_lock_irq(&p->pi_lock);
++      state = READ_ONCE(p->__state);
++      smp_rmb(); /* see try_to_wake_up() */
++      if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq)
++              ip = __get_wchan(p);
++      raw_spin_unlock_irq(&p->pi_lock);
++
++      return ip;
++}
++
+ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
+ {
+       if (!(flags & ENQUEUE_NOCLOCK))
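
The division of labor after this patch: the generic get_wchan() shown above
pins the "blocked" state under p->pi_lock, so every architecture's
__get_wchan() can drop its own racy "p == current || task_is_running(p)"
guard. The x86 side (taken from the hunk above) reduces to:

unsigned long __get_wchan(struct task_struct *p)
{
        unsigned long entry = 0;

        /* No liveness check needed here: the generic get_wchan() wrapper
         * already verified under p->pi_lock that p is blocked and will
         * stay blocked for the duration of this unwind.
         */
        stack_trace_save_tsk(p, &entry, 1, 0);
        return entry;
}
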
diff --git a/queue-5.15/series b/queue-5.15/series
index a894189ffe8ccfe80e4b1c3ca0da942f149d51d0..2053000fb65d70ee73c8ca499d904e496f3cce86 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -65,3 +65,11 @@ usb-musb-add-and-use-inline-functions-musb_-get-set-_state.patch
 usb-musb-fix-gadget-state-on-disconnect.patch
 usb-dwc3-qcom-don-t-leave-bcr-asserted.patch
 asoc-fsl_sai-force-a-software-reset-when-starting-in-consumer-mode.patch
+mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch
+powercap-intel_rapl-do-not-change-clamping-bit-if-enable-bit-cannot-be-changed.patch
+platform-x86-think-lmi-fix-kobject-cleanup.patch
+bpf-sockmap-fix-panic-when-calling-skb_linearize.patch
+x86-fix-get_wchan-to-support-the-orc-unwinder.patch
+sched-add-wrapper-for-get_wchan-to-keep-task-blocked.patch
+x86-fix-__get_wchan-for-stacktrace.patch
+x86-pin-task-stack-in-__get_wchan.patch
diff --git a/queue-5.15/x86-fix-__get_wchan-for-stacktrace.patch b/queue-5.15/x86-fix-__get_wchan-for-stacktrace.patch
new file mode 100644
index 0000000..a069768
--- /dev/null
@@ -0,0 +1,57 @@
+From 5d1ceb3969b6b2e47e2df6d17790a7c5a20fcbb4 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 22 Oct 2021 16:53:02 +0200
+Subject: x86: Fix __get_wchan() for !STACKTRACE
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 5d1ceb3969b6b2e47e2df6d17790a7c5a20fcbb4 upstream.
+
+Use asm/unwind.h to implement wchan, since we cannot always rely on
+STACKTRACE=y.
+
+Fixes: bc9bbb81730e ("x86: Fix get_wchan() to support the ORC unwinder")
+Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Link: https://lkml.kernel.org/r/20211022152104.137058575@infradead.org
+Signed-off-by: Siddhi Katage <siddhi.katage@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/process.c |   17 ++++++++++++++---
+ 1 file changed, 14 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -43,6 +43,7 @@
+ #include <asm/io_bitmap.h>
+ #include <asm/proto.h>
+ #include <asm/frame.h>
++#include <asm/unwind.h>
+ #include "process.h"
+@@ -971,10 +972,20 @@ unsigned long arch_randomize_brk(struct
+  */
+ unsigned long __get_wchan(struct task_struct *p)
+ {
+-      unsigned long entry = 0;
++      struct unwind_state state;
++      unsigned long addr = 0;
+-      stack_trace_save_tsk(p, &entry, 1, 0);
+-      return entry;
++      for (unwind_start(&state, p, NULL, NULL); !unwind_done(&state);
++           unwind_next_frame(&state)) {
++              addr = unwind_get_return_address(&state);
++              if (!addr)
++                      break;
++              if (in_sched_functions(addr))
++                      continue;
++              break;
++      }
++
++      return addr;
+ }
+ long do_arch_prctl_common(struct task_struct *task, int option,
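
Annotated, the unwind_*() idiom introduced above walks the blocked task's
frames until it leaves scheduler code (an illustrative reading of the same
loop, not new logic):

struct unwind_state state;
unsigned long addr = 0;

for (unwind_start(&state, p, NULL, NULL);  /* start at p's saved context */
     !unwind_done(&state);
     unwind_next_frame(&state)) {
        addr = unwind_get_return_address(&state);
        if (!addr)                      /* unreliable frame: give up */
                break;
        if (in_sched_functions(addr))   /* skip schedule() and friends */
                continue;
        break;                          /* first non-scheduler caller = wchan */
}
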
diff --git a/queue-5.15/x86-fix-get_wchan-to-support-the-orc-unwinder.patch b/queue-5.15/x86-fix-get_wchan-to-support-the-orc-unwinder.patch
new file mode 100644
index 0000000..60ddd11
--- /dev/null
@@ -0,0 +1,92 @@
+From bc9bbb81730ea667c31c5b284f95ee312bab466f Mon Sep 17 00:00:00 2001
+From: Qi Zheng <zhengqi.arch@bytedance.com>
+Date: Wed, 29 Sep 2021 15:02:17 -0700
+Subject: x86: Fix get_wchan() to support the ORC unwinder
+
+From: Qi Zheng <zhengqi.arch@bytedance.com>
+
+commit bc9bbb81730ea667c31c5b284f95ee312bab466f upstream.
+
+Currently, the kernel CONFIG_UNWINDER_ORC option is enabled by default
+on x86, but the implementation of get_wchan() is still based on the frame
+pointer unwinder, so /proc/<pid>/wchan usually returns 0 regardless
+of whether the task <pid> is running.
+
+Reimplement get_wchan() by calling stack_trace_save_tsk(), which is
+adapted to the ORC and frame pointer unwinders.
+
+Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
+Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20211008111626.271115116@infradead.org
+Signed-off-by: Siddhi Katage <siddhi.katage@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/process.c |   51 ++--------------------------------------------
+ 1 file changed, 3 insertions(+), 48 deletions(-)
+
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -971,58 +971,13 @@ unsigned long arch_randomize_brk(struct
+  */
+ unsigned long get_wchan(struct task_struct *p)
+ {
+-      unsigned long start, bottom, top, sp, fp, ip, ret = 0;
+-      int count = 0;
++      unsigned long entry = 0;
+       if (p == current || task_is_running(p))
+               return 0;
+-      if (!try_get_task_stack(p))
+-              return 0;
+-
+-      start = (unsigned long)task_stack_page(p);
+-      if (!start)
+-              goto out;
+-
+-      /*
+-       * Layout of the stack page:
+-       *
+-       * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
+-       * PADDING
+-       * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
+-       * stack
+-       * ----------- bottom = start
+-       *
+-       * The tasks stack pointer points at the location where the
+-       * framepointer is stored. The data on the stack is:
+-       * ... IP FP ... IP FP
+-       *
+-       * We need to read FP and IP, so we need to adjust the upper
+-       * bound by another unsigned long.
+-       */
+-      top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
+-      top -= 2 * sizeof(unsigned long);
+-      bottom = start;
+-
+-      sp = READ_ONCE(p->thread.sp);
+-      if (sp < bottom || sp > top)
+-              goto out;
+-
+-      fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
+-      do {
+-              if (fp < bottom || fp > top)
+-                      goto out;
+-              ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
+-              if (!in_sched_functions(ip)) {
+-                      ret = ip;
+-                      goto out;
+-              }
+-              fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
+-      } while (count++ < 16 && !task_is_running(p));
+-
+-out:
+-      put_task_stack(p);
+-      return ret;
++      stack_trace_save_tsk(p, &entry, 1, 0);
++      return entry;
+ }
+ long do_arch_prctl_common(struct task_struct *task, int option,
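
stack_trace_save_tsk(tsk, store, size, skipnr) saves up to size return
addresses of a blocked task into store, skipping entries that fall inside
scheduler functions (in_sched_functions()), and works with whichever unwinder
(ORC or frame pointer) the kernel was built with. A single saved entry is
therefore exactly the wait channel; an illustrative wrapper (the helper name
is hypothetical):

static unsigned long wchan_of(struct task_struct *p)
{
        unsigned long entry = 0;

        /* One entry, no frames skipped: the first non-scheduler caller. */
        stack_trace_save_tsk(p, &entry, 1, 0);
        return entry;
}
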
diff --git a/queue-5.15/x86-pin-task-stack-in-__get_wchan.patch b/queue-5.15/x86-pin-task-stack-in-__get_wchan.patch
new file mode 100644
index 0000000..e6508a0
--- /dev/null
@@ -0,0 +1,54 @@
+From 0dc636b3b757a6b747a156de613275f9d74a4a66 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 19 Nov 2021 10:29:47 +0100
+Subject: x86: Pin task-stack in __get_wchan()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0dc636b3b757a6b747a156de613275f9d74a4a66 upstream.
+
+When commit 5d1ceb3969b6 ("x86: Fix __get_wchan() for !STACKTRACE")
+moved from stacktrace to native unwind_*() usage, the
+try_get_task_stack() got lost, leading to use-after-free issues for
+dying tasks.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Fixes: 5d1ceb3969b6 ("x86: Fix __get_wchan() for !STACKTRACE")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=215031
+Link: https://lore.kernel.org/stable/YZV02RCRVHIa144u@fedora64.linuxtx.org/
+Reported-by: Justin Forbes <jmforbes@linuxtx.org>
+Reported-by: Holger Hoffstätte <holger@applied-asynchrony.com>
+Cc: Qi Zheng <zhengqi.arch@bytedance.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Siddhi Katage <siddhi.katage@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/process.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -975,6 +975,9 @@ unsigned long __get_wchan(struct task_st
+       struct unwind_state state;
+       unsigned long addr = 0;
++      if (!try_get_task_stack(p))
++              return 0;
++
+       for (unwind_start(&state, p, NULL, NULL); !unwind_done(&state);
+            unwind_next_frame(&state)) {
+               addr = unwind_get_return_address(&state);
+@@ -985,6 +988,8 @@ unsigned long __get_wchan(struct task_st
+               break;
+       }
++      put_task_stack(p);
++
+       return addr;
+ }
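
The general rule this patch restores: any walk of another task's stack must
pin that stack first, because a dying task's stack can be freed while the
unwinder is still traversing it. An illustrative sketch of the bracketing:

unsigned long walk_remote_stack(struct task_struct *p)
{
        unsigned long addr = 0;

        if (!try_get_task_stack(p))     /* stack already freed: task exited */
                return 0;

        /* ... unwind using p's stack, as in __get_wchan() above ... */

        put_task_stack(p);              /* drop the pin */
        return addr;
}
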