From 6d5e9663faf2a81e7d25c2b0cf988e728146456c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Aug 2023 22:47:14 +0200 Subject: [PATCH] 6.4-stable patches added patches: acpi-resource-fix-irq-override-quirk-for-pcspecialist-elimina-pro-16-m.patch alsa-ymfpci-fix-the-missing-snd_card_free-call-at-probe-error.patch batman-adv-do-not-get-eth-header-before-batadv_check_management_packet.patch batman-adv-don-t-increase-mtu-when-set-by-user.patch batman-adv-fix-batadv_v_ogm_aggr_send-memory-leak.patch batman-adv-fix-tt-global-entry-leak-when-client-roamed-back.patch batman-adv-hold-rtnl-lock-during-mtu-update-via-netlink.patch batman-adv-trigger-events-for-auto-adjusted-mtu.patch lib-clz_ctz.c-fix-__clzdi2-and-__ctzdi2-for-32-bit-kernels.patch media-vcodec-fix-potential-array-out-of-bounds-in-encoder-queue_setup.patch mm-add-a-call-to-flush_cache_vmap-in-vmap_pfn.patch mm-enable-page-walking-api-to-lock-vmas-during-the-walk.patch mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch mm-multi-gen-lru-don-t-spin-during-memcg-release.patch nfs-fix-a-use-after-free-in-nfs_direct_join_group.patch nfsd-fix-race-to-free_stateid-and-cl_revoked.patch nilfs2-fix-general-protection-fault-in-nilfs_lookup_dirty_data_buffers.patch of-dynamic-refactor-action-prints-to-not-use-pof-inside-devtree_lock.patch of-unittest-fix-expect-for-parse_phandle_with_args_map-test.patch pinctrl-amd-mask-wake-bits-on-probe-again.patch radix-tree-remove-unused-variable.patch riscv-fix-build-errors-using-binutils2.37-toolchains.patch riscv-handle-zicsr-zifencei-issue-between-gcc-and-binutils.patch selftests-mm-foll_longterm-need-to-be-updated-to-0x100.patch selinux-set-next-pointer-before-attaching-to-list.patch --- ...rk-for-pcspecialist-elimina-pro-16-m.patch | 47 ++ ...ng-snd_card_free-call-at-probe-error.patch | 53 ++ 
...efore-batadv_check_management_packet.patch | 122 ++++ ...-don-t-increase-mtu-when-set-by-user.patch | 83 +++ ...x-batadv_v_ogm_aggr_send-memory-leak.patch | 56 ++ ...l-entry-leak-when-client-roamed-back.patch | 85 +++ ...l-lock-during-mtu-update-via-netlink.patch | 45 ++ ...trigger-events-for-auto-adjusted-mtu.patch | 38 ++ ...zdi2-and-__ctzdi2-for-32-bit-kernels.patch | 120 ++++ ...out-of-bounds-in-encoder-queue_setup.patch | 37 ++ ...call-to-flush_cache_vmap-in-vmap_pfn.patch | 55 ++ ...ing-api-to-lock-vmas-during-the-walk.patch | 551 ++++++++++++++++++ ...tly-in-gup_must_unshare-via-gup-fast.patch | 149 +++++ ...e-foll_numa-as-foll_honor_numa_fault.patch | 250 ++++++++ ...ed-return-value-in-soft_offline_page.patch | 47 ++ ...-lru-don-t-spin-during-memcg-release.patch | 85 +++ ...-after-free-in-nfs_direct_join_group.patch | 60 ++ ...-race-to-free_stateid-and-cl_revoked.patch | 47 ++ ...t-in-nilfs_lookup_dirty_data_buffers.patch | 46 ++ ...s-to-not-use-pof-inside-devtree_lock.patch | 114 ++++ ...for-parse_phandle_with_args_map-test.patch | 40 ++ ...rl-amd-mask-wake-bits-on-probe-again.patch | 94 +++ .../radix-tree-remove-unused-variable.patch | 42 ++ ...errors-using-binutils2.37-toolchains.patch | 61 ++ ...encei-issue-between-gcc-and-binutils.patch | 111 ++++ ...longterm-need-to-be-updated-to-0x100.patch | 90 +++ ...ext-pointer-before-attaching-to-list.patch | 43 ++ queue-6.4/series | 27 + 28 files changed, 2598 insertions(+) create mode 100644 queue-6.4/acpi-resource-fix-irq-override-quirk-for-pcspecialist-elimina-pro-16-m.patch create mode 100644 queue-6.4/alsa-ymfpci-fix-the-missing-snd_card_free-call-at-probe-error.patch create mode 100644 queue-6.4/batman-adv-do-not-get-eth-header-before-batadv_check_management_packet.patch create mode 100644 queue-6.4/batman-adv-don-t-increase-mtu-when-set-by-user.patch create mode 100644 queue-6.4/batman-adv-fix-batadv_v_ogm_aggr_send-memory-leak.patch create mode 100644 
queue-6.4/batman-adv-fix-tt-global-entry-leak-when-client-roamed-back.patch create mode 100644 queue-6.4/batman-adv-hold-rtnl-lock-during-mtu-update-via-netlink.patch create mode 100644 queue-6.4/batman-adv-trigger-events-for-auto-adjusted-mtu.patch create mode 100644 queue-6.4/lib-clz_ctz.c-fix-__clzdi2-and-__ctzdi2-for-32-bit-kernels.patch create mode 100644 queue-6.4/media-vcodec-fix-potential-array-out-of-bounds-in-encoder-queue_setup.patch create mode 100644 queue-6.4/mm-add-a-call-to-flush_cache_vmap-in-vmap_pfn.patch create mode 100644 queue-6.4/mm-enable-page-walking-api-to-lock-vmas-during-the-walk.patch create mode 100644 queue-6.4/mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch create mode 100644 queue-6.4/mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch create mode 100644 queue-6.4/mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch create mode 100644 queue-6.4/mm-multi-gen-lru-don-t-spin-during-memcg-release.patch create mode 100644 queue-6.4/nfs-fix-a-use-after-free-in-nfs_direct_join_group.patch create mode 100644 queue-6.4/nfsd-fix-race-to-free_stateid-and-cl_revoked.patch create mode 100644 queue-6.4/nilfs2-fix-general-protection-fault-in-nilfs_lookup_dirty_data_buffers.patch create mode 100644 queue-6.4/of-dynamic-refactor-action-prints-to-not-use-pof-inside-devtree_lock.patch create mode 100644 queue-6.4/of-unittest-fix-expect-for-parse_phandle_with_args_map-test.patch create mode 100644 queue-6.4/pinctrl-amd-mask-wake-bits-on-probe-again.patch create mode 100644 queue-6.4/radix-tree-remove-unused-variable.patch create mode 100644 queue-6.4/riscv-fix-build-errors-using-binutils2.37-toolchains.patch create mode 100644 queue-6.4/riscv-handle-zicsr-zifencei-issue-between-gcc-and-binutils.patch create mode 100644 queue-6.4/selftests-mm-foll_longterm-need-to-be-updated-to-0x100.patch create mode 100644 queue-6.4/selinux-set-next-pointer-before-attaching-to-list.patch diff --git 
a/queue-6.4/acpi-resource-fix-irq-override-quirk-for-pcspecialist-elimina-pro-16-m.patch b/queue-6.4/acpi-resource-fix-irq-override-quirk-for-pcspecialist-elimina-pro-16-m.patch new file mode 100644 index 00000000000..5ebe11063bf --- /dev/null +++ b/queue-6.4/acpi-resource-fix-irq-override-quirk-for-pcspecialist-elimina-pro-16-m.patch @@ -0,0 +1,47 @@ +From 453b014e2c294abf762d3bce12e91ce4b34055e6 Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Mon, 21 Aug 2023 11:09:27 +0200 +Subject: ACPI: resource: Fix IRQ override quirk for PCSpecialist Elimina Pro 16 M + +From: Hans de Goede + +commit 453b014e2c294abf762d3bce12e91ce4b34055e6 upstream. + +It turns out that some PCSpecialist Elimina Pro 16 M models +have "GM6BGEQ" as DMI product-name instead of "Elimina Pro 16 M", +causing the existing DMI quirk to not work on these models. + +The DMI board-name is always "GM6BGEQ", so match on that instead. + +Fixes: 56fec0051a69 ("ACPI: resource: Add IRQ override quirk for PCSpecialist Elimina Pro 16 M") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=217394#c36 +Cc: All applicable +Signed-off-by: Hans de Goede +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/resource.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c +index a4d9f149b48d..32cfa3f4efd3 100644 +--- a/drivers/acpi/resource.c ++++ b/drivers/acpi/resource.c +@@ -501,9 +501,13 @@ static const struct dmi_system_id maingear_laptop[] = { + static const struct dmi_system_id pcspecialist_laptop[] = { + { + .ident = "PCSpecialist Elimina Pro 16 M", ++ /* ++ * Some models have product-name "Elimina Pro 16 M", ++ * others "GM6BGEQ". Match on board-name to match both. 
++ */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"), +- DMI_MATCH(DMI_PRODUCT_NAME, "Elimina Pro 16 M"), ++ DMI_MATCH(DMI_BOARD_NAME, "GM6BGEQ"), + }, + }, + { } +-- +2.42.0 + diff --git a/queue-6.4/alsa-ymfpci-fix-the-missing-snd_card_free-call-at-probe-error.patch b/queue-6.4/alsa-ymfpci-fix-the-missing-snd_card_free-call-at-probe-error.patch new file mode 100644 index 00000000000..7006529e189 --- /dev/null +++ b/queue-6.4/alsa-ymfpci-fix-the-missing-snd_card_free-call-at-probe-error.patch @@ -0,0 +1,53 @@ +From 1d0eb6143c1e85d3f9a3f5a616ee7e5dc351d33b Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Wed, 23 Aug 2023 18:16:25 +0200 +Subject: ALSA: ymfpci: Fix the missing snd_card_free() call at probe error + +From: Takashi Iwai + +commit 1d0eb6143c1e85d3f9a3f5a616ee7e5dc351d33b upstream. + +Like a few other drivers, YMFPCI driver needs to clean up with +snd_card_free() call at an error path of the probe; otherwise the +other devres resources are released before the card and it results in +the UAF. + +This patch uses the helper for handling the probe error gracefully. 
+ +Fixes: f33fc1576757 ("ALSA: ymfpci: Create card with device-managed snd_devm_card_new()") +Cc: +Reported-and-tested-by: Takashi Yano +Closes: https://lore.kernel.org/r/20230823135846.1812-1-takashi.yano@nifty.ne.jp +Link: https://lore.kernel.org/r/20230823161625.5807-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/pci/ymfpci/ymfpci.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/sound/pci/ymfpci/ymfpci.c ++++ b/sound/pci/ymfpci/ymfpci.c +@@ -152,8 +152,8 @@ static inline int snd_ymfpci_create_game + void snd_ymfpci_free_gameport(struct snd_ymfpci *chip) { } + #endif /* SUPPORT_JOYSTICK */ + +-static int snd_card_ymfpci_probe(struct pci_dev *pci, +- const struct pci_device_id *pci_id) ++static int __snd_card_ymfpci_probe(struct pci_dev *pci, ++ const struct pci_device_id *pci_id) + { + static int dev; + struct snd_card *card; +@@ -348,6 +348,12 @@ static int snd_card_ymfpci_probe(struct + return 0; + } + ++static int snd_card_ymfpci_probe(struct pci_dev *pci, ++ const struct pci_device_id *pci_id) ++{ ++ return snd_card_free_on_error(&pci->dev, __snd_card_ymfpci_probe(pci, pci_id)); ++} ++ + static struct pci_driver ymfpci_driver = { + .name = KBUILD_MODNAME, + .id_table = snd_ymfpci_ids, diff --git a/queue-6.4/batman-adv-do-not-get-eth-header-before-batadv_check_management_packet.patch b/queue-6.4/batman-adv-do-not-get-eth-header-before-batadv_check_management_packet.patch new file mode 100644 index 00000000000..05cab66c6b1 --- /dev/null +++ b/queue-6.4/batman-adv-do-not-get-eth-header-before-batadv_check_management_packet.patch @@ -0,0 +1,122 @@ +From eac27a41ab641de074655d2932fc7f8cdb446881 Mon Sep 17 00:00:00 2001 +From: Remi Pommarel +Date: Fri, 28 Jul 2023 15:38:50 +0200 +Subject: batman-adv: Do not get eth header before batadv_check_management_packet + +From: Remi Pommarel + +commit eac27a41ab641de074655d2932fc7f8cdb446881 upstream. 
+ +If received skb in batadv_v_elp_packet_recv or batadv_v_ogm_packet_recv +is either cloned or non linearized then its data buffer will be +reallocated by batadv_check_management_packet when skb_cow or +skb_linearize get called. Thus getting ethernet header address inside +skb data buffer before batadv_check_management_packet had any chance to +reallocate it could lead to the following kernel panic: + + Unable to handle kernel paging request at virtual address ffffff8020ab069a + Mem abort info: + ESR = 0x96000007 + EC = 0x25: DABT (current EL), IL = 32 bits + SET = 0, FnV = 0 + EA = 0, S1PTW = 0 + FSC = 0x07: level 3 translation fault + Data abort info: + ISV = 0, ISS = 0x00000007 + CM = 0, WnR = 0 + swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000040f45000 + [ffffff8020ab069a] pgd=180000007fffa003, p4d=180000007fffa003, pud=180000007fffa003, pmd=180000007fefe003, pte=0068000020ab0706 + Internal error: Oops: 96000007 [#1] SMP + Modules linked in: ahci_mvebu libahci_platform libahci dvb_usb_af9035 dvb_usb_dib0700 dib0070 dib7000m dibx000_common ath11k_pci ath10k_pci ath10k_core mwl8k_new nf_nat_sip nf_conntrack_sip xhci_plat_hcd xhci_hcd nf_nat_pptp nf_conntrack_pptp at24 sbsa_gwdt + CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.15.42-00066-g3242268d425c-dirty #550 + Hardware name: A8k (DT) + pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : batadv_is_my_mac+0x60/0xc0 + lr : batadv_v_ogm_packet_recv+0x98/0x5d0 + sp : ffffff8000183820 + x29: ffffff8000183820 x28: 0000000000000001 x27: ffffff8014f9af00 + x26: 0000000000000000 x25: 0000000000000543 x24: 0000000000000003 + x23: ffffff8020ab0580 x22: 0000000000000110 x21: ffffff80168ae880 + x20: 0000000000000000 x19: ffffff800b561000 x18: 0000000000000000 + x17: 0000000000000000 x16: 0000000000000000 x15: 00dc098924ae0032 + x14: 0f0405433e0054b0 x13: ffffffff00000080 x12: 0000004000000001 + x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 + x8 : 0000000000000000 x7 :
ffffffc076dae000 x6 : ffffff8000183700 + x5 : ffffffc00955e698 x4 : ffffff80168ae000 x3 : ffffff80059cf000 + x2 : ffffff800b561000 x1 : ffffff8020ab0696 x0 : ffffff80168ae880 + Call trace: + batadv_is_my_mac+0x60/0xc0 + batadv_v_ogm_packet_recv+0x98/0x5d0 + batadv_batman_skb_recv+0x1b8/0x244 + __netif_receive_skb_core.isra.0+0x440/0xc74 + __netif_receive_skb_one_core+0x14/0x20 + netif_receive_skb+0x68/0x140 + br_pass_frame_up+0x70/0x80 + br_handle_frame_finish+0x108/0x284 + br_handle_frame+0x190/0x250 + __netif_receive_skb_core.isra.0+0x240/0xc74 + __netif_receive_skb_list_core+0x6c/0x90 + netif_receive_skb_list_internal+0x1f4/0x310 + napi_complete_done+0x64/0x1d0 + gro_cell_poll+0x7c/0xa0 + __napi_poll+0x34/0x174 + net_rx_action+0xf8/0x2a0 + _stext+0x12c/0x2ac + run_ksoftirqd+0x4c/0x7c + smpboot_thread_fn+0x120/0x210 + kthread+0x140/0x150 + ret_from_fork+0x10/0x20 + Code: f9403844 eb03009f 54fffee1 f94 + +Thus ethernet header address should only be fetched after +batadv_check_management_packet has been called. 
+ +Fixes: 0da0035942d4 ("batman-adv: OGMv2 - add basic infrastructure") +Cc: stable@vger.kernel.org +Signed-off-by: Remi Pommarel +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/bat_v_elp.c | 3 ++- + net/batman-adv/bat_v_ogm.c | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/net/batman-adv/bat_v_elp.c ++++ b/net/batman-adv/bat_v_elp.c +@@ -505,7 +505,7 @@ int batadv_v_elp_packet_recv(struct sk_b + struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_elp_packet *elp_packet; + struct batadv_hard_iface *primary_if; +- struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); ++ struct ethhdr *ethhdr; + bool res; + int ret = NET_RX_DROP; + +@@ -513,6 +513,7 @@ int batadv_v_elp_packet_recv(struct sk_b + if (!res) + goto free_skb; + ++ ethhdr = eth_hdr(skb); + if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) + goto free_skb; + +--- a/net/batman-adv/bat_v_ogm.c ++++ b/net/batman-adv/bat_v_ogm.c +@@ -985,7 +985,7 @@ int batadv_v_ogm_packet_recv(struct sk_b + { + struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_ogm2_packet *ogm_packet; +- struct ethhdr *ethhdr = eth_hdr(skb); ++ struct ethhdr *ethhdr; + int ogm_offset; + u8 *packet_pos; + int ret = NET_RX_DROP; +@@ -999,6 +999,7 @@ int batadv_v_ogm_packet_recv(struct sk_b + if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN)) + goto free_skb; + ++ ethhdr = eth_hdr(skb); + if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) + goto free_skb; + diff --git a/queue-6.4/batman-adv-don-t-increase-mtu-when-set-by-user.patch b/queue-6.4/batman-adv-don-t-increase-mtu-when-set-by-user.patch new file mode 100644 index 00000000000..d33e4c08e56 --- /dev/null +++ b/queue-6.4/batman-adv-don-t-increase-mtu-when-set-by-user.patch @@ -0,0 +1,83 @@ +From d8e42a2b0addf238be8b3b37dcd9795a5c1be459 Mon Sep 17 00:00:00 2001 +From: Sven Eckelmann +Date: Wed, 19 Jul 2023 
10:01:15 +0200 +Subject: batman-adv: Don't increase MTU when set by user + +From: Sven Eckelmann + +commit d8e42a2b0addf238be8b3b37dcd9795a5c1be459 upstream. + +If the user set an MTU value, it usually means that there are special +requirements for the MTU. But if an interface gets activated, the MTU was +always recalculated and then the user set value was overwritten. + +The only reason why this user set value has to be overwritten, is when the +MTU has to be decreased because batman-adv is not able to transfer packets +with the user specified size. + +Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") +Cc: stable@vger.kernel.org +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/hard-interface.c | 14 +++++++++++++- + net/batman-adv/soft-interface.c | 3 +++ + net/batman-adv/types.h | 6 ++++++ + 3 files changed, 22 insertions(+), 1 deletion(-) + +--- a/net/batman-adv/hard-interface.c ++++ b/net/batman-adv/hard-interface.c +@@ -630,7 +630,19 @@ out: + */ + void batadv_update_min_mtu(struct net_device *soft_iface) + { +- dev_set_mtu(soft_iface, batadv_hardif_min_mtu(soft_iface)); ++ struct batadv_priv *bat_priv = netdev_priv(soft_iface); ++ int limit_mtu; ++ int mtu; ++ ++ mtu = batadv_hardif_min_mtu(soft_iface); ++ ++ if (bat_priv->mtu_set_by_user) ++ limit_mtu = bat_priv->mtu_set_by_user; ++ else ++ limit_mtu = ETH_DATA_LEN; ++ ++ mtu = min(mtu, limit_mtu); ++ dev_set_mtu(soft_iface, mtu); + + /* Check if the local translate table should be cleaned up to match a + * new (and smaller) MTU.
+--- a/net/batman-adv/soft-interface.c ++++ b/net/batman-adv/soft-interface.c +@@ -153,11 +153,14 @@ static int batadv_interface_set_mac_addr + + static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) + { ++ struct batadv_priv *bat_priv = netdev_priv(dev); ++ + /* check ranges */ + if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev)) + return -EINVAL; + + dev->mtu = new_mtu; ++ bat_priv->mtu_set_by_user = new_mtu; + + return 0; + } +--- a/net/batman-adv/types.h ++++ b/net/batman-adv/types.h +@@ -1547,6 +1547,12 @@ struct batadv_priv { + struct net_device *soft_iface; + + /** ++ * @mtu_set_by_user: MTU was set once by user ++ * protected by rtnl_lock ++ */ ++ int mtu_set_by_user; ++ ++ /** + * @bat_counters: mesh internal traffic statistic counters (see + * batadv_counters) + */ diff --git a/queue-6.4/batman-adv-fix-batadv_v_ogm_aggr_send-memory-leak.patch b/queue-6.4/batman-adv-fix-batadv_v_ogm_aggr_send-memory-leak.patch new file mode 100644 index 00000000000..4f9cd60602a --- /dev/null +++ b/queue-6.4/batman-adv-fix-batadv_v_ogm_aggr_send-memory-leak.patch @@ -0,0 +1,56 @@ +From 421d467dc2d483175bad4fb76a31b9e5a3d744cf Mon Sep 17 00:00:00 2001 +From: Remi Pommarel +Date: Wed, 9 Aug 2023 17:29:13 +0200 +Subject: batman-adv: Fix batadv_v_ogm_aggr_send memory leak + +From: Remi Pommarel + +commit 421d467dc2d483175bad4fb76a31b9e5a3d744cf upstream. + +When batadv_v_ogm_aggr_send is called for an inactive interface, the skb +is silently dropped by batadv_v_ogm_send_to_if() but never freed causing +the following memory leak: + + unreferenced object 0xffff00000c164800 (size 512): + comm "kworker/u8:1", pid 2648, jiffies 4295122303 (age 97.656s) + hex dump (first 32 bytes): + 00 80 af 09 00 00 ff ff e1 09 00 00 75 01 60 83 ............u.`. 
+ 1f 00 00 00 b8 00 00 00 15 00 05 00 da e3 d3 64 ...............d + backtrace: + [<0000000007ad20f6>] __kmalloc_track_caller+0x1a8/0x310 + [<00000000d1029e55>] kmalloc_reserve.constprop.0+0x70/0x13c + [<000000008b9d4183>] __alloc_skb+0xec/0x1fc + [<00000000c7af5051>] __netdev_alloc_skb+0x48/0x23c + [<00000000642ee5f5>] batadv_v_ogm_aggr_send+0x50/0x36c + [<0000000088660bd7>] batadv_v_ogm_aggr_work+0x24/0x40 + [<0000000042fc2606>] process_one_work+0x3b0/0x610 + [<000000002f2a0b1c>] worker_thread+0xa0/0x690 + [<0000000059fae5d4>] kthread+0x1fc/0x210 + [<000000000c587d3a>] ret_from_fork+0x10/0x20 + +Free the skb in that case to fix this leak. + +Cc: stable@vger.kernel.org +Fixes: 0da0035942d4 ("batman-adv: OGMv2 - add basic infrastructure") +Signed-off-by: Remi Pommarel +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/bat_v_ogm.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/batman-adv/bat_v_ogm.c ++++ b/net/batman-adv/bat_v_ogm.c +@@ -123,8 +123,10 @@ static void batadv_v_ogm_send_to_if(stru + { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + +- if (hard_iface->if_status != BATADV_IF_ACTIVE) ++ if (hard_iface->if_status != BATADV_IF_ACTIVE) { ++ kfree_skb(skb); + return; ++ } + + batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX); + batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES, diff --git a/queue-6.4/batman-adv-fix-tt-global-entry-leak-when-client-roamed-back.patch b/queue-6.4/batman-adv-fix-tt-global-entry-leak-when-client-roamed-back.patch new file mode 100644 index 00000000000..53f32f5479a --- /dev/null +++ b/queue-6.4/batman-adv-fix-tt-global-entry-leak-when-client-roamed-back.patch @@ -0,0 +1,85 @@ +From d25ddb7e788d34cf27ff1738d11a87cb4b67d446 Mon Sep 17 00:00:00 2001 +From: Remi Pommarel +Date: Fri, 4 Aug 2023 11:39:36 +0200 +Subject: batman-adv: Fix TT global entry leak when client roamed back + +From: Remi Pommarel + +commit 
d25ddb7e788d34cf27ff1738d11a87cb4b67d446 upstream. + +When a client roamed back to a node before it got time to destroy the +pending local entry (i.e. within the same originator interval) the old +global one is directly removed from hash table and left as such. + +But because this entry had an extra reference taken at lookup (i.e using +batadv_tt_global_hash_find) there is no way its memory will be reclaimed +at any time causing the following memory leak: + + unreferenced object 0xffff0000073c8000 (size 18560): + comm "softirq", pid 0, jiffies 4294907738 (age 228.644s) + hex dump (first 32 bytes): + 06 31 ac 12 c7 7a 05 00 01 00 00 00 00 00 00 00 .1...z.......... + 2c ad be 08 00 80 ff ff 6c b6 be 08 00 80 ff ff ,.......l....... + backtrace: + [<00000000ee6e0ffa>] kmem_cache_alloc+0x1b4/0x300 + [<000000000ff2fdbc>] batadv_tt_global_add+0x700/0xe20 + [<00000000443897c7>] _batadv_tt_update_changes+0x21c/0x790 + [<000000005dd90463>] batadv_tt_update_changes+0x3c/0x110 + [<00000000a2d7fc57>] batadv_tt_tvlv_unicast_handler_v1+0xafc/0xe10 + [<0000000011793f2a>] batadv_tvlv_containers_process+0x168/0x2b0 + [<00000000b7cbe2ef>] batadv_recv_unicast_tvlv+0xec/0x1f4 + [<0000000042aef1d8>] batadv_batman_skb_recv+0x25c/0x3a0 + [<00000000bbd8b0a2>] __netif_receive_skb_core.isra.0+0x7a8/0xe90 + [<000000004033d428>] __netif_receive_skb_one_core+0x64/0x74 + [<000000000f39a009>] __netif_receive_skb+0x48/0xe0 + [<00000000f2cd8888>] process_backlog+0x174/0x344 + [<00000000507d6564>] __napi_poll+0x58/0x1f4 + [<00000000b64ef9eb>] net_rx_action+0x504/0x590 + [<00000000056fa5e4>] _stext+0x1b8/0x418 + [<00000000878879d6>] run_ksoftirqd+0x74/0xa4 + unreferenced object 0xffff00000bae1a80 (size 56): + comm "softirq", pid 0, jiffies 4294910888 (age 216.092s) + hex dump (first 32 bytes): + 00 78 b1 0b 00 00 ff ff 0d 50 00 00 00 00 00 00 .x.......P...... + 00 00 00 00 00 00 00 00 50 c8 3c 07 00 00 ff ff ........P.<..... 
+ backtrace: + [<00000000ee6e0ffa>] kmem_cache_alloc+0x1b4/0x300 + [<00000000d9aaa49e>] batadv_tt_global_add+0x53c/0xe20 + [<00000000443897c7>] _batadv_tt_update_changes+0x21c/0x790 + [<000000005dd90463>] batadv_tt_update_changes+0x3c/0x110 + [<00000000a2d7fc57>] batadv_tt_tvlv_unicast_handler_v1+0xafc/0xe10 + [<0000000011793f2a>] batadv_tvlv_containers_process+0x168/0x2b0 + [<00000000b7cbe2ef>] batadv_recv_unicast_tvlv+0xec/0x1f4 + [<0000000042aef1d8>] batadv_batman_skb_recv+0x25c/0x3a0 + [<00000000bbd8b0a2>] __netif_receive_skb_core.isra.0+0x7a8/0xe90 + [<000000004033d428>] __netif_receive_skb_one_core+0x64/0x74 + [<000000000f39a009>] __netif_receive_skb+0x48/0xe0 + [<00000000f2cd8888>] process_backlog+0x174/0x344 + [<00000000507d6564>] __napi_poll+0x58/0x1f4 + [<00000000b64ef9eb>] net_rx_action+0x504/0x590 + [<00000000056fa5e4>] _stext+0x1b8/0x418 + [<00000000878879d6>] run_ksoftirqd+0x74/0xa4 + +Releasing the extra reference from batadv_tt_global_hash_find even at +roam back when batadv_tt_global_free is called fixes this memory leak. 
+ +Cc: stable@vger.kernel.org +Fixes: 068ee6e204e1 ("batman-adv: roaming handling mechanism redesign") +Signed-off-by: Remi Pommarel +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/translation-table.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/batman-adv/translation-table.c ++++ b/net/batman-adv/translation-table.c +@@ -774,7 +774,6 @@ check_roaming: + if (roamed_back) { + batadv_tt_global_free(bat_priv, tt_global, + "Roaming canceled"); +- tt_global = NULL; + } else { + /* The global entry has to be marked as ROAMING and + * has to be kept for consistency purpose diff --git a/queue-6.4/batman-adv-hold-rtnl-lock-during-mtu-update-via-netlink.patch b/queue-6.4/batman-adv-hold-rtnl-lock-during-mtu-update-via-netlink.patch new file mode 100644 index 00000000000..d605f6a2c8e --- /dev/null +++ b/queue-6.4/batman-adv-hold-rtnl-lock-during-mtu-update-via-netlink.patch @@ -0,0 +1,45 @@ +From 987aae75fc1041072941ffb622b45ce2359a99b9 Mon Sep 17 00:00:00 2001 +From: Sven Eckelmann +Date: Mon, 21 Aug 2023 21:48:48 +0200 +Subject: batman-adv: Hold rtnl lock during MTU update via netlink + +From: Sven Eckelmann + +commit 987aae75fc1041072941ffb622b45ce2359a99b9 upstream. + +The automatic recalculation of the maximum allowed MTU is usually triggered +by code sections which are already rtnl lock protected by callers outside +of batman-adv. But when the fragmentation setting is changed via +batman-adv's own batadv genl family, then the rtnl lock is not yet taken. + +But dev_set_mtu requires that the caller holds the rtnl lock because it +uses netdevice notifiers.
And this code will then fail the check for this +lock: + + RTNL: assertion failed at net/core/dev.c (1953) + +Cc: stable@vger.kernel.org +Reported-by: syzbot+f8812454d9b3ac00d282@syzkaller.appspotmail.com +Fixes: c6a953cce8d0 ("batman-adv: Trigger events for auto adjusted MTU") +Signed-off-by: Sven Eckelmann +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230821-batadv-missing-mtu-rtnl-lock-v1-1-1c5a7bfe861e@narfation.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/netlink.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/batman-adv/netlink.c ++++ b/net/batman-adv/netlink.c +@@ -495,7 +495,10 @@ static int batadv_netlink_set_mesh(struc + attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]; + + atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr)); ++ ++ rtnl_lock(); + batadv_update_min_mtu(bat_priv->soft_iface); ++ rtnl_unlock(); + } + + if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) { diff --git a/queue-6.4/batman-adv-trigger-events-for-auto-adjusted-mtu.patch b/queue-6.4/batman-adv-trigger-events-for-auto-adjusted-mtu.patch new file mode 100644 index 00000000000..b81ccd3131e --- /dev/null +++ b/queue-6.4/batman-adv-trigger-events-for-auto-adjusted-mtu.patch @@ -0,0 +1,38 @@ +From c6a953cce8d0438391e6da48c8d0793d3fbfcfa6 Mon Sep 17 00:00:00 2001 +From: Sven Eckelmann +Date: Wed, 19 Jul 2023 09:29:29 +0200 +Subject: batman-adv: Trigger events for auto adjusted MTU + +From: Sven Eckelmann + +commit c6a953cce8d0438391e6da48c8d0793d3fbfcfa6 upstream. + +If an interface changes the MTU, it is expected that NETDEV_PRECHANGEMTU +and NETDEV_CHANGEMTU notification events are triggered. This worked fine for +.ndo_change_mtu based changes because core networking code took care of it. +But for auto-adjustments after hard-interfaces changes, these events were +simply missing.
+ +Due to this problem, non-batman-adv components weren't aware of MTU changes +and thus couldn't perform their own tasks correctly. + +Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") +Cc: stable@vger.kernel.org +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/hard-interface.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/batman-adv/hard-interface.c ++++ b/net/batman-adv/hard-interface.c +@@ -630,7 +630,7 @@ out: + */ + void batadv_update_min_mtu(struct net_device *soft_iface) + { +- soft_iface->mtu = batadv_hardif_min_mtu(soft_iface); ++ dev_set_mtu(soft_iface, batadv_hardif_min_mtu(soft_iface)); + + /* Check if the local translate table should be cleaned up to match a + * new (and smaller) MTU. diff --git a/queue-6.4/lib-clz_ctz.c-fix-__clzdi2-and-__ctzdi2-for-32-bit-kernels.patch b/queue-6.4/lib-clz_ctz.c-fix-__clzdi2-and-__ctzdi2-for-32-bit-kernels.patch new file mode 100644 index 00000000000..be10831e130 --- /dev/null +++ b/queue-6.4/lib-clz_ctz.c-fix-__clzdi2-and-__ctzdi2-for-32-bit-kernels.patch @@ -0,0 +1,120 @@ +From 382d4cd1847517ffcb1800fd462b625db7b2ebea Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Fri, 25 Aug 2023 21:50:33 +0200 +Subject: lib/clz_ctz.c: Fix __clzdi2() and __ctzdi2() for 32-bit kernels + +From: Helge Deller + +commit 382d4cd1847517ffcb1800fd462b625db7b2ebea upstream. + +The gcc compiler translates on some architectures the 64-bit +__builtin_clzll() function to a call to the libgcc function __clzdi2(), +which should take a 64-bit parameter on 32- and 64-bit platforms. + +But in the current kernel code, the built-in __clzdi2() function is +defined to operate (wrongly) on 32-bit parameters if BITS_PER_LONG == +32, thus the return values on 32-bit kernels are in the range from +[0..31] instead of the expected [0..63] range. 
+ +This patch fixes the in-kernel functions __clzdi2() and __ctzdi2() to +take a 64-bit parameter on 32-bit kernels as well, thus it makes the +functions identical for 32- and 64-bit kernels. + +This bug went unnoticed since kernel 3.11 for over 10 years, and here +are some possible reasons for that: + + a) Some architectures have assembly instructions to count the bits and + which are used instead of calling __clzdi2(), e.g. on x86 the bsr + instruction and on ppc cntlz is used. On such architectures the + wrong __clzdi2() implementation isn't used and as such the bug has + no effect and won't be noticed. + + b) Some architectures link to libgcc.a, and the in-kernel weak + functions get replaced by the correct 64-bit variants from libgcc.a. + + c) __builtin_clzll() and __clzdi2() don't seem to be used in many + places in the kernel, and most likely only in uncritical functions, + e.g. when printing hex values via seq_put_hex_ll(). The wrong return + value will still print the correct number, but just in a wrong + formatting (e.g. with too many leading zeroes). + + d) 32-bit kernels aren't used that much any longer, so they are less + tested. + +A trivial testcase to verify if the currently running 32-bit kernel is +affected by the bug is to look at the output of /proc/self/maps: + +Here the kernel uses a correct implementation of __clzdi2(): + + root@debian:~# cat /proc/self/maps + 00010000-00019000 r-xp 00000000 08:05 787324 /usr/bin/cat + 00019000-0001a000 rwxp 00009000 08:05 787324 /usr/bin/cat + 0001a000-0003b000 rwxp 00000000 00:00 0 [heap] + f7551000-f770d000 r-xp 00000000 08:05 794765 /usr/lib/hppa-linux-gnu/libc.so.6 + ...
+ +and this kernel uses the broken implementation of __clzdi2(): + + root@debian:~# cat /proc/self/maps + 0000000010000-0000000019000 r-xp 00000000 000000008:000000005 787324 /usr/bin/cat + 0000000019000-000000001a000 rwxp 000000009000 000000008:000000005 787324 /usr/bin/cat + 000000001a000-000000003b000 rwxp 00000000 00:00 0 [heap] + 00000000f73d1000-00000000f758d000 r-xp 00000000 000000008:000000005 794765 /usr/lib/hppa-linux-gnu/libc.so.6 + ... + +Signed-off-by: Helge Deller +Fixes: 4df87bb7b6a22 ("lib: add weak clz/ctz functions") +Cc: Chanho Min +Cc: Geert Uytterhoeven +Cc: stable@vger.kernel.org # v3.11+ +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + lib/clz_ctz.c | 32 ++++++-------------------------- + 1 file changed, 6 insertions(+), 26 deletions(-) + +--- a/lib/clz_ctz.c ++++ b/lib/clz_ctz.c +@@ -28,36 +28,16 @@ int __weak __clzsi2(int val) + } + EXPORT_SYMBOL(__clzsi2); + +-int __weak __clzdi2(long val); +-int __weak __ctzdi2(long val); +-#if BITS_PER_LONG == 32 +- +-int __weak __clzdi2(long val) ++int __weak __clzdi2(u64 val); ++int __weak __clzdi2(u64 val) + { +- return 32 - fls((int)val); ++ return 64 - fls64(val); + } + EXPORT_SYMBOL(__clzdi2); + +-int __weak __ctzdi2(long val) ++int __weak __ctzdi2(u64 val); ++int __weak __ctzdi2(u64 val) + { +- return __ffs((u32)val); ++ return __ffs64(val); + } + EXPORT_SYMBOL(__ctzdi2); +- +-#elif BITS_PER_LONG == 64 +- +-int __weak __clzdi2(long val) +-{ +- return 64 - fls64((u64)val); +-} +-EXPORT_SYMBOL(__clzdi2); +- +-int __weak __ctzdi2(long val) +-{ +- return __ffs64((u64)val); +-} +-EXPORT_SYMBOL(__ctzdi2); +- +-#else +-#error BITS_PER_LONG not 32 or 64 +-#endif diff --git a/queue-6.4/media-vcodec-fix-potential-array-out-of-bounds-in-encoder-queue_setup.patch b/queue-6.4/media-vcodec-fix-potential-array-out-of-bounds-in-encoder-queue_setup.patch new file mode 100644 index 00000000000..33b20d3e6da --- /dev/null +++ 
b/queue-6.4/media-vcodec-fix-potential-array-out-of-bounds-in-encoder-queue_setup.patch @@ -0,0 +1,37 @@ +From e7f2e65699e2290fd547ec12a17008764e5d9620 Mon Sep 17 00:00:00 2001 +From: Wei Chen +Date: Thu, 10 Aug 2023 08:23:33 +0000 +Subject: media: vcodec: Fix potential array out-of-bounds in encoder queue_setup + +From: Wei Chen + +commit e7f2e65699e2290fd547ec12a17008764e5d9620 upstream. + +variable *nplanes is provided by user via system call argument. The +possible value of q_data->fmt->num_planes is 1-3, while the value +of *nplanes can be 1-8. The array access by index i can cause array +out-of-bounds. + +Fix this bug by checking *nplanes against the array size. + +Fixes: 4e855a6efa54 ("[media] vcodec: mediatek: Add Mediatek V4L2 Video Encoder Driver") +Signed-off-by: Wei Chen +Cc: stable@vger.kernel.org +Reviewed-by: Chen-Yu Tsai +Signed-off-by: Hans Verkuil +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c ++++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c +@@ -821,6 +821,8 @@ static int vb2ops_venc_queue_setup(struc + return -EINVAL; + + if (*nplanes) { ++ if (*nplanes != q_data->fmt->num_planes) ++ return -EINVAL; + for (i = 0; i < *nplanes; i++) + if (sizes[i] < q_data->sizeimage[i]) + return -EINVAL; diff --git a/queue-6.4/mm-add-a-call-to-flush_cache_vmap-in-vmap_pfn.patch b/queue-6.4/mm-add-a-call-to-flush_cache_vmap-in-vmap_pfn.patch new file mode 100644 index 00000000000..7e1e89a2df7 --- /dev/null +++ b/queue-6.4/mm-add-a-call-to-flush_cache_vmap-in-vmap_pfn.patch @@ -0,0 +1,55 @@ +From a50420c79731fc5cf27ad43719c1091e842a2606 Mon Sep 17 00:00:00 2001 +From: Alexandre Ghiti +Date: Wed, 9 Aug 2023 18:46:33 +0200 +Subject: mm: add a call to flush_cache_vmap() in vmap_pfn() + +From: Alexandre Ghiti + +commit a50420c79731fc5cf27ad43719c1091e842a2606 upstream. 
+ +flush_cache_vmap() must be called after new vmalloc mappings are installed +in the page table in order to allow architectures to make sure the new +mapping is visible. + +It could lead to a panic since on some architectures (like powerpc), +the page table walker could see the wrong pte value and trigger a +spurious page fault that can not be resolved (see commit f1cb8f9beba8 +("powerpc/64s/radix: avoid ptesync after set_pte and +ptep_set_access_flags")). + +But actually the patch is aiming at riscv: the riscv specification +allows the caching of invalid entries in the TLB, and since we recently +removed the vmalloc page fault handling, we now need to emit a tlb +shootdown whenever a new vmalloc mapping is emitted +(https://lore.kernel.org/linux-riscv/20230725132246.817726-1-alexghiti@rivosinc.com/). +That's a temporary solution, there are ways to avoid that :) + +Link: https://lkml.kernel.org/r/20230809164633.1556126-1-alexghiti@rivosinc.com +Fixes: 3e9a9e256b1e ("mm: add a vmap_pfn function") +Reported-by: Dylan Jhong +Closes: https://lore.kernel.org/linux-riscv/ZMytNY2J8iyjbPPy@atctrx.andestech.com/ +Signed-off-by: Alexandre Ghiti +Reviewed-by: Christoph Hellwig +Reviewed-by: Palmer Dabbelt +Acked-by: Palmer Dabbelt +Reviewed-by: Dylan Jhong +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -2929,6 +2929,10 @@ void *vmap_pfn(unsigned long *pfns, unsi + free_vm_area(area); + return NULL; + } ++ ++ flush_cache_vmap((unsigned long)area->addr, ++ (unsigned long)area->addr + count * PAGE_SIZE); ++ + return area->addr; + } + EXPORT_SYMBOL_GPL(vmap_pfn); diff --git a/queue-6.4/mm-enable-page-walking-api-to-lock-vmas-during-the-walk.patch b/queue-6.4/mm-enable-page-walking-api-to-lock-vmas-during-the-walk.patch new file mode 100644 index 00000000000..493be1a3291 --- /dev/null +++ 
b/queue-6.4/mm-enable-page-walking-api-to-lock-vmas-during-the-walk.patch @@ -0,0 +1,551 @@ +From 49b0638502da097c15d46cd4e871dbaa022caf7c Mon Sep 17 00:00:00 2001 +From: Suren Baghdasaryan +Date: Fri, 4 Aug 2023 08:27:19 -0700 +Subject: mm: enable page walking API to lock vmas during the walk + +From: Suren Baghdasaryan + +commit 49b0638502da097c15d46cd4e871dbaa022caf7c upstream. + +walk_page_range() and friends often operate under write-locked mmap_lock. +With introduction of vma locks, the vmas have to be locked as well during +such walks to prevent concurrent page faults in these areas. Add an +additional member to mm_walk_ops to indicate locking requirements for the +walk. + +The change ensures that page walks which prevent concurrent page faults +by write-locking mmap_lock, operate correctly after introduction of +per-vma locks. With per-vma locks page faults can be handled under vma +lock without taking mmap_lock at all, so write locking mmap_lock would +not stop them. The change ensures vmas are properly locked during such +walks. + +A sample issue this solves is do_mbind() performing queue_pages_range() +to queue pages for migration. Without this change a concurrent page +can be faulted into the area and be left out of migration. 
+ +Link: https://lkml.kernel.org/r/20230804152724.3090321-2-surenb@google.com +Signed-off-by: Suren Baghdasaryan +Suggested-by: Linus Torvalds +Suggested-by: Jann Horn +Cc: David Hildenbrand +Cc: Davidlohr Bueso +Cc: Hugh Dickins +Cc: Johannes Weiner +Cc: Laurent Dufour +Cc: Liam Howlett +Cc: Matthew Wilcox (Oracle) +Cc: Michal Hocko +Cc: Michel Lespinasse +Cc: Peter Xu +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/mm/book3s64/subpage_prot.c | 1 + arch/riscv/mm/pageattr.c | 1 + arch/s390/mm/gmap.c | 5 ++++ + fs/proc/task_mmu.c | 5 ++++ + include/linux/pagewalk.h | 11 +++++++++ + mm/damon/vaddr.c | 2 + + mm/hmm.c | 1 + mm/ksm.c | 25 ++++++++++++++-------- + mm/madvise.c | 3 ++ + mm/memcontrol.c | 2 + + mm/memory-failure.c | 1 + mm/mempolicy.c | 22 ++++++++++++------- + mm/migrate_device.c | 1 + mm/mincore.c | 1 + mm/mlock.c | 1 + mm/mprotect.c | 1 + mm/pagewalk.c | 36 +++++++++++++++++++++++++++++--- + mm/vmscan.c | 1 + 18 files changed, 100 insertions(+), 20 deletions(-) + +--- a/arch/powerpc/mm/book3s64/subpage_prot.c ++++ b/arch/powerpc/mm/book3s64/subpage_prot.c +@@ -143,6 +143,7 @@ static int subpage_walk_pmd_entry(pmd_t + + static const struct mm_walk_ops subpage_walk_ops = { + .pmd_entry = subpage_walk_pmd_entry, ++ .walk_lock = PGWALK_WRLOCK_VERIFY, + }; + + static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, +--- a/arch/riscv/mm/pageattr.c ++++ b/arch/riscv/mm/pageattr.c +@@ -102,6 +102,7 @@ static const struct mm_walk_ops pageattr + .pmd_entry = pageattr_pmd_entry, + .pte_entry = pageattr_pte_entry, + .pte_hole = pageattr_pte_hole, ++ .walk_lock = PGWALK_RDLOCK, + }; + + static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, +--- a/arch/s390/mm/gmap.c ++++ b/arch/s390/mm/gmap.c +@@ -2514,6 +2514,7 @@ static int thp_split_walk_pmd_entry(pmd_ + + static const struct mm_walk_ops thp_split_walk_ops = { + .pmd_entry = thp_split_walk_pmd_entry, ++ 
.walk_lock = PGWALK_WRLOCK_VERIFY, + }; + + static inline void thp_split_mm(struct mm_struct *mm) +@@ -2558,6 +2559,7 @@ static int __zap_zero_pages(pmd_t *pmd, + + static const struct mm_walk_ops zap_zero_walk_ops = { + .pmd_entry = __zap_zero_pages, ++ .walk_lock = PGWALK_WRLOCK, + }; + + /* +@@ -2648,6 +2650,7 @@ static const struct mm_walk_ops enable_s + .hugetlb_entry = __s390_enable_skey_hugetlb, + .pte_entry = __s390_enable_skey_pte, + .pmd_entry = __s390_enable_skey_pmd, ++ .walk_lock = PGWALK_WRLOCK, + }; + + int s390_enable_skey(void) +@@ -2685,6 +2688,7 @@ static int __s390_reset_cmma(pte_t *pte, + + static const struct mm_walk_ops reset_cmma_walk_ops = { + .pte_entry = __s390_reset_cmma, ++ .walk_lock = PGWALK_WRLOCK, + }; + + void s390_reset_cmma(struct mm_struct *mm) +@@ -2721,6 +2725,7 @@ static int s390_gather_pages(pte_t *ptep + + static const struct mm_walk_ops gather_pages_ops = { + .pte_entry = s390_gather_pages, ++ .walk_lock = PGWALK_RDLOCK, + }; + + /* +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -759,12 +759,14 @@ static int smaps_hugetlb_range(pte_t *pt + static const struct mm_walk_ops smaps_walk_ops = { + .pmd_entry = smaps_pte_range, + .hugetlb_entry = smaps_hugetlb_range, ++ .walk_lock = PGWALK_RDLOCK, + }; + + static const struct mm_walk_ops smaps_shmem_walk_ops = { + .pmd_entry = smaps_pte_range, + .hugetlb_entry = smaps_hugetlb_range, + .pte_hole = smaps_pte_hole, ++ .walk_lock = PGWALK_RDLOCK, + }; + + /* +@@ -1245,6 +1247,7 @@ static int clear_refs_test_walk(unsigned + static const struct mm_walk_ops clear_refs_walk_ops = { + .pmd_entry = clear_refs_pte_range, + .test_walk = clear_refs_test_walk, ++ .walk_lock = PGWALK_WRLOCK, + }; + + static ssize_t clear_refs_write(struct file *file, const char __user *buf, +@@ -1621,6 +1624,7 @@ static const struct mm_walk_ops pagemap_ + .pmd_entry = pagemap_pmd_range, + .pte_hole = pagemap_pte_hole, + .hugetlb_entry = pagemap_hugetlb_range, ++ .walk_lock = PGWALK_RDLOCK, + }; + + /* 
+@@ -1932,6 +1936,7 @@ static int gather_hugetlb_stats(pte_t *p + static const struct mm_walk_ops show_numa_ops = { + .hugetlb_entry = gather_hugetlb_stats, + .pmd_entry = gather_pte_stats, ++ .walk_lock = PGWALK_RDLOCK, + }; + + /* +--- a/include/linux/pagewalk.h ++++ b/include/linux/pagewalk.h +@@ -6,6 +6,16 @@ + + struct mm_walk; + ++/* Locking requirement during a page walk. */ ++enum page_walk_lock { ++ /* mmap_lock should be locked for read to stabilize the vma tree */ ++ PGWALK_RDLOCK = 0, ++ /* vma will be write-locked during the walk */ ++ PGWALK_WRLOCK = 1, ++ /* vma is expected to be already write-locked during the walk */ ++ PGWALK_WRLOCK_VERIFY = 2, ++}; ++ + /** + * struct mm_walk_ops - callbacks for walk_page_range + * @pgd_entry: if set, called for each non-empty PGD (top-level) entry +@@ -66,6 +76,7 @@ struct mm_walk_ops { + int (*pre_vma)(unsigned long start, unsigned long end, + struct mm_walk *walk); + void (*post_vma)(struct mm_walk *walk); ++ enum page_walk_lock walk_lock; + }; + + /* +--- a/mm/damon/vaddr.c ++++ b/mm/damon/vaddr.c +@@ -384,6 +384,7 @@ out: + static const struct mm_walk_ops damon_mkold_ops = { + .pmd_entry = damon_mkold_pmd_entry, + .hugetlb_entry = damon_mkold_hugetlb_entry, ++ .walk_lock = PGWALK_RDLOCK, + }; + + static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) +@@ -519,6 +520,7 @@ out: + static const struct mm_walk_ops damon_young_ops = { + .pmd_entry = damon_young_pmd_entry, + .hugetlb_entry = damon_young_hugetlb_entry, ++ .walk_lock = PGWALK_RDLOCK, + }; + + static bool damon_va_young(struct mm_struct *mm, unsigned long addr, +--- a/mm/hmm.c ++++ b/mm/hmm.c +@@ -560,6 +560,7 @@ static const struct mm_walk_ops hmm_walk + .pte_hole = hmm_vma_walk_hole, + .hugetlb_entry = hmm_vma_walk_hugetlb_entry, + .test_walk = hmm_vma_walk_test, ++ .walk_lock = PGWALK_RDLOCK, + }; + + /** +--- a/mm/ksm.c ++++ b/mm/ksm.c +@@ -454,6 +454,12 @@ static int break_ksm_pmd_entry(pmd_t *pm + + static const struct mm_walk_ops 
break_ksm_ops = { + .pmd_entry = break_ksm_pmd_entry, ++ .walk_lock = PGWALK_RDLOCK, ++}; ++ ++static const struct mm_walk_ops break_ksm_lock_vma_ops = { ++ .pmd_entry = break_ksm_pmd_entry, ++ .walk_lock = PGWALK_WRLOCK, + }; + + /* +@@ -469,16 +475,17 @@ static const struct mm_walk_ops break_ks + * of the process that owns 'vma'. We also do not want to enforce + * protection keys here anyway. + */ +-static int break_ksm(struct vm_area_struct *vma, unsigned long addr) ++static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma) + { + vm_fault_t ret = 0; ++ const struct mm_walk_ops *ops = lock_vma ? ++ &break_ksm_lock_vma_ops : &break_ksm_ops; + + do { + int ksm_page; + + cond_resched(); +- ksm_page = walk_page_range_vma(vma, addr, addr + 1, +- &break_ksm_ops, NULL); ++ ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL); + if (WARN_ON_ONCE(ksm_page < 0)) + return ksm_page; + if (!ksm_page) +@@ -564,7 +571,7 @@ static void break_cow(struct ksm_rmap_it + mmap_read_lock(mm); + vma = find_mergeable_vma(mm, addr); + if (vma) +- break_ksm(vma, addr); ++ break_ksm(vma, addr, false); + mmap_read_unlock(mm); + } + +@@ -870,7 +877,7 @@ static void remove_trailing_rmap_items(s + * in cmp_and_merge_page on one of the rmap_items we would be removing. 
+ */ + static int unmerge_ksm_pages(struct vm_area_struct *vma, +- unsigned long start, unsigned long end) ++ unsigned long start, unsigned long end, bool lock_vma) + { + unsigned long addr; + int err = 0; +@@ -881,7 +888,7 @@ static int unmerge_ksm_pages(struct vm_a + if (signal_pending(current)) + err = -ERESTARTSYS; + else +- err = break_ksm(vma, addr); ++ err = break_ksm(vma, addr, lock_vma); + } + return err; + } +@@ -1028,7 +1035,7 @@ static int unmerge_and_remove_all_rmap_i + if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma) + continue; + err = unmerge_ksm_pages(vma, +- vma->vm_start, vma->vm_end); ++ vma->vm_start, vma->vm_end, false); + if (err) + goto error; + } +@@ -2528,7 +2535,7 @@ static int __ksm_del_vma(struct vm_area_ + return 0; + + if (vma->anon_vma) { +- err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end); ++ err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true); + if (err) + return err; + } +@@ -2666,7 +2673,7 @@ int ksm_madvise(struct vm_area_struct *v + return 0; /* just ignore the advice */ + + if (vma->anon_vma) { +- err = unmerge_ksm_pages(vma, start, end); ++ err = unmerge_ksm_pages(vma, start, end, true); + if (err) + return err; + } +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -227,6 +227,7 @@ static int swapin_walk_pmd_entry(pmd_t * + + static const struct mm_walk_ops swapin_walk_ops = { + .pmd_entry = swapin_walk_pmd_entry, ++ .walk_lock = PGWALK_RDLOCK, + }; + + static void force_shm_swapin_readahead(struct vm_area_struct *vma, +@@ -521,6 +522,7 @@ regular_folio: + + static const struct mm_walk_ops cold_walk_ops = { + .pmd_entry = madvise_cold_or_pageout_pte_range, ++ .walk_lock = PGWALK_RDLOCK, + }; + + static void madvise_cold_page_range(struct mmu_gather *tlb, +@@ -741,6 +743,7 @@ next: + + static const struct mm_walk_ops madvise_free_walk_ops = { + .pmd_entry = madvise_free_pte_range, ++ .walk_lock = PGWALK_RDLOCK, + }; + + static int madvise_free_single_vma(struct vm_area_struct *vma, +--- a/mm/memcontrol.c 
++++ b/mm/memcontrol.c +@@ -6072,6 +6072,7 @@ static int mem_cgroup_count_precharge_pt + + static const struct mm_walk_ops precharge_walk_ops = { + .pmd_entry = mem_cgroup_count_precharge_pte_range, ++ .walk_lock = PGWALK_RDLOCK, + }; + + static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) +@@ -6351,6 +6352,7 @@ put: /* get_mctgt_type() gets & locks + + static const struct mm_walk_ops charge_walk_ops = { + .pmd_entry = mem_cgroup_move_charge_pte_range, ++ .walk_lock = PGWALK_RDLOCK, + }; + + static void mem_cgroup_move_charge(void) +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -836,6 +836,7 @@ static int hwpoison_hugetlb_range(pte_t + static const struct mm_walk_ops hwp_walk_ops = { + .pmd_entry = hwpoison_pte_range, + .hugetlb_entry = hwpoison_hugetlb_range, ++ .walk_lock = PGWALK_RDLOCK, + }; + + /* +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -715,6 +715,14 @@ static const struct mm_walk_ops queue_pa + .hugetlb_entry = queue_folios_hugetlb, + .pmd_entry = queue_folios_pte_range, + .test_walk = queue_pages_test_walk, ++ .walk_lock = PGWALK_RDLOCK, ++}; ++ ++static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = { ++ .hugetlb_entry = queue_folios_hugetlb, ++ .pmd_entry = queue_folios_pte_range, ++ .test_walk = queue_pages_test_walk, ++ .walk_lock = PGWALK_WRLOCK, + }; + + /* +@@ -735,7 +743,7 @@ static const struct mm_walk_ops queue_pa + static int + queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, + nodemask_t *nodes, unsigned long flags, +- struct list_head *pagelist) ++ struct list_head *pagelist, bool lock_vma) + { + int err; + struct queue_pages qp = { +@@ -746,8 +754,10 @@ queue_pages_range(struct mm_struct *mm, + .end = end, + .first = NULL, + }; ++ const struct mm_walk_ops *ops = lock_vma ? 
++ &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; + +- err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp); ++ err = walk_page_range(mm, start, end, ops, &qp); + + if (!qp.first) + /* whole range in hole */ +@@ -1075,7 +1085,7 @@ static int migrate_to_node(struct mm_str + vma = find_vma(mm, 0); + VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); + queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, +- flags | MPOL_MF_DISCONTIG_OK, &pagelist); ++ flags | MPOL_MF_DISCONTIG_OK, &pagelist, false); + + if (!list_empty(&pagelist)) { + err = migrate_pages(&pagelist, alloc_migration_target, NULL, +@@ -1321,12 +1331,8 @@ static long do_mbind(unsigned long start + * Lock the VMAs before scanning for pages to migrate, to ensure we don't + * miss a concurrently inserted page. + */ +- vma_iter_init(&vmi, mm, start); +- for_each_vma_range(vmi, vma, end) +- vma_start_write(vma); +- + ret = queue_pages_range(mm, start, end, nmask, +- flags | MPOL_MF_INVERT, &pagelist); ++ flags | MPOL_MF_INVERT, &pagelist, true); + + if (ret < 0) { + err = ret; +--- a/mm/migrate_device.c ++++ b/mm/migrate_device.c +@@ -286,6 +286,7 @@ next: + static const struct mm_walk_ops migrate_vma_walk_ops = { + .pmd_entry = migrate_vma_collect_pmd, + .pte_hole = migrate_vma_collect_hole, ++ .walk_lock = PGWALK_RDLOCK, + }; + + /* +--- a/mm/mincore.c ++++ b/mm/mincore.c +@@ -177,6 +177,7 @@ static const struct mm_walk_ops mincore_ + .pmd_entry = mincore_pte_range, + .pte_hole = mincore_unmapped_range, + .hugetlb_entry = mincore_hugetlb, ++ .walk_lock = PGWALK_RDLOCK, + }; + + /* +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -365,6 +365,7 @@ static void mlock_vma_pages_range(struct + { + static const struct mm_walk_ops mlock_walk_ops = { + .pmd_entry = mlock_pte_range, ++ .walk_lock = PGWALK_WRLOCK_VERIFY, + }; + + /* +--- a/mm/mprotect.c ++++ b/mm/mprotect.c +@@ -611,6 +611,7 @@ static const struct mm_walk_ops prot_non + .pte_entry = prot_none_pte_entry, + .hugetlb_entry = 
prot_none_hugetlb_entry, + .test_walk = prot_none_test, ++ .walk_lock = PGWALK_WRLOCK, + }; + + int +--- a/mm/pagewalk.c ++++ b/mm/pagewalk.c +@@ -384,6 +384,33 @@ static int __walk_page_range(unsigned lo + return err; + } + ++static inline void process_mm_walk_lock(struct mm_struct *mm, ++ enum page_walk_lock walk_lock) ++{ ++ if (walk_lock == PGWALK_RDLOCK) ++ mmap_assert_locked(mm); ++ else ++ mmap_assert_write_locked(mm); ++} ++ ++static inline void process_vma_walk_lock(struct vm_area_struct *vma, ++ enum page_walk_lock walk_lock) ++{ ++#ifdef CONFIG_PER_VMA_LOCK ++ switch (walk_lock) { ++ case PGWALK_WRLOCK: ++ vma_start_write(vma); ++ break; ++ case PGWALK_WRLOCK_VERIFY: ++ vma_assert_write_locked(vma); ++ break; ++ case PGWALK_RDLOCK: ++ /* PGWALK_RDLOCK is handled by process_mm_walk_lock */ ++ break; ++ } ++#endif ++} ++ + /** + * walk_page_range - walk page table with caller specific callbacks + * @mm: mm_struct representing the target process of page table walk +@@ -443,7 +470,7 @@ int walk_page_range(struct mm_struct *mm + if (!walk.mm) + return -EINVAL; + +- mmap_assert_locked(walk.mm); ++ process_mm_walk_lock(walk.mm, ops->walk_lock); + + vma = find_vma(walk.mm, start); + do { +@@ -458,6 +485,7 @@ int walk_page_range(struct mm_struct *mm + if (ops->pte_hole) + err = ops->pte_hole(start, next, -1, &walk); + } else { /* inside vma */ ++ process_vma_walk_lock(vma, ops->walk_lock); + walk.vma = vma; + next = min(end, vma->vm_end); + vma = find_vma(mm, vma->vm_end); +@@ -533,7 +561,8 @@ int walk_page_range_vma(struct vm_area_s + if (start < vma->vm_start || end > vma->vm_end) + return -EINVAL; + +- mmap_assert_locked(walk.mm); ++ process_mm_walk_lock(walk.mm, ops->walk_lock); ++ process_vma_walk_lock(vma, ops->walk_lock); + return __walk_page_range(start, end, &walk); + } + +@@ -550,7 +579,8 @@ int walk_page_vma(struct vm_area_struct + if (!walk.mm) + return -EINVAL; + +- mmap_assert_locked(walk.mm); ++ process_mm_walk_lock(walk.mm, ops->walk_lock); ++ 
process_vma_walk_lock(vma, ops->walk_lock); + return __walk_page_range(vma->vm_start, vma->vm_end, &walk); + } + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4249,6 +4249,7 @@ static void walk_mm(struct lruvec *lruve + static const struct mm_walk_ops mm_walk_ops = { + .test_walk = should_skip_vma, + .p4d_entry = walk_pud_range, ++ .walk_lock = PGWALK_RDLOCK, + }; + + int err; diff --git a/queue-6.4/mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch b/queue-6.4/mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch new file mode 100644 index 00000000000..c003212bfa7 --- /dev/null +++ b/queue-6.4/mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch @@ -0,0 +1,149 @@ +From 5805192c7b7257d290474cb1a3897d0567281bbc Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Sat, 5 Aug 2023 12:12:56 +0200 +Subject: mm/gup: handle cont-PTE hugetlb pages correctly in gup_must_unshare() via GUP-fast + +From: David Hildenbrand + +commit 5805192c7b7257d290474cb1a3897d0567281bbc upstream. + +In contrast to most other GUP code, GUP-fast common page table walking +code like gup_pte_range() also handles hugetlb pages. But in contrast to +other hugetlb page table walking code, it does not look at the hugetlb PTE +abstraction whereby we have only a single logical hugetlb PTE per hugetlb +page, even when using multiple cont-PTEs underneath -- which is for +example what huge_ptep_get() abstracts. + +So when we have a hugetlb page that is mapped via cont-PTEs, GUP-fast +might stumble over a PTE that does not map the head page of a hugetlb page +-- not the first "head" PTE of such a cont mapping. + +Logically, the whole hugetlb page is mapped (entire_mapcount == 1), but we +might end up calling gup_must_unshare() with a tail page of a hugetlb +page. 
+ +We only maintain a single PageAnonExclusive flag per hugetlb page (as +hugetlb pages cannot get partially COW-shared), stored for the head page. +That flag is clear for all tail pages. + +So when gup_must_unshare() ends up calling PageAnonExclusive() with a tail +page of a hugetlb page: + +1) With CONFIG_DEBUG_VM_PGFLAGS + +Stumbles over the: + + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + +For example, when executing the COW selftests with 64k hugetlb pages on +arm64: + + [ 61.082187] page:00000000829819ff refcount:3 mapcount:1 mapping:0000000000000000 index:0x1 pfn:0x11ee11 + [ 61.082842] head:0000000080f79bf7 order:4 entire_mapcount:1 nr_pages_mapped:0 pincount:2 + [ 61.083384] anon flags: 0x17ffff80003000e(referenced|uptodate|dirty|head|mappedtodisk|node=0|zone=2|lastcpupid=0xfffff) + [ 61.084101] page_type: 0xffffffff() + [ 61.084332] raw: 017ffff800000000 fffffc00037b8401 0000000000000402 0000000200000000 + [ 61.084840] raw: 0000000000000010 0000000000000000 00000000ffffffff 0000000000000000 + [ 61.085359] head: 017ffff80003000e ffffd9e95b09b788 ffffd9e95b09b788 ffff0007ff63cf71 + [ 61.085885] head: 0000000000000000 0000000000000002 00000003ffffffff 0000000000000000 + [ 61.086415] page dumped because: VM_BUG_ON_PAGE(PageHuge(page) && !PageHead(page)) + [ 61.086914] ------------[ cut here ]------------ + [ 61.087220] kernel BUG at include/linux/page-flags.h:990! + [ 61.087591] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP + [ 61.087999] Modules linked in: ... 
+ [ 61.089404] CPU: 0 PID: 4612 Comm: cow Kdump: loaded Not tainted 6.5.0-rc4+ #3 + [ 61.089917] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 + [ 61.090409] pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + [ 61.090897] pc : gup_must_unshare.part.0+0x64/0x98 + [ 61.091242] lr : gup_must_unshare.part.0+0x64/0x98 + [ 61.091592] sp : ffff8000825eb940 + [ 61.091826] x29: ffff8000825eb940 x28: 0000000000000000 x27: fffffc00037b8440 + [ 61.092329] x26: 0400000000000001 x25: 0000000000080101 x24: 0000000000080000 + [ 61.092835] x23: 0000000000080100 x22: ffff0000cffb9588 x21: ffff0000c8ec6b58 + [ 61.093341] x20: 0000ffffad6b1000 x19: fffffc00037b8440 x18: ffffffffffffffff + [ 61.093850] x17: 2864616548656761 x16: 5021202626202965 x15: 6761702865677548 + [ 61.094358] x14: 6567615028454741 x13: 2929656761702864 x12: 6165486567615021 + [ 61.094858] x11: 00000000ffff7fff x10: 00000000ffff7fff x9 : ffffd9e958b7a1c0 + [ 61.095359] x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 00000000002bffa8 + [ 61.095873] x5 : ffff0008bb19e708 x4 : 0000000000000000 x3 : 0000000000000000 + [ 61.096380] x2 : 0000000000000000 x1 : ffff0000cf6636c0 x0 : 0000000000000046 + [ 61.096894] Call trace: + [ 61.097080] gup_must_unshare.part.0+0x64/0x98 + [ 61.097392] gup_pte_range+0x3a8/0x3f0 + [ 61.097662] gup_pgd_range+0x1ec/0x280 + [ 61.097942] lockless_pages_from_mm+0x64/0x1a0 + [ 61.098258] internal_get_user_pages_fast+0xe4/0x1d0 + [ 61.098612] pin_user_pages_fast+0x58/0x78 + [ 61.098917] pin_longterm_test_start+0xf4/0x2b8 + [ 61.099243] gup_test_ioctl+0x170/0x3b0 + [ 61.099528] __arm64_sys_ioctl+0xa8/0xf0 + [ 61.099822] invoke_syscall.constprop.0+0x7c/0xd0 + [ 61.100160] el0_svc_common.constprop.0+0xe8/0x100 + [ 61.100500] do_el0_svc+0x38/0xa0 + [ 61.100736] el0_svc+0x3c/0x198 + [ 61.100971] el0t_64_sync_handler+0x134/0x150 + [ 61.101280] el0t_64_sync+0x17c/0x180 + [ 61.101543] Code: aa1303e0 f00074c1 912b0021 97fffeb2 (d4210000) + +2) Without 
CONFIG_DEBUG_VM_PGFLAGS + +Always detects "not exclusive" for passed tail pages and refuses to PIN +the tail pages R/O, as gup_must_unshare() == true. GUP-fast will fall back +to ordinary GUP. As ordinary GUP properly considers the logical hugetlb +PTE abstraction in hugetlb_follow_page_mask(), pinning the page will +succeed when looking at the PageAnonExclusive on the head page only. + +So the only real effect of this is that with cont-PTE hugetlb pages, we'll +always fall back from GUP-fast to ordinary GUP when not working on the head +page, which ends up checking the head page and does the right thing. + +Consequently, the cow selftests pass with cont-PTE hugetlb pages as well +without CONFIG_DEBUG_VM_PGFLAGS. + +Note that this only applies to anon hugetlb pages that are mapped using +cont-PTEs: for example 64k hugetlb pages on a 4k arm64 kernel. + +... and only when R/O-pinning (FOLL_PIN) such pages that are mapped into +the page table R/O using GUP-fast. + +On production kernels (and even most debug kernels, that don't set +CONFIG_DEBUG_VM_PGFLAGS) this patch should theoretically not be required +to be backported. But of course, it does not hurt. 
+ +Link: https://lkml.kernel.org/r/20230805101256.87306-1-david@redhat.com +Fixes: a7f226604170 ("mm/gup: trigger FAULT_FLAG_UNSHARE when R/O-pinning a possibly shared anonymous page") +Signed-off-by: David Hildenbrand +Reported-by: Ryan Roberts +Reviewed-by: Ryan Roberts +Tested-by: Ryan Roberts +Cc: Vlastimil Babka +Cc: John Hubbard +Cc: Jason Gunthorpe +Cc: Peter Xu +Cc: Mike Kravetz +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/internal.h | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -995,6 +995,16 @@ static inline bool gup_must_unshare(stru + smp_rmb(); + + /* ++ * During GUP-fast we might not get called on the head page for a ++ * hugetlb page that is mapped using cont-PTE, because GUP-fast does ++ * not work with the abstracted hugetlb PTEs that always point at the ++ * head page. For hugetlb, PageAnonExclusive only applies on the head ++ * page (as it cannot be partially COW-shared), so lookup the head page. ++ */ ++ if (unlikely(!PageHead(page) && PageHuge(page))) ++ page = compound_head(page); ++ ++ /* + * Note that PageKsm() pages cannot be exclusive, and consequently, + * cannot get pinned. + */ diff --git a/queue-6.4/mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch b/queue-6.4/mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch new file mode 100644 index 00000000000..63581d1ae1b --- /dev/null +++ b/queue-6.4/mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch @@ -0,0 +1,250 @@ +From d74943a2f3cdade34e471b36f55f7979be656867 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Thu, 3 Aug 2023 16:32:02 +0200 +Subject: mm/gup: reintroduce FOLL_NUMA as FOLL_HONOR_NUMA_FAULT + +From: David Hildenbrand + +commit d74943a2f3cdade34e471b36f55f7979be656867 upstream. 
+ +Unfortunately commit 474098edac26 ("mm/gup: replace FOLL_NUMA by +gup_can_follow_protnone()") missed that follow_page() and +follow_trans_huge_pmd() never implicitly set FOLL_NUMA because they really +don't want to fail on PROT_NONE-mapped pages -- either due to NUMA hinting +or due to inaccessible (PROT_NONE) VMAs. + +As spelled out in commit 0b9d705297b2 ("mm: numa: Support NUMA hinting +page faults from gup/gup_fast"): "Other follow_page callers like KSM +should not use FOLL_NUMA, or they would fail to get the pages if they use +follow_page instead of get_user_pages." + +liubo reported [1] that smaps_rollup results are imprecise, because they +miss accounting of pages that are mapped PROT_NONE. Further, it's easy to +reproduce that KSM no longer works on inaccessible VMAs on x86-64, because +pte_protnone()/pmd_protnone() also indicates "true" in inaccessible VMAs, +and follow_page() refuses to return such pages right now. + +As KVM really depends on these NUMA hinting faults, removing the +pte_protnone()/pmd_protnone() handling in GUP code completely is not +really an option. + +To fix the issues at hand, let's revive FOLL_NUMA as FOLL_HONOR_NUMA_FAULT +to restore the original behavior for now and add better comments. + +Set FOLL_HONOR_NUMA_FAULT independent of FOLL_FORCE in +is_valid_gup_args(), to add that flag for all external GUP users. + +Note that there are three GUP-internal __get_user_pages() users that don't +end up calling is_valid_gup_args() and consequently won't get +FOLL_HONOR_NUMA_FAULT set. + +1) get_dump_page(): we really don't want to handle NUMA hinting + faults. It specifies FOLL_FORCE and wouldn't have honored NUMA + hinting faults already. +2) populate_vma_page_range(): we really don't want to handle NUMA hinting + faults. It specifies FOLL_FORCE on accessible VMAs, so it wouldn't have + honored NUMA hinting faults already. +3) faultin_vma_page_range(): we similarly don't want to handle NUMA + hinting faults. 
+ +To make the combination of FOLL_FORCE and FOLL_HONOR_NUMA_FAULT work in +inaccessible VMAs properly, we have to perform VMA accessibility checks in +gup_can_follow_protnone(). + +As GUP-fast should reject such pages either way in +pte_access_permitted()/pmd_access_permitted() -- for example on x86-64 and +arm64 that both implement pte_protnone() -- let's just always fall back to +ordinary GUP when stumbling over pte_protnone()/pmd_protnone(). + +As Linus notes [2], honoring NUMA faults might only make sense for +selected GUP users. + +So we should really see if we can instead let relevant GUP callers specify +it manually, and not trigger NUMA hinting faults from GUP as default. +Prepare for that by making FOLL_HONOR_NUMA_FAULT an external GUP flag and +adding appropriate documentation. + +While at it, remove a stale comment from follow_trans_huge_pmd(): That +comment for pmd_protnone() was added in commit 2b4847e73004 ("mm: numa: +serialise parallel get_user_page against THP migration"), which noted: + + THP does not unmap pages due to a lack of support for migration + entries at a PMD level. This allows races with get_user_pages + +Nowadays, we do have PMD migration entries, so the comment no longer +applies. Let's drop it. 
+ +[1] https://lore.kernel.org/r/20230726073409.631838-1-liubo254@huawei.com +[2] https://lore.kernel.org/r/CAHk-=wgRiP_9X0rRdZKT8nhemZGNateMtb366t37d8-x7VRs=g@mail.gmail.com + +Link: https://lkml.kernel.org/r/20230803143208.383663-2-david@redhat.com +Fixes: 474098edac26 ("mm/gup: replace FOLL_NUMA by gup_can_follow_protnone()") +Signed-off-by: David Hildenbrand +Reported-by: liubo +Closes: https://lore.kernel.org/r/20230726073409.631838-1-liubo254@huawei.com +Reported-by: Peter Xu +Closes: https://lore.kernel.org/all/ZMKJjDaqZ7FW0jfe@x1n/ +Acked-by: Mel Gorman +Acked-by: Peter Xu +Cc: Hugh Dickins +Cc: Jason Gunthorpe +Cc: John Hubbard +Cc: Linus Torvalds +Cc: Matthew Wilcox (Oracle) +Cc: Mel Gorman +Cc: Paolo Bonzini +Cc: Shuah Khan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 21 +++++++++++++++------ + include/linux/mm_types.h | 9 +++++++++ + mm/gup.c | 30 ++++++++++++++++++++++++------ + mm/huge_memory.c | 3 +-- + 4 files changed, 49 insertions(+), 14 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -3381,15 +3381,24 @@ static inline int vm_fault_to_errno(vm_f + * Indicates whether GUP can follow a PROT_NONE mapped page, or whether + * a (NUMA hinting) fault is required. + */ +-static inline bool gup_can_follow_protnone(unsigned int flags) ++static inline bool gup_can_follow_protnone(struct vm_area_struct *vma, ++ unsigned int flags) + { + /* +- * FOLL_FORCE has to be able to make progress even if the VMA is +- * inaccessible. Further, FOLL_FORCE access usually does not represent +- * application behaviour and we should avoid triggering NUMA hinting +- * faults. ++ * If callers don't want to honor NUMA hinting faults, no need to ++ * determine if we would actually have to trigger a NUMA hinting fault. + */ +- return flags & FOLL_FORCE; ++ if (!(flags & FOLL_HONOR_NUMA_FAULT)) ++ return true; ++ ++ /* ++ * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs. 
++ * ++ * Requiring a fault here even for inaccessible VMAs would mean that ++ * FOLL_FORCE cannot make any progress, because handle_mm_fault() ++ * refuses to process NUMA hinting faults in inaccessible VMAs. ++ */ ++ return !vma_is_accessible(vma); + } + + typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -1286,6 +1286,15 @@ enum { + FOLL_PCI_P2PDMA = 1 << 10, + /* allow interrupts from generic signals */ + FOLL_INTERRUPTIBLE = 1 << 11, ++ /* ++ * Always honor (trigger) NUMA hinting faults. ++ * ++ * FOLL_WRITE implicitly honors NUMA hinting faults because a ++ * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE ++ * apply). get_user_pages_fast_only() always implicitly honors NUMA ++ * hinting faults. ++ */ ++ FOLL_HONOR_NUMA_FAULT = 1 << 12, + + /* See also internal only FOLL flags in mm/internal.h */ + }; +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -551,7 +551,7 @@ static struct page *follow_page_pte(stru + pte = *ptep; + if (!pte_present(pte)) + goto no_page; +- if (pte_protnone(pte) && !gup_can_follow_protnone(flags)) ++ if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags)) + goto no_page; + + page = vm_normal_page(vma, address, pte); +@@ -672,7 +672,7 @@ static struct page *follow_pmd_mask(stru + if (likely(!pmd_trans_huge(pmdval))) + return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); + +- if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags)) ++ if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags)) + return no_page_table(vma, flags); + + ptl = pmd_lock(mm, pmd); +@@ -820,6 +820,10 @@ struct page *follow_page(struct vm_area_ + if (WARN_ON_ONCE(foll_flags & FOLL_PIN)) + return NULL; + ++ /* ++ * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect ++ * to fail on PROT_NONE-mapped pages. 
++ */ + page = follow_page_mask(vma, address, foll_flags, &ctx); + if (ctx.pgmap) + put_dev_pagemap(ctx.pgmap); +@@ -2134,6 +2138,13 @@ static bool is_valid_gup_args(struct pag + gup_flags |= FOLL_UNLOCKABLE; + } + ++ /* ++ * For now, always trigger NUMA hinting faults. Some GUP users like ++ * KVM require the hint to be as the calling context of GUP is ++ * functionally similar to a memory reference from task context. ++ */ ++ gup_flags |= FOLL_HONOR_NUMA_FAULT; ++ + /* FOLL_GET and FOLL_PIN are mutually exclusive. */ + if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) == + (FOLL_PIN | FOLL_GET))) +@@ -2394,7 +2405,14 @@ static int gup_pte_range(pmd_t pmd, pmd_ + struct page *page; + struct folio *folio; + +- if (pte_protnone(pte) && !gup_can_follow_protnone(flags)) ++ /* ++ * Always fallback to ordinary GUP on PROT_NONE-mapped pages: ++ * pte_access_permitted() better should reject these pages ++ * either way: otherwise, GUP-fast might succeed in ++ * cases where ordinary GUP would fail due to VMA access ++ * permissions. 
++ */ ++ if (pte_protnone(pte)) + goto pte_unmap; + + if (!pte_access_permitted(pte, flags & FOLL_WRITE)) +@@ -2784,8 +2802,8 @@ static int gup_pmd_range(pud_t *pudp, pu + + if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || + pmd_devmap(pmd))) { +- if (pmd_protnone(pmd) && +- !gup_can_follow_protnone(flags)) ++ /* See gup_pte_range() */ ++ if (pmd_protnone(pmd)) + return 0; + + if (!gup_huge_pmd(pmd, pmdp, addr, next, flags, +@@ -2965,7 +2983,7 @@ static int internal_get_user_pages_fast( + if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | + FOLL_FORCE | FOLL_PIN | FOLL_GET | + FOLL_FAST_ONLY | FOLL_NOFAULT | +- FOLL_PCI_P2PDMA))) ++ FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT))) + return -EINVAL; + + if (gup_flags & FOLL_PIN) +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1467,8 +1467,7 @@ struct page *follow_trans_huge_pmd(struc + if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) + return ERR_PTR(-EFAULT); + +- /* Full NUMA hinting faults to serialise migration in fault paths */ +- if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags)) ++ if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags)) + return NULL; + + if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page)) diff --git a/queue-6.4/mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch b/queue-6.4/mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch new file mode 100644 index 00000000000..c4227ea5e00 --- /dev/null +++ b/queue-6.4/mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch @@ -0,0 +1,47 @@ +From e2c1ab070fdc81010ec44634838d24fce9ff9e53 Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Tue, 27 Jun 2023 19:28:08 +0800 +Subject: mm: memory-failure: fix unexpected return value in soft_offline_page() + +From: Miaohe Lin + +commit e2c1ab070fdc81010ec44634838d24fce9ff9e53 upstream. 
+ +When page_handle_poison() fails to handle the hugepage or free page in +retry path, soft_offline_page() will return 0 while -EBUSY is expected in +this case. + +Consequently the user will think soft_offline_page succeeds while it in +fact failed. So the user will not try again later in this case. + +Link: https://lkml.kernel.org/r/20230627112808.1275241-1-linmiaohe@huawei.com +Fixes: b94e02822deb ("mm,hwpoison: try to narrow window race for free pages") +Signed-off-by: Miaohe Lin +Acked-by: Naoya Horiguchi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory-failure.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -2744,10 +2744,13 @@ retry: + if (ret > 0) { + ret = soft_offline_in_use_page(page); + } else if (ret == 0) { +- if (!page_handle_poison(page, true, false) && try_again) { +- try_again = false; +- flags &= ~MF_COUNT_INCREASED; +- goto retry; ++ if (!page_handle_poison(page, true, false)) { ++ if (try_again) { ++ try_again = false; ++ flags &= ~MF_COUNT_INCREASED; ++ goto retry; ++ } ++ ret = -EBUSY; + } + } + diff --git a/queue-6.4/mm-multi-gen-lru-don-t-spin-during-memcg-release.patch b/queue-6.4/mm-multi-gen-lru-don-t-spin-during-memcg-release.patch new file mode 100644 index 00000000000..8b1c5f1d569 --- /dev/null +++ b/queue-6.4/mm-multi-gen-lru-don-t-spin-during-memcg-release.patch @@ -0,0 +1,85 @@ +From 6867c7a3320669cbe44b905a3eb35db725c6d470 Mon Sep 17 00:00:00 2001 +From: "T.J. Mercier" +Date: Mon, 14 Aug 2023 15:16:36 +0000 +Subject: mm: multi-gen LRU: don't spin during memcg release + +From: T.J. Mercier + +commit 6867c7a3320669cbe44b905a3eb35db725c6d470 upstream. + +When a memcg is in the process of being released mem_cgroup_tryget will +fail because its reference count has already reached 0. 
This can happen +during reclaim if the memcg has already been offlined, and we reclaim all +remaining pages attributed to the offlined memcg. shrink_many attempts to +skip the empty memcg in this case, and continue reclaiming from the +remaining memcgs in the old generation. If there is only one memcg +remaining, or if all remaining memcgs are in the process of being released +then shrink_many will spin until all memcgs have finished being released. +The release occurs through a workqueue, so it can take a while before +kswapd is able to make any further progress. + +This fix results in reductions in kswapd activity and direct reclaim in +a test where 28 apps (working set size > total memory) are repeatedly +launched in a random sequence: + + A B delta ratio(%) + allocstall_movable 5962 3539 -2423 -40.64 + allocstall_normal 2661 2417 -244 -9.17 +kswapd_high_wmark_hit_quickly 53152 7594 -45558 -85.71 + pageoutrun 57365 11750 -45615 -79.52 + +Link: https://lkml.kernel.org/r/20230814151636.1639123-1-tjmercier@google.com +Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") +Signed-off-by: T.J. 
Mercier +Acked-by: Yu Zhao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmscan.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4818,16 +4818,17 @@ void lru_gen_release_memcg(struct mem_cg + + spin_lock_irq(&pgdat->memcg_lru.lock); + +- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list)); ++ if (hlist_nulls_unhashed(&lruvec->lrugen.list)) ++ goto unlock; + + gen = lruvec->lrugen.gen; + +- hlist_nulls_del_rcu(&lruvec->lrugen.list); ++ hlist_nulls_del_init_rcu(&lruvec->lrugen.list); + pgdat->memcg_lru.nr_memcgs[gen]--; + + if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq)) + WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); +- ++unlock: + spin_unlock_irq(&pgdat->memcg_lru.lock); + } + } +@@ -5398,8 +5399,10 @@ restart: + rcu_read_lock(); + + hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) { +- if (op) ++ if (op) { + lru_gen_rotate_memcg(lruvec, op); ++ op = 0; ++ } + + mem_cgroup_put(memcg); + +@@ -5407,7 +5410,7 @@ restart: + memcg = lruvec_memcg(lruvec); + + if (!mem_cgroup_tryget(memcg)) { +- op = 0; ++ lru_gen_release_memcg(memcg); + memcg = NULL; + continue; + } diff --git a/queue-6.4/nfs-fix-a-use-after-free-in-nfs_direct_join_group.patch b/queue-6.4/nfs-fix-a-use-after-free-in-nfs_direct_join_group.patch new file mode 100644 index 00000000000..7e239d01399 --- /dev/null +++ b/queue-6.4/nfs-fix-a-use-after-free-in-nfs_direct_join_group.patch @@ -0,0 +1,60 @@ +From be2fd1560eb57b7298aa3c258ddcca0d53ecdea3 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 8 Aug 2023 21:17:11 -0400 +Subject: NFS: Fix a use after free in nfs_direct_join_group() + +From: Trond Myklebust + +commit be2fd1560eb57b7298aa3c258ddcca0d53ecdea3 upstream. + +Be more careful when tearing down the subrequests of an O_DIRECT write +as part of a retransmission. 
+ +Reported-by: Chris Mason +Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission") +Cc: stable@vger.kernel.org +Signed-off-by: Trond Myklebust +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfs/direct.c | 26 ++++++++++++++++---------- + 1 file changed, 16 insertions(+), 10 deletions(-) + +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -472,20 +472,26 @@ out: + return result; + } + +-static void +-nfs_direct_join_group(struct list_head *list, struct inode *inode) ++static void nfs_direct_join_group(struct list_head *list, struct inode *inode) + { +- struct nfs_page *req, *next; ++ struct nfs_page *req, *subreq; + + list_for_each_entry(req, list, wb_list) { +- if (req->wb_head != req || req->wb_this_page == req) ++ if (req->wb_head != req) + continue; +- for (next = req->wb_this_page; +- next != req->wb_head; +- next = next->wb_this_page) { +- nfs_list_remove_request(next); +- nfs_release_request(next); +- } ++ subreq = req->wb_this_page; ++ if (subreq == req) ++ continue; ++ do { ++ /* ++ * Remove subrequests from this list before freeing ++ * them in the call to nfs_join_page_group(). ++ */ ++ if (!list_empty(&subreq->wb_list)) { ++ nfs_list_remove_request(subreq); ++ nfs_release_request(subreq); ++ } ++ } while ((subreq = subreq->wb_this_page) != req); + nfs_join_page_group(req, inode); + } + } diff --git a/queue-6.4/nfsd-fix-race-to-free_stateid-and-cl_revoked.patch b/queue-6.4/nfsd-fix-race-to-free_stateid-and-cl_revoked.patch new file mode 100644 index 00000000000..46a60e5dc1b --- /dev/null +++ b/queue-6.4/nfsd-fix-race-to-free_stateid-and-cl_revoked.patch @@ -0,0 +1,47 @@ +From 3b816601e279756e781e6c4d9b3f3bd21a72ac67 Mon Sep 17 00:00:00 2001 +From: Benjamin Coddington +Date: Fri, 4 Aug 2023 10:52:20 -0400 +Subject: nfsd: Fix race to FREE_STATEID and cl_revoked + +From: Benjamin Coddington + +commit 3b816601e279756e781e6c4d9b3f3bd21a72ac67 upstream. 
+ +We have some reports of linux NFS clients that cannot satisfy a linux knfsd +server that always sets SEQ4_STATUS_RECALLABLE_STATE_REVOKED even though +those clients repeatedly walk all their known state using TEST_STATEID and +receive NFS4_OK for all. + +It's possible for revoke_delegation() to set NFS4_REVOKED_DELEG_STID, then +nfsd4_free_stateid() finds the delegation and returns NFS4_OK to +FREE_STATEID. Afterward, revoke_delegation() moves the same delegation to +cl_revoked. This would produce the observed client/server effect. + +Fix this by ensuring that the setting of sc_type to NFS4_REVOKED_DELEG_STID +and move to cl_revoked happens within the same cl_lock. This will allow +nfsd4_free_stateid() to properly remove the delegation from cl_revoked. + +Link: https://bugzilla.redhat.com/show_bug.cgi?id=2217103 +Link: https://bugzilla.redhat.com/show_bug.cgi?id=2176575 +Signed-off-by: Benjamin Coddington +Cc: stable@vger.kernel.org # v4.17+ +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfsd/nfs4state.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -1354,9 +1354,9 @@ static void revoke_delegation(struct nfs + trace_nfsd_stid_revoke(&dp->dl_stid); + + if (clp->cl_minorversion) { ++ spin_lock(&clp->cl_lock); + dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + refcount_inc(&dp->dl_stid.sc_count); +- spin_lock(&clp->cl_lock); + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + spin_unlock(&clp->cl_lock); + } diff --git a/queue-6.4/nilfs2-fix-general-protection-fault-in-nilfs_lookup_dirty_data_buffers.patch b/queue-6.4/nilfs2-fix-general-protection-fault-in-nilfs_lookup_dirty_data_buffers.patch new file mode 100644 index 00000000000..d4abbf19edc --- /dev/null +++ b/queue-6.4/nilfs2-fix-general-protection-fault-in-nilfs_lookup_dirty_data_buffers.patch @@ -0,0 +1,46 @@ +From f83913f8c5b882a312e72b7669762f8a5c9385e4 Mon Sep 17 00:00:00 2001 +From:
Ryusuke Konishi +Date: Sat, 5 Aug 2023 22:20:38 +0900 +Subject: nilfs2: fix general protection fault in nilfs_lookup_dirty_data_buffers() + +From: Ryusuke Konishi + +commit f83913f8c5b882a312e72b7669762f8a5c9385e4 upstream. + +A syzbot stress test reported that create_empty_buffers() called from +nilfs_lookup_dirty_data_buffers() can cause a general protection fault. + +Analysis using its reproducer revealed that the back reference "mapping" +from a page/folio has been changed to NULL after dirty page/folio gang +lookup in nilfs_lookup_dirty_data_buffers(). + +Fix this issue by excluding pages/folios from being collected if, after +acquiring a lock on each page/folio, its back reference "mapping" differs +from the pointer to the address space struct that held the page/folio. + +Link: https://lkml.kernel.org/r/20230805132038.6435-1-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Reported-by: syzbot+0ad741797f4565e7e2d2@syzkaller.appspotmail.com +Closes: https://lkml.kernel.org/r/0000000000002930a705fc32b231@google.com +Tested-by: Ryusuke Konishi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/segment.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/nilfs2/segment.c ++++ b/fs/nilfs2/segment.c +@@ -725,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_bu + struct folio *folio = fbatch.folios[i]; + + folio_lock(folio); ++ if (unlikely(folio->mapping != mapping)) { ++ /* Exclude folios removed from the address space */ ++ folio_unlock(folio); ++ continue; ++ } + head = folio_buffers(folio); + if (!head) { + create_empty_buffers(&folio->page, i_blocksize(inode), 0); diff --git a/queue-6.4/of-dynamic-refactor-action-prints-to-not-use-pof-inside-devtree_lock.patch b/queue-6.4/of-dynamic-refactor-action-prints-to-not-use-pof-inside-devtree_lock.patch new file mode 100644 index 00000000000..6d55179d0b2 --- /dev/null +++ b/queue-6.4/of-dynamic-refactor-action-prints-to-not-use-pof-inside-devtree_lock.patch @@ -0,0 
+1,114 @@ +From 914d9d831e6126a6e7a92e27fcfaa250671be42c Mon Sep 17 00:00:00 2001 +From: Rob Herring +Date: Fri, 18 Aug 2023 15:40:57 -0500 +Subject: of: dynamic: Refactor action prints to not use "%pOF" inside devtree_lock + +From: Rob Herring + +commit 914d9d831e6126a6e7a92e27fcfaa250671be42c upstream. + +While originally it was fine to format strings using "%pOF" while +holding devtree_lock, this now causes a deadlock. Lockdep reports: + + of_get_parent from of_fwnode_get_parent+0x18/0x24 + ^^^^^^^^^^^^^ + of_fwnode_get_parent from fwnode_count_parents+0xc/0x28 + fwnode_count_parents from fwnode_full_name_string+0x18/0xac + fwnode_full_name_string from device_node_string+0x1a0/0x404 + device_node_string from pointer+0x3c0/0x534 + pointer from vsnprintf+0x248/0x36c + vsnprintf from vprintk_store+0x130/0x3b4 + +Fix this by moving the printing in __of_changeset_entry_apply() outside +the lock. As the only difference in the multiple prints is the action +name, use the existing "action_names" to refactor the prints into a +single print. 
+ +Fixes: a92eb7621b9fb2c2 ("lib/vsprintf: Make use of fwnode API to obtain node names and separators") +Cc: stable@vger.kernel.org +Reported-by: Geert Uytterhoeven +Reviewed-by: Geert Uytterhoeven +Link: https://lore.kernel.org/r/20230801-dt-changeset-fixes-v3-2-5f0410e007dd@kernel.org +Signed-off-by: Rob Herring +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/dynamic.c | 31 +++++++++---------------------- + 1 file changed, 9 insertions(+), 22 deletions(-) + +--- a/drivers/of/dynamic.c ++++ b/drivers/of/dynamic.c +@@ -63,15 +63,14 @@ int of_reconfig_notifier_unregister(stru + } + EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister); + +-#ifdef DEBUG +-const char *action_names[] = { ++static const char *action_names[] = { ++ [0] = "INVALID", + [OF_RECONFIG_ATTACH_NODE] = "ATTACH_NODE", + [OF_RECONFIG_DETACH_NODE] = "DETACH_NODE", + [OF_RECONFIG_ADD_PROPERTY] = "ADD_PROPERTY", + [OF_RECONFIG_REMOVE_PROPERTY] = "REMOVE_PROPERTY", + [OF_RECONFIG_UPDATE_PROPERTY] = "UPDATE_PROPERTY", + }; +-#endif + + int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p) + { +@@ -620,21 +619,9 @@ static int __of_changeset_entry_apply(st + } + + ret = __of_add_property(ce->np, ce->prop); +- if (ret) { +- pr_err("changeset: add_property failed @%pOF/%s\n", +- ce->np, +- ce->prop->name); +- break; +- } + break; + case OF_RECONFIG_REMOVE_PROPERTY: + ret = __of_remove_property(ce->np, ce->prop); +- if (ret) { +- pr_err("changeset: remove_property failed @%pOF/%s\n", +- ce->np, +- ce->prop->name); +- break; +- } + break; + + case OF_RECONFIG_UPDATE_PROPERTY: +@@ -648,20 +635,17 @@ static int __of_changeset_entry_apply(st + } + + ret = __of_update_property(ce->np, ce->prop, &old_prop); +- if (ret) { +- pr_err("changeset: update_property failed @%pOF/%s\n", +- ce->np, +- ce->prop->name); +- break; +- } + break; + default: + ret = -EINVAL; + } + raw_spin_unlock_irqrestore(&devtree_lock, flags); + +- if (ret) ++ if (ret) { ++ pr_err("changeset: apply failed: %-15s 
%pOF:%s\n", ++ action_names[ce->action], ce->np, ce->prop->name); + return ret; ++ } + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: +@@ -947,6 +931,9 @@ int of_changeset_action(struct of_change + if (!ce) + return -ENOMEM; + ++ if (WARN_ON(action >= ARRAY_SIZE(action_names))) ++ return -EINVAL; ++ + /* get a reference to the node */ + ce->action = action; + ce->np = of_node_get(np); diff --git a/queue-6.4/of-unittest-fix-expect-for-parse_phandle_with_args_map-test.patch b/queue-6.4/of-unittest-fix-expect-for-parse_phandle_with_args_map-test.patch new file mode 100644 index 00000000000..1be0b6c40ff --- /dev/null +++ b/queue-6.4/of-unittest-fix-expect-for-parse_phandle_with_args_map-test.patch @@ -0,0 +1,40 @@ +From 0aeae3788e28f64ccb95405d4dc8cd80637ffaea Mon Sep 17 00:00:00 2001 +From: Rob Herring +Date: Fri, 18 Aug 2023 15:40:56 -0500 +Subject: of: unittest: Fix EXPECT for parse_phandle_with_args_map() test + +From: Rob Herring + +commit 0aeae3788e28f64ccb95405d4dc8cd80637ffaea upstream. + +Commit 12e17243d8a1 ("of: base: improve error msg in +of_phandle_iterator_next()") added printing of the phandle value on +error, but failed to update the unittest. 
+ +Fixes: 12e17243d8a1 ("of: base: improve error msg in of_phandle_iterator_next()") +Cc: stable@vger.kernel.org +Reviewed-by: Geert Uytterhoeven +Link: https://lore.kernel.org/r/20230801-dt-changeset-fixes-v3-1-5f0410e007dd@kernel.org +Signed-off-by: Rob Herring +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/unittest.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/of/unittest.c ++++ b/drivers/of/unittest.c +@@ -664,12 +664,12 @@ static void __init of_unittest_parse_pha + memset(&args, 0, sizeof(args)); + + EXPECT_BEGIN(KERN_INFO, +- "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle"); ++ "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678"); + + rc = of_parse_phandle_with_args_map(np, "phandle-list-bad-phandle", + "phandle", 0, &args); + EXPECT_END(KERN_INFO, +- "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle"); ++ "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678"); + + unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); + diff --git a/queue-6.4/pinctrl-amd-mask-wake-bits-on-probe-again.patch b/queue-6.4/pinctrl-amd-mask-wake-bits-on-probe-again.patch new file mode 100644 index 00000000000..d21c13a16af --- /dev/null +++ b/queue-6.4/pinctrl-amd-mask-wake-bits-on-probe-again.patch @@ -0,0 +1,94 @@ +From 6bc3462a0f5ecaa376a0b3d76dafc55796799e17 Mon Sep 17 00:00:00 2001 +From: Mario Limonciello +Date: Fri, 18 Aug 2023 09:48:50 -0500 +Subject: pinctrl: amd: Mask wake bits on probe again + +From: Mario Limonciello + +commit 6bc3462a0f5ecaa376a0b3d76dafc55796799e17 upstream. + +Shubhra reports that their laptop is heating up over s2idle. Even though +it's getting into the deepest state, it appears to be having spurious +wakeup events. + +While debugging a tangential issue with the RTC Carsten reports that recent +6.1.y based kernel face a similar problem. 
+ +Looking at acpidump and GPIO register comparisons these spurious wakeup +events are from the GPIO associated with the I2C touchpad on both laptops +and occur even when the touchpad is not marked as a wake source by the +kernel. + +This means that the boot firmware has programmed these bits and because +Linux didn't touch them lead to spurious wakeup events from that GPIO. + +To fix this issue, restore most of the code that previously would clear all +the bits associated with wakeup sources. This will allow the kernel to only +program the wake up sources that are necessary. + +This is similar to what was done previously; but only the wake bits are +cleared by default instead of interrupts and wake bits. If any other +problems are reported then it may make sense to clear interrupts again too. + +Cc: Sachi King +Cc: stable@vger.kernel.org +Cc: Thorsten Leemhuis +Fixes: 65f6c7c91cb2 ("pinctrl: amd: Revert "pinctrl: amd: disable and mask interrupts on probe"") +Reported-by: Shubhra Prakash Nandi +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217754 +Reported-by: Carsten Hatger +Link: https://bugzilla.kernel.org/show_bug.cgi?id=217626#c28 +Signed-off-by: Mario Limonciello +Link: https://lore.kernel.org/r/20230818144850.1439-1-mario.limonciello@amd.com +Signed-off-by: Linus Walleij +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pinctrl/pinctrl-amd.c | 30 ++++++++++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +--- a/drivers/pinctrl/pinctrl-amd.c ++++ b/drivers/pinctrl/pinctrl-amd.c +@@ -862,6 +862,33 @@ static const struct pinconf_ops amd_pinc + .pin_config_group_set = amd_pinconf_group_set, + }; + ++static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) ++{ ++ struct pinctrl_desc *desc = gpio_dev->pctrl->desc; ++ unsigned long flags; ++ u32 pin_reg, mask; ++ int i; ++ ++ mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | ++ BIT(WAKE_CNTRL_OFF_S4); ++ ++ for (i = 0; i < desc->npins; i++) { ++ int pin = desc->pins[i].number; ++ const 
struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); ++ ++ if (!pd) ++ continue; ++ ++ raw_spin_lock_irqsave(&gpio_dev->lock, flags); ++ ++ pin_reg = readl(gpio_dev->base + pin * 4); ++ pin_reg &= ~mask; ++ writel(pin_reg, gpio_dev->base + pin * 4); ++ ++ raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); ++ } ++} ++ + #ifdef CONFIG_PM_SLEEP + static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin) + { +@@ -1099,6 +1126,9 @@ static int amd_gpio_probe(struct platfor + return PTR_ERR(gpio_dev->pctrl); + } + ++ /* Disable and mask interrupts */ ++ amd_gpio_irq_init(gpio_dev); ++ + girq = &gpio_dev->gc.irq; + gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip); + /* This will let us handle the parent IRQ in the driver */ diff --git a/queue-6.4/radix-tree-remove-unused-variable.patch b/queue-6.4/radix-tree-remove-unused-variable.patch new file mode 100644 index 00000000000..ed40a654c95 --- /dev/null +++ b/queue-6.4/radix-tree-remove-unused-variable.patch @@ -0,0 +1,42 @@ +From d59070d1076ec5114edb67c87658aeb1d691d381 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Fri, 11 Aug 2023 15:10:13 +0200 +Subject: radix tree: remove unused variable + +From: Arnd Bergmann + +commit d59070d1076ec5114edb67c87658aeb1d691d381 upstream. + +Recent versions of clang warn about an unused variable, though older +versions saw the 'slot++' as a use and did not warn: + +radix-tree.c:1136:50: error: parameter 'slot' set but not used [-Werror,-Wunused-but-set-parameter] + +It's clearly not needed any more, so just remove it. 
+ +Link: https://lkml.kernel.org/r/20230811131023.2226509-1-arnd@kernel.org +Fixes: 3a08cd52c37c7 ("radix tree: Remove multiorder support") +Signed-off-by: Arnd Bergmann +Cc: Matthew Wilcox +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Peng Zhang +Cc: Rong Tao +Cc: Tom Rix +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/radix-tree.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/lib/radix-tree.c ++++ b/lib/radix-tree.c +@@ -1136,7 +1136,6 @@ static void set_iter_tags(struct radix_t + void __rcu **radix_tree_iter_resume(void __rcu **slot, + struct radix_tree_iter *iter) + { +- slot++; + iter->index = __radix_tree_iter_add(iter, 1); + iter->next_index = iter->index; + iter->tags = 0; diff --git a/queue-6.4/riscv-fix-build-errors-using-binutils2.37-toolchains.patch b/queue-6.4/riscv-fix-build-errors-using-binutils2.37-toolchains.patch new file mode 100644 index 00000000000..2fc4ebbda8c --- /dev/null +++ b/queue-6.4/riscv-fix-build-errors-using-binutils2.37-toolchains.patch @@ -0,0 +1,61 @@ +From ef21fa7c198e04f3d3053b1c5b5f2b4b225c3350 Mon Sep 17 00:00:00 2001 +From: Mingzheng Xing +Date: Fri, 25 Aug 2023 03:08:52 +0800 +Subject: riscv: Fix build errors using binutils2.37 toolchains + +From: Mingzheng Xing + +commit ef21fa7c198e04f3d3053b1c5b5f2b4b225c3350 upstream. + +When building the kernel with binutils 2.37 and GCC-11.1.0/GCC-11.2.0, +the following error occurs: + + Assembler messages: + Error: cannot find default versions of the ISA extension `zicsr' + Error: cannot find default versions of the ISA extension `zifencei' + +The above error originated from this commit of binutils[0], which has been +resolved and backported by GCC-12.1.0[1] and GCC-11.3.0[2]. + +So fix this by changing the GCC version in +CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC to GCC-11.3.0.
+ +Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f0bae2552db1dd4f1995608fbf6648fcee4e9e0c [0] +Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ca2bbb88f999f4d3cc40e89bc1aba712505dd598 [1] +Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671 [2] +Fixes: ca09f772ccca ("riscv: Handle zicsr/zifencei issue between gcc and binutils") +Reported-by: Conor Dooley +Cc: +Signed-off-by: Mingzheng Xing +Link: https://lore.kernel.org/r/20230824190852.45470-1-xingmingzheng@iscas.ac.cn +Closes: https://lore.kernel.org/all/20230823-captive-abdomen-befd942a4a73@wendy/ +Reviewed-by: Conor Dooley +Tested-by: Conor Dooley +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/Kconfig | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/arch/riscv/Kconfig ++++ b/arch/riscv/Kconfig +@@ -535,15 +535,15 @@ config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZI + and Zifencei are supported in binutils from version 2.36 onwards. + To make life easier, and avoid forcing toolchains that default to a + newer ISA spec to version 2.2, relax the check to binutils >= 2.36. +- For clang < 17 or GCC < 11.1.0, for which this is not possible, this is +- dealt with in CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC. ++ For clang < 17 or GCC < 11.3.0, for which this is not possible or need ++ special treatment, this is dealt with in TOOLCHAIN_NEEDS_OLD_ISA_SPEC. 
+ + config TOOLCHAIN_NEEDS_OLD_ISA_SPEC + def_bool y + depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 +- # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b03be74bad08c382da47e048007a78fa3fb4ef49 +- depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110100) ++ # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671 ++ depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110300) + help + Certain versions of clang and GCC do not support zicsr and zifencei via + -march. This option causes an older ISA spec compatible with these older diff --git a/queue-6.4/riscv-handle-zicsr-zifencei-issue-between-gcc-and-binutils.patch b/queue-6.4/riscv-handle-zicsr-zifencei-issue-between-gcc-and-binutils.patch new file mode 100644 index 00000000000..7eeae8272e5 --- /dev/null +++ b/queue-6.4/riscv-handle-zicsr-zifencei-issue-between-gcc-and-binutils.patch @@ -0,0 +1,111 @@ +From ca09f772cccaeec4cd05a21528c37a260aa2dd2c Mon Sep 17 00:00:00 2001 +From: Mingzheng Xing +Date: Thu, 10 Aug 2023 00:56:48 +0800 +Subject: riscv: Handle zicsr/zifencei issue between gcc and binutils + +From: Mingzheng Xing + +commit ca09f772cccaeec4cd05a21528c37a260aa2dd2c upstream. + +Binutils-2.38 and GCC-12.1.0 bumped[0][1] the default ISA spec to the newer +20191213 version which moves some instructions from the I extension to the +Zicsr and Zifencei extensions. So if one of the binutils and GCC exceeds +that version, we should explicitly specifying Zicsr and Zifencei via -march +to cope with the new changes. but this only occurs when binutils >= 2.36 +and GCC >= 11.1.0. It's a different story when binutils < 2.36. + +binutils-2.36 supports the Zifencei extension[2] and splits Zifencei and +Zicsr from I[3]. GCC-11.1.0 is particular[4] because it add support Zicsr +and Zifencei extension for -march. 
binutils-2.35 does not support the +Zifencei extension, and does not need to specify Zicsr and Zifencei when +working with GCC >= 12.1.0. + +To make our lives easier, let's relax the check to binutils >= 2.36 in +CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. For the other two cases, +where clang < 17 or GCC < 11.1.0, we will deal with them in +CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC. + +For more information, please refer to: +commit 6df2a016c0c8 ("riscv: fix build with binutils 2.38") +commit e89c2e815e76 ("riscv: Handle zicsr/zifencei issues between clang and binutils") + +Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc [0] +Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=98416dbb0a62579d4a7a4a76bab51b5b52fec2cd [1] +Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=5a1b31e1e1cee6e9f1c92abff59cdcfff0dddf30 [2] +Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=729a53530e86972d1143553a415db34e6e01d5d2 [3] +Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b03be74bad08c382da47e048007a78fa3fb4ef49 [4] +Link: https://lore.kernel.org/all/20230308220842.1231003-1-conor@kernel.org +Link: https://lore.kernel.org/all/20230223220546.52879-1-conor@kernel.org +Reviewed-by: Conor Dooley +Acked-by: Guo Ren +Cc: +Signed-off-by: Mingzheng Xing +Link: https://lore.kernel.org/r/20230809165648.21071-1-xingmingzheng@iscas.ac.cn +Signed-off-by: Palmer Dabbelt +Signed-off-by: Greg Kroah-Hartman +--- + arch/riscv/Kconfig | 28 +++++++++++++++++----------- + arch/riscv/kernel/compat_vdso/Makefile | 8 +++++++- + 2 files changed, 24 insertions(+), 12 deletions(-) + +--- a/arch/riscv/Kconfig ++++ b/arch/riscv/Kconfig +@@ -525,24 +525,30 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE + config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + def_bool y + # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc +- depends on AS_IS_GNU && AS_VERSION >= 23800 ++ # 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=98416dbb0a62579d4a7a4a76bab51b5b52fec2cd ++ depends on AS_IS_GNU && AS_VERSION >= 23600 + help +- Newer binutils versions default to ISA spec version 20191213 which +- moves some instructions from the I extension to the Zicsr and Zifencei +- extensions. ++ Binutils-2.38 and GCC-12.1.0 bumped the default ISA spec to the newer ++ 20191213 version, which moves some instructions from the I extension to ++ the Zicsr and Zifencei extensions. This requires explicitly specifying ++ Zicsr and Zifencei when binutils >= 2.38 or GCC >= 12.1.0. Zicsr ++ and Zifencei are supported in binutils from version 2.36 onwards. ++ To make life easier, and avoid forcing toolchains that default to a ++ newer ISA spec to version 2.2, relax the check to binutils >= 2.36. ++ For clang < 17 or GCC < 11.1.0, for which this is not possible, this is ++ dealt with in CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC. + + config TOOLCHAIN_NEEDS_OLD_ISA_SPEC + def_bool y + depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 +- depends on CC_IS_CLANG && CLANG_VERSION < 170000 ++ # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b03be74bad08c382da47e048007a78fa3fb4ef49 ++ depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110100) + help +- Certain versions of clang do not support zicsr and zifencei via -march +- but newer versions of binutils require it for the reasons noted in the +- help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This +- option causes an older ISA spec compatible with these older versions +- of clang to be passed to GAS, which has the same result as passing zicsr +- and zifencei to -march. ++ Certain versions of clang and GCC do not support zicsr and zifencei via ++ -march. 
This option causes an older ISA spec compatible with these older ++ versions of clang and GCC to be passed to GAS, which has the same result ++ as passing zicsr and zifencei to -march. + + config FPU + bool "FPU support" +--- a/arch/riscv/kernel/compat_vdso/Makefile ++++ b/arch/riscv/kernel/compat_vdso/Makefile +@@ -11,7 +11,13 @@ compat_vdso-syms += flush_icache + COMPAT_CC := $(CC) + COMPAT_LD := $(LD) + +-COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 ++# binutils 2.35 does not support the zifencei extension, but in the ISA ++# spec 20191213, G stands for IMAFD_ZICSR_ZIFENCEI. ++ifdef CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI ++ COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 ++else ++ COMPAT_CC_FLAGS := -march=rv32imafd -mabi=ilp32 ++endif + COMPAT_LD_FLAGS := -melf32lriscv + + # Disable attributes, as they're useless and break the build. diff --git a/queue-6.4/selftests-mm-foll_longterm-need-to-be-updated-to-0x100.patch b/queue-6.4/selftests-mm-foll_longterm-need-to-be-updated-to-0x100.patch new file mode 100644 index 00000000000..b38aaf9246d --- /dev/null +++ b/queue-6.4/selftests-mm-foll_longterm-need-to-be-updated-to-0x100.patch @@ -0,0 +1,90 @@ +From 1738b949625c7e17a454b25de33f1f415da3db69 Mon Sep 17 00:00:00 2001 +From: Ayush Jain +Date: Tue, 8 Aug 2023 07:43:47 -0500 +Subject: selftests/mm: FOLL_LONGTERM need to be updated to 0x100 + +From: Ayush Jain + +commit 1738b949625c7e17a454b25de33f1f415da3db69 upstream. + +After commit 2c2241081f7d ("mm/gup: move private gup FOLL_ flags to +internal.h") FOLL_LONGTERM flag value got updated from 0x10000 to 0x100 at +include/linux/mm_types.h. + +As hmm.hmm_device_private.hmm_gup_test uses FOLL_LONGTERM, update it +here as well. + +Before this change, the test goes into an infinite assert loop in +hmm.hmm_device_private.hmm_gup_test +========================================================== + RUN hmm.hmm_device_private.hmm_gup_test ... +hmm-tests.c:1962:hmm_gup_test:Expected HMM_DMIRROR_PROT_WRITE..
+..(2) == m[2] (34) +hmm-tests.c:157:hmm_gup_test:Expected ret (-1) == 0 (0) +hmm-tests.c:157:hmm_gup_test:Expected ret (-1) == 0 (0) +... +========================================================== + + Call Trace: + + ? sched_clock+0xd/0x20 + ? __lock_acquire.constprop.0+0x120/0x6c0 + ? ktime_get+0x2c/0xd0 + ? sched_clock+0xd/0x20 + ? local_clock+0x12/0xd0 + ? lock_release+0x26e/0x3b0 + pin_user_pages_fast+0x4c/0x70 + gup_test_ioctl+0x4ff/0xbb0 + ? gup_test_ioctl+0x68c/0xbb0 + __x64_sys_ioctl+0x99/0xd0 + do_syscall_64+0x60/0x90 + ? syscall_exit_to_user_mode+0x2a/0x50 + ? do_syscall_64+0x6d/0x90 + ? syscall_exit_to_user_mode+0x2a/0x50 + ? do_syscall_64+0x6d/0x90 + ? irqentry_exit_to_user_mode+0xd/0x20 + ? irqentry_exit+0x3f/0x50 + ? exc_page_fault+0x96/0x200 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + RIP: 0033:0x7f6aaa31aaff + +After this change test is able to pass successfully. + +Link: https://lkml.kernel.org/r/20230808124347.79163-1-ayush.jain3@amd.com +Fixes: 2c2241081f7d ("mm/gup: move private gup FOLL_ flags to internal.h") +Signed-off-by: Ayush Jain +Reviewed-by: Raghavendra K T +Reviewed-by: John Hubbard +Acked-by: David Hildenbrand +Cc: Jason Gunthorpe +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/hmm-tests.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c +index 4adaad1b822f..20294553a5dd 100644 +--- a/tools/testing/selftests/mm/hmm-tests.c ++++ b/tools/testing/selftests/mm/hmm-tests.c +@@ -57,9 +57,14 @@ enum { + + #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) + /* Just the flags we need, copied from mm.h: */ +-#define FOLL_WRITE 0x01 /* check pte is writable */ +-#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite */ + ++#ifndef FOLL_WRITE ++#define FOLL_WRITE 0x01 /* check pte is writable */ ++#endif ++ ++#ifndef FOLL_LONGTERM ++#define FOLL_LONGTERM 0x100 /* 
mapping lifetime is indefinite */ ++#endif + FIXTURE(hmm) + { + int fd; +-- +2.42.0 + diff --git a/queue-6.4/selinux-set-next-pointer-before-attaching-to-list.patch b/queue-6.4/selinux-set-next-pointer-before-attaching-to-list.patch new file mode 100644 index 00000000000..31fe133ba1a --- /dev/null +++ b/queue-6.4/selinux-set-next-pointer-before-attaching-to-list.patch @@ -0,0 +1,43 @@ +From 70d91dc9b2ac91327d0eefd86163abc3548effa6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= +Date: Fri, 18 Aug 2023 17:33:58 +0200 +Subject: selinux: set next pointer before attaching to list +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Christian Göttsche + +commit 70d91dc9b2ac91327d0eefd86163abc3548effa6 upstream. + +Set the next pointer in filename_trans_read_helper() before attaching +the new node under construction to the list, otherwise garbage would be +dereferenced on subsequent failure during cleanup in the out goto label. 
+ +Cc: +Fixes: 430059024389 ("selinux: implement new format of filename transitions") +Signed-off-by: Christian Göttsche +Signed-off-by: Paul Moore +Signed-off-by: Greg Kroah-Hartman +--- + security/selinux/ss/policydb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/security/selinux/ss/policydb.c ++++ b/security/selinux/ss/policydb.c +@@ -2005,6 +2005,7 @@ static int filename_trans_read_helper(st + if (!datum) + goto out; + ++ datum->next = NULL; + *dst = datum; + + /* ebitmap_read() will at least init the bitmap */ +@@ -2017,7 +2018,6 @@ static int filename_trans_read_helper(st + goto out; + + datum->otype = le32_to_cpu(buf[0]); +- datum->next = NULL; + + dst = &datum->next; + } diff --git a/queue-6.4/series b/queue-6.4/series index 48601131af6..9fa41b8af24 100644 --- a/queue-6.4/series +++ b/queue-6.4/series @@ -59,3 +59,30 @@ nfsv4-fix-dropped-lock-for-racing-open-and-delegation-return.patch clk-fix-slab-out-of-bounds-error-in-devm_clk_release.patch mm-ima-kexec-of-use-memblock_free_late-from-ima_free_kexec_buffer.patch shmem-fix-smaps-bug-sleeping-while-atomic.patch +alsa-ymfpci-fix-the-missing-snd_card_free-call-at-probe-error.patch +selftests-mm-foll_longterm-need-to-be-updated-to-0x100.patch +mm-enable-page-walking-api-to-lock-vmas-during-the-walk.patch +mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch +mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch +mm-add-a-call-to-flush_cache_vmap-in-vmap_pfn.patch +mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch +mm-multi-gen-lru-don-t-spin-during-memcg-release.patch +nilfs2-fix-general-protection-fault-in-nilfs_lookup_dirty_data_buffers.patch +nfs-fix-a-use-after-free-in-nfs_direct_join_group.patch +nfsd-fix-race-to-free_stateid-and-cl_revoked.patch +selinux-set-next-pointer-before-attaching-to-list.patch +batman-adv-trigger-events-for-auto-adjusted-mtu.patch +batman-adv-don-t-increase-mtu-when-set-by-user.patch 
+batman-adv-do-not-get-eth-header-before-batadv_check_management_packet.patch +batman-adv-fix-tt-global-entry-leak-when-client-roamed-back.patch +batman-adv-fix-batadv_v_ogm_aggr_send-memory-leak.patch +batman-adv-hold-rtnl-lock-during-mtu-update-via-netlink.patch +acpi-resource-fix-irq-override-quirk-for-pcspecialist-elimina-pro-16-m.patch +lib-clz_ctz.c-fix-__clzdi2-and-__ctzdi2-for-32-bit-kernels.patch +riscv-handle-zicsr-zifencei-issue-between-gcc-and-binutils.patch +riscv-fix-build-errors-using-binutils2.37-toolchains.patch +radix-tree-remove-unused-variable.patch +of-unittest-fix-expect-for-parse_phandle_with_args_map-test.patch +of-dynamic-refactor-action-prints-to-not-use-pof-inside-devtree_lock.patch +pinctrl-amd-mask-wake-bits-on-probe-again.patch +media-vcodec-fix-potential-array-out-of-bounds-in-encoder-queue_setup.patch -- 2.47.3