From: Greg Kroah-Hartman Date: Sun, 24 Oct 2021 11:47:49 +0000 (+0200) Subject: 5.14-stable patches X-Git-Tag: v4.4.290~56 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e2eb8bc2ad61d118ffb10d413107622b31146fdd;p=thirdparty%2Fkernel%2Fstable-queue.git 5.14-stable patches added patches: alsa-hda-realtek-add-quirk-for-clevo-pc50hs.patch alsa-usb-audio-provide-quirk-for-sennheiser-gsp670-headset.patch asoc-dapm-fix-missing-kctl-change-notifications.patch asoc-nau8824-fix-headphone-vs-headset-button-press-detection-no-longer-working.patch audit-fix-possible-null-pointer-dereference-in-audit_filter_rules.patch blk-cgroup-blk_cgroup_bio_start-should-use-irq-safe-operations-on-blkg-iostat_cpu.patch can-isotp-isotp_sendmsg-add-result-check-for-wait_event_interruptible.patch can-isotp-isotp_sendmsg-fix-return-error-on-fc-timeout-on-tx-path.patch can-isotp-isotp_sendmsg-fix-tx-buffer-concurrent-access-in-isotp_sendmsg.patch can-j1939-j1939_netdev_start-fix-uaf-for-rx_kref-of-j1939_priv.patch can-j1939-j1939_tp_rxtimer-fix-errant-alert-in-j1939_tp_rxtimer.patch can-j1939-j1939_xtp_rx_dat_one-cancel-session-if-receive-tp.dt-with-error-length.patch can-j1939-j1939_xtp_rx_rts_session_new-abort-tp-less-than-9-bytes.patch can-peak_pci-peak_pci_remove-fix-uaf.patch can-peak_usb-pcan_usb_fd_decode_status-fix-back-to-error_active-state-notification.patch can-rcar_can-fix-suspend-resume.patch ceph-fix-handling-of-meta-errors.patch ceph-skip-existing-superblocks-that-are-blocklisted-or-shut-down-when-mounting.patch elfcore-correct-reference-to-config_uml.patch mm-mempolicy-do-not-allow-illegal-mpol_f_numa_balancing-mpol_local-in-mbind.patch mm-secretmem-fix-null-page-mapping-dereference-in-page_is_secretmem.patch mm-userfaultfd-selftests-fix-memory-corruption-with-thp-enabled.patch net-dsa-mt7530-correct-ds-num_ports.patch ocfs2-fix-data-corruption-after-conversion-from-inline-format.patch ocfs2-mount-fails-with-buffer-overflow-in-strlen.patch 
tracing-have-all-levels-of-checks-prevent-recursion.patch ucounts-fix-signal-ucount-refcounting.patch ucounts-move-get_ucounts-from-cred_alloc_blank-to-key_change_session_keyring.patch ucounts-pair-inc_rlimit_ucounts-with-dec_rlimit_ucoutns-in-commit_creds.patch ucounts-proper-error-handling-in-set_cred_ucounts.patch userfaultfd-fix-a-race-between-writeprotect-and-exit_mmap.patch vfs-check-fd-has-read-access-in-kernel_read_file_from_fd.patch --- diff --git a/queue-5.14/alsa-hda-realtek-add-quirk-for-clevo-pc50hs.patch b/queue-5.14/alsa-hda-realtek-add-quirk-for-clevo-pc50hs.patch new file mode 100644 index 00000000000..eae0feec630 --- /dev/null +++ b/queue-5.14/alsa-hda-realtek-add-quirk-for-clevo-pc50hs.patch @@ -0,0 +1,31 @@ +From aef454b40288158b850aab13e3d2a8c406779401 Mon Sep 17 00:00:00 2001 +From: Steven Clarkson +Date: Thu, 14 Oct 2021 06:35:54 -0700 +Subject: ALSA: hda/realtek: Add quirk for Clevo PC50HS + +From: Steven Clarkson + +commit aef454b40288158b850aab13e3d2a8c406779401 upstream. + +Apply existing PCI quirk to the Clevo PC50HS and related models to fix +audio output on the built in speakers. 
+ +Signed-off-by: Steven Clarkson +Cc: +Link: https://lore.kernel.org/r/20211014133554.1326741-1-sc@lambdal.com +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/pci/hda/patch_realtek.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -2547,6 +2547,7 @@ static const struct snd_pci_quirk alc882 + SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), ++ SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), diff --git a/queue-5.14/alsa-usb-audio-provide-quirk-for-sennheiser-gsp670-headset.patch b/queue-5.14/alsa-usb-audio-provide-quirk-for-sennheiser-gsp670-headset.patch new file mode 100644 index 00000000000..6aece2b28a2 --- /dev/null +++ b/queue-5.14/alsa-usb-audio-provide-quirk-for-sennheiser-gsp670-headset.patch @@ -0,0 +1,68 @@ +From 3c414eb65c294719a91a746260085363413f91c1 Mon Sep 17 00:00:00 2001 +From: Brendan Grieve +Date: Fri, 15 Oct 2021 10:53:35 +0800 +Subject: ALSA: usb-audio: Provide quirk for Sennheiser GSP670 Headset + +From: Brendan Grieve + +commit 3c414eb65c294719a91a746260085363413f91c1 upstream. + +As per discussion at: https://github.com/szszoke/sennheiser-gsp670-pulseaudio-profile/issues/13 + +The GSP670 has 2 playback and 1 recording device that by default are +detected in an incompatible order for alsa. This may have been done to make +it compatible for the console by the manufacturer and only affects the +latest firmware which uses its own ID. 
+ +This quirk will resolve this by reordering the channels. + +Signed-off-by: Brendan Grieve +Cc: +Link: https://lore.kernel.org/r/20211015025335.196592-1-brendan@grieve.com.au +Signed-off-by: Takashi Iwai +Signed-off-by: Greg Kroah-Hartman +--- + sound/usb/quirks-table.h | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +--- a/sound/usb/quirks-table.h ++++ b/sound/usb/quirks-table.h +@@ -4080,6 +4080,38 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge + } + } + }, ++{ ++ /* ++ * Sennheiser GSP670 ++ * Change order of interfaces loaded ++ */ ++ USB_DEVICE(0x1395, 0x0300), ++ .bInterfaceClass = USB_CLASS_PER_INTERFACE, ++ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { ++ .ifnum = QUIRK_ANY_INTERFACE, ++ .type = QUIRK_COMPOSITE, ++ .data = &(const struct snd_usb_audio_quirk[]) { ++ // Communication ++ { ++ .ifnum = 3, ++ .type = QUIRK_AUDIO_STANDARD_INTERFACE ++ }, ++ // Recording ++ { ++ .ifnum = 4, ++ .type = QUIRK_AUDIO_STANDARD_INTERFACE ++ }, ++ // Main ++ { ++ .ifnum = 1, ++ .type = QUIRK_AUDIO_STANDARD_INTERFACE ++ }, ++ { ++ .ifnum = -1 ++ } ++ } ++ } ++}, + + #undef USB_DEVICE_VENDOR_SPEC + #undef USB_AUDIO_DEVICE diff --git a/queue-5.14/asoc-dapm-fix-missing-kctl-change-notifications.patch b/queue-5.14/asoc-dapm-fix-missing-kctl-change-notifications.patch new file mode 100644 index 00000000000..ab6e94c0c7e --- /dev/null +++ b/queue-5.14/asoc-dapm-fix-missing-kctl-change-notifications.patch @@ -0,0 +1,81 @@ +From 5af82c81b2c49cfb1cad84d9eb6eab0e3d1c4842 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Wed, 6 Oct 2021 16:17:12 +0200 +Subject: ASoC: DAPM: Fix missing kctl change notifications + +From: Takashi Iwai + +commit 5af82c81b2c49cfb1cad84d9eb6eab0e3d1c4842 upstream. + +The put callback of a kcontrol is supposed to return 1 when the value +is changed, and this will be notified to user-space. 
However, some +DAPM kcontrols always return 0 (except for errors), hence the +user-space misses the update of a control value. + +This patch corrects the behavior by properly returning 1 when the +value gets updated. + +Reported-and-tested-by: Hans de Goede +Cc: +Signed-off-by: Takashi Iwai +Link: https://lore.kernel.org/r/20211006141712.2439-1-tiwai@suse.de +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/soc-dapm.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/sound/soc/soc-dapm.c ++++ b/sound/soc/soc-dapm.c +@@ -2559,6 +2559,7 @@ static int snd_soc_dapm_set_pin(struct s + const char *pin, int status) + { + struct snd_soc_dapm_widget *w = dapm_find_widget(dapm, pin, true); ++ int ret = 0; + + dapm_assert_locked(dapm); + +@@ -2571,13 +2572,14 @@ static int snd_soc_dapm_set_pin(struct s + dapm_mark_dirty(w, "pin configuration"); + dapm_widget_invalidate_input_paths(w); + dapm_widget_invalidate_output_paths(w); ++ ret = 1; + } + + w->connected = status; + if (status == 0) + w->force = 0; + +- return 0; ++ return ret; + } + + /** +@@ -3582,14 +3584,15 @@ int snd_soc_dapm_put_pin_switch(struct s + { + struct snd_soc_card *card = snd_kcontrol_chip(kcontrol); + const char *pin = (const char *)kcontrol->private_value; ++ int ret; + + if (ucontrol->value.integer.value[0]) +- snd_soc_dapm_enable_pin(&card->dapm, pin); ++ ret = snd_soc_dapm_enable_pin(&card->dapm, pin); + else +- snd_soc_dapm_disable_pin(&card->dapm, pin); ++ ret = snd_soc_dapm_disable_pin(&card->dapm, pin); + + snd_soc_dapm_sync(&card->dapm); +- return 0; ++ return ret; + } + EXPORT_SYMBOL_GPL(snd_soc_dapm_put_pin_switch); + +@@ -4023,7 +4026,7 @@ static int snd_soc_dapm_dai_link_put(str + + rtd->params_select = ucontrol->value.enumerated.item[0]; + +- return 0; ++ return 1; + } + + static void diff --git a/queue-5.14/asoc-nau8824-fix-headphone-vs-headset-button-press-detection-no-longer-working.patch 
b/queue-5.14/asoc-nau8824-fix-headphone-vs-headset-button-press-detection-no-longer-working.patch new file mode 100644 index 00000000000..9d87b0103fb --- /dev/null +++ b/queue-5.14/asoc-nau8824-fix-headphone-vs-headset-button-press-detection-no-longer-working.patch @@ -0,0 +1,41 @@ +From 42871e95a3afea8956d8cc567ea725b33a837775 Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Wed, 29 Sep 2021 22:15:12 +0200 +Subject: ASoC: nau8824: Fix headphone vs headset, button-press detection no longer working + +From: Hans de Goede + +commit 42871e95a3afea8956d8cc567ea725b33a837775 upstream. + +Commit 1d25684e2251 ("ASoC: nau8824: Fix open coded prefix handling") +replaced the nau8824_dapm_enable_pin() helper with direct calls to +snd_soc_dapm_enable_pin(), but the helper was using +snd_soc_dapm_force_enable_pin() and not forcing the MICBIAS + SAR +supplies on breaks headphone vs headset and button-press detection. + +Replace the snd_soc_dapm_enable_pin() calls with +snd_soc_dapm_force_enable_pin() to fix this. 
+ +Cc: stable@vger.kernel.org +Fixes: 1d25684e2251 ("ASoC: nau8824: Fix open coded prefix handling") +Signed-off-by: Hans de Goede +Link: https://lore.kernel.org/r/20210929201512.460360-1-hdegoede@redhat.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/codecs/nau8824.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/sound/soc/codecs/nau8824.c ++++ b/sound/soc/codecs/nau8824.c +@@ -867,8 +867,8 @@ static void nau8824_jdet_work(struct wor + struct regmap *regmap = nau8824->regmap; + int adc_value, event = 0, event_mask = 0; + +- snd_soc_dapm_enable_pin(dapm, "MICBIAS"); +- snd_soc_dapm_enable_pin(dapm, "SAR"); ++ snd_soc_dapm_force_enable_pin(dapm, "MICBIAS"); ++ snd_soc_dapm_force_enable_pin(dapm, "SAR"); + snd_soc_dapm_sync(dapm); + + msleep(100); diff --git a/queue-5.14/audit-fix-possible-null-pointer-dereference-in-audit_filter_rules.patch b/queue-5.14/audit-fix-possible-null-pointer-dereference-in-audit_filter_rules.patch new file mode 100644 index 00000000000..7c45e70f7d6 --- /dev/null +++ b/queue-5.14/audit-fix-possible-null-pointer-dereference-in-audit_filter_rules.patch @@ -0,0 +1,35 @@ +From 6e3ee990c90494561921c756481d0e2125d8b895 Mon Sep 17 00:00:00 2001 +From: Gaosheng Cui +Date: Sat, 16 Oct 2021 15:23:50 +0800 +Subject: audit: fix possible null-pointer dereference in audit_filter_rules + +From: Gaosheng Cui + +commit 6e3ee990c90494561921c756481d0e2125d8b895 upstream. + +Fix possible null-pointer dereference in audit_filter_rules. 
+ +audit_filter_rules() error: we previously assumed 'ctx' could be null + +Cc: stable@vger.kernel.org +Fixes: bf361231c295 ("audit: add saddr_fam filter field") +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Signed-off-by: Gaosheng Cui +Signed-off-by: Paul Moore +Signed-off-by: Greg Kroah-Hartman +--- + kernel/auditsc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/auditsc.c ++++ b/kernel/auditsc.c +@@ -657,7 +657,7 @@ static int audit_filter_rules(struct tas + result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val); + break; + case AUDIT_SADDR_FAM: +- if (ctx->sockaddr) ++ if (ctx && ctx->sockaddr) + result = audit_comparator(ctx->sockaddr->ss_family, + f->op, f->val); + break; diff --git a/queue-5.14/blk-cgroup-blk_cgroup_bio_start-should-use-irq-safe-operations-on-blkg-iostat_cpu.patch b/queue-5.14/blk-cgroup-blk_cgroup_bio_start-should-use-irq-safe-operations-on-blkg-iostat_cpu.patch new file mode 100644 index 00000000000..319bc56c7e1 --- /dev/null +++ b/queue-5.14/blk-cgroup-blk_cgroup_bio_start-should-use-irq-safe-operations-on-blkg-iostat_cpu.patch @@ -0,0 +1,48 @@ +From 5370b0f49078203acf3c064b634a09707167a864 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 14 Oct 2021 13:20:22 -1000 +Subject: blk-cgroup: blk_cgroup_bio_start() should use irq-safe operations on blkg->iostat_cpu + +From: Tejun Heo + +commit 5370b0f49078203acf3c064b634a09707167a864 upstream. + +c3df5fb57fe8 ("cgroup: rstat: fix A-A deadlock on 32bit around +u64_stats_sync") made u64_stats updates irq-safe to avoid A-A deadlocks. +Unfortunately, the conversion missed one in blk_cgroup_bio_start(). Fix it. 
+ +Fixes: 2d146aa3aa84 ("mm: memcontrol: switch to rstat") +Cc: stable@vger.kernel.org # v5.13+ +Reported-by: syzbot+9738c8815b375ce482a1@syzkaller.appspotmail.com +Signed-off-by: Tejun Heo +Link: https://lore.kernel.org/r/YWi7NrQdVlxD6J9W@slm.duckdns.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-cgroup.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/block/blk-cgroup.c ++++ b/block/blk-cgroup.c +@@ -1916,10 +1916,11 @@ void blk_cgroup_bio_start(struct bio *bi + { + int rwd = blk_cgroup_io_type(bio), cpu; + struct blkg_iostat_set *bis; ++ unsigned long flags; + + cpu = get_cpu(); + bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); +- u64_stats_update_begin(&bis->sync); ++ flags = u64_stats_update_begin_irqsave(&bis->sync); + + /* + * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split +@@ -1931,7 +1932,7 @@ void blk_cgroup_bio_start(struct bio *bi + } + bis->cur.ios[rwd]++; + +- u64_stats_update_end(&bis->sync); ++ u64_stats_update_end_irqrestore(&bis->sync, flags); + if (cgroup_subsys_on_dfl(io_cgrp_subsys)) + cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu); + put_cpu(); diff --git a/queue-5.14/can-isotp-isotp_sendmsg-add-result-check-for-wait_event_interruptible.patch b/queue-5.14/can-isotp-isotp_sendmsg-add-result-check-for-wait_event_interruptible.patch new file mode 100644 index 00000000000..04238e4b13e --- /dev/null +++ b/queue-5.14/can-isotp-isotp_sendmsg-add-result-check-for-wait_event_interruptible.patch @@ -0,0 +1,61 @@ +From 9acf636215a6ce9362fe618e7da4913b8bfe84c8 Mon Sep 17 00:00:00 2001 +From: Ziyang Xuan +Date: Sat, 9 Oct 2021 15:40:18 +0800 +Subject: can: isotp: isotp_sendmsg(): add result check for wait_event_interruptible() + +From: Ziyang Xuan + +commit 9acf636215a6ce9362fe618e7da4913b8bfe84c8 upstream. + +Using wait_event_interruptible() to wait for complete transmission, +but do not check the result of wait_event_interruptible() which can be +interrupted. 
It will result in TX buffer has multiple accessors and +the later process interferes with the previous process. + +Following is one of the problems reported by syzbot. + +============================================================= +WARNING: CPU: 0 PID: 0 at net/can/isotp.c:840 isotp_tx_timer_handler+0x2e0/0x4c0 +CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.13.0-rc7+ #68 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 +RIP: 0010:isotp_tx_timer_handler+0x2e0/0x4c0 +Call Trace: + + ? isotp_setsockopt+0x390/0x390 + __hrtimer_run_queues+0xb8/0x610 + hrtimer_run_softirq+0x91/0xd0 + ? rcu_read_lock_sched_held+0x4d/0x80 + __do_softirq+0xe8/0x553 + irq_exit_rcu+0xf8/0x100 + sysvec_apic_timer_interrupt+0x9e/0xc0 + + asm_sysvec_apic_timer_interrupt+0x12/0x20 + +Add result check for wait_event_interruptible() in isotp_sendmsg() +to avoid multiple accessers for tx buffer. + +Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") +Link: https://lore.kernel.org/all/10ca695732c9dd267c76a3c30f37aefe1ff7e32f.1633764159.git.william.xuanziyang@huawei.com +Cc: stable@vger.kernel.org +Reported-by: syzbot+78bab6958a614b0c80b9@syzkaller.appspotmail.com +Signed-off-by: Ziyang Xuan +Acked-by: Oliver Hartkopp +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/isotp.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/can/isotp.c ++++ b/net/can/isotp.c +@@ -865,7 +865,9 @@ static int isotp_sendmsg(struct socket * + return -EAGAIN; + + /* wait for complete transmission of current pdu */ +- wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); ++ err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); ++ if (err) ++ return err; + } + + if (!size || size > MAX_MSG_LENGTH) diff --git a/queue-5.14/can-isotp-isotp_sendmsg-fix-return-error-on-fc-timeout-on-tx-path.patch b/queue-5.14/can-isotp-isotp_sendmsg-fix-return-error-on-fc-timeout-on-tx-path.patch new file mode 
100644 index 00000000000..9ac5e8db1fe --- /dev/null +++ b/queue-5.14/can-isotp-isotp_sendmsg-fix-return-error-on-fc-timeout-on-tx-path.patch @@ -0,0 +1,48 @@ +From d674a8f123b4096d85955c7eaabec688f29724c9 Mon Sep 17 00:00:00 2001 +From: Marc Kleine-Budde +Date: Fri, 7 May 2021 11:18:39 +0200 +Subject: can: isotp: isotp_sendmsg(): fix return error on FC timeout on TX path + +From: Marc Kleine-Budde + +commit d674a8f123b4096d85955c7eaabec688f29724c9 upstream. + +When a large chunk of data is sent and the receiver does not send a +Flow Control frame back in time, sendmsg() does not return an error +code, but the number of bytes sent corresponding to the size of the +packet. + +If a timeout occurs the isotp_tx_timer_handler() is fired, sets +sk->sk_err and calls the sk->sk_error_report() function. It was +wrongly expected that the error would be propagated to user space in +every case. For isotp_sendmsg() blocking on wait_event_interruptible() +this is not the case. + +This patch fixes the problem by checking if sk->sk_err is set and +returning the error to user space. 
+ +Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") +Link: https://github.com/hartkopp/can-isotp/issues/42 +Link: https://github.com/hartkopp/can-isotp/pull/43 +Link: https://lore.kernel.org/all/20210507091839.1366379-1-mkl@pengutronix.de +Cc: stable@vger.kernel.org +Reported-by: Sottas Guillaume (LMB) +Tested-by: Oliver Hartkopp +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/isotp.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/can/isotp.c ++++ b/net/can/isotp.c +@@ -960,6 +960,9 @@ static int isotp_sendmsg(struct socket * + if (wait_tx_done) { + /* wait for complete transmission of current pdu */ + wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); ++ ++ if (sk->sk_err) ++ return -sk->sk_err; + } + + return size; diff --git a/queue-5.14/can-isotp-isotp_sendmsg-fix-tx-buffer-concurrent-access-in-isotp_sendmsg.patch b/queue-5.14/can-isotp-isotp_sendmsg-fix-tx-buffer-concurrent-access-in-isotp_sendmsg.patch new file mode 100644 index 00000000000..1358ab6226d --- /dev/null +++ b/queue-5.14/can-isotp-isotp_sendmsg-fix-tx-buffer-concurrent-access-in-isotp_sendmsg.patch @@ -0,0 +1,142 @@ +From 43a08c3bdac4cb42eff8fe5e2278bffe0c5c3daa Mon Sep 17 00:00:00 2001 +From: Ziyang Xuan +Date: Sat, 9 Oct 2021 15:40:30 +0800 +Subject: can: isotp: isotp_sendmsg(): fix TX buffer concurrent access in isotp_sendmsg() + +From: Ziyang Xuan + +commit 43a08c3bdac4cb42eff8fe5e2278bffe0c5c3daa upstream. + +When isotp_sendmsg() concurrent, tx.state of all TX processes can be +ISOTP_IDLE. The conditions so->tx.state != ISOTP_IDLE and +wq_has_sleeper(&so->wait) can not protect TX buffer from being +accessed by multiple TX processes. + +We can use cmpxchg() to try to modify tx.state to ISOTP_SENDING firstly. +If the modification of the previous process succeed, the later process +must wait tx.state to ISOTP_IDLE firstly. Thus, we can ensure TX buffer +is accessed by only one process at the same time. 
And we should also +restore the original tx.state at the subsequent error processes. + +Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") +Link: https://lore.kernel.org/all/c2517874fbdf4188585cf9ddf67a8fa74d5dbde5.1633764159.git.william.xuanziyang@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Ziyang Xuan +Acked-by: Oliver Hartkopp +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/isotp.c | 46 +++++++++++++++++++++++++++++++--------------- + 1 file changed, 31 insertions(+), 15 deletions(-) + +--- a/net/can/isotp.c ++++ b/net/can/isotp.c +@@ -121,7 +121,7 @@ enum { + struct tpcon { + int idx; + int len; +- u8 state; ++ u32 state; + u8 bs; + u8 sn; + u8 ll_dl; +@@ -848,6 +848,7 @@ static int isotp_sendmsg(struct socket * + { + struct sock *sk = sock->sk; + struct isotp_sock *so = isotp_sk(sk); ++ u32 old_state = so->tx.state; + struct sk_buff *skb; + struct net_device *dev; + struct canfd_frame *cf; +@@ -860,47 +861,55 @@ static int isotp_sendmsg(struct socket * + return -EADDRNOTAVAIL; + + /* we do not support multiple buffers - for now */ +- if (so->tx.state != ISOTP_IDLE || wq_has_sleeper(&so->wait)) { +- if (msg->msg_flags & MSG_DONTWAIT) +- return -EAGAIN; ++ if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE || ++ wq_has_sleeper(&so->wait)) { ++ if (msg->msg_flags & MSG_DONTWAIT) { ++ err = -EAGAIN; ++ goto err_out; ++ } + + /* wait for complete transmission of current pdu */ + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) +- return err; ++ goto err_out; + } + +- if (!size || size > MAX_MSG_LENGTH) +- return -EINVAL; ++ if (!size || size > MAX_MSG_LENGTH) { ++ err = -EINVAL; ++ goto err_out; ++ } + + /* take care of a potential SF_DL ESC offset for TX_DL > 8 */ + off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0; + + /* does the given data fit into a single frame for SF_BROADCAST? 
*/ + if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) && +- (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) +- return -EINVAL; ++ (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) { ++ err = -EINVAL; ++ goto err_out; ++ } + + err = memcpy_from_msg(so->tx.buf, msg, size); + if (err < 0) +- return err; ++ goto err_out; + + dev = dev_get_by_index(sock_net(sk), so->ifindex); +- if (!dev) +- return -ENXIO; ++ if (!dev) { ++ err = -ENXIO; ++ goto err_out; ++ } + + skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv), + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) { + dev_put(dev); +- return err; ++ goto err_out; + } + + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + +- so->tx.state = ISOTP_SENDING; + so->tx.len = size; + so->tx.idx = 0; + +@@ -956,7 +965,7 @@ static int isotp_sendmsg(struct socket * + if (err) { + pr_notice_once("can-isotp: %s: can_send_ret %pe\n", + __func__, ERR_PTR(err)); +- return err; ++ goto err_out; + } + + if (wait_tx_done) { +@@ -968,6 +977,13 @@ static int isotp_sendmsg(struct socket * + } + + return size; ++ ++err_out: ++ so->tx.state = old_state; ++ if (so->tx.state == ISOTP_IDLE) ++ wake_up_interruptible(&so->wait); ++ ++ return err; + } + + static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, diff --git a/queue-5.14/can-j1939-j1939_netdev_start-fix-uaf-for-rx_kref-of-j1939_priv.patch b/queue-5.14/can-j1939-j1939_netdev_start-fix-uaf-for-rx_kref-of-j1939_priv.patch new file mode 100644 index 00000000000..4c4487f5f6c --- /dev/null +++ b/queue-5.14/can-j1939-j1939_netdev_start-fix-uaf-for-rx_kref-of-j1939_priv.patch @@ -0,0 +1,79 @@ +From d9d52a3ebd284882f5562c88e55991add5d01586 Mon Sep 17 00:00:00 2001 +From: Ziyang Xuan +Date: Sun, 26 Sep 2021 18:47:57 +0800 +Subject: can: j1939: j1939_netdev_start(): fix UAF for rx_kref of j1939_priv + +From: Ziyang Xuan + +commit d9d52a3ebd284882f5562c88e55991add5d01586 upstream. 
+ +It will trigger UAF for rx_kref of j1939_priv as following. + + cpu0 cpu1 +j1939_sk_bind(socket0, ndev0, ...) +j1939_netdev_start + j1939_sk_bind(socket1, ndev0, ...) + j1939_netdev_start +j1939_priv_set + j1939_priv_get_by_ndev_locked +j1939_jsk_add +..... +j1939_netdev_stop +kref_put_lock(&priv->rx_kref, ...) + kref_get(&priv->rx_kref, ...) + REFCOUNT_WARN("addition on 0;...") + +==================================================== +refcount_t: addition on 0; use-after-free. +WARNING: CPU: 1 PID: 20874 at lib/refcount.c:25 refcount_warn_saturate+0x169/0x1e0 +RIP: 0010:refcount_warn_saturate+0x169/0x1e0 +Call Trace: + j1939_netdev_start+0x68b/0x920 + j1939_sk_bind+0x426/0xeb0 + ? security_socket_bind+0x83/0xb0 + +The rx_kref's kref_get() and kref_put() should use j1939_netdev_lock to +protect. + +Fixes: 9d71dd0c70099 ("can: add support of SAE J1939 protocol") +Link: https://lore.kernel.org/all/20210926104757.2021540-1-william.xuanziyang@huawei.com +Cc: stable@vger.kernel.org +Reported-by: syzbot+85d9878b19c94f9019ad@syzkaller.appspotmail.com +Signed-off-by: Ziyang Xuan +Acked-by: Oleksij Rempel +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/j1939/main.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/can/j1939/main.c ++++ b/net/can/j1939/main.c +@@ -249,11 +249,14 @@ struct j1939_priv *j1939_netdev_start(st + struct j1939_priv *priv, *priv_new; + int ret; + +- priv = j1939_priv_get_by_ndev(ndev); ++ spin_lock(&j1939_netdev_lock); ++ priv = j1939_priv_get_by_ndev_locked(ndev); + if (priv) { + kref_get(&priv->rx_kref); ++ spin_unlock(&j1939_netdev_lock); + return priv; + } ++ spin_unlock(&j1939_netdev_lock); + + priv = j1939_priv_create(ndev); + if (!priv) +@@ -269,10 +272,10 @@ struct j1939_priv *j1939_netdev_start(st + /* Someone was faster than us, use their priv and roll + * back our's. 
+ */ ++ kref_get(&priv_new->rx_kref); + spin_unlock(&j1939_netdev_lock); + dev_put(ndev); + kfree(priv); +- kref_get(&priv_new->rx_kref); + return priv_new; + } + j1939_priv_set(ndev, priv); diff --git a/queue-5.14/can-j1939-j1939_tp_rxtimer-fix-errant-alert-in-j1939_tp_rxtimer.patch b/queue-5.14/can-j1939-j1939_tp_rxtimer-fix-errant-alert-in-j1939_tp_rxtimer.patch new file mode 100644 index 00000000000..ada71ba1c22 --- /dev/null +++ b/queue-5.14/can-j1939-j1939_tp_rxtimer-fix-errant-alert-in-j1939_tp_rxtimer.patch @@ -0,0 +1,46 @@ +From b504a884f6b5a77dac7d580ffa08e482f70d1a30 Mon Sep 17 00:00:00 2001 +From: Ziyang Xuan +Date: Mon, 6 Sep 2021 17:42:19 +0800 +Subject: can: j1939: j1939_tp_rxtimer(): fix errant alert in j1939_tp_rxtimer + +From: Ziyang Xuan + +commit b504a884f6b5a77dac7d580ffa08e482f70d1a30 upstream. + +When the session state is J1939_SESSION_DONE, j1939_tp_rxtimer() will +give an alert "rx timeout, send abort", but do nothing actually. Move +the alert into session active judgment condition, it is more +reasonable. + +One of the scenarios is that j1939_tp_rxtimer() execute followed by +j1939_xtp_rx_abort_one(). After j1939_xtp_rx_abort_one(), the session +state is J1939_SESSION_DONE, then j1939_tp_rxtimer() give an alert. 
+ +Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") +Link: https://lore.kernel.org/all/20210906094219.95924-1-william.xuanziyang@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Ziyang Xuan +Acked-by: Oleksij Rempel +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/j1939/transport.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/can/j1939/transport.c ++++ b/net/can/j1939/transport.c +@@ -1230,12 +1230,11 @@ static enum hrtimer_restart j1939_tp_rxt + session->err = -ETIME; + j1939_session_deactivate(session); + } else { +- netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", +- __func__, session); +- + j1939_session_list_lock(session->priv); + if (session->state >= J1939_SESSION_ACTIVE && + session->state < J1939_SESSION_ACTIVE_MAX) { ++ netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", ++ __func__, session); + j1939_session_get(session); + hrtimer_start(&session->rxtimer, + ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS), diff --git a/queue-5.14/can-j1939-j1939_xtp_rx_dat_one-cancel-session-if-receive-tp.dt-with-error-length.patch b/queue-5.14/can-j1939-j1939_xtp_rx_dat_one-cancel-session-if-receive-tp.dt-with-error-length.patch new file mode 100644 index 00000000000..313cfb84211 --- /dev/null +++ b/queue-5.14/can-j1939-j1939_xtp_rx_dat_one-cancel-session-if-receive-tp.dt-with-error-length.patch @@ -0,0 +1,55 @@ +From 379743985ab6cfe2cbd32067cf4ed497baca6d06 Mon Sep 17 00:00:00 2001 +From: Zhang Changzhong +Date: Thu, 30 Sep 2021 11:33:20 +0800 +Subject: can: j1939: j1939_xtp_rx_dat_one(): cancel session if receive TP.DT with error length + +From: Zhang Changzhong + +commit 379743985ab6cfe2cbd32067cf4ed497baca6d06 upstream. + +According to SAE-J1939-21, the data length of TP.DT must be 8 bytes, so +cancel session when receive unexpected TP.DT message. 
+ +Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") +Link: https://lore.kernel.org/all/1632972800-45091-1-git-send-email-zhangchangzhong@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Zhang Changzhong +Acked-by: Oleksij Rempel +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/j1939/transport.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/can/j1939/transport.c ++++ b/net/can/j1939/transport.c +@@ -1770,6 +1770,7 @@ static void j1939_xtp_rx_dpo(struct j193 + static void j1939_xtp_rx_dat_one(struct j1939_session *session, + struct sk_buff *skb) + { ++ enum j1939_xtp_abort abort = J1939_XTP_ABORT_FAULT; + struct j1939_priv *priv = session->priv; + struct j1939_sk_buff_cb *skcb; + struct sk_buff *se_skb = NULL; +@@ -1784,9 +1785,11 @@ static void j1939_xtp_rx_dat_one(struct + + skcb = j1939_skb_to_cb(skb); + dat = skb->data; +- if (skb->len <= 1) ++ if (skb->len != 8) { + /* makes no sense */ ++ abort = J1939_XTP_ABORT_UNEXPECTED_DATA; + goto out_session_cancel; ++ } + + switch (session->last_cmd) { + case 0xff: +@@ -1884,7 +1887,7 @@ static void j1939_xtp_rx_dat_one(struct + out_session_cancel: + kfree_skb(se_skb); + j1939_session_timers_cancel(session); +- j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); ++ j1939_session_cancel(session, abort); + j1939_session_put(session); + } + diff --git a/queue-5.14/can-j1939-j1939_xtp_rx_rts_session_new-abort-tp-less-than-9-bytes.patch b/queue-5.14/can-j1939-j1939_xtp_rx_rts_session_new-abort-tp-less-than-9-bytes.patch new file mode 100644 index 00000000000..2a35b64a095 --- /dev/null +++ b/queue-5.14/can-j1939-j1939_xtp_rx_rts_session_new-abort-tp-less-than-9-bytes.patch @@ -0,0 +1,55 @@ +From a4fbe70c5cb746441d56b28cf88161d9e0e25378 Mon Sep 17 00:00:00 2001 +From: Zhang Changzhong +Date: Thu, 14 Oct 2021 17:26:40 +0800 +Subject: can: j1939: j1939_xtp_rx_rts_session_new(): abort TP less than 9 bytes + +From: Zhang Changzhong + +commit 
a4fbe70c5cb746441d56b28cf88161d9e0e25378 upstream. + +The receiver should abort TP if 'total message size' in TP.CM_RTS and +TP.CM_BAM is less than 9 or greater than 1785 [1], but currently the +j1939 stack only checks the upper bound and the receiver will accept +the following broadcast message: + + vcan1 18ECFF00 [8] 20 08 00 02 FF 00 23 01 + vcan1 18EBFF00 [8] 01 00 00 00 00 00 00 00 + vcan1 18EBFF00 [8] 02 00 FF FF FF FF FF FF + +This patch adds check for the lower bound and abort illegal TP. + +[1] SAE-J1939-82 A.3.4 Row 2 and A.3.6 Row 6. + +Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") +Link: https://lore.kernel.org/all/1634203601-3460-1-git-send-email-zhangchangzhong@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Zhang Changzhong +Acked-by: Oleksij Rempel +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + net/can/j1939/j1939-priv.h | 1 + + net/can/j1939/transport.c | 2 ++ + 2 files changed, 3 insertions(+) + +--- a/net/can/j1939/j1939-priv.h ++++ b/net/can/j1939/j1939-priv.h +@@ -326,6 +326,7 @@ int j1939_session_activate(struct j1939_ + void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec); + void j1939_session_timers_cancel(struct j1939_session *session); + ++#define J1939_MIN_TP_PACKET_SIZE 9 + #define J1939_MAX_TP_PACKET_SIZE (7 * 0xff) + #define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff) + +--- a/net/can/j1939/transport.c ++++ b/net/can/j1939/transport.c +@@ -1596,6 +1596,8 @@ j1939_session *j1939_xtp_rx_rts_session_ + abort = J1939_XTP_ABORT_FAULT; + else if (len > priv->tp_max_packet_size) + abort = J1939_XTP_ABORT_RESOURCE; ++ else if (len < J1939_MIN_TP_PACKET_SIZE) ++ abort = J1939_XTP_ABORT_FAULT; + } + + if (abort != J1939_XTP_NO_ABORT) { diff --git a/queue-5.14/can-peak_pci-peak_pci_remove-fix-uaf.patch b/queue-5.14/can-peak_pci-peak_pci_remove-fix-uaf.patch new file mode 100644 index 00000000000..0ce0fab9d53 --- /dev/null +++ 
b/queue-5.14/can-peak_pci-peak_pci_remove-fix-uaf.patch @@ -0,0 +1,62 @@ +From 949fe9b35570361bc6ee2652f89a0561b26eec98 Mon Sep 17 00:00:00 2001 +From: Zheyu Ma +Date: Thu, 14 Oct 2021 06:28:33 +0000 +Subject: can: peak_pci: peak_pci_remove(): fix UAF + +From: Zheyu Ma + +commit 949fe9b35570361bc6ee2652f89a0561b26eec98 upstream. + +When removing the peak_pci module, referencing 'chan' again after +releasing 'dev' will cause UAF. + +Fix this by releasing 'dev' later. + +The following log reveals it: + +[ 35.961814 ] BUG: KASAN: use-after-free in peak_pci_remove+0x16f/0x270 [peak_pci] +[ 35.963414 ] Read of size 8 at addr ffff888136998ee8 by task modprobe/5537 +[ 35.965513 ] Call Trace: +[ 35.965718 ] dump_stack_lvl+0xa8/0xd1 +[ 35.966028 ] print_address_description+0x87/0x3b0 +[ 35.966420 ] kasan_report+0x172/0x1c0 +[ 35.966725 ] ? peak_pci_remove+0x16f/0x270 [peak_pci] +[ 35.967137 ] ? trace_irq_enable_rcuidle+0x10/0x170 +[ 35.967529 ] ? peak_pci_remove+0x16f/0x270 [peak_pci] +[ 35.967945 ] __asan_report_load8_noabort+0x14/0x20 +[ 35.968346 ] peak_pci_remove+0x16f/0x270 [peak_pci] +[ 35.968752 ] pci_device_remove+0xa9/0x250 + +Fixes: e6d9c80b7ca1 ("can: peak_pci: add support of some new PEAK-System PCI cards") +Link: https://lore.kernel.org/all/1634192913-15639-1-git-send-email-zheyuma97@gmail.com +Cc: stable@vger.kernel.org +Signed-off-by: Zheyu Ma +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/sja1000/peak_pci.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/drivers/net/can/sja1000/peak_pci.c ++++ b/drivers/net/can/sja1000/peak_pci.c +@@ -729,16 +729,15 @@ static void peak_pci_remove(struct pci_d + struct net_device *prev_dev = chan->prev_dev; + + dev_info(&pdev->dev, "removing device %s\n", dev->name); ++ /* do that only for first channel */ ++ if (!prev_dev && chan->pciec_card) ++ peak_pciec_remove(chan->pciec_card); + unregister_sja1000dev(dev); + free_sja1000dev(dev); + dev = prev_dev; + +- 
if (!dev) { +- /* do that only for first channel */ +- if (chan->pciec_card) +- peak_pciec_remove(chan->pciec_card); ++ if (!dev) + break; +- } + priv = netdev_priv(dev); + chan = priv->priv; + } diff --git a/queue-5.14/can-peak_usb-pcan_usb_fd_decode_status-fix-back-to-error_active-state-notification.patch b/queue-5.14/can-peak_usb-pcan_usb_fd_decode_status-fix-back-to-error_active-state-notification.patch new file mode 100644 index 00000000000..2b73a0129ac --- /dev/null +++ b/queue-5.14/can-peak_usb-pcan_usb_fd_decode_status-fix-back-to-error_active-state-notification.patch @@ -0,0 +1,38 @@ +From 3d031abc7e7249573148871180c28ecedb5e27df Mon Sep 17 00:00:00 2001 +From: Stephane Grosjean +Date: Wed, 29 Sep 2021 16:21:10 +0200 +Subject: can: peak_usb: pcan_usb_fd_decode_status(): fix back to ERROR_ACTIVE state notification + +From: Stephane Grosjean + +commit 3d031abc7e7249573148871180c28ecedb5e27df upstream. + +This corrects the lack of notification of a return to ERROR_ACTIVE +state for USB - CANFD devices from PEAK-System. 
+ +Fixes: 0a25e1f4f185 ("can: peak_usb: add support for PEAK new CANFD USB adapters") +Link: https://lore.kernel.org/all/20210929142111.55757-1-s.grosjean@peak-system.com +Cc: stable@vger.kernel.org +Signed-off-by: Stephane Grosjean +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c ++++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +@@ -551,11 +551,10 @@ static int pcan_usb_fd_decode_status(str + } else if (sm->channel_p_w_b & PUCAN_BUS_WARNING) { + new_state = CAN_STATE_ERROR_WARNING; + } else { +- /* no error bit (so, no error skb, back to active state) */ +- dev->can.state = CAN_STATE_ERROR_ACTIVE; ++ /* back to (or still in) ERROR_ACTIVE state */ ++ new_state = CAN_STATE_ERROR_ACTIVE; + pdev->bec.txerr = 0; + pdev->bec.rxerr = 0; +- return 0; + } + + /* state hasn't changed */ diff --git a/queue-5.14/can-rcar_can-fix-suspend-resume.patch b/queue-5.14/can-rcar_can-fix-suspend-resume.patch new file mode 100644 index 00000000000..5f319f5c9e2 --- /dev/null +++ b/queue-5.14/can-rcar_can-fix-suspend-resume.patch @@ -0,0 +1,68 @@ +From f7c05c3987dcfde9a4e8c2d533db013fabebca0d Mon Sep 17 00:00:00 2001 +From: Yoshihiro Shimoda +Date: Fri, 24 Sep 2021 16:55:56 +0900 +Subject: can: rcar_can: fix suspend/resume + +From: Yoshihiro Shimoda + +commit f7c05c3987dcfde9a4e8c2d533db013fabebca0d upstream. + +If the driver was not opened, rcar_can_suspend() should not call +clk_disable() because the clock was not enabled. 
+ +Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver") +Link: https://lore.kernel.org/all/20210924075556.223685-1-yoshihiro.shimoda.uh@renesas.com +Cc: stable@vger.kernel.org +Signed-off-by: Yoshihiro Shimoda +Tested-by: Ayumi Nakamichi +Reviewed-by: Ulrich Hecht +Tested-by: Biju Das +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/rcar/rcar_can.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +--- a/drivers/net/can/rcar/rcar_can.c ++++ b/drivers/net/can/rcar/rcar_can.c +@@ -846,10 +846,12 @@ static int __maybe_unused rcar_can_suspe + struct rcar_can_priv *priv = netdev_priv(ndev); + u16 ctlr; + +- if (netif_running(ndev)) { +- netif_stop_queue(ndev); +- netif_device_detach(ndev); +- } ++ if (!netif_running(ndev)) ++ return 0; ++ ++ netif_stop_queue(ndev); ++ netif_device_detach(ndev); ++ + ctlr = readw(&priv->regs->ctlr); + ctlr |= RCAR_CAN_CTLR_CANM_HALT; + writew(ctlr, &priv->regs->ctlr); +@@ -868,6 +870,9 @@ static int __maybe_unused rcar_can_resum + u16 ctlr; + int err; + ++ if (!netif_running(ndev)) ++ return 0; ++ + err = clk_enable(priv->clk); + if (err) { + netdev_err(ndev, "clk_enable() failed, error %d\n", err); +@@ -881,10 +886,9 @@ static int __maybe_unused rcar_can_resum + writew(ctlr, &priv->regs->ctlr); + priv->can.state = CAN_STATE_ERROR_ACTIVE; + +- if (netif_running(ndev)) { +- netif_device_attach(ndev); +- netif_start_queue(ndev); +- } ++ netif_device_attach(ndev); ++ netif_start_queue(ndev); ++ + return 0; + } + diff --git a/queue-5.14/ceph-fix-handling-of-meta-errors.patch b/queue-5.14/ceph-fix-handling-of-meta-errors.patch new file mode 100644 index 00000000000..e0fbce1c58e --- /dev/null +++ b/queue-5.14/ceph-fix-handling-of-meta-errors.patch @@ -0,0 +1,150 @@ +From 1bd85aa65d0e7b5e4d09240f492f37c569fdd431 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Thu, 7 Oct 2021 14:19:49 -0400 +Subject: ceph: fix handling of "meta" errors + +From: Jeff Layton + +commit 
1bd85aa65d0e7b5e4d09240f492f37c569fdd431 upstream. + +Currently, we check the wb_err too early for directories, before all of +the unsafe child requests have been waited on. In order to fix that we +need to check the mapping->wb_err later nearer to the end of ceph_fsync. + +We also have an overly-complex method for tracking errors after +blocklisting. The errors recorded in cleanup_session_requests go to a +completely separate field in the inode, but we end up reporting them the +same way we would for any other error (in fsync). + +There's no real benefit to tracking these errors in two different +places, since the only reporting mechanism for them is in fsync, and +we'd need to advance them both every time. + +Given that, we can just remove i_meta_err, and convert the places that +used it to instead just use mapping->wb_err instead. That also fixes +the original problem by ensuring that we do a check_and_advance of the +wb_err at the end of the fsync op. + +Cc: stable@vger.kernel.org +URL: https://tracker.ceph.com/issues/52864 +Reported-by: Patrick Donnelly +Signed-off-by: Jeff Layton +Reviewed-by: Xiubo Li +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/caps.c | 12 +++--------- + fs/ceph/file.c | 1 - + fs/ceph/inode.c | 2 -- + fs/ceph/mds_client.c | 17 +++++------------ + fs/ceph/super.h | 3 --- + 5 files changed, 8 insertions(+), 27 deletions(-) + +--- a/fs/ceph/caps.c ++++ b/fs/ceph/caps.c +@@ -2264,7 +2264,6 @@ static int unsafe_request_wait(struct in + + int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) + { +- struct ceph_file_info *fi = file->private_data; + struct inode *inode = file->f_mapping->host; + struct ceph_inode_info *ci = ceph_inode(inode); + u64 flush_tid; +@@ -2299,14 +2298,9 @@ int ceph_fsync(struct file *file, loff_t + if (err < 0) + ret = err; + +- if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) { +- spin_lock(&file->f_lock); +- err = errseq_check_and_advance(&ci->i_meta_err, 
+- &fi->meta_err); +- spin_unlock(&file->f_lock); +- if (err < 0) +- ret = err; +- } ++ err = file_check_and_advance_wb_err(file); ++ if (err < 0) ++ ret = err; + out: + dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); + return ret; +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -233,7 +233,6 @@ static int ceph_init_file_info(struct in + + spin_lock_init(&fi->rw_contexts_lock); + INIT_LIST_HEAD(&fi->rw_contexts); +- fi->meta_err = errseq_sample(&ci->i_meta_err); + fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); + + return 0; +--- a/fs/ceph/inode.c ++++ b/fs/ceph/inode.c +@@ -541,8 +541,6 @@ struct inode *ceph_alloc_inode(struct su + + ceph_fscache_inode_init(ci); + +- ci->i_meta_err = 0; +- + return &ci->vfs_inode; + } + +--- a/fs/ceph/mds_client.c ++++ b/fs/ceph/mds_client.c +@@ -1479,7 +1479,6 @@ static void cleanup_session_requests(str + { + struct ceph_mds_request *req; + struct rb_node *p; +- struct ceph_inode_info *ci; + + dout("cleanup_session_requests mds%d\n", session->s_mds); + mutex_lock(&mdsc->mutex); +@@ -1488,16 +1487,10 @@ static void cleanup_session_requests(str + struct ceph_mds_request, r_unsafe_item); + pr_warn_ratelimited(" dropping unsafe request %llu\n", + req->r_tid); +- if (req->r_target_inode) { +- /* dropping unsafe change of inode's attributes */ +- ci = ceph_inode(req->r_target_inode); +- errseq_set(&ci->i_meta_err, -EIO); +- } +- if (req->r_unsafe_dir) { +- /* dropping unsafe directory operation */ +- ci = ceph_inode(req->r_unsafe_dir); +- errseq_set(&ci->i_meta_err, -EIO); +- } ++ if (req->r_target_inode) ++ mapping_set_error(req->r_target_inode->i_mapping, -EIO); ++ if (req->r_unsafe_dir) ++ mapping_set_error(req->r_unsafe_dir->i_mapping, -EIO); + __unregister_request(mdsc, req); + } + /* zero r_attempts, so kick_requests() will re-send requests */ +@@ -1664,7 +1657,7 @@ static int remove_session_caps_cb(struct + spin_unlock(&mdsc->cap_dirty_lock); + + if (dirty_dropped) { +- 
errseq_set(&ci->i_meta_err, -EIO); ++ mapping_set_error(inode->i_mapping, -EIO); + + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && +--- a/fs/ceph/super.h ++++ b/fs/ceph/super.h +@@ -430,8 +430,6 @@ struct ceph_inode_info { + #ifdef CONFIG_CEPH_FSCACHE + struct fscache_cookie *fscache; + #endif +- errseq_t i_meta_err; +- + struct inode vfs_inode; /* at end */ + }; + +@@ -775,7 +773,6 @@ struct ceph_file_info { + spinlock_t rw_contexts_lock; + struct list_head rw_contexts; + +- errseq_t meta_err; + u32 filp_gen; + atomic_t num_locks; + }; diff --git a/queue-5.14/ceph-skip-existing-superblocks-that-are-blocklisted-or-shut-down-when-mounting.patch b/queue-5.14/ceph-skip-existing-superblocks-that-are-blocklisted-or-shut-down-when-mounting.patch new file mode 100644 index 00000000000..0fdb94aef19 --- /dev/null +++ b/queue-5.14/ceph-skip-existing-superblocks-that-are-blocklisted-or-shut-down-when-mounting.patch @@ -0,0 +1,70 @@ +From 98d0a6fb7303a6f4a120b8b8ed05b86ff5db53e8 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Thu, 30 Sep 2021 08:33:13 -0400 +Subject: ceph: skip existing superblocks that are blocklisted or shut down when mounting + +From: Jeff Layton + +commit 98d0a6fb7303a6f4a120b8b8ed05b86ff5db53e8 upstream. + +Currently when mounting, we may end up finding an existing superblock +that corresponds to a blocklisted MDS client. This means that the new +mount ends up being unusable. + +If we've found an existing superblock with a client that is already +blocklisted, and the client is not configured to recover on its own, +fail the match. Ditto if the superblock has been forcibly unmounted. + +While we're in here, also rename "other" to the more conventional "fsc". 
+ +Cc: stable@vger.kernel.org +URL: https://bugzilla.redhat.com/show_bug.cgi?id=1901499 +Signed-off-by: Jeff Layton +Reviewed-by: Xiubo Li +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/super.c | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +--- a/fs/ceph/super.c ++++ b/fs/ceph/super.c +@@ -1002,16 +1002,16 @@ static int ceph_compare_super(struct sup + struct ceph_fs_client *new = fc->s_fs_info; + struct ceph_mount_options *fsopt = new->mount_options; + struct ceph_options *opt = new->client->options; +- struct ceph_fs_client *other = ceph_sb_to_client(sb); ++ struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + + dout("ceph_compare_super %p\n", sb); + +- if (compare_mount_options(fsopt, opt, other)) { ++ if (compare_mount_options(fsopt, opt, fsc)) { + dout("monitor(s)/mount options don't match\n"); + return 0; + } + if ((opt->flags & CEPH_OPT_FSID) && +- ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { ++ ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { + dout("fsid doesn't match\n"); + return 0; + } +@@ -1019,6 +1019,17 @@ static int ceph_compare_super(struct sup + dout("flags differ\n"); + return 0; + } ++ ++ if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { ++ dout("client is blocklisted (and CLEANRECOVER is not set)\n"); ++ return 0; ++ } ++ ++ if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { ++ dout("client has been forcibly unmounted\n"); ++ return 0; ++ } ++ + return 1; + } + diff --git a/queue-5.14/elfcore-correct-reference-to-config_uml.patch b/queue-5.14/elfcore-correct-reference-to-config_uml.patch new file mode 100644 index 00000000000..cc6e519ecac --- /dev/null +++ b/queue-5.14/elfcore-correct-reference-to-config_uml.patch @@ -0,0 +1,56 @@ +From b0e901280d9860a0a35055f220e8e457f300f40a Mon Sep 17 00:00:00 2001 +From: Lukas Bulwahn +Date: Mon, 18 Oct 2021 15:16:09 -0700 +Subject: elfcore: correct reference to CONFIG_UML + +From: Lukas 
Bulwahn + +commit b0e901280d9860a0a35055f220e8e457f300f40a upstream. + +Commit 6e7b64b9dd6d ("elfcore: fix building with clang") introduces +special handling for two architectures, ia64 and User Mode Linux. +However, the wrong name, i.e., CONFIG_UM, for the intended Kconfig +symbol for User-Mode Linux was used. + +Although the directory for User Mode Linux is ./arch/um; the Kconfig +symbol for this architecture is called CONFIG_UML. + +Luckily, ./scripts/checkkconfigsymbols.py warns on non-existing configs: + + UM + Referencing files: include/linux/elfcore.h + Similar symbols: UML, NUMA + +Correct the name of the config to the intended one. + +[akpm@linux-foundation.org: fix um/x86_64, per Catalin] + Link: https://lkml.kernel.org/r/20211006181119.2851441-1-catalin.marinas@arm.com + Link: https://lkml.kernel.org/r/YV6pejGzLy5ppEpt@arm.com + +Link: https://lkml.kernel.org/r/20211006082209.417-1-lukas.bulwahn@gmail.com +Fixes: 6e7b64b9dd6d ("elfcore: fix building with clang") +Signed-off-by: Lukas Bulwahn +Cc: Arnd Bergmann +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Catalin Marinas +Cc: Barret Rhoden +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/elfcore.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/elfcore.h ++++ b/include/linux/elfcore.h +@@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpr + #endif + } + +-#if defined(CONFIG_UM) || defined(CONFIG_IA64) ++#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) + /* + * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out + * extra segments containing the gate DSO contents. 
Dumping its diff --git a/queue-5.14/mm-mempolicy-do-not-allow-illegal-mpol_f_numa_balancing-mpol_local-in-mbind.patch b/queue-5.14/mm-mempolicy-do-not-allow-illegal-mpol_f_numa_balancing-mpol_local-in-mbind.patch new file mode 100644 index 00000000000..dd59d8f6a4e --- /dev/null +++ b/queue-5.14/mm-mempolicy-do-not-allow-illegal-mpol_f_numa_balancing-mpol_local-in-mbind.patch @@ -0,0 +1,120 @@ +From 6d2aec9e123bb9c49cb5c7fc654f25f81e688e8c Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 18 Oct 2021 15:15:49 -0700 +Subject: mm/mempolicy: do not allow illegal MPOL_F_NUMA_BALANCING | MPOL_LOCAL in mbind() + +From: Eric Dumazet + +commit 6d2aec9e123bb9c49cb5c7fc654f25f81e688e8c upstream. + +syzbot reported access to uninitialized memory in mbind() [1] + +Issue came with commit bda420b98505 ("numa balancing: migrate on fault +among multiple bound nodes") + +This commit added a new bit in MPOL_MODE_FLAGS, but only checked valid +combination (MPOL_F_NUMA_BALANCING can only be used with MPOL_BIND) in +do_set_mempolicy() + +This patch moves the check in sanitize_mpol_flags() so that it is also +used by mbind() + + [1] + BUG: KMSAN: uninit-value in __mpol_equal+0x567/0x590 mm/mempolicy.c:2260 + __mpol_equal+0x567/0x590 mm/mempolicy.c:2260 + mpol_equal include/linux/mempolicy.h:105 [inline] + vma_merge+0x4a1/0x1e60 mm/mmap.c:1190 + mbind_range+0xcc8/0x1e80 mm/mempolicy.c:811 + do_mbind+0xf42/0x15f0 mm/mempolicy.c:1333 + kernel_mbind mm/mempolicy.c:1483 [inline] + __do_sys_mbind mm/mempolicy.c:1490 [inline] + __se_sys_mbind+0x437/0xb80 mm/mempolicy.c:1486 + __x64_sys_mbind+0x19d/0x200 mm/mempolicy.c:1486 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 + entry_SYSCALL_64_after_hwframe+0x44/0xae + + Uninit was created at: + slab_alloc_node mm/slub.c:3221 [inline] + slab_alloc mm/slub.c:3230 [inline] + kmem_cache_alloc+0x751/0xff0 mm/slub.c:3235 + mpol_new mm/mempolicy.c:293 [inline] + do_mbind+0x912/0x15f0 
mm/mempolicy.c:1289 + kernel_mbind mm/mempolicy.c:1483 [inline] + __do_sys_mbind mm/mempolicy.c:1490 [inline] + __se_sys_mbind+0x437/0xb80 mm/mempolicy.c:1486 + __x64_sys_mbind+0x19d/0x200 mm/mempolicy.c:1486 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 + entry_SYSCALL_64_after_hwframe+0x44/0xae + ===================================================== + Kernel panic - not syncing: panic_on_kmsan set ... + CPU: 0 PID: 15049 Comm: syz-executor.0 Tainted: G B 5.15.0-rc2-syzkaller #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + Call Trace: + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x1ff/0x28e lib/dump_stack.c:106 + dump_stack+0x25/0x28 lib/dump_stack.c:113 + panic+0x44f/0xdeb kernel/panic.c:232 + kmsan_report+0x2ee/0x300 mm/kmsan/report.c:186 + __msan_warning+0xd7/0x150 mm/kmsan/instrumentation.c:208 + __mpol_equal+0x567/0x590 mm/mempolicy.c:2260 + mpol_equal include/linux/mempolicy.h:105 [inline] + vma_merge+0x4a1/0x1e60 mm/mmap.c:1190 + mbind_range+0xcc8/0x1e80 mm/mempolicy.c:811 + do_mbind+0xf42/0x15f0 mm/mempolicy.c:1333 + kernel_mbind mm/mempolicy.c:1483 [inline] + __do_sys_mbind mm/mempolicy.c:1490 [inline] + __se_sys_mbind+0x437/0xb80 mm/mempolicy.c:1486 + __x64_sys_mbind+0x19d/0x200 mm/mempolicy.c:1486 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +Link: https://lkml.kernel.org/r/20211001215630.810592-1-eric.dumazet@gmail.com +Fixes: bda420b98505 ("numa balancing: migrate on fault among multiple bound nodes") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Acked-by: Mel Gorman +Cc: "Huang, Ying" +Cc: Matthew Wilcox +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/mempolicy.c | 16 +++++----------- + 1 file changed, 5 insertions(+), 11 deletions(-) + +--- 
a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -857,16 +857,6 @@ static long do_set_mempolicy(unsigned sh + goto out; + } + +- if (flags & MPOL_F_NUMA_BALANCING) { +- if (new && new->mode == MPOL_BIND) { +- new->flags |= (MPOL_F_MOF | MPOL_F_MORON); +- } else { +- ret = -EINVAL; +- mpol_put(new); +- goto out; +- } +- } +- + ret = mpol_set_nodemask(new, nodes, scratch); + if (ret) { + mpol_put(new); +@@ -1450,7 +1440,11 @@ static inline int sanitize_mpol_flags(in + return -EINVAL; + if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES)) + return -EINVAL; +- ++ if (*flags & MPOL_F_NUMA_BALANCING) { ++ if (*mode != MPOL_BIND) ++ return -EINVAL; ++ *flags |= (MPOL_F_MOF | MPOL_F_MORON); ++ } + return 0; + } + diff --git a/queue-5.14/mm-secretmem-fix-null-page-mapping-dereference-in-page_is_secretmem.patch b/queue-5.14/mm-secretmem-fix-null-page-mapping-dereference-in-page_is_secretmem.patch new file mode 100644 index 00000000000..c128a595a11 --- /dev/null +++ b/queue-5.14/mm-secretmem-fix-null-page-mapping-dereference-in-page_is_secretmem.patch @@ -0,0 +1,66 @@ +From 79f9bc5843142b649575f887dccdf1c07ad75c20 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 18 Oct 2021 15:16:16 -0700 +Subject: mm/secretmem: fix NULL page->mapping dereference in page_is_secretmem() + +From: Sean Christopherson + +commit 79f9bc5843142b649575f887dccdf1c07ad75c20 upstream. + +Check for a NULL page->mapping before dereferencing the mapping in +page_is_secretmem(), as the page's mapping can be nullified while gup() +is running, e.g. by reclaim or truncation. 
+ + BUG: kernel NULL pointer dereference, address: 0000000000000068 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: 0000 [#1] PREEMPT SMP NOPTI + CPU: 6 PID: 4173897 Comm: CPU 3/KVM Tainted: G W + RIP: 0010:internal_get_user_pages_fast+0x621/0x9d0 + Code: <48> 81 7a 68 80 08 04 bc 0f 85 21 ff ff 8 89 c7 be + RSP: 0018:ffffaa90087679b0 EFLAGS: 00010046 + RAX: ffffe3f37905b900 RBX: 00007f2dd561e000 RCX: ffffe3f37905b934 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffe3f37905b900 + ... + CR2: 0000000000000068 CR3: 00000004c5898003 CR4: 00000000001726e0 + Call Trace: + get_user_pages_fast_only+0x13/0x20 + hva_to_pfn+0xa9/0x3e0 + try_async_pf+0xa1/0x270 + direct_page_fault+0x113/0xad0 + kvm_mmu_page_fault+0x69/0x680 + vmx_handle_exit+0xe1/0x5d0 + kvm_arch_vcpu_ioctl_run+0xd81/0x1c70 + kvm_vcpu_ioctl+0x267/0x670 + __x64_sys_ioctl+0x83/0xa0 + do_syscall_64+0x56/0x80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +Link: https://lkml.kernel.org/r/20211007231502.3552715-1-seanjc@google.com +Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") +Signed-off-by: Sean Christopherson +Reported-by: Darrick J. Wong +Reported-by: Stephen +Tested-by: Darrick J. 
Wong +Reviewed-by: David Hildenbrand +Reviewed-by: Mike Rapoport +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/secretmem.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/secretmem.h ++++ b/include/linux/secretmem.h +@@ -23,7 +23,7 @@ static inline bool page_is_secretmem(str + mapping = (struct address_space *) + ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); + +- if (mapping != page->mapping) ++ if (!mapping || mapping != page->mapping) + return false; + + return mapping->a_ops == &secretmem_aops; diff --git a/queue-5.14/mm-userfaultfd-selftests-fix-memory-corruption-with-thp-enabled.patch b/queue-5.14/mm-userfaultfd-selftests-fix-memory-corruption-with-thp-enabled.patch new file mode 100644 index 00000000000..b6435cb833b --- /dev/null +++ b/queue-5.14/mm-userfaultfd-selftests-fix-memory-corruption-with-thp-enabled.patch @@ -0,0 +1,116 @@ +From 8913970c19915bbe773d97d42989cd85b7fdc098 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Mon, 18 Oct 2021 15:15:22 -0700 +Subject: mm/userfaultfd: selftests: fix memory corruption with thp enabled + +From: Peter Xu + +commit 8913970c19915bbe773d97d42989cd85b7fdc098 upstream. 
+ +In RHEL's gating selftests we've encountered memory corruption in the +uffd event test even with upstream kernel: + + # ./userfaultfd anon 128 4 + nr_pages: 32768, nr_pages_per_cpu: 32768 + bounces: 3, mode: rnd racing read, userfaults: 6240 missing (6240) 14729 wp (14729) + bounces: 2, mode: racing read, userfaults: 1444 missing (1444) 28877 wp (28877) + bounces: 1, mode: rnd read, userfaults: 6055 missing (6055) 14699 wp (14699) + bounces: 0, mode: read, userfaults: 82 missing (82) 25196 wp (25196) + testing uffd-wp with pagemap (pgsize=4096): done + testing uffd-wp with pagemap (pgsize=2097152): done + testing events (fork, remap, remove): ERROR: nr 32427 memory corruption 0 1 (errno=0, line=963) + ERROR: faulting process failed (errno=0, line=1117) + +It can be easily reproduced when global thp enabled, which is the +default for RHEL. + +It's also known as a side effect of commit 0db282ba2c12 ("selftest: use +mmap instead of posix_memalign to allocate memory", 2021-07-23), which +is imho right itself on using mmap() to make sure the addresses will be +untagged even on arm. + +The problem is, for each test we allocate buffers using two +allocate_area() calls. We assumed these two buffers won't affect each +other, however they could, because mmap() could have found that the two +buffers are near each other and having the same VMA flags, so they got +merged into one VMA. + +It won't be a big problem if thp is not enabled, but when thp is +aggressively enabled it means when initializing the src buffer it could +accidentally setup part of the dest buffer too when there's a shared THP +that overlaps the two regions. Then some of the dest buffer won't be +able to be trapped by userfaultfd missing mode, then it'll cause memory +corruption as described. + +To fix it, do release_pages() after initializing the src buffer. 
+ +Since the previous two release_pages() calls are after +uffd_test_ctx_clear() which will unmap all the buffers anyway (which is +stronger than release pages; as unmap() also tears down pgtables), drop +them as they shouldn't really be anything useful. + +We can mark the Fixes tag upon 0db282ba2c12 as it's reported to only +happen there, however the real "Fixes" IMHO should be 8ba6e8640844, as +before that commit we'll always do explicit release_pages() before +registration of uffd, and 8ba6e8640844 changed that logic by adding +extra unmap/map and we didn't release the pages at the right place. +Meanwhile I don't have a solid clue anyway on whether posix_memalign() +could always avoid triggering this bug, hence it's safer to attach this +fix to commit 8ba6e8640844. + +Link: https://lkml.kernel.org/r/20210923232512.210092-1-peterx@redhat.com +Fixes: 8ba6e8640844 ("userfaultfd/selftests: reinitialize test context in each test") +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1994931 +Signed-off-by: Peter Xu +Reported-by: Li Wang +Tested-by: Li Wang +Reviewed-by: Axel Rasmussen +Cc: Andrea Arcangeli +Cc: Nadav Amit +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/vm/userfaultfd.c | 23 ++++++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +--- a/tools/testing/selftests/vm/userfaultfd.c ++++ b/tools/testing/selftests/vm/userfaultfd.c +@@ -414,9 +414,6 @@ static void uffd_test_ctx_init_ext(uint6 + uffd_test_ops->allocate_area((void **)&area_src); + uffd_test_ops->allocate_area((void **)&area_dst); + +- uffd_test_ops->release_pages(area_src); +- uffd_test_ops->release_pages(area_dst); +- + userfaultfd_open(features); + + count_verify = malloc(nr_pages * sizeof(unsigned long long)); +@@ -437,6 +434,26 @@ static void uffd_test_ctx_init_ext(uint6 + *(area_count(area_src, nr) + 1) = 1; + } + ++ /* ++ * After initialization of area_src, we must explicitly release 
pages ++ * for area_dst to make sure it's fully empty. Otherwise we could have ++ * some area_dst pages be errornously initialized with zero pages, ++ * hence we could hit memory corruption later in the test. ++ * ++ * One example is when THP is globally enabled, above allocate_area() ++ * calls could have the two areas merged into a single VMA (as they ++ * will have the same VMA flags so they're mergeable). When we ++ * initialize the area_src above, it's possible that some part of ++ * area_dst could have been faulted in via one huge THP that will be ++ * shared between area_src and area_dst. It could cause some of the ++ * area_dst won't be trapped by missing userfaults. ++ * ++ * This release_pages() will guarantee even if that happened, we'll ++ * proactively split the thp and drop any accidentally initialized ++ * pages within area_dst. ++ */ ++ uffd_test_ops->release_pages(area_dst); ++ + pipefd = malloc(sizeof(int) * nr_cpus * 2); + if (!pipefd) + err("pipefd"); diff --git a/queue-5.14/net-dsa-mt7530-correct-ds-num_ports.patch b/queue-5.14/net-dsa-mt7530-correct-ds-num_ports.patch new file mode 100644 index 00000000000..b22462e64f0 --- /dev/null +++ b/queue-5.14/net-dsa-mt7530-correct-ds-num_ports.patch @@ -0,0 +1,54 @@ +From 342afce10d6f61c443c95e244f812d4766f73f53 Mon Sep 17 00:00:00 2001 +From: DENG Qingfang +Date: Sat, 16 Oct 2021 14:24:14 +0800 +Subject: net: dsa: mt7530: correct ds->num_ports + +From: DENG Qingfang + +commit 342afce10d6f61c443c95e244f812d4766f73f53 upstream. + +Setting ds->num_ports to DSA_MAX_PORTS made DSA core allocate unnecessary +dsa_port's and call mt7530_port_disable for non-existent ports. + +Set it to MT7530_NUM_PORTS to fix that, and dsa_is_user_port check in +port_enable/disable is no longer required. + +Cc: stable@vger.kernel.org +Signed-off-by: DENG Qingfang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/mt7530.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -1031,9 +1031,6 @@ mt7530_port_enable(struct dsa_switch *ds + { + struct mt7530_priv *priv = ds->priv; + +- if (!dsa_is_user_port(ds, port)) +- return 0; +- + mutex_lock(&priv->reg_mutex); + + /* Allow the user port gets connected to the cpu port and also +@@ -1056,9 +1053,6 @@ mt7530_port_disable(struct dsa_switch *d + { + struct mt7530_priv *priv = ds->priv; + +- if (!dsa_is_user_port(ds, port)) +- return; +- + mutex_lock(&priv->reg_mutex); + + /* Clear up all port matrix which could be restored in the next +@@ -3132,7 +3126,7 @@ mt7530_probe(struct mdio_device *mdiodev + return -ENOMEM; + + priv->ds->dev = &mdiodev->dev; +- priv->ds->num_ports = DSA_MAX_PORTS; ++ priv->ds->num_ports = MT7530_NUM_PORTS; + + /* Use medatek,mcm property to distinguish hardware type that would + * casues a little bit differences on power-on sequence. diff --git a/queue-5.14/ocfs2-fix-data-corruption-after-conversion-from-inline-format.patch b/queue-5.14/ocfs2-fix-data-corruption-after-conversion-from-inline-format.patch new file mode 100644 index 00000000000..553c2001268 --- /dev/null +++ b/queue-5.14/ocfs2-fix-data-corruption-after-conversion-from-inline-format.patch @@ -0,0 +1,179 @@ +From 5314454ea3ff6fc746eaf71b9a7ceebed52888fa Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Mon, 18 Oct 2021 15:15:39 -0700 +Subject: ocfs2: fix data corruption after conversion from inline format + +From: Jan Kara + +commit 5314454ea3ff6fc746eaf71b9a7ceebed52888fa upstream. + +Commit 6dbf7bb55598 ("fs: Don't invalidate page buffers in +block_write_full_page()") uncovered a latent bug in ocfs2 conversion +from inline inode format to a normal inode format. 
+ +The code in ocfs2_convert_inline_data_to_extents() attempts to zero out +the whole cluster allocated for file data by grabbing, zeroing, and +dirtying all pages covering this cluster. However these pages are +beyond i_size, thus writeback code generally ignores these dirty pages +and no blocks were ever actually zeroed on the disk. + +This oversight was fixed by commit 693c241a5f6a ("ocfs2: No need to zero +pages past i_size.") for standard ocfs2 write path, inline conversion +path was apparently forgotten; the commit log also has a reasoning why +the zeroing actually is not needed. + +After commit 6dbf7bb55598, things became worse as writeback code stopped +invalidating buffers on pages beyond i_size and thus these pages end up +with clean PageDirty bit but with buffers attached to these pages being +still dirty. So when a file is converted from inline format, then +writeback triggers, and then the file is grown so that these pages +become valid, the invalid dirtiness state is preserved, +mark_buffer_dirty() does nothing on these pages (buffers are already +dirty) but page is never written back because it is clean. So data +written to these pages is lost once pages are reclaimed. + +Simple reproducer for the problem is: + + xfs_io -f -c "pwrite 0 2000" -c "pwrite 2000 2000" -c "fsync" \ + -c "pwrite 4000 2000" ocfs2_file + +After unmounting and mounting the fs again, you can observe that end of +'ocfs2_file' has lost its contents. + +Fix the problem by not doing the pointless zeroing during conversion +from inline format similarly as in the standard write path. 
+ +[akpm@linux-foundation.org: fix whitespace, per Joseph] + +Link: https://lkml.kernel.org/r/20210930095405.21433-1-jack@suse.cz +Fixes: 6dbf7bb55598 ("fs: Don't invalidate page buffers in block_write_full_page()") +Signed-off-by: Jan Kara +Reviewed-by: Joseph Qi +Tested-by: Joseph Qi +Acked-by: Gang He +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Jun Piao +Cc: "Markov, Andrey" +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/alloc.c | 46 ++++++++++++---------------------------------- + 1 file changed, 12 insertions(+), 34 deletions(-) + +--- a/fs/ocfs2/alloc.c ++++ b/fs/ocfs2/alloc.c +@@ -7045,7 +7045,7 @@ void ocfs2_set_inode_data_inline(struct + int ocfs2_convert_inline_data_to_extents(struct inode *inode, + struct buffer_head *di_bh) + { +- int ret, i, has_data, num_pages = 0; ++ int ret, has_data, num_pages = 0; + int need_free = 0; + u32 bit_off, num; + handle_t *handle; +@@ -7054,26 +7054,17 @@ int ocfs2_convert_inline_data_to_extents + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + struct ocfs2_alloc_context *data_ac = NULL; +- struct page **pages = NULL; +- loff_t end = osb->s_clustersize; ++ struct page *page = NULL; + struct ocfs2_extent_tree et; + int did_quota = 0; + + has_data = i_size_read(inode) ? 
1 : 0; + + if (has_data) { +- pages = kcalloc(ocfs2_pages_per_cluster(osb->sb), +- sizeof(struct page *), GFP_NOFS); +- if (pages == NULL) { +- ret = -ENOMEM; +- mlog_errno(ret); +- return ret; +- } +- + ret = ocfs2_reserve_clusters(osb, 1, &data_ac); + if (ret) { + mlog_errno(ret); +- goto free_pages; ++ goto out; + } + } + +@@ -7093,7 +7084,8 @@ int ocfs2_convert_inline_data_to_extents + } + + if (has_data) { +- unsigned int page_end; ++ unsigned int page_end = min_t(unsigned, PAGE_SIZE, ++ osb->s_clustersize); + u64 phys; + + ret = dquot_alloc_space_nodirty(inode, +@@ -7117,15 +7109,8 @@ int ocfs2_convert_inline_data_to_extents + */ + block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); + +- /* +- * Non sparse file systems zero on extend, so no need +- * to do that now. +- */ +- if (!ocfs2_sparse_alloc(osb) && +- PAGE_SIZE < osb->s_clustersize) +- end = PAGE_SIZE; +- +- ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); ++ ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page, ++ &num_pages); + if (ret) { + mlog_errno(ret); + need_free = 1; +@@ -7136,20 +7121,15 @@ int ocfs2_convert_inline_data_to_extents + * This should populate the 1st page for us and mark + * it up to date. 
+ */ +- ret = ocfs2_read_inline_data(inode, pages[0], di_bh); + ret = ocfs2_read_inline_data(inode, page, di_bh); + if (ret) { + mlog_errno(ret); + need_free = 1; + goto out_unlock; + } + +- page_end = PAGE_SIZE; +- if (PAGE_SIZE > osb->s_clustersize) +- page_end = osb->s_clustersize; +- +- for (i = 0; i < num_pages; i++) +- ocfs2_map_and_dirty_page(inode, handle, 0, page_end, +- pages[i], i > 0, &phys); ++ ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0, ++ &phys); + } + + spin_lock(&oi->ip_lock); +@@ -7180,8 +7160,8 @@ int ocfs2_convert_inline_data_to_extents + } + + out_unlock: +- if (pages) +- ocfs2_unlock_and_free_pages(pages, num_pages); ++ if (page) ++ ocfs2_unlock_and_free_pages(&page, num_pages); + + out_commit: + if (ret < 0 && did_quota) +@@ -7205,8 +7185,6 @@ out_commit: + out: + if (data_ac) + ocfs2_free_alloc_context(data_ac); +-free_pages: +- kfree(pages); + return ret; + } + diff --git a/queue-5.14/ocfs2-mount-fails-with-buffer-overflow-in-strlen.patch b/queue-5.14/ocfs2-mount-fails-with-buffer-overflow-in-strlen.patch new file mode 100644 index 00000000000..dbc20fa0353 --- /dev/null +++ b/queue-5.14/ocfs2-mount-fails-with-buffer-overflow-in-strlen.patch @@ -0,0 +1,87 @@ +From b15fa9224e6e1239414525d8d556d824701849fc Mon Sep 17 00:00:00 2001 +From: Valentin Vidic +Date: Mon, 18 Oct 2021 15:15:42 -0700 +Subject: ocfs2: mount fails with buffer overflow in strlen + +From: Valentin Vidic + +commit b15fa9224e6e1239414525d8d556d824701849fc upstream. + +Starting with kernel 5.11 built with CONFIG_FORTIFY_SOURCE mounting an +ocfs2 filesystem with either o2cb or pcmk cluster stack fails with the +trace below. Problem seems to be that strings for cluster stack and +cluster name are not guaranteed to be null terminated in the disk +representation, while strlcpy assumes that the source string is always +null terminated. This causes a read outside of the source string +triggering the buffer overflow detection.
+ + detected buffer overflow in strlen + ------------[ cut here ]------------ + kernel BUG at lib/string.c:1149! + invalid opcode: 0000 [#1] SMP PTI + CPU: 1 PID: 910 Comm: mount.ocfs2 Not tainted 5.14.0-1-amd64 #1 + Debian 5.14.6-2 + RIP: 0010:fortify_panic+0xf/0x11 + ... + Call Trace: + ocfs2_initialize_super.isra.0.cold+0xc/0x18 [ocfs2] + ocfs2_fill_super+0x359/0x19b0 [ocfs2] + mount_bdev+0x185/0x1b0 + legacy_get_tree+0x27/0x40 + vfs_get_tree+0x25/0xb0 + path_mount+0x454/0xa20 + __x64_sys_mount+0x103/0x140 + do_syscall_64+0x3b/0xc0 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +Link: https://lkml.kernel.org/r/20210929180654.32460-1-vvidic@valentin-vidic.from.hr +Signed-off-by: Valentin Vidic +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Gang He +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/super.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/fs/ocfs2/super.c ++++ b/fs/ocfs2/super.c +@@ -2167,11 +2167,17 @@ static int ocfs2_initialize_super(struct + } + + if (ocfs2_clusterinfo_valid(osb)) { ++ /* ++ * ci_stack and ci_cluster in ocfs2_cluster_info may not be null ++ * terminated, so make sure no overflow happens here by using ++ * memcpy. Destination strings will always be null terminated ++ * because osb is allocated using kzalloc. 
++ */ + osb->osb_stackflags = + OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; +- strlcpy(osb->osb_cluster_stack, ++ memcpy(osb->osb_cluster_stack, + OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, +- OCFS2_STACK_LABEL_LEN + 1); ++ OCFS2_STACK_LABEL_LEN); + if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { + mlog(ML_ERROR, + "couldn't mount because of an invalid " +@@ -2180,9 +2186,9 @@ static int ocfs2_initialize_super(struct + status = -EINVAL; + goto bail; + } +- strlcpy(osb->osb_cluster_name, ++ memcpy(osb->osb_cluster_name, + OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster, +- OCFS2_CLUSTER_NAME_LEN + 1); ++ OCFS2_CLUSTER_NAME_LEN); + } else { + /* The empty string is identical with classic tools that + * don't know about s_cluster_info. */ diff --git a/queue-5.14/series b/queue-5.14/series index 639a0c20ea6..8c08fafed24 100644 --- a/queue-5.14/series +++ b/queue-5.14/series @@ -62,3 +62,35 @@ drm-kmb-corrected-typo-in-handle_lcd_irq.patch drm-kmb-enable-adv-bridge-after-modeset.patch net-enetc-fix-ethtool-counter-name-for-pm0_terr.patch net-enetc-make-sure-all-traffic-classes-can-send-lar.patch +can-rcar_can-fix-suspend-resume.patch +can-peak_usb-pcan_usb_fd_decode_status-fix-back-to-error_active-state-notification.patch +can-peak_pci-peak_pci_remove-fix-uaf.patch +can-isotp-isotp_sendmsg-fix-return-error-on-fc-timeout-on-tx-path.patch +can-isotp-isotp_sendmsg-add-result-check-for-wait_event_interruptible.patch +can-isotp-isotp_sendmsg-fix-tx-buffer-concurrent-access-in-isotp_sendmsg.patch +can-j1939-j1939_tp_rxtimer-fix-errant-alert-in-j1939_tp_rxtimer.patch +can-j1939-j1939_netdev_start-fix-uaf-for-rx_kref-of-j1939_priv.patch +can-j1939-j1939_xtp_rx_dat_one-cancel-session-if-receive-tp.dt-with-error-length.patch +can-j1939-j1939_xtp_rx_rts_session_new-abort-tp-less-than-9-bytes.patch +ceph-skip-existing-superblocks-that-are-blocklisted-or-shut-down-when-mounting.patch +ceph-fix-handling-of-meta-errors.patch 
+tracing-have-all-levels-of-checks-prevent-recursion.patch +ocfs2-fix-data-corruption-after-conversion-from-inline-format.patch +ocfs2-mount-fails-with-buffer-overflow-in-strlen.patch +mm-userfaultfd-selftests-fix-memory-corruption-with-thp-enabled.patch +userfaultfd-fix-a-race-between-writeprotect-and-exit_mmap.patch +mm-mempolicy-do-not-allow-illegal-mpol_f_numa_balancing-mpol_local-in-mbind.patch +elfcore-correct-reference-to-config_uml.patch +vfs-check-fd-has-read-access-in-kernel_read_file_from_fd.patch +mm-secretmem-fix-null-page-mapping-dereference-in-page_is_secretmem.patch +alsa-usb-audio-provide-quirk-for-sennheiser-gsp670-headset.patch +alsa-hda-realtek-add-quirk-for-clevo-pc50hs.patch +asoc-dapm-fix-missing-kctl-change-notifications.patch +asoc-nau8824-fix-headphone-vs-headset-button-press-detection-no-longer-working.patch +blk-cgroup-blk_cgroup_bio_start-should-use-irq-safe-operations-on-blkg-iostat_cpu.patch +audit-fix-possible-null-pointer-dereference-in-audit_filter_rules.patch +net-dsa-mt7530-correct-ds-num_ports.patch +ucounts-move-get_ucounts-from-cred_alloc_blank-to-key_change_session_keyring.patch +ucounts-pair-inc_rlimit_ucounts-with-dec_rlimit_ucoutns-in-commit_creds.patch +ucounts-proper-error-handling-in-set_cred_ucounts.patch +ucounts-fix-signal-ucount-refcounting.patch diff --git a/queue-5.14/tracing-have-all-levels-of-checks-prevent-recursion.patch b/queue-5.14/tracing-have-all-levels-of-checks-prevent-recursion.patch new file mode 100644 index 00000000000..99fda2cde60 --- /dev/null +++ b/queue-5.14/tracing-have-all-levels-of-checks-prevent-recursion.patch @@ -0,0 +1,283 @@ +From ed65df63a39a3f6ed04f7258de8b6789e5021c18 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (VMware)" +Date: Mon, 18 Oct 2021 15:44:12 -0400 +Subject: tracing: Have all levels of checks prevent recursion + +From: Steven Rostedt (VMware) + +commit ed65df63a39a3f6ed04f7258de8b6789e5021c18 upstream. 
+ +While writing an email explaining the "bit = 0" logic for a discussion on +making ftrace_test_recursion_trylock() disable preemption, I discovered a +path that makes the "not do the logic if bit is zero" unsafe. + +The recursion logic is done in hot paths like the function tracer. Thus, +any code executed causes noticeable overhead. Thus, tricks are done to try +to limit the amount of code executed. This included the recursion testing +logic. + +Having recursion testing is important, as there are many paths that can +end up in an infinite recursion cycle when tracing every function in the +kernel. Thus protection is needed to prevent that from happening. + +Because it is OK to recurse due to different running context levels (e.g. +an interrupt preempts a trace, and then a trace occurs in the interrupt +handler), a set of bits are used to know which context one is in (normal, +softirq, irq and NMI). If a recursion occurs in the same level, it is +prevented*. + +Then there are infrastructure levels of recursion as well. When more than +one callback is attached to the same function to trace, it calls a loop +function to iterate over all the callbacks. Both the callbacks and the +loop function have recursion protection. The callbacks use the +"ftrace_test_recursion_trylock()" which has a "function" set of context +bits to test, and the loop function calls the internal +trace_test_and_set_recursion() directly, with an "internal" set of bits. + +If an architecture does not implement all the features supported by ftrace +then the callbacks are never called directly, and the loop function is +called instead, which will implement the features of ftrace. + +Since both the loop function and the callbacks do recursion protection, it +seemed unnecessary to do it in both locations. Thus, a trick was made +to have the internal set of recursion bits at a more significant bit +location than the function bits.
Then, if any of the higher bits were set, +the logic of the function bits could be skipped, as any new recursion +would first have to go through the loop function. + +This is true for architectures that do not support all the ftrace +features, because all functions being traced must first go through the +loop function before going to the callbacks. But this is not true for +architectures that support all the ftrace features. That's because the +loop function could be called due to two callbacks attached to the same +function, but then a recursion function inside the callback could be +called that does not share any other callback, and it will be called +directly. + +i.e. + + traced_function_1: [ more than one callback tracing it ] + call loop_func + + loop_func: + trace_recursion set internal bit + call callback + + callback: + trace_recursion [ skipped because internal bit is set, return 0 ] + call traced_function_2 + + traced_function_2: [ only traced by above callback ] + call callback + + callback: + trace_recursion [ skipped because internal bit is set, return 0 ] + call traced_function_2 + + [ wash, rinse, repeat, BOOM! out of shampoo! ] + +Thus, the "bit == 0 skip" trick is not safe, unless the loop function is +called for all functions. + +Since we want to encourage architectures to implement all ftrace features, +having them slow down due to this extra logic may encourage the +maintainers to update to the latest ftrace features. And because this +logic is only safe for them, remove it completely. + + [*] There is one layer of recursion that is allowed, and that is to allow + for the transition between interrupt context (normal -> softirq -> + irq -> NMI), because a trace may occur before the context update is + visible to the trace recursion logic.
+ +Link: https://lore.kernel.org/all/609b565a-ed6e-a1da-f025-166691b5d994@linux.alibaba.com/ +Link: https://lkml.kernel.org/r/20211018154412.09fcad3c@gandalf.local.home + +Cc: Linus Torvalds +Cc: Petr Mladek +Cc: Ingo Molnar +Cc: "James E.J. Bottomley" +Cc: Helge Deller +Cc: Michael Ellerman +Cc: Benjamin Herrenschmidt +Cc: Paul Mackerras +Cc: Paul Walmsley +Cc: Palmer Dabbelt +Cc: Albert Ou +Cc: Thomas Gleixner +Cc: Borislav Petkov +Cc: "H. Peter Anvin" +Cc: Josh Poimboeuf +Cc: Jiri Kosina +Cc: Miroslav Benes +Cc: Joe Lawrence +Cc: Colin Ian King +Cc: Masami Hiramatsu +Cc: "Peter Zijlstra (Intel)" +Cc: Nicholas Piggin +Cc: Jisheng Zhang +Cc: =?utf-8?b?546L6LSH?= +Cc: Guo Ren +Cc: stable@vger.kernel.org +Fixes: edc15cafcbfa3 ("tracing: Avoid unnecessary multiple recursion checks") +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/trace_recursion.h | 49 +++++++--------------------------------- + kernel/trace/ftrace.c | 4 +-- + 2 files changed, 11 insertions(+), 42 deletions(-) + +--- a/include/linux/trace_recursion.h ++++ b/include/linux/trace_recursion.h +@@ -16,23 +16,8 @@ + * When function tracing occurs, the following steps are made: + * If arch does not support a ftrace feature: + * call internal function (uses INTERNAL bits) which calls... +- * If callback is registered to the "global" list, the list +- * function is called and recursion checks the GLOBAL bits. +- * then this function calls... + * The function callback, which can use the FTRACE bits to + * check for recursion. +- * +- * Now if the arch does not support a feature, and it calls +- * the global list function which calls the ftrace callback +- * all three of these steps will do a recursion protection. +- * There's no reason to do one if the previous caller already +- * did. The recursion that we are protecting against will +- * go through the same steps again. 
+- * +- * To prevent the multiple recursion checks, if a recursion +- * bit is set that is higher than the MAX bit of the current +- * check, then we know that the check was made by the previous +- * caller, and we can skip the current check. + */ + enum { + /* Function recursion bits */ +@@ -40,12 +25,14 @@ enum { + TRACE_FTRACE_NMI_BIT, + TRACE_FTRACE_IRQ_BIT, + TRACE_FTRACE_SIRQ_BIT, ++ TRACE_FTRACE_TRANSITION_BIT, + +- /* INTERNAL_BITs must be greater than FTRACE_BITs */ ++ /* Internal use recursion bits */ + TRACE_INTERNAL_BIT, + TRACE_INTERNAL_NMI_BIT, + TRACE_INTERNAL_IRQ_BIT, + TRACE_INTERNAL_SIRQ_BIT, ++ TRACE_INTERNAL_TRANSITION_BIT, + + TRACE_BRANCH_BIT, + /* +@@ -86,12 +73,6 @@ enum { + */ + TRACE_GRAPH_NOTRACE_BIT, + +- /* +- * When transitioning between context, the preempt_count() may +- * not be correct. Allow for a single recursion to cover this case. +- */ +- TRACE_TRANSITION_BIT, +- + /* Used to prevent recursion recording from recursing. */ + TRACE_RECORD_RECURSION_BIT, + }; +@@ -113,12 +94,10 @@ enum { + #define TRACE_CONTEXT_BITS 4 + + #define TRACE_FTRACE_START TRACE_FTRACE_BIT +-#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) + + #define TRACE_LIST_START TRACE_INTERNAL_BIT +-#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + +-#define TRACE_CONTEXT_MASK TRACE_LIST_MAX ++#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + + /* + * Used for setting context +@@ -132,6 +111,7 @@ enum { + TRACE_CTX_IRQ, + TRACE_CTX_SOFTIRQ, + TRACE_CTX_NORMAL, ++ TRACE_CTX_TRANSITION, + }; + + static __always_inline int trace_get_context_bit(void) +@@ -160,45 +140,34 @@ extern void ftrace_record_recursion(unsi + #endif + + static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, +- int start, int max) ++ int start) + { + unsigned int val = READ_ONCE(current->trace_recursion); + int bit; + +- /* A previous recursion check was made */ +- if 
((val & TRACE_CONTEXT_MASK) > max) +- return 0; +- + bit = trace_get_context_bit() + start; + if (unlikely(val & (1 << bit))) { + /* + * It could be that preempt_count has not been updated during + * a switch between contexts. Allow for a single recursion. + */ +- bit = TRACE_TRANSITION_BIT; ++ bit = TRACE_CTX_TRANSITION + start; + if (val & (1 << bit)) { + do_ftrace_record_recursion(ip, pip); + return -1; + } +- } else { +- /* Normal check passed, clear the transition to allow it again */ +- val &= ~(1 << TRACE_TRANSITION_BIT); + } + + val |= 1 << bit; + current->trace_recursion = val; + barrier(); + +- return bit + 1; ++ return bit; + } + + static __always_inline void trace_clear_recursion(int bit) + { +- if (!bit) +- return; +- + barrier(); +- bit--; + trace_recursion_clear(bit); + } + +@@ -214,7 +183,7 @@ static __always_inline void trace_clear_ + static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, + unsigned long parent_ip) + { +- return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX); ++ return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START); + } + + /** +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -6977,7 +6977,7 @@ __ftrace_ops_list_func(unsigned long ip, + struct ftrace_ops *op; + int bit; + +- bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); ++ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START); + if (bit < 0) + return; + +@@ -7052,7 +7052,7 @@ static void ftrace_ops_assist_func(unsig + { + int bit; + +- bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); ++ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START); + if (bit < 0) + return; + diff --git a/queue-5.14/ucounts-fix-signal-ucount-refcounting.patch b/queue-5.14/ucounts-fix-signal-ucount-refcounting.patch new file mode 100644 index 00000000000..fd06f6ec7da --- /dev/null +++ 
b/queue-5.14/ucounts-fix-signal-ucount-refcounting.patch @@ -0,0 +1,169 @@ +From 15bc01effefe97757ef02ca09e9d1b927ab22725 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Sat, 16 Oct 2021 15:59:49 -0500 +Subject: ucounts: Fix signal ucount refcounting + +From: Eric W. Biederman + +commit 15bc01effefe97757ef02ca09e9d1b927ab22725 upstream. + +In commit fda31c50292a ("signal: avoid double atomic counter +increments for user accounting") Linus made a clever optimization to +how rlimits and the struct user_struct. Unfortunately that +optimization does not work in the obvious way when moved to nested +rlimits. The problem is that the last decrement of the per user +namespace per user sigpending counter might also be the last decrement +of the sigpending counter in the parent user namespace as well. Which +means that simply freeing the leaf ucount in __sigqueue_free is not +enough. + +Maintain the optimization and handle the tricky cases by introducing +inc_rlimit_get_ucounts and dec_rlimit_put_ucounts. + +By moving the entire optimization into functions that perform all of +the work it becomes possible to ensure that every level is handled +properly. + +The new function inc_rlimit_get_ucounts returns 0 on failure to +increment the ucount. This is different than inc_rlimit_ucounts which +increments the ucounts and returns LONG_MAX if the ucount counter has +exceeded its maximum or it wrapped (to indicate the counter needs to be +decremented). + +I wish we had a single user to account all pending signals to across +all of the threads of a process so this complexity was not necessary. + +Cc: stable@vger.kernel.org +Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts") +v1: https://lkml.kernel.org/r/87mtnavszx.fsf_-_@disp2133 +Link: https://lkml.kernel.org/r/87fssytizw.fsf_-_@disp2133 +Reviewed-by: Alexey Gladkov +Tested-by: Rune Kleveland +Tested-by: Yu Zhao +Tested-by: Jordan Glover +Signed-off-by: "Eric W.
Biederman" +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/user_namespace.h | 2 + + kernel/signal.c | 25 +++++--------------- + kernel/ucount.c | 49 +++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 57 insertions(+), 19 deletions(-) + +--- a/include/linux/user_namespace.h ++++ b/include/linux/user_namespace.h +@@ -127,6 +127,8 @@ static inline long get_ucounts_value(str + + long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); + bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); ++long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type); ++void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type); + bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max); + + static inline void set_rlimit_ucount_max(struct user_namespace *ns, +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -425,22 +425,10 @@ __sigqueue_alloc(int sig, struct task_st + */ + rcu_read_lock(); + ucounts = task_ucounts(t); +- sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1); +- switch (sigpending) { +- case 1: +- if (likely(get_ucounts(ucounts))) +- break; +- fallthrough; +- case LONG_MAX: +- /* +- * we need to decrease the ucount in the userns tree on any +- * failure to avoid counts leaking. 
+- */ +- dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1); +- rcu_read_unlock(); +- return NULL; +- } ++ sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); + rcu_read_unlock(); ++ if (!sigpending) ++ return NULL; + + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { + q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); +@@ -449,8 +437,7 @@ __sigqueue_alloc(int sig, struct task_st + } + + if (unlikely(q == NULL)) { +- if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) +- put_ucounts(ucounts); ++ dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); + } else { + INIT_LIST_HEAD(&q->list); + q->flags = sigqueue_flags; +@@ -463,8 +450,8 @@ static void __sigqueue_free(struct sigqu + { + if (q->flags & SIGQUEUE_PREALLOC) + return; +- if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) { +- put_ucounts(q->ucounts); ++ if (q->ucounts) { ++ dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING); + q->ucounts = NULL; + } + kmem_cache_free(sigqueue_cachep, q); +--- a/kernel/ucount.c ++++ b/kernel/ucount.c +@@ -284,6 +284,55 @@ bool dec_rlimit_ucounts(struct ucounts * + return (new == 0); + } + ++static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts, ++ struct ucounts *last, enum ucount_type type) ++{ ++ struct ucounts *iter, *next; ++ for (iter = ucounts; iter != last; iter = next) { ++ long dec = atomic_long_add_return(-1, &iter->ucount[type]); ++ WARN_ON_ONCE(dec < 0); ++ next = iter->ns->ucounts; ++ if (dec == 0) ++ put_ucounts(iter); ++ } ++} ++ ++void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type) ++{ ++ do_dec_rlimit_put_ucounts(ucounts, NULL, type); ++} ++ ++long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type) ++{ ++ /* Caller must hold a reference to ucounts */ ++ struct ucounts *iter; ++ long dec, ret = 0; ++ ++ for (iter = ucounts; iter; iter = iter->ns->ucounts) { ++ long max = 
READ_ONCE(iter->ns->ucount_max[type]); ++ long new = atomic_long_add_return(1, &iter->ucount[type]); ++ if (new < 0 || new > max) ++ goto unwind; ++ if (iter == ucounts) ++ ret = new; ++ /* ++ * Grab an extra ucount reference for the caller when ++ * the rlimit count was previously 0. ++ */ ++ if (new != 1) ++ continue; ++ if (!get_ucounts(iter)) ++ goto dec_unwind; ++ } ++ return ret; ++dec_unwind: ++ dec = atomic_long_add_return(-1, &iter->ucount[type]); ++ WARN_ON_ONCE(dec < 0); ++unwind: ++ do_dec_rlimit_put_ucounts(ucounts, iter, type); ++ return 0; ++} ++ + bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max) + { + struct ucounts *iter; diff --git a/queue-5.14/ucounts-move-get_ucounts-from-cred_alloc_blank-to-key_change_session_keyring.patch b/queue-5.14/ucounts-move-get_ucounts-from-cred_alloc_blank-to-key_change_session_keyring.patch new file mode 100644 index 00000000000..d675d7e08c4 --- /dev/null +++ b/queue-5.14/ucounts-move-get_ucounts-from-cred_alloc_blank-to-key_change_session_keyring.patch @@ -0,0 +1,86 @@ +From 5ebcbe342b1c12fae44b4f83cbeae1520e09857e Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Sat, 16 Oct 2021 12:17:30 -0500 +Subject: ucounts: Move get_ucounts from cred_alloc_blank to key_change_session_keyring + +From: Eric W. Biederman + +commit 5ebcbe342b1c12fae44b4f83cbeae1520e09857e upstream. + +Setting cred->ucounts in cred_alloc_blank does not make sense. The +uid and user_ns are deliberately not set in cred_alloc_blank but +instead the setting is delayed until key_change_session_keyring. + +So move dealing with ucounts into key_change_session_keyring as well. + +Unfortunately that movement of get_ucounts adds a new failure mode to +key_change_session_keyring. 
I do not see anything stopping the parent +process from calling setuid and changing the relevant part of its +cred while keyctl_session_to_parent is running making it fundamentally +necessary to call get_ucounts in key_change_session_keyring. Which +means that the new failure mode cannot be avoided. + +A failure of key_change_session_keyring results in a single threaded +parent keeping its existing credentials. Which results in the parent +process not being able to access the session keyring and whichever +keys are in the new keyring. + +Further get_ucounts is only expected to fail if the number of bits in +the reference count for the structure is too few. + +Since the code has no other way to report the failure of get_ucounts +and because such failures are not expected to be common add a WARN_ONCE +to report this problem to userspace. + +Between the WARN_ONCE and the parent process not having access to +the keys in the new session keyring I expect any failure of get_ucounts +will be noticed and reported and we can find another way to handle this +condition. (Possibly by just making ucounts->count an atomic_long_t). + +Cc: stable@vger.kernel.org +Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred") +Link: https://lkml.kernel.org/r/7k0ias0uf.fsf_-_@disp2133 +Tested-by: Yu Zhao +Reviewed-by: Alexey Gladkov +Signed-off-by: "Eric W.
Biederman" +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cred.c | 2 -- + security/keys/process_keys.c | 8 ++++++++ + 2 files changed, 8 insertions(+), 2 deletions(-) + +--- a/kernel/cred.c ++++ b/kernel/cred.c +@@ -225,8 +225,6 @@ struct cred *cred_alloc_blank(void) + #ifdef CONFIG_DEBUG_CREDENTIALS + new->magic = CRED_MAGIC; + #endif +- new->ucounts = get_ucounts(&init_ucounts); +- + if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) + goto error; + +--- a/security/keys/process_keys.c ++++ b/security/keys/process_keys.c +@@ -918,6 +918,13 @@ void key_change_session_keyring(struct c + return; + } + ++ /* If get_ucounts fails more bits are needed in the refcount */ ++ if (unlikely(!get_ucounts(old->ucounts))) { ++ WARN_ONCE(1, "In %s get_ucounts failed\n", __func__); ++ put_cred(new); ++ return; ++ } ++ + new-> uid = old-> uid; + new-> euid = old-> euid; + new-> suid = old-> suid; +@@ -927,6 +934,7 @@ void key_change_session_keyring(struct c + new-> sgid = old-> sgid; + new->fsgid = old->fsgid; + new->user = get_uid(old->user); ++ new->ucounts = old->ucounts; + new->user_ns = get_user_ns(old->user_ns); + new->group_info = get_group_info(old->group_info); + diff --git a/queue-5.14/ucounts-pair-inc_rlimit_ucounts-with-dec_rlimit_ucoutns-in-commit_creds.patch b/queue-5.14/ucounts-pair-inc_rlimit_ucounts-with-dec_rlimit_ucoutns-in-commit_creds.patch new file mode 100644 index 00000000000..af9cc1ccd65 --- /dev/null +++ b/queue-5.14/ucounts-pair-inc_rlimit_ucounts-with-dec_rlimit_ucoutns-in-commit_creds.patch @@ -0,0 +1,37 @@ +From 629715adc62b0ad27ab04d0aa73a71927f886910 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Sat, 16 Oct 2021 12:30:00 -0500 +Subject: ucounts: Pair inc_rlimit_ucounts with dec_rlimit_ucoutns in commit_creds + +From: Eric W. Biederman + +commit 629715adc62b0ad27ab04d0aa73a71927f886910 upstream. 
+ +The purpose of inc_rlimit_ucounts and dec_rlimit_ucounts in commit_creds +is to change which rlimit counter is used to track a process when the +credentials change. + +Use the same test for both to guarantee the tracking is correct. + +Cc: stable@vger.kernel.org +Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") +Link: https://lkml.kernel.org/r/87v91us0w4.fsf_-_@disp2133 +Tested-by: Yu Zhao +Reviewed-by: Alexey Gladkov +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cred.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/cred.c ++++ b/kernel/cred.c +@@ -499,7 +499,7 @@ int commit_creds(struct cred *new) + inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); + rcu_assign_pointer(task->real_cred, new); + rcu_assign_pointer(task->cred, new); +- if (new->user != old->user) ++ if (new->user != old->user || new->user_ns != old->user_ns) + dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1); + alter_cred_subscribers(old, -2); + diff --git a/queue-5.14/ucounts-proper-error-handling-in-set_cred_ucounts.patch b/queue-5.14/ucounts-proper-error-handling-in-set_cred_ucounts.patch new file mode 100644 index 00000000000..b0785f4ec8e --- /dev/null +++ b/queue-5.14/ucounts-proper-error-handling-in-set_cred_ucounts.patch @@ -0,0 +1,47 @@ +From 34dc2fd6e6908499b669c7b45320cddf38b332e1 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Sat, 16 Oct 2021 12:47:51 -0500 +Subject: ucounts: Proper error handling in set_cred_ucounts + +From: Eric W. Biederman + +commit 34dc2fd6e6908499b669c7b45320cddf38b332e1 upstream. + +Instead of leaking the ucounts in new if alloc_ucounts fails, store +the result of alloc_ucounts into a temporary variable, which is later +assigned to new->ucounts.
+ +Cc: stable@vger.kernel.org +Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred") +Link: https://lkml.kernel.org/r/87pms2s0v8.fsf_-_@disp2133 +Tested-by: Yu Zhao +Reviewed-by: Alexey Gladkov +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cred.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/kernel/cred.c ++++ b/kernel/cred.c +@@ -667,7 +667,7 @@ int set_cred_ucounts(struct cred *new) + { + struct task_struct *task = current; + const struct cred *old = task->real_cred; +- struct ucounts *old_ucounts = new->ucounts; ++ struct ucounts *new_ucounts, *old_ucounts = new->ucounts; + + if (new->user == old->user && new->user_ns == old->user_ns) + return 0; +@@ -679,9 +679,10 @@ int set_cred_ucounts(struct cred *new) + if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid)) + return 0; + +- if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid))) ++ if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid))) + return -EAGAIN; + ++ new->ucounts = new_ucounts; + if (old_ucounts) + put_ucounts(old_ucounts); + diff --git a/queue-5.14/userfaultfd-fix-a-race-between-writeprotect-and-exit_mmap.patch b/queue-5.14/userfaultfd-fix-a-race-between-writeprotect-and-exit_mmap.patch new file mode 100644 index 00000000000..fa652b9af3a --- /dev/null +++ b/queue-5.14/userfaultfd-fix-a-race-between-writeprotect-and-exit_mmap.patch @@ -0,0 +1,53 @@ +From cb185d5f1ebf900f4ae3bf84cee212e6dd035aca Mon Sep 17 00:00:00 2001 +From: Nadav Amit +Date: Mon, 18 Oct 2021 15:15:25 -0700 +Subject: userfaultfd: fix a race between writeprotect and exit_mmap() + +From: Nadav Amit + +commit cb185d5f1ebf900f4ae3bf84cee212e6dd035aca upstream. + +A race is possible when a process exits, its VMAs are removed by +exit_mmap() and at the same time userfaultfd_writeprotect() is called. + +The race was detected by KASAN on a development kernel, but it appears +to be possible on vanilla kernels as well. 
+ +Use mmget_not_zero() to prevent the race as done in other userfaultfd +operations. + +Link: https://lkml.kernel.org/r/20210921200247.25749-1-namit@vmware.com +Fixes: 63b2d4174c4ad ("userfaultfd: wp: add the writeprotect API to userfaultfd ioctl") +Signed-off-by: Nadav Amit +Tested-by: Li Wang +Reviewed-by: Peter Xu +Cc: Andrea Arcangeli +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + fs/userfaultfd.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -1826,9 +1826,15 @@ static int userfaultfd_writeprotect(stru + if (mode_wp && mode_dontwake) + return -EINVAL; + +- ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start, +- uffdio_wp.range.len, mode_wp, +- &ctx->mmap_changing); ++ if (mmget_not_zero(ctx->mm)) { ++ ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start, ++ uffdio_wp.range.len, mode_wp, ++ &ctx->mmap_changing); ++ mmput(ctx->mm); ++ } else { ++ return -ESRCH; ++ } ++ + if (ret) + return ret; + diff --git a/queue-5.14/vfs-check-fd-has-read-access-in-kernel_read_file_from_fd.patch b/queue-5.14/vfs-check-fd-has-read-access-in-kernel_read_file_from_fd.patch new file mode 100644 index 00000000000..63fcba4d803 --- /dev/null +++ b/queue-5.14/vfs-check-fd-has-read-access-in-kernel_read_file_from_fd.patch @@ -0,0 +1,45 @@ +From 032146cda85566abcd1c4884d9d23e4e30a07e9a Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Mon, 18 Oct 2021 15:16:12 -0700 +Subject: vfs: check fd has read access in kernel_read_file_from_fd() + +From: Matthew Wilcox (Oracle) + +commit 032146cda85566abcd1c4884d9d23e4e30a07e9a upstream. 
+ +If we open a file without read access and then pass the fd to a syscall +whose implementation calls kernel_read_file_from_fd(), we get a warning +from __kernel_read(): + + if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) + +This currently affects both finit_module() and kexec_file_load(), but it +could affect other syscalls in the future. + +Link: https://lkml.kernel.org/r/20211007220110.600005-1-willy@infradead.org +Fixes: b844f0ecbc56 ("vfs: define kernel_copy_file_from_fd()") +Signed-off-by: Matthew Wilcox (Oracle) +Reported-by: Hao Sun +Reviewed-by: Kees Cook +Acked-by: Christian Brauner +Cc: Al Viro +Cc: Mimi Zohar +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + fs/kernel_read_file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/kernel_read_file.c ++++ b/fs/kernel_read_file.c +@@ -178,7 +178,7 @@ int kernel_read_file_from_fd(int fd, lof + struct fd f = fdget(fd); + int ret = -EBADF; + +- if (!f.file) ++ if (!f.file || !(f.file->f_mode & FMODE_READ)) + goto out; + + ret = kernel_read_file(f.file, offset, buf, buf_size, file_size, id);