--- /dev/null
+From aef454b40288158b850aab13e3d2a8c406779401 Mon Sep 17 00:00:00 2001
+From: Steven Clarkson <sc@lambdal.com>
+Date: Thu, 14 Oct 2021 06:35:54 -0700
+Subject: ALSA: hda/realtek: Add quirk for Clevo PC50HS
+
+From: Steven Clarkson <sc@lambdal.com>
+
+commit aef454b40288158b850aab13e3d2a8c406779401 upstream.
+
+Apply existing PCI quirk to the Clevo PC50HS and related models to fix
+audio output on the built-in speakers.
+
+Signed-off-by: Steven Clarkson <sc@lambdal.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20211014133554.1326741-1-sc@lambdal.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -2547,6 +2547,7 @@ static const struct snd_pci_quirk alc882
+ SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
++ SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
--- /dev/null
+From 3c414eb65c294719a91a746260085363413f91c1 Mon Sep 17 00:00:00 2001
+From: Brendan Grieve <brendan@grieve.com.au>
+Date: Fri, 15 Oct 2021 10:53:35 +0800
+Subject: ALSA: usb-audio: Provide quirk for Sennheiser GSP670 Headset
+
+From: Brendan Grieve <brendan@grieve.com.au>
+
+commit 3c414eb65c294719a91a746260085363413f91c1 upstream.
+
+As per discussion at: https://github.com/szszoke/sennheiser-gsp670-pulseaudio-profile/issues/13
+
+The GSP670 has 2 playback and 1 recording device that by default are
+detected in an incompatible order for alsa. This may have been done to make
+it compatible for the console by the manufacturer and only affects the
+latest firmware which uses its own ID.
+
+This quirk will resolve this by reordering the channels.
+
+Signed-off-by: Brendan Grieve <brendan@grieve.com.au>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20211015025335.196592-1-brendan@grieve.com.au
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/usb/quirks-table.h | 32 ++++++++++++++++++++++++++++++++
+ 1 file changed, 32 insertions(+)
+
+--- a/sound/usb/quirks-table.h
++++ b/sound/usb/quirks-table.h
+@@ -4080,6 +4080,38 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge
+ }
+ }
+ },
++{
++ /*
++ * Sennheiser GSP670
++ * Change order of interfaces loaded
++ */
++ USB_DEVICE(0x1395, 0x0300),
++ .bInterfaceClass = USB_CLASS_PER_INTERFACE,
++ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
++ .ifnum = QUIRK_ANY_INTERFACE,
++ .type = QUIRK_COMPOSITE,
++ .data = &(const struct snd_usb_audio_quirk[]) {
++ // Communication
++ {
++ .ifnum = 3,
++ .type = QUIRK_AUDIO_STANDARD_INTERFACE
++ },
++ // Recording
++ {
++ .ifnum = 4,
++ .type = QUIRK_AUDIO_STANDARD_INTERFACE
++ },
++ // Main
++ {
++ .ifnum = 1,
++ .type = QUIRK_AUDIO_STANDARD_INTERFACE
++ },
++ {
++ .ifnum = -1
++ }
++ }
++ }
++},
+
+ #undef USB_DEVICE_VENDOR_SPEC
+ #undef USB_AUDIO_DEVICE
--- /dev/null
+From 5af82c81b2c49cfb1cad84d9eb6eab0e3d1c4842 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Wed, 6 Oct 2021 16:17:12 +0200
+Subject: ASoC: DAPM: Fix missing kctl change notifications
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 5af82c81b2c49cfb1cad84d9eb6eab0e3d1c4842 upstream.
+
+The put callback of a kcontrol is supposed to return 1 when the value
+is changed, and this will be notified to user-space. However, some
+DAPM kcontrols always return 0 (except for errors), hence the
+user-space misses the update of a control value.
+
+This patch corrects the behavior by properly returning 1 when the
+value gets updated.
+
+Reported-and-tested-by: Hans de Goede <hdegoede@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Link: https://lore.kernel.org/r/20211006141712.2439-1-tiwai@suse.de
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/soc-dapm.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/sound/soc/soc-dapm.c
++++ b/sound/soc/soc-dapm.c
+@@ -2559,6 +2559,7 @@ static int snd_soc_dapm_set_pin(struct s
+ const char *pin, int status)
+ {
+ struct snd_soc_dapm_widget *w = dapm_find_widget(dapm, pin, true);
++ int ret = 0;
+
+ dapm_assert_locked(dapm);
+
+@@ -2571,13 +2572,14 @@ static int snd_soc_dapm_set_pin(struct s
+ dapm_mark_dirty(w, "pin configuration");
+ dapm_widget_invalidate_input_paths(w);
+ dapm_widget_invalidate_output_paths(w);
++ ret = 1;
+ }
+
+ w->connected = status;
+ if (status == 0)
+ w->force = 0;
+
+- return 0;
++ return ret;
+ }
+
+ /**
+@@ -3582,14 +3584,15 @@ int snd_soc_dapm_put_pin_switch(struct s
+ {
+ struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
+ const char *pin = (const char *)kcontrol->private_value;
++ int ret;
+
+ if (ucontrol->value.integer.value[0])
+- snd_soc_dapm_enable_pin(&card->dapm, pin);
++ ret = snd_soc_dapm_enable_pin(&card->dapm, pin);
+ else
+- snd_soc_dapm_disable_pin(&card->dapm, pin);
++ ret = snd_soc_dapm_disable_pin(&card->dapm, pin);
+
+ snd_soc_dapm_sync(&card->dapm);
+- return 0;
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(snd_soc_dapm_put_pin_switch);
+
+@@ -4023,7 +4026,7 @@ static int snd_soc_dapm_dai_link_put(str
+
+ rtd->params_select = ucontrol->value.enumerated.item[0];
+
+- return 0;
++ return 1;
+ }
+
+ static void
--- /dev/null
+From 42871e95a3afea8956d8cc567ea725b33a837775 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Wed, 29 Sep 2021 22:15:12 +0200
+Subject: ASoC: nau8824: Fix headphone vs headset, button-press detection no longer working
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 42871e95a3afea8956d8cc567ea725b33a837775 upstream.
+
+Commit 1d25684e2251 ("ASoC: nau8824: Fix open coded prefix handling")
+replaced the nau8824_dapm_enable_pin() helper with direct calls to
+snd_soc_dapm_enable_pin(), but the helper was using
+snd_soc_dapm_force_enable_pin() and not forcing the MICBIAS + SAR
+supplies on breaks headphone vs headset and button-press detection.
+
+Replace the snd_soc_dapm_enable_pin() calls with
+snd_soc_dapm_force_enable_pin() to fix this.
+
+Cc: stable@vger.kernel.org
+Fixes: 1d25684e2251 ("ASoC: nau8824: Fix open coded prefix handling")
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Link: https://lore.kernel.org/r/20210929201512.460360-1-hdegoede@redhat.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/codecs/nau8824.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/sound/soc/codecs/nau8824.c
++++ b/sound/soc/codecs/nau8824.c
+@@ -867,8 +867,8 @@ static void nau8824_jdet_work(struct wor
+ struct regmap *regmap = nau8824->regmap;
+ int adc_value, event = 0, event_mask = 0;
+
+- snd_soc_dapm_enable_pin(dapm, "MICBIAS");
+- snd_soc_dapm_enable_pin(dapm, "SAR");
++ snd_soc_dapm_force_enable_pin(dapm, "MICBIAS");
++ snd_soc_dapm_force_enable_pin(dapm, "SAR");
+ snd_soc_dapm_sync(dapm);
+
+ msleep(100);
--- /dev/null
+From 6e3ee990c90494561921c756481d0e2125d8b895 Mon Sep 17 00:00:00 2001
+From: Gaosheng Cui <cuigaosheng1@huawei.com>
+Date: Sat, 16 Oct 2021 15:23:50 +0800
+Subject: audit: fix possible null-pointer dereference in audit_filter_rules
+
+From: Gaosheng Cui <cuigaosheng1@huawei.com>
+
+commit 6e3ee990c90494561921c756481d0e2125d8b895 upstream.
+
+Fix possible null-pointer dereference in audit_filter_rules.
+
+audit_filter_rules() error: we previously assumed 'ctx' could be null
+
+Cc: stable@vger.kernel.org
+Fixes: bf361231c295 ("audit: add saddr_fam filter field")
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/auditsc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -657,7 +657,7 @@ static int audit_filter_rules(struct tas
+ result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
+ break;
+ case AUDIT_SADDR_FAM:
+- if (ctx->sockaddr)
++ if (ctx && ctx->sockaddr)
+ result = audit_comparator(ctx->sockaddr->ss_family,
+ f->op, f->val);
+ break;
--- /dev/null
+From 5370b0f49078203acf3c064b634a09707167a864 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 14 Oct 2021 13:20:22 -1000
+Subject: blk-cgroup: blk_cgroup_bio_start() should use irq-safe operations on blkg->iostat_cpu
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 5370b0f49078203acf3c064b634a09707167a864 upstream.
+
+c3df5fb57fe8 ("cgroup: rstat: fix A-A deadlock on 32bit around
+u64_stats_sync") made u64_stats updates irq-safe to avoid A-A deadlocks.
+Unfortunately, the conversion missed one in blk_cgroup_bio_start(). Fix it.
+
+Fixes: 2d146aa3aa84 ("mm: memcontrol: switch to rstat")
+Cc: stable@vger.kernel.org # v5.13+
+Reported-by: syzbot+9738c8815b375ce482a1@syzkaller.appspotmail.com
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/YWi7NrQdVlxD6J9W@slm.duckdns.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-cgroup.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/block/blk-cgroup.c
++++ b/block/blk-cgroup.c
+@@ -1916,10 +1916,11 @@ void blk_cgroup_bio_start(struct bio *bi
+ {
+ int rwd = blk_cgroup_io_type(bio), cpu;
+ struct blkg_iostat_set *bis;
++ unsigned long flags;
+
+ cpu = get_cpu();
+ bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
+- u64_stats_update_begin(&bis->sync);
++ flags = u64_stats_update_begin_irqsave(&bis->sync);
+
+ /*
+ * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
+@@ -1931,7 +1932,7 @@ void blk_cgroup_bio_start(struct bio *bi
+ }
+ bis->cur.ios[rwd]++;
+
+- u64_stats_update_end(&bis->sync);
++ u64_stats_update_end_irqrestore(&bis->sync, flags);
+ if (cgroup_subsys_on_dfl(io_cgrp_subsys))
+ cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu);
+ put_cpu();
--- /dev/null
+From 9acf636215a6ce9362fe618e7da4913b8bfe84c8 Mon Sep 17 00:00:00 2001
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+Date: Sat, 9 Oct 2021 15:40:18 +0800
+Subject: can: isotp: isotp_sendmsg(): add result check for wait_event_interruptible()
+
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+
+commit 9acf636215a6ce9362fe618e7da4913b8bfe84c8 upstream.
+
+isotp_sendmsg() uses wait_event_interruptible() to wait for complete
+transmission, but does not check its result, even though the wait can
+be interrupted. As a result, the TX buffer can have multiple accessors
+and the later process interferes with the previous process.
+
+Following is one of the problems reported by syzbot.
+
+=============================================================
+WARNING: CPU: 0 PID: 0 at net/can/isotp.c:840 isotp_tx_timer_handler+0x2e0/0x4c0
+CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.13.0-rc7+ #68
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014
+RIP: 0010:isotp_tx_timer_handler+0x2e0/0x4c0
+Call Trace:
+ <IRQ>
+ ? isotp_setsockopt+0x390/0x390
+ __hrtimer_run_queues+0xb8/0x610
+ hrtimer_run_softirq+0x91/0xd0
+ ? rcu_read_lock_sched_held+0x4d/0x80
+ __do_softirq+0xe8/0x553
+ irq_exit_rcu+0xf8/0x100
+ sysvec_apic_timer_interrupt+0x9e/0xc0
+ </IRQ>
+ asm_sysvec_apic_timer_interrupt+0x12/0x20
+
+Add result check for wait_event_interruptible() in isotp_sendmsg()
+to avoid multiple accessors for the TX buffer.
+
+Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol")
+Link: https://lore.kernel.org/all/10ca695732c9dd267c76a3c30f37aefe1ff7e32f.1633764159.git.william.xuanziyang@huawei.com
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+78bab6958a614b0c80b9@syzkaller.appspotmail.com
+Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
+Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/isotp.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/can/isotp.c
++++ b/net/can/isotp.c
+@@ -865,7 +865,9 @@ static int isotp_sendmsg(struct socket *
+ return -EAGAIN;
+
+ /* wait for complete transmission of current pdu */
+- wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
++ err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
++ if (err)
++ return err;
+ }
+
+ if (!size || size > MAX_MSG_LENGTH)
--- /dev/null
+From d674a8f123b4096d85955c7eaabec688f29724c9 Mon Sep 17 00:00:00 2001
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+Date: Fri, 7 May 2021 11:18:39 +0200
+Subject: can: isotp: isotp_sendmsg(): fix return error on FC timeout on TX path
+
+From: Marc Kleine-Budde <mkl@pengutronix.de>
+
+commit d674a8f123b4096d85955c7eaabec688f29724c9 upstream.
+
+When a large chunk of data is sent and the receiver does not send a
+Flow Control frame back in time, sendmsg() does not return an error
+code, but the number of bytes sent corresponding to the size of the
+packet.
+
+If a timeout occurs the isotp_tx_timer_handler() is fired, sets
+sk->sk_err and calls the sk->sk_error_report() function. It was
+wrongly expected that the error would be propagated to user space in
+every case. For isotp_sendmsg() blocking on wait_event_interruptible()
+this is not the case.
+
+This patch fixes the problem by checking if sk->sk_err is set and
+returning the error to user space.
+
+Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol")
+Link: https://github.com/hartkopp/can-isotp/issues/42
+Link: https://github.com/hartkopp/can-isotp/pull/43
+Link: https://lore.kernel.org/all/20210507091839.1366379-1-mkl@pengutronix.de
+Cc: stable@vger.kernel.org
+Reported-by: Sottas Guillaume (LMB) <Guillaume.Sottas@liebherr.com>
+Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/isotp.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/can/isotp.c
++++ b/net/can/isotp.c
+@@ -960,6 +960,9 @@ static int isotp_sendmsg(struct socket *
+ if (wait_tx_done) {
+ /* wait for complete transmission of current pdu */
+ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
++
++ if (sk->sk_err)
++ return -sk->sk_err;
+ }
+
+ return size;
--- /dev/null
+From 43a08c3bdac4cb42eff8fe5e2278bffe0c5c3daa Mon Sep 17 00:00:00 2001
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+Date: Sat, 9 Oct 2021 15:40:30 +0800
+Subject: can: isotp: isotp_sendmsg(): fix TX buffer concurrent access in isotp_sendmsg()
+
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+
+commit 43a08c3bdac4cb42eff8fe5e2278bffe0c5c3daa upstream.
+
+When isotp_sendmsg() is called concurrently, tx.state can be ISOTP_IDLE
+for all TX processes. The conditions so->tx.state != ISOTP_IDLE and
+wq_has_sleeper(&so->wait) cannot protect the TX buffer from being
+accessed by multiple TX processes.
+
+We can use cmpxchg() to first try to change tx.state to ISOTP_SENDING.
+If the previous process succeeded in doing so, the later process must
+first wait for tx.state to return to ISOTP_IDLE. Thus, we can ensure the
+TX buffer is accessed by only one process at a time. We should also
+restore the original tx.state in the subsequent error paths.
+
+Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol")
+Link: https://lore.kernel.org/all/c2517874fbdf4188585cf9ddf67a8fa74d5dbde5.1633764159.git.william.xuanziyang@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
+Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/isotp.c | 46 +++++++++++++++++++++++++++++++---------------
+ 1 file changed, 31 insertions(+), 15 deletions(-)
+
+--- a/net/can/isotp.c
++++ b/net/can/isotp.c
+@@ -121,7 +121,7 @@ enum {
+ struct tpcon {
+ int idx;
+ int len;
+- u8 state;
++ u32 state;
+ u8 bs;
+ u8 sn;
+ u8 ll_dl;
+@@ -848,6 +848,7 @@ static int isotp_sendmsg(struct socket *
+ {
+ struct sock *sk = sock->sk;
+ struct isotp_sock *so = isotp_sk(sk);
++ u32 old_state = so->tx.state;
+ struct sk_buff *skb;
+ struct net_device *dev;
+ struct canfd_frame *cf;
+@@ -860,47 +861,55 @@ static int isotp_sendmsg(struct socket *
+ return -EADDRNOTAVAIL;
+
+ /* we do not support multiple buffers - for now */
+- if (so->tx.state != ISOTP_IDLE || wq_has_sleeper(&so->wait)) {
+- if (msg->msg_flags & MSG_DONTWAIT)
+- return -EAGAIN;
++ if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE ||
++ wq_has_sleeper(&so->wait)) {
++ if (msg->msg_flags & MSG_DONTWAIT) {
++ err = -EAGAIN;
++ goto err_out;
++ }
+
+ /* wait for complete transmission of current pdu */
+ err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ if (err)
+- return err;
++ goto err_out;
+ }
+
+- if (!size || size > MAX_MSG_LENGTH)
+- return -EINVAL;
++ if (!size || size > MAX_MSG_LENGTH) {
++ err = -EINVAL;
++ goto err_out;
++ }
+
+ /* take care of a potential SF_DL ESC offset for TX_DL > 8 */
+ off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
+
+ /* does the given data fit into a single frame for SF_BROADCAST? */
+ if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
+- (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off))
+- return -EINVAL;
++ (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
++ err = -EINVAL;
++ goto err_out;
++ }
+
+ err = memcpy_from_msg(so->tx.buf, msg, size);
+ if (err < 0)
+- return err;
++ goto err_out;
+
+ dev = dev_get_by_index(sock_net(sk), so->ifindex);
+- if (!dev)
+- return -ENXIO;
++ if (!dev) {
++ err = -ENXIO;
++ goto err_out;
++ }
+
+ skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
+ msg->msg_flags & MSG_DONTWAIT, &err);
+ if (!skb) {
+ dev_put(dev);
+- return err;
++ goto err_out;
+ }
+
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = dev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
+
+- so->tx.state = ISOTP_SENDING;
+ so->tx.len = size;
+ so->tx.idx = 0;
+
+@@ -956,7 +965,7 @@ static int isotp_sendmsg(struct socket *
+ if (err) {
+ pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+ __func__, ERR_PTR(err));
+- return err;
++ goto err_out;
+ }
+
+ if (wait_tx_done) {
+@@ -968,6 +977,13 @@ static int isotp_sendmsg(struct socket *
+ }
+
+ return size;
++
++err_out:
++ so->tx.state = old_state;
++ if (so->tx.state == ISOTP_IDLE)
++ wake_up_interruptible(&so->wait);
++
++ return err;
+ }
+
+ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
--- /dev/null
+From d9d52a3ebd284882f5562c88e55991add5d01586 Mon Sep 17 00:00:00 2001
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+Date: Sun, 26 Sep 2021 18:47:57 +0800
+Subject: can: j1939: j1939_netdev_start(): fix UAF for rx_kref of j1939_priv
+
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+
+commit d9d52a3ebd284882f5562c88e55991add5d01586 upstream.
+
+The following race triggers a UAF on rx_kref of j1939_priv:
+
+ cpu0 cpu1
+j1939_sk_bind(socket0, ndev0, ...)
+j1939_netdev_start
+ j1939_sk_bind(socket1, ndev0, ...)
+ j1939_netdev_start
+j1939_priv_set
+ j1939_priv_get_by_ndev_locked
+j1939_jsk_add
+.....
+j1939_netdev_stop
+kref_put_lock(&priv->rx_kref, ...)
+ kref_get(&priv->rx_kref, ...)
+ REFCOUNT_WARN("addition on 0;...")
+
+====================================================
+refcount_t: addition on 0; use-after-free.
+WARNING: CPU: 1 PID: 20874 at lib/refcount.c:25 refcount_warn_saturate+0x169/0x1e0
+RIP: 0010:refcount_warn_saturate+0x169/0x1e0
+Call Trace:
+ j1939_netdev_start+0x68b/0x920
+ j1939_sk_bind+0x426/0xeb0
+ ? security_socket_bind+0x83/0xb0
+
+The kref_get() and kref_put() calls on rx_kref must be protected by
+j1939_netdev_lock.
+
+Fixes: 9d71dd0c70099 ("can: add support of SAE J1939 protocol")
+Link: https://lore.kernel.org/all/20210926104757.2021540-1-william.xuanziyang@huawei.com
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+85d9878b19c94f9019ad@syzkaller.appspotmail.com
+Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
+Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/j1939/main.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/can/j1939/main.c
++++ b/net/can/j1939/main.c
+@@ -249,11 +249,14 @@ struct j1939_priv *j1939_netdev_start(st
+ struct j1939_priv *priv, *priv_new;
+ int ret;
+
+- priv = j1939_priv_get_by_ndev(ndev);
++ spin_lock(&j1939_netdev_lock);
++ priv = j1939_priv_get_by_ndev_locked(ndev);
+ if (priv) {
+ kref_get(&priv->rx_kref);
++ spin_unlock(&j1939_netdev_lock);
+ return priv;
+ }
++ spin_unlock(&j1939_netdev_lock);
+
+ priv = j1939_priv_create(ndev);
+ if (!priv)
+@@ -269,10 +272,10 @@ struct j1939_priv *j1939_netdev_start(st
+ /* Someone was faster than us, use their priv and roll
+ * back our's.
+ */
++ kref_get(&priv_new->rx_kref);
+ spin_unlock(&j1939_netdev_lock);
+ dev_put(ndev);
+ kfree(priv);
+- kref_get(&priv_new->rx_kref);
+ return priv_new;
+ }
+ j1939_priv_set(ndev, priv);
--- /dev/null
+From b504a884f6b5a77dac7d580ffa08e482f70d1a30 Mon Sep 17 00:00:00 2001
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+Date: Mon, 6 Sep 2021 17:42:19 +0800
+Subject: can: j1939: j1939_tp_rxtimer(): fix errant alert in j1939_tp_rxtimer
+
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+
+commit b504a884f6b5a77dac7d580ffa08e482f70d1a30 upstream.
+
+When the session state is J1939_SESSION_DONE, j1939_tp_rxtimer() emits
+the alert "rx timeout, send abort" but does not actually do anything.
+Move the alert inside the session-active check, so that it is only
+printed while the session is still active.
+
+One such scenario is j1939_tp_rxtimer() running after
+j1939_xtp_rx_abort_one(): after j1939_xtp_rx_abort_one() the session
+state is J1939_SESSION_DONE, yet j1939_tp_rxtimer() still gives an alert.
+
+Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
+Link: https://lore.kernel.org/all/20210906094219.95924-1-william.xuanziyang@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
+Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/j1939/transport.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/net/can/j1939/transport.c
++++ b/net/can/j1939/transport.c
+@@ -1230,12 +1230,11 @@ static enum hrtimer_restart j1939_tp_rxt
+ session->err = -ETIME;
+ j1939_session_deactivate(session);
+ } else {
+- netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
+- __func__, session);
+-
+ j1939_session_list_lock(session->priv);
+ if (session->state >= J1939_SESSION_ACTIVE &&
+ session->state < J1939_SESSION_ACTIVE_MAX) {
++ netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
++ __func__, session);
+ j1939_session_get(session);
+ hrtimer_start(&session->rxtimer,
+ ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
--- /dev/null
+From 379743985ab6cfe2cbd32067cf4ed497baca6d06 Mon Sep 17 00:00:00 2001
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+Date: Thu, 30 Sep 2021 11:33:20 +0800
+Subject: can: j1939: j1939_xtp_rx_dat_one(): cancel session if receive TP.DT with error length
+
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+
+commit 379743985ab6cfe2cbd32067cf4ed497baca6d06 upstream.
+
+According to SAE-J1939-21, the data length of TP.DT must be 8 bytes, so
+cancel the session when a TP.DT message of any other length is received.
+
+Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
+Link: https://lore.kernel.org/all/1632972800-45091-1-git-send-email-zhangchangzhong@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
+Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/j1939/transport.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/can/j1939/transport.c
++++ b/net/can/j1939/transport.c
+@@ -1770,6 +1770,7 @@ static void j1939_xtp_rx_dpo(struct j193
+ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
+ struct sk_buff *skb)
+ {
++ enum j1939_xtp_abort abort = J1939_XTP_ABORT_FAULT;
+ struct j1939_priv *priv = session->priv;
+ struct j1939_sk_buff_cb *skcb;
+ struct sk_buff *se_skb = NULL;
+@@ -1784,9 +1785,11 @@ static void j1939_xtp_rx_dat_one(struct
+
+ skcb = j1939_skb_to_cb(skb);
+ dat = skb->data;
+- if (skb->len <= 1)
++ if (skb->len != 8) {
+ /* makes no sense */
++ abort = J1939_XTP_ABORT_UNEXPECTED_DATA;
+ goto out_session_cancel;
++ }
+
+ switch (session->last_cmd) {
+ case 0xff:
+@@ -1884,7 +1887,7 @@ static void j1939_xtp_rx_dat_one(struct
+ out_session_cancel:
+ kfree_skb(se_skb);
+ j1939_session_timers_cancel(session);
+- j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
++ j1939_session_cancel(session, abort);
+ j1939_session_put(session);
+ }
+
--- /dev/null
+From a4fbe70c5cb746441d56b28cf88161d9e0e25378 Mon Sep 17 00:00:00 2001
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+Date: Thu, 14 Oct 2021 17:26:40 +0800
+Subject: can: j1939: j1939_xtp_rx_rts_session_new(): abort TP less than 9 bytes
+
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+
+commit a4fbe70c5cb746441d56b28cf88161d9e0e25378 upstream.
+
+The receiver should abort TP if 'total message size' in TP.CM_RTS and
+TP.CM_BAM is less than 9 or greater than 1785 [1], but currently the
+j1939 stack only checks the upper bound and the receiver will accept
+the following broadcast message:
+
+ vcan1 18ECFF00 [8] 20 08 00 02 FF 00 23 01
+ vcan1 18EBFF00 [8] 01 00 00 00 00 00 00 00
+ vcan1 18EBFF00 [8] 02 00 FF FF FF FF FF FF
+
+This patch adds a check for the lower bound and aborts the illegal TP.
+
+[1] SAE-J1939-82 A.3.4 Row 2 and A.3.6 Row 6.
+
+Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
+Link: https://lore.kernel.org/all/1634203601-3460-1-git-send-email-zhangchangzhong@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
+Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/can/j1939/j1939-priv.h | 1 +
+ net/can/j1939/transport.c | 2 ++
+ 2 files changed, 3 insertions(+)
+
+--- a/net/can/j1939/j1939-priv.h
++++ b/net/can/j1939/j1939-priv.h
+@@ -326,6 +326,7 @@ int j1939_session_activate(struct j1939_
+ void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec);
+ void j1939_session_timers_cancel(struct j1939_session *session);
+
++#define J1939_MIN_TP_PACKET_SIZE 9
+ #define J1939_MAX_TP_PACKET_SIZE (7 * 0xff)
+ #define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff)
+
+--- a/net/can/j1939/transport.c
++++ b/net/can/j1939/transport.c
+@@ -1596,6 +1596,8 @@ j1939_session *j1939_xtp_rx_rts_session_
+ abort = J1939_XTP_ABORT_FAULT;
+ else if (len > priv->tp_max_packet_size)
+ abort = J1939_XTP_ABORT_RESOURCE;
++ else if (len < J1939_MIN_TP_PACKET_SIZE)
++ abort = J1939_XTP_ABORT_FAULT;
+ }
+
+ if (abort != J1939_XTP_NO_ABORT) {
--- /dev/null
+From 949fe9b35570361bc6ee2652f89a0561b26eec98 Mon Sep 17 00:00:00 2001
+From: Zheyu Ma <zheyuma97@gmail.com>
+Date: Thu, 14 Oct 2021 06:28:33 +0000
+Subject: can: peak_pci: peak_pci_remove(): fix UAF
+
+From: Zheyu Ma <zheyuma97@gmail.com>
+
+commit 949fe9b35570361bc6ee2652f89a0561b26eec98 upstream.
+
+When removing the peak_pci module, referencing 'chan' again after
+releasing 'dev' will cause a UAF.
+
+Fix this by releasing 'dev' later.
+
+The following log reveals it:
+
+[ 35.961814 ] BUG: KASAN: use-after-free in peak_pci_remove+0x16f/0x270 [peak_pci]
+[ 35.963414 ] Read of size 8 at addr ffff888136998ee8 by task modprobe/5537
+[ 35.965513 ] Call Trace:
+[ 35.965718 ] dump_stack_lvl+0xa8/0xd1
+[ 35.966028 ] print_address_description+0x87/0x3b0
+[ 35.966420 ] kasan_report+0x172/0x1c0
+[ 35.966725 ] ? peak_pci_remove+0x16f/0x270 [peak_pci]
+[ 35.967137 ] ? trace_irq_enable_rcuidle+0x10/0x170
+[ 35.967529 ] ? peak_pci_remove+0x16f/0x270 [peak_pci]
+[ 35.967945 ] __asan_report_load8_noabort+0x14/0x20
+[ 35.968346 ] peak_pci_remove+0x16f/0x270 [peak_pci]
+[ 35.968752 ] pci_device_remove+0xa9/0x250
+
+Fixes: e6d9c80b7ca1 ("can: peak_pci: add support of some new PEAK-System PCI cards")
+Link: https://lore.kernel.org/all/1634192913-15639-1-git-send-email-zheyuma97@gmail.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/can/sja1000/peak_pci.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/can/sja1000/peak_pci.c
++++ b/drivers/net/can/sja1000/peak_pci.c
+@@ -729,16 +729,15 @@ static void peak_pci_remove(struct pci_d
+ struct net_device *prev_dev = chan->prev_dev;
+
+ dev_info(&pdev->dev, "removing device %s\n", dev->name);
++ /* do that only for first channel */
++ if (!prev_dev && chan->pciec_card)
++ peak_pciec_remove(chan->pciec_card);
+ unregister_sja1000dev(dev);
+ free_sja1000dev(dev);
+ dev = prev_dev;
+
+- if (!dev) {
+- /* do that only for first channel */
+- if (chan->pciec_card)
+- peak_pciec_remove(chan->pciec_card);
++ if (!dev)
+ break;
+- }
+ priv = netdev_priv(dev);
+ chan = priv->priv;
+ }
--- /dev/null
+From 3d031abc7e7249573148871180c28ecedb5e27df Mon Sep 17 00:00:00 2001
+From: Stephane Grosjean <s.grosjean@peak-system.com>
+Date: Wed, 29 Sep 2021 16:21:10 +0200
+Subject: can: peak_usb: pcan_usb_fd_decode_status(): fix back to ERROR_ACTIVE state notification
+
+From: Stephane Grosjean <s.grosjean@peak-system.com>
+
+commit 3d031abc7e7249573148871180c28ecedb5e27df upstream.
+
+This corrects the lack of notification of a return to ERROR_ACTIVE
+state for USB - CANFD devices from PEAK-System.
+
+Fixes: 0a25e1f4f185 ("can: peak_usb: add support for PEAK new CANFD USB adapters")
+Link: https://lore.kernel.org/all/20210929142111.55757-1-s.grosjean@peak-system.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
++++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+@@ -551,11 +551,10 @@ static int pcan_usb_fd_decode_status(str
+ } else if (sm->channel_p_w_b & PUCAN_BUS_WARNING) {
+ new_state = CAN_STATE_ERROR_WARNING;
+ } else {
+- /* no error bit (so, no error skb, back to active state) */
+- dev->can.state = CAN_STATE_ERROR_ACTIVE;
++ /* back to (or still in) ERROR_ACTIVE state */
++ new_state = CAN_STATE_ERROR_ACTIVE;
+ pdev->bec.txerr = 0;
+ pdev->bec.rxerr = 0;
+- return 0;
+ }
+
+ /* state hasn't changed */
--- /dev/null
+From f7c05c3987dcfde9a4e8c2d533db013fabebca0d Mon Sep 17 00:00:00 2001
+From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+Date: Fri, 24 Sep 2021 16:55:56 +0900
+Subject: can: rcar_can: fix suspend/resume
+
+From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+
+commit f7c05c3987dcfde9a4e8c2d533db013fabebca0d upstream.
+
+If the driver was not opened, rcar_can_suspend() should not call
+clk_disable() because the clock was not enabled.
+
+Fixes: fd1159318e55 ("can: add Renesas R-Car CAN driver")
+Link: https://lore.kernel.org/all/20210924075556.223685-1-yoshihiro.shimoda.uh@renesas.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+Tested-by: Ayumi Nakamichi <ayumi.nakamichi.kf@renesas.com>
+Reviewed-by: Ulrich Hecht <uli+renesas@fpond.eu>
+Tested-by: Biju Das <biju.das.jz@bp.renesas.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/can/rcar/rcar_can.c | 20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/can/rcar/rcar_can.c
++++ b/drivers/net/can/rcar/rcar_can.c
+@@ -846,10 +846,12 @@ static int __maybe_unused rcar_can_suspe
+ struct rcar_can_priv *priv = netdev_priv(ndev);
+ u16 ctlr;
+
+- if (netif_running(ndev)) {
+- netif_stop_queue(ndev);
+- netif_device_detach(ndev);
+- }
++ if (!netif_running(ndev))
++ return 0;
++
++ netif_stop_queue(ndev);
++ netif_device_detach(ndev);
++
+ ctlr = readw(&priv->regs->ctlr);
+ ctlr |= RCAR_CAN_CTLR_CANM_HALT;
+ writew(ctlr, &priv->regs->ctlr);
+@@ -868,6 +870,9 @@ static int __maybe_unused rcar_can_resum
+ u16 ctlr;
+ int err;
+
++ if (!netif_running(ndev))
++ return 0;
++
+ err = clk_enable(priv->clk);
+ if (err) {
+ netdev_err(ndev, "clk_enable() failed, error %d\n", err);
+@@ -881,10 +886,9 @@ static int __maybe_unused rcar_can_resum
+ writew(ctlr, &priv->regs->ctlr);
+ priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+- if (netif_running(ndev)) {
+- netif_device_attach(ndev);
+- netif_start_queue(ndev);
+- }
++ netif_device_attach(ndev);
++ netif_start_queue(ndev);
++
+ return 0;
+ }
+
--- /dev/null
+From 1bd85aa65d0e7b5e4d09240f492f37c569fdd431 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Thu, 7 Oct 2021 14:19:49 -0400
+Subject: ceph: fix handling of "meta" errors
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 1bd85aa65d0e7b5e4d09240f492f37c569fdd431 upstream.
+
+Currently, we check the wb_err too early for directories, before all of
+the unsafe child requests have been waited on. In order to fix that we
+need to check the mapping->wb_err later nearer to the end of ceph_fsync.
+
+We also have an overly-complex method for tracking errors after
+blocklisting. The errors recorded in cleanup_session_requests go to a
+completely separate field in the inode, but we end up reporting them the
+same way we would for any other error (in fsync).
+
+There's no real benefit to tracking these errors in two different
+places, since the only reporting mechanism for them is in fsync, and
+we'd need to advance them both every time.
+
+Given that, we can just remove i_meta_err, and convert the places that
+used it to instead just use mapping->wb_err instead. That also fixes
+the original problem by ensuring that we do a check_and_advance of the
+wb_err at the end of the fsync op.
+
+Cc: stable@vger.kernel.org
+URL: https://tracker.ceph.com/issues/52864
+Reported-by: Patrick Donnelly <pdonnell@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/caps.c | 12 +++---------
+ fs/ceph/file.c | 1 -
+ fs/ceph/inode.c | 2 --
+ fs/ceph/mds_client.c | 17 +++++------------
+ fs/ceph/super.h | 3 ---
+ 5 files changed, 8 insertions(+), 27 deletions(-)
+
+--- a/fs/ceph/caps.c
++++ b/fs/ceph/caps.c
+@@ -2264,7 +2264,6 @@ static int unsafe_request_wait(struct in
+
+ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+ {
+- struct ceph_file_info *fi = file->private_data;
+ struct inode *inode = file->f_mapping->host;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 flush_tid;
+@@ -2299,14 +2298,9 @@ int ceph_fsync(struct file *file, loff_t
+ if (err < 0)
+ ret = err;
+
+- if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) {
+- spin_lock(&file->f_lock);
+- err = errseq_check_and_advance(&ci->i_meta_err,
+- &fi->meta_err);
+- spin_unlock(&file->f_lock);
+- if (err < 0)
+- ret = err;
+- }
++ err = file_check_and_advance_wb_err(file);
++ if (err < 0)
++ ret = err;
+ out:
+ dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
+ return ret;
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -233,7 +233,6 @@ static int ceph_init_file_info(struct in
+
+ spin_lock_init(&fi->rw_contexts_lock);
+ INIT_LIST_HEAD(&fi->rw_contexts);
+- fi->meta_err = errseq_sample(&ci->i_meta_err);
+ fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
+
+ return 0;
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -541,8 +541,6 @@ struct inode *ceph_alloc_inode(struct su
+
+ ceph_fscache_inode_init(ci);
+
+- ci->i_meta_err = 0;
+-
+ return &ci->vfs_inode;
+ }
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -1479,7 +1479,6 @@ static void cleanup_session_requests(str
+ {
+ struct ceph_mds_request *req;
+ struct rb_node *p;
+- struct ceph_inode_info *ci;
+
+ dout("cleanup_session_requests mds%d\n", session->s_mds);
+ mutex_lock(&mdsc->mutex);
+@@ -1488,16 +1487,10 @@ static void cleanup_session_requests(str
+ struct ceph_mds_request, r_unsafe_item);
+ pr_warn_ratelimited(" dropping unsafe request %llu\n",
+ req->r_tid);
+- if (req->r_target_inode) {
+- /* dropping unsafe change of inode's attributes */
+- ci = ceph_inode(req->r_target_inode);
+- errseq_set(&ci->i_meta_err, -EIO);
+- }
+- if (req->r_unsafe_dir) {
+- /* dropping unsafe directory operation */
+- ci = ceph_inode(req->r_unsafe_dir);
+- errseq_set(&ci->i_meta_err, -EIO);
+- }
++ if (req->r_target_inode)
++ mapping_set_error(req->r_target_inode->i_mapping, -EIO);
++ if (req->r_unsafe_dir)
++ mapping_set_error(req->r_unsafe_dir->i_mapping, -EIO);
+ __unregister_request(mdsc, req);
+ }
+ /* zero r_attempts, so kick_requests() will re-send requests */
+@@ -1664,7 +1657,7 @@ static int remove_session_caps_cb(struct
+ spin_unlock(&mdsc->cap_dirty_lock);
+
+ if (dirty_dropped) {
+- errseq_set(&ci->i_meta_err, -EIO);
++ mapping_set_error(inode->i_mapping, -EIO);
+
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+--- a/fs/ceph/super.h
++++ b/fs/ceph/super.h
+@@ -430,8 +430,6 @@ struct ceph_inode_info {
+ #ifdef CONFIG_CEPH_FSCACHE
+ struct fscache_cookie *fscache;
+ #endif
+- errseq_t i_meta_err;
+-
+ struct inode vfs_inode; /* at end */
+ };
+
+@@ -775,7 +773,6 @@ struct ceph_file_info {
+ spinlock_t rw_contexts_lock;
+ struct list_head rw_contexts;
+
+- errseq_t meta_err;
+ u32 filp_gen;
+ atomic_t num_locks;
+ };
--- /dev/null
+From 98d0a6fb7303a6f4a120b8b8ed05b86ff5db53e8 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Thu, 30 Sep 2021 08:33:13 -0400
+Subject: ceph: skip existing superblocks that are blocklisted or shut down when mounting
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 98d0a6fb7303a6f4a120b8b8ed05b86ff5db53e8 upstream.
+
+Currently when mounting, we may end up finding an existing superblock
+that corresponds to a blocklisted MDS client. This means that the new
+mount ends up being unusable.
+
+If we've found an existing superblock with a client that is already
+blocklisted, and the client is not configured to recover on its own,
+fail the match. Ditto if the superblock has been forcibly unmounted.
+
+While we're in here, also rename "other" to the more conventional "fsc".
+
+Cc: stable@vger.kernel.org
+URL: https://bugzilla.redhat.com/show_bug.cgi?id=1901499
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/super.c | 17 ++++++++++++++---
+ 1 file changed, 14 insertions(+), 3 deletions(-)
+
+--- a/fs/ceph/super.c
++++ b/fs/ceph/super.c
+@@ -1002,16 +1002,16 @@ static int ceph_compare_super(struct sup
+ struct ceph_fs_client *new = fc->s_fs_info;
+ struct ceph_mount_options *fsopt = new->mount_options;
+ struct ceph_options *opt = new->client->options;
+- struct ceph_fs_client *other = ceph_sb_to_client(sb);
++ struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+
+ dout("ceph_compare_super %p\n", sb);
+
+- if (compare_mount_options(fsopt, opt, other)) {
++ if (compare_mount_options(fsopt, opt, fsc)) {
+ dout("monitor(s)/mount options don't match\n");
+ return 0;
+ }
+ if ((opt->flags & CEPH_OPT_FSID) &&
+- ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
++ ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) {
+ dout("fsid doesn't match\n");
+ return 0;
+ }
+@@ -1019,6 +1019,17 @@ static int ceph_compare_super(struct sup
+ dout("flags differ\n");
+ return 0;
+ }
++
++ if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) {
++ dout("client is blocklisted (and CLEANRECOVER is not set)\n");
++ return 0;
++ }
++
++ if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
++ dout("client has been forcibly unmounted\n");
++ return 0;
++ }
++
+ return 1;
+ }
+
--- /dev/null
+From b0e901280d9860a0a35055f220e8e457f300f40a Mon Sep 17 00:00:00 2001
+From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Date: Mon, 18 Oct 2021 15:16:09 -0700
+Subject: elfcore: correct reference to CONFIG_UML
+
+From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+
+commit b0e901280d9860a0a35055f220e8e457f300f40a upstream.
+
+Commit 6e7b64b9dd6d ("elfcore: fix building with clang") introduces
+special handling for two architectures, ia64 and User Mode Linux.
+However, the wrong name, i.e., CONFIG_UM, for the intended Kconfig
+symbol for User-Mode Linux was used.
+
+Although the directory for User Mode Linux is ./arch/um; the Kconfig
+symbol for this architecture is called CONFIG_UML.
+
+Luckily, ./scripts/checkkconfigsymbols.py warns on non-existing configs:
+
+ UM
+ Referencing files: include/linux/elfcore.h
+ Similar symbols: UML, NUMA
+
+Correct the name of the config to the intended one.
+
+[akpm@linux-foundation.org: fix um/x86_64, per Catalin]
+ Link: https://lkml.kernel.org/r/20211006181119.2851441-1-catalin.marinas@arm.com
+ Link: https://lkml.kernel.org/r/YV6pejGzLy5ppEpt@arm.com
+
+Link: https://lkml.kernel.org/r/20211006082209.417-1-lukas.bulwahn@gmail.com
+Fixes: 6e7b64b9dd6d ("elfcore: fix building with clang")
+Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Barret Rhoden <brho@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/elfcore.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/elfcore.h
++++ b/include/linux/elfcore.h
+@@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpr
+ #endif
+ }
+
+-#if defined(CONFIG_UM) || defined(CONFIG_IA64)
++#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
+ /*
+ * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
+ * extra segments containing the gate DSO contents. Dumping its
--- /dev/null
+From 6d2aec9e123bb9c49cb5c7fc654f25f81e688e8c Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 18 Oct 2021 15:15:49 -0700
+Subject: mm/mempolicy: do not allow illegal MPOL_F_NUMA_BALANCING | MPOL_LOCAL in mbind()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 6d2aec9e123bb9c49cb5c7fc654f25f81e688e8c upstream.
+
+syzbot reported access to uninitialized memory in mbind() [1]
+
+Issue came with commit bda420b98505 ("numa balancing: migrate on fault
+among multiple bound nodes")
+
+This commit added a new bit in MPOL_MODE_FLAGS, but only checked valid
+combination (MPOL_F_NUMA_BALANCING can only be used with MPOL_BIND) in
+do_set_mempolicy()
+
+This patch moves the check in sanitize_mpol_flags() so that it is also
+used by mbind()
+
+ [1]
+ BUG: KMSAN: uninit-value in __mpol_equal+0x567/0x590 mm/mempolicy.c:2260
+ __mpol_equal+0x567/0x590 mm/mempolicy.c:2260
+ mpol_equal include/linux/mempolicy.h:105 [inline]
+ vma_merge+0x4a1/0x1e60 mm/mmap.c:1190
+ mbind_range+0xcc8/0x1e80 mm/mempolicy.c:811
+ do_mbind+0xf42/0x15f0 mm/mempolicy.c:1333
+ kernel_mbind mm/mempolicy.c:1483 [inline]
+ __do_sys_mbind mm/mempolicy.c:1490 [inline]
+ __se_sys_mbind+0x437/0xb80 mm/mempolicy.c:1486
+ __x64_sys_mbind+0x19d/0x200 mm/mempolicy.c:1486
+ do_syscall_x64 arch/x86/entry/common.c:51 [inline]
+ do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+ Uninit was created at:
+ slab_alloc_node mm/slub.c:3221 [inline]
+ slab_alloc mm/slub.c:3230 [inline]
+ kmem_cache_alloc+0x751/0xff0 mm/slub.c:3235
+ mpol_new mm/mempolicy.c:293 [inline]
+ do_mbind+0x912/0x15f0 mm/mempolicy.c:1289
+ kernel_mbind mm/mempolicy.c:1483 [inline]
+ __do_sys_mbind mm/mempolicy.c:1490 [inline]
+ __se_sys_mbind+0x437/0xb80 mm/mempolicy.c:1486
+ __x64_sys_mbind+0x19d/0x200 mm/mempolicy.c:1486
+ do_syscall_x64 arch/x86/entry/common.c:51 [inline]
+ do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+ =====================================================
+ Kernel panic - not syncing: panic_on_kmsan set ...
+ CPU: 0 PID: 15049 Comm: syz-executor.0 Tainted: G B 5.15.0-rc2-syzkaller #0
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+ Call Trace:
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0x1ff/0x28e lib/dump_stack.c:106
+ dump_stack+0x25/0x28 lib/dump_stack.c:113
+ panic+0x44f/0xdeb kernel/panic.c:232
+ kmsan_report+0x2ee/0x300 mm/kmsan/report.c:186
+ __msan_warning+0xd7/0x150 mm/kmsan/instrumentation.c:208
+ __mpol_equal+0x567/0x590 mm/mempolicy.c:2260
+ mpol_equal include/linux/mempolicy.h:105 [inline]
+ vma_merge+0x4a1/0x1e60 mm/mmap.c:1190
+ mbind_range+0xcc8/0x1e80 mm/mempolicy.c:811
+ do_mbind+0xf42/0x15f0 mm/mempolicy.c:1333
+ kernel_mbind mm/mempolicy.c:1483 [inline]
+ __do_sys_mbind mm/mempolicy.c:1490 [inline]
+ __se_sys_mbind+0x437/0xb80 mm/mempolicy.c:1486
+ __x64_sys_mbind+0x19d/0x200 mm/mempolicy.c:1486
+ do_syscall_x64 arch/x86/entry/common.c:51 [inline]
+ do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Link: https://lkml.kernel.org/r/20211001215630.810592-1-eric.dumazet@gmail.com
+Fixes: bda420b98505 ("numa balancing: migrate on fault among multiple bound nodes")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mempolicy.c | 16 +++++-----------
+ 1 file changed, 5 insertions(+), 11 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -857,16 +857,6 @@ static long do_set_mempolicy(unsigned sh
+ goto out;
+ }
+
+- if (flags & MPOL_F_NUMA_BALANCING) {
+- if (new && new->mode == MPOL_BIND) {
+- new->flags |= (MPOL_F_MOF | MPOL_F_MORON);
+- } else {
+- ret = -EINVAL;
+- mpol_put(new);
+- goto out;
+- }
+- }
+-
+ ret = mpol_set_nodemask(new, nodes, scratch);
+ if (ret) {
+ mpol_put(new);
+@@ -1450,7 +1440,11 @@ static inline int sanitize_mpol_flags(in
+ return -EINVAL;
+ if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES))
+ return -EINVAL;
+-
++ if (*flags & MPOL_F_NUMA_BALANCING) {
++ if (*mode != MPOL_BIND)
++ return -EINVAL;
++ *flags |= (MPOL_F_MOF | MPOL_F_MORON);
++ }
+ return 0;
+ }
+
--- /dev/null
+From 79f9bc5843142b649575f887dccdf1c07ad75c20 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 18 Oct 2021 15:16:16 -0700
+Subject: mm/secretmem: fix NULL page->mapping dereference in page_is_secretmem()
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 79f9bc5843142b649575f887dccdf1c07ad75c20 upstream.
+
+Check for a NULL page->mapping before dereferencing the mapping in
+page_is_secretmem(), as the page's mapping can be nullified while gup()
+is running, e.g. by reclaim or truncation.
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000068
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: 0000 [#1] PREEMPT SMP NOPTI
+ CPU: 6 PID: 4173897 Comm: CPU 3/KVM Tainted: G W
+ RIP: 0010:internal_get_user_pages_fast+0x621/0x9d0
+ Code: <48> 81 7a 68 80 08 04 bc 0f 85 21 ff ff 8 89 c7 be
+ RSP: 0018:ffffaa90087679b0 EFLAGS: 00010046
+ RAX: ffffe3f37905b900 RBX: 00007f2dd561e000 RCX: ffffe3f37905b934
+ RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffe3f37905b900
+ ...
+ CR2: 0000000000000068 CR3: 00000004c5898003 CR4: 00000000001726e0
+ Call Trace:
+ get_user_pages_fast_only+0x13/0x20
+ hva_to_pfn+0xa9/0x3e0
+ try_async_pf+0xa1/0x270
+ direct_page_fault+0x113/0xad0
+ kvm_mmu_page_fault+0x69/0x680
+ vmx_handle_exit+0xe1/0x5d0
+ kvm_arch_vcpu_ioctl_run+0xd81/0x1c70
+ kvm_vcpu_ioctl+0x267/0x670
+ __x64_sys_ioctl+0x83/0xa0
+ do_syscall_64+0x56/0x80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Link: https://lkml.kernel.org/r/20211007231502.3552715-1-seanjc@google.com
+Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reported-by: Darrick J. Wong <djwong@kernel.org>
+Reported-by: Stephen <stephenackerman16@gmail.com>
+Tested-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/secretmem.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/secretmem.h
++++ b/include/linux/secretmem.h
+@@ -23,7 +23,7 @@ static inline bool page_is_secretmem(str
+ mapping = (struct address_space *)
+ ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
+
+- if (mapping != page->mapping)
++ if (!mapping || mapping != page->mapping)
+ return false;
+
+ return mapping->a_ops == &secretmem_aops;
--- /dev/null
+From 8913970c19915bbe773d97d42989cd85b7fdc098 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Mon, 18 Oct 2021 15:15:22 -0700
+Subject: mm/userfaultfd: selftests: fix memory corruption with thp enabled
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 8913970c19915bbe773d97d42989cd85b7fdc098 upstream.
+
+In RHEL's gating selftests we've encountered memory corruption in the
+uffd event test even with upstream kernel:
+
+ # ./userfaultfd anon 128 4
+ nr_pages: 32768, nr_pages_per_cpu: 32768
+ bounces: 3, mode: rnd racing read, userfaults: 6240 missing (6240) 14729 wp (14729)
+ bounces: 2, mode: racing read, userfaults: 1444 missing (1444) 28877 wp (28877)
+ bounces: 1, mode: rnd read, userfaults: 6055 missing (6055) 14699 wp (14699)
+ bounces: 0, mode: read, userfaults: 82 missing (82) 25196 wp (25196)
+ testing uffd-wp with pagemap (pgsize=4096): done
+ testing uffd-wp with pagemap (pgsize=2097152): done
+ testing events (fork, remap, remove): ERROR: nr 32427 memory corruption 0 1 (errno=0, line=963)
+ ERROR: faulting process failed (errno=0, line=1117)
+
+It can be easily reproduced when global thp enabled, which is the
+default for RHEL.
+
+It's also known as a side effect of commit 0db282ba2c12 ("selftest: use
+mmap instead of posix_memalign to allocate memory", 2021-07-23), which
+is imho right itself on using mmap() to make sure the addresses will be
+untagged even on arm.
+
+The problem is, for each test we allocate buffers using two
+allocate_area() calls. We assumed these two buffers won't affect each
+other, however they could, because mmap() could have found that the two
+buffers are near each other and having the same VMA flags, so they got
+merged into one VMA.
+
+It won't be a big problem if thp is not enabled, but when thp is
+aggressively enabled it means when initializing the src buffer it could
+accidentally setup part of the dest buffer too when there's a shared THP
+that overlaps the two regions. Then some of the dest buffer won't be
+able to be trapped by userfaultfd missing mode, then it'll cause memory
+corruption as described.
+
+To fix it, do release_pages() after initializing the src buffer.
+
+Since the previous two release_pages() calls are after
+uffd_test_ctx_clear() which will unmap all the buffers anyway (which is
+stronger than release pages; as unmap() also tears down pgtables), drop
+them as they shouldn't really be anything useful.
+
+We can mark the Fixes tag upon 0db282ba2c12 as it's reported to only
+happen there, however the real "Fixes" IMHO should be 8ba6e8640844, as
+before that commit we'll always do explicit release_pages() before
+registration of uffd, and 8ba6e8640844 changed that logic by adding
+extra unmap/map and we didn't release the pages at the right place.
+Meanwhile I don't have a solid clue anyway on whether posix_memalign()
+could always avoid triggering this bug, hence it's safer to attach this
+fix to commit 8ba6e8640844.
+
+Link: https://lkml.kernel.org/r/20210923232512.210092-1-peterx@redhat.com
+Fixes: 8ba6e8640844 ("userfaultfd/selftests: reinitialize test context in each test")
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1994931
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reported-by: Li Wang <liwan@redhat.com>
+Tested-by: Li Wang <liwang@redhat.com>
+Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Nadav Amit <nadav.amit@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/vm/userfaultfd.c | 23 ++++++++++++++++++++---
+ 1 file changed, 20 insertions(+), 3 deletions(-)
+
+--- a/tools/testing/selftests/vm/userfaultfd.c
++++ b/tools/testing/selftests/vm/userfaultfd.c
+@@ -414,9 +414,6 @@ static void uffd_test_ctx_init_ext(uint6
+ uffd_test_ops->allocate_area((void **)&area_src);
+ uffd_test_ops->allocate_area((void **)&area_dst);
+
+- uffd_test_ops->release_pages(area_src);
+- uffd_test_ops->release_pages(area_dst);
+-
+ userfaultfd_open(features);
+
+ count_verify = malloc(nr_pages * sizeof(unsigned long long));
+@@ -437,6 +434,26 @@ static void uffd_test_ctx_init_ext(uint6
+ *(area_count(area_src, nr) + 1) = 1;
+ }
+
++ /*
++ * After initialization of area_src, we must explicitly release pages
++ * for area_dst to make sure it's fully empty. Otherwise we could have
++ * some area_dst pages be errornously initialized with zero pages,
++ * hence we could hit memory corruption later in the test.
++ *
++ * One example is when THP is globally enabled, above allocate_area()
++ * calls could have the two areas merged into a single VMA (as they
++ * will have the same VMA flags so they're mergeable). When we
++ * initialize the area_src above, it's possible that some part of
++ * area_dst could have been faulted in via one huge THP that will be
++ * shared between area_src and area_dst. It could cause some of the
++ * area_dst won't be trapped by missing userfaults.
++ *
++ * This release_pages() will guarantee even if that happened, we'll
++ * proactively split the thp and drop any accidentally initialized
++ * pages within area_dst.
++ */
++ uffd_test_ops->release_pages(area_dst);
++
+ pipefd = malloc(sizeof(int) * nr_cpus * 2);
+ if (!pipefd)
+ err("pipefd");
--- /dev/null
+From 342afce10d6f61c443c95e244f812d4766f73f53 Mon Sep 17 00:00:00 2001
+From: DENG Qingfang <dqfext@gmail.com>
+Date: Sat, 16 Oct 2021 14:24:14 +0800
+Subject: net: dsa: mt7530: correct ds->num_ports
+
+From: DENG Qingfang <dqfext@gmail.com>
+
+commit 342afce10d6f61c443c95e244f812d4766f73f53 upstream.
+
+Setting ds->num_ports to DSA_MAX_PORTS made DSA core allocate unnecessary
+dsa_port's and call mt7530_port_disable for non-existent ports.
+
+Set it to MT7530_NUM_PORTS to fix that, and dsa_is_user_port check in
+port_enable/disable is no longer required.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: DENG Qingfang <dqfext@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/mt7530.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+--- a/drivers/net/dsa/mt7530.c
++++ b/drivers/net/dsa/mt7530.c
+@@ -1031,9 +1031,6 @@ mt7530_port_enable(struct dsa_switch *ds
+ {
+ struct mt7530_priv *priv = ds->priv;
+
+- if (!dsa_is_user_port(ds, port))
+- return 0;
+-
+ mutex_lock(&priv->reg_mutex);
+
+ /* Allow the user port gets connected to the cpu port and also
+@@ -1056,9 +1053,6 @@ mt7530_port_disable(struct dsa_switch *d
+ {
+ struct mt7530_priv *priv = ds->priv;
+
+- if (!dsa_is_user_port(ds, port))
+- return;
+-
+ mutex_lock(&priv->reg_mutex);
+
+ /* Clear up all port matrix which could be restored in the next
+@@ -3132,7 +3126,7 @@ mt7530_probe(struct mdio_device *mdiodev
+ return -ENOMEM;
+
+ priv->ds->dev = &mdiodev->dev;
+- priv->ds->num_ports = DSA_MAX_PORTS;
++ priv->ds->num_ports = MT7530_NUM_PORTS;
+
+ /* Use medatek,mcm property to distinguish hardware type that would
+ * casues a little bit differences on power-on sequence.
--- /dev/null
+From 5314454ea3ff6fc746eaf71b9a7ceebed52888fa Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 18 Oct 2021 15:15:39 -0700
+Subject: ocfs2: fix data corruption after conversion from inline format
+
+From: Jan Kara <jack@suse.cz>
+
+commit 5314454ea3ff6fc746eaf71b9a7ceebed52888fa upstream.
+
+Commit 6dbf7bb55598 ("fs: Don't invalidate page buffers in
+block_write_full_page()") uncovered a latent bug in ocfs2 conversion
+from inline inode format to a normal inode format.
+
+The code in ocfs2_convert_inline_data_to_extents() attempts to zero out
+the whole cluster allocated for file data by grabbing, zeroing, and
+dirtying all pages covering this cluster. However these pages are
+beyond i_size, thus writeback code generally ignores these dirty pages
+and no blocks were ever actually zeroed on the disk.
+
+This oversight was fixed by commit 693c241a5f6a ("ocfs2: No need to zero
+pages past i_size.") for standard ocfs2 write path, inline conversion
+path was apparently forgotten; the commit log also has a reasoning why
+the zeroing actually is not needed.
+
+After commit 6dbf7bb55598, things became worse as writeback code stopped
+invalidating buffers on pages beyond i_size and thus these pages end up
+with clean PageDirty bit but with buffers attached to these pages being
+still dirty. So when a file is converted from inline format, then
+writeback triggers, and then the file is grown so that these pages
+become valid, the invalid dirtiness state is preserved,
+mark_buffer_dirty() does nothing on these pages (buffers are already
+dirty) but page is never written back because it is clean. So data
+written to these pages is lost once pages are reclaimed.
+
+Simple reproducer for the problem is:
+
+ xfs_io -f -c "pwrite 0 2000" -c "pwrite 2000 2000" -c "fsync" \
+ -c "pwrite 4000 2000" ocfs2_file
+
+After unmounting and mounting the fs again, you can observe that end of
+'ocfs2_file' has lost its contents.
+
+Fix the problem by not doing the pointless zeroing during conversion
+from inline format similarly as in the standard write path.
+
+[akpm@linux-foundation.org: fix whitespace, per Joseph]
+
+Link: https://lkml.kernel.org/r/20210930095405.21433-1-jack@suse.cz
+Fixes: 6dbf7bb55598 ("fs: Don't invalidate page buffers in block_write_full_page()")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Tested-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Acked-by: Gang He <ghe@suse.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: "Markov, Andrey" <Markov.Andrey@Dell.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ocfs2/alloc.c | 46 ++++++++++++----------------------------------
+ 1 file changed, 12 insertions(+), 34 deletions(-)
+
+--- a/fs/ocfs2/alloc.c
++++ b/fs/ocfs2/alloc.c
+@@ -7045,7 +7045,7 @@ void ocfs2_set_inode_data_inline(struct
+ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
+ struct buffer_head *di_bh)
+ {
+- int ret, i, has_data, num_pages = 0;
++ int ret, has_data, num_pages = 0;
+ int need_free = 0;
+ u32 bit_off, num;
+ handle_t *handle;
+@@ -7054,26 +7054,17 @@ int ocfs2_convert_inline_data_to_extents
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+ struct ocfs2_alloc_context *data_ac = NULL;
+- struct page **pages = NULL;
+- loff_t end = osb->s_clustersize;
++ struct page *page = NULL;
+ struct ocfs2_extent_tree et;
+ int did_quota = 0;
+
+ has_data = i_size_read(inode) ? 1 : 0;
+
+ if (has_data) {
+- pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
+- sizeof(struct page *), GFP_NOFS);
+- if (pages == NULL) {
+- ret = -ENOMEM;
+- mlog_errno(ret);
+- return ret;
+- }
+-
+ ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
+ if (ret) {
+ mlog_errno(ret);
+- goto free_pages;
++ goto out;
+ }
+ }
+
+@@ -7093,7 +7084,8 @@ int ocfs2_convert_inline_data_to_extents
+ }
+
+ if (has_data) {
+- unsigned int page_end;
++ unsigned int page_end = min_t(unsigned, PAGE_SIZE,
++ osb->s_clustersize);
+ u64 phys;
+
+ ret = dquot_alloc_space_nodirty(inode,
+@@ -7117,15 +7109,8 @@ int ocfs2_convert_inline_data_to_extents
+ */
+ block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
+
+- /*
+- * Non sparse file systems zero on extend, so no need
+- * to do that now.
+- */
+- if (!ocfs2_sparse_alloc(osb) &&
+- PAGE_SIZE < osb->s_clustersize)
+- end = PAGE_SIZE;
+-
+- ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
++ ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
++ &num_pages);
+ if (ret) {
+ mlog_errno(ret);
+ need_free = 1;
+@@ -7136,20 +7121,15 @@ int ocfs2_convert_inline_data_to_extents
+ * This should populate the 1st page for us and mark
+ * it up to date.
+ */
+- ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
++ ret = ocfs2_read_inline_data(inode, page, di_bh);
+ if (ret) {
+ mlog_errno(ret);
+ need_free = 1;
+ goto out_unlock;
+ }
+
+- page_end = PAGE_SIZE;
+- if (PAGE_SIZE > osb->s_clustersize)
+- page_end = osb->s_clustersize;
+-
+- for (i = 0; i < num_pages; i++)
+- ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
+- pages[i], i > 0, &phys);
++ ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
++ &phys);
+ }
+
+ spin_lock(&oi->ip_lock);
+@@ -7180,8 +7160,8 @@ int ocfs2_convert_inline_data_to_extents
+ }
+
+ out_unlock:
+- if (pages)
+- ocfs2_unlock_and_free_pages(pages, num_pages);
++ if (page)
++ ocfs2_unlock_and_free_pages(&page, num_pages);
+
+ out_commit:
+ if (ret < 0 && did_quota)
+@@ -7205,8 +7185,6 @@ out_commit:
+ out:
+ if (data_ac)
+ ocfs2_free_alloc_context(data_ac);
+-free_pages:
+- kfree(pages);
+ return ret;
+ }
+
--- /dev/null
+From b15fa9224e6e1239414525d8d556d824701849fc Mon Sep 17 00:00:00 2001
+From: Valentin Vidic <vvidic@valentin-vidic.from.hr>
+Date: Mon, 18 Oct 2021 15:15:42 -0700
+Subject: ocfs2: mount fails with buffer overflow in strlen
+
+From: Valentin Vidic <vvidic@valentin-vidic.from.hr>
+
+commit b15fa9224e6e1239414525d8d556d824701849fc upstream.
+
+Starting with kernel 5.11 built with CONFIG_FORTIFY_SOURCE mounting an
+ocfs2 filesystem with either o2cb or pcmk cluster stack fails with the
+trace below. Problem seems to be that strings for cluster stack and
+cluster name are not guaranteed to be null terminated in the disk
+representation, while strlcpy assumes that the source string is always
+null terminated. This causes a read outside of the source string
+triggering the buffer overflow detection.
+
+ detected buffer overflow in strlen
+ ------------[ cut here ]------------
+ kernel BUG at lib/string.c:1149!
+ invalid opcode: 0000 [#1] SMP PTI
+ CPU: 1 PID: 910 Comm: mount.ocfs2 Not tainted 5.14.0-1-amd64 #1
+ Debian 5.14.6-2
+ RIP: 0010:fortify_panic+0xf/0x11
+ ...
+ Call Trace:
+ ocfs2_initialize_super.isra.0.cold+0xc/0x18 [ocfs2]
+ ocfs2_fill_super+0x359/0x19b0 [ocfs2]
+ mount_bdev+0x185/0x1b0
+ legacy_get_tree+0x27/0x40
+ vfs_get_tree+0x25/0xb0
+ path_mount+0x454/0xa20
+ __x64_sys_mount+0x103/0x140
+ do_syscall_64+0x3b/0xc0
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Link: https://lkml.kernel.org/r/20210929180654.32460-1-vvidic@valentin-vidic.from.hr
+Signed-off-by: Valentin Vidic <vvidic@valentin-vidic.from.hr>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ocfs2/super.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -2167,11 +2167,17 @@ static int ocfs2_initialize_super(struct
+ }
+
+ if (ocfs2_clusterinfo_valid(osb)) {
++ /*
++ * ci_stack and ci_cluster in ocfs2_cluster_info may not be null
++ * terminated, so make sure no overflow happens here by using
++ * memcpy. Destination strings will always be null terminated
++ * because osb is allocated using kzalloc.
++ */
+ osb->osb_stackflags =
+ OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
+- strlcpy(osb->osb_cluster_stack,
++ memcpy(osb->osb_cluster_stack,
+ OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
+- OCFS2_STACK_LABEL_LEN + 1);
++ OCFS2_STACK_LABEL_LEN);
+ if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
+ mlog(ML_ERROR,
+ "couldn't mount because of an invalid "
+@@ -2180,9 +2186,9 @@ static int ocfs2_initialize_super(struct
+ status = -EINVAL;
+ goto bail;
+ }
+- strlcpy(osb->osb_cluster_name,
++ memcpy(osb->osb_cluster_name,
+ OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
+- OCFS2_CLUSTER_NAME_LEN + 1);
++ OCFS2_CLUSTER_NAME_LEN);
+ } else {
+ /* The empty string is identical with classic tools that
+ * don't know about s_cluster_info. */
drm-kmb-enable-adv-bridge-after-modeset.patch
net-enetc-fix-ethtool-counter-name-for-pm0_terr.patch
net-enetc-make-sure-all-traffic-classes-can-send-lar.patch
+can-rcar_can-fix-suspend-resume.patch
+can-peak_usb-pcan_usb_fd_decode_status-fix-back-to-error_active-state-notification.patch
+can-peak_pci-peak_pci_remove-fix-uaf.patch
+can-isotp-isotp_sendmsg-fix-return-error-on-fc-timeout-on-tx-path.patch
+can-isotp-isotp_sendmsg-add-result-check-for-wait_event_interruptible.patch
+can-isotp-isotp_sendmsg-fix-tx-buffer-concurrent-access-in-isotp_sendmsg.patch
+can-j1939-j1939_tp_rxtimer-fix-errant-alert-in-j1939_tp_rxtimer.patch
+can-j1939-j1939_netdev_start-fix-uaf-for-rx_kref-of-j1939_priv.patch
+can-j1939-j1939_xtp_rx_dat_one-cancel-session-if-receive-tp.dt-with-error-length.patch
+can-j1939-j1939_xtp_rx_rts_session_new-abort-tp-less-than-9-bytes.patch
+ceph-skip-existing-superblocks-that-are-blocklisted-or-shut-down-when-mounting.patch
+ceph-fix-handling-of-meta-errors.patch
+tracing-have-all-levels-of-checks-prevent-recursion.patch
+ocfs2-fix-data-corruption-after-conversion-from-inline-format.patch
+ocfs2-mount-fails-with-buffer-overflow-in-strlen.patch
+mm-userfaultfd-selftests-fix-memory-corruption-with-thp-enabled.patch
+userfaultfd-fix-a-race-between-writeprotect-and-exit_mmap.patch
+mm-mempolicy-do-not-allow-illegal-mpol_f_numa_balancing-mpol_local-in-mbind.patch
+elfcore-correct-reference-to-config_uml.patch
+vfs-check-fd-has-read-access-in-kernel_read_file_from_fd.patch
+mm-secretmem-fix-null-page-mapping-dereference-in-page_is_secretmem.patch
+alsa-usb-audio-provide-quirk-for-sennheiser-gsp670-headset.patch
+alsa-hda-realtek-add-quirk-for-clevo-pc50hs.patch
+asoc-dapm-fix-missing-kctl-change-notifications.patch
+asoc-nau8824-fix-headphone-vs-headset-button-press-detection-no-longer-working.patch
+blk-cgroup-blk_cgroup_bio_start-should-use-irq-safe-operations-on-blkg-iostat_cpu.patch
+audit-fix-possible-null-pointer-dereference-in-audit_filter_rules.patch
+net-dsa-mt7530-correct-ds-num_ports.patch
+ucounts-move-get_ucounts-from-cred_alloc_blank-to-key_change_session_keyring.patch
+ucounts-pair-inc_rlimit_ucounts-with-dec_rlimit_ucoutns-in-commit_creds.patch
+ucounts-proper-error-handling-in-set_cred_ucounts.patch
+ucounts-fix-signal-ucount-refcounting.patch
--- /dev/null
+From ed65df63a39a3f6ed04f7258de8b6789e5021c18 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
+Date: Mon, 18 Oct 2021 15:44:12 -0400
+Subject: tracing: Have all levels of checks prevent recursion
+
+From: Steven Rostedt (VMware) <rostedt@goodmis.org>
+
+commit ed65df63a39a3f6ed04f7258de8b6789e5021c18 upstream.
+
+While writing an email explaining the "bit = 0" logic for a discussion on
+making ftrace_test_recursion_trylock() disable preemption, I discovered a
+path that makes the "not do the logic if bit is zero" unsafe.
+
+The recursion logic is done in hot paths like the function tracer. Thus,
+any code executed causes noticeable overhead. Thus, tricks are done to try
+to limit the amount of code executed. This included the recursion testing
+logic.
+
+Having recursion testing is important, as there are many paths that can
+end up in an infinite recursion cycle when tracing every function in the
+kernel. Thus protection is needed to prevent that from happening.
+
+Because it is OK to recurse due to different running context levels (e.g.
+an interrupt preempts a trace, and then a trace occurs in the interrupt
+handler), a set of bits are used to know which context one is in (normal,
+softirq, irq and NMI). If a recursion occurs in the same level, it is
+prevented*.
+
+Then there are infrastructure levels of recursion as well. When more than
+one callback is attached to the same function to trace, it calls a loop
+function to iterate over all the callbacks. Both the callbacks and the
+loop function have recursion protection. The callbacks use the
+"ftrace_test_recursion_trylock()" which has a "function" set of context
+bits to test, and the loop function calls the internal
+trace_test_and_set_recursion() directly, with an "internal" set of bits.
+
+If an architecture does not implement all the features supported by ftrace
+then the callbacks are never called directly, and the loop function is
+called instead, which will implement the features of ftrace.
+
+Since both the loop function and the callbacks do recursion protection, it
+seemed unnecessary to do it in both locations. Thus, a trick was made
+to have the internal set of recursion bits at a more significant bit
+location than the function bits. Then, if any of the higher bits were set,
+the logic of the function bits could be skipped, as any new recursion
+would first have to go through the loop function.
+
+This is true for architectures that do not support all the ftrace
+features, because all functions being traced must first go through the
+loop function before going to the callbacks. But this is not true for
+architectures that support all the ftrace features. That's because the
+loop function could be called due to two callbacks attached to the same
+function, but then a recursion function inside the callback could be
+called that does not share any other callback, and it will be called
+directly.
+
+i.e.
+
+ traced_function_1: [ more than one callback tracing it ]
+ call loop_func
+
+ loop_func:
+ trace_recursion set internal bit
+ call callback
+
+ callback:
+ trace_recursion [ skipped because internal bit is set, return 0 ]
+ call traced_function_2
+
+ traced_function_2: [ only traced by above callback ]
+ call callback
+
+ callback:
+ trace_recursion [ skipped because internal bit is set, return 0 ]
+ call traced_function_2
+
+ [ wash, rinse, repeat, BOOM! out of shampoo! ]
+
+Thus, the "bit == 0 skip" trick is not safe, unless the loop function is
+called for all functions.
+
+Since we want to encourage architectures to implement all ftrace features,
+having them slow down due to this extra logic may encourage the
+maintainers to update to the latest ftrace features. And because this
+logic is only safe for them, remove it completely.
+
+ [*] There is one layer of recursion that is allowed, and that is to allow
+ for the transition between interrupt context (normal -> softirq ->
+ irq -> NMI), because a trace may occur before the context update is
+ visible to the trace recursion logic.
+
+Link: https://lore.kernel.org/all/609b565a-ed6e-a1da-f025-166691b5d994@linux.alibaba.com/
+Link: https://lkml.kernel.org/r/20211018154412.09fcad3c@gandalf.local.home
+
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Petr Mladek <pmladek@suse.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: "James E.J. Bottomley" <James.Bottomley@hansenpartnership.com>
+Cc: Helge Deller <deller@gmx.de>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: Paul Walmsley <paul.walmsley@sifive.com>
+Cc: Palmer Dabbelt <palmer@dabbelt.com>
+Cc: Albert Ou <aou@eecs.berkeley.edu>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Jiri Kosina <jikos@kernel.org>
+Cc: Miroslav Benes <mbenes@suse.cz>
+Cc: Joe Lawrence <joe.lawrence@redhat.com>
+Cc: Colin Ian King <colin.king@canonical.com>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Cc: Nicholas Piggin <npiggin@gmail.com>
+Cc: Jisheng Zhang <jszhang@kernel.org>
+Cc: =?utf-8?b?546L6LSH?= <yun.wang@linux.alibaba.com>
+Cc: Guo Ren <guoren@kernel.org>
+Cc: stable@vger.kernel.org
+Fixes: edc15cafcbfa3 ("tracing: Avoid unnecessary multiple recursion checks")
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/trace_recursion.h | 49 +++++++---------------------------------
+ kernel/trace/ftrace.c | 4 +--
+ 2 files changed, 11 insertions(+), 42 deletions(-)
+
+--- a/include/linux/trace_recursion.h
++++ b/include/linux/trace_recursion.h
+@@ -16,23 +16,8 @@
+ * When function tracing occurs, the following steps are made:
+ * If arch does not support a ftrace feature:
+ * call internal function (uses INTERNAL bits) which calls...
+- * If callback is registered to the "global" list, the list
+- * function is called and recursion checks the GLOBAL bits.
+- * then this function calls...
+ * The function callback, which can use the FTRACE bits to
+ * check for recursion.
+- *
+- * Now if the arch does not support a feature, and it calls
+- * the global list function which calls the ftrace callback
+- * all three of these steps will do a recursion protection.
+- * There's no reason to do one if the previous caller already
+- * did. The recursion that we are protecting against will
+- * go through the same steps again.
+- *
+- * To prevent the multiple recursion checks, if a recursion
+- * bit is set that is higher than the MAX bit of the current
+- * check, then we know that the check was made by the previous
+- * caller, and we can skip the current check.
+ */
+ enum {
+ /* Function recursion bits */
+@@ -40,12 +25,14 @@ enum {
+ TRACE_FTRACE_NMI_BIT,
+ TRACE_FTRACE_IRQ_BIT,
+ TRACE_FTRACE_SIRQ_BIT,
++ TRACE_FTRACE_TRANSITION_BIT,
+
+- /* INTERNAL_BITs must be greater than FTRACE_BITs */
++ /* Internal use recursion bits */
+ TRACE_INTERNAL_BIT,
+ TRACE_INTERNAL_NMI_BIT,
+ TRACE_INTERNAL_IRQ_BIT,
+ TRACE_INTERNAL_SIRQ_BIT,
++ TRACE_INTERNAL_TRANSITION_BIT,
+
+ TRACE_BRANCH_BIT,
+ /*
+@@ -86,12 +73,6 @@ enum {
+ */
+ TRACE_GRAPH_NOTRACE_BIT,
+
+- /*
+- * When transitioning between context, the preempt_count() may
+- * not be correct. Allow for a single recursion to cover this case.
+- */
+- TRACE_TRANSITION_BIT,
+-
+ /* Used to prevent recursion recording from recursing. */
+ TRACE_RECORD_RECURSION_BIT,
+ };
+@@ -113,12 +94,10 @@ enum {
+ #define TRACE_CONTEXT_BITS 4
+
+ #define TRACE_FTRACE_START TRACE_FTRACE_BIT
+-#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
+
+ #define TRACE_LIST_START TRACE_INTERNAL_BIT
+-#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
+
+-#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
++#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
+
+ /*
+ * Used for setting context
+@@ -132,6 +111,7 @@ enum {
+ TRACE_CTX_IRQ,
+ TRACE_CTX_SOFTIRQ,
+ TRACE_CTX_NORMAL,
++ TRACE_CTX_TRANSITION,
+ };
+
+ static __always_inline int trace_get_context_bit(void)
+@@ -160,45 +140,34 @@ extern void ftrace_record_recursion(unsi
+ #endif
+
+ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
+- int start, int max)
++ int start)
+ {
+ unsigned int val = READ_ONCE(current->trace_recursion);
+ int bit;
+
+- /* A previous recursion check was made */
+- if ((val & TRACE_CONTEXT_MASK) > max)
+- return 0;
+-
+ bit = trace_get_context_bit() + start;
+ if (unlikely(val & (1 << bit))) {
+ /*
+ * It could be that preempt_count has not been updated during
+ * a switch between contexts. Allow for a single recursion.
+ */
+- bit = TRACE_TRANSITION_BIT;
++ bit = TRACE_CTX_TRANSITION + start;
+ if (val & (1 << bit)) {
+ do_ftrace_record_recursion(ip, pip);
+ return -1;
+ }
+- } else {
+- /* Normal check passed, clear the transition to allow it again */
+- val &= ~(1 << TRACE_TRANSITION_BIT);
+ }
+
+ val |= 1 << bit;
+ current->trace_recursion = val;
+ barrier();
+
+- return bit + 1;
++ return bit;
+ }
+
+ static __always_inline void trace_clear_recursion(int bit)
+ {
+- if (!bit)
+- return;
+-
+ barrier();
+- bit--;
+ trace_recursion_clear(bit);
+ }
+
+@@ -214,7 +183,7 @@ static __always_inline void trace_clear_
+ static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
+ unsigned long parent_ip)
+ {
+- return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX);
++ return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START);
+ }
+
+ /**
+--- a/kernel/trace/ftrace.c
++++ b/kernel/trace/ftrace.c
+@@ -6977,7 +6977,7 @@ __ftrace_ops_list_func(unsigned long ip,
+ struct ftrace_ops *op;
+ int bit;
+
+- bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
++ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START);
+ if (bit < 0)
+ return;
+
+@@ -7052,7 +7052,7 @@ static void ftrace_ops_assist_func(unsig
+ {
+ int bit;
+
+- bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
++ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START);
+ if (bit < 0)
+ return;
+
--- /dev/null
+From 15bc01effefe97757ef02ca09e9d1b927ab22725 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 16 Oct 2021 15:59:49 -0500
+Subject: ucounts: Fix signal ucount refcounting
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 15bc01effefe97757ef02ca09e9d1b927ab22725 upstream.
+
+In commit fda31c50292a ("signal: avoid double atomic counter
+increments for user accounting") Linus made a clever optimization to
+how rlimits and the struct user_struct. Unfortunately that
+optimization does not work in the obvious way when moved to nested
+rlimits. The problem is that the last decrement of the per user
+namespace per user sigpending counter might also be the last decrement
+of the sigpending counter in the parent user namespace as well. Which
+means that simply freeing the leaf ucount in __free_sigqueue is not
+enough.
+
+Maintain the optimization and handle the tricky cases by introducing
+inc_rlimit_get_ucounts and dec_rlimit_put_ucounts.
+
+By moving the entire optimization into functions that perform all of
+the work it becomes possible to ensure that every level is handled
+properly.
+
+The new function inc_rlimit_get_ucounts returns 0 on failure to
+increment the ucount. This is different than inc_rlimit_ucounts which
+increments the ucounts and returns LONG_MAX if the ucount counter has
+exceeded its maximum or it wrapped (to indicate the counter needs to be
+decremented).
+
+I wish we had a single user to account all pending signals to across
+all of the threads of a process so this complexity was not necessary.
+
+Cc: stable@vger.kernel.org
+Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts")
+v1: https://lkml.kernel.org/r/87mtnavszx.fsf_-_@disp2133
+Link: https://lkml.kernel.org/r/87fssytizw.fsf_-_@disp2133
+Reviewed-by: Alexey Gladkov <legion@kernel.org>
+Tested-by: Rune Kleveland <rune.kleveland@infomedia.dk>
+Tested-by: Yu Zhao <yuzhao@google.com>
+Tested-by: Jordan Glover <Golden_Miller83@protonmail.ch>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/user_namespace.h | 2 +
+ kernel/signal.c | 25 +++++---------------
+ kernel/ucount.c | 49 +++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 57 insertions(+), 19 deletions(-)
+
+--- a/include/linux/user_namespace.h
++++ b/include/linux/user_namespace.h
+@@ -127,6 +127,8 @@ static inline long get_ucounts_value(str
+
+ long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+ bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
++long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
++void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
+ bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
+
+ static inline void set_rlimit_ucount_max(struct user_namespace *ns,
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -425,22 +425,10 @@ __sigqueue_alloc(int sig, struct task_st
+ */
+ rcu_read_lock();
+ ucounts = task_ucounts(t);
+- sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
+- switch (sigpending) {
+- case 1:
+- if (likely(get_ucounts(ucounts)))
+- break;
+- fallthrough;
+- case LONG_MAX:
+- /*
+- * we need to decrease the ucount in the userns tree on any
+- * failure to avoid counts leaking.
+- */
+- dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
+- rcu_read_unlock();
+- return NULL;
+- }
++ sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
+ rcu_read_unlock();
++ if (!sigpending)
++ return NULL;
+
+ if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
+ q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
+@@ -449,8 +437,7 @@ __sigqueue_alloc(int sig, struct task_st
+ }
+
+ if (unlikely(q == NULL)) {
+- if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
+- put_ucounts(ucounts);
++ dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
+ } else {
+ INIT_LIST_HEAD(&q->list);
+ q->flags = sigqueue_flags;
+@@ -463,8 +450,8 @@ static void __sigqueue_free(struct sigqu
+ {
+ if (q->flags & SIGQUEUE_PREALLOC)
+ return;
+- if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
+- put_ucounts(q->ucounts);
++ if (q->ucounts) {
++ dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
+ q->ucounts = NULL;
+ }
+ kmem_cache_free(sigqueue_cachep, q);
+--- a/kernel/ucount.c
++++ b/kernel/ucount.c
+@@ -284,6 +284,55 @@ bool dec_rlimit_ucounts(struct ucounts *
+ return (new == 0);
+ }
+
++static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
++ struct ucounts *last, enum ucount_type type)
++{
++ struct ucounts *iter, *next;
++ for (iter = ucounts; iter != last; iter = next) {
++ long dec = atomic_long_add_return(-1, &iter->ucount[type]);
++ WARN_ON_ONCE(dec < 0);
++ next = iter->ns->ucounts;
++ if (dec == 0)
++ put_ucounts(iter);
++ }
++}
++
++void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
++{
++ do_dec_rlimit_put_ucounts(ucounts, NULL, type);
++}
++
++long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
++{
++ /* Caller must hold a reference to ucounts */
++ struct ucounts *iter;
++ long dec, ret = 0;
++
++ for (iter = ucounts; iter; iter = iter->ns->ucounts) {
++ long max = READ_ONCE(iter->ns->ucount_max[type]);
++ long new = atomic_long_add_return(1, &iter->ucount[type]);
++ if (new < 0 || new > max)
++ goto unwind;
++ if (iter == ucounts)
++ ret = new;
++ /*
++ * Grab an extra ucount reference for the caller when
++ * the rlimit count was previously 0.
++ */
++ if (new != 1)
++ continue;
++ if (!get_ucounts(iter))
++ goto dec_unwind;
++ }
++ return ret;
++dec_unwind:
++ dec = atomic_long_add_return(-1, &iter->ucount[type]);
++ WARN_ON_ONCE(dec < 0);
++unwind:
++ do_dec_rlimit_put_ucounts(ucounts, iter, type);
++ return 0;
++}
++
+ bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
+ {
+ struct ucounts *iter;
--- /dev/null
+From 5ebcbe342b1c12fae44b4f83cbeae1520e09857e Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 16 Oct 2021 12:17:30 -0500
+Subject: ucounts: Move get_ucounts from cred_alloc_blank to key_change_session_keyring
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 5ebcbe342b1c12fae44b4f83cbeae1520e09857e upstream.
+
+Setting cred->ucounts in cred_alloc_blank does not make sense. The
+uid and user_ns are deliberately not set in cred_alloc_blank but
+instead the setting is delayed until key_change_session_keyring.
+
+So move dealing with ucounts into key_change_session_keyring as well.
+
+Unfortunately that movement of get_ucounts adds a new failure mode to
+key_change_session_keyring. I do not see anything stopping the parent
+process from calling setuid and changing the relevant part of its
+cred while keyctl_session_to_parent is running making it fundamentally
+necessary to call get_ucounts in key_change_session_keyring. Which
+means that the new failure mode cannot be avoided.
+
+A failure of key_change_session_keyring results in a single threaded
+parent keeping its existing credentials. Which results in the parent
+process not being able to access the session keyring and whichever
+keys are in the new keyring.
+
+Further get_ucounts is only expected to fail if the number of bits in
+the reference count for the structure is too few.
+
+Since the code has no other way to report the failure of get_ucounts
+and because such failures are not expected to be common add a WARN_ONCE
+to report this problem to userspace.
+
+Between the WARN_ONCE and the parent process not having access to
+the keys in the new session keyring I expect any failure of get_ucounts
+will be noticed and reported and we can find another way to handle this
+condition. (Possibly by just making ucounts->count an atomic_long_t).
+
+Cc: stable@vger.kernel.org
+Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred")
+Link: https://lkml.kernel.org/r/7k0ias0uf.fsf_-_@disp2133
+Tested-by: Yu Zhao <yuzhao@google.com>
+Reviewed-by: Alexey Gladkov <legion@kernel.org>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cred.c | 2 --
+ security/keys/process_keys.c | 8 ++++++++
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+--- a/kernel/cred.c
++++ b/kernel/cred.c
+@@ -225,8 +225,6 @@ struct cred *cred_alloc_blank(void)
+ #ifdef CONFIG_DEBUG_CREDENTIALS
+ new->magic = CRED_MAGIC;
+ #endif
+- new->ucounts = get_ucounts(&init_ucounts);
+-
+ if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
+ goto error;
+
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -918,6 +918,13 @@ void key_change_session_keyring(struct c
+ return;
+ }
+
++ /* If get_ucounts fails more bits are needed in the refcount */
++ if (unlikely(!get_ucounts(old->ucounts))) {
++ WARN_ONCE(1, "In %s get_ucounts failed\n", __func__);
++ put_cred(new);
++ return;
++ }
++
+ new-> uid = old-> uid;
+ new-> euid = old-> euid;
+ new-> suid = old-> suid;
+@@ -927,6 +934,7 @@ void key_change_session_keyring(struct c
+ new-> sgid = old-> sgid;
+ new->fsgid = old->fsgid;
+ new->user = get_uid(old->user);
++ new->ucounts = old->ucounts;
+ new->user_ns = get_user_ns(old->user_ns);
+ new->group_info = get_group_info(old->group_info);
+
--- /dev/null
+From 629715adc62b0ad27ab04d0aa73a71927f886910 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 16 Oct 2021 12:30:00 -0500
+Subject: ucounts: Pair inc_rlimit_ucounts with dec_rlimit_ucoutns in commit_creds
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 629715adc62b0ad27ab04d0aa73a71927f886910 upstream.
+
+The purpose of inc_rlimit_ucounts and dec_rlimit_ucounts in commit_creds
+is to change which rlimit counter is used to track a process when the
+credentials changes.
+
+Use the same test for both to guarantee the tracking is correct.
+
+Cc: stable@vger.kernel.org
+Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts")
+Link: https://lkml.kernel.org/r/87v91us0w4.fsf_-_@disp2133
+Tested-by: Yu Zhao <yuzhao@google.com>
+Reviewed-by: Alexey Gladkov <legion@kernel.org>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cred.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/cred.c
++++ b/kernel/cred.c
+@@ -499,7 +499,7 @@ int commit_creds(struct cred *new)
+ inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
+ rcu_assign_pointer(task->real_cred, new);
+ rcu_assign_pointer(task->cred, new);
+- if (new->user != old->user)
++ if (new->user != old->user || new->user_ns != old->user_ns)
+ dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
+ alter_cred_subscribers(old, -2);
+
--- /dev/null
+From 34dc2fd6e6908499b669c7b45320cddf38b332e1 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 16 Oct 2021 12:47:51 -0500
+Subject: ucounts: Proper error handling in set_cred_ucounts
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 34dc2fd6e6908499b669c7b45320cddf38b332e1 upstream.
+
+Instead of leaking the ucounts in new if alloc_ucounts fails, store
+the result of alloc_ucounts into a temporary variable, which is later
+assigned to new->ucounts.
+
+Cc: stable@vger.kernel.org
+Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred")
+Link: https://lkml.kernel.org/r/87pms2s0v8.fsf_-_@disp2133
+Tested-by: Yu Zhao <yuzhao@google.com>
+Reviewed-by: Alexey Gladkov <legion@kernel.org>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cred.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/cred.c
++++ b/kernel/cred.c
+@@ -667,7 +667,7 @@ int set_cred_ucounts(struct cred *new)
+ {
+ struct task_struct *task = current;
+ const struct cred *old = task->real_cred;
+- struct ucounts *old_ucounts = new->ucounts;
++ struct ucounts *new_ucounts, *old_ucounts = new->ucounts;
+
+ if (new->user == old->user && new->user_ns == old->user_ns)
+ return 0;
+@@ -679,9 +679,10 @@ int set_cred_ucounts(struct cred *new)
+ if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
+ return 0;
+
+- if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid)))
++ if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid)))
+ return -EAGAIN;
+
++ new->ucounts = new_ucounts;
+ if (old_ucounts)
+ put_ucounts(old_ucounts);
+
--- /dev/null
+From cb185d5f1ebf900f4ae3bf84cee212e6dd035aca Mon Sep 17 00:00:00 2001
+From: Nadav Amit <namit@vmware.com>
+Date: Mon, 18 Oct 2021 15:15:25 -0700
+Subject: userfaultfd: fix a race between writeprotect and exit_mmap()
+
+From: Nadav Amit <namit@vmware.com>
+
+commit cb185d5f1ebf900f4ae3bf84cee212e6dd035aca upstream.
+
+A race is possible when a process exits, its VMAs are removed by
+exit_mmap() and at the same time userfaultfd_writeprotect() is called.
+
+The race was detected by KASAN on a development kernel, but it appears
+to be possible on vanilla kernels as well.
+
+Use mmget_not_zero() to prevent the race as done in other userfaultfd
+operations.
+
+Link: https://lkml.kernel.org/r/20210921200247.25749-1-namit@vmware.com
+Fixes: 63b2d4174c4ad ("userfaultfd: wp: add the writeprotect API to userfaultfd ioctl")
+Signed-off-by: Nadav Amit <namit@vmware.com>
+Tested-by: Li Wang <liwang@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/userfaultfd.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -1826,9 +1826,15 @@ static int userfaultfd_writeprotect(stru
+ if (mode_wp && mode_dontwake)
+ return -EINVAL;
+
+- ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
+- uffdio_wp.range.len, mode_wp,
+- &ctx->mmap_changing);
++ if (mmget_not_zero(ctx->mm)) {
++ ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
++ uffdio_wp.range.len, mode_wp,
++ &ctx->mmap_changing);
++ mmput(ctx->mm);
++ } else {
++ return -ESRCH;
++ }
++
+ if (ret)
+ return ret;
+
--- /dev/null
+From 032146cda85566abcd1c4884d9d23e4e30a07e9a Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Mon, 18 Oct 2021 15:16:12 -0700
+Subject: vfs: check fd has read access in kernel_read_file_from_fd()
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 032146cda85566abcd1c4884d9d23e4e30a07e9a upstream.
+
+If we open a file without read access and then pass the fd to a syscall
+whose implementation calls kernel_read_file_from_fd(), we get a warning
+from __kernel_read():
+
+ if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
+
+This currently affects both finit_module() and kexec_file_load(), but it
+could affect other syscalls in the future.
+
+Link: https://lkml.kernel.org/r/20211007220110.600005-1-willy@infradead.org
+Fixes: b844f0ecbc56 ("vfs: define kernel_copy_file_from_fd()")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reported-by: Hao Sun <sunhao.th@gmail.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Mimi Zohar <zohar@linux.ibm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/kernel_read_file.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/kernel_read_file.c
++++ b/fs/kernel_read_file.c
+@@ -178,7 +178,7 @@ int kernel_read_file_from_fd(int fd, lof
+ struct fd f = fdget(fd);
+ int ret = -EBADF;
+
+- if (!f.file)
++ if (!f.file || !(f.file->f_mode & FMODE_READ))
+ goto out;
+
+ ret = kernel_read_file(f.file, offset, buf, buf_size, file_size, id);