--- /dev/null
+From 9435f2bb66874a0c4dd25e7c978957a7ca2c93b1 Mon Sep 17 00:00:00 2001
+From: Takashi Iwai <tiwai@suse.de>
+Date: Fri, 22 Nov 2019 12:28:40 +0100
+Subject: ALSA: usb-audio: Fix NULL dereference at parsing BADD
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 9435f2bb66874a0c4dd25e7c978957a7ca2c93b1 upstream.
+
+snd_usb_mixer_controls_badd(), which parses UAC3 BADD profiles, misses a
+NULL check for the given interfaces. When a malformed USB descriptor is
+passed, this may lead to an Oops, as spotted by syzkaller. Skip the
+iteration if the interface doesn't exist, to avoid the crash.
+
+Fixes: 17156f23e93c ("ALSA: usb: add UAC3 BADD profiles support")
+Reported-by: syzbot+a36ab65c6653d7ccdd62@syzkaller.appspotmail.com
+Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20191122112840.24797-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ sound/usb/mixer.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/sound/usb/mixer.c
++++ b/sound/usb/mixer.c
+@@ -2949,6 +2949,9 @@ static int snd_usb_mixer_controls_badd(s
+ continue;
+
+ iface = usb_ifnum_to_if(dev, intf);
++ if (!iface)
++ continue;
++
+ num = iface->num_altsetting;
+
+ if (num < 2)
--- /dev/null
+From 3ef240eaff36b8119ac9e2ea17cbf41179c930ba Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 6 Nov 2019 22:55:46 +0100
+Subject: futex: Prevent exit livelock
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 3ef240eaff36b8119ac9e2ea17cbf41179c930ba upstream.
+
+Oleg provided the following test case:
+
+int main(void)
+{
+ struct sched_param sp = {};
+
+ sp.sched_priority = 2;
+ assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
+
+ int lock = vfork();
+ if (!lock) {
+ sp.sched_priority = 1;
+ assert(sched_setscheduler(0, SCHED_FIFO, &sp) == 0);
+ _exit(0);
+ }
+
+ syscall(__NR_futex, &lock, FUTEX_LOCK_PI, 0,0,0);
+ return 0;
+}
+
+This creates an unkillable RT process spinning in futex_lock_pi() on a UP
+machine or if the process is affine to a single CPU. The reason is:
+
+ parent child
+
+ set FIFO prio 2
+
+ vfork() -> set FIFO prio 1
+ implies wait_for_child() sched_setscheduler(...)
+ exit()
+ do_exit()
+ ....
+ mm_release()
+ tsk->futex_state = FUTEX_STATE_EXITING;
+ exit_futex(); (NOOP in this case)
+ complete() --> wakes parent
+ sys_futex()
+ loop infinite because
+ tsk->futex_state == FUTEX_STATE_EXITING
+
+The same problem can happen just by regular preemption as well:
+
+ task holds futex
+ ...
+ do_exit()
+ tsk->futex_state = FUTEX_STATE_EXITING;
+
+ --> preemption (unrelated wakeup of some other higher prio task, e.g. timer)
+
+ switch_to(other_task)
+
+ return to user
+ sys_futex()
+ loop infinite as above
+
+Just for the fun of it the futex exit cleanup could trigger the wakeup
+itself before the task sets its futex state to DEAD.
+
+To cure this, the handling of the exiting owner is changed so:
+
+ - A refcount is held on the task
+
+ - The task pointer is stored in a caller visible location
+
+ - The caller drops all locks (hash bucket, mmap_sem) and blocks
+ on task::futex_exit_mutex. When the mutex is acquired then
+ the exiting task has completed the cleanup and the state
+ is consistent and can be reevaluated.
+
+This is not a pretty solution, but the only alternative would be returning
+an error code to user space, which would break the state consistency
+guarantee and open another can of problems including regressions.
+
+For stable backports the preparatory commits ac31c7ff8624 .. ba31c1a48538
+are required as well, but for anything older than 5.3.y the backports are
+going to be provided when this hits mainline as the other dependencies for
+those kernels are definitely not stable material.
+
+Fixes: 778e9a9c3e71 ("pi-futex: fix exit races and locking problems")
+Reported-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Stable Team <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20191106224557.041676471@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/futex.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 91 insertions(+), 15 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1148,6 +1148,36 @@ out_error:
+ return ret;
+ }
+
++/**
++ * wait_for_owner_exiting - Block until the owner has exited
++ * @exiting: Pointer to the exiting task
++ *
++ * Caller must hold a refcount on @exiting.
++ */
++static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
++{
++ if (ret != -EBUSY) {
++ WARN_ON_ONCE(exiting);
++ return;
++ }
++
++ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
++ return;
++
++ mutex_lock(&exiting->futex_exit_mutex);
++ /*
++ * No point in doing state checking here. If the waiter got here
++ * while the task was in exec()->exec_futex_release() then it can
++ * have any FUTEX_STATE_* value when the waiter has acquired the
++ * mutex. OK, if running, EXITING or DEAD if it reached exit()
++ * already. Highly unlikely and not a problem. Just one more round
++ * through the futex maze.
++ */
++ mutex_unlock(&exiting->futex_exit_mutex);
++
++ put_task_struct(exiting);
++}
++
+ static int handle_exit_race(u32 __user *uaddr, u32 uval,
+ struct task_struct *tsk)
+ {
+@@ -1207,7 +1237,8 @@ static int handle_exit_race(u32 __user *
+ * it after doing proper sanity checks.
+ */
+ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
+- struct futex_pi_state **ps)
++ struct futex_pi_state **ps,
++ struct task_struct **exiting)
+ {
+ pid_t pid = uval & FUTEX_TID_MASK;
+ struct futex_pi_state *pi_state;
+@@ -1247,7 +1278,19 @@ static int attach_to_pi_owner(u32 __user
+ int ret = handle_exit_race(uaddr, uval, p);
+
+ raw_spin_unlock_irq(&p->pi_lock);
+- put_task_struct(p);
++ /*
++ * If the owner task is between FUTEX_STATE_EXITING and
++ * FUTEX_STATE_DEAD then store the task pointer and keep
++ * the reference on the task struct. The calling code will
++ * drop all locks, wait for the task to reach
++ * FUTEX_STATE_DEAD and then drop the refcount. This is
++ * required to prevent a live lock when the current task
++ * preempted the exiting task between the two states.
++ */
++ if (ret == -EBUSY)
++ *exiting = p;
++ else
++ put_task_struct(p);
+ return ret;
+ }
+
+@@ -1286,7 +1329,8 @@ static int attach_to_pi_owner(u32 __user
+
+ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
+ struct futex_hash_bucket *hb,
+- union futex_key *key, struct futex_pi_state **ps)
++ union futex_key *key, struct futex_pi_state **ps,
++ struct task_struct **exiting)
+ {
+ struct futex_q *top_waiter = futex_top_waiter(hb, key);
+
+@@ -1301,7 +1345,7 @@ static int lookup_pi_state(u32 __user *u
+ * We are the first waiter - try to look up the owner based on
+ * @uval and attach to it.
+ */
+- return attach_to_pi_owner(uaddr, uval, key, ps);
++ return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
+ }
+
+ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
+@@ -1329,6 +1373,8 @@ static int lock_pi_update_atomic(u32 __u
+ * lookup
+ * @task: the task to perform the atomic lock work for. This will
+ * be "current" except in the case of requeue pi.
++ * @exiting: Pointer to store the task pointer of the owner task
++ * which is in the middle of exiting
+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
+ *
+ * Return:
+@@ -1337,11 +1383,17 @@ static int lock_pi_update_atomic(u32 __u
+ * - <0 - error
+ *
+ * The hb->lock and futex_key refs shall be held by the caller.
++ *
++ * @exiting is only set when the return value is -EBUSY. If so, this holds
++ * a refcount on the exiting task on return and the caller needs to drop it
++ * after waiting for the exit to complete.
+ */
+ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+ union futex_key *key,
+ struct futex_pi_state **ps,
+- struct task_struct *task, int set_waiters)
++ struct task_struct *task,
++ struct task_struct **exiting,
++ int set_waiters)
+ {
+ u32 uval, newval, vpid = task_pid_vnr(task);
+ struct futex_q *top_waiter;
+@@ -1411,7 +1463,7 @@ static int futex_lock_pi_atomic(u32 __us
+ * attach to the owner. If that fails, no harm done, we only
+ * set the FUTEX_WAITERS bit in the user space variable.
+ */
+- return attach_to_pi_owner(uaddr, newval, key, ps);
++ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
+ }
+
+ /**
+@@ -1830,6 +1882,8 @@ void requeue_pi_wake_futex(struct futex_
+ * @key1: the from futex key
+ * @key2: the to futex key
+ * @ps: address to store the pi_state pointer
++ * @exiting: Pointer to store the task pointer of the owner task
++ * which is in the middle of exiting
+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
+ *
+ * Try and get the lock on behalf of the top waiter if we can do it atomically.
+@@ -1837,16 +1891,20 @@ void requeue_pi_wake_futex(struct futex_
+ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
+ * hb1 and hb2 must be held by the caller.
+ *
++ * @exiting is only set when the return value is -EBUSY. If so, this holds
++ * a refcount on the exiting task on return and the caller needs to drop it
++ * after waiting for the exit to complete.
++ *
+ * Return:
+ * - 0 - failed to acquire the lock atomically;
+ * - >0 - acquired the lock, return value is vpid of the top_waiter
+ * - <0 - error
+ */
+-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
+- struct futex_hash_bucket *hb1,
+- struct futex_hash_bucket *hb2,
+- union futex_key *key1, union futex_key *key2,
+- struct futex_pi_state **ps, int set_waiters)
++static int
++futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
++ struct futex_hash_bucket *hb2, union futex_key *key1,
++ union futex_key *key2, struct futex_pi_state **ps,
++ struct task_struct **exiting, int set_waiters)
+ {
+ struct futex_q *top_waiter = NULL;
+ u32 curval;
+@@ -1883,7 +1941,7 @@ static int futex_proxy_trylock_atomic(u3
+ */
+ vpid = task_pid_vnr(top_waiter->task);
+ ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
+- set_waiters);
++ exiting, set_waiters);
+ if (ret == 1) {
+ requeue_pi_wake_futex(top_waiter, key2, hb2);
+ return vpid;
+@@ -2012,6 +2070,8 @@ retry_private:
+ }
+
+ if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
++ struct task_struct *exiting = NULL;
++
+ /*
+ * Attempt to acquire uaddr2 and wake the top waiter. If we
+ * intend to requeue waiters, force setting the FUTEX_WAITERS
+@@ -2019,7 +2079,8 @@ retry_private:
+ * faults rather in the requeue loop below.
+ */
+ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
+- &key2, &pi_state, nr_requeue);
++ &key2, &pi_state,
++ &exiting, nr_requeue);
+
+ /*
+ * At this point the top_waiter has either taken uaddr2 or is
+@@ -2046,7 +2107,8 @@ retry_private:
+ * If that call succeeds then we have pi_state and an
+ * initial refcount on it.
+ */
+- ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
++ ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
++ &pi_state, &exiting);
+ }
+
+ switch (ret) {
+@@ -2075,6 +2137,12 @@ retry_private:
+ hb_waiters_dec(hb2);
+ put_futex_key(&key2);
+ put_futex_key(&key1);
++ /*
++ * Handle the case where the owner is in the middle of
++ * exiting. Wait for the exit to complete otherwise
++ * this task might loop forever, aka. live lock.
++ */
++ wait_for_owner_exiting(ret, exiting);
+ cond_resched();
+ goto retry;
+ default:
+@@ -2790,6 +2858,7 @@ static int futex_lock_pi(u32 __user *uad
+ {
+ struct hrtimer_sleeper timeout, *to = NULL;
+ struct futex_pi_state *pi_state = NULL;
++ struct task_struct *exiting = NULL;
+ struct rt_mutex_waiter rt_waiter;
+ struct futex_hash_bucket *hb;
+ struct futex_q q = futex_q_init;
+@@ -2817,7 +2886,8 @@ retry:
+ retry_private:
+ hb = queue_lock(&q);
+
+- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
++ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
++ &exiting, 0);
+ if (unlikely(ret)) {
+ /*
+ * Atomic work succeeded and we got the lock,
+@@ -2839,6 +2909,12 @@ retry_private:
+ */
+ queue_unlock(hb);
+ put_futex_key(&q.key);
++ /*
++ * Handle the case where the owner is in the middle of
++ * exiting. Wait for the exit to complete otherwise
++ * this task might loop forever, aka. live lock.
++ */
++ wait_for_owner_exiting(ret, exiting);
+ cond_resched();
+ goto retry;
+ default:
--- /dev/null
+From 03bf73c315edca28f47451913177e14cd040a216 Mon Sep 17 00:00:00 2001
+From: Navid Emamdoost <navid.emamdoost@gmail.com>
+Date: Mon, 23 Sep 2019 15:09:58 -0500
+Subject: nbd: prevent memory leak
+
+From: Navid Emamdoost <navid.emamdoost@gmail.com>
+
+commit 03bf73c315edca28f47451913177e14cd040a216 upstream.
+
+In nbd_add_socket(), when krealloc() succeeds but the subsequent nsock
+allocation fails, the reallocated memory is leaked. The correct behaviour
+is to assign the reallocated memory to config->socks right after the
+reallocation succeeds.
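+
+As an illustration of the idiom (a minimal sketch, not the patch itself;
+error handling around the socket reference is omitted), the krealloc()
+result should be published as soon as it is known to be valid, so that no
+later error path can leak it:
+
+	socks = krealloc(config->socks, (config->num_connections + 1) *
+			 sizeof(struct nbd_sock *), GFP_KERNEL);
+	if (!socks)
+		return -ENOMEM;		/* old config->socks is still valid */
+	config->socks = socks;		/* publish before the next allocation */
+
+	nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
+	if (!nsock)
+		return -ENOMEM;		/* socks is reachable via config, not leaked */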
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/nbd.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -984,14 +984,15 @@ static int nbd_add_socket(struct nbd_dev
+ sockfd_put(sock);
+ return -ENOMEM;
+ }
++
++ config->socks = socks;
++
+ nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
+ if (!nsock) {
+ sockfd_put(sock);
+ return -ENOMEM;
+ }
+
+- config->socks = socks;
+-
+ nsock->fallback_index = -1;
+ nsock->dead = false;
+ mutex_init(&nsock->tx_lock);
--- /dev/null
+From b8eb718348b8fb30b5a7d0a8fce26fb3f4ac741b Mon Sep 17 00:00:00 2001
+From: Jouni Hogander <jouni.hogander@unikie.com>
+Date: Wed, 20 Nov 2019 09:08:16 +0200
+Subject: net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject
+
+From: Jouni Hogander <jouni.hogander@unikie.com>
+
+commit b8eb718348b8fb30b5a7d0a8fce26fb3f4ac741b upstream.
+
+kobject_init_and_add() takes a reference even when it fails. This
+reference has to be given up by the caller in the error handling,
+otherwise the memory allocated by kobject_init_and_add() is never freed.
+Originally found by Syzkaller:
+
+BUG: memory leak
+unreferenced object 0xffff8880679f8b08 (size 8):
+ comm "netdev_register", pid 269, jiffies 4294693094 (age 12.132s)
+ hex dump (first 8 bytes):
+ 72 78 2d 30 00 36 20 d4 rx-0.6 .
+ backtrace:
+ [<000000008c93818e>] __kmalloc_track_caller+0x16e/0x290
+ [<000000001f2e4e49>] kvasprintf+0xb1/0x140
+ [<000000007f313394>] kvasprintf_const+0x56/0x160
+ [<00000000aeca11c8>] kobject_set_name_vargs+0x5b/0x140
+ [<0000000073a0367c>] kobject_init_and_add+0xd8/0x170
+ [<0000000088838e4b>] net_rx_queue_update_kobjects+0x152/0x560
+ [<000000006be5f104>] netdev_register_kobject+0x210/0x380
+ [<00000000e31dab9d>] register_netdevice+0xa1b/0xf00
+ [<00000000f68b2465>] __tun_chr_ioctl+0x20d5/0x3dd0
+ [<000000004c50599f>] tun_chr_ioctl+0x2f/0x40
+ [<00000000bbd4c317>] do_vfs_ioctl+0x1c7/0x1510
+ [<00000000d4c59e8f>] ksys_ioctl+0x99/0xb0
+ [<00000000946aea81>] __x64_sys_ioctl+0x78/0xb0
+ [<0000000038d946e5>] do_syscall_64+0x16f/0x580
+ [<00000000e0aa5d8f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+ [<00000000285b3d1a>] 0xffffffffffffffff
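+
+The underlying rule (shown here as a minimal sketch rather than the exact
+hunks below) is that once kobject_init_and_add() has been called, the
+reference it took must always be dropped with kobject_put(), never with a
+plain kfree(), even when kobject_init_and_add() itself returned an error:
+
+	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
+				     "rx-%u", index);
+	if (error)
+		goto err;
+	...
+err:
+	kobject_put(kobj);	/* drops the reference, frees via the ktype */
+	return error;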
+
+Cc: David Miller <davem@davemloft.net>
+Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Signed-off-by: Jouni Hogander <jouni.hogander@unikie.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/core/net-sysfs.c | 24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+--- a/net/core/net-sysfs.c
++++ b/net/core/net-sysfs.c
+@@ -932,21 +932,23 @@ static int rx_queue_add_kobject(struct n
+ error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
+ "rx-%u", index);
+ if (error)
+- return error;
++ goto err;
+
+ dev_hold(queue->dev);
+
+ if (dev->sysfs_rx_queue_group) {
+ error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
+- if (error) {
+- kobject_put(kobj);
+- return error;
+- }
++ if (error)
++ goto err;
+ }
+
+ kobject_uevent(kobj, KOBJ_ADD);
+
+ return error;
++
++err:
++ kobject_put(kobj);
++ return error;
+ }
+ #endif /* CONFIG_SYSFS */
+
+@@ -1471,21 +1473,21 @@ static int netdev_queue_add_kobject(stru
+ error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+ "tx-%u", index);
+ if (error)
+- return error;
++ goto err;
+
+ dev_hold(queue->dev);
+
+ #ifdef CONFIG_BQL
+ error = sysfs_create_group(kobj, &dql_group);
+- if (error) {
+- kobject_put(kobj);
+- return error;
+- }
++ if (error)
++ goto err;
+ #endif
+
+ kobject_uevent(kobj, KOBJ_ADD);
+
+- return 0;
++err:
++ kobject_put(kobj);
++ return error;
+ }
+ #endif /* CONFIG_SYSFS */
+
--- /dev/null
+From 5f9f0b11f0816b35867f2cf71e54d95f53f03902 Mon Sep 17 00:00:00 2001
+From: Oliver Neukum <oneukum@suse.com>
+Date: Thu, 21 Nov 2019 11:37:10 +0100
+Subject: nfc: port100: handle command failure cleanly
+
+From: Oliver Neukum <oneukum@suse.com>
+
+commit 5f9f0b11f0816b35867f2cf71e54d95f53f03902 upstream.
+
+If starting the transfer of a command succeeds but the transfer for the
+reply fails, it is not enough to initiate killing the transfer for the
+command, as it may still be running. You need to wait for the killing to
+finish before you can reuse the URB and buffer.
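+
+The difference, as a short illustration (not part of the hunk below), is in
+the semantics of the two URB calls:
+
+	usb_unlink_urb(dev->out_urb);	/* asynchronous: may return while the
+					 * URB is still in flight */
+	usb_kill_urb(dev->out_urb);	/* synchronous: returns only after the
+					 * URB has fully completed, so the URB
+					 * and its buffer may be reused */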
+
+Reported-and-tested-by: syzbot+711468aa5c3a1eabf863@syzkaller.appspotmail.com
+Signed-off-by: Oliver Neukum <oneukum@suse.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/nfc/port100.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/nfc/port100.c
++++ b/drivers/nfc/port100.c
+@@ -792,7 +792,7 @@ static int port100_send_frame_async(stru
+
+ rc = port100_submit_urb_for_ack(dev, GFP_KERNEL);
+ if (rc)
+- usb_unlink_urb(dev->out_urb);
++ usb_kill_urb(dev->out_urb);
+
+ exit:
+ mutex_unlock(&dev->out_urb_lock);
--- /dev/null
+From 8caa016bfc129f2c925d52da43022171d1d1de91 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Wed, 20 Nov 2019 12:59:13 -0800
+Subject: selftests/x86/mov_ss_trap: Fix the SYSENTER test
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 8caa016bfc129f2c925d52da43022171d1d1de91 upstream.
+
+For reasons that I haven't quite fully diagnosed, running
+mov_ss_trap_32 on a 32-bit kernel results in an infinite loop in
+userspace. This appears to be because the hacky SYSENTER test
+doesn't segfault as desired; instead it corrupts the program state
+such that it loops forever.
+
+Fix it by explicitly clearing EBP before doing SYSENTER. This will
+give a more reliable segfault.
+
+Fixes: 59c2a7226fc5 ("x86/selftests: Add mov_to_ss test")
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/x86/mov_ss_trap.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/x86/mov_ss_trap.c
++++ b/tools/testing/selftests/x86/mov_ss_trap.c
+@@ -257,7 +257,8 @@ int main()
+ err(1, "sigaltstack");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
+ nr = SYS_getpid;
+- asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
++ /* Clear EBP first to make sure we segfault cleanly. */
++ asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+ : [ss] "m" (ss) : "flags", "rcx"
+ #ifdef __x86_64__
+ , "r11"
--- /dev/null
+From 4d2fa82d98d2d296043a04eb517d7dbade5b13b8 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Wed, 20 Nov 2019 11:58:32 -0800
+Subject: selftests/x86/sigreturn/32: Invalidate DS and ES when abusing the kernel
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 4d2fa82d98d2d296043a04eb517d7dbade5b13b8 upstream.
+
+If the kernel accidentally uses DS or ES while the user values are
+loaded, it will work fine for sane userspace. In the interest of
+simulating maximally insane userspace, make sigreturn_32 zero out DS
+and ES for the nasty parts so that inadvertent use of these segments
+will crash.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/x86/sigreturn.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/tools/testing/selftests/x86/sigreturn.c
++++ b/tools/testing/selftests/x86/sigreturn.c
+@@ -459,6 +459,19 @@ static void sigusr1(int sig, siginfo_t *
+ ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+ ctx->uc_mcontext.gregs[REG_CX] = 0;
+
++#ifdef __i386__
++ /*
++ * Make sure the kernel doesn't inadvertently use DS or ES-relative
++ * accesses in a region where user DS or ES is loaded.
++ *
++ * Skip this for 64-bit builds because long mode doesn't care about
++ * DS and ES and skipping it increases test coverage a little bit,
++ * since 64-bit kernels can still run the 32-bit build.
++ */
++ ctx->uc_mcontext.gregs[REG_DS] = 0;
++ ctx->uc_mcontext.gregs[REG_ES] = 0;
++#endif
++
+ memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+ requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */
+
arm-8904-1-skip-nomap-memblocks-while-finding-the-lowmem-highmem-boundary.patch
arc-perf-accommodate-big-endian-cpu.patch
x86-insn-fix-awk-regexp-warnings.patch
+x86-speculation-fix-incorrect-mds-taa-mitigation-status.patch
+x86-speculation-fix-redundant-mds-mitigation-message.patch
+nbd-prevent-memory-leak.patch
+x86-doublefault-32-fix-stack-canaries-in-the-double-fault-handler.patch
+x86-pti-32-size-initial_page_table-correctly.patch
+x86-cpu_entry_area-add-guard-page-for-entry-stack-on-32bit.patch
+selftests-x86-mov_ss_trap-fix-the-sysenter-test.patch
+selftests-x86-sigreturn-32-invalidate-ds-and-es-when-abusing-the-kernel.patch
+x86-pti-32-calculate-the-various-pti-cpu_entry_area-sizes-correctly-make-the-cpu_entry_area_pages-assert-precise.patch
+x86-entry-32-fix-fixup_espfix_stack-with-user-cr3.patch
+futex-prevent-exit-livelock.patch
+alsa-usb-audio-fix-null-dereference-at-parsing-badd.patch
+nfc-port100-handle-command-failure-cleanly.patch
+net-sysfs-fix-reference-count-leak-in-rx-netdev_queue_add_kobject.patch
+x86-entry-32-unwind-the-espfix-stack-earlier-on-exception-entry.patch
--- /dev/null
+From 880a98c339961eaa074393e3a2117cbe9125b8bb Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 21 Nov 2019 00:40:24 +0100
+Subject: x86/cpu_entry_area: Add guard page for entry stack on 32bit
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 880a98c339961eaa074393e3a2117cbe9125b8bb upstream.
+
+The entry stack in the cpu entry area is protected against overflow by the
+readonly GDT on 64-bit, but on 32-bit the GDT needs to be writeable and
+therefore does not trigger a fault on stack overflow.
+
+Add a guard page.
+
+Fixes: c482feefe1ae ("x86/entry/64: Make cpu_entry_area.tss read-only")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpu_entry_area.h | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/cpu_entry_area.h
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -20,8 +20,12 @@ struct cpu_entry_area {
+
+ /*
+ * The GDT is just below entry_stack and thus serves (on x86_64) as
+- * a a read-only guard page.
++ * a read-only guard page. On 32-bit the GDT must be writeable, so
++ * it needs an extra guard page.
+ */
++#ifdef CONFIG_X86_32
++ char guard_entry_stack[PAGE_SIZE];
++#endif
+ struct entry_stack_page entry_stack_page;
+
+ /*
--- /dev/null
+From 3580d0b29cab08483f84a16ce6a1151a1013695f Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 21 Nov 2019 11:50:12 +0100
+Subject: x86/doublefault/32: Fix stack canaries in the double fault handler
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 3580d0b29cab08483f84a16ce6a1151a1013695f upstream.
+
+The double fault TSS was missing GS setup, which is needed for stack
+canaries to work.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/doublefault.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kernel/doublefault.c
++++ b/arch/x86/kernel/doublefault.c
+@@ -65,6 +65,9 @@ struct x86_hw_tss doublefault_tss __cach
+ .ss = __KERNEL_DS,
+ .ds = __USER_DS,
+ .fs = __KERNEL_PERCPU,
++#ifndef CONFIG_X86_32_LAZY_GS
++ .gs = __KERNEL_STACK_CANARY,
++#endif
+
+ .__cr3 = __pa_nodebug(swapper_pg_dir),
+ };
--- /dev/null
+From 4a13b0e3e10996b9aa0b45a764ecfe49f6fcd360 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Sun, 24 Nov 2019 08:50:03 -0800
+Subject: x86/entry/32: Fix FIXUP_ESPFIX_STACK with user CR3
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 4a13b0e3e10996b9aa0b45a764ecfe49f6fcd360 upstream.
+
+UNWIND_ESPFIX_STACK needs to read the GDT, and the GDT mapping that
+can be accessed via %fs is not mapped in the user pagetables. Use
+SGDT to find the cpu_entry_area mapping and read the espfix offset
+from that instead.
+
+Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_32.S | 21 ++++++++++++++++++---
+ 1 file changed, 18 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -315,7 +315,8 @@
+
+ .macro CHECK_AND_APPLY_ESPFIX
+ #ifdef CONFIG_X86_ESPFIX32
+-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
++#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
++#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
+
+ ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX
+
+@@ -1056,12 +1057,26 @@ ENDPROC(entry_INT80_32)
+ * We can't call C functions using the ESPFIX stack. This code reads
+ * the high word of the segment base from the GDT and swiches to the
+ * normal stack and adjusts ESP with the matching offset.
++ *
++ * We might be on user CR3 here, so percpu data is not mapped and we can't
++ * access the GDT through the percpu segment. Instead, use SGDT to find
++ * the cpu_entry_area alias of the GDT.
+ */
+ #ifdef CONFIG_X86_ESPFIX32
+ /* fixup the stack */
+- mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
+- mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
++ pushl %ecx
++ subl $2*4, %esp
++ sgdt (%esp)
++ movl 2(%esp), %ecx /* GDT address */
++ /*
++ * Careful: ECX is a linear pointer, so we need to force base
++ * zero. %cs is the only known-linear segment we have right now.
++ */
++ mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */
++ mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */
+ shl $16, %eax
++ addl $2*4, %esp
++ popl %ecx
+ addl %esp, %eax /* the adjusted stack pointer */
+ pushl $__KERNEL_DS
+ pushl %eax
--- /dev/null
+From a1a338e5b6fe9e0a39c57c232dc96c198bb53e47 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Wed, 20 Nov 2019 10:10:49 +0100
+Subject: x86/entry/32: Unwind the ESPFIX stack earlier on exception entry
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit a1a338e5b6fe9e0a39c57c232dc96c198bb53e47 upstream.
+
+Right now, we do some fancy parts of the exception entry path while SS
+might have a nonzero base: we fill in regs->ss and regs->sp, and we
+consider switching to the kernel stack. This results in regs->ss and
+regs->sp referring to a non-flat stack and it may result in
+overflowing the entry stack. The former issue means that we can try to
+call iret_exc on a non-flat stack, which doesn't work.
+
+Tested with selftests/x86/sigreturn_32.
+
+Fixes: 45d7b255747c ("x86/entry/32: Enter the kernel via trampoline stack")
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
+index d9f401995278..647e2a272d08 100644
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -210,8 +210,6 @@
+ /*
+ * The high bits of the CS dword (__csh) are used for CS_FROM_*.
+ * Clear them in case hardware didn't do this for us.
+- *
+- * Be careful: we may have nonzero SS base due to ESPFIX.
+ */
+ andl $0x0000ffff, 4*4(%esp)
+
+@@ -307,12 +305,21 @@
+ .Lfinished_frame_\@:
+ .endm
+
+-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
++.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
+ cld
+ .if \skip_gs == 0
+ PUSH_GS
+ .endif
+ pushl %fs
++
++ pushl %eax
++ movl $(__KERNEL_PERCPU), %eax
++ movl %eax, %fs
++.if \unwind_espfix > 0
++ UNWIND_ESPFIX_STACK
++.endif
++ popl %eax
++
+ FIXUP_FRAME
+ pushl %es
+ pushl %ds
+@@ -326,8 +333,6 @@
+ movl $(__USER_DS), %edx
+ movl %edx, %ds
+ movl %edx, %es
+- movl $(__KERNEL_PERCPU), %edx
+- movl %edx, %fs
+ .if \skip_gs == 0
+ SET_KERNEL_GS %edx
+ .endif
+@@ -1153,18 +1158,17 @@ ENDPROC(entry_INT80_32)
+ lss (%esp), %esp /* switch to the normal stack segment */
+ #endif
+ .endm
++
+ .macro UNWIND_ESPFIX_STACK
++ /* It's safe to clobber %eax, all other regs need to be preserved */
+ #ifdef CONFIG_X86_ESPFIX32
+ movl %ss, %eax
+ /* see if on espfix stack */
+ cmpw $__ESPFIX_SS, %ax
+- jne 27f
+- movl $__KERNEL_DS, %eax
+- movl %eax, %ds
+- movl %eax, %es
++ jne .Lno_fixup_\@
+ /* switch to normal stack */
+ FIXUP_ESPFIX_STACK
+-27:
++.Lno_fixup_\@:
+ #endif
+ .endm
+
+@@ -1458,10 +1462,9 @@ END(page_fault)
+
+ common_exception_read_cr2:
+ /* the function address is in %gs's slot on the stack */
+- SAVE_ALL switch_stacks=1 skip_gs=1
++ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
+
+ ENCODE_FRAME_POINTER
+- UNWIND_ESPFIX_STACK
+
+ /* fixup %gs */
+ GS_TO_REG %ecx
+@@ -1483,9 +1486,8 @@ END(common_exception_read_cr2)
+
+ common_exception:
+ /* the function address is in %gs's slot on the stack */
+- SAVE_ALL switch_stacks=1 skip_gs=1
++ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
+ ENCODE_FRAME_POINTER
+- UNWIND_ESPFIX_STACK
+
+ /* fixup %gs */
+ GS_TO_REG %ecx
--- /dev/null
+From 05b042a1944322844eaae7ea596d5f154166d68a Mon Sep 17 00:00:00 2001
+From: Ingo Molnar <mingo@kernel.org>
+Date: Sun, 24 Nov 2019 11:21:44 +0100
+Subject: x86/pti/32: Calculate the various PTI cpu_entry_area sizes correctly, make the CPU_ENTRY_AREA_PAGES assert precise
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ingo Molnar <mingo@kernel.org>
+
+commit 05b042a1944322844eaae7ea596d5f154166d68a upstream.
+
+When two recent commits that increased the size of the 'struct cpu_entry_area'
+were merged in -tip, the 32-bit defconfig build started failing on the following
+build time assert:
+
+ ./include/linux/compiler.h:391:38: error: call to ‘__compiletime_assert_189’ declared with attribute error: BUILD_BUG_ON failed: CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE
+ arch/x86/mm/cpu_entry_area.c:189:2: note: in expansion of macro ‘BUILD_BUG_ON’
+ In function ‘setup_cpu_entry_area_ptes’,
+
+Which corresponds to the following build time assert:
+
+ BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+
+The purpose of this assert is to sanity check the fixed-value definition of
+CPU_ENTRY_AREA_PAGES in arch/x86/include/asm/pgtable_32_types.h:
+
+ #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41)
+
+The '41' is supposed to match sizeof(struct cpu_entry_area)/PAGE_SIZE, which value
+we didn't want to define in such a low level header, because it would cause
+dependency hell.
+
+Every time the size of cpu_entry_area is changed, we have to adjust CPU_ENTRY_AREA_PAGES
+accordingly - and this assert is checking that constraint.
+
+But the assert is both imprecise and buggy, primarily because it doesn't
+include the single readonly IDT page that is mapped at CPU_ENTRY_AREA_BASE
+(which begins at a PMD boundary).
+
+This bug was hidden by the fact that by accident CPU_ENTRY_AREA_PAGES is defined
+too large upstream (v5.4-rc8):
+
+ #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
+
+While 'struct cpu_entry_area' is 155648 bytes, or 38 pages. So we had two extra
+pages, which hid the bug.
+
+The following commit (not yet upstream) increased the size to 40 pages:
+
+ x86/iopl: ("Restrict iopl() permission scope")
+
+... but increased CPU_ENTRY_AREA_PAGES only to 41 - i.e. shortening the gap
+to just 1 extra page.
+
+Then another not-yet-upstream commit changed the size again:
+
+ 880a98c33996: ("x86/cpu_entry_area: Add guard page for entry stack on 32bit")
+
+Which increased the cpu_entry_area size from 38 to 39 pages, but
+didn't change CPU_ENTRY_AREA_PAGES (kept it at 40). This worked
+fine, because we still had a page left from the accidental 'reserve'.
+
+But when these two commits were merged into the same tree, the
+combined size of cpu_entry_area grew from 38 to 40 pages, while
+CPU_ENTRY_AREA_PAGES finally caught up to 40 as well.
+
+Which is fine in terms of functionality, but the assert broke:
+
+ BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+
+because CPU_ENTRY_AREA_MAP_SIZE is the total size of the area,
+which is 1 page larger due to the IDT page.
+
+To fix all this, change the assert to two precise asserts:
+
+ BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
+ BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
+
+This takes the IDT page into account, and also connects the size-based
+define of CPU_ENTRY_AREA_TOTAL_SIZE with the address-subtraction based
+define of CPU_ENTRY_AREA_MAP_SIZE.
+
+Also clean up some of the names which made it rather confusing:
+
+ - 'CPU_ENTRY_AREA_TOT_SIZE' wasn't actually the 'total' size of
+ the cpu-entry-area, but the per-cpu array size, so rename this
+ to CPU_ENTRY_AREA_ARRAY_SIZE.
+
+ - Introduce CPU_ENTRY_AREA_TOTAL_SIZE that _is_ the total mapping
+ size, with the IDT included.
+
+ - Add comments where '+1' denotes the IDT mapping - it wasn't
+ obvious and took me about 3 hours to decode...
+
+Finally, because this particular commit is actually applied after
+this patch:
+
+ 880a98c33996: ("x86/cpu_entry_area: Add guard page for entry stack on 32bit")
+
+Fix the CPU_ENTRY_AREA_PAGES value from 40 pages to the correct 39 pages.
+
+All future commits that change cpu_entry_area will have to adjust
+this value precisely.
+
+As a side note, we should probably attempt to remove CPU_ENTRY_AREA_PAGES
+and derive its value directly from the structure, without causing
+header hell - but that is an adventure for another day! :-)
+
+Fixes: 880a98c33996: ("x86/cpu_entry_area: Add guard page for entry stack on 32bit")
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: stable@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/cpu_entry_area.h | 12 +++++++-----
+ arch/x86/include/asm/pgtable_32_types.h | 8 ++++----
+ arch/x86/mm/cpu_entry_area.c | 4 +++-
+ 3 files changed, 14 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/cpu_entry_area.h
++++ b/arch/x86/include/asm/cpu_entry_area.h
+@@ -45,7 +45,6 @@ struct cpu_entry_area {
+ */
+ char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+ #endif
+-#ifdef CONFIG_CPU_SUP_INTEL
+ /*
+ * Per CPU debug store for Intel performance monitoring. Wastes a
+ * full page at the moment.
+@@ -56,24 +55,27 @@ struct cpu_entry_area {
+ * Reserve enough fixmap PTEs.
+ */
+ struct debug_store_buffers cpu_debug_buffers;
+-#endif
+ };
+
+-#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
+-#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
++#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
++#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
++
++/* Total size includes the readonly IDT mapping page as well: */
++#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
+
+ DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+ extern void setup_cpu_entry_areas(void);
+ extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
++/* Single page reserved for the readonly IDT mapping: */
+ #define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
+ #define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+ #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+ #define CPU_ENTRY_AREA_MAP_SIZE \
+- (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
++ (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+
+ extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+--- a/arch/x86/include/asm/pgtable_32_types.h
++++ b/arch/x86/include/asm/pgtable_32_types.h
+@@ -44,11 +44,11 @@ extern bool __vmalloc_start_set; /* set
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+-#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
++#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 39)
+
+-#define CPU_ENTRY_AREA_BASE \
+- ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \
+- & PMD_MASK)
++/* The +1 is for the readonly IDT page: */
++#define CPU_ENTRY_AREA_BASE \
++ ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
+
+ #define LDT_BASE_ADDR \
+ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
+--- a/arch/x86/mm/cpu_entry_area.c
++++ b/arch/x86/mm/cpu_entry_area.c
+@@ -188,7 +188,9 @@ static __init void setup_cpu_entry_area_
+ #ifdef CONFIG_X86_32
+ unsigned long start, end;
+
+- BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
++ /* The +1 is for the readonly IDT: */
++ BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
++ BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
+ BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
+
+ start = CPU_ENTRY_AREA_BASE;
--- /dev/null
+From f490e07c53d66045d9d739e134145ec9b38653d3 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 21 Nov 2019 00:40:23 +0100
+Subject: x86/pti/32: Size initial_page_table correctly
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit f490e07c53d66045d9d739e134145ec9b38653d3 upstream.
+
+Commit 945fd17ab6ba ("x86/cpu_entry_area: Sync cpu_entry_area to
+initial_page_table") introduced the sync for the initial page table for
+32bit.
+
+sync_initial_page_table() uses clone_pgd_range() which does the update for
+the kernel page table. If PTI is enabled it also updates the user space
+page table counterpart, which is assumed to be in the next page after the
+target PGD.
+
+At this point in time 32-bit did not have PTI support, so the user space
+page table update was not taking place.
+
+The support for PTI on 32-bit, which was introduced later on, did not take
+that into account and failed to add the user space counterpart for the
+initial page table.
+
+As a consequence sync_initial_page_table() overwrites any data which is
+located in the page behind initial_page_table, causing random failures,
+e.g. by corrupting doublefault_tss and wrecking the doublefault handler
+on 32bit.
+
+Fix it by adding a "user" page table right after initial_page_table.
+
+Fixes: 7757d607c6b3 ("x86/pti: Allow CONFIG_PAGE_TABLE_ISOLATION for x86_32")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Joerg Roedel <jroedel@suse.de>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/head_32.S | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/kernel/head_32.S
++++ b/arch/x86/kernel/head_32.S
+@@ -571,6 +571,16 @@ ENTRY(initial_page_table)
+ # error "Kernel PMDs should be 1, 2 or 3"
+ # endif
+ .align PAGE_SIZE /* needs to be page-sized too */
++
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
++ /*
++ * PTI needs another page so sync_initial_pagetable() works correctly
++ * and does not scribble over the data which is placed behind the
++ * actual initial_page_table. See clone_pgd_range().
++ */
++ .fill 1024, 4, 0
++#endif
++
+ #endif
+
+ .data
--- /dev/null
+From 64870ed1b12e235cfca3f6c6da75b542c973ff78 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Fri, 15 Nov 2019 11:14:44 -0500
+Subject: x86/speculation: Fix incorrect MDS/TAA mitigation status
+
+From: Waiman Long <longman@redhat.com>
+
+commit 64870ed1b12e235cfca3f6c6da75b542c973ff78 upstream.
+
+For MDS vulnerable processors with TSX support, enabling either MDS or
+TAA mitigations will enable the use of VERW to flush internal processor
+buffers at the right code path. IOW, they are either both mitigated
+or both not. However, if the command line options are inconsistent,
+the vulnerabilities sysfs files may not report the mitigation status
+correctly.
+
+For example, with only the "mds=off" option:
+
+ vulnerabilities/mds:Vulnerable; SMT vulnerable
+ vulnerabilities/tsx_async_abort:Mitigation: Clear CPU buffers; SMT vulnerable
+
+The mds vulnerabilities file has the wrong status in this case. Similarly,
+the taa vulnerability file will be wrong with mds mitigation on, but
+taa off.
+
+Change taa_select_mitigation() to sync up the two mitigation statuses
+and have them turned off if both "mds=off" and "tsx_async_abort=off"
+are present.
+
+Update the documentation to emphasize the fact that, on processors affected
+by both TAA and MDS, both "mds=off" and "tsx_async_abort=off" have to be
+specified together to take effect.
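+
+For example, on a CPU affected by both issues, the VERW based mitigation is
+only really disabled with a command line containing both options
+(illustrative):
+
+	mds=off tsx_async_abort=off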
+
+ [ bp: Massage and add kernel-parameters.txt change too. ]
+
+Fixes: 1b42f017415b ("x86/speculation/taa: Add mitigation for TSX Async Abort")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Kosina <jkosina@suse.cz>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: linux-doc@vger.kernel.org
+Cc: Mark Gross <mgross@linux.intel.com>
+Cc: <stable@vger.kernel.org>
+Cc: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: x86-ml <x86@kernel.org>
+Link: https://lkml.kernel.org/r/20191115161445.30809-2-longman@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ Documentation/admin-guide/hw-vuln/mds.rst | 7 +++++--
+ Documentation/admin-guide/hw-vuln/tsx_async_abort.rst | 5 ++++-
+ Documentation/admin-guide/kernel-parameters.txt | 11 +++++++++++
+ arch/x86/kernel/cpu/bugs.c | 17 +++++++++++++++--
+ 4 files changed, 35 insertions(+), 5 deletions(-)
+
+--- a/Documentation/admin-guide/hw-vuln/mds.rst
++++ b/Documentation/admin-guide/hw-vuln/mds.rst
+@@ -265,8 +265,11 @@ time with the option "mds=". The valid a
+
+ ============ =============================================================
+
+-Not specifying this option is equivalent to "mds=full".
+-
++Not specifying this option is equivalent to "mds=full". For processors
++that are affected by both TAA (TSX Asynchronous Abort) and MDS,
++specifying just "mds=off" without an accompanying "tsx_async_abort=off"
++will have no effect as the same mitigation is used for both
++vulnerabilities.
+
+ Mitigation selection guide
+ --------------------------
+--- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
++++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+@@ -174,7 +174,10 @@ the option "tsx_async_abort=". The valid
+ CPU is not vulnerable to cross-thread TAA attacks.
+ ============ =============================================================
+
+-Not specifying this option is equivalent to "tsx_async_abort=full".
++Not specifying this option is equivalent to "tsx_async_abort=full". For
++processors that are affected by both TAA and MDS, specifying just
++"tsx_async_abort=off" without an accompanying "mds=off" will have no
++effect as the same mitigation is used for both vulnerabilities.
+
+ The kernel command line also allows to control the TSX feature using the
+ parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2359,6 +2359,12 @@
+ SMT on vulnerable CPUs
+ off - Unconditionally disable MDS mitigation
+
++ On TAA-affected machines, mds=off can be prevented by
++ an active TAA mitigation as both vulnerabilities are
++ mitigated with the same mechanism so in order to disable
++ this mitigation, you need to specify tsx_async_abort=off
++ too.
++
+ Not specifying this option is equivalent to
+ mds=full.
+
+@@ -4773,6 +4779,11 @@
+ vulnerable to cross-thread TAA attacks.
+ off - Unconditionally disable TAA mitigation
+
++ On MDS-affected machines, tsx_async_abort=off can be
++ prevented by an active MDS mitigation as both vulnerabilities
++ are mitigated with the same mechanism so in order to disable
++ this mitigation, you need to specify mds=off too.
++
+ Not specifying this option is equivalent to
+ tsx_async_abort=full. On CPUs which are MDS affected
+ and deploy MDS mitigation, TAA mitigation is not
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -304,8 +304,12 @@ static void __init taa_select_mitigation
+ return;
+ }
+
+- /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
+- if (taa_mitigation == TAA_MITIGATION_OFF)
++ /*
++ * TAA mitigation via VERW is turned off if both
++ * tsx_async_abort=off and mds=off are specified.
++ */
++ if (taa_mitigation == TAA_MITIGATION_OFF &&
++ mds_mitigation == MDS_MITIGATION_OFF)
+ goto out;
+
+ if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
+@@ -339,6 +343,15 @@ static void __init taa_select_mitigation
+ if (taa_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+
++ /*
++ * Update MDS mitigation, if necessary, as the mds_user_clear is
++ * now enabled for TAA mitigation.
++ */
++ if (mds_mitigation == MDS_MITIGATION_OFF &&
++ boot_cpu_has_bug(X86_BUG_MDS)) {
++ mds_mitigation = MDS_MITIGATION_FULL;
++ mds_select_mitigation();
++ }
+ out:
+ pr_info("%s\n", taa_strings[taa_mitigation]);
+ }
--- /dev/null
+From cd5a2aa89e847bdda7b62029d94e95488d73f6b2 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Fri, 15 Nov 2019 11:14:45 -0500
+Subject: x86/speculation: Fix redundant MDS mitigation message
+
+From: Waiman Long <longman@redhat.com>
+
+commit cd5a2aa89e847bdda7b62029d94e95488d73f6b2 upstream.
+
+Since MDS and TAA mitigations are inter-related for processors that are
+affected by both vulnerabilities, the following confusing messages can
+be printed in the kernel log:
+
+ MDS: Vulnerable
+ MDS: Mitigation: Clear CPU buffers
+
+To avoid the first incorrect message, defer the printing of the MDS
+mitigation until after the TAA mitigation selection has been done. However,
+that has the side effect of printing the TAA mitigation before the MDS
+mitigation.
+
+ [ bp: Check box is affected/mitigations are disabled first before
+ printing and massage. ]
+
+Suggested-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Mark Gross <mgross@linux.intel.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tim Chen <tim.c.chen@linux.intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: x86-ml <x86@kernel.org>
+Link: https://lkml.kernel.org/r/20191115161445.30809-3-longman@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/bugs.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -39,6 +39,7 @@ static void __init spectre_v2_select_mit
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+ static void __init mds_select_mitigation(void);
++static void __init mds_print_mitigation(void);
+ static void __init taa_select_mitigation(void);
+
+ /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+@@ -108,6 +109,12 @@ void __init check_bugs(void)
+ mds_select_mitigation();
+ taa_select_mitigation();
+
++ /*
++ * As MDS and TAA mitigations are inter-related, print MDS
++ * mitigation until after TAA mitigation selection is done.
++ */
++ mds_print_mitigation();
++
+ arch_smt_update();
+
+ #ifdef CONFIG_X86_32
+@@ -245,6 +252,12 @@ static void __init mds_select_mitigation
+ (mds_nosmt || cpu_mitigations_auto_nosmt()))
+ cpu_smt_disable(false);
+ }
++}
++
++static void __init mds_print_mitigation(void)
++{
++ if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())
++ return;
+
+ pr_info("%s\n", mds_strings[mds_mitigation]);
+ }