--- /dev/null
+From 602b8593d2b4138c10e922eeaafe306f6b51817b Mon Sep 17 00:00:00 2001
+From: "Herton R. Krzesinski" <herton@redhat.com>
+Date: Fri, 14 Aug 2015 15:35:02 -0700
+Subject: ipc,sem: fix use after free on IPC_RMID after a task using same semaphore set exits
+
+From: "Herton R. Krzesinski" <herton@redhat.com>
+
+commit 602b8593d2b4138c10e922eeaafe306f6b51817b upstream.
+
+The current semaphore code allows a potential use after free: in
+exit_sem we may free the task's sem_undo_list while there is still
+another task looping through the same semaphore set and cleaning the
+sem_undo list in the freeary function (the task called IPC_RMID for the
+same semaphore set).
+
+For example, with a test program [1] running which keeps forking a lot
+of processes (which then do a semop call with SEM_UNDO flag), and with
+the parent removing the semaphore set with IPC_RMID right afterwards, on
+a kernel built with CONFIG_SLAB, CONFIG_SLAB_DEBUG and
+CONFIG_DEBUG_SPINLOCK, you can easily see something like the following
+in the kernel log:
+
+ Slab corruption (Not tainted): kmalloc-64 start=ffff88003b45c1c0, len=64
+ 000: 6b 6b 6b 6b 6b 6b 6b 6b 00 6b 6b 6b 6b 6b 6b 6b kkkkkkkk.kkkkkkk
+ 010: ff ff ff ff 6b 6b 6b 6b ff ff ff ff ff ff ff ff ....kkkk........
+ Prev obj: start=ffff88003b45c180, len=64
+ 000: 00 00 00 00 ad 4e ad de ff ff ff ff 5a 5a 5a 5a .....N......ZZZZ
+ 010: ff ff ff ff ff ff ff ff c0 fb 01 37 00 88 ff ff ...........7....
+ Next obj: start=ffff88003b45c200, len=64
+ 000: 00 00 00 00 ad 4e ad de ff ff ff ff 5a 5a 5a 5a .....N......ZZZZ
+ 010: ff ff ff ff ff ff ff ff 68 29 a7 3c 00 88 ff ff ........h).<....
+ BUG: spinlock wrong CPU on CPU#2, test/18028
+ general protection fault: 0000 [#1] SMP
+ Modules linked in: 8021q mrp garp stp llc nf_conntrack_ipv4 nf_defrag_ipv4 ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables binfmt_misc ppdev input_leds joydev parport_pc parport floppy serio_raw virtio_balloon virtio_rng virtio_console virtio_net iosf_mbi crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcspkr qxl ttm drm_kms_helper drm snd_hda_codec_generic i2c_piix4 snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer snd soundcore crc32c_intel virtio_pci virtio_ring virtio pata_acpi ata_generic [last unloaded: speedstep_lib]
+ CPU: 2 PID: 18028 Comm: test Not tainted 4.2.0-rc5+ #1
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
+ RIP: spin_dump+0x53/0xc0
+ Call Trace:
+ spin_bug+0x30/0x40
+ do_raw_spin_unlock+0x71/0xa0
+ _raw_spin_unlock+0xe/0x10
+ freeary+0x82/0x2a0
+ ? _raw_spin_lock+0xe/0x10
+ semctl_down.clone.0+0xce/0x160
+ ? __do_page_fault+0x19a/0x430
+ ? __audit_syscall_entry+0xa8/0x100
+ SyS_semctl+0x236/0x2c0
+ ? syscall_trace_leave+0xde/0x130
+ entry_SYSCALL_64_fastpath+0x12/0x71
+ Code: 8b 80 88 03 00 00 48 8d 88 60 05 00 00 48 c7 c7 a0 2c a4 81 31 c0 65 8b 15 eb 40 f3 7e e8 08 31 68 00 4d 85 e4 44 8b 4b 08 74 5e <45> 8b 84 24 88 03 00 00 49 8d 8c 24 60 05 00 00 8b 53 04 48 89
+ RIP [<ffffffff810d6053>] spin_dump+0x53/0xc0
+ RSP <ffff88003750fd68>
+ ---[ end trace 783ebb76612867a0 ]---
+ NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [test:18053]
+ Modules linked in: 8021q mrp garp stp llc nf_conntrack_ipv4 nf_defrag_ipv4 ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables binfmt_misc ppdev input_leds joydev parport_pc parport floppy serio_raw virtio_balloon virtio_rng virtio_console virtio_net iosf_mbi crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcspkr qxl ttm drm_kms_helper drm snd_hda_codec_generic i2c_piix4 snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer snd soundcore crc32c_intel virtio_pci virtio_ring virtio pata_acpi ata_generic [last unloaded: speedstep_lib]
+ CPU: 3 PID: 18053 Comm: test Tainted: G D 4.2.0-rc5+ #1
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
+ RIP: native_read_tsc+0x0/0x20
+ Call Trace:
+ ? delay_tsc+0x40/0x70
+ __delay+0xf/0x20
+ do_raw_spin_lock+0x96/0x140
+ _raw_spin_lock+0xe/0x10
+ sem_lock_and_putref+0x11/0x70
+ SYSC_semtimedop+0x7bf/0x960
+ ? handle_mm_fault+0xbf6/0x1880
+ ? dequeue_task_fair+0x79/0x4a0
+ ? __do_page_fault+0x19a/0x430
+ ? kfree_debugcheck+0x16/0x40
+ ? __do_page_fault+0x19a/0x430
+ ? __audit_syscall_entry+0xa8/0x100
+ ? do_audit_syscall_entry+0x66/0x70
+ ? syscall_trace_enter_phase1+0x139/0x160
+ SyS_semtimedop+0xe/0x10
+ SyS_semop+0x10/0x20
+ entry_SYSCALL_64_fastpath+0x12/0x71
+ Code: 47 10 83 e8 01 85 c0 89 47 10 75 08 65 48 89 3d 1f 74 ff 7e c9 c3 0f 1f 44 00 00 55 48 89 e5 e8 87 17 04 00 66 90 c9 c3 0f 1f 00 <55> 48 89 e5 0f 31 89 c1 48 89 d0 48 c1 e0 20 89 c9 48 09 c8 c9
+ Kernel panic - not syncing: softlockup: hung tasks
+
+I wasn't able to trigger any badness on a recent kernel without the
+proper config debugs enabled, however I have softlockup reports on some
+kernel versions, in the semaphore code, which are similar to the above (the
+scenario is seen on some servers running IBM DB2 which uses semaphore
+syscalls).
+
+The patch here fixes the race against freeary, by acquiring or waiting
+on the sem_undo_list lock as necessary (exit_sem can race with freeary,
+while freeary sets un->semid to -1 and removes the same sem_undo from
+list_proc or when it removes the last sem_undo).
+
+After the patch I'm unable to reproduce the problem using the test case
+[1].
+
+[1] Test case used below:
+
+ #include <stdio.h>
+ #include <sys/types.h>
+ #include <sys/ipc.h>
+ #include <sys/sem.h>
+ #include <sys/wait.h>
+ #include <stdlib.h>
+ #include <time.h>
+ #include <unistd.h>
+ #include <errno.h>
+
+ #define NSEM 1
+ #define NSET 5
+
+ int sid[NSET];
+
+ void thread()
+ {
+ struct sembuf op;
+ int s;
+ uid_t pid = getuid();
+
+ s = rand() % NSET;
+ op.sem_num = pid % NSEM;
+ op.sem_op = 1;
+ op.sem_flg = SEM_UNDO;
+
+ semop(sid[s], &op, 1);
+ exit(EXIT_SUCCESS);
+ }
+
+ void create_set()
+ {
+ int i, j;
+ pid_t p;
+ union {
+ int val;
+ struct semid_ds *buf;
+ unsigned short int *array;
+ struct seminfo *__buf;
+ } un;
+
+ /* Create and initialize semaphore set */
+ for (i = 0; i < NSET; i++) {
+ sid[i] = semget(IPC_PRIVATE , NSEM, 0644 | IPC_CREAT);
+ if (sid[i] < 0) {
+ perror("semget");
+ exit(EXIT_FAILURE);
+ }
+ }
+ un.val = 0;
+ for (i = 0; i < NSET; i++) {
+ for (j = 0; j < NSEM; j++) {
+ if (semctl(sid[i], j, SETVAL, un) < 0)
+ perror("semctl");
+ }
+ }
+
+ /* Launch threads that operate on semaphore set */
+ for (i = 0; i < NSEM * NSET * NSET; i++) {
+ p = fork();
+ if (p < 0)
+ perror("fork");
+ if (p == 0)
+ thread();
+ }
+
+ /* Free semaphore set */
+ for (i = 0; i < NSET; i++) {
+ if (semctl(sid[i], NSEM, IPC_RMID))
+ perror("IPC_RMID");
+ }
+
+ /* Wait for forked processes to exit */
+ while (wait(NULL)) {
+ if (errno == ECHILD)
+ break;
+ };
+ }
+
+ int main(int argc, char **argv)
+ {
+ pid_t p;
+
+ srand(time(NULL));
+
+ while (1) {
+ p = fork();
+ if (p < 0) {
+ perror("fork");
+ exit(EXIT_FAILURE);
+ }
+ if (p == 0) {
+ create_set();
+ goto end;
+ }
+
+ /* Wait for forked processes to exit */
+ while (wait(NULL)) {
+ if (errno == ECHILD)
+ break;
+ };
+ }
+ end:
+ return 0;
+ }
+
+[akpm@linux-foundation.org: use normal comment layout]
+Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
+Acked-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Rafael Aquini <aquini@redhat.com>
+CC: Aristeu Rozanski <aris@redhat.com>
+Cc: David Jeffery <djeffery@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+---
+ ipc/sem.c | 23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -2049,17 +2049,28 @@ void exit_sem(struct task_struct *tsk)
+ rcu_read_lock();
+ un = list_entry_rcu(ulp->list_proc.next,
+ struct sem_undo, list_proc);
+- if (&un->list_proc == &ulp->list_proc)
+- semid = -1;
+- else
+- semid = un->semid;
++ if (&un->list_proc == &ulp->list_proc) {
++ /*
++ * We must wait for freeary() before freeing this ulp,
++ * in case we raced with last sem_undo. There is a small
++ * possibility where we exit while freeary() didn't
++ * finish unlocking sem_undo_list.
++ */
++ spin_unlock_wait(&ulp->lock);
++ rcu_read_unlock();
++ break;
++ }
++ spin_lock(&ulp->lock);
++ semid = un->semid;
++ spin_unlock(&ulp->lock);
+
++ /* exit_sem raced with IPC_RMID, nothing to do */
+ if (semid == -1) {
+ rcu_read_unlock();
+- break;
++ continue;
+ }
+
+- sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
++ sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
+ /* exit_sem raced with IPC_RMID, nothing to do */
+ if (IS_ERR(sma)) {
+ rcu_read_unlock();
--- /dev/null
+From 3ed1f8a99d70ea1cd1508910eb107d0edcae5009 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Fri, 14 Aug 2015 15:35:10 -0700
+Subject: ipc/sem.c: update/correct memory barriers
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit 3ed1f8a99d70ea1cd1508910eb107d0edcae5009 upstream.
+
+sem_lock() did not properly pair memory barriers:
+
+!spin_is_locked() and spin_unlock_wait() are both only control barriers.
+The code needs an acquire barrier, otherwise the cpu might perform read
+operations before the lock test.
+
+As no primitive exists inside <include/spinlock.h> and since it seems
+no one wants another primitive, the code creates a local primitive within
+ipc/sem.c.
+
+With regard to -stable:
+
+The change of sem_wait_array() is a bugfix, the change to sem_lock() is a
+nop (just a preprocessor redefinition to improve the readability). The
+bugfix is necessary for all kernels that use sem_wait_array() (i.e.:
+starting from 3.10).
+
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Reported-by: Oleg Nesterov <oleg@redhat.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
+Cc: Kirill Tkhai <ktkhai@parallels.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/sem.c | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -253,6 +253,16 @@ static void sem_rcu_free(struct rcu_head
+ }
+
+ /*
++ * spin_unlock_wait() and !spin_is_locked() are not memory barriers, they
++ * are only control barriers.
++ * The code must pair with spin_unlock(&sem->lock) or
++ * spin_unlock(&sem_perm.lock), thus just the control barrier is insufficient.
++ *
++ * smp_rmb() is sufficient, as writes cannot pass the control barrier.
++ */
++#define ipc_smp_acquire__after_spin_is_unlocked() smp_rmb()
++
++/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+@@ -275,6 +285,7 @@ static void sem_wait_array(struct sem_ar
+ sem = sma->sem_base + i;
+ spin_unlock_wait(&sem->lock);
+ }
++ ipc_smp_acquire__after_spin_is_unlocked();
+ }
+
+ /*
+@@ -326,8 +337,13 @@ static inline int sem_lock(struct sem_ar
+
+ /* Then check that the global lock is free */
+ if (!spin_is_locked(&sma->sem_perm.lock)) {
+- /* spin_is_locked() is not a memory barrier */
+- smp_mb();
++ /*
++ * We need a memory barrier with acquire semantics,
++ * otherwise we can race with another thread that does:
++ * complex_count++;
++ * spin_unlock(sem_perm.lock);
++ */
++ ipc_smp_acquire__after_spin_is_unlocked();
+
+ /* Now repeat the test of complex_count:
+ * It can't change anymore until we drop sem->lock.
--- /dev/null
+From 4f32be677b124a49459e2603321c7a5605ceb9f8 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+Date: Fri, 14 Aug 2015 15:34:56 -0700
+Subject: mm/hwpoison: fix page refcount of unknown non LRU page
+
+From: Wanpeng Li <wanpeng.li@hotmail.com>
+
+commit 4f32be677b124a49459e2603321c7a5605ceb9f8 upstream.
+
+After trying to drain pages from the pagevec/pageset, we try to get a
+reference on the page again; however, that reference is not dropped if
+the page is still not on the LRU list.
+
+Fix it by adding a put_page() call to drop the page reference taken by
+__get_any_page().
+
+Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
+Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memory-failure.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1473,6 +1473,8 @@ static int get_any_page(struct page *pag
+ */
+ ret = __get_any_page(page, pfn, 0);
+ if (!PageLRU(page)) {
++ /* Drop page reference which is from __get_any_page() */
++ put_page(page);
+ pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
+ pfn, page->flags);
+ return -EIO;