--- /dev/null
+From 53dad6d3a8e5ac1af8bacc6ac2134ae1a8b085f1 Mon Sep 17 00:00:00 2001
+From: Davidlohr Bueso <davidlohr@hp.com>
+Date: Mon, 23 Sep 2013 17:04:45 -0700
+Subject: ipc: fix race with LSMs
+
+From: Davidlohr Bueso <davidlohr@hp.com>
+
+commit 53dad6d3a8e5ac1af8bacc6ac2134ae1a8b085f1 upstream.
+
+Currently, IPC mechanisms do security and auditing related checks under
+RCU. However, since security modules can free the security structure,
+for example, through selinux_[sem,msg_queue,shm]_free_security(), we can
+race if the structure is freed before other tasks are done with it,
+creating a use-after-free condition. Manfred illustrates this nicely,
+for instance with shared mem and selinux:
+
+ -> do_shmat calls rcu_read_lock()
+ -> do_shmat calls shm_object_check().
+ Checks that the object is still valid - but doesn't acquire any locks.
+ Then it returns.
+ -> do_shmat calls security_shm_shmat (e.g. selinux_shm_shmat)
+ -> selinux_shm_shmat calls ipc_has_perm()
+ -> ipc_has_perm accesses ipc_perms->security
+
+shm_close()
+ -> shm_close acquires rw_mutex & shm_lock
+ -> shm_close calls shm_destroy
+ -> shm_destroy calls security_shm_free (e.g. selinux_shm_free_security)
+ -> selinux_shm_free_security calls ipc_free_security(&shp->shm_perm)
+ -> ipc_free_security calls kfree(ipc_perms->security)
+
+This patch delays the freeing of the security structures until after all
+RCU readers are done. Furthermore, it aligns the security life cycle with
+that of the rest of IPC - freeing them based on the reference counter.
+For situations where we need not free security, the current behavior is
+kept. Linus states:
+
+ "... the old behavior was suspect for another reason too: having the
+ security blob go away from under a user sounds like it could cause
+ various other problems anyway, so I think the old code was at least
+ _prone_ to bugs even if it didn't have catastrophic behavior."
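+
+Condensed, the new life cycle looks roughly like this (taken from the shm
+hunks below; msg and sem gain analogous msg_rcu_free()/sem_rcu_free()
+callbacks):
+
+    static void shm_rcu_free(struct rcu_head *head)
+    {
+        struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+        struct shmid_kernel *shp = ipc_rcu_to_struct(p);
+
+        /* Runs only after every RCU read-side critical section that
+         * could still dereference shp->shm_perm.security has ended.
+         */
+        security_shm_free(shp);
+        ipc_rcu_free(head);
+    }
+
+    /* in shm_destroy(): dropping the last reference queues the callback
+     * via call_rcu() instead of freeing the security blob inline
+     */
+    ipc_rcu_putref(shp, shm_rcu_free);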
+
+I have tested this patch with IPC testcases from LTP on both my
+quad-core laptop and on a 64 core NUMA server. In both cases selinux is
+enabled, and tests pass for both voluntary and forced preemption models.
+While the mentioned races are theoretical (at least no one has reported
+them), I wanted to make sure that this new logic doesn't break anything
+we weren't aware of.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
+Acked-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/msg.c | 19 +++++++++++++------
+ ipc/sem.c | 34 ++++++++++++++++++----------------
+ ipc/shm.c | 17 ++++++++++++-----
+ ipc/util.c | 32 ++++++++++++--------------------
+ ipc/util.h | 10 +++++++++-
+ 5 files changed, 64 insertions(+), 48 deletions(-)
+
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_n
+ ipc_rmid(&msg_ids(ns), &s->q_perm);
+ }
+
++static void msg_rcu_free(struct rcu_head *head)
++{
++ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
++ struct msg_queue *msq = ipc_rcu_to_struct(p);
++
++ security_msg_queue_free(msq);
++ ipc_rcu_free(head);
++}
++
+ /**
+ * newque - Create a new msg queue
+ * @ns: namespace
+@@ -189,15 +198,14 @@ static int newque(struct ipc_namespace *
+ msq->q_perm.security = NULL;
+ retval = security_msg_queue_alloc(msq);
+ if (retval) {
+- ipc_rcu_putref(msq);
++ ipc_rcu_putref(msq, ipc_rcu_free);
+ return retval;
+ }
+
+ /* ipc_addid() locks msq upon success. */
+ id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
+ if (id < 0) {
+- security_msg_queue_free(msq);
+- ipc_rcu_putref(msq);
++ ipc_rcu_putref(msq, msg_rcu_free);
+ return id;
+ }
+
+@@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace
+ free_msg(msg);
+ }
+ atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+- security_msg_queue_free(msq);
+- ipc_rcu_putref(msq);
++ ipc_rcu_putref(msq, msg_rcu_free);
+ }
+
+ /*
+@@ -717,7 +724,7 @@ long do_msgsnd(int msqid, long mtype, vo
+ rcu_read_lock();
+ ipc_lock_object(&msq->q_perm);
+
+- ipc_rcu_putref(msq);
++ ipc_rcu_putref(msq, ipc_rcu_free);
+ if (msq->q_perm.deleted) {
+ err = -EIDRM;
+ goto out_unlock0;
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -243,6 +243,15 @@ static void merge_queues(struct sem_arra
+ }
+ }
+
++static void sem_rcu_free(struct rcu_head *head)
++{
++ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
++ struct sem_array *sma = ipc_rcu_to_struct(p);
++
++ security_sem_free(sma);
++ ipc_rcu_free(head);
++}
++
+ /*
+ * If the request contains only one semaphore operation, and there are
+ * no complex transactions pending, lock only the semaphore involved.
+@@ -374,12 +383,7 @@ static inline struct sem_array *sem_obta
+ static inline void sem_lock_and_putref(struct sem_array *sma)
+ {
+ sem_lock(sma, NULL, -1);
+- ipc_rcu_putref(sma);
+-}
+-
+-static inline void sem_putref(struct sem_array *sma)
+-{
+- ipc_rcu_putref(sma);
++ ipc_rcu_putref(sma, ipc_rcu_free);
+ }
+
+ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+@@ -458,14 +462,13 @@ static int newary(struct ipc_namespace *
+ sma->sem_perm.security = NULL;
+ retval = security_sem_alloc(sma);
+ if (retval) {
+- ipc_rcu_putref(sma);
++ ipc_rcu_putref(sma, ipc_rcu_free);
+ return retval;
+ }
+
+ id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
+ if (id < 0) {
+- security_sem_free(sma);
+- ipc_rcu_putref(sma);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ return id;
+ }
+ ns->used_sems += nsems;
+@@ -1047,8 +1050,7 @@ static void freeary(struct ipc_namespace
+
+ wake_up_sem_queue_do(&tasks);
+ ns->used_sems -= sma->sem_nsems;
+- security_sem_free(sma);
+- ipc_rcu_putref(sma);
++ ipc_rcu_putref(sma, sem_rcu_free);
+ }
+
+ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
+@@ -1292,7 +1294,7 @@ static int semctl_main(struct ipc_namesp
+ rcu_read_unlock();
+ sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ if(sem_io == NULL) {
+- sem_putref(sma);
++ ipc_rcu_putref(sma, ipc_rcu_free);
+ return -ENOMEM;
+ }
+
+@@ -1328,20 +1330,20 @@ static int semctl_main(struct ipc_namesp
+ if(nsems > SEMMSL_FAST) {
+ sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ if(sem_io == NULL) {
+- sem_putref(sma);
++ ipc_rcu_putref(sma, ipc_rcu_free);
+ return -ENOMEM;
+ }
+ }
+
+ if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
+- sem_putref(sma);
++ ipc_rcu_putref(sma, ipc_rcu_free);
+ err = -EFAULT;
+ goto out_free;
+ }
+
+ for (i = 0; i < nsems; i++) {
+ if (sem_io[i] > SEMVMX) {
+- sem_putref(sma);
++ ipc_rcu_putref(sma, ipc_rcu_free);
+ err = -ERANGE;
+ goto out_free;
+ }
+@@ -1629,7 +1631,7 @@ static struct sem_undo *find_alloc_undo(
+ /* step 2: allocate new undo structure */
+ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+ if (!new) {
+- sem_putref(sma);
++ ipc_rcu_putref(sma, ipc_rcu_free);
+ return ERR_PTR(-ENOMEM);
+ }
+
+--- a/ipc/shm.c
++++ b/ipc/shm.c
+@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struc
+ ipc_lock_object(&ipcp->shm_perm);
+ }
+
++static void shm_rcu_free(struct rcu_head *head)
++{
++ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
++ struct shmid_kernel *shp = ipc_rcu_to_struct(p);
++
++ security_shm_free(shp);
++ ipc_rcu_free(head);
++}
++
+ static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
+ {
+ ipc_rmid(&shm_ids(ns), &s->shm_perm);
+@@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_names
+ user_shm_unlock(file_inode(shp->shm_file)->i_size,
+ shp->mlock_user);
+ fput (shp->shm_file);
+- security_shm_free(shp);
+- ipc_rcu_putref(shp);
++ ipc_rcu_putref(shp, shm_rcu_free);
+ }
+
+ /*
+@@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace *
+ shp->shm_perm.security = NULL;
+ error = security_shm_alloc(shp);
+ if (error) {
+- ipc_rcu_putref(shp);
++ ipc_rcu_putref(shp, ipc_rcu_free);
+ return error;
+ }
+
+@@ -566,8 +574,7 @@ no_id:
+ user_shm_unlock(size, shp->mlock_user);
+ fput(file);
+ no_file:
+- security_shm_free(shp);
+- ipc_rcu_putref(shp);
++ ipc_rcu_putref(shp, shm_rcu_free);
+ return error;
+ }
+
+--- a/ipc/util.c
++++ b/ipc/util.c
+@@ -474,11 +474,6 @@ void ipc_free(void* ptr, int size)
+ kfree(ptr);
+ }
+
+-struct ipc_rcu {
+- struct rcu_head rcu;
+- atomic_t refcount;
+-} ____cacheline_aligned_in_smp;
+-
+ /**
+ * ipc_rcu_alloc - allocate ipc and rcu space
+ * @size: size desired
+@@ -505,27 +500,24 @@ int ipc_rcu_getref(void *ptr)
+ return atomic_inc_not_zero(&p->refcount);
+ }
+
+-/**
+- * ipc_schedule_free - free ipc + rcu space
+- * @head: RCU callback structure for queued work
+- */
+-static void ipc_schedule_free(struct rcu_head *head)
+-{
+- vfree(container_of(head, struct ipc_rcu, rcu));
+-}
+-
+-void ipc_rcu_putref(void *ptr)
++void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
+ {
+ struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
+
+ if (!atomic_dec_and_test(&p->refcount))
+ return;
+
+- if (is_vmalloc_addr(ptr)) {
+- call_rcu(&p->rcu, ipc_schedule_free);
+- } else {
+- kfree_rcu(p, rcu);
+- }
++ call_rcu(&p->rcu, func);
++}
++
++void ipc_rcu_free(struct rcu_head *head)
++{
++ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
++
++ if (is_vmalloc_addr(p))
++ vfree(p);
++ else
++ kfree(p);
+ }
+
+ /**
+--- a/ipc/util.h
++++ b/ipc/util.h
+@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ip
+ static inline void shm_exit_ns(struct ipc_namespace *ns) { }
+ #endif
+
++struct ipc_rcu {
++ struct rcu_head rcu;
++ atomic_t refcount;
++} ____cacheline_aligned_in_smp;
++
++#define ipc_rcu_to_struct(p) ((void *)(p+1))
++
+ /*
+ * Structure that holds the parameters needed by the ipc operations
+ * (see after)
+@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size);
+ */
+ void* ipc_rcu_alloc(int size);
+ int ipc_rcu_getref(void *ptr);
+-void ipc_rcu_putref(void *ptr);
++void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
++void ipc_rcu_free(struct rcu_head *head);
+
+ struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
+ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
--- /dev/null
+From 4271b05a227dc6175b66c3d9941aeab09048aeb2 Mon Sep 17 00:00:00 2001
+From: Davidlohr Bueso <davidlohr@hp.com>
+Date: Mon, 30 Sep 2013 13:45:26 -0700
+Subject: ipc,msg: prevent race with rmid in msgsnd,msgrcv
+
+From: Davidlohr Bueso <davidlohr@hp.com>
+
+commit 4271b05a227dc6175b66c3d9941aeab09048aeb2 upstream.
+
+This fixes a race in both msgrcv() and msgsnd() between finding the msg
+and actually dealing with the queue, as another thread can delete the
+queue underneath us if we are preempted before acquiring the
+kern_ipc_perm.lock.
+
+Manfred illustrates this nicely:
+
+Assume a preemptible kernel that is preempted just after
+
+ msq = msq_obtain_object_check(ns, msqid)
+
+in do_msgrcv(). The only lock that is held is rcu_read_lock().
+
+Now the other thread processes IPC_RMID. When the first task is
+resumed, then it will happily wait for messages on a deleted queue.
+
+Fix this by checking whether the queue has been deleted after taking the
+lock.
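+
+Condensed, the do_msgrcv() side becomes roughly the following (do_msgsnd()
+gets the same check; see the hunks below):
+
+    rcu_read_lock();
+    msq = msq_obtain_object_check(ns, msqid);   /* no lock held yet */
+    ...
+    ipc_lock_object(&msq->q_perm);
+
+    /* raced with RMID? */
+    if (msq->q_perm.deleted) {
+        msg = ERR_PTR(-EIDRM);
+        goto out_unlock0;
+    }
+    msg = find_msg(msq, &msgtyp, mode);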
+
+Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
+Reported-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/msg.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -695,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, vo
+ if (ipcperms(ns, &msq->q_perm, S_IWUGO))
+ goto out_unlock0;
+
++ /* raced with RMID? */
++ if (msq->q_perm.deleted) {
++ err = -EIDRM;
++ goto out_unlock0;
++ }
++
+ err = security_msg_queue_msgsnd(msq, msg, msgflg);
+ if (err)
+ goto out_unlock0;
+@@ -901,6 +907,13 @@ long do_msgrcv(int msqid, void __user *b
+ goto out_unlock1;
+
+ ipc_lock_object(&msq->q_perm);
++
++ /* raced with RMID? */
++ if (msq->q_perm.deleted) {
++ msg = ERR_PTR(-EIDRM);
++ goto out_unlock0;
++ }
++
+ msg = find_msg(msq, &msgtyp, mode);
+ if (!IS_ERR(msg)) {
+ /*
--- /dev/null
+From 5e9d527591421ccdb16acb8c23662231135d8686 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Mon, 30 Sep 2013 13:45:04 -0700
+Subject: ipc/sem.c: fix race in sem_lock()
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit 5e9d527591421ccdb16acb8c23662231135d8686 upstream.
+
+The exclusion of complex operations in sem_lock() is insufficient: after
+acquiring the per-semaphore lock, a simple op must first check that
+sem_perm.lock is not locked and only after that test check
+complex_count. The current code does it the other way around - and that
+creates a race. Details are below.
+
+The patch is a complete rewrite of sem_lock(), based in part on the code
+from Mike Galbraith. It removes all gotos and all loops and thus the
+risk of livelocks.
+
+I have tested the patch (together with the next one) on my i3 laptop and
+it didn't cause any problems.
+
+The bug is probably also present in 3.10 and 3.11, but for these kernels
+it might be simpler just to move the test of sma->complex_count after
+the spin_is_locked() test.
+
+Details of the bug:
+
+Assume:
+ - sma->complex_count = 0.
+ - Thread 1: semtimedop(complex op that must sleep)
+ - Thread 2: semtimedop(simple op).
+
+Pseudo-Trace:
+
+Thread 1: sem_lock(): acquire sem_perm.lock
+Thread 1: sem_lock(): check for ongoing simple ops
+ Nothing ongoing, thread 2 is still before sem_lock().
+Thread 1: try_atomic_semop()
+ <<< preempted.
+
+Thread 2: sem_lock():
+ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
+ int nsops)
+ {
+ int locknum;
+ again:
+ if (nsops == 1 && !sma->complex_count) {
+ struct sem *sem = sma->sem_base + sops->sem_num;
+
+ /* Lock just the semaphore we are interested in. */
+ spin_lock(&sem->lock);
+
+ /*
+ * If sma->complex_count was set while we were spinning,
+ * we may need to look at things we did not lock here.
+ */
+ if (unlikely(sma->complex_count)) {
+ spin_unlock(&sem->lock);
+ goto lock_array;
+ }
+ <<<<<<<<<
+ <<< complex_count is still 0.
+ <<<
+ <<< Here it is preempted
+ <<<<<<<<<
+
+Thread 1: try_atomic_semop() returns, notices that it must sleep.
+Thread 1: increases sma->complex_count.
+Thread 1: drops sem_perm.lock
+Thread 2:
+ /*
+ * Another process is holding the global lock on the
+ * sem_array; we cannot enter our critical section,
+ * but have to wait for the global lock to be released.
+ */
+ if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
+ spin_unlock(&sem->lock);
+ spin_unlock_wait(&sma->sem_perm.lock);
+ goto again;
+ }
+ <<< sem_perm.lock already dropped, thus no "goto again;"
+
+ locknum = sops->sem_num;
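+
+With the rewrite, the fast path for a simple op is roughly (condensed from
+the hunk below):
+
+    sem = sma->sem_base + sops->sem_num;
+    if (sma->complex_count == 0) {
+        spin_lock(&sem->lock);
+        /* test the global lock first ... */
+        if (!spin_is_locked(&sma->sem_perm.lock)) {
+            smp_mb();   /* spin_is_locked() is not a memory barrier */
+            /* ... and only then re-test complex_count */
+            if (sma->complex_count == 0)
+                return sops->sem_num;   /* fast path */
+        }
+        spin_unlock(&sem->lock);
+    }
+    /* otherwise: slow path via ipc_lock_object(&sma->sem_perm) */
+
+i.e. the order of the two tests is reversed compared to the old code
+quoted above.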
+
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Mike Galbraith <bitbucket@online.de>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/sem.c | 122 +++++++++++++++++++++++++++++++++++++++-----------------------
+ 1 file changed, 78 insertions(+), 44 deletions(-)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -253,70 +253,104 @@ static void sem_rcu_free(struct rcu_head
+ }
+
+ /*
++ * Wait until all currently ongoing simple ops have completed.
++ * Caller must own sem_perm.lock.
++ * New simple ops cannot start, because simple ops first check
++ * that sem_perm.lock is free.
++ */
++static void sem_wait_array(struct sem_array *sma)
++{
++ int i;
++ struct sem *sem;
++
++ for (i = 0; i < sma->sem_nsems; i++) {
++ sem = sma->sem_base + i;
++ spin_unlock_wait(&sem->lock);
++ }
++}
++
++/*
+ * If the request contains only one semaphore operation, and there are
+ * no complex transactions pending, lock only the semaphore involved.
+ * Otherwise, lock the entire semaphore array, since we either have
+ * multiple semaphores in our own semops, or we need to look at
+ * semaphores from other pending complex operations.
+- *
+- * Carefully guard against sma->complex_count changing between zero
+- * and non-zero while we are spinning for the lock. The value of
+- * sma->complex_count cannot change while we are holding the lock,
+- * so sem_unlock should be fine.
+- *
+- * The global lock path checks that all the local locks have been released,
+- * checking each local lock once. This means that the local lock paths
+- * cannot start their critical sections while the global lock is held.
+ */
+ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
+ int nsops)
+ {
+- int locknum;
+- again:
+- if (nsops == 1 && !sma->complex_count) {
+- struct sem *sem = sma->sem_base + sops->sem_num;
++ struct sem *sem;
+
+- /* Lock just the semaphore we are interested in. */
+- spin_lock(&sem->lock);
++ if (nsops != 1) {
++ /* Complex operation - acquire a full lock */
++ ipc_lock_object(&sma->sem_perm);
+
+- /*
+- * If sma->complex_count was set while we were spinning,
+- * we may need to look at things we did not lock here.
++ /* And wait until all simple ops that are processed
++ * right now have dropped their locks.
+ */
+- if (unlikely(sma->complex_count)) {
+- spin_unlock(&sem->lock);
+- goto lock_array;
+- }
++ sem_wait_array(sma);
++ return -1;
++ }
++
++ /*
++ * Only one semaphore affected - try to optimize locking.
++ * The rules are:
++ * - optimized locking is possible if no complex operation
++ * is either enqueued or processed right now.
++ * - The test for enqueued complex ops is simple:
++ * sma->complex_count != 0
++ * - Testing for complex ops that are processed right now is
++ * a bit more difficult. Complex ops acquire the full lock
++ * and first wait that the running simple ops have completed.
++ * (see above)
++ * Thus: If we own a simple lock and the global lock is free
++ * and complex_count is now 0, then it will stay 0 and
++ * thus just locking sem->lock is sufficient.
++ */
++ sem = sma->sem_base + sops->sem_num;
+
++ if (sma->complex_count == 0) {
+ /*
+- * Another process is holding the global lock on the
+- * sem_array; we cannot enter our critical section,
+- * but have to wait for the global lock to be released.
++ * It appears that no complex operation is around.
++ * Acquire the per-semaphore lock.
+ */
+- if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
+- spin_unlock(&sem->lock);
+- spin_unlock_wait(&sma->sem_perm.lock);
+- goto again;
++ spin_lock(&sem->lock);
++
++ /* Then check that the global lock is free */
++ if (!spin_is_locked(&sma->sem_perm.lock)) {
++ /* spin_is_locked() is not a memory barrier */
++ smp_mb();
++
++ /* Now repeat the test of complex_count:
++ * It can't change anymore until we drop sem->lock.
++ * Thus: if is now 0, then it will stay 0.
++ */
++ if (sma->complex_count == 0) {
++ /* fast path successful! */
++ return sops->sem_num;
++ }
+ }
++ spin_unlock(&sem->lock);
++ }
++
++ /* slow path: acquire the full lock */
++ ipc_lock_object(&sma->sem_perm);
+
+- locknum = sops->sem_num;
++ if (sma->complex_count == 0) {
++ /* False alarm:
++ * There is no complex operation, thus we can switch
++ * back to the fast path.
++ */
++ spin_lock(&sem->lock);
++ ipc_unlock_object(&sma->sem_perm);
++ return sops->sem_num;
+ } else {
+- int i;
+- /*
+- * Lock the semaphore array, and wait for all of the
+- * individual semaphore locks to go away. The code
+- * above ensures no new single-lock holders will enter
+- * their critical section while the array lock is held.
++ /* Not a false alarm, thus complete the sequence for a
++ * full lock.
+ */
+- lock_array:
+- ipc_lock_object(&sma->sem_perm);
+- for (i = 0; i < sma->sem_nsems; i++) {
+- struct sem *sem = sma->sem_base + i;
+- spin_unlock_wait(&sem->lock);
+- }
+- locknum = -1;
++ sem_wait_array(sma);
++ return -1;
+ }
+- return locknum;
+ }
+
+ static inline void sem_unlock(struct sem_array *sma, int locknum)
--- /dev/null
+From 6d07b68ce16ae9535955ba2059dedba5309c3ca1 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Mon, 30 Sep 2013 13:45:06 -0700
+Subject: ipc/sem.c: optimize sem_lock()
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit 6d07b68ce16ae9535955ba2059dedba5309c3ca1 upstream.
+
+Operations that need access to the whole array must guarantee that there
+are no simple operations ongoing. Right now this is achieved by
+spin_unlock_wait(sem->lock) on all semaphores.
+
+If complex_count is nonzero, then this spin_unlock_wait() is not
+necessary: it was already performed in the past by the thread that
+increased complex_count, and even though sem_perm.lock was dropped in
+between, no simple operation could have started, because simple
+operations cannot start while complex_count is non-zero.
+
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Mike Galbraith <bitbucket@online.de>
+Cc: Rik van Riel <riel@redhat.com>
+Reviewed-by: Davidlohr Bueso <davidlohr@hp.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/sem.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -257,12 +257,20 @@ static void sem_rcu_free(struct rcu_head
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that sem_perm.lock is free.
++ * that a) sem_perm.lock is free and b) complex_count is 0.
+ */
+ static void sem_wait_array(struct sem_array *sma)
+ {
+ int i;
+ struct sem *sem;
+
++ if (sma->complex_count) {
++ /* The thread that increased sma->complex_count waited on
++ * all sem->lock locks. Thus we don't need to wait again.
++ */
++ return;
++ }
++
+ for (i = 0; i < sma->sem_nsems; i++) {
+ sem = sma->sem_base + i;
+ spin_unlock_wait(&sem->lock);
--- /dev/null
+From d8c633766ad88527f25d9f81a5c2f083d78a2b39 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Mon, 30 Sep 2013 13:45:07 -0700
+Subject: ipc/sem.c: synchronize the proc interface
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit d8c633766ad88527f25d9f81a5c2f083d78a2b39 upstream.
+
+The proc interface is not aware of sem_lock(); it calls
+ipc_lock_object() directly instead. This means that simple semop()
+operations can run in parallel with the proc interface. Right now this
+is harmless, because the implementation doesn't do anything that
+requires proper synchronization.
+
+But it is dangerous and therefore should be fixed.
+
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Rik van Riel <riel@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/sem.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -2103,6 +2103,14 @@ static int sysvipc_sem_proc_show(struct
+ struct sem_array *sma = it;
+ time_t sem_otime;
+
++ /*
++ * The proc interface isn't aware of sem_lock(), it calls
++ * ipc_lock_object() directly (in sysvipc_find_ipc).
++ * In order to stay compatible with sem_lock(), we must wait until
++ * all simple semop() calls have left their critical regions.
++ */
++ sem_wait_array(sma);
++
+ sem_otime = get_semotime(sma);
+
+ return seq_printf(s,
--- /dev/null
+From 0e8c665699e953fa58dc1b0b0d09e5dce7343cc7 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Mon, 30 Sep 2013 13:45:25 -0700
+Subject: ipc/sem.c: update sem_otime for all operations
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit 0e8c665699e953fa58dc1b0b0d09e5dce7343cc7 upstream.
+
+In commit 0a2b9d4c7967 ("ipc/sem.c: move wake_up_process out of the
+spinlock section"), the update of a semaphore's sem_otime (last semop
+time) was moved to one central position (do_smart_update).
+
+But do_smart_update() is only called for operations that modify the
+array, which means that wait-for-zero semops no longer update sem_otime.
+
+The fix is simple:
+Non-alter operations must update sem_otime.
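+
+Condensed, the semtimedop() success path becomes (see the last hunk below;
+set_semotime() is the new helper factored out of do_smart_update()):
+
+    error = perform_atomic_semop(sma, sops, nsops, un,
+                                 task_tgid_vnr(current));
+    if (error == 0) {
+        if (alter)
+            do_smart_update(sma, sops, nsops, 1, &tasks);
+        else
+            set_semotime(sma, sops);  /* wait-for-zero now updates sem_otime too */
+    }
+    if (error <= 0)
+        goto out_unlock_free;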
+
+[akpm@linux-foundation.org: coding-style fixes]
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Reported-by: Jia He <jiakernel@gmail.com>
+Tested-by: Jia He <jiakernel@gmail.com>
+Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/sem.c | 42 +++++++++++++++++++++++++++++-------------
+ 1 file changed, 29 insertions(+), 13 deletions(-)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -918,6 +918,24 @@ again:
+ }
+
+ /**
++ * set_semotime(sma, sops) - set sem_otime
++ * @sma: semaphore array
++ * @sops: operations that modified the array, may be NULL
++ *
++ * sem_otime is replicated to avoid cache line trashing.
++ * This function sets one instance to the current time.
++ */
++static void set_semotime(struct sem_array *sma, struct sembuf *sops)
++{
++ if (sops == NULL) {
++ sma->sem_base[0].sem_otime = get_seconds();
++ } else {
++ sma->sem_base[sops[0].sem_num].sem_otime =
++ get_seconds();
++ }
++}
++
++/**
+ * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
+ * @sma: semaphore array
+ * @sops: operations that were performed
+@@ -967,17 +985,10 @@ static void do_smart_update(struct sem_a
+ }
+ }
+ }
+- if (otime) {
+- if (sops == NULL) {
+- sma->sem_base[0].sem_otime = get_seconds();
+- } else {
+- sma->sem_base[sops[0].sem_num].sem_otime =
+- get_seconds();
+- }
+- }
++ if (otime)
++ set_semotime(sma, sops);
+ }
+
+-
+ /* The following counts are associated to each semaphore:
+ * semncnt number of tasks waiting on semval being nonzero
+ * semzcnt number of tasks waiting on semval being zero
+@@ -1839,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
+
+ error = perform_atomic_semop(sma, sops, nsops, un,
+ task_tgid_vnr(current));
+- if (error <= 0) {
+- if (alter && error == 0)
++ if (error == 0) {
++ /* If the operation was successful, then do
++ * the required updates.
++ */
++ if (alter)
+ do_smart_update(sma, sops, nsops, 1, &tasks);
+-
+- goto out_unlock_free;
++ else
++ set_semotime(sma, sops);
+ }
++ if (error <= 0)
++ goto out_unlock_free;
+
+ /* We need to sleep on this operation, so we put the current
+ * task into the pending queue and go to sleep.
ipc-drop-ipc_lock_by_ptr.patch
ipc-shm-drop-shm_lock_check.patch
ipc-drop-ipc_lock_check.patch
+ipc-fix-race-with-lsms.patch
+ipc-sem.c-fix-race-in-sem_lock.patch
+ipc-sem.c-optimize-sem_lock.patch
+ipc-sem.c-synchronize-the-proc-interface.patch
+ipc-sem.c-update-sem_otime-for-all-operations.patch
+ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch