git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 16 Oct 2013 00:10:22 +0000 (17:10 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 16 Oct 2013 00:10:22 +0000 (17:10 -0700)
added patches:
ipc-fix-race-with-lsms.patch
ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch
ipc-sem.c-fix-race-in-sem_lock.patch
ipc-sem.c-optimize-sem_lock.patch
ipc-sem.c-synchronize-the-proc-interface.patch
ipc-sem.c-update-sem_otime-for-all-operations.patch

queue-3.10/ipc-fix-race-with-lsms.patch [new file with mode: 0644]
queue-3.10/ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch [new file with mode: 0644]
queue-3.10/ipc-sem.c-fix-race-in-sem_lock.patch [new file with mode: 0644]
queue-3.10/ipc-sem.c-optimize-sem_lock.patch [new file with mode: 0644]
queue-3.10/ipc-sem.c-synchronize-the-proc-interface.patch [new file with mode: 0644]
queue-3.10/ipc-sem.c-update-sem_otime-for-all-operations.patch [new file with mode: 0644]
queue-3.10/series

diff --git a/queue-3.10/ipc-fix-race-with-lsms.patch b/queue-3.10/ipc-fix-race-with-lsms.patch
new file mode 100644 (file)
index 0000000..7438aeb
--- /dev/null
@@ -0,0 +1,347 @@
+From 53dad6d3a8e5ac1af8bacc6ac2134ae1a8b085f1 Mon Sep 17 00:00:00 2001
+From: Davidlohr Bueso <davidlohr@hp.com>
+Date: Mon, 23 Sep 2013 17:04:45 -0700
+Subject: ipc: fix race with LSMs
+
+From: Davidlohr Bueso <davidlohr@hp.com>
+
+commit 53dad6d3a8e5ac1af8bacc6ac2134ae1a8b085f1 upstream.
+
+Currently, IPC mechanisms do security and auditing related checks under
+RCU.  However, since security modules can free the security structure,
+for example, through selinux_[sem,msg_queue,shm]_free_security(), we can
+race if the structure is freed before other tasks are done with it,
+creating a use-after-free condition.  Manfred illustrates this nicely,
+for instance with shared mem and selinux:
+
+ -> do_shmat calls rcu_read_lock()
+ -> do_shmat calls shm_object_check().
+     Checks that the object is still valid - but doesn't acquire any locks.
+     Then it returns.
+ -> do_shmat calls security_shm_shmat (e.g. selinux_shm_shmat)
+ -> selinux_shm_shmat calls ipc_has_perm()
+ -> ipc_has_perm accesses ipc_perms->security
+
+shm_close()
+ -> shm_close acquires rw_mutex & shm_lock
+ -> shm_close calls shm_destroy
+ -> shm_destroy calls security_shm_free (e.g. selinux_shm_free_security)
+ -> selinux_shm_free_security calls ipc_free_security(&shp->shm_perm)
+ -> ipc_free_security calls kfree(ipc_perms->security)
+
+This patch delays the freeing of the security structures after all RCU
+readers are done.  Furthermore it aligns the security life cycle with
+that of the rest of IPC - freeing them based on the reference counter.
+For situations where we need not free security, the current behavior is
+kept.  Linus states:
+
+ "... the old behavior was suspect for another reason too: having the
+  security blob go away from under a user sounds like it could cause
+  various other problems anyway, so I think the old code was at least
+  _prone_ to bugs even if it didn't have catastrophic behavior."
+
+I have tested this patch with IPC testcases from LTP on both my
+quad-core laptop and on a 64 core NUMA server.  In both cases selinux is
+enabled, and tests pass for both voluntary and forced preemption models.
+While the mentioned races are theoretical (at least no one has reported
+them), I wanted to make sure that this new logic doesn't break anything
+we weren't aware of.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
+Acked-by: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/msg.c  |   19 +++++++++++++------
+ ipc/sem.c  |   34 ++++++++++++++++++----------------
+ ipc/shm.c  |   17 ++++++++++++-----
+ ipc/util.c |   32 ++++++++++++--------------------
+ ipc/util.h |   10 +++++++++-
+ 5 files changed, 64 insertions(+), 48 deletions(-)
+
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_n
+       ipc_rmid(&msg_ids(ns), &s->q_perm);
+ }
++static void msg_rcu_free(struct rcu_head *head)
++{
++      struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
++      struct msg_queue *msq = ipc_rcu_to_struct(p);
++
++      security_msg_queue_free(msq);
++      ipc_rcu_free(head);
++}
++
+ /**
+  * newque - Create a new msg queue
+  * @ns: namespace
+@@ -189,15 +198,14 @@ static int newque(struct ipc_namespace *
+       msq->q_perm.security = NULL;
+       retval = security_msg_queue_alloc(msq);
+       if (retval) {
+-              ipc_rcu_putref(msq);
++              ipc_rcu_putref(msq, ipc_rcu_free);
+               return retval;
+       }
+       /* ipc_addid() locks msq upon success. */
+       id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
+       if (id < 0) {
+-              security_msg_queue_free(msq);
+-              ipc_rcu_putref(msq);
++              ipc_rcu_putref(msq, msg_rcu_free);
+               return id;
+       }
+@@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace
+               free_msg(msg);
+       }
+       atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+-      security_msg_queue_free(msq);
+-      ipc_rcu_putref(msq);
++      ipc_rcu_putref(msq, msg_rcu_free);
+ }
+ /*
+@@ -717,7 +724,7 @@ long do_msgsnd(int msqid, long mtype, vo
+               rcu_read_lock();
+               ipc_lock_object(&msq->q_perm);
+-              ipc_rcu_putref(msq);
++              ipc_rcu_putref(msq, ipc_rcu_free);
+               if (msq->q_perm.deleted) {
+                       err = -EIDRM;
+                       goto out_unlock0;
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -243,6 +243,15 @@ static void merge_queues(struct sem_arra
+       }
+ }
++static void sem_rcu_free(struct rcu_head *head)
++{
++      struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
++      struct sem_array *sma = ipc_rcu_to_struct(p);
++
++      security_sem_free(sma);
++      ipc_rcu_free(head);
++}
++
+ /*
+  * If the request contains only one semaphore operation, and there are
+  * no complex transactions pending, lock only the semaphore involved.
+@@ -374,12 +383,7 @@ static inline struct sem_array *sem_obta
+ static inline void sem_lock_and_putref(struct sem_array *sma)
+ {
+       sem_lock(sma, NULL, -1);
+-      ipc_rcu_putref(sma);
+-}
+-
+-static inline void sem_putref(struct sem_array *sma)
+-{
+-      ipc_rcu_putref(sma);
++      ipc_rcu_putref(sma, ipc_rcu_free);
+ }
+ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+@@ -458,14 +462,13 @@ static int newary(struct ipc_namespace *
+       sma->sem_perm.security = NULL;
+       retval = security_sem_alloc(sma);
+       if (retval) {
+-              ipc_rcu_putref(sma);
++              ipc_rcu_putref(sma, ipc_rcu_free);
+               return retval;
+       }
+       id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
+       if (id < 0) {
+-              security_sem_free(sma);
+-              ipc_rcu_putref(sma);
++              ipc_rcu_putref(sma, sem_rcu_free);
+               return id;
+       }
+       ns->used_sems += nsems;
+@@ -1047,8 +1050,7 @@ static void freeary(struct ipc_namespace
+       wake_up_sem_queue_do(&tasks);
+       ns->used_sems -= sma->sem_nsems;
+-      security_sem_free(sma);
+-      ipc_rcu_putref(sma);
++      ipc_rcu_putref(sma, sem_rcu_free);
+ }
+ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
+@@ -1292,7 +1294,7 @@ static int semctl_main(struct ipc_namesp
+                       rcu_read_unlock();
+                       sem_io = ipc_alloc(sizeof(ushort)*nsems);
+                       if(sem_io == NULL) {
+-                              sem_putref(sma);
++                              ipc_rcu_putref(sma, ipc_rcu_free);
+                               return -ENOMEM;
+                       }
+@@ -1328,20 +1330,20 @@ static int semctl_main(struct ipc_namesp
+               if(nsems > SEMMSL_FAST) {
+                       sem_io = ipc_alloc(sizeof(ushort)*nsems);
+                       if(sem_io == NULL) {
+-                              sem_putref(sma);
++                              ipc_rcu_putref(sma, ipc_rcu_free);
+                               return -ENOMEM;
+                       }
+               }
+               if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
+-                      sem_putref(sma);
++                      ipc_rcu_putref(sma, ipc_rcu_free);
+                       err = -EFAULT;
+                       goto out_free;
+               }
+               for (i = 0; i < nsems; i++) {
+                       if (sem_io[i] > SEMVMX) {
+-                              sem_putref(sma);
++                              ipc_rcu_putref(sma, ipc_rcu_free);
+                               err = -ERANGE;
+                               goto out_free;
+                       }
+@@ -1629,7 +1631,7 @@ static struct sem_undo *find_alloc_undo(
+       /* step 2: allocate new undo structure */
+       new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+       if (!new) {
+-              sem_putref(sma);
++              ipc_rcu_putref(sma, ipc_rcu_free);
+               return ERR_PTR(-ENOMEM);
+       }
+--- a/ipc/shm.c
++++ b/ipc/shm.c
+@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struc
+       ipc_lock_object(&ipcp->shm_perm);
+ }
++static void shm_rcu_free(struct rcu_head *head)
++{
++      struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
++      struct shmid_kernel *shp = ipc_rcu_to_struct(p);
++
++      security_shm_free(shp);
++      ipc_rcu_free(head);
++}
++
+ static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
+ {
+       ipc_rmid(&shm_ids(ns), &s->shm_perm);
+@@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_names
+               user_shm_unlock(file_inode(shp->shm_file)->i_size,
+                                               shp->mlock_user);
+       fput (shp->shm_file);
+-      security_shm_free(shp);
+-      ipc_rcu_putref(shp);
++      ipc_rcu_putref(shp, shm_rcu_free);
+ }
+ /*
+@@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace *
+       shp->shm_perm.security = NULL;
+       error = security_shm_alloc(shp);
+       if (error) {
+-              ipc_rcu_putref(shp);
++              ipc_rcu_putref(shp, ipc_rcu_free);
+               return error;
+       }
+@@ -566,8 +574,7 @@ no_id:
+               user_shm_unlock(size, shp->mlock_user);
+       fput(file);
+ no_file:
+-      security_shm_free(shp);
+-      ipc_rcu_putref(shp);
++      ipc_rcu_putref(shp, shm_rcu_free);
+       return error;
+ }
+--- a/ipc/util.c
++++ b/ipc/util.c
+@@ -474,11 +474,6 @@ void ipc_free(void* ptr, int size)
+               kfree(ptr);
+ }
+-struct ipc_rcu {
+-      struct rcu_head rcu;
+-      atomic_t refcount;
+-} ____cacheline_aligned_in_smp;
+-
+ /**
+  *    ipc_rcu_alloc   -       allocate ipc and rcu space 
+  *    @size: size desired
+@@ -505,27 +500,24 @@ int ipc_rcu_getref(void *ptr)
+       return atomic_inc_not_zero(&p->refcount);
+ }
+-/**
+- * ipc_schedule_free - free ipc + rcu space
+- * @head: RCU callback structure for queued work
+- */
+-static void ipc_schedule_free(struct rcu_head *head)
+-{
+-      vfree(container_of(head, struct ipc_rcu, rcu));
+-}
+-
+-void ipc_rcu_putref(void *ptr)
++void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
+ {
+       struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
+       if (!atomic_dec_and_test(&p->refcount))
+               return;
+-      if (is_vmalloc_addr(ptr)) {
+-              call_rcu(&p->rcu, ipc_schedule_free);
+-      } else {
+-              kfree_rcu(p, rcu);
+-      }
++      call_rcu(&p->rcu, func);
++}
++
++void ipc_rcu_free(struct rcu_head *head)
++{
++      struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
++
++      if (is_vmalloc_addr(p))
++              vfree(p);
++      else
++              kfree(p);
+ }
+ /**
+--- a/ipc/util.h
++++ b/ipc/util.h
+@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ip
+ static inline void shm_exit_ns(struct ipc_namespace *ns) { }
+ #endif
++struct ipc_rcu {
++      struct rcu_head rcu;
++      atomic_t refcount;
++} ____cacheline_aligned_in_smp;
++
++#define ipc_rcu_to_struct(p)  ((void *)(p+1))
++
+ /*
+  * Structure that holds the parameters needed by the ipc operations
+  * (see after)
+@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size);
+  */
+ void* ipc_rcu_alloc(int size);
+ int ipc_rcu_getref(void *ptr);
+-void ipc_rcu_putref(void *ptr);
++void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
++void ipc_rcu_free(struct rcu_head *head);
+ struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
+ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
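
Note (illustration, not part of the patch above): the core idea of "ipc: fix race with LSMs" is that the security blob may only be freed through the same refcount-driven, RCU-deferred path as the IPC object itself - ipc_rcu_putref(ptr, func) runs the given destructor only when the last reference drops, and call_rcu() further delays it past any RCU readers.  The stand-alone C sketch below mimics only the refcount-plus-destructor-callback shape in user space; struct ipc_obj, obj_putref() and obj_free() are invented names, and the RCU grace period itself is not modelled.

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative stand-in for the kernel's ipc_rcu-wrapped objects. */
    struct ipc_obj {
            atomic_int refcount;
            void *security;         /* freed by the LSM hook in the kernel */
    };

    static void obj_free(struct ipc_obj *p)
    {
            /* In the kernel, msg/sem/shm_rcu_free() runs here after the RCU
             * grace period and calls security_*_free() before freeing p. */
            free(p->security);
            free(p);
    }

    /* Analogue of ipc_rcu_putref(ptr, func): drop one reference and invoke
     * the destructor only when the last reference goes away. */
    static void obj_putref(struct ipc_obj *p, void (*func)(struct ipc_obj *))
    {
            if (atomic_fetch_sub(&p->refcount, 1) != 1)
                    return;
            /* The kernel would call_rcu() here instead of calling directly. */
            func(p);
    }

    int main(void)
    {
            struct ipc_obj *p = calloc(1, sizeof(*p));

            atomic_init(&p->refcount, 2);   /* creator + one concurrent user */
            p->security = malloc(16);

            obj_putref(p, obj_free);        /* object and blob still alive */
            obj_putref(p, obj_free);        /* last ref: both freed together */
            printf("freed only after the last putref\n");
            return 0;
    }
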
diff --git a/queue-3.10/ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch b/queue-3.10/ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch
new file mode 100644 (file)
index 0000000..dc77723
--- /dev/null
@@ -0,0 +1,69 @@
+From 4271b05a227dc6175b66c3d9941aeab09048aeb2 Mon Sep 17 00:00:00 2001
+From: Davidlohr Bueso <davidlohr@hp.com>
+Date: Mon, 30 Sep 2013 13:45:26 -0700
+Subject: ipc,msg: prevent race with rmid in msgsnd,msgrcv
+
+From: Davidlohr Bueso <davidlohr@hp.com>
+
+commit 4271b05a227dc6175b66c3d9941aeab09048aeb2 upstream.
+
+This fixes a race in both msgrcv() and msgsnd() between finding the msg
+and actually dealing with the queue, as another thread can delete the
+queue (IPC_RMID) underneath us if we are preempted before acquiring the
+kern_ipc_perm.lock.
+
+Manfred illustrates this nicely:
+
+Assume a preemptible kernel that is preempted just after
+
+    msq = msq_obtain_object_check(ns, msqid)
+
+in do_msgrcv().  The only lock that is held is rcu_read_lock().
+
+Now the other thread processes IPC_RMID.  When the first task is
+resumed, then it will happily wait for messages on a deleted queue.
+
+Fix this by checking whether the queue has been deleted after taking the
+lock.
+
+Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
+Reported-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/msg.c |   13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/ipc/msg.c
++++ b/ipc/msg.c
+@@ -695,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, vo
+               if (ipcperms(ns, &msq->q_perm, S_IWUGO))
+                       goto out_unlock0;
++              /* raced with RMID? */
++              if (msq->q_perm.deleted) {
++                      err = -EIDRM;
++                      goto out_unlock0;
++              }
++
+               err = security_msg_queue_msgsnd(msq, msg, msgflg);
+               if (err)
+                       goto out_unlock0;
+@@ -901,6 +907,13 @@ long do_msgrcv(int msqid, void __user *b
+                       goto out_unlock1;
+               ipc_lock_object(&msq->q_perm);
++
++              /* raced with RMID? */
++              if (msq->q_perm.deleted) {
++                      msg = ERR_PTR(-EIDRM);
++                      goto out_unlock0;
++              }
++
+               msg = find_msg(msq, &msgtyp, mode);
+               if (!IS_ERR(msg)) {
+                       /*
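
Note (illustration, not part of the patch above): the change makes do_msgsnd()/do_msgrcv() return -EIDRM when the queue was removed between the lockless lookup and ipc_lock_object().  Seen from user space this is the documented SysV behaviour that a blocked msgrcv() is woken with EIDRM when its queue is deleted.  A minimal Linux demo, assuming the standard <sys/msg.h> API (the struct name msgbuf_demo is arbitrary):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ipc.h>
    #include <sys/msg.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>

    struct msgbuf_demo { long mtype; char mtext[32]; };

    int main(void)
    {
            int id = msgget(IPC_PRIVATE, IPC_CREAT | 0600);
            if (id < 0) { perror("msgget"); return 1; }

            pid_t pid = fork();
            if (pid == 0) {
                    struct msgbuf_demo m;
                    /* Blocks: the queue is empty.  Deleting the queue must
                     * wake us with EIDRM, not leave us waiting forever. */
                    if (msgrcv(id, &m, sizeof(m.mtext), 0, 0) < 0)
                            printf("msgrcv: %s\n", strerror(errno));
                    return 0;
            }

            sleep(1);                       /* let the child block in msgrcv() */
            msgctl(id, IPC_RMID, NULL);     /* the concurrent RMID */
            wait(NULL);
            return 0;
    }

Expected output from the child: "msgrcv: Identifier removed".
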
diff --git a/queue-3.10/ipc-sem.c-fix-race-in-sem_lock.patch b/queue-3.10/ipc-sem.c-fix-race-in-sem_lock.patch
new file mode 100644 (file)
index 0000000..ba3fb72
--- /dev/null
@@ -0,0 +1,249 @@
+From 5e9d527591421ccdb16acb8c23662231135d8686 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Mon, 30 Sep 2013 13:45:04 -0700
+Subject: ipc/sem.c: fix race in sem_lock()
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit 5e9d527591421ccdb16acb8c23662231135d8686 upstream.
+
+The exclusion of complex operations in sem_lock() is insufficient: after
+acquiring the per-semaphore lock, a simple op must first check that
+sem_perm.lock is not locked and only after that test check
+complex_count.  The current code does it the other way around - and that
+creates a race.  Details are below.
+
+The patch is a complete rewrite of sem_lock(), based in part on the code
+from Mike Galbraith.  It removes all gotos and all loops and thus the
+risk of livelocks.
+
+I have tested the patch (together with the next one) on my i3 laptop and
+it didn't cause any problems.
+
+The bug is probably also present in 3.10 and 3.11, but for these kernels
+it might be simpler just to move the test of sma->complex_count after
+the spin_is_locked() test.
+
+Details of the bug:
+
+Assume:
+ - sma->complex_count = 0.
+ - Thread 1: semtimedop(complex op that must sleep)
+ - Thread 2: semtimedop(simple op).
+
+Pseudo-Trace:
+
+Thread 1: sem_lock(): acquire sem_perm.lock
+Thread 1: sem_lock(): check for ongoing simple ops
+                       Nothing ongoing, thread 2 is still before sem_lock().
+Thread 1: try_atomic_semop()
+       <<< preempted.
+
+Thread 2: sem_lock():
+        static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
+                                      int nsops)
+        {
+                int locknum;
+         again:
+                if (nsops == 1 && !sma->complex_count) {
+                        struct sem *sem = sma->sem_base + sops->sem_num;
+
+                        /* Lock just the semaphore we are interested in. */
+                        spin_lock(&sem->lock);
+
+                        /*
+                         * If sma->complex_count was set while we were spinning,
+                         * we may need to look at things we did not lock here.
+                         */
+                        if (unlikely(sma->complex_count)) {
+                                spin_unlock(&sem->lock);
+                                goto lock_array;
+                        }
+        <<<<<<<<<
+       <<< complex_count is still 0.
+       <<<
+        <<< Here it is preempted
+        <<<<<<<<<
+
+Thread 1: try_atomic_semop() returns, notices that it must sleep.
+Thread 1: increases sma->complex_count.
+Thread 1: drops sem_perm.lock
+Thread 2:
+                /*
+                 * Another process is holding the global lock on the
+                 * sem_array; we cannot enter our critical section,
+                 * but have to wait for the global lock to be released.
+                 */
+                if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
+                        spin_unlock(&sem->lock);
+                        spin_unlock_wait(&sma->sem_perm.lock);
+                        goto again;
+                }
+       <<< sem_perm.lock already dropped, thus no "goto again;"
+
+                locknum = sops->sem_num;
+
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Mike Galbraith <bitbucket@online.de>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/sem.c |  122 +++++++++++++++++++++++++++++++++++++++-----------------------
+ 1 file changed, 78 insertions(+), 44 deletions(-)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -253,70 +253,104 @@ static void sem_rcu_free(struct rcu_head
+ }
+ /*
++ * Wait until all currently ongoing simple ops have completed.
++ * Caller must own sem_perm.lock.
++ * New simple ops cannot start, because simple ops first check
++ * that sem_perm.lock is free.
++ */
++static void sem_wait_array(struct sem_array *sma)
++{
++      int i;
++      struct sem *sem;
++
++      for (i = 0; i < sma->sem_nsems; i++) {
++              sem = sma->sem_base + i;
++              spin_unlock_wait(&sem->lock);
++      }
++}
++
++/*
+  * If the request contains only one semaphore operation, and there are
+  * no complex transactions pending, lock only the semaphore involved.
+  * Otherwise, lock the entire semaphore array, since we either have
+  * multiple semaphores in our own semops, or we need to look at
+  * semaphores from other pending complex operations.
+- *
+- * Carefully guard against sma->complex_count changing between zero
+- * and non-zero while we are spinning for the lock. The value of
+- * sma->complex_count cannot change while we are holding the lock,
+- * so sem_unlock should be fine.
+- *
+- * The global lock path checks that all the local locks have been released,
+- * checking each local lock once. This means that the local lock paths
+- * cannot start their critical sections while the global lock is held.
+  */
+ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
+                             int nsops)
+ {
+-      int locknum;
+- again:
+-      if (nsops == 1 && !sma->complex_count) {
+-              struct sem *sem = sma->sem_base + sops->sem_num;
++      struct sem *sem;
+-              /* Lock just the semaphore we are interested in. */
+-              spin_lock(&sem->lock);
++      if (nsops != 1) {
++              /* Complex operation - acquire a full lock */
++              ipc_lock_object(&sma->sem_perm);
+-              /*
+-               * If sma->complex_count was set while we were spinning,
+-               * we may need to look at things we did not lock here.
++              /* And wait until all simple ops that are processed
++               * right now have dropped their locks.
+                */
+-              if (unlikely(sma->complex_count)) {
+-                      spin_unlock(&sem->lock);
+-                      goto lock_array;
+-              }
++              sem_wait_array(sma);
++              return -1;
++      }
++
++      /*
++       * Only one semaphore affected - try to optimize locking.
++       * The rules are:
++       * - optimized locking is possible if no complex operation
++       *   is either enqueued or processed right now.
++       * - The test for enqueued complex ops is simple:
++       *      sma->complex_count != 0
++       * - Testing for complex ops that are processed right now is
++       *   a bit more difficult. Complex ops acquire the full lock
++       *   and first wait until the running simple ops have completed.
++       *   (see above)
++       *   Thus: If we own a simple lock and the global lock is free
++       *      and complex_count is now 0, then it will stay 0 and
++       *      thus just locking sem->lock is sufficient.
++       */
++      sem = sma->sem_base + sops->sem_num;
++      if (sma->complex_count == 0) {
+               /*
+-               * Another process is holding the global lock on the
+-               * sem_array; we cannot enter our critical section,
+-               * but have to wait for the global lock to be released.
++               * It appears that no complex operation is around.
++               * Acquire the per-semaphore lock.
+                */
+-              if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
+-                      spin_unlock(&sem->lock);
+-                      spin_unlock_wait(&sma->sem_perm.lock);
+-                      goto again;
++              spin_lock(&sem->lock);
++
++              /* Then check that the global lock is free */
++              if (!spin_is_locked(&sma->sem_perm.lock)) {
++                      /* spin_is_locked() is not a memory barrier */
++                      smp_mb();
++
++                      /* Now repeat the test of complex_count:
++                       * It can't change anymore until we drop sem->lock.
++                       * Thus: if it is now 0, then it will stay 0.
++                       */
++                      if (sma->complex_count == 0) {
++                              /* fast path successful! */
++                              return sops->sem_num;
++                      }
+               }
++              spin_unlock(&sem->lock);
++      }
++
++      /* slow path: acquire the full lock */
++      ipc_lock_object(&sma->sem_perm);
+-              locknum = sops->sem_num;
++      if (sma->complex_count == 0) {
++              /* False alarm:
++               * There is no complex operation, thus we can switch
++               * back to the fast path.
++               */
++              spin_lock(&sem->lock);
++              ipc_unlock_object(&sma->sem_perm);
++              return sops->sem_num;
+       } else {
+-              int i;
+-              /*
+-               * Lock the semaphore array, and wait for all of the
+-               * individual semaphore locks to go away.  The code
+-               * above ensures no new single-lock holders will enter
+-               * their critical section while the array lock is held.
++              /* Not a false alarm, thus complete the sequence for a
++               * full lock.
+                */
+- lock_array:
+-              ipc_lock_object(&sma->sem_perm);
+-              for (i = 0; i < sma->sem_nsems; i++) {
+-                      struct sem *sem = sma->sem_base + i;
+-                      spin_unlock_wait(&sem->lock);
+-              }
+-              locknum = -1;
++              sem_wait_array(sma);
++              return -1;
+       }
+-      return locknum;
+ }
+ static inline void sem_unlock(struct sem_array *sma, int locknum)
diff --git a/queue-3.10/ipc-sem.c-optimize-sem_lock.patch b/queue-3.10/ipc-sem.c-optimize-sem_lock.patch
new file mode 100644 (file)
index 0000000..a1ce06d
--- /dev/null
@@ -0,0 +1,55 @@
+From 6d07b68ce16ae9535955ba2059dedba5309c3ca1 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Mon, 30 Sep 2013 13:45:06 -0700
+Subject: ipc/sem.c: optimize sem_lock()
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit 6d07b68ce16ae9535955ba2059dedba5309c3ca1 upstream.
+
+Operations that need access to the whole array must guarantee that there
+are no simple operations ongoing.  Right now this is achieved by
+spin_unlock_wait(sem->lock) on all semaphores.
+
+If complex_count is nonzero, then this spin_unlock_wait() is not
+necessary, because it was already performed in the past by the thread
+that increased complex_count, and even though sem_perm.lock was dropped
+in between, no simple operation could have started, because simple
+operations cannot start when complex_count is non-zero.
+
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Mike Galbraith <bitbucket@online.de>
+Cc: Rik van Riel <riel@redhat.com>
+Reviewed-by: Davidlohr Bueso <davidlohr@hp.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/sem.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -257,12 +257,20 @@ static void sem_rcu_free(struct rcu_head
+  * Caller must own sem_perm.lock.
+  * New simple ops cannot start, because simple ops first check
+  * that sem_perm.lock is free.
++ * that a) sem_perm.lock is free and b) complex_count is 0.
+  */
+ static void sem_wait_array(struct sem_array *sma)
+ {
+       int i;
+       struct sem *sem;
++      if (sma->complex_count)  {
++              /* The thread that increased sma->complex_count waited on
++               * all sem->lock locks. Thus we don't need to wait again.
++               */
++              return;
++      }
++
+       for (i = 0; i < sma->sem_nsems; i++) {
+               sem = sma->sem_base + i;
+               spin_unlock_wait(&sem->lock);
diff --git a/queue-3.10/ipc-sem.c-synchronize-the-proc-interface.patch b/queue-3.10/ipc-sem.c-synchronize-the-proc-interface.patch
new file mode 100644 (file)
index 0000000..1ca7d80
--- /dev/null
@@ -0,0 +1,46 @@
+From d8c633766ad88527f25d9f81a5c2f083d78a2b39 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Mon, 30 Sep 2013 13:45:07 -0700
+Subject: ipc/sem.c: synchronize the proc interface
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit d8c633766ad88527f25d9f81a5c2f083d78a2b39 upstream.
+
+The proc interface is not aware of sem_lock(); it instead calls
+ipc_lock_object() directly.  This means that simple semop() operations
+can run in parallel with the proc interface.  Right now this is not
+critical, because the implementation doesn't do anything that requires
+proper synchronization.
+
+But it is dangerous and therefore should be fixed.
+
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Rik van Riel <riel@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/sem.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -2103,6 +2103,14 @@ static int sysvipc_sem_proc_show(struct
+       struct sem_array *sma = it;
+       time_t sem_otime;
++      /*
++       * The proc interface isn't aware of sem_lock(), it calls
++       * ipc_lock_object() directly (in sysvipc_find_ipc).
++       * In order to stay compatible with sem_lock(), we must wait until
++       * all simple semop() calls have left their critical regions.
++       */
++      sem_wait_array(sma);
++
+       sem_otime = get_semotime(sma);
+       return seq_printf(s,
diff --git a/queue-3.10/ipc-sem.c-update-sem_otime-for-all-operations.patch b/queue-3.10/ipc-sem.c-update-sem_otime-for-all-operations.patch
new file mode 100644 (file)
index 0000000..d004617
--- /dev/null
@@ -0,0 +1,103 @@
+From 0e8c665699e953fa58dc1b0b0d09e5dce7343cc7 Mon Sep 17 00:00:00 2001
+From: Manfred Spraul <manfred@colorfullife.com>
+Date: Mon, 30 Sep 2013 13:45:25 -0700
+Subject: ipc/sem.c: update sem_otime for all operations
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+commit 0e8c665699e953fa58dc1b0b0d09e5dce7343cc7 upstream.
+
+In commit 0a2b9d4c7967 ("ipc/sem.c: move wake_up_process out of the
+spinlock section"), the update of the semaphore's sem_otime (last semop
+time) was moved to one central position (do_smart_update).
+
+But since do_smart_update() is only called for operations that modify
+the array, this means that wait-for-zero semops do not update sem_otime
+anymore.
+
+The fix is simple:
+Non-alter operations must update sem_otime.
+
+[akpm@linux-foundation.org: coding-style fixes]
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Reported-by: Jia He <jiakernel@gmail.com>
+Tested-by: Jia He <jiakernel@gmail.com>
+Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
+Cc: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ ipc/sem.c |   42 +++++++++++++++++++++++++++++-------------
+ 1 file changed, 29 insertions(+), 13 deletions(-)
+
+--- a/ipc/sem.c
++++ b/ipc/sem.c
+@@ -918,6 +918,24 @@ again:
+ }
+ /**
++ * set_semotime(sma, sops) - set sem_otime
++ * @sma: semaphore array
++ * @sops: operations that modified the array, may be NULL
++ *
++ * sem_otime is replicated to avoid cache line trashing.
++ * This function sets one instance to the current time.
++ */
++static void set_semotime(struct sem_array *sma, struct sembuf *sops)
++{
++      if (sops == NULL) {
++              sma->sem_base[0].sem_otime = get_seconds();
++      } else {
++              sma->sem_base[sops[0].sem_num].sem_otime =
++                                                      get_seconds();
++      }
++}
++
++/**
+  * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
+  * @sma: semaphore array
+  * @sops: operations that were performed
+@@ -967,17 +985,10 @@ static void do_smart_update(struct sem_a
+                       }
+               }
+       }
+-      if (otime) {
+-              if (sops == NULL) {
+-                      sma->sem_base[0].sem_otime = get_seconds();
+-              } else {
+-                      sma->sem_base[sops[0].sem_num].sem_otime =
+-                                                              get_seconds();
+-              }
+-      }
++      if (otime)
++              set_semotime(sma, sops);
+ }
+-
+ /* The following counts are associated to each semaphore:
+  *   semncnt        number of tasks waiting on semval being nonzero
+  *   semzcnt        number of tasks waiting on semval being zero
+@@ -1839,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
+       error = perform_atomic_semop(sma, sops, nsops, un,
+                                       task_tgid_vnr(current));
+-      if (error <= 0) {
+-              if (alter && error == 0)
++      if (error == 0) {
++              /* If the operation was successful, then do
++               * the required updates.
++               */
++              if (alter)
+                       do_smart_update(sma, sops, nsops, 1, &tasks);
+-
+-              goto out_unlock_free;
++              else
++                      set_semotime(sma, sops);
+       }
++      if (error <= 0)
++              goto out_unlock_free;
+       /* We need to sleep on this operation, so we put the current
+        * task into the pending queue and go to sleep.
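
Note (illustration, not part of the patch above): the fix is that non-altering operations (wait-for-zero semops) now also call set_semotime().  A user-space way to observe sem_otime, assuming Linux's behaviour of initialising new semaphores to 0 so that a sem_op == 0 operation returns immediately (union semun must be defined by the caller, per semctl(2)):

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/sem.h>

    union semun { int val; struct semid_ds *buf; unsigned short *array; };

    int main(void)
    {
            int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
            struct sembuf op = { .sem_num = 0, .sem_op = 0, .sem_flg = 0 };
            union semun arg;
            struct semid_ds ds;

            if (id < 0) { perror("semget"); return 1; }

            /* Non-altering wait-for-zero op: succeeds at once because the
             * semaphore value is 0; with the fix it still updates sem_otime. */
            if (semop(id, &op, 1) < 0) { perror("semop"); return 1; }

            arg.buf = &ds;
            if (semctl(id, 0, IPC_STAT, arg) < 0) { perror("semctl"); return 1; }
            printf("sem_otime = %ld\n", (long)ds.sem_otime);

            semctl(id, 0, IPC_RMID, arg);
            return 0;
    }

With this patch applied the printed sem_otime is the current time; on an affected kernel, a semaphore set that only ever saw wait-for-zero operations would still report 0.
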
diff --git a/queue-3.10/series b/queue-3.10/series
index 7b814dd4bb72cc9dd9495ceb27bae25f7a746851..7e8461b501f8108c63d5e020635555b627719bdf 100644 (file)
--- a/queue-3.10/series
@@ -60,3 +60,9 @@ ipc-shm-guard-against-non-existant-vma-in-shmdt-2.patch
 ipc-drop-ipc_lock_by_ptr.patch
 ipc-shm-drop-shm_lock_check.patch
 ipc-drop-ipc_lock_check.patch
+ipc-fix-race-with-lsms.patch
+ipc-sem.c-fix-race-in-sem_lock.patch
+ipc-sem.c-optimize-sem_lock.patch
+ipc-sem.c-synchronize-the-proc-interface.patch
+ipc-sem.c-update-sem_otime-for-all-operations.patch
+ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch