From: Greg Kroah-Hartman Date: Wed, 16 Oct 2013 00:10:22 +0000 (-0700) Subject: 3.10-stable patches X-Git-Tag: v3.10.17~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a4e401df43ca4c1f231aab1dba2ea7fe8253c972;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: ipc-fix-race-with-lsms.patch ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch ipc-sem.c-fix-race-in-sem_lock.patch ipc-sem.c-optimize-sem_lock.patch ipc-sem.c-synchronize-the-proc-interface.patch ipc-sem.c-update-sem_otime-for-all-operations.patch --- diff --git a/queue-3.10/ipc-fix-race-with-lsms.patch b/queue-3.10/ipc-fix-race-with-lsms.patch new file mode 100644 index 00000000000..7438aebfb5b --- /dev/null +++ b/queue-3.10/ipc-fix-race-with-lsms.patch @@ -0,0 +1,347 @@ +From 53dad6d3a8e5ac1af8bacc6ac2134ae1a8b085f1 Mon Sep 17 00:00:00 2001 +From: Davidlohr Bueso +Date: Mon, 23 Sep 2013 17:04:45 -0700 +Subject: ipc: fix race with LSMs + +From: Davidlohr Bueso + +commit 53dad6d3a8e5ac1af8bacc6ac2134ae1a8b085f1 upstream. + +Currently, IPC mechanisms do security and auditing related checks under +RCU. However, since security modules can free the security structure, +for example, through selinux_[sem,msg_queue,shm]_free_security(), we can +race if the structure is freed before other tasks are done with it, +creating a use-after-free condition. Manfred illustrates this nicely, +for instance with shared mem and selinux: + + -> do_shmat calls rcu_read_lock() + -> do_shmat calls shm_object_check(). + Checks that the object is still valid - but doesn't acquire any locks. + Then it returns. + -> do_shmat calls security_shm_shmat (e.g. selinux_shm_shmat) + -> selinux_shm_shmat calls ipc_has_perm() + -> ipc_has_perm accesses ipc_perms->security + +shm_close() + -> shm_close acquires rw_mutex & shm_lock + -> shm_close calls shm_destroy + -> shm_destroy calls security_shm_free (e.g. selinux_shm_free_security) + -> selinux_shm_free_security calls ipc_free_security(&shp->shm_perm) + -> ipc_free_security calls kfree(ipc_perms->security) + +This patch delays the freeing of the security structures after all RCU +readers are done. Furthermore it aligns the security life cycle with +that of the rest of IPC - freeing them based on the reference counter. +For situations where we need not free security, the current behavior is +kept. Linus states: + + "... the old behavior was suspect for another reason too: having the + security blob go away from under a user sounds like it could cause + various other problems anyway, so I think the old code was at least + _prone_ to bugs even if it didn't have catastrophic behavior." + +I have tested this patch with IPC testcases from LTP on both my +quad-core laptop and on a 64 core NUMA server. In both cases selinux is +enabled, and tests pass for both voluntary and forced preemption models. +While the mentioned races are theoretical (at least no one as reported +them), I wanted to make sure that this new logic doesn't break anything +we weren't aware of. 
+ +Suggested-by: Linus Torvalds +Signed-off-by: Davidlohr Bueso +Acked-by: Manfred Spraul +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/msg.c | 19 +++++++++++++------ + ipc/sem.c | 34 ++++++++++++++++++---------------- + ipc/shm.c | 17 ++++++++++++----- + ipc/util.c | 32 ++++++++++++-------------------- + ipc/util.h | 10 +++++++++- + 5 files changed, 64 insertions(+), 48 deletions(-) + +--- a/ipc/msg.c ++++ b/ipc/msg.c +@@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_n + ipc_rmid(&msg_ids(ns), &s->q_perm); + } + ++static void msg_rcu_free(struct rcu_head *head) ++{ ++ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); ++ struct msg_queue *msq = ipc_rcu_to_struct(p); ++ ++ security_msg_queue_free(msq); ++ ipc_rcu_free(head); ++} ++ + /** + * newque - Create a new msg queue + * @ns: namespace +@@ -189,15 +198,14 @@ static int newque(struct ipc_namespace * + msq->q_perm.security = NULL; + retval = security_msg_queue_alloc(msq); + if (retval) { +- ipc_rcu_putref(msq); ++ ipc_rcu_putref(msq, ipc_rcu_free); + return retval; + } + + /* ipc_addid() locks msq upon success. */ + id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); + if (id < 0) { +- security_msg_queue_free(msq); +- ipc_rcu_putref(msq); ++ ipc_rcu_putref(msq, msg_rcu_free); + return id; + } + +@@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace + free_msg(msg); + } + atomic_sub(msq->q_cbytes, &ns->msg_bytes); +- security_msg_queue_free(msq); +- ipc_rcu_putref(msq); ++ ipc_rcu_putref(msq, msg_rcu_free); + } + + /* +@@ -717,7 +724,7 @@ long do_msgsnd(int msqid, long mtype, vo + rcu_read_lock(); + ipc_lock_object(&msq->q_perm); + +- ipc_rcu_putref(msq); ++ ipc_rcu_putref(msq, ipc_rcu_free); + if (msq->q_perm.deleted) { + err = -EIDRM; + goto out_unlock0; +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -243,6 +243,15 @@ static void merge_queues(struct sem_arra + } + } + ++static void sem_rcu_free(struct rcu_head *head) ++{ ++ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); ++ struct sem_array *sma = ipc_rcu_to_struct(p); ++ ++ security_sem_free(sma); ++ ipc_rcu_free(head); ++} ++ + /* + * If the request contains only one semaphore operation, and there are + * no complex transactions pending, lock only the semaphore involved. 
+@@ -374,12 +383,7 @@ static inline struct sem_array *sem_obta + static inline void sem_lock_and_putref(struct sem_array *sma) + { + sem_lock(sma, NULL, -1); +- ipc_rcu_putref(sma); +-} +- +-static inline void sem_putref(struct sem_array *sma) +-{ +- ipc_rcu_putref(sma); ++ ipc_rcu_putref(sma, ipc_rcu_free); + } + + static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) +@@ -458,14 +462,13 @@ static int newary(struct ipc_namespace * + sma->sem_perm.security = NULL; + retval = security_sem_alloc(sma); + if (retval) { +- ipc_rcu_putref(sma); ++ ipc_rcu_putref(sma, ipc_rcu_free); + return retval; + } + + id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); + if (id < 0) { +- security_sem_free(sma); +- ipc_rcu_putref(sma); ++ ipc_rcu_putref(sma, sem_rcu_free); + return id; + } + ns->used_sems += nsems; +@@ -1047,8 +1050,7 @@ static void freeary(struct ipc_namespace + + wake_up_sem_queue_do(&tasks); + ns->used_sems -= sma->sem_nsems; +- security_sem_free(sma); +- ipc_rcu_putref(sma); ++ ipc_rcu_putref(sma, sem_rcu_free); + } + + static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) +@@ -1292,7 +1294,7 @@ static int semctl_main(struct ipc_namesp + rcu_read_unlock(); + sem_io = ipc_alloc(sizeof(ushort)*nsems); + if(sem_io == NULL) { +- sem_putref(sma); ++ ipc_rcu_putref(sma, ipc_rcu_free); + return -ENOMEM; + } + +@@ -1328,20 +1330,20 @@ static int semctl_main(struct ipc_namesp + if(nsems > SEMMSL_FAST) { + sem_io = ipc_alloc(sizeof(ushort)*nsems); + if(sem_io == NULL) { +- sem_putref(sma); ++ ipc_rcu_putref(sma, ipc_rcu_free); + return -ENOMEM; + } + } + + if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { +- sem_putref(sma); ++ ipc_rcu_putref(sma, ipc_rcu_free); + err = -EFAULT; + goto out_free; + } + + for (i = 0; i < nsems; i++) { + if (sem_io[i] > SEMVMX) { +- sem_putref(sma); ++ ipc_rcu_putref(sma, ipc_rcu_free); + err = -ERANGE; + goto out_free; + } +@@ -1629,7 +1631,7 @@ static struct sem_undo *find_alloc_undo( + /* step 2: allocate new undo structure */ + new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); + if (!new) { +- sem_putref(sma); ++ ipc_rcu_putref(sma, ipc_rcu_free); + return ERR_PTR(-ENOMEM); + } + +--- a/ipc/shm.c ++++ b/ipc/shm.c +@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struc + ipc_lock_object(&ipcp->shm_perm); + } + ++static void shm_rcu_free(struct rcu_head *head) ++{ ++ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); ++ struct shmid_kernel *shp = ipc_rcu_to_struct(p); ++ ++ security_shm_free(shp); ++ ipc_rcu_free(head); ++} ++ + static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) + { + ipc_rmid(&shm_ids(ns), &s->shm_perm); +@@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_names + user_shm_unlock(file_inode(shp->shm_file)->i_size, + shp->mlock_user); + fput (shp->shm_file); +- security_shm_free(shp); +- ipc_rcu_putref(shp); ++ ipc_rcu_putref(shp, shm_rcu_free); + } + + /* +@@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace * + shp->shm_perm.security = NULL; + error = security_shm_alloc(shp); + if (error) { +- ipc_rcu_putref(shp); ++ ipc_rcu_putref(shp, ipc_rcu_free); + return error; + } + +@@ -566,8 +574,7 @@ no_id: + user_shm_unlock(size, shp->mlock_user); + fput(file); + no_file: +- security_shm_free(shp); +- ipc_rcu_putref(shp); ++ ipc_rcu_putref(shp, shm_rcu_free); + return error; + } + +--- a/ipc/util.c ++++ b/ipc/util.c +@@ -474,11 +474,6 @@ void ipc_free(void* ptr, int size) + kfree(ptr); + } + +-struct 
ipc_rcu { +- struct rcu_head rcu; +- atomic_t refcount; +-} ____cacheline_aligned_in_smp; +- + /** + * ipc_rcu_alloc - allocate ipc and rcu space + * @size: size desired +@@ -505,27 +500,24 @@ int ipc_rcu_getref(void *ptr) + return atomic_inc_not_zero(&p->refcount); + } + +-/** +- * ipc_schedule_free - free ipc + rcu space +- * @head: RCU callback structure for queued work +- */ +-static void ipc_schedule_free(struct rcu_head *head) +-{ +- vfree(container_of(head, struct ipc_rcu, rcu)); +-} +- +-void ipc_rcu_putref(void *ptr) ++void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)) + { + struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; + + if (!atomic_dec_and_test(&p->refcount)) + return; + +- if (is_vmalloc_addr(ptr)) { +- call_rcu(&p->rcu, ipc_schedule_free); +- } else { +- kfree_rcu(p, rcu); +- } ++ call_rcu(&p->rcu, func); ++} ++ ++void ipc_rcu_free(struct rcu_head *head) ++{ ++ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); ++ ++ if (is_vmalloc_addr(p)) ++ vfree(p); ++ else ++ kfree(p); + } + + /** +--- a/ipc/util.h ++++ b/ipc/util.h +@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ip + static inline void shm_exit_ns(struct ipc_namespace *ns) { } + #endif + ++struct ipc_rcu { ++ struct rcu_head rcu; ++ atomic_t refcount; ++} ____cacheline_aligned_in_smp; ++ ++#define ipc_rcu_to_struct(p) ((void *)(p+1)) ++ + /* + * Structure that holds the parameters needed by the ipc operations + * (see after) +@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size); + */ + void* ipc_rcu_alloc(int size); + int ipc_rcu_getref(void *ptr); +-void ipc_rcu_putref(void *ptr); ++void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)); ++void ipc_rcu_free(struct rcu_head *head); + + struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); + struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); diff --git a/queue-3.10/ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch b/queue-3.10/ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch new file mode 100644 index 00000000000..dc777235795 --- /dev/null +++ b/queue-3.10/ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch @@ -0,0 +1,69 @@ +From 4271b05a227dc6175b66c3d9941aeab09048aeb2 Mon Sep 17 00:00:00 2001 +From: Davidlohr Bueso +Date: Mon, 30 Sep 2013 13:45:26 -0700 +Subject: ipc,msg: prevent race with rmid in msgsnd,msgrcv + +From: Davidlohr Bueso + +commit 4271b05a227dc6175b66c3d9941aeab09048aeb2 upstream. + +This fixes a race in both msgrcv() and msgsnd() between finding the msg +and actually dealing with the queue, as another thread can delete shmid +underneath us if we are preempted before acquiring the +kern_ipc_perm.lock. + +Manfred illustrates this nicely: + +Assume a preemptible kernel that is preempted just after + + msq = msq_obtain_object_check(ns, msqid) + +in do_msgrcv(). The only lock that is held is rcu_read_lock(). + +Now the other thread processes IPC_RMID. When the first task is +resumed, then it will happily wait for messages on a deleted queue. + +Fix this by checking for if the queue has been deleted after taking the +lock. + +Signed-off-by: Davidlohr Bueso +Reported-by: Manfred Spraul +Cc: Rik van Riel +Cc: Mike Galbraith +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/msg.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/ipc/msg.c ++++ b/ipc/msg.c +@@ -695,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, vo + if (ipcperms(ns, &msq->q_perm, S_IWUGO)) + goto out_unlock0; + ++ /* raced with RMID? 
*/ ++ if (msq->q_perm.deleted) { ++ err = -EIDRM; ++ goto out_unlock0; ++ } ++ + err = security_msg_queue_msgsnd(msq, msg, msgflg); + if (err) + goto out_unlock0; +@@ -901,6 +907,13 @@ long do_msgrcv(int msqid, void __user *b + goto out_unlock1; + + ipc_lock_object(&msq->q_perm); ++ ++ /* raced with RMID? */ ++ if (msq->q_perm.deleted) { ++ msg = ERR_PTR(-EIDRM); ++ goto out_unlock0; ++ } ++ + msg = find_msg(msq, &msgtyp, mode); + if (!IS_ERR(msg)) { + /* diff --git a/queue-3.10/ipc-sem.c-fix-race-in-sem_lock.patch b/queue-3.10/ipc-sem.c-fix-race-in-sem_lock.patch new file mode 100644 index 00000000000..ba3fb720736 --- /dev/null +++ b/queue-3.10/ipc-sem.c-fix-race-in-sem_lock.patch @@ -0,0 +1,249 @@ +From 5e9d527591421ccdb16acb8c23662231135d8686 Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 30 Sep 2013 13:45:04 -0700 +Subject: ipc/sem.c: fix race in sem_lock() + +From: Manfred Spraul + +commit 5e9d527591421ccdb16acb8c23662231135d8686 upstream. + +The exclusion of complex operations in sem_lock() is insufficient: after +acquiring the per-semaphore lock, a simple op must first check that +sem_perm.lock is not locked and only after that test check +complex_count. The current code does it the other way around - and that +creates a race. Details are below. + +The patch is a complete rewrite of sem_lock(), based in part on the code +from Mike Galbraith. It removes all gotos and all loops and thus the +risk of livelocks. + +I have tested the patch (together with the next one) on my i3 laptop and +it didn't cause any problems. + +The bug is probably also present in 3.10 and 3.11, but for these kernels +it might be simpler just to move the test of sma->complex_count after +the spin_is_locked() test. + +Details of the bug: + +Assume: + - sma->complex_count = 0. + - Thread 1: semtimedop(complex op that must sleep) + - Thread 2: semtimedop(simple op). + +Pseudo-Trace: + +Thread 1: sem_lock(): acquire sem_perm.lock +Thread 1: sem_lock(): check for ongoing simple ops + Nothing ongoing, thread 2 is still before sem_lock(). +Thread 1: try_atomic_semop() + <<< preempted. + +Thread 2: sem_lock(): + static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, + int nsops) + { + int locknum; + again: + if (nsops == 1 && !sma->complex_count) { + struct sem *sem = sma->sem_base + sops->sem_num; + + /* Lock just the semaphore we are interested in. */ + spin_lock(&sem->lock); + + /* + * If sma->complex_count was set while we were spinning, + * we may need to look at things we did not lock here. + */ + if (unlikely(sma->complex_count)) { + spin_unlock(&sem->lock); + goto lock_array; + } + <<<<<<<<< + <<< complex_count is still 0. + <<< + <<< Here it is preempted + <<<<<<<<< + +Thread 1: try_atomic_semop() returns, notices that it must sleep. +Thread 1: increases sma->complex_count. +Thread 1: drops sem_perm.lock +Thread 2: + /* + * Another process is holding the global lock on the + * sem_array; we cannot enter our critical section, + * but have to wait for the global lock to be released. 
+ */ + if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { + spin_unlock(&sem->lock); + spin_unlock_wait(&sma->sem_perm.lock); + goto again; + } + <<< sem_perm.lock already dropped, thus no "goto again;" + + locknum = sops->sem_num; + +Signed-off-by: Manfred Spraul +Cc: Mike Galbraith +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 122 +++++++++++++++++++++++++++++++++++++++----------------------- + 1 file changed, 78 insertions(+), 44 deletions(-) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -253,70 +253,104 @@ static void sem_rcu_free(struct rcu_head + } + + /* ++ * Wait until all currently ongoing simple ops have completed. ++ * Caller must own sem_perm.lock. ++ * New simple ops cannot start, because simple ops first check ++ * that sem_perm.lock is free. ++ */ ++static void sem_wait_array(struct sem_array *sma) ++{ ++ int i; ++ struct sem *sem; ++ ++ for (i = 0; i < sma->sem_nsems; i++) { ++ sem = sma->sem_base + i; ++ spin_unlock_wait(&sem->lock); ++ } ++} ++ ++/* + * If the request contains only one semaphore operation, and there are + * no complex transactions pending, lock only the semaphore involved. + * Otherwise, lock the entire semaphore array, since we either have + * multiple semaphores in our own semops, or we need to look at + * semaphores from other pending complex operations. +- * +- * Carefully guard against sma->complex_count changing between zero +- * and non-zero while we are spinning for the lock. The value of +- * sma->complex_count cannot change while we are holding the lock, +- * so sem_unlock should be fine. +- * +- * The global lock path checks that all the local locks have been released, +- * checking each local lock once. This means that the local lock paths +- * cannot start their critical sections while the global lock is held. + */ + static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, + int nsops) + { +- int locknum; +- again: +- if (nsops == 1 && !sma->complex_count) { +- struct sem *sem = sma->sem_base + sops->sem_num; ++ struct sem *sem; + +- /* Lock just the semaphore we are interested in. */ +- spin_lock(&sem->lock); ++ if (nsops != 1) { ++ /* Complex operation - acquire a full lock */ ++ ipc_lock_object(&sma->sem_perm); + +- /* +- * If sma->complex_count was set while we were spinning, +- * we may need to look at things we did not lock here. ++ /* And wait until all simple ops that are processed ++ * right now have dropped their locks. + */ +- if (unlikely(sma->complex_count)) { +- spin_unlock(&sem->lock); +- goto lock_array; +- } ++ sem_wait_array(sma); ++ return -1; ++ } ++ ++ /* ++ * Only one semaphore affected - try to optimize locking. ++ * The rules are: ++ * - optimized locking is possible if no complex operation ++ * is either enqueued or processed right now. ++ * - The test for enqueued complex ops is simple: ++ * sma->complex_count != 0 ++ * - Testing for complex ops that are processed right now is ++ * a bit more difficult. Complex ops acquire the full lock ++ * and first wait that the running simple ops have completed. ++ * (see above) ++ * Thus: If we own a simple lock and the global lock is free ++ * and complex_count is now 0, then it will stay 0 and ++ * thus just locking sem->lock is sufficient. 
++ */ ++ sem = sma->sem_base + sops->sem_num; + ++ if (sma->complex_count == 0) { + /* +- * Another process is holding the global lock on the +- * sem_array; we cannot enter our critical section, +- * but have to wait for the global lock to be released. ++ * It appears that no complex operation is around. ++ * Acquire the per-semaphore lock. + */ +- if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { +- spin_unlock(&sem->lock); +- spin_unlock_wait(&sma->sem_perm.lock); +- goto again; ++ spin_lock(&sem->lock); ++ ++ /* Then check that the global lock is free */ ++ if (!spin_is_locked(&sma->sem_perm.lock)) { ++ /* spin_is_locked() is not a memory barrier */ ++ smp_mb(); ++ ++ /* Now repeat the test of complex_count: ++ * It can't change anymore until we drop sem->lock. ++ * Thus: if is now 0, then it will stay 0. ++ */ ++ if (sma->complex_count == 0) { ++ /* fast path successful! */ ++ return sops->sem_num; ++ } + } ++ spin_unlock(&sem->lock); ++ } ++ ++ /* slow path: acquire the full lock */ ++ ipc_lock_object(&sma->sem_perm); + +- locknum = sops->sem_num; ++ if (sma->complex_count == 0) { ++ /* False alarm: ++ * There is no complex operation, thus we can switch ++ * back to the fast path. ++ */ ++ spin_lock(&sem->lock); ++ ipc_unlock_object(&sma->sem_perm); ++ return sops->sem_num; + } else { +- int i; +- /* +- * Lock the semaphore array, and wait for all of the +- * individual semaphore locks to go away. The code +- * above ensures no new single-lock holders will enter +- * their critical section while the array lock is held. ++ /* Not a false alarm, thus complete the sequence for a ++ * full lock. + */ +- lock_array: +- ipc_lock_object(&sma->sem_perm); +- for (i = 0; i < sma->sem_nsems; i++) { +- struct sem *sem = sma->sem_base + i; +- spin_unlock_wait(&sem->lock); +- } +- locknum = -1; ++ sem_wait_array(sma); ++ return -1; + } +- return locknum; + } + + static inline void sem_unlock(struct sem_array *sma, int locknum) diff --git a/queue-3.10/ipc-sem.c-optimize-sem_lock.patch b/queue-3.10/ipc-sem.c-optimize-sem_lock.patch new file mode 100644 index 00000000000..a1ce06d2fa0 --- /dev/null +++ b/queue-3.10/ipc-sem.c-optimize-sem_lock.patch @@ -0,0 +1,55 @@ +From 6d07b68ce16ae9535955ba2059dedba5309c3ca1 Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 30 Sep 2013 13:45:06 -0700 +Subject: ipc/sem.c: optimize sem_lock() + +From: Manfred Spraul + +commit 6d07b68ce16ae9535955ba2059dedba5309c3ca1 upstream. + +Operations that need access to the whole array must guarantee that there +are no simple operations ongoing. Right now this is achieved by +spin_unlock_wait(sem->lock) on all semaphores. + +If complex_count is nonzero, then this spin_unlock_wait() is not +necessary, because it was already performed in the past by the thread +that increased complex_count and even though sem_perm.lock was dropped +inbetween, no simple operation could have started, because simple +operations cannot start when complex_count is non-zero. + +Signed-off-by: Manfred Spraul +Cc: Mike Galbraith +Cc: Rik van Riel +Reviewed-by: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -257,12 +257,20 @@ static void sem_rcu_free(struct rcu_head + * Caller must own sem_perm.lock. + * New simple ops cannot start, because simple ops first check + * that sem_perm.lock is free. ++ * that a) sem_perm.lock is free and b) complex_count is 0. 
+ */ + static void sem_wait_array(struct sem_array *sma) + { + int i; + struct sem *sem; + ++ if (sma->complex_count) { ++ /* The thread that increased sma->complex_count waited on ++ * all sem->lock locks. Thus we don't need to wait again. ++ */ ++ return; ++ } ++ + for (i = 0; i < sma->sem_nsems; i++) { + sem = sma->sem_base + i; + spin_unlock_wait(&sem->lock); diff --git a/queue-3.10/ipc-sem.c-synchronize-the-proc-interface.patch b/queue-3.10/ipc-sem.c-synchronize-the-proc-interface.patch new file mode 100644 index 00000000000..1ca7d808cbd --- /dev/null +++ b/queue-3.10/ipc-sem.c-synchronize-the-proc-interface.patch @@ -0,0 +1,46 @@ +From d8c633766ad88527f25d9f81a5c2f083d78a2b39 Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 30 Sep 2013 13:45:07 -0700 +Subject: ipc/sem.c: synchronize the proc interface + +From: Manfred Spraul + +commit d8c633766ad88527f25d9f81a5c2f083d78a2b39 upstream. + +The proc interface is not aware of sem_lock(), it instead calls +ipc_lock_object() directly. This means that simple semop() operations +can run in parallel with the proc interface. Right now, this is +uncritical, because the implementation doesn't do anything that requires +a proper synchronization. + +But it is dangerous and therefore should be fixed. + +Signed-off-by: Manfred Spraul +Cc: Davidlohr Bueso +Cc: Mike Galbraith +Cc: Rik van Riel +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -2103,6 +2103,14 @@ static int sysvipc_sem_proc_show(struct + struct sem_array *sma = it; + time_t sem_otime; + ++ /* ++ * The proc interface isn't aware of sem_lock(), it calls ++ * ipc_lock_object() directly (in sysvipc_find_ipc). ++ * In order to stay compatible with sem_lock(), we must wait until ++ * all simple semop() calls have left their critical regions. ++ */ ++ sem_wait_array(sma); ++ + sem_otime = get_semotime(sma); + + return seq_printf(s, diff --git a/queue-3.10/ipc-sem.c-update-sem_otime-for-all-operations.patch b/queue-3.10/ipc-sem.c-update-sem_otime-for-all-operations.patch new file mode 100644 index 00000000000..d0046179e76 --- /dev/null +++ b/queue-3.10/ipc-sem.c-update-sem_otime-for-all-operations.patch @@ -0,0 +1,103 @@ +From 0e8c665699e953fa58dc1b0b0d09e5dce7343cc7 Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 30 Sep 2013 13:45:25 -0700 +Subject: ipc/sem.c: update sem_otime for all operations + +From: Manfred Spraul + +commit 0e8c665699e953fa58dc1b0b0d09e5dce7343cc7 upstream. + +In commit 0a2b9d4c7967 ("ipc/sem.c: move wake_up_process out of the +spinlock section"), the update of semaphore's sem_otime(last semop time) +was moved to one central position (do_smart_update). + +But since do_smart_update() is only called for operations that modify +the array, this means that wait-for-zero semops do not update sem_otime +anymore. + +The fix is simple: +Non-alter operations must update sem_otime. 
+ +[akpm@linux-foundation.org: coding-style fixes] +Signed-off-by: Manfred Spraul +Reported-by: Jia He +Tested-by: Jia He +Cc: Davidlohr Bueso +Cc: Mike Galbraith +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 42 +++++++++++++++++++++++++++++------------- + 1 file changed, 29 insertions(+), 13 deletions(-) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -918,6 +918,24 @@ again: + } + + /** ++ * set_semotime(sma, sops) - set sem_otime ++ * @sma: semaphore array ++ * @sops: operations that modified the array, may be NULL ++ * ++ * sem_otime is replicated to avoid cache line trashing. ++ * This function sets one instance to the current time. ++ */ ++static void set_semotime(struct sem_array *sma, struct sembuf *sops) ++{ ++ if (sops == NULL) { ++ sma->sem_base[0].sem_otime = get_seconds(); ++ } else { ++ sma->sem_base[sops[0].sem_num].sem_otime = ++ get_seconds(); ++ } ++} ++ ++/** + * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue + * @sma: semaphore array + * @sops: operations that were performed +@@ -967,17 +985,10 @@ static void do_smart_update(struct sem_a + } + } + } +- if (otime) { +- if (sops == NULL) { +- sma->sem_base[0].sem_otime = get_seconds(); +- } else { +- sma->sem_base[sops[0].sem_num].sem_otime = +- get_seconds(); +- } +- } ++ if (otime) ++ set_semotime(sma, sops); + } + +- + /* The following counts are associated to each semaphore: + * semncnt number of tasks waiting on semval being nonzero + * semzcnt number of tasks waiting on semval being zero +@@ -1839,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, + + error = perform_atomic_semop(sma, sops, nsops, un, + task_tgid_vnr(current)); +- if (error <= 0) { +- if (alter && error == 0) ++ if (error == 0) { ++ /* If the operation was successful, then do ++ * the required updates. ++ */ ++ if (alter) + do_smart_update(sma, sops, nsops, 1, &tasks); +- +- goto out_unlock_free; ++ else ++ set_semotime(sma, sops); + } ++ if (error <= 0) ++ goto out_unlock_free; + + /* We need to sleep on this operation, so we put the current + * task into the pending queue and go to sleep. diff --git a/queue-3.10/series b/queue-3.10/series index 7b814dd4bb7..7e8461b501f 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -60,3 +60,9 @@ ipc-shm-guard-against-non-existant-vma-in-shmdt-2.patch ipc-drop-ipc_lock_by_ptr.patch ipc-shm-drop-shm_lock_check.patch ipc-drop-ipc_lock_check.patch +ipc-fix-race-with-lsms.patch +ipc-sem.c-fix-race-in-sem_lock.patch +ipc-sem.c-optimize-sem_lock.patch +ipc-sem.c-synchronize-the-proc-interface.patch +ipc-sem.c-update-sem_otime-for-all-operations.patch +ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch
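
For reference, a minimal userspace sketch of the removal-vs-waiter window that
ipc-msg-prevent-race-with-rmid-in-msgsnd-msgrcv.patch closes on the kernel side.
It is not part of any queued patch and uses only the standard System V message
queue API; the struct name msgbuf_local and the one-second sleep are arbitrary
choices for illustration. With the fix applied, a receiver blocked in msgrcv()
comes back with -1/EIDRM once the queue is deleted underneath it, instead of
waiting on a queue whose kernel state is being torn down.

/*
 * Sketch only: exercise msgrcv() racing with IPC_RMID from userspace.
 * The child blocks on an empty queue; the parent then removes the queue,
 * and the child is expected to fail with EIDRM.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

struct msgbuf_local {
	long mtype;
	char mtext[64];
};

int main(void)
{
	int id = msgget(IPC_PRIVATE, IPC_CREAT | 0600);
	if (id < 0) {
		perror("msgget");
		return 1;
	}

	pid_t pid = fork();
	if (pid < 0) {
		perror("fork");
		return 1;
	}

	if (pid == 0) {
		struct msgbuf_local m;

		/* Blocks: the queue is empty and nothing is ever sent. */
		if (msgrcv(id, &m, sizeof(m.mtext), 0, 0) < 0)
			printf("msgrcv: %s\n", strerror(errno)); /* expect EIDRM */
		return 0;
	}

	sleep(1);                   /* give the child time to block in msgrcv() */
	msgctl(id, IPC_RMID, NULL); /* delete the queue under the waiter */
	wait(NULL);
	return 0;
}

Built with cc and run on a fixed kernel, the child should print
"msgrcv: Identifier removed" (glibc's text for EIDRM) shortly after the parent
removes the queue.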