From: Greg Kroah-Hartman Date: Tue, 15 Oct 2013 20:50:21 +0000 (-0700) Subject: 3.10-stable patches X-Git-Tag: v3.10.17~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7538f1c1d3f5d22c3aea65e8ade1898e1f7119e2;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch ipc-sem.c-cacheline-align-the-semaphore-structures.patch ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch --- diff --git a/queue-3.10/ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch b/queue-3.10/ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch new file mode 100644 index 00000000000..a296de4641a --- /dev/null +++ b/queue-3.10/ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch @@ -0,0 +1,408 @@ +From 1a82e9e1d0f1b45f47a97c9e2349020536ff8987 Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:23 -0700 +Subject: ipc/sem: separate wait-for-zero and alter tasks into seperate queues + +From: Manfred Spraul + +commit 1a82e9e1d0f1b45f47a97c9e2349020536ff8987 upstream. + +Introduce separate queues for operations that do not modify the +semaphore values. Advantages: + + - Simpler logic in check_restart(). + - Faster update_queue(): Right now, all wait-for-zero operations are + always tested, even if the semaphore value is not 0. + - wait-for-zero gets again priority, as in linux <=3.0.9 + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/sem.h | 5 - + ipc/sem.c | 211 +++++++++++++++++++++++++++++++++++++--------------- + 2 files changed, 155 insertions(+), 61 deletions(-) + +--- a/include/linux/sem.h ++++ b/include/linux/sem.h +@@ -15,7 +15,10 @@ struct sem_array { + time_t sem_otime; /* last semop time */ + time_t sem_ctime; /* last change time */ + struct sem *sem_base; /* ptr to first semaphore in array */ +- struct list_head sem_pending; /* pending operations to be processed */ ++ struct list_head pending_alter; /* pending operations */ ++ /* that alter the array */ ++ struct list_head pending_const; /* pending complex operations */ ++ /* that do not alter semvals */ + struct list_head list_id; /* undo requests on this array */ + int sem_nsems; /* no. of semaphores in array */ + int complex_count; /* pending complex operations */ +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -95,7 +95,10 @@ struct sem { + int semval; /* current value */ + int sempid; /* pid of last operation */ + spinlock_t lock; /* spinlock for fine-grained semtimedop */ +- struct list_head sem_pending; /* pending single-sop operations */ ++ struct list_head pending_alter; /* pending single-sop operations */ ++ /* that alter the semaphore */ ++ struct list_head pending_const; /* pending single-sop operations */ ++ /* that do not alter the semaphore*/ + } ____cacheline_aligned_in_smp; + + /* One queue for each sleeping process in the system. 
*/ +@@ -152,7 +155,7 @@ static int sysvipc_sem_proc_show(struct + /* + * linked list protection: + * sem_undo.id_next, +- * sem_array.sem_pending{,last}, ++ * sem_array.pending{_alter,_cont}, + * sem_array.sem_undo: sem_lock() for read/write + * sem_undo.proc_next: only "current" is allowed to read/write that field. + * +@@ -337,7 +340,7 @@ static inline void sem_rmid(struct ipc_n + * Without the check/retry algorithm a lockless wakeup is possible: + * - queue.status is initialized to -EINTR before blocking. + * - wakeup is performed by +- * * unlinking the queue entry from sma->sem_pending ++ * * unlinking the queue entry from the pending list + * * setting queue.status to IN_WAKEUP + * This is the notification for the blocked thread that a + * result value is imminent. +@@ -418,12 +421,14 @@ static int newary(struct ipc_namespace * + sma->sem_base = (struct sem *) &sma[1]; + + for (i = 0; i < nsems; i++) { +- INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); ++ INIT_LIST_HEAD(&sma->sem_base[i].pending_alter); ++ INIT_LIST_HEAD(&sma->sem_base[i].pending_const); + spin_lock_init(&sma->sem_base[i].lock); + } + + sma->complex_count = 0; +- INIT_LIST_HEAD(&sma->sem_pending); ++ INIT_LIST_HEAD(&sma->pending_alter); ++ INIT_LIST_HEAD(&sma->pending_const); + INIT_LIST_HEAD(&sma->list_id); + sma->sem_nsems = nsems; + sma->sem_ctime = get_seconds(); +@@ -609,60 +614,132 @@ static void unlink_queue(struct sem_arra + * update_queue is O(N^2) when it restarts scanning the whole queue of + * waiting operations. Therefore this function checks if the restart is + * really necessary. It is called after a previously waiting operation +- * was completed. ++ * modified the array. ++ * Note that wait-for-zero operations are handled without restart. + */ + static int check_restart(struct sem_array *sma, struct sem_queue *q) + { +- struct sem *curr; +- struct sem_queue *h; +- +- /* if the operation didn't modify the array, then no restart */ +- if (q->alter == 0) +- return 0; +- +- /* pending complex operations are too difficult to analyse */ +- if (sma->complex_count) ++ /* pending complex alter operations are too difficult to analyse */ ++ if (!list_empty(&sma->pending_alter)) + return 1; + + /* we were a sleeping complex operation. Too difficult */ + if (q->nsops > 1) + return 1; + +- curr = sma->sem_base + q->sops[0].sem_num; ++ /* It is impossible that someone waits for the new value: ++ * - complex operations always restart. ++ * - wait-for-zero are handled seperately. ++ * - q is a previously sleeping simple operation that ++ * altered the array. It must be a decrement, because ++ * simple increments never sleep. ++ * - If there are older (higher priority) decrements ++ * in the queue, then they have observed the original ++ * semval value and couldn't proceed. The operation ++ * decremented to value - thus they won't proceed either. ++ */ ++ return 0; ++} + +- /* No-one waits on this queue */ +- if (list_empty(&curr->sem_pending)) +- return 0; ++/** ++ * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks ++ * @sma: semaphore array. ++ * @semnum: semaphore that was modified. ++ * @pt: list head for the tasks that must be woken up. ++ * ++ * wake_const_ops must be called after a semaphore in a semaphore array ++ * was set to 0. If complex const operations are pending, wake_const_ops must ++ * be called with semnum = -1, as well as with the number of each modified ++ * semaphore. ++ * The tasks that must be woken up are added to @pt. The return code ++ * is stored in q->pid. 
++ * The function returns 1 if at least one operation was completed successfully. ++ */ ++static int wake_const_ops(struct sem_array *sma, int semnum, ++ struct list_head *pt) ++{ ++ struct sem_queue *q; ++ struct list_head *walk; ++ struct list_head *pending_list; ++ int semop_completed = 0; ++ ++ if (semnum == -1) ++ pending_list = &sma->pending_const; ++ else ++ pending_list = &sma->sem_base[semnum].pending_const; ++ ++ walk = pending_list->next; ++ while (walk != pending_list) { ++ int error; ++ ++ q = container_of(walk, struct sem_queue, list); ++ walk = walk->next; ++ ++ error = try_atomic_semop(sma, q->sops, q->nsops, ++ q->undo, q->pid); ++ ++ if (error <= 0) { ++ /* operation completed, remove from queue & wakeup */ ++ ++ unlink_queue(sma, q); ++ ++ wake_up_sem_queue_prepare(pt, q, error); ++ if (error == 0) ++ semop_completed = 1; ++ } ++ } ++ return semop_completed; ++} + +- /* the new semaphore value */ +- if (curr->semval) { +- /* It is impossible that someone waits for the new value: +- * - q is a previously sleeping simple operation that +- * altered the array. It must be a decrement, because +- * simple increments never sleep. +- * - The value is not 0, thus wait-for-zero won't proceed. +- * - If there are older (higher priority) decrements +- * in the queue, then they have observed the original +- * semval value and couldn't proceed. The operation +- * decremented to value - thus they won't proceed either. ++/** ++ * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks ++ * @sma: semaphore array ++ * @sops: operations that were performed ++ * @nsops: number of operations ++ * @pt: list head of the tasks that must be woken up. ++ * ++ * do_smart_wakeup_zero() checks all required queue for wait-for-zero ++ * operations, based on the actual changes that were performed on the ++ * semaphore array. ++ * The function returns 1 if at least one operation was completed successfully. ++ */ ++static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, ++ int nsops, struct list_head *pt) ++{ ++ int i; ++ int semop_completed = 0; ++ int got_zero = 0; ++ ++ /* first: the per-semaphore queues, if known */ ++ if (sops) { ++ for (i = 0; i < nsops; i++) { ++ int num = sops[i].sem_num; ++ ++ if (sma->sem_base[num].semval == 0) { ++ got_zero = 1; ++ semop_completed |= wake_const_ops(sma, num, pt); ++ } ++ } ++ } else { ++ /* ++ * No sops means modified semaphores not known. ++ * Assume all were changed. + */ +- BUG_ON(q->sops[0].sem_op >= 0); +- return 0; ++ for (i = 0; i < sma->sem_nsems; i++) { ++ if (sma->sem_base[i].semval == 0) { ++ got_zero = 1; ++ semop_completed |= wake_const_ops(sma, i, pt); ++ } ++ } + } + /* +- * semval is 0. Check if there are wait-for-zero semops. +- * They must be the first entries in the per-semaphore queue ++ * If one of the modified semaphores got 0, ++ * then check the global queue, too. + */ +- h = list_first_entry(&curr->sem_pending, struct sem_queue, list); +- BUG_ON(h->nsops != 1); +- BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); ++ if (got_zero) ++ semop_completed |= wake_const_ops(sma, -1, pt); + +- /* Yes, there is a wait-for-zero semop. Restart */ +- if (h->sops[0].sem_op == 0) +- return 1; +- +- /* Again - no-one is waiting for the new value. */ +- return 0; ++ return semop_completed; + } + + +@@ -678,6 +755,8 @@ static int check_restart(struct sem_arra + * semaphore. + * The tasks that must be woken up are added to @pt. The return code + * is stored in q->pid. 
++ * The function internally checks if const operations can now succeed. ++ * + * The function return 1 if at least one semop was completed successfully. + */ + static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) +@@ -688,9 +767,9 @@ static int update_queue(struct sem_array + int semop_completed = 0; + + if (semnum == -1) +- pending_list = &sma->sem_pending; ++ pending_list = &sma->pending_alter; + else +- pending_list = &sma->sem_base[semnum].sem_pending; ++ pending_list = &sma->sem_base[semnum].pending_alter; + + again: + walk = pending_list->next; +@@ -702,13 +781,12 @@ again: + + /* If we are scanning the single sop, per-semaphore list of + * one semaphore and that semaphore is 0, then it is not +- * necessary to scan the "alter" entries: simple increments ++ * necessary to scan further: simple increments + * that affect only one entry succeed immediately and cannot + * be in the per semaphore pending queue, and decrements + * cannot be successful if the value is already 0. + */ +- if (semnum != -1 && sma->sem_base[semnum].semval == 0 && +- q->alter) ++ if (semnum != -1 && sma->sem_base[semnum].semval == 0) + break; + + error = try_atomic_semop(sma, q->sops, q->nsops, +@@ -724,6 +802,7 @@ again: + restart = 0; + } else { + semop_completed = 1; ++ do_smart_wakeup_zero(sma, q->sops, q->nsops, pt); + restart = check_restart(sma, q); + } + +@@ -742,8 +821,8 @@ again: + * @otime: force setting otime + * @pt: list head of the tasks that must be woken up. + * +- * do_smart_update() does the required called to update_queue, based on the +- * actual changes that were performed on the semaphore array. ++ * do_smart_update() does the required calls to update_queue and wakeup_zero, ++ * based on the actual changes that were performed on the semaphore array. + * Note that the function does not do the actual wake-up: the caller is + * responsible for calling wake_up_sem_queue_do(@pt). + * It is safe to perform this call after dropping all locks. +@@ -754,6 +833,8 @@ static void do_smart_update(struct sem_a + int i; + int progress; + ++ otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); ++ + progress = 1; + retry_global: + if (sma->complex_count) { +@@ -813,14 +894,14 @@ static int count_semncnt (struct sem_arr + struct sem_queue * q; + + semncnt = 0; +- list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { ++ list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) { + struct sembuf * sops = q->sops; + BUG_ON(sops->sem_num != semnum); + if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) + semncnt++; + } + +- list_for_each_entry(q, &sma->sem_pending, list) { ++ list_for_each_entry(q, &sma->pending_alter, list) { + struct sembuf * sops = q->sops; + int nsops = q->nsops; + int i; +@@ -839,14 +920,14 @@ static int count_semzcnt (struct sem_arr + struct sem_queue * q; + + semzcnt = 0; +- list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { ++ list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) { + struct sembuf * sops = q->sops; + BUG_ON(sops->sem_num != semnum); + if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) + semzcnt++; + } + +- list_for_each_entry(q, &sma->sem_pending, list) { ++ list_for_each_entry(q, &sma->pending_const, list) { + struct sembuf * sops = q->sops; + int nsops = q->nsops; + int i; +@@ -884,13 +965,22 @@ static void freeary(struct ipc_namespace + + /* Wake up all pending processes and let them fail with EIDRM. 
*/ + INIT_LIST_HEAD(&tasks); +- list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { ++ list_for_each_entry_safe(q, tq, &sma->pending_const, list) { ++ unlink_queue(sma, q); ++ wake_up_sem_queue_prepare(&tasks, q, -EIDRM); ++ } ++ ++ list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } + for (i = 0; i < sma->sem_nsems; i++) { + struct sem *sem = sma->sem_base + i; +- list_for_each_entry_safe(q, tq, &sem->sem_pending, list) { ++ list_for_each_entry_safe(q, tq, &sem->pending_const, list) { ++ unlink_queue(sma, q); ++ wake_up_sem_queue_prepare(&tasks, q, -EIDRM); ++ } ++ list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } +@@ -1658,14 +1748,15 @@ SYSCALL_DEFINE4(semtimedop, int, semid, + curr = &sma->sem_base[sops->sem_num]; + + if (alter) +- list_add_tail(&queue.list, &curr->sem_pending); ++ list_add_tail(&queue.list, &curr->pending_alter); + else +- list_add(&queue.list, &curr->sem_pending); ++ list_add_tail(&queue.list, &curr->pending_const); + } else { + if (alter) +- list_add_tail(&queue.list, &sma->sem_pending); ++ list_add_tail(&queue.list, &sma->pending_alter); + else +- list_add(&queue.list, &sma->sem_pending); ++ list_add_tail(&queue.list, &sma->pending_const); ++ + sma->complex_count++; + } + diff --git a/queue-3.10/ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch b/queue-3.10/ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch new file mode 100644 index 00000000000..ed1aaa0cca8 --- /dev/null +++ b/queue-3.10/ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch @@ -0,0 +1,205 @@ +From f269f40ad5aeee229ed70044926f44318abe41ef Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:24 -0700 +Subject: ipc/sem.c: always use only one queue for alter operations + +From: Manfred Spraul + +commit f269f40ad5aeee229ed70044926f44318abe41ef upstream. + +There are two places that can contain alter operations: + - the global queue: sma->pending_alter + - the per-semaphore queues: sma->sem_base[].pending_alter. + +Since one of the queues must be processed first, this causes an odd +priorization of the wakeups: complex operations have priority over +simple ops. + +The patch restores the behavior of linux <=3.0.9: The longest waiting +operation has the highest priority. + +This is done by using only one queue: + - if there are complex ops, then sma->pending_alter is used. + - otherwise, the per-semaphore queues are used. + +As a side effect, do_smart_update_queue() becomes much simpler: no more +goto logic. + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 128 ++++++++++++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 88 insertions(+), 40 deletions(-) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -192,6 +192,53 @@ void __init sem_init (void) + IPC_SEM_IDS, sysvipc_sem_proc_show); + } + ++/** ++ * unmerge_queues - unmerge queues, if possible. ++ * @sma: semaphore array ++ * ++ * The function unmerges the wait queues if complex_count is 0. ++ * It must be called prior to dropping the global semaphore array lock. ++ */ ++static void unmerge_queues(struct sem_array *sma) ++{ ++ struct sem_queue *q, *tq; ++ ++ /* complex operations still around? 
*/ ++ if (sma->complex_count) ++ return; ++ /* ++ * We will switch back to simple mode. ++ * Move all pending operation back into the per-semaphore ++ * queues. ++ */ ++ list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { ++ struct sem *curr; ++ curr = &sma->sem_base[q->sops[0].sem_num]; ++ ++ list_add_tail(&q->list, &curr->pending_alter); ++ } ++ INIT_LIST_HEAD(&sma->pending_alter); ++} ++ ++/** ++ * merge_queues - Merge single semop queues into global queue ++ * @sma: semaphore array ++ * ++ * This function merges all per-semaphore queues into the global queue. ++ * It is necessary to achieve FIFO ordering for the pending single-sop ++ * operations when a multi-semop operation must sleep. ++ * Only the alter operations must be moved, the const operations can stay. ++ */ ++static void merge_queues(struct sem_array *sma) ++{ ++ int i; ++ for (i = 0; i < sma->sem_nsems; i++) { ++ struct sem *sem = sma->sem_base + i; ++ ++ list_splice_init(&sem->pending_alter, &sma->pending_alter); ++ } ++} ++ + /* + * If the request contains only one semaphore operation, and there are + * no complex transactions pending, lock only the semaphore involved. +@@ -262,6 +309,7 @@ static inline int sem_lock(struct sem_ar + static inline void sem_unlock(struct sem_array *sma, int locknum) + { + if (locknum == -1) { ++ unmerge_queues(sma); + ipc_unlock_object(&sma->sem_perm); + } else { + struct sem *sem = sma->sem_base + locknum; +@@ -831,49 +879,38 @@ static void do_smart_update(struct sem_a + int otime, struct list_head *pt) + { + int i; +- int progress; + + otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); + +- progress = 1; +-retry_global: +- if (sma->complex_count) { +- if (update_queue(sma, -1, pt)) { +- progress = 1; +- otime = 1; +- sops = NULL; +- } +- } +- if (!progress) +- goto done; +- +- if (!sops) { +- /* No semops; something special is going on. */ +- for (i = 0; i < sma->sem_nsems; i++) { +- if (update_queue(sma, i, pt)) { +- otime = 1; +- progress = 1; ++ if (!list_empty(&sma->pending_alter)) { ++ /* semaphore array uses the global queue - just process it. */ ++ otime |= update_queue(sma, -1, pt); ++ } else { ++ if (!sops) { ++ /* ++ * No sops, thus the modified semaphores are not ++ * known. Check all. ++ */ ++ for (i = 0; i < sma->sem_nsems; i++) ++ otime |= update_queue(sma, i, pt); ++ } else { ++ /* ++ * Check the semaphores that were increased: ++ * - No complex ops, thus all sleeping ops are ++ * decrease. ++ * - if we decreased the value, then any sleeping ++ * semaphore ops wont be able to run: If the ++ * previous value was too small, then the new ++ * value will be too small, too. ++ */ ++ for (i = 0; i < nsops; i++) { ++ if (sops[i].sem_op > 0) { ++ otime |= update_queue(sma, ++ sops[i].sem_num, pt); ++ } + } + } +- goto done_checkretry; +- } +- +- /* Check the semaphores that were modified. 
*/ +- for (i = 0; i < nsops; i++) { +- if (sops[i].sem_op > 0 || +- (sops[i].sem_op < 0 && +- sma->sem_base[sops[i].sem_num].semval == 0)) +- if (update_queue(sma, sops[i].sem_num, pt)) { +- otime = 1; +- progress = 1; +- } +- } +-done_checkretry: +- if (progress) { +- progress = 0; +- goto retry_global; + } +-done: + if (otime) + sma->sem_otime = get_seconds(); + } +@@ -1747,11 +1784,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, + struct sem *curr; + curr = &sma->sem_base[sops->sem_num]; + +- if (alter) +- list_add_tail(&queue.list, &curr->pending_alter); +- else ++ if (alter) { ++ if (sma->complex_count) { ++ list_add_tail(&queue.list, ++ &sma->pending_alter); ++ } else { ++ ++ list_add_tail(&queue.list, ++ &curr->pending_alter); ++ } ++ } else { + list_add_tail(&queue.list, &curr->pending_const); ++ } + } else { ++ if (!sma->complex_count) ++ merge_queues(sma); ++ + if (alter) + list_add_tail(&queue.list, &sma->pending_alter); + else diff --git a/queue-3.10/ipc-sem.c-cacheline-align-the-semaphore-structures.patch b/queue-3.10/ipc-sem.c-cacheline-align-the-semaphore-structures.patch new file mode 100644 index 00000000000..62b5bd865f8 --- /dev/null +++ b/queue-3.10/ipc-sem.c-cacheline-align-the-semaphore-structures.patch @@ -0,0 +1,54 @@ +From f5c936c0f267ec58641451cf8b8d39b4c207ee4d Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:22 -0700 +Subject: ipc/sem.c: cacheline align the semaphore structures + +From: Manfred Spraul + +commit f5c936c0f267ec58641451cf8b8d39b4c207ee4d upstream. + +As now each semaphore has its own spinlock and parallel operations are +possible, give each semaphore its own cacheline. + +On a i3 laptop, this gives up to 28% better performance: + + #semscale 10 | grep "interleave 2" + - before: + Cpus 1, interleave 2 delay 0: 36109234 in 10 secs + Cpus 2, interleave 2 delay 0: 55276317 in 10 secs + Cpus 3, interleave 2 delay 0: 62411025 in 10 secs + Cpus 4, interleave 2 delay 0: 81963928 in 10 secs + + -after: + Cpus 1, interleave 2 delay 0: 35527306 in 10 secs + Cpus 2, interleave 2 delay 0: 70922909 in 10 secs <<< + 28% + Cpus 3, interleave 2 delay 0: 80518538 in 10 secs + Cpus 4, interleave 2 delay 0: 89115148 in 10 secs <<< + 8.7% + +i3, with 2 cores and with hyperthreading enabled. Interleave 2 in order +use first the full cores. HT partially hides the delay from cacheline +trashing, thus the improvement is "only" 8.7% if 4 threads are running. + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -96,7 +96,7 @@ struct sem { + int sempid; /* pid of last operation */ + spinlock_t lock; /* spinlock for fine-grained semtimedop */ + struct list_head sem_pending; /* pending single-sop operations */ +-}; ++} ____cacheline_aligned_in_smp; + + /* One queue for each sleeping process in the system. 
*/ + struct sem_queue { diff --git a/queue-3.10/ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch b/queue-3.10/ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch new file mode 100644 index 00000000000..09adc2e6fc4 --- /dev/null +++ b/queue-3.10/ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch @@ -0,0 +1,113 @@ +From 758a6ba39ef6df4cdc615e5edd7bd86eab81a5f7 Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:26 -0700 +Subject: ipc/sem.c: rename try_atomic_semop() to perform_atomic_semop(), docu update + +From: Manfred Spraul + +commit 758a6ba39ef6df4cdc615e5edd7bd86eab81a5f7 upstream. + +Cleanup: Some minor points that I noticed while writing the previous +patches + +1) The name try_atomic_semop() is misleading: The function performs the + operation (if it is possible). + +2) Some documentation updates. + +No real code change, a rename and documentation changes. + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 32 +++++++++++++++++++++----------- + 1 file changed, 21 insertions(+), 11 deletions(-) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -154,12 +154,15 @@ static int sysvipc_sem_proc_show(struct + #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ + + /* +- * linked list protection: ++ * Locking: + * sem_undo.id_next, ++ * sem_array.complex_count, + * sem_array.pending{_alter,_cont}, +- * sem_array.sem_undo: sem_lock() for read/write ++ * sem_array.sem_undo: global sem_lock() for read/write + * sem_undo.proc_next: only "current" is allowed to read/write that field. + * ++ * sem_array.sem_base[i].pending_{const,alter}: ++ * global or semaphore sem_lock() for read/write + */ + + #define sc_semmsl sem_ctls[0] +@@ -536,12 +539,19 @@ SYSCALL_DEFINE3(semget, key_t, key, int, + return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); + } + +-/* +- * Determine whether a sequence of semaphore operations would succeed +- * all at once. Return 0 if yes, 1 if need to sleep, else return error code. ++/** perform_atomic_semop - Perform (if possible) a semaphore operation ++ * @sma: semaphore array ++ * @sops: array with operations that should be checked ++ * @nsems: number of sops ++ * @un: undo array ++ * @pid: pid that did the change ++ * ++ * Returns 0 if the operation was possible. ++ * Returns 1 if the operation is impossible, the caller must sleep. ++ * Negative values are error codes. + */ + +-static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, ++static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops, + int nsops, struct sem_undo *un, int pid) + { + int result, sem_op; +@@ -724,8 +734,8 @@ static int wake_const_ops(struct sem_arr + q = container_of(walk, struct sem_queue, list); + walk = walk->next; + +- error = try_atomic_semop(sma, q->sops, q->nsops, +- q->undo, q->pid); ++ error = perform_atomic_semop(sma, q->sops, q->nsops, ++ q->undo, q->pid); + + if (error <= 0) { + /* operation completed, remove from queue & wakeup */ +@@ -838,7 +848,7 @@ again: + if (semnum != -1 && sma->sem_base[semnum].semval == 0) + break; + +- error = try_atomic_semop(sma, q->sops, q->nsops, ++ error = perform_atomic_semop(sma, q->sops, q->nsops, + q->undo, q->pid); + + /* Does q->sleeper still need to sleep? 
*/ +@@ -1686,7 +1696,6 @@ static int get_queue_result(struct sem_q + return error; + } + +- + SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, + unsigned, nsops, const struct timespec __user *, timeout) + { +@@ -1784,7 +1793,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, + if (un && un->semid == -1) + goto out_unlock_free; + +- error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); ++ error = perform_atomic_semop(sma, sops, nsops, un, ++ task_tgid_vnr(current)); + if (error <= 0) { + if (alter && error == 0) + do_smart_update(sma, sops, nsops, 1, &tasks); diff --git a/queue-3.10/ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch b/queue-3.10/ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch new file mode 100644 index 00000000000..8daa59431bd --- /dev/null +++ b/queue-3.10/ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch @@ -0,0 +1,123 @@ +From d12e1e50e47e0900dbbf52237b7e171f4f15ea1e Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:25 -0700 +Subject: ipc/sem.c: replace shared sem_otime with per-semaphore value + +From: Manfred Spraul + +commit d12e1e50e47e0900dbbf52237b7e171f4f15ea1e upstream. + +sem_otime contains the time of the last semaphore operation that +completed successfully. Every operation updates this value, thus access +from multiple cpus can cause thrashing. + +Therefore the patch replaces the variable with a per-semaphore variable. +The per-array sem_otime is only calculated when required. + +No performance improvement on a single-socket i3 - only important for +larger systems. + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/sem.h | 1 - + ipc/sem.c | 37 +++++++++++++++++++++++++++++++------ + 2 files changed, 31 insertions(+), 7 deletions(-) + +--- a/include/linux/sem.h ++++ b/include/linux/sem.h +@@ -12,7 +12,6 @@ struct task_struct; + struct sem_array { + struct kern_ipc_perm ____cacheline_aligned_in_smp + sem_perm; /* permissions .. see ipc.h */ +- time_t sem_otime; /* last semop time */ + time_t sem_ctime; /* last change time */ + struct sem *sem_base; /* ptr to first semaphore in array */ + struct list_head pending_alter; /* pending operations */ +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -99,6 +99,7 @@ struct sem { + /* that alter the semaphore */ + struct list_head pending_const; /* pending single-sop operations */ + /* that do not alter the semaphore*/ ++ time_t sem_otime; /* candidate for sem_otime */ + } ____cacheline_aligned_in_smp; + + /* One queue for each sleeping process in the system. 
*/ +@@ -911,8 +912,14 @@ static void do_smart_update(struct sem_a + } + } + } +- if (otime) +- sma->sem_otime = get_seconds(); ++ if (otime) { ++ if (sops == NULL) { ++ sma->sem_base[0].sem_otime = get_seconds(); ++ } else { ++ sma->sem_base[sops[0].sem_num].sem_otime = ++ get_seconds(); ++ } ++ } + } + + +@@ -1058,6 +1065,21 @@ static unsigned long copy_semid_to_user( + } + } + ++static time_t get_semotime(struct sem_array *sma) ++{ ++ int i; ++ time_t res; ++ ++ res = sma->sem_base[0].sem_otime; ++ for (i = 1; i < sma->sem_nsems; i++) { ++ time_t to = sma->sem_base[i].sem_otime; ++ ++ if (to > res) ++ res = to; ++ } ++ return res; ++} ++ + static int semctl_nolock(struct ipc_namespace *ns, int semid, + int cmd, int version, void __user *p) + { +@@ -1131,9 +1153,9 @@ static int semctl_nolock(struct ipc_name + goto out_unlock; + + kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); +- tbuf.sem_otime = sma->sem_otime; +- tbuf.sem_ctime = sma->sem_ctime; +- tbuf.sem_nsems = sma->sem_nsems; ++ tbuf.sem_otime = get_semotime(sma); ++ tbuf.sem_ctime = sma->sem_ctime; ++ tbuf.sem_nsems = sma->sem_nsems; + rcu_read_unlock(); + if (copy_semid_to_user(p, &tbuf, version)) + return -EFAULT; +@@ -2025,6 +2047,9 @@ static int sysvipc_sem_proc_show(struct + { + struct user_namespace *user_ns = seq_user_ns(s); + struct sem_array *sma = it; ++ time_t sem_otime; ++ ++ sem_otime = get_semotime(sma); + + return seq_printf(s, + "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", +@@ -2036,7 +2061,7 @@ static int sysvipc_sem_proc_show(struct + from_kgid_munged(user_ns, sma->sem_perm.gid), + from_kuid_munged(user_ns, sma->sem_perm.cuid), + from_kgid_munged(user_ns, sma->sem_perm.cgid), +- sma->sem_otime, ++ sem_otime, + sma->sem_ctime); + } + #endif diff --git a/queue-3.10/ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch b/queue-3.10/ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch new file mode 100644 index 00000000000..89598f3f510 --- /dev/null +++ b/queue-3.10/ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch @@ -0,0 +1,71 @@ +From 196aa0132fc7261f34b10ae1bfb44abc1bc69b3c Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:20 -0700 +Subject: ipc/util.c, ipc_rcu_alloc: cacheline align allocation + +From: Manfred Spraul + +commit 196aa0132fc7261f34b10ae1bfb44abc1bc69b3c upstream. + +Enforce that ipc_rcu_alloc returns a cacheline aligned pointer on SMP. + +Rationale: + +The SysV sem code tries to move the main spinlock into a seperate +cacheline (____cacheline_aligned_in_smp). This works only if +ipc_rcu_alloc returns cacheline aligned pointers. vmalloc and kmalloc +return cacheline algined pointers, the implementation of ipc_rcu_alloc +breaks that. + +[akpm@linux-foundation.org: coding-style fixes] +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/util.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/ipc/util.c ++++ b/ipc/util.c +@@ -468,9 +468,7 @@ void ipc_free(void* ptr, int size) + struct ipc_rcu { + struct rcu_head rcu; + atomic_t refcount; +- /* "void *" makes sure alignment of following data is sane. 
*/ +- void *data[0]; +-}; ++} ____cacheline_aligned_in_smp; + + /** + * ipc_rcu_alloc - allocate ipc and rcu space +@@ -488,12 +486,14 @@ void *ipc_rcu_alloc(int size) + if (unlikely(!out)) + return NULL; + atomic_set(&out->refcount, 1); +- return out->data; ++ return out + 1; + } + + int ipc_rcu_getref(void *ptr) + { +- return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu, data)->refcount); ++ struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; ++ ++ return atomic_inc_not_zero(&p->refcount); + } + + /** +@@ -507,7 +507,7 @@ static void ipc_schedule_free(struct rcu + + void ipc_rcu_putref(void *ptr) + { +- struct ipc_rcu *p = container_of(ptr, struct ipc_rcu, data); ++ struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; + + if (!atomic_dec_and_test(&p->refcount)) + return; diff --git a/queue-3.10/series b/queue-3.10/series index 456597e69cc..74a2f032320 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -38,3 +38,9 @@ ipc-msg-make-msgctl_nolock-lockless.patch ipc-msg-shorten-critical-region-in-msgsnd.patch ipc-msg-shorten-critical-region-in-msgrcv.patch ipc-remove-unused-functions.patch +ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch +ipc-sem.c-cacheline-align-the-semaphore-structures.patch +ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch +ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch +ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch +ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch
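For reference only (not part of the queued patches): the pending_const/pending_alter split introduced by this series mirrors a distinction that is already visible in the userspace SysV API, where a sembuf with sem_op == 0 is a wait-for-zero operation and any non-zero sem_op is an alter operation. The sketch below only illustrates that mapping; the key 0x1234, the single-semaphore set, and the 0600 mode are arbitrary assumptions for illustration, not values taken from the patches.

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

int main(void)
{
	/* Create (or attach to) a set with one semaphore; key and mode are arbitrary. */
	int semid = semget(0x1234, 1, IPC_CREAT | 0600);
	if (semid < 0) {
		perror("semget");
		return EXIT_FAILURE;
	}

	/* Alter operation: increment semaphore 0.  Simple increments never
	 * sleep, so this completes immediately. */
	struct sembuf inc = { .sem_num = 0, .sem_op = 1, .sem_flg = 0 };
	if (semop(semid, &inc, 1) < 0)
		perror("semop(+1)");

	/* Alter operation: decrement semaphore 0.  Had it needed to sleep, a
	 * single-sop request like this would sit on the per-semaphore
	 * pending_alter list. */
	struct sembuf dec = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
	if (semop(semid, &dec, 1) < 0)
		perror("semop(-1)");

	/* Wait-for-zero operation: sem_op == 0.  Had it needed to sleep, it
	 * would sit on the per-semaphore pending_const list instead. */
	struct sembuf zero = { .sem_num = 0, .sem_op = 0, .sem_flg = IPC_NOWAIT };
	if (semop(semid, &zero, 1) < 0)
		perror("semop(0)");

	/* Remove the set again so the sketch leaves nothing behind. */
	semctl(semid, 0, IPC_RMID);

	return 0;
}

After the increment and decrement the semaphore value is back to zero, so the final wait-for-zero succeeds even with IPC_NOWAIT; dropping that flag makes the call block while another process holds the value above zero, which is exactly the case the new pending_const queues handle in the kernel.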