From: Greg Kroah-Hartman Date: Tue, 15 Oct 2013 20:50:21 +0000 (-0700) Subject: 3.10-stable patches X-Git-Tag: v3.10.17~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7538f1c1d3f5d22c3aea65e8ade1898e1f7119e2;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch ipc-sem.c-cacheline-align-the-semaphore-structures.patch ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch --- diff --git a/queue-3.10/ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch b/queue-3.10/ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch new file mode 100644 index 00000000000..a296de4641a --- /dev/null +++ b/queue-3.10/ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch @@ -0,0 +1,408 @@ +From 1a82e9e1d0f1b45f47a97c9e2349020536ff8987 Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:23 -0700 +Subject: ipc/sem: separate wait-for-zero and alter tasks into seperate queues + +From: Manfred Spraul + +commit 1a82e9e1d0f1b45f47a97c9e2349020536ff8987 upstream. + +Introduce separate queues for operations that do not modify the +semaphore values. Advantages: + + - Simpler logic in check_restart(). + - Faster update_queue(): Right now, all wait-for-zero operations are + always tested, even if the semaphore value is not 0. + - wait-for-zero gets again priority, as in linux <=3.0.9 + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/sem.h | 5 - + ipc/sem.c | 211 +++++++++++++++++++++++++++++++++++++--------------- + 2 files changed, 155 insertions(+), 61 deletions(-) + +--- a/include/linux/sem.h ++++ b/include/linux/sem.h +@@ -15,7 +15,10 @@ struct sem_array { + time_t sem_otime; /* last semop time */ + time_t sem_ctime; /* last change time */ + struct sem *sem_base; /* ptr to first semaphore in array */ +- struct list_head sem_pending; /* pending operations to be processed */ ++ struct list_head pending_alter; /* pending operations */ ++ /* that alter the array */ ++ struct list_head pending_const; /* pending complex operations */ ++ /* that do not alter semvals */ + struct list_head list_id; /* undo requests on this array */ + int sem_nsems; /* no. of semaphores in array */ + int complex_count; /* pending complex operations */ +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -95,7 +95,10 @@ struct sem { + int semval; /* current value */ + int sempid; /* pid of last operation */ + spinlock_t lock; /* spinlock for fine-grained semtimedop */ +- struct list_head sem_pending; /* pending single-sop operations */ ++ struct list_head pending_alter; /* pending single-sop operations */ ++ /* that alter the semaphore */ ++ struct list_head pending_const; /* pending single-sop operations */ ++ /* that do not alter the semaphore*/ + } ____cacheline_aligned_in_smp; + + /* One queue for each sleeping process in the system. 
*/ +@@ -152,7 +155,7 @@ static int sysvipc_sem_proc_show(struct + /* + * linked list protection: + * sem_undo.id_next, +- * sem_array.sem_pending{,last}, ++ * sem_array.pending{_alter,_cont}, + * sem_array.sem_undo: sem_lock() for read/write + * sem_undo.proc_next: only "current" is allowed to read/write that field. + * +@@ -337,7 +340,7 @@ static inline void sem_rmid(struct ipc_n + * Without the check/retry algorithm a lockless wakeup is possible: + * - queue.status is initialized to -EINTR before blocking. + * - wakeup is performed by +- * * unlinking the queue entry from sma->sem_pending ++ * * unlinking the queue entry from the pending list + * * setting queue.status to IN_WAKEUP + * This is the notification for the blocked thread that a + * result value is imminent. +@@ -418,12 +421,14 @@ static int newary(struct ipc_namespace * + sma->sem_base = (struct sem *) &sma[1]; + + for (i = 0; i < nsems; i++) { +- INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); ++ INIT_LIST_HEAD(&sma->sem_base[i].pending_alter); ++ INIT_LIST_HEAD(&sma->sem_base[i].pending_const); + spin_lock_init(&sma->sem_base[i].lock); + } + + sma->complex_count = 0; +- INIT_LIST_HEAD(&sma->sem_pending); ++ INIT_LIST_HEAD(&sma->pending_alter); ++ INIT_LIST_HEAD(&sma->pending_const); + INIT_LIST_HEAD(&sma->list_id); + sma->sem_nsems = nsems; + sma->sem_ctime = get_seconds(); +@@ -609,60 +614,132 @@ static void unlink_queue(struct sem_arra + * update_queue is O(N^2) when it restarts scanning the whole queue of + * waiting operations. Therefore this function checks if the restart is + * really necessary. It is called after a previously waiting operation +- * was completed. ++ * modified the array. ++ * Note that wait-for-zero operations are handled without restart. + */ + static int check_restart(struct sem_array *sma, struct sem_queue *q) + { +- struct sem *curr; +- struct sem_queue *h; +- +- /* if the operation didn't modify the array, then no restart */ +- if (q->alter == 0) +- return 0; +- +- /* pending complex operations are too difficult to analyse */ +- if (sma->complex_count) ++ /* pending complex alter operations are too difficult to analyse */ ++ if (!list_empty(&sma->pending_alter)) + return 1; + + /* we were a sleeping complex operation. Too difficult */ + if (q->nsops > 1) + return 1; + +- curr = sma->sem_base + q->sops[0].sem_num; ++ /* It is impossible that someone waits for the new value: ++ * - complex operations always restart. ++ * - wait-for-zero are handled seperately. ++ * - q is a previously sleeping simple operation that ++ * altered the array. It must be a decrement, because ++ * simple increments never sleep. ++ * - If there are older (higher priority) decrements ++ * in the queue, then they have observed the original ++ * semval value and couldn't proceed. The operation ++ * decremented to value - thus they won't proceed either. ++ */ ++ return 0; ++} + +- /* No-one waits on this queue */ +- if (list_empty(&curr->sem_pending)) +- return 0; ++/** ++ * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks ++ * @sma: semaphore array. ++ * @semnum: semaphore that was modified. ++ * @pt: list head for the tasks that must be woken up. ++ * ++ * wake_const_ops must be called after a semaphore in a semaphore array ++ * was set to 0. If complex const operations are pending, wake_const_ops must ++ * be called with semnum = -1, as well as with the number of each modified ++ * semaphore. ++ * The tasks that must be woken up are added to @pt. The return code ++ * is stored in q->pid. 
++ * The function returns 1 if at least one operation was completed successfully. ++ */ ++static int wake_const_ops(struct sem_array *sma, int semnum, ++ struct list_head *pt) ++{ ++ struct sem_queue *q; ++ struct list_head *walk; ++ struct list_head *pending_list; ++ int semop_completed = 0; ++ ++ if (semnum == -1) ++ pending_list = &sma->pending_const; ++ else ++ pending_list = &sma->sem_base[semnum].pending_const; ++ ++ walk = pending_list->next; ++ while (walk != pending_list) { ++ int error; ++ ++ q = container_of(walk, struct sem_queue, list); ++ walk = walk->next; ++ ++ error = try_atomic_semop(sma, q->sops, q->nsops, ++ q->undo, q->pid); ++ ++ if (error <= 0) { ++ /* operation completed, remove from queue & wakeup */ ++ ++ unlink_queue(sma, q); ++ ++ wake_up_sem_queue_prepare(pt, q, error); ++ if (error == 0) ++ semop_completed = 1; ++ } ++ } ++ return semop_completed; ++} + +- /* the new semaphore value */ +- if (curr->semval) { +- /* It is impossible that someone waits for the new value: +- * - q is a previously sleeping simple operation that +- * altered the array. It must be a decrement, because +- * simple increments never sleep. +- * - The value is not 0, thus wait-for-zero won't proceed. +- * - If there are older (higher priority) decrements +- * in the queue, then they have observed the original +- * semval value and couldn't proceed. The operation +- * decremented to value - thus they won't proceed either. ++/** ++ * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks ++ * @sma: semaphore array ++ * @sops: operations that were performed ++ * @nsops: number of operations ++ * @pt: list head of the tasks that must be woken up. ++ * ++ * do_smart_wakeup_zero() checks all required queue for wait-for-zero ++ * operations, based on the actual changes that were performed on the ++ * semaphore array. ++ * The function returns 1 if at least one operation was completed successfully. ++ */ ++static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, ++ int nsops, struct list_head *pt) ++{ ++ int i; ++ int semop_completed = 0; ++ int got_zero = 0; ++ ++ /* first: the per-semaphore queues, if known */ ++ if (sops) { ++ for (i = 0; i < nsops; i++) { ++ int num = sops[i].sem_num; ++ ++ if (sma->sem_base[num].semval == 0) { ++ got_zero = 1; ++ semop_completed |= wake_const_ops(sma, num, pt); ++ } ++ } ++ } else { ++ /* ++ * No sops means modified semaphores not known. ++ * Assume all were changed. + */ +- BUG_ON(q->sops[0].sem_op >= 0); +- return 0; ++ for (i = 0; i < sma->sem_nsems; i++) { ++ if (sma->sem_base[i].semval == 0) { ++ got_zero = 1; ++ semop_completed |= wake_const_ops(sma, i, pt); ++ } ++ } + } + /* +- * semval is 0. Check if there are wait-for-zero semops. +- * They must be the first entries in the per-semaphore queue ++ * If one of the modified semaphores got 0, ++ * then check the global queue, too. + */ +- h = list_first_entry(&curr->sem_pending, struct sem_queue, list); +- BUG_ON(h->nsops != 1); +- BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); ++ if (got_zero) ++ semop_completed |= wake_const_ops(sma, -1, pt); + +- /* Yes, there is a wait-for-zero semop. Restart */ +- if (h->sops[0].sem_op == 0) +- return 1; +- +- /* Again - no-one is waiting for the new value. */ +- return 0; ++ return semop_completed; + } + + +@@ -678,6 +755,8 @@ static int check_restart(struct sem_arra + * semaphore. + * The tasks that must be woken up are added to @pt. The return code + * is stored in q->pid. 
++ * The function internally checks if const operations can now succeed. ++ * + * The function return 1 if at least one semop was completed successfully. + */ + static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) +@@ -688,9 +767,9 @@ static int update_queue(struct sem_array + int semop_completed = 0; + + if (semnum == -1) +- pending_list = &sma->sem_pending; ++ pending_list = &sma->pending_alter; + else +- pending_list = &sma->sem_base[semnum].sem_pending; ++ pending_list = &sma->sem_base[semnum].pending_alter; + + again: + walk = pending_list->next; +@@ -702,13 +781,12 @@ again: + + /* If we are scanning the single sop, per-semaphore list of + * one semaphore and that semaphore is 0, then it is not +- * necessary to scan the "alter" entries: simple increments ++ * necessary to scan further: simple increments + * that affect only one entry succeed immediately and cannot + * be in the per semaphore pending queue, and decrements + * cannot be successful if the value is already 0. + */ +- if (semnum != -1 && sma->sem_base[semnum].semval == 0 && +- q->alter) ++ if (semnum != -1 && sma->sem_base[semnum].semval == 0) + break; + + error = try_atomic_semop(sma, q->sops, q->nsops, +@@ -724,6 +802,7 @@ again: + restart = 0; + } else { + semop_completed = 1; ++ do_smart_wakeup_zero(sma, q->sops, q->nsops, pt); + restart = check_restart(sma, q); + } + +@@ -742,8 +821,8 @@ again: + * @otime: force setting otime + * @pt: list head of the tasks that must be woken up. + * +- * do_smart_update() does the required called to update_queue, based on the +- * actual changes that were performed on the semaphore array. ++ * do_smart_update() does the required calls to update_queue and wakeup_zero, ++ * based on the actual changes that were performed on the semaphore array. + * Note that the function does not do the actual wake-up: the caller is + * responsible for calling wake_up_sem_queue_do(@pt). + * It is safe to perform this call after dropping all locks. +@@ -754,6 +833,8 @@ static void do_smart_update(struct sem_a + int i; + int progress; + ++ otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); ++ + progress = 1; + retry_global: + if (sma->complex_count) { +@@ -813,14 +894,14 @@ static int count_semncnt (struct sem_arr + struct sem_queue * q; + + semncnt = 0; +- list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { ++ list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) { + struct sembuf * sops = q->sops; + BUG_ON(sops->sem_num != semnum); + if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) + semncnt++; + } + +- list_for_each_entry(q, &sma->sem_pending, list) { ++ list_for_each_entry(q, &sma->pending_alter, list) { + struct sembuf * sops = q->sops; + int nsops = q->nsops; + int i; +@@ -839,14 +920,14 @@ static int count_semzcnt (struct sem_arr + struct sem_queue * q; + + semzcnt = 0; +- list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { ++ list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) { + struct sembuf * sops = q->sops; + BUG_ON(sops->sem_num != semnum); + if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) + semzcnt++; + } + +- list_for_each_entry(q, &sma->sem_pending, list) { ++ list_for_each_entry(q, &sma->pending_const, list) { + struct sembuf * sops = q->sops; + int nsops = q->nsops; + int i; +@@ -884,13 +965,22 @@ static void freeary(struct ipc_namespace + + /* Wake up all pending processes and let them fail with EIDRM. 
*/ + INIT_LIST_HEAD(&tasks); +- list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { ++ list_for_each_entry_safe(q, tq, &sma->pending_const, list) { ++ unlink_queue(sma, q); ++ wake_up_sem_queue_prepare(&tasks, q, -EIDRM); ++ } ++ ++ list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } + for (i = 0; i < sma->sem_nsems; i++) { + struct sem *sem = sma->sem_base + i; +- list_for_each_entry_safe(q, tq, &sem->sem_pending, list) { ++ list_for_each_entry_safe(q, tq, &sem->pending_const, list) { ++ unlink_queue(sma, q); ++ wake_up_sem_queue_prepare(&tasks, q, -EIDRM); ++ } ++ list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { + unlink_queue(sma, q); + wake_up_sem_queue_prepare(&tasks, q, -EIDRM); + } +@@ -1658,14 +1748,15 @@ SYSCALL_DEFINE4(semtimedop, int, semid, + curr = &sma->sem_base[sops->sem_num]; + + if (alter) +- list_add_tail(&queue.list, &curr->sem_pending); ++ list_add_tail(&queue.list, &curr->pending_alter); + else +- list_add(&queue.list, &curr->sem_pending); ++ list_add_tail(&queue.list, &curr->pending_const); + } else { + if (alter) +- list_add_tail(&queue.list, &sma->sem_pending); ++ list_add_tail(&queue.list, &sma->pending_alter); + else +- list_add(&queue.list, &sma->sem_pending); ++ list_add_tail(&queue.list, &sma->pending_const); ++ + sma->complex_count++; + } + diff --git a/queue-3.10/ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch b/queue-3.10/ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch new file mode 100644 index 00000000000..ed1aaa0cca8 --- /dev/null +++ b/queue-3.10/ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch @@ -0,0 +1,205 @@ +From f269f40ad5aeee229ed70044926f44318abe41ef Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:24 -0700 +Subject: ipc/sem.c: always use only one queue for alter operations + +From: Manfred Spraul + +commit f269f40ad5aeee229ed70044926f44318abe41ef upstream. + +There are two places that can contain alter operations: + - the global queue: sma->pending_alter + - the per-semaphore queues: sma->sem_base[].pending_alter. + +Since one of the queues must be processed first, this causes an odd +priorization of the wakeups: complex operations have priority over +simple ops. + +The patch restores the behavior of linux <=3.0.9: The longest waiting +operation has the highest priority. + +This is done by using only one queue: + - if there are complex ops, then sma->pending_alter is used. + - otherwise, the per-semaphore queues are used. + +As a side effect, do_smart_update_queue() becomes much simpler: no more +goto logic. + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 128 ++++++++++++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 88 insertions(+), 40 deletions(-) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -192,6 +192,53 @@ void __init sem_init (void) + IPC_SEM_IDS, sysvipc_sem_proc_show); + } + ++/** ++ * unmerge_queues - unmerge queues, if possible. ++ * @sma: semaphore array ++ * ++ * The function unmerges the wait queues if complex_count is 0. ++ * It must be called prior to dropping the global semaphore array lock. ++ */ ++static void unmerge_queues(struct sem_array *sma) ++{ ++ struct sem_queue *q, *tq; ++ ++ /* complex operations still around? 
*/ ++ if (sma->complex_count) ++ return; ++ /* ++ * We will switch back to simple mode. ++ * Move all pending operation back into the per-semaphore ++ * queues. ++ */ ++ list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { ++ struct sem *curr; ++ curr = &sma->sem_base[q->sops[0].sem_num]; ++ ++ list_add_tail(&q->list, &curr->pending_alter); ++ } ++ INIT_LIST_HEAD(&sma->pending_alter); ++} ++ ++/** ++ * merge_queues - Merge single semop queues into global queue ++ * @sma: semaphore array ++ * ++ * This function merges all per-semaphore queues into the global queue. ++ * It is necessary to achieve FIFO ordering for the pending single-sop ++ * operations when a multi-semop operation must sleep. ++ * Only the alter operations must be moved, the const operations can stay. ++ */ ++static void merge_queues(struct sem_array *sma) ++{ ++ int i; ++ for (i = 0; i < sma->sem_nsems; i++) { ++ struct sem *sem = sma->sem_base + i; ++ ++ list_splice_init(&sem->pending_alter, &sma->pending_alter); ++ } ++} ++ + /* + * If the request contains only one semaphore operation, and there are + * no complex transactions pending, lock only the semaphore involved. +@@ -262,6 +309,7 @@ static inline int sem_lock(struct sem_ar + static inline void sem_unlock(struct sem_array *sma, int locknum) + { + if (locknum == -1) { ++ unmerge_queues(sma); + ipc_unlock_object(&sma->sem_perm); + } else { + struct sem *sem = sma->sem_base + locknum; +@@ -831,49 +879,38 @@ static void do_smart_update(struct sem_a + int otime, struct list_head *pt) + { + int i; +- int progress; + + otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); + +- progress = 1; +-retry_global: +- if (sma->complex_count) { +- if (update_queue(sma, -1, pt)) { +- progress = 1; +- otime = 1; +- sops = NULL; +- } +- } +- if (!progress) +- goto done; +- +- if (!sops) { +- /* No semops; something special is going on. */ +- for (i = 0; i < sma->sem_nsems; i++) { +- if (update_queue(sma, i, pt)) { +- otime = 1; +- progress = 1; ++ if (!list_empty(&sma->pending_alter)) { ++ /* semaphore array uses the global queue - just process it. */ ++ otime |= update_queue(sma, -1, pt); ++ } else { ++ if (!sops) { ++ /* ++ * No sops, thus the modified semaphores are not ++ * known. Check all. ++ */ ++ for (i = 0; i < sma->sem_nsems; i++) ++ otime |= update_queue(sma, i, pt); ++ } else { ++ /* ++ * Check the semaphores that were increased: ++ * - No complex ops, thus all sleeping ops are ++ * decrease. ++ * - if we decreased the value, then any sleeping ++ * semaphore ops wont be able to run: If the ++ * previous value was too small, then the new ++ * value will be too small, too. ++ */ ++ for (i = 0; i < nsops; i++) { ++ if (sops[i].sem_op > 0) { ++ otime |= update_queue(sma, ++ sops[i].sem_num, pt); ++ } + } + } +- goto done_checkretry; +- } +- +- /* Check the semaphores that were modified. 
*/ +- for (i = 0; i < nsops; i++) { +- if (sops[i].sem_op > 0 || +- (sops[i].sem_op < 0 && +- sma->sem_base[sops[i].sem_num].semval == 0)) +- if (update_queue(sma, sops[i].sem_num, pt)) { +- otime = 1; +- progress = 1; +- } +- } +-done_checkretry: +- if (progress) { +- progress = 0; +- goto retry_global; + } +-done: + if (otime) + sma->sem_otime = get_seconds(); + } +@@ -1747,11 +1784,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, + struct sem *curr; + curr = &sma->sem_base[sops->sem_num]; + +- if (alter) +- list_add_tail(&queue.list, &curr->pending_alter); +- else ++ if (alter) { ++ if (sma->complex_count) { ++ list_add_tail(&queue.list, ++ &sma->pending_alter); ++ } else { ++ ++ list_add_tail(&queue.list, ++ &curr->pending_alter); ++ } ++ } else { + list_add_tail(&queue.list, &curr->pending_const); ++ } + } else { ++ if (!sma->complex_count) ++ merge_queues(sma); ++ + if (alter) + list_add_tail(&queue.list, &sma->pending_alter); + else diff --git a/queue-3.10/ipc-sem.c-cacheline-align-the-semaphore-structures.patch b/queue-3.10/ipc-sem.c-cacheline-align-the-semaphore-structures.patch new file mode 100644 index 00000000000..62b5bd865f8 --- /dev/null +++ b/queue-3.10/ipc-sem.c-cacheline-align-the-semaphore-structures.patch @@ -0,0 +1,54 @@ +From f5c936c0f267ec58641451cf8b8d39b4c207ee4d Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:22 -0700 +Subject: ipc/sem.c: cacheline align the semaphore structures + +From: Manfred Spraul + +commit f5c936c0f267ec58641451cf8b8d39b4c207ee4d upstream. + +As now each semaphore has its own spinlock and parallel operations are +possible, give each semaphore its own cacheline. + +On a i3 laptop, this gives up to 28% better performance: + + #semscale 10 | grep "interleave 2" + - before: + Cpus 1, interleave 2 delay 0: 36109234 in 10 secs + Cpus 2, interleave 2 delay 0: 55276317 in 10 secs + Cpus 3, interleave 2 delay 0: 62411025 in 10 secs + Cpus 4, interleave 2 delay 0: 81963928 in 10 secs + + -after: + Cpus 1, interleave 2 delay 0: 35527306 in 10 secs + Cpus 2, interleave 2 delay 0: 70922909 in 10 secs <<< + 28% + Cpus 3, interleave 2 delay 0: 80518538 in 10 secs + Cpus 4, interleave 2 delay 0: 89115148 in 10 secs <<< + 8.7% + +i3, with 2 cores and with hyperthreading enabled. Interleave 2 in order +use first the full cores. HT partially hides the delay from cacheline +trashing, thus the improvement is "only" 8.7% if 4 threads are running. + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -96,7 +96,7 @@ struct sem { + int sempid; /* pid of last operation */ + spinlock_t lock; /* spinlock for fine-grained semtimedop */ + struct list_head sem_pending; /* pending single-sop operations */ +-}; ++} ____cacheline_aligned_in_smp; + + /* One queue for each sleeping process in the system. 
*/ + struct sem_queue { diff --git a/queue-3.10/ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch b/queue-3.10/ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch new file mode 100644 index 00000000000..09adc2e6fc4 --- /dev/null +++ b/queue-3.10/ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch @@ -0,0 +1,113 @@ +From 758a6ba39ef6df4cdc615e5edd7bd86eab81a5f7 Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:26 -0700 +Subject: ipc/sem.c: rename try_atomic_semop() to perform_atomic_semop(), docu update + +From: Manfred Spraul + +commit 758a6ba39ef6df4cdc615e5edd7bd86eab81a5f7 upstream. + +Cleanup: Some minor points that I noticed while writing the previous +patches + +1) The name try_atomic_semop() is misleading: The function performs the + operation (if it is possible). + +2) Some documentation updates. + +No real code change, a rename and documentation changes. + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/sem.c | 32 +++++++++++++++++++++----------- + 1 file changed, 21 insertions(+), 11 deletions(-) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -154,12 +154,15 @@ static int sysvipc_sem_proc_show(struct + #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ + + /* +- * linked list protection: ++ * Locking: + * sem_undo.id_next, ++ * sem_array.complex_count, + * sem_array.pending{_alter,_cont}, +- * sem_array.sem_undo: sem_lock() for read/write ++ * sem_array.sem_undo: global sem_lock() for read/write + * sem_undo.proc_next: only "current" is allowed to read/write that field. + * ++ * sem_array.sem_base[i].pending_{const,alter}: ++ * global or semaphore sem_lock() for read/write + */ + + #define sc_semmsl sem_ctls[0] +@@ -536,12 +539,19 @@ SYSCALL_DEFINE3(semget, key_t, key, int, + return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); + } + +-/* +- * Determine whether a sequence of semaphore operations would succeed +- * all at once. Return 0 if yes, 1 if need to sleep, else return error code. ++/** perform_atomic_semop - Perform (if possible) a semaphore operation ++ * @sma: semaphore array ++ * @sops: array with operations that should be checked ++ * @nsems: number of sops ++ * @un: undo array ++ * @pid: pid that did the change ++ * ++ * Returns 0 if the operation was possible. ++ * Returns 1 if the operation is impossible, the caller must sleep. ++ * Negative values are error codes. + */ + +-static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, ++static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops, + int nsops, struct sem_undo *un, int pid) + { + int result, sem_op; +@@ -724,8 +734,8 @@ static int wake_const_ops(struct sem_arr + q = container_of(walk, struct sem_queue, list); + walk = walk->next; + +- error = try_atomic_semop(sma, q->sops, q->nsops, +- q->undo, q->pid); ++ error = perform_atomic_semop(sma, q->sops, q->nsops, ++ q->undo, q->pid); + + if (error <= 0) { + /* operation completed, remove from queue & wakeup */ +@@ -838,7 +848,7 @@ again: + if (semnum != -1 && sma->sem_base[semnum].semval == 0) + break; + +- error = try_atomic_semop(sma, q->sops, q->nsops, ++ error = perform_atomic_semop(sma, q->sops, q->nsops, + q->undo, q->pid); + + /* Does q->sleeper still need to sleep? 
*/ +@@ -1686,7 +1696,6 @@ static int get_queue_result(struct sem_q + return error; + } + +- + SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, + unsigned, nsops, const struct timespec __user *, timeout) + { +@@ -1784,7 +1793,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, + if (un && un->semid == -1) + goto out_unlock_free; + +- error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); ++ error = perform_atomic_semop(sma, sops, nsops, un, ++ task_tgid_vnr(current)); + if (error <= 0) { + if (alter && error == 0) + do_smart_update(sma, sops, nsops, 1, &tasks); diff --git a/queue-3.10/ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch b/queue-3.10/ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch new file mode 100644 index 00000000000..8daa59431bd --- /dev/null +++ b/queue-3.10/ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch @@ -0,0 +1,123 @@ +From d12e1e50e47e0900dbbf52237b7e171f4f15ea1e Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:25 -0700 +Subject: ipc/sem.c: replace shared sem_otime with per-semaphore value + +From: Manfred Spraul + +commit d12e1e50e47e0900dbbf52237b7e171f4f15ea1e upstream. + +sem_otime contains the time of the last semaphore operation that +completed successfully. Every operation updates this value, thus access +from multiple cpus can cause thrashing. + +Therefore the patch replaces the variable with a per-semaphore variable. +The per-array sem_otime is only calculated when required. + +No performance improvement on a single-socket i3 - only important for +larger systems. + +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/sem.h | 1 - + ipc/sem.c | 37 +++++++++++++++++++++++++++++++------ + 2 files changed, 31 insertions(+), 7 deletions(-) + +--- a/include/linux/sem.h ++++ b/include/linux/sem.h +@@ -12,7 +12,6 @@ struct task_struct; + struct sem_array { + struct kern_ipc_perm ____cacheline_aligned_in_smp + sem_perm; /* permissions .. see ipc.h */ +- time_t sem_otime; /* last semop time */ + time_t sem_ctime; /* last change time */ + struct sem *sem_base; /* ptr to first semaphore in array */ + struct list_head pending_alter; /* pending operations */ +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -99,6 +99,7 @@ struct sem { + /* that alter the semaphore */ + struct list_head pending_const; /* pending single-sop operations */ + /* that do not alter the semaphore*/ ++ time_t sem_otime; /* candidate for sem_otime */ + } ____cacheline_aligned_in_smp; + + /* One queue for each sleeping process in the system. 
*/ +@@ -911,8 +912,14 @@ static void do_smart_update(struct sem_a + } + } + } +- if (otime) +- sma->sem_otime = get_seconds(); ++ if (otime) { ++ if (sops == NULL) { ++ sma->sem_base[0].sem_otime = get_seconds(); ++ } else { ++ sma->sem_base[sops[0].sem_num].sem_otime = ++ get_seconds(); ++ } ++ } + } + + +@@ -1058,6 +1065,21 @@ static unsigned long copy_semid_to_user( + } + } + ++static time_t get_semotime(struct sem_array *sma) ++{ ++ int i; ++ time_t res; ++ ++ res = sma->sem_base[0].sem_otime; ++ for (i = 1; i < sma->sem_nsems; i++) { ++ time_t to = sma->sem_base[i].sem_otime; ++ ++ if (to > res) ++ res = to; ++ } ++ return res; ++} ++ + static int semctl_nolock(struct ipc_namespace *ns, int semid, + int cmd, int version, void __user *p) + { +@@ -1131,9 +1153,9 @@ static int semctl_nolock(struct ipc_name + goto out_unlock; + + kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); +- tbuf.sem_otime = sma->sem_otime; +- tbuf.sem_ctime = sma->sem_ctime; +- tbuf.sem_nsems = sma->sem_nsems; ++ tbuf.sem_otime = get_semotime(sma); ++ tbuf.sem_ctime = sma->sem_ctime; ++ tbuf.sem_nsems = sma->sem_nsems; + rcu_read_unlock(); + if (copy_semid_to_user(p, &tbuf, version)) + return -EFAULT; +@@ -2025,6 +2047,9 @@ static int sysvipc_sem_proc_show(struct + { + struct user_namespace *user_ns = seq_user_ns(s); + struct sem_array *sma = it; ++ time_t sem_otime; ++ ++ sem_otime = get_semotime(sma); + + return seq_printf(s, + "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", +@@ -2036,7 +2061,7 @@ static int sysvipc_sem_proc_show(struct + from_kgid_munged(user_ns, sma->sem_perm.gid), + from_kuid_munged(user_ns, sma->sem_perm.cuid), + from_kgid_munged(user_ns, sma->sem_perm.cgid), +- sma->sem_otime, ++ sem_otime, + sma->sem_ctime); + } + #endif diff --git a/queue-3.10/ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch b/queue-3.10/ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch new file mode 100644 index 00000000000..89598f3f510 --- /dev/null +++ b/queue-3.10/ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch @@ -0,0 +1,71 @@ +From 196aa0132fc7261f34b10ae1bfb44abc1bc69b3c Mon Sep 17 00:00:00 2001 +From: Manfred Spraul +Date: Mon, 8 Jul 2013 16:01:20 -0700 +Subject: ipc/util.c, ipc_rcu_alloc: cacheline align allocation + +From: Manfred Spraul + +commit 196aa0132fc7261f34b10ae1bfb44abc1bc69b3c upstream. + +Enforce that ipc_rcu_alloc returns a cacheline aligned pointer on SMP. + +Rationale: + +The SysV sem code tries to move the main spinlock into a seperate +cacheline (____cacheline_aligned_in_smp). This works only if +ipc_rcu_alloc returns cacheline aligned pointers. vmalloc and kmalloc +return cacheline algined pointers, the implementation of ipc_rcu_alloc +breaks that. + +[akpm@linux-foundation.org: coding-style fixes] +Signed-off-by: Manfred Spraul +Cc: Rik van Riel +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + ipc/util.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/ipc/util.c ++++ b/ipc/util.c +@@ -468,9 +468,7 @@ void ipc_free(void* ptr, int size) + struct ipc_rcu { + struct rcu_head rcu; + atomic_t refcount; +- /* "void *" makes sure alignment of following data is sane. 
*/ +- void *data[0]; +-}; ++} ____cacheline_aligned_in_smp; + + /** + * ipc_rcu_alloc - allocate ipc and rcu space +@@ -488,12 +486,14 @@ void *ipc_rcu_alloc(int size) + if (unlikely(!out)) + return NULL; + atomic_set(&out->refcount, 1); +- return out->data; ++ return out + 1; + } + + int ipc_rcu_getref(void *ptr) + { +- return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu, data)->refcount); ++ struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; ++ ++ return atomic_inc_not_zero(&p->refcount); + } + + /** +@@ -507,7 +507,7 @@ static void ipc_schedule_free(struct rcu + + void ipc_rcu_putref(void *ptr) + { +- struct ipc_rcu *p = container_of(ptr, struct ipc_rcu, data); ++ struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; + + if (!atomic_dec_and_test(&p->refcount)) + return; diff --git a/queue-3.10/series b/queue-3.10/series index 456597e69cc..74a2f032320 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -38,3 +38,9 @@ ipc-msg-make-msgctl_nolock-lockless.patch ipc-msg-shorten-critical-region-in-msgsnd.patch ipc-msg-shorten-critical-region-in-msgrcv.patch ipc-remove-unused-functions.patch +ipc-util.c-ipc_rcu_alloc-cacheline-align-allocation.patch +ipc-sem.c-cacheline-align-the-semaphore-structures.patch +ipc-sem-separate-wait-for-zero-and-alter-tasks-into-seperate-queues.patch +ipc-sem.c-always-use-only-one-queue-for-alter-operations.patch +ipc-sem.c-replace-shared-sem_otime-with-per-semaphore-value.patch +ipc-sem.c-rename-try_atomic_semop-to-perform_atomic_semop-docu-update.patch
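For reference only (not part of the queued patches): the pending_const/pending_alter split introduced by this series mirrors a distinction that is already visible in the userspace SysV API, where a sembuf with sem_op == 0 is a wait-for-zero operation and any non-zero sem_op is an alter operation. The sketch below only illustrates that mapping; the key 0x1234, the single-semaphore set, and the 0600 mode are arbitrary assumptions for illustration, not values taken from the patches.

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

int main(void)
{
	/* Create (or attach to) a set with one semaphore; key and mode are arbitrary. */
	int semid = semget(0x1234, 1, IPC_CREAT | 0600);
	if (semid < 0) {
		perror("semget");
		return EXIT_FAILURE;
	}

	/* Alter operation: increment semaphore 0.  Simple increments never
	 * sleep, so this completes immediately. */
	struct sembuf inc = { .sem_num = 0, .sem_op = 1, .sem_flg = 0 };
	if (semop(semid, &inc, 1) < 0)
		perror("semop(+1)");

	/* Alter operation: decrement semaphore 0.  Had it needed to sleep, a
	 * single-sop request like this would sit on the per-semaphore
	 * pending_alter list. */
	struct sembuf dec = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
	if (semop(semid, &dec, 1) < 0)
		perror("semop(-1)");

	/* Wait-for-zero operation: sem_op == 0.  Had it needed to sleep, it
	 * would sit on the per-semaphore pending_const list instead. */
	struct sembuf zero = { .sem_num = 0, .sem_op = 0, .sem_flg = IPC_NOWAIT };
	if (semop(semid, &zero, 1) < 0)
		perror("semop(0)");

	/* Remove the set again so the sketch leaves nothing behind. */
	semctl(semid, 0, IPC_RMID);

	return 0;
}

After the increment and decrement the semaphore value is back to zero, so the final wait-for-zero succeeds even with IPC_NOWAIT; dropping that flag makes the call block while another process holds the value above zero, which is exactly the case the new pending_const queues handle in the kernel.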