From: Greg Kroah-Hartman
Date: Sun, 7 Jun 2015 00:00:59 +0000 (-0700)
Subject: 4.0-stable patches
X-Git-Tag: v3.10.81~20
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d856161315896b9d6032ca6f09b6c5b6fd893796;p=thirdparty%2Fkernel%2Fstable-queue.git

4.0-stable patches

added patches:
	aio-fix-serial-draining-in-exit_aio.patch
	crush-ensuring-at-most-num-rep-osds-are-selected.patch
---

diff --git a/queue-4.0/aio-fix-serial-draining-in-exit_aio.patch b/queue-4.0/aio-fix-serial-draining-in-exit_aio.patch
new file mode 100644
index 00000000000..2df4bd1d1c3
--- /dev/null
+++ b/queue-4.0/aio-fix-serial-draining-in-exit_aio.patch
@@ -0,0 +1,152 @@
+From dc48e56d761610da4ea1088d1bea0a030b8e3e43 Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Wed, 15 Apr 2015 11:17:23 -0600
+Subject: aio: fix serial draining in exit_aio()
+
+From: Jens Axboe
+
+commit dc48e56d761610da4ea1088d1bea0a030b8e3e43 upstream.
+
+exit_aio() currently serializes killing io contexts. Each context
+killing ends up having to do percpu_ref_kill(), which in turn has
+to wait for an RCU grace period. This can take a long time, depending
+on the number of contexts. And there's no point in doing them serially,
+when we could be waiting for all of them in one fell swoop.
+
+This patch makes my fio thread offload test case exit in 0.2s instead
+of almost 6s.
+
+Reviewed-by: Jeff Moyer
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/aio.c | 45 ++++++++++++++++++++++++++++++---------------
+ 1 file changed, 30 insertions(+), 15 deletions(-)
+
+--- a/fs/aio.c
++++ b/fs/aio.c
+@@ -77,6 +77,11 @@ struct kioctx_cpu {
+ unsigned reqs_available;
+ };
+
++struct ctx_rq_wait {
++ struct completion comp;
++ atomic_t count;
++};
++
+ struct kioctx {
+ struct percpu_ref users;
+ atomic_t dead;
+@@ -115,7 +120,7 @@ struct kioctx {
+ /*
+ * signals when all in-flight requests are done
+ */
+- struct completion *requests_done;
++ struct ctx_rq_wait *rq_wait;
+
+ struct {
+ /*
+@@ -539,8 +544,8 @@ static void free_ioctx_reqs(struct percp
+ struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+
+ /* At this point we know that there are no any in-flight requests */
+- if (ctx->requests_done)
+- complete(ctx->requests_done);
++ if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
++ complete(&ctx->rq_wait->comp);
+
+ INIT_WORK(&ctx->free_work, free_ioctx);
+ schedule_work(&ctx->free_work);
+@@ -751,7 +756,7 @@ err:
+ * the rapid destruction of the kioctx.
+ */
+ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+- struct completion *requests_done)
++ struct ctx_rq_wait *wait)
+ {
+ struct kioctx_table *table;
+
+@@ -781,7 +786,7 @@ static int kill_ioctx(struct mm_struct *
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
+
+- ctx->requests_done = requests_done;
++ ctx->rq_wait = wait;
+ percpu_ref_kill(&ctx->users);
+ return 0;
+ }
+@@ -813,18 +818,24 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
+ void exit_aio(struct mm_struct *mm)
+ {
+ struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
+- int i;
++ struct ctx_rq_wait wait;
++ int i, skipped;
+
+ if (!table)
+ return;
+
++ atomic_set(&wait.count, table->nr);
++ init_completion(&wait.comp);
++
++ skipped = 0;
+ for (i = 0; i < table->nr; ++i) {
+ struct kioctx *ctx = table->table[i];
+- struct completion requests_done =
+- COMPLETION_INITIALIZER_ONSTACK(requests_done);
+
+- if (!ctx)
++ if (!ctx) {
++ skipped++;
+ continue;
++ }
++
+ /*
+ * We don't need to bother with munmap() here - exit_mmap(mm)
+ * is coming and it'll unmap everything. And we simply can't,
+@@ -833,10 +844,12 @@ void exit_aio(struct mm_struct *mm)
+ * that it needs to unmap the area, just set it to 0.
+ */
+ ctx->mmap_size = 0;
+- kill_ioctx(mm, ctx, &requests_done);
++ kill_ioctx(mm, ctx, &wait);
++ }
+
++ if (!atomic_sub_and_test(skipped, &wait.count)) {
+ /* Wait until all IO for the context are done. */
+- wait_for_completion(&requests_done);
++ wait_for_completion(&wait.comp);
+ }
+
+ RCU_INIT_POINTER(mm->ioctx_table, NULL);
+@@ -1321,15 +1334,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_
+ {
+ struct kioctx *ioctx = lookup_ioctx(ctx);
+ if (likely(NULL != ioctx)) {
+- struct completion requests_done =
+- COMPLETION_INITIALIZER_ONSTACK(requests_done);
++ struct ctx_rq_wait wait;
+ int ret;
+
++ init_completion(&wait.comp);
++ atomic_set(&wait.count, 1);
++
+ /* Pass requests_done to kill_ioctx() where it can be set
+ * in a thread-safe way. If we try to set it here then we have
+ * a race condition if two io_destroy() called simultaneously.
+ */
+- ret = kill_ioctx(current->mm, ioctx, &requests_done);
++ ret = kill_ioctx(current->mm, ioctx, &wait);
+ percpu_ref_put(&ioctx->users);
+
+ /* Wait until all IO for the context are done. Otherwise kernel
+@@ -1337,7 +1352,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_
+ * is destroyed.
+ */
+ if (!ret)
+- wait_for_completion(&requests_done);
++ wait_for_completion(&wait.comp);
+
+ return ret;
+ }
diff --git a/queue-4.0/crush-ensuring-at-most-num-rep-osds-are-selected.patch b/queue-4.0/crush-ensuring-at-most-num-rep-osds-are-selected.patch
new file mode 100644
index 00000000000..171e1c05a2f
--- /dev/null
+++ b/queue-4.0/crush-ensuring-at-most-num-rep-osds-are-selected.patch
@@ -0,0 +1,117 @@
+From 45002267e8d2699bf9b022315bee3dd13b044843 Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov
+Date: Tue, 14 Apr 2015 16:04:23 +0300
+Subject: crush: ensuring at most num-rep osds are selected
+
+From: Ilya Dryomov
+
+commit 45002267e8d2699bf9b022315bee3dd13b044843 upstream.
+
+Crush temporary buffers are allocated according to the replica size
+configured by the user. When the rule selects more final osds than
+there are replicas, the buffer overflows and causes a crash. Now at
+most num-rep osds are selected, even if the rule allows more.
+
+Reflects ceph.git commits 6b4d1aa99718e3b367496326c1e64551330fabc0,
+ 234b066ba04976783d15ff2abc3e81b6cc06fb10.
+
+Signed-off-by: Ilya Dryomov
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ net/ceph/crush/mapper.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+--- a/net/ceph/crush/mapper.c
++++ b/net/ceph/crush/mapper.c
+@@ -290,6 +290,7 @@ static int is_out(const struct crush_map
+ * @type: the type of item to choose
+ * @out: pointer to output vector
+ * @outpos: our position in that vector
++ * @out_size: size of the out vector
+ * @tries: number of attempts to make
+ * @recurse_tries: number of attempts to have recursive chooseleaf make
+ * @local_retries: localized retries
+@@ -304,6 +305,7 @@ static int crush_choose_firstn(const str
+ const __u32 *weight, int weight_max,
+ int x, int numrep, int type,
+ int *out, int outpos,
++ int out_size,
+ unsigned int tries,
+ unsigned int recurse_tries,
+ unsigned int local_retries,
+@@ -322,6 +324,7 @@ static int crush_choose_firstn(const str
+ int item = 0;
+ int itemtype;
+ int collide, reject;
++ int count = out_size;
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+ recurse_to_leaf ? "_LEAF" : "",
+@@ -329,7 +332,7 @@ static int crush_choose_firstn(const str
+ tries, recurse_tries, local_retries, local_fallback_retries,
+ parent_r);
+
+- for (rep = outpos; rep < numrep; rep++) {
++ for (rep = outpos; rep < numrep && count > 0 ; rep++) {
+ /* keep trying until we get a non-out, non-colliding item */
+ ftotal = 0;
+ skip_rep = 0;
+@@ -403,7 +406,7 @@ static int crush_choose_firstn(const str
+ map->buckets[-1-item],
+ weight, weight_max,
+ x, outpos+1, 0,
+- out2, outpos,
++ out2, outpos, count,
+ recurse_tries, 0,
+ local_retries,
+ local_fallback_retries,
+@@ -463,6 +466,7 @@ reject:
+ dprintk("CHOOSE got %d\n", item);
+ out[outpos] = item;
+ outpos++;
++ count--;
+ }
+
+ dprintk("CHOOSE returns %d\n", outpos);
+@@ -654,6 +658,7 @@ int crush_do_rule(const struct crush_map
+ __u32 step;
+ int i, j;
+ int numrep;
++ int out_size;
+ /*
+ * the original choose_total_tries value was off by one (it
+ * counted "retries" and not "tries"). add one.
+@@ -761,6 +766,7 @@ int crush_do_rule(const struct crush_map
+ x, numrep,
+ curstep->arg2,
+ o+osize, j,
++ result_max-osize,
+ choose_tries,
+ recurse_tries,
+ choose_local_retries,
+@@ -770,11 +776,13 @@ int crush_do_rule(const struct crush_map
+ c+osize,
+ 0);
+ } else {
++ out_size = ((numrep < (result_max-osize)) ?
++ numrep : (result_max-osize));
+ crush_choose_indep(
+ map,
+ map->buckets[-1-w[i]],
+ weight, weight_max,
+- x, numrep, numrep,
++ x, out_size, numrep,
+ curstep->arg2,
+ o+osize, j,
+ choose_tries,
+@@ -783,7 +791,7 @@ int crush_do_rule(const struct crush_map
+ recurse_to_leaf,
+ c+osize,
+ 0);
+- osize += numrep;
++ osize += out_size;
+ }
+ }
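
The aio patch above replaces one on-stack completion per io context with a
single shared pair of a completion and an atomic counter, so exit_aio()
blocks once for all contexts instead of once per context (and per RCU grace
period). Below is a minimal userspace sketch of that waiting pattern, with a
pthread mutex/condvar standing in for the kernel's struct completion and
atomic_t; all names are illustrative, not kernel or aio API.

/* batch_wait.c -- build with: cc batch_wait.c -o batch_wait -lpthread */
#include <pthread.h>
#include <stdio.h>

struct ctx_rq_wait {			/* userspace stand-in for the patch's struct */
	pthread_mutex_t lock;
	pthread_cond_t comp;		/* plays the role of struct completion */
	int count;			/* plays the role of atomic_t count */
};

static struct ctx_rq_wait wait_all = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
};

/* Each "context" signals the shared waiter when its draining is done,
 * as free_ioctx_reqs() does via atomic_dec_and_test() + complete(). */
static void *drain_ctx(void *arg)
{
	/* ... drain this context's in-flight requests here ... */
	pthread_mutex_lock(&wait_all.lock);
	if (--wait_all.count == 0)
		pthread_cond_signal(&wait_all.comp);
	pthread_mutex_unlock(&wait_all.lock);
	return arg;
}

int main(void)
{
	enum { NCTX = 8 };
	pthread_t tid[NCTX];
	int i;

	wait_all.count = NCTX;		/* arm the counter before starting */
	for (i = 0; i < NCTX; i++)
		pthread_create(&tid[i], NULL, drain_ctx, NULL);

	/* One wait for all contexts, not NCTX serial waits. */
	pthread_mutex_lock(&wait_all.lock);
	while (wait_all.count > 0)
		pthread_cond_wait(&wait_all.comp, &wait_all.lock);
	pthread_mutex_unlock(&wait_all.lock);

	for (i = 0; i < NCTX; i++)
		pthread_join(tid[i], NULL);
	printf("all %d contexts drained\n", NCTX);
	return 0;
}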
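
The crush patch above fixes the overrun by clamping how many osds a
selection step may emit to the space left in the caller's result buffer,
i.e. out_size = min(numrep, result_max - osize). A minimal sketch of that
clamp, outside the kernel and with illustrative names only:

/* clamp_select.c -- build with: cc clamp_select.c -o clamp_select */
#include <stdio.h>

/* Toy selection step: emits at most out_size ids, returns how many.
 * Mirrors crush_choose_firstn()/crush_choose_indep() taking an explicit
 * out_size bound instead of trusting numrep. */
static int choose_step(int *out, int out_size, int numrep)
{
	int n = (numrep < out_size) ? numrep : out_size;	/* the clamp */
	int i;

	for (i = 0; i < n; i++)
		out[i] = 100 + i;	/* fake osd ids */
	return n;
}

int main(void)
{
	int result[3];			/* buffer sized for num-rep = 3 */
	int result_max = 3, osize = 0;
	int numrep = 5;			/* rule asks for more osds than num-rep */

	/* Before the fix the step wrote numrep entries and overran result[];
	 * with the clamp it can never emit more than result_max - osize. */
	osize += choose_step(result + osize, result_max - osize, numrep);

	printf("selected %d of %d requested osds\n", osize, numrep);
	return 0;
}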