From 9ed91d34c567ae49b416f67204edb1a1b6f3eecd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 9 May 2014 05:41:22 +0200 Subject: [PATCH] 3.14-stable patches added patches: crush-fix-off-by-one-errors-in-total_tries-refactor.patch --- ...y-one-errors-in-total_tries-refactor.patch | 150 ++++++++++++++++++ queue-3.14/series | 1 + 2 files changed, 151 insertions(+) create mode 100644 queue-3.14/crush-fix-off-by-one-errors-in-total_tries-refactor.patch diff --git a/queue-3.14/crush-fix-off-by-one-errors-in-total_tries-refactor.patch b/queue-3.14/crush-fix-off-by-one-errors-in-total_tries-refactor.patch new file mode 100644 index 00000000000..2ef7fda05ec --- /dev/null +++ b/queue-3.14/crush-fix-off-by-one-errors-in-total_tries-refactor.patch @@ -0,0 +1,150 @@ +From 48a163dbb517eba13643bf404a0d695c1ab0a60d Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Wed, 19 Mar 2014 16:58:36 +0200 +Subject: crush: fix off-by-one errors in total_tries refactor + +From: Ilya Dryomov + +commit 48a163dbb517eba13643bf404a0d695c1ab0a60d upstream. + +Back in 27f4d1f6bc32c2ed7b2c5080cbd58b14df622607 we refactored the CRUSH +code to allow adjustment of the retry counts on a per-pool basis. That +commit had an off-by-one bug: the previous "tries" counter was a *retry* +count, not a *try* count, but the new code was passing in 1 meaning +there should be no retries. + +Fix the ftotal vs tries comparison to use < instead of <= to fix the +problem. Note that the original code used <= here, which means the +global "choose_total_tries" tunable is actually counting retries. +Compensate for that by adding 1 in crush_do_rule when we pull the tunable +into the local variable. + +This was noticed looking at output from a user provided osdmap. +Unfortunately the map doesn't illustrate the change in mapping behavior +and I haven't managed to construct one yet that does. Inspection of the +crush debug output now aligns with prior versions, though. + +Reflects ceph.git commit 795704fd615f0b008dcc81aa088a859b2d075138. + +Signed-off-by: Ilya Dryomov +Reviewed-by: Josh Durgin +Signed-off-by: Greg Kroah-Hartman + +--- + net/ceph/crush/mapper.c | 46 +++++++++++++++++++++++++++------------------- + 1 file changed, 27 insertions(+), 19 deletions(-) + +--- a/net/ceph/crush/mapper.c ++++ b/net/ceph/crush/mapper.c +@@ -292,8 +292,8 @@ static int is_out(const struct crush_map + * @outpos: our position in that vector + * @tries: number of attempts to make + * @recurse_tries: number of attempts to have recursive chooseleaf make +- * @local_tries: localized retries +- * @local_fallback_tries: localized fallback retries ++ * @local_retries: localized retries ++ * @local_fallback_retries: localized fallback retries + * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose) + * @out2: second output vector for leaf items (if @recurse_to_leaf) + */ +@@ -304,8 +304,8 @@ static int crush_choose_firstn(const str + int *out, int outpos, + unsigned int tries, + unsigned int recurse_tries, +- unsigned int local_tries, +- unsigned int local_fallback_tries, ++ unsigned int local_retries, ++ unsigned int local_fallback_retries, + int recurse_to_leaf, + int *out2) + { +@@ -344,9 +344,9 @@ static int crush_choose_firstn(const str + reject = 1; + goto reject; + } +- if (local_fallback_tries > 0 && ++ if (local_fallback_retries > 0 && + flocal >= (in->size>>1) && +- flocal > local_fallback_tries) ++ flocal > local_fallback_retries) + item = bucket_perm_choose(in, x, r); + else + item = crush_bucket_choose(in, x, r); +@@ -393,8 +393,8 @@ static int crush_choose_firstn(const str + x, outpos+1, 0, + out2, outpos, + recurse_tries, 0, +- local_tries, +- local_fallback_tries, ++ local_retries, ++ local_fallback_retries, + 0, + NULL) <= outpos) + /* didn't get leaf */ +@@ -420,14 +420,14 @@ reject: + ftotal++; + flocal++; + +- if (collide && flocal <= local_tries) ++ if (collide && flocal <= local_retries) + /* retry locally a few times */ + retry_bucket = 1; +- else if (local_fallback_tries > 0 && +- flocal <= in->size + local_fallback_tries) ++ else if (local_fallback_retries > 0 && ++ flocal <= in->size + local_fallback_retries) + /* exhaustive bucket search */ + retry_bucket = 1; +- else if (ftotal <= tries) ++ else if (ftotal < tries) + /* then retry descent */ + retry_descent = 1; + else +@@ -640,10 +640,18 @@ int crush_do_rule(const struct crush_map + __u32 step; + int i, j; + int numrep; +- int choose_tries = map->choose_total_tries; +- int choose_local_tries = map->choose_local_tries; +- int choose_local_fallback_tries = map->choose_local_fallback_tries; ++ /* ++ * the original choose_total_tries value was off by one (it ++ * counted "retries" and not "tries"). add one. ++ */ ++ int choose_tries = map->choose_total_tries + 1; + int choose_leaf_tries = 0; ++ /* ++ * the local tries values were counted as "retries", though, ++ * and need no adjustment ++ */ ++ int choose_local_retries = map->choose_local_tries; ++ int choose_local_fallback_retries = map->choose_local_fallback_tries; + + if ((__u32)ruleno >= map->max_rules) { + dprintk(" bad ruleno %d\n", ruleno); +@@ -677,12 +685,12 @@ int crush_do_rule(const struct crush_map + + case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES: + if (curstep->arg1 > 0) +- choose_local_tries = curstep->arg1; ++ choose_local_retries = curstep->arg1; + break; + + case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES: + if (curstep->arg1 > 0) +- choose_local_fallback_tries = curstep->arg1; ++ choose_local_fallback_retries = curstep->arg1; + break; + + case CRUSH_RULE_CHOOSELEAF_FIRSTN: +@@ -734,8 +742,8 @@ int crush_do_rule(const struct crush_map + o+osize, j, + choose_tries, + recurse_tries, +- choose_local_tries, +- choose_local_fallback_tries, ++ choose_local_retries, ++ choose_local_fallback_retries, + recurse_to_leaf, + c+osize); + } else { diff --git a/queue-3.14/series b/queue-3.14/series index 3d200f47dd9..45ff2a78af5 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -38,3 +38,4 @@ ahci-do-not-receive-interrupts-sent-by-dummy-ports.patch libata-update-queued-trim-blacklist-for-m5x0-drives.patch iwlwifi-dvm-take-mutex-when-sending-sync-bt-config-command.patch iwlwifi-mvm-disable-uapsd-due-to-bugs-in-the-firmware.patch +crush-fix-off-by-one-errors-in-total_tries-refactor.patch -- 2.47.3