From: Greg Kroah-Hartman Date: Mon, 8 Aug 2016 14:22:31 +0000 (+0200) Subject: 3.14-stable patches X-Git-Tag: v3.14.75~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c75ef1465ba0f03d38afa1ea17ba54610fc1eb4d;p=thirdparty%2Fkernel%2Fstable-queue.git 3.14-stable patches added patches: libceph-apply-new_state-before-new_up_client-on-incrementals.patch net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch --- diff --git a/queue-3.14/libceph-apply-new_state-before-new_up_client-on-incrementals.patch b/queue-3.14/libceph-apply-new_state-before-new_up_client-on-incrementals.patch new file mode 100644 index 00000000000..36627f944f7 --- /dev/null +++ b/queue-3.14/libceph-apply-new_state-before-new_up_client-on-incrementals.patch @@ -0,0 +1,224 @@ +From 930c532869774ebf8af9efe9484c597f896a7d46 Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Tue, 19 Jul 2016 03:50:28 +0200 +Subject: libceph: apply new_state before new_up_client on incrementals + +From: Ilya Dryomov + +commit 930c532869774ebf8af9efe9484c597f896a7d46 upstream. + +Currently, osd_weight and osd_state fields are updated in the encoding +order. This is wrong, because an incremental map may look like e.g. + + new_up_client: { osd=6, addr=... } # set osd_state and addr + new_state: { osd=6, xorstate=EXISTS } # clear osd_state + +Suppose osd6's current osd_state is EXISTS (i.e. osd6 is down). After +applying new_up_client, osd_state is changed to EXISTS | UP. Carrying +on with the new_state update, we flip EXISTS and leave osd6 in a weird +"!EXISTS but UP" state. A non-existent OSD is considered down by the +mapping code + +2087 for (i = 0; i < pg->pg_temp.len; i++) { +2088 if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) { +2089 if (ceph_can_shift_osds(pi)) +2090 continue; +2091 +2092 temp->osds[temp->size++] = CRUSH_ITEM_NONE; + +and so requests get directed to the second OSD in the set instead of +the first, resulting in OSD-side errors like: + +[WRN] : client.4239 192.168.122.21:0/2444980242 misdirected client.4239.1:2827 pg 2.5df899f2 to osd.4 not [1,4,6] in e680/680 + +and hung rbds on the client: + +[ 493.566367] rbd: rbd0: write 400000 at 11cc00000 (0) +[ 493.566805] rbd: rbd0: result -6 xferred 400000 +[ 493.567011] blk_update_request: I/O error, dev rbd0, sector 9330688 + +The fix is to decouple application from the decoding and: +- apply new_weight first +- apply new_state before new_up_client +- twiddle osd_state flags if marking in +- clear out some of the state if osd is destroyed + +Fixes: http://tracker.ceph.com/issues/14901 + +Signed-off-by: Ilya Dryomov +Reviewed-by: Josh Durgin +[idryomov@gmail.com: backport to 3.10-3.14: strip primary-affinity] +Signed-off-by: Greg Kroah-Hartman + +--- + net/ceph/osdmap.c | 152 ++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 108 insertions(+), 44 deletions(-) + +--- a/net/ceph/osdmap.c ++++ b/net/ceph/osdmap.c +@@ -825,6 +825,110 @@ bad: + } + + /* ++ * Encoding order is (new_up_client, new_state, new_weight). Need to ++ * apply in the (new_weight, new_state, new_up_client) order, because ++ * an incremental map may look like e.g. ++ * ++ * new_up_client: { osd=6, addr=... } # set osd_state and addr ++ * new_state: { osd=6, xorstate=EXISTS } # clear osd_state ++ */ ++static int decode_new_up_state_weight(void **p, void *end, ++ struct ceph_osdmap *map) ++{ ++ void *new_up_client; ++ void *new_state; ++ void *new_weight_end; ++ u32 len; ++ ++ new_up_client = *p; ++ ceph_decode_32_safe(p, end, len, e_inval); ++ len *= sizeof(u32) + sizeof(struct ceph_entity_addr); ++ ceph_decode_need(p, end, len, e_inval); ++ *p += len; ++ ++ new_state = *p; ++ ceph_decode_32_safe(p, end, len, e_inval); ++ len *= sizeof(u32) + sizeof(u8); ++ ceph_decode_need(p, end, len, e_inval); ++ *p += len; ++ ++ /* new_weight */ ++ ceph_decode_32_safe(p, end, len, e_inval); ++ while (len--) { ++ s32 osd; ++ u32 w; ++ ++ ceph_decode_need(p, end, 2*sizeof(u32), e_inval); ++ osd = ceph_decode_32(p); ++ w = ceph_decode_32(p); ++ BUG_ON(osd >= map->max_osd); ++ pr_info("osd%d weight 0x%x %s\n", osd, w, ++ w == CEPH_OSD_IN ? "(in)" : ++ (w == CEPH_OSD_OUT ? "(out)" : "")); ++ map->osd_weight[osd] = w; ++ ++ /* ++ * If we are marking in, set the EXISTS, and clear the ++ * AUTOOUT and NEW bits. ++ */ ++ if (w) { ++ map->osd_state[osd] |= CEPH_OSD_EXISTS; ++ map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT | ++ CEPH_OSD_NEW); ++ } ++ } ++ new_weight_end = *p; ++ ++ /* new_state (up/down) */ ++ *p = new_state; ++ len = ceph_decode_32(p); ++ while (len--) { ++ s32 osd; ++ u8 xorstate; ++ ++ osd = ceph_decode_32(p); ++ xorstate = ceph_decode_8(p); ++ if (xorstate == 0) ++ xorstate = CEPH_OSD_UP; ++ BUG_ON(osd >= map->max_osd); ++ if ((map->osd_state[osd] & CEPH_OSD_UP) && ++ (xorstate & CEPH_OSD_UP)) ++ pr_info("osd%d down\n", osd); ++ if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && ++ (xorstate & CEPH_OSD_EXISTS)) { ++ pr_info("osd%d does not exist\n", osd); ++ map->osd_weight[osd] = CEPH_OSD_IN; ++ memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr)); ++ map->osd_state[osd] = 0; ++ } else { ++ map->osd_state[osd] ^= xorstate; ++ } ++ } ++ ++ /* new_up_client */ ++ *p = new_up_client; ++ len = ceph_decode_32(p); ++ while (len--) { ++ s32 osd; ++ struct ceph_entity_addr addr; ++ ++ osd = ceph_decode_32(p); ++ ceph_decode_copy(p, &addr, sizeof(addr)); ++ ceph_decode_addr(&addr); ++ BUG_ON(osd >= map->max_osd); ++ pr_info("osd%d up\n", osd); ++ map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP; ++ map->osd_addr[osd] = addr; ++ } ++ ++ *p = new_weight_end; ++ return 0; ++ ++e_inval: ++ return -EINVAL; ++} ++ ++/* + * decode and apply an incremental map update. + */ + struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, +@@ -939,50 +1043,10 @@ struct ceph_osdmap *osdmap_apply_increme + __remove_pg_pool(&map->pg_pools, pi); + } + +- /* new_up */ +- err = -EINVAL; +- ceph_decode_32_safe(p, end, len, bad); +- while (len--) { +- u32 osd; +- struct ceph_entity_addr addr; +- ceph_decode_32_safe(p, end, osd, bad); +- ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad); +- ceph_decode_addr(&addr); +- pr_info("osd%d up\n", osd); +- BUG_ON(osd >= map->max_osd); +- map->osd_state[osd] |= CEPH_OSD_UP; +- map->osd_addr[osd] = addr; +- } +- +- /* new_state */ +- ceph_decode_32_safe(p, end, len, bad); +- while (len--) { +- u32 osd; +- u8 xorstate; +- ceph_decode_32_safe(p, end, osd, bad); +- xorstate = **(u8 **)p; +- (*p)++; /* clean flag */ +- if (xorstate == 0) +- xorstate = CEPH_OSD_UP; +- if (xorstate & CEPH_OSD_UP) +- pr_info("osd%d down\n", osd); +- if (osd < map->max_osd) +- map->osd_state[osd] ^= xorstate; +- } +- +- /* new_weight */ +- ceph_decode_32_safe(p, end, len, bad); +- while (len--) { +- u32 osd, off; +- ceph_decode_need(p, end, sizeof(u32)*2, bad); +- osd = ceph_decode_32(p); +- off = ceph_decode_32(p); +- pr_info("osd%d weight 0x%x %s\n", osd, off, +- off == CEPH_OSD_IN ? "(in)" : +- (off == CEPH_OSD_OUT ? "(out)" : "")); +- if (osd < map->max_osd) +- map->osd_weight[osd] = off; +- } ++ /* new_up_client, new_state, new_weight */ ++ err = decode_new_up_state_weight(p, end, map); ++ if (err) ++ goto bad; + + /* new_pg_temp */ + ceph_decode_32_safe(p, end, len, bad); diff --git a/queue-3.14/net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch b/queue-3.14/net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch new file mode 100644 index 00000000000..919f0886fac --- /dev/null +++ b/queue-3.14/net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch @@ -0,0 +1,47 @@ +From 06708f81528725148473c0869d6af5f809c6824b Mon Sep 17 00:00:00 2001 +From: Dmitri Epshtein +Date: Wed, 6 Jul 2016 04:18:58 +0200 +Subject: net: mvneta: set real interrupt per packet for tx_done + +From: Dmitri Epshtein + +commit 06708f81528725148473c0869d6af5f809c6824b upstream. + +Commit aebea2ba0f74 ("net: mvneta: fix Tx interrupt delay") intended to +set coalescing threshold to a value guaranteeing interrupt generation +per each sent packet, so that buffers can be released with no delay. + +In fact setting threshold to '1' was wrong, because it causes interrupt +every two packets. According to the documentation a reason behind it is +following - interrupt occurs once sent buffers counter reaches a value, +which is higher than one specified in MVNETA_TXQ_SIZE_REG(q). This +behavior was confirmed during tests. Also when testing the SoC working +as a NAS device, better performance was observed with int-per-packet, +as it strongly depends on the fact that all transmitted packets are +released immediately. + +This commit enables NETA controller work in interrupt per sent packet mode +by setting coalescing threshold to 0. + +Signed-off-by: Dmitri Epshtein +Signed-off-by: Marcin Wojtas +Fixes aebea2ba0f74 ("net: mvneta: fix Tx interrupt delay") +Acked-by: Willy Tarreau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/marvell/mvneta.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -213,7 +213,7 @@ + /* Various constants */ + + /* Coalescing */ +-#define MVNETA_TXDONE_COAL_PKTS 1 ++#define MVNETA_TXDONE_COAL_PKTS 0 /* interrupt per packet */ + #define MVNETA_RX_COAL_PKTS 32 + #define MVNETA_RX_COAL_USEC 100 + diff --git a/queue-3.14/series b/queue-3.14/series index d75ef0de440..bd8da562477 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -17,3 +17,5 @@ can-at91_can-rx-queue-could-get-stuck-at-high-bus-load.patch can-fix-handling-of-unmodifiable-configuration-options-fix.patch can-fix-oops-caused-by-wrong-rtnl-dellink-usage.patch ipr-clear-interrupt-on-croc-crocodile-when-running-with-lsi.patch +libceph-apply-new_state-before-new_up_client-on-incrementals.patch +net-mvneta-set-real-interrupt-per-packet-for-tx_done.patch